001package Torello.HTML.NodeSearch;
002
003import java.util.*;
004import Torello.HTML.*;
005
006/**
007 * A simple, demonstrative set of functions for retrieving {@code HTMLNode's} from a web-page
008 * (a 'Workbook Class').
009 * 
010 * <EMBED CLASS='external-html' DATA-FILE-ID=ELEMENTS>
011 */
012@Torello.JavaDoc.StaticFunctional
013public class Elements
014{
015    private Elements() { }
016
017    /**
018     * Retrieves the start and end points of the web-page body in the underlying HTML 
019     * page-{@code Vector}.
020     * All nodes between {@code <BODY> ... </BODY>} will be included.
021     * 
022     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
023     * 
024     * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested
025     * HTML sublist.
026     * 
027     * @see InnerTagFindInclusive
028     */
029    public static DotPair findBody(Vector<? extends HTMLNode> html)
030    { return InnerTagFindInclusive.first(html, "body"); }
031
032    /**
033     * Gets the nodes of the web-page body.
034     * All nodes between {@code <BODY> ... </BODY>} will be included.
035     * 
036     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
037     * @return The requested HTML sublist, as a {@code Vector}.
038     * @see InnerTagGetInclusive
039     */
040    public static Vector<HTMLNode> getBody(Vector<? extends HTMLNode> html)
041    { return InnerTagGetInclusive.first(html, "body"); }
042
043    /**
044     * Retrieves the start and end points of the web-page header in the underlying HTML 
045     * page-{@code Vector}.
046     * All nodes between {@code <HEAD> ... </HEAD>} will be included.
047     * 
048     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
049     * 
050     * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested 
051     * HTML sublist.
052     * 
053     * @see InnerTagFindInclusive
054     */
055    public static DotPair findHead(Vector<? extends HTMLNode> html)
056    { return InnerTagFindInclusive.first(html, "head"); }
057
058    /**
059     * Gets the nodes of the web-page header.
060     * All nodes between {@code <HEAD> ... </HEAD>} will be included.
061     * 
062     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
063     * @return The requested HTML sublist, as a {@code Vector}.
064     * @see InnerTagGetInclusive
065     */
066    public static Vector<HTMLNode> getHead(Vector<? extends HTMLNode> html)
067    { return InnerTagGetInclusive.first(html, "head"); }
068
069    /**
070     * Gets all {@code <META NAME="..." CONTENT="...">} (or {@code <META CHARSET="...">}
071     * and {@code <META HTTP-EQUIV="...">}) elements in a web-page header - returned via
072     * their position in the page-{@code Vector}.
073     * 
074     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
075     * 
076     * @return The requested HTML Elements, as an integer-array list of index-pointers to
077     * the underlying {@code Vector}.
078     * 
079     * @see TagNodeFind
080     */
081    public static int[] findMeta(Vector<? extends HTMLNode> html)
082    { return TagNodeFind.all(html, TC.OpeningTags, "meta"); }
083
084    /**
085     * Gets all {@code <META NAME="..." CONTENT="...">} (or {@code <META CHARSET="...">}
086     * and {@code <META HTTP-EQUIV="...">}) elements in a web-page header.
087     * 
088     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
089     * @return The requested HTML Elements, as {@code TagNode's}, in a return {@code Vector}.
090     * @see TagNodeGet
091     */
092    public static Vector<TagNode> getMeta(Vector<? extends HTMLNode> html)
093    { return TagNodeGet.all(html, TC.OpeningTags, "meta"); }
094
095    /**
096     * Gets all {@code <LINK REL="..." HREF="...">} elements in a web-page header - returned 
097     * via their position in the page-{@code Vector}.
098     * 
099     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
100     * 
101     * @return The requested HTML Elements, as an integer-array list of index-pointers to
102     * the underlying {@code Vector}.
103     * 
104     * @see TagNodeFind
105     */
106    public static int[] findLink(Vector<? extends HTMLNode> html)
107    { return TagNodeFind.all(html, TC.OpeningTags, "link"); }
108
109    /**
110     * Gets all {@code <LINK REL="..." HREF="...">} elements in a web-page header.
111     * 
112     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
113     * @return The requested HTML Elements, as {@code TagNode's}, in a return {@code Vector}.
114     * @see TagNodeGet
115     */
116    public static Vector<TagNode> getLink(Vector<? extends HTMLNode> html)
117    { return TagNodeGet.all(html, TC.OpeningTags, "link"); }
118
119    /**
120     * Returns the start and end positions in the page-{@code Vector} of the HTML
121     * {@code <TITLE>...</TITLE>} elements.
122     * 
123     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
124     * 
125     * @return The start and end index pointers, as a {@code DotPair}, of the HTML
126     * requested HTML sublist.
127     * 
128     * @see InnerTagFindInclusive
129     */
130    public static DotPair findTitle(Vector<? extends HTMLNode> html)
131    { return TagNodeFindInclusive.first(html, "title"); }
132
133    /**
134     * Returns the {@code <TITLE>...</TITLE>} elements sub-list from the HTML page-{@code Vector}.
135     * 
136     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
137     * @return The requested HTML sublist, as a {@code Vector}.
138     * @see InnerTagGetInclusive
139     */
140    public static Vector<HTMLNode> getTitle(Vector<? extends HTMLNode> html)
141    { return TagNodeGetInclusive.first(html, "title"); }
142
143    /**
144     * Returns the {@code String} encapsulated by the HTML {@code 'HEAD'}-section's
145     * {@code "<TITLE>...</TITLE>"} element, if there such an element.  If there is no such
146     * element, null is returned.  If there is a {@code 'TITLE'} element, but it has the 
147     * empty-{@code String} (zero-length-string) an empty {@code String} is returned.
148     * 
149     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
150     * Retrieves the {@code 'TITLE'} of an HTML page - by getting the {@code String}-text between
151     * the {@code 'TITLE'} elements.
152     * 
153     * @return The title string
154     */ 
155    public static String titleString(Vector<? extends HTMLNode> html)
156    {
157        Vector<HTMLNode> title = getTitle(html);
158
159        if (title == null) return null;
160        
161        return Util.textNodesString(title);
162    }
163
164    /**
165     * This method will find the very first HTML {@code 'TABLE'}
166     * (<CODE>&lt;TABLE&gt; &lt;TH&gt;...&lt;/TH&gt; &lt;TR&gt; &lt;TD&gt;..&lt;/TD&gt; ...
167     * &lt;/TR&gt; ... &lt;/TABLE&gt;</CODE>) element set.  This returns the {@code Vector}
168     * Position starting and ending boundaries {@code DotPair.start, DotPair.end} rather than
169     * pointer-references to the nodes.  This is what the <B>{@code 'FIND'}</B> keyword usually
170     * means in this HTML-Scrape package.
171     * 
172     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
173     * 
174     * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested HTML
175     * sublist.
176     * 
177     * @see TagNodeFindInclusive
178     */
179    public static DotPair findTable(Vector<? extends HTMLNode> html)
180    { return TagNodeFindInclusive.first(html, "table"); }
181
182    /**
183     * This method will find the very first HTML {@code 'TABLE'}
184     * (<CODE>&lt;TABLE&gt; &lt;TH&gt;...&lt;/TH&gt; &lt;TR&gt; &lt;TD&gt;..&lt;/TD&gt; ...
185     * &lt;/TR&gt; ... &lt;/TABLE&gt;</CODE>) element set. This returns the {@code Vector} Position
186     * starting and ending boundaries {@code DotPair.start, DotPair.end} rather than
187     * pointer-references to the nodes.  This is what the <B>{@code 'FIND'}</B> keyword usually
188     * means in this HTML-Scrape package.
189     * 
190     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
191     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
192     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
193     * 
194     * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested HTML
195     * sublist.
196     * 
197     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
198     * @see TagNodeFindInclusive
199     */
200    public static DotPair findTable(Vector<? extends HTMLNode> html, int sPos, int ePos)
201    { return TagNodeFindInclusive.first(html, sPos, ePos, "table"); }
202
203    /**
204     * This method will get the very first HTML {@code 'TABLE'}
205     * (<CODE>&lt;TABLE&gt; &lt;TR&gt; &lt;TH&gt;...&lt;/TH&gt; &lt;/TR&gt; &lt;TR&gt;
206     * &lt;TD&gt;..&lt;/TD&gt; ... &lt;/TR&gt; ... &lt;/TABLE&gt;</CODE>) element set.  This
207     * returns a sub-{@code Vector} (an actual {@code Vector<HTMLNode>} object, not a {@code Vector
208     * / array} starting and ending indices pair). This is what the <B>{@code 'GET'}</B> keyword
209     * usually means in this HTML-Scrape package.
210     * 
211     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
212     * @return The requested HTML sublist, as a {@code Vector}.
213     * @see TagNodeGetInclusive
214     */
215    public static Vector<HTMLNode> getTable(Vector<? extends HTMLNode> html)
216    { return TagNodeGetInclusive.first(html, "table"); }
217
218    /**
219     * This method will get the very first HTML {@code 'TABLE'}
220     * (<CODE>&lt;TABLE&gt; &lt;TH&gt;...&lt;/TH&gt; &lt;TR&gt; &lt;TD&gt;..&lt;/TD&gt; ...
221     * &lt;/TR&gt; ... &lt;/TABLE&gt;</CODE>) element set.  This returns a sub-vector (an actual
222     * {@code Vector<HTMLNode>} object, not a {@code Vector / array} starting and ending indices
223     * pair). This is what the <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape
224     * package.
225     * 
226     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
227     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
228     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
229     * @return The requested HTML sublist, as a {@code Vector}.
230     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
231     * @see TagNodeGetInclusive
232     */
233    public static Vector<HTMLNode>  getTable(Vector<? extends HTMLNode> html, int sPos, int ePos)
234    { return TagNodeGetInclusive.first(html, sPos, ePos, "table"); }
235
236
237
238
239
240
241
242    /**
243     * This method will find the very first first HTML {@code 'SELECT-OPTION'} set.
244     * (<CODE>&lt;SELECT&gt; ... &lt;OPTION&gt; ... &lt;/OPTION&gt; .. &lt;/SELECT&gt;</CODE>)
245     * element set.  This returns the {@code Vector} Position starting and ending boundaries
246     * {@code DotPair.start, DotPair.end} rather than pointer-references to the nodes.  This is
247     * what the <B>{@code 'FIND'}</B> keyword usually means in this HTML-Scrape package.
248     * 
249     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
250     * 
251     * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested
252     * HTML sublist.
253     * 
254     * @see TagNodeFindInclusive
255     */
256    public static DotPair findSelect(Vector<? extends HTMLNode> html)
257    { return TagNodeFindInclusive.first(html, "select"); }
258
259    /**
260     * This method will find the very first first HTML {@code 'SELECT-OPTION'} set.
261     * (<CODE>&lt;SELECT&gt; ... &lt;OPTION&gt; ... &lt;/OPTION&gt; .. &lt;/SELECT&gt;</CODE>)
262     * element set.  This returns the {@code Vector} Position starting and ending boundaries
263     * {@code DotPair.start, DotPair.end} rather than pointer-references to the nodes.  This is
264     * what the <B>{@code 'FIND'}</B> keyword usually means in this HTML-Scrape package.
265     * 
266     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
267     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
268     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
269     * 
270     * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested
271     * HTML sublist.
272     * 
273     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
274     * 
275     * @see TagNodeFindInclusive
276     */
277    public static DotPair findSelect(Vector<? extends HTMLNode> html, int sPos, int ePos)
278    { return TagNodeFindInclusive.first(html, sPos, ePos, "select"); }
279
280    /**
281     * This method will find the very first first HTML {@code 'SELECT-OPTION'} set.
282     * (<CODE>&lt;SELECT&gt; ... &lt;OPTION&gt; ... &lt;/OPTION&gt; .. &lt;/SELECT&gt;</CODE>)
283     * element set.  This returns a sub-vector (an actual {@code Vector<HTMLNode>} object, not
284     * a {@code Vector / array} starting and ending indices pair.)  This is what the 
285     * <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape package.
286     * 
287     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
288     * @return The requested HTML sublist, as a {@code Vector}.
289     * @see TagNodeGetInclusive
290     */
291    public static Vector<HTMLNode> getSelect(Vector<? extends HTMLNode> html)
292    { return TagNodeGetInclusive.first(html, "select"); }
293
294    /**
295     * This method will find the very first first HTML {@code 'SELECT-OPTION'} set.
296     * (<CODE>&lt;SELECT&gt; ... &lt;OPTION&gt; ... &lt;/OPTION&gt; .. &lt;/SELECT&gt;</CODE>)
297     * element set.  This returns a sub-vector (an actual {@code Vector<HTMLNode>} object, not
298     * a {@code Vector / array} starting and ending indices pair).  This is what the
299     * <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape package.
300     * 
301     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
302     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
303     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
304     * @return The requested HTML sublist, as a {@code Vector}.
305     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
306     * @see TagNodeGetInclusive
307     */
308    public static Vector<HTMLNode> getSelect(Vector<? extends HTMLNode> html, int sPos, int ePos)
309    { return TagNodeGetInclusive.first(html, sPos, ePos, "select"); }
310    
311    
312    
313    
314
315
316
317    /**
318     * This method will find the very first HTML Un-Ordered List
319     * (<CODE>&lt;UL&gt; ..&lt;LI&gt;...&lt;/LI&gt; ... &lt;/UL&gt;</CODE>) element set.
320     * This returns the {@code Vector} Position starting and ending boundaries
321     * {@code DotPair.start, DotPair.end} rather than pointer-references to the nodes.  This is
322     * what the <B>{@code 'FIND'}</B> keyword usually means in this HTML-Scrape package.
323     * 
324     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
325     * 
326     * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested
327     * HTML sublist.
328     * 
329     * @see TagNodeFindInclusive
330     */
331    public static DotPair findUL(Vector<? extends HTMLNode> html)
332    { return TagNodeFindInclusive.first(html, "ul"); }
333
334    /**
335     * This method will find the very first HTML Un-Ordered List
336     * (<CODE>&lt;UL&gt; ..&lt;LI&gt;...&lt;/LI&gt; ... &lt;/UL&gt;</CODE>) element set.
337     * This returns the {@code Vector} Position starting and ending boundaries
338     * {@code DotPair.start, DotPair.end} rather than pointer-references to the nodes.  This is
339     * what the <B>{@code 'FIND'}</B> keyword usually means in this HTML-Scrape package.
340     * 
341     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
342     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
343     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
344     * 
345     * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested HTML
346     * sublist.
347     * 
348     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
349     * @see TagNodeFindInclusive
350     */
351    public static DotPair findUL(Vector<? extends HTMLNode> html, int sPos, int ePos)
352    { return TagNodeFindInclusive.first(html, sPos, ePos, "ul"); }
353
354    /**
355     * This method will find the very first HTML Un-Ordered List
356     * (<CODE>&lt;UL&gt; ..&lt;LI&gt;...&lt;/LI&gt; ... &lt;/UL&gt;</CODE>) element set.
357     * This returns a sub-vector (an actual {@code Vector<HTMLNode>} object, not a
358     * {@code Vector / array} starting and ending indices pair).
359     * This is what the <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape package.
360     * 
361     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
362     * @return The requested HTML sublist, as a {@code Vector}.
363     * @see TagNodeGetInclusive
364     */
365    public static Vector<HTMLNode> getUL(Vector<? extends HTMLNode> html)
366    { return TagNodeGetInclusive.first(html, "ul"); }
367
368    /**
369     * This method will find the very first HTML Un-Ordered List
370     * (<CODE>&lt;UL&gt; ..&lt;LI&gt;...&lt;/LI&gt; ... &lt;/UL&gt;</CODE>) element set.
371     * This returns a sub-vector (an actual {@code Vector<HTMLNode>} object, not a
372     *  {@code Vector / array} starting and ending indices pair).
373     * This is what the <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape package.
374     * 
375     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
376     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
377     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
378     * @return The requested HTML sublist, as a {@code Vector}.
379     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
380     * @see TagNodeGetInclusive
381     */
382    public static Vector<HTMLNode> getUL(Vector<? extends HTMLNode> html, int sPos, int ePos)
383    { return TagNodeGetInclusive.first(html, sPos, ePos, "ul"); }
384
385
386
387
388
389
390
391    /**
392     * This method will find the very first HTML Un-Ordered List
393     * (<CODE>&lt;OL&gt; ..&lt;LI&gt;...&lt;/LI&gt; ... &lt;/OL&gt;</CODE>) element set.
394     * This returns the {@code Vector} Position starting and ending boundaries
395     * {@code DotPair.start, DotPair.end} rather than pointer-references to the nodes.  This is
396     * what the <B>{@code 'FIND'}</B> keyword usually means in this HTML-Scrape package.
397     * 
398     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
399     * 
400     * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested
401     * HTML sublist.
402     * 
403     * @see TagNodeFindInclusive
404     */
405    public static DotPair findOL(Vector<? extends HTMLNode> html)
406    { return TagNodeFindInclusive.first(html, "ol"); }
407    
408    /**
409     * This method will find the very first HTML Un-Ordered List
410     * (<CODE>&lt;OL&gt; ..&lt;LI&gt;...&lt;/LI&gt; ... &lt;/OL&gt;</CODE>) element set.
411     * This returns the {@code Vector} Position starting and ending boundaries
412     * {@code DotPair.start, DotPair.end} rather than pointer-references to the nodes.  This
413     * is what the <B>{@code 'FIND'}</B> keyword usually means in this HTML-Scrape package.
414     * 
415     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
416     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
417     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
418     * 
419     * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested
420     * HTML sublist.
421     * 
422     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
423     * @see TagNodeFindInclusive
424     */
425    public static DotPair findOL(Vector<? extends HTMLNode> html, int sPos, int ePos)
426    { return TagNodeFindInclusive.first(html, sPos, ePos, "ol"); }
427
428    /**
429     * This method will find the very first HTML Un-Ordered List
430     * (<CODE>&lt;OL&gt; ..&lt;LI&gt;...&lt;/LI&gt; ... &lt;/OL&gt;</CODE>) element set.
431     * This returns a sub-vector (an actual {@code Vector<HTMLNode>} object, not a 
432     * {@code Vector / array} starting and ending indices pair).
433     * This is what the <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape package.
434     * 
435     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
436     * @return The requested HTML sublist, as a {@code Vector}.
437     * @see TagNodeGetInclusive
438     */
439    public static Vector<HTMLNode> getOL(Vector<? extends HTMLNode> html)
440    { return TagNodeGetInclusive.first(html, "ol"); }
441
442    /**
443     * This method will find the very first HTML Un-Ordered List
444     * (<CODE>&lt;OL&gt; ..&lt;LI&gt;...&lt;/LI&gt; ... &lt;/OL&gt;</CODE>) element set.
445     * This returns a sub-vector (an actual {@code Vector<HTMLNode>} object, not a
446     * {@code Vector / array} starting and ending indices pair).
447     * This is what the <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape package.
448     * 
449     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
450     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
451     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
452     * @return The requested HTML sublist, as a {@code Vector}.
453     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
454     * @see TagNodeGetInclusive
455     */
456    public static Vector<HTMLNode> getOL(Vector<? extends HTMLNode> html, int sPos, int ePos)
457    { return TagNodeGetInclusive.first(html, sPos, ePos, "ol"); }
458
459
460
461
462
463    /**
464     * This will use the "L1 Inclusive" concept defined in this HTML package to provide a list
465     * (returned using the type: {@code java.util.Vector<DotPair>}) of each element that fits the
466     * <CODE>&lt;OPTION&gt; ... &lt;/OPTION&gt;</CODE> HTML "select-option element" structure.
467     * 
468     * @param selectList An HTML list of {@code TagNode's} and {@code TextNode's} that constitute
469     * an selection-option drop-down menu.  This list cannot contain extraneous {@code TagNode's} 
470     * or {@code TextNode's}, but rather, must begin and end with the open and close "select"
471     * HTML drop-down menu Tags.
472     * 
473     * @return A <I>"list of lists"</I> - specifically, a list of <B>{@code Torello.HTML.DotPair
474     * }</B>, each of which delineate a complete {@code <OPTION> ... </OPTION>} sub-list that are
475     * present within this HTML "select" drop-down-menu structure.
476     * 
477     * @throws MalformedHTMLException This method in no way performs a complete evaluation of the
478     * HTML structure provided by the user in the <B>{@code Vector<? extends HTMLNode> list}
479     * parameter </B> that is passed.  However rules that are related to the HTML
480     * elements "Select Option" {@code <SELECT>...<OPTION> ... </OPTION> ... </SELECT>} are
481     * inspected.
482     * 
483     * <BR /><BR /><UL CLASS=JDUL>
484     * <LI> If the <B>passed list parameter</B> <I>does not start and end with the <B> exact HTML
485     *      elements</B> - {@code <SELECT>, </SELECT>} </I>, then this exception is thrown.
486     * </LI>
487     * <LI> If the <B>passed list parameter</B> contains "extraneous HTML tags" or "extraneous text"
488     *      in between the {@code <OPTION> ... </OPTION> or <SELECT> ... </SELECT>} list-start 
489     *      and list-end demarcated HTML TagNodes, then the
490     *      {@code Torello.HTML.MalformedHTMLException } will, again, be thrown
491     * </LI>
492     * </UL>
493     * 
494     * @see #checkEndPoints(Vector, String[])
495     * @see #checkL1(Vector, Vector)
496     * @see TagNodeFindL1Inclusive
497     */
498    public static Vector<DotPair> findAllOption
499        (Vector<? extends HTMLNode> selectList) throws MalformedHTMLException
500    {
501        checkEndPoints(selectList, "select");
502
503        Vector<DotPair> ret = TagNodeFindL1Inclusive.all(selectList, "option");
504
505        checkL1(selectList, ret);
506
507        return ret;
508    }
509
510    /**
511     * This does the exact same thing as {@code findAllOption(Vector)} but the returned value is
512     * converted from "sublist endpoints" (a vector of start/end pairs), and into a "List of 
513     * Sub-Lists", which is specifically a list {@code (java.util.Vector<>)} containing sub-lists
514     * (also: {@code java.util.Vector<HTMLNode>})
515     *
516     * <BR /><BR /><B>NOTE:</B> All of the rules and conditions explained in the comments for
517     * method <B>{@code findAllOption(Vector)}</B> apply to this method as well.
518     * 
519     * @param selectList An HTML list of {@code TagNode's} and {@code TextNode's} that constitute
520     * an selection-option drop-down menu.
521     * This list cannot contain extraneous {@code TagNode's} or {@code TextNode's}, but rather,
522     * must begin and end with the open and close "select" HTML drop-down menu Tags.
523     * 
524     * @return A <I>"list of lists"</I> - specifically, a list of
525     * <B>{@code java.util.Vector<HTMLNode>} (sublists)</B>, each of which delineate
526     * a complete {@code <OPTION> ... </OPTION>} sub-list that are present within this HTML
527     * "select" drop-down-menu structure.
528     * 
529     * @throws MalformedHTMLException This method in no way performs a complete evaluation of the
530     * HTML structure provided by the user in the <B>{@code Vector<? extends HTMLNode> list} 
531     * parameter </B> that is passed.  However rules that are related to the HTML
532     * elements "Select Option" {@code <SELECT>...<OPTION> ... </OPTION> ... </SELECT>} are
533     * inspected.
534     *
535     * <BR ><BR /><UL CLASS=JDUL>
536     * <LI> If the <B>passed list parameter</B> <I>does not start and end with the <B> exact HTML 
537     *      elements</B> - {@code <SELECT>, </SELECT>}</I>, then this exception is thrown.
538     * </LI>
539     * <LI> If the <B>passed list parameter</B> contains "extraneous HTML tags" or "extraneous
540     *      text" in between the {@code <OPTION> ... </OPTION> or <SELECT> ... </SELECT>}
541     *      list-start and list-end demarcated HTML TagNodes, then the
542     *      {@code Torello.HTML.MalformedHTMLException } will, again, be thrown
543     * </LI>
544     * </UL>
545     * 
546     * @see DPUtil#toVectors(Vector, Iterable)
547     */
548    public static Vector<Vector<HTMLNode>> getAllOption
549        (Vector<? extends HTMLNode> selectList) throws MalformedHTMLException
550    { return DPUtil.toVectors(selectList, findAllOption(selectList)); }
551
552
553
554
555
556
557
558
559
560
561    /**
562     * This will use the "L1 Inclusive" concept defined in this HTML package to provide a list
563     * (returned using the type:
564     * {@code java.util.Vector<DotPair>}) of each element that fits the
565     * <CODE>&lt;LI&gt; ... &lt;/LI&gt;</CODE> HTML "list element" structure.
566     * 
567     * @param list An HTML list of {@code TagNode's} and {@code TextNode's} that constitute an 
568     * ordered or unordered list.  This list cannot contain
569     * extraneous {@code TagNode's} or {@code TextNode's}, but rather, must begin and end with
570     * the open and close list Tags.
571     * 
572     * @return A <I>"list of lists"</I> - specifically, a list of
573     * <B>{@code Torello.HTML.DotPair}</B>, each of which delineate a complete {@code <LI> ...
574     * </LI>} sub-list that are present within this HTML list structure.
575     * 
576     * @throws MalformedHTMLException This method in no way performs a complete evaluation of the
577     * HTML structure provided by the user in the <B>{@code Vector<? extends HTMLNode> list}
578     * parameter </B> that is passed.  However rules that are related to the HTML elements
579     * "Ordered List" {@code <OL>...</OL>} and "unordered list" {@code <UL>...</UL>} are
580     * inspected.
581     * 
582     * <BR /><BR /><UL CLASS=JDUL>
583     * <LI> If the <B>passed list parameter</B> <I>does not start and end with the <B>same HTML
584     *      elements</B> - specifically {@code <OL>, <UL>} </I>, then this exception is thrown.
585     * </LI>
586     * <LI> If the <B>passed list parameter</B> contains "extraneous HTML tags" or "extraneous text"
587     *      in between the {@code <OL> or <UL> ... </OL> or </UL>} list-start and list-end 
588     *      demarcated HTML TagNodes, then the {@code Torello.HTML.MalformedHTMLException }
589     *      will, again, be thrown
590     * </LI>
591     * </UL>
592     * 
593     * @see #checkEndPoints(Vector, String[])
594     * @see #checkL1(Vector, Vector)
595     * @see TagNodeFindL1Inclusive
596     */
597    public static Vector<DotPair> findAllLI(Vector<? extends HTMLNode> list)
598        throws MalformedHTMLException
599    {
600        checkEndPoints(list, "ol", "ul");
601
602        Vector<DotPair> ret = TagNodeFindL1Inclusive.all(list, "li");
603
604        checkL1(list, ret);
605
606        return ret;
607    }
608
609    /**
610     * This does the exact same thing as {@code findAllLI(Vector)} but the returned value is
611     * converted from "sublist endpoints" (a vector of start/end pairs), and into a "List of
612     * Sub-Lists", which is specifically a list {@code (java.util.Vector<>)} containing sub-lists
613     * (also: {@code java.util.Vector<HTMLNode>})
614     * 
615     * <BR /><BR /><B>NOTE:</B> All of the rules and conditions explained in the comments for
616     * method <B>{@code findAllLI(Vector)}</B> apply to this method as well.
617     * 
618     * @param list An HTML list of {@code TagNode's} and {@code TextNode's} that constitute an
619     * ordered or unordered list.  This list cannot contain extraneous {@code TagNode's} or
620     * {@code TextNode's}, but rather, must begin and end with the open and close list Tags.
621     * 
622     * @return A <I>"list of lists"</I> - specifically, a list of
623     * <B>{@code java.util.Vector<HTMLNode>} (sublists)</B>, each of which delineate
624     * a complete &lt;UL&gt;...&lt;/UL&gt; sub-list that are present within this HTML list
625     * structure.
626     * 
627     * @throws MalformedHTMLException This method in no way performs a complete evaluation of the
628     * HTML structure provided by the 
629     * user in the <B>{@code Vector<? extends HTMLNode> list} parameter </B> that is passed.
630     * However rules that are related to the HTML elements "Ordered List"
631     * (<CODE>&lt;OL&gt;...&lt;/OL&gt;</CODE>) and "unordered list"
632     * (<CODE>&lt;UL&gt;...&lt;/UL&gt;</CODE>) are inspected.
633     *
634     * <BR /><BR /><UL CLASS=JDUL>
635     * <LI> If the <B>passed list parameter</B> <I>does not start and end with the <B>same HTML
636     *      elements</B> - specifically {@code <OL>, <UL>} </I>, then this exception is thrown.
637     * </LI>
638     * <LI> If the <B>passed list parameter</B> contains "extraneous HTML tags" or "extraneous text"
639     *      in between the {@code <OL> or <UL> ... </OL> or </UL>} list-start and list-end
640     *      demarcated HTML {@code TagNode's}, then the {@code Torello.HTML.MalformedHTMLException}
641     *      will, again, be thrown.
642     * </LI>
643     * </UL>
644     * 
645     * @see DPUtil#toVectors(Vector, Iterable)
646     */
647    public static Vector<Vector<HTMLNode>> getAllLI
648        (Vector<? extends HTMLNode> list) throws MalformedHTMLException
649    { return DPUtil.toVectors(list, findAllLI(list)); }
650
651
652
653
654
655    /**
656     * This method is used to guarantee precisely two conditions to the passed HTML Tag list.
657     *
658     * <BR /><BR /><UL CLASS=JDUL>
659     * <LI> <B>Condition 1:</B> The {@code Vector<HTMLNode> list } parameter begins and ends with
660     *      the <I>exact same HTML Tag</I>, (for instance: {@code <H1> ... </H1>}, or perhaps
661     *      {@code <LI> ... </LI> })
662     * </LI>
663     * <LI> <B>Condition 2:</B> The HTML-Tag that is found at the start and end of this list is one
664     *      contained within the {@code 'tokList'} variable-length {@code String-array} parameter.
665     *      (if the {@code 'tokList'} parameter was a {@code java.lang.String[] tokList = { "th",
666     *      "tr" }}, then the passed "HTMLNode list" ({@code Vector}) parameter would have to begin
667     *      and end with either: {@code <TH> ... </TH> } or with {@code <TR> ... </TR> }
668     * </LI>
669     * </UL>
670     *
671     * <BR />Much of the java code in this method is used to provide some explanatory Exception
672     * message information.
673     * 
674     * @param list This is supposed to be a typical "open" and "close" HTML TagNode structure.  It
675     * may be anything including:
676     * <SPAN STYLE="color: green;">{@code <DIV ID="..."> ... </DIV> }, or
677     * {@code <TABLE ...> ... </TABLE> }, or even {@code <BODY> ... </BODY> }
678     * </SPAN>
679     * 
     * @param tokList The set of permitted tokens (HTML element names) with which this HTML list
     * may begin and end.
682     * 
683     * @return If the passed list parameter passes both the conditions specified above, then the
684     * token from the list of tokens that were provided is returned.
685     * 
686     * <BR /><BR /><B>NOTE:</B> If the list does not meet these conditions, a
687     * {@code Torello.HTML.MalformedHTMLException } will be thrown with an
688     * explanatory exception-message (and, obviously, the method will not return anything!)
689     * 
690     * @throws MalformedHTMLException Some explanatory information is provided to the coder for
691     * what has failed with the input list.
692     */
693    protected static String checkEndPoints
694        (Vector<? extends HTMLNode> list, String... tokList) throws MalformedHTMLException
695    { return checkEndPoints(list, 0, list.size()-1, tokList); }
696
697    /**
     * This method, functionally, does the exact same thing as "checkEndPoints" - but with the
     * endpoints specified.  It is being kept with <B><I>protected</I></B> access since it might
     * be unclear what endpoints are being checked.  The java exception case strings are
     * laboriously typed out only once, in this method; rather than retype them, the previous
     * method simply calls this one.  Functionally, it does the same thing as
     * {@code checkEndPoints(Vector, String...)} - except it does not use
     * {@code list.elementAt(0)} or {@code list.elementAt(list.size()-1)} as the starting and
     * ending points.
705     * 
706     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
707     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
708     * @param tokList The list of valid HTML Element names (tokens).
709     * @see #checkEndPoints(Vector, String[])
710     */
711    protected static String checkEndPoints
712        (Vector<? extends HTMLNode> list, int sPos, int ePos, String... tokList)
713        throws MalformedHTMLException
714    {
715        HTMLNode n = null;      String tok = null;
716        
717        if ((n = list.elementAt(sPos)).isTagNode())
718            tok = ((TagNode) n).tok;
719
720        else throw new MalformedHTMLException(
721            "This list does not begin an HTML TagNode, but rather a: " +
722            n.getClass().getName() + "\n" + n.str
723        );
724        
725        if (! (n = list.elementAt(ePos)).isTagNode())
726
727            throw new MalformedHTMLException(
728                "This list does not end with an HTML TagNode, but rather a : " +
729                n.getClass().getName() + "\n" + n.str
730            );
731
732        if (! ((TagNode) n).tok.equals(tok))
733
734            throw new MalformedHTMLException(
735                "This list does not begin and end with the same HTML TagNode:\n" +
736                "[OpeningTag: " + tok + "]\t[ClosingTag: " + ((TagNode) n).tok + "]"
737            );
738
739        for (String t : tokList) if (t.equals(tok)) return tok;
740
741        String expectedTokList = "";
742
743        for (String t: tokList) expectedTokList += " " + t;
744
745        throw new MalformedHTMLException(
746            "The opening and closing HTML Tag tokens for this list are not members of the " +
747            "tokList parameter set...\n" +
748            "Expected HTML Tag List: " + expectedTokList + "\nFound Tag: " + tok
749        );
750    }
751
752    /**
     * This checks that the sublists demarcated by the {@code Vector<DotPair> sublists}
     * parameter are properly formatted HTML.  It would be easier to provide an example of
     * "proper HTML formatting" and "improper HTML formatting" here, rather than trying to explain
     * this using English.
757     *
758     * <BR /><BR />
759     * <B>PROPER HTML:</B>
760     * 
761     * <DIV CLASS="HTML">{@code
762     * <UL>
763     *  <LI> This is a list element.</LI>
764     *  <LI> This is another list element.</LI>
765     *  <LI> This list element contains <B><I> extra-tags</I></B> like "bold", "italics", and
766     *       even a <A HREF="http://Torello.Directory">link!</A></LI>
767     * </UL>
768     * }</DIV>
769     *
770     * <BR /><B>IMPROPER HTML:</B>
771     * 
772     * <DIV CLASS="HTML">{@code
773     * <UL>
774     * This text should not be here, and constitutes "malformed HTML"
775     * <LI> This LI element is just fine.</LI>
776     * <A HREF="http://ChineseNewsBoard.com">This link</A> should be between LI elements
777     * <LI> This LI element is also just fine!</LI>
778     * </UL> 
779     * }</DIV>
780     * <BR />In the above two lists, the latter would generate a MalformedHTMLException
781     * 
782     * @throws MalformedHTMLException whenever improper HTML is presented to this function
783     */
784    protected static void checkL1(Vector<? extends HTMLNode> list, Vector<DotPair> sublists)
785        throws MalformedHTMLException
786    { checkL1(list, 0, list.size()-1, sublists); }
787
788    /**
     * This method, functionally, does the exact same thing as "checkL1" - but with the endpoints
     * specified.  It is being kept with <B><I>protected</I></B> access since it might be unclear
     * what endpoints are being checked.  The previous method has many java exception case 
     * {@code String's} laboriously typed out.  Rather than retype this, this method is being
     * introduced.  Functionally, it does the same thing as
     * {@code checkL1(Vector, Vector)} - except it does not use {@code list.elementAt(0)}
     * or {@code list.elementAt(list.size()-1) } as the starting and ending points.
796     * 
797     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
798     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
799     * @see #checkL1(Vector, Vector)
800     */
801    protected static void checkL1
802        (Vector<? extends HTMLNode> list, int sPos, int ePos, Vector<DotPair> sublists)
803        throws MalformedHTMLException
804    {
805        int         last    = sPos;
806        int         t       = ePos - 1;
807        HTMLNode    n       = null;
808
809        for (DotPair sublist : sublists)
810
811            if (sublist.start == (last+1)) last = sublist.end;
812
813            else
814            {
815                if ((sublist.start < (last+1)) || (sublist.start >= t))
816
817                    throw new IllegalArgumentException(
818                        "The provided subLists parameter does not contain subLists that are in " +
819                        "order of the original list.  The 'list of sublists' must contain " +
820                        "sublists that are in increasing sorted order.\n" +
821                        "Specifically, each sublist must contain start and end points that are " +
822                        "sequentially increasing.  Also, they may not overlap."
823                    );
824
825                else
826                {
827                    for (int i=(last+1); i < sublist.start; i++)
828
829                        if ((n = list.elementAt(i)).isTagNode())
830
831                            throw new MalformedHTMLException(
832                                "There is a spurious HTML-Tag element at Vector position: " + i +
833                                "\n=>\t" + n.str
834                            );
835
836                        else if (n.isTextNode() && (n.str.trim().length() > 0))
837
838                            throw new MalformedHTMLException(
839                                "There is a spurious Text-Node element at Vector position: " + i +
840                                "\n=>\t" + n.str
841                            );
842                }
843            }
844    }
845
846}