001package Torello.HTML;
002
003import java.util.*;
004import Torello.HTML.NodeSearch.*;
005
006/**
007 * A simple, demonstrative set of functions for retrieving {@code HTMLNode's} from a web-page
008 * (a 'Workbook Class').
009 * 
010 * <BR /><BR /><EMBED CLASS="external-html" DATA-FILE-ID=ELEMENTS>
011 */
012@Torello.HTML.Tools.JavaDoc.StaticFunctional
013public class Elements
014{
    // Private, empty constructor: keeps this class from being instantiated by outside code.
    private Elements() { }
016
017    /**
018     * Retrieves the start and end points of the web-page body in the underlying HTML 
019     * page-{@code Vector}.
020     * All nodes between {@code <BODY> ... </BODY>} will be included.
021     * 
022     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
023     * 
024     * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested
025     * HTML sublist.
026     * 
027     * @see InnerTagFindInclusive
028     */
029    public static DotPair findBody(Vector<? extends HTMLNode> html)
030    { return InnerTagFindInclusive.first(html, "body"); }
031
032    /**
033     * Gets the nodes of the web-page body.
034     * All nodes between {@code <BODY> ... </BODY>} will be included.
035     * 
036     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
037     * 
038     * @return The requested HTML sublist, as a {@code Vector}.
039     * 
040     * @see InnerTagGetInclusive
041     */
042    public static Vector<HTMLNode> getBody(Vector<? extends HTMLNode> html)
043    { return InnerTagGetInclusive.first(html, "body"); }
044
045    /**
046     * Retrieves the start and end points of the web-page header in the underlying HTML 
047     * page-{@code Vector}.
048     * All nodes between {@code <HEAD> ... </HEAD>} will be included.
049     * 
050     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
051     * 
052     * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested 
053     * HTML sublist.
054     * 
055     * @see InnerTagFindInclusive
056     */
057    public static DotPair findHead(Vector<? extends HTMLNode> html)
058    { return InnerTagFindInclusive.first(html, "head"); }
059
060    /**
061     * Gets the nodes of the web-page header.
062     * All nodes between {@code <HEAD> ... </HEAD>} will be included.
063     * 
064     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
065     * 
066     * @return The requested HTML sublist, as a {@code Vector}.
067     * 
068     * @see InnerTagGetInclusive
069     */
070    public static Vector<HTMLNode> getHead(Vector<? extends HTMLNode> html)
071    { return InnerTagGetInclusive.first(html, "head"); }
072
073    /**
074     * Gets all {@code <META NAME="..." CONTENT="...">} (or {@code <META CHARSET="...">}
075     * and {@code <META HTTP-EQUIV="...">}) elements in a web-page header - returned via
076     * their position in the page-{@code Vector}.
077     * 
078     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
079     * 
080     * @return The requested HTML Elements, as an integer-array list of index-pointers to
081     * the underlying {@code Vector}.
082     * 
083     * @see TagNodeFind
084     */
085    public static int[] findMeta(Vector<? extends HTMLNode> html)
086    { return TagNodeFind.all(html, TC.OpeningTags, "meta"); }
087
088    /**
089     * Gets all {@code <META NAME="..." CONTENT="...">} (or {@code <META CHARSET="...">}
090     * and {@code <META HTTP-EQUIV="...">}) elements in a web-page header.
091     * 
092     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
093     * 
094     * @return The requested HTML Elements, as {@code TagNode's}, in a return {@code Vector}.
095     * 
096     * @see TagNodeGet
097     */
098    public static Vector<TagNode> getMeta(Vector<? extends HTMLNode> html)
099    { return TagNodeGet.all(html, TC.OpeningTags, "meta"); }
100
101    /**
102     * Gets all {@code <LINK REL="..." HREF="...">} elements in a web-page header - returned 
103     * via their position in the page-{@code Vector}.
104     * 
105     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
106     * 
107     * @return The requested HTML Elements, as an integer-array list of index-pointers to
108     * the underlying {@code Vector}.
109     * 
110     * @see TagNodeFind
111     */
112    public static int[] findLink(Vector<? extends HTMLNode> html)
113    { return TagNodeFind.all(html, TC.OpeningTags, "link"); }
114
115    /**
116     * Gets all {@code <LINK REL="..." HREF="...">} elements in a web-page header.
117     * 
118     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
119     * 
120     * @return The requested HTML Elements, as {@code TagNode's}, in a return {@code Vector}.
121     * 
122     * @see TagNodeGet
123     */
124    public static Vector<TagNode> getLink(Vector<? extends HTMLNode> html)
125    { return TagNodeGet.all(html, TC.OpeningTags, "link"); }
126
127    /**
128     * Returns the start and end positions in the page-{@code Vector} of the HTML
129     * {@code <TITLE>...</TITLE>} elements.
130     * 
131     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
132     * 
133     * @return The start and end index pointers, as a {@code DotPair}, of the HTML
134     * requested HTML sublist.
135     * 
136     * @see InnerTagFindInclusive
137     */
138    public static DotPair findTitle(Vector<? extends HTMLNode> html)
139    { return TagNodeFindInclusive.first(html, "title"); }
140
141    /**
142     * Returns the {@code <TITLE>...</TITLE>} elements sub-list from the HTML page-{@code Vector}.
143     * 
144     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
145     * 
146     * @return The requested HTML sublist, as a {@code Vector}.
147     * 
148     * @see InnerTagGetInclusive
149     */
150    public static Vector<HTMLNode> getTitle(Vector<? extends HTMLNode> html)
151    { return TagNodeGetInclusive.first(html, "title"); }
152
153    /**
154     * Returns the {@code String} encapsulated by the HTML {@code 'HEAD'}-section's
155     * {@code "<TITLE>...</TITLE>"} element, if there such an element.  If there is no such
156     * element, null is returned.  If there is a {@code 'TITLE'} element, but it has the 
157     * empty-{@code String} (zero-length-string) an empty {@code String} is returned.
158     * 
159     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
160     * Retrieves the {@code 'TITLE'} of an HTML page - by getting the {@code String}-text between
161     * the {@code 'TITLE'} elements.
162     * 
163     * @return The title string
164     */ 
165    public static String titleString(Vector<? extends HTMLNode> html)
166    {
167        Vector<HTMLNode> title = getTitle(html);
168
169        if (title == null) return null;
170        
171        return Util.textNodesString(title);
172    }
173
174    /**
175     * This method will find the very first HTML {@code 'TABLE'}
176     * (<CODE>&lt;TABLE&gt; &lt;TH&gt;...&lt;/TH&gt; &lt;TR&gt; &lt;TD&gt;..&lt;/TD&gt; ...
177     * &lt;/TR&gt; ... &lt;/TABLE&gt;</CODE>) element set.  This returns the {@code Vector}
178     * Position starting and ending boundaries {@code DotPair.start, DotPair.end} rather than
179     * pointer-references to the nodes.  This is what the <B>{@code 'FIND'}</B> keyword usually
180     * means in this HTML-Scrape package.
181     * 
182     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
183     * 
184     * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested HTML
185     * sublist.
186     * 
187     * @see TagNodeFindInclusive
188     */
189    public static DotPair findTable(Vector<? extends HTMLNode> html)
190    { return TagNodeFindInclusive.first(html, "table"); }
191
192    /**
193     * This method will find the very first HTML {@code 'TABLE'}
194     * (<CODE>&lt;TABLE&gt; &lt;TH&gt;...&lt;/TH&gt; &lt;TR&gt; &lt;TD&gt;..&lt;/TD&gt; ...
195     * &lt;/TR&gt; ... &lt;/TABLE&gt;</CODE>) element set. This returns the {@code Vector} Position
196     * starting and ending boundaries {@code DotPair.start, DotPair.end} rather than
197     * pointer-references to the nodes.  This is what the <B>{@code 'FIND'}</B> keyword usually
198     * means in this HTML-Scrape package.
199     * 
200     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
201     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
202     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
203     * 
204     * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested HTML
205     * sublist.
206     * 
207     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
208     * 
209     * @see TagNodeFindInclusive
210     */
211    public static DotPair findTable(Vector<? extends HTMLNode> html, int sPos, int ePos)
212    { return TagNodeFindInclusive.first(html, sPos, ePos, "table"); }
213
214    /**
215     * This method will get the very first HTML {@code 'TABLE'}
216     * (<CODE>&lt;TABLE&gt; &lt;TR&gt; &lt;TH&gt;...&lt;/TH&gt; &lt;/TR&gt; &lt;TR&gt;
217     * &lt;TD&gt;..&lt;/TD&gt; ... &lt;/TR&gt; ... &lt;/TABLE&gt;</CODE>) element set.  This
218     * returns a sub-{@code Vector} (an actual {@code Vector<HTMLNode>} object, not a {@code Vector
219     * / array} starting and ending indices pair). This is what the <B>{@code 'GET'}</B> keyword
220     * usually means in this HTML-Scrape package.
221     * 
222     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
223     * 
224     * @return The requested HTML sublist, as a {@code Vector}.
225     * 
226     * @see TagNodeGetInclusive
227     */
228    public static Vector<HTMLNode> getTable(Vector<? extends HTMLNode> html)
229    { return TagNodeGetInclusive.first(html, "table"); }
230
231    /**
232     * This method will get the very first HTML {@code 'TABLE'}
233     * (<CODE>&lt;TABLE&gt; &lt;TH&gt;...&lt;/TH&gt; &lt;TR&gt; &lt;TD&gt;..&lt;/TD&gt; ...
234     * &lt;/TR&gt; ... &lt;/TABLE&gt;</CODE>) element set.  This returns a sub-vector (an actual
235     * {@code Vector<HTMLNode>} object, not a {@code Vector / array} starting and ending indices
236     * pair). This is what the <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape
237     * package.
238     * 
239     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
240     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
241     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
242     * 
243     * @return The requested HTML sublist, as a {@code Vector}.
244     * 
245     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
246     * 
247     * @see TagNodeGetInclusive
248     */
249    public static Vector<HTMLNode>  getTable(Vector<? extends HTMLNode> html, int sPos, int ePos)
250    { return TagNodeGetInclusive.first(html, sPos, ePos, "table"); }
251
252
253
254
255
256
257
258    /**
259     * This method will find the very first first HTML {@code 'SELECT-OPTION'} set.
260     * (<CODE>&lt;SELECT&gt; ... &lt;OPTION&gt; ... &lt;/OPTION&gt; .. &lt;/SELECT&gt;</CODE>)
261     * element set.  This returns the {@code Vector} Position starting and ending boundaries
262     * {@code DotPair.start, DotPair.end} rather than pointer-references to the nodes.  This is
263     * what the <B>{@code 'FIND'}</B> keyword usually means in this HTML-Scrape package.
264     * 
265     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
266     * 
267     * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested
268     * HTML sublist.
269     * 
270     * @see TagNodeFindInclusive
271     */
272    public static DotPair findSelect(Vector<? extends HTMLNode> html)
273    { return TagNodeFindInclusive.first(html, "select"); }
274
275    /**
276     * This method will find the very first first HTML {@code 'SELECT-OPTION'} set.
277     * (<CODE>&lt;SELECT&gt; ... &lt;OPTION&gt; ... &lt;/OPTION&gt; .. &lt;/SELECT&gt;</CODE>)
278     * element set.  This returns the {@code Vector} Position starting and ending boundaries
279     * {@code DotPair.start, DotPair.end} rather than pointer-references to the nodes.  This is
280     * what the <B>{@code 'FIND'}</B> keyword usually means in this HTML-Scrape package.
281     * 
282     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
283     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
284     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
285     * 
286     * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested
287     * HTML sublist.
288     * 
289     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
290     * 
291     * @see TagNodeFindInclusive
292     */
293    public static DotPair findSelect(Vector<? extends HTMLNode> html, int sPos, int ePos)
294    { return TagNodeFindInclusive.first(html, sPos, ePos, "select"); }
295
296    /**
297     * This method will find the very first first HTML {@code 'SELECT-OPTION'} set.
298     * (<CODE>&lt;SELECT&gt; ... &lt;OPTION&gt; ... &lt;/OPTION&gt; .. &lt;/SELECT&gt;</CODE>)
299     * element set.  This returns a sub-vector (an actual {@code Vector<HTMLNode>} object, not
300     * a {@code Vector / array} starting and ending indices pair.)  This is what the 
301     * <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape package.
302     * 
303     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
304     * 
305     * @return The requested HTML sublist, as a {@code Vector}.
306     * 
307     * @see TagNodeGetInclusive
308     */
309    public static Vector<HTMLNode> getSelect(Vector<? extends HTMLNode> html)
310    { return TagNodeGetInclusive.first(html, "select"); }
311
312    /**
313     * This method will find the very first first HTML {@code 'SELECT-OPTION'} set.
314     * (<CODE>&lt;SELECT&gt; ... &lt;OPTION&gt; ... &lt;/OPTION&gt; .. &lt;/SELECT&gt;</CODE>)
315     * element set.  This returns a sub-vector (an actual {@code Vector<HTMLNode>} object, not
316     * a {@code Vector / array} starting and ending indices pair).  This is what the
317     * <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape package.
318     * 
319     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
320     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
321     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
322     * 
323     * @return The requested HTML sublist, as a {@code Vector}.
324     * 
325     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
326     * 
327     * @see TagNodeGetInclusive
328     */
329    public static Vector<HTMLNode> getSelect(Vector<? extends HTMLNode> html, int sPos, int ePos)
330    { return TagNodeGetInclusive.first(html, sPos, ePos, "select"); }
331    
332    
333    
334    
335
336
337
338    /**
339     * This method will find the very first HTML Un-Ordered List
340     * (<CODE>&lt;UL&gt; ..&lt;LI&gt;...&lt;/LI&gt; ... &lt;/UL&gt;</CODE>) element set.
341     * This returns the {@code Vector} Position starting and ending boundaries
342     * {@code DotPair.start, DotPair.end} rather than pointer-references to the nodes.  This is
343     * what the <B>{@code 'FIND'}</B> keyword usually means in this HTML-Scrape package.
344     * 
345     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
346     * 
347     * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested
348     * HTML sublist.
349     * 
350     * @see TagNodeFindInclusive
351     */
352    public static DotPair findUL(Vector<? extends HTMLNode> html)
353    { return TagNodeFindInclusive.first(html, "ul"); }
354
355    /**
356     * This method will find the very first HTML Un-Ordered List
357     * (<CODE>&lt;UL&gt; ..&lt;LI&gt;...&lt;/LI&gt; ... &lt;/UL&gt;</CODE>) element set.
358     * This returns the {@code Vector} Position starting and ending boundaries
359     * {@code DotPair.start, DotPair.end} rather than pointer-references to the nodes.  This is
360     * what the <B>{@code 'FIND'}</B> keyword usually means in this HTML-Scrape package.
361     * 
362     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
363     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
364     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
365     * 
366     * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested HTML
367     * sublist.
368     * 
369     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
370     * 
371     * @see TagNodeFindInclusive
372     */
373    public static DotPair findUL(Vector<? extends HTMLNode> html, int sPos, int ePos)
374    { return TagNodeFindInclusive.first(html, sPos, ePos, "ul"); }
375
376    /**
377     * This method will find the very first HTML Un-Ordered List
378     * (<CODE>&lt;UL&gt; ..&lt;LI&gt;...&lt;/LI&gt; ... &lt;/UL&gt;</CODE>) element set.
379     * This returns a sub-vector (an actual {@code Vector<HTMLNode>} object, not a
380     * {@code Vector / array} starting and ending indices pair).
381     * This is what the <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape package.
382     * 
383     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
384     * 
385     * @return The requested HTML sublist, as a {@code Vector}.
386     * 
387     * @see TagNodeGetInclusive
388     */
389    public static Vector<HTMLNode> getUL(Vector<? extends HTMLNode> html)
390    { return TagNodeGetInclusive.first(html, "ul"); }
391
392    /**
393     * This method will find the very first HTML Un-Ordered List
394     * (<CODE>&lt;UL&gt; ..&lt;LI&gt;...&lt;/LI&gt; ... &lt;/UL&gt;</CODE>) element set.
395     * This returns a sub-vector (an actual {@code Vector<HTMLNode>} object, not a
396     *  {@code Vector / array} starting and ending indices pair).
397     * This is what the <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape package.
398     * 
399     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
400     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
401     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
402     * 
403     * @return The requested HTML sublist, as a {@code Vector}.
404     * 
405     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
406     * 
407     * @see TagNodeGetInclusive
408     */
409    public static Vector<HTMLNode> getUL(Vector<? extends HTMLNode> html, int sPos, int ePos)
410    { return TagNodeGetInclusive.first(html, sPos, ePos, "ul"); }
411
412
413
414
415
416
417
418    /**
419     * This method will find the very first HTML Un-Ordered List
420     * (<CODE>&lt;OL&gt; ..&lt;LI&gt;...&lt;/LI&gt; ... &lt;/OL&gt;</CODE>) element set.
421     * This returns the {@code Vector} Position starting and ending boundaries
422     * {@code DotPair.start, DotPair.end} rather than pointer-references to the nodes.  This is
423     * what the <B>{@code 'FIND'}</B> keyword usually means in this HTML-Scrape package.
424     * 
425     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
426     * 
427     * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested
428     * HTML sublist.
429     * 
430     * @see TagNodeFindInclusive
431     */
432    public static DotPair findOL(Vector<? extends HTMLNode> html)
433    { return TagNodeFindInclusive.first(html, "ol"); }
434    
435    /**
436     * This method will find the very first HTML Un-Ordered List
437     * (<CODE>&lt;OL&gt; ..&lt;LI&gt;...&lt;/LI&gt; ... &lt;/OL&gt;</CODE>) element set.
438     * This returns the {@code Vector} Position starting and ending boundaries
439     * {@code DotPair.start, DotPair.end} rather than pointer-references to the nodes.  This
440     * is what the <B>{@code 'FIND'}</B> keyword usually means in this HTML-Scrape package.
441     * 
442     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
443     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
444     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
445     * 
446     * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested
447     * HTML sublist.
448     * 
449     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
450     * 
451     * @see TagNodeFindInclusive
452     */
453    public static DotPair findOL(Vector<? extends HTMLNode> html, int sPos, int ePos)
454    { return TagNodeFindInclusive.first(html, sPos, ePos, "ol"); }
455
456    /**
457     * This method will find the very first HTML Un-Ordered List
458     * (<CODE>&lt;OL&gt; ..&lt;LI&gt;...&lt;/LI&gt; ... &lt;/OL&gt;</CODE>) element set.
459     * This returns a sub-vector (an actual {@code Vector<HTMLNode>} object, not a 
460     * {@code Vector / array} starting and ending indices pair).
461     * This is what the <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape package.
462     * 
463     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
464     * 
465     * @return The requested HTML sublist, as a {@code Vector}.
466     * 
467     * @see TagNodeGetInclusive
468     */
469    public static Vector<HTMLNode> getOL(Vector<? extends HTMLNode> html)
470    { return TagNodeGetInclusive.first(html, "ol"); }
471
472    /**
473     * This method will find the very first HTML Un-Ordered List
474     * (<CODE>&lt;OL&gt; ..&lt;LI&gt;...&lt;/LI&gt; ... &lt;/OL&gt;</CODE>) element set.
475     * This returns a sub-vector (an actual {@code Vector<HTMLNode>} object, not a
476     * {@code Vector / array} starting and ending indices pair).
477     * This is what the <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape package.
478     * 
479     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
480     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
481     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
482     * 
483     * @return The requested HTML sublist, as a {@code Vector}.
484     * 
485     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
486     * 
487     * @see TagNodeGetInclusive
488     */
489    public static Vector<HTMLNode> getOL(Vector<? extends HTMLNode> html, int sPos, int ePos)
490    { return TagNodeGetInclusive.first(html, sPos, ePos, "ol"); }
491
492
493
494
495
496    /**
497     * This will use the "L1 Inclusive" concept defined in this HTML package to provide a list
498     * (returned using the type: {@code java.util.Vector<DotPair>}) of each element that fits the
499     * <CODE>&lt;OPTION&gt; ... &lt;/OPTION&gt;</CODE> HTML "select-option element" structure.
500     * 
501     * @param selectList An HTML list of {@code TagNode's} and {@code TextNode's} that constitute
502     * an selection-option drop-down menu.  This list cannot contain extraneous {@code TagNode's} 
503     * or {@code TextNode's}, but rather, must begin and end with the open and close "select"
504     * HTML drop-down menu Tags.
505     * 
506     * @return A <I>"list of lists"</I> - specifically, a list of <B>{@code Torello.HTML.DotPair
507     * }</B>, each of which delineate a complete {@code <OPTION> ... </OPTION>} sub-list that are
508     * present within this HTML "select" drop-down-menu structure.
509     * 
510     * @throws MalformedHTMLException This method in no way performs a complete evaluation of the
511     * HTML structure provided by the user in the <B>{@code Vector<? extends HTMLNode> list}
512     * parameter </B> that is passed.  However rules that are related to the HTML
513     * elements "Select Option" {@code <SELECT>...<OPTION> ... </OPTION> ... </SELECT>} are
514     * inspected.
515     * 
516     * <BR /><BR /><UL CLASS="JDUL">
517     * <LI> If the <B>passed list parameter</B> <I>does not start and end with the <B> exact HTML
518     *      elements</B> - {@code <SELECT>, </SELECT>} </I>, then this exception is thrown.
519     * </LI>
520     * <LI> If the <B>passed list parameter</B> contains "extraneous HTML tags" or "extraneous text"
521     *      in between the {@code <OPTION> ... </OPTION> or <SELECT> ... </SELECT>} list-start 
522     *      and list-end demarcated HTML TagNodes, then the
523     *      {@code Torello.HTML.MalformedHTMLException } will, again, be thrown
524     * </LI>
525     * </UL>
526     * 
527     * @see #checkEndPoints(Vector, String[])
528     * @see #checkL1(Vector, Vector)
529     * @see TagNodeFindL1Inclusive
530     */
531    public static Vector<DotPair> findAllOption
532        (Vector<? extends HTMLNode> selectList) throws MalformedHTMLException
533    {
534        checkEndPoints(selectList, "select");
535
536        Vector<DotPair> ret = TagNodeFindL1Inclusive.all(selectList, "option");
537
538        checkL1(selectList, ret);
539
540        return ret;
541    }
542
543    /**
544     * This does the exact same thing as {@code findAllOption(Vector)} but the returned value is
545     * converted from "sublist endpoints" (a vector of start/end pairs), and into a "List of 
546     * Sub-Lists", which is specifically a list {@code (java.util.Vector<>)} containing sub-lists
547     * (also: {@code java.util.Vector<HTMLNode>})
548     *
549     * <BR /><BR /><B>NOTE:</B> All of the rules and conditions explained in the comments for
550     * method <B>{@code findAllOption(Vector)}</B> apply to this method as well.
551     * 
552     * @param selectList An HTML list of {@code TagNode's} and {@code TextNode's} that constitute
553     * an selection-option drop-down menu.
554     * This list cannot contain extraneous {@code TagNode's} or {@code TextNode's}, but rather,
555     * must begin and end with the open and close "select" HTML drop-down menu Tags.
556     * 
557     * @return A <I>"list of lists"</I> - specifically, a list of
558     * <B>{@code java.util.Vector<HTMLNode>} (sublists)</B>, each of which delineate
559     * a complete {@code <OPTION> ... </OPTION>} sub-list that are present within this HTML
560     * "select" drop-down-menu structure.
561     * 
562     * @throws MalformedHTMLException This method in no way performs a complete evaluation of the
563     * HTML structure provided by the user in the <B>{@code Vector<? extends HTMLNode> list} 
564     * parameter </B> that is passed.  However rules that are related to the HTML
565     * elements "Select Option" {@code <SELECT>...<OPTION> ... </OPTION> ... </SELECT>} are
566     * inspected.
567     *
568     * <BR ><BR /><UL CLASS="JDUL">
569     * <LI> If the <B>passed list parameter</B> <I>does not start and end with the <B> exact HTML 
570     *      elements</B> - {@code <SELECT>, </SELECT>}</I>, then this exception is thrown.
571     * </LI>
572     * <LI> If the <B>passed list parameter</B> contains "extraneous HTML tags" or "extraneous
573     *      text" in between the {@code <OPTION> ... </OPTION> or <SELECT> ... </SELECT>}
574     *      list-start and list-end demarcated HTML TagNodes, then the
575     *      {@code Torello.HTML.MalformedHTMLException } will, again, be thrown
576     * </LI>
577     * </UL>
578     * 
579     * @see DotPair#toVectors(Vector, Iterable)
580     */
581    public static Vector<Vector<HTMLNode>> getAllOption
582        (Vector<? extends HTMLNode> selectList) throws MalformedHTMLException
583    { return DotPair.toVectors(selectList, findAllOption(selectList)); }
584
585
586
587
588
589
590
591
592
593
594    /**
595     * This will use the "L1 Inclusive" concept defined in this HTML package to provide a list
596     * (returned using the type:
597     * {@code java.util.Vector<DotPair>}) of each element that fits the
598     * <CODE>&lt;LI&gt; ... &lt;/LI&gt;</CODE> HTML "list element" structure.
599     * 
600     * @param list An HTML list of {@code TagNode's} and {@code TextNode's} that constitute an 
601     * ordered or unordered list.  This list cannot contain
602     * extraneous {@code TagNode's} or {@code TextNode's}, but rather, must begin and end with
603     * the open and close list Tags.
604     * 
605     * @return A <I>"list of lists"</I> - specifically, a list of
606     * <B>{@code Torello.HTML.DotPair}</B>, each of which delineate a complete {@code <LI> ...
607     * </LI>} sub-list that are present within this HTML list structure.
608     * 
609     * @throws MalformedHTMLException This method in no way performs a complete evaluation of the
610     * HTML structure provided by the user in the <B>{@code Vector<? extends HTMLNode> list}
611     * parameter </B> that is passed.  However rules that are related to the HTML elements
612     * "Ordered List" {@code <OL>...</OL>} and "unordered list" {@code <UL>...</UL>} are
613     * inspected.
614     * 
615     * <BR /><BR /><UL CLASS="JDUL">
616     * <LI> If the <B>passed list parameter</B> <I>does not start and end with the <B>same HTML
617     *      elements</B> - specifically {@code <OL>, <UL>} </I>, then this exception is thrown.
618     * </LI>
619     * <LI> If the <B>passed list parameter</B> contains "extraneous HTML tags" or "extraneous text"
620     *      in between the {@code <OL> or <UL> ... </OL> or </UL>} list-start and list-end 
621     *      demarcated HTML TagNodes, then the {@code Torello.HTML.MalformedHTMLException }
622     *      will, again, be thrown
623     * </LI>
624     * </UL>
625     * 
626     * @see #checkEndPoints(Vector, String[])
627     * @see #checkL1(Vector, Vector)
628     * @see TagNodeFindL1Inclusive
629     */
630    public static Vector<DotPair> findAllLI(Vector<? extends HTMLNode> list)
631        throws MalformedHTMLException
632    {
633        checkEndPoints(list, "ol", "ul");
634
635        Vector<DotPair> ret = TagNodeFindL1Inclusive.all(list, "li");
636
637        checkL1(list, ret);
638
639        return ret;
640    }
641
642    /**
643     * This does the exact same thing as {@code findAllLI(Vector)} but the returned value is
644     * converted from "sublist endpoints" (a vector of start/end pairs), and into a "List of
645     * Sub-Lists", which is specifically a list {@code (java.util.Vector<>)} containing sub-lists
646     * (also: {@code java.util.Vector<HTMLNode>})
647     * 
648     * <BR /><BR /><B>NOTE:</B> All of the rules and conditions explained in the comments for
649     * method <B>{@code findAllLI(Vector)}</B> apply to this method as well.
650     * 
651     * @param list An HTML list of {@code TagNode's} and {@code TextNode's} that constitute an
652     * ordered or unordered list.  This list cannot contain extraneous {@code TagNode's} or
653     * {@code TextNode's}, but rather, must begin and end with the open and close list Tags.
654     * 
655     * @return A <I>"list of lists"</I> - specifically, a list of
656     * <B>{@code java.util.Vector<HTMLNode>} (sublists)</B>, each of which delineate
657     * a complete &lt;UL&gt;...&lt;/UL&gt; sub-list that are present within this HTML list
658     * structure.
659     * 
660     * @throws MalformedHTMLException This method in no way performs a complete evaluation of the
661     * HTML structure provided by the 
662     * user in the <B>{@code Vector<? extends HTMLNode> list} parameter </B> that is passed.
663     * However rules that are related to the HTML elements "Ordered List"
664     * (<CODE>&lt;OL&gt;...&lt;/OL&gt;</CODE>) and "unordered list"
665     * (<CODE>&lt;UL&gt;...&lt;/UL&gt;</CODE>) are inspected.
666     *
667     * <BR /><BR /><UL CLASS="JDUL">
668     * <LI> If the <B>passed list parameter</B> <I>does not start and end with the <B>same HTML
669     *      elements</B> - specifically {@code <OL>, <UL>} </I>, then this exception is thrown.
670     * </LI>
671     * <LI> If the <B>passed list parameter</B> contains "extraneous HTML tags" or "extraneous text"
672     *      in between the {@code <OL> or <UL> ... </OL> or </UL>} list-start and list-end
673     *      demarcated HTML {@code TagNode's}, then the {@code Torello.HTML.MalformedHTMLException}
674     *      will, again, be thrown.
675     * </LI>
676     * </UL>
677     * 
678     * @see DotPair#toVectors(Vector, Iterable)
679     */
680    public static Vector<Vector<HTMLNode>> getAllLI
681        (Vector<? extends HTMLNode> list) throws MalformedHTMLException
682    { return DotPair.toVectors(list, findAllLI(list)); }
683
684
685
686
687
688    /**
689     * This method is used to guarantee precisely two conditions to the passed HTML Tag list.
690     *
691     * <BR /><BR /><UL CLASS="JDUL">
692     * <LI> <B>Condition 1:</B> The {@code Vector<HTMLNode> list } parameter begins and ends with
693     *      the <I>exact same HTML Tag</I>, (for instance: {@code <H1> ... </H1>}, or perhaps
694     *      {@code <LI> ... </LI> })
695     * </LI>
696     * <LI> <B>Condition 2:</B> The HTML-Tag that is found at the start and end of this list is one
697     *      contained within the {@code 'tokList'} variable-length {@code String-array} parameter.
698     *      (if the {@code 'tokList'} parameter was a {@code java.lang.String[] tokList = { "th",
699     *      "tr" }}, then the passed "HTMLNode list" ({@code Vector}) parameter would have to begin
700     *      and end with either: {@code <TH> ... </TH> } or with {@code <TR> ... </TR> }
701     * </LI>
702     * </UL>
703     *
704     * <BR />Much of the java code in this method is used to provide some explanatory Exception
705     * message information.
706     * 
707     * @param list This is supposed to be a typical "open" and "close" HTML TagNode structure.  It
708     * may be anything including:
709     * <SPAN STYLE="color: green;">{@code <DIV ID="..."> ... </DIV> }, or
710     * {@code <TABLE ...> ... </TABLE> }, or even {@code <BODY> ... </BODY> }
711     * </SPAN>
712     * 
713     * @param tokList This is expected to be the possible set of tokens with which this HTML list
714     * may begin or end with.
715     * 
716     * @return If the passed list parameter passes both the conditions specified above, then the
717     * token from the list of tokens that were provided is returned.
718     * 
719     * <BR /><BR /><B>NOTE:</B> If the list does not meet these conditions, a
720     * {@code Torello.HTML.MalformedHTMLException } will be thrown with an
721     * explanatory exception-message (and, obviously, the method will not return anything!)
722     * 
723     * @throws MalformedHTMLException Some explanatory information is provided to the coder for
724     * what has failed with the input list.
725     */
726    protected static String checkEndPoints
727        (Vector<? extends HTMLNode> list, String... tokList) throws MalformedHTMLException
728    { return checkEndPoints(list, 0, list.size()-1, tokList); }
729
730    /**
731     * This method, functionally, does the exact same thing as "checkEndPoints" - but with the
732     * endpoints specified.  It is being kept with <B><I>protected</I></B> access since it might
733     * be unclear what endpoints are being checked.  The previous method has many java exception
734     * case strings laboriously typed out.  Rather than retype this, this method is being
735     * introduced. Functionally, it does the same thing as {@code checkEndPoints(Vector, String)}
736     * - except it does not use {@code list.elementAt(0)} or
737     * {@code list.elementAt(element.size()-1)} as the starting and ending points.
738     * 
739     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
740     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
741     * @param tokList The list of valid HTML Element names (tokens).
742     * 
743     * @see #checkEndPoints(Vector, String[])
744     */
745    protected static String checkEndPoints
746        (Vector<? extends HTMLNode> list, int sPos, int ePos, String... tokList)
747        throws MalformedHTMLException
748    {
749        HTMLNode n = null;      String tok = null;
750        
751        if ((n = list.elementAt(sPos)).isTagNode())
752            tok = ((TagNode) n).tok;
753
754        else throw new MalformedHTMLException(
755            "This list does not begin an HTML TagNode, but rather a: " +
756            n.getClass().getName() + "\n" + n.str
757        );
758        
759        if (! (n = list.elementAt(ePos)).isTagNode())
760
761            throw new MalformedHTMLException(
762                "This list does not end with an HTML TagNode, but rather a : " +
763                n.getClass().getName() + "\n" + n.str
764            );
765
766        if (! ((TagNode) n).tok.equals(tok))
767
768            throw new MalformedHTMLException(
769                "This list does not begin and end with the same HTML TagNode:\n" +
770                "[OpeningTag: " + tok + "]\t[ClosingTag: " + ((TagNode) n).tok + "]"
771            );
772
773        for (String t : tokList) if (t.equals(tok)) return tok;
774
775        String expectedTokList = "";
776
777        for (String t: tokList) expectedTokList += " " + t;
778
779        throw new MalformedHTMLException(
780            "The opening and closing HTML Tag tokens for this list are not members of the " +
781            "tokList parameter set...\n" +
782            "Expected HTML Tag List: " + expectedTokList + "\nFound Tag: " + tok
783        );
784    }
785
786    /**
787     * This checks that the sublists demarcated by the {@code Vector<DotPair> htmlSubLists } 
788     * parameter are properly formatted HTML.  It would be easier to provide an example of 
789     * "proper HTML formatting" and "improper HTML formatting" here, rather that trying to explain
790     * this using English.
791     *
792     * <BR /><BR />
793     * <B>PROPER HTML:</B>
794     * 
795     * <DIV CLASS="HTML">{@code
796     * <UL>
797     *  <LI> This is a list element.</LI>
798     *  <LI> This is another list element.</LI>
799     *  <LI> This list element contains <B><I> extra-tags</I></B> like "bold", "italics", and
800     *       even a <A HREF="http://Torello.Directory">link!</A></LI>
801     * </UL>
802     * }</DIV>
803     *
804     * <BR /><B>IMPROPER HTML:</B>
805     * 
806     * <DIV CLASS="HTML">{@code
807     * <UL>
808     * This text should not be here, and constitutes "malformed HTML"
809     * <LI> This LI element is just fine.</LI>
810     * <A HREF="http://ChineseNewsBoard.com">This link</A> should be between LI elements
811     * <LI> This LI element is also just fine!</LI>
812     * </UL> 
813     * }</DIV>
814     * <BR />In the above two lists, the latter would generate a MalformedHTMLException
815     * 
816     * @throws MalformedHTMLException whenever improper HTML is presented to this function
817     */
818    protected static void checkL1(Vector<? extends HTMLNode> list, Vector<DotPair> sublists)
819        throws MalformedHTMLException
820    { checkL1(list, 0, list.size()-1, sublists); }
821
822    /**
823     * This method, functionally, does the exact same thing as "checkEL1" - but with the endpoints
824     * specified.  It is being kept with <B><I>protected</I></B> access since it might be unclear
825     * what endpoints are being checked.  The previous method has many java exception case 
826     * {@code String's} laboriously typed out.  Rather than retype this, this method is being
827     * introduced.  Functionally, it does the same thing as
828     * {@code checkL1(Vector, String)} - except it does not use {@code list.elementAt(0)}
829     * or {@code list.elementAt(element.size()-1) } as the starting and ending points.
830     * 
831     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
832     * 
833     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
834     * 
835     * @see #checkL1(Vector, Vector)
836     */
837    protected static void checkL1
838        (Vector<? extends HTMLNode> list, int sPos, int ePos, Vector<DotPair> sublists)
839        throws MalformedHTMLException
840    {
841        int         last    = sPos;
842        int         t       = ePos - 1;
843        HTMLNode    n       = null;
844
845        for (DotPair sublist : sublists)
846
847            if (sublist.start == (last+1)) last = sublist.end;
848
849            else
850            {
851                if ((sublist.start < (last+1)) || (sublist.start >= t))
852
853                    throw new IllegalArgumentException(
854                        "The provided subLists parameter does not contain subLists that are in " +
855                        "order of the original list.  The 'list of sublists' must contain " +
856                        "sublists that are in increasing sorted order.\n" +
857                        "Specifically, each sublist must contain start and end points that are " +
858                        "sequentially increasing.  Also, they may not overlap."
859                    );
860
861                else
862                {
863                    for (int i=(last+1); i < sublist.start; i++)
864
865                        if ((n = list.elementAt(i)).isTagNode())
866
867                            throw new MalformedHTMLException(
868                                "There is a spurious HTML-Tag element at Vector position: " + i +
869                                "\n=>\t" + n.str
870                            );
871
872                        else if (n.isTextNode() && (n.str.trim().length() > 0))
873
874                            throw new MalformedHTMLException(
875                                "There is a spurious Text-Node element at Vector position: " + i +
876                                "\n=>\t" + n.str
877                            );
878                }
879            }
880    }
881
882}