001package Torello.HTML;
002
003import java.util.*;
004import java.util.regex.*;
005import java.util.stream.*;
006
007import java.util.function.Predicate;
008
009import Torello.HTML.NodeSearch.*;
010import Torello.Java.*;
011
012/**
013 * A long list of utilities for searching, finding, extracting and removing HTML from 
014 * Vectorized-HTML.
015 * 
016 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=UTIL>
017 */
018@Torello.JavaDoc.StaticFunctional
019public class Util
020{
    // Private and empty: no instance of this class can be created from outside of it.
    private Util() { }
022
023
024    // ********************************************************************************************
025    // ********************************************************************************************
026    // Trim TextNode Strings
027    // ********************************************************************************************
028    // ********************************************************************************************
029
030
031    /**
032     * Convenience Method.
033     * <BR />Invokes: {@link #trimTextNodes(Vector, int, int, boolean)}
034     */
035    public static int trimTextNodes(Vector<HTMLNode> page, boolean deleteZeroLengthStrings)
036    { return trimTextNodes(page, 0, -1, deleteZeroLengthStrings); }
037
038    /**
039     * Convenience Method.
040     * <BR />Receives: {@code DotPair}
041     * <BR />Invokes: {@link #trimTextNodes(Vector, int, int, boolean)}
042     */
043    public static int trimTextNodes
044        (Vector<HTMLNode> page, DotPair dp, boolean deleteZeroLengthStrings)
045    { return trimTextNodes(page, dp.start, dp.end + 1, deleteZeroLengthStrings); }
046
047    /**
048     * This will iterate through the entire {@code Vector<HTMLNode>}, and invoke
049     * {@code java.lang.String.trim()} on each {@code TextNode} on the page.  If this invocation
050     * results in a reduction of {@code String.length()}, then a new {@code TextNode} will be
051     * instantiated whose {@code TextNode.str} field is set to the result of the
052     * {@code String.trim(old_node.str)} operation.
053     * 
054     * @param deleteZeroLengthStrings If a {@code TextNode's} length is zero (before or after
055     * {@code trim()} is called) and when this parameter is {@code TRUE}, that {@code TextNode}
056     * must be removed from the {@code Vector}.
057     * 
058     * @return Any node that is trimmed or deleted will increment the counter.  This counter
059     * final-value is returned
060     */
061    public static int trimTextNodes
062        (Vector<HTMLNode> page, int sPos, int ePos, boolean deleteZeroLengthStrings)
063    {
064        int                 counter = 0;
065        IntStream.Builder   b       = deleteZeroLengthStrings ? IntStream.builder() : null;
066        HTMLNode            n       = null;
067        LV                  l       = new LV(page, sPos, ePos);
068
069        for (int i=l.start; i < l.end; i++)
070
071            if ((n = page.elementAt(i)).isTextNode())
072            {
073                String  trimmed         = n.str.trim();
074                int     trimmedLength   = trimmed.length();
075
076                if ((trimmedLength == 0) && deleteZeroLengthStrings)
077                    { b.add(i); counter++; }
078
079                else if (trimmedLength < n.str.length())
080                    { page.setElementAt(new TextNode(trimmed), i); counter++; }
081            }
082
083        if (deleteZeroLengthStrings) Util.Remove.nodesOPT(page, b.build().toArray());
084
085        return counter;
086    }
087
088
089    // ********************************************************************************************
090    // ********************************************************************************************
091    // Vectorized-HTML To-String Methods
092    // ********************************************************************************************
093    // ********************************************************************************************
094
095
096    /** 
097     * Convenience Method.
098     * <BR />Invokes: {@link #rangeToString(Vector, int, int)}
099     */
100    public static String pageToString(Vector<? extends HTMLNode> html)
101    { return rangeToString(html, 0, -1); }
102
103    /**
104     * Convenience Method.
105     * <BR />Receives: {@code DotPair}
106     * <BR />Invokes: {@link #rangeToString(Vector, int, int)}
107     */
108    public static String rangeToString(Vector<? extends HTMLNode> html, DotPair dp)
109    { return rangeToString(html, dp.start, dp.end + 1); }
110
111    /**
112     * The purpose of this method/function is to convert a portion of the contents of an HTML-Page,
113     * currently being represented as a {@code Vector} of {@code HTMLNode's} into a {@code String.}
114     * Two {@code 'int'} parameters are provided in this method's signature to define a sub-list
115     * of a page to be converted to a {@code java.lang.String}
116     * 
117     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
118     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
119     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
120     * 
121     * @return The {@code Vector} converted into a {@code String}.
122     * 
123     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
124     * 
125     * @see #pageToString(Vector)
126     * @see #rangeToString(Vector, DotPair)
127     */
128    public static String rangeToString(Vector<? extends HTMLNode> html, int sPos, int ePos)
129    {
130        StringBuilder   ret = new StringBuilder();
131        LV              l   = new LV(html, sPos, ePos);
132
133        for (int i=l.start; i < l.end; i++) ret.append(html.elementAt(i).str);
134
135        return ret.toString();
136    }
137
138
139    // ********************************************************************************************
140    // ********************************************************************************************
141    // Vectorized-HTML TextNode To-String Methods
142    // ********************************************************************************************
143    // ********************************************************************************************
144
145
146    /**
147     * Convenience Method.
148     * <BR />Invokes: {@link #textNodesString(Vector, int, int)}
149     */
150    public static String textNodesString(Vector<? extends HTMLNode> html)
151    { return textNodesString(html, 0, -1); }
152
153    /**
154     * Convenience Method.
155     * <BR />Receives: {@code DotPair}
156     * <BR />Invokes: {@link #textNodesString(Vector, int, int)}
157     */
158    public static String textNodesString(Vector<? extends HTMLNode> html, DotPair dp)
159    { return textNodesString(html, dp.start, dp.end + 1); }
160
161    /**
162     * This will return a {@code String} that is comprised of ONLY the {@code TextNode's} contained
163     * within the input {@code Vector} - <I>and furthermore, only nodes that are situated between
164     * index {@code int 'sPos'} and index {@code int 'ePos'} in that {@code Vector.}</I>
165     * 
166     * <BR /><BR />The {@code for-loop} that iterates the input-{@code Vector} parameter will
167     * simply skip an instance of {@code 'TagNode'} and {@code 'CommentNode'} when building the
168     * output return {@code String.}.
169     * 
170     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
171     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
172     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
173     * 
174     * @return This will return a {@code String} that is comprised of the text-only elements in the
175     * web-page or sub-page.  Only text between the requested {@code Vector}-indices is included.
176     * 
177     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
178     * 
179     * @see #textNodesString(Vector, DotPair)
180     * @see #textNodesString(Vector)
181     */
182    public static String textNodesString(Vector<? extends HTMLNode> html, int sPos, int ePos)
183    {
184        StringBuilder   sb  = new StringBuilder();
185        LV              l   = new LV(html, sPos, ePos);
186        HTMLNode        n;
187
188        for (int i=l.start; i < l.end; i++)
189            if ((n = html.elementAt(i)).isTextNode())
190                sb.append(n.str);
191
192        return sb.toString();
193    }
194
195
196    // ********************************************************************************************
197    // ********************************************************************************************
198    // TextNode Modification Operations - "Escape Text Nodes"
199    // ********************************************************************************************
200    // ********************************************************************************************
201
202
203    /**
204     * Convenience Method.
205     * <BR />Invokes: {@link #escapeTextNodes(Vector, int, int)}
206     */
207    public static int escapeTextNodes(Vector<HTMLNode> html)
208    { return escapeTextNodes(html, 0, -1); }
209
210    /**
211     * Convenience Method.
212     * <BR />Receives: {@code DotPair} 
213     * <BR />Invokes: {@link #escapeTextNodes(Vector, int, int)}
214     */
215    public static int escapeTextNodes(Vector<HTMLNode> html, DotPair dp)
216    { return escapeTextNodes(html, dp.start, dp.end + 1); }
217
218    /**
     * Will call {@code Escape.replace} on the {@code str} field of each {@code TextNode} in the
     * range of {@code sPos ... ePos}, replacing any node whose text is changed by that call.
221     * 
222     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
223     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
224     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
225     * 
     * @return The number of {@code TextNode's} that changed as a result of the
     * {@code Escape.replace(n.str)} loop.
228     * 
229     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
230     * 
231     * @see Escape#replaceAll(String)
232     */
233    public static int escapeTextNodes(Vector<HTMLNode> html, int sPos, int ePos)
234    {
235        LV          l       = new LV(html, sPos, ePos);
236        HTMLNode    n       = null;
237        String      s       = null;
238        int         counter = 0;
239
240        for (int i=l.start; i < l.end; i++)
241
242            if ((n = html.elementAt(i)).isTextNode())
243                if (! (s = Escape.replace(n.str)).equals(n.str))
244                {
245                    html.setElementAt(new TextNode(s), i);
246                    counter++;
247                }
248
249        return counter;
250    }
251
252
253    // ********************************************************************************************
254    // ********************************************************************************************
255    // Clone HTML Vectors
256    // ********************************************************************************************
257    // ********************************************************************************************
258
259
260    /**
261     * Convenience Method.
262     * <BR />Invokes: {@link #cloneRange(Vector, int, int)}
263     */
264    public static Vector<HTMLNode> clone(Vector<? extends HTMLNode> html)
265    { return cloneRange(html, 0, -1); }
266
267    /**
268     * Convenience Method.
269     * <BR />Receives: {@code DotPair}
270     * <BR />Invokes: {@link #cloneRange(Vector, int, int)}
271     */
272    public static Vector<HTMLNode> cloneRange(Vector<? extends HTMLNode> html, DotPair dp)
273    { return cloneRange(html, dp.start, dp.end + 1); }
274
275    /**
276     * Copies (clones!) a sub-range of the HTML page, stores the results in a {@code Vector}, and
277     * returns it.
278     * 
279     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
280     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
281     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
282     * 
283     * @return The "cloned" (copied) sub-range specified by {@code 'sPos'} and {@code 'ePos'.}
284     * 
285     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
286     * 
287     * @see #cloneRange(Vector, DotPair)
288     */
289    public static Vector<HTMLNode> cloneRange(Vector<? extends HTMLNode> html, int sPos, int ePos)
290    {
291        LV                  l   = new LV(html, sPos, ePos);
292        Vector<HTMLNode>    ret = new Vector<>(l.size());
293
294        // Copy the range specified into the return vector
295        //
296        // HOW THIS WAS DONE BEFORE NOTICING Vector.subList
297        //
298        // for (int i = l.start; i < l.end; i++) ret.addElement(html.elementAt(i));
299
300        ret.addAll(html.subList(l.start, l.end));
301
302        return ret;
303    }
304
305
306
307    // ********************************************************************************************
308    // ********************************************************************************************
309    // String Length of the TextNode's
310    // ********************************************************************************************
311    // ********************************************************************************************
312
313
314    /**
315     * Convenience Method.
316     * <BR />Receives: {@code DotPair}
317     * <BR />Invokes: {@link #textStrLength(Vector, int, int)}
318     */
319    public static int textStrLength(Vector<? extends HTMLNode> html, DotPair dp)
320    { return textStrLength(html, dp.start, dp.end + 1); }
321
322    /**
323     * Convenience Method.
324     * <BR />Invokes: {@link #textStrLength(Vector, int, int)}
325     */
326    public static int textStrLength(Vector<? extends HTMLNode> html)
327    { return textStrLength(html, 0, -1); }
328
329    /**
330     * This method will return the length of the strings <I><B>contained by all/only instances of
331     * {@code 'TextNode'}</B></I> among the nodes of the input HTML-{@code Vector}.   This is
332     * identical to the behavior of the method with the same name, but includes starting and ending
333     * bounds on the html {@code Vector}: {@code 'sPos'} &amp; {@code 'ePos'}.
334     * 
335     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
336     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
337     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
338     * 
339     * @return The sum of the lengths of the text contained by text-nodes in the {@code Vector} 
340     * between {@code 'sPos'} and {@code 'ePos'}.
341     * 
342     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
343     */
344    public static int textStrLength(Vector<? extends HTMLNode> html, int sPos, int ePos)
345    {
346        HTMLNode    n;
347        int         sum = 0;
348        LV          l   = new LV(html, sPos, ePos);
349
350        // Counts the length of each "String" in a "TextNode" between sPos and ePos
351        for (int i=l.start; i < l.end; i++)
352
353            if ((n = html.elementAt(i)).isTextNode())
354                sum += n.str.length();
355
356        return sum;
357    }
358
359
360    // ********************************************************************************************
361    // ********************************************************************************************
362    // Compact Adjacent / Adjoining TextNode's
363    // ********************************************************************************************
364    // ********************************************************************************************
365
366
367    /**
368     * Convenience Method.
369     * <BR />Invokes: {@link #compactTextNodes(Vector, int, int)}
370     */
371    public static int compactTextNodes(Vector<HTMLNode> html)
372    { return compactTextNodes(html, 0, html.size()); }
373
374    /**
375     * Convenience Method.
376     * <BR />Receives: {@code DotPair}
377     * <BR />Invokes: {@link #compactTextNodes(Vector, int, int)} 
378     */
379    public static int compactTextNodes(Vector<HTMLNode> html, DotPair dp)
380    { return compactTextNodes(html, dp.start, dp.end + 1); }     
381
382    /**
383     * Occasionally, when removing instances of {@code TagNode} from a vectorized-html 
384     * page, certain instances of {@code TextNode} which were not adjacent / neighbours in
385     * the {@code Vector}, all of a sudden become adjacent.  Although there are no major problems
386     * with contiguous instances of {@code TextNode} from the Search Algorithm's perspective,
387     * for programmer's, it can sometimes be befuddling to realize that the output text that
388     * is returned from a call to {@code Util.pageToString(html)} is not being found because
389     * the text that is left is broken amongst multiple instances of adjacent TextNodes.
390     *
391     * <BR /><BR />This method merely combines "Adjacent" instances of {@code class TextNode}
392     * in the {@code Vector} into single instances of {@code class TextNode}
393     *
394     * @param html Any vectorized-html web-page.  If this page contain any contiguously placed
395     * {@code TextNode's}, the extra's will be eliminated, and the internal-string's inside the
396     * node's ({@code TextNode.str}) will be combined.  This action will reduce the size of the
397     * actual html-{@code Vector}.
398     * 
399     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
400     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
401     * 
402     * @return The number of nodes that were eliminated after being combined, or 0 if there
403     * were no text-nodes that were removed.
404     * 
405     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
406     * 
407     * @see HTMLNode#str
408     * @see TextNode
409     */
410    public static int compactTextNodes(Vector<HTMLNode> html, int sPos, int ePos)
411    {
412        LV      l           = new LV(html, sPos, ePos);
413        boolean compacting  = false;
414        int     firstPos    = -1;
415        int     delta       = 0;
416
417        for (int i=l.start; i < (l.end - delta); i++)
418
419            if (html.elementAt(i).isTextNode())
420            {
421                if (compacting) continue;   // Not in "Compacting Mode"
422                compacting  = true;         // Start "Compacting Mode" - this is a TextNode
423                firstPos    = i;
424            }
425
426            else if (compacting && (firstPos < (i-1)))  // Else - Must be a TagNode or CommentNode
427            {
428                // Save compacted TextNode String's into this StringBuilder
429                StringBuilder compacted = new StringBuilder();
430
431                // Iterate all TextNodes that were adjacent, put them together into StringBuilder
432                for (int j=firstPos; j < i; j++) compacted.append(html.elementAt(j).str);
433
434                // Place this new "aggregate TextNode" at location of the first TextNode that
435                // was compacted into this StringBuilder
436
437                html.setElementAt(new TextNode(compacted.toString()), firstPos);
438
439                // Remove the rest of the positions in the Vector that had TextNode's.  These have
440                // all been put together into the "Aggregate TextNode" at position "firstPos"
441
442                Util.Remove.range(html, firstPos + 1, i);
443
444                // The change in the size of the Vector needs to be accounted for.
445                delta += (i - firstPos - 1);
446
447                // Change the loop-counter variable, too, since the size of the Vector has changed.
448                i = firstPos + 1;
449
450                // Since we just hit a CommentNode, or TagNode, exit "Compacting Mode."
451                compacting = false;
452
453            }
454
455            // NOTE: This, ALSO, MUST BE a TagNode or CommentNode (just like the previous
456            //       if-else branch !)
457            // TRICKY: Don't forget this 'else' !
458
459            else compacting = false;
460
461        // Added - Don't forget the case where the Vector ends with a series of TextNodes
462        // TRICKY TOO! (Same as the HTML Parser... The ending or 'trailing' nodes must be parsed
463
464        int lastNodePos = html.size() - 1;
465
466        if (html.elementAt(lastNodePos).isTextNode()) if (compacting && (firstPos < lastNodePos))
467        {
468            StringBuilder compacted = new StringBuilder();
469
470            // Compact the TextNodes that were identified at the end of the Vector range.
471            for (int j=firstPos; j <= lastNodePos; j++) compacted.append(html.elementAt(j).str);
472
473            // Replace the group of TextNode's at the end of the Vector, with the single, aggregate
474            html.setElementAt(new TextNode(compacted.toString()), firstPos);
475            Util.Remove.range(html, firstPos + 1, lastNodePos + 1);
476        }
477
478        return delta;
479    }
480
481
482    // ********************************************************************************************
483    // ********************************************************************************************
484    // String-Length Operations
485    // ********************************************************************************************
486    // ********************************************************************************************
487
488
489    /**
490     * Convenience Method.
491     * <BR />Invokes: {@link #strLength(Vector, int, int)}
492     */
493    public static int strLength(Vector<? extends HTMLNode> html)
494    { return strLength(html, 0, -1); }
495
496    /**
497     * Convenience Method.
498     * <BR />Receives: {@code DotPair}
499     * <BR />Invokes: {@link #strLength(Vector, int, int)} 
500     */
501    public static int strLength(Vector<? extends HTMLNode> html, DotPair dp)
502    { return strLength(html, dp.start, dp.end + 1); }
503
504    /**
505     * This method simply adds / sums the {@code String}-length of every {@code HTMLNode.str }
506     * field in the passed page-{@code Vector}.  It only counts nodes between parameters
507     * {@code sPos} (inclusive) and {@code ePos} (exclusive).
508     * 
509     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
510     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
511     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
512     * 
513     * @return The total length <B><I>- in characters -</I></B> of the sub-page of HTML between
514     * {@code 'sPos'} and {@code 'ePos'}
515     * 
516     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
517     * 
518     * @see #strLength(Vector)
519     */
520    public static int strLength(Vector<? extends HTMLNode> html, int sPos, int ePos)
521    {
522        int ret = 0;
523        LV  l   = new LV(html, sPos, ePos);
524
525        for (int i=l.start; i < l.end; i++) ret += html.elementAt(i).str.length();
526
527        return ret;
528    }
529
530
531    // ********************************************************************************************
532    // ********************************************************************************************
533    // Hash-Code Operations
534    // ********************************************************************************************
535    // ********************************************************************************************
536
537
538    /**
539     * Convenience Method.
540     * <BR />Invokes: {@link #hashCode(Vector, int, int)}
541     */
542    public static int hashCode(Vector<? extends HTMLNode> html)
543    { return hashCode(html, 0, -1); }
544
545    /**
546     * Convenience Method.
547     * <BR />Receives: {@code DotPair}
548     * <BR />Invokes: {@link #hashCode(Vector, int, int)} 
549     */
550    public static int hashCode(Vector<? extends HTMLNode> html, DotPair dp)
551    { return hashCode(html, dp.start, dp.end + 1); }
552
553    /**
554     * Generates a hash-code for a vectorized html page-{@code Vector}.
555     * 
556     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
557     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
558     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
559     * 
560     * @return Returns the {@code String.hashCode()} of the <I><B>partial HTML-page</B></i> as if
561     * it were not being stored as a {@code Vector}, but rather as HTML inside of a
562     * Java-{@code String}.
563     * 
564     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
565     * 
566     * @see #hashCode(Vector)
567     */
568    public static int hashCode(Vector<? extends HTMLNode> html, int sPos, int ePos)
569    {
570        int h   = 0;
571        LV  lv  = new LV(html, sPos, ePos);
572
573        for (int j=lv.start; j < lv.end; j++)
574        {
575            String  s = html.elementAt(j).str;
576            int     l = s.length();
577
578            // This line has been copied from the jdk8/jdk8 "String.hashCode()" method.
579            // The difference is that it iterates over the entire vector
580
581            for (int i=0; i < l; i++) h = 31 * h + s.charAt(i);
582        }
583
584        return h;
585    }
586
587
588    // ********************************************************************************************
589    // ********************************************************************************************
590    // JSON Script Nodes
591    // ********************************************************************************************
592    // ********************************************************************************************
593
594
595    /**
596     * Convenience Method.
597     * <BR />Invokes: {@link #getJSONScriptBlocks(Vector, int, int)}
598     */
599    public static Stream<String> getJSONScriptBlocks(Vector<HTMLNode> html)
600    { return getJSONScriptBlocks(html, 0, -1); }
601
602    /**
603     * Convenience Method.
604     * <BR />Receives: {@code DotPair}.
605     * <BR />Invokes: {@link #getJSONScriptBlocks(Vector, int, int)}
606     */
607    public static Stream<String> getJSONScriptBlocks(Vector<HTMLNode> html, DotPair dp)
608    { return getJSONScriptBlocks(html, dp.start, dp.end + 1); }
609
610    /**
611     * This method shall search for any and all {@code <SCRIPT TYPE="json">}
612     * <I>JSON TEXT</I> {@code </SCRIPT>} block present in a range of Vectorized HTML.  The
     * search method shall simply look for the token {@code "JSON"} in the {@code TYPE} attribute
     * of each and every {@code <SCRIPT> TagNode} that is found on the page.  The contents of
     * such blocks <I>are not checked for validity, nor are they even guaranteed to be
     * {@code JSON} data!</I>
617     * 
618     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
619     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
620     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
621     * 
622     * @return This will return a {@code java.util.stream.Stream<String>} of each of the 
623     * {@code JSON} elements present in the specified range of the Vectorized HTML passed to
624     * parameter {@code 'html'}.
625     * 
626     * <EMBED CLASS='external-html' DATA-FILE-ID=STRMCNVT>
627     * 
628     * @see StrTokCmpr#containsIgnoreCase(String, Predicate, String)
629     * @see Util#rangeToString(Vector, int, int)
630     */
631    public static Stream<String> getJSONScriptBlocks(Vector<HTMLNode> html, int sPos, int ePos)
632    {
633        // Whenever building lists, it is usually easiest to use a Stream.Builder
634        Stream.Builder<String> b = Stream.builder();
635
636        // This Predicate simply tests that if the substring "json" (CASE INSENSITIVE) is found
637        // in the TYPE attribute of a <SCRIPT TYPE=...> node, that the token-string is, indeed a
638        // word - not a substring of some other word.  For instance: TYPE="json" would PASS, but
639        // TYPE="rajsong" would FAIL - because the token string is not surrounded by white-space
640
641        final Predicate<String> tester = (String s) ->
642            StrTokCmpr.containsIgnoreCase
643                (s, (Character c) -> ! Character.isLetterOrDigit(c), "json");
644
645        // Find all <SCRIPT> node-blocks whose "TYPE" attribute abides by the tester
646        // String-Predicate named above.
647
648        Vector<DotPair> jsonDPList = InnerTagFindInclusive.all
649            (html, sPos, ePos, "script", "type", tester);
650
651        // Convert each of these DotPair element into a java.lang.String
652        // Add the String to the Stream.Builder<String>
653
654        for (DotPair jsonDP : jsonDPList)
655            if (jsonDP.size() > 2)
656                b.accept(Util.rangeToString(html, jsonDP.start + 1, jsonDP.end));
657
658        // Build the Stream, and return it.
659        return b.build();
660    }
661
662
663    // ********************************************************************************************
664    // ********************************************************************************************
665    // MISC
666    // ********************************************************************************************
667    // ********************************************************************************************
668
669
670    /**
671     * Inserts nodes, and allows a 'varargs' parameter.
672     * 
673     * @param html Any HTML Page
674     * 
675     * @param pos The position in the original {@code Vector} where the nodes shall be inserted.
676     * 
677     * @param nodes A list of nodes to insert.
678     */
679    public static void insertNodes(Vector<HTMLNode> html, int pos, HTMLNode... nodes)
680    {
681        Vector<HTMLNode> nodesVec = new Vector<>(nodes.length);
682        for (HTMLNode node : nodes) nodesVec.addElement(node);
683        html.addAll(pos, nodesVec);
684    }
685
686    /**
687     * Convenience Method.
688     * <BR />Invokes: {@link #replaceRange(Vector, int, int, Vector)}
689     */
690    public static void replaceRange
691        (Vector<HTMLNode> page, DotPair range, Vector<HTMLNode> newNodes)
692    { replaceRange(page, range.start, range.end+1, newNodes); }
693
694    /**
     * Replaces any and all {@code HTMLNode's} located between the {@code Vector} locations
     * {@code 'sPos'} (inclusive) and {@code 'ePos'} (exclusive).  By exclusive, this means that
     * the {@code HTMLNode} located at position {@code 'ePos'} <B><I>will not</I></B> be replaced,
     * but the one at {@code 'sPos'} <I><B>is replaced</B></I>.
699     * 
     * <BR /><BR />The size of the {@code Vector} will change by {@code newNodes.size() - 
     * (ePos - sPos)}.  The contents situated between {@code Vector} location {@code sPos} and
     * {@code sPos + newNodes.size()} will, indeed, be the contents of the {@code 'newNodes'}
     * parameter.
704     * 
705     * @param page Any Java HTML page, constructed of {@code HTMLNode (TagNode & TextNode)}
706     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
707     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
708     * @param newNodes Any Java HTML page-{@code Vector} of {@code HTMLNode}.
709     * 
710     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
711     * 
712     * @see #pollRange(Vector, int, int)
713     * @see Remove#range(Vector, int, int)
714     * @see #replaceRange(Vector, DotPair, Vector)
715     */
716    public static void replaceRange
717        (Vector<HTMLNode> page, int sPos, int ePos, Vector<HTMLNode> newNodes)
718    {
719        // Torello.Java.LV
720        LV l = new LV(sPos, ePos, page);
721
722        int oldSize     = ePos - sPos;
723        int newSize     = newNodes.size();
724        int insertPos   = sPos;
725        int i           = 0;
726
727        while ((i < newSize) && (i < oldSize))
728            page.setElementAt(newNodes.elementAt(i++), insertPos++);
729
730
731        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
732        // CASE ONE:
733        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
734
735        if (newSize == oldSize) return;
736
737
738        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
739        // CASE TWO:
740        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
741        //
742        // The new Vector is SMALLER than the old sub-range
743        // The rest of the nodes just need to be trashed
744        //
745        // OLD-WAY: (Before realizing what Vector.subList is actually doing)
746        // Util.removeRange(page, insertPos, ePos);
747
748        if (newSize < oldSize) page.subList(insertPos, ePos).clear();
749
750
751        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
752        // CASE THREE:
753        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
754        //
755        // The new Vector is BIGGER than the old sub-range
756        // There are still more nodes to insert.
757
758        else page.addAll(ePos, newNodes.subList(i, newSize));
759    }
760
761    /**
762     * Java's {@code java.util.Vector} class does not allow public access to the
763     * {@code removeRange(start, end)} function.  It is listed as {@code 'protected'} in Java's
764     * Documentation about the {@code class Vector.}  This method upstages that, and performs the
765     * {@code 'Poll'} operation, where the nodes are first removed, stored, and then return as a
766     * function result.
767     * 
768     * <BR /><BR /><B CLASS=JDDescLabel>Poll a Range:</B>
769     * 
770     * <BR />The nodes that are removed are placed in a separate return {@code Vector}, and
771     * returned as a result to this method.
772     * 
773     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
774     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
775     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
776     * 
777     * @return A complete list ({@code Vector<HTMLNode>}) of the nodes that were removed.
778     * 
779     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
780     * 
781     * @see Remove#range(Vector, int, int)
782     * @see Remove#range(Vector, DotPair)
783     * @see #pollRange(Vector, DotPair)
784     */
785    public static Vector<HTMLNode> pollRange(Vector<? extends HTMLNode> html, int sPos, int ePos)
786    {
787        // The original version of this method is preserved inside comments at the bottom of this
788        // method.  Prior to seeing the Sun-Oracle Docs explaining that the return from the SubList
789        // operation "mirrors changes" back to to the original vector, the code in the comments is
790        // how this method was accomplished.
791
792        LV                          l       = new LV(html, sPos, ePos);
793        Vector<HTMLNode>            ret     = new Vector<HTMLNode>(l.end - l.start);
794        List<? extends HTMLNode>    list    = html.subList(l.start, l.end);
795
796        // Copy the Nodes into the return Vector that the end-user receives
797        ret.addAll(list);
798
799        // Clear the nodes out of the original Vector.  The Sun-Oracle Docs 
800        // state that the returned sub-list is "mirrored back into" the original
801
802        list.clear();
803
804        // Return the Vector to the user.  Note that the List<HTMLNode> CANNOT be returned,
805        // because of it's mirror-qualities, and because this method expects a vector.
806
807        return ret;
808
809        /*
810        // BEFORE READING ABOUT Vector.subList(...), this is how this was accomplished:
811        // NOTE: It isn't so clear how the List<HTMLNode> works - likely it doesn't actually
812        //       create any new memory-allocated arrays, it is just an "overlay"
813
814        // Copy the elements from the input vector into the return vector
815        for (int i=l.start; i < l.end; i++) ret.add(html.elementAt(i));
816
817        // Remove the range from the input vector (this is the meaning of 'poll')
818        Util.removeRange(html, sPos, ePos);
819
820        return ret;
821        */
822    }
823
824    /**
825     * Convenience Method.
826     * <BR />Receives: {@code DotPair}
827     * <BR />Invokes: {@link #pollRange(Vector, int, int)}. 
828     */
829    public static Vector<HTMLNode> pollRange(Vector<? extends HTMLNode> html, DotPair dp)
830    { return pollRange(html, dp.start, dp.end + 1); }
831
832    /**
833     * This removes every element from the {@code Vector} beginning at position 0, all the way to
834     * position {@code 'pos'} (exclusive).  The {@code elementAt(pos)} remains in the original page
835     * input-{@code Vector}.  This is the definition of 'exclusive'.
836     * 
837     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
838     * 
839     * @param pos Any position within the range of the input {@code Vector}.
840     * 
841     * @return The elements in the {@code Vector} from position: {@code 0 ('zero')} all the way to
842     * position: {@code 'pos'}
843     */
844    public static Vector<HTMLNode> split(Vector<? extends HTMLNode> html, int pos)
845    { return pollRange(html, 0, pos); }
846
847
848    // ********************************************************************************************
849    // ********************************************************************************************
850    // Static Inner-Class: Count 
851    // ********************************************************************************************
852    // ********************************************************************************************
853
854
855    @Torello.JavaDoc.StaticFunctional
856    public static class Count 
857    {
858        private Count() { }
859
860
861        // ****************************************************************************************
862        // ****************************************************************************************
863        // Count TextNode's
864        // ****************************************************************************************
865        // ****************************************************************************************
866
867
868        /**
869         * Convenience Method.
870         * <BR />Invokes: {@link #textNodes(Vector, int, int)}
871         */
872        public static int textNodes(Vector<HTMLNode> page)
873        { return textNodes(page, 0, -1); }
874
875        /**
876         * Convenience Method.
877         * <BR />Receives: {@code DotPair}
878         * <BR />Invokes: {@link #textNodes(Vector, int, int)}
879         */
880        public static int textNodes(Vector<HTMLNode> page, DotPair dp)
881        { return textNodes(page, dp.start, dp.end + 1); }
882
883        /**
884         * Counts the number of {@code TextNode's} in a {@code Vector<HTMLNode>} between the
885         * demarcated array / {@code Vector} positions, {@code 'sPos'} and {@code 'ePos'}
886         * 
887         * @param page Any HTML page.
888         * 
889         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
890         * 
891         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
892         * 
893         * @return The number of {@code TextNode's} in the {@code Vector} between the demarcated
894         * indices.
895         * 
896         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
897         */
898        public static int textNodes(Vector<HTMLNode> page, int sPos, int ePos)
899        {
900            int counter = 0;
901            LV  l       = new LV(page, sPos, ePos);
902
903            // Iterates the entire page between sPos and ePos, incrementing the count for every
904            // instance of text-node.
905
906            for (int i=l.start; i < l.end; i++) if (page.elementAt(i).isTextNode()) counter++;
907
908            return counter;
909        }
910
911
912        // ****************************************************************************************
913        // ****************************************************************************************
914        // Count CommentNode's
915        // ****************************************************************************************
916        // ****************************************************************************************
917
918
919        /**
920         * Convenience Method.
921         * <BR />Invokes: {@link #commentNodes(Vector, int, int)}
922         */
923        public static int commentNodes(Vector<HTMLNode> page)
924        { return commentNodes(page, 0, -1); }
925
926        /**
927         * Convenience Method.
928         * <BR />Receives: {@code DotPair}
929         * <BR />Invokes: {@link #commentNodes(Vector, int, int)} 
930         */
931        public static int commentNodes(Vector<HTMLNode> page, DotPair dp)
932        { return commentNodes(page, dp.start, dp.end + 1); }
933
934        /**
935         * Counts the number of {@code CommentNode's} in an {@code Vector<HTMLNode>} between the
936         * demarcated array / {@code Vector} positions.
937         * 
938         * @param page Any HTML page.
939         * 
940         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
941         * 
942         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
943         * 
944         * @return The number of {@code CommentNode's} in the {@code Vector} between the demarcated
945         * indices.
946         * 
947         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
948         */
949        public static int commentNodes(Vector<HTMLNode> page, int sPos, int ePos)
950        {
951            int counter = 0;
952            LV  l       = new LV(page, sPos, ePos);
953
954            // Iterates the entire page between sPos and ePos, incrementing the count for every
955            // instance of comment-node.
956
957            for (int i=l.start; i < l.end; i++)  if (page.elementAt(i).isCommentNode()) counter++;
958
959            return counter;
960        }
961
962
963        // ****************************************************************************************
964        // ****************************************************************************************
965        // Count TagNode's
966        // ****************************************************************************************
967        // ****************************************************************************************
968
969
970        /**
971         * Convenience Method.
972         * <BR />Invokes: {@link #tagNodes(Vector, int, int)}
973         */
974        public static int tagNodes(Vector<HTMLNode> page)
975        { return tagNodes(page, 0, -1); }
976
977        /**
978         * Convenience Method.
979         * <BR />Receives: {@code DotPair}
980         * <BR />Invokes: {@link #tagNodes(Vector, int, int)} 
981         */
982        public static int tagNodes(Vector<HTMLNode> page, DotPair dp)
983        { return tagNodes(page, dp.start, dp.end + 1); }
984
985        /**
986         * Counts the number of {@code TagNode's} in a {@code Vector<HTMLNode>} between the
987         * demarcated array / {@code Vector} positions.
988         * 
989         * @param page Any HTML page.
990         * 
991         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
992         * 
993         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
994         * 
995         * @return The number of {@code TagNode's} in the {@code Vector}.
996         * 
997         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
998         */
999        public static int tagNodes(Vector<HTMLNode> page, int sPos, int ePos)
1000        {
1001            int counter = 0;
1002            LV  l       = new LV(page, sPos, ePos);
1003
1004            // Iterates the entire page between sPos and ePos, incrementing the count for every
1005            // instance of TagNode.
1006
1007            for (int i=l.start; i < l.end; i++) if (page.elementAt(i).isTagNode()) counter++;
1008
1009            return counter;
1010        }
1011
1012
1013        // ****************************************************************************************
1014        // ****************************************************************************************
1015        // Count New Lines
1016        // ****************************************************************************************
1017        // ****************************************************************************************
1018
1019
1020        /**
1021         * Convenience Method.
1022         * <BR />Invokes: {@link #newLines(Vector, int, int)}
1023         */
1024        public static int newLines(Vector<? extends HTMLNode> html)
1025        { return newLines(html, 0, -1); }
1026
1027        /**
1028         * Convenience Method.
1029         * <BR />Receives: {@code DotPair}
1030         * <BR />Invokes: {@link #newLines(Vector, int, int)} 
1031         */
1032        public static int newLines(Vector<? extends HTMLNode> html, DotPair dp)
1033        { return newLines(html, dp.start, dp.end + 1); }
1034
1035
1036        /**
1037         * This will count the number of new-line symbols present <B><I>- on the partial HTML
1038         * page</I></B>. The count will include a sum of every {@code HTMLNode.str} that
1039         * contains the standard new-line symbols: {@code \r\n, \r, \n}, meaning that UNIX, MSFT,
1040         * Apple, etc. forms of text-line rendering should all be treated equally.
1041         * 
1042         * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
1043         * 
1044         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
1045         * 
1046         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
1047         * 
1048         * @return The number of new-line characters in all of the {@code HTMLNode's} that occur
1049         * between vectorized-page positions {@code 'sPos'} and {@code 'ePos.'}
1050         * 
1051         * <BR /><BR /><B>NOTE:</B> The regular-expression used here 'NEWLINEP' is as follows:
1052         * 
1053         * <DIV CLASS="SNIP">{@code
1054         * private static final Pattern NEWLINEP = Pattern.compile("\\r\\n|\\r|\\n");
1055         * }</DIV>
1056         * 
1057         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
1058         * 
1059         * @see StringParse#NEWLINEP
1060         */
1061        public static int newLines(Vector<? extends HTMLNode> html, int sPos, int ePos)
1062        {
1063            int newLineCount    = 0;
1064            LV  l               = new LV(html, sPos, ePos);
1065
1066            for (int i=l.start; i < l.end; i++)
1067
1068                // Uses the Torello.Java.StringParse "New Line RegEx"
1069                for (   Matcher m = StringParse.NEWLINEP.matcher(html.elementAt(i).str);
1070                        m.find();
1071                        newLineCount++);
1072
1073            return newLineCount;
1074        }
1075    }
1076
1077
1078    // ********************************************************************************************
1079    // ********************************************************************************************
1080    // Static Inner-Class: Remove 
1081    // ********************************************************************************************
1082    // ********************************************************************************************
1083
1084
1085    @Torello.JavaDoc.StaticFunctional
1086    public static class Remove 
1087    {
        // Private and empty: its only purpose is to block outside instantiation.
        private Remove() { }
1089
1090
1091        // ****************************************************************************************
1092        // ****************************************************************************************
1093        // TextNode Removal Operations
1094        // ****************************************************************************************
1095        // ****************************************************************************************
1096
1097
1098        /**
1099         * Convenience Method.
1100         * <BR />Invokes: {@link #allTextNodes(Vector, int, int)}
1101         */
1102        public static int allTextNodes(Vector<HTMLNode> page)
1103        { return allTextNodes(page, 0, -1); }
1104
1105        /**
1106         * Convenience Method.
1107         * <BR />Receives: {@code DotPair}
1108         * <BR />Invokes: {@link #allTextNodes(Vector, int, int)}
1109         */
1110        public static int allTextNodes(Vector<HTMLNode> page, DotPair dp)
1111        { return allTextNodes(page, dp.start, dp.end + 1); }
1112
1113        /**
1114         * Takes a sub-section of an HTML {@code Vector} and removes all {@code TextNode} present
1115         * 
1116         * @param page Any HTML page
1117         * 
1118         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
1119         * 
1120         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
1121         * 
1122         * @return The number of HTML {@code TextNode's} that were removed
1123         * 
1124         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
1125         * 
1126         * @see TextNode
1127         * @see #nodesOPT(Vector, int[])
1128         */
1129        public static int allTextNodes(Vector<HTMLNode> page, int sPos, int ePos)
1130        {
1131            IntStream.Builder   b = IntStream.builder();
1132            LV                  l = new LV(page, sPos, ePos);
1133
1134            // Use Java-Streams to build the list of nodes that are valid text-nodes.
1135            for (int i=l.start; i < l.end; i++) if (page.elementAt(i).isTextNode()) b.add(i);
1136
1137            // Build the stream and convert it to an int[] (integer-array)
1138            int[] posArr = b.build().toArray();
1139
1140            // The integer array is guaranteed to be sorted, and contain valid vector-indices.
1141            nodesOPT(page, posArr);
1142
1143            return posArr.length;
1144        }
1145
1146
1147        // ****************************************************************************************
1148        // ****************************************************************************************
1149        // TagNode Removal Operations
1150        // ****************************************************************************************
1151        // ****************************************************************************************
1152
1153
1154        /**
1155         * Convenience Method.
1156         * <BR />Invokes: {@link #allTagNodes(Vector, int, int)}
1157         */
1158        public static int allTagNodes(Vector<HTMLNode> page) 
1159        { return allTagNodes(page, 0, -1); }
1160
1161        /**
1162         * Convenience Method.
1163         * <BR />Receives: {@code DotPair} 
1164         * <BR />Invokes: {@link #allTagNodes(Vector, int, int)}
1165         */
1166        public static int allTagNodes(Vector<HTMLNode> page, DotPair dp)
1167        { return allTagNodes(page, dp.start, dp.end + 1); }
1168
1169        /**
1170         * Takes a sub-section of an HTML {@code Vector} and removes all {@code TagNode} present
1171         * 
1172         * @param page Any HTML page
1173         * 
1174         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
1175         * 
1176         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
1177         * 
1178         * @return The number of HTML {@code TagNode's} that were removed
1179         * 
1180         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
1181         * 
1182         * @see TagNode
1183         * @see #nodesOPT(Vector, int[])
1184         */
1185        public static int allTagNodes(Vector<HTMLNode> page, int sPos, int ePos)
1186        {
1187            IntStream.Builder   b = IntStream.builder();
1188            LV                  l = new LV(page, sPos, ePos);
1189
1190            // Use Java-Streams to build the list of nodes that are valid tag-nodes.
1191            for (int i=l.start; i < l.end; i++) if (page.elementAt(i).isTagNode()) b.add(i);
1192
1193            // Build the stream and convert it to an int[] (integer-array)
1194            int[] posArr = b.build().toArray();
1195
1196            // The integer array is guaranteed to be sorted, and contain valid vector-indices.
1197            nodesOPT(page, posArr);
1198
1199            return posArr.length;
1200        }
1201
1202
1203        // ****************************************************************************************
1204        // ****************************************************************************************
1205        // CommentNode Removal Operations
1206        // ****************************************************************************************
1207        // ****************************************************************************************
1208
1209
1210        /**
1211         * Convenience Method.
1212         * <BR />Invokes: {@link #allCommentNodes(Vector, int, int)}
1213         */
1214        public static int allCommentNodes(Vector<HTMLNode> page)
1215        { return allCommentNodes(page, 0, -1); }
1216
1217        /**
1218         * Convenience Method.
1219         * <BR />Receives: {@code DotPair}
1220         * <BR />Invokes: {@link #allCommentNodes(Vector, int, int)}
1221         */
1222        public static int allCommentNodes(Vector<HTMLNode> page, DotPair dp)
1223        { return allCommentNodes(page, dp.start, dp.end + 1); }
1224
1225        /**
1226         * Takes a sub-section of an HTML {@code Vector} and removes all {@code CommentNode}
1227         * present
1228         * 
1229         * @param page Any HTML page
1230         * 
1231         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
1232         * 
1233         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
1234         * 
1235         * @return The number of HTML {@code CommentNode's} that were removed
1236         * 
1237         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
1238         * 
1239         * @see CommentNode
1240         * @see #nodesOPT(Vector, int[])
1241         */
1242        public static int allCommentNodes(Vector<HTMLNode> page, int sPos, int ePos)
1243        {
1244            IntStream.Builder   b       = IntStream.builder();
1245            LV                  l       = new LV(page, sPos, ePos);
1246
1247            // Use Java-Streams to build the list of nodes that are valid comment-nodes.
1248            for (int i=l.start; i < l.end; i++)
1249                if (page.elementAt(i).isCommentNode())
1250                    b.add(i);
1251
1252            // Build the stream and convert it to an int[] (integer-array)
1253            int[] posArr = b.build().toArray();
1254
1255            // The integer array is guaranteed to be sorted, and contain valid vector-indices.
1256            nodesOPT(page, posArr);
1257
1258            return posArr.length; 
1259        }
1260
1261
1262        // ****************************************************************************************
1263        // ****************************************************************************************
1264        // Remove All Inner Tags
1265        // ****************************************************************************************
1266        // ****************************************************************************************
1267
1268
1269        /**
1270         * Convenience Method.
1271         * <BR />Invokes: {@link #allInnerTags(Vector, int, int)}
1272         */
1273        public static int allInnerTags(Vector<HTMLNode> html)
1274        { return allInnerTags(html, 0, -1); }
1275
1276        /**
1277         * Convenience Method.
1278         * <BR />Receives: {@code DotPair}
1279         * <BR />Invokes: {@link #allInnerTags(Vector, int, int)}
1280         */
1281        public static int allInnerTags(Vector<? super TagNode> html, DotPair dp)
1282        { return allInnerTags(html, dp.start, dp.end + 1); }
1283
1284        /**
1285         * This method removes all inner-tags (all attributes) from every {@link TagNode} inside of
1286         * an HTML page.  It does this by replacing every {@code TagNode} in the {@code Vector}
1287         * with the pre-instantiated, publicly-available {@code TagNode} which can be obtained by a
1288         * call to the class {@code HTMLTags.hasTag(token, TC)}.
1289         * 
1290         * <BR /><BR /><B CLASS=JDDescLabel>Replacing {@code TagNode's:}</B>
1291         * 
1292         * <BR />This method determines whether a fresh {@link TagNode} is to be inserted by
1293         * measuring the length of the internal {@link TagNode#str} field (a {@code String} field).
1294         * If the length {@code TagNode.str} is not equal to the HTML token {@link TagNode#tok}
1295         * length <B><I>plus 2</I></B>, then a fresh, pre-instantiated, node is replaced.
1296         * 
1297         * <BR /><BR />The {@code '+2'} figure comes from the additional characters {@code '<'} and
1298         * {@code '>'} that start and end every HTML {@code TagNode}
1299         * 
1300         * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
1301         * 
1302         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
1303         * 
1304         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
1305         * 
1306         * @return The number of {@code TagNode} elements that have were replaced with
1307         * zero-attribute HTML Element Tags.
1308         * 
1309         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
1310         *
1311         * @throws ClassCastException If {@code 'html'} contains references that do not inherit
1312         * {@code HTMLNode}.
1313         */
1314        @SuppressWarnings("unchecked")
1315        public static int allInnerTags(Vector<? super TagNode> html, int sPos, int ePos)
1316        {
1317            int     ret = 0;
1318            LV      l   = new LV(sPos, ePos, html);
1319            TagNode tn;
1320
1321            for (int i = (l.end-1); i >= l.start; i--)
1322
1323                if ((tn = ((HTMLNode) html.elementAt(i)).openTagPWA()) != null)
1324
1325                {
1326                    ret++;
1327
1328                    // HTMLTags.hasTag(tok, TC) gets an empty and pre-instantiated TagNode,
1329                    // where TagNode.tok == 'tn.tok' and TagNode.isClosing = false
1330
1331                    html.setElementAt(HTMLTags.hasTag(tn.tok, TC.OpeningTags), i);
1332                }
1333
1334            return ret;
1335        }
1336
1337
1338        // ****************************************************************************************
1339        // ****************************************************************************************
1340        // Style-Node & Script-Node Block Removal Operations
1341        // ****************************************************************************************
1342        // ****************************************************************************************
1343
1344
1345        /**
1346         * Removes all HTML {@code 'style'} Node blocks.
1347         * 
1348         * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
1349         * 
1350         * @return The number of {@code <STYLE>}-Node Blocks that were removed
1351         */
1352        public static int styleNodeBlocks(Vector<? extends HTMLNode> html)
1353        {
1354            int removeCount = 0;
1355
1356            while (TagNodeRemoveInclusive.first(html, "style") > 0) removeCount++;
1357
1358            return removeCount;
1359        }
1360
1361        /**
1362         * Removes all {@code 'script'} Node blocks.
1363         * 
1364         * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
1365         * 
1366         * @return The number of {@code SCRIPT}-Node Blocks that were removed
1367         */
1368        public static int scriptNodeBlocks(Vector<? extends HTMLNode> html)
1369        {
1370            int removeCount = 0;
1371
1372            while (TagNodeRemoveInclusive.first(html, "script") > 0) removeCount++;
1373
1374            return removeCount;
1375        }
1376
1377
1378        // ****************************************************************************************
1379        // ****************************************************************************************
1380        // Remove a Sub-Range of nodes
1381        // ****************************************************************************************
1382        // ****************************************************************************************
1383
1384
1385        /**
1386         * Java's {@code java.util.Vector} class does not allow public access to the
1387         * {@code removeRange(start, end)} function.  It is protected in Java's Documentation about
1388         * the {@code Vector} class.  This method does exactly that, nothing else.
1389         * 
1390         * @param page Any Java HTML page, constructed of {@code HTMLNode (TagNode & TextNode)}
1391         * 
1392         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
1393         * 
1394         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
1395         * 
1396         * @return the number of nodes removed.
1397         * 
1398         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
1399         * 
1400         * @see #pollRange(Vector, int, int)
1401         * @see #range(Vector, DotPair)
1402         */
1403        public static <T extends HTMLNode> int range(Vector<T> page, int sPos, int ePos)
1404        {
1405            // Torello.Java.LV
1406            LV  l = new LV(sPos, ePos, page);
1407
1408            // According to the Sun-Oracle Docs, the returned sublist "mirros" the original vector,
1409            // which means that when it is changed, so is the original vector.
1410
1411            page.subList(l.start, l.end).clear();
1412
1413            return l.size();
1414        }
1415
1416        /**
1417         * Convenience Method.
1418         * <BR />Receives: {@code DotPair}
1419         * <BR />Invokes: {@link #range(Vector, int, int)} 
1420         */
1421        public static int range(Vector<? extends HTMLNode> html, DotPair dp)
1422        { return range(html, dp.start, dp.end + 1); }
1423
1424
1425        // ****************************************************************************************
1426        // ****************************************************************************************
1427        // Remove Specified Nodes by Vector-Index
1428        // ****************************************************************************************
1429        // ****************************************************************************************
1430
1431
1432        /**
1433         * <SPAN STYLE="color: red;"><B>OPT: Optimized</B></SPAN>
1434         * 
1435         * <BR /><BR />This method does the same thing as
1436         * {@link Remove#nodes(boolean, Vector, int[])}, but all error checking is skipped, and the
1437         * input integer array is presumed to have been sorted. There are no guarantees about the
1438         * behavior of this method if the input array {@code 'posArr'} is not sorted,
1439         * <I>least-to-greatest,</I> or if there are duplicate or negative values in this array.
1440         * 
1441         * <BR /><BR /><B CLASS=JDDescLabel>Empty Var-Args:</B>
1442         * 
1443         * <BR />If the var-args input integer-array parameter is empty, this method shall exit
1444         * gracefully (and immediately).
1445         * 
1446         * @param page Any HTML-Page, usually ones generated by {@code HTMLPage.getPageTokens}, but
1447         * these may be obtained or created in any fashion so necessary.
1448         * 
1449         * @param posArr An array of integers which list/identify the nodes in the page to be
1450         * removed. Because this implementation has been optimized, no error checking will be
1451         * performed on this input.  It is presumed to be sorted, least-to-greatest, and that all
1452         * values in the array are valid-indices into the vectorized-html parameter {@code 'page'}
1453         */
1454        public static <T extends HTMLNode> void nodesOPT(Vector<T> page, int... posArr)
1455        {
1456            if (posArr.length == 0) return;
1457
1458            int endingInsertPos = page.size() - posArr.length;
1459            int posArrIndex     = 0;
1460            int insertPos       = posArr[0];
1461            int retrievePos     = posArr[0];
1462
1463            // There is very little that can be documented about these two loops.  Took 3 hours
1464            // to figure out.  Read the variables names for "best documentation"
1465
1466            while (insertPos < endingInsertPos)
1467            {
1468                // This inner-loop is necessary for when the posArr has consecutive-elements that
1469                // are *ALSO* consecutive-pointers.
1470                //
1471                // For instance, this invokation:
1472                // Util.removeNodes(page, 4, 5, 6); ...
1473                //      where 4, 5, and 6 are consecutive - the inner while-loop is required.
1474                //
1475                // For this invokation: 
1476                // Util.removeNodes(page, 2, 4, 6); 
1477                //      the inner-loop is not entered.
1478
1479                while ((posArrIndex < posArr.length) && (retrievePos == posArr[posArrIndex]))
1480                { retrievePos++; posArrIndex++; }
1481
1482                page.setElementAt(page.elementAt(retrievePos++), insertPos++);
1483            }
1484
1485            // Remove all remaining elements in the tail of the array.
1486            page.setSize(page.size() - posArr.length);
1487        }
1488
1489
1490        /**
1491         * This method remove each HTMLNode from the passed-parameter {@code 'page'}
1492         * listed/identified by the input array {@code 'nodeList'}.
1493         * 
1494         * <BR /><BR /><B CLASS=JDDescLabel>Empty Var-Args:</B>
1495         * 
1496         * <BR />If the var-args input integer-array parameter is empty, this method shall exit
1497         * gracefully (and immediately).
1498         * 
1499         * @param preserveInputArray This is a convenience input parameter that allows a programmer
1500         * to "preserve" the original input-parameter integer-array that is passed to this method.
1501         * It could be argued this parameter is "superfluous" - however, keep in mind that the
1502         * passed parameter {@code 'nodeList'} <B><I>must be sorted</I></B> before this method is
1503         * able function properly. There is a sort that's performed within the body of this method.
1504         * Just in case that the original order of the integer-array input-parameter must be
1505         * preserved, its possible to request for the sort to operate on "a clone" of the
1506         * input-parameter integer-array, instead of the original integer-array {@code 'nodeList'}
1507         * itself. 
1508         * 
1509         * @param page Any HTML-Page, usually ones generated by
1510         * {@code HTMLPage.getPageTokens(...)}, but these may be obtained or created in any fashion
1511         * so necessary. 
1512         * 
1513         * @param nodeList An array of integers which list/identify the nodes in the page to be
1514         * removed.
1515         * 
1516         * @throws IllegalArgumentException If the {@code 'nodeList'} contains duplicate entries.
1517         * Obviously, no {@code HTMLNode} may be removed from the {@code Vector<HTMLNode>} more
1518         * than once.
1519         * 
1520         * @throws IndexOutOfBoundsException If the nodeList contains index-pointers / items that
1521         * are not within the bounds of the passed HTML-Page {@code Vector}.
1522         */
1523        public static <T extends HTMLNode> void nodes
1524            (boolean preserveInputArray, Vector<T> page, int... nodeList)
1525        {
1526            if (nodeList.length == 0) return;
1527
1528            // @Safe Var Args
1529            int[]   posArr  = preserveInputArray ? nodeList.clone() : nodeList;
1530            int     len     = posArr.length;
1531
1532            Arrays.sort(posArr);
1533
1534            // Check for duplicates in the nodeList, no HTMLNode may be removed twice!
1535            for (int i=0; i < (len - 1); i++)
1536
1537                if (posArr[i] == posArr[i+1]) throw new IllegalArgumentException(
1538                    "The input array contains duplicate items, this is not allowed.\n" +
1539                    "This is since each array-entry is intended to be a pointer/index for items " +
1540                    "to be removed.\nNo item can possibly be removed twice.!"
1541                );
1542
1543            // Make sure all nodes are within the bounds of the original Vector.  (no negative 
1544            // indexes, no indexes greater than the size of the Vector)
1545
1546            if ((posArr[0] < 0) || (posArr[len - 1] >= page.size()))
1547
1548                throw new IndexOutOfBoundsException (
1549                    "The input array contains entries which are not within the bounds of the " +
1550                    "original-passed Vector.\nHTMLPage Vector has: " + page.size() +
1551                        " elements.\n" +
1552                    "Maximum element in the nodeList is [" + posArr[len - 1] + "], and the " +
1553                        "minimum element is: [" + posArr[0] + "]"
1554                );
1555
1556            int endingInsertPos = page.size() - posArr.length;
1557            int posArrIndex     = 0;
1558            int insertPos       = posArr[0];
1559            int retrievePos     = posArr[0];
1560
1561            // There is very little that can be documented about these two loops.  Took 3 hours
1562            // to figure out.  Read the variables names for "best documentation"
1563
1564            while (insertPos < endingInsertPos)
1565            {
1566                // This inner-loop is necessary for when the posArr has consecutive-elements that
1567                // are *ALSO* consecutive-pointers.
1568                //
1569                // For instance, this invocation:
1570                // Util.removeNodes(page, 4, 5, 6);
1571                //      where 4, 5, and 6 are consecutive - the inner while-loop is required.
1572                //
1573                // For this invocation: 
1574                // Util.removeNodes(page, 2, 4, 6);
1575                //      the inner-loop is not entered.
1576
1577                while ((posArrIndex < posArr.length) && (retrievePos == posArr[posArrIndex])) 
1578                { retrievePos++; posArrIndex++; }
1579
1580                page.setElementAt(page.elementAt(retrievePos++), insertPos++);
1581            }
1582
1583            // Remove all remaining elements in the tail of the array.
1584            page.setSize(page.size() - posArr.length);
1585        }
1586
1587
1588        // ****************************************************************************************
1589        // ****************************************************************************************
1590        // Inclusive-Empty Removal Operations
1591        // ****************************************************************************************
1592        // ****************************************************************************************
1593
1594
1595        /**
1596         * Convenience Method.
1597         * <BR />Invokes: {@link #inclusiveEmpty(Vector, int, int, String[])}
1598         */
1599        public static int inclusiveEmpty(Vector<HTMLNode> page, String... htmlTags)
1600        { return inclusiveEmpty(page, 0, -1, htmlTags); }
1601
1602        /**
1603         * Convenience Method.
1604         * <BR />Receives: {@code DotPair}
1605         * <BR />Invokes: {@link #inclusiveEmpty(Vector, int, int, String[])}
1606         */
1607        public static int inclusiveEmpty(Vector<HTMLNode> page, DotPair dp, String... htmlTags)
1608        { return inclusiveEmpty(page, dp.start, dp.end + 1, htmlTags); }
1609
1610        /**
1611         * This will do an "Inclusive Search" using the standard class
1612         * {@link TagNodeInclusiveIterator} in the {@code package NodeSearch}.  Then it will
1613         * inspect the contents of the subsections. Any subsections that do not contain any
1614         * instances of {@code HTMLNode} in between them, or any subsections that only contain
1615         * "blank-text" (white-space) between them shall be removed. 
1616         * 
1617         * <BR /><BR /><B CLASS=JDDescLabel>Recursive Method:</B>
1618         * 
1619         * <BR />The search logic shall perform multiple <I><B>recursive iterations</B></I> of
1620         * itself, such that if, for instance, the user requested that all empty HTML divider
1621         * ({@code <DIV>}) elements be removed, if after removing a set a dividers resulted in more
1622         * empty ones (nested {@code <DIV>} elements), then an additional removal shall be called.
1623         * <I>This recursion shall continue until there are no empty HTML elements of the types
1624         * listed by</I> {@code 'htmlTags'}
1625         *
1626         * @param page Any vectorized-html page or sub-page.
1627         * 
1628         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
1629         * 
1630         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
1631         * 
1632         * @param htmlTags The list of <I>inclusive</I> (non-singleton) html elements to search for
1633         * possibly being empty container tags.
1634         * 
1635         * @return The number of {@code HTMLNode's} that were removed.
1636         * 
1637         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
1638         */
1639        public static int inclusiveEmpty
1640            (Vector<HTMLNode> page, int sPos, int ePos, String... htmlTags)
1641        {
1642            DotPair subList;
1643
1644            int             removed = 0;
1645            HNLIInclusive   iter    = TagNodeInclusiveIterator.iter(page, htmlTags);
1646            LV              l       = new LV(page, sPos, ePos);
1647
1648            iter.restrictCursor(l);
1649
1650            TOP:
1651            while (iter.hasNext())
1652
1653                // If there is only the opening & closing pair, with nothing in between,
1654                // then the pair must be removed because it is "Empty" (Inclusive Empty)
1655
1656                if ((subList = iter.nextDotPair()).size() == 2)
1657                {
1658                    iter.remove();
1659                    ePos -= subList.size();
1660                    removed += subList.size();
1661                }
1662
1663                else
1664                {
1665                    // If there is any TagNode in between the start-end pair, then this is NOT
1666                    // EMPTY.  In this case, skip to the next start-end opening-closing pair.
1667
1668                    for (int i=(subList.start + 1); i < subList.end; i++)
1669                        if (! page.elementAt(i).isTextNode())
1670                            continue TOP;
1671
1672                    // If there were only TextNode's between an opening-closing TagNode Pair....
1673                    // **AND** those TextNode's are only white-space, then this also considered
1674                    // Inclusively Empty.  (Get all TextNode's, and if .trim() reduces the length()
1675                    // to zero, then it was only white-space.
1676
1677                    if (Util.textNodesString(page, subList).trim().length() == 0)
1678                    {
1679                        iter.remove();
1680                        ePos -= subList.size();
1681                        removed += subList.size();
1682                    }
1683                }
1684
1685            // This process must be continued recursively, because if any inner, for instance,
1686            // <DIV> ... </DIV> was removed, then the outer list must be re-checked...
1687
1688            if (removed > 0)
1689                return removed + Remove.inclusiveEmpty(page, sPos, ePos, htmlTags);
1690            else
1691                return 0;
1692        }
1693
1694
1695        // ****************************************************************************************
1696        // ****************************************************************************************
1697        // Miscellaneous Removal Operations
1698        // ****************************************************************************************
1699        // ****************************************************************************************
1700
1701
1702        /**
1703         * Removes the first and last element of a vectorized-HTML web-page, or sub-page.
1704         * Generally, this could be used to remove the surrounding tag's {@code '<DIV>'} ...
1705         * {@code '</DIV>'}, or something similar.
1706         * 
1707         * <BR /><BR />This method <B STYLE="color: red;">WILL NOT CHECK</B> whether there are
1708         * matching HTML open-and-close tags at the end beginning and end of this sub-section.
1709         * Generally, though, that is how this method is intended to be used.
1710         * 
1711         * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
1712         * 
1713         * @throws IllegalArgumentException If the {@code Vector} has fewer than two elements.
1714         */
1715        public static void firstLast(Vector<? extends HTMLNode> html)
1716        {
1717            int size = html.size();
1718
1719            if (size < 2) throw new IllegalArgumentException(
1720                "You have requested that the first and last elements the input 'page' parameter " +
1721                "(a vector) be removed.  However, the vector size is only [" + size  + "], so " +
1722                "this cannot be performed."
1723            );
1724
1725            // NOTE: *** This removes elementAt(0) and elementAt(size-1)
1726            //       *** NOT ALL ELEMENTS BETWEEN 0 and (size-1)
1727
1728            Util.Remove.nodesOPT(html, 0, size-1);
1729        }
1730
1731    }
1732
1733
1734    // ********************************************************************************************
1735    // ********************************************************************************************
1736    // Static Inner-Class: Inclusive 
1737    // ********************************************************************************************
1738    // ********************************************************************************************
1739
1740
1741    /**
1742     * Tools for finding the matching-closing tag of any open {@link TagNode}.
1743     * 
1744     * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=UTILINCL>
1745     */
1746    @Torello.JavaDoc.StaticFunctional
1747    public static class Inclusive
1748    {
1749        private Inclusive() { }
1750
1751    
1752        // ****************************************************************************************
1753        // ****************************************************************************************
1754        // Inclusive Find/Get
1755        // ****************************************************************************************
1756        // ****************************************************************************************
1757
1758        /**
1759         * This finds the closing HTML {@code 'TagNode'} match for a given opening
1760         * {@code 'TagNode'} in a given-input html page or sub-section.
1761         *
1762         * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
1763         *
1764         * @param nodeIndex An index into that {@code Vector}.  This index must point to an
1765         * {@code HTMLNode} element that is:
1766         *
1767         * <BR /><BR /><OL CLASS=JDOL>
1768         * <LI>An instance of {@code TagNode}</LI>
1769         * <LI>A {@code TagNode} whose {@code 'isClosing'} field is {@code FALSE}</LI>
1770         * <LI>Is not a {@code 'singleton'} HTML element-token
1771         * (i.e. {@code <IMG>, <BR>, <H1>, etc...})
1772         * </LI>
1773         * </OL>
1774         *
1775         * @return An "inclusive search" finds {@code OpeningTag} and {@code ClosingTag} pairs - 
1776         * <I>and returns all the elements between them in the contents of a 
1777         * return-{@code Vector}, or {@code Vector DotPair}-end-point value</I>.  This method
1778         * will take a particular node of a {@code Vector}, and (as long it has a match) 
1779         * find it's <I><B>closing {@code HTMLNode} match.</B></I>  The integer returned will
1780         * be the index into this page of the closing, matching {@code TagNode.}
1781         *
1782         * @throws TagNodeExpectedException If the node in the {@code Vector}-parameter
1783         * {@code 'html'} contained at index {@code 'nodeIndex'} is not an instance of
1784         * {@code TagNode}, then this exception is thrown.
1785         *
1786         * @throws OpeningTagNodeExpectedException If the node in the {@code Vector}-parameter 
1787         * {@code 'html'} at index {@code 'nodeIndex'} is a closing version of the HTML element,
1788         * then this exception shall throw.
1789         *
1790         * @throws InclusiveException If the node in {@code Vector}-parameter {@code 'html'},
1791         * pointed-to by index {@code 'nodeIndex'} is an HTML {@code 'Singleton'} / Self-Closing
1792         * Tag, then this exception will be thrown.
1793         *
1794         * @see TagNode
1795         * @see TagNode#tok
1796         * @see TagNode#isClosing
1797         * @see HTMLNode
1798         */
1799        public static int find(Vector<? extends HTMLNode> html, int nodeIndex)
1800        {
1801            TagNode     tn  = null;
1802            HTMLNode    n   = null;
1803            String      tok = null;
1804
1805            if (! html.elementAt(nodeIndex).isTagNode())
1806
1807                throw new TagNodeExpectedException (
1808                    "You have attempted to find a closing tag to match an opening one, " +
1809                    "but the 'nodeIndex' (" + nodeIndex + ") you have passed doesn't contain " +
1810                    "an instance of TagNode."
1811                );
1812
1813            else tn = (TagNode) html.elementAt(nodeIndex);
1814
1815            if (tn.isClosing) throw new OpeningTagNodeExpectedException(
1816                "The TagNode indicated by 'nodeIndex' = " + nodeIndex + " has its 'isClosing' " +
1817                "boolean as TRUE - this is not an opening TagNode, but it must be to continue."
1818            );
1819
1820            // Checks to ensure this token is not a 'self-closing' or 'singleton' tag.
1821            // If it is an exception shall throw.
1822            InclusiveException.check(tok = tn.tok);
1823
1824            int end         = html.size();
1825            int openCount   = 1;
1826
1827            for (int pos = (nodeIndex+1); pos < end; pos++)
1828
1829                if ((n = html.elementAt(pos)).isTagNode())
1830                    if ((tn = ((TagNode) n)).tok.equals(tok))
1831                    {
1832                        // This keeps a "Depth Count" - where "depth" is just the number of 
1833                        // opened tags, for which a matching, closing tag hasn't been found yet.
1834
1835                        openCount += (tn.isClosing ? -1 : 1);
1836
1837                        // When all open-tags of the specified HTML Element 'tok' have been
1838                        // found, search has finished.
1839
1840                        if (openCount == 0) return pos;
1841                    }
1842
1843            // The closing-matching tag was not found
1844            return -1;
1845        }
1846
1847        /**
1848         * Convenience Method.
1849         * <BR />Invokes: {@link #find(Vector, int)}
1850         * <BR />Converts: output to <B><CODE>'GET'</CODE></B> format ({@code Vector}-sublist)
1851         * <BR />Using: {@link Util#cloneRange(Vector, int, int)}
1852         */
1853        public static Vector<HTMLNode> get(Vector<? extends HTMLNode> html, int nodeIndex)
1854        { 
1855            int endPos = find(html, nodeIndex);
1856
1857            return (endPos == -1) ? null : cloneRange(html, nodeIndex, endPos + 1);
1858        }
1859
1860        /**
1861         * Convenience Method.
1862         * <BR />Invokes: {@link #find(Vector, int)}
1863         * <BR />Converts: output to <B><CODE>'PEEK'</CODE></B> format ({@code SubSection})
1864         * <BR />Using: {@link Util#cloneRange(Vector, int, int)}
1865         */
1866        public static SubSection peek(Vector<? extends HTMLNode> html, int nodeIndex)
1867        {
1868            int endPos = find(html, nodeIndex);
1869
1870            return (endPos == -1) ? null : new SubSection(
1871                new DotPair(nodeIndex, endPos),
1872                cloneRange(html, nodeIndex, endPos + 1)
1873            );
1874        }
1875
1876        /**
1877         * Convenience Method.
1878         * <BR />Invokes: {@link #find(Vector, int)}
1879         * <BR />Converts: output to <B><CODE>'POLL'</CODE></B> format ({@code Vector}-sublist),
1880         * <BR />Using: {@link Util#pollRange(Vector, int, int)}
1881         * <BR />Removes: The requested Sub-List
1882         */
1883        public static Vector<HTMLNode> poll(Vector<? extends HTMLNode> html, int nodeIndex)
1884        {
1885            int endPos = find(html, nodeIndex);
1886
1887            return (endPos == -1) ? null : pollRange(html, nodeIndex, endPos + 1);
1888        }
1889
1890        /**
1891         * Convenience Method.
1892         * <BR />Invokes: {@link #find(Vector, int)}
1893         * <BR />Converts: output to <B><CODE>'REMOVE'</CODE></B> format ({@code int} - number
1894         * of nodes removed)
1895         * <BR />Using: {@link Remove#range(Vector, int, int)}
1896         * <BR />Removes: The requested Sub-List
1897         */
1898        public static int remove(Vector<? extends HTMLNode> html, int nodeIndex)
1899        {
1900            int endPos = find(html, nodeIndex);
1901
1902            return (endPos == -1) ? 0 : Util.Remove.range(html, nodeIndex, endPos + 1);
1903        }
1904
1905
1906        // ****************************************************************************************
1907        // ****************************************************************************************
1908        // Optimized Methods, Inclusive Find/Get/Subsection
1909        // ****************************************************************************************
1910        // ****************************************************************************************
1911
1912        /**
1913         * Convenience Method.  
1914         * <BR />Invokes: {@link #dotPairOPT(Vector, int)}
1915         * <BR />Converts: output to {@code Vector<HTMLNode>}
1916         */
1917        public static Vector<HTMLNode> vectorOPT(Vector<? extends HTMLNode> html, int tagPos)
1918        {
1919            DotPair dp = dotPairOPT(html, tagPos);
1920
1921            if (dp == null) return null;
1922            else            return Util.cloneRange(html, dp.start, dp.end + 1);
1923        }
1924
1925        /**
1926         * Convenience Method.
1927         * <BR />Invokes: {@link #dotPairOPT(Vector, int)}
1928         * <BR />Converts: output to {@code SubSection}
1929         */
1930        public static SubSection subSectionOPT(Vector<? extends HTMLNode> html, int tagPos)
1931        {
1932            DotPair dp = dotPairOPT(html, tagPos);
1933
1934            if (dp == null) return null;
1935            else            return new SubSection(dp, Util.cloneRange(html, dp.start, dp.end + 1));
1936        }
1937
1938        /**
1939         * 
1940         * <EMBED CLASS='external-html' DATA-FILE-ID=UTILIOPT>
1941         * <!-- Inclusive Opt Description -->
1942         * 
1943         * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
1944         * 
1945         * @param tagPos <EMBED CLASS='external-html' DATA-FILE-ID=UTILOPTTP>
1946         * 
1947         * @return A <B>'DotPair'</B> version of an inclusive, end-to-end HTML tag-element.
1948         * 
1949         * <EMBED CLASS='external-html' DATA-FILE-ID=UTILOPTJSN> 
1950         * <!-- Note on JS-DOM Tree innerHTML -->
1951         * 
1952         * @see TagNode
1953         * @see TagNode#isClosing
1954         * @see TagNode#tok
1955         * @see DotPair
1956         */
1957        public static DotPair dotPairOPT(Vector<? extends HTMLNode> html, int tagPos)
1958        {
1959            // Temp Variables
1960            HTMLNode n;     TagNode tn;     int openCount = 1;
1961
1962            int len = html.size();
1963
1964            // This is the name (token) of the "Opening HTML Element", we are searching for
1965            // the matching, closing element
1966
1967            String tok = ((TagNode) html.elementAt(tagPos)).tok;
1968
1969            for (int i = (tagPos+1); i < len; i++)
1970
1971                if ((n = html.elementAt(i)).isTagNode())
1972                    if ((tn = (TagNode) n).tok.equals(tok))
1973                    {
1974                        // This keeps a "Depth Count" - where "depth" is just the number of 
1975                        // opened tags, for which a matching, closing tag hasn't been found yet.
1976
1977                        openCount += (tn.isClosing ? -1 : 1);
1978
1979                        // When all open-tags of the specified HTML Element 'tok' have been
1980                        // found, search has finished.
1981
1982                        if (openCount == 0) return new DotPair(tagPos, i);
1983                    }
1984
1985            // Was not found
1986            return null;
1987        }
1988
1989        /**
1990         * Convenience Method.
1991         * <BR />Invokes: {@link #dotPairOPT(Vector, int, int)}
1992         * <BR />Converts: output to {@code Vector<HTMLNode>}
1993         */
1994        public static Vector<HTMLNode> vectorOPT
1995            (Vector<? extends HTMLNode> html, int tagPos, int end)
1996        {
1997            DotPair dp = dotPairOPT(html, tagPos, end);
1998
1999            if (dp == null) return null;
2000            else            return Util.cloneRange(html, dp.start, dp.end + 1);
2001        }
2002
2003        /**
2004         * Convenience Method.
2005         * <BR />Invokes: {@link #dotPairOPT(Vector, int, int)}
2006         * <BR />Converts: output to {@code SubSection}
2007        */
2008        public static SubSection subSectionOPT
2009            (Vector<? extends HTMLNode> html, int tagPos, int end)
2010        {
2011            DotPair dp = dotPairOPT(html, tagPos, end);
2012
2013            if (dp == null) return null;
2014            else            return new SubSection(dp, Util.cloneRange(html, dp.start, dp.end + 1));
2015        }
2016
2017        /**
2018         * 
2019         * <EMBED CLASS='external-html' DATA-FILE-ID=UTILIOPT>
2020         * <!-- Inclusive Opt Description -->
2021         * 
2022         * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
2023         * 
2024         * @param tagPos <EMBED CLASS='external-html' DATA-FILE-ID=UTILOPTTP>
2025         * 
2026         * @param end <EMBED CLASS='external-html' DATA-FILE-ID=UTILOPTEND>
2027         * 
2028         * @return A <B>'DotPair'</B> version of an inclusive, end-to-end HTML tag-element.
2029         * 
2030         * <EMBED CLASS='external-html' DATA-FILE-ID=UTILOPTJSN>
2031         * <!-- Note on JS-DOM Tree innerHTML -->
2032         * 
2033         * @see TagNode
2034         * @see TagNode#isClosing
2035         * @see TagNode#tok
2036         * @see DotPair
2037         */
2038        public static DotPair dotPairOPT(Vector<? extends HTMLNode> html, int tagPos, int end)
2039        {
2040            // Temp Variables
2041            HTMLNode n;     TagNode tn;     int openCount = 1;      int endPos;
2042
2043            // This is the name (token) of the "Opening HTML Element", we are searching for
2044            // the matching, closing element
2045            String tok = ((TagNode) html.elementAt(tagPos)).tok;
2046
2047            for (endPos = (tagPos+1); endPos < end; endPos++)
2048
2049                if ((n = html.elementAt(endPos)).isTagNode())
2050                    if ((tn = (TagNode) n).tok.equals(tok))
2051                    {
2052                        // This keeps a "Depth Count" - where "depth" is just the number of
2053                        // opened tags, for which a matching, closing tag hasn't been found yet.
2054                        openCount += (tn.isClosing ? -1 : 1);
2055
2056                        // When all open-tags of the specified HTML Element 'tok' have been
2057                        // found, search has finished.
2058                        if (openCount == 0) return new DotPair(tagPos, endPos);
2059                    }
2060
2061            // The end of the vectorized-html page (or subsection) was reached, but the
2062            // matching-closing element was not found.
2063            return null; // assert(endPos == html.size());
2064        }
2065    }
2066}