001package Torello.HTML;
002
003import java.util.*;
004import java.util.regex.*;
005import java.util.stream.*;
006
007import java.util.function.Predicate;
008
009import Torello.HTML.NodeSearch.*;
010import Torello.Java.*;
011
012import Torello.Java.Shell.C;
013
014/**
015 * A long list of utilities for searching, finding, extracting and removing HTML from 
016 * Vectorized-HTML.
017 * 
018 * <BR /><BR /><EMBED CLASS="external-html" DATA-FILE-ID="UTIL">
019 */
020@Torello.HTML.Tools.JavaDoc.StaticFunctional
021public class Util
022{
    // Private, no-argument constructor: code outside this class cannot create a Util instance.
    private Util() { }
024
025
026    // ***************************************************************************************
027    // ***************************************************************************************
028    // Trim TextNode Strings
029    // ***************************************************************************************
030    // ***************************************************************************************
031
032
033    /**
034     * Convenience Method.
035     * <BR />Invokes: {@link #trimTextNodes(Vector, int, int, boolean)}
036     */
037    public static int trimTextNodes(Vector<HTMLNode> page, boolean deleteZeroLengthStrings)
038    { return trimTextNodes(page, 0, -1, deleteZeroLengthStrings); }
039
040    /**
041     * Convenience Method.
042     * <BR />Receives: {@code DotPair}
043     * <BR />Invokes: {@link #trimTextNodes(Vector, int, int, boolean)}
044     */
045    public static int trimTextNodes(Vector<HTMLNode> page, DotPair dp, boolean deleteZeroLengthStrings)
046    { return trimTextNodes(page, dp.start, dp.end + 1, deleteZeroLengthStrings); }
047
048    /**
049     * This will iterate through the entire {@code Vector<HTMLNode>}, and invoke
050     * {@code java.lang.String.trim()} on each {@code TextNode} on the page.  If this invocation
051     * results in a reduction of {@code String.length()}, then a new {@code TextNode} will be
052     * instantiated whose {@code TextNode.str} field is set to the result of the
053     * {@code String.trim(old_node.str)} operation.
054     * 
055     * @param deleteZeroLengthStrings If a {@code TextNode's} length is zero (before or after
056     * {@code trim()} is called) and when this parameter is <B>TRUE</B>, that {@code TextNode} must
057     * be removed from the {@code Vector}.
058     * 
     * @return The total number of nodes that were either trimmed or deleted; each such node
     * increments the returned counter by one.
061     */
062    public static int trimTextNodes
063        (Vector<HTMLNode> page, int sPos, int ePos, boolean deleteZeroLengthStrings)
064    {
065        int                 counter = 0;
066        IntStream.Builder   b       = deleteZeroLengthStrings ? IntStream.builder() : null;
067        HTMLNode            n       = null;
068        LV                  l       = new LV(page, sPos, ePos);
069
070        for (int i=l.start; i < l.end; i++)
071
072            if ((n = page.elementAt(i)).isTextNode())
073            {
074                String  trimmed         = n.str.trim();
075                int     trimmedLength   = trimmed.length();
076
077                if ((trimmedLength == 0) && deleteZeroLengthStrings)
078                    { b.add(i); counter++; }
079
080                else if (trimmedLength < n.str.length())
081                    { page.setElementAt(new TextNode(trimmed), i); counter++; }
082            }
083
084        if (deleteZeroLengthStrings) removeNodesOPT(page, b.build().toArray());
085
086        return counter;
087    }
088
089
090    // ***************************************************************************************
091    // ***************************************************************************************
092    // Inclusive-Empty Removal Operations
093    // ***************************************************************************************
094    // ***************************************************************************************
095
096
097    /**
098     * Convenience Method.
099     * <BR />Invokes: {@link #removeInclusiveEmpty(Vector, int, int, String[])}
100     */
101    public static int removeInclusiveEmpty(Vector<HTMLNode> page, String... htmlTags)
102    { return removeInclusiveEmpty(page, 0, -1, htmlTags); }
103
104    /**
105     * Convenience Method.
106     * <BR />Receives: {@code DotPair}
107     * <BR />Invokes: {@link #removeInclusiveEmpty(Vector, int, int, String[])}
108     */
109    public static int removeInclusiveEmpty(Vector<HTMLNode> page, DotPair dp, String... htmlTags)
110    { return removeInclusiveEmpty(page, dp.start, dp.end + 1, htmlTags); }
111
112    /**
113     * This will do an "Inclusive Search" using the standard {@code class TagNodeInclusiveIterator}
114     * in the {@code package NodeSearch}.  Then it will inspect the contents of the subsections.
115     * Any subsections that do not contain any instances of {@code HTMLNode} in between them, or
116     * any subsections that only contain "blank-text" (white-space) between them shall be removed.
117     * 
     * <BR /><BR /><B><SPAN STYLE="color: red;">IMPORTANT:</SPAN></B> The search logic shall
     * perform multiple <I><B>recursive iterations</B></I> of itself, such that if, for instance,
     * the user requested that all empty HTML divider ({@code <DIV>}) elements be removed, and
     * removing a set of dividers resulted in more empty ones (nested {@code <DIV>} elements), then
     * an additional removal shall be called.  <I>This recursion shall continue until there are no
     * empty HTML elements of the types listed by</I> {@code 'htmlTags'}
124     *
125     * @param page Any vectorized-html page or sub-page.
126     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
127     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
128     * 
129     * @param htmlTags The list of <I>inclusive</I> (non-singleton) html elements to search for
130     * possibly being empty container tags.
131     * 
132     * @return The number of {@code HTMLNode's} that were removed.
133     * 
134     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
135     */
136    public static int removeInclusiveEmpty
137        (Vector<HTMLNode> page, int sPos, int ePos, String... htmlTags)
138    {
139        DotPair         subList;
140        int             removed = 0;
141        HNLIInclusive   iter    = TagNodeInclusiveIterator.iter(page, htmlTags);
142        LV              l       = new LV(page, sPos, ePos);
143
144        iter.restrictCursor(l);
145
146        TOP:
147        while (iter.hasNext())
148
149            // If there is only the opening & closing pair, with nothing in between,
150            // then the pair must be removed because it is "Empty" (Inclusive Empty)
151
152            if ((subList = iter.nextDotPair()).size() == 2)
153            {
154                iter.remove();
155                ePos -= subList.size();
156                removed += subList.size();
157            }
158
159            else
160            {
161                // If there is any TagNode in between the start-end pair, then this is NOT EMPTY
162                // In this case, skip to the next start-end opening-closing pair.
163
164                for (int i=(subList.start + 1); i < subList.end; i++)
165                    if (! page.elementAt(i).isTextNode())
166                        continue TOP;
167
168                // If there were only TextNode's between an opening-closing TagNode Pair....
169                // **AND** those TextNode's are only white-space, then this also considered
170                // Inclusively Empty.  (Get all TextNode's, and if .trim() reduces the length()
171                // to zero, then it was only white-space.
172
173                if (Util.textNodesString(page, subList).trim().length() == 0)
174                {
175                    iter.remove();
176                    ePos -= subList.size();
177                    removed += subList.size();
178                }
179            }
180
181        // This process must be continued recursively, because if any inner, for instance,
182        // <DIV> ... </DIV> was removed, then the outer list must be re-checked...
183
184        if (removed > 0)
185            return removed + removeInclusiveEmpty(page, sPos, ePos, htmlTags);
186        else
187            return 0;
188    }
189
190
191    // ***************************************************************************************
192    // ***************************************************************************************
193    // Vectorized-HTML To-String Methods
194    // ***************************************************************************************
195    // ***************************************************************************************
196
197
198    /** 
199     * Convenience Method.
200     * <BR />Invokes: {@link #rangeToString(Vector, int, int)}
201     */
202    public static String pageToString(Vector<? extends HTMLNode> html)
203    { return rangeToString(html, 0, -1); }
204
205    /**
206     * Convenience Method.
207     * <BR />Receives: {@code DotPair}
208     * <BR />Invokes: {@link #rangeToString(Vector, int, int)}
209     */
210    public static String rangeToString(Vector<? extends HTMLNode> html, DotPair dp)
211    { return rangeToString(html, dp.start, dp.end + 1); }
212
213    /**
214     * The purpose of this method/function is to convert a portion of the contents of an HTML-Page,
215     * currently being represented as a {@code Vector} of {@code HTMLNode's} into a {@code String.}
216     * Two {@code 'int'} parameters are provided in this method's signature to define a sub-list
217     * of a page to be converted to a {@code java.lang.String}
218     * 
219     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
220     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
221     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
222     * 
223     * @return The {@code Vector} converted into a {@code String}.
224     * 
225     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
226     * 
227     * @see #pageToString(Vector)
228     * @see #rangeToString(Vector, DotPair)
229     */
230    public static String rangeToString(Vector<? extends HTMLNode> html, int sPos, int ePos)
231    {
232        StringBuilder   ret = new StringBuilder();
233        LV              l   = new LV(html, sPos, ePos);
234
235        for (int i=l.start; i < l.end; i++) ret.append(html.elementAt(i).str);
236
237        return ret.toString();
238    }
239
240
241    // ***************************************************************************************
242    // ***************************************************************************************
243    // Vectorized-HTML TextNode To-String Methods
244    // ***************************************************************************************
245    // ***************************************************************************************
246
247
248    /**
249     * Convenience Method.
250     * <BR />Invokes: {@link #textNodesString(Vector, int, int)}
251     */
252    public static String textNodesString(Vector<? extends HTMLNode> html)
253    { return textNodesString(html, 0, -1); }
254
255    /**
256     * Convenience Method.
257     * <BR />Receives: {@code DotPair}
258     * <BR />Invokes: {@link #textNodesString(Vector, int, int)}
259     */
260    public static String textNodesString(Vector<? extends HTMLNode> html, DotPair dp)
261    { return textNodesString(html, dp.start, dp.end + 1); }
262
263    /**
264     * This will return a {@code String} that is comprised of ONLY the {@code TextNode's} contained
265     * within the input {@code Vector} - <I>and furthermore, only nodes that are situated between
266     * index {@code int 'sPos'} and index {@code int 'ePos'} in that {@code Vector.}</I>
267     * 
     * <BR /><BR />The {@code for-loop} that iterates the input-{@code Vector} parameter will
     * simply skip any instance of {@code 'TagNode'} and {@code 'CommentNode'} when building the
     * output return {@code String}.
271     * 
272     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
273     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
274     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
275     * 
276     * @return This will return a {@code String} that is comprised of the text-only elements in the
277     * web-page or sub-page.  Only text between the requested {@code Vector}-indices is included.
278     * 
279     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
280     * 
281     * @see #textNodesString(Vector, DotPair)
282     * @see #textNodesString(Vector)
283     */
284    public static String textNodesString(Vector<? extends HTMLNode> html, int sPos, int ePos)
285    {
286        StringBuilder   sb  = new StringBuilder();
287        LV              l   = new LV(html, sPos, ePos);
288        HTMLNode        n;
289
290        for (int i=l.start; i < l.end; i++)
291            if ((n = html.elementAt(i)).isTextNode())
292                sb.append(n.str);
293
294        return sb.toString();
295    }
296
297
298    // ***************************************************************************************
299    // ***************************************************************************************
300    // TextNode Removal Operations
301    // ***************************************************************************************
302    // ***************************************************************************************
303
304
305    /**
306     * Convenience Method.
307     * <BR />Invokes: {@link #removeAllTextNodes(Vector, int, int)}
308     */
309    public static int removeAllTextNodes(Vector<HTMLNode> page)
310    { return removeAllTextNodes(page, 0, -1); }
311
312    /**
313     * Convenience Method.
314     * <BR />Receives: {@code DotPair}
315     * <BR />Invokes: {@link #removeAllTextNodes(Vector, int, int)}
316     */
317    public static int removeAllTextNodes(Vector<HTMLNode> page, DotPair dp)
318    { return removeAllTextNodes(page, dp.start, dp.end + 1); }
319
320    /**
321     * Takes a sub-section of an HTML {@code Vector} and removes all {@code TextNode} present
322     * 
323     * @param page Any HTML page
324     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
325     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
326     * 
327     * @return The number of HTML {@code TextNode's} that were removed
328     * 
329     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
330     * 
331     * @see TextNode
332     * @see #removeNodesOPT(Vector, int[])
333     */
334    public static int removeAllTextNodes(Vector<HTMLNode> page, int sPos, int ePos)
335    {
336        IntStream.Builder   b = IntStream.builder();
337        LV                  l = new LV(page, sPos, ePos);
338
339        // Use Java-Streams to build the list of nodes that are valid text-nodes.
340        for (int i=l.start; i < l.end; i++) if (page.elementAt(i).isTextNode()) b.add(i);
341
342        // Build the stream and convert it to an int[] (integer-array)
343        int[] posArr = b.build().toArray();
344
345        // The integer array is guaranteed to be sorted, and contain valid vector-indices.
346        removeNodesOPT(page, posArr);
347
348        return posArr.length;
349    }
350
351
352    // ***************************************************************************************
353    // ***************************************************************************************
354    // TagNode Removal Operations
355    // ***************************************************************************************
356    // ***************************************************************************************
357
358
359    /**
360     * Convenience Method.
361     * <BR />Invokes: {@link #removeAllTagNodes(Vector, int, int)}
362     */
363    public static int removeAllTagNodes(Vector<HTMLNode> page) 
364    { return removeAllTagNodes(page, 0, -1); }
365
366    /**
367     * Convenience Method.
368     * <BR />Receives: {@code DotPair} 
369     * <BR />Invokes: {@link #removeAllTagNodes(Vector, int, int)}
370     */
371    public static int removeAllTagNodes(Vector<HTMLNode> page, DotPair dp)
372    { return removeAllTagNodes(page, dp.start, dp.end + 1); }
373
374    /**
375     * Takes a sub-section of an HTML {@code Vector} and removes all {@code TagNode} present
376     * 
377     * @param page Any HTML page
378     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
379     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
380     * 
381     * @return The number of HTML {@code TagNode's} that were removed
382     * 
383     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
384     * 
385     * @see TagNode
386     * @see #removeNodesOPT(Vector, int[])
387     */
388    public static int removeAllTagNodes(Vector<HTMLNode> page, int sPos, int ePos)
389    {
390        IntStream.Builder   b = IntStream.builder();
391        LV                  l = new LV(page, sPos, ePos);
392
393        // Use Java-Streams to build the list of nodes that are valid tag-nodes.
394        for (int i=l.start; i < l.end; i++) if (page.elementAt(i).isTagNode()) b.add(i);
395
396        // Build the stream and convert it to an int[] (integer-array)
397        int[] posArr = b.build().toArray();
398
399        // The integer array is guaranteed to be sorted, and contain valid vector-indices.
400        removeNodesOPT(page, posArr);
401
402        return posArr.length;
403    }
404
405
406    // ***************************************************************************************
407    // ***************************************************************************************
408    // CommentNode Removal Operations
409    // ***************************************************************************************
410    // ***************************************************************************************
411
412
413    /**
414     * Convenience Method.
415     * <BR />Invokes: {@link #removeAllCommentNodes(Vector, int, int)}
416     */
417    public static int removeAllCommentNodes(Vector<HTMLNode> page)
418    { return removeAllCommentNodes(page, 0, -1); }
419
420    /**
421     * Convenience Method.
422     * <BR />Receives: {@code DotPair}
423     * <BR />Invokes: {@link #removeAllCommentNodes(Vector, int, int)}
424     */
425    public static int removeAllCommentNodes(Vector<HTMLNode> page, DotPair dp)
426    { return removeAllCommentNodes(page, dp.start, dp.end + 1); }
427
428    /**
429     * Takes a sub-section of an HTML {@code Vector} and removes all {@code CommentNode} present
430     * 
431     * @param page Any HTML page
432     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
433     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
434     * 
435     * @return The number of HTML {@code CommentNode's} that were removed
436     * 
437     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
438     * 
439     * @see CommentNode
440     * @see #removeNodesOPT(Vector, int[])
441     */
442    public static int removeAllCommentNodes(Vector<HTMLNode> page, int sPos, int ePos)
443    {
444        IntStream.Builder   b       = IntStream.builder();
445        LV                  l       = new LV(page, sPos, ePos);
446
447        // Use Java-Streams to build the list of nodes that are valid comment-nodes.
448        for (int i=l.start; i < l.end; i++)
449            if (page.elementAt(i).isCommentNode())
450                b.add(i);
451
452        // Build the stream and convert it to an int[] (integer-array)
453        int[] posArr = b.build().toArray();
454
455        // The integer array is guaranteed to be sorted, and contain valid vector-indices.
456        removeNodesOPT(page, posArr);
457
458        return posArr.length; 
459    }
460
461
462    // ***************************************************************************************
463    // ***************************************************************************************
464    // TextNode Modification Operations - "Escape Text Nodes"
465    // ***************************************************************************************
466    // ***************************************************************************************
467
468
469    /**
470     * Convenience Method.
471     * <BR />Invokes: {@link #escapeTextNodes(Vector, int, int)}
472     */
473    public static int escapeTextNodes(Vector<HTMLNode> html)
474    { return escapeTextNodes(html, 0, -1); }
475
476    /**
477     * Convenience Method.
478     * <BR />Receives: {@code DotPair} 
479     * <BR />Invokes: {@link #escapeTextNodes(Vector, int, int)}
480     */
481    public static int escapeTextNodes(Vector<HTMLNode> html, DotPair dp)
482    { return escapeTextNodes(html, dp.start, dp.end + 1); }
483
484    /**
     * Will call {@code HTML.Escape.replace} on each {@code TextNode} in the range of
     * {@code sPos ... ePos}
487     * 
488     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
489     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
490     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
491     * 
     * @return The number of {@code TextNode's} that changed as a result of the
     * {@code Escape.replace(n.str)} loop.
494     * 
495     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
496     * 
497     * @see Escape#replaceAll(String)
498     */
499    public static int escapeTextNodes(Vector<HTMLNode> html, int sPos, int ePos)
500    {
501        LV          l       = new LV(html, sPos, ePos);
502        HTMLNode    n       = null;
503        String      s       = null;
504        int         counter = 0;
505
506        for (int i=l.start; i < l.end; i++)
507
508            if ((n = html.elementAt(i)).isTextNode())
509                if (! (s = Escape.replace(n.str)).equals(n.str))
510                {
511                    html.setElementAt(new TextNode(s), i);
512                    counter++;
513                }
514
515        return counter;
516    }
517
518
519    // ***************************************************************************************
520    // ***************************************************************************************
521    // Clone HTML Vectors
522    // ***************************************************************************************
523    // ***************************************************************************************
524
525
526    /**
527     * Convenience Method.
528     * <BR />Invokes: {@link #cloneRange(Vector, int, int)}
529     */
530    public static Vector<HTMLNode> clone(Vector<? extends HTMLNode> html)
531    { return cloneRange(html, 0, -1); }
532
533    /**
534     * Convenience Method.
535     * <BR />Receives: {@code DotPair}
536     * <BR />Invokes: {@link #cloneRange(Vector, int, int)}
537     */
538    public static Vector<HTMLNode> cloneRange(Vector<? extends HTMLNode> html, DotPair dp)
539    { return cloneRange(html, dp.start, dp.end + 1); }
540
541    /**
542     * Copies (clones!) a sub-range of the HTML page, stores the results in a {@code Vector}, and
543     * returns it.
544     * 
545     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
546     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
547     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
548     * 
549     * @return The "cloned" (copied) sub-range specified by {@code 'sPos'} and {@code 'ePos'.}
550     * 
551     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
552     * 
553     * @see #cloneRange(Vector, DotPair)
554     */
555    public static Vector<HTMLNode> cloneRange(Vector<? extends HTMLNode> html, int sPos, int ePos)
556    {
557        LV                  l   = new LV(html, sPos, ePos);
558        Vector<HTMLNode>    ret = new Vector<>(l.size());
559
560        // Copy the range specified into the return vector
561        //
562        // HOW THIS WAS DONE BEFORE NOTICING Vector.subList
563        //
564        // for (int i = l.start; i < l.end; i++) ret.addElement(html.elementAt(i));
565
566        ret.addAll(html.subList(l.start, l.end));
567
568        return ret;
569    }
570
571
572    // ***************************************************************************************
573    // ***************************************************************************************
574    // Remove All Inner Tags
575    // ***************************************************************************************
576    // ***************************************************************************************
577
578
579    /**
580     * Convenience Method.
581     * <BR />Invokes: {@link #removeAllInnerTags(Vector, int, int)}
582     */
583    public static int removeAllInnerTags(Vector<HTMLNode> html)
584    { return removeAllInnerTags(html, 0, -1); }
585
586    /**
587     * Convenience Method.
588     * <BR />Receives: {@code DotPair}
589     * <BR />Invokes: {@link #removeAllInnerTags(Vector, int, int)}
590     */
591    public static int removeAllInnerTags(Vector<? super TagNode> html, DotPair dp)
592    { return removeAllInnerTags(html, dp.start, dp.end + 1); }
593
594    /**
595     * This method removes all inner-tags (all attributes) from every {@code TagNode} inside of an
596     * HTML page.  It does this by replacing every {@code TagNode} in the {@code Vector} with the
597     * pre-instantiated, publicly-available {@code TagNode} which can be obtained by a call to the
598     * class {@code HTMLTags.hasTag(token, TC)}.
599     * 
600     * <BR /><BR /><B>NOTE:</B> This method determines whether a fresh {@code TagNode} is to be
601     * inserted by measuring the length of the internal {@code TagNode.str} (a {@code String})
602     * field. If {@code TagNode.str.length()} is not equal to the HTML token {@code TagNode.tok}
603     * length <B>plus 2</B>, then a fresh, pre-instantiated, node is replaced.  The {@code '+2'}
604     * figure comes from the additional characters {@code '<'} and {@code '>'} that start and end
605     * every HTML {@code TagNode}
606     * 
607     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
608     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
609     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
610     * 
     * @return The number of {@code TagNode} elements that were replaced with zero-attribute
     * HTML Element Tags.
613     * 
614     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
615     *
616     * @throws ClassCastException If {@code 'html'} contains references that do not inherit
617     * {@code HTMLNode}.
618     */
619    @SuppressWarnings("unchecked")
620    public static int removeAllInnerTags(Vector<? super TagNode> html, int sPos, int ePos)
621    {
622        int     ret = 0;
623        LV      l   = new LV(sPos, ePos, html);
624        TagNode tn;
625
626        for (int i = (l.end-1); i >= l.start; i--)
627
628            if ((tn = ((HTMLNode) html.elementAt(i)).openTagPWA()) != null)
629
630            {
631                ret++;
632
633                // HTMLTags.hasTag(tok, TC) gets an empty and pre-instantiated TagNode,
634                // where TagNode.tok == 'tn.tok' and TagNode.isClosing = false
635
636                html.setElementAt(HTMLTags.hasTag(tn.tok, TC.OpeningTags), i);
637            }
638
639        return ret;
640    }
641
642
643    // ***************************************************************************************
644    // ***************************************************************************************
645    // String Length of the TextNode's
646    // ***************************************************************************************
647    // ***************************************************************************************
648
649
650    /**
651     * Convenience Method.
652     * <BR />Receives: {@code DotPair}
653     * <BR />Invokes: {@link #textStrLength(Vector, int, int)}
654     */
655    public static int textStrLength(Vector<? extends HTMLNode> html, DotPair dp)
656    { return textStrLength(html, dp.start, dp.end + 1); }
657
658    /**
659     * Convenience Method.
660     * <BR />Invokes: {@link #textStrLength(Vector, int, int)}
661     */
662    public static int textStrLength(Vector<? extends HTMLNode> html)
663    { return textStrLength(html, 0, -1); }
664
665    /**
666     * This method will return the length of the strings <I><B>contained by all/only instances of
667     * {@code 'TextNode'}</B></I> among the nodes of the input HTML-{@code Vector}.   This is
668     * identical to the behavior of the method with the same name, but includes starting and ending
669     * bounds on the html {@code Vector}: {@code 'sPos'} &amp; {@code 'ePos'}.
670     * 
671     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
672     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
673     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
674     * 
675     * @return The sum of the lengths of the text contained by text-nodes in the {@code Vector} 
676     * between {@code 'sPos'} and {@code 'ePos'}.
677     * 
678     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
679     */
680    public static int textStrLength(Vector<? extends HTMLNode> html, int sPos, int ePos)
681    {
682        HTMLNode    n;
683        int         sum = 0;
684        LV          l   = new LV(html, sPos, ePos);
685
686        // Counts the length of each "String" in a "TextNode" between sPos and ePos
687        for (int i=l.start; i < l.end; i++)
688
689            if ((n = html.elementAt(i)).isTextNode())
690                sum += n.str.length();
691
692        return sum;
693    }
694
695
696    // ***************************************************************************************
697    // ***************************************************************************************
698    // Compact Adjacent / Adjoining TextNode's
699    // ***************************************************************************************
700    // ***************************************************************************************
701
702
703    /**
704     * Convenience Method.
705     * <BR />Invokes: {@link #compactTextNodes(Vector, int, int)}
706     */
707    public static int compactTextNodes(Vector<HTMLNode> html)
708    { return compactTextNodes(html, 0, html.size()); }
709
710    /**
711     * Convenience Method.
712     * <BR />Receives: {@code DotPair}
713     * <BR />Invokes: {@link #compactTextNodes(Vector, int, int)} 
714     */
715    public static int compactTextNodes(Vector<HTMLNode> html, DotPair dp)
716    { return compactTextNodes(html, dp.start, dp.end + 1); }     
717
718    /**
719     * Occasionally, when removing instances of {@code TagNode} from a vectorized-html 
720     * page, certain instances of {@code TextNode} which were not adjacent / neighbours in
721     * the {@code Vector}, all of a sudden become adjacent.  Although there are no major problems
722     * with contiguous instances of {@code TextNode} from the Search Algorithm's perspective,
723     * for programmer's, it can sometimes be befuddling to realize that the output text that
724     * is returned from a call to {@code Util.pageToString(html)} is not being found because
725     * the text that is left is broken amongst multiple instances of adjacent TextNodes.
726     *
727     * <BR /><BR />This method merely combines "Adjacent" instances of {@code class TextNode}
728     * in the {@code Vector} into single instances of {@code class TextNode}
729     *
730     * @param html Any vectorized-html web-page.  If this page contain any contiguously placed
731     * {@code TextNode's}, the extra's will be eliminated, and the internal-string's inside the
732     * node's ({@code TextNode.str}) will be combined.  This action will reduce the size of the
733     * actual html-{@code Vector}.
734     * 
735     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
736     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
737     * 
738     * @return The number of nodes that were eliminated after being combined, or 0 if there
739     * were no text-nodes that were removed.
740     * 
741     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
742     * 
743     * @see HTMLNode#str
744     * @see TextNode
745     */
746    public static int compactTextNodes(Vector<HTMLNode> html, int sPos, int ePos)
747    {
748        LV      l           = new LV(html, sPos, ePos);
749        boolean compacting  = false;
750        int     firstPos    = -1;
751        int     delta       = 0;
752
753        for (int i=l.start; i < (l.end - delta); i++)
754
755            if (html.elementAt(i).isTextNode())         // Is a TextNode
756            {
757                if (compacting) continue;               // Not in "Compacting Mode"
758                compacting = true;  firstPos = i;       // Start "Compacting Mode" - this is a TextNode
759            }
760
761            else if (compacting && (firstPos < (i-1)))  // Else - Must be a TagNode or CommentNode
762            {
763                // Save compacted TextNode String's into this StringBuilder
764                StringBuilder compacted = new StringBuilder();
765
766                // Iterate all TextNodes that were adjacent, put them together into StringBuilder
767                for (int j=firstPos; j < i; j++) compacted.append(html.elementAt(j).str);
768
769                // Place this new "aggregate TextNode" at location of the first TextNode that
770                // was compacted into this StringBuilder
771
772                html.setElementAt(new TextNode(compacted.toString()), firstPos);
773
774                // Remove the rest of the positions in the Vector that had TextNode's.  These have
775                // all been put together into the "Aggregate TextNode" at position "firstPos"
776
777                Util.removeRange(html, firstPos + 1, i);
778
779                // The change in the size of the Vector needs to be accounted for.
780                delta += (i - firstPos - 1);
781
782                // Change the loop-counter variable, too, since the size of the Vector has changed.
783                i = firstPos + 1;
784
785                // Since we just hit a CommentNode, or TagNode, exit "Compacting Mode."
786                compacting = false;
787
788            }
789
790            // NOTE: This, ALSO, MUST BE a TagNode or CommentNode (just like the previous
791            //       if-else branch !)
792            // TRICKY: Don't forget this 'else' !
793
794            else compacting = false;
795
796        // Added - Don't forget the case where the Vector ends with a series of TextNodes
797        // TRICKY TOO! (Same as the HTML Parser... The ending or 'trailing' nodes must be parsed
798
799        int lastNodePos = html.size() - 1;
800
801        if (html.elementAt(lastNodePos).isTextNode()) if (compacting && (firstPos < lastNodePos))
802        {
803            StringBuilder compacted = new StringBuilder();
804
805            // Compact the TextNodes that were identified at the end of the Vector range.
806            for (int j=firstPos; j <= lastNodePos; j++) compacted.append(html.elementAt(j).str);
807
808            // Replace the group of TextNode's at the end of the Vector, with the single, aggregate
809            html.setElementAt(new TextNode(compacted.toString()), firstPos);
810            Util.removeRange(html, firstPos + 1, lastNodePos + 1);
811        }
812
813        return delta;
814    }
815
816
817    // ***************************************************************************************
818    // ***************************************************************************************
819    // Count New Lines
820    // ***************************************************************************************
821    // ***************************************************************************************
822
823
824    /**
825     * Convenience Method.
826     * <BR />Invokes: {@link #countNewLines(Vector, int, int)}
827     */
828    public static int countNewLines(Vector<? extends HTMLNode> html)
829    { return countNewLines(html, 0, -1); }
830
831    /**
832     * Convenience Method.
833     * <BR />Receives: {@code DotPair}
834     * <BR />Invokes: {@link #countNewLines(Vector, int, int)} 
835     */
836    public static int countNewLines(Vector<? extends HTMLNode> html, DotPair dp)
837    { return countNewLines(html, dp.start, dp.end + 1); }
838
839
840    /**
841     * This will count the number of new-line symbols present <B><I>- on the partial HTML
842     * page</I></B>. The count will include a sum of every {@code HTMLNode.str} that
843     * contains the standard new-line symbols: {@code \r\n, \r, \n}, meaning that UNIX, MSFT,
844     * Apple, etc. forms of text-line rendering should all be treated equally.
845     * 
846     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
847     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
848     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
849     * 
850     * @return The number of new-line characters in all of the {@code HTMLNode's} that occur
851     * between vectorized-page positions {@code 'sPos'} and {@code 'ePos.'}
852     * 
853     * <BR /><BR /><B>NOTE:</B> The regular-expression used here 'NEWLINEP' is as follows:
854     * 
855     * <DIV CLASS="SNIP">{@code
856     * private static final Pattern NEWLINEP = Pattern.compile("\\r\\n|\\r|\\n");
857     * }</DIV>
858     * 
859     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
860     * 
861     * @see StringParse#NEWLINEP
862     */
863    public static int countNewLines(Vector<? extends HTMLNode> html, int sPos, int ePos)
864    {
865        int newLineCount    = 0;
866        LV  l               = new LV(html, sPos, ePos);
867
868        for (int i=l.start; i < l.end; i++)
869
870            // Uses the Torello.Java.StringParse "New Line RegEx"
871            for (   Matcher m = StringParse.NEWLINEP.matcher(html.elementAt(i).str);
872                    m.find();
873                    newLineCount++);
874
875        return newLineCount;
876    }
877
878
879    // ***************************************************************************************
880    // ***************************************************************************************
881    // Count TextNode's
882    // ***************************************************************************************
883    // ***************************************************************************************
884
885
886    /**
887     * Convenience Method.
888     * <BR />Invokes: {@link #countTextNodes(Vector, int, int)}
889     */
890    public static int countTextNodes(Vector<HTMLNode> page)
891    { return countTextNodes(page, 0, -1); }
892
893    /**
894     * Convenience Method.
895     * <BR />Receives: {@code DotPair}
896     * <BR />Invokes: {@link #countTextNodes(Vector, int, int)}
897     */
898    public static int countTextNodes(Vector<HTMLNode> page, DotPair dp)
899    { return countTextNodes(page, dp.start, dp.end + 1); }
900
901    /**
902     * Counts the number of {@code TextNode's} in a {@code Vector<HTMLNode>} between the demarcated
903     * array / {@code Vector} positions, {@code 'sPos'} and {@code 'ePos'}
904     * 
905     * @param page Any HTML page.
906     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
907     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
908     * 
909     * @return The number of {@code TextNode's} in the {@code Vector} between the demarcated
910     * indices.
911     * 
912     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
913     */
914    public static int countTextNodes(Vector<HTMLNode> page, int sPos, int ePos)
915    {
916        int counter = 0;
917        LV  l       = new LV(page, sPos, ePos);
918
919        // Iterates the entire page between sPos and ePos, incrementing the count for every
920        // instance of text-node.
921
922        for (int i=l.start; i < l.end; i++) if (page.elementAt(i).isTextNode()) counter++;
923
924        return counter;
925    }
926
927
928    // ***************************************************************************************
929    // ***************************************************************************************
930    // Count CommentNode's
931    // ***************************************************************************************
932    // ***************************************************************************************
933
934
935    /**
936     * Convenience Method.
937     * <BR />Invokes: {@link #countCommentNodes(Vector, int, int)}
938     */
939    public static int countCommentNodes(Vector<HTMLNode> page)
940    { return countCommentNodes(page, 0, -1); }
941
942    /**
943     * Convenience Method.
944     * <BR />Receives: {@code DotPair}
945     * <BR />Invokes: {@link #countCommentNodes(Vector, int, int)} 
946     */
947    public static int countCommentNodes(Vector<HTMLNode> page, DotPair dp)
948    { return countCommentNodes(page, dp.start, dp.end + 1); }
949
950    /**
951     * Counts the number of {@code CommentNode's} in an {@code Vector<HTMLNode>} between the
952     * demarcated array / {@code Vector} positions.
953     * 
954     * @param page Any HTML page.
955     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
956     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
957     * 
958     * @return The number of {@code CommentNode's} in the {@code Vector} between the demarcated
959     * indices.
960     * 
961     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
962     */
963    public static int countCommentNodes(Vector<HTMLNode> page, int sPos, int ePos)
964    {
965        int counter = 0;
966        LV  l       = new LV(page, sPos, ePos);
967
968        // Iterates the entire page between sPos and ePos, incrementing the count for every
969        // instance of comment-node.
970
971        for (int i=l.start; i < l.end; i++)  if (page.elementAt(i).isCommentNode()) counter++;
972
973        return counter;
974    }
975
976
977    // ***************************************************************************************
978    // ***************************************************************************************
979    // Count TagNode's
980    // ***************************************************************************************
981    // ***************************************************************************************
982
983
984    /**
985     * Convenience Method.
986     * <BR />Invokes: {@link #countTagNodes(Vector, int, int)}
987     */
988    public static int countTagNodes(Vector<HTMLNode> page)
989    { return countTagNodes(page, 0, -1); }
990
991    /**
992     * Convenience Method.
993     * <BR />Receives: {@code DotPair}
994     * <BR />Invokes: {@link #countTagNodes(Vector, int, int)} 
995     */
996    public static int countTagNodes(Vector<HTMLNode> page, DotPair dp)
997    { return countTagNodes(page, dp.start, dp.end + 1); }
998
999    /**
1000     * Counts the number of {@code TagNode's} in a {@code Vector<HTMLNode>} between the demarcated 
1001     * array / {@code Vector} positions.
1002     * 
1003     * @param page Any HTML page.
1004     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
1005     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
1006     * 
1007     * @return The number of {@code TagNode's} in the {@code Vector}.
1008     * 
1009     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
1010     */
1011    public static int countTagNodes(Vector<HTMLNode> page, int sPos, int ePos)
1012    {
1013        int counter = 0;
1014        LV  l       = new LV(page, sPos, ePos);
1015
1016        // Iterates the entire page between sPos and ePos, incrementing the count for every
1017        // instance of TagNode.
1018
1019        for (int i=l.start; i < l.end; i++) if (page.elementAt(i).isTagNode()) counter++;
1020
1021        return counter;
1022    }
1023
1024
1025    // ***************************************************************************************
1026    // ***************************************************************************************
1027    // String-Length Operations
1028    // ***************************************************************************************
1029    // ***************************************************************************************
1030
1031
1032    /**
1033     * Convenience Method.
1034     * <BR />Invokes: {@link #strLength(Vector, int, int)}
1035     */
1036    public static int strLength(Vector<? extends HTMLNode> html)
1037    { return strLength(html, 0, -1); }
1038
1039    /**
1040     * Convenience Method.
1041     * <BR />Receives: {@code DotPair}
1042     * <BR />Invokes: {@link #strLength(Vector, int, int)} 
1043     */
1044    public static int strLength(Vector<? extends HTMLNode> html, DotPair dp)
1045    { return strLength(html, dp.start, dp.end + 1); }
1046
1047    /**
1048     * This method simply adds / sums the {@code String}-length of every {@code HTMLNode.str }
1049     * field in the passed page-{@code Vector}.  It only counts nodes between parameters
1050     * {@code sPos} (inclusive) and {@code ePos} (exclusive).
1051     * 
1052     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
1053     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
1054     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
1055     * 
1056     * @return The total length <B><I>- in characters -</I></B> of the sub-page of HTML between
1057     * {@code 'sPos'} and {@code 'ePos'}
1058     * 
1059     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
1060     * 
1061     * @see #strLength(Vector)
1062     */
1063    public static int strLength(Vector<? extends HTMLNode> html, int sPos, int ePos)
1064    {
1065        int ret = 0;
1066        LV  l   = new LV(html, sPos, ePos);
1067
1068        for (int i=l.start; i < l.end; i++) ret += html.elementAt(i).str.length();
1069
1070        return ret;
1071    }
1072
1073
1074    // ***************************************************************************************
1075    // ***************************************************************************************
1076    // Hash-Code Operations
1077    // ***************************************************************************************
1078    // ***************************************************************************************
1079
1080
1081    /**
1082     * Convenience Method.
1083     * <BR />Invokes: {@link #hashCode(Vector, int, int)}
1084     */
1085    public static int hashCode(Vector<? extends HTMLNode> html)
1086    { return hashCode(html, 0, -1); }
1087
1088    /**
1089     * Convenience Method.
1090     * <BR />Receives: {@code DotPair}
1091     * <BR />Invokes: {@link #hashCode(Vector, int, int)} 
1092     */
1093    public static int hashCode(Vector<? extends HTMLNode> html, DotPair dp)
1094    { return hashCode(html, dp.start, dp.end + 1); }
1095
1096    /**
1097     * Generates a hash-code for a vectorized html page-{@code Vector}.
1098     * 
1099     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
1100     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
1101     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
1102     * 
1103     * @return Returns the {@code String.hashCode()} of the <I><B>partial HTML-page</B></i> as if
1104     * it were not being stored as a {@code Vector}, but rather as HTML inside of a
1105     * Java-{@code String}.
1106     * 
1107     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
1108     * 
1109     * @see #hashCode(Vector)
1110     */
1111    public static int hashCode(Vector<? extends HTMLNode> html, int sPos, int ePos)
1112    {
1113        int h   = 0;
1114        LV  lv  = new LV(html, sPos, ePos);
1115
1116        for (int j=lv.start; j < lv.end; j++)
1117        {
1118            String  s = html.elementAt(j).str;
1119            int     l = s.length();
1120
1121            // This line has been copied from the jdk8/jdk8 "String.hashCode()" method.
1122            // The difference is that it iterates over the entire vector
1123
1124            for (int i=0; i < l; i++) h = 31 * h + s.charAt(i);
1125        }
1126
1127        return h;
1128    }
1129
1130
1131    // ***************************************************************************************
1132    // ***************************************************************************************
1133    // Style-Node & Script-Node Block Removal Operations
1134    // ***************************************************************************************
1135    // ***************************************************************************************
1136
1137
1138    /**
1139     * Removes all HTML {@code 'style'} Node blocks.
1140     * 
1141     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
1142     * 
1143     * @return The number of {@code <STYLE>}-Node Blocks that were removed
1144     */
1145    public static int removeStyleNodeBlocks(Vector<? extends HTMLNode> html)
1146    {
1147        int removeCount = 0;
1148
1149        while (TagNodeRemoveInclusive.first(html, "style") > 0) removeCount++;
1150
1151        return removeCount;
1152    }
1153
1154    /**
1155     * Removes all {@code 'script'} Node blocks.
1156     * 
1157     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
1158     * 
1159     * @return The number of {@code SCRIPT}-Node Blocks that were removed
1160     */
1161    public static int removeScriptNodeBlocks(Vector<? extends HTMLNode> html)
1162    {
1163        int removeCount = 0;
1164
1165        while (TagNodeRemoveInclusive.first(html, "script") > 0) removeCount++;
1166
1167        return removeCount;
1168    }
1169
1170
1171    // ***************************************************************************************
1172    // ***************************************************************************************
1173    // JSON Script Nodes
1174    // ***************************************************************************************
1175    // ***************************************************************************************
1176
1177    /**
1178     * Convenience Method.
1179     * <BR />Invokes: {@link #getJSONScriptBlocks(Vector, int, int)}
1180     */
1181    public static Stream<String> getJSONScriptBlocks(Vector<HTMLNode> html)
1182    { return getJSONScriptBlocks(html, 0, -1); }
1183
1184    /**
1185     * Convenience Method.
1186     * <BR />Receives: {@code DotPair}.
1187     * <BR />Invokes: {@link #getJSONScriptBlocks(Vector, int, int)}
1188     */
1189    public static Stream<String> getJSONScriptBlocks(Vector<HTMLNode> html, DotPair dp)
1190    { return getJSONScriptBlocks(html, dp.start, dp.end + 1); }
1191
1192    /**
     * This method shall search for any and all {@code <SCRIPT TYPE="json">}
     * <I>JSON TEXT</I> {@code </SCRIPT>} blocks present in a range of Vectorized HTML.  The
     * search method shall simply look for the token {@code "JSON"} in the {@code TYPE} attribute
     * of each and every {@code <SCRIPT> TagNode} that is found on the page.  The contents of
     * such blocks <I>are not checked for validity, nor are they even guaranteed to be
     * {@code JSON} data!</I>
1199     * 
1200     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
1201     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
1202     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
1203     * 
1204     * @return This will return a {@code java.util.stream.Stream<String>} of each of the 
1205     * {@code JSON} elements present in the specified range of the Vectorized HTML passed to
1206     * parameter {@code 'html'}.
1207     * 
1208     * <EMBED CLASS="external-html" DATA-FILE-ID="STRMCNVT">
1209     * 
1210     * @see StrTokCmpr#containsIgnoreCase(String, Predicate, String)
1211     * @see Util#rangeToString(Vector, int, int)
1212     */
1213    public static Stream<String> getJSONScriptBlocks(Vector<HTMLNode> html, int sPos, int ePos)
1214    {
1215        // Whenever building lists, it is usually easiest to use a Stream.Builder
1216        Stream.Builder<String> b = Stream.builder();
1217
1218        // This Predicate simply tests that if the substring "json" (CASE INSENSITIVE) is found
1219        // in the TYPE attribute of a <SCRIPT TYPE=...> node, that the token-string is, indeed a
1220        // word - not a substring of some other word.  For instance: TYPE="json" would PASS, but
1221        // TYPE="rajsong" would FAIL - because the token string is not surrounded by white-space
1222
1223        final Predicate<String> tester = (String s) ->
1224            StrTokCmpr.containsIgnoreCase(s, (Character c) -> ! Character.isLetterOrDigit(c), "json");
1225
1226        // Find all <SCRIPT> node-blocks whose "TYPE" attribute abides by the tester String-predicate
1227        // named above.
1228
1229        Vector<DotPair> jsonDPList = InnerTagFindInclusive.all
1230            (html, sPos, ePos, "script", "type", tester);
1231
1232        // Convert each of these DotPair element into a java.lang.String
1233        // Add the String to the Stream.Builder<String>
1234
1235        for (DotPair jsonDP : jsonDPList)
1236            if (jsonDP.size() > 2)
1237                b.accept(Util.rangeToString(html, jsonDP.start + 1, jsonDP.end));
1238
1239        // Build the Stream, and return it.
1240        return b.build();
1241    }
1242
1243    // ***************************************************************************************
1244    // ***************************************************************************************
1245    // MISC
1246    // ***************************************************************************************
1247    // ***************************************************************************************
1248
1249
1250    /**
1251     * Inserts nodes, and allows a 'varargs' parameter.
1252     * 
1253     * @param html Any HTML Page
1254     * 
1255     * @param pos The position in the original {@code Vector} where the nodes shall be inserted.
1256     * 
1257     * @param nodes A list of nodes to insert.
1258     */
1259    public static void insertNodes(Vector<HTMLNode> html, int pos, HTMLNode... nodes)
1260    {
1261        Vector<HTMLNode> nodesVec = new Vector<>(nodes.length);
1262        for (HTMLNode node : nodes) nodesVec.addElement(node);
1263        html.addAll(pos, nodesVec);
1264    }
1265
1266    /**
1267     * <SPAN STYLE="color: red;"><B>OPT: Optimized</B></SPAN>
1268     * 
1269     * <BR /><BR />This method does the same thing as {@link #removeNodes(boolean, Vector, int[])}
1270     * but all error checking is skipped, and the input integer array is presumed to have
1271     * been sorted. There are no guarantees about the behavior of this method if the input array
1272     * {@code 'posArr'} is not sorted, <I>least-to-greatest,</I> or if there are duplicate or
1273     * negative values in this array.
1274     * 
1275     * <BR /><BR /><B>NOTE:</B> If the var-args input integer-array parameter is empty, this method
1276     * shall exit gracefully, and immediately.
1277     * 
1278     * @param page Any HTML-Page, usually ones generated by {@code HTMLPage.getPageTokens(...)},
1279     * but these may be obtained or created in any fashion so necessary.
1280     * 
1281     * @param posArr An array of integers which list/identify the nodes in the page to be removed.
1282     * Because this implementation has been optimized, no error checking will be performed on this
1283     * input.  It is presumed to be sorted, least-to-greatest, and that all values in the array are
1284     * valid-indices into the vectorized-html parameter {@code 'page'}
1285     */
1286    public static <T extends HTMLNode> void removeNodesOPT(Vector<T> page, int... posArr)
1287    {
1288        if (posArr.length == 0) return;
1289
1290        int endingInsertPos = page.size() - posArr.length;
1291        int posArrIndex     = 0;
1292        int insertPos       = posArr[0];
1293        int retrievePos     = posArr[0];
1294
1295        // There is very little that can be documented about these two loops.  Took 3 hours
1296        // to figure out.  Read the variables names for "best documentation"
1297
1298        while (insertPos < endingInsertPos)
1299        {
1300            // This inner-loop is necessary for when the posArr has consecutive-elements that
1301            // are *ALSO* consecutive-pointers.
1302            //
1303            // For instance, this invokation:
1304            // Util.removeNodes(page, 4, 5, 6); ...
1305            //      where 4, 5, and 6 are consecutive - the inner while-loop is required.
1306            //
1307            // For this invokation: 
1308            // Util.removeNodes(page, 2, 4, 6); 
1309            //      the inner-loop is not entered.
1310
1311            while ((posArrIndex < posArr.length) && (retrievePos == posArr[posArrIndex]))
1312            { retrievePos++; posArrIndex++; }
1313
1314            page.setElementAt(page.elementAt(retrievePos++), insertPos++);
1315        }
1316
1317        // Remove all remaining elements in the tail of the array.
1318        page.setSize(page.size() - posArr.length);
1319    }
1320
1321
1322    /**
     * This method removes each HTMLNode from the passed-parameter {@code 'page'} listed/identified
1324     * by the input array {@code 'nodeList'}.
1325     * 
1326     * <BR /><BR /><B>NOTE:</B> If the var-args input integer-array parameter is empty, this method
1327     * shall exit gracefully, and immediately.
1328     * 
1329     * @param preserveInputArray This is a convenience input parameter that allows a programmer to
1330     * "preserve" the original input-parameter integer-array that is passed to this method.  It
1331     * could be argued this parameter is "superfluous" - however, keep in mind that the passed
     * parameter {@code 'nodeList'} <B><I>must be sorted</I></B> before this method is able to
     * function properly. There is a sort that's performed within the body of this method.  In
     * case the original order of the integer-array input-parameter must be preserved, it's
     * possible to request that the sort operate on "a clone" of the input-parameter
     * integer-array, instead of the original integer-array {@code 'nodeList'} itself.
1337     * 
1338     * @param page Any HTML-Page, usually ones generated by {@code HTMLPage.getPageTokens(...)},
1339     * but these may be obtained or created in any fashion so necessary.
1340     * 
1341     * @param nodeList An array of integers which list/identify the nodes in the page to be
1342     * removed.
1343     * 
1344     * @throws IllegalArgumentException If the {@code 'nodeList'} contains duplicate entries.
1345     * Obviously, no {@code HTMLNode} may be removed from the {@code Vector<HTMLNode>} more than
1346     * once.
1347     * 
1348     * @throws IndexOutOfBoundsException If the nodeList contains index-pointers / items that are
1349     * not within the bounds of the passed HTML-Page {@code Vector}.
1350     */
1351    public static <T extends HTMLNode> void removeNodes
1352        (boolean preserveInputArray, Vector<T> page, int... nodeList)
1353    {
1354        if (nodeList.length == 0) return;
1355
1356        // @Safe Var Args
1357        int[]   posArr  = preserveInputArray ? nodeList.clone() : nodeList;
1358        int     len     = posArr.length;
1359
1360        Arrays.sort(posArr);
1361
1362        // Check for duplicates in the nodeList, no HTMLNode may be removed twice!
1363        for (int i=0; i < (len - 1); i++)
1364
1365            if (posArr[i] == posArr[i+1]) throw new IllegalArgumentException(
1366                "The input array contains duplicate items, this is not allowed.\n" +
1367                "This is since each array-entry is intended to be a pointer/index for items to " +
1368                "be removed.\nNo item can possibly be removed twice.!"
1369            );
1370
1371        // Make sure all nodes are within the bounds of the original Vector.  (no negative indexes,
1372        // no indexes greater than the size of the Vector)
1373
1374        if ((posArr[0] < 0) || (posArr[len - 1] >= page.size()))
1375
1376            throw new IndexOutOfBoundsException (
1377                "The input array contains entries which are not within the bounds of the " +
1378                "original-passed Vector.\nHTMLPage Vector has: " + page.size() + " elements.\n" +
1379                "Maximum element in the nodeList is [" + posArr[len - 1] + "], and the minimum " +
1380                "element is: [" + posArr[0] + "]"
1381            );
1382
1383        int endingInsertPos = page.size() - posArr.length;
1384        int posArrIndex     = 0;
1385        int insertPos       = posArr[0];
1386        int retrievePos     = posArr[0];
1387
1388        // There is very little that can be documented about these two loops.  Took 3 hours
1389        // to figure out.  Read the variables names for "best documentation"
1390
1391        while (insertPos < endingInsertPos)
1392        {
1393            // This inner-loop is necessary for when the posArr has consecutive-elements that
1394            // are *ALSO* consecutive-pointers.
1395            //
1396            // For instance, this invocation:
1397            // Util.removeNodes(page, 4, 5, 6);
1398            //      where 4, 5, and 6 are consecutive - the inner while-loop is required.
1399            //
1400            // For this invocation: 
1401            // Util.removeNodes(page, 2, 4, 6);
1402            //      the inner-loop is not entered.
1403
1404            while ((posArrIndex < posArr.length) && (retrievePos == posArr[posArrIndex])) 
1405            { retrievePos++; posArrIndex++; }
1406
1407            page.setElementAt(page.elementAt(retrievePos++), insertPos++);
1408        }
1409
1410        // Remove all remaining elements in the tail of the array.
1411        page.setSize(page.size() - posArr.length);
1412    }
1413
1414    /**
1415     * Convenience Method.
1416     * <BR />Invokes: {@link #replaceRange(Vector, int, int, Vector)}
1417     */
1418    public static void replaceRange
1419        (Vector<HTMLNode> page, DotPair range, Vector<HTMLNode> newNodes)
1420    { replaceRange(page, range.start, range.end+1, newNodes); }
1421
1422    /**
1423     * Replaces any all and all {@code HTMLNode's} located between the {@code Vector} locations
1424     * {@code 'sPos'} (inclusive) and {@code 'ePos'} (exclusive).  By exclusive, this means that
1425     * the {@code HTMLNode} located at positon {@code 'ePos'} <B><I>will not</I></B> be replaced,
1426     * but the one at {@code 'sPos'} <I><B>is replaced</B></I>.
1427     * 
1428     * <BR /><BR />The size of the {@code Vector} will change by {@code newNodes.size() - 
1429     * (ePos + sPos)}.  The contents situated between {@code Vector} location {@code sPos} and
1430     * {@code sPos + newNodes.size()} will, indeed, be the contents of the {@code 'newNodes'}
1431     * parameter.
1432     * 
1433     * @param page Any Java HTML page, constructed of {@code HTMLNode (TagNode & TextNode)}
1434     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
1435     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
1436     * @param newNodes Any Java HTML page-{@code Vector} of {@code HTMLNode}.
1437     * 
1438     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
1439     * 
1440     * @see #pollRange(Vector, int, int)
1441     * @see #removeRange(Vector, int, int)
1442     * @see #replaceRange(Vector, DotPair, Vector)
1443     */
1444    public static void replaceRange
1445        (Vector<HTMLNode> page, int sPos, int ePos, Vector<HTMLNode> newNodes)
1446    {
1447        // Torello.Java.LV
1448        LV l = new LV(sPos, ePos, page);
1449
1450        int oldSize     = ePos - sPos;
1451        int newSize     = newNodes.size();
1452        int insertPos   = sPos;
1453        int i           = 0;
1454
1455        while ((i < newSize) && (i < oldSize))
1456            page.setElementAt(newNodes.elementAt(i++), insertPos++);
1457
1458
1459        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1460        // CASE ONE:
1461        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1462
1463        if (newSize == oldSize) return;
1464
1465
1466        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1467        // CASE TWO:
1468        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1469        //
1470        // The new Vector is SMALLER than the old sub-range
1471        // The rest of the nodes just need to be trashed
1472        //
1473        // OLD-WAY: (Before realizing what Vector.subList is actually doing)
1474        // Util.removeRange(page, insertPos, ePos);
1475
1476        if (newSize < oldSize) page.subList(insertPos, ePos).clear();
1477
1478
1479        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1480        // CASE THREE:
1481        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1482        //
1483        // The new Vector is BIGGER than the old sub-range
1484        // There are still more nodes to insert.
1485
1486        else page.addAll(ePos, newNodes.subList(i, newSize));
1487    }
1488
1489    /**
1490     * Java's {@code java.util.Vector} class does not allow public access to the
1491     * {@code removeRange(start, end)} function.  It is protected in Java's Documentation about
1492     * the {@code Vector} class.  This method does exactly that, nothing else.
1493     * 
1494     * @param page Any Java HTML page, constructed of {@code HTMLNode (TagNode & TextNode)}
1495     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
1496     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
1497     * 
1498     * @return the number of nodes removed.
1499     * 
1500     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
1501     * 
1502     * @see #pollRange(Vector, int, int)
1503     * @see #removeRange(Vector, DotPair)
1504     */
1505    public static <T extends HTMLNode> int removeRange(Vector<T> page, int sPos, int ePos)
1506    {
1507        // Torello.Java.LV
1508        LV  l = new LV(sPos, ePos, page);
1509
1510        // According to the Sun-Oracle Docs, the returned sublist "mirros" the original vector,
1511        // which means that when it is changed, so is the original vector.
1512
1513        page.subList(l.start, l.end).clear();
1514
1515        return l.size();
1516
1517        /*
1518        // BEFORE DISCOVERING THE METHOD Vector.subList(start, end), this is how this worked.
1519        // It seemed very inefficient before realizing what was actually happening.
1520
1521        // Shift the nodes in position Vector[l.end through page.size()] to vector-position
1522        // Vector[l.start]
1523        int end = page.size() - l.end - 1;
1524
1525        for (int i=0; i <= end; i++) page.setElementAt(page.elementAt(l.end + i), l.start + i);
1526
1527        // Number of nodes to remove
1528        int numToRemove = l.end - l.start;
1529
1530        // Remove the tail - all nodes starting at:
1531        // vector-position[page.size() - (l.end - l.start)]
1532        page.setSize(page.size() - numToRemove);
1533
1534        return numToRemove;
1535        */
1536    }
1537
1538    /**
1539     * Convenience Method.
1540     * <BR />Receives: {@code DotPair}
1541     * <BR />Invokes: {@link #removeRange(Vector, int, int)} 
1542     */
1543    public static int removeRange(Vector<? extends HTMLNode> html, DotPair dp)
1544    { return removeRange(html, dp.start, dp.end + 1); }
1545
1546    /**
1547     * Java's {@code java.util.Vector} class does not allow public access to the
1548     * {@code removeRange(start, end)} function.  It is listed as {@code 'protected'} in Java's
1549     * Documentation about the {@code class Vector.}  This method upstages that, and performs the
1550     * {@code 'Poll'} operation, where the nodes are first removed, stored, and then return as a
1551     * function result.
1552     * 
1553     * <BR /><BR /><B>FURTHERMORE:</B> The nodes that are removed are placed in a separate return
1554     * {@code Vector}, and returned as a result to this method.
1555     * 
1556     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
1557     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
1558     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
1559     * 
1560     * @return A complete list ({@code Vector<HTMLNode>}) of the nodes that were removed.
1561     * 
1562     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
1563     * 
1564     * @see #removeRange(Vector, int, int)
1565     * @see #removeRange(Vector, DotPair)
1566     * @see #pollRange(Vector, DotPair)
1567     */
1568    public static Vector<HTMLNode> pollRange(Vector<? extends HTMLNode> html, int sPos, int ePos)
1569    {
1570        // The original version of this method is preserved inside comments at the bottom of this
1571        // method.  Prior to seeing the Sun-Oracle Docs explaining that the return from the SubList
1572        // operation "mirrors changes" back to to the original vector, the code in the comments is
1573        // how this method was accomplished.
1574
1575        LV                          l       = new LV(html, sPos, ePos);
1576        Vector<HTMLNode>            ret     = new Vector<HTMLNode>(l.end - l.start);
1577        List<? extends HTMLNode>    list    = html.subList(l.start, l.end);
1578
1579        // Copy the Nodes into the return Vector that the end-user receives
1580        ret.addAll(list);
1581
1582        // Clear the nodes out of the original Vector.  The Sun-Oracle Docs 
1583        // state that the returned sub-list is "mirrored back into" the original
1584
1585        list.clear();
1586
1587        // Return the Vector to the user.  Note that the List<HTMLNode> CANNOT be returned,
1588        // because of it's mirror-qualities, and because this method expects a vector.
1589
1590        return ret;
1591
1592        /*
1593        // BEFORE READING ABOUT Vector.subList(...), this is how this was accomplished:
1594        // NOTE: It isn't so clear how the List<HTMLNode> works - likely it doesn't actually
1595        //       create any new memory-allocated arrays, it is just an "overlay"
1596
1597        // Copy the elements from the input vector into the return vector
1598        for (int i=l.start; i < l.end; i++) ret.add(html.elementAt(i));
1599
1600        // Remove the range from the input vector (this is the meaning of 'poll')
1601        Util.removeRange(html, sPos, ePos);
1602
1603        return ret;
1604        */
1605    }
1606
1607    /**
1608     * Convenience Method.
1609     * <BR />Receives: {@code DotPair}
1610     * <BR />Invokes: {@link #pollRange(Vector, int, int)}. 
1611     */
1612    public static Vector<HTMLNode> pollRange(Vector<? extends HTMLNode> html, DotPair dp)
1613    { return pollRange(html, dp.start, dp.end + 1); }
1614
1615    /**
1616     * This removes every element from the {@code Vector} beginning at position 0, all the way to
1617     * position {@code 'pos'} (exclusive).  The {@code elementAt(pos)} remains in the original page
1618     * input-{@code Vector}.  This is the definition of 'exclusive'.
1619     * 
1620     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
1621     * 
1622     * @param pos Any position within the range of the input {@code Vector}.
1623     * 
1624     * @return The elements in the {@code Vector} from position: {@code 0 ('zero')} all the way to
1625     * position: {@code 'pos'}
1626     */
1627    public static Vector<HTMLNode> split(Vector<? extends HTMLNode> html, int pos)
1628    { return pollRange(html, 0, pos); }
1629
1630    /**
1631     * Removes the first and last element of a vectorized-HTML web-page, or sub-page.  Generally,
1632     * this could be used to remove the surrounding tag's {@code '<DIV>' ... '</DIV>'}, or
1633     * something similar.
1634     * 
1635     * <BR /><BR /><SPAN STYLE="color: red;"><B>IMPORTANT:</B></SPAN> This method <B>WILL NOT
1636     * CHECK</B> whether there are matching HTML open-and-close tags at the end beginning and end
1637     * of this sub-section.  Generally, though, that is how this method may be used.
1638     * 
1639     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
1640     * 
1641     * @throws IllegalArgumentException If the {@code Vector} has fewer than two elements.
1642     */
1643    public static void removeFirstLast(Vector<? extends HTMLNode> html)
1644    {
1645        int size = html.size();
1646
1647        if (size < 2) throw new IllegalArgumentException(
1648            "You have requested that the first and last elements the input 'page' parameter (a vector) be removed.  " +
1649            "However, the vector size is only [" + size  + "], so this cannot be performed."
1650        );
1651
1652        // NOTE: *** This removes elementAt(0) and elementAt(size-1)
1653        //       *** NOT ALL ELEMENTS BETWEEN 0 and (size-1)
1654
1655        Util.removeNodesOPT(html, 0, size-1);
1656    }
1657
1658
1659    // ***************************************************************************************
1660    // ***************************************************************************************
1661    // Inclusive 
1662    // ***************************************************************************************
1663    // ***************************************************************************************
1664
1665
1666    /**
1667     * Tools for finding the matching-closing tag of any open {@link TagNode}.
1668     * 
1669     * <BR /><BR /><EMBED CLASS="external-html" DATA-FILE-ID="UTILINCL">
1670     */
1671    @Torello.HTML.Tools.JavaDoc.StaticFunctional
1672    public static class Inclusive
1673    {
1674        private Inclusive() { }
1675
1676        // ***************************************************************************************
1677        // ***************************************************************************************
1678        // Inclusive Find/Get
1679        // ***************************************************************************************
1680        // ***************************************************************************************
1681
1682        /**
1683         * This finds the closing HTML {@code 'TagNode'} match for a given opening
1684         * {@code 'TagNode'} in a given-input html page or sub-section.
1685         *
1686         * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
1687         *
1688         * @param nodeIndex An index into that {@code Vector}.  This index must point to an
1689         * {@code HTMLNode} element that is:
1690         *
1691         * <BR /><BR /><OL CLASS="JDOL">
1692         * <LI>An instance of {@code TagNode}</LI>
1693         * <LI>A {@code TagNode} whose {@code 'isClosing'} field is <B>FALSE</B></LI>
1694         * <LI>Is not a {@code 'singleton'} HTML element-token
1695         * (i.e. {@code <IMG>, <BR>, <H1>, etc...})
1696         * </LI>
1697         * </OL>
1698         *
1699         * @return An "inclusive search" finds {@code OpeningTag} and {@code ClosingTag} pairs - 
1700         * <I>and returns all the elements between them in the contents of a 
1701         * return-{@code Vector}, or {@code Vector DotPair}-end-point value</I>.  This method
1702         * will take a particular node of a {@code Vector}, and (as long it has a match) 
1703         * find it's <I><B>closing {@code HTMLNode} match.</B></I>  The integer returned will
1704         * be the index into this page of the closing, matching {@code TagNode.}
1705         *
1706         * @throws TagNodeExpectedException If the node in the {@code Vector}-parameter
1707         * {@code 'html'} contained at index {@code 'nodeIndex'} is not an instance of
1708         * {@code TagNode}, then this exception is thrown.
1709         *
1710         * @throws OpeningTagNodeExpectedException If the node in the {@code Vector}-parameter 
1711         * {@code 'html'} at index {@code 'nodeIndex'} is a closing version of the HTML element,
1712         * then this exception shall throw.
1713         *
1714         * @throws InclusiveException If the node in {@code Vector}-parameter {@code 'html'},
1715         * pointed-to by index {@code 'nodeIndex'} is an HTML {@code 'Singleton'} / Self-Closing
1716         * Tag, then this exception will be thrown.
1717         *
1718         * @see TagNode
1719         * @see TagNode#tok
1720         * @see TagNode#isClosing
1721         * @see HTMLNode
1722         */
1723        public static int find(Vector<? extends HTMLNode> html, int nodeIndex)
1724        {
1725            TagNode     tn  = null;
1726            HTMLNode    n   = null;
1727            String      tok = null;
1728
1729            if (! html.elementAt(nodeIndex).isTagNode())
1730
1731                throw new TagNodeExpectedException (
1732                    "You have attempted to find a closing tag to match an opening one, " +
1733                    "but the 'nodeIndex' (" + nodeIndex + ") you have passed doesn't contain " +
1734                    "an instance of TagNode."
1735                );
1736
1737            else tn = (TagNode) html.elementAt(nodeIndex);
1738
1739            if (tn.isClosing) throw new OpeningTagNodeExpectedException(
1740                "The TagNode indicated by 'nodeIndex' = " + nodeIndex + " has its 'isClosing' " +
1741                "boolean as TRUE - this is not an opening TagNode, but it must be to continue."
1742            );
1743
1744            // Checks to ensure this token is not a 'self-closing' or 'singleton' tag.
1745            // If it is an exception shall throw.
1746            InclusiveException.check(tok = tn.tok);
1747
1748            int end         = html.size();
1749            int openCount   = 1;
1750
1751            for (int pos = (nodeIndex+1); pos < end; pos++)
1752
1753                if ((n = html.elementAt(pos)).isTagNode())
1754                    if ((tn = ((TagNode) n)).tok.equals(tok))
1755                    {
1756                        // This keeps a "Depth Count" - where "depth" is just the number of 
1757                        // opened tags, for which a matching, closing tag hasn't been found yet.
1758
1759                        openCount += (tn.isClosing ? -1 : 1);
1760
1761                        // When all open-tags of the specified HTML Element 'tok' have been
1762                        // found, search has finished.
1763
1764                        if (openCount == 0) return pos;
1765                    }
1766
1767            // The closing-matching tag was not found
1768            return -1;
1769        }
1770
1771        /**
1772         * Convenience Method.
1773         * <BR />Invokes: {@link #find(Vector, int)}
1774         * <BR />Converts: output to <B><CODE>'GET'</CODE></B> format ({@code Vector}-sublist)
1775         * <BR />Using: {@link Util#cloneRange(Vector, int, int)}
1776         */
1777        public static Vector<HTMLNode> get(Vector<? extends HTMLNode> html, int nodeIndex)
1778        { 
1779            int endPos = find(html, nodeIndex);
1780
1781            return (endPos == -1) ? null : cloneRange(html, nodeIndex, endPos + 1);
1782        }
1783
1784        /**
1785         * Convenience Method.
1786         * <BR />Invokes: {@link #find(Vector, int)}
1787         * <BR />Converts: output to <B><CODE>'PEEK'</CODE></B> format ({@code SubSection})
1788         * <BR />Using: {@link Util#cloneRange(Vector, int, int)}
1789         */
1790        public static SubSection peek(Vector<? extends HTMLNode> html, int nodeIndex)
1791        {
1792            int endPos = find(html, nodeIndex);
1793
1794            return (endPos == -1) ? null : new SubSection(
1795                new DotPair(nodeIndex, endPos),
1796                cloneRange(html, nodeIndex, endPos + 1)
1797            );
1798        }
1799
1800        /**
1801         * Convenience Method.
1802         * <BR />Invokes: {@link #find(Vector, int)}
1803         * <BR />Converts: output to <B><CODE>'POLL'</CODE></B> format ({@code Vector}-sublist),
1804         * <BR />Using: {@link Util#pollRange(Vector, int, int)}
1805         * <BR />Removes: The requested Sub-List
1806         */
1807        public static Vector<HTMLNode> poll(Vector<? extends HTMLNode> html, int nodeIndex)
1808        {
1809            int endPos = find(html, nodeIndex);
1810
1811            return (endPos == -1) ? null : pollRange(html, nodeIndex, endPos + 1);
1812        }
1813
1814        /**
1815         * Convenience Method.
1816         * <BR />Invokes: {@link #find(Vector, int)}
1817         * <BR />Converts: output to <B><CODE>'REMOVE'</CODE></B> format ({@code int} - number
1818         * of nodes removed)
1819         * <BR />Using: {@link #removeRange(Vector, int, int)}
1820         * <BR />Removes: The requested Sub-List
1821         */
1822        public static int remove(Vector<? extends HTMLNode> html, int nodeIndex)
1823        {
1824            int endPos = find(html, nodeIndex);
1825
1826            return (endPos == -1) ? 0 : removeRange(html, nodeIndex, endPos + 1);
1827        }
1828
1829        // ***************************************************************************************
1830        // ***************************************************************************************
1831        // Optimized Methods, Inclusive Find/Get/Subsection
1832        // ***************************************************************************************
1833        // ***************************************************************************************
1834
1835        /**
1836         * Convenience Method.  
1837         * <BR />Invokes: {@link #dotPairOPT(Vector, int)}
1838         * <BR />Converts: output to {@code Vector<HTMLNode>}
1839         */
1840        public static Vector<HTMLNode> vectorOPT(Vector<? extends HTMLNode> html, int tagPos)
1841        {
1842            DotPair dp = dotPairOPT(html, tagPos);
1843
1844            if (dp == null) return null;
1845            else            return Util.cloneRange(html, dp.start, dp.end + 1);
1846        }
1847
1848        /**
1849         * Convenience Method.
1850         * <BR />Invokes: {@link #dotPairOPT(Vector, int)}
1851         * <BR />Converts: output to {@code SubSection}
1852         */
1853        public static SubSection subSectionOPT(Vector<? extends HTMLNode> html, int tagPos)
1854        {
1855            DotPair dp = dotPairOPT(html, tagPos);
1856
1857            if (dp == null) return null;
1858            else            return new SubSection(dp, Util.cloneRange(html, dp.start, dp.end + 1));
1859        }
1860
1861        /**
1862         * <EMBED CLASS="external-html" DATA-FILE-ID="UTILIOPT">
1863         * <!-- Inclusive Opt Description -->
1864         * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
1865         * @param tagPos <EMBED CLASS="external-html" DATA-FILE-ID="UTILOPTTP">
1866         * @return A <B>'DotPair'</B> version of an inclusive, end-to-end HTML tag-element.
1867         * <EMBED CLASS="external-html" DATA-FILE-ID="UTILOPTJSN"> 
1868         * <!-- Note on JS-DOM Tree innerHTML -->
1869         * @see TagNode
1870         * @see TagNode#isClosing
1871         * @see TagNode#tok
1872         * @see DotPair
1873         */
1874        public static DotPair dotPairOPT(Vector<? extends HTMLNode> html, int tagPos)
1875        {
1876            // Temp Variables
1877            HTMLNode n;     TagNode tn;     int openCount = 1;
1878
1879            int len = html.size();
1880
1881            // This is the name (token) of the "Opening HTML Element", we are searching for
1882            // the matching, closing element
1883
1884            String tok = ((TagNode) html.elementAt(tagPos)).tok;
1885
1886            for (int i = (tagPos+1); i < len; i++)
1887
1888                if ((n = html.elementAt(i)).isTagNode())
1889                    if ((tn = (TagNode) n).tok.equals(tok))
1890                    {
1891                        // This keeps a "Depth Count" - where "depth" is just the number of 
1892                        // opened tags, for which a matching, closing tag hasn't been found yet.
1893
1894                        openCount += (tn.isClosing ? -1 : 1);
1895
1896                        // When all open-tags of the specified HTML Element 'tok' have been
1897                        // found, search has finished.
1898
1899                        if (openCount == 0) return new DotPair(tagPos, i);
1900                    }
1901
1902            // Was not found
1903            return null;
1904        }
1905
1906        /**
1907         * Convenience Method.
1908         * <BR />Invokes: {@link #dotPairOPT(Vector, int, int)}
1909         * <BR />Converts: output to {@code Vector<HTMLNode>}
1910         */
1911        public static Vector<HTMLNode> vectorOPT
1912            (Vector<? extends HTMLNode> html, int tagPos, int end)
1913        {
1914            DotPair dp = dotPairOPT(html, tagPos, end);
1915
1916            if (dp == null) return null;
1917            else            return Util.cloneRange(html, dp.start, dp.end + 1);
1918        }
1919
1920        /**
1921         * Convenience Method.
1922         * <BR />Invokes: {@link #dotPairOPT(Vector, int, int)}
1923         * <BR />Converts: output to {@code SubSection}
1924        */
1925        public static SubSection subSectionOPT
1926            (Vector<? extends HTMLNode> html, int tagPos, int end)
1927        {
1928            DotPair dp = dotPairOPT(html, tagPos, end);
1929
1930            if (dp == null) return null;
1931            else            return new SubSection(dp, Util.cloneRange(html, dp.start, dp.end + 1));
1932        }
1933
1934        /**
1935         * <EMBED CLASS="external-html" DATA-FILE-ID="UTILIOPT">
1936         * <!-- Inclusive Opt Description -->
1937         * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
1938         * @param tagPos <EMBED CLASS="external-html" DATA-FILE-ID="UTILOPTTP">
1939         * @param end <EMBED CLASS="external-html" DATA-FILE-ID="UTILOPTEND">
1940         * @return A <B>'DotPair'</B> version of an inclusive, end-to-end HTML tag-element.
1941         * <EMBED CLASS="external-html" DATA-FILE-ID="UTILOPTJSN">
1942         * <!-- Note on JS-DOM Tree innerHTML -->
1943         * @see TagNode
1944         * @see TagNode#isClosing
1945         * @see TagNode#tok
1946         * @see DotPair
1947         */
1948        public static DotPair dotPairOPT(Vector<? extends HTMLNode> html, int tagPos, int end)
1949        {
1950            // Temp Variables
1951            HTMLNode n;     TagNode tn;     int openCount = 1;      int endPos;
1952
1953            // This is the name (token) of the "Opening HTML Element", we are searching for
1954            // the matching, closing element
1955            String tok = ((TagNode) html.elementAt(tagPos)).tok;
1956
1957            for (endPos = (tagPos+1); endPos < end; endPos++)
1958
1959                if ((n = html.elementAt(endPos)).isTagNode())
1960                    if ((tn = (TagNode) n).tok.equals(tok))
1961                    {
1962                        // This keeps a "Depth Count" - where "depth" is just the number of
1963                        // opened tags, for which a matching, closing tag hasn't been found yet.
1964                        openCount += (tn.isClosing ? -1 : 1);
1965
1966                        // When all open-tags of the specified HTML Element 'tok' have been
1967                        // found, search has finished.
1968                        if (openCount == 0) return new DotPair(tagPos, endPos);
1969                    }
1970
1971            // The end of the vectorized-html page (or subsection) was reached, but the
1972            // matching-closing element was not found.
1973            return null; // assert(endPos == html.size());
1974        }
1975    }
1976}