001package Torello.HTML;
002
003import java.util.*;
004import java.util.regex.*;
005import java.util.stream.*;
006
007import java.util.function.Predicate;
008
009import Torello.HTML.NodeSearch.*;
010import Torello.Java.*;
011
012import Torello.Java.Additional.Ret2;
013
014/**
015 * A long list of utilities for searching, finding, extracting and removing HTML from 
016 * Vectorized-HTML.
017 * 
018 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=UTIL>
019 */
020@Torello.JavaDoc.StaticFunctional
021public class Util
022{
023    private Util() { }
024
025
026    // ********************************************************************************************
027    // ********************************************************************************************
028    // Trim TextNode Strings
029    // ********************************************************************************************
030    // ********************************************************************************************
031
032
033    /**
034     * Convenience Method.
035     * <BR />Invokes: {@link #trimTextNodes(Vector, int, int, boolean)}
036     */
037    public static int trimTextNodes(Vector<HTMLNode> page, boolean deleteZeroLengthStrings)
038    { return trimTextNodes(page, 0, -1, deleteZeroLengthStrings); }
039
040    /**
041     * Convenience Method.
042     * <BR />Receives: {@code DotPair}
043     * <BR />Invokes: {@link #trimTextNodes(Vector, int, int, boolean)}
044     */
045    public static int trimTextNodes
046        (Vector<HTMLNode> page, DotPair dp, boolean deleteZeroLengthStrings)
047    { return trimTextNodes(page, dp.start, dp.end + 1, deleteZeroLengthStrings); }
048
049    /**
050     * This will iterate through the entire {@code Vector<HTMLNode>}, and invoke
051     * {@code java.lang.String.trim()} on each {@code TextNode} on the page.  If this invocation
052     * results in a reduction of {@code String.length()}, then a new {@code TextNode} will be
053     * instantiated whose {@code TextNode.str} field is set to the result of the
054     * {@code String.trim(old_node.str)} operation.
055     * 
056     * @param deleteZeroLengthStrings If a {@code TextNode's} length is zero (before or after
057     * {@code trim()} is called) and when this parameter is {@code TRUE}, that {@code TextNode}
058     * must be removed from the {@code Vector}.
059     * 
     * @return The number of {@code TextNode's} that were either trimmed or deleted.  Each such
     * node increments an internal counter by one, and the counter's final value is returned.
062     */
063    public static int trimTextNodes
064        (Vector<HTMLNode> page, int sPos, int ePos, boolean deleteZeroLengthStrings)
065    {
066        int                 counter = 0;
067        IntStream.Builder   b       = deleteZeroLengthStrings ? IntStream.builder() : null;
068        HTMLNode            n       = null;
069        LV                  l       = new LV(page, sPos, ePos);
070
071        for (int i=l.start; i < l.end; i++)
072
073            if ((n = page.elementAt(i)).isTextNode())
074            {
075                String  trimmed         = n.str.trim();
076                int     trimmedLength   = trimmed.length();
077
078                if ((trimmedLength == 0) && deleteZeroLengthStrings)
079                    { b.add(i); counter++; }
080
081                else if (trimmedLength < n.str.length())
082                    { page.setElementAt(new TextNode(trimmed), i); counter++; }
083            }
084
085        if (deleteZeroLengthStrings) Util.Remove.nodesOPT(page, b.build().toArray());
086
087        return counter;
088    }
089
090
091    // ********************************************************************************************
092    // ********************************************************************************************
093    // Vectorized-HTML To-String Methods
094    // ********************************************************************************************
095    // ********************************************************************************************
096
097
098    /** 
099     * Convenience Method.
100     * <BR />Invokes: {@link #rangeToString(Vector, int, int)}
101     */
102    public static String pageToString(Vector<? extends HTMLNode> html)
103    { return rangeToString(html, 0, -1); }
104
105    /**
106     * Convenience Method.
107     * <BR />Receives: {@code DotPair}
108     * <BR />Invokes: {@link #rangeToString(Vector, int, int)}
109     */
110    public static String rangeToString(Vector<? extends HTMLNode> html, DotPair dp)
111    { return rangeToString(html, dp.start, dp.end + 1); }
112
113    /**
114     * The purpose of this method/function is to convert a portion of the contents of an HTML-Page,
115     * currently being represented as a {@code Vector} of {@code HTMLNode's} into a {@code String.}
116     * Two {@code 'int'} parameters are provided in this method's signature to define a sub-list
117     * of a page to be converted to a {@code java.lang.String}
118     * 
119     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
120     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
121     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
122     * 
123     * @return The {@code Vector} converted into a {@code String}.
124     * 
125     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
126     * 
127     * @see #pageToString(Vector)
128     * @see #rangeToString(Vector, DotPair)
129     */
130    public static String rangeToString(Vector<? extends HTMLNode> html, int sPos, int ePos)
131    {
132        StringBuilder   ret = new StringBuilder();
133        LV              l   = new LV(html, sPos, ePos);
134
135        for (int i=l.start; i < l.end; i++) ret.append(html.elementAt(i).str);
136
137        return ret.toString();
138    }
139
140
141    // ********************************************************************************************
142    // ********************************************************************************************
143    // Vectorized-HTML TextNode To-String Methods
144    // ********************************************************************************************
145    // ********************************************************************************************
146
147
148    /**
149     * Convenience Method.
150     * <BR />Invokes: {@link #textNodesString(Vector, int, int)}
151     */
152    public static String textNodesString(Vector<? extends HTMLNode> html)
153    { return textNodesString(html, 0, -1); }
154
155    /**
156     * Convenience Method.
157     * <BR />Receives: {@code DotPair}
158     * <BR />Invokes: {@link #textNodesString(Vector, int, int)}
159     */
160    public static String textNodesString(Vector<? extends HTMLNode> html, DotPair dp)
161    { return textNodesString(html, dp.start, dp.end + 1); }
162
163    /**
164     * This will return a {@code String} that is comprised of ONLY the {@code TextNode's} contained
165     * within the input {@code Vector} - <I>and furthermore, only nodes that are situated between
166     * index {@code int 'sPos'} and index {@code int 'ePos'} in that {@code Vector.}</I>
167     * 
     * <BR /><BR />The {@code for-loop} that iterates the input-{@code Vector} parameter will
     * simply skip every instance of {@code 'TagNode'} and {@code 'CommentNode'} when building
     * the returned {@code String}.
171     * 
172     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
173     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
174     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
175     * 
176     * @return This will return a {@code String} that is comprised of the text-only elements in the
177     * web-page or sub-page.  Only text between the requested {@code Vector}-indices is included.
178     * 
179     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
180     * 
181     * @see #textNodesString(Vector, DotPair)
182     * @see #textNodesString(Vector)
183     */
184    public static String textNodesString(Vector<? extends HTMLNode> html, int sPos, int ePos)
185    {
186        StringBuilder   sb  = new StringBuilder();
187        LV              l   = new LV(html, sPos, ePos);
188        HTMLNode        n;
189
190        for (int i=l.start; i < l.end; i++)
191            if ((n = html.elementAt(i)).isTextNode())
192                sb.append(n.str);
193
194        return sb.toString();
195    }
196
197
198    // ********************************************************************************************
199    // ********************************************************************************************
200    // TextNode Modification Operations - "Escape Text Nodes"
201    // ********************************************************************************************
202    // ********************************************************************************************
203
204
205    /**
206     * Convenience Method.
207     * <BR />Invokes: {@link #escapeTextNodes(Vector, int, int)}
208     */
209    public static int escapeTextNodes(Vector<HTMLNode> html)
210    { return escapeTextNodes(html, 0, -1); }
211
212    /**
213     * Convenience Method.
214     * <BR />Receives: {@code DotPair} 
215     * <BR />Invokes: {@link #escapeTextNodes(Vector, int, int)}
216     */
217    public static int escapeTextNodes(Vector<HTMLNode> html, DotPair dp)
218    { return escapeTextNodes(html, dp.start, dp.end + 1); }
219
220    /**
     * Will call {@code Escape.replace} on the text of each {@code TextNode} in the range of
     * {@code sPos ... ePos}
223     * 
224     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
225     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
226     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
227     * 
     * @return The number of {@code TextNode's} that changed as a result of the
     * {@code Escape.replace(n.str)} loop.
230     * 
231     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
232     * 
233     * @see Escape#replaceAll(String)
234     */
235    public static int escapeTextNodes(Vector<HTMLNode> html, int sPos, int ePos)
236    {
237        LV          l       = new LV(html, sPos, ePos);
238        HTMLNode    n       = null;
239        String      s       = null;
240        int         counter = 0;
241
242        for (int i=l.start; i < l.end; i++)
243
244            if ((n = html.elementAt(i)).isTextNode())
245                if (! (s = Escape.replace(n.str)).equals(n.str))
246                {
247                    html.setElementAt(new TextNode(s), i);
248                    counter++;
249                }
250
251        return counter;
252    }
253
254
255    // ********************************************************************************************
256    // ********************************************************************************************
257    // Clone HTML Vectors
258    // ********************************************************************************************
259    // ********************************************************************************************
260
261
262    /**
263     * Convenience Method.
264     * <BR />Invokes: {@link #cloneRange(Vector, int, int)}
265     */
266    public static Vector<HTMLNode> clone(Vector<? extends HTMLNode> html)
267    { return cloneRange(html, 0, -1); }
268
269    /**
270     * Convenience Method.
271     * <BR />Receives: {@code DotPair}
272     * <BR />Invokes: {@link #cloneRange(Vector, int, int)}
273     */
274    public static Vector<HTMLNode> cloneRange(Vector<? extends HTMLNode> html, DotPair dp)
275    { return cloneRange(html, dp.start, dp.end + 1); }
276
277    /**
278     * Copies (clones!) a sub-range of the HTML page, stores the results in a {@code Vector}, and
279     * returns it.
280     * 
281     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
282     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
283     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
284     * 
285     * @return The "cloned" (copied) sub-range specified by {@code 'sPos'} and {@code 'ePos'.}
286     * 
287     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
288     * 
289     * @see #cloneRange(Vector, DotPair)
290     */
291    public static Vector<HTMLNode> cloneRange(Vector<? extends HTMLNode> html, int sPos, int ePos)
292    {
293        LV                  l   = new LV(html, sPos, ePos);
294        Vector<HTMLNode>    ret = new Vector<>(l.size());
295
296        // Copy the range specified into the return vector
297        //
298        // HOW THIS WAS DONE BEFORE NOTICING Vector.subList
299        //
300        // for (int i = l.start; i < l.end; i++) ret.addElement(html.elementAt(i));
301
302        ret.addAll(html.subList(l.start, l.end));
303
304        return ret;
305    }
306
307
308
309    // ********************************************************************************************
310    // ********************************************************************************************
311    // String Length of the TextNode's
312    // ********************************************************************************************
313    // ********************************************************************************************
314
315
316    /**
317     * Convenience Method.
318     * <BR />Receives: {@code DotPair}
319     * <BR />Invokes: {@link #textStrLength(Vector, int, int)}
320     */
321    public static int textStrLength(Vector<? extends HTMLNode> html, DotPair dp)
322    { return textStrLength(html, dp.start, dp.end + 1); }
323
324    /**
325     * Convenience Method.
326     * <BR />Invokes: {@link #textStrLength(Vector, int, int)}
327     */
328    public static int textStrLength(Vector<? extends HTMLNode> html)
329    { return textStrLength(html, 0, -1); }
330
331    /**
332     * This method will return the length of the strings <I><B>contained by all/only instances of
333     * {@code 'TextNode'}</B></I> among the nodes of the input HTML-{@code Vector}.   This is
334     * identical to the behavior of the method with the same name, but includes starting and ending
335     * bounds on the html {@code Vector}: {@code 'sPos'} &amp; {@code 'ePos'}.
336     * 
337     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
338     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
339     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
340     * 
341     * @return The sum of the lengths of the text contained by text-nodes in the {@code Vector} 
342     * between {@code 'sPos'} and {@code 'ePos'}.
343     * 
344     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
345     */
346    public static int textStrLength(Vector<? extends HTMLNode> html, int sPos, int ePos)
347    {
348        HTMLNode    n;
349        int         sum = 0;
350        LV          l   = new LV(html, sPos, ePos);
351
352        // Counts the length of each "String" in a "TextNode" between sPos and ePos
353        for (int i=l.start; i < l.end; i++)
354
355            if ((n = html.elementAt(i)).isTextNode())
356                sum += n.str.length();
357
358        return sum;
359    }
360
361
362    // ********************************************************************************************
363    // ********************************************************************************************
364    // Compact Adjacent / Adjoining TextNode's
365    // ********************************************************************************************
366    // ********************************************************************************************
367
368
369    /**
370     * Convenience Method.
371     * <BR />Invokes: {@link #compactTextNodes(Vector, int, int)}
372     */
373    public static int compactTextNodes(Vector<HTMLNode> html)
374    { return compactTextNodes(html, 0, html.size()); }
375
376    /**
377     * Convenience Method.
378     * <BR />Receives: {@code DotPair}
379     * <BR />Invokes: {@link #compactTextNodes(Vector, int, int)} 
380     */
381    public static int compactTextNodes(Vector<HTMLNode> html, DotPair dp)
382    { return compactTextNodes(html, dp.start, dp.end + 1); }     
383
384    /**
     * Occasionally, when removing instances of {@code TagNode} from a vectorized-html
     * page, certain instances of {@code TextNode} which were not adjacent / neighbours in
     * the {@code Vector}, all of a sudden become adjacent.  Although there are no major problems
     * with contiguous instances of {@code TextNode} from the Search Algorithm's perspective,
     * for programmers, it can sometimes be befuddling to realize that text visible in the
     * output of a call to {@code Util.pageToString(html)} is not being found because that
     * text is broken amongst multiple instances of adjacent {@code TextNode's}.
392     *
393     * <BR /><BR />This method merely combines "Adjacent" instances of {@code class TextNode}
394     * in the {@code Vector} into single instances of {@code class TextNode}
395     *
396     * @param html Any vectorized-html web-page.  If this page contain any contiguously placed
397     * {@code TextNode's}, the extra's will be eliminated, and the internal-string's inside the
398     * node's ({@code TextNode.str}) will be combined.  This action will reduce the size of the
399     * actual html-{@code Vector}.
400     * 
401     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
402     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
403     * 
404     * @return The number of nodes that were eliminated after being combined, or 0 if there
405     * were no text-nodes that were removed.
406     * 
407     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
408     * 
409     * @see HTMLNode#str
410     * @see TextNode
411     */
412    public static int compactTextNodes(Vector<HTMLNode> html, int sPos, int ePos)
413    {
414        LV      l           = new LV(html, sPos, ePos);
415        boolean compacting  = false;
416        int     firstPos    = -1;
417        int     delta       = 0;
418
419        for (int i=l.start; i < (l.end - delta); i++)
420
421            if (html.elementAt(i).isTextNode())
422            {
423                if (compacting) continue;   // Not in "Compacting Mode"
424                compacting  = true;         // Start "Compacting Mode" - this is a TextNode
425                firstPos    = i;
426            }
427
428            else if (compacting && (firstPos < (i-1)))  // Else - Must be a TagNode or CommentNode
429            {
430                // Save compacted TextNode String's into this StringBuilder
431                StringBuilder compacted = new StringBuilder();
432
433                // Iterate all TextNodes that were adjacent, put them together into StringBuilder
434                for (int j=firstPos; j < i; j++) compacted.append(html.elementAt(j).str);
435
436                // Place this new "aggregate TextNode" at location of the first TextNode that
437                // was compacted into this StringBuilder
438
439                html.setElementAt(new TextNode(compacted.toString()), firstPos);
440
441                // Remove the rest of the positions in the Vector that had TextNode's.  These have
442                // all been put together into the "Aggregate TextNode" at position "firstPos"
443
444                Util.Remove.range(html, firstPos + 1, i);
445
446                // The change in the size of the Vector needs to be accounted for.
447                delta += (i - firstPos - 1);
448
449                // Change the loop-counter variable, too, since the size of the Vector has changed.
450                i = firstPos + 1;
451
452                // Since we just hit a CommentNode, or TagNode, exit "Compacting Mode."
453                compacting = false;
454
455            }
456
457            // NOTE: This, ALSO, MUST BE a TagNode or CommentNode (just like the previous
458            //       if-else branch !)
459            // TRICKY: Don't forget this 'else' !
460
461            else compacting = false;
462
463        // Added - Don't forget the case where the Vector ends with a series of TextNodes
464        // TRICKY TOO! (Same as the HTML Parser... The ending or 'trailing' nodes must be parsed
465
466        int lastNodePos = html.size() - 1;
467
468        if (html.elementAt(lastNodePos).isTextNode()) if (compacting && (firstPos < lastNodePos))
469        {
470            StringBuilder compacted = new StringBuilder();
471
472            // Compact the TextNodes that were identified at the end of the Vector range.
473            for (int j=firstPos; j <= lastNodePos; j++) compacted.append(html.elementAt(j).str);
474
475            // Replace the group of TextNode's at the end of the Vector, with the single, aggregate
476            html.setElementAt(new TextNode(compacted.toString()), firstPos);
477            Util.Remove.range(html, firstPos + 1, lastNodePos + 1);
478        }
479
480        return delta;
481    }
482
483
484    // ********************************************************************************************
485    // ********************************************************************************************
486    // String-Length Operations
487    // ********************************************************************************************
488    // ********************************************************************************************
489
490
491    /**
492     * Convenience Method.
493     * <BR />Invokes: {@link #strLength(Vector, int, int)}
494     */
495    public static int strLength(Vector<? extends HTMLNode> html)
496    { return strLength(html, 0, -1); }
497
498    /**
499     * Convenience Method.
500     * <BR />Receives: {@code DotPair}
501     * <BR />Invokes: {@link #strLength(Vector, int, int)} 
502     */
503    public static int strLength(Vector<? extends HTMLNode> html, DotPair dp)
504    { return strLength(html, dp.start, dp.end + 1); }
505
506    /**
507     * This method simply adds / sums the {@code String}-length of every {@code HTMLNode.str }
508     * field in the passed page-{@code Vector}.  It only counts nodes between parameters
509     * {@code sPos} (inclusive) and {@code ePos} (exclusive).
510     * 
511     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
512     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
513     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
514     * 
515     * @return The total length <B><I>- in characters -</I></B> of the sub-page of HTML between
516     * {@code 'sPos'} and {@code 'ePos'}
517     * 
518     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
519     * 
520     * @see #strLength(Vector)
521     */
522    public static int strLength(Vector<? extends HTMLNode> html, int sPos, int ePos)
523    {
524        int ret = 0;
525        LV  l   = new LV(html, sPos, ePos);
526
527        for (int i=l.start; i < l.end; i++) ret += html.elementAt(i).str.length();
528
529        return ret;
530    }
531
532
533    // ********************************************************************************************
534    // ********************************************************************************************
535    // Hash-Code Operations
536    // ********************************************************************************************
537    // ********************************************************************************************
538
539
540    /**
541     * Convenience Method.
542     * <BR />Invokes: {@link #hashCode(Vector, int, int)}
543     */
544    public static int hashCode(Vector<? extends HTMLNode> html)
545    { return hashCode(html, 0, -1); }
546
547    /**
548     * Convenience Method.
549     * <BR />Receives: {@code DotPair}
550     * <BR />Invokes: {@link #hashCode(Vector, int, int)} 
551     */
552    public static int hashCode(Vector<? extends HTMLNode> html, DotPair dp)
553    { return hashCode(html, dp.start, dp.end + 1); }
554
555    /**
556     * Generates a hash-code for a vectorized html page-{@code Vector}.
557     * 
558     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
559     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
560     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
561     * 
562     * @return Returns the {@code String.hashCode()} of the <I><B>partial HTML-page</B></i> as if
563     * it were not being stored as a {@code Vector}, but rather as HTML inside of a
564     * Java-{@code String}.
565     * 
566     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
567     * 
568     * @see #hashCode(Vector)
569     */
570    public static int hashCode(Vector<? extends HTMLNode> html, int sPos, int ePos)
571    {
572        int h   = 0;
573        LV  lv  = new LV(html, sPos, ePos);
574
575        for (int j=lv.start; j < lv.end; j++)
576        {
577            String  s = html.elementAt(j).str;
578            int     l = s.length();
579
580            // This line has been copied from the jdk8/jdk8 "String.hashCode()" method.
581            // The difference is that it iterates over the entire vector
582
583            for (int i=0; i < l; i++) h = 31 * h + s.charAt(i);
584        }
585
586        return h;
587    }
588
589
590    // ********************************************************************************************
591    // ********************************************************************************************
592    // JSON Script Nodes
593    // ********************************************************************************************
594    // ********************************************************************************************
595
596
597    /**
598     * Convenience Method.
599     * <BR />Invokes: {@link #getJSONScriptBlocks(Vector, int, int)}
600     */
601    public static Stream<String> getJSONScriptBlocks(Vector<HTMLNode> html)
602    { return getJSONScriptBlocks(html, 0, -1); }
603
604    /**
605     * Convenience Method.
606     * <BR />Receives: {@code DotPair}.
607     * <BR />Invokes: {@link #getJSONScriptBlocks(Vector, int, int)}
608     */
609    public static Stream<String> getJSONScriptBlocks(Vector<HTMLNode> html, DotPair dp)
610    { return getJSONScriptBlocks(html, dp.start, dp.end + 1); }
611
612    /**
     * This method shall search for any and all {@code <SCRIPT TYPE="json">}
     * <I>JSON TEXT</I> {@code </SCRIPT>} blocks present in a range of Vectorized HTML.  The
     * search method shall simply look for the token {@code "JSON"} in the {@code TYPE} attribute
     * of each and every {@code <SCRIPT> TagNode} that is found on the page.  The contents of
     * such blocks <I>are not checked for validity, nor are they even guaranteed to be
     * {@code JSON} data!</I>
619     * 
620     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
621     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
622     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
623     * 
624     * @return This will return a {@code java.util.stream.Stream<String>} of each of the 
625     * {@code JSON} elements present in the specified range of the Vectorized HTML passed to
626     * parameter {@code 'html'}.
627     * 
628     * <EMBED CLASS='external-html' DATA-FILE-ID=STRMCNVT>
629     * 
630     * @see StrTokCmpr#containsIgnoreCase(String, Predicate, String)
631     * @see Util#rangeToString(Vector, int, int)
632     */
633    public static Stream<String> getJSONScriptBlocks(Vector<HTMLNode> html, int sPos, int ePos)
634    {
635        // Whenever building lists, it is usually easiest to use a Stream.Builder
636        Stream.Builder<String> b = Stream.builder();
637
638        // This Predicate simply tests that if the substring "json" (CASE INSENSITIVE) is found
639        // in the TYPE attribute of a <SCRIPT TYPE=...> node, that the token-string is, indeed a
640        // word - not a substring of some other word.  For instance: TYPE="json" would PASS, but
641        // TYPE="rajsong" would FAIL - because the token string is not surrounded by white-space
642
643        final Predicate<String> tester = (String s) ->
644            StrTokCmpr.containsIgnoreCase
645                (s, (Character c) -> ! Character.isLetterOrDigit(c), "json");
646
647        // Find all <SCRIPT> node-blocks whose "TYPE" attribute abides by the tester
648        // String-Predicate named above.
649
650        Vector<DotPair> jsonDPList = InnerTagFindInclusive.all
651            (html, sPos, ePos, "script", "type", tester);
652
653        // Convert each of these DotPair element into a java.lang.String
654        // Add the String to the Stream.Builder<String>
655
656        for (DotPair jsonDP : jsonDPList)
657            if (jsonDP.size() > 2)
658                b.accept(Util.rangeToString(html, jsonDP.start + 1, jsonDP.end));
659
660        // Build the Stream, and return it.
661        return b.build();
662    }
663
664
665    // ********************************************************************************************
666    // ********************************************************************************************
667    // MISC
668    // ********************************************************************************************
669    // ********************************************************************************************
670
671
672    /**
673     * Inserts nodes, and allows a 'varargs' parameter.
674     * 
675     * @param html Any HTML Page
676     * 
677     * @param pos The position in the original {@code Vector} where the nodes shall be inserted.
678     * 
679     * @param nodes A list of nodes to insert.
680     */
681    public static void insertNodes(Vector<HTMLNode> html, int pos, HTMLNode... nodes)
682    {
683        Vector<HTMLNode> nodesVec = new Vector<>(nodes.length);
684        for (HTMLNode node : nodes) nodesVec.addElement(node);
685        html.addAll(pos, nodesVec);
686    }
687
688    /**
689     * Convenience Method.
690     * <BR />Invokes: {@link #replaceRange(Vector, int, int, Vector)}
691     */
692    public static void replaceRange
693        (Vector<HTMLNode> page, DotPair range, Vector<HTMLNode> newNodes)
694    { replaceRange(page, range.start, range.end+1, newNodes); }
695
696    /**
     * Replaces any and all {@code HTMLNode's} located between the {@code Vector} locations
     * {@code 'sPos'} (inclusive) and {@code 'ePos'} (exclusive).  By exclusive, this means that
     * the {@code HTMLNode} located at position {@code 'ePos'} <B><I>will not</I></B> be replaced,
     * but the one at {@code 'sPos'} <I><B>is replaced</B></I>.
     * 
     * <BR /><BR />The size of the {@code Vector} will change by {@code newNodes.size() - 
     * (ePos - sPos)}.  The contents situated between {@code Vector} location {@code sPos} and
     * {@code sPos + newNodes.size()} will, indeed, be the contents of the {@code 'newNodes'}
     * parameter.
706     * 
707     * @param page Any Java HTML page, constructed of {@code HTMLNode (TagNode & TextNode)}
708     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
709     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
710     * @param newNodes Any Java HTML page-{@code Vector} of {@code HTMLNode}.
711     * 
712     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
713     * 
714     * @see #pollRange(Vector, int, int)
715     * @see Remove#range(Vector, int, int)
716     * @see #replaceRange(Vector, DotPair, Vector)
717     */
718    public static void replaceRange
719        (Vector<HTMLNode> page, int sPos, int ePos, Vector<HTMLNode> newNodes)
720    {
721        // Torello.Java.LV
722        LV l = new LV(sPos, ePos, page);
723
724        int oldSize     = ePos - sPos;
725        int newSize     = newNodes.size();
726        int insertPos   = sPos;
727        int i           = 0;
728
729        while ((i < newSize) && (i < oldSize))
730            page.setElementAt(newNodes.elementAt(i++), insertPos++);
731
732
733        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
734        // CASE ONE:
735        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
736
737        if (newSize == oldSize) return;
738
739
740        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
741        // CASE TWO:
742        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
743        //
744        // The new Vector is SMALLER than the old sub-range
745        // The rest of the nodes just need to be trashed
746        //
747        // OLD-WAY: (Before realizing what Vector.subList is actually doing)
748        // Util.removeRange(page, insertPos, ePos);
749
750        if (newSize < oldSize) page.subList(insertPos, ePos).clear();
751
752
753        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
754        // CASE THREE:
755        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
756        //
757        // The new Vector is BIGGER than the old sub-range
758        // There are still more nodes to insert.
759
760        else page.addAll(ePos, newNodes.subList(i, newSize));
761    }
762
763    /**
764     * Java's {@code java.util.Vector} class does not allow public access to the
765     * {@code removeRange(start, end)} function.  It is listed as {@code 'protected'} in Java's
766     * Documentation about the {@code class Vector.}  This method upstages that, and performs the
767     * {@code 'Poll'} operation, where the nodes are first removed, stored, and then return as a
768     * function result.
769     * 
770     * <BR /><BR /><B CLASS=JDDescLabel>Poll a Range:</B>
771     * 
772     * <BR />The nodes that are removed are placed in a separate return {@code Vector}, and
773     * returned as a result to this method.
774     * 
775     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
776     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
777     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
778     * 
779     * @return A complete list ({@code Vector<HTMLNode>}) of the nodes that were removed.
780     * 
781     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
782     * 
783     * @see Remove#range(Vector, int, int)
784     * @see Remove#range(Vector, DotPair)
785     * @see #pollRange(Vector, DotPair)
786     */
787    public static Vector<HTMLNode> pollRange(Vector<? extends HTMLNode> html, int sPos, int ePos)
788    {
789        // The original version of this method is preserved inside comments at the bottom of this
790        // method.  Prior to seeing the Sun-Oracle Docs explaining that the return from the SubList
791        // operation "mirrors changes" back to to the original vector, the code in the comments is
792        // how this method was accomplished.
793
794        LV                          l       = new LV(html, sPos, ePos);
795        Vector<HTMLNode>            ret     = new Vector<HTMLNode>(l.end - l.start);
796        List<? extends HTMLNode>    list    = html.subList(l.start, l.end);
797
798        // Copy the Nodes into the return Vector that the end-user receives
799        ret.addAll(list);
800
801        // Clear the nodes out of the original Vector.  The Sun-Oracle Docs 
802        // state that the returned sub-list is "mirrored back into" the original
803
804        list.clear();
805
806        // Return the Vector to the user.  Note that the List<HTMLNode> CANNOT be returned,
807        // because of it's mirror-qualities, and because this method expects a vector.
808
809        return ret;
810
811        /*
812        // BEFORE READING ABOUT Vector.subList(...), this is how this was accomplished:
813        // NOTE: It isn't so clear how the List<HTMLNode> works - likely it doesn't actually
814        //       create any new memory-allocated arrays, it is just an "overlay"
815
816        // Copy the elements from the input vector into the return vector
817        for (int i=l.start; i < l.end; i++) ret.add(html.elementAt(i));
818
819        // Remove the range from the input vector (this is the meaning of 'poll')
820        Util.removeRange(html, sPos, ePos);
821
822        return ret;
823        */
824    }
825
826    /**
827     * Convenience Method.
828     * <BR />Receives: {@code DotPair}
829     * <BR />Invokes: {@link #pollRange(Vector, int, int)}. 
830     */
831    public static Vector<HTMLNode> pollRange(Vector<? extends HTMLNode> html, DotPair dp)
832    { return pollRange(html, dp.start, dp.end + 1); }
833
834    /**
835     * This removes every element from the {@code Vector} beginning at position 0, all the way to
836     * position {@code 'pos'} (exclusive).  The {@code elementAt(pos)} remains in the original page
837     * input-{@code Vector}.  This is the definition of 'exclusive'.
838     * 
839     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
840     * 
841     * @param pos Any position within the range of the input {@code Vector}.
842     * 
843     * @return The elements in the {@code Vector} from position: {@code 0 ('zero')} all the way to
844     * position: {@code 'pos'}
845     */
846    public static Vector<HTMLNode> split(Vector<? extends HTMLNode> html, int pos)
847    { return pollRange(html, 0, pos); }
848
849
850    // ********************************************************************************************
851    // ********************************************************************************************
852    // Static Inner-Class: Count 
853    // ********************************************************************************************
854    // ********************************************************************************************
855
856
857    @Torello.JavaDoc.StaticFunctional
858    public static class Count 
859    {
860        private Count() { }
861
862
863        // ****************************************************************************************
864        // ****************************************************************************************
865        // Count TextNode's
866        // ****************************************************************************************
867        // ****************************************************************************************
868
869
870        /**
871         * Convenience Method.
872         * <BR />Invokes: {@link #textNodes(Vector, int, int)}
873         */
874        public static int textNodes(Vector<HTMLNode> page)
875        { return textNodes(page, 0, -1); }
876
877        /**
878         * Convenience Method.
879         * <BR />Receives: {@code DotPair}
880         * <BR />Invokes: {@link #textNodes(Vector, int, int)}
881         */
882        public static int textNodes(Vector<HTMLNode> page, DotPair dp)
883        { return textNodes(page, dp.start, dp.end + 1); }
884
885        /**
886         * Counts the number of {@code TextNode's} in a {@code Vector<HTMLNode>} between the
887         * demarcated array / {@code Vector} positions, {@code 'sPos'} and {@code 'ePos'}
888         * 
889         * @param page Any HTML page.
890         * 
891         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
892         * 
893         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
894         * 
895         * @return The number of {@code TextNode's} in the {@code Vector} between the demarcated
896         * indices.
897         * 
898         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
899         */
900        public static int textNodes(Vector<HTMLNode> page, int sPos, int ePos)
901        {
902            int counter = 0;
903            LV  l       = new LV(page, sPos, ePos);
904
905            // Iterates the entire page between sPos and ePos, incrementing the count for every
906            // instance of text-node.
907
908            for (int i=l.start; i < l.end; i++) if (page.elementAt(i).isTextNode()) counter++;
909
910            return counter;
911        }
912
913
914        // ****************************************************************************************
915        // ****************************************************************************************
916        // Count CommentNode's
917        // ****************************************************************************************
918        // ****************************************************************************************
919
920
921        /**
922         * Convenience Method.
923         * <BR />Invokes: {@link #commentNodes(Vector, int, int)}
924         */
925        public static int commentNodes(Vector<HTMLNode> page)
926        { return commentNodes(page, 0, -1); }
927
928        /**
929         * Convenience Method.
930         * <BR />Receives: {@code DotPair}
931         * <BR />Invokes: {@link #commentNodes(Vector, int, int)} 
932         */
933        public static int commentNodes(Vector<HTMLNode> page, DotPair dp)
934        { return commentNodes(page, dp.start, dp.end + 1); }
935
936        /**
937         * Counts the number of {@code CommentNode's} in an {@code Vector<HTMLNode>} between the
938         * demarcated array / {@code Vector} positions.
939         * 
940         * @param page Any HTML page.
941         * 
942         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
943         * 
944         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
945         * 
946         * @return The number of {@code CommentNode's} in the {@code Vector} between the demarcated
947         * indices.
948         * 
949         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
950         */
951        public static int commentNodes(Vector<HTMLNode> page, int sPos, int ePos)
952        {
953            int counter = 0;
954            LV  l       = new LV(page, sPos, ePos);
955
956            // Iterates the entire page between sPos and ePos, incrementing the count for every
957            // instance of comment-node.
958
959            for (int i=l.start; i < l.end; i++)  if (page.elementAt(i).isCommentNode()) counter++;
960
961            return counter;
962        }
963
964
965        // ****************************************************************************************
966        // ****************************************************************************************
967        // Count TagNode's
968        // ****************************************************************************************
969        // ****************************************************************************************
970
971
972        /**
973         * Convenience Method.
974         * <BR />Invokes: {@link #tagNodes(Vector, int, int)}
975         */
976        public static int tagNodes(Vector<HTMLNode> page)
977        { return tagNodes(page, 0, -1); }
978
979        /**
980         * Convenience Method.
981         * <BR />Receives: {@code DotPair}
982         * <BR />Invokes: {@link #tagNodes(Vector, int, int)} 
983         */
984        public static int tagNodes(Vector<HTMLNode> page, DotPair dp)
985        { return tagNodes(page, dp.start, dp.end + 1); }
986
987        /**
988         * Counts the number of {@code TagNode's} in a {@code Vector<HTMLNode>} between the
989         * demarcated array / {@code Vector} positions.
990         * 
991         * @param page Any HTML page.
992         * 
993         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
994         * 
995         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
996         * 
997         * @return The number of {@code TagNode's} in the {@code Vector}.
998         * 
999         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
1000         */
1001        public static int tagNodes(Vector<HTMLNode> page, int sPos, int ePos)
1002        {
1003            int counter = 0;
1004            LV  l       = new LV(page, sPos, ePos);
1005
1006            // Iterates the entire page between sPos and ePos, incrementing the count for every
1007            // instance of TagNode.
1008
1009            for (int i=l.start; i < l.end; i++) if (page.elementAt(i).isTagNode()) counter++;
1010
1011            return counter;
1012        }
1013
1014
1015        // ****************************************************************************************
1016        // ****************************************************************************************
1017        // Count TagNode's, put results in a java table/map
1018        // ****************************************************************************************
1019        // ****************************************************************************************
1020
1021
1022        /**
1023         * Convenience Method.
1024         * <BR />Invokes: {@link #tagNodes(Vector, int, int)}
1025         */
1026        public static Ret2<
1027                Hashtable<String, Integer>,
1028                Hashtable<String, Integer>
1029            >
1030            tagNodesToTable(Vector<HTMLNode> page)
1031        { return tagNodesToTable(page, 0, -1); }
1032
1033        /**
1034         * Convenience Method.
1035         * <BR />Receives: {@code DotPair}
1036         * <BR />Invokes: {@link #tagNodesToTable(Vector, int, int)} 
1037         */
1038        public static Ret2<
1039                Hashtable<String, Integer>,
1040                Hashtable<String, Integer>
1041            >
1042            tagNodesToTable(Vector<HTMLNode> page, DotPair dp)
1043        { return tagNodesToTable(page, dp.start, dp.end + 1); }
1044
1045        /**
1046         * For each tag in HTML-5 (according to class {@link HTMLTags}, this method counts the
1047         * number of instances of each {@code TagNode} contained by a {@code Vector<HTMLNode>}.
1048         * The count is performed on nodes between the parameter-provided array-indices, and the
1049         * results are placed into two {@code Hashtable's}.
1050         * 
1051         * @param page Any HTML page.
1052         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
1053         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
1054         * 
1055         * @return The returned {@link Ret2} instance contains the following data:
1056         * 
1057         * <BR /><BR /><UL CLASS=JDUL>
1058         * <LI> <B STYLE='color: red;'>{@code ret2.a}:</B>
1059         * 
1060         *      <BR /><BR />A {@code java.util.Hashtable} that contains one entry for each HTML-Tag
1061         *      present within the page's demarcated array-indicies - {@code 'sPos'} and
1062         *      {@code 'ePos'}.
1063         * 
1064         *      <BR /><BR />The keys in this table are Java {@code String's} that contain a 
1065         *      Lower-Case {@link TagNode#tok Tag-Token} (such as: "div", "p", "span", etc...).
1066         *      The values in this table contain a count on <B CLASS='color: red;'>the number of
1067         *      Open-Tags that were identified within the page.</B>
1068         *      <BR /><BR /></LI>
1069         * 
1070         * <LI> <B STYLE='color: red;'>{@code ret2.b}:</B>
1071         * 
1072         *      <BR /><BR />A {@code java.util.Hashtable} with counts for each and every 
1073         *      "Closed Tag" on the page, all in an identical manner to that which was described,
1074         *      above, for {@code ret2.a} - except the counts in this table are for Closed-Tag's
1075         *      rather than Open-Tag's - {@code </div>} tags, rather than {@code <DIV ...>} tags.
1076         *      </LI>
1077         * 
1078         * </UL>
1079         * 
1080         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
1081         */
1082        public static Ret2<
1083                Hashtable<String, Integer>,
1084                Hashtable<String, Integer>
1085            >
1086            tagNodesToTable(Vector<HTMLNode> page, int sPos, int ePos)
1087        {
1088            LV      l   = new LV(page, sPos, ePos);
1089            TagNode tn  = null;
1090
1091            Hashtable<String, Integer> openTags     = new Hashtable<>();
1092            Hashtable<String, Integer> closedTags   = new Hashtable<>();
1093
1094            // Iterates the entire page between sPos and ePos, incrementing the count for every
1095            // instance of TagNode.
1096
1097            for (int i=l.start; i < l.end; i++)
1098            {
1099                if ((tn = page.elementAt(i).ifTagNode()) == null) continue;
1100
1101                Hashtable<String, Integer>  ht      = tn.isClosing ? closedTags : openTags;
1102                Integer                     count   = ht.get(tn.tok);
1103
1104                if (count == null)  count = 1;
1105                else                count = count + 1;
1106
1107                ht.put(tn.tok, count);
1108            }
1109
1110            return new Ret2<>(openTags, closedTags);
1111        }
1112
1113
1114        // ****************************************************************************************
1115        // ****************************************************************************************
1116        // Count New Lines
1117        // ****************************************************************************************
1118        // ****************************************************************************************
1119
1120
1121        /**
1122         * Convenience Method.
1123         * <BR />Invokes: {@link #newLines(Vector, int, int)}
1124         */
1125        public static int newLines(Vector<? extends HTMLNode> html)
1126        { return newLines(html, 0, -1); }
1127
1128        /**
1129         * Convenience Method.
1130         * <BR />Receives: {@code DotPair}
1131         * <BR />Invokes: {@link #newLines(Vector, int, int)} 
1132         */
1133        public static int newLines(Vector<? extends HTMLNode> html, DotPair dp)
1134        { return newLines(html, dp.start, dp.end + 1); }
1135
1136
1137        /**
1138         * This will count the number of new-line symbols present <B><I>- on the partial HTML
1139         * page</I></B>. The count will include a sum of every {@code HTMLNode.str} that
1140         * contains the standard new-line symbols: {@code \r\n, \r, \n}, meaning that UNIX, MSFT,
1141         * Apple, etc. forms of text-line rendering should all be treated equally.
1142         * 
1143         * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
1144         * 
1145         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
1146         * 
1147         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
1148         * 
1149         * @return The number of new-line characters in all of the {@code HTMLNode's} that occur
1150         * between vectorized-page positions {@code 'sPos'} and {@code 'ePos.'}
1151         * 
1152         * <BR /><BR /><B>NOTE:</B> The regular-expression used here 'NEWLINEP' is as follows:
1153         * 
1154         * <DIV CLASS="SNIP">{@code
1155         * private static final Pattern NEWLINEP = Pattern.compile("\\r\\n|\\r|\\n");
1156         * }</DIV>
1157         * 
1158         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
1159         * 
1160         * @see StringParse#NEWLINEP
1161         */
1162        public static int newLines(Vector<? extends HTMLNode> html, int sPos, int ePos)
1163        {
1164            int newLineCount    = 0;
1165            LV  l               = new LV(html, sPos, ePos);
1166
1167            for (int i=l.start; i < l.end; i++)
1168
1169                // Uses the Torello.Java.StringParse "New Line RegEx"
1170                for (   Matcher m = StringParse.NEWLINEP.matcher(html.elementAt(i).str);
1171                        m.find();
1172                        newLineCount++);
1173
1174            return newLineCount;
1175        }
1176    }
1177
1178
1179    // ********************************************************************************************
1180    // ********************************************************************************************
1181    // Static Inner-Class: Remove 
1182    // ********************************************************************************************
1183    // ********************************************************************************************
1184
1185
1186    @Torello.JavaDoc.StaticFunctional
1187    public static class Remove 
1188    {
1189        private Remove() { }
1190
1191
1192        // ****************************************************************************************
1193        // ****************************************************************************************
1194        // TextNode Removal Operations
1195        // ****************************************************************************************
1196        // ****************************************************************************************
1197
1198
1199        /**
1200         * Convenience Method.
1201         * <BR />Invokes: {@link #allTextNodes(Vector, int, int)}
1202         */
1203        public static int allTextNodes(Vector<HTMLNode> page)
1204        { return allTextNodes(page, 0, -1); }
1205
1206        /**
1207         * Convenience Method.
1208         * <BR />Receives: {@code DotPair}
1209         * <BR />Invokes: {@link #allTextNodes(Vector, int, int)}
1210         */
1211        public static int allTextNodes(Vector<HTMLNode> page, DotPair dp)
1212        { return allTextNodes(page, dp.start, dp.end + 1); }
1213
1214        /**
1215         * Takes a sub-section of an HTML {@code Vector} and removes all {@code TextNode} present
1216         * 
1217         * @param page Any HTML page
1218         * 
1219         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
1220         * 
1221         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
1222         * 
1223         * @return The number of HTML {@code TextNode's} that were removed
1224         * 
1225         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
1226         * 
1227         * @see TextNode
1228         * @see #nodesOPT(Vector, int[])
1229         */
1230        public static int allTextNodes(Vector<HTMLNode> page, int sPos, int ePos)
1231        {
1232            IntStream.Builder   b = IntStream.builder();
1233            LV                  l = new LV(page, sPos, ePos);
1234
1235            // Use Java-Streams to build the list of nodes that are valid text-nodes.
1236            for (int i=l.start; i < l.end; i++) if (page.elementAt(i).isTextNode()) b.add(i);
1237
1238            // Build the stream and convert it to an int[] (integer-array)
1239            int[] posArr = b.build().toArray();
1240
1241            // The integer array is guaranteed to be sorted, and contain valid vector-indices.
1242            nodesOPT(page, posArr);
1243
1244            return posArr.length;
1245        }
1246
1247
1248        // ****************************************************************************************
1249        // ****************************************************************************************
1250        // TagNode Removal Operations
1251        // ****************************************************************************************
1252        // ****************************************************************************************
1253
1254
1255        /**
1256         * Convenience Method.
1257         * <BR />Invokes: {@link #allTagNodes(Vector, int, int)}
1258         */
1259        public static int allTagNodes(Vector<HTMLNode> page) 
1260        { return allTagNodes(page, 0, -1); }
1261
1262        /**
1263         * Convenience Method.
1264         * <BR />Receives: {@code DotPair} 
1265         * <BR />Invokes: {@link #allTagNodes(Vector, int, int)}
1266         */
1267        public static int allTagNodes(Vector<HTMLNode> page, DotPair dp)
1268        { return allTagNodes(page, dp.start, dp.end + 1); }
1269
1270        /**
1271         * Takes a sub-section of an HTML {@code Vector} and removes all {@code TagNode} present
1272         * 
1273         * @param page Any HTML page
1274         * 
1275         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
1276         * 
1277         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
1278         * 
1279         * @return The number of HTML {@code TagNode's} that were removed
1280         * 
1281         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
1282         * 
1283         * @see TagNode
1284         * @see #nodesOPT(Vector, int[])
1285         */
1286        public static int allTagNodes(Vector<HTMLNode> page, int sPos, int ePos)
1287        {
1288            IntStream.Builder   b = IntStream.builder();
1289            LV                  l = new LV(page, sPos, ePos);
1290
1291            // Use Java-Streams to build the list of nodes that are valid tag-nodes.
1292            for (int i=l.start; i < l.end; i++) if (page.elementAt(i).isTagNode()) b.add(i);
1293
1294            // Build the stream and convert it to an int[] (integer-array)
1295            int[] posArr = b.build().toArray();
1296
1297            // The integer array is guaranteed to be sorted, and contain valid vector-indices.
1298            nodesOPT(page, posArr);
1299
1300            return posArr.length;
1301        }
1302
1303
1304        // ****************************************************************************************
1305        // ****************************************************************************************
1306        // CommentNode Removal Operations
1307        // ****************************************************************************************
1308        // ****************************************************************************************
1309
1310
1311        /**
1312         * Convenience Method.
1313         * <BR />Invokes: {@link #allCommentNodes(Vector, int, int)}
1314         */
1315        public static int allCommentNodes(Vector<HTMLNode> page)
1316        { return allCommentNodes(page, 0, -1); }
1317
1318        /**
1319         * Convenience Method.
1320         * <BR />Receives: {@code DotPair}
1321         * <BR />Invokes: {@link #allCommentNodes(Vector, int, int)}
1322         */
1323        public static int allCommentNodes(Vector<HTMLNode> page, DotPair dp)
1324        { return allCommentNodes(page, dp.start, dp.end + 1); }
1325
1326        /**
1327         * Takes a sub-section of an HTML {@code Vector} and removes all {@code CommentNode}
1328         * present
1329         * 
1330         * @param page Any HTML page
1331         * 
1332         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
1333         * 
1334         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
1335         * 
1336         * @return The number of HTML {@code CommentNode's} that were removed
1337         * 
1338         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
1339         * 
1340         * @see CommentNode
1341         * @see #nodesOPT(Vector, int[])
1342         */
1343        public static int allCommentNodes(Vector<HTMLNode> page, int sPos, int ePos)
1344        {
1345            IntStream.Builder   b       = IntStream.builder();
1346            LV                  l       = new LV(page, sPos, ePos);
1347
1348            // Use Java-Streams to build the list of nodes that are valid comment-nodes.
1349            for (int i=l.start; i < l.end; i++)
1350                if (page.elementAt(i).isCommentNode())
1351                    b.add(i);
1352
1353            // Build the stream and convert it to an int[] (integer-array)
1354            int[] posArr = b.build().toArray();
1355
1356            // The integer array is guaranteed to be sorted, and contain valid vector-indices.
1357            nodesOPT(page, posArr);
1358
1359            return posArr.length; 
1360        }
1361
1362
1363        // ****************************************************************************************
1364        // ****************************************************************************************
1365        // Remove All Inner Tags
1366        // ****************************************************************************************
1367        // ****************************************************************************************
1368
1369
1370        /**
1371         * Convenience Method.
1372         * <BR />Invokes: {@link #allInnerTags(Vector, int, int)}
1373         */
1374        public static int allInnerTags(Vector<HTMLNode> html)
1375        { return allInnerTags(html, 0, -1); }
1376
1377        /**
1378         * Convenience Method.
1379         * <BR />Receives: {@code DotPair}
1380         * <BR />Invokes: {@link #allInnerTags(Vector, int, int)}
1381         */
1382        public static int allInnerTags(Vector<? super TagNode> html, DotPair dp)
1383        { return allInnerTags(html, dp.start, dp.end + 1); }
1384
1385        /**
1386         * This method removes all inner-tags (all attributes) from every {@link TagNode} inside of
1387         * an HTML page.  It does this by replacing every {@code TagNode} in the {@code Vector}
1388         * with the pre-instantiated, publicly-available {@code TagNode} which can be obtained by a
1389         * call to the class {@code HTMLTags.hasTag(token, TC)}.
1390         * 
1391         * <BR /><BR /><B CLASS=JDDescLabel>Replacing {@code TagNode's:}</B>
1392         * 
1393         * <BR />This method determines whether a fresh {@link TagNode} is to be inserted by
1394         * measuring the length of the internal {@link TagNode#str} field (a {@code String} field).
1395         * If the length {@code TagNode.str} is not equal to the HTML token {@link TagNode#tok}
1396         * length <B><I>plus 2</I></B>, then a fresh, pre-instantiated, node is replaced.
1397         * 
1398         * <BR /><BR />The {@code '+2'} figure comes from the additional characters {@code '<'} and
1399         * {@code '>'} that start and end every HTML {@code TagNode}
1400         * 
1401         * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
1402         * 
1403         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
1404         * 
1405         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
1406         * 
1407         * @return The number of {@code TagNode} elements that have were replaced with
1408         * zero-attribute HTML Element Tags.
1409         * 
1410         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
1411         *
1412         * @throws ClassCastException If {@code 'html'} contains references that do not inherit
1413         * {@code HTMLNode}.
1414         */
1415        @SuppressWarnings("unchecked")
1416        public static int allInnerTags(Vector<? super TagNode> html, int sPos, int ePos)
1417        {
1418            int     ret = 0;
1419            LV      l   = new LV(sPos, ePos, html);
1420            TagNode tn;
1421
1422            for (int i = (l.end-1); i >= l.start; i--)
1423
1424                if ((tn = ((HTMLNode) html.elementAt(i)).openTagPWA()) != null)
1425
1426                {
1427                    ret++;
1428
1429                    // HTMLTags.hasTag(tok, TC) gets an empty and pre-instantiated TagNode,
1430                    // where TagNode.tok == 'tn.tok' and TagNode.isClosing = false
1431
1432                    html.setElementAt(HTMLTags.hasTag(tn.tok, TC.OpeningTags), i);
1433                }
1434
1435            return ret;
1436        }
1437
1438
1439        // ****************************************************************************************
1440        // ****************************************************************************************
1441        // Style-Node & Script-Node Block Removal Operations
1442        // ****************************************************************************************
1443        // ****************************************************************************************
1444
1445
1446        /**
1447         * Removes all HTML {@code 'style'} Node blocks.
1448         * 
1449         * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
1450         * 
1451         * @return The number of {@code <STYLE>}-Node Blocks that were removed
1452         */
1453        public static int styleNodeBlocks(Vector<? extends HTMLNode> html)
1454        {
1455            int removeCount = 0;
1456
1457            while (TagNodeRemoveInclusive.first(html, "style") > 0) removeCount++;
1458
1459            return removeCount;
1460        }
1461
1462        /**
1463         * Removes all {@code 'script'} Node blocks.
1464         * 
1465         * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
1466         * 
1467         * @return The number of {@code SCRIPT}-Node Blocks that were removed
1468         */
1469        public static int scriptNodeBlocks(Vector<? extends HTMLNode> html)
1470        {
1471            int removeCount = 0;
1472
1473            while (TagNodeRemoveInclusive.first(html, "script") > 0) removeCount++;
1474
1475            return removeCount;
1476        }
1477
1478
1479        // ****************************************************************************************
1480        // ****************************************************************************************
1481        // Remove a Sub-Range of nodes
1482        // ****************************************************************************************
1483        // ****************************************************************************************
1484
1485
1486        /**
1487         * Java's {@code java.util.Vector} class does not allow public access to the
1488         * {@code removeRange(start, end)} function.  It is protected in Java's Documentation about
1489         * the {@code Vector} class.  This method does exactly that, nothing else.
1490         * 
1491         * @param page Any Java HTML page, constructed of {@code HTMLNode (TagNode & TextNode)}
1492         * 
1493         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
1494         * 
1495         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
1496         * 
1497         * @return the number of nodes removed.
1498         * 
1499         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
1500         * 
1501         * @see #pollRange(Vector, int, int)
1502         * @see #range(Vector, DotPair)
1503         */
1504        public static <T extends HTMLNode> int range(Vector<T> page, int sPos, int ePos)
1505        {
1506            // Torello.Java.LV
1507            LV  l = new LV(sPos, ePos, page);
1508
1509            // According to the Sun-Oracle Docs, the returned sublist "mirros" the original vector,
1510            // which means that when it is changed, so is the original vector.
1511
1512            page.subList(l.start, l.end).clear();
1513
1514            return l.size();
1515        }
1516
1517        /**
1518         * Convenience Method.
1519         * <BR />Receives: {@code DotPair}
1520         * <BR />Invokes: {@link #range(Vector, int, int)} 
1521         */
1522        public static int range(Vector<? extends HTMLNode> html, DotPair dp)
1523        { return range(html, dp.start, dp.end + 1); }
1524
1525
1526        // ****************************************************************************************
1527        // ****************************************************************************************
1528        // Remove Specified Nodes by Vector-Index
1529        // ****************************************************************************************
1530        // ****************************************************************************************
1531
1532
1533        /**
1534         * <SPAN STYLE="color: red;"><B>OPT: Optimized</B></SPAN>
1535         * 
1536         * <BR /><BR />This method does the same thing as
1537         * {@link Remove#nodes(boolean, Vector, int[])}, but all error checking is skipped, and the
1538         * input integer array is presumed to have been sorted. There are no guarantees about the
1539         * behavior of this method if the input array {@code 'posArr'} is not sorted,
1540         * <I>least-to-greatest,</I> or if there are duplicate or negative values in this array.
1541         * 
1542         * <BR /><BR /><B CLASS=JDDescLabel>Empty Var-Args:</B>
1543         * 
1544         * <BR />If the var-args input integer-array parameter is empty, this method shall exit
1545         * gracefully (and immediately).
1546         * 
1547         * @param page Any HTML-Page, usually ones generated by {@code HTMLPage.getPageTokens}, but
1548         * these may be obtained or created in any fashion so necessary.
1549         * 
1550         * @param posArr An array of integers which list/identify the nodes in the page to be
1551         * removed. Because this implementation has been optimized, no error checking will be
1552         * performed on this input.  It is presumed to be sorted, least-to-greatest, and that all
1553         * values in the array are valid-indices into the vectorized-html parameter {@code 'page'}
1554         */
1555        public static <T extends HTMLNode> void nodesOPT(Vector<T> page, int... posArr)
1556        {
1557            if (posArr.length == 0) return;
1558
1559            int endingInsertPos = page.size() - posArr.length;
1560            int posArrIndex     = 0;
1561            int insertPos       = posArr[0];
1562            int retrievePos     = posArr[0];
1563
1564            // There is very little that can be documented about these two loops.  Took 3 hours
1565            // to figure out.  Read the variables names for "best documentation"
1566
1567            while (insertPos < endingInsertPos)
1568            {
1569                // This inner-loop is necessary for when the posArr has consecutive-elements that
1570                // are *ALSO* consecutive-pointers.
1571                //
1572                // For instance, this invokation:
1573                // Util.removeNodes(page, 4, 5, 6); ...
1574                //      where 4, 5, and 6 are consecutive - the inner while-loop is required.
1575                //
1576                // For this invokation: 
1577                // Util.removeNodes(page, 2, 4, 6); 
1578                //      the inner-loop is not entered.
1579
1580                while ((posArrIndex < posArr.length) && (retrievePos == posArr[posArrIndex]))
1581                { retrievePos++; posArrIndex++; }
1582
1583                page.setElementAt(page.elementAt(retrievePos++), insertPos++);
1584            }
1585
1586            // Remove all remaining elements in the tail of the array.
1587            page.setSize(page.size() - posArr.length);
1588        }
1589
1590
1591        /**
         * This method removes each HTMLNode from the passed-parameter {@code 'page'}
1593         * listed/identified by the input array {@code 'nodeList'}.
1594         * 
1595         * <BR /><BR /><B CLASS=JDDescLabel>Empty Var-Args:</B>
1596         * 
1597         * <BR />If the var-args input integer-array parameter is empty, this method shall exit
1598         * gracefully (and immediately).
1599         * 
1600         * @param preserveInputArray This is a convenience input parameter that allows a programmer
1601         * to "preserve" the original input-parameter integer-array that is passed to this method.
1602         * It could be argued this parameter is "superfluous" - however, keep in mind that the
         * passed parameter {@code 'nodeList'} <B><I>must be sorted</I></B> before this method is
         * able to function properly. There is a sort that's performed within the body of this
         * method.  Just in case the original order of the integer-array input-parameter must be
         * preserved, it's possible to request for the sort to operate on "a clone" of the
1607         * input-parameter integer-array, instead of the original integer-array {@code 'nodeList'}
1608         * itself. 
1609         * 
1610         * @param page Any HTML-Page, usually ones generated by
1611         * {@code HTMLPage.getPageTokens(...)}, but these may be obtained or created in any fashion
1612         * so necessary. 
1613         * 
1614         * @param nodeList An array of integers which list/identify the nodes in the page to be
1615         * removed.
1616         * 
1617         * @throws IllegalArgumentException If the {@code 'nodeList'} contains duplicate entries.
1618         * Obviously, no {@code HTMLNode} may be removed from the {@code Vector<HTMLNode>} more
1619         * than once.
1620         * 
1621         * @throws IndexOutOfBoundsException If the nodeList contains index-pointers / items that
1622         * are not within the bounds of the passed HTML-Page {@code Vector}.
1623         */
1624        public static <T extends HTMLNode> void nodes
1625            (boolean preserveInputArray, Vector<T> page, int... nodeList)
1626        {
1627            if (nodeList.length == 0) return;
1628
1629            // @Safe Var Args
1630            int[]   posArr  = preserveInputArray ? nodeList.clone() : nodeList;
1631            int     len     = posArr.length;
1632
1633            Arrays.sort(posArr);
1634
1635            // Check for duplicates in the nodeList, no HTMLNode may be removed twice!
1636            for (int i=0; i < (len - 1); i++)
1637
1638                if (posArr[i] == posArr[i+1]) throw new IllegalArgumentException(
1639                    "The input array contains duplicate items, this is not allowed.\n" +
1640                    "This is since each array-entry is intended to be a pointer/index for items " +
1641                    "to be removed.\nNo item can possibly be removed twice.!"
1642                );
1643
1644            // Make sure all nodes are within the bounds of the original Vector.  (no negative 
1645            // indexes, no indexes greater than the size of the Vector)
1646
1647            if ((posArr[0] < 0) || (posArr[len - 1] >= page.size()))
1648
1649                throw new IndexOutOfBoundsException (
1650                    "The input array contains entries which are not within the bounds of the " +
1651                    "original-passed Vector.\nHTMLPage Vector has: " + page.size() +
1652                        " elements.\n" +
1653                    "Maximum element in the nodeList is [" + posArr[len - 1] + "], and the " +
1654                        "minimum element is: [" + posArr[0] + "]"
1655                );
1656
1657            int endingInsertPos = page.size() - posArr.length;
1658            int posArrIndex     = 0;
1659            int insertPos       = posArr[0];
1660            int retrievePos     = posArr[0];
1661
1662            // There is very little that can be documented about these two loops.  Took 3 hours
1663            // to figure out.  Read the variables names for "best documentation"
1664
1665            while (insertPos < endingInsertPos)
1666            {
1667                // This inner-loop is necessary for when the posArr has consecutive-elements that
1668                // are *ALSO* consecutive-pointers.
1669                //
1670                // For instance, this invocation:
1671                // Util.removeNodes(page, 4, 5, 6);
1672                //      where 4, 5, and 6 are consecutive - the inner while-loop is required.
1673                //
1674                // For this invocation: 
1675                // Util.removeNodes(page, 2, 4, 6);
1676                //      the inner-loop is not entered.
1677
1678                while ((posArrIndex < posArr.length) && (retrievePos == posArr[posArrIndex])) 
1679                { retrievePos++; posArrIndex++; }
1680
1681                page.setElementAt(page.elementAt(retrievePos++), insertPos++);
1682            }
1683
1684            // Remove all remaining elements in the tail of the array.
1685            page.setSize(page.size() - posArr.length);
1686        }
1687
1688
1689        // ****************************************************************************************
1690        // ****************************************************************************************
1691        // Inclusive-Empty Removal Operations
1692        // ****************************************************************************************
1693        // ****************************************************************************************
1694
1695
1696        /**
1697         * Convenience Method.
1698         * <BR />Invokes: {@link #inclusiveEmpty(Vector, int, int, String[])}
1699         */
1700        public static int inclusiveEmpty(Vector<HTMLNode> page, String... htmlTags)
1701        { return inclusiveEmpty(page, 0, -1, htmlTags); }
1702
1703        /**
1704         * Convenience Method.
1705         * <BR />Receives: {@code DotPair}
1706         * <BR />Invokes: {@link #inclusiveEmpty(Vector, int, int, String[])}
1707         */
1708        public static int inclusiveEmpty(Vector<HTMLNode> page, DotPair dp, String... htmlTags)
1709        { return inclusiveEmpty(page, dp.start, dp.end + 1, htmlTags); }
1710
1711        /**
1712         * This will do an "Inclusive Search" using the standard class
1713         * {@link TagNodeInclusiveIterator} in the {@code package NodeSearch}.  Then it will
1714         * inspect the contents of the subsections. Any subsections that do not contain any
1715         * instances of {@code HTMLNode} in between them, or any subsections that only contain
1716         * "blank-text" (white-space) between them shall be removed. 
1717         * 
1718         * <BR /><BR /><B CLASS=JDDescLabel>Recursive Method:</B>
1719         * 
1720         * <BR />The search logic shall perform multiple <I><B>recursive iterations</B></I> of
         * itself, such that if, for instance, the user requested that all empty HTML divider
         * ({@code <DIV>}) elements be removed, and removing one set of dividers results in more
         * empty ones (nested {@code <DIV>} elements), then an additional removal shall be called.
1724         * <I>This recursion shall continue until there are no empty HTML elements of the types
1725         * listed by</I> {@code 'htmlTags'}
1726         *
1727         * @param page Any vectorized-html page or sub-page.
1728         * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
1729         * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
1730         * 
1731         * @param htmlTags The list of <I>inclusive</I> (non-singleton) html elements to search for
1732         * possibly being empty container tags.
1733         * 
1734         * @return The number of {@code HTMLNode's} that were removed.
1735         * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
1736         */
1737        public static int inclusiveEmpty
1738            (Vector<HTMLNode> page, int sPos, int ePos, String... htmlTags)
1739        {
1740            DotPair subList;
1741
1742            int             removed = 0;
1743            HNLIInclusive   iter    = TagNodeInclusiveIterator.iter(page, htmlTags);
1744            LV              l       = new LV(page, sPos, ePos);
1745
1746            iter.restrictCursor(l);
1747
1748            TOP:
1749            while (iter.hasNext())
1750
1751                // If there is only the opening & closing pair, with nothing in between,
1752                // then the pair must be removed because it is "Empty" (Inclusive Empty)
1753
1754                if ((subList = iter.nextDotPair()).size() == 2)
1755                {
1756                    iter.remove();
1757                    ePos -= subList.size();
1758                    removed += subList.size();
1759                }
1760
1761                else
1762                {
1763                    // If there is any TagNode in between the start-end pair, then this is NOT
1764                    // EMPTY.  In this case, skip to the next start-end opening-closing pair.
1765
1766                    for (int i=(subList.start + 1); i < subList.end; i++)
1767                        if (! page.elementAt(i).isTextNode())
1768                            continue TOP;
1769
1770                    // If there were only TextNode's between an opening-closing TagNode Pair....
1771                    // **AND** those TextNode's are only white-space, then this also considered
1772                    // Inclusively Empty.  (Get all TextNode's, and if .trim() reduces the length()
1773                    // to zero, then it was only white-space.
1774
1775                    if (Util.textNodesString(page, subList).trim().length() == 0)
1776                    {
1777                        iter.remove();
1778                        ePos -= subList.size();
1779                        removed += subList.size();
1780                    }
1781                }
1782
1783            // This process must be continued recursively, because if any inner, for instance,
1784            // <DIV> ... </DIV> was removed, then the outer list must be re-checked...
1785
1786            if (removed > 0)
1787                return removed + Remove.inclusiveEmpty(page, sPos, ePos, htmlTags);
1788            else
1789                return 0;
1790        }
1791
1792
1793        // ****************************************************************************************
1794        // ****************************************************************************************
1795        // Miscellaneous Removal Operations
1796        // ****************************************************************************************
1797        // ****************************************************************************************
1798
1799
1800        /**
1801         * Removes the first and last element of a vectorized-HTML web-page, or sub-page.
1802         * Generally, this could be used to remove the surrounding tag's {@code '<DIV>'} ...
1803         * {@code '</DIV>'}, or something similar.
1804         * 
1805         * <BR /><BR />This method <B STYLE="color: red;">WILL NOT CHECK</B> whether there are
         * matching HTML open-and-close tags at the beginning and end of this sub-section.
1807         * Generally, though, that is how this method is intended to be used.
1808         * 
1809         * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
1810         * 
1811         * @throws IllegalArgumentException If the {@code Vector} has fewer than two elements.
1812         */
1813        public static void firstLast(Vector<? extends HTMLNode> html)
1814        {
1815            int size = html.size();
1816
1817            if (size < 2) throw new IllegalArgumentException(
1818                "You have requested that the first and last elements the input 'page' parameter " +
1819                "(a vector) be removed.  However, the vector size is only [" + size  + "], so " +
1820                "this cannot be performed."
1821            );
1822
1823            // NOTE: *** This removes elementAt(0) and elementAt(size-1)
1824            //       *** NOT ALL ELEMENTS BETWEEN 0 and (size-1)
1825
1826            Util.Remove.nodesOPT(html, 0, size-1);
1827        }
1828
1829    }
1830
1831
1832    // ********************************************************************************************
1833    // ********************************************************************************************
1834    // Static Inner-Class: Inclusive 
1835    // ********************************************************************************************
1836    // ********************************************************************************************
1837
1838
1839    /**
1840     * Tools for finding the matching-closing tag of any open {@link TagNode}.
1841     * 
1842     * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=UTILINCL>
1843     */
1844    @Torello.JavaDoc.StaticFunctional
1845    public static class Inclusive
1846    {
        // Private, empty constructor: code outside of class 'Util' cannot create an instance.
        private Inclusive() { }
1848
1849    
1850        // ****************************************************************************************
1851        // ****************************************************************************************
1852        // Inclusive Find/Get
1853        // ****************************************************************************************
1854        // ****************************************************************************************
1855
1856        /**
1857         * This finds the closing HTML {@code 'TagNode'} match for a given opening
1858         * {@code 'TagNode'} in a given-input html page or sub-section.
1859         *
1860         * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
1861         *
1862         * @param nodeIndex An index into that {@code Vector}.  This index must point to an
1863         * {@code HTMLNode} element that is:
1864         *
1865         * <BR /><BR /><OL CLASS=JDOL>
1866         * <LI>An instance of {@code TagNode}</LI>
1867         * <LI>A {@code TagNode} whose {@code 'isClosing'} field is {@code FALSE}</LI>
1868         * <LI>Is not a {@code 'singleton'} HTML element-token
         * (e.g. {@code <IMG>, <BR>, <HR>, etc...})
1870         * </LI>
1871         * </OL>
1872         *
1873         * @return An "inclusive search" finds {@code OpeningTag} and {@code ClosingTag} pairs - 
1874         * <I>and returns all the elements between them in the contents of a 
1875         * return-{@code Vector}, or {@code Vector DotPair}-end-point value</I>.  This method
         * will take a particular node of a {@code Vector}, and (as long as it has a match)
         * find its <I><B>closing {@code HTMLNode} match.</B></I>  The integer returned will
1878         * be the index into this page of the closing, matching {@code TagNode.}
1879         *
1880         * @throws TagNodeExpectedException If the node in the {@code Vector}-parameter
1881         * {@code 'html'} contained at index {@code 'nodeIndex'} is not an instance of
1882         * {@code TagNode}, then this exception is thrown.
1883         *
1884         * @throws OpeningTagNodeExpectedException If the node in the {@code Vector}-parameter 
1885         * {@code 'html'} at index {@code 'nodeIndex'} is a closing version of the HTML element,
1886         * then this exception shall throw.
1887         *
1888         * @throws InclusiveException If the node in {@code Vector}-parameter {@code 'html'},
1889         * pointed-to by index {@code 'nodeIndex'} is an HTML {@code 'Singleton'} / Self-Closing
1890         * Tag, then this exception will be thrown.
1891         *
1892         * @see TagNode
1893         * @see TagNode#tok
1894         * @see TagNode#isClosing
1895         * @see HTMLNode
1896         */
1897        public static int find(Vector<? extends HTMLNode> html, int nodeIndex)
1898        {
1899            TagNode     tn  = null;
1900            HTMLNode    n   = null;
1901            String      tok = null;
1902
1903            if (! html.elementAt(nodeIndex).isTagNode())
1904
1905                throw new TagNodeExpectedException (
1906                    "You have attempted to find a closing tag to match an opening one, " +
1907                    "but the 'nodeIndex' (" + nodeIndex + ") you have passed doesn't contain " +
1908                    "an instance of TagNode."
1909                );
1910
1911            else tn = (TagNode) html.elementAt(nodeIndex);
1912
1913            if (tn.isClosing) throw new OpeningTagNodeExpectedException(
1914                "The TagNode indicated by 'nodeIndex' = " + nodeIndex + " has its 'isClosing' " +
1915                "boolean as TRUE - this is not an opening TagNode, but it must be to continue."
1916            );
1917
1918            // Checks to ensure this token is not a 'self-closing' or 'singleton' tag.
1919            // If it is an exception shall throw.
1920            InclusiveException.check(tok = tn.tok);
1921
1922            int end         = html.size();
1923            int openCount   = 1;
1924
1925            for (int pos = (nodeIndex+1); pos < end; pos++)
1926
1927                if ((n = html.elementAt(pos)).isTagNode())
1928                    if ((tn = ((TagNode) n)).tok.equals(tok))
1929                    {
1930                        // This keeps a "Depth Count" - where "depth" is just the number of 
1931                        // opened tags, for which a matching, closing tag hasn't been found yet.
1932
1933                        openCount += (tn.isClosing ? -1 : 1);
1934
1935                        // When all open-tags of the specified HTML Element 'tok' have been
1936                        // found, search has finished.
1937
1938                        if (openCount == 0) return pos;
1939                    }
1940
1941            // The closing-matching tag was not found
1942            return -1;
1943        }
1944
1945        /**
1946         * Convenience Method.
1947         * <BR />Invokes: {@link #find(Vector, int)}
1948         * <BR />Converts: output to <B><CODE>'GET'</CODE></B> format ({@code Vector}-sublist)
1949         * <BR />Using: {@link Util#cloneRange(Vector, int, int)}
1950         */
1951        public static Vector<HTMLNode> get(Vector<? extends HTMLNode> html, int nodeIndex)
1952        { 
1953            int endPos = find(html, nodeIndex);
1954
1955            return (endPos == -1) ? null : cloneRange(html, nodeIndex, endPos + 1);
1956        }
1957
1958        /**
1959         * Convenience Method.
1960         * <BR />Invokes: {@link #find(Vector, int)}
1961         * <BR />Converts: output to <B><CODE>'PEEK'</CODE></B> format ({@code SubSection})
1962         * <BR />Using: {@link Util#cloneRange(Vector, int, int)}
1963         */
1964        public static SubSection peek(Vector<? extends HTMLNode> html, int nodeIndex)
1965        {
1966            int endPos = find(html, nodeIndex);
1967
1968            return (endPos == -1) ? null : new SubSection(
1969                new DotPair(nodeIndex, endPos),
1970                cloneRange(html, nodeIndex, endPos + 1)
1971            );
1972        }
1973
1974        /**
1975         * Convenience Method.
1976         * <BR />Invokes: {@link #find(Vector, int)}
1977         * <BR />Converts: output to <B><CODE>'POLL'</CODE></B> format ({@code Vector}-sublist),
1978         * <BR />Using: {@link Util#pollRange(Vector, int, int)}
1979         * <BR />Removes: The requested Sub-List
1980         */
1981        public static Vector<HTMLNode> poll(Vector<? extends HTMLNode> html, int nodeIndex)
1982        {
1983            int endPos = find(html, nodeIndex);
1984
1985            return (endPos == -1) ? null : pollRange(html, nodeIndex, endPos + 1);
1986        }
1987
1988        /**
1989         * Convenience Method.
1990         * <BR />Invokes: {@link #find(Vector, int)}
1991         * <BR />Converts: output to <B><CODE>'REMOVE'</CODE></B> format ({@code int} - number
1992         * of nodes removed)
1993         * <BR />Using: {@link Remove#range(Vector, int, int)}
1994         * <BR />Removes: The requested Sub-List
1995         */
1996        public static int remove(Vector<? extends HTMLNode> html, int nodeIndex)
1997        {
1998            int endPos = find(html, nodeIndex);
1999
2000            return (endPos == -1) ? 0 : Util.Remove.range(html, nodeIndex, endPos + 1);
2001        }
2002
2003
2004        // ****************************************************************************************
2005        // ****************************************************************************************
2006        // Optimized Methods, Inclusive Find/Get/Subsection
2007        // ****************************************************************************************
2008        // ****************************************************************************************
2009
2010        /**
2011         * Convenience Method.  
2012         * <BR />Invokes: {@link #dotPairOPT(Vector, int)}
2013         * <BR />Converts: output to {@code Vector<HTMLNode>}
2014         */
2015        public static Vector<HTMLNode> vectorOPT(Vector<? extends HTMLNode> html, int tagPos)
2016        {
2017            DotPair dp = dotPairOPT(html, tagPos);
2018
2019            if (dp == null) return null;
2020            else            return Util.cloneRange(html, dp.start, dp.end + 1);
2021        }
2022
2023        /**
2024         * Convenience Method.
2025         * <BR />Invokes: {@link #dotPairOPT(Vector, int)}
2026         * <BR />Converts: output to {@code SubSection}
2027         */
2028        public static SubSection subSectionOPT(Vector<? extends HTMLNode> html, int tagPos)
2029        {
2030            DotPair dp = dotPairOPT(html, tagPos);
2031
2032            if (dp == null) return null;
2033            else            return new SubSection(dp, Util.cloneRange(html, dp.start, dp.end + 1));
2034        }
2035
2036        /**
2037         * 
2038         * <EMBED CLASS='external-html' DATA-FILE-ID=UTILIOPT>
2039         * <!-- Inclusive Opt Description -->
2040         * 
2041         * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
2042         * @param tagPos <EMBED CLASS='external-html' DATA-FILE-ID=UTILOPTTP>
2043         * @return A <B>'DotPair'</B> version of an inclusive, end-to-end HTML tag-element.
2044         * 
2045         * <EMBED CLASS='external-html' DATA-FILE-ID=UTILOPTJSN> 
2046         * <!-- Note on JS-DOM Tree innerHTML -->
2047         * 
2048         * @see TagNode
2049         * @see TagNode#isClosing
2050         * @see TagNode#tok
2051         * @see DotPair
2052         */
2053        public static DotPair dotPairOPT(Vector<? extends HTMLNode> html, int tagPos)
2054        {
2055            // Temp Variables
2056            HTMLNode n;     TagNode tn;     int openCount = 1;
2057
2058            int len = html.size();
2059
2060            // This is the name (token) of the "Opening HTML Element", we are searching for
2061            // the matching, closing element
2062
2063            String tok = ((TagNode) html.elementAt(tagPos)).tok;
2064
2065            for (int i = (tagPos+1); i < len; i++)
2066
2067                if ((n = html.elementAt(i)).isTagNode())
2068                    if ((tn = (TagNode) n).tok.equals(tok))
2069                    {
2070                        // This keeps a "Depth Count" - where "depth" is just the number of 
2071                        // opened tags, for which a matching, closing tag hasn't been found yet.
2072
2073                        openCount += (tn.isClosing ? -1 : 1);
2074
2075                        // When all open-tags of the specified HTML Element 'tok' have been
2076                        // found, search has finished.
2077
2078                        if (openCount == 0) return new DotPair(tagPos, i);
2079                    }
2080
2081            // Was not found
2082            return null;
2083        }
2084
2085        /**
2086         * Convenience Method.
2087         * <BR />Invokes: {@link #dotPairOPT(Vector, int, int)}
2088         * <BR />Converts: output to {@code Vector<HTMLNode>}
2089         */
2090        public static Vector<HTMLNode> vectorOPT
2091            (Vector<? extends HTMLNode> html, int tagPos, int end)
2092        {
2093            DotPair dp = dotPairOPT(html, tagPos, end);
2094
2095            if (dp == null) return null;
2096            else            return Util.cloneRange(html, dp.start, dp.end + 1);
2097        }
2098
2099        /**
2100         * Convenience Method.
2101         * <BR />Invokes: {@link #dotPairOPT(Vector, int, int)}
2102         * <BR />Converts: output to {@code SubSection}
2103        */
2104        public static SubSection subSectionOPT
2105            (Vector<? extends HTMLNode> html, int tagPos, int end)
2106        {
2107            DotPair dp = dotPairOPT(html, tagPos, end);
2108
2109            if (dp == null) return null;
2110            else            return new SubSection(dp, Util.cloneRange(html, dp.start, dp.end + 1));
2111        }
2112
2113        /**
2114         * 
2115         * <EMBED CLASS='external-html' DATA-FILE-ID=UTILIOPT>
2116         * <!-- Inclusive Opt Description -->
2117         * 
2118         * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
2119         * @param tagPos <EMBED CLASS='external-html' DATA-FILE-ID=UTILOPTTP>
2120         * @param end <EMBED CLASS='external-html' DATA-FILE-ID=UTILOPTEND>
2121         * 
2122         * @return A <B>'DotPair'</B> version of an inclusive, end-to-end HTML tag-element.
2123         * 
2124         * <EMBED CLASS='external-html' DATA-FILE-ID=UTILOPTJSN>
2125         * <!-- Note on JS-DOM Tree innerHTML -->
2126         * 
2127         * @see TagNode
2128         * @see TagNode#isClosing
2129         * @see TagNode#tok
2130         * @see DotPair
2131         */
2132        public static DotPair dotPairOPT(Vector<? extends HTMLNode> html, int tagPos, int end)
2133        {
2134            // Temp Variables
2135            HTMLNode n;     TagNode tn;     int openCount = 1;      int endPos;
2136
2137            // This is the name (token) of the "Opening HTML Element", we are searching for
2138            // the matching, closing element
2139            String tok = ((TagNode) html.elementAt(tagPos)).tok;
2140
2141            for (endPos = (tagPos+1); endPos < end; endPos++)
2142
2143                if ((n = html.elementAt(endPos)).isTagNode())
2144                    if ((tn = (TagNode) n).tok.equals(tok))
2145                    {
2146                        // This keeps a "Depth Count" - where "depth" is just the number of
2147                        // opened tags, for which a matching, closing tag hasn't been found yet.
2148                        openCount += (tn.isClosing ? -1 : 1);
2149
2150                        // When all open-tags of the specified HTML Element 'tok' have been
2151                        // found, search has finished.
2152                        if (openCount == 0) return new DotPair(tagPos, endPos);
2153                    }
2154
2155            // The end of the vectorized-html page (or subsection) was reached, but the
2156            // matching-closing element was not found.
2157            return null; // assert(endPos == html.size());
2158        }
2159    }
2160}