001package Torello.HTML;
002
003import Torello.HTML.NodeSearch.*;
004import Torello.Java.FileRW; // used in @see comments
005import Torello.Java.StringParse;
006import Torello.Java.Additional.Ret2;
007
008import java.util.*;
009import java.util.stream.IntStream;
010
011/**
012 * Utilities for checking that opening and closing {@link TagNode} elements match up (that the HTML
013 * is balanced).
014 * 
015 * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE>
016 */
017@Torello.JavaDoc.StaticFunctional
018public class Balance
019{
020    private Balance() { }
021
022    /**
023     * Invokes:
024     * 
025     * <BR /><BR /><UL CLASS=JDUL>
026     *  <LI>{@link #check(Vector)}</LI>
027     *  <LI>{@link #checkNonZero(Hashtable)}</LI>
028     *  <LI>{@link #toStringBalance(Hashtable)}</LI>
029     * </UL>
030     * 
031     * <DIV CLASS=EXAMPLE>{@code
032     * String b = Balance.CB(a.articleBody);
033     * System.out.println((b == null) ? "Page has Balanced HTML" : b);
034     * 
035     * // If Page has equal number of open and close tags prints:
036     * // Page has Balanced HTML
037     * // OTHERWISE PRINTS REPORT
038     * }</DIV>
039     * 
040     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
041     * 
042     * @return Will return null if the snippet or page has 'balanced' HTML, otherwise returns the
043     * trimmed balance-report as a {@code String}.
044     */
045    public static String CB(Vector<HTMLNode> html)
046    {
047        String ret = toStringBalance(checkNonZero(check(html)));
048
049        return (ret.length() == 0) ? null : ret;
050    }
051
052    /**
053     * Creates a {@code Hashtable} that has a count of all open and closed HTML tags found on the
054     * page.
055     *
056     * <BR /><BR />This {@code Hashtable} may be regarded as maintaining "counts" on each-and-every
057     * HTML tag to identify whether there is <I><B>a one-to-one balance mapping between opening and
058     * closing tags</B></I> for each element.  When the {@code Hashtable} generated by
059     * this method is non-zero (for a particular HTML-Tag) it means that there are an unequal
060     * number of opening and closing elements for that tag.
061     * 
062     * <BR /><BR />Suppose this method were to produce a {@code Hashtable}, and that
063     * {@code Hashtable} queried for a count on the HTML <B CLASS=JDHTags>{@code <DIV>}</B> tag
064     * (dividers).  If that count turned out to be a non-zero positive number it would mean that
065     * the Vectorized-HTML had more opening <B CLASS=JDHTags>{@code <DIV>}</B> tags than the 
066     * number of closing <B CLASS=JDHTags>{@code </DIV>}</B> tags on that page.
067     * 
068     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE1> <!-- Validity Note -->
069     * 
070     * <BR /><BR />The following example will help explain the use of this method.  If an HTML page
071     * needs to be checked to see that all elements are properly opened and closed, this method can
072     * be used to return a list of any HTML element tag that does not have an equal number of
073     * opening and closing tags.
074     * 
075     * <BR /><BR />In this example, the generated Java-Doc HTML-Page for class {@code TagNode} is
076     * checked.
077     * 
078     * <DIV CLASS="EXAMPLE">{@code
079     * String                      html    = FileRW.loadFileToString(htmlFileName);
080     * Vector<HTMLNode>            v       = HTMLPage.getPageTokens(html, false);
081     * Hashtable<String, Integer>  b       = Balance.check(v);
082     * StringBuffer                sb      = new StringBuffer();
083     *
084     * // This part just prints a text-output to a string buffer, which is printed to the screen.
085     * for (String key : b.keySet())
086     * {
087     *     Integer i = b.get(key);
088     * 
089     *     // Only print keys that had a "non-zero count"
090     *     // A Non-Zero-Count implies Opening-Tag-Count and Closing-Tag-Count are not equal!
091     * 
092     *     if (i.intValue() != 0) sb.append(key + "\t" + i.intValue() + "\n");
093     * }
094     * 
095     * // This example output was: "i   -1", because of an unclosed italics element.
096     * // NOTE: To find where this unclosed element is, use method: nonNestedCheck(Vector, String)
097     * }</DIV>
098     * 
099     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
100     * 
101     * @return A {@code Hashtable} map of the count of each HTML-Tag present in the
102     * input {@code Vector}.
103     * 
104     * <BR /><BR />For instance, if this {@code Vector} had five
105     * <B CLASS=JDHTags>{@code <A HREF=...>}</B> (Anchor-Link) tags, and six
106     * <B CLASS=JDHTags>{@code </A>}</B> tags, then the returned {@code Hashtable} would have a
107     * {@code String}-key equal to {@code "A"} with an integer value of {@code -1}.
108     * 
109     * @see FileRW#loadFileToString(String)
110     * @see HTMLPage#getPageTokens(CharSequence, boolean)
111     */
112    public static Hashtable<String, Integer> check(Vector<? super TagNode> html)
113    {
114        Hashtable<String, Integer> ht = new Hashtable<>();
115
116        // Iterate through the HTML List, we are only counting HTML Elements, not text, and
117        // not HTML Comments
118
119        for (Object o : html) if (o instanceof TagNode)
120        {
121            TagNode tn = (TagNode) o;
122
123            // Singleton tags are also known as 'self-closing' tags.  BR, HR, IMG, etc...
124            if (HTMLTags.isSingleton(tn.tok)) continue;
125
126            Integer I = ht.get(tn.tok);
127            int     i = (I != null) ? I.intValue() : 0;
128
129            // An opening-version (TC.OpeningTags, For Instance <DIV ...>) will ADD 1 to the count
130            // A closing-tag (For Instance: </DIV>) will SUBTRACT 1 from the count
131
132            i += tn.isClosing ? -1 : 1;
133
134            // Update the return result Hashtable for this particular HTML-Element (tn.tok)
135            ht.put(tn.tok, Integer.valueOf(i));
136        }
137
138        return ht;
139    }
140
141    /**
142     * Creates an array that includes an open-and-close {@code 'count'} for each HTML-Tag
143     * that was requested via the passed input {@code String[]}-Array parameter {@code 'htmlTags'}.
144     * 
145     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE1> <!-- Validity Note -->
146     * 
147     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
148     * 
149     * <BR /><BR />The HTML-Element Open-Close-Counts are computed from this page.
150     * 
151     * @param htmlTags This may be one, or many, HTML-Tags whose open-close count needs to be
152     * computed.  Any HTML Element that is not present in this list - <I>will not have a count
153     * computed.</I>
154     * 
155     * <BR /><BR />The {@code count} results which are stored in an {@code int[]}-Array that should
156     * be considered "parallel" to this input Var-Args-Array.
157     * 
158     * @return An array of the count of each html-element present in the input vectorized-html
159     * parameter {@code 'html'}.
160     * For instance, If the following values were passed to this method:
161     * 
162     * <BR /><BR /><UL CLASS=JDUL>
163     * <LI> A Vectorized-HTML page that had 5 {@code '<SPAN ...>'} open-elements, and 6
164     *      {@code '</SPAN>'} closing {@code SPAN}-Tags.
165     *      </LI>
166     * 
167     * <LI> And at least one of the {@code String's} in the Var-Args parameter {@code 'htmlTags'}
168     *      was equal to the {@code String} {@code "SPAN"} (case insensitive).
169     *      </LI>
170     * 
171     * <LI> <B>==&gt;</B> Then the array-position corresponding to the position in array 
172     *      {@code 'htmlTags'} that had the {@code "SPAN"} would have a value of {@code '-1'}.
173     *      </LI>
174     * </UL>
175     * 
176     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
177     * 
178     * @throws SingletonException If any of the {@code String}-Tags passed to parameter
179     * {@code 'htmlTags'} are {@code 'singleton'} (Self-Closing) Tags, then this exception throws
180     */
181    public static int[] check(Vector<? super TagNode> html, String... htmlTags)
182    {
183        // Check that these are all valid HTML Tags, throw an exception if not.
184        htmlTags = ARGCHECK.htmlTags(htmlTags);
185
186        // Temporary Hash-table, used to store the count of each htmlTag
187        Hashtable<String, Integer> ht = new Hashtable<>();
188
189        // Initialize the temporary hash-table.  This will be discarded at the end of the method,
190        // and converted into a parallel array.  (Parallel to the input String... htmlTags array).
191        // Also, check to make sure the user hasn't requested a count of Singleton HTML Elements.
192
193        for (String htmlTag : htmlTags)
194        {
195            if (HTMLTags.isSingleton(htmlTag)) throw new SingletonException(
196                "One of the tags you have passed: [" + htmlTag + "] is a singleton-tag, " +
197                "and is only allowed opening versions of the tag."
198            );
199
200            ht.put(htmlTag, Integer.valueOf(0));
201        }
202
203        Integer I;
204
205        // Iterate through the HTML List, we are only counting HTML Elements, not text, and
206        // not HTML Comments
207        for (Object o : html) if (o instanceof TagNode)
208        {
209            TagNode tn = (TagNode) o;
210
211            // Get the current count from the hash-table
212            I = ht.get(tn.tok);
213
214            // The hash-table only holds elements we are counting, if null, then skip.
215            if (I == null) continue;
216
217            // Save the new, computed count, in the hash-table
218            //
219            // An opening-version (TC.OpeningTags, For Instance <DIV ...>) will ADD 1 to the count
220            // A closing-tag (For Instance: </DIV>) will SUBTRACT 1 from the count
221
222            ht.put(tn.tok, Integer.valueOf(I.intValue() + (tn.isClosing ? -1 : 1)));
223        }
224
225        // Convert the hash-table to an integer-array, and return this to the user
226        int[] ret = new int[htmlTags.length];
227
228        for (int i=0; i < ret.length; i++)
229            ret[i] = 0;
230
231        for (int i=0; i < htmlTags.length; i++)
232            if ((I = ht.get(htmlTags[i])) != null) 
233                ret[i] = I.intValue();
234    
235        return ret;
236    }
237
238    /**
239     * Creates a {@code Hashtable} that has a count of all open and closed HTML-Tags found on
240     * the page - whose count-value is not equal to zero.
241     * 
242     * <BR /><BR />This method will report when there are unbalanced HTML-Tags on a page, <I><B>and
243     * strictly ignore any &amp; all tags with a count of zero</B></I>.  Specifically, if a tag has
244     * a {@code 1-to-1} open-close count, then it will not have any keys available in the returned
245     * {@code Hashtable}.
246     *
247     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE1> <!-- Validity Note -->
248     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_CLONE> <!-- Clone Note -->
249     *
250     * @param ht This should be a {@code Hashtable} that was produced by a call to one of the two
251     * available {@code check(...)} methods.
252     * 
253     * @return A {@code Hashtable} map of the count of each html-element present in this
254     * {@code Vector}.  For instance, if this {@code Vector} had 5 {@code '<A ...>'} (Anchor-Link)
255     * elements, and six {@code '</A>'} then this {@code Hashtable} would have a {@code String}-key
256     * {@code 'a'} with an integer value of {@code '-1'}.
257     */
258    public static Hashtable<String, Integer> checkNonZero(Hashtable<String, Integer> ht)
259    {
260        @SuppressWarnings("unchecked")
261        Hashtable<String, Integer>  ret     = (Hashtable<String, Integer>) ht.clone();
262        Enumeration<String>         keys    = ret.keys();
263
264        while (keys.hasMoreElements())
265        {
266            String key = keys.nextElement();
267
268            // Remove any keys (HTML element-names) that have a normal ('0') count.
269            if (ret.get(key).intValue() == 0) ret.remove(key);
270        }
271
272        return ret;
273    }
274
275
276    /**
277     * This will compute a {@code count} for just one, particular, HTML Element of whether that
278     * Element has been properly opened and closed.  An open and close {@code count} (integer
279     * value) will be returned by this method.
280     * 
281     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE1> <!-- Validity Note -->
282     * 
283     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
284     * 
285     * @param htmlTag This is the HTML element whose open-close count needs to be kept.
286     * 
287     * @return The count of each html-element present in this {@code Vector}.  For instance, if the
288     * user had requested that HTML Anchor Links be counted, and if the input {@code Vector} had 5
289     * {@code '<A ...>'} (Anchor-Link) elements, and six {@code '</A>'} then this method would
290     * return {@code -1}.
291     * 
292     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
293     * 
294     * @throws SingletonException If this {@code 'htmlTag'} is a {@code 'singleton'} (Self-Closing)
295     * Tag, this exception will throw.
296     */
297    public static int checkTag(Vector<? super TagNode> html, String htmlTag)
298    {
299        // Check that this is a valid HTML Tag, throw an exception if invalid
300        htmlTag = ARGCHECK.htmlTag(htmlTag);
301
302        if (HTMLTags.isSingleton(htmlTag)) throw new SingletonException(
303            "The tag you have passed: [" + htmlTag + "] is a singleton-tag, and is only " +
304            "allowed opening versions of the tag."
305        );
306
307        TagNode tn;     int i = 0;
308
309        // Iterate through the HTML List, we are only counting HTML Elements, not text, and
310        // not HTML Comments
311
312        for (Object o : html) if (o instanceof TagNode) 
313
314            // If we encounter an HTML Element whose tag is the tag whose count we are 
315            // computing, then....
316
317            if ((tn = (TagNode) o).tok.equals(htmlTag))
318            
319                // An opening-version (TC.OpeningTags, For Instance <DIV ...>) will ADD 1 to the count
320                // A closing-tag (For Instance: </DIV>) will SUBTRACT 1 from the count
321
322                i += tn.isClosing ? -1 : 1;
323
324        return i;
325    }
326
327
328    /**
329     * This method will calculate the "Maximum" and "Minimum" depth for every HTML 5.0 Tag found on
330     * a page.  The Max-Depth is the "Maximum-Number" of Opening HTML Element Opening Tags were
331     * found for a particular element, before a matching closing version of the same Element is
332     * encountered. In the example below, the maximum "open-count" for the HTML 'divider' Element
333     * ({@code <DIV>}) is {@code '2'}.  This is because a second {@code <DIV>} element is opened
334     * before the first is closed.
335     *
336     * <DIV CLASS="HTML">{@code
337     * <DIV class="MySection"><H1>These are my ideas:</H1>
338     * <!-- Above is an outer divider, below is an inner divider -->
339     * <DIV class="MyNumbers">Here are the points:
340     * <!-- HTML Content Here -->
341     * </DIV></DIV>
342     * }</DIV>
343     *
344     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE2>
345     *
346     * <BR /><BR /><B CLASS=JDDescLabel>'Count' Computation-Heuristic:</B>
347     * 
348     * <BR />This maximum and minimum depth count will not pay any attention to whether HTML open
349     * and close tags "enclose each-other" or are "interleaved."  The actual mechanics of the
350     * for-loop which calculates the {@code count} shall hopefully explain this computation
351     * clearly enough.  This may be viewed in this method's hilited source-code, below.
352     *
353     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
354     * 
355     * @return The returned {@code Hashtable} will contain an integer-array for each HTML Element
356     * that was found on the page.  Each of these arrays shall be of length {@code 3}.
357     * 
358     * <BR /><BR /><OL CLASS=JDUL>
359     * <LI>Minimum Depth: {@code return_array[0]}</LI>
360     * <LI>Maximum Depth: {@code return_array[1]}</LI>
361     * <LI>Total Count: {@code return_array[2]}</LI>
362     * </OL>
363     *
364     * <BR /><BR /><B><SPAN STYLE="color: red;">REDUNDANCY NOTE:</SPAN></B> The third element of
365     * the returned array should be identical to the result produced by an invocation of method:
366     * {@code Balance.checkTag(html, htmlTag);}
367     * 
368     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
369     * 
370     * @throws SingletonException If this {@code 'htmlTag'} is a {@code 'singleton'} (Self-Closing)
371     * Tag, this exception will throw.
372     */
373    public static Hashtable<String, int[]> depth(Vector<? super TagNode> html)
374    {
375        Hashtable<String, int[]> ht = new Hashtable<>();
376
377        // Iterate through the HTML List, we are only counting HTML Elements, not text, and not HTML Comments
378        for (Object o : html) if (o instanceof TagNode) 
379        {
380            TagNode tn = (TagNode) o;
381
382            // Don't keep a count on singleton tags.
383            if (HTMLTags.isSingleton(tn.tok)) continue;
384
385            int[] curMaxAndMinArr = ht.get(tn.tok);
386
387            // If this is the first encounter of a particular HTML Element, create a MAX/MIN
388            // integer array, and initialize it's values to zero.
389
390            if (curMaxAndMinArr == null)
391            {
392                curMaxAndMinArr = new int[3];
393
394                curMaxAndMinArr[0] = 0;     // Current Min Depth Count for Element "tn.tok" is zero
395                curMaxAndMinArr[1] = 0;     // Current Max Depth Count for Element "tn.tok" is zero
396                curMaxAndMinArr[2] = 0;     // Current Computed Depth Count for "tn.tok" is zero
397
398                ht.put(tn.tok, curMaxAndMinArr);
399            }
400
401            // curCount += tn.isClosing ? -1 : 1;
402            //
403            // An opening-version (TC.OpeningTags, For Instance <DIV ...>) will ADD 1 to the count
404            // A closing-tag (For Instance: </DIV>) will SUBTRACT 1 from the count
405
406            curMaxAndMinArr[2] += tn.isClosing ? -1 : 1;
407
408            // If the current depth-count is a "New Minimum" (a new low! :), then save it in the
409            // minimum pos of the output-array.
410
411            if (curMaxAndMinArr[2] < curMaxAndMinArr[0]) curMaxAndMinArr[0] = curMaxAndMinArr[2];
412
413            // If the current depth-count (for this tag) is a "New Maximum" (a new high), save it
414            // to the max-pos of the output-array.
415
416            if (curMaxAndMinArr[2] > curMaxAndMinArr[1]) curMaxAndMinArr[1] = curMaxAndMinArr[2];
417        }
418
419        return ht;
420    }
421
422
423
424    /**
425     * This method will calculate the "Maximum" and "Minimum" depth for every HTML Tag listed in
426     * the {@code var-args String[] htmlTags} parameter.  The Max-Depth is the "Maximum-Number" of
427     * Opening HTML Element Opening Tags were found for a particular element, before a matching
428     * closing version of the same Element is encountered.  In the example below, the maximum
429     * {@code 'open-count'} for the HTML 'divider' Element ({@code <DIV>}) is {@code '2'}.  This is
430     * because a second {@code <DIV>} element is opened before the first is closed.
431     *
432     * <DIV CLASS="HTML">{@code
433     * <DIV class="MySection"><H1>These are my ideas:</H1>
434     * <!-- Above is an outer divider, below is an inner divider -->
435     * <DIV class="MyNumbers">Here are the points:
436     * <!-- HTML Content Here -->
437     * </DIV></DIV>
438     * }</DIV>
439     *
440     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE2>
441     *
442     * <BR /><BR /><B CLASS=JDDescLabel>'Count' Computation-Heuristic:</B>
443     * 
444     * <BR />This maximum and minimum depth count will not pay any attention to whether HTML open
445     * and close tags "enclose each-other" or are "interleaved."  The actual mechanics of the
446     * for-loop which calculates the {@code count} shall hopefully explain this computation
447     * clearly enough.  This may be viewed in this method's hilited source-code, below.
448     *
449     * <BR /><BR /><B CLASS=JDDescLabel>Var-Args Addition:</B>
450     * 
451     * <BR />This method differs from the method with an identical name (defined above) in that it
452     * adds a <I>{@code String}-VarArgs parameter</I> that allows a user to decide which tags he
453     * would like counted and returned in this {@code Hashtable}, and which he would like to ignore.
454     * 
455     * <BR /><BR />If one of the requested HTML-Tags from this {@code String}-VarArgs parameter is not
456     * actually an HTML Element present on the page, the returned {@code Hashtable} will still
457     * contain an {@code int[]}-Array for that tag.  The values in that array will be equal to
458     * zero.
459     *
460     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
461     * 
462     * @return The returned {@code Hashtable} will contain an integer-array for each HTML Element
463     * that was found on the page.  Each of these arrays shall be of length {@code 3}.
464     * 
465     * <BR /><BR /><OL CLASS=JDUL>
466     * <LI>Minimum Depth: {@code return_array[0]}</LI>
467     * <LI>Maximum Depth: {@code return_array[1]}</LI>
468     * <LI>Total Count: {@code return_array[2]}</LI>
469     * </OL>
470     *
471     * <BR /><BR /><B><SPAN STYLE="color: red;">REDUNDANCY NOTE:</SPAN></B> The third element of
472     * the returned array should be identical to the result produced by an invocation of method:
473     * {@code Balance.checkTag(html, htmlTag);}
474     * 
475     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
476     * 
477     * @throws SingletonException If this {@code 'htmlTag'} is a {@code 'singleton'}
478     * (Self-Closing) Tag, this exception will throw.
479     */
480    public static Hashtable<String, int[]> depth(Vector<? super TagNode> html, String... htmlTags)
481    {
482        // Check that these are all valid HTML Tags, throw an exception if not.
483        htmlTags = ARGCHECK.htmlTags(htmlTags);
484
485        Hashtable<String, int[]> ht = new Hashtable<>();
486
487        // Initialize the temporary hash-table.  This will be discarded at the end of the method,
488        // and converted into a parallel array.  (Parallel to the input String... htmlTags array).
489        // Also, check to make sure the user hasn't requested a count of Singleton HTML Elements.
490
491        for (String htmlTag : htmlTags)
492        {
493            if (HTMLTags.isSingleton(htmlTag)) throw new SingletonException(
494                "One of the tags you have passed: [" + htmlTag + "] is a singleton-tag, " +
495                "and is only allowed opening versions of the tag."
496            );
497
498            // Insert an initialized array (init to zero) for this HTML Tag/Token
499            int[] arr = new int[3];
500
501            arr[0] = 0;     // Current Minimum Depth Count for HTML Element "tn.tok" is zero
502            arr[1] = 0;     // Current Maximum Depth Count for HTML Element "tn.tok" is zero
503            arr[2] = 0;     // Current Computed Depth Count is HTML Element "tn.tok" is zero
504
505            ht.put(htmlTag, arr);
506        }
507
508        // Iterate through the HTML List, we are only counting HTML Elements, not text,
509        // and not HTML Comments
510
511        for (Object o: html) if (o instanceof TagNode) 
512        {
513            TagNode tn = (TagNode) o;
514
515            int[] curMaxAndMinArr = ht.get(tn.tok);
516
517            // If this is null, we are attempting to perform the count on an HTML Element that
518            // wasn't requested by the user with the var-args 'String... htmlTags' parameter.
519            // The Hashtable was initialized to only have those tags. (see about 5 lines above 
520            // where the Hashtable is initialized)
521
522            if (curMaxAndMinArr == null) continue;
523
524            // An opening-version (TC.OpeningTags, For Instance <DIV ...>) will ADD 1 to the count
525            // A closing-tag (For Instance: </DIV>) will SUBTRACT 1 from the count
526
527            curMaxAndMinArr[2] += tn.isClosing ? -1 : 1;
528
529            // If the current depth-count is a "New Minimum" (a new low! :), then save it in the
530            // minimum pos of the output-array.
531
532            if (curMaxAndMinArr[2] < curMaxAndMinArr[0]) curMaxAndMinArr[0] = curMaxAndMinArr[2];
533
534            // If the current depth-count (for this tag) is a "New Maximum" (a new high), save it
535            // to the max-pos of the output-array.
536
537            if (curMaxAndMinArr[2] > curMaxAndMinArr[1]) curMaxAndMinArr[1] = curMaxAndMinArr[2];
538
539            // NOTE:    No need to update the hash-table, since this is an array - changing its
540            //          values is already "reflected" into the Hashtable.
541        }
542
543        return ht;
544    }
545
546
547    /**
548     * Creates a {@code Hashtable} that has a maximum and minimum depth for all HTML tags found on
549     * the page.  Any HTML Tags that meet ALL of these criteria shall be removed from the
550     * result-set {@code Hashtable} ...
551     * 
552     * <BR /><BR /><UL CLASS=JDUL>
553     * <LI>Minimum Depth Is {@code '0'} - i.e. <I>closing tag never precedes opening.</I></LI>
554     * <LI>Count is {@code '0'} - i.e. <I>there is a {@code 1-to-1} ratio of opening and closing
555     * tags</I> for the particular HTML Element.</LI>
556     * </UL>
557     * 
558     * <BR /><BR /><B>NOTE:</B> This means that there is a {@code 1:1} ratio of opening and closing
559     * versions of the tag, <B><I>and also</I></B> that there are no positions in the vector where
560     * a closing tag comes before the opening tag that it would close.
561     *
562     * <BR /><BR /><B CLASS=JDDescLabel>Cloned Input:</B>
563     * 
564     * <BR />This method clones the original input {@code Hashtable}, and removes the tags whose
565     * depth-calculations are invalid - as described above.  This allows the user to perform other
566     * operations with the original table, while this class is processing.
567     *
568     * @param ht This should be a {@code Hashtable} that was produced by a call to one of the two
569     * available {@code depth(...)} methods.
570     * 
571     * @return This shall return a table of HTML Tags that are <I>potentially (but not guaranteed
572     * to be)</I> invalid.
573     */
574    public static Hashtable<String, int[]> depthInvalid(Hashtable<String, int[]> ht)
575    {
576        @SuppressWarnings("unchecked")
577        Hashtable<String, int[]>    ret     = (Hashtable<String, int[]>) ht.clone();
578        Enumeration<String>         keys    = ret.keys();
579
580        // Using the "Enumeration" class allows the situation where elements can be removed from
581        // the underlying data-structure - while iterating through that data-structure.  This is
582        // not possible using a keySet Iterator.
583
584        while (keys.hasMoreElements())
585        {
586            String  key = keys.nextElement();
587            int[]   arr = ret.get(key);
588
589            if ((arr[1] >= 0) && (arr[2] == 0)) ret.remove(key);
590        }
591
592        return ret;
593    }
594
595    /**
596     * Creates a {@code Hashtable} that has a maximum and minimum depth for all HTML tags found on
597     * the page.  Any HTML Tags that meet ALL of these criteria, below, shall be removed from the
598     * result-set {@code Hashtable} ...
599     * 
600     * <BR /><BR /><UL CLASS=JDUL>
601     * <LI> Maximum Depth is precisely {@code '1'} - i.e. <I>Each element of this tag is closed
602     *      before a second is open.</I>
603     *      </LI>
604     * </UL>
605     * 
606     * <BR /><BR /><B CLASS=JDDescLabel>Cloned Input:</B>
607     * 
608     * <BR />This method clones the original input {@code Hashtable}, and removes the tags whose
609     * maximum-depth is not greater than one.  This allows the user to perform other operations
610     * with the original table, while this class is processing.
611     *
612     * @param ht This should be a {@code Hashtable} that was produced by a call to one of the two
613     * available {@code depth(...)} methods.
614     * 
615     * @return This shall return a table of HTML Tags that are <I>potentially (but not guaranteed
616     * to be)</I>
617     * invalid.
618     */
619    public static Hashtable<String, int[]> depthGreaterThanOne(Hashtable<String, int[]> ht)
620    {
621        @SuppressWarnings("unchecked")
622        Hashtable<String, int[]>    ret     = (Hashtable<String, int[]>) ht.clone();
623        Enumeration<String>         keys    = ret.keys();
624
625        // Using the "Enumeration" class allows the situation where elements can be removed from
626        // the underlying data-structure - while iterating through that data-structure.  This is not
627        // possible using a keySet Iterator.
628
629        while (keys.hasMoreElements())
630        {
631            String  key = keys.nextElement();
632            int[]   arr = ret.get(key);
633
634            if (arr[1] == 1) ret.remove(key);
635        }
636
637        return ret;
638    }
639
640
641    /**
642     * This method will calculate the "Maximum" and "Minimum" depth for a particular HTML Tag.
643     * The Max-Depth just means the number of Maximum-Number of Opening HTML Element Opening Tags
644     * were found, before a matching closing version of the same Element is encountered.  For
645     * instance: {@code <DIV ...><DIV ..> Some Page</DIV></DIV>} has a maximum depth of
646     * {@code '2'}.  This means there is a point in the vectorized-html where there are 2
647     * successive divider elements that are opened, before even one has been closed.
648     *
649     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE2>
650     *
651     * <BR /><BR /><B CLASS=JDDescLabel>'Count' Computation-Heuristic:</B>
652     * 
653     * <BR />This maximum and minimum depth count will not pay any attention to whether HTML open
654     * and close tags "enclose each-other" or are "interleaved."  The actual mechanics of the
655     * for-loop which calculates the {@code count} shall hopefully explain this computation
656     * clearly enough.  This may be viewed in this method's hilited source-code, below.
657     *
658     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
659     * 
660     * @param htmlTag This is the HTML element whose maximum and minimum depth-count needs to be
661     * computed.
662     * 
663     * @return The returned integer-array, shall be of length 3.
664     * 
665     * <BR /><BR /><OL CLASS=JDUL>
666     * <LI>Minimum Depth: {@code return_array[0]}</LI>
667     * <LI>Maximum Depth: {@code return_array[1]}</LI>
668     * <LI>Total Count: {@code return_array[2]}</LI>
669     * </OL>
670     * 
671     * <BR /><BR /><B><SPAN STYLE="color: red;">REDUNDANCY NOTE:</SPAN></B> The third element of
672     * the returned array should be identical to the result produced by an invocation of method:
673     * {@code Balance.checkTag(html, htmlTag);}
674     * 
675     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
676     * 
677     * @throws SingletonException If this {@code 'htmlTag'} is a {@code 'singleton'} (Self-Closing)
678     * Tag, this exception will throw.
679     */
680    public static int[] depthTag(Vector<? super TagNode> html, String htmlTag)
681    {
682        // Check that this is a valid HTML Tag, throw an exception if invalid
683        htmlTag = ARGCHECK.htmlTag(htmlTag);
684
685        if (HTMLTags.isSingleton(htmlTag)) throw new SingletonException(
686            "The tag you have passed: [" + htmlTag + "] is a singleton-tag, and is only allowed " +
687            "opening versions of the tag."
688        );
689
690        TagNode tn;     int i = 0;      int max = 0;        int min = 0;
691
692        // Iterate through the HTML List, we are only counting HTML Elements, not text, and not HTML Comments
693        for (Object o : html) if (o instanceof TagNode)
694
695            if ((tn = (TagNode) o).tok.equals(htmlTag))
696            {
697                // An opening-version (TC.OpeningTags, For Instance <DIV ...>) will ADD 1 to the count
698                // A closing-tag (For Instance: </DIV>) will SUBTRACT 1 from the count
699
700                i += tn.isClosing ? -1 : 1;
701
702                if (i > max) max = i;
703                if (i < min) min = i;
704            }
705
706        // Generate the output array, and return
707        int[] ret = new int[2];
708
709        ret[0] = min;
710        ret[1] = max;
711        ret[2] = i;
712
713        return ret;
714    }
715
716    /**
717     * This will find the (likely) places where the "non-nested HTML Elements" have become nested.
718     * For the purposes of finding mismatched elements - such as an unclosed "Italics" Element, or
719     * an "Extra" Italics Element - this method will find places where a new HTML Tag has opened
720     * before a previous one has been closed - <I>or vice-versa (where there is an 'extra'
721     * closed-tag).</I> 
722     * 
723     * <BR /><BR />Certainly, if "nesting" is usually acceptable (for instance the HTML divider
724     * {@code '<DIV>...</DIV>'} construct) <I><B>then the results of this method would not have any
725     * meaning.</I></B>  Fortunately, for the vast majority of HTML Elements {@code <I>, <B>, <A>,
726     * etc...} nesting the tags is not allowed or encouraged. 
727     *
728     * <BR /><BR />The following example use of this method should make clear the application.  If
729     * a user has identified that there is an unclosed HTML italics element ({@code <I>...</I>})
730     * somewhere on a page, for-example, and that page has numerous italics elements, this method
731     * can pinpoint the failure instantly, using this example.  Note that the file-name is a
732     * Java-Doc generated output HTML file.  The documentation for this package received a copious
733     * amount of attention due to the sheer number of method-names and class-names used throughout.
734     * 
735     * <DIV CLASS="EXAMPLE">{@code 
736     * String           fStr    = FileRW.loadFileToString("javadoc/Torello/HTML/TagNode.html");
737     * Vector<HTMLNode> v       = HTMLPage.getPageTokens(fStr, false);
738     * int[]            posArr  = Balance.nonNestedCheck(v, "i");
739     * 
740     * // Below, the class 'Debug' is used to pretty-print the vectorized-html page.  Here, the
741     * // output will find the lone, non-closed, HTML italics <I> ... </I> tag-element, and output
742     * // it to the terminal-window.  The parameter '5' means the nearest 5 elements (in either
743     * // direction) are printed, in addition to the elements at the indices in the posArr.
744     * // Parameter 'true' implies that two curly braces are printed surrounding the matched node.
745     * 
746     * System.out.println(Debug.print(v, posArr, 5, " Skip a few ", true, Debug::K));
747     * }</DIV>
748     * 
749     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
750     * 
751     * @param htmlTag This the html element whose maximum and minimum depth-count was not {@code 1}
752     * and {@code 0}, respectively.  The precise location where the depth achieved either a
753     * negative depth, or depth greater than {@code 1} will be returned in the integer array.  In
754     * English: When two opening-tags or two closing-tags are identified, successively, then the
755     * index where the second tag was found is recorded into the output array.
756     * 
757     * @return This will return an array of vectorized-html index-locations / index-pointers where
758     * the first instance of an extra opening, or an extra-closing tag, occurs.  This will
759     * facilitate finding tags that are not intended to be nested.  If "tag-nesting" (for example
760     * HTML divider, {@code 'DIV'}, elements), then the results returned by this method will not be
761     * useful.
762     * 
763     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
764     * 
765     * @throws SingletonException If this {@code 'htmlTag'} is a {@code 'singleton'} (Self-Closing)
766     * Tag, this exception will throw.
767     * 
768     * @see FileRW#loadFileToString(String)
769     * @see HTMLPage#getPageTokens(CharSequence, boolean)
770     * @see Debug#print(Vector, int[], int, String, boolean, BiConsumer)
771     */
772    public static int[] nonNestedCheck(Vector<? super TagNode> html, String htmlTag)
773    {
774        // Java Streams are an easier way to keep variable-length lists.  They use
775        // "builders" - and this one is for an "IntStream"
776
777        IntStream.Builder b = IntStream.builder();
778
779        // Check that this is a valid HTML Tag, throw an exception if invalid
780        htmlTag = ARGCHECK.htmlTag(htmlTag);
781
782        if (HTMLTags.isSingleton(htmlTag)) throw new SingletonException(
783            "The tag you have passed: [" + htmlTag + "] is a singleton-tag, and is only " +
784            "allowed opening versions of the tag."
785        );
786
787        Object o;     TagNode tn;     int len = html.size();      TC last = null;
788
789        // Iterate through the HTML List, we are only counting HTML Elements, not text,
790        // and not HTML Comments
791
792        for (int i=0; i < len; i++)
793
794            if ((o = html.elementAt(i)) instanceof TagNode) 
795                if ((tn = (TagNode) o).tok.equals(htmlTag))
796                {
797                    if ((tn.isClosing)      && (last == TC.ClosingTags))    b.add(i);
798                    if ((! tn.isClosing)    && (last == TC.OpeningTags))    b.add(i);
799
800                    last = tn.isClosing ? TC.ClosingTags : TC.OpeningTags;
801                }
802
803        return b.build().toArray();
804    }
805
806    /**
807     * For likely greater than 95% of HTML tags - finding situations where that tag has 
808     * <I><B>'nested tags'</I></B> is highly unlikely.  Unfortunately, two or three of the most
809     * common tags in use, for instance {@code <DIV>, <SPAN>}, finding where a mis-match has
810     * occurred (tracking down an "Unclosed divider") is an order of magnitude more difficult than
811     * finding an unclosed anchor {@code '<A HREF...>'}.  This method shall return two parallel
812     * arrays.  The first array will contain vector indices.  The second array contains the depth
813     * (nesting level) of that tag at that position.  In this way, finding an unclosed divider is
814     * tantamount to finding where all closing-dividers seem to evaluate to a depth of '1' (one)
815     * rather than '0' (zero). 
816     * 
817     * <BR /><BR /><B>NOTE:</B> This method can highly useful for SPAN and DIV, while the
818     * "non-standard depth locations" method can be extremely useful for simple, non-nested tags
819     * such as Anchor, Paragraph, Section, etc... - HTML Elements that are mostly never nested.
820     * 
821     * <DIV CLASS="EXAMPLE">{@code
822     * // Load an HTML File to a String
823     * String file = LFEC.loadFile("~/HTML/MyHTMLFile.html");
824     * 
825     * // Parse, and convert to vectorized-html
826     * Vector<HTMLNode> v = HTMLPage.getPageTokens(file, false);
827     * 
828     * // Run this method
829     * Ret2<int[], int[]> r = Balance.locationsAndDepth(v, "div");
830     * 
831     * // This array has vector-indices
832     * int[] posArr = (int[]) r.a;
833     * 
834     * // This (parallel) array has the depth at that index.
835     * int[] depthArr = (int[]) r.b;
836     * 
837     * for (int i=0; i < posArr.length; i++) System.out.println(
838     *     "(" + posArr[i] + ", " + depthArr[i] + "):\t" +    // Prints the Vector-Index, and Depth
839     *     C.BRED + v.elementAt(posArr[i]).str + C.RESET      // Prints the actual HTML divider.
840     * );
841     * }</DIV>
842     * 
843     * <BR />The above code would produce a list of HTML Divider elements, along with their index
844     * in the {@code Vector}, and the exact depth (number of nested, open {@code 'DIV'} elements)
845     * at that location.  This is usually helpful when trying to find unclosed HTML Tags.
846     * 
847     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
848     * 
849     * @param htmlTag This the html element that has an imbalanced OPEN-CLOSE ratio in the tree.
850     * 
851     * @return Two parallel arrays, as follows:
852     * 
853     * <BR /><BR /><OL CLASS=JDOL>
854     * <LI> {@code Ret2.a (int[])}
855     *      <BR /><BR />
856     *      This shall be an integer array of {@code Vector}-indices where the HTML Element has
857     *      been found.
858     *      <BR /><BR />
859     * </LI>
860     * <LI> {@code Ret2.b (int[])}
861     *      <BR /><BR />
862     *      This shall contain an array of the value of the depth for the {@code 'htmlTag'}
863     *      at the particular {@code Vector}-index identified in the first-array.
864     * </LI>
865     * </OL>
866     * 
867     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
868     * 
869     * @throws SingletonException If this {@code 'htmlTag'} is a {@code 'singleton'} (Self-Closing)
870     * Tag, this exception will throw.
871     */
872    public static Ret2<int[], int[]> locationsAndDepth(Vector<? super TagNode> html, String htmlTag)
873    {
874        // Java Streams are an easier way to keep variable-length lists.  They use
875        // "builders" - and this one is for an "IntStream"
876
877        IntStream.Builder locations         = IntStream.builder();
878        IntStream.Builder depthAtLocation   = IntStream.builder();
879
880        // Check that this is a valid HTML Tag, throw an exception if invalid
881        htmlTag = ARGCHECK.htmlTag(htmlTag);
882
883        if (HTMLTags.isSingleton(htmlTag)) throw new SingletonException(
884            "The tag you have passed: [" + htmlTag + "] is a singleton-tag, and is only " +
885            "allowed opening versions of the tag."
886        );
887
888        Object o;     TagNode tn;     int len = html.size();      int depth = 0;
889
890        // Iterate through the HTML List, we are only counting HTML Elements, not text, and
891        // not HTML Comments
892
893        for (int i=0; i < len; i++) if ((o = html.elementAt(i)) instanceof TagNode) 
894
895        if ((tn = (TagNode) o).tok.equals(htmlTag))
896            {
897                depth += tn.isClosing ? -1 : 1;
898
899                locations.add(i);
900
901                depthAtLocation.add(depth);
902            }
903
904        return new Ret2<int[], int[]>
905            (locations.build().toArray(), depthAtLocation.build().toArray());
906    }
907
908    /**
909     * Converts a depth report to a {@code String}, for printing.
910     * 
911     * @param depthReport This should be a {@code Hashtable} returned by any of the depth-methods.
912     * 
913     * @return This shall return the report as a {@code String}.
914     */
915    public static String toStringDepth(Hashtable<String, int[]> depthReport)
916    {
917        StringBuilder sb = new StringBuilder();
918
919        for (String htmlTag : depthReport.keySet())
920        {
921            int[] arr = depthReport.get(htmlTag);
922
923            sb.append(
924                "HTML Element: [" + htmlTag + "]:\t" +
925                "Min-Depth: " + arr[0] + ",\tMax-Depth: " + arr[1] + ",\tCount: " + arr[2] + "\n"
926            );
927        }
928
929        return sb.toString();
930    }
931
932
933    /**
934     * Converts a balance report to a {@code String}, for printing.
935     * 
936     * @param balanceCheckReport This should be a {@code Hashtable} returned by any of the
937     * balance-check methods.
938     * 
939     * @return This shall return the report as a {@code String}.
940     */
941    public static String toStringBalance(Hashtable<String, Integer> balanceCheckReport)
942    {
943        StringBuilder   sb              = new StringBuilder();
944        int             maxTagLen       = 0;
945        int             maxValStrLen    = 0;
946        int             maxAbsValStrLen = 0;
947        int             val;
948        String          valAsStr;
949
950        // For good spacing purposes, we need the length of the longest of the tags.
951        for (String htmlTag : balanceCheckReport.keySet())
952            if (htmlTag.length() > maxTagLen)
953                maxTagLen = htmlTag.length();
954
955        // 17 is the length of the string below, 2 is the amount of extra-space needed
956        maxTagLen += 17 + 2; 
957
958        for (int v : balanceCheckReport.values())
959            if ((valAsStr = ("" + v)).length() > maxValStrLen)
960                maxValStrLen = valAsStr.length();
961
962        for (int v : balanceCheckReport.values())
963            if ((valAsStr = ("" + Math.abs(v))).length() > maxAbsValStrLen)
964                maxAbsValStrLen = valAsStr.length();
965
966        for (String htmlTag : balanceCheckReport.keySet())
967
968            sb.append(
969                StringParse.rightSpacePad("HTML Element: [" + htmlTag + "]:", maxTagLen) +
970                StringParse.rightSpacePad(
971                    ("" + (val = balanceCheckReport.get(htmlTag).intValue())),
972                    maxValStrLen
973                ) +
974                NOTE(val, htmlTag, maxAbsValStrLen) +
975                "\n"
976            );
977
978        return sb.toString();
979    }
980
981    private static String NOTE(int val, String htmlTag, int padding)
982    {
983        if (val == 0) return "";
984
985        else if (val > 0) return
986            ", which implies " + StringParse.rightSpacePad("" + Math.abs(val), padding) +
987            " unclosed <" + htmlTag + "> element(s)";
988
989        else return
990            ", which implies " + StringParse.rightSpacePad("" + Math.abs(val), padding) +
991            " extra </" + htmlTag + "> element(s)";
992    }
993
994    /**
995     * Converts a balance report to a {@code String}, for printing.
996     * 
997     * @param balanceCheckReport This should be a {@code Hashtable} returned by any of the
998     * balance-check methods.
999     * 
1000     * @return This shall return the report as a {@code String}.
1001     * 
1002     * @throws IllegalArgumentException This exception throws if the length of the two input arrays
1003     * are not equal.  It is imperative that the balance report being printed was created by the
1004     * html-tags that are listed in the HTML Token var-args parameter.  If the two arrays are the
1005     * same length, but the tags used to create the report Hashtable are not the same ones being
1006     * passed to the var-args parameter {@code 'htmlTags'} - <I>the logic will not know the
1007     * difference, and no exception is thrown.</I>
1008     */
1009    public static String toStringBalance(int[] balanceCheckReport, String... htmlTags)
1010    {
1011        if (balanceCheckReport.length != htmlTags.length) throw new IllegalArgumentException(
1012            "The balance report that you are checking was not generated using the html token " +
1013            "list provided, they are different lengths.  balanceCheckReport.length: " +
1014            "[" + balanceCheckReport.length + "]\t htmlTags.length: [" + htmlTags.length + "]"
1015        );
1016
1017        StringBuilder sb = new StringBuilder();
1018
1019        for (int i=0; i < balanceCheckReport.length; i++)
1020            sb.append("HTML Element: [" + htmlTags[i] + "]:\t" + balanceCheckReport[i] + "\n");
1021
1022        return sb.toString();
1023    }
1024
1025}