// Balance.java.html

package Torello.HTML;

import Torello.HTML.NodeSearch.*;
import Torello.Java.FileRW; // used in @see comments
import Torello.Java.StringParse;
import Torello.Java.Additional.Ret2;

import java.util.*;
import java.util.stream.IntStream;

/**
 * Utilities for checking that opening and closing {@link TagNode} elements match up (that the HTML
 * is balanced).
 * 
 * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE>
 */
@Torello.JavaDoc.StaticFunctional
public class Balance
{
    // Private, empty constructor: every method visible in this class is static, so no instance
    // of 'Balance' is needed, and none can be created from outside the class.
    private Balance() { }

    /**
     * Invokes:
     * 
     * <BR /><BR /><UL CLASS=JDUL>
     *  <LI>{@link #check(Vector)}</LI>
     *  <LI>{@link #checkNonZero(Hashtable)}</LI>
     *  <LI>{@link #toStringBalance(Hashtable)}</LI>
     * </UL>
     * 
     * <DIV CLASS=EXAMPLE>{@code
     * String b = Balance.CB(a.articleBody);
     * System.out.println((b == null) ? "Page has Balanced HTML" : b);
     * 
     * // If Page has equal number of open and close tags prints:
     * // Page Has Balanced HTML
     * // OTHERWISE PRINTS REPORT
     * }</DIV>
     * 
     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
     * 
     * @return Will return null if the snippet or page has 'balanced' HTML, otherwise returns the
     * trimmed balance-report as a {@code String}.
     */
    public static String CB(Vector<HTMLNode> html)
    {
        // Step 1: tally opening (+1) and closing (-1) tags for every element in the vector
        Hashtable<String, Integer> allCounts = check(html);

        // Step 2: keep only the elements whose tally did not come out to zero
        Hashtable<String, Integer> unbalanced = checkNonZero(allCounts);

        // Step 3: render the remaining tallies as a text report
        String report = toStringBalance(unbalanced);

        // A zero-length report is signalled to the caller as null, rather than as ""
        if (report.length() == 0) return null;

        return report;
    }

    /**
     * Creates a {@code Hashtable} that has a count of all open and closed HTML tags found on the
     * page.
     *
     * <BR /><BR />This {@code Hashtable} may be regarded as maintaining "counts" on each-and-every
     * HTML tag to identify whether there is <I><B>a one-to-one balance mapping between opening and
     * closing tags</B></I> for each element.  When the {@code Hashtable} generated by
     * this method is non-zero (for a particular HTML-Tag) it means that there are an unequal
     * number of opening and closing elements for that tag.
     * 
     * <BR /><BR />Suppose this method were to produce a {@code Hashtable}, and that
     * {@code Hashtable} queried for a count on the HTML <B CLASS=JDHTags>{@code <DIV>}</B> tag
     * (dividers).  If that count turned out to be a non-zero positive number it would mean that
     * the Vectorized-HTML had more opening <B CLASS=JDHTags>{@code <DIV>}</B> tags than the 
     * number of closing <B CLASS=JDHTags>{@code </DIV>}</B> tags on that page.
     * 
     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE1> <!-- Validity Note -->
     * 
     * <BR /><BR />The following example will help explain the use of this method.  If an HTML page
     * needs to be checked to see that all elements are properly opened and closed, this method can
     * be used to return a list of any HTML element tag that does not have an equal number of
     * opening and closing tags.
     * 
     * <BR /><BR />In this example, the generated Java-Doc HTML-Page for class {@code TagNode} is
     * checked.
     * 
     * <DIV CLASS="EXAMPLE">{@code
     * String                      html    = FileRW.loadFileToString(htmlFileName);
     * Vector<HTMLNode>            v       = HTMLPage.getPageTokens(html, false);
     * Hashtable<String, Integer>  b       = Balance.check(v);
     * StringBuffer                sb      = new StringBuffer();
     *
     * // This part just prints a text-output to a string buffer, which is printed to the screen.
     * for (String key : b.keySet())
     * {
     *     Integer i = b.get(key);
     * 
     *     // Only print keys that had a "non-zero count"
     *     // A Non-Zero-Count implies Opening-Tag-Count and Closing-Tag-Count are not equal!
     * 
     *     if (i.intValue() != 0) sb.append(key + "\t" + i.intValue() + "\n");
     * }
     * 
     * // This example output was: "i   -1", because of an unclosed italics element.
     * // NOTE: To find where this unclosed element is, use method: nonNestedCheck(Vector, String)
     * }</DIV>
     * 
     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
     * 
     * @return A {@code Hashtable} map of the count of each HTML-Tag present in the
     * input {@code Vector}.
     * 
     * <BR /><BR />For instance, if this {@code Vector} had five
     * <B CLASS=JDHTags>{@code <A HREF=...>}</B> (Anchor-Link) tags, and six
     * <B CLASS=JDHTags>{@code </A>}</B> tags, then the returned {@code Hashtable} would have a
     * {@code String}-key equal to {@code "A"} with an integer value of {@code -1}.
     * 
     * @see FileRW#loadFileToString(String)
     * @see HTMLPage#getPageTokens(CharSequence, boolean)
     */
    public static Hashtable<String, Integer> check(Vector<? super TagNode> html)
    {
        Hashtable<String, Integer> counts = new Hashtable<>();

        for (Object node : html)
        {
            // Only TagNode instances participate in the tally; every other node is skipped
            if (! (node instanceof TagNode)) continue;

            TagNode tag = (TagNode) node;

            // Singleton ('self-closing') tags such as BR, HR, IMG never have a closing partner,
            // so they are left out of the table entirely.
            if (HTMLTags.isSingleton(tag.tok)) continue;

            // Opening tag (e.g. <DIV ...>) contributes +1, closing tag (e.g. </DIV>) contributes -1
            int delta = tag.isClosing ? -1 : 1;

            // First sighting of a tag stores 'delta' itself; afterwards 'delta' is added to the
            // stored tally.  A tally that returns to zero stays in the table with value 0.
            counts.merge(tag.tok, delta, Integer::sum);
        }

        return counts;
    }

    /**
     * Creates an array that includes an open-and-close {@code 'count'} for each HTML-Tag
     * that was requested via the passed input {@code String[]}-Array parameter {@code 'htmlTags'}.
     * 
     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE1> <!-- Validity Note -->
     * 
     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
     * 
     * <BR /><BR />The HTML-Element Open-Close-Counts are computed from this page.
     * 
     * @param htmlTags This may be one, or many, HTML-Tags whose open-close count needs to be
     * computed.  Any HTML Element that is not present in this list - <I>will not have a count
     * computed.</I>
     * 
     * <BR /><BR />The {@code count} results which are stored in an {@code int[]}-Array that should
     * be considered "parallel" to this input Var-Args-Array.
     * 
     * @return An array of the count of each html-element present in the input vectorized-html
     * parameter {@code 'html'}.
     * For instance, If the following values were passed to this method:
     * 
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI> A Vectorized-HTML page that had 5 {@code '<SPAN ...>'} open-elements, and 6
     *      {@code '</SPAN>'} closing {@code SPAN}-Tags.
     *      </LI>
     * 
     * <LI> And at least one of the {@code String's} in the Var-Args parameter {@code 'htmlTags'}
     *      was equal to the {@code String} {@code "SPAN"} (case insensitive).
     *      </LI>
     * 
     * <LI> <B>==&gt;</B> Then the array-position corresponding to the position in array 
     *      {@code 'htmlTags'} that had the {@code "SPAN"} would have a value of {@code '-1'}.
     *      </LI>
     * </UL>
     * 
     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
     * 
     * @throws SingletonException If any of the {@code String}-Tags passed to parameter
     * {@code 'htmlTags'} are {@code 'singleton'} (Self-Closing) Tags, then this exception throws
     */
    public static int[] check(Vector<? super TagNode> html, String... htmlTags)
    {
        // Validate the requested tags; the checker's return value replaces the caller's array
        htmlTags = ARGCHECK.htmlTags(htmlTags);

        // Working table: one entry per requested tag, each starting at a tally of zero
        Hashtable<String, Integer> counts = new Hashtable<>();

        for (String requested : htmlTags)
        {
            // Singleton tags have no closing form, so an open/close tally is meaningless for them
            if (HTMLTags.isSingleton(requested)) throw new SingletonException(
                "One of the tags you have passed: [" + requested + "] is a singleton-tag, " +
                "and is only allowed opening versions of the tag."
            );

            counts.put(requested, Integer.valueOf(0));
        }

        for (Object node : html)
        {
            // Only TagNode instances participate in the tally
            if (! (node instanceof TagNode)) continue;

            TagNode tag     = (TagNode) node;
            Integer current = counts.get(tag.tok);

            // Tags that were not requested have no entry in the working table - ignore them
            if (current == null) continue;

            // Opening tag adds 1 to the tally, closing tag subtracts 1
            int delta = tag.isClosing ? -1 : 1;

            counts.put(tag.tok, Integer.valueOf(current.intValue() + delta));
        }

        // Copy the tallies into an array parallel to 'htmlTags'.  A freshly allocated int[] is
        // already zero-filled, so positions without a table entry simply remain 0.
        int[] ret = new int[htmlTags.length];

        for (int pos = 0; pos < htmlTags.length; pos++)
        {
            Integer tally = counts.get(htmlTags[pos]);

            if (tally != null) ret[pos] = tally.intValue();
        }

        return ret;
    }

    /**
     * Creates a {@code Hashtable} that has a count of all open and closed HTML-Tags found on
     * the page - whose count-value is not equal to zero.
     * 
     * <BR /><BR />This method will report when there are unbalanced HTML-Tags on a page, <I><B>and
     * strictly ignore any &amp; all tags with a count of zero</B></I>.  Specifically, if a tag has
     * a {@code 1-to-1} open-close count, then it will not have any keys available in the returned
     * {@code Hashtable}.
     *
     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE1> <!-- Validity Note -->
     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_CLONE> <!-- Clone Note -->
     *
     * @param ht This should be a {@code Hashtable} that was produced by a call to one of the two
     * available {@code check(...)} methods.
     * 
     * @return A {@code Hashtable} map of the count of each html-element present in this
     * {@code Vector}.  For instance, if this {@code Vector} had 5 {@code '<A ...>'} (Anchor-Link)
     * elements, and six {@code '</A>'} then this {@code Hashtable} would have a {@code String}-key
     * {@code 'a'} with an integer value of {@code '-1'}.
     */
    public static Hashtable<String, Integer> checkNonZero(Hashtable<String, Integer> ht)
    {
        // Work on a copy of the table, so the caller's instance is left exactly as it was passed
        @SuppressWarnings("unchecked")
        Hashtable<String, Integer> nonZero = (Hashtable<String, Integer>) ht.clone();

        // The values() collection is a live view of the table: dropping a value from the view
        // drops its key/value mapping from 'nonZero'.  Every mapping with a '0' tally goes.
        nonZero.values().removeIf(tally -> tally.intValue() == 0);

        return nonZero;
    }


    /**
     * This will compute a {@code count} for just one, particular, HTML Element of whether that
     * Element has been properly opened and closed.  An open and close {@code count} (integer
     * value) will be returned by this method.
     * 
     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE1> <!-- Validity Note -->
     * 
     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
     * 
     * @param htmlTag This is the HTML element whose open-close count needs to be kept.
     * 
     * @return The count of each html-element present in this {@code Vector}.  For instance, if the
     * user had requested that HTML Anchor Links be counted, and if the input {@code Vector} had 5
     * {@code '<A ...>'} (Anchor-Link) elements, and six {@code '</A>'} then this method would
     * return {@code -1}.
     * 
     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
     * 
     * @throws SingletonException If this {@code 'htmlTag'} is a {@code 'singleton'} (Self-Closing)
     * Tag, this exception will throw.
     */
    public static int checkTag(Vector<? super TagNode> html, String htmlTag)
    {
        // Validate the requested tag; the checker's return value replaces the caller's string
        htmlTag = ARGCHECK.htmlTag(htmlTag);

        // Singleton tags have no closing form, so an open/close tally is meaningless for them
        if (HTMLTags.isSingleton(htmlTag)) throw new SingletonException(
            "The tag you have passed: [" + htmlTag + "] is a singleton-tag, and is only " +
            "allowed opening versions of the tag."
        );

        int balance = 0;

        for (Object node : html)
        {
            // Only TagNode instances participate in the tally
            if (! (node instanceof TagNode)) continue;

            TagNode tag = (TagNode) node;

            // Tags other than the requested one do not affect the result
            if (! tag.tok.equals(htmlTag)) continue;

            // Opening tag (e.g. <DIV ...>) adds 1, closing tag (e.g. </DIV>) subtracts 1
            if (tag.isClosing)  balance--;
            else                balance++;
        }

        return balance;
    }


    /**
     * This method will calculate the "Maximum" and "Minimum" depth for every HTML 5.0 Tag found on
     * a page.  The Max-Depth is the "Maximum-Number" of Opening HTML Element Opening Tags were
     * found for a particular element, before a matching closing version of the same Element is
     * encountered. In the example below, the maximum "open-count" for the HTML 'divider' Element
     * ({@code <DIV>}) is {@code '2'}.  This is because a second {@code <DIV>} element is opened
     * before the first is closed.
     *
     * <DIV CLASS="HTML">{@code
     * <DIV class="MySection"><H1>These are my ideas:</H1>
     * <!-- Above is an outer divider, below is an inner divider -->
     * <DIV class="MyNumbers">Here are the points:
     * <!-- HTML Content Here -->
     * </DIV></DIV>
     * }</DIV>
     *
     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE2>
     *
     * <BR /><BR /><B CLASS=JDDescLabel>'Count' Computation-Heuristic:</B>
     * 
     * <BR />This maximum and minimum depth count will not pay any attention to whether HTML open
     * and close tags "enclose each-other" or are "interleaved."  The actual mechanics of the
     * for-loop which calculates the {@code count} shall hopefully explain this computation
     * clearly enough.  This may be viewed in this method's hilited source-code, below.
     *
     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
     * 
     * @return The returned {@code Hashtable} will contain an integer-array for each HTML Element
     * that was found on the page.  Each of these arrays shall be of length {@code 3}.
     * 
     * <BR /><BR /><OL CLASS=JDUL>
     * <LI>Minimum Depth: {@code return_array[0]}</LI>
     * <LI>Maximum Depth: {@code return_array[1]}</LI>
     * <LI>Total Count: {@code return_array[2]}</LI>
     * </OL>
     *
     * <BR /><BR /><B><SPAN STYLE="color: red;">REDUNDANCY NOTE:</SPAN></B> The third element of
     * the returned array should be identical to the result produced by an invocation of method:
     * {@code Balance.checkTag(html, htmlTag);}
     * 
     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
     * 
     * @throws SingletonException If this {@code 'htmlTag'} is a {@code 'singleton'} (Self-Closing)
     * Tag, this exception will throw.
     */
    public static Hashtable<String, int[]> depth(Vector<? super TagNode> html)
    {
        Hashtable<String, int[]> depths = new Hashtable<>();

        for (Object node : html)
        {
            // Only TagNode instances participate in the depth computation
            if (! (node instanceof TagNode)) continue;

            TagNode tag = (TagNode) node;

            // Singleton ('self-closing') tags are not tracked
            if (HTMLTags.isSingleton(tag.tok)) continue;

            // Per-tag statistics: [0] = minimum depth, [1] = maximum depth, [2] = running count
            int[] stats = depths.get(tag.tok);

            if (stats == null)
            {
                // First sighting of this tag: a new int[] starts out with all three slots at 0
                stats = new int[3];
                depths.put(tag.tok, stats);
            }

            // Opening tag (e.g. <DIV ...>) adds 1, closing tag (e.g. </DIV>) subtracts 1
            stats[2] += tag.isClosing ? -1 : 1;

            // Record a new low and/or a new high of the running count.  The array instance is
            // the one stored in the table, so no further 'put' is required.
            stats[0] = Math.min(stats[0], stats[2]);
            stats[1] = Math.max(stats[1], stats[2]);
        }

        return depths;
    }



    /**
     * This method will calculate the "Maximum" and "Minimum" depth for every HTML Tag listed in
     * the {@code var-args String[] htmlTags} parameter.  The Max-Depth is the "Maximum-Number" of
     * Opening HTML Element Opening Tags were found for a particular element, before a matching
     * closing version of the same Element is encountered.  In the example below, the maximum
     * {@code 'open-count'} for the HTML 'divider' Element ({@code <DIV>}) is {@code '2'}.  This is
     * because a second {@code <DIV>} element is opened before the first is closed.
     *
     * <DIV CLASS="HTML">{@code
     * <DIV class="MySection"><H1>These are my ideas:</H1>
     * <!-- Above is an outer divider, below is an inner divider -->
     * <DIV class="MyNumbers">Here are the points:
     * <!-- HTML Content Here -->
     * </DIV></DIV>
     * }</DIV>
     *
     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE2>
     *
     * <BR /><BR /><B CLASS=JDDescLabel>'Count' Computation-Heuristic:</B>
     * 
     * <BR />This maximum and minimum depth count will not pay any attention to whether HTML open
     * and close tags "enclose each-other" or are "interleaved."  The actual mechanics of the
     * for-loop which calculates the {@code count} shall hopefully explain this computation
     * clearly enough.  This may be viewed in this method's hilited source-code, below.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Var-Args Addition:</B>
     * 
     * <BR />This method differs from the method with an identical name (defined above) in that it
     * adds a <I>{@code String}-VarArgs parameter</I> that allows a user to decide which tags he
     * would like counted and returned in this {@code Hashtable}, and which he would like to ignore.
     * 
     * <BR /><BR />If one of the requested HTML-Tags from this {@code String}-VarArgs parameter is not
     * actually an HTML Element present on the page, the returned {@code Hashtable} will still
     * contain an {@code int[]}-Array for that tag.  The values in that array will be equal to
     * zero.
     *
     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
     * 
     * @return The returned {@code Hashtable} will contain an integer-array for each HTML Element
     * that was found on the page.  Each of these arrays shall be of length {@code 3}.
     * 
     * <BR /><BR /><OL CLASS=JDUL>
     * <LI>Minimum Depth: {@code return_array[0]}</LI>
     * <LI>Maximum Depth: {@code return_array[1]}</LI>
     * <LI>Total Count: {@code return_array[2]}</LI>
     * </OL>
     *
     * <BR /><BR /><B><SPAN STYLE="color: red;">REDUNDANCY NOTE:</SPAN></B> The third element of
     * the returned array should be identical to the result produced by an invocation of method:
     * {@code Balance.checkTag(html, htmlTag);}
     * 
     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
     * 
     * @throws SingletonException If this {@code 'htmlTag'} is a {@code 'singleton'}
     * (Self-Closing) Tag, this exception will throw.
     */
    public static Hashtable<String, int[]> depth(Vector<? super TagNode> html, String... htmlTags)
    {
        // Validate the requested tags; the checker's return value replaces the caller's array
        htmlTags = ARGCHECK.htmlTags(htmlTags);

        Hashtable<String, int[]> depths = new Hashtable<>();

        // Seed the table with one statistics array per requested tag, so that a requested tag
        // which never occurs in 'html' is still reported (with all-zero values).
        for (String requested : htmlTags)
        {
            // Singleton tags have no closing form, so a depth is meaningless for them
            if (HTMLTags.isSingleton(requested)) throw new SingletonException(
                "One of the tags you have passed: [" + requested + "] is a singleton-tag, " +
                "and is only allowed opening versions of the tag."
            );

            // [0] = minimum depth, [1] = maximum depth, [2] = running count.
            // A new int[] starts out with all three slots at 0.
            depths.put(requested, new int[3]);
        }

        for (Object node : html)
        {
            // Only TagNode instances participate in the depth computation
            if (! (node instanceof TagNode)) continue;

            TagNode tag   = (TagNode) node;
            int[]   stats = depths.get(tag.tok);

            // No entry means this tag was not requested through 'htmlTags' - ignore it
            if (stats == null) continue;

            // Opening tag (e.g. <DIV ...>) adds 1, closing tag (e.g. </DIV>) subtracts 1
            stats[2] += tag.isClosing ? -1 : 1;

            // Record a new low and/or a new high of the running count.  The array instance is
            // the one stored in the table, so no further 'put' is required.
            stats[0] = Math.min(stats[0], stats[2]);
            stats[1] = Math.max(stats[1], stats[2]);
        }

        return depths;
    }


    /**
     * Creates a {@code Hashtable} that has a maximum and minimum depth for all HTML tags found on
     * the page.  Any HTML Tags that meet ALL of these criteria shall be removed from the
     * result-set {@code Hashtable} ...
     * 
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI>Minimum Depth Is {@code '0'} - i.e. <I>closing tag never precedes opening.</I></LI>
     * <LI>Count is {@code '0'} - i.e. <I>there is a {@code 1-to-1} ratio of opening and closing
     * tags</I> for the particular HTML Element.</LI>
     * </UL>
     * 
     * <BR /><BR /><B>NOTE:</B> This means that there is a {@code 1:1} ratio of opening and closing
     * versions of the tag, <B><I>and also</I></B> that there are no positions in the vector where
     * a closing tag comes before a tag that opens it.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Cloned Input:</B>
     * 
     * <BR />This method clones the original input {@code Hashtable}, and removes the tags whose
     * depth-calculations are invalid - as described above.  This allows the user to perform other
     * operations with the original table, while this class is processing.
     *
     * @param ht This should be a {@code Hashtable} that was produced by a call to one of the two
     * available {@code depth(...)} methods.
     * 
     * @return This shall return a list of HTML Tags that are <I>potentially (but not guaranteed
     * to be)</I> invalid.
     */
    public static Hashtable<String, int[]> depthInvalid(Hashtable<String, int[]> ht)
    {
        // Work on a copy of the table, so the caller's instance keeps all of its keys.  The
        // clone is shallow: the int[] statistics arrays themselves are shared, not copied.
        @SuppressWarnings("unchecked")
        Hashtable<String, int[]> ret = (Hashtable<String, int[]>) ht.clone();

        // Array layout, as produced by the depth(...) methods:
        //      arr[0] = minimum depth, arr[1] = maximum depth, arr[2] = final count
        //
        // A tag is considered fine, and is removed from the result, only when BOTH hold:
        //      * minimum depth never dropped below zero (no closing tag preceded its opener)
        //      * final count is zero (equal number of opening and closing tags)
        //
        // The minimum lives in arr[0].  Testing arr[1] (the maximum, which never goes below
        // zero) would make the first criterion always true, and would wrongly discard tags
        // such as "</I> ... <I>" whose count is zero but whose minimum depth is -1.
        //
        // values() is a live view of 'ret', so removing a value removes its mapping.

        ret.values().removeIf(arr -> (arr[0] >= 0) && (arr[2] == 0));

        return ret;
    }

    /**
     * Creates a {@code Hashtable} that has a maximum and minimum depth for all HTML tags found on
     * the page.  Any HTML Tags that meet ALL of these criteria, below, shall be removed from the
     * result-set {@code Hashtable} ...
     * 
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI> Maximum Depth is precisely {@code '1'} - i.e. <I>Each element of this tag is closed
     *      before a second is open.</I>
     *      </LI>
     * </UL>
     * 
     * <BR /><BR /><B CLASS=JDDescLabel>Cloned Input:</B>
     * 
     * <BR />This method clones the original input {@code Hashtable}, and removes the tags whose
     * maximum-depth is not greater than one.  This allows the user to perform other operations
     * with the original table, while this class is processing.
     *
     * @param ht This should be a {@code Hashtable} that was produced by a call to one of the two
     * available {@code depth(...)} methods.
     * 
     * @return This shall return a list of HTML Tags that are <I>potentially (but not guaranteed
     * to be)</I>
     * invalid.
     */
    public static Hashtable<String, int[]> depthGreaterThanOne(Hashtable<String, int[]> ht)
    {
        // Work on a copy of the table, so the caller's instance keeps all of its keys.  The
        // clone is shallow: the int[] statistics arrays themselves are shared, not copied.
        @SuppressWarnings("unchecked")
        Hashtable<String, int[]> filtered = (Hashtable<String, int[]>) ht.clone();

        // arr[1] holds the maximum depth.  Every tag whose maximum depth is exactly 1 is removed;
        // values() is a live view of 'filtered', so removing a value removes its mapping.
        //
        // NOTE(review): entries with a maximum depth of 0 are retained by this test, although the
        // method name suggests only depths greater than one remain - confirm this is intended.

        filtered.values().removeIf(arr -> arr[1] == 1);

        return filtered;
    }


    /**
     * This method will calculate the "Maximum" and "Minimum" depth for a particular HTML Tag.
     * The Max-Depth just means the number of Maximum-Number of Opening HTML Element Opening Tags
     * were found, before a matching closing version of the same Element is encountered.  For
     * instance: {@code <DIV ...><DIV ..> Some Page</DIV></DIV>} has a maximum depth of
     * {@code '2'}.  This means there is a point in the vectorized-html where there are 2
     * successive divider elements that are opened, before even one has been closed.
     *
     * <EMBED CLASS='external-html' DATA-FILE-ID=BALANCE_VALID_NOTE2>
     *
     * <BR /><BR /><B CLASS=JDDescLabel>'Count' Computation-Heuristic:</B>
     * 
     * <BR />This maximum and minimum depth count will not pay any attention to whether HTML open
     * and close tags "enclose each-other" or are "interleaved."  The actual mechanics of the
     * for-loop which calculates the {@code count} shall hopefully explain this computation
     * clearly enough.  This may be viewed in this method's hilited source-code, below.
     *
     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
     * 
     * @param htmlTag This is the HTML element whose maximum and minimum depth-count needs to be
     * computed.
     * 
     * @return The returned integer-array, shall be of length 3.
     * 
     * <BR /><BR /><OL CLASS=JDUL>
     * <LI>Minimum Depth: {@code return_array[0]}</LI>
     * <LI>Maximum Depth: {@code return_array[1]}</LI>
     * <LI>Total Count: {@code return_array[2]}</LI>
     * </OL>
     * 
     * <BR /><BR /><B><SPAN STYLE="color: red;">REDUNDANCY NOTE:</SPAN></B> The third element of
     * the returned array should be identical to the result produced by an invocation of method:
     * {@code Balance.checkTag(html, htmlTag);}
     * 
     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
     * 
     * @throws SingletonException If this {@code 'htmlTag'} is a {@code 'singleton'} (Self-Closing)
     * Tag, this exception will throw.
     */
    public static int[] depthTag(Vector<? super TagNode> html, String htmlTag)
    {
        // Validate the requested tag; the checker's return value replaces the caller's string
        htmlTag = ARGCHECK.htmlTag(htmlTag);

        // Singleton tags have no closing form, so a depth is meaningless for them
        if (HTMLTags.isSingleton(htmlTag)) throw new SingletonException(
            "The tag you have passed: [" + htmlTag + "] is a singleton-tag, and is only allowed " +
            "opening versions of the tag."
        );

        int count   = 0;    // running open/close count for 'htmlTag'
        int max     = 0;    // highest value 'count' has reached
        int min     = 0;    // lowest value 'count' has reached

        for (Object node : html)
        {
            // Only TagNode instances participate in the depth computation
            if (! (node instanceof TagNode)) continue;

            TagNode tn = (TagNode) node;

            // Tags other than the requested one do not affect the result
            if (! tn.tok.equals(htmlTag)) continue;

            // Opening tag (e.g. <DIV ...>) adds 1, closing tag (e.g. </DIV>) subtracts 1
            count += tn.isClosing ? -1 : 1;

            if (count > max) max = count;
            if (count < min) min = count;
        }

        // The result has THREE slots: [0] = minimum depth, [1] = maximum depth, [2] = count.
        // Allocating only two slots and then writing index 2 throws
        // ArrayIndexOutOfBoundsException on every call, so the array is built at full length.

        return new int[] { min, max, count };
    }

    /**
     * This will find the (likely) places where the "non-nested HTML Elements" have become nested.
     * For the purposes of finding mismatched elements - such as an unclosed "Italics" Element, or
     * an "Extra" Italics Element - this method will find places where a new HTML Tag has opened
     * before a previous one has been closed - <I>or vice-versa (where there is an 'extra'
     * closed-tag).</I> 
     * 
     * <BR /><BR />Certainly, if "nesting" is usually acceptable (for instance the HTML divider
     * {@code '<DIV>...</DIV>'} construct) <I><B>then the results of this method would not have any
     * meaning.</I></B>  Fortunately, for the vast majority of HTML Elements {@code <I>, <B>, <A>,
     * etc...} nesting the tags is not allowed or encouraged. 
     *
     * <BR /><BR />The following example use of this method should make clear the application.  If
     * a user has identified that there is an unclosed HTML italics element ({@code <I>...</I>})
     * somewhere on a page, for-example, and that page has numerous italics elements, this method
     * can pinpoint the failure instantly, using this example.  Note that the file-name is a
     * Java-Doc generated output HTML file.  The documentation for this package received a copious
     * amount of attention due to the sheer number of method-names and class-names used throughout.
     * 
     * <DIV CLASS="EXAMPLE">{@code 
     * String           fStr    = FileRW.loadFileToString("javadoc/Torello/HTML/TagNode.html");
     * Vector<HTMLNode> v       = HTMLPage.getPageTokens(fStr, false);
     * int[]            posArr  = Balance.nonNestedCheck(v, "i");
     * 
     * // Below, the class 'Debug' is used to pretty-print the vectorized-html page.  Here, the
     * // output will find the lone, non-closed, HTML italics <I> ... </I> tag-element, and output
     * // it to the terminal-window.  The parameter '5' means the nearest 5 elements (in either
     * // direction) are printed, in addition to the elements at the indices in the posArr.
     * // Parameter 'true' implies that two curly braces are printed surrounding the matched node.
     * 
     * System.out.println(Debug.print(v, posArr, 5, " Skip a few ", true, Debug::K));
     * }</DIV>
     * 
     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
     * 
     * @param htmlTag This is the html element whose maximum and minimum depth-count was not {@code 1}
     * and {@code 0}, respectively.  The precise location where the depth achieved either a
     * negative depth, or depth greater than {@code 1} will be returned in the integer array.  In
     * English: When two opening-tags or two closing-tags are identified, successively, then the
     * index where the second tag was found is recorded into the output array.
     * 
     * @return This will return an array of vectorized-html index-locations / index-pointers where
     * the first instance of an extra opening, or an extra-closing tag, occurs.  This will
     * facilitate finding tags that are not intended to be nested.  If "tag-nesting" is expected
     * for the tag (for example HTML divider, {@code 'DIV'}, elements), then the results returned
     * by this method will not be useful.
     * 
     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
     * 
     * @throws SingletonException If this {@code 'htmlTag'} is a {@code 'singleton'} (Self-Closing)
     * Tag, this exception will throw.
     * 
     * @see FileRW#loadFileToString(String)
     * @see HTMLPage#getPageTokens(CharSequence, boolean)
     * @see Debug#print(Vector, int[], int, String, boolean, BiConsumer)
     */
    public static int[] nonNestedCheck(Vector<? super TagNode> html, String htmlTag)
    {
        // Validate the tag-name first (an exception is thrown if it is invalid).  The value
        // handed back by the checker is what gets compared against each node's 'tok' field.
        htmlTag = ARGCHECK.htmlTag(htmlTag);

        if (HTMLTags.isSingleton(htmlTag)) throw new SingletonException(
            "The tag you have passed: [" + htmlTag + "] is a singleton-tag, and is only " +
            "allowed opening versions of the tag."
        );

        // Variable-length list of Vector-indices, collected through an IntStream builder
        IntStream.Builder repeats = IntStream.builder();

        // Kind (opening / closing) of the most recent matching tag; null until one is seen
        TC previous = null;

        final int size = html.size();

        for (int pos = 0; pos < size; pos++)
        {
            // Only TagNode's whose token matches are of interest - text, comments and other
            // tags are skipped
            Object node = html.elementAt(pos);

            if (! (node instanceof TagNode)) continue;

            TagNode tag = (TagNode) node;

            if (! tag.tok.equals(htmlTag)) continue;

            TC current = tag.isClosing ? TC.ClosingTags : TC.OpeningTags;

            // Two successive openers, or two successive closers: record the second one
            if (current == previous) repeats.add(pos);

            previous = current;
        }

        return repeats.build().toArray();
    }

    /**
     * For likely greater than 95% of HTML tags - finding situations where that tag has 
     * <I><B>'nested tags'</I></B> is highly unlikely.  Unfortunately, two or three of the most
     * common tags in use, for instance {@code <DIV>, <SPAN>}, finding where a mis-match has
     * occurred (tracking down an "Unclosed divider") is an order of magnitude more difficult than
     * finding an unclosed anchor {@code '<A HREF...>'}.  This method shall return two parallel
     * arrays.  The first array will contain vector indices.  The second array contains the depth
     * (nesting level) of that tag at that position.  In this way, finding an unclosed divider is
     * tantamount to finding where all closing-dividers seem to evaluate to a depth of '1' (one)
     * rather than '0' (zero). 
     * 
     * <BR /><BR /><B>NOTE:</B> This method can be highly useful for SPAN and DIV, while the
     * "non-standard depth locations" method can be extremely useful for simple, non-nested tags
     * such as Anchor, Paragraph, Section, etc... - HTML Elements that are mostly never nested.
     * 
     * <DIV CLASS="EXAMPLE">{@code
     * // Load an HTML File to a String
     * String file = LFEC.loadFile("~/HTML/MyHTMLFile.html");
     * 
     * // Parse, and convert to vectorized-html
     * Vector<HTMLNode> v = HTMLPage.getPageTokens(file, false);
     * 
     * // Run this method
     * Ret2<int[], int[]> r = Balance.locationsAndDepth(v, "div");
     * 
     * // This array has vector-indices
     * int[] posArr = (int[]) r.a;
     * 
     * // This (parallel) array has the depth at that index.
     * int[] depthArr = (int[]) r.b;
     * 
     * for (int i=0; i < posArr.length; i++) System.out.println(
     *     "(" + posArr[i] + ", " + depthArr[i] + "):\t" +    // Prints the Vector-Index, and Depth
     *     C.BRED + v.elementAt(posArr[i]).str + C.RESET      // Prints the actual HTML divider.
     * );
     * }</DIV>
     * 
     * <BR />The above code would produce a list of HTML Divider elements, along with their index
     * in the {@code Vector}, and the exact depth (number of nested, open {@code 'DIV'} elements)
     * at that location.  This is usually helpful when trying to find unclosed HTML Tags.
     * 
     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
     * 
     * @param htmlTag This is the html element that has an imbalanced OPEN-CLOSE ratio in the tree.
     * 
     * @return Two parallel arrays, as follows:
     * 
     * <BR /><BR /><OL CLASS=JDOL>
     * <LI> {@code Ret2.a (int[])}
     *      <BR /><BR />
     *      This shall be an integer array of {@code Vector}-indices where the HTML Element has
     *      been found.
     *      <BR /><BR />
     * </LI>
     * <LI> {@code Ret2.b (int[])}
     *      <BR /><BR />
     *      This shall contain an array of the value of the depth for the {@code 'htmlTag'}
     *      at the particular {@code Vector}-index identified in the first-array.
     * </LI>
     * </OL>
     * 
     * @throws HTMLTokException If any of the tags passed are not valid HTML tags.
     * 
     * @throws SingletonException If this {@code 'htmlTag'} is a {@code 'singleton'} (Self-Closing)
     * Tag, this exception will throw.
     */
    public static Ret2<int[], int[]> locationsAndDepth(Vector<? super TagNode> html, String htmlTag)
    {
        // Validate the tag-name first (an exception is thrown if it is invalid).  The value
        // handed back by the checker is what gets compared against each node's 'tok' field.
        htmlTag = ARGCHECK.htmlTag(htmlTag);

        if (HTMLTags.isSingleton(htmlTag)) throw new SingletonException(
            "The tag you have passed: [" + htmlTag + "] is a singleton-tag, and is only " +
            "allowed opening versions of the tag."
        );

        // Two parallel, variable-length lists: where each matching tag sits in the Vector, and
        // the running depth immediately AFTER that tag has been applied.
        IntStream.Builder indices   = IntStream.builder();
        IntStream.Builder depths    = IntStream.builder();

        int         running = 0;
        final int   size    = html.size();

        for (int pos = 0; pos < size; pos++)
        {
            // Text, comments and non-matching tags do not affect the depth
            Object node = html.elementAt(pos);

            if (! (node instanceof TagNode)) continue;

            TagNode tag = (TagNode) node;

            if (! tag.tok.equals(htmlTag)) continue;

            // A closing tag lowers the depth by one, an opening tag raises it by one
            if (tag.isClosing)  running--;
            else                running++;

            indices.add(pos);
            depths.add(running);
        }

        int[] posArr    = indices.build().toArray();
        int[] depthArr  = depths.build().toArray();

        return new Ret2<int[], int[]>(posArr, depthArr);
    }

    /**
     * Renders a depth report as printable text, one line per HTML tag.
     * 
     * @param depthReport A {@code Hashtable} mapping an HTML tag-name to an integer-array.  The
     * first three array positions are printed as the minimum depth (index {@code 0}), the maximum
     * depth (index {@code 1}) and the count (index {@code 2}).
     * 
     * @return The report as a {@code String}.  Every entry in the table contributes exactly one
     * new-line terminated line.  An empty table produces the empty-string.
     */
    public static String toStringDepth(Hashtable<String, int[]> depthReport)
    {
        StringBuilder out = new StringBuilder();

        for (Map.Entry<String, int[]> entry : depthReport.entrySet())
        {
            int[] stats = entry.getValue();

            out .append("HTML Element: [").append(entry.getKey()).append("]:\t")
                .append("Min-Depth: ").append(stats[0])
                .append(",\tMax-Depth: ").append(stats[1])
                .append(",\tCount: ").append(stats[2])
                .append("\n");
        }

        return out.toString();
    }


    /**
     * Renders a balance report as printable, column-aligned text - one line per HTML tag.
     * 
     * <BR /><BR />Each line holds the tag-name, its count, and (for any non-zero count) a short
     * remark: positive counts are described as unclosed elements, negative counts as extra
     * closing elements.
     * 
     * @param balanceCheckReport A {@code Hashtable} mapping an HTML tag-name to its balance
     * count.
     * 
     * @return The report as a {@code String}.  An empty table produces the empty-string.
     */
    public static String toStringBalance(Hashtable<String, Integer> balanceCheckReport)
    {
        // Column-1 width: the longest tag-name ...
        int tagWidth = 0;

        for (String tag : balanceCheckReport.keySet())
            tagWidth = Math.max(tagWidth, tag.length());

        // ... plus 17 for the fixed text "HTML Element: [" and "]:" surrounding it, plus 2
        // characters of extra-space
        tagWidth += 17 + 2;

        // Column-2 width is the longest count as printed (sign included).  The width used inside
        // the remark is the longest count printed WITHOUT its sign.
        int valueWidth      = 0;
        int absValueWidth   = 0;

        for (int count : balanceCheckReport.values())
        {
            valueWidth      = Math.max(valueWidth, String.valueOf(count).length());
            absValueWidth   = Math.max(absValueWidth, String.valueOf(Math.abs(count)).length());
        }

        StringBuilder report = new StringBuilder();

        for (Map.Entry<String, Integer> entry : balanceCheckReport.entrySet())
        {
            String  tag     = entry.getKey();
            int     count   = entry.getValue().intValue();

            report
                .append(StringParse.rightSpacePad("HTML Element: [" + tag + "]:", tagWidth))
                .append(StringParse.rightSpacePad("" + count, valueWidth))
                .append(NOTE(count, tag, absValueWidth))
                .append("\n");
        }

        return report.toString();
    }

    // Builds the trailing remark for one line of the balance report.  A zero count gets no
    // remark at all; a positive count is described as unclosed elements, and a negative count
    // as extra closing elements.  The magnitude is right-padded to 'padding' characters.
    private static String NOTE(int val, String htmlTag, int padding)
    {
        if (val == 0) return "";

        String magnitude = StringParse.rightSpacePad("" + Math.abs(val), padding);

        return (val > 0)
            ? ", which implies " + magnitude + " unclosed <" + htmlTag + "> element(s)"
            : ", which implies " + magnitude + " extra </" + htmlTag + "> element(s)";
    }

    /**
     * Renders an array-based balance report as printable text, one line per HTML tag.
     * 
     * @param balanceCheckReport An integer-array of balance counts.  The count at index
     * {@code i} is printed beside the tag-name at index {@code i} of {@code 'htmlTags'}.
     * 
     * @param htmlTags The HTML tag-names, parallel to {@code 'balanceCheckReport'}.
     * 
     * @return The report as a {@code String}.  Every array position contributes exactly one
     * new-line terminated line.
     * 
     * @throws IllegalArgumentException If the two input arrays do not have the same length.  It
     * is imperative that the report being printed was generated from the very tags listed in
     * {@code 'htmlTags'}.  When the lengths agree, but the tags are not the ones that were used
     * to generate the report, <I>the logic cannot know the difference, and no exception is
     * thrown.</I>
     */
    public static String toStringBalance(int[] balanceCheckReport, String... htmlTags)
    {
        final int count = balanceCheckReport.length;

        if (count != htmlTags.length) throw new IllegalArgumentException(
            "The balance report that you are checking was not generated using the html token " +
            "list provided, they are different lengths.  balanceCheckReport.length: " +
            "[" + count + "]\t htmlTags.length: [" + htmlTags.length + "]"
        );

        StringBuilder out = new StringBuilder();

        // The arrays are parallel (same length, verified above), so a single cursor suffices
        int idx = 0;

        for (String tag : htmlTags)
            out .append("HTML Element: [").append(tag).append("]:\t")
                .append(balanceCheckReport[idx++])
                .append('\n');

        return out.toString();
    }

}