StrSource.java.html

package Torello.Java;

import Torello.Java.ReadOnly.ReadOnlySet;
import Torello.Java.ReadOnly.ReadOnlyHashSet;
import Torello.Java.ReadOnly.ReadOnlyList;
import Torello.Java.ReadOnly.ReadOnlyArrayList;

import Torello.Java.Additional.Counter;

import java.util.regex.Pattern;
import java.util.regex.Matcher;

import java.util.stream.Stream;

import java.util.function.Supplier;

@Torello.JavaDoc.StaticFunctional
public class StrSource
{
    // Private, empty constructor: no instance of this class can be created from outside it.
    private StrSource() { }


    // ********************************************************************************************
    // ********************************************************************************************
    // FIELDS
    // ********************************************************************************************
    // ********************************************************************************************


    // The characters handed to StrReplace.r(...) by escStrForRegEx(String).
    // The alternation operator '|' is included: left un-escaped, a String such as "a|b" would
    // be read by java.util.regex as "a OR b" rather than as the literal three characters.
    private static final char[] REGEX_ESCAPE_CHARS_ARR =
    {
        '\\', '/', '(', ')', '[', ']', '{', '}', '$', '^', '+', '*', '?', '-', '.', '|'
    };

    /**
     * These are 'control' characters (Reg Ex Code), so they must be escaped if they are to be
     * treated as their ASCII-equivalent values.
     */
    public static final ReadOnlySet<Character> REGEX_ESCAPE_CHARS =
        new ReadOnlyHashSet<>(REGEX_ESCAPE_CHARS_ARR, null);

    // The characters handed to StrReplace.r(...) by escStrForJavaScript(String): back-slash,
    // forward-slash, new-line and double-quote.
    private static final char[] JS_ESCAPE_CHARS_ARR =
    { '\\', '/', '\n', '\"' };

    /**
     * When converting a {@code String} for a Java-Script {@code String}, these are the 
     * characters that must be escaped.  This set is built from the same array that
     * {@link #escStrForJavaScript(String)} uses.
     */
    public static final ReadOnlySet<Character> JS_ESCAPE_CHARS = 
        new ReadOnlyHashSet<>(JS_ESCAPE_CHARS_ARR, null);

    /**
     * The list of reserved Java Key-Words.  This list was written by ChatGPT on February 1st,
     * 2024.
     *
     * <BR /><BR />The list is kept in alphabetical order.  Besides the keywords proper, it also
     * holds the literals {@code "true"}, {@code "false"} and {@code "null"}, and the word
     * {@code "permits"}.
     */
    public static final ReadOnlyList<String> reservedKeywords = new ReadOnlyArrayList<>(
        "abstract", "assert", "boolean", "break", "byte", "case", "catch", "char", "class",
        "const", "continue", "default", "do", "double", "else", "enum", "extends", "false",
        "final", "finally", "float", "for", "goto", "if", "implements", "import", "instanceof",
        "int", "interface", "long", "native", "new", "null", "package", "permits", "private",
        "protected", "public", "return", "short", "static", "strictfp", "super", "switch",
        "synchronized", "this", "throw", "throws", "transient", "true", "try", "void", "volatile",
        "while"
    );

    /**
     * This will match the definition for a java {@code 'Generic'} class or interface.
     *
     * <BR /><BR />The expression is anchored at both ends.  It expects a non-empty prefix, an
     * opening {@code '<'}, the parameter-list (captured as group 1), and a final {@code '>'}.
     * Group 1 may only contain white-space, word-characters, {@code '<'}, {@code '>'},
     * {@code ','} and {@code '?'}.
     */
    public static final Pattern GENERIC_PARAMS = Pattern.compile("^.+?<([\\s\\w\\<>,\\?]+)>$");

    /**
     * This shall match a Java Package {@code String}.
     *
     * <BR /><BR />A match is one or more consecutive segments, where each segment is a letter
     * or underscore, followed by any number of word-characters, followed by a period.  The
     * matched text therefore always ends with a period.  The expression is not anchored.
     */
    public static final Pattern PACKAGE_NAME = Pattern.compile("([A-Za-z_]\\w*\\.)+");


    // ********************************************************************************************
    // ********************************************************************************************
    // Searching for a tag in an HTML string (the early way - without regular expressions)
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * If parameter {@code String s} contains any tag within-which there is a valid
     * {@code "HREF"}, this will return the contents of the {@code HREF} Attribute/InnerTag.
     *
     * <BR /><BR />The attribute-name is located without regard to (ASCII) case, so
     * {@code HREF=}, {@code href=} and {@code Href=} are all found.  The attribute-value is
     * returned exactly as it appears in {@code 's'} - its case is <B>not</B> modified, since
     * the path and query parts of a URL are case-sensitive.
     *
     * <BR /><BR />A double-quoted value ({@code href="..."}) anywhere in {@code 's'} is preferred.
     * A single-quoted value ({@code href='...'}) is only used when no double-quoted one exists.
     * 
     * @param s This is usually some variant of an HTML element/tag {@code String}.  This method
     * was the first one written for HTML in this scrape package, and is just kept here for legacy
     * reasons. The {@code class HTML.TagNode} has a number of options for extracting the
     * {@code 'HREF'} attribute from an HTML element.
     * 
     * @return The attribute-value of an {@code HREF=...} attribute inside (usually an {@code <A>}
     * 'Anchor') HTML tag. This will return 'null' if no quoted {@code HREF="..."}
     * attribute-value pair is found or identified.
     * 
     * @throws IllegalArgumentException If there is no end-quote found for the {@code HREF="..."}
     * sub-string.
     */
    public static String grep_HREF_tag(String s)
    {
        char    quote   = '"';
        int     hrefPos = hrefTokenIndex(s, "href=\"");

        if (hrefPos == -1)
        {
            // No double-quoted attribute, try the single-quoted form
            hrefPos = hrefTokenIndex(s, "href='");
            if (hrefPos == -1) return null;
            quote = '\'';
        }

        // the " + 6" is because the string HREF=" is 6 characters long
        int valueStart  = hrefPos + 6;
        int endQuotePos = s.indexOf(quote, valueStart);

        if (endQuotePos == -1) throw new IllegalArgumentException
            ("HREF has no End-Quote!\n\nFor String:\n" + s);

        // Cut the value out of the ORIGINAL String, so that its case is preserved
        return s.substring(valueStart, endQuotePos);
    }

    // Finds the first position in 'text' where 'lowerCaseToken' occurs, ignoring ASCII case.
    // Only the letters 'A' .. 'Z' of 'text' are folded, so positions always refer to 'text'
    // itself and do not depend on the default Locale.  Returns -1 if there is no occurrence.
    private static int hrefTokenIndex(String text, String lowerCaseToken)
    {
        final int tokenLen  = lowerCaseToken.length();
        final int lastStart = text.length() - tokenLen;

        NEXT_START:
        for (int start=0; start <= lastStart; start++)
        {
            for (int k=0; k < tokenLen; k++)
            {
                char c = text.charAt(start + k);

                if ((c >= 'A') && (c <= 'Z')) c = (char) (c + ('a' - 'A'));

                if (c != lowerCaseToken.charAt(k)) continue NEXT_START;
            }

            return start;
        }

        return -1;
    }

    /**
     * If parameter {@code String s} contains an HTML {@code "IMG"} tag, this will return the
     * contents of the {@code "SRC=..."} attribute tag-field.
     *
     * <BR /><BR />Both the {@code "<IMG "} token and the {@code "SRC="} attribute-name are
     * located without regard to (ASCII) case, and independently of the default
     * {@code java.util.Locale}.  The attribute-value is returned exactly as it appears in
     * {@code 's'}.
     *
     * <BR /><BR />Only text that follows the first {@code "<IMG "} token is searched.  A
     * double-quoted value ({@code src="..."}) is preferred.  A single-quoted value
     * ({@code src='...'}) is only used when no double-quoted one exists.
     * 
     * @param s This is usually some variant of an HTML element/tag {@code String}.  This method
     * was the first one written for HTML in this scrape package, and is just kept here for legacy
     * reasons. The {@code class HTML.TagNode} has a number of options for extracting the
     * {@code 'SRC'} attribute from an HTML element.
     * 
     * @return The attribute-value of a {@code SRC=...} attribute inside (usually an {@code <IMG>}
     * 'Image') HTML tag. 'null' is returned if:
     * 
     * <BR /><BR /><OL CLASS=JDOL>
     * <LI>There is no HTML {@code 'IMG'} token found in the {@code String}</LI>
     * <LI>There is no {@code SRC='...'} attribute-value pair found.</LI>
     * <LI>The attribute-value has no closing quotation-mark.</LI>
     * </OL>
     */
    public static String grep_IMG_SRC_tag(String s)
    {
        int imgPos = imgTokenIndex(s, "<img ", 0);

        if (imgPos == -1) return null;

        // The string <IMG (with its trailing space) is 5 characters long
        int searchFrom = imgPos + 5;

        // first check for double-quotes
        char    quote   = '"';
        int     srcPos  = imgTokenIndex(s, "src=\"", searchFrom);

        if (srcPos == -1)
        {
            // if no double-quotes, try single quotes
            srcPos = imgTokenIndex(s, "src='", searchFrom);

            if (srcPos == -1) return null;

            quote = '\'';
        }

        // The string SRC=" is also 5 characters long
        int urlStart    = srcPos + 5;
        int urlEnd      = s.indexOf(quote, urlStart);

        if (urlEnd == -1) return null;

        return s.substring(urlStart, urlEnd);
    }

    // Finds the first position at or after 'from' where 'lowerCaseToken' occurs in 'text',
    // ignoring ASCII case.  Only the letters 'A' .. 'Z' of 'text' are folded, so positions
    // always refer to 'text' itself and do not depend on the default Locale (a Turkish locale
    // lower-cases 'I' to a dot-less 'i', which would hide "<IMG ").  Returns -1 if not found.
    private static int imgTokenIndex(String text, String lowerCaseToken, int from)
    {
        final int tokenLen  = lowerCaseToken.length();
        final int lastStart = text.length() - tokenLen;

        NEXT_START:
        for (int start=from; start <= lastStart; start++)
        {
            for (int k=0; k < tokenLen; k++)
            {
                char c = text.charAt(start + k);

                if ((c >= 'A') && (c <= 'Z')) c = (char) (c + ('a' - 'A'));

                if (c != lowerCaseToken.charAt(k)) continue NEXT_START;
            }

            return start;
        }

        return -1;
    }


    // ********************************************************************************************
    // ********************************************************************************************
    // Java-Script & Reg-Ex String encoding (JSON.stringify())
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * <EMBED CLASS='external-html' DATA-FILE-ID=STRSRC_ESC_4JS>
     * 
     * @param str This may be any String in java.  It is intended to be inserted into a Java-Script
     * file between opening and closing quotation marks.  
     * 
     * @return The String that is returned will have certain characters escaped, so that it may be
     * wrapped in quotation marks and easily inserted into any java-script ".js" text-file.
     * 
     * <BR /><BR /><B>Escaped-Text:</B>
     * 
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI> {@code char '\'} will be escaped to: {@code "\\"}</LI>
     * 
     * <LI> {@code char '/'} will be escaped to: {@code "\/"}, this is required in Java-Script, but
     *      not Java!
     *      </LI>
     * 
     * <LI> {@code char '"'} will be escaped to: {@code "\""}</LI>
     * <LI> {@code char '\n'} will be escaped to: {@code "\\n"}</LI>
     * </UL>
     *
     * <BR /><B><SPAN STYLE="color: red;">IMPORTANT NOTE:</SPAN></B> There is no easy, nor clear,
     * way to express what is being replaced and/or escaped in a simple list.  You may run this
     * method on any {@code String} and view for yourself what changes.  <B><I>The primary 
     * goal</I></B> of the method is to allow <I>*any* Java String of *any* length</I> to be 
     * converted, wrapped inside of an open and closed quotation-marks, and printed into a 
     * Java-Script {@code ".js" file}.  Escaping "escape characters" which does come up some-what
     * often in HTML text/string processing is near-impossible to explain clearly!  Review the
     * stack-overflow "incantation" for possible help.
     */
    public static String escStrForJavaScript(String str)
    // Delegates to StrReplace.r with the JS_ESCAPE_CHARS_ARR characters and a back-slash.
    // NOTE(review): StrReplace.r is not visible here - presumably it prefixes each listed
    // character with the back-slash; confirm against that class.
    { return StrReplace.r(str, JS_ESCAPE_CHARS_ARR, '\\'); }

    /**
     * This method should only be used for a <B><I>precise {@code String} match</I></B> using a
     * regular-expression.  This method shall 'escape' all characters that the JVM Regular
     * Expression Matcher in {@code package java.util.regex.*} would expect to be escaped.  If the
     * input parameter {@code 'str'} contains any regular-expression code, then this method would
     * <B>FAIL</B> as it would escape regular-expression code into unusable text.
     * 
     * @param str This should be any {@code String} for which the user would like to find an
     * <B>exact match, as-is</B>.
     * 
     * @return A regular-expression ready {@code String}
     */
    public static String escStrForRegEx(String str)
    // Delegates to StrReplace.r with the REGEX_ESCAPE_CHARS_ARR characters and a back-slash.
    // NOTE(review): StrReplace.r is not visible here - presumably it prefixes each listed
    // character with the back-slash; confirm against that class.
    { return StrReplace.r(str, REGEX_ESCAPE_CHARS_ARR, '\\'); }


    // ********************************************************************************************
    // ********************************************************************************************
    // Java Code String-Functions
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * Parses a {@code String} such as {@code T extends TreeMap<Integer, List<String>>}.  It is
     * strictly used, to <B><I>only parse</I></B> the generic-definition lists that are at the top
     * of generic <B>classes</B> and <B>interfaces</B>.
     *
     * <EMBED CLASS='external-html' DATA-FILE-ID=STRSRC_PARSE_GENT DATA-NODE="An Example of Sorts">
     *
     * @param genericTypeParamOrDefinition A type-{@code String} that ends with a generic
     * parameter-list.  It must match {@link #GENERIC_PARAMS}: a non-empty prefix, an opening
     * {@code '<'}, the parameter-list, and a final {@code '>'}.  For example, for
     * {@code "SortedList<A extends Comparable, B>"} the list that is split is
     * {@code "A extends Comparable, B"}.
     * 
     * @return The parameter-list, broken down into individual (trimmed) {@code String's}.  The
     * list is split on commas, but a comma that is nested inside of an inner {@code <...>}
     * expression does not split; pieces are re-joined using {@code ", "} until the number of
     * {@code '<'} and {@code '>'} characters is equal.  For the first example above, the return
     * value is <code>{ "Integer", "List&lt;String&gt;" }</code>
     * 
     * @throws NoMatchException if the input {@code String} parameter does not match the
     * generics regular-expression {@link #GENERIC_PARAMS}.
     * 
     * @throws StringFormatException If the input {@code String} could not be parsed, because
     * the {@code '<'} and {@code '>'} characters inside the parameter-list are not balanced.
     */
    public static String[] parseGenericType(String genericTypeParamOrDefinition)
    {
        Matcher m               = GENERIC_PARAMS.matcher(genericTypeParamOrDefinition);
        String  innerGenericStr = m.find() ? m.group(1) : null;

        if (innerGenericStr == null) throw new NoMatchException(
            "The provided value to parameter 'genericTypeParamOrDefinition' [" + 
            genericTypeParamOrDefinition + "] did not match the Java Generics " +
            "Regular-Expression:\n" + GENERIC_PARAMS.toString()
        );

        Stream.Builder<String> b = Stream.builder();

        // Holds a generic-type definition whose "sub-generic" was cut apart by the comma-split,
        // and which is still waiting for more pieces.  It is null when nothing is pending.
        String pending = null;

        for (String piece : innerGenericStr.split(","))
        {
            // Join this piece with whatever is still incomplete from the previous pieces
            String candidate = (pending == null)
                ? piece.trim()
                : (pending + ", " + piece.trim());

            // An equal number of '<' and '>' (including zero of each) means that this is a
            // complete definition.  Add this to the list, and move on.

            if (    StringParse.countCharacters(candidate, '<') ==
                    StringParse.countCharacters(candidate, '>')
            )
            {
                b.accept(candidate);
                pending = null;
            }

            // There was a generic with a sub-generic that had a comma... keep joining
            else pending = candidate;
        }

        // If we have reached the end of the String with a definition still pending, the number
        // of greater than and less than symbols was not balanced.

        if (pending != null) throw new StringFormatException(
            "The provided value to parameter 'genericTypeParamOrDefinition' [" + 
            genericTypeParamOrDefinition + "], was not properly formatted, and could " +
            "not be parsed."
        );

        // Return the list
        return b.build().toArray(String[]::new);
    }

    /**
     * This will print a caret-symbol on a line of text underneath the input {@code String}
     * parameter {@code 'str'}.  Preceding the caret-symbol will be the {@code strPos}
     * space-characters requested from {@code StringParse.nChars}.  The look of the
     * output-{@code String} is similar to some of the error messages generated by a Java
     * Compiler.
     * 
     * <BR /><BR />The caret-symbol {@code '^'} will be pointing to the character at index
     * {@code strPos}.
     * 
     * <DIV CLASS=EXAMPLE>{@code
     * // Notice the (accidental, on-purpose) use of the '@'' character instead of an 'a'
     * // To make this easy, lets compute the exact location of this erroneous character.
     * String   s   = "This string has an inv@lid character.";
     * int      pos = s.indexOf("@");
     * 
     * // This will print out a line of text containing the string, with a caret pointing
     * // at the '@' symbol.
     * System.out.println(StringParse.caretBeneath(s, pos));
     *
     * // PRINTS:
     * // This string has an inv@lid character.
     * //                       ^
     * }</DIV>
     * 
     * @param str This may be any input-{@code String} that has at most 100 characters, and
     * contains neither new-line nor tab characters.
     * 
     * @param strPos This must be a valid index into {@code 'str'}: a number between 0
     * (inclusive) and the length of {@code 'str'} (exclusive).
     * 
     * @return The same input-{@code String} with a second line appended underneath (using a
     * newline) having a <B>caret</B> ({@code '^'}) directly underneath the character at
     * {@code strPos}.
     * 
     * @throws IllegalArgumentException If the input {@code String} is longer than 
     * {@code 100 characters}.
     * 
     * @throws StringFormatException If the input {@code String} contains any new-line {@code '\n'}
     * or tab {@code '\t'} characters.
     * 
     * @throws StringIndexOutOfBoundsException If the value passed to {@code strPos} is negative or
     * greater than or equal to the length of the input-{@code String}.
     * 
     * @see StringParse#nChars(char, int)
     */
    public static String caretBeneath(String str, int strPos)
    {
        // A length of exactly 100 is accepted, the message must say so.
        if (str.length() > 100) throw new IllegalArgumentException(
            "The length of the input-string may not exceed 100.  str has length: " +
            str.length()
        );

        if (StrCmpr.containsOR(str, "\n", "\t")) throw new StringFormatException
            ("The input-string may not contain new-line or tab characters.");

        // strPos must be an index of an actual character, so 'strPos == length' is rejected too
        if (strPos >= str.length()) throw new StringIndexOutOfBoundsException(
            "The value you have passed to 'strPos' [" + strPos + "] is greater than or equal " +
            "to the length of the input-string [" + str.length() + "]"
        );

        if (strPos < 0) throw new StringIndexOutOfBoundsException
            ("You have passed a negative value to strPos [" + strPos + "]");

        return str + "\n" + StringParse.nChars(' ', strPos) + '^';
    }

    // Builds (but does not throw) the exception that reports a malformed type-String.
    //
    // 's' is the complete type-String, and 'charPos' is the index of the offending character.
    // Whenever possible the message shows 's' with a caret underneath position 'charPos'.
    //
    // caretBeneath(String, int) rejects Strings that are longer than 100 characters, Strings
    // that contain a new-line or a tab, and positions outside of the String.  Since this method
    // is itself the error-reporting path, none of those may be allowed to replace the
    // StringFormatException with a different exception.  In those cases a plain message,
    // containing the String and the index, is produced instead.

    private static StringFormatException REM_GENERIC_ERROR_MSG(String s, int charPos)
    { 
        final String header = "Generic Type-String Error, Beginning at Noted Location:\n";

        final boolean caretIsPossible =
                (s.length() <= 100)
            &&  (s.indexOf('\n') == -1)
            &&  (s.indexOf('\t') == -1)
            &&  (charPos >= 0)
            &&  (charPos < s.length());

        if (caretIsPossible) return new StringFormatException(header + caretBeneath(s, charPos));

        return new StringFormatException
            (header + s + "\n[Error at String-Index " + charPos + "]");
    }

    /**
     * This will remove the generic type-parameters expression from a Java Type Declaration or
     * Reference.  In simple terms, this removes the {@code '<K, V>'} from a {@code String} such
     * as {@code Map.Entry<K, V>}.
     * 
     * <BR /><TABLE CLASS=JDBriefTable>
     * <TR> <TH>Returned {@code String}</TH>
     *      <TH>Input {@code String}</TH>
     *      </TR>
     * <TR> <TD>{@code "Vector"}</TD>
     *      <TD>{@code "Vector<E>"}</TD>
     *      </TR>
     * <TR> <TD>{@code "AbstractHNLI"}</TD>
     *      <TD>{@code "AbstractHNLI<E extends HTMLNode, F>"}</TD>
     *      </TR>
     * <TR> <TD>{@code "Torello.HTML.TagNode"}</TD>
     *      <TD>{@code "Torello.HTML.TagNode"}</TD>
     *      </TR>
     * <TR> <TD>{@code "ClassA.InnerClassB.InnerClassC"}</TD>
     *      <TD>{@code "ClassA<X>.InnerClassB<Y>.InnerClassC"}</TD>
     *      </TR>
     * <TR> <TD>{@code "String[]"}</TD>
     *      <TD>{@code "String[]"}</TD>
     *      </TR>
     * <TR> <TD>{@code "java.lang.String[]"}</TD>
     *      <TD>{@code "java.lang.String[]"}</TD>
     *      </TR>
     * <TR> <TD>{@code "Vector"}</TD>
     *      <TD>{@code "Vector<String[]>"}</TD>
     *      </TR>
     * <TR> <TD>{@code "java.util.Vector"}</TD>
     *      <TD>{@code "java.util.Vector<String[]>"}</TD>
     *      </TR>
     * <TR> <TH COLSPAN=2>Point of Interest:</TH>
     *      </TR>
     * <TR> <TD>"I watched the World Series"</TD>
     *      <TD>"I watched the World Series"</TD>
     *      </TR>
     * <TR> <TD>{@code "Vector"}</TD>
     *      <TD>{@code "Vector<Quoth the Raven>"}</TD>
     *      </TR>
     * <TR> <TH COLSPAN=2>Throws an Exception</TH></TR>
     * <TR> <TD COLSPAN=2>{@code "HNLI<E> <"}</TD></TR>
     * <TR> <TD COLSPAN=2>{@code "> <Quoth the Raven>"}</TD></TR>
     * </TABLE>
     * 
     * @param typeAsStr The "Reference Type" or "Declaration Type".
     * 
     * @return The same {@code String}, with everything between the <B>outer-most, matching</B>
     * {@code '<'} and {@code '>'} symbols (and the symbols themselves) removed.
     * 
     * <BR /><BR /><B>NOTE:</B> The returned {@code String} will not contain any leading or
     * trailing white-space.  It is trimmed before being returned.
     * 
     * @throws StringFormatException An exhaustive check on everything that could be wrong with
     * a type-{@code String} is an impossibility (if you include checking for valid types).  This
     * exception is only thrown if the {@code '<'} and {@code '>'} symbols inside the
     * input-{@code String} do not match-up.
     * 
     * <BR /><BR />In order to avoid throwing this exception, there must be an equal number of
     * opening and closing symbols.
     * 
     * <BR /><BR />There is also a check to ensure that the characters in this {@code String}
     * are valid.
     */
    public static String removeGeneric(String typeAsStr)
    {
        int leftPos = typeAsStr.indexOf('<');

        // No opening '<' at all: the String is returned (trimmed), unless it contains a stray
        // closing '>'.  The character-validity check further below is not reached in this case.
        if (leftPos == -1)
        {
            int pos = typeAsStr.indexOf('>');

            if (pos == -1) return typeAsStr.trim();

            throw REM_GENERIC_ERROR_MSG(typeAsStr, pos);
        }

        char[]  cArr    = typeAsStr.toCharArray();
        int     count   = 1;            // The number of OPENING-CLOSING tags (same as Inclusive)
        int     END     = cArr.length;  // This is the location JUST-AFTER the last USEABLE-char
        int     delta   = 0;            // How many characters have been deleted already.
                                        // NOTE: This is zero, because the loop hasn't started.
                                        //       It is the SUM of the sizes of all <...>
                                        //       expressions erased by earlier loop iterations.
                                        // ALSO: The only purpose of this is for error-reporting:
                                        //       (index into cArr) + delta == index into typeAsStr

        // check for a closing '>' before the first opening '<'
        for (int j=0; j < leftPos; j++)
            if (cArr[j] == '>') throw REM_GENERIC_ERROR_MSG(typeAsStr, j);

        // Check for in-valid characters
        // This is a lot of lines of code, but these methods are extremely short, and the input
        // string (for all VALID) input will be very short.  This is peace of mind.  It checks...
        for (int pos=0; pos < cArr.length; pos++)
        {
            char c = cArr[pos];
            if (! Character.isJavaIdentifierPart(c))
                if (! Character.isIdentifierIgnorable(c))
                    if (! Character.isWhitespace(c))
                        if (
                                (c != '[') && (c != ']') && (c != '?') && (c != '<') &&
                                (c != '>') && (c != ',') && (c != '.')
                        )
                            throw REM_GENERIC_ERROR_MSG(typeAsStr, pos);
        }

        do
        {
            // Keeps a count on the number of "Opening Braces" and "Closing Braces" 
            // This is the same thing as the whole "Inclusive" deal, but with braces instead.
            //
            // count: At loop start, count is '1'  If it ever reaches 0, the loop exits.
            // leftPos: The location of the '<' that has been found.
            int i = leftPos + 1;
    
            while ((count > 0) && (i < END))
            {
                if      (cArr[i] == '<')    count++;
                else if (cArr[i] == '>')    count--;

                if (count > 0) i++;
            }

            // The '<' and the '>' didn't match up.  Better to throw exception, than ignore it.
            // NOTE: 'leftPos' is an index into the (already shifted) cArr, adding 'delta'
            //       converts it back into an index of the original String.
            if ((count != 0) && (i == END))
                throw REM_GENERIC_ERROR_MSG(typeAsStr, delta + leftPos);

            int rightPos = i; // 'i' is currently pointing to the '>'

            // Erase the most recently found <...> expression
            int     sourcePos       = rightPos + 1; // Pointing at first VALID / NEED-TO-COPY char
            int     destPos         = leftPos;      // Pointing at '<'
            boolean possiblyAnother = false;

            while (sourcePos < END)
            {
                // The next character to copy... check it first to see if it is valid!
                char c = cArr[sourcePos]; 

                // continue to shift all the characters left to erase the expression.
                cArr[destPos] = c;

                if (! possiblyAnother) // Haven't found an opening '<'
                {
                    // If there is a '>' - ***AND NO '<' HAS BEEN FOUND***, this is an error.    
                    if (c == '>')
                        throw REM_GENERIC_ERROR_MSG(typeAsStr, delta + sourcePos);

                    // If there is another '<', then it is possible another expression awaits us
                    if (c == '<')
                    {
                        // Reset the outer-loop variables for the next iteration.  There is going
                        // to be another iteration - guaranteed.
                        //
                        // NOTE: Delta is supposed to hold how many characters have been deleted.
                        //       This is used for proper error-reporting (only)

                        // ADD how many chars are in the current <...> expression.  This has to
                        // accumulate: with three or more expressions, all of the previously
                        // erased ones have shifted the remaining text.
                        delta   += rightPos - leftPos + 1;

                        leftPos = destPos;  // Now pointing at the next open '<' char (just found!)
                        count   = 1;        // There was a new-unclosed '<', prepares for next loop

                        // You know it
                        possiblyAnother = true;
                    }
                }

                sourcePos++; destPos++;
            }

            // Completed without errors, and without another expression being found.
            // NOTE: This used to be a one-line return call.
            // ADDED: This now does a String.trim().   These little loops skip leading and 
            //        trailing white-space BEFORE returning the String
            //
            // WORKS-NO-TRIM: return new String(cArr, 0, destPos);
            //                replace loop-body with the above line to get rid of trim()
            if (! possiblyAnother)
            {
                // From here on, 'destPos' is used as the LENGTH of the returned String
                // REMEMBER:    new String(char[], int OFFSET, int COUNT)
                // NOT:         new String(char[], int SPOS, int EPOS)
                int sPos = 0;

                // Skip LEADING-WHITESPACE
                while ((sPos < cArr.length) && (destPos > 0) && Character.isWhitespace(cArr[sPos]))
                { sPos++; destPos--; } // Advance start, *AND* shorten "count"

                // Skip TRAILING WHITE-SPACE
                while ((destPos > 1) && Character.isWhitespace(cArr[sPos + destPos-1]))
                    destPos--; // Shorten length *ONLY*

                return new String(cArr, sPos, destPos);
            }
            
            END = destPos;  // Pointing at the first invalid / unused / ALREADY-MOVED char
        }
        while (true);
    }

    /**
     * This will remove any generic-parameter information from a Java type-{@code String} <B>and
     * then</B> remove all package-information or outer-class {@code String's}.  What is left 
     * is a single <B>Java Identifier {@code String}</B> that, <I>as long as the proper scope has
     * been provided</I>, identifies a Java Type (Class, Interface, Enum, Record, Annotation).
     * 
     * <BR /><TABLE CLASS=JDBriefTable>
     * <TR><TH>Output</TH><TH>Input</TH></TR>
     * <TR><TD>{@code "Integer"}</TD><TD>{@code "java.lang.Integer"}</TD></TR>
     * <TR><TD>{@code "Vector"}</TD><TD>{@code "java.util.Vector<E>"}</TD></TR>
     * <TR><TD>{@code "Entry"}</TD><TD>{@code "java.util.Map.Entry<String, Integer>"}</TD></TR>
     * <TR><TD>{@code "Entry"}</TD><TD>{@code "Map.Entry<String, Integer>"}</TD></TR>
     * <TR><TD>{@code "Entry"}</TD><TD>{@code "Entry<String, Integer>"}</TD></TR>
     * <TR><TD>{@code "Entry"}</TD><TD>{@code "Entry"}</TD></TR>
     * <TR><TD>{@code "String[]"}</TD><TD>{@code "String[]"}</TD></TR>
     * <TR><TD>{@code "String[]"}</TD><TD>{@code "java.lang.String[]"}</TD></TR>
     * <TR><TD>{@code "Vector"}</TD><TD>{@code "Vector<String[]>"}</TD></TR>
     * <TR><TD>{@code "Vector[]"}</TD><TD>{@code "Vector<String>[]"}</TD></TR>
     * 
     * <TR><TH COLSPAN=2>Point of Interest:</TH></TR>
     * <TR><TD>{@code "The World Series"}</TD><TD>{@code "The World Series"}</TD></TR>
     * <TR><TD>{@code "Quoth the Raven"}</TD><TD>{@code "Quoth the Raven<java.lang.Integer>"}</TD></TR>
     * 
     * <TR><TH COLSPAN=2>Finally:</TH></TR>
     * <TR><TD>{@code "String..."}</TD><TD>{@code "String..."}</TD></TR>
     * <TR><TD>{@code "String..."}</TD><TD>{@code "java.lang.String..."}</TD></TR>
     * <TR><TD>{@code "Vector..."}</TD><TD>{@code "Vector<E>..."}</TD></TR>
     * <TR><TD>{@code "Vector..."}</TD><TD>{@code "java.util.Vector<E>..."}</TD></TR>
     * </TABLE>
     * 
     * @param typeStr This is a type as a {@code String}.  These are usually retrieved from Java
     * Parser, in the Java Doc Upgrader package.  This method does not provide an exhaustive
     * check for all variants of format and naming errors of a Java Type.  Some validity checks
     * are performed regarding the use of non-Java type characters.  
     * 
     * <BR /><BR /><B STYLE='color:red;'>NOTE:</B> All the exceptions thrown by the method
     * {@link #removeGeneric(String)} will also be thrown here, if {@code 'typeStr'} is not
     * properly formatted.
     * 
     * @return a Simplified version of the type that leaves out the scope, but provides a
     * simple Java Identifier, instead.  Throws exceptions if not properly formatted.  If any
     * array-bracket characters are passed, they are preserved, unless the arrays in this type
     * are part of the generic-type parameters; please see the examples above.
     * 
     * @throws StringFormatException Please see the explanation provided in
     * {@link #removeGeneric(String)} under 'Throws'.
     * 
     * @see #removeGeneric(String)
     */
    public static String typeToJavaIdentifier(String typeStr)
    {
        // Erase the generic-parameter expressions first.  From here on, every remaining '.'
        // separates package-names / outer-class names from the type itself.
        final String noGenerics = removeGeneric(typeStr);

        // A trailing "..." (Var-Args) is set aside here, and re-attached just before returning
        final boolean   varArgs     = noGenerics.endsWith("...");
        final String    scoped      = varArgs
            ? noGenerics.substring(0, noGenerics.length() - 3)
            : noGenerics;

        // Keep only what follows the last period (if there is a period at all)
        final int       lastDot     = scoped.lastIndexOf('.');
        final String    simpleName  = (lastDot == -1) ? scoped : scoped.substring(lastDot + 1);

        return varArgs ? (simpleName + "...") : simpleName;
    }

    // Written while looking at a field, retrieved from a JavaDoc HTML Page (AbstractHNLI),
    // that looked like this:
    //        protected java.util.function.Predicate<E extends HTMLNode> p;
    //
    // Group 1 is the type-parameter name (the 'E'), and group 2 is the (' extends HTMLNode')
    // part that directly follows it, so that group 2 can be removed.
    // JavaParser complained about it.

    private static final Pattern exClause =
        Pattern.compile("([A-Za-z][A-Za-z0-9]*)(\\s+extends\\s+[\\w\\.]+)");

    /**
     * Removes the {@code 'extends'} part of a Java Generic
     * 
     * <BR /><BR /><B STYLE='color:red;'>TO DO:</B> This will fail for a class such as:
     * <BR />{@code public class MyClass<T extends Vector<String>>}, where the extends clause
     * also has a generic in it.  Java HTML does not define such classes, but they are possible,
     * and this needs to be fixed, as soon as they let me!
     * 
     * @param decl Any Type Declaration that includes the word {@code 'extends'},
     * followed by type-parameter information.
     * 
     * @return The same {@code String} without the clause.  Every {@code 'extends'} clause that
     * directly follows a type-parameter name is removed; the search starts over after each
     * removal.
     */
    public static String removeExtendsClause(String decl)
    {
        String result = decl;

        // Each pass cuts the first remaining clause (group 2) out of the text, and then
        // re-scans the shortened text from its beginning.
        for (Matcher m = exClause.matcher(result); m.find(); m = exClause.matcher(result))
            result = result.substring(0, m.start(2)) + result.substring(m.end(2));

        return result;
    }

    /**
     * <EMBED CLASS='external-html' DATA-FILE-ID=STRSRC_JTYPE_STR>
     *
     * @param s Any Java {@code String}.
     *
     * @return {@code TRUE} if and only if {@code 's'} has the form of a reference to a Java
     * Type: one or more Java Identifiers, separated from each other by a single period
     * {@code '.'}.  Here, the word <B>{@code Type}</B> should be interpreted as a Java Class,
     * Interface, Enumeration (an {@code 'enum'}), Annotation or Record.
     *
     * <BR /><BR /><B>NOTE:</B> {@code 's'} may include the period {@code '.'} since inner
     * classes, enum's and interfaces are also valid Java Type's.  An empty {@code String}, two
     * consecutive period-characters, or a period at the beginning or ending of {@code 's'} will
     * result in this method returning {@code FALSE}.  Reserved Java Key-Words are not rejected
     * by this method.
     */
    public static boolean isJavaTypeStr(String s)
    {
        final int length = s.length();

        if (length == 0) return false;

        // TRUE whenever the next character has to begin a new identifier: at the very start of
        // the String, and directly after each period.  Java restricts the first character of an
        // identifier to a smaller subset than the remaining characters.
        boolean segmentStart = true;

        for (int pos = 0; pos < length; pos++)
        {
            final char ch = s.charAt(pos);

            if (segmentStart)
            {
                // Also rejects a leading period, and a period that directly follows a period
                if (! Character.isJavaIdentifierStart(ch)) return false;

                segmentStart = false;
            }

            else if (ch == '.') segmentStart = true;

            // NEITHER a period, NOR a Java Identifier Part
            else if (! Character.isJavaIdentifierPart(ch)) return false;
        }

        // A still-pending segment means the last character of the String was a period
        return ! segmentStart;
    }

    /**
     * Checks whether an input {@code String} would be allowed as a Java Identifier - for
     * instance, whether the input would make a valid Field-Name, Variable-Name, Class-Name or
     * Method-Name.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>ChatGPT Note:</B>
     *
     * <BR />The original version of this method was written by <B>ChatGPT, 3.5</B>.
     *
     * @param identifier Any Java {@code String}.  This parameter may be null.
     *
     * @return {@code TRUE} if-and-only-if parameter {@code 'identifier'} is a valid Java
     * Identifier.  Specifically, {@code FALSE} is returned when {@code 'identifier'} is null or
     * empty, when it contains a character that is not permitted at its position, or when it is
     * listed in {@link #reservedKeywords}.
     */
    public static boolean isValidJavaIdentifier(String identifier)
    {
        // Null and the empty-string are never identifiers
        if ((identifier == null) || (identifier.length() == 0)) return false;

        final char[] chars = identifier.toCharArray();

        // The leading character is held to the stricter 'start' rule
        if (! Character.isJavaIdentifierStart(chars[0])) return false;

        // Every other character only has to satisfy the 'part' rule
        for (int pos = 1; pos < chars.length; pos++)
            if (! Character.isJavaIdentifierPart(chars[pos])) return false;

        // Well-formed; it is valid as long as it is not one of the reserved key-words
        return ! reservedKeywords.contains(identifier);
    }


    // ********************************************************************************************
    // ********************************************************************************************
    // Replace Special-Character
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * Replaces every instance of the non-breaking space, character {@code #160}, with the
     * standard space character, {@code ASCII #32}.  There are actually people out there who are
     * willing to put character {@code '160'} into a file or document, instead of a simple
     * {@code '&nbsp;'} element.  How rude.
     *
     * @param s Any {@code String} will pass.  Generally {@code String's} that were converted
     * from HTML pages will contain {@code char #160}, as it is occasionally translated from the
     * HTML escape sequence {@code &nbsp;}
     *
     * @return A {@code String} where every instance of white-space character {@code #160} has
     * been replaced with character {@code #32}
     */
    public static String replaceNBSP(String s)
    {
        final char nbsp = (char) 160;

        return s.replace(nbsp, ' ');
    }

    /**
     * Replaces every instance of the {@code "Zero Width Space"} (character {@code #8203}) with
     * character {@code #32} - the <I>space-character</I>.  Even lower than {@code #160},
     * apparently, this character is actually inserted by the <B>JavaDoc Tool</B> (by
     * {@code Sun / Oracle}) into JavaDoc generated HTML Pages.
     *
     * <BR /><BR /><B>A.K.A.:</B> <CODE>&quot;\u200B&quot;</CODE>.
     *
     * <BR /><BR /><B><I STYLE='color: red;'>Can you see the character, above?</I></B>  No?
     * That's zero width space for you!  If you are ever sitting and wondering why a
     * {@code String} seems to be something other than what it looks like - you might have a
     * zero-width space in your {@code String}.  If so, it will take a while to find the bug.
     *
     * @param s Any {@code String} will pass.  Generally {@code String's} that were converted
     * from JavaDoc HTML pages will contain {@code char #8203}.
     *
     * @return A {@code String} where every instance of white-space character {@code #8203} has
     * been replaced with character {@code #32}
     */
    public static String replaceZWSP(String s)
    {
        final char zwsp = (char) 8203;

        return s.replace(zwsp, ' ');
    }


    // ********************************************************************************************
    // ********************************************************************************************
    // CSS Source
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * Checks if a Java-{@code String} constitutes a valid CSS Property-Name.  Note that this
     * method, in no way consults any "complete list" of all known CSS-Properties.  Instead, it
     * simply analyzes whether the characters of the name are congruent with the rules for
     * CSS Property-Names that are used by this class.
     *
     * @param cssPropertyName Any Java-{@code String}
     *
     * @return {@code TRUE} if and only if {@code 'cssPropertyName'} is non-empty, its first
     * character is accepted by {@link #isCSSPropertyNameStart(char)}, and each remaining
     * character is an ASCII letter, an ASCII digit, a dash {@code '-'} or an underscore
     * {@code '_'}.
     */
    public static boolean isCSSPropertyName(String cssPropertyName)
    {
        final int length = cssPropertyName.length();

        if (length == 0) return false;

        // The first character is held to a stricter rule than the others
        if (! isCSSPropertyNameStart(cssPropertyName.charAt(0))) return false;

        int pos = 1;

        while (pos < length)
        {
            final char ch = cssPropertyName.charAt(pos++);

            final boolean letter =
                ((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z'));

            final boolean digit     = (ch >= '0') && (ch <= '9');
            final boolean separator = (ch == '-') || (ch == '_');

            // The first character outside of the permitted set ends the scan
            if (! (letter || digit || separator)) return false;
        }

        return true;
    }

    /**
     * Checks whether parameter {@code 'c'} is one of the characters that are allowed to begin
     * CSS Property-Names: an ASCII letter, a dash {@code '-'} or an underscore {@code '_'}.
     *
     * @param c Any Java {@code char}-primitive
     *
     * @return {@code TRUE} if and only if {@code 'c'} is a character that would be allowed to
     * begin a CSS Property-Name
     */
    public static boolean isCSSPropertyNameStart(char c)
    {
        return  ((c >= 'a') && (c <= 'z'))
            ||  ((c >= 'A') && (c <= 'Z'))
            ||  (c == '_')
            ||  (c == '-');
    }

    /**
     * Checks whether parameter {@code 'c'} is one of the characters that are permitted within
     * CSS Property-Names, after the first character of the name: an ASCII letter, an ASCII
     * digit, a dash {@code '-'} or an underscore {@code '_'}.
     *
     * @param c Any Java {@code char}-primitive
     *
     * @return {@code TRUE} if and only if {@code 'c'} is a character that would be allowed
     * within a valid CSS Property-Name.
     */
    public static boolean isCSSPropertyNamePart(char c)
    {
        final boolean letter    = ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'));
        final boolean digit     = (c >= '0') && (c <= '9');
        final boolean separator = (c == '-') || (c == '_');

        return letter || digit || separator;
    }


    // ********************************************************************************************
    // ********************************************************************************************
    // More HTML Source
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * Checks if a Java-{@code String} constitutes a valid HTML Attribute-Name.  Note that this
     * method, in no way consults any "complete list" of all known HTML-Attributes.  Instead, it
     * simply analyzes whether the characters of the name are congruent with the rules for
     * Attribute-Names that are used by this class.
     *
     * @param attributeName Any Java-{@code String}
     *
     * @return {@code TRUE} if and only if {@code 'attributeName'} is non-empty, its first
     * character is accepted by {@link #isAttributeNameStart(char)}, and each remaining
     * character is an ASCII letter, an ASCII digit, a dash {@code '-'} or an underscore
     * {@code '_'}.
     */
    public static boolean isAttributeName(String attributeName)
    {
        final int length = attributeName.length();

        if (length == 0) return false;

        // The first character is held to a stricter rule than the others
        if (! isAttributeNameStart(attributeName.charAt(0))) return false;

        for (int pos = 1; pos < length; pos++)
        {
            final char ch = attributeName.charAt(pos);

            final boolean permitted =
                    ((ch >= 'A') && (ch <= 'Z'))
                ||  ((ch >= 'a') && (ch <= 'z'))
                ||  ((ch >= '0') && (ch <= '9'))
                ||  (ch == '-')
                ||  (ch == '_');

            // The first character outside of the permitted set ends the scan
            if (! permitted) return false;
        }

        return true;
    }

    /**
     * Checks whether parameter {@code 'c'} is one of the characters that are allowed to begin
     * HTML Attribute-Names.  Only the ASCII letters qualify.
     *
     * @param c Any Java {@code char}-primitive
     *
     * @return {@code TRUE} if and only if {@code 'c'} is a character that would be allowed to
     * begin an HTML Attribute-Name
     */
    public static boolean isAttributeNameStart(char c)
    {
        final boolean upperCase = (c >= 'A') && (c <= 'Z');
        final boolean lowerCase = (c >= 'a') && (c <= 'z');

        return upperCase || lowerCase;
    }

    /**
     * Checks whether parameter {@code 'c'} is one of the characters that are permitted within
     * HTML Attribute-Names, after the first character of the name: an ASCII letter, an ASCII
     * digit, a dash {@code '-'} or an underscore {@code '_'}.
     *
     * @param c Any Java {@code char}-primitive
     *
     * @return {@code TRUE} if and only if {@code 'c'} is a character that would be allowed
     * within a valid HTML Attribute-Name.
     */
    public static boolean isAttributeNamePart(char c)
    {
        switch (c)
        {
            // The two permitted punctuation characters
            case '-':
            case '_':
                return true;

            // Anything else has to be an ASCII digit or an ASCII letter
            default:
                return  ((c >= '0') && (c <= '9'))
                    ||  ((c >= 'A') && (c <= 'Z'))
                    ||  ((c >= 'a') && (c <= 'z'));
        }
    }

}