Source code

001package Torello.HTML;
002
003import java.util.*;
004import java.util.regex.*;
005import java.util.stream.*;
006
007import Torello.Java.*;
008
009
010/**
011 * Easy utilities for escaping and un-escaping HTML characters such as {@code &nbsp;}, and even
012 * code-point based Emoji's.
013 * 
014 * <EMBED CLASS='external-html' DATA-FILE-ID=ESCAPE>
015 */
016@Torello.JavaDoc.StaticFunctional
017public final class Escape
018{
019    private Escape() { }
020
021
022    // ********************************************************************************************
023    // ********************************************************************************************
024    // Internal Fields, used by this class only
025    // ********************************************************************************************
026    // ********************************************************************************************
027
028
029    /**
030     * Regular Expression for characters represented in HTML as
031     * <CODE>&amp;#x[Hexadecimal-Code];</CODE>
032     */
033    private static final Pattern HEX_CODE = Pattern.compile("&#x([A-F,a-f,\\d]{1,8});");
034
035    /**
036     * Regular Expression for characters represented in HTML as <CODE>&amp;#[Decimal-Code];</CODE>
037     */
038    private static final Pattern DEC_CODE = Pattern.compile("&#(\\d{1,8});");
039
040    /**
041     * Regular Expression (approximate, not exact) for hard-coded escape sequences such as
042     * <CODE>"&amp;amp;"</CODE>
043     * 
044     * <BR /><BR />This is <I>"approximate"</I> - because it does not actually look the sequence
045     * up in the hash table.  This means, of course, that not everything which matches this Regular
046     * Expression Pattern is actually an escaped HTML ASCII/UniCode character.
047     * 
048     * <BR /><BR /><B CLASS=JDDescLabel>For Example:</B>
049     * 
050     * <BR /><CODE>&amp;NotACode;</CODE> will match this Regular-Expression, but it is not an
051     * actual HTML Escape-sequence.  For that, one needs to consult the internal
052     * {@code 'htmlEscSeq'} or {@code 'htmlEscChars'} tables themselves.
053     * 
054     * @see #htmlEscChars
055     * @see #htmlEscSeq
056     */
057    private static final Pattern TEXT_CODE = Pattern.compile("&[A-Z,a-z,0-9]{1,8};");
058
059    @SuppressWarnings("rawtypes")
060    private static final Vector data = LFEC.readObjectFromFile_JAR
061        (Escape.class, "data-files/Escape.htdat", true, Vector.class);
062
063    /** 
064     * This {@code Hashtable} contains all of the HTML escape characters which are represented by
065     * a short Text-{@code String}.  The file listed above contains that list.
066     * 
067     * @see HTML_ESC_CHARS
068     */
069    @SuppressWarnings("unchecked")
070    private static final Hashtable<String, Character> htmlEscChars = 
071        (Hashtable<String, Character>) data.elementAt(0);
072
073    /**
074     * This {@code Hashtable} is the reverse of the previous table.  It allows a user to look up
075     * the escape sequence, given a particular ASCII {@code char}.
076     * 
077     * @see HTML_ESC_CHARS
078     * @see #htmlEscChars
079     */
080    @SuppressWarnings("unchecked")
081    private static final Hashtable<Character, String> htmlEscSeq =
082        (Hashtable<Character, String>) data.elementAt(1);
083
084
085    // ********************************************************************************************
086    // ********************************************************************************************
087    // Some debug, and "View Data" methods
088    // ********************************************************************************************
089    // ********************************************************************************************
090
091
092    /**
093     * Print's the HTML Escape Character lookup table to {@code System.out}.
094     * This is useful for debugging.
095     * 
096     * <BR /><BR /><B CLASS=JDDescLabel>View Escape-Codes:</B>
097     * 
098     * <BR />The JAR Data-File List included within the page attached (below) is a complete list of
099     * all <B><CODE>text-String</B> HTML Escape Sequences </CODE> that are known to this class.  
100     * This list, does not include any <CODE>Code Point, Hex</CODE> or <CODE>Decimal Number</CODE>
101     * sequences.
102     *
103     * <BR /><BR /><B><CODE><A HREF="doc-files/EscapeCodes.html">
104     * All HTML Escape Sequences</A></CODE></B>
105     */
106    public static void printHTMLEsc()
107    {
108        Enumeration<String> e = htmlEscChars.keys();
109
110        while (e.hasMoreElements())
111        {
112            String tag = e.nextElement();
113            System.out.println("&" + tag + "; ==> " + htmlEscChars.get(tag));
114        }
115    }
116
117
118    // ********************************************************************************************
119    // ********************************************************************************************
120    // Main Part of the class
121    // ********************************************************************************************
122    // ********************************************************************************************
123
124
125    /**
126     * Converts a single {@code String} from an HTML-escape sequence into the appropriate
127     * character.
128     * 
129     * <BR /><BR />
130     * <CODE>&amp;[escape-sequence];</CODE> ==&gt; actual ASCII or UniCode character.
131     *
132     * @param escHTML An HTML escape sequence.
133     * 
134     * @return the {@code ASCII} or {@code Unicode} character represented by this escape sequence.
135     * 
136     * <BR /><BR />This method will return {@code '0'} if the input it does not represent a valid
137     * HTML Escape sequence.
138     */
139    public static char escHTMLToChar(String escHTML)
140    {
141        if (! escHTML.startsWith("&") || ! escHTML.endsWith(";")) return (char) 0;
142
143        String  s = escHTML.substring(1, escHTML.length() - 1);
144
145        // Temporary Variable.
146        int     i = 0;
147
148        // Since the EMOJI Escape Sequences use Code Point, they cannot, generally be
149        // converted into a single Character.  Skip them.
150
151        if (HEX_CODE.matcher(s).find())
152        {
153            if ((i = Integer.parseInt(s.substring(2), 16)) < Character.MAX_VALUE)
154                return (char) i;
155            else
156                return 0;
157        }
158
159        // Again, deal with Emoji's here...  Parse the integer, and make sure it is a
160        // character in the standard UNICODE range.
161
162        if (DEC_CODE.matcher(s).find()) 
163        {
164            if ((i = Integer.parseInt(s.substring(1))) < Character.MAX_VALUE)
165                return (char) i;
166            else
167                return 0;
168        }
169
170        // Now check if the provided Escape String is listed in the htmlEscChars Hashtable.
171        Character c = htmlEscChars.get(s);
172
173        // If the character was found in the table that lists all escape sequence characters,
174        // then return it.  Otherwise just return ASCII zero.
175
176        return (c != null) ? c.charValue() : 0;
177    }
178
179    /**
180     * Will generate a {@code String} whereby any &amp; all <B STYLE='color: red;'><I>Hexadecimal
181     * Escape Sequences</I></B> have been removed and subsequently replaced with their actual
182     * ASCII/UniCode un-escaped characters!
183     * 
184     * <BR /><BR /><B CLASS=JDDescLabel>Hexadecimal HTML Escape-Sequence Examples:</B>
185     * 
186     * <BR /><TABLE CLASS=JDBriefTable>
187     * <TR><TH>Substring from Input:</TH><TH>Web-Browser Converts To:</TH></TR>
188     * <TR><TD><CODE>&amp;#xAA;</CODE></TD><TD><CODE>'&#xAA;'</CODE> within a browser</TD></TR>
189     * <TR><TD><CODE>&amp;#x67;</CODE></TD><TD><CODE>'&#x67;'</CODE> within a browser</TD></TR>
190     * <TR><TD><CODE>&amp;#x84;</CODE></TD><TD><CODE>'&#x84;'</CODE> within a browser</TD></TR>
191     * </TABLE>
192     * 
193     * <BR />This method might be thought of as similar to the older C/C++ {@code 'Ord()'}
194     * function, except it is for HTML.
195     * 
196     * @param str any {@code String} that contains an HTML Escape Sequence
197     * &amp;#x[HEXADECIMAL VALUE];
198     * 
199     * @return a {@code String}, with all of the hexadecimal escape sequences removed and replaced
200     * with their equivalent ASCII or UniCode Characters.
201     * 
202     * @see #replaceAll_DEC(String str)
203     * @see StrReplace#r(String, String[], char[])
204     */
205    public static String replaceAll_HEX(String str)
206    {
207        // This is the RegEx Matcher from the top.  It matches string's that look like: &#x\d+;
208        Matcher m = HEX_CODE.matcher(str);
209
210        // Save the escape-string regex search matches in a TreeMap.  We need to use a
211        // TreeMap because it is much easier to check if a particular escape sequence has already
212        // been found.  It is easier to find duplicates with TreeMap's.
213
214        TreeMap<String, Character> escMap = new TreeMap<>();
215
216        while (m.find())
217        {
218            // Use Base-16 Integer-Parse
219            int i = Integer.valueOf(m.group(1), 16);
220
221            // Do not un-escape EMOJI's... It makes a mess - they are sequences of characters
222            // not single characters.
223
224            if (i > Character.MAX_VALUE) continue;
225
226            // Retrieve the Text Information about the HTML Escape Sequence
227            String text = m.group();
228
229            // Check if it is a valid HTML 5 Escape Sequence.
230            if (! escMap.containsKey(text)) escMap.put(text, Character.valueOf((char) i));
231        }
232        
233        // Build the matchStr's and replaceChar's arrays.  These are just the KEY's and
234        // the VALUE's of the TreeMap<String, Character> which was just built.
235        // NOTE: A TreeMap is used *RATHER THAN* two parallel arrays in order to avoid keeping
236        //       duplicates when the replacement occurs.
237
238        String[]    matchStrs       = escMap.keySet().toArray(new String[escMap.size()]);
239        char[]      replaceChars    = new char[escMap.size()];
240
241        // Lookup each "ReplaceChar" in the TreeMap, and put it in the output "replaceChars"
242        // array.  The class StrReplace will replace all the escape squences with the actual
243        // characters.
244
245        for (int i=0; i < matchStrs.length; i++) replaceChars[i] = escMap.get(matchStrs[i]);
246
247        return StrReplace.r(str, matchStrs, replaceChars);
248    }
249
250    /**
251     * This method functions the same as {@code replaceAll_HEX(String)} - except it replaces only
252     * HTML Escape sequences that are represented using decimal (base-10) values.
253     * {@code 'replaceAll_HEX(...)'} works on hexadecimal (base-16) values.
254     * 
255     * <BR /><BR /><B CLASS=JDDescLabel>Base-10 HTML Escape-Sequence Examples:</B>
256     * 
257     * <BR /><TABLE CLASS=JDBriefTable>
258     * <TR><TH>Substring from Input:</TH><TH>Web-Browser Converts To:</TH></TR>
259     * <TR><TD><CODE>&amp;#48;</CODE></TD><TD><CODE>'&#48;'</CODE> in your browser</TD></TR>
260     * <TR><TD><CODE>&amp;#64;</CODE></TD><TD><CODE>'&#64;'</CODE> in your browser</TD></TR>
261     * <TR><TD><CODE>&amp;#123;</CODE></TD><TD><CODE>'&#123;'</CODE> in your browser</TD></TR>
262     * <TR><TD><CODE>&amp;#125;</CODE></TD><TD><CODE>'&#125;'</CODE> in your browser</TD></TR>
263     * </TABLE>
264     * 
265     * <BR /><B CLASS=JDDescLabel>Base-10 &amp; Base-16 Escape-Sequence Difference:</B>
266     * 
267     * <BR /><UL CLASS=JDUL>
268     * <LI> <CODE>&amp;#x[hex base-16 value];</CODE>  There is an {@code 'x'} as the third character
269     *      in  the {@code String}
270     * </LI>
271     * <LI> <CODE>&amp;#[decimal base-10 value];</CODE>  There is no {@code 'x'} in the
272     *      escape-sequence  {@code String!}
273     * </LI>
274     * </UL>
275     * 
276     * <BR />This short example delineates the difference between an HTML escape-sequence that
277     * employs {@code Base-10} numbers, and one using {@code Base-16} (Hexadecimal) numbers.
278     * 
279     * @param str any {@code String} that contains the HTML Escape Sequence 
280     * <CODE>&amp;#[DECIMAL VALUE];</CODE>.
281     * 
282     * @return a {@code String}, with all of the decimal escape sequences removed and replaced with
283     * ASCII UniCode Characters.
284     * 
285     * <BR /><BR />If this parameter does not contain such a sequence, then this method will return
286     * the same input-{@code String} reference as its return value.  
287     * 
288     * @see #replaceAll_HEX(String str)
289     * @see StrReplace#r(String, String[], char[])
290     */
291    public static String replaceAll_DEC(String str)
292    {
293        // This is the RegEx Matcher from the top.  It matches string's that look like: &#\d+;
294        Matcher m = DEC_CODE.matcher(str);
295
296        // Save the escape-string regex search matches in a TreeMap.  We need to use a
297        // TreeMap because it is much easier to check if a particular escape sequence has already
298        // been found.  It is easier to find duplicates with TreeMap's.
299
300        TreeMap<String, Character> escMap = new TreeMap<>();
301
302        while (m.find())
303        {
304            // Use Base-10 Integer-Parse
305            int i = Integer.valueOf(m.group(1));
306
307            // Do not un-escape EMOJI's... It makes a mess - they are sequences of characters
308            // not single characters.
309
310            if (i > Character.MAX_VALUE) continue;
311
312            // Retrieve the Text Information about the HTML Escape Sequence
313            String text = m.group();
314
315            // Check if it is a valid HTML 5 Escape Sequence.
316            if (! escMap.containsKey(text)) escMap.put(text, Character.valueOf((char) i));
317        }
318        
319        // Build the matchStr's and replaceChar's arrays.  These are just the KEY's and
320        // the VALUE's of the TreeMap<String, Character> which was just built.
321        // NOTE: A TreeMap is used *RATHER THAN* two parallel arrays in order to avoid keeping
322        //       duplicates when the replacement occurs.
323
324        String[]    matchStrs       = escMap.keySet().toArray(new String[escMap.size()]);
325        char[]      replaceChars    = new char[escMap.size()];
326
327        // Lookup each "ReplaceChar" in the TreeMap, and put it in the output "replaceChars"
328        // array.  The class StrReplace will replace all the escape sequences with the actual
329        // characters.
330
331        for (int i=0; i < matchStrs.length; i++) replaceChars[i] = escMap.get(matchStrs[i]);
332
333        return StrReplace.r(str, matchStrs, replaceChars);
334    }
335
336    /**
337     * <EMBED CLASS='external-html' DATA-FILE-ID=ESCAPE_ALL_TEXT>
338     * 
339     * @param str any {@code String} that contains HTML Escape Sequences that need to be converted
340     * to their ASCII-UniCode character representations.
341     * 
342     * @return a {@code String}, with all of the decimal escape sequences removed and replaced with
343     * ASCII UniCode Characters.
344     * 
345     * @see #replaceAll_HEX(String str)
346     * @see StrReplace#r(String, boolean, String[], Torello.Java.Function.ToCharIntTFunc)
347     * 
348     * @throws IllegalStateException
349     */
350    public static String replaceAll_TEXT(String str)
351    {
352        // We only need to find which escape sequences are in this string.
353        // use a TreeSet<String> to list them.  It will
354
355        Matcher                 m        = TEXT_CODE.matcher(str);
356        TreeMap<String, String> escMap   = new TreeMap<>();
357
358        while (m.find())
359        {
360            // Retrieve the Text Information about the HTML Escape Sequence
361            String text     = m.group();
362            String sequence = text.substring(1, text.length() - 1);
363
364            // Check if it is a valid HTML 5 Escape Sequence.
365            if ((! escMap.containsKey(text)) && htmlEscChars.containsKey(sequence))
366                escMap.put(text, sequence);
367        }
368        
369        // Convert the TreeSet to a String[] array... and use StrReplace
370        String[] escArr = new String[escMap.size()];
371
372        return StrReplace.r(
373            str, false, escMap.keySet().toArray(escArr),
374            (int i, String sequence) -> htmlEscChars.get(escMap.get(sequence))
375        );
376    }
377
378    /**
379     * Calls all of the HTML Escape Sequence convert/replace {@code String} functions at once.
380     * 
381     * @param s This may be any Java {@code String} which may (or may not) contain HTML Escape
382     * sequences.
383     * 
384     * @return a new {@code String} where all HTML escape-sequence substrings have been replaced 
385     * with their natural character representations.
386     * 
387     * @see #replaceAll_DEC(String)
388     * @see #replaceAll_HEX(String)
389     * @see #replaceAll_TEXT(String)
390     */
391    @Deprecated
392    public static String replaceAll(String s)
393    { return replaceAll_HEX(replaceAll_DEC(replaceAll_TEXT(s))); }
394
395    /**
396     * <EMBED CLASS='external-html' DATA-FILE-ID=ESCAPE_REPLACE>
397     * 
398     * @param s This may be any Java {@code String} which may (or may not) contain HTML Escape
399     * sequences.
400     * 
401     * @return a new {@code String} where all HTML escape-sequence substrings have been replaced 
402     * with their natural character representations.
403     */
404    public static String replace(String s)
405    {
406        // The primary optimization is to do this the "C" way (As in The C Programming Language)
407        // The String to Escape is converted to a character array, and the characters are shifted
408        // as the Escape Sequences are replaced.  This is all done "in place" without creating
409        // new substring's in memory.
410
411        char[] c = s.toCharArray();
412
413        // These two pointers are kept as the "Source Character" - as in the next character to
414        // "Read" ... and the "Destination Character" - as in the next location to write.
415
416        int sourcePos   = 0;
417        int destPos     = 0;
418
419        while (sourcePos < c.length)
420
421            // All Escape Sequences begin with the Ampersand Symbol.  If the next character
422            // does not begin with the Ampersand, we should skip and move on.  Copy the next source
423            // character to the next destination location, and continue the loop.
424
425            if (c[sourcePos] != '&')
426            { c[destPos++]=c[sourcePos++];  continue; }
427    
428            // Here, an Ampersand has been found.  Now check if the character immediately 
429            // following the Ampersand is a Pound Sign.  If it is a Pound Sign, that implies
430            // this escape sequence is simply going to be a number.
431
432            else if ((sourcePos < (c.length-1)) && (c[sourcePos + 1] == '#'))
433            {
434                int     evaluatingPos   = sourcePos + 1;
435                boolean isHex           = false;
436
437                // If the Character after the Pound Sign is an 'X', it means that the number
438                // that has been escaped is a Base 16 (Hexadecimal) number.
439                // IMPORTANT: Check to see that the Ampersand wasn't the last char in the String
440
441                if (evaluatingPos + 1 < c.length)
442                    if (c[evaluatingPos + 1] == 'x')
443                    { isHex = true; evaluatingPos++; }
444
445                // Keep skipping the numbers, until a non-digit character is identified.
446                while ((++evaluatingPos < c.length) && Character.isDigit(c[evaluatingPos]));
447
448                // If the character immediately after the last digit isn't a ';' (Semicolon),
449                // then this entire thing is NOT an escaped HTML character.  In this case, make
450                // sure to copy the next source-character to the next destination location in the
451                // char[] array...  Then continue the loop to the next 'char' (after Ampersand)
452
453                if ((evaluatingPos == c.length) || (c[evaluatingPos] != ';'))
454                    { c[destPos++]=c[sourcePos++];  continue; }
455
456                int escapedChar;
457
458                try
459                { 
460                    // Make sure to convert 16-bit numbers using the 16-bit radix using the
461                    // standard java parse integer way.
462
463                    escapedChar = isHex
464                        ? Integer.parseInt(s.substring(sourcePos + 3, evaluatingPos), 16)
465                        : Integer.parseInt(s.substring(sourcePos + 2, evaluatingPos));
466                }
467
468                // If for whatever reason java was unable to parse the digits in the escape
469                // sequence, then copy the next source-character to the next destination-location
470                // and move on in the loop.
471
472                catch (NumberFormatException e)
473                    { c[destPos++]=c[sourcePos++];  continue; }
474
475                // If the character was an Emoji, then it would be a number greater than
476                // 2^16.  Emoji's use Code Points - which are multiple characters used up
477                // together.  Their escape sequences are always characters larger than 65,535.
478                // If so, just copy the next source-character to the next destination location, and
479                // move on in the loop.
480
481                if (escapedChar > Character.MAX_VALUE)
482                    { c[destPos++]=c[sourcePos++];  continue; }
483
484                // Replace the next "Destination Location" with the (un) escaped char.
485                c[destPos++] = (char) escapedChar;
486
487                // Skip the entire HTML Escape Sequence by skipping to the location after the
488                // position where the "evaluation" (all this processing) was occurring.  This
489                // just happens to be the next-character immediately after the semi-colon
490
491                sourcePos = evaluatingPos + 1;  // will be pointing at the ';' (semicolon)
492            }
493
494            // An Ampersand was just found, but it was not followed by a '#' (Pound Sign).  This
495            // means that it is not a "numbered" (to invent a term) HTML Escape Sequence.  Instead
496            // we shall check if there is a valid Escape-String (before the next semi-colon) that
497            // can be identified in the Hashtable 'htmlEscChars'
498
499            else if (sourcePos < (c.length - 1))
500            {
501                // We need to create a 'temp variable' and it will be called "evaluating position"
502                int evaluatingPos = sourcePos;
503
504                // All text (non "Numbered") HTML Escape String's are comprised of letter or digits
505                while ((++evaluatingPos < c.length) && Character.isLetterOrDigit(c[evaluatingPos]));
506
507                // If the character immediately after the last letter or digit is not a semi-colon,
508                // then there is no way this is an HTML Escape Sequence.  Copy the next source to
509                // the next destination location, and continue with the loop.
510
511                if ((evaluatingPos == c.length) || (c[evaluatingPos] != ';'))
512                    { c[destPos++]=c[sourcePos++];  continue; }
513
514                // Get the replacement character from the lookup table.
515                Character replacement = htmlEscChars.get(s.substring(sourcePos + 1, evaluatingPos));
516
517                // The lookup table will return null if there this was not a valid escape sequence.
518                // If this was not a valid sequence, just copy the next character from the source
519                // location, and move on in the loop.
520
521                if (replacement == null)
522                    { c[destPos++]=c[sourcePos++];  continue; }
523
524                c[destPos++]    = replacement;
525                sourcePos       = evaluatingPos + 1;
526            }
527
528            else
529                { c[destPos++]=c[sourcePos++];  continue; }
530
531        return new String(c, 0, destPos);    
532    }
533
534    /**
535     * <EMBED CLASS='external-html' DATA-FILE-ID=ESCAPE_CHAR>
536     * 
537     * @param c Any Java Character.  Note that the Java <B>Primitive Type</B> {@code 'char'}
538     * is a 16-bit type.  This parameter equates to the <B>UNICODE</B> Characters
539     * {@code 0x0000} up to {@code 0xFFFF}.
540     * 
541     * @param use16BitEscapeSequence If the user would like the returned, escaped, {@code String}
542     * to use <B>Base 16</B> for the escaped digits, pass {@code TRUE} to this parameter.  If the
543     * user would like to retrieve an escaped {@code String} that uses standard <B>Base 10</B>
544     * digits, then pass {@code FALSE} to this parameter.
545     * 
546     * @return The passed character parameter {@code 'c'} will be converted to an HTML Escape
547     * Sequence.  For instance if the character <CODE>'&#6211;'</CODE>, which is the Chinese
548     * Character for <I>I, Me, Myself</I> were passed to this method, then the {@code String}
549     * {@code "&#25105;"} would be returned.
550     * 
551     * <BR /><BR />If the parameter {@code 'use16BitEscapeSequence'} had been passed {@code TRUE},
552     * then this method would, instead, return the {@code String "&#x6211;"}.
553     */
554    public static String escChar(char c, boolean use16BitEscapeSequence)
555    {
556        return use16BitEscapeSequence
557            ? "&#" + ((int) c) + ";"
558            : "&#x" + Integer.toHexString((int) c).toUpperCase() + ";";
559    }
560
561    /**
562     * <EMBED CLASS='external-html' DATA-FILE-ID=ESCAPE_CODE_PT>
563     * 
564     * @param codePoint This will take any integer.  It will be interpreted as a {@code UNICODE}
565     * {@code code point}.  
566     * 
567     * <BR /><BR /><B STYLE="color:red;">NOTE:</B> Java uses <B>16-bit</B> values for it's
568     * primitive {@code 'char'} type.  This is also the "first plane" of the <B>UNICODE Space</B>
569     * and actually referred to as the <B>Basic Multi Lingual Plane</B>.  Any value passed to this
570     * method that is lower than {@code 65,535} would receive the same escape-{@code String} that
571     * it would from a call to the method {@link #escChar(char, boolean)}.
572     * 
573     * @param use16BitEscapeSequence If the user would like the returned, escaped, {@code String}
574     * to use <B>Base 16</B> for the escaped digits, pass {@code TRUE} to this parameter.  If the
575     * user would like to retrieve an escaped {@code String} that uses standard <B>Base 10</B>
576     * digits, then pass {@code FALSE} to this parameter.
577     * 
578     * @return The {@code code point} will be converted to an HTML Escape Sequence, as a 
579     * {@code java.lang.String}.  For instance if the {@code code point} for "the snowman" glyph
580     * (character &#x2603;), which happens to be represented by a {@code code point} that is below
581     * {@code 65,535} (and, incidentally, does "fit" into a single Java {@code 'char'}) - this
582     * method would return the {@code String "&#9731;"}. 
583     * 
584     * <BR /><BR />If the parameter {@code 'use16BitEscapeSequence'} had been passed {@code TRUE},
585     * then this method would, instead, return the {@code String "&#x2603;"}.
586     * 
587     * @throws IllegalArgumentException Java has a method for determining whether any integer is a
588     * valid {@code code point}.  Not all of the integers "fit" into the 17 Unicode "planes".  
589     * Note that each of the planes in {@code 'Unicode Space'} contain {@code 65,535}
590     * (or {@code 2^16}) characters.
591     */
592    public static String escCodePoint(int codePoint, boolean use16BitEscapeSequence)
593    {
594        if (! Character.isValidCodePoint(codePoint)) throw new IllegalArgumentException(
595            "The integer you have passed to this method [" + codePoint + "] was deemed an " +
596            "invalid Code Point after a call to: [java.lang.Character.isValidCodePoint(int)].  " +
597            "Therefore this method is unable to provide an HTML Escape Sequence."
598        );
599
600        return use16BitEscapeSequence
601            ? "&#" + codePoint + ";"
602            : "&#x" + Integer.toHexString(codePoint).toUpperCase() + ";";
603    }
604    
605    /**
606     * <EMBED CLASS='external-html' DATA-FILE-ID=ESCAPE_HAS_HTML>
607     *
608     * @param c Any <B>ASCII</B> or <B>UNICODE</B> Character
609     * 
610     * @return {@code TRUE} if there is a {@code String} escape sequence for this character, and
611     * {@code FALSE} otherwise.
612     * 
613     * @see #htmlEsc(char)
614     */
615    public static boolean hasHTMLEsc(char c)
616    { return htmlEscSeq.get(Character.valueOf(c)) != null; }
617
618    /**
619     * <EMBED CLASS='external-html' DATA-FILE-ID=ESCAPE_HTML_ESC>
620     *
621     * @param c Any <B>ASCII</B> or <B>UNICODE</B> Character
622     * 
623     * @return The {@code String} that is used by web-browsers to escape this ASCII / Uni-Code
624     * character - <I>if there is one saved</I> in the <B>internal</B> <CODE>Lookup Table</CODE>.
625     * If the character provided does not have an associated {@code HTML Escape String}, then
626     * 'null' is returned.
627     * 
628     * <BR /><BR /><B>NOTE:</B> The entire escape-{@code String} is not provided, just the
629     * inner-characters.  The leading {@code '&'} (Ampersand) and the trailing {@code ';'} 
630     * (Semi-Colon) are not appended to the returned {@code String}.
631     * 
632     * @see #hasHTMLEsc(char)
633     */
634    public static String htmlEsc(char c)
635    { return htmlEscSeq.get(Character.valueOf(c)); }
636}