001package Torello.Java;
002
003import java.io.*;
004import java.util.*;
005import java.net.*;
006import javax.net.ssl.*;
007
008import java.nio.charset.Charset;
009import java.util.regex.Pattern;
010import java.util.stream.IntStream;
011
012import Torello.HTML.*;
013import Torello.HTML.NodeSearch.*;
014
015import Torello.Java.Shell.C;
016import Torello.Java.FileNode;
017import Torello.Java.Additional.RemoveUnsupportedIterator;
018import Torello.Java.FileNode.RetTypeChoice;
019import Torello.Java.Additional.Ret2;
020
021/**
022 * Wraps a basic on-line syntax-hiliter named <CODE>HiLite&#46;ME</CODE> (which, itself, wraps the
023 * on-line hiliter <CODE>pygments&#46;org</CODE>).
024 * 
025 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=HILITEME>
026 */
027public class HiLiteMe
028{
    // Static utility class: the private constructor prevents instantiation.
    private HiLiteMe() { }

    // ********************************************************************************************
    // HiLite.ME parameter-tags stored internally.
    // ********************************************************************************************

    // Loaded once, during class-initialization, from the data-file "data06.vdat" (located
    // relative to class Torello.Data.DataFileLoader).  The two fields declared directly below
    // read element 0 (code-type descriptions) and element 1 (style-types) out of this Vector.
    // NOTE(review): presumably the file is a resource inside the library JAR - confirm against
    // LFEC.readObjectFromFile_JAR.
    @SuppressWarnings("unchecked")
    private static final Vector<Object> dataFile = (Vector<Object>) LFEC.readObjectFromFile_JAR
        (Torello.Data.DataFileLoader.class, "data06.vdat", true, Vector.class);

    // Maps each 'Code Type' tag (the key) to its long-form description (the value).
    @SuppressWarnings("unchecked")
    private static final TreeMap<String, String> codeTypeDescriptions =
        (TreeMap<String, String>) dataFile.elementAt(0);

    // The list of 'Style Type' tags.
    @SuppressWarnings("unchecked")
    private static final Vector<String> styleTypes = (Vector<String>) dataFile.elementAt(1);

    // This is only loaded from disk if "SimplifySpans" method is invoked.  It is loaded only
    // once.  The only location where it can be loaded is inside 'simplifyColorSpans'
    // LAZY-LOADING: the field remains null until that first load occurs.
    private static TreeMap<String, TreeMap<String, TagNode>> allMaps = null;

    // This is only loaded from disk if the styleParamCSSClasses(String) method is called.
    // LAZY-LOADING: the field remains null until that first load occurs.
    private static TreeMap<String, String> styleCSSDefinitions = null;
055
056    /**
057     * Each time a piece of code is to be pretty-printed, {@code HiLite.ME} expects to receive a
058     * "type of software" or "type of code" descriptor {@code String} that identifies what type of
059     * textual-code it is receiving.  There are exactly 266 different types of software files that
060     * may be passed to the {@code HiLite.ME} server.  These {@code String}-tag for these 
061     * {@code 'Code Types'} may be viewed here.
062     * 
063     * <BR /><BR />Click the link below to see the complete list of programming-type codes.
064     * <BR /><BR /><B><A HREF='doc-files/HiLiteMe/HiLiteProgrammingLanguages.html'>
065     * Programming Language Codes</A></B>
066     * 
067     * @return An {@code Iterator<String>} that produces each {@code String}-tag that may be passed
068     * as a {@code 'Code Type'} to {@code http://HiLite.ME}
069     */
070    public static Iterator<String> getCodeTypes()
071    {
072        // The 'RemoveUnsupportedIterator' wrapper class prohibits modifications to this TreeMap
073        return new RemoveUnsupportedIterator<String>(codeTypeDescriptions.keySet().iterator());
074    }
075
076    /**
077     * This will iterate over the full-name descriptions of the software types available for
078     * parsing with the {@code HiLite.ME} server
079     * 
080     * @return An {@code Iterator<String>} that produces a {@code String}-description of each
081     * software-types available for parsing.
082     */
083    public static Iterator<String> getCodeTypeDescriptions()
084    {
085        // The 'RemoveUnsupportedIterator' wrapper class prohibits modifications to this TreeMap
086        return new RemoveUnsupportedIterator<String>(codeTypeDescriptions.values().iterator());
087    }
088
089    /**
090     * This will iterate over the "Defining Style-Output Types" available to users of the
091     * {@code http://HiLite.ME} server.
092     * 
093     * <BR /><BR />Click the link below to see the complete list of {@code 'Style Codes'}
094     * <BR /><BR /><B><A HREF='doc-files/HiLiteMe/HiLiteStyleCodes.html'>
095     * HiLiting Style Codes</A></B>
096     * 
097     * @return An {@code Iterator<String>} over the different available {@code String}-tags that
098     * may be passed as a {@code 'Style Tag'} when performing a <B>"pretty print"</B> operation.
099     */
100    public static Iterator<String> getStyleTypes()
101    {
102        // The 'RemoveUnsupportedIterator' wrapper class prohibits modifications to this Vector
103        return new RemoveUnsupportedIterator<String>(styleTypes.iterator());
104    }
105
106    /**
107     * Returns the description for a specific {@code 'Code Type'}
108     * @return the long-form of the {@code 'codeType'} as a java {@code String}
109     */
110    public static String getCodeDescription(String codeType)
111    { return codeTypeDescriptions.get(codeType); }
112
113    /**
114     * Checks whether the passed {@code String}-parameter is a recognized {@code 'Code Type'}
115     * 
116     * <BR /><BR />Click the link below to see the complete list of programming-type codes.
117     * <BR /><BR /><B><A HREF='doc-files/HiLiteMe/HiLiteProgrammingLanguages.html'>
118     * Programming Language Codes</A></B>
119     * 
120     * @param s This may be any {@code java.lang.String}.  It is intended to be one of the listed
121     * {@code 'Code Types'} available for use with the {@code HiLite.ME} server.
122     * 
123     * @return This will return <B>TRUE</B> if the passed {@code String}-tag is one of the tags 
124     * listed with the {@code HiLite.ME} server for <I>Software Types</I>, or {@code 'Code Types'}
125     * - and <B>FALSE</B> otherwise.  Use the {@code Iterator} to get the complete list of
126     * available {@code 'Code Tags'} (or click the link, above, to view them in this browser).
127     * 
128     * @see #getCodeTypes()
129     * @see #getCodeTypeDescriptions()
130     */
131    public static boolean isCodeType(String s)
132    { return codeTypeDescriptions.containsKey(s); }
133
134    /**
135     * Checks whether the passed {@code String}-parameter is a recognized {@code 'Style Type'}
136     * 
137     * <BR /><BR />Click the link below to see the complete list of {@code 'Style Codes'}
138     * <BR /><BR /><B><A HREF='doc-files/HiLiteMe/HiLiteStyleCodes.html'>
139     * HiLiting Style Codes</A></B>
140     * 
141     * @param s This may be any {@code java.lang.String}.  It is intended to be one of the listed
142     * {@code 'Style Types'} available for use with the {@code HiLite.ME} server.
143     * 
144     * @return This will return <B>TRUE</B> if the passed {@code String}-tag is one of the tags
145     * listed with the {@code HiLite.ME} server for <I>Style Types</I>, and <B>FALSE</B> otherwise.
146     * Use the {@code Iterator} to get the complete list of available {@code 'Style Tags'} (or
147     * click the link, above, to view them in this browser).
148     * 
149     * @see #getStyleTypes()
150     */
151    public static boolean isStyleType(String s) { return styleTypes.contains(s); }
152
153
154    // ********************************************************************************************
155    // Here are the local variables and classes
156    // ********************************************************************************************
157
    /**
     * The HTML that is written at the very top of the {@code 'index.html'} file generated by
     * {@link #createIndex(TreeSet, String)}.  It opens the {@code <HTML>} and {@code <BODY>}
     * elements, declares a {@code UTF-8} content-type, and defines the CSS used for the list of
     * links.
     * 
     * <BR /><BR /><B>NOTE:</B> It may be changed, as it has not been declared {@code 'final'}.
     */
    public static String INDEX_HEADER_HTML =
        "<HTML>\n" +
        "<HEAD>\n" +
        "<TITLE>HiLite.ME Index</TITLE>\n" +
        "<META http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n" +
        "<STYLE TYPE=\"text/css\">\n" +
        "A                      { color: black; text-decoration: none;  }\n" +
        "UL LI:Nth-Child(odd)   { background: lightgray;                }\n" +
        "UL LI:Nth-Child(even)  { background: white;                    }\n" +
        "UL LI                  { padding: 5 5 5 5;                     }\n" +
        "UL                     { max-width: 60%;                       }\n" +
        "</STYLE>\n" +
        "</HEAD>\n" +
        "<BODY>\n" +
        "<H2>HiLite.ME Code</H2>\n";
179
180    // ********************************************************************************************
181    // Here is class Params
182    // ********************************************************************************************
183
    /**
     * Inner class for providing a list of parameters when hiliting multiple files on disk.
     * Every setting is a {@code public}, non-{@code final} field that is initialized to a default
     * value, and may be re-assigned directly.
     * 
     * <BR /><BR />
     * <EMBED CLASS="external-html" DATA-FILE-ID="HLMP">
     */
    public static class Params
    {
        /**
         * The default value for {@code 'headerHTML'}.  This constant cannot be changed, but the
         * header actually used may be reset by assigning the field {@link #headerHTML}, which is a
         * {@code 'public'} field in this class.
         */
        public static final String DEFAULT_HEADER_HTML =
            "<HTML>\n<HEAD>\n<META http-equiv=\"Content-Type\" content=\"text/html; " +
            "charset=utf-8\" />\n<TITLE>INSERT HERE</TITLE>\n</HEAD>\n<BODY>\n";

        /**
         * The default value for {@code 'endingHTML'}.  This constant cannot be changed, but the
         * ending actually used may be reset by assigning the field {@link #endingHTML}, which is a
         * {@code 'public'} field in this class.
         */
        public static final String DEFAULT_ENDING_HTML = "\n</BODY>\n</HTML>\n";

        /**
         * The style parameter to be used with {@code HiLite.ME}.  View the list of available
         * {@code Style Tags} using {@link HiLiteMe#getStyleTypes()}.  The default is
         * {@code "native"}.
         */
        public String styleTag = "native";

        /**
         * When this is <B>TRUE</B>, {@code System.out} will print a line each time a file in the
         * {@code FileNode} tree is visited and sent to the pretty-printing HiLite servers.
         */
        public boolean verbose = true;

        /** 
         * This is the target directory for where the output {@code '.html'} files will be
         * sent.  The default is the empty-{@code String}.
         */
        public String targetDirectory = "";

        /**
         * This is the header HTML that is inserted above each post-processed / pretty-printed
         * source-code file that is received from {@code HiLite.ME} servers.  The default version
         * includes a {@code META-UTF8} clause, because often higher-level {@code Uni-Code}
         * characters from Mandarin Chinese, Spanish, Korean, Vietnamese and other foreign
         * languages are in (the writer, Torello's code).
         *
         * <BR /><BR /><B>NOTE:</B> The header must have a sub-string that says (in all caps)
         * {@code "INSERT HERE"} - the file name will be inserted.
         */
        public String headerHTML = DEFAULT_HEADER_HTML;

        /**
         * This is the HTML that is appended to each pretty-printed source-code file received from
         * {@code HiLite.ME} servers.  The default version of this {@code java.lang.String} simply
         * contains {@code </BODY>, </HTML>} - <I>change this if necessary!</I>
         */
        public String endingHTML = DEFAULT_ENDING_HTML;

        /**
         * This will store each file that is successfully converted into a "Pretty Printed HTML"
         * file in the passed parameter {@code TreeSet}.  Java's {@code TreeSet} stores things
         * alphabetically, and does not allow duplicates.  It can be used to generate an
         * {@code 'index.html'} file.  Send this field to the
         * {@link HiLiteMe#createIndex(TreeSet, String)} method to build the {@code 'index.html'}
         * file.  If this is {@code null}, files that are pretty-printed by the
         * {@code http://HiLite.ME/} server will just not be logged to this data-structure.
         */
        public TreeSet<String> fileIndexList = new TreeSet<String>();

        /**
         * Any file that ends with {@code ".txt"} can be copied to the {@link #targetDirectory}
         * location - without being pretty printed - if this variable is set to <B>TRUE</B>.
         */
        public boolean copyTextFiles = true;

        /**
         * This is null by default.  If it is not null, then each {@code java.lang.String 'key'} in
         * the Map should be a file ending, and the {@code 'value'} to which the key maps should be
         * a {@code Code Type} Tag that is recognized by the HiLite Pretty-Print HTML servers.
         *
         * <BR /><BR /><B>NOTE:</B> It is important to know that the {@code 'keys'} in this
         * {@code TreeMap} data-structure are valid file-name extensions, <I>that leave out the
         * leading '.' (period)</I>.  The key should be {@code 'java'} not {@code '.java'}.
         * Furthermore, the values in the data-structure to which these keys map, should be valid
         * HiLiteMe Pretty-Print Servers code-type values.  The complete list of valid
         * {@code 'Code Types'} can be viewed by iterating through {@code public Iterator<String>
         * getCodeTypes()}
         *
         * @see #getCodeTypes()
         * @see #isCodeType(String)
         * @see #deduceCodeType(String, TreeMap)
         */
        public TreeMap<String, String> customMappings = null;

        /**
         * Instantiate a {@code Cache}, and the {@code HiLite.ME} server will save all
         * code-{@code String's} into a cache.  For each source code request that is made <I>which
         * includes <B>BOTH</B> a {@code Params} parameter class, <B>AND</B> a non-null
         * {@code 'cache'} field</I>, the logic will first check the code-cache to see if an
         * identical {@code String} is in the cache.  If the {@code String} is identical, querying
         * the {@code HiLite.ME} server will be skipped, and the local copy used instead.  This can
         * reduce build time many-fold, as seconds are reduced to milliseconds in large builds
         * where only 2 or 3 classes have seen code-changes since a previous build.
         * @see Cache
         */
        public Cache cache = null;

        /**
         * The public constructor has no body.  Modify the fields as necessary - since they are
         * all {@code 'public'}, and <I><B>are not {@code 'final'}</B></I>
         */
        public Params() { }
    }
298
299
300    // ********************************************************************************************
301    // Here is static inner class "Cache"
302    // ********************************************************************************************
303
304    /**
305     * A caching-system class that allows this tool to efficiently bypass calls to the server when
306     * an exact-copy of the hilited source-code already exists inside the cache.
307     * 
308     * <BR /><BR />
309     * <EMBED CLASS="external-html" DATA-FILE-ID="HLMC">
310     */
311    public static class Cache
312    {
        // This is, as the name clearly says, the Cache-Directory.  The value stored here is the
        // one returned by 'checkCSD', and therefore always ends with 'File.separator'.
        private final String cacheSaveDirectory;

        // This is the list of Hash-Codes for all Code/HTML pairs stored in the cache.  This is the
        // exact data-structure that is referred to as the "Master Hash File"

        private final TreeSet<Integer> hashCodes;

        // NUM_DIRS: cached entries are spread over sub-directories of the cache directory; the
        // sub-directory used for an entry is derived from (absolute-value of hash) % NUM_DIRS.
        // HASH_SAVE_TREE: the file-name (inside the cache directory) of the serialized hash-code
        // TreeSet.
        private static final short  NUM_DIRS        = 50;
        private static final String HASH_SAVE_TREE  = "HILITED_STRINGS_HASH_CODE.ts";
323
324        /**
325         * Inform the user how much space (in bytes) is used by this {@code Cache}.
326         * @return The number of bytes being used on the file-system by this {@code Cache}.
327         */
328        public long totalSize()
329        { return FileNode.createRoot(cacheSaveDirectory).loadTree().getDirTotalContentsSize(); }
330
331        /**
332         * Count how many files and directories are contained in this {@code Cache}.
333         * @return The total number of files and sub-directories in the {@code Cache} directory.
334         */
335        public int totalNumber()
336        {return FileNode.createRoot(cacheSaveDirectory).loadTree().count(); }
337
338        private static String checkCSD(String cacheSaveDirectory)
339        {
340            cacheSaveDirectory = cacheSaveDirectory.trim();
341
342            if (! cacheSaveDirectory.endsWith(File.separator))
343                cacheSaveDirectory = cacheSaveDirectory + File.separator;
344
345            File f  = new File(cacheSaveDirectory);
346
347            if (! f.exists()) throw new CacheError(
348                "The specified cache-directory specified does not exist on the file-system: " +
349                "[" + cacheSaveDirectory + "]"
350            );
351
352            return cacheSaveDirectory;
353        }
354
355        // the "return TreeSet<Integer>" complains about an unchecked cast.
356        @SuppressWarnings("unchecked") 
357        private static TreeSet<Integer> checkTS(String cacheSaveDirectory)
358        {
359            String  fName   = cacheSaveDirectory + HASH_SAVE_TREE;
360            File    f       = new File(fName);
361
362            if (! f.exists()) throw new CacheError(
363                "The current-cache directory does not contain a primary-cache file: " +
364                "[" + fName + "]"
365            );
366
367            Object o;
368
369            try
370                { o = FileRW.readObjectFromFile(fName, true); }
371
372            catch (Throwable t)
373            {
374                throw new CacheError(
375                    "There was an error attempting to read the following primary-cache file.  " + 
376                    "It appears to be corrupted: [" + fName + "]",
377                    t
378                );
379            }
380
381            if (! (o instanceof TreeSet)) throw new CacheError(
382                "There primary cache file loaded, but does not contain the correct " +
383                "data-structure.  It appears to be corrupted. [" + fName + "]"
384            );
385
386            return (TreeSet<Integer>) o;
387        }
388
389        // Just saves a brand-new (empty) Hash-Code List (a java.util.TreeSet) to disk, using
390        // Standard Java Object Serialization.
391        private static TreeSet<Integer> writeNewTS(String cacheSaveDirectory)
392        {
393            TreeSet<Integer> hashCodes = new TreeSet<>();
394
395            try
396                { FileRW.writeObjectToFile(hashCodes, cacheSaveDirectory + HASH_SAVE_TREE, true); }
397
398            catch (Throwable t)
399            {
400                throw new CacheError(
401                    "There was an error writing the Cache Hash-Code File to disk.  " +
402                    "[" + cacheSaveDirectory + HASH_SAVE_TREE + "].  " ,
403                    t
404                );
405            }
406
407            return hashCodes;
408        }
409
410        /**
411         * This will load the hashCodes table to memory from the file-system directory identified
412         * by parameter {@code String cacheSaveDirectory}.  An exception shall be thrown if this
413         * file is not found.
414         *
415         * @param cacheSaveDirectory This constructor presumes that this cache has been used and
416         * visited before.  This directory name should point to your local-cache of the
417         * {@code HiLite.ME} Server Code hilite past-operations.
418         *
419         * @throws CacheError This error will throw if the cache has not been instantiated, or
420         * is corrupted.  If the specified directory does not exist, then this {@code Error} shall
421         * also throw.  The chain-cause {@code Throwable} should be visible, and is included as the 
422         * {@code Throwable.getCause()}.
423         */
424        public Cache(String cacheSaveDirectory) throws CacheError
425        {
426            this.cacheSaveDirectory = checkCSD(cacheSaveDirectory);
427            this.hashCodes          = checkTS(this.cacheSaveDirectory);
428        }
429
430        /**
431         * This will save the hash-code {@code TreeSet<Integer>} to disk.  The <B>"Master Hash-Code
432         * List"</B> just keeps a record of the hashcodes of every {@code String} that was hilited
433         * by the Hiliter <I>(and therefore saved inside the Cache).</I>  This method will save
434         * that Java {@code TreeSet} of Hash-Codes to disk.
435         *
436         * @throws CacheError This {@code Error} will throw if there is a problem writing the
437         * master cache-hash to disk.  The chain-cause {@code Throwable} should be visible, and is
438         * included as the {@code Throwable.getCause()}
439         */
440        public void persistMasterHashToDisk() throws CacheError
441        {
442            try
443            {
444                FileRW.writeObjectToFile
445                    (hashCodes, this.cacheSaveDirectory + HASH_SAVE_TREE, true);
446            } 
447            catch (Throwable t)
448            {
449                throw new CacheError(
450                    "There was an error writing the Master Hash-Code table to disk. " +
451                    "File [" + this.cacheSaveDirectory + HASH_SAVE_TREE + "] was not saved. " +
452                    "The cache-file will have to be refreshed at some point.  New Files " +
453                    "Cache-Hash not saved.",
454                    t
455                );
456            }
457        }
458
        /**
         * <B>Not yet implemented.</B>  This method currently has an empty body, and invoking it
         * does nothing.
         */
        public void rebuildMasterHashCache()
        {
            // TO DO
            // This is supposed to be for "Error Recovery".  Fortunately, an error has never really
            // happened to me, and even if it did... Just deleting the whole thing and rebuilding
            // the Cache by running the HiLiter on all of the files seems smarter/safer anyway.
            // This has perpetually been on the "To Do List" for 2 years now...  I think it more
            // prudent to remind people: just deleting and starting over is probably smarter, if
            // your Cache directory got messed up (for whatever reason - but mine never has anyway!)
        }
470
471        /**
472         * This will initialize a cache-file in the file-system directory identified by parameter
473         * {@code String cacheSaveDirectory}.  If the directory specified does not exist, a
474         * {@code CacheError} is thrown.  Any old cache files will be removed.  To attempt to
475         * preserve old cache-files, call method {@code initializeOrRepair(String, StorageWriter)}
476         * 
477         * <BR /><BR /><B><I>OrClear:</I></B> If the directory structure provided to this
478         * initialize method is not empty, the <SPAN STYLE="color: red;"><B><I>its entire contents
479         * shall be erased by a call to </I></B></SPAN> (Below)
480         * 
481         * <DIV CLASS="LOC">{@code 
482         * FileTransfer.deleteFilesRecursive
483         *     (FileNode.createRoot(cacheSaveDirectory).loadTree(), sw);
484         * }</DIV>
485         * 
486         * @param cacheSaveDirectory This constructor presumes that this cache has been used and
487         * visited before.  This directory name should point to your local-cache of 
488         * {@code HiLite.ME} Server Code hilite past-operations.
489         * 
490         * @param sw This receives log-writes from the call to
491         * {@link FileTransfer#deleteFilesRecursive} which clears the files currently in the cache.
492         * This parameter may be null, and if it is, output-text will be shunted.
493         * 
494         * @throws CacheError This exception will be throw if there are errors deleting any
495         * old-cache files currently in the directory; or if there is any error creating the new
496         * master hash-cache file.  The chain-cause {@code Throwable} should be visible, and is 
497         * included as the {@code Throwable.getCause()}.
498         */
499        public static Cache initializeOrClear(String cacheSaveDirectory, StorageWriter sw)
500            throws CacheError
501        {
502            cacheSaveDirectory = checkCSD(cacheSaveDirectory);
503
504            final String tempStrForStupidLambdaFinal = cacheSaveDirectory;
505
506            try
507            {
508                File f = new File(cacheSaveDirectory);
509
510                if (f.isDirectory())
511                    FileTransfer.deleteFilesRecursive(
512                        FileNode.createRoot(cacheSaveDirectory).loadTree(), null,
513                        (FileNode fn) -> fn.getFullPathName().equals(tempStrForStupidLambdaFinal),
514                        sw
515                    );
516
517                f.mkdirs();
518            }
519            catch (Throwable t)
520            {
521                throw new CacheError(
522                    "There was an error emptying/clearing the directory " +
523                    "[" + cacheSaveDirectory + "] of it's contents, please see cause " +
524                    "throwable.getCause() for details.",
525                    t
526                );
527            }
528
529            try
530                { writeNewTS(cacheSaveDirectory); }
531
532            catch (Throwable t)
533            {
534                throw new CacheError(
535                    "There was an error saving/creating the new cache-file " +
536                    "[" + cacheSaveDirectory + "], please see cause chain throwable.getCause(), " +
537                    "for more details.",
538                    t
539                );
540            }
541
542            return new Cache(cacheSaveDirectory);
543        }
544
545        String get(
546                String sourceCodeAsString, String codeTypeParam, String styleTypeParam,
547                boolean includeLineNumbers
548            )
549        {
550            Integer h = Integer.valueOf(
551                codeTypeParam.hashCode() + styleTypeParam.hashCode() +
552                (includeLineNumbers ? 1 : 0) +
553                sourceCodeAsString.hashCode()
554            );
555
556            // NOTE: The Math.abs is OK, because it is just the directory name! (A little tricky)
557            if (! hashCodes.contains(h)) return null;
558
559            String root = 
560                cacheSaveDirectory +
561                StringParse.zeroPad((Math.abs(h.intValue()) % NUM_DIRS)) + 
562                File.separator + "H" + h.toString();
563
564            try
565            {
566                String saved = (String)
567                    FileRW.readObjectFromFileNOCNFE(root + "-SOURCE.sdat", true);
568
569                if (saved.equals(sourceCodeAsString))
570                    return (String) FileRW.readObjectFromFileNOCNFE(root + "-HILITE.sdat", true);
571                else
572                    return null;
573            }
574            catch (Throwable t)
575            {
576                throw new CacheError(
577                    "There was an error reading from the cache-directory: " +
578                    "[" + root + "...sdat].  Please see cause throwable.getCause() for more " +
579                    "details",
580                    t
581                );
582            }
583        }
584
585        void checkIn(
586                String sourceCodeAsString, String hilitedCodeAsString, 
587                String codeTypeParam, String styleTypeParam, boolean includeLineNumbers
588            )
589        {
590            Integer h = Integer.valueOf(
591                codeTypeParam.hashCode() + styleTypeParam.hashCode() +
592                (includeLineNumbers ? 1 : 0) +
593                sourceCodeAsString.hashCode()
594            );
595
596            // NOTE: The Math.abs is OK, because it is just the directory name! (A little tricky)
597            String root = cacheSaveDirectory +
598                StringParse.zeroPad((Math.abs(h.intValue()) % NUM_DIRS)) + File.separator;
599
600            try
601            {
602                File f = new File(root);
603                if (! f.exists()) f.mkdirs();
604
605                root = root + "H" + h.toString();
606
607                FileRW.writeObjectToFile(sourceCodeAsString, root + "-SOURCE.sdat", true);
608                FileRW.writeObjectToFile(hilitedCodeAsString, root + "-HILITE.sdat", true);
609
610                hashCodes.add(h);
611                // DEBUGING System.out.println(" CHECKEDIN ");
612            }
613            catch (Throwable t)
614            {
615                throw new CacheError(
616                    "There was an exception when writing to the cache directory: " +
617                    "[" + root + "...sdat].  See cause exception throwable.getCause(); " +
618                    "for details.",
619                    t
620                );
621            }
622        }
623    }
624
625
626    // ********************************************************************************************
627    // ********************************************************************************************
628    // Recurse Source-Code Directory
629    // ********************************************************************************************
630    // ********************************************************************************************
631
632
633    /**
634     * This will generate an {@code 'index.html'} file of all the recently generated
635     * {@code '.html'} files.
636     * 
637     * @param targetDirectory This is simply the save-location for this {@code 'index.html'} file
638     * 
639     * @param fileIndexList This must be a list of file-names that were generated by the 
640     * {@code class HiLiteMe}
641     */
642    public static void createIndex(TreeSet<String> fileIndexList, String targetDirectory)
643        throws IOException
644    {
645        StringBuilder out = new StringBuilder();
646
647        out.append(INDEX_HEADER_HTML);
648        out.append("<UL>");
649
650        for (String f : fileIndexList) out.append(
651            "<LI><A HREF=\"" + f + (f.endsWith(".txt") ? "" : ".html") +
652            "\" TARGET=\"_blank\">\n" + f +"</A></LI>\n"
653        );
654
655        out.append("</UL>");
656        out.append(Params.DEFAULT_ENDING_HTML);
657
658        FileRW.writeFile(out, targetDirectory + "index.html");
659    }
660
661    /**
662     * This will take a {@code FileNode} tree, and iterate through all of it - calling the
663     * {@code http://HiLite.ME/} server for each software file that it finds / discovers and
664     * recognizes.  Each source-code file that is within the {@code FileNode} tree that is passed,
665     * after being <B>pretty-printed</B>, will be saved as a {@code '.html'} file in the
666     * {@link Params#targetDirectory}
667     *
668     * <BR /><BR /><B STYLE="color: red;">IMPORTANT NOTE:</B> The provided java class
669     * {@code FileNode} has some very simple filter operations for making sure that only the
670     * code-files that you want to be transmitted will actually be sent. Each and every file in the
671     * {@link FileNode} tree <I><B>whose {@code 'Code Type'} can be deduced</B></I> (by it's 
672     * file-extension, or via the {@link Params#customMappings}) will be sent to the
673     * {@code HiLite.ME} server for hiliting.   
674     * 
675     * <BR /><BR />It should be easy to call the {@code fileNodeRoot.prune(...)} method to make 
676     * sure you are only transmitting/hiliting the files you want.
677     * 
678     * <BR /><BR /><B>FURTHERMORE:</B> The means by which files are copied from a "Source" 
679     * source-code directory to a "Target" hilited-HTML directory is simply via the
680     * {@code FileRW.copyFile} method.  Because the copy operations preserves the directory-tree
681     * structure of the input source-code file-system tree, <B>it is imperative to use a relative
682     * directory-location</B> when loading the source-code {@code FileNode} tree.
683     * 
684     * @param node This is the root node of a {@code FileNode} directory tree.  Every
     * operating-system file that is found inside this tree will have its code hilited using the
686     * {@code HiLite.Me} server.
687     * 
688     * @param hlmp Please review the inner-class {@code Params} to configure the extra parameters
689     * for storing and saving the results of the code hiliting operation.
690     * 
691     * @see #prettyPrintScrape(String, String, String, boolean)
692     * @see Params
693     */
694    public static void prettyPrintRecurseTree(FileNode node, Params hlmp) throws IOException
695    {
696        (new File(hlmp.targetDirectory + node.getFullPathName())).mkdirs();
697
698        // This "flattens" the source-code FileNode tree into an Iterator.  The Iterator will not
699        // iterate "FileNodes" - but rather String's - where each String is the relative path
700        // name of the source code file.
701
702        Iterator<String> files = node.getDirContentsFiles(RetTypeChoice.FULLPATH_ITERATOR);
703
704        while (files.hasNext())
705        {
706            String fileName = files.next();
707
708            if (hlmp.verbose) System.out.printf("%1$-80s", fileName);
709
710            // Sometimes it helps to have any text-files copied to output / target directory
711            if (hlmp.copyTextFiles && fileName.toLowerCase().endsWith(".txt"))
712            {
713                // Again, put this in the "Complete List" of files for the "creatIndex" method.
714                // If the user wants an 'index.html' file that lists all of the files that have
715                // been hilited, this list is necessary.
716
717                if (hlmp.fileIndexList != null) hlmp.fileIndexList.add(fileName);
718
719                if (hlmp.verbose)
720                    System.out.println( "\nCOPYING " + C.BCYAN + "TEXT-FILE " + C.RESET);
721
722                // This is why a "relative FileNode" is mandatory.  All that is happening is that
723                // the "Full Path Name" of the source-code file is being appended to the
724                // "Target Directory" name.
725    
726                FileRW.copyFile(fileName, hlmp.targetDirectory + fileName, true);
727
728                // Text files are not actually hilited, just copied.
729                continue;
730            }
731
732            // This checks the file-extension - '.java', '.js', '.html' - and uses that extension
733            // String as a 'Code Type' with the HiLite server.
734            //
735            // If there are files where that system doesn't work - the mapping of a file-extension
736            // to a 'Code Type' should be put into the "Custom Mappings" TreeMap in class
737            // HiLiteMe.Params and passed to this 
738
739            String codeTypeParam = deduceCodeType(fileName, hlmp.customMappings);
740
741            if (codeTypeParam == null)
742            {
743                // Without a 'Code Type' there isn't any way to hilite the file...  Let the user
744                // know and continue
745
746                if (hlmp.verbose)
747                    System.out.println(
748                        "\nUNKNOWN FILE TYPE FOUND: " + C.BCYAN + fileName + C.RESET +
749                        C.BYELLOW + "\nNOT CONVERTING FILE..." + C.RESET
750                    );
751
752                continue; // next source code file loop iteration
753            }
754
755            // Loads the source-code file to a String
756            String fileText     = FileRW.loadFileToStringBuilder(fileName).toString();
757            String hilitedCode  = null;
758
759            // It is always faster to use a Cache with big projects...
760            // If there is a 'Cache' check that first...
761
762            if (hlmp.cache == null)
763
764                // NOTE: The 'true' (last parameter) is passed to "includeLineNumbers"
765                hilitedCode = prettyPrintScrape(fileText, codeTypeParam, hlmp.styleTag, true);
766
767            else
768            {
769                // Check the Cache.  Note that it is pretty efficient, and doesn't do a 
770                // character-by-character check - unless there is an exact hashCode match first.
771                //
772                // NOTE: The 'true' (last parameter) is passed to "includeLineNumbers"
773
774                hilitedCode = hlmp.cache.get(fileText, codeTypeParam, hlmp.styleTag, true);
775
776                // The Cache did not have an exact match for the Source Code File
777                if (hilitedCode == null)
778                {
779                    // NOTE: The 'true' (last parameter) is passed to "includeLineNumbers"
780                    //       This 'true' is the last parameter for both of the method calls below
781
782                    hilitedCode = prettyPrintScrape(fileText, codeTypeParam, hlmp.styleTag, true);
783                    hlmp.cache.checkIn(fileText, hilitedCode, codeTypeParam, hlmp.styleTag, true);
784                }
785            }
786
787            // Now just write the hilited code to disk... Make sure to insert the HTML header
788            // and tail/footer text.
789
790            hilitedCode = hlmp.headerHTML.replace("INSERT HERE", fileName) + 
791                hilitedCode + hlmp.endingHTML;
792
793            FileRW.writeFile(hilitedCode, hlmp.targetDirectory + fileName + ".html");
794
795            // Add this file-name to the list of files that have been converted/hilited
796            // (but only if there is such a list being kep!)  This list can be used by the
797            // method createIndex(TreeSet, String) to create an 'index.html' file
798
799            if (hlmp.fileIndexList != null) hlmp.fileIndexList.add(fileName);
800
801            // Let the user know this file has been successfully hilited.
802            if (hlmp.verbose) System.out.println(C.BRED + "Completed." + C.RESET);
803        }
804
805        // After having iterated all the files in a directory, use recursion to iterate through the
806        // files in any sub directories...
807        //
808        // The iterator of sub-dirs is an 'Iterator<FileNode>' rather than an 'Iterator<String>'
809        // (which was used in the loop above)
810
811        Iterator<FileNode> dirs = node.getDirContentsDirs(RetTypeChoice.ITERATOR);
812
813        while (dirs.hasNext()) prettyPrintRecurseTree(dirs.next(), hlmp);
814    }
815
816    /**
     * Convenience Method.
818     * <BR />Invokes: {@link #deduceCodeType(String, TreeMap)}
819     */
820    public static String deduceCodeType(String fileName)
821    { return deduceCodeType(fileName, null); }
822
823    /**
824     * This attempts to guess the {@code 'Code Type'} parameter for the file, based on the input
825     * file name. The file is loaded from disk, so be sure to include the full path to the file in
826     * the {@code fileName}.
827     * 
828     * @param fileName The full-path name of a Source-Code / Software file.  It will be loaded from
829     * disk, and transmitted to {@code HiLite.ME} servers and <B>"pretty-printed"</B> into HTML.
830     * 
831     * @param customMappings This parameter may be null, and if it is, then it is ignored.  If this
832     * parameter is not null, then the file-extension obtained from the {@code String 'fileName'}
833     * field will be "looked up" in the {@code TreeMap}, and if it matches one of these
834     * custom-mappings of file-extension to {@code HiLite.ME} code-type parameters, that
835     * {@code 'Code Type'} parameter will be used in the call to the Pretty Print Server.
836     * 
837     * @return the {@code 'Code Type'} that is associated with the file-name extension - or null if
838     * there is no file-name extension, or the file-name extension is not found.
839     */
840    public static String deduceCodeType(String fileName, TreeMap<String, String> customMappings)
841    {
842        int extStartPos = fileName.lastIndexOf('.');
843
844        if (extStartPos == -1) return null;
845
846        String ext = fileName.substring(extStartPos + 1);
847
848        if (ext == null) return null;
849
850        if ((customMappings != null) && customMappings.containsKey(ext))
851            return customMappings.get(ext);
852
853        return isCodeType(ext) ? ext : null;
854    }
855
856
857    // ********************************************************************************************
858    // ********************************************************************************************
859    // Scrape / Pretty Print Methods
860    // ********************************************************************************************
861    // ********************************************************************************************
862
863
864    /**
865     * Convenience Method.
866     * <BR />Invokes: {@link #prettyPrintScrapeToVector(String, String, String, boolean)}
867     * <BR />And: {@link Util#pageToString(Vector)}
868     * <BR />Returns: HTML as a {@code java.lang.String}
869     * <BR />No cache is expected.
870     */
871    public static String prettyPrintScrape
872        (String codeText, String codeTypeParam, String styleTypeParam, boolean includeLineNumbers)
873        throws IOException
874    {
875        // NOTE: Util.pageToString(...) accepts a 'Vector<HTMLNode>' and returns the input HTML
876        //       as a java.lang.String
877
878        return Util.pageToString(
879
880            // NOTE: prettyPrintScrapeToVector Inserts HTML <SPAN> Elements into PLAIN-TEXT Source
881            //       code.  Afterwards it converts the HTML from a 'String' to a 'Vector<HTMLNode>'
882
883            prettyPrintScrapeToVector
884                (codeText, codeTypeParam, styleTypeParam, includeLineNumbers)
885        );
886    }
887
888    /**
889     * Convenience Method.
890     * <BR />Invokes: {@link #prettyPrintScrapeToVector(String, String, String, boolean, Cache)}
891     * <BR />And: {@link Util#pageToString(Vector)}
892     * <BR />Returns: HTML as a {@code java.lang.String}
893     * <BR />Cache required.
894     */
895    public static String prettyPrintScrape(
896            String codeText, String codeTypeParam, String styleTypeParam,
897            boolean includeLineNumbers, Cache cache
898        )
899        throws IOException
900    {
901        // NOTE: Util.pageToString(...) accepts a 'Vector<HTMLNode>' and returns the input HTML
902        //       as a java.lang.String
903
904        return Util.pageToString(
905
906            // NOTE: prettyPrintScrapeToVector Inserts HTML <SPAN> Elements into PLAIN-TEXT Source
907            //       code.  Afterwards it converts the HTML from a 'String' to a 'Vector<HTMLNode>'
908            //
909            // ALSO: There are two versions of the method 'prettyPrintScrapeToVector.'  The one
910            //       used here accepts and make-use-of a Cache.  In the method above, that version
911            //       of this method DOES NOT utilize the Cache.
912
913            prettyPrintScrapeToVector
914                (codeText, codeTypeParam, styleTypeParam, includeLineNumbers, cache)
915        );
916    }
917
918    /**
919     * Convenience Method.
     * <BR />Invokes: {@link #prettyPrintScrapeToVector(String, String, String, boolean)}
921     * <BR />And: {@link #simplifyColorSpans(Vector, String)}
922     * <BR />And: {@link Util#pageToString(Vector)}
923     * <BR />Returns: HTML as a {@code java.lang.String}
924     * <BR />No cache is expected.
925     */
926    public static String prettyPrintScrapeAndSimplify
927        (String codeText, String codeTypeParam, String styleTypeParam, boolean includeLineNumbers)
928        throws IOException
929    {
930        // NOTE: Util.pageToString(...) accepts a 'Vector<HTMLNode>' and returns the input HTML
931        //       as a java.lang.String
932
933        return Util.pageToString(
934
935            // The method 'simplifyColorSpans' modifies the HTML that is received from the HiLite
936            // Server, and replaces each of the In-Line Style-Spans so that rather than containing
937            // 'STYLE= ...' HTML Attributes, they contains a brief 'CLASS=...'.  This makes the
938            // output HTML look cleaner, nicer, and easier to manage / modify.
939
940            simplifyColorSpans(
941
942                // NOTE: prettyPrintScrapeToVector Inserts HTML <SPAN> Elements into PLAIN-TEXT
943                //       Source-Code.  Afterwards it converts the HTML from a 'String' to a
944                //       'Vector<HTMLNode>'
945    
946                prettyPrintScrapeToVector
947                    (codeText, codeTypeParam, styleTypeParam, includeLineNumbers),
948
949                // simplifyColorSpans(...) needs to know which style-parameter was passed to the HiLite
950                // server in order convert the STYLE Assignments into CSS Classes.
951        
952                styleTypeParam
953            ));
954    }
955
956    /**
957     * Convenience Method.
958     * <BR />Invokes: {@link #prettyPrintScrapeToVectorAndSimplify(String, String, String, boolean, Cache)}
959     * <BR />And: {@link Util#pageToString(Vector)}
960     * <BR />Returns: HTML as a {@code java.lang.String}
961     * <BR />Cache required.
962     */
963    public static String prettyPrintScrapeAndSimplify(
964            String codeText, String codeTypeParam, String styleTypeParam,
965            boolean includeLineNumbers, Cache cache
966        )
967        throws IOException
968    {
969        // NOTE: Util.pageToString(...) accepts a 'Vector<HTMLNode>' and returns the input HTML
970        //       as a java.lang.String
971
972        return Util.pageToString(
973
974            // Method 'prettyPrintScrapeToVectorAndSimplify' does:
975            //
976            // ONE: Invokes the HiLite Server, and Converts the HTML-String which is returned by
977            ///     that server (or found inside the Cache), and converts that HTML-String to a
978            //      Vector<HTMLNode>
979            //
980            // TWO: Replaces all inline <SPAN STYLE='...'> elements to <SPAN CLASS='...'> in order
981            //      to make the HTML nicer looking, more compact, and easier to control
982 
983            prettyPrintScrapeToVectorAndSimplify
984                (codeText, codeTypeParam, styleTypeParam, includeLineNumbers, cache)
985        );
986    }
987
988    /**
989     * Convenience Method.
     * <BR />Invokes: {@link #prettyPrintScrapeToVector(String, String, String, boolean)}
991     * <BR />And: {@link #simplifyColorSpans(Vector, String)}
992     * <BR />Returns: {@code Vector<HTMLNode>}
993     * <BR />No cache is expected.
994     */
995    public static Vector<HTMLNode> prettyPrintScrapeToVectorAndSimplify
996        (String codeText, String codeTypeParam, String styleTypeParam, boolean includeLineNumbers)
997        throws IOException
998    {
999        // The method 'simplifyColorSpans' modifies the HTML that is received from the HiLite
1000        // Server, and replaces each of the In-Line Style-Spans so that rather than containing
1001        // 'STYLE= ...' HTML Attributes, they contains a brief 'CLASS=...'.  This makes the
1002        // output HTML look cleaner, nicer, and easier to manage / modify.
1003
1004        return simplifyColorSpans(
1005
1006            // The method 'prettyPrintScrapeToVector' sends the 'codeText' to the HiLite Server,
1007            // and receives the HiLited Source-Code HTML.  Afterwards it simple converts the HTML
1008            // which it received as a java.lang.String *INTO* a Vector<HTMLNode>
1009
1010            prettyPrintScrapeToVector
1011                (codeText, codeTypeParam, styleTypeParam, includeLineNumbers),
1012
1013            // simplifyColorSpans(...) needs to know which style-parameter was passed to the HiLite
1014            // server in order convert the STYLE Assignments into CSS Classes.
1015
1016            styleTypeParam
1017        );
1018    }
1019
1020    /**
1021     * This will take a {@code java.lang.String} of code - in almost any coding language - and
1022     * generate an HTML-"ified" version of that code.  This is often called "Pretty-Printing" code.
1023     * The software-platform/engine that does the transformation of source code to "nice-looking
1024     * HTML code" is just a site called <A HREF="http://HiLite.me/" TARGET="_blank">
1025     * http://HiLite.me</A>.
1026     * 
     * <BR /><BR />This method takes four parameters.  The first is the code itself (passed as a
     * {@code java.lang.String}).  The second is the "descriptor" - which is a short
1029     * text-{@code String} that identifies what type (programming-language) is being passed to
1030     * {@code HiLite.ME}.  The complete list of available {@code Code Types} can be found in the 
1031     * Raw HTML of {@code HiLite.ME's} main page.
1032     * 
1033     * <BR /><BR /><UL CLASS="JDUL">
1034     *  <LI>{@code 'java'} =&gt; Java Code!</LI>
1035     *  <LI>{@code 'css'} =&gt; CSS - Cascading Style Sheets</LI>
1036     *  <LI>{@code 'js'} =&gt; Java-Script code</LI>
1037     *  <LI>The list contains at least 75 languages, select "View Code Source" in Google-Chrome or
1038     *      Internet-Explorer to see complete set of options!</LI>
1039     * </UL>
1040     * 
1041     * <BR /><BR />
1042     * The third parameter is a {@code 'style'} name / type parameter.  Again, view the
1043     * <A HREF="http://HiLite.ME" TARGET="_blank"> http://HiLite.ME</A> website to see the complete
1044     * list of available styles.  My preferred / defaults are {@code 'vim'} and {@code 'native'}
1045     *
1046     * @param codeText This is the software-code (like CSS, Java, SQL, Python, etc..) saved as a
1047     * single text-{@code String}
1048     * 
1049     * @param codeTypeParam <EMBED CLASS="external-html" DATA-FILE-ID=HLMCODETP>
1050     * 
1051     * @param styleTypeParam <EMBED CLASS="external-html" DATA-FILE-ID=HLMSTYLETP>
1052     * 
1053     * @param includeLineNumbers When this parameter receives <B>TRUE</B>, line-numbers are
1054     * appended to the HTML output.
1055     * 
1056     * @return The HTML is returned as a {@code Vector<HTMLNode>}<BR /><BR />
1057     * 
1058     * <EMBED CLASS="external-html" DATA-FILE-ID=HLMPPSRET>
1059     * 
1060     * @throws IOException If there are any problems communicating with the HiLite Server.
1061     * 
1062     * @see Torello.Java.URLs#toProperURLV2(String)
1063     * @see HTMLPage#getPageTokens(BufferedReader, boolean)
1064     * @see InnerTagGetInclusive
1065     * @see TagNodeRemoveInclusive
1066     */
1067    public static Vector<HTMLNode> prettyPrintScrapeToVector
1068        (String codeText, String codeTypeParam, String styleTypeParam, boolean includeLineNumbers)
1069        throws IOException
1070    {
1071        codeText = URLs.toProperURLV2(codeText);
1072
1073        URL             url         = new URL("http://hilite.me");
1074        URLConnection   connection  = url.openConnection();
1075
1076        connection.setDoOutput(true);  
1077        
1078        OutputStreamWriter out = new OutputStreamWriter(connection.getOutputStream(), "UTF-8");
1079
1080        out.write(
1081            "lexer=" + codeTypeParam + 
1082            "&style=" + styleTypeParam + 
1083            (includeLineNumbers ? "&linenos=false" : "") +
1084            "&divstyles=border:solid gray;border-width:.1em .1em .1em .8em;padding:.2em .6em;" +
1085            "&code=" + codeText 
1086        );
1087        out.flush();   
1088
1089        BufferedReader br = new BufferedReader
1090            (new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8")));
1091
1092        // Read the HTML String response and Parse the JTML into Vectorized-HTML.
1093        Vector<HTMLNode> v = HTMLPage.getPageTokens(br, false);
1094
1095        // First, find the divider where the HiLited Code is located.  It still has some extra
1096        // bluff-and-bluster, fluff-and-fanfare, a bunch-of-crap to filter.
1097
1098        // The response is inside of the <DIV ID=preview> ... </DIV>
1099        // However, there is another 'outer-div' that contains an Inline CSS Style Attribute. The
1100        // inner <DIV> ... </DIV> is the one to return.  If there are line-numbers, that inner
1101        // <DIV> will have a <TABLE> that has one row with two columns.  The first column will have
1102        // all of the line-numbers, and the second column will have the Hi-Lited Source Code.
1103
1104        int idDIVPos = InnerTagFind.first(v, "div", "id", TextComparitor.EQ_CI, "preview");
1105
1106        v = TagNodeGetInclusive.first(v, idDIVPos + 1, -1, "div");
1107
1108        /*
1109        if (includeLineNumbers)
1110        {
1111            int end = v.size();
1112            System.out.println(
1113                "******* DEBUG *******\n" +
1114                Debug.print(v, 0, ((end < 20) ? end : 20), Debug::K) +
1115                "******* DEBUG *******\n" +
1116                Debug.print(v, ((end < 20) ? 0 : (end - 20)), end, Debug::K)
1117            );
1118            if (! Q.YN("continue?")) System.exit(0);
1119        }
1120        */
1121
1122        return v;
1123    }
1124
1125    /**
     * This performs an identical operation to method: {@code prettyPrintScrapeToVector(String,
     * String, String, boolean)}, but it checks the provided {@code cache} first, before querying the
1128     * server.
1129     * 
1130     * @param codeText This is the software-code (like CSS, Java, SQL, Python, etc..) saved as a
1131     * single text-{@code String}.
1132     * 
1133     * @param codeTypeParam <EMBED CLASS="external-html" DATA-FILE-ID=HLMCODETP>
1134     * 
1135     * @param styleTypeParam <EMBED CLASS="external-html" DATA-FILE-ID=HLMSTYLETP>
1136     * 
1137     * @param includeLineNumbers When this parameter receives <B>TRUE</B>, line-numbers are
1138     * appended to the HTML output.
1139     * 
1140     * @param cache This is the cache that must be passed.  Read about the {@link HiLiteMe.Cache}
1141     * {@code static inner class} for an explanation about how caching results from the server
1142     * works.  Save time on the build by caching results that have not changed.
1143     * 
1144     * @return The HTML is returned as a {@code Vector<HTMLNode>}<BR /><BR />
1145     * 
1146     * <EMBED CLASS="external-html" DATA-FILE-ID=HLMPPSRET>
1147     * 
1148     * @throws IOException If there are any problems communicating with the HiLite Server.
1149     * 
1150     * @see #prettyPrintScrapeToVector(String, String, String, boolean)
1151     * @see HTMLPage#getPageTokens(CharSequence, boolean)
1152     */
1153    public static Vector<HTMLNode> prettyPrintScrapeToVector(
1154            String codeText, String codeTypeParam, String styleTypeParam,
1155            boolean includeLineNumbers, Cache cache
1156        )
1157        throws IOException
1158    {
1159        // FIRST: Check the Cache, to see if the exact String has been hilited!
1160        String ret = cache.get(codeText, codeTypeParam, styleTypeParam, includeLineNumbers);
1161
1162        // If there was a Cache hit -> return that String rather than querying the server.
1163        if (ret != null) return HTMLPage.getPageTokens(ret, false);
1164
1165        // NO? Then query the server.
1166        Vector<HTMLNode> retVec = prettyPrintScrapeToVector
1167            (codeText, codeTypeParam, styleTypeParam, includeLineNumbers);
1168
1169        // Make sure to save the response in the Cache for next time
1170        cache.checkIn(
1171            codeText, Util.pageToString(retVec), codeTypeParam, styleTypeParam,
1172            includeLineNumbers
1173        );
1174
1175        return retVec;
1176    }
1177
1178    /**
     * This performs an identical operation to method: {@code prettyPrintScrapeToVector(String,
     * String, String, boolean)}, but it checks the provided {@code cache} first, before querying the
1181     * server.  Furthermore, the HTML that is returned has been <B>simplified</B>, using the
1182     * method {@link #simplifyColorSpans(Vector, String)}.
1183     * 
1184     * <BR /><BR />Using this version of <B>Pretty Print</B> mandates that the user provide a
1185     * Cache.
1186     * 
1187     * @param codeText This is the software-code (like CSS, Java, SQL, Python, etc..) saved as a
1188     * single text-{@code String}.
1189     * 
1190     * @param codeTypeParam <EMBED CLASS="external-html" DATA-FILE-ID=HLMCODETP>
1191     * 
1192     * @param styleTypeParam <EMBED CLASS="external-html" DATA-FILE-ID=HLMSTYLETP>
1193     * 
1194     * @param includeLineNumbers When this parameter receives <B>TRUE</B>, line-numbers are
1195     * appended to the HTML output.
1196     * 
1197     * @param cache This is the cache that must be passed.  Read about the {@link HiLiteMe.Cache}
1198     * {@code static inner class} for an explanation about how caching results from the server
1199     * works.  Save time on the build by caching results that have not changed.
1200     * 
1201     * @return The HTML is returned as a {@code Vector<HTMLNode>}<BR /><BR />
1202     * 
1203     * <EMBED CLASS="external-html" DATA-FILE-ID=HLMPPSRET>
1204     * 
1205     * @throws IOException If there are any problems communicating with the HiLite Server.
1206     * 
1207     * @see #prettyPrintScrapeToVector(String, String, String, boolean)
1208     * @see HTMLPage#getPageTokens(CharSequence, boolean)
1209     * @see #simplifyColorSpans(Vector, String)
1210     */
1211    public static Vector<HTMLNode> prettyPrintScrapeToVectorAndSimplify(
1212            String codeText, String codeTypeParam, String styleTypeParam,
1213            boolean includeLineNumbers, Cache cache
1214        )
1215        throws IOException
1216    {
1217        // FIRST: Check the Cache, to see if the exact String has been hilited!
1218        String ret = cache.get(codeText, codeTypeParam, styleTypeParam, includeLineNumbers);
1219
1220        // If there was a Cache hit -> return that String rather than querying the server.
1221        if (ret != null) return HTMLPage.getPageTokens(ret, false);
1222
1223        // NO? Then query the server.
1224        Vector<HTMLNode> retVec =
1225            prettyPrintScrapeToVector(codeText, codeTypeParam, styleTypeParam, includeLineNumbers);
1226
1227        // Replace all of the STYLE=... attributes to CLASS=... attributes
1228        simplifyColorSpans(retVec, styleTypeParam);
1229
1230        // Make sure to save the response in the Cache for next time
1231        cache.checkIn(
1232            codeText, Util.pageToString(retVec), codeTypeParam, styleTypeParam,
1233            includeLineNumbers
1234        );
1235
1236        return retVec;
1237    }
1238
1239
1240    // ********************************************************************************************
1241    // ********************************************************************************************
1242    // Simplify Color Spans
1243    // ********************************************************************************************
1244    // ********************************************************************************************
1245
1246
1247    /**
1248     * Will return a {@code java.lang.String} containing all of the {@code CSS STYLE} definitions
1249     * for a particular {@code 'Style Type'}.
1250     * 
1251     * <BR /><BR /><B>NOTE:</B> These definitions are only useful if the HiLited Source Code that
1252     * you have used has been simplified from <B>in-line {@code 'STYLE=...'} attributes</B> into 
1253     * a <B>{@code CLASS=...}</B> version of the hiliting.
1254     * 
1255     * @param styleParam This may be any of the valid {@code 'Style Tags'} available for hiliting
1256     * source-code.  The complete list of {@code 'Style Tags'} may be viewed here:
1257     * 
1258     * <BR /><BR /><B><A HREF='doc-files/HiLiteMe/HiLiteStyleCodes.html'>
1259     * HiLiting Style Codes</A></B>
1260     * 
1261     * @return The {@code CSS Class} definitions as a {@code String}
1262     * 
1263     * <BR /><BR /><B>NOTE:</B> The {@code RETURN VALUES} (as {@code String's}) can actually be
1264     * viewed right here by clicking the link below.  Each {@code 'Style Type'} is followed by
1265     * a list of <B>CSS Definitions</B> that must be included in order for the Code HiLiting to
1266     * work (if-and-only-if) you have opted to use the "Simplified Color Spans."
1267     * 
1268     * <BR /><BR /><B><A HREF='doc-files/HiLiteMe/StyleTagCSSDefinitions.html'>
1269     * Style Tag CSS Definitions</A></B>
1270     * 
1271     * @throws IllegalArgumentException If an invalid {@code 'Style Type'} has been passed to the
1272     * {@code 'styleParam'} parameter.
1273     */
1274    @SuppressWarnings("unchecked")
1275    public static String styleParamCSSClasses(String styleParam)
1276    {
1277        if (! isStyleType(styleParam)) throw new IllegalArgumentException
1278            ("The Style Type Parameter passed [" + styleParam + "] is not a valid Style Type");
1279
1280        if (styleCSSDefinitions == null)
1281            styleCSSDefinitions = (TreeMap<String, String>) LFEC.readObjectFromFile_JAR
1282                (Torello.Data.DataFileLoader.class, "data20.tmdat", true, TreeMap.class);
1283
1284        return styleCSSDefinitions.get(styleParam);
1285    }
1286
1287    /**
1288     * This may be called from the command line.  It will print the CSS Definitions for a 
1289     * {@code 'Style Type'} parameter.  The complete list of valid {@code Style Types} may be
1290     * viewed here:
1291     * 
1292     * <BR /><BR /><B><A HREF='doc-files/HiLiteMe/StyleTagCSSDefinitions.html'>
1293     * Style Tag CSS Definitions</A></B>
1294     */
1295    public static void main(String[] argv)
1296    {
1297        if (argv.length != 1)
1298            System.out.println("Pass a Style Type as an argument to this main method.");
1299
1300        if (! isStyleType(argv[0]))
1301            System.out.println(
1302                "You have not passed a valid Style Type to this main method.  Please view " +
1303                "the documentation to see the list of valid Style Type's."
1304            );
1305
1306        System.out.println(styleParamCSSClasses(argv[0]));
1307    }
1308
1309    /**
1310     * <EMBED CLASS='external-html' DATA-FILE-ID=HLMSIMPLIFY>
1311     * 
1312     * @param page This should be Vectorized-HTML that was produced by the
1313     * {@code 'prettyPrintScrapeToVector'} method.
1314     *
1315     * @param styleTypeParam This has to be the same value for {@code 'styleTypeParam'} that was
1316     * used to convert the HTML for the {@code 'page'} parameter.  If a different
1317     * {@code 'Style Type'} is accidentally passed to this method, none of the HTML {@code <SPAN>}
1318     * elements will be replaced.
1319     * 
1320     * @return The original page {@code Vector<HTMLNode>}
1321     * 
1322     * @throws IllegalArgumentException If the passed {@code 'styleTypeParam'} is not recognized
1323     * by the internal list of available {@code 'Style Types'}.
1324     */
1325    @SuppressWarnings("unchecked")
1326    public static Vector<HTMLNode> simplifyColorSpans(Vector<HTMLNode> page, String styleTypeParam)
1327    {
1328        if (! isStyleType(styleTypeParam)) throw new IllegalArgumentException(
1329            "The passed 'styleTypeParam' value [" + styleTypeParam + "] is not a registered " +
1330            "Style Type Code known by this class."
1331        );
1332
1333        // Lazy
1334        if (allMaps == null)
1335        {
1336            allMaps = 
1337                (TreeMap<String, TreeMap<String, TagNode>>)
1338                LFEC.readObjectFromFile_JAR
1339                (Torello.Data.DataFileLoader.class, "data19.tmdat", true, TreeMap.class);
1340        }
1341
1342        // Retrieves the a TreeMap that maps STYLE-ATTRIBUTE values (retrieved from <SPAN>
1343        // elements) directly to replacement TagNode's that use a CLASS=... instead.
1344
1345        TreeMap<String, TagNode> map = allMaps.get(styleTypeParam);
1346
1347        // This should never happen, but if it does, this error is better than NullPointerException
1348        if (map == null) throw new InternalError(
1349            "The style parameter you have passed IS VALID, but unfortunately, the map file for " +
1350            "that style is not loading properly."
1351        );
1352
1353        // Retrieve all HTML "<SPAN STYLE=...>" elements. Specifically, retrieve all <SPAN>
1354        // that actually contain a STYLE attribute.  Afterwards, retrieve all of the values of
1355        // that style-element, and store that in the 'styles' String-Array.
1356
1357        int[]       spans       = InnerTagFind.all(page, "span", "style");
1358        String[]    styles      = Attributes.retrieve(page, spans, "style");
1359        TagNode     replacement = null;
1360
1361        // Minor optimization that's inside the data-file.  The "STYLE='color: #123456;...'
1362        // eliminates the leading characters "color: #" in the style-attribute.  Here we need
1363        // to do the EXACT-SAME removal, or else the lookup-table will not find that
1364        // style-attribute.
1365
1366        for (int i=0; i < styles.length; i++)
1367            if (styles[i].startsWith("color: #"))
1368                styles[i] = styles[i].substring("color: #".length());
1369
1370        // Replace all of the <SPAN STYLE=...> TagNode's with the simplified TagNode's.
1371
1372        for (int i=0; i < spans.length; i++)
1373            if ((replacement = map.get(styles[i])) != null)
1374                page.setElementAt(replacement, spans[i]);
1375
1376        removeDuplicateColorSpans(page);
1377
1378        return page;
1379    }
1380
1381    private static final Pattern P = Pattern.compile("H(\\d{1,3})");
1382
1383    // The HiLite.Me server has many places where a <SPAN>...</SPAN> of code is redundant, and 
1384    // should be removed.
1385    // It usually looks like the following (PAY CLOSE ATTENTION!)
1386    // 
1387    // <span style="color: #d0d0d0">removeArr</span> <span style="color: #d0d0d0">=</span>
1388    //
1389    // The above <SPAN> is FIRST simplified to (in the 'simplifyColorSpans') to:
1390    //
1391    // <span style=H10>removeArr</span> <span style=H10>=</span>
1392    // 
1393    // AND THEN SIMPLIFIED TO: (by this method)
1394    //
1395    // <SPAN STYLE=10>removeArr =</SPAN>
1396    private static int removeDuplicateColorSpans(Vector<HTMLNode> v)
1397    {
1398        HNLIInclusive       iter        = InnerTagInclusiveIterator.get(v, "span", "class", P);
1399        IntStream.Builder   b           = IntStream.builder();
1400        DotPair             prev        = new DotPair(0, v.size() - 1);  // Non-sense initialization
1401        int                 prevClass   = -1; // Non-sense initialization
1402
1403        while (iter.hasNext())
1404        {
1405            DotPair         cur         = iter.nextDotPair();
1406            TagNode         tn          = (TagNode) v.elementAt(cur.start);
1407            int             curClass    = Integer.parseInt(tn.AV("class").substring(1));
1408            boolean         isClear     = prev.end < cur.start;
1409            HTMLNode        n;
1410
1411            for (int i=(prev.end+1); isClear && (i < (cur.start-1)); i++)
1412                if (((n = v.elementAt(i)) instanceof TagNode) || (n.str.trim().length() > 0))
1413                    isClear = false;
1414
1415            if (isClear && (curClass == prevClass))
1416            { b.accept(prev.end); b.accept(cur.start); }
1417
1418            prev        = cur;
1419            prevClass   = curClass;
1420        }
1421
1422        int[] removeArr = b.build().toArray();
1423        Util.removeNodesOPT(v, removeArr);
1424
1425        return removeArr.length;
1426    }
1427}