001package Torello.Java;
002
003import java.io.*;
004import java.util.*;
005import java.net.*;
006import javax.net.ssl.*;
007
008import java.nio.charset.Charset;
009import java.util.regex.Pattern;
010import java.util.stream.IntStream;
011
012import Torello.HTML.*;
013import Torello.HTML.NodeSearch.*;
014
015import static Torello.Java.C.*;
016
017import Torello.Java.FileNode;
018
019import Torello.Java.Additional.RemoveUnsupportedIterator;
020import Torello.Java.Additional.Ret2;
021import Torello.Java.Additional.URLs;
022
023/**
024 * Wraps a basic on-line syntax-hiliter named <CODE>HiLite&#46;ME</CODE> (which, itself, wraps the
025 * on-line hiliter <CODE>pygments&#46;org</CODE>).
026 * 
027 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=HILITEME>
028 */
029public class HiLiteMe
030{
031    // no constructors
032    private HiLiteMe() { }
033
034
035    // ********************************************************************************************
036    // HiLite.ME parameter-tags stored internally.
037    // ********************************************************************************************
038
039    @SuppressWarnings("unchecked")
040    private static final Vector<Object> dataFile = (Vector<Object>) LFEC.readObjectFromFile_JAR
041        (HiLiteMe.class, "data-files/HLMDataFile.vdat", true, Vector.class);
042
043    @SuppressWarnings("unchecked")
044    private static final TreeMap<String, String> codeTypeDescriptions =
045        (TreeMap<String, String>) dataFile.elementAt(0);
046
047    @SuppressWarnings("unchecked")
048    private static final Vector<String> styleTypes = (Vector<String>) dataFile.elementAt(1);
049
050    // This is only loaded from disk if "SimplifySpans" method is invoked.  It is loaded only
051    // once.  The only location where it can be loaded is inside 'simplifyColorSpans'
052    // LAZY-LOADING
053    private static TreeMap<String, TreeMap<String, TagNode>> allMaps = null;
054
055    // This is only loaded from disk if the styleParamCSSClasses(String) method is called.
056    // LAZY-LOADING
057    private static TreeMap<String, String> styleCSSDefinitions = null;
058
059    /**
060     * Each time a piece of code is to be pretty-printed, {@code HiLite.ME} expects to receive a
061     * "type of software" or "type of code" descriptor {@code String} that identifies what type of
062     * textual-code it is receiving.  There are exactly 266 different types of software files that
063     * may be passed to the {@code HiLite.ME} server.  These {@code String}-tag for these 
064     * {@code 'Code Types'} may be viewed here.
065     * 
066     * <BR /><BR />Click the link below to see the complete list of programming-type codes.
067     * <BR /><BR /><B><A HREF='doc-files/HiLiteMe/HiLiteProgrammingLanguages.html'>
068     * Programming Language Codes</A></B>
069     * 
070     * @return An {@code Iterator<String>} that produces each {@code String}-tag that may be passed
071     * as a {@code 'Code Type'} to {@code http://HiLite.ME}
072     */
073    public static Iterator<String> getCodeTypes()
074    {
075        // The 'RemoveUnsupportedIterator' wrapper class prohibits modifications to this TreeMap
076        return new RemoveUnsupportedIterator<String>(codeTypeDescriptions.keySet().iterator());
077    }
078
079    /**
080     * This will iterate over the full-name descriptions of the software types available for
081     * parsing with the {@code HiLite.ME} server
082     * 
083     * @return An {@code Iterator<String>} that produces a {@code String}-description of each
084     * software-types available for parsing.
085     */
086    public static Iterator<String> getCodeTypeDescriptions()
087    {
088        // The 'RemoveUnsupportedIterator' wrapper class prohibits modifications to this TreeMap
089        return new RemoveUnsupportedIterator<String>(codeTypeDescriptions.values().iterator());
090    }
091
092    /**
093     * This will iterate over the "Defining Style-Output Types" available to users of the
094     * {@code http://HiLite.ME} server.
095     * 
096     * <BR /><BR />Click the link below to see the complete list of {@code 'Style Codes'}
097     * <BR /><BR /><B><A HREF='doc-files/HiLiteMe/HiLiteStyleCodes.html'>
098     * HiLiting Style Codes</A></B>
099     * 
100     * @return An {@code Iterator<String>} over the different available {@code String}-tags that
101     * may be passed as a {@code 'Style Tag'} when performing a <B>"pretty print"</B> operation.
102     */
103    public static Iterator<String> getStyleTypes()
104    {
105        // The 'RemoveUnsupportedIterator' wrapper class prohibits modifications to this Vector
106        return new RemoveUnsupportedIterator<String>(styleTypes.iterator());
107    }
108
109    /**
110     * Returns the description for a specific {@code 'Code Type'}
111     * @return the long-form of the {@code 'codeType'} as a java {@code String}
112     */
113    public static String getCodeDescription(String codeType)
114    { return codeTypeDescriptions.get(codeType); }
115
116    /**
117     * Checks whether the passed {@code String}-parameter is a recognized {@code 'Code Type'}
118     * 
119     * <BR /><BR />Click the link below to see the complete list of programming-type codes.
120     * <BR /><BR /><B><A HREF='doc-files/HiLiteMe/HiLiteProgrammingLanguages.html'>
121     * Programming Language Codes</A></B>
122     * 
123     * @param s This may be any {@code java.lang.String}.  It is intended to be one of the listed
124     * {@code 'Code Types'} available for use with the {@code HiLite.ME} server.
125     * 
126     * @return This will return {@code TRUE} if the passed {@code String}-tag is one of the tags 
127     * listed with the {@code HiLite.ME} server for <I>Software Types</I>, or {@code 'Code Types'}
128     * - and {@code FALSE} otherwise.  Use the {@code Iterator} to get the complete list of
129     * available {@code 'Code Tags'} (or click the link, above, to view them in this browser).
130     * 
131     * @see #getCodeTypes()
132     * @see #getCodeTypeDescriptions()
133     */
134    public static boolean isCodeType(String s)
135    { return codeTypeDescriptions.containsKey(s); }
136
137    /**
138     * Checks whether the passed {@code String}-parameter is a recognized {@code 'Style Type'}
139     * 
140     * <BR /><BR />Click the link below to see the complete list of {@code 'Style Codes'}
141     * <BR /><BR /><B><A HREF='doc-files/HiLiteMe/HiLiteStyleCodes.html'>
142     * HiLiting Style Codes</A></B>
143     * 
144     * @param s This may be any {@code java.lang.String}.  It is intended to be one of the listed
145     * {@code 'Style Types'} available for use with the {@code HiLite.ME} server.
146     * 
147     * @return This will return {@code TRUE} if the passed {@code String}-tag is one of the tags
148     * listed with the {@code HiLite.ME} server for <I>Style Types</I>, and {@code FALSE} otherwise.
149     * Use the {@code Iterator} to get the complete list of available {@code 'Style Tags'} (or
150     * click the link, above, to view them in this browser).
151     * 
152     * @see #getStyleTypes()
153     */
154    public static boolean isStyleType(String s) { return styleTypes.contains(s); }
155
156
157    // ********************************************************************************************
158    // Here are the local variables and classes
159    // ********************************************************************************************
160
161    /**
162     * If you choose to call the method {@code createIndex(TreeSet<String> fileIndexList, String
163     * targetDirectory)}, then this {@code String} will be used as the header for that file.
164     * 
165     * <BR /><BR /><B>NOTE:</B> It may be changed, as it has not been declared {@code 'final'}.
166     */
167    public static String INDEX_HEADER_HTML =
168        "<HTML>\n" +
169        "<HEAD>\n" +
170        "<TITLE>HiLite.ME Index</TITLE>\n" +
171        "<META http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n" +
172        "<STYLE TYPE=\"text/css\">\n" +
173        "A                      { color: black; text-decoration: none;  }\n" +
174        "UL LI:Nth-Child(odd)   { background: lightgray;                }\n" +
175        "UL LI:Nth-Child(even)  { background: white;                    }\n" +
176        "UL LI                  { padding: 5 5 5 5;                     }\n" +
177        "UL                     { max-width: 60%;                       }\n" +
178        "</STYLE>\n" +
179        "</HEAD>\n" +
180        "<BODY>\n" +
181        "<H2>HiLite.ME Code</H2>\n";
182
183    // ********************************************************************************************
184    // Here is class Params
185    // ********************************************************************************************
186
187    /**
188     * Inner class for providing a list of parameters when hiliting multiple files on disk.
189     * 
190     * <BR /><BR />
191     * <EMBED CLASS='external-html' DATA-FILE-ID=HLMP>
192     */
193    public static class Params
194    {
195        /**
196         * The default value for {@code 'headerHTML'}  This value may be reset by accessing the
197         * field {@code 'headerHTML'}, which is a {@code 'public'} field in this class.
198         */
199        public static final String DEFAULT_HEADER_HTML =
200            "<HTML>\n<HEAD>\n<META http-equiv=\"Content-Type\" content=\"text/html; " +
201            "charset=utf-8\" />\n<TITLE>INSERT HERE</TITLE>\n</HEAD>\n<BODY>\n";
202
203        /**
204         * The default value for {@code 'endingHTML'}  This value may be reset by accessing the
205         * field {@code 'endingHTML'}, which is a {@code 'public'} field in this class.
206         */
207        public static final String DEFAULT_ENDING_HTML = "\n</BODY>\n</HTML>\n";
208    
209        /**
210         * The style parameter to be used with {@code HiLite.ME}  View the list of available
211         * {@code Style Tags} using {@link HiLiteMe#getStyleTypes()}
212         */
213        public String styleTag = "native";
214
215        /**
216         * When this is {@code TRUE}, {@code System.out} will print a line each time a file in the
217         * {@code FileNode} tree is visited and sent to the pretty-printing HiLite servers.
218         */
219        public boolean verbose = true;
220
221        /** 
222         * This is the target directory for where the output {@code '.html'} files will be
223         * sent.
224         */
225        public String targetDirectory = "";
226
227        /**
228         * This is the header HTML that is inserted above each post-processed / pretty-printed
229         * source-code file that is received from {@code HiLite.ME} servers.  The default version
230         * includes a {@code META-UTF8} clause, because often higher-level {@code Uni-Code}
231         * characters from Mandarin Chinese, Spanish, Korean, Vietnamese and other foreign
232         * languages are in (the writer, Torello's code).
233         *
234         * <BR /><BR /><B>NOTE:</B> The header must have a sub-string that says (in all caps)
235         * {@code "INSERT HERE"} - the file name will be inserted.
236         */
237        public String headerHTML = DEFAULT_HEADER_HTML;
238
239        /**
240         * This is the HTML that is appended to each pretty-printed source-code file received from
241         * {@code HiLite.ME} servers.  The default version of this {@code java.lang.String} simply
242         * contains {@code <BODY>, </HTML>} - <I>change this if necessary!</I>.
243         */
244        public String endingHTML = DEFAULT_ENDING_HTML;
245
246        /**
247         * This will store each file that is successfully converted into a "Pretty Printed HTML"
248         * file in the passed parameter {@code TreeSet}.  Java's {@code TreeSet} stores things
249         * alphabetically, and does not allow duplicates.  It can be used to generate an
250         * {@code 'index.html'} file.  Send this field to the
251         * {@link HiLiteMe#createIndex(TreeSet, String)} method to build the {@code 'index.html'}
252         * file.  If this is {@code null}, files that are pretty-printed by the
253         * {@code http://HiLite.ME/} server will just not be logged to this data-structure.
254         */
255        public TreeSet<String> fileIndexList = new TreeSet<String>();
256
257        /**
258         * Any file that ends with {@code ".txt"} can be copied to the {@link #targetDirectory}
259         * location - without being pretty printed if this variable is set to {@code TRUE}.
260         */
261        public boolean copyTextFiles = true;
262
263        /**
264         * This is null by default.  If it is not null, then each {@code java.lang.string 'key'} in
265         * the Map should be a file ending, and the {@code 'value'} to which the key maps should be
266         * a {@code Code Type} Tag that is recognized by the HiLite Pretty-Print HTML servers.
267         *
268         * <BR /><BR /><B>NOTE:</B> It is important to know that the {@code 'keys'} in this
269         * {@code TreeMap} data-structure are valid file-name extensions, <I>that leave out the
270         * leading '.' (period)</I>.  The key should be {@code 'java'} not {@code '.java'}.
271         * Furthermore, the values in the data-structure to which these keys map, should be valid
272         * HiLiteMe Pretty-Print Servers code-type values.  The complete list of valid
273         * {@code 'Code Types'} can be viewed by iterating through {@code public Iterator<String>
274         * getCodeTypes()}
275         *
276         * @see #getCodeTypes()
277         * @see #isCodeType(String)
278         * @see #deduceCodeType(String, TreeMap)
279         */
280        public TreeMap<String, String> customMappings = null;
281
282        /**
283         * Instantiate a {@code Cache}, and the {@code HiLite.ME} server will save all
284         * code-{@code String's} into a cache.  For each source code request that is made <I>which
285         * includes <B>BOTH</B> a {@code Params} parameter class, <B>AND</B> a non-null
286         * {@code 'cache'} field</I>, the logic will first check the code-cache to see if an
287         * identical {@code String} is in the cache.  If the {@code String} is identical, querying
288         * the {@code HiLite.ME} server will be skipped, and the local copy used instead.  This can
289         * increase build time many-fold, as seconds are reduced to milliseconds in large builds
290         * where only 2 or 3 classes have seen code-changes since a previous build.
291         * @see Cache
292         */
293        public Cache cache = null;
294
295        /**
296         * The public constructor has no body.  Modify the constants as necessary - since the
297         * fields are all {@code 'public'}, and <I><B>are not {@code 'final'}</I></B>
298         */
299        public Params() { }
300    }
301
302
303    // ********************************************************************************************
304    // Here is static inner class "Cache"
305    // ********************************************************************************************
306
307    /**
308     * A caching-system class that allows this tool to efficiently bypass calls to the server when
309     * an exact-copy of the hilited source-code already exists inside the cache.
310     * 
311     * <BR /><BR />
312     * <EMBED CLASS='external-html' DATA-FILE-ID=HLMC>
313     */
314    public static class Cache
315    {
316        // This is, as the name clearly says, the Cache-Directory
317        private final String cacheSaveDirectory;
318
319        // This is the list of Hash-Codes for all Code/HTML pairs stored in the cache.  This is the
320        // exact data-structure that is referred to as the "Master Hash File"
321
322        private final TreeSet<Integer> hashCodes;
323
324        private static final short  NUM_DIRS        = 50;
325        private static final String HASH_SAVE_TREE  = "HILITED_STRINGS_HASH_CODE.ts";
326
327        /**
328         * Inform the user how much space (in bytes) is used by this {@code Cache}.
329         * @return The number of bytes being used on the file-system by this {@code Cache}.
330         */
331        public long totalSize()
332        { return FileNode.createRoot(cacheSaveDirectory).loadTree().getDirTotalContentsSize(); }
333
334        /**
335         * Count how many files and directories are contained in this {@code Cache}.
336         * @return The total number of files and sub-directories in the {@code Cache} directory.
337         */
338        public int totalNumber()
339        {return FileNode.createRoot(cacheSaveDirectory).loadTree().count(); }
340
341        private static String checkCSD(String cacheSaveDirectory)
342        {
343            cacheSaveDirectory = cacheSaveDirectory.trim();
344
345            if (! cacheSaveDirectory.endsWith(File.separator))
346                cacheSaveDirectory = cacheSaveDirectory + File.separator;
347
348            File f  = new File(cacheSaveDirectory);
349
350            if (! f.exists()) throw new CacheError(
351                "The specified cache-directory specified does not exist on the file-system: " +
352                "[" + cacheSaveDirectory + "]"
353            );
354
355            return cacheSaveDirectory;
356        }
357
358        // the "return TreeSet<Integer>" complains about an unchecked cast.
359        @SuppressWarnings("unchecked") 
360        private static TreeSet<Integer> checkTS(String cacheSaveDirectory)
361        {
362            String  fName   = cacheSaveDirectory + HASH_SAVE_TREE;
363            File    f       = new File(fName);
364
365            if (! f.exists()) throw new CacheError(
366                "The current-cache directory does not contain a primary-cache file: " +
367                "[" + fName + "]"
368            );
369
370            Object o;
371
372            try
373                { o = FileRW.readObjectFromFile(fName, true); }
374
375            catch (Throwable t)
376            {
377                throw new CacheError(
378                    "There was an error attempting to read the following primary-cache file.  " + 
379                    "It appears to be corrupted: [" + fName + "]",
380                    t
381                );
382            }
383
384            if (! (o instanceof TreeSet)) throw new CacheError(
385                "There primary cache file loaded, but does not contain the correct " +
386                "data-structure.  It appears to be corrupted. [" + fName + "]"
387            );
388
389            return (TreeSet<Integer>) o;
390        }
391
392        // Just saves a brand-new (empty) Hash-Code List (a java.util.TreeSet) to disk, using
393        // Standard Java Object Serialization.
394
395        private static TreeSet<Integer> writeNewTS(String cacheSaveDirectory)
396        {
397            TreeSet<Integer> hashCodes = new TreeSet<>();
398
399            try
400                { FileRW.writeObjectToFile(hashCodes, cacheSaveDirectory + HASH_SAVE_TREE, true); }
401
402            catch (Throwable t)
403            {
404                throw new CacheError(
405                    "There was an error writing the Cache Hash-Code File to disk.  " +
406                    "[" + cacheSaveDirectory + HASH_SAVE_TREE + "].  " ,
407                    t
408                );
409            }
410
411            return hashCodes;
412        }
413
414        /**
415         * This will load the hashCodes table to memory from the file-system directory identified
416         * by {@code String}-Parameter {@code 'cacheSaveDirectory'}.  An exception shall be thrown
417         * if this file is not found.
418         *
419         * @param cacheSaveDirectory This constructor presumes that this cache has been used and
420         * visited before.  This directory name should point to your local-cache of the
421         * {@code HiLite.ME} Server Code hilite past-operations.
422         *
423         * @throws CacheError This error will throw if the cache has not been instantiated, or
424         * is corrupted.  If the specified directory does not exist, then this {@code Error} shall
425         * also throw.  The chain-cause {@code Throwable} should be visible, and is included as the 
426         * {@code Throwable.getCause()}.
427         */
428        public Cache(String cacheSaveDirectory) throws CacheError
429        {
430            this.cacheSaveDirectory = checkCSD(cacheSaveDirectory);
431            this.hashCodes          = checkTS(this.cacheSaveDirectory);
432        }
433
434        /**
435         * This will save the hash-code {@code TreeSet<Integer>} to disk.  The <B>Master Hash-Code
436         * List</B> just keeps a record of the hashcodes of every {@code String} that was hilited
437         * by the Hiliter <I>(and therefore saved inside the Cache).</I>  This method will save
438         * that Java {@code TreeSet} of Hash-Codes to disk.
439         *
440         * @throws CacheError This {@code Error} will throw if there is a problem writing the
441         * master cache-hash to disk.  The chain-cause {@code Throwable} should be visible, and is
442         * included as the {@code Throwable.getCause()}
443         */
444        public void persistMasterHashToDisk() throws CacheError
445        {
446            try
447            {
448                FileRW.writeObjectToFile
449                    (hashCodes, this.cacheSaveDirectory + HASH_SAVE_TREE, true);
450            } 
451            catch (Throwable t)
452            {
453                throw new CacheError(
454                    "There was an error writing the Master Hash-Code table to disk. " +
455                    "File [" + this.cacheSaveDirectory + HASH_SAVE_TREE + "] was not saved. " +
456                    "The cache-file will have to be refreshed at some point.  New Files " +
457                    "Cache-Hash not saved.",
458                    t
459                );
460            }
461        }
462
463        /** Will write this method soon.  It currently is not written. */
464        public void rebuildMasterHashCache()
465        {
466            // TO DO
467            // This is supposed to be for "Error Recovery".  Fortunately, an error has never really
468            // happend to me, and even if it did... Just deleting the whole thing and rebuilding
469            // the Cache by running the HiLiter on all of the files seems smarter/safer anyway.
470            // This has perpetually been on the "To Do List" for 2 years now...  I think it more
471            // prudent to remind people, just delete and start over is probably smarter, it your
472            // Cache directory got messed up (for whatever reason - but mine never has anyway!)
473        }
474
475        /**
476         * This will initialize a cache-file in the file-system directory identified by parameter
477         * {@code String cacheSaveDirectory}.  If the directory specified does not exist, a
478         * {@code CacheError} is thrown.  Any old cache files will be removed.  To attempt to
479         * preserve old cache-files, call method {@code initializeOrRepair(String, StorageWriter)}
480         * 
481         * <BR /><BR /><B><I>OrClear:</I></B> If the directory structure provided to this
482         * initialize method is not empty, the <SPAN STYLE="color: red;"><B><I>its entire contents
483         * shall be erased by a call to </I></B></SPAN> (Below)
484         * 
485         * <DIV CLASS=LOC>{@code 
486         * FileTransfer.deleteFilesRecursive
487         *     (FileNode.createRoot(cacheSaveDirectory).loadTree(), sw);
488         * }</DIV>
489         * 
490         * @param cacheSaveDirectory This constructor presumes that this cache has been used and
491         * visited before.  This directory name should point to your local-cache of 
492         * {@code HiLite.ME} Server Code hilite past-operations.
493         * 
494         * @param sw This receives log-writes from the call to
495         * {@link FileTransfer#deleteFilesRecursive} which clears the files currently in the cache.
496         * This parameter may be null, and if it is, output-text will be shunted.
497         * 
498         * @throws CacheError This exception will be throw if there are errors deleting any
499         * old-cache files currently in the directory; or if there is any error creating the new
500         * master hash-cache file.  The chain-cause {@code Throwable} should be visible, and is 
501         * included as the {@code Throwable.getCause()}.
502         */
503        public static Cache initializeOrClear(String cacheSaveDirectory, StorageWriter sw)
504            throws CacheError
505        {
506            cacheSaveDirectory = checkCSD(cacheSaveDirectory);
507
508            final String tempStrForStupidLambdaFinal = cacheSaveDirectory;
509
510            try
511            {
512                File f = new File(cacheSaveDirectory);
513
514                if (f.isDirectory())
515                    FileTransfer.deleteFilesRecursive(
516                        FileNode.createRoot(cacheSaveDirectory).loadTree(), null,
517                        (FileNode fn) -> fn.getFullPathName().equals(tempStrForStupidLambdaFinal),
518                        sw
519                    );
520
521                f.mkdirs();
522            }
523            catch (Throwable t)
524            {
525                throw new CacheError(
526                    "There was an error emptying/clearing the directory " +
527                    "[" + cacheSaveDirectory + "] of it's contents, please see cause " +
528                    "throwable.getCause() for details.",
529                    t
530                );
531            }
532
533            try
534                { writeNewTS(cacheSaveDirectory); }
535
536            catch (Throwable t)
537            {
538                throw new CacheError(
539                    "There was an error saving/creating the new cache-file " +
540                    "[" + cacheSaveDirectory + "], please see cause chain throwable.getCause(), " +
541                    "for more details.",
542                    t
543                );
544            }
545
546            return new Cache(cacheSaveDirectory);
547        }
548
549        String get(
550                String sourceCodeAsString, String codeTypeParam, String styleTypeParam,
551                boolean includeLineNumbers
552            )
553        {
554            Integer h = Integer.valueOf(
555                codeTypeParam.hashCode() + styleTypeParam.hashCode() +
556                (includeLineNumbers ? 1 : 0) +
557                sourceCodeAsString.hashCode()
558            );
559
560            // NOTE: The Math.abs is OK, because it is just the directory name! (A little tricky)
561            if (! hashCodes.contains(h)) return null;
562
563            String root = 
564                cacheSaveDirectory +
565                StringParse.zeroPad((Math.abs(h.intValue()) % NUM_DIRS)) + 
566                File.separator + "H" + h.toString();
567
568            try
569            {
570                String saved = (String)
571                    FileRW.readObjectFromFileNOCNFE(root + "-SOURCE.sdat", true);
572
573                if (saved.equals(sourceCodeAsString))
574                    return (String) FileRW.readObjectFromFileNOCNFE(root + "-HILITE.sdat", true);
575                else
576                    return null;
577            }
578            catch (Throwable t)
579            {
580                throw new CacheError(
581                    "There was an error reading from the cache-directory: " +
582                    "[" + root + "...sdat].  Please see cause throwable.getCause() for more " +
583                    "details",
584                    t
585                );
586            }
587        }
588
589        void checkIn(
590                String sourceCodeAsString, String hilitedCodeAsString, 
591                String codeTypeParam, String styleTypeParam, boolean includeLineNumbers
592            )
593        {
594            Integer h = Integer.valueOf(
595                codeTypeParam.hashCode() + styleTypeParam.hashCode() +
596                (includeLineNumbers ? 1 : 0) +
597                sourceCodeAsString.hashCode()
598            );
599
600            // NOTE: The Math.abs is OK, because it is just the directory name! (A little tricky)
601            String root = cacheSaveDirectory +
602                StringParse.zeroPad((Math.abs(h.intValue()) % NUM_DIRS)) + File.separator;
603
604            try
605            {
606                File f = new File(root);
607                if (! f.exists()) f.mkdirs();
608
609                root = root + "H" + h.toString();
610
611                FileRW.writeObjectToFile(sourceCodeAsString, root + "-SOURCE.sdat", true);
612                FileRW.writeObjectToFile(hilitedCodeAsString, root + "-HILITE.sdat", true);
613
614                hashCodes.add(h);
615                // DEBUGING System.out.println(" CHECKEDIN ");
616            }
617            catch (Throwable t)
618            {
619                throw new CacheError(
620                    "There was an exception when writing to the cache directory: " +
621                    "[" + root + "...sdat].  See cause exception throwable.getCause(); " +
622                    "for details.",
623                    t
624                );
625            }
626        }
627    }
628
629
630    // ********************************************************************************************
631    // ********************************************************************************************
632    // Recurse Source-Code Directory
633    // ********************************************************************************************
634    // ********************************************************************************************
635
636
637    /**
638     * This will generate an {@code 'index.html'} file of all the recently generated
639     * {@code '.html'} files.
640     * 
641     * @param targetDirectory This is simply the save-location for this {@code 'index.html'} file
642     * 
643     * @param fileIndexList This must be a list of file-names that were generated by the 
644     * {@code class HiLiteMe}
645     */
646    public static void createIndex(TreeSet<String> fileIndexList, String targetDirectory)
647        throws IOException
648    {
649        StringBuilder out = new StringBuilder();
650
651        out.append(INDEX_HEADER_HTML);
652        out.append("<UL>");
653
654        for (String f : fileIndexList) out.append(
655            "<LI><A HREF=\"" + f + (f.endsWith(".txt") ? "" : ".html") +
656            "\" TARGET=\"_blank\">\n" + f +"</A></LI>\n"
657        );
658
659        out.append("</UL>");
660        out.append(Params.DEFAULT_ENDING_HTML);
661
662        FileRW.writeFile(out, targetDirectory + "index.html");
663    }
664
665    /**
666     * This will take a {@code FileNode} tree, and iterate through all of it - calling the
667     * {@code http://HiLite.ME/} server for each software file that it finds / discovers and
668     * recognizes.  Each source-code file that is within the {@code FileNode} tree that is passed,
669     * after being <B>pretty-printed</B>, will be saved as a {@code '.html'} file in the
670     * {@link Params#targetDirectory}
671     *
672     * <BR /><BR /><B STYLE="color: red;">IMPORTANT NOTE:</B> The provided java class
673     * {@code FileNode} has some very simple filter operations for making sure that only the
674     * code-files that you want to be transmitted will actually be sent. Each and every file in the
675     * {@link FileNode} tree <I><B>whose {@code 'Code Type'} can be deduced</B></I> (by it's 
676     * file-extension, or via the {@link Params#customMappings}) will be sent to the
677     * {@code HiLite.ME} server for hiliting.   
678     * 
679     * <BR /><BR />It should be easy to call the {@code fileNodeRoot.prune(...)} method to make 
680     * sure you are only transmitting/hiliting the files you want.
681     * 
682     * <BR /><BR /><B>FURTHERMORE:</B> The means by which files are copied from a "Source" 
683     * source-code directory to a "Target" hilited-HTML directory is simply via the
684     * {@code FileRW.copyFile} method.  Because the copy operations preserves the directory-tree
685     * structure of the input source-code file-system tree, <B>it is imperative to use a relative
686     * directory-location</B> when loading the source-code {@code FileNode} tree.
687     * 
688     * @param node This is the root node of a {@code FileNode} directory tree.  Every
689     * operating-system file that is found  inside this tree will have it's code-hilited using the
690     * {@code HiLite.Me} server.
691     * 
692     * @param hlmp Please review the inner-class {@code Params} to configure the extra parameters
693     * for storing and saving the results of the code hiliting operation.
694     * 
695     * @see #prettyPrintScrape(String, String, String, boolean)
696     * @see Params
697     */
698    public static void prettyPrintRecurseTree(FileNode node, Params hlmp) throws IOException
699    {
700        (new File(hlmp.targetDirectory + node.getFullPathName())).mkdirs();
701
702        // This "flattens" the source-code FileNode tree into an Iterator.  The Iterator will not
703        // iterate "FileNodes" - but rather String's - where each String is the relative path
704        // name of the source code file.
705
706        Iterator<String> files = node.getDirContentsFiles(RTC.FULLPATH_ITERATOR());
707
708        while (files.hasNext())
709        {
710            String fileName = files.next();
711
712            if (hlmp.verbose) System.out.printf("%1$-80s", fileName);
713
714            // Sometimes it helps to have any text-files copied to output / target directory
715            if (hlmp.copyTextFiles && fileName.toLowerCase().endsWith(".txt"))
716            {
717                // Again, put this in the "Complete List" of files for the "creatIndex" method.
718                // If the user wants an 'index.html' file that lists all of the files that have
719                // been hilited, this list is necessary.
720
721                if (hlmp.fileIndexList != null) hlmp.fileIndexList.add(fileName);
722
723                if (hlmp.verbose)
724                    System.out.println( "\nCOPYING " + BCYAN + "TEXT-FILE " + RESET);
725
726                // This is why a "relative FileNode" is mandatory.  All that is happening is that
727                // the "Full Path Name" of the source-code file is being appended to the
728                // "Target Directory" name.
729    
730                FileRW.copyFile(fileName, hlmp.targetDirectory + fileName, true);
731
732                // Text files are not actually hilited, just copied.
733                continue;
734            }
735
736            // This checks the file-extension - '.java', '.js', '.html' - and uses that extension
737            // String as a 'Code Type' with the HiLite server.
738            //
739            // If there are files where that system doesn't work - the mapping of a file-extension
740            // to a 'Code Type' should be put into the "Custom Mappings" TreeMap in class
741            // HiLiteMe.Params and passed to this 
742
743            String codeTypeParam = deduceCodeType(fileName, hlmp.customMappings);
744
745            if (codeTypeParam == null)
746            {
747                // Without a 'Code Type' there isn't any way to hilite the file...  Let the user
748                // know and continue
749
750                if (hlmp.verbose)
751                    System.out.println(
752                        "\nUNKNOWN FILE TYPE FOUND: " + BCYAN + fileName + RESET +
753                        BYELLOW + "\nNOT CONVERTING FILE..." + RESET
754                    );
755
756                continue; // next source code file loop iteration
757            }
758
759            // Loads the source-code file to a String
760            String fileText     = FileRW.loadFileToString(fileName);
761            String hilitedCode  = null;
762
763            // It is always faster to use a Cache with big projects...
764            // If there is a 'Cache' check that first...
765
766            if (hlmp.cache == null)
767
768                // NOTE: The 'true' (last parameter) is passed to "includeLineNumbers"
769                hilitedCode = prettyPrintScrape(fileText, codeTypeParam, hlmp.styleTag, true);
770
771            else
772            {
773                // Check the Cache.  Note that it is pretty efficient, and doesn't do a 
774                // character-by-character check - unless there is an exact hashCode match first.
775                //
776                // NOTE: The 'true' (last parameter) is passed to "includeLineNumbers"
777
778                hilitedCode = hlmp.cache.get(fileText, codeTypeParam, hlmp.styleTag, true);
779
780                // The Cache did not have an exact match for the Source Code File
781                if (hilitedCode == null)
782                {
783                    // NOTE: The 'true' (last parameter) is passed to "includeLineNumbers"
784                    //       This 'true' is the last parameter for both of the method calls below
785
786                    hilitedCode = prettyPrintScrape(fileText, codeTypeParam, hlmp.styleTag, true);
787                    hlmp.cache.checkIn(fileText, hilitedCode, codeTypeParam, hlmp.styleTag, true);
788                }
789            }
790
791            // Now just write the hilited code to disk... Make sure to insert the HTML header
792            // and tail/footer text.
793
794            hilitedCode = hlmp.headerHTML.replace("INSERT HERE", fileName) + 
795                hilitedCode + hlmp.endingHTML;
796
797            FileRW.writeFile(hilitedCode, hlmp.targetDirectory + fileName + ".html");
798
799            // Add this file-name to the list of files that have been converted/hilited
800            // (but only if there is such a list being kep!)  This list can be used by the
801            // method createIndex(TreeSet, String) to create an 'index.html' file
802
803            if (hlmp.fileIndexList != null) hlmp.fileIndexList.add(fileName);
804
805            // Let the user know this file has been successfully hilited.
806            if (hlmp.verbose) System.out.println(BRED + "Completed." + RESET);
807        }
808
809        // After having iterated all the files in a directory, use recursion to iterate through the
810        // files in any sub directories...
811        //
812        // The iterator of sub-dirs is an 'Iterator<FileNode>' rather than an 'Iterator<String>'
813        // (which was used in the loop above)
814
815        Iterator<FileNode> dirs = node.getDirContentsDirs(RTC.ITERATOR());
816
817        while (dirs.hasNext()) prettyPrintRecurseTree(dirs.next(), hlmp);
818    }
819
820    /**
821     * Covenience Method.
822     * <BR />Invokes: {@link #deduceCodeType(String, TreeMap)}
823     */
824    public static String deduceCodeType(String fileName)
825    { return deduceCodeType(fileName, null); }
826
827    /**
828     * This attempts to guess the {@code 'Code Type'} parameter for the file, based on the input
829     * file name. The file is loaded from disk, so be sure to include the full path to the file in
830     * the {@code fileName}.
831     * 
832     * @param fileName The full-path name of a Source-Code / Software file.  It will be loaded from
833     * disk, and transmitted to {@code HiLite.ME} servers and <B>"pretty-printed"</B> into HTML.
834     * 
835     * @param customMappings This parameter may be null, and if it is, then it is ignored.  If this
836     * parameter is not null, then the file-extension obtained from the {@code String 'fileName'}
837     * field will be "looked up" in the {@code TreeMap}, and if it matches one of these
838     * custom-mappings of file-extension to {@code HiLite.ME} code-type parameters, that
839     * {@code 'Code Type'} parameter will be used in the call to the Pretty Print Server.
840     * 
841     * @return the {@code 'Code Type'} that is associated with the file-name extension - or null if
842     * there is no file-name extension, or the file-name extension is not found.
843     */
844    public static String deduceCodeType(String fileName, TreeMap<String, String> customMappings)
845    {
846        int extStartPos = fileName.lastIndexOf('.');
847
848        if (extStartPos == -1) return null;
849
850        String ext = fileName.substring(extStartPos + 1);
851
852        if (ext == null) return null;
853
854        if ((customMappings != null) && customMappings.containsKey(ext))
855            return customMappings.get(ext);
856
857        return isCodeType(ext) ? ext : null;
858    }
859
860
861    // ********************************************************************************************
862    // ********************************************************************************************
863    // Scrape / Pretty Print Methods
864    // ********************************************************************************************
865    // ********************************************************************************************
866
867
868    /**
869     * Convenience Method.
870     * <BR />Invokes: {@link #prettyPrintScrapeToVector(String, String, String, boolean)}
871     * <BR />And: {@link Util#pageToString(Vector)}
872     * <BR />Returns: HTML as a {@code java.lang.String}
873     * <BR />No cache is expected.
874     */
875    public static String prettyPrintScrape
876        (String codeText, String codeTypeParam, String styleTypeParam, boolean includeLineNumbers)
877        throws IOException
878    {
879        // NOTE: Util.pageToString(...) accepts a 'Vector<HTMLNode>' and returns the input HTML
880        //       as a java.lang.String
881
882        return Util.pageToString(
883
884            // NOTE: prettyPrintScrapeToVector Inserts HTML <SPAN> Elements into PLAIN-TEXT Source
885            //       code.  Afterwards it converts the HTML from a 'String' to a 'Vector<HTMLNode>'
886
887            prettyPrintScrapeToVector
888                (codeText, codeTypeParam, styleTypeParam, includeLineNumbers)
889        );
890    }
891
892    /**
893     * Convenience Method.
894     * <BR />Invokes: {@link #prettyPrintScrapeToVector(String, String, String, boolean, Cache)}
895     * <BR />And: {@link Util#pageToString(Vector)}
896     * <BR />Returns: HTML as a {@code java.lang.String}
897     * <BR />Cache required.
898     */
899    public static String prettyPrintScrape(
900            String codeText, String codeTypeParam, String styleTypeParam,
901            boolean includeLineNumbers, Cache cache
902        )
903        throws IOException
904    {
905        // NOTE: Util.pageToString(...) accepts a 'Vector<HTMLNode>' and returns the input HTML
906        //       as a java.lang.String
907
908        return Util.pageToString(
909
910            // NOTE: prettyPrintScrapeToVector Inserts HTML <SPAN> Elements into PLAIN-TEXT Source
911            //       code.  Afterwards it converts the HTML from a 'String' to a 'Vector<HTMLNode>'
912            //
913            // ALSO: There are two versions of the method 'prettyPrintScrapeToVector.'  The one
914            //       used here accepts and make-use-of a Cache.  In the method above, that version
915            //       of this method DOES NOT utilize the Cache.
916
917            prettyPrintScrapeToVector
918                (codeText, codeTypeParam, styleTypeParam, includeLineNumbers, cache)
919        );
920    }
921
922    /**
923     * Convenience Method.
924     * <BR />Invokes: {@link #prettyPrintScrapeToVector(String, String, String, boolean, Cache)}
925     * <BR />And: {@link #simplifyColorSpans(Vector, String)}
926     * <BR />And: {@link Util#pageToString(Vector)}
927     * <BR />Returns: HTML as a {@code java.lang.String}
928     * <BR />No cache is expected.
929     */
930    public static String prettyPrintScrapeAndSimplify
931        (String codeText, String codeTypeParam, String styleTypeParam, boolean includeLineNumbers)
932        throws IOException
933    {
934        // NOTE: Util.pageToString(...) accepts a 'Vector<HTMLNode>' and returns the input HTML
935        //       as a java.lang.String
936
937        return Util.pageToString(
938
939            // The method 'simplifyColorSpans' modifies the HTML that is received from the HiLite
940            // Server, and replaces each of the In-Line Style-Spans so that rather than containing
941            // 'STYLE= ...' HTML Attributes, they contains a brief 'CLASS=...'.  This makes the
942            // output HTML look cleaner, nicer, and easier to manage / modify.
943
944            simplifyColorSpans(
945
946                // NOTE: prettyPrintScrapeToVector Inserts HTML <SPAN> Elements into PLAIN-TEXT
947                //       Source-Code.  Afterwards it converts the HTML from a 'String' to a
948                //       'Vector<HTMLNode>'
949    
950                prettyPrintScrapeToVector
951                    (codeText, codeTypeParam, styleTypeParam, includeLineNumbers),
952
953                // simplifyColorSpans(...) needs to know which style-parameter was passed to the HiLite
954                // server in order convert the STYLE Assignments into CSS Classes.
955        
956                styleTypeParam
957            ));
958    }
959
960    /**
961     * Convenience Method.
962     * <BR />Invokes: {@link #prettyPrintScrapeToVectorAndSimplify(String, String, String, boolean, Cache)}
963     * <BR />And: {@link Util#pageToString(Vector)}
964     * <BR />Returns: HTML as a {@code java.lang.String}
965     * <BR />Cache required.
966     */
967    public static String prettyPrintScrapeAndSimplify(
968            String codeText, String codeTypeParam, String styleTypeParam,
969            boolean includeLineNumbers, Cache cache
970        )
971        throws IOException
972    {
973        // NOTE: Util.pageToString(...) accepts a 'Vector<HTMLNode>' and returns the input HTML
974        //       as a java.lang.String
975
976        return Util.pageToString(
977
978            // Method 'prettyPrintScrapeToVectorAndSimplify' does:
979            //
980            // ONE: Invokes the HiLite Server, and Converts the HTML-String which is returned by
981            ///     that server (or found inside the Cache), and converts that HTML-String to a
982            //      Vector<HTMLNode>
983            //
984            // TWO: Replaces all inline <SPAN STYLE='...'> elements to <SPAN CLASS='...'> in order
985            //      to make the HTML nicer looking, more compact, and easier to control
986 
987            prettyPrintScrapeToVectorAndSimplify
988                (codeText, codeTypeParam, styleTypeParam, includeLineNumbers, cache)
989        );
990    }
991
992    /**
993     * Convenience Method.
994     * <BR />Invokes: {@link #prettyPrintScrapeToVector(String, String, String, boolean, Cache)}
995     * <BR />And: {@link #simplifyColorSpans(Vector, String)}
996     * <BR />Returns: {@code Vector<HTMLNode>}
997     * <BR />No cache is expected.
998     */
999    public static Vector<HTMLNode> prettyPrintScrapeToVectorAndSimplify
1000        (String codeText, String codeTypeParam, String styleTypeParam, boolean includeLineNumbers)
1001        throws IOException
1002    {
1003        // The method 'simplifyColorSpans' modifies the HTML that is received from the HiLite
1004        // Server, and replaces each of the In-Line Style-Spans so that rather than containing
1005        // 'STYLE= ...' HTML Attributes, they contains a brief 'CLASS=...'.  This makes the
1006        // output HTML look cleaner, nicer, and easier to manage / modify.
1007
1008        return simplifyColorSpans(
1009
1010            // The method 'prettyPrintScrapeToVector' sends the 'codeText' to the HiLite Server,
1011            // and receives the HiLited Source-Code HTML.  Afterwards it simple converts the HTML
1012            // which it received as a java.lang.String *INTO* a Vector<HTMLNode>
1013
1014            prettyPrintScrapeToVector
1015                (codeText, codeTypeParam, styleTypeParam, includeLineNumbers),
1016
1017            // simplifyColorSpans(...) needs to know which style-parameter was passed to the HiLite
1018            // server in order convert the STYLE Assignments into CSS Classes.
1019
1020            styleTypeParam
1021        );
1022    }
1023
1024    /**
1025     * This will take a {@code java.lang.String} of code - in almost any coding language - and
1026     * generate an HTML-"ified" version of that code.  This is often called "Pretty-Printing" code.
1027     * The software-platform/engine that does the transformation of source code to "nice-looking
1028     * HTML code" is just a site called <A HREF="http://HiLite.me/" TARGET="_blank">
1029     * http://HiLite.me</A>.
1030     * 
1031     * <BR /><BR />This method takes three parameters, and the first is the code itself (passed as a
1032     * {@code java.lang.String.})  The second is the "descriptor" - which is a short
1033     * text-{@code String} that identifies what type (programming-language) is being passed to
1034     * {@code HiLite.ME}.  The complete list of available {@code Code Types} can be found in the 
1035     * Raw HTML of {@code HiLite.ME's} main page.
1036     * 
1037     * <BR /><BR /><UL CLASS=JDUL>
1038     *  <LI>{@code 'java'} =&gt; Java Code!</LI>
1039     *  <LI>{@code 'css'} =&gt; CSS - Cascading Style Sheets</LI>
1040     *  <LI>{@code 'js'} =&gt; Java-Script code</LI>
1041     *  <LI>The list contains at least 75 languages, select "View Code Source" in Google-Chrome or
1042     *      Internet-Explorer to see complete set of options!</LI>
1043     * </UL>
1044     * 
1045     * <BR /><BR />
1046     * The third parameter is a {@code 'style'} name / type parameter.  Again, view the
1047     * <A HREF="http://HiLite.ME" TARGET="_blank"> http://HiLite.ME</A> website to see the complete
1048     * list of available styles.  My preferred / defaults are {@code 'vim'} and {@code 'native'}
1049     *
1050     * @param codeText This is the software-code (like CSS, Java, SQL, Python, etc..) saved as a
1051     * single text-{@code String}
1052     * 
1053     * @param codeTypeParam <EMBED CLASS='external-html' DATA-FILE-ID=HLMCODETP>
1054     * 
1055     * @param styleTypeParam <EMBED CLASS='external-html' DATA-FILE-ID=HLMSTYLETP>
1056     * 
1057     * @param includeLineNumbers When this parameter receives {@code TRUE}, line-numbers are
1058     * appended to the HTML output.
1059     * 
1060     * @return The HTML is returned as a {@code Vector<HTMLNode>}<BR /><BR />
1061     * 
1062     * <EMBED CLASS='external-html' DATA-FILE-ID=HLMPPSRET>
1063     * 
1064     * @throws IOException If there are any problems communicating with the HiLite Server.
1065     * 
1066     * @see Torello.Java.Additional.URLs#toProperURLV2(String)
1067     * @see HTMLPage#getPageTokens(BufferedReader, boolean)
1068     * @see InnerTagGetInclusive
1069     * @see TagNodeRemoveInclusive
1070     */
1071    public static Vector<HTMLNode> prettyPrintScrapeToVector
1072        (String codeText, String codeTypeParam, String styleTypeParam, boolean includeLineNumbers)
1073        throws IOException
1074    {
1075        codeText = URLs.toProperURLV2(codeText);
1076
1077        URL             url         = new URL("http://hilite.me");
1078        URLConnection   connection  = url.openConnection();
1079
1080        connection.setDoOutput(true);  
1081        
1082        OutputStreamWriter out = new OutputStreamWriter(connection.getOutputStream(), "UTF-8");
1083
1084        out.write(
1085            "lexer=" + codeTypeParam + 
1086            "&style=" + styleTypeParam + 
1087            (includeLineNumbers ? "&linenos=false" : "") +
1088            "&divstyles=border:solid gray;border-width:.1em .1em .1em .8em;padding:.2em .6em;" +
1089            "&code=" + codeText 
1090        );
1091        out.flush();   
1092
1093        BufferedReader br = new BufferedReader
1094            (new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8")));
1095
1096        // Read the HTML String response and Parse the JTML into Vectorized-HTML.
1097        Vector<HTMLNode> v = HTMLPage.getPageTokens(br, false);
1098
1099        // First, find the divider where the HiLited Code is located.  It still has some extra
1100        // bluff-and-bluster, fluff-and-fanfare, a bunch-of-crap to filter.
1101
1102        // The response is inside of the <DIV ID=preview> ... </DIV>
1103        // However, there is another 'outer-div' that contains an Inline CSS Style Attribute. The
1104        // inner <DIV> ... </DIV> is the one to return.  If there are line-numbers, that inner
1105        // <DIV> will have a <TABLE> that has one row with two columns.  The first column will have
1106        // all of the line-numbers, and the second column will have the Hi-Lited Source Code.
1107
1108        int idDIVPos = InnerTagFind.first(v, "div", "id", TextComparitor.EQ_CI, "preview");
1109
1110        v = TagNodeGetInclusive.first(v, idDIVPos + 1, -1, "div");
1111
1112        /*
1113        if (includeLineNumbers)
1114        {
1115            int end = v.size();
1116            System.out.println(
1117                "******* DEBUG *******\n" +
1118                Debug.print(v, 0, ((end < 20) ? end : 20), Debug::K) +
1119                "******* DEBUG *******\n" +
1120                Debug.print(v, ((end < 20) ? 0 : (end - 20)), end, Debug::K)
1121            );
1122            if (! Q.YN("continue?")) System.exit(0);
1123        }
1124        */
1125
1126        return v;
1127    }
1128
1129    /**
1130     * This performs an identical operation to method: {@code prettyPrintScrapeToVector(String,
1131     * String, String)}, but it checks the provided {@code cache} first, before querying the
1132     * server.
1133     * 
1134     * @param codeText This is the software-code (like CSS, Java, SQL, Python, etc..) saved as a
1135     * single text-{@code String}.
1136     * 
1137     * @param codeTypeParam <EMBED CLASS='external-html' DATA-FILE-ID=HLMCODETP>
1138     * 
1139     * @param styleTypeParam <EMBED CLASS='external-html' DATA-FILE-ID=HLMSTYLETP>
1140     * 
1141     * @param includeLineNumbers When this parameter receives {@code TRUE}, line-numbers are
1142     * appended to the HTML output.
1143     * 
1144     * @param cache This is the cache that must be passed.  Read about the {@link HiLiteMe.Cache}
1145     * {@code static inner class} for an explanation about how caching results from the server
1146     * works.  Save time on the build by caching results that have not changed.
1147     * 
1148     * @return The HTML is returned as a {@code Vector<HTMLNode>}<BR /><BR />
1149     * 
1150     * <EMBED CLASS='external-html' DATA-FILE-ID=HLMPPSRET>
1151     * 
1152     * @throws IOException If there are any problems communicating with the HiLite Server.
1153     * 
1154     * @see #prettyPrintScrapeToVector(String, String, String, boolean)
1155     * @see HTMLPage#getPageTokens(CharSequence, boolean)
1156     */
1157    public static Vector<HTMLNode> prettyPrintScrapeToVector(
1158            String codeText, String codeTypeParam, String styleTypeParam,
1159            boolean includeLineNumbers, Cache cache
1160        )
1161        throws IOException
1162    {
1163        // FIRST: Check the Cache, to see if the exact String has been hilited!
1164        String ret = cache.get(codeText, codeTypeParam, styleTypeParam, includeLineNumbers);
1165
1166        // If there was a Cache hit -> return that String rather than querying the server.
1167        if (ret != null) return HTMLPage.getPageTokens(ret, false);
1168
1169        // NO? Then query the server.
1170        Vector<HTMLNode> retVec = prettyPrintScrapeToVector
1171            (codeText, codeTypeParam, styleTypeParam, includeLineNumbers);
1172
1173        // Make sure to save the response in the Cache for next time
1174        cache.checkIn(
1175            codeText, Util.pageToString(retVec), codeTypeParam, styleTypeParam,
1176            includeLineNumbers
1177        );
1178
1179        return retVec;
1180    }
1181
1182    /**
1183     * This performs an identical operation to method: {@code prettyPrintScrapeToVector(String,
1184     * String, String)}, but it checks the provided {@code cache} first, before querying the
1185     * server.  Furthermore, the HTML that is returned has been <B>simplified</B>, using the
1186     * method {@link #simplifyColorSpans(Vector, String)}.
1187     * 
1188     * <BR /><BR />Using this version of <B>Pretty Print</B> mandates that the user provide a
1189     * Cache.
1190     * 
1191     * @param codeText This is the software-code (like CSS, Java, SQL, Python, etc..) saved as a
1192     * single text-{@code String}.
1193     * 
1194     * @param codeTypeParam <EMBED CLASS='external-html' DATA-FILE-ID=HLMCODETP>
1195     * 
1196     * @param styleTypeParam <EMBED CLASS='external-html' DATA-FILE-ID=HLMSTYLETP>
1197     * 
1198     * @param includeLineNumbers When this parameter receives {@code TRUE}, line-numbers are
1199     * appended to the HTML output.
1200     * 
1201     * @param cache This is the cache that must be passed.  Read about the {@link HiLiteMe.Cache}
1202     * {@code static inner class} for an explanation about how caching results from the server
1203     * works.  Save time on the build by caching results that have not changed.
1204     * 
1205     * @return The HTML is returned as a {@code Vector<HTMLNode>}<BR /><BR />
1206     * 
1207     * <EMBED CLASS='external-html' DATA-FILE-ID=HLMPPSRET>
1208     * 
1209     * @throws IOException If there are any problems communicating with the HiLite Server.
1210     * 
1211     * @see #prettyPrintScrapeToVector(String, String, String, boolean)
1212     * @see HTMLPage#getPageTokens(CharSequence, boolean)
1213     * @see #simplifyColorSpans(Vector, String)
1214     */
1215    public static Vector<HTMLNode> prettyPrintScrapeToVectorAndSimplify(
1216            String codeText, String codeTypeParam, String styleTypeParam,
1217            boolean includeLineNumbers, Cache cache
1218        )
1219        throws IOException
1220    {
1221        // FIRST: Check the Cache, to see if the exact String has been hilited!
1222        String ret = cache.get(codeText, codeTypeParam, styleTypeParam, includeLineNumbers);
1223
1224        // If there was a Cache hit -> return that String rather than querying the server.
1225        if (ret != null) return HTMLPage.getPageTokens(ret, false);
1226
1227        // NO? Then query the server.
1228        Vector<HTMLNode> retVec =
1229            prettyPrintScrapeToVector(codeText, codeTypeParam, styleTypeParam, includeLineNumbers);
1230
1231        // Replace all of the STYLE=... attributes to CLASS=... attributes
1232        simplifyColorSpans(retVec, styleTypeParam);
1233
1234        // Make sure to save the response in the Cache for next time
1235        cache.checkIn(
1236            codeText, Util.pageToString(retVec), codeTypeParam, styleTypeParam,
1237            includeLineNumbers
1238        );
1239
1240        return retVec;
1241    }
1242
1243
1244    // ********************************************************************************************
1245    // ********************************************************************************************
1246    // Simplify Color Spans
1247    // ********************************************************************************************
1248    // ********************************************************************************************
1249
1250
1251    /**
1252     * Will return a {@code java.lang.String} containing all of the {@code CSS STYLE} definitions
1253     * for a particular {@code 'Style Type'}.
1254     * 
1255     * <BR /><BR /><B>NOTE:</B> These definitions are only useful if the HiLited Source Code that
1256     * you have used has been simplified from <B>in-line {@code 'STYLE=...'} attributes</B> into 
1257     * a <B>{@code CLASS=...}</B> version of the hiliting.
1258     * 
1259     * @param styleParam This may be any of the valid {@code 'Style Tags'} available for hiliting
1260     * source-code.  The complete list of {@code 'Style Tags'} may be viewed here:
1261     * 
1262     * <BR /><BR /><B><A HREF='doc-files/HiLiteMe/HiLiteStyleCodes.html'>
1263     * HiLiting Style Codes</A></B>
1264     * 
1265     * @return The {@code CSS Class} definitions as a {@code String}
1266     * 
1267     * <BR /><BR /><B>NOTE:</B> The {@code RETURN VALUES} (as {@code String's}) can actually be
1268     * viewed right here by clicking the link below.  Each {@code 'Style Type'} is followed by
1269     * a list of <B>CSS Definitions</B> that must be included in order for the Code HiLiting to
1270     * work (if-and-only-if) you have opted to use the "Simplified Color Spans."
1271     * 
1272     * <BR /><BR /><B><A HREF='doc-files/HiLiteMe/StyleTagCSSDefinitions.html'>
1273     * Style Tag CSS Definitions</A></B>
1274     * 
1275     * @throws IllegalArgumentException If an invalid {@code 'Style Type'} has been passed to the
1276     * {@code 'styleParam'} parameter.
1277     */
1278    @SuppressWarnings("unchecked")
1279    public static String styleParamCSSClasses(String styleParam)
1280    {
1281        if (! isStyleType(styleParam)) throw new IllegalArgumentException
1282            ("The Style Type Parameter passed [" + styleParam + "] is not a valid Style Type");
1283
1284        if (styleCSSDefinitions == null)
1285            styleCSSDefinitions = (TreeMap<String, String>) LFEC.readObjectFromFile_JAR
1286                (HiLiteMe.class, "data-files/HLMClasses.tmdat", true, TreeMap.class);
1287
1288        return styleCSSDefinitions.get(styleParam);
1289    }
1290
1291    /**
1292     * This may be called from the command line.  It will print the CSS Definitions for a 
1293     * {@code 'Style Type'} parameter.  The complete list of valid {@code Style Types} may be
1294     * viewed here:
1295     * 
1296     * <BR /><BR /><B><A HREF='doc-files/HiLiteMe/StyleTagCSSDefinitions.html'>
1297     * Style Tag CSS Definitions</A></B>
1298     */
1299    public static void main(String[] argv)
1300    {
1301        // for (String s : codeTypeDescriptions.keySet()) System.out.println(s);
1302        // System.exit(0);
1303
1304        if (argv.length != 1)
1305            System.out.println("Pass a Style Type as an argument to this main method.");
1306
1307        if (! isStyleType(argv[0]))
1308            System.out.println(
1309                "You have not passed a valid Style Type to this main method.  Please view " +
1310                "the documentation to see the list of valid Style Type's."
1311            );
1312
1313        System.out.println(styleParamCSSClasses(argv[0]));
1314    }
1315
1316    /**
1317     * <EMBED CLASS='external-html' DATA-FILE-ID=HLMSIMPLIFY>
1318     * 
1319     * @param page This should be Vectorized-HTML that was produced by the
1320     * {@code 'prettyPrintScrapeToVector'} method.
1321     *
1322     * @param styleTypeParam This has to be the same value for {@code 'styleTypeParam'} that was
1323     * used to convert the HTML for the {@code 'page'} parameter.  If a different
1324     * {@code 'Style Type'} is accidentally passed to this method, none of the HTML {@code <SPAN>}
1325     * elements will be replaced.
1326     * 
1327     * @return The original page {@code Vector<HTMLNode>}
1328     * 
1329     * @throws IllegalArgumentException If the passed {@code 'styleTypeParam'} is not recognized
1330     * by the internal list of available {@code 'Style Types'}.
1331     */
1332    @SuppressWarnings("unchecked")
1333    public static Vector<HTMLNode> simplifyColorSpans(Vector<HTMLNode> page, String styleTypeParam)
1334    {
1335        if (! isStyleType(styleTypeParam)) throw new IllegalArgumentException(
1336            "The passed 'styleTypeParam' value [" + styleTypeParam + "] is not a registered " +
1337            "Style Type Code known by this class."
1338        );
1339
1340        // Lazy
1341        if (allMaps == null)
1342        {
1343            allMaps = 
1344                (TreeMap<String, TreeMap<String, TagNode>>)
1345                LFEC.readObjectFromFile_JAR
1346                    (HiLiteMe.class, "data-files/HLMSpans.tmdat", true, TreeMap.class);
1347        }
1348
1349        // Retrieves the a TreeMap that maps STYLE-ATTRIBUTE values (retrieved from <SPAN>
1350        // elements) directly to replacement TagNode's that use a CLASS=... instead.
1351
1352        TreeMap<String, TagNode> map = allMaps.get(styleTypeParam);
1353
1354        // This should never happen, but if it does, this error is better than NullPointerException
1355        if (map == null) throw new InternalError(
1356            "The style parameter you have passed IS VALID, but unfortunately, the map file for " +
1357            "that style is not loading properly."
1358        );
1359
1360        // Retrieve all HTML "<SPAN STYLE=...>" elements. Specifically, retrieve all <SPAN>
1361        // that actually contain a STYLE attribute.  Afterwards, retrieve all of the values of
1362        // that style-element, and store that in the 'styles' String-Array.
1363
1364        int[]       spans       = InnerTagFind.all(page, "span", "style");
1365        String[]    styles      = Attributes.retrieve(page, spans, "style");
1366        TagNode     replacement = null;
1367
1368        // Minor optimization that's inside the data-file.  The "STYLE='color: #123456;...'
1369        // eliminates the leading characters "color: #" in the style-attribute.  Here we need
1370        // to do the EXACT-SAME removal, or else the lookup-table will not find that
1371        // style-attribute.
1372
1373        for (int i=0; i < styles.length; i++)
1374            if (styles[i].startsWith("color: #"))
1375                styles[i] = styles[i].substring("color: #".length());
1376
1377        // Replace all of the <SPAN STYLE=...> TagNode's with the simplified TagNode's.
1378
1379        for (int i=0; i < spans.length; i++)
1380            if ((replacement = map.get(styles[i])) != null)
1381                page.setElementAt(replacement, spans[i]);
1382
1383        removeDuplicateColorSpans(page);
1384
1385        return page;
1386    }
1387
1388    private static final Pattern P = Pattern.compile("H(\\d{1,3})");
1389
1390    // The HiLite.Me server has many places where a <SPAN>...</SPAN> of code is redundant, and 
1391    // should be removed.
1392    // It usually looks like the following (PAY CLOSE ATTENTION!)
1393    // 
1394    // <span style="color: #d0d0d0">removeArr</span> <span style="color: #d0d0d0">=</span>
1395    //
1396    // The above <SPAN> is FIRST simplified to (in the 'simplifyColorSpans') to:
1397    //
1398    // <span style=H10>removeArr</span> <span style=H10>=</span>
1399    // 
1400    // AND THEN SIMPLIFIED TO: (by this method)
1401    //
1402    // <SPAN STYLE=10>removeArr =</SPAN>
1403    private static int removeDuplicateColorSpans(Vector<HTMLNode> v)
1404    {
1405        HNLIInclusive       iter        = InnerTagInclusiveIterator.get(v, "span", "class", P);
1406        IntStream.Builder   b           = IntStream.builder();
1407        DotPair             prev        = new DotPair(0, v.size() - 1);  // Non-sense initialization
1408        int                 prevClass   = -1; // Non-sense initialization
1409
1410        while (iter.hasNext())
1411        {
1412            DotPair         cur         = iter.nextDotPair();
1413            TagNode         tn          = (TagNode) v.elementAt(cur.start);
1414            int             curClass    = Integer.parseInt(tn.AV("class").substring(1));
1415            boolean         isClear     = prev.end < cur.start;
1416            HTMLNode        n;
1417
1418            for (int i=(prev.end+1); isClear && (i < (cur.start-1)); i++)
1419                if (((n = v.elementAt(i)) instanceof TagNode) || (n.str.trim().length() > 0))
1420                    isClear = false;
1421
1422            if (isClear && (curClass == prevClass))
1423            { b.accept(prev.end); b.accept(cur.start); }
1424
1425            prev        = cur;
1426            prevClass   = curClass;
1427        }
1428
1429        int[] removeArr = b.build().toArray();
1430        Util.Remove.nodesOPT(v, removeArr);
1431
1432        return removeArr.length;
1433    }
1434}