001package Torello.HTML.Tools.Images;
002
003import Torello.HTML.*;
004import Torello.Java.*;
005
006import Torello.HTML.NodeSearch.TagNodeFind;
007import Torello.Java.Additional.Ret2;
008import Torello.Java.Additional.Counter;
009import Torello.Java.Shell.C;
010
011import java.net.*;
012import java.io.*;
013import java.util.*;
014import java.util.regex.*;
015import java.util.function.*;
016import javax.imageio.*;
017import java.util.concurrent.*;
018import java.util.concurrent.locks.*;
019
020import java.awt.image.BufferedImage;
021
022/**
023 * A more advanced class for both downloading and saving a list of images, using URL's.
024 * 
025 * <BR /><BR /><EMBED CLASS="external-html" DATA-FILE-ID="ISR">
026 */
027public class ImageScraper
028{
029    /**
030     * This is the default maximum wait time for an image to download ({@value}).  This value may
031     * be reset or modified by instantiating a {@code ImageScraper.AdditionalParameters} class, and
032     * passing the desired values to the constructor.  This value is measured in units of
033     * {@code public static final java.util.concurrent.TimeUnit MAX_WAIT_TIME_UNIT}
034     *
035     * @see #MAX_WAIT_TIME_UNIT
036     */
037    public static final long        MAX_WAIT_TIME       = 10;
038
039    /**
040     * This is the default measuring unit for the {@code static final long MAX_WAIT_TIME} member.
041     * This value may be reset or modified by instantiating a 
042     * {@code ImageScraper.AdditionalParameters} class, and passing the desired values to the
043     * constructor.
044     *
045     * @see #MAX_WAIT_TIME
046     */
047    public static final TimeUnit MAX_WAIT_TIME_UNIT  = TimeUnit.SECONDS;
048
049    /** <EMBED CLASS="external-html" DATA-FILE-ID="ISUA"> */
050    public static String USER_AGENT = "Chrome/61.0.3163.100";
051
052    private final   Iterable<String>            source;
053    private final   URL                         originalPageURL;
054
055    private final   String                      targetDirectory;
056    private final   TargetDirectoryRetriever    targetDirectoryRetriever;
057    private final   ImageReceiver               imageReceiver;
058
059    /**
060     * A functional-interface that allows a user to specify where to save an image-file that
061     * provides a list of details about the image before it is actually written to disk.
062     * 
063     * <BR /><BR /><EMBED CLASS="external-html" DATA-FILE-ID="TDR">
064     */
065    @FunctionalInterface
066    public static interface TargetDirectoryRetriever extends java.io.Serializable
067    {
068        /** <EMBED CLASS="external-html" DATA-FILE-ID="SVUIDFI">  */
069        public static final long serialVersionUID = 1;
070
071        /**
072         * The {@code dir(...)} method within this interface will be called each time that an image
073         * successfully downloads from the internet.  It's purpose is to allow the programmer to
074         * supply a target directory for where to store this downloaded image.  Implement the
075         * lone-method from this interface, and images will be saved to individual save-directories
076         * on an image-by-image basis.  If a {@code interface TargetDirectorieRetriever} is not
077         * provided, then all images will be saved to a single target-directory (or the
078         * {@code interface 'ImageReceiver"} must be implemented).
079         *
080         * @param url This is the {@code URL} that was used to connect to the internet, and
081         * download the image in question.
082         *
083         * @param fileName This parameter will receive the computed filename of the image.
084         *
085         * @param imageFormat This identifies whether the image-in-question is a {@code GIF, JPG, 
086         * PNG} etc...  Remember the image might not be saved by the same name which was used in 
087         * the HTML on the website from which this was downloaded.
088         *
089         * @param iteratorCount <EMBED CLASS="external-html" DATA-FILE-ID="ISIC">
090         *
091         * @param successCount <EMBED CLASS="external-html" DATA-FILE-ID="ISSC">
092         *
093         * @return It is up to the user implement this method such that it returns a {@code String}
094         * that identifies an appropriate directory in the local filesystem where the image may be
095         * saved.
096         */
097        public String dir
098            (URL url, String fileName, IF imageFormat, int iteratorCount, int successCount);
099    }
100
101    /**
102     * A functional-interface that allows a user to save an image-file to a location of his
103     * or her choosing.
104     * 
105     * <BR /><BR /><EMBED CLASS="external-html" DATA-FILE-ID="IMGR">
106     */
107    @FunctionalInterface
108    public static interface ImageReceiver extends java.io.Serializable
109    {
110        /** <EMBED CLASS="external-html" DATA-FILE-ID="SVUIDFI">  */
111        public static final long serialVersionUID = 1;
112
113        /**
114         * Implement this class if saving image files to a target-directory on the file-system is
115         * not acceptable, and the programmer wishes to do something else with the downloaded
116         * images.  The lone-method in this interface (the "save" method) will be invoked each
117         * time and image is downloaded.
118         *
119         * @param url This is the {@code URL} that was used to connect to the internet, and
120         * download the image in question.
121         *
122         * @param fileName This parameter will receive the computed filename of the image.
123         *
124         * @param imageFormat This identifies whether the image-in-question is a {@code GIF, JPG,
125         * PNG, etc...}  Remember the image might not be saved by the same name which was used in
126         * the HTML on the website from which this was downloaded.
127         *
128         * @param iteratorCount <EMBED CLASS="external-html" DATA-FILE-ID="ISIC">
129         *
130         * @param successCount <EMBED CLASS="external-html" DATA-FILE-ID="ISSC">
131         *
132         * @param image This is the newly downloaded image.
133         */
134        public void save(
135            URL url, String fileName, IF imageFormat, int iteratorCount,
136            int successCount, BufferedImage image
137        );
138    }
139
140    /**
141     * A functional-interface allowing a user to specify a file-name for an image, which provides
142     * some details about the image before it is saved to disk.
143     * 
144     * <BR /><BR /><EMBED CLASS="external-html" DATA-FILE-ID="FNR">
145     */
146    @FunctionalInterface
147    public static interface FileNameRetriever extends java.io.Serializable
148    {
149        /** <EMBED CLASS="external-html" DATA-FILE-ID="SVUIDFI">  */
150        public static final long serialVersionUID = 1;
151
152        /**
153         * The intention of implementing this method is to provide the code with an 'adjusted'
154         * file name for saving images downloaded from the internet.
155         *
156         * @param url This is the {@code URL} that was used to connect to the internet, and
157         * download the image in question.
158         *
159         * @param imageFormat This identifies whether the image-in-question is a {@code GIF, JPG, 
160         * PNG,} etc...  Remember the image might not be saved by the same name which was used in
161         * the HTML on the website from which this was downloaded.
162         *
163         * @param iteratorCount <EMBED CLASS="external-html" DATA-FILE-ID="ISIC">
164         * @param successCount <EMBED CLASS="external-html" DATA-FILE-ID="ISSC">
165         *
166         * @return utilizing the information provided in the method-signature, the programmer is
167         * expected to provide a file-name for saving the image that was provided.
168         */
169        public String fileName(URL url, IF imageFormat, int iteratorCount, int successCount);
170    }
171
172    // *************************************************************************************
173    // source is Iterable<URL>
174    // *************************************************************************************
175
176    private static final Iterable<String> URLVecToStringVec(Iterable<URL> source)
177    {
178        Vector<String> ret = new Vector<>();
179        source.forEach((URL url) -> ret.add(url.toString()));
180        return ret;
181    }
182
183    /**
184     * Convenience Constructor.
185     * <BR />Invokes: {@link #ImageScraper(URL, Iterable, String)}
186     * <BR />Converts: {@code Iterable<URL>} to {@code Iterable<String>}
187     */
188    public ImageScraper(Iterable<URL> source, String targetDirectory)
189    { this(null, URLVecToStringVec(source), targetDirectory); }
190
191    /**
192     * Convenience Constructor.
193     * <BR />Invokes: {@link #ImageScraper(URL, Iterable, TargetDirectoryRetriever)}
194     * <BR />Converts: {@code Iterable<URL>} to {@code Iterable<String>}
195     */
196    public ImageScraper(Iterable<URL> source, TargetDirectoryRetriever targetDirectoryRetriever)
197    { this(null, URLVecToStringVec(source), targetDirectoryRetriever); }
198
199    /**
200     * Convenience Constructor.
201     * <BR />Invokes: {@link #ImageScraper(URL, Iterable, ImageReceiver)}
202     * <BR />Converts: {@code Iterable<URL>} to {@code Iterable<String>}
203     */
204    public ImageScraper(Iterable<URL> source, ImageReceiver imageReceiver)
205    { this(null, URLVecToStringVec(source), imageReceiver); }
206
207    // *************************************************************************************
208    // source is Iterable<TagNode>, URL
209    // *************************************************************************************
210
211    private static final Iterable<String> TagNodeVecToStringVec(Iterable<TagNode> source)
212    {
213        Vector<String> ret = new Vector<>();
214        source.forEach((TagNode tn) -> ret.add(tn.AV("src")));
215        return ret;
216    }
217
218    /**
219     * Convenience Constructor.
220     * <BR />Invokes: {@link #ImageScraper(URL, Iterable, String)}
221     * <BR />Converts: {@code Iterable<TagNode>} to {@code String[]} using
222     * {@link TagNode#AV(String)}
223     * 
224     * @param source This may be any java {@code Iterable<TagNode>}.  The {@code TagNode's} are
225     * expected to contain HTML {@code <IMG SRC="...">} tags.
226     */
227    public ImageScraper(Iterable<TagNode> source, URL originalPageURL, String targetDirectory)
228    { this(originalPageURL, TagNodeVecToStringVec(source), targetDirectory); }
229
230    /**
231     * Convenience Constructor.
232     * <BR />Invokes: {@link #ImageScraper(URL, Iterable, TargetDirectoryRetriever)}
233     * <BR />Converts: {@code Iterable<TagNode>} to {@code @String[]} using
234     * {@link TagNode#AV(String)}
235     * 
236     * @param source This may be any java {@code Iterable<TagNode>}.  The {@code TagNode's} are
237     * expected to contain HTML {@code <IMG SRC="...">} tags.
238     */
239    public ImageScraper(
240            Iterable<TagNode> source, URL originalPageURL,
241            TargetDirectoryRetriever targetDirectoryRetriever
242        )
243    { this(originalPageURL, TagNodeVecToStringVec(source), targetDirectoryRetriever); }
244
245    /**
246     * Convenience Constructor.
247     * <BR />Invokes: {@link #ImageScraper(URL, Iterable, ImageReceiver)}
248     * <BR />Converts: {@code Iterable<TagNode>} to {code @String[]} using
249     * {@link TagNode#AV(String)}
250     * 
251     * @param source This may be any java {@code Iterable<TagNode>}.  The {@code TagNode's} are
252     * expected to contain HTML {@code <IMG SRC="...">} tags.
253     */
254    public ImageScraper(Iterable<TagNode> source, URL originalPageURL, ImageReceiver imageReceiver)
255    { this(originalPageURL, TagNodeVecToStringVec(source), imageReceiver); }
256
257    // *************************************************************************************
258    // source is Iterable<String>, URL
259    // *************************************************************************************
260
261    /**
262     * Constructor that allows a user to provide a set of {@code URL's} as {@code String's} to the
263     * download mechanism.
264     *
265     * @param source This is a {@code Vector<String>} of Image {@code URL's} saved as a
266     * {@code String}.
267     *
268     * @param originalPageURL <EMBED CLASS="external-html" DATA-FILE-ID="ISOPURL">
269     *
270     * @param targetDirectory When this constructor is used, this {@code String} parameter
271     * identifies the directory to where files must be saved.
272     *
273     * @throws NullPointerException If any of the elements of the input {@code Iterable<String>}
274     * are null elements, then this Exception shall be thrown.
275     *
276     * @throws WritableDirectoryException This constructor shall check that parameter
277     * {@code 'targetDirectory'} exists on the file-system, and is writable.  A small, temporary,
278     * file shall be written to check this.
279     */
280    public ImageScraper(URL originalPageURL, Iterable<String> source, String targetDirectory)
281    {
282        this.source                     = source;
283        this.originalPageURL            = originalPageURL;
284
285        // Ensures that the target directory exists, and is writable
286        WritableDirectoryException.check(targetDirectory);
287
288        // Makes sure that the directory ends with a slash / file-separator.
289        if (! targetDirectory.endsWith(File.separator)) if (targetDirectory.length() > 0)
290            targetDirectory = targetDirectory + File.separator;
291
292        this.targetDirectory            = targetDirectory;
293        this.targetDirectoryRetriever   = null;
294        this.imageReceiver              = null;
295
296        if (source == null)
297            throw new NullPointerException("parameter source is null");
298
299        if (targetDirectory == null)
300            throw new NullPointerException("parameter targetDirectory is null");
301    }
302
303    /**
304     * Constructor that allows a user to provide a set of {@code URL's} as {@code String's} to
305     * the download mechanism.
306     *
307     * @param source This is a {@code Vector<String>} of Image {@code URL's} saved as a
308     * {@code String}.
309     *
310     * @param originalPageURL <EMBED CLASS="external-html" DATA-FILE-ID="ISOPURL">
311     *
312     * @param targetDirectoryRetriever This parameter must implement the static-inner
313     * {@code class TargetDirectoryRetriever}.  This parameter allows the programmer to make a 
314     * decision where image-files are stored after they are downloaded one a file-by-file basis.
315     *
316     * @throws NullPointerException If any of the elements of the input {@code Iterable<String>}
317     * are null elements, then this Exception shall be thrown.
318     */
319    public ImageScraper(
320        URL originalPageURL, Iterable<String> source,
321        TargetDirectoryRetriever targetDirectoryRetriever
322    )
323    {
324        this.source                     = source;
325        this.originalPageURL            = originalPageURL;
326        this.targetDirectory            = null;
327        this.targetDirectoryRetriever   = targetDirectoryRetriever;
328        this.imageReceiver              = null;
329
330        if (source == null)
331            throw new NullPointerException("parameter source is null");
332
333        if (targetDirectoryRetriever == null)
334            throw new NullPointerException("targetDirectoryRetriever is null");
335    }
336
337    /**
338     * Constructor that allows a user to provide a set of {@code URL's} as {@code String's} to the
339     * download mechanism.
340     *
341     * @param source This is a {@code Vector<String>} of Image {@code URL's} saved as a
342     * {@code String}.
343     *
344     * @param originalPageURL <EMBED CLASS="external-html" DATA-FILE-ID="ISOPURL">
345     *
346     * @param imageReceiver This parameter allows the programmer to circumvent the "save-to-file"
347     * portion of the code, and instead send the downloaded image to this interface.
348     *
349     * @throws NullPointerException If any of the elements of the input {@code Iterable<String>} 
350     * are null elements, then this exception shall be thrown.
351     */
352    public ImageScraper(URL originalPageURL, Iterable<String> source, ImageReceiver imageReceiver)
353    {
354        this.source                     = source;
355        this.originalPageURL            = originalPageURL;
356
357        this.targetDirectory            = null;
358        this.targetDirectoryRetriever   = null;
359        this.imageReceiver              = imageReceiver;
360
361        if (source == null)
362            throw new NullPointerException("parameter source is null");
363
364        if (imageReceiver == null)
365            throw new NullPointerException("imageReceiver is null");
366    }
367
368    // *************************************************************************************
369    // *************************************************************************************
370    // More available download configuration parameters
371    // *************************************************************************************
372    // *************************************************************************************
373
374    /**
375     * A class that allows a user to further configure how images are downloaded.
376     * 
377     * <BR /><BR /><EMBED CLASS="external-html" DATA-FILE-ID="ADPA">
378     */
379    public static class AdditionalParameters
380    {
381        /** <EMBED CLASS="external-html" DATA-FILE-ID="SVUID"> */
382        public static final long serialVersionUID = 1;
383    
384        /**
385         * When this field is <B>TRUE</B>, if an attempt to download an image generates an
386         * exception, the exception-throw <I>will not halt the download</I>, but rather the image
387         * will be skipped, and download attempt will be performed on the next image in the list.
388         * The exception will be stored in the {@code 'Results'} return object.
389         */
390        public final boolean skipOnIOException;
391
392        /**
393         * When this field is null, it is ignored; but if this field is not null, then before any
394         * {@code URL} is connected for download, the downloaded mechanism will ask this
395         * {@code URL-Predicate} for permission first.  If this {@code Predicate} returns
396         * <B>FALSE</B> for a particular <B>URL,</B> that image will not be downloaded, and
397         * instead, skipped.
398         */
399        public final Predicate<URL> skipURL;
400
401        /**
402         * When this field is null, it is ignored; but if not null, this {@code String} will be
403         * <I>prepended</I> to each file-name that is saved or stored to the file-system.
404         */
405        public final String fileNamePrefix;
406
407        /**
408         * When true, images will be saved according to a counter; when this is <B>FALSE</B>, the
409         * software will attempt to save these images using their original filenames - picked from
410         * the <B>URL</B>.  Saving using a counter is the default behaviour for this class.
411         */
412        public final boolean useDefaultCounterForImageFileNames;
413
414        /**
415         * When this field is null, it is ignored; but if not null, each time an image is written
416         * to the file-system, this <CODE>java&#46;util&#46;function&#46;Function&lt;URL,
417         * String&gt;</CODE> will be queried for a file-name before writing the the image-file to
418         * the file-system.  If this field is non-null, but images are being sent to
419         * {@code Consumer<BufferedImage, IF> downloadedImageAltTarget}, rather than being saved
420         * to the file-system, then this field is <I>also ignored</I>.
421         */
422        public final FileNameRetriever getImageFileSaveName;
423
424        /**
425         * This scraper has the ability to decode and save {@code Base-64} Images, and they may be
426         * downloaded or skipped - <I>based on this {@code boolean}</I>.  If an
427         * {@code Iterable<TagNode>} is passed to the constructor, and one of those
428         * {@code TagNode's} contain an Image Element
429         * ({@code <IMG SRC="data:image/jpeg;base64,...data">}) this class has the ability to
430         * interpret and save the image to a regular image file.  By default, {@code Base-64}
431         * images are skipped, but they can also be downloaded as well.
432         */
433        public final boolean skipBase64EncodedImages;
434
435        /**
436         * If you do not want the downloader to hang on an image, which is sometimes an issue
437         * depending upon the site from which you are making a request, set this parameter, and the
438         * downloader will not wait past that amount of time to download an image.  The default
439         * value for this parameter is {@code 10 seconds}.  If you do not wish to set the
440         * max-wait-time "the download time-out" counter, then leave the parameter
441         * {@code "waitTimeUnits"} set to {@code null}, and this parameter will be ignored.
442         */
443        public final long maxDownloadWaitTime;
444
445        /**
446         * This is the "unit of measurement" for the field {@code long maxDownloadWaitTime}.
447         * <BR /><BR /><B>NOTE:</B> <I>This parameter may be {@code null}, and if it is
448         * <SPAN STYLE="color: red;"> both <B>this</B> parameter and the parameter <B>{@code long
449         * maxDownloadWaitTime}</B> will be ignored</SPAN></I>, and the default maximum-wait-time
450         * (download time-out settings) will be used instead.
451         *
452         * <BR /><BR /><B>READ:</B> java.util.concurrent.*; package, and about the {@code class
453         * java.util.concurrent.TimeUnit} for more information.
454         */
455        public final TimeUnit waitTimeUnits;
456
457        /**
458         * Use this constructor to instantiate this class.  Read what each of these parameters
459         * means to the downloader, by reading the comment information for each of these fields
460         * in this class (above).
461         *
462         * @param skipOnIOException This will "skip" an image, and prevent the downloading process from
463         * halting if an image fails to download
464         *
465         * @param skipURL A java {@code Predicate} for deciding which images should be skipped.
466         * This parameter may be 'null.'  If it is, it will be ignored, and the downloader will
467         * attempt to download all images.
468         *
469         * @param fileNamePrefix A standard Java-{@code String} may be inserted before the
470         * file-name of each image downloaded, as a 'file-name prefix'.  This parameter may be
471         * null, and if it is file-name prefixes will not be used.
472         *
473         * @param useDefaultCounterForImageFileNames It is usually a good idea to replace the
474         * file-name for an image retrieved from a web-site with a simple, three-digit,
475         * counter-name.  Image file names on a web-site can often be long {@code PKID Strings}
476         * obtained from {@code SQL} database queries. To use a standard "counter" set this
477         * parameter to <B>TRUE</B>.
478         *
479         * @param getImageFileSaveName This parameter may be used to convert image file-names used
480         * on a web-page to user-generated image-file-names.  This parameter may be null, and if it
481         * is - it will be ignored.  If this parameter is non-null, it takes precedence over the
482         * {@code boolean} passed to parameter {@code 'useDefaultCounterForImageFileNames'}
483         *
484         * @param skipBase64EncodedImages This will order the downloader to convert and save HTML
485         * Image Elements whose image-data was encoded into HTML Element, itself, using
486         * {@code Base-64} Image-Encoding.  Thumbnails and other small images are sometimes stored
487         * on web-pages using such encoding.
488         *
489         * @param maxDownloadWaitTime This parameter will be ignored unless a non-null value has
490         * been passed to parameter {@code 'waitTimeUnits'}.  This may be used to prevent the
491         * downloader from hanging when collecting images for a web-page.
492         *
493         * @param waitTimeUnits This is java {@code class TimeUnit} parameter for describing what
494         * units are being used for the previous parameter, {@code 'maxDownloadWaitTime'}.
495         */
496        public AdditionalParameters(
497            boolean                 skipOnIOException,
498            Predicate<URL>          skipURL,
499            String                  fileNamePrefix,
500            boolean                 useDefaultCounterForImageFileNames,
501            FileNameRetriever       getImageFileSaveName,
502            boolean                 skipBase64EncodedImages,
503            long                    maxDownloadWaitTime,
504            TimeUnit                waitTimeUnits
505        )
506        {
507            this.skipOnIOException                      = skipOnIOException;
508            this.skipURL                                = skipURL;
509            this.fileNamePrefix                         = fileNamePrefix;
510            this.useDefaultCounterForImageFileNames     = useDefaultCounterForImageFileNames;
511            this.getImageFileSaveName                   = getImageFileSaveName;
512            this.skipBase64EncodedImages                = skipBase64EncodedImages;
513            this.maxDownloadWaitTime                    = maxDownloadWaitTime;
514            this.waitTimeUnits                          = waitTimeUnits;
515
516            if (maxDownloadWaitTime < 0) throw new IllegalArgumentException(
517                "You have passed a negative number for parameter maxDownloadWaitTime, and this is " +
518                "not allowed here."
519            );
520        }
521
522        /**
523         * This constructor will return an instance of {@code AdditionalParameters} whose values
524         * provide the following <B>MOST COMMON</B> behaviour choices:
525         *
526         * <BR /><TABLE CLASS="BRIEFTABLE">
527         * <TR><TH>Parameter</TH><TH>Value</TH></TR>
528         * <TR><TD>{@code skipOnIOException}</TD><TD>{@code TRUE}</TD></TR>
529         * <TR><TD>{@code useDefaultCounterForImageFileNames}</TD><TD>{@code TRUE}</TD></TR>
530         * <TR><TD>{@code skipBase64EncodedImages}</TD><TD>{@code FALSE}</TD></TR>
531         * <TR><TD COLSPAN="2"><I>All other parameters set to 'null', and will be ignored.</I>
532         * </TD></TR>
533         * </TABLE>
534         */
535        public AdditionalParameters()
536        { this(true, null, null, true, null, false, 0, null); }
537    }
538
539    // *************************************************************************************
540    // *************************************************************************************
541    // Results inner class
542    // *************************************************************************************
543    // *************************************************************************************
544
545    /**
546     * After downloading, class <CODE>ImageScraper</CODE> returns an instance of this class.
547     * 
548     * <BR /><BR /><EMBED CLASS="external-html" DATA-FILE-ID="ISRES">
549     */
550    public static class Results
551    {
552        /**
553         * The java serializable tools can be very beneficial for saving the state of a program you
554         * are testing.  Though it is unlikely a programmer would want to transmit this
555         * 'results-report' class around (or at least I cannot think of much use), saving the state
556         * of web-page scrape and all the testing routines that have been used is something that
557         * can be really helpful.  <I><SPAN STYLE="color: red;">This is why most of the classes
558         * that can be created / instantiated - a.k.a. non-static classes - implement the
559         * Serializable interface</SPAN></I>.  It's a great debugging tool.
560         */
561        public static final long serialVersionUID = 1;
562
563        /**
564         * This will contain a complete list of the {@code URL's} that were retrieved (or generated-
565         * <I>if partially-resolved 'relative' {@code URL's} occurred</I>).  Every image downloaded
566         * (or attempted for download) will have its {@code URL} saved here.
567         *
568         * <BR /><BR />The index of this array will be parallel to the input-source {@code URL}
569         * retrieval order.
570         */
571        public final URL[] urls;
572
573        /**
574         * If the "skip" {@code Predicate} declares that a particular image-download should not be
575         * attempted, <I>FALSE</I> will be stored in this array.
576         *
577         * <BR /><BR />The index of this array will be parallel to the input-source {@code URL}
578         * retrieval order.
579         */
580        public final boolean[] skipped;
581
582        /**
583         * The names of the files that were retrieved and/or stored will be in this array.
584         * If this image were skipped or an exception occurred, the array position for that
585         * {@code URL} would contain 'null'.
586         *
587         * <BR /><BR />The index of this array will be parallel to the input-source {@code URL}
588         * retrieval order.
589         */
590        public final String[] fileNames;
591
592        /**
593         * The location of the file-name saved directory, if an image did not successfully save to
594         * the file system, or if an {@code ImageReceiver} were used, then the array-location would
595         * contain {@code 'null'}.
596         *
597         * <BR /><BR />The index of this array will be parallel to the input-source {@code URL}
598         * retrieval order.
599         */
600        public final String[] saveDirectories;
601
602        /**
603         * The image type of the files that were retrieved will be stored in this array.
604         *
605         * <BR /><BR />The index of this array will be parallel to the input-source {@code URL}
606         * retrieval order.
607         */
608        public final IF[] imageFormats;
609
610        /**
611         * If an image download fails, this will contain a record of the exception.
612         *
613         * <BR /><BR />The index of this array will be parallel to the input-source {@code URL}
614         * retrieval order.
615         *
616         * <BR /><BR />If the download succeeded, then the associated array location would contain
617         * 'null.'
618         */
619        public final Exception[] exceptions;
620
621        /**
622         * This will contain a list of long-integers, each of which will have the file-size of the
623         * downloaded image.  If the programmer has elected for the {@code 'ImageReceiver'} option
624         * - <I>rather than direct download of the images to the underlying file-system</I> (save to
625         * lambda, instead of save-as-file) - then the "fileSize" will be a calculated file-size,
626         * and not reflect the actual size of any file on the file-system.  Obviously, this is
627         * because no file was created!
628         *
629         * <BR /><BR />The index of this array will be parallel to the input-source {@code URL}
630         * retrieval order.
631         */
632        public final long[] sizes;
633
634        /**
635         * This will contain a list of integers, each of which shall have the image-widths of the 
636         * downloaded images.
637         *
638         * <BR /><BR />The index of this array will be parallel to the input-source {@code URL}
639         * retrieval order.
640         */
641        public final int[] widths;
642
643        /**
644         * This shall contain a list of integers, each of which shall have the image-heights of 
645         * the downloaded images.
646         *
647         * <BR /><BR />The index of this array will be parallel to the input-source {@code URL}
648         * retrieval order.
649         */
650        public final int[] heights;
651
652        /** next result received array position. */
653        int pos = 0;
654
655        /** number of successfully saved images. */
656        int successCounter = 0;
657
658        /** When images are downloaded, log information may be sent here */
659        Appendable log = null;
660
661        Results(int size, Appendable log)
662        {
663            this.log = log;
664
665            urls                = new URL[size];
666            skipped             = new boolean[size];
667            fileNames           = new String[size];
668            saveDirectories     = new String[size];
669            imageFormats        = new IF[size];
670            exceptions          = new Exception[size];
671            sizes               = new long[size];
672            widths              = new int[size];
673            heights             = new int[size];
674
675            for (int i=0; i < size; i++)
676            {
677                urls[i]             = null;
678                skipped[i]          = false;
679                fileNames[i]        = null;
680                saveDirectories[i]  = null;
681                imageFormats[i]     = null;
682                exceptions[i]       = null;
683                sizes[i]            = -1;
684                widths[i]           = -1;
685                heights[i]          = -1;
686            }
687        }
688
689        void nullURL() throws IOException // The Appendable throws this
690        {
691            if (log != null) log.append
692                ("\t\t" + C.RED + "No URL was passed, or URL not found." + C.RESET + '\n');
693
694            skipped[pos]            = true;
695            pos++;
696        }
697
698        void urlException(String src, Exception e) throws IOException // The Appendable throws this
699        {
700            if (log != null) log.append(
701                "\t\t" + C.RED + "Failed Instantiate URL, src = " + src + ", " +
702                e.getClass().getName() + ": " + e.getMessage() + C.RESET + '\n'
703            );
704
705            skipped[pos]            = true;
706            exceptions[pos]         = e;
707            pos++;
708        }
709    
710        void skippedURL(URL url) throws IOException // The Appendable throws this
711        {
712            if (log != null) log.append("\t\t" + C.YELLOW + "*** SKIPPING" + C.RESET + '\n');
713
714            urls[pos]               = url;
715            skipped[pos]            = true;
716            pos++;
717        }
718
719        void downloadException(URL url, Exception e) throws IOException // The Appendable throws this
720        {
721            String msg = (e.getMessage() != null) 
722                ? e.getMessage()
723                : "no exception-message provided, [e.getMessage()==null]";
724
725            if (log != null) log.append(
726                "\t\t" + C.RED + "DOWNLOAD-EXCEPTION:\t" + e.getClass().getName() + ": " + msg + C.RESET + '\n' +
727                "\t\t" + "While Downloading URL:\t" + url.toString() + '\n'
728            );
729
730            urls[pos]               = url;
731            skipped[pos]            = true;
732            exceptions[pos]         = e;
733            pos++;
734        }
735
736        void imageReceiverSuccess
737            (URL url, String fileName, IF ext, long size, int width, int height)
738            throws IOException // The Appendable throws this
739        {
740            if (log != null) log.append(
741                "\t\t" + C.YELLOW + "Successfully sent [" + fileName + '.' + ext.extension + "] to class ImageReceiver" + C.RESET + '\n'
742            );
743
744            urls[pos]               = url;
745            fileNames[pos]          = fileName + '.' + ext.extension;
746            imageFormats[pos]       = ext;
747            sizes[pos]              = size;
748            widths[pos]             = width;
749            heights[pos]            = height;
750            pos++;
751            successCounter++;
752        }
753
754        void saveSuccess(
755                URL url, String targetDirectory, String fileName, IF ext, long size,
756                int width, int height
757            )
758            throws IOException // The Appendable throws this
759        {
760            if (log != null) log.append(
761                "\t\t" + C.YELLOW + "File Saved:\t" + targetDirectory + fileName + "." +
762                ext.extension + C.RESET + '\n'
763            );
764
765            urls[pos]               = url;
766            saveDirectories[pos]    = targetDirectory;
767            fileNames[pos]          = fileName + '.' + ext.extension;
768            imageFormats[pos]       = ext;
769            sizes[pos]              = size;
770            widths[pos]             = width;
771            heights[pos]            = height;
772            pos++;
773            successCounter++;
774        }
775
776        void saveFail(URL url, String targetDirectory, String fileName, IF ext, Exception e)
777             throws IOException // The Appendable throws this
778        {
779            if (log != null) log.append(
780                "\t\t" + C.RED + "***FILE-SAVE-EXCEPTION:\t" + targetDirectory + fileName + "." +
781                ext.extension +
782                "\t\t" + e.getClass().getName() + ": " + e.getMessage() + C.RESET + '\n'
783            );
784
785            urls[pos]               = url;
786            skipped[pos]            = true;
787            saveDirectories[pos]    = targetDirectory;
788            fileNames[pos]          = fileName + '.' + ext.extension;
789            imageFormats[pos]       = ext;
790            exceptions[pos]         = e;
791            pos++;
792        }
793
794        void skipB64(String imgFormatStr, String encodedPartialStr)
795            throws IOException // The Appendable throws this
796        {
797            if (log != null) log.append(
798                "\t\t" + C.YELLOW + "Skipping B64 Encoded String: " + imgFormatStr + ", " + encodedPartialStr + C.RESET + '\n'
799            );
800
801            skipped[pos]            = true;
802            pos++;
803        }
804
805        void b64ConvertException(Exception e)
806            throws IOException // The Appendable throws this
807        {
808            if (log != null) log.append
809                ("\t\t" + C.RED + "Error Converting and Decoding Base-64 Image" + C.RESET + '\n');
810
811            skipped[pos]            = true;
812            exceptions[pos]         = e;
813            pos++;
814        }
815    }
816
817    /**
818        ******************************************************
819        class ImageScraper
820        ******************************************************
821        Iterable<String>                    source;
822        URL                                 originalPageURL;
823
824        String                              targetDirectory;
825        TargetDirectoryRetriever            targetDirectoryRetriever;
826        ImageReceiver                       imageReceiver;
827
828        ******************************************************
829        class ImageScraper.AdditionalParameters
830        ******************************************************
831        boolean                             skipOnIOException;
832        Predicate<URL>                      skipURL;
833        String                              fileNamePrefix;
834        boolean                             useDefaultCounterForImageFileNames;
835        FileNameRetriever                   getImageFileSaveName;
836        long                                maxDownloadWaitTime
837        TimeUnit                            waitTimeUnits
838    */
839
840    // *************************************************************************************
841    // *************************************************************************************
842    // download methods
843    // *************************************************************************************
844    // *************************************************************************************
845
846    /**
847     * Convenience Method.
848     * <BR />Invokes: {@link #download(AdditionalParameters, Appendable)}
849     * <!-- NOTE: JavaDoc Upgrader REMOVES EXCEPTION THROWS... DO NOT MOVE THIS METHOD -->
850     */
851    public Results download()
852        throws IOException, MalformedURLException, URISyntaxException
853    { return download(null, null); }
854
855    /**
856     * This will iterate through the {@code URL's} and download them.  Note: Both the
857     * {@code AdditionalParameters} and {@code 'log'} parameters may be null, and if they are, they
858     * will be ignored.
859     *
860     * @param a This parameter takes customization requests for batch image downloads.  This 
861     * parameter can be passed 'null' and when it is, customizations shall be ignored.
862     * 
863     * <BR /><BR /><B>SKIP ON EXCEPTION:</B> The most useful feature of the {@code class 
864     * AdditionalParameters} is to facilitate a download where invalid or out-dated {@code URL's}
865     * do not cause the download mechanism to break - which normally would require running an
866     * image-download from the beginning.  There is a simple {@code AdditionalParameters}
867     * constructor that quickly builds an instance of that class to have {@code boolean
868     * skipOnIOException} initialized to <B>TRUE</B>.
869     * 
870     * @param log This shall receive text / log information.  If this parameter receives 'null',
871     * it will be ignored.
872     *
873     * <EMBED CLASS="external-html" DATA-FILE-ID="APPENDABLE">
874     *
875     * @return an instance of {@code class Results} for the download.  The {@code class
876     * ImageScraper.Results} contains several parallel arrays with information about images that
877     * have downloaded.  If an image-download happens to fail due to an improperly formed {@code
878     * URL} (or an 'incorrect' {@code URL}), then the information in the {@code Results} arrays 
879     * will contain a 'null' value for the index at those array-positions corresponding to the
880     * failed image.
881     *
882     * @throws IOException This might throw if there is an {@code IOException} when downloading an
883     * image, or attempting to save an image to the file-system.  If the
884     * {@code AdditionalParameters 'a'} parameter is set to suppress-exceptions (and continue to the
885     * next Image {@code URL}, via the {@code boolean skipIOExceptions}), then this exception will
886     * never throw.
887     *
888     * @throws MalformedURLException This will throw if there are problems de-referencing the
889     * {@code URL's}.  If the {@code AdditionalParameters 'a'} parameter is set to 
890     * suppress-exceptions (and continue to the next Image {@code URL}, via the {@code boolean
891     * skipIOExceptions}), then this exception will never throw.
892     *
893     * @throws URISyntaxException Same as {@code MalformedURLException.}  Will not throw if 
894     * exceptions are ignored.
895     */
896    public Results download(AdditionalParameters a, Appendable log)
897        throws IOException, MalformedURLException, URISyntaxException
898    {
899        // Compute the size of the input, will make array-building much faster
900        Counter counter = new Counter();
901        source.forEach(url -> counter.addOne());
902
903        Results     results = new Results(counter.size(), log);
904 
905        for (String src : source) 
906            if (src == null)
907            {
908                results.nullURL();
909                if ((a != null) && a.skipOnIOException) continue;
910                else throw new NullPointerException("One of the SRC URL's was null.");
911            }
912            else
913            {
914                Matcher m = IF.B64_INIT_STRING.matcher(src);
915                if (m.find())   CONVERT_B64(m.group(1), m.group(2), results, a);
916                else            DOWNLOAD(COMPUTE_URL(src, results, a), results, a);
917            }
918
919        return results;
920    }
921
922    // *************************************************************************************
923    // *************************************************************************************
924    // Internal COMPUTE-URL / FILENAME Methods
925    // *************************************************************************************
926    // *************************************************************************************
927
928    private void CONVERT_B64(
929            String imageFormatStr, String b64EncodedImage, Results results,
930            AdditionalParameters a
931        )
932        throws IIOException, IOException
933    {
934        if (results.log != null) 
935            results.log.append(
936                "\tBASE-64 IMAGE:\t" + imageFormatStr + ',' + b64EncodedImage.substring(0, 40) + '\n'
937            );
938
939        if ((a == null) || ((a != null) && a.skipBase64EncodedImages))
940        {
941            results.skipB64(imageFormatStr, b64EncodedImage.substring(0, 15));
942            return;
943        }
944
945        IF              ext;
946        BufferedImage   image;
947        try {
948            ext     = IF.get(imageFormatStr);
949            image   = IF.decodeBase64ToImage(b64EncodedImage, ext);
950            //image   = IF.decodeBase64ToImage_V2(b64EncodedImage, ext);
951        } catch (Exception e)
952        {
953            results.b64ConvertException(e);
954            if ((a != null) && a.skipOnIOException) return;
955            throw e;
956        }
957        String fileName = FILENAME(null, ext, results, a);
958        HANDLE_DOWNLOADED_IMAGE(null, fileName, ext, results, a, image);
959    }
960
961    private URL COMPUTE_URL(String src, Results results, AdditionalParameters a)
962        throws IOException, URISyntaxException
963    {
964        if (results.log != null)
965            results.log.append("\tChecking / Converting SRC-URL string:\t" + src + '\n');
966
967        if (StrCmpr.startsWithXOR_CI(src.trim(), "http://", "https://"))
968
969            try
970                { return new URL(URLs.toProperURLV7(src)); }
971
972            catch(MalformedURLException e)
973            {
974                results.urlException(src, e);
975                if ((a != null) && a.skipOnIOException) return null;
976                else throw e;
977            }
978            catch(URISyntaxException e)
979            {
980                results.urlException(src, e);
981                if ((a != null) && a.skipOnIOException) return null;
982                else throw e;
983            }
984
985        else if (originalPageURL == null)
986        {
987            MalformedURLException ex = new MalformedURLException(
988                "You have passed a null 'originalPageURL' parameter, but at least one of the URL's " +
989                "you have passed for downloading is either a partial URL, or else an invalid URL: " +
990                "[" + src + "]"
991            );
992
993            results.urlException(src, ex);
994
995            if ((a != null) && a.skipOnIOException) return null;
996            else                                    throw ex;
997        }
998
999        Ret2<URL, MalformedURLException> ret = Links.resolve_KE(src, originalPageURL);
1000
1001        if (ret == null) // I do not think this case is possible.  I'll leave it here anyway.
1002        {
1003            results.nullURL();
1004            return null;
1005        }
1006        if (ret.b != null)
1007        {
1008            results.urlException(src, ret.b);
1009            if ((a != null) && a.skipOnIOException) return null;
1010            else throw ret.b;
1011        }
1012
1013        // ADDED in NOVEMBER, 2019
1014        // This micro-detail is the case where the "Resolved URL" also has ASCII-Escape characters
1015        // that need to be escaped.  This is rare, but it needs to be heeded.  If there are
1016        // ASCII-Escape character (which must be escaped).   Then "toProperURLV8" will handle that
1017        // well enough.
1018        //
1019        // CONSIDER IT: Post-Processing of the "Resolve URLs" class
1020
1021        try
1022            { return new URL(URLs.toProperURLV8(ret.a)); }
1023
1024        catch(MalformedURLException e)
1025        {
1026            results.urlException(src, e);
1027            if ((a != null) && a.skipOnIOException) return null;
1028            else throw e;
1029        }
1030        catch(URISyntaxException e)
1031        {
1032            results.urlException(src, e);
1033            if ((a != null) && a.skipOnIOException) return null;
1034            else throw e;
1035        }
1036    }
1037
1038    private String FILENAME(URL url, IF ext, Results results, AdditionalParameters a)
1039    {
1040        String fileName = ((a != null) && (a.fileNamePrefix != null)) ? a.fileNamePrefix : "";
1041
1042        if ((a != null) && (a.getImageFileSaveName != null))
1043            fileName = fileName + a.getImageFileSaveName.fileName
1044                (url, ext, results.pos, results.successCounter);
1045
1046        else if ((a == null) || ((a != null) && a.useDefaultCounterForImageFileNames))
1047            fileName = fileName + StringParse.zeroPad(results.successCounter);
1048
1049        else
1050        {
1051            fileName = url.getFile().substring(1);
1052            if (fileName.toLowerCase().endsWith('.' + ext.extension))
1053                fileName = fileName.substring(0, fileName.length() - 1 - ext.extension.length());
1054        }
1055        return fileName;
1056    }
1057
1058    // *************************************************************************************
1059    // *************************************************************************************
1060    // Internal Download Methods
1061    // *************************************************************************************
1062    // *************************************************************************************
1063
1064    /**
1065     * If this class has been used to make "multi-threaded" calls that use a Time-Out wait-period,
1066     * you might see your Java-Program hang for a few seconds when you would expect it to exit back
1067     * to your O.S. normally.
1068     *
1069     * <BR /><BR /><B><SPAN STYLE="color: red;">NOTE:</B></SPAN>
1070     * {@code AdditionalParameters.maxDownloadWaitTime, AdditionalParameters.waitTimeUnits} operate
1071     * by building a "Timeout &amp; Monitor" thread.  Thusly, when a program you have written
1072     * yourself reaches the end of its code, <I><B>if you have performed any time-dependent
1073     * Image-Downloads using {@code class ImageScraper}</B></I>, then your program <I>might not
1074     * exit immediately,</I> but rather sit at the command-prompt for anywhere between 10 and 30
1075     * seconds before this Timeout-Thread  dies.
1076     *
1077     * <BR /><BR /><B><SPAN STYLE="color: red">MULTI-THREADED:</B></SPAN> You may immediately
1078     * terminate any additional threads that were started using this method.
1079     */
1080    public static void shutdownTOThreads() { executor.shutdownNow(); }
1081
1082    // ******************************
1083    private static final    ExecutorService executor    = Executors.newCachedThreadPool();
1084    private static final    Lock            lock        = new ReentrantLock();
1085    // ******************************
1086
1087    private void DOWNLOAD(URL url, Results results, AdditionalParameters a) throws IOException
1088    {
1089        BufferedImage   image;
1090        if (url == null) return;
1091
1092        Appendable log = results.log;
1093        if (log != null) log.append("\tIMAGE-URL:\t\t" + url.toString() + '\n');
1094
1095        if ((a != null) && (a.skipURL != null) && (a.skipURL.test(url) == true))
1096        { results.skippedURL(url); return; }
1097
1098        // ******************************
1099        // *** ADDED on May 1st, 2019 ***
1100        // ******************************
1101        Callable<BufferedImage> threadDownloader = new Callable<BufferedImage>()
1102        {
1103            public BufferedImage call() throws Exception
1104            {
1105                try { return ImageIO.read(url); }
1106                catch (IIOException e)
1107                {   
1108                    // This will **sometimes** help when connecting to a URL "expects" this "User-Agent"
1109                    // This won't *always* work - or will it?  It is a very large-internet, with many MANY types of web-servers.
1110                    // THIS IS SORT-OF "ATTEMPT TO DOWNLOAD #2"
1111                    // try {
1112                    if (log != null) log.append("\tUSING USER-AGENT:\t" + url.toString() + '\n');
1113                    HttpURLConnection con = (HttpURLConnection) url.openConnection();
1114                    con.setRequestMethod("GET");
1115                    con.setRequestProperty("User-Agent", "Chrome/61.0.3163.100");
1116                    InputStream is = con.getInputStream();
1117                    return ImageIO.read(is);
1118                }
1119            }
1120        };
1121
1122        lock.lock();
1123        Future<BufferedImage> future = executor.submit(threadDownloader);
1124        lock.unlock();
1125
1126        long wt = ((a != null) && (a.waitTimeUnits != null)) 
1127            ? a.maxDownloadWaitTime 
1128            : MAX_WAIT_TIME;
1129
1130        TimeUnit tu = ((a != null) && (a.waitTimeUnits != null)) 
1131            ? a.waitTimeUnits
1132            : MAX_WAIT_TIME_UNIT;
1133
1134        try
1135            { image = future.get(wt, tu); }
1136
1137        catch (TimeoutException e)
1138        {
1139            if (e.getMessage() == null) e = new TimeoutException
1140                ("Waited: " + wt + " " + tu.toString());
1141
1142            results.downloadException(url, e);
1143
1144            if ((a != null) && a.skipOnIOException) return;
1145
1146            throw new IOException
1147                ("The download timed-out, see getCause() for more information.", e);
1148        }
1149
1150        catch (ExecutionException e)
1151        {
1152            Exception cause = ((e.getCause() != null) && (e.getCause() instanceof Exception)) 
1153                ? (Exception) e.getCause() 
1154                : e;
1155
1156            results.downloadException(url, cause);
1157
1158            if ((a != null) && a.skipOnIOException) return;
1159
1160            throw new IOException
1161                ("The download had an exception, see getCause() for more information.", cause);
1162        }
1163
1164        catch (InterruptedException e)
1165        {
1166            results.downloadException(url, e);
1167
1168            if ((a != null) && a.skipOnIOException) return;
1169
1170            throw new IOException(
1171                "The download was interrupted by another thread, see this Throwable.getCause() " +
1172                "for more information.", e
1173            );
1174        }
1175
1176        IF      ext         = IF.getGuess(url.toString());
1177        String  fileName    = FILENAME(url, ext, results, a);
1178
1179        HANDLE_DOWNLOADED_IMAGE(url, fileName, ext, results, a, image);
1180    }
1181
1182    private void HANDLE_DOWNLOADED_IMAGE(
1183            URL url, String fileName, IF ext, Results results,
1184            AdditionalParameters a, BufferedImage image
1185        )
1186        throws IIOException, IOException
1187    {
1188        Appendable log = results.log;
1189
1190        String dirName = null;
1191
1192        if (targetDirectory != null)
1193            dirName = targetDirectory;
1194
1195        else if (targetDirectoryRetriever != null)
1196            dirName = targetDirectoryRetriever.dir
1197                (url, fileName, ext, results.pos, results.successCounter);
1198
1199        else if (imageReceiver != null)
1200        {
1201            imageReceiver.save(url, fileName + '.' + ext.extension, ext, results.pos, results.successCounter, image);
1202            
1203            long l;
1204            {   // CODE COPIED FROM STACK-OVERFLOW.  This should probably become a separate-method. 
1205                //  I am not 100% it works yet.  (The "Green Check Mark" was not checked on this answer!)
1206                ByteArrayOutputStream tmp = new ByteArrayOutputStream();
1207                ImageIO.write(image, ext.extension, tmp);
1208                tmp.close();
1209                l = tmp.size();
1210            }
1211            results.imageReceiverSuccess(url, fileName, ext, l, image.getWidth(), image.getHeight());
1212            return;
1213        }
1214
1215        else throw new IllegalStateException
1216            ("Not image-target specified.  Illegal State - is a constructor overloaded?");
1217
1218        if (! dirName.endsWith(File.separator)) dirName += File.separator;
1219
1220        File f = null;
1221        if (ext != null)
1222            try {
1223                String fName = dirName + fileName + '.' + ext.extension;
1224                if (log != null) log.append("\tAttempting to save file: " + fName + '\n');
1225
1226                f = new File(fName);
1227                ImageIO.write(image, ext.extension, f);
1228                results.saveSuccess(
1229                    url, dirName, fileName, ext, f.length(),
1230                    image.getWidth(), image.getHeight()
1231                );
1232                return;
1233            } catch (Exception e)
1234            {
1235                results.saveFail(url, dirName, fileName, ext, e);
1236                if ((a != null) && a.skipOnIOException) return;
1237                throw e;
1238            }
1239        else
1240        {
1241            String fName = dirName + fileName + '.';
1242            for (IF imageFormat : IF.values())
1243                try {
1244                    f = new File(fName + imageFormat.extension);
1245                    ImageIO.write(image, imageFormat.extension, f);
1246                    results.saveSuccess(
1247                        url, dirName, fileName, imageFormat, f.length(),
1248                        image.getWidth(), image.getHeight()
1249                    );
1250                    return;
1251                }
1252                catch (javax.imageio.IIOException e)    { f.delete();   continue; }
1253                catch (Exception e)
1254                {
1255                    e.printStackTrace();
1256                    results.saveFail(url, dirName, fileName, imageFormat, e);
1257                    if ((a != null) && a.skipOnIOException) return;
1258                    throw e;
1259                }
1260        }
1261    }
1262
1263    // *************************************************************************************
1264    // *************************************************************************************
1265    // Localize Images methods
1266    // *************************************************************************************
1267    // *************************************************************************************
1268
1269    /** 
1270      * Convenience Method.
1271      * <BR />Invokes: {@link #localizeImages(Vector, URL, Appendable, AdditionalParameters, String)}
1272      * <BR />Passes: null to {@link AdditionalParameters} and to root-{@code URL}
1273      * <BR /><B>WARNING:</B> Presumes there are no partial-{@code URL's}
1274      */
1275    public static Ret2<int[], ImageScraper.Results> localizeImages
1276        (Vector<HTMLNode> page, Appendable log, String downloadDirectory)
1277        throws IOException
1278    { return localizeImages(page, null, log, null, downloadDirectory); }
1279
1280    /** 
1281      * Convenience Method.
1282      * <BR />Invokes: {@link #localizeImages(Vector, URL, Appendable, AdditionalParameters, String)}.
1283      * <BR />Passes: null to {@link AdditionalParameters}.
1284      */
1285    public static Ret2<int[], ImageScraper.Results> localizeImages
1286        (Vector<HTMLNode> page, URL pageURL, Appendable log, String downloadDirectory)
1287        throws IOException
1288    { return localizeImages(page, pageURL, log, null, downloadDirectory); }
1289
1290    /**
1291     * Downloads images located inside an HTML Page and updates the {@code SRC=...} {@code URL's}
1292     * so that the links point to a <I>local copy</I> of <I>local images</I>.
1293     *
1294     * <BR /><BR />After completion of this method, an HTML page which contained any HTML image
1295     * elements will have had those images downloaded to the local file-system, and also have had 
1296     * the HTML attribute {@code 'src=...'} changed to reflect the local image name instead of the
1297     * Internet URL name.
1298     *
1299     * @param page Any vectorized-html page or subpage.  This page should have HTML {@code <IMG ...>}
1300     * elements in it, or else this method will exit without doing anything.
1301     *
1302     * @param pageURL If any of the HTML image elements have {@code src='...'} attributes that are
1303     * partially resolved or <I>relative {@code URL's}</I> then this can be passed to the
1304     * {@code ImageScraper} constructors in order to convert partial or relative {@code URL's}
1305     * into complete {@code URL's.}  The Image Downloader simply cannot work with partially
1306     * resolved {@code URL's}, and will skip them if they are partially resolved.  This parameter
1307     * may be null, but if it is and there are incomplete-{@code URL's} those images will
1308     * simply not be downloaded.
1309     *
1310     * @param log This is the 'logger' for this method.  It may be null, and if it is - no output
1311     * will be sent to the terminal.
1312     *
1313     * <EMBED CLASS="external-html" DATA-FILE-ID="APPENDABLE">
1314     *
1315     * @param ap This is the {@link AdditionalParameters} parameter that allows to further
1316     * specify the request to the Image Downloader.  See the documentation for this class for more
1317     * information.  This parameter may be null, and if it is, it will be ignored and default
1318     * behavior will occur.
1319     * 
1320     * <BR /><BR /><B>SKIP ON EXCEPTION:</B> The most useful feature of the {@code class
1321     * AdditionalParameters} is to facilitate a download where invalid or out-dated {@code URL's}
1322     * do not cause the download mechanism to break - which normally would require running an
1323     * image-download from the beginning.  There is a simple {@code AdditionalParameters} 
1324     * constructor that quickly builds an instance of that class to have
1325     * {@code boolean skipOnIOException} initialized to <B>TRUE</B>.
1326     *
1327     * @param downloadDirectory This File-System directory where these files shall be stored.
1328     *
1329     * @return An instance of {@code Ret2<int[], ImageScraper.Results>}.  The two returned elements
1330     * of this class include:
1331     *
1332     * <BR /><BR /><UL CLASS="JDUL">
1333     * <LI> {@code Ret2.a (int[])}
1334     *      <BR /><BR />This shall contain an index-array for the indices of each HTML
     *      {@code '<IMG SRC=...>'} element found on the page.  It is not guaranteed that each of
     *      these images will have been resolved or downloaded successfully, but rather just that
     *      each index points to an HTML {@code 'IMG'} element.  The second element of this
1338     *      return-type will contain information regarding which images downloaded successfully.
1339     *      <BR /><BR />
1340     * </LI>
1341     * <LI> {@code Ret2.b (ImageScraper.Results)}
1342     *      <BR /><BR />The second element of the return-type shall be the instance of
1343     *      {@link ImageScraper.Results} returned from the invocation of
     *      {@code ImageScraper.download(...)}.  This object will provide details about each of the
1345     *      images that were downloaded; or, if the download failed, the reasons for the failure.
1346     *      <I>This return element shall be null if no images were found on the page.</I>
1347     *      <BR />
1348     * </LI>
1349     * </UL>
1350     * 
1351     * <BR />These return {@code Object} references are not necessarily important - <I>and they
1352     * may be discarded if needed.</I>  They are provided as a matter of utility if further
1353     * verification or research into successful downloads is needed.
1354     *
1355     * @see AdditionalParameters
1356     */
1357    public static Ret2<int[], ImageScraper.Results> localizeImages(
1358        Vector<HTMLNode> page, URL pageURL, Appendable log, AdditionalParameters ap,
1359        String downloadDirectory
1360    )
1361        throws IOException
1362    {
1363        int[]               imgPosArr   = TagNodeFind.all(page, TC.Both, "img");
1364        Vector<TagNode>     vec         = new Vector<>();
1365
1366        // No Images Found.
1367        if (imgPosArr.length == 0) return new Ret2<int[], Results>(imgPosArr, null);
1368
1369        for (int pos : imgPosArr) vec.addElement((TagNode) page.elementAt(pos));
1370
1371        ImageScraper is = new ImageScraper(vec, pageURL, downloadDirectory);
1372        ImageScraper.Results r;
1373
1374        try
1375            { r = is.download(ap, log); }
1376        catch (URISyntaxException e)
1377        {
1378            throw new IOException(
1379                "There was a problem de-referencing one of the partial-URL's from the page URL.  " +
1380                "See this methods's Throwable.getCause() for details.",
1381                e
1382            ); 
1383        }
1384
1385        // ImageScraper.shutdownTOThreads(); 
1386        // NOTE-TO-READER: Need to call this method, or function will not shutdown.
1387        // NOTE: Commented out for now.
1388
1389        ReplaceNodes.r(page, imgPosArr, (HTMLNode n, int arrPos, int count) ->
1390        {
1391            if (    (r.fileNames[count] != null)
1392                &&  ((r.exceptions[count] == null)
1393                &&  (r.skipped[count] == false)))
1394
1395                return ((TagNode) page.elementAt(arrPos))
1396                        .setAV("src", r.fileNames[count], SD.SingleQuotes);
1397
1398            else
1399                return (TagNode) n;
1400        });
1401
1402        return new Ret2<int[], Results>(imgPosArr, r);
1403    }
1404}