001package Torello.HTML.Tools.Images;
002
003
004// *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
005// My Imports
006// *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
007
008import Torello.HTML.*;
009import Torello.Java.*;
010
011import Torello.HTML.NodeSearch.TagNodeFind;
012import Torello.Java.Additional.Ret2;
013import Torello.Java.Additional.AppendableLog;
014import Torello.Java.Additional.AppendableSafe;
015
016
017// *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
018// JDK Imports.  These are all spelled-out at the bottom, because none of them are commonly used.
019// *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
020
021// ByteArrayOutputStream, File, IOException
022import java.io.*;
023
024// Callable, Executors, ExecutorService
025import java.util.concurrent.*;
026
027import java.net.URL;
028import java.net.HttpURLConnection;
029import java.util.Vector;
030import java.util.regex.Matcher;
031import java.util.concurrent.locks.Lock;
032import java.util.concurrent.locks.ReentrantLock;
033import javax.imageio.ImageIO;
034import java.awt.image.BufferedImage;
035
036/**
037 * A more advanced class for both downloading and saving a list of images, using URL's.
038 * 
039 * <EMBED CLASS='external-html' DATA-FILE-ID=ISR>
040 */
041@Torello.JavaDoc.StaticFunctional
042@Torello.JavaDoc.JDHeaderBackgroundImg(EmbedTagFileID="IMAGE_SCRAPER_CLASS")
043public class ImageScraper
044{
045    // This Class is Static-Functional, and does not have any program state, other than the monitor
046    // Thread.  There is no need for a public-constructor, or any constructor for that matter.
047
048    private ImageScraper() { }
049
050    // Helps "FIND" the bugs.  There are only 6 extra boolean-comparisons for a println
051    // There is no need to delete this right now.
052
053    private static final boolean DEBUGGING = false;
054
055
056    // ********************************************************************************************
057    // ********************************************************************************************
058    // RECORD: Used as ImageScraper class Top-Level Data-Flow **AND** Helper-Function
059    // ********************************************************************************************
060    // ********************************************************************************************
061
062
    // Simple "Record" that makes passing these parameters all around willy-nilly a lot easier
064    // Used Strictly Internally to this class
065    //
066    // There turns out to be a lot of "data" in both the form of "configurations", and even more
067    // that is saved and returned to the user after completion.  This RECORD right here saves all
068    // of the data, and keeps inside ... well ... one single (top-level) reference.
069
070    private static class RECORD
071    {
072        private static final String I4 = "    ";
073
074
075        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
076        // Constant (final) for the ENTIRETY of the download-process
077        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
078
079        final Request           request;
080        final Results           results;
081        final AppendableLog     al;
082        final AppendableSafe    log;
083
084        // Has a non-null log
085        final boolean hasLog;
086
087        // Verbosity-Level that is Strictly Equal-To
088        final boolean logLevelEQ1;
089        final boolean logLevelEQ2;
090        final boolean logLevelEQ3;
091
092        // Verbosity-Level that is Greater-Than or Equal-To
093        final boolean logLevelGTEQ1;
094        final boolean logLevelGTEQ2;
095
096
097        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
098        // These change with each loop iteration
099        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
100
101        // Reference-Fields
102        URL         url             = null;
103        String[]    b64ImageData    = null;
104        ImageInfo   imageInfo       = null;
105
106        // Boolean-Primitive
107        boolean isB64EncodedImage = false;
108
109
110        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
111        // Constructor
112        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
113
114        RECORD(Request request, Results results, AppendableLog al)
115        {
116            this.request    = request;
117            this.results    = results;
118            this.al         = al;
119            this.log        = al.log;
120
121            // If there is a non-null log, set the boolean stating that there is a log
122            this.hasLog = (al.log != null);
123
124            // DEBUGGING:
125            // System.out.println("hasLog: " + hasLog + ", al.level: " + al.level);
126            // if (! Q.YN("Continue?")) System.exit(0);
127
128            // Makes Verbose-Printing Code neater and easier to look at.
129            this.logLevelEQ1 = hasLog && (al.level == 1);
130            this.logLevelEQ2 = hasLog && (al.level == 2);
131            this.logLevelEQ3 = hasLog && (al.level == 3);
132
133            // Also Makes Verbosity Faster & Easier to Read
134            this.logLevelGTEQ1 = hasLog && (al.level >= 1);
135            this.logLevelGTEQ2 = hasLog && (al.level >= 2);
136        }
137
138
139        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
140        // Some Simple Methods
141        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
142
143        // This is called at the very beginning of the Primary Download-Loop, directly at the top
144        // of the loop-body.  It s the first thing that is done on each iteration of the download.
145        //
146        // NOTE: This resets all NON-FINAL fields in this class.
147
148        void reset()
149        {
150            this.url                = null;
151            this.b64ImageData       = null;
152            this.imageInfo          = null;
153            this.isB64EncodedImage  = false;
154        }
155
156        void append(String s)   { log.append(s); }
157        void appendI4(String s) { log.append(I4).append(s); }
158
159        // This is always a useful debugging tool, both now, and possibly in the future
160        public String toString()
161        {
162            return
163                // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
164                // Constant (final) for the ENTIRETY of the download-process
165                // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 
166
167                "RECORD's 'final' Fields (Constant through-out entire-download):\n" +
168
169                // final Request request;
170                I4 + "this.request:        " + ((this.request != null) ? "non-" : "") + "null\n" +
171
172                // final Results results;
173                I4 + "this.results:        " + ((this.results != null) ? "non-" : "") + "null\n" +
174
175                // final AppendableLog al;
176                I4 + "this.AppendableLog:  " + ((this.al != null) ? "non-" : "") + "null\n" +
177
178                // final AppendableSafe log;
179                I4 + "this.AppendableSafe: " + ((this.log != null) ? "non-" : "") + "null\n" +
180
181                // final boolean hasLog;
182                I4 + "this.hasLog:         " + this.hasLog + '\n' +
183
184                // final boolean logLevelEQ1;
185                I4 + "this.logLevelEQ1:    " + this.logLevelEQ1 + '\n' +
186
187                // final boolean logLevelEQ2;
188                I4 + "this.logLevelEQ2:    " + this.logLevelEQ2 + '\n' +
189
190                // final boolean logLevelEQ3;
191                I4 + "this.logLevelEQ3:    " + this.logLevelEQ3 + '\n' +
192
193                // final boolean logLevelGTEQ1;
194                I4 + "this.logLevelGTEQ1:  " + this.logLevelGTEQ1 + '\n' +
195
196                // final boolean logLevelGTEQ2;
197                I4 + "this.logLevelGTEQ2:  " + this.logLevelGTEQ2 + '\n' +
198
199
200                // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
201                // These change with each loop iteration
202                // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 
203
204                "Fields that change on each Loop-Iteration:\n" +
205
206                // URL url = null;
207                I4 + "this.url:               " +
208                    ((this.url != null) ? url.toString() : "null") + '\n' +
209
210                // String[] b64ImageData = null;
211                I4 + "this.b64ImageData:      " +
212                    ((this.b64ImageData != null) ? "non-" : "") + "null\n" +
213
214                // ImageInfo imageInfo = null;
215                I4 + "this.imageInfo:         " +
216                    ((this.imageInfo != null) ? "non-" : "") + "null\n" +
217
218                // boolean isB64Image = false;
219                I4 + "this.isB64EncodedImage: " + this.isB64EncodedImage + '\n';
220        }
221
222
223        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
224        // Helpers that SIMULTANEOUSLY write-results to 'Results' and write-log to 'AppendableLog'
225        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
226
227        // Simple Helper for Printing to the Appendable log
228        void printEx(String operation, Throwable t)
229        {
230            this.al.append(
231                "    The " + operation + " Code has thrown an Exception:\n" +
232                "        Throwable Class: " + t.getClass().getName() + '\n' +
233                "        Message:         [" + t.getMessage() + "]\n"
234            );
235
236            while ((t = t.getCause()) != null) this.al.append(
237                "            Caused by Throwable Class: " + t.getClass().getName() + '\n' +
238                "            Message:                   [" + t.getMessage() + "]\n"
239            );
240        }
241
242        // There are 4 different User-Provided Lambda-Targets.  If they throw an exception (which
243        // should be extremely rare), this method is called.
244        //
245        // NOTE: This method only works if "RECORD.imageInfo" is NON-NULL.  This means that the
246        //       first couple of User-Provided Lambda's have to use "reportEx" instead!
247
248        <T> T userLambdaEx(String userLambdaName, Exception e) throws ImageScraperException
249        {
250            this.results.userLambdaException(this.imageInfo, e);
251
252            final String errMsg =
253                "While attempting to invoke the User-Provided Lambda-Target" +
254                "'Request." + userLambdaName + "', an exception was thrown by the code.";
255
256            if (this.request.skipOnUserLambdaException)
257            {
258                if      (this.logLevelEQ1) this.append("x ");
259                else if (this.logLevelEQ2) this.appendI4(errMsg + '\n');
260                else if (this.logLevelEQ3) this.printEx("Invoke User '" + userLambdaName +"'", e);
261
262                return null;
263            }
264
265            else throw new ImageScraperException
266                (errMsg + ".  Please see Throwable.getCause() for more details.", e);
267        }
268
269        <T> T reportEx(boolean skipBool, String errMsg, String operationName, Exception e)
270            throws ImageScraperException
271        {
272            // Paranoia & Sanity (A Simple Check)  (An 'assert' that should never happen)
273            if (e == null) throw new UnreachableError();
274
275            this.results.exceptionFail(this.url, e);
276
277            if (skipBool)
278            {
279                if      (this.logLevelEQ1) this.append("x ");
280                else if (this.logLevelEQ2) this.appendI4(errMsg + '\n');
281                else if (this.logLevelEQ3) this.printEx(operationName, e);
282
283                return null;
284            }
285
286            else throw ImageScraperException.class.isAssignableFrom(e.getClass())
287                ? ((ImageScraperException) e)
288                : new ImageScraperException(errMsg + ".  See Throwable.geCause() for details.", e);
289        }
290    }
291
292
293    // ********************************************************************************************
294    // ********************************************************************************************
295    // Thread-Related Stuff
296    // ********************************************************************************************
297    // ********************************************************************************************
298
299
300    /**
301     * If this class has been used to make "multi-threaded" calls that use a Time-Out wait-period,
302     * you might see your Java-Program hang for a few seconds when you would expect it to exit back
303     * to your O.S. normally.
304     *
305     * <BR /><BR /><B CLASS=JDDescLabel>Before Exiting:</B>
306     * 
307     * <BR />When a program you have written reaches the end of its code, if you have performed any
308     * time-dependent Image-Downloads using this class (class {@code ImageScraper}), then your
309     * program <I>might not exit immediately,</I> but rather sit at the command-prompt for anywhere
310     * between 10 and 30 seconds before this Timeout-Thread dies.
311     *
312     * <BR /><BR />Note that you may immediately terminate any additional threads that were started
313     * using this method.
314     */
315    public static void shutdownTOThreads() { executor.shutdownNow(); }
316
317    // This class is Static-Functional, and these are the only class-fields.  They are both final,
318    // and the 'lock' variable is used to ensure that the class is, indeed, Thread-Safe.
319
320    private static final ExecutorService    executor    = Executors.newCachedThreadPool();
321    private static final Lock               lock        = new ReentrantLock();
322
323
324    // ********************************************************************************************
325    // ********************************************************************************************
326    // Primary User-API Methods
327    // ********************************************************************************************
328    // ********************************************************************************************
329
330
331    /**
332     * Downloads images located inside an HTML Page and updates the {@code SRC=...} {@code URL's}
333     * so that the links point to a <I>local copy</I> of <I>local images</I>.
334     *
335     * <BR /><BR />After completion of this method, an HTML page which contained any HTML image
336     * elements will have had those images downloaded to the local file-system, and also have had 
337     * the HTML attribute {@code 'src=...'} changed to reflect the local image name instead of the
338     * Internet URL name.
339     *
340     * @param page Any vectorized-html page or subpage.  This page should have HTML {@code <IMG ...>}
341     * elements in it, or else this method will exit without doing anything.
342     *
343     * @param pageURL If any of the HTML image elements have {@code src='...'} attributes that are
344     * partially resolved or <I>relative {@code URL's}</I> then this can be passed to the
345     * {@code ImageScraper} constructors in order to convert partial or relative {@code URL's}
346     * into complete {@code URL's.}  The Image Downloader simply cannot work with partially
347     * resolved {@code URL's}, and will skip them if they are partially resolved.  This parameter
348     * may be null, but if it is and there are incomplete-{@code URL's} those images will
349     * simply not be downloaded.
350     *
351     * @param log This is the 'logger' for this method.  It may be null, and if it is - no output
352     * will be sent to the terminal.
353     *
354     * <EMBED CLASS='external-html' DATA-FILE-ID=APPENDABLE>
355     *
     * @param targetDirectory The File-System directory where these files shall be stored.
357     *
358     * @return An instance of {@code Ret2<int[], Results>}.  The two returned elements
359     * of this class include:
360     *
361     * <BR /><BR /><UL CLASS=JDUL>
362     * <LI> {@code Ret2.a (int[])}
     *      <BR /><BR />This shall contain an index-array for the indices of each HTML
     *      {@code '<IMG SRC=...>'} element found on the page.  It is not guaranteed that each of
     *      these images will have been resolved or downloaded successfully, but rather just that
     *      each index points to an HTML {@code 'IMG'} element that had a {@code 'SRC'} attribute.
     *      The second element of this return-type will contain information regarding which images
     *      downloaded successfully.
368     *      <BR /><BR />
369     * </LI>
370     * <LI> {@code Ret2.b (Results)}
371     *      <BR /><BR />The second element of the return-type shall be the instance of
372     *      {@link Results} returned from the invocation of
373     *      {@code ImageScraper.download(...)}.  This method will provide details about each of the
374     *      images that were downloaded; or, if the download failed, the reasons for the failure.
375     *      <I>This return element shall be null if no images were found on the page.</I>
376     *      <BR />
377     * </LI>
378     * </UL>
379     * 
380     * <BR />These return {@code Object} references are not necessarily important - <I>and they
381     * may be discarded if needed.</I>  They are provided as a matter of utility if further
382     * verification or research into successful downloads is needed.
383     * 
384     * @throws IOException I/O Problems that weren't avoided.
385     * @throws ImageScraperException Thrown for any number of errors that went unsuppressed.
386     */
387    public static Ret2<int[], Results> localizeImages
388        (Vector<HTMLNode> page, URL pageURL, Appendable log, String targetDirectory)
389        throws IOException, ImageScraperException
390    {
391        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
392        // Find all of the Image TagNode's on the Input Web-Page
393        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
394
395        int[]           imgPosArr   = TagNodeFind.all(page, TC.Both, "img");
396        Vector<TagNode> vec         = new Vector<>();
397
398        // No Images Found.
399        if (imgPosArr.length == 0) return new Ret2<int[], Results>(imgPosArr, null);
400
401        for (int pos : imgPosArr) vec.addElement((TagNode) page.elementAt(pos));
402
403
404        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
405        // Build a Request and Download all of the Image's that were just found / identified
406        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
407
408        Request request = Request.buildFromTagNodeIter(vec, pageURL, true);
409        request.targetDirectory = targetDirectory;
410
411        // NOTE: This is NOT FINISHED:
412        // SET ALL OF THE "Skip On Exception" booleans to TRUE!!!
413
414        // Invoke the Main Image Downloader
415        Results r = ImageScraper.download(request, log);
416
417
418        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
419        // Replace the <IMG SRC=...> TagNode URL's for images that were successfully downloaded.
420        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
421
422        // Now replace 
423        ReplaceFunction replacer = (HTMLNode n, int arrPos, int count) ->
424        {
425            if (r.skipped[count] == false)
426
427                return ((TagNode) page.elementAt(arrPos))
428                        .setAV("src", r.fileNames[count], SD.SingleQuotes);
429
430            else return (TagNode) n;
431        };
432    
433        ReplaceNodes.r(page, imgPosArr, replacer);
434
435
436        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
437        // Report the Results
438        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
439
440        return new Ret2<int[], Results>(imgPosArr, r);
441    }
442
443    /**
444     * This will iterate through the {@code URL's} and download them.  Note that parameter
445     * {@code 'log'} may be null, and if so, it will be quietly ignored.
446     *
447     * @param request This parameter takes customization requests for batch image downloads.  To
448     * read more information about how to configure a download, please review the documentation for
449     * the class {@link Request}.
450     *
     * <BR /><BR />Note that upon entering this method, this parameter is immediately cloned to
     * prevent the possibility of Thread Concurrency Problems from happening.  After cloning, the
     * cloned instance is used exclusively, and the original parameter is discarded.  Further
     * changes to the parameter-instance will not have any effect on the process.
455     * 
     * @param log This shall receive text / log information.  This parameter may receive null, and
     * if it does it will be ignored.  When ignored, logging information will not be printed.
458     *
459     * <EMBED CLASS='external-html' DATA-FILE-ID=APPENDABLE>
460     *
461     * @return an instance of {@code class Results} for the download.  The {@link Results} class
462     * contains several parallel arrays with information about images that have downloaded.  If an
463     * image-download happens to fail due to an improperly formed {@code URL} (or an 'incorrect' 
464     * {@code URL}), then the information in the {@code Results} arrays will contain a 'null' value
465     * for the index at those array-positions corresponding to the failed image.
466     *
467     * @throws ImageScraperException Thrown for any number of exceptions that may be thrown while
468     * executing the download-loop.  If another exception is thrown, then it is wrapped by this
469     * class' exception ({@link ImageScraperException}), and set as the {@code 'cause'} of that
470     * exception.
471     * 
     * @throws AppendableError The interface {@code java.lang.Appendable} was designed to allow for
     * an implementation to throw the (checked) exception {@code IOException}.  This has many
     * blessings, but can occasionally be a pain since, indeed, {@code IOException} is both a
     * checked exception (and requires an explicit catch), and also very common
     * (even ubiquitous) inside of HTTP download code.
477     * 
     * <BR /><BR />If the user-provided {@code 'log'} parameter throws an {@code IOException} for
     * simply trying to write character-data to the log about the download-progress, then <I>an
     * {@code AppendableError} will be thrown</I>.  Note that this throwable does inherit
     * {@code java.lang.Error}, meaning that it won't be caught by standard Java {@code catch}
     * clauses <I>(unless {@code 'Error'} is explicitly mentioned!)</I>
483     */
484    public static Results download(Request request, Appendable log)
485        throws ImageScraperException
486    {
487        // Clone the Request, Similar to "SafeVarArgs" - Specifically, if the user starts playing
488        // with the contents of this class in the middle of a download, it will not have any effect
489        // on the 'request' object that is actually being used.
490
491        request = request.clone();        
492    
493        // Runs a few tests to make sure there are no problems using the request
494        request.CHECK();
495
496        // Makes log printing easier and easier.
497        AppendableLog al = new AppendableLog(log, request.verbosity);
498
499        // Main Request-Configuration and Response Class Instances.
500        Results results = new Results(request.size);
501
502        // Private, Internal Static-Class.  Makes passing variables even easier
503        RECORD r = new RECORD(request, results, al);
504
505        // Now, this just gets rid of the surrounding try-catch block.  This is the only real
506        // reason for the internal/private method 'downloadWithoutTryCatch'.  This makes the
507        // indentation look a lot better.  Also, in this method, the 'log' is replaced with the
508        // AppendableSafe log
509
510        try 
511        {
512            mainDownloadLoop(r);
513            return results;
514        }
515
516        catch (ImageScraperException e)
517        {
518            // If an exception causes the system to stop/halt, this extra '\n\n' makes the output
519            // text look a little nicer (sometimes... Sometimes it already looks fine).
520            // No more no less.
521
522            if (al.hasLog) al.append("\n\nThrowing ImageScraperException...\n");
523            throw e;
524        }
525    }
526
527
528    // ********************************************************************************************
529    // ********************************************************************************************
530    // Main Download Iterator-Loop Method
531    // ********************************************************************************************
532    // ********************************************************************************************
533
534
535    private static void mainDownloadLoop(RECORD r) throws ImageScraperException
536    {
537        // Helps prepare for the printing loop;
538        if (r.logLevelGTEQ1) r.append("\n");
539
540        // The "Main Benefit" of having a "Loop-Body" Method is to make the code below in the
541        // actual Loop-Body have one-less-level-of-indentation.  That's really the only point of
542        // doing this - whatsoever!
543        //
544        // NOTE: Remember that all the 'continue' commands inside "loopBody" had to be changed
545        //       into 'return' commands
546
547        for (URL url : r.request.source())
548        {
549            r.reset();
550            r.url = url;
551            loopBody(r);
552        }
553    }
554
555    private static void loopBody(RECORD r) throws ImageScraperException
556    {
557        // Print URL-Iterable Number (request.counterPrinter)
558        if (r.logLevelEQ1)
559            r.append(r.request.counterPrinter.apply(r.results.pos) + ": ");
560
561        if (r.logLevelGTEQ2)
562            r.append("\n" + r.request.counterPrinter.apply(r.results.pos) + ": ");
563
564        if (DEBUGGING) System.out.println("HERE: 01 (" + r.results.pos + ")");
565
566
567        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
568        // DECIDE: Which of the three cases this is: URL, B64-Image, or an Exception-URL
569        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
570
571        // If there was an Image-URL next, then print it !!!
572        if (r.url != null)
573            { if (r.logLevelGTEQ2) r.append("Image-URL: [" + r.url.toString() + "]\n"); }
574
575        // If There was no URL, Then this is likely a B64-Encoded Image
576        else if ((r.b64ImageData = r.request.nextB64Image()) != null)
577        {
578            r.isB64EncodedImage = true;
579
580            if (r.logLevelGTEQ2) r.append(
581                "BASE-64 IMAGE: " + r.b64ImageData[0] /* imageFormatStr */ + ',' +
582                StrPrint.abbrev(r.b64ImageData[1], 35, true, " ... ", 70) + '\n'
583            );
584        }
585
586        // If url is null, and this isn't a "B64-Encoded", then it's an Exception-Throw URL
587        else
588            { dealWithExceptionURL(r);  return; }
589
590
591        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
592        // DOWNLOAD & CONVERT
593        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
594
595        if (DEBUGGING) System.out.println("HERE: 02 (" + r.results.pos + ")");
596
597        // If the user provided a 'urlPreProcessor' in his Request-instance, run that now.
598        doUserURLPreProcessorIfNeeded(r);
599
600        if (DEBUGGING) System.out.println("HERE: 03 (" + r.results.pos + ")");
601
602        // Get the java.awt.image.BufferedImage instance
603        Ret2<BufferedImage, IF> ret2BufferedImage = r.isB64EncodedImage
604            ? convertB64Image(r)
605            : downloadImage(r);
606
607        if (DEBUGGING) System.out.println("HERE: 04 (" + r.results.pos + ")");
608
609        // If 'null' is returned, The User Requested 'skipOn...' SO - skip-and-move-on.
610        //    * Log-Messages will ALREADY have been printed
611        //    * class Results array's will ALREADY have been updated. 
612        //    * If an ImageScraperException is needed, it would ALREADY have been thrown.
613
614        if (ret2BufferedImage == null) return;
615
616        // Convert java.awt.image.BufferedImage into a byte[]-Array
617        // This 'r2' contains the Image as a byte[]-Array, and the format in which it was saved
618
619        Ret2<byte[], IF> ret2ByteArrImage = writeBufferedImageToByteArray
620            (r, ret2BufferedImage.a /* The Image */, ret2BufferedImage.b /* The Extension */);
621
622        if (DEBUGGING) System.out.println("HERE: 05 (" + r.results.pos + ")");
623
624        // SAME AS PREVIOUS if (...) return;
625        if (ret2ByteArrImage == null) return;
626
627
628        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
629        // SAVE THE IMAGE (or send to 'Request.imageReceiver')
630        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
631
632        // Now Build an "ImageInfo" instance. (This is sent to any/all User's Lambdas)
633        // NOTE: No error-checking needed for a class that is strictly a data / "RECORD" class
634
635        r.imageInfo = new ImageInfo(
636            // Image-URL (very common)
637            r.url,
638
639            // Base-64 Image Stuff (rare, but not impossible)
640            r.isB64EncodedImage,
641            (r.isB64EncodedImage ? r.b64ImageData : null),
642
643            // The actual downloaded and converted images, themselves
644            ret2BufferedImage.a,    // java.awt.image.BufferredImage
645            ret2ByteArrImage.a,     // byte[] imgByteArr
646
647            // URL-Aquired Extension & Decided-Upon Extension
648            ret2BufferedImage.b,    // guessedExt
649            ret2ByteArrImage.b,     // actualExt
650
651            // Results Array Counters
652            r.results.pos,
653            r.results.successCounter
654        );
655
656        if (DEBUGGING) System.out.println("HERE: 06 (" + r.results.pos + ")");
657
658        // Save to Disk, or Send to Request.imageReceiver
659        handleImageByteArray(r);
660    }
661
662
663    // ********************************************************************************************
664    // ********************************************************************************************
665    // "Exception URL's" - Rare, but happens if the Static-Builder threw an Exception
666    // ********************************************************************************************
667    // ********************************************************************************************
668
669
670    private static void dealWithExceptionURL(RECORD r)
671    {
672        // "Exception-URL's" are URL's that must have come from the static "TagNode"
673        // Builders in class Request.  It happens when a complete-URL cannot be built
674        // from a partial-URL, and the Links-Class saved the Exception in a Vector,
675        // so that it can be reported to the user (righ here!)
676
677        Exception e = r.request.nextTNSRCException();
678
679        // ASSERT-STATEMENT: The 'request' instance should always return an 'e' here
680        if (e == null) throw new UnreachableError();
681
682        // Since this "Failed", make sure to let the "Results" object-instance know.
683        r.results.tagNodeSRCError(e);
684
685        // Now let the user know too
686        if (r.hasLog) 
687        {
688            if (r.logLevelEQ1) r.append(" x ");
689
690            else if (r.logLevelGTEQ2) r.append
691                ("URL-Building Exception: " + e.getClass().getName() + '\n');
692
693            if (r.logLevelEQ3) r.appendI4("Message: " + e.getMessage() + '\n');
694        }
695    }
696
697
698    // ********************************************************************************************
699    // ********************************************************************************************
700    // User-Provided URL-PreProcessor (Maybe!)
701    // ********************************************************************************************
702    // ********************************************************************************************
703
704
705    private static void doUserURLPreProcessorIfNeeded(RECORD r) throws ImageScraperException
706    {
707        if ((r.url == null) || (r.request.urlPreProcessor == null)) return;
708
709        try
710        {
711            r.url = r.request.urlPreProcessor.apply(r.url);
712
713            if (r.logLevelGTEQ2) r.appendI4("Pre-Processor URL:" + r.url + '\n');
714        }
715
716        catch (Exception e)
717        {
718            final String msg =
719                "While attempting to invoke the user provided lambda " +
720                "'Request.urlPreProcessor', an exception was thrown by the user-code.";
721
722            if (r.request.skipOnUserLambdaException)
723            {
724                if (r.logLevelGTEQ2)    r.appendI4(msg);
725                if (r.logLevelEQ3)      r.printEx("Run URL-PreProcessor", e);
726                if (r.logLevelEQ1)      r.append("x ");
727
728                r.results.exceptionFail(r.url, e);
729
730                return;
731            }
732
733            else throw new ImageScraperException
734                (msg + "  Please see Throwable.getCause() for more details.", e);
735        }
736    }
737
738
739    // ********************************************************************************************
740    // ********************************************************************************************
741    // Convert a B64-Image to a java.awt.image.BufferedImage instance
742    // ********************************************************************************************
743    // ********************************************************************************************
744
745
746    private static Ret2<BufferedImage, IF> convertB64Image(RECORD r) throws ImageScraperException
747    {
748        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
749        // Internally, the request-class saves B64-Images as Two-Element String[]-Array's
750        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
751
752        String  imageFormatStr  = r.b64ImageData[0];
753        String  b64EncodedImage = r.b64ImageData[1];
754
755
756        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
757        // Skipping B64-Images entirely is one of the boolean-options in 'Request'
758        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
759
760        if (r.request.skipBase64EncodedImages)
761        {
762            if      (r.logLevelEQ1)     r.append("x ");
763            else if (r.logLevelGTEQ2)   r.appendI4
764                ("Skipping - Skip Request for all Base64-Encoded Images\n");
765
766            r.results.skipB64();
767
768            return null;
769        }
770
771
772        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
773        // Try to do the B64-Converstion
774        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
775
776        try
777        {
778            IF ext = IF.get(imageFormatStr);
779
780            BufferedImage image = IF.decodeBase64ToImage(b64EncodedImage, ext);
781
782            // SUCCESS!
783            if (image != null) new Ret2<>(image, ext);
784        }
785    
786        catch (Exception e)
787        {
788            // This call either returns null, or throws an ImageScraperException
789            return r.reportEx(
790                r.request.skipOnB64DecodeException,
791                "Exception throw Java's Base-64 Image Decoder while decoding a Base-64 Image",
792                "Base-64 Image Decoding",
793                e
794            );
795        }
796
797
798        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
799        // ELSE: The Image was null, so use 'NullImageException'
800        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
801
802        Exception niex = new NullImageException(
803            "The B64-Image Encoding Regular-Expression matched the Source-URL, but " +
804            "Java's B64-Image Decoder has returned null upon decoding it."
805        );
806
807        niex.fillInStackTrace();
808
809        // Returns 'null', or throws an exception
810        return r.reportEx(
811            r.request.skipOnNullImageException,
812            "Null Image returned by Java's B64 Image-Decoder",
813            "Base-64 Image Decoding",
814            niex
815        );
816    }
817
818
819    // ********************************************************************************************
820    // ********************************************************************************************
821    // Download an Image to a java.awt.image.BufferedImage instance
822    // ********************************************************************************************
823    // ********************************************************************************************
824
825
826    private static Ret2<BufferedImage, IF> downloadImage(RECORD r) throws ImageScraperException
827    {
828        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
829        // Do the "Skipping URL" Lambda-Target right now (if the user's Request-Instance has one)
830        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
831
832        if (r.request.skipURL != null)
833        {
834            try 
835            {
836                if (r.request.skipURL.test(r.url))
837                {
838                    // This *ISN'T* Exception-Case Code, it is a situation where the user has
839                    // intentionally asked that this URL be skipped.
840        
841                    if (r.logLevelEQ1) r.append("x ");
842    
843                    if (r.logLevelGTEQ2)
844                        r.appendI4("URL Skip-Predicate requests this URL be skipped.\n");
845    
846                    r.results.skippedURL(r.url);
847                    return null;
848                }
849            }
850
851            catch (Exception e)
852            {
853                // This call either returns null, or throws an ImageScraperException 
854                // Depending upon the boolean 'r.request.skipOnUserLambdaException'
855                //
856                // NOTE: This **DOESN'T** return (for now).  This is actually a non-fatal exception
857                //       and progress can actually continue
858
859                r.reportEx(
860                    r.request.skipOnUserLambdaException,
861                    "Exception Thrown by User-Provided Lambda-Target 'Request.skipURL'",
862                    "Invoke User 'Request.skipURL' Lambda-Target",
863                    e
864                );
865            } 
866        }
867        
868
869        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
870        // Build a Monitor Thread Instance
871        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
872
873        Callable<BufferedImage> threadDownloader = new Callable<BufferedImage>()
874        {
875            public BufferedImage call() throws ImageScraperException
876            { return downloadImageCallable(r); }
877        };
878
879
880        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
881        // Run the Monitor Thread, return the result... Or Handle the Exception (if there was one)
882        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
883
884        lock.lock();
885        Future<BufferedImage> future = executor.submit(threadDownloader);
886        lock.unlock();
887
888        try
889        {
890            BufferedImage bi = future.get(r.request.maxDownloadWaitTime, r.request.waitTimeUnits);
891
892            return (bi == null)
893                ? null
894                : new Ret2<>(bi, IF.getGuess(r.url.toString()));
895        }
896
897
898        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
899        // TimeoutException: Web-Server took longer 'Request.maxDownloadWaitTime'
900        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
901
902        catch (TimeoutException e)
903        {
904            // This call either returns null, or throws an ImageScraperException 
905            // Depending upon the boolean 'r.request.skipOnTimeOutException'
906
907            return r.reportEx(
908                r.request.skipOnTimeOutException,
909                "Waited: " + r.request.maxDownloadWaitTime + " " +
910                    r.request.waitTimeUnits.toString(),
911                "HTTP Image-Download",
912                e
913            );
914
915            // OLD MESSAGE:
916            // "The download source-code seems to have waited the maximum amount of time, as " +
917            // "specified by the 'maxDownloadWaitTime' configuration parameters:\n" + msg
918        }
919
920
921        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
922        // ExecutionException: An "Exception Wrapper" for internal-exceptions
923        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
924        //
925        // Thrown if there were any exceptions while running the 'Callable' that was created above.
926        // Since the Callable's 'call()' method catches its exceptions, and wraps them inside an
927        // ImageScraperException, THEORETICALLY, the 'cause'-Throwable for this 'e' should
928        // **ALWAYS** be an ImageScraperException
929        //
930        // NOTE: If there is an ImageScraperException, make sure not to report it a second time!!!
931
932        catch (ExecutionException e)
933        {
934            Throwable cause = e.getCause();
935
936            if (ImageScraperException.class.isAssignableFrom(cause.getClass()))
937                throw (ImageScraperException) cause;
938
939            // This call either returns null, or throws an ImageScraperException 
940            // Depending upon the boolean 'r.request.skipOnDownloadException'
941
942            return r.reportEx(
943                r.request.skipOnDownloadException,
944                "Exception throw by Java Image-Download Code",
945                "HTTP Image-Download",
946                e
947            );
948        }
949
950
951        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
952        // InterruptedException: I THINK THIS IS UNREACHABLE - UNLESS USER IS INTERRUPTING THINGS!
953        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
954        // 
955        // This should mostly be unreachable, unless the end user is 'playing games' with Java's
956        // Thread Mechanism.  According to the JavaDoc Pgae for 'InterruptedException' - this is
957        // only thrown if the Thread is interrupte, which certainly won't happen on account of
958        // anything in this tool's code!
959
960        catch (InterruptedException e)
961        {
962            // This call either returns null, or throws an ImageScraperException 
963            // Depending upon the boolean 'r.request.skipOnDownloadException'
964
965            return r.reportEx(
966                r.request.skipOnDownloadException,
967                "Image Download Code Thread was Interrupted",
968                "HTTP Image-Download",
969                e
970            );
971        }
972    }
973
974    private static BufferedImage downloadImageCallable(RECORD r) throws ImageScraperException
975    {
976        BufferedImage       image   = null;
977        HttpURLConnection   con     = null;
978        Exception           ex      = null;
979
980        try
981        {
982            // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
983            // FIRST DOWNLOAD ATTEMPT
984            // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
985
986            if (r.request.alwaysUseUserAgent)
987            {
988                con = (HttpURLConnection) r.url.openConnection();
989                con.setRequestMethod("GET");
990                con.setRequestProperty("User-Agent", r.request.userAgent);
991
992                image = ImageIO.read(con.getInputStream());
993            }
994
995            else image = ImageIO.read(r.url);
996
997
998            // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
999            // First Download-Attempt Was Possibly Successfull
1000            // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1001
1002            if (image != null) return image;
1003
1004
1005            // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1006            // IF NULL-IMAGE && NO-RETRY: Then either return null or throw ImageScraperException
1007            // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1008
1009            else if (r.request.alwaysUseUserAgent || (! r.request.retryWithUserAgent))
1010            {
1011                // This call either returns null, or throws an ImageScraperException 
1012                // Depending upon the boolean 'r.request.skipOnNullImageException'
1013
1014                return r.reportEx(
1015                    r.request.skipOnNullImageException,
1016                    "Downloaded Empty / Null Image",
1017                    "HTTP Image-Download",
1018                    (NullImageException) new NullImageException
1019                        ("The Image Failed to Download Properly").fillInStackTrace()
1020                );
1021            }    
1022        }
1023
1024        catch (ImageScraperException e) { throw e; }
1025
1026        catch (Exception e) // (IOException | IIOException e)
1027        {
1028            // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1029            // EXCEPTION WAS THROWN: So **Possibly** Still need to retry with the User-Agent
1030            // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1031
1032            if (r.request.retryWithUserAgent && (! r.request.alwaysUseUserAgent))
1033            {
1034                if (r.logLevelGTEQ2) r.appendI4(
1035                    "Image Download Failed - Re-attempting Download with / via User-Agent: " +
1036                        r.request.userAgent + '\n'
1037                    );
1038            }
1039
1040            // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1041            // NO RETRY: Either Skip to next image (return null), or throw ImageScraperException
1042            // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1043
1044            else
1045            {
1046                // This call either returns null, or throws an ImageScraperException 
1047                // Depending upon the boolean 'r.request.skipOnDownloadException'
1048
1049                return r.reportEx(
1050                    r.request.skipOnDownloadException,
1051                    "Java HTTP Image Downloader javax.imageio.ImageIO.read(...) threw Exception",
1052                    "HTTP Image-Download",
1053                    e
1054                );
1055            }
1056        }
1057
1058        try
1059        {
1060            // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1061            // SECOND DOWNLOAD ATTEMPT
1062            // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1063
1064            con = (HttpURLConnection) r.url.openConnection();
1065            con.setRequestMethod("GET");
1066            con.setRequestProperty("User-Agent", r.request.userAgent);
1067
1068            image = ImageIO.read(con.getInputStream());
1069
1070
1071            // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1072            // Second Download-Attempt Was Possibly Successfull
1073            // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1074
1075            if (image != null) return image;
1076
1077
1078            // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1079            // IF NULL-IMAGE: Then either return null or throw ImageScraperException
1080            // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1081            //
1082            // This call either returns null, or throws an ImageScraperException 
1083            // Depending upon the boolean 'r.request.skipOnNullImageException'
1084
1085            return r.reportEx(
1086                r.request.skipOnNullImageException,
1087                "Downloaded Empty / Null Image",
1088                "HTTP Image-Download",
1089                (NullImageException) new NullImageException
1090                    ("The Image Failed to Download Properly").fillInStackTrace()
1091            );
1092        }
1093
1094        catch (ImageScraperException e) { throw e; }
1095
1096        catch (Exception e)
1097        {
1098            // This call either returns null, or throws an ImageScraperException 
1099            // Depending upon the boolean 'r.request.skipOnNullskipOnDownloadExceptionImageException'
1100
1101            return r.reportEx(
1102                r.request.skipOnDownloadException,
1103                "Java HTTP Image Downloader javax.imageio.ImageIO.read(...) threw Exception",
1104                "HTTP Image-Download",
1105                e
1106            );
1107        }
1108    }
1109
1110
1111    // ********************************************************************************************
1112    // ********************************************************************************************
1113    // Convert the java.awt.image.BufferedImage **INTO** a Java byte[]-Array
1114    // ********************************************************************************************
1115    // ********************************************************************************************
1116
1117
1118    // This just converts an image in the format of a 'BufferedImage' into an image that is an
1119    // array of bytes.  This method will attempt to save the image using the format that was
1120    // extracted using the URL-Name / FileName.  If that fails, there is a for-loop that will
1121    // attempt to save the image using the other formats.
1122
1123    private static Ret2<byte[], IF> writeBufferedImageToByteArray
1124        (RECORD r, BufferedImage image, IF extGuess)
1125        throws ImageScraperException
1126    {
1127        // This is merely an array of all available formats that may be used to save or download
1128        // an image.
1129
1130        IF[] allFormats = IF.values();
1131
1132        // This is used to generated the returned byte[] array.
1133        ByteArrayOutputStream baos = new ByteArrayOutputStream();
1134
1135        // This is used if the image could not be converted
1136        Exception saveItEx = null;
1137
1138
1139        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1140        // If the provided Image-Type is NON-NULL, try to save and return the Byte[]-Array
1141        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1142
1143        if (extGuess != null)
1144
1145            try
1146            {
1147                ImageIO.write(image, extGuess.extension, baos);
1148                baos.flush();
1149                baos.close();
1150
1151                if (r.logLevelEQ3) r.appendI4(
1152                    "Successfully Saved '." + extGuess.extension + "' URL to a '." +
1153                    extGuess.extension + "' Formatted Byte-Array.\n"
1154                );
1155
1156                return new Ret2<>(baos.toByteArray(), extGuess);
1157            }
1158
1159            catch (Exception e)
1160            {
1161                // IMPORTANT: It **IS NOT** time to quit yet!  Try the other Image-Types before
1162                //            reporting this as a Failed / Exception Case.
1163
1164                saveItEx = e;
1165
1166                if (r.logLevelEQ3) r.appendI4(
1167                    "Failed to Convert '." + extGuess.extension + "' URL to a '." +
1168                    extGuess.extension + "' Formatted Byte-Array.\n"
1169                );
1170
1171                for (int i=0; i < allFormats.length; i++)
1172
1173                    if (allFormats[i] == extGuess) { allFormats[i] = null;  break; }                
1174            }
1175
1176
1177        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1178        // Try any / all other formats that have not yet been attempted
1179        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1180
1181        for (IF format : allFormats)
1182
1183            try
1184            {
1185                baos.reset();
1186                ImageIO.write(image, format.extension, baos);
1187                baos.flush();
1188                baos.close();
1189
1190                if (r.logLevelEQ3) r.appendI4(
1191                    "Successfully Saved Image-URL to Byte-Array, Using as Guess '." +
1192                    format.extension + "' Format\n"
1193                );
1194
1195                return new Ret2<>(baos.toByteArray(), format);
1196            }
1197
1198            catch (Exception e)
1199                { if (saveItEx == null) saveItEx = e; }
1200
1201
1202        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1203        // All attempts to write using a specific format have failed.  Handle the Failure.
1204        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1205        //
1206        // This call either returns null, or throws an ImageScraperException 
1207        // Depending upon the boolean 'r.request.skipOnImageWritingFail'
1208
1209        return r.reportEx(
1210            r.request.skipOnImageWritingFail,
1211            "Could not translate java.awt.image.BufferedImage to a byte[]-Array with *Any* " +
1212                "Standard Image-Format",
1213            "BufferedImage to byte[]-Array",
1214            saveItEx
1215        );
1216    }
1217
1218
1219    // ********************************************************************************************
1220    // ********************************************************************************************
1221    // Write to Disk, or Send to Request.imageReceiver
1222    // ********************************************************************************************
1223    // ********************************************************************************************
1224
1225
1226    private static void handleImageByteArray(RECORD r) throws ImageScraperException
1227    {
1228        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1229        // Get the File-Name, this likely is an "error-free" step, but check just in case.
1230        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1231
1232        String tempFileName = computeFileName(r);
1233
1234        if (tempFileName == null) return; 
1235
1236        r.imageInfo.setFileName(tempFileName);
1237
1238
1239        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1240        // Run the User's Keeper-Predicate, if one was supplied
1241        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1242
1243        boolean keepIt = true;
1244
1245        if (r.request.keeperPredicate != null)
1246
1247            try
1248                { keepIt = r.request.keeperPredicate.test(r.imageInfo); }
1249
1250            catch (Exception e)
1251                { r.userLambdaEx("keeperPredicate", e);  return;}
1252
1253
1254        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1255        // Write-Image, or send to Request.imageReceiver
1256        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1257
1258        if (! keepIt)
1259        {
1260            r.results.predicateReject(r.imageInfo);
1261
1262            // Now let the user-log know... (MAYBE, IF THEY HAVE LEVEL-CLEARANCE)
1263            if (r.logLevelEQ3) r.appendI4("User-Provided Keeper Predicate Rejected this Image.");
1264            if (r.logLevelEQ1) r.append("x ");
1265        }
1266
1267        else writeOrTransmit(r);
1268    }
1269
1270    private static String computeFileName(RECORD r) throws ImageScraperException
1271    {
1272        String preFix = (r.request.fileNamePrefix != null) ? r.request.fileNamePrefix : "";
1273
1274
1275        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1276        // Use User-Provided "Get File-Name Lambda" - 'Request.getImageFileSaveName'
1277        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1278
1279        if (r.request.getImageFileSaveName != null)
1280        {
1281            String file = null;
1282
1283            try
1284                { file = r.request.getImageFileSaveName.apply(r.imageInfo); }
1285
1286            catch (Exception e)
1287                { return r.userLambdaEx("getImageFileSaveName", e); }
1288
1289            return preFix + file;
1290        }
1291
1292
1293        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1294        // Use 'Results.successCounter' for the File-Name
1295        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1296
1297        else if (r.request.useDefaultCounterForImageFileNames)
1298
1299            return preFix + r.request.counterPrinter.apply(r.results.successCounter);
1300
1301
1302        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1303        // Use the original URL's "File-Name" (Remember, on Yahoo! News, this don't work!)
1304        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1305
1306        else
1307        {
1308            String temp = r.imageInfo.url.getFile().substring(1);
1309
1310            if (r.imageInfo.guessedExtension == null) return preFix + temp;
1311
1312            String ext = r.imageInfo.guessedExtension.extension;
1313
1314            if (temp.toLowerCase().endsWith('.' + ext))
1315                return preFix + temp.substring(0, temp.length() - 1 - ext.length());
1316
1317            if (r.imageInfo.guessedExtension.alternateExtension == null) return preFix + temp;
1318
1319            ext = r.imageInfo.guessedExtension.alternateExtension;
1320
1321            if (temp.toLowerCase().endsWith('.' + ext))
1322                return preFix + temp.substring(0, temp.length() - 1 - ext.length());
1323
1324            throw new UnreachableError();
1325        }
1326    }
1327
1328    private static void writeOrTransmit(RECORD r) throws ImageScraperException
1329    {
1330        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1331        // Case: ImageReceiver
1332        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1333
1334        if (r.request.imageReceiver != null)
1335
1336            try 
1337            {
1338                r.request.imageReceiver.accept(r.imageInfo);
1339                r.results.success(r.imageInfo, null /* no target directory */);
1340
1341                if (r.logLevelEQ1) r.append("✓ ");
1342
1343                else if (r.logLevelGTEQ2)
1344                    r.appendI4("Image Properly Transmitted to Request.imageReceiver\n");
1345
1346                return;
1347            }
1348
1349            catch (Exception e)
1350                { r.userLambdaEx("imageReceiver", e);  return; }
1351
1352
1353        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1354        // Case: File-System
1355        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1356
1357        String dirName = null;
1358
1359        if (r.request.targetDirectory != null) dirName = r.request.targetDirectory;
1360
1361        else if (r.request.targetDirectoryRetriever != null)
1362        {
1363            File dir;
1364
1365
1366            // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1367            // Run the Request.targetDirectoryRetriever instance
1368            // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1369
1370            try 
1371                { dir = r.request.targetDirectoryRetriever.apply(r.imageInfo); }
1372
1373            catch (Exception e)
1374                { r.userLambdaEx("targetDirectoryRetriever", e);  return; }
1375
1376
1377            // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1378            // Check that the directory returned is non-null and writeable
1379            // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1380
1381            try
1382                { WritableDirectoryException.check(dir); }
1383
1384            catch (Exception e)
1385            {
1386                // This call either returns null, or throws an ImageScraperException 
1387                // Depending upon the boolean 'r.request.skipOnImageWritingFail'
1388        
1389                r.reportEx(
1390                    r.request.skipOnImageWritingFail,
1391                    "Target-Directory reference provided is not a File-System Writeable Directory",
1392                    "Write Image to Disk",
1393                    e
1394                );
1395            }
1396        }
1397
1398        // This scenario is checked inside the Request class "check" method
1399        else throw new UnreachableError();
1400
1401
1402        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1403        // WRITE THE FILE
1404        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
1405
1406        if (! dirName.endsWith(File.separator)) dirName = dirName + File.separator;
1407
1408        try
1409        {
1410            String saveName =
1411                dirName + r.imageInfo.fileName() + '.' + r.imageInfo.actualExtension.extension;
1412
1413            FileRW.writeBinary(r.imageInfo.imgByteArr, saveName);
1414            r.results.success(r.imageInfo, dirName);
1415
1416            if (r.logLevelEQ1) r.append("✓ ");
1417
1418            else if (r.logLevelGTEQ2)
1419                r.appendI4("Image saved successfully to: [" + saveName + "]\n");
1420        }
1421
1422        catch (Exception e)
1423        {
1424            // This call either returns null, or throws an ImageScraperException 
1425            // Depending upon the boolean 'r.request.skipOnImageWritingFail'
1426    
1427            r.reportEx(
1428                r.request.skipOnImageWritingFail,
1429                "Exception thrown while attempting to write an image file to disk.",
1430                "Write Image to Disk",
1431                e
1432            );
1433        }
1434    }
1435}