001
002package Torello.HTML.Tools.Images;
003
004import Torello.Java.*;
005
006// Needed for a JavaDoc Comment {@link ...}
007import Torello.HTML.TagNode;
008
009import java.net.URL;
010import java.io.Serializable;
011import java.util.Arrays;
012
013/**
014 * After downloading all of the user's requested images, the class {@link ImageScraper} returns an
015 * instance of this class.
016 * 
017 * <BR /><BR />When a download request has completed, this class will be instantiated and returned.
018 * Care has been taken to ensure this class does not freeze nor fail when a particular image fails
019 * to download.  This level of control is customizable, so if the programmer would prefer download 
020 * execution to halt immediately upon exception, there is are several settings for this in the
021 * class {@link Request}.
022 * 
023 * <BR /><BR />The link below contains the output of invoking the {@link #toString} method on a
024 * {@code 'Results'} instance after downloading all of the HTML {@code <IMG SRC=...>} tags that
025 * were scraped from the Web-Server {@code news.yahoo.com}.
026 * 
027 * <BR /><BR /><B><CODE><A HREF='doc-files/results.txt'>results.txt</A></CODE></B>
028 *
029 * <BR /><BR /><B CLASS=JDDescLabel>Initializations:</B>
030 * 
031 * <BR />Many of the values in the arrays for class {@code 'Results'} (this class) may contain
032 * null-values, or a {@code '-1'}.  This happens if an exceptions is thrown while downloading or
033 * saving any one particular image, which prevents the process from running to completion.
034 * 
035 * <BR /><BR />To view the exact initialization-value for elements of any of these array, simply
036 * click on the <B><CODE><A HREF='hilite-files/Results.java.html'>HiLited Source-Code</A></CODE>
037 * </B> link, and scroll down to the <B>Package-Private</B> <CODE>Results</CODE> class constructor.
038 *
039 * <BR /><BR /><B CLASS=JDDescLabel>The 'skipped' Array:</B>
040 * 
041 * <BR />There is an easy way to check which {@code URL's} have failed or were skipped (due
042 * to user-request), and which {@code URL's} were successfully obtained.  One of several arrays
043 * that are {@code public} in this class is the {@link #skipped} {@code boolean[]}-array.
044 * 
045 * <BR /><BR /> Whenever a particular {@code URL}-index corresponds to a {@code skipped}-index that
046 * contains a {@code FALSE} boolean-value, it indicates that that particular {@code URL} ultimately
047 * was not properly saved or re-transmitted.
048 * 
049 * @see ImageScraper
050 */
051@Torello.JavaDoc.JDHeaderBackgroundImg(EmbedTagFileID="IMAGE_SCRAPER_CLASS")
052public class Results implements Serializable, Cloneable
053{
054    /** <EMBED CLASS='external-html' DATA-FILE-ID=SVUID> */
055    public static final long serialVersionUID = 1;
056
057
058    // ********************************************************************************************
059    // ********************************************************************************************
060    // Public Array Fields: User may inspect these fields when an instance of 'Results' is returned
061    // ********************************************************************************************
062    // ********************************************************************************************
063
064
065    /**
066     * This will contain a complete list of the {@code URL's} that were retrieved (or generated-
067     * <I>if partially-resolved 'relative' {@code URL's} were provided</I>) from the
068     * {@link Request}-instances {@code static}-builder.  Every image downloaded (or attempted for
069     * download) will have its {@code URL} saved here, in this array.
070     *
071     * <BR /><BR /><B CLASS=JDDescLabel>Null's in the {@code urls} Array</B>
072     * 
073     * <BR />An array-element of the {@code urls}-array will contain null under the following
074     * two circumstances:
075     * 
076     * <BR /><BR /><UL CLASS=JDUL>
077     * 
078     * <LI> No image-{@code URL} was provided, becasue the picture was a Base-64 Encoded Image, and
079     *      instead was a {@code String} that had been retrieved from a {@link TagNode}'s
080     *      {@code SRC}-Attribute.
081     *      <BR /><BR />
082     *      </LI>
083     *
084     * <LI> The user provided a {@code String} to the {@link Request} Class builder, but that 
085     *      {@code String} caused a {@code MalformedURLException}, and no {@code URL}-instance
086     *      was ever built.  (Note that in this scenario, the {@link #exceptions} array would be
087     *      storing the {@code URL}-Exception that was thrown).
088     *      <BR /><BR />
089     *      </LI>
090     * 
091     * </UL>
092     * 
093     * <EMBED CLASS='external-html' DATA-FILE-ID=REQ_PARALLEL_NOTE>
094     */
095    public final URL[] urls;
096
097    /**
098     * When constructing an {@link ImageScraper}'s {@link Request} object, one of the options for
099     * building the instance is to pass a list of HTML {@link TagNode} instances containing HTML
100     * {@code '<IMG SRC=...>'} tags.
101     * 
102     * <BR /><BR />HTML {@code TagNode} elements will sometimes / occasionally have a variant of an
103     * image source known as the <B STYLE='color: red;'>Base-64 Encoded Image</B>.  These are
104     * images where the actual pictures fully stored &amp; encoded inside the {@code SRC}-Attribute
105     * of the HTML {@link TagNode}'s {@code SRC}-Attribute.
106     * 
107     * <BR /><BR />Base-64 Images are just pictures that have been converted into actual character
108     * data, in the form of a simple {@code String}, and saved into the {@code <IMG>} tag's
109     * {@code SRC}-URL - <I>instead of an actual HTTP {@code URL} being saved there</I>.  Note that
110     * this practice is generally used for much smaller pictures, thumbnails or logo signs (images
111     * that wouldn't use up a lot of data).
112     * 
113     * <BR /><BR />A full explanation of HTML's {@code Base-64} Image-Encoding is beyone the scope
114     * of this Java-Doc Comment.
115     * 
116     * <BR /><BR />If any image was "converted" from a B-64 Image-Encoding (rather than downloaded
117     * from a {@code URL}), then the boolean for the image's index will be {@code TRUE} rather than
118     * {@code FALSE}.  The default value for all elements of this array is {@code FALSE}.
119     * 
120     * <EMBED CLASS='external-html' DATA-FILE-ID=REQ_PARALLEL_NOTE>
121     */    
122    public final boolean[] b64EncodedImg;
123
124    /**
125     * An images {@code Results}-data in this particular paralell-array will be {@code TRUE} under
126     * any of the following circumstances:
127     * 
128     * <BR /><BR /><UL CLASS=JDUL>
129     * 
130     * <LI> If the user provided a {@link Request#skipURL}-Predicate, and that predicate rejected
131     *      the image (telling the downloader not to download the picture).
132     *      <BR /><BR />
133     *      </LI>
134     * 
135     * <LI> If the user provided a {@link Request#keeperPredicate}, and that predicate, after
136     *      image-downloade complettion, rejected the image (telling the downloader not save the 
137     *      picture to disk the picture).
138     *      <BR /><BR />
139     *      </LI>
140     * 
141     * <LI> If there were any exceptions thrown while downloading the image that forced the
142     *      downloader-logic to abandon the image (and either throw the exception, or
143     *      skip-and-move-on to the next image).
144     *      <BR /><BR />
145     *      </LI>
146     *
147     * <LI> If the original {@code Iterable} that was provided to the {@link Request}-instance
148     *      had entries that had caused {@code exceptions} to be thrown while building the
149     *      {@link Request}-instance.
150     *      </LI>
151     * 
152     * </UL>
153     * 
154     * <BR /><B STYLE='color: red;'><I>Under any / all other circumstances, if an image was
155     * successfully downloaded and written to disk, then the corresponding element in this array
156     * will contain {@code FALSE}!</I></B>
157     *
158     * <EMBED CLASS='external-html' DATA-FILE-ID=REQ_PARALLEL_NOTE>
159     */
160    public final boolean[] skipped;
161
162    /**
163     * The names of the files that were retrieved and/or stored will be in this array.
164     * If this image were skipped or an exception occurred, the array position for that
165     * {@code URL} would contain 'null'.
166     * 
167     * <BR /><BR />It is important to note that if an element of this array contains a valid,
168     * non-null, file-name - <I><B>it does not guarantee that the image was properly saved</B></I>.
169     * The value stored in the corresponding (parallel) {@link #skipped}-array index is the only
170     * way to ascertain whether an image was ultimately written to disk (or transmitted to a 
171     * User-Provided {@link Request#imageReceiver}).
172     * 
173     * <EMBED CLASS='external-html' DATA-FILE-ID=REQ_PARALLEL_NOTE>
174     */
175    public final String[] fileNames;
176
177    /**
178     * The location of the file-name saved directory, if an image did not successfully save to
179     * the file system, or if an {@code imageReceiver} were used, then the array-location would
180     * contain {@code 'null'}.
181     * 
182     * <EMBED CLASS='external-html' DATA-FILE-ID=REQ_PARALLEL_NOTE>
183     */
184    public final String[] saveDirectories;
185
186    /**
187     * The image type of the files that were retrieved will be stored in this array.
188     * 
189     * <EMBED CLASS='external-html' DATA-DEFVAL=null DATA-SPEC="image-format"
190     *      DATA-FILE-ID=REQ_SKIPPED_NOTE>
191     * 
192     * <EMBED CLASS='external-html' DATA-FILE-ID=REQ_PARALLEL_NOTE>
193     */
194    public final IF[] imageFormats;
195
196    /**
197     * If any stage of the image download, conversion or disk-write fails, then this array will
198     * store a record the exception that was thrown.
199     *
200     * <BR /><BR />If the download succeeded, then the {@code 'exceptions'}-array element at that
201     * index would contain 'null.'  Any {@code exceptions}-array index that contains a non-null 
202     * {@code Exception} will be an index for which the {@link #skipped}-array contains a
203     * {@code TRUE}-value stored at the same location.
204     * 
205     * <EMBED CLASS='external-html' DATA-FILE-ID=REQ_PARALLEL_NOTE>
206     */
207    public final Exception[] exceptions;
208
209    /**
210     * This will contain a list of long-integers, each of which will have the file-size of the
211     * downloaded image.  
212     * 
213     * <EMBED CLASS='external-html' DATA-DEFVAL="-1" DATA-SPEC=size DATA-FILE-ID=REQ_SKIPPED_NOTE>
214     * 
215     * <EMBED CLASS='external-html' DATA-FILE-ID=REQ_PARALLEL_NOTE>
216     */
217    public final long[] sizes;
218
219    /**
220     * This will contain a list of integers, each of which shall have the image-widths of the 
221     * downloaded images.
222     * 
223     * <EMBED CLASS='external-html' DATA-DEFVAL="-1" DATA-SPEC=width DATA-FILE-ID=REQ_SKIPPED_NOTE>
224     * 
225     * <EMBED CLASS='external-html' DATA-FILE-ID=REQ_PARALLEL_NOTE>
226     */
227    public final int[] widths;
228
229    /**
230     * This shall contain a list of integers, each of which shall have the image-heights of 
231     * the downloaded images.
232     * 
233     * <EMBED CLASS='external-html' DATA-DEFVAL="-1" DATA-SPEC=height
234     *      DATA-FILE-ID=REQ_SKIPPED_NOTE>
235     * 
236     * <EMBED CLASS='external-html' DATA-FILE-ID=REQ_PARALLEL_NOTE>
237     */
238    public final int[] heights;
239
240
241    // ********************************************************************************************
242    // ********************************************************************************************
243    // Some Package-Private Fields, Used here and by class ImageScraper
244    // ********************************************************************************************
245    // ********************************************************************************************
246
247
248    // next result received array position.
249    int pos = 0;
250
251    // number of successfully saved images.
252    int successCounter = 0;
253
254
255    // ********************************************************************************************
256    // ********************************************************************************************
257    // Package-Private Constructor
258    // ********************************************************************************************
259    // ********************************************************************************************
260
261
262    Results(int size)
263    {
264        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
265        // Create each of these arrays
266        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
267
268        urls                = new URL[size];
269        b64EncodedImg       = new boolean[size];
270        skipped             = new boolean[size];
271        fileNames           = new String[size];
272        saveDirectories     = new String[size];
273        imageFormats        = new IF[size];
274        exceptions          = new Exception[size];
275        sizes               = new long[size];
276        widths              = new int[size];
277        heights             = new int[size];
278
279
280        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
281        // Initialize each element of the above arrays
282        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
283
284        for (int i=0; i < size; i++)
285        {
286            urls[i]             = null;
287            b64EncodedImg[i]    = false;
288            skipped[i]          = false;
289            fileNames[i]        = null;
290            saveDirectories[i]  = null;
291            imageFormats[i]     = null;
292            exceptions[i]       = null;
293            sizes[i]            = -1;
294            widths[i]           = -1;
295            heights[i]          = -1;
296        }
297    }
298
299
300    // ********************************************************************************************
301    // ********************************************************************************************
302    // No Image Downloaded
303    // ********************************************************************************************
304    // ********************************************************************************************
305
306
307    // Request static-builder generated an "Exception URL"
308    // Called From: ImageScraper.loopBody(RECORD)
309
310    void tagNodeSRCError(Exception e)
311    {
312        skipped[pos]    = true;
313        exceptions[pos] = e;
314
315        pos++;
316    }
317
318    // User-Provided "Predicate<URL> skipURL"
319    // Called From: ImageScraper.downloadImage(RECORD)
320
321    void skippedURL(URL url)
322    {
323        urls[pos]       = url;
324        skipped[pos]    = true;
325
326        pos++;
327    }
328
329    // User-Provided "boolean skipBase64EncodedImages"
330    // Called From: ImageScraper.convertB64Image(RECORD)
331
332    void skipB64()
333    {
334        b64EncodedImg[pos]  = true;
335        skipped[pos]        = true;
336
337        pos++;
338    }
339
340    // Called from many places.  This method is the biggest of the Results-Reporters 
341    //   * Exception-thrown
342    //   * 'ImageInfo' instance hasn't been constructed yet
343
344    void exceptionFail(URL url, Exception e)
345    {
346        urls[pos]           = url;
347        b64EncodedImg[pos]  = (url == null);
348        skipped[pos]        = true;
349        exceptions[pos]     = e;
350
351        pos++;
352    }
353
354
355    // ********************************************************************************************
356    // ********************************************************************************************
357    // "ImageInfo" instance available now: Image Successfully Downloaded and Converted to Array.
358    // ********************************************************************************************
359    // ********************************************************************************************
360
361
362    // There are 3 different User-Provided Lambda-Target's that might throw exceptions
363    // This is called from within "ImageScraper.RECORD.userLambdaEx(...)"
364
365    void userLambdaException(ImageInfo imageInfo, Exception e)
366    {
367        skipped[pos]    = true;
368        exceptions[pos] = e;
369
370        copyImageInfo(imageInfo);
371    }
372
373    // The User Keep/Reject Predicate rejected this image
374    // Called From: ImageScraper.handleImageByteArray(RECORD)
375
376    void predicateReject(ImageInfo imageInfo)
377    {
378        skipped[pos] = true;
379        copyImageInfo(imageInfo);
380    }
381
382    // Image was written to disk somewhere, or accepted by the Request.imageReceiver
383    // Called From: ImageScraper.writeOrTransmit(RECORD)
384
385    void success(ImageInfo imageInfo, String targetDirectory)
386    {
387        // Directory where the image was saved, if called by "ImageReceiver", this will be null
388        saveDirectories[pos] = targetDirectory;
389
390        copyImageInfo(imageInfo);
391
392        // Only time this is ever incremented
393        successCounter++;
394    }
395
396
397    // ********************************************************************************************
398    // ********************************************************************************************
399    // SMALL HELPER
400    // ********************************************************************************************
401    // ********************************************************************************************
402
403
404    // Private Method, used in all 5 previous methods directly above here
405    private void copyImageInfo(ImageInfo imageInfo)
406    {
407        urls[pos]           = imageInfo.url;
408        b64EncodedImg[pos]  = imageInfo.isB64EncodedImage;
409        imageFormats[pos]   = imageInfo.actualExtension;
410        sizes[pos]          = imageInfo.imgByteArr.length;
411        widths[pos]         = imageInfo.width;
412        heights[pos]        = imageInfo.height;
413        fileNames[pos]      = imageInfo.fileName() + '.' + imageInfo.actualExtension;
414
415        pos++;
416    }
417
418
419    // ********************************************************************************************
420    // ********************************************************************************************
421    // interface java.lang.Cloneable
422    // ********************************************************************************************
423    // ********************************************************************************************
424
425
426    /**
427     * Generates a <B STYLE='color: red;'>Deep Copy</B> of {@code 'this'} instance.  This means
428     * all internal arrays are also cloned / copied
429     * 
430     * @return A duplicate instance of this class, with all arrays having been copied.
431     */
432    public Results clone()
433    { return new Results(this); }
434
435    // Private Constructor, used only for the 'clone()' method
436    private Results(Results r)
437    {
438        this.urls               = r.urls.clone();
439        this.b64EncodedImg      = r.b64EncodedImg.clone();
440        this.skipped            = r.skipped.clone();
441        this.fileNames          = r.fileNames.clone();
442        this.saveDirectories    = r.saveDirectories.clone();
443        this.imageFormats       = r.imageFormats.clone();
444        this.exceptions         = r.exceptions.clone();
445        this.sizes              = r.sizes.clone();
446        this.widths             = r.widths.clone();
447        this.heights            = r.heights.clone();
448
449        this.pos                = r.pos;
450        this.successCounter     = r.successCounter;
451    }
452
453
454    // ********************************************************************************************
455    // ********************************************************************************************
456    // java.lang.Object
457    // ********************************************************************************************
458    // ********************************************************************************************
459
460
461    /**
462     * Checks whether {@code 'this'} instance is equal to another instance of class
463     * {@code Results}.  This method performs a <B STYLE='color: red;'>Deep Equals</B> comparison
464     * using the {@code equals(...)} method suite found in class' {@code java.util.Arrays}.
465     * 
466     * @param other This may be any Java Object, but only an instance class {@code 'Results'} has
467     * any chance of being marked as <B STYLE='color: red;'>equal</B> to this instance.
468     * 
469     * @return {@code TRUE} if and only if {@code 'o'} has a type that's assignable to
470     * {@code Results} - and if each of the internal arrays in this instance are equal to the
471     * arrays in parameter {@code 'o'}.
472     */
473    public boolean equals(Object other)
474    {
475        if (other == null) return false;
476
477        if (! Results.class.isAssignableFrom(other.getClass())) return false;
478
479        Results r = (Results) other;
480
481
482        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
483        // NOTE: These arrays cannot ever be null, that is an "Unreachable Situation"
484        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
485
486        return 
487                Arrays.equals(this.urls,            r.urls)
488            &&  Arrays.equals(this.b64EncodedImg,   r.b64EncodedImg)
489            &&  Arrays.equals(this.skipped,         r.skipped)
490            &&  Arrays.equals(this.fileNames,       r.fileNames)
491            &&  Arrays.equals(this.saveDirectories, r.saveDirectories)
492            &&  Arrays.equals(this.imageFormats,    r.imageFormats)
493            &&  Arrays.equals(this.exceptions,      r.exceptions)
494            &&  Arrays.equals(this.sizes,           r.sizes)
495            &&  Arrays.equals(this.widths,          r.widths)
496            &&  Arrays.equals(this.heights,         r.heights);
497    }
498
499    // Used in the above toString() method
500    private static final String COMMA = ", ";
501
502    /**
503     * Returns a {@code java.lang.String} representation of {@code 'this'} instance
504     * 
505     * @return A Java {@code String} containing the data inside this class.
506     */
507    public String toString()
508    {
509        StringBuilder sb = new StringBuilder();
510
511
512        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
513        // NOTE: These arrays, themselves can never be null - BUT THEIR CONTENTS ARE OFTEN NULL
514        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
515
516        for (int i=0; i < urls.length; i++)
517        {
518            if (b64EncodedImg[i]) sb.append("Base 64 Encoded Image\n");
519
520            else sb.append(
521                "URL: " + ((urls[i] == null)
522                    ? "null"
523                    : StrPrint.abbrev(urls[i].toString(), 50, true, " ... ", 100)) +
524                '\n'
525            );
526
527            boolean comma = false;
528
529            if (skipped[i] == true)
530            {
531                sb.append("    SKIPPED");
532                comma = true;
533            }
534
535            if (imageFormats[i] != null)
536            {
537                sb.append(comma ? COMMA : "    ");
538                sb.append("Format: " + imageFormats[i]);
539                comma = true;
540            }
541
542            if (sizes[i] > 0)
543            {
544                sb.append(comma ? COMMA : "    ");
545                sb.append("Size: " + StringParse.commas(sizes[i]));
546                comma = true;
547            }
548
549            if (widths[i] > 0)
550            {
551                sb.append(comma ? COMMA : "    ");
552                sb.append("W: " + StringParse.commas(widths[i]));
553                comma = true;
554            }
555
556            if (heights[i] > 0)
557            {
558                sb.append(comma ? COMMA : "    ");
559                sb.append("H: " + StringParse.commas(heights[i]));
560                comma = true;
561            }
562
563            if (comma) sb.append('\n');
564
565            comma = false;
566
567            if (fileNames[i] != null)
568            {
569                sb.append("    FileName: [" + fileNames[i] + ']');
570                comma = true;
571            }
572
573            if (saveDirectories[i] != null)
574            {
575                sb.append(comma ? COMMA : "    ");
576                sb.append("Dir: [" + StrPrint.abbrev(saveDirectories[i], 30, true, null, 60) + ']');
577                comma = true;
578            }
579
580            if (comma) sb.append('\n');
581
582            if (exceptions[i] != null)
583                sb.append("    Thrown: " + exceptions[i].getClass().getName() + '\n');
584
585            if (i < (urls.length - 1)) sb.append('\n');
586        }
587
588        return sb.toString();
589    }
590
591    /**
592     * Java's hash-code requirement.  The code is computed by summing the first 10 {@link #sizes}
593     * array elements.
594     * 
595     * @return A hash-code that may be used when storing this node in a java sorted-collection.
596     */
597    public int hashCode()
598    {
599       int sum = 0;
600
601       for (int i=0; (i < 10) && (i < sizes.length); i++) sum += sizes[i];
602
603       return sum;
604    }
605}