001package Torello.HTML.Tools.Images; 002 003 004// *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 005// My Imports 006// *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 007 008import Torello.HTML.*; 009import Torello.Java.*; 010 011import Torello.HTML.NodeSearch.TagNodeFind; 012import Torello.Java.Additional.Ret2; 013import Torello.Java.Additional.AppendableLog; 014import Torello.Java.Additional.AppendableSafe; 015 016 017// *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 018// JDK Imports. These are all spelled-out at the bottom, because none of them are commonly used. 019// *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 020 021// ByteArrayOutputStream, File, IOException 022import java.io.*; 023 024// Callable, Executors, ExecutorService 025import java.util.concurrent.*; 026 027import java.net.URL; 028import java.net.HttpURLConnection; 029import java.util.Vector; 030import java.util.regex.Matcher; 031import java.util.concurrent.locks.Lock; 032import java.util.concurrent.locks.ReentrantLock; 033import javax.imageio.ImageIO; 034import java.awt.image.BufferedImage; 035 036/** 037 * A more advanced class for both downloading and saving a list of images, using URL's. 038 * 039 * <EMBED CLASS='external-html' DATA-FILE-ID=ISR> 040 */ 041@Torello.JavaDoc.StaticFunctional 042@Torello.JavaDoc.JDHeaderBackgroundImg(EmbedTagFileID="IMAGE_SCRAPER_CLASS") 043public class ImageScraper 044{ 045 // This Class is Static-Functional, and does not have any program state, other than the monitor 046 // Thread. There is no need for a public-constructor, or any constructor for that matter. 047 048 private ImageScraper() { } 049 050 // Helps "FIND" the bugs. There are only 6 extra boolean-comparisons for a println 051 // There is no need to delete this right now. 
052 053 private static final boolean DEBUGGING = false; 054 055 056 // ******************************************************************************************** 057 // ******************************************************************************************** 058 // RECORD: Used as ImageScraper class Top-Level Data-Flow **AND** Helper-Function 059 // ******************************************************************************************** 060 // ******************************************************************************************** 061 062 063 // Simple "Record" that makes passing these parameters all around wily-nilly a lot easier 064 // Used Strictly Internally to this class 065 // 066 // There turns out to be a lot of "data" in both the form of "configurations", and even more 067 // that is saved and returned to the user after completion. This RECORD right here saves all 068 // of the data, and keeps inside ... well ... one single (top-level) reference. 069 070 private static class RECORD 071 { 072 private static final String I4 = " "; 073 074 075 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 076 // Constant (final) for the ENTIRETY of the download-process 077 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 078 079 final Request request; 080 final Results results; 081 final AppendableLog al; 082 final AppendableSafe log; 083 084 // Has a non-null log 085 final boolean hasLog; 086 087 // Verbosity-Level that is Strictly Equal-To 088 final boolean logLevelEQ1; 089 final boolean logLevelEQ2; 090 final boolean logLevelEQ3; 091 092 // Verbosity-Level that is Greater-Than or Equal-To 093 final boolean logLevelGTEQ1; 094 final boolean logLevelGTEQ2; 095 096 097 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 098 // These change with each loop iteration 099 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 
*** *** *** *** *** 100 101 // Reference-Fields 102 URL url = null; 103 String[] b64ImageData = null; 104 ImageInfo imageInfo = null; 105 106 // Boolean-Primitive 107 boolean isB64EncodedImage = false; 108 109 110 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 111 // Constructor 112 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 113 114 RECORD(Request request, Results results, AppendableLog al) 115 { 116 this.request = request; 117 this.results = results; 118 this.al = al; 119 this.log = al.log; 120 121 // If there is a non-null log, set the boolean stating that there is a log 122 this.hasLog = (al.log != null); 123 124 // DEBUGGING: 125 // System.out.println("hasLog: " + hasLog + ", al.level: " + al.level); 126 // if (! Q.YN("Continue?")) System.exit(0); 127 128 // Makes Verbose-Printing Code neater and easier to look at. 129 this.logLevelEQ1 = hasLog && (al.level == 1); 130 this.logLevelEQ2 = hasLog && (al.level == 2); 131 this.logLevelEQ3 = hasLog && (al.level == 3); 132 133 // Also Makes Verbosity Faster & Easier to Read 134 this.logLevelGTEQ1 = hasLog && (al.level >= 1); 135 this.logLevelGTEQ2 = hasLog && (al.level >= 2); 136 } 137 138 139 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 140 // Some Simple Methods 141 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 142 143 // This is called at the very beginning of the Primary Download-Loop, directly at the top 144 // of the loop-body. It s the first thing that is done on each iteration of the download. 145 // 146 // NOTE: This resets all NON-FINAL fields in this class. 
147 148 void reset() 149 { 150 this.url = null; 151 this.b64ImageData = null; 152 this.imageInfo = null; 153 this.isB64EncodedImage = false; 154 } 155 156 void append(String s) { log.append(s); } 157 void appendI4(String s) { log.append(I4).append(s); } 158 159 // This is always a useful debugging tool, both now, and possibly in the future 160 public String toString() 161 { 162 return 163 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 164 // Constant (final) for the ENTIRETY of the download-process 165 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 166 167 "RECORD's 'final' Fields (Constant through-out entire-download):\n" + 168 169 // final Request request; 170 I4 + "this.request: " + ((this.request != null) ? "non-" : "") + "null\n" + 171 172 // final Results results; 173 I4 + "this.results: " + ((this.results != null) ? "non-" : "") + "null\n" + 174 175 // final AppendableLog al; 176 I4 + "this.AppendableLog: " + ((this.al != null) ? "non-" : "") + "null\n" + 177 178 // final AppendableSafe log; 179 I4 + "this.AppendableSafe: " + ((this.log != null) ? 
"non-" : "") + "null\n" + 180 181 // final boolean hasLog; 182 I4 + "this.hasLog: " + this.hasLog + '\n' + 183 184 // final boolean logLevelEQ1; 185 I4 + "this.logLevelEQ1: " + this.logLevelEQ1 + '\n' + 186 187 // final boolean logLevelEQ2; 188 I4 + "this.logLevelEQ2: " + this.logLevelEQ2 + '\n' + 189 190 // final boolean logLevelEQ3; 191 I4 + "this.logLevelEQ3: " + this.logLevelEQ3 + '\n' + 192 193 // final boolean logLevelGTEQ1; 194 I4 + "this.logLevelGTEQ1: " + this.logLevelGTEQ1 + '\n' + 195 196 // final boolean logLevelGTEQ2; 197 I4 + "this.logLevelGTEQ2: " + this.logLevelGTEQ2 + '\n' + 198 199 200 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 201 // These change with each loop iteration 202 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 203 204 "Fields that change on each Loop-Iteration:\n" + 205 206 // URL url = null; 207 I4 + "this.url: " + 208 ((this.url != null) ? url.toString() : "null") + '\n' + 209 210 // String[] b64ImageData = null; 211 I4 + "this.b64ImageData: " + 212 ((this.b64ImageData != null) ? "non-" : "") + "null\n" + 213 214 // ImageInfo imageInfo = null; 215 I4 + "this.imageInfo: " + 216 ((this.imageInfo != null) ? 
"non-" : "") + "null\n" + 217 218 // boolean isB64Image = false; 219 I4 + "this.isB64EncodedImage: " + this.isB64EncodedImage + '\n'; 220 } 221 222 223 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 224 // Helpers that SIMULTANEOUSLY write-results to 'Results' and write-log to 'AppendableLog' 225 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 226 227 // Simple Helper for Printing to the Appendable log 228 void printEx(String operation, Throwable t) 229 { 230 this.al.append( 231 " The " + operation + " Code has thrown an Exception:\n" + 232 " Throwable Class: " + t.getClass().getName() + '\n' + 233 " Message: [" + t.getMessage() + "]\n" 234 ); 235 236 while ((t = t.getCause()) != null) this.al.append( 237 " Caused by Throwable Class: " + t.getClass().getName() + '\n' + 238 " Message: [" + t.getMessage() + "]\n" 239 ); 240 } 241 242 // There are 4 different User-Provided Lambda-Targets. If they throw an exception (which 243 // should be extremely rare), this method is called. 244 // 245 // NOTE: This method only works if "RECORD.imageInfo" is NON-NULL. This means that the 246 // first couple of User-Provided Lambda's have to use "reportEx" instead! 247 248 <T> T userLambdaEx(String userLambdaName, Exception e) throws ImageScraperException 249 { 250 this.results.userLambdaException(this.imageInfo, e); 251 252 final String errMsg = 253 "While attempting to invoke the User-Provided Lambda-Target" + 254 "'Request." + userLambdaName + "', an exception was thrown by the code."; 255 256 if (this.request.skipOnUserLambdaException) 257 { 258 if (this.logLevelEQ1) this.append("x "); 259 else if (this.logLevelEQ2) this.appendI4(errMsg + '\n'); 260 else if (this.logLevelEQ3) this.printEx("Invoke User '" + userLambdaName +"'", e); 261 262 return null; 263 } 264 265 else throw new ImageScraperException 266 (errMsg + ". 
Please see Throwable.getCause() for more details.", e); 267 } 268 269 <T> T reportEx(boolean skipBool, String errMsg, String operationName, Exception e) 270 throws ImageScraperException 271 { 272 // Paranoia & Sanity (A Simple Check) (An 'assert' that should never happen) 273 if (e == null) throw new UnreachableError(); 274 275 this.results.exceptionFail(this.url, e); 276 277 if (skipBool) 278 { 279 if (this.logLevelEQ1) this.append("x "); 280 else if (this.logLevelEQ2) this.appendI4(errMsg + '\n'); 281 else if (this.logLevelEQ3) this.printEx(operationName, e); 282 283 return null; 284 } 285 286 else throw ImageScraperException.class.isAssignableFrom(e.getClass()) 287 ? ((ImageScraperException) e) 288 : new ImageScraperException(errMsg + ". See Throwable.geCause() for details.", e); 289 } 290 } 291 292 293 // ******************************************************************************************** 294 // ******************************************************************************************** 295 // Thread-Related Stuff 296 // ******************************************************************************************** 297 // ******************************************************************************************** 298 299 300 /** 301 * If this class has been used to make "multi-threaded" calls that use a Time-Out wait-period, 302 * you might see your Java-Program hang for a few seconds when you would expect it to exit back 303 * to your O.S. normally. 304 * 305 * <BR /><BR /><B CLASS=JDDescLabel>Before Exiting:</B> 306 * 307 * <BR />When a program you have written reaches the end of its code, if you have performed any 308 * time-dependent Image-Downloads using this class (class {@code ImageScraper}), then your 309 * program <I>might not exit immediately,</I> but rather sit at the command-prompt for anywhere 310 * between 10 and 30 seconds before this Timeout-Thread dies. 
     *
     * <BR /><BR />Note that you may immediately terminate any additional threads that were started
     * using this method.
     */
    public static void shutdownTOThreads() { executor.shutdownNow(); }

    // This class is Static-Functional, and these are the only class-fields.  They are both final,
    // and the 'lock' variable is used to ensure that the class is, indeed, Thread-Safe.
    //
    // 'executor' is a JDK cached thread-pool.  Per the java.util.concurrent documentation, idle
    // threads of such a pool are kept for sixty seconds before being terminated, and an executor
    // that has been shut down rejects tasks submitted afterwards.
    //
    // NOTE(review): since this one executor is shared and static, 'shutdownTOThreads()' presumably
    // should only be called once all downloads are finished - confirm against the code that
    // submits the download tasks.

    private static final ExecutorService    executor    = Executors.newCachedThreadPool();
    private static final Lock               lock        = new ReentrantLock();


    // ********************************************************************************************
    // ********************************************************************************************
    // Primary User-API Methods
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * Downloads images located inside an HTML Page and updates the {@code SRC=...} {@code URL's}
     * so that the links point to a <I>local copy</I> of <I>local images</I>.
     *
     * <BR /><BR />After completion of this method, an HTML page which contained any HTML image
     * elements will have had those images downloaded to the local file-system, and also have had
     * the HTML attribute {@code 'src=...'} changed to reflect the local image name instead of the
     * Internet URL name.
     *
     * @param page Any vectorized-html page or subpage.  This page should have HTML {@code <IMG ...>}
     * elements in it, or else this method will exit without doing anything.
     *
     * @param pageURL If any of the HTML image elements have {@code src='...'} attributes that are
     * partially resolved or <I>relative {@code URL's}</I>, then this {@code URL} is used to
     * convert those partial or relative {@code URL's} into complete {@code URL's.}  The Image
     * Downloader simply cannot work with partially resolved {@code URL's}, and will skip them if
     * they are partially resolved.  This parameter may be null, but if it is and there are
     * incomplete-{@code URL's}, those images will simply not be downloaded.
     *
     * @param log This is the 'logger' for this method.  It may be null, and if it is - no output
     * will be sent to the terminal.
     *
     * <EMBED CLASS='external-html' DATA-FILE-ID=APPENDABLE>
     *
     * @param targetDirectory The File-System directory where these files shall be stored.
     *
     * @return An instance of {@code Ret2<int[], Results>}.  The two returned elements
     * of this class include:
     *
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI> {@code Ret2.a (int[])}
     *      <BR /><BR />This shall contain an index-array for the indices of each HTML
     *      {@code '<IMG SRC=...>'} element found on the page.  It is not guaranteed that each of
     *      these images will have been resolved or downloaded successfully, but rather just that
     *      an HTML {@code 'IMG'} element that had a {@code 'SRC'} attribute was found at that
     *      index.  The second element of this return-type will contain information regarding
     *      which images downloaded successfully.
     *      <BR /><BR />
     *      </LI>
     * <LI> {@code Ret2.b (Results)}
     *      <BR /><BR />The second element of the return-type shall be the instance of
     *      {@link Results} returned from the invocation of
     *      {@code ImageScraper.download(...)}.  This method will provide details about each of the
     *      images that were downloaded; or, if the download failed, the reasons for the failure.
375 * <I>This return element shall be null if no images were found on the page.</I> 376 * <BR /> 377 * </LI> 378 * </UL> 379 * 380 * <BR />These return {@code Object} references are not necessarily important - <I>and they 381 * may be discarded if needed.</I> They are provided as a matter of utility if further 382 * verification or research into successful downloads is needed. 383 * 384 * @throws IOException I/O Problems that weren't avoided. 385 * @throws ImageScraperException Thrown for any number of errors that went unsuppressed. 386 */ 387 public static Ret2<int[], Results> localizeImages 388 (Vector<HTMLNode> page, URL pageURL, Appendable log, String targetDirectory) 389 throws IOException, ImageScraperException 390 { 391 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 392 // Find all of the Image TagNode's on the Input Web-Page 393 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 394 395 int[] imgPosArr = TagNodeFind.all(page, TC.Both, "img"); 396 Vector<TagNode> vec = new Vector<>(); 397 398 // No Images Found. 399 if (imgPosArr.length == 0) return new Ret2<int[], Results>(imgPosArr, null); 400 401 for (int pos : imgPosArr) vec.addElement((TagNode) page.elementAt(pos)); 402 403 404 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 405 // Build a Request and Download all of the Image's that were just found / identified 406 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 407 408 Request request = Request.buildFromTagNodeIter(vec, pageURL, true); 409 request.targetDirectory = targetDirectory; 410 411 // NOTE: This is NOT FINISHED: 412 // SET ALL OF THE "Skip On Exception" booleans to TRUE!!! 
413 414 // Invoke the Main Image Downloader 415 Results r = ImageScraper.download(request, log); 416 417 418 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 419 // Replace the <IMG SRC=...> TagNode URL's for images that were successfully downloaded. 420 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 421 422 // Now replace 423 ReplaceFunction replacer = (HTMLNode n, int arrPos, int count) -> 424 { 425 if (r.skipped[count] == false) 426 427 return ((TagNode) page.elementAt(arrPos)) 428 .setAV("src", r.fileNames[count], SD.SingleQuotes); 429 430 else return (TagNode) n; 431 }; 432 433 ReplaceNodes.r(page, imgPosArr, replacer); 434 435 436 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 437 // Report the Results 438 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 439 440 return new Ret2<int[], Results>(imgPosArr, r); 441 } 442 443 /** 444 * This will iterate through the {@code URL's} and download them. Note that parameter 445 * {@code 'log'} may be null, and if so, it will be quietly ignored. 446 * 447 * @param request This parameter takes customization requests for batch image downloads. To 448 * read more information about how to configure a download, please review the documentation for 449 * the class {@link Request}. 450 * 451 * <BR /><BR />Note that upon entering this method, this parameter is immediately cloned to 452 * prevent the possibility of Thread Concurrency Problems from happening. After cloning, the 453 * the cloned instance is used exclusively, and the original parameter is discarded. Further 454 * changes to the parameter-instance will not have any effect on the process. 455 * 456 * @param log This shall receive text / log information. This parameter may receive null, and 457 * if it does it will be ignored. When ignored, logging information will not printed. 
     *
     * <EMBED CLASS='external-html' DATA-FILE-ID=APPENDABLE>
     *
     * @return an instance of {@code class Results} for the download.  The {@link Results} class
     * contains several parallel arrays with information about images that have downloaded.  If an
     * image-download happens to fail due to an improperly formed {@code URL} (or an 'incorrect'
     * {@code URL}), then the information in the {@code Results} arrays will contain a 'null' value
     * for the index at those array-positions corresponding to the failed image.
     *
     * @throws ImageScraperException Thrown for any number of exceptions that may be thrown while
     * executing the download-loop.  If another exception is thrown, then it is wrapped by this
     * class' exception ({@link ImageScraperException}), and set as the {@code 'cause'} of that
     * exception.
     *
     * @throws AppendableError The interface {@code java.lang.Appendable} was designed to allow for
     * an implementation to throw the (checked) exception {@code IOException}.  This has many
     * blessings, but can occasionally be a pain since, indeed, {@code IOException} is both a
     * checked exception (and requires an explicit catch), and also very common
     * (even ubiquitous) inside of HTTP download code.
     *
     * <BR /><BR />If the user-provided {@code 'log'} parameter throws an {@code IOException} for
     * simply trying to write character-data to the log about the download-progress, then <I>an
     * {@code AppendableError} will be thrown</I>.
Note that this throwable does inherit 481 * {@code java.lang.Error}, meaning that it won't be caught by standard Java {@code catch} 482 * clauses <I>(unless {@code 'Error'} is explicity mentioned!)</I> 483 */ 484 public static Results download(Request request, Appendable log) 485 throws ImageScraperException 486 { 487 // Clone the Request, Similar to "SafeVarArgs" - Specifically, if the user starts playing 488 // with the contents of this class in the middle of a download, it will not have any effect 489 // on the 'request' object that is actually being used. 490 491 request = request.clone(); 492 493 // Runs a few tests to make sure there are no problems using the request 494 request.CHECK(); 495 496 // Makes log printing easier and easier. 497 AppendableLog al = new AppendableLog(log, request.verbosity); 498 499 // Main Request-Configuration and Response Class Instances. 500 Results results = new Results(request.size); 501 502 // Private, Internal Static-Class. Makes passing variables even easier 503 RECORD r = new RECORD(request, results, al); 504 505 // Now, this just gets rid of the surrounding try-catch block. This is the only real 506 // reason for the internal/private method 'downloadWithoutTryCatch'. This makes the 507 // indentation look a lot better. Also, in this method, the 'log' is replaced with the 508 // AppendableSafe log 509 510 try 511 { 512 mainDownloadLoop(r); 513 return results; 514 } 515 516 catch (ImageScraperException e) 517 { 518 // If an exception causes the system to stop/halt, this extra '\n\n' makes the output 519 // text look a little nicer (sometimes... Sometimes it already looks fine). 520 // No more no less. 
521 522 if (al.hasLog) al.append("\n\nThrowing ImageScraperException...\n"); 523 throw e; 524 } 525 } 526 527 528 // ******************************************************************************************** 529 // ******************************************************************************************** 530 // Main Download Iterator-Loop Method 531 // ******************************************************************************************** 532 // ******************************************************************************************** 533 534 535 private static void mainDownloadLoop(RECORD r) throws ImageScraperException 536 { 537 // Helps prepare for the printing loop; 538 if (r.logLevelGTEQ1) r.append("\n"); 539 540 // The "Main Benefit" of having a "Loop-Body" Method is to make the code below in the 541 // actual Loop-Body have one-less-level-of-indentation. That's really the only point of 542 // doing this - whatsoever! 543 // 544 // NOTE: Remember that all the 'continue' commands inside "loopBody" had to be changed 545 // into 'return' commands 546 547 for (URL url : r.request.source()) 548 { 549 r.reset(); 550 r.url = url; 551 loopBody(r); 552 } 553 } 554 555 private static void loopBody(RECORD r) throws ImageScraperException 556 { 557 // Print URL-Iterable Number (request.counterPrinter) 558 if (r.logLevelEQ1) 559 r.append(r.request.counterPrinter.apply(r.results.pos) + ": "); 560 561 if (r.logLevelGTEQ2) 562 r.append("\n" + r.request.counterPrinter.apply(r.results.pos) + ": "); 563 564 if (DEBUGGING) System.out.println("HERE: 01 (" + r.results.pos + ")"); 565 566 567 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 568 // DECIDE: Which of the three cases this is: URL, B64-Image, or an Exception-URL 569 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 570 571 // If there was an Image-URL next, then print it !!! 
572 if (r.url != null) 573 { if (r.logLevelGTEQ2) r.append("Image-URL: [" + r.url.toString() + "]\n"); } 574 575 // If There was no URL, Then this is likely a B64-Encoded Image 576 else if ((r.b64ImageData = r.request.nextB64Image()) != null) 577 { 578 r.isB64EncodedImage = true; 579 580 if (r.logLevelGTEQ2) r.append( 581 "BASE-64 IMAGE: " + r.b64ImageData[0] /* imageFormatStr */ + ',' + 582 StrPrint.abbrev(r.b64ImageData[1], 35, true, " ... ", 70) + '\n' 583 ); 584 } 585 586 // If url is null, and this isn't a "B64-Encoded", then it's an Exception-Throw URL 587 else 588 { dealWithExceptionURL(r); return; } 589 590 591 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 592 // DOWNLOAD & CONVERT 593 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 594 595 if (DEBUGGING) System.out.println("HERE: 02 (" + r.results.pos + ")"); 596 597 // If the user provided a 'urlPreProcessor' in his Request-instance, run that now. 598 doUserURLPreProcessorIfNeeded(r); 599 600 if (DEBUGGING) System.out.println("HERE: 03 (" + r.results.pos + ")"); 601 602 // Get the java.awt.image.BufferedImage instance 603 Ret2<BufferedImage, IF> ret2BufferedImage = r.isB64EncodedImage 604 ? convertB64Image(r) 605 : downloadImage(r); 606 607 if (DEBUGGING) System.out.println("HERE: 04 (" + r.results.pos + ")"); 608 609 // If 'null' is returned, The User Requested 'skipOn...' SO - skip-and-move-on. 610 // * Log-Messages will ALREADY have been printed 611 // * class Results array's will ALREADY have been updated. 612 // * If an ImageScraperException is needed, it would ALREADY have been thrown. 
613 614 if (ret2BufferedImage == null) return; 615 616 // Convert java.awt.image.BufferedImage into a byte[]-Array 617 // This 'r2' contains the Image as a byte[]-Array, and the format in which it was saved 618 619 Ret2<byte[], IF> ret2ByteArrImage = writeBufferedImageToByteArray 620 (r, ret2BufferedImage.a /* The Image */, ret2BufferedImage.b /* The Extension */); 621 622 if (DEBUGGING) System.out.println("HERE: 05 (" + r.results.pos + ")"); 623 624 // SAME AS PREVIOUS if (...) return; 625 if (ret2ByteArrImage == null) return; 626 627 628 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 629 // SAVE THE IMAGE (or send to 'Request.imageReceiver') 630 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 631 632 // Now Build an "ImageInfo" instance. (This is sent to any/all User's Lambdas) 633 // NOTE: No error-checking needed for a class that is strictly a data / "RECORD" class 634 635 r.imageInfo = new ImageInfo( 636 // Image-URL (very common) 637 r.url, 638 639 // Base-64 Image Stuff (rare, but not impossible) 640 r.isB64EncodedImage, 641 (r.isB64EncodedImage ? 
r.b64ImageData : null), 642 643 // The actual downloaded and converted images, themselves 644 ret2BufferedImage.a, // java.awt.image.BufferredImage 645 ret2ByteArrImage.a, // byte[] imgByteArr 646 647 // URL-Aquired Extension & Decided-Upon Extension 648 ret2BufferedImage.b, // guessedExt 649 ret2ByteArrImage.b, // actualExt 650 651 // Results Array Counters 652 r.results.pos, 653 r.results.successCounter 654 ); 655 656 if (DEBUGGING) System.out.println("HERE: 06 (" + r.results.pos + ")"); 657 658 // Save to Disk, or Send to Request.imageReceiver 659 handleImageByteArray(r); 660 } 661 662 663 // ******************************************************************************************** 664 // ******************************************************************************************** 665 // "Exception URL's" - Rare, but happens if the Static-Builder threw an Exception 666 // ******************************************************************************************** 667 // ******************************************************************************************** 668 669 670 private static void dealWithExceptionURL(RECORD r) 671 { 672 // "Exception-URL's" are URL's that must have come from the static "TagNode" 673 // Builders in class Request. It happens when a complete-URL cannot be built 674 // from a partial-URL, and the Links-Class saved the Exception in a Vector, 675 // so that it can be reported to the user (righ here!) 676 677 Exception e = r.request.nextTNSRCException(); 678 679 // ASSERT-STATEMENT: The 'request' instance should always return an 'e' here 680 if (e == null) throw new UnreachableError(); 681 682 // Since this "Failed", make sure to let the "Results" object-instance know. 
683 r.results.tagNodeSRCError(e); 684 685 // Now let the user know too 686 if (r.hasLog) 687 { 688 if (r.logLevelEQ1) r.append(" x "); 689 690 else if (r.logLevelGTEQ2) r.append 691 ("URL-Building Exception: " + e.getClass().getName() + '\n'); 692 693 if (r.logLevelEQ3) r.appendI4("Message: " + e.getMessage() + '\n'); 694 } 695 } 696 697 698 // ******************************************************************************************** 699 // ******************************************************************************************** 700 // User-Provided URL-PreProcessor (Maybe!) 701 // ******************************************************************************************** 702 // ******************************************************************************************** 703 704 705 private static void doUserURLPreProcessorIfNeeded(RECORD r) throws ImageScraperException 706 { 707 if ((r.url == null) || (r.request.urlPreProcessor == null)) return; 708 709 try 710 { 711 r.url = r.request.urlPreProcessor.apply(r.url); 712 713 if (r.logLevelGTEQ2) r.appendI4("Pre-Processor URL:" + r.url + '\n'); 714 } 715 716 catch (Exception e) 717 { 718 final String msg = 719 "While attempting to invoke the user provided lambda " + 720 "'Request.urlPreProcessor', an exception was thrown by the user-code."; 721 722 if (r.request.skipOnUserLambdaException) 723 { 724 if (r.logLevelGTEQ2) r.appendI4(msg); 725 if (r.logLevelEQ3) r.printEx("Run URL-PreProcessor", e); 726 if (r.logLevelEQ1) r.append("x "); 727 728 r.results.exceptionFail(r.url, e); 729 730 return; 731 } 732 733 else throw new ImageScraperException 734 (msg + " Please see Throwable.getCause() for more details.", e); 735 } 736 } 737 738 739 // ******************************************************************************************** 740 // ******************************************************************************************** 741 // Convert a B64-Image to a java.awt.image.BufferedImage instance 742 // 
******************************************************************************************** 743 // ******************************************************************************************** 744 745 746 private static Ret2<BufferedImage, IF> convertB64Image(RECORD r) throws ImageScraperException 747 { 748 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 749 // Internally, the request-class saves B64-Images as Two-Element String[]-Array's 750 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 751 752 String imageFormatStr = r.b64ImageData[0]; 753 String b64EncodedImage = r.b64ImageData[1]; 754 755 756 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 757 // Skipping B64-Images entirely is one of the boolean-options in 'Request' 758 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 759 760 if (r.request.skipBase64EncodedImages) 761 { 762 if (r.logLevelEQ1) r.append("x "); 763 else if (r.logLevelGTEQ2) r.appendI4 764 ("Skipping - Skip Request for all Base64-Encoded Images\n"); 765 766 r.results.skipB64(); 767 768 return null; 769 } 770 771 772 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 773 // Try to do the B64-Converstion 774 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 775 776 try 777 { 778 IF ext = IF.get(imageFormatStr); 779 780 BufferedImage image = IF.decodeBase64ToImage(b64EncodedImage, ext); 781 782 // SUCCESS! 
783 if (image != null) new Ret2<>(image, ext); 784 } 785 786 catch (Exception e) 787 { 788 // This call either returns null, or throws an ImageScraperException 789 return r.reportEx( 790 r.request.skipOnB64DecodeException, 791 "Exception throw Java's Base-64 Image Decoder while decoding a Base-64 Image", 792 "Base-64 Image Decoding", 793 e 794 ); 795 } 796 797 798 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 799 // ELSE: The Image was null, so use 'NullImageException' 800 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 801 802 Exception niex = new NullImageException( 803 "The B64-Image Encoding Regular-Expression matched the Source-URL, but " + 804 "Java's B64-Image Decoder has returned null upon decoding it." 805 ); 806 807 niex.fillInStackTrace(); 808 809 // Returns 'null', or throws an exception 810 return r.reportEx( 811 r.request.skipOnNullImageException, 812 "Null Image returned by Java's B64 Image-Decoder", 813 "Base-64 Image Decoding", 814 niex 815 ); 816 } 817 818 819 // ******************************************************************************************** 820 // ******************************************************************************************** 821 // Download an Image to a java.awt.image.BufferedImage instance 822 // ******************************************************************************************** 823 // ******************************************************************************************** 824 825 826 private static Ret2<BufferedImage, IF> downloadImage(RECORD r) throws ImageScraperException 827 { 828 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 829 // Do the "Skipping URL" Lambda-Target right now (if the user's Request-Instance has one) 830 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 831 832 if (r.request.skipURL != null) 833 { 834 try 835 { 
836 if (r.request.skipURL.test(r.url)) 837 { 838 // This *ISN'T* Exception-Case Code, it is a situation where the user has 839 // intentionally asked that this URL be skipped. 840 841 if (r.logLevelEQ1) r.append("x "); 842 843 if (r.logLevelGTEQ2) 844 r.appendI4("URL Skip-Predicate requests this URL be skipped.\n"); 845 846 r.results.skippedURL(r.url); 847 return null; 848 } 849 } 850 851 catch (Exception e) 852 { 853 // This call either returns null, or throws an ImageScraperException 854 // Depending upon the boolean 'r.request.skipOnUserLambdaException' 855 // 856 // NOTE: This **DOESN'T** return (for now). This is actually a non-fatal exception 857 // and progress can actually continue 858 859 r.reportEx( 860 r.request.skipOnUserLambdaException, 861 "Exception Thrown by User-Provided Lambda-Target 'Request.skipURL'", 862 "Invoke User 'Request.skipURL' Lambda-Target", 863 e 864 ); 865 } 866 } 867 868 869 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 870 // Build a Monitor Thread Instance 871 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 872 873 Callable<BufferedImage> threadDownloader = new Callable<BufferedImage>() 874 { 875 public BufferedImage call() throws ImageScraperException 876 { return downloadImageCallable(r); } 877 }; 878 879 880 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 881 // Run the Monitor Thread, return the result... Or Handle the Exception (if there was one) 882 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 883 884 lock.lock(); 885 Future<BufferedImage> future = executor.submit(threadDownloader); 886 lock.unlock(); 887 888 try 889 { 890 BufferedImage bi = future.get(r.request.maxDownloadWaitTime, r.request.waitTimeUnits); 891 892 return (bi == null) 893 ? 
null 894 : new Ret2<>(bi, IF.getGuess(r.url.toString())); 895 } 896 897 898 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 899 // TimeoutException: Web-Server took longer 'Request.maxDownloadWaitTime' 900 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 901 902 catch (TimeoutException e) 903 { 904 // This call either returns null, or throws an ImageScraperException 905 // Depending upon the boolean 'r.request.skipOnTimeOutException' 906 907 return r.reportEx( 908 r.request.skipOnTimeOutException, 909 "Waited: " + r.request.maxDownloadWaitTime + " " + 910 r.request.waitTimeUnits.toString(), 911 "HTTP Image-Download", 912 e 913 ); 914 915 // OLD MESSAGE: 916 // "The download source-code seems to have waited the maximum amount of time, as " + 917 // "specified by the 'maxDownloadWaitTime' configuration parameters:\n" + msg 918 } 919 920 921 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 922 // ExecutionException: An "Exception Wrapper" for internal-exceptions 923 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 924 // 925 // Thrown if there were any exceptions while running the 'Callable' that was created above. 926 // Since the Callable's 'call()' method catches its exceptions, and wraps them inside an 927 // ImageScraperException, THEORETICALLY, the 'cause'-Throwable for this 'e' should 928 // **ALWAYS** be an ImageScraperException 929 // 930 // NOTE: If there is an ImageScraperException, make sure not to report it a second time!!! 
931 932 catch (ExecutionException e) 933 { 934 Throwable cause = e.getCause(); 935 936 if (ImageScraperException.class.isAssignableFrom(cause.getClass())) 937 throw (ImageScraperException) cause; 938 939 // This call either returns null, or throws an ImageScraperException 940 // Depending upon the boolean 'r.request.skipOnDownloadException' 941 942 return r.reportEx( 943 r.request.skipOnDownloadException, 944 "Exception throw by Java Image-Download Code", 945 "HTTP Image-Download", 946 e 947 ); 948 } 949 950 951 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 952 // InterruptedException: I THINK THIS IS UNREACHABLE - UNLESS USER IS INTERRUPTING THINGS! 953 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 954 // 955 // This should mostly be unreachable, unless the end user is 'playing games' with Java's 956 // Thread Mechanism. According to the JavaDoc Pgae for 'InterruptedException' - this is 957 // only thrown if the Thread is interrupte, which certainly won't happen on account of 958 // anything in this tool's code! 
959 960 catch (InterruptedException e) 961 { 962 // This call either returns null, or throws an ImageScraperException 963 // Depending upon the boolean 'r.request.skipOnDownloadException' 964 965 return r.reportEx( 966 r.request.skipOnDownloadException, 967 "Image Download Code Thread was Interrupted", 968 "HTTP Image-Download", 969 e 970 ); 971 } 972 } 973 974 private static BufferedImage downloadImageCallable(RECORD r) throws ImageScraperException 975 { 976 BufferedImage image = null; 977 HttpURLConnection con = null; 978 Exception ex = null; 979 980 try 981 { 982 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 983 // FIRST DOWNLOAD ATTEMPT 984 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 985 986 if (r.request.alwaysUseUserAgent) 987 { 988 con = (HttpURLConnection) r.url.openConnection(); 989 con.setRequestMethod("GET"); 990 con.setRequestProperty("User-Agent", r.request.userAgent); 991 992 image = ImageIO.read(con.getInputStream()); 993 } 994 995 else image = ImageIO.read(r.url); 996 997 998 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 999 // First Download-Attempt Was Possibly Successfull 1000 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1001 1002 if (image != null) return image; 1003 1004 1005 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1006 // IF NULL-IMAGE && NO-RETRY: Then either return null or throw ImageScraperException 1007 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1008 1009 else if (r.request.alwaysUseUserAgent || (! 
r.request.retryWithUserAgent)) 1010 { 1011 // This call either returns null, or throws an ImageScraperException 1012 // Depending upon the boolean 'r.request.skipOnNullImageException' 1013 1014 return r.reportEx( 1015 r.request.skipOnNullImageException, 1016 "Downloaded Empty / Null Image", 1017 "HTTP Image-Download", 1018 (NullImageException) new NullImageException 1019 ("The Image Failed to Download Properly").fillInStackTrace() 1020 ); 1021 } 1022 } 1023 1024 catch (ImageScraperException e) { throw e; } 1025 1026 catch (Exception e) // (IOException | IIOException e) 1027 { 1028 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1029 // EXCEPTION WAS THROWN: So **Possibly** Still need to retry with the User-Agent 1030 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1031 1032 if (r.request.retryWithUserAgent && (! r.request.alwaysUseUserAgent)) 1033 { 1034 if (r.logLevelGTEQ2) r.appendI4( 1035 "Image Download Failed - Re-attempting Download with / via User-Agent: " + 1036 r.request.userAgent + '\n' 1037 ); 1038 } 1039 1040 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1041 // NO RETRY: Either Skip to next image (return null), or throw ImageScraperException 1042 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1043 1044 else 1045 { 1046 // This call either returns null, or throws an ImageScraperException 1047 // Depending upon the boolean 'r.request.skipOnDownloadException' 1048 1049 return r.reportEx( 1050 r.request.skipOnDownloadException, 1051 "Java HTTP Image Downloader javax.imageio.ImageIO.read(...) 
threw Exception", 1052 "HTTP Image-Download", 1053 e 1054 ); 1055 } 1056 } 1057 1058 try 1059 { 1060 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1061 // SECOND DOWNLOAD ATTEMPT 1062 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1063 1064 con = (HttpURLConnection) r.url.openConnection(); 1065 con.setRequestMethod("GET"); 1066 con.setRequestProperty("User-Agent", r.request.userAgent); 1067 1068 image = ImageIO.read(con.getInputStream()); 1069 1070 1071 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1072 // Second Download-Attempt Was Possibly Successfull 1073 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1074 1075 if (image != null) return image; 1076 1077 1078 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1079 // IF NULL-IMAGE: Then either return null or throw ImageScraperException 1080 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1081 // 1082 // This call either returns null, or throws an ImageScraperException 1083 // Depending upon the boolean 'r.request.skipOnNullImageException' 1084 1085 return r.reportEx( 1086 r.request.skipOnNullImageException, 1087 "Downloaded Empty / Null Image", 1088 "HTTP Image-Download", 1089 (NullImageException) new NullImageException 1090 ("The Image Failed to Download Properly").fillInStackTrace() 1091 ); 1092 } 1093 1094 catch (ImageScraperException e) { throw e; } 1095 1096 catch (Exception e) 1097 { 1098 // This call either returns null, or throws an ImageScraperException 1099 // Depending upon the boolean 'r.request.skipOnNullskipOnDownloadExceptionImageException' 1100 1101 return r.reportEx( 1102 r.request.skipOnDownloadException, 1103 "Java HTTP Image Downloader javax.imageio.ImageIO.read(...) 
threw Exception", 1104 "HTTP Image-Download", 1105 e 1106 ); 1107 } 1108 } 1109 1110 1111 // ******************************************************************************************** 1112 // ******************************************************************************************** 1113 // Convert the java.awt.image.BufferedImage **INTO** a Java byte[]-Array 1114 // ******************************************************************************************** 1115 // ******************************************************************************************** 1116 1117 1118 // This just converts an image in the format of a 'BufferedImage' into an image that is an 1119 // array of bytes. This method will attempt to save the image using the format that was 1120 // extracted using the URL-Name / FileName. If that fails, there is a for-loop that will 1121 // attempt to save the image using the other formats. 1122 1123 private static Ret2<byte[], IF> writeBufferedImageToByteArray 1124 (RECORD r, BufferedImage image, IF extGuess) 1125 throws ImageScraperException 1126 { 1127 // This is merely an array of all available formats that may be used to save or download 1128 // an image. 1129 1130 IF[] allFormats = IF.values(); 1131 1132 // This is used to generated the returned byte[] array. 1133 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1134 1135 // This is used if the image could not be converted 1136 Exception saveItEx = null; 1137 1138 1139 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1140 // If the provided Image-Type is NON-NULL, try to save and return the Byte[]-Array 1141 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1142 1143 if (extGuess != null) 1144 1145 try 1146 { 1147 ImageIO.write(image, extGuess.extension, baos); 1148 baos.flush(); 1149 baos.close(); 1150 1151 if (r.logLevelEQ3) r.appendI4( 1152 "Successfully Saved '." 
+ extGuess.extension + "' URL to a '." + 1153 extGuess.extension + "' Formatted Byte-Array.\n" 1154 ); 1155 1156 return new Ret2<>(baos.toByteArray(), extGuess); 1157 } 1158 1159 catch (Exception e) 1160 { 1161 // IMPORTANT: It **IS NOT** time to quit yet! Try the other Image-Types before 1162 // reporting this as a Failed / Exception Case. 1163 1164 saveItEx = e; 1165 1166 if (r.logLevelEQ3) r.appendI4( 1167 "Failed to Convert '." + extGuess.extension + "' URL to a '." + 1168 extGuess.extension + "' Formatted Byte-Array.\n" 1169 ); 1170 1171 for (int i=0; i < allFormats.length; i++) 1172 1173 if (allFormats[i] == extGuess) { allFormats[i] = null; break; } 1174 } 1175 1176 1177 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1178 // Try any / all other formats that have not yet been attempted 1179 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1180 1181 for (IF format : allFormats) 1182 1183 try 1184 { 1185 baos.reset(); 1186 ImageIO.write(image, format.extension, baos); 1187 baos.flush(); 1188 baos.close(); 1189 1190 if (r.logLevelEQ3) r.appendI4( 1191 "Successfully Saved Image-URL to Byte-Array, Using as Guess '." + 1192 format.extension + "' Format\n" 1193 ); 1194 1195 return new Ret2<>(baos.toByteArray(), format); 1196 } 1197 1198 catch (Exception e) 1199 { if (saveItEx == null) saveItEx = e; } 1200 1201 1202 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1203 // All attempts to write using a specific format have failed. Handle the Failure. 
1204 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1205 // 1206 // This call either returns null, or throws an ImageScraperException 1207 // Depending upon the boolean 'r.request.skipOnImageWritingFail' 1208 1209 return r.reportEx( 1210 r.request.skipOnImageWritingFail, 1211 "Could not translate java.awt.image.BufferedImage to a byte[]-Array with *Any* " + 1212 "Standard Image-Format", 1213 "BufferedImage to byte[]-Array", 1214 saveItEx 1215 ); 1216 } 1217 1218 1219 // ******************************************************************************************** 1220 // ******************************************************************************************** 1221 // Write to Disk, or Send to Request.imageReceiver 1222 // ******************************************************************************************** 1223 // ******************************************************************************************** 1224 1225 1226 private static void handleImageByteArray(RECORD r) throws ImageScraperException 1227 { 1228 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1229 // Get the File-Name, this likely is an "error-free" step, but check just in case. 
1230 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1231 1232 String tempFileName = computeFileName(r); 1233 1234 if (tempFileName == null) return; 1235 1236 r.imageInfo.setFileName(tempFileName); 1237 1238 1239 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1240 // Run the User's Keeper-Predicate, if one was supplied 1241 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1242 1243 boolean keepIt = true; 1244 1245 if (r.request.keeperPredicate != null) 1246 1247 try 1248 { keepIt = r.request.keeperPredicate.test(r.imageInfo); } 1249 1250 catch (Exception e) 1251 { r.userLambdaEx("keeperPredicate", e); return;} 1252 1253 1254 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1255 // Write-Image, or send to Request.imageReceiver 1256 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1257 1258 if (! keepIt) 1259 { 1260 r.results.predicateReject(r.imageInfo); 1261 1262 // Now let the user-log know... (MAYBE, IF THEY HAVE LEVEL-CLEARANCE) 1263 if (r.logLevelEQ3) r.appendI4("User-Provided Keeper Predicate Rejected this Image."); 1264 if (r.logLevelEQ1) r.append("x "); 1265 } 1266 1267 else writeOrTransmit(r); 1268 } 1269 1270 private static String computeFileName(RECORD r) throws ImageScraperException 1271 { 1272 String preFix = (r.request.fileNamePrefix != null) ? 
r.request.fileNamePrefix : ""; 1273 1274 1275 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1276 // Use User-Provided "Get File-Name Lambda" - 'Request.getImageFileSaveName' 1277 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1278 1279 if (r.request.getImageFileSaveName != null) 1280 { 1281 String file = null; 1282 1283 try 1284 { file = r.request.getImageFileSaveName.apply(r.imageInfo); } 1285 1286 catch (Exception e) 1287 { return r.userLambdaEx("getImageFileSaveName", e); } 1288 1289 return preFix + file; 1290 } 1291 1292 1293 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1294 // Use 'Results.successCounter' for the File-Name 1295 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1296 1297 else if (r.request.useDefaultCounterForImageFileNames) 1298 1299 return preFix + r.request.counterPrinter.apply(r.results.successCounter); 1300 1301 1302 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1303 // Use the original URL's "File-Name" (Remember, on Yahoo! News, this don't work!) 1304 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1305 1306 else 1307 { 1308 String temp = r.imageInfo.url.getFile().substring(1); 1309 1310 if (r.imageInfo.guessedExtension == null) return preFix + temp; 1311 1312 String ext = r.imageInfo.guessedExtension.extension; 1313 1314 if (temp.toLowerCase().endsWith('.' + ext)) 1315 return preFix + temp.substring(0, temp.length() - 1 - ext.length()); 1316 1317 if (r.imageInfo.guessedExtension.alternateExtension == null) return preFix + temp; 1318 1319 ext = r.imageInfo.guessedExtension.alternateExtension; 1320 1321 if (temp.toLowerCase().endsWith('.' 
+ ext)) 1322 return preFix + temp.substring(0, temp.length() - 1 - ext.length()); 1323 1324 throw new UnreachableError(); 1325 } 1326 } 1327 1328 private static void writeOrTransmit(RECORD r) throws ImageScraperException 1329 { 1330 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1331 // Case: ImageReceiver 1332 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1333 1334 if (r.request.imageReceiver != null) 1335 1336 try 1337 { 1338 r.request.imageReceiver.accept(r.imageInfo); 1339 r.results.success(r.imageInfo, null /* no target directory */); 1340 1341 if (r.logLevelEQ1) r.append("✓ "); 1342 1343 else if (r.logLevelGTEQ2) 1344 r.appendI4("Image Properly Transmitted to Request.imageReceiver\n"); 1345 1346 return; 1347 } 1348 1349 catch (Exception e) 1350 { r.userLambdaEx("imageReceiver", e); return; } 1351 1352 1353 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1354 // Case: File-System 1355 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1356 1357 String dirName = null; 1358 1359 if (r.request.targetDirectory != null) dirName = r.request.targetDirectory; 1360 1361 else if (r.request.targetDirectoryRetriever != null) 1362 { 1363 File dir; 1364 1365 1366 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1367 // Run the Request.targetDirectoryRetriever instance 1368 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1369 1370 try 1371 { dir = r.request.targetDirectoryRetriever.apply(r.imageInfo); } 1372 1373 catch (Exception e) 1374 { r.userLambdaEx("targetDirectoryRetriever", e); return; } 1375 1376 1377 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1378 // Check that the directory returned is non-null and writeable 1379 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 
*** *** *** *** *** *** 1380 1381 try 1382 { WritableDirectoryException.check(dir); } 1383 1384 catch (Exception e) 1385 { 1386 // This call either returns null, or throws an ImageScraperException 1387 // Depending upon the boolean 'r.request.skipOnImageWritingFail' 1388 1389 r.reportEx( 1390 r.request.skipOnImageWritingFail, 1391 "Target-Directory reference provided is not a File-System Writeable Directory", 1392 "Write Image to Disk", 1393 e 1394 ); 1395 } 1396 } 1397 1398 // This scenario is checked inside the Request class "check" method 1399 else throw new UnreachableError(); 1400 1401 1402 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1403 // WRITE THE FILE 1404 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1405 1406 if (! dirName.endsWith(File.separator)) dirName = dirName + File.separator; 1407 1408 try 1409 { 1410 String saveName = 1411 dirName + r.imageInfo.fileName() + '.' + r.imageInfo.actualExtension.extension; 1412 1413 FileRW.writeBinary(r.imageInfo.imgByteArr, saveName); 1414 r.results.success(r.imageInfo, dirName); 1415 1416 if (r.logLevelEQ1) r.append("✓ "); 1417 1418 else if (r.logLevelGTEQ2) 1419 r.appendI4("Image saved successfully to: [" + saveName + "]\n"); 1420 } 1421 1422 catch (Exception e) 1423 { 1424 // This call either returns null, or throws an ImageScraperException 1425 // Depending upon the boolean 'r.request.skipOnImageWritingFail' 1426 1427 r.reportEx( 1428 r.request.skipOnImageWritingFail, 1429 "Exception thrown while attempting to write an image file to disk.", 1430 "Write Image to Disk", 1431 e 1432 ); 1433 } 1434 } 1435}