package Torello.HTML.Tools.Images;

import java.net.URL;
import java.net.MalformedURLException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.function.IntFunction;

/**
 * An <CODE>Iterator</CODE> that is intended to be used for retrieving the image-URL's from
 * the page.
 *
 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=PBSURLI>
 */
public class URLIterator implements Iterator<URL>
{
    // The last page number (inclusive) that this iterator will hand to the getter.
    private final int end;

    // The page number that the next call to next() will use.  Kept as a 'long' so that
    // neither start == Integer.MIN_VALUE nor end == Integer.MAX_VALUE can overflow it.
    private long nextPage;

    // User-provided function that converts a page number into a URL.
    private final IntFunction<URL> getter;

    /**
     * Perhaps as more of these "wonderful" photo-bomb sites are published, more versions
     * of this iterator shall occur.  Right now, the easiest way to deal with iterating through
     * the forty or fifty pages of photos, is to indicate the start and end number of the pages,
     * <I><B>and require the user/programmer to provide a lambda function</B></I> "making" the
     * URL out of a cur-position number.
     *
     * <BR /><BR />If {@code start} is greater than {@code end}, the resulting iterator is
     * simply empty.
     *
     * @param start This is the integer that is the "first" page of the site.
     *
     * <DIV CLASS="HTML">{@code
     * <!-- This URL has a lot of "Cute Little Bears" being saved in Siberia
     *      The way you can scrape all 39 photos quickly is to iterate through
     *      each of the PHP calls via the value passed to "page=" -->
     * <A HREF='https://www.jerusalemonline.com/view/bear-cubs-jol/?page=1'>
     * }</DIV>
     *
     * @param end This is the integer that contains the last page of the photo-site collection.
     * In the particular case of the "Bears who lost their momma in Siberia" - the last page
     * that is currently available is page number 39.  This value is inclusive.
     *
     * @param urlGetter Any programmer that is familiar with Java Lambda Functions, should
     * know this is just Java's version of a "Function Pointer" from C and C++.  This function
     * pointer must be a function that takes as input an integer (which is a page number), and
     * returns as output a URL.  This will be called once for each page on the site.
     *
     * <DIV CLASS="EXAMPLE">{@code
     * // Generally, one might think this should be a single-line lambda expression.  Though
     * // single line function pointers are quite common, because calling the constructor to a
     * // URL can generate a MalformedURLException, and because these exceptions are not
     * // sub-classes of RuntimeException, this short lambda has to include a try-catch.  Here,
     * // the checked exception is simply converted to NullPointerException - which is
     * // unchecked.  The reality is that if proper values are entered for start and end, no
     * // exceptions will occur.
     * URLIterator iter = new URLIterator(1, 39, (int curPage) ->
     * {
     *     try
     *         { return new URL(urlStr + curPage); }
     *     catch (MalformedURLException e)
     *         { throw new NullPointerException("Malformed URL Exception" + e.toString()); }
     * });
     * }</DIV>
     *
     * @throws NullPointerException if {@code urlGetter} is null.
     */
    public URLIterator(int start, int end, IntFunction<URL> urlGetter)
    {
        // FAIL-FAST: a null getter would otherwise only be noticed on the first call to next()
        if (urlGetter == null) throw new NullPointerException
            ("A null value was passed as the urlGetter.");

        this.getter     = urlGetter;
        this.end        = end;
        this.nextPage   = start;
    }

    /**
     * Just checks if there are more elements available.
     * @return {@code TRUE} if there are more pages to check, and {@code FALSE} otherwise.
     */
    @Override
    public boolean hasNext()
    { return nextPage <= end; }

    /**
     * Meeting the requirements of an instance of Java's standard iterator instance.
     *
     * <BR /><BR />The position is advanced before the getter is invoked.  If the getter
     * throws, that page is skipped rather than being retried by the following call.
     *
     * @return This shall return the "next" URL element from the Photo Site.
     *
     * @throws NoSuchElementException if every page from start to end (inclusive) has already
     * been returned.  Calling this method again after exhaustion throws again, and does not
     * alter the state of the iterator.
     */
    @Override
    public URL next()
    {
        // Check *before* advancing, so that an exhausted iterator stays exhausted and the
        // counter can never wrap around past Integer.MAX_VALUE.
        if (nextPage > end) throw new NoSuchElementException(
            "The current iteration counter is: " + nextPage +
            " but unfortunately, the max-page-number you passed to the constructor is: " + end
        );

        // Safe narrowing: at this point  start <= nextPage <= end, both of which are ints.
        int page = (int) nextPage;

        nextPage++;

        return getter.apply(page);
    }

    /**
     * Builds an iterator whose URL's are formed by appending the page number directly to the
     * end of a base URL.
     *
     * @param baseURLStr The URL, as a {@code String}, to which the page number is appended.
     * It must be a valid URL on its own.
     *
     * @param startPageNum The first page number (inclusive).  May not be negative.
     *
     * @param lastPageNum The last page number (inclusive).  May not be negative, nor less
     * than {@code startPageNum}.
     *
     * @return An iterator over {@code baseURLStr + pageNumber} for each page in the range.
     *
     * @throws MalformedURLException if {@code baseURLStr} is not a valid URL.
     * @throws IllegalArgumentException if the page numbers are negative or out of order.
     * @throws NullPointerException if {@code baseURLStr} is null.
     */
    public static URLIterator usual(String baseURLStr, int startPageNum, int lastPageNum)
        throws MalformedURLException
    {
        CHECK_EXCEPTIONS(baseURLStr, startPageNum, lastPageNum);

        return new URLIterator
            (startPageNum, lastPageNum, (int curPage) -> buildURL(baseURLStr + curPage));
    }

    /**
     * Builds an iterator whose URL's are formed by placing the page number between a leading
     * URL {@code String} and a trailing {@code String}.
     *
     * @param url The part of the URL that comes before the page number.
     *
     * @param appendParamStr The part of the URL that comes after the page number.  Pass the
     * empty {@code String} if nothing follows the page number.
     *
     * @param startPageNum The first page number (inclusive).  May not be negative.
     *
     * @param lastPageNum The last page number (inclusive).  May not be negative, nor less
     * than {@code startPageNum}.
     *
     * @return An iterator over {@code url + pageNumber + appendParamStr} for each page in the
     * range.
     *
     * @throws MalformedURLException if {@code url + 1 + appendParamStr} is not a valid URL.
     * @throws IllegalArgumentException if the page numbers are negative or out of order.
     * @throws NullPointerException if {@code url} or {@code appendParamStr} is null.
     */
    public static URLIterator usual
        (String url, String appendParamStr, int startPageNum, int lastPageNum)
        throws MalformedURLException
    {
        // These must be tested before concatenation.  String concatenation turns a null
        // reference into the text "null", which would hide the problem from CHECK_EXCEPTIONS.
        if (url == null) throw new NullPointerException
            ("A null value was passed as a url.");

        if (appendParamStr == null) throw new NullPointerException
            ("A null value was passed as the append-parameter string.");

        CHECK_EXCEPTIONS(url + 1 + appendParamStr, startPageNum, lastPageNum);

        return new URLIterator(
            startPageNum, lastPageNum,
            (int curPage) -> buildURL(url + curPage + appendParamStr)
        );
    }

    /**
     * Validates the arguments that are passed to the {@code usual(...)} factory methods.
     *
     * @param url A complete, sample URL as a {@code String}.
     * @param startPageNum The first page number.
     * @param lastPageNum The last page number.
     *
     * @throws IllegalArgumentException if either page number is negative, or if
     * {@code startPageNum} is greater than {@code lastPageNum}.
     * @throws NullPointerException if {@code url} is null.
     * @throws MalformedURLException if {@code url} cannot be parsed as a URL.
     */
    public static void CHECK_EXCEPTIONS(String url, int startPageNum, int lastPageNum)
        throws MalformedURLException
    {
        // FAIL-FAST: Check user input before the iterator starts iterating.
        if (startPageNum < 0) throw new IllegalArgumentException(
            "The value passed to the starting-page-number parameter [" + startPageNum + "], " +
            "was negative.  Most often it is 1 or, possibly, 0."
        );

        if (lastPageNum < 0) throw new IllegalArgumentException(
            "The value passed to the ending-page-number parameter [" + lastPageNum + "], was negative."
        );

        if (startPageNum > lastPageNum) throw new IllegalArgumentException(
            "The value passed to the starting-page-number parameter [" + startPageNum + "], was greater " +
            "than the value passed to ending-page-number parameter [" + lastPageNum + "]."
        );

        if (url == null) throw new NullPointerException
            ("A null value was passed as a url.");

        // FAIL-FAST: This should be a valid URL as a String.  This invocation will throw the
        // MalformedURLException if it is not.
        new URL(url);
    }

    // Converts a String into a URL, re-throwing the checked MalformedURLException as the
    // unchecked NullPointerException this class has always used for that purpose.
    private static URL buildURL(String urlStr)
    {
        try
            { return new URL(urlStr); }

        catch (MalformedURLException e)
        {
            // CHEAP-TRICK: Compile-Time Exception to Runtime Exception...  The sample URL has
            // already been tested, so this is not expected to throw.  The exception type is
            // kept for backward compatibility, but the original exception is now attached as
            // the cause so that no diagnostic information is lost.
            NullPointerException npe =
                new NullPointerException("Malformed URL Exception" + e.toString());

            npe.initCause(e);
            throw npe;
        }
    }
}