001package Torello.HTML.Tools.Images;
002
003import java.net.URL;
004import java.net.MalformedURLException;
005import java.util.Iterator;
006import java.util.NoSuchElementException;
007import java.util.function.IntFunction;
008
/**
 * An <CODE>Iterator</CODE> that is intended to be used for retrieving the image-URL's from
 * the page.  Page numbers from a starting value to an ending value (both inclusive) are
 * visited in ascending order, and each page number is converted to a {@code URL} by a
 * user-supplied function.
 * 
 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=PBSURLI>
 */
public class URLIterator implements Iterator<URL>
{
    // First and last page numbers (both inclusive).  'start' is not read again after
    // construction; it is retained only as a record of the requested range.
    private final int start, end;

    // The page number most recently returned by next().  Initially one less than 'start'.
    // A 'long' is used so that neither "start - 1" (when start is Integer.MIN_VALUE) nor
    // the increment past Integer.MAX_VALUE can overflow and wrap around.
    private long cur;

    // Converts a page number into the URL for that page.
    private final IntFunction<URL> getter;

    /**
     * Perhaps as more of these "wonderful" photo-bomb sites are published, more versions
     * of this iterator shall occur.  Right now, the easiest way to deal with iterating through
     * the forty or fifty pages of photos, is to indicate the start and end number of the pages,
     * <I><B>and require the user/programmer to provide a lambda function</B></I> "making" the
     * URL out of a cur-position number.
     * 
     * <BR /><BR />If {@code start} is greater than {@code end}, the resulting iterator is
     * simply empty.  This constructor performs no range validation.
     * 
     * @param start This is the integer that is the "first" page of the site.
     * 
     * <DIV CLASS="HTML">{@code 
     * <!-- This URL has a lot of "Cute Little Bears" being saved in Siberia
     *      The way you can scrape all 39 photos quickly is to iterate through
     *      each of the PHP calls via the value passed to "page=" -->
     * <A HREF='https://www.jerusalemonline.com/view/bear-cubs-jol/?page=1'>
     * }</DIV>
     * 
     * @param end This is the integer that contains the last page of the photo-site collection.
     * In the particular case of the "Bears who lost their momma in Siberia" - the last page
     * that is currently available is page number 39.
     * 
     * @param urlGetter  Any programmer that is familiar with Java Lambda Functions, should
     * know this is just Java's version of a "Function Pointer" from C and C++.  This function
     * pointer must be a function that takes as input an integer (which is a page number), and
     * returns as output a URL.  This will be called once for each page on the site.
     * 
     * <DIV CLASS="EXAMPLE">{@code
     * // Generally, one might think this should be a single-line lambda expression.  Though
     * // single line function pointers are quite common, because calling the constructor to a
     * // URL can generate a MalformedURLException, and because these exceptions are not 
     * // sub-classes of RunTimeException, this short lambda has to include a try-catch.  Here,
     * // the checked exception is simply converted to NullPointerException - which is
     * // unchecked.  The reality is that if proper values are entered for start and end, no
     * // exceptions will occur.
     * URLIterator iter = new URLIterator(1, 39, (int curPage) ->
     * {   
     *     try
     *         { return new URL(urlStr + curPage); }
     *     catch (MalformedURLException e)
     *         { throw new NullPointerException("Malformed URL Exception" + e.toString()); }
     * });
     * }</DIV>
     * 
     * @throws NullPointerException If {@code urlGetter} is null.
     */
    public URLIterator(int start, int end, IntFunction<URL> urlGetter)
    {
        // FAIL-FAST: A null getter would otherwise only be noticed on the first call to next().
        if (urlGetter == null) throw new NullPointerException
            ("A null value was passed as the urlGetter.");

        this.getter = urlGetter;
        this.start  = start;
        this.end    = end;

        // Widen before subtracting, so that Integer.MIN_VALUE does not wrap around.
        this.cur    = ((long) start) - 1;
    }

    /**
     * Just checks if there are more elements available.
     * @return {@code TRUE} if there are more pages to check, and {@code FALSE} otherwise.
     */
    @Override
    public boolean hasNext() { return cur < end; }

    /**
     * Meeting the requirements of an instance of Java's standard iterator instance.
     * @return This shall return the "next" URL element from the Photo Site.
     * @throws NoSuchElementException If every page up to and including the last page has
     * already been returned.  The state of the iterator is not modified in that case.
     */
    @Override
    public URL next()
    {
        // Check BEFORE advancing: a failed call must not move the counter.  Otherwise repeated
        // calls would keep incrementing it, and an 'int' counter could wrap past
        // Integer.MAX_VALUE and start handing out pages again.
        if (cur >= end) throw new NoSuchElementException(
            "The current iteration counter is: " + (cur + 1) +
            " but unfortunately, the max-page-number you passed to the constructor is: " + end 
        );

        cur++;

        // Here start <= cur <= end, so the value always fits in an 'int'.
        return getter.apply((int) cur);
    }

    /**
     * Builds an iterator for the common case where the page number is simply appended to the
     * end of a base URL.
     * 
     * @param baseURLStr The URL, as a {@code String}, to which each page number is appended.
     * This string itself (without any page number) must be a valid URL.
     * @param startPageNum The first page number (inclusive).
     * @param lastPageNum The last page number (inclusive).
     * @return An iterator returning one URL per page number.
     * 
     * @throws IllegalArgumentException If either page number is negative, or if
     * {@code startPageNum} is greater than {@code lastPageNum}.
     * @throws NullPointerException If {@code baseURLStr} is null.
     * @throws MalformedURLException If {@code baseURLStr} is not a valid URL.
     */
    public static URLIterator usual(String baseURLStr, int startPageNum, int lastPageNum)
        throws MalformedURLException
    {
        CHECK_EXCEPTIONS(baseURLStr, startPageNum, lastPageNum);

        return new URLIterator
            (startPageNum, lastPageNum, (int curPage) -> toURL(baseURLStr + curPage));
    }

    /**
     * Builds an iterator for the case where the page number sits in the middle of the URL:
     * each URL is {@code url + pageNumber + appendParamStr}.
     * 
     * @param url The part of the URL that comes before the page number.
     * @param appendParamStr The part of the URL that comes after the page number.  Note that
     * this is joined by ordinary string concatenation, so a null is appended as the text
     * {@code "null"}.
     * @param startPageNum The first page number (inclusive).
     * @param lastPageNum The last page number (inclusive).
     * @return An iterator returning one URL per page number.
     * 
     * @throws IllegalArgumentException If either page number is negative, or if
     * {@code startPageNum} is greater than {@code lastPageNum}.
     * @throws NullPointerException If {@code url} is null.
     * @throws MalformedURLException If the URL built using page number {@code 1} is not a
     * valid URL.
     */
    public static URLIterator usual
        (String url, String appendParamStr, int startPageNum, int lastPageNum)
        throws MalformedURLException
    {
        // The URL is validated using a sample page number (1).  A null 'url' has to be
        // forwarded as null: concatenating it would silently produce the string "null1..."
        // and the null-check inside CHECK_EXCEPTIONS would never fire.
        String sampleURL = (url == null) ? null : (url + 1 + appendParamStr);

        CHECK_EXCEPTIONS(sampleURL, startPageNum, lastPageNum);

        return new URLIterator
            (startPageNum, lastPageNum, (int curPage) -> toURL(url + curPage + appendParamStr));
    }

    /**
     * Validates the arguments passed to the {@code usual(...)} factory methods.  The checks
     * are performed in this order: page numbers first, then the null check, then the URL
     * syntax check.
     * 
     * @param url A complete URL, as a {@code String}, to test for validity.
     * @param startPageNum The first page number.
     * @param lastPageNum The last page number.
     * 
     * @throws IllegalArgumentException If either page number is negative, or if
     * {@code startPageNum} is greater than {@code lastPageNum}.
     * @throws NullPointerException If {@code url} is null.
     * @throws MalformedURLException If {@code url} cannot be parsed as a URL.
     */
    public static void CHECK_EXCEPTIONS(String url, int startPageNum, int lastPageNum)
        throws MalformedURLException
    {
        // FAIL-FAST: Check user input before the iterator starts iterating.
        if (startPageNum < 0) throw new IllegalArgumentException(
            "The value passed to the starting-page-number parameter [" + startPageNum + "], " +
            "was negative.  Most often it is 1 or, possibly, 0."
        );

        if (lastPageNum < 0) throw new IllegalArgumentException(
            "The value passed to the ending-page-number parameter [" + lastPageNum + "], was negative."
        );

        if (startPageNum > lastPageNum) throw new IllegalArgumentException(
            "The value passed to the starting-page-number parameter [" + startPageNum + "], was greater " +
            "than the value passed to ending-page-number parameter [" + lastPageNum + "]."
        );

        if (url == null) throw new NullPointerException
            ("A null value was passed as a url.");

        // FAIL-FAST:   This should be a valid URL as a String.  This invocation will throw the
        //              MalformedURLException if it is not.
        new URL(url);
    }

    // Converts a String to a URL, re-throwing the checked MalformedURLException as an
    // unchecked NullPointerException (the exception type this class has always used for this
    // purpose), with the original exception attached as the cause.
    private static URL toURL(String urlStr)
    {
        try
            { return new URL(urlStr); }

        catch (MalformedURLException e)
        {
            // CHEAP-TRICK: Compile-Time Exception to Runtime Exception...  The URL has already
            // been tested by CHECK_EXCEPTIONS, so this is not expected to happen in practice.
            NullPointerException npe =
                new NullPointerException("Malformed URL Exception" + e.toString());

            // NullPointerException has no (message, cause) constructor, so attach it here.
            npe.initCause(e);
            throw npe;
        }
    }
}