package Torello.HTML.Tools.Images;

import java.net.URL;
import java.net.MalformedURLException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.function.IntFunction;

/**
 * An <CODE>Iterator</CODE> that is intended to be used for retrieving the image-URL's from
 * the page.
 *
 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=PBSURLI>
 */
public class URLIterator implements Iterator<URL>
{
    // Inclusive first and last page numbers, and the page number most recently
    // returned by next().  'cur' starts at (start - 1) so the first call to
    // next() yields page 'start'.
    private int start, end, cur;

    // Maps a page number to the URL for that page; supplied by the caller.
    private IntFunction<URL> getter;

    /**
     * Perhaps as more of these "wonderful" photo-bomb sites are published, more versions
     * of this iterator shall occur. Right now, the easiest way to deal with iterating through
     * the forty or fifty pages of photos, is to indicate the start and end number of the pages,
     * <I><B>and require the user/programmer to provide a lambda function</B></I> "making" the
     * URL out of a cur-position number.
     *
     * @param start This is the integer that is the "first" page of the site.
     *
     * <DIV CLASS="HTML">{@code
     * <!-- This URL has a lot of "Cute Little Bears" being saved in Siberia
     *      The way you can scrape all 39 photos quickly is to iterator through
     *      each of the PHP calls via the value passed to "page=" -->
     * <A HREF='https://www.jerusalemonline.com/view/bear-cubs-jol/?page=1'>
     * }</DIV>
     *
     * @param end This is the integer that contains the last page of the photo-site collection.
     * In the particular case of the "Bears who lost their momma in Siberia" - the last page
     * that is currently available is page number 39.
     *
     * @param urlGetter Any programmer that is familiar with Java Lambda Functions, should
     * know this is just Java's version of a "Function Pointer" from C and C++. This function
     * pointer must be a function that takes as input an integer (which is a page number), and
     * returns as output a URL. This will be called once for each page on the site.
     *
     * <DIV CLASS="EXAMPLE">{@code
     * // Generally, one might think this should be a single-line lambda expression. Though
     * // single line function pointers are quite common, because calling the constructor to a
     * // URL can generate a MalformedURLException, and because these exceptions are not
     * // sub-classes of RuntimeException, this short lambda has to include a try-catch. Here,
     * // the checked exception is simply converted to NullPointerException - which is
     * // unchecked. The reality is that if proper values are entered for start and end, no
     * // exceptions will occur.
     * URLIterator iter = new URLIterator(1, 39, (int curPage) ->
     * {
     *     try
     *         { return new URL(urlStr + curPage); }
     *     catch (MalformedURLException e)
     *         { throw new NullPointerException("Malformed URL Exception" + e.toString()); }
     * });
     * }</DIV>
     */
    public URLIterator(int start, int end, IntFunction<URL> urlGetter)
    {
        this.getter = urlGetter;
        this.start  = start;
        this.end    = end;

        // One before 'start', so the first next() invocation returns page 'start'.
        this.cur    = start - 1;
    }

    /**
     * Just checks if there are more elements available.
     * @return {@code TRUE} if there are more pages to check, and {@code FALSE} otherwise.
     */
    public boolean hasNext()
    { return cur < end; }

    /**
     * Meeting the requirements of an instance of Java's standard iterator instance.
     * @return This shall return the "next" URL element from the Photo Site.
     * @throws NoSuchElementException If the iteration has already passed the last page
     * number ({@code end}) that was provided to the constructor.
     */
    public URL next()
    {
        cur++;

        if (cur > end) throw new NoSuchElementException(
            "The current iteration counter is: " + cur +
            " but unfortunately, the max-page-number you passed to the constructor is: " + end
        );

        return getter.apply(cur);
    }

    /**
     * Convenience factory for the most common photo-site layout: each page's URL is just
     * the base URL-{@code String} with the page number appended directly to the end.
     *
     * @param baseURLStr The prefix shared by every page's URL.  The page number is
     * concatenated onto this {@code String} to form each page's complete URL.
     * @param startPageNum The first page number, inclusive.  Most often 1 or, possibly, 0.
     * @param lastPageNum The last page number, inclusive.
     * @return An iterator producing one {@code URL} per page number, in ascending order.
     * @throws NullPointerException If {@code baseURLStr} is null.
     * @throws IllegalArgumentException If either page number is negative, or if
     * {@code startPageNum > lastPageNum}.
     * @throws MalformedURLException If the first page's URL does not parse as a valid URL.
     */
    public static URLIterator usual(String baseURLStr, int startPageNum, int lastPageNum)
        throws MalformedURLException
    {
        CHECK_EXCEPTIONS(baseURLStr, startPageNum, lastPageNum);

        return new URLIterator(startPageNum, lastPageNum, (int curPage) ->
        {
            try
                { return new URL(baseURLStr + curPage); }
            catch (MalformedURLException e)
                { throw new NullPointerException("Malformed URL Exception" + e.toString()); }

            // CHEAP-TRICK: Compile-Time Exception to Runtime Exception...  However, the
            // base-URL has already been tested, and therefore this exception NEEDS to be
            // suppressed...  NOTE: This exception should *NEVER* throw...
        });
    }

    /**
     * Convenience factory for photo-sites whose page URL has the page number in the
     * <I>middle</I>: each page's URL is {@code url + pageNumber + appendParamStr}.
     *
     * @param url The portion of every page's URL that precedes the page number.
     * @param appendParamStr The portion of every page's URL that follows the page number.
     * @param startPageNum The first page number, inclusive.  Most often 1 or, possibly, 0.
     * @param lastPageNum The last page number, inclusive.
     * @return An iterator producing one {@code URL} per page number, in ascending order.
     * @throws NullPointerException If {@code url} is null.
     * @throws IllegalArgumentException If either page number is negative, or if
     * {@code startPageNum > lastPageNum}.
     * @throws MalformedURLException If a probe URL (built with page number 1) does not
     * parse as a valid URL.
     */
    public static URLIterator usual
        (String url, String appendParamStr, int startPageNum, int lastPageNum)
        throws MalformedURLException
    {
        // BUG-FIX: null-check BEFORE concatenating.  Previously a null 'url' was folded
        // into the probe-string as the literal text "null", which mis-reported the error
        // as a MalformedURLException ("no protocol: null...") rather than the intended
        // NullPointerException.
        if (url == null) throw new NullPointerException("A null value was passed as a url.");

        // Probe with page number 1 to fail-fast on an unparsable URL pattern.
        CHECK_EXCEPTIONS(url + 1 + appendParamStr, startPageNum, lastPageNum);

        return new URLIterator(startPageNum, lastPageNum, (int curPage) ->
        {
            try
                { return new URL(url + curPage + appendParamStr); }
            catch (MalformedURLException e)
                { throw new NullPointerException("Malformed URL Exception" + e.toString()); }

            // CHEAP-TRICK: Compile-Time Exception to Runtime Exception...  However, the
            // base-URL has already been tested, and therefore this exception NEEDS to be
            // suppressed...  NOTE: This exception should *NEVER* throw...
        });
    }

    /**
     * Validates the parameters passed to the {@code usual(...)} factory methods before
     * any iterator is constructed.
     *
     * @param url A complete, representative page-URL as a {@code String}.
     * @param startPageNum The first page number, inclusive.
     * @param lastPageNum The last page number, inclusive.
     * @throws NullPointerException If {@code url} is null.
     * @throws IllegalArgumentException If either page number is negative, or if
     * {@code startPageNum > lastPageNum}.
     * @throws MalformedURLException If {@code url} does not parse as a valid URL.
     */
    public static void CHECK_EXCEPTIONS(String url, int startPageNum, int lastPageNum)
        throws MalformedURLException
    {
        // FAIL-FAST: Check user input before the iterator starts iterating.

        // BUG-FIX: the null reference is now rejected FIRST, so a null url always
        // surfaces as NullPointerException rather than an unrelated failure.
        if (url == null) throw new NullPointerException
            ("A null value was passed as a url.");

        if (startPageNum < 0) throw new IllegalArgumentException(
            "The value passed to the starting-page-number parameter [" + startPageNum + "], " +
            "was negative.  Most often it is 1 or, possibly, 0."
        );

        if (lastPageNum < 0) throw new IllegalArgumentException(
            "The value passed to the ending-page-number parameter [" + lastPageNum +
            "], was negative."
        );

        // BUG-FIX: this message previously labelled startPageNum as "the ending-page-number
        // parameter"; it is, of course, the starting-page-number.
        if (startPageNum > lastPageNum) throw new IllegalArgumentException(
            "The value passed to the starting-page-number parameter [" + startPageNum +
            "], was greater " +
            "than the value passed to ending-page-number parameter [" + lastPageNum + "]."
        );

        // FAIL-FAST: This should be a valid URL as a String.  This invocation will throw the
        // MalformedURLException if it is not.
        new URL(url);
    }
}