1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
package Torello.HTML.Tools.Images;

import java.net.URL;
import java.net.MalformedURLException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.function.IntFunction;

/**
 * An <CODE>Iterator</CODE> that is intended to be used for retrieving the image-URL's from
 * the page.
 * 
 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=PBSURLI>
 */
public class URLIterator implements Iterator<URL>
{
    // The last page-number (inclusive) that this iterator will hand to the getter.
    private final int end;

    // User-provided function that converts a page-number into a URL.
    private final IntFunction<URL> getter;

    // The page-number most recently returned by next(); initialized to (start - 1).  Kept as a
    // 'long' so that neither (start - 1) nor (end + 1) can wrap around the 'int' range.
    private long cur;

    /**
     * Perhaps as more of these "wonderful" photo-bomb sites are published, more versions
     * of this iterator shall occur.  Right now, the easiest way to deal with iterating through
     * the forty or fifty pages of photos, is to indicate the start and end number of the pages,
     * <B><I>and require the user/programmer to provide a lambda function</I></B> "making" the
     * URL out of a cur-position number.
     * 
     * <BR /><BR />This constructor does not range-check {@code start} and {@code end}.  If
     * {@code start} is greater than {@code end}, the iterator is simply empty.
     * 
     * @param start This is the integer that is the "first" page of the site.
     * 
     * <DIV CLASS="HTML">{@code 
     * <!-- This URL has a lot of "Cute Little Bears" being saved in Siberia
     *      The way you can scrape all 39 photos quickly is to iterate through
     *      each of the PHP calls via the value passed to "page=" -->
     * <A HREF='https://www.jerusalemonline.com/view/bear-cubs-jol/?page=1'>
     * }</DIV>
     * 
     * @param end This is the integer that contains the last page of the photo-site collection.
     * In the particular case of the "Bears who lost their momma in Siberia" - the last page
     * that is currently available is page number 39.
     * 
     * @param urlGetter  Any programmer that is familiar with Java Lambda Functions, should
     * know this is just Java's version of a "Function Pointer" from C and C++.  This function
     * pointer must be a function that takes as input an integer (which is a page number), and
     * returns as output a URL.  This will be called once for each page on the site.
     * 
     * <DIV CLASS="EXAMPLE">{@code
     * // Generally, one might think this should be a single-line lambda expression.  Though
     * // single line function pointers are quite common, because calling the constructor to a
     * // URL can generate a MalformedURLException, and because these exceptions are not 
     * // sub-classes of RuntimeException, this short lambda has to include a try-catch.  Here,
     * // the checked exception is simply converted to NullPointerException - which is
     * // unchecked.  The reality is that if proper values are entered for start and end, no
     * // exceptions will occur.
     * URLIterator iter = new URLIterator(1, 39, (int curPage) ->
     * {   
     *     try
     *         { return new URL(urlStr + curPage); }
     *     catch (MalformedURLException e)
     *         { throw new NullPointerException("Malformed URL Exception" + e.toString()); }
     * });
     * }</DIV>
     * 
     * @throws NullPointerException If {@code urlGetter} is null.
     */
    public URLIterator(int start, int end, IntFunction<URL> urlGetter)
    {
        // FAIL-FAST: Without this check, a null getter is only discovered on the first call to
        // next(), far away from the code that actually passed the null.
        if (urlGetter == null) throw new NullPointerException
            ("A null value was passed as the urlGetter.");

        this.getter = urlGetter;
        this.end    = end;
        this.cur    = ((long) start) - 1;
    }

    /**
     * Just checks if there are more elements available.
     * @return {@code TRUE} if there are more pages to check, and {@code FALSE} otherwise.
     */
    @Override
    public boolean hasNext() { return cur < end; }

    /**
     * Meeting the requirements of an instance of Java's standard iterator instance.
     * @return This shall return the "next" URL element from the Photo Site.
     * @throws NoSuchElementException If the last page-number has already been returned.
     */
    @Override
    public URL next()
    {
        // The bounds-check happens *before* the counter is advanced, so that calling next() on
        // an exhausted iterator leaves its state (and this message) unchanged.
        if (cur >= end) throw new NoSuchElementException(
            "The current iteration counter is: " + (cur + 1) +
            " but unfortunately, the max-page-number you passed to the constructor is: " + end 
        );

        cur++;

        // Safe narrowing: at this point cur is always less than or equal to 'end' (an int).
        return getter.apply((int) cur);
    }

    /**
     * Builds an iterator whose URL's are formed by appending the page-number directly to the
     * end of a base-URL {@code String}.
     * 
     * @param baseURLStr The URL, as a {@code String}, to which each page-number is appended.
     * @param startPageNum The first page-number to iterate.  May not be negative.
     * @param lastPageNum The last page-number (inclusive) to iterate.  May not be negative, nor
     * less than {@code startPageNum}.
     * 
     * @return An iterator over the URL's for pages {@code startPageNum} through
     * {@code lastPageNum}.
     * 
     * @throws IllegalArgumentException If the page-numbers fail the checks listed above.
     * @throws NullPointerException If {@code baseURLStr} is null.
     * @throws MalformedURLException If {@code baseURLStr} is not accepted by the {@code URL}
     * constructor.
     */
    public static URLIterator usual(String baseURLStr, int startPageNum, int lastPageNum)
        throws MalformedURLException
    {
        CHECK_EXCEPTIONS(baseURLStr, startPageNum, lastPageNum);

        return new URLIterator
            (startPageNum, lastPageNum, curPage -> toURL(baseURLStr + curPage));
    }

    /**
     * Builds an iterator whose URL's are formed by placing the page-number <I>between</I> two
     * {@code String's}: {@code url + pageNumber + appendParamStr}.
     * 
     * @param url The portion of the URL that comes before the page-number.
     * @param appendParamStr The portion of the URL that comes after the page-number.
     * @param startPageNum The first page-number to iterate.  May not be negative.
     * @param lastPageNum The last page-number (inclusive) to iterate.  May not be negative, nor
     * less than {@code startPageNum}.
     * 
     * @return An iterator over the URL's for pages {@code startPageNum} through
     * {@code lastPageNum}.
     * 
     * @throws IllegalArgumentException If the page-numbers fail the checks listed above.
     * @throws NullPointerException If either {@code url} or {@code appendParamStr} is null.
     * @throws MalformedURLException If the assembled URL for the first page is not accepted by
     * the {@code URL} constructor.
     */
    public static URLIterator usual
        (String url, String appendParamStr, int startPageNum, int lastPageNum)
        throws MalformedURLException
    {
        // String-concatenation converts a null reference into the text "null", which would
        // otherwise be silently glued onto every URL this iterator produces.
        if (appendParamStr == null) throw new NullPointerException
            ("A null value was passed as the appendParamStr.");

        // For the same reason, a null 'url' must be forwarded as null (rather than as the text
        // "null" + pageNumber + appendParamStr) so that the null-check below can actually see it.
        final String sample = (url == null) ? null : (url + startPageNum + appendParamStr);

        CHECK_EXCEPTIONS(sample, startPageNum, lastPageNum);

        return new URLIterator
            (startPageNum, lastPageNum, curPage -> toURL(url + curPage + appendParamStr));
    }

    /**
     * Validates the arguments that are passed to the {@code usual(...)} factory methods.
     * 
     * @param url A complete, sample URL as a {@code String}.
     * @param startPageNum The first page-number.  May not be negative.
     * @param lastPageNum The last page-number.  May not be negative, nor less than
     * {@code startPageNum}.
     * 
     * @throws IllegalArgumentException If either page-number is negative, or if
     * {@code startPageNum} is greater than {@code lastPageNum}.
     * @throws NullPointerException If {@code url} is null.
     * @throws MalformedURLException If {@code url} is not accepted by the {@code URL}
     * constructor.
     */
    public static void CHECK_EXCEPTIONS(String url, int startPageNum, int lastPageNum)
        throws MalformedURLException
    {
        // FAIL-FAST: Check user input before the iterator starts iterating.
        if (startPageNum < 0) throw new IllegalArgumentException(
            "The value passed to the starting-page-number parameter [" + startPageNum + "], " +
            "was negative.  Most often it is 1 or, possibly, 0."
        );

        if (lastPageNum < 0) throw new IllegalArgumentException(
            "The value passed to the ending-page-number parameter [" + lastPageNum + "], was negative."
        );

        if (startPageNum > lastPageNum) throw new IllegalArgumentException(
            "The value passed to the starting-page-number parameter [" + startPageNum + "], was " +
            "greater than the value passed to ending-page-number parameter [" + lastPageNum + "]."
        );

        if (url == null) throw new NullPointerException
            ("A null value was passed as a url.");

        // FAIL-FAST:   This should be a valid URL as a String.  This invocation will throw the
        //              MalformedURLException if it is not.
        new URL(url);
    }

    // Converts a String to a URL, re-throwing the checked MalformedURLException as an unchecked
    // NullPointerException so that this may be called from inside a lambda.
    private static URL toURL(String urlStr)
    {
        try
            { return new URL(urlStr); }

        catch (MalformedURLException e)
        {
            // CHEAP-TRICK: Compile-Time Exception to Runtime Exception...  The sample URL has
            // already been tested by CHECK_EXCEPTIONS, so this is not expected to throw.  The
            // exception type and message are kept as they always were; the original exception
            // is attached as the cause so that its stack-trace is not lost.
            NullPointerException npe =
                new NullPointerException("Malformed URL Exception" + e.toString());

            npe.initCause(e);
            throw npe;
        }
    }
}