001package Torello.HTML.NodeSearch;
002
003import java.util.*;
004
005import java.util.function.Predicate;
006
007import Torello.HTML.*;
008
009import Torello.Java.LV;
010import Torello.HTML.Util.Inclusive;
011
012/**
013 * Iterates <B>'Inclusive'</B> {@link TagNode} sublist-matches, which would be similar to iterating
014 * the <CODE>'&#46;innerHTML'</cODE> fields of elements in a JavaScript DOM-Tree.
015 * 
016 * <EMBED CLASS="external-html" DATA-FILE-ID=HNLIInclusive>
017 */
018@SuppressWarnings("unchecked")
019@Torello.HTML.Tools.JavaDoc.JDHeaderBackgroundImg
020public class HNLIInclusive extends AbstractHNLI<TagNode, Vector<HTMLNode>> 
021{
022    // **************************************************************************************
023    // Private, Non-Static, Fields
024    // **************************************************************************************
025
026    private DotPair hasNextDP       = null;
027    private DotPair hasPrevDP       = null;
028    private DotPair lastReturned    = null;
029
030    // **************************************************************************************
031    // lone constructor
032    // **************************************************************************************
033
034    /**
035     * This will produce an P@code Iterator} with generic type {@code 'E'}.  The last parameter to
036     * this constructor {@code Class<E> c} is required since as per Java's Erasure "Feature" -
037     * there is no way to identify what the Variable-Type Parameter {@code 'E'} evaluates at
038     * Run-Time.
039     * 
040     * <BR /><BR /><B><SPAN STYLE="color: red;">PROTECTED NOTE:</B></SPAN> This method is, by
041     * necessity kept {@code 'protected'} because of the nature of what constitutes an match for a
042     * {@code TagNode} when <B>'Inclusive'</B> Sublists are to be returned.
043     * 
044     * @param html This may be any HTML {@code Vector} or sub-section.
045     *
046     * @param p This is a {@code java.util.function.Predicate} that identifies when the
047     * {@code Iterator} should consider a {@code TagNode} a "Match."
048     */
049    HNLIInclusive (Vector<? extends HTMLNode> html, Predicate<TagNode> p)
050    { super(html, p, TagNode.class); }
051
052    void RESET_MATCHES() { hasNextDP = hasPrevDP = lastReturned = null; }
053
054    int REMOVE() { return Util.removeRange(v, lastReturned); }
055
056
057    // **************************************************************************************
058    // HELPER
059    // **************************************************************************************
060
061    private DotPair TEST_CURSOR_INCLUSIVE()
062    {
063        Object o = v.elementAt(cursor);
064
065        if (! (o instanceof TagNode)) return null;
066
067        TagNode tn = (TagNode) o;
068
069        if (tn.isClosing) return null;
070
071        if (! p.test(tn)) return null;
072
073        if (maxCursor == -1)    return Inclusive.dotPairOPT(v, cursor);
074        else                    return Inclusive.dotPairOPT(v, cursor, maxCursor);
075    }
076
077    // **************************************************************************************
078    // "Previous" - Retrieval Operations
079    // **************************************************************************************
080
081    /**
082     * Use this method to find out whether the underlying {@code Vector} and current {@code cursor}
083     * position would retrieve another match if {@code 'previous'} or {@code 'previousIndex'} were
084     * called.
085     * 
086     * @return This shall return <B>TRUE</B> if calling the {@code previous()}, or 
087     * {@code previousIndex()} methods would return another inclusive / sub-list node-match.  This
088     * method shall return <B>FALSE</B> if calling {@code previous()} would generate / throw a 
089     * {@code 'NoSuchElementException'} - <I>because there are no more sub-list matches in the
090     * underlying {@code Vector}, given the current {@code cursor} position.</I>
091     * 
092     * @throws ConcurrentModificationException <EMBED CLASS="external-html" DATA-FILE-ID="CMEX">
093     * 
094     * @see Util.Inclusive#subSectionOPT(Vector, int, int)
095     * @see TagNode#isClosing
096     * @see SubSection
097     */
098    public boolean hasPrevious()
099    {
100        CHECK_CME();
101
102        if (hasPrevDP != null) return true;
103
104        int LOOP_BOUNDARY = (minCursor == -1) ? 0 : minCursor;
105
106        if (cursor == -1) cursor = LOOP_BOUNDARY;  // will return false
107
108        while (--cursor >= LOOP_BOUNDARY)
109
110            if ((hasPrevDP = TEST_CURSOR_INCLUSIVE()) != null)
111                return true;
112
113        return false;
114    }
115
116    /**
117     * Returns the nearest sub-list match in the underlying {@code Vector}, given the current
118     * {@code cursor} position - <I>when searching in the left-direction, or in the direction of
119     * decreasing {@code Vector}-indices.</I>
120     * 
121     * @return This shall return the sub-list match that is directly previous to the current
122     * {@code cursor} position.
123     * 
124     * @throws ConcurrentModificationException <EMBED CLASS="external-html" DATA-FILE-ID="CMEX">
125     * 
126     * @throws NoSuchElementException If there are not more matches, this exception shall throw.  
127     * Avoid having to catch this exception by always calling method {@code 'hasPrevious'}, and
128     * only invoking {@code 'previous'} if that method returned <B>TRUE.</B>
129     */
130    public Vector<HTMLNode> previous()
131    { return Util.cloneRange(v, previousDotPair()); }
132
133    /**
134     * This method, in-some-ways but-not-others, <B>"overrides"</B> the original
135     * {@code ListIterator<E> public int previousIndex()} method.  Since
136     * {@code public class HNLIInclusive} is an {@code Iterator} over "sub-lists" - <I>not
137     * individual nodes</I> - this means that whenever a {@code Vector}-index position is expected,
138     * the programmer should be expecting this {@code Iterator} to return two values:  both a
139     * sub-list start-position, and also a sublist ending-{@code Vector}-position.  <I><B>This is
140     * the purpose of {@code class DotPair}</B></I> - it allows pointers (indices) rather than
141     * copies of the nodes themselves to be saved, copied or evaluated.
142     *
143     * <BR /><BR /><B>NOTE:</B> Java's {@code public int 'previousIndex()'} method requires that an
144     * integer be returned in order for this class to properly implement the
145     * {@code public interface ListIterator<E>}.  This method, however, is offered as a "better 
146     * substitution for" the original {@code 'previousIndex'} method.  The original
147     * {@code previousIndex()} method in the ancestor {@code interface ListIterator}  may still be
148     * used, but the actual intention (finding sublists matches in a vectorized-html webpage) could
149     * be misunderstood by a novice.  Method {@code previousIndex()}, which is mandatory, will
150     * return an integer that points to the beginning index of the next sub-list match, <I>but will
151     * (obviously) leave off the ending-position of the next sub-list match.</I>  Remember that the
152     * concept behind the key-word "Inclusive" is that a {@code Vector}-sublist shall searched,
153     * found, and returned, not just the first HTML Element {@code TagNode} found.
154     *
155     * <BR /><BR /><B><SPAN STYLE="color: red;">SUMMARY OF ISSUE:</B></SPAN> The method
156     * {@code previousDotPair()} returns a value that is more-exactly in-line with the notion of an
157     * HTML Node List Iterator than the method {@code previousIndex()}.  The latter will return an
158     * integer index-pointer (into the underlying vectorized-HTML page-{@code Vector}) that
159     * identifies the first element of the previous-match, but leave off completely information
160     * about where that sublist ends.
161     *
162     * @return The previous integer-pointer pair to the starting-index and ending-index of the
163     * previous "inclusive-sublist match" found on the vectorized-html webpage.
164     *
165     * @throws ConcurrentModificationException <EMBED CLASS="external-html" DATA-FILE-ID="CMEX">
166     * 
167     * @see Util.Inclusive#subSectionOPT(Vector, int, int)
168     * @see TagNode#isClosing
169     */
170    public DotPair previousDotPair()
171    {
172        CHECK_CME();
173
174        lastReturned    = hasPrevDP;
175        hasNextDP       = hasPrevDP = null;
176        modifiedSince   = false;
177
178        if (lastReturned != null) return lastReturned;
179
180        int LOOP_BOUNDARY = (minCursor == -1) ? 0 : minCursor;
181
182        if (cursor == -1) cursor = LOOP_BOUNDARY; // Will throw exception
183
184        while (--cursor >= LOOP_BOUNDARY)
185
186            if ((lastReturned = TEST_CURSOR_INCLUSIVE()) != null)
187                return lastReturned;
188
189        throw new NoSuchElementException("There are no more previous elements available.");
190    }
191
192    // **************************************************************************************
193    // "Next" - Retrieval Operations
194    // **************************************************************************************
195
196    /**
197     * Use this method to find out whether the underlying {@code Vector} and current {@code cursor}
198     * position would retrieve another match if {@code 'next'} or {@code 'nextIndex'} were called.
199     * 
200     * @return This shall return <B>TRUE</B> if calling the {@code next()}, or {@code nextIndex()}
201     * methods would return another inclusive / sub-list match.  This method shall return
202     * <B>FALSE</B> if calling {@code 'next'} would generate / throw a
203     * {@code 'NoSuchElementException'} - <I>because there are no more sub-list matches in the
204     * underlying {@code Vector}, given the current {@code cursor} position.</I>
205     * 
206     * @throws ConcurrentModificationException <EMBED CLASS="external-html" DATA-FILE-ID="CMEX">
207     * 
208     * @see #CHECK_CME()
209     * @see Util.Inclusive#subSectionOPT(Vector, int, int)
210     * @see TagNode#isClosing
211     * @see SubSection
212     */
213    public boolean hasNext()
214    {
215        CHECK_CME();
216
217        if (hasNextDP != null) return true;
218
219        int LOOP_BOUNDARY = (maxCursor == -1) ? (v.size() - 1) : maxCursor;
220
221        if (cursor == -1) cursor = (minCursor == -1) ? -1 : (minCursor-1);
222
223        while (++cursor <= LOOP_BOUNDARY)
224
225            if ((hasNextDP = TEST_CURSOR_INCLUSIVE()) != null)
226                return true;
227
228        return false;
229    }
230
231    /**
232     * Returns the nearest node-match in the underlying {@code Vector}, given the current
233     * {@code cursor} position - <I>when searching in the right-direction, or in the direction of
234     * increasing {@code Vector}-indices.</I>
235     * 
236     * @return This shall return the sub-list match that is directly next to the current
237     * {@code cursor} position.
238     * 
239     * @throws ConcurrentModificationException <EMBED CLASS="external-html" DATA-FILE-ID="CMEX">
240     * 
241     * @throws NoSuchElementException If there are not more matches, this exception shall throw.  
242     * Avoid having to catch this exception by always calling method {@code 'hasNext'}, and only
243     * invoking {@code 'next'} if that method returned <B>TRUE.</B>
244     * 
245     * @see #CHECK_CME()
246     * @see Util.Inclusive#subSectionOPT(Vector, int, int)
247     * @see TagNode#isClosing
248     * @see SubSection
249     */
250    public Vector<HTMLNode> next()
251    { return Util.cloneRange(v, nextDotPair()); }
252
253    /**
254     * This method, in-some-ways but-not-others, <B>"overrides"</B> the original
255     * {@code ListIterator<E>.nextIndex()} method.  Since {@code public class HNLIInclusive} is an
256     * {@code Iterator} over "sub-lists" - <I>not individual nodes</I> - this means that whenever a
257     * {@code Vector}-index position is expected, the programmer should be expecting this
258     * {@code Iterator} to return two values: both a sub-list start-position, and also a sublist
259     * ending-{@code Vector}-position.  <I><B>This is the purpose of {@code class DotPair}</B></I>
260     * - it allows pointers (indices) rather than copies of the nodes themselves to be saved,
261     * copied or evaluated.
262     *
263     * <BR /><BR /><B>NOTE:</B> Java's {@code public int nextIndex()} method requires that an
264     * integer be returned in order for this class to properly implement the
265     * {@code public interface ListIterator<>}.  This method, however, is offered as a "better
266     * substitution for" the original {@code 'nextIndex'} method.  The original {@code nextIndex()}
267     * method in the ancestor {@code class ListIterator} may still be used, but the actual
268     * intention (finding sublists matches in a vectorized-html webpage) could be misunderstood by
269     * a novice.  Method {@code next()}, which is mandatory, will return an integer that points to
270     * the begin index of the next sub-list match, <I>but will (obviously) leave off the
271     * ending-position of the next sub-list match.</I>  Remember that the concept behind the
272     * key-word "Inclusive" is that a {@code Vector}-sublist shall searched, found, and returned,
273     * not just the first HTML Element {@code TagNode} found.
274     *
275     * <BR /><BR /><B><SPAN STYLE="color: red;">SUMMARY OF ISSUE:</B></SPAN> The method
276     * {@code nextDotPair()} returns a value that is more-exactly in-line with the notion of an
277     * HTML Node List Iterator than the method {@code nextIndex()}.  The latter will return an
278     * integer index-pointer (into the underlying vectorized-HTML page-{@code Vector}) that
279     * identifies the first element of the next-match, but leave off completely information about
280     * where that sublist ends.
281     *
282     * @return The next integer-pointer pair to the starting-index and ending-index of the next
283     * "inclusive-sublist match" found on the vectorized-html webpage.
284     *
285     * @throws ConcurrentModificationException <EMBED CLASS="external-html" DATA-FILE-ID="CMEX">
286     * 
287     * @see #CHECK_CME()
288     * @see Util.Inclusive#subSectionOPT(Vector, int, int)
289     * @see TagNode#isClosing
290     * @see SubSection
291     */
292    public DotPair nextDotPair()
293    {
294        CHECK_CME();
295
296        lastReturned    = hasNextDP;
297        hasNextDP       = hasPrevDP = null;
298        modifiedSince   = false;
299
300        if (lastReturned != null) return lastReturned;
301
302        int LOOP_BOUNDARY = (maxCursor == -1) ? (v.size() - 1) : maxCursor;
303
304        if (cursor == -1) cursor = (minCursor == -1) ? -1 : (minCursor-1);
305
306        while (++cursor <= LOOP_BOUNDARY)
307
308            if ((lastReturned = TEST_CURSOR_INCLUSIVE()) != null)
309                return lastReturned;
310
311        throw new NoSuchElementException("There are no more next elements available.");
312    }
313
314    // **************************************************************************************
315    // "First" and "Last" - Retrieval Operations
316    // **************************************************************************************
317
318    /**
319     * This adds method {@code public DotPair firstIDotPair()} to the java
320     * {@code public interface ListIterator<E>.}
321     * This, actually, returns an instance of {@code DotPair}.  Because this {@code Iterator}
322     * iterates {@code Vector}-sublists, not individual HTML nodes, the first-index of the first
323     * match will be a {@code DotPair}, <I>not an integer.</I>  This (hopefully-obvious) is because
324     * the {@code public class DotPair} encapsulates two needed numbers (a {@code Vector}-position
325     * start-index, and an ending-index) into a single-data-class.
326     * 
327     * <EMBED CLASS="external-html" DATA-FILE-ID="CMERESET">
328     *
329     * @return Out of the entire vectorized-html webpage, this method resets the internal
330     * {@code cursor}, and returns the first {@code 'DotPair'} match - the starting-index and
331     * ending-index - of the first "inclusive-sublist match"
332     * 
333     * @see #nextDotPair()
334     * @see #lastDotPair()
335     */
336    public DotPair firstDotPair()
337    {
338        cursor          = 0;
339        hasNextDP       = hasPrevDP = null;
340
341        // Calls to first, last, firstIndex, or lastIndex "reset" the CME Monitor-Logic
342        expectedSize    = v.size();
343
344        return nextDotPair();
345    }
346
347    /**
348     * This does the same as {@code firstIDotPair()} but returns the <B><I>last list
349     * match index-pair</I></B> found within the input {@code Vector}.
350     *
351     * <BR /><BR />This adds method {@code public DotPair lastIDotPair()} to the java
352     * {@code public interface ListIterator<E>.}  This, actually, returns an instance of
353     * {@code DotPair}.  Because this {@code Iterator} iterates {@code Vector}-sublists, not
354     * individual HTML nodes, the last-index of the last match will be a {@code 'DotPair'}
355     * <I>not an integer.</I>  This (hopefully obviously) is because the {@code public
356     * class DotPair} encapsulates two needed numbers (a {@code Vector}-position start-index,
357     * and an ending-index) into a single-data-class.
358     *
359     * <EMBED CLASS="external-html" DATA-FILE-ID="CMERESET">
360     *
361     * @return Out of the entire vectorized-html webpage, this method resets the internal pointer,
362     * and returns the last {@code 'DotPair'} match - the starting-index and ending-index - of the
363     * last "inclusive-sublist match"
364     * 
365     * @see #previousDotPair()
366     * @see #firstDotPair()
367     */
368    public DotPair lastDotPair()
369    {
370        cursor          = v.size() - 1;
371        hasNextDP       = hasPrevDP = null;
372
373        // Calls to first, last, firstIndex, or lastIndex "reset" the CME Monitor-Logic
374        expectedSize    = v.size();
375
376        return previousDotPair();
377    }
378
379    /**
380     * This adds to the {@code ListIterator<E>} class by providing a {@code first()} method that
381     * resets this {@code Iterator} back to the first match that is found in the underlying
382     * html-{@code Vector}.  The internal-{@code cursor} will be moved back to the beginning of
383     * the {@code Vector}.
384     *
385     * <BR /><BR /><B>NOTE:</B> If the underlying web-page {@code Vector} has been modified, then
386     * this method shall return the <I>updated first match.</I>  There is no "match memory."
387     * Rather, if the underlying {@code Vector} changes, further calls to {@code next(), previous(),
388     * first()} and {@code last()} could also change.
389     *
390     * <EMBED CLASS="external-html" DATA-FILE-ID="CMERESET">
391     *
392     * @return This returns the first "inclusive" sub-list (open-tag / start-tag up to the next
393     * close-tag) match as a vectorized-html sublist.
394     * 
395     * @see #next()
396     */
397    public Vector<HTMLNode> first()
398    {
399        cursor          = 0;
400        hasNextDP       = hasPrevDP = null;
401
402        // Calls to first, last, firstIndex, or lastIndex "reset" the CME Monitor-Logic
403        expectedSize    = v.size();
404
405        return next();
406    }
407
408    /**
409     * This adds to the {@code ListIterator<E>} class by providing a {@code last()} method that
410     * moves this {@code Iterator} to the last match that is found in the underlying 
411     * html-{@code Vector}.  The internal-{@code cursor} will be moved directly to the end of the
412     * {@code Vector}.
413     *
414     * <BR /><BR /><B>NOTE:</B> If the underlying web-page {@code Vector} has been modified, then
415     * this method shall return the <I>updated last match.</I>  There is no "match memory."
416     * Rather, if the underlying {@code Vector} changes, further calls to {@code next(), previous(),
417     * first()} and {@code last()} could also change.
418     *
419     * <EMBED CLASS="external-html" DATA-FILE-ID="CMERESET">
420     *
421     * @return This returns the last "inclusive" sub-list (open-tag / start-tag up to the next
422     * close-tag) match as an vectorized-html sublist.
423     * 
424     * @see #previous()
425     */
426    public Vector<HTMLNode> last()
427    {
428        cursor          = v.size() - 1;
429        hasNextDP       = hasPrevDP = null;
430
431        // Calls to first, last, firstIndex, or lastIndex "reset" the CME Monitor-Logic
432        expectedSize    = v.size();
433
434        return previous();
435    }
436
437    // **************************************************************************************
438    // NEXT and PREVIOUS Index
439    // **************************************************************************************
440
441    /**
442     * The veracity of using this method has been eclipsed by method {@code public
443     * previoustDotPair()}.  Nothing problematic should happen, that is unless you forget that this
444     * {@code Iterator} is an 'inclusive' {@code Iterator}.  The word "Inclusive" is intended to
445     * indicate that a 'range' or 'sublist' (demarcated by a {@code 'start'} and {@code 'end'}
446     * {@code Vector}-index pair) are involved.  This is <I>usually-but-not-always</I> expressed
447     * using an instance of class {@code 'DotPair'}.  The starting and ending indices are meant to
448     * point to HTML opening and closing element tags such as: {@code <DIV>} and {@code </DIV>}, or
449     * maybe {@code <A>} and {@code </A>}
450     *
451     * <BR /><BR />Because this method only returns a single integer, and that is the index of the
452     * <I>previous opening HTML Tag</I> matching the iterator's constraints (but leaves off the
453     * closing-tag) this method {@code 'previousIndex()'} may seem out of place.
454     * 
455     * @return Returns the index of the beginning of the previous matched sub-section.
456     */
457    public int previousIndex() { return previousDotPair().start; }
458
459    /**
460     * The veracity of using this method has been eclipsed by method {@code public nextDotPair()}
461     * Nothing problematic should happen, that is unless you forget that this {@code Iterator} is
462     * an 'inclusive' {@code Iterator}. The word "Inclusive" is intended to indicate that a 'range'
463     * or 'sublist' (demarcated by a {@code 'start'} and {@code 'end'} {@code Vector}-index pair)
464     * are involved.  This is <I>usually-but-not-always</I> expressed using an instance of class
465     * {@code 'DotPair'}.  The starting and ending indices are meant to point to HTML opening and
466     * closing element tags such as: {@code <DIV>} and {@code </DIV>}, or maybe {@code <A>} and
467     * {@code </A>}
468     *
469     * <BR /><BR />Because this method only returns a single integer, and that is the index of the
470     * <I>next opening HTML Tag</I> matching the iterator's constraints (but leaves off the
471     * closing-tag) this method {@code 'nextIndex()'} may seem out of place.
472     * 
473     * @return Returns the index of the beginning of the next matched sub-section.
474     */
475    public int nextIndex() { return nextDotPair().start; }
476
477}