001package Torello.HTML.NodeSearch;
002
003import java.util.*;
004
005import java.util.function.Predicate;
006
007import Torello.HTML.*;
008
009import Torello.Java.LV;
010import Torello.HTML.Util.Inclusive;
011
012/**
013 * Iterates <B>'Inclusive'</B> {@link TagNode} sublist-matches, which would be similar to iterating
014 * the <CODE>'&#46;innerHTML'</cODE> fields of elements in a JavaScript DOM-Tree.
015 * 
016 * <EMBED CLASS='external-html' DATA-FILE-ID=HNLI_EXTENDS_LITER>
017 * <EMBED CLASS='external-html' DATA-FILE-ID=HNLI_EASY_TO_USE>
018 */
019@SuppressWarnings("unchecked")
020@Torello.JavaDoc.JDHeaderBackgroundImg
021public class HNLIInclusive extends AbstractHNLI<TagNode, Vector<HTMLNode>> 
022{
023    // ********************************************************************************************
024    // ********************************************************************************************
025    // Private, Non-Static, Fields
026    // ********************************************************************************************
027    // ********************************************************************************************
028
029
030    private DotPair hasNextDP       = null;
031    private DotPair hasPrevDP       = null;
032    private DotPair lastReturned    = null;
033
034
035    // ********************************************************************************************
036    // ********************************************************************************************
037    // Only Constructor **AND** Package-Private Abstract-Method Implementations
038    // ********************************************************************************************
039    // ********************************************************************************************
040
041
042    /**
043     * This will produce an P@code Iterator} with generic type {@code 'E'}.  The last parameter to
044     * this constructor {@code Class<E> c} is required since as per Java's Erasure "Feature" -
045     * there is no way to identify what the Variable-Type Parameter {@code 'E'} evaluates at
046     * Run-Time.
047     * 
048     * <BR /><BR /><B><SPAN STYLE="color: red;">PROTECTED NOTE:</B></SPAN> This method is, by
049     * necessity kept {@code 'protected'} because of the nature of what constitutes an match for a
050     * {@code TagNode} when <B>'Inclusive'</B> Sublists are to be returned.
051     * 
052     * @param html This may be any HTML {@code Vector} or sub-section.
053     *
054     * @param p This is a {@code java.util.function.Predicate} that identifies when the
055     * {@code Iterator} should consider a {@code TagNode} a "Match."
056     */
057    HNLIInclusive (Vector<? extends HTMLNode> html, Predicate<TagNode> p)
058    { super(html, p, TagNode.class); }
059
060    void RESET_MATCHES() { hasNextDP = hasPrevDP = lastReturned = null; }
061
062    int REMOVE() { return Util.Remove.range(v, lastReturned); }
063
064
065    // ********************************************************************************************
066    // ********************************************************************************************
067    // HELPER
068    // ********************************************************************************************
069    // ********************************************************************************************
070
071
072    private DotPair TEST_CURSOR_INCLUSIVE()
073    {
074        Object o = v.elementAt(cursor);
075
076        if (! (o instanceof TagNode)) return null;
077
078        TagNode tn = (TagNode) o;
079
080        if (tn.isClosing) return null;
081
082        if (! p.test(tn)) return null;
083
084        if (maxCursor == -1)    return Inclusive.dotPairOPT(v, cursor);
085        else                    return Inclusive.dotPairOPT(v, cursor, maxCursor);
086    }
087
088
089    // ********************************************************************************************
090    // ********************************************************************************************
091    // "Previous" - Retrieval Operations
092    // ********************************************************************************************
093    // ********************************************************************************************
094
095
096    /**
097     * Use this method to find out whether the underlying {@code Vector} and current {@code cursor}
098     * position would retrieve another match if {@code 'previous'} or {@code 'previousIndex'} were
099     * called.
100     * 
101     * @return This shall return {@code TRUE} if calling the {@code previous()}, or 
102     * {@code previousIndex()} methods would return another inclusive / sub-list node-match.  This
103     * method shall return {@code FALSE} if calling {@code previous()} would generate / throw a 
104     * {@code 'NoSuchElementException'} - <I>because there are no more sub-list matches in the
105     * underlying {@code Vector}, given the current {@code cursor} position.</I>
106     * 
107     * @throws ConcurrentModificationException
108     * <EMBED CLASS='external-html' DATA-FILE-ID=CONC_MOD_EX>
109     * 
110     * @see Util.Inclusive#subSectionOPT(Vector, int, int)
111     * @see TagNode#isClosing
112     * @see SubSection
113     */
114    public boolean hasPrevious()
115    {
116        CHECK_CME();
117
118        if (hasPrevDP != null) return true;
119
120        int LOOP_BOUNDARY = (minCursor == -1) ? 0 : minCursor;
121
122        if (cursor == -1) cursor = LOOP_BOUNDARY;  // will return false
123
124        while (--cursor >= LOOP_BOUNDARY)
125
126            if ((hasPrevDP = TEST_CURSOR_INCLUSIVE()) != null)
127                return true;
128
129        return false;
130    }
131
132    /**
133     * Returns the nearest sub-list match in the underlying {@code Vector}, given the current
134     * {@code cursor} position - <I>when searching in the left-direction, or in the direction of
135     * decreasing {@code Vector}-indices.</I>
136     * 
137     * @return This shall return the sub-list match that is directly previous to the current
138     * {@code cursor} position.
139     * 
140     * @throws ConcurrentModificationException
141     * <EMBED CLASS='external-html' DATA-FILE-ID=CONC_MOD_EX>
142     * 
143     * @throws NoSuchElementException If there are not more matches, this exception shall throw.  
144     * Avoid having to catch this exception by always calling method {@code 'hasPrevious'}, and
145     * only invoking {@code 'previous'} if that method returned <B>TRUE.</B>
146     */
147    public Vector<HTMLNode> previous()
148    { return Util.cloneRange(v, previousDotPair()); }
149
150    /**
151     * <EMBED CLASS="defs" DATA-NEXT_PREV=previous>
152     * <EMBED CLASS='external-html' DATA-FILE-ID=HNLI_NEXT_PREV_DP>
153     *
154     * @return The previous integer-pointer pair to the starting-index and ending-index of the
155     * previous "inclusive-sublist match" found on the vectorized-html webpage.
156     *
157     * @throws ConcurrentModificationException
158     * <EMBED CLASS='external-html' DATA-FILE-ID=CONC_MOD_EX>
159     * 
160     * @see Util.Inclusive#subSectionOPT(Vector, int, int)
161     * @see TagNode#isClosing
162     */
163    public DotPair previousDotPair()
164    {
165        CHECK_CME();
166
167        lastReturned    = hasPrevDP;
168        hasNextDP       = hasPrevDP = null;
169        modifiedSince   = false;
170
171        if (lastReturned != null) return lastReturned;
172
173        int LOOP_BOUNDARY = (minCursor == -1) ? 0 : minCursor;
174
175        if (cursor == -1) cursor = LOOP_BOUNDARY; // Will throw exception
176
177        while (--cursor >= LOOP_BOUNDARY)
178
179            if ((lastReturned = TEST_CURSOR_INCLUSIVE()) != null)
180                return lastReturned;
181
182        throw new NoSuchElementException("There are no more previous elements available.");
183    }
184
185
186    // ********************************************************************************************
187    // ********************************************************************************************
188    // "Next" - Retrieval Operations
189    // ********************************************************************************************
190    // ********************************************************************************************
191
192
193    /**
194     * Use this method to find out whether the underlying {@code Vector} and current {@code cursor}
195     * position would retrieve another match if {@code 'next'} or {@code 'nextIndex'} were called.
196     * 
197     * @return This shall return {@code TRUE} if calling the {@code next()}, or {@code nextIndex()}
198     * methods would return another inclusive / sub-list match.  This method shall return
199     * {@code FALSE} if calling {@code 'next'} would generate / throw a
200     * {@code 'NoSuchElementException'} - <I>because there are no more sub-list matches in the
201     * underlying {@code Vector}, given the current {@code cursor} position.</I>
202     * 
203     * @throws ConcurrentModificationException
204     * <EMBED CLASS='external-html' DATA-FILE-ID=CONC_MOD_EX>
205     * 
206     * @see #CHECK_CME()
207     * @see Util.Inclusive#subSectionOPT(Vector, int, int)
208     * @see TagNode#isClosing
209     * @see SubSection
210     */
211    public boolean hasNext()
212    {
213        CHECK_CME();
214
215        if (hasNextDP != null) return true;
216
217        int LOOP_BOUNDARY = (maxCursor == -1) ? (v.size() - 1) : maxCursor;
218
219        if (cursor == -1) cursor = (minCursor == -1) ? -1 : (minCursor-1);
220
221        while (++cursor <= LOOP_BOUNDARY)
222
223            if ((hasNextDP = TEST_CURSOR_INCLUSIVE()) != null)
224                return true;
225
226        return false;
227    }
228
229    /**
230     * Returns the nearest node-match in the underlying {@code Vector}, given the current
231     * {@code cursor} position - <I>when searching in the right-direction, or in the direction of
232     * increasing {@code Vector}-indices.</I>
233     * 
234     * @return This shall return the sub-list match that is directly next to the current
235     * {@code cursor} position.
236     * 
237     * @throws ConcurrentModificationException
238     * <EMBED CLASS='external-html' DATA-FILE-ID=CONC_MOD_EX>
239     * 
240     * @throws NoSuchElementException If there are not more matches, this exception shall throw.  
241     * Avoid having to catch this exception by always calling method {@code 'hasNext'}, and only
242     * invoking {@code 'next'} if that method returned <B>TRUE.</B>
243     * 
244     * @see #CHECK_CME()
245     * @see Util.Inclusive#subSectionOPT(Vector, int, int)
246     * @see TagNode#isClosing
247     * @see SubSection
248     */
249    public Vector<HTMLNode> next()
250    { return Util.cloneRange(v, nextDotPair()); }
251
252    /**
253     * <EMBED CLASS="defs" DATA-NEXT_PREV=next>
254     * <EMBED CLASS='external-html' DATA-FILE-ID=HNLI_NEXT_PREV_DP>
255     *
256     * @return The next integer-pointer pair to the starting-index and ending-index of the next
257     * "inclusive-sublist match" found on the vectorized-html webpage.
258     *
259     * @throws ConcurrentModificationException
260     * <EMBED CLASS='external-html' DATA-FILE-ID=CONC_MOD_EX>
261     * 
262     * @see #CHECK_CME()
263     * @see Util.Inclusive#subSectionOPT(Vector, int, int)
264     * @see TagNode#isClosing
265     * @see SubSection
266     */
267    public DotPair nextDotPair()
268    {
269        CHECK_CME();
270
271        lastReturned    = hasNextDP;
272        hasNextDP       = hasPrevDP = null;
273        modifiedSince   = false;
274
275        if (lastReturned != null) return lastReturned;
276
277        int LOOP_BOUNDARY = (maxCursor == -1) ? (v.size() - 1) : maxCursor;
278
279        if (cursor == -1) cursor = (minCursor == -1) ? -1 : (minCursor-1);
280
281        while (++cursor <= LOOP_BOUNDARY)
282
283            if ((lastReturned = TEST_CURSOR_INCLUSIVE()) != null)
284                return lastReturned;
285
286        throw new NoSuchElementException("There are no more next elements available.");
287    }
288
289
290    // ********************************************************************************************
291    // ********************************************************************************************
292    // "First" and "Last" - Retrieval Operations
293    // ********************************************************************************************
294    // ********************************************************************************************
295
296
297    /**
298     * This adds method {@code public DotPair firstIDotPair()} to the java
299     * {@code public interface ListIterator<E>.}
300     * This, actually, returns an instance of {@code DotPair}.  Because this {@code Iterator}
301     * iterates {@code Vector}-sublists, not individual HTML nodes, the first-index of the first
302     * match will be a {@code DotPair}, <I>not an integer.</I>  This (hopefully-obvious) is because
303     * the {@code public class DotPair} encapsulates two needed numbers (a {@code Vector}-position
304     * start-index, and an ending-index) into a single-data-class.
305     * 
306     * <EMBED CLASS='external-html' DATA-FILE-ID=CMERESET>
307     *
308     * @return Out of the entire vectorized-html webpage, this method resets the internal
309     * {@code cursor}, and returns the first {@code 'DotPair'} match - the starting-index and
310     * ending-index - of the first "inclusive-sublist match"
311     * 
312     * @see #nextDotPair()
313     * @see #lastDotPair()
314     */
315    public DotPair firstDotPair()
316    {
317        cursor          = 0;
318        hasNextDP       = hasPrevDP = null;
319
320        // Calls to first, last, firstIndex, or lastIndex "reset" the CME Monitor-Logic
321        expectedSize    = v.size();
322
323        return nextDotPair();
324    }
325
326    /**
327     * This does the same as {@code firstIDotPair()} but returns the <B><I>last list
328     * match index-pair</I></B> found within the input {@code Vector}.
329     *
330     * <BR /><BR />This adds method {@code public DotPair lastIDotPair()} to the java
331     * {@code public interface ListIterator<E>.}  This, actually, returns an instance of
332     * {@code DotPair}.  Because this {@code Iterator} iterates {@code Vector}-sublists, not
333     * individual HTML nodes, the last-index of the last match will be a {@code 'DotPair'}
334     * <I>not an integer.</I>  This (hopefully obviously) is because the {@code public
335     * class DotPair} encapsulates two needed numbers (a {@code Vector}-position start-index,
336     * and an ending-index) into a single-data-class.
337     *
338     * <EMBED CLASS='external-html' DATA-FILE-ID=CMERESET>
339     *
340     * @return Out of the entire vectorized-html webpage, this method resets the internal pointer,
341     * and returns the last {@code 'DotPair'} match - the starting-index and ending-index - of the
342     * last "inclusive-sublist match"
343     * 
344     * @see #previousDotPair()
345     * @see #firstDotPair()
346     */
347    public DotPair lastDotPair()
348    {
349        cursor          = v.size() - 1;
350        hasNextDP       = hasPrevDP = null;
351
352        // Calls to first, last, firstIndex, or lastIndex "reset" the CME Monitor-Logic
353        expectedSize    = v.size();
354
355        return previousDotPair();
356    }
357
358    /**
359     * This adds to the {@code ListIterator<E>} class by providing a {@code first()} method that
360     * resets this {@code Iterator} back to the first match that is found in the underlying
361     * html-{@code Vector}.  The internal-{@code cursor} will be moved back to the beginning of
362     * the {@code Vector}.
363     *
364     * <BR /><BR /><B CLASS=JDDescLabel>Modified Return-Value:</B>
365     * 
366     * <BR />If the underlying web-page {@code Vector} has been modified, then this method shall
367     * return the <I>updated first match.</I>  There is no "match memory."  Rather, if the
368     * underlying {@code Vector} changes, further calls to {@code next(), previous(), first()} and
369     * {@code last()} would also change.
370     *
371     * <EMBED CLASS='external-html' DATA-FILE-ID=CMERESET>
372     *
373     * @return This returns the first "inclusive" sub-list (open-tag / start-tag up to the next
374     * close-tag) match as a vectorized-html sublist.
375     * 
376     * @see #next()
377     */
378    public Vector<HTMLNode> first()
379    {
380        cursor          = 0;
381        hasNextDP       = hasPrevDP = null;
382
383        // Calls to first, last, firstIndex, or lastIndex "reset" the CME Monitor-Logic
384        expectedSize    = v.size();
385
386        return next();
387    }
388
389    /**
390     * This adds to the {@code ListIterator<E>} class by providing a {@code last()} method that
391     * moves this {@code Iterator} to the last match that is found in the underlying 
392     * html-{@code Vector}.  The internal-{@code cursor} will be moved directly to the end of the
393     * {@code Vector}.
394     *
395     * <BR /><BR /><B CLASS=JDDescLabel>Modified Return-Value:</B>
396     * 
397     * <BR />If the underlying web-page {@code Vector} has been modified, then this method shall
398     * return the <I>updated first match.</I>  There is no "match memory."  Rather, if the
399     * underlying {@code Vector} changes, further calls to {@code next(), previous(), first()} and
400     * {@code last()} would also change.
401     *
402     * <EMBED CLASS='external-html' DATA-FILE-ID=CMERESET>
403     *
404     * @return This returns the last "inclusive" sub-list (open-tag / start-tag up to the next
405     * close-tag) match as an vectorized-html sublist.
406     * 
407     * @see #previous()
408     */
409    public Vector<HTMLNode> last()
410    {
411        cursor          = v.size() - 1;
412        hasNextDP       = hasPrevDP = null;
413
414        // Calls to first, last, firstIndex, or lastIndex "reset" the CME Monitor-Logic
415        expectedSize    = v.size();
416
417        return previous();
418    }
419
420
421    // ********************************************************************************************
422    // ********************************************************************************************
423    // NEXT and PREVIOUS Index
424    // ********************************************************************************************
425    // ********************************************************************************************
426
427
428    /**
429     * The veracity of using this method has been eclipsed by method {@code public
430     * previoustDotPair()}.  Nothing problematic should happen, that is unless you forget that this
431     * {@code Iterator} is an 'inclusive' {@code Iterator}.  The word "Inclusive" is intended to
432     * indicate that a 'range' or 'sublist' (demarcated by a {@code 'start'} and {@code 'end'}
433     * {@code Vector}-index pair) are involved.  This is <I>usually-but-not-always</I> expressed
434     * using an instance of class {@code 'DotPair'}.  The starting and ending indices are meant to
435     * point to HTML opening and closing element tags such as: {@code <DIV>} and {@code </DIV>}, or
436     * maybe {@code <A>} and {@code </A>}
437     *
438     * <BR /><BR />Because this method only returns a single integer, and that is the index of the
439     * <I>previous opening HTML Tag</I> matching the iterator's constraints (but leaves off the
440     * closing-tag) this method {@code 'previousIndex()'} may seem out of place.
441     * 
442     * @return Returns the index of the beginning of the previous matched sub-section.
443     */
444    public int previousIndex() { return previousDotPair().start; }
445
446    /**
447     * The veracity of using this method has been eclipsed by method {@code public nextDotPair()}
448     * Nothing problematic should happen, that is unless you forget that this {@code Iterator} is
449     * an 'inclusive' {@code Iterator}. The word "Inclusive" is intended to indicate that a 'range'
450     * or 'sublist' (demarcated by a {@code 'start'} and {@code 'end'} {@code Vector}-index pair)
451     * are involved.  This is <I>usually-but-not-always</I> expressed using an instance of class
452     * {@code 'DotPair'}.  The starting and ending indices are meant to point to HTML opening and
453     * closing element tags such as: {@code <DIV>} and {@code </DIV>}, or maybe {@code <A>} and
454     * {@code </A>}
455     *
456     * <BR /><BR />Because this method only returns a single integer, and that is the index of the
457     * <I>next opening HTML Tag</I> matching the iterator's constraints (but leaves off the
458     * closing-tag) this method {@code 'nextIndex()'} may seem out of place.
459     * 
460     * @return Returns the index of the beginning of the next matched sub-section.
461     */
462    public int nextIndex() { return nextDotPair().start; }
463
464}