package Torello.HTML.NodeSearch;

import java.util.*;

import java.util.function.Predicate;

import Torello.HTML.*;

import Torello.Java.LV;
import Torello.HTML.Util.Inclusive;

/**
 * Iterates <B>'Inclusive'</B> {@link TagNode} sublist-matches, which would be similar to iterating
 * the <CODE>'.innerHTML'</CODE> fields of elements in a JavaScript DOM-Tree.
 *
 * <EMBED CLASS='external-html' DATA-FILE-ID=HNLI_EXTENDS_LITER>
 * <EMBED CLASS='external-html' DATA-FILE-ID=HNLI_EASY_TO_USE>
 */
@SuppressWarnings("unchecked")
@Torello.JavaDoc.JDHeaderBackgroundImg
public class HNLIInclusive extends AbstractHNLI<TagNode, Vector<HTMLNode>>
{
    // ********************************************************************************************
    // ********************************************************************************************
    // Private, Non-Static, Fields
    // ********************************************************************************************
    // ********************************************************************************************


    // A match already located by hasNext(), but not yet handed out by nextDotPair()
    private DotPair hasNextDP = null;

    // A match already located by hasPrevious(), but not yet handed out by previousDotPair()
    private DotPair hasPrevDP = null;

    // The match most recently handed out by nextDotPair() / previousDotPair().  This is the
    // range that REMOVE() deletes.
    private DotPair lastReturned = null;


    // ********************************************************************************************
    // ********************************************************************************************
    // Only Constructor **AND** Package-Private Abstract-Method Implementations
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * This will produce an {@code Iterator} over <B>'Inclusive'</B> sub-list matches.  The
     * class-object {@code TagNode.class} is forwarded to the super-class constructor.
     *
     * <BR /><BR /><B><SPAN STYLE="color: red;">PACKAGE-PRIVATE NOTE:</SPAN></B> This
     * constructor is, by necessity, kept package-private because of the nature of what
     * constitutes a match for a {@code TagNode} when <B>'Inclusive'</B> Sublists are to be
     * returned.
     *
     * @param html This may be any HTML {@code Vector} or sub-section.
     *
     * @param p This is a {@code java.util.function.Predicate} that identifies when the
     * {@code Iterator} should consider a {@code TagNode} a "Match."
     */
    HNLIInclusive (Vector<? extends HTMLNode> html, Predicate<TagNode> p)
    { super(html, p, TagNode.class); }

    // Forgets every cached / previously returned match.
    void RESET_MATCHES() { hasNextDP = hasPrevDP = lastReturned = null; }

    // Removes the most recently returned range from the underlying Vector, and returns
    // whatever Util.Remove.range returns.
    int REMOVE() { return Util.Remove.range(v, lastReturned); }


    // ********************************************************************************************
    // ********************************************************************************************
    // HELPERS
    // ********************************************************************************************
    // ********************************************************************************************


    // Tests the node at the current cursor position.  Returns null unless that node is an
    // opening TagNode that is accepted by the Predicate; otherwise returns whatever
    // Inclusive.dotPairOPT produces for that position (which may itself be null).
    private DotPair TEST_CURSOR_INCLUSIVE()
    {
        Object o = v.elementAt(cursor);

        if (! (o instanceof TagNode)) return null;

        TagNode tn = (TagNode) o;

        if (tn.isClosing) return null;

        if (! p.test(tn)) return null;

        if (maxCursor == -1)    return Inclusive.dotPairOPT(v, cursor);
        else                    return Inclusive.dotPairOPT(v, cursor, maxCursor);
    }

    // Moves the cursor to the left (decreasing indices) until a match is found, or until the
    // lower boundary has been passed.  Returns the match, or null if there is none.
    private DotPair SEARCH_BACKWARD_INCLUSIVE()
    {
        int LOOP_BOUNDARY = (minCursor == -1) ? 0 : minCursor;

        // A cursor of -1 is the "not started" value.  Nothing precedes it, so the loop below
        // is arranged to terminate immediately.
        if (cursor == -1) cursor = LOOP_BOUNDARY;

        while (--cursor >= LOOP_BOUNDARY)
        {
            DotPair dp = TEST_CURSOR_INCLUSIVE();

            if (dp != null) return dp;
        }

        return null;
    }

    // Moves the cursor to the right (increasing indices) until a match is found, or until the
    // upper boundary has been passed.  Returns the match, or null if there is none.
    private DotPair SEARCH_FORWARD_INCLUSIVE()
    {
        int LOOP_BOUNDARY = (maxCursor == -1) ? (v.size() - 1) : maxCursor;

        // A cursor of -1 is the "not started" value.  Position the cursor one slot before the
        // first index that may be inspected, since the loop pre-increments.
        if (cursor == -1) cursor = (minCursor == -1) ? -1 : (minCursor-1);

        while (++cursor <= LOOP_BOUNDARY)
        {
            DotPair dp = TEST_CURSOR_INCLUSIVE();

            if (dp != null) return dp;
        }

        return null;
    }


    // ********************************************************************************************
    // ********************************************************************************************
    // "Previous" - Retrieval Operations
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * Use this method to find out whether the underlying {@code Vector} and current {@code cursor}
     * position would retrieve another match if {@code 'previous'} or {@code 'previousIndex'} were
     * called.
     *
     * @return This shall return {@code TRUE} if calling the {@code previous()}, or
     * {@code previousIndex()} methods would return another inclusive / sub-list node-match.  This
     * method shall return {@code FALSE} if calling {@code previous()} would generate / throw a
     * {@code 'NoSuchElementException'} - <I>because there are no more sub-list matches in the
     * underlying {@code Vector}, given the current {@code cursor} position.</I>
     *
     * @throws ConcurrentModificationException
     * <EMBED CLASS='external-html' DATA-FILE-ID=CONC_MOD_EX>
     *
     * @see Util.Inclusive#subSectionOPT(Vector, int, int)
     * @see TagNode#isClosing
     * @see SubSection
     */
    public boolean hasPrevious()
    {
        CHECK_CME();

        // A match was already located by an earlier call, and has not been consumed yet.
        if (hasPrevDP != null) return true;

        hasPrevDP = SEARCH_BACKWARD_INCLUSIVE();

        return hasPrevDP != null;
    }

    /**
     * Returns the nearest sub-list match in the underlying {@code Vector}, given the current
     * {@code cursor} position - <I>when searching in the left-direction, or in the direction of
     * decreasing {@code Vector}-indices.</I>
     *
     * @return This shall return the sub-list match that is directly previous to the current
     * {@code cursor} position.
     *
     * @throws ConcurrentModificationException
     * <EMBED CLASS='external-html' DATA-FILE-ID=CONC_MOD_EX>
     *
     * @throws NoSuchElementException If there are no more matches, this exception shall throw.
     * Avoid having to catch this exception by always calling method {@code 'hasPrevious'}, and
     * only invoking {@code 'previous'} if that method returned <B>TRUE.</B>
     */
    public Vector<HTMLNode> previous()
    { return Util.cloneRange(v, previousDotPair()); }

    /**
     * <EMBED CLASS="defs" DATA-NEXT_PREV=previous>
     * <EMBED CLASS='external-html' DATA-FILE-ID=HNLI_NEXT_PREV_DP>
     *
     * @return The previous integer-pointer pair to the starting-index and ending-index of the
     * previous "inclusive-sublist match" found on the vectorized-html webpage.
     *
     * @throws ConcurrentModificationException
     * <EMBED CLASS='external-html' DATA-FILE-ID=CONC_MOD_EX>
     *
     * @throws NoSuchElementException If there are no more matches in the left-direction.
     *
     * @see Util.Inclusive#subSectionOPT(Vector, int, int)
     * @see TagNode#isClosing
     */
    public DotPair previousDotPair()
    {
        CHECK_CME();

        // Consume whatever hasPrevious() may have cached, and invalidate both caches.
        lastReturned    = hasPrevDP;
        hasNextDP       = hasPrevDP = null;
        modifiedSince   = false;

        if (lastReturned != null) return lastReturned;

        lastReturned = SEARCH_BACKWARD_INCLUSIVE();

        if (lastReturned != null) return lastReturned;

        throw new NoSuchElementException("There are no more previous elements available.");
    }


    // ********************************************************************************************
    // ********************************************************************************************
    // "Next" - Retrieval Operations
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * Use this method to find out whether the underlying {@code Vector} and current {@code cursor}
     * position would retrieve another match if {@code 'next'} or {@code 'nextIndex'} were called.
     *
     * @return This shall return {@code TRUE} if calling the {@code next()}, or {@code nextIndex()}
     * methods would return another inclusive / sub-list match.  This method shall return
     * {@code FALSE} if calling {@code 'next'} would generate / throw a
     * {@code 'NoSuchElementException'} - <I>because there are no more sub-list matches in the
     * underlying {@code Vector}, given the current {@code cursor} position.</I>
     *
     * @throws ConcurrentModificationException
     * <EMBED CLASS='external-html' DATA-FILE-ID=CONC_MOD_EX>
     *
     * @see #CHECK_CME()
     * @see Util.Inclusive#subSectionOPT(Vector, int, int)
     * @see TagNode#isClosing
     * @see SubSection
     */
    public boolean hasNext()
    {
        CHECK_CME();

        // A match was already located by an earlier call, and has not been consumed yet.
        if (hasNextDP != null) return true;

        hasNextDP = SEARCH_FORWARD_INCLUSIVE();

        return hasNextDP != null;
    }

    /**
     * Returns the nearest sub-list match in the underlying {@code Vector}, given the current
     * {@code cursor} position - <I>when searching in the right-direction, or in the direction of
     * increasing {@code Vector}-indices.</I>
     *
     * @return This shall return the sub-list match that is directly next to the current
     * {@code cursor} position.
     *
     * @throws ConcurrentModificationException
     * <EMBED CLASS='external-html' DATA-FILE-ID=CONC_MOD_EX>
     *
     * @throws NoSuchElementException If there are no more matches, this exception shall throw.
     * Avoid having to catch this exception by always calling method {@code 'hasNext'}, and only
     * invoking {@code 'next'} if that method returned <B>TRUE.</B>
     *
     * @see #CHECK_CME()
     * @see Util.Inclusive#subSectionOPT(Vector, int, int)
     * @see TagNode#isClosing
     * @see SubSection
     */
    public Vector<HTMLNode> next()
    { return Util.cloneRange(v, nextDotPair()); }

    /**
     * <EMBED CLASS="defs" DATA-NEXT_PREV=next>
     * <EMBED CLASS='external-html' DATA-FILE-ID=HNLI_NEXT_PREV_DP>
     *
     * @return The next integer-pointer pair to the starting-index and ending-index of the next
     * "inclusive-sublist match" found on the vectorized-html webpage.
     *
     * @throws ConcurrentModificationException
     * <EMBED CLASS='external-html' DATA-FILE-ID=CONC_MOD_EX>
     *
     * @throws NoSuchElementException If there are no more matches in the right-direction.
     *
     * @see #CHECK_CME()
     * @see Util.Inclusive#subSectionOPT(Vector, int, int)
     * @see TagNode#isClosing
     * @see SubSection
     */
    public DotPair nextDotPair()
    {
        CHECK_CME();

        // Consume whatever hasNext() may have cached, and invalidate both caches.
        lastReturned    = hasNextDP;
        hasNextDP       = hasPrevDP = null;
        modifiedSince   = false;

        if (lastReturned != null) return lastReturned;

        lastReturned = SEARCH_FORWARD_INCLUSIVE();

        if (lastReturned != null) return lastReturned;

        throw new NoSuchElementException("There are no more next elements available.");
    }


    // ********************************************************************************************
    // ********************************************************************************************
    // "First" and "Last" - Retrieval Operations
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * This adds method {@code public DotPair firstDotPair()} to the java
     * {@code public interface ListIterator<E>.}
     * This, actually, returns an instance of {@code DotPair}.  Because this {@code Iterator}
     * iterates {@code Vector}-sublists, not individual HTML nodes, the first-index of the first
     * match will be a {@code DotPair}, <I>not an integer.</I>  This (hopefully-obvious) is because
     * the {@code public class DotPair} encapsulates two needed numbers (a {@code Vector}-position
     * start-index, and an ending-index) into a single-data-class.
     *
     * <EMBED CLASS='external-html' DATA-FILE-ID=CMERESET>
     *
     * @return This method resets the internal {@code cursor}, and returns the first
     * {@code 'DotPair'} match - the starting-index and ending-index - of the first
     * "inclusive-sublist match".  The search begins at the very first position that
     * {@link #nextDotPair()} is allowed to inspect.
     *
     * @throws NoSuchElementException If there are no matches at all.
     *
     * @see #nextDotPair()
     * @see #lastDotPair()
     */
    public DotPair firstDotPair()
    {
        // The forward search pre-increments the cursor.  Resetting to the "not started" value
        // (-1) guarantees that the very first position is inspected too; a reset to '0' would
        // silently skip a match that begins at the first node.
        cursor      = -1;
        hasNextDP   = hasPrevDP = null;

        // Calls to first, last, firstIndex, or lastIndex "reset" the CME Monitor-Logic
        expectedSize = v.size();

        return nextDotPair();
    }

    /**
     * This does the same as {@code firstDotPair()} but returns the <B><I>last list
     * match index-pair</I></B> found within the input {@code Vector}.
     *
     * <BR /><BR />This adds method {@code public DotPair lastDotPair()} to the java
     * {@code public interface ListIterator<E>.}  This, actually, returns an instance of
     * {@code DotPair}.  Because this {@code Iterator} iterates {@code Vector}-sublists, not
     * individual HTML nodes, the last-index of the last match will be a {@code 'DotPair'}
     * <I>not an integer.</I>  This (hopefully obviously) is because the {@code public
     * class DotPair} encapsulates two needed numbers (a {@code Vector}-position start-index,
     * and an ending-index) into a single-data-class.
     *
     * <EMBED CLASS='external-html' DATA-FILE-ID=CMERESET>
     *
     * @return This method resets the internal pointer, and returns the last {@code 'DotPair'}
     * match - the starting-index and ending-index - of the last "inclusive-sublist match".  The
     * search begins at the very last position that {@link #previousDotPair()} is allowed to
     * inspect.
     *
     * @throws NoSuchElementException If there are no matches at all.
     *
     * @see #previousDotPair()
     * @see #firstDotPair()
     */
    public DotPair lastDotPair()
    {
        // The backward search pre-decrements the cursor, so start exactly one slot beyond the
        // last position that may be inspected.
        cursor      = (maxCursor == -1) ? v.size() : (maxCursor + 1);
        hasNextDP   = hasPrevDP = null;

        // Calls to first, last, firstIndex, or lastIndex "reset" the CME Monitor-Logic
        expectedSize = v.size();

        return previousDotPair();
    }

    /**
     * This adds to the {@code ListIterator<E>} class by providing a {@code first()} method that
     * resets this {@code Iterator} back to the first match that is found in the underlying
     * html-{@code Vector}.  The internal-{@code cursor} will be moved back to the beginning of
     * the {@code Vector}.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Modified Return-Value:</B>
     *
     * <BR />If the underlying web-page {@code Vector} has been modified, then this method shall
     * return the <I>updated first match.</I>  There is no "match memory."  Rather, if the
     * underlying {@code Vector} changes, further calls to {@code next(), previous(), first()} and
     * {@code last()} would also change.
     *
     * <EMBED CLASS='external-html' DATA-FILE-ID=CMERESET>
     *
     * @return This returns the first "inclusive" sub-list match as a vectorized-html sublist.
     *
     * @throws NoSuchElementException If there are no matches at all.
     *
     * @see #next()
     * @see #firstDotPair()
     */
    public Vector<HTMLNode> first()
    { return Util.cloneRange(v, firstDotPair()); }

    /**
     * This adds to the {@code ListIterator<E>} class by providing a {@code last()} method that
     * moves this {@code Iterator} to the last match that is found in the underlying
     * html-{@code Vector}.  The internal-{@code cursor} will be moved directly to the end of the
     * {@code Vector}.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Modified Return-Value:</B>
     *
     * <BR />If the underlying web-page {@code Vector} has been modified, then this method shall
     * return the <I>updated last match.</I>  There is no "match memory."  Rather, if the
     * underlying {@code Vector} changes, further calls to {@code next(), previous(), first()} and
     * {@code last()} would also change.
     *
     * <EMBED CLASS='external-html' DATA-FILE-ID=CMERESET>
     *
     * @return This returns the last "inclusive" sub-list match as a vectorized-html sublist.
     *
     * @throws NoSuchElementException If there are no matches at all.
     *
     * @see #previous()
     * @see #lastDotPair()
     */
    public Vector<HTMLNode> last()
    { return Util.cloneRange(v, lastDotPair()); }


    // ********************************************************************************************
    // ********************************************************************************************
    // NEXT and PREVIOUS Index
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * The usefulness of this method has been eclipsed by method {@code public
     * previousDotPair()}.  Nothing problematic should happen, that is unless you forget that this
     * {@code Iterator} is an 'inclusive' {@code Iterator}.  The word "Inclusive" is intended to
     * indicate that a 'range' or 'sublist' (demarcated by a {@code 'start'} and {@code 'end'}
     * {@code Vector}-index pair) are involved.  This is <I>usually-but-not-always</I> expressed
     * using an instance of class {@code 'DotPair'}.  The starting and ending indices are meant to
     * point to HTML opening and closing element tags such as: {@code <DIV>} and {@code </DIV>}, or
     * maybe {@code <A>} and {@code </A>}
     *
     * <BR /><BR />Because this method only returns a single integer, and that is the index of the
     * <I>previous opening HTML Tag</I> matching the iterator's constraints (but leaves off the
     * closing-tag) this method {@code 'previousIndex()'} may seem out of place.
     *
     * <BR /><BR /><B>NOTE:</B> This method is implemented by invoking
     * {@link #previousDotPair()}, and therefore it <I>does move</I> the iterator.
     *
     * @return Returns the index of the beginning of the previous matched sub-section.
     *
     * @throws NoSuchElementException If there are no more matches in the left-direction.
     */
    public int previousIndex() { return previousDotPair().start; }

    /**
     * The usefulness of this method has been eclipsed by method {@code public nextDotPair()}.
     * Nothing problematic should happen, that is unless you forget that this {@code Iterator} is
     * an 'inclusive' {@code Iterator}.  The word "Inclusive" is intended to indicate that a 'range'
     * or 'sublist' (demarcated by a {@code 'start'} and {@code 'end'} {@code Vector}-index pair)
     * are involved.  This is <I>usually-but-not-always</I> expressed using an instance of class
     * {@code 'DotPair'}.  The starting and ending indices are meant to point to HTML opening and
     * closing element tags such as: {@code <DIV>} and {@code </DIV>}, or maybe {@code <A>} and
     * {@code </A>}
     *
     * <BR /><BR />Because this method only returns a single integer, and that is the index of the
     * <I>next opening HTML Tag</I> matching the iterator's constraints (but leaves off the
     * closing-tag) this method {@code 'nextIndex()'} may seem out of place.
     *
     * <BR /><BR /><B>NOTE:</B> This method is implemented by invoking {@link #nextDotPair()},
     * and therefore it <I>does move</I> the iterator.
     *
     * @return Returns the index of the beginning of the next matched sub-section.
     *
     * @throws NoSuchElementException If there are no more matches in the right-direction.
     */
    public int nextIndex() { return nextDotPair().start; }

}