package Torello.HTML;

import java.util.stream.*;
import java.util.*;

import Torello.Java.LV;
import Torello.Java.StrCSV;
import Torello.Java.ExceptionCheckError;

/**
 * A simple utility class, used ubiquitously throughout Java HTML, which maintains two integer
 * fields - <CODE><B><A HREF='#start'>DotPair.start</A></B></CODE> and
 * <CODE><B><A HREF='#end'>DotPair.end</A></B></CODE>, for demarcating the beginning and ending
 * of a sub-list within an HTML web-page.
 *
 * <BR /><BR />Both fields are <B>inclusive</B>, both are non-negative, and {@code start} is
 * never greater than {@code end}; the constructor enforces all three conditions.
 *
 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=DOT_PAIR>
 *
 * @see NodeIndex
 * @see SubSection
 */
public final class DotPair
implements java.io.Serializable, Comparable<DotPair>, Cloneable, Iterable<Integer>
{
    /** <EMBED CLASS='external-html' DATA-FILE-ID=SVUID> */
    public static final long serialVersionUID = 1;

    /**
     * This is intended to be the "starting index" into a sub-array of an HTML {@code Vector} of
     * {@code HTMLNode} elements.
     */
    public final int start;

    /**
     * This is intended to be the "ending index" into a sub-array of an HTML {@code Vector} of
     * {@code HTMLNode} elements.
     */
    public final int end;


    // ********************************************************************************************
    // ********************************************************************************************
    // Constructor
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * This constructor takes two integers and saves them into the {@code public} member fields.
     *
     * @param start This is intended to store the starting position of a vectorized-webpage
     * sub-list or subpage.
     *
     * @param end This will store the ending position of a vectorized-html webpage or subpage.
     *
     * @throws IndexOutOfBoundsException A negative {@code 'start'} or {@code 'end'}
     * parameter-value will cause this exception throw.
     *
     * @throws IllegalArgumentException A {@code 'start'} parameter-value that is larger than the
     * {@code 'end'} parameter will cause this exception throw.
     *
     * @see NodeIndex
     * @see SubSection
     */
    public DotPair(int start, int end)
    {
        if (start < 0) throw new IndexOutOfBoundsException
            ("Negative start value passed to DotPair constructor: start = " + start);

        if (end < 0) throw new IndexOutOfBoundsException
            ("Negative ending value passed to DotPair constructor: end = " + end);

        if (end < start) throw new IllegalArgumentException(
            "Start-parameter value passed to constructor is greater than ending-parameter: " +
            "start: [" + start + "], end: [" + end + ']'
        );

        this.start  = start;
        this.end    = end;
    }

    /**
     * Creates a new instance that has been shifted by {@code 'delta'}.
     *
     * @param delta The number of array indices to shift {@code 'this'} instance.  This parameter
     * may be negative, and if so, {@code 'this'} will be shifted left, instead of right.
     *
     * @return A new, shifted, instance of {@code 'this'}
     *
     * @throws IndexOutOfBoundsException If the shift would produce a negative {@link #start} or
     * {@link #end}.  The shifted values are passed to the constructor, which performs this check.
     */
    public DotPair shift(int delta)
    { return new DotPair(this.start + delta, this.end + delta); }


    // ********************************************************************************************
    // ********************************************************************************************
    // Standard Java Methods
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * Implements the standard java {@code 'hashCode()'} method.  This will provide a hash-code
     * that is likely to avoid collisions.
     *
     * @return A hash-code that may be used for inserting {@code 'this'} instance into a hashed
     * table, map or list.
     */
    @Override
    public int hashCode()
    { return this.start + (1000 * this.end); }

    /**
     * The purpose of this is to remind the user that the array bounds are inclusive at <B>BOTH</B>
     * ends of the sub-list.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Inclusive &amp; Exclusive:</B>
     *
     * <BR />For an instance of {@code 'DotPair'}, the intention is to include both the
     * node located at the {@code Vector}-index position {@link #start} and the one at
     * {@link #end}.  Specifically, (and unlike many of the {@code Node-Search} package methods)
     * both of the internal fields to this class are <B STYLE='color: red'><I>inclusive</I></B>,
     * rather than exclusive.
     *
     * <BR /><BR />For many of the search methods in package {@link Torello.HTML.NodeSearch}, the
     * {@code 'ePos'} parameters are always <B STYLE='color: red'><I>exclusive</I></B> - meaning
     * the node at {@code Vector}-index {@code 'ePos'} is not included in the search.
     *
     * @return The length of a sub-array that would be indicated by this dotted pair, computed as
     * {@code end - start + 1}.
     */
    public int size() { return this.end - this.start + 1; }

    /**
     * Java's {@code toString()} requirement.
     *
     * @return A string representing 'this' instance of DotPair, in the form
     * {@code "[start, end]"}.
     */
    @Override
    public String toString() { return "[" + start + ", " + end + "]"; }

    /**
     * Java's {@code public boolean equals(Object o)} requirements.
     *
     * @param o This may be any Java {@code Object}, but only ones of {@code 'this'} type whose
     * internal-values are identical will force this method to return {@code TRUE}.
     *
     * @return {@code TRUE} if (and only if) parameter {@code 'o'} is an {@code instanceof DotPair}
     * and, also, both have equal start and ending field values.
     */
    @Override
    public boolean equals(Object o)
    {
        if (o instanceof DotPair)
        {
            DotPair dp = (DotPair) o;
            return (this.start == dp.start) && (this.end == dp.end);
        }

        else return false;
    }

    /**
     * Java's {@code interface Cloneable} requirements.  This instantiates a new {@code DotPair}
     * with identical {@code 'start', 'end'} fields.
     *
     * @return A new {@code DotPair} whose internal fields are identical to this one.
     */
    @Override
    public DotPair clone() { return new DotPair(this.start, this.end); }

    /**
     * Java's {@code interface Comparable<T>} requirements.  <I>This is not the only comparison
     * operation possible,</I> but it does satisfy one reasonable requirement -
     * <I>SPECIFICALLY:</I> which of two separate instances of {@code DotPair} start first.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Comparator Heuristic:</B>
     *
     * <BR />If two {@code DotPair} instances begin at the same {@code Vector}-index, then the
     * shorter of the two shall come first.
     *
     * @param other Any other {@code DotPair} to be compared to {@code 'this' DotPair}
     *
     * @return An integer that fulfils Java's
     * {@code interface Comparable<T> public int compareTo(T t)} method requirements.  Only the
     * sign of the returned value is meaningful.
     */
    @Override
    public int compareTo(DotPair other)
    {
        int ret = Integer.compare(this.start, other.start);

        // When both instances have the same 'start', the shorter one is precisely the one having
        // the smaller 'end'.  Comparing 'end' directly avoids the overflow that size() has for
        // the range [0, Integer.MAX_VALUE].
        return (ret != 0) ? ret : Integer.compare(this.end, other.end);
    }

    /**
     * This is an "alternative Comparator" that can be used for sorting instances of this class.
     * It should work with the {@code Collections.sort(List, Comparator)} method in the standard
     * JDK package {@code java.util.*;}
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Comparator Heuristic:</B>
     *
     * <BR />This "extra <CODE>Comparator</CODE>" simply compares the size of one {@code DotPair}
     * to a second.  The smaller shall be sorted first, and the larger (longer-in-length)
     * {@code DotPair} shall be sorted later.  If they are of equal size, whichever of the two has
     * an earlier {@link #start} position in the {@code Vector} is considered first.
     *
     * @see CommentNode#body
     */
    public static Comparator<DotPair> comp2 = (DotPair dp1, DotPair dp2) ->
    {
        // 'end - start' orders identically to size(), but can never overflow, because the
        // constructor guarantees 0 <= start <= end.
        int ret = Integer.compare(dp1.end - dp1.start, dp2.end - dp2.start);

        return (ret != 0) ? ret : Integer.compare(dp1.start, dp2.start);
    };

    /**
     * This shall return an {@code int Iterator} (which is properly named
     * {@code class java.util.PrimitiveIterator.OfInt}) that iterates integers beginning with the
     * value in {@code this.start} and ending with the value in {@code this.end}, inclusive.
     *
     * @return An {@code Iterator} that iterates {@code 'this'} instance of {@code DotPair} from
     * the beginning of the range, to the end of the range.  The {@code Iterator} returned will
     * produce Java's primitive type {@code int}.
     *
     * <BR /><BR /><DIV CLASS=JDHint>
     * The elements returned by the {@code Iterator}
     * are integers, and this is, in effect, nothing more than one which counts from {@link #start}
     * to {@link #end}.
     * </DIV>
     */
    @Override
    public PrimitiveIterator.OfInt iterator()
    {
        return new PrimitiveIterator.OfInt()
        {
            // Kept as a 'long' so that the increment which follows the final element cannot wrap
            // around when 'end' is Integer.MAX_VALUE.
            private long cursor = start;

            @Override
            public boolean hasNext() { return this.cursor <= end; }

            @Override
            public int nextInt()
            {
                // 'end' is inclusive: the iterator is exhausted only once the cursor has moved
                // PAST 'end', not when it is equal to it.
                if (cursor > end) throw new NoSuchElementException
                    ("Cursor has passed the value stored in 'end' [" + end + "]");

                return (int) cursor++;
            }
        };
    }

    /**
     * A simple {@code Iterator} that will iterate elements on an input page, using {@code 'this'}
     * instance of {@code DotPair's} indices, {@link #start}, and {@link #end}.
     *
     * <BR /><BR />The returned {@code Iterator} supports {@code remove()}, which deletes the
     * most recently returned node from {@code 'page'}.  If the size of {@code 'page'} is changed
     * by any means other than this {@code Iterator's remove()} method, the next call to
     * {@code next()} throws {@code ConcurrentModificationException}.
     *
     * @param <T> The type of {@code HTMLNode} stored by the {@code 'page'} parameter.
     *
     * @param page This may be any HTML page or sub-page.  This page should correspond to
     * {@code 'this'} instance of {@code DotPair}.
     *
     * @return An {@code Iterator} that will iterate each node in the page, beginning with the
     * node at {@code page.elementAt(this.start)}, and ending with {@code page.elementAt(this.end)}
     *
     * @throws IndexOutOfBoundsException This throws if {@code 'this'} instance does not have a
     * range that adheres to the size of the input {@code 'page'} parameter.
     */
    public <T extends HTMLNode> Iterator<T> iterator(Vector<T> page)
    {
        if (this.start >= page.size()) throw new IndexOutOfBoundsException(
            "This instance of DotPair points to elements that are outside of the range of the " +
            "input 'page' Vector.\n" +
            "'page' parameter size: " + page.size() + ", this.start: [" + this.start + "]"
        );

        if (this.end >= page.size()) throw new IndexOutOfBoundsException(
            "This instance of DotPair points to elements that are outside of the range of the " +
            "input 'page' Vector.\n" +
            "'page' parameter size: " + page.size() + ", this.end: [" + this.end + "]"
        );

        return new Iterator<T>()
        {
            // Index of the node most recently returned by next().  It begins one position BEFORE
            // 'this.start', so that the first call to next() returns the node at 'start'.
            private int cursor = start - 1;

            // Size the Vector is expected to have; adjusted only by this iterator's remove()
            private int expectedSize = page.size();

            // Inclusive index of the last node to return (a.k.a. 'this.end').  Decremented
            // whenever remove() shortens the Vector.
            private int last = end;

            // TRUE only between a successful next() and the following remove()
            private boolean canRemove = false;

            @Override
            public boolean hasNext() { return cursor < last; }

            @Override
            public T next()
            {
                // Both checks are done BEFORE the cursor is advanced, so that a failed call
                // leaves the state of this iterator untouched.
                if (cursor >= last) throw new NoSuchElementException(
                    "This iterator's cursor has run past the end of the DotPair instance that " +
                    "formed this Iterator.  No more elements to iterate.  Did you call hasNext() ?"
                );

                if (page.size() != expectedSize) throw new ConcurrentModificationException(
                    "The expected size of the underlying vector has changed." +
                    "\nCurrent-Size " +
                    "[" + page.size() + "], Expected-Size [" + expectedSize + "]\n" +
                    "\nCursor location: [" + cursor + "]"
                );

                canRemove = true;

                return page.elementAt(++cursor);
            }

            // Removes the node from the underlying Vector at the cursor's location.
            @Override
            public void remove()
            {
                if (! canRemove) throw new IllegalStateException(
                    "remove() may only be called once after each call to next(), and never " +
                    "before the first call to next()."
                );

                page.removeElementAt(cursor);

                // Every node to the right has moved one position left: step the cursor back, and
                // pull in both the end-marker and the expected Vector size.
                expectedSize--;
                cursor--;
                last--;
                canRemove = false;
            }
        };
    }


    // ********************************************************************************************
    // ********************************************************************************************
    // Simple Boolean tests
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * This will test whether a specific index is contained between {@code this.start} and
     * {@code this.end}, inclusively.
     *
     * @param index This is any integer index value.  It must not be negative.
     *
     * @return {@code TRUE} If the value of index is greater-than-or-equal-to the value stored in
     * field {@code 'start'} and furthermore is less-than-or-equal-to the value of field
     * {@code 'end'}
     *
     * @throws IndexOutOfBoundsException If the value is negative, this exception will throw.
     */
    public boolean isInside(int index)
    {
        if (index < 0) throw new IndexOutOfBoundsException
            ("You have passed a negative index [" + index + "] here, but this is not allowed.");

        return (index >= start) && (index <= end);
    }

    /**
     * Tests whether {@code 'this' DotPair} is fully enclosed by {@code DotPair} parameter
     * {@code 'other'}
     *
     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
     * comparison is likely meaningless.
     *
     * @return {@code TRUE} If (and only if) parameter {@code 'other'} encloses {@code 'this'}.
     * Two instances having identical end-points enclose one another.
     */
    public boolean enclosedBy(DotPair other)
    { return (other.start <= this.start) && (other.end >= this.end); }

    /**
     * Tests whether {@code 'this' DotPair} encloses, completely, the {@code DotPair}
     * parameter {@code 'other'}
     *
     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
     * comparison is likely meaningless.
     *
     * @return {@code TRUE} If (and only if) parameter {@code 'other'} is enclosed completely by
     * {@code 'this'}.  Two instances having identical end-points enclose one another.
     */
    public boolean encloses(DotPair other)
    { return (this.start <= other.start) && (this.end >= other.end); }

    /**
     * Tests whether parameter {@code 'other'} has any overlapping {@code Vector}-indices with
     * {@code 'this' DotPair}
     *
     * <BR /><BR />This test is symmetric: {@code a.overlaps(b)} always equals
     * {@code b.overlaps(a)}.  It includes the case where one instance strictly encloses the other.
     *
     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
     * comparison is likely meaningless.
     *
     * @return {@code TRUE} If (and only if) parameter {@code 'other'} and {@code 'this'} have any
     * overlap - meaning at least one index is inside both ranges.
     */
    public boolean overlaps(DotPair other)
    {
        // Two inclusive ranges share an index exactly when each one begins no later than the
        // other one ends.  Testing only whether the end-points of 'this' are inside 'other' would
        // miss the case where 'this' strictly encloses 'other'.
        return (this.start <= other.end) && (other.start <= this.end);
    }

    /**
     * Tests whether {@code 'this'} lays, <I>completely</I>, before {@code DotPair} parameter
     * {@code 'other'}.
     *
     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
     * comparison is likely meaningless.
     *
     * @return {@code TRUE} if <I>every index</I> of {@code 'this'} has a value that is less than
     * every index of {@code 'other'}
     */
    public boolean isBefore(DotPair other)
    { return this.end < other.start; }

    /**
     * Tests whether {@code 'this'} begins before {@code DotPair} parameter {@code 'other'}.
     *
     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
     * comparison is likely meaningless.
     *
     * @return {@code TRUE} if {@code this.start} is less than {@code other.start}, and
     * {@code FALSE} otherwise.
     */
    public boolean startsBefore(DotPair other)
    { return this.start < other.start; }

    /**
     * Tests whether {@code 'this'} lays, <I>completely</I>, after {@code DotPair} parameter
     * {@code 'other'}.
     *
     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
     * comparison is likely meaningless.
     *
     * @return {@code TRUE} if <I>every index</I> of {@code 'this'} has a value that is greater
     * than every index of {@code 'other'}
     */
    public boolean isAfter(DotPair other)
    { return this.start > other.end; }

    /**
     * Tests whether {@code 'this'} ends after {@code DotPair} parameter {@code 'other'}.
     *
     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
     * comparison is likely meaningless.
     *
     * @return {@code TRUE} if {@code this.end} is greater than {@code other.end}, and
     * {@code FALSE} otherwise.
     */
    public boolean endsAfter(DotPair other)
    { return this.end > other.end; }


    // ********************************************************************************************
    // ********************************************************************************************
    // Exception Check
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * A method that will do a fast check that {@code 'this'} instance holds index-pointers to
     * an opening and closing HTML-Tag pair.  Note, though these mistakes may seem trivial, when
     * parsing Internet Web-Pages, these are exactly the type of basic mistakes that users will
     * make when their level of 'concentration' is low.  This is no different than checking an
     * array-index or {@code String}-index for an {@code IndexOutOfBoundsException}.
     *
     * <BR /><BR />This type of detailed exception message can make analyzing web-pages more
     * direct and less error-prone.  The 'cost' incurred includes only a few {@code if}-statement
     * comparisons, and <I>this check should be performed immediately <B>before a loop is
     * entered.</B></I>
     *
     * @param page Any web-page, or sub-page.  It needs to be the page from whence {@code 'this'}
     * instance of {@code DotPair} was retrieved.
     *
     * @param possibleTokens The list of HTML tokens that are acceptable for the opening and
     * closing tag pair.  The {@link TagNode#tok} of the pair must equal (ignoring case) at least
     * one of these tokens.  Note that if this list is empty, no token can match, and
     * {@code HTMLTokException} is thrown.
     *
     * @throws TagNodeExpectedException If {@code 'this'} instance' {@link #start} or {@link #end}
     * fields do not point to {@code TagNode} elements on the {@code 'page'}.
     *
     * @throws HTMLTokException If the {@code TagNode's} at {@link #start} and {@link #end} do not
     * have matching {@link TagNode#tok} fields, or if that {@code 'tok'} does not equal any of
     * the {@code String's} contained by parameter {@code 'possibleTokens'}.
     *
     * @throws OpeningTagNodeExpectedException If {@link #start} does not point to an opening
     * {@code TagNode}.
     *
     * @throws ClosingTagNodeExpectedException If {@link #end} does not point to a closing
     * {@code TagNode}.
     *
     * @throws IndexOutOfBoundsException If {@link #start} or {@link #end} is greater than or
     * equal to {@code page.size()}.
     *
     * @throws NullPointerException If the {@code 'page'} parameter is null.
     *
     * @throws ExceptionCheckError <B STYLE='color:red;'>IMPORTANT</B> Since this method is,
     * indubitably, a method for performing error checking, the presumption is that the programmer
     * is trying to check for <I>his users input</I>.  If in the process of checking for user
     * error, another mistake is made that would generate an exception, this must be thought of as
     * a more serious error.
     *
     * <BR /><BR />The purpose of the {@code 'possibleTokens'} array is to check that those tokens
     * match the tokens that are contained by the {@code TagNode's} on the page at index
     * {@code this.start}, and {@code this.end}.  If invalid HTML tokens, null tokens, or even
     * HTML Singleton tokens are passed <B>this exception-check, itself, is flawed!</B>  If there
     * are problems with this var-args array, this error is thrown.
     *
     * <BR /><BR />It is more serious because it indicates that the programmer has made a mistake
     * in attempting to check for user-errors.
     */
    public void exceptionCheck(Vector<HTMLNode> page, String... possibleTokens)
    {
        if (page == null) throw new NullPointerException
            ("HTML-Vector parameter was passed a null reference.");

        if (possibleTokens == null) throw new ExceptionCheckError
            ("HTML tags string-list was passed a null reference.");

        for (String token : possibleTokens)
        {
            if (token == null) throw new ExceptionCheckError
                ("One of the HTML Tag's in the tag-list String-array was null.");

            if (! HTMLTags.isTag(token)) throw new ExceptionCheckError
                ("One of the passed tokens [" + token +"] is not a valid HTML token.");

            if (HTMLTags.isSingleton(token)) throw new ExceptionCheckError
                ("One of the passed tokens [" + token +"] is an HTML Singleton.");
        }


        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
        // Check the DotPair.start
        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***

        if (this.start >= page.size()) throw new IndexOutOfBoundsException(
            "DotPair's 'start' field [" + this.start + "], is greater than or equal to the " +
            "size of the HTML-Vector [" + page.size() + "]."
        );

        if (! (page.elementAt(this.start) instanceof TagNode))
            throw new TagNodeExpectedException(this.start);

        TagNode t1 = (TagNode) page.elementAt(this.start);

        if (t1.isClosing) throw new OpeningTagNodeExpectedException(
            "The TagNode at index [" + this.start + "] was a closing " +
            "</" + t1.tok.toUpperCase() + ">, but an opening tag was expected here."
        );


        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
        // Now Check the DotPair.end
        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***

        if (this.end >= page.size()) throw new IndexOutOfBoundsException(
            "DotPair's 'end' field [" + this.end + "], is greater than or equal to the " +
            "size of the HTML-Vector [" + page.size() + "]."
        );

        if (! (page.elementAt(this.end) instanceof TagNode))
            throw new TagNodeExpectedException(this.end);

        TagNode t2 = (TagNode) page.elementAt(this.end);

        // The offending node is the one at 'this.end', so that is the index to report
        if (! t2.isClosing) throw new ClosingTagNodeExpectedException(
            "The TagNode at index [" + this.end + "] was an opening " +
            "<" + t2.tok.toUpperCase() + ">, but a closing tag was expected here."
        );


        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
        // Token Check
        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***

        if (! t1.tok.equalsIgnoreCase(t2.tok)) throw new HTMLTokException(
            "The opening TagNode was the [" + t1.tok.toLowerCase() + "] HTML Tag, while the " +
            "closing Tag was the [" + t2.tok.toLowerCase() + "].  These two tag's must be an " +
            "opening and closing pair, and therefore must match each-other."
        );

        // The pair is acceptable as soon as any one of the expected tokens matches
        for (String possibleToken : possibleTokens)
            if (possibleToken.equalsIgnoreCase(t1.tok))
                return;

        String t = t1.tok.toUpperCase();

        throw new HTMLTokException(
            "The opening and closing tags were: <" + t + ">, and </" + t + ">, but " +
            "unfortunately this Tag is not included among the list of expected tags:\n" +
            "    [" + StrCSV.toCSV(possibleTokens, false, false, 60) + "]."
        );
    }

    /**
     * Performs an exception check, using {@code 'this'} instance of {@code DotPair}, and throws
     * an {@code IndexOutOfBoundsException} if {@code 'this'} contains end-points that do not fit
     * inside the {@code 'page'} Vector Parameter.
     *
     * @param page Any HTML Page, or subpage.  {@code page.size()} must return a value that is
     * larger than <B STYLE='color: red;'>BOTH</B> {@link #start}
     * <B STYLE='color:red;'>AND</B> {@link #end}.
     *
     * @throws IndexOutOfBoundsException A value for {@link #start} or {@link #end} which
     * is greater than or equal to the size of the {@code Vector} parameter {@code 'page'} will
     * cause this exception throw.
     *
     * @throws NullPointerException If the {@code 'page'} parameter is null.
     */
    public void exceptionCheck(Vector<HTMLNode> page)
    {
        // Only 'end' needs to be tested.  The constructor guarantees that 'start' <= 'end', so
        // if 'end' fits inside the Vector, then 'start' must fit too; and if 'end' is out of
        // bounds, it is irrelevant whether 'start' is also out of bounds.

        if (this.end >= page.size()) throw new IndexOutOfBoundsException(
            "The value of this.end [" + this.end + "] is greater than or equal to the size of " +
            "Vector parameter 'page' [" + page.size() + "]"
        );
    }
}