001package Torello.HTML; 002 003import java.util.*; 004import java.util.regex.*; 005import java.util.stream.*; 006import java.util.function.*; 007 008import Torello.Java.StringParse; 009import Torello.Java.StrCmpr; 010import Torello.Java.StrFilter; 011import Torello.Java.Additional.EffectivelyFinal; 012 013 014import Torello.HTML.parse.HTMLRegEx; 015import Torello.HTML.NodeSearch.CSSStrException; 016import Torello.HTML.NodeSearch.TextComparitor; 017 018/** 019 * Represents an HTML Element Tag, and is the flagship class of the Java-HTML Library. 020 * 021 * <EMBED CLASS="external-html" DATA-FILE-ID=TAG_NODE> 022 * 023 * <EMBED CLASS="external-html" DATA-FILE-ID=HTML_NODE_SUB_IMG> 024 * 025 * @see TextNode 026 * @see CommentNode 027 * @see HTMLNode 028 */ 029@Torello.JavaDoc.JDHeaderBackgroundImg(EmbedTagFileID="HTML_NODE_SUBCLASS") 030public final class TagNode 031 extends HTMLNode 032 implements CharSequence, java.io.Serializable, Cloneable, Comparable<TagNode> 033{ 034 /** <EMBED CLASS="external-html" DATA-FILE-ID="SVUID"> */ 035 public static final long serialVersionUID = 1; 036 037 038 // ******************************************************************************************** 039 // ******************************************************************************************** 040 // NON-STATIC FIELDS 041 // ******************************************************************************************** 042 // ******************************************************************************************** 043 044 045 /** <EMBED CLASS="external-html" DATA-FILE-ID=TAGNODE_TOK> */ 046 public final String tok; 047 048 /** <EMBED CLASS="external-html" DATA-FILE-ID=TAGNODE_IS_CLOSING> */ 049 public final boolean isClosing; 050 051 052 // ******************************************************************************************** 053 // ******************************************************************************************** 054 // Constructors 055 // 
******************************************************************************************** 056 // ******************************************************************************************** 057 058 059 /** 060 * <EMBED CLASS='external-html' DATA-FILE-ID=TN_C_DESC_1> 061 * 062 * @param s Any valid HTML tag, for instance: {@code <H1>, <A HREF="somoe url">, 063 * <DIV ID="some id">} etc... 064 * 065 * @throws MalformedTagNodeException If the passed {@code String} wasn't valid - meaning <I>it 066 * did not match the regular-expression {@code parser}.</I> 067 * 068 * @throws HTMLTokException If the {@code String} found where the usual HTML token-element is 069 * situated <I>is not a valid HTML element</I> then the {@code HTMLTokException} will be 070 * thrown. 071 * 072 * @see HTMLTags#getTag_MEM_HEAP_CHECKOUT_COPY(String) 073 */ 074 public TagNode(String s) 075 { 076 super(s); 077 078 // If the second character of the string is a forward-slash, this must be a closing-element 079 // For Example: </SPAN>, </DIV>, </A>, etc... 080 081 isClosing = s.charAt(1) == '/'; 082 083 // This is the Element & Attribute Matcher used by the RegEx Parser. If this Matcher 084 // doesn't find a match, the parameter 's' cannot be a valid HTML Element. NOTE: The 085 // results of this matcher are also used to retrieve attribute-values, but here below, 086 // its results are ignored. 087 088 Matcher m = HTMLRegEx.P1.matcher(s); 089 090 if (! m.find()) throw new MalformedTagNodeException( 091 "The parser's regular-expression did not match the constructor-string.\n" + 092 "The exact input-string was: [" + s + "]\n" + 093 "NOTE: The parameter-string is included as a field (ex.str) to this Exception.", s 094 ); 095 096 // MINOR/MAJOR IMPROVEMENT... REUSE THE "ALLOCATED STRING TOKEN" from HTMLTag's class 097 // THINK: Let the Garbage Collector take out as many duplicate-strings as is possible.. 098 // AND SOONER. DECEMBER 2019: "Optimization" or ... 
"Improvement" 099 100 String tokTEMP = m.group(1).toLowerCase(); 101 102 if ((m.start() != 0) || (m.end() != s.length())) 103 104 throw new MalformedTagNodeException( 105 "The parser's regular-expression did not match the entire-string-length of the " + 106 "string-parameter to this constructor: m.start()=" + m.start() + ", m.end()=" + 107 m.end() + ".\nHowever, the length of the Input-Parameter String was " + 108 '[' + s.length() + "]\nThe exact input-string was: [" + s + "]\nNOTE: The " + 109 "parameter-string is included as a field (ex.str) to this Exception.", s 110 ); 111 112 113 // Get a copy of the 'tok' string that was already allocated on the heap; (OPTIMIZATON) 114 // NOTE: There are already myriad strings for the '.str' field. 115 // ALSO: Don't pay much attention to this line if it doesn't make sense... it's not 116 // that important. If the HTML Token found was not a valid HTML5 token, this field 117 // will be null. 118 // Java 14+ has String.intern() - that's what this is.... 119 120 this.tok = HTMLTags.getTag_MEM_HEAP_CHECKOUT_COPY(tokTEMP); 121 122 // Now do the usual error check. 123 if (this.tok == null) throw new HTMLTokException( 124 "The HTML Tag / Token Element that is specified by the input string " + 125 "[" + tokTEMP + "] is not a valid HTML Element Name.\n" + 126 "The exact input-string was: [" + s + "]" 127 ); 128 } 129 130 // USED-INTERNALLY - bypasses all checks. used when creating new HTML Element-Names 131 // ONLY: class 'HTMLTags' via method 'addTag(...)' shall ever invoke this constructor. 132 // NOTE: This only became necessary because of the MEM_COPY_HEAP optimization. This 133 // optimization expects that there is already a TagNode with element 'tok' in 134 // the TreeSet, which is always OK - except for the method that CREATES NEW HTML 135 // TAGS... a.k.a. HTMLTags.addTag(String). 136 TagNode(String token, TC openOrClosed) 137 { 138 super("<" + ((openOrClosed == TC.ClosingTags) ? 
"/" : "") + token + ">"); 139 140 // ONLY CHANGE CASE HERE, NOT IN PREVIOUS-LINE. PAY ATTENTION. 141 this.tok = token.toLowerCase(); 142 143 this.isClosing = (openOrClosed == TC.ClosingTags) ? true : false; 144 } 145 146 /** 147 * Convenience Constructor. 148 * <BR />Invokes: {@link #TagNode(String, Properties, Iterable, SD, boolean)} 149 * <BR />Passes: null to the Boolean / Key-Only Attributes {@code Iterable} 150 */ 151 public TagNode(String tok, Properties attributes, SD quotes, boolean addEndingForwardSlash) 152 { this(tok, attributes, null /* keyOnlyAttributes */, quotes, addEndingForwardSlash); } 153 154 /** 155 * <EMBED CLASS='external-html' DATA-FILE-ID=TN_C_DESC_2> 156 * 157 * @param tok <EMBED CLASS='external-html' DATA-FILE-ID=TN_C_TOK> 158 * @param attributes <EMBED CLASS='external-html' DATA-FILE-ID=TN_C_ATTRIBUTES> 159 * @param keyOnlyAttributes <EMBED CLASS='external-html' DATA-FILE-ID=TN_C_KO_ATTRIBUTES> 160 * @param quotes <EMBED CLASS='external-html' DATA-FILE-ID=TN_C_QUOTES> 161 * @param addEndingForwardSlash <EMBED CLASS='external-html' DATA-FILE-ID=TN_C_AEFS> 162 * 163 * @throws InnerTagKeyException <EMBED CLASS="external-html" DATA-FILE-ID="ITKEYEXPROP"> 164 * @throws QuotesException <EMBED CLASS="external-html" DATA-FILE-ID="QEX"> 165 * @throws HTMLTokException if an invalid HTML 4 or 5 token is not present 166 * <B>(check is {@code CASE_INSENSITIVE})</B> 167 * 168 * @see InnerTagKeyException#check(String, String) 169 * @see QuotesException#check(String, SD, String) 170 * @see #generateElementString(String, Properties, Iterable, SD, boolean) 171 */ 172 public TagNode( 173 String tok, Properties attributes, Iterable<String> keyOnlyAttributes, 174 SD quotes, boolean addEndingForwardSlash 175 ) 176 { 177 this( 178 generateElementString 179 (tok, attributes, keyOnlyAttributes, quotes, addEndingForwardSlash)); 180 } 181 182 /** 183 * This builds an HTML Element as a {@code String.} This {@code String} may be passed to the 184 * standard HTML 
{@code TagNode} Constructor that accepts a {@code String} as input. 185 * 186 * @param tok <EMBED CLASS='external-html' DATA-FILE-ID=TN_C_TOK> 187 * @param attributes <EMBED CLASS='external-html' DATA-FILE-ID=TN_C_ATTRIBUTES> 188 * @param keyOnlyAttributes <EMBED CLASS='external-html' DATA-FILE-ID=TN_C_KO_ATTRIBUTES> 189 * @param quotes <EMBED CLASS='external-html' DATA-FILE-ID=TN_C_QUOTES> 190 * @param addEndingForwardSlash <EMBED CLASS='external-html' DATA-FILE-ID=TN_C_AEFS> 191 * 192 * @throws InnerTagKeyException <EMBED CLASS="external-html" DATA-FILE-ID="ITKEYEXPROP"> 193 * @throws QuotesException <EMBED CLASS="external-html" DATA-FILE-ID="QEX"> 194 * @throws HTMLTokException if an invalid HTML 4 or 5 token is not present 195 * <B>{@code CASE_INSENSITIVE}</B> 196 * 197 * @return This method returns an HTML Element, as a {@code String}. 198 * 199 * @see HTMLTokException#check(String[]) 200 * @see InnerTagKeyException#check(String, String) 201 * @see QuotesException#check(String, SD, String) 202 */ 203 protected static String generateElementString( 204 String tok, Properties attributes, Iterable<String> keyOnlyAttributes, 205 SD quotes, boolean addEndingForwardSlash 206 ) 207 { 208 String computedQuote = (quotes == null) ? "" : ("" + quotes.quote); 209 210 HTMLTokException.check(tok); 211 212 // The HTML Element is "built" using a StringBuilder 213 StringBuilder sb = new StringBuilder(); 214 sb.append("<" + tok); 215 216 // If there are any Inner-Tag Key-Value pairs, insert them first. 
217 if ((attributes != null) && (attributes.size() > 0)) 218 219 for (String key : attributes.stringPropertyNames()) 220 { 221 String value = attributes.getProperty(key); 222 223 InnerTagKeyException.check(key, value); 224 225 QuotesException.check( 226 value, quotes, 227 "parameter 'Properties' contains:\nkey:\t" + key + "\nvalue:\t" + value + "\n" 228 ); 229 230 sb.append(" " + key + '=' + computedQuote + value + computedQuote); 231 } 232 233 // If there are any Key-Only Inner-Tags (Boolean Attributes), insert them next. 234 if (keyOnlyAttributes != null) 235 236 for (String keyOnlyAttribute : keyOnlyAttributes) 237 { 238 InnerTagKeyException.check(keyOnlyAttribute); 239 sb.append(" " + keyOnlyAttribute); 240 } 241 242 // Add a closing forward-slash 243 sb.append(addEndingForwardSlash ? " />" : ">"); 244 245 // Build the String, using the StringBuilder, and return the newly-constructed HTML Element 246 return sb.toString(); 247 } 248 249 250 // ******************************************************************************************** 251 // ******************************************************************************************** 252 // isTag 253 // ******************************************************************************************** 254 // ******************************************************************************************** 255 256 257 /** 258 * This method identifies that {@code 'this'} instance of (abstract parent-class) 259 * {@link HTMLNode} is, indeed, an instance of sub-class {@code TagNode}. 260 * 261 * <BR /><BR /><B CLASS=JDRedLabel>Final Method:</B> 262 * 263 * <BR />This method is final, and cannot be modified by sub-classes. 264 * 265 * @return This method shall always return <B>TRUE</B> It overrides the parent-class 266 * {@code HTMLNode} method {@link #isTagNode()}, which always returns <B>FALSE</B>. 
267 */ 268 @Override 269 public final boolean isTagNode() { return true; } 270 271 /** 272 * <EMBED CLASS='external-html' DATA-FILE-ID=TN_IF_TN_DESC> 273 * 274 * <BR /><BR /><B CLASS=JDRedLabel>Final Method:</B> 275 * 276 * <BR />This method is final, and cannot be modified by sub-classes. 277 * 278 * @return <EMBED CLASS='external-html' DATA-FILE-ID=TN_IF_TN_RET> 279 */ 280 @Override 281 public final TagNode ifTagNode() { return this; } 282 283 /** 284 * <EMBED CLASS='external-html' DATA-FILE-ID=TN_OPENTAG_PWA_DESC> 285 * 286 * <BR /><BR /><B CLASS=JDRedLabel>Final Method:</B> 287 * 288 * <BR />This method is final, and cannot be modified by sub-classes. 289 * 290 * @return <EMBED CLASS='external-html' DATA-FILE-ID=TN_OPENTAG_PWA_RET> 291 */ 292 @Override 293 public final TagNode openTagPWA() 294 { 295 // Closing TagNode's simply may not have attributes 296 if (this.isClosing) return null; 297 298 // A TagNode whose '.str' field is not AT LEAST 5 characters LONGER than the length of the 299 // HTML-Tag / Token, simply cannot have an attribute. 300 // 301 // NOTE: Below is the shortest possible HTML tag that could have an attribute-value pair. 302 // COMPUTE: '<' + TOK.LENGTH + SPACE + 'c' + '=' + '>' 303 304 if (this.str.length() < (this.tok.length() + 5)) return null; 305 306 // This TagNode is an opening HTML tag (like <DIV ...>, rather than </DIV>), 307 // and there are at least two additional characters after the token, such as: <DIV A...> 308 // It is not guaranteed that this tag has attributes, but it is possibly - based on these 309 /// optimization methods, and further investigation would have merit. 310 311 return this; 312 } 313 314 /** 315 * This is a loop-optimization method that makes finding opening {@code TagNode's} - <B>with 316 * attribute-values</B> - quites a bit faster. All {@code HTMLNode} subclasses implement this 317 * method, but only {@code TagNode} instances will ever return a non-null value. 
318 * 319 * <BR /><BR /><B CLASS=JDRedLabel>Final Method:</B> 320 * 321 * <BR />This method is final, and cannot be modified by sub-classes. 322 * 323 * @return Returns null if and only if {@code 'this'} instance' {@link #isClosing} field is 324 * false. When a non-null return-value is acheived, that value will always be {@code 'this'} 325 * instance. 326 */ 327 @Override 328 public final TagNode openTag() 329 { return isClosing ? null : this; } 330 331 /** 332 * Receives a list of html-elements which the {@code this.tok} field must match. 333 * This method returns <B>TRUE</B> if any match is found. 334 * 335 * <BR /><BR /><IMG SRC='doc-files/img/isTag.png' CLASS=JDIMG ALT='example'> 336 * 337 * @param possibleTags This non-null list of potential HTML tags. 338 * @return <B>TRUE</B> If {@code this.tok} matches at least one of these tags. 339 * @see #tok 340 */ 341 public boolean isTag(String... possibleTags) 342 { 343 for (String htmlTag : possibleTags) if (htmlTag.equalsIgnoreCase(this.tok)) return true; 344 345 return false; 346 } 347 348 /** 349 * Receives a list of html-elements which {@code this.tok} field <B>MAY NOT</B> match. 350 * This method returns <B>FALSE</B> if any match is found. 351 * 352 * @param possibleTags This must be a non-null list of potential HTML tags. 353 * 354 * @return <B>FALSE</B> If {@code this.tok} matches any one of these tags, and <B>TRUE</B> 355 * otherwise. 356 * 357 * @see #tok 358 * @see #isTag(String[]) 359 */ 360 public boolean isTagExcept(String... possibleTags) 361 { 362 for (String htmlTag : possibleTags) if (htmlTag.equalsIgnoreCase(this.tok)) return false; 363 364 return true; 365 } 366 367 /** 368 * Receives two "criteria-specifier" parameters. 
This method shall return <B>TRUE</B> if: 369 * 370 * <BR /><BR /><UL CLASS="JDUL"> 371 * <LI>Field {@code 'isClosing'} is equal-to / consistent-with {@code TC tagCriteria}</LI> 372 * <LI>Field {@code 'tok'} is equal to at least one of the {@code 'possibleTags'}</LI> 373 * </UL> 374 * 375 * <BR /><BR /><IMG SRC='doc-files/img/isTag2.png' CLASS=JDIMG ALT='example'> 376 * 377 * @param tagCriteria This ought to be either {@code 'TC.OpeningTags'} or 378 * {@code TC.ClosingTags'}. This parameter specifies what {@code 'this'} instance of 379 * {@code TagNode} is expected to contain, as {@code this.isClosing} field shall be compared 380 * against it. 381 * 382 * @param possibleTags This is presumed to be a non-zero-length, and non-null-valued list of 383 * html tokens. 384 * 385 * @return <B>TRUE</B> If {@code 'this'} matches the specified criteria, and <B>FALSE</B> 386 * otherwise. 387 * 388 * @see TC 389 * @see #isClosing 390 * @see #tok 391 */ 392 public boolean isTag(TC tagCriteria, String... possibleTags) 393 { 394 // Requested an "OpeningTag" but this is a "ClosingTag" 395 if ((tagCriteria == TC.OpeningTags) && this.isClosing) return false; 396 397 // Requested a "ClosingTag" but this is an "OpeningTag" 398 if ((tagCriteria == TC.ClosingTags) && ! this.isClosing) return false; 399 400 for (int i=0; i < possibleTags.length; i++) 401 402 if (this.tok.equalsIgnoreCase(possibleTags[i])) 403 404 // Found a TOKEN match, return TRUE immediately 405 return true; 406 407 // None of the elements in 'possibleTags' equalled tn.tok 408 return false; 409 } 410 411 /** 412 * Receives a {@code TagNode} and then two "criteria-specifier" parameters. 
This method shall 413 * return <B>FALSE</B> if: 414 * 415 * <BR /><BR /><UL CLASS="JDUL"> 416 * <LI> Field {@code 'isClosing'} is <B><I>not</I></B> equal-to / 417 * <B><I>not</I></B> consistent-with {@code TC tagCriteria}</LI> 418 * <LI> Field {@code 'tok'} is <B><I>equal-to</I></B> any of the {@code 'possibleTags'}</LI> 419 * </UL> 420 * 421 * @param tagCriteria tagCriteria This ought to be either {@code 'TC.OpeningTags'} or 422 * {@code TC.ClosingTags'} This parameter specifies what {@code 'this'} instance of 423 * {@code TagNode} is expected to contain, as {@code this.isClosing} field shall be compared 424 * against it. 425 * 426 * @param possibleTags This is presumed to be a non-zero-length, and non-null-valued list of 427 * html tokens. 428 * 429 * @return <B>TRUE</B> If this {@code TagNode 'n'} matches the specified criteria explained 430 * above, and <B>FALSE</B> otherwise. 431 * 432 * @see TC 433 * @see #tok 434 * @see #isClosing 435 */ 436 public boolean isTagExcept(TC tagCriteria, String... possibleTags) 437 { 438 // Requested an "OpeningTag" but this is a "ClosingTag" 439 if ((tagCriteria == TC.OpeningTags) && this.isClosing) return false; 440 441 // Requested a "ClosingTag" but this is an "OpeningTag" 442 if ((tagCriteria == TC.ClosingTags) && ! 
this.isClosing) return false; 443 444 for (int i=0; i < possibleTags.length; i++) 445 446 if (this.tok.equalsIgnoreCase(possibleTags[i])) 447 448 // The Token of the input node was a match with one of the 'possibleTags' 449 // Since this is "Except" - we must return 'false' 450 451 return false; 452 453 // None of the elements in 'possibleTags' equalled tn.tok 454 // since this is "Except" - return 'true' 455 456 return true; 457 } 458 459 460 // ******************************************************************************************** 461 // ******************************************************************************************** 462 // Main Method 'AV' 463 // ******************************************************************************************** 464 // ******************************************************************************************** 465 466 467 /** 468 * <EMBED CLASS="external-html" DATA-FILE-ID=TN_AV_DESC> 469 * <EMBED CLASS="external-html" DATA-FILE-ID=TN_AV_DESC_EXAMPLE> 470 * @param innerTagAttribute <EMBED CLASS="external-html" DATA-FILE-ID=TN_AV_ITA> 471 * @return <EMBED CLASS="external-html" DATA-FILE-ID=TN_AV_RET> 472 * @see #isClosing 473 * @see #str 474 * @see #tok 475 * @see StringParse#ifQuotesStripQuotes(String) 476 * @see AttrRegEx#KEY_VALUE_REGEX 477 */ 478 public String AV(String innerTagAttribute) 479 { 480 // All HTML element tags that start like: </DIV> with a front-slash. 481 // They may not legally contain inner-tag attributes. 482 483 if (this.isClosing) return null; 484 485 // All HTML element tags that contain only <TOK> (TOK <==> Tag-Name) in their 'str' field 486 // Specifically: '<', TOKEN, '>', (Where TOKEN is 'div', 'span', 'table', 'ul', etc...) 487 // are TOO SHORT to have the attribute, so don't check... return null. 488 489 if (this.str.length() < 490 (3 + this.tok.length() + (innerTagAttribute = innerTagAttribute.trim()).length())) 491 return null; 492 493 // Matches "Attribute / Inner-Tag Key-Value" Pairs. 
494 Matcher m = AttrRegEx.KEY_VALUE_REGEX.matcher(this.str); 495 496 // This loop iterates the KEY_VALUE PAIRS THAT HAVE BEEN FOUND. 497 /// NOTE: The REGEX Matches on Key-Value Pairs. 498 499 while (m.find()) 500 501 // m.group(2) is the "KEY" of the Attribute KEY-VALUE Pair 502 // m.group(3) is the "VALUE" of the Attribute. 503 if (m.group(2).equalsIgnoreCase(innerTagAttribute)) 504 return StringParse.ifQuotesStripQuotes(m.group(3)); 505 506 // This means the attribute name provided to parameter 'innerTagAttribute' was not found. 507 return null; 508 } 509 510 /** 511 * <SPAN STYLE="color: red;"><B>OPT: Optimized</B></SPAN> 512 * 513 * <BR /><BR /> This is an "optimized" version of method {@link #AV(String)}. This method does 514 * the exact same thing as {@code AV(...)}, but leaves out parameter-checking and 515 * error-checking. This is used internally (repeatedly) by the NodeSearch Package Search Loops. 516 * 517 * @param innerTagAttribute This is the inner-tag / attribute <B STYLE="color: red;">name</B> 518 * whose <B STYLE="color: red;">value</B> is hereby being requested. 519 * 520 * @return {@code String}-<B STYLE="color: red;">value</B> of this inner-tag / attribute. 521 * 522 * @see StringParse#ifQuotesStripQuotes(String) 523 * @see #str 524 * @see TagNode.AttrRegEx#KEY_VALUE_REGEX 525 */ 526 public String AVOPT(String innerTagAttribute) 527 { 528 // COPIED DIRECTLY FROM class TagNode, leaves off initial tests. 529 530 // Matches "Attribute / Inner-Tag Key-Value" Pairs. 531 Matcher m = AttrRegEx.KEY_VALUE_REGEX.matcher(this.str); 532 533 // This loop iterates the KEY_VALUE PAIRS THAT HAVE BEEN FOUND. 534 /// NOTE: The REGEX Matches on Key-Value Pairs. 535 536 while (m.find()) 537 538 // m.group(2) is the "KEY" of the Attribute KEY-VALUE Pair 539 // m.group(3) is the "VALUE" of the Attribute. 
540 541 if (m.group(2).equalsIgnoreCase(innerTagAttribute)) 542 return StringParse.ifQuotesStripQuotes(m.group(3)); 543 544 // This means the attribute name provided to parameter 'innerTagAttribute' was not found. 545 return null; 546 } 547 548 549 // ******************************************************************************************** 550 // ******************************************************************************************** 551 // Attribute Modify-Value methods 552 // ******************************************************************************************** 553 // ******************************************************************************************** 554 555 556 /** 557 * <EMBED CLASS="external-html" DATA-FILE-ID=TN_SET_AV_DESC> 558 * @param attribute <EMBED CLASS="external-html" DATA-FILE-ID=TN_SET_AV_ATTR> 559 * @param value Any valid attribute-<B STYLE="color: red;">value</B>. This parameter may not 560 * be null, or a {@code NullPointerException} will throw. 561 * @param quote <EMBED CLASS="external-html" DATA-FILE-ID=TN_SET_AV_QUOTE> 562 * @throws InnerTagKeyException <EMBED CLASS="external-html" DATA-FILE-ID="ITKEYEX2"> 563 * @throws QuotesException <EMBED CLASS="external-html" DATA-FILE-ID="QEX"> 564 * @throws ClosingTagNodeException <EMBED CLASS="external-html" DATA-FILE-ID=CTNEX> 565 * @throws HTMLTokException If an invalid HTML 4 or 5 token is not present 566 * (<B>{@code CASE_INSENSITIVE}</B>). 
567 * @return <EMBED CLASS="external-html" DATA-FILE-ID=TN_SET_AV_RET> 568 * @see ClosingTagNodeException#check(TagNode) 569 * @see #generateElementString(String, Properties, Iterable, SD, boolean) 570 * @see #setAV(Properties, SD) 571 * @see #tok 572 * @see #str 573 * @see #isClosing 574 */ 575 public TagNode setAV(String attribute, String value, SD quote) 576 { 577 ClosingTagNodeException.check(this); 578 579 if (attribute == null) throw new NullPointerException( 580 "You have passed 'null' to the 'attribute' (attribute-name) String-parameter, " + 581 "but this is not allowed here." 582 ); 583 584 if (value == null) throw new NullPointerException( 585 "You have passed 'null' to the 'attribute' (attribute-value) String-parameter, " + 586 "but this is not allowed here." 587 ); 588 589 // Retrieve all "Key-Only" (Boolean) Attributes from 'this' (the original) TagNode 590 // Use Java Streams to filter out any that match the newly-added attribute key-value pair. 591 // SAVE: Save the updated / shortened list to a List<String> 592 593 List<String> prunedOriginalKeyOnlyAttributes = allKeyOnlyAttributes(true) 594 .filter((String originalKeyOnlyAttribute) -> 595 ! originalKeyOnlyAttribute.equalsIgnoreCase(attribute)) 596 .collect(Collectors.toList()); 597 598 // Retrieve all Inner-Tag Key-Value Pairs. Preserve the Case of the Attributes. Preserve 599 // the Quotation-Marks. 600 601 Properties p = allAV(true, true); 602 String originalValueWithQuotes = null; 603 String computedQuote = null; 604 605 // NOTE, there should only be ONE instance of an attribute in an HTML element, but 606 // malformed HTML happens all the time, so to keep this method safe, it checks 607 // (and removes) the entire attribute-list for matches - not just the first found instance. 
608 609 for (String key : p.stringPropertyNames()) 610 611 if (key.equalsIgnoreCase(attribute)) 612 { 613 Object temp = p.remove(key); 614 if (temp instanceof String) originalValueWithQuotes = (String) temp; 615 } 616 617 // If the user does not wish to "change" the original quote choice, then find out what 618 // the original-quote choice was... 619 620 if ( 621 (quote == null) 622 && (originalValueWithQuotes != null) 623 && (originalValueWithQuotes.length() >= 2) 624 ) 625 { 626 char s = originalValueWithQuotes.charAt(0); 627 char e = originalValueWithQuotes.charAt(originalValueWithQuotes.length() - 1); 628 629 if ((s == e) && (s == '\'')) computedQuote = "" + SD.SingleQuotes.quote; 630 631 else if ((s == e) && (s == '"')) computedQuote = "" + SD.DoubleQuotes.quote; 632 633 else computedQuote = ""; 634 } 635 else if (quote == null) computedQuote = ""; 636 637 else computedQuote = "" + quote.quote; 638 639 p.put(attribute, computedQuote + value + computedQuote); 640 641 return new TagNode( 642 generateElementString( 643 // Rather than using '.tok' here, preserve the case of the original HTML Element 644 this.str.substring(1, 1 + tok.length()), p, 645 prunedOriginalKeyOnlyAttributes, null /* SD */, this.str.endsWith("/>") 646 )); 647 } 648 649 /** 650 * This allows for inserting or updating multiple {@code TagNode} inner-tag 651 * <B STYLE="color: red;">key-value</B> pairs with a single method invocation. 652 * 653 * @param attributes These are the new attribute <B STYLE="color: red;">key-value</B> pairs to 654 * be inserted. 655 * 656 * @param defaultQuote This is the default quotation mark to use, if the {@code 'attribute'} 657 * themselves do not already have quotations. 658 * 659 * <BR /><BR /><B><SPAN STYLE='color: red;'>IMPORTANT:</B></SPAN> If this value is used, then 660 * none of the provided {@code Property}-<B STYLE="color: red;">values</B> of the input 661 * {@code java.lang.Properties} instance should have quotes already. 
Each of these 662 * new-<B STYLE="color: red;">values</B> will be wrapped in the quote that is provided as the 663 * value to this parameter. 664 * 665 * <BR /><BR /><B><SPAN STYLE='color: red;'>HOWEVER:</B></SPAN> If this parameter is passed a 666 * value of 'null', then no quotes will be added to the new <B STYLE="color: red;">keys</B> - 667 * <I>unless the attribute being inserted is replacing a previous attribute that was already 668 * present in the element.</I> In this case, the original quotation shall be used. If this 669 * parameter receives 'null' and any of the new {@code Properties} were not already present in 670 * the original ({@code 'this'}) element, then no quotation marks will be used, which may 671 * throw a {@code QuotesException} if the attribute <B STYLE="color: red;">value</B> contains 672 * any white-space. 673 * 674 * @throws InnerTagKeyException <EMBED CLASS="external-html" DATA-FILE-ID="ITKEYEXPROP"> 675 * 676 * @throws QuotesException if there are "quotes within quotes" problems, due to the 677 * <B STYLE="color: red;">values</B> of the <B STYLE="color: red;">key-value</B> pairs. 678 * 679 * @throws HTMLTokException if an invalid HTML 4 or 5 token is not present 680 * <B>({@code CASE_INSENSITIVE})</B> 681 * 682 * @throws ClosingTagNodeException <EMBED CLASS="external-html" DATA-FILE-ID=CTNEX> 683 * 684 * @return An HTML {@code TagNode} instance with updated {@code TagNode} information. 685 * 686 * <BR /><BR /><B><SPAN STYLE="color: red;">IMPORTANT:</SPAN></B> Because 687 * <I>{@code TagNode's} are immutable</I> (since they are just wrapped-java-{@code String's}, 688 * which are also immutable), it is important to remember that this method <I><B>does not 689 * change the contents</B></I> of a {@code TagNode}, but rather <I><B>returns an entirely 690 * new {@code TagNode}</I></B> as a result instead. 
691 * 692 * @see ClosingTagNodeException#check(TagNode) 693 * @see #setAV(String, String, SD) 694 * @see #allKeyOnlyAttributes(boolean) 695 * @see #tok 696 * @see #str 697 * @see #isClosing 698 */ 699 public TagNode setAV(Properties attributes, SD defaultQuote) 700 { 701 ClosingTagNodeException.check(this); 702 703 // Check that this attributes has elements. 704 if (attributes.size() == 0) throw new IllegalArgumentException( 705 "You have passed an empty java.util.Properties instance to the " + 706 "setAV(Properties, SD) method" 707 ); 708 709 // Retrieve all Inner-Tag Key-Value Pairs. 710 // Preserve: the Case of the Attributes. 711 // Preserve: the Quotation-Marks. 712 713 Properties originalAttributes = allAV(true, true); 714 715 // Retrieve all "Key-Only" (Boolean) attributes from the new / update attribute-list 716 Set<String> newAttributeKeys = attributes.stringPropertyNames(); 717 718 // Retrieve all "Key-Only" (Boolean) Attributes from 'this' (the original) TagNode 719 // Use Java Streams to filter out all the ones that need to be clobbered by-virtue-of 720 // the fact that they are present in the new / parameter-updated attribute key-value list. 721 // SAVE: Save the updated / shortened list to a List<String> 722 723 List<String> prunedOriginalKeyOnlyAttributes = allKeyOnlyAttributes(true) 724 .filter((String originalKeyOnlyAttribute) -> 725 { 726 // Returns false when the original key-only attribute matches one of the 727 // new attributes being inserted. Notice that a case-insensitive comparison 728 // must be performed - to preserve case. 729 730 for (String newKey : newAttributeKeys) 731 if (newKey.equalsIgnoreCase(originalKeyOnlyAttribute)) 732 return false; 733 734 return true; 735 }) 736 .collect(Collectors.toList()); 737 738 // NOTE: There is no need to check the validity of the new attributes. 
The TagNode 739 // constructor that is invoked on the last line of this method will do a 740 // validity-check on the attribute key-names provided to the 'attributes' 741 // java.util.Properties instance passed to to this method. 742 743 for (String newKey : newAttributeKeys) 744 { 745 String originalValueWithQuotes = null; 746 String computedQuote = null; 747 748 // NOTE, there should only be ONE instance of an attribute in an HTML element, but 749 // malformed HTML happens all the time, so to keep this method safe, it checks (and 750 // removes) the entire attribute-list for matches - not just the first found instance. 751 752 for (String originalKey : originalAttributes.stringPropertyNames()) 753 754 if (originalKey.equalsIgnoreCase(newKey)) 755 { 756 // Remove the original key-value inner-tag pair. 757 Object temp = originalAttributes.remove(originalKey); 758 if (temp instanceof String) originalValueWithQuotes = (String) temp; 759 } 760 761 // If the user does not wish to "change" the original quote choice, then find out what 762 // the original-quote choice was... 763 764 if ( 765 (defaultQuote == null) 766 && (originalValueWithQuotes != null) 767 && (originalValueWithQuotes.length() >= 2) 768 ) 769 { 770 char s = originalValueWithQuotes.charAt(0); 771 char e = originalValueWithQuotes.charAt(originalValueWithQuotes.length() - 1); 772 773 if ((s == e) && (s == '\'')) computedQuote = "" + SD.SingleQuotes.quote; 774 775 else if ((s == e) && (s == '"')) computedQuote = "" + SD.DoubleQuotes.quote; 776 777 else computedQuote = ""; 778 } 779 780 else if (defaultQuote == null) computedQuote = ""; 781 782 else computedQuote = "" + defaultQuote.quote; 783 784 // Insert the newly, updated key-value inner-tag pair. This 'Properties' will be 785 // used to construct a new TagNode. 
786 787 originalAttributes.put(newKey, computedQuote + attributes.get(newKey) + computedQuote); 788 } 789 790 return new TagNode( 791 generateElementString( 792 // Rather than using '.tok' here, preserve the case of the original HTML Element 793 this.str.substring(1, 1 + tok.length()), 794 originalAttributes, prunedOriginalKeyOnlyAttributes, null /* SD */, 795 this.str.endsWith("/>") 796 )); 797 } 798 799 800 /** 801 * This will append a substring to the attribute <B STYLE="color: red;">value</B> of an HTML 802 * {@code TagNode}. 803 * 804 * This method can be very useful, for instance when dealing with CSS tags that are inserted 805 * inside the HTML node itself. For example, in order to add a {@code 'color: red; 806 * background: white;'} portion to the CSS {@code 'style'} tag of an HTML 807 * {@code <TABLE STYLE="...">} element, without clobbering the {@code style}-information that 808 * is already inside the element, using this method will achieve that. 809 * 810 * @param attribute The <B STYLE="color: red;">name</B> of the attribute to which the 811 * <B STYLE="color: red;">value</B> must be appended. This parameter may not be null, or a 812 * {@code NullPointerException} will throw. 813 * 814 * @param appendStr The {@code String} to be appended to the 815 * attribute-<B STYLE="color: red;">value</B>. 816 * 817 * @param startOrEnd If this parameter is <B>TRUE</B> then the append-{@code String} will be 818 * inserted at the beginning (before) whatever the current attribute-<B STYLE="color: red;"> 819 * value</B> is. If this parameter is <B>FALSE</B> then the append-{@code String} will be 820 * inserted at the end (after) the current attribute-<B STYLE="color: red;">value</B> 821 * {@code String}. 
822 * 823 * <BR /><BR /><B>NOTE:</B> If tag element currently does not posses this attribute, then the 824 * <B STYLE="color: red;">attribute/value</B> pair will be created and inserted with its 825 * <B STYLE="color: red;">value</B> set to the value of {@code 'appendStr'.} 826 * 827 * @param quote <EMBED CLASS='external-html' DATA-FILE-ID=TGND_QUOTE_EXPL> 828 * 829 * <BR /><BR />It is important to note that "appending" a {@code String} to an attribute's 830 * <B STYLE='color: red;'>value</B> will often (but not always) mean that the new 831 * attribute-<B STYLE='color: red;'>value</B> will have a space character. <B><I>If</I></B> 832 * this parameter were passed null, <B><I>and if</I></B> the original tag had a value, but did 833 * not use any quotes, <B><I>then</I></B> the attribute's ultimate inclusion into the tag would 834 * generate invalid HTML, and the invocation of {@link #setAV(String, String, SD)} would 835 * throw a {@link QuotesException}. 836 * 837 * @return Since all instances of {@code TagNode} are immutable, this method will not actually 838 * alter the {@code TagNode} element, but rather create a new object reference that contains 839 * the updated attribute. 840 * 841 * @see #AV(String) 842 * @see #setAV(String, String, SD) 843 * @see ClosingTagNodeException#check(TagNode) 844 * 845 * @throws ClosingTagNodeException <EMBED CLASS="external-html" DATA-FILE-ID=CTNEX> 846 * 847 * @throws QuotesException The <B><A HREF=#QUOTEEX>rules</A></B> for quotation usage apply 848 * here too, and see that explanation for how how this exception could be thrown. 849 */ 850 public TagNode appendToAV(String attribute, String appendStr, boolean startOrEnd, SD quote) 851 { 852 ClosingTagNodeException.check(this); 853 854 if (attribute == null) throw new NullPointerException( 855 "You have passed 'null' to the 'attribute' (attribute-name) String-parameter, " + 856 "but this is not allowed here." 
857 ); 858 859 if (appendStr == null) throw new NullPointerException( 860 "You have passed 'null' to the 'appendStr' (attribute-value-append-string) " + 861 "String-parameter, but this is not allowed here." 862 ); 863 864 String curVal = AV(attribute); 865 866 if (curVal == null) curVal = ""; 867 868 // This decides whether to insert the "appendStr" before the current value-string, 869 // or afterwards. This is based on the passed boolean-parameter 'startOrEnd' 870 871 curVal = startOrEnd ? (appendStr + curVal) : (curVal + appendStr); 872 873 // Reuse the 'setAV(String, String, SD)' method already defined in this class. 874 return setAV(attribute, curVal, quote); 875 } 876 877 878 // ******************************************************************************************** 879 // ******************************************************************************************** 880 // Attribute Removal Operations 881 // ******************************************************************************************** 882 // ******************************************************************************************** 883 884 885 /** 886 * Convenience Method. 887 * <BR />Invokes: {@link #removeAttributes(String[])} 888 */ 889 public TagNode remove(String attributeName) { return removeAttributes(attributeName); } 890 891 /** 892 * <EMBED CLASS="external-html" DATA-FILE-ID=TN_REM_ATTR_DESC> 893 * @param attributes <EMBED CLASS="external-html" DATA-FILE-ID=TN_REM_ATTR_ATTR> 894 * @return <EMBED CLASS="external-html" DATA-FILE-ID=TN_REM_ATTR_RET> 895 * @throws ClosingTagNodeException <EMBED CLASS="external-html" DATA-FILE-ID=CTNEX> 896 * @see ClosingTagNodeException#check(TagNode) 897 * @see #tok 898 * @see #isClosing 899 * @see #str 900 * @see #TagNode(String) 901 * @see #generateElementString(String, Properties, Iterable, SD, boolean) 902 */ 903 public TagNode removeAttributes(String... 
attributes) 904 { 905 ClosingTagNodeException.check(this); 906 907 // Retrieve all Inner-Tag Key-Value Pairs. Preserve the Case of the Attributes. Preserve 908 // the Quotation-Marks. 909 910 Properties originalAttributes = allAV(true, true); 911 912 // Remove any attributes from the "Attributes Key-Value Properties Instance" which MATCH 913 // the attribute names that have been EXPLICITLY REQUESTED FOR REMOVAL 914 915 for (String key : originalAttributes.stringPropertyNames()) 916 for (String attribute : attributes) 917 if (key.equalsIgnoreCase(attribute)) 918 originalAttributes.remove(key); 919 920 // Retrieve all "Boolean Attributes" (key-no-value). Preserve the Case of these Attributes 921 // Retain only the attributes in the 'filteredKeyOnlyAttributes' String-Array which have 922 // PASSED THE FILTER OPERATION. The filter operation only returns TRUE if the 923 // requested-attribute-list does not contain a copy of the Key-Only-Attribute 924 // 925 // NOTE: 'true' is passed as input to the 'allKeyOnlyAttributes(boolean)' method to request 926 // that CASE be PRESERVED. 927 928 Iterable<String> prunedKeyOnlyAttributes = allKeyOnlyAttributes(true) 929 930 .filter((String attribute) -> 931 { 932 // Returns false when the original key-only attribute matches one of the attributes 933 // that was requested to to be removed. Notice that a case-insensitive comparison 934 // must be performed. 935 936 for (String removeAttributes : attributes) 937 if (removeAttributes.equalsIgnoreCase(attribute)) 938 return false; 939 940 return true; 941 }) 942 .collect(Collectors.toList()); 943 944 return new TagNode( 945 generateElementString( 946 // Rather than using '.tok' here, preserve the case of the original HTML Element 947 this.str.substring(1, 1 + tok.length()), 948 originalAttributes, prunedKeyOnlyAttributes, /* SD */ null, 949 this.str.endsWith("/>") 950 )); 951 } 952 953 /** 954 * {@code TagNode's} are immutable. 
And because of this, calling {@code removeAllAV()} is 955 * actually the same as retrieving the standard, zero-attribute, pre-instantiated instance of 956 * an HTML Element. Pre-instantiated <B><I>factory-instances</I></B> of {@code class TagNode} 957 * for every HTML-Element are stored by {@code class HTMLTags} inside a {@code Hashtable.} 958 * They can be retrieved in multiple ways, two of which are found in methods in this class. 959 * 960 * <BR /><BR /><B>Point of Interest:</B> Calling these three different methods will all return 961 * <I>identical</I> {@code Object} references: 962 * 963 * <BR /><BR /> 964 * 965 * <UL CLASS="JDUL"> 966 * <LI>{@code TagNode v1 = myTagNode.removeAllAV(); } </LI> 967 * <LI>{@code TagNode v2 = TagNode.getInstance(myTagToken, openOrClosed); } </LI> 968 * <LI>{@code TagNode v3 = HTMLTag.hasTag(myTagToken, openOrClosed); } </LI> 969 * <LI><SPAN STYLE="color: red;">{@code assert((v1 == v2) && (v2 == v3)); }</SPAN></LI> 970 * </UL> 971 * 972 * <BR /><BR /><IMG SRC='doc-files/img/removeAllAV.png' CLASS=JDIMG ALT='example'> 973 * 974 * @return An HTML {@code TagNode} instance with all inner attributes removed. 975 * 976 * <BR /><BR /><B>NOTE:</B> If this tag contains an "ending forward slash" that ending slash 977 * will not be included in the output {@code TagNode.} 978 * 979 * <BR /><BR /><B><SPAN STYLE="color: red;">IMPORTANT:</SPAN></B> Because <I>{@code TagNode's} 980 * are immutable</I> (since they are just wrapped-java-{@code String's}, which are also 981 * immutable), it is important to remember that this method <I><B>does not change the 982 * contents</B></I> of a {@code TagNode}, but rather <I><B>returns an entirely new 983 * {@code TagNode}</I></B> as a result instead. 
984 * 985 * @throws ClosingTagNodeException <EMBED CLASS="external-html" DATA-FILE-ID=CTNEX> 986 * 987 * @see ClosingTagNodeException#check(TagNode) 988 * @see #getInstance(String, TC) 989 * @see #str 990 * @see #tok 991 * @see TC#OpeningTags 992 */ 993 public TagNode removeAllAV() 994 { 995 ClosingTagNodeException.check(this); 996 997 // NOTE: We *CANNOT* use the 'tok' field to instantiate the TagNode here, because the 'tok' 998 // String-field is *ALWAYS* guaranteed to be in a lower-case format. The 'str' 999 // String-field, however uses the original case that was found on the HTML Document by the 1000 // parser (or in the Constructor-Parameters that were passed to construct 'this' instance 1001 // of TagNode. 1002 1003 return getInstance(this.str.substring(1, 1 + tok.length()), TC.OpeningTags); 1004 } 1005 1006 // ******************************************************************************************** 1007 // Retrieve all attributes 1008 // ******************************************************************************************** 1009 1010 /** 1011 * Convenience Method. 1012 * <BR />Invokes: {@link #allAV(boolean, boolean)} 1013 * <BR />Attribute-<B STYLE="color: red;">names</B> will be in lower-case. 1014 */ 1015 public Properties allAV() { return allAV(false, false); } 1016 1017 /** 1018 * This will copy every attribute <B STYLE="color: red;">key-value</B> pair inside 1019 * {@code 'this'} HTML {@code TagNode} element into a {@code java.util.Properties} Hash-Table. 1020 * 1021 * <BR /><BR /><B>RETURN-VALUE NOTE:</B> This method shall not return any "Key-Only Attributes" 1022 * (a.k.a. "Boolean Attributes"). The most commonly used "Boolean Attribute" example is the 1023 * {@code 'HIDDEN'} key-word that is used to prevent the browser from displaying an HTML 1024 * Element. Inner-tags that represent attribute <B STYLE="color: red;">key-value</B> pairs are 1025 * the only attributes that may be included in the returned {@code 'Properties'} instance. 
1026 * 1027 * <BR /><BR /><IMG SRC='doc-files/img/allAV.png' CLASS=JDIMG ALT="example"> 1028 * 1029 * @param keepQuotes If this parameter is passed <B>TRUE</B>, then any surrounding quotation 1030 * marks will be included for each the <B STYLE="color: red;">values</B> of each attribute 1031 * key-value pair. 1032 * 1033 * @param preserveKeysCase If this parameter is passed <B>TRUE</B>, then the method 1034 * {@code String.toLowerCase()} will not be invoked on any of the 1035 * <B STYLE="color: red;">keys</B> (attribute-names) of each inner-tag key-value pair. 1036 * 1037 * <EMBED CLASS="external-html" DATA-FILE-ID=TAGNODE_PRESERVE_C> 1038 * 1039 * @return This returns a list of each and every attribute-<B STYLE="color: red;">name</B> - 1040 * <I>and the associate <B STYLE="color: red;">value</B> of the attribute</I> - found in 1041 * {@code 'this' TagNode}. An instance of {@code class java.util.Properties} is used to store 1042 * the attribute <B STYLE="color: red;">key-value</B> pairs. 1043 * 1044 * <BR /><BR /><B>NOTE:</B> This method will <B>NOT</B> return any boolean, 1045 * <B STYLE="color: red;">key-only</B> attributes present in {@code 'this' TagNode}. 1046 * 1047 * <BR /><BR /><B>ALSO:</B> This method shall not return {@code 'null'}. If there do not 1048 * exist any Attribute-Value Pairs, or if {@code 'this'} node is a closing-element, then 1049 * an empty {@code 'Properties'} instance shall be returned. 1050 * 1051 * @see StringParse#ifQuotesStripQuotes(String) 1052 * @see AttrRegEx#KEY_VALUE_REGEX 1053 * @see #tok 1054 * @see #str 1055 */ 1056 public Properties allAV(boolean keepQuotes, boolean preserveKeysCase) 1057 { 1058 Properties ret = new Properties(); 1059 1060 // NOTE: OPTIMIZED, "closing-versions" of the TagNode, and TagNode's whose 'str' field is 1061 // is only longer than the token, itself, by 3 or less characters cannot have 1062 // attributes. 
1063 // CHARS: '<', TOKEN, SPACE, '>' 1064 // RET: In that case, just return an empty 'Properties' instance. 1065 1066 if (isClosing || (str.length() <= (tok.length() + 3))) return ret; 1067 1068 // This RegEx Matcher 'matches' against Attribute/InnerTag Key-Value Pairs. 1069 // m.group(1): UN-USED! (Includes Key, Equals-Sign, and Value). Not w/leading white-space 1070 // m.group(2): returns the 'key' portion of the key-value pair, before an '=' (equals-sign) 1071 // m.group(3): returns the 'value' portion of the key-value pair, after an '=' 1072 1073 Matcher m = AttrRegEx.KEY_VALUE_REGEX.matcher(this.str); 1074 1075 // MORE-CODE, but MORE-EFFICIENT (slightly) 1076 1077 if (keepQuotes && preserveKeysCase) 1078 while (m.find()) ret.put(m.group(2), m.group(3)); 1079 1080 else if (!keepQuotes && preserveKeysCase) 1081 while (m.find()) ret.put(m.group(2), StringParse.ifQuotesStripQuotes(m.group(3))); 1082 1083 else if (keepQuotes && !preserveKeysCase) 1084 while (m.find()) ret.put(m.group(2).toLowerCase(), m.group(3)); 1085 1086 else if (!keepQuotes && !preserveKeysCase) 1087 while (m.find()) 1088 ret.put(m.group(2).toLowerCase(), StringParse.ifQuotesStripQuotes(m.group(3))); 1089 1090 return ret; 1091 } 1092 1093 /** 1094 * Convenience Method. 1095 * <BR />Invokes: {@link #allAN(boolean, boolean)} 1096 * <BR />Attribute-<B STYLE="color: red;">names</B> will be in lower-case 1097 */ 1098 public Stream<String> allAN() 1099 { return allAN(false, false); } 1100 1101 /** 1102 * This method will only return a list of attribute-<B STYLE="color: red;">names</B>. The 1103 * attribute-<B STYLE="color: red">values</B> shall <B>NOT</B> be included in the result. The 1104 * {@code String's} returned can have their "case-preserved" by passing <B>TRUE</B> to the 1105 * input boolean parameter {@code 'preserveKeysCase'}. 
1106 * 1107 * @param preserveKeysCase If this is parameter receives <B>TRUE</B> then the case of the 1108 * attribute-<B STYLE="color: red;">names</B> shall be preserved. 1109 * 1110 * <EMBED CLASS="external-html" DATA-FILE-ID=TAGNODE_PRESERVE_C> 1111 * 1112 * @param includeKeyOnlyAttributes When this parameter receives <B>TRUE</B>, then any 1113 * "Boolean Attributes" or "Key-Only, No-Value-Assignment" Inner-Tags will <B>ALSO</B> be 1114 * included in the {@code Stream<String>} returned by this method. 1115 * 1116 * @return an instance of {@code Stream<String>} containing all 1117 * attribute-<B STYLE="color: red;">names</B> identified in {@code 'this'} instance of 1118 * {@code TagNode}. A {@code java.util.stream.Stream} is used because it's contents can easily 1119 * be converted to just about any data-type. 1120 * 1121 * <EMBED CLASS="external-html" DATA-FILE-ID=STRMCNVT> 1122 * 1123 * <BR /><B>NOTE:</B> This method shall never return {@code 'null'} - even if there are no 1124 * attribute <B STYLE="color: red;">key-value</B> pairs contained by {@code 'this' TagNode}. 1125 * If there are strictly zero attributes, an empty {@code Stream} shall be returned, instead. 1126 * 1127 * @see #allKeyOnlyAttributes(boolean) 1128 * @see #allAN() 1129 */ 1130 public Stream<String> allAN(boolean preserveKeysCase, boolean includeKeyOnlyAttributes) 1131 { 1132 // If there is NO ROOM in the "str" field for attributes, then there is now way attributes 1133 // could exist in this element. Return "empty" immediately. 1134 // 1135 // NOTE: OPTIMIZED, "closing-versions" of the TagNode, and TagNode's whose 'str' field 1136 // is only longer than the token, itself, by 3 or less characters cannot have 1137 // attributes. 1138 // 1139 // CHARS: '<', TOKEN, SPACE, '>' 1140 // RET: In that case, just return an empty Stream. 1141 1142 if (isClosing || (str.length() <= (tok.length() + 3))) return Stream.empty(); 1143 1144 // Use Java Streams. 
A String-Stream is easily converted to just about any data-type 1145 Stream.Builder<String> b = Stream.builder(); 1146 1147 // This RegEx Matcher 'matches' against Attribute/InnerTag Key-Value Pairs. 1148 // m.group(2): returns the 'key' portion of the key-value pair, before an '=' (equals-sign) 1149 1150 Matcher m = AttrRegEx.KEY_VALUE_REGEX.matcher(this.str); 1151 1152 // Retrieve all of the keys of the attribute key-value pairs. 1153 while (m.find()) b.add(m.group(2)); 1154 1155 // This Stream contains only keys that were once key-value pairs, if there are "key-only" 1156 // attributes, they have not been added yet. 1157 1158 Stream<String> ret = b.build(); 1159 1160 // Convert these to lower-case, (if requested) 1161 if (! preserveKeysCase) ret = ret.map((String attribute) -> attribute.toLowerCase()); 1162 1163 // Now, add in all the "Key-Only" attributes (if there are any). Note, "preserve-case" 1164 // and "to lower case" are handled, already, in method "allKeyOnlyAttributes(boolean)" 1165 1166 if (includeKeyOnlyAttributes) 1167 return Stream.concat(ret, allKeyOnlyAttributes(preserveKeysCase)); 1168 1169 return ret; 1170 } 1171 1172 1173 // ******************************************************************************************** 1174 // ******************************************************************************************** 1175 // Key only attributes 1176 // ******************************************************************************************** 1177 // ******************************************************************************************** 1178 1179 1180 /** 1181 * <EMBED CLASS='external-html' DATA-FILE-ID=TN_ALL_KOA_DESC> 1182 * @param preserveKeysCase <EMBED CLASS='external-html' DATA-FILE-ID=TN_ALL_KOA_PKC> 1183 * <EMBED CLASS="external-html" DATA-FILE-ID=TAGNODE_PRESERVE_C> 1184 * @return <EMBED CLASS="external-html" DATA-FILE-ID=TN_ALL_KOA_RET> 1185 * <EMBED CLASS="external-html" DATA-FILE-ID=STRMCNVT> 1186 * @see #tok 1187 * @see #str 
1188 */ 1189 public Stream<String> allKeyOnlyAttributes(boolean preserveKeysCase) 1190 { 1191 // NOTE: OPTIMIZED, "closing-versions" of the TagNode, and TagNode's whose 'str' 1192 // field is only longer than the token, itself, by 3 or less characters cannot have 1193 // attributes. In that case, just return an empty 'Stream' instance. 1194 1195 int len = str.length(); 1196 if (isClosing || (len <= (tok.length() + 3))) return Stream.empty(); 1197 1198 // Leaves off the opening 'token' and less-than '<' symbol (leaves off "<DIV " for example) 1199 // Also leave off the "ending-forward-slash" (if there is one) and ending '>' 1200 1201 String s = str.substring(tok.length() + 2, len - ((str.charAt(len - 2) == '/') ? 2 : 1)); 1202 1203 // if all lower-case is requested, do that here. 1204 if (! preserveKeysCase) s = s.toLowerCase(); 1205 1206 // java.util.regex.Pattern.split(CharSequence) is sort of an "inverse reg-ex" in that it 1207 // returns all of the text that was present BETWEEN the matches 1208 // NOTE: This is the "opposite of the matches, themselves)" - a.k.a. all the stuff that was 1209 // left-out. 1210 1211 Stream.Builder<String> b = Stream.builder(); 1212 1213 // 'split' => inverse-matches (text between KEY-VALUE pairs) 1214 for (String unMatchedStr : AttrRegEx.KEY_VALUE_REGEX.split(s)) 1215 1216 // Of that stuff, now do a white-space split for connected characters 1217 for (String keyWord : unMatchedStr.split("\\s+")) 1218 1219 // Call String.trim() and String.length() 1220 if ((keyWord = keyWord.trim()).length() > 0) 1221 1222 // Check for valid Attribute-Name's only 1223 if (AttrRegEx.ATTRIBUTE_KEY_REGEX_PRED.test(keyWord)) 1224 1225 // ... put it in the return stream. 
1226 // NOTE: This has the potential to slightly change the original HTML 1227 // It will "leave out any guck" that was in the Element 1228 b.add(keyWord); 1229 1230 // Build the Stream<String>, and return; 1231 return b.build(); 1232 } 1233 1234 /** 1235 * <EMBED CLASS='external-html' DATA-FILE-ID=TN_HAS_KOA_DESC> 1236 * <!-- @ExternalHTMLDocFiles({"@returns", "@desc", "keyOnlyAttribute", 1237 * "@IllegalArgumentException"}) --> 1238 * @param keyOnlyAttribute <EMBED CLASS='external-html' DATA-FILE-ID=TN_HAS_KOA_KOA> 1239 * @return <EMBED CLASS='external-html' DATA-FILE-ID=TN_HAS_KOA_RET> 1240 * @throws IllegalArgumentException <EMBED CLASS='external-html' DATA-FILE-ID=TN_HAS_KOA_IAEX> 1241 * @see AttrRegEx#KEY_VALUE_REGEX 1242 */ 1243 public boolean hasKeyOnlyAttribute(String keyOnlyAttribute) 1244 { 1245 // Closing TagNode's do not have attributes, return false immediately. 1246 if (this.isClosing) return false; 1247 1248 // ONLY CHECKS FOR WHITE-SPACE, *NOT* VALIDITY... 1249 if (StringParse.hasWhiteSpace(keyOnlyAttribute)) throw new IllegalArgumentException( 1250 "The attribute you have passed [" + keyOnlyAttribute + "] has white-space, " + 1251 "This is not allowed here, because the search routine splits on whitespace, and " + 1252 "therefore a match would never be found." 1253 ); 1254 1255 // NOTE: TagNode's whose 'str' field is only longer than the token, itself, by 3 or less 1256 // characters cannot have attributes. In that case, just return false. 1257 1258 int len = str.length(); 1259 if (len <= (tok.length() + 3)) return false; 1260 1261 // Leaves off the opening 'token' and less-than '<' symbol (leaves off "<DIV " for example) 1262 // Also leave off the "ending-forward-slash" (if there is one), and edning '>' 1263 1264 String s = str.substring(tok.length() + 2, len - ((str.charAt(len - 2) == '/') ? 
2 : 1)); 1265 1266 // java.util.regex.Pattern.split(CharSequence) is sort of an "inverse reg-ex" in that it 1267 // returns all of the text that was present BETWEEN the matches 1268 1269 // 'split' => inverse-matches (text between KEY-VALUE pairs) 1270 for (String unMatchedStr : AttrRegEx.KEY_VALUE_REGEX.split(s)) 1271 1272 // Of that stuff, now do a white-space split for connected characters 1273 for (String keyWord : unMatchedStr.split("\\s+")) 1274 1275 // trim, check-length... 1276 if ((keyWord = keyWord.trim()).length() > 0) 1277 1278 if (keyOnlyAttribute.equalsIgnoreCase(keyWord)) return true; 1279 1280 // Was not found, return false; 1281 return false; 1282 } 1283 1284 1285 // ******************************************************************************************** 1286 // ******************************************************************************************** 1287 // testAV 1288 // ******************************************************************************************** 1289 // ******************************************************************************************** 1290 1291 1292 /** 1293 * Convenience Method. 1294 * <BR />Invokes: {@link #testAV(String, Predicate)} 1295 * <BR />Passes: {@code String.equals(attributeValue)} as the test-{@code Predicate} 1296 */ 1297 public boolean testAV(String attributeName, String attributeValue) 1298 { return testAV(attributeName, (String s) -> s.equals(attributeValue)); } 1299 1300 /** 1301 * Convenience Method. 1302 * <BR />Invokes: {@link #testAV(String, Predicate)} 1303 * <BR />Passes: {@code attributeValueTest.asPredicate()} 1304 */ 1305 public boolean testAV(String attributeName, Pattern attributeValueTest) 1306 { return testAV(attributeName, attributeValueTest.asPredicate()); } 1307 1308 /** 1309 * Convenience Method. 
1310 * <BR />Invokes: {@link #testAV(String, Predicate)} 1311 * <BR />Passes: {@link TextComparitor#test(String, String[])} as the test-{@code Predicate} 1312 */ 1313 public boolean testAV 1314 (String attributeName, TextComparitor attributeValueTester, String... compareStrs) 1315 { return testAV(attributeName, (String s) -> attributeValueTester.test(s, compareStrs)); } 1316 1317 /** 1318 * Test the <B STYLE="color: red;">value</B> of the inner-tag named {@code 'attributeName'} 1319 * (if that attribute exists, and has a non-empty value) using a provided 1320 * {@code Predicate<String>}. 1321 * 1322 * <BR /><BR /><IMG SRC='doc-files/img/testAV1.png' CLASS=JDIMG ALT='example'> 1323 * 1324 * @param attributeName Any String will suffice - but only valid attribute 1325 * <B STYLE="color: red;">names</B> will match the internal regular-expression. 1326 * 1327 * <BR /><BR /><B>NOTE:</B> The validity of this parameter <I><B>is not</I></B> checked with 1328 * the HTML attribute-<B STYLE="color: red;">name</B> Regular-Expression exception checker. 1329 * 1330 * @param attributeValueTest Any {@code java.util.function.Predicate<String>} 1331 * 1332 * @return Method will return <B>TRUE</B> if and only if: 1333 * 1334 * <BR /><BR /><UL CLASS="JDUL"> 1335 * <LI> {@code 'this'} instance of {@code TagNode} has an inner-tag named 1336 * {@code 'attributeName'}. 1337 * <BR /><BR /> 1338 * </LI> 1339 * <LI> The results of the provided {@code String-Predicate}, when applied against the 1340 * <B STYLE="color: red">value</B> of the requested attribute, returns <B>TRUE</B>. 
1341 * </LI> 1342 * </UL> 1343 * 1344 * @see AttrRegEx#KEY_VALUE_REGEX 1345 * @see #str 1346 * @see #isClosing 1347 * @see StringParse#ifQuotesStripQuotes(String) 1348 */ 1349 public boolean testAV(String attributeName, Predicate<String> attributeValueTest) 1350 { 1351 // Closing TagNode's (</DIV>, </A>) cannot attributes, or attribute-values 1352 if (isClosing) return false; 1353 1354 // OPTIMIZATION: TagNode's whose String-length is less than this computed length 1355 // are simply too short to have the attribute named by the input parameter 1356 1357 if (this.str.length() < (this.tok.length() + attributeName.length() + 4)) return false; 1358 1359 // This Reg-Ex will allow us to iterate through each attribute key-value pair 1360 // contained / 'inside' this instance of TagNode. 1361 1362 Matcher m = AttrRegEx.KEY_VALUE_REGEX.matcher(this.str); 1363 1364 // Test each attribute key-value pair, and return the test results if an attribute 1365 // whose name matches 'attributeName' is found. 1366 1367 while (m.find()) 1368 if (m.group(2).equalsIgnoreCase(attributeName)) 1369 return attributeValueTest.test 1370 (StringParse.ifQuotesStripQuotes(m.group(3))); 1371 1372 // No attribute key-value pair was found whose 'key' matched input-parameter 1373 // 'attributeName' 1374 1375 return false; 1376 } 1377 1378 1379 // ******************************************************************************************** 1380 // ******************************************************************************************** 1381 // has-attribute boolean-logic methods 1382 // ******************************************************************************************** 1383 // ******************************************************************************************** 1384 1385 1386 /** 1387 * Convenience Method. 
1388 * <BR />Invokes: {@link #hasLogicOp(boolean, IntFunction, IntPredicate, String[])} 1389 * <BR />Passes: AND Boolean Logic 1390 */ 1391 public boolean hasAND(boolean checkAttributeStringsForErrors, String... attributes) 1392 { 1393 // First-Function: Tells the logic to *IGNORE* intermediate matches (returns NULL) 1394 // (This is *AND*, so wait until all attributes have been found, or at 1395 // the very least all tags in the element tested, and failed. 1396 // 1397 // Second-Function: At the End of the Loops, all Attributes have either been found, or 1398 // at least all attributes in 'this' tag have been tested. Note that the 1399 // first-function is only called on a MATCH, and tht 'AND' requires to 1400 // defer a response until all attributes have been tested.. Here, simply 1401 // RETURN WHETHER OR NOT the MATCH-COUNT equals the number of matches in 1402 // the user-provided String-array. 1403 1404 return hasLogicOp( 1405 checkAttributeStringsForErrors, 1406 (int matchCount) -> null, 1407 (int matchCount) -> (matchCount == attributes.length), 1408 attributes 1409 ); 1410 } 1411 1412 /** 1413 * Convenience Method. 1414 * <BR />Invokes: {@link #hasLogicOp(boolean, IntFunction, IntPredicate, String[])} 1415 * <BR />Passes: OR Boolean Logic 1416 * <!-- NOT USED NOW: 1417 * <BR /><BR /><IMG SRC='doc-files/img/hasOR.png' CLASS=JDIMG ALT="Example"> --> 1418 */ 1419 public boolean hasOR(boolean checkAttributeStringsForErrors, String... attributes) 1420 { 1421 // First-Function: Tells the logic to return TRUE on any match IMMEDIATELY 1422 // 1423 // Second-Function: At the End of the Loops, all Attributes have been tested. SINCE the 1424 // previous function returns on match immediately, AND SINCE this is an 1425 // OR, therefore FALSE must be returned (since there were no matches!) 
1426 1427 return hasLogicOp( 1428 checkAttributeStringsForErrors, 1429 (int matchCount) -> true, 1430 (int matchCount) -> false, 1431 attributes 1432 ); 1433 } 1434 1435 /** 1436 * Convenience Method. 1437 * <BR />Invokes: {@link #hasLogicOp(boolean, IntFunction, IntPredicate, String[])} 1438 * <BR />Passes: NAND Boolean Logic 1439 */ 1440 public boolean hasNAND(boolean checkAttributeStringsForErrors, String... attributes) 1441 { 1442 // First-Function: Tells the logic to return FALSE on any match IMMEDIATELY 1443 // 1444 // Second-Function: At the End of the Loops, all Attributes have been tested. SINCE 1445 // the previous function returns on match immediately, AND SINCE this is 1446 // a NAND, therefore TRUE must be returned (since there were no matches!) 1447 1448 return hasLogicOp( 1449 checkAttributeStringsForErrors, 1450 (int matchCount) -> false, 1451 (int matchCount) -> true, 1452 attributes 1453 ); 1454 } 1455 1456 /** 1457 * Convenience Method. 1458 * <BR />Invokes: {@link #hasLogicOp(boolean, IntFunction, IntPredicate, String[])} 1459 * <BR />Passes: XOR Boolean Logic 1460 */ 1461 public boolean hasXOR(boolean checkAttributeStringsForErrors, String... attributes) 1462 { 1463 // First-Function: Tells the logic to IGNORE the FIRST MATCH, and any matches afterwards 1464 // should produce a FALSE result immediately 1465 // (XOR means ==> one-and-only-one) 1466 // 1467 // Second-Function: At the End of the Loops, all Attributes have been tested. Just 1468 // return whether or not the match-count is PRECISELY ONE. 1469 1470 return hasLogicOp( 1471 checkAttributeStringsForErrors, 1472 (int matchCount) -> (matchCount == 1) ? 
null : false, 1473 (int matchCount) -> (matchCount == 1), 1474 attributes 1475 ); 1476 } 1477 1478 /** 1479 * Provides the Logic for methods: 1480 * 1481 * <BR ><TABLE CLASS=BRIEFTABLE> 1482 * <TR><TH>Boolean Evaluation</TH><TH>Method</TH></TR> 1483 * <TR> 1484 * <TD>Checks that <B STYLE='color: red;'><I>all</I></B> Attributes are found</TD> 1485 * <TD>{@link #hasAND(boolean, String[])}</TD> 1486 * </TR> 1487 * <TR> 1488 * <TD>Checks that <B STYLE='color: red;'><I>at least one</I></B> Attribute matches</TD> 1489 * <TD>{@link #hasOR(boolean, String[])}</TD> 1490 * </TR> 1491 * <TR> 1492 * <TD>Checks that <B STYLE='color: red;'><I>precisely-one</I></B> Attribute is found</TD> 1493 * <TD>{@link #hasXOR(boolean, String[])}</TD> 1494 * </TR> 1495 * <TR> 1496 * <TD>Checks that <B STYLE='color: red;'><I>none</I></B> of the Attributes match</TD> 1497 * <TD>{@link #hasNAND(boolean, String[])}</TD> 1498 * </TR> 1499 * </TABLE> 1500 * 1501 * <BR /><IMG SRC='doc-files/img/hasAND.png' CLASS=JDIMG ALT="Example"> 1502 * 1503 * @param attributes This is a list of HTML Element Attribute-<B STYLE="color: red;">Names</B> 1504 * or "Inner Tags" as they are called in this Search and Scrape Package. 1505 * 1506 * @param checkAttributeStringsForErrors <EMBED CLASS="external-html" 1507 * DATA-FILE-ID=TAGNODE_HAS_BOOL> 1508 * 1509 * @return <B>TRUE</B> if at least one of these attribute-<B STYLE="color: red;">names</B> are 1510 * present in {@code 'this'} instance, and <B>FALSE</B> otherwise. 1511 * 1512 * <BR /><BR /><B>NOTE:</B> If this method is passed a zero-length {@code String}-array to the 1513 * {@code 'attributes'} parameter, this method shall exit immediately and return <B>FALSE</B>. 1514 * 1515 * @throws InnerTagKeyException If any of the {@code 'attributes'} are not valid HTML 1516 * attributes, <I><B>and</B></I> the user has passed <B>TRUE</B> to parameter 1517 * {@code checkAttributeStringsForErrors}. 
1518 * 1519 * @throws NullPointerException If any of the {@code 'attributes'} are null. 1520 * 1521 * @throws ClosingTagNodeException <EMBED CLASS="external-html" DATA-FILE-ID="CTNEX"> 1522 * 1523 * @throws IllegalArgumentException If the {@code 'attributes'} parameter has length zero. 1524 * 1525 * @see InnerTagKeyException#check(String[]) 1526 * @see #AV(String) 1527 * @see AttrRegEx#KEY_VALUE_REGEX 1528 */ 1529 protected boolean hasLogicOp( 1530 boolean checkAttributeStringsForErrors, IntFunction<Boolean> onMatchFunction, 1531 IntPredicate reachedEndFunction, String... attributes 1532 ) 1533 { 1534 ClosingTagNodeException.check(this); 1535 1536 // Keep a tally of how many of the input attributes are found 1537 int matchCount = 0; 1538 1539 // Don't clobber the user's input 1540 attributes = attributes.clone(); 1541 1542 // If no attributes are passed to 'attributes' parameter, throw exception. 1543 if (attributes.length == 0) throw new IllegalArgumentException 1544 ("Input variable-length String[] array parameter, 'attributes', has length zero."); 1545 1546 // OPTIMIZATION: TagNode's whose String-length is less than this computed length 1547 // are simply too short to have any attribute-value pairs. 1548 // 4 (characters) are: '<', '>', ' ' and 'X' 1549 // SHORTEST POSSIBLE SUCH-ELEMENT: <DIV X> 1550 // 1551 // This TagNode doesn't have any attributes in it. 1552 // There is no need to check anything, so return FALSE immediately ("OR" fails) 1553 1554 if (this.str.length() < (this.tok.length() + 4)) return false; 1555 1556 if (checkAttributeStringsForErrors) InnerTagKeyException.check(attributes); 1557 1558 1559 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1560 // Check the main key=value attributes 1561 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1562 1563 // Get all inner-tag key-value pairs. 
If even one of these is inside the 'attributes' 1564 // input-parameter string-array, Then we must return true, since this is OR 1565 1566 Matcher keyValueInnerTags = AttrRegEx.KEY_VALUE_REGEX.matcher(this.str); 1567 1568 while (keyValueInnerTags.find()) 1569 { 1570 // Retrieve the key of the key-value pair 1571 String innerTagKey = keyValueInnerTags.group(2); 1572 1573 // Iterate every element of the String[] 'attributes' parameter 1574 SECOND_FROM_TOP: 1575 for (int i=0; i < attributes.length; i++) 1576 1577 // No need to check attributes that have already been matched. 1578 // When an attribute matches, it's place in the array is set to null 1579 if (attributes[i] != null) 1580 1581 // Does the latest keyOnlyInnerTag match one of the user-requested 1582 // attribute names? 1583 if (innerTagKey.equalsIgnoreCase(attributes[i])) 1584 { 1585 // NAND & OR will exit immediately on a match. XOR and AND 1586 // will return 'null' meaning they are not sure yet. 1587 1588 Boolean whatToDo = onMatchFunction.apply(++matchCount); 1589 1590 if (whatToDo != null) return whatToDo; 1591 1592 // Increment the matchCounter, if this ever reaches the length 1593 // of the input array, there is no need to continue with the loop 1594 1595 if (matchCount == attributes.length) 1596 return reachedEndFunction.test(matchCount); 1597 1598 // There are still more matches to be found (not every element in 1599 // this array is null yet)... Keep Searching, but eliminated the 1600 // recently identified attribute from the list, because it has 1601 // already been found. 
1602 1603 attributes[i] = null; 1604 continue SECOND_FROM_TOP; 1605 } 1606 } 1607 1608 1609 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1610 // Check the main key-only (Boolean) Attributes 1611 // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 1612 // 1613 // Also check the "Boolean Attributes" also known as the "Key-Word Only Attributes" 1614 // Use the "Inverse Reg-Ex Matcher" (which matches all the strings that are "between" the 1615 // real matches) 1616 1617 // substring eliminates the leading "<TOK ..." and the trailing '>' character 1618 // 2: '<' character *PLUS* the space (' ') character 1619 String strToSplit = this.str.substring( 1620 2 + tok.length(), 1621 this.str.length() - ((str.charAt(this.str.length() - 2) == '/') ? 2 : 1) 1622 ).trim(); 1623 1624 // 'split' => inverse-matches (text between KEY-VALUE pairs) 1625 for (String unMatchedStr : AttrRegEx.KEY_VALUE_REGEX.split(strToSplit)) 1626 1627 // Of that stuff, now do a white-space split for connected characters 1628 SECOND_FROM_TOP: 1629 for (String keyOnlyInnerTag : unMatchedStr.split("\\s+")) 1630 1631 // Just-In-Case, usually not necessary 1632 if ((keyOnlyInnerTag = keyOnlyInnerTag.trim()).length() > 0) 1633 1634 // Iterate all the input-parameter String-array attributes 1635 for (int i=0; i < attributes.length; i++) 1636 1637 // No need to check attributes that have already been matched. 1638 // When an attribute matches, it's place in the array is set to null 1639 if (attributes[i] != null) 1640 1641 // Does the latest keyOnlyInnerTag match one of the user-requested 1642 // attribute names? 1643 if (keyOnlyInnerTag.equalsIgnoreCase(attributes[i])) 1644 { 1645 // NAND & OR will exit immediately on a match. XOR and AND 1646 // will return 'null' meaning they are not sure yet. 
1647 1648 Boolean whatToDo = onMatchFunction.apply(++matchCount); 1649 1650 if (whatToDo != null) return whatToDo; 1651 1652 // Increment the matchCounter, if this ever reaches the length 1653 // of the input array, there is no need to continue with the loop 1654 1655 if (matchCount == attributes.length) 1656 return reachedEndFunction.test(matchCount); 1657 1658 // There are still more matches to be found (not every element in 1659 // this array is null yet)... Keep Searching, but eliminated the 1660 // recently identified attribute from the list, because it has 1661 // already been found. 1662 1663 attributes[i] = null; 1664 continue SECOND_FROM_TOP; 1665 } 1666 1667 // Let them know how many matches there were 1668 return reachedEndFunction.test(matchCount); 1669 } 1670 1671 1672 // ******************************************************************************************** 1673 // ******************************************************************************************** 1674 // has methods - extended, variable attribute-names 1675 // ******************************************************************************************** 1676 // ******************************************************************************************** 1677 1678 1679 /** 1680 * Convenience Method. 1681 * <BR />Invokes: {@link #has(Predicate)} 1682 * <BR />Passes: {@code String.equalsIgnoreCase(attributeName)} as the test-{@code Predicate} 1683 */ 1684 public boolean has(String attributeName) 1685 { return has((String s) -> s.equalsIgnoreCase(attributeName)); } 1686 1687 /** 1688 * Convenience Method. 1689 * <BR />Invokes: {@link #has(Predicate)} 1690 * <BR />Passes: {@code Pattern.asPredicate()} 1691 */ 1692 public boolean has(Pattern attributeNameRegExTest) 1693 { return has(attributeNameRegExTest.asPredicate()); } 1694 1695 /** 1696 * Convenience Method. 
1697 * <BR />Invokes: {@link #has(Predicate)} 1698 * <BR />Passes: {@link TextComparitor#test(String, String[])} as the test-{@code Predicate} 1699 */ 1700 public boolean has(TextComparitor tc, String... compareStrs) 1701 { return has((String s) -> tc.test(s, compareStrs)); } 1702 1703 /** 1704 * <EMBED CLASS='external-html' DATA-FILE-ID=TN_HAS_DESC2> 1705 * <EMBED CLASS='external-html' DATA-FILE-ID=TAGNODE_HAS_NOTE> 1706 * @param attributeNameTest <EMBED CLASS='external-html' DATA-FILE-ID=TN_HAS_ANT> 1707 * @return <EMBED CLASS='external-html' DATA-FILE-ID=TN_HAS_RET2> 1708 * @see AttrRegEx#KEY_VALUE_REGEX 1709 * @see StrFilter 1710 */ 1711 public boolean has(Predicate<String> attributeNameTest) 1712 { 1713 // Closing HTML Elements may not have attribute-names or values. 1714 // Exit gracefully, and immediately. 1715 1716 if (this.isClosing) return false; 1717 1718 // OPTIMIZATION: TagNode's whose String-length is less than this computed length 1719 // are simply too short to have such an attribute-value pair. 1720 // 4 (characters) are: '<', '>', ' ' and 'X' 1721 // SHORTEST POSSIBLE SUCH-ELEMENT: <DIV X> 1722 1723 if (this.str.length() < (this.tok.length() + 4)) return false; 1724 1725 // Get all inner-tag key-value pairs. If any of them match with the 'attributeNameTest' 1726 // Predicate, return TRUE immediately. 
1727 Matcher keyValueInnerTags = AttrRegEx.KEY_VALUE_REGEX.matcher(this.str); 1728 1729 // the matcher.group(2) has the key (not the value) 1730 while (keyValueInnerTags.find()) 1731 if (attributeNameTest.test(keyValueInnerTags.group(2))) 1732 return true; 1733 1734 // Also check the "Boolean Attributes" also known as the "Key-Word Only Attributes" 1735 // Use the "Inverse Reg-Ex Matcher" (which matches all the strings that are "between" the 1736 // real matches) 1737 1738 // 'split' => inverse-matches (text between KEY-VALUE pairs) 1739 for (String unMatchedStr : AttrRegEx.KEY_VALUE_REGEX.split(this.str)) 1740 1741 // Of that stuff, now do a white-space split for connected characters 1742 for (String keyOnlyInnerTag : unMatchedStr.split("\\s+")) 1743 1744 // Just-In-Case, usually not necessary 1745 if ((keyOnlyInnerTag = keyOnlyInnerTag.trim()).length() > 0) 1746 1747 if (attributeNameTest.test(keyOnlyInnerTag)) 1748 return true; 1749 1750 // A match was not found in either the "key-value pairs", or the boolean "key-only list." 1751 return false; 1752 } 1753 1754 1755 // ******************************************************************************************** 1756 // ******************************************************************************************** 1757 // hasValue(...) methods 1758 // ******************************************************************************************** 1759 // ******************************************************************************************** 1760 1761 1762 /** 1763 * Convenience Method. 1764 * <BR />Invokes: {@link #hasValue(Predicate, boolean, boolean)} 1765 * <BR />Passes: {@code String.equals(attributeValue)} as the test-{@code Predicate} 1766 */ 1767 public Map.Entry<String, String> hasValue 1768 (String attributeValue, boolean retainQuotes, boolean preserveKeysCase) 1769 { return hasValue((String s) -> attributeValue.equals(s), retainQuotes, preserveKeysCase); } 1770 1771 /** 1772 * Convenience Method. 
1773 * <BR />Invokes: {@link #hasValue(Predicate, boolean, boolean)} 1774 * <BR />Passes: {@code attributeValueRegExTest.asPredicate()} 1775 */ 1776 public Map.Entry<String, String> hasValue 1777 (Pattern attributeValueRegExTest, boolean retainQuotes, boolean preserveKeysCase) 1778 { return hasValue(attributeValueRegExTest.asPredicate(), retainQuotes, preserveKeysCase); } 1779 1780 /** 1781 * Convenience Method. 1782 * <BR />Invokes: {@link #hasValue(Predicate, boolean, boolean)} 1783 * <BR />Passes: {@link TextComparitor#test(String, String[])} as the test-{@code Predicate} 1784 */ 1785 public Map.Entry<String, String> hasValue( 1786 boolean retainQuotes, boolean preserveKeysCase, TextComparitor attributeValueTester, 1787 String... compareStrs 1788 ) 1789 { 1790 return hasValue( 1791 (String s) -> attributeValueTester.test(s, compareStrs), retainQuotes, 1792 preserveKeysCase 1793 ); 1794 } 1795 1796 /** 1797 * <EMBED CLASS="external-html" DATA-FILE-ID=TN_HASVAL_DESC2> 1798 * <EMBED CLASS="external-html" DATA-FILE-ID=TN_HASVAL_DNOTE> 1799 * @param attributeValueTest <EMBED CLASS='external-html' DATA-FILE-ID=TN_HASVAL_AVT> 1800 * @param retainQuotes <EMBED CLASS='external-html' DATA-FILE-ID=TN_HASVAL_RQ> 1801 * @param preserveKeysCase 1802 * <EMBED CLASS="external-html" DATA-FILE-ID=TN_HASVAL_PKC> 1803 * <EMBED CLASS="external-html" DATA-FILE-ID=TAGNODE_PRESERVE_C> 1804 * @return <EMBED CLASS="external-html" DATA-FILE-ID=TN_HASVAL_RET2> 1805 * @see AttrRegEx#KEY_VALUE_REGEX 1806 * @see StrFilter 1807 */ 1808 public Map.Entry<String, String> hasValue 1809 (Predicate<String> attributeValueTest, boolean retainQuotes, boolean preserveKeysCase) 1810 { 1811 // Closing HTML Elements may not have attribute-names or values. 1812 // Exit gracefully, and immediately. 1813 1814 if (this.isClosing) return null; 1815 1816 // OPTIMIZATION: TagNode's whose String-length is less than this computed length 1817 // are simply too short to have such an attribute-value pair. 
1818 // 5 (characters) are: '<', '>', ' ', 'X' and '=' 1819 // SHORTEST POSSIBLE SUCH-ELEMENT: <DIV X=> 1820 1821 if (this.str.length() < (this.tok.length() + 5)) return null; 1822 1823 // Get all inner-tag key-value pairs. If any are 'equal' to parameter attributeName, 1824 // return TRUE immediately. 1825 1826 Matcher keyValueInnerTags = AttrRegEx.KEY_VALUE_REGEX.matcher(this.str); 1827 1828 while (keyValueInnerTags.find()) 1829 { 1830 // Matcher.group(3) has the key's value, of the inner-tag key-value pair 1831 // (matcher.group(2) has the key's name) 1832 1833 String foundAttributeValue = keyValueInnerTags.group(3); 1834 1835 // The comparison must be performed on the version of the value that DOES NOT HAVE the 1836 // surrounding quotation-marks 1837 1838 String foundAttributeValueNoQuotes = 1839 StringParse.ifQuotesStripQuotes(foundAttributeValue); 1840 1841 // Matcher.group(3) has the key-value, make sure to remove quotation marks (if present) 1842 // before comparing. 1843 1844 if (attributeValueTest.test(foundAttributeValueNoQuotes)) 1845 1846 // matcher.group(2) has the key's name, not the value. This is returned via the 1847 // Map.Entry key 1848 1849 return retainQuotes 1850 1851 ? new AbstractMap.SimpleImmutableEntry<>( 1852 preserveKeysCase 1853 ? keyValueInnerTags.group(2) 1854 : keyValueInnerTags.group(2).toLowerCase(), 1855 foundAttributeValue 1856 ) 1857 1858 : new AbstractMap.SimpleImmutableEntry<>( 1859 preserveKeysCase 1860 ? keyValueInnerTags.group(2) 1861 : keyValueInnerTags.group(2).toLowerCase(), 1862 foundAttributeValueNoQuotes 1863 ); 1864 } 1865 1866 // No match was identified, return null. 
1867 return null; 1868 } 1869 1870 1871 // ******************************************************************************************** 1872 // ******************************************************************************************** 1873 // getInstance() 1874 // ******************************************************************************************** 1875 // ******************************************************************************************** 1876 1877 1878 /** 1879 * <EMBED CLASS='external-html' DATA-FILE-ID=TN_GETINST_DESC> 1880 * @param tok Any valid HTML tag. 1881 * @param openOrClosed <EMBED CLASS='external-html' DATA-FILE-ID=TN_GETINST_OOC> 1882 * @return An instance of this class 1883 * 1884 * @throws IllegalArgumentException If parameter {@code TC openOrClose} is {@code null} or 1885 * {@code TC.Both} 1886 * 1887 * @throws HTMLTokException If the parameter {@code String tok} is not a valid HTML-tag 1888 * 1889 * @throws SingletonException If the token requested is a {@code singleton} (self-closing) tag, 1890 * but the Tag-Criteria {@code 'TC'} parameter is requesting a closing-version of the tag. 1891 * 1892 * @see HTMLTags#hasTag(String, TC) 1893 * @see HTMLTags#isSingleton(String) 1894 */ 1895 public static TagNode getInstance(String tok, TC openOrClosed) 1896 { 1897 if (openOrClosed == null) 1898 throw new NullPointerException("The value of openOrClosed cannot be null."); 1899 1900 if (openOrClosed == TC.Both) 1901 throw new IllegalArgumentException("The value of openOrClosed cannot be TC.Both."); 1902 1903 if (HTMLTags.isSingleton(tok) && (openOrClosed == TC.ClosingTags)) 1904 1905 throw new SingletonException( 1906 "The value of openOrClosed is TC.ClosingTags, but unfortunately you have asked " + 1907 "for a [" + tok + "] HTML element, which is a singleton element, and therefore " + 1908 "cannot have a closing-tag instance." 
1909 ); 1910 1911 TagNode ret = HTMLTags.hasTag(tok, openOrClosed); 1912 1913 if (ret == null) 1914 throw new HTMLTokException 1915 ("The HTML-Tag provided isn't valid!\ntok: " + tok + "\nTC: " + openOrClosed); 1916 1917 return ret; 1918 } 1919 1920 1921 // ******************************************************************************************** 1922 // ******************************************************************************************** 1923 // Methods for "CSS Classes" 1924 // ******************************************************************************************** 1925 // ******************************************************************************************** 1926 1927 1928 /** 1929 * Convenience Method. 1930 * <BR />Invokes: {@link #cssClasses()} 1931 * <BR />Catches-Exception 1932 */ 1933 public Stream<String> cssClassesNOCSE() 1934 { try { return cssClasses(); } catch (CSSStrException e) { return Stream.empty(); } } 1935 1936 /** 1937 * <EMBED CLASS="external-html" DATA-FILE-ID=TN_CSS_CL_DESC> 1938 * @return 1939 * <EMBED CLASS="external-html" DATA-FILE-ID=TN_CSS_CL_RET> 1940 * <EMBED CLASS="external-html" DATA-FILE-ID=STRMCNVT> 1941 * @throws CSSStrException <EMBED CLASS="external-html" DATA-FILE-ID=TN_CSS_CL_CSSSE> 1942 * @see #cssClasses() 1943 * @see #AV(String) 1944 * @see StringParse#WHITE_SPACE_REGEX 1945 * @see CSSStrException#check(Stream) 1946 */ 1947 public Stream<String> cssClasses() 1948 { 1949 // The CSS Class is just an attribute/inner-tag within an HTML Element. 
1950 String classes = AV("class"); 1951 1952 // IF the "class" attribute was not present, OR (after trimming) was empty, return 1953 // "empty stream" 1954 1955 if ((classes == null) || ((classes = classes.trim()).length() == 0)) 1956 return Stream.empty(); 1957 1958 // STEP 1: Split the string on white-space 1959 // STEP 2: Check each element of the output Stream using the "CSSStrException Checker" 1960 1961 return CSSStrException.check(StringParse.WHITE_SPACE_REGEX.splitAsStream(classes)); 1962 } 1963 1964 /** 1965 * <EMBED CLASS='external-html' DATA-FILE-ID=TN_SET_CSS_CL_DESC> 1966 * @param quote <EMBED CLASS='external-html' DATA-FILE-ID=TGND_QUOTE_EXPL> 1967 * @param appendOrClobber <EMBED CLASS='external-html' DATA-FILE-ID=TN_SET_CSS_CL_AOC> 1968 * @param cssClasses <EMBED CLASS='external-html' DATA-FILE-ID=TN_SET_CSS_CL_CCL> 1969 * @return <EMBED CLASS='external-html' DATA-FILE-ID=TN_SET_CSS_CL_RET> 1970 * @throws CSSStrException This exception shall throw if any of the {@code 'cssClasses'} in the 1971 * var-args {@code String...} parameter do not meet the HTML 5 CSS {@code Class} naming rules. 1972 * @throws ClosingTagNodeException <EMBED CLASS="external-html" DATA-FILE-ID=CTNEX> 1973 * @throws QuotesException <EMBED CLASS='external-html' DATA-FILE-ID=TN_SET_CSS_CL_QEX> 1974 * @see CSSStrException#check(String[]) 1975 * @see CSSStrException#VALID_CSS_CLASS_OR_NAME_TOKEN 1976 * @see #appendToAV(String, String, boolean, SD) 1977 * @see #setAV(String, String, SD) 1978 */ 1979 public TagNode setCSSClasses(SD quote, boolean appendOrClobber, String... cssClasses) 1980 { 1981 // Throw CSSStrException if any of the input strings are invalid CSS Class-Names. 1982 CSSStrException.check(cssClasses); 1983 1984 // Build the CSS 'class' Attribute String. This will be inserted into the TagNode Element 1985 StringBuilder sb = new StringBuilder(); 1986 boolean first = true; 1987 1988 for (String c : cssClasses) 1989 { sb.append((first ? 
"" : " ") + c.trim()); first=false; } 1990 1991 return appendOrClobber 1992 ? appendToAV("class", " " + sb.toString(), false, quote) 1993 : setAV("class", sb.toString(), quote); 1994 } 1995 1996 /** 1997 * <EMBED CLASS='external-html' DATA-FILE-ID=TN_APD_CSS_CL_DESC> 1998 * @param cssClass This is the CSS-{@code Class} name that is being inserted into 1999 * {@code 'this'} instance of {@code TagNode} 2000 * @param quote <EMBED CLASS='external-html' DATA-FILE-ID=TGND_QUOTE_EXPL> 2001 * @return A newly instantiated {@code TagNode} with updated CSS {@code Class} Name(s). 2002 * @throws CSSStrException <EMBED CLASS='external-html' DATA-FILE-ID=TN_APD_CSS_CL_CSSSE> 2003 * @throws ClosingTagNodeException <EMBED CLASS="external-html" DATA-FILE-ID=CTNEX> 2004 * @throws QuotesException <EMBED CLASS='external-html' DATA-FILE-ID=TN_APD_CSS_CL_QEX> 2005 * @see CSSStrException#check(String[]) 2006 * @see #setAV(String, String, SD) 2007 * @see #appendToAV(String, String, boolean, SD) 2008 */ 2009 public TagNode appendCSSClass(String cssClass, SD quote) 2010 { 2011 // Do a validity check on the class. 
If this is "problematic" due to use of specialized / 2012 // pre-processed CSS Class directives, use the general purpose "setAV(...)" method 2013 2014 CSSStrException.check(cssClass); 2015 2016 String curCSSClassSetting = AV("class"); 2017 2018 // If there wasn't a CSS Class already defined, use "setAV(...)", 2019 // otherwise use "appendToAV(...)" 2020 2021 if ((curCSSClassSetting == null) || (curCSSClassSetting.length() == 0)) 2022 return setAV("class", cssClass, quote); 2023 2024 else 2025 return appendToAV("class", cssClass + ' ', true, quote); 2026 } 2027 2028 2029 // ******************************************************************************************** 2030 // ******************************************************************************************** 2031 // Methods for "CSS Style" 2032 // ******************************************************************************************** 2033 // ******************************************************************************************** 2034 2035 2036 /** 2037 * <EMBED CLASS="external-html" DATA-FILE-ID=TN_CSS_STYLE_DESC> 2038 * <EMBED CLASS="external-html" DATA-FILE-ID=TN_CSS_STYLE_DESCEX> 2039 * @return <EMBED CLASS="external-html" DATA-FILE-ID=TN_CSS_STYLE_RET> 2040 * @see AttrRegEx#CSS_INLINE_STYLE_REGEX 2041 * @see #AV(String) 2042 */ 2043 public Properties cssStyle() 2044 { 2045 Properties p = new Properties(); 2046 String styleStr = AV("style"); 2047 // Returns the complete attribute-value of "style" in the TagNode 2048 2049 // There was no STYLE='...' attribute found, return empty Properties. 2050 if (styleStr == null) return p; 2051 2052 // Standard String-trim routine 2053 styleStr = styleStr.trim(); 2054 2055 if (styleStr.length() == 0) return p; 2056 2057 // This reg-ex "iterates" over matches of strings that follow the (very basic) form: 2058 // declaration-name: declaration-string; 2059 // 2060 // Where the ":" (color), and ";" (semi-colon) are the only watched/monitored tokens. 
2061 // See the reg-ex definition in "See Also" tag. 2062 2063 Matcher m = AttrRegEx.CSS_INLINE_STYLE_REGEX.matcher(styleStr); 2064 2065 // m.group(1): declaration-name (stuff before the ":") 2066 // m.group(2): declaration-string (stuff before the next ";", or end-of-string) 2067 // 2068 // For Example, if the style attribute-value was specified as: 2069 // STYLE="font-weight: bold; margin: 1em 1em 1em 2em; color: #0000FF" 2070 // 2071 // The returned Properties object would contain the string key-value pair elements: 2072 // "font-weight" -> "bold" 2073 // "margin" -> "1em 1em 1em 2em" 2074 // "color" -> "#0000FF" 2075 2076 while (m.find()) p.put(m.group(1).toLowerCase(), m.group(2)); 2077 2078 return p; 2079 } 2080 2081 /** 2082 * <EMBED CLASS='external-html' DATA-FILE-ID=TN_SET_CSS_STY_DESC> 2083 * @param p <EMBED CLASS='external-html' DATA-FILE-ID=TN_SET_CSS_STY_P> 2084 * @param quote <EMBED CLASS='external-html' DATA-FILE-ID=TGND_QUOTE_EXPL> 2085 * @param appendOrClobber <EMBED CLASS='external-html' DATA-FILE-ID=TN_SET_CSS_STY_AOC> 2086 * @return <EMBED CLASS='external-html' DATA-FILE-ID=TN_SET_CSS_STY_RET> 2087 * @throws ClosingTagNodeException <EMBED CLASS="external-html" DATA-FILE-ID=CTNEX> 2088 * @throws CSSStrException If there is an invalid CSS Style Property Name. 2089 * @throws QuotesException If the style-element's quotation marks are incompatible with any 2090 * and all quotation marks employed by the style-element definitions. 2091 * @see CSSStrException#VALID_CSS_CLASS_OR_NAME_TOKEN 2092 * @see #appendToAV(String, String, boolean, SD) 2093 * @see #setAV(String, String, SD) 2094 */ 2095 public TagNode setCSSStyle(Properties p, SD quote, boolean appendOrClobber) 2096 { 2097 // this is used in the "exception constructor" below (which means, it might not be used). 2098 int counter = 0; 2099 2100 // Follows the "FAIL-FAST" philosophy, and does not allow invalid CSS declaration-name 2101 // tokens. 
Use TagNode.setAV("style", ...), or TagNode.appendToAV("style", ...), to 2102 // bypass exception-check. 2103 2104 for (String key : p.stringPropertyNames()) 2105 2106 if (! CSSStrException.VALID_CSS_CLASS_OR_NAME_TOKEN_PRED.test(key)) 2107 { 2108 String[] keyArr = new String[p.size()]; 2109 2110 throw new CSSStrException( 2111 2112 "CSS Style Definition Property: [" + key + "] does not conform to the " + 2113 "valid, HTML 5, regular-expression for CSS Style Definitions Properties:\n[" + 2114 CSSStrException.VALID_CSS_CLASS_OR_NAME_TOKEN.pattern() + "].", 2115 2116 // One minor "PRESUMPTION" that is the Iterator will return the elements of 2117 // Properties in the EXACT SAME ORDER on both creations / instantiations of the 2118 // iterator. Specifically: two invocations of method .iterator(), will return 2119 // the same-list of property-keys, in the same order, BOTH TIMES. Once for the 2120 // for-loop, and once for the exception message. This only matters if there is 2121 // an exception. 2122 2123 p.stringPropertyNames().toArray(keyArr), 2124 ++counter 2125 ); 2126 } 2127 else ++counter; 2128 2129 // Follows the "FAIL-FAST" philosophy, and does not allow "quotes-within-quote" problems 2130 // to occur. An attribute-value surrounded by single-quotes, cannot contain a 2131 // single-quote inside, and double-within-double. 
2132 2133 counter = 0; 2134 2135 for (String key : p.stringPropertyNames()) 2136 2137 if (StrCmpr.containsOR(p.get(key).toString(), ("" + quote.quote), ";")) 2138 { 2139 String[] keyArr = new String[p.size()]; 2140 2141 throw new CSSStrException( 2142 "CSS Style Definition Property: [" + key + "], which maps to style-" + 2143 "definition property-value:\n[" + p.get(key) + "], contains either a " + 2144 "semi-colon ';' character, or the same quotation-mark specified: [" + 2145 quote.quote + "], and is therefore not a valid assignment for a CSS " + 2146 "Definition Property.", 2147 2148 p .stringPropertyNames() 2149 .stream() 2150 .map((String propertyName) -> p.get(propertyName)) 2151 .toArray((int i) -> new String[i]), 2152 2153 ++counter 2154 ); 2155 } 2156 else ++counter; 2157 2158 // ERROR-CHECKING: FINISHED, NOW JUST BUILD THE ATTRIBUTE-VALUE STRING 2159 // (using StringBuilder), AND INSERT IT. 2160 2161 StringBuilder sb = new StringBuilder(); 2162 2163 for (String key : p.stringPropertyNames()) sb.append(key + ": " + p.get(key) + ";"); 2164 2165 return appendOrClobber 2166 ? appendToAV("style", " " + sb.toString(), false, quote) 2167 : setAV("style", sb.toString(), quote); 2168 } 2169 2170 2171 // ******************************************************************************************** 2172 // ******************************************************************************************** 2173 // Methods for "CSS ID" 2174 // ******************************************************************************************** 2175 // ******************************************************************************************** 2176 2177 2178 /** 2179 * Convenience Method. 
2180 * <BR />Invokes: {@link #AV(String)} 2181 * <BR />Passes: {@code String "id"}, the CSS-ID attribute-<B STYLE="color: red;">name</B> 2182 * <!-- RIDICULOUS: BR />BR />IMG SRC='doc-files/img/getID.png' CLASS=JDIMG ALT='example'> --> 2183 */ 2184 public String getID() 2185 { 2186 String id = AV("ID"); 2187 return (id == null) ? null : id.trim(); 2188 } 2189 2190 /** 2191 * This merely sets the current CSS {@code 'ID'} Attribute <B STYLE="color: red;">Value</B>. 2192 * 2193 * @param id This is the new CSS {@code 'ID'} attribute-<B STYLE="color: red;">value</B> that 2194 * the user would like applied to {@code 'this'} instance of {@code TagNode}. 2195 * 2196 * @param quote <EMBED CLASS='external-html' DATA-FILE-ID=TGND_QUOTE_EXPL> 2197 * 2198 * @return Returns a new instance of {@code TagNode} that has an updated {@code 'ID'} 2199 * attribute-<B STYLE="color: red;">value</B>. 2200 * 2201 * @throws IllegalArgumentException This exception shall throw if an invalid 2202 * {@code String}-token has been passed to parameter {@code 'id'}. 2203 * 2204 * <BR /><BR /><B>BYPASS NOTE:</B> If the user would like to bypass this exception-check, for 2205 * instance because he / she is using a CSS Pre-Processor, then applying the general-purpose 2206 * method {@code TagNode.setAV("id", "some-new-id")} ought to suffice. This other method will 2207 * not apply validity checking, beyond scanning for the usual "quotes-within-quotes" problems, 2208 * which is always disallowed. 2209 * 2210 * @throws ClosingTagNodeException <EMBED CLASS="external-html" DATA-FILE-ID=CTNEX> 2211 * 2212 * @see CSSStrException#VALID_CSS_CLASS_OR_NAME_TOKEN 2213 * @see #setAV(String, String, SD) 2214 */ 2215 public TagNode setID(String id, SD quote) 2216 { 2217 if (! 
CSSStrException.VALID_CSS_CLASS_OR_NAME_TOKEN_PRED.test(id)) 2218 2219 throw new IllegalArgumentException( 2220 "The id parameter provide: [" + id + "], does not conform to the standard CSS " + 2221 "Names.\nEither try using the generic TagNode.setAV(\"id\", yourNewId, quote); " + 2222 "method to bypass this check, or change the value passed to the 'id' parameter " + 2223 "here." 2224 ); 2225 2226 return setAV("id", id.trim(), quote); 2227 } 2228 2229 2230 // ******************************************************************************************** 2231 // ******************************************************************************************** 2232 // Attributes that begin with "data-..." 2233 // ******************************************************************************************** 2234 // ******************************************************************************************** 2235 2236 2237 /** 2238 * Convenience Method. 2239 * <BR />Invokes: {@link #AV(String)} 2240 * <BR />Passes: {@code "data-"} prepended to parameter {@code 'dataName'} for the 2241 * attribute-<B STYLE='color:red'>name</B> 2242 */ 2243 public String dataAV(String dataName) { return AV("data-" + dataName); } 2244 2245 /** 2246 * This method will remove any HTML <B STYLE="color: red;">'data'</B> Attributes - if there are 2247 * any present. "Data Inner-Tags" are simply the attributes inside of HTML Elements whose 2248 * <B STYLE="color: red;">names</B> begin with <B STYLE="color: red;">{@code "data-"}</B>. 2249 * 2250 * <BR /><BR />Since {@code class TagNode} is immutable, a new {@code TagNode} must be 2251 * instantiated, if any data-inner-tags are removed. If no data-attributes are removed, 2252 * {@code 'this'} instance {@code TagNode} shall be returned instead. 2253 * 2254 * @return This will return a newly constructed {@code 'TagNode'} instance, if there were any 2255 * "<B STYLE="color: red;">Data</B> Attributes" that were removed by request. 
If the original 2256 * {@code TagNode} has remained unchanged, a reference to {@code 'this'} shall be returned. 2257 * 2258 * @throws ClosingTagNodeException This exception throws if {@code 'this'} instance of 2259 * {@code TagNode} is a closing-version of the HTML Element. Closing HTML Elements may not 2260 * have data attributes, because they simply are not intended to contain <I>any</I> attributes. 2261 */ 2262 public TagNode removeDataAttributes() 2263 { 2264 // Because this method expects to modify the TagNode, this exception-check is necessary. 2265 ClosingTagNodeException.check(this); 2266 2267 // Make sure to keep the quotes that were already used, we are removing attributes, and the 2268 // original attributes that aren't removed need to preserve their quotation marks. 2269 2270 Properties p = this.allAV(true, true); 2271 Enumeration<Object> keys = p.keys(); 2272 int oldSize = p.size(); 2273 2274 // Visit each Property Element, and remove any properties that have key-names which 2275 // begin with the word "data-" 2276 2277 while (keys.hasMoreElements()) 2278 { 2279 String key = keys.nextElement().toString(); 2280 if (key.startsWith("data-")) p.remove(key); 2281 } 2282 2283 // If any of properties were removed, we have to rebuild the TagNode, and replace it. 2284 // REMEMBER: 'null' is passed to the 'SD' parameter, because we preserved the original 2285 // quotes above. 2286 2287 return (p.size() == oldSize) 2288 ? this 2289 : new TagNode(this.tok, p, null, this.str.endsWith("/>")); 2290 } 2291 2292 /** 2293 * Convenience Method. 2294 * <BR />Invokes: {@link #getDataAV(boolean)} 2295 * <BR />Attribute-<B STYLE="color: red;">names</B> will be in lower-case 2296 */ 2297 public Properties getDataAV() { return getDataAV(false); } 2298 2299 /** 2300 * This will retrieve and return any/all <B STYLE="color: red;">'data'</B> HTML Attributes. 
2301 * "Data Inner-Tags" are simply the attributes inside of HTML Elements whose 2302 * <B STYLE="color: red;">names</B> begin with <B STYLE="color: red;">{@code "data-"}</B>. 2303 * 2304 * @param preserveKeysCase When this parameter is passed <B>TRUE</B>, the case of the attribute 2305 * <B STYLE="color: red;">names</B> in the returned {@code Properties} table will have been 2306 * preserved. When <B>FALSE</B> is passed, all {@code Properties} 2307 * <B STYLE="color: red">keys</B> shall have been converted to lower-case first. 2308 * 2309 * <EMBED CLASS="external-html" DATA-FILE-ID=TAGNODE_PRESERVE_C> 2310 * 2311 * @return This will return a {@code java.util.Properties} of all 2312 * <B STYLE="color: red;">data</B>-attributes which are found in {@code 'this'} HTML Element. 2313 * If no such attributes were found, 'null' shall not be returned by this method, but rather an 2314 * empty {@code Properties} instance will be provided, instead. 2315 * 2316 * @see TagNode#isClosing 2317 * @see TagNode#str 2318 * @see TagNode#tok 2319 * @see AttrRegEx#DATA_ATTRIBUTE_REGEX 2320 */ 2321 public Properties getDataAV(boolean preserveKeysCase) 2322 { 2323 Properties ret = new Properties(); 2324 2325 // NOTE: OPTIMIZED, "closing-versions" of the TagNode, and TagNode's whose 'str' 2326 // field is only longer than the token, itself, by 3 or less characters cannot have 2327 // attributes.v In that case, just return an empty 'Properties' instance. 
2328 2329 if (isClosing || (str.length() <= (tok.length() + 3))) return ret; 2330 2331 // This RegEx Matcher 'matches' against Attribute/InnerTag Key-Value Pairs 2332 // ONLY PAIRS WHOSE KEY BEGINS WITH "data-" WILL MATCH 2333 // 2334 // m.group(2): returns the 'key' portion of the key-value pair, before an '=' (equals-sign) 2335 // m.group(3): returns the 'value' portion of the key-value pair, after an '=' 2336 2337 Matcher m = AttrRegEx.DATA_ATTRIBUTE_REGEX.matcher(this.str); 2338 2339 // NOTE: HTML mandates attributes must be 'case-insensitive' to the attribute 'key-part' 2340 // (but not necessarily the 'value-part') 2341 // 2342 // HOWEVER: Java does not require anything for the 'Properties' class. 2343 // ALSO: Case is PRESERVED for the 'value-part' of the key-value pair. 2344 2345 if (preserveKeysCase) 2346 while (m.find()) 2347 ret.put(m.group(2), StringParse.ifQuotesStripQuotes(m.group(3))); 2348 2349 else 2350 while (m.find()) 2351 ret.put(m.group(2).toLowerCase(), StringParse.ifQuotesStripQuotes(m.group(3))); 2352 2353 return ret; 2354 } 2355 2356 /** 2357 * Convenience Method. 2358 * <BR />Invokes: {@link #getDataAN(boolean)} 2359 * <BR />Attribute-<B STYLE="color: red;">names</B> will be in lower-case 2360 */ 2361 public Stream<String> getDataAN() { return getDataAN(false); } 2362 2363 /** 2364 * This method will only return a list of all data-attribute <B STYLE="color: red;">names</B>. 2365 * The data-attribute <B STYLE="color: red;">values</B> shall not be included in the result. 2366 * An HTML Element "data-attribute" is any attribute inside of an HTML {@code TagNode} whose 2367 * <B STYLE="color: red;">key-value</B> pair uses a <B STYLE="color: red;">key</B> that begins 2368 * with <B STYLE="color: red;">{@code "data-"}</B>... 
<I>It is that simple!</I> 2369 * 2370 * @param preserveKeysCase When this parameter is passed <B>TRUE</B>, the case of the attribute 2371 * <B STYLE="color: red;">names</B> that are returned by this method will have been 2372 * preserved. When <B>FALSE</B> is passed, they shall be converted to lower-case first. 2373 * 2374 * <EMBED CLASS="external-html" DATA-FILE-ID=TAGNODE_PRESERVE_C> 2375 * 2376 * @return Returns an instance of {@code Stream<String>}. The attribute 2377 * <B STYLE="color: red;">names</B> that are returned, are all converted to lower-case. 2378 * 2379 * <BR /><BR />A return type of {@code Stream<String>} is used. Please see the list below for 2380 * how to convert a {@code Stream} to another data-type. 2381 * 2382 * <EMBED CLASS="external-html" DATA-FILE-ID=STRMCNVT> 2383 * 2384 * <BR /><B>NOTE:</B> This method shall never return 'null' - even if there are no 2385 * <B STYLE="color: red;">data-</B>attribute <B STYLE="color: red;">key-value</B> pairs 2386 * contained by the {@code TagNode}. If there are strictly zero such attributes, 2387 * ({@code Stream.empty()}) shall be returned, instead. 2388 * 2389 * @see #str 2390 * @see #tok 2391 * @see AttrRegEx#DATA_ATTRIBUTE_REGEX 2392 */ 2393 public Stream<String> getDataAN(boolean preserveKeysCase) 2394 { 2395 // Java Stream's can be quickly and easily converted to any data-structure the user needs. 2396 Stream.Builder<String> b = Stream.builder(); 2397 2398 // NOTE: OPTIMIZED, "closing-versions" of the TagNode, and TagNode's whose 'str' 2399 // field is only longer than the token, itself, by 3 or less characters cannot have 2400 // attributes. In that case, just return an empty 'Properties' instance. 
2401 2402 if (isClosing || (str.length() <= (tok.length() + 3))) return Stream.empty(); 2403 2404 // This RegEx Matcher 'matches' against Attribute/InnerTag Key-Value Pairs 2405 // ONLY PAIRS WHOSE KEY BEGINS WITH "data-" WILL MATCH 2406 // m.group(2): returns the 'key' portion of the key-value pair, before an '=' (equals-sign). 2407 // m.group(3): returns the 'value' portion of the key-value pair, after an '=' 2408 2409 Matcher m = AttrRegEx.DATA_ATTRIBUTE_REGEX.matcher(this.str); 2410 2411 // NOTE: HTML mandates attributes must be 'case-insensitive' to the attribute 'key-part' 2412 // (but not necessarily the 'value-part') 2413 // HOWEVER: Java does not require anything for the 'Properties' class. 2414 // ALSO: Case is PRESERVED for the 'value-part' of the key-value pair. 2415 2416 if (preserveKeysCase) while (m.find()) b.accept(m.group(2)); 2417 else while (m.find()) b.accept(m.group(2).toLowerCase()); 2418 2419 return b.build(); 2420 } 2421 2422 2423 // ******************************************************************************************** 2424 // ******************************************************************************************** 2425 // Java Methods 2426 // ******************************************************************************************** 2427 // ******************************************************************************************** 2428 2429 2430 /** 2431 * This does a "longer version" of the parent {@code toString()} method. This is because it 2432 * also parses and prints inner-tag <B STYLE="color: red;">key-value</B> pairs. The ordinary 2433 * {@code public String toString()} method that is inherited from parent {@code class HTMLNode} 2434 * will just return the value of {@code class HTMLNode} field: {@code public final String str}. 2435 * 2436 * @return A String with the inner-tag <B STYLE="color: red;">key-value</B> pairs specified. 
2437 * 2438 * <DIV CLASS="EXAMPLE">{@code 2439 * // The following code, would output the text below 2440 * TagNode tn = new TagNode("<BUTTON CLASS='MyButtons' ONCLICK='MyListener();'>"); 2441 * System.out.println(tn.toStringAV()); 2442 * 2443 * // Outputs the following Text: 2444 * 2445 * // TagNode.str: [<BUTTON class='MyButtons' onclick='MyListener();'>], TagNode.tok: [button], 2446 * // TagNode.isClosing: [false] 2447 * // CONTAINS a total of (2) attributes / inner-tag key-value pairs: 2448 * // (KEY, VALUE): [onclick], [MyListener();] 2449 * // (KEY, VALUE): [class], [MyButtons] 2450 * }</DIV> 2451 * 2452 * @see #allAV() 2453 * @see #tok 2454 * @see #isClosing 2455 * @see HTMLNode#toString() 2456 */ 2457 public String toStringAV() 2458 { 2459 StringBuilder sb = new StringBuilder(); 2460 2461 // Basic information. This info is common to ALL instances of TagNode 2462 sb.append( 2463 "TagNode.str: [" + this.str + "], TagNode.tok: [" + this.tok + "], " + 2464 "TagNode.isClosing: [" + this.isClosing + "]\n" 2465 ); 2466 2467 // Not all instances of TagNode will have attributes. 2468 Properties attributes = this.allAV(false, true); 2469 2470 sb.append( 2471 "CONTAINS a total of (" + attributes.size() + ") attributes / inner-tag " + 2472 "key-value pairs" + (attributes.size() == 0 ? "." : ":") + "\n" 2473 ); 2474 2475 // If there are inner-tags / attributes, then add them to the output-string, each on a 2476 // separate line. 2477 2478 for (String key : attributes.stringPropertyNames()) 2479 sb.append("(KEY, VALUE):\t[" + key + "], [" + attributes.get(key) + "]\n"); 2480 2481 // Build the string from the StringBuilder, and return it. 2482 return sb.toString(); 2483 } 2484 2485 /** 2486 * Java's {@code interface Cloneable} requirements. 
This instantiates a new {@code TagNode} 2487 * with identical <SPAN STYLE="color: red;">{@code String str}</SPAN> fields, and also 2488 * identical <SPAN STYLE="color: red;">{@code boolean isClosing}</SPAN> and 2489 * <SPAN STYLE="color: red;">{@code String tok}</SPAN> fields. 2490 * 2491 * @return A new {@code TagNode} whose internal fields are identical to this one. 2492 */ 2493 public TagNode clone() { return new TagNode(str); } 2494 2495 /** 2496 * This sorts by: 2497 * 2498 * <BR /><BR /><OL CLASS="JDOL"> 2499 * <LI> by {@code String 'tok'} fields character-order (ASCII-alphabetical). 2500 * <BR />The following {@code final String 'tok'} fields are ASCII ordered: 2501 * {@code 'a', 'button', 'canvas', 'div', 'em', 'figure' ...} 2502 * </LI> 2503 * <LI>then (if {@code 'tok'} fields are equal) by the {@code public final boolean 'isClosing'} 2504 * field. <BR />{@code TagNode's} that have a {@code 'isClosing'} set to <B>FALSE</B> come 2505 * before {@code TagNode's} whose {@code 'isClosing'} field is set to <B>TRUE</B> 2506 * </LI> 2507 * <LI> finally, if the {@code 'tok'} and {@code 'isClosing'} fields are equal, they are 2508 * sorted by <I>the integer-length of</I> {@code final String 'str'} field. 2509 * </LI> 2510 * </OL> 2511 * 2512 * @param n Any other {@code TagNode} to be compared to {@code 'this' TagNode} 2513 * 2514 * @return An integer that fulfils Java's 2515 * {@code interface Comparable<T> public boolean compareTo(T t)} method requirements. 2516 * 2517 * @see #tok 2518 * @see #isClosing 2519 * @see #str 2520 */ 2521 public int compareTo(TagNode n) 2522 { 2523 // Utilize the standard "String.compare(String)" method with the '.tok' string field. 2524 // All 'tok' fields are stored as lower-case strings. 
2525 int compare1 = this.tok.compareTo(n.tok); 2526 2527 // Comparison #1 will be non-zero if the two TagNode's being compared had different 2528 // .tok fields 2529 if (compare1 != 0) return compare1; 2530 2531 // If the '.tok' fields were the same, use the 'isClosing' field for comparison instead. 2532 // This comparison will only be used if they are different. 2533 if (this.isClosing != n.isClosing) return (this.isClosing == false) ? -1 : 1; 2534 2535 // Finally try using the entire element '.str' String field, instead. 2536 return this.str.length() - n.str.length(); 2537 } 2538 2539 2540 // ******************************************************************************************** 2541 // ******************************************************************************************** 2542 // UpperCase, LowerCase 2543 // ******************************************************************************************** 2544 // ******************************************************************************************** 2545 2546 // public TagNode toUpperCase(boolean b) { return null; } 2547 // public TagNode toLowerCase(boolean b) { return null; } 2548 2549 /** 2550 * Return a capitalized (upper-case) instance of the {@code String}-Contents of this 2551 * {@code TagNode}. 2552 * 2553 * <BR /><BR />The user has the option of capitalizing the Tag-Name only, or the Tag-Name 2554 * and the Attribute-<B STYLE='color: red;'>Name's</B>. 2555 * 2556 * <BR /><BR />White-space shall remain unchanged by this method. 2557 * 2558 * @param justTag_Or_TagAndAttributeNames When this parameter is passed {@code TRUE}, only the 2559 * Element-Name will be converted to Upper-Case. This is the {@link #tok} field of this 2560 * {@code TagNode}. 2561 * 2562 * <BR /><BR />If this parameter receives {@code FALSE}, then 2563 * <B STYLE='color: red;'><I>BOTH</I></B> the Tag-Name <B STYLE='color: red;'><I>AND</I></B> 2564 * the Attribute-Names are capitalized. 
2565 * 2566 * <!-- 2567 * NOTE: THIS CODE BREAKS COMMAND 'javadoc', AND I DON'T KNOW WHY... 2568 * 2569 * <BR /><BR />The follwing example will (hopefully) elucidate the output of this method. 2570 * 2571 * <DIV CLASS=EXAMPLE>{@code 2572 * TagNode tn = new TagNode("<a href='http://some.url.com/Some/Case/Sensitive/DIR'>"); 2573 * 2574 * System.out.println(tn.toUpperCase(true)); 2575 * // Prints: "<A href='http://some.url.com/Some/Case/Sensitive/DIR'>" 2576 * // NOTE: Only the 'a' is upper-case, the 'href' attribute is still lower-case 2577 * 2578 * System.out.println(tn.toUpperCase(false)); 2579 * // Prints: "<A HREF='http://some.url.com/Some/Case/Sensitive/DIR'>" 2580 * // NOTE: The Tag-Name and all Attribute-Names are now upper-case. 2581 * // ALSO: The HREF-URL (an Attribute-Value) has remained unchanged. 2582 * }<DIV> 2583 * --> 2584 * 2585 * @return A capitalized version of {@code 'this'} instance. Only the Tag-Name, or the 2586 * Tag-Name and the Attribute-<B STYLE='color: red;'>Name's</B> will be capitalized. This 2587 * method will leave Attributte-<B STYLE='color: red;'>Values</B> unmodified. 2588 * 2589 * <BR /><BR />All spacing characters and (again) Attribute-<B STYLE='color: red;'>Values</B> 2590 * will remain unchanged. 2591 * 2592 * <BR /><BR /><B CLASS=JDRedLabel>'DATA-' ATTRIBUTES:</B> 2593 * 2594 * <BR />HTML {@code 'data-*'} Attributes are Inner-Tags that allow a programmer to pass 2595 * 'values', of one kind or another, that have a name to a Web-Browser or client. Since there 2596 * is the possibility that the 'name' provided is case-sensitive, this method will not alter the 2597 * name text that apears after the {@code 'data-'} portion of the 2598 * Attribute-<B STYLE='color: red;'>Name</B> for HTML Data-Attributes. 2599 */ 2600 public TagNode toUpperCase(boolean justTag_Or_TagAndAttributeNames) 2601 { 2602 if (justTag_Or_TagAndAttributeNames) return new TagNode( 2603 this.isClosing 2604 ? 
("</" + this.tok.toUpperCase() + this.str.substring(2 + this.tok.length())) 2605 : ('<' + this.tok.toUpperCase() + this.str.substring(1 + this.tok.length())) 2606 ); 2607 2608 StringBuilder sb = new StringBuilder(); 2609 Matcher m = AttrRegEx.KEY_VALUE_REGEX.matcher(this.str); 2610 2611 sb.append(this.isClosing ? "</" : "<"); 2612 sb.append(this.tok.toUpperCase()); 2613 2614 // Skip over the opening '<' and the Tag-Name 2615 int pos = this.tok.length() + 1; 2616 2617 // If this was a "Closing Tag", remember to skip the opeing '/' 2618 if (this.isClosing) pos++; 2619 2620 // Here, the Key-Value (Attribue-Name & Attribute-Value) pairs are iterated. Care is 2621 // taken to ensure that only the names (not the values) are modified. 2622 2623 while (m.find()) 2624 { 2625 // Apppend white-space that occurs **BEFORE** the Name-Value Pair 2626 // 2627 // NOTE: The 'toUpperCase' here will catch any Attribute-Name-Only Attributes 2628 // (also known as "Boolean-Attributes") 2629 2630 sb.append(this.str.substring(pos, m.start(2)).toUpperCase()); 2631 2632 // Append the Attribute-Name, and make sure to Capitalize it. First this needs to be 2633 // retrieved, and (more importantly), do not capitalize the actual name-part of 2634 // "data-" Attributes, their case **COULD POSSIBLY** be important... (They are for the 2635 // EmbedTag Parameters Data-Attributes) 2636 2637 String attrName = m.group(2).toUpperCase(); 2638 2639 sb.append( 2640 StrCmpr.startsWithIgnoreCase(attrName, "data-") 2641 ? ("DATA-" + attrName.substring(5).toUpperCase()) 2642 : attrName.toUpperCase() 2643 ); 2644 2645 // Append the Attribute-Value, and update the 'pos' variable to reflect where 2646 // in the String the current Match-Location ends... 2647 // 2648 // NOTE: m.end(2) and m.end() are the exact same values, since this regex's 2649 // group #2 ends at the very-end of the regex pattern. 
2650 2651 sb.append(this.str.substring(pos = m.end(2), pos)); 2652 } 2653 2654 // ALWAYS: After the last match of a RegEx, remember to append any text that occurs 2655 // after the last match. This is also quite important in the HTML-Parser 2656 // not to forget this line. 2657 2658 sb.append(this.str.substring(pos)); 2659 2660 // Return the new TagNode 2661 return new TagNode(sb.toString()); 2662 } 2663 2664 /** 2665 * Return a de-capitalized (Lower-case) instance of the {@code String}-Contents of this 2666 * {@code TagNode}. 2667 * 2668 * <BR /><BR />The user has the option of de-capitalizing the Tag-Name only, or the Tag-Name 2669 * and the Attribute-<B STYLE='color: red;'>Name's</B>. 2670 * 2671 * <BR /><BR />White-space shall remain unchanged by this method. 2672 * 2673 * @param justTag_Or_TagAndAttributeNames When this parameter is passed {@code TRUE}, only the 2674 * Element-Name will be converted to Lower-Case. (The '{@link #tok}'' field of this 2675 * {@code TagNode} is changed) 2676 * 2677 * <BR /><BR />If this parameter receives {@code FALSE}, then 2678 * <B STYLE='color: red;'><I>BOTH</I></B> the Tag-Name <B STYLE='color: red;'><I>AND</I></B> 2679 * the Attribute-Names are de-capitalized. 2680 * 2681 * <!-- 2682 * THIS BREAKS JAVADOC, FOR SOME ODD REASON... 2683 * 2684 * <BR /><BR />The follwing example will (hopefully) elucidate the output of this method. 2685 * 2686 * <DIV CLASS=EXAMPLE>{@code 2687 * TagNode tn = new TagNode("<DIV CLASS=MyMainClass1>"); 2688 * 2689 * System.out.println(tn.toLowerCase(true)); 2690 * // Prints: "<div CLASS=MyMainClass1>" 2691 * // NOTE: Only the 'div' is lower-case, the 'class' attribute is still upper-case 2692 * 2693 * System.out.println(tn.toLowerCase(false)); 2694 * // Prints: "<div class=MyMainClass1>" 2695 * // NOTE: The Tag-Name and all Attribute-Names are now lower-case. 2696 * // ALSO: The CSS-Class Name (an Attribute-Value) has remained unchanged. 
2697 * }<DIV> 2698 * --> 2699 * 2700 * @return A lower-case version of {@code 'this'} instance. Only the Tag-Name, or the 2701 * Tag-Name and the Attribute-<B STYLE='color: red;'>Name's</B> will be de-capitalized. This 2702 * method will leave Attributte-<B STYLE='color: red;'>Values</B> unmodified. 2703 * 2704 * <BR /><BR />All spacing characters and (again) Attribute-<B STYLE='color: red;'>Values</B> 2705 * will remain unchanged. 2706 * 2707 * <BR /><BR /><B CLASS=JDRedLabel>'DATA-' ATTRIBUTES:</B> 2708 * 2709 * <BR />HTML {@code 'data-*'} Attributes are Inner-Tags that allow a programmer to pass 2710 * 'values', of one kind or another, that have a name to a Web-Browser or client. Since there 2711 * is the possibility that the 'name' provided is case-sensitive, this method will not alter the 2712 * name text that apears after the {@code 'data-'} portion of the 2713 * Attribute-<B STYLE='color: red;'>Name</B> for HTML Data-Attributes. 2714 */ 2715 public TagNode toLowerCase(boolean justTag_Or_TagAndAttributeNames) 2716 { 2717 if (justTag_Or_TagAndAttributeNames) return new TagNode( 2718 this.isClosing 2719 ? ("</" + this.tok.toLowerCase() + this.str.substring(2 + this.tok.length())) 2720 : ('<' + this.tok.toLowerCase() + this.str.substring(1 + this.tok.length())) 2721 ); 2722 2723 StringBuilder sb = new StringBuilder(); 2724 Matcher m = AttrRegEx.KEY_VALUE_REGEX.matcher(this.str); 2725 2726 sb.append(this.isClosing ? "</" : "<"); 2727 sb.append(this.tok.toLowerCase()); 2728 2729 // Skip over the opening '<' and the Tag-Name 2730 int pos = this.tok.length() + 1; 2731 2732 // If this was a "Closing Tag", remember to skip the opeing '/' 2733 if (this.isClosing) pos++; 2734 2735 // Here, the Key-Value (Attribue-Name & Attribute-Value) pairs are iterated. Care is 2736 // taken to ensure that only the names (not the values) are modified. 
2737 2738 while (m.find()) 2739 { 2740 // Apppend white-space that occurs **BEFORE** the Name-Value Pair 2741 // 2742 // NOTE: The 'toLowerCase' here will catch any Attribute-Name-Only Attributes 2743 // (also known as "Boolean-Attributes") 2744 2745 sb.append(this.str.substring(pos, m.start(2)).toLowerCase()); 2746 2747 // Append the Attribute-Name, and make sure to De-Capitalize it. First this needs to 2748 // be retrieved, and (more importantly), do not modify the actual name-part of "data-" 2749 // Attributes, their case **COULD POSSIBLY** be important... (They are for the EmbedTag 2750 // Parameters Data-Attributes) 2751 2752 String attrName = m.group(2).toLowerCase(); 2753 2754 sb.append( 2755 StrCmpr.startsWithIgnoreCase(attrName, "data-") 2756 ? ("DATA-" + attrName.substring(5).toLowerCase()) 2757 : attrName.toLowerCase() 2758 ); 2759 2760 // Append the Attribute-Value, and update the 'pos' variable to reflect where 2761 // in the String the current Match-Location ends... 2762 // 2763 // NOTE: m.end(2) and m.end() are the exact same values, since this regex's 2764 // group #2 ends at the very-end of the regex pattern. 2765 2766 sb.append(this.str.substring(pos = m.end(2), pos)); 2767 } 2768 2769 // ALWAYS: After the last match of a RegEx, remember to append any text that occurs 2770 // after the last match. This is also quite important in the HTML-Parser 2771 // not to forget this line. 
2772 2773 sb.append(this.str.substring(pos)); 2774 2775 // Return the new TagNode 2776 return new TagNode(sb.toString()); 2777 } 2778 2779 2780 // ******************************************************************************************** 2781 // ******************************************************************************************** 2782 // Internally used Regular Expressions, (STATIC FIELDS INSIDE STATIC CLASS) 2783 // ******************************************************************************************** 2784 // ******************************************************************************************** 2785 2786 2787 /** 2788 * Regular-Expressions that are used by both the parsing class {@link HTMLPage}, and class 2789 * {@link TagNode} for searching HTML tags for attributes and even data. 2790 * 2791 * <BR /><BR /><EMBED CLASS="external-html" DATA-FILE-ID=TAGNODE_ATTR_REGEX> 2792 */ 2793 public static final class AttrRegEx 2794 { 2795 private AttrRegEx() { } 2796 2797 /** 2798 * <EMBED CLASS="external-html" DATA-FILE-ID=TAGNODE_REGEX_KV> 2799 * @see TagNode#allAV(boolean, boolean) 2800 */ 2801 public static final Pattern KEY_VALUE_REGEX = Pattern.compile( 2802 "(?:\\s+?" + // mandatory leading white-space 2803 "(([\\w-]+?)=(" + // inner-tag name (a.k.a. 'key' or 'attribute-name') 2804 "'[^']*?'" + "|" + // inner-tag value using single-quotes ... 'OR' 2805 "\"[^\"]*?\"" + "|" + // inner-tag value using double-quotes ... 'OR' 2806 "[^\"'>\\s]*" + // inner-tag value without quotes 2807 ")))", 2808 Pattern.CASE_INSENSITIVE | Pattern.DOTALL 2809 ); 2810 2811 /** 2812 * A {@code Predicate<String>} Regular-Expression. 2813 * @see #KEY_VALUE_REGEX 2814 */ 2815 public static final Predicate<String> KEY_VALUE_REGEX_PRED = 2816 KEY_VALUE_REGEX.asPredicate(); 2817 2818 /** 2819 * <B CLASS=JDRedLabel>Legacy Regular Expression:</B> 2820 * 2821 * <BR />This RegEx was originall used by the method {@link #AV(String)}, but no longer is. 
* This isn't being deprecated because it still serves the purpose of showing how the HTML
         * Tags in this class are stored.
         *
         * <BR /><BR /><B CLASS=JDRedLabel>Capture Group:</B>
         *
         * <BR />This Regular-Expression has a single set of parenthesis <I>(and therefore only one
         * Capture-Group!)</I>.  Notice that that group practically includes the entire RegEx - all
         * except the very first equals-sign located at the first character of the {@code String}.
         *
         * @see TagNode#AV(String)
         */
        public static final Pattern QUOTES_AND_VALUE_REGEX = Pattern.compile(
            // Matches, for example:   ='MyClass'   or   ="MyClass"   or   =MyClass
            "=(" +
                "\"[^\"]*?\"" + "|" +   // inner-tag value using double-quotes ... 'OR'
                "'[^']*?'" + "|" +      // inner-tag value using single-quotes ... 'OR'
                "[\\w-]+" +             // inner-tag value without quotes
            ")",
            Pattern.DOTALL
        );

        /**
         * A {@code Predicate<String>} Regular-Expression.
         * @see #QUOTES_AND_VALUE_REGEX
         */
        public static final Predicate<String> QUOTES_AND_VALUE_REGEX_PRED =
            QUOTES_AND_VALUE_REGEX.asPredicate();

        /**
         * This matches all valid attribute-<B STYLE="color: red;">keys</B> <I>(not values)</I> of
         * HTML Element <B STYLE="color: red;">key-value pairs</B>.
         *
         * <BR /><BR /><UL CLASS="JDUL">
         * <LI> <B>PART-1:</B> {@code [A-Za-z_]} The first character must be a letter or the
         *      underscore.
         *      </LI>
         * <LI> <B>PART-2:</B> {@code [A-Za-z0-9_-]} All other characters must be alpha-numeric,
         *      the dash {@code '-'}, or the underscore {@code '_'}.
         *      </LI>
         * </UL>
         *
         * @see InnerTagKeyException#check(String[])
         * @see #allKeyOnlyAttributes(boolean)
         */
        public static final Pattern ATTRIBUTE_KEY_REGEX =
            Pattern.compile("^[A-Za-z_][A-Za-z0-9_-]*$");

        /**
         * A {@code Predicate<String>} Regular-Expression.
         * @see #ATTRIBUTE_KEY_REGEX
         */
        public static final Predicate<String> ATTRIBUTE_KEY_REGEX_PRED =
            ATTRIBUTE_KEY_REGEX.asPredicate();

        /**
         * <EMBED CLASS="external-html" DATA-FILE-ID=TAGNODE_REGEX_DATA>
         * @see TagNode#getDataAN()
         * @see TagNode#getDataAV()
         */
        public static final Pattern DATA_ATTRIBUTE_REGEX = Pattern.compile(
            // regex will match, for example: data-src="https://cdn.imgur.com/MyImage.jpg"
            //
            // Capture-Groups:
            //      group(1): the complete pair, for example: data-src="..."
            //      group(2): the part of the name that follows "data-", for example: src
            //      group(3): the value, including any surrounding quotes

            "(?:\\s+?" +                    // mandatory leading white-space
                "(data-([\\w-]+?)=" +       // data inner-tag name
                "(" + "'[^']*?'" + "|" +    // inner-tag value using single-quotes ... 'OR'
                "\"[^\"]*?\"" + "|" +       // inner-tag value using double-quotes ... 'OR'
                "[^\"'>\\s]*" +             // inner-tag value without quotes
            ")))",
            Pattern.CASE_INSENSITIVE | Pattern.DOTALL
        );

        /**
         * A {@code Predicate<String>} Regular-Expression.
         * @see #DATA_ATTRIBUTE_REGEX
         */
        public static final Predicate<String> DATA_ATTRIBUTE_REGEX_PRED =
            DATA_ATTRIBUTE_REGEX.asPredicate();

        /**
         * <EMBED CLASS="external-html" DATA-FILE-ID=TAGNODE_REGEX_CSS>
         * @see TagNode#cssStyle()
         */
        public static final Pattern CSS_INLINE_STYLE_REGEX = Pattern.compile(
            // regex will match, for example: font-weight: bold;

            // CSS Style Property Name - Must begin with a letter, an underscore or a hyphen
            "([_\\-a-zA-Z]+" + "[_\\-a-zA-Z0-9]*)" +

            // The ":" symbol between property-name and property-value
            "\\s*?" + ":" + "\\s*?" +

            // CSS Style Property Value
            "([^;]+?\\s*)" +

            // text after the "Name : Value" definition
            "(;|$|[\\w]+$)"
        );

        /**
         * A {@code Predicate<String>} Regular-Expression.
         * @see #CSS_INLINE_STYLE_REGEX
         */
        public static final Predicate<String> CSS_INLINE_STYLE_REGEX_PRED =
            CSS_INLINE_STYLE_REGEX.asPredicate();
    }

}