001package Torello.HTML; 002 003import java.util.*; 004import java.util.stream.*; 005 006import Torello.Java.LV; 007import Torello.Java.StrFilter; 008import Torello.Java.StringParse; 009import Torello.Java.Additional.Ret2; 010import Torello.Java.Function.IntTFunction; 011 012/** 013 * Utilities for getting, setting and removing attributes from the {@link TagNode} elements in a 014 * Web-Page {@code Vector}. 015 * 016 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=ATTRIBUTES> 017 * @see AUM 018 */ 019@Torello.JavaDoc.StaticFunctional 020public class Attributes 021{ 022 private Attributes() { } 023 024 025 // *************************************************************************************** 026 // *************************************************************************************** 027 // Update Attributes 028 // *************************************************************************************** 029 // *************************************************************************************** 030 031 032 /** 033 * Convenience Method. 034 * <BR />Invokes: {@link #update(Vector, AUM, int, int, String, IntTFunction, SD)}. 035 * <BR />Passes: Simple Update Lambda that <B>always</B> assigns {@code 'itValue'} to the 036 * Attribute 037 * <BR />Iterates: The entire {@code html}-page, Passes {@code 0, -1} to {@code sPos, ePos} 038 */ 039 public static int[] update 040 (Vector<? super TagNode> html, AUM mode, String innerTag, final String itValue, SD quote) 041 { 042 return update 043 (html, mode, 0, -1, innerTag, (int index, TagNode tn) -> itValue, quote); 044 } 045 046 /** 047 * Convenience Method. 048 * <BR />Receives: {@code DotPair} 049 * <BR />Invokes: {@link #update(Vector, AUM, int, int, String, IntTFunction, SD)}. 050 * <BR />Passes: Simple Update Lambda that <B>always</B> assigns {@code 'itValue'} to the 051 * Attribute 052 * <BR />Iterates: The {@code html}-page from {@code dp.start} (inclusive) to {@code dp.end} 053 * (also inclusive) 054 */ 055 public static int[] update( 056 Vector<? super TagNode> html, AUM mode, DotPair dp, 057 String innerTag, final String itValue, SD quote 058 ) 059 { 060 return update( 061 html, mode, dp.start, dp.end + 1, innerTag, 062 (int index, TagNode tn) -> itValue, quote 063 ); 064 } 065 066 /** 067 * Convenience Method. 068 * <BR />Receives: An Attribute-Update Lambda-Function {@code 'newITValueStrGetter'} 069 * <BR />Invokes: {@link #update(Vector, AUM, int, int, String, IntTFunction, SD)}. 070 * <BR />Iterates: The entire {@code html}-page, Passes {@code 0, -1} to {@code sPos, ePos} 071 */ 072 public static int[] update( 073 Vector<? super TagNode> html, AUM mode, 074 String innerTag, IntTFunction<TagNode, String> newITValueStrGetter, SD quote 075 ) 076 { return update(html, mode, 0, -1, innerTag, newITValueStrGetter, quote); } 077 078 /** 079 * Convenience Method. 080 * <BR />Receives: {@code DotPair} 081 * <BR />And-Receives: An Attribute-Update Lambda-Function {@code 'newITValueStrGetter'} 082 * <BR />Invokes: {@link #update(Vector, AUM, int, int, String, IntTFunction, SD)}. 083 * <BR />Iterates: The {@code html}-page from {@code dp.start} (inclusive) to {@code dp.end} 084 * (also inclusive) 085 */ 086 public static int[] update( 087 Vector<? super TagNode> html, AUM mode, DotPair dp, 088 String innerTag, IntTFunction<TagNode, String> newITValueStrGetter, SD quote 089 ) 090 { return update(html, mode, dp.start, dp.end + 1, innerTag, newITValueStrGetter, quote); } 091 092 /** 093 * Convenience Method. 094 * <BR />Receives: HTML-{@code Vector} starting & ending indices 095 * ({@code sPos} and {@code ePos}). 096 * <BR />Invokes: {@link #update(Vector, AUM, int, int, String, IntTFunction, SD)}. 097 * <BR />Passes: Simple Update Lambda that <B>always</B> assigns {@code 'itValue'} to the 098 * Attribute 099 * <BR />Iterates: The {@code html}-page from {@code sPos} (inclusive) to {@code ePos} 100 * (exclusive) 101 */ 102 public static int[] update( 103 @SuppressWarnings("unchecked") Vector<? super TagNode> html, AUM mode, 104 int sPos, int ePos, String innerTag, final String itValue, SD quote 105 ) 106 { return update(html, mode, sPos, ePos, innerTag, (int index, TagNode tn) -> itValue, quote); } 107 108 /** 109 * Will update any HTML {@code TagNode's} present in the vector-parameter {@code 'html'} 110 * according to passed <B>{@code AUM}</B> mode and the {@code 'innerTag'} parameter. 111 * 112 * <EMBED CLASS='external-html' DATA-PROC_TYPE=update DATA-FILE-ID=ATTR_RESTRICT_SE_POS> 113 * 114 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP> 115 * @param mode <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_MODE_PARAM> 116 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 117 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 118 * 119 * @param innerTag This is the <B STYLE="color: red;">name</B> of the HTML attribute that needs 120 * to be changed, added, or removed. 121 * 122 * @param newITValueStrGetter <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_NEW_ITSTR_FUNC> 123 * 124 * @param quote <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_QUOTE_PARAM> 125 * 126 * @return This method shall return an integer-{@code array} index-list whose values identify 127 * which HTML {@code Vector} Elements were changed as a result of this method invocation. 128 * 129 * <BR /><BR /><B>NOTE:</B> One minor subtlety, there could be cases where a new HTML Element 130 * {@code 'TagNode'} reference / object were instantiated or 'created,' even though the actual 131 * {@code String} that comprised the {@code HTMLNode} itself were identical to the original 132 * {@code HTMLNode.str String}. In the {@code 'AUM'} enumerated-type, when {@code AUM.Set} 133 * is invoked, the original {@code String} data for an attribute is always clobbered, even in 134 * cases where an identical version of the {@code String} is replaced or substituted. 135 * 136 * @throws QuotesException <EMBED CLASS='external-html' DATA-FILE-ID=QEX> 137 * @throws InnerTagKeyException <EMBED CLASS='external-html' DATA-FILE-ID=ITKEYEX> 138 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 139 * 140 * @see AUM#update(TagNode, String, String, SD) 141 * @see LV 142 * @see TagNode#isTagNode() 143 * @see TagNode#isClosing 144 */ 145 public static int[] update( 146 @SuppressWarnings("unchecked") Vector<? super TagNode> html, AUM mode, 147 int sPos, int ePos, String innerTag, IntTFunction<TagNode, String> newITValueStrGetter, 148 SD quote 149 ) 150 { 151 InnerTagKeyException.check(innerTag); 152 153 // Use Java Stream to keep a list of Vector-Locations that were updated / modified. 154 IntStream.Builder b = IntStream.builder(); 155 156 // This optimization is the same as the one in TagNode.openTagPWA(). However, that method 157 // cannot be used here, becaue for AUM.set, zero-attribute TagNode's **ALSO** have to be 158 // updated. So this is re-implemented here. 159 160 int MIN = 3 + innerTag.length(); 161 162 // Loop Variables 163 LV l = new LV(sPos, ePos, html); 164 TagNode tn; 165 166 for (int i=l.start; i < l.end; i++) 167 168 // Only instances of Opening-TagNodes need to be checked - All others should be skipped 169 if ((tn = ((HTMLNode) html.elementAt(i)).openTag()) != null) 170 171 // AUM.Set does not require the attribute to already exist 172 // **OR** Check for minimum possible str-length to have the attribute at all. 173 174 if ((mode == AUM.Set) || (tn.str.length() >= (MIN + tn.tok.length()))) 175 176 // If AUM.update returns a **NEW** (non-null) TagNode, replace the old one. 177 // Make sure to use the User-Provided Function-Pointer 178 179 if ((tn = mode.update(tn, innerTag, newITValueStrGetter.apply(i, tn), quote)) != null) 180 { 181 // Replace the old TagNode 182 html.setElementAt(tn, i); 183 184 // Make sure to keep the index where it resides, to return to the user 185 b.accept(i); 186 } 187 188 // Build the IntStream, Convert the IntStream -> int[], Return it. 189 return b.build().toArray(); 190 } 191 192 /** 193 * Convenience Method. 194 * <BR />Receives: An {@code int[]}-Array which identifes which nodes in the {@code Vector} to update. 195 * <BR />Invokes: {@link #update(Vector, AUM, int, int, String, IntTFunction, SD)}. 196 * <BR />Passes: Simple Update Lambda that <B>always</B> assigns {@code 'itValue'} to the 197 * Attribute 198 * <BR />Iterates: All {@code Vector}-indices pointed to by the values in {@code 'posArr'} 199 */ 200 public static int[] update( 201 Vector<? super TagNode> html, AUM mode, int[] posArr, 202 String innerTag, final String itValue, SD quote 203 ) 204 { return update(html, mode, posArr, innerTag, (int index, TagNode tn) -> itValue, quote); } 205 206 /** 207 * Will update any HTML {@code TagNode's} present in the vector-parameter {@code 'html'} 208 * according to a passed <B>{@code 'AUM'}</B> mode and the {@code 'innerTag'} parameter. 209 * 210 * <EMBED CLASS='external-html' DATA-PROC_TYPE=update DATA-FILE-ID=ATTR_RESTRICT_POSARR> 211 * 212 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP> 213 * @param mode <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_MODE_PARAM> 214 * @param posArr <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POS_ARR_PARAM> 215 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_UPDATE_AUM_EXAMPLE> 216 * 217 * @param innerTag This is the <B STYLE="color: red;">name</B> of the HMTL attribute that needs 218 * to be changed, added, or removed. 219 * 220 * @param newITValueStrGetter <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_NEW_ITSTR_FUNC> 221 * 222 * @param quote <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_QUOTE_PARAM> 223 * 224 * @return This method shall return an integer-{@code array} index-list whose values identify 225 * which HTML {@code Vector} Elements were changed as a result of this method invokation. 226 * 227 * <BR /><BR /><B>NOTE:</B> One minor subtlety, there could be cases where a new HTML Element 228 * {@code 'TagNode'} reference / object were instantiated or 'created,' even though the actual 229 * {@code String} that comprised the {@code HTMLNode} itself were identical to the original 230 * {@code HTMLNode.str String}. In the {@code 'AUM'} enumerated-type, when {@code AUM.Set} 231 * is invoked, the original {@code String} data for an attribute is always clobbered, even in 232 * cases where an identical version of the {@code String} is replaced or substituted. 233 * 234 * @throws QuotesException <EMBED CLASS='external-html' DATA-FILE-ID=QEX> 235 * @throws InnerTagKeyException <EMBED CLASS='external-html' DATA-FILE-ID=ITKEYEX> 236 * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX> 237 * 238 * @throws OpeningTagNodeExpectedException 239 * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX> 240 * @throws ArrayIndexOutOfBoundsException 241 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX> 242 * 243 * @see AUM#update(TagNode, String, String, SD) 244 * @see TagNode#isTagNode() 245 * @see TagNode#isClosing 246 */ 247 public static int[] update( 248 Vector<? super TagNode> html, AUM mode, int[] posArr, 249 String innerTag, IntTFunction<TagNode, String> newITValueStrGetter, SD quote 250 ) 251 { 252 InnerTagKeyException.check(innerTag); 253 254 // Use Java Stream to keep a list of Vector-Locations that were updated / modified. 255 IntStream.Builder b = IntStream.builder(); 256 257 // minimum possible length to have an attribute at all. 258 // '<', TOKEN, SPACE, ATTRIBUTE, '>' 259 260 int MIN = 3 + innerTag.length(); 261 262 for (int i : posArr) 263 { 264 HTMLNode n = (HTMLNode) html.elementAt(i); 265 266 if (! n.isTagNode()) throw new TagNodeExpectedException(i); 267 268 TagNode tn = (TagNode) n; 269 270 if (tn.isClosing) throw new OpeningTagNodeExpectedException(i); 271 272 // AUM.Set *DOES NOT* require the attribute to exist already (the other *DO*) 273 if (mode != AUM.Set) 274 275 // Minimum length of this element before it even could have the named inner-tag 276 // '<', TOKEN, SPACE, ATTRIBUTE, '=', '>' 277 278 if (tn.str.length() < (MIN + tn.tok.length())) continue; 279 280 // Make sure to use the User-Provided Function-Pointer 281 tn = mode.update(tn, innerTag, newITValueStrGetter.apply(i, tn), quote); 282 283 // if 'tn' is non-null ==> an update *WAS* performed 284 if (tn != null) 285 { 286 // Replace the old TagNode 287 html.setElementAt(tn, i); 288 289 // Make sure to keep the index where it resides, to return to the user 290 b.accept(i); 291 } 292 } 293 294 // Build the IntStream, Convert the IntStream -> int[], Return it. 295 return b.build().toArray(); 296 } 297 298 299 // *************************************************************************************** 300 // *************************************************************************************** 301 // Remove All Attributes 302 // *************************************************************************************** 303 // *************************************************************************************** 304 305 306 /** 307 * Convenience Method. 308 * <BR />Invokes: {@link #removeAll(Vector, int, int)} 309 */ 310 public static int[] removeAll(Vector<? super TagNode> html) 311 { return removeAll(html, 0, -1); } 312 313 /** 314 * Convenience Method. 315 * <BR />Receives: {@code DotPair} 316 * <BR />Invokes: {@link #removeAll(Vector, int, int)} 317 */ 318 public static int[] removeAll(Vector<? super TagNode> html, DotPair dp) 319 { return removeAll(html, dp.start, dp.end + 1); } 320 321 /** 322 * The purpose of this method is to remove all attributes / Inner-Tag 323 * <B STYLE="color: red;">key-value pairs</B> from each and every non-{@code 'TextNode'} and 324 * non-{@code 'CommentNode'} HTML Element found on the vectorized-html page parameter 325 * {@code 'html'}. The removal process is limited to the range specified by method-parameters 326 * {@code sPos, ePos.} 327 * 328 * <BR /><BR /><B CLASS=JDDescLabel>Attribute Removal Specifics:</B> 329 * 330 * <BR />This method will remove each and every {@code class=... id=... src=... alt=...} 331 * {@code href=... onclick=... etc...} attribute from all {@link TagNode}-instances whose 332 * {@code Vector}-index location inside {@code 'html'} falls between {@code 'sPos'} and 333 * {@code 'ePos'}. 334 * 335 * <BR /><BR />When this method exists, all {@link TagNode} instances inside {@code 'html'} 336 * that fall within the specified sub-range will be attribute-free. 337 * 338 * <EMBED CLASS='external-html' DATA-PROC_TYPE=removal DATA-FILE-ID=ATTR_RESTRICT_SE_POS> 339 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_REMOVEALL_EXAMPLE1> 340 * 341 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP> 342 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 343 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 344 * 345 * @return An integer array of {@code 'Vector'}-index positions / locations for each and every 346 * HTML {@code 'TagNode'} whose attributes have been removed. 347 * 348 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 349 * 350 * @see TagNode#removeAllAV 351 * @see TagNode#isTagNode() 352 * @see TagNode#isClosing 353 * @see LV 354 */ 355 public static int[] removeAll 356 (@SuppressWarnings("unchecked") Vector<? super TagNode> html, int sPos, int ePos) 357 { 358 // Use Java Stream to keep a list of Vector-Locations that were updated / modified. 359 IntStream.Builder b = IntStream.builder(); 360 361 // Loop Variables 362 LV l = new LV(sPos, ePos, html); 363 TagNode tn; 364 365 for (int i=l.start; i < l.end; i++) 366 367 // REQUIREMENTS: Only Opening-TagNodes 368 // If element-length = tok-length+2, there are no attributes: '<', TOKEN, '>' 369 370 if ((tn = ((HTMLNode) html.elementAt(i)).openTag()) != null) 371 372 if (tn.str.length() > (tn.tok.length() + 2)) 373 { 374 // Replace the old TagNode 375 html.setElementAt(tn.removeAllAV(), i); 376 377 // Make sure to keep the index where it resides, to return to the user 378 b.accept(i); 379 } 380 381 // Build the IntStream, Convert the IntStream -> int[], Return it. 382 return b.build().toArray(); 383 } 384 385 /** 386 * The purpose of this method is to remove all attributes / Inner-Tag 387 * <B STYLE="color: red;">key-value pairs</B> from each and every non-{@code 'TextNode'} and 388 * non-{@code 'CommentNode'} HTML Element found on the vectorized-html page parameter 389 * {@code 'html'}. The removal process is limited to the only removing attributes from elements 390 * pointed to by the contents of passed-parameter {@code 'posArr'} 391 * 392 * <BR /><BR /><B CLASS=JDDescLabel>Attribute Removal Specifics:</B> 393 * 394 * <BR />This method will remove each and every {@code class=... id=... src=... alt=...} 395 * {@code href=... onclick=... etc...} attribute from all {@link TagNode}-instances whose 396 * {@code Vector}-index location within {@code 'html'} are indices among those listed by 397 * the index-list {@code int[]}-Array {@code 'posArr'}. 398 * 399 * <BR /><BR />When this method exits, all {@link TagNode} instances inside {@code 'html'} 400 * specified by {@code 'posArr'} will be attribute-free. 401 * 402 * <EMBED CLASS='external-html' DATA-PROC_TYPE=removal DATA-FILE-ID=ATTR_RESTRICT_POSARR> 403 * 404 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP> 405 * @param posArr <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POS_ARR_PARAM> 406 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_REMOVEALL_EXAMPLE2> 407 * 408 * @return An integer array of {@code 'Vector'}-index positions / locations for each and every 409 * HTML {@code 'TagNode'} whose attributes have been removed. 410 * 411 * @throws ArrayIndexOutOfBoundsException 412 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX> 413 * @throws OpeningTagNodeExpectedException 414 * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX> 415 * 416 * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX> 417 * 418 * @see TagNode#removeAllAV() 419 * @see TagNode#isTagNode() 420 * @see TagNode#isClosing 421 */ 422 public static int[] removeAll(Vector<? super TagNode> html, int[] posArr) 423 { 424 // Use Java Stream to keep a list of Vector-Locations that were updated / modified. 425 IntStream.Builder b = IntStream.builder(); 426 427 for (int i : posArr) 428 { 429 HTMLNode n = (HTMLNode) html.elementAt(i); 430 431 if (! n.isTagNode()) throw new TagNodeExpectedException(i); 432 433 TagNode tn = (TagNode) n; 434 435 if (tn.isClosing) throw new OpeningTagNodeExpectedException(i); 436 437 // If element-length = tok-length+2, there are no attributes! 438 // Otherwise, replace the old TagNode with a new, empty, one 439 // Make sure to keep the index where it resides, to return to the user 440 441 if (tn.str.length() > (tn.tok.length() + 2)) 442 { 443 html.setElementAt(tn.removeAllAV(), i); 444 b.accept(i); 445 } 446 } 447 448 // Build the IntStream, Convert the IntStream -> int[], Return it. 449 return b.build().toArray(); 450 } 451 452 453 // *************************************************************************************** 454 // *************************************************************************************** 455 // Remove Data-Attributes 456 // *************************************************************************************** 457 // *************************************************************************************** 458 459 460 /** 461 * Convenience Method. 462 * <BR />Invokes: {@link #removeData(Vector, int, int)} 463 */ 464 public static int[] removeData(Vector<? super TagNode> html) 465 { return removeData(html, 0, -1); } 466 467 /** 468 * Convenience Method. 469 * <BR />Receives: {@code DotPair} 470 * <BR />Invokes: {@link #removeData(Vector, int, int)} 471 */ 472 public static int[] removeData(Vector<? super TagNode> html, DotPair dp) 473 { return removeData(html, dp.start, dp.end + 1); } 474 475 /** 476 * The purpose of this method is to remove all HTML <B STYLE="color: red;">data</B>-attribute 477 * <B STYLE="color: red;">key-value</B> pairs from {@code 'TagNode'} Elements contained inside 478 * parameter {@code 'html'}. 479 * 480 * <EMBED CLASS='external-html' DATA-PROC_TYPE=removal DATA-FILE-ID=ATTR_RESTRICT_SE_POS> 481 * 482 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP> 483 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 484 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 485 * @return <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INT_ARR_RET> 486 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_DATA_ATTR_RET_NOTE> 487 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POSARR_SHORT_EXPL> 488 * 489 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 490 * 491 * @see TagNode#removeDataAttributes() 492 * @see TagNode#isTagNode() 493 * @see TagNode#isClosing 494 * @see LV 495 */ 496 public static int[] removeData(Vector<? super TagNode> html, int sPos, int ePos) 497 { 498 // Use Java Stream to keep a list of Vector-Locations that were updated / modified. 499 IntStream.Builder b = IntStream.builder(); 500 501 // Loop Counter & Temporary Variables 502 LV l = new LV(sPos, ePos, html); 503 TagNode tn, newTN; 504 505 for (int i=l.start; i < l.end; i++) 506 507 // Only instances of Opening-TagNodes, possibly with attributes 508 if ((tn = ((HTMLNode) html.elementAt(i)).openTagPWA()) != null) 509 510 // A "new" TagNode is *only returned* if the "data-attributes" were removed. 511 if ((newTN = tn.removeDataAttributes()) != tn) 512 { 513 html.setElementAt(newTN, i); // Replace the old TagNode 514 b.accept(i); // Make sure to keep the index where it resides 515 } // Method returns list of modified node's 516 517 // Build the IntStream, Convert the IntStream -> int[], Return it. 518 return b.build().toArray(); 519 } 520 521 /** 522 * The purpose of this method is to remove all HTML <B STYLE="color: red;">data</B>-attribute 523 * <B STYLE="color: red;">key-value</B> pairs from {@code 'TagNode'} Elements contained inside 524 * parameter {@code 'html'}. 525 * 526 * <EMBED CLASS='external-html' DATA-PROC_TYPE=removal DATA-FILE-ID=ATTR_RESTRICT_POSARR> 527 * 528 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP> 529 * @param posArr <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POS_ARR_PARAM> 530 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_DATA_ATTR_EXAMPLE> 531 * @return <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INT_ARR_RET> 532 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_DATA_ATTR_RET_NOTE> 533 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POSARR_SHORT_EXPL> 534 * 535 * @throws ArrayIndexOutOfBoundsException 536 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX> 537 * @throws OpeningTagNodeExpectedException 538 * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX> 539 * 540 * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX> 541 * 542 * @see TagNode#removeDataAttributes() 543 * @see TagNode#isTagNode() 544 * @see TagNode#isClosing 545 */ 546 public static int[] removeData(Vector<? super TagNode> html, int[] posArr) 547 { 548 // Use Java Stream to keep a list of Vector-Locations that were updated / modified. 549 IntStream.Builder b = IntStream.builder(); 550 551 // Minimum Length of TagNode.str to even have a "data-*=" attribute 552 // '<', HTML-TOKEN, SPACE, "data-*", '>' 553 554 int MIN = 9; 555 556 for (int i: posArr) 557 { 558 HTMLNode n = (HTMLNode) html.elementAt(i); 559 560 if (! n.isTagNode()) throw new TagNodeExpectedException(i); 561 562 TagNode tn = (TagNode) n; 563 564 if (tn.isClosing) throw new OpeningTagNodeExpectedException(i); 565 566 // Minimum Length of TagNode.str to even have a "data-*=" attribute 567 if (tn.str.length() < (tn.tok.length() + MIN)) continue; 568 569 TagNode newTN = tn.removeDataAttributes(); 570 571 // A "new" TagNode is *only returned* by this method if the "data-attributes" were 572 // removed. If new, replace the old TagNode 573 574 if (newTN != tn) 575 { 576 html.setElementAt(newTN, i); 577 578 // Make sure to keep the index where it resides, so it may be returned to the user 579 b.accept(i); 580 } 581 } 582 583 // Build the IntStream, Convert the IntStream -> int[], Return it. 584 return b.build().toArray(); 585 } 586 587 588 // *************************************************************************************** 589 // *************************************************************************************** 590 // Remove Specified Attributes 591 // *************************************************************************************** 592 // *************************************************************************************** 593 594 595 /** 596 * Convenience Method. 597 * <BR />Invokes: {@link #remove(Vector, int, int, String[])} 598 */ 599 public static int[] remove(Vector<? super TagNode> html, String... innerTags) 600 { return remove(html, 0, -1, innerTags); } 601 602 /** 603 * Convenience Method. 604 * <BR />Receives: {@code DotPair} 605 * <BR />Invokes: {@link #remove(Vector, int, int, String[])} 606 */ 607 public static int[] remove(Vector<? super TagNode> html, DotPair dp, String... innerTags) 608 { return remove(html, dp.start, dp.end + 1, innerTags); } 609 610 /** 611 * This will remove all copies of the attributes whose <B STYLE="color: red;">names</B> are 612 * listed among the by {@code String[]} array parameter {@code 'innerTags'} from the 613 * vectorized-html web-page parameter {@code 'html'}. 614 * 615 * <EMBED CLASS='external-html' DATA-PROC_TYPE=removal DATA-FILE-ID=ATTR_RESTRICT_SE_POS> 616 * 617 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP> 618 * @param innerTags <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INNERTAGS_PARAM> 619 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_SE_RESTRICT_REM> 620 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 621 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 622 * @return <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INT_ARR_RET> 623 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_VARARGS_S_RET_NOTE> 624 * 625 * @throws InnerTagKeyException <EMBED CLASS='external-html' DATA-FILE-ID=ITKEYEX> 626 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 627 * @throws IllegalArgumentException If parameter {@code 'innerTags'} has zero elements. 628 * 629 * @see TagNode#removeAttributes(String[]) 630 * @see LV 631 * @see TagNode#hasOR(boolean, String[]) 632 * @see TagNode#isTagNode() 633 * @see TagNode#isClosing 634 * @see InnerTagKeyException#check(String[]) 635 */ 636 public static int[] remove 637 (Vector<? super TagNode> html, int sPos, int ePos, String... innerTags) 638 { 639 InnerTagKeyException.check(innerTags); 640 641 // Use Java Stream to keep a list of Vector-Locations that were updated / modified. 642 IntStream.Builder b = IntStream.builder(); 643 644 // Loop Counter & Temporary Variables 645 LV l = new LV(sPos, ePos, html); 646 TagNode tn; 647 648 for (int i=l.start; i < l.end; i++) 649 650 // Only instances of Opening-TagNodes, possibly with attributes 651 if ((tn = ((HTMLNode) html.elementAt(i)).openTagPWA()) != null) 652 653 // If this TagNode has the attributes that have been requested for removal, then... 654 if (tn.hasOR(false, innerTags)) 655 { 656 // Build a new TagNode, and then replace the old one with the newly built one 657 // on the page or sub-page, and at the same location. 658 659 tn = tn.removeAttributes(innerTags); 660 html.setElementAt(tn, i); 661 662 // Java's IntStream-Builder is just a way to "build" a short list of integer's. 663 // At the end of this method, the list will be built and returned to the user. 664 // It shall contain all Vector locations where a "TagNode swap" (replaced 665 // TagNode, with attributes filtered) has occurred. 666 667 b.accept(i); 668 } 669 670 // Build the IntStream, Convert the IntStream -> int[], Return it. 671 return b.build().toArray(); 672 } 673 674 /** 675 * This will remove all copies of the attributes whose <B STYLE="color: red;">names</B> are 676 * listed among the by {@code String[]} array parameter {@code 'innerTags'} from the 677 * vectorized-html web-page parameter {@code 'html'}. 678 * 679 * <EMBED CLASS='external-html' DATA-PROC_TYPE=removal DATA-FILE-ID=ATTR_RESTRICT_POSARR> 680 * 681 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP> 682 * @param innerTags <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INNERTAGS_PARAM> 683 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_PA_RESTRICT_REM> 684 * @param posArr <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POS_ARR_PARAM> 685 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_VARARGS_S_EXAMPLE> 686 * @return <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INT_ARR_RET> 687 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_VARARGS_S_RET_NOTE> 688 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POSARR_SHORT_EXPL> 689 * 690 * @throws InnerTagKeyException 691 * <EMBED CLASS='external-html' DATA-FILE-ID=ITKEYEX> 692 * @throws ArrayIndexOutOfBoundsException 693 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX> 694 * @throws OpeningTagNodeExpectedException 695 * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX> 696 * 697 * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX> 698 * @throws IllegalArgumentException If parameter {@code 'innerTags'} has zero elements. 699 * 700 * @see TagNode#removeAttributes(String[]) 701 * @see TagNode#hasOR(boolean, String[]) 702 * @see TagNode#isTagNode() 703 * @see TagNode#isClosing 704 * @see InnerTagKeyException#check(String[]) 705 */ 706 public static int[] remove(Vector<? super TagNode> html, int[] posArr, String... innerTags) 707 { 708 InnerTagKeyException.check(innerTags); 709 710 // Use Java Stream to keep a list of Vector-Locations that were updated / modified. 711 IntStream.Builder b = IntStream.builder(); 712 713 // Compute the "minimum length" of a TagNode.str field 714 int MIN = 1000; 715 716 // Minimum-Length of TagNode.str would have to be 3 + smallest inner-tag passed 717 for (String attrib : innerTags) if (attrib.length() < MIN) MIN = attrib.length(); 718 MIN += 3; 719 720 for (int i : posArr) 721 { 722 HTMLNode n = (HTMLNode) html.elementAt(i); 723 724 if (! n.isTagNode()) throw new TagNodeExpectedException(i); 725 726 TagNode tn = (TagNode) n; 727 728 if (tn.isClosing) throw new OpeningTagNodeExpectedException(i); 729 730 // If element-length <= MIN, none of the attributes could possibly be present. 731 if (tn.str.length() < (tn.tok.length() + MIN)) continue; 732 733 // If this TagNode has the attributes that have been requested for removal, then... 734 if (tn.hasOR(false, innerTags)) 735 { 736 // Build a new TagNode, and then replace the old one with the newly built one 737 // on the page or sub-page, and at the same location. 738 739 tn = tn.removeAttributes(innerTags); 740 html.setElementAt(tn, i); 741 742 // Java's IntStream-Builder is just a way to "build" a short list of integer's. 743 // At the end of this method, the list will be built and returned to the user. 744 // It shall contain all Vector locations where a "TagNode swap" (replaced 745 // TagNode, with attributes filtered) has occurred. 746 747 b.accept(i); 748 } 749 } 750 751 // Build the IntStream, Convert the IntStream -> int[], Return it. 752 return b.build().toArray(); 753 } 754 755 756 // *************************************************************************************** 757 // *************************************************************************************** 758 // Retrieve Attributes 759 // *************************************************************************************** 760 // *************************************************************************************** 761 762 763 /** 764 * Convenience Method. 765 * <BR />Invokes: {@link #retrieve(Vector, int, int, String)} 766 */ 767 public static Ret2<int[], String[]> retrieve(Vector<? super TagNode> html, String attribute) 768 { return retrieve(html, 0, -1, attribute); } 769 770 /** 771 * Convenience Method. 772 * <BR />Receives: {@code DotPair} 773 * <BR />Invokes: {@link #retrieve(Vector, int, int, String)} 774 */ 775 public static Ret2<int[], String[]> retrieve 776 (Vector<? super TagNode> html, DotPair dp, String attribute) 777 { return retrieve(html, dp.start, dp.end + 1, attribute); } 778 779 /** 780 * The purpose of this method is to retrieve the <B STYLE="color: red">value</B> of each 781 * attribute in each {@code TagNode} in an HTML {@code Vector} (or sub-{@code Vector}) that 782 * contained such an attribute. 783 * 784 * <EMBED CLASS='external-html' DATA-PROC_TYPE=retrieval DATA-FILE-ID=ATTR_RESTRICT_SE_POS> 785 * 786 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 787 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 788 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 789 * @param attribute <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_ATTR_RETR_SE_PARAM> 790 * 791 * @throws InnerTagKeyException If the attribute <B STYLE="color: red;">name</B> passed to 792 * this parameter does not contain the <B STYLE="color: red;">name</B> of a valid HTML5 793 * attribute, then this exception shall throw. 794 * 795 * @throws IndexOutOfBoundsException 796 * <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 797 * 798 * @return <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_RETRIEVE_SE_RET> 799 * 800 * @see TagNode#AV(String) 801 * @see TagNode#isTagNode() 802 * @see TagNode#isClosing 803 * @see InnerTagKeyException#check(String[]) 804 * @see LV 805 */ 806 public static Ret2<int[], String[]> retrieve 807 (Vector<? super TagNode> html, int sPos, int ePos, String attribute) 808 { 809 InnerTagKeyException.check(attribute); 810 811 // Use a Java Int-Stream. Save matches here (vector-position) 812 IntStream.Builder posB = IntStream.builder(); 813 814 // Use a Java Stream<String>. Save attribute-values here 815 Stream.Builder<String> strB = Stream.builder(); 816 817 // Temp Variables & Loop Variable 818 LV l = new LV(sPos, ePos, html); 819 TagNode tn; 820 String attribValue; 821 822 for (int i=l.start; i < l.end; i++) 823 824 // Only Visit Open TagNode Elements with '.str' long enough to contain attributes 825 if ((tn = ((HTMLNode) html.elementAt(i)).openTagPWA()) != null) 826 827 // If the Open-Tag does not have the attribute, skip the node. If it does, save it. 828 if ((attribValue = tn.AV(attribute)) != null) 829 { 830 posB.accept(i); // Save the vector-index position of the TagNode 831 strB.accept(attribValue); // Save the Attribute-Value inside that TagNode 832 } 833 834 // Java Stream's shall build the arrays. Put them into an instance of Ret2, and return 835 return new Ret2<>(posB.build().toArray(), strB.build().toArray(String[]::new)); 836 } 837 838 /** 839 * This shall visit each {@link TagNode} indicated by the {@code int[]}-Array parameter 840 * {@code 'posArr'}), and then query those {@code TagNode's} for the 841 * Attribute-<B STYLE="color: red;">value</B> of the attribute named by 842 * {@code String}-Parameter {@code 'attribute'} 843 * 844 * <BR /><BR />The <B STYLE="color: red;">value</B> of each of these attributes will be 845 * recorded to a parallel {@code String}-array and returned. This {@code String[]} array shall 846 * be parallel to the input {@code Vector}-index {@code 'posArr'} parameter. 847 * 848 * <EMBED CLASS='external-html' DATA-PROC_TYPE=retrieval DATA-FILE-ID=ATTR_RESTRICT_POSARR> 849 * 850 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 851 * 852 * @param posArr This shall be a list of {@code Vector}-indices that contain opening 853 * {@code TagNode} elements. The <B STYLE="color: red;">value</B> of the attribute provided by 854 * parameter {@code 'attribute'} will be returned in a parallel {@code String[]} array for each 855 * {@code TagNode} identified by {@code 'posArr'}. 856 * 857 * @param attribute <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_ATTR_RETR_PA_PARAM> 858 * @return <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_RETRIEVE_PA_RET> 859 * 860 * @throws InnerTagKeyException If the {@code String} provided to parameter {@code 'attribute'} 861 * is not a valid HTML-5 attribute-<B STYLE="color: red;">name</B>, then this exception shall 862 * thow. 863 * 864 * @throws ArrayIndexOutOfBoundsException 865 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX> 866 * @throws OpeningTagNodeExpectedException 867 * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX> 868 * 869 * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX> 870 * 871 * @see InnerTagKeyException#check(String[]) 872 * @see TagNode#AV(String) 873 * @see TagNode#isTagNode() 874 * @see TagNode#isClosing 875 */ 876 public static String[] retrieve(Vector<? super TagNode> html, int[] posArr, String attribute) 877 { 878 InnerTagKeyException.check(attribute); 879 880 // Return Array, and its corresponding array-index pointer. 881 String[] ret = new String[posArr.length]; 882 int i = 0; 883 884 // Minimum length of the TagNode.str to even have the specified attribute 885 // '<', TOKEN, SPACE, INNERTAG, '=', '>' 886 887 int MIN = 4 + attribute.length(); 888 889 for (int pos: posArr) 890 { 891 HTMLNode n = (HTMLNode) html.elementAt(pos); 892 893 if (! n.isTagNode()) throw new TagNodeExpectedException(pos); 894 895 TagNode tn = (TagNode) n; 896 897 if (tn.isClosing) throw new OpeningTagNodeExpectedException(pos); 898 899 ret[i++] = (tn.str.length() < (tn.tok.length() + MIN)) 900 901 ? null // CASE-1: TagNode.str is too short to even have the attribute 902 : tn.AV(attribute); // CASE-2: Possibly has it: Save the result of TagNode.AV(...) 903 } 904 905 return ret; 906 } 907 908 909 // *************************************************************************************** 910 // *************************************************************************************** 911 // Functional Interface Filter 912 // *************************************************************************************** 913 // *************************************************************************************** 914 915 916 /** 917 * Lambda-target for creating attribute-filters. 918 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTRIBUTES_FILTER> 919 */ 920 @FunctionalInterface 921 public interface Filter 922 { 923 /** 924 * This receives the contents of a {@code 'TagNode'} - after the html-tag and the 925 * inner-tags have been extracted. This method is intended to be used to selectively 926 * remove specific inner-tags / attributes that the programmer would like to see removed. 927 * 928 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=FUNC_INTER_METH> 929 * 930 * @param htmlTag When this method is implemented by a class, or by a lambda-expression, 931 * the user will receive a copy of a TagNode's {@code TagNode.tok} field through this 932 * parameter. The class or lambda-expression which implements method 933 * {@code 'filter(...)'} may use the {@code String} that is passed via the 934 * {@code 'htmlTag'} parameter to, possibly - if needed, help decide which attributes to 935 * remove from the {@code java.util.Properties} parameter {@code 'attributes'} 936 * 937 * @param attributes When this method, {@code 'filter(...)'}, is implemented by a class or 938 * a lambda-expression, he or she is tasked with eliminating any attributes in this 939 * {@code Properties} class that he wishes to filter. 940 * 941 * <BR /><BR /><B><SPAN STYLE="color: red;">NOTE:</B></SPAN> The 942 * <B STYLE="color: red;">key-value</B> pairs of this {@code java.util.Properties} method 943 * are generated by calling 944 * {@link TagNode#allAV(boolean, boolean)}. The <B STYLE="color: red;">values</B> 945 * returned by that method will all have their original quotation-marks included in the 946 * <CODE><B STYLE="color: red;">'value'</B> String</CODE>. 947 * 948 * <BR /><BR /><B>ALSO:</B> This class is intended to function as a filter, and should be 949 * used to remove property <B STYLE="color: red;">key-value</B> pairs from the attributes 950 * parameter received here. However, there is nothing stopping the programmer from 951 * modifying the contents by adding properties, or even changing the 952 * <B STYLE="color: red;">values</B> of the properties. 953 * 954 * @return This method must return a boolean indicating whether or not the attributes 955 * parameter has been changed in any way. If {@code FALSE} were returned, but the class or 956 * lambda-expression which implements this method has modified the attributes 957 * {@code Properties} instance, the changes that were made would be lost, and the 958 * vectorized-html page that contained the {@code TagNode} wouldn't be updated with the 959 * new {@code TagNode}. 960 * 961 * @see TagNode#allAV(boolean, boolean) 962 * @see TagNode#tok 963 */ 964 public boolean filter(String htmlTag, Properties attributes); 965 } 966 967 968 // *************************************************************************************** 969 // *************************************************************************************** 970 // Use BiPredicate to Filter Attributes 971 // *************************************************************************************** 972 // *************************************************************************************** 973 974 975 /** 976 * Convenience Method. 977 * <BR />Invokes: {@link #update(Vector, int, int, Filter)} 978 */ 979 public static int[] update(Vector<? super TagNode> html, Filter f) 980 { return update(html, 0, -1, f); } 981 982 /** 983 * Convenience Method. 984 * <BR />Receives: {@code DotPair} 985 * <BR />Invokes: {@link #update(Vector, int, int, Filter)} 986 */ 987 public static int[] update(Vector<? super TagNode> html, DotPair dp, Filter f) 988 { return update(html, dp.start, dp.end + 1, f); } 989 990 /** 991 * Modifies the contents of each instance of a {@code 'TC.OpeningTags'} element found in the 992 * input {@code Vector}. The type of update that's performed is defined by the parameter 993 * {@code Filter 'f'}. Each time a {@code TagNode} found in the input vectorized-html web-page, 994 * or html sub-list, is changed or modified the, original {@code TagNode} will be removed and 995 * replaced by a new, modified {@code TagNode} instance. 996 * 997 * <EMBED CLASS='external-html' DATA-PROC_TYPE=filtering DATA-FILE-ID=ATTR_RESTRICT_SE_POS> 998 * 999 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP> 1000 * @param f <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_FILTER_PARAM> 1001 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_SE_RESTRICT_REM> 1002 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 1003 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 1004 * 1005 * @throws InnerTagKeyException <EMBED CLASS='external-html' DATA-FILE-ID=ITKEX> 1006 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 1007 * @throws QuotesException <EMBED CLASS='external-html' DATA-FILE-ID=QEX> 1008 * 1009 * @return <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INT_ARR_RET> 1010 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_FILTER_RET_NOTE> 1011 * 1012 * @see TagNode#allAV(boolean, boolean) 1013 * @see TagNode#isTagNode() 1014 * @see TagNode#isClosing 1015 * @see LV 1016 */ 1017 public static int[] update(Vector<? super TagNode> html, int sPos, int ePos, Filter f) 1018 { 1019 // Save Modified node-locations in a java stream (Use the "Primitive Int Stream") 1020 IntStream.Builder b = IntStream.builder(); 1021 1022 // Temp-Loop Variables 1023 LV l = new LV(sPos, ePos, html); 1024 Properties p; 1025 TagNode tn; 1026 1027 for (int i=l.start; i < l.end; i++) 1028 1029 if ( 1030 // Only Opening TagNode's that could possibly have attributes. 1031 ((tn = ((HTMLNode) html.elementAt(i)).openTagPWA()) != null) 1032 1033 // Retrieve all Attribute Key-Value Pairs. Take note of surrounding quotes. 1034 && ((p = tn.allAV(true, true)).size() > 0) 1035 1036 // Run the provided filter logic, if it returns TRUE, then build new TagNode 1037 && f.filter(tn.tok, p) 1038 ) 1039 { 1040 // This makes sure not to leave out any possible "boolean" (a.k.a "Key Only") 1041 // attributes when we rebuild the new TagNode. An example of a "boolean" attribute 1042 // in HTML is "HIDDEN" which is a key that does not require any value to convey its 1043 // purpose or function. Sometimes web-page designers might type "HIDDENT=TRUE", 1044 // but it is not necessary. In any case, the "allAV(boolean, boolean)" method only 1045 // returns attributes that have BOTH a 'key' AND a 'value'. 1046 1047 List<String> keyOnly = tn.allKeyOnlyAttributes(true).collect(Collectors.toList()); 1048 1049 // Build a new TagNode, then replace the old one 1050 tn = new TagNode(tn.tok, p, keyOnly, null, tn.str.endsWith("/>")); 1051 html.setElementAt(tn, i); 1052 1053 // Save the vector-index where a replacement has occurred. The user will be 1054 // provided a list of all locations where an old TagNode was replaced with a new one. 1055 1056 b.accept(i); 1057 } 1058 1059 // Build the IntStream, Convert the IntStream -> int[], Return it. 1060 return b.build().toArray(); 1061 } 1062 1063 /** 1064 * Filters the contents of each instance of a {@code 'TC.OpeningTags'} element in the input 1065 * {@code Vector}. The type of filter performed is defined by the parameter 1066 * {@code Filter 'f'}. Each time a {@code TagNode} in the input vectorized-html web-page, or 1067 * html sub-list, is changed or modified the original {@code TagNode} will be removed and 1068 * replaced by a new, updated or modified {@code TagNode} instance. 1069 * 1070 * <EMBED CLASS='external-html' DATA-PROC_TYPE=filtering DATA-FILE-ID=ATTR_RESTRICT_POSARR> 1071 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_FILTER_EXAMPLE> 1072 * 1073 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP> 1074 * @param f <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_FILTER_PARAM> 1075 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_PA_RESTRICT_REM> 1076 * @param posArr <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POS_ARR_PARAM> 1077 * 1078 * @throws ArrayIndexOutOfBoundsException 1079 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX> 1080 * @throws OpeningTagNodeExpectedException 1081 * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX> 1082 * 1083 * @throws InnerTagKeyException <EMBED CLASS='external-html' DATA-FILE-ID=ITKEX> 1084 * @throws QuotesException <EMBED CLASS='external-html' DATA-FILE-ID=QEX> 1085 * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX> 1086 * 1087 * @return <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INT_ARR_RET> 1088 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_FILTER_RET_NOTE> 1089 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POSARR_SHORT_EXPL> 1090 * 1091 * @see TagNode#allAV(boolean, boolean) 1092 * @see TagNode#isTagNode() 1093 * @see TagNode#isClosing 1094 */ 1095 public static int[] update(Vector<? super TagNode> html, int[] posArr, Filter f) 1096 { 1097 // Use Java Stream to keep a list of Vector-Locations that were updated / modified. 1098 IntStream.Builder b = IntStream.builder(); 1099 1100 for (int i: posArr) 1101 { 1102 HTMLNode n = (HTMLNode) html.elementAt(i); 1103 1104 if (! n.isTagNode()) throw new TagNodeExpectedException(i); 1105 1106 TagNode tn = (TagNode) n; 1107 1108 if (tn.isClosing) throw new OpeningTagNodeExpectedException(i); 1109 1110 // If element-length < tok-length+5, there are no attributes! 1111 // '<', TOKEN, SPACE, ATTRIBUTE<MIN-1>, '=', '>' 1112 1113 if (tn.str.length() < (tn.tok.length() + 5)) continue; 1114 1115 // Retrieve all Attribute Key-Value Pairs. 1116 Properties p = tn.allAV(true, true); 1117 1118 // This makes sure not to leave out any possible "boolean" (a.k.a "Key Only") 1119 // attributes when we rebuild the new TagNode. An example of a "boolean" attribute 1120 // in HTML is "HIDDEN" which is a key that does not require any value to convey its 1121 // purpose or function. Sometimes web-page designers might type "HIDDENT=TRUE", but 1122 // it is not necessary. In any case, the "allAV(boolean, boolean)" method only returns 1123 // attributes that have BOTH a 'key' AND a 'value'. 1124 1125 List<String> keyOnly = tn.allKeyOnlyAttributes(true).collect(Collectors.toList()); 1126 1127 // Run the provided filter logic, if it returns TRUE, then build new TagNode 1128 if ((p.size() > 0) && f.filter(tn.tok, p)) 1129 { 1130 // Build a new TagNode, and replace the old one. 1131 tn = new TagNode(tn.tok, p, keyOnly, null, tn.str.endsWith("/>")); 1132 html.setElementAt(tn, i); 1133 1134 // Save the vector-index where a replacement has occured. The user will be 1135 // provided a list of all locations where an old TagNode was replaced with a 1136 // new one. 1137 1138 b.accept(i); 1139 } 1140 } 1141 1142 // Build the IntStream, Convert the IntStream -> int[], Return it. 1143 return b.build().toArray(); 1144 } 1145 1146 1147 // *************************************************************************************** 1148 // *************************************************************************************** 1149 // Use Attribute White-Lists to Filter Attributes 1150 // *************************************************************************************** 1151 // *************************************************************************************** 1152 1153 1154 /** 1155 * Convenience Method. 1156 * <BR />Invokes: {@link #filter(Vector, int, int, String[])} 1157 */ 1158 public static int[] filter(Vector<? super TagNode> html, String... innerTagWhiteList) 1159 { return filter(html, 0, -1, innerTagWhiteList); } 1160 1161 /** 1162 * Convenience Method. 1163 * <BR />Receives: {@code DotPair} 1164 * <BR />Invokes: {@link #filter(Vector, int, int, String[])} 1165 */ 1166 public static int[] filter 1167 (Vector<? super TagNode> html, DotPair dp, String... innerTagWhiteList) 1168 { return filter(html, dp.start, dp.end + 1, innerTagWhiteList); } 1169 1170 /** 1171 * Filters the contents of each instance of a {@code 'TC.OpeningTags'} element in the input 1172 * {@code Vector} using an attribute {@code 'white-list'}. All input-{@code Vector TagNode's} 1173 * that have attributes whose <B STYLE="color: red;">names</B> are not members of the inner-tag 1174 * {@code white-list} will be removed, and a new {@code TagNode} whose only attributes are 1175 * members of the innerTag {@code white-list} will replace the old {@code TagNode}. 1176 * 1177 * <EMBED CLASS='external-html' DATA-PROC_TYPE=removal DATA-FILE-ID=ATTR_RESTRICT_SE_POS> 1178 * 1179 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP> 1180 * @param innerTagWhiteList <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_WHITE_LIST_PARAM> 1181 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_SE_RESTRICT_REM> 1182 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 1183 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 1184 * 1185 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 1186 * 1187 * @return <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INT_ARR_RET> 1188 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_WHITE_L_RET_NOTE> 1189 * 1190 * @see TagNode#allAN(boolean, boolean) 1191 * @see TagNode#isTagNode() 1192 * @see TagNode#removeAttributes(String[]) 1193 * @see TagNode#isClosing 1194 * @see LV 1195 */ 1196 public static int[] filter 1197 (Vector<? super TagNode> html, int sPos, int ePos, String... innerTagWhiteList) 1198 { 1199 TreeSet<String> whiteList = new TreeSet<>(); 1200 1201 // Java Streams keep a list of which TagNode's were changed 1202 IntStream.Builder b = IntStream.builder(); 1203 1204 // Build the tree-set with the contents of the list. Trim them, convert to lower-case 1205 // 1206 // REMEMBER: Internally, attribute key-value pairs are returned in a java.util.Properties 1207 // instance. This Properties instance always has keys in lower case format. 1208 1209 for (String attribute: innerTagWhiteList) whiteList.add(attribute.trim().toLowerCase()); 1210 1211 // Loop Variables, Temp Variables 1212 LV l = new LV(sPos, ePos, html); 1213 Vector<String> attrToRemove = new Vector<>(); 1214 TagNode tn; 1215 1216 for (int i=l.start; i < l.end; i++) 1217 1218 if ((tn = ((HTMLNode) html.elementAt(i)).openTagPWA()) != null) 1219 { 1220 // Will keep the list of attributes that didn't pass the white-list 1221 attrToRemove.clear(); 1222 1223 // List of all attributes in the TagNode, as a String-Array 1224 String[] allAN = tn.allAN(true, true).toArray(String[]::new); 1225 1226 for (String attribute : allAN) 1227 if (! whiteList.contains(attribute)) 1228 attrToRemove.addElement(attribute); 1229 1230 // if there were attributes that didn't pass... 1231 if (attrToRemove.size() > 0) 1232 { 1233 // Build a new TagNode, and then replace the old one with the newly built one 1234 // on the page or sub-page, and at the same location. 1235 // NOTE: 'removeAttributes' needs a var-args String-Array, not a Vector<String> 1236 1237 tn = tn.removeAttributes(attrToRemove.toArray(StringParse.EMPTY_STR_ARRAY)); 1238 html.setElementAt(tn, i); 1239 1240 // Java's IntStream-Builder is just a way to "build" a short list of integer's. 1241 // This lists has all Vector locations where a "TagNode swap" has occurred. 1242 1243 b.accept(i); 1244 } 1245 } 1246 1247 // Build the IntStream, Convert the IntStream -> int[], Return it. 1248 return b.build().toArray(); 1249 } 1250 1251 /** 1252 * Filters the contents of each instance of a {@code 'TC.OpeningTags'} element in the input 1253 * {@code Vector} using an attribute {@code 'white-list'}. All input-{@code Vector TagNode's} 1254 * that have attributes whose <B STYLE="color: red;">names</B> are not members of the inner-tag 1255 * {@code white-list} will be removed, and a new {@code TagNode} whose only attributes are 1256 * members of the innerTag {@code white-list} will replace the old {@code TagNode}. 1257 * 1258 * <EMBED CLASS='external-html' DATA-PROC_TYPE=removal DATA-FILE-ID=ATTR_RESTRICT_POSARR> 1259 * 1260 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP> 1261 * @param innerTagWhiteList <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_WHITE_LIST_PARAM> 1262 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_PA_RESTRICT_REM> 1263 * @param posArr <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POS_ARR_PARAM> 1264 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_WHITE_LIST_EXAMPLE> 1265 * 1266 * @throws ArrayIndexOutOfBoundsException 1267 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX> 1268 * @throws TagNodeExpectedException 1269 * <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX> 1270 * @throws OpeningTagNodeExpectedException 1271 * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX> 1272 * 1273 * @return <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INT_ARR_RET> 1274 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_WHITE_L_RET_NOTE> 1275 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POSARR_SHORT_EXPL> 1276 * 1277 * @see TagNode#allAN(boolean, boolean) 1278 * @see TagNode#removeAttributes(String[]) 1279 * @see TagNode#isTagNode() 1280 * @see TagNode#isClosing 1281 */ 1282 public static int[] filter 1283 (Vector<? super TagNode> html, int[] posArr, String... innerTagWhiteList) 1284 { 1285 TreeSet<String> whiteList = new TreeSet<>(); 1286 1287 // Java Streams to keep a list of vector-indices that were updated. 1288 IntStream.Builder b = IntStream.builder(); 1289 1290 // Build the tree-set with the contents of the list. Trim them, convert to lower-case 1291 // 1292 // REMEMBER: Internally, attribute key-value pairs are returned in a java.util.Properties 1293 // instance. This Properties instance always has keys in lower case format. 1294 1295 for (String attribute: innerTagWhiteList) whiteList.add(attribute.trim().toLowerCase()); 1296 1297 for (int i: posArr) 1298 { 1299 HTMLNode n = (HTMLNode) html.elementAt(i); 1300 1301 if (! n.isTagNode()) throw new TagNodeExpectedException(i); 1302 1303 TagNode tn = (TagNode) n; 1304 1305 if (tn.isClosing) throw new OpeningTagNodeExpectedException(i); 1306 1307 // If element-length = tok-length+2, THERE ARE NO ATTRIBUTES! 1308 if (tn.str.length() <= (tn.tok.length() + 3)) continue; 1309 1310 // List of all attributes in the TagNode 1311 String[] allAN = tn.allAN(true, true).toArray(String[]::new); 1312 1313 // List of the attributes that DIDN'T PASS the WHITE-LIST 1314 Vector<String> attrToRemove = new Vector<>(); 1315 1316 for (String attribute : allAN) 1317 if (! whiteList.contains(attribute)) 1318 attrToRemove.addElement(attribute); 1319 1320 // if there were attributes that didn't pass... 1321 if (attrToRemove.size() > 0) 1322 { 1323 // Build a new TagNode, and then replace the old one with the newly built one 1324 // on the page or sub-page, and at the same location. 1325 // NOTE: 'removeAttributes' needs a var-args String-Array, not a Vector<String> 1326 1327 tn = tn.removeAttributes(attrToRemove.toArray(StringParse.EMPTY_STR_ARRAY)); 1328 html.setElementAt(tn, i); 1329 1330 // Java's IntStream-Builder is just a way to "build" a short list of integer's. 1331 // This lists has all Vector locations where a "TagNode swap" has occurred. 1332 1333 b.accept(i); 1334 } 1335 } 1336 1337 // Build the IntStream, Convert the IntStream -> int[], Return it. 1338 return b.build().toArray(); 1339 } 1340 1341 1342 // *************************************************************************************** 1343 // *************************************************************************************** 1344 // Use class StrFilter to Filter Attributes 1345 // *************************************************************************************** 1346 // *************************************************************************************** 1347 1348 1349 /** 1350 * Convenience Method. 1351 * <BR />Invokes: {@link #filter(Vector, int, int, StrFilter)} 1352 */ 1353 public static int[] filter(Vector<? super TagNode> html, StrFilter filter) 1354 { return filter(html, 0, -1, filter); } 1355 1356 /** 1357 * Convenience Method. 1358 * <BR />Receives: {@code DotPair} 1359 * <BR />Invokes: {@link #filter(Vector, int, int, StrFilter)} 1360 */ 1361 public static int[] filter(Vector<? super TagNode> html, DotPair dp, StrFilter filter) 1362 { return filter(html, dp.start, dp.end + 1, filter); } 1363 1364 /** 1365 * Filters the contents of each instance of a {@code 'TC.OpeningTags'} element in the input 1366 * {@code Vector} using a {@link StrFilter}. All input-{@code Vector TagNode's} which have 1367 * attributes will have the list of attribute-<B STYLE="color: red;">names</B> tested against 1368 * the provided {@code StrFilter.test(attribute)} predicate. 1369 * 1370 * <BR /><BR />If any attribute whose <B STYLE="color: red;">name</B> fails the 1371 * {@code Predicate} test, then that attribute will be removed. After testing all of a 1372 * {@code TagNode's} inner-tags, if any of those attributes did fail the 1373 * {@code StrFilter.test(...)} method, a new {@code TagNode} will be constructed leaving those 1374 * out. Finally, the old {@code TagNode} will be removed from input HTML {@code Vector}, and 1375 * replaced with the new one. 1376 * 1377 * <EMBED CLASS='external-html' DATA-PROC_TYPE=filtering DATA-FILE-ID=ATTR_RESTRICT_SE_POS> 1378 * 1379 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP> 1380 * @param filter <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_STR_FILTER_PARAM> 1381 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_SE_RESTRICT_REM> 1382 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 1383 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 1384 * 1385 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 1386 * 1387 * @return <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INT_ARR_RET> 1388 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_STR_FILT_RET_NOTE> 1389 * 1390 * @see TagNode#allAN() 1391 * @see TagNode#isTagNode() 1392 * @see TagNode#isClosing 1393 * @see TagNode#removeAttributes(String[]) 1394 * @see LV 1395 */ 1396 public static int[] filter 1397 (Vector<? super TagNode> html, int sPos, int ePos, StrFilter filter) 1398 { 1399 // Save the list of modified TagNode's in a Java Stream 1400 IntStream.Builder b = IntStream.builder(); 1401 1402 // Temp Var, Loop Variable 1403 LV l = new LV(sPos, ePos, html); 1404 TagNode tn; 1405 1406 for (int i=l.start; i < l.end; i++) 1407 1408 if ((tn = ((HTMLNode) html.elementAt(i)).openTagPWA()) != null) 1409 { 1410 // Build a list of all inner-tags that must be removed 1411 String[] innerTagsToRemove = tn 1412 .allAN(true, true) // Builds attibute Stream<String> 1413 .filter(innerTag -> filter.test(innerTag)) // Run the user provided filter 1414 .toArray(String[]::new); // Stream<String> -> String[] 1415 1416 if (innerTagsToRemove.length > 0) 1417 { 1418 // Build a new TagNode, and then replace the old one with the newly built one 1419 // on the page or sub-page, and at the same location. 1420 1421 tn = tn.removeAttributes(innerTagsToRemove); 1422 html.setElementAt(tn, i); 1423 1424 // Java's IntStream-Builder is just a way to "build" a short list of integer's. 1425 // The list shall contain all Vector indices where a "TagNode swap" occurred 1426 1427 b.accept(i); 1428 } 1429 } 1430 1431 // Build the IntStream, Convert the IntStream -> int[], Return it. 1432 return b.build().toArray(); 1433 } 1434 1435 /** 1436 * Filters the contents of each instance of a {@code 'TC.OpeningTags'} element in the input 1437 * {@code Vector} using a {@link StrFilter}. All input-{@code Vector TagNode's} which have 1438 * attributes will have the list of attribute-<B STYLE="color: red;">names</B> tested against 1439 * the provided {@code StrFilter.test(attribute)} predicate. 1440 * 1441 * <BR /><BR />If any attribute whose <B STYLE="color: red;">name</B> fails the 1442 * {@code Predicate} test, then that attribute will be removed. After testing all of a 1443 * {@code TagNode's} inner-tags, if any of those attributes did fail the 1444 * {@code StrFilter.test(...)} method, a new {@code TagNode} will be constructed leaving those 1445 * out. Finally, the old {@code TagNode} will be removed from input HTML {@code Vector}, and 1446 * replaced with the new one. 1447 * 1448 * <EMBED CLASS='external-html' DATA-PROC_TYPE=filtering DATA-FILE-ID=ATTR_RESTRICT_POSARR> 1449 * 1450 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP> 1451 * @param filter <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_STR_FILTER_PARAM> 1452 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_PA_RESTRICT_REM> 1453 * @param posArr <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POS_ARR_PARAM> 1454 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_STR_FILT_EXAMPLE> 1455 * 1456 * @throws ArrayIndexOutOfBoundsException 1457 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX> 1458 * @throws OpeningTagNodeExpectedException 1459 * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX> 1460 * 1461 * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX> 1462 * 1463 * @return <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INT_ARR_RET> 1464 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_STR_FILT_RET_NOTE> 1465 * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POSARR_SHORT_EXPL> 1466 * 1467 * @see TagNode#allAN() 1468 * @see TagNode#isTagNode() 1469 * @see TagNode#isClosing 1470 * @see TagNode#removeAttributes(String[]) 1471 */ 1472 public static int[] filter(Vector<? super TagNode> html, int[] posArr, StrFilter filter) 1473 { 1474 // Use Java Stream to keep a list of Vector-Locations that were updated / modified. 1475 IntStream.Builder b = IntStream.builder(); 1476 1477 for (int i: posArr) 1478 { 1479 HTMLNode n = (HTMLNode) html.elementAt(i); 1480 1481 if (! n.isTagNode()) throw new TagNodeExpectedException(i); 1482 1483 TagNode tn = (TagNode) n; 1484 1485 if (tn.isClosing) throw new OpeningTagNodeExpectedException(i); 1486 1487 // Minimum TagNode.str Length (in order to have attributes): '<', TOKEN, SPACE '>' 1488 if (tn.str.length() < (tn.tok.length() + 3)) continue; 1489 1490 // Build a list of all inner-tags that must be removed 1491 String[] innerTagsToRemove = tn 1492 .allAN(true, true) // Builds attibute Stream<String> 1493 .filter(innerTag -> filter.test(innerTag)) // Run the user provided filter 1494 .toArray(String[]::new); // Stream<String> -> String[] 1495 1496 if (innerTagsToRemove.length > 0) 1497 { 1498 // Build a new TagNode, and then replace the old one with the newly built one 1499 // on the page or sub-page, and at the same location. 1500 1501 tn = tn.removeAttributes(innerTagsToRemove); 1502 html.setElementAt(tn, i); 1503 1504 // Java's IntStream-Builder is just a way to "build" a short list of integer's. 1505 // The list shall contain all Vector indices where a "TagNode swap" occurred 1506 1507 b.accept(i); 1508 } 1509 } 1510 1511 // Build the IntStream, Convert the IntStream -> int[], Return it. 1512 return b.build().toArray(); 1513 } 1514}