001package Torello.HTML; 002 003import java.util.*; 004import java.util.stream.*; 005 006import Torello.HTML.NodeSearch.*; 007import Torello.Java.*; 008 009import Torello.Java.Additional.Ret2; 010import Torello.HTML.Tools.Images.IF; 011 012/** 013 * Tools to retrieve and insert tags into the {@code <HEAD>} of a web-page. 014 * 015 * <EMBED CLASS='external-html' DATA-FILE-ID=FEATURES> 016 */ 017@Torello.JavaDoc.StaticFunctional 018public class Features 019{ 020 private Features() { } 021 022 /** Error Message that is used repeatedly. */ 023 public static final String NO_HEADER_MESSAGE = 024 "You are attempting to insert an HTML INSERT-STR, but such an element belongs in the " + 025 "page's header. Unfortunately, the page or sub-page you have passed does not have a " + 026 "<HEAD>...</HEAD> sub-section. Therefore, there is no place to insert the elements."; 027 028 /** 029 * This {@code String} may be inserted in the HTML <B STYLE='color: red;'> 030 * {@code <HEAD> ... </HEAD>}</B> section to add a "logo-image" at the top-left corner of the 031 * Web-Browser's tab for the page when it loads. This logo is called a {@code 'favicon'}. 032 * 033 * @see #insertFavicon(Vector, String) 034 * @see #hasFavicon(Vector) 035 */ 036 public static final String favicon = 037 "<LINK REL='icon' TYPE='image/INSERT-IMAGE-TYPE-HERE' HREF='INSERT-URL-STRING-HERE' />"; 038 039 /** 040 * This {@code String} may be inserted in the HTML <B STYLE='color: red;'> 041 * {@code <HEAD> ... </HEAD>}</B> section to add a <B>Cascading Style Sheet</B> (a 042 * {@code '.css'} file) to your page. 043 * 044 * <BR /><BR />The web-browser that ultimately loads the HTML that you are exporting will 045 * render the style elements across all the HTML elements in your page that match their 046 * respective CSS-Selectors. 
Without going into a big diatribe about how CSS works, just know 047 * that the {@code String} used to build / instantiate a new {@link TagNode} with an externally 048 * linked {@code CSS}-Page is provided here, by this field. 049 * 050 * @see #insertCSSLink(Vector, String) 051 * @see #getAllCSSLinks(Vector) 052 */ 053 public static final String cssExternalSheet = 054 "<LINK REL=stylesheet TYPE='text/css' HREF='INSERT-URL-STRING-HERE' />"; 055 056 /** 057 * This {@code String} may be inserted in the HTML <B STYLE='color: red;'> 058 * {@code <HEAD> ... </HEAD>}</B> section to add a <B>Cascading Style Sheet</B> (a 059 * {@code '.css'} file) to your page. This particular {@code String}-Constant Field includes / 060 * allows for a {@code MEDIA}-Attribute / Inner-Tag. 061 * 062 * @see #insertCSSLink(Vector, String) 063 * @see #insertCSSLink(Vector, String, String) 064 * @see #getAllCSSLinks(Vector) 065 */ 066 public static final String cssExternalSheetWithMediaAttribute = 067 "<LINK REL=stylesheet TYPE='text/css' HREF='INSERT-URL-STRING-HERE' " + 068 "MEDIA='INSERT-MEDIA-ATTRIBUTE-VALUE-HERE' />"; 069 070 /** 071 * This {@code String} may be inserted in the HTML <B STYLE='color: red;'> 072 * {@code <HEAD> ... </HEAD>}</B> section to add an externally-linked 073 * <B>Java-Script File</B> ({@code '.js'} File) to your page. 074 * 075 * <BR /><BR />The Web-Browser will download this <B>Java-Script</B> page from the 076 * {@code URL} that you ultimately provide and (hopefully) load all your variable definitions 077 * and methods when the page loads. 078 * 079 * <BR /><BR /><B CLASS=JDDescLabel>Closing {@code </SCRIPT>} Tag:</B> 080 * 081 * <BR />Inserting an external <B>Java-Script</B> Page has one important difference vis-a-vis 082 * inserting an external CSS-Page. 
Inserting a link to a {@code '.js'} page requires 083 * <B><I>both</I></B> the opening 084 * <B STYLE='color: red;'>{@code <SCRIPT ..>}</B> <B><I>and</I></B> the closing 085 * <B STYLE='color: red;'>{@code </SCRIPT>}</B> 086 * Tags. 087 * 088 * <BR /><BR />This is expected and required even-when / especially-when there is no actual 089 * java-script code being placed on the {@code '.html'} page itself. Effectively, regardless 090 * of whether you are putting actual java-script code into / inside your HTML page, or you are 091 * just inserting a link to a {@code '.js'} File on your server - <I>you must always create 092 * both the open and the closed HTML 093 * <B STYLE='color: red;'>{@code <SCRIPT SRC='...'></SCRIPT>}</B> tags and insert them into 094 * your Vectorized-HTML Web-Page</I>. 095 * 096 * <BR /><BR />In the brief example below, it should be clear that even though the 097 * {@code SCRIPT}-Tags do not enclose any <B>Java-Script</B>, both the open and the closed 098 * versions of the tag are placed into the HTML-File. 099 * 100 * <DIV CLASS="HTML">{@code 101 * <!-- This is a short note about including the HTML SCRIPT element in your web-pages. --> 102 * <HTML> 103 * <HEAD> 104 * <!-- Version #1 Inserting a java-script 'variables & functions' external-page --> 105 * <SCRIPT TYPE='text/javascript' SRC='/script/javaScriptFiles/functions.js'> 106 * </SCRIPT> 107 * <!-- Right here (line above) we always need the closing Script-tag, even when there is no 108 * actual java-script present, and the methods/variables are going to be downloaded from 109 * the java-script file identified by the SRC="..." attribute! --> 110 * 111 * <SCRIPT TYPE='text/javascript'> 112 * var someVar1; 113 * var someVar2; 114 * 115 * function someFunction() 116 * { return; } 117 * 118 * </SCRIPT> <!-- Either way, the closing-script tag is expected. 
--> 119 * }</DIV> 120 * 121 * @see #insertExternalJavaScriptLink(Vector, String) 122 * @see #getAllExternalJSLinks(Vector) 123 */ 124 public static final String javaScriptExternalPage = 125 "<SCRIPT TYPE='text/javascript' SRC='INSERT-URL-STRING-HERE'>"; 126 127 /** 128 * If you have pages on your site that are almost identical, then you may need to inform search 129 * engines which one to prioritize. Or you might have syndicated content on your site which was 130 * republished elsewhere. You can do both of these things without incurring a duplicate content 131 * penalty – as long as you use a {@code CANONICAL}-Tag. 132 * 133 * <BR /><BR />Instead of confusing Google and missing your ranking on the SERP's, you are 134 * guiding the crawlers as to which URL counts as the “main” one. This places the emphasis on 135 * the right URL and prevents the others from cannibalizing your SEO. 136 * 137 * <BR /><BR />Use {@code CANONICAL}-Tags to avoid having problems with duplicate content that 138 * may affect your rankings. 139 * 140 * <BR /><BR /><HR><BR /> 141 * 142 * The content of this Documentation Page was copied from a page on the web-domain 143 * {@code 'http://searchenginewatch.com'}. It was lifted on May 24th, 2019. 
144 * 145 * <BR /><BR />See link below, if still valid: 146 * 147 * <BR /><A 148 * HREF="https://searchenginewatch.com/2018/04/04/a-quick-and-easy-guide-to-meta-tags-in-seo/"> 149 * https://searchenginewatch.com/2018/04/04/a-quick-and-easy-guide-to-meta-tags-in-seo/ </A> 150 * 151 * @see #insertCanonicalURL(Vector, String) 152 * @see #hasCanonicalURL(Vector) 153 */ 154 public static final String canonicalTag = 155 "<LINK REL=canonical HREF='INSERT-URL-STRING-HERE' />"; 156 157 /** This is a new-line {@code HTMLNode} */ 158 protected static final TextNode NEWLINE = new TextNode("\n"); 159 160 /** 161 * This method checks whether the {@code String}-Parameter {@code 's'} contains a 162 * Single-Quotations Punctuation-Mark anywhere inside that {@code String}. If so, a properly 163 * formatted exception is thrown. This is used as an internal Helper-Method. 164 * 165 * @param s This may be any Java {@code String}, but generally it is one used to insert into an 166 * HTML {@code CONTENT}-Attribute. 167 * 168 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=s DATA-FILE-ID=FT_Q_EX> 169 */ 170 protected static void checkForSingleQuote(String s) 171 { 172 int pos; 173 174 if ((pos = s.indexOf("'")) != -1) throw new QuotesException( 175 "The passed string-parameter may not contain a single-quote punctuation mark. " + 176 "Yours was: [" + s + "], and has a single-quotation mark at string-position " + 177 "[" + pos + "]" 178 ); 179 } 180 181 /** 182 * This inserts a favicon HTML link element into the right location so that a particular 183 * Web-Page will render an "browser icon image" into the top-left corner of the Web-Page's 184 * Browser-Tab. 
185 * 186 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 187 * 188 * @param imageURLAsString <EMBED CLASS='external-html' DATA-FIELD=favicon 189 * DATA-FILE-ID=FT_STR_INS_PARAM> 190 * 191 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 192 * 193 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=imageURLAsString 194 * DATA-FILE-ID=FT_Q_EX> 195 * 196 * @see #favicon 197 * @see #checkForSingleQuote(String) 198 */ 199 public static void insertFavicon(Vector<HTMLNode> html, String imageURLAsString) 200 { 201 // Insert the Favicon <LINK ...> element into the <HEAD> section of the input html page. 202 // <link rel='icon' type='image/INSERT-IMAGE-TYPE-HERE' href='INSERT-URL-STRING-HERE' /> 203 204 checkForSingleQuote(imageURLAsString); 205 206 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 207 DotPair header = TagNodeFindInclusive.first(html, "head"); 208 209 if (header == null) throw new NodeNotFoundException 210 (NO_HEADER_MESSAGE.replace("INSERT-STR", "favicon <LINK> element")); 211 212 String ext = IF.getGuess(imageURLAsString).extension; 213 214 if (ext == null) throw new IllegalArgumentException( 215 "The Image-Type of the 'imageURLAsString' parameter could not be determined. " + 216 "The method IF.getGuess(faviconURL) returned null. Please provide a favicon with " + 217 "standard image file-type. This is required because the image-type is required " + 218 "to be placed inside the HTML <LINK TYPE=... HREF=...> Element 'TYPE' Attribute." 219 ); 220 221 // Build a new Favicon TagNode. 222 TagNode faviconTN = new TagNode 223 ("<LINK REL='icon' TYPE='image/" + ext + "' HREF='" + imageURLAsString + "' />"); 224 225 // Insert the Favicon into the page. 
Put it at the top of the header, just after <HEAD> 226 Util.insertNodes(html, header.start + 1, NEWLINE, faviconTN, NEWLINE); 227 } 228 229 /** 230 * This method will search for an HTML <B STYLE='color: red;'>{@code <LINK REL="icon" ...>}</B> 231 * Tag, in hopes of finding a {@code REL}-Attribute whose value is {@code 'icon'}. 232 * 233 * <BR /><BR />When this method finds such a tag, it will return the 234 * <B STYLE='color: red;'>value</B> of that Tag's {@code HREF}-Attribute. 235 * 236 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 237 * 238 * @return This method will return the {@code String}-<B STYLE='color: red;'>value</B> of the 239 * {@code HREF}-Attribute found inside the {@code LINK}-Tag. 240 * 241 * If this page or sub-page does not have such a tag with an {@code HREF}-Attribute, then null 242 * is returned. 243 * 244 * <BR /><BR /><B STYLE="color: red;">NOTE:</B> In the event that multiple copies 245 * of the HTML {@code LINK}-Tag are found, and more than one of these tags has a 246 * {@code REL}-Attribute with a <B STYLE='color: red;'>value</B> equal to {@code "icon"}, then 247 * this method will simple return the first of the {@code 'favicon'} tags that were found. 248 * 249 * <BR /><BR />An (albeit erroneous) page, with multiple favicon definitions, will not cause 250 * this method to throw an exception. 251 * 252 * @see InnerTagGet 253 * @see #favicon 254 * @see TagNode#AV(String) 255 */ 256 public static String hasFavicon(Vector<? extends HTMLNode> html) 257 { 258 // InnerTagGet.all: Returns a vector of TagNode's that resemble: <LINK rel="icon" ...> 259 // 260 // EQ_CI_TRM: Check the 'rel' Attribute-Value using a Case-Insensitive, Equality 261 // String-Comparison. 262 // Trim the 'rel' Attribute-Value String of possible leading & trailing 263 // White-Space before performing the comparison. 
264 265 Vector<TagNode> list = InnerTagGet.all 266 (html, "LINK", "REL", TextComparitor.EQ_CI_TRM, "icon"); 267 268 // If there were no HTML "<LINK ...>" elements with REL='ICON' attributes, then 269 // there was no favicon. 270 271 if (list.size() == 0) return null; 272 273 // Just in case there were multiple favicon <LINK ...> tags, just return the first 274 // one found. Inside of a <LINK REL="icon" HREF="..."> the 'HREF' Attribute contains 275 // the Image-URL. Use TagNode.AV("HREF") to retrieve that image url. 276 277 String s; 278 for (TagNode tn : list) if ((s = tn.AV("HREF")) != null) return s; 279 280 // If for some reason, none of these <LINK REL='ICON' ...> elements had an "HREF" 281 // attribute, then just return null. 282 283 return null; 284 } 285 286 /** 287 * This inserts an HTML {@code LINK}-Tag into Web-Page parameter {@code 'html'} with the 288 * purpose of linking an externally-defined <B>Cascading Style Sheet</B> (also known as a 289 * {@code CSS}-Page) into that Page-{@code Vector}. 
290 * 291 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 292 * 293 * @param externalCSSFileURLAsString <EMBED CLASS='external-html' DATA-FIELD=cssExternalSheet 294 * DATA-FILE-ID=FT_STR_INS_PARAM> 295 * 296 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 297 * 298 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=externalCSSFileURLAsString 299 * DATA-FILE-ID=FT_Q_EX> 300 * 301 * @see #cssExternalSheet 302 * @see #cssExternalSheetWithMediaAttribute 303 * @see #insertCSSLink(Vector, String, String) 304 * @see #getAllCSSLinks(Vector) 305 * @see #checkForSingleQuote(String) 306 * @see DotPair 307 * @see TagNode 308 */ 309 public static void insertCSSLink(Vector<HTMLNode> html, String externalCSSFileURLAsString) 310 { 311 // Inserts an external CSS Link into the <HEAD> section of this html page vector 312 // <link REL=stylesheet type='text/css' href='INSERT-URL-STRING-HERE' /> 313 314 checkForSingleQuote(externalCSSFileURLAsString); 315 316 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 317 DotPair header = TagNodeFindInclusive.first(html, "head"); 318 319 if (header == null) throw new NodeNotFoundException( 320 NO_HEADER_MESSAGE.replace 321 ("INSERT-STR", "externally-linked CSS page <LINK> element") 322 ); 323 324 TagNode cssTN = new TagNode 325 ("<LINK REL=stylesheet TYPE='text/css' HREF='" + externalCSSFileURLAsString + "' />"); 326 327 // Insert the Style-Sheet link into the page. 
Put it at the top of the header, 328 // just after <HEAD> 329 330 Util.insertNodes(html, header.start + 1, NEWLINE, cssTN, NEWLINE); 331 } 332 333 /** 334 * This inserts a <B>Cascading Style Sheet</B> with the extra {@code MEDIA}-Attribute using 335 * an HTML {@code LINK}-Tag into the Vectorized-HTML Web-Page parameter {@code 'html'} 336 * 337 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 338 * 339 * @param externalCSSFileURLAsString <EMBED CLASS='external-html' DATA-FIELD=cssExternalSheet 340 * DATA-FILE-ID=FT_STR_INS_PARAM> 341 * 342 * @param mediaInnerTagValue Externally linked CSS-Pages, which are included using the HTML 343 * {@code LINK}-Tag may explicitly request a {@code MEDIA}-Attribute be inserted into that 344 * Tag. That {@code MEDIA}-Attribute may take one of five values. In such a tag, the extra 345 * attribute specifies when the listed CSS-Rules are to be applied. 346 * 347 * <BR /><BR />Listed here are the most common values for the {@code MEDIA}-Attribute: 348 * 349 * <BR /><TABLE CLASS=JDBriefTable> 350 * <TR> 351 * <TH>Attribute Value</TH> 352 * <TH>Intended CSS Meaning</TH> 353 * </TR> 354 * <TR> 355 * <TD>screen</TD> 356 * <TD>indicates for use on a computer screen</TD> 357 * </TR> 358 * <TR> 359 * <TD>projection</TD> 360 * <TD>for projected presentations</TD> 361 * </TR> 362 * <TR> 363 * <TD>handheld</TD> 364 * <TD>for handheld devices (typically with small screens)</TD></TR> 365 * <TR> 366 * <TD>print</TD> 367 * <TD>to style printed Web-Pages</TD> 368 * </TR> 369 * <TR> 370 * <TD>all</TD> 371 * <TD>(default value) This is what most people choose. You can leave off the 372 * {@code MEDIA}-Attribute completely if you want your styles to be applied for all 373 * media types. 
374 * </TD> 375 * </TR> 376 * </TABLE> 377 * 378 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 379 * 380 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM1=externalCSSFileURLAsString 381 * DATA-PARAM2=mediaInnerTagValue DATA-FILE-ID=FT_Q_EX_DOUBL> 382 * 383 * @see #cssExternalSheet 384 * @see #cssExternalSheetWithMediaAttribute 385 * @see #insertCSSLink(Vector, String) 386 * @see #getAllCSSLinks(Vector) 387 * @see #checkForSingleQuote(String) 388 * @see DotPair 389 */ 390 public static void insertCSSLink 391 (Vector<HTMLNode> html, String externalCSSFileURLAsString, String mediaInnerTagValue) 392 { 393 // Inserts an external CSS Link (with 'media' attribute) into the <HEAD> section of 394 // this html page vector 395 // <link REL=stylesheet type='text/css' href='INSERT-URL-STRING-HERE' 396 // media='INSERT-MEDIA-ATTRIBUTE-VALUE-HERE' /> 397 398 checkForSingleQuote(externalCSSFileURLAsString); 399 checkForSingleQuote(mediaInnerTagValue); 400 401 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 402 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 403 404 if (header == null) throw new NodeNotFoundException( 405 NO_HEADER_MESSAGE.replace 406 ("INSERT-STR", "externally-linked CSS Style-Sheet LINK-Tag") 407 ); 408 409 // Build the TagNode 410 TagNode cssTN = new TagNode( 411 "<LINK REL=stylesheet TYPE='text/css' HREF='" + externalCSSFileURLAsString + "' " + 412 "MEDIA='" + mediaInnerTagValue + "' />" 413 ); 414 415 // Insert the Style-Sheet link into the page. Put it at the top of the header, just 416 // after <HEAD> 417 418 Util.insertNodes(html, header.start + 1, NEWLINE, cssTN, NEWLINE); 419 } 420 421 /** 422 * This will retrieve all linked CSS-Pages from Vectorized-HTML Web-Page parameter 423 * {@code 'html'}. 
424 * 425 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 426 * @return This will return the links as a list of {@link TagNode}'s' 427 * @see #insertCSSLink(Vector, String) 428 * @see #insertCSSLink(Vector, String, String) 429 * @see InnerTagGet 430 */ 431 public static Vector<TagNode> getAllCSSLinks(Vector<? extends HTMLNode> html) 432 { 433 // InnerTagGet.all: Returns a vector of TagNode's that resemble: 434 // <LINK rel="stylesheet" ...> 435 // 436 // EQ_CI_TRM: Check the 'rel' Attribute-Value using a Case-Insensitive, Equality 437 // String-Comparison 438 // Trim the 'rel' Attribute-Value String of possible leading & trailing 439 // White-Space before performing the comparison. 440 441 return InnerTagGet.all(html, "LINK", "REL", TextComparitor.EQ_CI_TRM, "stylesheet"); 442 } 443 444 /** 445 * This inserts an HTML <B STYLE='color: red;'>{@code '<LINK ...>'}</B> element into the proper 446 * location for linking an externally-defined <B>Java-Script</B> (a {@code '.js'} File) into 447 * the Web-Page. 448 * 449 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 450 * 451 * @param externalJSFileURLAsString 452 * <EMBED CLASS='external-html' DATA-FIELD=javaScriptExternalPage DATA-FILE-ID=FT_STR_INS_PARAM> 453 * 454 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 455 * 456 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=externalJSFileURLAsString 457 * DATA-FILE-ID=FT_Q_EX> 458 * 459 * @see #javaScriptExternalPage 460 * @see #getAllExternalJSLinks(Vector) 461 * @see #checkForSingleQuote(String) 462 * @see TagNode 463 * @see TextNode 464 * @see DotPair 465 * @see HTMLTags#hasTag(String, TC) 466 */ 467 public static void insertExternalJavaScriptLink 468 (Vector<HTMLNode> html, String externalJSFileURLAsString) 469 { 470 // Builds an external Java-Script link, and inserts it into the header portion of 471 // this html page. 
472 // <script type='text/javascript' src='INSERT-URL-STRING-HERE'> 473 474 checkForSingleQuote(externalJSFileURLAsString); 475 476 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 477 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 478 479 if (header == null) throw new NodeNotFoundException( 480 NO_HEADER_MESSAGE.replace( 481 "INSERT-STR", "externally-linked Java-Script <SCRIPT> ... </SCRIPT> elements") 482 ); 483 484 // Build an HTML <SCRIPT ...> node, and a </SCRIPT> node. 485 HTMLNode n = new TagNode 486 ("<SCRIPT TYPE='text/javascript' SRC='" + externalJSFileURLAsString + "'>"); 487 488 HTMLNode closeN = HTMLTags.hasTag("script", TC.ClosingTags); 489 490 // Insert the Java-Script link into the page. Put it at the top of the header, just 491 // after <HEAD> 492 493 Util.insertNodes(html, header.start + 1, NEWLINE, n, closeN, NEWLINE); 494 } 495 496 /** 497 * Inserting <B>Java-Script</B> directly onto an HTML-Page and including an external link to a 498 * {@code '.js'} File are extremely similar tasks. Either way, in both cases the construct is 499 * simply: 500 * 501 * <BR /><BR /><B STYLE='color: red;'>{@code <SCRIPT TYPE='text/javascript'> ... </SCRIPT>}</B> 502 * 503 * <BR /><BR />When the actual functions and methods are pasted into an HTML-Page directly, 504 * they are pasted into the {@code String} above where the ellipses {@code '...'} are. When a 505 * link is made to an external page from a directory on the same Web-Server - both the open and 506 * the close HTML {@code SCRIPT}-Tag's must be included. 507 * 508 * <BR /><BR />If just a link is being added, then the text-content of the {@code SCRIPT}-Tag 509 * should just be left blank or empty. Instead, the {@code URL} to the Java-Script Page is 510 * added as an HTML {@code SRC}-Attribute. 
511 * 512 * <BR /><BR />This method will retrieve any and all {@code 'SCRIPT'} nodes that meet the 513 * following criteria: 514 * 515 * <BR /><BR /><OL CLASS=JDOL> 516 * <LI> The <B>Script Body</B> must be empty, meaning there is no Java-Script between the 517 * opening and closing {@code SCRIPT}-Tags 518 * </LI> 519 * 520 * <LI> The HTML {@code SRC}-Attribute must contain a non-null, non-zero-length 521 * <B STYLE='color: red;'>value</B> 522 * </LI> 523 * 524 * </OL> 525 * 526 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 527 * 528 * @return This will return a list of relative {@code URL's} to externally linked 529 * <B>Java-Script</B> Pages as {@code String's} 530 * 531 * @see InnerTagGetInclusive 532 * @see #javaScriptExternalPage 533 * @see #insertExternalJavaScriptLink(Vector, String) 534 * @see TagNode 535 * @see TextNode 536 * @see TagNode#AV(String) 537 * @see HTMLNode#str 538 */ 539 public static String[] getAllExternalJSLinks(Vector<? extends HTMLNode> html) 540 { 541 // InnerTagGetInclusive.all: Returns a vector of TagNode's that resemble: 542 // <SCRIPT TYPE="javascript" ...> 543 // 544 // CN_CI: Check the 'rel' Attribute-Value using a Case-Insensitive, "Contains" 545 // String-Comparison 546 // 'contains' rather than 'equals' testing is done because this value may be 547 // "javascript", but it may also be "text/javascript" 548 // 549 // Inclusive: This means that everything between the <SCRIPT type="javascript"> ... and 550 // the closing </SCRIPT> tag are returned in a vector of vectors. 
551 552 Vector<Vector<HTMLNode>> v = InnerTagGetInclusive.all 553 (html, "SCRIPT", "TYPE", TextComparitor.CN_CI, "javascript"); 554 555 Stream.Builder<String> b = Stream.builder(); 556 557 TOP: 558 for (Vector<HTMLNode> scriptSection : v) 559 { 560 String srcValue = null; 561 562 for (HTMLNode n : scriptSection) 563 { 564 if (n.isTagNode()) 565 if ((srcValue = ((TagNode) n).AV("SRC")) != null) 566 break; 567 568 if (n.isTextNode()) 569 if (n.str.trim().length() > 0) 570 break TOP; 571 } 572 573 b.add(srcValue); 574 } 575 576 return b.build().toArray(String[]::new); 577 } 578 579 /** 580 * This section will insert a Canonical-{@code URL} into Vectorized-HTML parameter 581 * {@code 'html'}. The {@code URL} itself will be inserted into an HTML {@code LINK}-Tag as 582 * below: 583 * 584 * <BR /><BR /><B STYLE='color: red;'>{@code <LINK REL=canonical HREF='the_url'>}</B> 585 * 586 * <BR /><BR />Since HTML mandates that such elements be located in the {@code 'HEAD'} portion 587 * of an HTML-Page, if the Vectorized-HTML parameter {@code 'html'} does not have a 588 * {@code 'HEAD'} area, then this method shall throw a {@link NodeNotFoundException}. 589 * 590 * <BR /><BR />Note that this exception is an unchecked / runtime exception. 
591 * 592 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 593 * 594 * @param canonicalURLAsStr 595 * <EMBED CLASS='external-html' DATA-FIELD=canonicalTag DATA-FILE-ID=FT_STR_INS_PARAM> 596 * 597 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 598 * 599 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=canonicalURLAsStr 600 * DATA-FILE-ID=FT_Q_EX> 601 * 602 * @see #canonicalTag 603 * @see #hasCanonicalURL(Vector) 604 * @see #checkForSingleQuote(String) 605 * @see TagNode 606 * @see DotPair 607 */ 608 public static void insertCanonicalURL(Vector<HTMLNode> html, String canonicalURLAsStr) 609 { 610 // Inserts a link element into the header of this page 611 // <link REL=canonical href='INSERT-URL-STRING-HERE' /> 612 613 checkForSingleQuote(canonicalURLAsStr); 614 615 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 616 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 617 618 if (header == null) throw new NodeNotFoundException 619 (NO_HEADER_MESSAGE.replace("INSERT-STR", "Canonical-url LINK-Tag")); 620 621 // Builds the canonical <LINK ...> element 622 TagNode linkTN = new TagNode 623 ("<LINK REL=canonical HREF='" + canonicalURLAsStr + "' />"); 624 625 // Insert the canonical-url into the page. Put it at the top of the header, just 626 // after <HEAD> 627 628 Util.insertNodes(html, header.start + 1, NEWLINE, linkTN, NEWLINE); 629 } 630 631 /** 632 * This method will check whether a Vectorized-HTML Page has an HTML 633 * <B STYLE='color: red;'>{@code <LINK REL=canonical ...>}</B> Tag. This tag is used to 634 * inform Search-Engines whether or not this page <I>surrenders</I> or <I>relays</I> to a 635 * "Canonical-{@code URL}". 636 * 637 * <BR /><BR />Canonical-Pages help Search-Engines index large web-sites by providing a root or 638 * Master-{@code URL} to which all sub-pages may point. 
Such {@code URL's} are often (but not 639 * always) like a "Table of Contents". 640 * 641 * <BR /><BR />The primary goal of having a canonical is to avoid forcing Search-Engines (and 642 * their users) from sifting through and indexing every page of a large Web-Site, and instead 643 * focusing on either an introductory T.O.C. or a Title-Page. 644 * 645 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 646 * 647 * @return This will return whatever text was placed inside the canonical-url 648 * {@code HREF='some_url'} attribute/value pair of the HTML link tag. If there were no HTML 649 * {@code <LINK REL=canonical HREF='some_url'>} tag, then this method will return null. 650 * 651 * @throws MalformedHTMLException This exception will be thrown if there are multiple html tags 652 * that match the link, and REL=canonical search criteria requirements. If an HTML element 653 * {@code <link REL=canonical>} is found, but that element does not have an 654 * {@code href='...'} attribute, or that attribute is of zero length, then this a situation 655 * that will also force this exception to throw. 656 * 657 * @see InnerTagGet 658 * @see #canonicalTag 659 * @see #insertCanonicalURL(Vector, String) 660 * @see TagNode#AV(String) 661 */ 662 public static String hasCanonicalURL(Vector<? extends HTMLNode> html) 663 throws MalformedHTMLException 664 { 665 // InnerTagGet.all: Returns a vector of TagNode's that resemble: 666 // <LINK rel="canonical" ...> 667 // 668 // EQ_CI_TRM: Check the 'rel' Attribute-Value using a Case-Insensitive, Equality 669 // String-Comparison 670 // Trim the 'rel' Attribute-Value String of possible leading & trailing 671 // White-Space before performing the comparison. 
672 673 Vector<TagNode> v = InnerTagGet.all 674 (html, "LINK", "REL", TextComparitor.EQ_CI_TRM, "canonical"); 675 676 if (v.size() == 0) return null; 677 678 if (v.size() > 1) throw new MalformedHTMLException( 679 "The Web-Page you have passed has precisely " + v.size() + 680 " Canonical-URL LINK-Tags, but it may not have more than 1. This is " + 681 "invalid HTML." 682 ); 683 684 String s = v.elementAt(0).AV("href"); 685 686 if (s == null) throw new MalformedHTMLException( 687 "The HTML LINK-Tag that was retrieved, contained a " + 688 "REL=canonical Attribute-Value pair, but did not have an HREF-Attribute." + 689 "This is invalid HTML." 690 ); 691 692 if (s.length() == 0) throw new MalformedHTMLException( 693 "The HTML LINK-Tag that was retrieved contained a zero-length " + 694 "String as the Attribute-Value for the HREF-Attribute. This is not " + 695 "invalid, but poorly formatted HTML." 696 ); 697 698 return s; 699 } 700 701 /** 702 * Tools made specifically for the {@code <META>} tags in the {@code <HEAD>} of a web-page. 703 * 704 * <EMBED CLASS='external-html' DATA-FILE-ID=FEATURES_META> 705 */ 706 @Torello.JavaDoc.StaticFunctional 707 public static class Meta 708 { 709 private Meta() { } 710 711 712 // **************************************************************************************** 713 // **************************************************************************************** 714 // Static String-Constants (the tags!) 715 // **************************************************************************************** 716 // **************************************************************************************** 717 718 719 /** 720 * This is the most common HTML <B STYLE='color: red;'>{@code <META ... >}</B> Tag. 
/**
 * Template for the most common HTML <B STYLE='color: red;'>{@code <META ... >}</B> Tag: one
 * that carries both a {@code NAME}-Attribute and a {@code CONTENT}-Attribute.
 *
 * <BR /><BR />The text {@code INSERT-NAME-STRING-HERE} and
 * {@code INSERT-CONTENT-STRING-HERE} inside this {@code String} are place-holders that mark
 * where the two Attribute-<B STYLE='color: red;'>values</B> belong.
 *
 * @see #getAllMetaTagNames(Vector)
 * @see #insertMetaTagName(Vector, MetaTagName, String)
 */
public static final String metaTagName =
    "<META NAME='INSERT-NAME-STRING-HERE' CONTENT='INSERT-CONTENT-STRING-HERE'>";

/**
 * This HTML <B STYLE='color: red;'>{@code <META ...>}</B> Tag is less frequently used, but
 * does provide some properties needed and used by various Web-Servers.  It is the
 * <B>{@code 'ITEMPROP'}</B> Meta-Tag.
 *
 * <BR /><BR />The text {@code INSERT-ITEMPROP-STRING-HERE} and
 * {@code INSERT-CONTENT-STRING-HERE} are place-holders for the two
 * Attribute-<B STYLE='color: red;'>values</B>.
 *
 * @see #getItemProp(Vector, String)
 * @see #insertItemProp(Vector, String, String)
 */
public static final String metaTagItemProp =
    "<META ITEMPROP='INSERT-ITEMPROP-STRING-HERE' CONTENT='INSERT-CONTENT-STRING-HERE'>";

/**
 * <EMBED CLASS='external-html' DATA-PROP=robots DATA-FILE-ID=FEATURES_HTTP_EQUIV>
 *
 * Template for a <B STYLE='color: red;'>{@code <META HTTP-EQUIV=... CONTENT=...>}</B> Tag.
 * The text {@code INSERT-HTTP-EQUIV-STRING-HERE} and {@code INSERT-CONTENT-STRING-HERE} are
 * place-holders for the two Attribute-<B STYLE='color: red;'>values</B>.
 *
 * @see #getHTTPEquiv(Vector, String)
 * @see #insertHTTPEquiv(Vector, String, String)
 */
public static final String metaTagHTTPEquiv =
    "<META HTTP-EQUIV='INSERT-HTTP-EQUIV-STRING-HERE' CONTENT='INSERT-CONTENT-STRING-HERE'>";

/**
 * <EMBED CLASS='external-html' DATA-PROP=robots DATA-FILE-ID=FEATURES_META_PROP>
 *
 * A {@code Robots}-Property Meta-Tag lets you utilize a granular, page-specific approach
 * to controlling how an individual page should be indexed and served to users in
 * Search-Engine results.
 *
 * <BR /><BR />The {@code NAME}-Attribute is fixed to {@code robots}; only the text
 * {@code INSERT-CONTENT-STRING-HERE} is a place-holder.
 *
 * @see #insertRobots(Vector, boolean, boolean)
 * @see #getAllRobots(Vector)
 */
public static final String robotsMetaTag =
    "<META NAME=robots CONTENT='INSERT-CONTENT-STRING-HERE'>";

/**
 * <EMBED CLASS='external-html' DATA-PROP=description DATA-FILE-ID=FEATURES_META_PROP>
 *
 * When search engines crawl Internet Web-Pages to read the provided key-words and
 * descriptions used for indexing, this particular Meta-Tag Property is one of the first
 * those crawlers will look at.
 *
 * <BR /><BR />You may include a {@code Description}-Property in the {@code 'HEAD'} portion
 * of your site's main-page.  A {@code META}-Description can influence both a Search-Engine's
 * Web-Crawlers, and ultimately the click-through rates of your readers.
 *
 * <BR /><BR />Google has stated that Meta-Tag {@code Description}-Properties are NOT used
 * to rank pages.
 *
 * <BR /><BR />The {@code NAME}-Attribute is fixed to {@code description}; the text
 * {@code INSERT-DESCRIPTION-OR-KEYWORDS-HERE} is the place-holder for the
 * {@code CONTENT}-Attribute <B STYLE='color: red;'>value</B>.
 *
 * @see #insertDescription(Vector, String)
 * @see #hasDescription(Vector)
 */
public static final String descriptionMetaTag =
    "<META NAME=description CONTENT='INSERT-DESCRIPTION-OR-KEYWORDS-HERE'>";

/**
 * <EMBED CLASS='external-html' DATA-FILE-ID=FEATURES_UTF8>
 *
 * Unlike the other tag-constants of this class, this {@code String} contains no
 * place-holder text: it is a complete {@code HTTP-EQUIV='Content-Type'} Meta-Tag whose
 * {@code CONTENT}-Attribute is {@code 'text/html; charset=utf-8'}.
 *
 * @see #insertUTF8MetaTag(Vector)
 * @see #hasUTF8MetaTag(Vector)
 */
public static final String UTF8MetaTag =
    "<META HTTP-EQUIV='Content-Type' CONTENT='text/html; charset=utf-8'>";

/**
 * <EMBED CLASS='external-html' DATA-FILE-ID=FEATURES_OPEN_GRAPH>
 *
 * Template for an Open-Graph Meta-Tag.  The {@code PROPERTY}-Attribute always begins with
 * the prefix {@code og:}; the text {@code INSERT-OG-PROPERTY-HERE} and
 * {@code INSERT-OG-VALUE-HERE} are the place-holders.
 *
 * @see #insertOGMetaTag(Vector, String, String)
 * @see #getAllOGMetaTags(Vector)
 */
public static final String openGraphMetaTag =
    "<META PROPERTY='og:INSERT-OG-PROPERTY-HERE' CONTENT='INSERT-OG-VALUE-HERE'>";

/**
 * All Open-Graph Property names.
 *
 * <BR /><BR />The map is created empty at this declaration.
 * NOTE(review): it is presumably filled by initialization code located elsewhere in this
 * class, and the meaning of the mapped {@code String}-values is not visible here - confirm
 * both.  Because the field is {@code public} and the {@code TreeMap} is mutable, callers
 * are able to alter its contents.
 */
public static final TreeMap<String, String> openGraphProperties = new TreeMap<>();
808 * 809 * @see #insertKeyWords(Vector, String[]) 810 * @see #getAllKeyWords(Vector) 811 */ 812 public static final String keyWordsMetaTag = 813 "<META NAME=keywords CONTENT='INSERT-COMMA-SEPARATED-KEYWORDS-HERE'>"; 814 815 /** 816 * <EMBED CLASS='external-html' DATA-PROP=author DATA-FILE-ID=FEATURES_META_PROP> 817 * 818 * This helps identify Web-Sites or Web-Pages "Author-Names" to Web-Indexing and Web-Search 819 * Organizations. 820 * 821 * @see #insertAuthor(Vector, String) 822 * @see #hasAuthor(Vector) 823 */ 824 public static final String authorMetaTag = 825 "<META NAME=author CONTENT='INSERT-AUTHOR-NAME-HERE'>"; 826 827 828 // **************************************************************************************** 829 // **************************************************************************************** 830 // Retrieve all Meta-Tags as a java.util.Properties instance 831 // **************************************************************************************** 832 // **************************************************************************************** 833 834 835 /** 836 * This simple method will retrieve a {@code java.util.Properties} object for each and 837 * every HTML <B STYLE='color: red'>{@code <META ...>}</B> tag found within a 838 * Vectorized-HTML Web-Page. 839 * 840 * @param page Any Vectorized-HTML page. It is expected that this page contain a few 841 * {@code META}-Tags. If not, the method will still return an empty 842 * {@code Vector<Properties>} having {@code size()} of zero. 843 * 844 * @return The Java {@code 'Properties'} object that is returned from a call to 845 * {@link TagNode#allAV()} 846 * 847 * @see TagNode#allAV() 848 * @see TagNodeGet 849 */ 850 public static Vector<Properties> getAllMeta(Vector<HTMLNode> page) 851 { 852 Vector<Properties> ret = new Vector<>(); 853 854 // Retrieve all TagNode's that are HTML <META ...> Elements. 
Invoke TagNode.allAV() 855 // on each of these nodes to retrieve a java.util.Properties instance.\ 856 // 857 // NOTE: These "Properties" could possibly be combined into a single Properties 858 // instance, but because of the ever-changing nature of Web-Page 859 // Meta-Information tags, this is not employed here. It is an exercise 860 // left to the programmer. 861 862 for (TagNode tn : TagNodeGet.all(page, TC.OpeningTags, "META")) 863 ret.add(tn.allAV()); 864 865 return ret; 866 } 867 868 869 // **************************************************************************************** 870 // **************************************************************************************** 871 // Retrieve NAME/Property Meta-Tags 872 // **************************************************************************************** 873 // **************************************************************************************** 874 875 876 /** 877 * This method will find an HTML 878 * <B STYLE='color: red;'>{@code <META NAME=... CONTENT=...>}</B> element whose 879 * {@code NAME}-Attribute has a {@code String}-value equal-to (<I>ignoring case</I>) the 880 * value of the provided {@code String}-parameter {@code 'name'}. 881 * 882 * <BR /><BR />After this HTML {@code META}-Tag has been identified, the 883 * {@code String}-value of it's {@code CONTENT}-Attribute will be extracted and returned. 884 * 885 * <BR /><BR /><B CLASS=JDDescLabel>Returning null, Gracefully:</B> 886 * 887 * <BR />If the page provided does not have an HTML Meta-Tag with a {@code NAME}-Attribute 888 * whose <B STYLE='color: red;'>value</B> is {@code 'name'} or if such an element is 889 * identified, but that tag does not have a {@code CONTENT}-Attribute, then this method 890 * will return null. 
891 * 892 * <BR /><BR /><B CLASS=JDDescLabel>Case Insensitive Comparison:</B> 893 * 894 * <BR />Before the comparison is done with the {@code 'name'} parameter, that 895 * {@code String} is trimmed with {@code String.trim()}, and the comparison performed 896 * <I>is done while ignoring case</I>. 897 * 898 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 899 * 900 * @param name The name of the {@code <META NAME=...>} Tag. 901 * 902 * @return The {@code String}-<B STYLE='color: red;'>value</B> of the 903 * {@code CONTENT}-Attribute for a Meta-Tag whose {@code NAME}-Attribute is equal to the 904 * specified name provided by parameter {@code 'name'}. If such information is not found 905 * on the page, then this method shall return null. 906 * 907 * @see #getItemProp(Vector, String) 908 * @see #getHTTPEquiv(Vector, String) 909 */ 910 public static String getMetaTagName(Vector<HTMLNode> html, String name) 911 { 912 // Find the first <META NAME=... CONTENT=...> tag element where the name equals 913 // the string-value provided by parameter name. 914 915 TagNode tn = InnerTagGet.first 916 (html, "META", "NAME", TextComparitor.EQ_CI, name.trim()); 917 918 // If there are no <META NAME='NAME' CONTENT=...> elements found on the page, 919 // then this method returns null. 920 921 if (tn == null) return null; 922 923 // Return the string-value of the attribute 'content'. Note that if this 924 // attribute isn't available, this method shall return 'null', gracefully. 925 926 return tn.AV("CONTENT"); 927 } 928 929 930 /** 931 * This will retrieve all Meta-Tag's having {@code NAME}-Attribute and 932 * {@code CONTENT}-Attribute pairs. 933 * 934 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 935 * 936 * @return a {@code java.util.Hashtable} of all the Meta-Tag Name/Content pairs that do not 937 * have null values. 
938 * 939 * @throws IllegalArgumentException The method {@code MetaTagName.valueOf(...)} will throw 940 * an Illegal Argument Exception if any of the {@code <META NAME=...>} elements use a value 941 * of "NAME" that is not listed or identified in the Enumerated Type "MetaTagName". 942 * 943 * <BR /><BR /><B><SPAN STYLE="color: red">ALTERNATIVE:</SPAN></B> As Internet Companies 944 * come and go, pinning down a complete list of valid Meta Tag's that use the "NAME" 945 * Attribute is a possibly misguided approach. In lieu of eliminating the Enumerated-Type 946 * {@code MetaTagName}, it should be easier to just use the standard TagNode search below: 947 * 948 * <DIV CLASS="EXAMPLE">{@code 949 * // This code should be used as an alternative to this method if there are non-standard 950 * // HTML Meta Tag Names. It uses the more fundamental InnerTagGet Method. 951 * 952 * // This will retrieve all <META ...> HTML Elements that have a "NAME" Property. 953 * Vector<TagNode> metaTags = InnerTagGet.all(page, "meta", "name"); 954 * 955 * // This will print out those results: 956 * for (TagNode metaTag : metaTags) System.out.println 957 * ("Name:\t" + metaTag.AV("name") + "\tContent:\t" + metaTag.AV("content")); 958 * }</DIV> 959 * 960 * @see MetaTagName 961 * @see #metaTagName 962 * @see #insertMetaTagName(Vector, MetaTagName, String) 963 * @see InnerTagGet 964 */ 965 public static Hashtable<MetaTagName, String> getAllMetaTagNames 966 (Vector<? extends HTMLNode> html) 967 { 968 Hashtable<MetaTagName, String> ret = new Hashtable<>(); 969 970 // Converting the output "Vector<TagNode>" to a "Stream<TagNode>" by calling the 971 // .stream() method mainly because java streams provide the very simple 972 // 'filter(Predicate)' and 'forEach(Consumer)' methods. Vector.removeIf and 973 // Vector.forEach could also have been easily used as well. 
974 975 // InnerTagGet.all returns a vector containing all <META NAME=...> TagNode's where 976 // the value of the 'name' attribute is one of the pre-defined MetaTagName 977 // EnumeratedTypes. 978 979 // NOTE: This is done via a java.util.function.Predicate<String> and a lambda 980 // expression 981 982 InnerTagGet 983 .all (html, "META", "NAME", (String nameAttributeValue) -> 984 MetaTagName.valueOf 985 (nameAttributeValue.toLowerCase().trim()) != null) 986 987 .stream() 988 .filter((TagNode tn) -> tn.AV("CONTENT") != null) 989 990 .forEach((TagNode tn) -> 991 992 ret.put( 993 MetaTagName.valueOf(tn.AV("NAME").toLowerCase().trim()), 994 tn.AV("CONTENT") 995 )); 996 997 return ret; 998 } 999 1000 1001 // **************************************************************************************** 1002 // **************************************************************************************** 1003 // Retrieve **SPECIFIC** NAME/Property Meta-Tags 1004 // **************************************************************************************** 1005 // **************************************************************************************** 1006 1007 1008 /** 1009 * This method looks for robots HTML <B STYLE='color: red;'>{@code <META NAME=robots>} 1010 * </B> tag, and returns the value of the {@code content}-Attribute. 1011 * 1012 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1013 * 1014 * @return This will return a vector of the robots named or specified by the HTML 1015 * Meta-Tag's present on this page. 1016 * 1017 * <BR /><BR /><B><SPAN STYLE="color: red;">NOTE:</B></SPAN> Please do not be disturbed by 1018 * java-streams, they are of limited use, but once a programmer is accustomed to the words 1019 * above, they actually improve code-readability (<B><I>once in a while!</I></B>). A 1020 * series of simple {@code for-loops} which eliminate-duplicates / add / sort would 1021 * accomplish the same task as above. 
1022 * 1023 * @throws MalformedHTMLException If any invalid robot-strings are found on the page, this 1024 * method will throw an exception. The impetus behind this is to prevent accidentally 1025 * ignoring newly found tags, or incorrect tags. The extraction of the robots Meta-Tag from 1026 * an HTML page can be performed manually, if throwing an exception is causing problems. 1027 * The code to do this is listed in the documentation of this method. 1028 * 1029 * @see #robotsMetaTag 1030 * @see #insertRobots(Vector, boolean, boolean) 1031 */ 1032 public static Vector<Robots> getAllRobots(Vector<? extends HTMLNode> html) 1033 throws MalformedHTMLException 1034 { 1035 // Here, again, using Java Streams can be sometimes useful - primarily whenever a 1036 // 'filter' operation is going to be used on a Vector. Vector.removeIf works, BUT 1037 // this also extracts attribute values, and the original TagNode are discarded, and 1038 // replaced by the the <META> attributes. 1039 // 1040 // ALSO SALIENT: the "Arrays.asList" produces an array of string, and the "::addAll" 1041 // puts each separate String in each array into the TreeSet. 1042 // 1043 // NOTE: The TreeSet also functions as a "duplicate checker" although this is also 1044 // provided by Stream.distinct() 1045 // 1046 // InnerTagGet.all; Returns a vector of TagNode's that resemble: 1047 // <META NAME="robots" ...> 1048 // 1049 // EQ_CI_TRM: Check the 'name' Attribute-Value using a Case-Insensitive, Equality 1050 // String-Comparison 1051 // Trim the 'name' Attribute-Value String of possible leading & trailing 1052 // White-Space before performing the comparison. 
1053 1054 TreeSet<String> temp = InnerTagGet 1055 .all (html, "META", "NAME", TextComparitor.EQ_CI_TRM, "robots") 1056 .stream () 1057 .map ((TagNode tn) -> tn.AV("CONTENT")) 1058 1059 .filter ((String contents) -> 1060 (contents != null) && (contents.trim().length() > 0)) 1061 1062 .map ((String contents) -> 1063 Arrays.asList(StrCSV.CSV(contents.toLowerCase()))) 1064 1065 .collect (TreeSet<String>::new, TreeSet::addAll, TreeSet::addAll); 1066 1067 // I cannot use EXCEPTIONS and STREAMS together, there is no simple way. 1068 // It would be too ugly to read. 1069 1070 Vector<Robots> ret = new Vector<>(); 1071 1072 // If an invalid robot-attribute is found, this will 1073 // throw a MalformedHTMLException 1074 1075 for (String s : temp) ret.add(Robots.getRobot(s)); 1076 1077 return ret; 1078 } 1079 1080 /** 1081 * This will retrieve the {@code 'robots'} Meta-Tag 1082 * Attribute-<B STYLE='color: red;'>value</B> present on a Web-Page. 1083 * 1084 * <BR /><BR />If any of them are not in accordance with the tags listed in the 1085 * Enumerated-Type {@link Robots}, this will not cause a {@link MalformedHTMLException} to 1086 * throw. Instead, the result will just be eliminated and ignored. Take care that all of 1087 * the necessary {@code ROBOTS}-Tags are listed in the Enumerated-Type, and that there 1088 * are no "undefined, but necessary" robot elements to be found before using this method! 1089 * 1090 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1091 * @return A vector of all the valid robots attribute values found on the web-page. 1092 * @see #robotsMetaTag 1093 * @see #insertRobots(Vector, boolean, boolean) 1094 * @see TagNode#AV(String) 1095 */ 1096 public static Vector<Robots> getAllRobotsNOMHE(Vector<? extends HTMLNode> html) 1097 { 1098 // Java Streams, used here, filter out irrelevant meta tags, and also convert the 1099 // HTML Meta TagNode's into their their "CONTENT" Attribute String value. 
The TreeSet 1100 // provides a duplicate check elimination and sorts the {@code String's} as well. 1101 // 1102 // ALSO SALIENT: the "Arrays.asList" produces an array of string, and the "::addAll" 1103 // puts each separate String in each array into the TreeSet 1104 // 1105 // NOTE: The 'getRobotNOMHE' suppresses a possible exception, and converts such a 1106 // situation to 'null.' The suppressed-exception is the "MalformedHTMLException" 1107 // 1108 // InnerTagGet.all; Returns a vector of TagNode's that resemble: 1109 // <META NAME="robots" ...> 1110 // 1111 // EQ_CI_TRM: Check the 'name' Attribute-Value using a Case-Insensitive, Equality 1112 // String-Comparison 1113 // Trim the 'name' Attribute-Value String of possible leading & trailing 1114 // White-Space before performing the comparison. 1115 1116 return InnerTagGet 1117 .all (html, "META", "NAME", TextComparitor.EQ_CI_TRM, "robots") 1118 .stream () 1119 .map ((TagNode tn) -> tn.AV("CONTENT")) 1120 1121 .filter ((String contents) -> 1122 (contents != null) && (contents.trim().length() > 0)) 1123 1124 .map ((String contents) -> 1125 Arrays.asList(StrCSV.CSV(contents.toLowerCase()))) 1126 1127 .collect (TreeSet<String>::new, TreeSet::addAll, TreeSet::addAll) 1128 .stream () 1129 .map ((String robotParam) -> Robots.getRobotNOMHE(robotParam)) 1130 .filter ((Robots robot) -> robot != null) 1131 .collect (Collectors.toCollection(Vector<Robots>::new)); 1132 } 1133 1134 /** 1135 * This method will extract any / all HTML 1136 * <B STYLE='color: red;'>{@code <META NAME='keywords' ...>}</B> Meta-Tags, and then extract 1137 * the relevant page key-words. These key-words will be returned as a Java 1138 * {@code String-Vector}. 1139 * 1140 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1141 * 1142 * @return The list of words that were stored in the 'keywords' HTML Meta-Tags. 
If there 1143 * were no keywords in any {@code 'KEYWORDS'} Meta-Tags, then an empty Java 1144 * {@code String[]}-Array is returned. 1145 * 1146 * <BR /><BR /><B CLASS=JDDescLabel>Java Stream's Utility:</B> 1147 * 1148 * <BR />If the code below looks complicated, Java's Streams-Package does have a tendency 1149 * to make <I>simple things look difficult</I>. However, once the {@code Stream}-Methods 1150 * are understood, it's usually pretty useful for actually being very concise. 1151 * 1152 * <BR /><BR /><OL CLASS=JDOL> 1153 * <LI> Get all HTML {@code <META name="keywords" content="...">} elements</LI> 1154 * 1155 * <LI> Extracts the {@code CONTENT}-Attribute, <I>and particularly the 1156 * <B STYLE='color: red;'>value</B> stored there</I> 1157 * </LI> 1158 * 1159 * <LI> Removes blanks, and {@code nulls}</LI> 1160 * <LI> Converts a {@code String[]} to {@code List<String>}</LI> 1161 * <LI> Collects all the List<String> into a single java String-Array</LI> 1162 * </OL> 1163 * 1164 * @see #insertKeyWords(Vector, String[]) 1165 * @see #keyWordsMetaTag 1166 * @see TagNode 1167 * @see TagNode#AV(String) 1168 * @see StrCSV#CSV(String) 1169 */ 1170 public static String[] getAllKeyWords(Vector<? extends HTMLNode> html) 1171 { 1172 // Java Streams here both filter irrelevant meta tags, and also convert the type from 1173 // TagNode to String... using the 'map' function. Ultimately, those strings are 1174 // 'collected' into the returned vector. 1175 // ALSO SALIENT: the "Arrays.asList" produces an array of string, and the "::addAll" 1176 // puts each separate String into the returned Vector. 1177 1178 // InnerTagGet.all: Returns a vector of TagNode's that resemble: 1179 // <META name="keywords" ...> 1180 // 1181 // EQ_CI_TRM: Check the 'name' Attribute-Value using a Case-Insensitive, Equality 1182 // String-Comparison 1183 // Trim the 'name' Attribute-Value String of possible leading & trailing 1184 // White-Space before performing the comparison. 
1185 1186 return InnerTagGet.all(html, "META", "NAME", TextComparitor.EQ_CI_TRM, "keywords") 1187 .stream () 1188 .map ((TagNode tn) -> tn.AV("content")) 1189 1190 .filter ((String contents) -> 1191 (contents != null) && (contents.trim().length() > 0)) 1192 1193 .map ((String contents) -> Arrays.asList(StrCSV.CSV(contents))) 1194 .collect (Vector::new, Vector::addAll, Vector::addAll) 1195 .stream () 1196 .toArray (String[]::new); 1197 } 1198 1199 /** 1200 * This method attempts to retrieve a {@code 'description'}-Property Meta-Tag out of an 1201 * HTML_Page. If no such Meta-Tag is found, then null is returned. 1202 * 1203 * <BR /><BR />If a partial Meta-Tag is found, but that tag is incomplete, then a 1204 * {@link MalformedHTMLException} will be thrown. 1205 * 1206 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1207 * 1208 * @return The content-description that has been extracted from the HTML Meta-Tag 1209 * <B STYLE='color: red;'>{@code <META NAME="description" CONTENT="the-description">}</B>. 1210 * 1211 * <BR /><BR />If this tag is not found, then null is returned. If this tag is found, but 1212 * does not posses a {@code CONTENT}-Attribute, then a {@code MalformedHTMLException} is 1213 * thrown. 1214 * 1215 * @throws MalformedHTMLException This is thrown if there are multiple definitions of the 1216 * {@code 'ROBOTS'} Meta-Tag. There ought to only be a single definition, and if multiple 1217 * are found, it would be better to identify why, and do the data-extraction manually. 1218 * 1219 * This is en-lieu of randomly picking one of them, and randomly returning one of the 1220 * Meta-Tag's {@code CONTENT}-Attribute <B STYLE='color: red;'>value</B>. 1221 * 1222 * <BR />This exception will also be thrown if proper-values for {@code 'index'} or 1223 * {@code 'follow'} are not found in the {@code CONTENT}-Attribute of the 1224 * {@code 'ROBOTS'} Meta-Tag. 1225 * 1226 * <BR /><BR />These are probably unlikely occurrences. 
This exception is a 1227 * Checked-Exception and must have a {@code try-catch} block or be declared thrown in your 1228 * method-declaration. 1229 * 1230 * @see #descriptionMetaTag 1231 * @see #insertDescription(Vector, String) 1232 * @see InnerTagGet 1233 */ 1234 public static String hasDescription(Vector<? extends HTMLNode> html) 1235 throws MalformedHTMLException 1236 { 1237 // InnerTagGet.all; Returns a vector of TagNode's that resemble: 1238 // <META NAME="description" ...> 1239 // 1240 // EQ_CI_TRM: Check the 'name' Attribute-Value using a Case-Insensitive, Equality 1241 // String-Comparison 1242 // Trim the 'name' Attribute-Value String of possible leading & trailing 1243 // White-Space before performing the comparison. 1244 1245 Vector<TagNode> v = InnerTagGet.all 1246 (html, "META", "NAME", TextComparitor.EQ_CI_TRM, "description"); 1247 1248 if (v.size() == 0) return null; 1249 1250 if (v.size() > 1) throw new MalformedHTMLException( 1251 "You have asked for the value of the HTML 'description' <META ...> Tag, but " + 1252 "unfortunately there were multiple instances of this Tag on your page. " + 1253 "This is poorly formatted HTML, and not allowed here." 1254 ); 1255 1256 String s = v.elementAt(0).AV("CONTENT"); 1257 1258 if (s == null) throw new MalformedHTMLException( 1259 "An HTML Meta-Tag was found with a NAME-Attribute whose value was " + 1260 "'description,' but unfortunately this Meta-Tag did not posses a CONTENT-Attribute" 1261 ); 1262 1263 return s; 1264 } 1265 1266 /** 1267 * This helps identify Web-Sites & Web-Pages "author-names" to Web-Indexing and 1268 * Web-Search Organizations. 1269 * 1270 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1271 * 1272 * @return This returns the author's name of a Web-Page, as delineated in the 1273 * {@code 'AUTHOR'} Meta-Tag, or null if the Web-Page parameter {@code 'html'} does not 1274 * have an {@code 'AUTHOR'} Meta-Tag. 
1275 * 1276 * @throws MalformedHTMLException If multiple {@code 'AUTHOR'} Meta-Tags are found, this 1277 * method is forced to throw an exception. It is necessary to avoid "picking a favorite 1278 * author among a list". 1279 * 1280 * <BR /><BR />HTML does not actually adhere to these exact requirements, so if there is 1281 * such a scenario with a page having multiple-authors, this method throws an exception in 1282 * order to avoid returning a {@code String[]}-Array or {@code Vector<String>} which would 1283 * be an alternative that would add unnecessary complexity. 1284 * 1285 * <BR /><BR />If this method throws this exception, it is better to know about it, and 1286 * just perform the search again, using a manual {@code 'AUTHOR'} retrieval. The code for 1287 * extracting these properties is, indeed listed directly at the bottom. 1288 * 1289 * @see #insertAuthor(Vector, String) 1290 * @see #authorMetaTag 1291 * @see TagNode#AV(String) 1292 */ 1293 public static String hasAuthor(Vector<? extends HTMLNode> html) 1294 throws MalformedHTMLException 1295 { 1296 // InnerTagGet.all: Returns a vector of TagNode's that resemble: 1297 // <META name="author" ...> 1298 // 1299 // EQ_CI_TRM: Check the 'name' Attribute-Value using a Case-Insensitive, Equality 1300 // String-Comparison 1301 // Trim the 'name' Attribute-Value String of possible leading & trailing 1302 // White-Space before performing the comparison. 1303 1304 Vector<TagNode> v = InnerTagGet.all 1305 (html, "META", "NAME", TextComparitor.EQ_CI_TRM, "author"); 1306 1307 if (v.size() > 1) throw new MalformedHTMLException( 1308 "This method has identified multiple author Meta-Tags. To handle this " + 1309 "situation, the search should be performed manually using InnerTagGet, with " + 1310 "your code deciding what to do about the HTML Web-Page having multiple 'author' " + 1311 "Meta-Tags." 
1312 ); 1313 1314 // No HTML TagNode's were found that resembled <META NAME=author ...> 1315 if (v.size() == 0) return null; 1316 1317 // Just return the first one that was found, always check for 'null' first to 1318 // avoid the embarrassing NullPointerException. 1319 1320 String author = v.elementAt(0).AV("CONTENT"); 1321 1322 if (author == null) return null; 1323 1324 return author.trim(); 1325 } 1326 1327 1328 // **************************************************************************************** 1329 // **************************************************************************************** 1330 // Retrieve HTTP-EQUIV Meta-Tags 1331 // **************************************************************************************** 1332 // **************************************************************************************** 1333 1334 1335 /** 1336 * This method will find an HTML 1337 * <B STYLE='color: red;'>{@code <META HTTP-EQUIV=... CONTENT=...>}</B> element whose 1338 * {@code HTTP-EQUIV}-Attribute's <B STYLE='color: red;'>value</B> is equal to the 1339 * {@code String}-Parameter {@code 'httpEquiv'} (ignoring case). 1340 * 1341 * <BR /><BR />After such an HTML {@code META}-Tag has been identified, its 1342 * {@code CONTENT}-Attribute {@code String}-value will be subsequently queried, extracted 1343 * and returned by this method. 1344 * 1345 * <BR /><BR /><B CLASS=JDDescLabel>Returning null, Gracefully:</B> 1346 * 1347 * <BR />If the page provided does not have an HTML Meta-Tag with a {@code NAME}-Attribute 1348 * whose <B STYLE='color: red;'>value</B> is {@code 'name'} or if such an element is 1349 * identified, but that tag does not have a {@code CONTENT}-Attribute, then this method 1350 * will return null. 
1351 * 1352 * <BR /><BR /><B CLASS=JDDescLabel>Case Insensitive Comparison:</B> 1353 * 1354 * <BR />Before the comparison is done with the {@code 'httpEquiv'} parameter, that 1355 * {@code String} is trimmed with {@code String.trim()}, and the comparison performed 1356 * <I>is done while ignoring case</I>. 1357 * 1358 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1359 * 1360 * @param httpEquiv The Attribute-<B STYLE='color: red;'>name</B> of the 1361 * {@code HTTP-EQUIV}-Attribute. 1362 * 1363 * @return The {@code String}-value of the {@code CONTENT}-Attribute for a 1364 * {@code META}-Tag whose {@code HTTP-EQUIV}-Attribute is equal to the specified name 1365 * provided by parameter {@code 'httpEquiv'}. 1366 * 1367 * <BR /><BR />If no such tag is found on the page, then this method shall return null. 1368 */ 1369 public static String getHTTPEquiv(Vector<HTMLNode> html, String httpEquiv) 1370 { 1371 // Find the first <META HTTP-EQUIV=... CONTENT=...> tag element where the name equals 1372 // the string-value provided by parameter 'httpEquiv'. 1373 1374 TagNode tn = InnerTagGet.first 1375 (html, "META", "HTTP-EQUIV", TextComparitor.EQ_CI, httpEquiv.trim()); 1376 1377 // If there are no <META HTTP-EQUIV='httpEquiv' CONTENT=...> elements found on the 1378 // page, then this method returns null. 1379 1380 if (tn == null) return null; 1381 1382 // Return the string-value of the attribute 'content'. Note that if this 1383 // attribute isn't available, this method shall return 'null', gracefully. 1384 1385 return tn.AV("CONTENT"); 1386 } 1387 1388 /** 1389 * This method will find all HTML {@code HTTP-EQUIV}-Directives, and return them in a Java 1390 * {@code Properties} object. 1391 * 1392 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1393 * 1394 * @return An instance of {@code java.util.Properties} containing all 1395 * {@code HTTP-EQUIV}-Directives. 
If HTML-Page paramter {@code 'html'} does not have any 1396 * such Meta-Tags, then an empty {@code Properties} instance is returned, rather than null. 1397 * 1398 * @throws MalformedHTMLException If the page provided has multiple definitions for the 1399 * exact same {@code HTTP}-Header property, then this exception will throw. 1400 */ 1401 public static Properties getAllHTTPEquiv(Vector<HTMLNode> html) 1402 throws MalformedHTMLException 1403 { 1404 Properties ret = new Properties(); 1405 String prev = null; 1406 1407 // Find the first <META HTTP-EQUIV=... CONTENT=...> tag element where the name equals 1408 // the string-value provided by parameter 'httpEquiv'. 1409 1410 for (TagNode httpEquivTN : InnerTagGet.all(html, "META", "HTTP-EQUIV")) 1411 1412 if ((prev = (String) ret.put 1413 (httpEquivTN.AV("HTTP-EQUIV"), httpEquivTN.AV("CONTENT"))) != null) 1414 1415 throw new MalformedHTMLException( 1416 "This HTML Page has multiple Meta-Tag Definitions for the HTTP-" + 1417 "EQUIVALENT Property [" + httpEquivTN.AV("HTTP-EQUIV") + "].\n" + 1418 " " + prev + "\n" + 1419 "and " + httpEquivTN.AV("CONTENT") + '\n' 1420 ); 1421 1422 return ret; 1423 } 1424 1425 /** 1426 * This will detect whether a {@code UTF-8} HTML Meta-Tag is included on this page. Below 1427 * are examples of what such tags look like. 1428 * 1429 * <DIV CLASS="HTML">{@code 1430 * <meta http-equiv="content-type" content="text/html; charset=UTF-8"> 1431 * <meta charset="UTF-8"> 1432 * }</DIV> 1433 * 1434 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1435 * 1436 * @return {@code TRUE} If an appropriate HTML Meta-Tag identifying this page as a 1437 * {@code UTF-8} Character-Set Web-Site. will {@code FALSE} otherwise. 1438 * 1439 * @see #hasUTF8MetaTag(Vector) 1440 * @see #UTF8MetaTag 1441 * @see StrCmpr#containsAND_CI(String, String[]) 1442 * @see TagNode#AV(String) 1443 */ 1444 public static boolean hasUTF8MetaTag(Vector<? 
extends HTMLNode> html) 1445 { 1446 String s; 1447 1448 // InnerTagGet.all: Returns a vector of TagNode's that resemble: 1449 // <META http-equiv="content-type" ...> 1450 // 1451 // EQ_CI_TRM: Check the 'http-equiv' Attribute-Value using a Case-Insensitive, 1452 // Equality String-Comparison 1453 // Trim the 'http-equiv' Attribute-Value String of possible leading & 1454 // trailing White-Space before performing the comparison. 1455 1456 Vector<TagNode> v = InnerTagGet.all 1457 (html, "META", "HTTP-EQUIV", TextComparitor.EQ_CI_TRM, "content-type"); 1458 1459 for (TagNode tn : v) 1460 if ((s = tn.AV("CONTENT")) != null) 1461 if (StrCmpr.containsAND_CI(s, "charset", "utf-8")) 1462 return true; 1463 1464 // InnerTagGet.aall retrieves all TagNode's that resemble <META charset="utf-8" ...> 1465 // EQ_CI_TRM: Equality-Test, Case-Insensitive, Trim any White-Space before 1466 // performing comparison. 1467 1468 v = InnerTagGet.all(html, "META", "CHARSET", TextComparitor.EQ_CI_TRM, "utf-8"); 1469 1470 for (TagNode tn : v) 1471 if ((s = tn.AV("CHARSET")) != null) 1472 if (StrCmpr.containsAND_CI(s, "utf-8")) 1473 return true; 1474 1475 return false; 1476 } 1477 1478 1479 // **************************************************************************************** 1480 // **************************************************************************************** 1481 // Insert NAME/Property Meta-Tags 1482 // **************************************************************************************** 1483 // **************************************************************************************** 1484 1485 1486 /** 1487 * This does a very simple insertion of an HTML Meta-Tag for a specific type, 1488 * Meta-Tags that have both a {@code NAME}-Attribute and a {@code CONTENT}-Attribute 1489 * set. 
1490 * 1491 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1492 * 1493 * @param m This is any of the enumerated-types of specific Meta-Tag {@code NAME}-Attribute 1494 * & {@code CONTENT}-Attribute pair / combinations. 1495 * 1496 * @param contentAttributeValue This is the value that will be used to set the 1497 * <B STYLE='color: red;'>value</B> for the {@code CONTENT}-Attribute. 1498 * 1499 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1500 * 1501 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=contentAttributeValue 1502 * DATA-FILE-ID=FT_Q_EX> 1503 * 1504 * @see #metaTagName 1505 * @see #getAllMetaTagNames(Vector) 1506 * @see DotPair 1507 * @see TagNode 1508 */ 1509 public static void insertMetaTagName 1510 (Vector<HTMLNode> html, MetaTagName m, String contentAttributeValue) 1511 { 1512 // Builds and inserts a TagNode HTML Element that looks like: 1513 // <meta name='INSERT-NAME-STRING-HERE' content='INSERT-CONTENT-STRING-HERE'> 1514 1515 // Single Quotes are used, so the attribute-value may not contain single quotes. 1516 checkForSingleQuote(contentAttributeValue); 1517 1518 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 1519 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1520 1521 if (header == null) throw new NodeNotFoundException 1522 (NO_HEADER_MESSAGE.replace("INSERT-STR", "<META NAME=... CONTENT=...> tag")); 1523 1524 // Build a <META> tag, as in the comment above 1525 TagNode metaTN = new TagNode 1526 ("<META NAME='" + m.name + "' CONTENT='" + contentAttributeValue + "'>"); 1527 1528 // Insert the meta-tag into the page. Put it at the top of the header, 1529 // just after <HEAD> 1530 1531 Util.insertNodes(html, header.start + 1, NEWLINE, metaTN, NEWLINE); 1532 } 1533 1534 /** 1535 * This does an insertion of a list of HTML Meta-Tags from a java Hashtable of Meta-Tag 1536 * Name-Attribute / Content-Attribute pairs. 
All name-based Meta-Tags have both a 1537 * {@code NAME}-Attribute, and also a {@code CONTENT}-Attribute. 1538 * 1539 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1540 * 1541 * @param metaTags This is a hash-table of the enumerated-types of specific Meta-Tag Name 1542 * property/content pairs. 1543 * 1544 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1545 * 1546 * @throws QuotesException If any of the <B STYLE='color: red;'>values</B> from the 1547 * <B STYLE='color:red'>key-value</B> pair hash-table contain a {@code String} that has a 1548 * single-quotation mark, anywhere inside the it. 1549 * 1550 * @see #metaTagName 1551 * @see #getAllMetaTagNames(Vector) 1552 * @see #insertMetaTagName(Vector, MetaTagName, String) 1553 * @see TagNode 1554 */ 1555 public static void insertMetaTagNames 1556 (Vector<HTMLNode> html, Hashtable<MetaTagName, String> metaTags) 1557 { 1558 // Builds and inserts a TagNode HTML Element that looks like: 1559 // "<meta name='INSERT-NAME-STRING-HERE' content='INSERT-CONTENT-STRING-HERE'"; 1560 1561 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 1562 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1563 1564 if (header == null) throw new NodeNotFoundException 1565 (NO_HEADER_MESSAGE.replace("INSERT-STR", "<META NAME=... CONTENT=...> tag")); 1566 1567 // Java Stream's can be addictive... It is an easier way to build a list. 
1568 Stream.Builder<HTMLNode> b = Stream.builder(); 1569 b.accept(NEWLINE); 1570 1571 // Iterate the complete list of meta-tag names to insert 1572 for (MetaTagName m : metaTags.keySet()) 1573 { 1574 String contentAttributeValue = metaTags.get(m); 1575 checkForSingleQuote(contentAttributeValue); 1576 1577 // Build the new node 1578 TagNode metaTN = new TagNode 1579 ("<META NAME='" + m.name + "' CONTENT='" + contentAttributeValue + "'>"); 1580 1581 b.accept(metaTN); b.accept(NEWLINE); 1582 } 1583 1584 // Insert the meta-tag names into the page. Put it at the top of the header, 1585 // just after <HEAD> 1586 1587 Util.insertNodes(html, header.start + 1, b.build().toArray(HTMLNode[]::new)); 1588 } 1589 1590 1591 // **************************************************************************************** 1592 // **************************************************************************************** 1593 // Insert **SPECIFIC** NAME/Property Meta-Tags 1594 // **************************************************************************************** 1595 // **************************************************************************************** 1596 1597 1598 /** 1599 * One common HTML Meta-Tag is the one which informs Google & Yahoo (and all 1600 * search-engine sites) which of your pages you would like to be indexed by their search 1601 * engine, and which pages you would like to not be indexed. Worrying about what Google 1602 * does and does not index may seem daunting, but this meta-tag can prevent certain 1603 * behaviors. 1604 * 1605 * <BR /><BR />The {@code 'ROBOTS'} Meta-Tag informs Search-Engines which pages on your 1606 * site should be indexed. This Meta-Tag serves a similar purpose to a {@code 'robots.txt'} 1607 * File. It is generally used to prevent a Search-Engine from indexing individual pages, 1608 * while {@code 'robots.txt'} is used to prevent the search from indexing a whole site or 1609 * section of a site. 
1610 * 1611 * <BR /><BR />A {@code 'ROBOTS'} Meta-Tag which instructs the Search-Engine Crawler not to 1612 * index a page, or follow any links on it, would be written as below. 1613 * 1614 * <DIV CLASS="HTML">{@code 1615 * <meta name="robots" content="noindex, nofollow" /> 1616 * <meta name="robots" content="index, follow" /> 1617 * }</DIV> 1618 * 1619 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1620 * 1621 * @param index This is a {@code boolean}-Parameter that when set to {@code TRUE} will 1622 * force this method to place an {@code INDEX-String} into the finally-exported HTML 1623 * element. If {@code FALSE} is passed, then a {@code NOINDEX-String} will be put into the 1624 * HTML-Tag. 1625 * 1626 * @param follow This is also a {@code boolean}-Parameter. When {@code TRUE} this will 1627 * force the method to put a {@code FOLLOW-String} into the finally-exported HTML-Tag. 1628 * When {@code FALSE}, then a {@code 'NOFOLLOW'} will be inserted. 1629 * 1630 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1631 * 1632 * @see #robotsMetaTag 1633 * @see #getAllRobots(Vector) 1634 * @see #getAllRobotsNOMHE(Vector) 1635 * @see TagNode 1636 */ 1637 public static void insertRobots(Vector<HTMLNode> html, boolean index, boolean follow) 1638 { 1639 // Builds a robots meta tag. These are used by google and search engines 1640 // <meta NAME=robots content='INSERT-CONTENT-STRING-HERE' /> 1641 1642 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 1643 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1644 1645 if (header == null) throw new NodeNotFoundException 1646 (NO_HEADER_MESSAGE.replace("INSERT-STR", "Robots <META ... > Tag")); 1647 1648 // Build a 'robots' TagNode 1649 TagNode robotsTN = new TagNode( 1650 "<META NAME=robots CONTENT='" + 1651 (index ? "index" : "noindex") + ", " + (follow ? 
"follow" : "nofollow") + 1652 "' >" 1653 ); 1654 1655 // Insert the robots-tag into the page. 1656 // Put it at the top of the header, just after <HEAD> 1657 1658 Util.insertNodes(html, header.start + 1, NEWLINE, robotsTN, NEWLINE); 1659 } 1660 1661 /** 1662 * This will add an HTML Meta-Tag with a 1663 * <B STYLE='color: red;'>{@code <META NAME=robots>}</B> 1664 * 1665 * <BR /><BR /><B CLASS=JDDescLabel>Validity Check Warning:</B> 1666 * 1667 * <BR />This method avoids all presumed <I><B>validity check,</B></I> primarily because 1668 * making an attempt to identify what is absolutely correct or not-correct seems a little 1669 * far-fetched. 1670 * 1671 * <BR /><BR />Although the number of actual values the {@code ROBOTS}-Attribute may 1672 * contain is very low, throwing a {@code MalformedHTMLException} for some errors, while 1673 * ignoring others was decided to best avoid during this method's development. 1674 * 1675 * <BR /><BR /><B CLASS=JDDescLabel:>Aside:</B> 1676 * 1677 * <BR />If a programmer were to pass both the {@link Robots#Follow} and the 1678 * {@link Robots#NoFollow} Enum-Constants, both of these tags would be inserted into an 1679 * HTML {@code 'robots'} Meta-Tag without any kind of warning or exception throw. 1680 * 1681 * <BR /><BR />This, clearly, would be a faulty HTML directive, though. 1682 * 1683 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1684 * 1685 * @param rArr This is an array of the Enumerated-Type {@link Robots}. It may contain a 1686 * list of any number of the items available to add into an HTML Meta-Tag's 1687 * {@code ROBOTS}-Attribute. If any of the array elements are null, they will be skipped 1688 * and ignored. 
1689 * 1690 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1691 * 1692 * @see #robotsMetaTag 1693 * @see #getAllRobots(Vector) 1694 * @see #insertRobots(Vector, boolean, boolean) 1695 * @see StrCSV#toCSV(Object[], IntTFunction, boolean, Integer) 1696 * @see DotPair 1697 */ 1698 public static void insertRobots(Vector<HTMLNode> html, Robots... rArr) 1699 { 1700 // Builds a series-of-robots meta tag. These are used by google and search engines 1701 // <meta NAME=robots content='INSERT-CONTENT-STRING-HERE' /> 1702 1703 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 1704 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1705 1706 if (header == null) throw new NodeNotFoundException 1707 (NO_HEADER_MESSAGE.replace("INSERT-STR", "Robots <META ... > Tag")); 1708 1709 String robotsStr = StrCSV.toCSV(rArr, (int i, Robots r) -> r.name, false, null); 1710 1711 // Build the <META> TagNode 1712 TagNode robotsTN = new TagNode("<META NAME=robots CONTENT='" + robotsStr + "'>"); 1713 1714 // Insert the robots-tag into the page. Put it at the top of the header, just 1715 // after <HEAD> 1716 1717 Util.insertNodes(html, header.start + 1, NEWLINE, robotsTN, NEWLINE); 1718 } 1719 1720 /** 1721 * Another common HTML {@code META}-Tag is the one that provides a brief description of 1722 * the page in question. 
This method facilitates adding a Meta-Tag that contains two 1723 * attributes: 1724 * 1725 * <BR /><BR /><OL CLASS=JDUL> 1726 * <LI> {@code NAME}-Attribute whose <B STYLE='color: red;'>value</B> must be 1727 * {@code 'description'} 1728 * </LI> 1729 * 1730 * <LI> {@code CONTENT}-Attribute whose <B STYLE='color: red;'>value</B> should be a brief 1731 * textual description of the content of the page 1732 * </LI> 1733 * </OL> 1734 * 1735 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1736 * 1737 * @param description This is a textual-description of the Web-Page to which this HTML 1738 * <B STYLE='color: red;'>{@code <META NAME=description CONTENT='...'}</B> Tag is being 1739 * added. If Google or any of the other Internet Search Sites, return your Web-Page as a 1740 * part of a search-results, this description is usually used. 1741 * 1742 * <BR /><BR />Furthermore, the key-words that are listed here are some-how (in a way that 1743 * is not-knownst to this programmer) used in indexing your particular page in the 1744 * search-algorithms. 1745 * 1746 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1747 * 1748 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=description 1749 * DATA-FILE-ID=FT_Q_EX> 1750 * 1751 * @see #descriptionMetaTag 1752 * @see #hasDescription(Vector) 1753 * @see #checkForSingleQuote(String) 1754 * @see TagNode 1755 */ 1756 public static void insertDescription(Vector<HTMLNode> html, String description) 1757 { 1758 // Meta-Tag for Descriptions. This will be inserted into the HTML page. 1759 // <meta NAME=description content='INSERT-DESCRIPTION-OR-KEYWORDS-HERE'> 1760 1761 checkForSingleQuote(description); 1762 1763 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 
1764 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1765 1766 if (header == null) throw new NodeNotFoundException 1767 (NO_HEADER_MESSAGE.replace("INSERT-STR", "Description <META ... > Tag")); 1768 1769 // Build the Meta Tag for a description to google and search engines 1770 TagNode metaTN = new TagNode 1771 ("<META NAME=description CONTENT='" + description + "'>"); 1772 1773 // Insert the description-tag into the page. Put it at the top of the header, 1774 // just after <HEAD> 1775 1776 Util.insertNodes(html, header.start + 1, NEWLINE, metaTN, NEWLINE); 1777 } 1778 1779 /** 1780 * This will attempt to insert key-words into an HTML Meta-Tag. This is usually used to 1781 * summarize-explain 'main-points' that a Web-Page author wants to make to any 1782 * search-engineer or any-listener on the internet about the Web-Page that includes such a 1783 * Meta-Tag. 1784 * 1785 * <BR /><BR /><B CLASS=JDDescLabel>Validity Checking:</B> 1786 * 1787 * <BR />This method does a few minor validity checks regarding the content inside of a 1788 * description keyword. All it does is look for things like White-Space and a few 1789 * punctuation rules. If either of these problems occur inside any of the key-words 1790 * provided to the {@code 'keyWords'} Var-Args Parameter, then an 1791 * {@code IllegalArgumentException} is thrown. 
1792 * 1793 * <BR /><BR /><B CLASS=JDDescLabel>Disallowed Punctuation:</B> 1794 * 1795 * <BR />This list of disallowed punctuation marks for the key-words are as processed as 1796 * follows: 1797 * 1798 * <DIV CLASS="SNIP">{@code 1799 * if (StrCmpr.containsOR 1800 * (keyWord, ";", ",", "'", "\"", "!", "#", "<", ">", 1801 * "(", ")", "*", "/", "\\") 1802 * ) 1803 * throw new IllegalArgumentException(...); 1804 * }</DIV> 1805 * 1806 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1807 * 1808 * @param keyWords This is a list of germane key-words that help identify, indicate or 1809 * describe the content of the Web-Page in which they are placed. 1810 * 1811 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1812 * 1813 * @throws IllegalArgumentException If any of the key-words provided to the Java Var-Args 1814 * {@code 'keyWords'} parameter contain invalid punctuation characters, or white-space. 1815 * 1816 * @see #keyWordsMetaTag 1817 * @see #getAllKeyWords(Vector) 1818 * @see StringParse#hasWhiteSpace(String) 1819 * @see StrCmpr#containsOR(String, String[]) 1820 * @see StrCSV#toCSV(String[], boolean, boolean, Integer) 1821 */ 1822 public static void insertKeyWords(Vector<HTMLNode> html, String... keyWords) 1823 { 1824 // The meta-tag for key-words. Search Engines look for these key-words when indexing 1825 // <meta NAME=keywords content='INSERT-COMMA-SEPARATED-KEYWORDS-HERE'> 1826 1827 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 
1828 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1829 1830 if (header == null) throw new NodeNotFoundException 1831 (NO_HEADER_MESSAGE.replace("INSERT-STR", "KeyWords Meta-Tag")); 1832 1833 for (String keyWord : keyWords) if (StringParse.hasWhiteSpace(keyWord)) 1834 1835 throw new IllegalArgumentException( 1836 "You have tried to insert keywords into an HTML Meta-Tag KeyWord-{roperty, " + 1837 "but unfortunately one of the words provided [" + keyWord + "] contains " + 1838 "white-space. This is not allowed here." 1839 ); 1840 1841 1842 for (String keyWord : keyWords) 1843 1844 if (StrCmpr.containsOR 1845 (keyWord, ";", ",", "'", "\"", "!", "<", ">", "(", ")", "*", "/", "\\")) 1846 1847 throw new IllegalArgumentException( 1848 "You have tried to insert keywords into an HTML Meta-Tag KeyWords-" + 1849 "Property, but unfortunately one of the words provide [" + keyWord + "] " + 1850 "contains error-prone punctuation, and cannot be used here." 1851 ); 1852 1853 // All this does is build a list - Comma Separated values. 1854 String listAsString = StrCSV.toCSV(keyWords, true, false, null); 1855 1856 // Build the TagNode, it will contain all key-words listed in the input var-args 1857 // String array 1858 1859 TagNode metaTN = new TagNode("<META NAME=keywords CONTENT='" + listAsString + "'>"); 1860 1861 // Insert the tag into the page. Put it at the top of the header, just after <HEAD> 1862 Util.insertNodes(html, header.start + 1, NEWLINE, metaTN, NEWLINE); 1863 } 1864 1865 /** 1866 * This method will insert an "author" HTML Meta-Tag into the 1867 * <B STYLE='color: red;'>{@code <HEAD> ... </HEAD>}</B> section of this page. 1868 * 1869 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1870 * @param author This is the author of this Web-Page. 
1871 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1872 * 1873 * @throws QuotesException If the author's name prevents the HTML-Engine from building any 1874 * version of an {@code AUTHOR} Meta-Tag. This will happen, certainly, if the author's 1875 * name-{@code String} contains <I><B>both</B></I> a single <I><B>and</B></I> a double 1876 * quote. 1877 * 1878 * <BR /><BR />Choose either the single-quote, or the double. Do not use both, or this 1879 * exception will throw. 1880 * 1881 * <BR /><BR /><B><SPAN STYLE="color: red;">MOST IMPORTANT</B></SPAN> Most author's names 1882 * don't have any quotes at all! Checking for these things prevents unexplainable 1883 * exceptions later on. 1884 * 1885 * @see #authorMetaTag 1886 * @see #hasAuthor(Vector) 1887 * @see SD 1888 * @see DotPair 1889 */ 1890 public static void insertAuthor(Vector<HTMLNode> html, String author) 1891 { 1892 // The 'Author' Meta tag shall be inserted into the html page. 1893 // <meta NAME=author content='INSERT-AUTHOR-NAME-HERE'> 1894 1895 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 1896 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1897 1898 if (header == null) throw new NodeNotFoundException 1899 (NO_HEADER_MESSAGE.replace("INSERT-STR", "author meta-tag")); 1900 1901 if ((author.indexOf("'") != -1) && (author.indexOf("\"") != -1)) 1902 1903 throw new QuotesException( 1904 "The author string provided here contains both a single-quote and a double-" + 1905 "quote, but this cannot be inserted into any HTML-Tag. Please remove " + 1906 "one or the other." 1907 ); 1908 1909 // Use the more complicated TagNode constructor to build the "author" tag. 1910 SD quote = (author.indexOf("'") == -1) ? SD.SingleQuotes : SD.DoubleQuotes; 1911 Properties p = new Properties(); 1912 1913 p.put("NAME", "author"); 1914 p.put("CONTENT", author); 1915 1916 // This constructor accepts a properties instance. 
1917 TagNode authorTN = new TagNode("META", p, quote, true); 1918 1919 // Insert the tag into the page. Put it at the top of the header, just after <HEAD> 1920 Util.insertNodes(html, header.start + 1, NEWLINE, authorTN, NEWLINE); 1921 } 1922 1923 1924 // **************************************************************************************** 1925 // **************************************************************************************** 1926 // Insert HTTP-EQUIV Meta-Tags 1927 // **************************************************************************************** 1928 // **************************************************************************************** 1929 1930 1931 /** 1932 * This does a very simple insertion of an HTML Meta-Tag for a specific type, 1933 * Meta-Tags that have a {@code HTTP-EQUIV}-Attribute paired with a 1934 * {@code CONTENT}-Attribute. 1935 * 1936 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1937 * 1938 * @param httpEquiv This is the property that is passed using the 1939 * {@code HTTP-EQUIV}-Attribute. 1940 * 1941 * @param contentAttributeValue This is the value that will be used to set the 1942 * {@code CONTENT}-Attribute. 1943 * 1944 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1945 * 1946 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=contentAttributeValue 1947 * DATA-FILE-ID=FT_Q_EX> 1948 * 1949 * @see #metaTagHTTPEquiv 1950 * @see #getHTTPEquiv(Vector, String) 1951 * @see DotPair 1952 * @see TagNode 1953 */ 1954 public static void insertHTTPEquiv 1955 (Vector<HTMLNode> html, String httpEquiv, String contentAttributeValue) 1956 { 1957 // Builds and inserts a TagNode HTML Element that looks like: 1958 // <meta http-equiv='INSERT-HTTP-EQUIV-STRING-HERE' 1959 // content='INSERT-CONTENT-STRING-HERE' > 1960 1961 // Single Quotes are used, so the attribute-value may not contain single quotes. 
1962 checkForSingleQuote(contentAttributeValue); 1963 1964 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 1965 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1966 1967 if (header == null) throw new NodeNotFoundException 1968 (NO_HEADER_MESSAGE.replace("INSERT-STR", "<META HTTP-EQUIV=... CONTENT=...> Tag")); 1969 1970 // Build a <META> tag, as in the comment above 1971 TagNode metaTN = new TagNode 1972 ("<META HTTP-EQUIV='" + httpEquiv + "' CONTENT='" + contentAttributeValue + "'>"); 1973 1974 // Insert the meta-tag into the page. Put it at the top of the header, 1975 // just after <HEAD> 1976 1977 Util.insertNodes(html, header.start + 1, NEWLINE, metaTN, NEWLINE); 1978 } 1979 1980 /** 1981 * The method will insert a {@code UTF-8} Meta-Tag that identifies the HTML-Page to any 1982 * Web-Browser that attempts to render its content as containing Foreign-Language 1983 * Characters, Emoji's & other non-{@code ASCII} Glyphs. 1984 * 1985 * <BR /><BR />{@code UTF-8} text utilizes/makes-use-of characters in a higher 1986 * {@code 'byte-range'} than the traditional <I>single-byte (256 different-characters) ASCII</I> 1987 * Character-Set. {@code UTF-8} allows for Chinese, Japanese and just about every variant of 1988 * language in the rest of the world. 1989 * 1990 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1991 * 1992 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1993 * 1994 * @see #hasUTF8MetaTag(Vector) 1995 * @see #UTF8MetaTag 1996 * @see TagNode 1997 * @see DotPair 1998 */ 1999 public static void insertUTF8MetaTag(Vector<HTMLNode> html) 2000 { 2001 // Meta-Tag to assert that the UTF-8 Charset is being used: 2002 // <meta http-equiv='Content-Type' content='text/html; charset=utf-8' /> 2003 2004 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 
2005 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 2006 2007 if (header == null) throw new NodeNotFoundException 2008 (NO_HEADER_MESSAGE.replace("INSERT-STR", "UTF-8 <META> Tag")); 2009 2010 // Insert the UTF-8 tag into the page. Put it at the top of the header, just 2011 // after <HEAD> 2012 2013 Util.insertNodes(html, header.start + 1, NEWLINE, new TagNode(UTF8MetaTag), NEWLINE); 2014 } 2015 2016 2017 // **************************************************************************************** 2018 // **************************************************************************************** 2019 // ITEMPROP Meta-Tags 2020 // **************************************************************************************** 2021 // **************************************************************************************** 2022 2023 2024 /** 2025 * This does a very simple insertion of an HTML Meta-Tag for a specific type, 2026 * Meta-Tags that have an {@code ITEMPROP}-Attribute paired with a 2027 * {@code CONTENT}-Attribute set. 
2028 * 2029 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 2030 * 2031 * @param itemProp This is a property that is passed via the {@code ITEMPROP}-Attribute 2032 * 2033 * @param contentAttributeValue This is the value that will be used to set the 2034 * {@code CONTENT}-Attribute 2035 * 2036 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 2037 * 2038 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=contentAttributeValue 2039 * DATA-FILE-ID=FT_Q_EX> 2040 * 2041 * @see #metaTagItemProp 2042 * @see #getItemProp(Vector, String) 2043 * @see DotPair 2044 * @see TagNode 2045 */ 2046 public static void insertItemProp 2047 (Vector<HTMLNode> html, String itemProp, String contentAttributeValue) 2048 { 2049 // Builds and inserts a TagNode HTML Element that looks like: 2050 // <meta itemprop='INSERT-ITEMPROP-STRING-HERE' content='INSERT-CONTENT-STRING-HERE' > 2051 2052 // Single Quotes are used, so the attribute-value may not contain single quotes. 2053 checkForSingleQuote(contentAttributeValue); 2054 2055 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 2056 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 2057 2058 if (header == null) throw new NodeNotFoundException 2059 (NO_HEADER_MESSAGE.replace("INSERT-STR", "<META ITEMPROP=... CONTENT=...> tag")); 2060 2061 // Build a <META> tag, as in the comment above 2062 TagNode metaTN = new TagNode 2063 ("<META ITEMPROP='" + itemProp + "' CONTENT='" + contentAttributeValue + "'>"); 2064 2065 // Insert the meta-tag into the page. Put it at the top of the header, 2066 // just after <HEAD> 2067 2068 Util.insertNodes(html, header.start + 1, NEWLINE, metaTN, NEWLINE); 2069 } 2070 2071 /** 2072 * This method will find an HTML 2073 * <B STYLE='color: red;'>{@code <META ITEMPROP=... 
CONTENT=...>}</B> element whose 2074 * {@code ITEMPROP}-Attribute <B STYLE='color: red;'>value</B> is equal to the 2075 * {@code String}-parameter {@code 'itemProp'} (ignoring case). 2076 * 2077 * <BR /><BR />After such an HTML {@code META}-Tag has been identified, its 2078 * {@code CONTENT}-Attribute {@code String}-value will be subsequently queried, extracted 2079 * and returned by this method. 2080 * 2081 * <BR /><BR /><B CLASS=JDDescLabel>Returning null, Gracefully:</B> 2082 * 2083 * <BR />If the page provided does not have an HTML Meta-Tag with a {@code NAME}-Attribute 2084 * whose <B STYLE='color: red;'>value</B> is {@code 'name'} or if such an element is 2085 * identified, but that tag does not have a {@code CONTENT}-Attribute, then this method 2086 * will return null. 2087 * 2088 * <BR /><BR /><B CLASS=JDDescLabel>Case Insensitive Comparison:</B> 2089 * 2090 * <BR />Before the comparison is done with the {@code 'itemProp'} parameter, that 2091 * {@code String} is trimmed with {@code String.trim()}, and the comparison performed 2092 * <I>is done while ignoring case</I>. 2093 * 2094 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 2095 * 2096 * @param itemProp The Attribute-<B STYLE='color: red;'>name</B> of the 2097 * {@code ITEMPROP}-Attribute. 2098 * 2099 * @return The {@code String}-value of the {@code CONTENT}-Attribute for a 2100 * {@code META}-Tag whose {@code ITEMPROP}-Attribute is equal to the specified name 2101 * provided by parameter {@code 'itemProp'}. 2102 * 2103 * <BR /><BR />If such information is not found on the page, then this method returns null. 2104 */ 2105 public static String getItemProp(Vector<HTMLNode> html, String itemProp) 2106 { 2107 // Find the first <META ITEMPROP=... CONTENT=...> tag element where the name equals 2108 // the string-value provided by parameter 'itemProp'. 
2109 2110 TagNode tn = InnerTagGet.first 2111 (html, "META", "ITEMPROP", TextComparitor.EQ_CI, itemProp.trim()); 2112 2113 // If there are no <META ITEMPROP='itemProp' CONTENT=...> elements found on the page, 2114 // then this method returns null. 2115 2116 if (tn == null) return null; 2117 2118 // Return the string-value of the attribute 'content'. Note that if this 2119 // attribute isn't available, this method shall return 'null', gracefully. 2120 2121 return tn.AV("content"); 2122 } 2123 2124 2125 // **************************************************************************************** 2126 // **************************************************************************************** 2127 // Open-Graph Meta-Tags 2128 // **************************************************************************************** 2129 // **************************************************************************************** 2130 2131 2132 /** 2133 * This will insert a single Open-Graph Meta-Tag into an HTML-Page. 2134 * 2135 * <BR /><BR /><B CLASS=JDDescLabel>Prepending <CODE>'og:'</CODE></B> 2136 * 2137 * <BR />The name of the property <I><B>MUST NOT</B></I> begin with the characters 2138 * {@code "og:"}, because they will be prepended when the HTML 2139 * <B STYLE='color: red;'>{@code <META PROPERTY='...' CONTENT='...' />}</B> Tag is 2140 * instantiated. 2141 * 2142 * <BR /><BR />Please review <I>exact</I> method body below. 2143 * 2144 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 2145 * 2146 * @param ogProperty This is the name of the Open-Graph protocol property that is being 2147 * inserted. Generally these are simple text-{@code String's} with alphanumeric-limited 2148 * names, or they are series of alphanumeric text-{@code String's}, separated by a period 2149 * {@code '.'} character. 
2150 * 2151 * @param ogValueAsStr If you look at the definition of the {@link #openGraphMetaTag} above 2152 * in this class, you may view all of the acceptable types that Open-Graph Properties may 2153 * use. 2154 * 2155 * <BR /><BR />Whichever property or field that is being inserted, mostly, the field must 2156 * be converted to a {@code String} when being passed to this method. 2157 * 2158 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 2159 * 2160 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM1=ogProperty 2161 * DATA-PARAM2=ogValueAsStr DATA-FILE-ID=FT_Q_EX_DOUBL> 2162 * 2163 * @see #openGraphMetaTag 2164 * @see #getAllOGMetaTags(Vector) 2165 * @see #checkForSingleQuote(String) 2166 * @see TagNode 2167 */ 2168 public static void insertOGMetaTag 2169 (Vector<HTMLNode> html, String ogProperty, String ogValueAsStr) 2170 { 2171 // Open graph tag looks like this: 2172 // <meta property='og:INSERT-OG-PROPERTY-HERE' content='INSERT-OG-VALUE-HERE' /> 2173 2174 checkForSingleQuote(ogProperty); 2175 checkForSingleQuote(ogValueAsStr); 2176 2177 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 2178 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 2179 2180 if (header == null) throw new NodeNotFoundException( 2181 NO_HEADER_MESSAGE.replace 2182 ("INSERT-STR", "Open-Graph <META NAME='og:...' ...> Tag") 2183 ); 2184 2185 // Build the Open-Graph Meta Tag 2186 TagNode metaTN = new TagNode 2187 ("<META PROPERTY='og:" + ogProperty+ "' CONTENT='" + ogValueAsStr +"'>"); 2188 2189 // Insert the tag into the page. Put it at the top of the header, just after <HEAD> 2190 Util.insertNodes(html, header.start + 1, NEWLINE, metaTN, NEWLINE); 2191 } 2192 2193 /** 2194 * This will search any Vectorized HTML-Pge for 2195 * <B STYLE='color: red;'>{@code <META PROPERTY='og:...' CONTENT='...'>}</B> Tags, and 2196 * retrieve them for placement into a {@code java.util.Properties} table. 
2197 * 2198 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 2199 * 2200 * @return This will return a Java {@code 'Properties'} Object, with all Open-Graph 2201 * properties saved inside. 2202 * 2203 * @see #openGraphMetaTag 2204 * @see #insertOGMetaTag(Vector, String, String) 2205 * @see TagNode#AV(String) 2206 * @see InnerTagGet 2207 */ 2208 public static Properties getAllOGMetaTags(Vector<? extends HTMLNode> html) 2209 { 2210 // InnerTagGet.all: Returns a vector of TagNode's that resemble: 2211 // <META property="og:..." ...> 2212 // 2213 // SW_CI_TRM: Check the 'property' Attribute-Value using a Case-Insensitive, 2214 // 'Starts-With' String-Comparison 2215 // Trim the 'property' Attribute-Value String of possible leading & 2216 // trailing White-Space before performing the comparison. 2217 2218 Vector<TagNode> v = InnerTagGet.all 2219 (html, "META", "PROPERTY", TextComparitor.SW_CI_TRM, "og:"); 2220 2221 Properties ret = new Properties(); 2222 2223 for (TagNode tn : v) 2224 ret.put(tn.AV("PROPERTY").substring(3), tn.AV("CONTENT")); 2225 2226 return ret; 2227 } 2228 } 2229}