001package Torello.HTML; 002 003import java.util.*; 004import java.util.stream.*; 005 006import Torello.HTML.NodeSearch.*; 007import Torello.Java.*; 008 009import Torello.Java.Additional.Ret2; 010import Torello.HTML.Tools.Images.IF; 011 012/** 013 * Tools to retrieve and insert tags into the {@code <HEAD>} of a web-page. 014 * <EMBED CLASS='external-html' DATA-FILE-ID=FEATURES> 015 */ 016@Torello.JavaDoc.StaticFunctional 017public class Features 018{ 019 private Features() { } 020 021 /** Error Message that is used repeatedly. */ 022 public static final String NO_HEADER_MESSAGE = 023 "You are attempting to insert an HTML INSERT-STR, but such an element belongs in the " + 024 "page's header. Unfortunately, the page or sub-page you have passed does not have a " + 025 "<HEAD>...</HEAD> sub-section. Therefore, there is no place to insert the elements."; 026 027 /** 028 * This {@code String} may be inserted in the HTML <B STYLE='color: red;'> 029 * {@code <HEAD> ... </HEAD>}</B> section to add a "logo-image" at the top-left corner of the 030 * Web-Browser's tab for the page when it loads. This logo is called a {@code 'favicon'}. 031 * 032 * @see #insertFavicon(Vector, String) 033 * @see #hasFavicon(Vector) 034 */ 035 public static final String favicon = 036 "<LINK REL='icon' TYPE='image/INSERT-IMAGE-TYPE-HERE' HREF='INSERT-URL-STRING-HERE' />"; 037 038 /** 039 * This {@code String} may be inserted in the HTML <B STYLE='color: red;'> 040 * {@code <HEAD> ... </HEAD>}</B> section to add a <B>Cascading Style Sheet</B> (a 041 * {@code '.css'} file) to your page. 042 * 043 * <BR /><BR />The web-browser that ultimately loads the HTML that you are exporting will 044 * render the style elements across all the HTML elements in your page that match their 045 * respective CSS-Selectors. 
Without going into a big diatribe about how CSS works, just know 046 * that the {@code String} used to build / instantiate a new {@link TagNode} with an externally 047 * linked {@code CSS}-Page is provided here, by this field. 048 * 049 * @see #insertCSSLink(Vector, String) 050 * @see #getAllCSSLinks(Vector) 051 */ 052 public static final String cssExternalSheet = 053 "<LINK REL=stylesheet TYPE='text/css' HREF='INSERT-URL-STRING-HERE' />"; 054 055 /** 056 * This {@code String} may be inserted in the HTML <B STYLE='color: red;'> 057 * {@code <HEAD> ... </HEAD>}</B> section to add a <B>Cascading Style Sheet</B> (a 058 * {@code '.css'} file) to your page. This particular {@code String}-Constant Field includes / 059 * allows for a {@code MEDIA}-Attribute / Inner-Tag. 060 * 061 * @see #insertCSSLink(Vector, String) 062 * @see #insertCSSLink(Vector, String, String) 063 * @see #getAllCSSLinks(Vector) 064 */ 065 public static final String cssExternalSheetWithMediaAttribute = 066 "<LINK REL=stylesheet TYPE='text/css' HREF='INSERT-URL-STRING-HERE' " + 067 "MEDIA='INSERT-MEDIA-ATTRIBUTE-VALUE-HERE' />"; 068 069 /** 070 * This {@code String} may be inserted in the HTML <B STYLE='color: red;'> 071 * {@code <HEAD> ... </HEAD>}</B> section to add an externally-linked 072 * <B>Java-Script File</B> ({@code '.js'} File) to your page. 073 * 074 * <BR /><BR />The Web-Browser will download this <B>Java-Script</B> page from the 075 * {@code URL} that you ultimately provide and (hopefully) load all your variable definitions 076 * and methods when the page loads. 077 * 078 * <BR /><BR /><B CLASS=JDDescLabel>Closing {@code </SCRIPT>} Tag:</B> 079 * 080 * <BR />Inserting an external <B>Java-Script</B> Page has one important difference vis-a-vis 081 * inserting an external CSS-Page. 
* Inserting a link to a {@code '.js'} page requires
     * <B><I>both</I></B> the opening
     * <B STYLE='color: red;'>{@code <SCRIPT ..>}</B> <B><I>and</I></B> the closing
     * <B STYLE='color: red;'>{@code </SCRIPT>}</B>
     * Tags.
     *
     * <BR /><BR />This is expected and required even-when / especially-when there is no actual
     * java-script code being placed on the {@code '.html'} page itself.  Effectively, regardless
     * of whether you are putting actual java-script code into / inside your HTML page, or you are
     * just inserting a link to a {@code '.js'} File on your server - <I>you must always create
     * both the open and the closed HTML
     * <B STYLE='color: red;'>{@code <SCRIPT SRC='...'></SCRIPT>}</B> tags and insert them into
     * your Vectorized-HTML Web-Page</I>.
     *
     * <BR /><BR />In the brief example below, it should be clear that even though the
     * {@code SCRIPT}-Tags do not enclose any <B>Java-Script</B>, both the open and the closed
     * versions of the tag are placed into the HTML-File.
     *
     * <DIV CLASS="HTML">{@code
     * <!-- This is a short note about including the HTML SCRIPT element in your web-pages. -->
     * <HTML>
     * <HEAD>
     * <!-- Version #1 Inserting a java-script 'variables & functions' external-page -->
     * <SCRIPT TYPE='text/javascript' SRC='/script/javaScriptFiles/functions.js'>
     * </SCRIPT>
     * <!-- Right here (line above) we always need the closing Script-tag, even when there is no
     *      actual java-script present, and the methods/variables are going to be downloaded from
     *      the java-script file identified by the SRC="..." attribute! -->
     *
     * <SCRIPT TYPE='text/javascript'>
     * var someVar1;
     * var someVar2;
     *
     * function someFunction()
     * { return; }
     *
     * </SCRIPT> <!-- Either way, the closing-script tag is expected.
--> 118 * }</DIV> 119 * 120 * @see #insertExternalJavaScriptLink(Vector, String) 121 * @see #getAllExternalJSLinks(Vector) 122 */ 123 public static final String javaScriptExternalPage = 124 "<SCRIPT TYPE='text/javascript' SRC='INSERT-URL-STRING-HERE'>"; 125 126 /** 127 * If you have pages on your site that are almost identical, then you may need to inform search 128 * engines which one to prioritize. Or you might have syndicated content on your site which was 129 * republished elsewhere. You can do both of these things without incurring a duplicate content 130 * penalty – as long as you use a {@code CANONICAL}-Tag. 131 * 132 * <BR /><BR />Instead of confusing Google and missing your ranking on the SERP's, you are 133 * guiding the crawlers as to which URL counts as the “main” one. This places the emphasis on 134 * the right URL and prevents the others from cannibalizing your SEO. 135 * 136 * <BR /><BR />Use {@code CANONICAL}-Tags to avoid having problems with duplicate content that 137 * may affect your rankings. 138 * 139 * <BR /><BR /><HR><BR /> 140 * 141 * The content of this Documentation Page was copied from a page on the web-domain 142 * {@code 'http://searchenginewatch.com'}. It was lifted on May 24th, 2019. 
143 * 144 * <BR /><BR />See link below, if still valid: 145 * 146 * <BR /><A 147 * HREF="https://searchenginewatch.com/2018/04/04/a-quick-and-easy-guide-to-meta-tags-in-seo/"> 148 * https://searchenginewatch.com/2018/04/04/a-quick-and-easy-guide-to-meta-tags-in-seo/ </A> 149 * 150 * @see #insertCanonicalURL(Vector, String) 151 * @see #hasCanonicalURL(Vector) 152 */ 153 public static final String canonicalTag = 154 "<LINK REL=canonical HREF='INSERT-URL-STRING-HERE' />"; 155 156 /** This is a new-line {@code HTMLNode} */ 157 protected static final TextNode NEWLINE = new TextNode("\n"); 158 159 /** 160 * This method checks whether the {@code String}-Parameter {@code 's'} contains a 161 * Single-Quotations Punctuation-Mark anywhere inside that {@code String}. If so, a properly 162 * formatted exception is thrown. This is used as an internal Helper-Method. 163 * 164 * @param s This may be any Java {@code String}, but generally it is one used to insert into an 165 * HTML {@code CONTENT}-Attribute. 166 * 167 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=s DATA-FILE-ID=FT_Q_EX> 168 */ 169 protected static void checkForSingleQuote(String s) 170 { 171 int pos; 172 173 if ((pos = s.indexOf("'")) != -1) throw new QuotesException( 174 "The passed string-parameter may not contain a single-quote punctuation mark. " + 175 "Yours was: [" + s + "], and has a single-quotation mark at string-position " + 176 "[" + pos + "]" 177 ); 178 } 179 180 /** 181 * This inserts a favicon HTML link element into the right location so that a particular 182 * Web-Page will render an "browser icon image" into the top-left corner of the Web-Page's 183 * Browser-Tab. 
184 * 185 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 186 * 187 * @param imageURLAsString <EMBED CLASS='external-html' DATA-FIELD=favicon 188 * DATA-FILE-ID=FT_STR_INS_PARAM> 189 * 190 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 191 * 192 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=imageURLAsString 193 * DATA-FILE-ID=FT_Q_EX> 194 * 195 * @see #favicon 196 * @see #checkForSingleQuote(String) 197 */ 198 public static void insertFavicon(Vector<HTMLNode> html, String imageURLAsString) 199 { 200 // Insert the Favicon <LINK ...> element into the <HEAD> section of the input html page. 201 // <link rel='icon' type='image/INSERT-IMAGE-TYPE-HERE' href='INSERT-URL-STRING-HERE' /> 202 203 checkForSingleQuote(imageURLAsString); 204 205 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 206 DotPair header = TagNodeFindInclusive.first(html, "head"); 207 208 if (header == null) throw new NodeNotFoundException 209 (NO_HEADER_MESSAGE.replace("INSERT-STR", "favicon <LINK> element")); 210 211 String ext = IF.getGuess(imageURLAsString).extension; 212 213 if (ext == null) throw new IllegalArgumentException( 214 "The Image-Type of the 'imageURLAsString' parameter could not be determined. " + 215 "The method IF.getGuess(faviconURL) returned null. Please provide a favicon with " + 216 "standard image file-type. This is required because the image-type is required " + 217 "to be placed inside the HTML <LINK TYPE=... HREF=...> Element 'TYPE' Attribute." 218 ); 219 220 // Build a new Favicon TagNode. 221 TagNode faviconTN = new TagNode 222 ("<LINK REL='icon' TYPE='image/" + ext + "' HREF='" + imageURLAsString + "' />"); 223 224 // Insert the Favicon into the page. 
Put it at the top of the header, just after <HEAD> 225 Util.insertNodes(html, header.start + 1, NEWLINE, faviconTN, NEWLINE); 226 } 227 228 /** 229 * This method will search for an HTML <B STYLE='color: red;'>{@code <LINK REL="icon" ...>}</B> 230 * Tag, in hopes of finding a {@code REL}-Attribute whose value is {@code 'icon'}. 231 * 232 * <BR /><BR />When this method finds such a tag, it will return the 233 * <B STYLE='color: red;'>value</B> of that Tag's {@code HREF}-Attribute. 234 * 235 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 236 * 237 * @return This method will return the {@code String}-<B STYLE='color: red;'>value</B> of the 238 * {@code HREF}-Attribute found inside the {@code LINK}-Tag. 239 * 240 * If this page or sub-page does not have such a tag with an {@code HREF}-Attribute, then null 241 * is returned. 242 * 243 * <BR /><BR /><B STYLE="color: red;">NOTE:</B> In the event that multiple copies 244 * of the HTML {@code LINK}-Tag are found, and more than one of these tags has a 245 * {@code REL}-Attribute with a <B STYLE='color: red;'>value</B> equal to {@code "icon"}, then 246 * this method will simple return the first of the {@code 'favicon'} tags that were found. 247 * 248 * <BR /><BR />An (albeit erroneous) page, with multiple favicon definitions, will not cause 249 * this method to throw an exception. 250 * 251 * @see InnerTagGet 252 * @see #favicon 253 * @see TagNode#AV(String) 254 */ 255 public static String hasFavicon(Vector<? extends HTMLNode> html) 256 { 257 // InnerTagGet.all: Returns a vector of TagNode's that resemble: <LINK rel="icon" ...> 258 // 259 // EQ_CI_TRM: Check the 'rel' Attribute-Value using a Case-Insensitive, Equality 260 // String-Comparison. 261 // Trim the 'rel' Attribute-Value String of possible leading & trailing 262 // White-Space before performing the comparison. 
263 264 Vector<TagNode> list = InnerTagGet.all 265 (html, "LINK", "REL", TextComparitor.EQ_CI_TRM, "icon"); 266 267 // If there were no HTML "<LINK ...>" elements with REL='ICON' attributes, then 268 // there was no favicon. 269 270 if (list.size() == 0) return null; 271 272 // Just in case there were multiple favicon <LINK ...> tags, just return the first 273 // one found. Inside of a <LINK REL="icon" HREF="..."> the 'HREF' Attribute contains 274 // the Image-URL. Use TagNode.AV("HREF") to retrieve that image url. 275 276 String s; 277 for (TagNode tn : list) if ((s = tn.AV("HREF")) != null) return s; 278 279 // If for some reason, none of these <LINK REL='ICON' ...> elements had an "HREF" 280 // attribute, then just return null. 281 282 return null; 283 } 284 285 /** 286 * This inserts an HTML {@code LINK}-Tag into Web-Page parameter {@code 'html'} with the 287 * purpose of linking an externally-defined <B>Cascading Style Sheet</B> (also known as a 288 * {@code CSS}-Page) into that Page-{@code Vector}. 
289 * 290 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 291 * 292 * @param externalCSSFileURLAsString <EMBED CLASS='external-html' DATA-FIELD=cssExternalSheet 293 * DATA-FILE-ID=FT_STR_INS_PARAM> 294 * 295 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 296 * 297 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=externalCSSFileURLAsString 298 * DATA-FILE-ID=FT_Q_EX> 299 * 300 * @see #cssExternalSheet 301 * @see #cssExternalSheetWithMediaAttribute 302 * @see #insertCSSLink(Vector, String, String) 303 * @see #getAllCSSLinks(Vector) 304 * @see #checkForSingleQuote(String) 305 * @see DotPair 306 * @see TagNode 307 */ 308 public static void insertCSSLink(Vector<HTMLNode> html, String externalCSSFileURLAsString) 309 { 310 // Inserts an external CSS Link into the <HEAD> section of this html page vector 311 // <link REL=stylesheet type='text/css' href='INSERT-URL-STRING-HERE' /> 312 313 checkForSingleQuote(externalCSSFileURLAsString); 314 315 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 316 DotPair header = TagNodeFindInclusive.first(html, "head"); 317 318 if (header == null) throw new NodeNotFoundException( 319 NO_HEADER_MESSAGE.replace 320 ("INSERT-STR", "externally-linked CSS page <LINK> element") 321 ); 322 323 TagNode cssTN = new TagNode 324 ("<LINK REL=stylesheet TYPE='text/css' HREF='" + externalCSSFileURLAsString + "' />"); 325 326 // Insert the Style-Sheet link into the page. 
Put it at the top of the header, 327 // just after <HEAD> 328 329 Util.insertNodes(html, header.start + 1, NEWLINE, cssTN, NEWLINE); 330 } 331 332 /** 333 * This inserts a <B>Cascading Style Sheet</B> with the extra {@code MEDIA}-Attribute using 334 * an HTML {@code LINK}-Tag into the Vectorized-HTML Web-Page parameter {@code 'html'} 335 * 336 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 337 * 338 * @param externalCSSFileURLAsString <EMBED CLASS='external-html' DATA-FIELD=cssExternalSheet 339 * DATA-FILE-ID=FT_STR_INS_PARAM> 340 * 341 * @param mediaInnerTagValue Externally linked CSS-Pages, which are included using the HTML 342 * {@code LINK}-Tag may explicitly request a {@code MEDIA}-Attribute be inserted into that 343 * Tag. That {@code MEDIA}-Attribute may take one of five values. In such a tag, the extra 344 * attribute specifies when the listed CSS-Rules are to be applied. 345 * 346 * <BR /><BR />Listed here are the most common values for the {@code MEDIA}-Attribute: 347 * 348 * <BR /><TABLE CLASS=JDBriefTable> 349 * <TR> 350 * <TH>Attribute Value</TH> 351 * <TH>Intended CSS Meaning</TH> 352 * </TR> 353 * <TR> 354 * <TD>screen</TD> 355 * <TD>indicates for use on a computer screen</TD> 356 * </TR> 357 * <TR> 358 * <TD>projection</TD> 359 * <TD>for projected presentations</TD> 360 * </TR> 361 * <TR> 362 * <TD>handheld</TD> 363 * <TD>for handheld devices (typically with small screens)</TD></TR> 364 * <TR> 365 * <TD>print</TD> 366 * <TD>to style printed Web-Pages</TD> 367 * </TR> 368 * <TR> 369 * <TD>all</TD> 370 * <TD>(default value) This is what most people choose. You can leave off the 371 * {@code MEDIA}-Attribute completely if you want your styles to be applied for all 372 * media types. 
373 * </TD> 374 * </TR> 375 * </TABLE> 376 * 377 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 378 * 379 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM1=externalCSSFileURLAsString 380 * DATA-PARAM2=mediaInnerTagValue DATA-FILE-ID=FT_Q_EX_DOUBL> 381 * 382 * @see #cssExternalSheet 383 * @see #cssExternalSheetWithMediaAttribute 384 * @see #insertCSSLink(Vector, String) 385 * @see #getAllCSSLinks(Vector) 386 * @see #checkForSingleQuote(String) 387 * @see DotPair 388 */ 389 public static void insertCSSLink 390 (Vector<HTMLNode> html, String externalCSSFileURLAsString, String mediaInnerTagValue) 391 { 392 // Inserts an external CSS Link (with 'media' attribute) into the <HEAD> section of 393 // this html page vector 394 // <link REL=stylesheet type='text/css' href='INSERT-URL-STRING-HERE' 395 // media='INSERT-MEDIA-ATTRIBUTE-VALUE-HERE' /> 396 397 checkForSingleQuote(externalCSSFileURLAsString); 398 checkForSingleQuote(mediaInnerTagValue); 399 400 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 401 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 402 403 if (header == null) throw new NodeNotFoundException( 404 NO_HEADER_MESSAGE.replace 405 ("INSERT-STR", "externally-linked CSS Style-Sheet LINK-Tag") 406 ); 407 408 // Build the TagNode 409 TagNode cssTN = new TagNode( 410 "<LINK REL=stylesheet TYPE='text/css' HREF='" + externalCSSFileURLAsString + "' " + 411 "MEDIA='" + mediaInnerTagValue + "' />" 412 ); 413 414 // Insert the Style-Sheet link into the page. Put it at the top of the header, just 415 // after <HEAD> 416 417 Util.insertNodes(html, header.start + 1, NEWLINE, cssTN, NEWLINE); 418 } 419 420 /** 421 * This will retrieve all linked CSS-Pages from Vectorized-HTML Web-Page parameter 422 * {@code 'html'}. 
423 * 424 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 425 * @return This will return the links as a list of {@link TagNode}'s' 426 * @see #insertCSSLink(Vector, String) 427 * @see #insertCSSLink(Vector, String, String) 428 * @see InnerTagGet 429 */ 430 public static Vector<TagNode> getAllCSSLinks(Vector<? extends HTMLNode> html) 431 { 432 // InnerTagGet.all: Returns a vector of TagNode's that resemble: 433 // <LINK rel="stylesheet" ...> 434 // 435 // EQ_CI_TRM: Check the 'rel' Attribute-Value using a Case-Insensitive, Equality 436 // String-Comparison 437 // Trim the 'rel' Attribute-Value String of possible leading & trailing 438 // White-Space before performing the comparison. 439 440 return InnerTagGet.all(html, "LINK", "REL", TextComparitor.EQ_CI_TRM, "stylesheet"); 441 } 442 443 /** 444 * This inserts an HTML <B STYLE='color: red;'>{@code '<LINK ...>'}</B> element into the proper 445 * location for linking an externally-defined <B>Java-Script</B> (a {@code '.js'} File) into 446 * the Web-Page. 447 * 448 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 449 * 450 * @param externalJSFileURLAsString 451 * <EMBED CLASS='external-html' DATA-FIELD=javaScriptExternalPage DATA-FILE-ID=FT_STR_INS_PARAM> 452 * 453 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 454 * 455 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=externalJSFileURLAsString 456 * DATA-FILE-ID=FT_Q_EX> 457 * 458 * @see #javaScriptExternalPage 459 * @see #getAllExternalJSLinks(Vector) 460 * @see #checkForSingleQuote(String) 461 * @see TagNode 462 * @see TextNode 463 * @see DotPair 464 * @see HTMLTags#hasTag(String, TC) 465 */ 466 public static void insertExternalJavaScriptLink 467 (Vector<HTMLNode> html, String externalJSFileURLAsString) 468 { 469 // Builds an external Java-Script link, and inserts it into the header portion of 470 // this html page. 
471 // <script type='text/javascript' src='INSERT-URL-STRING-HERE'> 472 473 checkForSingleQuote(externalJSFileURLAsString); 474 475 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 476 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 477 478 if (header == null) throw new NodeNotFoundException( 479 NO_HEADER_MESSAGE.replace( 480 "INSERT-STR", "externally-linked Java-Script <SCRIPT> ... </SCRIPT> elements") 481 ); 482 483 // Build an HTML <SCRIPT ...> node, and a </SCRIPT> node. 484 HTMLNode n = new TagNode 485 ("<SCRIPT TYPE='text/javascript' SRC='" + externalJSFileURLAsString + "'>"); 486 487 HTMLNode closeN = HTMLTags.hasTag("script", TC.ClosingTags); 488 489 // Insert the Java-Script link into the page. Put it at the top of the header, just 490 // after <HEAD> 491 492 Util.insertNodes(html, header.start + 1, NEWLINE, n, closeN, NEWLINE); 493 } 494 495 /** 496 * Inserting <B>Java-Script</B> directly onto an HTML-Page and including an external link to a 497 * {@code '.js'} File are extremely similar tasks. Either way, in both cases the construct is 498 * simply: 499 * 500 * <BR /><BR /><B STYLE='color: red;'>{@code <SCRIPT TYPE='text/javascript'> ... </SCRIPT>}</B> 501 * 502 * <BR /><BR />When the actual functions and methods are pasted into an HTML-Page directly, 503 * they are pasted into the {@code String} above where the ellipses {@code '...'} are. When a 504 * link is made to an external page from a directory on the same Web-Server - both the open and 505 * the close HTML {@code SCRIPT}-Tag's must be included. 506 * 507 * <BR /><BR />If just a link is being added, then the text-content of the {@code SCRIPT}-Tag 508 * should just be left blank or empty. Instead, the {@code URL} to the Java-Script Page is 509 * added as an HTML {@code SRC}-Attribute. 
510 * 511 * <BR /><BR />This method will retrieve any and all {@code 'SCRIPT'} nodes that meet the 512 * following criteria: 513 * 514 * <BR /><BR /><OL CLASS=JDOL> 515 * <LI> The <B>Script Body</B> must be empty, meaning there is no Java-Script between the 516 * opening and closing {@code SCRIPT}-Tags 517 * </LI> 518 * 519 * <LI> The HTML {@code SRC}-Attribute must contain a non-null, non-zero-length 520 * <B STYLE='color: red;'>value</B> 521 * </LI> 522 * 523 * </OL> 524 * 525 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 526 * 527 * @return This will return a list of relative {@code URL's} to externally linked 528 * <B>Java-Script</B> Pages as {@code String's} 529 * 530 * @see InnerTagGetInclusive 531 * @see #javaScriptExternalPage 532 * @see #insertExternalJavaScriptLink(Vector, String) 533 * @see TagNode 534 * @see TextNode 535 * @see TagNode#AV(String) 536 * @see HTMLNode#str 537 */ 538 public static String[] getAllExternalJSLinks(Vector<? extends HTMLNode> html) 539 { 540 // InnerTagGetInclusive.all: Returns a vector of TagNode's that resemble: 541 // <SCRIPT TYPE="javascript" ...> 542 // 543 // CN_CI: Check the 'rel' Attribute-Value using a Case-Insensitive, "Contains" 544 // String-Comparison 545 // 'contains' rather than 'equals' testing is done because this value may be 546 // "javascript", but it may also be "text/javascript" 547 // 548 // Inclusive: This means that everything between the <SCRIPT type="javascript"> ... and 549 // the closing </SCRIPT> tag are returned in a vector of vectors. 
550 551 Vector<Vector<HTMLNode>> v = InnerTagGetInclusive.all 552 (html, "SCRIPT", "TYPE", TextComparitor.CN_CI, "javascript"); 553 554 Stream.Builder<String> b = Stream.builder(); 555 556 TOP: 557 for (Vector<HTMLNode> scriptSection : v) 558 { 559 String srcValue = null; 560 561 for (HTMLNode n : scriptSection) 562 { 563 if (n.isTagNode()) 564 if ((srcValue = ((TagNode) n).AV("SRC")) != null) 565 break; 566 567 if (n.isTextNode()) 568 if (n.str.trim().length() > 0) 569 break TOP; 570 } 571 572 b.add(srcValue); 573 } 574 575 return b.build().toArray(String[]::new); 576 } 577 578 /** 579 * This section will insert a Canonical-{@code URL} into Vectorized-HTML parameter 580 * {@code 'html'}. The {@code URL} itself will be inserted into an HTML {@code LINK}-Tag as 581 * below: 582 * 583 * <BR /><BR /><B STYLE='color: red;'>{@code <LINK REL=canonical HREF='the_url'>}</B> 584 * 585 * <BR /><BR />Since HTML mandates that such elements be located in the {@code 'HEAD'} portion 586 * of an HTML-Page, if the Vectorized-HTML parameter {@code 'html'} does not have a 587 * {@code 'HEAD'} area, then this method shall throw a {@link NodeNotFoundException}. 588 * 589 * <BR /><BR />Note that this exception is an unchecked / runtime exception. 
590 * 591 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 592 * 593 * @param canonicalURLAsStr 594 * <EMBED CLASS='external-html' DATA-FIELD=canonicalTag DATA-FILE-ID=FT_STR_INS_PARAM> 595 * 596 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 597 * 598 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=canonicalURLAsStr 599 * DATA-FILE-ID=FT_Q_EX> 600 * 601 * @see #canonicalTag 602 * @see #hasCanonicalURL(Vector) 603 * @see #checkForSingleQuote(String) 604 * @see TagNode 605 * @see DotPair 606 */ 607 public static void insertCanonicalURL(Vector<HTMLNode> html, String canonicalURLAsStr) 608 { 609 // Inserts a link element into the header of this page 610 // <link REL=canonical href='INSERT-URL-STRING-HERE' /> 611 612 checkForSingleQuote(canonicalURLAsStr); 613 614 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 615 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 616 617 if (header == null) throw new NodeNotFoundException 618 (NO_HEADER_MESSAGE.replace("INSERT-STR", "Canonical-url LINK-Tag")); 619 620 // Builds the canonical <LINK ...> element 621 TagNode linkTN = new TagNode 622 ("<LINK REL=canonical HREF='" + canonicalURLAsStr + "' />"); 623 624 // Insert the canonical-url into the page. Put it at the top of the header, just 625 // after <HEAD> 626 627 Util.insertNodes(html, header.start + 1, NEWLINE, linkTN, NEWLINE); 628 } 629 630 /** 631 * This method will check whether a Vectorized-HTML Page has an HTML 632 * <B STYLE='color: red;'>{@code <LINK REL=canonical ...>}</B> Tag. This tag is used to 633 * inform Search-Engines whether or not this page <I>surrenders</I> or <I>relays</I> to a 634 * "Canonical-{@code URL}". 635 * 636 * <BR /><BR />Canonical-Pages help Search-Engines index large web-sites by providing a root or 637 * Master-{@code URL} to which all sub-pages may point. 
Such {@code URL's} are often (but not 638 * always) like a "Table of Contents". 639 * 640 * <BR /><BR />The primary goal of having a canonical is to avoid forcing Search-Engines (and 641 * their users) from sifting through and indexing every page of a large Web-Site, and instead 642 * focusing on either an introductory T.O.C. or a Title-Page. 643 * 644 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 645 * 646 * @return This will return whatever text was placed inside the canonical-url 647 * {@code HREF='some_url'} attribute/value pair of the HTML link tag. If there were no HTML 648 * {@code <LINK REL=canonical HREF='some_url'>} tag, then this method will return null. 649 * 650 * @throws MalformedHTMLException This exception will be thrown if there are multiple html tags 651 * that match the link, and REL=canonical search criteria requirements. If an HTML element 652 * {@code <link REL=canonical>} is found, but that element does not have an 653 * {@code href='...'} attribute, or that attribute is of zero length, then this a situation 654 * that will also force this exception to throw. 655 * 656 * @see InnerTagGet 657 * @see #canonicalTag 658 * @see #insertCanonicalURL(Vector, String) 659 * @see TagNode#AV(String) 660 */ 661 public static String hasCanonicalURL(Vector<? extends HTMLNode> html) 662 throws MalformedHTMLException 663 { 664 // InnerTagGet.all: Returns a vector of TagNode's that resemble: 665 // <LINK rel="canonical" ...> 666 // 667 // EQ_CI_TRM: Check the 'rel' Attribute-Value using a Case-Insensitive, Equality 668 // String-Comparison 669 // Trim the 'rel' Attribute-Value String of possible leading & trailing 670 // White-Space before performing the comparison. 
671 672 Vector<TagNode> v = InnerTagGet.all 673 (html, "LINK", "REL", TextComparitor.EQ_CI_TRM, "canonical"); 674 675 if (v.size() == 0) return null; 676 677 if (v.size() > 1) throw new MalformedHTMLException( 678 "The Web-Page you have passed has precisely " + v.size() + 679 " Canonical-URL LINK-Tags, but it may not have more than 1. This is " + 680 "invalid HTML." 681 ); 682 683 String s = v.elementAt(0).AV("href"); 684 685 if (s == null) throw new MalformedHTMLException( 686 "The HTML LINK-Tag that was retrieved, contained a " + 687 "REL=canonical Attribute-Value pair, but did not have an HREF-Attribute." + 688 "This is invalid HTML." 689 ); 690 691 if (s.length() == 0) throw new MalformedHTMLException( 692 "The HTML LINK-Tag that was retrieved contained a zero-length " + 693 "String as the Attribute-Value for the HREF-Attribute. This is not " + 694 "invalid, but poorly formatted HTML." 695 ); 696 697 return s; 698 } 699 700 /** 701 * Tools made specifically for the {@code <META>} tags in the {@code <HEAD>} of a web-page. 702 * 703 * <EMBED CLASS='external-html' DATA-FILE-ID=FEATURES_META> 704 */ 705 @Torello.JavaDoc.StaticFunctional 706 public static class Meta 707 { 708 private Meta() { } 709 710 711 // **************************************************************************************** 712 // **************************************************************************************** 713 // Static String-Constants (the tags!) 714 // **************************************************************************************** 715 // **************************************************************************************** 716 717 718 /** 719 * This is the most common HTML <B STYLE='color: red;'>{@code <META ... >}</B> Tag. 
*
 * <BR /><BR />Template text: the {@code NAME} and {@code CONTENT} values are placeholders.
 *
 * @see #getAllMetaTagNames(Vector)
 * @see #insertMetaTagName(Vector, MetaTagName, String)
 */
public static final String metaTagName =
    "<META NAME='INSERT-NAME-STRING-HERE' CONTENT='INSERT-CONTENT-STRING-HERE'>";

/**
 * This HTML <B STYLE='color: red;'>{@code <META ...>}</B> Tag is less frequently used, but
 * does provide some properties needed and used by various Web-Servers. It is the
 * <B>{@code 'ITEMPROP'}</B> Meta-Tag.
 *
 * <BR /><BR />Template text: the {@code ITEMPROP} and {@code CONTENT} values are
 * placeholders.
 *
 * @see #getItemProp(Vector, String)
 * @see #insertItemProp(Vector, String, String)
 */
public static final String metaTagItemProp =
    "<META ITEMPROP='INSERT-ITEMPROP-STRING-HERE' CONTENT='INSERT-CONTENT-STRING-HERE'>";

/**
 * <EMBED CLASS='external-html' DATA-PROP=robots DATA-FILE-ID=FEATURES_HTTP_EQUIV>
 *
 * Template text for a Meta-Tag carrying an {@code HTTP-EQUIV}-Attribute and a
 * {@code CONTENT}-Attribute; both values are placeholders.
 *
 * <BR /><BR />NOTE(review): {@code DATA-PROP=robots} looks copied from the robots tag
 * documentation - confirm it is the intended value for this field.
 *
 * @see #getHTTPEquiv(Vector, String)
 * @see #insertHTTPEquiv(Vector, String, String)
 */
public static final String metaTagHTTPEquiv =
    "<META HTTP-EQUIV='INSERT-HTTP-EQUIV-STRING-HERE' CONTENT='INSERT-CONTENT-STRING-HERE'>";

/**
 * <EMBED CLASS='external-html' DATA-PROP=robots DATA-FILE-ID=FEATURES_META_PROP>
 *
 * A {@code Robots}-Property Meta-Tag lets you utilize a granular, page-specific approach
 * to controlling how an individual page should be indexed and served to users in
 * Search-Engine results.
 *
 * @see #insertRobots(Vector, boolean, boolean)
 * @see #getAllRobots(Vector)
 */
public static final String robotsMetaTag =
    "<META NAME=robots CONTENT='INSERT-CONTENT-STRING-HERE'>";

/**
 * <EMBED CLASS='external-html' DATA-PROP=description DATA-FILE-ID=FEATURES_META_PROP>
 *
 * When search engines crawl Internet Web-Pages to read the provided key-words and
 * descriptions used for indexing, this particular Meta-Tag Property is one of the first
 * those crawlers will look at.
 *
 * <BR /><BR />You may include a {@code Description}-Property in the {@code 'HEAD'} portion
 * of your site’s main-page. A {@code META}-Description can influence both a Search-Engine's
 * Web-Crawlers, and ultimately the click-through rates of your readers.
 *
 * <BR /><BR />Google has stated that Meta-Tag {@code Description}-Properties are NOT used
 * to rank pages.
 *
 * @see #insertDescription(Vector, String)
 * @see #hasDescription(Vector)
 */
public static final String descriptionMetaTag =
    "<META NAME=description CONTENT='INSERT-DESCRIPTION-OR-KEYWORDS-HERE'>";

/**
 * <EMBED CLASS='external-html' DATA-FILE-ID=FEATURES_UTF8>
 *
 * Unlike the other tag constants, this text contains no placeholder: the
 * {@code HTTP-EQUIV} and {@code CONTENT} values are already filled in.
 *
 * @see #insertUTF8MetaTag(Vector)
 * @see #hasUTF8MetaTag(Vector)
 */
public static final String UTF8MetaTag =
    "<META HTTP-EQUIV='Content-Type' CONTENT='text/html; charset=utf-8'>";

/**
 * <EMBED CLASS='external-html' DATA-FILE-ID=FEATURES_OPEN_GRAPH>
 *
 * Template text: the part of the {@code PROPERTY} value after {@code 'og:'} and the
 * {@code CONTENT} value are placeholders.
 *
 * @see #insertOGMetaTag(Vector, String, String)
 * @see #getAllOGMetaTags(Vector)
 */
public static final String openGraphMetaTag =
    "<META PROPERTY='og:INSERT-OG-PROPERTY-HERE' CONTENT='INSERT-OG-VALUE-HERE'>";

/**
 * All Open-Graph Property names.
 *
 * <BR /><BR />NOTE(review): the map is created empty at this declaration; presumably it
 * is populated elsewhere in this class - confirm. The reference is {@code final}, but the
 * map itself is mutable and publicly reachable.
 */
public static final TreeMap<String, String> openGraphProperties = new TreeMap<>();

/**
 * <EMBED CLASS='external-html' DATA-PROP=keywords DATA-FILE-ID=FEATURES_META_PROP>
 *
 * A {@code KeyWords}-Property helps identify relevant, pertinent or 'germane' words that
 * describe the content of a Web-Site or Web-Page to a Web-Indexing or Web-Search
 * Organization.
807 * 808 * @see #insertKeyWords(Vector, String[]) 809 * @see #getAllKeyWords(Vector) 810 */ 811 public static final String keyWordsMetaTag = 812 "<META NAME=keywords CONTENT='INSERT-COMMA-SEPARATED-KEYWORDS-HERE'>"; 813 814 /** 815 * <EMBED CLASS='external-html' DATA-PROP=author DATA-FILE-ID=FEATURES_META_PROP> 816 * 817 * This helps identify Web-Sites or Web-Pages "Author-Names" to Web-Indexing and Web-Search 818 * Organizations. 819 * 820 * @see #insertAuthor(Vector, String) 821 * @see #hasAuthor(Vector) 822 */ 823 public static final String authorMetaTag = 824 "<META NAME=author CONTENT='INSERT-AUTHOR-NAME-HERE'>"; 825 826 827 // **************************************************************************************** 828 // **************************************************************************************** 829 // Retrieve all Meta-Tags as a java.util.Properties instance 830 // **************************************************************************************** 831 // **************************************************************************************** 832 833 834 /** 835 * This simple method will retrieve a {@code java.util.Properties} object for each and 836 * every HTML <B STYLE='color: red'>{@code <META ...>}</B> tag found within a 837 * Vectorized-HTML Web-Page. 838 * 839 * @param page Any Vectorized-HTML page. It is expected that this page contain a few 840 * {@code META}-Tags. If not, the method will still return an empty 841 * {@code Vector<Properties>} having {@code size()} of zero. 842 * 843 * @return The Java {@code 'Properties'} object that is returned from a call to 844 * {@link TagNode#allAV()} 845 * 846 * @see TagNode#allAV() 847 * @see TagNodeGet 848 */ 849 public static Vector<Properties> getAllMeta(Vector<HTMLNode> page) 850 { 851 Vector<Properties> ret = new Vector<>(); 852 853 // Retrieve all TagNode's that are HTML <META ...> Elements. 
Invoke TagNode.allAV() 854 // on each of these nodes to retrieve a java.util.Properties instance.\ 855 // 856 // NOTE: These "Properties" could possibly be combined into a single Properties 857 // instance, but because of the ever-changing nature of Web-Page 858 // Meta-Information tags, this is not employed here. It is an exercise 859 // left to the programmer. 860 861 for (TagNode tn : TagNodeGet.all(page, TC.OpeningTags, "META")) 862 ret.add(tn.allAV()); 863 864 return ret; 865 } 866 867 868 // **************************************************************************************** 869 // **************************************************************************************** 870 // Retrieve NAME/Property Meta-Tags 871 // **************************************************************************************** 872 // **************************************************************************************** 873 874 875 /** 876 * This method will find an HTML 877 * <B STYLE='color: red;'>{@code <META NAME=... CONTENT=...>}</B> element whose 878 * {@code NAME}-Attribute has a {@code String}-value equal-to (<I>ignoring case</I>) the 879 * value of the provided {@code String}-parameter {@code 'name'}. 880 * 881 * <BR /><BR />After this HTML {@code META}-Tag has been identified, the 882 * {@code String}-value of it's {@code CONTENT}-Attribute will be extracted and returned. 883 * 884 * <BR /><BR /><B CLASS=JDDescLabel>Returning null, Gracefully:</B> 885 * 886 * <BR />If the page provided does not have an HTML Meta-Tag with a {@code NAME}-Attribute 887 * whose <B STYLE='color: red;'>value</B> is {@code 'name'} or if such an element is 888 * identified, but that tag does not have a {@code CONTENT}-Attribute, then this method 889 * will return null. 
890 * 891 * <BR /><BR /><B CLASS=JDDescLabel>Case Insensitive Comparison:</B> 892 * 893 * <BR />Before the comparison is done with the {@code 'name'} parameter, that 894 * {@code String} is trimmed with {@code String.trim()}, and the comparison performed 895 * <I>is done while ignoring case</I>. 896 * 897 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 898 * 899 * @param name The name of the {@code <META NAME=...>} Tag. 900 * 901 * @return The {@code String}-<B STYLE='color: red;'>value</B> of the 902 * {@code CONTENT}-Attribute for a Meta-Tag whose {@code NAME}-Attribute is equal to the 903 * specified name provided by parameter {@code 'name'}. If such information is not found 904 * on the page, then this method shall return null. 905 * 906 * @see #getItemProp(Vector, String) 907 * @see #getHTTPEquiv(Vector, String) 908 */ 909 public static String getMetaTagName(Vector<HTMLNode> html, String name) 910 { 911 // Find the first <META NAME=... CONTENT=...> tag element where the name equals 912 // the string-value provided by parameter name. 913 914 TagNode tn = InnerTagGet.first 915 (html, "META", "NAME", TextComparitor.EQ_CI, name.trim()); 916 917 // If there are no <META NAME='NAME' CONTENT=...> elements found on the page, 918 // then this method returns null. 919 920 if (tn == null) return null; 921 922 // Return the string-value of the attribute 'content'. Note that if this 923 // attribute isn't available, this method shall return 'null', gracefully. 924 925 return tn.AV("CONTENT"); 926 } 927 928 929 /** 930 * This will retrieve all Meta-Tag's having {@code NAME}-Attribute and 931 * {@code CONTENT}-Attribute pairs. 932 * 933 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 934 * 935 * @return a {@code java.util.Hashtable} of all the Meta-Tag Name/Content pairs that do not 936 * have null values. 
937 * 938 * @throws IllegalArgumentException The method {@code MetaTagName.valueOf(...)} will throw 939 * an Illegal Argument Exception if any of the {@code <META NAME=...>} elements use a value 940 * of "NAME" that is not listed or identified in the Enumerated Type "MetaTagName". 941 * 942 * <BR /><BR /><B><SPAN STYLE="color: red">ALTERNATIVE:</SPAN></B> As Internet Companies 943 * come and go, pinning down a complete list of valid Meta Tag's that use the "NAME" 944 * Attribute is a possibly misguided approach. In lieu of eliminating the Enumerated-Type 945 * {@code MetaTagName}, it should be easier to just use the standard TagNode search below: 946 * 947 * <DIV CLASS="EXAMPLE">{@code 948 * // This code should be used as an alternative to this method if there are non-standard 949 * // HTML Meta Tag Names. It uses the more fundamental InnerTagGet Method. 950 * 951 * // This will retrieve all <META ...> HTML Elements that have a "NAME" Property. 952 * Vector<TagNode> metaTags = InnerTagGet.all(page, "meta", "name"); 953 * 954 * // This will print out those results: 955 * for (TagNode metaTag : metaTags) System.out.println 956 * ("Name:\t" + metaTag.AV("name") + "\tContent:\t" + metaTag.AV("content")); 957 * }</DIV> 958 * 959 * @see MetaTagName 960 * @see #metaTagName 961 * @see #insertMetaTagName(Vector, MetaTagName, String) 962 * @see InnerTagGet 963 */ 964 public static Hashtable<MetaTagName, String> getAllMetaTagNames 965 (Vector<? extends HTMLNode> html) 966 { 967 Hashtable<MetaTagName, String> ret = new Hashtable<>(); 968 969 // Converting the output "Vector<TagNode>" to a "Stream<TagNode>" by calling the 970 // .stream() method mainly because java streams provide the very simple 971 // 'filter(Predicate)' and 'forEach(Consumer)' methods. Vector.removeIf and 972 // Vector.forEach could also have been easily used as well. 
973 974 // InnerTagGet.all returns a vector containing all <META NAME=...> TagNode's where 975 // the value of the 'name' attribute is one of the pre-defined MetaTagName 976 // EnumeratedTypes. 977 978 // NOTE: This is done via a java.util.function.Predicate<String> and a lambda 979 // expression 980 981 InnerTagGet 982 .all (html, "META", "NAME", (String nameAttributeValue) -> 983 MetaTagName.valueOf 984 (nameAttributeValue.toLowerCase().trim()) != null) 985 986 .stream() 987 .filter((TagNode tn) -> tn.AV("CONTENT") != null) 988 989 .forEach((TagNode tn) -> 990 991 ret.put( 992 MetaTagName.valueOf(tn.AV("NAME").toLowerCase().trim()), 993 tn.AV("CONTENT") 994 )); 995 996 return ret; 997 } 998 999 1000 // **************************************************************************************** 1001 // **************************************************************************************** 1002 // Retrieve **SPECIFIC** NAME/Property Meta-Tags 1003 // **************************************************************************************** 1004 // **************************************************************************************** 1005 1006 1007 /** 1008 * This method looks for robots HTML <B STYLE='color: red;'>{@code <META NAME=robots>} 1009 * </B> tag, and returns the value of the {@code content}-Attribute. 1010 * 1011 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1012 * 1013 * @return This will return a vector of the robots named or specified by the HTML 1014 * Meta-Tag's present on this page. 1015 * 1016 * <BR /><BR /><B><SPAN STYLE="color: red;">NOTE:</B></SPAN> Please do not be disturbed by 1017 * java-streams, they are of limited use, but once a programmer is accustomed to the words 1018 * above, they actually improve code-readability (<B><I>once in a while!</I></B>). A 1019 * series of simple {@code for-loops} which eliminate-duplicates / add / sort would 1020 * accomplish the same task as above. 
1021 * 1022 * @throws MalformedHTMLException If any invalid robot-strings are found on the page, this 1023 * method will throw an exception. The impetus behind this is to prevent accidentally 1024 * ignoring newly found tags, or incorrect tags. The extraction of the robots Meta-Tag from 1025 * an HTML page can be performed manually, if throwing an exception is causing problems. 1026 * The code to do this is listed in the documentation of this method. 1027 * 1028 * @see #robotsMetaTag 1029 * @see #insertRobots(Vector, boolean, boolean) 1030 */ 1031 public static Vector<Robots> getAllRobots(Vector<? extends HTMLNode> html) 1032 throws MalformedHTMLException 1033 { 1034 // Here, again, using Java Streams can be sometimes useful - primarily whenever a 1035 // 'filter' operation is going to be used on a Vector. Vector.removeIf works, BUT 1036 // this also extracts attribute values, and the original TagNode are discarded, and 1037 // replaced by the the <META> attributes. 1038 // 1039 // ALSO SALIENT: the "Arrays.asList" produces an array of string, and the "::addAll" 1040 // puts each separate String in each array into the TreeSet. 1041 // 1042 // NOTE: The TreeSet also functions as a "duplicate checker" although this is also 1043 // provided by Stream.distinct() 1044 // 1045 // InnerTagGet.all; Returns a vector of TagNode's that resemble: 1046 // <META NAME="robots" ...> 1047 // 1048 // EQ_CI_TRM: Check the 'name' Attribute-Value using a Case-Insensitive, Equality 1049 // String-Comparison 1050 // Trim the 'name' Attribute-Value String of possible leading & trailing 1051 // White-Space before performing the comparison. 
1052 1053 TreeSet<String> temp = InnerTagGet 1054 .all (html, "META", "NAME", TextComparitor.EQ_CI_TRM, "robots") 1055 .stream () 1056 .map ((TagNode tn) -> tn.AV("CONTENT")) 1057 1058 .filter ((String contents) -> 1059 (contents != null) && (contents.trim().length() > 0)) 1060 1061 .map ((String contents) -> 1062 Arrays.asList(StrCSV.CSV(contents.toLowerCase()))) 1063 1064 .collect (TreeSet<String>::new, TreeSet::addAll, TreeSet::addAll); 1065 1066 // I cannot use EXCEPTIONS and STREAMS together, there is no simple way. 1067 // It would be too ugly to read. 1068 1069 Vector<Robots> ret = new Vector<>(); 1070 1071 // If an invalid robot-attribute is found, this will 1072 // throw a MalformedHTMLException 1073 1074 for (String s : temp) ret.add(Robots.getRobot(s)); 1075 1076 return ret; 1077 } 1078 1079 /** 1080 * This will retrieve the {@code 'robots'} Meta-Tag 1081 * Attribute-<B STYLE='color: red;'>value</B> present on a Web-Page. 1082 * 1083 * <BR /><BR />If any of them are not in accordance with the tags listed in the 1084 * Enumerated-Type {@link Robots}, this will not cause a {@link MalformedHTMLException} to 1085 * throw. Instead, the result will just be eliminated and ignored. Take care that all of 1086 * the necessary {@code ROBOTS}-Tags are listed in the Enumerated-Type, and that there 1087 * are no "undefined, but necessary" robot elements to be found before using this method! 1088 * 1089 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1090 * @return A vector of all the valid robots attribute values found on the web-page. 1091 * @see #robotsMetaTag 1092 * @see #insertRobots(Vector, boolean, boolean) 1093 * @see TagNode#AV(String) 1094 */ 1095 public static Vector<Robots> getAllRobotsNOMHE(Vector<? extends HTMLNode> html) 1096 { 1097 // Java Streams, used here, filter out irrelevant meta tags, and also convert the 1098 // HTML Meta TagNode's into their their "CONTENT" Attribute String value. 
The TreeSet 1099 // provides a duplicate check elimination and sorts the {@code String's} as well. 1100 // 1101 // ALSO SALIENT: the "Arrays.asList" produces an array of string, and the "::addAll" 1102 // puts each separate String in each array into the TreeSet 1103 // 1104 // NOTE: The 'getRobotNOMHE' suppresses a possible exception, and converts such a 1105 // situation to 'null.' The suppressed-exception is the "MalformedHTMLException" 1106 // 1107 // InnerTagGet.all; Returns a vector of TagNode's that resemble: 1108 // <META NAME="robots" ...> 1109 // 1110 // EQ_CI_TRM: Check the 'name' Attribute-Value using a Case-Insensitive, Equality 1111 // String-Comparison 1112 // Trim the 'name' Attribute-Value String of possible leading & trailing 1113 // White-Space before performing the comparison. 1114 1115 return InnerTagGet 1116 .all (html, "META", "NAME", TextComparitor.EQ_CI_TRM, "robots") 1117 .stream () 1118 .map ((TagNode tn) -> tn.AV("CONTENT")) 1119 1120 .filter ((String contents) -> 1121 (contents != null) && (contents.trim().length() > 0)) 1122 1123 .map ((String contents) -> 1124 Arrays.asList(StrCSV.CSV(contents.toLowerCase()))) 1125 1126 .collect (TreeSet<String>::new, TreeSet::addAll, TreeSet::addAll) 1127 .stream () 1128 .map ((String robotParam) -> Robots.getRobotNOMHE(robotParam)) 1129 .filter ((Robots robot) -> robot != null) 1130 .collect (Collectors.toCollection(Vector<Robots>::new)); 1131 } 1132 1133 /** 1134 * This method will extract any / all HTML 1135 * <B STYLE='color: red;'>{@code <META NAME='keywords' ...>}</B> Meta-Tags, and then extract 1136 * the relevant page key-words. These key-words will be returned as a Java 1137 * {@code String-Vector}. 1138 * 1139 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1140 * 1141 * @return The list of words that were stored in the 'keywords' HTML Meta-Tags. 
If there 1142 * were no keywords in any {@code 'KEYWORDS'} Meta-Tags, then an empty Java 1143 * {@code String[]}-Array is returned. 1144 * 1145 * <BR /><BR /><B CLASS=JDDescLabel>Java Stream's Utility:</B> 1146 * 1147 * <BR />If the code below looks complicated, Java's Streams-Package does have a tendency 1148 * to make <I>simple things look difficult</I>. However, once the {@code Stream}-Methods 1149 * are understood, it's usually pretty useful for actually being very concise. 1150 * 1151 * <BR /><BR /><OL CLASS=JDOL> 1152 * <LI> Get all HTML {@code <META name="keywords" content="...">} elements</LI> 1153 * 1154 * <LI> Extracts the {@code CONTENT}-Attribute, <I>and particularly the 1155 * <B STYLE='color: red;'>value</B> stored there</I> 1156 * </LI> 1157 * 1158 * <LI> Removes blanks, and {@code nulls}</LI> 1159 * <LI> Converts a {@code String[]} to {@code List<String>}</LI> 1160 * <LI> Collects all the List<String> into a single java String-Array</LI> 1161 * </OL> 1162 * 1163 * @see #insertKeyWords(Vector, String[]) 1164 * @see #keyWordsMetaTag 1165 * @see TagNode 1166 * @see TagNode#AV(String) 1167 * @see StrCSV#CSV(String) 1168 */ 1169 public static String[] getAllKeyWords(Vector<? extends HTMLNode> html) 1170 { 1171 // Java Streams here both filter irrelevant meta tags, and also convert the type from 1172 // TagNode to String... using the 'map' function. Ultimately, those strings are 1173 // 'collected' into the returned vector. 1174 // ALSO SALIENT: the "Arrays.asList" produces an array of string, and the "::addAll" 1175 // puts each separate String into the returned Vector. 1176 1177 // InnerTagGet.all: Returns a vector of TagNode's that resemble: 1178 // <META name="keywords" ...> 1179 // 1180 // EQ_CI_TRM: Check the 'name' Attribute-Value using a Case-Insensitive, Equality 1181 // String-Comparison 1182 // Trim the 'name' Attribute-Value String of possible leading & trailing 1183 // White-Space before performing the comparison. 
1184 1185 return InnerTagGet.all(html, "META", "NAME", TextComparitor.EQ_CI_TRM, "keywords") 1186 .stream () 1187 .map ((TagNode tn) -> tn.AV("content")) 1188 1189 .filter ((String contents) -> 1190 (contents != null) && (contents.trim().length() > 0)) 1191 1192 .map ((String contents) -> Arrays.asList(StrCSV.CSV(contents))) 1193 .collect (Vector::new, Vector::addAll, Vector::addAll) 1194 .stream () 1195 .toArray (String[]::new); 1196 } 1197 1198 /** 1199 * This method attempts to retrieve a {@code 'description'}-Property Meta-Tag out of an 1200 * HTML_Page. If no such Meta-Tag is found, then null is returned. 1201 * 1202 * <BR /><BR />If a partial Meta-Tag is found, but that tag is incomplete, then a 1203 * {@link MalformedHTMLException} will be thrown. 1204 * 1205 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1206 * 1207 * @return The content-description that has been extracted from the HTML Meta-Tag 1208 * <B STYLE='color: red;'>{@code <META NAME="description" CONTENT="the-description">}</B>. 1209 * 1210 * <BR /><BR />If this tag is not found, then null is returned. If this tag is found, but 1211 * does not posses a {@code CONTENT}-Attribute, then a {@code MalformedHTMLException} is 1212 * thrown. 1213 * 1214 * @throws MalformedHTMLException This is thrown if there are multiple definitions of the 1215 * {@code 'ROBOTS'} Meta-Tag. There ought to only be a single definition, and if multiple 1216 * are found, it would be better to identify why, and do the data-extraction manually. 1217 * 1218 * This is en-lieu of randomly picking one of them, and randomly returning one of the 1219 * Meta-Tag's {@code CONTENT}-Attribute <B STYLE='color: red;'>value</B>. 1220 * 1221 * <BR />This exception will also be thrown if proper-values for {@code 'index'} or 1222 * {@code 'follow'} are not found in the {@code CONTENT}-Attribute of the 1223 * {@code 'ROBOTS'} Meta-Tag. 1224 * 1225 * <BR /><BR />These are probably unlikely occurrences. 
This exception is a 1226 * Checked-Exception and must have a {@code try-catch} block or be declared thrown in your 1227 * method-declaration. 1228 * 1229 * @see #descriptionMetaTag 1230 * @see #insertDescription(Vector, String) 1231 * @see InnerTagGet 1232 */ 1233 public static String hasDescription(Vector<? extends HTMLNode> html) 1234 throws MalformedHTMLException 1235 { 1236 // InnerTagGet.all; Returns a vector of TagNode's that resemble: 1237 // <META NAME="description" ...> 1238 // 1239 // EQ_CI_TRM: Check the 'name' Attribute-Value using a Case-Insensitive, Equality 1240 // String-Comparison 1241 // Trim the 'name' Attribute-Value String of possible leading & trailing 1242 // White-Space before performing the comparison. 1243 1244 Vector<TagNode> v = InnerTagGet.all 1245 (html, "META", "NAME", TextComparitor.EQ_CI_TRM, "description"); 1246 1247 if (v.size() == 0) return null; 1248 1249 if (v.size() > 1) throw new MalformedHTMLException( 1250 "You have asked for the value of the HTML 'description' <META ...> Tag, but " + 1251 "unfortunately there were multiple instances of this Tag on your page. " + 1252 "This is poorly formatted HTML, and not allowed here." 1253 ); 1254 1255 String s = v.elementAt(0).AV("CONTENT"); 1256 1257 if (s == null) throw new MalformedHTMLException( 1258 "An HTML Meta-Tag was found with a NAME-Attribute whose value was " + 1259 "'description,' but unfortunately this Meta-Tag did not posses a CONTENT-Attribute" 1260 ); 1261 1262 return s; 1263 } 1264 1265 /** 1266 * This helps identify Web-Sites & Web-Pages "author-names" to Web-Indexing and 1267 * Web-Search Organizations. 1268 * 1269 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1270 * 1271 * @return This returns the author's name of a Web-Page, as delineated in the 1272 * {@code 'AUTHOR'} Meta-Tag, or null if the Web-Page parameter {@code 'html'} does not 1273 * have an {@code 'AUTHOR'} Meta-Tag. 
1274 * 1275 * @throws MalformedHTMLException If multiple {@code 'AUTHOR'} Meta-Tags are found, this 1276 * method is forced to throw an exception. It is necessary to avoid "picking a favorite 1277 * author among a list". 1278 * 1279 * <BR /><BR />HTML does not actually adhere to these exact requirements, so if there is 1280 * such a scenario with a page having multiple-authors, this method throws an exception in 1281 * order to avoid returning a {@code String[]}-Array or {@code Vector<String>} which would 1282 * be an alternative that would add unnecessary complexity. 1283 * 1284 * <BR /><BR />If this method throws this exception, it is better to know about it, and 1285 * just perform the search again, using a manual {@code 'AUTHOR'} retrieval. The code for 1286 * extracting these properties is, indeed listed directly at the bottom. 1287 * 1288 * @see #insertAuthor(Vector, String) 1289 * @see #authorMetaTag 1290 * @see TagNode#AV(String) 1291 */ 1292 public static String hasAuthor(Vector<? extends HTMLNode> html) 1293 throws MalformedHTMLException 1294 { 1295 // InnerTagGet.all: Returns a vector of TagNode's that resemble: 1296 // <META name="author" ...> 1297 // 1298 // EQ_CI_TRM: Check the 'name' Attribute-Value using a Case-Insensitive, Equality 1299 // String-Comparison 1300 // Trim the 'name' Attribute-Value String of possible leading & trailing 1301 // White-Space before performing the comparison. 1302 1303 Vector<TagNode> v = InnerTagGet.all 1304 (html, "META", "NAME", TextComparitor.EQ_CI_TRM, "author"); 1305 1306 if (v.size() > 1) throw new MalformedHTMLException( 1307 "This method has identified multiple author Meta-Tags. To handle this " + 1308 "situation, the search should be performed manually using InnerTagGet, with " + 1309 "your code deciding what to do about the HTML Web-Page having multiple 'author' " + 1310 "Meta-Tags." 
1311 ); 1312 1313 // No HTML TagNode's were found that resembled <META NAME=author ...> 1314 if (v.size() == 0) return null; 1315 1316 // Just return the first one that was found, always check for 'null' first to 1317 // avoid the embarrassing NullPointerException. 1318 1319 String author = v.elementAt(0).AV("CONTENT"); 1320 1321 if (author == null) return null; 1322 1323 return author.trim(); 1324 } 1325 1326 1327 // **************************************************************************************** 1328 // **************************************************************************************** 1329 // Retrieve HTTP-EQUIV Meta-Tags 1330 // **************************************************************************************** 1331 // **************************************************************************************** 1332 1333 1334 /** 1335 * This method will find an HTML 1336 * <B STYLE='color: red;'>{@code <META HTTP-EQUIV=... CONTENT=...>}</B> element whose 1337 * {@code HTTP-EQUIV}-Attribute's <B STYLE='color: red;'>value</B> is equal to the 1338 * {@code String}-Parameter {@code 'httpEquiv'} (ignoring case). 1339 * 1340 * <BR /><BR />After such an HTML {@code META}-Tag has been identified, its 1341 * {@code CONTENT}-Attribute {@code String}-value will be subsequently queried, extracted 1342 * and returned by this method. 1343 * 1344 * <BR /><BR /><B CLASS=JDDescLabel>Returning null, Gracefully:</B> 1345 * 1346 * <BR />If the page provided does not have an HTML Meta-Tag with a {@code NAME}-Attribute 1347 * whose <B STYLE='color: red;'>value</B> is {@code 'name'} or if such an element is 1348 * identified, but that tag does not have a {@code CONTENT}-Attribute, then this method 1349 * will return null. 
1350 * 1351 * <BR /><BR /><B CLASS=JDDescLabel>Case Insensitive Comparison:</B> 1352 * 1353 * <BR />Before the comparison is done with the {@code 'httpEquiv'} parameter, that 1354 * {@code String} is trimmed with {@code String.trim()}, and the comparison performed 1355 * <I>is done while ignoring case</I>. 1356 * 1357 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1358 * 1359 * @param httpEquiv The Attribute-<B STYLE='color: red;'>name</B> of the 1360 * {@code HTTP-EQUIV}-Attribute. 1361 * 1362 * @return The {@code String}-value of the {@code CONTENT}-Attribute for a 1363 * {@code META}-Tag whose {@code HTTP-EQUIV}-Attribute is equal to the specified name 1364 * provided by parameter {@code 'httpEquiv'}. 1365 * 1366 * <BR /><BR />If no such tag is found on the page, then this method shall return null. 1367 */ 1368 public static String getHTTPEquiv(Vector<HTMLNode> html, String httpEquiv) 1369 { 1370 // Find the first <META HTTP-EQUIV=... CONTENT=...> tag element where the name equals 1371 // the string-value provided by parameter 'httpEquiv'. 1372 1373 TagNode tn = InnerTagGet.first 1374 (html, "META", "HTTP-EQUIV", TextComparitor.EQ_CI, httpEquiv.trim()); 1375 1376 // If there are no <META HTTP-EQUIV='httpEquiv' CONTENT=...> elements found on the 1377 // page, then this method returns null. 1378 1379 if (tn == null) return null; 1380 1381 // Return the string-value of the attribute 'content'. Note that if this 1382 // attribute isn't available, this method shall return 'null', gracefully. 1383 1384 return tn.AV("CONTENT"); 1385 } 1386 1387 /** 1388 * This method will find all HTML {@code HTTP-EQUIV}-Directives, and return them in a Java 1389 * {@code Properties} object. 1390 * 1391 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1392 * 1393 * @return An instance of {@code java.util.Properties} containing all 1394 * {@code HTTP-EQUIV}-Directives. 
If HTML-Page paramter {@code 'html'} does not have any 1395 * such Meta-Tags, then an empty {@code Properties} instance is returned, rather than null. 1396 * 1397 * @throws MalformedHTMLException If the page provided has multiple definitions for the 1398 * exact same {@code HTTP}-Header property, then this exception will throw. 1399 */ 1400 public static Properties getAllHTTPEquiv(Vector<HTMLNode> html) 1401 throws MalformedHTMLException 1402 { 1403 Properties ret = new Properties(); 1404 String prev = null; 1405 1406 // Find the first <META HTTP-EQUIV=... CONTENT=...> tag element where the name equals 1407 // the string-value provided by parameter 'httpEquiv'. 1408 1409 for (TagNode httpEquivTN : InnerTagGet.all(html, "META", "HTTP-EQUIV")) 1410 1411 if ((prev = (String) ret.put 1412 (httpEquivTN.AV("HTTP-EQUIV"), httpEquivTN.AV("CONTENT"))) != null) 1413 1414 throw new MalformedHTMLException( 1415 "This HTML Page has multiple Meta-Tag Definitions for the HTTP-" + 1416 "EQUIVALENT Property [" + httpEquivTN.AV("HTTP-EQUIV") + "].\n" + 1417 " " + prev + "\n" + 1418 "and " + httpEquivTN.AV("CONTENT") + '\n' 1419 ); 1420 1421 return ret; 1422 } 1423 1424 /** 1425 * This will detect whether a {@code UTF-8} HTML Meta-Tag is included on this page. Below 1426 * are examples of what such tags look like. 1427 * 1428 * <DIV CLASS="HTML">{@code 1429 * <meta http-equiv="content-type" content="text/html; charset=UTF-8"> 1430 * <meta charset="UTF-8"> 1431 * }</DIV> 1432 * 1433 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 1434 * 1435 * @return {@code TRUE} If an appropriate HTML Meta-Tag identifying this page as a 1436 * {@code UTF-8} Character-Set Web-Site. will {@code FALSE} otherwise. 1437 * 1438 * @see #hasUTF8MetaTag(Vector) 1439 * @see #UTF8MetaTag 1440 * @see StrCmpr#containsAND_CI(String, String[]) 1441 * @see TagNode#AV(String) 1442 */ 1443 public static boolean hasUTF8MetaTag(Vector<? 
extends HTMLNode> html) 1444 { 1445 String s; 1446 1447 // InnerTagGet.all: Returns a vector of TagNode's that resemble: 1448 // <META http-equiv="content-type" ...> 1449 // 1450 // EQ_CI_TRM: Check the 'http-equiv' Attribute-Value using a Case-Insensitive, 1451 // Equality String-Comparison 1452 // Trim the 'http-equiv' Attribute-Value String of possible leading & 1453 // trailing White-Space before performing the comparison. 1454 1455 Vector<TagNode> v = InnerTagGet.all 1456 (html, "META", "HTTP-EQUIV", TextComparitor.EQ_CI_TRM, "content-type"); 1457 1458 for (TagNode tn : v) 1459 if ((s = tn.AV("CONTENT")) != null) 1460 if (StrCmpr.containsAND_CI(s, "charset", "utf-8")) 1461 return true; 1462 1463 // InnerTagGet.aall retrieves all TagNode's that resemble <META charset="utf-8" ...> 1464 // EQ_CI_TRM: Equality-Test, Case-Insensitive, Trim any White-Space before 1465 // performing comparison. 1466 1467 v = InnerTagGet.all(html, "META", "CHARSET", TextComparitor.EQ_CI_TRM, "utf-8"); 1468 1469 for (TagNode tn : v) 1470 if ((s = tn.AV("CHARSET")) != null) 1471 if (StrCmpr.containsAND_CI(s, "utf-8")) 1472 return true; 1473 1474 return false; 1475 } 1476 1477 1478 // **************************************************************************************** 1479 // **************************************************************************************** 1480 // Insert NAME/Property Meta-Tags 1481 // **************************************************************************************** 1482 // **************************************************************************************** 1483 1484 1485 /** 1486 * This does a very simple insertion of an HTML Meta-Tag for a specific type, 1487 * Meta-Tags that have both a {@code NAME}-Attribute and a {@code CONTENT}-Attribute 1488 * set. 
1489 * 1490 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1491 * 1492 * @param m This is any of the enumerated-types of specific Meta-Tag {@code NAME}-Attribute 1493 * & {@code CONTENT}-Attribute pair / combinations. 1494 * 1495 * @param contentAttributeValue This is the value that will be used to set the 1496 * <B STYLE='color: red;'>value</B> for the {@code CONTENT}-Attribute. 1497 * 1498 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1499 * 1500 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=contentAttributeValue 1501 * DATA-FILE-ID=FT_Q_EX> 1502 * 1503 * @see #metaTagName 1504 * @see #getAllMetaTagNames(Vector) 1505 * @see DotPair 1506 * @see TagNode 1507 */ 1508 public static void insertMetaTagName 1509 (Vector<HTMLNode> html, MetaTagName m, String contentAttributeValue) 1510 { 1511 // Builds and inserts a TagNode HTML Element that looks like: 1512 // <meta name='INSERT-NAME-STRING-HERE' content='INSERT-CONTENT-STRING-HERE'> 1513 1514 // Single Quotes are used, so the attribute-value may not contain single quotes. 1515 checkForSingleQuote(contentAttributeValue); 1516 1517 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 1518 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1519 1520 if (header == null) throw new NodeNotFoundException 1521 (NO_HEADER_MESSAGE.replace("INSERT-STR", "<META NAME=... CONTENT=...> tag")); 1522 1523 // Build a <META> tag, as in the comment above 1524 TagNode metaTN = new TagNode 1525 ("<META NAME='" + m.name + "' CONTENT='" + contentAttributeValue + "'>"); 1526 1527 // Insert the meta-tag into the page. Put it at the top of the header, 1528 // just after <HEAD> 1529 1530 Util.insertNodes(html, header.start + 1, NEWLINE, metaTN, NEWLINE); 1531 } 1532 1533 /** 1534 * This does an insertion of a list of HTML Meta-Tags from a java Hashtable of Meta-Tag 1535 * Name-Attribute / Content-Attribute pairs. 
All name-based Meta-Tags have both a 1536 * {@code NAME}-Attribute, and also a {@code CONTENT}-Attribute. 1537 * 1538 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1539 * 1540 * @param metaTags This is a hash-table of the enumerated-types of specific Meta-Tag Name 1541 * property/content pairs. 1542 * 1543 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1544 * 1545 * @throws QuotesException If any of the <B STYLE='color: red;'>values</B> from the 1546 * <B STYLE='color:red'>key-value</B> pair hash-table contain a {@code String} that has a 1547 * single-quotation mark, anywhere inside the it. 1548 * 1549 * @see #metaTagName 1550 * @see #getAllMetaTagNames(Vector) 1551 * @see #insertMetaTagName(Vector, MetaTagName, String) 1552 * @see TagNode 1553 */ 1554 public static void insertMetaTagNames 1555 (Vector<HTMLNode> html, Hashtable<MetaTagName, String> metaTags) 1556 { 1557 // Builds and inserts a TagNode HTML Element that looks like: 1558 // "<meta name='INSERT-NAME-STRING-HERE' content='INSERT-CONTENT-STRING-HERE'"; 1559 1560 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 1561 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1562 1563 if (header == null) throw new NodeNotFoundException 1564 (NO_HEADER_MESSAGE.replace("INSERT-STR", "<META NAME=... CONTENT=...> tag")); 1565 1566 // Java Stream's can be addictive... It is an easier way to build a list. 
1567 Stream.Builder<HTMLNode> b = Stream.builder(); 1568 b.accept(NEWLINE); 1569 1570 // Iterate the complete list of meta-tag names to insert 1571 for (MetaTagName m : metaTags.keySet()) 1572 { 1573 String contentAttributeValue = metaTags.get(m); 1574 checkForSingleQuote(contentAttributeValue); 1575 1576 // Build the new node 1577 TagNode metaTN = new TagNode 1578 ("<META NAME='" + m.name + "' CONTENT='" + contentAttributeValue + "'>"); 1579 1580 b.accept(metaTN); b.accept(NEWLINE); 1581 } 1582 1583 // Insert the meta-tag names into the page. Put it at the top of the header, 1584 // just after <HEAD> 1585 1586 Util.insertNodes(html, header.start + 1, b.build().toArray(HTMLNode[]::new)); 1587 } 1588 1589 1590 // **************************************************************************************** 1591 // **************************************************************************************** 1592 // Insert **SPECIFIC** NAME/Property Meta-Tags 1593 // **************************************************************************************** 1594 // **************************************************************************************** 1595 1596 1597 /** 1598 * One common HTML Meta-Tag is the one which informs Google & Yahoo (and all 1599 * search-engine sites) which of your pages you would like to be indexed by their search 1600 * engine, and which pages you would like to not be indexed. Worrying about what Google 1601 * does and does not index may seem daunting, but this meta-tag can prevent certain 1602 * behaviors. 1603 * 1604 * <BR /><BR />The {@code 'ROBOTS'} Meta-Tag informs Search-Engines which pages on your 1605 * site should be indexed. This Meta-Tag serves a similar purpose to a {@code 'robots.txt'} 1606 * File. It is generally used to prevent a Search-Engine from indexing individual pages, 1607 * while {@code 'robots.txt'} is used to prevent the search from indexing a whole site or 1608 * section of a site. 
1609 * 1610 * <BR /><BR />A {@code 'ROBOTS'} Meta-Tag which instructs the Search-Engine Crawler not to 1611 * index a page, or follow any links on it, would be written as below. 1612 * 1613 * <DIV CLASS="HTML">{@code 1614 * <meta name="robots" content="noindex, nofollow" /> 1615 * <meta name="robots" content="index, follow" /> 1616 * }</DIV> 1617 * 1618 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1619 * 1620 * @param index This is a {@code boolean}-Parameter that when set to {@code TRUE} will 1621 * force this method to place an {@code INDEX-String} into the finally-exported HTML 1622 * element. If {@code FALSE} is passed, then a {@code NOINDEX-String} will be put into the 1623 * HTML-Tag. 1624 * 1625 * @param follow This is also a {@code boolean}-Parameter. When {@code TRUE} this will 1626 * force the method to put a {@code FOLLOW-String} into the finally-exported HTML-Tag. 1627 * When {@code FALSE}, then a {@code 'NOFOLLOW'} will be inserted. 1628 * 1629 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1630 * 1631 * @see #robotsMetaTag 1632 * @see #getAllRobots(Vector) 1633 * @see #getAllRobotsNOMHE(Vector) 1634 * @see TagNode 1635 */ 1636 public static void insertRobots(Vector<HTMLNode> html, boolean index, boolean follow) 1637 { 1638 // Builds a robots meta tag. These are used by google and search engines 1639 // <meta NAME=robots content='INSERT-CONTENT-STRING-HERE' /> 1640 1641 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 1642 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1643 1644 if (header == null) throw new NodeNotFoundException 1645 (NO_HEADER_MESSAGE.replace("INSERT-STR", "Robots <META ... > Tag")); 1646 1647 // Build a 'robots' TagNode 1648 TagNode robotsTN = new TagNode( 1649 "<META NAME=robots CONTENT='" + 1650 (index ? "index" : "noindex") + ", " + (follow ? 
"follow" : "nofollow") + 1651 "' >" 1652 ); 1653 1654 // Insert the robots-tag into the page. 1655 // Put it at the top of the header, just after <HEAD> 1656 1657 Util.insertNodes(html, header.start + 1, NEWLINE, robotsTN, NEWLINE); 1658 } 1659 1660 /** 1661 * This will add an HTML Meta-Tag with a 1662 * <B STYLE='color: red;'>{@code <META NAME=robots>}</B> 1663 * 1664 * <BR /><BR /><B CLASS=JDDescLabel>Validity Check Warning:</B> 1665 * 1666 * <BR />This method avoids all presumed <I><B>validity check,</B></I> primarily because 1667 * making an attempt to identify what is absolutely correct or not-correct seems a little 1668 * far-fetched. 1669 * 1670 * <BR /><BR />Although the number of actual values the {@code ROBOTS}-Attribute may 1671 * contain is very low, throwing a {@code MalformedHTMLException} for some errors, while 1672 * ignoring others was decided to best avoid during this method's development. 1673 * 1674 * <BR /><BR /><B CLASS=JDDescLabel:>Aside:</B> 1675 * 1676 * <BR />If a programmer were to pass both the {@link Robots#Follow} and the 1677 * {@link Robots#NoFollow} Enum-Constants, both of these tags would be inserted into an 1678 * HTML {@code 'robots'} Meta-Tag without any kind of warning or exception throw. 1679 * 1680 * <BR /><BR />This, clearly, would be a faulty HTML directive, though. 1681 * 1682 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1683 * 1684 * @param rArr This is an array of the Enumerated-Type {@link Robots}. It may contain a 1685 * list of any number of the items available to add into an HTML Meta-Tag's 1686 * {@code ROBOTS}-Attribute. If any of the array elements are null, they will be skipped 1687 * and ignored. 
1688 * 1689 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1690 * 1691 * @see #robotsMetaTag 1692 * @see #getAllRobots(Vector) 1693 * @see #insertRobots(Vector, boolean, boolean) 1694 * @see StrCSV#toCSV(Object[], IntTFunction, boolean, Integer) 1695 * @see DotPair 1696 */ 1697 public static void insertRobots(Vector<HTMLNode> html, Robots... rArr) 1698 { 1699 // Builds a series-of-robots meta tag. These are used by google and search engines 1700 // <meta NAME=robots content='INSERT-CONTENT-STRING-HERE' /> 1701 1702 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 1703 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1704 1705 if (header == null) throw new NodeNotFoundException 1706 (NO_HEADER_MESSAGE.replace("INSERT-STR", "Robots <META ... > Tag")); 1707 1708 String robotsStr = StrCSV.toCSV(rArr, (int i, Robots r) -> r.name, false, null); 1709 1710 // Build the <META> TagNode 1711 TagNode robotsTN = new TagNode("<META NAME=robots CONTENT='" + robotsStr + "'>"); 1712 1713 // Insert the robots-tag into the page. Put it at the top of the header, just 1714 // after <HEAD> 1715 1716 Util.insertNodes(html, header.start + 1, NEWLINE, robotsTN, NEWLINE); 1717 } 1718 1719 /** 1720 * Another common HTML {@code META}-Tag is the one that provides a brief description of 1721 * the page in question. 
This method facilitates adding a Meta-Tag that contains two 1722 * attributes: 1723 * 1724 * <BR /><BR /><OL CLASS=JDUL> 1725 * <LI> {@code NAME}-Attribute whose <B STYLE='color: red;'>value</B> must be 1726 * {@code 'description'} 1727 * </LI> 1728 * 1729 * <LI> {@code CONTENT}-Attribute whose <B STYLE='color: red;'>value</B> should be a brief 1730 * textual description of the content of the page 1731 * </LI> 1732 * </OL> 1733 * 1734 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1735 * 1736 * @param description This is a textual-description of the Web-Page to which this HTML 1737 * <B STYLE='color: red;'>{@code <META NAME=description CONTENT='...'}</B> Tag is being 1738 * added. If Google or any of the other Internet Search Sites, return your Web-Page as a 1739 * part of a search-results, this description is usually used. 1740 * 1741 * <BR /><BR />Furthermore, the key-words that are listed here are some-how (in a way that 1742 * is not-knownst to this programmer) used in indexing your particular page in the 1743 * search-algorithms. 1744 * 1745 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1746 * 1747 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=description 1748 * DATA-FILE-ID=FT_Q_EX> 1749 * 1750 * @see #descriptionMetaTag 1751 * @see #hasDescription(Vector) 1752 * @see #checkForSingleQuote(String) 1753 * @see TagNode 1754 */ 1755 public static void insertDescription(Vector<HTMLNode> html, String description) 1756 { 1757 // Meta-Tag for Descriptions. This will be inserted into the HTML page. 1758 // <meta NAME=description content='INSERT-DESCRIPTION-OR-KEYWORDS-HERE'> 1759 1760 checkForSingleQuote(description); 1761 1762 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 
1763 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1764 1765 if (header == null) throw new NodeNotFoundException 1766 (NO_HEADER_MESSAGE.replace("INSERT-STR", "Description <META ... > Tag")); 1767 1768 // Build the Meta Tag for a description to google and search engines 1769 TagNode metaTN = new TagNode 1770 ("<META NAME=description CONTENT='" + description + "'>"); 1771 1772 // Insert the description-tag into the page. Put it at the top of the header, 1773 // just after <HEAD> 1774 1775 Util.insertNodes(html, header.start + 1, NEWLINE, metaTN, NEWLINE); 1776 } 1777 1778 /** 1779 * This will attempt to insert key-words into an HTML Meta-Tag. This is usually used to 1780 * summarize-explain 'main-points' that a Web-Page author wants to make to any 1781 * search-engineer or any-listener on the internet about the Web-Page that includes such a 1782 * Meta-Tag. 1783 * 1784 * <BR /><BR /><B CLASS=JDDescLabel>Validity Checking:</B> 1785 * 1786 * <BR />This method does a few minor validity checks regarding the content inside of a 1787 * description keyword. All it does is look for things like White-Space and a few 1788 * punctuation rules. If either of these problems occur inside any of the key-words 1789 * provided to the {@code 'keyWords'} Var-Args Parameter, then an 1790 * {@code IllegalArgumentException} is thrown. 
1791 * 1792 * <BR /><BR /><B CLASS=JDDescLabel>Disallowed Punctuation:</B> 1793 * 1794 * <BR />This list of disallowed punctuation marks for the key-words are as processed as 1795 * follows: 1796 * 1797 * <DIV CLASS="SNIP">{@code 1798 * if (StrCmpr.containsOR 1799 * (keyWord, ";", ",", "'", "\"", "!", "#", "<", ">", 1800 * "(", ")", "*", "/", "\\") 1801 * ) 1802 * throw new IllegalArgumentException(...); 1803 * }</DIV> 1804 * 1805 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1806 * 1807 * @param keyWords This is a list of germane key-words that help identify, indicate or 1808 * describe the content of the Web-Page in which they are placed. 1809 * 1810 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1811 * 1812 * @throws IllegalArgumentException If any of the key-words provided to the Java Var-Args 1813 * {@code 'keyWords'} parameter contain invalid punctuation characters, or white-space. 1814 * 1815 * @see #keyWordsMetaTag 1816 * @see #getAllKeyWords(Vector) 1817 * @see StringParse#hasWhiteSpace(String) 1818 * @see StrCmpr#containsOR(String, String[]) 1819 * @see StrCSV#toCSV(String[], boolean, boolean, Integer) 1820 */ 1821 public static void insertKeyWords(Vector<HTMLNode> html, String... keyWords) 1822 { 1823 // The meta-tag for key-words. Search Engines look for these key-words when indexing 1824 // <meta NAME=keywords content='INSERT-COMMA-SEPARATED-KEYWORDS-HERE'> 1825 1826 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 
1827 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1828 1829 if (header == null) throw new NodeNotFoundException 1830 (NO_HEADER_MESSAGE.replace("INSERT-STR", "KeyWords Meta-Tag")); 1831 1832 for (String keyWord : keyWords) if (StringParse.hasWhiteSpace(keyWord)) 1833 1834 throw new IllegalArgumentException( 1835 "You have tried to insert keywords into an HTML Meta-Tag KeyWord-{roperty, " + 1836 "but unfortunately one of the words provided [" + keyWord + "] contains " + 1837 "white-space. This is not allowed here." 1838 ); 1839 1840 1841 for (String keyWord : keyWords) 1842 1843 if (StrCmpr.containsOR 1844 (keyWord, ";", ",", "'", "\"", "!", "<", ">", "(", ")", "*", "/", "\\")) 1845 1846 throw new IllegalArgumentException( 1847 "You have tried to insert keywords into an HTML Meta-Tag KeyWords-" + 1848 "Property, but unfortunately one of the words provide [" + keyWord + "] " + 1849 "contains error-prone punctuation, and cannot be used here." 1850 ); 1851 1852 // All this does is build a list - Comma Separated values. 1853 String listAsString = StrCSV.toCSV(keyWords, true, false, null); 1854 1855 // Build the TagNode, it will contain all key-words listed in the input var-args 1856 // String array 1857 1858 TagNode metaTN = new TagNode("<META NAME=keywords CONTENT='" + listAsString + "'>"); 1859 1860 // Insert the tag into the page. Put it at the top of the header, just after <HEAD> 1861 Util.insertNodes(html, header.start + 1, NEWLINE, metaTN, NEWLINE); 1862 } 1863 1864 /** 1865 * This method will insert an "author" HTML Meta-Tag into the 1866 * <B STYLE='color: red;'>{@code <HEAD> ... </HEAD>}</B> section of this page. 1867 * 1868 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1869 * @param author This is the author of this Web-Page. 
1870 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1871 * 1872 * @throws QuotesException If the author's name prevents the HTML-Engine from building any 1873 * version of an {@code AUTHOR} Meta-Tag. This will happen, certainly, if the author's 1874 * name-{@code String} contains <I><B>both</B></I> a single <I><B>and</B></I> a double 1875 * quote. 1876 * 1877 * <BR /><BR />Choose either the single-quote, or the double. Do not use both, or this 1878 * exception will throw. 1879 * 1880 * <BR /><BR /><B><SPAN STYLE="color: red;">MOST IMPORTANT</B></SPAN> Most author's names 1881 * don't have any quotes at all! Checking for these things prevents unexplainable 1882 * exceptions later on. 1883 * 1884 * @see #authorMetaTag 1885 * @see #hasAuthor(Vector) 1886 * @see SD 1887 * @see DotPair 1888 */ 1889 public static void insertAuthor(Vector<HTMLNode> html, String author) 1890 { 1891 // The 'Author' Meta tag shall be inserted into the html page. 1892 // <meta NAME=author content='INSERT-AUTHOR-NAME-HERE'> 1893 1894 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 1895 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1896 1897 if (header == null) throw new NodeNotFoundException 1898 (NO_HEADER_MESSAGE.replace("INSERT-STR", "author meta-tag")); 1899 1900 if ((author.indexOf("'") != -1) && (author.indexOf("\"") != -1)) 1901 1902 throw new QuotesException( 1903 "The author string provided here contains both a single-quote and a double-" + 1904 "quote, but this cannot be inserted into any HTML-Tag. Please remove " + 1905 "one or the other." 1906 ); 1907 1908 // Use the more complicated TagNode constructor to build the "author" tag. 1909 SD quote = (author.indexOf("'") == -1) ? SD.SingleQuotes : SD.DoubleQuotes; 1910 Properties p = new Properties(); 1911 1912 p.put("NAME", "author"); 1913 p.put("CONTENT", author); 1914 1915 // This constructor accepts a properties instance. 
1916 TagNode authorTN = new TagNode("META", p, quote, true); 1917 1918 // Insert the tag into the page. Put it at the top of the header, just after <HEAD> 1919 Util.insertNodes(html, header.start + 1, NEWLINE, authorTN, NEWLINE); 1920 } 1921 1922 1923 // **************************************************************************************** 1924 // **************************************************************************************** 1925 // Insert HTTP-EQUIV Meta-Tags 1926 // **************************************************************************************** 1927 // **************************************************************************************** 1928 1929 1930 /** 1931 * This does a very simple insertion of an HTML Meta-Tag for a specific type, 1932 * Meta-Tags that have a {@code HTTP-EQUIV}-Attribute paired with a 1933 * {@code CONTENT}-Attribute. 1934 * 1935 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1936 * 1937 * @param httpEquiv This is the property that is passed using the 1938 * {@code HTTP-EQUIV}-Attribute. 1939 * 1940 * @param contentAttributeValue This is the value that will be used to set the 1941 * {@code CONTENT}-Attribute. 1942 * 1943 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1944 * 1945 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=contentAttributeValue 1946 * DATA-FILE-ID=FT_Q_EX> 1947 * 1948 * @see #metaTagHTTPEquiv 1949 * @see #getHTTPEquiv(Vector, String) 1950 * @see DotPair 1951 * @see TagNode 1952 */ 1953 public static void insertHTTPEquiv 1954 (Vector<HTMLNode> html, String httpEquiv, String contentAttributeValue) 1955 { 1956 // Builds and inserts a TagNode HTML Element that looks like: 1957 // <meta http-equiv='INSERT-HTTP-EQUIV-STRING-HERE' 1958 // content='INSERT-CONTENT-STRING-HERE' > 1959 1960 // Single Quotes are used, so the attribute-value may not contain single quotes. 
1961 checkForSingleQuote(contentAttributeValue); 1962 1963 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 1964 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 1965 1966 if (header == null) throw new NodeNotFoundException 1967 (NO_HEADER_MESSAGE.replace("INSERT-STR", "<META HTTP-EQUIV=... CONTENT=...> Tag")); 1968 1969 // Build a <META> tag, as in the comment above 1970 TagNode metaTN = new TagNode 1971 ("<META HTTP-EQUIV='" + httpEquiv + "' CONTENT='" + contentAttributeValue + "'>"); 1972 1973 // Insert the meta-tag into the page. Put it at the top of the header, 1974 // just after <HEAD> 1975 1976 Util.insertNodes(html, header.start + 1, NEWLINE, metaTN, NEWLINE); 1977 } 1978 1979 /** 1980 * The method will insert a {@code UTF-8} Meta-Tag that identifies the HTML-Page to any 1981 * Web-Browser that attempts to render its content as containing Foreign-Language 1982 * Characters, Emoji's & other non-{@code ASCII} Glyphs. 1983 * 1984 * <BR /><BR />{@code UTF-8} text utilizes/makes-use-of characters in a higher 1985 * {@code 'byte-range'} than the traditional <I>single-byte (256 different-characters) ASCII</I> 1986 * Character-Set. {@code UTF-8} allows for Chinese, Japanese and just about every variant of 1987 * language in the rest of the world. 1988 * 1989 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 1990 * 1991 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 1992 * 1993 * @see #hasUTF8MetaTag(Vector) 1994 * @see #UTF8MetaTag 1995 * @see TagNode 1996 * @see DotPair 1997 */ 1998 public static void insertUTF8MetaTag(Vector<HTMLNode> html) 1999 { 2000 // Meta-Tag to assert that the UTF-8 Charset is being used: 2001 // <meta http-equiv='Content-Type' content='text/html; charset=utf-8' /> 2002 2003 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 
2004 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 2005 2006 if (header == null) throw new NodeNotFoundException 2007 (NO_HEADER_MESSAGE.replace("INSERT-STR", "UTF-8 <META> Tag")); 2008 2009 // Insert the UTF-8 tag into the page. Put it at the top of the header, just 2010 // after <HEAD> 2011 2012 Util.insertNodes(html, header.start + 1, NEWLINE, new TagNode(UTF8MetaTag), NEWLINE); 2013 } 2014 2015 2016 // **************************************************************************************** 2017 // **************************************************************************************** 2018 // ITEMPROP Meta-Tags 2019 // **************************************************************************************** 2020 // **************************************************************************************** 2021 2022 2023 /** 2024 * This does a very simple insertion of an HTML Meta-Tag for a specific type, 2025 * Meta-Tags that have an {@code ITEMPROP}-Attribute paired with a 2026 * {@code CONTENT}-Attribute set. 
2027 * 2028 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 2029 * 2030 * @param itemProp This is a property that is passed via the {@code ITEMPROP}-Attribute 2031 * 2032 * @param contentAttributeValue This is the value that will be used to set the 2033 * {@code CONTENT}-Attribute 2034 * 2035 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 2036 * 2037 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM=contentAttributeValue 2038 * DATA-FILE-ID=FT_Q_EX> 2039 * 2040 * @see #metaTagItemProp 2041 * @see #getItemProp(Vector, String) 2042 * @see DotPair 2043 * @see TagNode 2044 */ 2045 public static void insertItemProp 2046 (Vector<HTMLNode> html, String itemProp, String contentAttributeValue) 2047 { 2048 // Builds and inserts a TagNode HTML Element that looks like: 2049 // <meta itemprop='INSERT-ITEMPROP-STRING-HERE' content='INSERT-CONTENT-STRING-HERE' > 2050 2051 // Single Quotes are used, so the attribute-value may not contain single quotes. 2052 checkForSingleQuote(contentAttributeValue); 2053 2054 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 2055 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 2056 2057 if (header == null) throw new NodeNotFoundException 2058 (NO_HEADER_MESSAGE.replace("INSERT-STR", "<META ITEMPROP=... CONTENT=...> tag")); 2059 2060 // Build a <META> tag, as in the comment above 2061 TagNode metaTN = new TagNode 2062 ("<META ITEMPROP='" + itemProp + "' CONTENT='" + contentAttributeValue + "'>"); 2063 2064 // Insert the meta-tag into the page. Put it at the top of the header, 2065 // just after <HEAD> 2066 2067 Util.insertNodes(html, header.start + 1, NEWLINE, metaTN, NEWLINE); 2068 } 2069 2070 /** 2071 * This method will find an HTML 2072 * <B STYLE='color: red;'>{@code <META ITEMPROP=... 
CONTENT=...>}</B> element whose 2073 * {@code ITEMPROP}-Attribute <B STYLE='color: red;'>value</B> is equal to the 2074 * {@code String}-parameter {@code 'itemProp'} (ignoring case). 2075 * 2076 * <BR /><BR />After such an HTML {@code META}-Tag has been identified, its 2077 * {@code CONTENT}-Attribute {@code String}-value will be subsequently queried, extracted 2078 * and returned by this method. 2079 * 2080 * <BR /><BR /><B CLASS=JDDescLabel>Returning null, Gracefully:</B> 2081 * 2082 * <BR />If the page provided does not have an HTML Meta-Tag with a {@code NAME}-Attribute 2083 * whose <B STYLE='color: red;'>value</B> is {@code 'name'} or if such an element is 2084 * identified, but that tag does not have a {@code CONTENT}-Attribute, then this method 2085 * will return null. 2086 * 2087 * <BR /><BR /><B CLASS=JDDescLabel>Case Insensitive Comparison:</B> 2088 * 2089 * <BR />Before the comparison is done with the {@code 'itemProp'} parameter, that 2090 * {@code String} is trimmed with {@code String.trim()}, and the comparison performed 2091 * <I>is done while ignoring case</I>. 2092 * 2093 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 2094 * 2095 * @param itemProp The Attribute-<B STYLE='color: red;'>name</B> of the 2096 * {@code ITEMPROP}-Attribute. 2097 * 2098 * @return The {@code String}-value of the {@code CONTENT}-Attribute for a 2099 * {@code META}-Tag whose {@code ITEMPROP}-Attribute is equal to the specified name 2100 * provided by parameter {@code 'itemProp'}. 2101 * 2102 * <BR /><BR />If such information is not found on the page, then this method returns null. 2103 */ 2104 public static String getItemProp(Vector<HTMLNode> html, String itemProp) 2105 { 2106 // Find the first <META ITEMPROP=... CONTENT=...> tag element where the name equals 2107 // the string-value provided by parameter 'itemProp'. 
2108 2109 TagNode tn = InnerTagGet.first 2110 (html, "META", "ITEMPROP", TextComparitor.EQ_CI, itemProp.trim()); 2111 2112 // If there are no <META ITEMPROP='itemProp' CONTENT=...> elements found on the page, 2113 // then this method returns null. 2114 2115 if (tn == null) return null; 2116 2117 // Return the string-value of the attribute 'content'. Note that if this 2118 // attribute isn't available, this method shall return 'null', gracefully. 2119 2120 return tn.AV("content"); 2121 } 2122 2123 2124 // **************************************************************************************** 2125 // **************************************************************************************** 2126 // Open-Graph Meta-Tags 2127 // **************************************************************************************** 2128 // **************************************************************************************** 2129 2130 2131 /** 2132 * This will insert a single Open-Graph Meta-Tag into an HTML-Page. 2133 * 2134 * <BR /><BR /><B CLASS=JDDescLabel>Prepending <CODE>'og:'</CODE></B> 2135 * 2136 * <BR />The name of the property <I><B>MUST NOT</B></I> begin with the characters 2137 * {@code "og:"}, because they will be prepended when the HTML 2138 * <B STYLE='color: red;'>{@code <META PROPERTY='...' CONTENT='...' />}</B> Tag is 2139 * instantiated. 2140 * 2141 * <BR /><BR />Please review <I>exact</I> method body below. 2142 * 2143 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=FT_HTML_PARAM> 2144 * 2145 * @param ogProperty This is the name of the Open-Graph protocol property that is being 2146 * inserted. Generally these are simple text-{@code String's} with alphanumeric-limited 2147 * names, or they are series of alphanumeric text-{@code String's}, separated by a period 2148 * {@code '.'} character. 
2149 * 2150 * @param ogValueAsStr If you look at the definition of the {@link #openGraphMetaTag} above 2151 * in this class, you may view all of the acceptable types that Open-Graph Properties may 2152 * use. 2153 * 2154 * <BR /><BR />Whichever property or field that is being inserted, mostly, the field must 2155 * be converted to a {@code String} when being passed to this method. 2156 * 2157 * @throws NodeNotFoundException <EMBED CLASS='external-html' DATA-FILE-ID=FT_NNF_EX> 2158 * 2159 * @throws QuotesException <EMBED CLASS='external-html' DATA-PARAM1=ogProperty 2160 * DATA-PARAM2=ogValueAsStr DATA-FILE-ID=FT_Q_EX_DOUBL> 2161 * 2162 * @see #openGraphMetaTag 2163 * @see #getAllOGMetaTags(Vector) 2164 * @see #checkForSingleQuote(String) 2165 * @see TagNode 2166 */ 2167 public static void insertOGMetaTag 2168 (Vector<HTMLNode> html, String ogProperty, String ogValueAsStr) 2169 { 2170 // Open graph tag looks like this: 2171 // <meta property='og:INSERT-OG-PROPERTY-HERE' content='INSERT-OG-VALUE-HERE' /> 2172 2173 checkForSingleQuote(ogProperty); 2174 checkForSingleQuote(ogValueAsStr); 2175 2176 // The HTML Page must have a <HEAD> ... </HEAD> section, or an exception shall throw. 2177 DotPair header = TagNodeFindInclusive.first(html, "HEAD"); 2178 2179 if (header == null) throw new NodeNotFoundException( 2180 NO_HEADER_MESSAGE.replace 2181 ("INSERT-STR", "Open-Graph <META NAME='og:...' ...> Tag") 2182 ); 2183 2184 // Build the Open-Graph Meta Tag 2185 TagNode metaTN = new TagNode 2186 ("<META PROPERTY='og:" + ogProperty+ "' CONTENT='" + ogValueAsStr +"'>"); 2187 2188 // Insert the tag into the page. Put it at the top of the header, just after <HEAD> 2189 Util.insertNodes(html, header.start + 1, NEWLINE, metaTN, NEWLINE); 2190 } 2191 2192 /** 2193 * This will search any Vectorized HTML-Pge for 2194 * <B STYLE='color: red;'>{@code <META PROPERTY='og:...' CONTENT='...'>}</B> Tags, and 2195 * retrieve them for placement into a {@code java.util.Properties} table. 
2196 * 2197 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 2198 * 2199 * @return This will return a Java {@code 'Properties'} Object, with all Open-Graph 2200 * properties saved inside. 2201 * 2202 * @see #openGraphMetaTag 2203 * @see #insertOGMetaTag(Vector, String, String) 2204 * @see TagNode#AV(String) 2205 * @see InnerTagGet 2206 */ 2207 public static Properties getAllOGMetaTags(Vector<? extends HTMLNode> html) 2208 { 2209 // InnerTagGet.all: Returns a vector of TagNode's that resemble: 2210 // <META property="og:..." ...> 2211 // 2212 // SW_CI_TRM: Check the 'property' Attribute-Value using a Case-Insensitive, 2213 // 'Starts-With' String-Comparison 2214 // Trim the 'property' Attribute-Value String of possible leading & 2215 // trailing White-Space before performing the comparison. 2216 2217 Vector<TagNode> v = InnerTagGet.all 2218 (html, "META", "PROPERTY", TextComparitor.SW_CI_TRM, "og:"); 2219 2220 Properties ret = new Properties(); 2221 2222 for (TagNode tn : v) 2223 ret.put(tn.AV("PROPERTY").substring(3), tn.AV("CONTENT")); 2224 2225 return ret; 2226 } 2227 } 2228}