001package Torello.HTML.NodeSearch; 002 003import java.util.*; 004import Torello.HTML.*; 005 006/** 007 * A simple, demonstrative set of functions for retrieving {@code HTMLNode's} from a web-page 008 * (a 'Workbook Class'). 009 * 010 * <EMBED CLASS='external-html' DATA-FILE-ID=ELEMENTS> 011 */ 012@Torello.JavaDoc.StaticFunctional 013public class Elements 014{ 015 private Elements() { } 016 017 /** 018 * Retrieves the start and end points of the web-page body in the underlying HTML 019 * page-{@code Vector}. 020 * All nodes between {@code <BODY> ... </BODY>} will be included. 021 * 022 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 023 * 024 * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested 025 * HTML sublist. 026 * 027 * @see InnerTagFindInclusive 028 */ 029 public static DotPair findBody(Vector<? extends HTMLNode> html) 030 { return InnerTagFindInclusive.first(html, "body"); } 031 032 /** 033 * Gets the nodes of the web-page body. 034 * All nodes between {@code <BODY> ... </BODY>} will be included. 035 * 036 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 037 * @return The requested HTML sublist, as a {@code Vector}. 038 * @see InnerTagGetInclusive 039 */ 040 public static Vector<HTMLNode> getBody(Vector<? extends HTMLNode> html) 041 { return InnerTagGetInclusive.first(html, "body"); } 042 043 /** 044 * Retrieves the start and end points of the web-page header in the underlying HTML 045 * page-{@code Vector}. 046 * All nodes between {@code <HEAD> ... </HEAD>} will be included. 047 * 048 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 049 * 050 * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested 051 * HTML sublist. 052 * 053 * @see InnerTagFindInclusive 054 */ 055 public static DotPair findHead(Vector<? extends HTMLNode> html) 056 { return InnerTagFindInclusive.first(html, "head"); } 057 058 /** 059 * Gets the nodes of the web-page header. 060 * All nodes between {@code <HEAD> ... </HEAD>} will be included. 061 * 062 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 063 * @return The requested HTML sublist, as a {@code Vector}. 064 * @see InnerTagGetInclusive 065 */ 066 public static Vector<HTMLNode> getHead(Vector<? extends HTMLNode> html) 067 { return InnerTagGetInclusive.first(html, "head"); } 068 069 /** 070 * Gets all {@code <META NAME="..." CONTENT="...">} (or {@code <META CHARSET="...">} 071 * and {@code <META HTTP-EQUIV="...">}) elements in a web-page header - returned via 072 * their position in the page-{@code Vector}. 073 * 074 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 075 * 076 * @return The requested HTML Elements, as an integer-array list of index-pointers to 077 * the underlying {@code Vector}. 078 * 079 * @see TagNodeFind 080 */ 081 public static int[] findMeta(Vector<? extends HTMLNode> html) 082 { return TagNodeFind.all(html, TC.OpeningTags, "meta"); } 083 084 /** 085 * Gets all {@code <META NAME="..." CONTENT="...">} (or {@code <META CHARSET="...">} 086 * and {@code <META HTTP-EQUIV="...">}) elements in a web-page header. 087 * 088 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 089 * @return The requested HTML Elements, as {@code TagNode's}, in a return {@code Vector}. 090 * @see TagNodeGet 091 */ 092 public static Vector<TagNode> getMeta(Vector<? extends HTMLNode> html) 093 { return TagNodeGet.all(html, TC.OpeningTags, "meta"); } 094 095 /** 096 * Gets all {@code <LINK REL="..." HREF="...">} elements in a web-page header - returned 097 * via their position in the page-{@code Vector}. 098 * 099 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 100 * 101 * @return The requested HTML Elements, as an integer-array list of index-pointers to 102 * the underlying {@code Vector}. 103 * 104 * @see TagNodeFind 105 */ 106 public static int[] findLink(Vector<? extends HTMLNode> html) 107 { return TagNodeFind.all(html, TC.OpeningTags, "link"); } 108 109 /** 110 * Gets all {@code <LINK REL="..." HREF="...">} elements in a web-page header. 111 * 112 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 113 * @return The requested HTML Elements, as {@code TagNode's}, in a return {@code Vector}. 114 * @see TagNodeGet 115 */ 116 public static Vector<TagNode> getLink(Vector<? extends HTMLNode> html) 117 { return TagNodeGet.all(html, TC.OpeningTags, "link"); } 118 119 /** 120 * Returns the start and end positions in the page-{@code Vector} of the HTML 121 * {@code <TITLE>...</TITLE>} elements. 122 * 123 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 124 * 125 * @return The start and end index pointers, as a {@code DotPair}, of the HTML 126 * requested HTML sublist. 127 * 128 * @see InnerTagFindInclusive 129 */ 130 public static DotPair findTitle(Vector<? extends HTMLNode> html) 131 { return TagNodeFindInclusive.first(html, "title"); } 132 133 /** 134 * Returns the {@code <TITLE>...</TITLE>} elements sub-list from the HTML page-{@code Vector}. 135 * 136 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 137 * @return The requested HTML sublist, as a {@code Vector}. 138 * @see InnerTagGetInclusive 139 */ 140 public static Vector<HTMLNode> getTitle(Vector<? extends HTMLNode> html) 141 { return TagNodeGetInclusive.first(html, "title"); } 142 143 /** 144 * Returns the {@code String} encapsulated by the HTML {@code 'HEAD'}-section's 145 * {@code "<TITLE>...</TITLE>"} element, if there such an element. If there is no such 146 * element, null is returned. If there is a {@code 'TITLE'} element, but it has the 147 * empty-{@code String} (zero-length-string) an empty {@code String} is returned. 148 * 149 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 150 * Retrieves the {@code 'TITLE'} of an HTML page - by getting the {@code String}-text between 151 * the {@code 'TITLE'} elements. 152 * 153 * @return The title string 154 */ 155 public static String titleString(Vector<? extends HTMLNode> html) 156 { 157 Vector<HTMLNode> title = getTitle(html); 158 159 if (title == null) return null; 160 161 return Util.textNodesString(title); 162 } 163 164 /** 165 * This method will find the very first HTML {@code 'TABLE'} 166 * (<CODE><TABLE> <TH>...</TH> <TR> <TD>..</TD> ... 167 * </TR> ... </TABLE></CODE>) element set. This returns the {@code Vector} 168 * Position starting and ending boundaries {@code DotPair.start, DotPair.end} rather than 169 * pointer-references to the nodes. This is what the <B>{@code 'FIND'}</B> keyword usually 170 * means in this HTML-Scrape package. 171 * 172 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 173 * 174 * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested HTML 175 * sublist. 176 * 177 * @see TagNodeFindInclusive 178 */ 179 public static DotPair findTable(Vector<? extends HTMLNode> html) 180 { return TagNodeFindInclusive.first(html, "table"); } 181 182 /** 183 * This method will find the very first HTML {@code 'TABLE'} 184 * (<CODE><TABLE> <TH>...</TH> <TR> <TD>..</TD> ... 185 * </TR> ... </TABLE></CODE>) element set. This returns the {@code Vector} Position 186 * starting and ending boundaries {@code DotPair.start, DotPair.end} rather than 187 * pointer-references to the nodes. This is what the <B>{@code 'FIND'}</B> keyword usually 188 * means in this HTML-Scrape package. 189 * 190 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 191 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 192 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 193 * 194 * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested HTML 195 * sublist. 196 * 197 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 198 * @see TagNodeFindInclusive 199 */ 200 public static DotPair findTable(Vector<? extends HTMLNode> html, int sPos, int ePos) 201 { return TagNodeFindInclusive.first(html, sPos, ePos, "table"); } 202 203 /** 204 * This method will get the very first HTML {@code 'TABLE'} 205 * (<CODE><TABLE> <TR> <TH>...</TH> </TR> <TR> 206 * <TD>..</TD> ... </TR> ... </TABLE></CODE>) element set. This 207 * returns a sub-{@code Vector} (an actual {@code Vector<HTMLNode>} object, not a {@code Vector 208 * / array} starting and ending indices pair). This is what the <B>{@code 'GET'}</B> keyword 209 * usually means in this HTML-Scrape package. 210 * 211 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 212 * @return The requested HTML sublist, as a {@code Vector}. 213 * @see TagNodeGetInclusive 214 */ 215 public static Vector<HTMLNode> getTable(Vector<? extends HTMLNode> html) 216 { return TagNodeGetInclusive.first(html, "table"); } 217 218 /** 219 * This method will get the very first HTML {@code 'TABLE'} 220 * (<CODE><TABLE> <TH>...</TH> <TR> <TD>..</TD> ... 221 * </TR> ... </TABLE></CODE>) element set. This returns a sub-vector (an actual 222 * {@code Vector<HTMLNode>} object, not a {@code Vector / array} starting and ending indices 223 * pair). This is what the <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape 224 * package. 225 * 226 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 227 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 228 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 229 * @return The requested HTML sublist, as a {@code Vector}. 230 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 231 * @see TagNodeGetInclusive 232 */ 233 public static Vector<HTMLNode> getTable(Vector<? extends HTMLNode> html, int sPos, int ePos) 234 { return TagNodeGetInclusive.first(html, sPos, ePos, "table"); } 235 236 237 238 239 240 241 242 /** 243 * This method will find the very first first HTML {@code 'SELECT-OPTION'} set. 244 * (<CODE><SELECT> ... <OPTION> ... </OPTION> .. </SELECT></CODE>) 245 * element set. This returns the {@code Vector} Position starting and ending boundaries 246 * {@code DotPair.start, DotPair.end} rather than pointer-references to the nodes. This is 247 * what the <B>{@code 'FIND'}</B> keyword usually means in this HTML-Scrape package. 248 * 249 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 250 * 251 * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested 252 * HTML sublist. 253 * 254 * @see TagNodeFindInclusive 255 */ 256 public static DotPair findSelect(Vector<? extends HTMLNode> html) 257 { return TagNodeFindInclusive.first(html, "select"); } 258 259 /** 260 * This method will find the very first first HTML {@code 'SELECT-OPTION'} set. 261 * (<CODE><SELECT> ... <OPTION> ... </OPTION> .. </SELECT></CODE>) 262 * element set. This returns the {@code Vector} Position starting and ending boundaries 263 * {@code DotPair.start, DotPair.end} rather than pointer-references to the nodes. This is 264 * what the <B>{@code 'FIND'}</B> keyword usually means in this HTML-Scrape package. 265 * 266 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 267 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 268 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 269 * 270 * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested 271 * HTML sublist. 272 * 273 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 274 * 275 * @see TagNodeFindInclusive 276 */ 277 public static DotPair findSelect(Vector<? extends HTMLNode> html, int sPos, int ePos) 278 { return TagNodeFindInclusive.first(html, sPos, ePos, "select"); } 279 280 /** 281 * This method will find the very first first HTML {@code 'SELECT-OPTION'} set. 282 * (<CODE><SELECT> ... <OPTION> ... </OPTION> .. </SELECT></CODE>) 283 * element set. This returns a sub-vector (an actual {@code Vector<HTMLNode>} object, not 284 * a {@code Vector / array} starting and ending indices pair.) This is what the 285 * <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape package. 286 * 287 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 288 * @return The requested HTML sublist, as a {@code Vector}. 289 * @see TagNodeGetInclusive 290 */ 291 public static Vector<HTMLNode> getSelect(Vector<? extends HTMLNode> html) 292 { return TagNodeGetInclusive.first(html, "select"); } 293 294 /** 295 * This method will find the very first first HTML {@code 'SELECT-OPTION'} set. 296 * (<CODE><SELECT> ... <OPTION> ... </OPTION> .. </SELECT></CODE>) 297 * element set. This returns a sub-vector (an actual {@code Vector<HTMLNode>} object, not 298 * a {@code Vector / array} starting and ending indices pair). This is what the 299 * <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape package. 300 * 301 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 302 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 303 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 304 * @return The requested HTML sublist, as a {@code Vector}. 305 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 306 * @see TagNodeGetInclusive 307 */ 308 public static Vector<HTMLNode> getSelect(Vector<? extends HTMLNode> html, int sPos, int ePos) 309 { return TagNodeGetInclusive.first(html, sPos, ePos, "select"); } 310 311 312 313 314 315 316 317 /** 318 * This method will find the very first HTML Un-Ordered List 319 * (<CODE><UL> ..<LI>...</LI> ... </UL></CODE>) element set. 320 * This returns the {@code Vector} Position starting and ending boundaries 321 * {@code DotPair.start, DotPair.end} rather than pointer-references to the nodes. This is 322 * what the <B>{@code 'FIND'}</B> keyword usually means in this HTML-Scrape package. 323 * 324 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 325 * 326 * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested 327 * HTML sublist. 328 * 329 * @see TagNodeFindInclusive 330 */ 331 public static DotPair findUL(Vector<? extends HTMLNode> html) 332 { return TagNodeFindInclusive.first(html, "ul"); } 333 334 /** 335 * This method will find the very first HTML Un-Ordered List 336 * (<CODE><UL> ..<LI>...</LI> ... </UL></CODE>) element set. 337 * This returns the {@code Vector} Position starting and ending boundaries 338 * {@code DotPair.start, DotPair.end} rather than pointer-references to the nodes. This is 339 * what the <B>{@code 'FIND'}</B> keyword usually means in this HTML-Scrape package. 340 * 341 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 342 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 343 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 344 * 345 * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested HTML 346 * sublist. 347 * 348 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 349 * @see TagNodeFindInclusive 350 */ 351 public static DotPair findUL(Vector<? extends HTMLNode> html, int sPos, int ePos) 352 { return TagNodeFindInclusive.first(html, sPos, ePos, "ul"); } 353 354 /** 355 * This method will find the very first HTML Un-Ordered List 356 * (<CODE><UL> ..<LI>...</LI> ... </UL></CODE>) element set. 357 * This returns a sub-vector (an actual {@code Vector<HTMLNode>} object, not a 358 * {@code Vector / array} starting and ending indices pair). 359 * This is what the <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape package. 360 * 361 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 362 * @return The requested HTML sublist, as a {@code Vector}. 363 * @see TagNodeGetInclusive 364 */ 365 public static Vector<HTMLNode> getUL(Vector<? extends HTMLNode> html) 366 { return TagNodeGetInclusive.first(html, "ul"); } 367 368 /** 369 * This method will find the very first HTML Un-Ordered List 370 * (<CODE><UL> ..<LI>...</LI> ... </UL></CODE>) element set. 371 * This returns a sub-vector (an actual {@code Vector<HTMLNode>} object, not a 372 * {@code Vector / array} starting and ending indices pair). 373 * This is what the <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape package. 374 * 375 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 376 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 377 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 378 * @return The requested HTML sublist, as a {@code Vector}. 379 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 380 * @see TagNodeGetInclusive 381 */ 382 public static Vector<HTMLNode> getUL(Vector<? extends HTMLNode> html, int sPos, int ePos) 383 { return TagNodeGetInclusive.first(html, sPos, ePos, "ul"); } 384 385 386 387 388 389 390 391 /** 392 * This method will find the very first HTML Un-Ordered List 393 * (<CODE><OL> ..<LI>...</LI> ... </OL></CODE>) element set. 394 * This returns the {@code Vector} Position starting and ending boundaries 395 * {@code DotPair.start, DotPair.end} rather than pointer-references to the nodes. This is 396 * what the <B>{@code 'FIND'}</B> keyword usually means in this HTML-Scrape package. 397 * 398 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 399 * 400 * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested 401 * HTML sublist. 402 * 403 * @see TagNodeFindInclusive 404 */ 405 public static DotPair findOL(Vector<? extends HTMLNode> html) 406 { return TagNodeFindInclusive.first(html, "ol"); } 407 408 /** 409 * This method will find the very first HTML Un-Ordered List 410 * (<CODE><OL> ..<LI>...</LI> ... </OL></CODE>) element set. 411 * This returns the {@code Vector} Position starting and ending boundaries 412 * {@code DotPair.start, DotPair.end} rather than pointer-references to the nodes. This 413 * is what the <B>{@code 'FIND'}</B> keyword usually means in this HTML-Scrape package. 414 * 415 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 416 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 417 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 418 * 419 * @return The start and end index pointers, as a {@code DotPair}, of the HTML requested 420 * HTML sublist. 421 * 422 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 423 * @see TagNodeFindInclusive 424 */ 425 public static DotPair findOL(Vector<? extends HTMLNode> html, int sPos, int ePos) 426 { return TagNodeFindInclusive.first(html, sPos, ePos, "ol"); } 427 428 /** 429 * This method will find the very first HTML Un-Ordered List 430 * (<CODE><OL> ..<LI>...</LI> ... </OL></CODE>) element set. 431 * This returns a sub-vector (an actual {@code Vector<HTMLNode>} object, not a 432 * {@code Vector / array} starting and ending indices pair). 433 * This is what the <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape package. 434 * 435 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 436 * @return The requested HTML sublist, as a {@code Vector}. 437 * @see TagNodeGetInclusive 438 */ 439 public static Vector<HTMLNode> getOL(Vector<? extends HTMLNode> html) 440 { return TagNodeGetInclusive.first(html, "ol"); } 441 442 /** 443 * This method will find the very first HTML Un-Ordered List 444 * (<CODE><OL> ..<LI>...</LI> ... </OL></CODE>) element set. 445 * This returns a sub-vector (an actual {@code Vector<HTMLNode>} object, not a 446 * {@code Vector / array} starting and ending indices pair). 447 * This is what the <B>{@code 'GET'}</B> keyword usually means in this HTML-Scrape package. 448 * 449 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 450 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 451 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 452 * @return The requested HTML sublist, as a {@code Vector}. 453 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 454 * @see TagNodeGetInclusive 455 */ 456 public static Vector<HTMLNode> getOL(Vector<? extends HTMLNode> html, int sPos, int ePos) 457 { return TagNodeGetInclusive.first(html, sPos, ePos, "ol"); } 458 459 460 461 462 463 /** 464 * This will use the "L1 Inclusive" concept defined in this HTML package to provide a list 465 * (returned using the type: {@code java.util.Vector<DotPair>}) of each element that fits the 466 * <CODE><OPTION> ... </OPTION></CODE> HTML "select-option element" structure. 467 * 468 * @param selectList An HTML list of {@code TagNode's} and {@code TextNode's} that constitute 469 * an selection-option drop-down menu. This list cannot contain extraneous {@code TagNode's} 470 * or {@code TextNode's}, but rather, must begin and end with the open and close "select" 471 * HTML drop-down menu Tags. 472 * 473 * @return A <I>"list of lists"</I> - specifically, a list of <B>{@code Torello.HTML.DotPair 474 * }</B>, each of which delineate a complete {@code <OPTION> ... </OPTION>} sub-list that are 475 * present within this HTML "select" drop-down-menu structure. 476 * 477 * @throws MalformedHTMLException This method in no way performs a complete evaluation of the 478 * HTML structure provided by the user in the <B>{@code Vector<? extends HTMLNode> list} 479 * parameter </B> that is passed. However rules that are related to the HTML 480 * elements "Select Option" {@code <SELECT>...<OPTION> ... </OPTION> ... </SELECT>} are 481 * inspected. 482 * 483 * <BR /><BR /><UL CLASS=JDUL> 484 * <LI> If the <B>passed list parameter</B> <I>does not start and end with the <B> exact HTML 485 * elements</B> - {@code <SELECT>, </SELECT>} </I>, then this exception is thrown. 486 * </LI> 487 * <LI> If the <B>passed list parameter</B> contains "extraneous HTML tags" or "extraneous text" 488 * in between the {@code <OPTION> ... </OPTION> or <SELECT> ... </SELECT>} list-start 489 * and list-end demarcated HTML TagNodes, then the 490 * {@code Torello.HTML.MalformedHTMLException } will, again, be thrown 491 * </LI> 492 * </UL> 493 * 494 * @see #checkEndPoints(Vector, String[]) 495 * @see #checkL1(Vector, Vector) 496 * @see TagNodeFindL1Inclusive 497 */ 498 public static Vector<DotPair> findAllOption 499 (Vector<? extends HTMLNode> selectList) throws MalformedHTMLException 500 { 501 checkEndPoints(selectList, "select"); 502 503 Vector<DotPair> ret = TagNodeFindL1Inclusive.all(selectList, "option"); 504 505 checkL1(selectList, ret); 506 507 return ret; 508 } 509 510 /** 511 * This does the exact same thing as {@code findAllOption(Vector)} but the returned value is 512 * converted from "sublist endpoints" (a vector of start/end pairs), and into a "List of 513 * Sub-Lists", which is specifically a list {@code (java.util.Vector<>)} containing sub-lists 514 * (also: {@code java.util.Vector<HTMLNode>}) 515 * 516 * <BR /><BR /><B>NOTE:</B> All of the rules and conditions explained in the comments for 517 * method <B>{@code findAllOption(Vector)}</B> apply to this method as well. 518 * 519 * @param selectList An HTML list of {@code TagNode's} and {@code TextNode's} that constitute 520 * an selection-option drop-down menu. 521 * This list cannot contain extraneous {@code TagNode's} or {@code TextNode's}, but rather, 522 * must begin and end with the open and close "select" HTML drop-down menu Tags. 523 * 524 * @return A <I>"list of lists"</I> - specifically, a list of 525 * <B>{@code java.util.Vector<HTMLNode>} (sublists)</B>, each of which delineate 526 * a complete {@code <OPTION> ... </OPTION>} sub-list that are present within this HTML 527 * "select" drop-down-menu structure. 528 * 529 * @throws MalformedHTMLException This method in no way performs a complete evaluation of the 530 * HTML structure provided by the user in the <B>{@code Vector<? extends HTMLNode> list} 531 * parameter </B> that is passed. However rules that are related to the HTML 532 * elements "Select Option" {@code <SELECT>...<OPTION> ... </OPTION> ... </SELECT>} are 533 * inspected. 534 * 535 * <BR ><BR /><UL CLASS=JDUL> 536 * <LI> If the <B>passed list parameter</B> <I>does not start and end with the <B> exact HTML 537 * elements</B> - {@code <SELECT>, </SELECT>}</I>, then this exception is thrown. 538 * </LI> 539 * <LI> If the <B>passed list parameter</B> contains "extraneous HTML tags" or "extraneous 540 * text" in between the {@code <OPTION> ... </OPTION> or <SELECT> ... </SELECT>} 541 * list-start and list-end demarcated HTML TagNodes, then the 542 * {@code Torello.HTML.MalformedHTMLException } will, again, be thrown 543 * </LI> 544 * </UL> 545 * 546 * @see DPUtil#toVectors(Vector, Iterable) 547 */ 548 public static Vector<Vector<HTMLNode>> getAllOption 549 (Vector<? extends HTMLNode> selectList) throws MalformedHTMLException 550 { return DPUtil.toVectors(selectList, findAllOption(selectList)); } 551 552 553 554 555 556 557 558 559 560 561 /** 562 * This will use the "L1 Inclusive" concept defined in this HTML package to provide a list 563 * (returned using the type: 564 * {@code java.util.Vector<DotPair>}) of each element that fits the 565 * <CODE><LI> ... </LI></CODE> HTML "list element" structure. 566 * 567 * @param list An HTML list of {@code TagNode's} and {@code TextNode's} that constitute an 568 * ordered or unordered list. This list cannot contain 569 * extraneous {@code TagNode's} or {@code TextNode's}, but rather, must begin and end with 570 * the open and close list Tags. 571 * 572 * @return A <I>"list of lists"</I> - specifically, a list of 573 * <B>{@code Torello.HTML.DotPair}</B>, each of which delineate a complete {@code <LI> ... 574 * </LI>} sub-list that are present within this HTML list structure. 575 * 576 * @throws MalformedHTMLException This method in no way performs a complete evaluation of the 577 * HTML structure provided by the user in the <B>{@code Vector<? extends HTMLNode> list} 578 * parameter </B> that is passed. However rules that are related to the HTML elements 579 * "Ordered List" {@code <OL>...</OL>} and "unordered list" {@code <UL>...</UL>} are 580 * inspected. 581 * 582 * <BR /><BR /><UL CLASS=JDUL> 583 * <LI> If the <B>passed list parameter</B> <I>does not start and end with the <B>same HTML 584 * elements</B> - specifically {@code <OL>, <UL>} </I>, then this exception is thrown. 585 * </LI> 586 * <LI> If the <B>passed list parameter</B> contains "extraneous HTML tags" or "extraneous text" 587 * in between the {@code <OL> or <UL> ... </OL> or </UL>} list-start and list-end 588 * demarcated HTML TagNodes, then the {@code Torello.HTML.MalformedHTMLException } 589 * will, again, be thrown 590 * </LI> 591 * </UL> 592 * 593 * @see #checkEndPoints(Vector, String[]) 594 * @see #checkL1(Vector, Vector) 595 * @see TagNodeFindL1Inclusive 596 */ 597 public static Vector<DotPair> findAllLI(Vector<? extends HTMLNode> list) 598 throws MalformedHTMLException 599 { 600 checkEndPoints(list, "ol", "ul"); 601 602 Vector<DotPair> ret = TagNodeFindL1Inclusive.all(list, "li"); 603 604 checkL1(list, ret); 605 606 return ret; 607 } 608 609 /** 610 * This does the exact same thing as {@code findAllLI(Vector)} but the returned value is 611 * converted from "sublist endpoints" (a vector of start/end pairs), and into a "List of 612 * Sub-Lists", which is specifically a list {@code (java.util.Vector<>)} containing sub-lists 613 * (also: {@code java.util.Vector<HTMLNode>}) 614 * 615 * <BR /><BR /><B>NOTE:</B> All of the rules and conditions explained in the comments for 616 * method <B>{@code findAllLI(Vector)}</B> apply to this method as well. 617 * 618 * @param list An HTML list of {@code TagNode's} and {@code TextNode's} that constitute an 619 * ordered or unordered list. This list cannot contain extraneous {@code TagNode's} or 620 * {@code TextNode's}, but rather, must begin and end with the open and close list Tags. 621 * 622 * @return A <I>"list of lists"</I> - specifically, a list of 623 * <B>{@code java.util.Vector<HTMLNode>} (sublists)</B>, each of which delineate 624 * a complete <UL>...</UL> sub-list that are present within this HTML list 625 * structure. 626 * 627 * @throws MalformedHTMLException This method in no way performs a complete evaluation of the 628 * HTML structure provided by the 629 * user in the <B>{@code Vector<? extends HTMLNode> list} parameter </B> that is passed. 630 * However rules that are related to the HTML elements "Ordered List" 631 * (<CODE><OL>...</OL></CODE>) and "unordered list" 632 * (<CODE><UL>...</UL></CODE>) are inspected. 633 * 634 * <BR /><BR /><UL CLASS=JDUL> 635 * <LI> If the <B>passed list parameter</B> <I>does not start and end with the <B>same HTML 636 * elements</B> - specifically {@code <OL>, <UL>} </I>, then this exception is thrown. 637 * </LI> 638 * <LI> If the <B>passed list parameter</B> contains "extraneous HTML tags" or "extraneous text" 639 * in between the {@code <OL> or <UL> ... </OL> or </UL>} list-start and list-end 640 * demarcated HTML {@code TagNode's}, then the {@code Torello.HTML.MalformedHTMLException} 641 * will, again, be thrown. 642 * </LI> 643 * </UL> 644 * 645 * @see DPUtil#toVectors(Vector, Iterable) 646 */ 647 public static Vector<Vector<HTMLNode>> getAllLI 648 (Vector<? extends HTMLNode> list) throws MalformedHTMLException 649 { return DPUtil.toVectors(list, findAllLI(list)); } 650 651 652 653 654 655 /** 656 * This method is used to guarantee precisely two conditions to the passed HTML Tag list. 657 * 658 * <BR /><BR /><UL CLASS=JDUL> 659 * <LI> <B>Condition 1:</B> The {@code Vector<HTMLNode> list } parameter begins and ends with 660 * the <I>exact same HTML Tag</I>, (for instance: {@code <H1> ... </H1>}, or perhaps 661 * {@code <LI> ... </LI> }) 662 * </LI> 663 * <LI> <B>Condition 2:</B> The HTML-Tag that is found at the start and end of this list is one 664 * contained within the {@code 'tokList'} variable-length {@code String-array} parameter. 665 * (if the {@code 'tokList'} parameter was a {@code java.lang.String[] tokList = { "th", 666 * "tr" }}, then the passed "HTMLNode list" ({@code Vector}) parameter would have to begin 667 * and end with either: {@code <TH> ... </TH> } or with {@code <TR> ... </TR> } 668 * </LI> 669 * </UL> 670 * 671 * <BR />Much of the java code in this method is used to provide some explanatory Exception 672 * message information. 673 * 674 * @param list This is supposed to be a typical "open" and "close" HTML TagNode structure. It 675 * may be anything including: 676 * <SPAN STYLE="color: green;">{@code <DIV ID="..."> ... </DIV> }, or 677 * {@code <TABLE ...> ... </TABLE> }, or even {@code <BODY> ... </BODY> } 678 * </SPAN> 679 * 680 * @param tokList This is expected to be the possible set of tokens with which this HTML list 681 * may begin or end with. 682 * 683 * @return If the passed list parameter passes both the conditions specified above, then the 684 * token from the list of tokens that were provided is returned. 685 * 686 * <BR /><BR /><B>NOTE:</B> If the list does not meet these conditions, a 687 * {@code Torello.HTML.MalformedHTMLException } will be thrown with an 688 * explanatory exception-message (and, obviously, the method will not return anything!) 689 * 690 * @throws MalformedHTMLException Some explanatory information is provided to the coder for 691 * what has failed with the input list. 692 */ 693 protected static String checkEndPoints 694 (Vector<? extends HTMLNode> list, String... tokList) throws MalformedHTMLException 695 { return checkEndPoints(list, 0, list.size()-1, tokList); } 696 697 /** 698 * This method, functionally, does the exact same thing as "checkEndPoints" - but with the 699 * endpoints specified. It is being kept with <B><I>protected</I></B> access since it might 700 * be unclear what endpoints are being checked. The previous method has many java exception 701 * case strings laboriously typed out. Rather than retype this, this method is being 702 * introduced. Functionally, it does the same thing as {@code checkEndPoints(Vector, String)} 703 * - except it does not use {@code list.elementAt(0)} or 704 * {@code list.elementAt(element.size()-1)} as the starting and ending points. 705 * 706 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 707 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 708 * @param tokList The list of valid HTML Element names (tokens). 709 * @see #checkEndPoints(Vector, String[]) 710 */ 711 protected static String checkEndPoints 712 (Vector<? extends HTMLNode> list, int sPos, int ePos, String... tokList) 713 throws MalformedHTMLException 714 { 715 HTMLNode n = null; String tok = null; 716 717 if ((n = list.elementAt(sPos)).isTagNode()) 718 tok = ((TagNode) n).tok; 719 720 else throw new MalformedHTMLException( 721 "This list does not begin an HTML TagNode, but rather a: " + 722 n.getClass().getName() + "\n" + n.str 723 ); 724 725 if (! (n = list.elementAt(ePos)).isTagNode()) 726 727 throw new MalformedHTMLException( 728 "This list does not end with an HTML TagNode, but rather a : " + 729 n.getClass().getName() + "\n" + n.str 730 ); 731 732 if (! ((TagNode) n).tok.equals(tok)) 733 734 throw new MalformedHTMLException( 735 "This list does not begin and end with the same HTML TagNode:\n" + 736 "[OpeningTag: " + tok + "]\t[ClosingTag: " + ((TagNode) n).tok + "]" 737 ); 738 739 for (String t : tokList) if (t.equals(tok)) return tok; 740 741 String expectedTokList = ""; 742 743 for (String t: tokList) expectedTokList += " " + t; 744 745 throw new MalformedHTMLException( 746 "The opening and closing HTML Tag tokens for this list are not members of the " + 747 "tokList parameter set...\n" + 748 "Expected HTML Tag List: " + expectedTokList + "\nFound Tag: " + tok 749 ); 750 } 751 752 /** 753 * This checks that the sublists demarcated by the {@code Vector<DotPair> htmlSubLists } 754 * parameter are properly formatted HTML. It would be easier to provide an example of 755 * "proper HTML formatting" and "improper HTML formatting" here, rather that trying to explain 756 * this using English. 757 * 758 * <BR /><BR /> 759 * <B>PROPER HTML:</B> 760 * 761 * <DIV CLASS="HTML">{@code 762 * <UL> 763 * <LI> This is a list element.</LI> 764 * <LI> This is another list element.</LI> 765 * <LI> This list element contains <B><I> extra-tags</I></B> like "bold", "italics", and 766 * even a <A HREF="http://Torello.Directory">link!</A></LI> 767 * </UL> 768 * }</DIV> 769 * 770 * <BR /><B>IMPROPER HTML:</B> 771 * 772 * <DIV CLASS="HTML">{@code 773 * <UL> 774 * This text should not be here, and constitutes "malformed HTML" 775 * <LI> This LI element is just fine.</LI> 776 * <A HREF="http://ChineseNewsBoard.com">This link</A> should be between LI elements 777 * <LI> This LI element is also just fine!</LI> 778 * </UL> 779 * }</DIV> 780 * <BR />In the above two lists, the latter would generate a MalformedHTMLException 781 * 782 * @throws MalformedHTMLException whenever improper HTML is presented to this function 783 */ 784 protected static void checkL1(Vector<? extends HTMLNode> list, Vector<DotPair> sublists) 785 throws MalformedHTMLException 786 { checkL1(list, 0, list.size()-1, sublists); } 787 788 /** 789 * This method, functionally, does the exact same thing as "checkEL1" - but with the endpoints 790 * specified. It is being kept with <B><I>protected</I></B> access since it might be unclear 791 * what endpoints are being checked. The previous method has many java exception case 792 * {@code String's} laboriously typed out. Rather than retype this, this method is being 793 * introduced. Functionally, it does the same thing as 794 * {@code checkL1(Vector, String)} - except it does not use {@code list.elementAt(0)} 795 * or {@code list.elementAt(element.size()-1) } as the starting and ending points. 796 * 797 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 798 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 799 * @see #checkL1(Vector, Vector) 800 */ 801 protected static void checkL1 802 (Vector<? extends HTMLNode> list, int sPos, int ePos, Vector<DotPair> sublists) 803 throws MalformedHTMLException 804 { 805 int last = sPos; 806 int t = ePos - 1; 807 HTMLNode n = null; 808 809 for (DotPair sublist : sublists) 810 811 if (sublist.start == (last+1)) last = sublist.end; 812 813 else 814 { 815 if ((sublist.start < (last+1)) || (sublist.start >= t)) 816 817 throw new IllegalArgumentException( 818 "The provided subLists parameter does not contain subLists that are in " + 819 "order of the original list. The 'list of sublists' must contain " + 820 "sublists that are in increasing sorted order.\n" + 821 "Specifically, each sublist must contain start and end points that are " + 822 "sequentially increasing. Also, they may not overlap." 823 ); 824 825 else 826 { 827 for (int i=(last+1); i < sublist.start; i++) 828 829 if ((n = list.elementAt(i)).isTagNode()) 830 831 throw new MalformedHTMLException( 832 "There is a spurious HTML-Tag element at Vector position: " + i + 833 "\n=>\t" + n.str 834 ); 835 836 else if (n.isTextNode() && (n.str.trim().length() > 0)) 837 838 throw new MalformedHTMLException( 839 "There is a spurious Text-Node element at Vector position: " + i + 840 "\n=>\t" + n.str 841 ); 842 } 843 } 844 } 845 846}