package Torello.HTML;

import java.util.*;
import java.io.IOException;
import java.util.stream.IntStream;

import Torello.Java.*;

import Torello.Java.Additional.RemoveUnsupportedIterator;

/**
 * A basic tool for finding Java-Script Listener Attributes in the {@link TagNode} elements in a
 * Vectorized-HTML Web-Page.
 *
 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=LISTENERS>
 */
@Torello.JavaDoc.StaticFunctional
public class Listeners
{
    // Static-only utility class: no instances.
    private Listeners() { }

    // The set of known Java-Script listener attribute names, read once from a data-file that is
    // looked up relative to this class.  Names added through 'addNewListenerName' are stored in
    // lower-case.
    // NOTE(review): the names inside the data-file are presumably lower-case as well - confirm.
    @SuppressWarnings("unchecked")
    private static final TreeSet<String> l = (TreeSet<String>) LFEC.readObjectFromFile_JAR
        (Listeners.class, "data-files/Listeners.tsdat", true, TreeSet.class);

    /**
     * Prints every known listener name to {@code System.out}, each one followed by
     * {@code ", "}.
     *
     * @param argv Not used.
     */
    public static void main(String[] argv)
    {
        for (String s : l) System.out.print(s + ", ");
    }

    /**
     * This will return an {@code Iterator} of the listed java-script listeners available in this
     * class.
     *
     * @return An {@code Iterator} over the internal set of known listener names.  The iterator
     * is wrapped in a {@code RemoveUnsupportedIterator}, which (going by its name) is expected to
     * reject calls to {@code remove()}, so that the internal set cannot be modified through it.
     */
    public static Iterator<String> listAllAvailable()
    { return new RemoveUnsupportedIterator<String>(l.iterator()); }

    /**
     * This allows the user to add the name of a new listener that is not already stored in the
     * internal-set of known java-script listeners.  When searching a page for listeners, this
     * class will only (obviously) be able to find ones whose names are known.
     *
     * <BR /><BR />The name is converted to lower-case using {@code Locale.ROOT} before it is
     * stored, so that the result does not depend on the default locale of the JVM (in a Turkish
     * locale, for instance, {@code "ONCLICK".toLowerCase()} does not produce {@code "onclick"}).
     *
     * @param listenerName The name of a listener that is not already known by this class.
     *
     * @return {@code TRUE} if the listener name was not already present in the internal set (and
     * has now been added), {@code FALSE} if attempting to add a listener that is already in the
     * set.
     *
     * @throws NullPointerException If {@code 'listenerName'} is null.
     */
    public static boolean addNewListenerName(String listenerName)
    {
        Objects.requireNonNull(listenerName, "listenerName");
        return l.add(listenerName.toLowerCase(Locale.ROOT));
    }

    /**
     * This will test whether listeners are present in the {@code TagNode}, and if so - return
     * them.
     *
     * <BR /><TABLE CLASS=JDBriefTable>
     * <TR><TH>Input {@code TagNode}</TH><TH>Output Properties:</TH></TR>
     * <TR><TD><CODE>&lt;frameset cols="20%,80%" title="Documentation frame"
     * onload="top.loadFrames()"&gt;</CODE></TD>
     * <TD><CODE>onload: top.loadFrames()</CODE></TD></TR>
     * <TR><TD><CODE>&lt;a href="javascript:void(0);" onclick="return
     * j2gb('http://www.gov.cn');"&gt;</CODE></TD>
     * <TD><CODE>onclick: return j2gb('http://www.gov.cn');</CODE></TD></TR>
     * </TABLE>
     *
     * @param tn This may be any {@code TagNode}, but it will be tested for JavaScript listeners.
     *
     * @return Will return a {@code java.util.Properties} object that contains a key-value table of
     * any/all listeners present in the {@code TagNode.}  If there are no listeners, this method
     * <I>will not return null</I>, it will return an <I>empty {@code Properties} object</I>.
     *
     * @see TagNode#AV(String)
     * @see StrCmpr#containsIgnoreCase(String, String)
     */
    public static Properties extract(TagNode tn)
    {
        Properties p = new Properties();

        for (String listener : l)
        {
            // Optimization: a plain sub-string test is used first, to skip the attribute
            // look-up for listener names that do not occur anywhere in the tag's text.
            if (! StrCmpr.containsIgnoreCase(tn.str, listener)) continue;

            // This **may** seem redundant, but it is not, because what if it was phony?
            // What if the "listener" key-word was actually buried in some "ALT=..." text?
            String s = tn.AV(listener);

            if (s != null) p.put(listener, s);
        }

        return p;
    }

    /**
     * If you have performed a Java-Script Listener Get, this method will cycle through the list
     * that was returned and generate <I><B>an identical length return {@code Properties[]}</B></I>
     * array that has called {@code extract(tn)} for-each element in the parameter {@code 'list.'}
     *
     * @param list A list of {@code TagNode's} that are expected to contain Java-Script listeners.
     * If some of the members of this input {@code Vector} have {@code TagNode's} with no
     * listeners, the return array will <I>still remain a parallel (same-size) array</I>,
     * however some of its elements will have {@code Properties} with no key/value pairs in them
     * (zero-size).
     *
     * @return A list of {@code Properties} for each element in this {@code 'list.'}
     *
     * @see #extract(TagNode)
     */
    public static Properties[] extractAll(Vector<TagNode> list)
    {
        Properties[] ret = new Properties[list.size()];

        for (int i=0; i < list.size(); i++) ret[i] = extract(list.elementAt(i));

        return ret;
    }


    // ********************************************************************************************
    // ********************************************************************************************
    // FIND
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * Convenience Method.
     * <BR />Invokes: {@link #find(Vector, int, int)}
     */
    public static int[] find(Vector<? extends HTMLNode> html)
    { return find(html, 0, -1); }

    /**
     * Convenience Method.
     * <BR />Receives: {@code DotPair}
     * <BR />Invokes: {@link #find(Vector, int, int)}
     */
    public static int[] find(Vector<? extends HTMLNode> html, DotPair dp)
    { return find(html, dp.start, dp.end + 1); }

    /**
     * Find all HTML Elements ({@code TagNode} elements) that have listeners.  Limit the index of
     * the page to a sublist of that page.
     *
     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
     *
     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
     *
     * @return A list of index-pointers into the underlying parameter {@code 'html'} where each
     * node pointed to by the list contains a {@code TagNode} element with a listener attribute /
     * inner-tag.  Search results shall be limited to only considering elements between
     * {@code sPos ... ePos.}
     *
     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
     *
     * @see #hasListener(TagNode)
     * @see LV
     */
    public static int[] find(Vector<? extends HTMLNode> html, int sPos, int ePos)
    {
        // Java Streams to keep lists of int's
        IntStream.Builder b = IntStream.builder();

        // Loop bounds.  Named 'lv' so that it does not shadow the static listener-set 'l'.
        LV lv = new LV(html, sPos, ePos);

        for (int i=lv.start; i < lv.end; i++)
        {
            // Only check Opening TagNode's, long enough to have attributes, and then only
            // retain TagNode's that have a listener attribute.
            TagNode tn = html.elementAt(i).openTagPWA();

            if ((tn != null) && hasListener(tn)) b.add(i);
        }

        return b.build().toArray();
    }

    /**
     * Convenience Method.
     * <BR />Invokes: {@link #find(Vector, int, int, String[])}
     */
    public static int[] find(Vector<? extends HTMLNode> html, String... htmlTags)
    { return find(html, 0, -1, htmlTags); }

    /**
     * Convenience Method.
     * <BR />Receives: {@code DotPair}
     * <BR />Invokes: {@link #find(Vector, int, int, String[])}
     */
    public static int[] find(Vector<? extends HTMLNode> html, DotPair dp, String... htmlTags)
    { return find(html, dp.start, dp.end + 1, htmlTags); }

    /**
     * Find all HTML Elements ({@code TagNode} elements) that have listeners.  Limit the index of
     * the page to a sublist of that page, <B><I>and also</I></B> limit the search to only
     * allow for matches where the HTML Element is among the list of elements in parameter
     * {@code 'htmlTags'}
     *
     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
     *
     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
     *
     * @param htmlTags A list of HTML Elements, as a varargs {@code String...} Array, that
     * constitute a match.  Any HTML Element in the web-page that has a listener attribute, but
     * whose HTML tag/token is not present in this list will not be considered a match, and will
     * not be returned in this method's search results.
     *
     * @return A list of index-pointers into the underlying parameter {@code 'html'} where each
     * node pointed to by the list contains a {@code TagNode} element with a listener attribute /
     * inner-tag.  Search results shall be limited to only considering elements between
     * {@code sPos ... ePos,} <B><I>and also</I></B> limited to HTML Elements in parameter
     * {@code 'htmlTags'}
     *
     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
     * @throws HTMLTokException If any element of {@code 'htmlTags'} is null.
     *
     * @see #HAS_TOK_MATCH(String, String[])
     * @see #hasListener(TagNode)
     * @see LV
     */
    public static int[] find(Vector<? extends HTMLNode> html, int sPos, int ePos, String... htmlTags)
    {
        // Java Streams can keep lists of int's
        IntStream.Builder b = IntStream.builder();

        // Loop bounds.  Named 'lv' so that it does not shadow the static listener-set 'l'.
        LV lv = new LV(html, sPos, ePos);

        // Lower-case copy of the requested tags; the caller's array is left untouched.
        String[] tags = toLowerCase(htmlTags);

        for (int i=lv.start; i < lv.end; i++)
        {
            // Only Match Opening-Tags with internal-string's long enough to contain Attributes
            TagNode tn = html.elementAt(i).openTagPWA();

            if (tn == null) continue;

            // Make sure the HTML Element (.tok field) is among the user-requested 'htmlTags'
            if (! HAS_TOK_MATCH(tn.tok, tags)) continue;

            // Check whether or not the TagNode has a listener attribute.  If yes, save the
            // array-index.
            if (hasListener(tn)) b.add(i);
        }

        return b.build().toArray();
    }


    // ********************************************************************************************
    // ********************************************************************************************
    // GET
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * Convenience Method.
     * <BR />Invokes: {@link #get(Vector, int, int)}
     */
    public static Vector<TagNode> get(Vector<? extends HTMLNode> html)
    { return get(html, 0, -1); }

    /**
     * Convenience Method.
     * <BR />Receives: {@code DotPair}
     * <BR />Invokes: {@link #get(Vector, int, int)}
     */
    public static Vector<TagNode> get(Vector<? extends HTMLNode> html, DotPair dp)
    { return get(html, dp.start, dp.end + 1); }

    /**
     * Find all HTML Elements ({@code TagNode} elements) that have listeners.  Limit the index of
     * the page to a sublist of that page.
     *
     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
     *
     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
     *
     * @return A list of {@code TagNode} elements that have a listener attribute / inner-tag.
     * Search results shall be limited to only considering elements between
     * {@code sPos ... ePos.}
     *
     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
     *
     * @see #hasListener(TagNode)
     * @see LV
     */
    public static Vector<TagNode> get(Vector<? extends HTMLNode> html, int sPos, int ePos)
    {
        Vector<TagNode> ret = new Vector<>();

        // Loop bounds.  Named 'lv' so that it does not shadow the static listener-set 'l'.
        LV lv = new LV(html, sPos, ePos);

        for (int i=lv.start; i < lv.end; i++)
        {
            // Only check Opening TagNode's, long enough to have attributes, and then only
            // retain TagNode's that have a listener attribute.  If this TagNode does have a
            // listener, place it in the return vector.
            TagNode tn = html.elementAt(i).openTagPWA();

            if ((tn != null) && hasListener(tn)) ret.add(tn);
        }

        return ret;
    }

    /**
     * Convenience Method.
     * <BR />Invokes: {@link #get(Vector, int, int, String[])}
     */
    public static Vector<TagNode> get(Vector<? extends HTMLNode> html, String... htmlTags)
    { return get(html, 0, -1, htmlTags); }

    /**
     * Convenience Method. (Range-Limited Method)
     * <BR />Receives: {@code DotPair}
     * <BR />Invokes: {@link #get(Vector, int, int, String[])}
     */
    public static Vector<TagNode> get(Vector<? extends HTMLNode> html, DotPair dp, String... htmlTags)
    { return get(html, dp.start, dp.end + 1, htmlTags); }

    /**
     * Find all HTML Elements ({@code TagNode} elements) that have listeners.  Limit the index of
     * the page to a sublist of that page, <B><I>and also</I></B> limit the search to only
     * allow for matches where the HTML Element is among the list of elements in parameter
     * {@code 'htmlTags'}
     *
     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
     *
     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
     *
     * @param htmlTags A list of HTML Elements, as a varargs {@code String} Array, that constitute
     * a match.  Any HTML Element in the web-page that has a listener attribute, but whose HTML
     * tag/token is not present in this list will not be considered a match, and will not be
     * returned in this method's search results.
     *
     * @return A list of {@code TagNode} elements that have a listener attribute / inner-tag.
     * Search results shall be limited to only considering elements between
     * {@code sPos ... ePos,} <B><I>and also</I></B> limited to HTML Elements in parameter
     * {@code 'htmlTags'}
     *
     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
     * @throws HTMLTokException If any element of {@code 'htmlTags'} is null.
     *
     * @see #HAS_TOK_MATCH(String, String[])
     * @see #hasListener(TagNode)
     * @see LV
     */
    public static Vector<TagNode> get
        (Vector<? extends HTMLNode> html, int sPos, int ePos, String... htmlTags)
    {
        Vector<TagNode> ret = new Vector<>();

        // Loop bounds.  Named 'lv' so that it does not shadow the static listener-set 'l'.
        LV lv = new LV(html, sPos, ePos);

        // Lower-case copy of the requested tags; the caller's array is left untouched.
        String[] tags = toLowerCase(htmlTags);

        for (int i=lv.start; i < lv.end; i++)
        {
            // Only Match Opening-Tags with internal-string's long enough to contain Attributes
            TagNode tn = html.elementAt(i).openTagPWA();

            if (tn == null) continue;

            // Make sure the HTML Element (.tok field) is among the user-requested 'htmlTags'
            if (! HAS_TOK_MATCH(tn.tok, tags)) continue;

            // Check whether or not the TagNode has a listener attribute.  If all requirements
            // have been affirmed, save this node in the return vector.
            if (hasListener(tn)) ret.add(tn);
        }

        return ret;
    }


    // ********************************************************************************************
    // ********************************************************************************************
    // Helpers
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * Checks if a certain {@code class TagNode} has a listener inner-tag / attribute.
     *
     * @param tn Any HTML Element {@code TagNode}
     * @return {@code TRUE} If this {@code TagNode} has a listener, and {@code FALSE} otherwise.
     * @see StrCmpr#containsIgnoreCase(String, String)
     */
    public static boolean hasListener(TagNode tn)
    {
        for (String listener : l)
        {
            // This is a simple string-comparison - with no reg-ex involved
            if (! StrCmpr.containsIgnoreCase(tn.str, listener)) continue;

            // Slightly slower - TagNode.AV(attribute) uses a Regular-Expression.
            //
            // This **may** seem redundant, but it is not, because what if it was phony?
            // What if the "listener" key-word was actually buried in some "ALT=..." text?
            if (tn.AV(listener) != null) return true;
        }

        return false;
    }

    /**
     * Converts the varargs parameter to lower-case {@code Strings.}
     *
     * <BR /><BR />Note that this is <I><B>{@code "Varargs Safe"}</B></I>,
     * because a new {@code String}-Array is created that has new {@code String}-pointers.
     *
     * <BR /><BR />The conversion uses {@code Locale.ROOT}, so that the result does not depend on
     * the default locale of the JVM.
     *
     * @param tags The varargs {@code String} parameter acquired from the search-methods in this
     * class.
     *
     * @return a lower-case version of the input.
     *
     * @throws HTMLTokException If any element of {@code 'tags'} is null.
     */
    protected static String[] toLowerCase(String[] tags)
    {
        String[] ret = new String[tags.length];

        for (int i=0; i < tags.length; i++)
        {
            if (tags[i] == null) throw new HTMLTokException(
                "One of the HTML tokens you have passed to the variable-length parameter " +
                "'htmlTags' was null."
            );

            ret[i] = tags[i].toLowerCase(Locale.ROOT);
        }

        return ret;
    }

    /**
     * Checks if the var-args parameter {@code String... htmlTags} matches a particular token.
     *
     * @param htmlTag The token to be checked against the user's requested {@code 'htmlTags'} list
     * parameter
     *
     * @param htmlTags The list of acceptable HTML Tag Elements.  This is a search specification
     * parameter used by some of the search-methods in this class.  The comparison is an exact
     * (case-sensitive) {@code String.equals}, and none of the elements may be null.
     *
     * @return {@code TRUE} If the tested token parameter {@code 'htmlTag'} is a member of the
     * elements in list parameter {@code 'htmlTags'}, and {@code FALSE} otherwise.
     */
    protected static boolean HAS_TOK_MATCH(String htmlTag, String... htmlTags)
    {
        for (String s : htmlTags) if (s.equals(htmlTag)) return true;

        return false;
    }
}