001package Torello.HTML; 002 003import java.util.*; 004import java.io.IOException; 005import java.util.stream.IntStream; 006 007import Torello.Java.*; 008import Torello.Java.Additional.RemoveUnsupportedIterator; 009 010 011/** 012 * A basic tool for finding Java-Script Listener Attributes in the {@link TagNode} elements in a 013 * Vectorized-HTML Web-Page. 014 * 015 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=LISTENERS> 016 */ 017@Torello.JavaDoc.StaticFunctional 018public class Listeners 019{ 020 private Listeners() { } 021 022 @SuppressWarnings("unchecked") 023 private static final TreeSet<String> l = (TreeSet<String>) LFEC.readObjectFromFile_JAR 024 (Listeners.class, "data-files/Listeners.tsdat", true, TreeSet.class); 025 026 public static void main(String[] argv) 027 { 028 for (String s : l) System.out.print(s + ", "); 029 } 030 031 /** 032 * This will return an {@code Iterator} of the listed java-script listeners available in this 033 * class 034 */ 035 public static Iterator<String> listAllAvailable() 036 { return new RemoveUnsupportedIterator<String>(l.iterator()); } 037 038 /** 039 * This just allows the user to add a name of a new listener that was not already stored in the 040 * internal-set of known java-script listeners. When searching a page for listeners, this 041 * class will only (obviously) be able to find ones whose names are known. 042 * 043 * @param listenerName The name of a listener that is not already 'known-about' in by this 044 * class 045 * 046 * @return {@code TRUE} If the internal table of listener names was not already stored in the 047 * set, {@code FALSE} if attempting to add a listener that is already in the set. 048 */ 049 public static boolean addNewListenerName(String listenerName) 050 { return l.add(listenerName.toLowerCase()); } 051 052 /** 053 * This will test whether listeners are present in the {@code TagNode}, and if so - return 054 * them. 055 * 056 * <BR /><TABLE CLASS=JDBriefTable> 057 * <TR><TH>Input {@code TagNode}</TH><TH>Output Properties:</TH></TR> 058 * <TR><TD><CODE><frameset cols="20%,80%" title="Documentation frame" 059 * onload="top.loadFrames()"></CODE></TD> <TD><CODE>onload: 060 * top.loadFrames()</CODE></TD></TR> 061 * <TR><TD><CODE><a href="javascript:void(0);" onclick="return 062 * j2gb('http://www.gov.cn');"></CODE></TD> <TD><CODE>onclick: return 063 * j2gb('http://www.gov.cn');</CODE></TD></TR> 064 * </TABLE> 065 * 066 * @param tn This may be any {@code TagNode}, but it will be tested for JavaScript listeners. 067 * 068 * @return Will return a {@code java.util.Properties} object that contains a key-value table of 069 * any/all listeners present in the {@code TagNode.} If there are no listeners, this method 070 * <I>will not return null</I>, it will return an <I>empty {@code Properties} object</I>. 071 * 072 * @see TagNode#AV(String) 073 * @see StrCmpr#containsIgnoreCase(String, String) 074 */ 075 public static Properties extract(TagNode tn) 076 { 077 Properties p = new Properties(); 078 String s; 079 080 for (String listener : l) 081 082 if (StrCmpr.containsIgnoreCase(tn.str, listener)) 083 084 if ((s = tn.AV(listener)) != null) 085 086 // This **may** seem redundant, but it is not, because what if it was phony? 087 // What if the "listener" key-word was actually buried in some "ALT=..." text? 088 // The initial "StrCmpr.contains..." an optimization 089 090 p.put(listener, s); 091 092 return p; 093 } 094 095 /** 096 * If you have performed a Java-Script Listener Get, this method will cycle through the list 097 * that was returned and generate <I><B>an identical length return {@code Properties[]}</B></I> 098 * array that has called {@code extract(tn)} for-each element in the parameter {@code 'list.'} 099 * 100 * @param list A list of {@code TagNode's} that are expected to contain Java-Script listeners. 101 * If some of the members of this input {@code Vector} have {@code TagNode's} with no 102 * listeners, the return array will <I>still remain a parallel (same-size) array</I>, 103 * however some of it's elements will have {@code Properties} with no key/value pairs in them 104 * (zero-size). 105 * 106 * @return A list of {@code Properties} for each element in this {@code 'list.'} 107 * 108 * @see #extract(TagNode) 109 */ 110 public static Properties[] extractAll(Vector<TagNode> list) 111 { 112 Properties[] ret = new Properties[list.size()]; 113 114 for (int i=0; i < list.size(); i++) ret[i] = extract(list.elementAt(i)); 115 116 return ret; 117 } 118 119 120 // ******************************************************************************************** 121 // ******************************************************************************************** 122 // FIND 123 // ******************************************************************************************** 124 // ******************************************************************************************** 125 126 127 /** 128 * Convenience Method. 129 * <BR />Invokes: {@link #find(Vector, int, int)} 130 */ 131 public static int[] find(Vector<? extends HTMLNode> html) 132 { return find(html, 0, -1); } 133 134 /** 135 * Convenience Method. 136 * <BR />Receives: {@code DotPair} 137 * <BR />Invokes: {@link #find(Vector, int, int)} 138 */ 139 public static int[] find(Vector<? extends HTMLNode> html, DotPair dp) 140 { return find(html, dp.start, dp.end + 1); } 141 142 /** 143 * Find all HTML Elements ({@code TagNode} elements) that have listeners. Limit the index of 144 * the page to a sublist of that page, 145 * 146 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 147 * 148 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 149 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 150 * 151 * @return A list of index-pointers into the underlying parameter {@code 'html'} where each 152 * node pointed to by the list contains a {@code TagNode} element with a listener attribute / 153 * inner-tag. Search results shall be limited to only considering elements between 154 * {@code sPos ... ePos.} 155 * 156 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 157 * 158 * @see #hasListener(TagNode) 159 * @see LV 160 */ 161 public static int[] find(Vector<? extends HTMLNode> html, int sPos, int ePos) 162 { 163 // Java Streams to keep lists of int's 164 IntStream.Builder b = IntStream.builder(); 165 LV l = new LV(html, sPos, ePos); 166 TagNode tn; 167 168 for (int i=l.start; i < l.end; i++) 169 170 // Only check Openening TagNode's, long enought to have attributes, and then only 171 // retain TagNode's that have a listener attribute. 172 173 if (((tn = html.elementAt(i).openTagPWA()) != null) && hasListener(tn)) b.add(i); 174 175 return b.build().toArray(); 176 } 177 178 /** 179 * Convenience Method. 180 * <BR />Invokes: {@link #find(Vector, int, int, String[])} 181 */ 182 public static int[] find(Vector<? extends HTMLNode> html, String... htmlTags) 183 { return find(html, 0, -1, htmlTags); } 184 185 /** 186 * Convenience Method. 187 * <BR />Receives: {@code DotPair} 188 * <BR />Invokes: {@link #find(Vector, int, int, String[])} 189 */ 190 public static int[] find(Vector<? extends HTMLNode> html, DotPair dp, String... htmlTags) 191 { return find(html, dp.start, dp.end + 1, htmlTags); } 192 193 /** 194 * Find all HTML Elements ({@code TagNode} elements) that have listeners. Limit the index of 195 * the page to a sublist of that page, <B><I>and also</I></B> limit the search to only 196 * allow for matches where the HTML Element is among the list of elements in parameter 197 * {@code 'htmlTags'} 198 * 199 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 200 * 201 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 202 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 203 * 204 * @param htmlTags A list of HTML Elements, as a varargs {@code String...} Array, that 205 * constitute a match. Any HTML Element in the web-page that has a listener attribute, but 206 * whose HTML tag/token is not present in this list will not be considered a match, and will 207 * not be returned in this method's search results. 208 * 209 * @return A list of index-pointers into the underlying parameter {@code 'html'} where each 210 * node pointed to by the list contains a {@code TagNode} element with a listener attribute / 211 * inner-tag. Search results shall be limited to only considering elements between 212 * {@code sPos ... ePos,} <B><I>and also</I></B> limited to HTML Elements in parameter 213 * {@code 'htmlTags'} 214 * 215 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 216 * 217 * @see #HAS_TOK_MATCH(String, String[]) 218 * @see #hasListener(TagNode) 219 * @see LV 220 */ 221 public static int[] find(Vector<? extends HTMLNode> html, int sPos, int ePos, String... htmlTags) 222 { 223 // Java Streams can keep lists of int's 224 IntStream.Builder b = IntStream.builder(); 225 LV l = new LV(html, sPos, ePos); 226 TagNode tn; 227 228 htmlTags = toLowerCase(htmlTags); 229 230 for (int i=l.start; i < l.end; i++) 231 232 if ( 233 // Only Match Opening-Tags with internal-string's long enough to contain Attributes 234 ((tn = html.elementAt(i).openTagPWA()) != null) 235 236 // Make sure the HTML Element (.tok field) is among the user-requested 'htmlTags' 237 && HAS_TOK_MATCH(tn.tok, htmlTags) 238 239 // Check whethr or not that the TagNode has a listener attribute (if yes, save it) 240 && hasListener(tn) 241 ) 242 // Save the array-index 243 b.add(i); 244 245 return b.build().toArray(); 246 } 247 248 249 // ******************************************************************************************** 250 // ******************************************************************************************** 251 // GET 252 // ******************************************************************************************** 253 // ******************************************************************************************** 254 255 256 /** 257 * Convenience Method. 258 * <BR />Invokes {@link #get(Vector, int, int)} 259 */ 260 public static Vector<TagNode> get(Vector<? extends HTMLNode> html) 261 { return get(html, 0, -1); } 262 263 /** 264 * Convenience Method. 265 * <BR />Receives: {@code DotPair} 266 * <BR />Invokes: {@link #get(Vector, int, int)} 267 */ 268 public static Vector<TagNode> get(Vector<? extends HTMLNode> html, DotPair dp) 269 { return get(html, dp.start, dp.end + 1); } 270 271 /** 272 * Find all HTML Elements ({@code TagNode} elements) that have listeners. Limit the index of 273 * the page to a sublist of that page, 274 * 275 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 276 * 277 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 278 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 279 * 280 * @return A list TagNode elements that have a listener attribute / inner-tag. Search results 281 * shall be limited to only considering elements between sPos ... ePos. 282 * 283 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 284 * 285 * @see #hasListener(TagNode) 286 * @see LV 287 */ 288 public static Vector<TagNode> get(Vector<? extends HTMLNode> html, int sPos, int ePos) 289 { 290 Vector<TagNode> ret = new Vector<>(); 291 LV l = new LV(html, sPos, ePos); 292 TagNode tn; 293 294 for (int i=l.start; i < l.end; i++) 295 296 // Only check Openening TagNode's, long enought to have attributes, and then only 297 // retain TagNode's that have a listener attribute. If this TagNodes does have a 298 // listener, place it in the return vector. 299 300 if (((tn = html.elementAt(i).openTagPWA()) != null) && hasListener(tn)) ret.add(tn); 301 302 return ret; 303 } 304 305 /** 306 * Convenience Method. 307 * <BR />Invokes: {@link #get(Vector, int, int, String[])} 308 */ 309 public static Vector<TagNode> get(Vector<? extends HTMLNode> html, String... htmlTags) 310 { return get(html, 0, -1, htmlTags); } 311 312 /** Convenience Method. (Range-Limited Method) 313 * <BR />Receives: {@code DotPair} 314 * <BR />Invokes: {@link #get(Vector, int, int, String[])} 315 */ 316 public static Vector<TagNode> get(Vector<? extends HTMLNode> html, DotPair dp, String... htmlTags) 317 { return get(html, dp.start, dp.end + 1, htmlTags); } 318 319 /** 320 * Find all HTML Elements ({@code TagNode} elements) that have listeners. Limit the index of 321 * the page to a sublist of that page, <B><I>and also</I></B> limit the search to only 322 * allow for matches where the HTML Element is among the list of elements in parameter 323 * {@code 'htmlTags'} 324 * 325 * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> 326 * 327 * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC> 328 * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC> 329 * 330 * @param htmlTags A list of HTML Elements, as a varargs {@code String} Array, that constitute 331 * a match. Any HTML Element in the web-page that has a listener attribute, but whose HTML 332 * tag/token is not present in this list will not be considered a match, and will not be 333 * returned in this method's search results. 334 * 335 * @return A list of TagNode elements that have a listener attribute / inner-tag. Search 336 * results shall be limited to only considering elements between sPos ... ePos, <B><I>and 337 * also</I></B> limited to HTML Elements in parameter {@code 'htmlTags'} 338 * 339 * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX> 340 * 341 * @see #HAS_TOK_MATCH(String, String[]) 342 * @see #hasListener(TagNode) 343 * @see LV 344 */ 345 public static Vector<TagNode> get 346 (Vector<? extends HTMLNode> html, int sPos, int ePos, String... htmlTags) 347 { 348 Vector<TagNode> ret = new Vector<>(); 349 LV l = new LV(html, sPos, ePos); 350 TagNode tn; 351 352 htmlTags = toLowerCase(htmlTags); 353 354 for (int i=l.start; i < l.end; i++) 355 356 if ( 357 // Only Match Opening-Tags with internal-string's long enough to contain Attributes 358 ((tn = html.elementAt(i).openTagPWA()) != null) 359 360 // Make sure the HTML Element (.tok field) is among the user-requested 'htmlTags' 361 && HAS_TOK_MATCH(tn.tok, htmlTags) 362 363 // Check whethr or not that the TagNode has a listener attribute (if yes, save it) 364 && hasListener(tn) 365 ) 366 367 // All requirements have been affirmed, save this node in the return vector. 368 ret.add(tn); 369 370 return ret; 371 } 372 373 374 // ******************************************************************************************** 375 // ******************************************************************************************** 376 // Helpers 377 // ******************************************************************************************** 378 // ******************************************************************************************** 379 380 381 /** 382 * Checks if a certain {@code class TagNode} has a listener inner-tag / attribute. 383 * 384 * @param tn Any HTML Element {@code TagNode} 385 * @return {@code TRUE} If this {@code TagNode} has a listener, and {@code FALSE} otherwise. 386 * @see StrCmpr#containsIgnoreCase(String, String) 387 */ 388 public static boolean hasListener(TagNode tn) 389 { 390 Properties p = new Properties(); 391 392 for (String listener : l) 393 394 // This is a simple string-comparison - with no reg-ex involved 395 if (StrCmpr.containsIgnoreCase(tn.str, listener)) 396 397 // Slightly slower, uses a - TagNode.AV(attribute) uses a Regular-Expression 398 if (tn.AV(listener) != null) 399 400 // This **may** seem redundant, but it is not, because what if it was phony? 401 // What if the "listener" key-word was actually buried in some "ALT=..." text? 402 403 return true; 404 405 return false; 406 } 407 408 /** 409 * Converts the varargs parameter to lower-case {@code Strings.} 410 * 411 * <BR /><BR />Note that this is <I><B>{@code "Varargs Safe"}</B></I>, 412 * because a new {@code String}-Array is created that has new {@code String}-pointers. 413 * 414 * @param tags The varargs {@code String} parameter acquired from the search-methods in this 415 * class. 416 * 417 * @return a lower-case version of the input. 418 */ 419 protected static String[] toLowerCase(String[] tags) 420 { 421 String[] ret = new String[tags.length]; 422 423 for (int i=0; i < tags.length; i++) 424 425 if (tags[i] != null) ret[i] = tags[i].toLowerCase(); 426 427 else throw new HTMLTokException( 428 "One of the HTML tokens you have passed to the variable-length parameter " + 429 "'htmlTags' was null." 430 ); 431 432 return ret; 433 } 434 435 /** 436 * Checks if the var-args parameter {@code String... htmlTags} matches a particular token 437 * 438 * @param htmlTag The token to be checked against the user's requested {@code 'htmlTags'} list 439 * parameter 440 * 441 * @param htmlTags The list of acceptable HTML Tag Elements. This is a search specification 442 * parameter used by some of the search-methods in this class. 443 * 444 * @return {@code TRUE} If the tested token parameter {@code 'htmlTag'} is a member of this 445 * elements in list parameter {@code 'htmlTags'}, and {@code FALSE} otherwise. 446 */ 447 protected static boolean HAS_TOK_MATCH(String htmlTag, String... htmlTags) 448 { for (String s : htmlTags) if (s.equals(htmlTag)) return true; return false; } 449}