001package Torello.HTML;
002
003import java.util.*;
004import java.io.IOException;
005import java.util.stream.IntStream;
006
007import Torello.Java.*;
008import Torello.Java.Additional.RemoveUnsupportedIterator;
009
010
011/**
012 * A basic tool for finding Java-Script Listener Attributes in the {@link TagNode} elements in a
013 * Vectorized-HTML Web-Page.
014 * 
015 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=LISTENERS>
016 */
017@Torello.JavaDoc.StaticFunctional
018public class Listeners
019{
    // This class exposes only static members; the private constructor keeps it from being
    // instantiated from outside.
    private Listeners() { }

    // The set of listener-attribute names that every search in this class iterates over.  It is
    // read once, during static initialization, from the resource "data-files/Listeners.tsdat"
    // (located relative to Listeners.class) via LFEC.readObjectFromFile_JAR.
    //
    // The "unchecked" suppression covers the cast from the raw TreeSet produced by the loader to
    // TreeSet<String>.
    //
    // NOTE(review): the meaning of the boolean 'true' argument is defined by LFEC and is not
    // visible here - confirm against that class.  The stored names are presumably lower-case
    // (addNewListenerName lower-cases everything it adds) - verify against the data-file.
    @SuppressWarnings("unchecked")
    private static final TreeSet<String> l = (TreeSet<String>) LFEC.readObjectFromFile_JAR
        (Listeners.class, "data-files/Listeners.tsdat", true, TreeSet.class);
025
026    public static void main(String[] argv)
027    {
028        for (String s : l) System.out.print(s + ", ");
029    }
030    
031    /**
032     * This will return an {@code Iterator} of the listed java-script listeners available in this
033     * class
034     */
035    public static Iterator<String> listAllAvailable()
036    { return new RemoveUnsupportedIterator<String>(l.iterator()); }
037
038    /**
039     * This just allows the user to add a name of a new listener that was not already stored in the
040     * internal-set of known java-script listeners.  When searching a page for listeners, this
041     * class will only (obviously) be able to find ones whose names are known.
042     * 
043     * @param listenerName The name of a listener that is not already 'known-about' in by this
044     * class
045     * 
046     * @return {@code TRUE} If the internal table of listener names was not already stored in the
047     * set, {@code FALSE} if attempting to add a listener that is already in the set.
048     */
049    public static boolean addNewListenerName(String listenerName)
050    { return l.add(listenerName.toLowerCase()); }
051
052    /**
053     * This will test whether listeners are present in the {@code TagNode}, and if so - return
054     * them.
055     * 
056     * <BR /><TABLE CLASS=JDBriefTable>
057     * <TR><TH>Input {@code TagNode}</TH><TH>Output Properties:</TH></TR>
058     * <TR><TD><CODE>&lt;frameset cols="20%,80%" title="Documentation frame"
059     *      onload="top.loadFrames()"&gt;</CODE></TD>     <TD><CODE>onload:
060     *      top.loadFrames()</CODE></TD></TR>
061     * <TR><TD><CODE>&lt;a href="javascript:void(0);" onclick="return
062     *      j2gb('http://www.gov.cn');"&gt;</CODE></TD>     <TD><CODE>onclick:  return
063     *      j2gb('http://www.gov.cn');</CODE></TD></TR>
064     * </TABLE>
065     * 
066     * @param tn This may be any {@code TagNode}, but it will be tested for JavaScript listeners.
067     * 
068     * @return Will return a {@code java.util.Properties} object that contains a key-value table of
069     * any/all listeners present in the {@code TagNode.}  If there are no listeners, this method
070     * <I>will not return null</I>, it will return an <I>empty {@code Properties} object</I>.
071     * 
072     * @see TagNode#AV(String)
073     * @see StrCmpr#containsIgnoreCase(String, String)
074     */
075    public static Properties extract(TagNode tn)
076    {
077        Properties  p = new Properties();
078        String      s;
079
080        for (String listener : l)
081
082            if (StrCmpr.containsIgnoreCase(tn.str, listener))
083
084                if ((s = tn.AV(listener)) != null) 
085
086                    // This **may** seem redundant, but it is not, because what if it was phony?
087                    // What if the "listener" key-word was actually buried in some "ALT=..." text?
088                    // The initial "StrCmpr.contains..." an optimization
089
090                    p.put(listener, s);
091
092        return p;
093    }
094
095    /**
096     * If you have performed a Java-Script Listener Get, this method will cycle through the list
097     * that was returned and generate <I><B>an identical length return {@code Properties[]}</B></I>
098     * array that has called {@code extract(tn)} for-each element in the parameter {@code 'list.'}
099     * 
100     * @param list A list of {@code TagNode's} that are expected to contain Java-Script listeners.
101     * If some of the members of this input {@code Vector} have {@code TagNode's} with no
102     * listeners, the return array will <I>still remain a parallel (same-size) array</I>,
103     * however some of it's elements will have {@code Properties} with no key/value pairs in them
104     * (zero-size).
105     * 
106     * @return A list of {@code Properties} for each element in this {@code 'list.'}
107     * 
108     * @see #extract(TagNode)
109     */
110    public static Properties[] extractAll(Vector<TagNode> list)
111    {
112        Properties[] ret = new Properties[list.size()];
113
114        for (int i=0; i < list.size(); i++) ret[i] = extract(list.elementAt(i));
115
116        return ret;
117    }
118
119
120    // ********************************************************************************************
121    // ********************************************************************************************
122    // FIND
123    // ********************************************************************************************
124    // ********************************************************************************************
125
126
127    /** 
128     * Convenience Method.
129     * <BR />Invokes: {@link #find(Vector, int, int)}
130     */
131    public static int[] find(Vector<? extends HTMLNode> html)
132    { return find(html, 0, -1); }
133
134    /**
135     * Convenience Method.
136     * <BR />Receives: {@code DotPair}
137     * <BR />Invokes: {@link #find(Vector, int, int)}
138     */
139    public static int[] find(Vector<? extends HTMLNode> html, DotPair dp)
140    { return find(html, dp.start, dp.end + 1); }
141
142    /**
143     * Find all HTML Elements ({@code TagNode} elements) that have listeners.  Limit the index of
144     * the page to a sublist of that page, 
145     * 
146     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
147     * 
148     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
149     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
150     * 
151     * @return A list of index-pointers into the underlying parameter {@code 'html'} where each
152     * node pointed to by the list contains a {@code TagNode} element with a listener attribute /
153     * inner-tag. Search results shall be limited to only considering elements between
154     * {@code sPos ... ePos.}
155     * 
156     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
157     * 
158     * @see #hasListener(TagNode)
159     * @see LV
160     */
161    public static int[] find(Vector<? extends HTMLNode> html, int sPos, int ePos)
162    {
163        // Java Streams to keep lists of int's
164        IntStream.Builder   b = IntStream.builder();
165        LV                  l = new LV(html, sPos, ePos);
166        TagNode             tn;
167
168        for (int i=l.start; i < l.end; i++)
169
170            // Only check Openening TagNode's, long enought to have attributes, and then only
171            // retain TagNode's that have a listener attribute.
172
173            if (((tn = html.elementAt(i).openTagPWA()) != null) && hasListener(tn)) b.add(i);
174
175        return b.build().toArray();
176    }
177
178    /**
179     * Convenience Method.
180     * <BR />Invokes: {@link #find(Vector, int, int, String[])}
181     */
182    public static int[] find(Vector<? extends HTMLNode> html, String... htmlTags)
183    { return find(html, 0, -1, htmlTags); }
184
185    /**
186     * Convenience Method.
187     * <BR />Receives: {@code DotPair}
188     * <BR />Invokes: {@link #find(Vector, int, int, String[])}
189     */
190    public static int[] find(Vector<? extends HTMLNode> html, DotPair dp, String... htmlTags)
191    { return find(html, dp.start, dp.end + 1, htmlTags); }
192
193    /**
194     * Find all HTML Elements ({@code TagNode} elements) that have listeners.  Limit the index of
195     * the page to a sublist of that page, <B><I>and also</I></B> limit the search to only
196     * allow for matches where the HTML Element is among the list of elements in parameter 
197     * {@code 'htmlTags'}
198     * 
199     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
200     * 
201     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
202     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
203     * 
204     * @param htmlTags A list of HTML Elements, as a varargs {@code String...} Array, that
205     * constitute a match.  Any HTML Element in the web-page that has a listener attribute, but
206     * whose HTML tag/token is not present in this list will not be considered a match, and will
207     * not be returned in this method's search results.
208     * 
209     * @return A list of index-pointers into the underlying parameter {@code 'html'} where each
210     * node pointed to by the list contains a {@code TagNode} element with a listener attribute /
211     * inner-tag. Search results shall be limited to only considering elements between
212     * {@code sPos ... ePos,} <B><I>and also</I></B> limited to HTML Elements in parameter
213     * {@code 'htmlTags'}
214     * 
215     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
216     * 
217     * @see #HAS_TOK_MATCH(String, String[])
218     * @see #hasListener(TagNode)
219     * @see LV
220     */
221    public static int[] find(Vector<? extends HTMLNode> html, int sPos, int ePos, String... htmlTags)
222    {
223        // Java Streams can keep lists of int's
224        IntStream.Builder   b = IntStream.builder();
225        LV                  l = new LV(html, sPos, ePos);   
226        TagNode             tn;
227
228        htmlTags = toLowerCase(htmlTags);
229
230        for (int i=l.start; i < l.end; i++)
231
232            if (
233                // Only Match Opening-Tags with internal-string's long enough to contain Attributes
234                ((tn = html.elementAt(i).openTagPWA()) != null)
235
236                // Make sure the HTML Element (.tok field) is among the user-requested 'htmlTags'
237                &&  HAS_TOK_MATCH(tn.tok, htmlTags)
238
239                // Check whethr or not that the TagNode has a listener attribute (if yes, save it)
240                &&  hasListener(tn)
241            )
242                // Save the array-index
243                b.add(i);
244
245        return b.build().toArray();
246    }
247
248
249    // ********************************************************************************************
250    // ********************************************************************************************
251    // GET
252    // ********************************************************************************************
253    // ********************************************************************************************
254
255
256    /**
257     * Convenience Method.
258     * <BR />Invokes {@link #get(Vector, int, int)}
259     */
260    public static Vector<TagNode> get(Vector<? extends HTMLNode> html)
261    { return get(html, 0, -1); }
262
263    /**
264     * Convenience Method.
265     * <BR />Receives: {@code DotPair}
266     * <BR />Invokes: {@link #get(Vector, int, int)}
267     */
268    public static Vector<TagNode> get(Vector<? extends HTMLNode> html, DotPair dp)
269    { return get(html, dp.start, dp.end + 1); }
270
271    /**
272     * Find all HTML Elements ({@code TagNode} elements) that have listeners.  Limit the index of
273     * the page to a sublist of that page, 
274     * 
275     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
276     * 
277     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
278     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
279     * 
280     * @return A list TagNode elements that have a listener attribute / inner-tag.  Search results
281     * shall be limited to only considering elements between sPos ... ePos.
282     * 
283     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
284     * 
285     * @see #hasListener(TagNode)
286     * @see LV
287     */
288    public static Vector<TagNode> get(Vector<? extends HTMLNode> html, int sPos, int ePos)
289    {
290        Vector<TagNode> ret = new Vector<>();
291        LV              l   = new LV(html, sPos, ePos);
292        TagNode         tn;
293
294        for (int i=l.start; i < l.end; i++)
295
296            // Only check Openening TagNode's, long enought to have attributes, and then only
297            // retain TagNode's that have a listener attribute.  If this TagNodes does have a 
298            // listener, place it in the return vector.
299
300            if (((tn = html.elementAt(i).openTagPWA()) != null) && hasListener(tn)) ret.add(tn);
301
302        return ret;
303    }
304
305    /**
306     * Convenience Method.
307     * <BR />Invokes: {@link #get(Vector, int, int, String[])}
308     */
309    public static Vector<TagNode> get(Vector<? extends HTMLNode> html, String... htmlTags)
310    { return get(html, 0, -1, htmlTags); }
311
312    /** Convenience Method.  (Range-Limited Method)
313     * <BR />Receives: {@code DotPair}
314     * <BR />Invokes: {@link #get(Vector, int, int, String[])}
315     */
316    public static Vector<TagNode> get(Vector<? extends HTMLNode> html, DotPair dp, String... htmlTags)
317    { return get(html, dp.start, dp.end + 1, htmlTags); }
318
319    /**
320     * Find all HTML Elements ({@code TagNode} elements) that have listeners.  Limit the index of
321     * the page to a sublist of that page, <B><I>and also</I></B> limit the search to only
322     * allow for matches where the HTML Element is among the list of elements in parameter
323     * {@code 'htmlTags'}
324     * 
325     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
326     * 
327     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
328     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
329     * 
330     * @param htmlTags A list of HTML Elements, as a varargs {@code String} Array, that constitute
331     * a match.  Any HTML Element in the web-page that has a listener attribute, but whose HTML
332     * tag/token is not present in this list will not be considered a match, and will not be
333     * returned in this method's search results.
334     * 
335     * @return A list of TagNode elements that have a listener attribute / inner-tag.  Search
336     * results shall be limited to only considering elements between sPos ... ePos, <B><I>and
337     * also</I></B> limited to HTML Elements in parameter {@code 'htmlTags'}
338     * 
339     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
340     * 
341     * @see #HAS_TOK_MATCH(String, String[])
342     * @see #hasListener(TagNode)
343     * @see LV
344     */
345    public static Vector<TagNode> get
346        (Vector<? extends HTMLNode> html, int sPos, int ePos, String... htmlTags)
347    {
348        Vector<TagNode> ret = new Vector<>();
349        LV              l   = new LV(html, sPos, ePos);
350        TagNode         tn;
351
352        htmlTags = toLowerCase(htmlTags);
353
354        for (int i=l.start; i < l.end; i++)
355
356            if (
357                // Only Match Opening-Tags with internal-string's long enough to contain Attributes
358                ((tn = html.elementAt(i).openTagPWA()) != null)
359
360                // Make sure the HTML Element (.tok field) is among the user-requested 'htmlTags'
361                &&  HAS_TOK_MATCH(tn.tok, htmlTags)
362
363                // Check whethr or not that the TagNode has a listener attribute (if yes, save it)
364                &&  hasListener(tn)
365            )
366
367                // All requirements have been affirmed, save this node in the return vector.
368                ret.add(tn);
369
370        return ret;
371    }
372
373
374    // ********************************************************************************************
375    // ********************************************************************************************
376    // Helpers
377    // ********************************************************************************************
378    // ********************************************************************************************
379
380
381    /**
382     * Checks if a certain {@code class TagNode} has a listener inner-tag / attribute.
383     * 
384     * @param tn Any HTML Element {@code TagNode}
385     * @return {@code TRUE} If this {@code TagNode} has a listener, and {@code FALSE} otherwise.
386     * @see StrCmpr#containsIgnoreCase(String, String)
387     */
388    public static boolean hasListener(TagNode tn)
389    {
390        Properties p = new Properties();
391
392        for (String listener : l)
393
394            // This is a simple string-comparison - with no reg-ex involved
395            if (StrCmpr.containsIgnoreCase(tn.str, listener))
396
397                // Slightly slower, uses a - TagNode.AV(attribute) uses a Regular-Expression
398                if (tn.AV(listener) != null)
399
400                    // This **may** seem redundant, but it is not, because what if it was phony?
401                    // What if the "listener" key-word was actually buried in some "ALT=..." text?
402
403                    return true;
404
405        return false;
406    }
407
408    /**
409     * Converts the varargs parameter to lower-case {@code Strings.}
410     * 
411     * <BR /><BR />Note that this is <I><B>{@code "Varargs Safe"}</B></I>,
412     * because a new {@code String}-Array is created that has new {@code String}-pointers.
413     * 
414     * @param tags The varargs {@code String} parameter acquired from the search-methods in this
415     * class.
416     * 
417     * @return a lower-case version of the input.
418     */
419    protected static String[] toLowerCase(String[] tags)
420    {
421        String[] ret = new String[tags.length];
422
423        for (int i=0; i < tags.length; i++)
424
425            if (tags[i] != null) ret[i] = tags[i].toLowerCase();
426
427            else throw new HTMLTokException(
428                "One of the HTML tokens you have passed to the variable-length parameter " +
429                "'htmlTags' was null."
430            );
431
432        return ret;
433    }
434
435    /**
436     * Checks if the var-args parameter {@code String... htmlTags} matches a particular token
437     * 
438     * @param htmlTag The token to be checked against the user's requested {@code 'htmlTags'} list
439     * parameter
440     * 
441     * @param htmlTags The list of acceptable HTML Tag Elements.  This is a search specification
442     * parameter used by some of the search-methods in this class.
443     * 
444     * @return {@code TRUE} If the tested token parameter {@code 'htmlTag'} is a member of this
445     * elements in list parameter {@code 'htmlTags'}, and {@code FALSE} otherwise.
446     */
447    protected static boolean HAS_TOK_MATCH(String htmlTag, String... htmlTags)
448    { for (String s : htmlTags) if (s.equals(htmlTag)) return true; return false; }
449}