001package Torello.HTML;
002
003import java.util.*;
004import java.util.stream.*;
005
006import Torello.Java.LV;
007import Torello.Java.StrFilter;
008import Torello.Java.StringParse;
009import Torello.Java.Additional.Ret2;
010import Torello.Java.Function.IntTFunction;
011
012/**
013 * Utilities for getting, setting and removing attributes from the {@link TagNode} elements in a
014 * Web-Page {@code Vector}.
015 * 
016 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=ATTRIBUTES>
017 * @see AUM
018 */
019@Torello.JavaDoc.StaticFunctional
020public class Attributes
021{
022    private Attributes() { }
023
024
025    // ***************************************************************************************
026    // ***************************************************************************************
027    // Update Attributes
028    // ***************************************************************************************
029    // ***************************************************************************************
030
031
032    /**
033     * Convenience Method.
034     * <BR />Invokes: {@link #update(Vector, AUM, int, int, String, IntTFunction, SD)}.
035     * <BR />Passes: Simple Update Lambda that <B>always</B> assigns {@code 'itValue'} to the
036     * Attribute
037     * <BR />Iterates: The entire {@code html}-page,  Passes {@code 0, -1} to {@code sPos, ePos}
038     */
039    public static int[] update
040        (Vector<? super TagNode> html, AUM mode, String innerTag, final String itValue, SD quote)
041    {
042        return update
043            (html, mode, 0, -1, innerTag, (int index, TagNode tn) -> itValue, quote);
044    }
045
046    /**
047     * Convenience Method.
048     * <BR />Receives: {@code DotPair}
049     * <BR />Invokes: {@link #update(Vector, AUM, int, int, String, IntTFunction, SD)}.
050     * <BR />Passes: Simple Update Lambda that <B>always</B> assigns {@code 'itValue'} to the
051     * Attribute
052     * <BR />Iterates: The {@code html}-page from {@code dp.start} (inclusive) to {@code dp.end}
053     * (also inclusive)
054     */
055    public static int[] update(
056            Vector<? super TagNode> html, AUM mode, DotPair dp,
057            String innerTag, final String itValue, SD quote
058        )
059    {
060        return update(
061            html, mode, dp.start, dp.end + 1, innerTag,
062            (int index, TagNode tn) -> itValue, quote
063        );
064    }
065
066    /**
067     * Convenience Method.
068     * <BR />Receives: An Attribute-Update Lambda-Function {@code 'newITValueStrGetter'}
069     * <BR />Invokes: {@link #update(Vector, AUM, int, int, String, IntTFunction, SD)}.
070     * <BR />Iterates: The entire {@code html}-page,  Passes {@code 0, -1} to {@code sPos, ePos}
071     */
072    public static int[] update(
073            Vector<? super TagNode> html, AUM mode,
074            String innerTag, IntTFunction<TagNode, String> newITValueStrGetter, SD quote
075        )
076    { return update(html, mode, 0, -1, innerTag, newITValueStrGetter, quote); }
077
078    /**
079     * Convenience Method.
080     * <BR />Receives: {@code DotPair}
081     * <BR />And-Receives: An Attribute-Update Lambda-Function {@code 'newITValueStrGetter'}
082     * <BR />Invokes: {@link #update(Vector, AUM, int, int, String, IntTFunction, SD)}.
083     * <BR />Iterates: The {@code html}-page from {@code dp.start} (inclusive) to {@code dp.end}
084     * (also inclusive)
085     */
086    public static int[] update(
087            Vector<? super TagNode> html, AUM mode, DotPair dp,
088            String innerTag, IntTFunction<TagNode, String> newITValueStrGetter, SD quote
089        )
090    { return update(html, mode, dp.start, dp.end + 1, innerTag, newITValueStrGetter, quote); }
091
092    /**
093     * Convenience Method.
094     * <BR />Receives: HTML-{@code Vector} starting &amp; ending indices
095     * ({@code sPos} and {@code ePos}).
096     * <BR />Invokes: {@link #update(Vector, AUM, int, int, String, IntTFunction, SD)}.
097     * <BR />Passes: Simple Update Lambda that <B>always</B> assigns {@code 'itValue'} to the
098     * Attribute
099     * <BR />Iterates: The {@code html}-page from {@code sPos} (inclusive) to {@code ePos}
100     * (exclusive)
101     */
102    public static int[] update(
103            @SuppressWarnings("unchecked") Vector<? super TagNode> html, AUM mode,
104            int sPos, int ePos, String innerTag, final String itValue, SD quote
105        )
106    { return update(html, mode, sPos, ePos, innerTag, (int index, TagNode tn) -> itValue, quote); }
107
108    /**
109     * Will update any HTML {@code TagNode's} present in the vector-parameter {@code 'html'}
110     * according to passed <B>{@code AUM}</B> mode and the {@code 'innerTag'} parameter.
111     *
112     * <EMBED CLASS='external-html' DATA-PROC_TYPE=update DATA-FILE-ID=ATTR_RESTRICT_SE_POS>
113     * 
114     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
115     * @param mode <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_MODE_PARAM>
116     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
117     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
118     * 
119     * @param innerTag This is the <B STYLE="color: red;">name</B> of the HTML attribute that needs
120     * to be changed, added, or removed.
121     * 
122     * @param newITValueStrGetter <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_NEW_ITSTR_FUNC>
123     * 
124     * @param quote <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_QUOTE_PARAM>
125     * 
126     * @return This method shall return an integer-{@code array} index-list whose values identify
127     * which HTML {@code Vector} Elements were changed as a result of this method invocation.
128     *
129     * <BR /><BR /><B>NOTE:</B> One minor subtlety, there could be cases where a new HTML Element
130     * {@code 'TagNode'} reference / object were instantiated or 'created,' even though the actual
131     * {@code String} that comprised the {@code HTMLNode} itself were identical to the original 
132     * {@code HTMLNode.str String}.  In the {@code 'AUM'} enumerated-type, when {@code AUM.Set}
133     * is invoked, the original {@code String} data for an attribute is always clobbered, even in
134     * cases where an identical version of the {@code String} is replaced or substituted.
135     * 
136     * @throws QuotesException              <EMBED CLASS='external-html' DATA-FILE-ID=QEX>
137     * @throws InnerTagKeyException         <EMBED CLASS='external-html' DATA-FILE-ID=ITKEYEX>
138     * @throws IndexOutOfBoundsException    <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
139     * 
140     * @see AUM#update(TagNode, String, String, SD)
141     * @see LV
142     * @see TagNode#isTagNode()
143     * @see TagNode#isClosing
144     */
145    public static int[] update(
146            @SuppressWarnings("unchecked") Vector<? super TagNode> html, AUM mode,
147            int sPos, int ePos, String innerTag, IntTFunction<TagNode, String> newITValueStrGetter,
148            SD quote
149        )
150    {
151        InnerTagKeyException.check(innerTag);
152
153        // Use Java Stream to keep a list of Vector-Locations that were updated / modified.
154        IntStream.Builder b = IntStream.builder();
155
156        // This optimization is the same as the one in TagNode.openTagPWA().  However, that method
157        // cannot be used here, becaue for AUM.set, zero-attribute TagNode's **ALSO** have to be
158        // updated.  So this is re-implemented here.
159
160        int MIN = 3 + innerTag.length();
161
162        // Loop Variables
163        LV      l   = new LV(sPos, ePos, html);
164        TagNode tn;
165
166        for (int i=l.start; i < l.end; i++)
167
168            // Only instances of Opening-TagNodes need to be checked - All others should be skipped
169            if ((tn = ((HTMLNode) html.elementAt(i)).openTag()) != null)
170
171                // AUM.Set does not require the attribute to already exist
172                // **OR** Check for minimum possible str-length to have the attribute at all.
173
174                if ((mode == AUM.Set) || (tn.str.length() >= (MIN + tn.tok.length())))  
175
176                    // If AUM.update returns a **NEW** (non-null) TagNode, replace the old one.
177                    // Make sure to use the User-Provided Function-Pointer
178
179                    if ((tn = mode.update(tn, innerTag, newITValueStrGetter.apply(i, tn), quote)) != null)
180                    {
181                        // Replace the old TagNode
182                        html.setElementAt(tn, i);
183
184                        // Make sure to keep the index where it resides, to return to the user
185                        b.accept(i);
186                    }
187
188        // Build the IntStream, Convert the IntStream -> int[], Return it.
189        return b.build().toArray();
190    }
191
192    /**
193     * Convenience Method.
     * <BR />Receives: An {@code int[]}-Array which identifies which nodes in the {@code Vector} to update.
195     * <BR />Invokes: {@link #update(Vector, AUM, int, int, String, IntTFunction, SD)}.
196     * <BR />Passes: Simple Update Lambda that <B>always</B> assigns {@code 'itValue'} to the
197     * Attribute
198     * <BR />Iterates: All {@code Vector}-indices pointed to by the values in {@code 'posArr'}
199     */
200    public static int[] update(
201            Vector<? super TagNode> html, AUM mode, int[] posArr, 
202            String innerTag, final String itValue, SD quote
203        )
204    { return update(html, mode, posArr, innerTag, (int index, TagNode tn) -> itValue, quote); }
205
206    /**
207     * Will update any HTML {@code TagNode's} present in the vector-parameter {@code 'html'}
208     * according to a passed <B>{@code 'AUM'}</B> mode and the {@code 'innerTag'} parameter.
209     *
210     * <EMBED CLASS='external-html' DATA-PROC_TYPE=update DATA-FILE-ID=ATTR_RESTRICT_POSARR>
211     * 
212     * @param html      <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
213     * @param mode      <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_MODE_PARAM>
214     * @param posArr    <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POS_ARR_PARAM>
215     *                  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_UPDATE_AUM_EXAMPLE>
216     *
     * @param innerTag This is the <B STYLE="color: red;">name</B> of the HTML attribute that needs
218     * to be changed, added, or removed.
219     * 
220     * @param newITValueStrGetter <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_NEW_ITSTR_FUNC>
221     * 
222     * @param quote <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_QUOTE_PARAM>
223     * 
224     * @return This method shall return an integer-{@code array} index-list whose values identify
     * which HTML {@code Vector} Elements were changed as a result of this method invocation.
226     *
227     * <BR /><BR /><B>NOTE:</B> One minor subtlety, there could be cases where a new HTML Element
228     * {@code 'TagNode'} reference / object were instantiated or 'created,' even though the actual
229     * {@code String} that comprised the {@code HTMLNode} itself were identical to the original 
230     * {@code HTMLNode.str String}.  In the {@code 'AUM'} enumerated-type, when {@code AUM.Set}
231     * is invoked, the original {@code String} data for an attribute is always clobbered, even in
232     * cases where an identical version of the {@code String} is replaced or substituted.
233     * 
234     * @throws QuotesException          <EMBED CLASS='external-html' DATA-FILE-ID=QEX>
235     * @throws InnerTagKeyException     <EMBED CLASS='external-html' DATA-FILE-ID=ITKEYEX>
236     * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX>
237     * 
238     * @throws OpeningTagNodeExpectedException
239     * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX>
240     * @throws ArrayIndexOutOfBoundsException
241     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX>
242     * 
243     * @see AUM#update(TagNode, String, String, SD)
244     * @see TagNode#isTagNode()
245     * @see TagNode#isClosing
246     */
247    public static int[] update(
248            Vector<? super TagNode> html, AUM mode, int[] posArr, 
249            String innerTag, IntTFunction<TagNode, String> newITValueStrGetter, SD quote
250        )
251    {
252        InnerTagKeyException.check(innerTag);
253
254        // Use Java Stream to keep a list of Vector-Locations that were updated / modified.
255        IntStream.Builder b = IntStream.builder();
256
257        // minimum possible length to have an attribute at all.
258        // '<', TOKEN, SPACE, ATTRIBUTE, '>'
259
260        int MIN = 3 + innerTag.length();
261
262        for (int i : posArr)
263        {
264            HTMLNode n = (HTMLNode) html.elementAt(i); 
265
266            if (! n.isTagNode())  throw new TagNodeExpectedException(i);
267
268            TagNode tn = (TagNode) n;
269
270            if (tn.isClosing) throw new OpeningTagNodeExpectedException(i);
271
272            // AUM.Set *DOES NOT* require the attribute to exist already (the other *DO*)
273            if (mode != AUM.Set)
274
275                // Minimum length of this element before it even could have the named inner-tag
276                // '<', TOKEN, SPACE, ATTRIBUTE, '=', '>'
277
278                if (tn.str.length() < (MIN + tn.tok.length())) continue;
279
280                    // Make sure to use the User-Provided Function-Pointer
281                    tn = mode.update(tn, innerTag, newITValueStrGetter.apply(i, tn), quote);
282
283            // if 'tn' is non-null ==> an update *WAS* performed
284            if (tn != null)
285            {
286                // Replace the old TagNode
287                html.setElementAt(tn, i);
288
289                // Make sure to keep the index where it resides, to return to the user
290                b.accept(i);
291            }
292        }
293
294        // Build the IntStream, Convert the IntStream -> int[], Return it.
295         return b.build().toArray(); 
296    }
297
298
299    // ***************************************************************************************
300    // ***************************************************************************************
301    // Remove All Attributes
302    // ***************************************************************************************
303    // ***************************************************************************************
304
305
306    /**
307     * Convenience Method.
308     * <BR />Invokes: {@link #removeAll(Vector, int, int)}
309     */
310    public static int[] removeAll(Vector<? super TagNode> html)
311    { return removeAll(html, 0, -1); }
312
313    /**
314     * Convenience Method.
315     * <BR />Receives: {@code DotPair}
316     * <BR />Invokes: {@link #removeAll(Vector, int, int)}
317     */
318    public static int[] removeAll(Vector<? super TagNode> html, DotPair dp)
319    { return removeAll(html, dp.start, dp.end + 1); }
320
321    /**
322     * The purpose of this method is to remove all attributes / Inner-Tag
323     * <B STYLE="color: red;">key-value pairs</B> from each and every non-{@code 'TextNode'} and
324     * non-{@code 'CommentNode'} HTML Element found on the vectorized-html page parameter
325     * {@code 'html'}. The removal process is limited to the range specified by method-parameters
326     * {@code sPos, ePos.} 
327     * 
328     * <BR /><BR /><B CLASS=JDDescLabel>Attribute Removal Specifics:</B>
329     * 
330     * <BR />This method will remove each and every {@code class=... id=... src=... alt=...}
331     * {@code href=... onclick=... etc...} attribute from all {@link TagNode}-instances whose 
332     * {@code Vector}-index location inside {@code 'html'} falls between {@code 'sPos'} and
333     * {@code 'ePos'}.
334     * 
     * <BR /><BR />When this method exits, all {@link TagNode} instances inside {@code 'html'}
336     * that fall within the specified sub-range will be attribute-free.
337     *
338     * <EMBED CLASS='external-html' DATA-PROC_TYPE=removal DATA-FILE-ID=ATTR_RESTRICT_SE_POS>
339     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_REMOVEALL_EXAMPLE1>
340     * 
341     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
342     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
343     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
344     * 
345     * @return An integer array of {@code 'Vector'}-index positions / locations for each and every
346     * HTML {@code 'TagNode'} whose attributes have been removed.
347     * 
348     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
349     * 
350     * @see TagNode#removeAllAV
351     * @see TagNode#isTagNode()
352     * @see TagNode#isClosing
353     * @see LV
354     */
355    public static int[] removeAll
356        (@SuppressWarnings("unchecked") Vector<? super TagNode> html, int sPos, int ePos)
357    {
358        // Use Java Stream to keep a list of Vector-Locations that were updated / modified.
359        IntStream.Builder b = IntStream.builder();
360
361        // Loop Variables
362        LV      l = new LV(sPos, ePos, html);
363        TagNode tn;
364
365        for (int i=l.start; i < l.end; i++)
366
367            // REQUIREMENTS: Only Opening-TagNodes
368            // If element-length = tok-length+2, there are no attributes: '<', TOKEN, '>'
369
370            if ((tn = ((HTMLNode) html.elementAt(i)).openTag()) != null)
371
372                if (tn.str.length() > (tn.tok.length() + 2))
373                {
374                    // Replace the old TagNode
375                    html.setElementAt(tn.removeAllAV(), i);
376
377                    // Make sure to keep the index where it resides, to return to the user
378                    b.accept(i);
379                }
380
381        // Build the IntStream, Convert the IntStream -> int[], Return it.
382        return b.build().toArray();
383    }
384
385    /**
386     * The purpose of this method is to remove all attributes / Inner-Tag
387     * <B STYLE="color: red;">key-value pairs</B> from each and every non-{@code 'TextNode'} and
388     * non-{@code 'CommentNode'} HTML Element found on the vectorized-html page parameter
     * {@code 'html'}. The removal process is limited to removing attributes only from the
     * elements pointed to by the contents of passed-parameter {@code 'posArr'}.
391     * 
392     * <BR /><BR /><B CLASS=JDDescLabel>Attribute Removal Specifics:</B>
393     * 
394     * <BR />This method will remove each and every {@code class=... id=... src=... alt=...}
395     * {@code href=... onclick=... etc...} attribute from all {@link TagNode}-instances whose 
396     * {@code Vector}-index location within {@code 'html'} are indices among those listed by
397     * the index-list {@code int[]}-Array {@code 'posArr'}.
398     * 
399     * <BR /><BR />When this method exits, all {@link TagNode} instances inside {@code 'html'}
400     * specified by {@code 'posArr'} will be attribute-free.
401     *
402     * <EMBED CLASS='external-html' DATA-PROC_TYPE=removal DATA-FILE-ID=ATTR_RESTRICT_POSARR>
403     * 
404     * @param html      <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
405     * @param posArr    <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POS_ARR_PARAM>
406     *                  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_REMOVEALL_EXAMPLE2>
407     * 
408     * @return An integer array of {@code 'Vector'}-index positions / locations for each and every
409     * HTML {@code 'TagNode'} whose attributes have been removed.
410     * 
411     * @throws ArrayIndexOutOfBoundsException
412     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX>
413     * @throws OpeningTagNodeExpectedException
414     * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX>
415     * 
416     * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX>
417     * 
418     * @see TagNode#removeAllAV()
419     * @see TagNode#isTagNode()
420     * @see TagNode#isClosing
421     */
422    public static int[] removeAll(Vector<? super TagNode> html, int[] posArr)
423    {
424        // Use Java Stream to keep a list of Vector-Locations that were updated / modified.
425        IntStream.Builder b = IntStream.builder();
426
427        for (int i : posArr)
428        {
429            HTMLNode n = (HTMLNode) html.elementAt(i);
430
431            if (! n.isTagNode()) throw new TagNodeExpectedException(i);
432
433            TagNode tn = (TagNode) n;
434
435            if (tn.isClosing) throw new OpeningTagNodeExpectedException(i);
436
437            // If element-length = tok-length+2, there are no attributes!
438            // Otherwise, replace the old TagNode with a new, empty, one
439            // Make sure to keep the index where it resides, to return to the user
440
441            if (tn.str.length() > (tn.tok.length() + 2))
442            {                                                   
443                html.setElementAt(tn.removeAllAV(), i);
444                b.accept(i);
445            }
446        }
447
448        // Build the IntStream, Convert the IntStream -> int[], Return it.
449        return b.build().toArray();
450    }
451
452
453    // ***************************************************************************************
454    // ***************************************************************************************
455    // Remove Data-Attributes
456    // ***************************************************************************************
457    // ***************************************************************************************
458
459
460    /**
461     * Convenience Method.
462     * <BR />Invokes: {@link #removeData(Vector, int, int)}
463     */
464    public static int[] removeData(Vector<? super TagNode> html)
465    { return removeData(html, 0, -1); }
466
467    /**
468     * Convenience Method.
469     * <BR />Receives: {@code DotPair}
470     * <BR />Invokes: {@link #removeData(Vector, int, int)}
471     */
472    public static int[] removeData(Vector<? super TagNode> html, DotPair dp)
473    { return removeData(html, dp.start, dp.end + 1); }
474
475    /**
476     * The purpose of this method is to remove all HTML <B STYLE="color: red;">data</B>-attribute
477     * <B STYLE="color: red;">key-value</B> pairs from {@code 'TagNode'} Elements contained inside
478     * parameter {@code 'html'}.
479     *
480     * <EMBED CLASS='external-html' DATA-PROC_TYPE=removal DATA-FILE-ID=ATTR_RESTRICT_SE_POS>
481     * 
482     * @param html  <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
483     * @param sPos  <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
484     * @param ePos  <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
485     * @return      <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INT_ARR_RET>
486     *              <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_DATA_ATTR_RET_NOTE>
487     *              <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POSARR_SHORT_EXPL>
488     * 
489     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
490     * 
491     * @see TagNode#removeDataAttributes()
492     * @see TagNode#isTagNode()
493     * @see TagNode#isClosing
494     * @see LV
495     */
496    public static int[] removeData(Vector<? super TagNode> html, int sPos, int ePos)
497    {
498        // Use Java Stream to keep a list of Vector-Locations that were updated / modified.
499        IntStream.Builder b = IntStream.builder();
500
501        // Loop Counter & Temporary Variables
502        LV      l   = new LV(sPos, ePos, html);
503        TagNode tn, newTN;
504
505        for (int i=l.start; i < l.end; i++)
506
507            // Only instances of Opening-TagNodes, possibly with attributes
508            if ((tn = ((HTMLNode) html.elementAt(i)).openTagPWA()) != null)
509
510                // A "new" TagNode is *only returned* if the "data-attributes" were removed.
511                if ((newTN = tn.removeDataAttributes()) != tn)
512                {                                                   
513                    html.setElementAt(newTN, i);    // Replace the old TagNode
514                    b.accept(i);                    // Make sure to keep the index where it resides
515                }                                   // Method returns list of modified node's
516
517        // Build the IntStream, Convert the IntStream -> int[], Return it.
518        return b.build().toArray();
519    }
520
521    /**
522     * The purpose of this method is to remove all HTML <B STYLE="color: red;">data</B>-attribute
523     * <B STYLE="color: red;">key-value</B> pairs from {@code 'TagNode'} Elements contained inside
524     * parameter {@code 'html'}.
525     *
526     * <EMBED CLASS='external-html' DATA-PROC_TYPE=removal DATA-FILE-ID=ATTR_RESTRICT_POSARR>
527     * 
528     * @param html      <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
529     * @param posArr    <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POS_ARR_PARAM>
530     *                  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_DATA_ATTR_EXAMPLE>
531     * @return          <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INT_ARR_RET>
532     *                  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_DATA_ATTR_RET_NOTE>
533     *                  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POSARR_SHORT_EXPL>
534     * 
535     * @throws ArrayIndexOutOfBoundsException
536     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX>
537     * @throws OpeningTagNodeExpectedException
538     * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX>
539     * 
540     * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX>
541     * 
542     * @see TagNode#removeDataAttributes()
543     * @see TagNode#isTagNode()
544     * @see TagNode#isClosing
545     */
546    public static int[] removeData(Vector<? super TagNode> html, int[] posArr)
547    {        
548        // Use Java Stream to keep a list of Vector-Locations that were updated / modified.
549        IntStream.Builder b = IntStream.builder();
550
551        // Minimum Length of TagNode.str to even have a "data-*=" attribute
552        // '<', HTML-TOKEN, SPACE, "data-*", '>'
553
554        int MIN = 9;
555
556        for (int i: posArr)
557        {
558            HTMLNode n = (HTMLNode) html.elementAt(i);
559
560            if (! n.isTagNode()) throw new TagNodeExpectedException(i);
561
562            TagNode tn = (TagNode) n;
563
564            if (tn.isClosing) throw new OpeningTagNodeExpectedException(i);
565
566            // Minimum Length of TagNode.str to even have a "data-*=" attribute
567            if (tn.str.length() < (tn.tok.length() + MIN)) continue;
568
569            TagNode newTN = tn.removeDataAttributes();
570
571            // A "new" TagNode is *only returned* by this method if the "data-attributes" were
572            // removed.  If new, replace the old TagNode
573
574            if (newTN != tn)
575            {                    
576                html.setElementAt(newTN, i);
577
578                // Make sure to keep the index where it resides, so it may be returned to the user
579                b.accept(i);
580            }
581        }
582
583        // Build the IntStream, Convert the IntStream -> int[], Return it.
584        return b.build().toArray();
585    }
586
587
588    // ***************************************************************************************
589    // ***************************************************************************************
590    // Remove Specified Attributes
591    // ***************************************************************************************
592    // ***************************************************************************************
593
594
595    /**
596     * Convenience Method.
597     * <BR />Invokes: {@link #remove(Vector, int, int, String[])}
598     */
599    public static int[] remove(Vector<? super TagNode> html, String... innerTags)
600    { return remove(html, 0, -1, innerTags); }
601
602    /**
603     * Convenience Method.
604     * <BR />Receives: {@code DotPair}
605     * <BR />Invokes: {@link #remove(Vector, int, int, String[])}
606     */
607    public static int[] remove(Vector<? super TagNode> html, DotPair dp, String... innerTags)
608    { return remove(html, dp.start, dp.end + 1, innerTags); }
609
610    /**
     * This will remove all copies of the attributes whose <B STYLE="color: red;">names</B> are
     * listed in the {@code String[]} array parameter {@code 'innerTags'} from the
613     * vectorized-html web-page parameter {@code 'html'}.
614     *
615     * <EMBED CLASS='external-html' DATA-PROC_TYPE=removal DATA-FILE-ID=ATTR_RESTRICT_SE_POS>
616     * 
617     * @param html      <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
618     * @param innerTags <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INNERTAGS_PARAM>
619     *                  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_SE_RESTRICT_REM>
620     * @param sPos      <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
621     * @param ePos      <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
622     * @return          <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INT_ARR_RET>
623     *                  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_VARARGS_S_RET_NOTE>
624     * 
625     * @throws InnerTagKeyException         <EMBED CLASS='external-html' DATA-FILE-ID=ITKEYEX>
626     * @throws IndexOutOfBoundsException    <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
627     * @throws IllegalArgumentException     If parameter {@code 'innerTags'} has zero elements.
628     * 
629     * @see TagNode#removeAttributes(String[])
630     * @see LV
631     * @see TagNode#hasOR(boolean, String[])
632     * @see TagNode#isTagNode()
633     * @see TagNode#isClosing
634     * @see InnerTagKeyException#check(String[])
635     */
636    public static int[] remove
637        (Vector<? super TagNode> html, int sPos, int ePos, String... innerTags)
638    {
639        InnerTagKeyException.check(innerTags);
640
641        // Use Java Stream to keep a list of Vector-Locations that were updated / modified.
642        IntStream.Builder b = IntStream.builder();
643
644        // Loop Counter & Temporary Variables
645        LV      l   = new LV(sPos, ePos, html);
646        TagNode tn;
647
648        for (int i=l.start; i < l.end; i++)
649
650            // Only instances of Opening-TagNodes, possibly with attributes
651            if ((tn = ((HTMLNode) html.elementAt(i)).openTagPWA()) != null)
652
653                // If this TagNode has the attributes that have been requested for removal, then...
654                if (tn.hasOR(false, innerTags))
655                {
656                    // Build a new TagNode, and then replace the old one with the newly built one
657                    // on the page or sub-page, and at the same location.
658
659                    tn = tn.removeAttributes(innerTags);
660                    html.setElementAt(tn, i);
661
662                    // Java's IntStream-Builder is just a way to "build" a short list of integer's.
663                    // At the end of this method, the list will be built and returned to the user.
664                    // It shall contain all Vector locations where a "TagNode swap" (replaced
665                    // TagNode, with attributes filtered) has occurred.
666
667                    b.accept(i);
668                }
669
670        // Build the IntStream, Convert the IntStream -> int[], Return it.
671        return b.build().toArray();
672    }
673
674    /**
     * This will remove all copies of the attributes whose <B STYLE="color: red;">names</B> are
     * listed in the {@code String[]}-Array parameter {@code 'innerTags'} from the
     * vectorized-html web-page parameter {@code 'html'}.
678     *
679     * <EMBED CLASS='external-html' DATA-PROC_TYPE=removal DATA-FILE-ID=ATTR_RESTRICT_POSARR>
680     * 
681     * @param html      <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
682     * @param innerTags <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INNERTAGS_PARAM>
683     *                  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_PA_RESTRICT_REM>
684     * @param posArr    <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POS_ARR_PARAM>
685     *                  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_VARARGS_S_EXAMPLE>
686     * @return          <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INT_ARR_RET>
687     *                  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_VARARGS_S_RET_NOTE>
688     *                  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POSARR_SHORT_EXPL>
689     * 
690     * @throws InnerTagKeyException
691     * <EMBED CLASS='external-html' DATA-FILE-ID=ITKEYEX>
692     * @throws ArrayIndexOutOfBoundsException
693     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX>
694     * @throws OpeningTagNodeExpectedException
695     * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX>
696     * 
697     * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX>
698     * @throws IllegalArgumentException If parameter {@code 'innerTags'} has zero elements.
699     * 
700     * @see TagNode#removeAttributes(String[])
701     * @see TagNode#hasOR(boolean, String[])
702     * @see TagNode#isTagNode()
703     * @see TagNode#isClosing
704     * @see InnerTagKeyException#check(String[])
705     */
706    public static int[] remove(Vector<? super TagNode> html, int[] posArr, String... innerTags)
707    {
708        InnerTagKeyException.check(innerTags);
709
710        // Use Java Stream to keep a list of Vector-Locations that were updated / modified.
711        IntStream.Builder b = IntStream.builder();
712
713        // Compute the "minimum length" of a TagNode.str field
714        int MIN = 1000;
715
716        // Minimum-Length of TagNode.str would have to be 3 + smallest inner-tag passed
717        for (String attrib : innerTags) if (attrib.length() < MIN) MIN = attrib.length();
718        MIN += 3;
719
720        for (int i : posArr)
721        {
722            HTMLNode n = (HTMLNode) html.elementAt(i);
723
724            if (! n.isTagNode()) throw new TagNodeExpectedException(i);
725
726            TagNode tn = (TagNode) n;
727
728            if (tn.isClosing) throw new OpeningTagNodeExpectedException(i);
729
730            // If element-length <= MIN, none of the attributes could possibly be present.
731            if (tn.str.length() < (tn.tok.length() + MIN)) continue;
732
733            // If this TagNode has the attributes that have been requested for removal, then...
734            if (tn.hasOR(false, innerTags))
735            {
736                // Build a new TagNode, and then replace the old one with the newly built one
737                // on the page or sub-page, and at the same location.
738
739                tn = tn.removeAttributes(innerTags);
740                html.setElementAt(tn, i);
741
742                // Java's IntStream-Builder is just a way to "build" a short list of integer's.
743                // At the end of this method, the list will be built and returned to the user.
744                // It shall contain all Vector locations where a "TagNode swap" (replaced
745                // TagNode, with attributes filtered) has occurred.
746
747                b.accept(i);
748            }
749        }
750
751        // Build the IntStream, Convert the IntStream -> int[], Return it.
752        return b.build().toArray();
753    }
754
755
756    // ***************************************************************************************
757    // ***************************************************************************************
758    // Retrieve Attributes
759    // ***************************************************************************************
760    // ***************************************************************************************
761
762
763    /**
764     * Convenience Method.
765     * <BR />Invokes: {@link #retrieve(Vector, int, int, String)}
766     */
767    public static Ret2<int[], String[]> retrieve(Vector<? super TagNode> html, String attribute)
768    { return retrieve(html, 0, -1, attribute); }
769
770    /**
771     * Convenience Method.
772     * <BR />Receives: {@code DotPair}
773     * <BR />Invokes: {@link #retrieve(Vector, int, int, String)}
774     */
775    public static Ret2<int[], String[]> retrieve
776        (Vector<? super TagNode> html, DotPair dp, String attribute)
777    { return retrieve(html, dp.start, dp.end + 1, attribute); }
778
779    /**
780     * The purpose of this method is to retrieve the <B STYLE="color: red">value</B> of each
781     * attribute in each {@code TagNode} in an HTML {@code Vector} (or sub-{@code Vector}) that
782     * contained such an attribute.  
783     *
784     * <EMBED CLASS='external-html' DATA-PROC_TYPE=retrieval DATA-FILE-ID=ATTR_RESTRICT_SE_POS>
785     * 
786     * @param html      <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
787     * @param sPos      <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
788     * @param ePos      <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
789     * @param attribute <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_ATTR_RETR_SE_PARAM>
790     * 
791     * @throws InnerTagKeyException If the attribute <B STYLE="color: red;">name</B> passed to
792     * this parameter does not contain the <B STYLE="color: red;">name</B> of a valid HTML5
793     * attribute, then this exception shall throw.
794     * 
795     * @throws IndexOutOfBoundsException
796     * <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
797     * 
798     * @return <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_RETRIEVE_SE_RET>
799     * 
800     * @see TagNode#AV(String)
801     * @see TagNode#isTagNode()
802     * @see TagNode#isClosing
803     * @see InnerTagKeyException#check(String[])
804     * @see LV
805     */
806    public static Ret2<int[], String[]> retrieve
807            (Vector<? super TagNode> html, int sPos, int ePos, String attribute)
808    {
809        InnerTagKeyException.check(attribute);
810
811        // Use a Java Int-Stream.  Save matches here (vector-position)
812        IntStream.Builder posB = IntStream.builder();
813
814        // Use a Java Stream<String>.  Save attribute-values here
815        Stream.Builder<String> strB = Stream.builder();
816
817        // Temp Variables & Loop Variable
818        LV      l = new LV(sPos, ePos, html);
819        TagNode tn;
820        String  attribValue;
821
822        for (int i=l.start; i < l.end; i++)
823
824            // Only Visit Open TagNode Elements with '.str' long enough to contain attributes
825            if ((tn = ((HTMLNode) html.elementAt(i)).openTagPWA()) != null)
826
827                // If the Open-Tag does not have the attribute, skip the node. If it does, save it.
828                if ((attribValue = tn.AV(attribute)) != null)
829                { 
830                    posB.accept(i);             // Save the vector-index position of the TagNode
831                    strB.accept(attribValue);   // Save the Attribute-Value inside that TagNode
832                }
833
834        // Java Stream's shall build the arrays.  Put them into an instance of Ret2, and return
835        return new Ret2<>(posB.build().toArray(), strB.build().toArray(String[]::new));
836    }
837
838    /**
     * This shall visit each {@link TagNode} indicated by the {@code int[]}-Array parameter
     * {@code 'posArr'}, and then query those {@code TagNode's} for the
841     * Attribute-<B STYLE="color: red;">value</B> of the attribute named by
842     * {@code String}-Parameter {@code 'attribute'}
843     * 
844     * <BR /><BR />The <B STYLE="color: red;">value</B> of each of these attributes will be
845     * recorded to a parallel {@code String}-array and returned.  This {@code String[]} array shall
846     * be parallel to the input {@code Vector}-index {@code 'posArr'} parameter.
847     * 
848     * <EMBED CLASS='external-html' DATA-PROC_TYPE=retrieval DATA-FILE-ID=ATTR_RESTRICT_POSARR>
849     * 
850     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
851     * 
852     * @param posArr This shall be a list of {@code Vector}-indices that contain opening
853     * {@code TagNode} elements.  The <B STYLE="color: red;">value</B> of the attribute provided by
854     * parameter {@code 'attribute'} will be returned in a parallel {@code String[]} array for each
855     * {@code TagNode} identified by {@code 'posArr'}.
856     * 
857     * @param attribute <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_ATTR_RETR_PA_PARAM>
858     * @return          <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_RETRIEVE_PA_RET>
859     * 
860     * @throws InnerTagKeyException If the {@code String} provided to parameter {@code 'attribute'}
861     * is not a valid HTML-5 attribute-<B STYLE="color: red;">name</B>, then this exception shall
     * throw.
863     * 
864     * @throws ArrayIndexOutOfBoundsException
865     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX>
866     * @throws OpeningTagNodeExpectedException
867     * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX>
868     * 
869     * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX>
870     * 
871     * @see InnerTagKeyException#check(String[])
872     * @see TagNode#AV(String)
873     * @see TagNode#isTagNode()
874     * @see TagNode#isClosing
875     */
876    public static String[] retrieve(Vector<? super TagNode> html, int[] posArr, String attribute)
877    {
878        InnerTagKeyException.check(attribute);
879
880        // Return Array, and its corresponding array-index pointer.
881        String[]    ret = new String[posArr.length];
882        int         i   = 0;
883
884        // Minimum length of the TagNode.str to even have the specified attribute
885        // '<', TOKEN, SPACE, INNERTAG, '=', '>'
886
887        int MIN = 4 + attribute.length();
888
889        for (int pos: posArr)
890        {
891            HTMLNode n = (HTMLNode) html.elementAt(pos);
892
893            if (! n.isTagNode()) throw new TagNodeExpectedException(pos);
894
895            TagNode tn = (TagNode) n;
896
897            if (tn.isClosing) throw new OpeningTagNodeExpectedException(pos);
898
899            ret[i++] = (tn.str.length() < (tn.tok.length() + MIN))
900
901                ? null              // CASE-1: TagNode.str is too short to even have the attribute
902                : tn.AV(attribute); // CASE-2: Possibly has it: Save the result of TagNode.AV(...)
903        }
904
905        return ret;
906    }
907
908
909    // ***************************************************************************************
910    // ***************************************************************************************
911    // Functional Interface Filter
912    // ***************************************************************************************
913    // ***************************************************************************************
914
915
916    /**
917     * Lambda-target for creating attribute-filters.
918     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTRIBUTES_FILTER>
919     */
920    @FunctionalInterface
921    public interface Filter
922    { 
923        /**
924         * This receives the contents of a {@code 'TagNode'} - after the html-tag and the
925         * inner-tags have been extracted.  This method is intended to be used to selectively
926         * remove specific inner-tags / attributes that the programmer would like to see removed.
927         * 
928         * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=FUNC_INTER_METH>
929         * 
930         * @param htmlTag When this method is implemented by a class, or by a lambda-expression,
931         * the user will receive a copy of a TagNode's {@code TagNode.tok} field through this
932         * parameter.  The class or lambda-expression which implements method
933         * {@code 'filter(...)'} may use the {@code String} that is passed via the
934         * {@code 'htmlTag'} parameter to, possibly - if needed, help decide which attributes to
935         * remove from the {@code java.util.Properties} parameter {@code 'attributes'}
936         *
937         * @param attributes When this method, {@code 'filter(...)'}, is implemented by a class or
938         * a lambda-expression, he or she is tasked with eliminating any attributes in this 
939         * {@code Properties} class that he wishes to filter.
940         * 
941         * <BR /><BR /><B><SPAN STYLE="color: red;">NOTE:</B></SPAN> The 
942         * <B STYLE="color: red;">key-value</B> pairs of this {@code java.util.Properties} method
943         * are generated by calling
944         * {@link TagNode#allAV(boolean, boolean)}.  The <B STYLE="color: red;">values</B>
945         * returned by that method will all have their original quotation-marks included in the 
946         * <CODE><B STYLE="color: red;">'value'</B> String</CODE>.
947         * 
948         * <BR /><BR /><B>ALSO:</B> This class is intended to function as a filter, and should be
949         * used to remove property <B STYLE="color: red;">key-value</B> pairs from the attributes
950         * parameter received here.  However, there is nothing stopping the programmer from
951         * modifying the contents by adding properties, or even changing the
952         * <B STYLE="color: red;">values</B> of the properties.
953         *
954         * @return This method must return a boolean indicating whether or not the attributes
955         * parameter has been changed in any way.  If {@code FALSE} were returned, but the class or
956         * lambda-expression which implements this method has modified the attributes 
957         * {@code Properties} instance, the changes that were made would be lost, and the 
958         * vectorized-html page that contained the {@code TagNode} wouldn't be updated with the
959         * new {@code TagNode}.
960         *
961         * @see TagNode#allAV(boolean, boolean)
962         * @see TagNode#tok
963         */
964        public boolean filter(String htmlTag, Properties attributes);
965    }
966
967
968    // ***************************************************************************************
969    // ***************************************************************************************
970    // Use BiPredicate to Filter Attributes
971    // ***************************************************************************************
972    // ***************************************************************************************
973
974
975    /**
976     * Convenience Method.
977     * <BR />Invokes: {@link #update(Vector, int, int, Filter)}
978     */
979    public static int[] update(Vector<? super TagNode> html, Filter f)
980    { return update(html, 0, -1, f); }
981
982    /**
983     * Convenience Method.
984     * <BR />Receives: {@code DotPair}
985     * <BR />Invokes: {@link #update(Vector, int, int, Filter)}
986     */
987    public static int[] update(Vector<? super TagNode> html, DotPair dp, Filter f)
988    { return update(html, dp.start, dp.end + 1, f); }
989
990    /**
991     * Modifies the contents of each instance of a {@code 'TC.OpeningTags'} element found in the
992     * input {@code Vector}.  The type of update that's performed is defined by the parameter
993     * {@code Filter 'f'}. Each time a {@code TagNode} found in the input vectorized-html web-page,
     * or html sub-list, is changed or modified, the original {@code TagNode} will be removed and
995     * replaced by a new, modified {@code TagNode} instance.
996     *
997     * <EMBED CLASS='external-html' DATA-PROC_TYPE=filtering DATA-FILE-ID=ATTR_RESTRICT_SE_POS>
998     * 
999     * @param html  <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
1000     * @param f     <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_FILTER_PARAM>
1001     *              <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_SE_RESTRICT_REM>
1002     * @param sPos  <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
1003     * @param ePos  <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
1004     * 
1005     * @throws InnerTagKeyException         <EMBED CLASS='external-html' DATA-FILE-ID=ITKEX>
1006     * @throws IndexOutOfBoundsException    <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
1007     * @throws QuotesException              <EMBED CLASS='external-html' DATA-FILE-ID=QEX> 
1008     * 
1009     * @return  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INT_ARR_RET>
1010     *          <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_FILTER_RET_NOTE>
1011     * 
1012     * @see TagNode#allAV(boolean, boolean)
1013     * @see TagNode#isTagNode()
1014     * @see TagNode#isClosing
1015     * @see LV
1016     */
1017    public static int[] update(Vector<? super TagNode> html, int sPos, int ePos, Filter f)
1018    {
1019        // Save Modified node-locations in a java stream (Use the "Primitive Int Stream")
1020        IntStream.Builder b = IntStream.builder();
1021
1022        // Temp-Loop Variables
1023        LV          l   = new LV(sPos, ePos, html);
1024        Properties  p;
1025        TagNode     tn;
1026
1027        for (int i=l.start; i < l.end; i++)
1028
1029            if (
1030                // Only Opening TagNode's that could possibly have attributes.
1031                ((tn = ((HTMLNode) html.elementAt(i)).openTagPWA()) != null)
1032
1033                // Retrieve all Attribute Key-Value Pairs.  Take note of surrounding quotes.
1034                &&  ((p = tn.allAV(true, true)).size() > 0)
1035
1036                // Run the provided filter logic, if it returns TRUE, then build new TagNode
1037                &&  f.filter(tn.tok, p)
1038            )
1039            {
1040                // This makes sure not to leave out any possible "boolean" (a.k.a "Key Only") 
1041                // attributes when we rebuild the new TagNode.  An example of a "boolean" attribute
1042                // in HTML is "HIDDEN" which is a key that does not require any value to convey its
1043                // purpose or function.  Sometimes web-page designers might type "HIDDENT=TRUE",
1044                // but it is not necessary.  In any case, the "allAV(boolean, boolean)" method only
1045                // returns attributes that have BOTH a 'key' AND a 'value'.
1046
1047                List<String> keyOnly = tn.allKeyOnlyAttributes(true).collect(Collectors.toList());
1048
1049                // Build a new TagNode, then replace the old one
1050                tn = new TagNode(tn.tok, p, keyOnly, null, tn.str.endsWith("/>")); 
1051                html.setElementAt(tn, i);
1052
1053                // Save the vector-index where a replacement has occurred.  The user will be
1054                // provided a list of all locations where an old TagNode was replaced with a new one.
1055
1056                b.accept(i);
1057            }
1058
1059        // Build the IntStream, Convert the IntStream -> int[], Return it.
1060        return b.build().toArray(); 
1061    }
1062
1063    /**
1064     * Filters the contents of each instance of a {@code 'TC.OpeningTags'} element in the input
1065     * {@code Vector}.  The type of filter performed is defined by the parameter
1066     * {@code Filter 'f'}. Each time a {@code TagNode} in the input vectorized-html web-page, or
1067     * html sub-list, is changed or modified the original {@code TagNode} will be removed and
1068     * replaced by a new, updated or modified {@code TagNode} instance.
1069     *
1070     * <EMBED CLASS='external-html' DATA-PROC_TYPE=filtering DATA-FILE-ID=ATTR_RESTRICT_POSARR>
1071     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_FILTER_EXAMPLE>
1072     * 
1073     * @param html      <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
1074     * @param f         <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_FILTER_PARAM>
1075     *                  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_PA_RESTRICT_REM>
1076     * @param posArr    <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POS_ARR_PARAM>
1077     *
1078     * @throws ArrayIndexOutOfBoundsException
1079     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX>
1080     * @throws OpeningTagNodeExpectedException
1081     * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX>
1082     * 
1083     * @throws InnerTagKeyException     <EMBED CLASS='external-html' DATA-FILE-ID=ITKEX>
1084     * @throws QuotesException          <EMBED CLASS='external-html' DATA-FILE-ID=QEX>
1085     * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX>
1086     * 
1087     * @return  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INT_ARR_RET>
1088     *          <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_FILTER_RET_NOTE>
1089     *          <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POSARR_SHORT_EXPL>
1090     * 
1091     * @see TagNode#allAV(boolean, boolean)
1092     * @see TagNode#isTagNode()
1093     * @see TagNode#isClosing
1094     */
1095    public static int[] update(Vector<? super TagNode> html, int[] posArr, Filter f)
1096    {
1097        // Use Java Stream to keep a list of Vector-Locations that were updated / modified.
1098        IntStream.Builder b = IntStream.builder();
1099
1100        for (int i: posArr)
1101        {
1102            HTMLNode n = (HTMLNode) html.elementAt(i);
1103
1104            if (! n.isTagNode()) throw new TagNodeExpectedException(i);
1105
1106            TagNode tn = (TagNode) n;
1107
1108            if (tn.isClosing) throw new OpeningTagNodeExpectedException(i);
1109
1110            // If element-length < tok-length+5, there are no attributes!
1111            // '<', TOKEN, SPACE, ATTRIBUTE<MIN-1>, '=', '>'
1112
1113            if (tn.str.length() < (tn.tok.length() + 5)) continue;
1114
1115            // Retrieve all Attribute Key-Value Pairs.
1116            Properties p =  tn.allAV(true, true);
1117
1118            // This makes sure not to leave out any possible "boolean" (a.k.a "Key Only") 
1119            // attributes when we rebuild the new TagNode.   An example of a "boolean" attribute
1120            // in HTML is "HIDDEN" which is a key that does not require any value to convey its
1121            // purpose or function.  Sometimes web-page designers might type "HIDDENT=TRUE", but
1122            // it is not necessary.  In any case, the "allAV(boolean, boolean)" method only returns
1123            // attributes that have BOTH a 'key' AND a 'value'.
1124
1125            List<String> keyOnly = tn.allKeyOnlyAttributes(true).collect(Collectors.toList());
1126
1127            // Run the provided filter logic, if it returns TRUE, then build new TagNode
1128            if ((p.size() > 0) && f.filter(tn.tok, p))
1129            {
1130                // Build a new TagNode, and replace the old one.
1131                tn = new TagNode(tn.tok, p, keyOnly, null, tn.str.endsWith("/>"));
1132                html.setElementAt(tn, i);
1133
1134                // Save the vector-index where a replacement has occured.  The user will be
1135                // provided a list of all locations where an old TagNode was replaced with a
1136                // new one.
1137
1138                b.accept(i);
1139            }
1140        }
1141
1142        // Build the IntStream, Convert the IntStream -> int[], Return it.
1143        return b.build().toArray();
1144    }
1145
1146
1147    // ***************************************************************************************
1148    // ***************************************************************************************
1149    // Use Attribute White-Lists to Filter Attributes
1150    // ***************************************************************************************
1151    // ***************************************************************************************
1152
1153
1154    /**
1155     * Convenience Method.
1156     * <BR />Invokes: {@link #filter(Vector, int, int, String[])}
1157     */
1158    public static int[] filter(Vector<? super TagNode> html, String... innerTagWhiteList)
1159    { return filter(html, 0, -1, innerTagWhiteList); }
1160
1161    /**
1162     * Convenience Method.
1163     * <BR />Receives: {@code DotPair}
1164     * <BR />Invokes: {@link #filter(Vector, int, int, String[])}
1165     */
1166    public static int[] filter
1167        (Vector<? super TagNode> html, DotPair dp, String... innerTagWhiteList)
1168    { return filter(html, dp.start, dp.end + 1, innerTagWhiteList); }
1169
1170    /**
1171     * Filters the contents of each instance of a {@code 'TC.OpeningTags'} element in the input
1172     * {@code Vector} using an attribute {@code 'white-list'}.  All input-{@code Vector TagNode's}
1173     * that have attributes whose <B STYLE="color: red;">names</B> are not members of the inner-tag
1174     * {@code white-list} will be removed, and a new {@code TagNode} whose only attributes are
1175     * members of the innerTag {@code white-list} will replace the old {@code TagNode}.
1176     *
1177     * <EMBED CLASS='external-html' DATA-PROC_TYPE=removal DATA-FILE-ID=ATTR_RESTRICT_SE_POS>
1178     * 
1179     * @param html              <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
1180     * @param innerTagWhiteList <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_WHITE_LIST_PARAM>
1181     *                          <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_SE_RESTRICT_REM>
1182     * @param sPos              <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
1183     * @param ePos              <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
1184     * 
1185     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
1186     * 
1187     * @return  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INT_ARR_RET>
1188     *          <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_WHITE_L_RET_NOTE>
1189     * 
1190     * @see TagNode#allAN(boolean, boolean)
1191     * @see TagNode#isTagNode()
1192     * @see TagNode#removeAttributes(String[])
1193     * @see TagNode#isClosing
1194     * @see LV
1195     */
1196    public static int[] filter
1197        (Vector<? super TagNode> html, int sPos, int ePos, String... innerTagWhiteList)
1198    {
1199        TreeSet<String> whiteList = new TreeSet<>();
1200
1201        // Java Streams keep a list of which TagNode's were changed
1202        IntStream.Builder b = IntStream.builder();
1203
1204        // Build the tree-set with the contents of the list.  Trim them, convert to lower-case
1205        //
1206        // REMEMBER: Internally, attribute key-value pairs are returned in a java.util.Properties
1207        //           instance.  This Properties instance always has keys in lower case format.
1208
1209        for (String attribute: innerTagWhiteList) whiteList.add(attribute.trim().toLowerCase());
1210
1211        // Loop Variables, Temp Variables
1212        LV              l               = new LV(sPos, ePos, html);
1213        Vector<String>  attrToRemove    = new Vector<>();
1214        TagNode         tn;
1215
1216        for (int i=l.start; i < l.end; i++)
1217
1218            if ((tn = ((HTMLNode) html.elementAt(i)).openTagPWA()) != null)
1219            {
1220                // Will keep the list of attributes that didn't pass the white-list
1221                attrToRemove.clear();
1222
1223                // List of all attributes in the TagNode, as a String-Array
1224                String[] allAN = tn.allAN(true, true).toArray(String[]::new);
1225
1226                for (String attribute : allAN)
1227                    if (! whiteList.contains(attribute))
1228                        attrToRemove.addElement(attribute);
1229
1230                // if there were attributes that didn't pass...
1231                if (attrToRemove.size() > 0)
1232                {
1233                    // Build a new TagNode, and then replace the old one with the newly built one
1234                    // on the page or sub-page, and at the same location.
1235                    // NOTE: 'removeAttributes' needs a var-args String-Array, not a Vector<String>
1236
1237                    tn = tn.removeAttributes(attrToRemove.toArray(StringParse.EMPTY_STR_ARRAY));
1238                    html.setElementAt(tn, i);
1239
1240                    // Java's IntStream-Builder is just a way to "build" a short list of integer's.
1241                    // This lists has all Vector locations where a "TagNode swap" has occurred.
1242
1243                    b.accept(i);
1244                }
1245            }
1246
1247        // Build the IntStream, Convert the IntStream -> int[], Return it.
1248        return b.build().toArray();
1249    }
1250
1251    /**
1252     * Filters the contents of each instance of a {@code 'TC.OpeningTags'} element in the input
1253     * {@code Vector} using an attribute {@code 'white-list'}.  All input-{@code Vector TagNode's}
1254     * that have attributes whose <B STYLE="color: red;">names</B> are not members of the inner-tag
1255     * {@code white-list} will be removed, and a new {@code TagNode} whose only attributes are
1256     * members of the innerTag {@code white-list} will replace the old {@code TagNode}.
1257     *
1258     * <EMBED CLASS='external-html' DATA-PROC_TYPE=removal DATA-FILE-ID=ATTR_RESTRICT_POSARR>
1259     * 
1260     * @param html              <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
1261     * @param innerTagWhiteList <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_WHITE_LIST_PARAM>
1262     *                          <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_PA_RESTRICT_REM>
1263     * @param posArr            <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POS_ARR_PARAM>
1264     *                          <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_WHITE_LIST_EXAMPLE>
1265     * 
1266     * @throws ArrayIndexOutOfBoundsException
1267     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX>
1268     * @throws TagNodeExpectedException
1269     * <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX>
1270     * @throws OpeningTagNodeExpectedException
1271     * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX>
1272     * 
1273     * @return  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INT_ARR_RET>
1274     *          <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_WHITE_L_RET_NOTE>
1275     *          <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POSARR_SHORT_EXPL>
1276     * 
1277     * @see TagNode#allAN(boolean, boolean)
1278     * @see TagNode#removeAttributes(String[])
1279     * @see TagNode#isTagNode()
1280     * @see TagNode#isClosing
1281     */
1282    public static int[] filter
1283        (Vector<? super TagNode> html, int[] posArr, String... innerTagWhiteList)
1284    {
1285        TreeSet<String> whiteList = new TreeSet<>();
1286
1287        // Java Streams to keep a list of vector-indices that were updated.
1288        IntStream.Builder b = IntStream.builder();
1289
1290        // Build the tree-set with the contents of the list.  Trim them, convert to lower-case
1291        //
1292        // REMEMBER: Internally, attribute key-value pairs are returned in a java.util.Properties
1293        //           instance.  This Properties instance always has keys in lower case format.
1294
1295        for (String attribute: innerTagWhiteList) whiteList.add(attribute.trim().toLowerCase());
1296
1297        for (int i: posArr)
1298        {
1299            HTMLNode n = (HTMLNode) html.elementAt(i);
1300
1301            if (! n.isTagNode()) throw new TagNodeExpectedException(i);
1302
1303            TagNode tn = (TagNode) n;
1304
1305            if (tn.isClosing) throw new OpeningTagNodeExpectedException(i);
1306
1307            // If element-length = tok-length+2, THERE ARE NO ATTRIBUTES!
1308            if (tn.str.length() <= (tn.tok.length() + 3)) continue;
1309
1310            // List of all attributes in the TagNode
1311            String[] allAN = tn.allAN(true, true).toArray(String[]::new);
1312
1313            // List of the attributes that DIDN'T PASS the WHITE-LIST
1314            Vector<String> attrToRemove = new Vector<>();
1315
1316            for (String attribute : allAN)
1317                if (! whiteList.contains(attribute))
1318                    attrToRemove.addElement(attribute);
1319
1320            // if there were attributes that didn't pass...
1321            if (attrToRemove.size() > 0)
1322            {
1323                // Build a new TagNode, and then replace the old one with the newly built one
1324                // on the page or sub-page, and at the same location.
1325                // NOTE: 'removeAttributes' needs a var-args String-Array, not a Vector<String>
1326
1327                tn = tn.removeAttributes(attrToRemove.toArray(StringParse.EMPTY_STR_ARRAY));
1328                html.setElementAt(tn, i);
1329
1330                // Java's IntStream-Builder is just a way to "build" a short list of integer's.
1331                // This lists has all Vector locations where a "TagNode swap" has occurred.
1332
1333                b.accept(i);
1334            }
1335        }
1336
1337        // Build the IntStream, Convert the IntStream -> int[], Return it.
1338        return b.build().toArray();
1339    }
1340
1341
1342    // ***************************************************************************************
1343    // ***************************************************************************************
1344    // Use class StrFilter to Filter Attributes
1345    // ***************************************************************************************
1346    // ***************************************************************************************
1347
1348
1349    /**
1350     * Convenience Method.
1351     * <BR />Invokes: {@link #filter(Vector, int, int, StrFilter)}
1352     */
1353    public static int[] filter(Vector<? super TagNode> html, StrFilter filter)
1354    { return filter(html, 0, -1, filter); }
1355
1356    /**
1357     * Convenience Method.
1358     * <BR />Receives: {@code DotPair}
1359     * <BR />Invokes: {@link #filter(Vector, int, int, StrFilter)}
1360     */
1361    public static int[] filter(Vector<? super TagNode> html, DotPair dp, StrFilter filter)
1362    { return filter(html, dp.start, dp.end + 1, filter); }
1363
1364    /**
1365     * Filters the contents of each instance of a {@code 'TC.OpeningTags'} element in the input 
1366     * {@code Vector} using a {@link StrFilter}.  All input-{@code Vector TagNode's} which have
1367     * attributes will have the list of attribute-<B STYLE="color: red;">names</B> tested against
1368     * the provided {@code StrFilter.test(attribute)} predicate.
1369     *
1370     * <BR /><BR />If any attribute whose <B STYLE="color: red;">name</B> fails the
1371     * {@code Predicate} test, then that attribute will be removed.  After testing all of a
1372     * {@code TagNode's} inner-tags, if any of those attributes did fail the
1373     * {@code StrFilter.test(...)} method, a new {@code TagNode} will be constructed leaving those
1374     * out.  Finally, the old {@code TagNode} will be removed from input HTML {@code Vector}, and
1375     * replaced with the new one.
1376     *
1377     * <EMBED CLASS='external-html' DATA-PROC_TYPE=filtering DATA-FILE-ID=ATTR_RESTRICT_SE_POS>
1378     * 
1379     * @param html      <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
1380     * @param filter    <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_STR_FILTER_PARAM>
1381     *                  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_SE_RESTRICT_REM>
1382     * @param sPos      <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
1383     * @param ePos      <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
1384     * 
1385     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
1386     * 
1387     * @return  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INT_ARR_RET>
1388     *          <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_STR_FILT_RET_NOTE>
1389     * 
1390     * @see TagNode#allAN()
1391     * @see TagNode#isTagNode()
1392     * @see TagNode#isClosing
1393     * @see TagNode#removeAttributes(String[])
1394     * @see LV
1395     */
1396    public static int[] filter
1397        (Vector<? super TagNode> html, int sPos, int ePos, StrFilter filter)
1398    {
1399        // Save the list of modified TagNode's in a Java Stream
1400        IntStream.Builder b = IntStream.builder();
1401
1402        // Temp Var, Loop Variable
1403        LV      l   = new LV(sPos, ePos, html);
1404        TagNode tn;
1405
1406        for (int i=l.start; i < l.end; i++)
1407
1408            if ((tn = ((HTMLNode) html.elementAt(i)).openTagPWA()) != null) 
1409            {
1410                // Build a list of all inner-tags that must be removed
1411                String[] innerTagsToRemove = tn
1412                    .allAN(true, true)                          // Builds attibute Stream<String>
1413                    .filter(innerTag -> filter.test(innerTag))  // Run the user provided filter
1414                    .toArray(String[]::new);                    // Stream<String> -> String[]
1415
1416                if (innerTagsToRemove.length > 0)
1417                {
1418                    // Build a new TagNode, and then replace the old one with the newly built one
1419                    // on the page or sub-page, and at the same location.
1420
1421                    tn = tn.removeAttributes(innerTagsToRemove);
1422                    html.setElementAt(tn, i);
1423
1424                    // Java's IntStream-Builder is just a way to "build" a short list of integer's.
1425                    // The list shall contain all Vector indices where a "TagNode swap"  occurred
1426
1427                    b.accept(i);
1428                }
1429            }
1430
1431        // Build the IntStream, Convert the IntStream -> int[], Return it.
1432        return b.build().toArray();
1433    }
1434
1435    /**
1436     * Filters the contents of each instance of a {@code 'TC.OpeningTags'} element in the input 
1437     * {@code Vector} using a {@link StrFilter}.  All input-{@code Vector TagNode's} which have
1438     * attributes will have the list of attribute-<B STYLE="color: red;">names</B> tested against
1439     * the provided {@code StrFilter.test(attribute)} predicate.
1440     *
1441     * <BR /><BR />If any attribute whose <B STYLE="color: red;">name</B> fails the
1442     * {@code Predicate} test, then that attribute will be removed.  After testing all of a
1443     * {@code TagNode's} inner-tags, if any of those attributes did fail the
1444     * {@code StrFilter.test(...)} method, a new {@code TagNode} will be constructed leaving those
1445     * out.  Finally, the old {@code TagNode} will be removed from input HTML {@code Vector}, and
1446     * replaced with the new one.
1447     *
1448     * <EMBED CLASS='external-html' DATA-PROC_TYPE=filtering DATA-FILE-ID=ATTR_RESTRICT_POSARR>
1449     * 
1450     * @param html      <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
1451     * @param filter    <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_STR_FILTER_PARAM>
1452     *                  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_PA_RESTRICT_REM>
1453     * @param posArr    <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POS_ARR_PARAM>
1454     *                  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_STR_FILT_EXAMPLE>
1455     * 
1456     * @throws ArrayIndexOutOfBoundsException
1457     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX>
1458     * @throws OpeningTagNodeExpectedException
1459     * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX>
1460     * 
1461     * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX>
1462     * 
1463     * @return  <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_INT_ARR_RET>
1464     *          <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_STR_FILT_RET_NOTE>
1465     *          <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_POSARR_SHORT_EXPL>
1466     * 
1467     * @see TagNode#allAN()
1468     * @see TagNode#isTagNode()
1469     * @see TagNode#isClosing
1470     * @see TagNode#removeAttributes(String[])
1471     */
1472    public static int[] filter(Vector<? super TagNode> html, int[] posArr, StrFilter filter)
1473    {
1474        // Use Java Stream to keep a list of Vector-Locations that were updated / modified.
1475        IntStream.Builder b = IntStream.builder();
1476
1477        for (int i: posArr)
1478        {
1479            HTMLNode n = (HTMLNode) html.elementAt(i);
1480
1481            if (! n.isTagNode()) throw new TagNodeExpectedException(i);
1482
1483            TagNode tn = (TagNode) n;
1484
1485            if (tn.isClosing) throw new OpeningTagNodeExpectedException(i);
1486
1487            // Minimum TagNode.str Length (in order to have attributes): '<', TOKEN, SPACE '>'
1488            if (tn.str.length() < (tn.tok.length() + 3)) continue;
1489
1490            // Build a list of all inner-tags that must be removed
1491            String[] innerTagsToRemove = tn
1492                .allAN(true, true)                          // Builds attibute Stream<String>
1493                .filter(innerTag -> filter.test(innerTag))  // Run the user provided filter
1494                .toArray(String[]::new);                    // Stream<String> -> String[]
1495
1496            if (innerTagsToRemove.length > 0)
1497            {
1498                // Build a new TagNode, and then replace the old one with the newly built one
1499                // on the page or sub-page, and at the same location.
1500
1501                tn = tn.removeAttributes(innerTagsToRemove);
1502                html.setElementAt(tn, i);
1503
1504                // Java's IntStream-Builder is just a way to "build" a short list of integer's.
1505                // The list shall contain all Vector indices where a "TagNode swap"  occurred
1506
1507                b.accept(i);
1508            }
1509        }
1510
1511        // Build the IntStream, Convert the IntStream -> int[], Return it.
1512        return b.build().toArray();
1513    }
1514}