1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
package Torello.HTML;

import Torello.Java.LV;
import Torello.Java.StringParse;

import java.util.Vector;
import java.util.TreeSet;

import java.util.stream.IntStream;


/**
 * Strips, from the opening tags of an HTML vector, every attribute whose name is not on a
 * caller-supplied white-list.
 *
 * <p>Two entry points are offered: one that walks a contiguous range of the vector, and one that
 * visits an explicit list of vector indices.  Both replace each affected {@code TagNode} in place
 * and report the indices at which a replacement was made.
 */
class WhiteListFilter
{
    /**
     * Filters the attributes of the nodes located in a range of {@code html}.
     *
     * <p>Nodes for which {@code openTagPWA()} returns {@code null} are skipped (presumably every
     * node that is not an opening tag carrying attributes - confirm against {@code HTMLNode}).
     *
     * @param html              the vector whose nodes are inspected and, where needed, replaced
     * @param sPos              range start, handed to {@code LV} together with {@code ePos}
     * @param ePos              range end, handed to {@code LV} together with {@code sPos}
     * @param innerTagWhiteList attribute names that are allowed to remain; each is trimmed and
     *                          lower-cased before comparison
     *
     * @return the vector indices, in ascending order, at which a {@code TagNode} was replaced
     *
     * @throws NullPointerException if {@code innerTagWhiteList}, or any of its elements, is null
     */
    static int[] filter(
            final Vector<? super TagNode>   html,
            final int                       sPos,
            final int                       ePos,
            final String...                 innerTagWhiteList
        )
    {
        // The white-list is built before the range is examined, so an invalid white-list is
        // reported ahead of any complaint 'LV' may have about sPos / ePos.
        final TreeSet<String> whiteList = buildWhiteList(innerTagWhiteList);

        // Collects the vector indices at which a TagNode has been swapped out
        final IntStream.Builder changed = IntStream.builder();

        // Loop bounds are taken from the LV instance
        final LV l = new LV(sPos, ePos, html);

        for (int i=l.start; i < l.end; i++)
        {
            final TagNode tn = ((HTMLNode) html.elementAt(i)).openTagPWA();

            if (tn == null) continue;

            final String[] rejected = rejectedAttributes(tn, whiteList);

            // Every attribute passed the white-list: leave the node alone
            if (rejected.length == 0) continue;

            // Build a new TagNode without the rejected attributes, and put it at the very same
            // location in the vector.
            html.setElementAt(tn.removeAttributes(rejected), i);
            changed.accept(i);
        }

        return changed.build().toArray();
    }

    /**
     * Filters the attributes of the nodes located at the given indices of {@code html}.
     *
     * @param html              the vector whose nodes are inspected and, where needed, replaced
     * @param posArr            the vector indices to visit, in the order they should be visited
     * @param innerTagWhiteList attribute names that are allowed to remain; each is trimmed and
     *                          lower-cased before comparison
     *
     * @return the vector indices, in visiting order, at which a {@code TagNode} was replaced
     *
     * @throws TagNodeExpectedException        if an index in {@code posArr} does not hold a
     *                                         {@code TagNode}
     * @throws OpeningTagNodeExpectedException if an index in {@code posArr} holds a closing tag
     * @throws NullPointerException            if {@code innerTagWhiteList}, or any of its
     *                                         elements, is null
     */
    static int[] filter(
            final Vector<? super TagNode>   html,
            final int[]                     posArr,
            final String...                 innerTagWhiteList
        )
    {
        final TreeSet<String> whiteList = buildWhiteList(innerTagWhiteList);

        // Collects the vector indices at which a TagNode has been swapped out
        final IntStream.Builder changed = IntStream.builder();

        for (int i: posArr)
        {
            final HTMLNode n = (HTMLNode) html.elementAt(i);

            if (! n.isTagNode()) throw new TagNodeExpectedException(i);

            final TagNode tn = (TagNode) n;

            if (tn.isClosing) throw new OpeningTagNodeExpectedException(i);

            // Short-circuit for tags that are too short to hold any attribute.  Assuming 'str'
            // is the complete tag text and 'tok' the element name: "<tok>" is tok-length + 2,
            // while "<tok/>" and "<tok >" are tok-length + 3.  An attribute needs more room.
            if (tn.str.length() <= (tn.tok.length() + 3)) continue;

            final String[] rejected = rejectedAttributes(tn, whiteList);

            // Every attribute passed the white-list: leave the node alone
            if (rejected.length == 0) continue;

            // Build a new TagNode without the rejected attributes, and put it at the very same
            // location in the vector.
            html.setElementAt(tn.removeAttributes(rejected), i);
            changed.accept(i);
        }

        return changed.build().toArray();
    }

    // Normalizes the caller's white-list (trim + lower-case) into a set used for look-ups.
    private static TreeSet<String> buildWhiteList(final String[] innerTagWhiteList)
    {
        final TreeSet<String> whiteList = new TreeSet<>();

        // The original author's remark: attribute key-value pairs are returned internally in a
        // java.util.Properties instance whose keys are always lower-case, hence the lower-casing.
        //
        // NOTE(review): toLowerCase() uses the default locale.  Whether Locale.ROOT would be
        // safer depends on how the attribute names themselves are lower-cased - confirm first.

        for (String attribute: innerTagWhiteList) whiteList.add(attribute.trim().toLowerCase());

        return whiteList;
    }

    // Returns the attribute names of 'tn' that are absent from 'whiteList' (empty if none).
    private static String[] rejectedAttributes
        (final TagNode tn, final TreeSet<String> whiteList)
    {
        final Vector<String> attrToRemove = new Vector<>();

        // All attribute names of the TagNode, as a String-Array
        final String[] allAN = tn.allAN(true, true).toArray(String[]::new);

        for (String attribute : allAN)
            if (! whiteList.contains(attribute))
                attrToRemove.addElement(attribute);

        // 'removeAttributes' needs a var-args String-Array, not a Vector<String>
        return attrToRemove.toArray(StringParse.EMPTY_STR_ARRAY);
    }
}