1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
package Torello.CSS;

import Torello.Java.Additional.Ret2;
import Torello.Java.Additional.ByRef;

import java.util.Vector;
import java.util.function.Consumer;

/*
 * The output of the tokenization step is a stream of zero or more of the following tokens:
 * <ident-token>, <function-token>, <at-keyword-token>, <hash-token>, <string-token>,
 * <bad-string-token>, <url-token>, <bad-url-token>, <delim-token>, <number-token>,
 * <percentage-token>, <dimension-token>, <unicode-range-token>, <whitespace-token>,
 * <CDO-token>, <CDC-token>, <colon-token>, <semicolon-token>, <comma-token>,
 * <[-token>, <]-token>, <(-token>, <)-token>, <{-token>, and <}-token>.
 */
/**
 * Static entry point that splits CSS source, supplied as an array of code points, into a
 * {@code Vector} of {@code CSSToken} instances, while collecting every {@code TokenizeError}
 * reported along the way.
 */
@Torello.JavaDoc.StaticFunctional
public class CSSTokenizer
{
    // Static-only class; the private constructor prevents instantiation.
    private CSSTokenizer() { }

    /**
     * Converts CSS to a Token-{@code Vector}.
     *
     * <BR /><BR />The input is scanned left to right.  At each position the current code point
     * selects which token-class is asked to consume input; single-character punctuation and
     * delimiters are appended directly and the cursor is advanced by one.
     *
     * @param css The CSS Code-Points.  Code-Points can be obtained from any CSS-File
     * as-a-{@code String} by invoking {@code String.codePoints().toArray()}.
     *
     * @return An instance of {@code Ret2} as:
     *
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI><CODE><B>Ret2.a:</B> Vector&lt;CSSToken&gt;</CODE> (CSS-Tokens)</LI>
     * <LI><CODE><B>Ret2.b:</B> Vector&lt;TokenizeError&gt;</CODE> (Errors)</LI>
     * </UL>
     *
     * @throws NullPointerException if {@code css} is {@code null}
     */
    public static Ret2<Vector<CSSToken>, Vector<TokenizeError>>
        parse(final int[] css)
    {
        final Vector<CSSToken>          tokens  = new Vector<>();
        final Vector<TokenizeError>     errors  = new Vector<>();

        // Short aliases handed to the per-token helper classes:
        //   C = code points, P = mutable cursor, T = token sink, E = error sink
        final int[]                     C       = css;
        final ByRef<Integer>            P       = new ByRef<>(0);
        final Consumer<CSSToken>        T       = tokens::add;
        final Consumer<TokenizeError>   E       = errors::add;

        final String RV_MSG = "There is a Reverse-Solidus (Back-Slash '\\') that " +
        "cannot be properly used as an Escape-Character";

        while (P.f < C.length)
        {
            // Comments are tested before anything else, so a comment opener never reaches
            // the switch below.
            if (Comment.is(C, P.f))
            {
                Comment.consume(C, P, T, E);
                continue;
            }

            switch (C[P.f])
            {
                // NOTE(review): U+000B (vertical tab) is not listed as whitespace by CSS Syntax
                // Level 3; it is kept here to preserve the existing behavior - confirm intent.
                case '\u000B':
                case ' ':
                case '\t':
                case '\f':
                case '\n':
                case '\r': Whitespace.consume(C, P, T); break;

                // Single code-point punctuation: append the shared instance, advance by one.
                case ',': tokens.add(Punct.COMMA);          P.f++; break;
                case ':': tokens.add(Punct.COLON);          P.f++; break;
                case ';': tokens.add(Punct.SEMICOLON);      P.f++; break;
                case ']': tokens.add(Punct.RIGHT_BRACKET);  P.f++; break;
                case '[': tokens.add(Punct.LEFT_BRACKET);   P.f++; break;
                case '{': tokens.add(Punct.LEFT_SQUIGGLY);  P.f++; break;
                case '}': tokens.add(Punct.RIGHT_SQUIGGLY); P.f++; break;
                case '(': tokens.add(Punct.LEFT_PAREN);     P.f++; break;
                case ')': tokens.add(Punct.RIGHT_PAREN);    P.f++; break;

                // Either quote character opens a string.
                case '\'':
                case '"': Str.consume(C, P, T, E); break;

                case '-':

                    // Order matters: a number is tried first, then the CDC token, then an
                    // identifier; only if none match is the '-' emitted as a lone delimiter.
                    if (Num.is(C, P.f))
                        Num.consume(C, P, T, E);

                    else if (CDC.is(C, P.f))
                        { tokens.add(CDC.SINGLETON); P.f += 3; }

                    else if (Identifier.startsIdentSequence(C, P.f))
                        Identifier.consumeIdentLikeSequence(C, P, T, E);

                    else
                        tokens.add(new Delimiter(C, P.f++));

                    break;

                case '\\':

                    if (CSSUtil.isValidEscape(C, P.f))
                        Identifier.consumeIdentLikeSequence(C, P, T, E);

                    else
                    {
                        // A back-slash that does not begin a valid escape is reported as an
                        // error AND still emitted as a delimiter so that no input is dropped.
                        errors.add(new TokenizeError(C, P.f, P.f+1, Identifier.class, RV_MSG));
                        tokens.add(new Delimiter(C, P.f++));
                    }

                    break;

                case '#':

                    if (Hash.is(C, P.f))    Hash.consume(C, P, T);
                    else                    tokens.add(new Delimiter(C, P.f++));
                    break;

                case '+':

                    if (Num.is(C, P.f)) Num.consume(C, P, T, E);
                    else                tokens.add(new Delimiter(C, P.f++));
                    break;

                case '.':

                    if (Num.is(C, P.f)) Num.consume(C, P, T, E);
                    else                tokens.add(new Delimiter(C, P.f++));
                    break;

                case '<':

                    // The CDO token spans 4 code points.
                    if (CDO.is(C, P.f)) { tokens.add(CDO.SINGLETON); P.f += 4; }
                    else                tokens.add(new Delimiter(C, P.f++));
                    break;

                case '@':

                    // An at-keyword is an '@' FOLLOWED by an ident sequence, so the ident test
                    // has to begin at the code point after the '@'.  Testing at P.f itself (the
                    // '@') can never succeed, which would make every '@' a plain delimiter.
                    // The bounds guard covers an '@' that is the very last code point.
                    //
                    // NOTE(review): assumes startsIdentSequence inspects the code points
                    // beginning at the index it is given (as its use in the '-' case suggests),
                    // and that AtKeyword.consume expects P to still point at the '@' - confirm.
                    if ((P.f + 1 < C.length) && Identifier.startsIdentSequence(C, P.f + 1))
                        AtKeyword.consume(C, P, T);
                    else
                        tokens.add(new Delimiter(C, P.f++));

                    break;

                case 'u':
                case 'U':

                    // 'u' / 'U' may open a unicode-range; otherwise it is an ordinary
                    // identifier start.
                    if (UnicodeRange.is(C, P.f))
                        UnicodeRange.consume(C, P, T);
                    else
                        Identifier.consumeIdentLikeSequence(C, P, T, E);

                    break;

                default:
                {
                    final int c = C[P.f];

                    if ((c >= '0') && (c <= '9'))
                        Num.consume(C, P, T, E);

                    else if (Identifier.isIdentStartCodePoint(c))
                        Identifier.consumeIdentLikeSequence(C, P, T, E);

                    // Anything unrecognized becomes a one code-point delimiter token.
                    else
                        tokens.add(new Delimiter(C, P.f++));
                }
            }
        }

        return new Ret2<>(tokens, errors);
    }
}