001package Torello.CSS;
002
003import Torello.Java.Additional.Ret2;
004import Torello.Java.Additional.ByRef;
005
006import java.util.Vector;
007import java.util.function.Consumer;
008
009/*
 * The output of the tokenization step is a stream of zero or more of the following tokens:
011 * <ident-token>, <function-token>, <at-keyword-token>, <hash-token>, <string-token>,
012 * <bad-string-token>, <url-token>, <bad-url-token>, <delim-token>, <number-token>,
013 * <percentage-token>, <dimension-token>, <unicode-range-token>, <whitespace-token>,
014 * <CDO-token>, <CDC-token>, <colon-token>, <semicolon-token>, <comma-token>,
015 * <[-token>, <]-token>, <(-token>, <)-token>, <{-token>, and <}-token>.
016 */
017@Torello.JavaDoc.StaticFunctional
018public class CSSTokenizer
019{
020    private CSSTokenizer() { }
021
022    /**
023     * Converts CSS to a Token-{@code Vector}.
024     * 
025     * @param css The CSS Code-Points.  Code-Points can be obtained from any CSS-File
026     * as-a-{@code String} by invoking {@code String.codePoints().toArray()}.
027     * 
028     * @return An instance of {@code Ret2} as:
029     * 
030     * <BR /><BR /><UL CLASS=JDUL>
031     * <LI><CODE><B>Ret2.a:</B> Vector&lt;CSSToken&gt;</CODE> (CSS-Tokens)</LI>
032     * <LI><CODE><B>Ret2.b:</B> Vector&lt;TokenizeError&gt;</CODE> (Errors)</LI>
033     * </UL>
034     */
035    public static Ret2<Vector<CSSToken>, Vector<TokenizeError>>
036        parse(final int[] css)
037    {
038        final Vector<CSSToken>          tokens  = new Vector<>();
039        final Vector<TokenizeError>     errors  = new Vector<>();
040        final int[]                     C       = css;
041        final ByRef<Integer>            P       = new ByRef<>(0);
042        final Consumer<CSSToken>        T       = tokens::add;
043        final Consumer<TokenizeError>   E       = errors::add;
044
045        final String RV_MSG = "There is a Reverse-Solidus (Back-Slash '\\') that " +
046        "cannot be properly used as an Escape-Character";
047
048        while (P.f < css.length)
049        
050            if (Comment.is(css, P.f)) Comment.consume(C, P, T, E);
051
052            else switch (C[P.f])
053            {
054                case '\u000B':
055                case ' ':
056                case '\t':
057                case '\f':
058                case '\n':
059                case '\r': Whitespace.consume(C, P, T); break;
060
061                case ',': tokens.add(Punct.COMMA);          P.f++; break;
062                case ':': tokens.add(Punct.COLON);          P.f++; break;
063                case ';': tokens.add(Punct.SEMICOLON);      P.f++; break;
064                case ']': tokens.add(Punct.RIGHT_BRACKET);  P.f++; break;
065                case '[': tokens.add(Punct.LEFT_BRACKET);   P.f++; break;
066                case '{': tokens.add(Punct.LEFT_SQUIGGLY);  P.f++; break;
067                case '}': tokens.add(Punct.RIGHT_SQUIGGLY); P.f++; break;
068                case '(': tokens.add(Punct.LEFT_PAREN);     P.f++; break;
069                case ')': tokens.add(Punct.RIGHT_PAREN);    P.f++; break;
070
071                case '\'':
072                case '"': Str.consume(C, P, T, E); break;
073
074                case '-':
075
076                    if (Num.is(C, P.f))
077                        Num.consume(C, P, T, E);
078
079                    else if (CDC.is(C, P.f))
080                        { tokens.add(CDC.SINGLETON); P.f += 3; }
081
082                    else if (Identifier.startsIdentSequence(C, P.f))
083                        Identifier.consumeIdentLikeSequence(C, P, T, E);
084
085                    else
086                        tokens.add(new Delimiter(C, P.f++));
087
088                    break;
089
090                case '\\':
091
092                    if (CSSUtil.isValidEscape(C, P.f))
093                        Identifier.consumeIdentLikeSequence(C, P, T, E);
094
095                    else 
096                    {
097                        errors.add(new TokenizeError(C, P.f, P.f+1, Identifier.class, RV_MSG));
098                        tokens.add(new Delimiter(C, P.f++));
099                    }
100
101                    break;
102
103                case '#':
104
105                    if (Hash.is(C, P.f))    Hash.consume(C, P, T);
106                    else                    tokens.add(new Delimiter(css, P.f++));
107                    break;
108
109                case '+':
110
111                    if (Num.is(C, P.f)) Num.consume(C, P, T, E);
112                    else                tokens.add(new Delimiter(css, P.f++));
113                    break;
114
115                case '.':
116
117                    if (Num.is(C, P.f)) Num.consume(C, P, T, E);
118                    else                tokens.add(new Delimiter(css, P.f++));
119                    break;
120
121                case '<':
122
123                    if (CDO.is(C, P.f)) { tokens.add(CDO.SINGLETON); P.f += 4; }
124                    else                tokens.add(new Delimiter(css, P.f++));
125                    break;
126
127                case '@':
128
129                    if (Identifier.startsIdentSequence(C, P.f))
130                        AtKeyword.consume(C, P, T);
131                    else
132                        tokens.add(new Delimiter(css, P.f++));
133
134                    break;
135
136                case 'u':
137                case 'U':
138
139                    if (UnicodeRange.is(C, P.f))
140                        UnicodeRange.consume(C, P, T);
141                    else
142                        Identifier.consumeIdentLikeSequence(C, P, T, E);
143
144                    break;
145
146                default:
147
148                    final int c = C[P.f];
149
150                    if ((c >= '0') && (c <= '9'))
151                        Num.consume(C, P, T, E);
152
153                    else if (Identifier.isIdentStartCodePoint(c))
154                        Identifier.consumeIdentLikeSequence(C, P, T, E);
155
156                    else
157                        tokens.add(new Delimiter(css, P.f++));
158        }
159
160        return new Ret2<>(tokens, errors);
161    }
162}