CSSTokenizer.java.html

package Torello.CSS;

import Torello.Java.Additional.Ret2;
import Torello.Java.Additional.ByRef;

import java.util.Vector;
import java.util.function.Consumer;

/*
 * The output of the tokenization step is a stream of zero or more of the following tokens:
 * <ident-token>, <function-token>, <at-keyword-token>, <hash-token>, <string-token>,
 * <bad-string-token>, <url-token>, <bad-url-token>, <delim-token>, <number-token>,
 * <percentage-token>, <dimension-token>, <unicode-range-token>, <whitespace-token>,
 * <CDO-token>, <CDC-token>, <colon-token>, <semicolon-token>, <comma-token>,
 * <[-token>, <]-token>, <(-token>, <)-token>, <{-token>, and <}-token>.
 */
/**
 * Stateless entry point that splits an array of CSS code points into a list of
 * {@code CSSToken} instances, collecting any {@code TokenizeError}'s found along the way.
 */
@Torello.JavaDoc.StaticFunctional
public class CSSTokenizer
{
    // Static-only class: no instances are ever created.
    private CSSTokenizer() { }

    /**
     * Converts CSS to a Token-{@code Vector}.
     *
     * <BR /><BR />The input is scanned from left to right.  At every position the current
     * code point selects which token-class is asked to recognize and consume the input.  Any
     * code point that does not begin a recognized token is emitted as a {@code Delimiter}.
     *
     * @param css The CSS Code-Points.  Code-Points can be obtained from any CSS-File
     * as-a-{@code String} by invoking {@code String.codePoints().toArray()}.
     *
     * @return An instance of {@code Ret2} as:
     *
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI><CODE><B>Ret2.a:</B> Vector&lt;CSSToken&gt;</CODE> (CSS-Tokens)</LI>
     * <LI><CODE><B>Ret2.b:</B> Vector&lt;TokenizeError&gt;</CODE> (Errors)</LI>
     * </UL>
     *
     * @throws NullPointerException if {@code css} is null
     */
    public static Ret2<Vector<CSSToken>, Vector<TokenizeError>>
        parse(final int[] css)
    {
        final Vector<CSSToken>          tokens  = new Vector<>();
        final Vector<TokenizeError>     errors  = new Vector<>();

        // Short aliases handed to every token-class: C = code points, P = current position,
        // T = token sink, E = error sink.
        final int[]                     C       = css;
        final ByRef<Integer>            P       = new ByRef<>(0);
        final Consumer<CSSToken>        T       = tokens::add;
        final Consumer<TokenizeError>   E       = errors::add;

        final String RV_MSG = "There is a Reverse-Solidus (Back-Slash '\\') that " +
        "cannot be properly used as an Escape-Character";

        // NOTE: every 'consume' helper below is expected to advance P.f past what it consumed;
        // the branches that build a token directly advance P.f themselves.
        while (P.f < C.length)
        {
            // Comments are checked first, before the '/' could be treated as a delimiter
            if (Comment.is(C, P.f))
            {
                Comment.consume(C, P, T, E);
                continue;
            }

            switch (C[P.f])
            {
                // White-Space
                case '\u000B':
                case ' ':
                case '\t':
                case '\f':
                case '\n':
                case '\r': Whitespace.consume(C, P, T); break;

                // Single code point punctuation
                case ',': tokens.add(Punct.COMMA);          P.f++; break;
                case ':': tokens.add(Punct.COLON);          P.f++; break;
                case ';': tokens.add(Punct.SEMICOLON);      P.f++; break;
                case ']': tokens.add(Punct.RIGHT_BRACKET);  P.f++; break;
                case '[': tokens.add(Punct.LEFT_BRACKET);   P.f++; break;
                case '{': tokens.add(Punct.LEFT_SQUIGGLY);  P.f++; break;
                case '}': tokens.add(Punct.RIGHT_SQUIGGLY); P.f++; break;
                case '(': tokens.add(Punct.LEFT_PAREN);     P.f++; break;
                case ')': tokens.add(Punct.RIGHT_PAREN);    P.f++; break;

                // Quoted strings
                case '\'':
                case '"': Str.consume(C, P, T, E); break;

                // A hyphen may begin a number, the CDC marker, or an identifier - in that
                // order of precedence.
                case '-':

                    if (Num.is(C, P.f))
                        Num.consume(C, P, T, E);

                    else if (CDC.is(C, P.f))
                        { tokens.add(CDC.SINGLETON); P.f += 3; }

                    else if (Identifier.startsIdentSequence(C, P.f))
                        Identifier.consumeIdentLikeSequence(C, P, T, E);

                    else
                        tokens.add(new Delimiter(C, P.f++));

                    break;

                // A back-slash either starts an escaped identifier, or it is an error that is
                // recorded while the back-slash itself is kept as a delimiter.
                case '\\':

                    if (CSSUtil.isValidEscape(C, P.f))
                        Identifier.consumeIdentLikeSequence(C, P, T, E);

                    else
                    {
                        errors.add(new TokenizeError(C, P.f, P.f+1, Identifier.class, RV_MSG));
                        tokens.add(new Delimiter(C, P.f++));
                    }

                    break;

                case '#':

                    if (Hash.is(C, P.f))    Hash.consume(C, P, T);
                    else                    tokens.add(new Delimiter(C, P.f++));
                    break;

                case '+':

                    if (Num.is(C, P.f)) Num.consume(C, P, T, E);
                    else                tokens.add(new Delimiter(C, P.f++));
                    break;

                case '.':

                    if (Num.is(C, P.f)) Num.consume(C, P, T, E);
                    else                tokens.add(new Delimiter(C, P.f++));
                    break;

                case '<':

                    if (CDO.is(C, P.f)) { tokens.add(CDO.SINGLETON); P.f += 4; }
                    else                tokens.add(new Delimiter(C, P.f++));
                    break;

                case '@':

                    // An At-Keyword is '@' followed by an ident sequence.  The '@' itself can
                    // never begin an ident sequence, so the look-ahead must start one position
                    // past it.  The bounds check covers a lone '@' at the very end of the input.
                    if ((P.f + 1 < C.length) && Identifier.startsIdentSequence(C, P.f + 1))
                        AtKeyword.consume(C, P, T);
                    else
                        tokens.add(new Delimiter(C, P.f++));

                    break;

                // 'u' / 'U' may begin a Unicode-Range; otherwise it is an ordinary identifier
                case 'u':
                case 'U':

                    if (UnicodeRange.is(C, P.f))
                        UnicodeRange.consume(C, P, T);
                    else
                        Identifier.consumeIdentLikeSequence(C, P, T, E);

                    break;

                default:
                {
                    final int c = C[P.f];

                    if ((c >= '0') && (c <= '9'))
                        Num.consume(C, P, T, E);

                    else if (Identifier.isIdentStartCodePoint(c))
                        Identifier.consumeIdentLikeSequence(C, P, T, E);

                    // Anything unrecognized is passed through as a single-code-point delimiter
                    else
                        tokens.add(new Delimiter(C, P.f++));

                    break;
                }
            }
        }

        return new Ret2<>(tokens, errors);
    }
}