package Torello.CSS;

import Torello.Java.Additional.Ret2;
import Torello.Java.Additional.ByRef;

import java.util.Vector;
import java.util.function.Consumer;

/*
 * The output of the tokenization step is a stream of zero or more of the following tokens:
 * <ident-token>, <function-token>, <at-keyword-token>, <hash-token>, <string-token>,
 * <bad-string-token>, <url-token>, <bad-url-token>, <delim-token>, <number-token>,
 * <percentage-token>, <dimension-token>, <unicode-range-token>, <whitespace-token>,
 * <CDO-token>, <CDC-token>, <colon-token>, <semicolon-token>, <comma-token>,
 * <[-token>, <]-token>, <(-token>, <)-token>, <{-token>, and <}-token>.
 */
@Torello.JavaDoc.StaticFunctional
public class CSSTokenizer
{
    // Static-only class: instantiation is not permitted.
    private CSSTokenizer() { }

    /**
     * Converts CSS to a Token-{@code Vector}.
     *
     * <BR /><BR />The input is scanned left-to-right.  At each position the current code point
     * selects which token-consumer is invoked; any code point that does not begin a recognized
     * token is emitted as a {@code Delimiter}.
     *
     * @param css The CSS Code-Points.  Code-Points can be obtained from any CSS-File
     * as-a-{@code String} by invoking {@code String.codePoints().toArray()}.
     *
     * @return An instance of {@code Ret2} as:
     *
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI><CODE><B>Ret2.a:</B> Vector&lt;CSSToken&gt;</CODE> (CSS-Tokens)</LI>
     * <LI><CODE><B>Ret2.b:</B> Vector&lt;TokenizeError&gt;</CODE> (Errors)</LI>
     * </UL>
     *
     * @throws NullPointerException if {@code css} is null
     */
    public static Ret2<Vector<CSSToken>, Vector<TokenizeError>> parse(final int[] css)
    {
        final Vector<CSSToken>      tokens = new Vector<>();
        final Vector<TokenizeError> errors = new Vector<>();

        // Short aliases that are handed to every token-consumer:
        //   C: the code points being tokenized
        //   P: the current read position, boxed so the consumers can advance it
        //   T: sink that appends a token to 'tokens'
        //   E: sink that appends an error to 'errors'
        final int[]                     C = css;
        final ByRef<Integer>            P = new ByRef<>(0);
        final Consumer<CSSToken>        T = tokens::add;
        final Consumer<TokenizeError>   E = errors::add;

        final String RV_MSG =
            "There is a Reverse-Solidus (Back-Slash '\\') that " +
            "cannot be properly used as an Escape-Character";

        // Every branch below either bumps P.f itself or delegates to a consume-helper.  The
        // helpers are expected to move P.f past whatever they consumed; the loop relies on that
        // to terminate.
        while (P.f < css.length)
        {
            // Comments are tested first, ahead of the per-code-point dispatch.
            if (Comment.is(C, P.f))
            {
                Comment.consume(C, P, T, E);
                continue;
            }

            switch (C[P.f])
            {
                // Whitespace
                case '\u000B':
                case ' ':
                case '\t':
                case '\f':
                case '\n':
                case '\r':
                    Whitespace.consume(C, P, T);
                    break;

                // Single code point punctuation: emit the token and step one position.
                case ',': tokens.add(Punct.COMMA);          P.f++; break;
                case ':': tokens.add(Punct.COLON);          P.f++; break;
                case ';': tokens.add(Punct.SEMICOLON);      P.f++; break;
                case ']': tokens.add(Punct.RIGHT_BRACKET);  P.f++; break;
                case '[': tokens.add(Punct.LEFT_BRACKET);   P.f++; break;
                case '{': tokens.add(Punct.LEFT_SQUIGGLY);  P.f++; break;
                case '}': tokens.add(Punct.RIGHT_SQUIGGLY); P.f++; break;
                case '(': tokens.add(Punct.LEFT_PAREN);     P.f++; break;
                case ')': tokens.add(Punct.RIGHT_PAREN);    P.f++; break;

                // Quoted strings
                case '\'':
                case '"':
                    Str.consume(C, P, T, E);
                    break;

                // '-' may open a number, a CDC, or an identifier.  The order of these tests
                // matters: number first, then CDC, then identifier.
                case '-':
                    if (Num.is(C, P.f))
                        Num.consume(C, P, T, E);

                    else if (CDC.is(C, P.f))
                    {
                        tokens.add(CDC.SINGLETON);
                        P.f += 3;   // CDC is three code points long
                    }

                    else if (Identifier.startsIdentSequence(C, P.f))
                        Identifier.consumeIdentLikeSequence(C, P, T, E);

                    else
                        tokens.add(new Delimiter(C, P.f++));

                    break;

                // A valid escape starts an ident-like token.  Otherwise the back-slash is
                // reported as an error and still emitted as a Delimiter.
                case '\\':
                    if (CSSUtil.isValidEscape(C, P.f))
                        Identifier.consumeIdentLikeSequence(C, P, T, E);

                    else
                    {
                        errors.add(new TokenizeError(C, P.f, P.f+1, Identifier.class, RV_MSG));
                        tokens.add(new Delimiter(C, P.f++));
                    }

                    break;

                case '#':
                    if (Hash.is(C, P.f))    Hash.consume(C, P, T);
                    else                    tokens.add(new Delimiter(C, P.f++));
                    break;

                case '+':
                    if (Num.is(C, P.f))     Num.consume(C, P, T, E);
                    else                    tokens.add(new Delimiter(C, P.f++));
                    break;

                case '.':
                    if (Num.is(C, P.f))     Num.consume(C, P, T, E);
                    else                    tokens.add(new Delimiter(C, P.f++));
                    break;

                case '<':
                    if (CDO.is(C, P.f))
                    {
                        tokens.add(CDO.SINGLETON);
                        P.f += 4;   // CDO is four code points long
                    }

                    else tokens.add(new Delimiter(C, P.f++));

                    break;

                // An at-keyword requires that the code points FOLLOWING the '@' start an ident
                // sequence.  The '-' branch above hands startsIdentSequence the index of the
                // first code point of the candidate sequence; following that same convention,
                // the candidate here begins one past the '@'.  Testing at the '@' itself could
                // never succeed, and every at-keyword would degrade into a '@' Delimiter.
                //
                // The bounds check covers a '@' that is the very last code point of the input.
                //
                // NOTE(review): Identifier is not visible from this file - confirm that
                // startsIdentSequence inspects the code point AT the index it is given.
                case '@':
                    if ((P.f + 1 < C.length) && Identifier.startsIdentSequence(C, P.f + 1))
                        AtKeyword.consume(C, P, T);
                    else
                        tokens.add(new Delimiter(C, P.f++));

                    break;

                // 'u' / 'U' may open a unicode-range; if not, it is treated as the start of an
                // ident-like token.
                case 'u':
                case 'U':
                    if (UnicodeRange.is(C, P.f))
                        UnicodeRange.consume(C, P, T);
                    else
                        Identifier.consumeIdentLikeSequence(C, P, T, E);

                    break;

                // Everything else: digit, ident-start code point, or a plain Delimiter.
                default:
                {
                    final int c = C[P.f];

                    if ((c >= '0') && (c <= '9'))
                        Num.consume(C, P, T, E);

                    else if (Identifier.isIdentStartCodePoint(c))
                        Identifier.consumeIdentLikeSequence(C, P, T, E);

                    else
                        tokens.add(new Delimiter(C, P.f++));
                }
            }
        }

        return new Ret2<>(tokens, errors);
    }
}