package Torello.CSS;

import Torello.Java.Additional.Ret2;
import Torello.Java.Additional.ByRef;

import java.util.Vector;
import java.util.function.Consumer;

/*
 * Tokenization yields a stream of zero or more of the following tokens:
 * <ident-token>, <function-token>, <at-keyword-token>, <hash-token>, <string-token>,
 * <bad-string-token>, <url-token>, <bad-url-token>, <delim-token>, <number-token>,
 * <percentage-token>, <dimension-token>, <unicode-range-token>, <whitespace-token>,
 * <CDO-token>, <CDC-token>, <colon-token>, <semicolon-token>, <comma-token>,
 * <[-token>, <]-token>, <(-token>, <)-token>, <{-token>, and <}-token>.
 */
@Torello.JavaDoc.StaticFunctional
public class CSSTokenizer
{
    // Static-only utility class; never instantiated.
    private CSSTokenizer() { }

    /**
     * Tokenizes an array of CSS Code-Points, producing a Token-{@code Vector} and an
     * Error-{@code Vector}.
     *
     * <BR /><BR />The input is scanned left to right.  At each position a comment is looked
     * for first; otherwise the current Code-Point selects which token-consumer is invoked.  A
     * Code-Point that starts no recognized token is emitted as a {@code Delimiter}.
     *
     * @param css The CSS Code-Points.  These may be obtained from any CSS-File held
     * as-a-{@code String} by invoking {@code String.codePoints().toArray()}.
     *
     * @return An instance of {@code Ret2} as:
     *
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI><CODE><B>Ret2.a:</B> Vector&lt;CSSToken&gt;</CODE> (CSS-Tokens)</LI>
     * <LI><CODE><B>Ret2.b:</B> Vector&lt;TokenizeError&gt;</CODE> (Errors)</LI>
     * </UL>
     */
    public static Ret2<Vector<CSSToken>, Vector<TokenizeError>>
        parse(final int[] css)
    {
        final Vector<CSSToken>          tokenList   = new Vector<>();
        final Vector<TokenizeError>     errorList   = new Vector<>();

        // Shared, mutable read-position.  Each consumer advances it past what it consumed.
        final ByRef<Integer>            pos         = new ByRef<>(0);

        // Sinks handed to the individual token-consumers
        final Consumer<CSSToken>        emit        = tokenList::add;
        final Consumer<TokenizeError>   report      = errorList::add;

        final String badEscapeMsg =
            "There is a Reverse-Solidus (Back-Slash '\\') that " +
            "cannot be properly used as an Escape-Character";

        while (pos.f < css.length)
        {
            // Comments are tested for before any per-character dispatch takes place
            if (Comment.is(css, pos.f))
            {
                Comment.consume(css, pos, emit, report);
                continue;
            }

            final int cp = css[pos.f];

            switch (cp)
            {
                // White-Space characters (the first label is U+000B)
                case '\u000B':
                case ' ':
                case '\t':
                case '\f':
                case '\n':
                case '\r':
                    Whitespace.consume(css, pos, emit);
                    break;

                // Single-character punctuation: emit the matching token, step one position
                case ',':   emit.accept(Punct.COMMA);           pos.f++;    break;
                case ':':   emit.accept(Punct.COLON);           pos.f++;    break;
                case ';':   emit.accept(Punct.SEMICOLON);       pos.f++;    break;
                case ']':   emit.accept(Punct.RIGHT_BRACKET);   pos.f++;    break;
                case '[':   emit.accept(Punct.LEFT_BRACKET);    pos.f++;    break;
                case '{':   emit.accept(Punct.LEFT_SQUIGGLY);   pos.f++;    break;
                case '}':   emit.accept(Punct.RIGHT_SQUIGGLY);  pos.f++;    break;
                case '(':   emit.accept(Punct.LEFT_PAREN);      pos.f++;    break;
                case ')':   emit.accept(Punct.RIGHT_PAREN);     pos.f++;    break;

                // Either quote character opens a string
                case '\'':
                case '"':
                    Str.consume(css, pos, emit, report);
                    break;

                // Hyphen-Minus: tried as a number, then a CDC, then an identifier - in that
                // order - and emitted as a plain delimiter when none of them match.
                case '-':

                    if (Num.is(css, pos.f))
                    {
                        Num.consume(css, pos, emit, report);
                        break;
                    }

                    if (CDC.is(css, pos.f))
                    {
                        emit.accept(CDC.SINGLETON);
                        pos.f += 3;
                        break;
                    }

                    if (Identifier.startsIdentSequence(css, pos.f))
                    {
                        Identifier.consumeIdentLikeSequence(css, pos, emit, report);
                        break;
                    }

                    appendDelimiter(css, pos, emit);
                    break;

                // Back-Slash: a valid escape begins an ident-like sequence.  Anything else is
                // recorded as an error and the Back-Slash itself is emitted as a delimiter.
                case '\\':

                    if (CSSUtil.isValidEscape(css, pos.f))
                    {
                        Identifier.consumeIdentLikeSequence(css, pos, emit, report);
                        break;
                    }

                    report.accept
                        (new TokenizeError(css, pos.f, pos.f+1, Identifier.class, badEscapeMsg));

                    appendDelimiter(css, pos, emit);
                    break;

                case '#':

                    if (Hash.is(css, pos.f))    Hash.consume(css, pos, emit);
                    else                        appendDelimiter(css, pos, emit);

                    break;

                // Plus-Sign and Full-Stop are treated alike: a number, or else a delimiter
                case '+':
                case '.':

                    if (Num.is(css, pos.f))     Num.consume(css, pos, emit, report);
                    else                        appendDelimiter(css, pos, emit);

                    break;

                case '<':

                    if (CDO.is(css, pos.f))
                    {
                        emit.accept(CDO.SINGLETON);
                        pos.f += 4;
                    }

                    else appendDelimiter(css, pos, emit);

                    break;

                // NOTE(review): the index handed to 'startsIdentSequence' here is that of the
                // '@' itself, not the Code-Point following it.  Confirm that this is what
                // 'Identifier.startsIdentSequence' expects.
                case '@':

                    if (Identifier.startsIdentSequence(css, pos.f))
                        AtKeyword.consume(css, pos, emit);

                    else
                        appendDelimiter(css, pos, emit);

                    break;

                // A 'u' or 'U' may begin a Unicode-Range; otherwise it begins an identifier
                case 'u':
                case 'U':

                    if (UnicodeRange.is(css, pos.f))
                        UnicodeRange.consume(css, pos, emit);

                    else
                        Identifier.consumeIdentLikeSequence(css, pos, emit, report);

                    break;

                // An ASCII digit always begins a number
                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    Num.consume(css, pos, emit, report);
                    break;

                // Everything else: an identifier if the Code-Point may start one, and a
                // delimiter if it may not.
                default:

                    if (Identifier.isIdentStartCodePoint(cp))
                        Identifier.consumeIdentLikeSequence(css, pos, emit, report);

                    else
                        appendDelimiter(css, pos, emit);

                    break;
            }
        }

        return new Ret2<>(tokenList, errorList);
    }

    // Emits a Delimiter built at the current position, and advances the position by one.
    private static void appendDelimiter
        (final int[] css, final ByRef<Integer> pos, final Consumer<CSSToken> emit)
    { emit.accept(new Delimiter(css, pos.f++)); }
}