package Torello.CSS;

import Torello.Java.Additional.ByRef;
import Torello.Java.Additional.EffectivelyFinal;
import Torello.Java.UnreachableError;

import java.util.Vector;
import java.util.stream.IntStream;
import java.util.function.Consumer;

/** Any {@code URL} */
@Torello.JavaDoc.JDHeaderBackgroundImg(EmbedTagFileID="CSS_TOK")
public class URLToken extends CSSToken
    implements CharSequence, java.io.Serializable, Comparable<CharSequence>
{
    /** <EMBED CLASS='external-html' DATA-FILE-ID=SVUID> */
    protected static final long serialVersionUID = 1;

    /** The unescaped text that constitutes this {@code URL}. */
    public final String unescapedURL;


    // ********************************************************************************************
    // ********************************************************************************************
    // Private Constructor, API "is" and "if" Methods
    // ********************************************************************************************
    // ********************************************************************************************


    // Instances are only created by 'consume' (below).  The three positional arguments are
    // forwarded, unchanged, to the 'CSSToken' super-constructor.  The code-points collected in
    // 'urlStrBuilder' become the value of field 'unescapedURL'.
    //
    // NOTE: IntStream.Builder.build() may only be invoked once per builder, therefore each
    //       builder instance can be used to construct at most one URLToken.

    private URLToken(
            final int[]             css,
            final int               sPos,
            final int               ePos,
            final IntStream.Builder urlStrBuilder
        )
    {
        super(css, sPos, ePos);

        final int[] urlArr = urlStrBuilder.build().toArray();
        this.unescapedURL = new String(urlArr, 0, urlArr.length);
    }

    @Override
    public final boolean isURL() { return true; }

    @Override
    public final URLToken ifURL() { return this; }


    // ********************************************************************************************
    // ********************************************************************************************
    // User's Constructor: a static "build" method
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * <EMBED CLASS=defs DATA-TOK=Str DATA-P=urlStr>
     * <EMBED CLASS='external-html' DATA-FILE-ID=BUILD_DESC>
     * @param urlStr <EMBED CLASS='external-html' DATA-FILE-ID=BUILD_PARAM>
     * @return <EMBED CLASS='external-html' DATA-FILE-ID=BUILD_RET>
     * @throws TokenizeException <EMBED CLASS='external-html' DATA-FILE-ID=BUILD_TOK_EX>
     */
    @SuppressWarnings("unchecked")
    public static URLToken build(final String urlStr)
    {
        if (urlStr.length() == 0) throw new TokenizeException();

        // A non-empty String always produces at least one code-point, so 'css[0]' is safe to
        // read here without any further length check.

        final int[] css = urlStr.codePoints().toArray();

        if (Whitespace.is(css[0])) throw new TokenizeException
            ("A URL cannot begin with Whitespace.");

        // Holder for the token handed back by 'consume'.  A holder is needed because the
        // lambda below may not assign to a local variable directly.

        final EffectivelyFinal<CSSToken> saveIt = new EffectivelyFinal<>(null);

        final Consumer<CSSToken> acceptor = (CSSToken t) ->
        {
            if (t instanceof Whitespace) throw new TokenizeException
                ("The URL provided contained unescaped Whitespace");

            else if (t instanceof Comment) throw new TokenizeException
                ("The URL provided contained a CSS Comment");

            else if (t instanceof BadURL) throw new TokenizeException
                ("The URL provided was parsed into an instanceof BadURL: [" + t.str + "]");

            else if (t instanceof URLToken) saveIt.f = t;

            // These are the only token types that 'consume' hands to this acceptor
            else throw new UnreachableError();
        };

        // Every tokenize-error is converted into an exception immediately.  The final 'true'
        // tells 'consume' that reaching the end of the input is success, rather than an error.

        URLToken.consume(
            css,
            new ByRef<>(0),
            acceptor,
            (TokenizeError te) -> te.throwException(),
            true
        );

        // Need to guarantee that the entire String was consumed in the process of tokenizing the
        // input String.  'TokenizeException' has a nicely worded Exception-Message to explain
        // what has occurred here.

        if (urlStr.length() != saveIt.f.str.length())
            throw new TokenizeException(urlStr, saveIt.f.str);

        return (URLToken) saveIt.f;
    }


    // ********************************************************************************************
    // ********************************************************************************************
    // CONSUME
    // ********************************************************************************************
    // ********************************************************************************************


    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    // Copied from:
    // https://drafts.csswg.org/css-syntax-3/#consume-url-token
    // April 2024
    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    //
    // 4.3.6. Consume a url token
    //
    // This section describes how to consume a url token from a stream of code points. It returns
    // either a <url-token> or a <bad-url-token>.
    //
    // NOTE: This algorithm assumes that the initial "url(" has already been consumed. This
    // algorithm also assumes that it’s being called to consume an "unquoted" value, like url(foo).
    // A quoted value, like url("foo"), is parsed as a <function-token>. Consume an ident-like
    // token automatically handles this distinction; this algorithm shouldn’t be called directly
    // otherwise.
    //
    //  1. Initially create a <url-token> with its value set to the empty string.
    //
    //  2. Consume as much whitespace as possible.
    //
    //  3. Repeatedly consume the next input code point from the stream:
    //
    //      ** U+0029 RIGHT PARENTHESIS ())
    //          Return the <url-token>.
    //
    //      ** EOF
    //          This is a parse error. Return the <url-token>.
    //
    //      ** whitespace
    //          Consume as much whitespace as possible. If the next input code point is
    //          U+0029 RIGHT PARENTHESIS ()) or EOF, consume it and return the <url-token> (if EOF
    //          was encountered, this is a parse error); otherwise, consume the remnants of a bad
    //          url, create a <bad-url-token>, and return it.
    //
    //      ** U+0022 QUOTATION MARK (")
    //      ** U+0027 APOSTROPHE (')
    //      ** U+0028 LEFT PARENTHESIS (()
    //      ** non-printable code point
    //          This is a parse error. Consume the remnants of a bad url, create a
    //          <bad-url-token>, and return it.
    //
    //      ** U+005C REVERSE SOLIDUS (\)
    //          If the stream starts with a valid escape, consume an escaped code point and append
    //          the returned code point to the <url-token>’s value.
    //
    //          Otherwise, this is a parse error. Consume the remnants of a bad url, create a
    //          <bad-url-token>, and return it.
    //
    //      ** anything else
    //          Append the current input code point to the <url-token>’s value.

    /**
     * This is a tokenizer method which <B>"consumes"</B> the next {@code URLToken} from the input
     * Code-Point Array.
     *
     * <EMBED CLASS=defs DATA-TOK=URLToken DATA-URL=consume-url-token DATA-OP=Consume>
     * <EMBED CLASS=external-html DATA-FILE-ID=COPIED_CSS_WG>
     * <EMBED CLASS=external-html DATA-FILE-ID=URL_TOKEN>
     * <EMBED CLASS=external-html DATA-FILE-ID=URL_TOK_SVG>
     *
     * @param css The code-points being tokenized.
     *
     * @param POS Holder for the current array-index into {@code 'css'}.  Scanning begins at the
     * index it contains on entry, and {@code POS.f} is advanced as code-points are read.  A
     * closing {@code ')'} is <B>not</B> stepped over by this method.
     *
     * @param returnParsedToken Receives each token that is produced.  When white-space (or
     * comments) follow the {@code URL}, the {@code URLToken} is passed first, followed by the
     * white-space / comment tokens, in the order they were read.
     *
     * @param errorEncountered Receives a {@link TokenizeError} for each parse-error detected.
     *
     * @param fromBuildMethod When {@code TRUE}, reaching the end of {@code 'css'} without ever
     * encountering a closing {@code ')'} is not reported as an error.
     */
    protected static void consume(                          // When invoked from 'CSSTokenizer'
            final int[]                     css,                // C, int[] css
            final ByRef<Integer>            POS,                // P, array-pos loop-variable
            final Consumer<CSSToken>        returnParsedToken,  // T, Vector<CSSToken>.add
            final Consumer<TokenizeError>   errorEncountered,   // E, Vector<TokenizeError>.add
            final boolean                   fromBuildMethod     // See the note at the method's end
        )
    {
        final IntStream.Builder urlStrBuilder   = IntStream.builder();
        final int               sPos            = POS.f;

        int c;

        while (POS.f < css.length) switch (c = css[POS.f])
        {
            // ** U+0029 RIGHT PARENTHESIS ())
            //      Return the <url-token>.
            //
            // NOTE: POS.f is left pointing at the ')'

            case ')':

                returnParsedToken.accept(new URLToken(css, sPos, POS.f, urlStrBuilder));
                return;


            // ** whitespace
            //      Consume as much whitespace as possible. If the next input code point is
            //      U+0029 RIGHT PARENTHESIS ()) or EOF, consume it and return the <url-token> (if
            //      EOF was encountered, this is a parse error); otherwise, consume the remnants
            //      of a bad url, create a <bad-url-token>, and return it.

            case '\u000B':
            case ' ':
            case '\t':
            case '\f':
            case '\n':
            case '\r':
            {
                // The URL-Token itself ends where the white-space starts
                final int ePos = POS.f;

                // The trailing tokens are buffered, so that they may be emitted *AFTER* the
                // URLToken which precedes them.

                final Vector<CSSToken>      trailing    = new Vector<>();
                final Consumer<CSSToken>    collector   = trailing::add;

                while (POS.f < css.length)
                {
                    if (Whitespace.is(css[POS.f]))
                        Whitespace.consume(css, POS, collector);

                    else if (Comment.is(css, POS.f))
                        Comment.consume(css, POS, collector, errorEncountered);

                    else break;
                }

                final boolean atEOF = POS.f >= css.length;

                if (atEOF || (css[POS.f] == ')'))
                {
                    // ==> EOF, consume it and return the <url-token> (if EOF was encountered,
                    //     this is a parse error)

                    if (atEOF && (! fromBuildMethod)) errorEncountered.accept(
                        new TokenizeError(
                            css, sPos, POS.f, URLToken.class,
                            "CSS-Input EOF was encountered before reaching the URL's closing ')'"
                        ));

                    returnParsedToken.accept(new URLToken(css, sPos, ePos, urlStrBuilder));
                    trailing.forEach(returnParsedToken);
                }

                else
                {
                    errorEncountered.accept(
                        new TokenizeError(
                            css, sPos, POS.f, URLToken.class,
                            "Whitespace and/or comments before the end of a URL"
                        ));

                    BadURL.consume(css, POS, returnParsedToken, sPos);
                }

                return;
            }


            // ** U+0022 QUOTATION MARK (")
            // ** U+0027 APOSTROPHE (')
            // ** U+0028 LEFT PARENTHESIS (()
            // ** non-printable code point
            //      This is a parse error. Consume the remnants of a bad url, create a
            //      <bad-url-token>, and return it.

            case '"':
            case '\'':
            case '(':

                // 'c' is an int.  It must be converted to text explicitly, otherwise
                // String-Concatenation would print the numeric code-point, not the character.

                errorEncountered.accept(
                    new TokenizeError(
                        css, sPos, POS.f, URLToken.class,
                        "Unescaped Character within URL Found: ['" +
                        new String(Character.toChars(c)) + "']"
                    ));

                // NOTE: The "non-printable code-point" will be handled by the default-case
                BadURL.consume(css, POS, returnParsedToken, sPos);
                return;


            // ** U+005C REVERSE SOLIDUS (\)
            //      If the stream starts with a valid escape, consume an escaped code point and
            //      append the returned code point to the <url-token>’s value.
            //
            //      Otherwise, this is a parse error. Consume the remnants of a bad url, create a
            //      <bad-url-token>, and return it.

            case '\\':

                if (! CSSUtil.isValidEscape(css, POS.f))
                {
                    errorEncountered.accept(
                        new TokenizeError(
                            css, sPos, POS.f, URLToken.class,
                            "A Reverse-Solidu (Backslash) Character was encountered, but " +
                            "unfortunately it was not a valid CSS Character-Escape Sequence"
                        ));

                    BadURL.consume(css, POS, returnParsedToken, sPos);
                    return;
                }

                // The escape is read starting just past the backslash.  The value returned is
                // used as the position from which scanning continues.

                POS.f = CSSUtil.consumeEscapedUnicode(css, POS.f+1, urlStrBuilder);
                break;


            // ** non-printable code point
            //      This is a parse error. Consume the remnants of a bad url, create a
            //      <bad-url-token>, and return it.
            //
            // ** anything else
            //      Append the current input code point to the <url-token>’s value.

            default:

                if (CSSUtil.nonPrintableCodePoint(c))
                {
                    errorEncountered.accept(
                        new TokenizeError(
                            css, sPos, POS.f, URLToken.class,
                            "A non-printable Code-Point was Encountered (CodePonit #" + c + ")"
                        ));

                    BadURL.consume(css, POS, returnParsedToken, sPos);
                    return;
                }

                POS.f++;
                urlStrBuilder.accept(c);
        }


        // If this line is reached, it means that the loop ended because the end of the CSS was
        // reached.  If there had been a proper ending to the URL, it would already have been
        // returned inside the Loop's Main Switch-Statement
        //
        // MINOR-SPAGHETTI: If this "consume" method is called from "build", then there will not
        //                  be a closing ')'.  If this line is reached, and it was called from the
        //                  build-method, this is success, rather than failure
        //
        // The value of boolean "fromBuildMethod" is retrieved as a parameter from this method's
        // input-parameters.  URLToken.consume(...) is called from one of two places:
        //      1) Identifier.consumeIdentLikeSequence
        //      2) Method "build" (at the top of this class)
        //
        // When called from
        //      1) Identifier:      fromBuildMethod ==> false
        //      2) Build (above):   fromBuildMethod ==> true

        if (! fromBuildMethod) errorEncountered.accept(
            new TokenizeError(
                css, sPos, POS.f, URLToken.class,
                "EOF Encountered prior to reaching the end of a URL"
            ));

        returnParsedToken.accept(new URLToken(css, sPos, POS.f, urlStrBuilder));
    }
}