package Torello.CSS;

import Torello.Java.StrPrint;
import Torello.Java.StringParse;

import static Torello.Java.C.BGREEN;
import static Torello.Java.C.RESET;

import java.util.Vector;
import java.util.stream.IntStream;

/**
 * Static helper routines used by the CSS tokenizer: token pretty-printing, escape-sequence
 * consumption / validation, and the "non-printable code point" classification.
 */
public class CSSUtil
{
    /** Maximum number of hex digits that a CSS escape may contain (CSS Syntax 3, 4.3.7). */
    private static final int MAX_ESCAPE_HEX_DIGITS = 6;

    /** U+FFFD REPLACEMENT CHARACTER, emitted for escapes that name an invalid code point. */
    private static final int REPLACEMENT_CHARACTER = 0xFFFD;

    /**
     * Prints one line per token to {@code System.out}: the token's simple class-name
     * (colorized, followed by a colon and padded via {@code StringParse.rightSpacePad}),
     * and then the token text - as produced by {@link #PRINT(CSSToken)} - inside square
     * brackets.
     *
     * @param tokens The tokens to print.
     */
    public static void print(Vector<CSSToken> tokens)
    {
        for (CSSToken n : tokens) System.out.println(
            BGREEN + StringParse.rightSpacePad(n.getClass().getSimpleName() + RESET + ':', 23) +
            '[' + PRINT(n) + ']'
        );
    }

    /**
     * Returns the text used to display a token.  {@link Whitespace} and {@link Comment}
     * tokens are passed through {@code StrPrint.abbrev} (presumably shortening long text -
     * see that method for the exact meaning of its arguments); every other token is returned
     * verbatim.
     *
     * @param n Any token
     * @return The printable representation of {@code n.str}
     */
    public static String PRINT(CSSToken n)
    {
        return ((n instanceof Whitespace) || (n instanceof Comment))
            ? StrPrint.abbrev(n.str, 40, true, "...", 80)
            : n.str;
    }

    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    // Copied From:
    // https://drafts.csswg.org/css-syntax-3/#consume-an-escaped-code-point
    // April 2024
    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    //
    // 4.3.7. Consume an escaped code point
    //
    // This section describes how to consume an escaped code point.  It assumes that the
    // U+005C REVERSE SOLIDUS (\) has already been consumed and that the next input code point has
    // already been verified to be part of a valid escape.  It will return a code point.
    //
    // Consume the next input code point.
    //
    //  ** hex digit:
    //      ==> Consume as many hex digits as possible, but no more than 5.  Note that this means
    //          1-6 hex digits have been consumed in total.  If the next input code point is
    //          whitespace, consume it as well.  Interpret the hex digits as a hexadecimal number.
    //          If this number is zero, or is for a surrogate, or is greater than the maximum
    //          allowed code point, return U+FFFD REPLACEMENT CHARACTER.  Otherwise, return the
    //          code point with that value.
    //  ** EOF:
    //      ==> This is a parse error.  Return U+FFFD REPLACEMENT CHARACTER.
    //
    //  ** anything else
    //      ==> Return the current input code point.
    //
    // NOTE ABOUT MINOR MODIFICATION: The "EOF" case is handled outside of this method, and this
    // method presumes that the next code-point to be read is not past the end of the css-array.

    /**
     * This is a tokenizer method which <B>"consumes"</B> an escaped Unicode Code-Point.
     *
     * <EMBED CLASS=defs DATA-TOK=Escaped-Unicode DATA-URL=consume-escaped-code-point
     *      DATA-OP=Consume>
     * <EMBED CLASS=external-html DATA-FILE-ID=COPIED_CSS_WG_RR>
     * <EMBED CLASS=external-html DATA-FILE-ID=ESCAPED_CODE_POINT>
     * <EMBED CLASS=external-html DATA-FILE-ID=ESCAPE_SVG>
     *
     * @param css  The code-points being tokenized.
     * @param sPos Index of the first code-point <B>after</B> the escaping back-slash.  It must
     *             be a valid index into {@code css}; the end-of-input case is handled by the
     *             caller.
     * @param b    Receives exactly one code-point: the decoded escape.
     * @return The index of the first code-point that was not consumed by this method.
     */
    protected static int consumeEscapedUnicode
        (final int[] css, final int sPos, final IntStream.Builder b)
    {
        int pos = sPos;

        // Consume 1 to 6 hex digits.  The bounds checks come first so that neither the array
        // end nor the six digit limit can be over-run.
        while ( (pos < css.length)
            &&  ((pos - sPos) < MAX_ESCAPE_HEX_DIGITS)
            &&  isHexDigit(css[pos])
        )
            pos++;

        // No hex digit IMMEDIATELY-AFTER the back-slash: the escaped code-point is simply the
        // code-point itself.
        if (pos == sPos)
        {
            b.accept(css[sPos]);
            return sPos + 1;
        }

        // The number must be parsed BEFORE the optional trailing white-space is consumed,
        // otherwise the white-space would be handed to 'parseInt', which rejects it.
        final int hexValue = Integer.parseInt(new String(css, sPos, pos - sPos), 16);

        // A single white-space code-point directly after the hex digits belongs to the escape
        // and is quietly swallowed.
        if ((pos < css.length) && Whitespace.is(css[pos])) pos++;

        final boolean invalid =
                (hexValue == 0)
            ||  (hexValue > Character.MAX_CODE_POINT)
            ||  ((hexValue >= Character.MIN_SURROGATE) && (hexValue <= Character.MAX_SURROGATE));

        b.accept(invalid ? REPLACEMENT_CHARACTER : hexValue);

        return pos;
    }

    /** Tests for an ASCII hex digit: {@code 0-9}, {@code a-f} or {@code A-F}. */
    private static boolean isHexDigit(final int c)
    {
        return  ((c >= '0') && (c <= '9'))
            ||  ((c >= 'a') && (c <= 'f'))
            ||  ((c >= 'A') && (c <= 'F'));
    }


    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    // Copied from:
    // https://drafts.csswg.org/css-syntax-3/#check-if-two-code-points-are-a-valid-escape
    // March 27th, 2024
    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    //
    // 4.3.8. Check if two code points are a valid escape
    // This section describes how to check if two code points are a valid escape.  The algorithm
    // described here can be called explicitly with two code points, or can be called with the
    // input stream itself.  In the latter case, the two code points in question are the current
    // input code point and the next input code point, in that order.
    //
    // NOTE: This algorithm will not consume any additional code point.
    //
    //  ** If the first code point is not U+005C REVERSE SOLIDUS (\), return false.
    //  ** Otherwise, if the second code point is a newline, return false.
    //  ** Otherwise, return true.


    /**
     * Convenience Method.
     * <BR />Invokes: {@link #isValidEscape(int, int)}
     *
     * <BR /><BR />When {@code sPos} is the last index of {@code css}, the value {@code 0} is
     * passed as the second code-point.
     *
     * @param css  The code-points being tokenized.
     * @param sPos Index of the first of the two code-points to check.
     * @return {@code TRUE} if the two code-points start a valid escape.
     */
    public static boolean isValidEscape(final int[] css, final int sPos)
    { return isValidEscape(css[sPos], ((sPos+1) < css.length) ? css[sPos+1] : 0); }

    /**
     * Checks whether or not the next token to consume constitutes a valid Escape-Sequence
     * <EMBED CLASS=defs DATA-TOK=Unicode-Range
     *      DATA-URL=check-if-two-code-points-are-a-valid-escape DATA-OP=Check>
     * <EMBED CLASS=external-html DATA-FILE-ID=COPIED_CSS_WG>
     * <EMBED CLASS=external-html DATA-FILE-ID=CHECK_VALID_ESC>
     *
     * @param codePoint1 The first code-point; must be a back-slash for the escape to be valid.
     * @param codePoint2 The second code-point; must not be a line terminator.
     * @return {@code TRUE} if and only if the two code-points start a valid escape.
     */
    public static boolean isValidEscape(final int codePoint1, final int codePoint2)
    {
        if (codePoint1 != '\\') return false;

        // Line-Feed, Carriage-Return and Form-Feed all count as a "newline" in CSS whenever
        // the input has not been pre-processed into Line-Feeds only.
        if (codePoint2 == '\n') return false;
        if (codePoint2 == '\r') return false;
        if (codePoint2 == '\f') return false;

        return true;
    }

    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    // Copied From:
    // https://drafts.csswg.org/css-syntax-3/#non-printable-code-point
    // April 2024
    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    //
    // non-printable code point
    //
    // A code point: between U+0000 NULL and U+0008 BACKSPACE inclusive,
    // or U+000B LINE TABULATION,
    // or a code point between U+000E SHIFT OUT and U+001F INFORMATION SEPARATOR ONE inclusive,
    // or U+007F DELETE.

    /**
     * A code point:
     *
     * <UL CLASS=JDUL>
     * <LI>between U+0000 NULL and U+0008 BACKSPACE inclusive</LI>
     * <LI>U+000B LINE TABULATION</LI>
     * <LI>between U+000E SHIFT OUT and U+001F INFORMATION SEPARATOR ONE inclusive</LI>
     * <LI>U+007F DELETE</LI>
     * </UL>
     *
     * @param codePoint Any Code-Point
     * @return {@code TRUE} if and only if the provided code-point is non-printable.
     */
    static boolean nonPrintableCodePoint(int codePoint)
    {
        if ((codePoint >= 0) && (codePoint <= 8)) return true;
        if ((codePoint >= 0xE) && (codePoint <= 0x1F)) return true;
        if (codePoint == 0x0B) return true;
        if (codePoint == 0x7F) return true;
        return false;
    }
}