Source code

001package Torello.CSS;
002
003import Torello.Java.StrPrint;
004import Torello.Java.StringParse;
005
006import static Torello.Java.C.BGREEN;
007import static Torello.Java.C.RESET;
008
009import java.util.Vector;
010import java.util.stream.IntStream;
011
012public class CSSUtil
013{
014    public static void print(Vector<CSSToken> tokens)
015    {
016        for (CSSToken n : tokens) System.out.println(
017            BGREEN + StringParse.rightSpacePad(n.getClass().getSimpleName() + RESET + ':', 23) +
018            '[' + PRINT(n) + ']'
019        );
020    }
021
022    public static String PRINT(CSSToken n)
023    {
024        return ((n instanceof Whitespace) || (n instanceof Comment))
025            ? StrPrint.abbrev(n.str, 40, true, "...", 80)
026            : n.str;
027    }
028
029    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
030    // Copied From:
031    // https://drafts.csswg.org/css-syntax-3/#consume-an-escaped-code-point
032    // April 2024
033    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
034    // 
035    // 4.3.7. Consume an escaped code point
036    //
037    // This section describes how to consume an escaped code point. It assumes that the
038    // U+005C REVERSE SOLIDUS (\) has already been consumed and that the next input code point has
039    // already been verified to be part of a valid escape. It will return a code point.
040    // 
041    // Consume the next input code point.
042    // 
043    // ** hex digit:
044    //      ==> Consume as many hex digits as possible, but no more than 5. Note that this means
045    //          1-6 hex digits have been consumed in total. If the next input code point is
046    //          whitespace, consume it as well. Interpret the hex digits as a hexadecimal number.
047    //          If this number is zero, or is for a surrogate, or is greater than the maximum
048    //          allowed code point, return U+FFFD REPLACEMENT CHARACTER (�). Otherwise, return the
049    //          code point with that value.
050    // ** EOF:
051    //      ==> This is a parse error. Return U+FFFD REPLACEMENT CHARACTER (�).
052    // 
053    // ** anything else
054    //      ==> Return the current input code point.
055    //
056    // NOTE ABOUT MINOR MODIFICATION: The "EOF" case is handled outside of this method, and this
057    // method presumes that the next code-point to be read is not past the end of the css-array.
058
059    /**
060     * This is a tokenizer method which <B>"consumes"</B> an escaped Unicode Code-Point.
061     * 
062     * <EMBED CLASS=defs DATA-TOK=Escaped-Unicode DATA-URL=consume-escaped-code-point
063     *      DATA-OP=Consume>
064     * <EMBED CLASS=external-html DATA-FILE-ID=COPIED_CSS_WG_RR>
065     * <EMBED CLASS=external-html DATA-FILE-ID=ESCAPED_CODE_POINT>
066     * <EMBED CLASS=external-html DATA-FILE-ID=ESCAPE_SVG>
067     */
068    protected static int consumeEscapedUnicode
069        (final int[] css, final int sPos, final IntStream.Builder b)
070    {
071        int pos     = sPos;
072        int c       = css[sPos];
073        int count   = 0;
074
075        // If a CSS String (delimited by single or double quotes) contains a
076        // backslash followed by valid Hex-Digits, then this is a Uni-Code 
077        // Escape-Sequence.  There may be 1 to 6 Hex-Digits after a '\' Back
078        // Slash within the confines a String (that is inside quotations)
079
080        while (     (c >= 'A' && c <= 'F')
081                ||  (c >= 'a' && c <= 'f')
082                ||  (c >= '0' && c <= '9')
083        )
084        {
085            if ((++pos == css.length) || (++count == 7)) break;
086            c = css[pos];
087        }
088
089        // This if-statement, essentially, checks if there was a Hexadecimal IMMEDIATELY-AFTER the
090        // Escape-Backslash (Here called "Reverse-Solidus")
091
092        if (pos > sPos)
093        {
094            // The Pseudo-Code says that if there is a space-character immediately following a
095            // Unicode Escape-Sequence, ignore-it / get rid of it.  Quietly.
096
097            if (Whitespace.is(c)) pos++;
098
099            String  hexString   = new String(css, sPos, pos - sPos);
100            int     hexValue    = Integer.parseInt(hexString, 16);
101
102            if ((hexValue == 0) || (hexValue >= 0x1FFFF))
103                b.accept(0xFFFD); // U+FFFD REPLACEMENT CHARACTER (�)
104            else 
105                b.accept(hexValue);
106        }
107
108        else
109        {
110            b.accept(c);
111            pos++;
112        }
113
114        return pos;
115    }
116
117
118    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
119    // Copied from:
120    // https://drafts.csswg.org/css-syntax-3/#check-if-two-code-points-are-a-valid-escape
121    // March 27th, 2024
122    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
123    //
124    // 4.3.8. Check if two code points are a valid escape
125    // This section describes how to check if two code points are a valid escape. The algorithm
126    // described here can be called explicitly with two code points, or can be called with the
127    // input stream itself. In the latter case, the two code points in question are the current
128    // input code point and the next input code point, in that order.
129    // 
130    // NOTE: This algorithm will not consume any additional code point.
131    // 
132    // ** If the first code point is not U+005C REVERSE SOLIDUS (\), return false.
133    // ** Otherwise, if the second code point is a newline, return false.
134    // ** Otherwise, return true.
135
136
137    /**
138     * Convenience Method.
139     * <BR />Invokes: {@link isValidEscape(int, int)}
140     */
141    public static boolean isValidEscape(final int[] css, final int sPos)
142    { return isValidEscape(css[sPos], ((sPos+1) < css.length) ? css[sPos+1] : 0); }
143
144    /**
145     * Checks whether or not the next token to consume constitutes a valid Escape-Sequence
146     * <EMBED CLASS=defs DATA-TOK=Unicode-Range
147     *      DATA-URL=check-if-two-code-points-are-a-valid-escape DATA-OP=Check>
148     * <EMBED CLASS=external-html DATA-FILE-ID=COPIED_CSS_WG>
149     * <EMBED CLASS=external-html DATA-FILE-ID=CHECK_VALID_ESC>
150     */
151    public static boolean isValidEscape(final int codePoint1, final int codePoint2)
152    {
153        if (codePoint1 != '\\') return false;
154        if (codePoint2 == '\n') return false;
155        if (codePoint2 == '\r') return false;
156        return true;
157    }
158
159    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
160    // Copied From:
161    // https://drafts.csswg.org/css-syntax-3/#non-printable-code-point
162    // April 2024
163    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
164    // 
165    // non-printable code point
166    // 
167    // A code point: between U+0000 NULL and U+0008 BACKSPACE inclusive,
168    // or U+000B LINE TABULATION,
169    // or a code point between U+000E SHIFT OUT and U+001F INFORMATION SEPARATOR ONE inclusive,
170    // or U+007F DELETE.
171
172    /**
173     * A code point:
174     * 
175     * <UL CLASS=JDUL>
176     * <LI>between U+0000 NULL and U+0008 BACKSPACE inclusive</LI>
177     * <LI>U+000B LINE TABULATION</LI>
178     * <LI>between U+000E SHIFT OUT and U+001F INFORMATION SEPARATOR ONE inclusive
179     * <LI>U+007F DELETE</LI>
180     * </UL>
181     * 
182     * @param codePoint Any Code-Point
183     * @return {@code TRUE} if and only if the provided code-point is non-printable.
184     */
185    static boolean nonPrintableCodePoint(int codePoint)
186    {
187        if ((codePoint >= 0)    && (codePoint <= 8))    return true;
188        if ((codePoint >= 0xE)  && (codePoint <= 0x1F)) return true;
189        if (codePoint == 0x0B)                          return true;
190        if (codePoint == 0x7F)                          return true;
191        return false;
192    }
193}