1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
package Torello.CSS;

import Torello.Java.StrPrint;
import Torello.Java.StringParse;

import static Torello.Java.C.BGREEN;
import static Torello.Java.C.RESET;

import java.util.Vector;
import java.util.stream.IntStream;

public class CSSUtil
{
    /**
     * Writes one line per token to {@code System.out}.
     *
     * <BR /><BR />Each line is built from: the {@code BGREEN} constant, a label made of the
     * token's simple class-name followed by {@code RESET} and a colon (passed through
     * {@code StringParse.rightSpacePad} with an argument of {@code 23}), and finally the text
     * returned by {@link #PRINT(CSSToken)} wrapped in square brackets.
     *
     * @param tokens The tokens to print, in iteration order.
     */
    public static void print(Vector<CSSToken> tokens)
    {
        for (final CSSToken token : tokens)
        {
            // NOTE(review): RESET is part of the string handed to rightSpacePad, so it is
            // counted by whatever width logic that method applies - confirm this is intended.
            final String label  = token.getClass().getSimpleName() + RESET + ':';
            final String padded = StringParse.rightSpacePad(label, 23);

            System.out.println(BGREEN + padded + '[' + PRINT(token) + ']');
        }
    }

    /**
     * Produces the text that is displayed for a token by {@link #print(Vector)}.
     *
     * <BR /><BR />Tokens that are instances of {@code Whitespace} or {@code Comment} have their
     * {@code str} field passed through {@code StrPrint.abbrev(str, 40, true, "...", 80)}; see
     * that method for the exact meaning of its arguments.  Every other token yields its
     * {@code str} field unchanged.
     *
     * @param n Any token.
     * @return The (possibly abbreviated) text of the token.
     */
    public static String PRINT(CSSToken n)
    {
        final boolean abbreviate = (n instanceof Comment) || (n instanceof Whitespace);

        // Everything except whitespace and comments is shown verbatim
        if (! abbreviate) return n.str;

        return StrPrint.abbrev(n.str, 40, true, "...", 80);
    }

    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    // Copied From:
    // https://drafts.csswg.org/css-syntax-3/#consume-an-escaped-code-point
    // April 2024
    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    // 
    // 4.3.7. Consume an escaped code point
    //
    // This section describes how to consume an escaped code point. It assumes that the
    // U+005C REVERSE SOLIDUS (\) has already been consumed and that the next input code point has
    // already been verified to be part of a valid escape. It will return a code point.
    // 
    // Consume the next input code point.
    // 
    // ** hex digit:
    //      ==> Consume as many hex digits as possible, but no more than 5. Note that this means
    //          1-6 hex digits have been consumed in total. If the next input code point is
    //          whitespace, consume it as well. Interpret the hex digits as a hexadecimal number.
    //          If this number is zero, or is for a surrogate, or is greater than the maximum
    //          allowed code point, return U+FFFD REPLACEMENT CHARACTER (�). Otherwise, return the
    //          code point with that value.
    // ** EOF:
    //      ==> This is a parse error. Return U+FFFD REPLACEMENT CHARACTER (�).
    // 
    // ** anything else
    //      ==> Return the current input code point.
    //
    // NOTE ABOUT MINOR MODIFICATION: The "EOF" case is handled outside of this method, and this
    // method presumes that the next code-point to be read is not past the end of the css-array.

    /**
     * This is a tokenizer method which <B>"consumes"</B> an escaped Unicode Code-Point.
     * 
     * <EMBED CLASS=defs DATA-TOK=Escaped-Unicode DATA-URL=consume-escaped-code-point
     *      DATA-OP=Consume>
     * <EMBED CLASS=external-html DATA-FILE-ID=COPIED_CSS_WG_RR>
     * <EMBED CLASS=external-html DATA-FILE-ID=ESCAPED_CODE_POINT>
     * <EMBED CLASS=external-html DATA-FILE-ID=ESCAPE_SVG>
     *
     * <BR /><BR />The backslash itself must already have been consumed by the caller.  Exactly
     * one code-point is appended to {@code 'b'} per invocation:
     *
     * <UL CLASS=JDUL>
     * <LI>If {@code css[sPos]} is an ASCII hex-digit, between 1 and 6 hex-digits are read and
     *     interpreted as a hexadecimal number.  A value of zero, a surrogate
     *     ({@code U+D800 ... U+DFFF}) or a value above {@code U+10FFFF} is replaced by
     *     {@code U+FFFD}.  One code-point accepted by {@code Whitespace.is} that directly
     *     follows the digits is consumed as well.
     *     </LI>
     * <LI>Otherwise {@code css[sPos]} itself is appended, unchanged.</LI>
     * </UL>
     *
     * @param css  The CSS input, as an array of code-points.
     * @param sPos Index of the first code-point <B>after</B> the backslash.  This must be a
     *             valid index into {@code 'css'}; the End-of-File case is left to the caller.
     * @param b    Receives the single code-point produced by the escape.
     * @return The index of the first code-point that was <B>not</B> consumed by this method.
     * @throws ArrayIndexOutOfBoundsException if {@code 'sPos'} is not a valid index
     */
    protected static int consumeEscapedUnicode
        (final int[] css, final int sPos, final IntStream.Builder b)
    {
        // An escape may hold at most 6 hex-digits, and can never run past the end of the input
        final int limit = Math.min(css.length, sPos + 6);

        int pos         = sPos;
        int hexValue    = 0;

        // Accumulate the value digit-by-digit.  Only ASCII hex-digits are recognized here
        // (which is why Character.digit is not used - it also accepts non-ASCII digits).
        while (pos < limit)
        {
            final int c = css[pos];
            final int digit;

            if      ((c >= '0') && (c <= '9'))  digit = c - '0';
            else if ((c >= 'a') && (c <= 'f'))  digit = c - 'a' + 10;
            else if ((c >= 'A') && (c <= 'F'))  digit = c - 'A' + 10;
            else                                break;

            // 6 digits of 4 bits each is 24 bits, so this can never overflow an 'int'
            hexValue = (hexValue << 4) | digit;
            pos++;
        }

        // No Hex-Digit IMMEDIATELY-AFTER the Escape-Backslash ("Reverse-Solidus"):
        // the escaped code-point stands for itself.
        if (pos == sPos)
        {
            b.accept(css[sPos]);
            return sPos + 1;
        }

        final boolean surrogate =
            (hexValue >= Character.MIN_SURROGATE) && (hexValue <= Character.MAX_SURROGATE);

        if ((hexValue == 0) || surrogate || (hexValue > Character.MAX_CODE_POINT))
            b.accept(0xFFFD); // U+FFFD REPLACEMENT CHARACTER (�)
        else
            b.accept(hexValue);

        // The Pseudo-Code says that a single white-space code-point immediately following the
        // hex-digits belongs to the escape, and is quietly discarded.  The value was computed
        // above, so the white-space never becomes part of the number being parsed.
        if ((pos < css.length) && Whitespace.is(css[pos])) pos++;

        return pos;
    }


    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    // Copied from:
    // https://drafts.csswg.org/css-syntax-3/#check-if-two-code-points-are-a-valid-escape
    // March 27th, 2024
    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    //
    // 4.3.8. Check if two code points are a valid escape
    // This section describes how to check if two code points are a valid escape. The algorithm
    // described here can be called explicitly with two code points, or can be called with the
    // input stream itself. In the latter case, the two code points in question are the current
    // input code point and the next input code point, in that order.
    // 
    // NOTE: This algorithm will not consume any additional code point.
    // 
    // ** If the first code point is not U+005C REVERSE SOLIDUS (\), return false.
    // ** Otherwise, if the second code point is a newline, return false.
    // ** Otherwise, return true.


    /**
     * Convenience Method.
     * <BR />Invokes: {@link #isValidEscape(int, int)}
     *
     * <BR /><BR />The two code-points checked are {@code css[sPos]} and {@code css[sPos + 1]}.
     * When {@code 'sPos'} is the last index of the array, {@code 0} is used in place of the
     * (missing) second code-point.
     *
     * @param css  The CSS input, as an array of code-points.
     * @param sPos Index of the first of the two code-points to check.
     * @return {@code TRUE} if the two code-points form a valid escape.
     * @throws ArrayIndexOutOfBoundsException if {@code 'sPos'} is not a valid index
     */
    public static boolean isValidEscape(final int[] css, final int sPos)
    {
        final int next = sPos + 1;
        return isValidEscape(css[sPos], (next < css.length) ? css[next] : 0);
    }

    /**
     * Checks whether or not the next token to consume constitutes a valid Escape-Sequence
     * <EMBED CLASS=defs DATA-TOK=Unicode-Range
     *      DATA-URL=check-if-two-code-points-are-a-valid-escape DATA-OP=Check>
     * <EMBED CLASS=external-html DATA-FILE-ID=COPIED_CSS_WG>
     * <EMBED CLASS=external-html DATA-FILE-ID=CHECK_VALID_ESC>
     *
     * @param codePoint1 The first code-point, which must be a backslash for a valid escape.
     * @param codePoint2 The second code-point, which must not be a line-break.
     * @return {@code TRUE} if and only if the first code-point is a backslash, and the second
     * is none of Line-Feed, Carriage-Return or Form-Feed.
     */
    public static boolean isValidEscape(final int codePoint1, final int codePoint2)
    {
        if (codePoint1 != '\\') return false;

        // The CSS specification folds CR, FF and CR-LF into a single LF during preprocessing.
        // This method is handed un-preprocessed code-points, so each of the three characters
        // has to be treated as a newline here.
        switch (codePoint2)
        {
            case '\n':
            case '\r':
            case '\f':
                return false;

            default:
                return true;
        }
    }

    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    // Copied From:
    // https://drafts.csswg.org/css-syntax-3/#non-printable-code-point
    // April 2024
    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    // 
    // non-printable code point
    // 
    // A code point: between U+0000 NULL and U+0008 BACKSPACE inclusive,
    // or U+000B LINE TABULATION,
    // or a code point between U+000E SHIFT OUT and U+001F INFORMATION SEPARATOR ONE inclusive,
    // or U+007F DELETE.

    /**
     * Tests whether a code-point is "non-printable".  A non-printable code-point is one that
     * is:
     * 
     * <UL CLASS=JDUL>
     * <LI>between U+0000 NULL and U+0008 BACKSPACE inclusive</LI>
     * <LI>U+000B LINE TABULATION</LI>
     * <LI>between U+000E SHIFT OUT and U+001F INFORMATION SEPARATOR ONE inclusive</LI>
     * <LI>U+007F DELETE</LI>
     * </UL>
     * 
     * @param codePoint Any Code-Point
     * @return {@code TRUE} if and only if the provided code-point is non-printable.
     */
    static boolean nonPrintableCodePoint(int codePoint)
    {
        final boolean nullToBackspace   = (0x00 <= codePoint) && (codePoint <= 0x08);
        final boolean lineTabulation    = (codePoint == 0x0B);
        final boolean shiftOutToUnitSep = (0x0E <= codePoint) && (codePoint <= 0x1F);
        final boolean delete            = (codePoint == 0x7F);

        return nullToBackspace || lineTabulation || shiftOutToUnitSep || delete;
    }
}