CSSUtil.java.html

package Torello.CSS;

import Torello.Java.StrPrint;
import Torello.Java.StringParse;

import static Torello.Java.C.BGREEN;
import static Torello.Java.C.RESET;

import java.util.Vector;
import java.util.stream.IntStream;

/**
 * Static helper routines used by the CSS tokenizer: debug-printing of tokens, and a few of the
 * code-point level checks / "consume" algorithms taken from the CSS Syntax Module, Level 3.
 */
public class CSSUtil
{
    // U+FFFD REPLACEMENT CHARACTER, emitted in place of an escape that names an invalid code point
    private static final int REPLACEMENT_CHARACTER = 0xFFFD;

    // An escape is a backslash followed by AT MOST this many hexadecimal digits
    private static final int MAX_ESCAPE_HEX_DIGITS = 6;

    /**
     * Prints every token in {@code 'tokens'} to {@code System.out}, one token per line.  Each
     * line holds the simple class-name of the token, followed by the text returned by
     * {@link #PRINT(CSSToken)} wrapped in square brackets.
     * 
     * @param tokens The tokens to print.
     */
    public static void print(Vector<CSSToken> tokens)
    {
        for (CSSToken n : tokens) System.out.println(
            BGREEN + StringParse.rightSpacePad(n.getClass().getSimpleName() + RESET + ':', 23) +
            '[' + PRINT(n) + ']'
        );
    }

    /**
     * Produces the text that {@link #print(Vector)} shows for a single token.
     * 
     * @param n Any token
     * 
     * @return For {@code Whitespace} and {@code Comment} tokens, the result of passing the
     * token's {@code 'str'} field through {@code StrPrint.abbrev}.  For every other token, the
     * {@code 'str'} field itself, unchanged.
     */
    public static String PRINT(CSSToken n)
    {
        // NOTE(review): presumably whitespace & comments are abbreviated because they can be
        // long and would otherwise swamp the print-out - confirm against StrPrint.abbrev.

        final boolean abbreviate = (n instanceof Whitespace) || (n instanceof Comment);

        return abbreviate
            ? StrPrint.abbrev(n.str, 40, true, "...", 80)
            : n.str;
    }

    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    // Copied From:
    // https://drafts.csswg.org/css-syntax-3/#consume-an-escaped-code-point
    // April 2024
    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    // 
    // 4.3.7. Consume an escaped code point
    //
    // This section describes how to consume an escaped code point. It assumes that the
    // U+005C REVERSE SOLIDUS (\) has already been consumed and that the next input code point has
    // already been verified to be part of a valid escape. It will return a code point.
    // 
    // Consume the next input code point.
    // 
    // ** hex digit:
    //      ==> Consume as many hex digits as possible, but no more than 5. Note that this means
    //          1-6 hex digits have been consumed in total. If the next input code point is
    //          whitespace, consume it as well. Interpret the hex digits as a hexadecimal number.
    //          If this number is zero, or is for a surrogate, or is greater than the maximum
    //          allowed code point, return U+FFFD REPLACEMENT CHARACTER (�). Otherwise, return the
    //          code point with that value.
    // ** EOF:
    //      ==> This is a parse error. Return U+FFFD REPLACEMENT CHARACTER (�).
    // 
    // ** anything else
    //      ==> Return the current input code point.
    //
    // NOTE ABOUT MINOR MODIFICATION: The "EOF" case is handled outside of this method, and this
    // method presumes that the next code-point to be read is not past the end of the css-array.

    /**
     * This is a tokenizer method which <B>"consumes"</B> an escaped Unicode Code-Point.
     * 
     * <BR /><BR />If one to six hexadecimal digits begin at {@code 'sPos'}, they are read as a
     * hexadecimal number, and a single white-space code-point directly after them (if there is
     * one) is consumed as well.  When that number is zero, lies in the surrogate range, or is
     * greater than {@code U+10FFFF}, {@code U+FFFD} is emitted in its place.  If there is no
     * hexadecimal digit at {@code 'sPos'}, the code-point at {@code 'sPos'} is emitted as is.
     * 
     * <EMBED CLASS=defs DATA-TOK=Escaped-Unicode DATA-URL=consume-escaped-code-point
     *      DATA-OP=Consume>
     * <EMBED CLASS=external-html DATA-FILE-ID=COPIED_CSS_WG_RR>
     * <EMBED CLASS=external-html DATA-FILE-ID=ESCAPED_CODE_POINT>
     * <EMBED CLASS=external-html DATA-FILE-ID=ESCAPE_SVG>
     * 
     * @param css The CSS input, as an array of code-points.
     * 
     * @param sPos The array-index of the first code-point of the escape - the one directly
     * after the back-slash.  This index must be inside the bounds of {@code 'css'}.
     * 
     * @param b Receives exactly one code-point: the one which the escape stands for.
     * 
     * @return The array-index of the first code-point that was <B>not</B> consumed.
     * 
     * @throws ArrayIndexOutOfBoundsException If {@code 'sPos'} is not a valid index into
     * {@code 'css'}
     */
    protected static int consumeEscapedUnicode
        (final int[] css, final int sPos, final IntStream.Builder b)
    {
        int pos         = sPos;
        int hexValue    = 0;

        // Read hexadecimal digits, stopping at the first non-digit, at the end of the input, or
        // once six digits have been read - whichever comes first.  The value is accumulated as
        // the digits are read; six digits are at most 0xFFFFFF, which cannot overflow an 'int'.

        while ((pos < css.length) && ((pos - sPos) < MAX_ESCAPE_HEX_DIGITS))
        {
            final int digit = hexDigitValue(css[pos]);

            if (digit < 0) break;

            hexValue = (hexValue << 4) | digit;
            pos++;
        }

        // There was no Hexadecimal IMMEDIATELY-AFTER the Escape-Backslash (Here called
        // "Reverse-Solidus"): the escaped code-point simply stands for itself.

        if (pos == sPos)
        {
            b.accept(css[sPos]);
            return sPos + 1;
        }

        // The Pseudo-Code says that if there is a space-character immediately following a
        // Unicode Escape-Sequence, ignore-it / get rid of it.  Quietly.  It is skipped only
        // AFTER the digits have been converted, so that it is never part of the number.

        if ((pos < css.length) && Whitespace.is(css[pos])) pos++;

        final boolean isSurrogate =
            (hexValue >= Character.MIN_SURROGATE) && (hexValue <= Character.MAX_SURROGATE);

        final boolean invalid =
            (hexValue == 0) || isSurrogate || (hexValue > Character.MAX_CODE_POINT);

        b.accept(invalid ? REPLACEMENT_CHARACTER : hexValue);

        return pos;
    }

    // Returns the numeric value (0 - 15) of an ASCII hexadecimal digit, or -1 when 'codePoint' is
    // not one.  'Character.digit' is deliberately not used: it also accepts non-ASCII digits.
    private static int hexDigitValue(final int codePoint)
    {
        if ((codePoint >= '0') && (codePoint <= '9')) return codePoint - '0';
        if ((codePoint >= 'a') && (codePoint <= 'f')) return codePoint - 'a' + 10;
        if ((codePoint >= 'A') && (codePoint <= 'F')) return codePoint - 'A' + 10;
        return -1;
    }


    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    // Copied from:
    // https://drafts.csswg.org/css-syntax-3/#check-if-two-code-points-are-a-valid-escape
    // March 27th, 2024
    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    //
    // 4.3.8. Check if two code points are a valid escape
    // This section describes how to check if two code points are a valid escape. The algorithm
    // described here can be called explicitly with two code points, or can be called with the
    // input stream itself. In the latter case, the two code points in question are the current
    // input code point and the next input code point, in that order.
    // 
    // NOTE: This algorithm will not consume any additional code point.
    // 
    // ** If the first code point is not U+005C REVERSE SOLIDUS (\), return false.
    // ** Otherwise, if the second code point is a newline, return false.
    // ** Otherwise, return true.


    /**
     * Convenience Method.
     * <BR />Invokes: {@link #isValidEscape(int, int)}
     * 
     * <BR /><BR />The two code-points checked are {@code css[sPos]} and {@code css[sPos + 1]}.
     * When {@code 'sPos'} is the last index of the array, {@code 0} is passed as the second
     * code-point.
     * 
     * @param css The CSS input, as an array of code-points.
     * @param sPos The array-index of the first of the two code-points to check.
     * @return The value returned by {@link #isValidEscape(int, int)}
     */
    public static boolean isValidEscape(final int[] css, final int sPos)
    {
        final int first     = css[sPos];
        final int second    = ((sPos + 1) < css.length) ? css[sPos + 1] : 0;

        return isValidEscape(first, second);
    }

    /**
     * Checks whether or not the next token to consume constitutes a valid Escape-Sequence
     * <EMBED CLASS=defs DATA-TOK=Unicode-Range
     *      DATA-URL=check-if-two-code-points-are-a-valid-escape DATA-OP=Check>
     * <EMBED CLASS=external-html DATA-FILE-ID=COPIED_CSS_WG>
     * <EMBED CLASS=external-html DATA-FILE-ID=CHECK_VALID_ESC>
     * 
     * @param codePoint1 The first code-point.  An escape must begin with a back-slash.
     * @param codePoint2 The second code-point.  An escape may not continue with a newline.
     * 
     * @return {@code TRUE} if and only if {@code 'codePoint1'} is a back-slash, and
     * {@code 'codePoint2'} is none of Line-Feed, Carriage-Return or Form-Feed.
     */
    public static boolean isValidEscape(final int codePoint1, final int codePoint2)
    {
        if (codePoint1 != '\\') return false;

        // The specification only names U+000A as a "newline", because its pre-processing step
        // has already turned Carriage-Return and Form-Feed into Line-Feed.  The input checked
        // here may still contain them, so all three have to be rejected.

        switch (codePoint2)
        {
            case '\n':
            case '\r':
            case '\f':  return false;
            default:    return true;
        }
    }

    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    // Copied From:
    // https://drafts.csswg.org/css-syntax-3/#non-printable-code-point
    // April 2024
    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    // 
    // non-printable code point
    // 
    // A code point: between U+0000 NULL and U+0008 BACKSPACE inclusive,
    // or U+000B LINE TABULATION,
    // or a code point between U+000E SHIFT OUT and U+001F INFORMATION SEPARATOR ONE inclusive,
    // or U+007F DELETE.

    /**
     * Checks whether a code-point is a "non-printable code point", which is a code point:
     * 
     * <UL CLASS=JDUL>
     * <LI>between U+0000 NULL and U+0008 BACKSPACE inclusive</LI>
     * <LI>U+000B LINE TABULATION</LI>
     * <LI>between U+000E SHIFT OUT and U+001F INFORMATION SEPARATOR ONE inclusive</LI>
     * <LI>U+007F DELETE</LI>
     * </UL>
     * 
     * @param codePoint Any Code-Point
     * @return {@code TRUE} if and only if the provided code-point is non-printable.
     */
    static boolean nonPrintableCodePoint(int codePoint)
    {
        return      ((codePoint >= 0x00) && (codePoint <= 0x08))
                ||  (codePoint == 0x0B)
                ||  ((codePoint >= 0x0E) && (codePoint <= 0x1F))
                ||  (codePoint == 0x7F);
    }
}