URLToken.java.html

package Torello.CSS;

import Torello.Java.Additional.ByRef;
import Torello.Java.Additional.EffectivelyFinal;
import Torello.Java.UnreachableError;

import java.util.Vector;
import java.util.stream.IntStream;
import java.util.function.Consumer;

/** A CSS {@code <url-token>}: the unquoted contents of a CSS {@code url(...)} value. */
@Torello.JavaDoc.JDHeaderBackgroundImg(EmbedTagFileID="CSS_TOK")
public class URLToken extends CSSToken
    implements CharSequence, java.io.Serializable, Comparable<CharSequence>
{
    /** <EMBED CLASS='external-html' DATA-FILE-ID=SVUID> */
    protected static final long serialVersionUID = 1;

    /**
     * The unescaped text that constitutes this {@code URL}.  It is assembled, inside this class'
     * constructor, from the code points that were collected while the token was being consumed.
     */
    public final String unescapedURL;


    // ********************************************************************************************
    // ********************************************************************************************
    // Private Constructor, API "is" and "if" Methods
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * Private constructor; instances are only created by this class' own {@code consume} method.
     *
     * @param css           The code-point array being tokenized (forwarded to the super-class)
     * @param sPos          Start position of this token within {@code css} (forwarded to super)
     * @param ePos          End position of this token within {@code css} (forwarded to super)
     * @param urlStrBuilder The code points which, together, make up the unescaped URL text
     */
    private URLToken(
            final int[]             css,
            final int               sPos,
            final int               ePos,
            final IntStream.Builder urlStrBuilder
        )
    {
        super(css, sPos, ePos);

        // Fold every collected code point directly into a StringBuilder.  An invalid code point
        // causes an IllegalArgumentException, exactly as the String(int[], int, int) constructor
        // would.

        this.unescapedURL = urlStrBuilder
            .build()
            .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append)
            .toString();
    }

    /** Overrides the inherited method to report that this token is a URL. */
    @Override
    public final boolean isURL()
    { return true; }

    /** Overrides the inherited method to hand back this very instance, typed as a URLToken. */
    @Override
    public final URLToken ifURL()
    { return this; }


    // ********************************************************************************************
    // ********************************************************************************************
    // User's Constructor: a static "build" method
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * <EMBED CLASS=defs DATA-TOK=Str DATA-P=urlStr>
     * <EMBED CLASS='external-html' DATA-FILE-ID=BUILD_DESC>
     * @param urlStr <EMBED CLASS='external-html' DATA-FILE-ID=BUILD_PARAM>
     * @return <EMBED CLASS='external-html' DATA-FILE-ID=BUILD_RET>
     * @throws TokenizeException <EMBED CLASS='external-html' DATA-FILE-ID=BUILD_TOK_EX>
     */
    // NOTE(review): no unchecked operation is visible in this method; confirm whether this
    //               suppression is still needed.
    @SuppressWarnings("unchecked")
    public static URLToken build(final String urlStr)
    {
        // An empty String cannot be a URL.  Rejecting it here also guarantees that the code-point
        // array below has at least one element, so 'css[0]' is always a valid access.

        if (urlStr.length() == 0) throw new TokenizeException();

        final int[] css = urlStr.codePoints().toArray();

        if (Whitespace.is(css[0])) throw new TokenizeException
            ("A URL cannot begin with Whitespace.");

        // Holder for the token handed back by 'consume'; it has to be written from inside the
        // lambda below, which is why a plain local variable cannot be used.

        final EffectivelyFinal<CSSToken> saveIt = new EffectivelyFinal<>(null);

        // Any token other than a URLToken means the input was not exactly one clean URL
        final Consumer<CSSToken> acceptor = (CSSToken t) ->
        {
            if (t instanceof Whitespace) throw new TokenizeException
                ("The URL provided contained unescaped Whitespace");

            else if (t instanceof Comment) throw new TokenizeException
                ("The URL provided contained a CSS Comment");

            else if (t instanceof BadURL) throw new TokenizeException
                ("The URL provided was parsed into an instanceof BadURL: [" + t.str + "]");

            else if (t instanceof URLToken) saveIt.f = t;

            // URLToken.consume is not expected to hand any other token type to this acceptor
            else throw new UnreachableError();
        };

        URLToken.consume(
            css,
            new ByRef<>(0),                             // Start tokenizing at array-index 0
            acceptor,
            (TokenizeError te) -> te.throwException(),  // Any parse-error aborts the build
            true                                        // EOF without a closing ')' is expected
        );

        // Need to guarantee that the entire String was consumed in the process of tokenizing the
        // input String.  'TokenizeException' has a nicely worded Exception-Message to explain
        // what has occurred here.

        if (urlStr.length() != saveIt.f.str.length())
            throw new TokenizeException(urlStr, saveIt.f.str);

        return (URLToken) saveIt.f;
    }


    // ********************************************************************************************
    // ********************************************************************************************
    // CONSUME
    // ********************************************************************************************
    // ********************************************************************************************


    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    // Copied from:
    // https://drafts.csswg.org/css-syntax-3/#consume-url-token
    // April 2024
    // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
    //
    // 4.3.6. Consume a url token
    // 
    // This section describes how to consume a url token from a stream of code points. It returns
    // either a <url-token> or a <bad-url-token>.
    // 
    // NOTE: This algorithm assumes that the initial "url(" has already been consumed. This
    // algorithm also assumes that it’s being called to consume an "unquoted" value, like url(foo).
    // A quoted value, like url("foo"), is parsed as a <function-token>. Consume an ident-like
    // token automatically handles this distinction; this algorithm shouldn’t be called directly
    // otherwise.
    // 
    // 1. Initially create a <url-token> with its value set to the empty string.
    // 
    // 2. Consume as much whitespace as possible.
    // 
    // 3. Repeatedly consume the next input code point from the stream:
    // 
    // **   U+0029 RIGHT PARENTHESIS ())
    //      Return the <url-token>.
    // 
    // **   EOF
    //      This is a parse error. Return the <url-token>.
    // 
    // **   whitespace
    //      Consume as much whitespace as possible. If the next input code point is
    //      U+0029 RIGHT PARENTHESIS ()) or EOF, consume it and return the <url-token> (if EOF was
    //      encountered, this is a parse error); otherwise, consume the remnants of a bad url,
    //      create a <bad-url-token>, and return it.
    // 
    // **   U+0022 QUOTATION MARK (")
    // **   U+0027 APOSTROPHE (')
    // **   U+0028 LEFT PARENTHESIS (()
    // **   non-printable code point
    //      This is a parse error. Consume the remnants of a bad url, create a <bad-url-token>, and
    //      return it.
    // 
    // **   U+005C REVERSE SOLIDUS (\)
    //      If the stream starts with a valid escape, consume an escaped code point and append the
    //      returned code point to the <url-token>’s value.
    // 
    //      Otherwise, this is a parse error. Consume the remnants of a bad url, create a
    //      <bad-url-token>, and return it.
    // 
    // **   anything else
    //      Append the current input code point to the <url-token>’s value.

    /**
     * This is a tokenizer method which <B>"consumes"</B> the next {@code URLToken} from the input
     * Code-Point Array.
     * 
     * <EMBED CLASS=defs DATA-TOK=URLToken DATA-URL=consume-url-token DATA-OP=Consume>
     * <EMBED CLASS=external-html DATA-FILE-ID=COPIED_CSS_WG>
     * <EMBED CLASS=external-html DATA-FILE-ID=URL_TOKEN>
     * <EMBED CLASS=external-html DATA-FILE-ID=URL_TOK_SVG>
     *
     * <BR /><BR />Scanning begins at {@code POS.f}.  When a closing {@code ')'} is found,
     * {@code POS.f} is left pointing at that {@code ')'}; this method does not step past it.
     * Leading whitespace is not skipped by this method.
     *
     * @param css               The code-point array being tokenized
     * @param POS               Holder for the current array position; updated as input is consumed
     * @param returnParsedToken Receives each token produced by this method
     * @param errorEncountered  Receives each parse-error produced by this method
     * @param fromBuildMethod   When {@code TRUE}, reaching the end of {@code css} without a
     *                          closing {@code ')'} is not reported as an error
     */
    protected static void consume(                              // When invoked from 'CSSTokenizer'
            final int[]                     css,                // C, int[] css
            final ByRef<Integer>            POS,                // P, array-pos loop-variable
            final Consumer<CSSToken>        returnParsedToken,  // T, Vector<CSSToken>.add
            final Consumer<TokenizeError>   errorEncountered,   // E, Vector<TokenizeError>.add
            final boolean                   fromBuildMethod     // Suppresses the EOF parse-error
        )
    {
        final IntStream.Builder urlStrBuilder   = IntStream.builder();
        final int               sPos            = POS.f;

        int c;

        while (POS.f < css.length) switch (c = css[POS.f])
        {
            // **   U+0029 RIGHT PARENTHESIS ())
            //      Return the <url-token>.

            case ')':

                returnParsedToken.accept(new URLToken(css, sPos, POS.f, urlStrBuilder));
                return;

            // **   whitespace
            //      Consume as much whitespace as possible. If the next input code point is
            //      U+0029 RIGHT PARENTHESIS ()) or EOF, consume it and return the <url-token>
            //      (if EOF was encountered, this is a parse error); otherwise, consume the
            //      remnants of a bad url, create a <bad-url-token>, and return it.

            case '\u000B':
            case ' ':
            case '\t':
            case '\f':
            case '\n':
            case '\r':
            {
                // The URL itself ends where the trailing whitespace begins
                final int ePos = POS.f;

                // Trailing whitespace / comment tokens are buffered, so that they can be handed
                // to the caller *after* the URLToken which precedes them in the input.

                final Vector<CSSToken>      trailing    = new Vector<>();
                final Consumer<CSSToken>    collector   = trailing::add;

                while (POS.f < css.length)

                    if (Whitespace.is(css[POS.f]))
                        Whitespace.consume(css, POS, collector);

                    else if (Comment.is(css, POS.f))
                        Comment.consume(css, POS, collector, errorEncountered);

                    else break;

                final boolean atEOF = POS.f >= css.length;

                if (atEOF || (css[POS.f] == ')'))
                {
                    // ==> EOF, consume it and return the <url-token> (if EOF was encountered,
                    //     this is a parse error)

                    if (atEOF && (! fromBuildMethod)) errorEncountered.accept(
                        new TokenizeError(
                            css, sPos, POS.f, URLToken.class,
                            "CSS-Input EOF was encountered before reaching the URL's closing ')'"
                        ));

                    returnParsedToken.accept(new URLToken(css, sPos, ePos, urlStrBuilder));
                    for (CSSToken t : trailing) returnParsedToken.accept(t);
                }

                else
                {
                    errorEncountered.accept(
                        new TokenizeError(
                            css, sPos, POS.f, URLToken.class,
                            "Whitespace and/or comments before the end of a URL"
                        ));

                    BadURL.consume(css, POS, returnParsedToken, sPos);
                }

                return;
            }

            // **   U+0022 QUOTATION MARK (")
            // **   U+0027 APOSTROPHE (')
            // **   U+0028 LEFT PARENTHESIS (()
            // **   non-printable code point
            //      This is a parse error. Consume the remnants of a bad url, create a
            //      <bad-url-token>, and return it.

            case '"':
            case '\'':
            case '(':

                // 'c' is an int; it must be cast to 'char', otherwise the message would contain
                // the code-point's number rather than the character itself.  All three cases
                // here are ASCII, so the cast is loss-free.

                errorEncountered.accept(
                    new TokenizeError(
                        css, sPos, POS.f, URLToken.class,
                        "Unescaped Character within URL Found: ['" + ((char) c) + "']"
                    ));

                // NOTE: The "non-printable code-point" will be handled by the default-case
                BadURL.consume(css, POS, returnParsedToken, sPos);
                return;

            // **   U+005C REVERSE SOLIDUS (\)
            //      If the stream starts with a valid escape, consume an escaped code point and
            //      append the returned code point to the <url-token>'s value.
            // 
            //      Otherwise, this is a parse error. Consume the remnants of a bad url, create a
            //      <bad-url-token>, and return it.

            case '\\':

                if (! CSSUtil.isValidEscape(css, POS.f))
                {
                    errorEncountered.accept(
                        new TokenizeError(
                            css, sPos, POS.f, URLToken.class,
                            "A Reverse-Solidus (Backslash) Character was encountered, but " +
                            "unfortunately it was not a valid CSS Character-Escape Sequence"
                        ));

                    BadURL.consume(css, POS, returnParsedToken, sPos);
                    return;
                }

                // The value returned by the helper becomes the new array position
                POS.f = CSSUtil.consumeEscapedUnicode(css, POS.f+1, urlStrBuilder);
                break;

            // **   non-printable code point
            //      This is a parse error. Consume the remnants of a bad url, create a
            //      <bad-url-token>, and return it.
            // 
            // **   anything else
            //      Append the current input code point to the <url-token>'s value.

            default:

                if (CSSUtil.nonPrintableCodePoint(c))
                {
                    errorEncountered.accept(
                        new TokenizeError(
                            css, sPos, POS.f, URLToken.class,
                            "A non-printable Code-Point was Encountered (Code-Point #" + c + ")"
                        ));

                    BadURL.consume(css, POS, returnParsedToken, sPos);
                    return;
                }

                POS.f++;
                urlStrBuilder.accept(c);
        }


        // If this line is reached, it means that the loop ended because the end of the CSS was
        // reached.  If there had been a proper ending to the URL, it would already have been 
        // returned inside the Loop's Main Switch-Statement
        // 
        // MINOR-SPAGHETTI: If this "consume" method is called from "build", then there will not
        //                  be a closing ')'.  If this line is reached, and it was called from the
        //                  build-method, this is success, rather than failure
        //
        // The value of boolean "fromBuildMethod" is retrieved as a parameter from this method's
        // input-parameters.  URLToken.consume(...) is called from one of two places:
        //      1) Identifier.consumeIdentLikeSequence
        //      2) Method "build" (at the top of this class)
        //
        // When called from
        //      1) Identifier:      fromBuildMethod ==> false
        //      2) Build (above):   fromBuildMethod ==> true

        if (! fromBuildMethod) errorEncountered.accept(
            new TokenizeError(
                css, sPos, POS.f, URLToken.class,
                "EOF Encountered prior to reaching the end of a URL"
            ));

        returnParsedToken.accept(new URLToken(css, sPos, POS.f, urlStrBuilder));
    }
}