001package Torello.HTML;
002
003import java.net.*;
004import java.util.*;
005import java.util.stream.IntStream;
006
007import Torello.Java.*;
008
009import Torello.HTML.NodeSearch.InnerTagFind; // Used for an @see reference
010import Torello.HTML.NodeSearch.TagNodeFind;  // Used in getBaseURL
011import Torello.Java.Additional.Ret2;
012import Torello.Java.Additional.Ret3;
013
014/**
 * Utilities for de-referencing 'partially-completed' {@code URL's} in a Web-Page {@code Vector}.
016 * 
017 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=LINKS>
018 * @see ReplaceNodes
019 * @see ReplaceFunction
020 * @see HTMLPage
021 * @see InnerTagFind
022 * @see Ret2
023 */
024@Torello.JavaDoc.StaticFunctional
025public class Links
026{
027    private Links() { }
028
029    /**
030     * List of documented "starter-strings" that are sometimes used in Anchor URL
031     * {@code 'HREF=...'} attributes.
032     * 
033     * @see #NON_URL_HREFS
034     */
035    protected static final String[] _NON_URL_HREFS =
036        { "tel:", "magnet:", "javascript:", "mailto:", "ftp:", "file:", "data:", "blog:", "#" };
037
038    /**
039     * This small method just returns the complete list of commonly found Anchor
040     * {@code 'HREF' String's} that do not actually constitute an HTML {@code 'URL'.}  This method
041     * actually returns a "clone" of an internally stored {@code String[]} Array.  This is to
042     * protect and make sure that the list of potential HTML Anchor-Tag {@code 'HREF'} Attributes
043     * is not changed, doctored or modified
044     * 
045     * @return A clone of the {@code String}-array {@code '_NON_URL_HREFS'}
046     * 
047     * @see #_NON_URL_HREFS
048     */
049    public static String[] NON_URL_HREFS()
050    { return _NON_URL_HREFS.clone(); }
051
052    /**
     * The methods in this class <B><I>will not automatically extract</I></B> any HTML
054     * {@code <BASE HREF=URL>} definitions that are found on this page.  If the user wishes to
055     * dereference partial / relative {@code URL} definitions that exist on the input page, all the
056     * while respecting any {@code <BASE HREF=URL>} definitions found on the input page, then this
057     * method should be utilized.
058     *
059     * @param page This may be any HTML page or partial page.  If this page has a valid HTML
060     * {@code <BASE HREF=URL>}, it will be extracted and returned as an instance of
061     * {@code class URL}.
062     *
063     * @return This shall return the HTML {@code <BASE HREF="http://...">} element found available
064     * within the input-page parameter {@code 'page'}.  If the page provided does not contain a
065     * {@code BASE URL} definition, then null shall be returned.
066     *
067     * <BR /><BR /><B>NOTE:</B> The HTML Specification clearly states that only one {@code URL}
068     * may be defined using the HTML Element {@code <BASE>}.  Clearly, due to the browser wars,
069     * unspecified / non-deterministic behavior is possible if multiple definitions are provided.
070     * For the purposes of this class, if such a situation arises, an exception is thrown.
071     *
072     * @throws MalformedHTMLException If the HTML page provided contains multiple definitions of
073     * the element {@code <BASE HREF=URL>}, then this exception will throw.
074     *
     * @throws MalformedURLException If a {@code <BASE HREF=URL>} is found / identified within the
     * input page, but that {@code URL} is invalid, then this exception shall throw.
077     * 
078     * @see TagNodeFind
079     * @see Attributes#retrieve(Vector, int[], String)
080     */
081    public static URL getBaseURL(Vector<? extends HTMLNode> page)
082        throws MalformedHTMLException, MalformedURLException
083    {
084        int[] posArr = TagNodeFind.all(page, TC.OpeningTags, "base");
085
086        if (posArr.length == 0) return null;
087
088        // NOTE: The cast is all right because 'posArr' only points to TagNode's
089        // Attributes expects to avoid processing Vector<TextNode>, and Vector<CommentNode>
090        // Above, there will be nothing in the 'posArr' if either of those was passed.
091
092        @SuppressWarnings("unchecked")
093        String[]    urls    = Attributes.retrieve((Vector<HTMLNode>) page, posArr, "href");
094
095        boolean     found   = false;
096        String      ret     = null;
097
098        for (String url : urls)
099            if ((url != null) && (url.length() > 0))
100                if (found)
101                    throw new MalformedHTMLException(
102                        "The page you have provided has multiple <BASE HREF=URL> definitions.  " +
103                        "However, the HTML Specifications state that pages may provide just one " +
104                        "definition.  If you wish to proceed, retrieve the definitions manually " +
105                        "using class TagNodeFind.all and Attributes.retrieve, as explained in " +
106                        "the JavaDoc pages for this class."
107                    );
108                else 
109                {
110                    found = true;
111                    ret = url;
112                }
113
114        return new URL(ret);                    
115    }
116
117
118    // ********************************************************************************************
119    // ********************************************************************************************
120    // Complete Vector-Resolve Methods - SRC-ATTRIBUTE
121    // ********************************************************************************************
122    // ********************************************************************************************
123
124
125    /**
126     * Convenience Method.
127     * <BR />Invokes: {@link #resolveAllSRC(Vector, int, int, URL, SD, boolean)}
128     */
129    public static Ret3<int[], int[], int[]> resolveAllSRC(
130            Vector<? super TagNode> html, URL sourcePage, SD quote,
131            boolean askForReturnArraysOrReturnNull
132        )
133    { return resolveAllSRC(html, 0, -1, sourcePage, quote, askForReturnArraysOrReturnNull); }
134
135    /**
136     * Convenience Method.
137     * <BR />Accepts: {@code DotPair}.
138     * <BR />Invokes: {@link #resolveAllSRC(Vector, int, int, URL, SD, boolean)}
139     */
140    public static Ret3<int[], int[], int[]> resolveAllSRC(
141            Vector<? super TagNode> html, DotPair dp, URL sourcePage, SD quote,
142            boolean askForReturnArraysOrReturnNull
143        )
144    {
145        return resolveAllSRC
146            (html, dp.start, dp.end + 1, sourcePage, quote, askForReturnArraysOrReturnNull);
147    }
148
149    /**
150     * This method shall resolve all partial {@code URL} addresses that are found within
151     * {@code TagNode} elements having {@code 'SRC=...'} attributes.  Each instance of
152     * {@code TagNode} found in the input HTML {@code Vector} that has an {@code 'SRC'}
     * attribute - if the {@code 'URL'} is only partially resolved - shall be updated and replaced
154     * with a new {@code TagNode} with a fully resolved {@code URL}.
155     * 
156     * <EMBED CLASS='external-html' DATA-FILE-ID=BASE_URL_NOTE>
157     * 
158     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
159     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
160     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
161     * 
162     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's} 
163     * (possibly-relative) {@code URL's} in the HTML-{@code Vector} will be resolved.
164     * 
165     * @param quote A choice for the quotes to use.  In most cases, {@code URL} attribute
166     * <B STYLE="color: red;">values</B> do not contain quotation-marks.  So likely either
167     * choice would work just fine, without exceptions.
168     * 
169     * <BR /><BR /><B>NOTE:</B> <I>null may be passed to this parameter</I>, and if it is
170     * the original quotation marks found in the {@code TagNode's 'SRC'} attribute will be
171     * reused.  Passing null to this parameter should almost always be easiest, safest.
172     * 
173     * @param askForReturnArraysOrReturnNull This (long-named) parameter is merely here to
174     * facilitate retrieving more information from this method - <I>if necessary</I>.  When this
175     * parameter receives the following values:
176     * 
177     * <BR /><BR /><UL CLASS=JDUL>
178     * <LI> <B>TRUE:</B> Three integer {@code int[]} arrays will be returned as listed in the
179     *      <B>{@code Returns:}</B> section of this method's documentation.
180     *      </LI>
181     * 
182     * <LI><B>FALSE:</B> This method shall return null.</LI>
183     * </UL>
184     * 
185     * @return If input parameter {@code 'askForReturnArraysOrReturnNull'} has been passed 
186     * {@code FALSE}, this method shall return null.  Otherwise, (if passed {@code TRUE}), then
187     * this method shall return an instance of {@code 'Ret3<int[], int[], int[]>'} - which is
188     * <I>returning three separate integer-arrays about what was found, and what has occurred.</I>
189     *
190     * <BR /><BR />
     * Three arrays are returned as a result of this method's invocation.  Keep in mind that
     * though the information might be superfluous, discarding these arrays is easy.
     * They are provided as a matter of convenience for cases where more detailed information is
     * mandatory for ensuring that long lists of {@code HTMLNode's} were properly updated.
195     * 
196     * <BR /><BR /><OL CLASS=JDOL>
197     * <LI> {@code Ret3.a (int[])}
198     *      <BR /><BR />
199     *      The first {@code int[] array} shall contain a list of the index of every
     *      {@code TagNode} in the input-{@code Vector} parameter's range that
     *      <B><I>contained</I></B> a non-null HTML {@code 'SRC'} Attribute.
202     *      <BR /><BR />
203     *      </LI>
204     * 
205     * <LI> {@code Ret3.b (int[])}
206     *      <BR /><BR />
207     *      The second {@code int[] array} will contain an index-list of the indices
208     *      which contained {@code TagNode's} that were <B><I>replaced</I></B> by the
209     *      internal-resolve logic.
210     *      <BR /><BR />
211     *      </LI>
212     * 
213     * <LI> {@code Ret3.c (int[])}
214     *      <BR /><BR />
215     *      The third {@code int[] array} will contain an index-list of the indices
216     *      which contained {@code TagNode's} whose {@code 'SRC=...'} attribute
     *      <B><I>failed</I></B> to be resolved by the internal-resolve logic, <I>or</I> caused a
218     *      {@code QuotesException} to throw.
219     *      </LI>
220     * </OL>
221     * 
222     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
223     * 
224     * @see #resolve(String, URL)
225     * @see TagNode#AV(String)
226     * @see TagNode#setAV(String, String, SD)
227     */
228    public static Ret3<int[], int[], int[]> resolveAllSRC(
229            Vector<? super TagNode> html, int sPos, int ePos, URL sourcePage, SD quote,
230            boolean askForReturnArraysOrReturnNull
231        )
232    {
233        // Retrieve the Vector-location of any TagNode on the page that has
234        // a "SRC=..." attribute.  These are almost always HTML <IMG> elements.
235        // NOTE: FIND Method's are "READ ONLY" - the Cast will make no difference at run-time.
236        //       The @SuppressWarnings is to overcome the cast of 'html'
237
238        @SuppressWarnings("unchecked")
239        int[] hasSrcPosArr = InnerTagFind.all((Vector<HTMLNode>) html, sPos, ePos, "src");
240
241        // Java Stream's are convenient for keeping "Growing Lists" of return values.
242        // This builder shall keep a list of all URL's that failed to update - for any reason
243        // **UNLESS** the reason is that the URL was already a fully-resolved, non-partial URL
244
245        IntStream.Builder failedUpdate = askForReturnArraysOrReturnNull
246            ? IntStream.builder() 
247            : null;
248
249        // This stream will keep a list of all URL's that were updated, and whose TagNode's
250        // were replaced inside the input HTML Vector
251
252        IntStream.Builder replaced = askForReturnArraysOrReturnNull
253            ? IntStream.builder()
254            : null;
255
256        for (int pos : hasSrcPosArr)
257        {
258            // Get the node at the index
259            TagNode tn = (TagNode) html.elementAt(pos);
260
261            // 1) Retrieve the SRC Attribute
262            // 2) if it is a partial-URL resolve it
263            // 3) Convert to a String
264
265            String  oldURL = tn.AV("src");
266            URL     newURL = resolve(oldURL, sourcePage);
267
268            // Some URL's cannot be resolved, if so, just skip this TagNode.
269            // Log the index to the stream (if requested), and continue.
270
271            if (newURL == null)
272            { if (askForReturnArraysOrReturnNull) failedUpdate.accept(pos); continue; }
273
274            // If the URL was already a fully-resolved-URL, continue - don't replace the TagNode;
275            // No logging needed here, the URL was *already* resolved...
276
277            if (oldURL.length() == newURL.toString().length()) continue;
278
279            // Replace the SRC Attribute in the TagNode.  This builds a new instance of TagNode
280            // If there is an exception, log the index to the stream (if requested), and continue.
281
282            try
283                { tn = tn.setAV("src", newURL.toString(), quote); }
284
285            catch (QuotesException qex)
286                { if (askForReturnArraysOrReturnNull) failedUpdate.accept(pos); continue; }
287
288            // Replace the index in the Vector containing the old TagNode with the new one.
289            html.setElementAt(tn , pos);
290
291            // The Vector-Index at this position had it's old TagNode removed and replaced with a
292            // new updated one.  Log this to the stream-list so to allow the user to know.
293
294            if (askForReturnArraysOrReturnNull) replaced.accept(pos);
295        }
296
297        return askForReturnArraysOrReturnNull
298
299            ? new Ret3<int[], int[], int[]>
300                (hasSrcPosArr, replaced.build().toArray(), failedUpdate.build().toArray())
301            : null;
302    }
303
304
305    // ********************************************************************************************
306    // ********************************************************************************************
307    // Complete Vector-Resolve Methods - HREF-ATTRIBUTE
308    // ********************************************************************************************
309    // ********************************************************************************************
310
311
312    /**
313     * Convenience Method.
314     * <BR />Invokes: {@link #resolveAllHREF(Vector, int, int, URL, SD, boolean)}
315     */
316    public static Ret3<int[], int[], int[]> resolveAllHREF(
317            Vector<? super TagNode> html, URL sourcePage, SD quote,
318            boolean askForReturnArraysOrReturnNull
319        )
320    { return resolveAllHREF(html, 0, -1, sourcePage, quote, askForReturnArraysOrReturnNull); }
321
322    /**
323     * Convenience Method.
324     * <BR />Accepts: {@code DotPair}.
325     * <BR />Invokes: {@link #resolveAllHREF(Vector, int, int, URL, SD, boolean)}
326     */
327    public static Ret3<int[], int[], int[]> resolveAllHREF(
328            Vector<? super TagNode> html, DotPair dp, URL sourcePage, SD quote,
329            boolean askForReturnArraysOrReturnNull
330        )
331    {
332        return resolveAllHREF
333            (html, dp.start, dp.end + 1, sourcePage, quote, askForReturnArraysOrReturnNull); 
334    }
335
336    /**
337     * This method shall resolve all partial {@code URL} addresses that are found within
338     * {@code TagNode} elements having {@code 'HREF=...'} attributes.  Each instance of
339     * {@code TagNode} found in the input HTML {@code Vector} that has an {@code 'HREF'}
     * attribute - if the {@code 'URL'} is only partially resolved - shall be updated and replaced
341     * with a new {@code TagNode} with a fully resolved {@code URL}.
342     * 
343     * <EMBED CLASS='external-html' DATA-FILE-ID=BASE_URL_NOTE>
344     * 
345     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVECSUP>
346     * @param sPos <EMBED CLASS='external-html' DATA-FILE-ID=SPOSVEC>
347     * @param ePos <EMBED CLASS='external-html' DATA-FILE-ID=EPOSVEC>
348     * 
349     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's} 
350     * (possibly-relative) {@code URL's} in the HTML-{@code Vector} will be resolved.
351     * 
352     * @param quote A choice for the quotes to use.  In most cases, {@code URL} attribute
353     * <B STYLE="color: red;">values</B> do not contain quotation-marks.  So likely either
354     * choice would work just fine, without exceptions.
355     * 
356     * <BR /><BR /><B>NOTE:</B> <I>null may be passed to this parameter</I>, and if it is
357     * the original quotation marks found in the {@code TagNode's 'HREF'} attribute will be
358     * reused.  Passing null to this parameter should almost always be easiest, safest.
359     * 
360     * @param askForReturnArraysOrReturnNull This (long-named) parameter is merely here to
361     * facilitate retrieving more information from this method - <I>if necessary</I>.  When this
362     * parameter receives the following values:
363     * 
364     * <BR /><BR /><UL CLASS=JDUL>
365     * <LI> <B>TRUE:</B> Three integer {@code int[]} arrays will be returned as listed in the
366     *      <B>{@code Returns:}</B> section of this method's documentation.
367     *      </LI>
368     * 
369     * <LI><B>FALSE:</B> This method shall return null. </LI>
370     * </UL>
371     * 
372     * @return If input parameter {@code 'askForReturnArraysOrReturnNull'} has been passed 
373     * {@code FALSE}, this method shall return null.  Otherwise, (if passed {@code TRUE}), then
374     * this method shall return an instance of {@code 'Ret3<int[], int[], int[]>'} - which is
375     * <I>returning three separate integer-arrays about what was found, and what has occurred.</I>
376     *
377     * <BR /><BR />
     * Three arrays are returned as a result of this method's invocation.  Keep in mind that
     * though the information might be superfluous, discarding these arrays is easy.
     * They are provided as a matter of convenience for cases where more detailed information is
     * mandatory for ensuring that long lists of {@code HTMLNode's} were properly updated.
382     * 
383     * <BR /><BR /><OL CLASS=JDOL>
384     * <LI> {@code Ret3.a (int[])}
385     *      <BR /><BR />
386     *      The first {@code int[] array} shall contain a list of the index of every
     *      {@code TagNode} in the input-{@code Vector} parameter's range that
     *      <B><I>contained</I></B> a non-null HTML {@code 'HREF'} Attribute.
389     *      <BR /><BR />
390     *      </LI>
391     * 
392     * <LI> {@code Ret3.b (int[])}
393     *      <BR /><BR />
394     *      The second {@code int[] array} will contain an index-list of the indices
395     *      which contained {@code TagNode's} that were <B><I>replaced</I></B> by the
396     *      internal-resolve logic.
397     *      <BR /><BR />
398     *      </LI>
399     * 
400     * <LI> {@code Ret3.c (int[])}
401     *      <BR /><BR />
402     *      The third {@code int[] array} will contain an index-list of the indices
403     *      which contained {@code TagNode's} whose {@code 'HREF=...'} attribute
     *      <B><I>failed</I></B> to be resolved by the internal-resolve logic, <I>or</I> caused a
405     *      {@code QuotesException} to throw.
406     *      </LI>
407     * </OL>
408     * 
409     * @throws IndexOutOfBoundsException <EMBED CLASS='external-html' DATA-FILE-ID=VIOOBEX>
410     * 
411     * @see #resolve(String, URL)
412     * @see TagNode#AV(String)
413     * @see TagNode#setAV(String, String, SD)
414     */
415    public static Ret3<int[], int[], int[]> resolveAllHREF(
416            Vector<? super TagNode> html, int sPos, int ePos, URL sourcePage, SD quote,
417            boolean askForReturnArraysOrReturnNull
418        )
419    {
420        // Retrieve the Vector-location of any TagNode on the page that has
421        // a "HREF=..." attribute.  These are almost always HTML <IMG> elements.
422        // NOTE: FIND Method's are "READ ONLY" - the Cast will make no difference at run-time.
423        //       The @SuppressWarnings is to overcome the cast of 'html'
424
425        @SuppressWarnings("unchecked")
426        int[] hasHRefPosArr = InnerTagFind.all((Vector<HTMLNode>) html, sPos, ePos, "href");
427
428        // Java Stream's are convenient for keeping "Growing Lists" of return values.
429        // This builder shall keep a list of all URL's that failed to update - for any reason
430        // **UNLESS** the reason is that the URL was already a fully-resolved, non-partial URL
431
432        IntStream.Builder failedUpdate = askForReturnArraysOrReturnNull
433            ? IntStream.builder() 
434            : null;
435
436        // This stream will keep a list of all URL's that were updated, and whose TagNode's
437        // were replaced inside the input HTML Vector
438
439        IntStream.Builder replaced = askForReturnArraysOrReturnNull
440            ? IntStream.builder()
441            : null;
442
443        for (int pos : hasHRefPosArr)
444        {
445            // Get the node at the index
446            TagNode tn = (TagNode) html.elementAt(pos);
447
448            // 1) Retrieve the HREF Attribute
449            // 2) if it is a partial-URL resolve it
450            // 3) Convert to a String
451
452            String  oldURL = tn.AV("HREF");
453            URL     newURL = resolve(oldURL, sourcePage);
454
455            // Some URL's cannot be resolved, if so, just skip this TagNode.
456            // Log the index to the stream (if requested), and continue.
457
458            if (newURL == null)
459            { if (askForReturnArraysOrReturnNull) failedUpdate.accept(pos); continue; }
460
461            // If the URL was already a fully-resolved-URL, continue - don't replace the TagNode;
462            // No logging needed here, the URL was *already* resolved...
463
464            if (oldURL.length() == newURL.toString().length()) continue;
465
466            // Replace the HREF Attribute in the TagNode.  This builds a new instance of TagNode
467            // If there is an exception, log the index to the stream (if requested), and continue.
468
469            try
470                { tn = tn.setAV("href", newURL.toString(), quote); }
471
472            catch (QuotesException qex)
473                { if (askForReturnArraysOrReturnNull) failedUpdate.accept(pos); continue; }
474
475            // Replace the index in the Vector containing the old TagNode with the new one.
476            html.setElementAt(tn , pos);
477
478            // The Vector-Index at this position had it's old TagNode removed and replaced with a
479            // new updated one.  Log this to the stream-list so to allow the user to know.
480
481            if (askForReturnArraysOrReturnNull) replaced.accept(pos);
482        }
483
484        return askForReturnArraysOrReturnNull
485
486            ? new Ret3<int[], int[], int[]>
487                (hasHRefPosArr, replaced.build().toArray(), failedUpdate.build().toArray())
488            : null;
489    }
490
491
492    // ********************************************************************************************
493    // ********************************************************************************************
494    // Resolve, Not Keep Exceptions
495    // ********************************************************************************************
496    // ********************************************************************************************
497
498
499    /**
500     * Convenience Method.
501     * <BR />Invokes: {@link #resolveHREF(TagNode, URL)}.
502     * <BR />And-Then: {@link TagNode#setAV(String, String, SD)}
503     */
504    public static TagNode resolveHREFAndUpdate(TagNode tnWithHREF, URL sourcePage)
505    { 
506        URL url = resolveHREF(tnWithHREF, sourcePage);
507
508        return (url == null)
509            ? null
510            : tnWithHREF.setAV("href", url.toString(), null);
511    }
512
513
514    /**
515     * This should be used for {@code TagNode's} that contain an {@code 'HREF'} inner-tag
516     * (attribute).
517     * 
518     * @param tnWithHREF <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TN_HREF>
519     * 
520     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode}
521     * (possibly-relative) {@code URL} will be resolved.
522     * 
523     * @return A complete-{@code URL} without any missing "presumed data" - such as host/domain or
524     * directory.  Null is returned if attempting to build the {@code URL} generated a
525     * {@code MalformedURLException}.
526     * 
527     * <BR /><BR /><B STYLE="color: red;">SPECIFICALLY:</B> This method shall catch all 
528     * {@code MalformedURLException's}.
529     * 
530     * @throws HREFException If the {@code TagNode} passed to parameter {@code 'tnWithHREF'} does
531     * not actually contain an {@code HREF} attribute, then this exception shall throw.
532     * 
533     * @see #resolve(String, URL)
534     * @see TagNode#AV(String)
535     */
536    public static URL resolveHREF(TagNode tnWithHREF, URL sourcePage)
537    {
538        String href = tnWithHREF.AV("href");
539
540        if (href == null) throw new HREFException(
541            "The TagNode passed to parameter tnWithHREF does not actually contain an " +
542            "HREF attribute."
543        );
544
545        return resolve(href, sourcePage);
546    }
547
548
549    /**
550     * Convenience Method.
551     * <BR />Invokes: {@link #resolveSRC(TagNode, URL)} 
552     * <BR />And-Then: {@link TagNode#setAV(String, String, SD)}
553     */
554    public static TagNode resolveSRCAndUpdate(TagNode tnWithSRC, URL sourcePage)
555    { 
556        URL url = resolveSRC(tnWithSRC, sourcePage);
557
558        return (url == null) 
559            ? null 
560            : tnWithSRC.setAV("src", url.toString(), null);
561    }
562
563
564    /**
565     * This should be used for {@code TagNode's} that contain a {@code 'SRC'} inner-tag
566     * (attribute).
567     * 
568     * @param tnWithSRC <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TN_SRC>
569     * 
570     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode}
571     * (possibly-relative) {@code URL} will be resolved.
572     * 
573     * @return A complete-{@code URL} without any missing "presumed data" - such as host/domain or
574     * directory.  Null is returned if attempting to build the {@code URL} generated a
575     * {@code MalformedURLException}.
576     * 
577     * <BR /><BR /><B STYLE="color: red;">SPECIFICALLY:</B> This method shall catch all 
578     * {@code MalformedURLException's}.
579     * 
580     * @throws SRCException If the {@code TagNode} passed to parameter {@code 'tnWithSRC'} does not
581     * actually contain a {@code SRC} attribute, then this exception shall throw.
582     * 
583     * @see #resolve(String, URL)
584     * @see TagNode#AV(String)
585     */
586    public static URL resolveSRC(TagNode tnWithSRC, URL sourcePage)
587    {
588        String src = tnWithSRC.AV("src");
589
590        if (src == null) throw new SRCException(
591            "The TagNode passed to parameter tnWithSRC does not actually contain a " +
592            "SRC attribute."
593        );
594
595        return resolve(src, sourcePage);
596    }
597
598    /**
599     * This should be used for lists of {@code TagNode's}, each of which contain an {@code 'HREF'}
600     * inner-tag (attribute).
601     * 
602     * @param tnListWithHREF <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TNLIST_HREF>
603     * 
604     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's} 
605     * (possibly-relative) {@code URL's} in the {@code Iterable} will be resolved.
606     * 
607     * @return A list of {@code URL's}, each of which have been completed/resolved with the 
608     * {@code 'sourcePage'} parameter.  Any {@code TagNode} which generated an exception, will
609     * result in a null value in the {@code Vector}.
610     * 
611     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_HREF>
612     * 
613     * @see #resolve(String, URL)
614     * @see TagNode#AV(String)
615     */
616    public static Vector<URL> resolveHREFs(Iterable<TagNode> tnListWithHREF, URL sourcePage)
617    {
618        Vector<URL> ret = new Vector<>();
619
620        for (TagNode tn : tnListWithHREF) ret.addElement(resolve(tn.AV("href"), sourcePage));
621
622        return ret;
623    }
624
625
626    /**
627     * This should be used for lists of {@code TagNode's}, each of which contain a {@code 'SRC'}
628     * inner-tag (attribute).
629     * 
630     * @param tnListWithSRC <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TNLIST_SRC>
631     * 
632     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's}
633     * (possibly-relative) {@code URL's} in the {@code Iterable} will be resolved.
634     * 
635     * @return A list of {@code URL's}, each of which have been completed/resolved with the
636     * {@code 'sourcePage'} parameter.  Any {@code TagNode} which generated an exception, will
637     * result in a null value in the {@code Vector.}
638     * 
639     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_SRC>
640     * 
641     * @see #resolve(String, URL)
642     * @see TagNode#AV(String)
643     */
644    public static Vector<URL> resolveSRCs(Iterable<TagNode> tnListWithSRC, URL sourcePage)
645    {
646        Vector<URL> ret = new Vector<>();
647
648        for (TagNode tn : tnListWithSRC) ret.addElement(resolve(tn.AV("src"), sourcePage));
649
650        return ret;
651    }
652
653
654    /**
655     * This will use a "pointer array" - an array containing indexes into the downloaded page to
656     * retrieve {@code TagNode's}.  The {@code TagNode's} to which this pointer-array points -
657     * must each contain an {@code HREF} inner-tag with a {@code URL}, or a partial {@code URL}.
658     * 
659     * <EMBED CLASS='external-html' DATA-FILE-ID=BASE_URL_NOTE>
660     * 
661     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC>
662     * 
663     * @param nodePosArr An array of pointers into the page or sub-page.  The pointers must
664     * reference {@code TagNode's} that contain {@code HREF} attributes.  Integer-pointer Arrays
665     * are usually returned from the {@code package 'NodeSearch'} "Find" methods.
666     *
667     * <DIV CLASS="EXAMPLE">{@code 
668     * // Retrieve 'pointers' to all the '<A HREF=...>' TagNode's.  The term 'pointer' refers to
669     * // integer-indices into the vectorized-html variable 'page'
670     * int[] anchorPosArr = TagNodeFind.all(page, TC.OpeningTags, "a");
671     * 
672     * // Extract each HREF inner-tag, and construct a {@code URL}.  Use the 'sourcePage' parameter
673     * // if the URL is only partially-resolved
674     * Vector<URL> urls = Links.resolveHREFs(page, anchorPosArr, mySourcePage);
675     * }</DIV>
676     * <BR /><I>which would obtain a pointer-array / (a.k.a. a "vector-index-array") to every HTML
677     * {@code "<A ...>"} element</I> that was available in the HTML page-{@code Vector} parameter
678     * {@code 'html'}, and then resolve any shortened {@code URL's}. 
679     *
680     * @param sourcePage This is the source page {@code URL} from whence the (possibly relative)
681     * {@code TagNode URL's} in the {@code Vector} are to be resolved.
682     *
683     * @return A list of {@code URL's}, each of which have been completed/resolved with the
684     * {@code 'sourcePage'} parameter.  Any {@code TagNode} which generated an exception, will
685     * result in a null value in the {@code Vector}.  However, if any of the nodes pointed to by
686     * the {@code 'nodePosArr'} parameter do not contain opening {@code TagNode} elements, then
687     * this mistake shall generate {@code TagNodeExpectedException's}.
688     *
689     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_HREF>
690     *
691     * @throws ArrayIndexOutOfBoundsException
692     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX>
693     * @throws OpeningTagNodeExpectedException
694     * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX>
695     * 
696     * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX>
697     *
698     * @see #resolve(String, URL)
699     * @see TagNode#AV(String)
700     */
701    public static Vector<URL> resolveHREFs
702        (Vector<? extends HTMLNode> html, int[] nodePosArr, URL sourcePage)
703    {
704        // Return Vector
705        Vector<URL> ret = new Vector<>();
706
707        for (int nodePos : nodePosArr)
708        {
709            HTMLNode n = html.elementAt(nodePos);
710
711            // Must be an HTML TagNode
712            if (! n.isTagNode()) throw new TagNodeExpectedException(nodePos);
713
714            TagNode tn = (TagNode) n;
715
716            // Must be an "Opening" HTML TagNode
717            if (tn.isClosing) throw new OpeningTagNodeExpectedException(nodePos);
718
719            // Resolve the 'HREF', save the URL
720            ret.addElement(resolve(tn.AV("href"), sourcePage));
721        }
722
723        return ret;
724    }
725 
726
727    /**
728     * This will use a "pointer array" - an array containing indexes into the downloaded page to
729     * retrieve {@code TagNode's}.  The {@code TagNode's} to which this pointer-array points - must
730     * each contain a {@code SRC} inner-tag with a {@code URL}, or a partial {@code URL}.
731     * 
732     * <EMBED CLASS='external-html' DATA-FILE-ID=BASE_URL_NOTE>
733     *
734     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> Any HTML page (or sub-page)
735     * 
736     * @param nodePosArr An array of pointers into the page or sub-page.  The pointers must
737     * reference {@code TagNode's} that contain {@code SRC} attributes.  Integer-pointer Arrays are
738     * usually returned from the {@code package 'NodeSearch'} "Find" methods.
739     *
740     * <DIV CLASS="EXAMPLE">{@code 
741     * // Retrieve 'pointers' to all the '<IMG SRC=...>' TagNode's.  The term 'pointer' refers to
742     * // integer-indices into the vectorized-html variable 'page'
743     * 
744     * int[] picturePosArr = TagNodeFind.all(page, TC.OpeningTags, "img");
745     * 
     * // Extract each SRC inner-tag, and construct a URL.  Use the 'sourcePage' parameter
747     * // if the URL is only partially-resolved
748     * 
749     * Vector<URL> urls = Links.resolveSRCs(page, picturePosArr, mySourcePage);
750     * }</DIV>
751     * 
752     * <BR /><I>which would obtain a pointer-array / (a.k.a. a "vector-index-array") to every HTML
753     * {@code "<IMG ...>"} element</I> that was available in the HTML page-{@code Vector} parameter
     * {@code 'html'}, and then resolve any shortened image {@code URL's}.
755     *
756     * @param sourcePage This is the source page {@code URL} from whence the (possibly relative)
757     * {@code TagNode URL's} in the {@code Vector} are to be resolved.
758     *
759     * @return A list of {@code URL's}, each of which have been completed/resolved with the
760     * {@code 'sourcePage'} parameter.  Any {@code TagNode} which generated an exception, will
761     * result in a null value in the {@code Vector}.  However, if any of the nodes pointed to by
762     * the {@code 'nodePosArr'} parameter do not contain opening {@code TagNode} elements, then
763     * this mistake shall generate {@code TagNodeExpectedException's}.
764     *
765     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_SRC>
766     *
767     * @throws ArrayIndexOutOfBoundsException
768     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX>
769     * @throws OpeningTagNodeExpectedException
770     * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX>
771     * 
772     * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX>
773     *
774     * @see #resolve(String, URL)
775     * @see TagNode#AV(String)
776     */
777    public static Vector<URL> resolveSRCs
778        (Vector<? extends HTMLNode> html, int[] nodePosArr, URL sourcePage)
779    {
780        // Return Vector
781        Vector<URL> ret = new Vector<>();
782
783        for (int nodePos : nodePosArr)
784        {
785            HTMLNode n = html.elementAt(nodePos);
786
787            // Must be an HTML TagNode
788            if (! n.isTagNode()) throw new TagNodeExpectedException(nodePos);
789
790            TagNode tn = (TagNode) n;
791
792            // Must be an "Opening" HTML TagNode
793            if (tn.isClosing) throw new OpeningTagNodeExpectedException(nodePos);
794
795            // Resolve the "SRC", save the URL
796            ret.addElement(resolve(tn.AV("src"), sourcePage));
797        }
798
799        return ret;
800    }
801
802
803    /**
804     * This will convert <I><B>a list of </B></I> simple java {@code String's} to a
805     * list/{@code Vector} of {@code URL's}, de-referencing any missing information using the
806     * {@code 'sourcePage'} parameter.
807     * 
808     * @param src a list of strings - usually partially or totally completed Internet {@code URL's}
809     * 
810     * @param sourcePage This is the source page {@code URL} from which the {@code String's}
811     * (possibly-relative) {@code URL's} in the {@code Vector} will be resolved.
812     * 
813     * @return A list of {@code URL's}, each of which have been completed/resolved with the
814     * {@code 'sourcePage'} parameter.  If there were any {@code String's} that were zero-length or
     * null, then null is returned in the related {@code Vector} position.  If any
     * {@code String} causes a {@code MalformedURLException}, then that position in the
     * {@code Vector} will be null.
818     * 
819     * @see #resolve(String, URL)
820     */
821    public static Vector<URL> resolve(Vector<String> src, URL sourcePage)
822    {
823        Vector<URL> ret = new Vector<>();
824
825        for (String s : src) ret.addElement(resolve(s, sourcePage));
826
827        return ret;
828    }
829
830    /**
831     * This will convert a simple java {@code String} to a {@code URL}, de-referencing any missing
832     * information using the {@code 'sourcePage'} parameter.
833     * 
834     * @param src Any java {@code String}, usually one which was scraped from an HTML-Page, and
835     * needs to be "completed."
836     * 
837     * @param sourcePage This is the source page {@code URL} from which the String
838     * (possibly-relative) {@code URL} will be resolved.
839     * 
840     * @return A {@code URL}, which has been completed/resolved with the {@code 'sourcePage'}
841     * parameter. If parameter {@code 'src'} is null or zero-length, then this method will also
842     * return null.  If a {@code MalformedURLException} is generated, null will also be returned.
843     */
844    public static URL resolve(String src, URL sourcePage)
845    {
846        if (sourcePage == null) throw new NullPointerException(
847            "Though you may provide null to the partial-URL to dereference parameter, null " +
848            "may not be passed to the Source-Page Parameter.  The purpose of the 'resolve' " +
849            "operation is to resolve partial-URLs against a source-page (root) URL. " +
850            "Therefore this is not allowed."
851        );
852
853        if (src == null) return null;
854
855        src = src.trim();
856
857        if (src.length() == 0) return null;
858
859        String srcLC = src.toLowerCase();
860
861        if (StrCmpr.startsWithXOR(srcLC, _NON_URL_HREFS)) return null;
862
863        if (srcLC.startsWith("http://") || srcLC.startsWith("https://"))
864
865            try
866                { return new URL(src); }
867
868            catch (MalformedURLException e) { return null; }
869
870        if (src.startsWith("//") && (src.charAt(3) != '/'))
871
872            try
873                { return new URL(sourcePage.getProtocol().toLowerCase() + ":" + src); }
874
875            catch (MalformedURLException e) { return null; }
876        
877        if (src.startsWith("/"))
878
879            try
880            { 
881                return new URL(
882                    sourcePage.getProtocol().toLowerCase() + "://" +
883                    sourcePage.getHost().toLowerCase() +
884                    src
885                );
886            }
887
888            catch (MalformedURLException e) { return null; }
889 
890        if (src.startsWith("../"))
891        {
892            String  sourcePageStr   = sourcePage.toString();
893            short   nLevels         = 0;
894
895            do      { nLevels++;  src = src.substring(3); }
896            while   (src.startsWith("../"));
897
898            String  directory = StringParse.dotDotParentDirectory(sourcePage.toString(), nLevels);
899
900            try     { return new URL(directory + src); }
901            catch   (Exception e) { return null; }
902        }
903
904        String  root =
905            sourcePage.getProtocol().toLowerCase() + "://" + 
906            sourcePage.getHost().toLowerCase();
907
908        String  path    = sourcePage.getPath().trim();
909        int     pos     = StringParse.findLastFrontSlashPos(path);
910
911        if (pos == -1) throw new StringIndexOutOfBoundsException(
912            "The URL you have provided: " + sourcePage.toString() + " does not have a '/' " +
913            "front-slash character in it's path.  Cannot proceed resolving relative-URL's " +
914            "without this."
915        );
916
917        path = path.substring(0, pos + 1);
918
919        try     { return new URL(root + path + src); }
920        catch   (MalformedURLException e) { return null; }
921    }
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943    // ********************************************************************************************
944    // ********************************************************************************************
945    // Resolve, KE - Keep Exceptions
946    // ********************************************************************************************
947    // ********************************************************************************************
948
949
950    /**
951     * This should be used for {@code TagNode's} that contain an {@code 'HREF'} inner-tag
952     * (attribute).
953     * 
954     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
955     * 
956     * @param tnWithHREF <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TN_HREF>
957     * 
958     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's}
959     * (possibly-relative) {@code URL} will be resolved.
960     * 
     * @return A complete-{@code URL} without any missing "presumed data" - such as host/domain or
     * directory.  If the {@code HREF} attribute is absent, an {@code HREFException} is thrown
     * rather than returning null; if its value is blank, null is returned.  If the
     * {@code TagNode} causes a {@code MalformedURLException}, that is returned in
     * {@code Ret2.b}
965     * 
966     * <BR /><BR /><B STYLE="color: red;">SPECIFICALLY:</B> This method shall catch all 
967     * {@code MalformedURLException's}.
968     * 
969     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
970     * 
971     * @throws HREFException If the {@code TagNode} passed to parameter {@code 'tnWithHREF'} does
972     * not actually contain an {@code HREF} attribute, then this exception shall throw.
973     * 
974     * @see #resolve_KE(String, URL)
975     * @see TagNode#AV(String)
976     * @see Ret2
977     */
978    public static Ret2<URL, MalformedURLException> resolveHREF_KE
979        (TagNode tnWithHREF, URL sourcePage)
980    {
981        String href = tnWithHREF.AV("href");
982
983        if (href == null) throw new HREFException(
984            "The TagNode passed to parameter tnWithHREF does not actually contain an " +
985            "HREF attribute."
986        );
987
988        return resolve_KE(href, sourcePage);
989    }
990
991
992    /**
993     * This should be used for {@code TagNode's} that contain a {@code 'SRC'} inner-tag
994     * (attribute).
995     * 
996     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
997     * 
998     * @param tnWithSRC <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TN_SRC>
999     * 
1000     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's}
1001     * (possibly-relative) {@code URL} will be resolved.
1002     * 
     * @return A complete-{@code URL} without any missing "presumed data" - such as host/domain or
     * directory.  If the {@code SRC} attribute is absent, a {@code SRCException} is thrown rather
     * than returning null; if its value is blank, null is returned.  If the {@code TagNode}
     * causes a {@code MalformedURLException}, that is returned in {@code Ret2.b}
1006     * 
1007     * <BR /><BR /><B STYLE="color: red;">SPECIFICALLY:</B> This method shall catch all 
1008     * {@code MalformedURLException's}.
1009     *
1010     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1011     * 
1012     * @throws SRCException If the {@code TagNode} passed to parameter {@code 'tnWithSRC'} does not
1013     * actually contain a {@code SRC} attribute, then this exception shall throw.
1014     * 
1015     * @see #resolve_KE(String, URL)
1016     * @see TagNode#AV(String)
1017     * @see Ret2
1018     */
1019    public static Ret2<URL, MalformedURLException> resolveSRC_KE
1020        (TagNode tnWithSRC, URL sourcePage)
1021    {
1022        String src = tnWithSRC.AV("src");
1023
1024        if (src == null) throw new SRCException(
1025            "The TagNode passed to parameter tnWithSRC does not actually contain a " +
1026            "SRC attribute."
1027        );
1028
1029        return resolve_KE(src, sourcePage);
1030    }
1031
1032
1033    /**
1034     * This should be used for lists of {@code TagNode's}, each of which contain an {@code 'HREF'}
1035     * inner-tag (attribute).
1036     * 
1037     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
1038     * 
1039     * @param tnListWithHREF <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TNLIST_HREF>
1040     * 
1041     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's} 
1042     * (possibly-relative) {@code URL's} in the {@code Iterable} will be resolved.
1043     * 
1044     * @return A list of {@code URL's}, each of which have been completed/resolved with the
1045     * {@code 'sourcePage'} parameter.  If there were any {@code TagNode} with no {@code HREF} tag,
1046     * then null is returned in the related {@code Vector} position.  If any {@code TagNode} causes
1047     * a {@code MalformedURLException}, then that position in the {@code Vector} will contain the
1048     * exception in {@code Ret2.b}
1049     * 
1050     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_HREF>
1051     *
1052     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1053     * 
1054     * @see #resolve_KE(String, URL)
1055     * @see TagNode#AV(String)
1056     * @see Ret2
1057     */
1058    public static Vector<Ret2<URL, MalformedURLException>> resolveHREFs_KE
1059        (Iterable<TagNode> tnListWithHREF, URL sourcePage)
1060    {
1061        Vector<Ret2<URL, MalformedURLException>> ret = new Vector<>();
1062
1063        for (TagNode tn : tnListWithHREF) ret.addElement(resolve_KE(tn.AV("href"), sourcePage));
1064
1065        return ret;
1066    }
1067
1068
1069    /**
1070     * This should be used for lists of {@code TagNode's}, each of which contain a {@code 'SRC'}
1071     * inner-tag (attribute).
1072     * 
1073     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
1074     * 
1075     * @param tnListWithSRC <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_TNLIST_SRC>
1076     * 
1077     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's}
1078     * (possibly-relative) {@code URL's} in the {@code Iterable} will be resolved.
1079     * 
1080     * @return A list of {@code URL's}, each of which have been completed/resolved with the
1081     * {@code 'sourcePage'} parameter.  If there were any {@code TagNode} with no {@code SRC} tag,
1082     * then null is returned in the related {@code Vector} position.  If any {@code TagNode} causes
1083     * a {@code MalformedURLException}, then that position in the {@code Vector} will contain the
1084     * exception in {@code Ret2.b}
1085     * 
1086     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_SRC>
1087     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1088     * 
1089     * @see #resolve_KE(String, URL)
1090     * @see TagNode#AV(String)
1091     * @see Ret2
1092     */
1093    public static Vector<Ret2<URL, MalformedURLException>> resolveSRCs_KE
1094        (Iterable<TagNode> tnListWithSRC, URL sourcePage)
1095    {
1096        Vector<Ret2<URL, MalformedURLException>> ret = new Vector<>();
1097
1098        for (TagNode tn : tnListWithSRC) ret.addElement(resolve_KE(tn.AV("src"), sourcePage));
1099
1100        return ret;
1101    }
1102
1103
1104    /**
1105     * This will use a "pointer array" - an array containing indexes into the downloaded page to
1106     * retrieve {@code TagNode's}.  The {@code TagNode} to which this pointer-array points - must
1107     * contain {@code HREF} inner-tags with {@code URL's}, or partial {@code URL's}.
1108     * 
1109     * <EMBED CLASS='external-html' DATA-FILE-ID=BASE_URL_NOTE>
1110     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
1111     * 
1112     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> Any HTML page (or sub-page)
1113     * 
1114     * @param nodePosArr An array of pointers into the page or sub-page.  The pointers must
1115     * reference {@code TagNode's} that contain {@code HREF} attributes.  Integer-pointer Arrays
     * are usually returned from the {@code package 'NodeSearch'} "Find" methods.
1117     *
1118     * <DIV CLASS="EXAMPLE">{@code 
1119     * // Retrieve 'pointers' to all the '<A HREF=...>' TagNode's.  The term 'pointer' refers to
1120     * // integer-indices into the vectorized-html variable 'page'
1121     * 
1122     * int[] anchorPosArr = TagNodeFind.all(page, TC.OpeningTags, "a");
1123     * 
1124     * // Extract each HREF inner-tag, and construct a URL.  Use the 'sourcePage' parameter if
1125     * // the URL is only partially-resolved.  If any URL's on the original-page are invalid, the
1126     * // method shall not crash, but save the exception instead.
1127     * 
     * Vector<Ret2<URL, MalformedURLException>> urlsWithEx =
     *     Links.resolveHREFs_KE(page, anchorPosArr, mySourcePage);
1130     *
1131     * // Print out any "failed" urls
1132     * for (Ret2<URL, MalformedURLException> r : urlsWithEx)
1133     *     if (r.b != null) 
1134     *         System.out.println("There was an exception: " + r.b.toString());
1135     * }</DIV>
1136     *
1137     * <BR /><I>which would obtain a pointer-array / (a.k.a. a "vector-index-array") to every HTML
1138     * {@code "<A ...>"} element</I> that was available in the HTML page-{@code Vector} parameter
     * {@code 'html'}, and then resolve any shortened {@code URL's}.
1140     *
1141     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's}
1142     * (possibly-relative) {@code URL's} in the {@code Vector} will be resolved.
1143     * 
1144     * @return A list of {@code URL's}, each of which have been completed/resolved with the
1145     * {@code 'sourcePage'} parameter.  If there were any {@code TagNode} with no {@code HREF} tag,
1146     * then null is returned in the related {@code Vector} position.  If any {@code TagNode} causes
1147     * a {@code MalformedURLException}, then that position in the {@code Vector} will contain the
1148     * exception in {@code Ret2.b}
1149     *
1150     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_HREF>
1151     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1152     *
1153     * @throws ArrayIndexOutOfBoundsException
1154     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX>
1155     * @throws OpeningTagNodeExpectedException
1156     * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX>
1157     * 
1158     * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX>
1159     *
1160     * @see #resolve_KE(String, URL)
1161     * @see TagNode#AV(String)
1162     * @see Ret2
1163     */
1164    public static Vector<Ret2<URL, MalformedURLException>> resolveHREFs_KE
1165        (Vector<? extends HTMLNode> html, int[] nodePosArr, URL sourcePage)
1166    {
1167         // Return Vector
1168        Vector<Ret2<URL, MalformedURLException>> ret = new Vector<>();
1169
1170        for (int nodePos : nodePosArr)
1171        {
1172            HTMLNode n = html.elementAt(nodePos);
1173
1174            // Must be an HTML TagNode
1175            if (! n.isTagNode()) throw new TagNodeExpectedException(nodePos);
1176
1177            TagNode tn = (TagNode) n;
1178
1179            // Must be an "Opening" HTML TagNode
1180            if (tn.isClosing) throw new OpeningTagNodeExpectedException(nodePos);
1181
1182            // Resolve the "HREF", keep the URL
1183            ret.addElement(resolve_KE(tn.AV("href"), sourcePage));
1184        }
1185
1186        return ret;
1187    }
1188 
1189    /**
1190     * This will use a "pointer array" - an array containing indexes into the downloaded page to
1191     * retrieve {@code TagNode's}.  The {@code TagNode} to which this pointer-array points - must 
1192     * contain {@code SRC} inner-tags with {@code URL's}, or partial {@code URL's}.
1193     * 
1194     * <EMBED CLASS='external-html' DATA-FILE-ID=BASE_URL_NOTE>
1195     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
1196     *
1197     * @param html <EMBED CLASS='external-html' DATA-FILE-ID=HTMLVEC> Any HTML page (or sub-page)
1198     * 
1199     * @param nodePosArr An array of pointers into the page or sub-page.  The pointers must
1200     * reference {@code TagNode's} that contain {@code SRC} attributes.  Integer-pointer Arrays are
     * usually returned from the {@code package 'NodeSearch'} "Find" methods.
1202     *
1203     * <DIV CLASS="EXAMPLE">{@code 
1204     * // Retrieve 'pointers' to all the '<IMG SRC=...>' TagNode's.  The term 'pointer' refers to
1205     * // integer-indices into the vectorized-html variable 'page'
1206     * 
1207     * int[] picturePosArr = TagNodeFind.all(page, TC.OpeningTags, "img");
1208     * 
1209     * // Extract each SRC inner-tag, and construct a URL.  Use the 'sourcePage' parameter if
1210     * // the URL is only partially-resolved.  If any URL's on the original-page are invalid,
1211     * // the method shall not crash, but save the exception instead.
1212     * 
     * Vector<Ret2<URL, MalformedURLException>> urlsWithEx =
1214     *      Links.resolveSRCs_KE(page, picturePosArr, mySourcePage);
1215     *
1216     * // Print out any "failed" urls
1217     * for (Ret2<URL, MalformedURLException> r : urlsWithEx)
1218     *     if (r.b != null) 
1219     *         System.out.println("There was an exception: " + r.b.toString());
1220     * }</DIV>
1221     *
1222     * <BR /><I>which would obtain a pointer-array / (a.k.a. a "vector-index-array") to every HTML
1223     * {@code "<IMG ...>"} element</I> that was available in the HTML page-{@code Vector} parameter
1224     * {@code 'html'}, and then resolve any shortened {@code URL's}.
1225     *
1226     * @param sourcePage This is the source page {@code URL} from which the {@code TagNode's}
1227     * (possibly-relative) {@code URL's} in the {@code Vector} will be resolved.
1228     *
1229     * @return A list of {@code URL's}, each of which have been completed/resolved with the 
1230     * {@code 'sourcePage'} parameter.  If there were any {@code TagNode} with no {@code SRC} tag,
1231     * then null is returned in the related {@code Vector} position.  If any {@code TagNode} causes
1232     * a {@code MalformedURLException}, then that position in the {@code Vector} will contain the
1233     * exception in {@code Ret2.b}
1234     *
1235     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_NO_SRC>
1236     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1237     *
1238     * @throws ArrayIndexOutOfBoundsException
1239     * <EMBED CLASS='external-html' DATA-FILE-ID=ATTR_AIOOB_EX>
1240     * @throws OpeningTagNodeExpectedException
1241     * <EMBED CLASS='external-html' DATA-FILE-ID=OPEN_TNE_EX>
1242     * 
1243     * @throws TagNodeExpectedException <EMBED CLASS='external-html' DATA-FILE-ID=TNE_EX>
1244     *
1245     * @see #resolve_KE(String, URL)
1246     * @see TagNode#AV(String)
1247     * @see Ret2
1248     */
1249    public static Vector<Ret2<URL, MalformedURLException>> resolveSRCs_KE
1250        (Vector<? extends HTMLNode> html, int[] nodePosArr, URL sourcePage)
1251    {
1252         // Return Vector
1253        Vector<Ret2<URL, MalformedURLException>> ret = new Vector<>();                                         
1254
1255        for (int nodePos : nodePosArr)
1256        {
1257            HTMLNode n = html.elementAt(nodePos);
1258
1259            // Must be an HTML TagNode
1260            if (! n.isTagNode()) throw new TagNodeExpectedException(nodePos);
1261
1262            TagNode tn = (TagNode) n;
1263
1264            // Must be an "Opening" HTML TagNode
1265            if (tn.isClosing) throw new OpeningTagNodeExpectedException(nodePos);
1266
1267            // Resolve "SRC" and keep URL's
1268            ret.addElement(resolve_KE(tn.AV("src"), sourcePage));
1269        }
1270
1271        return ret;
1272    }
1273
1274    /**
1275     * Resolve all {@code URL's}, represented as {@code String's}, inside of a {@code Vector}.
1276     * 
1277     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
1278     * 
1279     * @param src a list of {@code String's} - usually partially or totally completed Internet
1280     * {@code URL's}
1281     * 
1282     * @param sourcePage This is the source page {@code URL} from which the {@code String's}
1283     * (possibly-relative) {@code URL's} in the {@code Vector} will be resolved.
1284     * 
1285     * @return A list of {@code URL's}, each of which have been completed/resolved with the
1286     * {@code 'sourcePage'} parameter.  If there were any {@code String's} that were zero-length or
     * null, then null is returned in the related {@code Vector} position.  If any {@code String}
1288     * causes a {@code MalformedURLException}, then that position in the {@code Vector} will
1289     * contain the exception in {@code Ret2.b}
1290     *
1291     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1292     * 
1293     * @see #resolve_KE(String, URL)
1294     * @see Ret2
1295     */
1296    public static Vector<Ret2<URL, MalformedURLException>> resolve_KE
1297        (Vector<String> src, URL sourcePage)
1298    {
1299        Vector<Ret2<URL, MalformedURLException>> ret = new Vector<>();
1300
1301        for (String s : src) ret.addElement(resolve_KE(s, sourcePage));
1302
1303        return ret;
1304    }
1305
1306    /**
1307     * This will convert a simple java {@code String} to a {@code URL}, de-referencing any missing
1308     * information using the {@code 'sourcePage'} parameter.
1309     * 
1310     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_KE>
1311     * 
1312     * @param src Any java {@code String}, usually one which was scraped from an HTML-Page, and
1313     * needs to be "completed."
1314     * 
1315     * @param sourcePage This is the source page {@code URL} from which the String (possibly
1316     * relative) {@code URL} will be resolved.
1317     * 
1318     * @return A {@code URL}, which has been completed/resolved with the {@code 'sourcePage'}
1319     * parameter. If parameter {@code 'src'} is null or zero-length, null will be returned.  If a
1320     * {@code MalformedURLException} is thrown, that will be included with the {@code Ret2<>}
1321     * result.
1322     *
1323     * <EMBED CLASS='external-html' DATA-FILE-ID=LINKS_RET2>
1324     * 
1325     * @see Ret2
1326     */
1327    public static Ret2<URL, MalformedURLException> resolve_KE(String src, URL sourcePage)
1328    {
1329        if (sourcePage == null) throw new NullPointerException(
1330            "Though you may provide null to the partial-URL to dereference parameter, null " +
1331            "may not be passed to the Source-Page Parameter.  The purpose of the 'resolve' " +
1332            "operation is to resolve partial-URLs against a source-page (root) URL. " +
1333            "Therefore this is not allowed."
1334        );
1335
1336        if (src == null) return null;
1337
1338        src = src.trim();
1339
1340        if (src.length() == 0) return null;
1341
1342        String srcLC = src.toLowerCase();
1343
1344        if (StrCmpr.startsWithXOR
1345                (srcLC, "tel:", "javascript:", "mailto:", "magnet:", "file:", "ftp:", "#"))
1346
1347            return new Ret2<URL, MalformedURLException>
1348                (null, new MalformedURLException(
1349                    "InnerTag/Attribute begins with: " + src.substring(0, 1 + src.indexOf(":")) +
1350                    ", so it is not a hyper-link."
1351                ));
1352
1353
1354        // Includes the first few characters of the URL - for reporting/convenience. 
1355        // If this is an "image", the image-type & name will be included
1356
1357        if (StrCmpr.startsWithXOR(srcLC, "data:", "blob:"))
1358
1359            return new Ret2<URL, MalformedURLException>(null, new MalformedURLException(
1360                "InnerTag/Attribute begins with: " +
1361                ((src.length() > 25) ? src.substring(0, 25) : src) +
1362                ", not a URL."
1363            ));
1364
1365
1366        if (srcLC.startsWith("http://") || srcLC.startsWith("https://"))
1367
1368            try
1369                { return new Ret2<URL, MalformedURLException>(new URL(src), null); }
1370
1371            catch (MalformedURLException e)
1372                { return new Ret2<URL, MalformedURLException>(null, e); }
1373
1374
1375        if (src.startsWith("//") && (src.charAt(3) != '/'))
1376
1377            try
1378            { 
1379                return new Ret2<URL, MalformedURLException>
1380                    (new URL(  sourcePage.getProtocol().toLowerCase() + ":" + src), null);
1381            }
1382
1383            catch (MalformedURLException e)
1384                { return new Ret2<URL, MalformedURLException>(null, e); }
1385
1386
1387        if (src.startsWith("/"))
1388
1389            try
1390            {
1391                return new Ret2<URL, MalformedURLException>(new URL(
1392                    sourcePage.getProtocol().toLowerCase() + "://" +
1393                    sourcePage.getHost().toLowerCase() +
1394                    src), null
1395                );
1396            }
1397
1398            catch (MalformedURLException e)
1399                { return new Ret2<URL, MalformedURLException>(null, e); }
1400
1401
1402        if (src.startsWith("../"))
1403        {
1404            String  sourcePageStr   = sourcePage.toString();
1405            short   nLevels         = 0;
1406
1407            do
1408                { nLevels++;  src = src.substring(3); }
1409            while (src.startsWith("../"));
1410
1411            String  directory = StringParse.dotDotParentDirectory(sourcePage.toString(), nLevels);
1412
1413            try
1414                { return new Ret2<URL, MalformedURLException>(new URL(directory + src), null); }
1415
1416            catch (MalformedURLException e)
1417                { return new Ret2<URL, MalformedURLException>(null, e); }
1418
1419            catch (Exception e)
1420            { 
1421                return new Ret2<URL, MalformedURLException>
1422                    (null,
1423                    new MalformedURLException(e.getClass().getCanonicalName() +
1424                    ":" + e.getMessage())
1425                    );
1426            }
1427        }
1428
1429
1430        String  root =
1431            sourcePage.getProtocol().toLowerCase() + "://" + 
1432            sourcePage.getHost().toLowerCase();
1433
1434        String  path    = sourcePage.getPath().trim();
1435        int     pos     = StringParse.findLastFrontSlashPos(path);
1436
1437        if (pos == -1) throw new StringIndexOutOfBoundsException(
1438            "The URL you have provided: " + sourcePage.toString() +
1439            " does not have a '/' front-slash character in it's path." +
1440            "Cannot proceed resolving relative-URL's without this."
1441        );
1442
1443        path = path.substring(0, pos + 1);
1444
1445        try
1446            { return new Ret2<URL, MalformedURLException>(new URL(root + path + src), null); }
1447
1448        catch (MalformedURLException e)
1449            { return new Ret2<URL, MalformedURLException>(null, e); }
1450    }
1451}