1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
package Torello.Java.Additional;

import java.util.*;
import java.net.*;
import java.util.regex.*;
import java.io.*;

import static Torello.Java.C.*;

import Torello.Java.StorageWriter;
import Torello.Java.StrReplace;

/**
 * A class that plays-with URL's, no more, no less.
 * 
 * <EMBED CLASS='external-html' DATA-FILE-ID=URLS>
 */
@Torello.JavaDoc.StaticFunctional
public class URLs
{
    // Not instantiable: the only constructor declared here is private and does nothing.
    private URLs() { }

    /**
     * The source text of a Regular-Expression {@code (java.util.regex.Pattern)}, saved as a
     * {@code String}.  The compiled form of this expression is {@link #P1}.
     *
     * <BR /><BR />The primary function is to match the leading portion of {@code String's} that
     * are intended to be HTTP-{@code URL's}.  The expression is anchored to the start of the
     * input ({@code '^'}), and has one capturing group holding three alternatives, which are
     * tried in this order:
     * 
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI>{@code http(s)://...<any-text>.../} - the final {@code '/'} must end the input</LI>
     * <LI>{@code http(s)://...<any-text, not front-slash>...} - running to the end of the
     *      input</LI>
     * <LI>{@code http(s)://...<any-text>.../...<any-text, not front-slash>...} - the shortest
     *      text up to a {@code '/'}, followed by one or more characters that are not
     *      {@code '/'}</LI>
     * </UL>
     * 
     * <BR /><BR /><B CLASS=JDDescLabel>Third Alternative:</B>
     * 
     * <BR />Because the third alternative continues past the first {@code '/'} that follows
     * {@code "://"}, a match may include more than the protocol and domain.  For example, the
     * entire text {@code http://a.com/dir} is matched, as is the leading {@code http://a.com/dir}
     * of {@code http://a.com/dir/file.html}.
     * 
     * <BR /><BR /><B CLASS=JDDescLabel>Primarily used in:</B>
     * 
     * <BR /><UL CLASS=JDUL>
     * <LI>{@link #toProperURLV3(String)}</LI>
     * <LI>{@link #toProperURLV4(String)}</LI>
     * </UL>
     * 
     * @see #P1
     */
    protected static final String RE1 =
         "^(http[s]?:\\/\\/.*?\\/$|http[s]?:\\/\\/[^\\/]*$|http[s]?:\\/\\/.*?\\/[^\\/]+)";

    /**
     * The compiled form of {@link #RE1}: {@code P1 = Pattern.compile(RE1);}
     * 
     * <BR /><BR />Both {@link #toProperURLV3(String)} and {@link #toProperURLV4(String)} use this
     * pattern to find the leading part of a {@code URL} that they leave un-escaped.
     * 
     * @see #RE1
     */
    protected static final Pattern P1 = Pattern.compile(RE1);

    /**
     * Java Help Message explaining {@code class java.net.URL} - and the specific output of its
     * methods.
     *
     * <EMBED CLASS='external-html' DATA-FILE-ID=URLS_HELP_MSG>
     *
     * <BR /><BR />A fixed list of example {@code URL's} is parsed, and for each of them the
     * results of the {@code java.net.URL} "getter" methods, along with the result of
     * {@code urlToString(URL)}, are printed.  All of this text is written using
     * {@code sw.println(...)}.
     *
     * @param sw An instance of class StorageWriter.  This parameter may be null, and if it is
     * a new {@code StorageWriter} is built with its zero-argument constructor and used instead
     * (presumably sending the text-output to Standard-Output - confirm against
     * {@code StorageWriter}).
     */
    protected static final void javaURLHelpMessage(StorageWriter sw)
    {
        if (sw == null) sw = new StorageWriter();

        final String[] urlStrArr = {
            "https://DALLASCITYHALL.com", "https://dallascityhall.com/",
            "https://dallascityhall.com/news",
            "https://dallascityhall.com/news/", "http://DALLASCITYHALL.com/news/ARTICLE-1.html",
            "https://DallasCityHall.com/NEWS/article1.html?q=somevalue",
            "https://DallasCityHall.com/news/ARTICLE-1.html#subpart1",
            "https://DallasCityHall.com/NEWS/article1.html?q=somevalue&q2=someOtherValue",
            "https://DallasCityHall.com/NEWS/article1.html?q=somevalue&q2=someOtherValue#LocalRef"
        };

        final URL[] urlArr = new URL[urlStrArr.length];

        try
            { for (int i=0; i < urlStrArr.length; i++) urlArr[i] = new URL(urlStrArr[i]); }

        // The String-constructor of java.net.URL only declares MalformedURLException
        catch (MalformedURLException e)
        {
            sw.println(
                "Broke a URL, and it generated an exception.\n" +
                "Sorry, fix the URL's in this method.\n" + 
                "Did you change them?"
            );

            e.printStackTrace();
            return;
        }

        // The report is written to 'sw' (rather than directly to System.out), so that a
        // writer supplied by the caller actually receives the help-message.

        for (URL u : urlArr)
        {
            sw.println(
                "u.toString():\t\t"     + BCYAN + u.toString() + RESET + '\n' +
                "u.getProtocol():\t"    + u.getProtocol() + '\n' +
                "u.getHost():\t\t"      + u.getHost() + '\n' +
                "u.getPath():\t\t"      + u.getPath() + '\n' +
                "u.getFile():\t\t"      + u.getFile() + '\n' +
                "u.getQuery():\t\t"     + u.getQuery() + '\n' +
                "u.getRef():\t\t"       + u.getRef() + '\n' +
                "u.getAuthority():\t"   + u.getAuthority() + '\n' +
                "u.getUserInfo():\t"    + u.getUserInfo() + '\n' +
                "urlToString(u):\t\t"   + urlToString(u)
            );
        }
    }


    // ********************************************************************************************
    // ********************************************************************************************
    // Helper function for making URL address readable by web-servers.
    //*********************************************************************************************
    // ********************************************************************************************


    /**
     * When scraping Spanish {@code URL's}, these characters can / should be escaped.  The list
     * holds the accented upper and lower case vowels (including {@code 'Á'}), {@code 'Ü'},
     * {@code 'Ñ'}, {@code 'Ý'} and the inverted punctuation marks {@code '¿'} and {@code '¡'}.
     * 
     * <BR /><BR /><B CLASS=JDDescLabel>Parallel Array Note:</B>
     * 
     * <BR />This array shall be considered parallel to the <B><I>replacement</I></B>
     * {@code String[]}-Array {@link #VOWELS_URL}.  The character at index {@code 'i'} is
     * replaced by the {@code String} at index {@code 'i'} of that array, so the two arrays must
     * always have identical lengths and identical ordering.
     * 
     * @see #toProperURLV1(String)
     * @see #VOWELS_URL
     */
    protected static final char[] VOWELS = {
        'Á', 'á', 'É', 'é', 'Í', 'í', 'Ó', 'ó', 'Ú', 'ú', 'Ü', 'ü',
        'Ñ', 'ñ', 'Ý', 'ý', '¿', '¡'
    };

    /**
     * When scraping Spanish {@code URL's}, these {@code String's} are the
     * <B>URL Escape Sequences</B> for the Spanish Characters listed in {@link #VOWELS}.  Each
     * entry is the percent-encoded UTF-8 byte sequence of the corresponding character.
     * 
     * <BR /><BR /><B CLASS=JDDescLabel>Parallel Array Note:</B>
     * 
     * <BR />This array shall be considered parallel to the {@code char[]}-Array {@link #VOWELS}.
     * 
     * @see #toProperURLV1(String)
     * @see #VOWELS
     */
    protected static final String[] VOWELS_URL =
    {
        "%C3%81", "%C3%A1", "%C3%89", "%C3%A9", "%C3%8D", "%C3%AD", "%C3%93", "%C3%B3",
        "%C3%9A", "%C3%BA", "%C3%9C", "%C3%BC", "%C3%91", "%C3%B1", "%C3%9D", "%C3%BD",
        "%C2%BF", "%C2%A1"
    };

    /**
     * Substitutes the Spanish-Language characters that can make a web-query difficult with
     * their {@code URL} Escape-Sequences.  The characters that are replaced are the ones listed
     * in {@link #VOWELS}, and the replacement for each is the entry at the same index of
     * {@link #VOWELS_URL}.  Those two arrays are the authoritative list; the pairs include:
     *
     * <BR /><BR /><TABLE CLASS=JDBriefTable>
     * <TR><TH>Spanish Language Character</TH><TH>URL Escape Sequence</TH></TR>
     * <TR><TD>{@code á}</TD><TD>{@code %C3%A1}</TD></TR>
     * <TR><TD>{@code É}</TD><TD>{@code %C3%89}</TD></TR>
     * <TR><TD>{@code é}</TD><TD>{@code %C3%A9}</TD></TR>
     * <TR><TD>{@code Í}</TD><TD>{@code %C3%8D}</TD></TR>
     * <TR><TD>{@code í}</TD><TD>{@code %C3%AD}</TD></TR>
     * <TR><TD>{@code Ó}</TD><TD>{@code %C3%93}</TD></TR>
     * <TR><TD>{@code ó}</TD><TD>{@code %C3%B3}</TD></TR>
     * <TR><TD>{@code Ú}</TD><TD>{@code %C3%9A}</TD></TR>
     * <TR><TD>{@code ú}</TD><TD>{@code %C3%BA}</TD></TR>
     * <TR><TD>{@code Ü}</TD><TD>{@code %C3%9C}</TD></TR>
     * <TR><TD>{@code ü}</TD><TD>{@code %C3%BC}</TD></TR>
     * <TR><TD>{@code Ñ}</TD><TD>{@code %C3%91}</TD></TR>
     * <TR><TD>{@code ñ}</TD><TD>{@code %C3%B1}</TD></TR>
     * <TR><TD>{@code Ý}</TD><TD>{@code %C3%9D}</TD></TR>
     * <TR><TD>{@code ý}</TD><TD>{@code %C3%BD}</TD></TR>
     * <TR><TD>{@code ¿}</TD><TD>{@code %C2%BF}</TD></TR>
     * <TR><TD>{@code ¡}</TD><TD>{@code %C2%A1}</TD></TR>
     * </TABLE>
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Historical Note:</B>
     * 
     * <BR />This was the first {@code URL}-escaping method written for the Java-HTML
     * {@code '.jar'}.
     *
     * @param url  Any website {@code URL} query.
     *
     * @return The result of {@code StrReplace.r(url, VOWELS, VOWELS_URL)} - the same
     * {@code URL} with the substitutions made.
     * 
     * @see #VOWELS
     * @see #VOWELS_URL
     * @see StrReplace#r(String, char[], String[])
     */
    public static String toProperURLV1(String url)
    {
        // All of the work is delegated to the parallel-array overload of StrReplace.r
        final String escaped = StrReplace.r(url, VOWELS, VOWELS_URL);

        return escaped;
    }

    /**
     * This list of java {@code char's} are characters that are better off escaped when passing
     * them through a {@code URL}.  It is the set of characters that
     * {@link #toProperURLV2(String)} hands to {@code StrReplace.r} for replacement.
     * 
     * <BR /><BR />Every entry is an ASCII character.  The list includes the percent-sign
     * {@code '%'} itself, the space character, and also the {@code URL}-delimiters
     * {@code '/'}, {@code ':'}, {@code '?'}, {@code '&'}, {@code '='} and {@code '#'}.
     * 
     * @see #toProperURLV2(String)
     * @see #URL_ESC_CHARS_ABBREV
     */
    protected static final char[] URL_ESC_CHARS =
    {
        '%', ' ', '#', '$', '&', '@', '`', '/', ':', ';', '<', '=', '>', '?', '[', '\\',
        ']', '^', '{', '|', '}', '~', '\'', '+', ','
    };

    /**
     * Escapes the characters listed in {@link #URL_ESC_CHARS}.  Since that list includes
     * {@code ':'} and {@code '/'}, this method will clobber any leading Protocol and
     * Domain-Name - the {@code http://domain.name.something/} stuff.  It is best to use this
     * method on {@code String's} that will be inserted into a {@code URL} after the {@code '?'}
     * question-mark, inside the Query-String.
     * 
     * <BR /><BR />This can be very useful when sending JSON Arguments, for instance, inside a
     * {@code URL's} Query-String, instead of the GET / POST part of a request.
     * 
     * <BR /><BR /><B CLASS=JDDescLabel>Escape Format:</B>
     * 
     * <BR />The replacement produced for a character is a {@code '%'} followed by the result of
     * {@code Integer.toHexString} on that character (lower-case hexadecimal digits).  The exact
     * set of characters handed to {@code StrReplace.r} is the array {@link #URL_ESC_CHARS},
     * all of which are ASCII.  This method is therefore not a means for escaping Non-ASCII
     * characters.
     * 
     * @param urlStuff Any information that is intended to be sent via an HTTP-{@code URL}, and
     * needs to be escaped.
     *
     * @return An escaped version of this {@code URL-String}
     * 
     * @see #URL_ESC_CHARS
     * @see StrReplace#r(String, char[], IntCharFunction)
     */
    public static String toProperURLV2(String urlStuff)
    {
        return StrReplace.r(urlStuff, URL_ESC_CHARS, (int pos, char ch) ->
        {
            // The position argument is not needed, only the character's numeric value
            final String hex = Integer.toHexString(ch);

            return "%" + hex;
        });
    }

    /**
     * Applies {@link #toProperURLV2(String)} to a {@code URL}, but leaves the beginning of the
     * {@code URL} alone - IF PRESENT!
     * 
     * <BR /><BR />The Regular-Expression {@link #P1} is searched for in the input.  When it is
     * found, the text matched by its capturing group is copied to the output exactly as it is,
     * and only the text that comes after it is passed to {@code toProperURLV2}.  When it is not
     * found, the whole input is passed to {@code toProperURLV2}.
     *
     * <BR /><BR />The intent is that for text beginning {@code http(s?)://domain.something/}
     * the initial colon, forward-slashes and periods are not escaped.
     * 
     * <BR /><BR /><B CLASS=JDDescLabel>Review Note:</B>
     * 
     * <BR />The text which {@link #RE1} matches can extend beyond the domain.  Its third
     * alternative also accepts the characters following the first {@code '/'}, up to the next
     * {@code '/'}.  That portion is therefore left un-escaped as well - TODO confirm that this
     * is intended.
     *
     * @param url This may be any internet {@code URL}, represented as a {@code String}.  It will
     * be escaped with the {@code %INT} format.
     *
     * @return An escaped {@code URL String}
     *
     * @see #toProperURLV2(String)
     * @see #P1
     */
    public static String toProperURLV3(String url)
    {
        final Matcher matcher = P1.matcher(url);

        // The un-escaped prefix is empty whenever the Regular-Expression does not match
        final String prefix = matcher.find() ? matcher.group(1) : "";

        return prefix + toProperURLV2(url.substring(prefix.length()));
    }

    /**
     * This is a (shortened) list of characters that <I>should</I> be escaped before being used
     * within a {@code URL}.
     * 
     * <BR /><BR />This version differs from {@link #URL_ESC_CHARS} in that it does not include the
     * {@code '&'} (ampersand), the {@code '?'} (question-mark) or the {@code '/'} (forward-slash).
     * All of the other characters from that list are present here, which includes {@code ':'},
     * {@code '='} and {@code '#'}.
     * 
     * @see #URL_ESC_CHARS
     * @see #toProperURLV4(String)
     */
    protected static final char[] URL_ESC_CHARS_ABBREV =
    {
        '%', ' ', '#', '$', '@', '`', ':', ';', '<', '=', '>', '[', '\\', ']',
        '^', '{', '|', '}', '~', '\'', '+', ','
    };

    /**
     * This does the same thing as V3, except that the characters which are escaped come from
     * the shorter list {@link #URL_ESC_CHARS_ABBREV}.  Any {@code '?'} (question-mark),
     * {@code '&'} (ampersand) or {@code '/'} (forward-slash) symbols are left as they are,
     * anywhere in the entire {@code String}.
     * 
     * <BR /><BR />Just like {@code toProperURLV3}, the beginning of the input that is matched
     * by {@link #P1} (the {@code HTTP(s)://domain.net.something/} part) is copied to the output
     * without any escaping at all.
     *
     * @param url This may be any internet {@code URL}, represented as a {@code String}.
     * 
     * @return This does the same thing as {@code toProperURLV3(String)}, but leaves out 100%
     * of the instances of Ampersand, Question-Mark, and Forward-Slash symbols. 
     *
     * @see #toProperURLV3(String)
     * @see #P1
     * @see #URL_ESC_CHARS_ABBREV
     * @see StrReplace#r(String, char[], IntCharFunction)
     */
    public static String toProperURLV4(String url)
    {
        final Matcher matcher = P1.matcher(url);

        // Whatever the Regular-Expression matched is passed through untouched
        final String prefix     = matcher.find() ? matcher.group(1) : "";
        final String remainder  = url.substring(prefix.length());

        return prefix + StrReplace.r(
            remainder, URL_ESC_CHARS_ABBREV,
            (int pos, char ch) -> "%" + Integer.toHexString(ch)
        );
    }

    /**
     * <EMBED CLASS='external-html' DATA-FILE-ID=URLS_PRP_URL_V5>
     * 
     * <BR /><BR />The input is trimmed.  If it begins with {@code 'http://'} or
     * {@code 'https://'} (in any case), it is parsed by the {@code java.net.URL} constructor,
     * and only its path is encoded.  Otherwise the entire {@code String} is treated as a path.
     * 
     * <BR /><BR />The path is divided on the {@code '/'} character, and each piece is encoded
     * using {@code java.net.URLEncoder.encode(piece, "UTF-8")}.  Note that this encoder follows
     * the HTML-Form rules, which means that a space-character becomes a {@code '+'}.  The
     * pieces are joined again with {@code '/'}, and empty pieces are retained - so leading,
     * trailing and doubled slashes are all preserved.
     * 
     * <BR /><BR />For a parsed {@code URL}, the result is the protocol, {@code "://"}, the
     * host, the port (only if one was specified), the encoded path, and then the Query-String
     * and the Reference / Fragment.  Those last two are copied as they are, without encoding.
     *
     * @param url This is the URL to be encoded, properly
     *
     * @return A properly encoded URL String.  Important, if calling the {@code java.net.URL}
     * constructor generates a {@code MalformedURLException}, then this method shall return
     * null.  The {@code java.net.URL} constructor will be called if the {@code String} passed
     * begins with the characters {@code 'http://'} or {@code 'https://'}.
     */
    public static String toProperURLV5(String url)
    {
        url = url.trim();

        URL     u   = null;
        String  tlc = url.toLowerCase();

        if (tlc.startsWith("http://") || tlc.startsWith("https://"))
        {
            try
                { u = new URL(url); }

            catch (MalformedURLException e)
                { return null; }
        }

        // The negative limit keeps trailing empty Strings, so that a trailing '/' (or a path
        // that is only "/") is not lost when the pieces are joined back together.

        final String[]      sArr    = ((u == null) ? url : u.getPath()).split("/", -1);
        final StringBuilder sb      = new StringBuilder();

        for (int i=0; i < sArr.length; i++)
        {
            if (i > 0) sb.append('/');

            try
                { sb.append(java.net.URLEncoder.encode(sArr[i], "UTF-8")); }

            // Every JVM is required to support UTF-8.  If this is ever reached, fail loudly
            // instead of silently dropping a part of the path.

            catch (UnsupportedEncodingException e)
                { throw new IllegalStateException("UTF-8 Encoding is not supported", e); }
        }

        if (u == null) return sb.toString();

        // URL.getPort() returns -1 whenever the URL did not specify a port
        final String port = (u.getPort() != -1) ? (":" + u.getPort()) : "";

        return
            u.getProtocol() + "://" + u.getHost() + port + sb.toString() +
            ((u.getQuery() != null) ? ("?" + u.getQuery())  : "") +
            ((u.getRef() != null)   ? ("#" + u.getRef())    : "");
    }

    /**
     * Splits a {@code URL} into its distinct parts, and then escapes each of the parts
     * separately.  Rather than trying to explain everything that is escaped and what is left
     * alone, please review the exact code here.  In summary:
     *
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI>The protocol, the host and the port (if one was specified) are copied as they are.</LI>
     * <LI>The path is encoded by {@link #toProperURLV5(String)}.</LI>
     * <LI>The Query-String is divided on {@code '&'}, and each of those pieces is divided on
     *     {@code '='}.  Every resulting piece is encoded with {@code java.net.URLEncoder}
     *     (UTF-8), and the {@code '&'} and {@code '='} separators are put back unchanged.
     *     Empty pieces are retained, so a parameter such as {@code "q="} keeps its
     *     {@code '='}.</LI>
     * <LI>The Reference / Fragment (text after {@code '#'}) is encoded with
     *     {@code java.net.URLEncoder} (UTF-8).</LI>
     * </UL>
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Another One:</B>
     * 
     * <BR />This version of {@code URL}-Encoding is capable of dealing with {@code URL's} that
     * contain the {@code '?'  '='  '&'} operators of {@code GET}-Requests.  The JDK class
     * {@code java.net.URLEncoder} expects the {@code URL} to have already been separated into
     * its distinct parts, and this method does that separating before performing the
     * Character-Escaping.
     *
     * @param url This is any java {@code URL}.
     *
     * @return a new, escaped, {@code String} version of the input parameter {@code 'url'}, or
     * null if the {@code java.net.URL} constructor throws a {@code MalformedURLException}
     * for it.
     */
    public static String toProperURLV6(String url)
    {
        final URL u;

        try
            { u = new URL(url); }

        catch (MalformedURLException e)
            { return null; }

        final StringBuilder sb = new StringBuilder();

        sb.append(u.getProtocol());
        sb.append("://");
        sb.append(u.getHost());

        // URL.getPort() returns -1 whenever the URL did not specify a port
        if (u.getPort() != -1) sb.append(':').append(u.getPort());

        sb.append(toProperURLV5(u.getPath()));

        try
        {
            final String query = u.getQuery();

            if (query != null)
            {
                String ampersand = "";

                sb.append('?');

                // The negative limit passed to both of the 'split' invocations retains any
                // trailing empty Strings.  Without it, "q=" would be changed into "q".

                for (String pair : query.split("&", -1))
                {
                    String equals = "";

                    sb.append(ampersand);

                    for (String part : pair.split("=", -1))
                    {
                        sb.append(equals).append(java.net.URLEncoder.encode(part, "UTF-8"));
                        equals = "=";
                    }

                    ampersand = "&";
                }
            }

            // java.net.URL will parse a URL that has both the '?' and the '#'
            final String ref = u.getRef();

            if (ref != null) sb.append('#').append(java.net.URLEncoder.encode(ref, "UTF-8"));
        }

        // Every JVM is required to support UTF-8.  If this is ever reached, fail loudly
        // instead of silently dropping a part of the URL.

        catch (UnsupportedEncodingException e)
            { throw new IllegalStateException("UTF-8 Encoding is not supported", e); }

        return sb.toString();
    }

    /**
     * Escapes a {@code URL} that is provided as a {@code String}, strictly using Java's URI
     * Encoding Mechanism.  The {@code String} is converted into a {@code java.net.URL}, and that
     * instance is passed to {@link #toProperURLV8(URL)}.
     * 
     * <BR /><BR />These seem to work the same as "V6".  Internally, these are now used.  This
     * as of November, 2019.
     *
     * @param url A Complete Java {@code URL}, as a {@code String}.  Any specialized
     * Escape-Characters that need to be escaped, will be.
     * 
     * @return The value returned by {@code toProperURLV8}
     *
     * @throws URISyntaxException This will throw if building the {@code URI} generates an
     * exception.  Internally, all this method does is build a {@code URI}, and then call the Java
     * Method {@code 'toASCIIString()'}
     * 
     * @throws MalformedURLException If the {@code java.net.URL} constructor rejects
     * {@code 'url'}
     */
    public static String toProperURLV7(String url) throws URISyntaxException, MalformedURLException
    {
        final URL parsed = new URL(url);

        return toProperURLV8(parsed);
    }

    /**
     * Escapes a {@code URL}, strictly using Java's URI Encoding Mechanism.  The protocol,
     * user-info, host, port, path, query and reference of the {@code URL} are passed, in that
     * order, to the seven-argument constructor of {@code java.net.URI}.  The value returned is
     * the {@code toASCIIString()} of the resulting {@code URI}.
     * 
     * <BR /><BR />These seem to work the same as "V6".  Internally, these are now used.  This
     * as of November, 2019.
     *
     * @param url A Complete Java {@code URL}.  Any specialized Escape-Characters that need to be
     * escaped, will be.
     * 
     * @return The {@code URI.toASCIIString()} result for the {@code URI} that was built
     *
     * @throws URISyntaxException This will throw if building the URI generates an exception.
     * Internally, all this method does is build a URI, and then call the Java Method
     * {@code 'toASCIIString()'}
     * 
     * @throws MalformedURLException This is declared in the method's signature, although
     * nothing inside the body of this method throws it.
     */
    public static String toProperURLV8(URL url) throws URISyntaxException, MalformedURLException
    {
        final String    scheme      = url.getProtocol();
        final String    userInfo    = url.getUserInfo();
        final String    host        = url.getHost();
        final int       port        = url.getPort();
        final String    path        = url.getPath();
        final String    query       = url.getQuery();
        final String    fragment    = url.getRef();

        final URI uri = new URI(scheme, userInfo, host, port, path, query, fragment);

        return uri.toASCIIString();
    }


    // ********************************************************************************************
    // ********************************************************************************************
    // The original "URLs" class
    //*********************************************************************************************
    // ********************************************************************************************


    /**
     * If you have a list of {@code URL's}, and want to quickly remove any
     * duplicate-{@code URL's} found in the list - this will remove them.  The first occurrence
     * of a {@code URL} is kept, and every later occurrence is removed from the {@code Vector}.
     * 
     * <BR /><BR />Two {@code URL's} are considered duplicates when {@code urlToString(URL)}
     * returns equal {@code String's} for them.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Case Sensitivity:</B>
     * 
     * <BR />This method is intended to ignore case in the protocol and Web-Domain parts, but not
     * in the file, directory, or Query-String portion of the {@code URL}.  (This depends on the
     * behavior of {@code urlToString} - confirm against that method.)
     *
     * <BR /><BR />This should hilite what is Case-Sensitive, and what is not:
     * 
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI> These are considered duplicate URL's:
     *      <BR />
     *      <BR /><CODE>http://some.company.com/index.html</CODE>
     *      <BR /><CODE>HTTP://SOME.COMPANY.COM/index.html</CODE>
     *      <BR /><BR />
     *      </LI>
     * 
     * <LI> These are <I>not</I> considered duplicate URL's:
     *      <BR />
     *      <BR /><CODE>http://other.company.com/Directory/Ben-Bitdiddle.html</CODE>
     *      <BR /><CODE>http://other.company.com/DIRECTORY/BE.html</CODE>
     *      </LI>
     * </UL>
     *
     * @param urls Any list of {@code URL's}, some of which might have been duplicated.  This
     * {@code Vector} is modified in place.
     * 
     * <BR /><BR /><B>NOTE:</B> <I>Null {@code Vector}-values are skipped outright, they are
     * neither removed nor changed.</I>
     *
     * @return The number of {@code Vector} elements that were removed.  (i.e. <I>The size by which
     * the {@code Vector} was shrunk.</I>)
     */
    public static int removeDuplicates(Vector<URL> urls)
    {
        final TreeSet<String>   seen    = new TreeSet<>();
        int                     count   = 0;

        // Removing through the Iterator guarantees that the element which is removed is the
        // duplicate that was just visited, and that no element is skipped afterwards.

        for (Iterator<URL> iter = urls.iterator(); iter.hasNext(); )
        {
            final URL url = iter.next();

            if (url == null) continue;

            // TreeSet.add returns FALSE when the String is already present in the set
            if (seen.add(urlToString(url))) continue;

            iter.remove();
            count++;
        }

        return count;
    }

    /**
     * This simple method will remove any {@code URL's} from the input {@code Vector} parameter
     * {@code 'potentiallyNewURLs'} which are also present-members of the input {@code Vector} 
     * parameter {@code 'visitedURLs'}.  A {@code URL} that occurs more than once inside
     * {@code 'potentiallyNewURLs'} itself is also reduced to its first occurrence.
     * 
     * <BR /><BR />Two {@code URL's} are considered to be the same when {@code urlToString(URL)}
     * returns equal {@code String's} for them.  This may seem trivial, and it is, but it is
     * intended to worry about things like the {@code String's} Case for you.
     *
     * @param visitedURLs This parameter is a list of {@code URL's} that have already
     * "been visited."  This {@code Vector} is not modified, and any null elements that it
     * contains are ignored.
     *
     * @param potentiallyNewURLs This parameter is a list of {@code URL's} that are possibly
     * "un-visited" - meaning whatever scrape, crawl or search being performed needs to know which
     * {@code URL's} are listed in the previous parameter's contents.  This {@code Vector} is
     * modified in place.  Null elements are skipped, they are neither removed nor changed.
     *
     * @return The number of {@code URL's} that were removed from the input {@code Vector}
     * parameter {@code 'potentiallyNewURLs'}.
     */
    public static int removeDuplicates(Vector<URL> visitedURLs, Vector<URL> potentiallyNewURLs)
    {
        // The easiest way to check for duplicates is to build a tree-set of all the URL's as a
        // String.  TreeSet.add reports (by returning FALSE) an attempt to add a duplicate.

        final TreeSet<String> seen = new TreeSet<>();

        // Build a TreeSet of the url's from the "Visited URLs" parameter
        for (URL visited : visitedURLs)
            if (visited != null)
                seen.add(urlToString(visited));

        // Add the "Possibly New URLs", one-by-one, and remove them if they are already in the
        // set.  Removing through the Iterator guarantees that the element which is removed is
        // the duplicate that was just visited - even when it is the very first element.

        int count = 0;

        for (Iterator<URL> iter = potentiallyNewURLs.iterator(); iter.hasNext(); )
        {
            final URL url = iter.next();

            if (url == null) continue;

            if (seen.add(urlToString(url))) continue;

            iter.remove();
            count++;
        }

        return count;
    }

    /**
     * Removes any Fragment-{@code URL} {@code '#'} symbols from a {@code URL}.
     * 
     * <BR /><BR />If this {@code URL} contains a pound-sign Anchor-Name according to the Standard
     * JDK's {@code URL.getRef()} method - specifically, if {@code URL.getRef()} returns a non-null
     * value - this method rebuilds the URL, without any Anchor-Name / Fragment information.
     * 
     * <BR /><BR />The rebuilt {@code URL} is composed of the lower-cased protocol, the
     * lower-cased host, the port (only if one was specified), and the value of
     * {@code URL.getFile()} - which is the path, plus the Query-String if there is one.
     * 
     * @param url Any standard HTTP {@code URL}.  If this {@code 'url'} contains a {@code '#'}
     * (Pound Sign, Partial Reference) - according to the standard JDK {@code URL.getRef()} method,
     * then it shall be removed.
     * 
     * @return The {@code URL} without the partial-reference, or the original {@code URL}
     * instance if there was no partial reference.  Null is returned if there is an error
     * instantiating the new {@code URL} without the partial-reference.
     */
    public static URL shortenPoundREF(URL url)
    {
        if (url.getRef() == null) return url;

        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale
        final String protocol =
            (url.getProtocol() != null) ? url.getProtocol().toLowerCase(Locale.ROOT) : "";

        final String host =
            (url.getHost() != null) ? url.getHost().toLowerCase(Locale.ROOT) : "";

        // URL.getPort() returns -1 whenever the URL did not specify a port.  An explicit port
        // has to be copied, or else the new URL would address a different server.

        final String port = (url.getPort() != -1) ? (":" + url.getPort()) : "";

        final String file = (url.getFile() != null) ? url.getFile() : "";

        try
            { return new URL(protocol + "://" + host + port + file); }

        catch (MalformedURLException e)
            { return null; }
    }

    /**
     * <EMBED CLASS='external-html' DATA-FILE-ID=URLS_NAMED_ANCHORS>
     * 
     * <BR /><BR />Every element of the {@code Vector} for which {@code URL.getRef()} returns a
     * non-null value is replaced by a new {@code URL} that is composed of the lower-cased
     * protocol, the lower-cased host, the port (only if one was specified), and the value of
     * {@code URL.getFile()}.  Null elements of the {@code Vector} are skipped.
     *
     * @param urls Any list of completed (read: <I>fully-resolved</I>) {@code URL's}.  This
     * {@code Vector} is modified in place.
     *
     * @param ifExceptionSetNull If this parameter is passed {@code TRUE}, if there is ever an
     * exception-throw while building the new {@code URL's} (without the fragment / pound-sign),
     * then that position in the {@code Vector} will be replaced with a null.
     * 
     * <BR /><BR />When this parameter is passed {@code FALSE}, if an exception is thrown, then
     * it will be caught and silently ignored.
     *
     * @return The number / count of {@code URL's} in this list that were modified.  Whenever a
     * {@code URL} Named-Anchor is encountered, it will be removed from the {@code URL}, and a
     * new {@code URL} without the fragment-part will be inserted to replace the old one.
     * 
     * <BR /><BR />The integer that is returned here is the number of times that a replacement
     * was made to the input {@code Vector}-parameter {@code 'urls'}.
     */
    public static int shortenPoundREFs(Vector<URL> urls, boolean ifExceptionSetNull)
    {
        int shortenCount = 0;

        for (int i = (urls.size() - 1); i >= 0; i--)
        {
            final URL url = urls.elementAt(i);

            // Null elements may be present (this method stores them itself whenever
            // 'ifExceptionSetNull' is TRUE), and they must not cause a NullPointerException.

            if ((url == null) || (url.getRef() == null)) continue;

            // Locale.ROOT keeps the lower-casing independent of the JVM's default locale
            final String protocol =
                (url.getProtocol() != null) ? url.getProtocol().toLowerCase(Locale.ROOT) : "";

            final String host =
                (url.getHost() != null) ? url.getHost().toLowerCase(Locale.ROOT) : "";

            // URL.getPort() returns -1 whenever the URL did not specify a port
            final String port = (url.getPort() != -1) ? (":" + url.getPort()) : "";

            final String file = (url.getFile() != null) ? url.getFile() : "";

            try
            {
                urls.setElementAt(new URL(protocol + "://" + host + port + file), i);
                shortenCount++;
            }

            catch (MalformedURLException e)
                { if (ifExceptionSetNull) urls.setElementAt(null, i); }
        }

        return shortenCount;
    }

    /**
     * <EMBED CLASS='external-html' DATA-FILE-ID=URLS_NAMED_ANCHORS>
     *
     * <BR /><BR /><B CLASS=JDDescLabel>KE: Keep Exceptions</B>
     *
     * <BR />This method does the same work as {@code shortenPoundREFs(Vector, boolean)}, except
     * that it allows a programmer to keep / retain any {@code MalformedURLException's} that are
     * thrown while re-building the {@code URL's}.
     * 
     * <BR /><BR />Every element of the {@code Vector} for which {@code URL.getRef()} returns a
     * non-null value is replaced by a new {@code URL} that is composed of the lower-cased
     * protocol, the lower-cased host, the port (only if one was specified), and the value of
     * {@code URL.getFile()}.  Null elements of the {@code Vector} are skipped.
     *
     * @param urls Any list of completed (read: <I>fully-resolved</I>) {@code URL's}.  This
     * {@code Vector} is modified in place.
     *
     * @param ifExceptionSetNull If this is {@code TRUE} then if there is ever an exception building
     * a new {@code URL} without a "Relative {@code URL '#'}" (Pound-Sign), then that position in
     * the {@code Vector} will be replaced with 'null.'
     *
     * @return The number/count of {@code URL's} in this list that were modified.  If a {@code URL}
     * was modified, it was because it had a partial-page reference in it.  If in the process of
     * generating a new {@code URL} out of an old one, a {@code MalformedURLException} occurs, the
     * exception will be placed in the {@code Ret2.b} position, which is a 
     * {@code Vector<MalformedURLException>}.
     *
     * <BR /><BR /><B>SPECIFICALLY:</B>
     *
     * <BR /><BR /><UL CLASS=JDUL>
     * 
     * <LI> {@code Ret2.a = 'Integer'} number of {@code URL's} shortened for having a {@code '#'}
     *      partial-reference.
     *      </LI>
     * 
     * <LI> {@code Ret2.b = Vector<MalformedURLException>}.  This {@code Vector} has the same
     *      size as {@code 'urls'}, and each element of it is null if there were no problems
     *      converting the {@code URL} at that index, or the exception reference if there was
     *      an exception thrown.
     *      </LI>
     * 
     * </UL>
     */
    public static Ret2<Integer, Vector<MalformedURLException>> shortenPoundREFs_KE
        (Vector<URL> urls, boolean ifExceptionSetNull)
    {
        int                             shortenCount    = 0;
        Vector<MalformedURLException>   v               = new Vector<>();

        // Vector.setSize pads the (empty) Vector with nulls, making it parallel to 'urls'.
        // Invoking 'setElementAt' on an empty Vector would throw an exception instead.

        v.setSize(urls.size());

        for (int i = (urls.size() - 1); i >= 0; i--)
        {
            final URL url = urls.elementAt(i);

            // Null elements may be present (this method stores them itself whenever
            // 'ifExceptionSetNull' is TRUE), and they must not cause a NullPointerException.

            if ((url == null) || (url.getRef() == null)) continue;

            // Locale.ROOT keeps the lower-casing independent of the JVM's default locale
            final String protocol =
                (url.getProtocol() != null) ? url.getProtocol().toLowerCase(Locale.ROOT) : "";

            final String host =
                (url.getHost() != null) ? url.getHost().toLowerCase(Locale.ROOT) : "";

            // URL.getPort() returns -1 whenever the URL did not specify a port
            final String port = (url.getPort() != -1) ? (":" + url.getPort()) : "";

            final String file = (url.getFile() != null) ? url.getFile() : "";

            try
            {
                urls.setElementAt(new URL(protocol + "://" + host + port + file), i);
                shortenCount++;
            }

            catch (MalformedURLException e)
            {
                if (ifExceptionSetNull) urls.setElementAt(null, i);
                v.setElementAt(e, i);
            }
        }

        return new Ret2<Integer, Vector<MalformedURLException>>(Integer.valueOf(shortenCount), v);
    }

    /**
     * On the internet, a {@code URL} is part case-sensitive, and part case-insensitive.  The
     * Domain-Name and Protocol ({@code http://}, and {@code 'some.company.com'}) portions of the
     * {@code URL} <I>are Case-Insensitive - they may be in any combination of upper or lower
     * case</I>.
     *
     * <BR /><BR />However, the directory, file-name, and (optional) Query-{@code String} portion
     * of a {@code URL} are (often, but not always) Case-Sensitive.  The sensitivity to case in
     * these three parts of a {@code URL} is dependent upon the individual Web-Server that is 
     * providing the content for the {@code URL}.
     *
     * <BR /><BR />To summarize, DNS servers which monitor the Domain-Name part of a {@code URL}
     * treat upper &amp; lower case English-Letters as the same.  Web-Servers that utilize the
     * File Directory part of a {@code URL} will sometimes care about case, and sometimes won't.
     * This behavior is dependent upon how the Web-Master has configured his system.
     *
     * @param url This may be any Internet-Domain {@code URL}
     *
     * @return A {@code String} version of this {@code URL}, but the domain and protocol portions
     * of the {@code URL} will be a "consistent" lower case.  The case of the directory, file and
     * (possibly, but not guaranteed to be present) {@code query-string} and {@code '#'} reference
     * portions will not have their case modified either way.  The lower-casing is performed
     * using {@code Locale.ROOT}, so the result does not depend on the default locale of the JVM.
     *
     * <BR /><BR /><B>NOTE:</B> This type of information is pretty important if you are attempting
     * to scan for duplicate {@code URL's} or check their equality.
     *
     * <BR /><BR /><B>NOTE:</B> The returned {@code String} is assembled from the protocol, host,
     * path, query and reference only.  An explicit port or user-info component of the
     * {@code URL} is not included.
     */
    public static String urlToString(URL url)
    {
        // Locale.ROOT: with a locale-sensitive toLowerCase(), a Turkish or Azeri default locale
        // would map 'I' to a dotless 'i', producing a different (and wrong) host-name string.
        String protocol = (url.getProtocol() != null)
            ? url.getProtocol().toLowerCase(java.util.Locale.ROOT)
            : "";

        String host = (url.getHost() != null)
            ? url.getHost().toLowerCase(java.util.Locale.ROOT)
            : "";

        // The remaining components are copied verbatim; their case may be significant.
        String path     = (url.getPath()    != null) ? url.getPath()            : "";
        String query    = (url.getQuery()   != null) ? ('?' + url.getQuery())   : "";
        String ref      = (url.getRef()     != null) ? ('#' + url.getRef())     : "";

        return protocol + "://" + host + path + query + ref;
    }

    /**
     * As of today, the version of UNIX {@code curl} command does not seem to be downloading
     * everything properly.  It downloaded an image {@code '.png'} file just fine, but seemed to
     * have botched a zip-file.  This does what UNIX {@code 'curl'} command, <I>but does not
     * actually invoke the UNIX operating system to do it.</I>  It just does this...
     *
     * <BR /><BR />An HTTP {@code GET} request is issued for the {@code URL}, and the bytes of
     * the response body are copied, unmodified, into the output file.  Both the response stream
     * and the output file are closed before this method returns, whether it returns normally
     * or by throwing an exception.
     *
     * @param url This may be any URL, but it is intended to be a downloadable file.  It will
     * download {@code '.html'} files fine, but you may try images, data-files, zip-files,
     * tar-archives, and movies.  The connection that is opened for this {@code URL} is cast to
     * {@code HttpURLConnection}.
     *
     * @param outFileName You must specify a file-name, and if this parameter is null, a
     * {@code NullPointerException} will be thrown immediately.  If you would like your program
     * to guess the filename - <I>based on the file named in the URL</I>, please use the method
     * {@code URL.getFile()}, or something to that effect.
     * 
     * @param userAgent A User-Agent, as a {@code String}.  If this parameter is passed null,
     * it will be silently ignored, and a User-Agent won't be used.
     * 
     * @throws IOException If there are I/O Errors when using the {@code HttpURLConnection}, or
     * when writing the output file.
     *
     * @throws NullPointerException If {@code outFileName} is null.
     */
    public static void CURL(URL url, String outFileName, String userAgent) throws IOException
    {   
        // Fail before any network activity, so that a null file-name never leaves an open
        // connection behind.
        if (outFileName == null) throw new NullPointerException
            ("The parameter 'outFileName' was passed null.");

        HttpURLConnection con = (HttpURLConnection) url.openConnection();

        con.setRequestMethod("GET");

        if (userAgent != null) con.setRequestProperty("User-Agent", userAgent);

        // try-with-resources guarantees that both streams are closed, even when the read or
        // the write fails part of the way through the transfer.
        try (
            InputStream         is  = con.getInputStream();
            FileOutputStream    fos = new FileOutputStream(outFileName);
        )
        {
            byte[]  b       = new byte[5000];
            int     result  = 0;

            while ((result = is.read(b)) != -1) fos.write(b, 0, result);

            fos.flush();
        }
    }
}