001package Torello.HTML;
002
003import java.util.stream.*;
004import java.util.*;
005
006import Torello.Java.LV;
007import Torello.Java.StrCSV;
008import Torello.Java.ExceptionCheckError;
009
010/**
011 * A simple utility class that, used ubiquitously throughout Java HTML, which maintains two integer
012 * fields - <CODE><B><A HREF='#start'>DotPai&#46;start</A></B></CODE> and
013 * <CODE><B><A HREF='#end'>DotPai&#46;end</A></B></CODE> , for demarcating the begining and ending
014 * of a sub-list within an HTML web-page.
015 * 
016 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=DOT_PAIR>
017 * 
018 * @see NodeIndex
019 * @see SubSection
020 */
021public final class DotPair
022implements java.io.Serializable, Comparable<DotPair>, Cloneable, Iterable<Integer>
023{
024    /** <EMBED CLASS='external-html' DATA-FILE-ID=SVUID> */
025    public static final long serialVersionUID = 1;
026
027    /**
028     * This is intended to be the "starting index" into an sub-array of an HTML {@code Vector} of
029     * {@code HTMLNode} elements.
030     */
031    public final int start;
032
033    /**
034     * This is intended to be the "ending index" into a sub-array of an HTML {@code Vector} of 
035     * {@code HTMLNode} elements.
036     */
037    public final int end;
038
039
040    // ********************************************************************************************
041    // ********************************************************************************************
042    // Constructor
043    // ********************************************************************************************
044    // ********************************************************************************************
045
046
047    /**
048     * This constructor takes two integers and saves them into the {@code public} member fields.
049     * 
050     * @param start This is intended to store the starting position of a vectorized-webpage
051     * sub-list or subpage.
052     * 
053     * @param end This will store the ending position of a vectorized-html webpage or subpage.
054     * 
055     * @throws IndexOutOfBoundsException A negative {@code 'start'} or {@code 'end'}
056     * parameter-value will cause this exception throw.
057     * 
058     * @throws IllegalArgumentException A {@code 'start'} parameter-value that is larger than the
059     * {@code 'end'} parameter will cause this exception throw.
060     * 
061     * @see NodeIndex
062     * @see SubSection
063     */
064    public DotPair(int start, int end)
065    {
066        if (start < 0) throw new IndexOutOfBoundsException
067            ("Negative start value passed to DotPair constructor: start = " + start);
068
069        if (end < 0) throw new IndexOutOfBoundsException
070            ("Negative ending value passed to DotPair constructor: end = " + end);
071
072        if (end < start) throw new IllegalArgumentException(
073            "Start-parameter value passed to constructor is greater than ending-parameter: " +
074            "start: [" + start + "], end: [" + end + ']'
075        );
076
077        this.start  = start;
078        this.end    = end;
079    }
080
081    /**
082     * Creates a new instance that has been shifted by {@code 'delta'}.
083     * 
084     * @param delta The number of array indices to shift {@code 'this'} intance.  This parameter
085     * may be negative, and if so, {@code 'this'} will be shifted left, instead of right.
086     * 
087     * @return A new, shifted, instance of {@code 'this'}
088     */
089    public DotPair shift(int delta)
090    { return new DotPair(this.start + delta, this.end + delta); }
091
092
093    // ********************************************************************************************
094    // ********************************************************************************************
095    // Standard Java Methods
096    // ********************************************************************************************
097    // ********************************************************************************************
098
099
100    /**
101     * Implements the standard java {@code 'hashCode()'} method.  This will provide a hash-code
102     * that is likely to avoid crashes.
103     * 
104     * @return A hash-code that may be used for inserting {@code 'this'} instance into a hashed
105     * table, map or list.
106     */
107    public int hashCode()
108    { return this.start + (1000 * this.end); }
109
110    /**
111     * The purpose of this is to remind the user that the array bounds are inclusive at <B>BOTH</B>
112     * ends of the sub-list.  
113     * 
114     * <BR /><BR /><B CLASS=JDDescLabel>Inclusive &amp; Exclusive:</B>
115     * 
116     * <BR />For an instance of {@code 'DotPair'}, the intention is to include both the
117     * characters located at the {@code Vector}-index positions {@link #start} and the one at
118     * {@link #end}.  Specifically,  (and unlike many of the {@code Node-Search} package methods)
119     * both of the internal fields to this class are <B STYLE='color: red'><I>inclusive</I></B>,
120     * rather than exclusive.
121     * 
122     * <BR /><BR />For many of the search methods in package {@link Torello.HTML.NodeSearch}, the
123     * {@code 'ePos'} parameters are always <B STYLE='color: red'><I>exclusive</I></B> - meaning
124     * the character at {@code Vector}=index {@code 'ePos'} is not included in the search.
125     * 
126     * @return The length of a sub-array that would be indicated by this dotted pair.
127     */
128    public int size() { return this.end - this.start + 1; }
129
130    /**
131     * Java's {@code toString()} requirement.
132     * 
133     * @return A string representing 'this' instance of DotPair.
134     */
135    public String toString() { return "[" + start + ", " + end + "]"; }
136
137    /**
138     * Java's {@code public boolean equals(Object o)} requirements.
139     * 
140     * @param o This may be any Java {@code Object}, but only ones of {@code 'this'} type whose 
141     * internal-values are identical will force this method to return {@code TRUE}.
142     * 
143     * @return {@code TRUE} if (and only if) parameter {@code 'o'} is an {@code instanceof DotPair}
144     * and, also, both have equal start and ending field values.
145     */
146    public boolean equals(Object o)
147    {
148        if (o instanceof DotPair)
149        {
150            DotPair dp = (DotPair) o;
151            return (this.start == dp.start) && (this.end == dp.end);
152        }
153
154        else return false;
155    }
156
157    /**
158     * Java's {@code interface Cloneable} requirements.  This instantiates a new {@code DotPair}
159     * with identical {@code 'start', 'end'} fields.
160     * 
161     * @return A new {@code DotPair} whose internal fields are identical to this one.
162     */
163    public DotPair clone() { return new DotPair(this.start, this.end); }
164
165    /**
166     * Java's {@code interface Comparable<T>} requirements.  <I>This is not the only comparison4
167     * operation possible,</I> but it does satisfy one reasonable requirement -
168     * <I>SPECIFICALLY:</I> which of two separate instances of {@code DotPair} start first.
169     * 
170     * <BR /><BR /><B CLASS=JDDescLabel>Comparator Heuristic:</B>
171     * 
172     * <BR />If two {@code DotPair} instances begin at the same {@code Vector}-index, then the
173     * shorter of the two shall come first.
174     * 
175     * @param other Any other {@code DotPair} to be compared to {@code 'this' DotPair}
176     * 
177     * @return An integer that fulfils Java's
178     * {@code interface Comparable<T> public boolean compareTo(T t)} method requirements.
179     */
180    public int compareTo(DotPair other)
181    {
182        int ret = this.start - other.start;
183
184        return (ret != 0) ? ret : (this.size() - other.size());
185    }
186
187    /**
188     * This is an "alternative Comparitor" that can be used for sorting instances of this class.
189     * It should work with the {@code Collections.sort(List, Comparator)} method in the standard
190     * JDK package {@code java.util.*;}
191     * 
192     * <BR /><BR /><B CLASS=JDDescLabel>Comparator Heuristic:</B>
193     * 
194     * <BR />This "extra <CODE>Comparitor</CODE>" simply compares the size of one {@code DotPair}
195     * to a second.  The smaller shall be sorted first, and the larger (longer-in-length)
196     * {@code DotPair} shall be sorted later.  If they are of equal size, whichever of the two has
197     * an earlier {@link #start} position in the {@code Vector} is considered first.
198     * 
199     * @see CommentNode#body
200     */
201    public static Comparator<DotPair> comp2 = (DotPair dp1, DotPair dp2) ->
202    {
203        int ret = dp1.size() - dp2.size();
204
205        return (ret != 0) ? ret : (dp1.start - dp2.start);
206    };
207
208    /**
209     * This shall return an {@code int Iterator} (which is properly named
210     * {@code class java.util.PrimitiveIterator.OfInt}) that iterates integers beginning with the
211     * value in {@code this.start} and ending with the value in {@code this.end}.
212     * 
213     * @return An {@code Iterator} that iterates {@code 'this'} instance of {@code DotPair} from
214     * the beginning of the range, to the end of the range.  The {@code Iterator} returned will
215     * produce Java's primitive type {@code int}.
216     * 
217     * <BR /><BR /><B STYLE='color:red;'>NOTE:</B> The elements returned by the {@code Iterator}
218     * are integers, and this is, in effect, nothing more than one which counts from {@link #start}
219     * to {@link #end}.
220     */
221    public PrimitiveIterator.OfInt iterator()
222    { 
223        return new PrimitiveIterator.OfInt()
224        {
225            private int cursor = start;
226
227            public boolean hasNext()    { return this.cursor <= end; }
228
229            public int nextInt()
230            {
231                if (cursor == end) throw new NoSuchElementException
232                    ("Cursor has reached the value stored in 'end' [" + end + "]");
233
234                return cursor++;
235            }
236        };
237    }
238
239    /**
240     * A simple {@code Iterator} that will iterate elements on an input page, using {@code 'this'}
241     * intance of {@code DotPair's} indices, {@link #start}, and {@link #end}.
242     * 
243     * @param page This may be any HTML page or sub-page.  This page should correspond to 
244     * {@code 'this'} instance of {@code DotPair}.
245     * 
246     * @return An {@code Iterator} that will iterate each node in the page, beginning with the
247     * node at {@code page.elementAt(this.start)}, and ending with {@code page.elementAt(this.end)}
248     * 
249     * @throws IndexOutOfBoundsException This throws if {@code 'this'} instance does not have a
250     * range that adheres to the size of the input {@code 'page'} parameter.
251     */
252    public <T extends HTMLNode> Iterator<T> iterator(Vector<T> page)
253    {
254        if (this.start >= page.size()) throw new IndexOutOfBoundsException(
255            "This instance of DotPair points to elements that are outside of the range of the" +
256            "input 'page' Vector.\n" +
257            "'page' parameter size: " + page.size() + ", this.start: [" + this.start + "]"
258        );
259
260        if (this.end >= page.size()) throw new IndexOutOfBoundsException(
261            "This instance of DotPair points to elements that are outside of the range of the" +
262            "input 'page' Vector.\n" +
263            "'page' parameter size: " + page.size() + ", this.end: [" + this.end + "]"
264        );
265
266        return new Iterator<T>()
267        {
268            private int cursor          = start;    // a.k.a. 'this.start'
269            private int expectedSize    = page.size();
270            private int last            = end;      // a.k.a. 'this.end'
271
272            public boolean hasNext() { return cursor < last; }
273
274            public T next()
275            {
276                if (++cursor > last) throw new NoSuchElementException(
277                    "This iterator's cursor has run past the end of the DotPaiar instance that " +
278                    "formed this Iterator.  No more elements to iterate.  Did you call hasNext() ?"
279                );
280
281                if (page.size() != expectedSize) throw new ConcurrentModificationException(
282                    "The expected size of the underlying vector has changed." +
283                    "\nCurrent-Size " +
284                    "[" + page.size() + "], Expected-Size [" + expectedSize + "]\n" +
285                    "\nCursor location: [" + cursor + "]"
286                );
287
288                return page.elementAt(cursor);
289            }
290
291            // Removes the node from the underlying {@code Vector at the cursor's location.
292            public void remove()
293            { page.removeElementAt(cursor); expectedSize--; cursor--; last--; }
294        };
295    }
296
297
298    // ********************************************************************************************
299    // ********************************************************************************************
300    // Simple Boolean tests
301    // ********************************************************************************************
302    // ********************************************************************************************
303
304
305    /**
306     * This will test whether a specific index is contained (between {@code this.start} and
307     * {@code this.end}, inclusively.
308     * 
309     * @param index This is any integer index value.  It must be greater than zero.
310     * 
311     * @return {@code TRUE} If the value of index is greater-than-or-equal-to the value stored in
312     * field {@code 'start'} and furthermore is less-than-or-equal-to the value of field
313     * {@code 'end'}
314     * 
315     * @throws IndexOutOfBoundsException If the value is negative, this exception will throw.
316     */
317    public boolean isInside(int index)
318    {
319        if (index < 0) throw new IndexOutOfBoundsException
320            ("You have passed a negative index [" + index + "] here, but this is not allowed.");
321
322        return (index >= start) && (index <= end);
323    }
324
325    /**
326     * Tests whether {@code 'this' DotPair} is fully enclosed by {@code DotPair} parameter
327     * {@code 'other'}
328     * 
329     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
330     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
331     * comparison is likely meaningless.
332     * 
333     * @return {@code TRUE} If (and only if) parameter {@code 'other'} encloses {@code 'this'}.
334     */
335    public boolean enclosedBy(DotPair other)
336    { return (other.start <= this.start) && (other.end >= this.end); }
337
338    /**
339     * Tests whether {@code 'this' DotPair} is enclosed, completely, by parameter {@code DotPair}
340     * parameter {@code 'other'}
341     * 
342     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
343     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
344     * comparison is likely meaningless.
345     * 
346     * @return {@code TRUE} If (and only if) parameter {@code 'other'} is enclosed completely by
347     * {@code 'this'}.
348     */
349    public boolean encloses(DotPair other)
350    { return (this.start <= other.start) && (this.end >= other.end); }
351
352    /**
353     * Tests whether parameter {@code 'other'} has any overlapping {@code Vector}-indices with
354     * {@code 'this' DotPair}
355     * 
356     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
357     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
358     * comparison is likely meaningless.
359     * 
360     * @return {@code TRUE} If (and only if) parameter {@code 'other'} and {@code 'this'} have any
361     * overlap.
362     */
363    public boolean overlaps(DotPair other)
364    {
365        return
366            ((this.start >= other.start)    && (this.start <= other.end)) ||
367            ((this.end >= other.start)      && (this.end <= other.end));
368    }
369
370    /**
371     * Tests whether {@code 'this'} lays, <I>completely</I>, before {@code DotPair} parameter
372     * {@code 'other'}.
373     * 
374     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
375     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
376     * comparison is likely meaningless.
377     * 
378     * @return {@code TRUE} if <I>every index</I> of {@code 'this'} has a value that is less than
379     * every index of {@code 'other'}
380     */
381    public boolean isBefore(DotPair other)
382    { return this.end < other.start; }
383
384    /**
385     * Tests whether {@code 'this'} begins before {@code DotPair} parameter {@code 'other'}.
386     * 
387     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
388     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
389     * comparison is likely meaningless.
390     * 
391     * @return {@code TRUE} if {@code this.start} is less than {@code other.start}, and
392     * {@code FALSE} otherwise.
393     */
394    public boolean startsBefore(DotPair other)
395    { return this.start < other.start; }
396
397    /**
398     * Tests whether {@code 'this'} lays, <I>completely</I>, after {@code DotPair} parameter
399     * {@code 'other'}.
400     * 
401     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
402     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
403     * comparison is likely meaningless.
404     * 
405     * @return {@code TRUE} if <I>every index</I> of {@code 'this'} has a value that is greater
406     * than every index of {@code 'other'}
407     */
408    public boolean isAfter(DotPair other)
409    { return this.start > other.end; }
410
411    /**
412     * Tests whether {@code 'this'} ends after {@code DotPair} parameter {@code 'other'}.
413     * 
414     * @param other Another {@code DotPair}.  This parameter is expected to be a descriptor of the
415     * same vectorized-webpage as {@code 'this' DotPair} is.  It is not mandatory, but if not, the
416     * comparison is likely meaningless.
417     * 
418     * @return {@code TRUE} if {@code this.end} is greater than {@code other.end}, and
419     * {@code FALSE} otherwise.
420     */
421    public boolean endsAfter(DotPair other)
422    { return this.end > other.end; }
423
424
425
426
427    // ********************************************************************************************
428    // ********************************************************************************************
429    // Exception Check
430    // ********************************************************************************************
431    // ********************************************************************************************
432
433
434    /**
435     * A method that will do a fast check that {@code 'this'} intance holds index-pointers to
436     * an opening and closing HTML-Tag pair.  Note, though these mistakes may seem trivial, when
437     * parsing Internet Web-Pages, these are exactly the type of basic mistakes that users will
438     * make when their level of 'concentration' is low.  This is no different that checking an
439     * array-index or {@code String}-index for an {@code IndexOutOfBoundsException}.
440     * 
441     * <BR /><BR />This type of detailed exception message can make analyzing web-pages more
442     * direct and less error-prone.  The 'cost' incurred includes only a few {@code if}-statement
443     * comparisons, and <I>this check should be performed immediatley <B>before a loop is
444     * entered.</B></I>
445     * 
446     * @param page Any web-page, or sub-page.  It needs to be the page from whence {@code 'this'}
447     * instance of {@code DotPair} was retrieved.
448     * 
449     * @throws TagNodeExpectedException If {@code 'this'} instance' {@link #start} or {@link #end}
450     * fields do not point to {@code TagNode} elements on the {@code 'page'}.
451     * 
452     * @throws HTMLTokException If {@link #start} or {@link #end} do not point to a {@code TagNode}
453     * whose {@link TagNode#tok} field equals the {@code String} contained by parameter
454     * {@code 'token'}.
455     * 
456     * @throws OpeningTagNodeExpectedException If {@link #start} does not point to an opening
457     * {@code TagNode}.
458     * 
459     * @throws ClosingTagNodeExpectedException If {@link #end} does not point to a closing
460     * {@code TagNode}.
461     * 
462     * @throws NullPointerException If the {@code 'page'} parameter is null.
463     * 
464     * @throws ExceptionCheckError <B STYLE='color:red;'>IMPORTANT</B> Since this method is,
465     * indubuitably, a method for performing error checking, the presumption is that the programmer
466     * is trying to check for <I>his users input</I>.  If in the processes of checking for user
467     * error, another mistake is made that would generate an exception, this must thought of as a
468     * more serious error.
469     * 
470     * <BR /><BR />The purpose of the {@code 'possibleTokens'} array is to check that those tokens
471     * match the tokens that are contained by the {@code TagNode's} on the page at index 
472     * {@code this.start}, and {@code this.end}.  If invalid HTML tokens, null tokens, or even
473     * HTML Singleton tokens are passed <B>this exception-check, itself, is flawed!</B>  If there
474     * are problems with this var-args array, this error is thrown.
475     * 
476     * <BR /><BR />It is more serious because it indicates that the programmer has made a mistake
477     * in attempting to check for user-errors.
478     */
479    public void exceptionCheck(Vector<HTMLNode> page, String... possibleTokens)
480    {
481        if (page == null) throw new NullPointerException
482            ("HTML-Vector parameter was passed a null reference.");
483
484        if (possibleTokens == null) throw new ExceptionCheckError
485            ("HTML tags string-list was passed a null reference.");
486
487        for (String token : possibleTokens)
488        {
489            if (token == null) throw new ExceptionCheckError
490                ("One of the HTML Tag's in the tag-list String-array was null.");
491
492            if (! HTMLTags.isTag(token)) throw new ExceptionCheckError
493                ("One of the passed tokens [" + token +"] is not a valid HTML token.");
494
495            if (HTMLTags.isSingleton(token)) throw new ExceptionCheckError
496                ("One of the passed tokens [" + token +"] is an HTML Singleton.");
497        }
498
499
500        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
501        // Check the DotPair.start
502        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
503
504        if (this.start >= page.size()) throw new IndexOutOfBoundsException(
505            "DotPair's 'start' field [" + this.start + "], is greater than or equal to the " +
506            "size of the HTML-Vector [" + page.size() + "]."
507        );
508
509        if (! (page.elementAt(this.start) instanceof TagNode))
510            throw new TagNodeExpectedException(this.start);
511
512        TagNode t1 = (TagNode) page.elementAt(this.start);
513
514        if (t1.isClosing) throw new OpeningTagNodeExpectedException(
515            "The TagNode at index [" + this.start + "] was a closing " +
516            "</" + t1.tok.toUpperCase() + ">, but an opening tag was expected here."
517        );
518
519
520        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
521        // Now Check the DotPair.end
522        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
523
524        if (this.end >= page.size()) throw new IndexOutOfBoundsException(
525            "DotPair's 'end' field [" + this.end + "], is greater than or equal to the " +
526            "size of the HTML-Vector [" + page.size() + "]."
527        );
528
529        if (! (page.elementAt(this.end) instanceof TagNode))
530            throw new TagNodeExpectedException(this.end);
531
532        TagNode t2 = (TagNode) page.elementAt(this.end);
533
534        if (! t2.isClosing) throw new ClosingTagNodeExpectedException(
535            "The TagNode at index [" + this.start + "] was an opening " +
536            "<" + t2.tok.toUpperCase() + ">, but a closing tag was expected here."
537        );
538
539
540        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
541        // Token Check
542        // *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
543
544        if (! t1.tok.equalsIgnoreCase(t2.tok)) throw new HTMLTokException(
545            "The opening TagNode was the [" + t1.tok.toLowerCase() + "] HTML Tag, while the " +
546            "closing Tag was the [" + t2.tok.toLowerCase() + "].  These two tag's must be an " +
547            "opening and closing pair, and therefore must match each-other."
548        );
549
550        for (String possibleToken : possibleTokens)
551            if (possibleToken.equalsIgnoreCase(t1.tok))
552                return;
553
554        String t = t1.tok.toUpperCase();
555
556        throw new HTMLTokException(
557            "The opening and closing tags were: <" + t + ">, and </" + t + ">, but " +
558            "unfortunately this Tag is not included among the list of expected tags:\n" +
559            "    [" + StrCSV.toCSV(possibleTokens, false, false, 60) + "]."
560        );
561    }
562
563    /**
564     * Performs an exception check, using {@code 'this'} instance of {@code DotPair}, and throws
565     * an {@code IndexOutOfBoundsException} if {@code 'this'} contains end-points that do not fit
566     * inside the {@code 'page'} Vector Parameter.
567     * 
568     * @param page Any HTML Page, or subpage.  {@code page.size()} must return a value that is
569     * larger than <B STYLE='color: red;'>BOTH</B> {@link #start}
570     * <B STYLE='color:red;'>AND</B> {@link #end}.
571     * 
572     * @throws IndexOutOfBoundsException A value for {@link #start} or {@link #end} which
573     * are larger than the size of the {@code Vector} parameter {@code 'page'} will cause this
574     * exception throw.
575     */
576    public void exceptionCheck(Vector<HTMLNode> page)
577    {
578        if (this.end >= page.size()) throw new IndexOutOfBoundsException(
579            "The value of this.end [" + this.end + "] is greater than the size of Vector " +
580            "parameter 'page' [" + page.size() + "]"
581        );
582
583        // This is actually unnecessary.  If 'end' is fine, then 'start' must be fine.  If 'end' is
584        // out of bounds, then it is irrelevant whether 'start' is out of bounds.  "They" play with
585        // your brain when you are coding.
586
587        /*
588        if (this.start >= page.size()) throw new IndexOutOfBoundsException(
589            "The value of this.start [" + this.start + "] is greater than the size of Vector " +
590            "parameter 'page' [" + page.size() + "]"
591        );
592        */
593    }
594}