001package Torello.HTML;
002
003import Torello.Java.NException;
004
005import java.util.*;
006
007import java.util.stream.IntStream;
008
009public class DPUtil
010{
011    // ********************************************************************************************
012    // ********************************************************************************************
013    // To Vector & Sublist
014    // ********************************************************************************************
015    // ********************************************************************************************
016
017
018    /**
019     * This method converts a sublist, represented by a "dotted pair", and converts it into a
020     * {@code Vector} of {@code HTMLNode}.
021     * 
022     * <BR /><BR /><B CLASS=JDDescLabel>Retrieval Heuristic:</B>
023     * 
024     * <BR />It should be obvious that the parameter @code 'dp'} contains fields named
025     * {@link DotPair#start} and {@link DotPair#end}.  These two simply represent the starting and
026     * ending indices into an HTML page {@code Vector}.  In this method, the intention is that they
027     * are indices into the HTML-{@code Vector} parameter simply-named {@code 'html'}.
028     * 
029     * <BR /><BR />This method cycles through that {@code Vector}, beginning with the field
030     * {@code dp.start} (inclusive) and ending with {@code dp.end} (inclusive, as well).  Each
031     * {@code HTMLNode} reference within this sublist is inserted into the {@code Vector} that is 
032     * ultimately returned.
033     * 
034     * @param html Any Vectorized-HTML Web-Page, or sub-page
035     * 
036     * @param dp Any sublist within that HTML page.
037     * 
038     * @return A {@code Vector} version of the original sublist that was represented by 
039     * passed parameter {@code 'dp'}
040     */
041    public static Vector<HTMLNode> toVector(Vector<? extends HTMLNode> html, DotPair dp)
042    {
043        Vector<HTMLNode> ret = new Vector<>();
044
045        // NOTE: Cannot return the result of "subList" because it is linked/mapped to the original
046        //       Vector.  If changes are made to "subList", those changes will be reflected in the
047        //       original HTML Vector.
048
049        ret.addAll(html.subList(dp.start, dp.end + 1));
050
051        return ret;
052    }
053
054    /**
055     * This will cycle through a "list of sublists" and call the method
056     * {@code toVector(Vector<? extends HTMLNode> html, DotPair dp)}
057     * on each sublist in the input parameter {@code 'sublists'}  Those sublists will be collected
058     * into another {@code Vector} and returned.
059     * 
060     * @param html Any Vectorized-HTML Web-Page, or sub-page
061     * 
062     * @param sublists A "List of sublists" within that HTML page.
063     * 
064     * @return This method shall return a {@code Vector} containing vectors as sublists.
065     */
066    public static Vector<Vector<HTMLNode>> toVectors
067        (Vector<? extends HTMLNode> html, Iterable<DotPair> sublists)
068    {
069        Vector<Vector<HTMLNode>> ret = new Vector<>();
070
071        for (DotPair sublist : sublists) ret.addElement(toVector(html, sublist));
072
073        return ret;
074    }
075
076    /**
077     * This will cycle through a "list of sublists" and call the method
078     * {@code toVector(Vector<? extends HTMLNode> html, DotPair dp) }
079     * on each sublist in the input parameter {@code 'sublists'}.  Those sublists will be
080     * collected into another {@code Vector} and returned.
081     * 
082     * @param html Any Vectorized-HTML Web-Page, or sub-page
083     * 
084     * @param sublists A "List of sublists" within that HTML page.
085     * 
086     * @return This method shall return a {@code Vector} containing vectors as sublists.
087     */
088    public static Vector<SubSection> toSubSections(Vector<? extends HTMLNode> html, Iterable<DotPair> sublists)
089    {
090        Vector<SubSection> ret = new Vector<>();
091
092        for (DotPair sublist : sublists)
093            ret.addElement(new SubSection(sublist, toVector(html, sublist)));
094
095        return ret;
096    }
097
098
099    // ********************************************************************************************
100    // ********************************************************************************************
101    // List / Iterable of DotPair's to a Stream, Iterator, Array
102    // ********************************************************************************************
103    // ********************************************************************************************
104
105
106    /**
107     * Convenience Method.
108     * <BR />Invokes: {@link #toStream(Iterable, boolean)}
109     * <BR />Converts output to an {@code Iterator}
110     */
111    public static PrimitiveIterator.OfInt iterator(Iterable<DotPair> dpi, boolean leastToGreatest)
112    { return toStream(dpi, leastToGreatest).iterator(); }
113
114    /**
115     * Convenience Method.
116     * <BR />Invokes: {@link #toStream(Iterable, boolean)}
117     * <BR />Converts output to an {@code int[]} array.
118     */
119    public static int[] toPosArray(Iterable<DotPair> dpi, boolean leastToGreatest)
120    { return toStream(dpi, leastToGreatest).toArray(); }
121
122    /**
123     * This method will convert a list of {@code DotPair} instances to a Java
124     * {@code java.util.stream.IntStream}.  The generated {@code IntStream} shall contain all
125     * {@code Vector}-indices (integers) that are within the bounds of any of the {@code DotPair's}
126     * listed by parameter {@code 'dpi'}.
127     * 
128     * <BR /><BR />Stating this a second time, the returned position-index list (which is of Java
129     * Type {@code IntStream}) is built out of the contents of each and every one of the the
130     * {@code DotPair's} in {@code Iterable}-parameter {@code 'dpi'}.  This index-list which is
131     * ultimately returned from this method <I>will contain all indices that are "inside" all
132     * {@coded DotPair's}</I>.
133     * 
134     * <BR /><BR /><B CLASS=JDDescLabel>Repeated Indices &amp; Gaps:</B>
135     * 
136     * <BR />This method will never include an index more than once, and all integers in the
137     * returned {@code IntStream} will be unique.
138     *
139     * <BR /><BR />The sublists (the {@code DotPair's} of input-parameter {@code 'dpi'}) may
140     * possibly (or even 'likely') overlap each-other.  Furthermore, other {@code DotPair} /
141     * sublists may have several gaps between them.  This method shall return an {@code IntStream}
142     * of unique, integer indices all of which are guaranteed to be
143     * <B STYLE='color: red;'><I>inside</I></B> at least one of {@code dpi's} sublists.
144     * 
145     * <BR /><BR /><B CLASS=JDDescLabel>NodeSearch Find 'all' Methods:</B>
146     * 
147     * <BR />Many of the "Find" Methods available in the {@code Torello.HTML.NodeSearch} package
148     * return instances of {@code Vector<DotPair>}.  These {@code DotPair} lists are to be
149     * thought-of as "lists of sub-lists" for a Vectorized-HTML web-page.  This method can help
150     * identify each and every integer-index that is <I>inside at least one of these returned
151     * sublists</I>.
152     *
153     * <BR /><BR /><B CLASS=JDDescLabel>Stale Data Reminder:</B>
154     * 
155     * <BR />Try to keep in mind, always, that when writing code that modifies Vectorized-HTML,
156     * <I>the moment any node is inserted or deleted into a {@code Vector}</I> all references /
157     * indices to that exact {@code Vector} will become invalid (unless special care is taken to
158     * update those indices by the number of nodes that were inserted or removed!)
159     * 
160     * <BR /><BR />There are myriad ways to handle this issue, many of which are beyond the scope
161     * of this Documentation Entry.  Generally, one of the better suggestions to keep in mind, is
162     * that if you are modifying a Vectorized-HTML page, perform your updates or removals <I>in
163     * reverse order</I>, and your {@code Vector} index-list pointers <I>will never</I> become
164     * stale pointers.
165     * 
166     * <BR /><BR />The interface {@link Replaceable} is also another way to avoid making elementary
167     * mistakes involving stale {@code Vector}-indices.
168     *
169     * @param dpi This may be any source for a {@code class 'Dotpair'} instance which implements
170     * the public interface {@code java.lang.Iterable}.
171     * 
172     * @param leastToGreatest  When this parameter receives a {@code TRUE} value, the results that
173     * are returned from this {@code IntStream} will be sorted least to greatest.  To generated an
174     * {@code IntStream} that produces results that are sorted from greatest to least, pass
175     * {@code FALSE} to this parameter.
176     * 
177     * @return A java {@code java.util.stream.IntStream} of the integers in that are members of
178     * this {@code Iterable<DotPair>}
179     */
180    public static IntStream toStream(Iterable<DotPair> dpi, boolean leastToGreatest)
181    {
182        Iterator<DotPair>   iter    = dpi.iterator();
183        TreeSet<DotPair>    ts      = new TreeSet<>();
184
185        // The tree-set will add the "DotPair" to the tree - and keep them sorted,
186        // since that's what "TreeSet" does.
187
188        while (iter.hasNext()) ts.add(iter.next());
189
190        Iterator<DotPair>   tsIter  = leastToGreatest ? ts.iterator() : ts.descendingIterator();
191        IntStream.Builder   builder = IntStream.builder();
192        DotPair             dp      = null;
193
194        if (leastToGreatest)
195
196            // We are building a "forward-index" stream... DO AS MUCH SORTING... AS POSSIBLE!
197            while (tsIter.hasNext())
198                for (int i=(dp=tsIter.next()).start; i <= dp.end; i++)
199                    builder.add(i);
200
201        else
202
203            // we are building a "reverse-index" stream... Make sure to add the sub-lists in
204            // reverse-order.
205
206            while (tsIter.hasNext())
207                for (int i=(dp=tsIter.next()).end; i >= dp.start; i--)
208                    builder.add(i);
209
210        if (leastToGreatest)
211
212            // We have added them in order (mostly!!) - VERY-TRICKY, and this is the whole point... 
213            // MULTIPLE, OVERLAPPING DOTPAIRS
214            // We need to sort because the DotPair sublists have been added in "sorted order" but
215            // the overall list is not (necessarily, but possibly) sorted!
216
217            return builder.build().sorted().distinct();
218
219        else
220
221            // Here, the exact same argument holds, but also, when "re-sorting" we have to futz
222            // around with the fact that Java's 'IntStream' class does not have a specialized
223            // reverse-sort() (or alternate-sort()) method... (Kind of another JDK bug).
224
225            return builder.build().map(i -> -i).sorted().map(i -> -i).distinct();
226    }
227
228
229    // ********************************************************************************************
230    // ********************************************************************************************
231    // List / Iterable of DotPair's  into an "End-Poinnts" Stream, Iterator, Array
232    // ********************************************************************************************
233    // ********************************************************************************************
234
235
236    /**
237     * Convenience Method.
238     * <BR />Invokes: {@link #endPointsToStream(Iterable, boolean)}
239     * <BR />Converts: output to an {@code Iterator}
240     */
241    public static PrimitiveIterator.OfInt endPointsIterator
242        (Iterable<DotPair> dpi, boolean leastToGreatest)
243    { return endPointsToStream(dpi, leastToGreatest).iterator(); }
244
245    /**
246     * Convenience Method.
247     * <BR />Invokes: {@link #endPointsToStream(Iterable, boolean)}
248     * <BR />Converts: output to an {@code int[]} array.
249     */
250    public static int[] endPointsToPosArray(Iterable<DotPair> dpi, boolean leastToGreatest)
251    { return endPointsToStream(dpi, leastToGreatest).toArray(); }
252
253    /**
254     * Collates a list of dotted-pairs into an {@code IntStream}.  Here, only the starting and
255     * ending values of the {@code DotPair's} are inserted into the returned {@code IntStream}.
256     * Any indices that lay between {@code DotPair.start} and {@code DotPair.end} <I>are not
257     * placed</I> into the output-{@code IntStream}.
258     * 
259     * <BR /><BR />All other behaviors of this method are the same as
260     * {@link #toStream(Iterable, boolean)}.
261     * 
262     * @param dpi This may be any source for a {@code class 'Dotpair'} instance which implements
263     * the public interface {@code java.lang.Iterable}.
264     * 
265     * @param leastToGreatest  When this parameter receives a {@code TRUE} value, the results that
266     * are returned from this {@code IntStream} will be sorted least to greatest.  To generated an
267     * {@code IntStream} that produces results that are sorted from greatest to least, pass
268     * {@code FALSE} to this parameter.
269     * 
270     * @return A java {@code java.util.stream.IntStream} of the integers in that are members of
271     * this {@code Iterable<DotPair>}.  Only the values {@code DotPair.start}, and
272     * {@code DotPair.end} are included in the output-{@code IntStream}.  This is unlike the method
273     * {@link #toStream(Iterable, boolean)} in that, here, only the starting and ending points of
274     * the dotted-pair are placed into result.  In the other method, the start-index, end-index 
275     * <I>and all indices in between them</I> are placed into the returned-{@code Stream}.
276     * 
277     * @see #toStream(Iterable, boolean)
278     */
279    public static IntStream endPointsToStream(Iterable<DotPair> dpi, boolean leastToGreatest)
280    {
281        Iterator<DotPair>   iter    = dpi.iterator();
282        TreeSet<DotPair>    ts      = new TreeSet<>();
283
284        // The tree-set will add the "DotPair" to the tree - and keep them sorted,
285        // since that's what "TreeSet" does.
286
287        while (iter.hasNext()) ts.add(iter.next());
288
289        Iterator<DotPair>   tsIter  = leastToGreatest ? ts.iterator() : ts.descendingIterator();
290        IntStream.Builder   builder = IntStream.builder();
291        DotPair             dp      = null;
292
293        if (leastToGreatest)
294
295            // We are building a "forward-index" stream... DO AS MUCH SORTING... AS POSSIBLE!
296            while (tsIter.hasNext())
297            {
298                dp = tsIter.next();
299
300                // In this method, only the start/end are placed into the IntStream
301                builder.add(dp.start);
302
303                // The indices BETWEEN start/end ARE NOT appened to the IntStream
304                builder.add(dp.end);
305            }
306
307        else
308
309            // we are building a "reverse-index" stream... Make sure to add the sub-lists in
310            // reverse-order.
311
312            while (tsIter.hasNext())
313            {
314                dp = tsIter.next();
315
316                // Only start/end are appended.
317                builder.add(dp.end);
318
319                // NOTE: This is a "reverse order" IntStream
320                builder.add(dp.start);
321            }
322
323        if (leastToGreatest)
324
325            // We have added them in order (mostly!!) - VERY-TRICKY, and this is the whole point...
326            // MULTIPLE, OVERLAPPING DOTPAIRS
327            // We need to sort because the DotPair sublists have been added in "sorted order" but
328            // the overall list is not (necessarily, but possibly) sorted!
329
330            return builder.build().sorted().distinct();
331
332        else
333
334            // Here, the exact same argument holds, but also, when "re-sorting" we have to futz
335            // around with the fact that Java's 'IntStream' class does not have a specialized
336            // reverse-sort() (or alternate-sort()) method... (Kind of another JDK bug).
337
338            return builder.build().map(i -> -i).sorted().map(i -> -i).distinct();
339    }
340
341
342    // ********************************************************************************************
343    // ********************************************************************************************
344    // Mirror-Inverse Collated Stream, Iterator, Array
345    // ********************************************************************************************
346    // ********************************************************************************************
347
348
349    /**
350     * Convenience Method.
351     * <BR />Invokes: {@link #excludedToStream(Iterable, int, boolean)}
352     * <BR />Converts: output to an {@code Iterator}
353     */
354    public static PrimitiveIterator.OfInt excludedToIterator
355        (Iterable<DotPair> dpi, int vectorSize, boolean leastToGreatest)
356    { return excludedToStream(dpi, vectorSize, leastToGreatest).iterator(); }
357
358    /**
359     * Convenience Method.
360     * <BR />Invokes: {@link #excludedToStream(Iterable, int, boolean)}
361     * <BR />Converts: output to an {@code int[]} array.
362     */
363    public static int[] excludedToPosArray
364        (Iterable<DotPair> dpi, int vectorSize, boolean leastToGreatest)
365    { return excludedToStream(dpi, vectorSize, leastToGreatest).toArray(); }
366
367    /**
368     * This method will first collate and sort a list of input {@code 'DotPair'} instances.
369     * 
370     * @param dpi This may be any source for a {@code class 'Dotpair'} instance which implements
371     * the public interface {@code java.lang.Iterable}.
372     * 
373     * @param vectorSize This method internal-loop will begin at index {@code '0'} and proceed 
374     * until {@code 'vectorSize' - 1}.  Any value in this range that is found not be inside any of
375     * the provided {@code DotPair's} will be included inside the returned {@code IntStream}.
376     * 
377     * @param leastToGreatest  When this parameter receives a {@code TRUE} value, the results that
378     * are returned from this {@code IntStream} will be sorted least to greatest.  To generated an
379     * {@code IntStream} that produces results that are sorted from greatest to least, pass
380     * {@code FALSE} to this parameter.
381     * 
382     * @return The list of ints
383     */
384    public static IntStream excludedToStream
385        (Iterable<DotPair> dpi, int vectorSize, boolean leastToGreatest)
386    {
387        Iterator<DotPair>   iter    = dpi.iterator();
388        TreeSet<DotPair>    ts      = new TreeSet<>();
389
390        if (vectorSize < 1) throw new NException(
391            "You have passed " + vectorSize + " to parameter vectorSize, but this value must be " +
392            "at least 1, or greater."
393        );
394
395        // All this is going to do is MAKE SURE that the DotPair's are ordered, least to greatest
396        while (iter.hasNext()) ts.add(iter.next());
397
398        iter = leastToGreatest ? ts.iterator() : ts.descendingIterator();
399
400        DotPair             dp  = iter.hasNext() ? iter.next() : null;
401        IntStream.Builder   b   = IntStream.builder();
402
403        if (leastToGreatest)
404
405            for (int i=0; i < vectorSize;)
406
407                if (dp == null)             b.accept(i++);
408                else if (i < dp.start)      b.accept(i++);
409                else if (dp.isInside(i))    { i++; continue; }
410                else if (iter.hasNext())    Objects.requireNonNull(dp = iter.next());
411                else                        dp = null;
412
413        else 
414
415            for (int i=(vectorSize-1); i >= 0;)
416
417                if (dp == null)             b.accept(i--);
418                else if (i < dp.start)      b.accept(i--);
419                else if (dp.isInside(i))    { i--; continue; }
420                else if (iter.hasNext())    Objects.requireNonNull(dp = iter.next());
421                else                        dp = null;
422
423        return b.build();
424    }
425}