001package Torello.HTML; 002 003import Torello.Java.NException; 004 005import java.util.*; 006 007import java.util.stream.IntStream; 008 009public class DPUtil 010{ 011 // ******************************************************************************************** 012 // ******************************************************************************************** 013 // To Vector & Sublist 014 // ******************************************************************************************** 015 // ******************************************************************************************** 016 017 018 /** 019 * This method converts a sublist, represented by a "dotted pair", and converts it into a 020 * {@code Vector} of {@code HTMLNode}. 021 * 022 * <BR /><BR /><B CLASS=JDDescLabel>Retrieval Heuristic:</B> 023 * 024 * <BR />It should be obvious that the parameter @code 'dp'} contains fields named 025 * {@link DotPair#start} and {@link DotPair#end}. These two simply represent the starting and 026 * ending indices into an HTML page {@code Vector}. In this method, the intention is that they 027 * are indices into the HTML-{@code Vector} parameter simply-named {@code 'html'}. 028 * 029 * <BR /><BR />This method cycles through that {@code Vector}, beginning with the field 030 * {@code dp.start} (inclusive) and ending with {@code dp.end} (inclusive, as well). Each 031 * {@code HTMLNode} reference within this sublist is inserted into the {@code Vector} that is 032 * ultimately returned. 033 * 034 * @param html Any Vectorized-HTML Web-Page, or sub-page 035 * 036 * @param dp Any sublist within that HTML page. 037 * 038 * @return A {@code Vector} version of the original sublist that was represented by 039 * passed parameter {@code 'dp'} 040 */ 041 public static Vector<HTMLNode> toVector(Vector<? extends HTMLNode> html, DotPair dp) 042 { 043 Vector<HTMLNode> ret = new Vector<>(); 044 045 // NOTE: Cannot return the result of "subList" because it is linked/mapped to the original 046 // Vector. If changes are made to "subList", those changes will be reflected in the 047 // original HTML Vector. 048 049 ret.addAll(html.subList(dp.start, dp.end + 1)); 050 051 return ret; 052 } 053 054 /** 055 * This will cycle through a "list of sublists" and call the method 056 * {@code toVector(Vector<? extends HTMLNode> html, DotPair dp)} 057 * on each sublist in the input parameter {@code 'sublists'} Those sublists will be collected 058 * into another {@code Vector} and returned. 059 * 060 * @param html Any Vectorized-HTML Web-Page, or sub-page 061 * 062 * @param sublists A "List of sublists" within that HTML page. 063 * 064 * @return This method shall return a {@code Vector} containing vectors as sublists. 065 */ 066 public static Vector<Vector<HTMLNode>> toVectors 067 (Vector<? extends HTMLNode> html, Iterable<DotPair> sublists) 068 { 069 Vector<Vector<HTMLNode>> ret = new Vector<>(); 070 071 for (DotPair sublist : sublists) ret.addElement(toVector(html, sublist)); 072 073 return ret; 074 } 075 076 /** 077 * This will cycle through a "list of sublists" and call the method 078 * {@code toVector(Vector<? extends HTMLNode> html, DotPair dp) } 079 * on each sublist in the input parameter {@code 'sublists'}. Those sublists will be 080 * collected into another {@code Vector} and returned. 081 * 082 * @param html Any Vectorized-HTML Web-Page, or sub-page 083 * 084 * @param sublists A "List of sublists" within that HTML page. 085 * 086 * @return This method shall return a {@code Vector} containing vectors as sublists. 087 */ 088 public static Vector<SubSection> toSubSections(Vector<? extends HTMLNode> html, Iterable<DotPair> sublists) 089 { 090 Vector<SubSection> ret = new Vector<>(); 091 092 for (DotPair sublist : sublists) 093 ret.addElement(new SubSection(sublist, toVector(html, sublist))); 094 095 return ret; 096 } 097 098 099 // ******************************************************************************************** 100 // ******************************************************************************************** 101 // List / Iterable of DotPair's to a Stream, Iterator, Array 102 // ******************************************************************************************** 103 // ******************************************************************************************** 104 105 106 /** 107 * Convenience Method. 108 * <BR />Invokes: {@link #toStream(Iterable, boolean)} 109 * <BR />Converts output to an {@code Iterator} 110 */ 111 public static PrimitiveIterator.OfInt iterator(Iterable<DotPair> dpi, boolean leastToGreatest) 112 { return toStream(dpi, leastToGreatest).iterator(); } 113 114 /** 115 * Convenience Method. 116 * <BR />Invokes: {@link #toStream(Iterable, boolean)} 117 * <BR />Converts output to an {@code int[]} array. 118 */ 119 public static int[] toPosArray(Iterable<DotPair> dpi, boolean leastToGreatest) 120 { return toStream(dpi, leastToGreatest).toArray(); } 121 122 /** 123 * This method will convert a list of {@code DotPair} instances to a Java 124 * {@code java.util.stream.IntStream}. The generated {@code IntStream} shall contain all 125 * {@code Vector}-indices (integers) that are within the bounds of any of the {@code DotPair's} 126 * listed by parameter {@code 'dpi'}. 127 * 128 * <BR /><BR />Stating this a second time, the returned position-index list (which is of Java 129 * Type {@code IntStream}) is built out of the contents of each and every one of the the 130 * {@code DotPair's} in {@code Iterable}-parameter {@code 'dpi'}. This index-list which is 131 * ultimately returned from this method <I>will contain all indices that are "inside" all 132 * {@coded DotPair's}</I>. 133 * 134 * <BR /><BR /><B CLASS=JDDescLabel>Repeated Indices & Gaps:</B> 135 * 136 * <BR />This method will never include an index more than once, and all integers in the 137 * returned {@code IntStream} will be unique. 138 * 139 * <BR /><BR />The sublists (the {@code DotPair's} of input-parameter {@code 'dpi'}) may 140 * possibly (or even 'likely') overlap each-other. Furthermore, other {@code DotPair} / 141 * sublists may have several gaps between them. This method shall return an {@code IntStream} 142 * of unique, integer indices all of which are guaranteed to be 143 * <B STYLE='color: red;'><I>inside</I></B> at least one of {@code dpi's} sublists. 144 * 145 * <BR /><BR /><B CLASS=JDDescLabel>NodeSearch Find 'all' Methods:</B> 146 * 147 * <BR />Many of the "Find" Methods available in the {@code Torello.HTML.NodeSearch} package 148 * return instances of {@code Vector<DotPair>}. These {@code DotPair} lists are to be 149 * thought-of as "lists of sub-lists" for a Vectorized-HTML web-page. This method can help 150 * identify each and every integer-index that is <I>inside at least one of these returned 151 * sublists</I>. 152 * 153 * <BR /><BR /><B CLASS=JDDescLabel>Stale Data Reminder:</B> 154 * 155 * <BR />Try to keep in mind, always, that when writing code that modifies Vectorized-HTML, 156 * <I>the moment any node is inserted or deleted into a {@code Vector}</I> all references / 157 * indices to that exact {@code Vector} will become invalid (unless special care is taken to 158 * update those indices by the number of nodes that were inserted or removed!) 159 * 160 * <BR /><BR />There are myriad ways to handle this issue, many of which are beyond the scope 161 * of this Documentation Entry. Generally, one of the better suggestions to keep in mind, is 162 * that if you are modifying a Vectorized-HTML page, perform your updates or removals <I>in 163 * reverse order</I>, and your {@code Vector} index-list pointers <I>will never</I> become 164 * stale pointers. 165 * 166 * <BR /><BR />The interface {@link Replaceable} is also another way to avoid making elementary 167 * mistakes involving stale {@code Vector}-indices. 168 * 169 * @param dpi This may be any source for a {@code class 'Dotpair'} instance which implements 170 * the public interface {@code java.lang.Iterable}. 171 * 172 * @param leastToGreatest When this parameter receives a {@code TRUE} value, the results that 173 * are returned from this {@code IntStream} will be sorted least to greatest. To generated an 174 * {@code IntStream} that produces results that are sorted from greatest to least, pass 175 * {@code FALSE} to this parameter. 176 * 177 * @return A java {@code java.util.stream.IntStream} of the integers in that are members of 178 * this {@code Iterable<DotPair>} 179 */ 180 public static IntStream toStream(Iterable<DotPair> dpi, boolean leastToGreatest) 181 { 182 Iterator<DotPair> iter = dpi.iterator(); 183 TreeSet<DotPair> ts = new TreeSet<>(); 184 185 // The tree-set will add the "DotPair" to the tree - and keep them sorted, 186 // since that's what "TreeSet" does. 187 188 while (iter.hasNext()) ts.add(iter.next()); 189 190 Iterator<DotPair> tsIter = leastToGreatest ? ts.iterator() : ts.descendingIterator(); 191 IntStream.Builder builder = IntStream.builder(); 192 DotPair dp = null; 193 194 if (leastToGreatest) 195 196 // We are building a "forward-index" stream... DO AS MUCH SORTING... AS POSSIBLE! 197 while (tsIter.hasNext()) 198 for (int i=(dp=tsIter.next()).start; i <= dp.end; i++) 199 builder.add(i); 200 201 else 202 203 // we are building a "reverse-index" stream... Make sure to add the sub-lists in 204 // reverse-order. 205 206 while (tsIter.hasNext()) 207 for (int i=(dp=tsIter.next()).end; i >= dp.start; i--) 208 builder.add(i); 209 210 if (leastToGreatest) 211 212 // We have added them in order (mostly!!) - VERY-TRICKY, and this is the whole point... 213 // MULTIPLE, OVERLAPPING DOTPAIRS 214 // We need to sort because the DotPair sublists have been added in "sorted order" but 215 // the overall list is not (necessarily, but possibly) sorted! 216 217 return builder.build().sorted().distinct(); 218 219 else 220 221 // Here, the exact same argument holds, but also, when "re-sorting" we have to futz 222 // around with the fact that Java's 'IntStream' class does not have a specialized 223 // reverse-sort() (or alternate-sort()) method... (Kind of another JDK bug). 224 225 return builder.build().map(i -> -i).sorted().map(i -> -i).distinct(); 226 } 227 228 229 // ******************************************************************************************** 230 // ******************************************************************************************** 231 // List / Iterable of DotPair's into an "End-Poinnts" Stream, Iterator, Array 232 // ******************************************************************************************** 233 // ******************************************************************************************** 234 235 236 /** 237 * Convenience Method. 238 * <BR />Invokes: {@link #endPointsToStream(Iterable, boolean)} 239 * <BR />Converts: output to an {@code Iterator} 240 */ 241 public static PrimitiveIterator.OfInt endPointsIterator 242 (Iterable<DotPair> dpi, boolean leastToGreatest) 243 { return endPointsToStream(dpi, leastToGreatest).iterator(); } 244 245 /** 246 * Convenience Method. 247 * <BR />Invokes: {@link #endPointsToStream(Iterable, boolean)} 248 * <BR />Converts: output to an {@code int[]} array. 249 */ 250 public static int[] endPointsToPosArray(Iterable<DotPair> dpi, boolean leastToGreatest) 251 { return endPointsToStream(dpi, leastToGreatest).toArray(); } 252 253 /** 254 * Collates a list of dotted-pairs into an {@code IntStream}. Here, only the starting and 255 * ending values of the {@code DotPair's} are inserted into the returned {@code IntStream}. 256 * Any indices that lay between {@code DotPair.start} and {@code DotPair.end} <I>are not 257 * placed</I> into the output-{@code IntStream}. 258 * 259 * <BR /><BR />All other behaviors of this method are the same as 260 * {@link #toStream(Iterable, boolean)}. 261 * 262 * @param dpi This may be any source for a {@code class 'Dotpair'} instance which implements 263 * the public interface {@code java.lang.Iterable}. 264 * 265 * @param leastToGreatest When this parameter receives a {@code TRUE} value, the results that 266 * are returned from this {@code IntStream} will be sorted least to greatest. To generated an 267 * {@code IntStream} that produces results that are sorted from greatest to least, pass 268 * {@code FALSE} to this parameter. 269 * 270 * @return A java {@code java.util.stream.IntStream} of the integers in that are members of 271 * this {@code Iterable<DotPair>}. Only the values {@code DotPair.start}, and 272 * {@code DotPair.end} are included in the output-{@code IntStream}. This is unlike the method 273 * {@link #toStream(Iterable, boolean)} in that, here, only the starting and ending points of 274 * the dotted-pair are placed into result. In the other method, the start-index, end-index 275 * <I>and all indices in between them</I> are placed into the returned-{@code Stream}. 276 * 277 * @see #toStream(Iterable, boolean) 278 */ 279 public static IntStream endPointsToStream(Iterable<DotPair> dpi, boolean leastToGreatest) 280 { 281 Iterator<DotPair> iter = dpi.iterator(); 282 TreeSet<DotPair> ts = new TreeSet<>(); 283 284 // The tree-set will add the "DotPair" to the tree - and keep them sorted, 285 // since that's what "TreeSet" does. 286 287 while (iter.hasNext()) ts.add(iter.next()); 288 289 Iterator<DotPair> tsIter = leastToGreatest ? ts.iterator() : ts.descendingIterator(); 290 IntStream.Builder builder = IntStream.builder(); 291 DotPair dp = null; 292 293 if (leastToGreatest) 294 295 // We are building a "forward-index" stream... DO AS MUCH SORTING... AS POSSIBLE! 296 while (tsIter.hasNext()) 297 { 298 dp = tsIter.next(); 299 300 // In this method, only the start/end are placed into the IntStream 301 builder.add(dp.start); 302 303 // The indices BETWEEN start/end ARE NOT appened to the IntStream 304 builder.add(dp.end); 305 } 306 307 else 308 309 // we are building a "reverse-index" stream... Make sure to add the sub-lists in 310 // reverse-order. 311 312 while (tsIter.hasNext()) 313 { 314 dp = tsIter.next(); 315 316 // Only start/end are appended. 317 builder.add(dp.end); 318 319 // NOTE: This is a "reverse order" IntStream 320 builder.add(dp.start); 321 } 322 323 if (leastToGreatest) 324 325 // We have added them in order (mostly!!) - VERY-TRICKY, and this is the whole point... 326 // MULTIPLE, OVERLAPPING DOTPAIRS 327 // We need to sort because the DotPair sublists have been added in "sorted order" but 328 // the overall list is not (necessarily, but possibly) sorted! 329 330 return builder.build().sorted().distinct(); 331 332 else 333 334 // Here, the exact same argument holds, but also, when "re-sorting" we have to futz 335 // around with the fact that Java's 'IntStream' class does not have a specialized 336 // reverse-sort() (or alternate-sort()) method... (Kind of another JDK bug). 337 338 return builder.build().map(i -> -i).sorted().map(i -> -i).distinct(); 339 } 340 341 342 // ******************************************************************************************** 343 // ******************************************************************************************** 344 // Mirror-Inverse Collated Stream, Iterator, Array 345 // ******************************************************************************************** 346 // ******************************************************************************************** 347 348 349 /** 350 * Convenience Method. 351 * <BR />Invokes: {@link #excludedToStream(Iterable, int, boolean)} 352 * <BR />Converts: output to an {@code Iterator} 353 */ 354 public static PrimitiveIterator.OfInt excludedToIterator 355 (Iterable<DotPair> dpi, int vectorSize, boolean leastToGreatest) 356 { return excludedToStream(dpi, vectorSize, leastToGreatest).iterator(); } 357 358 /** 359 * Convenience Method. 360 * <BR />Invokes: {@link #excludedToStream(Iterable, int, boolean)} 361 * <BR />Converts: output to an {@code int[]} array. 362 */ 363 public static int[] excludedToPosArray 364 (Iterable<DotPair> dpi, int vectorSize, boolean leastToGreatest) 365 { return excludedToStream(dpi, vectorSize, leastToGreatest).toArray(); } 366 367 /** 368 * This method will first collate and sort a list of input {@code 'DotPair'} instances. 369 * 370 * @param dpi This may be any source for a {@code class 'Dotpair'} instance which implements 371 * the public interface {@code java.lang.Iterable}. 372 * 373 * @param vectorSize This method internal-loop will begin at index {@code '0'} and proceed 374 * until {@code 'vectorSize' - 1}. Any value in this range that is found not be inside any of 375 * the provided {@code DotPair's} will be included inside the returned {@code IntStream}. 376 * 377 * @param leastToGreatest When this parameter receives a {@code TRUE} value, the results that 378 * are returned from this {@code IntStream} will be sorted least to greatest. To generated an 379 * {@code IntStream} that produces results that are sorted from greatest to least, pass 380 * {@code FALSE} to this parameter. 381 * 382 * @return The list of ints 383 */ 384 public static IntStream excludedToStream 385 (Iterable<DotPair> dpi, int vectorSize, boolean leastToGreatest) 386 { 387 Iterator<DotPair> iter = dpi.iterator(); 388 TreeSet<DotPair> ts = new TreeSet<>(); 389 390 if (vectorSize < 1) throw new NException( 391 "You have passed " + vectorSize + " to parameter vectorSize, but this value must be " + 392 "at least 1, or greater." 393 ); 394 395 // All this is going to do is MAKE SURE that the DotPair's are ordered, least to greatest 396 while (iter.hasNext()) ts.add(iter.next()); 397 398 iter = leastToGreatest ? ts.iterator() : ts.descendingIterator(); 399 400 DotPair dp = iter.hasNext() ? iter.next() : null; 401 IntStream.Builder b = IntStream.builder(); 402 403 if (leastToGreatest) 404 405 for (int i=0; i < vectorSize;) 406 407 if (dp == null) b.accept(i++); 408 else if (i < dp.start) b.accept(i++); 409 else if (dp.isInside(i)) { i++; continue; } 410 else if (iter.hasNext()) Objects.requireNonNull(dp = iter.next()); 411 else dp = null; 412 413 else 414 415 for (int i=(vectorSize-1); i >= 0;) 416 417 if (dp == null) b.accept(i--); 418 else if (i < dp.start) b.accept(i--); 419 else if (dp.isInside(i)) { i--; continue; } 420 else if (iter.hasNext()) Objects.requireNonNull(dp = iter.next()); 421 else dp = null; 422 423 return b.build(); 424 } 425}