package Torello.HTML;

import java.util.Vector;
import java.io.Serializable;
import java.util.Comparator;

/**
 * The abstract parent class of all three {@code NodeIndex} classes, {@link TagNodeIndex},
 * {@link TextNodeIndex} and {@link CommentNodeIndex}.
 *
 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=NODE_INDEX>
 *
 * @param <NODE> The class of {@code HTMLNode} represented by this {@code NodeIndex} instance.
 * @see HTMLNode
 * @see CommentNodeIndex
 * @see TagNodeIndex
 * @see TextNodeIndex
 */
@SuppressWarnings("rawtypes")
public abstract class NodeIndex<NODE extends HTMLNode>
    implements CharSequence, Serializable, Cloneable, Replaceable
{
    // ********************************************************************************************
    // ********************************************************************************************
    // Fields
    // ********************************************************************************************
    // ********************************************************************************************


    /** <EMBED CLASS='external-html' DATA-FILE-ID=SVUID> */
    public static final long serialVersionUID = 1;

    /**
     * An index to a node from a web-page.  This index must point to the exact same node inside
     * of a vectorized-html page as the node stored in member-field {@link #n}.
     */
    public final int index;

    /**
     * A {@code HTMLNode} from a web-page.  This node is supposed to be the same node stored at the
     * index specified by member-field {@link #index} of some vectorized-html web-page in
     * memory or on disk.
     */
    public NODE n;


    // ********************************************************************************************
    // ********************************************************************************************
    // Constructor
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * Constructs an instance, assigning both the {@link #index} and the {@link #n} fields.
     * Both arguments are validated before either field is assigned.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Stale Data Note:</B>
     *
     * <BR />This class is a less commonly used class, rather than one of the primary data classes.
     * Instances of this class become 'useless' the moment the {@code Vector} that was used to
     * instantiate this class is modified, implying that the node 'n' is no longer at
     * {@code Vector}-index 'index.'
     *
     * <BR /><BR />These "NodeIndex" classes are retained, not deprecated due to the fundamental
     * nature of using the classes of the NodeSearch Package.  Data is easily made stale.
     * Generally, when modifying HTML Vectors, the easiest thing to do is <B><I>to remember to
     * modify a {@code Vector<HTMLNode>} at specific locations by iterating from the end of
     * the {@code Vector}, in reverse order, back to the beginning.</I></B>
     *
     * <BR /><BR />Such a practice will generally prevent Stale-Data {@code Vector}-Indices from
     * rearing their ugly head.
     *
     * @param index This is the vector-index location of HTMLNode 'n' inside of a vectorized-HTML
     * web-page.
     *
     * @param n An HTMLNode that needs to be the node found in the underlying vector at
     * vector-index 'index.'
     *
     * @throws NullPointerException if {@code n} is null.  This is checked first, so it is the
     * exception thrown when both arguments are invalid.
     *
     * @throws IndexOutOfBoundsException if {@code index} is negative.
     */
    protected NodeIndex(int index, NODE n)
    {
        if (n == null) throw new NullPointerException(
            "HTMLNode parameter 'n' to this constructor was passed a null value, but this " +
            "is not allowed here."
        );

        if (index < 0) throw new IndexOutOfBoundsException(
            "Integer parameter 'index' to this constructor was passed a negative value: " +
            index
        );

        this.index  = index;
        this.n      = n;
    }

    /**
     * Simple dispatch method helper that switches on the class of input parameter {@code 'n'}.
     * The checks are made in the order {@link TagNode}, {@link TextNode}, {@link CommentNode},
     * and sub-classes of those three types are accepted as well.
     *
     * @param index The vector-index that is handed, unchanged, to the constructor of the
     * {@code NodeIndex} sub-class that is selected.
     *
     * @param n Any of the three Java HTML defined {@code HTMLNode} subclasses - {@link TagNode},
     * {@link TextNode} or {@link CommentNode}
     *
     * @return A {@code NodeIndex} inheriting class that is appropriate to {@code 'n'}.
     *
     * @throws IllegalArgumentException If the user has extended class {@code HTMLNode}, and passed
     * this unrecognized {@code HTMLNode} Type.
     *
     * @throws NullPointerException If {@code 'n'} is null.
     */
    public static final NodeIndex<?> newNodeIndex(int index, HTMLNode n)
    {
        // Dereferencing 'n' here is what produces the NullPointerException for a null argument.
        final Class<?> newNodeClass = n.getClass();

        if (TagNode.class.isAssignableFrom(newNodeClass))
            return new TagNodeIndex(index, (TagNode) n);

        if (TextNode.class.isAssignableFrom(newNodeClass))
            return new TextNodeIndex(index, (TextNode) n);

        if (CommentNode.class.isAssignableFrom(newNodeClass))
            return new CommentNodeIndex(index, (CommentNode) n);

        throw new IllegalArgumentException
            ("Parameter 'n' has a Type that is an Unrecognized HTMLNode-SubClass Type");
    }


    // ********************************************************************************************
    // ********************************************************************************************
    // java.lang.Object Methods
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * Java's {@code public boolean equals(Object o)} requirements.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Final Method:</B>
     * <BR />This method is final, and cannot be modified by sub-classes.
     *
     * @param o This may be any Java Object, but only ones of {@code 'this'} type whose
     * internal-values are identical will bring this method to return true.
     *
     * @return {@code TRUE} if {@code 'o'} is {@code 'this'} instance, or if it has exactly the
     * same run-time class as {@code 'this'}, the same {@link #index}, and a node whose
     * {@link HTMLNode#str} is equal to that of {@code this.n}.  Returns {@code FALSE} otherwise,
     * including when {@code 'o'} is null.
     */
    @Override
    public final boolean equals(Object o)
    {
        if (o == null) return false;
        if (o == this) return true;

        // An exact class match is required; a TagNodeIndex never equals a TextNodeIndex.
        if (! this.getClass().equals(o.getClass())) return false;

        final NodeIndex<?> other = (NodeIndex<?>) o;

        return other.n.str.equals(this.n.str) && (other.index == this.index);
    }

    /**
     * Java's hash-code requirement.
     *
     * @return A hash-code that may be used when storing this node in a java hashed-collection.
     * The {@link #index} of this {@code NodeIndex} is returned as the hash.
     */
    @Override
    public int hashCode() { return index; }

    // NOTE(review): This class does not declare a 'compareTo' method.  A Javadoc block that
    // described one (final, and doing a very simple comparison using the vector-index position,
    // see field 'index') was dangling at this location, directly in front of 'comp2'.  It is kept
    // only as this note.  Presumably the ordering is provided by the concrete sub-classes - TODO
    // confirm.

    /**
     * This is an "alternative Comparator" that can be used for sorting instances of this class.
     * It should work with the {@code Collections.sort(List, Comparator)} method in the standard
     * JDK package {@code java.util.*;}
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Comparator Heuristic:</B>
     *
     * <BR />This version utilizes the standard JDK method {@code String.compareTo(String)}.
     *
     * @see HTMLNode#str
     */
    public static final Comparator<TextNodeIndex> comp2 =
        (TextNodeIndex txni1, TextNodeIndex txni2) -> txni1.n.str.compareTo(txni2.n.str);

    /**
     * This is an "alternative Comparator" that can be used for sorting instances of this class.
     * It should work with the {@code Collections.sort(List, Comparator)} method in the standard
     * JDK package {@code java.util.*;}
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Comparator Heuristic:</B>
     *
     * <BR />This version utilizes the standard JDK method
     * {@code String.compareToIgnoreCase(String)}.
     *
     * @see HTMLNode#str
     */
    public static final Comparator<TextNodeIndex> comp3 =
        (TextNodeIndex txni1, TextNodeIndex txni2) -> txni1.n.str.compareToIgnoreCase(txni2.n.str);


    // ********************************************************************************************
    // ********************************************************************************************
    // CharSequence Methods
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * Returns the char value at the specified index of the {@link HTMLNode#str} field of
     * {@code 'this'} instance's node-field {@link #n}.
     * An index ranges from zero to length() - 1.  The first char value of the sequence is at index
     * zero, the next at index one, and so on, as for array indexing.
     *
     * <BR /><BR />If the char value specified by the index is a surrogate, the surrogate value is
     * returned.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Final Method:</B>
     * <BR />This method is final, and cannot be modified by sub-classes.
     *
     * @param index The index of the char value to be returned.  Note that this is a
     * character-position inside {@code this.n.str}, and is unrelated to the vector-index stored
     * in field {@link #index}.
     *
     * @return The specified char value
     */
    @Override
    public final char charAt(int index) { return n.str.charAt(index); }

    /**
     * Returns the length of the {@link HTMLNode#str} field of {@code 'this'} instance's
     * node-field {@link #n}.  The length is the number of 16-bit chars in the sequence.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Final Method:</B>
     * <BR />This method is final, and cannot be modified by sub-classes.
     *
     * @return the number of chars in {@code this.n.str}
     */
    @Override
    public final int length() { return n.str.length(); }

    /**
     * Returns a CharSequence that is a subsequence of the {@link HTMLNode#str} field of
     * {@code 'this'} instance's node-field {@link #n}.
     * The subsequence starts with the char value at the specified index and ends with the char
     * value at index end - 1.  The length (in chars) of the returned sequence is end - start, so
     * if start == end then an empty sequence is returned.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Final Method:</B>
     * <BR />This method is final, and cannot be modified by sub-classes.
     *
     * @param start The start index, inclusive
     *
     * @param end The end index, exclusive
     *
     * @return The specified subsequence
     */
    @Override
    public final CharSequence subSequence(int start, int end)
    { return n.str.substring(start, end); }

    /**
     * Returns the {@link HTMLNode#str} field of {@code 'this'} instance's node-field {@link #n}.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Final Method:</B>
     * <BR />This method is final, and cannot be modified by sub-classes.
     *
     * @return A string consisting of exactly this sequence of characters.
     *
     * @see HTMLNode#str
     */
    @Override
    public final String toString()
    { return n.str; }


    // ********************************************************************************************
    // ********************************************************************************************
    // Replaceable Methods
    // ********************************************************************************************
    // ********************************************************************************************


    // THE SHAME: February 2025, An "HTML Bug" was found!
    //
    // All of the methods in this class were failing to take into consideration the presence or
    // absence of the "CURRENT_NODES" Vector.  If the user has modified the contents of a
    // TagNodeIndex, TextNodeIndex or CommentNodeIndex, then the 'this.n' field is no longer the
    // least bit relevant!
    //
    // When a NodeIndex instance is modified, its 'this.n' field has been superseded by the
    // "CURRENT_NODES" Vector!
    //
    // DO NOT DOCUMENT - ALL DOCUMENTATION IS "INHERITED" FROM "REPLACEABLE"

    // Remains null until the first call to 'currentNodes()', which creates it holding only 'n'.
    // The very same Vector instance is handed back to callers, who may then change its contents.
    //
    // These two field-names are part of the serialized form of this class - do not rename them.
    private Vector<HTMLNode> CURRENT_NODES = null;

    // Becomes (and stays) true once 'currentNodes()' has been invoked.  From then on, the methods
    // below read 'CURRENT_NODES' rather than 'this.n'.
    private boolean vectorAssigned = false;

    public int originalSize()
    { return 1; }

    public int currentSize()
    { return vectorAssigned ? CURRENT_NODES.size() : 1; }


    public int originalLocationStart()  { return index; }
    public int originalLocationEnd()    { return index + 1; }


    public HTMLNode firstCurrentNode()
    {
        if (vectorAssigned)
            return (this.CURRENT_NODES.size() > 0) ? this.CURRENT_NODES.elementAt(0) : null;
        else
            return this.n;
    }

    public HTMLNode lastCurrentNode()
    {
        if (vectorAssigned)
        {
            final int S = this.CURRENT_NODES.size();
            return (S > 0) ? this.CURRENT_NODES.elementAt(S-1) : null;
        }
        else return this.n;
    }

    public Vector<HTMLNode> currentNodes()
    {
        // Lazily build the one-element Vector the first time it is requested
        if (! this.vectorAssigned)
        {
            this.CURRENT_NODES = new Vector<>(1);
            this.CURRENT_NODES.add(n);
            this.vectorAssigned = true;
        }

        return CURRENT_NODES;
    }


    public boolean addAllInto(Vector<HTMLNode> fileVec)
    {
        if (this.vectorAssigned)    return fileVec.addAll(CURRENT_NODES);
        else                        return fileVec.add(n);
    }

    public boolean addAllInto(int index, Vector<HTMLNode> fileVec)
    {
        // Parameter 'index' is the insertion-point in 'fileVec'; it shadows field 'this.index'
        if (this.vectorAssigned)    return fileVec.addAll(index, this.CURRENT_NODES);
        else                        fileVec.insertElementAt(this.n, index);

        return true;
    }

    public int update(Vector<HTMLNode> fileVec)
    {
        if (! this.vectorAssigned)
        {
            // One node overwrites one node: the size of 'fileVec' is unchanged
            fileVec.setElementAt(n, index);
            return 0;
        }

        else
        {
            // NOTE(review): 'ReplaceNodes.r' is not visible from here.  Presumably it swaps the
            // single node at 'this.index' for the contents of 'CURRENT_NODES', which would make
            // the value returned below the change in the size of 'fileVec' - confirm.
            ReplaceNodes.r(fileVec, this.index, this.CURRENT_NODES);
            return this.CURRENT_NODES.size() - 1;
        }
    }
}