package Torello.HTML;

import java.util.Vector;
import java.util.Comparator;

/**
 * Allows the NodeSearch Package to simultaneously return both an HTML-{@code Vector} sublist, and
 * the location (as an instance of {@link DotPair}) where that sub-list was found.
 *
 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=SUB_SECTION>
 * <EMBED CLASS='external-html' DATA-FILE-ID=IMPLEMENTS_REPLACE>
 *
 * @see Torello.HTML.HTMLNode
 * @see Torello.HTML.DotPair
 * @see Torello.HTML.NodeIndex
 * @see Torello.HTML.NodeSearch.TagNodePeekInclusive
 * @see Torello.HTML.NodeSearch.InnerTagPeekInclusive
 */
public class SubSection
    implements CharSequence, java.io.Serializable, Cloneable, Replaceable
{
    // ********************************************************************************************
    // ********************************************************************************************
    // Fields
    // ********************************************************************************************
    // ********************************************************************************************


    /** <EMBED CLASS='external-html' DATA-FILE-ID=SVUID> */
    public static final long serialVersionUID = 1;

    /**
     * This public field identifies the location of this sub-section within a vectorized-html
     * webpage.  The location of the sub-page is specified by the {@code class DotPair}
     * public-fields: {@code public final int 'start'} and {@code public final int 'end'}
     *
     * @see DotPair
     */
    public final DotPair location;

    /**
     * This public field identifies the actual nodes - <I><B>the vectorized-html sub-list</B></I> -
     * that are included in this sub-section of an html web-page.
     *
     * <BR /><BR />This field is <B>not</B> {@code final}, and the {@code Vector} it references is
     * mutable.  Its size is only validated against {@link #location} at construction time, and
     * may differ from it afterwards (see {@link #originalSize()} and {@link #currentSize()}).
     *
     * @see HTMLNode
     */
    public Vector<HTMLNode> html;


    // ********************************************************************************************
    // ********************************************************************************************
    // Constructor
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * Builds a new instance of this class.  It represents a 'sub-section' of an html-page that
     * needs to be encapsulated into an object-instance.  The contents of this data-structure are
     * merely the two parameters that are passed to this constructor.  Neither parameter is
     * copied; the references are stored as they are received.
     *
     * @param location This parameter value will be assigned immediately to the internal-field
     * {@code public DotPair location.}  It is a two-integer {@code Vector}-index class that
     * identifies the index-range, inside the main html-{@code Vector}, <I>of the html
     * {@code class 'SubSection'}</I> being constructed here.
     *
     * @param html This parameter may be any vectorized-html web-page, but
     * <SPAN STYLE="color: red;"><B><I>the intention is that this {@code Vector} is an exact
     * "cloned range" (a copy of a portion of the web-page) whose starting and ending integer index
     * {@code Vector}-positions are demarcated by the contents of the parameter
     * {@code 'location'}</I></B></SPAN>
     *
     * @throws NullPointerException If either {@code 'location'} or {@code 'html'} is null.
     *
     * @throws IllegalArgumentException This exception will throw if either of these two scenarios
     * occur:
     *
     * <BR /><BR /><UL CLASS=JDUL>
     * <LI>If the input {@code Vector<HTMLNode> 'html'} has {@code html.size() == 0}.</LI>
     * <LI>If {@code html.size() != location.size()}</LI>
     * </UL>
     *
     * @see SubSection#location
     * @see SubSection#html
     * @see DotPair
     * @see NodeIndex
     * @see HTMLNode
     */
    public SubSection(DotPair location, Vector<HTMLNode> html)
    {
        if (location == null) throw new NullPointerException
            ("Parameter 'DotPair location' to SubSection constructor was null.");

        if (html == null) throw new NullPointerException
            ("Parameter 'Vector<HTMLNode> html' to SubSection constructor was null.");

        if (html.isEmpty()) throw new IllegalArgumentException(
            "Parameter 'Vector<HTMLNode> html' to SubSection constructor has size zero, but " +
            "this is not allowed here."
        );

        // The value being compared is location.size(), so that is the value which is reported.
        if (location.size() != html.size()) throw new IllegalArgumentException(
            "The size [" + location.size() + "] of passed-parameter 'DotPair location' " +
            "[start=" + location.start + ", end=" + location.end + "] to SubSection " +
            "constructor is different than the length of the html-vector [" + html.size() + "]."
        );

        this.location   = location;
        this.html       = html;
    }


    // ********************************************************************************************
    // ********************************************************************************************
    // Misc Interface Methods
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * Java's {@code interface Cloneable} requirements.  This produces a new {@code SubSection}
     * with identical {@code Vector<HTMLNode> html} and {@code DotPair location} fields.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Shallow Copy:</B>
     *
     * <BR />The returned instance references the <B>same</B> {@code Vector} and the same
     * {@code DotPair} as this instance; neither is copied.
     *
     * <BR /><BR />The copy is made without re-running the constructor's validation.  Since the
     * field {@link #html} is public and mutable, its size may legitimately differ from
     * {@code location.size()} after construction, and cloning such an instance must not throw.
     *
     * @return A new {@code SubSection} whose internal fields are identical to this one.
     */
    @Override
    public SubSection clone()
    {
        try
            { return (SubSection) super.clone(); }

        catch (CloneNotSupportedException e)
        {
            // Unreachable: this class implements java.lang.Cloneable
            throw new AssertionError(e);
        }
    }

    /**
     * Java's hash-code requirement.
     *
     * <BR /><BR />This class does not override {@code equals(Object)}, so two distinct instances
     * that report the same hash-code are still considered unequal.
     *
     * @return A hash-code that may be used when storing this instance in a java
     * hashed-collection.  The value returned is the starting location ({@code location.start}) of
     * this {@code SubSection}.
     */
    @Override
    public int hashCode() { return location.start; }

    /*
     * DISABLED - Java's {@code interface Comparable<T>} requirements.  This does a very simple
     * comparison using the field {@code public DotPair location}.
     *
     * <BR /><BR /><B><SPAN STYLE="color: red;">FINAL METHOD:</SPAN></B> This method is declared
     * {@code final}, and cannot be modified by sub-classes.
     *
     * @param other Another {@code SubSection}, to be compared to {@code 'this' SubSection}
     *
     * @return An integer that fulfils Java's {@code interface Comparable<T> public int
     * compareTo(T t)} method requirements.
     *
     * @see DotPair#compareTo(DotPair)
     */
    // public final int compareTo(SubSection other)
    // { return this.location.compareTo(other.location); }

    /**
     * This is an "alternative Comparator" that can be used for sorting instances of this class.
     * It should work with the {@code Collections.sort(List, Comparator)} method in the standard
     * JDK package {@code java.util.*;}
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Comparator Heuristic:</B>
     *
     * <BR />This simply compares the {@code public} {@link DotPair}-Typed field
     * {@link #location} to each-other <I>using that class' <B>secondary</B></I> instance of
     * {@code Comparator}.
     *
     * <BR /><BR />{@code DotPair's} secondary-comparator may be viewed at: {@link DotPair#comp2}.
     *
     * @see DotPair#comp2
     */
    public static Comparator<SubSection> comp2 =
        (SubSection ss1, SubSection ss2) -> DotPair.comp2.compare(ss1.location, ss2.location);


    // ********************************************************************************************
    // ********************************************************************************************
    // CharSequence Methods
    // ********************************************************************************************
    // ********************************************************************************************


    /**
     * Java's {@code toString()} requirement.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Final Method:</B>
     *
     * <BR />This method is final, and cannot be modified by sub-classes.
     *
     * @return A {@code String}-representation of this {@code SubSection}, which is the
     * {@code String} returned by an invocation of {@code Util.pageToString(html)}.
     */
    @Override
    public final String toString()
    { return Util.pageToString(html); }

    /**
     * Returns the char value at the specified index of the String defined-by an invocation of the
     * method: {@code Util.pageToString(html)}.
     * An index ranges from {@code '0'} (zero) to {@code length() - 1.}  The first {@code char}
     * value of the sequence is at index {@code '0'}, the next at index one, and so on, as for
     * array indexing.
     *
     * <BR /><BR /><B>NOTE:</B> If the char value specified by the index is a surrogate, the
     * surrogate value is returned.
     *
     * <BR /><BR /><B>NOTE:</B> The {@code String} is rebuilt by {@link #toString()} on every
     * invocation of this method; it is not cached, because {@link #html} is mutable.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Final Method:</B>
     *
     * <BR />This method is final, and cannot be modified by sub-classes.
     *
     * @param index The index of the char value to be returned
     *
     * @return The specified char value
     *
     * @see #toString()
     */
    @Override
    public final char charAt(int index)
    { return toString().charAt(index); }

    /**
     * Returns the length of the {@code String} defined-by an invocation of the method:
     * {@code Util.pageToString(html)}.  The length is the number of 16-bit {@code char's} in the
     * sequence.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Final Method:</B>
     *
     * <BR />This method is final, and cannot be modified by sub-classes.
     *
     * @return the number of {@code chars} in the {@code String} returned by {@link #toString()}
     *
     * @see #toString()
     */
    @Override
    public final int length() { return toString().length(); }

    /**
     * Returns a {@code java.lang.CharSequence} that is a subsequence of the {@code String}
     * defined-by an invocation of the method: {@code Util.pageToString(html)}.  The subsequence
     * starts with the {@code char} value at the specified index and ends with the {@code char}
     * value at index {@code end - 1.}  The length (in {@code char's}) of the returned sequence is
     * {@code end - start}, so if {@code start == end} then an empty sequence is returned.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Final Method:</B>
     *
     * <BR />This method is final, and cannot be modified by sub-classes.
     *
     * @param start The start index, inclusive
     * @param end The end index, exclusive
     *
     * @return The specified subsequence
     *
     * @see #toString()
     */
    @Override
    public final CharSequence subSequence(int start, int end)
    { return toString().substring(start, end); }


    // ********************************************************************************************
    // ********************************************************************************************
    // Replaceable Methods
    // ********************************************************************************************
    // ********************************************************************************************


    // All of these "Inherit" the Replaceable-Interface Comments by javadoc.  There is no need
    // to retype the description that is already going to be there when JavaDoc does its thing

    // 'original' values are read from 'location'; 'current' values are read from 'html'
    public int originalSize()           { return location.size();   }
    public int currentSize()            { return html.size();       }
    public int originalLocationStart()  { return location.start;    }

    // Reports one past the value stored in 'location.end'
    public int originalLocationEnd()    { return location.end + 1;  }


    public Vector<HTMLNode> currentNodes() { return html; }

    public HTMLNode firstCurrentNode()
    { return html.elementAt(0); }

    public HTMLNode lastCurrentNode()
    { return html.elementAt(html.size() - 1); }


    public boolean addAllInto(Vector<HTMLNode> fileVec)
    { return fileVec.addAll(html); }

    public boolean addAllInto(int index, Vector<HTMLNode> fileVec)
    { return fileVec.addAll(index, html); }

    public int update(Vector<HTMLNode> originalHTML)
    { return ReplaceNodes.r(originalHTML, location, html); }
}