001package Torello.HTML;
002
003import java.util.Vector;
004import java.util.Comparator;
005
006/**
007 * Allows the NodeSearch Package to simultaneously return both an HTML-{@code Vector} sublist, and
008 * the location where that sub-list was located (as an instance of {@link DotPair}) where that
009 * sublist was located.
010 * 
011 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=SUB_SECTION>
012 * <EMBED CLASS='external-html' DATA-FILE-ID=IMPLEMENTS_REPLACE>
013 *
014 * @see Torello.HTML.HTMLNode
015 * @see Torello.HTML.DotPair
016 * @see Torello.HTML.NodeIndex
017 * @see Torello.HTML.NodeSearch.TagNodePeekInclusive
018 * @see Torello.HTML.NodeSearch.InnerTagPeekInclusive
019 */
020public class SubSection
021    implements CharSequence, java.io.Serializable, Cloneable, Replaceable
022{
023    // ********************************************************************************************
024    // ********************************************************************************************
025    // Fields
026    // ********************************************************************************************
027    // ********************************************************************************************
028
029
030    /** <EMBED CLASS='external-html' DATA-FILE-ID=SVUID> */
031    public static final long serialVersionUID = 1;
032
033    /**
034     * This public field identifies the sub-section location of a particular sub-section from a
035     * vectorized-html webpage.  The location of the sub-page is specified by the
036     * {@code class DotPair} public-fields:
037     * {@code public final int 'start'} and {@code public final int 'end'}
038     * 
039     * @see DotPair
040     */
041    public final DotPair location;
042
043    /**
044     * This public field identifies the actual nodes - <I><B>the vectorized-html sub-list</B></I> -
045     * that are included in this sub-section of an html web-page.
046     * 
047     * @see HTMLNode
048     */
049    public Vector<HTMLNode> html;
050
051
052    // ********************************************************************************************
053    // ********************************************************************************************
054    // Constructor
055    // ********************************************************************************************
056    // ********************************************************************************************
057
058
059    /**
060     * This just builds a new instance of this class.  It represents a 'sub-section' of the
061     * html-page that needs to encapsulated into an object-instance.  The contents of this
062     * data-structure are merely these two parameters that are passed to this constructor.
063     *
064     * @param location This parameter value will be assigned immediately to the internal-field 
065     * {@code public DotPair location.}  It is a two-integer {@code Vector}-index class that points
066     * to the starting index-position, inside the main html-{@code Vector}, <I>of the html
067     * {@code class 'SubSection'}</I> being constructed here.
068     * 
069     * @param html This parameter may be any vectorized-html web-page, but
070     * <SPAN STYLE="color: red;"><B><I>the intention is that this {@code Vector} is an exact 
071     * "cloned range" (a copy of a portion of the web-page) whose starting and ending integer index
072     * {@code Vector}-positions are demarcated by the contents of the parameter
073     * {@code 'location'}</B></I></SPAN>
074     *
075     * @throws IllegalArgumentException This exception will throw if either of these two scenarios
076     * occur:
077     * 
078     * <BR /><BR /><UL CLASS=JDUL>
079     * <LI>If the input {@code Vector<HTMLNode> 'html'} has {@code html.size() == 0}.</LI>
080     * <LI>If {@code html.size() != location.size()}</LI>
081     * </UL>
082     * 
083     * @see SubSection#location
084     * @see SubSection#html
085     * @see DotPair
086     * @see NodeIndex
087     * @see HTMLNode
088     */
089    public SubSection(DotPair location, Vector<HTMLNode> html)
090    {
091        if (location == null) throw new NullPointerException
092            ("Parameter 'DotPair location' to SubSection constructor was null.");
093
094        if (html == null) throw new NullPointerException
095            ("Parameter 'Vector<HTMLNode> html' to SubSection constructor was null.");
096
097        if (html.size() == 0) throw new IllegalArgumentException(
098            "Parameter 'Vector<HTMLNode> html' to SubSection constructor has size zero, but " +
099            "this is not allowed here."
100        );
101
102        if (location.size() != html.size()) throw new IllegalArgumentException(
103            "Field 'public final int end' [value=" + location.end + "] of passed-parameter " +
104            "'DotPair location' to SubSection constructor is different than the length of the " +
105            "html-vector [" + html.size() + "]."
106        );
107
108        this.location   = location;
109        this.html       = html;
110    }
111
112
113    // ********************************************************************************************
114    // ********************************************************************************************
115    // Misc Interface Methods
116    // ********************************************************************************************
117    // ********************************************************************************************
118
119
120    /**
121     * Java's {@code interface Cloneable} requirements.  This instantiates a new {@code SubSection}
122     * with identical {@code Vector<HTMLNode> html} and {@code DotPair location} fields.
123     * 
124     * @return A new {@code SubSection} whose internal fields are identical to this one.
125     */
126    public SubSection clone()
127    { return new SubSection(location, html); }
128
129    /**
130     * Java's hash-code requirement.
131     * 
132     * @return A hash-code that may be used when storing this node in a java hashed-collection.
133     * The starting location of this {@code SubSection} ought to be be a unique hash
134     */
135    public int hashCode() { return location.start; }
136
137    /*
138     * Java's {@code interface Comparable<T>} requirements.  This does a very simple comparison
139     * using the field {@code public DotPair location}.
140     * 
141     * <BR /><BR /><B><SPAN STYLE="color: red;">FINAL METHOD:</B></SPAN> This method is declared 
142     * {@code final}, and cannot be modified by sub-classes.
143     * 
144     * @param other Another {@code SubSection}, to be compared to {@code 'this' SubSection}
145     * 
146     * @return An integer that fulfils Java's {@code interface Comparable<T> public boolean
147     * compareTo(T t)} method requirements.
148     * 
149     * @see DotPair#compareTo(DotPair)
150     */
151    // public final int compareTo(SubSection other)
152    // { return this.location.compareTo(other.location); }
153
154    /**
155     * This is an "alternative Comparitor" that can be used for sorting instances of this class.
156     * It should work with the {@code Collections.sort(List, Comparator)} method in the standard
157     * JDK package {@code java.util.*;}
158     * 
159     * <BR /><BR /><B CLASS=JDDescLabel>Comparator Heuristic:</B>
160     * 
161     * <BR />This simply compares the {@code public} {@link DotPair}-Typed field
162     * {@link #location} to each-other <I>using that class' <B>secondary</B></I> instance of
163     * {@code Comparator}.
164     * 
165     * <BR /><BR />{@code DotPair's} secondary-comparitor may be viewed at: {@link DotPair#comp2}.
166     * 
167     * @see DotPair#comp2
168     */
169    public static Comparator<SubSection> comp2 =
170        (SubSection ss1, SubSection ss2) -> DotPair.comp2.compare(ss1.location, ss2.location);
171
172
173    // ********************************************************************************************
174    // ********************************************************************************************
175    // CharSequence Methods
176    // ********************************************************************************************
177    // ********************************************************************************************
178
179
180    /**
181     * Java's {@code toString()} requirement.
182     * 
183     * <BR /><BR /><B CLASS=JDDescLabel>Final Method:</B>
184     * 
185     * <BR />This method is final, and cannot be modified by sub-classes.
186     * 
187     * @return A {@code String}-representation of this {@code HTMLNode.}
188     * 
189     * @see #toString()
190     */
191    public final String toString()
192    { return Util.pageToString(html); }
193
194    /**
195     * Returns the char value at the specified index of the String defined-by an invokation of the
196     * method: {@code Util.pageToString(html)}.
197     * An index ranges from {@code '0'} (zero) to {@code length() - 1.} The first {@code char}
198     * value of the sequence is at index {@code '0'}, the next at index one, and so on, as for
199     * array indexing.
200     * 
201     * <BR /><BR /><B>NOTE:</B> If the char value specified by the index is a surrogate, the
202     * surrogate value is returned.
203     * 
204     * <BR /><BR /><B CLASS=JDDescLabel>Final Method:</B>
205     * 
206     * <BR />This method is final, and cannot be modified by sub-classes.
207     * 
208     * @param index The index of the char value to be returned
209     * 
210     * @return The specified char value
211     * 
212     * @see #toString()
213     */
214    public final char charAt(int index)
215    { return toString().charAt(index); }
216
217    /**
218     * Returns the length of the {@code String} defined-by an invokation of the method:
219     * {@code Util.pageToString(html)}.  The length is the number of 16-bit {@code char's} in the
220     * sequence.
221     * 
222     * <BR /><BR /><B CLASS=JDDescLabel>Final Method:</B>
223     * 
224     * <BR />This method is final, and cannot be modified by sub-classes.
225     * 
226     * @return the number of {@code chars} in {@code this.n.str}
227     * 
228     * @see #toString()
229     */
230    public final int length() { return toString().length(); }
231
232    /**
233     * Returns a {@code java.lang.CharSequence} that is a subsequence of the {@code String}
234     * defined-by an invokation of the method: {@code Util.pageToString(html)}.  The subsequence
235     * starts with the {@code char} value at the specified index and ends with the {@code char}
236     * value at index {@code end - 1.}  The length (in {@code char's}) of the returned sequence is
237     * {@code end - start}, so if {@code start == end} then an empty sequence is returned.
238     * 
239     * <BR /><BR /><B CLASS=JDDescLabel>Final Method:</B>
240     * 
241     * <BR />This method is final, and cannot be modified by sub-classes.
242     * 
243     * @param start The start index, inclusive
244     * @param end The end index, exclusive
245     * 
246     * @return The specified subsequence
247     * 
248     * @see #toString()
249     */
250    public final CharSequence subSequence(int start, int end)
251    { return toString().substring(start, end); }
252
253
254    // ********************************************************************************************
255    // ********************************************************************************************
256    // Replaceable Methods
257    // ********************************************************************************************
258    // ********************************************************************************************
259
260
261    // All of these "Inherit" the Replaceable-Interface Comments by javadoc.  There is no need
262    // to retype the description that is already going to be there when JavaDoc does its thing
263
264    public int originalSize()           { return location.size(); }
265    public int currentSize()            { return html.size(); }
266    public int originalLocationStart()  { return location.start; }
267    public int originalLocationEnd()    { return location.end + 1; }
268
269
270    public Vector<HTMLNode> currentNodes() { return html; }
271
272    public HTMLNode firstCurrentNode()
273    { return currentNodes().elementAt(0); }
274
275    public HTMLNode lastCurrentNode()
276    { return html.elementAt(html.size() - 1); }
277
278
279    public boolean addAllInto(Vector<HTMLNode> fileVec)
280    { return fileVec.addAll(html); }
281
282    public boolean addAllInto(int index, Vector<HTMLNode> fileVec)
283    { return fileVec.addAll(index, html); }
284
285    public int update(Vector<HTMLNode> originalHTML)
286    { return ReplaceNodes.r(originalHTML, location, html); }
287}