001package Torello.Languages;
002
003import java.util.*;
004
005import Torello.Java.*;
006
007/**
008 * Some simple String Utilities for helping parse (Español) Spanish <CODE>String's</CODE>.
009 * 
010 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=ES>
011 */
012public class ES
013{
014    private ES() { }
015
016    /**
017     * GRAVE &amp; ACCUTE are the "first bit" of this mask, if that bit is '0', then the mask is
018     * ACCUTE
019     */
020    public static final int GRAVE = 0b0001;
021
022    /**
023     * UPPER &amp; LOWER CASE are the "second bit" of this mask, if that bit is '0', then he mask
024     * is LOWER-CASE
025     */
026    public static final int UPPERCASE   = 0b0010;
027
028    /**
029     * This is intended to produce an accented vowel 'on request' from the method invocation.  The
030     * complete list of characters that may be returned by this function are listed below.
031     * 
032     * <BR /><BR /><TABLE BORDER='1'><TBODY>
033     * <TR><TH>Upper, Grave</TH><TH>Upper, Acute</TH><TH>Lower, Grave</TH><TH>Lower, Acute</TH></TR>    
034     * <TR><TD>À (192)</TD><TD>Á (193)</TD><TD>à (224)</TD><TD>á (225)</TD></TR>
035     * <TR><TD>È (200)</TD><TD>É (201)</TD><TD>è (232)</TD><TD>é (233)</TD></TR>
036     * <TR><TD>Ì (204)</TD><TD>Í (205)</TD><TD>ì (236)</TD><TD>í (237)</TD></TR>
037     * <TR><TD>Ò (210)</TD><TD>Ó (211)</TD><TD>ò (242)</TD><TD>ó (243)</TD></TR>
038     * <TR><TD>Ù (217)</TD><TD>Ú (218)</TD><TD>ù (249)</TD><TD>ú (250)</TD></TR>                                                                                    <BR />
039     * </TBODY></TABLE>
040     * 
041     * @param vowel Any vowel: [A, E, I, O, U] or [a, e, i, o, u]
042     * <BR /><BR />If 'vowel' is not one of these 10 choices, then other characters will be
043     * ignored, and this method will just return (char) 0.
044     * 
045     * @param flags The following values can be OR'D (masked): Helper.GRAVE or Helper.UPPERCASE
046     *
047     * <BR /> <BR />In total, there are 4 possible versions: Upper-Case/Lower-Case output, and
048     * Accute/Grave output.
049     * 
050     * <BR /><BR /><UL CLASS=JDUL>
051     * <LI> If Helper.GRAVE is not masked (binary-bit 0), 
052     *      then an "accute" accented vowel is returned (accute is "the default").
053     *      </LI>
054     * <LI> If Helper.UPPERCASE is not masked (binary-bit 1),
055     *      then a lower-case vowel is returned (lower-case is "the default").
056     *      </LI>
057     * </UL>
058     * 
059     * @return With correct input: one of ten listed vowels above - and if not, then ASCII 0 is
060     * returned.
061     */
062    public static char getAccentedVowel(char vowel, int flags)
063    {
064        int i = 0;
065
066        if      ((vowel == 'a') || (vowel == 'A')) i = 192;
067        else if ((vowel == 'e') || (vowel == 'E')) i = 200;
068        else if ((vowel == 'i') || (vowel == 'I')) i = 204;
069        else if ((vowel == 'o') || (vowel == 'O')) i = 210;
070        else if ((vowel == 'u') || (vowel == 'U')) i = 217;
071        else return (char) 0;
072
073        // À (192)È (200)Ì (204)Ò (210)Ù (217)
074        if (    ((flags & UPPERCASE) > 0)
075            &&  ((flags & GRAVE) > 0)
076        )
077            return (char) (i + 0);
078
079        // Á (193)É (201)Í (205)Ó (211)Ú (218)
080        else if ((flags & UPPERCASE) > 0) return (char) (i + 1);
081
082        // à (224)è (232)ì (236)ò (242)ù (249)
083        else if ((flags & GRAVE) > 0) return (char) (i + 32);
084
085        // á (225)é (233)í (237)ó (243)ú (250)
086        else return (char) (i + 33);
087    }
088
089
090    /**
091     * This converts all Spanish-Accented characters into a lower-case, and non-accented
092     * equivalent.  Also, upper-case regular characters are down-cased.  If specifically
093     * requested, case can be preserved.
094     * 
095     * <BR /><BR /><TABLE>
096     * <TR><TD>A (65) ... Z (90)                    </TD><TD>&rArr; a .. z  </TD></TR>
097     * <TR><TD>À (192), Á (193), à (224), á (225)   </TD><TD>&rArr; A or a  </TD></TR>
098     * <TR><TD>È (200), É (201), è (232), é (233)   </TD><TD>&rArr; E or e  </TD></TR>
099     * <TR><TD>Ì (204), Í (205), ì (236), í (237)   </TD><TD>&rArr; I or i  </TD></TR>
100     * <TR><TD>Ò (210), Ó (211), ò (242), ó (243)   </TD><TD>&rArr; O or o  </TD></TR>
101     * <TR><TD>Ù (217), Ú (218), ù (249), ú (250)   </TD><TD>&rArr; U or u  </TD></TR>
102     * <TR><TD>Ñ (209),  ñ (241)                    </TD><TD>&rArr; N or n  </TD></TR>
103     * <TR><TD>Ü (220),  ü (252)                    </TD><TD>&rArr; U or u  </TD></TR>
104     * <TR><TD>Ý (221),  ý (253)                    </TD><TD>&rArr; Y or y  </TD></TR>
105     * </TABLE>
106     * 
107     * @param c Any ASCII/UniCode character
108     * 
109     * @param preserveCase If this is TRUE, then accented capital letters remain capitlized.  If
110     * this is FALSE, then all letters are converted to lowercase.
111     *
112     * @return If this character contained an accent, it will be removed.  It will also be in
113     * lower-case form, unless preserveCase is TRUE.
114     */
115    public static char toNonAccented(char c, boolean preserveCase)
116    {
117        if ((c == 224) || (c == 225))   return 'a';
118        if ((c == 232) || (c == 233))   return 'e';
119        if ((c == 236) || (c == 237))   return 'i';
120        if ((c == 242) || (c == 243))   return 'o';
121        if ((c == 249) || (c == 250))   return 'u';
122        if (c == 241)                   return 'n';
123        if (c == 252)                   return 'u';
124        if (c == 253)                   return 'y';
125
126        if ((c == 192) || (c == 193))   return (preserveCase ? 'A' : 'a');
127        if ((c == 200) || (c == 201))   return (preserveCase ? 'E' : 'e');
128        if ((c == 204) || (c == 205))   return (preserveCase ? 'I' : 'i');
129        if ((c == 210) || (c == 211))   return (preserveCase ? 'O' : 'o');
130        if ((c == 217) || (c == 218))   return (preserveCase ? 'U' : 'u');
131        if (c == 209)                   return (preserveCase ? 'N' : 'n');
132        if (c == 220)                   return (preserveCase ? 'U' : 'u');
133        if (c == 221)                   return (preserveCase ? 'Y' : 'y');
134
135        if ((c >= 'A') && (c <= 'Z'))   return (char) (preserveCase ? c : (c -'A' + 'a'));
136
137        return c;
138    }
139
140    /**
141     * Removes Spanish-Accent Characters from all characters in a string.
142     * 
143     * @return a new String, one where toNonAccented(s.charAt(i), preserveCase) has been
144     * called for each character in the String.  This is just a small for-loop over a String.
145     * 
146     * @see #toNonAccented(char, boolean)
147     */
148    public static String toNonAccented(String s, boolean preserveCase)
149    {
150        StringBuilder   sb  = new StringBuilder();
151        int             len = s.length();
152
153        for (int i=0; i < len; i++) sb.append(toNonAccented(s.charAt(i), preserveCase));
154
155        return sb.toString();
156    }
157
158    /**
159     * Produces a <I>lower-case Spanish Character</I> - if and only if the input-parameter
160     * is an <I>upper-case Spanish Character</I>.
161     * This is almost identifical to the usual String function toLowerCase(char), but it
162     * also includes Spanish vowels and consonants with:
163     * 
164     * <BR /><BR /><UL CLASS=JDUL>
165     * <LI>accent marks:    À, Á, à, and á ... etc.</LI>
166     * <LI>umlaut's:        Ü and ü</LI>
167     * <LI>tildes:          Ñ and ñ</LI>
168     * </UL>
169     * 
170     * <BR /><B>NOTE:</B> The 'accute' and 'grave' accent marks are not so prevalently used anymore
171     * as in the time of "Don Quijote de la Mancha" - however, they are included here, just in case.
172     * Mostly the 'acute' accent mark (from top-right-corner to the lower-left-corner) is used in
173     * newspapers around here (Dallas, Texas).
174     * 
175     * @param c Any ASCII or UniCode {@code char}
176     * 
177     * @return Uppercase letters 'A' .. 'Z' are converted to 'a' .. 'z' <BR />
178     *  AND:
179     * 
180     * <BR /><BR /><TABLE>
181     * <TR><TD>À (192),  Á (193)    </TD><TD>&rArr; à (224), á (225)    </TD></TR>
182     * <TR><TD>È (200),  É (201)    </TD><TD>&rArr; è (232), é (233)    </TD></TR>
183     * <TR><TD>Ì (204),  Í (205)    </TD><TD>&rArr; ì (236), í (237)    </TD></TR>
184     * <TR><TD>Ò (210),  Ó (211)    </TD><TD>&rArr; ò (242), ó (243)    </TD></TR>
185     * <TR><TD>Ù (217),  Ú (218)    </TD><TD>&rArr; ù (249), ú (250)    </TD></TR>
186     * <TR><TD>Ñ (209)              </TD><TD>&rArr; ñ (241)             </TD></TR>
187     * <TR><TD>Ý (221)              </TD><TD>&rArr; ý (253)             </TD></TR>
188     * <TR><TD>Ü (220)              </TD><TD>&rArr; ü (252)             </TD></TR>
189     * </TABLE>
190     * 
191     * @see #toUpperCaseSpanish(char)
192     * @see #toLowerCaseSpanish(String)
193     */
194    public static char toLowerCaseSpanish(char c)
195    {
196        if ((c >= 'A') && (c <= 'Z')) return (char) (c + 'a' - 'A');
197
198        else if (
199                (c == 192) || (c == 193) || (c == 200) || (c == 201)
200            ||  (c == 204) || (c == 205) || (c == 210) || (c == 211)
201            ||  (c == 217) || (c == 218) || (c == 209) || (c == 220)
202            ||  (c == 221)
203        )
204            return (char) (c + 32);
205
206        return c;
207    }
208
209    /**
210     * This cycles through an input-String parameter, and converts any/all letters that are
211     * uppercase - including ones with accent marks, tildes, and umlaut's, and returns a
212     * {@code String} n which all characters are lower-case, but have their punctuation preserved.
213     * 
214     * @return a new string in which Helper.toLowerCaseSpanish(char) has been invoked on each
215     * character.
216     * 
217     * @see #toLowerCaseSpanish(char)
218     */
219    public static String toLowerCaseSpanish(String s)
220    {
221        StringBuilder ret = new StringBuilder();
222        for (int i=0; i < s.length(); i++) ret.append(toLowerCaseSpanish(s.charAt(i)));
223        return ret.toString();
224    }
225
226
227
228
229    /**
230     * Produces an <I>upper-case Spanish Character</I> - if and only if the input-parameter
231     * is a <I>lower-case Spanish Character</I>.  See toLowerCaseSpanish(char) for more notes!
232     * 
233     * @param c Any ASCII or UniCode char
234     * 
235     * @return  Lowercase letters {@code 'a' .. 'z'} are converted to {@code 'A' .. 'Z'}
236     * 
237     * <BR /><BR />AND:
238     *
239     * <BR /><BR /><TABLE>
240     * <TR><TD>à (224), á (225) </TD><TD>&rArr; À (192), Á (193)</TD></TR>
241     * <TR><TD>è (232), é (233) </TD><TD>&rArr; È (200), É (201)</TD></TR>
242     * <TR><TD>ì (236), í (237) </TD><TD>&rArr; Ì (204), Í (205)</TD></TR>
243     * <TR><TD>ò (242), ó (243) </TD><TD>&rArr; Ò (210), Ó (211)</TD></TR>
244     * <TR><TD>ù (249), ú (250) </TD><TD>&rArr; Ù (217), Ú (218)</TD></TR>
245     * <TR><TD>ñ (241)          </TD><TD>&rArr; Ñ (209)         </TD></TR>
246     * <TR><TD>ý (253)          </TD><TD>&rArr; Ý (221)         </TD></TR>
247     * <TR><TD>ü (252)          </TD><TD>&rArr; Ü (220)         </TD></TR>
248     * </TABLE>
249     * 
250     * @see #toLowerCaseSpanish(char)
251     * @see #toUpperCaseSpanish(String)
252     */
253    public static char toUpperCaseSpanish(char c)
254    {
255        if ((c >= 'a') && (c <= 'z'))
256            return (char) (c + 'A' - 'a');
257
258        else if (   (c == 224) || (c == 225) || (c == 232) || (c == 233)
259                ||  (c == 236) || (c == 237) || (c == 242) || (c == 243)
260                ||  (c == 249) || (c == 250) || (c == 241) || (c == 253)
261                ||  (c == 252)
262            )
263            return (char) (c - 32);
264
265        return c;
266    }
267    
268    /**
269     * This cycles through an input-String parameter, and converts any/all letters
270     * that are lower-case, including ones with accent marks, tildes, and umlaut's, and
271     * returns a String in which all characters are upper-case, but have their punctuation
272     * preserved.
273     * 
274     * @return a new string in which Helper.toUpperCaseSpanish(char) has been invoked on each
275     * character.
276     * 
277     * @see #toUpperCaseSpanish(char)
278     */
279    public static String toUpperCaseSpanish(String s)
280    {
281        StringBuilder ret = new StringBuilder();
282        for (int i=0; i < s.length(); i++) ret.append(toLowerCaseSpanish(s.charAt(i)));
283        return ret.toString();
284    }
285
286
287    /**
288     * Checks if this character could be a Spanish Language Character
289     * 
290     * @param c Any ASCII or Uni-Code Character
291     * 
292     * @return <B>TRUE:</B> If and only if 'c' is one of the following char-sets:
293     * 
294     * <BR /><BR /><UL CLASS=JDUL>
295     * <LI>a ... z</LI>
296     * <LI>A ... Z</LI>
297     * <LI>Á (193), É (201), Í (205), Ó (211), Ú (218), Ý (221), Ü (220), Ñ (209)</LI>
298     * <LI>á (225), é (233), í (237), ó (243), ú (250), ý (253), ü (252), ñ (241)</LI>
299     * </UL>
300     * 
301     * <BR />and <B>FALSE</B> otherwise...
302     */
303    public static boolean isLanguageChar(char c)
304    {
305        if ((c >= 'a') && (c <= 'z')) return true;
306        if ((c >= 'A') && (c <= 'Z')) return true;
307
308        // Á 193, É 201, Í 205, Ó 211, Ú 218, Ý 221, Ü 220, Ñ 209
309        if (    (c == 193) || (c == 201) || (c == 205) || (c == 211) || (c == 218) || (c == 221)
310            ||  (c == 220) || (c == 209))
311            return true;
312
313        // á 225, é 233, í 237, ó 243, ú 250, ý 253, ü 252, ñ 241
314        if (    (c == 225) || (c == 233) || (c == 237) || (c == 243) || (c == 250) || (c == 253)
315             || (c == 252) || (c == 241))
316            return true;
317
318        return false;
319    }
320    
321    /**
322     * Checks if a {@code String} contains non-Spanish-Language Characters.  Utilizes
323     * {@link #isLanguageChar(char)}
324     * 
325     * @param s Any {@code String} consisting of ASCII &amp; UniCode Characters
326     * 
327     * @return  {@code TRUE} only if {@code isLanguageChar(s.charAt(i))} returns {@code TRUE} for
328     * ever integer {@code i}, and <B>FALSE</B> otherwise.
329     * 
330     * @see #isLanguageChar(char)
331     */
332    public static boolean onlyLanguageChars(String s)
333    {
334        for (int i=0; i < s.length(); i++) if (! isLanguageChar(s.charAt(i))) return false;
335        return true;
336    }
337
338    /**
339     * This is a function which identifies Spanish Language Infinitive Form Verbs.
340     * 
341     * @param s Any String consisting of ASCII &amp; UniCode Characters
342     * 
343     * @return {@code TRUE} if and only if:
344     * <BR />input-parameter {@code 's'} ends with: ar, er, ir, arse, erse, irse, ír, írse
345     * <BR />{@code 's'} passes the {@link #onlyLanguageChars(String)} boolean test
346     * <BR /><B>FALSE</B> otherwise
347     * 
348     * @see #onlyLanguageChars(String)
349     */
350    public static boolean isSpanishVerbInfinitive(String s)
351    {
352        s = toLowerCaseSpanish(s);
353
354        if (onlyLanguageChars(s))
355            if (    s.endsWith("ar")    || s.endsWith("er")     || s.endsWith("ir")
356                ||  s.endsWith("arse")  || s.endsWith("erse")   || s.endsWith("irse")
357                ||  s.endsWith("ír")    || s.endsWith("írse"))
358                return true;
359
360        return false;
361    }
362
363    private static final String[] ESC_STRS =
364    {
365        "&aacute;", "&eacute;", "&iacute;", "&oacute;", "&uacute;", "&Aacute;", "&Eacute;",
366        "&Iacute;", "&Oacute;", "&Uacute;", "&ntilde;", "&laquo;", "&raquo;", "&mdash;", "&uuml;",
367        "&iuml;", "&iexcl;", "&iquest;", "&quot;"
368    };
369
370    private static final char[] REPL_CHARS =
371    {
372        'á',  'é', 'í', 'ó', 'ú', 'Á', 'É', 'Í', 'F', 'Ú', 'ñ', '«', '»', '-', 'ü', 'ï', '¡',
373        '¿', '\"'
374    };
375
376    /**
377     * This function is somewhat redundant, as a complete HTML-Character Escape-Sequence class is
378     * included in the Torello.HTML package.  There is a link provided to these methods at the end
379     * of this comment.  This method was written much earlier, and functions well, but it can only
380     * convert HTML-Escape-Sequences that are used in Spanish - rather than all HTML-Character
381     * Escape-Sequences. Here is the complete list:
382     * 
383     * <BR /><BR /><TABLE>
384     * <TR>
385     *      <TD>&amp;aacute;</TD><TD>&rArr; á</TD></TR><TR><TD>&amp;eacute;</TD>
386     *      <TD>&rArr;  é</TD>
387     * </TR>
388     * <TR>
389     *      <TD>&amp;iacute;</TD><TD>&rArr; í</TD></TR><TR><TD>&amp;oacute;</TD>
390     *      <TD>&rArr;  ó</TD>
391     * </TR>
392     * <TR>
393     *      <TD>&amp;uacute;</TD><TD>&rArr; ú</TD></TR><TR><TD>&amp;Aacute;</TD>
394     *      <TD>&rArr;  Á</TD>
395     * </TR>
396     * <TR>
397     *      <TD>&amp;Eacute;</TD><TD>&rArr; É</TD></TR><TR><TD>&amp;Iacute;</TD>
398     *      <TD>&rArr;  Í</TD>
399     * </TR>
400     * <TR>
401     *      <TD>&amp;Oacute;</TD><TD>&rArr; Ó</TD></TR><TR><TD>&amp;Uacute;</TD>
402     *      <TD>&rArr;  Ú</TD>
403     * </TR>
404     * <TR>
405     *      <TD>&amp;ntilde;</TD><TD>&rArr; ñ</TD></TR><TR><TD>&amp;laquo;</TD>
406     *      <TD>&rArr;  «</TD>
407     * </TR>
408     * <TR>
409     *      <TD>&amp;raquo; </TD><TD>&rArr; »</TD></TR><TR><TD>&amp;mdash;</TD>
410     *      <TD>&rArr;  -</TD>
411     * </TR>
412     * <TR>
413     *      <TD>&amp;uuml;  </TD><TD>&rArr; ü</TD></TR><TR><TD>&amp;iuml;</TD>
414     *      <TD>&rArr;  ï</TD>
415     * </TR>
416     * <TR>
417     *      <TD>&amp;iexcl; </TD><TD>&rArr; ¡</TD></TR><TR><TD>&amp;iquest;</TD>
418     *      <TD>&rArr;  ¿</TD>
419     * </TR>
420     * <TR>
421     *      <TD>&amp;quot;</TD>
422     *      <TD>&rArr;  "</TD>
423     * </TR>
424     * </TABLE>
425     * 
426     * @param s Any ASCII/UniCode String, which ostensibly ought to (possibly) contain
427     * Spanish-Language HTML-Escaped characters within them.
428     * 
429     * @return A string where all HTML escape-sequences have been converted to their actual
430     * character equivalent.
431     * 
432     * @see Torello.HTML.Escape#escHTMLToChar(String)
433     * @see Torello.HTML.Escape#htmlEsc(char)
434     * @see StrReplace#r(String, String[], char[])
435     */
436    public static String convertHTML_TO_UTF8(String s)
437    { return StrReplace.r(s, ESC_STRS, REPL_CHARS); }
438
439
440    //*********************************************************************************************
441    //*********************************************************************************************
442
443    /**
444     * This is some "list processing" stuff - used to grep "DictCC".  It's an internally used
445     * list.
446     */
447    private static Vector<String> removeList = null;
448
449    /**
450     * This just stores a list of "words", and they are removed from certain texts/articles.  This
451     * program currently uses it to remove certain extremely commonly used words, so they are not
452     * repeatedly searched for in the dictionary.  It is <I>kind of</I> a hack.
453     * 
454     * @param wordList An array of Strings.  It is expected to be a list of words that may be
455     * removed from Spanish Texts, but it can be any list of words.  It is checked to see if 100%
456     * of the characters in each word are alphabetic, and throws an IllegalArgumentException if they
457     * are not.
458     * 
459     * @throws IllegalArgumentException if the wordList parameter contains strings with invalid
460     * non-word characters.
461     */
462    public static void setRemoveWordsArr(String[] wordList)
463    {
464        removeList = new Vector<String>();
465        
466        for (int i=0; i < wordList.length; i++)
467        {
468            String word = wordList[i];
469
470            for (int j=0; j < word.length(); j++)
471
472                if (! isLanguageChar(word.charAt(j))) throw new IllegalArgumentException(
473                    "Contains word:" + word + " which has invalid, non-word, language-characters");
474
475            removeList.addElement(word);
476        }
477    }
478
479    /**
480     * This function references the words in the "removeList" and removes every occurence of each
481     * word that is present in the "removeList" {@code Vector<String>}
482     * 
483     * @param s A String of Spanish Words.
484     * 
485     * @return The same string with each instance of each word that is listed in the "removeList"
486     * {@code Vector} removed from the {@code String}
487     * 
488     * @see #setRemoveWordsArr(String[])
489     */
490    public static String removeWords(String s)
491    {
492        // boolean printIt = false;
493        // int tpos = s.indexOf(" a ");
494        // if (tpos != -1) if (s.indexOf(" a ", tpos + 3) != -1) printIt = true;
495        // if (printIt) System.out.println(s + ":");
496        
497        Enumeration<String> e = removeList.elements();
498        // System.out.println("CLEANING: [" + s + "]");
499
500        while (e.hasMoreElements())
501        {
502            String lc = toLowerCaseSpanish(s);
503
504            // System.out.print(" <" + lc + ">");
505            String word = e.nextElement();
506
507            // System.out.print(" {" + word + "}");
508    
509            int pos = 0;
510            while ((pos = lc.indexOf(word, pos)) != -1)
511            {
512                int     startPos    = pos;
513                int     endPos      = pos + word.length();
514                boolean leftEnd     = (startPos == 0);
515                boolean rightEnd    = (endPos == lc.length());
516                char    leftChar    = leftEnd ? 0 : lc.charAt(startPos - 1);
517                char    rightChar   = rightEnd ? 0 : lc.charAt(endPos);
518
519                // if (printIt) System.out.print("(" + leftChar + "," + rightChar + "," + leftEnd +
520                // "," + rightEnd + "," + startPos + "," + endPos + ") ");
521    
522                if (isLanguageChar(leftChar))   { pos = endPos; continue; }
523                if (isLanguageChar(rightChar))  { pos = endPos; continue; }
524
525                // System.out.print("(" + startPos + "," + endPos + ")" );
526                boolean leftSpace = (leftChar == ' ');
527                boolean rightSpace = (rightChar == ' ');
528
529                if (leftSpace && rightSpace)    startPos--;
530                else if (leftSpace && rightEnd) startPos--;
531                else if (leftEnd && rightSpace) endPos++;
532                
533                s = (leftEnd ? "" : s.substring(0, startPos)) +
534                    (rightEnd ? "" : s.substring(endPos));
535
536                // if (printIt) System.out.print("[" + s + "] ");
537                lc = toLowerCaseSpanish(s);
538            }
539        }
540
541        // if (printIt) System.out.println("\n");
542        return s;
543    }
544}