001package Torello.Java; 002 003import Torello.Java.ReadOnly.ReadOnlySet; 004import Torello.Java.ReadOnly.ReadOnlyHashSet; 005import Torello.Java.ReadOnly.ReadOnlyList; 006import Torello.Java.ReadOnly.ReadOnlyArrayList; 007 008import Torello.Java.Additional.Counter; 009 010import java.util.regex.Pattern; 011import java.util.regex.Matcher; 012 013import java.util.stream.Stream; 014 015import java.util.function.Supplier; 016 017@Torello.JavaDoc.StaticFunctional 018public class StrSource 019{ 020 private StrSource() { } 021 022 023 // ******************************************************************************************** 024 // ******************************************************************************************** 025 // FIELDS 026 // ******************************************************************************************** 027 // ******************************************************************************************** 028 029 030 private static final char[] REGEX_ESCAPE_CHARS_ARR = 031 { '\\', '/', '(', ')', '[', ']', '{', '}', '$', '^', '+', '*', '?', '-', '.' }; 032 033 /** 034 * These are 'control' characters (Reg Ex Code), so they must be escaped if the are to be 035 * treated as their ASCII-equivalent values. 036 */ 037 public static final ReadOnlySet<Character> REGEX_ESCAPE_CHARS = 038 new ReadOnlyHashSet<>(REGEX_ESCAPE_CHARS_ARR, null); 039 040 private static final char[] JS_ESCAPE_CHARS_ARR = 041 { '\\', '/', '\n', '\"' }; 042 043 /** 044 * When converting a {@code String} for a Java-Script {@code String}, these are the 045 * characters that must be escaped. 046 */ 047 public static final ReadOnlySet<Character> JS_ESCAPE_CHARS = 048 new ReadOnlyHashSet<>(JS_ESCAPE_CHARS_ARR, null); 049 050 /** 051 * The list of reserved Java Key-Words. This list was written by ChatGPT on February 1st, 052 * 2024. 
053 */ 054 public static final ReadOnlyList<String> reservedKeywords = new ReadOnlyArrayList<>( 055 "abstract", "assert", "boolean", "break", "byte", "case", "catch", "char", "class", 056 "const", "continue", "default", "do", "double", "else", "enum", "extends", "false", 057 "final", "finally", "float", "for", "goto", "if", "implements", "import", "instanceof", 058 "int", "interface", "long", "native", "new", "null", "package", "permirs", "private", 059 "protected", "public", "return", "short", "static", "strictfp", "super", "switch", 060 "synchronized", "this", "throw", "throws", "transient", "true", "try", "void", "volatile", 061 "while" 062 ); 063 064 /** This will match the definition for a java {@code 'Generic'} class or interface */ 065 public static final Pattern GENERIC_PARAMS = Pattern.compile("^.+?<([\\s\\w\\<>,\\?]+)>$"); 066 067 /** This shall match a Java Package {@code String} */ 068 public static final Pattern PACKAGE_NAME = Pattern.compile("([A-Za-z_]\\w*\\.)+"); 069 070 071 // ******************************************************************************************** 072 // ******************************************************************************************** 073 // Searching for a tag in an HTML string (the early way - without regular expressions) 074 // ******************************************************************************************** 075 // ******************************************************************************************** 076 077 078 /** 079 * If parameter {@code String s} contains any tag within-which there is a valid 080 * {@code "HREF"}, this will return the contents of the {@code HREF} Attribute/InnerTag. 081 * 082 * @param s This is usually some variant of an HTML element/tag {@code String}. This method 083 * was the first one written for HTML in this scrape package, and is just kept here for legacy 084 * reasons. 
The {@code class HTML.TagNode} has a number of options for extracting the 085 * {@code 'HREF'} attribute from an HTML element. 086 * 087 * @return The attribute-value of an {@code HREF=...} attribute inside (usually an {@code <A>} 088 * 'Anchor') HTML tag. This will return 'null' if there is no {@code HREF="..."} 089 * attribute-value pair is found or identified. 090 * 091 * @throws IllegalArgumentException If there is no end-quote found for the {@code HREF="..."} 092 * sub-string. 093 */ 094 public static String grep_HREF_tag(String s) 095 { 096 s = s.toLowerCase(); 097 String quote = "\""; 098 099 int hrefPos = s.indexOf("href=\""); 100 101 if (hrefPos == -1) 102 { 103 hrefPos = s.indexOf("href='"); 104 if (hrefPos == -1) return null; 105 quote = "'"; 106 } 107 108 // System.out.print("\t[hrefPos=" + hrefPos + "]"); 109 110 // the " + 6" is because the string HREF=" is 6 characters long 111 String ret = s.substring(hrefPos + 6); 112 int endQuotePos = ret.indexOf(quote); 113 114 if (endQuotePos == -1) throw new IllegalArgumentException 115 ("HREF has no End-Quote!\n\nFor String:\n" + s); 116 117 // System.out.print("endQuotePos = " + endQuotePos + " " + ret.substring(0, endQuotePos)); 118 119 return ret.substring(0,endQuotePos); 120 } 121 122 /** 123 * If parameter {@code String s} contains an HTML {@code "IMG"} tag, this will return the 124 * contents of the {@code "SRC=..."} attribute tag-field. 125 * 126 * @param s This is usually some variant of an HTML element/tag {@code String}. This method 127 * was the first one written for HTML in this scrape package, and is just kept here for legacy 128 * reasons. The {@code class HTML.TagNode} has a number of options for extracting the 129 * {@code 'SRC'} attribute from an HTML element. 130 * 131 * @return The attribute-value of a {@code SRC=...} attribute inside (usually an {@code <IMG>} 132 * 'Image') HTML tag. 
'null' is returned if: 133 * 134 * <BR /><BR /><OL CLASS=JDOL> 135 * <LI>There is no HTML {@code 'IMG'} token found in the {@code String}</LI> 136 * <LI>There is no {@code SRC='...'} attribute-value pair found.</LI> 137 * </OL> 138 */ 139 public static String grep_IMG_SRC_tag(String s) 140 { 141 String stlc = s.toLowerCase(); 142 // System.out.println("1: " + stlc); 143 144 int imgPos = stlc.indexOf("<img "); 145 146 if (imgPos == -1) return null; 147 148 stlc = stlc.substring(imgPos + 5); 149 // System.out.println("2: " + stlc + "[imgPos=" + imgPos + "]"); 150 151 // first check for double-quotes 152 String quote = "\""; 153 int srcPos = stlc.indexOf("src=\""); 154 155 if (srcPos == -1) 156 { 157 // if no double-quotes, try single quotes 158 srcPos = stlc.indexOf("src='"); 159 160 if (srcPos == -1) return null; 161 162 quote = "'"; 163 } 164 165 stlc = stlc.substring(srcPos + 5); 166 167 // System.out.println("3: " + stlc + "[srcPos=" + srcPos + "]"); 168 169 int endSrcPos = stlc.indexOf(quote); 170 171 if (endSrcPos == -1) return null; 172 173 int urlStart = imgPos + srcPos + 10; 174 int urlEnd = urlStart + endSrcPos; 175 176 // System.out.println 177 // ("4: [endSrcPos=" + endSrcPos + ", urlStart=" + urlStart + ", urlEnd=" + urlEnd); 178 179 return s.substring(urlStart, urlEnd); 180 } 181 182 183 // ******************************************************************************************** 184 // ******************************************************************************************** 185 // Java-Script & Reg-Ex String encoding (JSON.stringify()) 186 // ******************************************************************************************** 187 // ******************************************************************************************** 188 189 190 /** 191 * <EMBED CLASS='external-html' DATA-FILE-ID=STRSRC_ESC_4JS> 192 * 193 * @param str This may be any String in java. 
It is intended to be inserted into a Java-Script 194 * file between an open and close quotation marks. 195 * 196 * @return The String that is returned will have certain characters escaped, so that it may be 197 * wrapped in quotation marks and easily inserted into any java-script ".js" text-file. 198 * 199 * <BR /><BR /><B>Escaped-Text:</B> 200 * 201 * <BR /><BR /><UL CLASS=JDUL> 202 * <LI> {@code char '\'} will be escaped to: {@code "\\"}</LI> 203 * 204 * <LI> {@code char '/'} will be escaped to: {@code "\/"}, this is required in Java-Script, but 205 * not Java! 206 * </LI> 207 * 208 * <LI> {@code char '"'} will be escaped to: {@code "\""}</LI> 209 * <LI> {@code char '\n'} will be escaped to: {@code "\\n"}</LI> 210 * </UL> 211 * 212 * <BR /><B><SPAN STYLE="color: red;">IMPORTANT NOTE:</B></SPAN> There is no easy, nor clear, 213 * way to express what is being replaced and/or escaped in a simple list. You may run this 214 * method on any {@code String} and view for yourself what changes. <B><I>The primary 215 * goal</B></I> of the method is to allow <I>*any* Java String of *any* length</I> to be 216 * converted, wrapped inside of an open and closed quotation-marks, and printed into a 217 * Java-Script {@code ".js" file}. Escaping "escape characters" which does come up some-what 218 * often in HTML text/string processing is near-impossible to explain clearly! Review the 219 * stack-overflow "incantation" for possible help. 220 */ 221 public static String escStrForJavaScript(String str) 222 { return StrReplace.r(str, JS_ESCAPE_CHARS_ARR, '\\'); } 223 224 /** 225 * This method should only be used for a <B><I>precise {@code String} match</I></B> using a 226 * regular-expression. This method shall 'escape' all characters that the JVM Regular 227 * Expression Matcher in {@code package java.util.regex.*} would expect be escaped. 
If the 228 * input parameter {@code 'str'} contains any regular-expression code, then this method would 229 * <B>FAIL</B> as it would escape regular-expression code into unusable text. 230 * 231 * @param str This should be any {@code String} for which the user would like to find an 232 * <B>exact match, as-is</B>. 233 * 234 * @return A regular-expression ready {@code String} 235 */ 236 public static String escStrForRegEx(String str) 237 { return StrReplace.r(str, REGEX_ESCAPE_CHARS_ARR, '\\'); } 238 239 240 // ******************************************************************************************** 241 // ******************************************************************************************** 242 // Java Code String-Functions 243 // ******************************************************************************************** 244 // ******************************************************************************************** 245 246 247 /** 248 * Parses a {@code String} such as {@code T extends TreeMap<Integer, List<String>>}. It is 249 * strictly used, to <B><I>only parse</I></B> the generic-definition lists that are at the top 250 * of generic <B>classes</B> and <B>interfaces</B>. 251 * 252 * <EMBED CLASS='external-html' DATA-FILE-ID=STRSRC_PARSE_GENT DATA-NODE="An Example of Sorts"> 253 * 254 * @param genericTypeParamOrDefinition This should be {@code String} retrieved from inside the 255 * less-than ({@code '<'}) and greater-than ({@code '>'}) symbols. For example, for 256 * {@code SortedList<A extends Comparable, B>} the {@code String} passed to this method should 257 * be {@code "A extends Comparable, B"} 258 * 259 * @return This should break down this {@code CSV} (comma separated value) list into 260 * individual {@code String's}. 261 * 262 * @throws NoMatchException if the input {@code String} parameter does not match the 263 * generics regular-expression {@link #GENERIC_PARAMS}. 
264 * 265 * @throws StringFormatException If the input {@code String} could not be parsed. 266 */ 267 public static String[] parseGenericType(String genericTypeParamOrDefinition) 268 { 269 Matcher m = GENERIC_PARAMS.matcher(genericTypeParamOrDefinition); 270 String innerGenericStr = m.find() ? m.group(1) : null; 271 272 if (innerGenericStr == null) throw new NoMatchException( 273 "The provided value to parameter 'genericTypeParamOrDefinition' [" + 274 genericTypeParamOrDefinition + "] did not match the Java Generics " + 275 "Regular-Expression:\n" + GENERIC_PARAMS.toString() 276 ); 277 278 Stream.Builder<String> b = Stream.builder(); 279 String[] sArr = innerGenericStr.split(","); 280 281 for (int i=0; i < sArr.length; i++) 282 283 // We have shifted elements, and now all of the remaining elements would be null 284 // return immediately 285 286 if (sArr[i] == null) return b.build().toArray(String[]::new); 287 288 // Simple generic-type definition: has no "sub-generics" or "inner-generics" 289 // Add this to the list, and move on 290 291 else if ((! sArr[i].contains("<")) && (! sArr[i].contains(">"))) 292 b.accept(sArr[i].trim()); 293 294 // This is a generic-type definition that has at least one "sub-generic" 295 // If there are an equal number of '<' and '>' then there were no commas 296 // in between the sub-generics. Add this to this list, and move on. 297 298 else if ( StringParse.countCharacters(sArr[i], '<') == 299 StringParse.countCharacters(sArr[i], '>') 300 ) 301 b.accept(sArr[i].trim()); 302 303 // There was a generic with a sub-generic that had a comma... 304 else 305 { 306 // If we have reached the end of the String, the number of greater than and 307 // less than symbols was not balanced. 308 309 if (i == (sArr.length - 1)) throw new StringFormatException( 310 "The provided value to parameter 'genericTypeParamOrDefinition' [" + 311 genericTypeParamOrDefinition + "], was not properly formatted, and could " + 312 "not be parsed." 
313 ); 314 315 // Join the next String Array Element with the current one. 316 sArr[i] = sArr[i].trim() + ", " + sArr[i + 1].trim(); 317 318 // Shift the rest of the array left. 319 for (int j=i+1; j < (sArr.length-1); j++) sArr[j] = sArr[j+1]; 320 sArr[sArr.length - 1] = null; 321 322 // decrement the counter to retest this array-index location 323 i--; 324 } 325 326 // Return the list 327 return b.build().toArray(String[]::new); 328 } 329 330 /** 331 * This will print a caret-symbol on a line of text underneath the input {@code String} 332 * parameter {@code 'str'}. Preceeding the caret-symbol will be exactly {@code strPos - 1} 333 * space characters. This look of the output-{@code String} is similar to some of the error 334 * messages generated by a Java Compiler. 335 * 336 * <BR /><BR />The caret-symbol {@code '^'} will bee pointing to the character at index 337 * {@code strPos}. 338 * 339 * <DIV CLASS=EXAMPLE>{@code 340 * // Notice the (accidental, on-purpose) use of the '@'' character instead of an 'a' 341 * // To make this easy, lets compute the exact location of this erroneous character. 342 * String s = "This string has an inv@lid character."; 343 * int pos = s.indexOf("@"); 344 * 345 * // This will print out a line of text containing the string, with a caret pointing 346 * // at the '@' symbol. 347 * System.out.println(StringParse.caretBeneath(s, pos)); 348 * 349 * // PRINTS: 350 * // This string has an inv@lid character. 351 * // ^ 352 * }</DIV> 353 * 354 * @param str This may be any input-{@code String} that is less than 100 characters. 355 * 356 * @param strPos This must be a number between 0 and the length 357 * 358 * @return The same input-{@code String} with a second line appended underneath (using a 359 * newline) having a <B>caret</B> ({@code '^'}) directly underneath the character at 360 * {@code strPos}. 361 * 362 * @throws IllegalArgumentException If the input {@code String} is longer than 363 * {@code 100 characters}. 
364 * 365 * @throws StringFormatException If the input {@code String} contains any new-line {@code '\n'} 366 * or tab {@code '\t'} characters. 367 * 368 * @throws StringIndexOutOfBoundsException If the value pased to {@code strPos} is negative or 369 * greater than the length of the input-{@code String}. 370 * 371 * @see StringParse#nChars(char, int) 372 */ 373 public static String caretBeneath(String str, int strPos) 374 { 375 if (str.length() > 100) throw new IllegalArgumentException( 376 "The length of the input-string must be less than 100. str has length: " + 377 str.length() 378 ); 379 380 if (StrCmpr.containsOR(str, "\n", "\t")) throw new StringFormatException 381 ("The input-string may not contain new-line or tab characters."); 382 383 if (strPos >= str.length()) throw new StringIndexOutOfBoundsException( 384 "The value you have passed to 'strPos' [" + strPos + "] is greater than the length " + 385 "the input-string [" + str.length() + "]" 386 ); 387 388 if (strPos < 0) throw new StringIndexOutOfBoundsException 389 ("You have passed a negative value to strPos [" + strPos + "]"); 390 391 return str + "\n" + StringParse.nChars(' ', strPos) + '^'; 392 } 393 394 private static StringFormatException REM_GENERIC_ERROR_MSG(String s, int charPos) 395 { 396 return new StringFormatException( 397 /* 398 "The opening '<' and closing '>' symbols in the type-string have not been " + 399 "properly placed.\n" + 400 */ 401 "Generic Type-String Error, Beginning at Noted Location:\n" + 402 caretBeneath(s, charPos) 403 ); 404 } 405 406 /** 407 * This will remove the generic type-parameters expression from a Java Type Declaration or 408 * Reference. In simple terms, this removes the {@code '<K, V>'} from a {@code String} such 409 * as {@code Map.Entry<K, V>}. 
410 * 411 * <BR /><TABLE CLASS=JDBriefTable> 412 * <TR> <TH>Returned {@code String}</TH> 413 * <TH>Input {@code String}</TH> 414 * </TR> 415 * <TR> <TD>{@code "Vector"}</TD> 416 * <TD>{@code "Vector<E>"}</TD> 417 * </TR> 418 * <TR> <TD>{@code "AbstractHNLI"}</TD> 419 * <TD>{@code "AbstractHNLI<E extends HTMLNode, F>"}</TD> 420 * </TR> 421 * <TR> <TD>{@code "Torello.HTML.TagNode"}</TD> 422 * <TD>{@code "Torello.HTML.TagNode"}</TD> 423 * </TR> 424 * <TR> <TD>{@code "ClassA.InnerClassB.InnerClassC"}</TD> 425 * <TD>{@code "ClassA<X>.InnerClassB<Y>.InnerClassC"}</TD> 426 * </TR> 427 * <TR> <TD>{@code "String[]"}</TD> 428 * <TD>{@code "String[]"}</TD> 429 * </TR> 430 * <TR> <TD>{@code "java.lang.String[]"}</TD> 431 * <TD>{@code "java.lang.String[]"}</TD> 432 * </TR> 433 * <TR> <TD>{@code "Vector"}</TD> 434 * <TD>{@code "Vector<String[]>"}</TD> 435 * </TR> 436 * <TR> <TD>{@code "java.util.Vector"}</TD> 437 * <TD>{@code "java.util.Vector<String[]>"}</TD> 438 * </TR> 439 * <TR> <TH COLSPAN=2>Point of Interest:</TH> 440 * </TR> 441 * <TR> <TD>"I watched the World Series"</TD> 442 * <TD>"I watched the World Series"</TD> 443 * </TR> 444 * <TR> <TD>{@code "Vector"}</TD> 445 * <TD>{@code "Vector<Quoth the Raven>"}</TD> 446 * </TR> 447 * <TR> <TH COLSPAN=2>Throws an Exception</TH></TR> 448 * <TR> <TD COLSPAN=2>{@code "HNLI<E> <"}</TD></TR> 449 * <TR> <TD COLSPAN=2>{@code "> <Quoth the Raven>"}</TD></TR> 450 * </TABLE> 451 * 452 * @param typeAsStr The "Reference Type" or "Declaration Type". 453 * 454 * @return The same {@code String}, having everything between the <B>outer-most, matching</B> 455 * {@code '<'} and {@code '>'} symbols. 456 * 457 * <BR /><BR /><B>NOTE:</B> The returned {@code String} will not contain any leading or 458 * trailing white-space. It is trimmed before being returned. 459 * 460 * @throws StringFormatException An exhaustive check on everything that could be wrong with 461 * a type-{@code String} is an impossibility (if you include checking for valid types). 
This 462 * exception is only thrown if the {@code '<'} and {@code '>'} symbols inside the 463 * input-{@code String} do not match-up. 464 * 465 * <BR /><BR />In order to avoid throwing this exception, there must be an equal number of 466 * opening and closing symbols. 467 * 468 * <BR /><BR />There is also a check to ensure that the charcters in this {@code String} 469 * are valid. 470 */ 471 public static String removeGeneric(String typeAsStr) 472 { 473 int leftPos = typeAsStr.indexOf('<'); 474 475 if (leftPos == -1) 476 { 477 int pos = typeAsStr.indexOf('>'); 478 479 if (pos == -1) return typeAsStr.trim(); 480 481 throw REM_GENERIC_ERROR_MSG(typeAsStr, pos); 482 } 483 484 char[] cArr = typeAsStr.toCharArray(); 485 int count = 1; // The number of OPENING-CLOSING tags (same as Inclusive) 486 int END = cArr.length; // This is the location JUST-AFTER the last USEABLE-char 487 int delta = 0; // How many characters have been deleted already. 488 // NOTE: This is zero, because the loop hasn't started. 489 // If there is a "Shift" this will be PRECISELY-EQUAL 490 // to the size of the last generic parameter-expression. 491 // ALSO: The only purpose of this is for error-reporting. 492 493 // check for a closing '>' before the first opening '<' 494 for (int j=0; j < leftPos; j++) 495 if (cArr[j] == '>') throw REM_GENERIC_ERROR_MSG(typeAsStr, j); 496 497 // Check for in-valid characters 498 // This is a lot of lines of code, but these methods are extremely short, and the input 499 // string (for all VALID) input will be very short. This is peace of mind. It checks... 500 for (int pos=0; pos < cArr.length; pos++) 501 { 502 char c = cArr[pos]; 503 if (! Character.isJavaIdentifierPart(c)) 504 if (! Character.isIdentifierIgnorable(c)) 505 if (! 
Character.isWhitespace(c)) 506 if ( 507 (c != '[') && (c != ']') && (c != '?') && (c != '<') && 508 (c != '>') && (c != ',') && (c != '.') 509 ) 510 throw REM_GENERIC_ERROR_MSG(typeAsStr, pos); 511 } 512 513 do 514 { 515 // Keeps a count on the number of "Opening Braces" and "Closing Braces" 516 // This is the same thing as the whole "Inclusive" deal, but with braces instead. 517 // 518 // count: At loop start, count is '1' If it ever reaches 0, the loop exits. 519 // leftPos: The location of the '<' that has been found. 520 int i = leftPos + 1; 521 522 while ((count > 0) && (i < END)) 523 { 524 if (cArr[i] == '<') count++; 525 else if (cArr[i] == '>') count--; 526 527 if (count > 0) i++; 528 } 529 530 // The '<' and the '>' didn't match up. Better to throw exception, than ignore it. 531 if ((count != 0) && (i == END)) 532 throw REM_GENERIC_ERROR_MSG(typeAsStr, leftPos); 533 534 int rightPos = i; // 'i' is currently pointing to the '>' 535 536 // Erase the most recently found <...> expression 537 int sourcePos = rightPos + 1; // Pointing at first VALID / NEED-TO-COPY char 538 int destPos = leftPos; // Pointing at '<' 539 boolean possiblyAnother = false; 540 541 while (sourcePos < END) 542 { 543 // The next character to copy... check it first to see if it is valid! 544 char c = cArr[sourcePos]; 545 546 // continue to shift all the characters left to erase the expression. 547 cArr[destPos] = c; 548 549 if (! possiblyAnother) // Haven't found an opening '<' 550 { 551 // If there is a '>' - ***AND NO '<' HAS BEEN FOUND***, this is an error. 552 if (c == '>') 553 throw REM_GENERIC_ERROR_MSG(typeAsStr, delta + sourcePos); 554 555 // If there is another '<', then it is possible another expression awaits us 556 if (c == '<') 557 { 558 // Reset the outer-loop variables for the next iteration. There is going 559 // to be another iteration - guaranteed. 560 // 561 // NOTE: Delta is supposed to hold how many characters are being deleted. 
562 // This is used for proper error-reporting (only) 563 564 // This is how many chars are in the current <...> expression 565 delta = rightPos - leftPos + 1; 566 567 leftPos = destPos; // Now pointing at the next open '<' char (just found!) 568 count = 1; // There was a new-unclosed '>', prepares for next loop 569 570 // You know it 571 possiblyAnother = true; 572 } 573 } 574 575 sourcePos++; destPos++; 576 } 577 578 // Completed without errors, and without another expression being found. 579 // NOTE: This used to be a one-line return call. 580 // ADDED: This now does a String.trim(). These little loops skip leading and 581 // trailing white-space BEFORE returning the String 582 // 583 // WORKS-NO-TRIM: return new String(cArr, 0, destPos); 584 // replace loop-body with the above line to get rid of trim() 585 if (! possiblyAnother) 586 { 587 int sPos = 0; 588 int len = destPos; // REMEMBER: new String(char[], int OFFSET, int COUNT) 589 // NOT: new String(char[], int SPOS, int EPOS) 590 591 // Skip LEADING-WHITESPACE 592 while ((sPos < cArr.length) && (destPos > 0) && Character.isWhitespace(cArr[sPos])) 593 { sPos++; destPos--; } // Advance start, *AND* shorten "count" 594 595 // Skip TRAILING WHITE-SPACE 596 while ((destPos > 1) && Character.isWhitespace(cArr[sPos + destPos-1])) 597 destPos--; // Shorten length *ONLY* 598 599 return new String(cArr, sPos, destPos); 600 } 601 602 END = destPos; // Pointing at the first invalid / unused / ALREADY-MOVED char 603 } 604 while (true); 605 } 606 607 /** 608 * This will remove any generic-parameter information from a Java type-{@code String} <B>and 609 * then</B> remove all package-information or outer-class {@code String's}. What is left 610 * is a single <B>Java Identifier {@code String}</B> that, <I>as long as the proper scope has 611 * been provided</I>, identifies a Java Type (Class, Interface, Enum, Record, Annotation). 
612 * 613 * <BR /><TABLE CLASS=JDBriefTable> 614 * <TR><TH>Output</TH><TH>Input</TH></TR> 615 * <TR><TD>{@code "Integer"}</TD><TD>{@code "java.lang.Integer"}</TD></TR> 616 * <TR><TD>{@code "Vector"}</TD><TD>{@code "java.util.Vector<E>"}</TD></TR> 617 * <TR><TD>{@code "Entry"}</TD><TD>{@code "java.util.Map.Entry<String, Integer>"}</TD></TR> 618 * <TR><TD>{@code "Entry"}</TD><TD>{@code "Map.Entry<String, Intger>"}</TD></TR> 619 * <TR><TD>{@code "Entry"}</TD><TD>{@code "Entry<String, Integer>"}</TD></TR> 620 * <TR><TD>{@code "Entry"}</TD><TD>{@code "Entry"}</TD></TR> 621 * <TR><TD>{@code "String[]"}</TD><TD>{@code "String[]"}</TD></TR> 622 * <TR><TD>{@code "String[]"}</TD><TD>{@code "java.lang.String[]"}</TD></TR> 623 * <TR><TD>{@code "Vector"}</TD><TD>{@code "Vector<String[]>"}</TD></TR> 624 * <TR><TD>{@code "Vector[]"}</TD><TD>{@code "Vector<String>[]"}</TD></TR> 625 * 626 * <TR><TH COLSPAN=2>Point of Interest:</TH></TR> 627 * <TR><TD>{@code "The World Series"}</TD><TD>{@code "The World Series"}</TD></TR> 628 * <TR><TD>{@code "Quoth the Raven"}</TD><TD>{@code "Quoth the Raven<java.lang.Integer>"}</TD></TR> 629 * 630 * <TR><TH COLSPAN=2>Finally:</TH></TR> 631 * <TR><TD>{@code "String..."}</TD><TD>{@code "String..."}</TD></TR> 632 * <TR><TD>{@code "String..."}</TD><TD>{@code "java.lang.String..."}</TD></TR> 633 * <TR><TD>{@code "Vector..."}</TD><TD>{@code "Vector<E>..."}</TD></TR> 634 * <TR><TD>{@code "Vector..."}</TD><TD>{@code "java.util.Vector<E>..."}</TD></TR> 635 * </TABLE> 636 * 637 * @param typeStr This is a type as a {@code String}. These are usually retrieved from Java 638 * Parser, in the Java Doc Upgrader package. This method does not provide an exhaustive 639 * check for all variants of format and naming erros of a Java Type. Some validity checks 640 * are performed regarding the use of non-Java type characters. 
641 * 642 * <BR /><BR /><B STYLE='color:red;'>NOTE:</B> All the exceptions thrown by the method 643 * {@link #removeGeneric(String)} will also be thrown here, if {@code 'typeStr'} is not 644 * not properly formatted. 645 * 646 * @return a Simplified version of the type that leaves out the scope, but provides a 647 * simple Java Identifier, instead. Throws exceptions if not properly formatted. If any 648 * array-bracket characters are passed, they is preserved, unless the arrays in this type 649 * are part of the generic-type parameters; please see the examples above. 650 * 651 * @throws StringFormatException Please see the explanation provided in 652 * {@link #removeGeneric(String)} under 'Throws'. 653 * 654 * @see #removeGeneric(String) 655 */ 656 public static String typeToJavaIdentifier(String typeStr) 657 { 658 String ret = removeGeneric(typeStr); 659 boolean isVarArgs = false; 660 661 if (ret.endsWith("...")) 662 { 663 ret = ret.substring(0, ret.length() - 3); 664 isVarArgs = true; 665 } 666 667 int pos = ret.lastIndexOf('.'); 668 669 if (isVarArgs) 670 { 671 if (pos == -1) return ret + "..."; 672 else return ret.substring(pos+1) + "..."; 673 } 674 675 else 676 { 677 if (pos == -1) return ret; 678 else return ret.substring(pos+1); 679 } 680 } 681 682 // This was designed while staring at the field retrieved from a JavaDoc HTML Page that 683 // looked like this (from AbstractHNLI) 684 // protected java.util.function.Predicate<E extends HTMLNode> p; 685 // This puts a group (group 1) around the ( extends HTMLNode) part, so it can be removed. 686 // JavaParser complained about it. 
/**
 * Locates one {@code 'extends'} clause: group 1 is the identifier that precedes the clause
 * (letters and digits only), and group 2 is the white-space, the word {@code 'extends'}, and
 * the (possibly dotted) name of the bounding type.  Type-Arguments that follow the bounding
 * type are <B>not</B> matched by this expression.
 */
private static final Pattern exClause =
    Pattern.compile("([A-Za-z][A-Za-z0-9]*)(\\s+extends\\s+[\\w\\.]+)");

/**
 * Removes every {@code 'extends'} clause from a Java Type Declaration, leaving only the
 * identifier that preceded each clause.
 *
 * <BR /><BR />A bounding type that carries its own Type-Arguments is removed in its
 * entirety.  For example {@code MyClass<T extends Vector<String>>} becomes
 * {@code MyClass<T>}.
 *
 * <BR /><BR /><B>NOTE:</B> Additional bounds that are joined using {@code '&'} are not
 * removed by this method; only the first bounding type of each clause is taken out.
 *
 * @param decl Any Type Declaration that includes the word {@code 'extends'}, followed by
 * type-parameter information.
 *
 * @return The same {@code String} without the clause(s).  If there is no such clause,
 * {@code 'decl'} is returned unchanged.
 */
public static String removeExtendsClause(String decl)
{
    Matcher m = exClause.matcher(decl);

    // The search is restarted on the shortened String after each removal.
    while (m.find())
    {
        // Everything up to (and including) the identifier in group 1 is retained
        final int keepUpTo = m.end(1);

        // The regular-expression stops at the name of the bounding type.  If that type has
        // Type-Arguments of its own, they have to be dropped as well.
        final int resumeAt = skipTypeArguments(decl, m.end());

        decl = decl.substring(0, keepUpTo) + decl.substring(resumeAt);
        m.reset(decl);
    }

    return decl;
}

// Returns the index immediately after a balanced '<...>' group that begins at 'from'
// (optionally preceded by white-space).  Returns 'from' itself when there is no such group,
// or when the angle-brackets are not balanced.
private static int skipTypeArguments(String s, int from)
{
    final int len = s.length();
    int pos = from;

    while ((pos < len) && Character.isWhitespace(s.charAt(pos))) pos++;

    if ((pos >= len) || (s.charAt(pos) != '<')) return from;

    int depth = 0;

    for (; pos < len; pos++)
    {
        final char c = s.charAt(pos);

        if (c == '<') depth++;
        else if ((c == '>') && (--depth == 0)) return pos + 1;
    }

    // Unbalanced angle-brackets: remove nothing beyond what the regular-expression matched
    return from;
}

/**
 * <EMBED CLASS='external-html' DATA-FILE-ID=STRSRC_JTYPE_STR>
 *
 * @param s Any Java {@code String}.
 *
 * @return {@code TRUE} if and only if the Java Compiler could interpret {@code 's'} as a valid
 * reference to a Java Type.  In computer-programming, the word <B>{@code Type}</B> can have a
 * lot of meanings, but here, the word should be interpreted as a Java Class, Interface,
 * Enumeration (an {@code 'enum'}), Annotation or Record.
 *
 * <BR /><BR /><B>NOTE:</B> {@code 's'} may include the period {@code '.'} since inner classes,
 * enum's and interfaces are also valid Java Type's.  Two consecutive period-characters, or a
 * period at the beginning or ending of {@code 's'} will result in this method returning
 * {@code FALSE}.  The empty-{@code String} also produces {@code FALSE}.
 *
 * <BR /><BR /><B>NOTE:</B> This method does not consult the list of reserved Java
 * Key-Words.  Any character other than a period, or one accepted by class
 * {@code Character} as part of a Java Identifier (for instance {@code '<'} or {@code '['}),
 * causes a {@code FALSE} result.
 */
public static boolean isJavaTypeStr(String s)
{
    // TRUE whenever the next character has to begin a new identifier: at the very start of
    // the String, and immediately after every period.  Java restricts the first character
    // of an identifier to a smaller sub-set than the remaining characters.
    boolean needStart = true;

    for (int i=0; i < s.length(); i++)
    {
        final char c = s.charAt(i);

        if (needStart)
        {
            // This also rejects a leading period, and a period that follows a period
            if (! Character.isJavaIdentifierStart(c)) return false;
            needStart = false;
        }

        else if (c == '.') needStart = true;

        // Character is NEITHER a period, NOR a Java Identifier Part ==> FALSE
        else if (! Character.isJavaIdentifierPart(c)) return false;
    }

    // If an identifier is still expected here, then 's' was empty, or it ended with a period
    return ! needStart;
}

/**
 * Checks whether an input {@code String} would be allowed as a Java Identifier - for instance,
 * whether the input would make a valid Field-Name, Variable-Name, Class-Name or Method-Name.
 *
 * <BR /><BR /><B CLASS=JDDescLabel>ChatGPT Note:</B>
 *
 * <BR /><B>ChatGPT, 3.5</B> wrote the original version of this method, including its in-line
 * comments.
 *
 * @param identifier Any Java {@code String}.  Both {@code null} and the empty-{@code String}
 * are accepted, and both produce {@code FALSE}.
 *
 * @return {@code TRUE} if-and-only-if parameter {@code 'identifier'} is a valid Java
 * Identifier, and is not listed in {@link #reservedKeywords}.
 */
public static boolean isValidJavaIdentifier(String identifier)
{
    // A null or empty String can never be an identifier
    if ((identifier == null) || identifier.isEmpty()) return false;

    // The first character is held to the stricter "Identifier Start" rule
    if (! Character.isJavaIdentifierStart(identifier.charAt(0))) return false;

    // All remaining characters must satisfy the "Identifier Part" rule
    for (int i=1; i < identifier.length(); i++)
        if (! Character.isJavaIdentifierPart(identifier.charAt(i)))
            return false;

    // Well-formed identifiers are still rejected when they are reserved Key-Words
    return ! reservedKeywords.contains(identifier);
}


// ********************************************************************************************
// ********************************************************************************************
// Replace Special-Character
// ********************************************************************************************
// ********************************************************************************************


/**
 * There are actually people out there who are willing to put character {@code '160'} (the
 * Non-Breaking Space) into a file or document, instead of a simple {@code ' '} element.
 * Any instances of this character shall be replaced with the standard space character
 * {@code ASCII #32}.
 *
 * @param s Any {@code String} will pass.  Generally {@code String's} that were converted from
 * HTML pages will contain {@code char #160} as it is occasionally translated from the HTML
 * escape sequence {@code &nbsp;}
 *
 * @return A {@code String} where every instance of white-space character {@code #160} has
 * been replaced with character {@code #32}
 */
public static String replaceNBSP(String s)
{ return s.replace((char) 160, ' '); }

/**
 * Even lower than {@code #160}, apparently, is the {@code "Zero Width Space"} (character
 * {@code #8203}, also known as Unicode Code-Point {@code U+200B}).  This is actually inserted
 * by the <B>JavaDoc Tool</B> (by {@code Sun / Oracle}) into JavaDoc generated HTML Pages.
 * Here, it shall be replaced by character {@code #32} - the <I>space-character</I>.
 *
 * <BR /><BR /><B><I STYLE='color: red;'>This character cannot be seen.</I></B>  If you are
 * ever sitting and wondering why a {@code String} seems to be something other than what it
 * looks like - you might have a zero-width space in your {@code String}.  If so, it will
 * take a while to find the bug.
 *
 * @param s Any {@code String} will pass.  Generally {@code String's} that were converted from
 * JavaDoc HTML pages will contain {@code char #8203}.
 *
 * @return A {@code String} where every instance of white-space character {@code #8203} has
 * been replaced with character {@code #32}
 */
public static String replaceZWSP(String s)
{ return s.replace((char) 8203, ' '); }


// ********************************************************************************************
// ********************************************************************************************
// CSS Source
// ********************************************************************************************
// ********************************************************************************************


/**
 * Checks if a Java-{@code String} constitutes a valid CSS Property-Name.  Note that this
 * method, in no way consults any "complete list" of all known CSS-Properties.  Instead, it
 * simply analyzes whether the name is congruent with the CSS-Property Validator rules that
 * are implemented by {@link #isCSSPropertyNameStart(char)} and
 * {@link #isCSSPropertyNamePart(char)}.
 *
 * @param cssPropertyName Any Java-{@code String}
 *
 * @return {@code TRUE} if and only if {@code 'cssPropertyName'} is non-empty, begins with a
 * character accepted by {@code isCSSPropertyNameStart}, and is followed only by characters
 * accepted by {@code isCSSPropertyNamePart}.
 */
public static boolean isCSSPropertyName(String cssPropertyName)
{
    if (cssPropertyName.length() == 0) return false;

    if (! isCSSPropertyNameStart(cssPropertyName.charAt(0))) return false;

    for (int i=1; i < cssPropertyName.length(); i++)
        if (! isCSSPropertyNamePart(cssPropertyName.charAt(i)))
            return false;

    return true;
}

/**
 * Checks whether parameter {@code 'c'} is one of the agreed-upon standard characters that are
 * allowed to begin CSS Property-Names: an ASCII letter, a hyphen or an underscore.
 *
 * @param c Any Java {@code char}-primitive
 *
 * @return {@code TRUE} if and only if {@code 'c'} is a character that would be allowed to
 * begin a CSS Property-Name
 */
public static boolean isCSSPropertyNameStart(char c)
{ return isAsciiLetter(c) || (c == '-') || (c == '_'); }

/**
 * Checks whether parameter {@code 'c'} is one of the agreed-upon standard characters that are
 * permitted within CSS Property-Names, after the first character of the name: an ASCII
 * letter, an ASCII digit, a hyphen or an underscore.
 *
 * @param c Any Java {@code char}-primitive
 *
 * @return {@code TRUE} if and only if {@code 'c'} is a character that would be allowed within
 * a valid CSS Property-Name.
 */
public static boolean isCSSPropertyNamePart(char c)
{ return isAsciiLetter(c) || isAsciiDigit(c) || (c == '-') || (c == '_'); }

// TRUE for 'A' through 'Z', and for 'a' through 'z'
private static boolean isAsciiLetter(char c)
{ return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')); }

// TRUE for '0' through '9'
private static boolean isAsciiDigit(char c)
{ return (c >= '0') && (c <= '9'); }


// ********************************************************************************************
// ********************************************************************************************
// More HTML Source
// ********************************************************************************************
// ********************************************************************************************


/**
 * Checks if a Java-{@code String} constitutes a valid HTML Attribute-Name.  Note that this
 * method, in no way consults any "complete list" of all known HTML-Attributes.  Instead, it
 * simply analyzes whether the name is congruent with the Attribute-Name Validator rules that
 * are implemented by {@link #isAttributeNameStart(char)} and
 * {@link #isAttributeNamePart(char)}.
 *
 * @param attributeName Any Java-{@code String}
 *
 * @return {@code TRUE} if and only if {@code 'attributeName'} is non-empty, begins with a
 * character accepted by {@code isAttributeNameStart}, and is followed only by characters
 * accepted by {@code isAttributeNamePart}.
 */
public static boolean isAttributeName(String attributeName)
{
    if (attributeName.length() == 0) return false;

    if (! isAttributeNameStart(attributeName.charAt(0))) return false;

    for (int i=1; i < attributeName.length(); i++)
        if (! isAttributeNamePart(attributeName.charAt(i)))
            return false;

    return true;
}

/**
 * Checks whether parameter {@code 'c'} is one of the agreed-upon standard characters that are
 * allowed to begin HTML Attribute-Names: an ASCII letter.
 *
 * @param c Any Java {@code char}-primitive
 *
 * @return {@code TRUE} if and only if {@code 'c'} is a character that would be allowed to
 * begin an HTML Attribute-Name
 */
public static boolean isAttributeNameStart(char c)
{ return isAsciiLetter(c); }

/**
 * Checks whether parameter {@code 'c'} is one of the agreed-upon standard characters that are
 * permitted within HTML Attribute-Names, after the first character of the name: an ASCII
 * letter, an ASCII digit, a hyphen or an underscore.
 *
 * @param c Any Java {@code char}-primitive
 *
 * @return {@code TRUE} if and only if {@code 'c'} is a character that would be allowed within
 * a valid HTML Attribute-Name.
 */
public static boolean isAttributeNamePart(char c)
{ return isAsciiLetter(c) || isAsciiDigit(c) || (c == '-') || (c == '_'); }

} // end of class StrSource