1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
package Torello.Java;

import java.util.*;
import java.io.*;
import java.util.regex.*;
import java.util.zip.*;

/**
 * A utility for saving Regular-Expressions in a text-file that may be lazily-loaded at run-time
 * when needed.
 * 
 * <EMBED CLASS='external-html' DATA-FILE-ID=REGEX_FILES>
 */
@Torello.JavaDoc.StaticFunctional
public class RegExFiles
{
    private RegExFiles() { }

    /**
     * This loads a regular expression text file.  Each line is interpreted as a new Regular
     * Expression {@code Pattern}.
     *
     * <BR /><BR />This method expects the <B><I>entire regular expression to fit on a single
     * line</I></B>, and therefore, each new line containing text-data (without a starting
     * <B>{@code '#'}</B>) will be compiled into a new regular expression.  Use the {@code '\n'}
     * escape within the expression to match newlines.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Some Syntax Rules:</B>
     * 
     * <BR /><UL CLASS=JDUL>
     * <LI> <B>Comment lines</B> are lines beginning with the <I>POUND</I> <B>({@code '#'})</B>
     *      sign.
     *      </LI>
     * <LI> <B>Blank lines</B> are ignored by the file-parse completely.</LI>
     * <LI> Lines with <B>only white-space</B> are considered blank.</LI>
     * <LI> <B>Flag Lines</B> are lines that begin with two, successive, <I>POUND</I>
     *      <B>({@code '##'})</B> signs.  The flags named on such a line are applied to the very
     *      next Regular-Expression line only.
     *      </LI>
     * <LI> All non-comment, non-blank and non-flag lines are converted into Regular-Expression
     *      {@code Pattern's}
     *      </LI>
     * </UL>
     *
     * <BR /><BR /><B CLASS=JDDescLabel>LFEC Note:</B>
     * 
     * <BR />This method will <I>halt program execution</I> if any exceptions occur when loading a
     * Regular-Expression text file!  This is the primary-purpose of all {@code 'LFEC'} - Load File
     * Exception Catch methods.
     *
     * @param f Filename for a Regular Expression
     *
     * @return A {@code Vector} containing one compiled regular expression per line.  Comment lines
     * &amp; blank lines will all be ignored.
     *
     * @see java.util.regex.Pattern
     * @see #generateFlags(String)
     * @see LFEC#ERROR_EXIT(Throwable, String)
     */
    public static Vector<Pattern> LFEC(String f)
    {
        try
            { return parse(FileRW.loadFileToVector(f, false), f); }

        catch (Throwable t)
        {
            LFEC.ERROR_EXIT(t, "Attempt to load Regular Expression file: [" + f + "], failed.\n");
        }

        return null; // Should NOT be possible to reach this statement...
    }

    /**
     * This does the <B><I>exact same thing</I></B> as {@link #LFEC(String)}, but loads the file
     * into a {@code Vector} using the "JAR File" information included here.  In this case,
     * parameter {@code f} indicates a jar-file class-loader pointer.  It will not load from the
     * standard file-system.
     *
     * <BR /><BR /><B CLASS=JDDescLabel>Java's <CODE>getResourceAsStream</CODE>:</B>
     * 
     * <BR />The JAR implies that Java's "load resource as stream" features are being used in place
     * of standard file i/o routines.  Specifically, this loads from a JAR file, as seen below:
     *
     * <DIV CLASS=SNIP>{@code
     * BufferedReader br =
     *     new BufferedReader(new InputStreamReader(c.getResourceAsStream(f)));
     * }</DIV>
     *
     * @param c This contains the class that is loading the file.  It is not too important to use
     * the "exact class" - the only reason the class is needed is because the "Class Loader"
     * employs the exact "Package Name" of the class for figuring out the directory /
     * sub-directory where the data-file is stored.  This variable may not be null.
     *
     * <BR /><BR /><B>EXAMPLE:</B> If you wanted to load a "Regular Expressions.txt" file that
     * was in the same BASH/Debian/etc...  directory as the following class - the following call
     * to {@code 'RegExFiles'} would load the text-file "Regular Expressions.txt" into memory
     * quickly.  The primary purpose being that text files are <B><I>much easier to read than
     * 'double-escaped' Java {@code String's}.</I></B>
     *
     * <BR /><BR /><B>NOTE:</B> It might be important to read the Java Doc's about the
     * {@code 'getResourceAsStream(String)'} method for retrieving data that was stored to a JAR
     * file instead of a UNIX/BASH/MS-DOS system file.  Oracle's Java 8 would help.
     *
     * <EMBED CLASS='external-html' DATA-FILE-ID=RAWTYPES>
     *
     * @param f This is a file-pointer to a file stored inside a Java JAR file.
     *
     * @return A Vector containing one compiled regular expression per line.  Comment lines &amp;
     * blank lines will all be ignored.
     *
     * @see #LFEC(String)
     * @see #parse(Vector, String)
     * @see LFEC#ERROR_EXIT(Throwable, String)
     */
    public static Vector<Pattern> LFEC_JAR(Class<?> c, String f)
    {
        // NOTE(review): the reader uses the platform-default charset, exactly as before.
        try (
            InputStream     is = openResource(c, f);
            BufferedReader  br = new BufferedReader(new InputStreamReader(is));
        )
            { return parse(readLines(br), f); }

        catch (Throwable t)
        { 
            LFEC.ERROR_EXIT(
                t,
                "Attempted to load Regular Expression file: [" + f + "]\n" +
                "From jar-file using class: [" + className(c) + "]\n" +
                "Did not load successfully."
            );
        }

        // Should NOT be possible to reach this statement...
        // Compiler does not recognize LFEC.ERROR_EXIT

        return null;
    }

    /**
     * This is identical to {@link #LFEC_JAR(Class, String)}, except that it presumes the file was
     * compressed before saving.  The resource is read as a GZIP-compressed, Java-serialized
     * {@code String} holding the complete text of the Regular-Expression file.
     *
     * <BR /><BR />The text is split into lines with {@code BufferedReader.readLine()}, the same
     * call that {@code LFEC_JAR} uses.  As a consequence a final line that has no terminating
     * newline is still parsed, and the {@code '\r'} of a {@code "\r\n"} line-ending never becomes
     * part of a {@code Pattern}.
     *
     * @param c This contains the class that is loading the file.  It is not too important to use
     * the "exact class" - the only reason the class is needed is because the "Class Loader"
     * employs the exact "Package Name" of the class for figuring out the directory /
     * sub-directory where the data-file is stored.  This variable may not be null.  Again, the
     * class-loader looks in the directory of the package that contains this class!
     *
     * <BR /><BR /><B>NOTE:</B> The method {@link #LFEC_JAR(Class, String)} has a more detailed
     * look at the particular use of this parameter.  The easy way to understand is: just pass the
     * class that is doing the actual loading of the regular-expression <B><I>(presuming the
     * regex.dat file is in the same directory as the {@code '.class'} file!)</I></B>
     *
     * <EMBED CLASS='external-html' DATA-FILE-ID=RAWTYPES>
     *
     * @param f This is a file-pointer to a file stored inside a Java JAR file.
     *
     * @return A {@code Vector} containing one compiled regular expression per line.  Comment
     * lines &amp; blank lines will all be ignored.
     *
     * @see #LFEC_JAR(Class, String)
     * @see #parse(Vector, String)
     * @see LFEC#ERROR_EXIT(Throwable, String)
     */
    public static Vector<Pattern> LFEC_JAR_ZIP(Class<?> c, String f)
    {
        // NOTE(review): this performs native Java deserialization.  That is only appropriate
        // while the resource is one shipped inside the application's own JAR.
        try (
            InputStream         is      = openResource(c, f);
            GZIPInputStream     gzip    = new GZIPInputStream(is);
            ObjectInputStream   ois     = new ObjectInputStream(gzip);
        )
        {
            String fileStr = (String) ois.readObject();

            // Reading through a BufferedReader keeps the line-splitting rules identical to
            // LFEC_JAR, and makes a single pass over the text.
            try (BufferedReader br = new BufferedReader(new StringReader(fileStr)))
                { return parse(readLines(br), f); }
        }

        catch (Throwable t)
        {
            LFEC.ERROR_EXIT(t,
                "Attempted to load Regular Expression file: [" + f + "]\n" +
                "From jar-file using class: [" + className(c) + "]\n" +
                "Content was zipped, but failed to load."
            );
        }

        return null; // Should NOT be possible to reach this statement...
    }

    // Opens resource 'f' relative to class 'c'.  getResourceAsStream returns null when the
    // resource cannot be found; that is reported here as a FileNotFoundException naming the
    // resource, rather than surfacing later as a bare NullPointerException.
    private static InputStream openResource(Class<?> c, String f) throws IOException
    {
        InputStream is = c.getResourceAsStream(f);

        if (is == null) throw new FileNotFoundException(
            "Resource [" + f + "] was not found relative to class [" + c.getName() + "]"
        );

        return is;
    }

    // Drains a reader into a Vector, one element per line, with line-terminators removed.
    private static Vector<String> readLines(BufferedReader br) throws IOException
    {
        Vector<String>  lines = new Vector<>();
        String          line;

        while ((line = br.readLine()) != null) lines.addElement(line);

        return lines;
    }

    // Null-safe class-name used when building error messages, so that a null 'c' cannot throw
    // from inside a catch-block and hide the exception that is being reported.
    private static String className(Class<?> c)
    { return (c == null) ? "null" : c.getCanonicalName(); }

    /**
     * This does the <B><I>exact same thing</I></B> as {@link #LFEC(String)}, but takes a
     * "pre-loaded file" as a {@code Vector}.  This is an internal method - used to ensure that
     * the methods {@code LFEC}, {@code LFEC_JAR} and {@code LFEC_JAR_ZIP} all do the exact same
     * thing.
     *
     * <BR /><BR />Any exception thrown while parsing (for instance by {@code Pattern.compile})
     * is passed to {@code LFEC.ERROR_EXIT}.
     *
     * @param file This presumes that the regular-expression text-file has been loaded into a
     * {@code Vector<String>} (w/out the "include newlines" option!)
     *
     * @param name The name of the file loading is required so that error-printing-information is
     * easier.
     *
     * @return A {@code Vector} containing one compiled regular expression per line.  Comment lines
     * &amp; blank lines will all be ignored.
     *
     * @see #LFEC(String)
     * @see #generateFlags(String)
     */
    protected static Vector<Pattern> parse(Vector<String> file, String name)
    {
        try
        {
            Vector<Pattern> ret     = new Vector<Pattern>();
            int             flags   = 0;

            for (String line : file)
            {
                // Blank lines, and lines containing only white-space, are skipped.
                if (line.trim().isEmpty()) continue;

                // The '#' must be the very first character.  An indented '#' is NOT a comment,
                // and such a line is compiled as a regular expression.
                if (line.charAt(0) == '#')
                {
                    // "##" marks a flag-line, any other '#' line is a plain comment.
                    if (line.startsWith("##")) flags = generateFlags(line);
                    continue;
                }

                // A flags-mask of zero is precisely what Pattern.compile(String) uses.
                ret.add(Pattern.compile(line, flags));

                // Flags only apply to the first expression that follows the flag-line.
                flags = 0;
            }

            return ret;
        }

        catch (Throwable t)
            { LFEC.ERROR_EXIT(t, "error parsing regular expression file: " + name); }

        return null; // Should NOT be possible to reach this statement...
    }

    /**
     * This information has been copied from Java's regular expression: {@code Pattern}. This is a
     * Helper function as it converts the text-{@code String's} into their constants, so that a
     * user may include these text {@code String's} in a regular expression file.
     *
     * <BR /><BR />Each flag is detected with a plain sub-string search.  The names are
     * case-sensitive, and may be separated by anything at all (spaces, commas, {@code '|'}).
     *
     * <BR /><BR /><B>NOTE:</B> The regular expression loader will only load regular expressions
     * that fit on a single line of text.  Other than lines that begin with a comment, each line
     * is intended/interpreted as an independent Regular Expression.
     *
     * @param line A flag-line from a Regular-Expression file.  This may not be null.
     *
     * @return The bit-wise OR of every {@code Pattern} flag-constant whose name occurs in
     * {@code 'line'}.  Zero is returned when none of the names are found.
     *
     * @see java.util.regex.Pattern
     */
    protected static int generateFlags(String line)
    {
        int mask = 0;

        if (line.contains("CANON_EQ"))                  mask |= Pattern.CANON_EQ;
        if (line.contains("CASE_INSENSITIVE"))          mask |= Pattern.CASE_INSENSITIVE;
        if (line.contains("DOTALL"))                    mask |= Pattern.DOTALL;
        if (line.contains("COMMENTS"))                  mask |= Pattern.COMMENTS;
        if (line.contains("LITERAL"))                   mask |= Pattern.LITERAL;
        if (line.contains("MULTILINE"))                 mask |= Pattern.MULTILINE;
        if (line.contains("UNICODE_CASE"))              mask |= Pattern.UNICODE_CASE;
        if (line.contains("UNICODE_CHARACTER_CLASS"))   mask |= Pattern.UNICODE_CHARACTER_CLASS;
        if (line.contains("UNIX_LINES"))                mask |= Pattern.UNIX_LINES;

        return mask;
    }
}