1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
package Torello.HTML.Tools.NewsSite;

import Torello.Java.*;
import java.io.*;

/**
 * A functional-interface (lambda-target) through which the programmer decides what is done with
 * the HTML of a news article once it has been downloaded.  A {@code static} factory method,
 * {@link #saveToFS(String)}, is included for the common case of writing every received article
 * straight to a directory on the file-system.
 * 
 * <EMBED CLASS='external-html' DATA-FILE-ID=SCRAPE_ART_REC>
 */
@FunctionalInterface
public interface ScrapedArticleReceiver
{
    /** <EMBED CLASS='external-html' DATA-FILE-ID=SVUIDFI>  */
    public static final long serialVersionUID = 1;

    /**
     * <B><SPAN STYLE="color: red;">FUNCTIONAL-INTERFACE METHOD:</SPAN></B> The single abstract
     * method which an implementation (lambda-expression or class) of this
     * <CODE>&#64;FunctionalInterface</CODE> has to supply.
     *
     * @param articleBody The {@code Article} instance that is built by {@code class
     * ScrapeArticles} after an article has been downloaded, and then handed to this method.
     * What ultimately happens to a successfully scraped and parsed news article is left
     * entirely to the programmer.
     * 
     * @param sectionURLNum A convenience parameter telling the implementation <B><I>from which
     * URL Section</I></B> of the News Web-Site's main-page the article was downloaded.
     * 
     * <BR /><BR /><B>NOTE:</B> See {@code class ScrapeURLs} for more about
     * "Section {@code URL's}."  The number of "Section {@code URL's}" in a news web-site scrape
     * is simply the length of the {@code Vector<URL> sectionURL's}
     * 
     * @param articleNum Tells the implementation which article number, within its
     * section-{@code URL}, is being delivered.  Every section-{@code URL} has some number of
     * articles; together with {@code 'sectionURLNum'} this value can be used, for instance, to
     * build unique file-names.
     * 
     * @throws ReceiveException May be thrown by the lambda-expression or class instance that
     * implements this {@code FunctionalInterface}.  Using this exception is optional.
     */
    public void receive(Article articleBody, int sectionURLNum, int articleNum)
        throws ReceiveException;

    /**
     * <B><SPAN STYLE="color: red;">saveToFS =&gt; Save To File-System</SPAN></B>
     * 
     * <BR /><BR />A static factory method producing a {@code 'ScrapedArticleReceiver'} whose
     * only job is to <B>save each received article into a directory</B> on the file-system.
     * 
     * <BR /><BR />Only a Directory-Name, passed as {@code String} parameter
     * {@code 'dirNameStr'}, needs to be supplied, making this the <B><I>simplest</I></B> way of
     * obtaining an instance of this interface.
     * 
     * <BR /><BR />Each article is written to a file whose name is the directory name, followed
     * by the padded section-number, a {@code '.'}, the padded article-number and the extension
     * {@code ".dat"}.
     * 
     * @param dirNameStr The name of a file-system directory into which received articles are
     * written.  If it does not already end with {@code File.separator}, the separator is
     * appended before file-names are built.
     * 
     * @return A receiver that writes every article it is given into {@code 'dirNameStr'}.  Any
     * exception raised while writing is re-thrown from {@code receive(...)} wrapped in a
     * {@code ReceiveException} that carries the original exception as its cause.
     * 
     * @throws WritableDirectoryException This method shall check whether it is possible to
     * write to the provided directory name.
     */
    public static ScrapedArticleReceiver saveToFS(String dirNameStr)
    {
        // Verify the target directory before any receiver is handed back to the caller.
        WritableDirectoryException.check(dirNameStr);

        // The directory prefix used for file-names must finish with the platform's
        // File-Separator ('/' on UNIX, '\' on MS-DOS); append one only when it is missing.

        final String targetDir;

        if (dirNameStr.endsWith(File.separator))    targetDir = dirNameStr;
        else                                        targetDir = dirNameStr + File.separator;

        // The receiver itself is a lambda-expression.  All it does is hand the article to
        // FileRW.writeObjectToFile (object-serialization, per the name); the catch-clause exists
        // solely to report a standardized error message back to the user when saving fails.

        return (articleBody, sectionURLNum, articleNum) ->
        {
            // File-Name: <directory><padded section-number>.<padded article-number>.dat
            // NOTE: This is computed outside of the try-block, so a failure here is not wrapped.

            final String outFileName =
                targetDir + StringParse.zeroPad(sectionURLNum) + '.' +
                StringParse.zeroPad10e4(articleNum) + ".dat";

            try
            {
                // NOTE(review): the meaning of the final 'true' argument is defined by FileRW,
                // which is not visible from this file.

                FileRW.writeObjectToFile(articleBody, outFileName, true);
            }

            catch (Exception ex)
            {
                final String message =
                    "A " + ex.getClass().getCanonicalName() + " was thrown while attempting to " +
                    "write a downloaded article to the file-system.\n" +
                    "Section-URL [" + sectionURLNum + "], Article Number [" + articleNum + "]\n" +
                    "Unable to save file:\n" +
                    outFileName + "\n" + 
                    "Please review this exception's getCause() for more details.";

                // The original exception is retained as the cause of the ReceiveException
                throw new ReceiveException(message, ex, sectionURLNum, articleNum);
            }
        };
    }
}