001package Torello.HTML.Tools.NewsSite;
002
003import Torello.Java.*;
004import java.io.*;
005
006/**
007 * A Java function-pointer / lambda-target that provides a means for deciding where to save
008 * downloaded article HTML, including a {@code static}-builder method for choosing to save articles
009 * directly to the file-system.
010 * 
011 * <EMBED CLASS='external-html' DATA-FILE-ID=SCRAPE_ART_REC>
012 */
013@FunctionalInterface
014public interface ScrapedArticleReceiver
015{
016    /** <EMBED CLASS='external-html' DATA-FILE-ID=SVUIDFI>  */
017    public static final long serialVersionUID = 1;
018
019    /**
020     * <B><SPAN STYLE="color: red;">FUNCTIONAL-INTERFACE METHOD:</SPAN></B> This is the method
021     * that must be fulfilled to meet the requirements of this
022     * <CODE>&#64;FunctionalInterface</CODE>
023     *
024     * @param articleBody After an article has been downloaded by the {@code ScrapeArticles} class,
025     * it will build an instance of {@code class Article} and pass it to this class.  It is the
026     * programmer's responsibility to ultimately decide what to do with news articles after they
027     * have been successfully scraped and parsed.
028     * 
029     * @param sectionURLNum This is a convenience parameter that informs the 
030     * implementing-{@code interface} <B><I>from which URL Section</I></B> of the News Web-Sites
031     * main-page that this article is being downloaded.
032     * 
033     * <BR /><BR /><B>NOTE:</B> Review the {@code class ScrapeURLs} to read more about
034     * "Section {@code URL's}."  The number of "Section {@code URL's}" for a news web-site scrape
035     * is just the length of the {@code Vector<URL> sectionURL's}
036     * 
037     * @param articleNum This parameter informs the implementing-{@code interface} which article
038     * number is being downloaded.  Each section-{@code URL} will have a number of different
039     * articles in the section.  These numbers can be used to create unique file-names, for
040     * instance.
041     * 
042     * @throws ReceiveException This exception may be thrown by the lambda-expression or class
043     * instance that implements this {@code FunctionalInterface}.  It is not mandatory that this
044     * exception be used.
045     */
046    public void receive(Article articleBody, int sectionURLNum, int articleNum)
047        throws ReceiveException;
048
049    /**
050     * <B><SPAN STYLE="color: red;">saveToFS =&gt; Save To File-System</SPAN></B>
051     * 
052     * <BR /><BR />This is a static factory-builder method that will produce a
053     * {@code 'ScrapedArticleReceiver'} that simply <B>saves downloaded articles to a
054     * directory</B> on the file-system.
055     * 
056     * <BR /><BR />The user, here, merely needs to provide a Directory-Name using {@code String}
057     * parameter {@code 'dirNameStr'}.  This is the <B><I>most simple</I></B> way to create an
058     * instance of this class.
059     * 
060     * @param dirNameStr This is a directory on the file-system that will be used to save articles
061     * that are received directly to the file-system.
062     * 
063     * @throws WritableDirectoryException This method shall check whether it is possible to
064     * write to the provided directory name.
065     */
066    public static ScrapedArticleReceiver saveToFS(String dirNameStr)
067    {
068        WritableDirectoryException.check(dirNameStr);
069
070        // Make sure that the directory name-string ends with the system File-Separator
071        // character.  This '/' for UNIX and '\' for MS-DOS.
072
073        final String finalDirNameStr = dirNameStr.endsWith(File.separator) 
074            ? dirNameStr
075            : dirNameStr + File.separator;
076
077        // Create an instance of this functional-interface using a lambda-expression.
078        // 
079        // NOTE: This is literally just saving an object to a file using object-serialization.
080        //       The exception catching / throwing is just to produce standardized error messages
081        //       back to the user, if an exception occurs when saving files.
082
083        return (Article articleBody, int sectionURLNum, int articleNum) ->
084        {
085            String outFileName = 
086                finalDirNameStr +
087                StringParse.zeroPad(sectionURLNum) + '.' +
088                StringParse.zeroPad10e4(articleNum) + ".dat";
089
090            try
091                { FileRW.writeObjectToFile(articleBody, outFileName, true); }
092
093            catch (Exception ex)
094            {
095                throw new ReceiveException(
096                    "A " + ex.getClass().getCanonicalName() + " was thrown while attempting to " +
097                    "write a downloaded article to the file-system.\n" +
098                    "Section-URL [" + sectionURLNum + "], Article Number [" + articleNum + "]\n" +
099                    "Unable to save file:\n" +
100                    outFileName + "\n" + 
101                    "Please review this exception's getCause() for more details.",
102                    ex, sectionURLNum, articleNum
103                );
104            }
105        };
106    }
107}