001package Torello.HTML.Tools.NewsSite; 002 003import Torello.Java.*; 004import java.io.*; 005 006/** 007 * A Java function-pointer / lambda-target that provides a means for deciding where to save 008 * downloaded article HTML, including a {@code static}-builder method for choosing to save articles 009 * directly to the file-system. 010 * 011 * <EMBED CLASS='external-html' DATA-FILE-ID=SCRAPE_ART_REC> 012 */ 013@FunctionalInterface 014public interface ScrapedArticleReceiver 015{ 016 /** <EMBED CLASS='external-html' DATA-FILE-ID=SVUIDFI> */ 017 public static final long serialVersionUID = 1; 018 019 /** 020 * <B><SPAN STYLE="color: red;">FUNCTIONAL-INTERFACE METHOD:</SPAN></B> This is the method 021 * that must be fulfilled to meet the requirements of this 022 * <CODE>@FunctionalInterface</CODE> 023 * 024 * @param articleBody After an article has been downloaded by the {@code ScrapeArticles} class, 025 * it will build an instance of {@code class Article} and pass it to this class. It is the 026 * programmer's responsibility to ultimately decide what to do with news articles after they 027 * have been successfully scraped and parsed. 028 * 029 * @param sectionURLNum This is a convenience parameter that informs the 030 * implementing-{@code interface} <B><I>from which URL Section</I></B> of the News Web-Sites 031 * main-page that this article is being downloaded. 032 * 033 * <BR /><BR /><B>NOTE:</B> Review the {@code class ScrapeURLs} to read more about 034 * "Section {@code URL's}." The number of "Section {@code URL's}" for a news web-site scrape 035 * is just the length of the {@code Vector<URL> sectionURL's} 036 * 037 * @param articleNum This parameter informs the implementing-{@code interface} which article 038 * number is being downloaded. Each section-{@code URL} will have a number of different 039 * articles in the section. These numbers can be used to create unique file-names, for 040 * instance. 041 * 042 * @throws ReceiveException This exception may be thrown by the lambda-expression or class 043 * instance that implements this {@code FunctionalInterface}. It is not mandatory that this 044 * exception be used. 045 */ 046 public void receive(Article articleBody, int sectionURLNum, int articleNum) 047 throws ReceiveException; 048 049 /** 050 * <B><SPAN STYLE="color: red;">saveToFS => Save To File-System</SPAN></B> 051 * 052 * <BR /><BR />This is a static factory-builder method that will produce a 053 * {@code 'ScrapedArticleReceiver'} that simply <B>saves downloaded articles to a 054 * directory</B> on the file-system. 055 * 056 * <BR /><BR />The user, here, merely needs to provide a Directory-Name using {@code String} 057 * parameter {@code 'dirNameStr'}. This is the <B><I>most simple</I></B> way to create an 058 * instance of this class. 059 * 060 * @param dirNameStr This is a directory on the file-system that will be used to save articles 061 * that are received directly to the file-system. 062 * 063 * @throws WritableDirectoryException This method shall check whether it is possible to 064 * write to the provided directory name. 065 */ 066 public static ScrapedArticleReceiver saveToFS(String dirNameStr) 067 { 068 WritableDirectoryException.check(dirNameStr); 069 070 // Make sure that the directory name-string ends with the system File-Separator 071 // character. This '/' for UNIX and '\' for MS-DOS. 072 073 final String finalDirNameStr = dirNameStr.endsWith(File.separator) 074 ? dirNameStr 075 : dirNameStr + File.separator; 076 077 // Create an instance of this functional-interface using a lambda-expression. 078 // 079 // NOTE: This is literally just saving an object to a file using object-serialization. 080 // The exception catching / throwing is just to produce standardized error messages 081 // back to the user, if an exception occurs when saving files. 082 083 return (Article articleBody, int sectionURLNum, int articleNum) -> 084 { 085 String outFileName = 086 finalDirNameStr + 087 StringParse.zeroPad(sectionURLNum) + '.' + 088 StringParse.zeroPad10e4(articleNum) + ".dat"; 089 090 try 091 { FileRW.writeObjectToFile(articleBody, outFileName, true); } 092 093 catch (Exception ex) 094 { 095 throw new ReceiveException( 096 "A " + ex.getClass().getCanonicalName() + " was thrown while attempting to " + 097 "write a downloaded article to the file-system.\n" + 098 "Section-URL [" + sectionURLNum + "], Article Number [" + articleNum + "]\n" + 099 "Unable to save file:\n" + 100 outFileName + "\n" + 101 "Please review this exception's getCause() for more details.", 102 ex, sectionURLNum, articleNum 103 ); 104 } 105 }; 106 } 107}