001package Torello.Languages;
002
003import java.util.*;
004import java.util.regex.*;
005import java.io.*;
006
007import Torello.HTML.*;
008import Torello.HTML.NodeSearch.*;
009
010import Torello.Java.*;
011
012import Torello.Java.Additional.URLs;
013
014import Torello.JavaDoc.StaticFunctional;
015import Torello.JavaDoc.Excuse;
016
017/**
018 * Google Cloud Services, Translation API Wrapper.
019 * 
020 * <BR /><BR /><EMBED CLASS='external-html' DATA-FILE-ID=GCSTAPI>
021 */
022@StaticFunctional(Excused="key", Excuses=Excuse.CONFIGURATION)
023public class GCSTAPI
024{
025    private GCSTAPI() { }
026
027    private static final String baseQ =
028        "https://www.googleapis.com/language/translate/v2?key=";
029
030    private static final Pattern P1 =
031        Pattern.compile(
032            "\\{\\s+\"translatedText\":\\s+\"(.*?)\"\\s+\\}",
033             Pattern.DOTALL | Pattern.CASE_INSENSITIVE
034        );
035
036    /**
037     * This is a key that may be obtained from Google Corporation.  Complete the form for "Cloud
038     * Server Login" and then try adding the Translation package.  Google expects you to pay for
039     * translation services, but single articles are usually a penny or so.  Long articles with
040     * many translations can become expensive.  Set this key to a specific value to register it. 
041     * 
042     * <BR /><BR /><B>NOTE:</B> This class only has static methods for brevity and simplicity.  If
043     * you wish to use multiple keys for different projects, well, copy the code from these two
044     * methods and add them to one of your own classes - for "multi-threaded" applications.
045     * <I><B>Otherwise you might also just change/update the {@code public static String key}
046     * field</I></B> whenver you wish to bill your translations to a different billing project,
047     * card or account. 
048     * 
049     * <BR /><BR /><B>ALSO NOTE:</B> Use of this Java Package will not transmit your key to me
050     * (Ralph P. Torello), I'm not trying to steal your google key or account.  Obviously, I cannot
051     * prove this Java Package does not make outgoing connections to any server I own, but,
052     * <B>Guess What? It DOESN'T!</B> 
053     * 
054     * <BR /><BR /><I>If you do not set the key field here, Google Cloud Server Translate API will
055     * not respond to your query.</I>  Google bills each query on a word by word basis.  See their
056     * metrics on the GCS Translate API documentation website.
057     */
058    public static String key;
059
060    /**
061     * This method takes what is expected to be a sentence in (any) foreign language.  The
062     * foreign-language used is passed as a two-character String from Google Translate' list of
063     * two-character language codes.  The Target language may be any.  Usually it is
064     * English/Spanish.<BR /><BR />
065     * 
066     * @param text Any string in a foreign language.  This is expected to be a single sentence.
067     * 
068     * @param srcLang Two-Character String Language Code identifying the foreign language used.
069     * See the enum "LC" (Language Code)
070     * 
071     * @param targetLang Two-Character String Language Code of the "target" or "destination"
072     * language.  See the enum "LC" (Language Code)
073     * 
074     * @return Translated Sentence - generated by a call to Google Cloud Server Translate API.
075     * 
076     * @see Torello.Languages.LC
077     */
078    public static String sentence(String text, LC srcLang, LC targetLang) throws IOException
079    {
080        StringBuffer    sb      = new StringBuffer();
081        String          json    = null;
082
083        // REST API Query String
084        String q = baseQ + key + "&source=" + srcLang.gcsLanguageCode +
085            "&target=" + targetLang.gcsLanguageCode +
086            "&q=" + URLs.toProperURLV2(text);
087
088        try
089        {
090            json        = Scrape.scrapePage(Scrape.openConn_UTF8(q)).toString();
091            Matcher m   = P1.matcher(json);
092
093            // Since only a Single text-string was queried, it is better to return the result
094            // as a single string.
095
096            while (m.find()) sb.append(m.group(1) + "\n");
097
098            return sb.toString();
099        }
100        catch (Exception e)
101        {
102            System.out.println(
103                '\n' +
104                "TextTranslate.sentence(q=\n" + q + '\n' +
105                "TextTranslate.sentence(text=\n" + text + '\n' +
106                "TextTranslate.sentence(json=\n" + json
107            );
108
109            throw e;
110        }
111    }
112
113    /**
114     * This is similar to sentence, but the input is expected to be a series of individual
115     * vocabulary words.  The source and target/destination language codes are necessary.  A Vector
116     * of equal length to the original input Vector is returned.  It contains each vocabulary word
117     * from the original Vector in the destination/target language.
118     * 
119     * @param words Any string in a foreign language.  This is expected to be a single sentence.
120     * 
121     * @param srcLang Two-Character String Language Code identifying the foreign language used.
122     * See the enum "LC" (Language Code)
123     * 
124     * @param targetLang Two-Character String Language Code of the "target" or "destination"
125     * language.  See the enum "LC" (Language Code)
126     * 
127     * @return Translated Sentence - generated by a call to Google Cloud Server Translate API.
128     * 
129     * @see Torello.HTML.Scrape
130     * @see Torello.Languages.LC
131     */
132    public static Vector<String> wordByWord(Vector<String> words, LC srcLang, LC targetLang)
133        throws IOException
134    {
135        int             i       = 0;
136        int             len     = words.size();
137        Vector<String>  ret     = new Vector<String>();
138        String          q       = null;
139        String          json    = null;
140
141        while (i < len)
142        {
143            try
144            {
145                q = baseQ + key + "&source=" + srcLang.gcsLanguageCode +
146                        "&target=" + targetLang.gcsLanguageCode;
147
148                for (int j=0; (j < 50) && ((j + i) < len); j++)
149                    q += "&q=" + URLs.toProperURLV2(words.elementAt(j + i));
150
151                i += 50;
152
153                json = Scrape.scrapePage(Scrape.openConn_UTF8(q)).toString();
154                Matcher m = P1.matcher(json);
155
156                while (m.find()) ret.addElement(m.group(1).trim());
157            }
158            catch (Exception e)
159            {
160                System.out.println(
161                    '\n' +
162                    "TextTranslate.wordByWord, q=\n" + q + '\n' +
163                    "TextTranslate.wordByWord, json=\n" + json
164                );
165
166                throw e;
167            }
168        }
169
170        return ret;
171    }
172}