package edu.cmu.casos.wizard;

import edu.cmu.casos.logging.AllToConsoleConfigurator;
import java.awt.Component;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Scanner;
import javax.swing.JOptionPane;
import org.apache.log4j.Logger;
import org.json.JSONArray;
import org.json.JSONObject;

/* loaded from: input_file:edu/cmu/casos/wizard/YahooExtractor.class */
/**
 * Command-line style helper that queries the Yahoo web search service for a
 * search term, then downloads every returned page, runs it through
 * {@code HTMLUtils.extractText} and writes the extracted text to one
 * {@code .txt} file per page inside the output directory.
 *
 * <p>All work is triggered from the constructor: constructing an instance
 * parses the arguments, performs the search and writes the files.
 */
public class YahooExtractor {
    private static final Logger logger = Logger.getLogger(YahooExtractor.class);

    /** Value sent as the HTTP {@code Referer} header on search requests. */
    private static final String CASOS_REFERRER = "http://www.casos.cs.cmu.edu/";

    /** Search endpoint including the application id; the encoded query is appended directly. */
    private static final String SEARCH_URL_PREFIX =
            "http://search.yahooapis.com/WebSearchService/V1/webSearch?appid=T6h_QG_V34FoCjRmhfJYYmlCtaF2LDLVqJ2tr_SLoJBZpNZp2pvIp7CYIPMbOb263VJYPxk-&query=";

    /** Upper bound applied to the requested number of results. */
    private static final int MAX_RESULTS = 64;

    /** Marker returned by {@link #parsePositiveInt} when the option value was rejected. */
    private static final int INVALID_NUMBER = -1;

    String outputDir = null;
    String searchTerm = null;
    String region = null;
    String language = null;
    String country = null;
    String site = null;
    String type = null;
    String format = null;
    boolean fetchCachedOnly = false;
    boolean printAllNumResults = false;
    boolean setTitle = false;
    boolean similar_ok = false;
    int numResults = 10;
    // NOTE(review): the --firstIndex option rejects values below 1, yet the default is 0.
    // Presumably the service expects a 1-based start index - confirm before changing.
    int firstIndex = 0;
    boolean fetchCachedAlso = false;

    /**
     * Parses the arguments, runs the search and extracts the result pages.
     *
     * <p>Arguments starting with a recognised {@code --option} configure the
     * search; the first remaining argument is the search term and the second
     * is the output directory. If a numeric option is invalid a warning dialog
     * is shown and nothing is fetched. If the search term or the output
     * directory is missing a warning is logged and nothing is fetched.
     *
     * @param strArr command-line style arguments, see {@link #Usage()}
     */
    public YahooExtractor(String[] strArr) {
        for (String arg : strArr) {
            if (arg.equals("--printAllNumResults")) {
                this.printAllNumResults = true;
            } else if (arg.startsWith("--firstIndex=")) {
                int parsed = parsePositiveInt(arg, "first index");
                if (parsed == INVALID_NUMBER) {
                    return;
                }
                this.firstIndex = parsed;
            } else if (arg.startsWith("--numResults=")) {
                int parsed = parsePositiveInt(arg, "Number of Results");
                if (parsed == INVALID_NUMBER) {
                    return;
                }
                this.numResults = parsed;
            } else if (arg.startsWith("--region=")) {
                this.region = optionValue(arg);
            } else if (arg.startsWith("--type=")) {
                this.type = optionValue(arg);
            } else if (arg.startsWith("--language=")) {
                this.language = optionValue(arg);
            } else if (arg.startsWith("--country=")) {
                this.country = optionValue(arg);
            } else if (arg.startsWith("--site=")) {
                this.site = optionValue(arg);
            } else if (arg.startsWith("--format=")) {
                this.format = optionValue(arg);
            } else if (arg.startsWith("--similar_ok")) {
                this.similar_ok = true;
            } else if (arg.equals("--setTitle")) {
                this.setTitle = true;
            } else if (this.searchTerm == null) {
                this.searchTerm = arg;
            } else if (this.outputDir == null) {
                this.outputDir = arg;
            }
        }

        // Without both positional arguments there is nothing to search for or
        // nowhere to write to; stop here instead of failing on a null outputDir.
        if (this.searchTerm == null) {
            logger.warn("No search term entered.");
            return;
        }
        if (this.outputDir == null) {
            logger.warn("No output directory entered.");
            return;
        }
        if (this.numResults > MAX_RESULTS) {
            logger.warn("The number of desired results cannot be over 64... truncating to 64");
            this.numResults = MAX_RESULTS;
        }
        if (this.fetchCachedOnly && this.fetchCachedAlso) {
            this.fetchCachedOnly = false;
        }
        if (!this.outputDir.endsWith(File.separator)) {
            this.outputDir += File.separator;
        }

        ArrayList<ArrayList<String>> links = getLinks(this.searchTerm);
        if (this.setTitle) {
            extract(links.get(0), links.get(1));
        } else {
            extract(links.get(0));
        }
    }

    /**
     * Entry point: configures console logging, prints the usage text (and
     * exits) when fewer than two arguments are given, otherwise runs the
     * extractor.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        AllToConsoleConfigurator.configure();
        if (strArr.length < 2) {
            Usage();
        }
        new YahooExtractor(strArr);
    }

    /**
     * Runs the search and collects the result URLs (and titles when
     * {@code setTitle} is enabled).
     *
     * <p>Any failure is logged; whatever was collected up to that point is
     * still returned. {@code numResults} is lowered when the service reports
     * fewer total results than requested.
     *
     * @param str the search term
     * @return a list whose first element is the list of URLs and, only when
     *         {@code setTitle} is enabled, whose second element is the list
     *         of titles
     */
    public ArrayList<ArrayList<String>> getLinks(String str) {
        ArrayList<String> urls = new ArrayList<>();
        ArrayList<String> titles = new ArrayList<>();
        try {
            JSONObject response = getJSONObject(str, this.numResults);
            if (response == null) {
                // getJSONObject already logged the cause; carry on with no links.
                logger.info("No results returned.");
            } else {
                JSONObject resultSet = response.getJSONObject("ResultSet");
                String totalAvailable = resultSet.getString("totalResultsAvailable");
                if (this.printAllNumResults) {
                    logger.info("total number of results: " + totalAvailable);
                }
                JSONArray results = resultSet.getJSONArray("Result");
                int total = Integer.parseInt(totalAvailable);
                if (total < this.numResults) {
                    this.numResults = total;
                    logger.info("Number of results is less than desired...");
                }
                // The returned page can hold fewer entries than requested, so
                // never index past the end of the array.
                int count = Math.min(this.numResults, results.length());
                for (int i = 0; i < count; i++) {
                    JSONObject result = results.getJSONObject(i);
                    urls.add(result.getString("Url"));
                    if (this.setTitle) {
                        titles.add(result.getString("Title"));
                    }
                }
            }
        } catch (Exception e) {
            logger.error("An error occured.", e);
        }
        ArrayList<ArrayList<String>> links = new ArrayList<>();
        links.add(urls);
        if (this.setTitle) {
            links.add(titles);
        }
        return links;
    }

    /**
     * Sends the search request and parses the JSON response.
     *
     * @param str the search term
     * @param i   the number of results to request
     * @return the parsed response, or {@code null} when the request or the
     *         parsing failed (the failure is logged)
     */
    public JSONObject getJSONObject(String str, int i) {
        JSONObject parsed = null;
        try {
            StringBuilder url = new StringBuilder(SEARCH_URL_PREFIX)
                    .append(URLEncoder.encode(str, "UTF-8"))
                    .append("&results=").append(i)
                    .append("&start=").append(this.firstIndex);

            // Optional filters, in the order they are sent; values are URL-encoded
            // so that special characters cannot corrupt the query string.
            String[][] optionalParams = {
                {"region", this.region},
                {"language", this.language},
                {"country", this.country},
                {"site", this.site},
                {"type", this.type},
                {"format", this.format},
            };
            for (String[] param : optionalParams) {
                if (param[1] != null) {
                    url.append('&').append(param[0]).append('=')
                            .append(URLEncoder.encode(param[1], "UTF-8"));
                }
            }
            if (this.similar_ok) {
                url.append("&similar_ok=1");
            }
            url.append("&output=json");

            URLConnection connection = new URL(url.toString()).openConnection();
            connection.addRequestProperty("Referer", CASOS_REFERRER);

            String body;
            // Closing the scanner also closes the underlying connection stream.
            try (Scanner scanner = new Scanner(new InputStreamReader(connection.getInputStream(), "UTF-8"))) {
                // "\\Z" (end of input) as delimiter reads the whole response in one token.
                body = scanner.useDelimiter("\\Z").next();
                if (scanner.ioException() != null) {
                    logger.error("An error occured.", scanner.ioException());
                }
            }
            parsed = new JSONObject(body);
        } catch (Exception e) {
            logger.error("An error occured.", e);
        }
        return parsed;
    }

    /**
     * Downloads every URL and stores its extracted text as
     * {@code <index>.txt} in the output directory. Failures are logged per
     * page and do not stop the remaining pages.
     *
     * @param arrayList the page URLs
     */
    public void extract(ArrayList<String> arrayList) {
        for (int i = 0; i < arrayList.size(); i++) {
            savePage(arrayList.get(i), String.valueOf(i));
        }
        logger.info(arrayList.size() + " pages");
    }

    /**
     * Downloads every URL and stores its extracted text as
     * {@code <title>.txt} in the output directory, with file separators in
     * the title replaced. Failures are logged per page and do not stop the
     * remaining pages.
     *
     * @param arrayList  the page URLs
     * @param arrayList2 the page titles, index-aligned with {@code arrayList};
     *                   a page without a matching title is named by its index
     */
    public void extract(ArrayList<String> arrayList, ArrayList<String> arrayList2) {
        for (int i = 0; i < arrayList.size(); i++) {
            String fileName;
            if (i < arrayList2.size()) {
                // Literal replace: File.separator is not a valid regex on Windows ("\").
                fileName = arrayList2.get(i).replace(File.separator, Debug.reportMsg);
            } else {
                fileName = String.valueOf(i);
            }
            savePage(arrayList.get(i), fileName);
        }
        logger.info(arrayList.size() + " pages");
    }

    /** Fetches one page, extracts its text and writes it to {@code outputDir + fileName + ".txt"}; logs any failure. */
    private void savePage(String pageUrl, String fileName) {
        try {
            List<String> text;
            try (InputStreamReader reader = new InputStreamReader(new URL(pageUrl).openStream())) {
                text = HTMLUtils.extractText(reader);
            }
            try (OutputStreamWriter writer =
                    new OutputStreamWriter(new FileOutputStream(this.outputDir + fileName + ".txt"))) {
                for (String chunk : text) {
                    writer.write(chunk);
                }
            }
        } catch (Exception e) {
            logger.error("An error occured.", e);
        }
    }

    /** Returns the text following the first {@code '='} of a {@code --name=value} argument. */
    private static String optionValue(String arg) {
        return arg.substring(arg.indexOf('=') + 1);
    }

    /**
     * Parses the value of a {@code --name=value} argument as an integer of at
     * least 1; on failure shows a warning dialog naming the field and returns
     * {@link #INVALID_NUMBER}.
     */
    private static int parsePositiveInt(String arg, String fieldName) {
        int value;
        try {
            value = Integer.parseInt(optionValue(arg));
        } catch (NumberFormatException e) {
            JOptionPane.showMessageDialog((Component) null,
                    "Please provide a valid integer for the " + fieldName + " field.",
                    "WARNING", JOptionPane.WARNING_MESSAGE);
            return INVALID_NUMBER;
        }
        if (value < 1) {
            JOptionPane.showMessageDialog((Component) null,
                    "Please provide a positive integer value for the " + fieldName + " field.",
                    "WARNING", JOptionPane.WARNING_MESSAGE);
            return INVALID_NUMBER;
        }
        return value;
    }

    /** Logs the command-line usage text and terminates the JVM with exit status 1. */
    public static void Usage() {
        logger.info("Usage: YahooExtractor [options] search_term output_directory");
        logger.info("options: ");
        logger.info("--printAllNumResults    Print total number of results for search term.");
        logger.info("--firstIndex=[number]   First index of results.");
        logger.info("--numResults=[number]   Number of results to fetch.");
        logger.info("--setTitle              Name sites by title than by number.");
        logger.info("--region=[region]       Regional search engine for performing the search.");
        logger.info("--type=[search_type]    The kind of search to submit.");
        logger.info("--language=[lang]       Language results are written in.");
        logger.info("--country=[country]     Country to restrict results in.");
        logger.info("--site=[web_addr]       Domain to restrict searches to.");
        logger.info("--format=[file_format]  Specify format of file for search.\n");
        logger.info("Options for format include: any, html, msword, pdf, ppt, rss, txt, xls");
        logger.info("--similar_ok            Allow multiple results with similar content.\n");
        System.exit(1);
    }
}
