package edu.cmu.casos.wizard;

import edu.cmu.casos.automap.CSVUtils;
import edu.cmu.casos.logging.AllToConsoleConfigurator;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.util.EncodingUtil;
import org.apache.log4j.Logger;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

/* loaded from: input_file:edu/cmu/casos/wizard/MediaWikiExtractor.class */
/**
 * Command-line tool and small client that pulls revision metadata, rendered
 * article text and internal links from a MediaWiki {@code api.php} endpoint
 * and writes them as CSV / text files below an output directory.
 *
 * <p>For every processed article the tool writes a concept list, a semantic
 * (edge) list, an (empty) properties file and a plain-text body file, and
 * finally hands the directories to {@code Utils.generateNetwork}.
 *
 * <p>Limits visible in this class: revisions and links are requested with a
 * limit of 500 and no continuation request is issued, so longer result lists
 * are truncated by the server.
 */
public class MediaWikiExtractor {
    private static final Logger logger = Logger.getLogger(MediaWikiExtractor.class);
    private static final String semanticDir = "semantic" + File.separator;
    private static final String conceptDir = "concept" + File.separator;
    private static final String propertiesDir = "properties" + File.separator;
    private static final String bodyDir = "body" + File.separator;
    private static final String networkDir = "networks" + File.separator;
    // NOTE(review): the trailing 'Z' is a quoted literal and no time zone is set on
    // the format, so timestamps are parsed in the JVM default zone — confirm that
    // this is what Utils.automapDate expects before changing it.
    private static final SimpleDateFormat mediaWikiDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
    /** Characters that are path separators or rejected by common file systems. */
    private static final String UNSAFE_FILE_NAME_CHARS = "[\\\\/:*?\"<>|]";

    private final String wikiBase;
    private final HttpClient httpClient;

    /**
     * Creates an extractor for one wiki.
     *
     * @param str base URL of the wiki's script directory (the directory that
     *            contains {@code api.php}); a trailing slash is optional
     */
    public MediaWikiExtractor(String str) {
        this.wikiBase = (str.endsWith("/") ? str : str + "/") + "api.php?";
        this.httpClient = new HttpClient();
    }

    /**
     * Sends a GET request with the given query parameters to the wiki API and
     * returns the response body decoded as UTF-8.
     *
     * @param nameValuePairArr query parameters, form-url-encoded as UTF-8
     * @return the response body, or an empty string when the server sent no body
     * @throws IOException if the request fails or the body cannot be decoded
     */
    private String queryServer(NameValuePair[] nameValuePairArr) throws IOException {
        GetMethod getMethod = new GetMethod(this.wikiBase + EncodingUtil.formUrlEncode(nameValuePairArr, "utf-8"));
        byte[] responseBody;
        try {
            this.httpClient.executeMethod(getMethod);
            responseBody = getMethod.getResponseBody();
        } finally {
            // Always hand the connection back, even when the request failed.
            getMethod.releaseConnection();
        }
        if (responseBody == null) {
            return "";
        }
        // UnsupportedEncodingException is an IOException and simply propagates;
        // a library method must not terminate the JVM.
        return new String(responseBody, "utf-8");
    }

    /**
     * Fetches the revision list (timestamp, user, comment, ids) of an article.
     *
     * @param str article title
     * @return the {@code revisions} array of the first page in the response, or
     *         {@code null} when the page has no revisions entry or the response
     *         could not be parsed
     * @throws IOException if the request fails
     */
    public JSONArray getRevisions(String str) throws IOException {
        String response = queryServer(new NameValuePair[]{
                new NameValuePair("format", "json"),
                new NameValuePair("action", "query"),
                new NameValuePair("prop", "revisions"),
                new NameValuePair("titles", str),
                new NameValuePair("rvlimit", "500"),
                new NameValuePair("rvprop", "timestamp|user|comment|ids")});
        try {
            JSONObject firstPage = firstPage(new JSONObject(response));
            return firstPage == null ? null : firstPage.optJSONArray("revisions");
        } catch (JSONException e) {
            logger.error("Could not parse server response.", e);
            return null;
        }
    }

    /**
     * Fetches the rendered (HTML) text of an article via {@code action=parse}.
     *
     * @param str article title
     * @return the rendered text, or {@code null} when the response could not be parsed
     * @throws IOException if the request fails
     */
    public String getContent(String str) throws IOException {
        String response = queryServer(new NameValuePair[]{
                new NameValuePair("format", "json"),
                new NameValuePair("action", "parse"),
                new NameValuePair("page", str),
                new NameValuePair("prop", "text")});
        try {
            return new JSONObject(response).getJSONObject("parse").getJSONObject("text").getString("*");
        } catch (JSONException e) {
            logger.error("Could not parse server response.", e);
            return null;
        }
    }

    /**
     * Fetches the titles of the pages an article links to.
     *
     * @param str article title
     * @return the linked titles (empty when the page lists no links), or
     *         {@code null} when the response could not be parsed
     * @throws IOException if the request fails
     */
    public List<String> getInternalLinks(String str) throws IOException {
        String response = queryServer(new NameValuePair[]{
                new NameValuePair("format", "json"),
                new NameValuePair("action", "query"),
                new NameValuePair("titles", str),
                new NameValuePair("prop", "links"),
                new NameValuePair("pllimit", "500")});
        try {
            JSONObject firstPage = firstPage(new JSONObject(response));
            JSONArray links = firstPage == null ? null : firstPage.optJSONArray("links");
            if (links == null) {
                // A page without outgoing links has no "links" member; that is not an error.
                return new ArrayList<String>();
            }
            List<String> titles = new ArrayList<String>(links.length());
            for (int i = 0; i < links.length(); i++) {
                titles.add(links.getJSONObject(i).getString("title"));
            }
            return titles;
        } catch (JSONException e) {
            logger.error("Could not parse server response.", e);
            return null;
        }
    }

    /** Returns the first entry of {@code query.pages}, or {@code null} when that object is empty. */
    private static JSONObject firstPage(JSONObject response) throws JSONException {
        JSONObject pages = response.getJSONObject("query").getJSONObject("pages");
        Iterator<?> pageIds = pages.keys();
        if (!pageIds.hasNext()) {
            return null;
        }
        return pages.getJSONObject(String.valueOf(pageIds.next()));
    }

    /**
     * Command-line entry point.
     *
     * <p>Usage: {@code [options] wiki_url article output_directory}. With
     * {@code --crawl-immediate} the articles directly linked from the given
     * article are processed as well. Exits with status 1 on bad arguments or
     * on any error during extraction.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        AllToConsoleConfigurator.configure();
        if (strArr.length < 3) {
            logger.info("usage: [options] wiki_url article output_directory");
            logger.info("Options:");
            logger.info("--crawl-immediate    Visits articles that are directly linked to by specified article.");
            logger.info("\nWorks for MediaWiki versions 1.11 and up");
            logger.info("For English Wikipedia, the wiki_url is http://en.wikipedia.org/w/");
            System.exit(1);
        }
        String wikiUrl = null;
        String article = null;
        String outputDir = null;
        boolean crawlImmediate = false;
        // Positional arguments are assigned in order; the option may appear anywhere.
        for (String arg : strArr) {
            if (arg.equals("--crawl-immediate")) {
                crawlImmediate = true;
            } else if (wikiUrl == null) {
                wikiUrl = arg;
            } else if (article == null) {
                article = arg;
            } else if (outputDir == null) {
                outputDir = arg;
            } else {
                logger.warn("Excess parameter '" + arg + "' ignored.");
            }
        }
        if (outputDir == null) {
            logger.error("Too few parameters");
            System.exit(1);
            return;
        }
        if (!outputDir.endsWith(File.separator)) {
            outputDir = outputDir + File.separator;
        }
        Utils.createDir(outputDir + semanticDir);
        Utils.createDir(outputDir + conceptDir);
        Utils.createDir(outputDir + propertiesDir);
        Utils.createDir(outputDir + bodyDir);
        Utils.createDir(outputDir + networkDir);
        try {
            MediaWikiExtractor extractor = new MediaWikiExtractor(wikiUrl);
            LinkedList<String> queue = new LinkedList<String>();
            queue.add(article);
            if (crawlImmediate) {
                List<String> links = extractor.getInternalLinks(article);
                if (links == null) {
                    logger.warn("Could not retrieve the links of '" + article + "'; only the article itself is processed.");
                } else {
                    queue.addAll(links);
                }
            }
            ConceptCounts counts = new ConceptCounts();
            int articleCount = 0;
            while (!queue.isEmpty()) {
                String title = queue.poll();
                JSONArray revisions = extractor.getRevisions(title);
                if (revisions == null) {
                    logger.error("Article '" + title + "' not found.\n");
                    continue;
                }
                String content = extractor.getContent(title);
                String fileName = sanitizeFileName(title);
                writeRevisionFiles(outputDir, fileName, revisions, counts);
                if (content == null) {
                    // The revision data is still usable without the rendered text.
                    logger.warn("No content available for article '" + title + "'; body file not written.");
                } else {
                    writeBody(outputDir + bodyDir + fileName + ".txt",
                            HTMLUtils.extractText(new StringReader(content)).iterator());
                }
                articleCount++;
            }
            logger.info(articleCount + " articles");
            logger.info(counts.agents + " agents, " + counts.events + " events, " + counts.resources + " resources");
            Utils.generateNetwork(outputDir + conceptDir, outputDir + semanticDir, outputDir + propertiesDir, outputDir + networkDir);
        } catch (Exception e) {
            logger.error("An error occured.", e);
            System.exit(1);
        }
    }

    /** Running totals of the concept rows written across all articles. */
    private static final class ConceptCounts {
        int agents;
        int events;
        int resources;
    }

    /**
     * Writes the concept, semantic and properties CSV files of one article.
     * Concepts are de-duplicated per article; the totals in {@code counts} are
     * increased by the number of concept rows written.
     */
    private static void writeRevisionFiles(String outputDir, String fileName, JSONArray revisions, ConceptCounts counts)
            throws IOException, JSONException {
        BufferedWriter conceptWriter = null;
        BufferedWriter semanticWriter = null;
        BufferedWriter propertiesWriter = null;
        try {
            conceptWriter = openWriter(outputDir + conceptDir + fileName + ".csv");
            semanticWriter = openWriter(outputDir + semanticDir + fileName + ".csv");
            // The properties file is created but intentionally left empty, as before.
            propertiesWriter = openWriter(outputDir + propertiesDir + fileName + ".csv");
            conceptWriter.write("concept,meta");
            conceptWriter.newLine();
            semanticWriter.write("concept,concept,frequency");
            semanticWriter.newLine();

            HashSet<String> seenAgents = new HashSet<String>();
            HashSet<String> seenEvents = new HashSet<String>();
            HashSet<Long> seenRevisionIds = new HashSet<Long>();
            for (int i = 0; i < revisions.length(); i++) {
                JSONObject revision = revisions.getJSONObject(i);
                // NOTE(review): Debug.reportMsg is declared elsewhere; the isEmpty()
                // checks below suggest it is the empty string — confirm.
                String user = Debug.reportMsg;
                String timestamp = Debug.reportMsg;
                if (revision.has("user")) {
                    user = revision.getString("user");
                }
                if (revision.has("timestamp")) {
                    timestamp = formatDate(revision.getString("timestamp"));
                }
                long revisionId = revision.has("revid") ? revision.getLong("revid") : -1L;
                long parentId = revision.optLong("parentid", -1L);

                if (!user.isEmpty() && seenAgents.add(user)) {
                    writeRow(conceptWriter, new String[]{user, "agent"});
                    counts.agents++;
                }
                if (!timestamp.isEmpty() && seenEvents.add(timestamp)) {
                    writeRow(conceptWriter, new String[]{timestamp, "event"});
                    counts.events++;
                }
                if (revisionId >= 0 && seenRevisionIds.add(Long.valueOf(revisionId))) {
                    writeRow(conceptWriter, new String[]{revisionId + Debug.reportMsg, "resource"});
                    counts.resources++;
                }
                if (parentId >= 0 && seenRevisionIds.add(Long.valueOf(parentId))) {
                    writeRow(conceptWriter, new String[]{parentId + Debug.reportMsg, "resource"});
                    counts.resources++;
                }
                if (revisionId >= 0 && parentId >= 0) {
                    writeRow(semanticWriter, new String[]{parentId + Debug.reportMsg, revisionId + Debug.reportMsg, "1"});
                }
                // NOTE(review): these two edges are written even when the revision id,
                // user or timestamp is missing; kept as in the original — confirm intent.
                writeRow(semanticWriter, new String[]{revisionId + Debug.reportMsg, user, "1"});
                writeRow(semanticWriter, new String[]{revisionId + Debug.reportMsg, timestamp, "1"});
            }
            // Close explicitly on the success path so that flush errors are reported.
            conceptWriter.close();
            semanticWriter.close();
            propertiesWriter.close();
        } finally {
            // No-ops after a successful close; release the files after a failure.
            closeQuietly(conceptWriter);
            closeQuietly(semanticWriter);
            closeQuietly(propertiesWriter);
        }
    }

    /** Writes each element of {@code lines} as one line of the file at {@code path}. */
    private static void writeBody(String path, Iterator<String> lines) throws IOException {
        BufferedWriter bodyWriter = openWriter(path);
        try {
            while (lines.hasNext()) {
                bodyWriter.write(lines.next());
                bodyWriter.newLine();
            }
            bodyWriter.close();
        } finally {
            closeQuietly(bodyWriter);
        }
    }

    /** Writes one CSV row followed by a line separator. */
    private static void writeRow(BufferedWriter writer, String[] cells) throws IOException {
        writer.write(CSVUtils.makeRowFromArray(cells));
        writer.newLine();
    }

    /**
     * Opens a buffered writer on {@code path}, replacing an existing file.
     * NOTE(review): uses the platform default charset, as the original did;
     * switching to UTF-8 needs the readers of these files to agree — confirm.
     */
    private static BufferedWriter openWriter(String path) throws IOException {
        return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(path)));
    }

    /** Closes {@code writer} if it is open, logging instead of throwing on failure. */
    private static void closeQuietly(BufferedWriter writer) {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (IOException e) {
            logger.warn("Could not close output file.", e);
        }
    }

    /**
     * Turns an article title into a file name by replacing path separators and
     * characters that common file systems reject with an underscore, so that a
     * title such as {@code AC/DC} cannot point outside the target directory.
     */
    private static String sanitizeFileName(String title) {
        return title.replaceAll(UNSAFE_FILE_NAME_CHARS, "_");
    }

    /**
     * Converts a MediaWiki timestamp into the representation produced by
     * {@code Utils.automapDate}.
     *
     * @param str timestamp in the form {@code yyyy-MM-dd'T'HH:mm:ss'Z'}
     * @return the converted date, or {@code str} unchanged when it cannot be parsed
     */
    private static String formatDate(String str) {
        try {
            // SimpleDateFormat is not thread-safe and this instance is shared.
            synchronized (mediaWikiDateFormat) {
                return Utils.automapDate(mediaWikiDateFormat.parse(str));
            }
        } catch (ParseException e) {
            logger.warn("Could not parse server dates.");
            return str;
        }
    }
}
