package edu.cmu.casos.parser;

import edu.cmu.casos.parser.configuration.Tableset;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.htmlparser.Parser;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.SimpleNodeIterator;
import org.htmlparser.util.Translate;

/* loaded from: input_file:edu/cmu/casos/parser/formatWWWcrawl.class */
/**
 * Input format that fetches a single start page over HTTP, extracts every
 * link tag found in the returned HTML, and exposes the result as two tables:
 * {@code site} (one row per URL seen) and {@code pageCallsPage} (one row per
 * start-page-to-link edge).
 *
 * <p>All URLs are stored in the form returned by {@link #fixHTMLString(String)}.
 * The whole crawl result is delivered as a single "message": {@link #getNext()}
 * returns {@code true} exactly once after a successful {@link #open()}.
 */
public class formatWWWcrawl extends CasosParserFormatIn {
    /** Encoded URL mapped to itself; every URL seen (start page plus extracted links). */
    HashMap<String, String> urlList = new HashMap<>();
    /** Encoded URLs seen so far (start page plus extracted links). */
    HashSet<String> nodeNameList = new HashSet<>();
    /** Edges stored as {@code source + "\t" + target}, both encoded. */
    HashSet<String> linkNameList = new HashSet<>();
    /** Start URL exactly as read from the tableset field {@code startURL}. */
    String startURL = "";
    /** {@link #startURL} after {@link #fixHTMLString(String)}. */
    String startURLEncoded = "";
    /** True between a successful {@link #open()} and the first {@link #getNext()} call. */
    Boolean gotRecs = false;

    /** Creates the format and prints a trace line to standard output. */
    public formatWWWcrawl() {
        System.out.println("IN formatWWWcrawl()");
    }

    /** Reads the {@code startURL} field from the tableset and caches its encoded form. */
    @Override // edu.cmu.casos.parser.CasosParserFormatIn
    public void setFields() {
        this.startURL = this.tableset.getFld("startURL");
        this.startURLEncoded = fixHTMLString(this.startURL);
    }

    /** @return the constant input type {@code "TRANSACTION"} */
    @Override // edu.cmu.casos.parser.CasosParserFormatIn
    public String getInType() {
        return "TRANSACTION";
    }

    /** No input values are gathered for this format. */
    @Override // edu.cmu.casos.parser.CasosParserFormatIn
    public void getInputValues() {
    }

    /**
     * @return the constant {@code "Facebook"}
     */
    @Override // edu.cmu.casos.parser.CasosParserFormatIn
    public String getNodeType() {
        // NOTE(review): "Facebook" looks like a leftover from another format for a
        // web-crawl parser; left unchanged because callers may depend on the value.
        return "Facebook";
    }

    /** @param tableset the tableset configuration this format reads its fields from */
    @Override // edu.cmu.casos.parser.CasosParserFormatIn
    public void setTableset(Tableset tableset) {
        this.tableset = tableset;
    }

    /** @return the tableset configuration previously assigned to this format */
    @Override // edu.cmu.casos.parser.CasosParserFormatIn
    public Tableset getTableset() {
        return this.tableset;
    }

    /**
     * Downloads the start page, records the start URL as a node, and records
     * every non-empty link found on the page as a node plus an edge from the
     * start URL. Marks the single record as available on success.
     *
     * @throws Exception if the request cannot be built or executed, or the
     *                   returned HTML cannot be parsed
     */
    @Override // edu.cmu.casos.parser.CasosParserFormatIn
    public void open() throws Exception {
        DefaultHttpClient httpClient = new DefaultHttpClient();
        String pageHtml;
        try {
            HttpGet request = new HttpGet(this.startURL);
            this.urlList.put(this.startURLEncoded, this.startURLEncoded);
            this.nodeNameList.add(this.startURLEncoded);
            System.out.println("executing request " + request.getURI());
            pageHtml = httpClient.execute(request, new BasicResponseHandler());
        } finally {
            // Release the underlying connection(s) whether or not the request succeeded.
            httpClient.getConnectionManager().shutdown();
        }
        System.out.println(pageHtml);
        SimpleNodeIterator linkNodes = Parser.createParser(pageHtml, "UTF-8")
                .extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class))
                .elements();
        while (linkNodes.hasMoreNodes()) {
            // The NodeClassFilter above only lets LinkTag instances through.
            LinkTag linkTag = (LinkTag) linkNodes.nextNode();
            System.out.println("NODE:" + linkTag.toString());
            String link = linkTag.getLink();
            if (link == null || link.isEmpty()) {
                continue;
            }
            System.out.println("LINK:" + link);
            String encodedLink = fixHTMLString(link);
            System.out.println("LINKENCODED:" + encodedLink);
            this.urlList.put(encodedLink, encodedLink);
            this.nodeNameList.add(encodedLink);
            this.linkNameList.add(this.startURLEncoded + "\t" + encodedLink);
        }
        System.out.println("----------------------------------------");
        this.gotRecs = true;
    }

    /**
     * Writes the collected URLs into the {@code site} table and the collected
     * edges into the {@code pageCallsPage} table of the given data store.
     *
     * @param dataStoreTableset destination holding the {@code site} and
     *                          {@code pageCallsPage} tables
     */
    @Override // edu.cmu.casos.parser.CasosParserFormatIn
    public void parseSingleMessageToTables(DataStoreTableset dataStoreTableset) {
        System.out.println("in formatWWWcrawl.parseSingleMessageToTables");
        DataStoreBaseTable siteTable = dataStoreTableset.getDataStoreTableByName("site").getDataStoreBaseTable();
        for (Map.Entry<String, String> entry : this.urlList.entrySet()) {
            siteTable.addRow(new String[]{entry.getValue(), entry.getKey()}, false);
        }
        DataStoreBaseTable edgeTable = dataStoreTableset.getDataStoreTableByName("pageCallsPage").getDataStoreBaseTable();
        for (String edge : this.linkNameList) {
            System.out.println("INSIDE:  " + edge);
            // Limit of 2: split only on the separator added in open(), so a tab
            // inside the target URL does not truncate it.
            String[] endpoints = edge.split("\t", 2);
            edgeTable.addRow(new String[]{endpoints[0], endpoints[1]}, false);
        }
        System.out.println("leaving formatWWWcrawl.parseSingleMessageToTables");
    }

    /**
     * Reports whether the single crawl record is still pending, consuming it
     * if so.
     *
     * @return {@code true} on the first call after a successful {@link #open()},
     *         {@code false} otherwise
     */
    @Override // edu.cmu.casos.parser.CasosParserFormatIn
    public boolean getNext() throws Exception {
        if (!this.gotRecs.booleanValue()) {
            return false;
        }
        this.gotRecs = false;
        return true;
    }

    /** Nothing to release here; the HTTP client is shut down inside {@link #open()}. */
    @Override // edu.cmu.casos.parser.CasosParserFormatIn
    public void close() {
    }

    /** No-op for this format. */
    @Override // edu.cmu.casos.parser.CasosParserFormatIn
    public void doDatabase(CasosParserFormatOut casosParserFormatOut) {
    }

    /** No-op for this format. */
    @Override // edu.cmu.casos.parser.CasosParserFormatIn
    public void loadTable(String str, DataStoreBaseTable dataStoreBaseTable) {
    }

    /**
     * Encodes a string with {@link Translate#encode(String)}.
     *
     * @param str the raw string (a URL in this class)
     * @return the encoded string
     */
    public String fixHTMLString(String str) {
        return Translate.encode(str);
    }
}
