package edu.cmu.casos.automap.reltypes;

import edu.cmu.casos.automap.TranslationEngine;
import edu.cmu.casos.automap.reltypes.io.AMFormats;
import edu.cmu.casos.automap.reltypes.io.Clusters;
import edu.cmu.casos.automap.reltypes.io.SentenceCandidateParser;
import edu.cmu.casos.automap.reltypes.utils.IterUtils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.commons.io.LineIterator;
import org.apache.commons.io.filefilter.SuffixFileFilter;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/cmu/casos/automap/reltypes/RelationExtractor.class */
public class RelationExtractor {
    /** Log4j logger for this class; assigned in the static initializer at the bottom of the class. */
    static Logger logger;
    /**
     * Upper bound on a candidate's length. Not referenced by name in this file;
     * isValidCandidate compares against the same value as a literal.
     * NOTE(review): the literal was presumably inlined by the compiler (file looks decompiled) - confirm.
     */
    private static final int CONTEXT_MAX_LENGTH = 10;
    /** Lower bound on a candidate's length; same remark as for CONTEXT_MAX_LENGTH. */
    private static final int CONTEXT_MIN_LENGTH = 1;
    /** Empty cluster name; cluster() uses the same empty-string value for verbs that have no mapping. */
    public static final String CLUSTER_DEFAULT = "";
    /** Second argument passed to TranslationEngine.thesProcessing by normalizeEntity. */
    private static final String TE_ADJACENCY = "NO_ADJACENCY";
    /** File-name suffix used by runExtractor to select input files in the input directory. */
    private static final String INPUT_FILES_EXT = ".txt";
    /** Threshold compared against RelationCluster.uniqueEntityPairs() in filterMinSupport. */
    private int minUniquePairs;
    /** Threshold compared against RelationCluster.totalEntityPairs() in filterMinSupport. */
    private int minTotalPairs;
    /**
     * Set by the static initializer to the negation of desiredAssertionStatus(); checked in normalizeEntity.
     * NOTE(review): looks like the decompiled form of a compiler-generated assertion flag - confirm.
     */
    static final /* synthetic */ boolean $assertionsDisabled;
    /** When true, lookupEntity normalizes entity names via normalizeEntity. Also forced to true by setThesaurus. */
    private boolean replaceEntities = false;
    /** Thesaurus engine used by normalizeEntity; stays null until setThesaurus is called. */
    private TranslationEngine translationEngine = null;
    /** When true, extractRelationTypes calls Sentence.extendEntities() first. No setter is visible in this file. */
    private boolean extendEntities = true;
    /** Entity cache filled by lookupEntity, keyed by the (possibly normalized) entity name. */
    private Map<String, Entity> entities = new HashMap();

    /**
     * Creates an extractor with the support thresholds later applied by
     * {@link #filterMinSupport(Map)}.
     *
     * @param i  minimum number of unique entity pairs a cluster must have to be kept
     * @param i2 minimum number of total entity pairs a cluster must have to be kept
     */
    public RelationExtractor(int i, int i2) {
        minTotalPairs = i2;
        minUniquePairs = i;
    }

    /**
     * Switches entity-name normalization on or off. While enabled, every name
     * handed to lookupEntity is first passed through normalizeEntity, which
     * consults the translation engine installed by {@link #setThesaurus(String)}.
     *
     * @param z {@code true} to normalize entity names, {@code false} to use them verbatim
     */
    public void setReplaceEntities(boolean z) {
        replaceEntities = z;
    }

    /**
     * Installs a translation engine built from the given thesaurus and turns
     * entity replacement on.
     *
     * @param str thesaurus location, passed as the first constructor argument of
     *            {@code TranslationEngine} (runExtractor logs it as a thesaurus file)
     * @throws Exception whatever the {@code TranslationEngine} constructor throws; in
     *                   that case neither the engine nor the replace flag is changed
     */
    public void setThesaurus(String str) throws Exception {
        // Build the engine first so a constructor failure leaves this object untouched.
        TranslationEngine engine = new TranslationEngine(str, (String) null, false);
        translationEngine = engine;
        replaceEntities = true;
    }

    /**
     * Decides whether a relation candidate should be counted as a match.
     * A candidate is accepted when its length lies within
     * [CONTEXT_MIN_LENGTH, CONTEXT_MAX_LENGTH], it has a main verb, and the chunk
     * context between its two entities passes
     * {@code PatternValidator.validChkPattern}.
     *
     * @param relationCandidate the candidate to check
     * @return {@code true} if the candidate passes the length, main-verb and chunk-pattern checks
     */
    public static boolean isValidCandidate(RelationCandidate relationCandidate) {
        boolean lengthInRange = relationCandidate.length >= CONTEXT_MIN_LENGTH
                && relationCandidate.length <= CONTEXT_MAX_LENGTH;
        if (!lengthInRange) {
            return false;
        }
        if (relationCandidate.getMainVerb() == null) {
            return false;
        }
        // NOTE(review): the POS-pattern check is evaluated but its result is discarded,
        // exactly as in the existing code. The call is kept in case it has side effects;
        // confirm whether it was meant to reject candidates.
        PatternValidator.validPosPattern(
                relationCandidate.sentence.getPosContext(relationCandidate.startContext, relationCandidate.endContext));
        // Only the chunk pattern decides the final outcome.
        return PatternValidator.validChkPattern(
                relationCandidate.sentence.getChkContext(relationCandidate.startContext, relationCandidate.endContext));
    }

    /**
     * Dumps a candidate to standard error for debugging: the candidate itself,
     * followed by its sentence's tokens, POS tags and chunk tags, each joined
     * with single spaces and wrapped in single quotes, then a final blank line.
     *
     * @param relationCandidate the candidate to print
     */
    public static void printInfo(RelationCandidate relationCandidate) {
        final String quotedLine = "'%s'\n\n";
        Sentence source = relationCandidate.sentence;
        System.err.println(relationCandidate);
        // Each join is performed right before its own print so output stays interleaved.
        System.err.printf(quotedLine, StringUtils.join(source.tokens, " "));
        System.err.printf(quotedLine, StringUtils.join(source.pos, " "));
        System.err.printf(quotedLine, StringUtils.join(source.chk, " "));
        System.err.printf("\n");
    }

    /**
     * Walks all sentences, generates relation candidates for each and groups the
     * valid ones into relation types keyed by their context string.
     * <p>
     * Every valid candidate contributes one {@code RelationMention} (built from the
     * two entities resolved through lookupEntity) to the relation type for its
     * context; the type is created on first use from the context and the candidate's
     * main verb. A summary with the number of sentences that produced at least one
     * candidate, the number of candidates and the number of valid candidates is
     * logged at info level.
     *
     * @param iterable the sentences to process
     * @return relation types keyed by context string
     */
    private Map<String, RelationType> extractRelationTypes(Iterable<Sentence> iterable) {
        Map<String, RelationType> typesByContext = new HashMap<String, RelationType>();
        int sentencesWithCandidates = 0;
        int candidatesSeen = 0;
        int candidatesAccepted = 0;
        for (Sentence current : iterable) {
            if (this.extendEntities) {
                current.extendEntities();
            }
            Iterator<RelationCandidate> candidates = current.generateCandidates();
            if (candidates.hasNext()) {
                sentencesWithCandidates++;
            }
            while (candidates.hasNext()) {
                RelationCandidate candidate = candidates.next();
                candidatesSeen++;
                if (!isValidCandidate(candidate)) {
                    continue;
                }
                candidatesAccepted++;
                String context = candidate.getContext();
                RelationType type = typesByContext.get(context);
                if (type == null) {
                    type = new RelationType(context, candidate.getMainVerb());
                    typesByContext.put(context, type);
                }
                // Resolve A before B to keep the entity cache population order unchanged.
                Entity first = lookupEntity(candidate.getEntityA(), candidate.entA.type);
                Entity second = lookupEntity(candidate.getEntityB(), candidate.entB.type);
                type.addMention(new RelationMention(first, second));
            }
        }
        logger.info(String.format("Relation candidates extracted. sentences=%d candidates=%d matches=%d",
                sentencesWithCandidates, candidatesSeen, candidatesAccepted));
        return typesByContext;
    }

    /**
     * Produces the canonical name for an entity.
     * <p>
     * The name is lower-cased with commas replaced by underscores, then sent
     * (with {@code " ."} appended) through the thesaurus. The last character of the
     * thesaurus output is dropped (NOTE(review): presumably the appended period -
     * confirm against TranslationEngine). If the remaining text contains a space,
     * the thesaurus result is ignored and the lower-cased name is used instead.
     * <p>
     * The lower-cased name is also returned when no thesaurus has been loaded
     * (replacement enabled via setReplaceEntities without setThesaurus) or when the
     * thesaurus yields an empty result, instead of failing with a
     * NullPointerException / StringIndexOutOfBoundsException. A null thesaurus
     * result still raises an AssertionError when assertions are enabled, and falls
     * back to the lower-cased name otherwise.
     *
     * @param str the raw entity name; must not be null
     * @return the normalized entity name
     */
    private String normalizeEntity(String str) {
        String lowerCase = str.replace(',', '_').toLowerCase();
        if (this.translationEngine == null) {
            // No thesaurus loaded: only the basic normalization can be applied.
            return lowerCase;
        }
        String thesProcessing = this.translationEngine.thesProcessing(lowerCase + " .", TE_ADJACENCY);
        if (!$assertionsDisabled && thesProcessing == null) {
            throw new AssertionError("thesProcessing returned null for '" + lowerCase + "'");
        }
        if (thesProcessing == null || thesProcessing.length() == 0) {
            // Nothing usable came back; avoid NPE / substring(0, -1).
            return lowerCase;
        }
        String substring = thesProcessing.substring(0, thesProcessing.length() - 1);
        // A result that still contains a space is rejected in favour of the plain name.
        return substring.indexOf(' ') >= 0 ? lowerCase : substring;
    }

    /**
     * Returns the cached {@code Entity} for a name, creating and caching it on first use.
     * <p>
     * When entity replacement is enabled the cache key is the normalized name and
     * the raw name is handed to the {@code Entity} constructor as its third
     * argument; otherwise the raw name is the key and the third argument is null.
     *
     * @param str  the entity name as found in the text
     * @param str2 the entity type, used only when a new {@code Entity} is created
     * @return the existing or newly created entity for the (possibly normalized) name
     */
    private Entity lookupEntity(String str, String str2) {
        String originalName = null;
        String key = str;
        if (this.replaceEntities) {
            originalName = str;
            key = normalizeEntity(str);
        }
        Entity cached = this.entities.get(key);
        if (cached != null) {
            return cached;
        }
        Entity created = new Entity(key, str2, originalName);
        this.entities.put(key, created);
        return created;
    }

    /**
     * Groups relation types into clusters according to a verb-to-cluster mapping.
     * Relation types whose verb has no entry in the mapping are collected in the
     * cluster named by CLUSTER_DEFAULT (the empty string).
     *
     * @param map  cluster name looked up by each relation type's {@code verb}
     * @param map2 relation types to group; only the values are used
     * @return clusters keyed by cluster name
     */
    public Map<String, RelationCluster> cluster(Map<String, String> map, Map<String, RelationType> map2) {
        Map<String, RelationCluster> clustersByName = new HashMap<String, RelationCluster>();
        for (RelationType type : map2.values()) {
            String mapped = map.get(type.verb);
            String clusterName = (mapped != null) ? mapped : CLUSTER_DEFAULT;
            RelationCluster target = clustersByName.get(clusterName);
            if (target == null) {
                target = new RelationCluster(clusterName);
                clustersByName.put(clusterName, target);
            }
            target.relationTypes.add(type);
        }
        return clustersByName;
    }

    /**
     * Keeps only the clusters that meet both support thresholds given to the
     * constructor: at least {@code minUniquePairs} unique entity pairs and at
     * least {@code minTotalPairs} total entity pairs.
     *
     * @param map clusters keyed by name; not modified
     * @return a new map holding the clusters that pass both thresholds
     */
    public Map<String, RelationCluster> filterMinSupport(Map<String, RelationCluster> map) {
        Map<String, RelationCluster> supported = new HashMap<String, RelationCluster>();
        for (Map.Entry<String, RelationCluster> entry : map.entrySet()) {
            RelationCluster candidate = entry.getValue();
            boolean enoughUnique = candidate.uniqueEntityPairs() >= this.minUniquePairs;
            // Total pairs are only queried once the unique-pair threshold is met.
            if (enoughUnique && candidate.totalEntityPairs() >= this.minTotalPairs) {
                supported.put(entry.getKey(), candidate);
            }
        }
        return supported;
    }

    /**
     * Restricts a cluster map to the clusters whose names appear in the given set.
     *
     * @param map clusters keyed by name; not modified
     * @param set names of the clusters to keep; consulted for every key of {@code map}
     * @return a new map containing only the entries whose key is in {@code set}
     */
    public Map<String, RelationCluster> filterNetworks(Map<String, RelationCluster> map, Set<String> set) {
        Map<String, RelationCluster> selected = new HashMap<String, RelationCluster>();
        for (Map.Entry<String, RelationCluster> entry : map.entrySet()) {
            String name = entry.getKey();
            if (!set.contains(name)) {
                continue;
            }
            selected.put(name, entry.getValue());
        }
        return selected;
    }

    /**
     * Runs the extraction pipeline on a stream of input lines: parse the lines
     * into sentences, extract relation types, group them into clusters using the
     * verb-to-cluster mapping, and drop clusters below the support thresholds.
     *
     * @param it  the input lines, handed to {@code SentenceCandidateParser.parse}
     * @param map verb-to-cluster-name mapping used by {@link #cluster(Map, Map)}
     * @return the surviving clusters wrapped in a {@code RelationClusterSet}
     */
    public RelationClusterSet extractRelationClusters(Iterator<String> it, Map<String, String> map) {
        logger.debug("Extracting relation clusters");
        Map<String, RelationType> relationTypes = extractRelationTypes(SentenceCandidateParser.parse(it));
        Map<String, RelationCluster> clustered = cluster(map, relationTypes);
        Map<String, RelationCluster> supported = filterMinSupport(clustered);
        return new RelationClusterSet(supported);
    }

    /**
     * Convenience entry point: builds an extractor, feeds it every {@code .txt}
     * file found directly in the input directory and returns the resulting clusters.
     * <p>
     * All input files are opened up front and read as one concatenated line
     * stream. Every opened reader is closed before this method returns, whether
     * it completes normally or fails.
     *
     * @param i    minimum number of unique entity pairs per cluster
     * @param i2   minimum number of total entity pairs per cluster
     * @param file cluster definition file, loaded with {@code Clusters.loadWordMap}
     * @param str  path of the directory containing the input files
     * @param set  names of the networks to keep; null or empty keeps all of them
     * @param str2 thesaurus location, or null to run without a thesaurus
     * @return the extracted (and optionally filtered) relation clusters
     * @throws IllegalArgumentException if the input directory cannot be listed
     * @throws Exception if the thesaurus, an input file or the cluster definitions cannot be read
     */
    public static RelationClusterSet runExtractor(int i, int i2, File file, String str, Set<String> set, String str2) throws Exception {
        RelationExtractor relationExtractor = new RelationExtractor(i, i2);
        logger.info(String.format("Starting relation extractor. clusterDefs=%s minUniquePairs=%d networks=%s", file, Integer.valueOf(i), set));
        if (str2 != null) {
            relationExtractor.setThesaurus(str2);
            logger.debug("Using thesaurus file: " + str2);
        }
        File inputDir = new File(str);
        String[] fileNames = inputDir.list(new SuffixFileFilter(INPUT_FILES_EXT));
        if (fileNames == null) {
            // File.list returns null when the path is not a directory or an I/O error occurs.
            throw new IllegalArgumentException("Cannot list input directory (missing, not a directory, or unreadable): " + inputDir);
        }
        logger.info(String.format("Input files: %d", Integer.valueOf(fileNames.length)));
        ArrayList<LineIterator> openIterators = new ArrayList<LineIterator>();
        try {
            for (String fileName : fileNames) {
                File inputFile = new File(inputDir, fileName);
                openIterators.add(new LineIterator(new BufferedReader(new FileReader(inputFile))));
                logger.debug(String.format("Adding input file: %s", inputFile));
            }
            // Passed as a raw iterator, as before, so the call does not depend on
            // the exact generic signature of IterUtils.concat.
            @SuppressWarnings({"rawtypes", "unchecked"})
            Iterator<String> allLines = IterUtils.concat((Iterator) openIterators.iterator());
            RelationClusterSet clusters = relationExtractor.extractRelationClusters(allLines, Clusters.loadWordMap(file));
            if (set != null && !set.isEmpty()) {
                logger.debug("Filtering networks: " + set);
                clusters = clusters.filter(set);
            }
            return clusters;
        } finally {
            // Release every reader, including those never reached because of an earlier failure.
            for (LineIterator opened : openIterators) {
                LineIterator.closeQuietly(opened);
            }
        }
    }

    /**
     * Command-line entry point.
     * <p>
     * Arguments: {@code minUniquePairs clusters inputDir outputDir networks [thesaurus]}.
     * {@code networks} is a comma-separated list of network names, or {@code all}
     * for no filtering. Results are written below {@code outputDir} as
     * {@code concepts/reltypes.csv}, {@code relations/reltypes.csv} and
     * {@code networks/networks.xml}; {@code properties/reltypes.csv} is created
     * empty if it does not exist yet.
     * <p>
     * All inputs are validated before anything is created on disk, and a failure
     * to create an output directory is reported instead of being ignored. The
     * process exits with status 1 on any usage or validation error.
     *
     * @param strArr the command-line arguments
     * @throws Exception if extraction or writing the output fails
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr.length < 5) {
            System.err.println("Usage: minUniquePairs clusters inputDir outputDir networks [thesaurus]");
            System.exit(1);
        }
        int parseInt;
        try {
            parseInt = Integer.parseInt(strArr[0]);
        } catch (NumberFormatException e) {
            System.err.println("minUniquePairs must be an integer: " + strArr[0]);
            System.exit(1);
            return;
        }
        File file = new File(strArr[1]);
        File file2 = new File(strArr[2]);
        File file3 = new File(strArr[3]);
        String str = strArr[4];
        if (str.equals("all")) {
            // An empty list splits into zero names, which disables network filtering below.
            str = "";
        }
        String[] split = StringUtils.split(str, ',');
        // Arguments beyond the optional thesaurus are ignored.
        String str2 = strArr.length > 5 ? strArr[5] : null;

        // Validate every input before touching the output location.
        if (!file2.isDirectory()) {
            System.err.println("Input directory does not exist (or is not a directory).");
            System.exit(1);
        }
        if (!file.isFile()) {
            System.err.println("Clusters definition file does not exist.");
            System.exit(1);
        }

        if (file3.isDirectory()) {
            System.err.println("Output directory already exists. Overwriting files.");
        } else if (file3.mkdirs()) {
            System.err.println("Output directory created.");
        } else {
            System.err.println("Could not create output directory: " + file3);
            System.exit(1);
        }
        File file4 = outputSubdirectory(file3, "concepts");
        File file5 = outputSubdirectory(file3, "relations");
        File file6 = outputSubdirectory(file3, "properties");
        File file7 = outputSubdirectory(file3, "networks");
        File file8 = new File(file4, "reltypes.csv");
        File file9 = new File(file5, "reltypes.csv");
        File file10 = new File(file6, "reltypes.csv");
        File file11 = new File(file7, "networks.xml");
        // Only ensures the properties file exists; nothing in this method writes to it.
        file10.createNewFile();

        Set<String> hashSet = null;
        if (split.length > 0) {
            hashSet = new HashSet<String>();
            for (String str3 : split) {
                hashSet.add(str3);
            }
        }
        RelationClusterSet runExtractor = runExtractor(parseInt, 0, file, file2.getAbsolutePath(), hashSet, str2);
        int conceptList = AMFormats.conceptList(runExtractor, file8);
        int relationsList = AMFormats.relationsList(runExtractor, file9);
        AMFormats.semanticNetwork(runExtractor, file11, "relations");
        logger.debug(String.format("RelationExtractor finished. %d networks, %d concepts, %d relations", Integer.valueOf(runExtractor.size()), Integer.valueOf(conceptList), Integer.valueOf(relationsList)));
    }

    /** Returns the named directory below {@code parent}, creating it if needed; exits with status 1 if that fails. */
    private static File outputSubdirectory(File parent, String name) {
        File dir = new File(parent, name);
        if (!dir.isDirectory() && !dir.mkdirs()) {
            System.err.println("Could not create output directory: " + dir);
            System.exit(1);
        }
        return dir;
    }

    // Static initializer: computes the assertion flag and creates the class logger.
    static {
        // True when assertions are NOT enabled for this class; normalizeEntity skips its null check in that case.
        $assertionsDisabled = !RelationExtractor.class.desiredAssertionStatus();
        logger = Logger.getLogger(RelationExtractor.class);
    }
}
