package edu.cmu.casos.rex;

import au.com.bytecode.opencsv.CSVWriter;
import edu.cmu.casos.automap.FileExtensionFilter;
import edu.cmu.casos.automap.PosTextDataTokenizer;
import iitb2.CRF.CRF;
import iitb2.Model.FeatureGenImpl;
import iitb2.Utils.Options;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.io.LineNumberReader;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Vector;

/* loaded from: input_file:edu/cmu/casos/rex/InferenceEngineAutoMap.class */
public class InferenceEngineAutoMap {
    /** CRF model applied to token sequences; created and loaded by {@link #prepareModelForInference()}. */
    private CRF crfModel;
    /** Feature generator the CRF model is constructed with; created by {@link #prepareModelForInference()}. */
    private FeatureGenImpl featureGen;
    /** {@link #analysisMethod} value that selects the "ANALYSIS_METHOD_PREDICT_BOUNDARY" resource files. */
    public static final short ANALYSIS_METHOD_PREDICT_BOUNDARY = 0;
    /** {@link #analysisMethod} value that selects the "ANALYSIS_METHOD_PREDICT_CLASS_LABEL" resource files. */
    public static final short ANALYSIS_METHOD_PREDICT_CLASS_LABEL = 1;
    /** {@link #analysisMethod} value that selects the "ANALYSIS_METHOD_PREDICT_BOUNDARY_AND_CLASS_LABEL" label coding file. */
    public static final short ANALYSIS_METHOD_PREDICT_BOUNDARY_AND_CLASS_LABEL = 2;
    /** Label name to numeric code, read from the label coding CSV in {@link #prepareModelForInference()}. */
    private Hashtable<String, Integer> htLabelCoding;
    /** Inverse of {@link #htLabelCoding}: numeric code to label name. */
    private Hashtable<Integer, String> htLabelsInverse;
    /**
     * Optional part-of-speech tokenizer. It is never assigned in this class as shown;
     * {@link #getSequenceResults(Vector)} only POS-tags the sequence when it is non-null.
     */
    private PosTextDataTokenizer ptdt;
    /** Directory holding the label coding, feature and model files; set by {@link #processTexts}. */
    public String baseDir = "";
    /** Name of the sub-directory that {@link #getTrainingBucketDirectories(String)} leaves out of its result. */
    public String validationDirectory = "";
    /** Selected analysis method; the default 1 equals {@link #ANALYSIS_METHOD_PREDICT_CLASS_LABEL}. */
    public short analysisMethod = 1;
    /** Characters that isolatePunctuation() prefixes with a space so they split into tokens of their own. */
    private HashSet<Character> hsIsolatedCharacters = new HashSet<>();

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:edu/cmu/casos/rex/InferenceEngineAutoMap$Tuple.class */
    public class Tuple implements Comparable {
        String word;
        String category;

        public Tuple(String str, String str2) {
            this.word = str;
            this.category = str2;
        }

        @Override // java.lang.Comparable
        public int compareTo(Object obj) {
            if (!(obj instanceof Tuple)) {
                return toString().compareTo(obj.toString());
            }
            Tuple tuple = (Tuple) obj;
            return (this.word + this.category).compareTo(tuple.word + tuple.category);
        }

        public boolean equals(Object obj) {
            return compareTo(obj) == 0;
        }

        public String cleanCategory() {
            String str = this.category;
            int indexOf = str.indexOf("_");
            if (indexOf > -1) {
                str = str.substring(0, indexOf);
            }
            return str;
        }

        public String getMetaSpec() {
            String str = this.category;
            int indexOf = str.indexOf("_");
            return indexOf > -1 ? str.substring(indexOf + 1, str.length()) : "";
        }
    }

    /**
     * Command-line entry point. Expects exactly three arguments (input directory, output
     * directory, CRF resource directory) and forwards them to {@link #processTexts}; any other
     * argument count prints a usage line. Exceptions are reported on standard output.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        try {
            if (strArr.length != 3) {
                System.out.println("Usage: [input_dir] [output_dir] [CRF_dir]");
                return;
            }
            InferenceEngineAutoMap engine = new InferenceEngineAutoMap();
            engine.processTexts(strArr[0], strArr[1], strArr[2]);
        } catch (Exception failure) {
            System.out.println("InferenceEngineAutoMap.main() error: " + failure);
            failure.printStackTrace();
        }
    }

    public InferenceEngineAutoMap() {
        this.hsIsolatedCharacters.add(new Character('.'));
        this.hsIsolatedCharacters.add(new Character(','));
        this.hsIsolatedCharacters.add(new Character('&'));
        this.hsIsolatedCharacters.add(new Character('%'));
        this.hsIsolatedCharacters.add(new Character('\"'));
        this.hsIsolatedCharacters.add(new Character('$'));
    }

    /**
     * Runs entity extraction over every {@code .txt} file of an input directory and writes two
     * UTF-8 CSV files into the output directory: {@code suggestedMetaNetThes.csv}
     * (conceptFrom, conceptTo, metaOntology, metaName) and {@code attributes.csv}
     * (conceptTo, attribute).
     *
     * <p>The entities of all input files are collected first; the CSV files are only created
     * afterwards and are always closed, even when writing fails. Errors are reported on standard
     * output and never propagate to the caller.
     *
     * @param str  input directory containing the text files
     * @param str2 output directory for the two CSV files
     * @param str3 resource directory with the label coding, feature and model files; stored in {@link #baseDir}
     */
    public void processTexts(String str, String str2, String str3) {
        try {
            System.out.println(getClass().getName() + ".processTexts() inputDir:" + str + " outputDir:" + str2 + " resourceDir:" + str3);
            this.baseDir = str3;
            File[] listFiles = new File(str).listFiles((FilenameFilter) new FileExtensionFilter("txt"));
            if (listFiles == null) {
                // File.listFiles returns null when the path is not a directory or cannot be read.
                System.out.println(getClass().getName() + ".processTexts() error: cannot list input directory " + str);
                return;
            }
            prepareModelForInference();

            HashSet<Tuple> entities = new HashSet<>();
            for (File textFile : listFiles) {
                if (textFile.getName().toLowerCase().endsWith(".txt")) {
                    entities.addAll(getListOfEntitiesAsTuple(getSequenceResults(getCleanTextAsVector(getTextFromFile(textFile.getAbsolutePath())))));
                }
            }

            ArrayList<String[]> thesaurusRows = new ArrayList<>();
            ArrayList<String[]> attributeRows = new ArrayList<>();
            for (Tuple tuple : entities) {
                // Compute the word once; an array initializer cannot read its own elements.
                String word = tuple.word.trim();
                String concept = convertConcept(word);

                String[] thesaurusRow = {word, concept, tuple.cleanCategory().trim(), ""};
                if (!isEmpty(thesaurusRow) && !contains(thesaurusRows, thesaurusRow)) {
                    thesaurusRows.add(thesaurusRow);
                }

                String[] attributeRow = {concept, tuple.getMetaSpec().trim()};
                if (attributeRow[1].length() > 0 && !isEmpty(attributeRow) && !contains(attributeRows, attributeRow)) {
                    attributeRows.add(attributeRow);
                }
            }

            writeCsv(new File(str2, "suggestedMetaNetThes.csv"),
                    new String[]{"conceptFrom", "conceptTo", "metaOntology", "metaName"}, thesaurusRows);
            writeCsv(new File(str2, "attributes.csv"),
                    new String[]{"conceptTo", "attribute"}, attributeRows);
        } catch (Exception e) {
            System.out.println(getClass().getName() + ".processTexts() error: " + e);
            e.printStackTrace();
        }
    }

    /** Writes a header line followed by the given rows to a UTF-8 CSV file, always closing the writer. */
    private void writeCsv(File file, String[] header, ArrayList<String[]> rows) throws Exception {
        CSVWriter writer = new CSVWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8"));
        try {
            writer.writeNext(header);
            for (String[] row : rows) {
                writer.writeNext(row);
            }
        } finally {
            writer.close();
        }
    }

    /**
     * Tells whether the list already holds a row whose {@code Arrays.toString} rendering equals
     * that of the given row.
     *
     * @param arrayList rows collected so far
     * @param strArr    candidate row
     * @return {@code true} if an equally rendered row is present
     */
    private boolean contains(ArrayList<String[]> arrayList, String[] strArr) {
        final String wanted = Arrays.toString(strArr);
        for (String[] existing : arrayList) {
            if (wanted.equals(Arrays.toString(existing))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Turns a phrase into a concept name by replacing every space with an underscore.
     *
     * @param str the phrase
     * @return the phrase with {@code ' '} replaced by {@code '_'}
     */
    private String convertConcept(String str) {
        return str.replace(' ', '_');
    }

    /**
     * Checks whether a row carries no content at all.
     *
     * @param strArr the row to inspect
     * @return {@code true} if every element is empty after trimming (also for a zero-length row)
     */
    private boolean isEmpty(String[] strArr) {
        for (int i = 0; i < strArr.length; i++) {
            if (strArr[i].trim().length() > 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Loads everything needed for inference from {@link #baseDir}: the label coding for the
     * current {@link #analysisMethod}, its inverse mapping, the feature generator and the CRF
     * model. Errors are reported on standard output.
     */
    public void prepareModelForInference() {
        try {
            final short method = this.analysisMethod;

            // Label coding file for the selected method; an unknown method yields an empty path.
            String labelCodingPath;
            switch (method) {
                case 0:
                    labelCodingPath = this.baseDir + "/labelCoding-ANALYSIS_METHOD_PREDICT_BOUNDARY.csv";
                    break;
                case 1:
                    labelCodingPath = this.baseDir + "/labelCoding-ANALYSIS_METHOD_PREDICT_CLASS_LABEL.csv";
                    break;
                case 2:
                    labelCodingPath = this.baseDir + "/labelCoding-ANALYSIS_METHOD_PREDICT_BOUNDARY_AND_CLASS_LABEL.csv";
                    break;
                default:
                    labelCodingPath = "";
                    break;
            }
            this.htLabelCoding = getHashtableFromFile(labelCodingPath);

            // Build the code -> label lookup used when decoding predictions.
            this.htLabelsInverse = new Hashtable<>();
            Iterator<String> labels = this.htLabelCoding.keySet().iterator();
            while (labels.hasNext()) {
                String label = labels.next();
                this.htLabelsInverse.put(this.htLabelCoding.get(label), label);
            }

            this.featureGen = new FeatureGenImpl("naive", this.htLabelCoding.size());
            this.crfModel = new CRF(this.featureGen.numStates(), this.featureGen, new Options());
            Options options = new Options();

            // NOTE(review): only methods 0 and 1 load feature/model files; method 2 keeps the
            // freshly constructed CRF created above -- confirm whether that is intended.
            String featuresFile = null;
            String modelFile = null;
            if (method == 0) {
                featuresFile = "/features-ANALYSIS_METHOD_PREDICT_BOUNDARY.txt";
                modelFile = "/model-ANALYSIS_METHOD_PREDICT_BOUNDARY.txt";
            } else if (method == 1) {
                featuresFile = "/features-ANALYSIS_METHOD_PREDICT_CLASS_LABEL.txt";
                modelFile = "/model-ANALYSIS_METHOD_PREDICT_CLASS_LABEL.txt";
            }
            if (featuresFile != null) {
                this.featureGen.read(this.baseDir + featuresFile);
                // The CRF is rebuilt after the features are read, before the model is read.
                this.crfModel = new CRF(this.featureGen.numStates(), this.featureGen, options);
                this.crfModel.read(this.baseDir + modelFile);
            }
        } catch (Exception failure) {
            System.out.println(getClass().getName() + ".prepareModelForInference() error: " + failure);
            failure.printStackTrace();
        }
    }

    /**
     * Labels a token sequence with the CRF model. The tokens are wrapped in a
     * {@code RexTextDataSequence}, POS-tagged when a tokenizer is available, run through the
     * model, and each element's {@code label} is set from its predicted code {@code y}.
     *
     * @param vector the tokens of one text
     * @return the labelled elements, or {@code null} if anything failed (the error is printed)
     */
    public Vector<TextElement> getSequenceResults(Vector<String> vector) {
        try {
            RexTextDataSequence sequence = new RexTextDataSequence(vector);
            PosTextDataTokenizer tagger = this.ptdt;
            if (tagger != null) {
                sequence.posTagSequence(tagger, 1);
            }
            this.crfModel.apply(sequence);
            for (TextElement element : sequence.vContent) {
                element.label = this.htLabelsInverse.get(Integer.valueOf(element.y));
            }
            return sequence.vContent;
        } catch (Exception failure) {
            System.out.println(getClass().getName() + ".getSequenceResults() error: " + failure);
            failure.printStackTrace();
            return null;
        }
    }

    /**
     * Splits raw text into tokens. Newlines and tabs become spaces, the isolated punctuation
     * characters get a space inserted in front of them, and the result is split on single
     * spaces; empty pieces are dropped.
     *
     * @param str the raw text
     * @return the non-empty, trimmed tokens in order; whatever was collected so far if an error occurs
     */
    public Vector<String> getCleanTextAsVector(String str) {
        Vector<String> tokens = new Vector<>();
        try {
            // Re-register the isolated characters (same set the constructor installs).
            for (char symbol : new char[]{'.', ',', '&', '%', '\"', '$'}) {
                this.hsIsolatedCharacters.add(Character.valueOf(symbol));
            }
            String flattened = str.replace('\n', ' ').replace('\t', ' ');
            String[] pieces = isolatePunctuation(flattened).split(" ");
            for (int i = 0; i < pieces.length; i++) {
                String token = pieces[i].trim();
                if (!token.isEmpty()) {
                    tokens.add(token);
                }
            }
        } catch (Exception failure) {
            System.out.println(getClass().getName() + ".getCleanTextAsVector() error: " + failure);
            failure.printStackTrace();
        }
        return tokens;
    }

    /**
     * Prints one tab-separated line per sequence position whose oracle label is not
     * {@code "none__"}: token, oracle label, decoded label and POS tag.
     *
     * <p>Double quotes are stripped from the token before printing, matching what
     * {@link #saveAnalysisToFile} does. Previously the result of {@code String.replace} was
     * discarded, so the quotes were printed unchanged.
     *
     * <p>Printing stops at the first position that raises an exception (for example when the
     * oracle has no entry for a predicted code); the error is reported on standard output.
     *
     * @param rexTextDataSequence the labelled sequence
     * @param printStream         destination of the output
     * @param hashtable           oracle mapping the predicted code (as a string) to a label
     */
    public void printSequence(RexTextDataSequence rexTextDataSequence, PrintStream printStream, Hashtable<String, String> hashtable) {
        for (int i = 0; i < rexTextDataSequence.length(); i++) {
            try {
                String oracleLabel = hashtable.get(rexTextDataSequence.y(i) + "");
                String decodedLabel = this.htLabelsInverse.get(Integer.valueOf(rexTextDataSequence.y(i)));
                if (!oracleLabel.equals("none__")) {
                    // Strings are immutable: keep the result of replace() and print that.
                    String token = ((String) rexTextDataSequence.x(i)).replace("\"", "");
                    printStream.print(token);
                    printStream.print("\t");
                    printStream.print(oracleLabel);
                    printStream.print("\t");
                    printStream.print(decodedLabel);
                    printStream.print("\t");
                    printStream.print(rexTextDataSequence.vContent.get(i).pos);
                    printStream.print("\n");
                }
            } catch (Exception e) {
                System.out.println(getClass().getName() + ".printSequence() error: " + e);
                e.printStackTrace();
                return;
            }
        }
    }

    /**
     * Merges multi-token entities into single elements based on the first character of each
     * element's label:
     * <ul>
     *   <li>{@code "none"}: any pending tokens are combined and emitted first, then the element itself;</li>
     *   <li>labels starting with {@code "1"} or {@code "2"}: the element is added to the pending tokens;</li>
     *   <li>labels starting with {@code "3"}: the element is added and the pending tokens are combined and emitted;</li>
     *   <li>anything else (including {@code "0"}): the element is emitted unchanged.</li>
     * </ul>
     *
     * @param vector labelled elements in text order
     * @return the elements with pending runs replaced by their combined element
     */
    public static Vector<TextElement> getVectorOfCombinedNamedEntities(Vector<TextElement> vector) {
        Vector<TextElement> combined = new Vector<>();
        Vector pending = new Vector();
        for (TextElement element : vector) {
            String label = element.getLabel();
            if ("none".equals(label)) {
                if (!pending.isEmpty()) {
                    combined.add(TextElement.getCombinedTextElement(pending));
                    pending.clear();
                }
                combined.add(element);
                continue;
            }
            boolean closesEntity = label.startsWith("3");
            if (closesEntity || label.startsWith("1") || label.startsWith("2")) {
                pending.add(element);
                if (closesEntity) {
                    combined.add(TextElement.getCombinedTextElement(pending));
                    pending.clear();
                }
            } else {
                combined.add(element);
            }
        }
        // NOTE(review): tokens still pending when the input ends (a "1"/"2" run without a closing
        // "3" or "none") are not emitted -- confirm whether that is intended.
        return combined;
    }

    /**
     * Writes a frequency table of the given elements to a tab-separated file. Elements are
     * grouped by {@code getCharacteristicString()}; for each group whose oracle label is not
     * {@code "none__"} one line is written: text (double quotes removed), combined labels,
     * POS tag and the number of occurrences. The last element seen for a key represents its group.
     *
     * <p>The writer is closed even when writing fails; errors are reported on standard output.
     *
     * @param vector    the analysed elements
     * @param file      destination file (overwritten)
     * @param hashtable oracle mapping the predicted code (as a string) to a label
     */
    public static void saveAnalysisToFile(Vector<TextElement> vector, File file, Hashtable<String, String> hashtable) {
        try {
            Hashtable<String, Integer> counts = new Hashtable<>();
            Hashtable<String, TextElement> representatives = new Hashtable<>();
            for (TextElement element : vector) {
                String key = element.getCharacteristicString();
                representatives.put(key, element);
                Integer seen = counts.get(key);
                counts.put(key, Integer.valueOf(seen == null ? 1 : seen.intValue() + 1));
            }
            try (FileWriter fileWriter = new FileWriter(file);
                 PrintWriter printWriter = new PrintWriter(fileWriter)) {
                for (String key : counts.keySet()) {
                    TextElement textElement = representatives.get(key);
                    if (!hashtable.get(textElement.y + "").equals("none__")) {
                        printWriter.print(textElement.text.replace("\"", ""));
                        printWriter.print("\t");
                        printWriter.print(textElement.getCombinedLabels());
                        printWriter.print("\t");
                        printWriter.print(textElement.pos);
                        printWriter.print("\t");
                        printWriter.print(counts.get(key));
                        printWriter.println();
                    }
                }
            }
        } catch (Exception e) {
            System.out.println("InferenceEngine.saveAnalysisToFile() error: " + e);
            e.printStackTrace();
        }
    }

    /**
     * Constructs a {@code RexTextDataIter} over the given single entry with the current
     * {@link #analysisMethod}, then prints every key of {@code TextElement.htLabels} on its own
     * line. Errors are reported on standard output.
     *
     * @param str the single entry handed to {@code RexTextDataIter}
     */
    public void createListOfUniqueLabels(String str) {
        try {
            Vector sources = new Vector();
            sources.add(str);
            // Constructed for its side effects only; the instance itself is not used.
            new RexTextDataIter(sources, new Hashtable(), this.analysisMethod);
            for (Iterator<String> labels = TextElement.htLabels.keySet().iterator(); labels.hasNext(); ) {
                System.out.println(labels.next());
            }
        } catch (Exception failure) {
            System.out.println(getClass().getName() + ".createListOfUniqueLabels() error: " + failure);
            failure.printStackTrace();
        }
    }

    /**
     * Converts every {@code *.qa} file in {@code <baseDir>/2010-11-13/all} into a sibling
     * {@code *.xml} file in which each {@code &} is replaced by {@code &amp;}.
     *
     * <p>The target path is built with {@link File#File(String, String)} instead of a
     * hard-coded backslash so it works on every platform, and reader and writer are closed even
     * when copying fails. Errors are reported on standard output.
     */
    public void fixFiles() {
        try {
            File directory = new File(this.baseDir + "/2010-11-13/all");
            File[] listFiles = directory.listFiles();
            if (listFiles == null) {
                // File.listFiles returns null when the path is not a directory or cannot be read.
                System.out.println(getClass().getName() + ".fixFiles() error: cannot list directory " + directory.getAbsolutePath());
                return;
            }
            for (File source : listFiles) {
                String name = source.getName();
                if (!name.endsWith(".qa")) {
                    continue;
                }
                // Swap the three-character ".qa" suffix for ".xml".
                File target = new File(source.getParent(), name.substring(0, name.length() - 3) + ".xml");
                try (LineNumberReader reader = new LineNumberReader(new FileReader(source));
                     PrintWriter writer = new PrintWriter(new FileWriter(target))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        writer.println(line.replace("&", "&amp;"));
                    }
                }
            }
        } catch (Exception e) {
            System.out.println(getClass().getName() + ".fixFiles() error: " + e);
            e.printStackTrace();
        }
    }

    /**
     * Writes the labelled sequences of every regular file in a directory to one output file,
     * one sequence per line, printing a progress line per directory entry.
     *
     * <p>The output file is created even when the directory does not exist (it is then left
     * empty). The writer is closed even when reading a file fails; errors are reported on
     * standard output.
     *
     * @param str  directory containing the labelled files
     * @param str2 path of the output file (overwritten)
     */
    public void createLabelExampleList(String str, String str2) {
        try {
            File directory = new File(str);
            try (PrintWriter printWriter = new PrintWriter(new FileWriter(str2))) {
                if (directory.exists() && directory.isDirectory()) {
                    File[] listFiles = directory.listFiles();
                    int position = 1;
                    for (File entry : listFiles) {
                        System.out.println(getClass().getName() + ".RexTextDataInter() loading: " + position + "/" + listFiles.length + "  " + entry.getName());
                        position++;
                        if (entry.isFile()) {
                            Iterator<String> it = RexTextDataSequence.getLabeledSequences(entry).iterator();
                            while (it.hasNext()) {
                                printWriter.println(it.next());
                            }
                        }
                    }
                } else {
                    System.out.println(getClass().getName() + ".RexTextDataInter() error: directory " + directory.getAbsolutePath() + " does not exist.");
                }
            }
        } catch (Exception e) {
            System.out.println(getClass().getName() + ".createLabelExampleList() error: " + e);
            e.printStackTrace();
        }
    }

    /**
     * Reads a text file into a string, ending every line with {@code "\n"}. The file is decoded
     * with the charset {@link FileReader} uses by default.
     *
     * <p>The reader is closed even when reading fails. On error the message is printed and the
     * text read so far (possibly empty) is returned.
     *
     * @param str path of the file to read
     * @return the file content with normalised line endings
     */
    public static String getTextFromFile(String str) {
        StringBuilder text = new StringBuilder();
        try (LineNumberReader reader = new LineNumberReader(new FileReader(str))) {
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line).append("\n");
            }
        } catch (Exception e) {
            System.out.println("InferenceEngineAutoMap.getTextFromFile() error: " + e);
            e.printStackTrace();
        }
        return text.toString();
    }

    /**
     * Loads a tab-separated thesaurus: on each non-blank line the text before the first tab is
     * the key and the text after it is the value (both trimmed).
     *
     * <p>Lines without a tab are skipped with a message instead of aborting the whole load, and
     * the reader is closed even when reading fails. On error the message is printed and the
     * entries read so far are returned.
     *
     * @param str path of the thesaurus file
     * @return the key/value entries read from the file
     */
    public static Hashtable<String, String> getThesaurusFromFile(String str) {
        Hashtable<String, String> hashtable = new Hashtable<>();
        try (LineNumberReader reader = new LineNumberReader(new FileReader(str))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.isEmpty()) {
                    continue;
                }
                int tab = trimmed.indexOf('\t');
                if (tab < 0) {
                    // substring(0, -1) would throw and end the load; skip the malformed line instead.
                    System.out.println("InferenceEngineAutoMap.getThesaurusFromFile() skipping line " + reader.getLineNumber() + " (no tab separator): " + trimmed);
                    continue;
                }
                hashtable.put(trimmed.substring(0, tab).trim(), trimmed.substring(tab).trim());
            }
        } catch (Exception e) {
            System.out.println("InferenceEngineAutoMap.getThesaurusFromFile() error: " + e);
            e.printStackTrace();
        }
        return hashtable;
    }

    /**
     * Lists the absolute paths of all sub-directories of a directory, leaving out the one whose
     * name equals {@link #validationDirectory}.
     *
     * @param str the parent directory
     * @return absolute paths of the remaining sub-directories; whatever was collected so far if an error occurs
     */
    public Vector<String> getTrainingBucketDirectories(String str) {
        Vector<String> buckets = new Vector<>();
        try {
            File[] children = new File(str).listFiles();
            for (int i = 0; i < children.length; i++) {
                File child = children[i];
                if (child.isDirectory() && !child.getName().equals(this.validationDirectory)) {
                    buckets.add(child.getAbsolutePath());
                }
            }
        } catch (Exception failure) {
            System.out.println(getClass().getName() + ".getTrainingBucketDirectories() error: " + failure);
            failure.printStackTrace();
        }
        return buckets;
    }

    /**
     * Loads a two-column, tab-separated file into a lookup keyed by the SECOND column with the
     * FIRST column as value (both trimmed). Blank lines and lines that do not split into exactly
     * two columns are ignored.
     *
     * <p>The reader is closed even when reading fails. On error the message is printed and the
     * entries read so far are returned.
     *
     * @param str path of the file
     * @return mapping from second-column value to first-column value
     */
    public Hashtable<String, String> getNumberOracle(String str) {
        Hashtable<String, String> hashtable = new Hashtable<>();
        try (LineNumberReader reader = new LineNumberReader(new FileReader(str))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.isEmpty()) {
                    continue;
                }
                String[] columns = trimmed.split("\t");
                if (columns.length == 2) {
                    hashtable.put(columns[1].trim(), columns[0].trim());
                }
            }
        } catch (Exception e) {
            System.out.println(getClass().getName() + ".getNumberOracle() error: " + e);
            e.printStackTrace();
        }
        return hashtable;
    }

    /**
     * Writes a table as {@code key,value} lines to a file and echoes every entry on standard
     * output.
     *
     * <p>The writer is closed even when writing fails; errors are reported on standard output.
     *
     * @param hashtable the entries to save
     * @param str       path of the output file (overwritten)
     */
    public void saveHtAsFile(Hashtable<String, Integer> hashtable, String str) {
        try (PrintWriter printWriter = new PrintWriter(new FileWriter(str))) {
            for (String key : hashtable.keySet()) {
                Integer value = hashtable.get(key);
                printWriter.println(key + "," + value);
                System.out.println(getClass().getName() + ".saveHtAsFile() entry: " + key + ", " + value);
            }
        } catch (Exception e) {
            System.out.println(getClass().getName() + ".saveHtAsFile() error: " + e);
            e.printStackTrace();
        }
    }

    /**
     * Loads {@code key,number} lines into a table. Blank lines and lines without a comma are
     * ignored; only the first two comma-separated fields are used.
     *
     * <p>The reader is now closed after use (it was previously never closed). On error, including
     * a non-numeric second field, the message is printed and the entries read so far are returned.
     *
     * @param str path of the file
     * @return mapping from the first field to the integer in the second field
     */
    public Hashtable<String, Integer> getHashtableFromFile(String str) {
        Hashtable<String, Integer> hashtable = new Hashtable<>();
        try (LineNumberReader reader = new LineNumberReader(new FileReader(str))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.isEmpty()) {
                    continue;
                }
                String[] fields = trimmed.split(",");
                if (fields.length > 1) {
                    hashtable.put(fields[0].trim(), Integer.valueOf(fields[1].trim()));
                }
            }
        } catch (Exception e) {
            System.out.println(getClass().getName() + ".getHashtableFromFile() error: " + e);
            e.printStackTrace();
        }
        return hashtable;
    }

    /**
     * Inserts a single space in front of every character contained in
     * {@code hsIsolatedCharacters}; all other characters are copied unchanged.
     *
     * @param str the text to process
     * @return the text with a space before each isolated character
     */
    private String isolatePunctuation(String str) {
        StringBuilder spaced = new StringBuilder();
        for (char current : str.toCharArray()) {
            if (this.hsIsolatedCharacters.contains(Character.valueOf(current))) {
                spaced.append(" ");
            }
            spaced.append(current);
        }
        return spaced.toString();
    }

    /**
     * Renders the entities of a labelled sequence as text, one entity per line. Consecutive
     * elements with the same non-{@code "none"} label form one entity whose text is the elements'
     * texts, each preceded by a space.
     *
     * <p>The "previous label" starts as {@code "none"} rather than the empty string, so no
     * spurious empty entry is emitted before the first element or for an empty sequence.
     * A {@code null} sequence yields an empty string.
     *
     * <p>NOTE(review): entities flushed inside the loop use {@code ",  "} as separator while the
     * final entity uses {@code " - "}; both are kept as found -- confirm which one is intended.
     *
     * @param vector labelled elements in text order, may be {@code null}
     * @return the rendered entity list; whatever was rendered so far if an error occurs
     */
    public String getListOfEntities(Vector<TextElement> vector) {
        StringBuilder rendered = new StringBuilder();
        if (vector == null) {
            return rendered.toString();
        }
        try {
            String pendingText = "";
            String previousLabel = "none";
            for (TextElement element : vector) {
                // A label change closes the entity that was being collected, if any.
                if (!element.label.equals(previousLabel) && !previousLabel.equals("none")) {
                    rendered.append(pendingText + ",  " + previousLabel + "\n");
                    pendingText = "";
                }
                previousLabel = element.label;
                pendingText = !element.label.equals("none") ? pendingText + " " + element.text : "";
            }
            if (!previousLabel.equals("none")) {
                rendered.append(pendingText + " - " + previousLabel + "\n");
            }
        } catch (Exception e) {
            System.out.println(getClass().getName() + ".getListOfEntities() error: " + e);
            e.printStackTrace();
        }
        return rendered.toString();
    }

    /**
     * Collects the entities of a labelled sequence as (text, label) tuples. Consecutive elements
     * with the same non-{@code "none"} label form one entity whose text is the elements' texts,
     * each preceded by a space.
     *
     * <p>The "previous label" starts as {@code "none"} rather than the empty string, so no empty
     * {@code Tuple("", "")} is produced before the first element or for an empty sequence.
     * A {@code null} sequence (which {@link #getSequenceResults(Vector)} returns on failure)
     * yields an empty set instead of a NullPointerException.
     *
     * @param vector labelled elements in text order, may be {@code null}
     * @return the entities found; whatever was collected so far if an error occurs
     */
    public HashSet<Tuple> getListOfEntitiesAsTuple(Vector<TextElement> vector) {
        HashSet<Tuple> entities = new HashSet<>();
        if (vector == null) {
            return entities;
        }
        try {
            String pendingText = "";
            String previousLabel = "none";
            for (TextElement element : vector) {
                // A label change closes the entity that was being collected, if any.
                if (!element.label.equals(previousLabel) && !previousLabel.equals("none")) {
                    entities.add(new Tuple(pendingText, previousLabel));
                    pendingText = "";
                }
                previousLabel = element.label;
                pendingText = !element.label.equals("none") ? pendingText + " " + element.text : "";
            }
            if (!previousLabel.equals("none")) {
                entities.add(new Tuple(pendingText, previousLabel));
            }
        } catch (Exception e) {
            System.out.println(getClass().getName() + ".getListOfEntitiesAsTuple() error: " + e);
            e.printStackTrace();
        }
        return entities;
    }
}
