package edu.cmu.casos.rex;

import edu.cmu.casos.automap.PosTextDataTokenizer;
import iitb2.CRF.DataSequence;
import java.io.File;
import java.io.FileReader;
import java.io.LineNumberReader;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Vector;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;

/* loaded from: input_file:edu/cmu/casos/rex/RexTextDataSequence.class */
/**
 * A sequence of {@link TextElement} tokens exposed through the {@link DataSequence}
 * interface.
 *
 * <p>A sequence can be built from an XML file containing {@code <DOC>} elements
 * (annotated child elements become labeled tokens, plain text becomes tokens
 * labeled {@code "none"}), from a plain text file, or from an in-memory list of
 * words.
 *
 * <p>When reading annotated XML, the {@code analysisMethod} selects the label
 * string handed to each annotated token:
 * <ul>
 *   <li>{@code 0} - the position code only;</li>
 *   <li>{@code 1} - the thesaurus label, or {@code TAG-TYPE} if the thesaurus has
 *       no entry;</li>
 *   <li>{@code 2} - position code, a dash, then the label from method 1.</li>
 * </ul>
 * Any other value causes annotated tokens to be skipped. The position code is
 * 0 for a single-token element, 1 for its first token, 3 for its last token and
 * 2 for tokens in between.
 *
 * <p>Constructors and parsing methods never propagate exceptions; failures are
 * printed to standard output and the sequence keeps whatever was read so far.
 */
public class RexTextDataSequence implements DataSequence {
    /** Label used for tokens outside any annotated element. */
    private static final String NO_LABEL = "none";
    private static final String DOC_TAG = "DOC";
    private static final String DOCNO_TAG = "DOCNO";
    private static final String TYPE_ATTRIBUTE = "TYPE";

    private static final int METHOD_POSITION_ONLY = 0;
    private static final int METHOD_LABEL_ONLY = 1;
    private static final int METHOD_POSITION_AND_LABEL = 2;

    private static final int POSITION_SINGLE = 0;
    private static final int POSITION_FIRST = 1;
    private static final int POSITION_MIDDLE = 2;
    private static final int POSITION_LAST = 3;

    /** Maps a raw {@code TAG-TYPE} label to the label actually used; never null after construction from a file. */
    Hashtable<String, String> labelThesaurus;
    /** Path of the file this sequence was read from (unset for in-memory / plain-text construction). */
    String fName;
    /** Label construction mode, see the class description. */
    int analysisMethod;
    /** The tokens of this sequence, in reading order. */
    Vector<TextElement> vContent = new Vector<>();
    /** Characters that get a space inserted in front of them when reading plain text. */
    HashSet<Character> hsIsolatedCharacters = new HashSet<>();
    private boolean isPosTagged = false;

    /** Small manual smoke test for {@link #getNumberOfOccurances(String, String)}. */
    public static void main(String[] strArr) {
        System.out.println(getNumberOfOccurances("this is a test and this isnt very difficult", "isnt"));
    }

    /**
     * Reads an annotated XML file given by path.
     *
     * @param str       path of the XML file
     * @param hashtable label thesaurus; {@code null} is treated as an empty thesaurus
     * @param i         analysis method (0, 1 or 2)
     */
    public RexTextDataSequence(String str, Hashtable<String, String> hashtable, int i) {
        try {
            this.analysisMethod = i;
            // A null thesaurus used to stay null and break parsing with an NPE.
            this.labelThesaurus = thesaurusOrEmpty(hashtable);
            this.fName = str;
            readDataFromFile(new File(str));
        } catch (Exception e) {
            System.out.println(getClass().getName() + ".RexTextDataSequence() error: " + e);
            e.printStackTrace();
        }
    }

    /**
     * Reads an annotated XML file.
     *
     * @param file      the XML file
     * @param hashtable label thesaurus; {@code null} is treated as an empty thesaurus
     * @param i         analysis method (0, 1 or 2)
     */
    public RexTextDataSequence(File file, Hashtable<String, String> hashtable, int i) {
        try {
            this.analysisMethod = i;
            this.labelThesaurus = thesaurusOrEmpty(hashtable);
            this.fName = file.getAbsolutePath();
            readDataFromFile(file);
        } catch (Exception e) {
            System.out.println(getClass().getName() + ".RexTextDataSequence() error: " + e);
            e.printStackTrace();
        }
    }

    /**
     * Builds a sequence from already tokenized words; every token gets a
     * {@code null} label and position {@code -1}.
     *
     * @param vector the words, in order
     */
    public RexTextDataSequence(Vector<String> vector) {
        try {
            for (String word : vector) {
                this.vContent.add(new TextElement(word, null, -1));
            }
        } catch (Exception e) {
            System.out.println(getClass().getName() + ".RexTextDataSequence() error:" + e);
            e.printStackTrace();
        }
    }

    /**
     * Reads a plain text file, splitting each line on spaces and tabs after
     * separating the characters {@code . , & % " $} from the preceding text.
     * Every token gets a {@code null} label and position {@code -1}.
     *
     * @param str path of the text file
     */
    public RexTextDataSequence(String str) {
        try {
            this.hsIsolatedCharacters.add('.');
            this.hsIsolatedCharacters.add(',');
            this.hsIsolatedCharacters.add('&');
            this.hsIsolatedCharacters.add('%');
            this.hsIsolatedCharacters.add('\"');
            this.hsIsolatedCharacters.add('$');
            // try-with-resources closes both readers even when reading fails.
            try (FileReader fileReader = new FileReader(str);
                    LineNumberReader lineReader = new LineNumberReader(fileReader)) {
                String line;
                while ((line = lineReader.readLine()) != null) {
                    String spaced = isolatePunctuation(line.replace('\n', ' ').replace('\t', ' '));
                    for (String piece : spaced.split(" ")) {
                        String word = piece.trim();
                        if (word.length() > 0) {
                            this.vContent.add(new TextElement(word, null, -1));
                        }
                    }
                }
            }
        } catch (Exception e) {
            System.out.println(getClass().getName() + ".RexTextDataSequence() error:" + e);
            e.printStackTrace();
        }
    }

    /** Returns the given thesaurus, or a fresh empty one when it is null. */
    private static Hashtable<String, String> thesaurusOrEmpty(Hashtable<String, String> thesaurus) {
        return thesaurus != null ? thesaurus : new Hashtable<String, String>();
    }

    /** Inserts a space in front of every character contained in {@link #hsIsolatedCharacters}. */
    private String isolatePunctuation(String str) {
        StringBuilder spaced = new StringBuilder(str.length() + 8);
        for (int k = 0; k < str.length(); k++) {
            char current = str.charAt(k);
            if (this.hsIsolatedCharacters.contains(current)) {
                spaced.append(' ');
            }
            spaced.append(current);
        }
        return spaced.toString();
    }

    /** @return the number of tokens in this sequence */
    public int length() {
        return this.vContent.size();
    }

    /**
     * Sets the label id of one token.
     *
     * @param i  token index
     * @param i2 new label id
     */
    public void set_y(int i, int i2) {
        this.vContent.get(i).y = i2;
    }

    /**
     * @param i token index
     * @return the text of the token
     */
    public Object x(int i) {
        return this.vContent.get(i).text;
    }

    /**
     * @param i token index
     * @return the current label id of the token
     */
    public int y(int i) {
        return this.vContent.get(i).y;
    }

    /**
     * Parses an XML file and appends the tokens of every {@code <DOC>} element to
     * this sequence. Text directly below {@code <DOC>} is split on whitespace and
     * labeled {@code "none"}; text inside child elements other than
     * {@code <DOCNO>} is labeled according to {@link #analysisMethod}.
     * Errors are printed, not thrown.
     *
     * @param file the XML file to read
     */
    public void readDataFromFile(File file) {
        try {
            NodeList docs = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(file)
                    .getElementsByTagName(DOC_TAG);
            for (int d = 0; d < docs.getLength(); d++) {
                NodeList children = docs.item(d).getChildNodes();
                for (int c = 0; c < children.getLength(); c++) {
                    Node child = children.item(c);
                    if (child.getNodeType() == Node.TEXT_NODE) {
                        addUnlabeledTokens(child.getTextContent());
                    } else if (child.getNodeType() == Node.ELEMENT_NODE) {
                        Element annotated = (Element) child;
                        if (!annotated.getTagName().equalsIgnoreCase(DOCNO_TAG)) {
                            addAnnotatedTokens(annotated);
                        }
                    }
                }
            }
        } catch (Exception e) {
            System.out.println(getClass().getName() + ".readDataFromFile() error:" + e);
            e.printStackTrace();
        }
    }

    /** Splits plain text on whitespace and appends each non-empty piece labeled "none". */
    private void addUnlabeledTokens(String text) {
        for (String piece : text.split("\\s")) {
            String word = piece.trim();
            if (word.length() > 0) {
                this.vContent.add(new TextElement(word, NO_LABEL, -1));
            }
        }
    }

    /** Appends the whitespace-separated tokens of every text child of an annotated element. */
    private void addAnnotatedTokens(Element annotated) {
        // The label depends only on the element, so resolve it once.
        String rawLabel = annotated.getTagName() + "-" + annotated.getAttribute(TYPE_ATTRIBUTE);
        String label = this.labelThesaurus == null ? null : this.labelThesaurus.get(rawLabel);
        if (label == null
                && (this.analysisMethod == METHOD_LABEL_ONLY || this.analysisMethod == METHOD_POSITION_AND_LABEL)) {
            label = rawLabel;
        }

        NodeList parts = annotated.getChildNodes();
        for (int p = 0; p < parts.getLength(); p++) {
            Node part = parts.item(p);
            if (part.getNodeType() != Node.TEXT_NODE) {
                continue;
            }
            String content = part.getTextContent().trim();
            if (content.isEmpty()) {
                // Whitespace-only text would otherwise produce an empty token.
                continue;
            }
            // "\\s+" keeps runs of whitespace from yielding empty tokens that would
            // also distort the first/middle/last position codes.
            String[] words = content.split("\\s+");
            for (int w = 0; w < words.length; w++) {
                addAnnotatedToken(words[w], label, positionCode(w, words.length));
            }
        }
    }

    /** Returns 0 for a lone token, 1 for the first, 3 for the last and 2 for any token in between. */
    private static int positionCode(int index, int count) {
        if (count == 1) {
            return POSITION_SINGLE;
        }
        if (index == 0) {
            return POSITION_FIRST;
        }
        if (index == count - 1) {
            return POSITION_LAST;
        }
        return POSITION_MIDDLE;
    }

    /** Appends one annotated token using the label format of the current analysis method. */
    private void addAnnotatedToken(String word, String label, int position) {
        switch (this.analysisMethod) {
            case METHOD_LABEL_ONLY:
                this.vContent.add(new TextElement(word, label, position));
                break;
            case METHOD_POSITION_ONLY:
                this.vContent.add(new TextElement(word, position + "", position));
                break;
            case METHOD_POSITION_AND_LABEL:
                this.vContent.add(new TextElement(word, position + "-" + label, position));
                break;
            default:
                // Unknown analysis methods add nothing.
                break;
        }
    }

    /**
     * Extracts every annotated text span of an XML file as
     * {@code TAG-TYPE^text}, with newlines in the text replaced by spaces and the
     * text trimmed. {@code <DOCNO>} elements and un-annotated text are skipped.
     * Errors are printed, not thrown; the spans collected so far are returned.
     *
     * @param file the XML file to read
     * @return the labeled spans in document order (possibly empty, never null)
     */
    public static Vector<String> getLabeledSequences(File file) {
        Vector<String> sequences = new Vector<>();
        try {
            NodeList docs = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(file)
                    .getElementsByTagName(DOC_TAG);
            for (int d = 0; d < docs.getLength(); d++) {
                NodeList children = docs.item(d).getChildNodes();
                for (int c = 0; c < children.getLength(); c++) {
                    Node child = children.item(c);
                    if (child.getNodeType() != Node.ELEMENT_NODE) {
                        continue;
                    }
                    Element annotated = (Element) child;
                    String tagName = annotated.getTagName();
                    if (tagName.equalsIgnoreCase(DOCNO_TAG)) {
                        continue;
                    }
                    String prefix = tagName + "-" + annotated.getAttribute(TYPE_ATTRIBUTE) + "^";
                    NodeList parts = annotated.getChildNodes();
                    for (int p = 0; p < parts.getLength(); p++) {
                        Node part = parts.item(p);
                        if (part.getNodeType() == Node.TEXT_NODE) {
                            sequences.add(prefix + part.getTextContent().replaceAll("\n", " ").trim());
                        }
                    }
                }
            }
        } catch (Exception e) {
            System.out.println("RexTextDataSequence.getLabeledSequences() error:" + e);
            e.printStackTrace();
        }
        return sequences;
    }

    /**
     * Placeholder: no analysis is implemented, so the result is always empty.
     *
     * @return a new, empty vector
     */
    public Vector<String> getSequenceAnalysis() {
        return new Vector<>();
    }

    /**
     * Tags every token with the result of
     * {@code posTextDataTokenizer.viterbiTagText(tokenText, i)} and marks the
     * sequence as POS tagged. All tokens are tagged before any result is stored,
     * so a failure while tagging leaves the existing tags untouched.
     *
     * @param posTextDataTokenizer the tagger to use
     * @param i                    passed through to the tagger unchanged
     */
    public void posTagSequence(PosTextDataTokenizer posTextDataTokenizer, int i) {
        // Project types are not visible here, so values are handled as Object and
        // cast to String exactly as the previous untyped code did.
        Vector<Object> tags = new Vector<>(this.vContent.size());
        for (TextElement element : this.vContent) {
            Object word = element.text;
            tags.add(posTextDataTokenizer.viterbiTagText((String) word, i));
        }
        for (int k = 0; k < tags.size(); k++) {
            this.vContent.get(k).pos = (String) tags.get(k);
        }
        this.isPosTagged = true;
    }

    /** @return true once {@link #posTagSequence(PosTextDataTokenizer, int)} has completed */
    public boolean isPosTagged() {
        return this.isPosTagged;
    }

    /**
     * @return the number of tokens whose current label id equals the original
     *         one and whose label is not {@code "none"}
     */
    public int getNumberOfCorrectlyIdentifiedElements() {
        int correct = 0;
        for (TextElement element : this.vContent) {
            if (element.y == element.orgY && isEntityLabel(element.y)) {
                correct++;
            }
        }
        return correct;
    }

    /** @return the number of tokens whose original label is not {@code "none"} */
    public int getNumberOfElementsInOrgDataSet() {
        int labeled = 0;
        for (TextElement element : this.vContent) {
            if (isEntityLabel(element.orgY)) {
                labeled++;
            }
        }
        return labeled;
    }

    /** @return the number of tokens whose current label is not {@code "none"} */
    public int getNumberOfInferredElements() {
        int inferred = 0;
        for (TextElement element : this.vContent) {
            if (isEntityLabel(element.y)) {
                inferred++;
            }
        }
        return inferred;
    }

    /**
     * Tells whether a label id maps to a label other than "none".
     * NOTE(review): an id without a mapping is treated as "not an entity" instead
     * of raising a NullPointerException - confirm this matches how TextElement
     * assigns ids to unlabeled tokens.
     */
    private static boolean isEntityLabel(int labelId) {
        String label = TextElement.htLabelsInverse.get(Integer.valueOf(labelId));
        return label != null && !label.equals(NO_LABEL);
    }

    /**
     * Adds, for every key of the given table, the number of space-delimited
     * occurrences of that key in this sequence's text to the key's counter.
     * Keys that do not occur are left unchanged.
     *
     * @param hashtable search terms mapped to running counts; updated in place
     */
    public void countOccurances(Hashtable<String, Integer> hashtable) {
        System.out.println(getClass().getName() + ".countOccurances() file:" + this.fName);
        // Leading and trailing spaces let the first and last token match as well.
        StringBuilder padded = new StringBuilder(" ");
        for (TextElement element : this.vContent) {
            padded.append(element.text).append(' ');
        }
        String text = padded.toString();
        // put() on an existing key does not restructure the table, so updating
        // while iterating over the key set is safe.
        for (String term : hashtable.keySet()) {
            int hits = getNumberOfOccurances(text, term);
            if (hits > 0) {
                Integer previous = hashtable.get(term);
                int base = previous == null ? 0 : previous.intValue();
                hashtable.put(term, Integer.valueOf(base + hits));
            }
        }
    }

    /**
     * Counts the occurrences of {@code str2} in {@code str} that have a space
     * directly before and directly after them. Matches touching the start or the
     * end of {@code str} are therefore not counted. Overlapping matches are
     * considered.
     *
     * @param str  the text to search
     * @param str2 the term to look for
     * @return the number of space-delimited matches; 0 if either argument is
     *         null or the term is empty
     */
    protected static int getNumberOfOccurances(String str, String str2) {
        // An empty term matches everywhere and previously never terminated.
        if (str == null || str2 == null || str2.isEmpty()) {
            return 0;
        }
        int count = 0;
        int from = 0;
        while (true) {
            int start = str.indexOf(str2, from);
            if (start < 0) {
                return count;
            }
            int end = start + str2.length();
            // end < str.length() guards the charAt that used to overrun the text
            // when the match was its very last word.
            if (start > 0 && end < str.length() && str.charAt(start - 1) == ' ' && str.charAt(end) == ' ') {
                count++;
            }
            from = start + 1;
        }
    }
}
