package edu.stanford.nlp.trees;

import edu.stanford.nlp.io.NumberRangesFileFilter;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.util.FilePathProcessor;
import edu.stanford.nlp.util.FileProcessor;
import edu.stanford.nlp.util.Filter;
import edu.stanford.nlp.util.Timing;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileFilter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.Writer;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

/* loaded from: input_file:edu/stanford/nlp/trees/DiskTreebank.class */
public final class DiskTreebank extends Treebank {
    /** Debug switch; it is not referenced anywhere else in the visible code of this class. */
    private static final boolean PRINT_FILENAMES = false;
    /** Paths registered through loadPath; kept index-aligned with {@link #fileFilters}. */
    private ArrayList<File> filePaths;
    /** Filter registered together with the path stored at the same index of {@link #filePaths}. */
    private ArrayList<FileFilter> fileFilters;
    /** File most recently loaded by an iterator over this treebank; stays null until an iterator loads one. */
    private File currentFile;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/trees/DiskTreebank$DiskTreebankIterator.class */
    /**
     * Iterator over the trees of every file registered with the enclosing
     * {@link DiskTreebank}.
     *
     * <p>The list of files is collected once, when the iterator is constructed.
     * Trees are then read one file at a time into a reusable in-memory treebank,
     * so only the trees of a single file are held at any moment.
     *
     * <p>Side effect: each time a file is loaded, the enclosing treebank's
     * {@code currentFile} is updated. Because the following file is loaded as
     * soon as the last tree of a file has been handed out by {@link #next()},
     * {@code currentFile} may already refer to the next file at that point.
     */
    public class DiskTreebankIterator implements Iterator<Tree> {
        /** Index into {@link #files} of the file currently in memory; -1 until the first file is loaded. */
        private int fileUpto = -1;
        /** Index within {@link #currentFileTrees} of the tree that {@link #next()} returns next. */
        private int treeUpto;
        /** String form of every file accepted by the registered path/filter pairs, in processing order. */
        private final List<String> files = new ArrayList<>();
        /** Reusable holder for the trees of the file at {@link #fileUpto}. */
        private final MemoryTreebank currentFileTrees;
        /** Cached answer for {@link #hasNext()}, refreshed after every {@link #next()}. */
        private boolean hasNext;

        private DiskTreebankIterator() {
            FileProcessor collector = new FileProcessor() {
                @Override
                public void processFile(File file) {
                    DiskTreebankIterator.this.files.add(file.toString());
                }
            };
            // filePaths and fileFilters are parallel lists: entry i of one belongs to entry i of the other.
            int pathCount = DiskTreebank.this.filePaths.size();
            for (int i = 0; i < pathCount; i++) {
                FilePathProcessor.processPath(
                        DiskTreebank.this.filePaths.get(i),
                        DiskTreebank.this.fileFilters.get(i),
                        collector);
            }
            this.currentFileTrees =
                    new MemoryTreebank(DiskTreebank.this.treeReaderFactory(), DiskTreebank.this.encoding());
            this.hasNext = primeNextFile();
        }

        /**
         * Ensures that {@link #currentFileTrees} holds an unread tree at
         * {@link #treeUpto}, loading further files as needed. Files that yield
         * no trees are skipped.
         *
         * @return true if a tree is available, false once every file is exhausted
         */
        private boolean primeNextFile() {
            while (this.fileUpto < this.files.size()) {
                if (this.treeUpto < this.currentFileTrees.size()) {
                    return true;
                }
                // Nothing left in memory (or nothing loaded yet): advance to the next file.
                this.currentFileTrees.clear();
                this.fileUpto++;
                this.treeUpto = 0;
                if (this.fileUpto < this.files.size()) {
                    String path = this.files.get(this.fileUpto);
                    DiskTreebank.this.currentFile = new File(path);
                    this.currentFileTrees.loadPath(path);
                }
            }
            return false;
        }

        /** Returns whether another tree is available. */
        @Override
        public boolean hasNext() {
            return this.hasNext;
        }

        /**
         * Returns the next tree and immediately prepares the one after it.
         *
         * @throws NoSuchElementException if no trees remain
         */
        @Override
        public Tree next() {
            if (!this.hasNext) {
                throw new NoSuchElementException();
            }
            Tree tree = this.currentFileTrees.get(this.treeUpto);
            this.treeUpto++;
            this.hasNext = primeNextFile();
            return tree;
        }

        /**
         * Removal is not supported.
         *
         * @throws UnsupportedOperationException always
         */
        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Creates an empty treebank that reads trees with a new
     * {@code LabeledScoredTreeReaderFactory}.
     */
    public DiskTreebank() {
        this(new LabeledScoredTreeReaderFactory());
    }

    /**
     * Creates an empty treebank that reads trees with a new
     * {@code LabeledScoredTreeReaderFactory}.
     *
     * @param str forwarded unchanged as the second superclass constructor argument;
     *            presumably the character encoding name (main passes its -encoding
     *            value to the two-argument constructor) - confirm against Treebank
     */
    public DiskTreebank(String str) {
        this(new LabeledScoredTreeReaderFactory(), str);
    }

    /**
     * Creates an empty treebank with no registered paths.
     *
     * @param treeReaderFactory passed to the superclass constructor
     */
    public DiskTreebank(TreeReaderFactory treeReaderFactory) {
        super(treeReaderFactory);
        this.filePaths = new ArrayList<>();
        this.fileFilters = new ArrayList<>();
    }

    /**
     * Creates an empty treebank with no registered paths.
     *
     * @param treeReaderFactory passed to the superclass constructor
     * @param str               passed to the superclass constructor; presumably the
     *                          character encoding name - confirm against Treebank
     */
    public DiskTreebank(TreeReaderFactory treeReaderFactory, String str) {
        super(treeReaderFactory, str);
        this.filePaths = new ArrayList<>();
        this.fileFilters = new ArrayList<>();
    }

    /**
     * Creates an empty treebank that reads trees with a new
     * {@code LabeledScoredTreeReaderFactory}.
     *
     * @param i only forwarded to the two-argument overload, which does not use it
     */
    public DiskTreebank(int i) {
        this(i, new LabeledScoredTreeReaderFactory());
    }

    /**
     * Creates an empty treebank; equivalent to {@code DiskTreebank(treeReaderFactory)}.
     *
     * @param i                 not used by this constructor
     * @param treeReaderFactory passed on to the single-argument constructor
     */
    public DiskTreebank(int i, TreeReaderFactory treeReaderFactory) {
        this(treeReaderFactory);
    }

    /**
     * Forgets every registered path and its filter, so iterators created
     * afterwards see no files. The value returned by getCurrentFile is not reset.
     */
    @Override // edu.stanford.nlp.trees.Treebank, java.util.AbstractCollection, java.util.Collection
    public void clear() {
        this.filePaths.clear();
        this.fileFilters.clear();
    }

    /**
     * Registers a path and the filter to apply to it. Nothing is read here:
     * the pair is only appended to the parallel path/filter lists, which are
     * consulted each time an iterator is created.
     *
     * @param file       path to register
     * @param fileFilter filter stored alongside the path
     */
    @Override // edu.stanford.nlp.trees.Treebank
    public void loadPath(File file, FileFilter fileFilter) {
        this.filePaths.add(file);
        this.fileFilters.add(fileFilter);
    }

    /**
     * Hands every tree of this treebank, in iteration order, to the visitor.
     *
     * @param treeVisitor receives each tree through {@code visitTree}
     */
    @Override // edu.stanford.nlp.trees.Treebank
    public void apply(TreeVisitor treeVisitor) {
        for (Tree tree : this) {
            treeVisitor.visitTree(tree);
        }
    }

    /**
     * Returns the file most recently loaded by an iterator over this treebank.
     *
     * @return that file, or null if no iterator has loaded a file yet
     */
    public File getCurrentFile() {
        return this.currentFile;
    }

    /**
     * Creates a fresh iterator over all trees under the registered paths.
     * Constructing the iterator already enumerates the matching files and
     * loads files until the first tree is available.
     *
     * @return a new, independent iterator
     */
    @Override // java.util.AbstractCollection, java.util.Collection, java.lang.Iterable
    public Iterator<Tree> iterator() {
        return new DiskTreebankIterator();
    }

    /**
     * Command-line tool for inspecting a treebank on disk.
     *
     * <p>Arguments are zero or more flags followed by the treebank path and an
     * optional file-number range. With no arguments a usage message is printed.
     * A flag that needs a value but has none, or a missing treebank path, is
     * reported on standard error and the method returns without doing any work.
     *
     * <p>Requested actions run in this order: summary, sentence lengths,
     * punctuation, PTB correction, Penn printing, yields, decimation. Decimation
     * ends the run. Otherwise, if {@code -maxLength} was given, every tree whose
     * yield is no longer than that is printed; if it was not given and
     * {@code -timing} was, the timing report runs.
     *
     * @param strArr command-line arguments
     * @throws IOException if the output encoding is unsupported or a decimation
     *                     output file cannot be written
     */
    public static void main(String[] strArr) throws IOException {
        if (strArr.length == 0) {
            System.err.println("This main method will let you variously manipulate and view a treebank.");
            System.err.println("Usage: java DiskTreebank [-flags]* treebankPath fileRanges");
            System.err.println("Useful flags include:");
            System.err.println("\t-maxLength n\t-suffix ext\t-treeReaderFactory class");
            System.err.println("\t-pennPrint\t-encoding enc\t-tlp class\t-sentenceLengths");
            System.err.println("\t-summary\t-decimate\t-yield\t-correct\t-punct");
            return;
        }
        int argIndex = 0;
        int maxLength = -1;
        boolean normalized = false;
        boolean decimate = false;
        boolean pennPrint = false;
        boolean correct = false;
        boolean summary = false;
        boolean timing = false;
        boolean printYield = false;
        boolean punct = false;
        boolean printSentenceLengths = false;
        String decimatePrefix = null;
        String encoding = "UTF-8";
        String suffix = Treebank.DEFAULT_TREE_FILE_SUFFIX;
        TreeReaderFactory treeReaderFactory = null;
        TreebankLanguagePack treebankLanguagePack = null;

        while (argIndex < strArr.length && strArr[argIndex].startsWith("-")) {
            String option = strArr[argIndex];
            // Fail with a message instead of an ArrayIndexOutOfBoundsException when a value is missing.
            if (optionTakesValue(option) && argIndex + 1 >= strArr.length) {
                System.err.println("Option " + option + " requires an argument");
                return;
            }
            if (option.equals("-maxLength")) {
                maxLength = Integer.parseInt(strArr[argIndex + 1]);
                argIndex += 2;
            } else if (option.equals("-normalized")) {
                normalized = true;
                argIndex++;
            } else if (option.equalsIgnoreCase("-tlp")) {
                try {
                    treebankLanguagePack = (TreebankLanguagePack) Class.forName(strArr[argIndex + 1]).newInstance();
                    treeReaderFactory = treebankLanguagePack.treeReaderFactory();
                    argIndex += 2;
                } catch (Exception e) {
                    System.err.println("Couldn't instantiate as TreebankLanguagePack: " + strArr[argIndex + 1]);
                    return;
                }
            } else if (option.equals("-treeReaderFactory") || option.equals("-trf")) {
                try {
                    treeReaderFactory = (TreeReaderFactory) Class.forName(strArr[argIndex + 1]).newInstance();
                    argIndex += 2;
                } catch (Exception e) {
                    System.err.println("Couldn't instantiate as TreeReaderFactory: " + strArr[argIndex + 1]);
                    return;
                }
            } else if (option.equals("-suffix")) {
                suffix = strArr[argIndex + 1];
                argIndex += 2;
            } else if (option.equals("-decimate")) {
                decimate = true;
                decimatePrefix = strArr[argIndex + 1];
                argIndex += 2;
            } else if (option.equals("-encoding")) {
                encoding = strArr[argIndex + 1];
                argIndex += 2;
            } else if (option.equals("-correct")) {
                correct = true;
                argIndex++;
            } else if (option.equals("-summary")) {
                summary = true;
                argIndex++;
            } else if (option.equals("-yield")) {
                printYield = true;
                argIndex++;
            } else if (option.equals("-punct")) {
                punct = true;
                argIndex++;
            } else if (option.equals("-pennPrint")) {
                pennPrint = true;
                argIndex++;
            } else if (option.equals("-timing")) {
                timing = true;
                argIndex++;
            } else if (option.equals("-sentenceLengths")) {
                printSentenceLengths = true;
                argIndex++;
            } else {
                System.err.println("Unknown option: " + option);
                argIndex++;
            }
        }

        // Everything below indexes strArr[argIndex], so a path must be present.
        if (argIndex >= strArr.length) {
            System.err.println("No treebank path specified");
            return;
        }
        String treebankPath = strArr[argIndex];
        String fileRanges = argIndex + 1 < strArr.length ? strArr[argIndex + 1] : null;

        if (treeReaderFactory == null) {
            treeReaderFactory = new TreeReaderFactory() {
                @Override
                public TreeReader newTreeReader(Reader reader) {
                    return new PennTreeReader(reader, new LabeledScoredTreeFactory());
                }
            };
        }
        Treebank treebank = normalized ? new DiskTreebank() : new DiskTreebank(treeReaderFactory, encoding);
        final PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(System.out, encoding), true);
        if (fileRanges != null) {
            treebank.loadPath(treebankPath, new NumberRangesFileFilter(fileRanges, true));
        } else {
            treebank.loadPath(treebankPath, suffix, true);
        }

        if (summary) {
            System.out.println(treebank.textualSummary());
        }
        if (printSentenceLengths) {
            sentenceLengths(treebank, treebankPath, fileRanges, printWriter);
        }
        if (punct) {
            printPunct(treebank, treebankLanguagePack, printWriter);
        }
        if (correct) {
            treebank = new EnglishPTBTreebankCorrector().transformTrees(treebank);
        }
        if (pennPrint) {
            treebank.apply(new TreeVisitor() {
                @Override
                public void visitTree(Tree tree) {
                    tree.pennPrint(printWriter);
                    printWriter.println();
                }
            });
        }
        if (printYield) {
            treebank.apply(new TreeVisitor() {
                @Override
                public void visitTree(Tree tree) {
                    printWriter.println(tree.yield().toString());
                }
            });
        }
        if (decimate) {
            // try-with-resources closes all three files even if decimation fails part-way.
            try (BufferedWriter trainWriter = new BufferedWriter(
                         new OutputStreamWriter(new FileOutputStream(decimatePrefix + "-train.txt"), encoding));
                 BufferedWriter devWriter = new BufferedWriter(
                         new OutputStreamWriter(new FileOutputStream(decimatePrefix + "-dev.txt"), encoding));
                 BufferedWriter testWriter = new BufferedWriter(
                         new OutputStreamWriter(new FileOutputStream(decimatePrefix + "-test.txt"), encoding))) {
                treebank.decimate(trainWriter, devWriter, testWriter);
            }
            return;
        }
        if (maxLength < 0) {
            if (timing) {
                runTiming(treebank);
            }
        } else {
            for (Tree tree : treebank) {
                if (tree.yield().length() <= maxLength) {
                    System.out.println(tree);
                }
            }
        }
    }

    /** Returns whether the given command-line flag must be followed by a value. */
    private static boolean optionTakesValue(String option) {
        return option.equals("-maxLength")
                || option.equalsIgnoreCase("-tlp")
                || option.equals("-treeReaderFactory")
                || option.equals("-trf")
                || option.equals("-suffix")
                || option.equals("-decimate")
                || option.equals("-encoding");
    }

    /**
     * Prints, one per line, every tagged word in the treebank whose tag is
     * accepted by the language pack's punctuation-tag filter.
     *
     * @param treebank             trees to scan
     * @param treebankLanguagePack source of the punctuation-tag filter; when null
     *                             a message is written to standard error and
     *                             nothing is printed
     * @param printWriter          destination for the matching tagged words
     */
    private static void printPunct(Treebank treebank, TreebankLanguagePack treebankLanguagePack, PrintWriter printWriter) {
        if (treebankLanguagePack == null) {
            System.err.println("The -punct option requires you to specify -tlp");
            return;
        }
        Filter<String> isPunctuationTag = treebankLanguagePack.punctuationTagAcceptFilter();
        for (Tree tree : treebank) {
            for (TaggedWord word : tree.taggedYield()) {
                if (!isPunctuationTag.accept(word.tag())) {
                    continue;
                }
                printWriter.println(word);
            }
        }
    }

    /**
     * Times three passes over the treebank and reports each on standard error:
     * counting words with an iterator, counting words with a {@link TreeVisitor},
     * and asking for the treebank's size.
     *
     * <p>Each pass restarts the timer and reports the word count it computed
     * itself, so the two counting passes can be compared.
     *
     * @param treebank treebank to traverse
     */
    private static void runTiming(Treebank treebank) {
        System.out.println();

        Timing.startTime();
        int iteratorWords = 0;
        for (Tree tree : treebank) {
            iteratorWords += tree.yield().length();
        }
        Timing.endTime("traversing corpus, counting words with iterator");
        System.err.println("There were " + iteratorWords + " words in the treebank.");

        // A one-element array lets the anonymous visitor hand its total back to this method.
        final int[] visitorWords = {0};
        // Restart the clock so this figure covers only the visitor pass, as is done for the size pass below.
        // NOTE(review): assumes Timing.endTime does not itself restart the timer - confirm against Timing.
        Timing.startTime();
        treebank.apply(new TreeVisitor() {
            @Override
            public void visitTree(Tree tree) {
                visitorWords[0] += tree.yield().length();
            }
        });
        System.err.println();
        Timing.endTime("traversing corpus, counting words with TreeVisitor");
        System.err.println("There were " + visitorWords[0] + " words in the treebank.");

        System.err.println();
        Timing.startTime();
        System.err.println("This treebank contains " + treebank.size() + " trees.");
        Timing.endTime("size of corpus");
    }

    /**
     * Prints sentence-length statistics for a treebank to standard output: the
     * number of sentences, a histogram of yield lengths 0 to 150 with running
     * totals, one combined line for longer sentences (only if there are any),
     * the average and median length to one decimal place, and the longest
     * length. The longest tree itself is written to {@code printWriter}.
     *
     * <p>An empty treebank reports an average and median of 0.0.
     *
     * @param treebank    trees to measure
     * @param str         label printed after "Files" (main passes the treebank path)
     * @param str2        optional second label printed after {@code str}; skipped when null
     * @param printWriter receives the string form of the longest tree, or an empty
     *                    line if the treebank has no tree with a non-empty yield
     */
    public static void sentenceLengths(Treebank treebank, String str, String str2, PrintWriter printWriter) {
        final int maxTrackedLength = 150;
        // Buckets 0..150 count exact lengths; the final bucket collects everything longer.
        int[] lengthCounts = new int[maxTrackedLength + 2];
        List<Integer> allLengths = new ArrayList<>();
        long totalWords = 0;
        int longestLength = 0;
        String longestSentence = "";

        for (Tree tree : treebank) {
            int length = tree.yield().length();
            allLengths.add(length);
            lengthCounts[Math.min(length, maxTrackedLength + 1)]++;
            totalWords += length;
            if (length > longestLength) {
                longestLength = length;
                longestSentence = tree.toString();
            }
        }
        int numSentences = allLengths.size();

        System.out.print("Files " + str + ' ');
        if (str2 != null) {
            System.out.print(str2 + ' ');
        }
        System.out.println("consists of " + numSentences + " sentences");

        int runningTotal = 0;
        for (int length = 0; length <= maxTrackedLength; length++) {
            runningTotal += lengthCounts[length];
            System.out.println("  " + lengthCounts[length] + " of length " + length + " (running total: " + runningTotal + ')');
        }
        int overflowCount = lengthCounts[maxTrackedLength + 1];
        if (overflowCount > 0) {
            System.out.println("  " + overflowCount + " of length " + (maxTrackedLength + 1) + " to " + longestLength + " (running total: " + (runningTotal + overflowCount) + ')');
        }

        // Floating-point division keeps the fractional part; guard against an empty treebank.
        double average = numSentences == 0 ? 0.0 : (double) totalWords / numSentences;
        double median = medianLength(allLengths);
        DecimalFormat oneDecimal = new DecimalFormat("0.0");
        System.out.println("Average length: " + oneDecimal.format(average) + "; median length: " + oneDecimal.format(median));
        System.out.println("Longest sentence is of length: " + longestLength);
        printWriter.println(longestSentence);
    }

    /**
     * Returns the median of the given lengths (the mean of the two middle
     * values when the count is even), or 0.0 for an empty list. The argument
     * is not modified.
     */
    private static double medianLength(List<Integer> lengths) {
        if (lengths.isEmpty()) {
            return 0.0;
        }
        List<Integer> sorted = new ArrayList<>(lengths);
        java.util.Collections.sort(sorted);
        int middle = sorted.size() / 2;
        if (sorted.size() % 2 == 1) {
            return sorted.get(middle).doubleValue();
        }
        return (sorted.get(middle - 1) + sorted.get(middle)) / 2.0;
    }
}
