package edu.cmu.casos.automap;

import au.com.bytecode.opencsv.CSVWriter;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/cmu/casos/automap/ExtractNumerics.class */
/**
 * Command-line tool that scans every {@code .txt} file of an input directory for
 * numeric-like expressions (times, currency amounts, phone numbers, dates, URLs and
 * e-mail addresses) and writes one CSV file per input file listing each distinct
 * match, its type and how often it occurred.
 *
 * <p>Usage: {@code ExtractNumerics input_directory output_directory}
 */
public class ExtractNumerics {

    /**
     * Marker placed between the matched text and its type when both are packed into a
     * single map key. None of the type names produced by {@link #buildPatternList()}
     * contains this marker, so the last occurrence in a key is always the real boundary.
     */
    private static final String KEY_SEPARATOR = "-separate-";

    /** Pairs a compiled regular expression with the name of the kind of value it recognises. */
    public static class FormatPattern {
        private final Pattern pat;
        private final String type;

        /**
         * @param pattern compiled expression used to find matches in a line of text
         * @param str     type label written to the {@code extractedType} CSV column
         */
        FormatPattern(Pattern pattern, String str) {
            this.pat = pattern;
            this.type = str;
        }

        /** @return the type label associated with this pattern */
        public String getType() {
            return this.type;
        }

        /** @return the compiled regular expression */
        public Pattern getPattern() {
            return this.pat;
        }
    }

    /**
     * Entry point. Expects exactly two arguments: the directory holding the {@code .txt}
     * input files and the directory that receives the generated {@code .csv} files.
     * Prints a usage message and exits with status 1 when the argument count is wrong.
     * Any exception raised while processing is handed to {@code Debug.exceptHandler},
     * which ends processing of the remaining files.
     *
     * @param strArr command-line arguments: {@code input_directory output_directory}
     */
    public static void main(String[] strArr) {
        if (strArr.length != 2) {
            System.out.println("usage: input_directory output_directory");
            System.exit(1);
        }
        String inputDir = strArr[0];
        String outputDir = strArr[1];
        List<FormatPattern> patterns = buildPatternList();
        try {
            // The file list is requested with the directory exactly as given on the
            // command line; the separator is appended only afterwards.
            // NOTE(review): presumably returns bare file names ending in "txt" - confirm in Utils.
            String[] fileList = Utils.getFileList(inputDir, new FileExtensionFilter("txt"));
            if (!inputDir.endsWith(File.separator)) {
                inputDir = inputDir + File.separator;
            }
            if (!outputDir.endsWith(File.separator)) {
                outputDir = outputDir + File.separator;
            }
            for (String inputName : fileList) {
                processFile(inputDir, outputDir, inputName, patterns);
            }
        } catch (Exception e) {
            Debug.exceptHandler(e, "ExtractNumerics");
        }
    }

    /**
     * Extracts all matches from one input file and writes them to the CSV file of the
     * same base name in the output directory. When nothing was found, a warning is
     * printed and the (header-only) CSV file is removed again.
     *
     * @param inputDir  input directory path, ending with the file separator
     * @param outputDir output directory path, ending with the file separator
     * @param inputName name of the text file inside {@code inputDir}
     * @param patterns  patterns to apply to every line
     * @throws IOException if the input cannot be read or the output cannot be written
     */
    private static void processFile(String inputDir, String outputDir, String inputName,
            List<FormatPattern> patterns) throws IOException {
        // Swap the three-character extension ("txt") for "csv".
        String outputName = inputName.substring(0, inputName.length() - 3) + "csv";
        File outputFile = new File(outputDir, outputName);
        Map<String, Integer> counts;

        // try-with-resources guarantees both streams are released even when reading or
        // writing fails part-way through a file.
        // NOTE(review): the input is decoded with the platform default charset while the
        // output is always UTF-8 - confirm whether the input should be read as UTF-8 too.
        try (BufferedReader reader = new BufferedReader(
                     new InputStreamReader(new FileInputStream(inputDir + inputName)));
             CSVWriter writer = new CSVWriter(
                     new OutputStreamWriter(new FileOutputStream(outputDir + outputName), "utf-8"))) {
            writer.writeNext(new String[]{"conceptFrom", "conceptTo", "metaOntology", "metaName", "extractedType", "frequency"});
            counts = countMatches(reader, patterns);
            writeCounts(writer, counts);
        }

        if (counts.isEmpty()) {
            System.out.println("Warning: No numerics found; " + outputName + " will not be created.");
            // The writer is closed at this point, so the file can be removed right away
            // instead of lingering until the JVM exits; fall back to deferred deletion
            // only if the immediate delete is refused.
            if (!outputFile.delete()) {
                outputFile.deleteOnExit();
            }
        }
    }

    /**
     * Applies every pattern to every line supplied by {@code reader} and tallies the
     * non-empty matches.
     *
     * <p>Matches are recorded exactly as they appear in the text (case-sensitively).
     * Earlier code called {@code toLowerCase()} on the match but discarded the result,
     * so the output has always been case-sensitive; that behaviour is kept.
     *
     * @param reader   source of the text lines
     * @param patterns patterns to apply to each line
     * @return map from {@code matchedText + KEY_SEPARATOR + type} to occurrence count
     * @throws IOException if reading a line fails
     */
    private static Map<String, Integer> countMatches(BufferedReader reader,
            List<FormatPattern> patterns) throws IOException {
        Map<String, Integer> counts = new HashMap<>();
        String line;
        while ((line = reader.readLine()) != null) {
            for (FormatPattern format : patterns) {
                Matcher matcher = format.getPattern().matcher(line);
                while (matcher.find()) {
                    String matched = matcher.group();
                    if (matched.isEmpty()) {
                        continue;
                    }
                    String key = matched + KEY_SEPARATOR + format.getType();
                    Integer previous = counts.get(key);
                    counts.put(key, previous == null ? 1 : previous + 1);
                }
            }
        }
        return counts;
    }

    /**
     * Writes one CSV row per tallied match: the trimmed match, the same text with every
     * character outside {@code [A-Za-z0-9]} replaced by {@code _}, two empty columns,
     * the type and the count.
     *
     * @param writer destination CSV writer (header already written)
     * @param counts tallies as produced by {@link #countMatches}
     */
    private static void writeCounts(CSVWriter writer, Map<String, Integer> counts) {
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            String key = entry.getKey();
            // Split at the LAST separator: the matched text itself (e.g. a URL) may
            // contain "-separate-", whereas the type label never does.
            int boundary = key.lastIndexOf(KEY_SEPARATOR);
            String concept = key.substring(0, boundary).trim();
            String type = key.substring(boundary + KEY_SEPARATOR.length()).trim();
            writer.writeNext(new String[]{concept, concept.replaceAll("[^A-Za-z0-9]", "_"), "", "", type, Integer.toString(entry.getValue())});
        }
    }

    /**
     * Builds the ordered list of patterns applied to each line. Several patterns can
     * match the same text; each match is counted under its own pattern's type.
     *
     * @return the patterns for times, currency amounts, phone numbers, dates, URLs and
     *         e-mail addresses
     */
    private static List<FormatPattern> buildPatternList() {
        List<FormatPattern> patterns = new ArrayList<>();
        // Full month names or their abbreviations with an optional trailing period.
        String month = "(?:January|February|March|April|May|June|July|August|September|October|November|December|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec)\\.?)";

        // Times: h:mm[:ss] with optional am/pm, and h:mm.ss.
        patterns.add(new FormatPattern(Pattern.compile("\\d{1,2}:\\d{2}(?::\\d{2})?(?:\\s*[AaPp][mM])?"), "time"));
        patterns.add(new FormatPattern(Pattern.compile("\\d{1,2}:\\d{2}\\.\\d{2}"), "time"));

        // Currency: leading currency symbol, or trailing three-letter upper-case code.
        patterns.add(new FormatPattern(Pattern.compile("(?:-\\s*)?\\p{Sc}\\s*\\d{1,3}(?:[.,]?\\d{3})*(?:[.,]\\d{2})?"), "currency"));
        patterns.add(new FormatPattern(Pattern.compile("(?:-\\s*)?\\d{1,3}(?:[.,]?\\d{3})*(?:[.,]\\d{2})?\\s*[A-Z]{3}"), "currency"));

        // NOTE(review): this pattern is anchored with ^ and $, so it only matches when the
        // phone number is the entire line - confirm that this is intended.
        patterns.add(new FormatPattern(Pattern.compile("^(1[- ]?)?\\(?(\\d{3})\\)?[- ]?(\\d{3})[- ]?(\\d{4})$"), "phone number"));

        // Dates in textual and numeric forms, plus bare weekday names.
        patterns.add(new FormatPattern(Pattern.compile("(?:(?:Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday),?\\s*)?" + month + "\\s+(?:\\d{4}|\\d{1,2}(?:,?\\s+(?:\\d{4}|\\d{2})|-\\d{1,2})?)"), "date"));
        patterns.add(new FormatPattern(Pattern.compile("(?:Mon|Tu(?:es?)?|Wed|Thu(?:rs?)?|Fri)\\.?,?\\s*" + month + "\\s+(?:\\d{4}|\\d{1,2}(?:,?\\s+(?:\\d{4}|\\d{2}))?)"), "date"));
        patterns.add(new FormatPattern(Pattern.compile("\\d{4}[/-](?:\\d{1,2}[/-])?\\d{2}\\b"), "date"));
        patterns.add(new FormatPattern(Pattern.compile("\\d{1,2}[/-]\\d{1,2}[/-]\\d{4}\\b"), "date"));
        patterns.add(new FormatPattern(Pattern.compile("\\d?\\d\\s\\d?\\d\\s\\d{4}"), "date"));
        patterns.add(new FormatPattern(Pattern.compile("\\d{2}\\s\\d{2}\\s\\d{2}"), "date"));
        patterns.add(new FormatPattern(Pattern.compile("\\d?\\d\\s\\d\\d(?:\\d\\d)?"), "date"));
        patterns.add(new FormatPattern(Pattern.compile("\\d{4}-\\d{4}"), "date"));
        patterns.add(new FormatPattern(Pattern.compile("(?:\\d{1,2}[/-]){2}\\d{2}\\b"), "date"));
        patterns.add(new FormatPattern(Pattern.compile("\\b(?:Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)"), "date"));
        patterns.add(new FormatPattern(Pattern.compile("\\b(?:Mon|Tu(?:es?)?|Wed|Thu(?:rs?)?|Fri)\\.?[^\\w,]"), "date"));
        patterns.add(new FormatPattern(Pattern.compile("(?:\\d{4}|\\d{2}),\\s+" + month + "\\s+\\d{1,2}"), "date"));

        // URLs and e-mail addresses; both require start of input or a whitespace character
        // immediately before the match (the whitespace is part of the match and is trimmed
        // when the CSV row is written).
        patterns.add(new FormatPattern(Pattern.compile("(\\A|\\s)(https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]"), "url"));
        patterns.add(new FormatPattern(Pattern.compile("(\\A|\\s)(([\\w!#$%&'*+\\-/=?\\^`{|}~.]|([.]\"([\\w!#$%&'*+\\-/=?\\^`{|}~.\\[\\](),:;<>@\\\\]|(\\\\ )|(\\\"))*\"[.]))*@|([\\w!#$%&'*+\\-/=?\\^`{|}~.]|([.]\"([\\w!#$%&'*+\\-/=?\\^`{|}~.\\[\\](),:;<>@\\\\]|(\\\\ )|(\\\"))*\"@))*|(\"([\\w!#$%&'*+\\-/=?\\^`{|}~.\\[\\](),:;<>@\\\\]|(\\\\ )|(\\\"))*\")@|\"([\\w!#$%&'*+\\-/=?\\^`{|}~.\\[\\](),:;<>@\\\\]|(\\\\ )|(\\\"))*\"\\.[\\w!#$%&'*+\\-/=?\\^`{|}~.]*@)[a-zA-Z0-9][a-zA-Z0-9\\-]*(\\.[a-zA-Z]+)+"), "email"));
        return patterns;
    }
}
