package edu.cmu.casos.wizard;

import java.io.IOException;
import java.io.Reader;
import java.util.LinkedList;
import java.util.List;
import org.apache.log4j.Logger;
import org.apache.xerces.dom.CoreDocumentImpl;
import org.cyberneko.html.parsers.DOMFragmentParser;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

/* loaded from: input_file:edu/cmu/casos/wizard/HTMLUtils.class */
/**
 * Static helpers for pulling the visible text out of an HTML document.
 *
 * <p>The HTML is parsed into a DOM fragment and the values of its text nodes are
 * collected in document order. Text nested inside {@code <script>},
 * {@code <noscript>} and {@code <style>} elements is skipped.
 *
 * <p>This class is not instantiable.
 */
public class HTMLUtils {
    private static final Logger logger = Logger.getLogger(HTMLUtils.class);

    /** Element names (matched case-insensitively) whose subtrees contribute no text. */
    private static final String[] SKIPPED_ELEMENTS = {"script", "noscript", "style"};

    private HTMLUtils() {
    }

    /**
     * Parses the HTML supplied by {@code reader} and returns the values of the text
     * nodes found in it, in document order.
     *
     * <p>Parsing is best-effort: any exception raised by the parser is logged and
     * not rethrown, and text is still collected from the fragment in whatever state
     * the parser left it (the result may therefore be empty).
     *
     * @param reader source of the HTML markup
     * @return a new mutable list holding each text node's value, unmodified
     * @throws IOException declared for interface compatibility; failures raised
     *         while parsing are caught and logged rather than propagated
     */
    public static List<String> extractText(Reader reader) throws IOException {
        DOMFragmentParser parser = new DOMFragmentParser();
        InputSource source = new InputSource(reader);
        DocumentFragment fragment = new CoreDocumentImpl().createDocumentFragment();
        try {
            parser.parse(source, fragment);
        } catch (Exception e) {
            // Deliberately broad: a parse failure must not abort extraction.
            logger.error("Malformed HTML text found; unable to extract text.", e);
        }
        List<String> texts = new LinkedList<String>();
        getNodeText(fragment, texts);
        return texts;
    }

    /**
     * Appends to {@code list} the value of every text node in the subtree rooted at
     * {@code node}, visiting the tree depth-first in document order.
     *
     * <p>Elements named {@code script}, {@code noscript} or {@code style} (any
     * letter case) are skipped together with everything beneath them. Nodes that
     * are neither text nor element nodes contribute only through their children.
     *
     * @param node root of the subtree to scan; {@code null} is ignored
     * @param list receives the text node values, in the order encountered
     */
    public static void getNodeText(Node node, List<String> list) {
        if (node == null) {
            return;
        }
        short type = node.getNodeType();
        if (type == Node.TEXT_NODE) {
            list.add(node.getNodeValue());
            return;
        }
        if (type == Node.ELEMENT_NODE && isSkippedElement(node.getNodeName())) {
            return;
        }
        // Elements and every other container-like node are handled the same way:
        // descend into the children (an empty NodeList when there are none).
        NodeList children = node.getChildNodes();
        int count = children.getLength();
        for (int i = 0; i < count; i++) {
            getNodeText(children.item(i), list);
        }
    }

    /** Returns whether an element with the given name is excluded from text extraction. */
    private static boolean isSkippedElement(String elementName) {
        for (String skipped : SKIPPED_ELEMENTS) {
            if (elementName.equalsIgnoreCase(skipped)) {
                return true;
            }
        }
        return false;
    }
}
