package com.k_int.ia.content_analysis;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamSource;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.struts.tiles.ComponentDefinition;
import org.apache.xpath.XPathAPI;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.traversal.NodeIterator;
import org.w3c.tidy.Tidy;

/* loaded from: input_file:WEB-INF/lib/content_analysis-1.1.1.jar:com/k_int/ia/content_analysis/HTMLAnalyser.class */
/**
 * Content-analysis plugin for HTML input.
 *
 * <p>The incoming stream is cleaned up into an XML DOM with JTidy, transformed with the
 * stylesheet named by {@link #stylesheet}, and the transformation result is stored on a
 * {@link ResourceInformation} as its RDF tree. Linkages, identity, title and description
 * are then read from that tree, and selected caller-supplied metadata entries are appended
 * to its {@code rdf:Description} element.
 */
public class HTMLAnalyser implements ContentAnalysisPlugin {
    private static final Log log = LogFactory.getLog(HTMLAnalyser.class);

    /** Classpath resource name of the XSLT stylesheet applied to the tidied document. */
    protected static String stylesheet = "/html_to_rdf.xsl";

    /** Namespace URI used for the {@code dc:} elements and attributes created by this class. */
    protected static String dc_ns = "http://purl.org/dc/elements/1.1/";

    /**
     * Analyses an HTML stream and collects what could be extracted from it.
     *
     * <p>Transformation problems are logged and whatever was gathered up to that point is
     * returned; this method does not propagate {@link TransformerException}.
     *
     * @param inputStream the HTML content; it is read but not closed by this method
     * @param map request properties, may be {@code null}. The entry under
     *     {@code ComponentDefinition.URL} is used as the base URL and identity, the entry
     *     under {@code "content_type"} as the MIME type, and every non-null entry is passed
     *     to the stylesheet as a parameter
     * @param list {@code AdditionalMetadataVO} entries to merge into the RDF description,
     *     may be {@code null}
     * @return the populated resource information, never {@code null}
     */
    @Override // com.k_int.ia.content_analysis.ContentAnalysisPlugin
    public ResourceInformation process(InputStream inputStream, Map map, List list) {
        ResourceInformation resourceInformation = new ResourceInformation();
        URL url = null;
        if (map != null) {
            // NOTE(review): the key is a Struts Tiles constant; this looks like a decompiler
            // substitution for a plain string key - confirm against the callers.
            url = (URL) map.get(ComponentDefinition.URL);
            resourceInformation.setMimeType((String) map.get("content_type"));
        }
        InputStream stylesheetStream = null;
        try {
            log.debug("Tidying URL stream...");
            Document tidied = tidyToDom(inputStream);
            TransformerFactory factory = TransformerFactory.newInstance();
            log.debug("Using " + stylesheet + " to compile results...");
            stylesheetStream = HTMLAnalyser.class.getResourceAsStream(stylesheet);
            if (stylesheetStream == null) {
                log.warn("Stylesheet " + stylesheet + " could not be found on the classpath");
                return resourceInformation;
            }
            Transformer transformer = factory.newTransformer(new StreamSource(stylesheetStream));
            applyParameters(transformer, map);

            DOMResult result = new DOMResult();
            transformer.transform(new DOMSource(tidied.getDocumentElement()), result);
            resourceInformation.setRDF(((Document) result.getNode()).getDocumentElement());

            collectLinkages(resourceInformation, url);
            if (url != null) {
                resourceInformation.setIdentity(url.toString());
            } else {
                log.debug("No URL supplied; identity not set");
            }
            Node rdf = resourceInformation.getRDF();
            resourceInformation.setTitle(getValue(rdf, "/rdf:RDF/rdf:Description/dc:title/text()"));
            resourceInformation.setDescription(getValue(rdf, "/rdf:RDF/rdf:Description/dc:description/text()"));
            processAdditionalMetadata(XPathAPI.selectSingleNode(rdf, "/rdf:RDF/rdf:Description"), list);
        } catch (TransformerException e) {
            // Also covers TransformerConfigurationException, which is a subclass.
            log.warn("Problem", e);
        } finally {
            if (stylesheetStream != null) {
                try {
                    stylesheetStream.close();
                } catch (IOException e) {
                    log.debug("Could not close stylesheet stream", e);
                }
            }
        }
        return resourceInformation;
    }

    /** Runs JTidy over the stream and returns the cleaned-up document as a DOM. */
    private Document tidyToDom(InputStream inputStream) {
        Tidy tidy = new Tidy();
        tidy.setMakeClean(true);
        tidy.setDropFontTags(true);
        tidy.setShowWarnings(false);
        tidy.setXHTML(true);
        tidy.setXmlOut(true);
        tidy.setTidyMark(false);
        tidy.setQuiet(true);
        return tidy.parseDOM(inputStream, null);
    }

    /** Resets the transformer's parameters and copies every non-null map entry into it. */
    private void applyParameters(Transformer transformer, Map map) {
        transformer.clearParameters();
        if (map == null) {
            log.debug("No transformer properties available");
            return;
        }
        for (Object key : map.keySet()) {
            if (key == null) {
                continue;
            }
            // The value is looked up by the key's string form, which is also the parameter name.
            String name = key.toString();
            Object value = map.get(name);
            if (value != null) {
                transformer.setParameter(name, value);
            }
        }
    }

    /**
     * Registers the {@code href} of every {@code linkage} element in the RDF tree, resolved
     * against {@code base}. Values that do not form a valid URL are logged and skipped.
     */
    private void collectLinkages(ResourceInformation resourceInformation, URL base) throws TransformerException {
        NodeIterator linkages = XPathAPI.selectNodeIterator(resourceInformation.getRDF(), "//linkage");
        for (Node current = linkages.nextNode(); current != null; current = linkages.nextNode()) {
            if (!(current instanceof Element)) {
                continue;
            }
            String href = ((Element) current).getAttribute("href");
            try {
                resourceInformation.addLinkage(new URL(base, href));
            } catch (MalformedURLException e) {
                log.warn("Skipping malformed linkage href: " + href, e);
            }
        }
    }

    /** Returns {@code str}, or {@code str2} when {@code str} is {@code null}. */
    private String nvl(String str, String str2) {
        return str != null ? str : str2;
    }

    /**
     * Evaluates an XPath expression against {@code node} and returns the value of the first
     * matching node.
     *
     * @return the node value, or {@code null} when nothing matches, the match has no value,
     *     or the expression cannot be evaluated (the failure is logged)
     */
    private String getValue(Node node, String str) {
        try {
            Node match = XPathAPI.selectSingleNode(node, str);
            if (match != null) {
                return match.getNodeValue();
            }
        } catch (TransformerException e) {
            log.warn("Problem extracting data from RDF tree", e);
        }
        return null;
    }

    /**
     * Appends one {@code dc:} child element to {@code node} for every recognised entry in
     * {@code list}; see {@link #dublinCoreElementFor(String)} for the recognised names.
     * The entry's value becomes the element text and a non-null scheme is written as a
     * {@code dc:authority} attribute. Does nothing when {@code node} or {@code list} is
     * {@code null}.
     */
    private void processAdditionalMetadata(Node node, List list) {
        if (node == null || list == null) {
            log.debug("No description node or no additional metadata; nothing to add");
            return;
        }
        Document owner = node.getOwnerDocument();
        for (Object item : list) {
            AdditionalMetadataVO metadata = (AdditionalMetadataVO) item;
            log.debug("Testing additional metadata: " + metadata);
            if (metadata == null) {
                continue;
            }
            String elementName = dublinCoreElementFor(metadata.getName());
            if (elementName == null) {
                continue;
            }
            Element element = owner.createElementNS(dc_ns, elementName);
            element.appendChild(owner.createTextNode(metadata.getValue()));
            if (metadata.getScheme() != null) {
                element.setAttributeNS(dc_ns, "dc:authority", metadata.getScheme());
            }
            node.appendChild(element);
        }
    }

    /**
     * Maps a metadata name (compared case-insensitively) to the qualified name of the
     * element to create, or returns {@code null} when the name is not handled.
     */
    private static String dublinCoreElementFor(String name) {
        if (name == null) {
            return null;
        }
        if (name.equalsIgnoreCase("subject") || name.equalsIgnoreCase("DC.subject")) {
            return "dc:subject";
        }
        if (name.equalsIgnoreCase("category")) {
            return "dc:category";
        }
        if (name.equalsIgnoreCase("dc.publisher")) {
            return "dc:publisher";
        }
        if (name.equalsIgnoreCase("dc.language")) {
            return "dc:language";
        }
        if (name.equalsIgnoreCase("dc.rights")) {
            return "dc:rights";
        }
        return null;
    }
}
