/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.parser.html;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.config.Field;
import org.apache.tika.detect.AutoDetectReader;
import org.apache.tika.detect.EncodingDetector;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractEncodingDetectorParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.html.DefaultHtmlMapper;
import org.apache.tika.parser.html.HtmlHandler;
import org.apache.tika.parser.html.HtmlMapper;
import org.apache.tika.parser.html.XHTMLDowngradeHandler;
import org.ccil.cowan.tagsoup.HTMLSchema;
import org.ccil.cowan.tagsoup.Parser;
import org.ccil.cowan.tagsoup.Schema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/**
 * HTML parser that detects the character encoding of the incoming stream,
 * records the resulting content type and encoding in the metadata, and then
 * feeds the decoded text through a TagSoup {@link Parser} whose SAX events are
 * forwarded to the caller's {@link ContentHandler} via {@link HtmlHandler}.
 *
 * <p>Element and attribute mapping can be customised either by placing an
 * {@link HtmlMapper} in the {@link ParseContext} or by overriding
 * {@link #mapSafeElement(String)}, {@link #isDiscardElement(String)} and
 * {@link #mapSafeAttribute(String, String)} in a subclass.
 */
public class HtmlParser
extends AbstractEncodingDetectorParser {
    private static final long serialVersionUID = 7895315240498733128L;
    private static final Logger LOG = LoggerFactory.getLogger(HtmlParser.class);

    private static final MediaType XHTML = MediaType.application("xhtml+xml");
    private static final MediaType WAP_XHTML = MediaType.application("vnd.wap.xhtml+xml");
    private static final MediaType X_ASP = MediaType.application("x-asp");

    /** Media types this parser declares support for. */
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(
            new HashSet<MediaType>(Arrays.asList(MediaType.text("html"), XHTML, WAP_XHTML, X_ASP)));

    /** Shared default schema, used whenever the parse context does not supply its own. */
    private static final Schema HTML_SCHEMA = new HTMLSchema();

    /** Passed through to {@link HtmlHandler}; off by default. */
    @Field
    private boolean extractScripts = false;

    /**
     * Returns the media types handled by this parser.
     *
     * @param context parse context (not consulted)
     * @return an unmodifiable set of supported media types
     */
    public Set<MediaType> getSupportedTypes(ParseContext context) {
        return SUPPORTED_TYPES;
    }

    /** Creates a parser that uses the superclass's default encoding detector configuration. */
    public HtmlParser() {
    }

    /**
     * Creates a parser that uses the given encoding detector.
     *
     * @param encodingDetector detector handed to the superclass
     */
    public HtmlParser(EncodingDetector encodingDetector) {
        super(encodingDetector);
    }

    /**
     * Parses an HTML document.
     *
     * <p>The detected charset is written to the {@code Content-Encoding}
     * metadata key. The {@code Content-Type} key is rewritten to include the
     * charset when it was absent or started with one of the supported types;
     * any other pre-existing value is left as it was.
     *
     * @param stream   document bytes; this method does not close it
     * @param handler  receiver of the resulting SAX events
     * @param metadata document metadata, read and updated in place
     * @param context  parse context; may supply an {@link HtmlMapper} and/or a {@link Schema}
     * @throws IOException   if the stream cannot be read
     * @throws SAXException  if SAX event processing fails
     * @throws TikaException if the document cannot be processed
     */
    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
        // The close shield keeps the reader's close() (end of the try block)
        // from closing the caller-owned stream.
        try (AutoDetectReader reader = new AutoDetectReader(
                new CloseShieldInputStream(stream), metadata, this.getEncodingDetector(context))) {
            Charset charset = reader.getCharset();

            MediaType contentType = resolveContentType(metadata.get("Content-Type"), charset);
            if (contentType != null) {
                metadata.set("Content-Type", contentType.toString());
            }
            metadata.set("Content-Encoding", charset.name());

            // A mapper or schema placed in the context takes precedence over the defaults.
            HtmlMapper mapper = context.get(HtmlMapper.class, new HtmlParserMapper());
            Schema schema = context.get(Schema.class, HTML_SCHEMA);

            Parser parser = new Parser();
            parser.setProperty("http://www.ccil.org/~cowan/tagsoup/properties/schema", schema);
            parser.setFeature("http://www.ccil.org/~cowan/tagsoup/features/ignore-bogons", true);
            parser.setContentHandler(new XHTMLDowngradeHandler(
                    new HtmlHandler(mapper, handler, metadata, context, this.extractScripts)));
            parser.parse(reader.asInputSource());
        }
    }

    /**
     * Chooses the media type to record for the document, qualified with the
     * detected charset.
     *
     * @param previous the existing {@code Content-Type} metadata value, possibly {@code null}
     * @param charset  the detected charset
     * @return the charset-qualified media type, or {@code null} when {@code previous}
     *         is set to something this parser does not recognise
     */
    private static MediaType resolveContentType(String previous, Charset charset) {
        if (previous == null || previous.startsWith("text/html")) {
            return new MediaType(MediaType.TEXT_HTML, charset);
        }
        if (previous.startsWith("application/xhtml+xml")) {
            return new MediaType(XHTML, charset);
        }
        if (previous.startsWith("application/vnd.wap.xhtml+xml")) {
            return new MediaType(WAP_XHTML, charset);
        }
        if (previous.startsWith("application/x-asp")) {
            return new MediaType(X_ASP, charset);
        }
        return null;
    }

    /**
     * Maps an element name by delegating to {@link DefaultHtmlMapper#INSTANCE}.
     * Subclasses may override to change the mapping used by the default mapper.
     *
     * @param name element name
     * @return whatever {@code DefaultHtmlMapper.INSTANCE.mapSafeElement(name)} returns
     */
    protected String mapSafeElement(String name) {
        return DefaultHtmlMapper.INSTANCE.mapSafeElement(name);
    }

    /**
     * Decides whether an element is discarded by delegating to
     * {@link DefaultHtmlMapper#INSTANCE}. Subclasses may override.
     *
     * @param name element name
     * @return whatever {@code DefaultHtmlMapper.INSTANCE.isDiscardElement(name)} returns
     */
    protected boolean isDiscardElement(String name) {
        return DefaultHtmlMapper.INSTANCE.isDiscardElement(name);
    }

    /**
     * Maps an attribute name by delegating to {@link DefaultHtmlMapper#INSTANCE}.
     * Subclasses may override.
     *
     * @param elementName   name of the element carrying the attribute
     * @param attributeName attribute name
     * @return whatever {@code DefaultHtmlMapper.INSTANCE.mapSafeAttribute(...)} returns
     */
    public String mapSafeAttribute(String elementName, String attributeName) {
        return DefaultHtmlMapper.INSTANCE.mapSafeAttribute(elementName, attributeName);
    }

    /**
     * Sets the flag that is passed to {@link HtmlHandler} on each parse.
     *
     * @param extractScripts new value of the flag
     */
    @Field
    public void setExtractScripts(boolean extractScripts) {
        this.extractScripts = extractScripts;
    }

    /**
     * @return the current value of the extract-scripts flag
     */
    public boolean getExtractScripts() {
        return this.extractScripts;
    }

    /**
     * Default mapper used when the parse context supplies none. It forwards
     * every call to the enclosing parser's mapping methods, so subclass
     * overrides of those methods take effect. It must remain a non-static
     * inner class because it needs the enclosing instance.
     */
    private class HtmlParserMapper
    implements HtmlMapper {
        private HtmlParserMapper() {
        }

        @Override
        public String mapSafeElement(String name) {
            return HtmlParser.this.mapSafeElement(name);
        }

        @Override
        public boolean isDiscardElement(String name) {
            return HtmlParser.this.isDiscardElement(name);
        }

        @Override
        public String mapSafeAttribute(String elementName, String attributeName) {
            return HtmlParser.this.mapSafeAttribute(elementName, attributeName);
        }
    }
}

