/*
 * Decompiled with CFR 0.152.
 */
package org.apache.solr.handler.extraction;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.io.Writer;
import java.lang.invoke.MethodHandles;
import java.util.Locale;
import org.apache.commons.io.IOUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.extraction.ParseContextConfig;
import org.apache.solr.handler.extraction.RegexRulesPasswordProvider;
import org.apache.solr.handler.extraction.SolrContentHandler;
import org.apache.solr.handler.extraction.SolrContentHandlerFactory;
import org.apache.solr.handler.loader.ContentStreamLoader;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.DefaultParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.parser.html.HtmlMapper;
import org.apache.tika.sax.xpath.Matcher;
import org.apache.tika.sax.xpath.MatchingContentHandler;
import org.apache.tika.sax.xpath.XPathParser;
import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.TextSerializer;
import org.apache.xml.serialize.XMLSerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

public class ExtractingDocumentLoader
extends ContentStreamLoader {
    private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
    public static final String TEXT_FORMAT = "text";
    public static final String XML_FORMAT = "xml";
    private static final XPathParser PARSER = new XPathParser("xhtml", "http://www.w3.org/1999/xhtml");
    final SolrCore core;
    final SolrParams params;
    final UpdateRequestProcessor processor;
    final boolean ignoreTikaException;
    protected AutoDetectParser autoDetectParser;
    private final AddUpdateCommand templateAdd;
    protected TikaConfig config;
    protected ParseContextConfig parseContextConfig;
    protected SolrContentHandlerFactory factory;

    public ExtractingDocumentLoader(SolrQueryRequest req, UpdateRequestProcessor processor, TikaConfig config, ParseContextConfig parseContextConfig, SolrContentHandlerFactory factory) {
        this.params = req.getParams();
        this.core = req.getCore();
        this.config = config;
        this.parseContextConfig = parseContextConfig;
        this.processor = processor;
        this.templateAdd = new AddUpdateCommand(req);
        this.templateAdd.overwrite = this.params.getBool("overwrite", true);
        this.templateAdd.commitWithin = this.params.getInt("commitWithin", -1);
        this.autoDetectParser = new AutoDetectParser(config);
        this.factory = factory;
        this.ignoreTikaException = this.params.getBool("ignoreTikaException", false);
    }

    void doAdd(SolrContentHandler handler, AddUpdateCommand template) throws IOException {
        template.solrDoc = handler.newDocument();
        this.processor.processAdd(template);
    }

    void addDoc(SolrContentHandler handler) throws IOException {
        this.templateAdd.clear();
        this.doAdd(handler, this.templateAdd);
    }

    public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream stream, UpdateRequestProcessor processor) throws Exception {
        AutoDetectParser parser = null;
        String streamType = req.getParams().get("stream.type", null);
        if (streamType != null) {
            MediaType mt = MediaType.parse((String)streamType.trim().toLowerCase(Locale.ROOT));
            parser = (Parser)new DefaultParser(this.config.getMediaTypeRegistry()).getParsers().get(mt);
        } else {
            parser = this.autoDetectParser;
        }
        if (parser != null) {
            Metadata metadata = new Metadata();
            String resourceName = req.getParams().get("resource.name", null);
            if (resourceName != null) {
                metadata.add("resourceName", resourceName);
            }
            if (stream.getContentType() != null) {
                metadata.add("Content-Type", stream.getContentType());
            }
            InputStream inputStream = null;
            try {
                SolrContentHandler handler;
                inputStream = stream.getStream();
                metadata.add("stream_name", stream.getName());
                metadata.add("stream_source_info", stream.getSourceInfo());
                metadata.add("stream_size", String.valueOf(stream.getSize()));
                metadata.add("stream_content_type", stream.getContentType());
                String charset = ContentStreamBase.getCharsetFromContentType((String)stream.getContentType());
                if (charset != null) {
                    metadata.add("Content-Encoding", charset);
                }
                String xpathExpr = this.params.get("xpath");
                boolean extractOnly = this.params.getBool("extractOnly", false);
                SolrContentHandler parsingHandler = handler = this.factory.createSolrContentHandler(metadata, this.params, req.getSchema());
                StringWriter writer = null;
                TextSerializer serializer = null;
                if (extractOnly) {
                    String extractFormat = this.params.get("extractFormat", XML_FORMAT);
                    writer = new StringWriter();
                    if (extractFormat.equals(TEXT_FORMAT)) {
                        serializer = new TextSerializer();
                        serializer.setOutputCharStream((Writer)writer);
                        serializer.setOutputFormat(new OutputFormat("Text", "UTF-8", true));
                    } else {
                        serializer = new XMLSerializer((Writer)writer, new OutputFormat("XML", "UTF-8", true));
                    }
                    if (xpathExpr != null) {
                        Matcher matcher = PARSER.parse(xpathExpr);
                        serializer.startDocument();
                        parsingHandler = new MatchingContentHandler((ContentHandler)serializer, matcher);
                    } else {
                        parsingHandler = serializer;
                    }
                } else if (xpathExpr != null) {
                    Matcher matcher = PARSER.parse(xpathExpr);
                    parsingHandler = new MatchingContentHandler((ContentHandler)handler, matcher);
                }
                try {
                    InputStream is;
                    ParseContext context = this.parseContextConfig.create();
                    context.set(Parser.class, (Object)parser);
                    context.set(HtmlMapper.class, (Object)MostlyPassthroughHtmlMapper.INSTANCE);
                    RegexRulesPasswordProvider epp = new RegexRulesPasswordProvider();
                    String pwMapFile = this.params.get("passwordsFile");
                    if (pwMapFile != null && pwMapFile.length() > 0 && (is = req.getCore().getResourceLoader().openResource(pwMapFile)) != null) {
                        log.debug("Password file supplied: {}", (Object)pwMapFile);
                        epp.parse(is);
                    }
                    context.set(PasswordProvider.class, (Object)epp);
                    String resourcePassword = this.params.get("resource.password");
                    if (resourcePassword != null) {
                        epp.setExplicitPassword(resourcePassword);
                        log.debug("Literal password supplied for file {}", (Object)resourceName);
                    }
                    parser.parse(inputStream, (ContentHandler)parsingHandler, metadata, context);
                }
                catch (TikaException e) {
                    if (this.ignoreTikaException) {
                        log.warn("skip extracting text due to " + e.getLocalizedMessage() + ". metadata=" + metadata.toString());
                    }
                    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, (Throwable)e);
                }
                if (!extractOnly) {
                    this.addDoc(handler);
                }
                if (xpathExpr != null) {
                    serializer.endDocument();
                }
                rsp.add(stream.getName(), (Object)writer.toString());
                writer.close();
                String[] names = metadata.names();
                NamedList metadataNL = new NamedList();
                for (int i = 0; i < names.length; ++i) {
                    String[] vals = metadata.getValues(names[i]);
                    metadataNL.add(names[i], (Object)vals);
                }
                rsp.add(stream.getName() + "_metadata", (Object)metadataNL);
            }
            catch (SAXException e) {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, (Throwable)e);
            }
            finally {
                IOUtils.closeQuietly((InputStream)inputStream);
            }
        } else {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Stream type of " + streamType + " didn't match any known parsers.  Please supply the stream.type parameter.");
        }
    }

    public static class MostlyPassthroughHtmlMapper
    implements HtmlMapper {
        public static final HtmlMapper INSTANCE = new MostlyPassthroughHtmlMapper();

        public boolean isDiscardElement(String name) {
            return false;
        }

        public String mapSafeAttribute(String elementName, String attributeName) {
            return attributeName.toLowerCase(Locale.ENGLISH);
        }

        public String mapSafeElement(String name) {
            String lowerName = name.toLowerCase(Locale.ROOT);
            return lowerName.equals("br") || lowerName.equals("body") ? null : lowerName;
        }
    }
}

