package de.l3s.boilerpipe.sax;

import de.l3s.boilerpipe.BoilerpipeExtractor;
import de.l3s.boilerpipe.BoilerpipeProcessingException;
import de.l3s.boilerpipe.document.TextBlock;
import de.l3s.boilerpipe.document.TextDocument;
import java.io.IOException;
import java.io.StringReader;
import java.net.URL;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import mf.org.apache.xerces.parsers.AbstractSAXParser;
import org.cyberneko.html.HTMLConfiguration;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;

/* loaded from: classes3.dex */
public final class HTMLHighlighter {
    // Matches an opening tag that is immediately followed by a closing tag, i.e. an
    // element with nothing between its tags. The two tag names are not compared.
    private static final Pattern PAT_TAG_NO_TEXT = Pattern.compile("<[^/][^>]*></[^>]*>");
    // Matches a whole string of the form <outer>INNER</outer> where INNER itself begins
    // with '<' and ends with '>'; group 1 captures INNER.
    private static final Pattern PAT_SUPER_TAG = Pattern.compile("^<[^>]*>(<.*?>)</[^>]*>$");
    /**
     * Action registered for elements that are to be left out of the output: the
     * ignorable-element counter of the parser is raised before the start tag is
     * handled and lowered again once the end tag has been handled.
     */
    private static final TagAction TA_IGNORABLE_ELEMENT = new TagAction() {
        @Override
        void beforeStart(Implementation implementation, String str) {
            implementation.inIgnorableElement++;
        }

        @Override
        void afterEnd(Implementation implementation, String str) {
            implementation.inIgnorableElement--;
        }
    };
    /**
     * Action that keeps the {@code ignoreTagButIncludeContent} counter raised only
     * while a single tag event (start or end) of the element is being handled. The
     * counter is raised just before the event and lowered right after it, so it is
     * back at its previous value while the element's content is reported.
     */
    private static final TagAction TA_IGNORABLE_TAG_BUT_INCLUDE_CONTENT = new TagAction() {
        @Override
        void beforeStart(Implementation implementation, String str) {
            implementation.ignoreTagButIncludeContent++;
        }

        @Override
        void afterStart(Implementation implementation, String str) {
            implementation.ignoreTagButIncludeContent--;
        }

        @Override
        void beforeEnd(Implementation implementation, String str) {
            implementation.ignoreTagButIncludeContent++;
        }

        @Override
        void afterEnd(Implementation implementation, String str) {
            implementation.ignoreTagButIncludeContent--;
        }
    };
    /**
     * Action registered for the HEAD element. The element is counted as ignorable
     * from before its start tag until after its end tag, and the highlighter's extra
     * style sheet is appended to the output just before the end tag is handled.
     */
    private static final TagAction TA_HEAD = new TagAction() {
        @Override
        void beforeStart(Implementation implementation, String str) {
            implementation.inIgnorableElement++;
        }

        @Override
        void beforeEnd(Implementation implementation, String str) {
            final String styleSheet = implementation.hl.extraStyleSheet;
            implementation.html.append(styleSheet);
        }

        @Override
        void afterEnd(Implementation implementation, String str) {
            implementation.inIgnorableElement--;
        }
    };
    // When true, text that is not marked as content is left out of the output and the
    // result is post-processed to drop empty elements and a single enclosing element.
    private boolean outputHighlightOnly = false;
    // When true, process(URL, ...) additionally encodes image tags as text before
    // extraction and restores them afterwards.
    private boolean includeImages = false;
    // When true, the HTML and BODY elements are registered with the
    // "ignore tag but include content" action.
    private boolean bodyOnly = false;
    // Markup appended to the output just before the end of the HEAD element is handled.
    private String extraStyleSheet = "\n<style type=\"text/css\">\n.x-boilerpipe-mark1 { text-decoration:none; background-color: #ffff42 !important; color: black !important; display:inline !important; visibility:visible !important; }\n</style>\n";
    // Markup written immediately before each highlighted run of text.
    private String preHighlight = "<span class=\"x-boilerpipe-mark1\">";
    // Markup written immediately after each highlighted run of text.
    private String postHighlight = "</span>";

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: classes3.dex */
    public final class Implementation extends AbstractSAXParser implements ContentHandler {
        // Actions keyed by the element name passed as second argument to
        // startElement/endElement.
        private Map<String, TagAction> TAG_ACTIONS;
        // Number of characters() events seen so far; used as index into contentBitSet.
        private int characterElementIdx;
        // Union of the contained-text-element bits of all content text blocks.
        private final BitSet contentBitSet;
        // The enclosing highlighter instance.
        private final HTMLHighlighter hl;
        // Buffer that receives the generated markup.
        StringBuilder html;
        // Non-zero only while a tag event of an "ignore tag but include content"
        // element is being handled.
        private int ignoreTagButIncludeContent;
        // Nesting depth of elements whose markup and text are not written.
        private int inIgnorableElement;

        /**
         * Builds the parser on top of a fresh {@link HTMLConfiguration}, initialises
         * the per-run state, registers the tag actions and installs this object as
         * its own content handler.
         */
        Implementation() {
            super(new HTMLConfiguration());
            this.hl = HTMLHighlighter.this;
            this.contentBitSet = new BitSet();
            this.html = new StringBuilder();
            this.characterElementIdx = 0;
            this.ignoreTagButIncludeContent = 0;
            this.inIgnorableElement = 0;
            this.TAG_ACTIONS = new HashMap<String, TagAction>();
            setupTagActions();
            setContentHandler(this);
        }

        /**
         * Decompiler-emitted accessor: increments {@code inIgnorableElement} of the
         * given instance and returns the value it had before the increment.
         */
        static /* synthetic */ int access$108(Implementation implementation) {
            return implementation.inIgnorableElement++;
        }

        /**
         * Decompiler-emitted accessor: decrements {@code inIgnorableElement} of the
         * given instance and returns the value it had before the decrement.
         */
        static /* synthetic */ int access$110(Implementation implementation) {
            return implementation.inIgnorableElement--;
        }

        /**
         * Decompiler-emitted accessor: increments {@code ignoreTagButIncludeContent}
         * of the given instance and returns the value it had before the increment.
         */
        static /* synthetic */ int access$208(Implementation implementation) {
            return implementation.ignoreTagButIncludeContent++;
        }

        /**
         * Decompiler-emitted accessor: decrements {@code ignoreTagButIncludeContent}
         * of the given instance and returns the value it had before the decrement.
         */
        static /* synthetic */ int access$210(Implementation implementation) {
            return implementation.ignoreTagButIncludeContent--;
        }

        /**
         * Fills {@code TAG_ACTIONS}: a fixed list of elements is dropped entirely,
         * HEAD gets its dedicated action, and, when the enclosing highlighter is in
         * body-only mode, HTML and BODY are registered with the
         * "ignore tag but include content" action.
         */
        private void setupTagActions() {
            final String[] droppedElements = {
                "STYLE", "SCRIPT", "OPTION", "NOSCRIPT", "OBJECT", "EMBED", "APPLET", "LINK"
            };
            for (String elementName : droppedElements) {
                this.TAG_ACTIONS.put(elementName, HTMLHighlighter.TA_IGNORABLE_ELEMENT);
            }
            this.TAG_ACTIONS.put("HEAD", HTMLHighlighter.TA_HEAD);

            if (!HTMLHighlighter.this.isBodyOnly()) {
                return;
            }
            final String[] wrapperElements = {"HTML", "BODY"};
            for (String elementName : wrapperElements) {
                this.TAG_ACTIONS.put(elementName, HTMLHighlighter.TA_IGNORABLE_TAG_BUT_INCLUDE_CONTENT);
            }
        }

        /**
         * Handles one run of character data. The run counter is advanced for every
         * call, including runs that end up not being written. A run is written,
         * XML-escaped, when it is outside ignorable elements (or the
         * ignore-tag counter equals 1) and either its bit is set in the content bit
         * set or the highlighter is not restricted to highlighted output. Runs whose
         * bit is set are wrapped in the configured pre/post highlight markup.
         *
         * @param cArr buffer holding the characters
         * @param i    offset of the first character in {@code cArr}
         * @param i2   number of characters
         */
        @Override // org.xml.sax.ContentHandler
        public void characters(char[] cArr, int i, int i2) throws SAXException {
            // Incremented before the lookup, so the first run is looked up at index 1
            this.characterElementIdx++;

            if (this.inIgnorableElement != 0 && this.ignoreTagButIncludeContent != 1) {
                return;
            }
            final boolean highlighted = this.contentBitSet.get(this.characterElementIdx);
            if (!highlighted && HTMLHighlighter.this.outputHighlightOnly) {
                return;
            }

            final StringBuilder out = this.html;
            if (highlighted) {
                out.append(HTMLHighlighter.this.preHighlight);
            }
            out.append(HTMLHighlighter.xmlEncode(String.valueOf(cArr, i, i2)));
            if (highlighted) {
                out.append(HTMLHighlighter.this.postHighlight);
            }
        }

        /** No-op: this handler takes no action when the end of the document is reported. */
        @Override // org.xml.sax.ContentHandler
        public void endDocument() throws SAXException {
        }

        /**
         * Handles an end tag. The registered {@link TagAction}, if any, is notified
         * before and after the tag is handled; the "after" hook runs even if writing
         * the tag fails.
         *
         * <p>The closing tag is written only when the parser is outside ignorable
         * elements <em>and</em> the tag itself is not marked as
         * "ignore tag but include content". This is the same condition
         * {@code startElement} applies, so an element whose start tag was suppressed
         * no longer leaves a dangling end tag in the output.
         *
         * @param str  namespace URI (unused)
         * @param str2 element name used to look up the tag action
         * @param str3 qualified name written to the output
         */
        @Override // org.xml.sax.ContentHandler
        public void endElement(String str, String str2, String str3) throws SAXException {
            TagAction tagAction = this.TAG_ACTIONS.get(str2);
            if (tagAction != null) {
                tagAction.beforeEnd(this, str2);
            }
            try {
                // Mirror the start-tag condition so start and end tags stay balanced
                if (this.inIgnorableElement == 0 && this.ignoreTagButIncludeContent == 0) {
                    this.html.append("</").append(str3).append('>');
                }
            } finally {
                if (tagAction != null) {
                    tagAction.afterEnd(this, str2);
                }
            }
        }

        /** No-op: the end of a namespace prefix mapping is not acted upon. */
        @Override // org.xml.sax.ContentHandler
        public void endPrefixMapping(String str) throws SAXException {
        }

        /** No-op: ignorable whitespace reported by the parser is not copied to the output. */
        @Override // org.xml.sax.ContentHandler
        public void ignorableWhitespace(char[] cArr, int i, int i2) throws SAXException {
        }

        /**
         * Collects the text-element bits of every content block of the document into
         * the content bit set and then parses the given input, which drives the
         * content-handler callbacks of this object.
         *
         * @param textDocument document whose content blocks determine what is highlighted
         * @param inputSource  the markup to parse
         * @throws BoilerpipeProcessingException wrapping any {@link IOException} or
         *         {@link SAXException} raised while parsing
         */
        void process(TextDocument textDocument, InputSource inputSource) throws BoilerpipeProcessingException {
            for (TextBlock block : textDocument.getTextBlocks()) {
                if (!block.isContent()) {
                    continue;
                }
                final BitSet textElements = block.getContainedTextElements();
                if (textElements == null) {
                    continue;
                }
                this.contentBitSet.or(textElements);
            }

            try {
                parse(inputSource);
            } catch (IOException ioFailure) {
                throw new BoilerpipeProcessingException(ioFailure);
            } catch (SAXException saxFailure) {
                throw new BoilerpipeProcessingException(saxFailure);
            }
        }

        /** No-op: processing instructions are not copied to the output. */
        @Override // org.xml.sax.ContentHandler
        public void processingInstruction(String str, String str2) throws SAXException {
        }

        /** No-op: the supplied locator is not stored or used. */
        @Override // org.xml.sax.ContentHandler
        public void setDocumentLocator(Locator locator) {
        }

        /** No-op: skipped entities are not acted upon. */
        @Override // org.xml.sax.ContentHandler
        public void skippedEntity(String str) throws SAXException {
        }

        /** No-op: this handler takes no action when the start of the document is reported. */
        @Override // org.xml.sax.ContentHandler
        public void startDocument() throws SAXException {
        }

        /**
         * Handles a start tag. The registered {@link TagAction}, if any, is notified
         * before and after the tag is handled; the "after" hook runs even if writing
         * the tag fails. The tag is written, with all of its attributes XML-escaped
         * and double-quoted, only while both ignore counters are zero. A SPAN that
         * already carries the {@code x-boilerpipe-mark1} class is written without any
         * attributes.
         *
         * @param str        namespace URI (unused)
         * @param str2       element name used to look up the tag action
         * @param str3       qualified name written to the output
         * @param attributes attributes of the element
         */
        @Override // org.xml.sax.ContentHandler
        public void startElement(String str, String str2, String str3, Attributes attributes) throws SAXException {
            final TagAction action = this.TAG_ACTIONS.get(str2);
            if (action != null) {
                action.beforeStart(this, str2);
            }

            // A span with the highlight class gets its attributes stripped
            boolean existingHighlightSpan = false;
            if ("SPAN".equalsIgnoreCase(str2)) {
                existingHighlightSpan = "x-boilerpipe-mark1".equals(attributes.getValue("class"));
            }

            try {
                final boolean emitTag = this.inIgnorableElement == 0 && this.ignoreTagButIncludeContent == 0;
                if (emitTag) {
                    final StringBuilder out = this.html;
                    out.append('<').append(str3);
                    if (!existingHighlightSpan) {
                        final int attributeCount = attributes.getLength();
                        for (int idx = 0; idx < attributeCount; idx++) {
                            final String attrName = attributes.getQName(idx);
                            final String attrValue = attributes.getValue(idx);
                            out.append(' ')
                                    .append(attrName)
                                    .append("=\"")
                                    .append(HTMLHighlighter.xmlEncode(attrValue))
                                    .append("\"");
                        }
                    }
                    out.append('>');
                }
            } finally {
                if (action != null) {
                    action.afterStart(this, str2);
                }
            }
        }

        /** No-op: the start of a namespace prefix mapping is not acted upon. */
        @Override // org.xml.sax.ContentHandler
        public void startPrefixMapping(String str, String str2) throws SAXException {
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: classes3.dex */
    /**
     * Hooks that the SAX handler invokes around the handling of an element's start
     * and end tags. Every hook is a no-op by default; the constants of the enclosing
     * class override the ones they need.
     */
    public static abstract class TagAction {
        private TagAction() {
        }

        /** Invoked after the end tag of the element has been handled. */
        void afterEnd(Implementation implementation, String str) {
        }

        /** Invoked after the start tag of the element has been handled. */
        void afterStart(Implementation implementation, String str) {
        }

        /** Invoked before the end tag of the element is handled. */
        void beforeEnd(Implementation implementation, String str) {
        }

        /** Invoked before the start tag of the element is handled. */
        void beforeStart(Implementation implementation, String str) {
        }
    }

    /**
     * Creates a highlighter. With {@code z == false} all defaults are kept and the
     * other two arguments are not applied. With {@code z == true} the instance is
     * switched to highlighted-output-only mode, the highlight wrappers are cleared,
     * and the extra style sheet is replaced (by an empty string in body-only mode).
     *
     * @param z  whether to configure the instance for extraction
     * @param z2 value for the include-images flag (applied only when {@code z})
     * @param z3 value for the body-only flag (applied only when {@code z})
     */
    private HTMLHighlighter(boolean z, boolean z2, boolean z3) {
        if (!z) {
            return;
        }
        setOutputHighlightOnly(true);
        setIncludeImages(z2);
        setBodyOnly(z3);
        setExtraStyleSheet(isBodyOnly()
                ? ""
                : "\n<style type=\"text/css\">\nA:before { content:' '; } \nA:after { content:' '; } \nSPAN:before { content:' '; } \nSPAN:after { content:' '; } \n</style>\n");
        setPreHighlight("");
        setPostHighlight("");
    }

    /**
     * Creates a highlighter configured for extraction, with images not included
     * and the output not restricted to the body.
     *
     * @return a new extracting instance
     */
    public static HTMLHighlighter newExtractingInstance() {
        return newExtractingInstance(false, false);
    }

    /**
     * Creates a highlighter configured for extraction.
     *
     * @param z  value for the include-images flag
     * @param z2 value for the body-only flag
     * @return a new extracting instance
     */
    public static HTMLHighlighter newExtractingInstance(boolean z, boolean z2) {
        return new HTMLHighlighter(true, z, z2);
    }

    /**
     * Creates a highlighter that keeps all field defaults: the whole document is
     * output and content text is wrapped in the default highlight span.
     *
     * @return a new highlighting instance
     */
    public static HTMLHighlighter newHighlightingInstance() {
        return new HTMLHighlighter(false, false, false);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Escapes the four characters {@code "}, {@code &}, {@code <} and {@code >} as
     * {@code &quot;}, {@code &amp;}, {@code &lt;} and {@code &gt;}; every other
     * character is copied unchanged.
     *
     * @param str text to escape, may be {@code null}
     * @return the escaped text, or an empty string when {@code str} is {@code null}
     */
    public static String xmlEncode(String str) {
        if (str == null) {
            return "";
        }
        final int length = str.length();
        final StringBuilder encoded = new StringBuilder(length);
        for (int pos = 0; pos < length; pos++) {
            final char ch = str.charAt(pos);
            switch (ch) {
                case '"':
                    encoded.append("&quot;");
                    break;
                case '&':
                    encoded.append("&amp;");
                    break;
                case '<':
                    encoded.append("&lt;");
                    break;
                case '>':
                    encoded.append("&gt;");
                    break;
                default:
                    encoded.append(ch);
                    break;
            }
        }
        return encoded.toString();
    }

    /** Returns the markup that is appended to the output just before the end of the HEAD element is handled. */
    public String getExtraStyleSheet() {
        return this.extraStyleSheet;
    }

    /** Returns the markup written immediately after each highlighted run of text. */
    public String getPostHighlight() {
        return this.postHighlight;
    }

    /** Returns the markup written immediately before each highlighted run of text. */
    public String getPreHighlight() {
        return this.preHighlight;
    }

    /** Returns whether the HTML and BODY elements are registered with the "ignore tag but include content" action. */
    public boolean isBodyOnly() {
        return this.bodyOnly;
    }

    /** Returns whether {@code process(URL, BoilerpipeExtractor)} encodes image tags as text and restores them afterwards. */
    public boolean isIncludeImages() {
        return this.includeImages;
    }

    /** Returns whether text that is not marked as content is left out of the output. */
    public boolean isOutputHighlightOnly() {
        return this.outputHighlightOnly;
    }

    /**
     * Convenience overload that wraps the given markup string in an
     * {@link InputSource} and delegates to {@code process(TextDocument, InputSource)}.
     *
     * @param textDocument document whose content blocks determine what is highlighted
     * @param str          the markup to process
     * @return the generated markup
     * @throws BoilerpipeProcessingException if processing fails
     */
    public String process(TextDocument textDocument, String str) throws BoilerpipeProcessingException {
        return process(textDocument, new InputSource(new StringReader(str)));
    }

    /**
     * Parses the given input and returns the generated markup. Text belonging to
     * content blocks of {@code textDocument} is wrapped in the configured highlight
     * markup. In highlighted-output-only mode the result is additionally cleaned up
     * by {@link #stripEmptyAndWrapperElements(String)}.
     *
     * @param textDocument document whose content blocks determine what is highlighted
     * @param inputSource  the markup to process
     * @return the generated markup
     * @throws BoilerpipeProcessingException if parsing fails
     */
    public String process(TextDocument textDocument, InputSource inputSource) throws BoilerpipeProcessingException {
        Implementation implementation = new Implementation();
        implementation.process(textDocument, inputSource);
        String html = implementation.html.toString();
        if (this.outputHighlightOnly) {
            html = stripEmptyAndWrapperElements(html);
        }
        return html;
    }

    /**
     * Repeatedly removes elements without any content and unwraps a single element
     * that encloses the whole markup, until neither pattern matches any more.
     */
    private static String stripEmptyAndWrapperElements(String html) {
        boolean changed = true;
        while (changed) {
            changed = false;

            Matcher emptyElement = PAT_TAG_NO_TEXT.matcher(html);
            if (emptyElement.find()) {
                html = emptyElement.replaceAll("");
                changed = true;
            }

            Matcher wrapper = PAT_SUPER_TAG.matcher(html);
            if (wrapper.find()) {
                // The captured markup is literal text, not a replacement template:
                // quote it so '$' and '\' coming from the page are kept as they are.
                html = wrapper.replaceAll(Matcher.quoteReplacement(wrapper.group(1)));
                changed = true;
            }
        }
        return html;
    }

    /**
     * Fetches the document at {@code url}, runs the given extractor on it and
     * returns the markup produced by {@code process(TextDocument, InputSource)}.
     * Before extraction the fetched document is asked to encode escaped characters
     * (and, when images are included, image tags) as text; the corresponding
     * restore steps are applied to the generated markup afterwards.
     *
     * @param url                 location of the document to fetch
     * @param boilerpipeExtractor extractor that classifies the text blocks
     * @return the generated markup
     * @throws IOException                   declared by the fetch step
     * @throws BoilerpipeProcessingException if extraction or highlighting fails
     * @throws SAXException                  declared by the parsing step
     */
    public String process(URL url, BoilerpipeExtractor boilerpipeExtractor) throws IOException, BoilerpipeProcessingException, SAXException {
        final HTMLDocument document = HTMLFetcher.fetch(url);
        document.encodeEscapedCharsAsText();
        if (this.includeImages) {
            document.encodeImageTagsAsText();
        }

        // A fresh input source is requested for each of the two parsing passes
        final BoilerpipeSAXInput saxInput = new BoilerpipeSAXInput(document.toInputSource());
        final TextDocument textDocument = saxInput.getTextDocument();
        boilerpipeExtractor.process(textDocument);

        final String highlighted = process(textDocument, document.toInputSource());
        final String restored = HTMLDocument.restoreTextEncodedEscapedChars(highlighted, document.getCharset().name());
        if (!this.includeImages) {
            return restored;
        }
        return HTMLDocument.restoreTextEncodedImageTags(restored, document.getCharset().name());
    }

    /**
     * Sets the body-only flag. The flag is read when the tag actions are registered
     * for a processing run; this setter does not change the extra style sheet.
     *
     * @param z new value of the flag
     */
    public void setBodyOnly(boolean z) {
        this.bodyOnly = z;
    }

    /**
     * Sets the markup that is appended to the output just before the end of the
     * HEAD element is handled.
     *
     * @param str the markup to insert
     */
    public void setExtraStyleSheet(String str) {
        this.extraStyleSheet = str;
    }

    /**
     * Sets whether {@code process(URL, BoilerpipeExtractor)} encodes image tags as
     * text before extraction and restores them afterwards.
     *
     * @param z new value of the flag
     */
    public void setIncludeImages(boolean z) {
        this.includeImages = z;
    }

    /**
     * Sets whether text that is not marked as content is left out of the output.
     *
     * @param z new value of the flag
     */
    public void setOutputHighlightOnly(boolean z) {
        this.outputHighlightOnly = z;
    }

    /**
     * Sets the markup written immediately after each highlighted run of text.
     *
     * @param str the closing markup
     */
    public void setPostHighlight(String str) {
        this.postHighlight = str;
    }

    /**
     * Sets the markup written immediately before each highlighted run of text.
     *
     * @param str the opening markup
     */
    public void setPreHighlight(String str) {
        this.preHighlight = str;
    }
}
