package net.dankito.readability4j.processor;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import kotlin.collections.CollectionsKt;
import kotlin.collections.CollectionsKt__IterablesKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.Intrinsics;
import kotlin.text.StringsKt;
import net.dankito.readability4j.extended.util.RegExUtilExtended;
import net.dankito.readability4j.model.ArticleGrabberOptions;
import net.dankito.readability4j.model.ReadabilityObject;
import okhttp3.internal.http2.Huffman;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: classes.dex */
public abstract class ArticleGrabber extends ProcessorBase {
    /** Byline text; it is not assigned anywhere in the visible part of this class. */
    public String articleByline;
    /** Assigned once in the constructor from the {@code symbol} field of the options argument. */
    public final int nbTopCandidates;
    /** Created empty in the constructor; not read or written by the methods visible in this file. */
    public final HashMap readabilityDataTable;
    /** Maps an {@link Element} to the {@link ReadabilityObject} stored for it by {@code initializeNode}. */
    public final HashMap readabilityObjects;
    /** Regular-expression helper supplied to the constructor; provides the negative/positive/videos patterns used below. */
    public final RegExUtilExtended regEx;
    /** Assigned once in the constructor from the {@code terminalBitCount} field of the options argument. */
    public final int wordThreshold;
    /** Tag names; presumably the tags considered for scoring (not referenced by the visible methods). */
    public static final List DEFAULT_TAGS_TO_SCORE = Arrays.asList("section", "h2", "h3", "h4", "h5", "h6", "p", "td", "pre");
    /** Tag names that {@code hasChildBlockElement} checks child elements against. */
    public static final List DIV_TO_P_ELEMS = Arrays.asList("a", "blockquote", "dl", "div", "img", "ol", "p", "pre", "table", "ul", "select");
    /** Tag names; not referenced by the visible methods. */
    public static final List ALTER_TO_DIV_EXCEPTIONS = Arrays.asList("div", "article", "section", "p");
    /** Attribute names that {@code cleanStyles} removes from elements. */
    public static final List PRESENTATIONAL_ATTRIBUTES = Arrays.asList("align", "background", "bgcolor", "border", "cellpadding", "cellspacing", "frame", "hspace", "rules", "style", "valign", "vspace");
    /** Tag names for which {@code cleanStyles} additionally removes the {@code width} and {@code height} attributes. */
    public static final List DEPRECATED_SIZE_ATTRIBUTE_ELEMS = Arrays.asList("table", "th", "td", "hr", "pre");
    /** Tag names that {@code clean} checks its tag argument against before applying the video checks. */
    public static final List EMBEDDED_NODES = Arrays.asList("object", "embed", "iframe");
    /** Tag names; presumably descendants that mark a table as a data table (not referenced by the visible methods). */
    public static final List DATA_TABLE_DESCENDANTS = Arrays.asList("col", "colgroup", "tfoot", "thead", "th");
    /** SLF4J logger for this class. */
    public static final Logger log = LoggerFactory.getLogger(ArticleGrabber.class);

    /**
     * Builds a grabber from an options holder and a regular-expression helper.
     *
     * <p>Both arguments are null-checked before any field is assigned. The two
     * numeric settings are copied out of the options holder and both lookup maps
     * start out empty.
     *
     * @param node              options holder; its {@code symbol} and {@code terminalBitCount}
     *                          fields are read (the parameter is checked under the name "options")
     * @param regExUtilExtended regular-expression helper stored in {@link #regEx}
     */
    public ArticleGrabber(Huffman.Node node, RegExUtilExtended regExUtilExtended) {
        Intrinsics.checkParameterIsNotNull("options", node);
        Intrinsics.checkParameterIsNotNull("regEx", regExUtilExtended);

        // Per-instance lookup tables.
        this.readabilityDataTable = new HashMap();
        this.readabilityObjects = new HashMap();

        // Settings copied from the options holder.
        this.wordThreshold = node.terminalBitCount;
        this.nbTopCandidates = node.symbol;

        this.regEx = regExUtilExtended;
    }

    /**
     * Recursively strips presentational attributes from an element tree.
     *
     * <p>An {@code svg} element stops the recursion: neither it nor its
     * descendants are touched. An element whose class name is exactly
     * {@code "readability-styled"} keeps its own attributes, but its children are
     * still processed.
     *
     * @param element root of the subtree to clean
     */
    public static void cleanStyles(Element element) {
        if (Intrinsics.areEqual(element.tag.tagName, "svg")) {
            return;
        }
        // Compare by value: a reference comparison (!=) against the literal is
        // true for any runtime-built string, so the exemption would never apply.
        if (!"readability-styled".equals(element.className())) {
            for (Object attributeName : PRESENTATIONAL_ATTRIBUTES) {
                element.removeAttr((String) attributeName);
            }
            if (DEPRECATED_SIZE_ATTRIBUTE_ELEMS.contains(element.tag.tagName)) {
                element.removeAttr("width");
                element.removeAttr("height");
            }
        }
        for (Object childObject : element.children()) {
            Element child = (Element) childObject;
            Intrinsics.checkExpressionValueIsNotNull("child", child);
            cleanStyles(child);
        }
    }

    /**
     * Returns the element that follows {@code element} in a depth-first walk.
     *
     * <p>Order of preference: the first child (unless {@code z} is set), then the
     * next element sibling, then the next element sibling of the closest ancestor
     * that has one.
     *
     * @param element current position of the walk
     * @param z       when {@code true}, children of {@code element} are skipped
     * @return the next element, or {@code null} when the walk is finished
     */
    public static Element getNextNode(Element element, boolean z) {
        boolean descend = !z && element.children().size() > 0;
        if (descend) {
            return (Element) element.childElementsList().get(0);
        }

        Element sibling = element.nextElementSibling();
        if (sibling != null) {
            return sibling;
        }

        // Climb until some ancestor has a following sibling, or the tree runs out.
        for (Element ancestor = (Element) element.parentNode; ancestor != null; ancestor = (Element) ancestor.parentNode) {
            Element ancestorSibling = ancestor.nextElementSibling();
            if (ancestorSibling != null) {
                return ancestorSibling;
            }
        }
        return null;
    }

    /**
     * Collects the ancestors of {@code element}, nearest first.
     *
     * @param element starting element; must not be {@code null}
     * @param i       maximum number of ancestors to collect; the limit only takes
     *                effect when the collected count equals it exactly, so zero or
     *                a negative value collects every ancestor
     * @return a new list holding the collected ancestors (possibly empty)
     */
    public static ArrayList getNodeAncestors(Element element, int i) {
        Intrinsics.checkParameterIsNotNull("node", element);
        ArrayList ancestors = new ArrayList();
        Element ancestor = (Element) element.parentNode;
        while (ancestor != null) {
            ancestors.add(ancestor);
            // The list size doubles as the running count of collected ancestors.
            if (ancestors.size() == i) {
                break;
            }
            ancestor = (Element) ancestor.parentNode;
        }
        return ancestors;
    }

    /*
     * NOTE(review): the decompiler could not reconstruct this method (see the
     * JADX warnings below; it reports 3808 instructions). The body present in
     * this file is only a placeholder that unconditionally throws
     * UnsupportedOperationException, so this source cannot be used to run or
     * reason about the real implementation. Judging by the name and signature it
     * is presumably the entry point that extracts the article element from a
     * document - confirm against the original bytecode or upstream source.
     */
    /* JADX WARN: Code restructure failed: missing block: B:509:?, code lost:
    
        return null;
     */
    /* JADX WARN: Code restructure failed: missing block: B:628:0x0797, code lost:
    
        if (r8.matcher(r12).find() != false) goto L981;
     */
    /* JADX WARN: Code restructure failed: missing block: B:95:0x009f, code lost:
    
        if (r10.byline.matcher(r1).find() != false) goto L688;
     */
    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Removed duplicated region for block: B:11:0x010e  */
    /* JADX WARN: Removed duplicated region for block: B:334:0x090d  */
    /* JADX WARN: Removed duplicated region for block: B:43:0x01fe  */
    /* JADX WARN: Removed duplicated region for block: B:468:0x0e91 A[LOOP:0: B:2:0x002b->B:468:0x0e91, LOOP_END] */
    /* JADX WARN: Removed duplicated region for block: B:469:0x0e16 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:46:0x020b  */
    /* JADX WARN: Removed duplicated region for block: B:587:0x07aa  */
    /* JADX WARN: Removed duplicated region for block: B:592:0x07ce  */
    /* JADX WARN: Type inference failed for: r11v0, types: [net.dankito.readability4j.model.ArticleGrabberOptions, java.lang.Object] */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public static org.jsoup.nodes.Element grabArticle$default(net.dankito.readability4j.extended.processor.ArticleGrabberExtended r46, org.jsoup.nodes.Document r47, org.jsoup.parser.Parser r48) {
        /*
            Method dump skipped, instructions count: 3808
            To view this dump add '--comments-level debug' option
        */
        // Placeholder emitted by the decompiler: always throws, never returns an element.
        throw new UnsupportedOperationException("Method not decompiled: net.dankito.readability4j.processor.ArticleGrabber.grabArticle$default(net.dankito.readability4j.extended.processor.ArticleGrabberExtended, org.jsoup.nodes.Document, org.jsoup.parser.Parser):org.jsoup.nodes.Element");
    }

    /**
     * Tells whether {@code node} has an ancestor with the given tag name.
     *
     * <p>Reconstructed from the decompiler's register-level instruction dump; the
     * previous body was a placeholder that always threw
     * {@link UnsupportedOperationException}.
     *
     * <p>The walk starts at the direct parent and moves upward one level per
     * iteration. When {@code maxDepth} is positive, the walk gives up as soon as
     * the number of levels already climbed exceeds {@code maxDepth}.
     *
     * @param node     element whose ancestors are inspected; must not be {@code null}
     * @param tagName  tag name to look for; lower-cased before comparison
     * @param maxDepth depth limit; zero or a negative value disables the limit
     * @param filterFn optional predicate applied to a tag-matching ancestor; when
     *                 non-null, the ancestor only counts if it returns {@code true}
     * @return {@code true} if a matching (and accepted) ancestor was found
     */
    public static boolean hasAncestorTag(Element node, String tagName, int maxDepth, Function1 filterFn) {
        Intrinsics.checkParameterIsNotNull("node", node);
        String wantedTag = tagName.toLowerCase();
        Intrinsics.checkExpressionValueIsNotNull("(this as java.lang.String).toLowerCase()", wantedTag);

        Element current = node;
        int depth = 0;
        while (true) {
            Element parent = (Element) current.parentNode;
            if (parent == null) {
                // Ran out of ancestors without a match.
                return false;
            }
            if (maxDepth > 0 && depth > maxDepth) {
                return false;
            }
            if (Intrinsics.areEqual(parent.tag.tagName, wantedTag)) {
                if (filterFn == null) {
                    return true;
                }
                Intrinsics.checkExpressionValueIsNotNull("parent.parent()", parent);
                if (((Boolean) filterFn.invoke(parent)).booleanValue()) {
                    return true;
                }
                // Tag matched but the filter rejected it: keep climbing.
            }
            current = parent;
            depth++;
        }
    }

    /**
     * Variant of {@code hasAncestorTag} with fixed arguments: looks for a
     * {@code figure} ancestor with a depth limit of 3 and no filter.
     *
     * @param articleGrabber receiver; only dereferenced so that a {@code null}
     *                       receiver fails with a {@link NullPointerException}
     * @param element        element whose ancestors are inspected
     * @return the result of {@code hasAncestorTag(element, "figure", 3, null)}
     */
    public static /* synthetic */ boolean hasAncestorTag$default(ArticleGrabber articleGrabber, Element element) {
        // Null check on the receiver; the returned Class is intentionally unused.
        articleGrabber.getClass();
        final String tagToFind = "figure";
        final int depthLimit = 3;
        return hasAncestorTag(element, tagToFind, depthLimit, null);
    }

    /**
     * Tells whether any descendant of {@code element} has a tag listed in
     * {@link #DIV_TO_P_ELEMS}.
     *
     * <p>The previous body had an empty {@code if} followed by an unconditional
     * {@code return true}, so it returned {@code true} for every element with at
     * least one child. The tag check now decides the result.
     *
     * @param element element whose subtree is searched (the element itself is not tested)
     * @return {@code true} if a descendant with a listed tag exists, {@code false} otherwise
     */
    public static boolean hasChildBlockElement(Element element) {
        for (Object childObject : element.children()) {
            Element child = (Element) childObject;
            if (DIV_TO_P_ELEMS.contains(child.tag.tagName) || hasChildBlockElement(child)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Passes {@code element}, the tag name and a per-element predicate to
     * {@code ProcessorBase.removeNodes}.
     *
     * <p>The predicate returns {@code true} for every candidate unless the tag is
     * one of {@link #EMBEDDED_NODES}. For those tags it returns {@code false} when
     * the {@code videos} pattern matches either the candidate's attribute values
     * (joined with {@code "|"}) or its inner HTML, and {@code true} otherwise.
     *
     * <p>Fixes relative to the decompiled body: the embedded-tag path used to
     * return {@code false} unconditionally, leaving the inner-HTML check dead,
     * and the anonymous class carried a {@code super(1)} call in an instance
     * initializer, which is not valid Java.
     *
     * @param element root element handed to {@code removeNodes}
     * @param str     tag name handed to {@code removeNodes}
     */
    public final void clean(Element element, String str) {
        final boolean contains = EMBEDDED_NODES.contains(str);
        ProcessorBase.removeNodes(element, str, new Function1() { // from class: net.dankito.readability4j.processor.ArticleGrabber$clean$1
            @Override // kotlin.jvm.functions.Function1
            public final Object invoke(Object obj) {
                Element element2 = (Element) obj;
                Intrinsics.checkParameterIsNotNull("element", element2);
                if (!contains) {
                    // Not an embedded tag: no video check is applied.
                    return Boolean.TRUE;
                }

                // Collect all attribute values (null treated as "") into one string.
                Attributes attributes = element2.attributes();
                Intrinsics.checkExpressionValueIsNotNull("element.attributes()", attributes);
                ArrayList attributeValues = new ArrayList(CollectionsKt__IterablesKt.collectionSizeOrDefault(attributes, 10));
                Attributes.AnonymousClass1 attributeIterator = new Attributes.AnonymousClass1(attributes);
                while (attributeIterator.hasNext()) {
                    String value = ((Attribute) attributeIterator.next()).val;
                    attributeValues.add(value == null ? "" : value);
                }
                String joinedValues = CollectionsKt.joinToString$default(attributeValues, "|", null, null, null, 62);
                Intrinsics.checkParameterIsNotNull("matchString", joinedValues);

                RegExUtilExtended regExUtil = ArticleGrabber.this.regEx;
                if (regExUtil.videos.matcher(joinedValues).find()) {
                    return Boolean.FALSE;
                }
                String html = element2.html();
                Intrinsics.checkExpressionValueIsNotNull("element.html()", html);
                // True only when neither the attributes nor the inner HTML match.
                return Boolean.valueOf(!regExUtil.videos.matcher(html).find());
            }
        });
    }

    /**
     * Runs {@code ProcessorBase.removeNodes} once for the given tag with an
     * {@code ArticleGrabber$cleanConditionally$1} predicate, but only when
     * {@code articleGrabberOptions.cleanConditionally} is set.
     *
     * <p>The previous body lacked a return after the non-list branch, so tags
     * other than {@code ul}/{@code ol} were processed twice, the second time with
     * the list flag wrongly set to {@code true}.
     *
     * @param element               root element handed to {@code removeNodes}
     * @param str                   tag name to process; must not be {@code null}
     * @param articleGrabberOptions options; {@code cleanConditionally} gates the whole call
     */
    public final void cleanConditionally(Element element, String str, ArticleGrabberOptions articleGrabberOptions) {
        if (!articleGrabberOptions.cleanConditionally) {
            return;
        }
        // Passed to the predicate so it can tell list tags apart.
        boolean isList = str.equals("ul") || str.equals("ol");
        ProcessorBase.removeNodes(element, str, new ArticleGrabber$cleanConditionally$1(this, articleGrabberOptions, isList));
    }

    /**
     * Computes a weight for an element from its class name and id.
     *
     * <p>Each of the two attributes, when not blank, contributes -25 if the
     * {@code negative} pattern matches it and +25 if the {@code positive} pattern
     * matches it; both may apply to the same attribute. The result is therefore
     * one of -50, -25, 0, 25 or 50.
     *
     * @param element               element to weigh; must not be {@code null}
     * @param articleGrabberOptions options; when {@code weightClasses} is off the weight is 0
     * @return the summed weight
     */
    public final int getClassWeight(Element element, ArticleGrabberOptions articleGrabberOptions) {
        Intrinsics.checkParameterIsNotNull("e", element);
        if (!articleGrabberOptions.weightClasses) {
            return 0;
        }

        final RegExUtilExtended patterns = this.regEx;
        int weight = 0;

        String classAttr = element.className();
        Intrinsics.checkExpressionValueIsNotNull("e.className()", classAttr);
        if (!StringsKt.isBlank(classAttr)) {
            if (patterns.negative.matcher(classAttr).find()) {
                weight -= 25;
            }
            if (patterns.positive.matcher(classAttr).find()) {
                weight += 25;
            }
        }

        String idAttr = element.id();
        if (!StringsKt.isBlank(idAttr)) {
            if (patterns.negative.matcher(idAttr).find()) {
                weight -= 25;
            }
            if (patterns.positive.matcher(idAttr).find()) {
                weight += 25;
            }
        }

        return weight;
    }

    /**
     * Computes the share of an element's text that sits inside {@code a} elements.
     *
     * <p>Text lengths are taken from {@code ProcessorBase.getInnerText$default}.
     * The previous body divided two {@code int}s, which truncated the quotient to
     * a whole number before it was widened to {@code double}; the division is now
     * done in floating point.
     *
     * @param element element to measure; must not be {@code null}
     * @return link text length divided by total text length, or {@code 0.0} when
     *         the element has no text
     */
    public final double getLinkDensity(Element element) {
        Intrinsics.checkParameterIsNotNull("element", element);
        RegExUtilExtended regExUtilExtended = this.regEx;
        int textLength = ProcessorBase.getInnerText$default(this, element, regExUtilExtended, 4).length();
        if (textLength == 0) {
            return 0.0d;
        }
        Elements links = element.getElementsByTag("a");
        Intrinsics.checkExpressionValueIsNotNull("element.getElementsByTag(\"a\")", links);
        int linkLength = 0;
        for (Object linkObject : links) {
            Element linkNode = (Element) linkObject;
            Intrinsics.checkExpressionValueIsNotNull("linkNode", linkNode);
            linkLength += ProcessorBase.getInnerText$default(this, linkNode, regExUtilExtended, 4).length();
        }
        // Widen before dividing so the fractional part is kept.
        return (double) linkLength / textLength;
    }

    /**
     * Looks up the {@link ReadabilityObject} stored for an element.
     *
     * @param element lookup key; must not be {@code null}
     * @return the stored object, or {@code null} if the map has no entry for {@code element}
     */
    public final ReadabilityObject getReadabilityObject(Element element) {
        Intrinsics.checkParameterIsNotNull("element", element);
        final Object stored = this.readabilityObjects.get(element);
        return (ReadabilityObject) stored;
    }

    /* JADX WARN: Type inference failed for: r0v0, types: [java.lang.Object, net.dankito.readability4j.model.ReadabilityObject] */
    /* JADX WARN: Unreachable blocks removed: 1, instructions: 1 */
    public final void initializeNode(Element element, ArticleGrabberOptions articleGrabberOptions) {
        ?? obj = new Object();
        obj.contentScore = 0.0d;
        this.readabilityObjects.put(element, obj);
        String str = element.tag.tagName;
        if (str != null) {
            int hashCode = str.hashCode();
            switch (hashCode) {
                case -1147692044:
                    if (str.equals("address")) {
                        obj.contentScore -= 3;
                    }
                    break;
                case 3200:
                    if (str.equals("dd")) {
                        obj.contentScore -= 3;
                    }
                    break;
                case 3208:
                    if (str.equals("dl")) {
                        obj.contentScore -= 3;
                    }
                    break;
                case 3216:
                    if (str.equals("dt")) {
                        obj.contentScore -= 3;
                    }
                    break;
                case 3453:
                    if (str.equals("li")) {
                        obj.contentScore -= 3;
                    }
                    break;
                case 3549:
                    if (str.equals("ol")) {
                        obj.contentScore -= 3;
                    }
                    break;
                case 3696:
                    if (str.equals("td")) {
                        obj.contentScore += 3;
                        break;
                    }
                    break;
                case 3700:
                    if (str.equals("th")) {
                        obj.contentScore -= 5;
                        break;
                    }
                    break;
                case 3735:
                    if (str.equals("ul")) {
                        obj.contentScore -= 3;
                    }
                    break;
                case 99473:
                    if (str.equals("div")) {
                        obj.contentScore += 5;
                        break;
                    }
                    break;
                case 111267:
                    if (str.equals("pre")) {
                        obj.contentScore += 3;
                        break;
                    }
                    break;
                case 3148996:
                    if (str.equals("form")) {
                        obj.contentScore -= 3;
                    }
                    break;
                case 1303202319:
                    if (str.equals("blockquote")) {
                        obj.contentScore += 3;
                        break;
                    }
                    break;
                default:
                    switch (hashCode) {
                        case 3273:
                            if (str.equals("h1")) {
                                obj.contentScore -= 5;
                                break;
                            }
                            break;
                        case 3274:
                            if (str.equals("h2")) {
                                obj.contentScore -= 5;
                                break;
                            }
                            break;
                        case 3275:
                            if (str.equals("h3")) {
                                obj.contentScore -= 5;
                                break;
                            }
                            break;
                        case 3276:
                            if (str.equals("h4")) {
                                obj.contentScore -= 5;
                                break;
                            }
                            break;
                        case 3277:
                            if (str.equals("h5")) {
                                obj.contentScore -= 5;
                                break;
                            }
                            break;
                        case 3278:
                            if (str.equals("h6")) {
                                obj.contentScore -= 5;
                                break;
                            }
                            break;
                    }
            }
        }
        obj.contentScore += getClassWeight(element, articleGrabberOptions);
    }
}
