package net.dankito.readability4j.processor;

import androidx.compose.foundation.pager.PagerKt$pagerSemantics$1;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import kotlin.jvm.functions.Function1;
import kotlin.text.StringsKt__StringsKt;
import net.dankito.readability4j.model.ArticleGrabberOptions;
import net.dankito.readability4j.model.ReadabilityObject;
import net.dankito.readability4j.model.ReadabilityOptions;
import net.dankito.readability4j.util.RegExUtil;
import okio.Utf8;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: classes.dex */
public abstract class ArticleGrabber extends ProcessorBase {
    // articleByline / articleDir are not assigned in any method that decompiled successfully in this
    // class; presumably they are populated by the non-decompiled grabArticle routine — TODO confirm.
    public String articleByline;
    public String articleDir;
    // Copied from ReadabilityOptions.nbTopCandidates in the constructor.
    public final int nbTopCandidates;
    // Created empty in the constructor; its readers and writers are not visible in the decompiled methods.
    public final HashMap readabilityDataTable;
    // Created empty in the constructor; getReadabilityObject looks entries up by Element key.
    public final HashMap readabilityObjects;
    // Regex helper supplied by the caller; used by getClassWeight and getLinkDensity.
    public final RegExUtil regEx;
    // Copied from ReadabilityOptions.wordThreshold in the constructor.
    public final int wordThreshold;
    // Tag-name list; not referenced by the methods that decompiled successfully.
    public static final List DEFAULT_TAGS_TO_SCORE = Arrays.asList("section", "h2", "h3", "h4", "h5", "h6", "p", "td", "pre");
    // Tag names that hasChildBlockElement treats as block-level children.
    public static final List DIV_TO_P_ELEMS = Arrays.asList("a", "blockquote", "dl", "div", "img", "ol", "p", "pre", "table", "ul", "select");
    // Tag-name list; not referenced by the methods that decompiled successfully.
    public static final List ALTER_TO_DIV_EXCEPTIONS = Arrays.asList("div", "article", "section", "p");
    // Attribute names that cleanStyles removes from elements.
    public static final List PRESENTATIONAL_ATTRIBUTES = Arrays.asList("align", "background", "bgcolor", "border", "cellpadding", "cellspacing", "frame", "hspace", "rules", "style", "valign", "vspace");
    // Tag names for which cleanStyles additionally removes the "width" and "height" attributes.
    public static final List DEPRECATED_SIZE_ATTRIBUTE_ELEMS = Arrays.asList("table", "th", "td", "hr", "pre");
    // Tag names for which clean(...) passes a "true" flag to its removal callback.
    public static final List EMBEDDED_NODES = Arrays.asList("object", "embed", "iframe");
    // Tag-name list; not referenced by the methods that decompiled successfully.
    public static final List DATA_TABLE_DESCENDANTS = Arrays.asList("col", "colgroup", "tfoot", "thead", "th");
    // SLF4J logger bound to this class.
    public static final Logger log = LoggerFactory.getLogger(ArticleGrabber.class);

    /**
     * Creates a grabber configured from the given options.
     *
     * <p>Both arguments are null-checked up front. The candidate count and word threshold are
     * copied out of {@code readabilityOptions}, and the two per-instance lookup tables start
     * out empty.
     *
     * @param readabilityOptions source of {@code nbTopCandidates} and {@code wordThreshold}
     * @param regExUtil regex helper stored in {@link #regEx}
     */
    public ArticleGrabber(ReadabilityOptions readabilityOptions, RegExUtil regExUtil) {
        // Argument validation comes first, in the same order as before, so the failure reported
        // for bad input does not change.
        Utf8.checkParameterIsNotNull("options", readabilityOptions);
        Utf8.checkParameterIsNotNull("regEx", regExUtil);

        // Fresh, empty lookup tables for this instance.
        this.readabilityDataTable = new HashMap();
        this.readabilityObjects = new HashMap();

        // Settings taken over from the options object.
        this.wordThreshold = readabilityOptions.wordThreshold;
        this.nbTopCandidates = readabilityOptions.nbTopCandidates;

        this.regEx = regExUtil;
    }

    /**
     * Recursively strips presentational attributes from {@code element} and its descendants.
     *
     * <p>An element whose tag name is {@code svg} is left untouched, and its subtree is not
     * visited. An element whose class attribute is exactly {@code readability-styled} keeps its
     * own attributes, but its children are still processed. For every other element all
     * {@link #PRESENTATIONAL_ATTRIBUTES} are removed, and {@code width}/{@code height} are removed
     * as well when the tag is listed in {@link #DEPRECATED_SIZE_ATTRIBUTE_ELEMS}.
     *
     * @param element root of the subtree to clean; must not be {@code null}
     */
    public static void cleanStyles(Element element) {
        if (Utf8.areEqual(element.tag.tagName, "svg")) {
            return;
        }
        // Compare by value. A reference comparison (!=) against the literal is true for any
        // String instance that is not the interned constant, which would strip attributes from
        // elements explicitly marked as "readability-styled".
        if (!"readability-styled".equals(element.className())) {
            List attributes = PRESENTATIONAL_ATTRIBUTES;
            Utf8.checkExpressionValueIsNotNull("PRESENTATIONAL_ATTRIBUTES", attributes);
            for (Object attribute : attributes) {
                element.removeAttr((String) attribute);
            }
            if (DEPRECATED_SIZE_ATTRIBUTE_ELEMS.contains(element.tag.tagName)) {
                element.removeAttr("width");
                element.removeAttr("height");
            }
        }
        // Children are visited regardless of whether this element itself was stripped.
        for (Object item : element.children()) {
            Element child = (Element) item;
            Utf8.checkExpressionValueIsNotNull("child", child);
            cleanStyles(child);
        }
    }

    /**
     * Returns the element that follows {@code element} in a depth-first walk of the tree.
     *
     * <p>Lookup order: the result of {@code element.child()} (only when {@code z} is false and
     * the element has children), then the next element sibling, then the next element sibling of
     * the closest ancestor that has one.
     *
     * @param element the current position in the walk
     * @param z when {@code true}, the element's own children are skipped
     * @return the next element, or {@code null} when the walk is exhausted
     */
    public static Element getNextNode(Element element, boolean z) {
        boolean mayDescend = !z;
        if (mayDescend && element.children().size() > 0) {
            // NOTE(review): the no-argument child() is specific to this build; presumably it
            // yields the first child — confirm.
            return element.child();
        }

        Element sibling = element.nextElementSibling();
        if (sibling != null) {
            return sibling;
        }

        // Climb towards the root until some ancestor has a following sibling.
        for (Element ancestor = (Element) element.parentNode;
                ancestor != null;
                ancestor = (Element) ancestor.parentNode) {
            Element ancestorSibling = ancestor.nextElementSibling();
            if (ancestorSibling != null) {
                return ancestorSibling;
            }
        }
        return null;
    }

    /**
     * Collects the ancestors of {@code element}, nearest first.
     *
     * @param element the element whose parents are gathered
     * @param i maximum number of ancestors to collect; the walk stops as soon as exactly
     *     {@code i} have been added, so a value that the counter never equals (such as 0)
     *     results in every ancestor up to the root being returned
     * @return a new list holding the collected ancestors, possibly empty
     */
    public static ArrayList getNodeAncestors(Element element, int i) {
        ArrayList ancestors = new ArrayList();
        Element current = element;
        for (int collected = 1; ; collected++) {
            Element parent = (Element) current.parentNode;
            if (parent == null) {
                // Reached the top of the tree.
                return ancestors;
            }
            ancestors.add(parent);
            if (collected == i) {
                // Requested depth reached.
                return ancestors;
            }
            Utf8.checkExpressionValueIsNotNull("next.parent()", parent);
            current = parent;
        }
    }

    /*
     * NOTE(review): the decompiler could not reconstruct this method. The body below is only a
     * stub that unconditionally throws UnsupportedOperationException; the real logic (3496
     * bytecode instructions according to the dump note) is NOT present in this source. The
     * JADX warnings are kept as the only remaining hints about the lost code. The "$default"
     * suffix suggests a synthetic default-argument bridge for a grabArticle method — TODO confirm
     * against the bytecode before relying on this.
     */
    /* JADX WARN: Code restructure failed: missing block: B:101:0x008a, code lost:
    
        if (r12.byline.matcher(r5).find() != false) goto L678;
     */
    /* JADX WARN: Code restructure failed: missing block: B:183:0x043d, code lost:
    
        r4.add(r12, r6);
     */
    /* JADX WARN: Code restructure failed: missing block: B:184:0x0444, code lost:
    
        if (r4.size() <= r8) goto L836;
     */
    /* JADX WARN: Code restructure failed: missing block: B:185:0x0446, code lost:
    
        r4.remove(r8);
     */
    /* JADX WARN: Code restructure failed: missing block: B:355:0x08a9, code lost:
    
        if ((r6.intValue() * r1.intValue()) > 10) goto L1054;
     */
    /* JADX WARN: Code restructure failed: missing block: B:481:0x0cf5, code lost:
    
        return null;
     */
    /* JADX WARN: Code restructure failed: missing block: B:600:0x072f, code lost:
    
        if (r5.matcher(r13).find() != false) goto L988;
     */
    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Removed duplicated region for block: B:10:0x00cb  */
    /* JADX WARN: Removed duplicated region for block: B:14:0x00cf  */
    /* JADX WARN: Removed duplicated region for block: B:330:0x085c  */
    /* JADX WARN: Removed duplicated region for block: B:451:0x0d5e A[LOOP:0: B:2:0x0021->B:451:0x0d5e, LOOP_END] */
    /* JADX WARN: Removed duplicated region for block: B:452:0x0cff A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:558:0x073a  */
    /* JADX WARN: Removed duplicated region for block: B:564:0x0756 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:589:0x06f2  */
    /* JADX WARN: Removed duplicated region for block: B:591:0x06f9  */
    /* JADX WARN: Removed duplicated region for block: B:602:0x0733  */
    /* JADX WARN: Removed duplicated region for block: B:67:0x0226  */
    /* JADX WARN: Removed duplicated region for block: B:68:0x0233  */
    /* JADX WARN: Type inference failed for: r4v48, types: [org.jsoup.nodes.Element] */
    /* JADX WARN: Type inference failed for: r4v65, types: [org.jsoup.nodes.Element] */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public static org.jsoup.nodes.Element grabArticle$default(net.dankito.readability4j.processor.ArticleGrabber r41, org.jsoup.nodes.Document r42, androidx.compose.ui.graphics.vector.PathParser r43) {
        /*
            Method dump skipped, instructions count: 3496
            To view this dump add '--comments-level debug' option
        */
        // Placeholder emitted by the decompiler: calling this method always fails.
        throw new UnsupportedOperationException("Method not decompiled: net.dankito.readability4j.processor.ArticleGrabber.grabArticle$default(net.dankito.readability4j.processor.ArticleGrabber, org.jsoup.nodes.Document, androidx.compose.ui.graphics.vector.PathParser):org.jsoup.nodes.Element");
    }

    /**
     * Reports whether {@code element} has an ancestor with the given tag name.
     *
     * <p>The tag name is lower-cased with {@link java.util.Locale#ROOT} before comparison so the
     * result does not depend on the JVM's default locale (for example, "TITLE" must become
     * "title" even under a Turkish locale).
     *
     * @param element the element whose ancestors are inspected; must not be {@code null}
     * @param str the tag name to look for, in any letter case
     * @param i search bound; when positive, the walk gives up once the number of ancestors
     *     already inspected exceeds {@code i}. Zero or a negative value means no bound.
     * @param function1 optional extra predicate; when non-null, a tag match only counts if the
     *     predicate returns {@code true} for the matching ancestor
     * @return {@code true} if a matching (and, if applicable, accepted) ancestor exists
     */
    public static boolean hasAncestorTag(Element element, String str, int i, Function1 function1) {
        Utf8.checkParameterIsNotNull("node", element);
        String wantedTag = str.toLowerCase(java.util.Locale.ROOT);
        Utf8.checkExpressionValueIsNotNull("(this as java.lang.String).toLowerCase()", wantedTag);

        Element current = element;
        for (int depth = 0; ; depth++) {
            Element parent = (Element) current.parentNode;
            if (parent == null) {
                return false;
            }
            if (i > 0 && depth > i) {
                return false;
            }
            if (Utf8.areEqual(parent.tag.tagName, wantedTag)) {
                if (function1 == null) {
                    return true;
                }
                Utf8.checkExpressionValueIsNotNull("parent.parent()", parent);
                if (((Boolean) function1.invoke(parent)).booleanValue()) {
                    return true;
                }
            }
            // Step one level up and keep looking.
            Utf8.checkExpressionValueIsNotNull("parent.parent()", parent);
            current = parent;
        }
    }

    /**
     * Reports whether any descendant of {@code element} has a tag name listed in
     * {@link #DIV_TO_P_ELEMS}.
     *
     * @param element the element whose subtree is searched (the element itself is not tested)
     * @return {@code true} as soon as one such descendant is found, {@code false} otherwise
     */
    public static boolean hasChildBlockElement(Element element) {
        for (Object item : element.children()) {
            Element child = (Element) item;
            // Test the direct child first, then search beneath it.
            if (DIV_TO_P_ELEMS.contains(child.tag.tagName)) {
                return true;
            }
            if (hasChildBlockElement(child)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Delegates to {@code ProcessorBase.removeNodes} for the given tag name, supplying an
     * {@code ArticleGrabber$clean$1} callback.
     *
     * <p>The callback is told whether {@code str} is one of {@link #EMBEDDED_NODES}; what it does
     * with that flag is defined in the callback class, which is not visible here.
     *
     * @param str tag name handed to {@code removeNodes}
     * @param element element handed to {@code removeNodes}
     */
    public final void clean(String str, Element element) {
        boolean isEmbeddedTag = EMBEDDED_NODES.contains(str);
        ArticleGrabber$clean$1 callback = new ArticleGrabber$clean$1(0, this, isEmbeddedTag);
        ProcessorBase.removeNodes(element, str, callback);
    }

    /**
     * Delegates to {@code ProcessorBase.removeNodes} for the given tag name, but only when
     * {@code articleGrabberOptions.cleanConditionally} is set.
     *
     * <p>The removal callback receives a flag telling it whether {@code str} is {@code "ul"} or
     * {@code "ol"}; the callback's decision logic lives in a class that is not visible here.
     *
     * @param element element handed to {@code removeNodes}
     * @param str tag name handed to {@code removeNodes}
     * @param articleGrabberOptions options; must not be {@code null}
     */
    public final void cleanConditionally(Element element, String str, ArticleGrabberOptions articleGrabberOptions) {
        Utf8.checkParameterIsNotNull("options", articleGrabberOptions);
        if (!articleGrabberOptions.cleanConditionally) {
            // Feature switched off: nothing to do.
            return;
        }
        boolean isList = Utf8.areEqual(str, "ul") || Utf8.areEqual(str, "ol");
        PagerKt$pagerSemantics$1 callback = new PagerKt$pagerSemantics$1(this, isList, articleGrabberOptions, 4);
        ProcessorBase.removeNodes(element, str, callback);
    }

    /**
     * Scores an element by matching its class attribute and id against the negative and
     * positive patterns of {@link #regEx}.
     *
     * <p>Starting from 0, each non-blank attribute (class, then id) contributes -25 when the
     * negative pattern finds a match in it and +25 when the positive pattern does; both can apply
     * to the same attribute. When {@code articleGrabberOptions.weightClasses} is false the result
     * is always 0.
     *
     * @param element the element to score; must not be {@code null}
     * @param articleGrabberOptions options; must not be {@code null}
     * @return the accumulated weight
     */
    public final int getClassWeight(Element element, ArticleGrabberOptions articleGrabberOptions) {
        Utf8.checkParameterIsNotNull("e", element);
        Utf8.checkParameterIsNotNull("options", articleGrabberOptions);
        if (!articleGrabberOptions.weightClasses) {
            return 0;
        }

        RegExUtil regExUtil = this.regEx;
        // Explicitly declared and zero-initialised: the accumulator must have a defined value
        // even when the class attribute is blank and only the id contributes.
        int weight = 0;

        String className = element.className();
        Utf8.checkExpressionValueIsNotNull("e.className()", className);
        if (!StringsKt__StringsKt.isBlank(className)) {
            if (regExUtil.negative.matcher(className).find()) {
                weight -= 25;
            }
            if (regExUtil.positive.matcher(className).find()) {
                weight += 25;
            }
        }

        String id = element.id();
        if (!StringsKt__StringsKt.isBlank(id)) {
            if (regExUtil.negative.matcher(id).find()) {
                weight -= 25;
            }
            if (regExUtil.positive.matcher(id).find()) {
                weight += 25;
            }
        }
        return weight;
    }

    /**
     * Computes the share of an element's text that sits inside {@code <a>} elements.
     *
     * <p>The result is the summed inner-text length of every element returned by
     * {@code element.getElementsByTag("a")} divided by the inner-text length of {@code element}.
     *
     * @param element the element to measure; must not be {@code null}
     * @return the link-text to total-text ratio, or {@code 0.0} when the element has no inner text
     */
    public final double getLinkDensity(Element element) {
        Utf8.checkParameterIsNotNull("element", element);
        RegExUtil regExUtil = this.regEx;
        int textLength = ProcessorBase.getInnerText$default(this, element, regExUtil, 4).length();
        if (textLength == 0) {
            return 0.0d;
        }
        int linkLength = 0;
        for (Object item : element.getElementsByTag("a")) {
            Element linkNode = (Element) item;
            Utf8.checkExpressionValueIsNotNull("linkNode", linkNode);
            linkLength += ProcessorBase.getInnerText$default(this, linkNode, regExUtil, 4).length();
        }
        // Divide in floating point: int / int would truncate every ratio below 1 down to 0.
        return (double) linkLength / textLength;
    }

    /**
     * Looks up the {@link ReadabilityObject} stored for an element in {@link #readabilityObjects}.
     *
     * @param element the lookup key; must not be {@code null}
     * @return the stored object, or {@code null} when the map has no entry for {@code element}
     */
    public final ReadabilityObject getReadabilityObject(Element element) {
        Utf8.checkParameterIsNotNull("element", element);
        Object stored = this.readabilityObjects.get(element);
        return (ReadabilityObject) stored;
    }

    /*
     * NOTE(review): the decompiler could not reconstruct this method. The body below is only a
     * stub that unconditionally throws UnsupportedOperationException; the real logic (308
     * bytecode instructions according to the dump note) is NOT present in this source. The
     * surviving JADX fragments show tag-name comparisons and reads of a contentScore paired with
     * the constants 3 and 5, which suggests a per-tag score adjustment — TODO confirm against the
     * bytecode before relying on this.
     */
    /* JADX WARN: Code restructure failed: missing block: B:12:0x00b6, code lost:
    
        if (r1.equals("h5") != false) goto L137;
     */
    /* JADX WARN: Code restructure failed: missing block: B:14:0x00bf, code lost:
    
        if (r1.equals("h4") != false) goto L137;
     */
    /* JADX WARN: Code restructure failed: missing block: B:16:0x00c8, code lost:
    
        if (r1.equals("h3") != false) goto L137;
     */
    /* JADX WARN: Code restructure failed: missing block: B:18:0x00d1, code lost:
    
        if (r1.equals("h2") != false) goto L137;
     */
    /* JADX WARN: Code restructure failed: missing block: B:20:0x00da, code lost:
    
        if (r1.equals("h1") != false) goto L137;
     */
    /* JADX WARN: Code restructure failed: missing block: B:22:0x002b, code lost:
    
        if (r1.equals("blockquote") != false) goto L100;
     */
    /* JADX WARN: Code restructure failed: missing block: B:23:0x0068, code lost:
    
        r1 = r0.contentScore;
        r3 = 3;
     */
    /* JADX WARN: Code restructure failed: missing block: B:26:0x0034, code lost:
    
        if (r1.equals("form") != false) goto L119;
     */
    /* JADX WARN: Code restructure failed: missing block: B:27:0x00a3, code lost:
    
        r1 = r0.contentScore;
        r3 = 3;
     */
    /* JADX WARN: Code restructure failed: missing block: B:29:0x003e, code lost:
    
        if (r1.equals("pre") != false) goto L100;
     */
    /* JADX WARN: Code restructure failed: missing block: B:34:0x0053, code lost:
    
        if (r1.equals("ul") != false) goto L119;
     */
    /* JADX WARN: Code restructure failed: missing block: B:36:0x005c, code lost:
    
        if (r1.equals("th") != false) goto L137;
     */
    /* JADX WARN: Code restructure failed: missing block: B:38:0x0066, code lost:
    
        if (r1.equals("td") != false) goto L100;
     */
    /* JADX WARN: Code restructure failed: missing block: B:40:0x0074, code lost:
    
        if (r1.equals("ol") != false) goto L119;
     */
    /* JADX WARN: Code restructure failed: missing block: B:42:0x007d, code lost:
    
        if (r1.equals("li") != false) goto L119;
     */
    /* JADX WARN: Code restructure failed: missing block: B:44:0x0086, code lost:
    
        if (r1.equals("dt") != false) goto L119;
     */
    /* JADX WARN: Code restructure failed: missing block: B:46:0x008f, code lost:
    
        if (r1.equals("dl") != false) goto L119;
     */
    /* JADX WARN: Code restructure failed: missing block: B:48:0x0098, code lost:
    
        if (r1.equals("dd") != false) goto L119;
     */
    /* JADX WARN: Code restructure failed: missing block: B:50:0x00a1, code lost:
    
        if (r1.equals("address") != false) goto L119;
     */
    /* JADX WARN: Code restructure failed: missing block: B:7:0x00ad, code lost:
    
        if (r1.equals("h6") != false) goto L137;
     */
    /* JADX WARN: Code restructure failed: missing block: B:8:0x00dc, code lost:
    
        r1 = r0.contentScore;
        r3 = 5;
     */
    /* JADX WARN: Code restructure failed: missing block: B:9:0x00df, code lost:
    
        r1 = r1 - r3;
     */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public final void initializeNode(org.jsoup.nodes.Element r6, net.dankito.readability4j.model.ArticleGrabberOptions r7) {
        /*
            Method dump skipped, instructions count: 308
            To view this dump add '--comments-level debug' option
        */
        // Placeholder emitted by the decompiler: calling this method always fails.
        throw new UnsupportedOperationException("Method not decompiled: net.dankito.readability4j.processor.ArticleGrabber.initializeNode(org.jsoup.nodes.Element, net.dankito.readability4j.model.ArticleGrabberOptions):void");
    }
}
