package de.l3s.boilerpipe.filters.simple;

import de.l3s.boilerpipe.BoilerpipeFilter;
import de.l3s.boilerpipe.BoilerpipeProcessingException;
import de.l3s.boilerpipe.document.TextBlock;
import de.l3s.boilerpipe.document.TextDocument;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: classes3.dex */
/**
 * Keeps a text block marked as content only if it contains at least one
 * "clause" with a minimum number of words.
 *
 * <p>A clause is a stretch of text that ends at a letter or digit which is
 * immediately followed by one or more of {@code , . : ; ! ?} and then by
 * whitespace (space, {@code \n}, {@code \r}) or the end of the text.
 * Optionally, the trailing text after the last such delimiter may also be
 * treated as a clause.
 *
 * <p>Words are counted as "number of whitespace runs + 1". As a consequence an
 * empty stretch of text counts as one word, and leading or trailing
 * whitespace inside a stretch increases the count.
 */
public final class MinClauseWordsFilter implements BoilerpipeFilter {
    // The patterns are declared before INSTANCE so that they are already
    // initialised whenever an instance exists, regardless of static
    // initialisation order. They are compiled once for the whole class.

    /** A letter or digit, then clause punctuation, then whitespace or end of input. */
    private static final Pattern PAT_CLAUSE_DELIMITER =
            Pattern.compile("[\\p{L}\\d][\\,\\.\\:\\;\\!\\?]+([ \\n\\r]+|$)");

    /** A run of spaces and/or line breaks; used as the word separator. */
    private static final Pattern PAT_WHITESPACE = Pattern.compile("[ \\n\\r]+");

    /** Shared instance: at least 5 words per clause, delimiter required. */
    public static final MinClauseWordsFilter INSTANCE = new MinClauseWordsFilter(5, false);

    private final boolean acceptClausesWithoutDelimiter;
    private final int minWords;

    /**
     * Creates a filter that requires clauses to end with a delimiter.
     *
     * @param minWords minimum number of words a clause must have
     */
    public MinClauseWordsFilter(int minWords) {
        this(minWords, false);
    }

    /**
     * Creates a filter.
     *
     * @param minWords minimum number of words a clause must have
     * @param acceptClausesWithoutDelimiter if {@code true}, the text following
     *        the last clause delimiter (or the whole text when there is no
     *        delimiter) is also checked as a clause
     */
    public MinClauseWordsFilter(int minWords, boolean acceptClausesWithoutDelimiter) {
        this.minWords = minWords;
        this.acceptClausesWithoutDelimiter = acceptClausesWithoutDelimiter;
    }

    /**
     * Tells whether the given stretch of text has at least {@code minWords}
     * words, where the word count is one more than the number of whitespace
     * runs found in it.
     */
    private boolean isClause(CharSequence text) {
        Matcher whitespace = PAT_WHITESPACE.matcher(text);
        int words = 1;
        // Stop scanning as soon as the threshold is reached.
        while (words < this.minWords && whitespace.find()) {
            words++;
        }
        return words >= this.minWords;
    }

    /**
     * Tells whether the text contains at least one clause that satisfies
     * {@link #isClause(CharSequence)}.
     */
    private boolean containsClause(String text) {
        Matcher delimiter = PAT_CLAUSE_DELIMITER.matcher(text);
        int clauseStart = 0;
        while (delimiter.find()) {
            // The candidate ends with the letter/digit that precedes the
            // punctuation, hence start() + 1.
            if (isClause(text.subSequence(clauseStart, delimiter.start() + 1))) {
                return true;
            }
            // The next candidate begins after the punctuation and the
            // whitespace consumed by the delimiter match.
            clauseStart = delimiter.end();
        }
        // No delimited clause qualified; optionally consider the remainder.
        return this.acceptClausesWithoutDelimiter
                && isClause(text.subSequence(clauseStart, text.length()));
    }

    /**
     * Marks every content block that contains no qualifying clause as
     * non-content. Blocks that are already non-content are left untouched.
     *
     * @param textDocument the document whose text blocks are examined
     * @return {@code true} if at least one block was changed to non-content
     * @throws BoilerpipeProcessingException declared by the interface; not
     *         thrown by this implementation itself
     */
    @Override // de.l3s.boilerpipe.BoilerpipeFilter
    public boolean process(TextDocument textDocument) throws BoilerpipeProcessingException {
        boolean changed = false;
        for (TextBlock textBlock : textDocument.getTextBlocks()) {
            if (!textBlock.isContent()) {
                continue;
            }
            if (!containsClause(textBlock.getText())) {
                textBlock.setIsContent(false);
                changed = true;
            }
        }
        return changed;
    }
}
