package com.atistudios.app.data.utils.language;

import com.atistudios.app.data.model.quiz.TokenModel;
import com.atistudios.app.data.model.word.WordTokenWithRangeModel;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import kotlin.collections.n;
import lm.i;
import lm.o;
import um.f;
import um.q;

/* loaded from: classes.dex */
public final class WordPhraseTokenizer {
    /** Five-underscore marker; {@code Companion.detectPlaceholders} searches for this same literal. */
    public static final String COMPLETABLE_TOKEN = "_____";
    /** Shared holder instance through which the tokenizer methods are exposed. */
    public static final Companion Companion = new Companion(null);
    /** Wraps the pattern {@code (?u)[\s]+$}: a run of whitespace at the end of the input. Not referenced by the decompiled code visible in this file. */
    private static final f anySpaceRegex;
    /** Wraps the pattern {@code (?u)^[\p{Pd}]+$}: input made up only of dash punctuation. Not referenced by the decompiled code visible in this file. */
    private static final f dashRegex;
    /** Wraps the pattern {@code (?u)^_{2,}+$}: input made up only of two or more underscores. Used by {@code Companion.isTextPlaceholder}. */
    private static final f placeholderTokenRegex;
    /** Wraps the pattern {@code (?u)^[\p{P}]+$}: input made up only of punctuation. Not referenced by the decompiled code visible in this file. */
    private static final f punctuationRegex;
    /** Wraps the pattern {@code (?u)^[\p{P}\p{Z}\p{M}\p{C}\s]+$}: input made up only of punctuation, separator, mark, control/other or whitespace characters. */
    private static final f specialCharsRegex;
    /** Plain {@link Pattern} {@code (?u)((?:(?!_{2,}).)+)?(_{2,})?}: an optional run of text containing no double underscore, followed by an optional run of two or more underscores. */
    private static final Pattern splitPlaceholderTokenRegex;
    /** Five single-character strings (backtick, acute accent, Greek tonos, degree sign, fullwidth tilde) set in the static initializer; their use is not visible in the decompiled code. */
    private static final List<String> unmatchedChars;
    /** Wraps the pattern {@code (?u)^[\s]+$}: input made up only of whitespace. Not referenced by the decompiled code visible in this file. */
    private static final f whitespaceRegex;

    /* loaded from: classes.dex */
    public static final class Companion {
        /** No-arg constructor; private, so it is reachable only through the overload below. */
        private Companion() {
        }

        /**
         * Compiler-generated (synthetic) constructor that ignores {@code iVar} and delegates to
         * the private no-arg constructor. The enclosing class calls it with {@code null} to
         * create its shared {@code Companion} instance.
         */
        public /* synthetic */ Companion(i iVar) {
            this();
        }

        /**
         * Reports whether {@code str} contains the five-underscore placeholder.
         *
         * <p>The decompiled original also compared {@code str} with the same literal through
         * {@code o.b}, but the branch body was empty and the comparison result was discarded,
         * so the return value was always that of the {@code q.L} check alone. The dead
         * comparison is removed here ({@code o.b} is assumed to be a side-effect-free equality
         * helper; confirm against the obfuscation mapping).
         *
         * @param str text to inspect
         * @return the result of {@code q.L(str, "_____", false, 2, null)}; presumably a
         *     case-sensitive "contains" test (obfuscated helper, TODO confirm)
         */
        private final boolean detectPlaceholders(String str) {
            return q.L(str, "_____", false, 2, null);
        }

        /**
         * Checks {@code str} against the shared placeholder-token regex, whose pattern is
         * {@code (?u)^_{2,}+$}.
         *
         * @param str text to test
         * @return whatever {@code f.a(str)} yields for that regex; presumably {@code true}
         *     when the whole string matches (obfuscated helper, TODO confirm)
         */
        private final boolean isTextPlaceholder(String str) {
            final f placeholderRegex = WordPhraseTokenizer.placeholderTokenRegex;
            return placeholderRegex.a(str);
        }

        /* JADX WARN: Removed duplicated region for block: B:26:0x008b  */
        /* JADX WARN: Removed duplicated region for block: B:31:0x00b8 A[LOOP:1: B:14:0x0054->B:31:0x00b8, LOOP_END] */
        /*
            Code decompiled incorrectly, please refer to instructions dump.
            To view partially-correct add '--show-bad-code' argument
        */
        /**
         * Decompiler stub: the original bytecode (203 instructions) could not be reconstructed,
         * so this body only throws. The real behaviour must be recovered from the instruction
         * dump before this method can be used.
         *
         * @param r13 string argument (meaning not recoverable from this stub)
         * @param r14 int argument (meaning not recoverable from this stub)
         * @param r15 locale argument (meaning not recoverable from this stub)
         * @return never returns normally in this decompiled form
         * @throws UnsupportedOperationException always
         */
        private final bm.o<java.util.List<com.atistudios.app.data.model.word.WordWithRangeModel>, java.lang.Integer> splitTokenWithPlaceholder(java.lang.String r13, int r14, java.util.Locale r15) {
            /*
                Method dump skipped, instructions count: 203
                To view this dump add '--comments-level debug' option
            */
            // Placeholder emitted by the decompiler in place of the real method body.
            throw new UnsupportedOperationException("Method not decompiled: com.atistudios.app.data.utils.language.WordPhraseTokenizer.Companion.splitTokenWithPlaceholder(java.lang.String, int, java.util.Locale):bm.o");
        }

        /**
         * Flattens word models into a list of {@link TokenModel}s.
         *
         * <p>For every input model, four text segments are read in this order: previous-token
         * linker, raw prefix, raw text, raw suffix. Each is passed through
         * {@code new f("\\s").b(text, "")} (presumably "replace every whitespace character
         * with nothing"; obfuscated helper, TODO confirm). Every segment that is non-empty
         * afterwards becomes one {@code TokenModel} whose first constructor argument is
         * {@code "txt" + n}, where {@code n} is a counter that starts at 0 and increases by
         * one per emitted token across the whole list.
         *
         * <p>Compared with the decompiled original, the result list is now typed instead of
         * a raw {@code ArrayList}, the redundant {@code while (true)} wrapper is gone, and the
         * four copy-pasted emit branches are folded into one loop. Output is unchanged.
         *
         * @param list word models to flatten; checked by {@code o.g} before use
         * @return a new list of tokens in input order
         */
        public final List<TokenModel> mapWordTokenWithRangeModelListToSeparatePunctuationTokenModelList(List<WordTokenWithRangeModel> list) {
            o.g(list, "wordTokenWithRangeModelList");
            List<TokenModel> tokens = new ArrayList<>();
            f whitespace = new f("\\s");
            int nextId = 0;
            for (WordTokenWithRangeModel word : list) {
                // Resolve all four segments first, in the original order, so that every getter
                // runs before any token for this word is emitted (as in the original code).
                String[] segments = {
                    whitespace.b(word.getPreviousTokenLinker().getText(), ""),
                    whitespace.b(word.getRawPrefix().getText(), ""),
                    whitespace.b(word.getRaw().getText(), ""),
                    whitespace.b(word.getRawSuffix().getText(), ""),
                };
                for (String segment : segments) {
                    if (!segment.isEmpty()) {
                        tokens.add(new TokenModel("txt" + nextId, segment));
                        nextId++;
                    }
                }
            }
            return tokens;
        }

        /**
         * Cuts {@code str} into pieces at the boundaries reported by
         * {@link BreakIterator#getCharacterInstance(Locale)} and returns the pieces in order,
         * leaving out any piece that equals one of the hard-coded excluded strings below.
         *
         * @param str text to split; checked by {@code o.g} before use
         * @param locale locale handed to the character break iterator; checked by {@code o.g}
         * @return a new list with the retained pieces, in their original order
         */
        public final ArrayList<String> tokenizeTextResourceInChars(String str, Locale locale) {
            o.g(str, "inputTextResource");
            o.g(locale, "languageLocale");
            // Pieces equal to any of these strings are dropped from the result.
            List<String> excluded = n.k("\u202c", "\u202b", "\ud83d", "�", "‼", "【", "】", "《", "᙭", "\u200c", "\u202a", "⁉", "⃣", "》", "「", "〰", "ٟ", "༺", "༻", "\uf610", "￼");
            ArrayList<String> pieces = new ArrayList<>();
            BreakIterator boundaries = BreakIterator.getCharacterInstance(locale);
            boundaries.setText(str);
            int start = boundaries.first();
            while (start != BreakIterator.DONE) {
                int end = boundaries.next();
                if (end == BreakIterator.DONE) {
                    // No further boundary: the whole text has been consumed.
                    break;
                }
                String piece = str.substring(start, end);
                o.f(piece, "this as java.lang.String…ing(startIndex, endIndex)");
                if (!excluded.contains(piece)) {
                    pieces.add(piece);
                }
                start = end;
            }
            return pieces;
        }

        /* JADX WARN: Removed duplicated region for block: B:100:0x0121  */
        /* JADX WARN: Removed duplicated region for block: B:39:0x011f  */
        /* JADX WARN: Removed duplicated region for block: B:41:0x0124  */
        /* JADX WARN: Removed duplicated region for block: B:81:0x01d2  */
        /* JADX WARN: Removed duplicated region for block: B:83:0x01d7  */
        /* JADX WARN: Removed duplicated region for block: B:94:0x0203  */
        /* JADX WARN: Removed duplicated region for block: B:96:0x0208  */
        /* JADX WARN: Removed duplicated region for block: B:98:0x0205  */
        /* JADX WARN: Removed duplicated region for block: B:99:0x01d4  */
        /*
            Code decompiled incorrectly, please refer to instructions dump.
            To view partially-correct add '--show-bad-code' argument
        */
        /**
         * Decompiler stub: the original bytecode (890 instructions) could not be reconstructed,
         * so this body only throws. The real behaviour must be recovered from the instruction
         * dump before this method can be used.
         *
         * @param r24 string argument (meaning not recoverable from this stub)
         * @param r25 locale argument (meaning not recoverable from this stub)
         * @return never returns normally in this decompiled form
         * @throws UnsupportedOperationException always
         */
        public final java.util.List<com.atistudios.app.data.model.word.WordTokenWithRangeModel> tokenizeTextResourceInWordsByLanguage(java.lang.String r24, java.util.Locale r25) {
            /*
                Method dump skipped, instructions count: 890
                To view this dump add '--comments-level debug' option
            */
            // Placeholder emitted by the decompiler in place of the real method body.
            throw new UnsupportedOperationException("Method not decompiled: com.atistudios.app.data.utils.language.WordPhraseTokenizer.Companion.tokenizeTextResourceInWordsByLanguage(java.lang.String, java.util.Locale):java.util.List");
        }
    }

    // Class initializer: fills the character list and compiles every shared pattern once.
    // All patterns are compiled with Pattern.UNICODE_CASE (the constant whose value is 64),
    // and each compiled Pattern is passed through o.f before being wrapped in an f instance.
    static {
        unmatchedChars = n.k("`", "´", "΄", "°", "～");

        // Only punctuation, separators, marks, control/other characters or whitespace.
        final Pattern specialCharsPattern = Pattern.compile("(?u)^[\\p{P}\\p{Z}\\p{M}\\p{C}\\s]+$", Pattern.UNICODE_CASE);
        o.f(specialCharsPattern, "compile(\"(?u)^[\\\\p{P}\\\\p…$\", Pattern.UNICODE_CASE)");
        specialCharsRegex = new f(specialCharsPattern);

        // Only punctuation.
        final Pattern punctuationPattern = Pattern.compile("(?u)^[\\p{P}]+$", Pattern.UNICODE_CASE);
        o.f(punctuationPattern, "compile(\"(?u)^[\\\\p{P}]+\\$\", Pattern.UNICODE_CASE)");
        punctuationRegex = new f(punctuationPattern);

        // Only whitespace.
        final Pattern whitespacePattern = Pattern.compile("(?u)^[\\s]+$", Pattern.UNICODE_CASE);
        o.f(whitespacePattern, "compile(\"(?u)^[\\\\s]+\\$\", Pattern.UNICODE_CASE)");
        whitespaceRegex = new f(whitespacePattern);

        // Only underscores, at least two of them.
        final Pattern placeholderPattern = Pattern.compile("(?u)^_{2,}+$", Pattern.UNICODE_CASE);
        o.f(placeholderPattern, "compile(\"(?u)^_{2,}+\\$\", Pattern.UNICODE_CASE)");
        placeholderTokenRegex = new f(placeholderPattern);

        // Only dash punctuation.
        final Pattern dashPattern = Pattern.compile("(?u)^[\\p{Pd}]+$", Pattern.UNICODE_CASE);
        o.f(dashPattern, "compile(\"(?u)^[\\\\p{Pd}]+\\$\", Pattern.UNICODE_CASE)");
        dashRegex = new f(dashPattern);

        // Whitespace run at the end of the input.
        final Pattern trailingSpacePattern = Pattern.compile("(?u)[\\s]+$", Pattern.UNICODE_CASE);
        o.f(trailingSpacePattern, "compile(\"(?u)[\\\\s]+\\$\", Pattern.UNICODE_CASE)");
        anySpaceRegex = new f(trailingSpacePattern);

        // Kept as a raw Pattern: optional text without a double underscore, then optional underscores.
        splitPlaceholderTokenRegex = Pattern.compile("(?u)((?:(?!_{2,}).)+)?(_{2,})?", Pattern.UNICODE_CASE);
    }
}
