package com.atistudios.app.data.utils.language;

import com.atistudios.app.data.model.quiz.TokenModel;
import com.atistudios.app.data.model.word.WordTokenWithRangeModel;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import kotlin.collections.t;
import pp.i;
import pp.u;
import wm.o;

/* loaded from: classes.dex */
public final class WordPhraseTokenizer {
    /** Literal placeholder text (five underscores) that the tokenizer compares tokens against. */
    public static final String COMPLETABLE_TOKEN = "_____";
    /** Singleton holder for the tokenizer's methods (Kotlin companion-object shape). */
    public static final Companion Companion = new Companion(null);
    // Pattern "(?u)[\\s]+$": a run of whitespace at the end of the input (not anchored at the start).
    private static final i anySpaceRegex;
    // Pattern "(?u)^[\\p{Pd}]+$": input made up only of dash punctuation (Unicode category Pd).
    private static final i dashRegex;
    // Pattern "(?u)^_{2,}+$": input made up only of two or more underscores.
    private static final i placeholderTokenRegex;
    // Pattern "(?u)^[\\p{P}]+$": input made up only of punctuation (Unicode category P).
    private static final i punctuationRegex;
    // Pattern "(?u)^[\\p{P}\\p{Z}\\p{M}\\p{C}\\s]+$": input made up only of punctuation,
    // separator, mark, "other" (C) category characters or whitespace.
    private static final i specialCharsRegex;
    // Plain java.util.regex pattern with two optional groups: (1) a run of characters that
    // does not start a 2+ underscore sequence, (2) a run of two or more underscores.
    private static final Pattern splitPlaceholderTokenRegex;
    // Fixed list of accent/degree/tilde-like characters; populated in the static initializer.
    // NOTE(review): its consumer is not visible in this decompiled file.
    private static final List<String> unmatchedChars;
    // Pattern "(?u)^[\\s]+$": input made up only of whitespace.
    private static final i whitespaceRegex;

    /* loaded from: classes.dex */
    public static final class Companion {
        /** Private: the only instance is the one stored in {@code WordPhraseTokenizer.Companion}. */
        private Companion() {
        }

        /**
         * Compiler-generated (synthetic) constructor; the marker argument is ignored and
         * construction is delegated to the private no-arg constructor.
         */
        public /* synthetic */ Companion(wm.i iVar) {
            this();
        }

        /**
         * Reports whether {@code str} involves the five-underscore placeholder.
         *
         * <p>Returns {@code true} when the obfuscated string helper {@code u.M} accepts
         * {@code str} against {@code "_____"}, or when {@code str} is equal to
         * {@code "_____"}.
         * NOTE(review): {@code u.M} is presumably Kotlin's {@code contains} with
         * {@code ignoreCase = false} — confirm against the obfuscation mapping.
         *
         * @param str text to inspect
         * @return {@code true} if either check succeeds
         */
        private final boolean detectPlaceholders(String str) {
            if (u.M(str, "_____", false, 2, null)) {
                return true;
            }
            return o.b(str, "_____");
        }

        /**
         * Tests {@code str} against the placeholder pattern {@code (?u)^_{2,}+$}
         * (two or more underscores and nothing else).
         * NOTE(review): {@code i.b} is presumably a whole-input match on the obfuscated
         * Kotlin {@code Regex} wrapper — confirm.
         *
         * @param str candidate token text
         * @return the result reported by the placeholder regex
         */
        private final boolean isTextPlaceholder(String str) {
            i placeholderRegex = WordPhraseTokenizer.placeholderTokenRegex;
            return placeholderRegex.b(str);
        }

        /**
         * Splits a token that may embed placeholder runs into consecutive ranged pieces.
         *
         * <p>Reconstructed from the bytecode register dump that the decompiler emitted in
         * place of a body (the previous stub unconditionally threw
         * {@link UnsupportedOperationException}). Behaviour, step by step:
         * <ol>
         *   <li>The input is trimmed via {@code pp.k.R0}.</li>
         *   <li>If the trimmed text equals {@code "_____"}, a single piece covering it is
         *       produced.</li>
         *   <li>Otherwise the text is scanned with {@code splitPlaceholderTokenRegex}; for every
         *       match, groups {@code 1..groupCount} are visited in order. When group 2 equals
         *       {@code "_____"}, a one-character {@code " "} piece is emitted first. Every
         *       non-null, non-empty group is then emitted as its own piece.</li>
         * </ol>
         * After each emitted piece the running offset is advanced by that piece's length.
         *
         * <p>The original bytecode also built a {@code "groupText: " + group} string whose
         * result was discarded (apparently a stripped log call); it is omitted here because
         * it has no observable effect.
         *
         * @param str token text to split; trimmed before processing
         * @param i10 offset assigned to the first emitted piece (second constructor argument
         *            of {@code WordWithRangeModel}; presumably a start index — confirm against
         *            that model)
         * @return a pair of (emitted pieces, offset following the last emitted piece)
         */
        private final lm.o<List<com.atistudios.app.data.model.word.WordWithRangeModel>, Integer> splitTokenWithPlaceholder(String str, int i10) {
            List<com.atistudios.app.data.model.word.WordWithRangeModel> pieces = new ArrayList<>();
            String trimmed = pp.k.R0(str).toString();
            int offset = i10;

            if (o.b(trimmed, "_____")) {
                // The whole token is exactly the placeholder: emit it as one piece.
                pieces.add(new com.atistudios.app.data.model.word.WordWithRangeModel(trimmed, offset, trimmed.length()));
                offset += trimmed.length();
            } else {
                java.util.regex.Matcher matcher = WordPhraseTokenizer.splitPlaceholderTokenRegex.matcher(trimmed);
                while (matcher.find()) {
                    int groupCount = matcher.groupCount();
                    for (int groupIndex = 1; groupIndex <= groupCount; groupIndex++) {
                        String groupText = matcher.group(groupIndex);

                        // A five-underscore placeholder captured by group 2 is preceded by a
                        // synthetic single-space piece.
                        if (o.b(groupText, "_____") && groupIndex == 2) {
                            pieces.add(new com.atistudios.app.data.model.word.WordWithRangeModel(" ", offset, 1));
                            offset++;
                        }

                        // Both groups are optional, so skip the ones that did not participate.
                        if (groupText != null && groupText.length() != 0) {
                            pieces.add(new com.atistudios.app.data.model.word.WordWithRangeModel(groupText, offset, groupText.length()));
                            offset += groupText.length();
                        }
                    }
                }
            }
            return new lm.o<>(pieces, Integer.valueOf(offset));
        }

        /**
         * Flattens word tokens into a list of {@link TokenModel}s, one per non-empty part.
         *
         * <p>For each input token the texts of its previous-token linker, raw prefix, raw
         * text and raw suffix are taken in that order, every match of the regex {@code \s}
         * is replaced with the empty string, and each part that is still non-empty becomes a
         * {@code TokenModel} whose first constructor argument is {@code "txt"} followed by a
         * counter. The counter starts at 0 and increases by one per emitted model across the
         * whole list.
         *
         * @param list word tokens to flatten; checked by {@code o.f} on entry
         * @return the emitted token models in encounter order
         */
        public final List<TokenModel> mapWordTokenWithRangeModelListToSeparatePunctuationTokenModelList(List<WordTokenWithRangeModel> list) {
            o.f(list, "wordTokenWithRangeModelList");
            List<TokenModel> tokens = new ArrayList<>();
            i whitespace = new i("\\s");
            int nextId = 0;
            for (WordTokenWithRangeModel wordToken : list) {
                // Strip whitespace from all four parts first, keeping their emission order.
                String[] parts = {
                    whitespace.c(wordToken.getPreviousTokenLinker().getText(), ""),
                    whitespace.c(wordToken.getRawPrefix().getText(), ""),
                    whitespace.c(wordToken.getRaw().getText(), ""),
                    whitespace.c(wordToken.getRawSuffix().getText(), "")
                };
                for (String part : parts) {
                    if (!part.isEmpty()) {
                        tokens.add(new TokenModel("txt" + nextId, part));
                        nextId++;
                    }
                }
            }
            return tokens;
        }

        /**
         * Splits {@code str} into the segments delimited by a locale-specific character
         * {@link BreakIterator}, leaving out any segment that equals an entry of a fixed
         * exclusion list (directional marks, a lone surrogate, assorted brackets and symbols).
         *
         * @param str    text to split; checked by {@code o.f} on entry
         * @param locale locale handed to {@link BreakIterator#getCharacterInstance(Locale)};
         *               checked by {@code o.f} on entry
         * @return the retained segments in text order
         */
        public final ArrayList<String> tokenizeTextResourceInChars(String str, Locale locale) {
            o.f(str, "inputTextResource");
            o.f(locale, "languageLocale");
            List<String> excluded = t.n("\u202c", "\u202b", "\ud83d", "�", "‼", "【", "】", "《", "᙭", "\u200c", "\u202a", "⁉", "⃣", "》", "「", "〰", "ٟ", "༺", "༻", "\uf610", "￼");
            ArrayList<String> segments = new ArrayList<>();

            BreakIterator boundaries = BreakIterator.getCharacterInstance(locale);
            boundaries.setText(str);

            int start = boundaries.first();
            while (start != BreakIterator.DONE) {
                int end = boundaries.next();
                if (end == BreakIterator.DONE) {
                    // No further boundary: the previous segment was the last one.
                    break;
                }
                String segment = str.substring(start, end);
                o.e(segment, "this as java.lang.String…ing(startIndex, endIndex)");
                if (!excluded.contains(segment)) {
                    segments.add(segment);
                }
                start = end;
            }
            return segments;
        }

        /* JADX WARN: Removed duplicated region for block: B:100:0x0118  */
        /* JADX WARN: Removed duplicated region for block: B:39:0x0116  */
        /* JADX WARN: Removed duplicated region for block: B:41:0x011b  */
        /* JADX WARN: Removed duplicated region for block: B:81:0x01c8  */
        /* JADX WARN: Removed duplicated region for block: B:83:0x01cd  */
        /* JADX WARN: Removed duplicated region for block: B:94:0x01f9  */
        /* JADX WARN: Removed duplicated region for block: B:96:0x01fe  */
        /* JADX WARN: Removed duplicated region for block: B:98:0x01fb  */
        /* JADX WARN: Removed duplicated region for block: B:99:0x01ca  */
        /*
            Code decompiled incorrectly, please refer to instructions dump.
            To view partially-correct add '--show-bad-code' argument
        */
        /**
         * Decompilation stub: the decompiler could not recover this method's body (an
         * 875-instruction dump was skipped), so every call throws
         * {@link UnsupportedOperationException}.
         *
         * <p>Only the signature is known from this file: it takes a text and a
         * {@link java.util.Locale} and is declared to return a list of
         * {@code WordTokenWithRangeModel}. The actual tokenization logic must be recovered
         * from the original bytecode (for example by re-running the decompiler with
         * {@code --show-bad-code}) before this method can be used.
         *
         * @param r24 input text (name is a decompiler register placeholder)
         * @param r25 locale (name is a decompiler register placeholder)
         * @return never returns normally in this decompiled form
         * @throws UnsupportedOperationException always
         */
        public final java.util.List<com.atistudios.app.data.model.word.WordTokenWithRangeModel> tokenizeTextResourceInWordsByLanguage(java.lang.String r24, java.util.Locale r25) {
            /*
                Method dump skipped, instructions count: 875
                To view this dump add '--comments-level debug' option
            */
            throw new UnsupportedOperationException("Method not decompiled: com.atistudios.app.data.utils.language.WordPhraseTokenizer.Companion.tokenizeTextResourceInWordsByLanguage(java.lang.String, java.util.Locale):java.util.List");
        }
    }

    // Builds the fixed character list and compiles every pattern once at class-load time.
    // All patterns are compiled with Pattern.UNICODE_CASE (numeric value 64) and also carry
    // the inline "(?u)" flag. The o.e(...) calls are the compiler-inserted null checks on
    // each compiled pattern and are kept with their original messages.
    static {
        unmatchedChars = t.n("`", "´", "΄", "°", "～");

        // Only punctuation / separators / marks / "other" category characters / whitespace.
        Pattern specialCharsPattern = Pattern.compile("(?u)^[\\p{P}\\p{Z}\\p{M}\\p{C}\\s]+$", Pattern.UNICODE_CASE);
        o.e(specialCharsPattern, "compile(\"(?u)^[\\\\p{P}\\\\p…$\", Pattern.UNICODE_CASE)");
        specialCharsRegex = new i(specialCharsPattern);

        // Only punctuation.
        Pattern punctuationPattern = Pattern.compile("(?u)^[\\p{P}]+$", Pattern.UNICODE_CASE);
        o.e(punctuationPattern, "compile(\"(?u)^[\\\\p{P}]+\\$\", Pattern.UNICODE_CASE)");
        punctuationRegex = new i(punctuationPattern);

        // Only whitespace.
        Pattern whitespacePattern = Pattern.compile("(?u)^[\\s]+$", Pattern.UNICODE_CASE);
        o.e(whitespacePattern, "compile(\"(?u)^[\\\\s]+\\$\", Pattern.UNICODE_CASE)");
        whitespaceRegex = new i(whitespacePattern);

        // Only underscores, at least two of them.
        Pattern placeholderPattern = Pattern.compile("(?u)^_{2,}+$", Pattern.UNICODE_CASE);
        o.e(placeholderPattern, "compile(\"(?u)^_{2,}+\\$\", Pattern.UNICODE_CASE)");
        placeholderTokenRegex = new i(placeholderPattern);

        // Only dash punctuation.
        Pattern dashPattern = Pattern.compile("(?u)^[\\p{Pd}]+$", Pattern.UNICODE_CASE);
        o.e(dashPattern, "compile(\"(?u)^[\\\\p{Pd}]+\\$\", Pattern.UNICODE_CASE)");
        dashRegex = new i(dashPattern);

        // Whitespace run at the end of the input (no start anchor).
        Pattern trailingSpacePattern = Pattern.compile("(?u)[\\s]+$", Pattern.UNICODE_CASE);
        o.e(trailingSpacePattern, "compile(\"(?u)[\\\\s]+\\$\", Pattern.UNICODE_CASE)");
        anySpaceRegex = new i(trailingSpacePattern);

        // Group 1: text that does not begin a run of 2+ underscores; group 2: such a run.
        splitPlaceholderTokenRegex = Pattern.compile("(?u)((?:(?!_{2,}).)+)?(_{2,})?", Pattern.UNICODE_CASE);
    }
}
