package kr.co.shineware.nlp.komoran.corpus.builder;

import A2.Z;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.lang.Character;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import kr.co.shineware.nlp.komoran.constant.FILENAME;
import kr.co.shineware.nlp.komoran.constant.SYMBOL;
import kr.co.shineware.nlp.komoran.corpus.model.Dictionary;
import kr.co.shineware.nlp.komoran.corpus.model.Grammar;
import kr.co.shineware.nlp.komoran.corpus.parser.CorpusParser;
import kr.co.shineware.nlp.komoran.corpus.parser.IrregularParser;
import kr.co.shineware.nlp.komoran.corpus.parser.model.ProblemAnswerPair;
import kr.co.shineware.nlp.komoran.exception.FileFormatException;
import kr.co.shineware.nlp.komoran.interfaces.UnitParser;
import kr.co.shineware.nlp.komoran.parser.KoreanUnitParser;
import kr.co.shineware.util.common.file.FileUtil;
import kr.co.shineware.util.common.model.Pair;
import kr.co.shineware.util.common.string.StringUtil;

/* loaded from: classes.dex */
/**
 * Accumulates a word dictionary, an irregular dictionary and a grammar from corpus
 * files, and loads/saves those three models from/to a directory.
 *
 * <p>Errors while reading input files are reported with {@code printStackTrace()} and
 * otherwise swallowed; a malformed corpus line or an invalid save path terminates the
 * JVM via {@code System.exit(1)}.
 */
public class CorpusBuilder {
    private final UnitParser unitParser = new KoreanUnitParser();
    private final CorpusParser corpusParser = new CorpusParser();
    private final IrregularParser irrParser = new IrregularParser();
    private final Dictionary wordDic = new Dictionary();
    private final Dictionary irrDic = new Dictionary();
    private final Grammar grammar = new Grammar();
    /** Keys of the form {@code first + "\t" + second} for irregular rules that must be skipped. */
    private Set<String> irrExclusiveSet = new HashSet<>();

    /**
     * Appends every adjacent transition of the answer list to the grammar, bracketed by
     * {@code SYMBOL.START} and {@code SYMBOL.END}.
     *
     * @param list answer pairs; only the second element of each pair is used
     */
    private void appendGrammar(List<Pair<String, String>> list) {
        String previous = SYMBOL.START;
        for (Pair<String, String> pair : list) {
            this.grammar.append(previous, pair.getSecond());
            previous = pair.getSecond();
        }
        this.grammar.append(previous, SYMBOL.END);
    }

    /**
     * Extracts irregular rules from a problem/answer pair and appends the accepted ones
     * to the irregular dictionary.
     *
     * <p>A rule is skipped when its second element is blank, when it is listed in the
     * exclusive set, when its combined first element still contains a Hangul
     * compatibility jamo character, or when it matches the hard-coded
     * {@code "ㅇㅡ"} / {@code "ㅇㅡㅅㅣ/EP"} suffix pair.
     *
     * @param problemAnswerPair parsed corpus line
     */
    private void appendIrregularDictionary(ProblemAnswerPair problemAnswerPair) {
        if (!isIrregular(problemAnswerPair.getProblem(), problemAnswerPair.getAnswerList())) {
            return;
        }
        for (Pair<String, String> rule : this.irrParser.parse(
                convertJaso(problemAnswerPair.getProblem()),
                convertJaso(problemAnswerPair.getAnswerList()))) {
            String first = rule.getFirst();
            String second = rule.getSecond();
            if (second.trim().isEmpty()) {
                continue;
            }
            // The exclusion key drops everything from the last '/' of the second element.
            String exclusionKey = first + "\t" + second.substring(0, second.lastIndexOf("/"));
            if (this.irrExclusiveSet.contains(exclusionKey)) {
                continue;
            }
            String combinedFirst = this.unitParser.combine(first);
            if (containsCompatibilityJamo(combinedFirst)) {
                continue;
            }
            if (first.endsWith("ㅇㅡ") && second.endsWith("ㅇㅡㅅㅣ/EP")) {
                continue;
            }
            this.irrDic.append(combinedFirst, this.unitParser.combine(second));
        }
    }

    /**
     * Tells whether any character of {@code text} belongs to the
     * {@code HANGUL_COMPATIBILITY_JAMO} Unicode block.
     */
    private boolean containsCompatibilityJamo(String text) {
        for (int i = 0; i < text.length(); i++) {
            if (StringUtil.getUnicodeBlock(text.charAt(i)) == Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO) {
                return true;
            }
        }
        return false;
    }

    /**
     * Appends the answer pairs to the word dictionary.
     *
     * <p>Skipped are: a single compatibility-jamo character whose tag contains
     * {@code "NN"}, and any pair whose tag is exactly {@code "SH"}, {@code "SN"} or
     * {@code "SL"}.
     *
     * @param list answer pairs (first element: word, second element: tag)
     */
    private void appendWordDictionary(List<Pair<String, String>> list) {
        for (Pair<String, String> pair : list) {
            String tag = pair.getSecond();
            String trimmedWord = pair.getFirst().trim();
            boolean singleJamoNoun = trimmedWord.length() == 1
                    && StringUtil.getUnicodeBlock(trimmedWord.charAt(0)) == Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO
                    && tag.contains("NN");
            if (singleJamoNoun) {
                continue;
            }
            if (tag.equals("SH") || tag.equals("SN") || tag.equals("SL")) {
                continue;
            }
            this.wordDic.append(pair.getFirst(), tag);
        }
    }

    /** Runs the unit parser over {@code str} and returns its result. */
    private String convertJaso(String str) {
        return this.unitParser.parse(str);
    }

    /**
     * Returns a new list in which the first element of every pair has been run through
     * the unit parser; the second element is carried over unchanged.
     */
    private List<Pair<String, String>> convertJaso(List<Pair<String, String>> list) {
        List<Pair<String, String>> converted = new ArrayList<>();
        for (Pair<String, String> pair : list) {
            converted.add(new Pair<>(this.unitParser.parse(pair.getFirst()), pair.getSecond()));
        }
        return converted;
    }

    /**
     * Compares the problem string with the concatenation of the answers' first elements,
     * both passed through the unit parser and {@code StringUtil.getKorean}.
     *
     * @return {@code true} when the two normalized strings differ
     */
    private boolean isIrregular(String str, List<Pair<String, String>> list) {
        StringBuilder joined = new StringBuilder();
        for (Pair<String, String> pair : list) {
            joined.append(pair.getFirst());
        }
        String problemKorean = StringUtil.getKorean(this.unitParser.parse(str));
        String answerKorean = StringUtil.getKorean(this.unitParser.parse(joined.toString()));
        return !problemKorean.equals(answerKorean);
    }

    /** Collapses runs of spaces into a single space and trims the result. */
    private String refineFormat(String str) {
        return str.replaceAll("[ ]+", " ").trim();
    }

    /**
     * Reads a UTF-8 user dictionary file and appends every entry that the word
     * dictionary does not know yet, tagged with {@code SYMBOL.NNP}.
     *
     * <p>Blank lines and lines starting with {@code '#'} are ignored. Any exception is
     * printed and swallowed.
     *
     * @param str path of the user dictionary file
     */
    public void appendUserDic(String str) {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(str), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String entry = line.trim();
                if (entry.isEmpty() || entry.charAt(0) == '#') {
                    continue;
                }
                if (this.wordDic.getPosList(entry) == null) {
                    this.wordDic.append(entry, SYMBOL.NNP);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Appends every user dictionary file under {@code str} whose name ends with
     * {@code "." + str2}; each processed file name is printed to standard output.
     *
     * @param str  path handed to {@code FileUtil.getFileNames}
     * @param str2 file extension without the leading dot
     */
    public void appendUserDicPath(String str, String str2) {
        String suffix = "." + str2;
        for (String fileName : FileUtil.getFileNames(str)) {
            if (fileName.endsWith(suffix)) {
                System.out.println(fileName);
                appendUserDic(fileName);
            }
        }
    }

    /**
     * Reads a UTF-8 corpus file line by line and feeds every non-empty line into the
     * word dictionary, the irregular dictionary and the grammar.
     *
     * <p>The first nine (normalized) lines are echoed to standard output. A line that
     * the corpus parser rejects with {@link FileFormatException} is reported on standard
     * error and terminates the JVM with status 1. Any other exception is printed and
     * ends processing of this file.
     *
     * @param str path of the corpus file
     */
    public void build(String str) {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(str), StandardCharsets.UTF_8))) {
            int lineNumber = 0;
            String rawLine;
            while ((rawLine = reader.readLine()) != null) {
                lineNumber++;
                String line = refineFormat(rawLine);
                if (lineNumber < 10) {
                    System.out.println(line);
                }
                if (line.isEmpty()) {
                    continue;
                }
                ProblemAnswerPair parsed;
                try {
                    parsed = this.corpusParser.parse(line);
                } catch (FileFormatException e) {
                    System.err.println(lineNumber + " : " + line);
                    e.printStackTrace();
                    System.exit(1);
                    return; // not reached at runtime; satisfies definite assignment
                }
                appendWordDictionary(parsed.getAnswerList());
                appendIrregularDictionary(parsed);
                appendGrammar(parsed.getAnswerList());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds from every file returned by {@code FileUtil.getFileNames(str)}.
     *
     * @param str path handed to {@code FileUtil.getFileNames}
     */
    public void buildPath(String str) {
        buildPath(str, null);
    }

    /**
     * Builds from the files returned by {@code FileUtil.getFileNames(str)}, optionally
     * restricted to names ending with {@code str2}; each processed file name is printed
     * to standard output.
     *
     * @param str  path handed to {@code FileUtil.getFileNames}
     * @param str2 required file-name suffix, or {@code null} to accept every file
     */
    public void buildPath(String str, String str2) {
        for (String fileName : FileUtil.getFileNames(str)) {
            if (str2 == null || fileName.endsWith(str2)) {
                System.out.println(fileName);
                build(fileName);
            }
        }
    }

    /**
     * Loads the word dictionary, irregular dictionary and grammar from the files named
     * by {@code FILENAME.WORD_DIC}, {@code FILENAME.IRREGULAR_DIC} and
     * {@code FILENAME.GRAMMAR} inside the given directory.
     *
     * @param str directory path
     */
    @Deprecated
    public void load(String str) {
        String separator = File.separator;
        // NOTE(review): Z.l is defined outside this file; it presumably returns a
        // StringBuilder seeded with str — confirm before simplifying to concatenation.
        StringBuilder wordDicPath = Z.l(str);
        wordDicPath.append(separator);
        wordDicPath.append(FILENAME.WORD_DIC);
        this.wordDic.load(wordDicPath.toString());
        this.irrDic.load(str + separator + FILENAME.IRREGULAR_DIC);
        this.grammar.load(str + separator + FILENAME.GRAMMAR);
    }

    /**
     * Saves the word dictionary, irregular dictionary and grammar into the given
     * directory, creating it (and missing parents) first.
     *
     * <p>If the path exists but is not a directory, an error is printed to standard
     * error and the JVM exits with status 1.
     *
     * @param str directory path
     */
    public void save(String str) {
        File directory = new File(str);
        if (directory.exists() && !directory.isDirectory()) {
            PrintStream err = System.err;
            err.println("CorpusBuilder.save error!");
            err.println("savePathName is exists, but it's not a directory.");
            err.println("please check path name to save");
            System.exit(1);
        }
        directory.mkdirs();
        String separator = File.separator;
        // NOTE(review): Z.l is defined outside this file; it presumably returns a
        // StringBuilder seeded with str — confirm before simplifying to concatenation.
        StringBuilder wordDicPath = Z.l(str);
        wordDicPath.append(separator);
        wordDicPath.append(FILENAME.WORD_DIC);
        this.wordDic.save(wordDicPath.toString());
        this.irrDic.save(str + separator + FILENAME.IRREGULAR_DIC);
        this.grammar.save(str + separator + FILENAME.GRAMMAR);
    }

    /**
     * Replaces the exclusive irregular-rule set with the rules read from a UTF-8 file.
     *
     * <p>Rules are read as two consecutive lines: the first (trimmed) with its first 6
     * characters removed, the second with its first 9 characters removed; presumably
     * these are fixed-width labels — confirm against the rule file format. Blank lines
     * before a rule are ignored. Any exception is printed and swallowed; rules read up
     * to that point are kept.
     *
     * @param str path of the exclusive rule file
     */
    public void setExclusiveIrrRule(String str) {
        // The set is reset before the file is opened, so a missing file leaves it empty.
        this.irrExclusiveSet = new HashSet<>();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(str), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String firstLine = line.trim();
                if (firstLine.isEmpty()) {
                    continue;
                }
                String secondLine = reader.readLine();
                if (secondLine == null) {
                    // A rule needs two lines; report the dangling one instead of failing with an NPE.
                    System.err.println("CorpusBuilder.setExclusiveIrrRule: missing second line for rule: " + firstLine);
                    break;
                }
                this.irrExclusiveSet.add(firstLine.substring(6) + "\t" + secondLine.substring(9));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
