package de.l3s.boilerpipe.sax;

import java.io.ByteArrayInputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.xml.sax.InputSource;

/* loaded from: classes3.dex */
/**
 * An HTML document held as raw bytes together with the charset those bytes are encoded in.
 *
 * <p>Besides exposing the document as a SAX {@link InputSource}, the class offers helpers that
 * temporarily rewrite numeric character references ({@code &#...;}) and {@code <img ...>} tags
 * into plain-text placeholders ({@code #esc#...#/esc#} and {@code #img#...#/img#}) and helpers
 * that turn those placeholders back into markup. The payload inside a placeholder is
 * URL-encoded.
 */
public class HTMLDocument implements InputSourceable {
    /** Matches {@code &#...;}; group 1 is the text between {@code &#} and {@code ;}. */
    private static final Pattern ESCAPED_CHAR = Pattern.compile("&#(.*?);");

    /** Matches {@code <img ...>} / {@code <img .../>}; group 1 is the attribute text. */
    private static final Pattern IMAGE_TAG = Pattern.compile("<img (.*?)[/]?>");

    /** Matches the placeholder produced for an escaped character; group 1 is the payload. */
    private static final Pattern ENCODED_ESCAPED_CHAR = Pattern.compile("#esc#(.*?)#/esc#");

    /** Matches the placeholder produced for an image tag; group 1 is the payload. */
    private static final Pattern ENCODED_IMAGE_TAG = Pattern.compile("#img#(.*?)#/img#");

    private final Charset charset;
    private byte[] data;

    /**
     * Creates a document from a string; the bytes are stored UTF-8 encoded.
     *
     * @param str the HTML text
     */
    public HTMLDocument(String str) {
        Charset utf8 = Charset.forName("utf-8");
        this.data = str.getBytes(utf8);
        this.charset = utf8;
    }

    /**
     * Creates a document from already-encoded bytes.
     *
     * @param bArr the raw bytes; the array is stored as-is, not copied
     * @param charset the charset {@code bArr} is encoded in
     */
    public HTMLDocument(byte[] bArr, Charset charset) {
        this.data = bArr;
        this.charset = charset;
    }

    /**
     * Replaces every {@code &#...;} sequence with {@code #esc#<url-encoded body>#/esc#}.
     *
     * <p>The input is scanned once from left to right; inserted placeholders are not rescanned.
     *
     * @param str the text to rewrite
     * @param str2 name of the charset used for URL-encoding the body
     * @return the rewritten text
     */
    public static String encodeEscapedCharsAsText(String str, String str2) {
        Matcher matcher = ESCAPED_CHAR.matcher(str);
        StringBuffer out = new StringBuffer(str.length());
        while (matcher.find()) {
            String placeholder = "#esc#" + urlEncode(matcher.group(1), str2) + "#/esc#";
            // quoteReplacement: the placeholder is literal text, never a group reference.
            matcher.appendReplacement(out, Matcher.quoteReplacement(placeholder));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /**
     * Replaces every {@code <img ...>} tag with {@code #img#<url-encoded attributes>#/img#}.
     *
     * <p>Only the first occurrence of each distinct placeholder is kept; later tags that would
     * yield an identical placeholder are removed from the text. The input is scanned once from
     * left to right.
     *
     * @param str the text to rewrite
     * @param str2 name of the charset used for URL-encoding the attribute text
     * @return the rewritten text
     */
    public static String encodeImageTagsAsText(String str, String str2) {
        Set<String> seenPlaceholders = new HashSet<String>();
        Matcher matcher = IMAGE_TAG.matcher(str);
        StringBuffer out = new StringBuffer(str.length());
        while (matcher.find()) {
            String placeholder = "#img#" + urlEncode(matcher.group(1), str2) + "#/img#";
            // Set.add returns false for a repeat, in which case the tag is dropped.
            String replacement = seenPlaceholders.add(placeholder) ? placeholder : "";
            matcher.appendReplacement(out, Matcher.quoteReplacement(replacement));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /**
     * Replaces every {@code #esc#...#/esc#} placeholder with {@code &#<url-decoded payload>;}.
     *
     * @param str the text containing placeholders
     * @param str2 name of the charset used for URL-decoding the payload
     * @return the text with placeholders turned back into character references
     * @throws IllegalArgumentException if a payload is not valid URL-encoded text
     *     (propagated from {@link URLDecoder})
     */
    public static String restoreTextEncodedEscapedChars(String str, String str2) {
        Matcher matcher = ENCODED_ESCAPED_CHAR.matcher(str);
        StringBuffer out = new StringBuffer(str.length());
        while (matcher.find()) {
            String restored = "&#" + urlDecode(matcher.group(1), str2) + ";";
            // Decoded text may contain '$' or '\', which must not be read as regex group syntax.
            matcher.appendReplacement(out, Matcher.quoteReplacement(restored));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /**
     * Replaces every {@code #img#...#/img#} placeholder with {@code <img <url-decoded payload>>}.
     *
     * @param str the text containing placeholders
     * @param str2 name of the charset used for URL-decoding the payload
     * @return the text with placeholders turned back into image tags
     * @throws IllegalArgumentException if a payload is not valid URL-encoded text
     *     (propagated from {@link URLDecoder})
     */
    public static String restoreTextEncodedImageTags(String str, String str2) {
        Matcher matcher = ENCODED_IMAGE_TAG.matcher(str);
        StringBuffer out = new StringBuffer(str.length());
        while (matcher.find()) {
            String restored = "<img " + urlDecode(matcher.group(1), str2) + ">";
            // Decoded attributes may contain '$' or '\', which must not be read as group syntax.
            matcher.appendReplacement(out, Matcher.quoteReplacement(restored));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /** Applies {@link #encodeEscapedCharsAsText(String, String)} to this document's content. */
    public void encodeEscapedCharsAsText() {
        this.data = encodeEscapedCharsAsText(text(), this.charset.name()).getBytes(this.charset);
    }

    /** Applies {@link #encodeImageTagsAsText(String, String)} to this document's content. */
    public void encodeImageTagsAsText() {
        this.data = encodeImageTagsAsText(text(), this.charset.name()).getBytes(this.charset);
    }

    /**
     * Returns the charset the document bytes are encoded in.
     *
     * @return the charset given at construction (UTF-8 for the string constructor)
     */
    public Charset getCharset() {
        return this.charset;
    }

    /**
     * Returns the document bytes.
     *
     * @return the internal byte array itself, not a copy
     */
    public byte[] getData() {
        return this.data;
    }

    /** Applies {@link #restoreTextEncodedEscapedChars(String, String)} to this document. */
    public void restoreTextEncodedEscapedChars() {
        this.data =
                restoreTextEncodedEscapedChars(text(), this.charset.name()).getBytes(this.charset);
    }

    /** Applies {@link #restoreTextEncodedImageTags(String, String)} to this document. */
    public void restoreTextEncodedImageTags() {
        this.data =
                restoreTextEncodedImageTags(text(), this.charset.name()).getBytes(this.charset);
    }

    /**
     * Wraps the document bytes in a SAX input source whose encoding is set to this document's
     * charset name.
     *
     * @return a new input source reading from the current bytes
     */
    @Override // de.l3s.boilerpipe.sax.InputSourceable
    public InputSource toInputSource() {
        InputSource inputSource = new InputSource(new ByteArrayInputStream(this.data));
        inputSource.setEncoding(this.charset.name());
        return inputSource;
    }

    /** Decodes the stored bytes with the document's own charset, not the platform default. */
    private String text() {
        return new String(this.data, this.charset);
    }

    /** URL-encodes {@code text}; falls back to the platform default if the charset is unknown. */
    @SuppressWarnings("deprecation")
    private static String urlEncode(String text, String charsetName) {
        try {
            return URLEncoder.encode(text, charsetName);
        } catch (UnsupportedEncodingException e) {
            // Best effort: report the problem and fall back to the default-charset overload.
            e.printStackTrace();
            return URLEncoder.encode(text);
        }
    }

    /** URL-decodes {@code text}; falls back to the platform default if the charset is unknown. */
    @SuppressWarnings("deprecation")
    private static String urlDecode(String text, String charsetName) {
        try {
            return URLDecoder.decode(text, charsetName);
        } catch (UnsupportedEncodingException e) {
            // Best effort: report the problem and fall back to the default-charset overload.
            e.printStackTrace();
            return URLDecoder.decode(text);
        }
    }
}
