/*
 * Decompiled with CFR 0.152.
 */
package org.archive.format.text.charset;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.archive.format.http.HttpHeader;
import org.archive.format.http.HttpHeaders;
import org.mozilla.universalchardet.UniversalDetector;

/**
 * Base class for strategies that work out which character set a document is
 * encoded in.
 *
 * <p>The class offers three building blocks that subclasses combine inside
 * {@link #getCharset(byte[], int, HttpHeaders)}:
 * <ul>
 *   <li>{@link #getCharsetFromHeaders(HttpHeaders)} reads the HTTP
 *       {@code Content-Type} header,</li>
 *   <li>{@link #getCharsetFromMeta(byte[], int)} looks for an HTML
 *       {@code <meta http-equiv="Content-Type" ...>} tag,</li>
 *   <li>{@link #getCharsetFromBytes(byte[], int)} runs byte-level detection
 *       through {@code UniversalDetector}.</li>
 * </ul>
 * Every helper returns {@code null} when it cannot name a charset that the
 * running JVM supports.
 */
public abstract class CharsetDetector {
    private static final String META_TAGNAME = "META";
    private static final String META_CONTENT_ATTRIBUTE = "content";
    private static final String META_HTTP_EQUIV_ATTRIBUTE = "http-equiv";
    private static final String META_CONTENT_TYPE = "Content-Type";

    /** Attribute value wrapped in double quotes: {@code "value"}. */
    private static final String QUOTED_ATTR_VALUE = "(?:\"[^\">]*\")";
    /** Attribute value wrapped in backslash-escaped double quotes: {@code \"value\"}. */
    private static final String ESC_QUOTED_ATTR_VALUE = "(?:\\\\\"[^>\\\\]*\\\\\")";
    /** Attribute value wrapped in apostrophes: {@code 'value'}. */
    private static final String APOSED_ATTR_VALUE = "(?:'[^'>]*')";
    /**
     * Any of the three quoting styles. The trailing empty alternative means an
     * unquoted value is matched as the empty string rather than failing.
     */
    private static final String ANY_ATTR_VALUE =
            QUOTED_ATTR_VALUE + "|" + APOSED_ATTR_VALUE + "|" + ESC_QUOTED_ATTR_VALUE + "|";

    private static final String META_TAG_PATTERN_STRING =
            "<\\s*" + META_TAGNAME + "((>)|(\\s+[^>]*>))";
    private static final String META_CONTENT_ATTR_PATTERN_STRING =
            "\\b" + META_CONTENT_ATTRIBUTE + "\\s*=\\s*(" + ANY_ATTR_VALUE + ")(?:\\s|>)?";
    private static final String META_HTTP_EQUIV_ATTR_PATTERN_STRING =
            "\\b" + META_HTTP_EQUIV_ATTRIBUTE + "\\s*=\\s*(" + ANY_ATTR_VALUE + ")(?:\\s|>)?";

    private static final Pattern META_TAG_PATTERN =
            Pattern.compile(META_TAG_PATTERN_STRING, Pattern.CASE_INSENSITIVE);
    private static final Pattern META_CONTENT_ATTR_PATTERN =
            Pattern.compile(META_CONTENT_ATTR_PATTERN_STRING, Pattern.CASE_INSENSITIVE);
    private static final Pattern META_HTTP_EQUIV_ATTR_PATTERN =
            Pattern.compile(META_HTTP_EQUIV_ATTR_PATTERN_STRING, Pattern.CASE_INSENSITIVE);

    // Not referenced in this class; presumably the sample size subclasses read
    // ahead before calling the helpers (confirm against implementations).
    protected static final int MAX_CHARSET_READAHEAD = 65536;
    /** Token that introduces the charset parameter inside a content-type value. */
    protected static final String CHARSET_TOKEN = "charset=";
    /** Upper-cased name of the HTTP header inspected by {@link #getCharsetFromHeaders}. */
    protected static final String HTTP_CONTENT_TYPE_HEADER = "CONTENT-TYPE";
    /** Charset used to decode the byte sample when scanning for META tags. */
    public static final String DEFAULT_CHARSET = "UTF-8";

    /**
     * Tells whether the running JVM can decode the named charset.
     *
     * @param charsetName candidate name; may be {@code null}
     * @return {@code true} only if the name is non-empty, syntactically legal
     *         and supported; {@code false} otherwise (never throws)
     */
    protected boolean isCharsetSupported(String charsetName) {
        if (charsetName == null || charsetName.isEmpty()) {
            return false;
        }
        try {
            return Charset.isSupported(charsetName);
        } catch (IllegalCharsetNameException e) {
            // Garbage in a header or META tag is expected input, not an error.
            return false;
        }
    }

    /**
     * Substitutes {@code cp1252} for any name containing {@code iso8859-1} or
     * {@code iso-8859-1} (case-insensitively); all other names pass through
     * unchanged.
     *
     * @param orig charset name as found in the document or headers
     * @return {@code "cp1252"} for ISO-8859-1 variants, otherwise {@code orig}
     *         ({@code null} in, {@code null} out)
     */
    protected String mapCharset(String orig) {
        if (orig == null) {
            return null;
        }
        // Locale.ROOT: under a Turkish default locale "ISO" would lower-case to
        // a dotless i and the comparison below would silently fail.
        String lc = orig.toLowerCase(Locale.ROOT);
        if (lc.contains("iso8859-1") || lc.contains("iso-8859-1")) {
            return "cp1252";
        }
        return orig;
    }

    /**
     * Extracts the charset named by a content-type value such as
     * {@code text/html; charset=utf-8}.
     *
     * <p>Candidates are tried in this order and the first supported one wins:
     * the raw text after {@code charset=}; that text with spaces removed; the
     * text cut at the first {@code ;} with surrounding whitespace and quotes
     * stripped; and that cleaned text with spaces removed.
     *
     * @param contentType content-type value; may be {@code null}
     * @return the (possibly {@linkplain #mapCharset mapped}) charset name, or
     *         {@code null} if none is present or supported
     */
    protected String contentTypeToCharset(String contentType) {
        if (contentType == null) {
            return null;
        }
        // Search the original string directly: an offset computed on an
        // upper-cased copy can be wrong because upper-casing may change length.
        int offset = indexOfIgnoreCase(contentType, CHARSET_TOKEN);
        if (offset == -1) {
            return null;
        }
        String raw = contentType.substring(offset + CHARSET_TOKEN.length());
        String cleaned = cleanCharsetValue(raw);
        String[] candidates = {
            raw,
            raw.replace(" ", ""),
            cleaned,
            cleaned.replace(" ", ""),
        };
        for (String candidate : candidates) {
            if (this.isCharsetSupported(candidate)) {
                return this.mapCharset(candidate);
            }
        }
        return null;
    }

    /** Returns the first index at which {@code needle} occurs in {@code haystack}, ignoring case, or -1. */
    private static int indexOfIgnoreCase(String haystack, String needle) {
        int needleLength = needle.length();
        int lastStart = haystack.length() - needleLength;
        for (int i = 0; i <= lastStart; i++) {
            if (haystack.regionMatches(true, i, needle, 0, needleLength)) {
                return i;
            }
        }
        return -1;
    }

    /** Drops trailing {@code ;}-separated parameters plus surrounding whitespace and one pair of quotes. */
    private static String cleanCharsetValue(String raw) {
        String value = raw;
        int semicolon = value.indexOf(';');
        if (semicolon != -1) {
            value = value.substring(0, semicolon);
        }
        value = value.trim();
        if (value.length() >= 2) {
            char first = value.charAt(0);
            char last = value.charAt(value.length() - 1);
            if ((first == '"' || first == '\'') && first == last) {
                value = value.substring(1, value.length() - 1).trim();
            }
        }
        return value;
    }

    /**
     * Reads the charset from the first {@code Content-Type} header.
     *
     * <p>Only the first matching header is consulted; if it names no supported
     * charset the result is {@code null} even when later headers exist.
     *
     * @param headers response headers; may be {@code null}
     * @return charset name, or {@code null} if unavailable
     * @throws IOException declared for subclasses; not thrown by this implementation itself
     */
    protected String getCharsetFromHeaders(HttpHeaders headers) throws IOException {
        if (headers == null) {
            return null;
        }
        for (HttpHeader header : headers) {
            String name = header.getName();
            if (name == null || !HTTP_CONTENT_TYPE_HEADER.equalsIgnoreCase(name.trim())) {
                continue;
            }
            return this.contentTypeToCharset(header.getValue());
        }
        return null;
    }

    /**
     * Reads the charset from an HTML {@code <meta http-equiv="Content-Type">}
     * tag found in the first {@code len} bytes of {@code buffer}.
     *
     * @param buffer raw document bytes
     * @param len number of bytes of {@code buffer} to inspect
     * @return charset name, or {@code null} if no usable META tag is found
     * @throws IOException if the sample cannot be decoded as {@link #DEFAULT_CHARSET}
     */
    protected String getCharsetFromMeta(byte[] buffer, int len) throws IOException {
        String sample = new String(buffer, 0, len, DEFAULT_CHARSET);
        String metaContentType = CharsetDetector.findMetaContentType(sample);
        if (metaContentType == null) {
            return null;
        }
        return this.contentTypeToCharset(metaContentType);
    }

    /**
     * Removes the quoting around an attribute value captured by the attribute
     * patterns: {@code "v"}, {@code 'v'} or backslash-escaped {@code \"v\"}.
     * Values in any other shape are returned untouched.
     */
    private static String trimAttrValue(String value) {
        int length = value.length();
        if (length >= 4 && value.startsWith("\\\"") && value.endsWith("\\\"")) {
            // The regex accepts \"...\" so the trimming has to as well.
            return value.substring(2, length - 2);
        }
        if (length >= 2) {
            char first = value.charAt(0);
            if (first == '"' || first == '\'') {
                return value.substring(1, length - 1);
            }
        }
        return value;
    }

    /**
     * Finds the {@code content} attribute of the first META tag whose
     * {@code http-equiv} attribute equals {@code Content-Type} (ignoring case).
     *
     * @param pageSample decoded start of an HTML document; may be {@code null}
     * @return the unquoted {@code content} value; the empty string if that
     *         value is unquoted in the markup; or {@code null} if there is no
     *         such tag or the tag has no {@code content} attribute
     */
    public static String findMetaContentType(String pageSample) {
        if (pageSample == null) {
            return null;
        }
        Matcher tagMatcher = META_TAG_PATTERN.matcher(pageSample);
        while (tagMatcher.find()) {
            String wholeTag = tagMatcher.group();
            Matcher httpEquivMatcher = META_HTTP_EQUIV_ATTR_PATTERN.matcher(wholeTag);
            if (!httpEquivMatcher.find()) {
                continue;
            }
            String httpEquiv = trimAttrValue(httpEquivMatcher.group(1));
            if (!META_CONTENT_TYPE.equalsIgnoreCase(httpEquiv)) {
                continue;
            }
            // The first Content-Type META tag decides the outcome, even when
            // it carries no content attribute.
            Matcher contentMatcher = META_CONTENT_ATTR_PATTERN.matcher(wholeTag);
            if (contentMatcher.find()) {
                return trimAttrValue(contentMatcher.group(1));
            }
            return null;
        }
        return null;
    }

    /**
     * Detects the charset by feeding the byte sample to {@code UniversalDetector}.
     *
     * @param buffer raw document bytes
     * @param len number of bytes of {@code buffer} to inspect
     * @return the (possibly {@linkplain #mapCharset mapped}) detected charset,
     *         or {@code null} if nothing supported was detected
     * @throws IOException declared for subclasses; not thrown by this implementation itself
     */
    protected String getCharsetFromBytes(byte[] buffer, int len) throws IOException {
        UniversalDetector detector = new UniversalDetector(null);
        detector.handleData(buffer, 0, len);
        detector.dataEnd();
        String charsetName = detector.getDetectedCharset();
        detector.reset();
        if (this.isCharsetSupported(charsetName)) {
            return this.mapCharset(charsetName);
        }
        return null;
    }

    /**
     * Determines the charset of a document.
     *
     * @param buffer raw document bytes
     * @param len number of valid bytes in {@code buffer}
     * @param headers HTTP headers that accompanied the document
     * @return the charset name chosen by the implementation
     * @throws IOException if detection fails
     */
    public abstract String getCharset(byte[] buffer, int len, HttpHeaders headers) throws IOException;
}

