package edu.stanford.nlp.trees.international.pennchinese;

import edu.stanford.nlp.international.morph.MorphoFeatures;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.objectbank.TokenizerFactory;
import edu.stanford.nlp.process.PTBLexer;
import edu.stanford.nlp.trees.AbstractTreebankLanguagePack;
import edu.stanford.nlp.trees.BobChrisTreeNormalizer;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.TreeReaderFactory;
import edu.stanford.nlp.util.Filter;
import edu.stanford.nlp.util.Filters;

/* loaded from: input_file:lib/stanford-corenlp-2012-07-09.jar:edu/stanford/nlp/trees/international/pennchinese/ChineseTreebankLanguagePack.class */
/**
 * Treebank language pack for Chinese (package {@code pennchinese}): supplies the
 * file encoding, punctuation tag/word inventories, start symbols, label
 * annotation characters, and the factories (tokenizer, tree reader, head
 * finder, grammatical structure) used when processing Chinese trees.
 *
 * <p>The tokenizer factory override is held in a static field and is therefore
 * shared by every instance of this class.
 */
public class ChineseTreebankLanguagePack extends AbstractTreebankLanguagePack {
    private static final long serialVersionUID = 5757403475523638802L;

    /** Optional tokenizer factory override; {@code null} means "defer to the superclass". */
    private static TokenizerFactory<? extends HasWord> tf;

    /** Character encoding reported by {@link #getEncoding()}. */
    public static final String ENCODING = "GB18030";

    private static final char[] annotationIntroducingChars = {'-', '=', '|', '#', '^', '~'};
    private static final String[] startSymbols = {"ROOT"};
    private static final String[] tags = {"PU"};
    private static final String[] comma = {",", "，", "\u3000"};
    private static final String[] endSentence = {"。", "．", "！", "？", "?", "!", "."};
    private static final String[] douHao = {"、"};
    private static final String[] quoteMark = {"“", "”", "‘", "’", "《", "》", "『", "』", "〈", "〉", "「", "」", "＂", "＜", "＞", "'", "`", "＇", "｀", "｢", "｣"};
    private static final String[] parenthesis = {"（", "）", PTBLexer.openparen, PTBLexer.closeparen, "【", "】", "〔", "〖", "〘", "〚", "｟", "〕", "〗", "〙", "〛", "｠"};
    private static final String[] colon = {"：", "；", "∶", MorphoFeatures.KEY_VAL_DELIM};
    private static final String[] dash = {PTBLexer.unicodeEllipsisStr, "—", "——", "———", "－", "－－", "──", "━", "━━", "—－", "-", "----", "~", "……", "～", "．．．"};
    private static final String[] other = {"·", "／", "／", "＊", "＆", "/", "//", "*"};
    private static final String[] leftQuoteMark = {"“", "‘", "《", "『", "〈", "「", "＜", "`", "｀", "｢"};
    private static final String[] rightQuoteMark = {"”", "’", "》", "』", "〉", "」", "＞", "＇", "｣"};
    private static final String[] leftParenthesis = {"（", PTBLexer.openparen, "【", "〔", "〖", "〘", "〚", "｟"};
    private static final String[] rightParenthesis = {"）", PTBLexer.closeparen, "】", "〕", "〗", "〙", "〛", "｠"};

    /**
     * Every punctuation word, in category order: comma, end-of-sentence, douHao,
     * quote marks, parentheses, colon, dash, other.
     *
     * <p>The left/right quote and parenthesis arrays are deliberately not
     * appended: each of their entries already appears in {@code quoteMark} or
     * {@code parenthesis}. Sizing the array for them without copying them (as
     * was done previously) left trailing {@code null} elements in the result
     * of {@link #punctuationWords()}.
     */
    private static final String[] punctWords =
            concat(comma, endSentence, douHao, quoteMark, parenthesis, colon, dash, other);

    /** Single membership filter over {@link #punctWords}, built once instead of on every lookup. */
    private static final Filter<String> punctWordFilter = Filters.collectionAcceptFilter(punctWords);

    /** Concatenates the given arrays, in order, into one new array sized to fit exactly. */
    private static String[] concat(String[]... groups) {
        int total = 0;
        for (String[] group : groups) {
            total += group.length;
        }
        String[] joined = new String[total];
        int offset = 0;
        for (String[] group : groups) {
            System.arraycopy(group, 0, joined, offset, group.length);
            offset += group.length;
        }
        return joined;
    }

    /**
     * Installs a tokenizer factory that {@link #getTokenizerFactory()} will
     * return for all instances of this class.
     *
     * @param tokenizerFactory the factory to use, or {@code null} to fall back
     *                         to the superclass's factory
     */
    public static void setTokenizerFactory(TokenizerFactory<? extends HasWord> tokenizerFactory) {
        tf = tokenizerFactory;
    }

    /**
     * Returns the factory set via {@link #setTokenizerFactory}, or the
     * superclass's factory when none has been set.
     */
    @Override
    public TokenizerFactory<? extends HasWord> getTokenizerFactory() {
        return tf != null ? tf : super.getTokenizerFactory();
    }

    /** Returns {@link #ENCODING}. */
    @Override
    public String getEncoding() {
        return ENCODING;
    }

    /**
     * Tests whether {@code str} is the punctuation tag {@code "PU"}.
     *
     * @param str the tag to test; {@code null} yields {@code false}
     */
    @Override
    public boolean isPunctuationTag(String str) {
        // Constant-first comparison so a null tag is rejected instead of throwing.
        return "PU".equals(str);
    }

    /**
     * Tests whether {@code str} is one of the comma, end-of-sentence, douHao,
     * quote mark, parenthesis, colon, dash, or "other" punctuation words.
     */
    @Override
    public boolean isPunctuationWord(String str) {
        // punctWords is exactly the union of those eight categories, so one
        // lookup replaces building and querying eight separate filters.
        return punctWordFilter.accept(str);
    }

    /**
     * Tests {@code str} against the end-of-sentence punctuation words.
     */
    @Override
    public boolean isSentenceFinalPunctuationTag(String str) {
        // NOTE(review): this matches against sentence-final *words*, whereas
        // sentenceFinalPunctuationTags() returns {"PU"}. Behavior is preserved
        // here; confirm whether a tag comparison was intended.
        return chineseEndSentenceAcceptFilter().accept(str);
    }

    /** Returns the punctuation tags. The shared internal array is returned; callers must not modify it. */
    @Override
    public String[] punctuationTags() {
        return tags;
    }

    /** Returns all punctuation words. The shared internal array is returned; callers must not modify it. */
    @Override
    public String[] punctuationWords() {
        return punctWords;
    }

    /** Returns the same array as {@link #punctuationTags()}. */
    @Override
    public String[] sentenceFinalPunctuationTags() {
        return tags;
    }

    /** Returns the end-of-sentence punctuation words. The shared internal array is returned. */
    @Override
    public String[] sentenceFinalPunctuationWords() {
        return endSentence;
    }

    /** Tests whether {@code str} is accepted by a filter over the punctuation tags. */
    @Override
    public boolean isEvalBIgnoredPunctuationTag(String str) {
        return Filters.collectionAcceptFilter(tags).accept(str);
    }

    /** Returns the characters {@code - = | # ^ ~}. The shared internal array is returned. */
    @Override
    public char[] labelAnnotationIntroducingCharacters() {
        return annotationIntroducingChars;
    }

    /** Returns the start symbols ({@code "ROOT"}). The shared internal array is returned. */
    @Override
    public String[] startSymbols() {
        return startSymbols;
    }

    /** Returns a new filter over the comma words. */
    public static Filter<String> chineseCommaAcceptFilter() {
        return Filters.collectionAcceptFilter(comma);
    }

    /** Returns a new filter over the end-of-sentence words. */
    public static Filter<String> chineseEndSentenceAcceptFilter() {
        return Filters.collectionAcceptFilter(endSentence);
    }

    /** Returns a new filter over the douHao words. */
    public static Filter<String> chineseDouHaoAcceptFilter() {
        return Filters.collectionAcceptFilter(douHao);
    }

    /** Returns a new filter over all quote marks (left and right). */
    public static Filter<String> chineseQuoteMarkAcceptFilter() {
        return Filters.collectionAcceptFilter(quoteMark);
    }

    /** Returns a new filter over all parentheses (left and right). */
    public static Filter<String> chineseParenthesisAcceptFilter() {
        return Filters.collectionAcceptFilter(parenthesis);
    }

    /** Returns a new filter over the colon words. */
    public static Filter<String> chineseColonAcceptFilter() {
        return Filters.collectionAcceptFilter(colon);
    }

    /** Returns a new filter over the dash words. */
    public static Filter<String> chineseDashAcceptFilter() {
        return Filters.collectionAcceptFilter(dash);
    }

    /** Returns a new filter over the "other" punctuation words. */
    public static Filter<String> chineseOtherAcceptFilter() {
        return Filters.collectionAcceptFilter(other);
    }

    /** Returns a new filter over the left (opening) parentheses. */
    public static Filter<String> chineseLeftParenthesisAcceptFilter() {
        return Filters.collectionAcceptFilter(leftParenthesis);
    }

    /** Returns a new filter over the right (closing) parentheses. */
    public static Filter<String> chineseRightParenthesisAcceptFilter() {
        return Filters.collectionAcceptFilter(rightParenthesis);
    }

    /** Returns a new filter over the left (opening) quote marks. */
    public static Filter<String> chineseLeftQuoteMarkAcceptFilter() {
        return Filters.collectionAcceptFilter(leftQuoteMark);
    }

    /** Returns a new filter over the right (closing) quote marks. */
    public static Filter<String> chineseRightQuoteMarkAcceptFilter() {
        return Filters.collectionAcceptFilter(rightQuoteMark);
    }

    /** Returns the treebank file extension, {@code "fid"}. */
    @Override
    public String treebankFileExtension() {
        return "fid";
    }

    /** Returns a factory configured with the {@code ChineseGrammaticalStructure} class name. */
    @Override
    public GrammaticalStructureFactory grammaticalStructureFactory() {
        return new GrammaticalStructureFactory("edu.stanford.nlp.trees.international.pennchinese.ChineseGrammaticalStructure");
    }

    /**
     * Returns a factory configured with the {@code ChineseGrammaticalStructure}
     * class name and the given filter.
     *
     * @param filter passed through to the factory
     */
    @Override
    public GrammaticalStructureFactory grammaticalStructureFactory(Filter<String> filter) {
        return new GrammaticalStructureFactory("edu.stanford.nlp.trees.international.pennchinese.ChineseGrammaticalStructure", filter);
    }

    /**
     * Returns a factory configured with the {@code ChineseGrammaticalStructure}
     * class name, the given filter, and the given head finder.
     *
     * @param filter     passed through to the factory
     * @param headFinder passed through to the factory
     */
    @Override
    public GrammaticalStructureFactory grammaticalStructureFactory(Filter<String> filter, HeadFinder headFinder) {
        return new GrammaticalStructureFactory("edu.stanford.nlp.trees.international.pennchinese.ChineseGrammaticalStructure", filter, headFinder);
    }

    /** Returns a new {@code CTBTreeReaderFactory} wrapping a new {@code BobChrisTreeNormalizer}. */
    @Override
    public TreeReaderFactory treeReaderFactory() {
        return new CTBTreeReaderFactory(new BobChrisTreeNormalizer());
    }

    /** Returns a new {@code ChineseHeadFinder} bound to this language pack. */
    @Override
    public HeadFinder headFinder() {
        return new ChineseHeadFinder(this);
    }

    /** Returns a new {@code ChineseHeadFinder} bound to this language pack. */
    @Override
    public HeadFinder typedDependencyHeadFinder() {
        return new ChineseHeadFinder(this);
    }
}
