package sklearn2pmml.feature_extraction.text;

import com.google.common.base.Joiner;
import java.util.List;
import org.dmg.pmml.TextIndex;
import sklearn.feature_extraction.text.Tokenizer;

/* loaded from: input_file:sklearn2pmml/feature_extraction/text/Splitter.class */
/**
 * A {@link Tokenizer} that splits text on a configurable word separator.
 *
 * <p>The separator is a regular expression kept under the
 * {@code "word_separator_re"} attribute. It is used both to configure a
 * {@link TextIndex} and to build the pattern that matches stop words.
 */
public class Splitter extends Tokenizer {

    /**
     * Creates a splitter using the default identifiers
     * {@code "sklearn2pmml.feature_extraction.text"} and {@code "Splitter"}.
     */
    public Splitter() {
        this("sklearn2pmml.feature_extraction.text", "Splitter");
    }

    /**
     * Creates a splitter, forwarding both identifiers to the {@link Tokenizer}
     * constructor unchanged.
     *
     * @param str  first identifier; presumably the Python module name (confirm
     *             against {@code Tokenizer})
     * @param str2 second identifier; presumably the Python class name (confirm
     *             against {@code Tokenizer})
     */
    public Splitter(String str, String str2) {
        super(str, str2);
    }

    /**
     * Enables tokenization on the given text index and assigns it this
     * splitter's word separator expression.
     *
     * @param textIndex the text index to configure
     * @return whatever the final setter call on {@code textIndex} returns
     */
    @Override // sklearn.feature_extraction.text.Tokenizer
    public TextIndex configure(TextIndex textIndex) {
        // Tokenization is switched on before the separator is looked up.
        TextIndex tokenizing = textIndex.setTokenize(Boolean.TRUE);
        String separatorRE = getWordSeparatorRE();
        return tokenizing.setWordSeparatorCharacterRE(separatorRE);
    }

    /**
     * Builds a regular expression matching any of the given stop words.
     *
     * <p>A match must be preceded by the start of input or a word separator and
     * followed by a word separator or the end of input; any run of punctuation
     * ({@code \p{Punct}*}) is allowed directly on either side of the stop word.
     * The stop words are inserted verbatim as {@code |}-separated alternatives.
     *
     * @param list the stop words to match
     * @return the assembled regular expression
     */
    @Override // sklearn.feature_extraction.text.Tokenizer
    public String formatStopWordsRE(List<String> list) {
        String separatorRE = getWordSeparatorRE();
        String alternatives = Joiner.on("|").join(list);

        String leadingBoundary = "(^|" + separatorRE + ")";
        String stopWordGroup = "(" + alternatives + ")";
        String trailingBoundary = "(" + separatorRE + "|$)";

        return leadingBoundary + "\\p{Punct}*" + stopWordGroup + "\\p{Punct}*" + trailingBoundary;
    }

    /**
     * Stores the given value as the word separator expression.
     *
     * <p>NOTE(review): the name mirrors Python's {@code __setstate__} hook, so this
     * is presumably called while restoring pickled state; confirm against the
     * unpickling code.
     *
     * @param str the word separator regular expression
     */
    public void __setstate__(String str) {
        // Delegates to the public setter; the returned instance is not needed here.
        setWordSeparatorRE(str);
    }

    /**
     * Reads the word separator expression from the {@code "word_separator_re"}
     * attribute.
     *
     * @return the word separator regular expression
     */
    public String getWordSeparatorRE() {
        String separatorRE = getString("word_separator_re");
        return separatorRE;
    }

    /**
     * Writes the word separator expression to the {@code "word_separator_re"}
     * attribute.
     *
     * @param str the word separator regular expression
     * @return this splitter, to allow call chaining
     */
    public Splitter setWordSeparatorRE(String str) {
        setattr("word_separator_re", str);

        return this;
    }
}
