package org.tribuo.util.tokens.impl;

import com.oracle.labs.mlrg.olcut.config.Config;
import com.oracle.labs.mlrg.olcut.provenance.ConfiguredObjectProvenance;
import com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl;
import java.text.BreakIterator;
import java.util.Locale;
import org.tribuo.util.tokens.Token;
import org.tribuo.util.tokens.Tokenizer;

/* loaded from: input_file:org/tribuo/util/tokens/impl/BreakIteratorTokenizer.class */
/**
 * A {@link Tokenizer} that segments text with a locale-specific word
 * {@link BreakIterator}.
 *
 * <p>Segments that are empty after {@link String#trim()} are skipped, and every
 * emitted token is reported with type {@link Token.TokenType#WORD}.
 *
 * <p>An instance holds mutable per-text iteration state (the current text, the
 * break iterator position and the current token).
 * NOTE(review): presumably not safe for concurrent use; use a separate instance
 * (e.g. a clone) per thread - confirm against callers.
 */
public class BreakIteratorTokenizer implements Tokenizer {

    @Config(mandatory = true, description = "The locale language tag string.")
    private String localeStr;
    /** Locale used to build the word break iterator. */
    private Locale locale;
    /** Word-boundary iterator over the text supplied to {@link #reset(CharSequence)}. */
    private BreakIterator breakIterator;
    /** The text being tokenized; {@code null} until {@link #reset(CharSequence)} is called. */
    private CharSequence cs;
    /** Index in {@link #cs} where the next, not yet consumed, segment begins. */
    private int start;
    /** Start index of the current token, or -1 if there is none. */
    private int startOffset;
    /** End index (exclusive) of the current token, or -1 if there is none. */
    private int endOffset;
    /** Text of the current token, or {@code null} if there is none. */
    private String token;
    /** True once {@link #advance()} has produced a token for the current text. */
    private boolean ready;

    /**
     * No-argument constructor.
     * NOTE(review): presumably used by the OLCUT configuration system, which would
     * then populate {@code localeStr} and call {@link #postConfig()} - confirm.
     */
    private BreakIteratorTokenizer() {
    }

    /**
     * Creates a tokenizer that breaks text on the word boundaries of the given locale.
     *
     * @param locale the locale whose word-breaking rules are used; must not be null
     */
    public BreakIteratorTokenizer(Locale locale) {
        this.locale = locale;
        this.localeStr = locale.toLanguageTag();
        this.breakIterator = BreakIterator.getWordInstance(locale);
        this.ready = false;
        this.cs = null;
    }

    /**
     * Rebuilds the locale and the break iterator from the configured language tag
     * and clears any tokenization state, leaving the tokenizer in the "not reset"
     * state.
     */
    public void postConfig() {
        this.locale = Locale.forLanguageTag(this.localeStr);
        this.breakIterator = BreakIterator.getWordInstance(this.locale);
        this.ready = false;
        this.cs = null;
    }

    /**
     * Returns the language tag of the locale this tokenizer was built for.
     *
     * @return the locale language tag string
     */
    public String getLanguageTag() {
        return this.localeStr;
    }

    /**
     * Builds the provenance describing this tokenizer's configuration.
     *
     * @return a new provenance object for this instance
     */
    /* renamed from: getProvenance, reason: merged with bridge method [inline-methods] */
    public ConfiguredObjectProvenance m2getProvenance() {
        return new ConfiguredObjectProvenanceImpl(this, "Tokenizer");
    }

    /**
     * Starts tokenizing a new piece of text and discards any current token.
     *
     * @param charSequence the text to tokenize; must not be null
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    public void reset(CharSequence charSequence) {
        this.cs = charSequence;
        this.breakIterator.setText(charSequence.toString());
        this.start = this.breakIterator.first();
        this.startOffset = -1;
        this.endOffset = -1;
        this.token = null;
        this.ready = false;
    }

    /**
     * Moves to the next non-blank segment of the text.
     *
     * <p>Blank segments (empty after {@link String#trim()}) are consumed but never
     * published as the current token. Consequently, when this method returns
     * {@code false} the accessors still describe the last real token, if any,
     * rather than trailing whitespace.
     *
     * @return {@code true} if a new token is available, {@code false} when the text is exhausted
     * @throws IllegalStateException if {@link #reset(CharSequence)} has not been called
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    public boolean advance() {
        if (this.cs == null) {
            throw new IllegalStateException("BreakIteratorTokenizer has not been reset.");
        }
        for (int end = this.breakIterator.next(); end != BreakIterator.DONE; end = this.breakIterator.next()) {
            int begin = this.start;
            String candidate = this.cs.subSequence(begin, end).toString();
            // Always move the cursor so the next segment starts where this one ended,
            // even when this segment is skipped.
            this.start = end;
            if (!candidate.trim().isEmpty()) {
                // Publish token state only for segments that are actually returned.
                this.token = candidate;
                this.startOffset = begin;
                this.endOffset = end;
                this.ready = true;
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the text of the current token.
     *
     * @return the current token text
     * @throws IllegalStateException if no token has been produced since the last reset
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    public String getText() {
        if (this.ready) {
            return this.token;
        }
        throw new IllegalStateException("BreakIteratorTokenizer is not ready.");
    }

    /**
     * Returns the start index of the current token within the text.
     *
     * @return the inclusive start offset of the current token
     * @throws IllegalStateException if no token has been produced since the last reset
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    public int getStart() {
        if (this.ready) {
            return this.startOffset;
        }
        throw new IllegalStateException("BreakIteratorTokenizer is not ready.");
    }

    /**
     * Returns the end index of the current token within the text.
     *
     * @return the exclusive end offset of the current token
     * @throws IllegalStateException if no token has been produced since the last reset
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    public int getEnd() {
        if (this.ready) {
            return this.endOffset;
        }
        throw new IllegalStateException("BreakIteratorTokenizer is not ready.");
    }

    /**
     * Returns the type of the current token, which is always
     * {@link Token.TokenType#WORD} for this tokenizer.
     *
     * @return {@link Token.TokenType#WORD}
     * @throws IllegalStateException if no token has been produced since the last reset
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    public Token.TokenType getType() {
        if (this.ready) {
            return Token.TokenType.WORD;
        }
        throw new IllegalStateException("BreakIteratorTokenizer is not ready.");
    }

    /**
     * Creates a copy of this tokenizer with its own break iterator and no
     * tokenization state; the copy must be {@link #reset(CharSequence) reset}
     * before use.
     *
     * @return an independent tokenizer configured with the same language tag
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    /* renamed from: clone, reason: merged with bridge method [inline-methods] */
    public BreakIteratorTokenizer m1clone() {
        try {
            BreakIteratorTokenizer copy = (BreakIteratorTokenizer) super.clone();
            // super.clone() is shallow: rebuild the break iterator so the copy does not
            // share iteration state with this instance.
            copy.postConfig();
            return copy;
        } catch (CloneNotSupportedException e) {
            // Keep the original exception as the cause so the failure is diagnosable.
            throw new AssertionError("BreakIteratorTokenizer is Cloneable, but clone call failed", e);
        }
    }
}
