package org.tribuo.data.text.impl;

import com.oracle.labs.mlrg.olcut.config.Config;
import com.oracle.labs.mlrg.olcut.provenance.ConfiguredObjectProvenance;
import com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.tribuo.Feature;
import org.tribuo.data.text.FeatureAggregator;
import org.tribuo.data.text.FeatureTransformer;
import org.tribuo.data.text.TextPipeline;
import org.tribuo.data.text.TextProcessingException;
import org.tribuo.data.text.TextProcessor;
import org.tribuo.util.tokens.Tokenizer;

/* loaded from: input_file:org/tribuo/data/text/impl/TokenPipeline.class */
/**
 * A {@link TextPipeline} that runs one {@link NgramProcessor} for every n-gram size from
 * 1 up to {@code ngram}, optionally passes the resulting features through a
 * {@link FeatureHasher}, and finally collapses them with a {@link FeatureAggregator}.
 *
 * <p>The {@code @Config} fields are the configurable state; {@code processors},
 * {@code transformers} and {@code aggregator} are derived from them by {@link #postConfig()}.
 */
public class TokenPipeline implements TextPipeline {
    private static final Logger logger = Logger.getLogger(TokenPipeline.class.getName());

    // Derived state: rebuilt from the configured fields every time postConfig() runs.
    private final List<TextProcessor> processors = new ArrayList<>();
    private final List<FeatureTransformer> transformers = new ArrayList<>();
    private FeatureAggregator aggregator;

    @Config(mandatory = true, description = "Use term counting, otherwise emit binary features.")
    private boolean termCounting;

    // Hashing is only enabled when this is strictly positive (see postConfig()).
    @Config(description = "Dimension to map the hash into.")
    private int hashDim = -1;

    @Config(description = "Should feature hashing preserve the value?")
    private boolean hashPreserveValue = true;

    @Config(mandatory = true, description = "Tokenizer to use.")
    private Tokenizer tokenizer;

    @Config(description = "n in the n-gram to emit.")
    private int ngram = 2;

    /**
     * Creates a pipeline without feature hashing.
     *
     * @param tokenizer    the tokenizer handed to every n-gram processor
     * @param ngram        the largest n-gram size to emit; sizes 1..ngram are all emitted
     * @param termCounting if true features are summed, otherwise a unique aggregator is used
     */
    public TokenPipeline(Tokenizer tokenizer, int ngram, boolean termCounting) {
        this(tokenizer, ngram, termCounting, -1);
    }

    /**
     * Creates a pipeline whose feature hashing preserves feature values.
     *
     * @param tokenizer    the tokenizer handed to every n-gram processor
     * @param ngram        the largest n-gram size to emit; sizes 1..ngram are all emitted
     * @param termCounting if true features are summed, otherwise a unique aggregator is used
     * @param hashDim      the hashing dimension; hashing is skipped unless this is positive
     */
    public TokenPipeline(Tokenizer tokenizer, int ngram, boolean termCounting, int hashDim) {
        this(tokenizer, ngram, termCounting, hashDim, true);
    }

    /**
     * Creates a fully specified pipeline and builds its processing stages.
     *
     * @param tokenizer         the tokenizer handed to every n-gram processor
     * @param ngram             the largest n-gram size to emit; sizes 1..ngram are all emitted
     * @param termCounting      if true features are summed, otherwise a unique aggregator is used
     * @param hashDim           the hashing dimension; hashing is skipped unless this is positive
     * @param hashPreserveValue forwarded to the {@link FeatureHasher} when hashing is enabled
     */
    public TokenPipeline(Tokenizer tokenizer, int ngram, boolean termCounting, int hashDim, boolean hashPreserveValue) {
        this.tokenizer = tokenizer;
        this.ngram = ngram;
        this.hashDim = hashDim;
        this.termCounting = termCounting;
        this.hashPreserveValue = hashPreserveValue;
        // NOTE(review): postConfig() is public and overridable, so a subclass override runs
        // before the subclass constructor body; kept as-is to preserve existing behaviour.
        postConfig();
    }

    /**
     * No-arg constructor that leaves the optional fields at their defaults and does not build
     * the processing stages. Presumably used by the configuration system, which would populate
     * the {@code @Config} fields and then invoke {@link #postConfig()} - confirm against callers.
     */
    private TokenPipeline() {
    }

    /**
     * Builds the processors, transformers and aggregator from the configured fields.
     *
     * <p>The derived lists are cleared first so that invoking this method more than once
     * rebuilds the pipeline instead of appending duplicate stages.
     */
    public void postConfig() {
        this.processors.clear();
        this.transformers.clear();

        // One processor per n-gram size, from unigrams up to the configured maximum.
        for (int n = 1; n <= this.ngram; n++) {
            this.processors.add(new NgramProcessor(this.tokenizer, n, 1.0d));
        }
        if (this.hashDim > 0) {
            this.transformers.add(new FeatureHasher(this.hashDim, this.hashPreserveValue));
        }
        if (this.termCounting) {
            this.aggregator = new SumAggregator();
        } else {
            this.aggregator = new UniqueAggregator(1.0d);
        }
    }

    /**
     * Describes the pipeline by its n-gram range, noting whether a transformer (hashing) is
     * installed.
     *
     * @return a short human-readable description of this pipeline
     */
    @Override
    public String toString() {
        String suffix = this.transformers.isEmpty() ? "}-grams)" : "}-grams,hashing)";
        return this.ngram + "gramPipeline({1.." + this.ngram + suffix;
    }

    /**
     * Runs every processor over the input, applies each transformer in order, and aggregates
     * the result.
     *
     * <p>A {@link TextProcessingException} from one processor is logged at INFO level and that
     * processor's output is skipped; the remaining processors still run.
     *
     * @param tag  value forwarded unchanged to every processor and transformer (presumably a
     *             feature-name prefix - confirm against {@link TextProcessor})
     * @param text the text to extract features from
     * @return the aggregated features
     */
    @Override // org.tribuo.data.text.TextPipeline
    public List<Feature> process(String tag, String text) {
        List<Feature> features = new ArrayList<>();
        for (TextProcessor processor : this.processors) {
            try {
                features.addAll(processor.process(tag, text));
            } catch (TextProcessingException e) {
                // Best effort: one failing processor must not discard the others' features.
                logger.log(Level.INFO, String.format("TextProcessingException thrown by processor %s with text %s", processor, text), e);
            }
        }
        for (FeatureTransformer transformer : this.transformers) {
            features = transformer.map(tag, features);
        }
        return this.aggregator.aggregate(features);
    }

    /**
     * Returns the provenance of this configured object.
     *
     * <p>The decompiler had renamed this method to {@code m72getProvenance} while merging it
     * with its bridge method; the original name is restored here.
     *
     * @return a provenance object built from this instance, labelled "TextPipeline"
     */
    @Override
    public ConfiguredObjectProvenance getProvenance() {
        return new ConfiguredObjectProvenanceImpl(this, "TextPipeline");
    }

    /**
     * Decompiler-generated alias retained so existing callers keep compiling.
     *
     * @return the same value as {@link #getProvenance()}
     * @deprecated use {@link #getProvenance()} instead.
     */
    @Deprecated
    public ConfiguredObjectProvenance m72getProvenance() {
        return getProvenance();
    }
}
