package org.tribuo.data.columnar;

import com.oracle.labs.mlrg.olcut.config.Config;
import com.oracle.labs.mlrg.olcut.config.Configurable;
import com.oracle.labs.mlrg.olcut.config.PropertyException;
import com.oracle.labs.mlrg.olcut.provenance.ConfiguredObjectProvenance;
import com.oracle.labs.mlrg.olcut.provenance.Provenancable;
import com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.tribuo.Example;
import org.tribuo.ImmutableFeatureMap;
import org.tribuo.Model;
import org.tribuo.Output;
import org.tribuo.VariableInfo;
import org.tribuo.data.columnar.ColumnarIterator;
import org.tribuo.impl.ArrayExample;

/* loaded from: input_file:org/tribuo/data/columnar/RowProcessor.class */
public class RowProcessor<T extends Output<T>> implements Configurable, Provenancable<ConfiguredObjectProvenance> {
    /** Logger for this class; also used by the nested {@code Builder}. */
    private static final Logger logger = Logger.getLogger(RowProcessor.class.getName());
    /** Regex character class matching either {@code '@'} or {@code '#'}. */
    private static final String FEATURE_NAME_REGEX = "[@#]";
    /** Compiled {@link #FEATURE_NAME_REGEX}; applied to feature names in {@code expandRegexMapping(ImmutableFeatureMap)}. */
    private static final Pattern FEATURE_NAME_PATTERN = Pattern.compile(FEATURE_NAME_REGEX);

    /** Extractors whose values populate the example metadata; {@link #postConfig()} rejects duplicate metadata names. */
    @Config(description = "Extractors for the example metadata.")
    private List<FieldExtractor<?>> metadataExtractors;

    /** Optional weight extractor; when {@code null} examples are built without an explicit weight. */
    @Config(description = "Extractor for the example weight.")
    protected FieldExtractor<Float> weightExtractor;

    /** Turns the response field values of a row into an output. */
    @Config(mandatory = true, description = "Processor which extracts the response.")
    protected ResponseProcessor<T> responseProcessor;

    /** List view of the field processors; {@link #postConfig()} keeps it in step with {@link #fieldProcessorMap}. */
    @Config(mandatory = true, description = "The list of field processors to use.")
    private List<FieldProcessor> fieldProcessorList;
    /** Field processors keyed by the row field they read; this is the map consulted when generating features. */
    protected Map<String, FieldProcessor> fieldProcessorMap;

    /** Processors applied, one after another, to the feature list produced by the field processors. */
    @Config(description = "A set of feature processors to apply after extraction.")
    private Set<FeatureProcessor> featureProcessors;

    /** Regex-to-processor templates that are not yet bound to concrete field names. */
    @Config(description = "A map from a regex to field processors to apply to fields matching the regex.")
    protected Map<String, FieldProcessor> regexMappingProcessors;

    /** When true, {@code '\n'} in a field value is replaced by a space before the value is processed. */
    @Config(description = "Replace newlines with spaces in values before passing them to field processors.")
    protected boolean replaceNewlinesWithSpaces;
    /** True once no unbound regex mappings remain; feature generation refuses to run while this is false. */
    protected boolean configured;

    /**
     * Fluent builder for {@link RowProcessor}.
     *
     * <p>All collection-valued setters take a defensive copy of their argument, so the
     * builder never mutates a collection owned by the caller and the {@code add*}
     * methods keep working even when an immutable collection was supplied.
     *
     * @param <T> the output type produced by the built processor
     */
    public static class Builder<T extends Output<T>> {
        private FieldExtractor<Float> weightExtractor;
        private boolean replaceNewLinesWithSpaces = true;
        private List<FieldExtractor<?>> metadataExtractors = new ArrayList<>();
        private Set<FeatureProcessor> featureProcessors = new HashSet<>();
        private Map<String, FieldProcessor> regexMappingProcessors = new HashMap<>();
        private Map<String, FieldProcessor> fieldProcessors = new HashMap<>();

        /**
         * Sets whether newlines in field values are replaced with spaces (defaults to true).
         *
         * @param z the new setting
         * @return this builder
         */
        public Builder<T> setReplaceNewLinesWithSpaces(boolean z) {
            this.replaceNewLinesWithSpaces = z;
            return this;
        }

        /**
         * Sets the extractor used for the example weight; {@code null} means no weight extractor.
         *
         * @param fieldExtractor the weight extractor
         * @return this builder
         */
        public Builder<T> setWeightExtractor(FieldExtractor<Float> fieldExtractor) {
            this.weightExtractor = fieldExtractor;
            return this;
        }

        /**
         * Replaces the metadata extractors with a copy of the supplied list.
         *
         * @param list the metadata extractors
         * @return this builder
         * @throws NullPointerException if {@code list} is null
         */
        public Builder<T> setMetadataExtractors(List<FieldExtractor<?>> list) {
            // Copy so that addMetadataExtractor neither fails on an immutable list nor alters the caller's list.
            this.metadataExtractors = new ArrayList<>(list);
            return this;
        }

        /**
         * Appends a metadata extractor.
         *
         * @param fieldExtractor the extractor to add
         * @return this builder
         */
        public Builder<T> addMetadataExtractor(FieldExtractor<?> fieldExtractor) {
            this.metadataExtractors.add(fieldExtractor);
            return this;
        }

        /**
         * Replaces the feature processors with a copy of the supplied set.
         *
         * @param set the feature processors
         * @return this builder
         * @throws NullPointerException if {@code set} is null
         */
        public Builder<T> setFeatureProcessors(Set<FeatureProcessor> set) {
            // Copy for the same reason as setMetadataExtractors.
            this.featureProcessors = new HashSet<>(set);
            return this;
        }

        /**
         * Adds a feature processor.
         *
         * @param featureProcessor the processor to add
         * @return this builder
         */
        public Builder<T> addFeatureProcessor(FeatureProcessor featureProcessor) {
            this.featureProcessors.add(featureProcessor);
            return this;
        }

        /**
         * Registers a field processor under its own field name, logging a warning and
         * overwriting if that name is already registered.
         *
         * @param fieldProcessor the processor to add
         * @return this builder
         */
        public Builder<T> addFieldProcessor(FieldProcessor fieldProcessor) {
            String fieldName = fieldProcessor.getFieldName();
            if (this.fieldProcessors.containsKey(fieldName)) {
                RowProcessor.logger.warning("Field name " + fieldName + " already present, overwriting");
            }
            this.fieldProcessors.put(fieldName, fieldProcessor);
            return this;
        }

        /**
         * Replaces all field processors with the supplied ones, keyed by field name.
         * The builder is left untouched if a duplicate is found.
         *
         * @param iterable the field processors
         * @return this builder
         * @throws IllegalArgumentException if two processors share a field name
         */
        public Builder<T> setFieldProcessors(Iterable<FieldProcessor> iterable) {
            Map<String, FieldProcessor> byName = new HashMap<>();
            for (FieldProcessor fieldProcessor : iterable) {
                String fieldName = fieldProcessor.getFieldName();
                if (byName.containsKey(fieldName)) {
                    throw new IllegalArgumentException("Duplicate field name " + fieldName);
                }
                byName.put(fieldName, fieldProcessor);
            }
            this.fieldProcessors = byName;
            return this;
        }

        /**
         * Looks up the field processor registered for a field name.
         *
         * @param str the field name
         * @return the processor, or empty if none is registered
         */
        public Optional<FieldProcessor> getFieldProcessor(String str) {
            return Optional.ofNullable(this.fieldProcessors.get(str));
        }

        /**
         * Replaces the regex mapping processors with a copy of the supplied map.
         *
         * @param map regex pattern to field processor
         * @return this builder
         * @throws NullPointerException if {@code map} is null
         */
        public Builder<T> setRegexMappingProcessors(Map<String, FieldProcessor> map) {
            // Copy for the same reason as setMetadataExtractors.
            this.regexMappingProcessors = new HashMap<>(map);
            return this;
        }

        /**
         * Looks up the field processor registered for a regex pattern string.
         *
         * @param str the regex pattern string
         * @return the processor, or empty if none is registered
         */
        public Optional<FieldProcessor> getRegexFieldProcessor(String str) {
            return Optional.ofNullable(this.regexMappingProcessors.get(str));
        }

        /**
         * Registers a field processor for a regex pattern, logging a warning and
         * overwriting if the pattern is already registered.
         *
         * @param str the regex pattern string
         * @param fieldProcessor the processor to associate with it
         * @return this builder
         */
        public Builder<T> addRegexMappingProcessor(String str, FieldProcessor fieldProcessor) {
            if (this.regexMappingProcessors.containsKey(str)) {
                RowProcessor.logger.warning("Regex pattern " + str + " already present, overwriting");
            }
            this.regexMappingProcessors.put(str, fieldProcessor);
            return this;
        }

        /**
         * Builds the row processor from the current builder state.
         *
         * @param responseProcessor the processor which extracts the response
         * @return a new row processor
         * @throws IllegalArgumentException if neither a field processor nor a regex mapping was supplied
         */
        public RowProcessor<T> build(ResponseProcessor<T> responseProcessor) {
            if (this.fieldProcessors.isEmpty() && this.regexMappingProcessors.isEmpty()) {
                throw new IllegalArgumentException("At least one FieldProcessor must be present");
            }
            return new RowProcessor<>(this.metadataExtractors, this.weightExtractor, responseProcessor, this.fieldProcessors, this.regexMappingProcessors, this.featureProcessors, this.replaceNewLinesWithSpaces);
        }
    }

    /**
     * Creates a row processor with no metadata extractors, no weight extractor, no
     * regex mappings and no feature processors, with newline replacement enabled.
     *
     * @param responseProcessor the processor which extracts the response
     * @param map field processors keyed by the row field they read
     */
    public RowProcessor(ResponseProcessor<T> responseProcessor, Map<String, FieldProcessor> map) {
        this(Collections.emptyList(), null, responseProcessor, map, Collections.emptyMap(), Collections.emptySet(), true);
    }

    /**
     * Creates a row processor with no metadata extractors, no weight extractor and no
     * regex mappings, with newline replacement enabled.
     *
     * @param responseProcessor the processor which extracts the response
     * @param map field processors keyed by the row field they read
     * @param set feature processors applied after the field processors
     */
    public RowProcessor(ResponseProcessor<T> responseProcessor, Map<String, FieldProcessor> map, Set<FeatureProcessor> set) {
        this(Collections.emptyList(), null, responseProcessor, map, Collections.emptyMap(), set, true);
    }

    /**
     * Creates a row processor with metadata extractors but no weight extractor, no
     * regex mappings and no feature processors, with newline replacement enabled.
     *
     * @param list the metadata extractors
     * @param responseProcessor the processor which extracts the response
     * @param map field processors keyed by the row field they read
     * @deprecated marked deprecated in this file; the nested {@code Builder} looks like
     *     the intended replacement (TODO confirm).
     */
    @Deprecated
    public RowProcessor(List<FieldExtractor<?>> list, ResponseProcessor<T> responseProcessor, Map<String, FieldProcessor> map) {
        this(list, null, responseProcessor, map, Collections.emptyMap(), Collections.emptySet(), true);
    }

    /**
     * Creates a row processor without regex mappings and with newline replacement enabled.
     *
     * @param list the metadata extractors
     * @param fieldExtractor the weight extractor, may be {@code null}
     * @param responseProcessor the processor which extracts the response
     * @param map field processors keyed by the row field they read
     * @param set feature processors applied after the field processors
     * @deprecated marked deprecated in this file; the nested {@code Builder} looks like
     *     the intended replacement (TODO confirm).
     */
    @Deprecated
    public RowProcessor(List<FieldExtractor<?>> list, FieldExtractor<Float> fieldExtractor, ResponseProcessor<T> responseProcessor, Map<String, FieldProcessor> map, Set<FeatureProcessor> set) {
        // Delegates to the seven-argument constructor with an empty regex mapping.
        this(list, fieldExtractor, responseProcessor, map, Collections.emptyMap(), set, true);
    }

    /**
     * Creates a row processor with regex mappings and with newline replacement enabled.
     *
     * @param list the metadata extractors
     * @param fieldExtractor the weight extractor, may be {@code null}
     * @param responseProcessor the processor which extracts the response
     * @param map field processors keyed by the row field they read
     * @param map2 regex pattern to field processor template
     * @param set feature processors applied after the field processors
     * @deprecated marked deprecated in this file; the nested {@code Builder} looks like
     *     the intended replacement (TODO confirm).
     */
    @Deprecated
    public RowProcessor(List<FieldExtractor<?>> list, FieldExtractor<Float> fieldExtractor, ResponseProcessor<T> responseProcessor, Map<String, FieldProcessor> map, Map<String, FieldProcessor> map2, Set<FeatureProcessor> set) {
        // Delegates to the seven-argument constructor with newline replacement switched on.
        this(list, fieldExtractor, responseProcessor, map, map2, set, true);
    }

    /**
     * Canonical constructor; every other public constructor ends up here.
     *
     * <p>The supplied collections are copied (or replaced by an immutable empty
     * collection when empty), then {@link #postConfig()} is run.
     *
     * @param list the metadata extractors
     * @param fieldExtractor the weight extractor, may be {@code null}
     * @param responseProcessor the processor which extracts the response
     * @param map field processors keyed by the row field they read
     * @param map2 regex pattern to field processor template
     * @param set feature processors applied after the field processors
     * @param z whether newlines in field values are replaced with spaces
     * @deprecated marked deprecated in this file; the nested {@code Builder} looks like
     *     the intended replacement (TODO confirm).
     */
    @Deprecated
    public RowProcessor(List<FieldExtractor<?>> list, FieldExtractor<Float> fieldExtractor, ResponseProcessor<T> responseProcessor, Map<String, FieldProcessor> map, Map<String, FieldProcessor> map2, Set<FeatureProcessor> set, boolean z) {
        if (list.isEmpty()) {
            this.metadataExtractors = Collections.emptyList();
        } else {
            this.metadataExtractors = new ArrayList<>(list);
        }
        this.weightExtractor = fieldExtractor;
        this.responseProcessor = responseProcessor;
        this.fieldProcessorMap = new HashMap<>(map);
        if (map2.isEmpty()) {
            this.regexMappingProcessors = Collections.emptyMap();
        } else {
            this.regexMappingProcessors = new HashMap<>(map2);
        }
        // Default-sized set filled via addAll, so iteration order is unchanged from before.
        this.featureProcessors = new HashSet<>();
        this.featureProcessors.addAll(set);
        this.replaceNewlinesWithSpaces = z;
        postConfig();
    }

    /**
     * No-argument constructor which only installs default values: no metadata
     * extractors, no weight extractor, empty feature-processor and regex-mapping
     * collections, and newline replacement enabled. It does not run
     * {@link #postConfig()}; presumably the configuration system fills the
     * {@code @Config} fields and calls it afterwards (TODO confirm).
     */
    protected RowProcessor() {
        this.replaceNewlinesWithSpaces = true;
        this.regexMappingProcessors = new HashMap<>();
        this.featureProcessors = new HashSet<>();
        this.weightExtractor = null;
        this.metadataExtractors = Collections.emptyList();
    }

    /**
     * Finishes initialisation once the fields have been populated.
     *
     * <p>Marks the processor as configured when there are no regex mappings, makes
     * the field-processor list and map agree (the list wins when it is present,
     * otherwise it is filled from the map), and checks that no two metadata
     * extractors share a metadata name.
     *
     * @throws PropertyException if two metadata extractors use the same metadata name
     */
    public void postConfig() {
        this.configured = this.regexMappingProcessors.isEmpty();

        if (this.fieldProcessorList == null) {
            // Built through a constructor: derive the list from the map.
            this.fieldProcessorList = new ArrayList<>();
            this.fieldProcessorList.addAll(this.fieldProcessorMap.values());
        } else {
            // Built from configuration: derive the map from the list.
            this.fieldProcessorMap = this.fieldProcessorList.stream()
                    .collect(Collectors.toMap(FieldProcessor::getFieldName, Function.identity()));
        }

        Set<String> seenMetadataNames = new HashSet<>();
        for (FieldExtractor<?> extractor : this.metadataExtractors) {
            String name = extractor.getMetadataName();
            // Set.add returns false when the name was already recorded.
            if (!seenMetadataNames.add(name)) {
                throw new PropertyException("", "metadataExtractors", "Two metadata extractors found referencing the same metadata name '" + name + "'");
            }
        }
    }

    /**
     * Returns the response processor held by this row processor.
     *
     * @return the response processor
     */
    public ResponseProcessor<T> getResponseProcessor() {
        return this.responseProcessor;
    }

    /**
     * Returns a read-only view of the field processors keyed by field name.
     * The view is backed by the internal map, so later changes to it are visible.
     *
     * @return an unmodifiable view of the field processor map
     */
    public Map<String, FieldProcessor> getFieldProcessors() {
        return Collections.unmodifiableMap(this.fieldProcessorMap);
    }

    /**
     * Returns a read-only view of the feature processors.
     *
     * @return an unmodifiable view of the feature processor set
     */
    public Set<FeatureProcessor> getFeatureProcessors() {
        return Collections.unmodifiableSet(this.featureProcessors);
    }

    /**
     * Builds an example from a row.
     *
     * <p>The response fields are read from the row (a missing field is passed to the
     * response processor as the empty string). When no response is produced and
     * {@code z} is true, or when the row yields no features, nothing is returned.
     * When no response is produced and {@code z} is false, the output factory's
     * unknown output is used instead.
     *
     * @param row the row to process
     * @param z whether a response is required for the example to be emitted
     * @return the example, or empty if the row was skipped
     */
    public Optional<Example<T>> generateExample(ColumnarIterator.Row row, boolean z) {
        List<String> responseValues = new ArrayList<>();
        for (String responseField : this.responseProcessor.getFieldNames()) {
            responseValues.add(row.getRowData().getOrDefault(responseField, ""));
        }
        Optional<T> response = this.responseProcessor.process(responseValues);
        if (z && !response.isPresent()) {
            return Optional.empty();
        }

        List<ColumnarFeature> features = generateFeatures(row.getRowData());
        if (features.isEmpty()) {
            logger.warning(String.format("Row %d empty of features, omitting", row.getIndex()));
            return Optional.empty();
        }

        T output = response.orElse(this.responseProcessor.getOutputFactory().getUnknownOutput());
        Map<String, Object> metadata = generateMetadata(row);

        ArrayExample<T> example;
        if (this.weightExtractor == null) {
            example = new ArrayExample<>(output, metadata);
        } else {
            // A row without an extractable weight falls back to weight 1.
            float weight = this.weightExtractor.extract(row).orElse(1.0f);
            example = new ArrayExample<>(output, weight, metadata);
        }
        example.addAll(features);
        return Optional.of(example);
    }

    /**
     * Builds an example from a map of field name to value, using {@code -1} as the row index.
     *
     * @param map field name to field value
     * @param z whether a response is required for the example to be emitted
     * @return the example, or empty if the row was skipped
     */
    public Optional<Example<T>> generateExample(Map<String, String> map, boolean z) {
        return generateExample(-1L, map, z);
    }

    /**
     * Builds an example from a map of field name to value with an explicit row index.
     * The map is wrapped in a {@code ColumnarIterator.Row} whose field list is a copy
     * of the map's key set.
     *
     * @param j the row index
     * @param map field name to field value
     * @param z whether a response is required for the example to be emitted
     * @return the example, or empty if the row was skipped
     */
    public Optional<Example<T>> generateExample(long j, Map<String, String> map, boolean z) {
        List<String> fieldNames = new ArrayList<>(map.keySet());
        ColumnarIterator.Row row = new ColumnarIterator.Row(j, fieldNames, map);
        return generateExample(row, z);
    }

    /**
     * Runs every metadata extractor over the row and collects the extracted values
     * by metadata name. An extractor that yields nothing is logged and skipped.
     *
     * @param row the row to extract from
     * @return the extracted metadata; an immutable empty map when there are no extractors
     */
    public Map<String, Object> generateMetadata(ColumnarIterator.Row row) {
        if (this.metadataExtractors.isEmpty()) {
            return Collections.emptyMap();
        }
        Map<String, Object> metadata = new HashMap<>();
        long rowIndex = row.getIndex();
        for (FieldExtractor<?> extractor : this.metadataExtractors) {
            String name = extractor.getMetadataName();
            Optional<?> value = extractor.extract(row);
            if (!value.isPresent()) {
                logger.warning("Failed to extract field with name " + name + " from index " + rowIndex);
                continue;
            }
            metadata.put(name, value.get());
        }
        return metadata;
    }

    /**
     * Generates the features for one row.
     *
     * <p>Each field processor is applied to the value of its field when the row has
     * one; the value is trimmed, and newlines are first replaced with spaces when
     * that option is on. The combined feature list is then passed through every
     * feature processor in turn.
     *
     * @param map field name to field value for the row
     * @return the features for the row, possibly empty
     * @throws IllegalStateException if regex mappings exist which have not been expanded
     */
    public List<ColumnarFeature> generateFeatures(Map<String, String> map) {
        if (!this.configured) {
            throw new IllegalStateException("expandRegexMapping not called, yet there are entries in regexMappingProcessors which have not been bound to a field name.");
        }
        List<ColumnarFeature> features = new ArrayList<>();
        for (Map.Entry<String, FieldProcessor> binding : this.fieldProcessorMap.entrySet()) {
            String rawValue = map.get(binding.getKey());
            if (rawValue == null) {
                // The row has no value for this field, so it contributes nothing.
                continue;
            }
            String value = this.replaceNewlinesWithSpaces ? rawValue.replace('\n', ' ') : rawValue;
            features.addAll(binding.getValue().process(value.trim()));
        }
        for (FeatureProcessor processor : this.featureProcessors) {
            // Each processor receives the output of the previous one.
            features = processor.process(features);
        }
        return features;
    }

    /**
     * Returns a read-only view of the field names which have a field processor.
     * The view is backed by the internal map's key set.
     *
     * @return an unmodifiable view of the field names
     */
    public Set<String> getColumnNames() {
        return Collections.unmodifiableSet(this.fieldProcessorMap.keySet());
    }

    /**
     * Describes this row processor as a single-line string listing its components.
     * The regex mappings are included only while the processor is unconfigured and
     * still holds at least one of them.
     *
     * @return the description
     */
    public String getDescription() {
        String weightText = this.weightExtractor == null ? "null" : this.weightExtractor.toString();
        boolean showRegexMappings = !this.configured && !this.regexMappingProcessors.isEmpty();

        StringBuilder description = new StringBuilder("RowProcessor(responseProcessor=");
        description.append(this.responseProcessor.toString());
        description.append(",fieldProcessorMap=").append(this.fieldProcessorMap.toString());
        if (showRegexMappings) {
            description.append(",regexMappingProcessors=").append(this.regexMappingProcessors.toString());
        }
        description.append(",featureProcessors=").append(this.featureProcessors.toString());
        description.append(",metadataExtractors=").append(this.metadataExtractors.toString());
        description.append(",weightExtractor=").append(weightText).append(")");
        return description.toString();
    }

    /**
     * Returns the same text as {@link #getDescription()}.
     *
     * @return the description of this row processor
     */
    @Override
    public String toString() {
        return getDescription();
    }

    /**
     * Returns the value type reported by each metadata extractor, keyed by metadata name.
     *
     * @return metadata name to value type; an immutable empty map when there are no extractors
     */
    public Map<String, Class<?>> getMetadataTypes() {
        if (this.metadataExtractors.isEmpty()) {
            return Collections.emptyMap();
        }
        Map<String, Class<?>> typesByName = new HashMap<>();
        for (FieldExtractor<?> extractor : this.metadataExtractors) {
            String name = extractor.getMetadataName();
            Class<?> type = extractor.getValueType();
            typesByName.put(name, type);
        }
        return typesByName;
    }

    /**
     * Reports whether this processor is ready to generate features, i.e. whether it
     * has no regex mappings left to bind to field names.
     *
     * @return true if the processor is configured
     */
    public boolean isConfigured() {
        return this.configured;
    }

    /**
     * Expands the regex mappings using the feature map of the supplied model.
     *
     * @param model the model whose feature map supplies the names
     */
    public void expandRegexMapping(Model<T> model) {
        expandRegexMapping(model.getFeatureIDMap());
    }

    /**
     * Expands the regex mappings using the field names found in a feature map.
     *
     * <p>The field name of a feature is the part of its name before the first
     * {@code '@'} or {@code '#'}; a name containing neither is used whole. Each
     * distinct field name is passed on once, in first-seen order.
     *
     * @param immutableFeatureMap the feature map to read feature names from
     */
    public void expandRegexMapping(ImmutableFeatureMap immutableFeatureMap) {
        List<String> fieldNames = new ArrayList<>(immutableFeatureMap.size());
        Set<String> seen = new HashSet<>();
        for (VariableInfo info : immutableFeatureMap) {
            // A limit of 2 splits at the first separator only. A limit of 1 never applies
            // the pattern, so it would hand back the whole feature name.
            String fieldName = FEATURE_NAME_PATTERN.split(info.getName(), 2)[0];
            // Many features come from one field; a repeated field name would otherwise be
            // rejected downstream as already having a field processor.
            if (seen.add(fieldName)) {
                fieldNames.add(fieldName);
            }
        }
        expandRegexMapping(fieldNames);
    }

    /**
     * Binds every regex mapping to the field names it matches, then marks this
     * processor as configured and discards the regex mappings.
     *
     * <p>If the processor is already configured a warning is logged and nothing else
     * happens.
     *
     * @param collection the candidate field names
     * @throws IllegalArgumentException if some regex matched no field name
     */
    public void expandRegexMapping(Collection<String> collection) {
        if (this.configured) {
            logger.warning("RowProcessor was already configured, yet expandRegexMapping was called with " + collection.toString());
            return;
        }

        Set<String> matchedRegexes = partialExpandRegexMapping(collection);
        int found = matchedRegexes.size();
        int required = this.regexMappingProcessors.size();
        if (found != required) {
            throw new IllegalArgumentException("Failed to match all the regexes, found " + found + ", required " + required);
        }

        this.regexMappingProcessors.clear();
        this.configured = true;
    }

    /**
     * Binds regex mappings to the supplied field names without changing the
     * configured flag or clearing the mappings.
     *
     * <p>For every regex and every field name the regex fully matches, a copy of the
     * regex's processor bound to that field name is added to both the field
     * processor list and map.
     *
     * <p>A clash with an existing processor is detected before anything is changed
     * for that field, so the existing processor is never overwritten. Bindings made
     * earlier in the same call are not rolled back.
     *
     * @param collection the candidate field names
     * @return the regex pattern strings which matched at least one field name
     * @throws IllegalArgumentException if a regex matches a field name which already has a processor
     */
    protected Set<String> partialExpandRegexMapping(Collection<String> collection) {
        Set<String> matchedRegexes = new HashSet<>();
        for (Map.Entry<String, FieldProcessor> mapping : this.regexMappingProcessors.entrySet()) {
            Pattern pattern = Pattern.compile(mapping.getKey());
            for (String fieldName : collection) {
                if (!pattern.matcher(fieldName).matches()) {
                    continue;
                }
                // Check first, mutate second: a clash must not replace the existing binding.
                FieldProcessor existing = this.fieldProcessorMap.get(fieldName);
                if (existing != null) {
                    throw new IllegalArgumentException("Regex " + pattern.toString() + " matched field " + fieldName + " which already had a field processor " + existing.toString());
                }
                FieldProcessor bound = mapping.getValue().copy(fieldName);
                this.fieldProcessorList.add(bound);
                this.fieldProcessorMap.put(fieldName, bound);
                matchedRegexes.add(mapping.getKey());
            }
        }
        return matchedRegexes;
    }

    /**
     * Creates a new row processor by passing this instance's current fields to the
     * seven-argument constructor. That constructor copies the collections, while the
     * extractors and processors inside them are passed on as the same object references.
     *
     * @return a new row processor built from this one's current state
     * @deprecated marked deprecated in this file; the reason is not visible here (TODO confirm).
     */
    @Deprecated
    public RowProcessor<T> copy() {
        return new RowProcessor<>(this.metadataExtractors, this.weightExtractor, this.responseProcessor, this.fieldProcessorMap, this.regexMappingProcessors, this.featureProcessors, this.replaceNewlinesWithSpaces);
    }

    /**
     * Returns the provenance of this row processor, built from this object with the
     * host short name {@code "RowProcessor"}.
     *
     * <p>This restores the {@code getProvenance} name which the decompiler had
     * replaced; presumably it is the method required by {@code Provenancable}
     * (TODO confirm against the interface).
     *
     * @return the provenance of this row processor
     */
    public ConfiguredObjectProvenance getProvenance() {
        return new ConfiguredObjectProvenanceImpl(this, "RowProcessor");
    }

    /**
     * Alias kept so that any code compiled against the decompiler-generated name keeps working.
     *
     * @return the provenance of this row processor
     * @deprecated decompiler-generated name; call {@link #getProvenance()} instead.
     */
    @Deprecated
    public ConfiguredObjectProvenance m10getProvenance() {
        return getProvenance();
    }
}
