package org.tribuo.data.text.impl;

import com.oracle.labs.mlrg.olcut.provenance.ObjectProvenance;
import com.oracle.labs.mlrg.olcut.provenance.PrimitiveProvenance;
import com.oracle.labs.mlrg.olcut.provenance.Provenance;
import com.oracle.labs.mlrg.olcut.provenance.ProvenanceUtil;
import com.oracle.labs.mlrg.olcut.provenance.impl.SkeletalConfiguredObjectProvenance;
import com.oracle.labs.mlrg.olcut.provenance.primitives.DateTimeProvenance;
import com.oracle.labs.mlrg.olcut.provenance.primitives.HashProvenance;
import com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Instant;
import java.time.OffsetDateTime;
import java.time.ZoneId;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import org.tribuo.Example;
import org.tribuo.Output;
import org.tribuo.OutputFactory;
import org.tribuo.data.text.DocumentPreprocessor;
import org.tribuo.data.text.TextDataSource;
import org.tribuo.data.text.TextFeatureExtractor;
import org.tribuo.provenance.ConfiguredDataSourceProvenance;

/* loaded from: input_file:org/tribuo/data/text/impl/SimpleTextDataSource.class */
/**
 * A text data source that reads a UTF-8 file with one example per line.
 * <p>
 * Each non-blank line must split into exactly two fields around the {@code ##}
 * separator: the output label followed by the document text. The label is trimmed
 * and upper-cased before it is handed to the {@link OutputFactory}; the document is
 * trimmed, passed through {@code handleDoc} and then to the
 * {@link TextFeatureExtractor}.
 *
 * @param <T> the output type produced for each example.
 */
public class SimpleTextDataSource<T extends Output<T>> extends TextDataSource<T> {
    private static final Logger logger = Logger.getLogger(SimpleTextDataSource.class.getName());

    /** Separator between the label field and the document field of a line. */
    private static final Pattern splitPattern = Pattern.compile("##");

    /** Longest prefix of a malformed line that is echoed into the log. */
    private static final int MAX_LOGGED_LINE_LENGTH = 50;

    /** Provenance built once by {@link #postConfig()} after the file has been read. */
    protected ConfiguredDataSourceProvenance provenance;

    /**
     * Provenance for {@link SimpleTextDataSource}.
     * <p>
     * On top of the configured parameters handled by the superclass it records three
     * instance values: the modification time of the data file, the time the
     * provenance was created, and a hash of the data file.
     */
    public static class SimpleTextDataSourceProvenance extends SkeletalConfiguredObjectProvenance implements ConfiguredDataSourceProvenance {
        private static final long serialVersionUID = 1L;

        // Keys under which the instance values are stored and looked up.
        private static final String FILE_MODIFIED_TIME_KEY = "file-modified-time";
        private static final String DATASOURCE_CREATION_TIME_KEY = "datasource-creation-time";
        private static final String RESOURCE_HASH_KEY = "resource-hash";

        private final DateTimeProvenance fileModifiedTime;
        private final DateTimeProvenance dataSourceCreationTime;
        // Computed with DEFAULT_HASH_TYPE; the field name suggests that is SHA-256 (not confirmed here).
        private final HashProvenance sha256Hash;

        /**
         * Builds the provenance by inspecting the host data source's file.
         *
         * @param simpleTextDataSource the data source whose {@code path} is examined.
         * @param <T> the output type of the host data source.
         */
        <T extends Output<T>> SimpleTextDataSourceProvenance(SimpleTextDataSource<T> simpleTextDataSource) {
            super(simpleTextDataSource, "DataSource");
            Path sourcePath = simpleTextDataSource.path;
            // File.lastModified() yields epoch milliseconds; express it in the system default zone.
            Instant modified = Instant.ofEpochMilli(sourcePath.toFile().lastModified());
            this.fileModifiedTime = new DateTimeProvenance(FILE_MODIFIED_TIME_KEY, OffsetDateTime.ofInstant(modified, ZoneId.systemDefault()));
            this.dataSourceCreationTime = new DateTimeProvenance(DATASOURCE_CREATION_TIME_KEY, OffsetDateTime.now());
            this.sha256Hash = new HashProvenance(DEFAULT_HASH_TYPE, RESOURCE_HASH_KEY, ProvenanceUtil.hashResource(DEFAULT_HASH_TYPE, sourcePath));
        }

        /**
         * Rebuilds the provenance from a map of provenance values.
         *
         * @param map the provenance values, keyed by name.
         */
        public SimpleTextDataSourceProvenance(Map<String, Provenance> map) {
            this(extractProvenanceInfo(map));
        }

        /**
         * Rebuilds the provenance from already separated values.
         *
         * @param extractedInfo the output of {@link #extractProvenanceInfo(Map)}.
         */
        private SimpleTextDataSourceProvenance(SkeletalConfiguredObjectProvenance.ExtractedInfo extractedInfo) {
            super(extractedInfo);
            this.fileModifiedTime = (DateTimeProvenance) extractedInfo.instanceValues.get(FILE_MODIFIED_TIME_KEY);
            this.dataSourceCreationTime = (DateTimeProvenance) extractedInfo.instanceValues.get(DATASOURCE_CREATION_TIME_KEY);
            this.sha256Hash = (HashProvenance) extractedInfo.instanceValues.get(RESOURCE_HASH_KEY);
        }

        /**
         * Separates a provenance map into the class name, the host short name, the
         * instance values and the remaining entries.
         * <p>
         * The host short name falls back to {@code "DataSource"} when the map has no
         * {@code host-short-name} entry. The input map is copied first and is not
         * modified by this method itself.
         *
         * @param map the provenance values, keyed by name.
         * @return the separated provenance information.
         */
        protected static SkeletalConfiguredObjectProvenance.ExtractedInfo extractProvenanceInfo(Map<String, Provenance> map) {
            // Work on a copy; presumably the extract helpers consume entries from the map they are given.
            Map<String, Provenance> configuredParameters = new HashMap<>(map);
            String provenanceName = SimpleTextDataSourceProvenance.class.getSimpleName();

            String className = ObjectProvenance.checkAndExtractProvenance(configuredParameters, "class-name", StringProvenance.class, provenanceName).getValue();
            Optional<StringProvenance> hostShortName = ObjectProvenance.maybeExtractProvenance(configuredParameters, "host-short-name", StringProvenance.class, provenanceName);
            String hostTypeStringName = hostShortName.isPresent() ? hostShortName.get().getValue() : "DataSource";

            Map<String, PrimitiveProvenance<?>> instanceValues = new HashMap<>();
            instanceValues.put(FILE_MODIFIED_TIME_KEY, ObjectProvenance.checkAndExtractProvenance(configuredParameters, FILE_MODIFIED_TIME_KEY, DateTimeProvenance.class, provenanceName));
            instanceValues.put(DATASOURCE_CREATION_TIME_KEY, ObjectProvenance.checkAndExtractProvenance(configuredParameters, DATASOURCE_CREATION_TIME_KEY, DateTimeProvenance.class, provenanceName));
            instanceValues.put(RESOURCE_HASH_KEY, ObjectProvenance.checkAndExtractProvenance(configuredParameters, RESOURCE_HASH_KEY, HashProvenance.class, provenanceName));

            return new SkeletalConfiguredObjectProvenance.ExtractedInfo(className, hostTypeStringName, configuredParameters, instanceValues);
        }

        /**
         * Two provenances are equal when the superclass considers them equal and the
         * three instance values match.
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof SimpleTextDataSourceProvenance) || !super.equals(obj)) {
                return false;
            }
            SimpleTextDataSourceProvenance other = (SimpleTextDataSourceProvenance) obj;
            return this.fileModifiedTime.equals(other.fileModifiedTime)
                    && this.dataSourceCreationTime.equals(other.dataSourceCreationTime)
                    && this.sha256Hash.equals(other.sha256Hash);
        }

        /** Combines the superclass hash with the three instance values, consistent with {@link #equals(Object)}. */
        @Override
        public int hashCode() {
            return Objects.hash(super.hashCode(), this.fileModifiedTime, this.dataSourceCreationTime, this.sha256Hash);
        }

        /**
         * Returns the superclass instance values with this class's three values added.
         *
         * @return the instance values keyed by name.
         */
        @Override
        public Map<String, PrimitiveProvenance<?>> getInstanceValues() {
            Map<String, PrimitiveProvenance<?>> instanceValues = super.getInstanceValues();
            instanceValues.put(FILE_MODIFIED_TIME_KEY, this.fileModifiedTime);
            instanceValues.put(DATASOURCE_CREATION_TIME_KEY, this.dataSourceCreationTime);
            instanceValues.put(RESOURCE_HASH_KEY, this.sha256Hash);
            return instanceValues;
        }
    }

    /**
     * Creates an empty data source. Nothing is read and no provenance is built until
     * {@link #postConfig()} is called.
     */
    public SimpleTextDataSource() {
    }

    /**
     * Creates a data source and immediately reads the supplied file.
     *
     * @param path the file to read.
     * @param outputFactory the factory that converts label strings into outputs.
     * @param textFeatureExtractor the extractor that turns a document into an example.
     * @throws IOException if the file cannot be read.
     */
    public SimpleTextDataSource(Path path, OutputFactory<T> outputFactory, TextFeatureExtractor<T> textFeatureExtractor) throws IOException {
        super(path, outputFactory, textFeatureExtractor, new DocumentPreprocessor[0]);
        postConfig();
    }

    /**
     * Creates a data source and immediately reads the supplied file.
     *
     * @param file the file to read.
     * @param outputFactory the factory that converts label strings into outputs.
     * @param textFeatureExtractor the extractor that turns a document into an example.
     * @throws IOException if the file cannot be read.
     */
    public SimpleTextDataSource(File file, OutputFactory<T> outputFactory, TextFeatureExtractor<T> textFeatureExtractor) throws IOException {
        super(file, outputFactory, textFeatureExtractor, new DocumentPreprocessor[0]);
        postConfig();
    }

    /**
     * Creates a data source with no backing file. Nothing is read and no provenance
     * is built by this constructor.
     *
     * @param outputFactory the factory that converts label strings into outputs.
     * @param textFeatureExtractor the extractor that turns a document into an example.
     */
    public SimpleTextDataSource(OutputFactory<T> outputFactory, TextFeatureExtractor<T> textFeatureExtractor) {
        // The cast picks the Path overload of the superclass constructor for the null argument.
        super((Path) null, outputFactory, textFeatureExtractor, new DocumentPreprocessor[0]);
    }

    /**
     * Reads the data file and then caches the provenance describing it.
     *
     * @throws IOException if the file cannot be read.
     */
    public void postConfig() throws IOException {
        read();
        this.provenance = cacheProvenance();
    }

    /**
     * Parses a single line of the form {@code LABEL##document}.
     * <p>
     * Blank lines are skipped silently. Lines that do not split into exactly two
     * fields are skipped with a warning that includes at most the first
     * {@value #MAX_LOGGED_LINE_LENGTH} characters of the line.
     *
     * @param str the raw line.
     * @param i the line number, used only for logging.
     * @return the parsed example, or an empty optional if the line was skipped.
     */
    public Optional<Example<T>> parseLine(String str, int i) {
        String line = str.trim();
        if (line.isEmpty()) {
            return Optional.empty();
        }
        String[] fields = splitPattern.split(line);
        if (fields.length != 2) {
            // Log a preview made of the START of the line, capped so huge lines do not flood the log.
            String preview = line.substring(0, Math.min(MAX_LOGGED_LINE_LENGTH, line.length()));
            logger.warning(String.format("Bad line in %s at %d: %s", this.path, i, preview));
            return Optional.empty();
        }
        // Run the document text through every preprocessor; a null result drops the line.
        // NOTE(review): the processed text is used only for this null check. The example
        // below is built from handleDoc(fields[1].trim()), not from the value computed
        // here. Confirm against TextDataSource.handleDoc whether that is intended.
        String processed = fields[1];
        for (DocumentPreprocessor preprocessor : this.preprocessors) {
            processed = preprocessor.processDoc(processed);
            if (processed == null) {
                return Optional.empty();
            }
        }
        T output = this.outputFactory.generateOutput(fields[0].trim().toUpperCase());
        return Optional.of(this.extractor.extract(output, handleDoc(fields[1].trim())));
    }

    /**
     * Reads every line of the file at {@code path} as UTF-8, parses it, and stores
     * the examples that pass validation. Invalid examples are logged and skipped.
     *
     * @throws IOException if the file cannot be read.
     */
    @Override // org.tribuo.data.text.TextDataSource
    protected void read() throws IOException {
        int lineNumber = 0;
        for (String line : Files.readAllLines(this.path, StandardCharsets.UTF_8)) {
            // Line numbers are 1-based and count blank or malformed lines too.
            lineNumber++;
            Optional<Example<T>> parsed = parseLine(line, lineNumber);
            if (!parsed.isPresent()) {
                continue;
            }
            Example<T> example = parsed.get();
            if (example.validateExample()) {
                this.data.add(example);
            } else {
                logger.warning("Invalid example found after parsing line " + lineNumber);
            }
        }
    }

    /**
     * Returns the provenance cached by {@link #postConfig()}; {@code null} if
     * {@code postConfig} has not run.
     * <p>
     * NOTE(review): the decompiler renamed this method from {@code getProvenance}
     * when it merged a bridge method. The name is kept here so existing references
     * to this source keep working; confirm whether it should be restored.
     *
     * @return the cached provenance.
     */
    public ConfiguredDataSourceProvenance m68getProvenance() {
        return this.provenance;
    }

    /**
     * Builds a fresh provenance object for this data source.
     *
     * @return the provenance describing this data source and its file.
     */
    protected ConfiguredDataSourceProvenance cacheProvenance() {
        return new SimpleTextDataSourceProvenance(this);
    }
}
