package org.apache.paimon.format.orc;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;
import java.util.Properties;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import javax.annotation.concurrent.ThreadSafe;
import org.apache.hadoop.conf.Configuration;
import org.apache.paimon.annotation.VisibleForTesting;
import org.apache.paimon.format.FileFormat;
import org.apache.paimon.format.FileFormatFactory;
import org.apache.paimon.format.FormatReaderFactory;
import org.apache.paimon.format.FormatWriterFactory;
import org.apache.paimon.format.SimpleStatsExtractor;
import org.apache.paimon.format.orc.filter.OrcPredicateFunctionVisitor;
import org.apache.paimon.format.orc.filter.OrcSimpleStatsExtractor;
import org.apache.paimon.format.orc.reader.OrcSplitReaderUtil;
import org.apache.paimon.format.orc.writer.RowDataVectorizer;
import org.apache.paimon.options.MemorySize;
import org.apache.paimon.options.Options;
import org.apache.paimon.predicate.Predicate;
import org.apache.paimon.shade.org.apache.orc.OrcConf;
import org.apache.paimon.statistics.SimpleColStatsCollector;
import org.apache.paimon.types.ArrayType;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.DataTypeChecks;
import org.apache.paimon.types.DataTypes;
import org.apache.paimon.types.IntType;
import org.apache.paimon.types.MapType;
import org.apache.paimon.types.MultisetType;
import org.apache.paimon.types.RowType;

/**
 * {@link FileFormat} implementation registered under the identifier {@value #IDENTIFIER}.
 *
 * <p>On construction the format options are turned into a set of {@code orc.}-prefixed
 * properties, and those properties are copied into two independent Hadoop
 * {@link Configuration} objects: one handed to reader factories and one handed to writer
 * factories.
 */
@ThreadSafe
public class OrcFileFormat extends FileFormat {
    public static final String IDENTIFIER = "orc";

    /** Prefix prepended to every format option key when building the ORC properties. */
    private static final String ORC_KEY_PREFIX = "orc.";

    private final Properties orcProperties;
    private final Configuration readerConf;
    private final Configuration writerConf;
    private final int readBatchSize;

    /**
     * Creates the format from the given context.
     *
     * @param formatContext supplies the format options, the zstd level, the optional block
     *     size and the read batch size
     */
    public OrcFileFormat(FileFormatFactory.FormatContext formatContext) {
        super(IDENTIFIER);
        this.orcProperties = getOrcProperties(formatContext.formatOptions(), formatContext);
        // Reader and writer each get their own Configuration instance built from the same
        // properties, so the two objects are never shared between the read and write paths.
        this.readerConf = toConfiguration(this.orcProperties);
        this.writerConf = toConfiguration(this.orcProperties);
        this.readBatchSize = formatContext.readBatchSize();
    }

    /** Returns the ORC properties computed at construction time (the internal instance, not a copy). */
    @VisibleForTesting
    public Properties orcProperties() {
        return this.orcProperties;
    }

    /** Returns the read batch size taken from the format context at construction time. */
    @VisibleForTesting
    public int readBatchSize() {
        return this.readBatchSize;
    }

    /**
     * Creates a stats extractor for the given row type.
     *
     * @param rowType row type of the files to extract statistics from
     * @param factoryArr per-column statistics collector factories
     * @return an always-present {@link OrcSimpleStatsExtractor}
     */
    @Override // org.apache.paimon.format.FileFormat
    public Optional<SimpleStatsExtractor> createStatsExtractor(RowType rowType, SimpleColStatsCollector.Factory[] factoryArr) {
        return Optional.of(new OrcSimpleStatsExtractor(rowType, factoryArr));
    }

    /**
     * Creates a reader factory for the given row type.
     *
     * <p>Each supplied predicate is passed through {@link OrcPredicateFunctionVisitor#VISITOR};
     * only predicates for which the visitor yields a value are forwarded to the reader
     * factory, the others are silently dropped.
     *
     * @param rowType row type to read; it is passed through {@link #refineDataType(DataType)}
     *     first
     * @param list predicates to push down, or {@code null} for none
     * @return a new {@link OrcReaderFactory}
     */
    @Override // org.apache.paimon.format.FileFormat
    // NOTE(review): the list stays raw because its element type (whatever the visitor's
    // Optional carries) is not imported in this file; the suppression is limited to this method.
    @SuppressWarnings({"rawtypes", "unchecked"})
    public FormatReaderFactory createReaderFactory(RowType rowType, @Nullable List<Predicate> list) {
        List orcPredicates = new ArrayList();
        if (list != null) {
            for (Predicate predicate : list) {
                Optional<?> converted = predicate.visit(OrcPredicateFunctionVisitor.VISITOR);
                converted.ifPresent(orcPredicates::add);
            }
        }
        return new OrcReaderFactory(this.readerConf, (RowType) refineDataType(rowType), orcPredicates, this.readBatchSize);
    }

    /**
     * Validates the row type by converting its refined form to an ORC type.
     *
     * <p>The conversion result is discarded; presumably an unsupported type makes
     * {@code OrcSplitReaderUtil.toOrcType} throw — confirm against that helper.
     *
     * @param rowType row type to validate
     */
    @Override // org.apache.paimon.format.FileFormat
    public void validateDataFields(RowType rowType) {
        OrcSplitReaderUtil.toOrcType(refineDataType(rowType));
    }

    /**
     * Creates a writer factory for the given row type.
     *
     * @param rowType row type to write; it is passed through {@link #refineDataType(DataType)}
     *     first
     * @return a new {@link OrcWriterFactory} using this format's properties and writer
     *     configuration
     */
    @Override // org.apache.paimon.format.FileFormat
    public FormatWriterFactory createWriterFactory(RowType rowType) {
        DataType refinedType = refineDataType(rowType);
        String orcSchema = OrcSplitReaderUtil.toOrcType(refinedType).toString();
        DataType[] fieldTypes = DataTypeChecks.getFieldTypes(refinedType).toArray(new DataType[0]);
        return new OrcWriterFactory(new RowDataVectorizer(orcSchema, fieldTypes), this.orcProperties, this.writerConf);
    }

    /** Copies every entry of {@code properties} into a fresh {@link Configuration} as strings. */
    private static Configuration toConfiguration(Properties properties) {
        Configuration configuration = new Configuration();
        properties.forEach((key, value) -> configuration.set(key.toString(), value.toString()));
        return configuration;
    }

    /**
     * Builds the ORC properties from the format options and the format context.
     *
     * <ul>
     *   <li>Every option is copied with its key prefixed by {@code "orc."}.
     *   <li>The zstd compression level is taken from the context only when the options did
     *       not already set it.
     *   <li>When the context has a block size, it always overwrites the stripe size, even
     *       if the options set one.
     * </ul>
     *
     * @param options format options to copy
     * @param formatContext source of the zstd level and the optional block size
     * @return a new, mutable {@link Properties} instance
     */
    private static Properties getOrcProperties(Options options, FileFormatFactory.FormatContext formatContext) {
        Properties rawOptions = new Properties();
        options.addAllToProperties(rawOptions);

        Properties orcProperties = new Properties();
        rawOptions.forEach((key, value) -> orcProperties.put(ORC_KEY_PREFIX + key, value));

        String zstdLevelKey = OrcConf.COMPRESSION_ZSTD_LEVEL.getAttribute();
        if (!orcProperties.containsKey(zstdLevelKey)) {
            orcProperties.setProperty(zstdLevelKey, String.valueOf(formatContext.zstdLevel()));
        }

        MemorySize blockSize = formatContext.blockSize();
        if (blockSize != null) {
            orcProperties.setProperty(OrcConf.STRIPE_SIZE.getAttribute(), String.valueOf(blockSize.getBytes()));
        }
        return orcProperties;
    }

    /**
     * Recursively rewrites a data type into the form used for ORC conversion.
     *
     * <ul>
     *   <li>{@code BINARY} and {@code VARBINARY} become {@link DataTypes#BYTES()}.
     *   <li>{@code ARRAY}, {@code MAP} and {@code ROW} are rebuilt with their nested types
     *       refined.
     *   <li>{@code MULTISET} becomes a map from the refined element type to a non-nullable
     *       {@code INT}.
     *   <li>Every other type is returned unchanged.
     * </ul>
     *
     * @param dataType type to refine
     * @return the refined type
     */
    public static DataType refineDataType(DataType dataType) {
        switch (dataType.getTypeRoot()) {
            case BINARY:
            case VARBINARY:
                return DataTypes.BYTES();
            case ARRAY:
                ArrayType arrayType = (ArrayType) dataType;
                return new ArrayType(arrayType.isNullable(), refineDataType(arrayType.getElementType()));
            case MAP:
                // NOTE(review): unlike ARRAY and ROW, this branch (and MULTISET below) does not
                // forward the source type's nullability flag — confirm that is intended.
                MapType mapType = (MapType) dataType;
                return new MapType(refineDataType(mapType.getKeyType()), refineDataType(mapType.getValueType()));
            case MULTISET:
                MultisetType multisetType = (MultisetType) dataType;
                return new MapType(refineDataType(multisetType.getElementType()), refineDataType(new IntType(false)));
            case ROW:
                RowType rowType = (RowType) dataType;
                List<DataField> refinedFields = rowType.getFields().stream()
                        .map(field -> new DataField(field.id(), field.name(), refineDataType(field.type()), field.description()))
                        .collect(Collectors.toList());
                return new RowType(rowType.isNullable(), refinedFields);
            default:
                return dataType;
        }
    }
}
