package org.apache.paimon.format.parquet.newreader;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.annotation.Nullable;
import org.apache.paimon.data.InternalRow;
import org.apache.paimon.data.columnar.ColumnVector;
import org.apache.paimon.data.columnar.heap.CastedArrayColumnVector;
import org.apache.paimon.data.columnar.heap.CastedMapColumnVector;
import org.apache.paimon.data.columnar.heap.CastedRowColumnVector;
import org.apache.paimon.data.columnar.heap.HeapArrayVector;
import org.apache.paimon.data.columnar.heap.HeapMapVector;
import org.apache.paimon.data.columnar.heap.HeapRowVector;
import org.apache.paimon.data.columnar.writable.WritableColumnVector;
import org.apache.paimon.format.parquet.reader.ParquetDecimalVector;
import org.apache.paimon.format.parquet.reader.ParquetTimestampVector;
import org.apache.paimon.format.parquet.type.ParquetField;
import org.apache.paimon.format.parquet.type.ParquetPrimitiveField;
import org.apache.paimon.fs.Path;
import org.apache.paimon.reader.FileRecordIterator;
import org.apache.paimon.reader.FileRecordReader;
import org.apache.paimon.shade.org.apache.parquet.VersionParser;
import org.apache.paimon.shade.org.apache.parquet.column.ColumnDescriptor;
import org.apache.paimon.shade.org.apache.parquet.column.page.PageReadStore;
import org.apache.paimon.shade.org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.paimon.shade.org.apache.parquet.schema.GroupType;
import org.apache.paimon.shade.org.apache.parquet.schema.MessageType;
import org.apache.paimon.shade.org.apache.parquet.schema.Type;
import org.apache.paimon.types.ArrayType;
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.MapType;
import org.apache.paimon.types.MultisetType;
import org.apache.paimon.types.RowType;

/* loaded from: input_file:org/apache/paimon/format/parquet/newreader/VectorizedParquetRecordReader.class */
/**
 * A {@link FileRecordReader} that reads a Parquet file row group by row group and exposes the
 * decoded rows as columnar batches.
 *
 * <p>Each call to {@link #readBatch()} decodes at most {@code batchSize} rows of the currently
 * loaded row group into the writable column vectors supplied at construction time and returns the
 * row iterator of the shared {@link ColumnarBatch}. The same batch and vectors are reset and
 * refilled on every call.
 */
public class VectorizedParquetRecordReader implements FileRecordReader<InternalRow> {
    /** Underlying Parquet reader; set to {@code null} once {@link #close()} has run. */
    private ParquetFileReader reader;

    /** Upper bound on the number of rows decoded per batch. */
    private final int batchSize;

    /** Filtered record count reported by the Parquet reader at construction time. */
    private final long totalRowCount;

    /** Number of rows handed out so far through {@link #nextBatch()}. */
    private long rowsReturned;

    /** One vector tree per requested top-level field, in the order of {@link #fields}. */
    private ParquetColumnVector[] columnVectors;

    /** Reused batch whose row iterator is returned by {@link #readBatch()}. */
    private ColumnarBatch columnarBatch;

    private final Path filePath;
    private final MessageType fileSchema;
    private final List<ParquetField> fields;

    /** Requested top-level fields whose path does not exist in the file schema. */
    private Set<ParquetField> missingColumns;

    /** Parsed "created by" version of the file, or {@code null} when it could not be parsed. */
    private VersionParser.ParsedVersion writerVersion;

    /** Sum of the row counts of all row groups loaded so far. */
    private long totalCountLoadedSoFar = 0;

    private final RowIndexGenerator rowIndexGenerator = new RowIndexGenerator();

    /**
     * Creates a reader over an already opened Parquet file.
     *
     * @param filePath path of the file, handed to the {@link ColumnarBatch}
     * @param reader opened Parquet reader; closed by {@link #close()}
     * @param fileSchema schema of the Parquet file, used to detect missing columns
     * @param fields requested fields, one per entry of {@code writableVectors}
     * @param writableVectors vectors that receive the decoded values
     * @param batchSize maximum number of rows per batch
     * @throws IOException if a required column is missing from the file or a primitive column's
     *     descriptor differs from the one in the file schema
     */
    public VectorizedParquetRecordReader(
            Path filePath,
            ParquetFileReader reader,
            MessageType fileSchema,
            List<ParquetField> fields,
            WritableColumnVector[] writableVectors,
            int batchSize)
            throws IOException {
        this.filePath = filePath;
        this.reader = reader;
        this.fileSchema = fileSchema;
        this.fields = fields;
        this.totalRowCount = reader.getFilteredRecordCount();
        this.batchSize = batchSize;
        try {
            this.writerVersion = VersionParser.parse(reader.getFileMetaData().getCreatedBy());
        } catch (Exception ignored) {
            // Deliberately best-effort: an unparsable "created by" string must not fail the read.
            // writerVersion simply stays null and is passed as such to the column readers.
        }
        checkMissingColumns();
        initBatch(writableVectors);
    }

    /**
     * Builds the reusable {@link ColumnarBatch} and one {@link ParquetColumnVector} per requested
     * field on top of the given writable vectors.
     */
    private void initBatch(WritableColumnVector[] writableVectors) {
        List<DataType> fieldTypes =
                this.fields.stream().map(ParquetField::getType).collect(Collectors.toList());
        this.columnarBatch =
                new ColumnarBatch(
                        this.filePath, createVectorizedColumnBatch(fieldTypes, writableVectors));

        this.columnVectors = new ParquetColumnVector[this.fields.size()];
        for (int i = 0; i < this.columnVectors.length; i++) {
            this.columnVectors[i] =
                    new ParquetColumnVector(
                            this.fields.get(i),
                            writableVectors[i],
                            this.batchSize,
                            this.missingColumns,
                            true);
        }
    }

    /**
     * Wraps each writable vector in the read-side vector that matches its data type.
     *
     * <p>Decimal and timestamp vectors get dedicated Parquet wrappers, nested types (array, map,
     * multiset, row) are wrapped recursively together with their children, and every other type is
     * exposed as is.
     *
     * @param types data types, index-aligned with {@code writableVectors}
     * @param writableVectors vectors to wrap
     * @return read-side vectors, index-aligned with the input
     */
    private ColumnVector[] createVectorizedColumnBatch(
            List<DataType> types, WritableColumnVector[] writableVectors) {
        ColumnVector[] vectors = new ColumnVector[writableVectors.length];
        for (int i = 0; i < writableVectors.length; i++) {
            WritableColumnVector writable = writableVectors[i];
            DataType type = types.get(i);
            switch (type.getTypeRoot()) {
                case DECIMAL:
                    vectors[i] = new ParquetDecimalVector(writable);
                    break;
                case TIMESTAMP_WITHOUT_TIME_ZONE:
                case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
                    vectors[i] = new ParquetTimestampVector(writable);
                    break;
                case ARRAY:
                    vectors[i] =
                            new CastedArrayColumnVector(
                                    (HeapArrayVector) writable,
                                    createVectorizedColumnBatch(
                                            Collections.singletonList(
                                                    ((ArrayType) type).getElementType()),
                                            castChildren(writable)));
                    break;
                case MAP:
                    MapType mapType = (MapType) type;
                    vectors[i] =
                            new CastedMapColumnVector(
                                    (HeapMapVector) writable,
                                    createVectorizedColumnBatch(
                                            Arrays.asList(
                                                    mapType.getKeyType(), mapType.getValueType()),
                                            castChildren(writable)));
                    break;
                case MULTISET:
                    MultisetType multisetType = (MultisetType) type;
                    // NOTE(review): the element type is used for both the key and the value
                    // child, exactly as in the original code - confirm this is intended for the
                    // value (count) vector.
                    vectors[i] =
                            new CastedMapColumnVector(
                                    (HeapMapVector) writable,
                                    createVectorizedColumnBatch(
                                            Arrays.asList(
                                                    multisetType.getElementType(),
                                                    multisetType.getElementType()),
                                            castChildren(writable)));
                    break;
                case ROW:
                    vectors[i] =
                            new CastedRowColumnVector(
                                    (HeapRowVector) writable,
                                    createVectorizedColumnBatch(
                                            ((RowType) type).getFieldTypes(),
                                            castChildren(writable)));
                    break;
                default:
                    vectors[i] = writable;
                    break;
            }
        }
        return vectors;
    }

    /**
     * Returns the children of {@code parent} as a {@link WritableColumnVector} array.
     *
     * @throws ClassCastException if any child is not a {@link WritableColumnVector}
     */
    private static WritableColumnVector[] castChildren(WritableColumnVector parent) {
        return Arrays.stream(parent.getChildren())
                .map(WritableColumnVector.class::cast)
                .toArray(WritableColumnVector[]::new);
    }

    /**
     * Validates every requested field against the file schema and records the optional ones that
     * are absent from the file.
     */
    private void checkMissingColumns() throws IOException {
        this.missingColumns = new HashSet<>();
        for (ParquetField field : this.fields) {
            checkColumn(field);
        }
    }

    /**
     * Checks a single requested field against the file schema.
     *
     * @throws IOException if the field is required but absent, or if it is a primitive field whose
     *     column descriptor does not equal the one stored in the file schema
     */
    private void checkColumn(ParquetField field) throws IOException {
        String[] path = field.path();
        if (!containsPath(this.fileSchema, path, 0)) {
            if (field.isRequired()) {
                throw new IOException(
                        "Required column is missing in data file. Col: " + Arrays.toString(path));
            }
            this.missingColumns.add(field);
        } else if (field.isPrimitive()) {
            ColumnDescriptor requested = ((ParquetPrimitiveField) field).getDescriptor();
            ColumnDescriptor inFile = this.fileSchema.getColumnDescription(requested.getPath());
            if (!inFile.equals(requested)) {
                throw new IOException("Schema evolution not supported.");
            }
        }
    }

    /**
     * Tells whether {@code path[depth..]} can be resolved by descending from {@code parquetType}
     * through nested group types.
     */
    private boolean containsPath(Type parquetType, String[] path, int depth) {
        if (path.length == depth) {
            return true;
        }
        if (!(parquetType instanceof GroupType)) {
            return false;
        }
        GroupType group = parquetType.asGroupType();
        String fieldName = path[depth];
        return group.containsField(fieldName)
                && containsPath(group.getType(fieldName), path, depth + 1);
    }

    /**
     * Decodes the next batch of rows into the shared column vectors.
     *
     * @return {@code true} if a batch was produced, {@code false} once all rows were returned
     * @throws IOException if the file ends before {@code totalRowCount} rows were read, or if
     *     decoding fails
     */
    public boolean nextBatch() throws IOException {
        if (this.rowsReturned >= this.totalRowCount) {
            return false;
        }
        for (ParquetColumnVector vector : this.columnVectors) {
            vector.reset();
        }
        this.columnarBatch.setNumRows(0);
        checkEndOfRowGroup();

        // Never read across a row-group boundary: cap the batch at what is left of the row groups
        // loaded so far.
        int rowsToRead =
                (int) Math.min(this.batchSize, this.totalCountLoadedSoFar - this.rowsReturned);
        for (ParquetColumnVector vector : this.columnVectors) {
            for (ParquetColumnVector leaf : vector.getLeaves()) {
                VectorizedColumnReader columnReader = leaf.getColumnReader();
                // Leaves without a reader (none was set in initColumnReader) are skipped.
                if (columnReader != null) {
                    columnReader.readBatch(
                            rowsToRead,
                            leaf.getColumn().getType(),
                            leaf.getValueVector(),
                            leaf.getRepetitionLevelVector(),
                            leaf.getDefinitionLevelVector());
                }
            }
            vector.assemble();
        }
        this.rowsReturned += rowsToRead;
        this.columnarBatch.setNumRows(rowsToRead);
        this.rowIndexGenerator.populateRowIndex(this.columnarBatch);
        return true;
    }

    /**
     * Loads the next filtered row group once every row of the previously loaded ones has been
     * returned, and attaches fresh column readers for it.
     *
     * @throws IOException if no further row group exists although more rows are expected
     */
    private void checkEndOfRowGroup() throws IOException {
        if (this.rowsReturned != this.totalCountLoadedSoFar) {
            return;
        }
        PageReadStore rowGroup = this.reader.readNextFilteredRowGroup();
        if (rowGroup == null) {
            throw new IOException(
                    "expecting more rows but reached last block. Read "
                            + this.rowsReturned
                            + " out of "
                            + this.totalRowCount);
        }
        this.rowIndexGenerator.initFromPageReadStore(rowGroup);
        for (ParquetColumnVector vector : this.columnVectors) {
            initColumnReader(rowGroup, vector);
        }
        this.totalCountLoadedSoFar += rowGroup.getRowCount();
    }

    /**
     * Recursively attaches a {@link VectorizedColumnReader} for the given row group to every
     * primitive vector below {@code vector}; vectors of missing columns are left untouched.
     */
    private void initColumnReader(PageReadStore rowGroup, ParquetColumnVector vector)
            throws IOException {
        ParquetField column = vector.getColumn();
        if (this.missingColumns.contains(column)) {
            return;
        }
        if (column.isPrimitive()) {
            vector.setColumnReader(
                    new VectorizedColumnReader(
                            ((ParquetPrimitiveField) column).getDescriptor(),
                            column.isRequired(),
                            rowGroup,
                            this.writerVersion));
        } else {
            for (ParquetColumnVector child : vector.getChildren()) {
                initColumnReader(rowGroup, child);
            }
        }
    }

    /**
     * Reads the next batch.
     *
     * @return the row iterator of the shared batch, or {@code null} when the file is exhausted
     */
    @Override // org.apache.paimon.reader.FileRecordReader, org.apache.paimon.reader.RecordReader
    @Nullable
    public FileRecordIterator<InternalRow> readBatch() throws IOException {
        if (nextBatch()) {
            return this.columnarBatch.vectorizedRowIterator;
        }
        return null;
    }

    /** Closes the underlying Parquet reader; calling it again is a no-op. */
    @Override // org.apache.paimon.reader.RecordReader, java.io.Closeable, java.lang.AutoCloseable
    public void close() throws IOException {
        if (this.reader != null) {
            this.reader.close();
            this.reader = null;
        }
    }
}
