package cern.nxcals.api.extraction.data.spark;

import cern.nxcals.api.domain.EntitySchema;
import cern.nxcals.api.domain.Partition;
import cern.nxcals.api.domain.TimeWindow;
import cern.nxcals.api.extraction.data.ExtractionUtils;
import cern.nxcals.api.extraction.data.builders.fluent.QueryData;
import cern.nxcals.common.Schemas;
import cern.nxcals.common.SystemFields;
import cern.nxcals.common.domain.EntityResource;
import com.google.common.base.Preconditions;
import com.google.common.collect.Iterables;
import java.net.URI;
import java.time.Clock;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.avro.Schema;
import org.apache.commons.collections.CollectionUtils;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.DataTypes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:BOOT-INF/lib/nxcals-extraction-api-0.5.5.jar:cern/nxcals/api/extraction/data/spark/DatasetServiceImpl.class */
public class DatasetServiceImpl implements DatasetService {
    // Class-wide logger.
    private static final Logger log = LoggerFactory.getLogger((Class<?>) DatasetServiceImpl.class);
    // Turns HBase-backed criteria into a dataset (used by getDatasets).
    private final DatasetCreator<HBaseResource> hBaseCreator;
    // Turns HDFS-backed criteria (sets of URIs) into a dataset (used by getDatasets).
    private final DatasetCreator<Set<URI>> hdfsCreator;
    // Produces the dataset returned when no resource yields any data (used by createEmptyDataset).
    private final DatasetCreator<Void> emptyDataSetCreator;
    // Derives the list of column mappings from the incoming query.
    private final ColumnMapper columnMapper;
    // Given a system name, returns the field that is mapped as the time column.
    private final Function<String, Schema.Field> stampFieldProvider;
    // Given a system name, returns the system definition fields added to entity (non-variable) queries.
    private final Function<String, List<Schema.Field>> systemDefinitionFieldsProvider;
    // Post-processing step applied to the collected dataset in createDataset.
    private final BiFunction<ResourcesAndQuery, Dataset<Row>, Dataset<Row>> datasetFinalizer;
    // Clock handed to DataSourceTimeWindow when the query window is split per data source.
    private final Clock clock;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:BOOT-INF/lib/nxcals-extraction-api-0.5.5.jar:cern/nxcals/api/extraction/data/spark/DatasetServiceImpl$ResourcePartition.class */
    /**
     * Value object used as the grouping key for entity resources: resources sharing the same
     * partition, entity schema and (intersected) time window end up in the same group.
     */
    public static class ResourcePartition {
        private final Partition partition;
        private final EntitySchema entitySchema;
        private final TimeWindow window;

        /**
         * @param entityResource resource whose entity history supplies the partition and schema
         * @param timeWindow     time window stored as-is in this key
         */
        ResourcePartition(EntityResource entityResource, TimeWindow timeWindow) {
            this.partition = entityResource.getEntityHistory().getPartition();
            this.entitySchema = entityResource.getEntityHistory().getEntitySchema();
            this.window = timeWindow;
        }

        public Partition getPartition() {
            return this.partition;
        }

        public EntitySchema getEntitySchema() {
            return this.entitySchema;
        }

        public TimeWindow getWindow() {
            return this.window;
        }

        /** Two keys are equal when partition, entity schema and window are all equal (nulls match nulls). */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof ResourcePartition)) {
                return false;
            }
            ResourcePartition other = (ResourcePartition) obj;
            return other.canEqual(this)
                    && Objects.equals(getPartition(), other.getPartition())
                    && Objects.equals(getEntitySchema(), other.getEntitySchema())
                    && Objects.equals(getWindow(), other.getWindow());
        }

        /** Hook letting subclasses opt out of equality with this type. */
        protected boolean canEqual(Object obj) {
            return obj instanceof ResourcePartition;
        }

        /** Hash over the same three components as {@link #equals(Object)}; null components contribute 43. */
        @Override
        public int hashCode() {
            Partition partitionValue = getPartition();
            EntitySchema schemaValue = getEntitySchema();
            TimeWindow windowValue = getWindow();
            int result = 1;
            result = (result * 59) + (partitionValue == null ? 43 : partitionValue.hashCode());
            result = (result * 59) + (schemaValue == null ? 43 : schemaValue.hashCode());
            result = (result * 59) + (windowValue == null ? 43 : windowValue.hashCode());
            return result;
        }

        @Override
        public String toString() {
            return "DatasetServiceImpl.ResourcePartition(partition=" + getPartition()
                    + ", entitySchema=" + getEntitySchema()
                    + ", window=" + getWindow() + ")";
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:BOOT-INF/lib/nxcals-extraction-api-0.5.5.jar:cern/nxcals/api/extraction/data/spark/DatasetServiceImpl$ResourceProjection.class */
    /**
     * Value object pairing the column mappings selected for a group of resources with a flag
     * telling whether each mapping must be queried separately (single-field-bound variable search).
     */
    public static class ResourceProjection {
        private final List<ColumnMapping> mappings;
        private final boolean isSingleFieldBoundVariableSearch;

        /**
         * @param list column mappings of the projection (stored by reference, not copied)
         * @param z    whether this projection is a single-field-bound variable search
         */
        public ResourceProjection(List<ColumnMapping> list, boolean z) {
            this.mappings = list;
            this.isSingleFieldBoundVariableSearch = z;
        }

        public List<ColumnMapping> getMappings() {
            return this.mappings;
        }

        public boolean isSingleFieldBoundVariableSearch() {
            return this.isSingleFieldBoundVariableSearch;
        }

        /** Equal when both the mappings and the flag are equal. */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof ResourceProjection)) {
                return false;
            }
            ResourceProjection other = (ResourceProjection) obj;
            return other.canEqual(this)
                    && Objects.equals(getMappings(), other.getMappings())
                    && isSingleFieldBoundVariableSearch() == other.isSingleFieldBoundVariableSearch();
        }

        /** Hook letting subclasses opt out of equality with this type. */
        protected boolean canEqual(Object obj) {
            return obj instanceof ResourceProjection;
        }

        /** Hash over the mappings (43 when null) and the flag (79 when true, 97 when false). */
        @Override
        public int hashCode() {
            List<ColumnMapping> mappingsValue = getMappings();
            int result = 1;
            result = (result * 59) + (mappingsValue == null ? 43 : mappingsValue.hashCode());
            result = (result * 59) + (isSingleFieldBoundVariableSearch() ? 79 : 97);
            return result;
        }

        @Override
        public String toString() {
            return "DatasetServiceImpl.ResourceProjection(mappings=" + getMappings()
                    + ", isSingleFieldBoundVariableSearch=" + isSingleFieldBoundVariableSearch() + ")";
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Convenience constructor that uses {@link ColumnMapper#DEFAULT_MAPPER} and the system
     * default-zone clock, delegating everything else to the full constructor.
     *
     * @param datasetCreator  creator for HBase-backed datasets
     * @param datasetCreator2 creator for HDFS-backed datasets
     * @param datasetCreator3 creator for the empty dataset
     * @param function        provider of the time field for a system name
     * @param function2       provider of the system definition fields for a system name
     * @param biFunction      finalizer applied to the collected dataset
     */
    public DatasetServiceImpl(
            DatasetCreator<HBaseResource> datasetCreator,
            DatasetCreator<Set<URI>> datasetCreator2,
            DatasetCreator<Void> datasetCreator3,
            Function<String, Schema.Field> function,
            Function<String, List<Schema.Field>> function2,
            BiFunction<ResourcesAndQuery, Dataset<Row>, Dataset<Row>> biFunction) {
        this(
                datasetCreator,
                datasetCreator2,
                datasetCreator3,
                ColumnMapper.DEFAULT_MAPPER,
                function,
                function2,
                biFunction,
                Clock.systemDefaultZone());
    }

    /**
     * Builds the dataset for the given resources and query, then passes it through the
     * configured finalizer.
     *
     * @param resourcesAndQuery resources and query to extract; must not be null
     * @return the finalized dataset
     * @throws NullPointerException if {@code resourcesAndQuery} is null
     */
    @Override // cern.nxcals.api.extraction.data.spark.DatasetService
    public Dataset<Row> createDataset(ResourcesAndQuery resourcesAndQuery) {
        Objects.requireNonNull(resourcesAndQuery, "Query for resources cannot be null!");
        Dataset<Row> collected = collectDatasetFrom(resourcesAndQuery);
        return this.datasetFinalizer.apply(resourcesAndQuery, collected);
    }

    /**
     * Collects the per-source datasets for the query and unions them into one.
     *
     * <p>When the query has no resources, or no data source produced a dataset, the empty
     * dataset is returned instead. Alias columns requested by the query but missing from the
     * unioned result are added as null columns.
     *
     * @param resourcesAndQuery resources and query to extract
     * @return the unioned dataset, or the empty dataset when nothing was found
     */
    private Dataset<Row> collectDatasetFrom(ResourcesAndQuery resourcesAndQuery) {
        if (CollectionUtils.isEmpty(resourcesAndQuery.getResources())) {
            return createEmptyDataset(resourcesAndQuery);
        }
        List<ColumnMapping> mappings = this.columnMapper.apply(resourcesAndQuery);
        Dataset<Row> combined = getDatasets(resourcesAndQuery, mappings).stream()
                .reduce(Dataset::union)
                // orElseGet: build the empty dataset only when it is actually needed,
                // instead of on every call as orElse(...) would.
                .orElseGet(() -> createEmptyDataset(resourcesAndQuery));
        return verifyColumns(combined, resourcesAndQuery.getQuery().getAliasFields());
    }

    /**
     * Ensures every alias in {@code map} exists as a column of the dataset, appending a
     * null-literal column for each alias that is missing.
     *
     * @param dataset dataset to check
     * @param map     alias name to field names; only the keys are used here
     * @return the dataset, extended with null columns where needed
     */
    private Dataset<Row> verifyColumns(Dataset<Row> dataset, Map<String, List<String>> map) {
        Dataset<Row> result = dataset;
        for (String alias : map.keySet()) {
            boolean present = false;
            for (String column : result.columns()) {
                if (column.equals(alias)) {
                    present = true;
                    break;
                }
            }
            if (!present) {
                result = result.withColumn(alias, functions.lit(null));
            }
        }
        return result;
    }

    /**
     * Creates one dataset per data source that has matching resources.
     *
     * <p>Up to three datasets are produced, in this order:
     * <ol>
     *   <li>HBase, for resources in updatable partitions, over the whole query time window;</li>
     *   <li>HBase, for resources in non-updatable partitions, over the HBase part of the window;</li>
     *   <li>HDFS, for resources in non-updatable partitions, over the HDFS part of the window.</li>
     * </ol>
     * A source is skipped when its time window is absent or it yields no criteria.
     *
     * @param resourcesAndQuery resources and query to extract
     * @param list              column mappings derived from the query
     * @return the datasets that could be created (possibly empty, never null)
     */
    private Collection<Dataset<Row>> getDatasets(ResourcesAndQuery resourcesAndQuery, List<ColumnMapping> list) {
        DataSourceTimeWindow timeWindows = DataSourceTimeWindow.getTimeWindows(resourcesAndQuery.getTimeWindow(), this.clock);
        List<Dataset<Row>> datasets = new ArrayList<>(3);

        // Updatable partitions are read from HBase for the full query window.
        Optional.ofNullable(resourcesAndQuery.getTimeWindow())
                .map(window -> getCriteria(resourcesAndQuery, list, this::getHBaseResourcesFrom, window, true))
                .filter(CollectionUtils::isNotEmpty)
                .map(this.hBaseCreator)
                .ifPresent(datasets::add);

        // Non-updatable partitions: HBase part of the window.
        timeWindows.getHbaseTimeWindow()
                .map(window -> getCriteria(resourcesAndQuery, list, this::getHBaseResourcesFrom, window, false))
                .filter(CollectionUtils::isNotEmpty)
                .map(this.hBaseCreator)
                .ifPresent(datasets::add);

        // Non-updatable partitions: HDFS part of the window.
        timeWindows.getHdfsTimeWindow()
                .map(window -> getCriteria(resourcesAndQuery, list, this::getHdfsResourcesFrom, window, false))
                .filter(CollectionUtils::isNotEmpty)
                .map(this.hdfsCreator)
                .ifPresent(datasets::add);

        return datasets;
    }

    /**
     * Builds the dataset returned when no data is available, using only the default mappings
     * plus the common mappings for the query's system.
     *
     * @param resourcesAndQuery resources and query to extract
     * @return the dataset produced by the empty-dataset creator
     */
    private Dataset<Row> createEmptyDataset(ResourcesAndQuery resourcesAndQuery) {
        QueryData<Dataset<Row>> query = resourcesAndQuery.getQuery();
        List<ColumnMapping> defaults = getDefaultMappings(resourcesAndQuery.isVariableSearch());
        List<ColumnMapping> projection = enhanceWithCommonMappings(defaults, query.getSystem(), query.isVariableSearch());
        return (Dataset<Row>) this.emptyDataSetCreator.apply(
                Collections.singletonList(DatasetCriteria.ofProjection(projection)));
    }

    /**
     * Returns the mappings every empty dataset starts from.
     *
     * @param z whether the query is a variable search
     * @return a single string-typed value-column mapping for variable searches, otherwise an empty list
     */
    private List<ColumnMapping> getDefaultMappings(boolean z) {
        if (!z) {
            return Collections.emptyList();
        }
        ColumnMapping valueMapping = ColumnMapping.builder()
                .fieldName(SystemFields.NXC_EXTR_VALUE.getValue())
                .fieldSchema(ExtractionUtils.STRING_SCHEMA)
                .dataType(DataTypes.StringType)
                .build();
        return Collections.singletonList(valueMapping);
    }

    /**
     * Builds the dataset criteria for one data source and time window.
     *
     * @param resourcesAndQuery resources and query to extract
     * @param list              column mappings derived from the query
     * @param function          resolves a group of entity resources to the data-source resource
     *                          (empty when the group has nothing to read)
     * @param timeWindow        time window the resources must intersect
     * @param z                 whether to select resources of updatable ({@code true}) or
     *                          non-updatable ({@code false}) partitions
     * @return the criteria, or an empty list when no resource matches; never null
     */
    private <T> List<DatasetCriteria<T>> getCriteria(ResourcesAndQuery resourcesAndQuery, List<ColumnMapping> list, Function<List<EntityResource>, Optional<T>> function, TimeWindow timeWindow, boolean z) {
        Map<ResourcePartition, List<EntityResource>> resourcePartitions = getResourcePartitions(resourcesAndQuery.getResources(), timeWindow, z);
        if (resourcePartitions.isEmpty()) {
            // Return an empty list rather than null; callers already discard empty results.
            return Collections.emptyList();
        }
        return createCriteria(resourcePartitions, resourcesAndQuery.getQuery(), function, list);
    }

    /**
     * Resolves a group of entity resources to the single HBase resource they share.
     *
     * @param list entity resources of one group
     * @return the HBase resource, or empty when {@code list} is null or empty
     * @throws IllegalArgumentException if the group does not map to exactly one distinct HBase resource
     */
    private Optional<HBaseResource> getHBaseResourcesFrom(List<EntityResource> list) {
        if (CollectionUtils.isEmpty(list)) {
            log.debug("No entity resources matching query, will return empty HBase resource!");
            return Optional.empty();
        }
        // One HBaseResource per (table name, schema, namespace); duplicates collapse in the set.
        Set<HBaseResource> tables = list.stream()
                .flatMap(resource -> resource.getResource().getHbaseTableNames().stream()
                        .map(tableName -> new HBaseResource(
                                tableName,
                                resource.getEntityHistory().getEntitySchema().getSchema(),
                                resource.getResource().getHbaseNamespace())))
                .collect(Collectors.toSet());
        int tableCount = tables.size();
        Preconditions.checkArgument(tableCount == 1, "Internal error, found " + tableCount + " HBase tables for the same system, partition and schema");
        return Optional.of(Iterables.getOnlyElement(tables));
    }

    /**
     * Gathers the distinct HDFS paths of a group of entity resources.
     *
     * @param list entity resources of one group
     * @return the set of paths, or empty when {@code list} is null or empty
     */
    private Optional<Set<URI>> getHdfsResourcesFrom(List<EntityResource> list) {
        if (CollectionUtils.isEmpty(list)) {
            return Optional.empty();
        }
        Set<URI> paths = list.stream()
                .flatMap(resource -> resource.getResource().getHdfsPaths().stream())
                .collect(Collectors.toSet());
        return Optional.of(paths);
    }

    /**
     * Selects the resources that intersect the time window and whose partition has the requested
     * updatable flag, then groups them by partition, entity schema and intersected time window.
     *
     * @param collection candidate entity resources
     * @param timeWindow time window the resources must intersect
     * @param z          required value of the partition's updatable property
     * @return the selected resources grouped by {@link ResourcePartition}
     */
    private Map<ResourcePartition, List<EntityResource>> getResourcePartitions(Collection<EntityResource> collection, TimeWindow timeWindow, boolean z) {
        return collection.stream()
                .filter(resource -> resource.getTimeWindow().intersects(timeWindow))
                .filter(resource -> resource.getEntityHistory().getPartition().getProperties().isUpdatable() == z)
                .collect(Collectors.groupingBy(
                        resource -> new ResourcePartition(resource, resource.getTimeWindow().intersect(timeWindow))));
    }

    /**
     * Creates the dataset criteria for each resource group.
     *
     * <p>For a single-field-bound variable search, one criteria is created per column mapping,
     * restricted to the resources whose schema contains that mapping's field. Otherwise one
     * criteria is created for the whole group. A criteria is only added when {@code function}
     * resolves the resources to a data-source resource.
     *
     * @param map       resources grouped by partition, schema and time window
     * @param queryData the query being executed
     * @param function  resolves a list of entity resources to the data-source resource
     * @param list      column mappings derived from the query
     * @return the created criteria (possibly empty, never null)
     */
    private <T> List<DatasetCriteria<T>> createCriteria(Map<ResourcePartition, List<EntityResource>> map, QueryData<Dataset<Row>> queryData, Function<List<EntityResource>, Optional<T>> function, List<ColumnMapping> list) {
        List<DatasetCriteria<T>> criteria = new ArrayList<>();
        for (Map.Entry<ResourcePartition, List<EntityResource>> entry : map.entrySet()) {
            ResourcePartition partition = entry.getKey();
            List<EntityResource> resources = entry.getValue();
            ResourceProjection projection = createProjection(partition.getEntitySchema().getSchema(), resources, queryData, list);
            if (projection.isSingleFieldBoundVariableSearch()) {
                // Each mapping is queried on its own, against only the resources that have the field.
                for (ColumnMapping mapping : projection.getMappings()) {
                    List<EntityResource> matching = filterByMapping(resources, mapping);
                    function.apply(matching)
                            .map(resource -> criteriaOf(Collections.singletonList(mapping), createSelection(matching, partition), resource, queryData))
                            .ifPresent(criteria::add);
                }
            } else {
                function.apply(resources)
                        .map(resource -> criteriaOf(projection.getMappings(), createSelection(resources, partition), resource, queryData))
                        .ifPresent(criteria::add);
            }
        }
        return criteria;
    }

    /**
     * Keeps only the resources whose entity schema contains the field of the given mapping.
     *
     * @param list          resources to filter
     * @param columnMapping mapping whose field name is looked up
     * @return a new list with the matching resources, in the original order
     */
    private List<EntityResource> filterByMapping(List<EntityResource> list, ColumnMapping columnMapping) {
        return list.stream()
                .filter(resource -> entitySchemaContainsField(resource, columnMapping.getFieldName()))
                .collect(Collectors.toList());
    }

    /**
     * Tells whether the schema of the resource's entity history declares a field with the given name.
     *
     * @param entityResource resource whose schema is inspected
     * @param str            field name to look for
     * @return {@code true} when the schema has such a field
     */
    private boolean entitySchemaContainsField(EntityResource entityResource, String str) {
        Schema schema = entityResource.getEntityHistory().getEntitySchema().getSchema();
        Schema.Field field = schema.getField(str);
        return field != null;
    }

    /**
     * Assembles a dataset criteria from a projection, a selection and the data-source resource,
     * after extending the projection with the common mappings of the selection's system.
     *
     * @param list              column mappings forming the projection
     * @param selectionCriteria selection (system, entity ids, time window)
     * @param t                 data-source resource to read from
     * @param queryData         the query being executed
     * @return the assembled criteria
     */
    private <T> DatasetCriteria<T> criteriaOf(List<ColumnMapping> list, SelectionCriteria selectionCriteria, T t, QueryData<Dataset<Row>> queryData) {
        String systemName = selectionCriteria.getSystemSpec().getName();
        List<ColumnMapping> projection = enhanceWithCommonMappings(list, systemName, queryData.isVariableSearch());
        return new DatasetCriteria<>(projection, t, selectionCriteria);
    }

    /**
     * Chooses the projection strategy for a resource group: the variables projection when the
     * query is a variable search and at least one resource's variable config names a field,
     * otherwise the entities projection.
     *
     * @param schema    schema of the resource group
     * @param list      resources of the group
     * @param queryData the query being executed
     * @param list2     column mappings derived from the query
     * @return the projection for the group
     */
    private ResourceProjection createProjection(Schema schema, List<EntityResource> list, QueryData<Dataset<Row>> queryData, List<ColumnMapping> list2) {
        if (queryData.isVariableSearch()) {
            for (EntityResource resource : list) {
                if (resource.getVariableConfig().getFieldName() != null) {
                    return variablesProjection(list, list2);
                }
            }
        }
        return entitiesProjection(schema, list2, queryData);
    }

    /**
     * Builds the projection for a field-bound variable search.
     *
     * <p>A single mapping is used as-is; otherwise only the mappings whose field name is
     * referenced by a variable config of the given resources are kept.
     *
     * @param list  resources of the group
     * @param list2 column mappings derived from the query
     * @return a projection flagged as single-field-bound variable search
     */
    private ResourceProjection variablesProjection(List<EntityResource> list, List<ColumnMapping> list2) {
        if (list2.size() == 1) {
            return new ResourceProjection(list2, true);
        }
        Set<String> variableFields = list.stream()
                .map(resource -> resource.getVariableConfig().getFieldName())
                .collect(Collectors.toSet());
        List<ColumnMapping> bound = list2.stream()
                .filter(mapping -> variableFields.contains(mapping.getFieldName()))
                .collect(Collectors.toList());
        return new ResourceProjection(bound, true);
    }

    /**
     * Builds the projection for an entity (or non-field-bound) query against one schema.
     *
     * <p>Mappings whose field exists in the schema are kept. Mappings whose field is missing and
     * is not listed under any alias are kept with a null field name. Missing fields that belong to
     * an alias are dropped at this stage. Afterwards a single column per alias is retained, absent
     * aliases are re-added, and the result is sorted by qualified name.
     *
     * @param schema    schema of the resource group
     * @param list      column mappings derived from the query
     * @param queryData the query being executed
     * @return a projection not flagged as single-field-bound variable search
     */
    private ResourceProjection entitiesProjection(Schema schema, List<ColumnMapping> list, QueryData<Dataset<Row>> queryData) {
        Set<String> schemaFields = schema.getFields().stream()
                .map(Schema.Field::name)
                .collect(Collectors.toSet());
        List<ColumnMapping> projected = new ArrayList<>(list.size());
        for (ColumnMapping mapping : list) {
            if (schemaFields.contains(mapping.getFieldName())) {
                projected.add(mapping);
                continue;
            }
            boolean usedByAlias = queryData.getAliasFields().values().stream()
                    .anyMatch(aliasedFields -> aliasedFields.contains(mapping.getFieldName()));
            if (!usedByAlias) {
                projected.add(mapping.toBuilder().fieldName(null).build());
            }
        }
        pickOnlyOneColumnForAlias(projected, queryData);
        addMissingAliasesIfNeeded(list, projected, queryData);
        projected.sort(Comparator.comparing(ColumnMapping::getQualifiedName));
        return new ResourceProjection(projected, false);
    }

    /**
     * Removes, in place, the mappings that are not the first available field of their alias.
     *
     * <p>For each alias of the query, the candidate fields are narrowed to those present in
     * {@code list}; a mapping whose alias is known is then kept only if its field is the first
     * remaining candidate.
     *
     * @param list      mappings to prune (modified in place)
     * @param queryData the query supplying the alias definitions
     */
    private void pickOnlyOneColumnForAlias(List<ColumnMapping> list, QueryData<Dataset<Row>> queryData) {
        Set<String> presentFields = list.stream()
                .map(ColumnMapping::getFieldName)
                .collect(Collectors.toSet());
        // Work on a copy so the query's own alias lists are not mutated.
        Map<String, List<String>> candidates = copyMap(queryData.getAliasFields());
        for (List<String> aliasedFields : candidates.values()) {
            aliasedFields.removeIf(field -> !presentFields.contains(field));
        }
        list.removeIf(mapping -> aliasDoesNotMatchMapping(candidates, mapping));
    }

    /**
     * Copies the map and each of its value lists, so the copy can be modified independently.
     *
     * @param map map to copy
     * @return a new map with the same keys, each mapped to a new mutable list of the same elements
     */
    private Map<String, List<String>> copyMap(Map<String, List<String>> map) {
        return map.entrySet().stream()
                .collect(Collectors.toMap(
                        Map.Entry::getKey,
                        entry -> new ArrayList<>(entry.getValue())));
    }

    /**
     * Tells whether the mapping's alias is defined in {@code map} but the mapping's field is not
     * the first field listed for that alias.
     *
     * <p>NOTE(review): {@code get(0)} throws if the alias's list is empty; this relies on the
     * caller never passing a mapping whose alias has no remaining candidate. Confirm.
     *
     * @param map           alias name to candidate field names
     * @param columnMapping mapping to check
     * @return {@code true} when the alias is known and its first field differs from the mapping's field
     */
    private boolean aliasDoesNotMatchMapping(Map<String, List<String>> map, ColumnMapping columnMapping) {
        if (!map.containsKey(columnMapping.getAlias())) {
            return false;
        }
        String firstCandidate = map.get(columnMapping.getAlias()).get(0);
        return !firstCandidate.equals(columnMapping.getFieldName());
    }

    /**
     * Re-adds aliases that are missing from the projection as mappings with a null field name.
     *
     * <p>Nothing happens when the query defines no aliases or when the projection has as many
     * mappings as the original list. Otherwise, for every alias that no projected mapping
     * carries, the first original mapping with that alias and one of its fields is copied into
     * the projection with its field name cleared.
     *
     * @param list      original column mappings derived from the query
     * @param list2     projection being built (modified in place)
     * @param queryData the query supplying the alias definitions
     */
    private void addMissingAliasesIfNeeded(List<ColumnMapping> list, List<ColumnMapping> list2, QueryData<Dataset<Row>> queryData) {
        if (ExtractionUtils.isMapEmpty(queryData.getAliasFields()) || list.size() == list2.size()) {
            return;
        }
        for (Map.Entry<String, List<String>> aliasEntry : queryData.getAliasFields().entrySet()) {
            boolean alreadyProjected = list2.stream()
                    .anyMatch(mapping -> mapping.getAlias().equals(aliasEntry.getKey()));
            if (alreadyProjected) {
                continue;
            }
            for (ColumnMapping candidate : list) {
                boolean fieldBelongsToAlias = aliasEntry.getValue().stream()
                        .anyMatch(field -> field.equals(candidate.getFieldName()));
                if (fieldBelongsToAlias && candidate.getAlias().equals(aliasEntry.getKey())) {
                    list2.add(candidate.toBuilder().fieldName(null).build());
                    break;
                }
            }
        }
    }

    /**
     * Returns a copy of the mappings extended with the columns every result carries.
     *
     * <p>Added in order: for variable searches, a string column aliased as the variable field
     * name (its source is the quoted field name when there is exactly one mapping, otherwise
     * null); the entity id column; for non-variable searches, the system definition fields; and
     * finally the time field.
     *
     * @param list mappings to extend (not modified)
     * @param str  system name passed to the field providers
     * @param z    whether the query is a variable search
     * @return a new list containing the original and the common mappings
     */
    private List<ColumnMapping> enhanceWithCommonMappings(List<ColumnMapping> list, String str, boolean z) {
        List<ColumnMapping> enhanced = new ArrayList<>(list);
        if (z) {
            String quotedFieldName = list.size() == 1
                    ? "'" + functions.lit(list.get(0).getFieldName()) + "'"
                    : null;
            ColumnMapping variableFieldName = ColumnMapping.builder()
                    .fieldName(quotedFieldName)
                    .fieldSchema(ExtractionUtils.STRING_SCHEMA)
                    .dataType(DataTypes.StringType)
                    .alias(SystemFields.NXC_EXTR_VARIABLE_FIELD_NAME.getValue())
                    .build();
            enhanced.add(variableFieldName);
        }
        ColumnMapping entityId = ColumnMapping.builder()
                .fieldName(Schemas.ENTITY_ID.getFieldName())
                .alias(SystemFields.NXC_EXTR_ENTITY_ID.getValue())
                .fieldSchema(Schemas.ENTITY_ID.getSchema())
                .dataType(DataTypes.LongType)
                .build();
        enhanced.add(entityId);
        if (!z) {
            addSystemDefinitionFieldsMappingIfNeeded(enhanced, str);
        }
        addTimeFieldMappingWithAliasIfNeeded(enhanced, str, z);
        return enhanced;
    }

    /**
     * Makes sure the system's time field is mapped and, for variable searches, aliases the first
     * mapping of that field as the extraction timestamp.
     *
     * @param list mappings to extend (modified in place)
     * @param str  system name used to look up the time field
     * @param z    whether the query is a variable search
     */
    private void addTimeFieldMappingWithAliasIfNeeded(List<ColumnMapping> list, String str, boolean z) {
        Schema.Field timeField = this.stampFieldProvider.apply(str);
        boolean alreadyMapped = list.stream()
                .anyMatch(mapping -> timeField.name().equals(mapping.getFieldName()));
        if (!alreadyMapped) {
            list.add(ColumnMapping.fromField(timeField));
        }
        if (!z) {
            return;
        }
        int index = 0;
        for (ColumnMapping mapping : list) {
            if (timeField.name().equals(mapping.getFieldName())) {
                break;
            }
            index++;
        }
        if (index < list.size()) {
            ColumnMapping original = list.get(index);
            list.set(index, original.toBuilder().alias(SystemFields.NXC_EXTR_TIMESTAMP.getValue()).build());
        }
    }

    /**
     * Adds a mapping for every system definition field that is not mapped yet.
     *
     * @param list mappings to extend (modified in place)
     * @param str  system name used to look up the definition fields
     */
    private void addSystemDefinitionFieldsMappingIfNeeded(List<ColumnMapping> list, String str) {
        for (Schema.Field definitionField : this.systemDefinitionFieldsProvider.apply(str)) {
            boolean alreadyMapped = list.stream()
                    .anyMatch(mapping -> definitionField.name().equals(mapping.getFieldName()));
            if (alreadyMapped) {
                continue;
            }
            list.add(ColumnMapping.fromField(definitionField));
        }
    }

    /**
     * Builds the selection for a resource group: the partition's system, the ids of the entities
     * behind the resources, and the group's time window.
     *
     * @param list              resources of the group
     * @param resourcePartition grouping key supplying the system and the time window
     * @return the selection criteria
     */
    private SelectionCriteria createSelection(List<EntityResource> list, ResourcePartition resourcePartition) {
        return new SelectionCriteria(
                resourcePartition.getPartition().getSystemSpec(),
                list.stream()
                        .map(resource -> Long.valueOf(resource.getEntityHistory().getEntity().getId()))
                        .collect(Collectors.toSet()),
                resourcePartition.getWindow());
    }

    /**
     * Full constructor; stores every collaborator as given.
     *
     * @param datasetCreator  creator for HBase-backed datasets
     * @param datasetCreator2 creator for HDFS-backed datasets
     * @param datasetCreator3 creator for the empty dataset
     * @param columnMapper    derives the column mappings from a query
     * @param function        provider of the time field for a system name
     * @param function2       provider of the system definition fields for a system name
     * @param biFunction      finalizer applied to the collected dataset
     * @param clock           clock used when splitting the query time window per data source
     */
    public DatasetServiceImpl(
            DatasetCreator<HBaseResource> datasetCreator,
            DatasetCreator<Set<URI>> datasetCreator2,
            DatasetCreator<Void> datasetCreator3,
            ColumnMapper columnMapper,
            Function<String, Schema.Field> function,
            Function<String, List<Schema.Field>> function2,
            BiFunction<ResourcesAndQuery, Dataset<Row>, Dataset<Row>> biFunction,
            Clock clock) {
        // Dataset creators
        this.hBaseCreator = datasetCreator;
        this.hdfsCreator = datasetCreator2;
        this.emptyDataSetCreator = datasetCreator3;
        // Mapping and schema helpers
        this.columnMapper = columnMapper;
        this.stampFieldProvider = function;
        this.systemDefinitionFieldsProvider = function2;
        // Post-processing and time source
        this.datasetFinalizer = biFunction;
        this.clock = clock;
    }
}
