package org.apache.druid.indexer;

import com.fasterxml.jackson.core.type.TypeReference;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import com.google.common.io.Closeables;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import javax.annotation.Nullable;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.Rows;
import org.apache.druid.hll.HyperLogLogCollector;
import org.apache.druid.indexer.HadoopDruidIndexerConfig;
import org.apache.druid.indexer.granularity.UniformGranularitySpec;
import org.apache.druid.indexer.partitions.HashedPartitionsSpec;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.timeline.partition.HashBasedNumberedShardSpec;
import org.apache.druid.timeline.partition.HashPartitionFunction;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.joda.time.DateTime;
import org.joda.time.DateTimeComparator;
import org.joda.time.Interval;

/**
 * {@link Jobby} that submits a Hadoop MapReduce job to estimate, per segment interval, the
 * cardinality of the input rows' group keys (using HyperLogLog collectors), and then turns those
 * estimates into hash-based shard specs that are stored back on the indexer config.
 */
public class DetermineHashedPartitionsJob implements Jobby {
    private static final Logger log = new Logger(DetermineHashedPartitionsJob.class);
    // Indexer configuration; run() may replace its granularity spec and sets its shard specs.
    private final HadoopDruidIndexerConfig config;
    // Failure message captured by run() when the Hadoop job reports failure; null otherwise.
    private String failureCause;
    // The Hadoop job created by run(); null until run() has been called.
    private Job groupByJob;
    // Value of System.currentTimeMillis() taken at the start of run().
    private long startTime;

    /**
     * Mapper that folds a hash of every input row's group key into one HyperLogLog collector per
     * segment interval, and emits each collector (keyed by the interval's start millis) once all
     * input has been consumed.
     *
     * <p>When the config supplies no segment intervals, the mapper runs in "determine intervals"
     * mode and creates collectors lazily for whatever segment-granularity bucket each row falls in.
     */
    public static class DetermineCardinalityMapper extends HadoopDruidIndexerMapper<LongWritable, BytesWritable> {
        private static final HashFunction HASH_FUNCTION = Hashing.murmur3_128();

        @Nullable
        private Granularity rollupGranularity = null;

        @Nullable
        private Map<Interval, HyperLogLogCollector> hyperLogLogs;

        @Nullable
        private HadoopDruidIndexerConfig config;
        private boolean determineIntervals;

        /**
         * Reads the indexer config from the Hadoop configuration and prepares the collector map:
         * an empty mutable map in "determine intervals" mode, otherwise an immutable map with one
         * collector per configured segment interval.
         *
         * @param context the Hadoop mapper context
         * @throws IOException if the parent setup fails
         * @throws InterruptedException if the parent setup is interrupted
         */
        @Override // org.apache.druid.indexer.HadoopDruidIndexerMapper
        public void setup(Mapper<Object, Object, LongWritable, BytesWritable>.Context context) throws IOException, InterruptedException {
            super.setup(context);
            this.rollupGranularity = getConfig().getGranularitySpec().getQueryGranularity();
            this.config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
            final Iterable<Interval> segmentGranularIntervals = this.config.getSegmentGranularIntervals();
            this.determineIntervals = !segmentGranularIntervals.iterator().hasNext();
            if (this.determineIntervals) {
                // Intervals are unknown up front, so collectors are added as rows arrive.
                this.hyperLogLogs = new HashMap<>();
                return;
            }
            final ImmutableMap.Builder<Interval, HyperLogLogCollector> builder = ImmutableMap.builder();
            for (Interval segmentInterval : segmentGranularIntervals) {
                builder.put(segmentInterval, HyperLogLogCollector.makeLatestCollector());
            }
            this.hyperLogLogs = builder.build();
        }

        /**
         * Hashes the row's group key (timestamp truncated to the rollup granularity, plus the row)
         * and adds the hash to the collector of the interval the row belongs to.
         *
         * @param inputRow the parsed input row
         * @param context the Hadoop mapper context, used here only for the processed-rows counter
         * @throws IOException if the group key cannot be serialized
         * @throws ISE if intervals are configured and none contains the row's timestamp
         */
        @Override // org.apache.druid.indexer.HadoopDruidIndexerMapper
        protected void innerMap(InputRow inputRow, Mapper<Object, Object, LongWritable, BytesWritable>.Context context) throws IOException {
            final List<Object> groupKey = Rows.toGroupKey(this.rollupGranularity.bucketStart(inputRow.getTimestamp()).getMillis(), inputRow);
            final HyperLogLogCollector collector;
            if (this.determineIntervals) {
                final Interval interval = this.config.getGranularitySpec().getSegmentGranularity().bucket(DateTimes.utc(inputRow.getTimestampFromEpoch()));
                // Single lookup: create the collector on first sight of this interval.
                collector = this.hyperLogLogs.computeIfAbsent(interval, ignored -> HyperLogLogCollector.makeLatestCollector());
            } else {
                final Optional<Interval> bucketInterval = this.config.getGranularitySpec().bucketInterval(DateTimes.utc(inputRow.getTimestampFromEpoch()));
                if (!bucketInterval.isPresent()) {
                    throw new ISE("No bucket found for timestamp: %s", inputRow.getTimestampFromEpoch());
                }
                collector = this.hyperLogLogs.get(bucketInterval.get());
            }
            collector.add(HASH_FUNCTION.hashBytes(HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsBytes(groupKey)).asBytes());
            context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_COUNTER).increment(1L);
        }

        /**
         * Drives the mapper: consumes all input, then writes one record per interval containing the
         * serialized collector. {@code cleanup} runs in a {@code finally} block so it is not skipped
         * when mapping or writing fails.
         *
         * @param context the Hadoop mapper context
         * @throws IOException if reading input or writing output fails
         * @throws InterruptedException if the task is interrupted
         */
        public void run(Mapper<Object, Object, LongWritable, BytesWritable>.Context context) throws IOException, InterruptedException {
            setup(context);
            try {
                while (context.nextKeyValue()) {
                    map(context.getCurrentKey(), context.getCurrentValue(), context);
                }
                for (Map.Entry<Interval, HyperLogLogCollector> entry : this.hyperLogLogs.entrySet()) {
                    context.write(new LongWritable(entry.getKey().getStartMillis()), new BytesWritable(entry.getValue().toByteArray()));
                }
            } finally {
                cleanup(context);
            }
        }
    }

    /**
     * Reducer that merges the HyperLogLog collectors emitted for one interval start, estimates the
     * cardinality, and writes that estimate as JSON to the interval's partition-info path. When no
     * input intervals are configured it also records every interval it saw and writes that list to
     * the interval-info path after all keys have been reduced.
     */
    public static class DetermineCardinalityReducer extends Reducer<LongWritable, BytesWritable, NullWritable, NullWritable> {
        private final List<Interval> intervals = new ArrayList<>();

        @Nullable
        protected HadoopDruidIndexerConfig config = null;
        private boolean determineIntervals;

        /**
         * Loads the indexer config from the Hadoop configuration and decides whether intervals
         * have to be determined from the data (no input intervals configured).
         *
         * @param context the Hadoop reducer context
         */
        protected void setup(Reducer<LongWritable, BytesWritable, NullWritable, NullWritable>.Context context) {
            this.config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
            this.determineIntervals = this.config.getInputIntervals().isEmpty();
        }

        /**
         * Folds all collectors received for one interval start and writes the rounded cardinality
         * estimate to that interval's partition-info file.
         *
         * @param longWritable start millis of the interval the collectors belong to
         * @param iterable serialized HyperLogLog collectors produced by the mappers
         * @param context the Hadoop reducer context
         * @throws IOException if the partition-info file cannot be written
         * @throws ISE if intervals are configured and none contains the key's timestamp
         */
        protected void reduce(LongWritable longWritable, Iterable<BytesWritable> iterable, Reducer<LongWritable, BytesWritable, NullWritable, NullWritable>.Context context) throws IOException {
            final HyperLogLogCollector aggregate = HyperLogLogCollector.makeLatestCollector();
            for (BytesWritable value : iterable) {
                // Only the first getLength() bytes of the backing array are valid.
                aggregate.fold(HyperLogLogCollector.makeCollector(ByteBuffer.wrap(value.getBytes(), 0, value.getLength())));
            }

            final Interval interval;
            if (this.determineIntervals) {
                interval = this.config.getGranularitySpec().getSegmentGranularity().bucket(DateTimes.utc(longWritable.get()));
            } else {
                final Optional<Interval> bucketInterval = this.config.getGranularitySpec().bucketInterval(DateTimes.utc(longWritable.get()));
                if (!bucketInterval.isPresent()) {
                    throw new ISE("No bucket found for timestamp: %s", longWritable.get());
                }
                interval = bucketInterval.get();
            }
            this.intervals.add(interval);

            // try-with-resources: a failure while closing is recorded as suppressed instead of
            // replacing an exception thrown by the write itself.
            try (OutputStream out = Utils.makePathAndOutputStream(context, this.config.makeSegmentPartitionInfoPath(interval), this.config.isOverwriteFiles())) {
                HadoopDruidIndexerConfig.JSON_MAPPER.writerFor(Long.class).writeValue(out, aggregate.estimateCardinalityRound());
            }
        }

        /**
         * Runs the standard reduce loop and, in "determine intervals" mode, afterwards writes the
         * list of intervals seen by {@code reduce} to the interval-info path.
         *
         * @param context the Hadoop reducer context
         * @throws IOException if reducing or writing the interval list fails
         * @throws InterruptedException if the task is interrupted
         */
        public void run(Reducer<LongWritable, BytesWritable, NullWritable, NullWritable>.Context context) throws IOException, InterruptedException {
            super.run(context);
            if (!this.determineIntervals) {
                return;
            }
            try (OutputStream out = Utils.makePathAndOutputStream(context, this.config.makeIntervalInfoPath(), this.config.isOverwriteFiles())) {
                HadoopDruidIndexerConfig.JSON_MAPPER.writerFor(new TypeReference<List<Interval>>() {
                }).writeValue(out, this.intervals);
            }
        }

        // The synthetic bridge reduce(Object, Iterable, Reducer.Context) is intentionally not
        // declared here: the compiler generates it for the typed reduce(...) override above.
    }

    /**
     * Partitioner that routes each interval-start key to its own reducer. Everything goes to
     * partition 0 when the job tracker address is "local" or when no input intervals are configured.
     */
    public static class DetermineHashedPartitionsPartitioner extends Partitioner<LongWritable, BytesWritable> implements Configurable {

        @Nullable
        private Configuration config;
        private boolean determineIntervals;

        // Interval start millis -> reducer index; only populated when input intervals are configured.
        @Nullable
        private Map<LongWritable, Integer> reducerLookup;

        /**
         * @param longWritable interval start millis emitted by the mapper
         * @param bytesWritable serialized collector (not inspected)
         * @param i number of partitions (not inspected)
         * @return the reducer index for the key, or 0 in local / determine-intervals mode
         */
        public int getPartition(LongWritable longWritable, BytesWritable bytesWritable, int i) {
            final boolean runningLocally = "local".equals(JobHelper.getJobTrackerAddress(this.config));
            if (runningLocally || this.determineIntervals) {
                return 0;
            }
            return this.reducerLookup.get(longWritable);
        }

        /** @return the Hadoop configuration last passed to {@link #setConf}. */
        public Configuration getConf() {
            return this.config;
        }

        /**
         * Stores the configuration and, when input intervals are configured, assigns consecutive
         * reducer indices to the segment intervals in iteration order.
         *
         * @param configuration the Hadoop configuration carrying the serialized indexer config
         */
        public void setConf(Configuration configuration) {
            this.config = configuration;
            final HadoopDruidIndexerConfig indexerConfig = HadoopDruidIndexerConfig.fromConfiguration(configuration);
            this.determineIntervals = indexerConfig.getInputIntervals().isEmpty();
            if (!this.determineIntervals) {
                final ImmutableMap.Builder<LongWritable, Integer> lookup = ImmutableMap.builder();
                int reducerIndex = 0;
                for (Interval segmentInterval : indexerConfig.getSegmentGranularIntervals()) {
                    lookup.put(new LongWritable(segmentInterval.getStartMillis()), reducerIndex++);
                }
                this.reducerLookup = lookup.build();
            }
        }
    }

    /**
     * Creates the job for the given indexer configuration. The configuration is stored as-is and
     * is later mutated by {@link #run()}.
     *
     * @param hadoopDruidIndexerConfig the indexer configuration to read from and write results to
     */
    public DetermineHashedPartitionsJob(HadoopDruidIndexerConfig hadoopDruidIndexerConfig) {
        this.config = hadoopDruidIndexerConfig;
    }

    /**
     * Configures, submits and waits for the cardinality-estimation Hadoop job, then reads the
     * per-interval row estimates it produced and stores the resulting hash-based shard specs on
     * the config.
     *
     * <p>If no input intervals are configured, the intervals written by the reducer are loaded
     * first and installed on the config as a new {@link UniformGranularitySpec}.
     *
     * @return {@code true} when shard specs were determined; {@code false} when the Hadoop job
     *         reported failure (the message is then available from {@link #getErrorMessage()})
     * @throws RuntimeException wrapping any exception raised while running the job or reading its output
     */
    public boolean run() {
        try {
            this.startTime = System.currentTimeMillis();
            this.groupByJob = Job.getInstance(
                new Configuration(),
                StringUtils.format("%s-determine_partitions_hashed-%s", this.config.getDataSource(), this.config.getIntervals())
            );
            JobHelper.injectSystemProperties(this.groupByJob.getConfiguration(), this.config);
            this.config.addJobProperties(this.groupByJob);
            this.groupByJob.setMapperClass(DetermineCardinalityMapper.class);
            this.groupByJob.setMapOutputKeyClass(LongWritable.class);
            this.groupByJob.setMapOutputValueClass(BytesWritable.class);
            this.groupByJob.setReducerClass(DetermineCardinalityReducer.class);
            this.groupByJob.setOutputKeyClass(NullWritable.class);
            this.groupByJob.setOutputValueClass(NullWritable.class);
            this.groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
            this.groupByJob.setPartitionerClass(DetermineHashedPartitionsPartitioner.class);
            if (this.config.getInputIntervals().isEmpty()) {
                // Intervals are unknown, so a single reducer collects all of them.
                this.groupByJob.setNumReduceTasks(1);
            } else {
                // One reducer per segment interval, matching the partitioner's lookup.
                this.groupByJob.setNumReduceTasks(Iterators.size(this.config.getSegmentGranularIntervals().iterator()));
            }
            JobHelper.setupClasspath(
                JobHelper.distributedClassPath(this.config.getWorkingPath()),
                JobHelper.distributedClassPath(this.config.makeIntermediatePath()),
                this.groupByJob
            );
            this.config.addInputPaths(this.groupByJob);
            this.config.intoConfiguration(this.groupByJob);
            FileOutputFormat.setOutputPath(this.groupByJob, this.config.makeGroupedDataDir());
            this.groupByJob.submit();
            log.info("Job %s submitted, status available at: %s", this.groupByJob.getJobName(), this.groupByJob.getTrackingURL());
            if (this.groupByJob.getJobID() != null) {
                JobHelper.writeJobIdToFile(this.config.getHadoopJobIdFileName(), this.groupByJob.getJobID().toString());
            }

            // The wait must sit inside the try so an IOException raised while polling job status
            // can be checked against the application's real outcome before it is rethrown.
            try {
                if (!this.groupByJob.waitForCompletion(true)) {
                    log.error("Job failed: %s", this.groupByJob.getJobID());
                    this.failureCause = Utils.getFailureMessage(this.groupByJob, HadoopDruidIndexerConfig.JSON_MAPPER);
                    return false;
                }
            } catch (IOException ioe) {
                if (!Utils.checkAppSuccessForJobIOException(ioe, this.groupByJob, this.config.isUseYarnRMJobStatusFallback())) {
                    throw ioe;
                }
            }

            log.info("Job completed, loading up partitions for intervals[%s].", this.config.getSegmentGranularIntervals());
            FileSystem fileSystem = null;
            if (this.config.getInputIntervals().isEmpty()) {
                final Path intervalInfoPath = this.config.makeIntervalInfoPath();
                fileSystem = intervalInfoPath.getFileSystem(this.groupByJob.getConfiguration());
                if (!Utils.exists(this.groupByJob, fileSystem, intervalInfoPath)) {
                    throw new ISE("Path[%s] didn't exist!?", intervalInfoPath);
                }
                final List<Interval> determinedIntervals = HadoopDruidIndexerConfig.JSON_MAPPER.readValue(
                    Utils.openInputStream(this.groupByJob, intervalInfoPath),
                    new TypeReference<List<Interval>>() {
                    }
                );
                this.config.setGranularitySpec(
                    new UniformGranularitySpec(
                        this.config.getGranularitySpec().getSegmentGranularity(),
                        this.config.getGranularitySpec().getQueryGranularity(),
                        Boolean.valueOf(this.config.getGranularitySpec().isRollup()),
                        determinedIntervals
                    )
                );
                log.info("Determined Intervals for Job [%s].", this.config.getSegmentGranularIntervals());
            }

            final Map<Long, List<HadoopyShardSpec>> shardSpecs = new TreeMap<>(DateTimeComparator.getInstance());
            // Held as Object so the instanceof check below is a real runtime check.
            final Object partitionsSpec = this.config.getPartitionsSpec();
            if (!(partitionsSpec instanceof HashedPartitionsSpec)) {
                throw new ISE("%s is expected, but got %s", HashedPartitionsSpec.class.getName(), partitionsSpec.getClass().getName());
            }
            final HashPartitionFunction partitionFunction = ((HashedPartitionsSpec) partitionsSpec).getPartitionFunction();

            int shardCount = 0;
            for (Interval segmentInterval : this.config.getSegmentGranularIntervals()) {
                final DateTime bucket = segmentInterval.getStart();
                final Path partitionInfoPath = this.config.makeSegmentPartitionInfoPath(segmentInterval);
                if (fileSystem == null) {
                    fileSystem = partitionInfoPath.getFileSystem(this.groupByJob.getConfiguration());
                }
                if (Utils.exists(this.groupByJob, fileSystem, partitionInfoPath)) {
                    final Long numRows = HadoopDruidIndexerConfig.JSON_MAPPER.readValue(
                        Utils.openInputStream(this.groupByJob, partitionInfoPath),
                        Long.class
                    );
                    log.info("Found approximately [%,d] rows in data.", numRows);

                    // Divide in floating point so that ceil() actually rounds up.
                    final int numberOfShards = (int) Math.ceil(numRows.doubleValue() / this.config.getTargetPartitionSize());
                    log.info("Creating [%,d] shards", numberOfShards);

                    final List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(numberOfShards);
                    for (int i = 0; i < numberOfShards; i++) {
                        actualSpecs.add(
                            new HadoopyShardSpec(
                                new HashBasedNumberedShardSpec(
                                    i,
                                    numberOfShards,
                                    Integer.valueOf(i),
                                    Integer.valueOf(numberOfShards),
                                    (List) null,
                                    partitionFunction,
                                    HadoopDruidIndexerConfig.JSON_MAPPER
                                ),
                                shardCount++
                            )
                        );
                        log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
                    }
                    shardSpecs.put(bucket.getMillis(), actualSpecs);
                } else {
                    log.info("Path[%s] didn't exist!?", partitionInfoPath);
                }
            }
            this.config.setShardSpecs(shardSpecs);
            log.info("DetermineHashedPartitionsJob took %d millis", System.currentTimeMillis() - this.startTime);
            return true;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Builds ingestion row metrics from the Hadoop job's counters.
     *
     * @return the row metrics, or {@code null} if the job has not been created yet or its
     *         counters could not be read
     */
    public Map<String, Object> getStats() {
        if (this.groupByJob == null) {
            return null;
        }
        try {
            final Counters jobCounters = this.groupByJob.getCounters();
            final long processed = jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_COUNTER).getValue();
            final long processedWithErrors = jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_WITH_ERRORS_COUNTER).getValue();
            final long unparseable = jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_UNPARSEABLE_COUNTER).getValue();
            final long thrownAway = jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER).getValue();
            return TaskMetricsUtils.makeIngestionRowMetrics(processed, processedWithErrors, unparseable, thrownAway);
        } catch (IllegalStateException ise) {
            // Counters are not readable in the job's current state; report "no stats".
            log.debug("Couldn't get counters due to job state");
            return null;
        } catch (Exception e) {
            log.debug(e, "Encountered exception in getStats().");
            return null;
        }
    }

    /**
     * Returns the failure message recorded by {@link #run()} when the Hadoop job did not complete
     * successfully.
     *
     * @return the recorded failure message, or {@code null} if none has been recorded
     */
    @Nullable
    public String getErrorMessage() {
        return this.failureCause;
    }
}
