/*
 * Decompiled with CFR 0.152.
 */
package org.apache.gobblin.compaction.dataset;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Sets;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.apache.gobblin.compaction.dataset.Dataset;
import org.apache.gobblin.compaction.dataset.DatasetsFinder;
import org.apache.gobblin.configuration.State;
import org.apache.gobblin.util.DatasetFilterUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Period;
import org.joda.time.ReadableInstant;
import org.joda.time.ReadablePeriod;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.PeriodFormatter;
import org.joda.time.format.PeriodFormatterBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class TimeBasedSubDirDatasetsFinder
extends DatasetsFinder {
    private static final Logger log = LoggerFactory.getLogger(TimeBasedSubDirDatasetsFinder.class);
    private static final String COMPACTION_TIMEBASED_PREFIX = "compaction.timebased.";
    public static final String COMPACTION_TIMEBASED_FOLDER_PATTERN = "compaction.timebased.folder.pattern";
    public static final String DEFAULT_COMPACTION_TIMEBASED_FOLDER_PATTERN = "YYYY/MM/dd";
    public static final String COMPACTION_TIMEBASED_SUBDIR_PATTERN = "compaction.timebased.subdir.pattern";
    public static final String DEFAULT_COMPACTION_TIMEBASED_SUBDIR_PATTERN = "*";
    public static final String COMPACTION_TIMEBASED_MAX_TIME_AGO = "compaction.timebased.max.time.ago";
    public static final String DEFAULT_COMPACTION_TIMEBASED_MAX_TIME_AGO = "3d";
    public static final String COMPACTION_TIMEBASED_MIN_TIME_AGO = "compaction.timebased.min.time.ago";
    public static final String DEFAULT_COMPACTION_TIMEBASED_MIN_TIME_AGO = "1d";
    protected final String folderTimePattern;
    protected final String subDirPattern;
    protected final DateTimeZone timeZone;
    protected final DateTimeFormatter timeFormatter;
    protected final String inputSubDir = this.getInputSubDir();
    protected final String inputLateSubDir = this.getInputLateSubDir();
    protected final String destSubDir = this.getDestSubDir();
    protected final String destLateSubDir = this.getDestLateSubDir();

    @VisibleForTesting
    public TimeBasedSubDirDatasetsFinder(State state, FileSystem fs) throws Exception {
        super(state, fs);
        this.folderTimePattern = this.getFolderPattern();
        this.subDirPattern = this.getSubDirPattern();
        this.timeZone = DateTimeZone.forID((String)this.state.getProp("compaction.timezone", "America/Los_Angeles"));
        this.timeFormatter = DateTimeFormat.forPattern((String)this.folderTimePattern).withZone(this.timeZone);
    }

    public TimeBasedSubDirDatasetsFinder(State state) throws Exception {
        super(state);
        this.folderTimePattern = this.getFolderPattern();
        this.subDirPattern = this.getSubDirPattern();
        this.timeZone = DateTimeZone.forID((String)this.state.getProp("compaction.timezone", "America/Los_Angeles"));
        this.timeFormatter = DateTimeFormat.forPattern((String)this.folderTimePattern).withZone(this.timeZone);
    }

    protected String getDatasetName(String path, String basePath) {
        int startPos = path.indexOf(basePath) + basePath.length();
        return StringUtils.removeStart((String)path.substring(startPos), (String)"/");
    }

    @Override
    public Set<Dataset> findDistinctDatasets() throws IOException {
        HashSet datasets = Sets.newHashSet();
        for (FileStatus datasetsFileStatus : this.fs.globStatus(new Path(this.inputDir, this.subDirPattern))) {
            String datasetName;
            log.info("Scanning directory : " + datasetsFileStatus.getPath().toString());
            if (!datasetsFileStatus.isDirectory() || !DatasetFilterUtils.survived((String)(datasetName = this.getDatasetName(datasetsFileStatus.getPath().toString(), this.inputDir)), (List)this.blacklist, (List)this.whitelist)) continue;
            log.info("Found dataset: " + datasetName);
            Path inputPath = new Path(this.inputDir, new Path(datasetName, this.inputSubDir));
            Path inputLatePath = new Path(this.inputDir, new Path(datasetName, this.inputLateSubDir));
            Path outputPath = new Path(this.destDir, new Path(datasetName, this.destSubDir));
            Path outputLatePath = new Path(this.destDir, new Path(datasetName, this.destLateSubDir));
            Path outputTmpPath = new Path(this.tmpOutputDir, new Path(datasetName, this.destSubDir));
            double priority = this.getDatasetPriority(datasetName);
            String folderStructure = this.getFolderStructure();
            for (FileStatus status : this.fs.globStatus(new Path(inputPath, folderStructure))) {
                Path jobInputPath = status.getPath();
                DateTime folderTime = null;
                try {
                    folderTime = this.getFolderTime(jobInputPath, inputPath);
                }
                catch (RuntimeException e) {
                    log.warn("{} is not a valid folder. Will be skipped due to exception.", (Object)jobInputPath, (Object)e);
                    continue;
                }
                if (!this.folderWithinAllowedPeriod(jobInputPath, folderTime)) continue;
                Path jobInputLatePath = this.appendFolderTime(inputLatePath, folderTime);
                Path jobOutputPath = this.appendFolderTime(outputPath, folderTime);
                Path jobOutputLatePath = this.appendFolderTime(outputLatePath, folderTime);
                Path jobOutputTmpPath = this.appendFolderTime(outputTmpPath, folderTime);
                Dataset timeBasedDataset = new Dataset.Builder().withPriority(priority).withDatasetName(datasetName).addInputPath(this.recompactDatasets ? jobOutputPath : jobInputPath).addInputLatePath(this.recompactDatasets ? jobOutputLatePath : jobInputLatePath).withOutputPath(jobOutputPath).withOutputLatePath(jobOutputLatePath).withOutputTmpPath(jobOutputTmpPath).build();
                timeBasedDataset.setJobProp("compaction.job.dest.partition", folderTime.toString(this.timeFormatter));
                timeBasedDataset.setJobProp("compaction.input.path.time", folderTime.getMillis());
                datasets.add(timeBasedDataset);
            }
        }
        return datasets;
    }

    private String getInputSubDir() {
        return this.state.getProp("compaction.input.subdir", "hourly");
    }

    private String getInputLateSubDir() {
        return this.state.getProp("compaction.input.subdir", "hourly") + "_late";
    }

    private String getDestLateSubDir() {
        return this.state.getProp("compaction.dest.subdir", "daily") + "_late";
    }

    private String getDestSubDir() {
        return this.state.getProp("compaction.dest.subdir", "daily");
    }

    protected String getFolderStructure() {
        return this.folderTimePattern.replaceAll("[a-zA-Z0-9='-]+", DEFAULT_COMPACTION_TIMEBASED_SUBDIR_PATTERN);
    }

    private String getFolderPattern() {
        String folderPattern = this.state.getProp(COMPACTION_TIMEBASED_FOLDER_PATTERN, DEFAULT_COMPACTION_TIMEBASED_FOLDER_PATTERN);
        log.info("Compaction folder pattern: " + folderPattern);
        return folderPattern;
    }

    private String getSubDirPattern() {
        String subdirPattern = this.state.getProp(COMPACTION_TIMEBASED_SUBDIR_PATTERN, DEFAULT_COMPACTION_TIMEBASED_SUBDIR_PATTERN);
        log.info("Compaction subdir pattern: " + subdirPattern);
        return subdirPattern;
    }

    protected DateTime getFolderTime(Path path, Path basePath) {
        int startPos = path.toString().indexOf(basePath.toString()) + basePath.toString().length();
        return this.timeFormatter.parseDateTime(StringUtils.removeStart((String)path.toString().substring(startPos), (String)"/"));
    }

    protected boolean folderWithinAllowedPeriod(Path inputFolder, DateTime folderTime) {
        DateTime currentTime = new DateTime(this.timeZone);
        PeriodFormatter periodFormatter = TimeBasedSubDirDatasetsFinder.getPeriodFormatter();
        DateTime earliestAllowedFolderTime = this.getEarliestAllowedFolderTime(currentTime, periodFormatter);
        DateTime latestAllowedFolderTime = this.getLatestAllowedFolderTime(currentTime, periodFormatter);
        if (folderTime.isBefore((ReadableInstant)earliestAllowedFolderTime)) {
            log.info(String.format("Folder time for %s is %s, earlier than the earliest allowed folder time, %s. Skipping", inputFolder, folderTime, earliestAllowedFolderTime));
            return false;
        }
        if (folderTime.isAfter((ReadableInstant)latestAllowedFolderTime)) {
            log.info(String.format("Folder time for %s is %s, later than the latest allowed folder time, %s. Skipping", inputFolder, folderTime, latestAllowedFolderTime));
            return false;
        }
        return true;
    }

    public static PeriodFormatter getPeriodFormatter() {
        return new PeriodFormatterBuilder().appendMonths().appendSuffix("m").appendDays().appendSuffix("d").appendHours().appendSuffix("h").appendMinutes().appendSuffix("min").toFormatter();
    }

    private DateTime getEarliestAllowedFolderTime(DateTime currentTime, PeriodFormatter periodFormatter) {
        String maxTimeAgoStr = this.state.getProp(COMPACTION_TIMEBASED_MAX_TIME_AGO, DEFAULT_COMPACTION_TIMEBASED_MAX_TIME_AGO);
        Period maxTimeAgo = periodFormatter.parsePeriod(maxTimeAgoStr);
        return currentTime.minus((ReadablePeriod)maxTimeAgo);
    }

    private DateTime getLatestAllowedFolderTime(DateTime currentTime, PeriodFormatter periodFormatter) {
        String minTimeAgoStr = this.state.getProp(COMPACTION_TIMEBASED_MIN_TIME_AGO, DEFAULT_COMPACTION_TIMEBASED_MIN_TIME_AGO);
        Period minTimeAgo = periodFormatter.parsePeriod(minTimeAgoStr);
        return currentTime.minus((ReadablePeriod)minTimeAgo);
    }

    protected Path appendFolderTime(Path path, DateTime folderTime) {
        return new Path(path, folderTime.toString(this.timeFormatter));
    }
}

