/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.scoring.webgraph;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.concurrent.TimeUnit;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.scoring.webgraph.Node;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.URLUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class NodeDumper
extends Configured
implements Tool {
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    /**
     * Configures and runs the MapReduce job that dumps the node database of a web graph.
     *
     * <p>The job reads the {@code nodes} directory below {@code webGraphDb}. When
     * {@code nameType} is {@code null} the {@link Sorter} mapper/reducer pair is used,
     * otherwise the {@link Dumper} pair is used. The job always runs with a single
     * reduce task and emits {@code Text} keys with {@code FloatWritable} values.
     *
     * @param webGraphDb     root directory of the web graph database
     * @param type           which node number to dump; published to the job as the
     *                       boolean settings "inlinks", "outlinks" and "scores"
     * @param topN           limit published to the job as "topn"
     * @param output         output path of the job
     * @param asEff          when {@code true} the text output separator is set to "="
     * @param nameType       grouping (host or domain), or {@code null} for the plain
     *                       sorted dump; published as "host" and "domain"
     * @param aggrType       aggregation used for grouped dumps; published as "sum" and "max"
     * @param asSequenceFile when {@code true} the job writes a sequence file instead of text
     * @throws Exception the exception raised while running the job, or a
     *                   {@link RuntimeException} when the job completes unsuccessfully
     */
    public void dumpNodes(Path webGraphDb, DumpType type, long topN, Path output, boolean asEff, NameType nameType, AggrType aggrType, boolean asSequenceFile) throws Exception {
        StopWatch stopWatch = new StopWatch();
        stopWatch.start();
        LOG.info("NodeDumper: starting");

        Path nodeDb = new Path(webGraphDb, "nodes");
        Job dumper = Job.getInstance(this.getConf(), "Nutch NodeDumper: " + webGraphDb);
        Configuration conf = dumper.getConfiguration();
        FileInputFormat.addInputPath(dumper, nodeDb);
        dumper.setInputFormatClass(SequenceFileInputFormat.class);

        // Without a grouping the nodes are sorted by number; with one they are aggregated by name.
        if (nameType == null) {
            dumper.setJarByClass(Sorter.class);
            dumper.setMapperClass(Sorter.SorterMapper.class);
            dumper.setReducerClass(Sorter.SorterReducer.class);
            dumper.setMapOutputKeyClass(FloatWritable.class);
            dumper.setMapOutputValueClass(Text.class);
        } else {
            dumper.setJarByClass(Dumper.class);
            dumper.setMapperClass(Dumper.DumperMapper.class);
            dumper.setReducerClass(Dumper.DumperReducer.class);
            dumper.setMapOutputKeyClass(Text.class);
            dumper.setMapOutputValueClass(FloatWritable.class);
        }

        dumper.setOutputKeyClass(Text.class);
        dumper.setOutputValueClass(FloatWritable.class);
        FileOutputFormat.setOutputPath(dumper, output);
        if (asSequenceFile) {
            dumper.setOutputFormatClass(SequenceFileOutputFormat.class);
        } else {
            dumper.setOutputFormatClass(TextOutputFormat.class);
        }
        dumper.setNumReduceTasks(1);

        // Settings read back by the mapper and reducer classes in their setup methods.
        conf.setBoolean("inlinks", type == DumpType.INLINKS);
        conf.setBoolean("outlinks", type == DumpType.OUTLINKS);
        conf.setBoolean("scores", type == DumpType.SCORES);
        conf.setBoolean("host", nameType == NameType.HOST);
        conf.setBoolean("domain", nameType == NameType.DOMAIN);
        conf.setBoolean("sum", aggrType == AggrType.SUM);
        conf.setBoolean("max", aggrType == AggrType.MAX);
        conf.setLong("topn", topN);
        if (asEff) {
            // "key=value" lines, as announced by the -asEff option description.
            conf.set("mapreduce.output.textoutputformat.separator", "=");
        }

        try {
            LOG.info("NodeDumper: running");
            boolean success = dumper.waitForCompletion(true);
            if (!success) {
                String message = NutchJob.getJobFailureLogMessage("NodeDumper", dumper);
                LOG.error(message);
                throw new RuntimeException(message);
            }
        } catch (IOException | InterruptedException | ClassNotFoundException e) {
            // Log every checked failure of waitForCompletion, not only I/O errors, then rethrow.
            LOG.error("NodeDumper job failed:", e);
            throw e;
        }

        stopWatch.stop();
        LOG.info("NodeDumper: finished, elapsed: {} ms", stopWatch.getTime(TimeUnit.MILLISECONDS));
    }

    /**
     * Command-line entry point: runs a new {@code NodeDumper} through {@code ToolRunner}
     * with a Nutch configuration and exits the JVM with the returned status.
     *
     * @param args command-line arguments handed to {@link #run(String[])}
     * @throws Exception if the tool run fails with an exception
     */
    public static void main(String[] args) throws Exception {
        Configuration nutchConf = NutchConfiguration.create();
        NodeDumper tool = new NodeDumper();
        int exitCode = ToolRunner.run(nutchConf, tool, args);
        System.exit(exitCode);
    }

    /**
     * Parses the command line and runs the node dump.
     *
     * <p>Recognized options: {@code -help}, {@code -webgraphdb <dir>}, {@code -inlinks},
     * {@code -outlinks}, {@code -scores}, {@code -topn [n]}, {@code -output <dir>},
     * {@code -asEff}, {@code -group <host|domain> <sum|max>} and {@code -asSequenceFile}.
     * {@code -inlinks} takes precedence over {@code -outlinks}; when neither is given
     * the scores are dumped. Without a usable {@code -topn} value no limit is applied.
     *
     * @param args command-line arguments
     * @return {@code 0} on success, {@code -1} when the usage message was printed
     *         ({@code -help} given, or {@code -webgraphdb} / {@code -output} missing),
     *         {@code -2} when parsing or the dump failed with an exception
     * @throws Exception declared by the {@code Tool} contract; failures are logged and
     *                   reported through the return value instead
     */
    @Override
    public int run(String[] args) throws Exception {
        Options options = new Options();

        OptionBuilder.withArgName("help");
        OptionBuilder.withDescription("show this help message");
        options.addOption(OptionBuilder.create("help"));

        OptionBuilder.withArgName("webgraphdb");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("the web graph database to use");
        options.addOption(OptionBuilder.create("webgraphdb"));

        OptionBuilder.withArgName("inlinks");
        OptionBuilder.withDescription("show highest inlinks");
        options.addOption(OptionBuilder.create("inlinks"));

        OptionBuilder.withArgName("outlinks");
        OptionBuilder.withDescription("show highest outlinks");
        options.addOption(OptionBuilder.create("outlinks"));

        OptionBuilder.withArgName("scores");
        OptionBuilder.withDescription("show highest scores");
        options.addOption(OptionBuilder.create("scores"));

        OptionBuilder.withArgName("topn");
        OptionBuilder.hasOptionalArg();
        OptionBuilder.withDescription("show topN scores");
        options.addOption(OptionBuilder.create("topn"));

        OptionBuilder.withArgName("output");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("the output directory to use");
        options.addOption(OptionBuilder.create("output"));

        OptionBuilder.withArgName("asEff");
        OptionBuilder.withDescription("Solr ExternalFileField compatible output format");
        options.addOption(OptionBuilder.create("asEff"));

        OptionBuilder.hasArgs(2);
        OptionBuilder.withDescription("group <host|domain> <sum|max>");
        options.addOption(OptionBuilder.create("group"));

        OptionBuilder.withArgName("asSequenceFile");
        OptionBuilder.withDescription("whether to output as a sequencefile");
        options.addOption(OptionBuilder.create("asSequenceFile"));

        try {
            CommandLine line = new GnuParser().parse(options, args);

            // Both paths are mandatory: without them there is nothing to read or write.
            if (line.hasOption("help") || !line.hasOption("webgraphdb") || !line.hasOption("output")) {
                new HelpFormatter().printHelp("NodeDumper", options);
                return -1;
            }

            String webGraphDb = line.getOptionValue("webgraphdb");
            String output = line.getOptionValue("output");

            // -topn declares an optional argument, so the value may be absent even when the flag is set.
            String topNValue = line.getOptionValue("topn");
            long topN = topNValue != null ? Long.parseLong(topNValue) : Long.MAX_VALUE;

            DumpType type;
            if (line.hasOption("inlinks")) {
                type = DumpType.INLINKS;
            } else if (line.hasOption("outlinks")) {
                type = DumpType.OUTLINKS;
            } else {
                type = DumpType.SCORES;
            }

            // Unrecognized group values leave the corresponding type null.
            NameType nameType = null;
            AggrType aggrType = null;
            String[] group = line.getOptionValues("group");
            if (group != null && group.length == 2) {
                if ("host".equals(group[0])) {
                    nameType = NameType.HOST;
                } else if ("domain".equals(group[0])) {
                    nameType = NameType.DOMAIN;
                }
                if ("sum".equals(group[1])) {
                    aggrType = AggrType.SUM;
                } else if ("max".equals(group[1])) {
                    aggrType = AggrType.MAX;
                }
            }

            boolean asEff = line.hasOption("asEff");
            boolean asSequenceFile = line.hasOption("asSequenceFile");
            this.dumpNodes(new Path(webGraphDb), type, topN, new Path(output), asEff, nameType, aggrType, asSequenceFile);
            return 0;
        } catch (Exception e) {
            LOG.error("NodeDumper:", e);
            return -2;
        }
    }

    public static class Dumper
    extends Configured {

        public static class DumperReducer
        extends Reducer<Text, FloatWritable, Text, FloatWritable> {
            private Configuration conf;
            private long topn = Long.MAX_VALUE;
            private boolean sum = false;

            public void reduce(Text key, Iterable<FloatWritable> values, Reducer.Context context) throws IOException, InterruptedException {
                long numCollected = 0L;
                float sumOrMax = 0.0f;
                float val = 0.0f;
                for (FloatWritable value : values) {
                    if (numCollected >= this.topn) break;
                    val = value.get();
                    if (this.sum) {
                        sumOrMax += val;
                    } else if (sumOrMax < val) {
                        sumOrMax = val;
                    }
                    ++numCollected;
                }
                context.write((Object)key, (Object)new FloatWritable(sumOrMax));
            }

            public void setup(Reducer.Context context) {
                this.conf = context.getConfiguration();
                this.topn = this.conf.getLong("topn", Long.MAX_VALUE);
                this.sum = this.conf.getBoolean("sum", false);
            }
        }

        public static class DumperMapper
        extends Mapper<Text, Node, Text, FloatWritable> {
            private Configuration conf;
            private boolean inlinks = false;
            private boolean outlinks = false;
            private boolean host = false;

            public void setup(Mapper.Context context) {
                this.conf = context.getConfiguration();
                this.inlinks = this.conf.getBoolean("inlinks", false);
                this.outlinks = this.conf.getBoolean("outlinks", false);
                this.host = this.conf.getBoolean("host", false);
            }

            public void map(Text key, Node node, Mapper.Context context) throws IOException, InterruptedException {
                float number = 0.0f;
                number = this.inlinks ? (float)node.getNumInlinks() : (this.outlinks ? (float)node.getNumOutlinks() : node.getInlinkScore());
                if (this.host) {
                    key.set(URLUtil.getHost(key.toString()));
                } else {
                    key.set(URLUtil.getDomainName(key.toString()));
                }
                context.write((Object)key, (Object)new FloatWritable(number));
            }
        }
    }

    public static class Sorter
    extends Configured {

        public static class SorterReducer
        extends Reducer<FloatWritable, Text, Text, FloatWritable> {
            private Configuration conf;
            private long topn = Long.MAX_VALUE;

            public void setup(Reducer.Context context) {
                this.conf = context.getConfiguration();
                this.topn = this.conf.getLong("topn", Long.MAX_VALUE);
            }

            public void reduce(FloatWritable key, Iterable<Text> values, Reducer.Context context) throws IOException, InterruptedException {
                float val = key.get();
                FloatWritable number = new FloatWritable(val == 0.0f ? 0.0f : -val);
                long numCollected = 0L;
                for (Text value : values) {
                    if (numCollected >= this.topn) continue;
                    Text url = (Text)WritableUtils.clone((Writable)value, (Configuration)this.conf);
                    context.write((Object)url, (Object)number);
                    ++numCollected;
                }
            }
        }

        public static class SorterMapper
        extends Mapper<Text, Node, FloatWritable, Text> {
            private Configuration conf;
            private boolean inlinks = false;
            private boolean outlinks = false;

            public void setup(Mapper.Context context) {
                this.conf = context.getConfiguration();
                this.inlinks = this.conf.getBoolean("inlinks", false);
                this.outlinks = this.conf.getBoolean("outlinks", false);
            }

            public void map(Text key, Node node, Mapper.Context context) throws IOException, InterruptedException {
                float number = 0.0f;
                number = this.inlinks ? (float)node.getNumInlinks() : (this.outlinks ? (float)node.getNumOutlinks() : node.getInlinkScore());
                context.write((Object)new FloatWritable(-number), (Object)key);
            }
        }
    }

    private static enum NameType {
        HOST,
        DOMAIN;

    }

    private static enum AggrType {
        SUM,
        MAX;

    }

    private static enum DumpType {
        INLINKS,
        OUTLINKS,
        SCORES;

    }
}

