/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.parse.feed;

import com.rometools.rome.feed.synd.SyndCategory;
import com.rometools.rome.feed.synd.SyndContent;
import com.rometools.rome.feed.synd.SyndEntry;
import com.rometools.rome.feed.synd.SyndFeed;
import com.rometools.rome.feed.synd.SyndPerson;
import com.rometools.rome.io.SyndFeedInput;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.net.URLFilters;
import org.apache.nutch.net.URLNormalizers;
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.parse.ParseResult;
import org.apache.nutch.parse.ParseStatus;
import org.apache.nutch.parse.ParseText;
import org.apache.nutch.parse.Parser;
import org.apache.nutch.parse.ParserFactory;
import org.apache.nutch.parse.ParserNotFound;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.util.EncodingDetector;
import org.apache.nutch.util.NutchConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.InputSource;

public class FeedParser
implements Parser {
    public static final String CHARSET_UTF8 = "charset=UTF-8";
    public static final String TEXT_PLAIN_CONTENT_TYPE = "text/plain; charset=UTF-8";
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
    private Configuration conf;
    private ParserFactory parserFactory;
    private URLNormalizers normalizers;
    private URLFilters filters;
    private String defaultEncoding;

    public ParseResult getParse(Content content) {
        SyndFeed feed = null;
        ParseResult parseResult = new ParseResult(content.getUrl());
        EncodingDetector detector = new EncodingDetector(this.conf);
        detector.autoDetectClues(content, true);
        String encoding = detector.guessEncoding(content, this.defaultEncoding);
        try {
            InputSource input = new InputSource(new ByteArrayInputStream(content.getContent()));
            input.setEncoding(encoding);
            SyndFeedInput feedInput = new SyndFeedInput();
            feed = feedInput.build(input);
        }
        catch (Exception e) {
            LOG.warn("Parse failed: url: {}, exception: ", (Object)content.getUrl(), (Object)e);
            return new ParseStatus((Throwable)e).getEmptyParseResult(content.getUrl(), this.getConf());
        }
        String feedLink = feed.getLink();
        try {
            feedLink = this.normalizers.normalize(feedLink, "outlink");
            if (feedLink != null) {
                feedLink = this.filters.filter(feedLink);
            }
        }
        catch (Exception e) {
            feedLink = null;
        }
        List entries = feed.getEntries();
        for (Object entry : entries) {
            this.addToMap(parseResult, feed, feedLink, (SyndEntry)entry, content);
        }
        String feedDesc = FeedParser.stripTags(feed.getDescriptionEx());
        String feedTitle = FeedParser.stripTags(feed.getTitleEx());
        parseResult.put(content.getUrl(), new ParseText(feedDesc), new ParseData(new ParseStatus(1), feedTitle, new Outlink[0], content.getMetadata()));
        return parseResult;
    }

    public void setConf(Configuration conf) {
        this.conf = conf;
        this.parserFactory = new ParserFactory(conf);
        this.normalizers = new URLNormalizers(conf, "outlink");
        this.filters = new URLFilters(conf);
        this.defaultEncoding = conf.get("parser.character.encoding.default", "windows-1252");
    }

    public Configuration getConf() {
        return this.conf;
    }

    public static void main(String[] args) throws Exception {
        if (args.length != 1) {
            System.err.println("Usage: FeedParser <feed>");
            System.exit(1);
        }
        String name = args[0];
        String url = "file:" + name;
        Configuration conf = NutchConfiguration.create();
        FeedParser parser = new FeedParser();
        parser.setConf(conf);
        File file = new File(name);
        byte[] bytes = new byte[(int)file.length()];
        DataInputStream in = new DataInputStream(new FileInputStream(file));
        in.readFully(bytes);
        in.close();
        ParseResult parseResult = parser.getParse(new Content(url, url, bytes, "application/rss+xml", new Metadata(), conf));
        for (Map.Entry entry : parseResult) {
            System.out.println("key: " + String.valueOf(entry.getKey()));
            Parse parse = (Parse)entry.getValue();
            System.out.println("data: " + String.valueOf(parse.getData()));
            System.out.println("text: " + parse.getText() + "\n");
        }
    }

    private void addToMap(ParseResult parseResult, SyndFeed feed, String feedLink, SyndEntry entry, Content content) {
        String link = entry.getLink();
        String text = null;
        String title = null;
        Metadata parseMeta = new Metadata();
        Metadata contentMeta = content.getMetadata();
        Parse parse = null;
        SyndContent description = entry.getDescription();
        try {
            link = this.normalizers.normalize(link, "outlink");
            if (link != null) {
                link = this.filters.filter(link);
            }
        }
        catch (Exception e) {
            e.printStackTrace();
            return;
        }
        if (link == null) {
            return;
        }
        title = FeedParser.stripTags(entry.getTitleEx());
        if (feedLink != null) {
            parseMeta.set("feed", feedLink);
        }
        this.addFields(parseMeta, contentMeta, feed, entry);
        String contentType = contentMeta.get("Content-Type");
        if (description != null) {
            text = description.getValue();
        }
        if (text == null) {
            List contents = entry.getContents();
            StringBuilder buf = new StringBuilder();
            for (Object syndContent : contents) {
                buf.append(((SyndContent)syndContent).getValue());
            }
            text = buf.toString();
        }
        try {
            Parser parser = this.parserFactory.getParsers(contentType, link)[0];
            parse = parser.getParse(new Content(link, link, text.getBytes(), contentType, contentMeta, this.conf)).get(link);
        }
        catch (ParserNotFound parser) {
            // empty catch block
        }
        if (parse != null) {
            ParseData data = parse.getData();
            data.getContentMeta().remove("Content-Type");
            this.mergeMetadata(data.getParseMeta(), parseMeta);
            parseResult.put(link, new ParseText(parse.getText()), new ParseData(ParseStatus.STATUS_SUCCESS, title, data.getOutlinks(), data.getContentMeta(), data.getParseMeta()));
        } else {
            contentMeta.remove("Content-Type");
            parseResult.put(link, new ParseText(text), new ParseData(ParseStatus.STATUS_FAILURE, title, new Outlink[0], contentMeta, parseMeta));
        }
    }

    private static String stripTags(SyndContent c) {
        if (c == null) {
            return "";
        }
        String value = c.getValue();
        String[] parts = value.split("<[^>]*>");
        StringBuffer buf = new StringBuffer();
        for (String part : parts) {
            buf.append(part);
        }
        return buf.toString().trim();
    }

    private void addFields(Metadata parseMeta, Metadata contentMeta, SyndFeed feed, SyndEntry entry) {
        SyndContent description;
        List authors = entry.getAuthors();
        List categories = entry.getCategories();
        Date published = entry.getPublishedDate();
        Date updated = entry.getUpdatedDate();
        String contentType = null;
        if (authors != null) {
            for (Object o : authors) {
                SyndPerson author = (SyndPerson)o;
                String authorName = author.getName();
                if (!this.checkString(authorName)) continue;
                parseMeta.add("author", authorName);
            }
        } else {
            String authorName = entry.getAuthor();
            if (this.checkString(authorName)) {
                parseMeta.set("author", authorName);
            }
        }
        for (Object i : categories) {
            parseMeta.add("tag", ((SyndCategory)i).getName());
        }
        if (published != null) {
            parseMeta.set("published", Long.toString(published.getTime()));
        }
        if (updated != null) {
            parseMeta.set("updated", Long.toString(updated.getTime()));
        }
        if ((description = entry.getDescription()) != null) {
            contentType = description.getType();
        } else {
            List contents = entry.getContents();
            if (contents.size() > 0) {
                contentType = ((SyndContent)contents.get(0)).getType();
            }
        }
        if (this.checkString(contentType)) {
            if (contentType.equals("html")) {
                contentType = "text/html";
            } else if (contentType.equals("xhtml")) {
                contentType = "text/xhtml";
            }
            contentMeta.set("Content-Type", contentType + "; charset=UTF-8");
        } else {
            contentMeta.set("Content-Type", TEXT_PLAIN_CONTENT_TYPE);
        }
    }

    private void mergeMetadata(Metadata first, Metadata second) {
        for (String name : second.names()) {
            String[] values;
            for (String value : values = second.getValues(name)) {
                first.add(name, value);
            }
        }
    }

    private boolean checkString(String s) {
        return s != null && !s.equals("");
    }
}

