/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.urlfilter.fast;

import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Multimap;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.nutch.net.URLFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class FastURLFilter
implements URLFilter {
    protected static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
    private Configuration conf;
    public static final String URLFILTER_FAST_FILE = "urlfilter.fast.file";
    public static final String URLFILTER_FAST_MAX_LENGTH = "urlfilter.fast.url.max.length";
    public static final String URLFILTER_FAST_PATH_MAX_LENGTH = "urlfilter.fast.url.path.max.length";
    public static final String URLFILTER_FAST_QUERY_MAX_LENGTH = "urlfilter.fast.url.query.max.length";
    private Multimap<String, Rule> hostRules = LinkedHashMultimap.create();
    private Multimap<String, Rule> domainRules = LinkedHashMultimap.create();
    private int maxLengthPath = -1;
    private int maxLengthQuery = -1;
    private int maxLength = -1;
    private static final Pattern CATCH_ALL_RULE = Pattern.compile("^\\s*DenyPath(?:Query)?\\s+\\.[*?]\\s*$");

    public FastURLFilter() {
    }

    FastURLFilter(Reader rules) throws IOException, PatternSyntaxException {
        this.reloadRules(rules);
    }

    FastURLFilter(Reader rules, Configuration conf) throws IOException, PatternSyntaxException {
        this.maxLengthPath = conf.getInt(URLFILTER_FAST_PATH_MAX_LENGTH, -1);
        this.maxLengthQuery = conf.getInt(URLFILTER_FAST_QUERY_MAX_LENGTH, -1);
        this.maxLength = conf.getInt(URLFILTER_FAST_MAX_LENGTH, -1);
        this.reloadRules(rules);
    }

    public void setConf(Configuration conf) {
        this.conf = conf;
        this.maxLengthPath = conf.getInt(URLFILTER_FAST_PATH_MAX_LENGTH, -1);
        this.maxLengthQuery = conf.getInt(URLFILTER_FAST_QUERY_MAX_LENGTH, -1);
        this.maxLength = conf.getInt(URLFILTER_FAST_MAX_LENGTH, -1);
        try {
            this.reloadRules();
        }
        catch (Exception e) {
            LOG.error("Failed to load rules: {}", (Object)e.getMessage());
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    public Configuration getConf() {
        return this.conf;
    }

    public String filter(String url) {
        int pos;
        URL u;
        if (this.maxLength != -1 && url.length() > this.maxLength) {
            LOG.debug("Rejected {} because URL length ({}) greater than limit {}", new Object[]{url, url.length(), this.maxLength});
            return null;
        }
        try {
            u = new URL(url);
        }
        catch (Exception e) {
            LOG.debug("Rejected {} because failed to parse as URL: {}", (Object)url, (Object)e.getMessage());
            return null;
        }
        String path = u.getPath();
        if (this.maxLengthPath != -1 && path.length() > this.maxLengthPath) {
            LOG.debug("Rejected {} as path length {} is greater than {}", new Object[]{url, path.length(), this.maxLengthPath});
            return null;
        }
        String query = u.getQuery();
        if (this.maxLengthQuery != -1 && query != null && query.length() > this.maxLengthQuery) {
            LOG.debug("Rejected {} as query length {} is greater than {}", new Object[]{url, query.length(), this.maxLengthQuery});
            return null;
        }
        String hostname = u.getHost();
        for (Rule rule : this.hostRules.get((Object)hostname)) {
            if (!rule.match(u)) continue;
            return null;
        }
        for (Rule rule : this.domainRules.get((Object)hostname)) {
            if (!rule.match(u)) continue;
            return null;
        }
        int start = 0;
        while ((pos = hostname.indexOf(46, start)) != -1) {
            start = pos + 1;
            String domain = hostname.substring(start);
            for (Rule rule : this.domainRules.get((Object)domain)) {
                if (!rule.match(u)) continue;
                return null;
            }
        }
        for (Rule rule : this.domainRules.get((Object)".")) {
            if (!rule.match(u)) continue;
            return null;
        }
        return url;
    }

    public void reloadRules() throws IOException {
        InputStream is;
        String fileRules = this.conf.get(URLFILTER_FAST_FILE);
        LOG.info("Reading urlfilter-fast rules file: {}", (Object)fileRules);
        Path fileRulesPath = new Path(fileRules);
        if (fileRulesPath.toUri().getScheme() != null) {
            FileSystem fs = fileRulesPath.getFileSystem(this.conf);
            is = fs.open(fileRulesPath);
        } else {
            is = this.conf.getConfResourceAsInputStream(fileRules);
        }
        CompressionCodec codec = new CompressionCodecFactory(this.conf).getCodec(fileRulesPath);
        if (codec != null && is != null) {
            is = codec.createInputStream(is);
        }
        try {
            this.reloadRules(new InputStreamReader(is));
        }
        catch (Exception e) {
            String message = "Couldn't load the rules from " + fileRules;
            LOG.error(message);
            throw new IOException(message);
        }
        finally {
            if (is != null) {
                is.close();
            }
        }
    }

    private void reloadRules(Reader rules) throws IOException {
        this.domainRules.clear();
        this.hostRules.clear();
        BufferedReader reader = new BufferedReader(rules);
        String current = null;
        boolean host = false;
        int lineno = 0;
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                Rule rule;
                block12: {
                    ++lineno;
                    if ((line = line.trim()).indexOf("#") != -1) {
                        line = line.substring(0, line.indexOf("#")).trim();
                    }
                    if (StringUtils.isBlank((CharSequence)line)) continue;
                    if (line.startsWith("Host")) {
                        host = true;
                        current = line.split("\\s+")[1];
                        continue;
                    }
                    if (line.startsWith("Domain")) {
                        host = false;
                        current = line.split("\\s+")[1];
                        continue;
                    }
                    if (current == null) continue;
                    rule = null;
                    try {
                        if (CATCH_ALL_RULE.matcher(line).matches()) {
                            rule = DenyAllRule.getInstance();
                            break block12;
                        }
                        if (line.startsWith("DenyPathQuery")) {
                            rule = new DenyPathQueryRule(line.split("\\s+")[1]);
                            break block12;
                        }
                        if (line.startsWith("DenyPath")) {
                            rule = new DenyPathRule(line.split("\\s+")[1]);
                            break block12;
                        }
                        LOG.warn("Problem reading rule on line {}: {}", (Object)lineno, (Object)line);
                    }
                    catch (Exception e) {
                        LOG.warn("Problem reading rule on line {}: {} - {}", new Object[]{lineno, line, e.getMessage()});
                    }
                    continue;
                }
                if (host) {
                    LOG.trace("Adding host rule [{}] [{}]", (Object)current, (Object)rule);
                    this.hostRules.put((Object)current, (Object)rule);
                    continue;
                }
                LOG.trace("Adding domain rule [{}] [{}]", (Object)current, (Object)rule);
                this.domainRules.put((Object)current, (Object)rule);
            }
            LOG.info("Read {} lines, {} host and {} domain rules from urlfilter-fast rules file", new Object[]{lineno, this.hostRules.size(), this.domainRules.size()});
        }
        catch (IOException e) {
            LOG.warn("Caught exception while reading rules file at line {}: {}", (Object)lineno, (Object)e.getMessage());
            throw e;
        }
    }

    public static class DenyPathQueryRule
    extends Rule {
        public DenyPathQueryRule(String regex) {
            super(regex);
        }

        @Override
        public boolean match(URL url) {
            String haystack = url.getFile();
            return this.pattern.matcher(haystack).find();
        }
    }

    public static class DenyAllRule
    extends Rule {
        private static Rule instance = new DenyAllRule(".");

        private DenyAllRule(String regex) {
            super(regex);
        }

        public static Rule getInstance() {
            return instance;
        }

        @Override
        public boolean match(URL url) {
            return true;
        }
    }

    public static class DenyPathRule
    extends Rule {
        public DenyPathRule(String regex) {
            super(regex);
        }

        @Override
        public boolean match(URL url) {
            String haystack = url.getPath();
            return this.pattern.matcher(haystack).find();
        }
    }

    public static class Rule {
        protected Pattern pattern;

        Rule() {
        }

        public Rule(String regex) {
            this.pattern = Pattern.compile(regex);
        }

        public boolean match(URL url) {
            return this.pattern.matcher(url.toString()).find();
        }

        public String toString() {
            return this.pattern.toString();
        }
    }
}

