/*
 * Decompiled with CFR 0.152.
 */
package org.seasar.robot.transformer.impl;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.transform.TransformerException;
import org.apache.commons.io.IOUtils;
import org.apache.xpath.CachedXPathAPI;
import org.cyberneko.html.parsers.DOMParser;
import org.seasar.framework.container.SingletonS2Container;
import org.seasar.framework.container.annotation.tiger.Binding;
import org.seasar.framework.container.annotation.tiger.BindingType;
import org.seasar.framework.util.InputStreamUtil;
import org.seasar.framework.util.StringUtil;
import org.seasar.robot.RobotCrawlAccessException;
import org.seasar.robot.RobotSystemException;
import org.seasar.robot.entity.AccessResultData;
import org.seasar.robot.entity.ResponseData;
import org.seasar.robot.entity.ResultData;
import org.seasar.robot.helper.EncodingHelper;
import org.seasar.robot.helper.UrlConvertHelper;
import org.seasar.robot.transformer.impl.AbstractTransformer;
import org.seasar.robot.util.CharUtil;
import org.seasar.robot.util.StreamUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class HtmlTransformer
extends AbstractTransformer {
    /** Logger shared by all instances of this transformer. */
    private static final Logger logger = LoggerFactory.getLogger(HtmlTransformer.class);

    /**
     * Parser feature names mapped to their textual on/off value. A feature is
     * switched on only when its value equals "true" (ignoring case); see
     * {@code getDomParser()}.
     */
    protected Map<String, String> featureMap = new HashMap<String, String>();

    /** Parser property names mapped to the values handed to the parser in {@code getDomParser()}. */
    protected Map<String, String> propertyMap = new HashMap<String, String>();

    /**
     * Child URL extraction rules, kept in insertion order. Each key is used as
     * the XPath expression selecting nodes and each value as the attribute
     * name read from those nodes; see {@code storeChildUrls()}.
     */
    protected Map<String, String> childUrlRuleMap = new LinkedHashMap<String, String>();

    /** Charset applied by {@code updateCharset()} when none is found in the body and none is set on the response. */
    @Binding(bindingType=BindingType.MAY)
    protected String defaultEncoding;

    /** Number of leading body bytes inspected by {@code loadCharset()} when looking for a charset declaration. */
    @Binding(bindingType=BindingType.MAY)
    protected int preloadSizeForCharset = 2048;

    /** Attribute values matching this pattern (non-crawlable schemes) are rejected by {@code isValidPath()}. */
    @Binding(bindingType=BindingType.MAY)
    protected Pattern invalidUrlPattern = Pattern.compile("^\\s*javascript:|^\\s*mailto:|^\\s*irc:|^\\s*skype:|^\\s*about:|^\\s*fscommand:|^\\s*aim:|^\\s*msnim:|^\\s*news:|^\\s*tel:|^\\s*unsaved:|^\\s*callto:", Pattern.CASE_INSENSITIVE);

    /** Per-thread XPath helper; created lazily by {@code getXPathAPI()} and removed at the end of {@code storeChildUrls()}. */
    private final ThreadLocal<CachedXPathAPI> xpathAPI = new ThreadLocal<CachedXPathAPI>();

    /**
     * Transforms a crawled response into a {@link ResultData}.
     * <p>
     * The response body is first copied to a temporary file so that it can be
     * read several times: once to determine the charset, once to store the raw
     * bytes, and, for HTML content only, once more to extract child URLs. Each
     * pass installs a fresh stream on {@code responseData} and closes it when
     * the pass ends.
     * <p>
     * The temporary file is removed when this method finishes, whether it
     * returns normally or throws.
     *
     * @param responseData the response to transform; it and its body must not be null
     * @return the result holding the transformer name, the body bytes, the
     *         encoding and any child URLs that were found
     * @throws RobotCrawlAccessException if there is no response body
     * @throws RobotSystemException if any of the passes fails
     */
    @Override
    public ResultData transform(ResponseData responseData) {
        if (responseData == null || responseData.getResponseBody() == null) {
            throw new RobotCrawlAccessException("No response body.");
        }

        final File tempFile = this.createResponseBodyFile(responseData);
        try {
            FileInputStream fis = null;

            // Pass 1: detect the charset.
            try {
                fis = new FileInputStream(tempFile);
                responseData.setResponseBody(fis);
                this.updateCharset(responseData);
            } catch (RobotSystemException e) {
                throw e;
            } catch (Exception e) {
                throw new RobotSystemException("Could not load response data: " + responseData.getUrl(), e);
            } finally {
                IOUtils.closeQuietly(fis);
            }

            final ResultData resultData = new ResultData();
            resultData.setTransformerName(this.getName());

            // Pass 2: store the body bytes and the encoding.
            fis = null;
            try {
                fis = new FileInputStream(tempFile);
                responseData.setResponseBody(fis);
                this.storeData(responseData, resultData);
            } catch (RobotSystemException e) {
                throw e;
            } catch (Exception e) {
                throw new RobotSystemException("Could not store data.", e);
            } finally {
                IOUtils.closeQuietly(fis);
            }

            // Pass 3: extract child URLs, for HTML content only.
            if (this.isHtml(responseData)) {
                fis = null;
                try {
                    fis = new FileInputStream(tempFile);
                    responseData.setResponseBody(fis);
                    this.storeChildUrls(responseData, resultData);
                } catch (RobotSystemException e) {
                    throw e;
                } catch (Exception e) {
                    throw new RobotSystemException("Could not store data.", e);
                } finally {
                    IOUtils.closeQuietly(fis);
                }
            }

            return resultData;
        } finally {
            // Single cleanup point: runs on success, on Exception and on Error,
            // and always after the stream of the current pass has been closed.
            if (!tempFile.delete()) {
                logger.warn("Could not delete a temp file: " + tempFile);
            }
        }
    }

    /**
     * Copies the response body into a newly created temporary file.
     * <p>
     * The body stream obtained from {@code responseData} is closed before this
     * method returns or throws. If the copy fails, the partially written file
     * is deleted.
     *
     * @param responseData the response whose body is drained
     * @return the temporary file containing the body
     * @throws RobotCrawlAccessException if the file cannot be created or written
     */
    protected File createResponseBodyFile(ResponseData responseData) {
        final InputStream bodyStream = responseData.getResponseBody();
        File bodyFile = null;
        FileOutputStream fileOut = null;
        try {
            bodyFile = File.createTempFile("s2robot-HtmlTransformer-", ".html");
            fileOut = new FileOutputStream(bodyFile);
            StreamUtil.drain(bodyStream, fileOut);
        } catch (Exception e) {
            // Release the file handle first so the delete below can succeed.
            IOUtils.closeQuietly(fileOut);
            if (bodyFile != null && !bodyFile.delete()) {
                logger.warn("Could not delete a temp file: " + bodyFile);
            }
            throw new RobotCrawlAccessException("Could not read a response body: " + responseData.getUrl(), e);
        } finally {
            IOUtils.closeQuietly(bodyStream);
            IOUtils.closeQuietly(fileOut);
        }
        return bodyFile;
    }

    /**
     * Tells whether the response is an HTML or XHTML document, judged by an
     * exact match of its MIME type.
     *
     * @param responseData the response to inspect
     * @return true for "text/html" or "application/xhtml+xml", false otherwise
     */
    protected boolean isHtml(ResponseData responseData) {
        final String type = responseData.getMimeType();
        if ("text/html".equals(type)) {
            return true;
        }
        return "application/xhtml+xml".equals(type);
    }

    /**
     * Registers a child URL extraction rule. The call is ignored when either
     * argument is blank.
     *
     * @param tagName key of the rule (used as the node selector when URLs are extracted)
     * @param attrName name of the attribute holding the URL
     */
    public void addChildUrlRule(String tagName, String attrName) {
        if (StringUtil.isBlank(tagName) || StringUtil.isBlank(attrName)) {
            return;
        }
        this.childUrlRuleMap.put(tagName, attrName);
    }

    /**
     * Returns the XPath helper bound to the current thread, creating and
     * binding one on first use.
     *
     * @return the current thread's {@link CachedXPathAPI}
     */
    protected CachedXPathAPI getXPathAPI() {
        CachedXPathAPI api = this.xpathAPI.get();
        if (api != null) {
            return api;
        }
        api = new CachedXPathAPI();
        this.xpathAPI.set(api);
        return api;
    }

    /**
     * Parses the response body as HTML, collects the URLs selected by the
     * configured child URL rules and adds them to {@code resultData}.
     * <p>
     * Relative URLs are resolved against the document's base href when one is
     * present, otherwise against the response URL. A failure during parsing or
     * extraction is logged and whatever was collected up to that point is
     * still added. The page's own URL, and its variant with or without a
     * trailing slash, are removed from the result.
     *
     * @param responseData the response whose body stream is parsed
     * @param resultData the result receiving the child URLs
     */
    protected void storeChildUrls(ResponseData responseData, ResultData resultData) {
        List<String> childUrls = new ArrayList<String>();
        try {
            final DOMParser domParser = this.getDomParser();
            domParser.parse(new InputSource(responseData.getResponseBody()));
            final Document doc = domParser.getDocument();

            final String baseHref = this.getBaseHref(doc);
            final URL baseUrl = new URL(baseHref == null ? responseData.getUrl() : baseHref);

            for (Map.Entry<String, String> rule : this.childUrlRuleMap.entrySet()) {
                childUrls.addAll(this.getUrlFromTagAttribute(baseUrl, doc, rule.getKey(), rule.getValue(), responseData.getCharSet()));
            }
            childUrls = this.convertChildUrlList(childUrls);
        } catch (Exception e) {
            logger.warn("Could not create child urls.", e);
        } finally {
            // Drop the per-thread XPath helper once this document is done.
            this.xpathAPI.remove();
        }

        resultData.addAllUrl(childUrls);
        final String selfUrl = responseData.getUrl();
        resultData.removeUrl(selfUrl);
        resultData.removeUrl(this.getDuplicateUrl(selfUrl));
    }

    /**
     * Passes every URL through the {@link UrlConvertHelper} component.
     * <p>
     * This is best-effort: if the helper cannot be obtained or a conversion
     * fails, the input list is returned unchanged. The cause is logged at
     * debug level so the fallback is not completely invisible.
     *
     * @param urlList the extracted URLs
     * @return a new list of converted URLs, or {@code urlList} itself when
     *         conversion was not possible
     */
    protected List<String> convertChildUrlList(List<String> urlList) {
        try {
            final UrlConvertHelper urlConvertHelper = (UrlConvertHelper)SingletonS2Container.getComponent(UrlConvertHelper.class);
            final List<String> newUrlList = new ArrayList<String>(urlList.size());
            for (String url : urlList) {
                newUrlList.add(urlConvertHelper.convert(url));
            }
            return newUrlList;
        } catch (Exception e) {
            if (logger.isDebugEnabled()) {
                logger.debug("Could not convert child urls; using them as extracted.", e);
            }
            return urlList;
        }
    }

    /**
     * Stores the full response body and the response charset in the result.
     *
     * @param responseData source of the body stream and the charset
     * @param resultData the result receiving the bytes and the encoding
     */
    protected void storeData(ResponseData responseData, ResultData resultData) {
        final InputStream body = responseData.getResponseBody();
        resultData.setData(InputStreamUtil.getBytes(body));
        resultData.setEncoding(responseData.getCharSet());
    }

    /**
     * Decides the charset of the response and sets it on {@code responseData}.
     * <ul>
     * <li>A charset declared in the body wins (trimmed).</li>
     * <li>Otherwise, with no default encoding configured, "UTF-8" is set.</li>
     * <li>Otherwise the default encoding is applied only when the response has
     * no charset yet.</li>
     * </ul>
     * Finally, a charset that this JVM does not support is replaced by "UTF-8".
     *
     * @param responseData the response whose body is inspected and whose charset is updated
     */
    protected void updateCharset(ResponseData responseData) {
        final String declared = this.loadCharset(responseData.getResponseBody());
        if (declared != null) {
            responseData.setCharSet(declared.trim());
        } else if (this.defaultEncoding == null) {
            responseData.setCharSet("UTF-8");
        } else if (responseData.getCharSet() == null) {
            responseData.setCharSet(this.defaultEncoding);
        }

        if (!this.isSupportedCharset(responseData.getCharSet())) {
            responseData.setCharSet("UTF-8");
        }
    }

    /**
     * Checks whether the given charset name can be used on this JVM.
     *
     * @param charsetName the name to check; may be null
     * @return true when the charset is available; false when the name is
     *         null, syntactically illegal or unknown
     */
    protected boolean isSupportedCharset(String charsetName) {
        if (charsetName != null) {
            try {
                return Charset.isSupported(charsetName);
            } catch (Exception e) {
                // Illegal charset name: treated as "not supported" below.
            }
        }
        return false;
    }

    /**
     * Looks for a charset declaration in the first
     * {@code preloadSizeForCharset} bytes of the stream and normalizes the
     * name through the {@link EncodingHelper} component when that is available.
     * <p>
     * The stream is not closed here; it remains owned by the caller.
     *
     * @param inputStream the body stream, read from its current position
     * @return the declared (and possibly normalized) charset name, or null
     *         when no declaration was found
     * @throws RobotCrawlAccessException if the stream cannot be read
     */
    protected String loadCharset(InputStream inputStream) {
        String encoding = null;
        try {
            final BufferedInputStream bis = new BufferedInputStream(inputStream);
            final byte[] buffer = new byte[this.preloadSizeForCharset];
            final int size = bis.read(buffer);
            if (size != -1) {
                // The declaration itself is plain ASCII, so decode with a fixed
                // single-byte charset: every byte maps to exactly one char and
                // the result no longer depends on the platform default charset.
                final String content = new String(buffer, 0, size, "ISO-8859-1");
                encoding = this.parseCharset(content);
            }
        } catch (IOException e) {
            throw new RobotCrawlAccessException("Could not load a content.", e);
        }

        try {
            final EncodingHelper encodingHelper = (EncodingHelper)SingletonS2Container.getComponent(EncodingHelper.class);
            encoding = encodingHelper.normalize(encoding);
        } catch (Exception e) {
            // Normalization is optional: keep the name as declared.
            if (logger.isDebugEnabled()) {
                logger.debug("Could not normalize the encoding: " + encoding, e);
            }
        }
        return encoding;
    }

    /** Matches a {@code ; charset=NAME} declaration, ignoring case; group 1 is the charset name. */
    private static final Pattern CHARSET_PATTERN = Pattern.compile("; *charset *= *([a-zA-Z0-9\\-_]+)", Pattern.CASE_INSENSITIVE);

    /**
     * Extracts the charset name from the first {@code ; charset=NAME}
     * declaration found in the given text.
     *
     * @param content the text to search; may be null
     * @return the charset name as written in the text, or null when there is
     *         no declaration or {@code content} is null
     */
    protected String parseCharset(String content) {
        if (content == null) {
            return null;
        }
        final Matcher matcher = CHARSET_PATTERN.matcher(content);
        return matcher.find() ? matcher.group(1) : null;
    }

    /**
     * Returns the trailing-slash counterpart of a URL: the URL without its
     * final "/" when it has one, or with "/" appended when it has none.
     *
     * @param url the URL to toggle; must not be null
     * @return the same URL with the trailing slash removed or added
     */
    protected String getDuplicateUrl(String url) {
        final int lastIndex = url.length() - 1;
        if (lastIndex >= 0 && url.charAt(lastIndex) == '/') {
            return url.substring(0, lastIndex);
        }
        return url + "/";
    }

    /**
     * Creates a new HTML DOM parser configured with the registered features
     * and properties.
     *
     * @return a freshly configured parser
     * @throws RobotSystemException if the parser rejects a feature or property
     */
    protected DOMParser getDomParser() {
        final DOMParser domParser = new DOMParser();
        try {
            for (Map.Entry<String, String> feature : this.featureMap.entrySet()) {
                // Only the text "true" (any case) switches a feature on.
                final boolean enabled = "true".equalsIgnoreCase(feature.getValue());
                domParser.setFeature(feature.getKey(), enabled);
            }
            for (Map.Entry<String, String> property : this.propertyMap.entrySet()) {
                domParser.setProperty(property.getKey(), property.getValue());
            }
        } catch (Exception e) {
            throw new RobotSystemException("Invalid parser configuration.", e);
        }
        return domParser;
    }

    /**
     * Reads the {@code href} of the first BASE element in the document.
     * <p>
     * A value starting with "www." is prefixed with "http://".
     *
     * @param document the parsed document
     * @return the base href, or null when there is no BASE element, its href
     *         is blank, or the lookup fails (the failure is logged)
     */
    protected String getBaseHref(Document document) {
        final NodeList baseNodes;
        try {
            baseNodes = this.getXPathAPI().selectNodeList(document, "//BASE");
        } catch (Exception e) {
            logger.warn("Could not get a base tag. ", e);
            return null;
        }

        if (baseNodes.getLength() < 1) {
            return null;
        }
        final Element baseElement = (Element) baseNodes.item(0);
        final String href = baseElement.getAttribute("href");
        if (StringUtil.isBlank(href)) {
            return null;
        }
        return href.startsWith("www.") ? "http://" + href : href;
    }

    /**
     * Collects child URLs from the given attribute of every node selected by
     * the XPath expression.
     *
     * @param url the base URL against which attribute values are resolved
     * @param document the parsed document
     * @param xpath the XPath expression selecting the elements
     * @param attr the attribute holding the URL
     * @param encoding the charset used when encoding the resulting URLs
     * @return the URLs found; empty when nothing matched or the XPath
     *         evaluation failed (the failure is logged)
     */
    protected List<String> getUrlFromTagAttribute(URL url, Document document, String xpath, String attr, String encoding) {
        if (logger.isDebugEnabled()) {
            logger.debug("Base URL: " + url);
        }

        final List<String> found = new ArrayList<String>();
        try {
            final NodeList nodes = this.getXPathAPI().selectNodeList(document, xpath);
            int index = 0;
            while (index < nodes.getLength()) {
                final Element element = (Element) nodes.item(index);
                final String value = element.getAttribute(attr);
                if (this.isValidPath(value)) {
                    this.addChildUrlFromTagAttribute(found, url, value, encoding);
                }
                index++;
            }
        } catch (TransformerException e) {
            logger.warn("Could not get urls: (" + xpath + ", " + attr + ")", e);
        }
        return found;
    }

    /**
     * Resolves an attribute value against the base URL, normalizes and encodes
     * it, and appends the result to {@code urlList} unless it is blank.
     * A malformed URL is logged and skipped.
     *
     * @param urlList the list receiving the child URL
     * @param url the base URL
     * @param attrValue the raw attribute value
     * @param encoding the charset used for encoding
     */
    protected void addChildUrlFromTagAttribute(List<String> urlList, URL url, String attrValue, String encoding) {
        final String resolved;
        try {
            resolved = new URL(url, attrValue.trim()).toExternalForm();
        } catch (MalformedURLException e) {
            logger.warn("Malformed URL: " + attrValue, e);
            return;
        }

        final String childUrl = this.encodeUrl(this.normalizeUrl(resolved), encoding);
        final boolean debug = logger.isDebugEnabled();
        if (debug) {
            logger.debug(attrValue + " -> " + childUrl);
        }

        if (StringUtil.isBlank(childUrl)) {
            if (debug) {
                logger.debug("Skip Child: " + childUrl);
            }
            return;
        }
        if (debug) {
            logger.debug("Add Child: " + childUrl);
        }
        urlList.add(childUrl);
    }

    /**
     * Percent-encodes every character of the URL that is not a valid URL
     * character, using the given charset.
     * <p>
     * A surrogate pair is encoded as one unit so that supplementary characters
     * are not turned into '?'. When the charset is not supported the URL is
     * returned unchanged instead of having its characters dropped.
     *
     * @param url the URL to encode; returned as is when blank
     * @param enc the charset name; when blank the URL is returned as is
     * @return the encoded URL
     */
    protected String encodeUrl(String url, String enc) {
        if (StringUtil.isBlank(url) || StringUtil.isBlank(enc)) {
            return url;
        }
        final int length = url.length();
        final StringBuilder buf = new StringBuilder(length + 100);
        int i = 0;
        while (i < length) {
            final char c = url.charAt(i);
            if (CharUtil.isUrlChar(c)) {
                buf.append(c);
                i++;
                continue;
            }
            // Keep a high/low surrogate pair together when encoding.
            int end = i + 1;
            if (Character.isHighSurrogate(c) && end < length && Character.isLowSurrogate(url.charAt(end))) {
                end++;
            }
            try {
                buf.append(URLEncoder.encode(url.substring(i, end), enc));
            } catch (UnsupportedEncodingException e) {
                // Same outcome as "no encoding given": leave the URL untouched.
                logger.warn("Unsupported encoding: " + enc + ". The url is not encoded: " + url, e);
                return url;
            }
            i = end;
        }
        return buf.toString();
    }

    /**
     * Normalizes a URL: trims it, removes the fragment ("#...") and the first
     * ";jsessionid=..." segment.
     *
     * @param u the URL to normalize; may be null
     * @return the normalized URL, or null when {@code u} is null or the
     *         result still contains "/../" or a space
     */
    protected String normalizeUrl(String u) {
        if (u == null) {
            return null;
        }

        String url = u.trim();
        final int fragmentStart = url.indexOf('#');
        if (fragmentStart >= 0) {
            url = url.substring(0, fragmentStart);
        }
        if (url.contains(";jsessionid")) {
            url = url.replaceFirst(";jsessionid=[a-zA-Z0-9\\.]*", "");
        }

        final boolean invalid = url.indexOf("/../") >= 0 || url.indexOf(' ') >= 0;
        if (!invalid) {
            return url;
        }
        if (logger.isDebugEnabled()) {
            logger.debug("INVALID URL: " + url);
        }
        return null;
    }

    /**
     * Tells whether an attribute value may be followed as a link.
     *
     * @param path the raw attribute value
     * @return false when the value is blank or matches the invalid URL
     *         pattern, true otherwise
     */
    protected boolean isValidPath(String path) {
        return !StringUtil.isBlank(path) && !this.invalidUrlPattern.matcher(path).find();
    }

    /**
     * Registers a parser feature.
     *
     * @param key the feature name
     * @param value the feature value as text
     * @throws RobotSystemException if the key or the value is blank
     */
    public void addFeature(String key, String value) {
        final boolean incomplete = StringUtil.isBlank(key) || StringUtil.isBlank(value);
        if (incomplete) {
            throw new RobotSystemException("key or value is null.");
        }
        this.featureMap.put(key, value);
    }

    /**
     * Registers a parser property.
     *
     * @param key the property name
     * @param value the property value
     * @throws RobotSystemException if the key or the value is blank
     */
    public void addProperty(String key, String value) {
        final boolean incomplete = StringUtil.isBlank(key) || StringUtil.isBlank(value);
        if (incomplete) {
            throw new RobotSystemException("key or value is null.");
        }
        this.propertyMap.put(key, value);
    }

    /**
     * Decodes the stored bytes of an access result into a string.
     * <p>
     * The stored encoding is used when present, otherwise "UTF-8". If the
     * stored encoding is not supported, the bytes are decoded as "UTF-8".
     *
     * @param accessResultData the stored result; it must have been produced by
     *        a transformer with the same name as this one
     * @return the decoded text, or null when no data is stored
     * @throws RobotSystemException if the result belongs to another transformer
     */
    @Override
    public Object getData(AccessResultData accessResultData) {
        if (!this.getName().equals(accessResultData.getTransformerName())) {
            throw new RobotSystemException("Transformer is invalid. Use " + accessResultData.getTransformerName() + ". This transformer is " + this.getName() + ".");
        }

        final byte[] bytes = accessResultData.getData();
        if (bytes == null) {
            return null;
        }

        final String encoding = accessResultData.getEncoding();
        final String charsetName = encoding != null ? encoding : "UTF-8";
        try {
            return new String(bytes, charsetName);
        } catch (UnsupportedEncodingException e) {
            if (logger.isInfoEnabled()) {
                logger.info("Invalid charsetName: " + encoding + ". Changed to " + "UTF-8", e);
            }
        }

        // Fallback after an unsupported stored encoding.
        try {
            return new String(bytes, "UTF-8");
        } catch (UnsupportedEncodingException e1) {
            throw new RobotSystemException("Unexpected exception", e1);
        }
    }

    /** @return the parser feature map (the live instance, not a copy) */
    public Map<String, String> getFeatureMap() {
        return featureMap;
    }

    /** @param featureMap the parser feature map to use from now on */
    public void setFeatureMap(Map<String, String> featureMap) {
        this.featureMap = featureMap;
    }

    /** @return the parser property map (the live instance, not a copy) */
    public Map<String, String> getPropertyMap() {
        return propertyMap;
    }

    /** @param propertyMap the parser property map to use from now on */
    public void setPropertyMap(Map<String, String> propertyMap) {
        this.propertyMap = propertyMap;
    }

    /** @return the child URL rule map (the live instance, not a copy) */
    public Map<String, String> getChildUrlRuleMap() {
        return childUrlRuleMap;
    }

    /** @param childUrlRuleMap the child URL rule map to use from now on */
    public void setChildUrlRuleMap(Map<String, String> childUrlRuleMap) {
        this.childUrlRuleMap = childUrlRuleMap;
    }

    /** @return the configured default encoding, or null when none is set */
    public String getDefaultEncoding() {
        return defaultEncoding;
    }

    /** @param defaultEncoding the default encoding to configure */
    public void setDefaultEncoding(String defaultEncoding) {
        this.defaultEncoding = defaultEncoding;
    }

    /** @return the number of bytes inspected when looking for a charset declaration */
    public int getPreloadSizeForCharset() {
        return preloadSizeForCharset;
    }

    /** @param preloadSizeForCharset the number of bytes to inspect when looking for a charset declaration */
    public void setPreloadSizeForCharset(int preloadSizeForCharset) {
        this.preloadSizeForCharset = preloadSizeForCharset;
    }

    /** @return the pattern identifying attribute values that must not be crawled */
    public Pattern getInvalidUrlPattern() {
        return invalidUrlPattern;
    }

    /** @param invalidUrlPattern the pattern identifying attribute values that must not be crawled */
    public void setInvalidUrlPattern(Pattern invalidUrlPattern) {
        this.invalidUrlPattern = invalidUrlPattern;
    }
}

