/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.language;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.Vector;
import org.apache.tika.language.NGramEntry;
import org.apache.tika.language.NGramProfile;

/**
 * Identifies the language of a piece of text by comparing the n-gram profile
 * of the text (the "suspect" profile) with the language profiles found on the
 * classpath.
 *
 * <p>The set of candidate languages is read from the
 * {@code langmappings.properties} resource located next to this class. For
 * every key of that file, the profile resource
 * {@code org/apache/tika/language/<key>.ngp} is loaded if it exists.
 *
 * <p>Every call to one of the {@code identify} methods that take content
 * re-analyzes the single suspect profile held by this instance, so sharing one
 * instance between threads is presumably unsafe (NOTE(review): depends on
 * {@code NGramProfile}; confirm before relying on it).
 */
public class LanguageIdentifier {
    /** Analysis length meaning "analyze the whole content". */
    private static final int DEFAULT_ANALYSIS_LENGTH = 0;

    /** N-gram length bounds used for the language profiles and the default suspect profile. */
    private static final int DEFAULT_MIN_NGRAM_LENGTH = 3;
    private static final int DEFAULT_MAX_NGRAM_LENGTH = 3;

    /** Classpath location and file extension of the bundled n-gram profiles. */
    private static final String PROFILE_RESOURCE_DIR = "org/apache/tika/language/";
    private static final String PROFILE_EXTENSION = "ngp";

    /** Command-line help text printed by {@link #main(String[])}. */
    private static final String USAGE = "Usage: LanguageIdentifier [-identifyrows filename maxlines] [-identifyfile charset filename] [-identifyfileset charset files] [-identifytext text] ";

    /** Command codes selected by the command-line flags. */
    private static final int IDFILE = 1;
    private static final int IDTEXT = 2;
    private static final int IDFILESET = 4;
    private static final int IDROWS = 5;

    /** Profiles that were loaded successfully. */
    private ArrayList<NGramProfile> languages = new ArrayList<NGramProfile>();
    /** Names of the languages whose profiles were loaded successfully. */
    private ArrayList<String> supportedLanguages = new ArrayList<String>();
    private int minLength = DEFAULT_MIN_NGRAM_LENGTH;
    private int maxLength = DEFAULT_MAX_NGRAM_LENGTH;
    /** Maximum amount of content to analyze; {@code 0} means no limit. */
    private int analyzeLength = DEFAULT_ANALYSIS_LENGTH;
    /** Maps an n-gram sequence to the entries of every loaded profile registered for it. */
    private HashMap<CharSequence, NGramEntry[]> ngramsIdx = new HashMap<CharSequence, NGramEntry[]>();
    /** Profile of the content currently being identified. */
    private NGramProfile suspect = null;

    /**
     * Creates an identifier that uses the given profile as its suspect profile
     * and loads all language profiles available on the classpath.
     *
     * <p>Loading problems do not propagate: they are reported on standard
     * error and the identifier is left with whatever profiles were loaded up
     * to that point.
     *
     * @param suspect profile that will hold the n-grams of the content to identify
     */
    public LanguageIdentifier(NGramProfile suspect) {
        this.suspect = suspect;
        try {
            Properties mappings = this.loadLanguageMappings();
            if (mappings == null) {
                return;
            }
            HashMap<NGramEntry, List<NGramEntry>> tmpIdx = new HashMap<NGramEntry, List<NGramEntry>>();
            Enumeration<Object> allLanguages = mappings.keys();
            while (allLanguages.hasMoreElements()) {
                this.loadProfile((String) allLanguages.nextElement(), tmpIdx);
            }
            // Freeze the per-n-gram lists into arrays keyed by the n-gram sequence.
            for (NGramEntry entry : tmpIdx.keySet()) {
                List<NGramEntry> registered = tmpIdx.get(entry);
                if (registered == null) {
                    continue;
                }
                this.ngramsIdx.put(entry.getSeq(), registered.toArray(new NGramEntry[registered.size()]));
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates an identifier with a fresh suspect profile named {@code "suspect"}.
     */
    public LanguageIdentifier() {
        this(new NGramProfile("suspect", DEFAULT_MIN_NGRAM_LENGTH, DEFAULT_MAX_NGRAM_LENGTH));
    }

    /**
     * Reads {@code langmappings.properties} from the package of this class.
     *
     * @return the loaded properties, or {@code null} if the resource is missing
     * @throws IOException if the resource cannot be read
     */
    private Properties loadLanguageMappings() throws IOException {
        InputStream in = this.getClass().getResourceAsStream("langmappings.properties");
        if (in == null) {
            System.err.println("Language mappings resource langmappings.properties was not found");
            return null;
        }
        try {
            Properties mappings = new Properties();
            mappings.load(in);
            return mappings;
        }
        finally {
            in.close();
        }
    }

    /**
     * Loads the profile of one language, if its resource exists, and registers
     * each of its n-gram entries in {@code index}.
     *
     * @param lang  language name, used to build the profile resource name
     * @param index accumulator mapping an n-gram entry to all entries registered for it
     */
    private void loadProfile(String lang, HashMap<NGramEntry, List<NGramEntry>> index) {
        InputStream is = this.getClass().getClassLoader().getResourceAsStream(PROFILE_RESOURCE_DIR + lang + "." + PROFILE_EXTENSION);
        if (is == null) {
            return;
        }
        NGramProfile profile = new NGramProfile(lang, this.minLength, this.maxLength);
        try {
            try {
                profile.load(is);
                this.languages.add(profile);
                this.supportedLanguages.add(lang);
                for (NGramEntry entry : profile.getSorted()) {
                    List<NGramEntry> registered = index.get(entry);
                    if (registered == null) {
                        registered = new ArrayList<NGramEntry>();
                        index.put(entry, registered);
                    }
                    registered.add(entry);
                }
            }
            finally {
                // Always release the resource stream, even when loading fails.
                is.close();
            }
        }
        catch (IOException e) {
            // One unreadable profile must not prevent the others from loading.
            System.err.println("Unable to load n-gram profile for language '" + lang + "': " + e);
        }
    }

    /**
     * Command-line entry point; see the usage text for the supported flags.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            System.err.println(USAGE);
            System.exit(-1);
        }
        int command = 0;
        List<String> fileset = new ArrayList<String>();
        String filename = "";
        String charset = "";
        StringBuilder text = new StringBuilder();
        int max = 0;

        for (int i = 0; i < args.length; ++i) {
            String arg = args[i];
            if (arg.equals("-identifyfile")) {
                requireOperands(args, i, 2);
                command = IDFILE;
                charset = args[++i];
                filename = args[++i];
            } else if (arg.equals("-identifyrows")) {
                requireOperands(args, i, 2);
                command = IDROWS;
                filename = args[++i];
                try {
                    max = Integer.parseInt(args[++i]);
                }
                catch (NumberFormatException e) {
                    System.err.println(USAGE);
                    System.exit(-1);
                }
            } else if (arg.equals("-identifytext")) {
                command = IDTEXT;
                // Everything after the flag, including the last argument, is the text.
                for (++i; i < args.length; ++i) {
                    text.append(args[i]).append(" ");
                }
            } else if (arg.equals("-identifyfileset")) {
                requireOperands(args, i, 1);
                command = IDFILESET;
                charset = args[++i];
                for (++i; i < args.length; ++i) {
                    File f = new File(args[i]);
                    File[] files = f.isDirectory() ? f.listFiles() : new File[]{f};
                    if (files == null) {
                        // listFiles() returns null when the directory cannot be read.
                        continue;
                    }
                    for (File file : files) {
                        fileset.add(file.getAbsolutePath());
                    }
                }
            }
        }

        String lang = null;
        LanguageIdentifier idfr = new LanguageIdentifier();
        try {
            switch (command) {
                case IDTEXT: {
                    lang = idfr.identify(text.toString());
                    break;
                }
                case IDFILE: {
                    lang = identifyFile(idfr, filename, charset);
                    break;
                }
                case IDROWS: {
                    identifyRows(idfr, filename, max);
                    System.exit(0);
                    break;
                }
                case IDFILESET: {
                    System.out.println("FILESET");
                    for (String path : fileset) {
                        // Reset per file so a failure never reports the previous file's language.
                        String detected = null;
                        try {
                            detected = identifyFile(idfr, path, charset);
                        }
                        catch (Exception e) {
                            System.out.println(e);
                        }
                        System.out.println(path + " was identified as " + detected);
                    }
                    System.exit(0);
                    break;
                }
                default: {
                    break;
                }
            }
        }
        catch (Exception e) {
            System.out.println(e);
        }
        System.out.println("text was identified as " + lang);
    }

    /**
     * Prints the usage text and exits when fewer than {@code count} arguments
     * follow the flag at {@code flagIndex}.
     */
    private static void requireOperands(String[] args, int flagIndex, int count) {
        if (flagIndex + count >= args.length) {
            System.err.println(USAGE);
            System.exit(-1);
        }
    }

    /**
     * Identifies the language of one file, closing the file afterwards.
     */
    private static String identifyFile(LanguageIdentifier identifier, String filename, String charset) throws IOException {
        FileInputStream in = new FileInputStream(new File(filename));
        try {
            return identifier.identify(in, charset);
        }
        finally {
            in.close();
        }
    }

    /**
     * Identifies up to {@code maxRows} lines of a file individually and prints
     * one {@code R=<language>:<line>} row per identified line. Lines whose
     * trimmed length is at most two characters are skipped and not counted.
     * The file is decoded with the platform default charset.
     */
    private static void identifyRows(LanguageIdentifier identifier, String filename, int maxRows) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(new File(filename))));
        try {
            int remaining = maxRows;
            String line;
            while (remaining > 0 && (line = reader.readLine()) != null) {
                line = line.trim();
                if (line.length() <= 2) {
                    continue;
                }
                --remaining;
                System.out.println("R=" + identifier.identify(line) + ":" + line);
            }
        }
        finally {
            reader.close();
        }
    }

    /**
     * Identifies the language of the given text.
     *
     * @param content text to identify; must not be {@code null}
     * @return name of the best scoring language profile, or an empty string if
     *         no n-gram of the text is known to any loaded profile
     */
    public String identify(String content) {
        return this.identify(new StringBuilder(content));
    }

    /**
     * Identifies the language of the given text. When an analysis length is
     * configured and the content is longer, only a truncated copy is analyzed;
     * {@code content} itself is not modified by this method.
     *
     * @param content text to identify
     * @return name of the best scoring language profile, or an empty string if
     *         no n-gram of the text is known to any loaded profile
     */
    public String identify(StringBuilder content) {
        StringBuilder text = content;
        if (this.analyzeLength > 0 && content.length() > this.analyzeLength) {
            text = new StringBuilder(this.analyzeLength).append(content, 0, this.analyzeLength);
        }
        this.suspect.analyze(text);
        return this.identify();
    }

    /**
     * Scores the current suspect profile against all loaded profiles.
     *
     * <p>For every n-gram of the suspect profile that is present in the index,
     * each profile registered for that n-gram gains the sum of its own
     * frequency and the suspect's frequency for that n-gram. The language
     * whose running score is the first to reach the highest value wins.
     *
     * @return name of the best scoring language profile, or an empty string if
     *         no n-gram of the suspect profile is present in the index
     */
    public String identify() {
        // Float.MIN_VALUE is the smallest POSITIVE float, not the lowest value;
        // start below every possible score so any matched profile can win.
        float topScore = Float.NEGATIVE_INFINITY;
        String lang = "";
        HashMap<NGramProfile, Float> scores = new HashMap<NGramProfile, Float>();
        for (NGramEntry searched : this.suspect.getSorted()) {
            NGramEntry[] candidates = this.ngramsIdx.get(searched.getSeq());
            if (candidates == null) {
                continue;
            }
            for (NGramEntry candidate : candidates) {
                NGramProfile profile = candidate.getProfile();
                Float previous = scores.get(profile);
                float score = previous == null ? 0.0f : previous.floatValue();
                score += candidate.getFrequency() + searched.getFrequency();
                scores.put(profile, Float.valueOf(score));
                if (score > topScore) {
                    topScore = score;
                    lang = profile.getName();
                }
            }
        }
        return lang;
    }

    /**
     * Identifies the language of the stream content, decoding the bytes with
     * the platform default charset. The stream is not closed.
     *
     * @param is stream to read the content from
     * @return name of the best scoring language profile, or an empty string
     * @throws IOException if the stream cannot be read
     */
    public String identify(InputStream is) throws IOException {
        return this.identify(is, null);
    }

    /**
     * Identifies the language of the stream content. The stream is read up to
     * the configured analysis length (or completely when no limit is set) and
     * is not closed by this method.
     *
     * @param is      stream to read the content from
     * @param charset name of the charset used to decode the bytes, or
     *                {@code null} for the platform default charset
     * @return name of the best scoring language profile, or an empty string
     * @throws IOException if the stream cannot be read or the charset is not supported
     */
    public String identify(InputStream is, String charset) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        byte[] buffer = new byte[2048];
        boolean limited = this.analyzeLength != 0;
        int len;
        // Check the limit before reading so no bytes beyond it are consumed.
        while ((!limited || out.size() < this.analyzeLength) && (len = is.read(buffer)) != -1) {
            if (limited) {
                len = Math.min(len, this.analyzeLength - out.size());
            }
            out.write(buffer, 0, len);
        }
        return this.identify(charset == null ? out.toString() : out.toString(charset));
    }
}

