#!/usr/bin/ruby
# -*- coding: euc-jp -*-
# bogofilter-nihongo.rb - read mail from stdin, translate to EUC and make wakati-gaki then send to bogofilter
# $Id: bogofilter-nihongo.rb,v 0.3 2012/07/16 hiromatsu $
#   modified for ruby 1.9.3
# $Id: bogofilter-nihongo.rb,v 0.2 2009/06/04 hiromatsu $
#   modified for gems, that open4 and rmail depends on.
# $Id: bogofilter-nihongo.rb,v 0.1 2006/12/03 14:33:20 hiromatsu $
=begin
Copyright (C) 2006 Takashi Hiromatsu.  All rights reserved.
This software is based on prepare.rb. Copyright notices are also upon it.

prepare.rb - read mail from stdin, write prepared to stdout
$Id: prepare.rb,v 1.7 2006/09/04 03:04:55 tominaga Exp $

Copyright (C) 2006 Kazuto Tominaga.  All rights reserved.

You can do to this software any combination of the following: use,
copy, modify, and distribute (henceforth called Activity), provided
that the purpose of your Activity is not directly related to any of
the following:

  * aggravating confrontation among any groups of people
  * exacerbating the situation of poverty and famine
  * encouraging discrimination among any groups of people
  * causing distrust, anxiety, or hostility among any people
  * hurting somebody physically or mentally

This software is here as it is.  It has no specification that it shall
abide by, it has no correct behavior in any sense, and it does not have
any kind of expressed or implied warranty.  I have no responsibility
for any result, any trouble, any damage, or any loss related to your
Activity.
=end

require 'timeout'
require 'MeCab'
require 'kconv'
require 'rubygems'
require 'rmail'
require 'open4'
require 'shellwords'

# Usage text printed (followed by bogofilter's own help output) for --help.
$HelpMsg = "bogofilter-nihongo version 0.3\n\n" +
  "Usage: ruby bogofilter-nihongo.rb [options] < message\n\n" +
  "bogofilter-nihongo original options:\n" +
  " --filter - work like as encoder and wakati-gaki filter\n" +
  " --help - show this message\n" +
  " --debug - show bogofilter process status\n\n" +
  "other options will be passed through bogofilter\n\n"

# Process exit status; overwritten with bogofilter's status by execbogofilter.
$EXIT_STATUS = 0
# Option flags set by bogoarg (0 = off, 1 = on).
$USER_DEBUG = 0
$FILTER = 0
$HELP = 0

# Returns the shared MeCab tagger configured for wakati output.
# The tagger is created on first use and reused for every message part
# instead of being rebuilt per part.
def wakati_tagger
  $WAKATI_TAGGER ||= MeCab::Tagger.new("-O wakati")
end

# Splits a body that contains an embedded message into two strings.
#
# s must contain a line starting with "mime-version:" (case-insensitive);
# otherwise 'internal error' is written to stderr and :LogicInconsistency
# is thrown (no catch for it is visible in this file).
#
# Returns [preamble, embedded]. The text before the "mime-version:" line is
# cut after its last empty line: everything up to and including that empty
# line is the preamble, the remainder plus the rest of s is the embedded
# message. When there is no such empty line the preamble is '' and the
# embedded message is all of s.
def splitbody(s)
  mime_line = /^mime-version:/i.match(s)
  unless mime_line
    $stderr.puts 'internal error'
    throw :LogicInconsistency
  end

  before = mime_line.pre_match
  from_mime = mime_line[0] + mime_line.post_match

  # Greedy first group, so the split happens at the LAST empty line of `before`.
  cut = /\A(((.|\n)*)^$\n)^((.|\n)*)\z/.match(before)
  return ['', before + from_mime] unless cut

  [cut[1], cut[4] + from_mime]
end

# Rewrites a single (non-multipart) RMail message part in place and returns
# the resulting message.
#
# - Parts whose content type is neither text/* nor message/* get their body
#   replaced by a placeholder.
# - message/rfc822 (or rfc2822) bodies are parsed and processed recursively.
# - text/rfc822-headers bodies are stored decoded, without tokenizing.
# - Bodies that appear to contain an embedded mail (a "mime-version:" line)
#   are split and returned as a new two-part message.
# - Everything else is decoded, stripped of HTML tags (text/html only),
#   converted to EUC-JP, tokenized by MeCab and stored one token per line.
def singlepart(msg)
  typ = (msg.header['content-type'] || '').downcase

  if typ != '' && typ !~ /^text/ && typ !~ /^message/
    msg.body = 'body-of-this-part-deleted'
    return msg
  end

  if typ =~ /^message\/rfc2?822/
    inner = multipart(RMail::Parser.read(msg.body))
    msg.body = RMail::Serialize.write('', inner) # cliche; see RMail::Serialize
    return msg
  end

  # some mailer (e.g., Mew) adds comments to CTE, and rubymail doesn't
  # handle it; trim the header down to the bare encoding name
  cte = msg.header['content-transfer-encoding']
  if /quoted-printable/i =~ cte
    msg.header.delete('content-transfer-encoding')
    msg.header['Content-Transfer-Encoding'] = 'quoted-printable'
  elsif /base64/i =~ cte
    msg.header.delete('content-transfer-encoding')
    msg.header['Content-Transfer-Encoding'] = 'base64'
  end

  rawbody = msg.body.nil? ? '' : msg.decode

  if typ =~ /^text\/rfc2?822-headers/
    msg.body = rawbody
    return msg
  end

  # check if there is embedded mail (heuristic, for qmail daemon report)
  if /^mime-version:/i =~ rawbody
    preamble, embedded = splitbody(rawbody)
    wrapper = RMail::Message.new
    msg.body = preamble
    wrapper.add_part(msg)
    wrapper.add_part(RMail::Parser.read(embedded))
    multipart(wrapper) # rewrites the parts of wrapper in place
    return wrapper
  end

  if /^text\/html/ =~ typ
    # Strip tags on the raw bytes. force_encoding only relabels the string,
    # whereas encode("ASCII-8BIT") raises on non-ASCII text that carries a
    # real encoding tag.
    rawbody = rawbody.dup.force_encoding("ASCII-8BIT").gsub(/<[^>]+>/, "")
  else
    # Drop lines made of exactly 61 printable characters
    # (presumably leftover encoded data lines -- TODO confirm).
    rawbody = rawbody.gsub(/^[[:graph:]]{61}$/, "")
  end

  # Non-bang gsub: gsub! returns nil when the text contains no newline,
  # which would hand nil to the tagger.
  rawbody = wakati_tagger.parse(rawbody.toeuc.gsub(/\n/, ""))
  msg.body = rawbody.gsub(" ", "\n")
  msg
end

# Recursively rewrites msg in place: every part of a multipart message is
# processed by multipart, anything else by singlepart. Returns the
# resulting message.
def multipart(msg)
  return singlepart(msg) unless msg.multipart?

  msg.body = msg.body.map { |part| multipart(part) }
  msg
end

# Runs "bogofilter <bogoflag>" with body on its standard input.
#
# bogoflag is a shell-ready argument string as produced by bogoarg.
# Stores the child's exit status in $EXIT_STATUS and returns the child's
# standard output with surrounding whitespace stripped. When $USER_DEBUG
# is 1, process details are printed to stdout.
def execbogofilter(body, bogoflag)
  $stdout.flush
  pid, stdin, stdout, stderr = Open4.popen4 "bogofilter #{bogoflag}"

  # Drain both output pipes concurrently and before waiting for the child:
  # a child blocked on a full pipe never exits, so waiting first can hang.
  out_reader = Thread.new { stdout.read }
  err_reader = Thread.new { stderr.read }

  begin
    stdin.puts body
  rescue Errno::EPIPE
    # The child exited without reading its input (e.g. it only printed a
    # help text); its output and exit status are still collected below.
  end
  stdin.close

  s = out_reader.value.strip
  err = err_reader.value.strip
  stdout.close
  stderr.close

  _ignored, status = Process.waitpid2(pid)
  # exitstatus is nil when the child was terminated by a signal; report 3,
  # which bogofilter uses for errors (NOTE(review): confirm against the
  # installed bogofilter man page).
  $EXIT_STATUS = status.exitstatus || 3

  if $USER_DEBUG == 1
    puts "pid : #{ pid }"
    puts "stdout : #{ s }"
    puts "stderr : #{ err }"
    puts "status : #{ status.inspect }"
    puts "exitstatus : #{ status.exitstatus }"
  end
  s
end

# Interprets the command line arguments in bogoflag (an array of strings).
#
# --filter and --debug are consumed and set $FILTER / $USER_DEBUG.
# --help sets $HELP and is also passed on. Every passed-on argument is
# shell-escaped, so arguments containing spaces or shell metacharacters
# reach bogofilter as a single word.
#
# Returns the passed-on arguments as one string, each followed by a space.
def bogoarg(bogoflag)
  flag = ''
  bogoflag.each do |arg|
    case arg
    when "--filter"
      $FILTER = 1
    when "--debug"
      $USER_DEBUG = 1
    else
      $HELP = 1 if arg == "--help"
      flag += Shellwords.escape(arg) + " "
    end
  end
  flag
end

# Entry point: reads a mail from stdin, rewrites it for bogofilter, and
# either prints the rewritten mail (--filter) or pipes it to bogofilter and
# prints bogofilter's output. With --help, prints the usage text followed
# by bogofilter's own output. Exits with $EXIT_STATUS.
def main
  GC.disable
  bogo_arg = bogoarg(ARGV)

  if $HELP == 0
    message = multipart(RMail::Parser.read($stdin))
    s = RMail::Serialize.write('', message)
    s = execbogofilter(s, bogo_arg) if $FILTER == 0
  else
    s = $HelpMsg + execbogofilter('', bogo_arg)
  end

  puts s
  exit($EXIT_STATUS)
end

main