# Patch summary: charset detection for Japanese search keywords in Piwik.
# Everything above the first "diff" header is descriptive text only.
#
# core/DataFiles/SearchEngines.php
#   - Adds a 'default' entry whose 4th element is the charset list UTF-8, EUC-JP, MS932.
#   - Gives the Japanese engine entries touched below that same list as their 4th element
#     (www.excite.co.jp changes from the single charset 'SHIFT_JIS' to the list).
#   - Adds 'www.google.co.jp/search' and 'www.google.co.jp/custom' entries.
# core/UrlHelper.php
#   - Converts the referrer keyword to UTF-8 whenever iconv() exists, using the engine's
#     charsets, else the 'default' entry, else a built-in UTF-8/EUC-JP/MS932 list.
#   - Detection is strict; when no listed charset matches, the first listed charset is used.
#   - When conversion yields nothing, the keyword is kept as received.
# plugins/Actions/Actions/ActionSiteSearch.php
#   - Applies the same detection/conversion to the site-search keyword using the 'default'
#     list; a keyword that converts to nothing is emptied, so the method returns false.
#   - The category name is decoded through PageUrl::urldecodeValidUtf8() and trimmed.
# plugins/Referrers/Columns/Keyword.php
#   - Truncates the keyword to 255 characters with mb_substr() when available, otherwise
#     to 255 bytes with substr().
#
# NOTE(review): this copy has its line breaks and indentation collapsed; they must be
#   restored before the patch can be applied.
# NOTE(review): 'default' lives in the same array as the host entries -- confirm that code
#   iterating $GLOBALS['Piwik_SearchEngines'] tolerates an entry with an empty name.
# NOTE(review): UrlHelper.php now calls Log::debug() -- confirm the file imports Piwik's Log.
diff -ur piwik.orig/core/DataFiles/SearchEngines.php piwik/core/DataFiles/SearchEngines.php --- piwik.orig/core/DataFiles/SearchEngines.php 2015-02-24 13:14:10.245504808 +0900 +++ piwik/core/DataFiles/SearchEngines.php 2015-02-24 14:10:12.715923827 +0900 @@ -56,6 +56,8 @@ */ if (!isset($GLOBALS['Piwik_SearchEngines'])) { $GLOBALS['Piwik_SearchEngines'] = array( + // default character code(s) + 'default' => array('', '', '', array('UTF-8', 'EUC-JP', 'MS932')), // 1 '1.cz' => array('1.cz', array('/s\/([^\/]+)/', 'q'), 's/{k}', 'iso-8859-2'), @@ -194,8 +196,8 @@ 'searchatlas.centrum.cz' => array('Atlas', 'q', '?q={k}'), // auone - 'search.auone.jp' => array('auone', 'q', '?q={k}'), - 'sp-image.search.auone.jp' => array('auone Images', 'q', '?q={k}'), + 'search.auone.jp' => array('auone', 'q', '?q={k}', array('UTF-8', 'EUC-JP', 'MS932')), + 'sp-image.search.auone.jp' => array('auone Images', 'q', '?q={k}', array('UTF-8', 'EUC-JP', 'MS932')), // Austronaut 'www2.austronaut.at' => array('Austronaut', 'q'), @@ -216,8 +218,8 @@ 'web.gougou.com' => array('Baidu', 'search', 'search?search={k}'), // uses baidu search // Biglobe - 'cgi.search.biglobe.ne.jp' => array('Biglobe', 'q', 'cgi-bin/search-st?q={k}'), - 'images.search.biglobe.ne.jp' => array('Biglobe Images', 'q', 'cgi-bin/search-st?q={k}'), + 'cgi.search.biglobe.ne.jp' => array('Biglobe', 'q', 'cgi-bin/search-st?q={k}', array('UTF-8', 'EUC-JP', 'MS932')), + 'images.search.biglobe.ne.jp' => array('Biglobe Images', 'q', 'cgi-bin/search-st?q={k}', array('UTF-8', 'EUC-JP', 'MS932')), // Bing 'bing.com' => array('Bing', array('q', 'Q'), 'search?q={k}'), @@ -336,7 +338,7 @@ 'search.excite.es' => array('Excite'), 'search.excite.nl' => array('Excite'), 'msxml.excite.com' => array('Excite', '/\/[^\/]+\/ws\/results\/[^\/]+\/([^\/]+)/'), - 'www.excite.co.jp' => array('Excite', 'search', 'search.gw?search={k}', 'SHIFT_JIS'), + 'www.excite.co.jp' => array('Excite', 'search', 'search.gw?search={k}', array('UTF-8', 'EUC-JP', 
'MS932')), // Exalead 'www.exalead.fr' => array('Exalead', 'q', 'search/results?q={k}'), @@ -408,11 +410,12 @@ 'www.gomeo.com' => array('Gomeo', array('Keywords', '/\/search\/([^\/]+)/'), '/search/{k}'), // goo - 'search.goo.ne.jp' => array('goo', 'MT', 'web.jsp?MT={k}'), - 'ocnsearch.goo.ne.jp' => array('goo'), + 'search.goo.ne.jp' => array('goo', 'MT', 'web.jsp?MT={k}', array('UTF-8', 'EUC-JP', 'MS932')), + 'ocnsearch.goo.ne.jp' => array('goo', '', '', array('UTF-8', 'EUC-JP', 'MS932')), // Google 'google.com' => array('Google', 'q', 'search?q={k}'), + 'www.google.co.jp/search' => array('Google', 'q', 'q={k}', array('UTF-8', 'EUC-JP', 'MS932')), 'google.{}' => array('Google'), 'www2.google.com' => array('Google'), 'ipv6.google.com' => array('Google'), @@ -460,9 +463,9 @@ '{}.wow.com' => array('Google'), 'search.leonardo.it' => array('Google'), 'www.optuszoo.com.au' => array('Google'), - 'search.dolphin-browser.jp' => array('Google'), - 'search.smt.docomo.ne.jp' => array('Google', 'MT'), - 'image.search.smt.docomo.ne.jp' => array('Google', 'MT'), + 'search.dolphin-browser.jp' => array('Google', '', '', array('UTF-8', 'EUC-JP', 'MS932')), + 'search.smt.docomo.ne.jp' => array('Google', 'MT', '', array('UTF-8', 'EUC-JP', 'MS932')), + 'image.search.smt.docomo.ne.jp' => array('Google', 'MT', '', array('UTF-8', 'EUC-JP', 'MS932')), 'gfsoso.com' => array('Google', 'q'), // Google Earth @@ -485,6 +488,7 @@ 'google.{}/cse' => array('Google Custom Search'), 'google.com/custom' => array('Google Custom Search'), 'google.{}/custom' => array('Google Custom Search'), + 'www.google.co.jp/custom' => array('Google Custom Search', 'q', '', array('UTF-8', 'EUC-JP', 'MS932')), // Google Translation 'translate.google.com' => array('Google Translations', 'q'), @@ -730,9 +734,9 @@ 'www.neti.ee' => array('Neti', 'query', 'cgi-bin/otsing?query={k}', 'iso-8859-1'), // Nifty - 'search.nifty.com' => array('Nifty', array('q', 'Text'), 'websearch/search?q={k}'), - 'search.azby.fmworld.net' => 
array('Nifty'), - 'videosearch.nifty.com' => array('Nifty Videos', 'kw', 'search?kw={k}'), + 'search.nifty.com' => array('Nifty', array('q', 'Text'), 'websearch/search?q={k}', array('UTF-8', 'EUC-JP', 'MS932')), + 'search.azby.fmworld.net' => array('Nifty', '', '', array('UTF-8', 'EUC-JP', 'MS932')), + 'videosearch.nifty.com' => array('Nifty Videos', 'kw', 'search?kw={k}', array('UTF-8', 'EUC-JP', 'MS932')), // Nigma 'nigma.ru' => array('Nigma', 's', 'index.php?s={k}'), @@ -778,7 +782,7 @@ 'www.qualigo.nl' => array('Qualigo'), // Rakuten - 'websearch.rakuten.co.jp' => array('Rakuten', 'qt', 'WebIS?qt={k}'), + 'websearch.rakuten.co.jp' => array('Rakuten', 'qt', 'WebIS?qt={k}', array('UTF-8', 'EUC-JP', 'MS932')), // Rambler 'nova.rambler.ru' => array('Rambler', array('query', 'words'), 'search?query={k}'), @@ -844,11 +848,11 @@ 'search.snap.do' => array('Snap.do', 'q', '?q={k}'), // SeeSaa - 'search.seesaa.jp' => array('SeeSaa', '/\/([^\/]+)\/index\.html/', '{k}/index.html'), + 'search.seesaa.jp' => array('SeeSaa', '/\/([^\/]+)\/index\.html/', '{k}/index.html', array('UTF-8', 'EUC-JP', 'MS932')), // So-net - 'www.so-net.ne.jp' => array('So-net', 'query', 'search/web/?query={k}'), - 'video.so-net.ne.jp' => array('So-net Videos', 'kw', 'search/?kw={k}'), + 'www.so-net.ne.jp' => array('So-net', 'query', 'search/web/?query={k}', array('UTF-8', 'EUC-JP', 'MS932')), + 'video.so-net.ne.jp' => array('So-net Videos', 'kw', 'search/?kw={k}', array('UTF-8', 'EUC-JP', 'MS932')), // Sogou 'www.sogou.com' => array('Sogou', 'query', 'web?query={k}', 'gb2312'), @@ -995,7 +999,7 @@ 'szukaj.wp.pl' => array('Wirtualna Polska', 'szukaj', 'http://szukaj.wp.pl/szukaj.html?szukaj={k}'), // Woopie - 'www.woopie.jp' => array('Woopie', 'kw', 'search?kw={k}'), + 'www.woopie.jp' => array('Woopie', 'kw', 'search?kw={k}', array('UTF-8', 'EUC-JP', 'MS932')), // WWW 'search.www.ee' => array('www värav', 'query'), @@ -1004,10 +1008,10 @@ 'www.x-recherche.com' => array('X-Recherche', 'MOTS', 
'cgi-bin/websearch?MOTS={k}'), // Yahoo! Japan - 'search.yahoo.co.jp' => array('Yahoo! Japan', array('p', 'vp'), 'search?p={k}'), - 'jp.hao123.com' => array('Yahoo! Japan', 'query'), - 'video.search.yahoo.co.jp' => array('Yahoo! Japan Videos', 'p', 'search?p={k}'), - 'image.search.yahoo.co.jp' => array('Yahoo! Japan Images', 'p', 'search?p={k}'), + 'search.yahoo.co.jp' => array('Yahoo! Japan', array('p', 'vp'), 'search?p={k}', array('UTF-8', 'EUC-JP', 'MS932')), + 'jp.hao123.com' => array('Yahoo! Japan', 'query', '', array('UTF-8', 'EUC-JP', 'MS932')), + 'video.search.yahoo.co.jp' => array('Yahoo! Japan Videos', 'p', 'search?p={k}', array('UTF-8', 'EUC-JP', 'MS932')), + 'image.search.yahoo.co.jp' => array('Yahoo! Japan Images', 'p', 'search?p={k}', array('UTF-8', 'EUC-JP', 'MS932')), // Yahoo 'search.yahoo.com' => array('Yahoo!', array('p', 'q'), 'search?p={k}'), diff -ur piwik.orig/core/UrlHelper.php piwik/core/UrlHelper.php --- piwik.orig/core/UrlHelper.php 2015-02-24 13:14:10.320504208 +0900 +++ piwik/core/UrlHelper.php 2015-02-24 14:10:12.722923771 +0900 @@ -439,34 +439,32 @@ } if (!empty($key)) { - if (function_exists('iconv') - && isset($searchEngines[$referrerHost][3]) - ) { + if (function_exists('iconv')) { + $charsets = !empty($searchEngines[$referrerHost][3]) ? $searchEngines[$referrerHost][3] : (!empty($searchEngines['default'][3]) ? 
$searchEngines['default'][3] : array('UTF-8', 'EUC-JP', 'MS932')); // accepts string, array, or comma-separated list string in preferred order - $charsets = $searchEngines[$referrerHost][3]; if (!is_array($charsets)) { $charsets = explode(',', $charsets); } - - if (!empty($charsets)) { - $charset = $charsets[0]; - if (count($charsets) > 1 - && function_exists('mb_detect_encoding') - ) { - $charset = mb_detect_encoding($key, $charsets); - if ($charset === false) { - $charset = $charsets[0]; - } + $charset = $charsets[0]; + if (count($charsets) > 1 + && function_exists('mb_detect_encoding') + ) { + $charset = mb_detect_encoding($key, $charsets, true); + if ($charset === false) { + $charset = $charsets[0]; } + } - $newkey = @iconv($charset, 'UTF-8//IGNORE', $key); - if (!empty($newkey)) { - $key = $newkey; - } + $newkey = @iconv($charset, 'UTF-8//IGNORE', $key); + if (!empty($newkey)) { + $key = $newkey; + } else { + // iconv produced nothing usable: keep the keyword as received rather than storing a diagnostic message as the keyword } } $key = Common::mb_strtolower($key); + Log::debug("searchengine, detected charcode, key = '$searchEngineName', '" . (isset($charset) ? $charset : '') . "', '$key'"); } return array( diff -ur piwik.orig/plugins/Actions/Actions/ActionSiteSearch.php piwik/plugins/Actions/Actions/ActionSiteSearch.php --- piwik.orig/plugins/Actions/Actions/ActionSiteSearch.php 2015-02-24 13:14:03.766555407 +0900 +++ piwik/plugins/Actions/Actions/ActionSiteSearch.php 2015-02-24 14:10:12.725923747 +0900 @@ -180,19 +180,42 @@ if (is_array($actionName)) { $actionName = reset($actionName); } + Common::printDebug("actionname: " . $actionName); + $actionName = trim(urldecode($actionName)); + $searchEngines = Common::getSearchEngineUrls(); + if (!empty($actionName)) { + if (function_exists('iconv')) { + // accepts string, array, or comma-separated list string in preferred order + $charsets = !empty($searchEngines['default'][3]) ? 
$searchEngines['default'][3] : array('UTF-8', 'EUC-JP', 'MS932'); + if (!is_array($charsets)) { + $charsets = explode(',', $charsets); + } + $charset = $charsets[0]; + if (count($charsets) > 1 + && function_exists('mb_detect_encoding') + ) { + $charset = mb_detect_encoding($actionName, $charsets, true); + if ($charset === false) { + $charset = $charsets[0]; + } + } - $actionName = PageUrl::urldecodeValidUtf8($actionName); - $actionName = trim($actionName); + $newactionName = @iconv($charset, 'UTF-8//IGNORE', $actionName); + if (!empty($newactionName)) { + $actionName = $newactionName; + } else { + $actionName = ''; + } + Common::printDebug("detected charcode, actionname = '$charset', '$actionName'"); + } + } if (empty($actionName)) { return false; } - if (is_array($categoryName)) { $categoryName = reset($categoryName); } - $categoryName = PageUrl::urldecodeValidUtf8($categoryName); - $categoryName = trim($categoryName); - + $categoryName = trim(PageUrl::urldecodeValidUtf8($categoryName)); return array($url, $actionName, $categoryName, $count); } diff -ur piwik.orig/plugins/Referrers/Columns/Keyword.php piwik/plugins/Referrers/Columns/Keyword.php --- piwik.orig/plugins/Referrers/Columns/Keyword.php 2015-02-24 13:14:05.281543522 +0900 +++ piwik/plugins/Referrers/Columns/Keyword.php 2015-02-24 14:10:12.732923691 +0900 @@ -44,7 +44,11 @@ $information = $this->getReferrerInformationFromRequest($request); if (!empty($information['referer_keyword'])) { - return substr($information['referer_keyword'], 0, 255); + if (function_exists('mb_substr')) { + return mb_substr($information['referer_keyword'], 0, 255, 'UTF-8'); + } else { + return substr($information['referer_keyword'], 0, 255); + } } return $information['referer_keyword'];