对数据的编码辅助处理：将非 UTF-8 编码的数据转换为 UTF-8 编码。
优先使用的方法是 iconv(from, to, str)，其次是 mb_convert_encoding(str, to, from)。
iconv 比 mb_convert_encoding 处理效率高，但是 iconv 对 gb2312 的编码转换有 bug，比如字符 '一'。
注释源代码:
<?php if ( ! defined('BASEPATH')) exit('No direct script access allowed'); // Stop immediately when the BASEPATH constant is not defined, i.e. the file was not loaded by code that defines it.
/**
 * UTF-8 helper class.
 *
 * Detects at construction time whether the environment can safely work with
 * UTF-8 (publishing the result through the UTF8_ENABLED / MB_ENABLED
 * constants) and offers helpers to clean strings and convert them to UTF-8.
 */
class CI_Utf8 {

	/**
	 * Determines whether UTF-8 support can be enabled.
	 *
	 * Defines UTF8_ENABLED (always) and MB_ENABLED (only when UTF-8 support
	 * is enabled). When mbstring is loaded, its internal encoding is set to
	 * UTF-8.
	 */
	public function __construct()
	{
		log_message('debug', "Utf8 Class Initialized");

		global $CFG;

		if (
			preg_match('/./u', 'é') === 1 // PCRE must support UTF-8
			AND function_exists('iconv') // iconv must be installed
			// mbstring.func_overload is a bitmask (1 = mail, 2 = string functions,
			// 4 = regex); any non-zero value means overloading is on, so require 0
			// rather than only rejecting the exact value 1.
			AND (int) ini_get('mbstring.func_overload') === 0
			// Charset names are case-insensitive, so 'utf-8' must be accepted too.
			AND strtoupper((string) $CFG->item('charset')) === 'UTF-8'
			)
		{
			log_message('debug', "UTF-8 Support Enabled");

			define('UTF8_ENABLED', TRUE);

			// Set the internal encoding for multibyte string functions if
			// available, and set a flag so callers don't have to repeatedly
			// use extension_loaded() or function_exists().
			if (extension_loaded('mbstring'))
			{
				define('MB_ENABLED', TRUE);
				mb_internal_encoding('UTF-8'); // default encoding used by mb_* functions
			}
			else
			{
				define('MB_ENABLED', FALSE);
			}
		}
		else
		{
			log_message('debug', "UTF-8 Support Disabled");
			define('UTF8_ENABLED', FALSE);
		}
	}

	/**
	 * Removes byte sequences that are not valid UTF-8 from a string.
	 *
	 * Pure-ASCII input is returned untouched. Otherwise iconv with the
	 * //IGNORE modifier is tried first; if iconv is unavailable or reports
	 * failure, mbstring is used with invalid-character substitution disabled,
	 * which drops the offending bytes in the same way.
	 *
	 * @param	string	$str	String to clean
	 * @return	string|bool	Cleaned string, or FALSE when no conversion backend succeeded
	 */
	public function clean_string($str)
	{
		if ($this->_is_ascii($str))
		{
			return $str;
		}

		if (function_exists('iconv'))
		{
			// //IGNORE asks iconv to skip characters it cannot represent and
			// keep converting. Some PHP/libiconv builds still return FALSE
			// when they meet an illegal sequence, hence the fallback below.
			$cleaned = @iconv('UTF-8', 'UTF-8//IGNORE', $str);

			if ($cleaned !== FALSE)
			{
				return $cleaned;
			}
		}

		if (function_exists('mb_convert_encoding'))
		{
			// Temporarily make mbstring drop invalid bytes instead of
			// replacing them with '?', then restore the caller's setting.
			$previous = mb_substitute_character();
			mb_substitute_character('none');
			$cleaned = mb_convert_encoding($str, 'UTF-8', 'UTF-8');
			mb_substitute_character($previous);

			return $cleaned;
		}

		return FALSE;
	}

	/**
	 * Passes the string through remove_invisible_characters() with its
	 * second argument set to FALSE and returns the result.
	 *
	 * NOTE(review): remove_invisible_characters() is defined outside this
	 * class; presumably FALSE disables its URL-encoded handling - confirm.
	 *
	 * @param	string	$str
	 * @return	string
	 */
	public function safe_ascii_for_xml($str)
	{
		return remove_invisible_characters($str, FALSE);
	}

	/**
	 * Converts a string from the given encoding to UTF-8.
	 *
	 * iconv is preferred; mb_convert_encoding is used when iconv is missing
	 * or when iconv fails for this particular input/encoding. Note that
	 * mbstring substitutes characters it cannot convert instead of failing.
	 *
	 * @param	string	$str		Input string
	 * @param	string	$encoding	Encoding of the input string
	 * @return	string|bool	Converted string, or FALSE on failure
	 */
	public function convert_to_utf8($str, $encoding)
	{
		if (function_exists('iconv'))
		{
			$converted = @iconv($encoding, 'UTF-8', $str);

			if ($converted !== FALSE)
			{
				return $converted;
			}
		}

		if (function_exists('mb_convert_encoding'))
		{
			try
			{
				return @mb_convert_encoding($str, 'UTF-8', $encoding);
			}
			catch (ValueError $e)
			{
				// PHP 8 throws for unknown encodings instead of warning;
				// keep the documented "FALSE on failure" contract.
				return FALSE;
			}
		}

		return FALSE;
	}

	/**
	 * Tests whether a string consists solely of 7-bit ASCII bytes.
	 *
	 * @param	string	$str
	 * @return	bool	TRUE when no byte outside \x00-\x7F is present
	 */
	public function _is_ascii($str)
	{
		return (preg_match('/[^\x00-\x7F]/S', $str) == 0);
	}
}
Code Tips:
第 44 行（clean_string() 中）：在目标编码后添加 //IGNORE 修饰符，忽略未能识别的字符并继续转换。
$str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);