code_detect.php
--------------
<?php
/*
* 可识别txt类型的文件,如果文本文件的内容为Ansi则返回EUC-CN,如果内容为utf-8则返回UTF-8
* 能识别的编码有: UTF-8,UTF-16BE,UTF-16LE,UTF-32BE,UTF-32LE,EUC-CN(ANSI)
*/
// Byte-order marks (BOMs) that may prefix a Unicode text file, stored as raw byte strings.
define('UTF8_BOM', "\xEF\xBB\xBF");
define('UTF16_BIG_ENDIAN_BOM', "\xFE\xFF");
define('UTF16_LITTLE_ENDIAN_BOM', "\xFF\xFE");
define('UTF32_BIG_ENDIAN_BOM', "\x00\x00\xFE\xFF");
define('UTF32_LITTLE_ENDIAN_BOM', "\xFF\xFE\x00\x00");
/**
 * Guesses the character encoding of a text buffer.
 *
 * A leading Unicode byte-order mark (BOM) is checked first; when none is
 * present the decision is delegated to mb_detect_encoding() with a fixed
 * candidate list.
 */
class Encode_Core {
    /**
     * Identify a Unicode encoding from the BOM at the start of the text.
     *
     * ANSI/legacy encoded files carry no BOM and are not handled here.
     *
     * @param string $text Raw bytes to inspect.
     * @return string|null 'UTF-8', 'UTF-32BE', 'UTF-32LE', 'UTF-16BE' or
     *                     'UTF-16LE'; null when the text starts with no known BOM.
     */
    private static function detect_utf_encoding($text) {
        $first2 = substr($text, 0, 2);
        $first3 = substr($text, 0, 3);
        // The UTF-32 BOMs are four bytes long, so four bytes must be compared.
        $first4 = substr($text, 0, 4);

        if ($first3 === UTF8_BOM) {
            return 'UTF-8';
        }
        // UTF-32 is tested before UTF-16 because the UTF-32LE BOM (FF FE 00 00)
        // begins with the same two bytes as the UTF-16LE BOM (FF FE).
        if ($first4 === UTF32_BIG_ENDIAN_BOM) {
            return 'UTF-32BE';
        }
        if ($first4 === UTF32_LITTLE_ENDIAN_BOM) {
            return 'UTF-32LE';
        }
        if ($first2 === UTF16_BIG_ENDIAN_BOM) {
            return 'UTF-16BE';
        }
        if ($first2 === UTF16_LITTLE_ENDIAN_BOM) {
            return 'UTF-16LE';
        }
        return null;
    }

    /**
     * Get the encoding of a string.
     *
     * @param string $str Raw bytes whose encoding should be determined.
     * @return string|null Encoding name, or null when neither a BOM nor
     *                     mb_detect_encoding() yields a result.
     */
    public static function get_encoding($str) {
        // A BOM is an explicit marker, so it takes precedence over guessing.
        $encoding = self::detect_utf_encoding($str);
        if (!empty($encoding)) {
            return $encoding;
        }

        // Candidate order is significant to mb_detect_encoding().
        $candidates = array(
            'UTF-8',
            'ASCII',
            'EUC-CN',
            'JIS',    // Japanese
            'GB2312', // Simplified Chinese
            'BIG5',   // Traditional Chinese
        );
        $encoding = mb_detect_encoding($str, $candidates);

        // mb_detect_encoding() reports failure as false; normalise to null.
        return empty($encoding) ? null : $encoding;
    }
}
-------------------------------
test.php
-------------------------------------
include('code_detect.php');

// Lyric files to convert. Each one is re-encoded to UTF-8 and written next to
// the original with a ".txt" suffix appended to its name.
$paths = array(
    'D:/Program Files/Apache/htdocs/ctunes/music_upload/music/G/Zw_2_2/1.lrc',
    'D:/Program Files/Apache/htdocs/ctunes/music_upload/music/G/Zw_2_2/2.lrc',
);

foreach ($paths as $path) {
    $lrc = file_get_contents($path);
    if ($lrc === false) {
        // Unreadable input: skip it instead of writing an empty output file.
        error_log('Cannot read file: ' . $path);
        continue;
    }

    // get_encoding() is static, so no Encode_Core instance is needed.
    $encode = Encode_Core::get_encoding($lrc);
    if (empty($encode)) {
        error_log('Cannot detect encoding of: ' . $path);
        continue;
    }
    echo $encode;

    $converted = iconv($encode, 'UTF-8', $lrc);
    if ($converted === false) {
        // iconv() returns false on failure; do not overwrite the target with nothing.
        error_log('Cannot convert ' . $path . ' from ' . $encode . ' to UTF-8');
        continue;
    }
    file_put_contents($path . '.txt', $converted);
}
--------------------
显示
EUC-CNUTF-8
--------------
查看另存为的文件,显示正常。
这种方法是根据文件头(BOM)来判断编码;
另外一种方法是先把内容转换成另一种编码,再转回来,比较原字符串与两次转换后的结果是否一致:不一致说明编码判断错误,一致则说明编码判断正确。这种方法需要用 for 循环逐个尝试候选编码,效率有点低。