Discuz 高效wap编码转换类,很强的,gbk-utf8 or unicode

原创 2006年06月10日 16:21:00
<?php
/*
    [Discuz!] (C)2001-2006 Comsenz Inc.
    This is NOT a freeware, use is subject to license terms

    $RCSfile: chinese.class.php,v $
    $Revision: 1.7 $
    $Date: 2006/02/23 13:44:02 $
*/

// Abort immediately when this file is requested directly instead of being
// included by a script that has defined IN_DISCUZ.
defined('IN_DISCUZ') or exit('Access Denied');

// Directory the Chinese class reads its code table files from.
define('CODETABLE_DIR', DISCUZ_ROOT.'./include/tables/');

/**
 * Charset converter between GBK, BIG5, UTF-8 and "UNICODE" (HTML numeric
 * character references of the form &#x4e2d;).
 *
 * When the iconv extension is available it does the byte-level conversion.
 * Otherwise a code table file from CODETABLE_DIR is loaded into
 * $unicode_table and used for lookups.
 *
 * Characters that cannot be mapped through the table are replaced with '?'.
 */
class Chinese {

    // Full path of the code table file chosen by OpenTable().
    var $table = '';
    // True when the iconv extension is used instead of the code table.
    var $iconv_enabled = false;
    // Lookup table filled by OpenTable(). Entries are hexadecimal strings:
    // '0x'-prefixed for UTF-8 conversions, bare hex digits for UNICODE output.
    var $unicode_table = array();
    var $config = array(
        'SourceLang'            => '',                    //  original charset
        'TargetLang'            => '',                    //  target charset
        'GBtoUnicode_table'     => 'gb-unicode.table',    //  GB2312 to unicode
        'BIG5toUnicode_table'   => 'big5-unicode.table',  //  BIG5 to unicode
    );

    /**
     * PHP 4 style constructor, kept so PHP 4 and existing explicit calls
     * (e.g. from subclasses) keep working. Delegates to __construct().
     *
     * @param string $SourceLang charset name of the input text
     * @param string $TargetLang charset name of the desired output
     */
    function Chinese($SourceLang, $TargetLang) {
        $this->__construct($SourceLang, $TargetLang);
    }

    /**
     * Normalises both charset names and loads the code table when iconv is
     * not available. Needed because PHP 8 no longer treats a method named
     * after the class as the constructor.
     *
     * @param string $SourceLang charset name of the input text
     * @param string $TargetLang charset name of the desired output
     */
    function __construct($SourceLang, $TargetLang) {
        $this->config['SourceLang'] = $this->_lang($SourceLang);
        $this->config['TargetLang'] = $this->_lang($TargetLang);

        $this->iconv_enabled = function_exists('iconv');
        if(!$this->iconv_enabled) {
            $this->OpenTable();
        }
    }

    /**
     * Maps a loosely written charset name onto one of the four names the
     * class works with, by looking at its (case-insensitive) prefix.
     *
     * @param string $LangCode e.g. 'gb2312', 'big5', 'utf-8', 'unicode'
     * @return string|null 'GBK', 'BIG5', 'UTF-8' or 'UNICODE'; NULL when the
     *                     prefix is not recognised
     */
    function _lang($LangCode) {
        $LangCode = strtoupper((string)$LangCode);

        if(substr($LangCode, 0, 2) == 'GB') {
            return 'GBK';
        } elseif(substr($LangCode, 0, 3) == 'BIG') {
            return 'BIG5';
        } elseif(substr($LangCode, 0, 3) == 'UTF') {
            return 'UTF-8';
        } elseif(substr($LangCode, 0, 3) == 'UNI') {
            return 'UNICODE';
        }
        return null;
    }

    /**
     * Turns a string of hexadecimal digits into the bytes it describes,
     * two digits per byte.
     *
     * @param string $hexdata hexadecimal digits, e.g. 'd6d0'
     * @return string the decoded bytes ('' for empty input)
     */
    function _hex2bin($hexdata) {
        $bindata = '';
        $len = strlen($hexdata);
        for($i = 0; $i < $len; $i += 2) {
            $bindata .= chr(hexdec(substr($hexdata, $i, 2)));
        }
        return $bindata;
    }

    /**
     * Loads the code table matching the configured charsets into
     * $unicode_table. The file is read as a sequence of 4-byte records, each
     * holding two big-endian 16-bit numbers ("key" and "value").
     *
     * The table is left empty when no table applies to the charset pair or
     * when the file cannot be read.
     */
    function OpenTable() {
        $this->unicode_table = array();

        $source = $this->config['SourceLang'];
        $target = $this->config['TargetLang'];

        if($source == 'GBK' || $target == 'GBK') {
            $this->table = CODETABLE_DIR.$this->config['GBtoUnicode_table'];
        } elseif($source == 'BIG5' || $target == 'BIG5') {
            $this->table = CODETABLE_DIR.$this->config['BIG5toUnicode_table'];
        } else {
            // Neither side uses a table-backed charset: nothing to load.
            return;
        }

        if(!is_file($this->table) || !is_readable($this->table)) {
            return;
        }
        $tabletmp = file_get_contents($this->table);
        if($tabletmp === false) {
            return;
        }

        // Stop before a trailing partial record so unpack() never sees
        // fewer than four bytes.
        $last = strlen($tabletmp) - 4;
        for($i = 0; $i <= $last; $i += 4) {
            $tmp = unpack('nkey/nvalue', substr($tabletmp, $i, 4));
            if($target == 'UTF-8') {
                $this->unicode_table[$tmp['key']] = '0x'.dechex($tmp['value']);
            } elseif($source == 'UTF-8') {
                $this->unicode_table[$tmp['value']] = '0x'.dechex($tmp['key']);
            } elseif($target == 'UNICODE') {
                $this->unicode_table[$tmp['key']] = dechex($tmp['value']);
            }
        }
    }

    /**
     * Encodes a code point as UTF-8 and returns the byte values written one
     * after another in DECIMAL (e.g. 0x4E2D gives '228184173'), not as raw
     * bytes. Every byte of a multi-byte sequence is >= 128, so those values
     * are always three digits long; callers split the result in groups of
     * three and chr() each group.
     *
     * @param int $c code point
     * @return string concatenated decimal byte values; '' for $c >= 0x200000
     */
    function CHSUtoUTF8($c) {
        $str = '';
        if($c < 0x80) {
            $str .= $c;
        } elseif($c < 0x800) {
            $str .= (0xC0 | $c >> 6);
            $str .= (0x80 | $c & 0x3F);
        } elseif($c < 0x10000) {
            $str .= (0xE0 | $c >> 12);
            $str .= (0x80 | $c >> 6 & 0x3F);
            $str .= (0x80 | $c & 0x3F);
        } elseif($c < 0x200000) {
            $str .= (0xF0 | $c >> 18);
            $str .= (0x80 | $c >> 12 & 0x3F);
            $str .= (0x80 | $c >> 6 & 0x3F);
            $str .= (0x80 | $c & 0x3F);
        }
        return $str;
    }

    /**
     * Converts text from the configured source charset to the target charset.
     *
     * @param string $SourceText text in the source charset
     * @return string|false converted text; the input unchanged when both
     *                      charsets are equal; FALSE when iconv() fails
     */
    function Convert($SourceText) {
        $source = $this->config['SourceLang'];
        $target = $this->config['TargetLang'];

        if($source == $target) {
            return $SourceText;
        }

        if($target == 'UNICODE') {
            // Always produce &#x...; references, with or without iconv, so
            // callers get the same output format on every server.
            if($source == 'UTF-8') {
                return $this->_utf8_to_entities((string)$SourceText);
            }
            if($this->iconv_enabled) {
                $utf8 = iconv($source, 'UTF-8', $SourceText);
                return $utf8 === false ? false : $this->_utf8_to_entities($utf8);
            }
            return $this->_table_to_entities((string)$SourceText);
        }

        if($this->iconv_enabled) {
            return iconv($source, $target, $SourceText);
        }

        if($source == 'UTF-8') {
            return $this->_utf8_to_table_charset((string)$SourceText);
        }
        return $this->_table_charset_to_utf8((string)$SourceText);
    }

    /**
     * Reads a table entry (hex digits, optionally prefixed with '0x') as an
     * integer. Hex strings are not numeric in arithmetic on current PHP, so
     * they must be parsed explicitly.
     */
    function _table_value($entry) {
        $entry = (string)$entry;
        if(substr($entry, 0, 2) === '0x') {
            $entry = substr($entry, 2);
        }
        return hexdec($entry);
    }

    /**
     * Returns the lookup key for a two-byte GBK/BIG5 character code, or NULL
     * when the source charset has no table (such characters are dropped).
     */
    function _table_key($code) {
        if($this->config['SourceLang'] == 'GBK') {
            return $code - 0x8080;
        } elseif($this->config['SourceLang'] == 'BIG5') {
            return $code;
        }
        return null;
    }

    /**
     * Table path, GBK/BIG5 to numeric character references. Bytes up to 127
     * are copied; every other byte starts a two-byte character.
     */
    function _table_to_entities($text) {
        $out = '';
        $len = strlen($text);
        $i = 0;
        // Index-based walk: testing the remaining string for truthiness
        // would stop early on a trailing "0".
        while($i < $len) {
            $byte = substr($text, $i, 1);
            if(ord($byte) <= 127) {
                $out .= $byte;
                $i++;
                continue;
            }
            $key = $this->_table_key(hexdec(bin2hex(substr($text, $i, 2))));
            $i += 2;
            if($key === null) {
                continue;
            }
            $out .= isset($this->unicode_table[$key]) ? '&#x'.$this->unicode_table[$key].';' : '?';
        }
        return $out;
    }

    /**
     * Table path, GBK/BIG5 to UTF-8. The table is kept after the call so the
     * same object can convert any number of strings.
     */
    function _table_charset_to_utf8($text) {
        $ret = '';
        $len = strlen($text);
        $i = 0;
        while($i < $len) {
            $byte = substr($text, $i, 1);
            if(ord($byte) <= 127) {
                $ret .= $byte;
                $i++;
                continue;
            }
            $key = $this->_table_key(hexdec(bin2hex(substr($text, $i, 2))));
            $i += 2;
            if($key === null) {
                continue;
            }
            if(!isset($this->unicode_table[$key])) {
                $ret .= '?';
                continue;
            }
            // CHSUtoUTF8() yields decimal byte values in groups of three.
            $digits = $this->CHSUtoUTF8($this->_table_value($this->unicode_table[$key]));
            $n = strlen($digits);
            for($k = 0; $k < $n; $k += 3) {
                $ret .= chr((int)substr($digits, $k, 3));
            }
        }
        return $ret;
    }

    /**
     * Returns the GBK/BIG5 bytes for a code point using the table, '?' when
     * the code point is not in the table, '' for any other target charset.
     */
    function _table_bytes($codepoint) {
        $target = $this->config['TargetLang'];
        if($target != 'GBK' && $target != 'BIG5') {
            return '';
        }
        if(!isset($this->unicode_table[$codepoint])) {
            return '?';
        }
        $code = $this->_table_value($this->unicode_table[$codepoint]);
        if($target == 'GBK') {
            // GBK table keys are stored with 0x8080 subtracted.
            $code += 0x8080;
        }
        // Pad to four digits so _hex2bin() always sees whole bytes.
        return $this->_hex2bin(str_pad(dechex($code), 4, '0', STR_PAD_LEFT));
    }

    /**
     * Table path, UTF-8 to GBK/BIG5. Handles one-, two- and three-byte
     * sequences; any other lead byte is skipped.
     */
    function _utf8_to_table_charset($text) {
        $out = '';
        $len = strlen($text);
        $i = 0;
        while($i < $len) {
            $c = ord(substr($text, $i++, 1));
            switch($c >> 4) {
                case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                    $out .= chr($c);
                    break;
                case 12: case 13:
                    $b2 = $i < $len ? ord(substr($text, $i, 1)) : 0;
                    $i++;
                    $out .= $this->_table_bytes((($c & 0x1F) << 6) | ($b2 & 0x3F));
                    break;
                case 14:
                    $b2 = $i < $len ? ord(substr($text, $i, 1)) : 0;
                    $i++;
                    $b3 = $i < $len ? ord(substr($text, $i, 1)) : 0;
                    $i++;
                    $out .= $this->_table_bytes((($c & 0x0F) << 12) | (($b2 & 0x3F) << 6) | ($b3 & 0x3F));
                    break;
            }
        }
        return $out;
    }

    /**
     * Rewrites UTF-8 text so that every non-ASCII character becomes a
     * hexadecimal numeric character reference. Invalid or truncated
     * sequences become '?'.
     */
    function _utf8_to_entities($utf8) {
        $out = '';
        $len = strlen($utf8);
        $i = 0;
        while($i < $len) {
            $c = ord(substr($utf8, $i, 1));
            if($c < 0x80) {
                $out .= chr($c);
                $i++;
                continue;
            }
            if(($c & 0xE0) == 0xC0) {
                $extra = 1;
                $cp = $c & 0x1F;
            } elseif(($c & 0xF0) == 0xE0) {
                $extra = 2;
                $cp = $c & 0x0F;
            } elseif(($c & 0xF8) == 0xF0) {
                $extra = 3;
                $cp = $c & 0x07;
            } else {
                // Stray continuation byte or invalid lead byte.
                $out .= '?';
                $i++;
                continue;
            }
            if($i + $extra >= $len) {
                // Sequence cut off at the end of the input.
                $out .= '?';
                break;
            }
            for($k = 1; $k <= $extra; $k++) {
                $cp = ($cp << 6) | (ord(substr($utf8, $i + $k, 1)) & 0x3F);
            }
            $out .= '&#x'.dechex($cp).';';
            $i += $extra + 1;
        }
        return $out;
    }
}
?>

Discuz 提供的编码转换工具

  • 2017年10月23日 21:25
  • 4.14MB
  • 下载

Discuz 高效wap编码转换类,很强的,gbk-utf8 or unicode

/*    [Discuz!] (C)2001-2006 Comsenz Inc.    This is NOT a freeware, use is subject to license terms...
  • musttieying
  • musttieying
  • 2006年06月10日 16:21
  • 793

学点编码知识又不会死:Unicode的流言终结者和编码大揭秘

如果你是一个生活在2003年的程序员,却不了解字符、字符集、编码和Unicode这些基础知识。那你可要小心了,要是被我抓到你,我会让你在潜水艇里剥六个月洋葱来惩罚你。 这个邪恶的恐吓是Joel Spo...
  • soonfly
  • soonfly
  • 2016年04月15日 14:55
  • 533

中文转换成Unicode编码和Unicode编码转换成中文,Java代码实现

import java.util.Properties; public class Test { public static void main(String[] arg...
  • jdsjlzx
  • jdsjlzx
  • 2011年12月10日 01:01
  • 11436

Discuz从GBK转码UTF-8一些心得

最近把一个比较老的discuz系统(X2.0)由GBK转成了UTF-8,数据库表和模板转换编码进展的都还比较顺利。弄完发现首页的数据调用一部分无法显示,这些不显示的到后台看,会发现数据调用模板也是空白...
  • kof820
  • kof820
  • 2016年08月22日 11:11
  • 1831

Discuz! 6.1 从GBK编码转为UTF-8编码

源代码的转换创建目标文件夹find forum_en_gbk -type d -exec mkdir -p utf8/{} /;生成脚本,用于将js文件从GBK编码转为UTF-8编码find foru...
  • hu_zhenghui
  • hu_zhenghui
  • 2008年08月06日 11:17
  • 3990

Unicode编码和中文互转(JAVA实现)

//中文转Unicode public static String gbEncoding(final String gbString) { //gbString = "测试" ...
  • u010612373
  • u010612373
  • 2016年05月18日 09:16
  • 19473

转贴:论坛Discuz的WAP功能设置教程全攻略

由于Discuz论坛安装后,默认的WAP功能有问题,鄙人花了很多时间在网上搜索,终于成功搭建成功WAP,现在把方法整理出来,希望可以帮助大家快速把论坛的WAP开通。安装论坛过程成功后,请先确认在后台已...
  • albert2061
  • albert2061
  • 2011年03月15日 11:13
  • 496

Discuz 提供的编码转换工具

  • 2017年10月23日 21:25
  • 4.14MB
  • 下载

文本文件编码转换工具 gbk utf8 gb2312

  • 2010年01月26日 18:27
  • 42KB
  • 下载
内容举报
返回顶部
收藏助手
不良信息举报
您举报文章:Discuz 高效wap编码转换类,很强的,gbk-utf8 or unicode
举报原因:
原因补充:

(最多只允许输入30个字)