delphi utf8转码

展开阅读全文

吐血奉献:gb,unicode,utf8转码程序

05-11

<?php
/*
 * GB2312 <-> Unicode numeric character reference <-> UTF-8 converters.
 *
 * NOTE(review): this listing was recovered from a copy in which every line
 * break had been replaced by the text "rn", all braces and "[n]" string
 * index brackets had been stripped, and everything before the main loop of
 * the first function was missing. The header of unicode2gb() below is
 * therefore reconstructed from its surviving body and from the parallel
 * functions -- confirm against an intact copy if one is available.
 *
 * All converters read two global lookup tables that are defined outside
 * this listing:
 *
 *   $____global_codetable   integer key = the two GB2312 bytes with the high
 *                           bit of each byte cleared; value = the Unicode
 *                           code point written as a string of hex digits.
 *   $____global_codetable2  integer key = Unicode code point; value = the
 *                           GB2312 code with the high bit of each byte
 *                           cleared.
 *
 * NOTE(review): the table shapes are inferred from how the code uses them
 * (hexdec() / "&#x" . value on the first, 0x8080 | value on the second);
 * verify against the table definitions.
 *
 * Handling of input that cannot be converted:
 *   - a well-formed character with no table entry becomes "?";
 *   - a malformed byte (stray UTF-8 continuation byte, truncated multi-byte
 *     sequence, GB2312 lead byte without a valid trail byte) is dropped;
 *   - in unicode2gb(), a reference that cannot be mapped is left untouched.
 */

/*
 * Returns the two GB2312 bytes for Unicode code point $cp, or null when the
 * code point has no entry in $____global_codetable2.
 */
function _gbconv_codepoint_to_gb($cp)
{
    global $____global_codetable2;

    if (!isset($____global_codetable2[$cp])) {
        return null;
    }
    // The table stores the 7-bit form; setting the high bit of both bytes
    // yields the bytes as they appear in GB2312 text.
    $code = 0x8080 | $____global_codetable2[$cp];
    return chr(($code >> 8) & 0xFF) . chr($code & 0xFF);
}

/*
 * Returns the table's hex-digit string for the GB2312 character made of the
 * byte values $hi and $lo, or null when the character has no entry in
 * $____global_codetable.
 */
function _gbconv_gb_to_unicode_hex($hi, $lo)
{
    global $____global_codetable;

    $key = (($hi << 8) | $lo) & 0x7F7F;
    if (!isset($____global_codetable[$key])) {
        return null;
    }
    $hex = (string) $____global_codetable[$key];
    return ($hex === "") ? null : $hex;
}

/*
 * Encodes Unicode code point $cp as UTF-8 (1 to 4 bytes). Returns null for
 * values above U+10FFFF, which UTF-8 cannot represent.
 */
function _gbconv_utf8_encode($cp)
{
    if ($cp < 0x80) {
        // Must be a single byte; a two-byte form here would be overlong.
        return chr($cp);
    }
    if ($cp < 0x800) {
        return chr(0xC0 | ($cp >> 6))
             . chr(0x80 | ($cp & 0x3F));
    }
    if ($cp < 0x10000) {
        return chr(0xE0 | ($cp >> 12))
             . chr(0x80 | (($cp >> 6) & 0x3F))
             . chr(0x80 | ($cp & 0x3F));
    }
    if ($cp < 0x110000) {
        return chr(0xF0 | ($cp >> 18))
             . chr(0x80 | (($cp >> 12) & 0x3F))
             . chr(0x80 | (($cp >> 6) & 0x3F))
             . chr(0x80 | ($cp & 0x3F));
    }
    return null;
}

/*
 * Shared scanner for gb2unicode() and gb2utf8(): walks GB2312 text, copies
 * ASCII bytes through, and replaces every two-byte character either by its
 * UTF-8 encoding ($as_utf8 true) or by a "&#xHHHH;" reference ($as_utf8
 * false).
 */
function _gbconv_transcode_gb($gb, $as_utf8)
{
    $gb  = (string) $gb;
    $out = "";
    $len = strlen($gb);
    $i   = 0;

    while ($i < $len) {
        $hi = ord($gb[$i]);
        if ($hi <= 127) {
            $out .= $gb[$i];
            $i++;
            continue;
        }

        $lo = ($i + 1 < $len) ? ord($gb[$i + 1]) : 0;
        if ($lo <= 127) {
            // Lead byte without a high-bit trail byte: drop only the lead
            // byte so that a following ASCII byte is not swallowed.
            $i++;
            continue;
        }
        $i += 2;

        $hex = _gbconv_gb_to_unicode_hex($hi, $lo);
        if ($hex === null) {
            $out .= "?";
        } elseif ($as_utf8) {
            $encoded = _gbconv_utf8_encode(hexdec($hex));
            $out .= ($encoded === null) ? "?" : $encoded;
        } else {
            $out .= "&#x" . $hex . ";";
        }
    }
    return $out;
}

/*
 * Converts text containing numeric character references ("&#NNNN;" or
 * "&#xHHHH;") into GB2312 text. Everything outside a reference is copied
 * unchanged.
 *
 * A reference is replaced only when its code point has an entry in
 * $____global_codetable2. Any other reference -- including ASCII-range ones
 * such as "&#60;" -- and any malformed "&#...;" text is left exactly as it
 * was, so XML input stays well-formed.
 *
 * NOTE(review): the original header of this function was lost (see the note
 * at the top of the listing); its name comes from a sibling comment and the
 * parameter name from the surviving body.
 *
 * @param string $un text with numeric character references
 * @return string GB2312 text
 */
function unicode2gb($un)
{
    $un  = (string) $un;
    $gb  = "";
    $len = strlen($un);
    $pos = 0;

    // The digit limits keep the parsed value in a range that is safe to use
    // as an integer array key; longer digit runs are treated as plain text.
    $pattern = '/&#(?:[xX]([0-9A-Fa-f]{1,6})|([0-9]{1,7}));/';

    while ($pos < $len
        && preg_match($pattern, $un, $m, PREG_OFFSET_CAPTURE, $pos)) {
        $reference = $m[0][0];
        $start     = $m[0][1];

        // Copy the literal text that precedes the reference.
        $gb .= substr($un, $pos, $start - $pos);

        // Group 2 is only reported when the decimal alternative matched.
        $is_decimal = isset($m[2]) && $m[2][1] >= 0;
        $cp = $is_decimal ? intval($m[2][0]) : hexdec($m[1][0]);

        $bytes = _gbconv_codepoint_to_gb($cp);
        $gb .= ($bytes === null) ? $reference : $bytes;

        $pos = $start + strlen($reference);
    }

    if ($pos < $len) {
        $gb .= substr($un, $pos);
    }
    return $gb;
}

/*
 * Converts GB2312 text (which may contain ASCII characters) into text in
 * which every two-byte character is written as a "&#xHHHH;" numeric
 * character reference. Can be used for converting the encoding of XML.
 * Note that the function does not change the encoding declaration inside
 * the XML.
 *
 * @param string $gb GB2312 text
 * @return string ASCII text with numeric character references; input that
 *                is empty after trim() is returned unchanged
 */
function gb2unicode($gb)
{
    if (!trim($gb)) {
        return $gb;
    }
    return _gbconv_transcode_gb($gb, false);
}

/*
 * Converts UTF-8 text into GB2312 text. Unlike unicode2gb() this is a
 * conversion between the binary encodings, not between textual references.
 *
 * @param string $utf8 UTF-8 text
 * @return string GB2312 text; input that is empty after trim() is returned
 *                unchanged
 */
function utf82gb($utf8)
{
    if (!trim($utf8)) {
        return $utf8;
    }

    $utf8 = (string) $utf8;
    $gb   = "";
    $len  = strlen($utf8);
    $i    = 0;

    while ($i < $len) {
        $lead = ord($utf8[$i]);

        if ($lead < 0x80) {                    // 1 byte
            $gb .= $utf8[$i];
            $i++;
            continue;
        }

        if (($lead & 0xE0) == 0xC0) {          // 2 bytes
            $extra = 1;
            $cp    = $lead & 0x1F;
        } elseif (($lead & 0xF0) == 0xE0) {    // 3 bytes
            $extra = 2;
            $cp    = $lead & 0x0F;
        } elseif (($lead & 0xF8) == 0xF0) {    // 4 bytes
            $extra = 3;
            $cp    = $lead & 0x07;
        } else {
            // Misaligned continuation byte or invalid lead byte: drop it.
            $i++;
            continue;
        }

        // Collect the continuation bytes; stop at the first byte that is
        // missing or is not of the form 10xxxxxx.
        $valid = ($i + $extra < $len);
        for ($k = 1; $valid && $k <= $extra; $k++) {
            $next = ord($utf8[$i + $k]);
            if (($next & 0xC0) != 0x80) {
                $valid = false;
            } else {
                $cp = ($cp << 6) | ($next & 0x3F);
            }
        }
        if (!$valid) {
            // Drop only the lead byte; the following bytes are re-examined
            // on their own so that valid text after the damage survives.
            $i++;
            continue;
        }
        $i += $extra + 1;

        $bytes = _gbconv_codepoint_to_gb($cp);
        $gb .= ($bytes === null) ? "?" : $bytes;
    }
    return $gb;
}

/*
 * Converts GB2312 text into UTF-8 text. Unlike gb2unicode() this is a
 * conversion between the binary encodings, not into textual references.
 *
 * @param string $gb GB2312 text
 * @return string UTF-8 text; input that is empty after trim() is returned
 *                unchanged
 */
function gb2utf8($gb)
{
    if (!trim($gb)) {
        return $gb;
    }
    return _gbconv_transcode_gb($gb, true);
}
?>
论坛

没有更多推荐了,返回首页