最近开发项目时需要和 Delphi 进行通信,其中涉及中文的传输,结果中文变成了乱码,无法解析。
经过本人测试,源文件保存为 GBK 编码和保存为 UTF-8 编码时,PHP 对字符进行 escape 的处理过程是不一样的,因此下面分别给出两种编码下的实现。
在gbk下面:
- <?php
// Round-trip demo: escape a Chinese sample string, print the escaped form,
// then print it decoded again, followed by a pre-escaped literal decoded
// directly. The sample literal is stored in whatever encoding this source
// file is saved in.
$str = "我是来测试的";
$enStr = escape($str);
echo $enStr, "<br/>";
echo '解:', unescape($enStr), "<br/>";
echo unescape('%u6211%u662F%u6765%u6D4B%u8BD5%u7684');
/**
 * Encode a GBK byte string in the style of JavaScript's escape().
 *
 * Double-byte GBK characters become "%uXXXX" (one big-endian UTF-16 code
 * unit, upper-case hex). Every other byte, including plain ASCII, is
 * emitted as a zero-padded "%XX" pair.
 *
 * @param string $str GBK-encoded input.
 * @return string The escaped, pure-ASCII representation.
 */
function escape($str) {
    $length = strlen($str);
    $result = '';
    for ($i = 0; $i < $length; $i++) {
        $byte = ord($str[$i]);
        // GBK lead bytes are 0x81-0xFE and need a trail byte after them;
        // 0x7F (DEL) and 0x80 are single bytes and must not consume a neighbour.
        if ($byte >= 0x81 && $byte <= 0xFE && $i + 1 < $length) {
            // Ask for big-endian explicitly: plain "UCS-2" is host-endian on
            // some iconv implementations and would swap the two hex pairs.
            $unit = iconv('GBK', 'UCS-2BE', substr($str, $i, 2));
            if ($unit !== false && strlen($unit) === 2) {
                $result .= '%u' . strtoupper(bin2hex($unit));
                $i++; // the trail byte has been consumed as well
                continue;
            }
        }
        // Single byte, truncated pair, or unconvertible pair: emit the byte
        // itself, always as two hex digits so that it stays decodable.
        $result .= sprintf('%%%02X', $byte);
    }
    return $result;
}
/**
 * Decode an escape()-style string into GBK bytes.
 *
 * Recognised tokens, decoded in a single left-to-right pass:
 *   - "%uXXXX"    one UTF-16 code unit (four hex digits)
 *   - "&#xXXXX;"  hexadecimal HTML character reference (four hex digits)
 *   - "&#NNNN;"   decimal HTML character reference (up to 65535)
 *   - "%XX"       one raw byte
 * Anything else, including newlines, is copied through unchanged. A token
 * whose character cannot be represented in GBK is left as literal text.
 *
 * @param string $str Escaped input.
 * @return string GBK-encoded result.
 */
function unescape($str) {
    // The "s" modifier lets "." match newlines so they are not dropped.
    $pattern = '/%u[0-9A-Fa-f]{4}|&#x[0-9A-Fa-f]{4};|&#\d+;|%[0-9A-Fa-f]{2}|./s';
    if (!preg_match_all($pattern, $str, $matches)) {
        return $str;
    }
    $pieces = $matches[0];
    foreach ($pieces as $k => $piece) {
        // Only the named alternatives can produce a multi-byte token;
        // single bytes are literals.
        if (strlen($piece) === 1) {
            continue;
        }
        if ($piece[0] === '%' && $piece[1] !== 'u') {
            // "%XX": one raw byte. Decoding it here, not up front, keeps an
            // escaped "%25u6211" from being decoded twice.
            $pieces[$k] = chr(hexdec(substr($piece, 1)));
            continue;
        }
        if ($piece[0] === '%') {
            $unit = pack('H4', substr($piece, 2));
        } elseif ($piece[2] === 'x') {
            $unit = pack('H4', substr($piece, 3, 4));
        } else {
            $code = (int) substr($piece, 2, -1);
            if ($code > 0xFFFF) {
                continue; // does not fit one 16-bit unit; keep the text
            }
            $unit = pack('n', $code);
        }
        // pack() produced big-endian bytes, so name that byte order explicitly.
        $gbk = iconv('UCS-2BE', 'GBK', $unit);
        if ($gbk !== false) {
            $pieces[$k] = $gbk;
        }
    }
    return implode('', $pieces);
}
- ?>
<html>
<head>
<script type="text/javascript">
// Browser-side counterpart of the PHP demo: escape the sample text with the
// built-in escape(), then show the escaped form and its unescape() result
// in one alert box.
var str = '我是来测试的';
var enStr = escape(str);
window.alert([enStr, unescape(enStr)].join("解:"));
</script>
</head>
</html>
在utf-8下面:
- <?php
// Round-trip demo: escape a Chinese sample string, print the escaped form,
// then print it decoded again, followed by a pre-escaped literal decoded
// directly. The sample literal is stored in whatever encoding this source
// file is saved in.
$str = "我是来测试的";
$enStr = escape($str);
echo $enStr, "<br/>";
echo '解:', unescape($enStr), "<br/>";
echo unescape('%u6211%u662F%u6765%u6D4B%u8BD5%u7684');
/**
 * Encode a UTF-8 string in the style of JavaScript's escape().
 *
 *   - ASCII runs go through rawurlencode().
 *   - U+0080..U+00FF become "%XX".
 *   - U+0100..U+FFFF become "%uXXXX".
 *   - Characters above U+FFFF become a UTF-16 surrogate pair,
 *     "%uXXXX%uXXXX".
 *   - Bytes that are not well-formed UTF-8 are emitted one by one as "%XX"
 *     rather than being dropped.
 *
 * @param string $str UTF-8 encoded input.
 * @return string The escaped, pure-ASCII representation.
 */
function escape($str) {
    // Split into ASCII runs, single well-formed multi-byte characters, and
    // (last alternative) stray bytes. No "e" modifier: it was only ever
    // meaningful for preg_replace() and makes the pattern fail on PHP 7+.
    $pattern = '/[\x00-\x7f]+|[\xc2-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf4][\x80-\xbf]{3}|./s';
    if (!preg_match_all($pattern, $str, $matches)) {
        return '';
    }
    $out = '';
    foreach ($matches[0] as $chunk) {
        if (ord($chunk[0]) < 0x80) {
            $out .= rawurlencode($chunk);
            continue;
        }
        // UTF-16BE fixes the byte order and yields surrogate pairs for
        // characters outside the BMP. A lone byte >= 0x80 is never valid.
        $utf16 = strlen($chunk) > 1 ? iconv('UTF-8', 'UTF-16BE', $chunk) : false;
        if ($utf16 === false || $utf16 === '') {
            foreach (str_split($chunk) as $byte) {
                $out .= sprintf('%%%02X', ord($byte));
            }
            continue;
        }
        if (strlen($utf16) === 2 && $utf16[0] === "\x00") {
            // U+0080..U+00FF: escape() uses the short two-digit form.
            $out .= '%' . strtoupper(bin2hex($utf16[1]));
            continue;
        }
        foreach (str_split($utf16, 2) as $unit) {
            $out .= '%u' . strtoupper(bin2hex($unit));
        }
    }
    return $out;
}
/**
 * Decode a JavaScript escape()-style string into UTF-8.
 *
 *   - "%uXXXX" is one UTF-16 code unit; a high surrogate immediately
 *     followed by an escaped low surrogate is combined into one character.
 *   - "%XX" is the code point U+0000..U+00FF, as in JavaScript's unescape(),
 *     and is re-encoded as UTF-8.
 *   - A "%" not followed by valid hex digits, and every other byte, is
 *     copied through unchanged.
 *   - An unpaired surrogate becomes U+FFFD.
 *
 * @param string $str Escaped input.
 * @return string UTF-8 encoded result.
 */
function unescape($str) {
    $hex = '0123456789abcdefABCDEF';
    $ret = '';
    $len = strlen($str);
    $i = 0;
    while ($i < $len) {
        if ($str[$i] !== '%') {
            $ret .= $str[$i];
            $i++;
            continue;
        }
        // Length checks come first so no offset past the end is ever read.
        if ($i + 6 <= $len && $str[$i + 1] === 'u' && strspn($str, $hex, $i + 2, 4) === 4) {
            $cp = hexdec(substr($str, $i + 2, 4));
            $i += 6;
            if ($cp >= 0xD800 && $cp <= 0xDBFF
                && $i + 6 <= $len && $str[$i] === '%' && $str[$i + 1] === 'u'
                && strspn($str, $hex, $i + 2, 4) === 4) {
                $low = hexdec(substr($str, $i + 2, 4));
                if ($low >= 0xDC00 && $low <= 0xDFFF) {
                    // Merge the surrogate pair into one supplementary code point.
                    $cp = 0x10000 + (($cp - 0xD800) << 10) + ($low - 0xDC00);
                    $i += 6;
                }
            }
            $ret .= _unescape_utf8_chr($cp);
        } elseif ($i + 3 <= $len && strspn($str, $hex, $i + 1, 2) === 2) {
            $ret .= _unescape_utf8_chr(hexdec(substr($str, $i + 1, 2)));
            $i += 3;
        } else {
            // Malformed escape: keep the "%" as literal text.
            $ret .= '%';
            $i++;
        }
    }
    return $ret;
}

/**
 * Encode one Unicode code point as UTF-8 (helper for unescape()).
 *
 * @param int $cp Code point in the range 0..0x10FFFF.
 * @return string UTF-8 bytes; U+FFFD for a surrogate code point.
 */
function _unescape_utf8_chr($cp) {
    if ($cp < 0x80) {
        return chr($cp);
    }
    if ($cp < 0x800) {
        return chr(0xC0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x3F));
    }
    if ($cp >= 0xD800 && $cp <= 0xDFFF) {
        return "\xEF\xBF\xBD";
    }
    if ($cp < 0x10000) {
        return chr(0xE0 | ($cp >> 12))
            . chr(0x80 | (($cp >> 6) & 0x3F))
            . chr(0x80 | ($cp & 0x3F));
    }
    return chr(0xF0 | ($cp >> 18))
        . chr(0x80 | (($cp >> 12) & 0x3F))
        . chr(0x80 | (($cp >> 6) & 0x3F))
        . chr(0x80 | ($cp & 0x3F));
}
- ?>
<html>
<head>
<script type="text/javascript">
// Browser-side counterpart of the PHP demo: escape the sample text with the
// built-in escape(), then show the escaped form and its unescape() result
// in one alert box.
var str = '我是来测试的';
var enStr = escape(str);
window.alert([enStr, unescape(enStr)].join("解:"));
</script>
</head>
</html>