PHP+trim乱码,PHP 源码探秘-为什么 trim 会导致乱码

本文通过对PHP源码的探秘,揭示了 trim 会导致乱码的原因。

1bdf9f2218ef4d9d3a14df7203933362.png

运行以下代码:$tag = "互联网产品、";$text = rtrim($tag, "、");print_r($text);

运行后,我们可能以为会得到 互联网产品 ,实际结果却是 互联网产 后面跟着一段乱码(末尾残留了不完整的字节)。为什么会这样呢?

原理

trim 函数文档:string trim ( string $str [, string $character_mask = " \t\n\r\0\x0B" ] )

该函数不是多字节函数,也就是说,汉字这样的多字节字符,会拿其头或尾的单字节来和后面的 $character_mask 对应的char数组进行匹配,如果在后面的数组中,则删掉,继续匹配。比如:echo ltrim("bcdf","abc"); // df

如下面的 demo 中的函数 string_print_char 所示:

、 由 0xe3 0x80 0x81 三字节组成,

品 由 0xe5 0x93 0x81 三字节组成。

所以在执行 rtrim 的时候,是按字节从尾部逐个比对的:先把 、 的三个字节 0xe3 0x80 0x81 全部去掉,接着发现 品 的末字节 0x81 同样在掩码中,于是也被去掉;直到遇到 0x93 才停止。品 只剩下不完整的 0xe5 0x93,导致了最后出现了乱码。

源码精简版演示

/*
 * After reading the PHP 7 source, the small demo below was distilled from it
 * so that everyone can study it together. Learning the PHP source is not
 * hard -- just make a little progress every day.
 *
 *  main.c
 *  trim
 *
 *  Created by 周梦康 on 2017/10/18.
 *  Copyright 2017年 周梦康. All rights reserved.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

void string_print_char(char *str);
void php_charmask(unsigned char *input, size_t len, char *mask);
char *ltrim(char *str, char *character_mask);
char *rtrim(char *str, char *character_mask);

/*
 * Copies `len` bytes starting at `start` into a freshly allocated,
 * NUL-terminated buffer. Returns NULL when the allocation fails.
 * The caller owns the result and must free() it.
 */
static char *dup_span(const char *start, size_t len)
{
    char *res = malloc(len + 1);

    if (res == NULL) {
        return NULL;
    }
    memcpy(res, start, len);
    res[len] = '\0'; /* without this, printing with "%s" reads past the copy */
    return res;
}

int main(int argc, char const *argv[])
{
    char *left;
    char *right;

    (void)argc;
    (void)argv;

    left = ltrim("bcdf", "abc"); /* df */
    if (left == NULL) {
        return 1;
    }
    printf("%s\n", left);
    free(left);

    string_print_char("品"); /* e5 93 81 */
    string_print_char("、"); /* e3 80 81 */

    /*
     * Byte-wise trimming strips all three bytes of "、" and then the trailing
     * 0x81 of "品", so the output ends in the incomplete sequence e5 93.
     */
    right = rtrim("互联网产品、", "、");
    if (right == NULL) {
        return 1;
    }
    printf("%s\n", right);
    free(right);

    return 0;
}

/*
 * Returns a newly allocated copy of `str` with every leading byte that occurs
 * in `character_mask` removed. Matching is done byte by byte, not per
 * character. Returns NULL on allocation failure; the caller frees the result.
 */
char *ltrim(char *str, char *character_mask)
{
    char mask[256];
    size_t len = strlen(str);
    size_t trimmed = 0;

    php_charmask((unsigned char *)character_mask, strlen(character_mask), mask);

    while (trimmed < len && mask[(unsigned char)str[trimmed]]) {
        trimmed++;
    }

    return dup_span(str + trimmed, len - trimmed);
}

/*
 * Returns a newly allocated copy of `str` with every trailing byte that occurs
 * in `character_mask` removed. Matching is done byte by byte, not per
 * character. Returns NULL on allocation failure; the caller frees the result.
 */
char *rtrim(char *str, char *character_mask)
{
    char mask[256];
    size_t len = strlen(str);

    php_charmask((unsigned char *)character_mask, strlen(character_mask), mask);

    while (len > 0 && mask[(unsigned char)str[len - 1]]) {
        len--;
    }

    return dup_span(str, len);
}

/* Prints every byte of `str` as two lowercase hex digits, then a newline. */
void string_print_char(char *str)
{
    size_t len = strlen(str);

    for (size_t i = 0; i < len; i++) {
        printf("%02hhx", (unsigned char)str[i]);
    }
    printf("\n");
}

/*
 * Fills the 256-entry table `mask` so that mask[b] is 1 for every byte b
 * among the first `len` bytes of `input`, and 0 for all other bytes.
 * Simplified for the demo: no "a..z" range syntax is recognised here.
 */
void php_charmask(unsigned char *input, size_t len, char *mask)
{
    unsigned char *end = input + len;

    memset(mask, 0, 256);
    for (; input < end; input++) {
        mask[*input] = 1;
    }
}

PHP7 相关源码PHP_FUNCTION(trim){ php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 3);}PHP_FUNCTION(rtrim){ php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);}PHP_FUNCTION(ltrim){ php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);}static void php_do_trim(INTERNAL_FUNCTION_PARAMETERS, int mode){ zend_string *str; zend_string *what = NULL; ZEND_PARSE_PARAMETERS_START(1, 2) Z_PARAM_STR(str) Z_PARAM_OPTIONAL Z_PARAM_STR(what) ZEND_PARSE_PARAMETERS_END(); ZVAL_STR(return_value, php_trim(str, (what ? ZSTR_VAL(what) : NULL), (what ? ZSTR_LEN(what) : 0), mode));}PHPAPI zend_string *php_trim(zend_string *str, char *what, size_t what_len, int mode){ const char *c = ZSTR_VAL(str); size_t len = ZSTR_LEN(str); register size_t i; size_t trimmed = 0; char mask[256]; if (what) { if (what_len == 1) { char p = *what; if (mode & 1) { for (i = 0; i < len; i++) { if (c[i] == p) { trimmed++; } else { break; } } len -= trimmed; c += trimmed; } if (mode & 2) { if (len > 0) { i = len 1; do { if (c[i] == p) { len--; } else { break; } } while (i-- != 0); } } } else { php_charmask((unsigned char*)what, what_len, mask); if (mode & 1) { for (i = 0; i < len; i++) { if (mask[(unsigned char)c[i]]) { trimmed++; } else { break; } } len -= trimmed; c += trimmed; } if (mode & 2) { if (len > 0) { i = len - 1; do { if (mask[(unsigned char)c[i]]) { len--; } else { break; } } while (i-- != 0); } } } } else { if (mode & 1) { for (i = 0; i < len; i++) { if ((unsigned char)c[i] <= " " && (c[i] == " " || c[i] == "" || c[i] == "" || c[i] == "" || c[i] == "v" || c[i] == " ")) { trimmed++; } else { break; } } len -= trimmed; c += trimmed; } if (mode & 2) { if (len > 0) { i = len - 1; do { if ((unsigned char)c[i] <= " " && (c[i] == " " || c[i] == "" || c[i] == "" || c[i] == "" || c[i] == "v" || c[i] == " ")) { len--; } else { break; } } while (i-- != 0); } } } if (ZSTR_LEN(str) == len) { return zend_string_copy(str); } else { return zend_string_init(c, len, 0); }}/* {{{ php_charmask * Fills a 256-byte bytemask with input. 
You can specify a range like "a..z", * it needs to be incrementing. * Returns: FAILURE/SUCCESS whether the input was correct (i.e. no range errors) */static inline int php_charmask(unsigned char *input, size_t len, char *mask){ unsigned char *end; unsigned char c; int result = SUCCESS; memset(mask, 0, 256); for (end = input+len; input < end; input++) { c=*input; if ((input+3 < end) && input[1] == "." && input[2] == "." && input[3] >= c) { memset(mask+c, 1, input[3] - c + 1); input+=3; } else if ((input+1 < end) && input[0] == "." && input[1] == ".") { /* Error, try to be as helpful as possible: (a range ending/starting with "." won"t be captured here) */ if (end-len >= input) { /* there was no "left" char */ php_error_docref(NULL, E_WARNING, "Invalid ".."-range, no character to the left of "..""); result = FAILURE; continue; } if (input+2 >= end) { /* there is no "right" char */ php_error_docref(NULL, E_WARNING, "Invalid ".."-range, no character to the right of "..""); result = FAILURE; continue; } if (input[-1] > input[2]) { /* wrong order */ php_error_docref(NULL, E_WARNING, "Invalid ".."-range, ".."-range needs to be incrementing"); result = FAILURE; continue; } /* FIXME: better error (a..b..c is the only left possibility?) */ php_error_docref(NULL, E_WARNING, "Invalid ".."-range"); result = FAILURE; continue; } else { mask[c]=1; } } return result;}/* }}} */本文仅代表作者个人观点,不代表巅云官方发声,对观点有疑义请先联系作者本人进行修改,若内容非法请联系平台管理员,邮箱2522407257@qq.com。更多相关资讯,请到巅云www.yinxi.net学习互联网营销技术请到巅云建站www.yx10011.com。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值