A.1 问题描述
在日常的工作中,我们会偶尔用到 is_numberic来判断一个字符串是否为数字类型的字符串,不过有时候会因为对这个函数的了解不够透彻而产生BUG。 我们这边团队就遇到了这样的案例。
具体的情形是这样的:
对某个表单输入的参数做is_numberic判断
- 如果为数字类型,拼接SQL语法是: {$field} >= {$value}
- 如果为字符串,拼接SQL语法是: {$field} >= '{$value}'
当表单输入参数为'5e34'的时候触发SQL语法错误。
A.2 原因追溯
翻阅PHP5.6版本的源码,找到is_numberic函数具体实现,代码如下
// ext\standard\type.c
/* {{{ proto bool is_numeric(mixed value)
Returns true if value is a number or a numeric string */
PHP_FUNCTION(is_numeric)
{
zval **arg;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Z", &arg) == FAILURE) {
return;
}
switch (Z_TYPE_PP(arg)) {
case IS_LONG:
case IS_DOUBLE:
RETURN_TRUE;
break;
case IS_STRING:
if (is_numeric_string(
Z_STRVAL_PP(arg),
Z_STRLEN_PP(arg),
NULL,
NULL,
0)
) {
RETURN_TRUE;
} else {
RETURN_FALSE;
}
break;
default:
RETURN_FALSE;
break;
}
}
/* }}} */
从上述源码我们确认处理字符串类型的函数时 is_numeric_string(Z_STRVAL_PP(arg), Z_STRLEN_PP(arg), NULL, NULL, 0); 定位到该函数查看。
// zend\Zend_operators.h
/**
* Checks whether the string "str" with length "length" is numeric. The value
* of allow_errors determines whether it's required to be entirely numeric, or
* just its prefix. Leading whitespace is allowed.
*
* The function returns 0 if the string did not contain a valid number; IS_LONG
* if it contained a number that fits within the range of a long; or IS_DOUBLE
* if the number was out of long range or contained a decimal point/exponent.
* The number's value is returned into the respective pointer, *lval or *dval,
* if that pointer is not NULL.
*
* This variant also gives information if a string that represents an integer
* could not be represented as such due to overflow. It writes 1 to oflow_info
* if the integer is larger than LONG_MAX and -1 if it's smaller than LONG_MIN.
*/
static inline zend_uchar is_numeric_string_ex(const char *str, int length, long *lval, double *dval, int allow_errors, int *oflow_info)
{
const char *ptr;
int base = 10, digits = 0, dp_or_e = 0;
double local_dval = 0.0;
zend_uchar type;
if (!length) {
return 0;
}
if (oflow_info != NULL) {
*oflow_info = 0;
}
/* Skip any whitespace
* This is much faster than the isspace() function */
while (*str == ' ' || *str == '\t' || *str == '\n' || *str == '\r' || *str == '\v' || *str == '\f') {
str++;
length--;
}
ptr = str;
/* Skip [-|+] signed character */
if (*ptr == '-' || *ptr == '+') {
ptr++;
}
/* First char is digit */
if (ZEND_IS_DIGIT(*ptr)) {
/* Handle hex numbers
* str is used instead of ptr to disallow signs and keep old behavior */
if (length > 2 && *str == '0' && (str[1] == 'x' || str[1] == 'X')) {
base = 16;
ptr += 2;
}
/* Skip any leading 0s */
while (*ptr == '0') {
ptr++;
}
/* Count the number of digits. If a decimal point/exponent is found,
* it's a double. Otherwise, if there's a dval or no need to check for
* a full match, stop when there are too many digits for a long */
for (type = IS_LONG; !(digits >= MAX_LENGTH_OF_LONG && (dval || allow_errors == 1)); digits++, ptr++) {
check_digits:
if (ZEND_IS_DIGIT(*ptr) || (base == 16 && ZEND_IS_XDIGIT(*ptr)){
continue;
} else if (base == 10) {
if (*ptr == '.' && dp_or_e < 1) {
goto process_double;
} else if ((*ptr == 'e' || *ptr == 'E') && dp_or_e < 2){
const char *e = ptr + 1;
if (*e == '-' || *e == '+') {
ptr = e++;
}
if (ZEND_IS_DIGIT(*e)) {
goto process_double;
}
}
}
break;
}
if (base == 10) {
if (digits >= MAX_LENGTH_OF_LONG) {
if (oflow_info != NULL) {
*oflow_info = *str == '-' ? -1 : 1;
}
dp_or_e = -1;
goto process_double;
}
} else if (!(digits < SIZEOF_LONG * 2 || (digits == SIZEOF_LONG * 2 && ptr[-digits] <= '7'))) {
if (dval) {
local_dval = zend_hex_strtod(str, &ptr);
}
if (oflow_info != NULL) {
*oflow_info = 1;
}
type = IS_DOUBLE;
}
} else if (*ptr == '.' && ZEND_IS_DIGIT(ptr[1])) {
process_double:
type = IS_DOUBLE;
/* If there's a dval, do the conversion; else continue checking
* the digits if we need to check for a full match */
if (dval) {
local_dval = zend_strtod(str, &ptr);
} else if (allow_errors != 1 && dp_or_e != -1) {
dp_or_e = (*ptr++ == '.') ? 1 : 2;
goto check_digits;
}
} else {
return 0;
}
if (ptr != str + length) {
if (!allow_errors) {
return 0;
}
if (allow_errors == -1) {
zend_error(E_NOTICE, "A non well formed numeric value encountered");
}
}
if (type == IS_LONG) {
if (digits == MAX_LENGTH_OF_LONG - 1) {
int cmp = strcmp(&ptr[-digits], long_min_digits);
if (!(cmp < 0 || (cmp == 0 && *str == '-'))) {
if (dval) {
*dval = zend_strtod(str, NULL);
}
if (oflow_info != NULL) {
*oflow_info = *str == '-' ? -1 : 1;
}
return IS_DOUBLE;
}
}
if (lval) {
*lval = strtol(str, NULL, base);
}
return IS_LONG;
} else {
if (dval) {
*dval = local_dval;
}
return IS_DOUBLE;
}
}
static inline zend_uchar is_numeric_string(const char *str, int length, long *lval, double *dval, int allow_errors) {
return is_numeric_string_ex(str, length, lval, dval, allow_errors, NULL);
}
A.3 源码剖析
- 在去掉前导空格, [-|+]符号标识位后, 判断剩下的字符串是否满足
-
- 十六进制
-
- 科学计数法
-
- 双精度浮点小数
其中
- 十六进制的正则: 0([x|X])([0-9|a-f|A-F]+)
- 科学计数法正则: \.?\d+([e|E][-|+]?)\d+$
- 双精度浮点小数: \d*\.\d+([e|E]?|[e|E][-|+]?)\d+$
测试代码
<?php
function dump($val) {
var_dump(sprintf("%s => %d", $val, is_numeric($val)));
}
// true
dump('5e34');
dump('6e-20');
dump('-1928');
dump('.1e1202');
dump('0x34af');
dump('000123');
dump('
12345678');
// false
dump('12 345678');
dump('.e289');
dump('5e3d');
dump('e.1232');
dump('19-28');