· 作者: Laruence( )
· 本文地址: http://www.laruence.com/2008/11/20/630.html
· 转载请注明出处
foreach是PHP中很常用的一个用作数组循环的控制语句。
因为它的方便和易用,自然也就在后端隐藏着很复杂的具体实现方式(对用户透明)
今天,我们就来一起分析分析,foreach是如何实现数组(对象)的遍历的。
本节内容涉及到较多编译原理(lex and yacc)的知识,所以如果您觉得看不太懂,可以先找相关的资料看看。
我们知道PHP是一个脚本语言,也就是说,用户编写的PHP代码最终都是会被PHP解释器解释执行,
特别的,对于PHP来说,所有的用户编写的PHP代码,都会被翻译成PHP的虚拟机ZE的虚拟指令(OPCODES)来执行(参看:深入理解PHP原理之Opcodes).
不论细节的话,就是说,我们所编写的任何PHP脚本,都会最终被翻译成一条条的指令,从而根据指令,由相应的C编写的函数来执行。
那么foreach会被翻译成什么样子呢?
1. foreach($arr as $key => $val){
2. echo $key . '=>' . $val . "\n";
3. }
在词法分析阶段,foreach会被识别为一个TOKEN:T_FOREACH,
在语法分析阶段,会被规则:
1. unticked_statement: //没有被绑定ticks的语句
2. //有省略
3. | T_FOREACH '(' variable T_AS
4. { zend_do_foreach_begin(&$1, &$2, &$3, &$4,1 TSRMLS_CC); }
5. foreach_variable foreach_optional_arg ')' { zend_do_foreach_cont(&$1, &$2, &$4, &$6, &$7 TSRMLS_CC); }
6. foreach_statement { zend_do_foreach_end(&$1, &$4 TSRMLS_CC); }
7. | T_FOREACH '(' expr_without_variable T_AS
8. { zend_do_foreach_begin(&$1, &$2, &$3, &$4,0 TSRMLS_CC); }
9. variable foreach_optional_arg ')' { zend_check_writable_variable(&$6); zend_do_foreach_cont(&$1, &$2, &$4, &$6, &$7 TSRMLS_CC); }
10. foreach_statement { zend_do_foreach_end(&$1, &$4 TSRMLS_CC); }
11. //有省略
12. ;
仔细分析这段语法规则,我们可以发现,对于:
foreach($arr as $key => $val){
echo $key . '=>' . $val . "\n";
}
会被分析为:
1. T_FOREACH '(' variable T_AS { zend_do_foreach_begin('foreach','(',$arr,'as',1 TSRMLS_CC); }
2. foreach_variable foreach_optional_arg(T_DOUBLE_ARROW foreach_variable) ')' { zend_do_foreach_cont('foreach','(','as',$key,$val TSRMLS_CC); }
3. foreach_statement {zend_do_foreach_end('foreach','as');}
然后,让我们来看看foreach_statement:
它其实就是一个代码块,体现了我们的 echo $key . '=>' . $val . "\n";
T_ECHO expr;
显然,实现foreach的核心就是如下3个函数:
zend_do_foreach_begin
zend_do_foreach_cont
zend_do_foreach_end
其中,zend_do_foreach_begin (代码太长,直接写伪码) 主要做了:
1. 记录当前的opline行数(为以后跳转而记录)
2. 对数组进行RESET(将内部指针指向第一个元素)
3. 获取临时变量($val)
4. 设置获取变量的OPCODE FE_FETCH,结果存第3步的临时变量
5. 记录获取变量的OPCODES的行数
而对于 zend_do_foreach_cont来说:
1. 根据foreach_variable的u.EA.type来判断是否引用
2. 根据是否引用来调整zend_do_foreach_begin中生成的FE_FETCH方式
3. 根据zend_do_foreach_begin中记录的取变量的OPCODES的行数,来初始化循环(主要处理在循环内部的循环:do_begin_loop)
最后zend_do_foreach_end:
1. 根据zend_do_foreach_begin中记录的行数信息,设置ZEND_JMP OPCODE
2. 根据当前行数,设置循环体下一条opline, 用以跳出循环
3. 结束循环(处理循环内循环:do_end_loop)
4. 清理临时变量
当然,在zend_do_foreach_cont和zend_do_foreach_end之间会在语法分析阶段被填充foreach_statement的语句代码。
这样,就实现了foreach的OPCODES line。
比如对于我们开头的实例代码,最终生成的OPCODES是:
1. filename: /home/huixinchen/foreach.php
2. function name: (null)
3. number of ops: 17
4. compiled vars: !0 = $arr, !1 = $key, !2 = $val
5. line # op fetch ext return operands
6. -------------------------------------------------------------------------------
7. 2 0 SEND_VAL 1
8. 1 SEND_VAL 100
9. 2 DO_FCALL 2 'range'
10. 3 ASSIGN !0,$0
11. 3 4 FE_RESET $2 !0, ->14
12. 5 FE_FETCH $3 $2, ->14
13. 6 ZEND_OP_DATA ~5
14. 7 ASSIGN !2,$3
15. 8 ASSIGN !1, ~5
16. 4 9 CONCAT ~7 !1,'-'
17. 10 CONCAT ~8 ~7, !2
18. 11 CONCAT ~9 ~8,'%0A'
19. 12 ECHO ~9
20. 5 13 JMP ->5
21. 14 SWITCH_FREE $2
22. 7 15 RETURN 1
23. 16* ZEND_HANDLE_EXCEPTION
我们注意到FE_FETCH的op2的操作数是14,也就是JMP后一条opline,也就是说,在获取完最后一个数组元素以后,FE_FETCH失败的情况下,会跳到第14行opline,从而实现了循环的结束。
而第13行opline(JMP)的op1的操作数指向了FE_FETCH,也就是无条件跳转到第5行opline,从而实现了循环。
附录:
1. void zend_do_foreach_begin(znode *foreach_token, znode *open_brackets_token, znode *array, znode *as_token, int variable TSRMLS_DC)
2. {
3. zend_op *opline;
4. zend_bool is_variable;
5. zend_bool push_container = 0;
6. zend_op dummy_opline;
7. /* Emits FE_RESET, FE_FETCH and a placeholder OP_DATA opline, storing their opline numbers in the tokens so later stages can patch them. */
8. if (variable) {
9. // the foreach subject was parsed as a variable rather than an expression
10. if (zend_is_function_or_method_call(array)) {
11. // a function/method call result is not treated as a variable
12. is_variable= 0;
13. } else {
14. is_variable= 1;
15. }
16. /* stash the next opline number in the '(' token; zend_do_foreach_cont later walks back from FE_RESET to this position */
17. open_brackets_token->u.opline_num = get_next_op_number(CG(active_op_array));
18. zend_do_end_variable_parse(BP_VAR_W, 0 TSRMLS_CC); // finish parsing the array/object variable; counterpart of zend_do_begin_variable_parse
19. if (CG(active_op_array)->last > 0 &&
20. CG(active_op_array)->opcodes[CG(active_op_array)->last-1].opcode == ZEND_FETCH_OBJ_W) {
21. /* Only lock the container if we are fetching from a real container and not $this */
22. if (CG(active_op_array)->opcodes[CG(active_op_array)->last-1].op1.op_type == IS_VAR) {
23. CG(active_op_array)->opcodes[CG(active_op_array)->last-1].extended_value |= ZEND_FETCH_ADD_LOCK;
24. push_container= 1;
25. }
26. }
27. } else {
28. is_variable= 0;
29. open_brackets_token->u.opline_num = get_next_op_number(CG(active_op_array));
30. }
31.
32. foreach_token->u.opline_num = get_next_op_number(CG(active_op_array)); // remember the opline number of the FE_RESET emitted next
33.
34. opline = get_next_op(CG(active_op_array) TSRMLS_CC); // emit the opcode that resets the array
35.
36. opline->opcode = ZEND_FE_RESET;
37. opline->result.op_type = IS_VAR;
38. opline->result.u.var = get_temporary_variable(CG(active_op_array));
39. opline->op1 = *array;
40. SET_UNUSED(opline->op2);
41. opline->extended_value = is_variable ? ZEND_FE_RESET_VARIABLE: 0;
42. /* Build a scratch opline describing what must be freed after the loop and push a copy onto foreach_copy_stack. */
43. dummy_opline.result = opline->result;
44. if (push_container) {
45. dummy_opline.op1 = CG(active_op_array)->opcodes[CG(active_op_array)->last-2].op1; // FE_RESET is now last, so the FETCH_OBJ_W inspected above sits at last-2
46. } else {
47. znode tmp;
48.
49. tmp.op_type = IS_UNUSED;
50. dummy_opline.op1 = tmp;
51. }
52. zend_stack_push(&CG(foreach_copy_stack), (void *) &dummy_opline, sizeof(zend_op));
53.
54. as_token->u.opline_num = get_next_op_number(CG(active_op_array)); // remember the loop start, i.e. the FE_FETCH opline emitted next
55.
56. opline = get_next_op(CG(active_op_array) TSRMLS_CC);
57. opline->opcode = ZEND_FE_FETCH;
58. opline->result.op_type = IS_VAR;
59. opline->result.u.var = get_temporary_variable(CG(active_op_array));
60. opline->op1 = dummy_opline.result; // the array being iterated (the result of FE_RESET)
61. opline->extended_value = 0;
62. SET_UNUSED(opline->op2);
63.
64. opline = get_next_op(CG(active_op_array) TSRMLS_CC);
65. opline->opcode = ZEND_OP_DATA; // companion opline used when a key is requested; ignored when the foreach has no key
66. SET_UNUSED(opline->op1);
67. SET_UNUSED(opline->op2);
68. SET_UNUSED(opline->result);
69. }
1. void zend_do_foreach_cont(znode *foreach_token, const znode *open_brackets_token, const znode *as_token, znode *value, znode *key TSRMLS_DC)
2. {
3. zend_op *opline;
4. znode dummy, value_node;
5. zend_bool assign_by_ref=0;
6. /* Finalizes the FE_RESET/FE_FETCH oplines once the loop variables are known, emits the value/key assignments and opens the loop. */
7. opline = &CG(active_op_array)->opcodes[as_token->u.opline_num]; // the FE_FETCH opline recorded in as_token
8. if (key->op_type != IS_UNUSED) {
9. znode *tmp; // two loop variables were given: `value` holds the first one (the key), so swap key and value
10.
11. tmp = key;
12. key = value;
13. value = tmp;
14.
15. opline->extended_value |= ZEND_FE_FETCH_WITH_KEY; // fetch both key and value
16. }
17.
18. if ((key->op_type != IS_UNUSED) && (key->u.EA.type & ZEND_PARSED_REFERENCE_VARIABLE)) {
19. // the key cannot be taken by reference
20. zend_error(E_COMPILE_ERROR, "Key element cannot be a reference");
21. }
22.
23. if (value->u.EA.type & ZEND_PARSED_REFERENCE_VARIABLE) {
24. // the value is taken by reference
25. assign_by_ref= 1;
26. if (!(opline-1)->extended_value) {
27. // the opline before FE_FETCH is FE_RESET; a zero extended_value there means the subject is not a variable
28. zend_error(E_COMPILE_ERROR, "Cannot create references to elements of a temporary array expression");
29. }
30.
31. opline->extended_value |= ZEND_FE_FETCH_BYREF; // fetch by reference
32. CG(active_op_array)->opcodes[foreach_token->u.opline_num].extended_value |= ZEND_FE_RESET_REFERENCE; // flag the FE_RESET opline as well
33. } else {
34. zend_op *foreach_copy;
35. zend_op *fetch = &CG(active_op_array)->opcodes[foreach_token->u.opline_num];
36. zend_op *end = &CG(active_op_array)->opcodes[open_brackets_token->u.opline_num];
37.
38. /* Change "write context" into "read context" */
39. fetch->extended_value = 0; /* reset ZEND_FE_RESET_VARIABLE */
40. while (fetch != end) {
41. --fetch;
42. if (fetch->opcode == ZEND_FETCH_DIM_W && fetch->op2.op_type == IS_UNUSED) {
43. zend_error(E_COMPILE_ERROR, "Cannot use [] for reading");
44. }
45. fetch->opcode -= 3; /* FETCH_W -> FETCH_R */
46. }
47.
48. /* prevent double SWITCH_FREE */
49. zend_stack_top(&CG(foreach_copy_stack), (void **) &foreach_copy);
50. foreach_copy->op1.op_type = IS_UNUSED;
51. }
52.
53. value_node = opline->result;
54.
55. if (assign_by_ref) {
56. zend_do_end_variable_parse(value, BP_VAR_W, 0 TSRMLS_CC); // finish parsing the target variable (reference case)
57. zend_do_assign_ref(NULL, value, &value_node TSRMLS_CC); // bind the target to the FE_FETCH result by reference
58. } else {
59. zend_do_assign(&dummy, value, &value_node TSRMLS_CC); // assign the fetched value to the target
60. zend_do_free(&dummy TSRMLS_CC);
61. }
62.
63. if (key->op_type != IS_UNUSED) {
64. znode key_node;
65. /* the opline right after FE_FETCH is the OP_DATA placeholder; give it a temporary result to carry the key */
66. opline = &CG(active_op_array)->opcodes[as_token->u.opline_num+1];
67. opline->result.op_type = IS_TMP_VAR;
68. opline->result.u.EA.type = 0;
69. opline->result.u.opline_num = get_temporary_variable(CG(active_op_array));
70. key_node = opline->result;
71.
72. zend_do_assign(&dummy, key, &key_node TSRMLS_CC);
73. zend_do_free(&dummy TSRMLS_CC);
74. }
75.
76. do_begin_loop(TSRMLS_C);
77. INC_BPC(CG(active_op_array));
78. }
1. void zend_do_foreach_end(znode *foreach_token, znode *as_token TSRMLS_DC)
2. {
3. zend_op *container_ptr;
4. zend_op *opline = get_next_op(CG(active_op_array) TSRMLS_CC); // emit the JMP opcode
5. /* Closes the foreach: emits the back-jump, patches the exit targets of FE_RESET/FE_FETCH and releases the foreach_copy_stack entry. */
6. opline->opcode = ZEND_JMP;
7. opline->op1.u.opline_num = as_token->u.opline_num; // jump back to the FE_FETCH opline
8. SET_UNUSED(opline->op1);
9. SET_UNUSED(opline->op2);
10.
11. CG(active_op_array)->opcodes[foreach_token->u.opline_num].op2.u.opline_num = get_next_op_number(CG(active_op_array)); // FE_RESET's op2: the opline after the JMP, used to leave the loop
12. CG(active_op_array)->opcodes[as_token->u.opline_num].op2.u.opline_num = get_next_op_number(CG(active_op_array)); // same exit target for FE_FETCH's op2
13.
14. do_end_loop(as_token->u.opline_num, 1 TSRMLS_CC); // set up for loop nesting; pairs with do_begin_loop in zend_do_foreach_cont
15. /* hand the top foreach_copy_stack entry to generate_free_foreach_copy, then pop it */
16. zend_stack_top(&CG(foreach_copy_stack), (void **) &container_ptr);
17. generate_free_foreach_copy(container_ptr TSRMLS_CC);
18. zend_stack_del_top(&CG(foreach_copy_stack));
19.
20. DEC_BPC(CG(active_op_array)); // for PHP interactive mode; pairs with INC_BPC in zend_do_foreach_cont
21. }