9.3.4. 读入 DEFINE_DELAY 模式
DEFINE_DELAY 的细节请参考 DEFINE_DELAY模式的概览 。对于这种模式,我们使用以下例子:
278 (define_delay (and (eq_attr "type" "branch") in mips.md
279 (eq (symbol_ref "TARGET_MIPS16") (const_int 0)))
280 [(eq_attr "can_delay" "yes")
281 (nil)
282 (and (eq_attr "branch_likely" "yes")
283 (eq_attr "can_delay" "yes"))])
在经过 init_md_reader_args 的处理后,上面的模式被作为以下 rtx 对象载入内存。
图 41 : DEFINE_DELAY 模式的例子
gen_delay 简单地把这些对象链接在一起。
4430 static void
4431 gen_delay (rtx def, int lineno) in genattrtab.c
4432 {
4433 struct delay_desc *delay;
4434 int i;
4435
4436 if (XVECLEN (def, 1) % 3 != 0)
4437 {
4438 message_with_line (lineno,
4439 "number of elements in DEFINE_DELAY must be multiple of three");
4440 have_error = 1;
4441 return ;
4442 }
4443
4444 for (i = 0; i < XVECLEN (def, 1); i += 3)
4445 {
4446 if (XVECEXP (def, 1, i + 1))
4447 have_annul_true = 1;
4448 if (XVECEXP (def, 1, i + 2))
4449 have_annul_false = 1;
4450 }
4451
4452 delay = oballoc (sizeof (struct delay_desc));
4453 delay->def = def;
4454 delay->num = ++num_delays ;
4455 delay->next = delays ;
4456 delay->lineno = lineno;
4457 delays = delay;
4458 }
在 4433 行, delay_desc 是一个简单结构体,它具有如下定义。
213 struct delay_desc in genattrtab.c
214 {
215 rtx def; /* DEFINE_DELAY expression. */
216 struct delay_desc *next; /* Next DEFINE_DELAY. */
217 int num; /* Number of DEFINE_DELAY, starting at 1. */
218 int lineno; /* Line number. */
219 };
9.3.5. 读入 DEFINE_FUNCTION_UNIT 模式
DEFINE_FUNCTION_UNIT模式的概览 一节描述了 DEFINE_FUNCTION_UNIT 模式的细节。对于这个模式,我们使用以下的例子:
43 (define_function_unit "k6_alux" 1 0 in k6.md
44 (and (eq_attr "cpu" "k6")
45 (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,cld"))
46 1 1)
与我们下面将要看到的模式相比,这个模式是旧式的指令级别并行性描述。两者的目的都是产生流水线危险识别器。在同一个机器描述文件中,这两种形式不能共存。至于这个旧式描述相对于新描述的缺点,请参考 gccint , 10.18.8.3 节。
事实上,从其定义我们可以看到,这个模式以功能单元的角度来描述系统——它告诉我们这个单元被占据后要多久才能再次可用,其输出结果要多久之后才能被作为输入使用,同时它也给出了将占据它的指令列表。
而对于新的形式,也就是我们将在下面看到的 define_insn_reservation ,它从指令的角度来描述系统——它告诉我们,在指令执行过程中,在流水线的每个阶段,哪些单元将被占据(使用模式来定义之),然后多长时间后其结果可用。这无疑是个更好的形式,因为调度器的目的是尽可能多地发布指令。以指令的角度来描述系统,可以更好地服务于这个目的。
在经过 init_md_reader_args 的处理后,上面的模式被作为以下的 rtx 对象载入内存。
图 42 : DEFINE_FUNCTION_UNIT 模式的例子
gen_unit 从这个 rtx 对象获取信息。每个 define_function_unit 模式应该具有唯一的名字,这个名字用作甄别器。
4466 static void
4467 gen_unit (rtx def, int lineno) in genattrtab.c
4468 {
4469 struct function_unit *unit;
4470 struct function_unit_op *op;
4471 const char *name = XSTR (def, 0);
4472 int multiplicity = XINT (def, 1);
4473 int simultaneity = XINT (def, 2);
4474 rtx condexp = XEXP (def, 3);
4475 int ready_cost = MAX (XINT (def, 4), 1);
4476 int issue_delay = MAX (XINT (def, 5), 1);
4477
4478 /* See if we have already seen this function unit. If so, check that
4479 the multiplicity and simultaneity values are the same. If not, make
4480 a structure for this function unit. */
4481 for (unit = units ; unit; unit = unit->next)
4482 if (! strcmp (unit->name, name))
4483 {
4484 if (unit->multiplicity != multiplicity
4485 || unit->simultaneity != simultaneity)
4486 {
4487 message_with_line (lineno,
4488 "differing specifications given for function unit %s",
4489 unit->name);
4490 message_with_line (unit->first_lineno, "previous definition");
4491 have_error = 1;
4492 return ;
4493 }
4494 break ;
4495 }
4496
4497 if (unit == 0)
4498 {
4499 unit = oballoc (sizeof (struct function_unit ));
4500 unit->name = name;
4501 unit->multiplicity = multiplicity;
4502 unit->simultaneity = simultaneity;
4503 unit->issue_delay.min = unit->issue_delay.max = issue_delay;
4504 unit->num = num_units ++;
4505 unit->num_opclasses = 0;
4506 unit->condexp = false_rtx ;
4507 unit->ops = 0;
4508 unit->next = units ;
4509 unit->first_lineno = lineno;
4510 units = unit;
4511 }
4512 else
4513 XSTR (def, 0) = unit->name;
4514
4515 /* Make a new operation class structure entry and initialize it. */
4516 op = oballoc (sizeof (struct function_unit_op ));
4517 op->condexp = condexp;
4518 op->num = unit->num_opclasses++;
4519 op->ready = ready_cost;
4520 op->issue_delay = issue_delay;
4521 op->next = unit->ops;
4522 op->lineno = lineno;
4523 unit->ops = op;
4524 num_unit_opclasses ++;
4525
4526 /* Set our issue expression based on whether or not an optional conflict
4527 vector was specified. */
4528 if (XVEC (def, 6))
4529 {
4530 /* Compute the IOR of all the specified expressions. */
4531 rtx orexp = false_rtx ;
4532 int i;
4533
4534 for (i = 0; i < XVECLEN (def, 6); i++)
4535 orexp = insert_right_side (IOR, orexp, XVECEXP (def, 6, i), -2, -2);
4536
4537 op->conflict_exp = orexp;
4538 extend_range (&unit->issue_delay, 1, issue_delay);
4539 }
4540 else
4541 {
4542 op->conflict_exp = true_rtx ;
4543 extend_range (&unit->issue_delay, issue_delay, issue_delay);
4544 }
4545
4546 /* Merge our conditional into that of the function unit so we can determine
4547 which insns are used by the function unit. */
4548 unit->condexp = insert_right_side (IOR, unit->condexp, op->condexp, -2, -2);
4549 }
function_unit 的 condexp 域指向一个表达式,如果其值是 TRUE ,表示指令需要这个单元。构建出来的 function_unit 对象被保存在由 units 指向的链表里(新的单元被插入到链表头部)。
238 struct function_unit in genattrtab.c
239 {
240 const char *name; /* Function unit name. */
241 struct function_unit *next; /* Next function unit. */
242 int num; /* Ordinal of this unit type. */
243 int multiplicity; /* Number of units of this type. */
244 int simultaneity; /* Maximum number of simultaneous insns
245 on this function unit or 0 if unlimited. */
246 rtx condexp; /* Expression TRUE for insn needing unit. */
247 int num_opclasses; /* Number of different operation types. */
248 struct function_unit_op *ops; /* Pointer to first operation type. */
249 int needs_conflict_function; /* Nonzero if a conflict function required. */
250 int needs_blockage_function; /* Nonzero if a blockage function required. */
251 int needs_range_function; /* Nonzero if blockage range function needed. */
252 rtx default_cost; /* Conflict cost, if constant. */
253 struct range issue_delay; /* Range of issue delay values. */
254 int max_blockage; /* Maximum time an insn blocks the unit. */
255 int first_lineno; /* First seen line number. */
256 };
某些功能单元可能被多个指令组所使用(在一个 define_function_unit 里可以出现多个指令,我们称之为一组,因为如果出现,它们具有相似的发布延迟,就绪代价,及冲突列表)。因为每个组可能包含所涉及的特有的参数,这需要额外的数据结构。结构体 function_unit_op 为这个目的而设计。
223 struct function_unit_op in genattrtab.c
224 {
225 rtx condexp; /* Expression TRUE for applicable insn. */
226 struct function_unit_op *next; /* Next operation for this function unit. */
227 int num; /* Ordinal for this operation type in unit. */
228 int ready; /* Cost until data is ready. */
229 int issue_delay; /* Cost until unit can accept another insn. */
230 rtx conflict_exp; /* Expression TRUE for insns incurring issue delay. */
231 rtx issue_exp; /* Expression computing issue delay. */
232 int lineno; /* Line number. */
233 };
在这里, function_unit_op 中的 condexp 域包含了识别组的测试的 rtx 对象。这很容易理解:在 gen_unit 处理了所有相关的 define_function_unit 模式后, function_unit 的 condexp 域就是由 insert_right_side 对所有从属的 function_unit_op 的 condexp 进行 IOR 所得到的结果。
insert_right_side 接受两个表达式,并按给定的操作码( AND 或 IOR )从它们来构建一个规范形式的表达式。这个函数具有以下定义。
2653 static rtx in genattrtab.c
2654 insert_right_side (enum rtx_code code, rtx exp, rtx term, int insn_code, int insn_index)
2655 {
2656 rtx newexp;
2657
2658 /* Avoid consing in some special cases. */
2659 if (code == AND && term == true_rtx )
2660 return exp;
2661 if (code == AND && term == false_rtx )
2662 return false_rtx ;
2663 if (code == AND && exp == true_rtx )
2664 return term;
2665 if (code == AND && exp == false_rtx )
2666 return false_rtx ;
2667 if (code == IOR && term == true_rtx )
2668 return true_rtx ;
2669 if (code == IOR && term == false_rtx )
2670 return exp;
2671 if (code == IOR && exp == true_rtx )
2672 return true_rtx ;
2673 if (code == IOR && exp == false_rtx )
2674 return term;
2675 if (attr_equal_p (exp, term))
2676 return exp;
2677
2678 if (GET_CODE (term) == code)
2679 {
2680 exp = insert_right_side (code, exp, XEXP (term, 0),
2681 insn_code, insn_index);
2682 exp = insert_right_side (code, exp, XEXP (term, 1),
2683 insn_code, insn_index);
2684
2685 return exp;
2686 }
2687
2688 if (GET_CODE (exp) == code)
2689 {
2690 rtx new = insert_right_side (code, XEXP (exp, 1),
2691 term, insn_code, insn_index);
2692 if (new != XEXP (exp, 1))
2693 /* Make a copy of this expression and call recursively. */
2694 newexp = attr_rtx (code, XEXP (exp, 0), new);
2695 else
2696 newexp = exp;
2697 }
2698 else
2699 {
2700 /* Insert the new term. */
2701 newexp = attr_rtx (code, exp, term);
2702 }
2703
2704 return simplify_test_exp_in_temp (newexp, insn_code, insn_index);
2705 }
对于我们的例子,我们在 2674 行返回。在 2678 行以下的代码用于复杂的情形来简化测试。在 2678 行的代码块用于转换形如: A AND B, B = C AND D 的表达式。对于这种情形,它将被转换为: (A AND C) AND D 。
类似地,对于形如: A AND B, A = C AND D 的表达式,它将被转换为 C AND (D AND B) 。其中,如果“ D AND B ”的结果就是 D 本身,那么“ A AND B ”可以直接被“ A ”所替代,这就是 2696 行的含义;而如果“ D AND B ”不等于 D ,则由“ C ”和“ D AND B ”重新构造表达式( 2694 行)。这正是 2690 到 2696 行的目的。
在 2704 行, simplify_test_exp_in_temp 将执行更多的简化。我们在后面来看它。
9.3.6. 读入 DEFINE_CPU_UNIT 模式
DEFINE_INSN_RESERVATION模式的概览 一节描述了 define_cpu_unit 模式的细节。对于这个模式,我们使用以下的例子:
88 (define_cpu_unit "pentium-portu,pentium-portv" "pentium") in pentium.md
从这个模式开始,余下的这些模式用于构建基于 DFA 的流水线危险识别器。与我们之前看到的指令级别并行性描述相比,这是更新式的描述形式。至于这个新形式的好处,同样参考 gccint , 10.18.8.3 节。这个模式不能与 define_function_unit 共存在同一个机器描述文件中。
在经过 init_md_reader_args 的处理后,上面的模式将作为以下的 rtx 对象载入内存。
图 43 : DEFINE_CPU_UNIT 模式的例子
Rtx 形式并不是我们所希望的,我们需要 gen_cpu_unit 来打包这些数据。
1652 void
1653 gen_cpu_unit (rtx def) in genautomata.c
1654 {
1655 decl_t decl;
1656 char **str_cpu_units;
1657 int vect_length;
1658 int i;
1659
1660 str_cpu_units = get_str_vect ((char *) XSTR (def, 0), &vect_length, ',',
1661 FALSE);
1662 if (str_cpu_units == NULL)
1663 fatal ("invalid string `%s' in define_cpu_unit", XSTR (def, 0));
1664 for (i = 0; i < vect_length; i++)
1665 {
1666 decl = create_node (sizeof (struct decl));
1667 decl->mode = dm_unit;
1668 decl->pos = 0;
1669 DECL_UNIT (decl)->name = check_name (str_cpu_units [i], decl->pos);
1670 DECL_UNIT (decl)->automaton_name = (char *) XSTR (def, 1);
1671 DECL_UNIT (decl)->query_p = 0;
1672 DECL_UNIT (decl)->min_occ_cycle_num = -1;
1673 DECL_UNIT (decl)->in_set_p = 0;
1674 VLA_PTR_ADD (decls , decl);
1675 num_dfa_decls ++;
1676 }
1677 }
所有单元的信息被保存在由 decls 指向的列表中 , 其类型如下。
907 struct decl in genautomata.c
908 {
909 /* What node in the union? */
910 enum decl_mode mode;
911 pos_t pos;
912 union
913 {
914 struct unit_decl unit;
915 struct bypass_decl bypass;
916 struct automaton_decl automaton;
917 struct excl_rel_decl excl;
918 struct unit_pattern_rel_decl presence;
919 struct unit_pattern_rel_decl absence;
920 struct reserv_decl reserv;
921 struct insn_reserv_decl insn_reserv;
922 } decl;
923 };
196 typedef struct decl *decl_t ; in genautomata.c
decl 是由所有用于构建基于 DFA 的流水线危险识别器的模式所共享的类型。对于 define_cpu_unit 模式,使用了 unit_decl 域。
727 struct unit_decl in genautomata.c
728 {
729 char *name;
730 /* NULL if the automaton name is absent. */
731 char *automaton_name;
732 /* If the following value is not zero, the cpu unit reservation is
733 described in define_query_cpu_unit. */
734 char query_p;
735
736 /* The following fields are defined by checker. */
737
738 /* The following field value is nonzero if the unit is used in an
739 regexp. */
740 char unit_is_used;
741
742 /* The following field value is order number (0, 1, ...) of given
743 unit. */
744 int unit_num;
745 /* The following field value is corresponding declaration of
746 automaton which was given in description. If the field value is
747 NULL then automaton in the unit declaration was absent. */
748 struct automaton_decl *automaton_decl;
749 /* The following field value is maximal cycle number (1, ...) on
750 which given unit occurs in insns. Zero value means that given
751 unit is not used in insns. */
752 int max_occ_cycle_num;
753 /* The following field value is minimal cycle number (0, ...) on
754 which given unit occurs in insns. -1 value means that given
755 unit is not used in insns. */
756 int min_occ_cycle_num;
757 /* The following list contains units which conflict with given
758 unit. */
759 unit_set_el_t excl_list;
760 /* The following list contains patterns which are required to
761 reservation of given unit. */
762 pattern_set_el_t presence_list;
763 pattern_set_el_t final_presence_list;
764 /* The following list contains patterns which should be not present
765 in reservation for given unit. */
766 pattern_set_el_t absence_list;
767 pattern_set_el_t final_absence_list;
768 /* The following is used only when `query_p' has nonzero value.
769 This is query number for the unit. */
770 int query_num;
771 /* The following is the last cycle on which the unit was checked for
772 correct distributions of units to automata in a regexp. */
773 int last_distribution_check_cycle;
774
775 /* The following fields are defined by automaton generator. */
776
777 /* The following field value is number of the automaton to which
778 given unit belongs. */
779 int corresponding_automaton_num;
780 /* If the following value is not zero, the cpu unit is present in a
781 `exclusion_set' or in right part of a `presence_set',
782 `final_presence_set', `absence_set', and
783 `final_absence_set'define_query_cpu_unit. */
784 char in_set_p;
785 };
在上面的 1660 行, get_str_vect 获取由“ , ”分隔的单元名。宏 DECL_UNIT 将访问 decl 的 unit_decl 域。