9.3.4. Read in DEFINE_DELAY pattern
For the details of DEFINE_DELAY, please refer to 8.1.1 Overview of DEFINE_DELAY pattern. For this pattern, we use the following example:
278 (define_delay (and (eq_attr "type" "branch") in mips.md
279 (eq (symbol_ref "TARGET_MIPS16") (const_int 0)))
280 [(eq_attr "can_delay" "yes")
281 (nil)
282 (and (eq_attr "branch_likely" "yes")
283 (eq_attr "can_delay" "yes"))])
After being processed by init_md_reader_args, the above pattern is loaded into memory as the rtx object shown below.
figure 41 : example of DEFINE_DELAY pattern
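gen_delay checks that the number of elements in the vector is a multiple of three, records whether any annul-true or annul-false condition is present, and then simply links these objects together.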
4430 static void
4431 gen_delay (rtx def, int lineno) in genattrtab.c
4432 {
4433 struct delay_desc *delay;
4434 int i;
4435
4436 if (XVECLEN (def, 1) % 3 != 0)
4437 {
4438 message_with_line (lineno,
4439 "number of elements in DEFINE_DELAY must be multiple of three");
4440 have_error = 1;
4441 return ;
4442 }
4443
4444 for (i = 0; i < XVECLEN (def, 1); i += 3)
4445 {
4446 if (XVECEXP (def, 1, i + 1))
4447 have_annul_true = 1;
4448 if (XVECEXP (def, 1, i + 2))
4449 have_annul_false = 1;
4450 }
4451
4452 delay = oballoc (sizeof (struct delay_desc));
4453 delay->def = def;
4454 delay->num = ++num_delays ;
4455 delay->next = delays ;
4456 delay->lineno = lineno;
4457 delays = delay;
4458 }
At line 4433, delay_desc is a simple structure; it has the following definition.
213 struct delay_desc in genattrtab.c
214 {
215 rtx def; /* DEFINE_DELAY expression. */
216 struct delay_desc *next; /* Next DEFINE_DELAY. */
217 int num; /* Number of DEFINE_DELAY, starting at 1. */
218 int lineno; /* Line number. */
219 };
9.3.5. Read in DEFINE_FUNCTION_UNIT pattern
Section 8.1.2 Overview of DEFINE_FUNCTION_UNIT pattern describes the details of the DEFINE_FUNCTION_UNIT pattern. For this pattern, we use the following example:
43 (define_function_unit "k6_alux" 1 0 in k6.md
44 (and (eq_attr "cpu" "k6")
45 (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,cld"))
46 1 1)
This pattern is the old form of instruction-level parallelism description, as opposed to the patterns we will see in the following sections. The purpose of both forms is to produce the pipeline hazard recognizer, but the two forms can’t coexist in the same machine description file. For the drawbacks of the old pipeline description compared with the new one, please refer to gccint, section 10.18.8.3.
In fact, from its definition we can see that this pattern describes the system from the point of view of function units: it tells us how long the unit, once occupied, will remain unavailable, how long it takes before the result can be used as an input, and which instructions occupy the unit.
The newer form — for instance define_insn_reservation, which we will see below — describes the system from the point of view of instructions instead: it tells us which units an instruction occupies at every stage of the pipeline during its execution (a pattern is used to describe this), and when its result becomes available. This is of course the better form: since the goal of the scheduler is to issue as many instructions as possible, describing the system in terms of instructions serves that purpose better.
After being processed by init_md_reader_args, the above pattern is loaded into memory as the rtx object shown below.
figure 42 : example of DEFINE_FUNCTION_UNIT pattern
gen_unit extracts information from this rtx object. Each function unit is identified by its name: define_function_unit patterns that share a name describe the same unit, and must then agree on multiplicity and simultaneity.
4466 static void
4467 gen_unit (rtx def, int lineno) in genattrtab.c
4468 {
4469 struct function_unit *unit;
4470 struct function_unit_op *op;
4471 const char *name = XSTR (def, 0);
4472 int multiplicity = XINT (def, 1);
4473 int simultaneity = XINT (def, 2);
4474 rtx condexp = XEXP (def, 3);
4475 int ready_cost = MAX (XINT (def, 4), 1);
4476 int issue_delay = MAX (XINT (def, 5), 1);
4477
4478 /* See if we have already seen this function unit. If so, check that
4479 the multiplicity and simultaneity values are the same. If not, make
4480 a structure for this function unit. */
4481 for (unit = units ; unit; unit = unit->next)
4482 if (! strcmp (unit->name, name))
4483 {
4484 if (unit->multiplicity != multiplicity
4485 || unit->simultaneity != simultaneity)
4486 {
4487 message_with_line (lineno,
4488 "differing specifications given for function unit %s",
4489 unit->name);
4490 message_with_line (unit->first_lineno, "previous definition");
4491 have_error = 1;
4492 return ;
4493 }
4494 break ;
4495 }
4496
4497 if (unit == 0)
4498 {
4499 unit = oballoc (sizeof (struct function_unit ));
4500 unit->name = name;
4501 unit->multiplicity = multiplicity;
4502 unit->simultaneity = simultaneity;
4503 unit->issue_delay.min = unit->issue_delay.max = issue_delay;
4504 unit->num = num_units ++;
4505 unit->num_opclasses = 0;
4506 unit->condexp = false_rtx ;
4507 unit->ops = 0;
4508 unit->next = units ;
4509 unit->first_lineno = lineno;
4510 units = unit;
4511 }
4512 else
4513 XSTR (def, 0) = unit->name;
4514
4515 /* Make a new operation class structure entry and initialize it. */
4516 op = oballoc (sizeof (struct function_unit_op ));
4517 op->condexp = condexp;
4518 op->num = unit->num_opclasses++;
4519 op->ready = ready_cost;
4520 op->issue_delay = issue_delay;
4521 op->next = unit->ops;
4522 op->lineno = lineno;
4523 unit->ops = op;
4524 num_unit_opclasses ++;
4525
4526 /* Set our issue expression based on whether or not an optional conflict
4527 vector was specified. */
4528 if (XVEC (def, 6))
4529 {
4530 /* Compute the IOR of all the specified expressions. */
4531 rtx orexp = false_rtx ;
4532 int i;
4533
4534 for (i = 0; i < XVECLEN (def, 6); i++)
4535 orexp = insert_right_side (IOR, orexp, XVECEXP (def, 6, i), -2, -2);
4536
4537 op->conflict_exp = orexp;
4538 extend_range (&unit->issue_delay, 1, issue_delay);
4539 }
4540 else
4541 {
4542 op->conflict_exp = true_rtx ;
4543 extend_range (&unit->issue_delay, issue_delay, issue_delay);
4544 }
4545
4546 /* Merge our conditional into that of the function unit so we can determine
4547 which insns are used by the function unit. */
4548 unit->condexp = insert_right_side (IOR, unit->condexp, op->condexp, -2, -2);
4549 }
The condexp field of function_unit points to the expression which, if TRUE, indicates that an instruction needs the unit. The created function_unit object is linked at the head of the list units.
238 struct function_unit in genattrtab.c
239 {
240 const char *name; /* Function unit name. */
241 struct function_unit *next; /* Next function unit. */
242 int num; /* Ordinal of this unit type. */
243 int multiplicity; /* Number of units of this type. */
244 int simultaneity; /* Maximum number of simultaneous insns
245 on this function unit or 0 if unlimited. */
246 rtx condexp; /* Expression TRUE for insn needing unit. */
247 int num_opclasses; /* Number of different operation types. */
248 struct function_unit_op *ops; /* Pointer to first operation type. */
249 int needs_conflict_function; /* Nonzero if a conflict function required. */
250 int needs_blockage_function; /* Nonzero if a blockage function required. */
251 int needs_range_function; /* Nonzero if blockage range function needed. */
252 rtx default_cost; /* Conflict cost, if constant. */
253 struct range issue_delay; /* Range of issue delay values. */
254 int max_blockage; /* Maximum time an insn blocks the unit. */
255 int first_lineno; /* First seen line number. */
256 };
A function unit may be used by more than one group of instructions (the instructions matched by a single define_function_unit can be regarded as a group, since they share the same issue delay, ready cost, and conflict list if present). As each group may have its own values for these parameters, an extra structure is needed to describe the groups. Structure function_unit_op is designed for this purpose.
223 struct function_unit_op in genattrtab.c
224 {
225 rtx condexp; /* Expression TRUE for applicable insn. */
226 struct function_unit_op *next; /* Next operation for this function unit. */
227 int num; /* Ordinal for this operation type in unit. */
228 int ready; /* Cost until data is ready. */
229 int issue_delay; /* Cost until unit can accept another insn. */
230 rtx conflict_exp; /* Expression TRUE for insns incurring issue delay. */
231 rtx issue_exp; /* Expression computing issue delay. */
232 int lineno; /* Line number. */
233 };
Here, the condexp field of function_unit_op contains the rtx object of the test that recognizes the instructions of the group. It is easy to see that, once gen_unit has handled all related define_function_unit patterns, the condexp field of function_unit is the IOR, built by insert_right_side, of the condexp of all the function_unit_op belonging to it.
insert_right_side accepts two expressions and creates a canonical AND or IOR expression (according to its code argument) from them. It has the following definition.
2653 static rtx in genattrtab.c
2654 insert_right_side (enum rtx_code code, rtx exp, rtx term, int insn_code, int insn_index)
2655 {
2656 rtx newexp;
2657
2658 /* Avoid consing in some special cases. */
2659 if (code == AND && term == true_rtx )
2660 return exp;
2661 if (code == AND && term == false_rtx )
2662 return false_rtx ;
2663 if (code == AND && exp == true_rtx )
2664 return term;
2665 if (code == AND && exp == false_rtx )
2666 return false_rtx ;
2667 if (code == IOR && term == true_rtx )
2668 return true_rtx ;
2669 if (code == IOR && term == false_rtx )
2670 return exp;
2671 if (code == IOR && exp == true_rtx )
2672 return true_rtx ;
2673 if (code == IOR && exp == false_rtx )
2674 return term;
2675 if (attr_equal_p (exp, term))
2676 return exp;
2677
2678 if (GET_CODE (term) == code)
2679 {
2680 exp = insert_right_side (code, exp, XEXP (term, 0),
2681 insn_code, insn_index);
2682 exp = insert_right_side (code, exp, XEXP (term, 1),
2683 insn_code, insn_index);
2684
2685 return exp;
2686 }
2687
2688 if (GET_CODE (exp) == code)
2689 {
2690 rtx new = insert_right_side (code, XEXP (exp, 1),
2691 term, insn_code, insn_index);
2692 if (new != XEXP (exp, 1))
2693 /* Make a copy of this expression and call recursively. */
2694 newexp = attr_rtx (code, XEXP (exp, 0), new);
2695 else
2696 newexp = exp;
2697 }
2698 else
2699 {
2700 /* Insert the new term. */
2701 newexp = attr_rtx (code, exp, term);
2702 }
2703
2704 return simplify_test_exp_in_temp (newexp, insn_code, insn_index);
2705 }
For our example, we return at line 2674. The code from line 2678 on handles the more complex cases and simplifies the test. The block at line 2678 transforms an expression like A AND B where B = C AND D; in this case the result is (A AND C) AND D.
Similarly, an expression like A AND B where A = C AND D is transformed into C AND (D AND B). If inserting B into D returns D itself unchanged, "A AND B" can be replaced by "A", which is the meaning of line 2696. Otherwise a new expression is built from "C" and "D AND B" at line 2694; this is the purpose of lines 2690 to 2696.
At line 2704, simplify_test_exp_in_temp will do more general simplification. We will see it later.
9.3.6. Read in DEFINE_CPU_UNIT pattern
Section 8.1.3 Overview of DEFINE_INSN_RESERVATION pattern describes the details of the define_cpu_unit pattern. For this pattern, we use the following example:
88 (define_cpu_unit "pentium-portu,pentium-portv" "pentium") in pentium.md
Beginning with this pattern, the patterns we see are used to construct the DFA-based pipeline hazard recognizer, which is the new form compared with the instruction-level parallelism description we have just seen above. For the merits of the new form, also refer to gccint, section 10.18.8.3. This pattern must not coexist with define_function_unit in the same machine description file.
After being processed by init_md_reader_args, the above pattern is loaded into memory as the rtx object shown below.
figure 43 : example of DEFINE_CPU_UNIT pattern
The rtx form is not what we want; we need gen_cpu_unit to pack the data.
1652 void
1653 gen_cpu_unit (rtx def) in genautomata.c
1654 {
1655 decl_t decl;
1656 char **str_cpu_units;
1657 int vect_length;
1658 int i;
1659
1660 str_cpu_units = get_str_vect ((char *) XSTR (def, 0), &vect_length, ',',
1661 FALSE);
1662 if (str_cpu_units == NULL)
1663 fatal ("invalid string `%s' in define_cpu_unit", XSTR (def, 0));
1664 for (i = 0; i < vect_length; i++)
1665 {
1666 decl = create_node (sizeof (struct decl));
1667 decl->mode = dm_unit;
1668 decl->pos = 0;
1669 DECL_UNIT (decl)->name = check_name (str_cpu_units [i], decl->pos);
1670 DECL_UNIT (decl)->automaton_name = (char *) XSTR (def, 1);
1671 DECL_UNIT (decl)->query_p = 0;
1672 DECL_UNIT (decl)->min_occ_cycle_num = -1;
1673 DECL_UNIT (decl)->in_set_p = 0;
1674 VLA_PTR_ADD (decls , decl);
1675 num_dfa_decls ++;
1676 }
1677 }
All unit information is appended (by VLA_PTR_ADD) to decls, whose elements are of type decl_t, defined as follows.
907 struct decl in genautomata.c
908 {
909 /* What node in the union? */
910 enum decl_mode mode;
911 pos_t pos;
912 union
913 {
914 struct unit_decl unit;
915 struct bypass_decl bypass;
916 struct automaton_decl automaton;
917 struct excl_rel_decl excl;
918 struct unit_pattern_rel_decl presence;
919 struct unit_pattern_rel_decl absence;
920 struct reserv_decl reserv;
921 struct insn_reserv_decl insn_reserv;
922 } decl;
923 };
196 typedef struct decl *decl_t ; in genautomata.c
decl is the type shared by all patterns used for constructing the DFA-based pipeline hazard recognizer. For the define_cpu_unit pattern, the unit field (of type unit_decl) is used.
727 struct unit_decl in genautomata.c
728 {
729 char *name;
730 /* NULL if the automaton name is absent. */
731 char *automaton_name;
732 /* If the following value is not zero, the cpu unit reservation is
733 described in define_query_cpu_unit. */
734 char query_p;
735
736 /* The following fields are defined by checker. */
737
738 /* The following field value is nonzero if the unit is used in an
739 regexp. */
740 char unit_is_used;
741
742 /* The following field value is order number (0, 1, ...) of given
743 unit. */
744 int unit_num;
745 /* The following field value is corresponding declaration of
746 automaton which was given in description. If the field value is
747 NULL then automaton in the unit declaration was absent. */
748 struct automaton_decl *automaton_decl;
749 /* The following field value is maximal cycle number (1, ...) on
750 which given unit occurs in insns. Zero value means that given
751 unit is not used in insns. */
752 int max_occ_cycle_num;
753 /* The following field value is minimal cycle number (0, ...) on
754 which given unit occurs in insns. -1 value means that given
755 unit is not used in insns. */
756 int min_occ_cycle_num;
757 /* The following list contains units which conflict with given
758 unit. */
759 unit_set_el_t excl_list;
760 /* The following list contains patterns which are required to
761 reservation of given unit. */
762 pattern_set_el_t presence_list;
763 pattern_set_el_t final_presence_list;
764 /* The following list contains patterns which should be not present
765 in reservation for given unit. */
766 pattern_set_el_t absence_list;
767 pattern_set_el_t final_absence_list;
768 /* The following is used only when `query_p' has nonzero value.
769 This is query number for the unit. */
770 int query_num;
771 /* The following is the last cycle on which the unit was checked for
772 correct distributions of units to automata in a regexp. */
773 int last_distribution_check_cycle;
774
775 /* The following fields are defined by automaton generator. */
776
777 /* The following field value is number of the automaton to which
778 given unit belongs. */
779 int corresponding_automaton_num;
780 /* If the following value is not zero, the cpu unit is present in a
781 `exclusion_set' or in right part of a `presence_set',
782 `final_presence_set', `absence_set', and
783 `final_absence_set'define_query_cpu_unit. */
784 char in_set_p;
785 };
At line 1660 above, get_str_vect fetches the units’ names, which are separated by ",". Macro DECL_UNIT above accesses the unit field (of type unit_decl) of decl.