9.6.4.1.3. 输出状态替代表
在自动机的生成一节中我们看到,不同等效类别的指令从某个状态触发相同的状态迁移是可能的。这些指令被定义为状态替代(state alternative)。为了最小化自动机,以及把NDFA转换为DFA,这些状态将被合并为一个复合状态,而域state_alts记录了这些状态的数目。与状态替代相关的表及函数都只用于调试目的。
/* Build the state-alternatives table of AUTOMATON and print it.

   For every state collected by pass_states, and for every outgoing arc
   whose insn is the first one of its equivalence class, the arc's
   state_alts value is stored in a per-state vector; that vector is then
   added to automaton->state_alts_table under the state's
   order_state_num.  Finally the table is written out through
   output_state_ainsn_table.  */
7779 static void
7780 output_state_alts_table (automaton_t automaton)   /* in genautomata.c */
7781 {
7782   state_t *state_ptr;
7783   arc_t arc;
7784   vla_hwint_t state_alts_vect;
7785
7786   undefined_vect_el_value = 0; /* no alts when transition is not possible */
7787   automaton->state_alts_table = create_state_ainsn_table (automaton);
7788   /* Create vect of pointers to states ordered by num of transitions
7789      from the state (state with the maximum num is the first).  */
7790   VLA_PTR_CREATE (output_states_vect, 1500, "output states vector");
7791   pass_states (automaton, add_states_vect_el);
7792   qsort (VLA_PTR_BEGIN (output_states_vect),
7793          VLA_PTR_LENGTH (output_states_vect),
7794          sizeof (state_t), compare_transition_els_num);
7795   /* Create base, comb, and check vectors.  */
7796   VLA_HWINT_CREATE (state_alts_vect, 500, "state alts vector");
7797   for (state_ptr = VLA_PTR_BEGIN (output_states_vect);
7798        state_ptr <= (state_t *) VLA_PTR_LAST (output_states_vect);
7799        state_ptr++)
7800     {
7801       VLA_HWINT_NULLIFY (state_alts_vect);
7802       for (arc = first_out_arc (*state_ptr);
7803            arc != NULL;
7804            arc = next_out_arc (arc))
7805         {
7806           if (arc->insn == NULL)
7807             abort ();
           /* Only the representative insn of each equivalence class
              contributes an element.  */
7808           if (arc->insn->first_ainsn_with_given_equialence_num)
7809             add_vect_el (&state_alts_vect, arc->insn, arc->state_alts);
7810         }
7811       add_vect (automaton->state_alts_table, (*state_ptr)->order_state_num,
7812                 VLA_HWINT_BEGIN (state_alts_vect),
7813                 VLA_HWINT_LENGTH (state_alts_vect));
7814     }
     /* Line 7815 (the callee name) was missing from the listing; without
        it the argument list below is not a call.  */
7815   output_state_ainsn_table
7816     (automaton->state_alts_table, (char *) "state insn alternatives",
7817      output_state_alts_full_vect_name, output_state_alts_comb_vect_name,
7818      output_state_alts_check_vect_name, output_state_alts_base_vect_name);
7819   VLA_PTR_DELETE (output_states_vect);
7820   VLA_HWINT_DELETE (state_alts_vect);
7821 }
在7809行,state_alts记录了替代预订(alternative reservation)的数目,它可以被用于从给定状态由给定指令触发的迁移。state_alts的处理与我们之前看到的ainsn的处理十分类似。那么在输出用于state_alts的代码之后,我们得到以下结果,假定这次在state_ainsn_table中使用full_vect,并且我们具有少于65536(256*256)个有效状态。
/* Vector translating external insn codes to internal ones. */
static const unsigned char translate_0 [] ATTRIBUTE_UNUSED = {
`equivalent class number` // in description order
};
/* Comb vector for state transitions */
static const unsigned char transitions_0 [] ATTRIBUTE_UNUSED = {
`content of comb_vect` // for ainsn, refer to add_vect
};
/* Check vector for state transitions */
static const unsigned char check_0 [] ATTRIBUTE_UNUSED = {
`content of check_vect` // for ainsn, refer to add_vect
};
/* Base vector for state transitions */
static const unsigned char base_0 [] ATTRIBUTE_UNUSED = {
`content of base_vect` // for ainsn, refer to add_vect
};
#if AUTOMATON_STATE_ALTS
/* Vector for state insn alternatives */
static const unsigned short state_alts_0 [] ATTRIBUTE_UNUSED = {
`content of full_vect` // for state_alts, refer to add_vect
};
#endif
9.6.4.1.4. 输出发布延迟表
为了有效率地发布指令,我们必须知道在功能单元上指令间的发布延迟。在define_function_unit模式中,发布延迟被封闭在模式声明里。不过,对于基于自动机的描述,在其生成期间,我们已经构建了所有的状态,并把它们与关联的指令绑定。通过遍历这些状态,我们可以收集发布延迟的信息,而不需要把它们封装入声明。显然,这个方式更灵活、强大。
/* Compute the minimal issue delay for every (state, insn equivalence
   class) pair of AUTOMATON and print the result to output_file as a
   constant vector.

   The uncompressed vector is indexed by
     order_state_num * insn_equiv_classes_num + insn_equiv_class_num.
   Several delays are then packed into each 8-bit element of the
   emitted vector; how many depends on the largest delay found.  As
   side effects the function sets automaton->max_min_delay and
   automaton->min_issue_delay_table_compression_factor.  */
7892 static void
7893 output_min_issue_delay_table (automaton_t automaton)   /* in genautomata.c */
7894 {
7895   vla_hwint_t min_issue_delay_vect;
7896   vla_hwint_t compressed_min_issue_delay_vect;
7897   vect_el_t min_delay;
7898   ainsn_t ainsn;
7899   state_t *state_ptr;
7900   int i;
7901
7902   /* Create vect of pointers to states ordered by num of transitions
7903      from the state (state with the maximum num is the first).  */
7904   VLA_PTR_CREATE (output_states_vect, 1500, "output states vector");
7905   pass_states (automaton, add_states_vect_el);
7906   VLA_HWINT_CREATE (min_issue_delay_vect, 1500, "min issue delay vector");
7907   VLA_HWINT_EXPAND (min_issue_delay_vect,
7908                     VLA_HWINT_LENGTH (output_states_vect)
7909                     * automaton->insn_equiv_classes_num);
     /* Start with a delay of 0 in every slot.  */
7910   for (i = 0;
7911        i < ((int) VLA_HWINT_LENGTH (output_states_vect)
7912             * automaton->insn_equiv_classes_num);
7913        i++)
7914     VLA_HWINT (min_issue_delay_vect, i) = 0;
7915   automaton->max_min_delay = 0;
     /* Only the first insn of each equivalence class is processed.  */
7916   for (ainsn = automaton->ainsn_list; ainsn != NULL; ainsn = ainsn->next_ainsn)
7917     if (ainsn->first_ainsn_with_given_equialence_num)
7918       {
           /* Reset the per-state cache (-1 means "not computed yet")
              before handling a new insn.  */
7919         for (state_ptr = VLA_PTR_BEGIN (output_states_vect);
7920              state_ptr <= (state_t *) VLA_PTR_LAST (output_states_vect);
7921              state_ptr++)
7922           (*state_ptr)->min_insn_issue_delay = -1;
7923         for (state_ptr = VLA_PTR_BEGIN (output_states_vect);
7924              state_ptr <= (state_t *) VLA_PTR_LAST (output_states_vect);
7925              state_ptr++)
7926           {
7927             min_delay = min_issue_delay (*state_ptr, ainsn);
7928             if (automaton->max_min_delay < min_delay)
7929               automaton->max_min_delay = min_delay;
7930             VLA_HWINT (min_issue_delay_vect,
7931                        (*state_ptr)->order_state_num
7932                        * automaton->insn_equiv_classes_num
7933                        + ainsn->insn_equiv_class_num) = min_delay;
7934           }
7935       }
7936   fprintf (output_file, "/* Vector of min issue delay of insns. */\n");
7937   fprintf (output_file, "static const ");
7938   output_range_type (output_file, 0, automaton->max_min_delay);
7939   fprintf (output_file, " ");
7940   output_min_issue_delay_vect_name (output_file, automaton);
7941   fprintf (output_file, "[] ATTRIBUTE_UNUSED = {\n");
7942   /* Compress the vector.  The factor is the number of delay values
        stored per 8-bit element: 8 values of 1 bit, 4 of 2 bits, 2 of
        4 bits, or a single 8-bit value.  */
7943   if (automaton->max_min_delay < 2)
7944     automaton->min_issue_delay_table_compression_factor = 8;
7945   else if (automaton->max_min_delay < 4)
7946     automaton->min_issue_delay_table_compression_factor = 4;
7947   else if (automaton->max_min_delay < 16)
7948     automaton->min_issue_delay_table_compression_factor = 2;
7949   else
7950     automaton->min_issue_delay_table_compression_factor = 1;
7951   VLA_HWINT_CREATE (compressed_min_issue_delay_vect, 1500,
7952                     "compressed min issue delay vector");
     /* Length is the uncompressed length divided by the factor,
        rounded up.  */
7953   VLA_HWINT_EXPAND (compressed_min_issue_delay_vect,
7954                     (VLA_HWINT_LENGTH (min_issue_delay_vect)
7955                      + automaton->min_issue_delay_table_compression_factor
7956                      - 1)
7957                     / automaton->min_issue_delay_table_compression_factor);
7958   for (i = 0;
7959        i < (int) VLA_HWINT_LENGTH (compressed_min_issue_delay_vect);
7960        i++)
7961     VLA_HWINT (compressed_min_issue_delay_vect, i) = 0;
     /* Value number (i % factor) of an element is placed starting from
        the most significant bits of that element.  */
7962   for (i = 0; i < (int) VLA_HWINT_LENGTH (min_issue_delay_vect); i++)
7963     VLA_HWINT (compressed_min_issue_delay_vect,
7964                i / automaton->min_issue_delay_table_compression_factor)
7965       |= (VLA_HWINT (min_issue_delay_vect, i)
7966           << (8 - (i % automaton->min_issue_delay_table_compression_factor
7967                    + 1)
7968               * (8 / automaton->min_issue_delay_table_compression_factor)));
7969   output_vect (VLA_HWINT_BEGIN (compressed_min_issue_delay_vect),
7970                VLA_HWINT_LENGTH (compressed_min_issue_delay_vect));
7971   fprintf (output_file, "};\n\n");
7972   VLA_PTR_DELETE (output_states_vect);
7973   VLA_HWINT_DELETE (min_issue_delay_vect);
7974   VLA_HWINT_DELETE (compressed_min_issue_delay_vect);
7975 }
已经知道指令被依据所预订资源分组,并且仅一个组内第一个找到的指令设置了域first_ainsn_with_given_equialence_num。至于同属于一个等效类别的指令,它们可以从相同的状态发布,即如果从某个状态可以发布其中一个,其它亦可以发布。
那么在7916行的FOR循环尝试找出在指令组中,到指定状态的最小发布延迟。
/* Return the minimal issue delay of AINSN in STATE.

   A new traversal is started by incrementing curr_state_pass_num; the
   value computed by min_issue_delay_pass_states is stored in
   state->min_insn_issue_delay and returned.  */
7873 static int
7874 min_issue_delay (state_t state, ainsn_t ainsn)   /* in genautomata.c */
7875 {
7876   curr_state_pass_num++;
7877   state->min_insn_issue_delay = min_issue_delay_pass_states (state, ainsn);
7878   return state->min_insn_issue_delay;
7879 }
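注意,在调用min_issue_delay之前,output_min_issue_delay_table(7919~7922行)已经把每个状态的min_insn_issue_delay设置为-1;而这里算出的结果会被保存回state的min_insn_issue_delay域,从而防止同一个状态被多次处理。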
/* Recursive worker for min_issue_delay.

   Returns state->min_insn_issue_delay unchanged when STATE was already
   visited in the current pass or already holds a value other than -1.
   Otherwise the outgoing arcs of STATE are scanned:
     - an arc on AINSN itself gives a delay of 0 and ends the scan;
     - any other arc is followed recursively; if the target yields a
       delay other than -1, one is added when the arc's insn is the
       advance-cycle insn, and the smallest such delay is kept (the
       scan stops early once it reaches 0).
   The result is -1 when no arc produced a delay.  */
7831 static int
7832 min_issue_delay_pass_states (state_t state, ainsn_t ainsn)
7833 {
7834   arc_t arc;
7835   int min_insn_issue_delay, insn_issue_delay;
7836
7837   if (state->state_pass_num == curr_state_pass_num
7838       || state->min_insn_issue_delay != -1)
7839     /* We've entered into a loop or already have the correct value for
7840        given state and ainsn.  */
7841     return state->min_insn_issue_delay;
7842   state->state_pass_num = curr_state_pass_num;
7843   min_insn_issue_delay = -1;
7844   for (arc = first_out_arc (state); arc != NULL; arc = next_out_arc (arc))
7845     if (arc->insn == ainsn)
7846       {
7847         min_insn_issue_delay = 0;
7848         break;
7849       }
7850     else
7851       {
7852         insn_issue_delay = min_issue_delay_pass_states (arc->to_state, ainsn);
7853         if (insn_issue_delay != -1)
7854           {
               /* Crossing an advance-cycle arc costs one cycle.  */
7855             if (arc->insn->insn_reserv_decl
7856                 == DECL_INSN_RESERV (advance_cycle_insn_decl))
7857               insn_issue_delay++;
7858             if (min_insn_issue_delay == -1
7859                 || min_insn_issue_delay > insn_issue_delay)
7860               {
7861                 min_insn_issue_delay = insn_issue_delay;
7862                 if (insn_issue_delay == 0)
7863                   break;
7864               }
7865           }
7866       }
7867   return min_insn_issue_delay;
7868 }
在7847行,该函数一直递归直到找到目标指令,然后在从嵌套调用返回期间,每个碰到的advance_cycle_insn_decl都会递增延迟。注意同一个组内除了第一条的其它指令在查找中都不起作用,它们总是返回-1。
回到output_min_issue_delay_table,在7929行,max_min_delay记录了整个状态集的最大值。根据这个值(7943~7950行选择压缩因子),我们可以把几个延迟值打包进同一个8位的vector元素(7962~7968行的移位操作),从而压缩发布延迟的这个vector。
那么我们得到如下的输出代码(红色部分不是工具输出的)。
/* Vector translating external insn codes to internal ones. */
static const unsigned char translate_0 [] ATTRIBUTE_UNUSED = {
`equivalent class number` // in description order
};
/* Comb vector for state transitions */
static const unsigned char transitions_0 [] ATTRIBUTE_UNUSED = {
`content of comb_vect` // for ainsn, refer to add_vect
};
/* Check vector for state transitions */
static const unsigned char check_0 [] ATTRIBUTE_UNUSED = {
`content of check_vect` // for ainsn, refer to add_vect
};
/* Base vector for state transitions */
static const unsigned char base_0 [] ATTRIBUTE_UNUSED = {
`content of base_vect` // for ainsn, refer to add_vect
};
#if AUTOMATON_STATE_ALTS
/* Vector for state insn alternatives */
static const unsigned short state_alts_0 [] ATTRIBUTE_UNUSED = {
`content of full_vect` // for state_alts, refer to add_vect
};
#endif
/* Vector of min issue delay of insns. */
static const unsigned char min_issue_delay_0 [] ATTRIBUTE_UNUSED = {
`content of compressed_min_issue_delay_vect` // refer to output_min_issue_delay_table
};
9.6.4.1.5. 输出锁定状态表
存在这样的状态:在其中除了advance_cycle_insn_decl以外不能发布其它指令——即CPU只能直接进入下一周期。output_dead_lock_vect找出这些状态并为之构建表。在8007~8009行,可以看到对这些状态的判断。
/* Print to output_file a vector of 0/1 flags, indexed by
   order_state_num, for the states of AUTOMATON.

   A state gets flag 1 when it has exactly one outgoing arc and that
   arc's insn is the advance-cycle insn; otherwise it gets 0.  A state
   without any outgoing arc aborts the program.  Unless NDEBUG is
   defined, locked_states_num is incremented for every flagged state.  */
7985 static void
7986 output_dead_lock_vect (automaton_t automaton)   /* in genautomata.c */
7987 {
7988   state_t *state_ptr;
7989   arc_t arc;
7990   vla_hwint_t dead_lock_vect;
7991
7992   /* Create vect of pointers to states ordered by num of
7993      transitions from the state (state with the maximum num is the
7994      first).  */
7995   VLA_PTR_CREATE (output_states_vect, 1500, "output states vector");
7996   pass_states (automaton, add_states_vect_el);
7997   VLA_HWINT_CREATE (dead_lock_vect, 1500, "is dead locked vector");
7998   VLA_HWINT_EXPAND (dead_lock_vect, VLA_HWINT_LENGTH (output_states_vect));
7999   for (state_ptr = VLA_PTR_BEGIN (output_states_vect);
8000        state_ptr <= (state_t *) VLA_PTR_LAST (output_states_vect);
8001        state_ptr++)
8002     {
8003       arc = first_out_arc (*state_ptr);
8004       if (arc == NULL)
8005         abort ();
8006       VLA_HWINT (dead_lock_vect, (*state_ptr)->order_state_num)
8007         = (next_out_arc (arc) == NULL
8008            && (arc->insn->insn_reserv_decl
8009                == DECL_INSN_RESERV (advance_cycle_insn_decl)) ? 1 : 0);
8010 #ifndef NDEBUG
8011       if (VLA_HWINT (dead_lock_vect, (*state_ptr)->order_state_num))
8012         locked_states_num++;
8013 #endif
8014     }
8015   fprintf (output_file, "/* Vector for locked state flags. */\n");
8016   fprintf (output_file, "static const ");
8017   output_range_type (output_file, 0, 1);
8018   fprintf (output_file, " ");
8019   output_dead_lock_vect_name (output_file, automaton);
8020   fprintf (output_file, "[] = {\n");
8021   output_vect (VLA_HWINT_BEGIN (dead_lock_vect),
8022                VLA_HWINT_LENGTH (dead_lock_vect));
8023   fprintf (output_file, "};\n\n");
8024   VLA_HWINT_DELETE (dead_lock_vect);
8025   VLA_PTR_DELETE (output_states_vect);
8026 }
从上面8009行,我们可以看到dead_lock_vect的内容是1或0,1表示不能发布任何有意义的指令,0则相反。
然后output_tables将为define_query_cpu_unit输出表。在这里我们忽略它。不过,这个过程与其它表的过程很类似。现在在输出文件中我们得到如下(红色部分不是工具输出的)。并假定自动机没有被预订的单元(没有define_query_cpu_unit)。
/* Vector translating external insn codes to internal ones. */
static const unsigned char translate_0 [] ATTRIBUTE_UNUSED = {
`equivalent class number` // in description order
};
/* Comb vector for state transitions */
static const unsigned char transitions_0 [] ATTRIBUTE_UNUSED = {
`content of comb_vect` // for ainsn, refer to add_vect
};
/* Check vector for state transitions */
static const unsigned char check_0 [] ATTRIBUTE_UNUSED = {
`content of check_vect` // for ainsn, refer to add_vect
};
/* Base vector for state transitions */
static const unsigned char base_0 [] ATTRIBUTE_UNUSED = {
`content of base_vect` // for ainsn, refer to add_vect
};
#if AUTOMATON_STATE_ALTS
/* Vector for state insn alternatives */
static const unsigned short state_alts_0 [] ATTRIBUTE_UNUSED = {
`content of full_vect` // for state_alts, refer to add_vect
};
#endif
/* Vector of min issue delay of insns. */
static const unsigned char min_issue_delay_0 [] ATTRIBUTE_UNUSED = {
`content of compressed_min_issue_delay_vect` // refer to output_min_issue_delay_table
};
/* Vector for locked state flags. */
static const unsigned char dead_lock_0 [] = {
`content of dead_lock_vect` // refer to output_dead_lock_vect
};