从GIMPLE到RTL
从GIMPLE到RTL
GIMPLE是一种与前端编程语言和后端目标机器无关的中间表示形式,为了实现对多种目标机器的支持,GCC引入了RTL。这里的从GIMPLE到RTL,准确地讲是从GIMPLE到IR-RTL的转换,也就是将GIMPLE转换成insn序列的过程。
表示insn的RTX包含下列6种RTX表达式:
/* The six insn-level RTX codes.  Each DEF_RTL_EXPR entry lists four
   arguments; per GCC's rtl.def these are the code's enumerator, its
   printed name, its operand format string and its RTX class
   (NOTE(review): confirm the argument meanings against the GCC version
   being described).  */
DEF_RTL_EXPR(INSN, "insn", "iuuBieie", RTX_INSN)
DEF_RTL_EXPR(JUMP_INSN, "jump_insn", "iuuBieie0", RTX_INSN)
DEF_RTL_EXPR(CALL_INSN, "call_insn", "iuuBieiee", RTX_INSN)
DEF_RTL_EXPR(BARRIER, "barrier", "iuu00000", RTX_EXTRA)
DEF_RTL_EXPR(CODE_LABEL, "code_label", "iuuB00is", RTX_EXTRA)
DEF_RTL_EXPR(NOTE, "note", "iuuB0ni", RTX_EXTRA)
GIMPLE序列
在GIMPLE序列生成之后,GCC在GIMPLE中间格式上进行了各种各样的与目标机器无关的处理和优化,这些处理被组织成一系列的处理过程(Pass),其中针对GIMPLE的最后一个关键处理过程为pass_expand,该Pass就完成了GIMPLE向RTL的转换,即由GIMPLE中间结果生成RTL形式的insn。从GIMPLE向RTL的转换过程是一个从机器无关信息向机器相关信息的转换。
测试代码:
/* Test input: adds the integers 0 through 9 and returns the total (45).
   argc and argv are not used.  */
int main(int argc, char *argv[]) {
/* This outer i is never read: the loop below declares its own i, which
   shadows it for the whole loop.  */
int i = 0;
int sum = 0;
for (int i = 0; i < 10; i++) {
sum = sum + i;
}
return sum;
}
CFG如下所示:
<bb 2> :
gimple_assign <integer_cst, i, 0, NULL, NULL>
gimple_assign <integer_cst, sum, 0, NULL, NULL>
gimple_assign <integer_cst, i, 0, NULL, NULL>
goto <bb 4>; [INV]
<bb 3> :
gimple_assign <plus_expr, sum, sum, i, NULL>
gimple_assign <plus_expr, i, i, 1, NULL>
<bb 4> :
gimple_cond <le_expr, i, 9, NULL, NULL>
goto <bb 3>; [INV]
else
goto <bb 5>; [INV]
<bb 5> :
gimple_assign <var_decl, D.1954, sum, NULL, NULL>
<bb 6> :
gimple_label <<L3>>
gimple_return <D.1954>
一些典型数据结构
在以函数为单位进行RTL生成时,需要对当前函数的RTL信息进行维护,这主要是通过rtl_data这个结构体来描述的,该结构体在function.h中定义:
/* Data structures maintained for the function currently being processed
   in RTL form.  Several members carry an x_ prefix and are reached through
   same-named accessor macros that expand to crtl->x_<name>.  */
struct rtl_data GTY(())
{
/* Nested per-function state blocks.  NOTE(review): their contents are not
   shown here; judging only by the type names they cover expression
   expansion, insn emission, varasm, incoming arguments, function
   subsections and exception handling -- confirm against each struct's
   definition.  */
struct expr_status expr;
struct emit_status emit;
struct varasm_status varasm;
struct incoming_args args;
struct function_subsections subsections;
struct rtl_eh eh;
/* For function.c */
/* # of bytes of outgoing arguments. If ACCUMULATE_OUTGOING_ARGS is
defined, the needed space is pushed by the prologue. */
int outgoing_args_size;
/* If nonzero, an RTL expression for the location at which the current
function returns its result. If the current function returns its
result in a register, current_function_return_rtx will always be
the hard register containing the result. */
rtx return_rtx;
/* Opaque pointer used by get_hard_reg_initial_val and
has_hard_reg_initial_val (see integrate.[hc]). */
struct initial_value_struct *hard_reg_initial_vals;
/* A variable living at the top of the frame that holds a known value.
Used for detecting stack clobbers. */
tree stack_protect_guard;
/* List (chain of EXPR_LIST) of labels heading the current handlers for
nonlocal gotos. */
rtx x_nonlocal_goto_handler_labels;
/* Label that will go on function epilogue.
Jumping to this label serves as a "return" instruction
on machines which require execution of the epilogue on all returns. */
rtx x_return_label;
/* Label that will go on the end of function epilogue.
Jumping to this label serves as a "naked return" instruction
on machines which require execution of the epilogue on all returns. */
rtx x_naked_return_label;
/* List (chain of EXPR_LISTs) of all stack slots in this function.
Made for the sake of unshare_all_rtl. */
rtx x_stack_slot_list;
/* Place after which to insert the tail_recursion_label if we need one.
NOTE(review): this description talks about a tail-recursion label, while
the field is named x_stack_check_probe_note; the comment looks stale --
confirm the field's actual role before relying on it. */
rtx x_stack_check_probe_note;
/* Location at which to save the argument pointer if it will need to be
referenced. There are two cases where this is done: if nonlocal gotos
exist, or if vars stored at an offset from the argument pointer will be
needed by inner routines. */
rtx x_arg_pointer_save_area;
/* Dynamic Realign Argument Pointer used for realigning stack. */
rtx drap_reg;
/* Offset to end of allocated area of stack frame.
If stack grows down, this is the address of the last stack slot allocated.
If stack grows up, this is the address for the next slot. */
HOST_WIDE_INT x_frame_offset;
/* Insn after which register parms and SAVE_EXPRs are born, if nonopt. */
rtx x_parm_birth_insn;
/* List of all used temporaries allocated, by level. */
VEC(temp_slot_p,gc) *x_used_temp_slots;
/* List of available temp slots. */
struct temp_slot *x_avail_temp_slots;
/* Current nesting level for temporaries. */
int x_temp_slot_level;
/* The largest alignment needed on the stack, including requirement
for outgoing stack alignment. */
unsigned int stack_alignment_needed;
/* Preferred alignment of the end of stack frame, which is preferred
to call other functions. */
unsigned int preferred_stack_boundary;
/* The minimum alignment of parameter stack. */
unsigned int parm_stack_boundary;
/* The largest alignment of slot allocated on the stack. */
unsigned int max_used_stack_slot_alignment;
/* The stack alignment estimated before reload, with consideration of
following factors:
1. Alignment of local stack variables (max_used_stack_slot_alignment)
2. Alignment requirement to call other functions
(preferred_stack_boundary)
3. Alignment of non-local stack variables but might be spilled in
local stack. */
unsigned int stack_alignment_estimated;
/* For reorg. */
/* If some insns can be deferred to the delay slots of the epilogue, the
delay list for them is recorded here. */
rtx epilogue_delay_list;
/* Nonzero if function being compiled called builtin_return_addr or
builtin_frame_address with nonzero count. */
bool accesses_prior_frames;
/* Nonzero if the function calls __builtin_eh_return. */
bool calls_eh_return;
/* Nonzero if function saves all registers, e.g. if it has a nonlocal
label that can reach the exit block via non-exceptional paths. */
bool saves_all_registers;
/* Nonzero if function being compiled has nonlocal gotos to parent
function. */
bool has_nonlocal_goto;
/* Nonzero if function being compiled has an asm statement. */
bool has_asm_statement;
/* This bit is used by the exception handling logic. It is set if all
calls (if any) are sibling calls. Such functions do not have to
have EH tables generated, as they cannot throw. A call to such a
function, however, should be treated as throwing if any of its callees
can throw. */
bool all_throwers_are_sibcalls;
/* Nonzero if stack limit checking should be enabled in the current
function. */
bool limit_stack;
/* Nonzero if profiling code should be generated. */
bool profile;
/* Nonzero if the current function uses the constant pool. */
bool uses_const_pool;
/* Nonzero if the current function uses pic_offset_table_rtx. */
bool uses_pic_offset_table;
/* Nonzero if the current function needs an lsda for exception handling. */
bool uses_eh_lsda;
/* Set when the tail call has been produced. */
bool tail_call_emit;
/* Nonzero if code to initialize arg_pointer_save_area has been emitted. */
bool arg_pointer_save_area_init;
/* Nonzero if current function must be given a frame pointer.
Set in global.c if anything is allocated on the stack there. */
bool frame_pointer_needed;
/* When set, expand should optimize for speed. */
bool maybe_hot_insn_p;
/* Nonzero if function stack realignment is needed. This flag may be
set twice: before and after reload. It is set before reload wrt
stack alignment estimation before reload. It will be changed after
reload if by then criteria of stack realignment is different.
The value set after reload is the accurate one and is finalized. */
bool stack_realign_needed;
/* Nonzero if function stack realignment is tried. This flag is set
only once before reload. It affects register elimination. This
is used to generate DWARF debug info for stack variables. */
bool stack_realign_tried;
/* Nonzero if function being compiled needs dynamic realigned
argument pointer (drap) if stack needs realigning. */
bool need_drap;
/* Nonzero if function stack realignment estimation is done, namely
stack_realign_needed flag has been set before reload wrt estimated
stack alignment info. */
bool stack_realign_processed;
/* Nonzero if function stack realignment has been finalized, namely
stack_realign_needed flag has been set and finalized after reload. */
bool stack_realign_finalized;
/* True if dbr_schedule has already been called for this function. */
bool dbr_scheduled_p;
};
对rtl_data结构体某些字段的访问通过以下宏去完成:
/* Shorthand accessors for members of the rtl_data object that crtl points
   to.  Each x_-prefixed member is exposed under its unprefixed name.  */
#define return_label (crtl->x_return_label)
#define naked_return_label (crtl->x_naked_return_label)
#define stack_slot_list (crtl->x_stack_slot_list)
#define parm_birth_insn (crtl->x_parm_birth_insn)
#define frame_offset (crtl->x_frame_offset)
#define stack_check_probe_note (crtl->x_stack_check_probe_note)
#define arg_pointer_save_area (crtl->x_arg_pointer_save_area)
#define used_temp_slots (crtl->x_used_temp_slots)
#define avail_temp_slots (crtl->x_avail_temp_slots)
#define temp_slot_level (crtl->x_temp_slot_level)
#define nonlocal_goto_handler_labels (crtl->x_nonlocal_goto_handler_labels)
#define frame_pointer_needed (crtl->frame_pointer_needed)
/* Derived predicates: both require stack_realign_needed; they differ only
   in whether need_drap is clear (stack_realign_fp) or set
   (stack_realign_drap), so at most one of them is true at a time.  */
#define stack_realign_fp (crtl->stack_realign_needed && !crtl->need_drap)
#define stack_realign_drap (crtl->stack_realign_needed && crtl->need_drap)
emit是rtl_data结构体中比较重要的一个成员,如下所示,其中记录了当前函数正在处理的insn序列的首尾(rtx x_first_insn、rtx x_last_insn)以及下一条insn将使用的UID(int x_cur_insn_uid):
/* Insn-emission state for the function being compiled: the pseudo-register
   and label counters, both ends of the doubly-linked insn chain, the stack
   of pending nested sequences, the next insn UID, the last emitted
   location, and the per-pseudo pointer-alignment table.  */
struct emit_status GTY(())
{
/* This is reset to LAST_VIRTUAL_REGISTER + 1 at the start of each function.
After rtl generation, it is 1 plus the largest register number used. */
int x_reg_rtx_no;
/* Lowest label number in current function. */
int x_first_label_num;
/* The ends of the doubly-linked chain of rtl for the current function.
Both are reset to null at the start of rtl generation for the function.
start_sequence saves both of these on `sequence_stack' and then starts
a new, nested sequence of insns. */
rtx x_first_insn;
rtx x_last_insn;
/* Stack of pending (incomplete) sequences saved by `start_sequence'.
Each element describes one pending sequence.
The main insn-chain is saved in the last element of the chain,
unless the chain is empty. */
struct sequence_stack *sequence_stack;
/* INSN_UID for next insn emitted.
Reset to 1 for each function compiled. */
int x_cur_insn_uid;
/* Location the last line-number NOTE emitted.
This is used to avoid generating duplicates. */
location_t x_last_location;
/* The length of the regno_pointer_align, regno_decl, and x_regno_reg_rtx
vectors. Since these vectors are needed during the expansion phase when
the total number of registers in the function is not yet known, the
vectors are copied and made bigger when necessary. */
int regno_pointer_align_length;
/* Indexed by pseudo register number, if nonzero gives the known alignment
for that pseudo (if REG_POINTER is set in x_regno_reg_rtx).
Allocated in parallel with x_regno_reg_rtx. */
unsigned char * GTY((skip)) regno_pointer_align;
};
当然可以发现,emit-rtl.c中定义了如下的宏,用来访问当前函数正在处理的insn序列:
/* Shorthand accessors for members of the emit_status embedded as the emit
   member of the rtl_data object that crtl points to.  */
#define first_insn (crtl->emit.x_first_insn)
#define last_insn (crtl->emit.x_last_insn)
#define cur_insn_uid (crtl->emit.x_cur_insn_uid)
#define last_location (crtl->emit.x_last_location)
#define first_label_num (crtl->emit.x_first_label_num)
RTL生成的基本过程
RTL的内部表示是从GIMPLE形式转化而来的,是程序代码另外一种规范的中间表示,记为IR-RTL,目标机器对应的汇编代码就是在IR-RTL基础上生成的。
本质上程序代码的RTL中间表示就是双向链表所链接的insn链表,包括了insn、jump_insn、call_insn、barrier、code_label以及note六种RTX表达形式。
下面看下GIMPLE处理的关键过程(Pass),rtl_opt_pass pass_expand完成了GIMPLE到RTL的转换,该Pass的声明如下,其处理的入口函数为gcc/cfgexpand.c中的gimple_expand_cfg函数:
/* Pass descriptor for the "expand" pass.  It is tagged RTL_PASS and its
   execute hook is gimple_expand_cfg; no gate function is supplied (NULL).
   It requires PROP_gimple_leh and PROP_cfg, provides PROP_rtl and destroys
   PROP_trees.  NOTE(review): a NULL gate presumably means the pass is
   never skipped -- confirm against the pass manager.  */
struct rtl_opt_pass pass_expand =
{
{
RTL_PASS,
"expand", /* name */
NULL, /* gate */
gimple_expand_cfg, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_EXPAND, /* tv_id */
/* ??? If TER is enabled, we actually receive GENERIC. */
PROP_gimple_leh | PROP_cfg, /* properties_required */
PROP_rtl, /* properties_provided */
PROP_trees, /* properties_destroyed */
0, /* todo_flags_start */
TODO_dump_func, /* todo_flags_finish */
}
};
函数调用栈如下(注:下面的代码与上文pass_expand的定义重复,并非实际的调用栈):
/* Pass descriptor for the "expand" pass: tagged RTL_PASS, with
   gimple_expand_cfg as its execute hook and no gate function (NULL).
   It requires PROP_gimple_leh and PROP_cfg, provides PROP_rtl and destroys
   PROP_trees.  */
struct rtl_opt_pass pass_expand =
{
{
RTL_PASS,
"expand", /* name */
NULL, /* gate */
gimple_expand_cfg, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_EXPAND, /* tv_id */
/* ??? If TER is enabled, we actually receive GENERIC. */
PROP_gimple_leh | PROP_cfg, /* properties_required */
PROP_rtl, /* properties_provided */
PROP_trees, /* properties_destroyed */
0, /* todo_flags_start */
TODO_dump_func, /* todo_flags_finish */
}
};
注:在gcc中,GIMPLE到RTL的转换是以函数为单位进行的,每当gcc语法分析完一个函数后,就构建起了该函数的AST,然后对该AST进行规范化并转换成GIMPLE语句。GCC针对该函数的GIMPLE中间表示进行各种优化处理,最后,再执行pass_expand将每个函数的GIMPLE序列转换成RTL序列。
gimple_expand_cfg函数如下:
/* Translate the intermediate representation contained in the CFG
from GIMPLE trees to RTL.
We do conversion per basic block and preserve/update the tree CFG.
This implies we have to do some magic as the CFG can simultaneously
consist of basic blocks containing RTL and GIMPLE trees. This can
confuse the CFG hooks, so be careful to not manipulate CFG during
the expansion. */
static unsigned int
gimple_expand_cfg (void)
{
basic_block bb, init_block;
sbitmap blocks;
edge_iterator ei;
edge e;
/* Some backends want to know that we are expanding to RTL. */
currently_expanding_to_rtl = 1;
rtl_profile_for_bb (ENTRY_BLOCK_PTR);
insn_locators_alloc ();
if (!DECL_BUILT_IN (current_function_decl))
{
/* Eventually, all FEs should explicitly set function_start_locus. */
if (cfun->function_start_locus == UNKNOWN_LOCATION)
set_curr_insn_source_location
(DECL_SOURCE_LOCATION (current_function_decl));
else
set_curr_insn_source_location (cfun->function_start_locus);
}
set_curr_insn_block (DECL_INITIAL (current_function_decl));
prologue_locator = curr_insn_locator ();
/* Make sure first insn is a note even if we don't want linenums.
This makes sure the first insn will never be deleted.
Also, final expects a note to appear there. */
emit_note (NOTE_INSN_DELETED);
/* Mark arrays indexed with non-constant indices with TREE_ADDRESSABLE. */
discover_nonconstant_array_refs ();
targetm.expand_to_rtl_hook ();
crtl->stack_alignment_needed = STACK_BOUNDARY;
crtl->max_used_stack_slot_alignment = STACK_BOUNDARY;
crtl->stack_alignment_estimated = STACK_BOUNDARY;
crtl->preferred_stack_boundary = STACK_BOUNDARY;
cfun->cfg->max_jumptable_ents = 0;
/* Expand the variables recorded during gimple lowering. */
expand_used_vars ();
/* Honor stack protection warnings. */
if (warn_stack_protect)
{
if (cfun->calls_alloca)
warning (OPT_Wstack_protector,
"not protecting local variables: variable length buffer");
if (has_short_buffer && !crtl->stack_protect_guard)
warning (OPT_Wstack_protector,
"not protecting function: no buffer at least %d bytes long",
(int) PARAM_VALUE (PARAM_SSP_BUFFER_SIZE));
}
/* Set up parameters and prepare for return, for the function. */
expand_function_start (current_function_decl);
/* If this function is `main', emit a call to `__main'
to run global initializers, etc. */
if (DECL_NAME (current_function_decl)
&& MAIN_NAME_P (DECL_NAME (current_function_decl))
&& DECL_FILE_SCOPE_P (current_function_decl))
expand_main_function ();
/* Initialize the stack_protect_guard field. This must happen after the
call to __main (if any) so that the external decl is initialized. */
if (crtl->stack_protect_guard)
stack_protect_prologue ();
/* Update stack boundary if needed. */
if (SUPPORTS_STACK_ALIGNMENT)
{
/* Call update_stack_boundary here to update incoming stack
boundary before TARGET_FUNCTION_OK_FOR_SIBCALL is called.
TARGET_FUNCTION_OK_FOR_SIBCALL needs to know the accurate
incoming stack alignment to check if it is OK to perform
sibcall optimization since sibcall optimization will only
align the outgoing stack to incoming stack boundary. */
if (targetm.calls.update_stack_boundary)
targetm.calls.update_stack_boundary ();
/* The incoming stack frame has to be aligned at least at
parm_stack_boundary. */
gcc_assert (crtl->parm_stack_boundary <= INCOMING_STACK_BOUNDARY);
}
/* Register rtl specific functions for cfg. */
rtl_register_cfg_hooks ();
init_block = construct_init_block ();
/* Clear EDGE_EXECUTABLE on the entry edge(s). It is cleaned from the
remaining edges in expand_gimple_basic_block. */
FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
e->flags &= ~EDGE_EXECUTABLE;
lab_rtx_for_bb = pointer_map_create ();
FOR_BB_BETWEEN (bb, init_block->next_bb, EXIT_BLOCK_PTR, next_bb)
bb = expand_gimple_basic_block (bb);
/* Expansion is used by optimization passes too, set maybe_hot_insn_p
conservatively to true until they are all profile aware. */
pointer_map_destroy (lab_rtx_for_bb);
free_histograms ();
construct_exit_block ();
set_curr_insn_block (DECL_INITIAL (current_function_decl));
insn_locators_finalize ();
/* We're done expanding trees to RTL. */
currently_expanding_to_rtl = 0;
/* Convert tree EH labels to RTL EH labels and zap the tree EH table. */
convert_from_eh_region_ranges ();
set_eh_throw_stmt_table (cfun, NULL);
rebuild_jump_labels (get_insns ());
find_exception_handler_labels ();
blocks = sbitmap_alloc (last_basic_block);
sbitmap_ones (blocks);
find_many_sub_basic_blocks (blocks);
purge_all_dead_edges (