本文分析一个RISC CPU模型的微结构。CPU本身是使用SystemC建模的,基本都是可综合的语句,没有采用TLM方式建模。其指令集是基于商用RISC处理器、并结合面向DSP程序的类MMX指令定义的,由多达39条指令组成(算术、逻辑、分支、浮点、SIMD(类似MMX))。该模型读入汇编程序并执行它,然后将结果写回寄存器/数据存储器。
通过这个例子的学习,可以掌握硬件/软件划分的思路:设计者可以使用汇编程序编写算法,也可以使用YACC将算法转换为CPU的指令集;然后在CPU模型中运行,评估需要多少周期,还可以改变内存延迟或CPU行为,以获得更好的估计。比如说,IDCT是要考虑的算法,设计者可以使用SystemC编写一个硬件IDCT模型,并确定IDCT在硬件中实现的复杂性和成本。也许IDCT的一部分可以是硬件,一部分可以是软件。无论哪种情况,SystemC都是硬件/软件划分的有用工具。
此外,这个例子也可以解释为ISS(instruction set simulator),软件开发人员可以在芯片制造回来之前使用这个模型在开发周期的早期测试他们的软件。以下则是例子大致的框图和结构:
取指-->译码-->执行-->……流水线的设计还是很明显的,指令执行中应该同时有数据的写回等操作,本次不讨论这个,重点讨论下Instruction Fetch(取指,IF)模块。
1 顶层互联
直接上代码吧。从顶层互联的情况来看,取指IF模块的输入由bios或者paging提供。bios模块只在上电时完成一些基础功能,而取指IF的功能又可细分为:PG指令地址产生、PS指令地址发送、PW指令访问等待、PR指令取指包。下面就分IFU、BIOS、PAGING/ICACHE这三部分来介绍下指令流。
// Instruction fetch unit (IFU).
// Ports are bound by name instead of with the deprecated positional `<<`
// operator, so the wiring no longer depends on the declaration order of the
// ports inside `fetch`. Each signal is attached to the same port as before.
fetch IFU("FETCH_BLOCK");
IFU.init_param(delay_cycles);

// Inputs
IFU.ramdata(ram_dataout);
IFU.branch_address(branch_target_address);
IFU.next_pc(next_pc);
IFU.branch_valid(branch_valid);
IFU.stall_fetch(stall_fetch);
IFU.interrupt(intreq);
IFU.int_vectno(vectno);
IFU.bios_valid(bios_valid);
IFU.icache_valid(icache_valid);
IFU.pred_fetch(pred_fetch);
IFU.pred_branch_address(pred_branch_address);
IFU.pred_branch_valid(pred_branch_valid);

// Outputs
IFU.ram_cs(ram_cs);
IFU.ram_we(ram_we);
IFU.address(addr);
IFU.smc_instruction(ram_datain);
IFU.instruction(instruction);
IFU.instruction_valid(instruction_valid);
IFU.program_counter(program_counter);
IFU.interrupt_ack(intack_cpu);
IFU.branch_clear(branch_clear);
IFU.pred_fetch_valid(pred_fetch_valid);
IFU.reset(reset);

// Clock
IFU.CLK(clk);

sc_trace(fp, IFU, "IFU");
// Boot ROM model. It answers accesses whose address is below BOOT_LENGTH.
bios BIOS("BIOS_BLOCK");
BIOS.init_param(delay_cycles);

// Named port binding: the order of the statements below is irrelevant.
// Clock and request side (driven by the fetch unit).
BIOS.CLK(clk);
BIOS.cs(ram_cs);
BIOS.we(ram_we);
BIOS.addr(addr);
BIOS.datain(ram_datain);

// Response side (read back by the fetch unit).
BIOS.dataout(ram_dataout);
BIOS.bios_valid(bios_valid);
BIOS.stall_fetch(stall_fetch);

sc_trace(fp, BIOS, "BIOS");
// Instruction paging unit, placed between the fetch unit and the icache.
// Ports are bound by name instead of with the deprecated positional `<<`
// operator; each signal is attached to the same port as before.
paging PAGING("PAGING_BLOCK");

// Request side (from the fetch unit)
PAGING.paging_din(ram_datain);
PAGING.paging_csin(ram_cs);
PAGING.paging_wein(ram_we);
PAGING.logical_address(addr);

// Response side of the icache
PAGING.icache_din(icache_din);
PAGING.icache_validin(icache_validin);
PAGING.icache_stall(icache_stall);

// Request side towards the icache
PAGING.paging_dout(paging_dout);
PAGING.paging_csout(paging_csout);
PAGING.paging_weout(paging_weout);
PAGING.physical_address(physical_address);

// Response side towards the fetch unit.
// NOTE(review): ram_dataout and stall_fetch are also bound to BIOS.dataout and
// BIOS.stall_fetch -- confirm the signal types used allow more than one writer.
PAGING.dataout(ram_dataout);
PAGING.data_valid(icache_valid);
PAGING.stall_ifu(stall_fetch);

// Clock
PAGING.CLK(clk);

sc_trace(fp, PAGING, "PAGING");
// Instruction cache model.
// Ports are bound by name instead of with the deprecated positional `<<`
// operator; each signal is attached to the same port as before.
icache ICACHE("ICACHE_BLOCK");
ICACHE.init_param(delay_cycles);

// Request side (driven by the paging unit)
ICACHE.datain(paging_dout);
ICACHE.cs(paging_csout);
ICACHE.we(paging_weout);
ICACHE.addr(physical_address);

// Process-ID load interface
ICACHE.ld_valid(pid_valid);
ICACHE.ld_data(pid_data);

// Response side (read by the paging unit)
ICACHE.dataout(icache_din);
ICACHE.icache_valid(icache_validin);
ICACHE.stall_fetch(icache_stall);

// Clock
ICACHE.CLK(clk);

sc_trace(fp, ICACHE, "ICACHE");
2 IFU
由于地址的产生在这个单元,所以先来分析一下它。另外PC计数以及指令的读入和输出皆是在这里。中断和分支指令将在下一次进行分析。这次只分析正常流。
// Instruction fetch unit (IFU).
//
// A single clocked thread, entry(), drives the memory request signals, samples
// the returned instruction word and forwards it on `instruction`.
//
// Port declaration order matters to any code that binds ports positionally,
// so keep it stable.
struct fetch : sc_module {
    // ---- inputs ---------------------------------------------------------
    sc_in<unsigned> ramdata;              // instruction word returned by memory
    sc_in<unsigned> branch_address;       // fetch address taken when branch_valid is set
    sc_in<bool>     next_pc;              // when set, the fetch address is incremented after an issue
    sc_in<bool>     branch_valid;         // selects the branch path in entry()
    sc_in<bool>     stall_fetch;          // sampled for the first fetch only: a zero word is issued if set
    sc_in<bool>     interrupt;            // interrupt request
    sc_in<unsigned> int_vectno;           // fetch address taken when interrupt is set
    sc_in<bool>     bios_valid;           // BIOS data valid
    sc_in<bool>     icache_valid;         // icache data valid
    sc_in<bool>     pred_fetch;           // branch prediction input (not read by entry())
    sc_in<unsigned> pred_branch_address;  // branch prediction input (not read by entry())
    sc_in<bool>     pred_branch_valid;    // branch prediction input (not read by entry())

    // ---- outputs --------------------------------------------------------
    sc_out<bool>     ram_cs;              // memory chip select
    sc_out<bool>     ram_we;              // memory write enable; entry() only ever writes false
    sc_out<unsigned> address;             // address presented to memory
    sc_out<unsigned> smc_instruction;     // self-modifying-code data (not written by entry())
    sc_out<unsigned> instruction;         // instruction word handed to decode
    sc_out<bool>     instruction_valid;   // qualifies `instruction`
    sc_out<unsigned> program_counter;     // address of the issued instruction
    sc_out<bool>     interrupt_ack;       // interrupt acknowledge
    sc_out<bool>     branch_clear;        // clear outstanding branch
    sc_out<bool>     pred_fetch_valid;    // branch prediction output (not written by entry())
    sc_out<bool>     reset;               // reset for downstream units

    sc_in_clk CLK;

    // ---- parameters -----------------------------------------------------
    // Cycle count passed to wait() after a memory request has been issued.
    int memory_latency;

    // Sets memory_latency; call before the simulation starts.
    void init_param(int given_delay_cycles)
    {
        memory_latency = given_delay_cycles;
    }

    // Registers entry() as a thread clocked on the rising edge of CLK.
    SC_CTOR(fetch) {
        SC_CTHREAD(entry, CLK.pos());
    }

    // Thread body, defined out of line.
    void entry();
};
// Thread body of the fetch unit, clocked on the rising edge of CLK.
//
// Sequence:
//   1. Boot fetch: assert reset, request address 1, wait for a valid flag and
//      issue the word that was read.
//   2. Endless loop: per iteration an optional interrupt fetch, followed by
//      either a branch fetch (branch_valid set) or a sequential fetch.
//
// The handshake is spread over wait() calls, so the order of the statements
// between two wait() calls is significant.
void fetch::entry()
{
    unsigned addr_tmp=0;     // current fetch address
    unsigned datai_tmp=0;    // instruction word most recently sampled
    unsigned lock_tmp = 0;   // number of consecutive iterations that took the branch path
    addr_tmp = 1;            // the very first request uses address 1
    // Now booting from default values
    reset.write(true);       // reset stays asserted until addr_tmp == 5 is seen in the loop below
    ram_cs.write(true);
    ram_we.write(false);
    address.write(addr_tmp);
    wait(memory_latency); // For data to appear
    // Wait (at least one more cycle) until the BIOS or the icache flags valid data.
    do { wait(); }
    while ( !((bios_valid == true) || (icache_valid == true)) );
    // A stalled first fetch issues an all-zero word instead of the memory data.
    if (stall_fetch.read() == true) {
        datai_tmp = 0;
    } else {
        datai_tmp = ramdata.read();
    }
    // Issue the word for one cycle, then drop instruction_valid again.
    instruction_valid.write(true);
    instruction.write(datai_tmp);
    program_counter.write(addr_tmp);
    ram_cs.write(false);
    wait();
    instruction_valid.write(false);
    addr_tmp++;              // unconditional here; inside the loop the increment depends on next_pc
    wait();
    while (true) {
        // Release reset once the fetch address has reached 5.
        if (addr_tmp == 5) {
            reset.write(false);
        }
        // ---- interrupt path -------------------------------------------------
        // NOTE(review): `address` is not written in this path, and the data is
        // sampled after wait(memory_latency) without checking bios_valid or
        // icache_valid -- confirm that this is intended.
        if (interrupt.read() == true) {
            ram_cs.write(true);
            addr_tmp = int_vectno.read();
            ram_we.write(false);
            wait(memory_latency);
            datai_tmp = ramdata.read();
            instruction_valid.write(true);
            instruction.write(datai_tmp);
            ram_cs.write(false);
            interrupt_ack.write(true);   // acknowledge is held for one cycle
            if (next_pc.read() == true) { addr_tmp++; }
            wait();
            instruction_valid.write(false);
            interrupt_ack.write(false);
            wait();
        }
        // After an interrupt fetch the same iteration continues with one of the
        // two paths below.
        // ---- branch path ----------------------------------------------------
        // NOTE(review): neither `address` nor `program_counter` is written in
        // this path; the new address first reaches the `address` port in the
        // next sequential fetch -- confirm that this is intended.
        if (branch_valid.read() == true) {
            lock_tmp ++;
            ram_cs.write(true);
            addr_tmp = branch_address.read();
            ram_we.write(false);
            wait(memory_latency);
            do { wait(); } while ( !((bios_valid == true) || (icache_valid == true)) );
            datai_tmp = ramdata.read();
            instruction_valid.write(true);
            instruction.write(datai_tmp);
            ram_cs.write(false);
            if (next_pc.read() == true) { addr_tmp++; }
            wait();
            instruction_valid.write(false);
            wait();
        }
        // ---- sequential path --------------------------------------------------
        else {
            lock_tmp = 0;
            ram_cs.write(true);
            address.write(addr_tmp);
            ram_we.write(false);
            wait(memory_latency); // For data to appear
            do { wait(); } while ( !((bios_valid == true) || (icache_valid == true)) );
            datai_tmp = ramdata.read();
            instruction_valid.write(true);
            instruction.write(datai_tmp);
            program_counter.write(addr_tmp);
            branch_clear.write(false);   // the only place where branch_clear is de-asserted
            ram_cs.write(false);
            if (next_pc.read() == true) { addr_tmp++; }
            wait();
            instruction_valid.write(false);
            wait();
        }
        // lock_tmp == 1 only right after the first of a run of branch iterations:
        // raise branch_clear and spend one extra cycle.
        if (lock_tmp == 1) {
            branch_clear.write(true);
            wait();
        }
    }
} // end of entry function
前5条指令属于boot阶段,此时IFU发出reset信号去复位译码等后续单元。这5条指令都是在和BIOS互动:每次指令地址产生后,立即置位ram_cs选通信号,等待3拍后,待bios_valid有效时取入由bios送来的指令,再过1拍将指令送出并置位指令有效,指令计数器加1。
这里附一波形,关于BIOS的分析见下,在这5条指令结束后,addr的地址也不会再落入这个范围了,BIOS就再也不会有有效指示了:
之后就是从icache中取指,过程与之前一样:每次指令地址产生后,立即置位ram_cs选通信号,等待3拍后,待icache_valid有效时取入由PAGING从cache中取出并送来的指令,再过1拍将指令送出并置位指令有效,指令计数器加1。
这里附一波形,关于PG和cache的分析见下:
3 BIOS
BIOS存放开机设置用的指令,一般是固化在一个ROM芯片上的程序;示例中用一个文件(bios.img)替代,其中存放了6条指令,至于这些指令做什么,可以见译码的分析(下一次吧)。
#define BOOT_LENGTH 5
// Boot ROM model.
//
// The constructor fills the memory with a poison pattern and then loads the
// hexadecimal words found in "bios.img". entry() serves accesses whose address
// is below BOOT_LENGTH.
struct bios : sc_module {
    sc_in<unsigned >  datain;       // data for write accesses
    sc_in<bool>       cs;           // chip select
    sc_in<bool>       we;           // write enable
    sc_in<unsigned >  addr;         // address
    sc_out<unsigned > dataout;      // data returned by read accesses
    sc_out<bool>      bios_valid;   // pulsed for one cycle when dataout is valid
    sc_out<bool>      stall_fetch;  // not written by entry()
    sc_in_clk         CLK;

    // Capacity, in words, of imemory and itagmemory.
    enum { BIOS_MEM_WORDS = 4000 };

    // Parameter
    unsigned *imemory;      // BIOS program memory (owned, released in the destructor)
    unsigned *itagmemory;   // tag memory; filled here, not read by entry()
    int wait_cycles;        // access latency in clock cycles

    // Sets the access latency; call before the simulation starts.
    void init_param(int given_delay_cycles) { wait_cycles = given_delay_cycles; }

    // Registers the clocked thread and loads the boot image.
    //
    // A missing image file is reported on stderr and leaves the memory filled
    // with 0xffffffff. Loading stops at the first token that is not a
    // hexadecimal number or once BIOS_MEM_WORDS words have been read, so an
    // oversized or malformed image can no longer overrun the arrays.
    SC_CTOR(bios) {
        SC_CTHREAD(entry, CLK.pos());

        wait_cycles = 0;    // defined value until init_param() is called

        imemory    = new unsigned[BIOS_MEM_WORDS];
        itagmemory = new unsigned[BIOS_MEM_WORDS];

        printf("** ALERT ** BIOS: initialize BIOS\n");
        for (int i = 0; i < BIOS_MEM_WORDS; i++) {  // initialize bad data
            imemory[i]    = 0xffffffff;
            itagmemory[i] = 0xffffffff;
        }

        // initialize instruction imemory from external file
        FILE *fp = fopen("bios.img", "r");
        if (fp == NULL) {
            fprintf(stderr, "** ERROR ** BIOS: cannot open bios.img\n");
        } else {
            int size = 0;
            unsigned mem_word;
            // "== 1" instead of "!= EOF": a matching failure returns 0 without
            // consuming input, which would otherwise loop forever.
            while (size < BIOS_MEM_WORDS && fscanf(fp, "%x\n", &mem_word) == 1) {
                imemory[size]    = mem_word;
                itagmemory[size] = size;
                size++;
            }
            fclose(fp);
        }
    }

    // Releases the memories allocated by the constructor.
    ~bios() {
        delete[] imemory;
        delete[] itagmemory;
    }

    // Process functionality in member function below
    void entry();
};
// Thread body of the BIOS model, clocked on the rising edge of CLK.
//
// Each iteration waits for chip select, samples the address and then
//   - ignores the access (bios_valid low, one cycle) if address >= BOOT_LENGTH,
//   - stores datain into imemory for a write access, or
//   - drives dataout and pulses bios_valid for one cycle for a read access.
//
// Both latency waits are guarded so that wait() is never called with a
// non-positive cycle count, whatever value init_param() was given.
void bios::entry()
{
    while (true) {
        // Idle until chip select is seen high.
        do { wait(); } while ( !(cs == true) );

        const unsigned address = addr.read();

        if (address >= BOOT_LENGTH) {
            // Not a boot address: make sure valid is low and spend one cycle.
            bios_valid.write(false);
            wait();
            continue;
        }

        // in BOOTING STAGE
        if (we.read() == true) {                // Write operation
            if (wait_cycles > 1) {
                wait(wait_cycles - 1);
            }
            imemory[address] = datain.read();
            continue;
        }

        // Read operation
        if (wait_cycles > 2) {
            wait(wait_cycles - 2);              // Introduce delay needed
        }
        dataout.write(imemory[address]);
        if (PRINT_BIOS) {
            printf("------------------------\n");
            printf("BIOS: fetching mem[%u]\n", address);
            printf("BIOS: (%0x)", imemory[address]);
            cout.setf(ios::dec,ios::basefield);
            cout << " at CSIM " << sc_time_stamp() << endl;
            printf("------------------------\n");
        }
        // bios_valid is high for exactly one clock cycle.
        bios_valid.write(true);
        wait();
        bios_valid.write(false);
        wait();
    }
} // end of entry function
代码中执行的基本就是Read的分支,这个addr是由IFU发出的,只有0、1、2、3、4的addr才会落在bios中,直接按照时序送出前5条指令。
4 PG和ICACHE
PG指令地址产生、PS指令地址发送、PW指令访问等待、PR指令取指包,Instruction Paging Unit这个模块和IFU结合便会把缓冲在icache中的指令取出。从第五条指令开始,IFU便开始执行icache中的指令了。
// Instruction paging unit.
//
// Placed between the fetch unit and the icache: entry() forwards a request
// from the *_in side to the *_out side and passes the icache data back on
// dataout / data_valid.
//
// Port declaration order matters to any code that binds ports positionally,
// so keep it stable.
struct paging : sc_module {
    // ---- request from the fetch unit -------------------------------------
    sc_in<unsigned> paging_din;         // data for write accesses
    sc_in<bool>     paging_csin;        // chip select
    sc_in<bool>     paging_wein;        // write enable
    sc_in<unsigned> logical_address;    // requested address

    // ---- response from the icache ---------------------------------------
    sc_in<unsigned> icache_din;         // data returned by the icache
    sc_in<bool>     icache_validin;     // qualifies icache_din
    sc_in<bool>     icache_stall;       // not read by entry()

    // ---- request towards the icache --------------------------------------
    sc_out<unsigned> paging_dout;       // forwarded write data
    sc_out<bool>     paging_csout;      // forwarded chip select
    sc_out<bool>     paging_weout;      // forwarded write enable
    sc_out<unsigned> physical_address;  // entry() copies logical_address unchanged

    // ---- response towards the fetch unit ---------------------------------
    sc_out<unsigned> dataout;           // instruction word
    sc_out<bool>     data_valid;        // pulsed for one cycle with dataout
    sc_out<bool>     stall_ifu;         // not written by entry()

    sc_in_clk CLK;

    // CPU process ID register; set to 0 here, not used by entry().
    signed int pid_reg;

    // Registers entry() as a thread clocked on the rising edge of CLK.
    SC_CTOR(paging) {
        SC_CTHREAD(entry, CLK.pos());
        pid_reg = 0;
    }

    // Thread body, defined out of line.
    void entry();
};
// Thread body of the paging unit, clocked on the rising edge of CLK.
//
// Each iteration waits for paging_csin, samples logical_address and, for
// addresses of 5 and above, forwards the access to the icache side:
//   - write: forward data, chip select and write enable for one cycle;
//   - read : forward the request, wait for icache_validin, then return the
//            data on dataout with a one-cycle data_valid pulse.
// The address is passed through unchanged (physical == logical).
//
// The address is handled as unsigned, like the ports that carry it. Stored in
// a signed int, addresses of 0x80000000 and above failed the ">= 5" test and
// were dropped without any response to the requester.
void paging::entry()
{
    // Accesses below this address are not forwarded (same value as the
    // BOOT_LENGTH constant used by the bios and icache models).
    const unsigned first_paged_address = 5;

    while (true) {
        // Idle until chip select is seen high.
        do { wait(); }
        while ( !(paging_csin == true) );

        const unsigned address = logical_address.read();
        if (address < first_paged_address) {
            continue;
        }

        if (paging_wein.read() == true) {       // Write operation
            paging_dout.write(paging_din.read());
            paging_csout.write(true);
            paging_weout.write(true);
            physical_address.write(address);
            wait();
            paging_csout.write(false);
            paging_weout.write(false);
        }
        else {                                  // Read operation
            paging_csout.write(true);
            paging_weout.write(false);
            physical_address.write(address);
            wait();
            // Wait (at least one more cycle) for the icache to flag valid data.
            do { wait(); }
            while ( !(icache_validin == true) );

            const unsigned dataout_tmp = icache_din.read();
            if (PRINT_PU){
                cout << "-----------------------" << endl;
                printf( "PAGE : mem=%x\n",dataout_tmp);
                cout << "PAGE : " ;
                cout << " at CSIM " << sc_time_stamp() << endl;
                cout << "-----------------------" << endl;
            }
            // data_valid is high for exactly one clock cycle.
            dataout.write(dataout_tmp);
            data_valid.write(true);
            paging_csout.write(false);
            wait();
            data_valid.write(false);
            wait();
        }
    }
} // end of entry function
Instruction Paging Unit接受IFU产生的地址(a步骤),将这个逻辑地址转换为物理地址(b步骤),去icache中取出相应的指令(c步骤),在内部打了一拍(d步骤)后传回IFU。下面看两个图示(注意从粉色竖线开始):
通过上面的分析,icache的时序其实已经很清楚了,指令的缓冲这里没有体现,就不分析了,至于这里接口上的数据写入,还得结合译码再分析一些。
#define BOOT_LENGTH 5
#define MAX_CODE_LENGTH 500
// Instruction cache model.
//
// The constructor fills the memory with a poison pattern and then loads the
// hexadecimal words found in "icache.img". entry() serves accesses whose
// address is BOOT_LENGTH or above.
struct icache : sc_module {
    sc_in<unsigned >  datain;        // data for write accesses
    sc_in<bool>       cs;            // chip select
    sc_in<bool>       we;            // write enable
    sc_in<unsigned >  addr;          // address
    sc_in<bool>       ld_valid;      // when set, ld_data is copied into pid
    sc_in<signed>     ld_data;       // value loaded into pid
    sc_out<unsigned > dataout;       // data returned by read accesses
    sc_out<bool>      icache_valid;  // pulsed for one cycle when dataout is valid
    sc_out<bool>      stall_fetch;   // not written by entry()
    sc_in_clk         CLK;

    // Parameter
    unsigned *icmemory;     // icache data memory (owned, released in the destructor)
    unsigned *ictagmemory;  // icache tag memory; filled here, not read by entry()
    signed int pid;         // process ID
    int wait_cycles;        // set by init_param(); not read by entry()

    // Stores the configured access latency.
    void init_param(int given_delay_cycles) {
        wait_cycles = given_delay_cycles;
    }

    // Registers the clocked thread and loads the cache image.
    //
    // A missing image file is reported on stderr and leaves the memory filled
    // with 0xeeeeeeee. Loading stops at the first token that is not a
    // hexadecimal number or once MAX_CODE_LENGTH words have been read, so an
    // oversized or malformed image can no longer overrun the arrays.
    SC_CTOR(icache) {
        SC_CTHREAD(entry, CLK.pos());

        pid = 0;
        wait_cycles = 0;    // defined value until init_param() is called

        icmemory    = new unsigned[MAX_CODE_LENGTH];
        ictagmemory = new unsigned[MAX_CODE_LENGTH];
        for (int i = 0; i < MAX_CODE_LENGTH; i++) {  // initialize bad data
            icmemory[i]    = 0xeeeeeeee;
            ictagmemory[i] = 0xeeeeeeee;
        }

        printf("** ALERT ** ICU: initialize Instruction Cache\n");

        // initialize instruction icmemory from external file
        FILE *fp = fopen("icache.img", "r");
        if (fp == NULL) {
            fprintf(stderr, "** ERROR ** ICU: cannot open icache.img\n");
        } else {
            int size = 0;
            unsigned int mem_word;
            // "== 1" instead of "!= EOF": a matching failure returns 0 without
            // consuming input, which would otherwise loop forever.
            while (size < MAX_CODE_LENGTH && fscanf(fp, "%x", &mem_word) == 1) {
                icmemory[size]    = mem_word;
                ictagmemory[size] = size;
                size++;
            }
            fclose(fp);
        }
    }

    // Releases the memories allocated by the constructor.
    ~icache() {
        delete[] icmemory;
        delete[] ictagmemory;
    }

    // Process functionality in member function below
    void entry();
};
void icache::entry()
{
unsigned int address;
while (true) {
do { wait(); }
while ( !(cs == true) );
address = addr.read();
if (address == BOOT_LENGTH) {
printf("ICU ALERT: *********************************************************************\n");
printf(" : *****************************AFTER RESET*****************************\n");
printf("ICU ALERT: *********************************************************************\n");
}
if (address >= BOOT_LENGTH) {
if (ld_valid.read() == true) {
pid = ld_data.read();
wait();
wait();
}
if (we.read() == true) { // Write operation
wait();
if (address < MAX_CODE_LENGTH)
icmemory[address] = datain.read();
else
printf("ICU ALERT: **MEMORY OUT OF RANGE**\n");
wait();
}
else { // Read operation
wait(); // Introduce delay needed
if (address >= MAX_CODE_LENGTH) {
dataout.write(0xffffffff);
printf("ICU ALERT: **MEMORY OUT OF RANGE**\n");
}
else
dataout.write(icmemory[address]);
icache_valid.write(true);
wait();
icache_valid.write(false);
wait();
}
}
}
} // end of entry function