【CSAPP】分支预测器

谁吧好名字都抢了

已于 2023-01-04 15:55:11 修改

阅读量1.5k

点赞数 11

分类专栏： CSAPP 文章标签： c++ c语言

于 2022-03-26 20:28:27 首次发布

本文链接：https://blog.csdn.net/m0_52631482/article/details/123754103

版权

CSAPP 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

共有七个实验目标

实验目标1：实现1位的分支预测器，并测试
实验目标2：扩展全局分支历史长度，并测试
实验目标3：为每个分支指令，分配完全独立的状态机，并测试
实验目标4：扩展为3位状态机，并测试
实验目标5：实现局部历史信息，并测试
实验目标6：实现局部+全局历史信息，并测试
实验目标7：考虑不用异或，使用其他hash函数（比如移位、加法）来实现，并测试
最终策略尽可能减少NUM_MISPREDICTIONS和MISPRED_PER_1K_INST的值

未改动：

#define PHT_CTR_MAX 3

#define PHT_CTR_INIT 2

#define HIST_LEN 17

RAW
	NUM_MISPREDICTIONS	MISPRED_PER_1K_INST
SHORT_1	434642	2.9176
SHORT_2	3927121	11.3901
SHORT_3	376277	3.7208
SHORT_4	1923213	5.1629
SHORT_24	48	0.0005
SHORT_25	174	0.0018
SHORT_27	104	0.4710
SHORT_28	104	0.0074
SHORT_30	4620079	7.0143
LONG_1	370661	0.5772
LONG_2	1762528	1.3861
LONG_3	9997824	7.7871
LONG_4	5170	0.0052

实验目标一：（实现一位的分支预测器）

即PHT_CTR_MAX = 1

实验目标二：（扩展全局分支历史长度）

即HIST_LEN = 20

实验目标三：（为每个分支指令，分配完全独立的状态机）

将PC中的17位用逻辑与取出来

所以在UpdatePredictor和GetPrediction函数中，改为

UINT32 phtIndex = (PC&0x7FFFC) % (numPhtEntries);

UINT32 phtCounter = pht[phtIndex];

实验目标四：（扩展为3位状态机）

PHT_CTR_MAX = 7

实验目标五：（实现局部历史信息）

代码如下

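///////////////////////////////////////////////////////////////////////
//  Copyright 2020 by mars.                                          //
///////////////////////////////////////////////////////////////////////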

#include <stdio.h>
#include <stdlib.h>

#include "common.h"

// Saturating counter, upward step: returns x + 1 while x is still below max,
// otherwise returns x unchanged.
static inline UINT32 SatIncrement(UINT32 x, UINT32 max)
{
	return (x < max) ? (x + 1) : x;
}

// Saturating counter, downward step: returns x - 1 while x is above zero,
// otherwise stays at zero.
static inline UINT32 SatDecrement(UINT32 x)
{
	return (x == 0) ? 0 : (x - 1);
}

// The state is defined for Gshare, change for your design.
// This variant adds a local history table (lht) next to the Gshare state.
UINT32 ghr;             // global history register
UINT32 *pht;            // pattern history table (array of saturating counters)
UINT32 *lht;            // local history table (per-slot branch outcome bits)
UINT32 historyLength;   // history length
UINT32 numPhtEntries;   // entries in pht
UINT32 numLhtEntries;   // entries in lht

#define PHT_CTR_MAX  3    // for an L-bit counter, PHT_CTR_MAX = 2**L - 1
#define PHT_CTR_INIT 2    // value PREDICTOR_init writes into every table entry

#define HIST_LEN   17   // global history register length: 17 bits

#define TAKEN		'T'
#define NOT_TAKEN	'N'


// Initialise the predictor: reset the global history register, derive both
// table sizes from HIST_LEN, allocate the tables and fill every entry with
// PHT_CTR_INIT.
// If either allocation fails the process is terminated with a message on
// stderr, rather than writing through a NULL table pointer.
void PREDICTOR_init(void)
{
	historyLength = HIST_LEN;
	ghr = 0;
	numPhtEntries = (1 << HIST_LEN);        // pattern history table: 2^17 entries
	numLhtEntries = (1 << (HIST_LEN-3));    // local history table: 2^14 entries

	// Casts are kept on purpose; NOTE(review): presumably this file may also
	// be built as C++, where the cast is required - confirm against the build.
	pht = (UINT32 *)malloc(numPhtEntries * sizeof *pht);
	lht = (UINT32 *)malloc(numLhtEntries * sizeof *lht);
	if (pht == NULL || lht == NULL) {
		fprintf(stderr, "PREDICTOR_init: failed to allocate predictor tables\n");
		free(pht);
		free(lht);
		pht = NULL;
		lht = NULL;
		exit(EXIT_FAILURE);
	}

	// Every PHT counter starts at PHT_CTR_INIT.
	for (UINT32 ii = 0; ii < numPhtEntries; ii++) {
		pht[ii] = PHT_CTR_INIT;
	}
	// The LHT entries are seeded with the same constant.
	for (UINT32 ii = 0; ii < numLhtEntries; ii++) {
		lht[ii] = PHT_CTR_INIT;
	}
}

// Local-history predictor lookup.
// PC with its two lowest bits dropped, modulo the LHT size, selects an LHT
// slot. The slot number is shifted left by three and combined with the low
// three bits stored in that slot; the result, modulo the PHT size, selects a
// saturating counter.
// Returns TAKEN when that counter is above PHT_CTR_MAX / 2, NOT_TAKEN otherwise.
char GetPrediction(UINT64 PC)
{
	const UINT32 lhtSlot = (PC >> 2) % numLhtEntries;
	const UINT32 recentOutcomes = lht[lhtSlot] % (1 << 3);   // low 3 bits of the slot's history
	const UINT32 phtSlot = ((lhtSlot << 3) | recentOutcomes) % numPhtEntries;

	return (pht[phtSlot] > (PHT_CTR_MAX / 2)) ? TAKEN : NOT_TAKEN;
}

// Train the predictor with the resolved direction of one branch.
//
// The PHT entry is located exactly as in GetPrediction (LHT slot number
// concatenated with the slot's low three history bits). That counter is
// saturating-incremented when the branch was taken and saturating-decremented
// otherwise. Afterwards the outcome (1 = taken, 0 = not taken) is shifted into
// the low bit of both the LHT slot and the global history register. ghr is
// maintained here but does not take part in this function's index computation.
//
// PC          address of the branch
// resolveDir  actual direction, TAKEN or NOT_TAKEN
// opType, predDir, branchTarget  unused by this predictor
void UpdatePredictor(UINT64 PC, OpType opType, char resolveDir, char predDir, UINT64 branchTarget)
{
	// Cast to void instead of self-assigning: same intent (silence
	// unused-parameter warnings) without tripping self-assignment warnings.
	(void)opType;
	(void)predDir;
	(void)branchTarget;

	const UINT32 takenBit = (resolveDir == TAKEN) ? 1u : 0u;

	const UINT32 lhtSlot = (PC >> 2) % numLhtEntries;
	const UINT32 recentOutcomes = lht[lhtSlot] % (1 << 3);   // low 3 bits of the slot's history
	const UINT32 phtSlot = ((lhtSlot << 3) | recentOutcomes) % numPhtEntries;

	// Move the selected counter toward the observed direction.
	if (takenBit) {
		pht[phtSlot] = SatIncrement(pht[phtSlot], PHT_CTR_MAX);
	}
	else {
		pht[phtSlot] = SatDecrement(pht[phtSlot]);
	}

	// Shift the outcome into the local history of this slot.
	lht[lhtSlot] = (lht[lhtSlot] << 1) | takenBit;

	// Shift the outcome into the global history register.
	ghr = (ghr << 1) | takenBit;
}

// Release both predictor tables. The global pointers are reset to NULL
// afterwards so that calling this function again is a harmless no-op
// (free(NULL) does nothing) instead of a double free.
void PREDICTOR_free(void)
{
	free(pht);
	pht = NULL;
	free(lht);
	lht = NULL;
}

实验目标六：（实现局部+全局历史信息）

实验目标五在UpdatePredictor和GetPrediction函数中的

UINT32 phtIndex = join % (numPhtEntries);语句改为

UINT32 phtIndex = (join^ghr) % (numPhtEntries);即可

实验目标七：（考虑不用异或，使用其他hash函数）

在17位全局历史寄存器的情况下采用 $2^{17}$ 附近的较大的素数：131111

即numPhtEntries = 131111

同理
20位时选取素数999983
25位时选取素数9999991

为什么选素数：用除留余数法做散列时，以素数作为除数可以让索引在表中分布得更均匀，减少有规律的键值之间的冲突。

但25位所需内存太大了 T_T

另将以上几个因素综合考虑尝试，得到以下数据：

由实验数据可知：

选择适当的、较大的除数进行散列，分配较大的模式历史表（PHT），以及选择较大的全局历史（GHR），可以有效减少不同指令的碰撞，对于分支预测准确度的提升有较为可观的效果。但是分配空间受硬件条件限制不能过大，且空间越大预测准确度提升幅度越小。
使用多位的状态机也可以有效地识别出程序目前所处的状态，从而更好地预测分支。但是状态机位数过高有损于在程序变换跳转模式时的分支预测准确度。经实验，取3位状态机（即PHT_CTR_MAX = 7）较合适，且每个PHT项的初始值应选取7/2附近的3或4，这里取4。
在全局历史信息与PC进行运算的时候用其他的运算替换XOR效果不大；我用平方取中法对join操作，效果比直接用join好些。（这里感觉天马行空，没有可靠的方案）
为每个分支指令分配独立状态机、只实现局部历史信息，这样做没有将全局历史跳转信息考虑在内，效果很一般

最终选取 20位GHR + 除留余数法改进999983 + 三位状态机 + 平方取中 +局部信息+全局信息

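///////////////////////////////////////////////////////////////////////
//  Copyright 2020 by mars.                                          //
///////////////////////////////////////////////////////////////////////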

#include <stdio.h>
#include <stdlib.h>

#include "common.h"

// Saturating counter, upward step: returns x + 1 while x is still below max,
// otherwise returns x unchanged.
static inline UINT32 SatIncrement(UINT32 x, UINT32 max)
{
	return (x < max) ? (x + 1) : x;
}

// Saturating counter, downward step: returns x - 1 while x is above zero,
// otherwise stays at zero.
static inline UINT32 SatDecrement(UINT32 x)
{
	return (x == 0) ? 0 : (x - 1);
}

// The state is defined for Gshare, change for your design.
// This variant adds a local history table (lht) and stores both tables as
// one char per entry.
UINT32 ghr;             // global history register
char  *pht;             // pattern history table (array of saturating counters)
char  *lht;             // local history table (per-slot branch outcome bits)
UINT32 historyLength;   // history length
UINT32 numPhtEntries;   // entries in pht
UINT32 numLhtEntries;   // entries in lht

#define PHT_CTR_MAX  7    // for an L-bit counter, PHT_CTR_MAX = 2**L - 1
#define PHT_CTR_INIT 4    // value PREDICTOR_init writes into every table entry

#define HIST_LEN   20   // global history register length: 20 bits

#define TAKEN		'T'
#define NOT_TAKEN	'N'


// Initialise the predictor: reset the global history register, set both
// table sizes, allocate the tables (one char per entry) and fill every entry
// with PHT_CTR_INIT.
// If either allocation fails the process is terminated with a message on
// stderr, rather than writing through a NULL table pointer.
void PREDICTOR_init(void)
{
	historyLength = HIST_LEN;
	ghr = 0;
	// PHT size used as the modulus of the index hash: 999983 entries.
	// Other sizes that were tried: 139999, 8209, 9999991, (1 << HIST_LEN).
	numPhtEntries = 999983;
	numLhtEntries = (1 << (HIST_LEN-3));    // local history table: 2^17 entries

	// Casts are kept on purpose; NOTE(review): presumably this file may also
	// be built as C++, where the cast is required - confirm against the build.
	pht = (char *)malloc(numPhtEntries * sizeof *pht);
	lht = (char *)malloc(numLhtEntries * sizeof *lht);
	if (pht == NULL || lht == NULL) {
		fprintf(stderr, "PREDICTOR_init: failed to allocate predictor tables\n");
		free(pht);
		free(lht);
		pht = NULL;
		lht = NULL;
		exit(EXIT_FAILURE);
	}

	// Every PHT counter starts at PHT_CTR_INIT.
	for (UINT32 ii = 0; ii < numPhtEntries; ii++) {
		pht[ii] = PHT_CTR_INIT;
	}
	// The LHT entries are seeded with the same constant.
	for (UINT32 ii = 0; ii < numLhtEntries; ii++) {
		lht[ii] = PHT_CTR_INIT;
	}
}

// Combined local + global history lookup.
// PC with its two lowest bits dropped, modulo the LHT size, selects an LHT
// slot. The slot number is shifted left by three and combined with the low
// three bits stored in that slot ("join"). join is squared in UINT32
// arithmetic, shifted right by 10, XORed with the global history register and
// reduced modulo the PHT size to select a saturating counter.
// Returns TAKEN when that counter is above PHT_CTR_MAX / 2, NOT_TAKEN otherwise.
char GetPrediction(UINT64 PC)
{
	const UINT32 lhtSlot = (PC >> 2) % numLhtEntries;
	const int recentOutcomes = lht[lhtSlot] % (1 << 3);     // low 3 bits of the slot's history
	const UINT32 join = (lhtSlot << 3) | recentOutcomes;    // LHT slot number, then 3 history bits
	const UINT32 squaredMiddle = (join * join) >> 10;
	const UINT32 phtSlot = (squaredMiddle ^ ghr) % numPhtEntries;
	const UINT32 counter = pht[phtSlot];

	return (counter > (PHT_CTR_MAX / 2)) ? TAKEN : NOT_TAKEN;
}

// Train the predictor with the resolved direction of one branch.
//
// The PHT entry is located exactly as in GetPrediction: the LHT slot number
// concatenated with the slot's low three history bits is squared, shifted
// right by 10, XORed with ghr and reduced modulo the PHT size. That counter is
// saturating-incremented when the branch was taken and saturating-decremented
// otherwise. Afterwards the outcome (1 = taken, 0 = not taken) is shifted into
// the low bit of both the LHT slot (kept to three bits) and the global
// history register.
//
// PC          address of the branch
// resolveDir  actual direction, TAKEN or NOT_TAKEN
// opType, predDir, branchTarget  unused by this predictor
void UpdatePredictor(UINT64 PC, OpType opType, char resolveDir, char predDir, UINT64 branchTarget)
{
	// Cast to void instead of self-assigning: same intent (silence
	// unused-parameter warnings) without tripping self-assignment warnings.
	(void)opType;
	(void)predDir;
	(void)branchTarget;

	const int takenBit = (resolveDir == TAKEN) ? 1 : 0;

	const UINT32 lhtSlot = (PC >> 2) % numLhtEntries;
	const int recentOutcomes = lht[lhtSlot] % (1 << 3);     // low 3 bits of the slot's history
	const UINT32 join = (lhtSlot << 3) | recentOutcomes;    // LHT slot number, then 3 history bits
	const UINT32 phtSlot = (((join * join) >> 10) ^ ghr) % numPhtEntries;
	const UINT32 counter = pht[phtSlot];

	// Move the selected counter toward the observed direction.
	if (takenBit) {
		pht[phtSlot] = SatIncrement(counter, PHT_CTR_MAX);
	}
	else {
		pht[phtSlot] = SatDecrement(counter);
	}

	// Shift the outcome into the local history; the modulo 8 keeps only three
	// bits so the char entry cannot overflow.
	lht[lhtSlot] = (char)(((lht[lhtSlot] << 1) % 8) | takenBit);

	// Shift the outcome into the global history register.
	ghr = (ghr << 1) | (UINT32)takenBit;
}

// Release both predictor tables. The global pointers are reset to NULL
// afterwards so that calling this function again is a harmless no-op
// (free(NULL) does nothing) instead of a double free.
void PREDICTOR_free(void)
{
	free(pht);
	pht = NULL;
	free(lht);
	lht = NULL;
}

	NUM_MISPREDICTIONS(raw)	NUM_MISPREDICTIONS(improved)	MISPRED_PER_1K_INST(raw)	MISPRED_PER_1K_INST(improved)
SHORT_1	434642	142681	2.9176	0.9578
SHORT_2	3927121	1559705	11.3901	4.5237
SHORT_3	376277	305873	3.7208	3.0246
SHORT_4	1923213	1422742	5.1629	3.8194
SHORT_24	48	48	0.0005	0.0005
SHORT_25	174	174	0.0018	0.0018
SHORT_27	104	9	0.4710	0.0408
SHORT_28	104	108	0.0074	0.0077
SHORT_30	4620079	1358961	7.0143	2.0632
LONG_1	370661	74691	0.5772	0.1163
LONG_2	1762528	1380073	1.3861	1.0853
LONG_3	9997824	9548230	7.7871	7.4369
LONG_4	5170	6089	0.0052	0.0061