一、准备
Cache Lab下载地址:http://csapp.cs.cmu.edu/3e/cachelab-handout.tar
帮助文档:http://csapp.cs.cmu.edu/3e/cachelab.pdf
上一篇Attack Lab
这篇文章其实是写了很久了, 上面链接资源可能不存在了,备份:https://gitee.com/beneil/csapplabc
二、概述
任务
Part A:实现一个缓存(cache)模拟器
Part B:优化矩阵转置
通用的高速缓存存储器组织结构
高速缓存被组织成一个包含 $S=2^s$ 个缓存组(cache set)的数组。每个组包含 $E$ 个缓存行(cache line)。每个行由一个 $B=2^b$ 字节的数据块(block)组成。
三、步骤
Part A:实现一个缓存(cache)模拟器
打开帮助文档,其中说明要在 csim.c 中编写一个类似于 csim-ref 的缓存模拟器,并且要采用 LRU(最近最少使用)替换算法。
参数:
- -v:显示运行信息
- -s:缓存组索引位数 $s$,组数 $S=2^s$
- -E:每个组包含E个缓存行
- -b:数据块偏移位数 $b$,块大小 $B=2^b$ 字节
- -t:输入数据文件路径(traces文件夹)
trace文件:
每行格式:[空格]操作 地址,大小
- “I”表示指令加载(文档说明忽略就行);
- “L”表示数据加载;
- “S”表示数据存储;
- “M”表示数据修改(数据加载后数据存储)
./csim-ref -v -s 4 -E 1 -b 4 -t traces/yi.trace
64位地址组成:56位t+4位s+4位b
L 10,1 表示地址0x10=00..00010000 组1,加载,结果miss
M 20,1 表示地址0x20=00..00100000 组2,结果先miss,后hit(M:执行2次)
L 22,1 表示地址0x22=00..00100010 组2,上一步已缓存,结果hit
S 18,1 表示地址0x18=00..00011000 组1,已缓存,结果hit
L 110,1 表示地址0x110=00..000100010000 组1,tag=1,与组1中已缓存行的tag(0)不匹配,结果先miss,后eviction
....
综上,完整代码csim.c
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "cachelab.h"
// Running totals that main() hands to printSummary().
unsigned int Hits = 0;
unsigned int Misses = 0;
unsigned int Evictions = 0;
// Non-zero when -v was given: echo every trace line and its outcome.
unsigned int Verbose = 0;
// Number of set-index bits (s) and block-offset bits (b) taken from -s / -b.
int Setbits = 0;
int Blockbits = 0;
// One cache line. Only bookkeeping is kept; no block data is stored.
typedef struct {
  unsigned int valid;  // 0 = unused, 1 = currently holds `tag`
  int timestamp;       // LRU marker: access counter value at the last use
  unsigned long tag;   // tag bits of the cached address
} CacheLine;
// One cache set: a pointer to its array of lines (allocated in initCache()).
typedef struct {
  CacheLine *cacheLine;
} CacheSet;
// The single global cache instance, together with the open trace file.
struct {
  int setNum;          // S: number of sets
  int lineNum;         // E: lines per set
  int blockNum;        // B: block size derived from the -b bit count
  CacheSet *cacheSet;  // array of sets
  FILE *path;          // trace file opened from the -t argument
} Cache;
// Write a one-line usage example to standard output.
void printHelp() {
  fputs("help example:\n./csim-ref -v -s 4 -E 1 -b 4 -t traces/yi.trace\n",
        stdout);
}
// Record a hit on `cacheLine`: bump the hit counter, report it in verbose
// mode, and stamp the line with `timestamp` so it counts as recently used.
void doHit(CacheLine* cacheLine, int timestamp) {
  Hits += 1;
  if (Verbose) {
    printf(" hit");
  }
  cacheLine->timestamp = timestamp;
}
// Record a miss and load `tag` into `cacheLine`: the line becomes valid and
// is stamped with `timestamp`. Prints " miss" in verbose mode.
void doMiss(CacheLine* cacheLine, unsigned long int tag, int timestamp) {
  Misses += 1;
  if (Verbose) {
    printf(" miss");
  }
  cacheLine->tag = tag;
  cacheLine->timestamp = timestamp;
  cacheLine->valid = 1;
}
// Replace the contents of `cacheLine` with `tag`. An eviction is always also
// a miss, so the miss is recorded first; verbose output reads
// " miss eviction".
void doEviction(CacheLine* cacheLine, unsigned long int tag, int timestamp) {
  Evictions += 1;
  doMiss(cacheLine, tag, timestamp);
  if (Verbose) {
    printf(" eviction");
  }
}
/*
 * Parse the command line into the global cache configuration.
 *
 * Options: -h (print usage), -v (verbose), -s <set-index bits>,
 * -E <lines per set>, -b <block-offset bits>, -t <trace file>.
 *
 * The usage text is printed and the process exits when an unknown option is
 * seen, when -s/-E/-b are missing or not positive, when -s/-b are too large
 * to shift safely, or when the trace file cannot be opened.
 */
void setArgs(int argc, char** argv) {
  // Largest accepted bit count, so that `1 << n` stays representable.
  // Assumes int is at least 32 bits wide.
  enum { MAX_BITS = 30 };
  // getopt() returns int. Storing the result in a plain char breaks the
  // `!= -1` test on platforms where char is unsigned (the loop never ends).
  int opt;

  Cache.setNum = Cache.lineNum = Cache.blockNum = 0;  // S, E, B
  Cache.path = NULL;
  while ((opt = getopt(argc, argv, "hvs:E:b:t:")) != -1) {
    switch (opt) {
      case 'h':
        printHelp();
        break;
      case 'v':
        Verbose = 1;
        break;
      case 's':
        Setbits = atoi(optarg);
        break;
      case 'E':
        Cache.lineNum = atoi(optarg);
        break;
      case 'b':
        Blockbits = atoi(optarg);
        break;
      case 't':
        // A repeated -t replaces the earlier file; close it to avoid a leak.
        if (Cache.path != NULL) {
          fclose(Cache.path);
        }
        Cache.path = fopen(optarg, "r");
        break;
      default:
        printHelp();
        exit(0);
    }
  }
  if (Setbits <= 0 || Setbits > MAX_BITS || Cache.lineNum <= 0 ||
      Blockbits <= 0 || Blockbits > MAX_BITS || Cache.path == NULL) {
    printHelp();
    exit(0);
  }
  // Shift only after the bit counts are known to be in range; shifting by a
  // negative or oversized count is undefined behaviour.
  Cache.setNum = 1 << Setbits;
  Cache.blockNum = 1 << Blockbits;
}
/*
 * Read the command-line configuration via setArgs(), then allocate the
 * zero-initialised cache: Cache.setNum sets of Cache.lineNum lines each.
 * Zeroed lines start out with valid == 0.
 *
 * Exits with status 1 if memory cannot be allocated, instead of letting a
 * NULL pointer be dereferenced later during simulation.
 */
void initCache(int argc, char** argv) {
  setArgs(argc, argv);
  Cache.cacheSet = calloc(Cache.setNum, sizeof *Cache.cacheSet);
  if (Cache.cacheSet == NULL) {
    fprintf(stderr, "csim: out of memory\n");
    exit(1);
  }
  for (int i = 0; i < Cache.setNum; i++) {
    Cache.cacheSet[i].cacheLine =
        calloc(Cache.lineNum, sizeof *Cache.cacheSet[i].cacheLine);
    if (Cache.cacheSet[i].cacheLine == NULL) {
      fprintf(stderr, "csim: out of memory\n");
      exit(1);
    }
  }
}
/*
 * Simulate one access to set `set` with tag `tag`.
 *
 * Lines are scanned in index order. A valid line with a matching tag is a
 * hit; the first unused line found is filled as a miss. If every line is
 * valid and none matches, the line with the smallest timestamp (least
 * recently used) is evicted. Each access advances a function-local counter
 * that supplies the timestamp.
 */
void tryHit(unsigned long int set, unsigned long int tag) {
  static int timestamp = 0;  // access counter used as the LRU clock
  CacheLine* lines = Cache.cacheSet[set].cacheLine;
  int lru = 0;  // index of the oldest line seen so far

  for (int idx = 0; idx < Cache.lineNum; idx++) {
    CacheLine* line = &lines[idx];
    if (line->valid == 0) {
      // Miss with a free line available.
      timestamp += 1;
      doMiss(line, tag, timestamp);
      return;
    }
    if (line->valid == 1 && line->tag == tag) {
      timestamp += 1;
      doHit(line, timestamp);
      return;
    }
    if (line->timestamp < lines[lru].timestamp) {
      lru = idx;
    }
  }

  // Miss with the set full: replace the least recently used line.
  timestamp += 1;
  doEviction(&lines[lru], tag, timestamp);
}
void tryCache(unsigned long int address) {
//获取tag 和 set
unsigned long int sbNum = Setbits + Blockbits;
unsigned long int tag = address >> sbNum;
unsigned long int set = (address ^ (tag << sbNum)) >> Blockbits;
//模拟内存操作
tryHit(set, tag);
}
/*
 * Replay the trace file Cache.path against the simulated cache.
 *
 * Each record has the form "<op> <hex address>,<size>". 'I' (instruction
 * load) records are skipped. Every other record performs one access, and 'M'
 * (modify = load followed by store) performs a second access to the same
 * address. In verbose mode the record is echoed, followed by the outcome
 * words printed by the access helpers.
 *
 * Reading stops at end of file or at the first record that does not match
 * the expected format.
 */
void doCache() {
  char opt[2];                // one operation letter plus the terminating NUL
  unsigned long int address;  // 8 bytes, enough for a 64-bit address
  int block;                  // access size; parsed and echoed only

  // "%1s" caps the token at one character so opt[2] cannot overflow.
  // Requiring all three conversions avoids replaying stale values after a
  // partially matched record.
  while (fscanf(Cache.path, "%1s %lx,%d", opt, &address, &block) == 3) {
    if (opt[0] == 'I') continue;
    if (Verbose) {
      printf("%c %lx,%d", opt[0], address, block);
    }
    // L, S and M all touch the cache once ...
    tryCache(address);
    // ... and M touches it a second time for the store.
    if (opt[0] == 'M') {
      tryCache(address);
    }
    if (Verbose) {
      printf("\n");
    }
  }
}
// Release every set's line array, then the set array itself, and close the
// trace file.
void freeCache() {
  int i = 0;
  while (i < Cache.setNum) {
    free(Cache.cacheSet[i].cacheLine);
    i++;
  }
  free(Cache.cacheSet);
  fclose(Cache.path);
}
// Entry point: configure the cache, replay the trace, clean up, and report
// the totals. The call order matters: each step uses state set up by the
// previous one.
int main(int argc, char** argv) {
// Parse the command-line options and allocate the cache
initCache(argc, argv);
// Read the trace file and simulate each memory access
doCache();
// Free the cache memory and close the trace file
freeCache();
// Print the hit / miss / eviction totals
printSummary(Hits, Misses, Evictions);
return 0;
}
测试
make clean
make
./test-csim
Part B:优化矩阵转置
这篇博客写挺好,就不再赘述了