getopt函数
被用来解析命令行选项参数。就不用自己写处理argv了。
#include <unistd.h>
//全局变量
extern char *optarg; //选项的参数指针
extern int optind; //下一次调用getopt时,从optind存储的位置处重新开始检查选项。
extern int opterr; //当opterr=0时,getopt不向stderr输出错误信息。
extern int optopt; //当命令行选项字符不包括在optstring中或者选项缺少必要的参数时,该选项存储在optopt中,getopt返回'?'
int getopt(int argc, char * const argv[], const char*optstring);
字符串optstring可以包含下列元素,例如:
ab:c::
1.单个字符,表示选项 $./getopt -a
2.单个字符后接一个冒号:表示该选项后必须跟一个参数。参数紧跟在选项后或者以空格隔开。该参数的指针赋给optarg。 $./getopt -b100 $./getopt -b 100
3.单个字符后跟两个冒号(GNU扩展),表示该选项后可以跟一个可选参数。如果带参数,参数必须紧跟在选项后,不能以空格隔开,该参数的指针赋给optarg;如果不带参数,optarg为NULL。 $./getopt -c100
malloc 和 calloc 之间的不同点是,malloc 不会设置内存为零,而 calloc 会设置分配的内存为零。
void *calloc(size_t nitems, size_t size)
参数
nitems – 要被分配的元素个数。
size – 元素的大小。
该函数返回一个指针,指向已分配的内存。如果请求失败,则返回 NULL。
cacheline
valid:0为空模块
tag:标志位
time_counter:用来计数最后一次写入时间
time_counter初始化的时候都是0,其值越大代表这个line最近刚刚被写入——我们不应该替换它——所以valid为0的line的time_counter一定也是0(最小值),因为他们连使用都没有被使用过,即我们一定会先替换valid为0的line,这符合书上的策略。
result
typedef struct
{
int hit;
int miss;
int eviction;
}result;
结果设计成了一个结构体
/*
Part A
*/
#include "cachelab.h"
#include <stdio.h> /* fopen freopen perror */
#include <stdint.h> /* uintN_t */
#include <unistd.h> /* getopt */
#include <getopt.h>
#include <stdlib.h> /* atol exit*/
#include <errno.h>
#define false 0
#define true 1
/* State kept for one cache line of the simulated cache. */
typedef struct
{
_Bool valid; /* zero while the line is empty, nonzero once a block has been loaded */
uint64_t tag; /* tag bits of the block currently held by this line */
uint64_t time_counter; /* LRU timestamp: the line with the smallest value is replaced first */
}cacheline;
/* A set is referenced through a pointer to its array of lines. */
typedef cacheline *entry_of_lines;
/* The whole cache is referenced through a pointer to its array of sets. */
typedef entry_of_lines *entry_of_sets;
/*
Data structure:
+-----+
+-----+ +-->Valid|
+---->line0+---+ +-----+
| +-----+ |
+---------------+ | | +---+
| set0 | | +-----+ +-->Tag|
+--> entry_of_lines+------>line1| | +---+
| +---------------+ | +-----+ |
| | | +-------+
| +---------------+ | +-----+ +-->Counter|
| | set1 | +---->line2| +-------+
+--> entry_of_lines| | +-----+
+--------------+ | +---------------+ |
| cache0 +------+ | +-----+
| entry_of_sets| | +---------------+ +---->lineX|
+--------------+ | | set2 | +-----+
+--> entry_of_lines|
| +---------------+
|
| +---------------+
| | setX |
+--> entry_of_lines|
+---------------+
*/
/* Running totals produced by the simulation and printed at the end. */
typedef struct
{
int hit; /* number of accesses that found their block in the cache */
int miss; /* number of accesses that did not find their block */
int eviction; /* number of misses that replaced a valid line */
}result;
/* Allocate a zero-initialised cache of S sets with E lines each. */
entry_of_sets InitializeCache(uint64_t S, uint64_t E);
/* Simulate one access to a set and return the updated counters. */
result HitMissEviction(entry_of_lines search_line, result Result, uint64_t E, uint64_t tag, _Bool verbose);
/* Replay every data access in the trace file against the cache. */
result ReadAndTest(FILE *tracefile, entry_of_sets cache, uint64_t S, uint64_t E, uint64_t s, uint64_t b, _Bool verbose);
/* Free all memory obtained by InitializeCache. */
void RealseMemory(entry_of_sets cache, uint64_t S, uint64_t E);
/*
 * Parse a command-line value that must be a decimal integer in [1, limit].
 *
 * Returns 1 and stores the value in *out on success. Returns 0 (leaving *out
 * untouched) when the text is not a number, has trailing characters, is not
 * positive, or exceeds limit.
 */
static int ParsePositiveArg(const char *text, uint64_t limit, uint64_t *out)
{
    char *end = NULL;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE)
    {
        return 0;
    }
    if (value <= 0 || (uint64_t)value > limit)
    {
        return 0;
    }
    *out = (uint64_t)value;
    return 1;
}

/*
 * Entry point of the cache simulator.
 *
 * Options: -h prints the usage text, -v enables per-access output,
 * -s <s> gives the number of set-index bits, -E <E> the lines per set,
 * -b <b> the number of block-offset bits and -t <tracefile> the trace to
 * replay. s, E, b and the trace file are mandatory; any invalid or missing
 * option prints the usage text and exits with EXIT_FAILURE.
 */
int main(int argc, char * const argv[])
{
    FILE* tracefile = NULL;
    entry_of_sets cache = NULL;
    result Result = {0, 0, 0};
    /* Usage text printed for -h and for every argument error. */
    const char *help_message = "Usage: \"Your complied program\" [-hv] -s <s> -E <E> -b <b> -t <tracefile>\n"
                               "<s> <E> <b> should all above zero and below 64.\n"
                               "Complied with std=c99\n";
    /* Option string handed to getopt: s, E, b and t each take an argument. */
    const char *command_options = "hvs:E:b:t:";
    _Bool verbose = false; /* set by -v */
    uint64_t s = 0;        /* number of set-index bits */
    uint64_t b = 0;        /* number of block-offset bits */
    uint64_t S = 0;        /* number of sets, 2^s */
    uint64_t E = 0;        /* number of lines per set */
    int opt;               /* getopt returns int; a char could never equal -1 where char is unsigned */

    while ((opt = getopt(argc, argv, command_options)) != -1)
    {
        switch (opt)
        {
        case 'h':
            printf("%s", help_message);
            exit(EXIT_SUCCESS);
        case 'v':
            verbose = true;
            break;
        case 's':
            /* s is used as a shift count on 64-bit values, so it must stay below 64. */
            if (!ParsePositiveArg(optarg, 63, &s))
            {
                printf("%s", help_message);
                exit(EXIT_FAILURE);
            }
            /* Shift a 64-bit one: a plain int shift is undefined for s >= 31. */
            S = (uint64_t)1 << s;
            break;
        case 'E':
            if (!ParsePositiveArg(optarg, UINT64_MAX, &E))
            {
                printf("%s", help_message);
                exit(EXIT_FAILURE);
            }
            break;
        case 'b':
            /* b is also used as a shift count on 64-bit addresses. */
            if (!ParsePositiveArg(optarg, 63, &b))
            {
                printf("%s", help_message);
                exit(EXIT_FAILURE);
            }
            break;
        case 't':
            /* If -t is repeated, release the previously opened stream first. */
            if (tracefile != NULL)
            {
                fclose(tracefile);
            }
            if ((tracefile = fopen(optarg, "r")) == NULL)
            {
                perror("Failed to open tracefile");
                exit(EXIT_FAILURE);
            }
            break;
        default:
            printf("%s", help_message);
            exit(EXIT_FAILURE);
        }
    }

    /* An option that was never supplied still holds its initial value. */
    if (s == 0 || b == 0 || E == 0 || tracefile == NULL)
    {
        printf("%s", help_message);
        exit(EXIT_FAILURE);
    }

    /* Build the cache, replay the trace, then release everything. */
    cache = InitializeCache(S, E);
    Result = ReadAndTest(tracefile, cache, S, E, s, b, verbose);
    RealseMemory(cache, S, E);
    fclose(tracefile);

    printSummary(Result.hit, Result.miss, Result.eviction);
    return 0;
}
entry_of_sets InitializeCache(uint64_t S, uint64_t E)
{
entry_of_sets cache;
/* 使用 calloc 代替 malloc 来匹配我们设计的默认情况*/
if ((cache = calloc(S, sizeof(entry_of_lines))) == NULL)
{
perror("Failed to calloc entry_of_sets");
exit(EXIT_FAILURE);
}
//给cacheline分配内存
for(int i = 0; i < S; ++i)
{
if ((cache[i] = calloc(E, sizeof(cacheline))) == NULL)
{
perror("Failed to calloc line in sets");
}
}
return cache;
}
/*
 * Simulate a single access to one cache set and classify it as a hit, a miss,
 * or a miss with eviction.
 *
 * search_line  the E lines of the set selected by the address
 * Result       counters accumulated so far (passed and returned by value)
 * E            number of lines in the set
 * tag          tag bits of the accessed address
 * verbose      when nonzero, print "hit", "miss" or "miss and eviction"
 *
 * LRU bookkeeping: every access (hit or miss) stamps the touched line with
 * a value one greater than the newest timestamp currently in the set, so the
 * line with the smallest timestamp is always the least recently used one.
 * Empty lines keep timestamp 0 and are therefore filled before any valid
 * line is replaced.
 */
result HitMissEviction(entry_of_lines search_line, result Result, uint64_t E, uint64_t tag, _Bool verbose)
{
    uint64_t newest_time = 0;
    uint64_t victim = 0;
    uint64_t i;

    /* The newest timestamp in the set is needed on both the hit and miss path. */
    for (i = 0; i < E; ++i)
    {
        if (search_line[i].time_counter > newest_time)
        {
            newest_time = search_line[i].time_counter;
        }
    }

    /* Hit: a valid line already carries this tag. */
    for (i = 0; i < E; ++i)
    {
        if (search_line[i].valid && search_line[i].tag == tag)
        {
            if (verbose) printf("hit\n");
            ++Result.hit;
            /* Make this line the most recently used one. Merely incrementing
               its old timestamp could leave it older than other lines. */
            search_line[i].time_counter = newest_time + 1;
            return Result;
        }
    }

    /* Miss. */
    if (verbose) printf("miss");
    ++Result.miss;

    if (E == 0)
    {
        /* A set without lines cannot store anything, so nothing is evicted. */
        if (verbose) printf("\n");
        return Result;
    }

    /* Pick the first line with the smallest timestamp as the victim. */
    for (i = 1; i < E; ++i)
    {
        if (search_line[i].time_counter < search_line[victim].time_counter)
        {
            victim = i;
        }
    }

    if (search_line[victim].valid)
    {
        /* Replacing a line that held data counts as an eviction. */
        if (verbose) printf(" and eviction\n");
        ++Result.eviction;
    }
    else
    {
        if (verbose) printf("\n");
        search_line[victim].valid = 1;
    }

    search_line[victim].tag = tag;
    search_line[victim].time_counter = newest_time + 1;
    return Result;
}
/*
 * Replay the trace file against the cache and count hits, misses and
 * evictions.
 *
 * Each trace record is read as an operation character followed by a
 * hexadecimal address; the rest of the line (the access size) is skipped.
 * 'L' and 'S' perform one access. 'M' (modify) is a load followed by a store
 * to the same address, so it performs two accesses: either two hits, or one
 * miss (possibly with an eviction) followed by one hit. 'I' and any other
 * operation character are ignored.
 *
 * tracefile  open trace stream positioned at the first record
 * cache      cache built by InitializeCache
 * S          number of sets (not needed here; the set index is masked with s)
 * E          number of lines per set
 * s          number of set-index bits, must be below 64
 * b          number of block-offset bits, must be below 64
 * verbose    when nonzero, echo each simulated access and its outcome
 */
result ReadAndTest(FILE *tracefile, entry_of_sets cache, uint64_t S, uint64_t E, uint64_t s, uint64_t b, _Bool verbose)
{
    result Result = {0, 0, 0};
    /* Shift a 64-bit one: "1 << s" is an int shift and is undefined for s >= 31. */
    const uint64_t set_index_mask = ((uint64_t)1 << s) - 1;
    char op;
    /* unsigned long long always matches %llx, whatever type uint64_t maps to. */
    unsigned long long raw_address;

    (void)S;

    while (fscanf(tracefile, " %c %llx%*[^\n]", &op, &raw_address) == 2)
    {
        int accesses;

        switch (op)
        {
        case 'L':
        case 'S':
            accesses = 1; /* a load or a store causes at most one miss */
            break;
        case 'M':
            accesses = 2; /* load followed by store to the same address */
            break;
        default:
            accesses = 0; /* instruction fetches and unknown records are skipped */
            break;
        }
        if (accesses == 0)
        {
            continue;
        }

        const uint64_t address = (uint64_t)raw_address;
        const uint64_t set_index = (address >> b) & set_index_mask;
        const uint64_t tag = (address >> b) >> s;
        entry_of_lines search_line = cache[set_index];

        if (verbose)
        {
            printf("%c %llx ", op, raw_address);
        }
        while (accesses-- > 0)
        {
            Result = HitMissEviction(search_line, Result, E, tag, verbose);
        }
    }
    return Result;
}
/*
 * Release the memory of the simulated cache: first the line array of each of
 * the S sets, then the array of set entries itself. E is accepted for
 * symmetry with the other functions but is not used.
 */
void RealseMemory(entry_of_sets cache, uint64_t S, uint64_t E)
{
    entry_of_sets cursor = cache;
    uint64_t remaining = S;

    while (remaining > 0)
    {
        free(*cursor);
        ++cursor;
        --remaining;
    }
    free(cache);
}
/*
数组一行有64个int,即8个block
s=5, E=1, b=5
S = 2^s = 32,共32个set
每个set 有32个字节的block 存 8个int
总共能存 32 * 8 = 256 个int
直接映射的问题:会导致 32 * 32 每过八行 会替换
64 *64 每过四行 会替换 因此 存储B的时候不能直接放到 4-7行
61 * 67 行列相同
*/
/*
*
*/
#include <stdio.h>
#include "cachelab.h"
/* Description string registered together with transpose_submit. */
char transpose_submit_desc[] = "Transpose submission";
/*
 * Transpose the N x M matrix A into the M x N matrix B (B[c][r] = A[r][c]).
 *
 * Three strategies are used:
 *   - 32 x 32: 8 x 8 tiles; a full row of the tile is read into locals
 *     before anything is written to B.
 *   - 64 x 64: 8 x 8 tiles handled as four 4 x 4 quadrants; the upper-right
 *     quadrant of the B tile is used as temporary storage for values that
 *     belong in its lower-left quadrant.
 *   - any other size: plain 16 x 16 tiling with bounds checks.
 *
 * The specialised paths index whole 8 x 8 tiles without bounds checks, so
 * they are selected only when BOTH dimensions match; every other shape takes
 * the bounds-checked generic path.
 */
void transpose_submit(int M, int N, int A[N][M], int B[M][N])
{
    if (M == 32 && N == 32)
    {
        for (int i = 0; i < N; i += 8)
        {
            for (int j = 0; j < M; j += 8)
            {
                for (int k = i; k < i + 8; ++k)
                {
                    /* Read the whole row segment first, then write the column. */
                    int temp_value0 = A[k][j];
                    int temp_value1 = A[k][j+1];
                    int temp_value2 = A[k][j+2];
                    int temp_value3 = A[k][j+3];
                    int temp_value4 = A[k][j+4];
                    int temp_value5 = A[k][j+5];
                    int temp_value6 = A[k][j+6];
                    int temp_value7 = A[k][j+7];
                    B[j][k] = temp_value0;
                    B[j+1][k] = temp_value1;
                    B[j+2][k] = temp_value2;
                    B[j+3][k] = temp_value3;
                    B[j+4][k] = temp_value4;
                    B[j+5][k] = temp_value5;
                    B[j+6][k] = temp_value6;
                    B[j+7][k] = temp_value7;
                }
            }
        }
    }
    else if (M == 64 && N == 64)
    {
        for (int i = 0; i < N; i += 8)
        {
            for (int j = 0; j < M; j += 8)
            {
                /* Upper four rows of the 8 x 8 tile of A. */
                for (int k = i; k < i + 4; ++k)
                {
                    int temp_value0 = A[k][j];
                    int temp_value1 = A[k][j+1];
                    int temp_value2 = A[k][j+2];
                    int temp_value3 = A[k][j+3];
                    int temp_value4 = A[k][j+4];
                    int temp_value5 = A[k][j+5];
                    int temp_value6 = A[k][j+6];
                    int temp_value7 = A[k][j+7];
                    /* Left half goes straight to its final place in B. */
                    B[j][k] = temp_value0;
                    B[j+1][k] = temp_value1;
                    B[j+2][k] = temp_value2;
                    B[j+3][k] = temp_value3;
                    /* Right half is parked, in reversed row order, in the
                       upper-right quadrant of the B tile. */
                    B[j][k+4] = temp_value7;
                    B[j+1][k+4] = temp_value6;
                    B[j+2][k+4] = temp_value5;
                    B[j+3][k+4] = temp_value4;
                }
                for (int l = 0; l < 4; ++l)
                {
                    /* Two columns from the lower four rows of the A tile. */
                    int temp_value0 = A[i+4][j+3-l];
                    int temp_value1 = A[i+5][j+3-l];
                    int temp_value2 = A[i+6][j+3-l];
                    int temp_value3 = A[i+7][j+3-l];
                    int temp_value4 = A[i+4][j+4+l];
                    int temp_value5 = A[i+5][j+4+l];
                    int temp_value6 = A[i+6][j+4+l];
                    int temp_value7 = A[i+7][j+4+l];
                    /* Move the parked values to their final row in the
                       lower-left quadrant of the B tile. */
                    B[j+4+l][i] = B[j+3-l][i+4];
                    B[j+4+l][i+1] = B[j+3-l][i+5];
                    B[j+4+l][i+2] = B[j+3-l][i+6];
                    B[j+4+l][i+3] = B[j+3-l][i+7];
                    /* Store the values read from the lower rows of A. */
                    B[j+3-l][i+4] = temp_value0;
                    B[j+3-l][i+5] = temp_value1;
                    B[j+3-l][i+6] = temp_value2;
                    B[j+3-l][i+7] = temp_value3;
                    B[j+4+l][i+4] = temp_value4;
                    B[j+4+l][i+5] = temp_value5;
                    B[j+4+l][i+6] = temp_value6;
                    B[j+4+l][i+7] = temp_value7;
                }
            }
        }
    }
    else
    {
        /* Generic path: 16 x 16 tiles, clipped at the matrix edges. */
        for (int i = 0; i < N; i += 16)
        {
            for (int j = 0; j < M; j += 16)
            {
                for (int k = i; k < i + 16 && k < N; ++k)
                {
                    for (int l = j; l < j + 16 && l < M; ++l)
                    {
                        B[l][k] = A[k][l];
                    }
                }
            }
        }
    }
}
/* Register transpose_submit, together with its description string, via registerTransFunction. */
void registerFunctions()
{
registerTransFunction(transpose_submit, transpose_submit_desc);
}