从do_delete.c开始看
- 什么是recipe
为了找什么是recipe,首先需要先回到recipestore.h文件中,里面有一个结构体叫backupversion,这个backupversion我理解为处理指纹序列的模块,里面包含.meta,.recipe,和.record。
在backupVersion中是以FILE文件指针的形式来表示这三个东西的,所以在delete之前它们应该已经写入磁盘了,并不是常驻内存(猜测)。
/*
 * Per-backup-version state: identifying fields, file/chunk counters,
 * three stdio streams (metadata, recipe, record) and the buffers declared
 * alongside them.
 */
struct backupVersion {
/* NOTE(review): presumably the location of this version's files -- confirm. */
sds path;
int32_t bv_num; /* backup version number start from 0 */
/* NOTE(review): looks like a "this version was deleted" flag -- confirm values. */
int deleted;
/* Number of file recipes; read_recipe_for_deletion iterates up to this count. */
int64_t number_of_files;
/* Total chunk count; read_next_n_chunk_pointers treats it as the stream end. */
int64_t number_of_chunks;
sds fname_prefix; /* The prefix of the file names */
/* Stream that read_next_file_recipe_meta reads file recipe metadata from. */
FILE *metadata_fp;
/* Stream that read_next_n_chunk_pointers reads chunk pointers from. */
FILE *recipe_fp;
/* NOTE(review): record stream; its use is not visible in this section. */
FILE *record_fp;
/* the write buffer of recipe meta */
char *metabuf;
/* NOTE(review): presumably the current write offset into metabuf -- confirm. */
int metabufoff;
/* the write buffer of records */
char *recordbuf;
/* NOTE(review): presumably the current write offset into recordbuf -- confirm. */
int recordbufoff;
/* NOTE(review): segment buffer with its length and offset; usage not shown here. */
char* segmentbuf;
int segmentlen;
int segmentbufoff;
};
顺便就看看这个头文件中的其他结构体:
- fileRecipeMeta
file recipe的元数据。包含chunknum和filesize还有一个不理解的filename,不知道sds是什么东西?
sds是动态字符串库SDS(Simple Dynamic Strings)提供的字符串类型,在util/sds.h中有说明
/*
 * Point to the meta of a file recipe.
 * One instance describes a single file of a backup version; it is produced
 * by read_next_file_recipe_meta and released with free_file_recipe_meta.
 */
struct fileRecipeMeta {
/* Number of chunks in the file; used as the per-file chunk loop bound. */
int64_t chunknum;
/* NOTE(review): presumably the file size in bytes -- confirm. */
int64_t filesize;
/* File name as an sds dynamic string. */
sds filename;
};
- segmentRecipe
这里说每个recipe中包含segment。
每一个预取的segment都被组织在一张哈希表中,目的是为了优化查找
这是逻辑局部性的基本单元(实现逻辑局部性?)
/*
 * Each recipe consists of segments.
 * Each prefetched segment is organized as a hash table for optimizing lookup.
 * It is the basic unit of logical locality.
 * */
struct segmentRecipe {
/* Identifier of this segment. */
segmentid id;
/* Map fingerprints in the segment to their container IDs.*/
GHashTable *kvpairs;
};
- chunkPointer
指向chunk的指针。里面包含了chunk的指纹,container的id和chunk的大小。注意看注释。
/*
 * One entry of a recipe: a chunk fingerprint, the container it lives in and
 * the chunk size.
 *
 * If id == CHUNK_SEGMENT_START or CHUNK_SEGMENT_END,
 * it is a flag of segment boundary.
 * If id == CHUNK_SEGMENT_START,
 * size indicates the length of the segment in terms of # of chunks.
 */
struct chunkPointer {
/* Fingerprint of the chunk. */
fingerprint fp;
/* Container ID, or a segment-boundary flag as described above. */
containerid id;
/* Chunk size, or the segment length in chunks for a segment-start flag. */
int32_t size;
};
所以总结,recipe应该是菜单一类的东西,用来指示文件(如struct fileRecipeMeta),其中包含segment recipe,segment recipe用来实现局部性。
回爷爷家了。回了再继续看…
我又回来了…最近真的太难了
好了知道了recipe 是什么,现在开始看do_delete.c
- delete_an_entry函数
index_delete(fp,*id)函数在index.c中,其中又调用了kvstore_delete(fp, id);(套娃…),然后kvstore_delete又等价于 kvstore_htable_delete,然后再找到kvstore_htable.c文件,然后找到kvstore_htable_delete。
g_hash_table相当于fingerprint to container index。key是指纹,value是container的ID。
这个函数首先在哈希表中查找key对应的键值对kv,找不到就直接返回;然后取得kv对应的value数组并遍历,找到值为id的位置后,先把该位置置为TEMPORARY_ID。只有当它不是最后一个元素、并且下一个元素不是TEMPORARY_ID时,才调用memmove,把从value[i+1]开始的(destor.index_value_length - i - 1) * sizeof(int64_t)个字节前移到value[i]的位置。最后,如果value[0]是TEMPORARY_ID(也就是所有ID都被删除了),就把整个键值对从哈希表中移除。
/*
 * A simple wrap.
 * Just to make the interfaces of the index module more consistent.
 *
 * fp: fingerprint whose index entry should lose one ID.
 * id: pointer to the ID to remove; it is dereferenced and passed by value
 *     to index_delete, so it must not be NULL.
 */
static inline void delete_an_entry(fingerprint *fp, int64_t *id){
index_delete(fp, *id);
}
/*
 * Remove 'id' from the index entry of fingerprint 'fp' by forwarding to the
 * key-value store hook kvstore_delete.
 *
 * NOTE(review): this is a plain `inline` definition (neither `static` nor
 * `extern`). Under C99/C11 inline rules such a definition does not by itself
 * emit an external symbol, which can cause link errors depending on compiler
 * mode -- confirm how the project is built before changing it.
 */
inline void index_delete(fingerprint *fp, int64_t id){
kvstore_delete(fp, id);
}
/*
 * Select the key-value store backend named by destor.index_key_value_store
 * and bind the kvstore_* / close_kvstore hooks to its implementation.
 *
 * Only the in-memory hash table backend is handled; any other setting logs
 * a warning and terminates the process with exit status 1.
 */
void init_kvstore() {
    /* Reject unsupported backends up front. */
    if (destor.index_key_value_store != INDEX_KEY_VALUE_HTABLE) {
        WARNING("Invalid key-value store!");
        exit(1);
    }

    /* Initialise the hash-table backend, then publish its entry points. */
    init_kvstore_htable();

    close_kvstore = close_kvstore_htable;
    kvstore_lookup = kvstore_htable_lookup;
    kvstore_update = kvstore_htable_update;
    kvstore_delete = kvstore_htable_delete;
}
/*
 * Remove the 'id' from the kvpair identified by 'key'.
 *
 * key: lookup key (a fingerprint) into the global hash table 'htable'.
 * id:  the ID to remove from that key's value array.
 *
 * The value array holds destor.index_value_length slots; unused slots carry
 * TEMPORARY_ID. The first slot equal to 'id' is cleared. If live IDs follow
 * it, they are shifted one slot towards the front and the vacated last slot
 * is reset to TEMPORARY_ID so that no ID appears twice. When slot 0 ends up
 * as TEMPORARY_ID the whole kvpair is removed from the table.
 *
 * Does nothing if 'key' is not present.
 */
void kvstore_htable_delete(char* key, int64_t id){
    kvpair kv = g_hash_table_lookup(htable, key);
    if(!kv)
        return;

    int64_t *value = get_value(kv);
    int i;
    for(i=0; i<destor.index_value_length; i++){
        if(value[i] != id)
            continue;

        value[i] = TEMPORARY_ID;
        /*
         * If index exploits physical locality,
         * the value length is 1. (correct)
         * If index exploits logical locality,
         * the deleted one should be in the end. (correct)
         */
        /* NOTICE: If the backups are not deleted in FIFO order, this assert should be commented */
        assert((i == destor.index_value_length - 1)
                || value[i+1] == TEMPORARY_ID);
        if(i < destor.index_value_length - 1 && value[i+1] != TEMPORARY_ID){
            /* The next ID is live: close the gap by shifting the tail left. */
            memmove(&value[i], &value[i+1],
                    (destor.index_value_length - i - 1) * sizeof(int64_t));
            /*
             * memmove leaves the old last element duplicated in the final
             * slot; mark that slot as free again.
             */
            value[destor.index_value_length - 1] = TEMPORARY_ID;
        }
        break;
    }

    /*
     * If all IDs are deleted, the kvpair is removed.
     */
    if(value[0] == TEMPORARY_ID){
        /* This kvpair can be removed. */
        g_hash_table_remove(htable, key);
    }
}
- static void* read_recipe_for_deletion(void *arg)函数
就是从arg指向的备份版本中,逐个文件读出其中每个chunk的指针(也就是结构体chunkPointer),把其中的指纹、大小和container id复制到一个新建的chunk里,再把这个chunk压入delete_recipe_queue中。为什么要释放cp?是因为在read_next_n_chunk_pointers读的过程中malloc了空间,所以要释放。释放r也是一个道理。
最后new一个新的chunk的指针,设置为end边界压入队列。
/*
 * Thread entry point: stream every chunk of a backup version into
 * delete_recipe_queue.
 *
 * arg: the backup version to read, passed as struct backupVersion*.
 *
 * Pushes a CHUNK_FILE_START marker, then one chunk per chunk pointer read
 * from the recipe (fingerprint, size and container ID copied over), then a
 * CHUNK_FILE_END marker, and finally terminates the queue. Always returns
 * NULL.
 */
static void* read_recipe_for_deletion(void *arg) {
    struct backupVersion *bv = (struct backupVersion *)arg;

    struct chunk *marker = new_chunk(0);
    SET_CHUNK(marker, CHUNK_FILE_START);
    sync_queue_push(delete_recipe_queue, marker);

    /* The counts are int64_t, so the counters must be as wide. */
    int64_t i, j;
    int k;
    for (i = 0; i < bv->number_of_files; i++) {
        struct fileRecipeMeta *r = read_next_file_recipe_meta(bv);

        for (j = 0; j < r->chunknum; j++) {
            struct chunkPointer *cp = read_next_n_chunk_pointers(bv, 1, &k);
            if (cp == NULL) {
                /*
                 * The recipe stream ended before the metadata said it
                 * would; stop instead of dereferencing NULL.
                 */
                break;
            }

            struct chunk *ck = new_chunk(0);
            memcpy(&ck->fp, &cp->fp, sizeof(fingerprint));
            ck->size = cp->size;
            ck->id = cp->id;
            sync_queue_push(delete_recipe_queue, ck);

            /* read_next_n_chunk_pointers malloc'ed the array. */
            free(cp);
        }

        free_file_recipe_meta(r);
    }

    /* Disabled segment-based variant, kept from the original: */
    // struct segmentRecipe* sr;
    // while((sr=read_next_segment(bv))){
    // segment_recipe_foreach(sr, add_an_entry, &sr->id);
    // int64_t* r = (int64_t*)malloc(sizeof(int64_t));
    // *r = sr->id;
    // g_hash_table_insert(invalid_containers, r, r);
    // }

    marker = new_chunk(0);
    SET_CHUNK(marker, CHUNK_FILE_END);
    sync_queue_push(delete_recipe_queue, marker);

    sync_queue_term(delete_recipe_queue);
    return NULL;
}
- arg是什么?
发现void* read_recipe_for_deletion这个函数只有在pthread_create中调用了。
- startJIDPtr是什么?
startJIDPtr的内容指向的是备份系统的number,arg也是指向备份系统的number的指针。见4
- g_hash_table_new_full函数
是glib库中的函数。glib库里有两个函数可以用于创建hash表,分别是g_hash_table_new()和g_hash_table_new_full(),它们的原型如下:
GHashTable * g_hash_table_new(GHashFunc hash_func, GEqualFunc key_equal_func);
GHashTable * g_hash_table_new_full(GHashFunc hash_func, GEqualFunc key_equal_func, GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func);
其中hash_func是一个函数,它为key创建一个hash值;key_equal_func用于比较两个key是否相等;
key_destroy_func当你从hash表里删除、销毁一个条目时,glib库会自动调用它释放key所占用的内存空间,
这对于key是动态分配内存的hash表来说非常有用;value_destroy_func的作用与key_destroy_func相似,
只是它释放的是value占用的内存空间。
来自https://blog.csdn.net/plusboy/article/details/1496215
- struct backupVersion* open_backup_version(int number) 函数
打开一个备份的版本。参数是number,也就是说startJIDPtr的内容指向的是备份系统的number,arg也是指向备份系统的number的指针。
- 外层遍历备份系统的文件,内层遍历chunk,首先读取每一个chunk的指针chunkPointer,然后将chunk对应的id插入哈希表中。
- 循环结束后,建立一个新的chunk c。
- void sync_queue_push(SyncQueue* s_queue, void* item)函数。在utils/sync_queue.c中。这个函数主要是互斥地访问s_queue,并将item插入s_queue中。在这里是将新建立的chunk c插入到delete_recipe_queue中。
/*
 * Append 'item' to the synchronized queue.
 *
 * s_queue: the queue to push to.
 * item:    opaque pointer stored in the queue.
 *
 * The push is performed under s_queue->mutex. If the queue has already been
 * terminated (term == 1) the item is not enqueued. When max_size is
 * positive and the queue is full, the caller blocks on the max_work
 * condition until the size drops below max_size. After a successful push
 * all waiters on min_work are woken.
 *
 * Lock/unlock failures are reported on stdout and the function returns.
 */
void sync_queue_push(SyncQueue* s_queue, void* item) {
    if (pthread_mutex_lock(&s_queue->mutex) != 0) {
        puts("failed to lock!");
        return;
    }

    if (s_queue->term == 1) {
        /* Queue was closed by sync_queue_term: drop the item. */
        pthread_mutex_unlock(&s_queue->mutex);
        return;
    }

    /* Bounded queue: wait until there is room. */
    while (s_queue->max_size > 0
            && queue_size(s_queue->queue) >= s_queue->max_size) {
        pthread_cond_wait(&s_queue->max_work, &s_queue->mutex);
    }

    queue_push(s_queue->queue, item);

    /* Wake everyone waiting on min_work. */
    pthread_cond_broadcast(&s_queue->min_work);

    if (pthread_mutex_unlock(&s_queue->mutex)) {
        /* This is the unlock path; the message used to say "lock". */
        puts("failed to unlock!");
        return;
    }
}
- destor.backup_retention_time是什么?
表示要保留(retained)多少个备份;当backup_retention_time为负数时,表示所有的备份都被保留。还是不懂,是留在内存中的意思吗?还是备份的意思?
- struct fileRecipeMeta* read_next_file_recipe_meta(struct backupVersion* b)
该函数从备份版本b的元数据文件中读入下一个file recipe meta r,该函数最后返回r。
/*
 * Read the next file recipe meta record from b->metadata_fp.
 *
 * The record is read in this order: an int name length, that many bytes of
 * file name, then the chunknum and filesize fields.
 *
 * b: backup version whose metadata stream is read.
 * Returns the object created by new_file_recipe_meta with chunknum and
 * filesize filled in.
 *
 * The number of records read so far is kept in a function-local static, so
 * the count is shared by every backup version read in the process.
 *
 * NOTE(review): none of the fread results are checked; on a short read the
 * name length and the fields are left unset.
 */
struct fileRecipeMeta* read_next_file_recipe_meta(struct backupVersion* b) {
    static int read_file_num;
    assert(read_file_num <= b->number_of_files);

    /* Length prefix of the file name. */
    int name_len;
    fread(&name_len, sizeof(name_len), 1, b->metadata_fp);

    /* The name is stored without a terminator; add one locally. */
    char name[name_len + 1];
    fread(name, name_len, 1, b->metadata_fp);
    name[name_len] = 0;

    struct fileRecipeMeta *meta = new_file_recipe_meta(name);
    fread(&meta->chunknum, sizeof(meta->chunknum), 1, b->metadata_fp);
    fread(&meta->filesize, sizeof(meta->filesize), 1, b->metadata_fp);

    read_file_num += 1;
    return meta;
}
- struct chunkPointer* read_next_n_chunk_pointers(struct backupVersion* b, int n, int *k)
这个函数读入n或者b->number_of_chunks - read_chunk_num个chunk pointer。
注意:int num = (b->number_of_chunks - read_chunk_num) > n ?
n : (b->number_of_chunks - read_chunk_num), i; 这里面的,i表示的是声明i这个变量,和前面的num的赋值无关
/*
 * Read up to 'n' chunk pointers from b->recipe_fp.
 *
 * b: backup version whose recipe stream is read.
 * n: maximum number of chunk pointers wanted.
 * k: out parameter; receives the number of chunk pointers returned.
 *
 * Returns a malloc'ed array of *k entries that the caller must free, or
 * NULL with *k == 0 once b->number_of_chunks chunk pointers have been read
 * in total (the end of the stream). Entries whose id marks a segment
 * boundary are skipped and do not count towards the result.
 *
 * The running total is kept in a function-local static, so it is shared by
 * every backup version read in the process.
 *
 * NOTE(review): the malloc and fread results are not checked.
 */
struct chunkPointer* read_next_n_chunk_pointers(struct backupVersion* b, int n,
        int *k) {
    /* Total number of read chunks. */
    static int read_chunk_num;

    if (b->number_of_chunks == read_chunk_num) {
        /* It's the stream end. */
        *k = 0;
        return NULL;
    }

    /* Hand out min(n, chunks still unread). */
    int num;
    if ((b->number_of_chunks - read_chunk_num) > n)
        num = n;
    else
        num = (b->number_of_chunks - read_chunk_num);

    struct chunkPointer *cp = (struct chunkPointer *) malloc(
            sizeof(struct chunkPointer) * num);

    int filled = 0;
    while (filled < num) {
        struct chunkPointer *slot = &cp[filled];

        fread(&(slot->fp), sizeof(fingerprint), 1, b->recipe_fp);
        fread(&(slot->id), sizeof(containerid), 1, b->recipe_fp);
        fread(&(slot->size), sizeof(int32_t), 1, b->recipe_fp);

        /* Ignore segment boundaries: the slot is reused for the next read. */
        if (slot->id == 0 - CHUNK_SEGMENT_START
                || slot->id == 0 - CHUNK_SEGMENT_END)
            continue;

        filled++;
    }

    *k = num;
    read_chunk_num += num;
    assert(read_chunk_num <= b->number_of_chunks);

    return cp;
}
- void sync_queue_term(SyncQueue* s_queue)
这个函数互斥地把term的值修改为1(通过上锁的方式实现互斥),然后广播min_work条件变量,唤醒在它上面等待的线程。
/*
 * Mark the queue as terminated.
 *
 * s_queue: the queue to close.
 *
 * Under s_queue->mutex, sets term to 1 and wakes every thread waiting on
 * min_work. If the mutex cannot be locked, a message is printed on stdout
 * and the queue is left untouched.
 */
void sync_queue_term(SyncQueue* s_queue) {
    int lock_rc = pthread_mutex_lock(&s_queue->mutex);

    if (lock_rc == 0) {
        s_queue->term = 1;
        /* Wake all threads waiting on min_work. */
        pthread_cond_broadcast(&s_queue->min_work);
        pthread_mutex_unlock(&s_queue->mutex);
    } else {
        puts("failed to lock!");
    }
}