一.原理介绍
- 原理:将一个文件或者其它对象映射进内存。
1. 使用普通文件提供的内存映射
2. 使用特殊文件提供匿名内存映射
![在这里插入图片描述](https://img-blog.csdnimg.cn/direct/3a9bcf1a66274d58ab742129c4497647.png)
二.核心API介绍
文件映射(mmap)
#include <sys/mman.h>
void *mmap(void *addr, size_t length, int prot, int flags,
int fd, off_t offset);
int munmap(void *addr, size_t length);
参数addr:指向欲映射的内存起始地址,通常设为 NULL,代表让系统自动选
定地址,映射成功后返回该地址。
参数length:代表将文件中多大的部分映射到内存。(页的整数倍,1页=4096字节=4KB)
参数prot:映射区域的保护方式。可以为以下几种方式的组合:
PROT_EXEC 执行
PROT_READ 读取
PROT_WRITE 写入
PROT_NONE 不能存取
参数flags:影响映射区域的各种特性。必须要指定MAP_SHARED 或MAP_PRIVATE。
MAP_SHARED - 映射区域数据与文件对应,允许其他进程共享
MAP_PRIVATE - 映射区域生成文件的copy,修改不同步文件
MAP_ANONYMOUS - 建立匿名映射。此时会忽略参数fd,不涉及文件,而
且映射区域无法和其他进程共享。
MAP_DENYWRITE - 允许对映射区域的写入操作,其他对文件直接写入的
操作将会被拒绝。
MAP_LOCKED - 将映射区域锁定住,这表示该区域不会被置换到swap(不会被换出到磁盘,一直保留在内存中)
参数fd:要映射到内存中的文件描述符。如果使用匿名内存映射时,即flags中设置了MAP_ANONYMOUS,fd设为-1。有些系统不支持匿名内存映射,则可以使用open打开/dev/zero文件(mmap需要的是文件描述符),然后对该文件进行映射,可以同样达到匿名内存映射的效果。
参数offset:文件映射的偏移量,通常设置为0,代表从文件最前方开始对应,offset必须是分页大小的整数倍。
文件同步(msync)
实现磁盘文件内容与共享内存区中的内容一致,即同步操作。
函数原型
int msync ( void * addr, size_t len, int flags)
头文件
#include<sys/mman.h>
addr:文件映射到进程空间的地址;
len:映射空间的大小;
flags:刷新的参数设置,可以取值MS_ASYNC/ MS_SYNC
其中:
取值为MS_ASYNC(异步)时,调用会立即返回,不等到更新的完成;
取值为MS_SYNC(同步)时,调用会等到更新完成之后返回;
返回值
成功则返回0;失败则返回-1;
扩大存储(mremap)
扩大(或缩小)现有的内存映射
函数原型
void * mremap(void *old_address, size_t old_size , size_t new_size, int flags);
头文件
#include <unistd.h>
#include <sys/mman.h>
old_address: 上一次已映射到进程空间的地址;
old_size: 旧空间的大小;
new_size: 重新映射指定的新空间大小;
flags: 取值可以是0或者MREMAP_MAYMOVE,0代表不允许内核移动映射区域,
MREMAP_MAYMOVE则表示内核可以根据实际情况移动映射区域以找到一个符合
new_size大小要求的内存区域
返回值
成功则返回重新映射后的内存起始地址;失败则返回MAP_FAILED(即(void *)-1),并设置errno;
三 .文件映射类实现
1. 头文件
#ifndef QINIU_LARGEFILE_MMAPFILE_H_
#define QINIU_LARGEFILE_MMAPFILE_H_
#include<unistd.h>
#include"common.h"
namespace qiniu
{
namespace largefile
{
// Sizing policy for a memory mapping that can grow.
// NOTE: the field order must not change; the test programs in this file
// initialise this struct positionally, e.g. {10240000,4096,4096}.
struct MMapOption
{
int32_t max_mmap_size_;   // upper bound for the mapped size, in bytes
int32_t first_mmap_size_; // size used for the initial mapping, in bytes
int32_t per_mmap_size_;   // growth step applied on each remap, in bytes
};
// Manages one shared memory mapping of an already-open file descriptor.
// The descriptor is borrowed: this class never closes it.
class MMapFile
{
public:
    // Construction / destruction.
    MMapFile();
    explicit MMapFile(const int fd);
    MMapFile(const MMapOption &mmap_option, const int fd);
    ~MMapFile();

    // Mapping life cycle.
    bool map_file(const bool write = false); // create the initial mapping
    bool remap_file();                       // grow the mapping by one step
    bool munmap_file();                      // drop the mapping
    bool sync_file();                        // schedule write-back to the file

    // Accessors.
    void *get_data() const;   // start address of the mapping
    int32_t get_size() const; // mapped size in bytes

private:
    // Grows the underlying file so that it is at least `size` bytes long.
    bool ensure_file_size(const int32_t size);

private:
    int32_t size_;                       // currently mapped size
    int fd_;                             // borrowed file descriptor
    void *data_;                         // mapping start, NULL when unmapped
    struct MMapOption mmap_file_option_; // sizing policy
};
}
}
#endif
2. cpp文件
#include"mmap_file.h"
#define debug 1
namespace qiniu
{
namespace largefile
{
// Default constructor: no descriptor, nothing mapped, all sizing options zero.
MMapFile::MMapFile()
    : size_(0), fd_(-1), data_(NULL)
{
    const MMapOption zeroed = {0, 0, 0};
    mmap_file_option_ = zeroed;
}
// Binds the object to an open descriptor; nothing is mapped yet and the
// sizing options stay zero.
MMapFile::MMapFile(const int fd)
    : size_(0), fd_(fd), data_(NULL)
{
    const MMapOption zeroed = {0, 0, 0};
    mmap_file_option_ = zeroed;
}
// Binds the object to an open descriptor and copies the caller's sizing
// policy; nothing is mapped yet.
MMapFile::MMapFile(const MMapOption &mmap_option, const int fd)
    : size_(0), fd_(fd), data_(NULL), mmap_file_option_(mmap_option)
{
}
// If a mapping exists, flushes it synchronously, unmaps it and resets all
// members. Does nothing when no mapping is held.
MMapFile::~MMapFile()
{
    if (!data_)
    {
        return;
    }

    if (debug)
    {
        printf("mmap file destruct,fd:%d,maped size:%d,data:%p\n", fd_, size_, data_);
    }

    // MS_SYNC: wait until the dirty pages are written before unmapping.
    msync(data_, size_, MS_SYNC);
    munmap(data_, size_);

    data_ = NULL;
    size_ = 0;
    fd_ = -1;

    const MMapOption zeroed = {0, 0, 0};
    mmap_file_option_ = zeroed;
}
// Schedules an asynchronous write-back (MS_ASYNC) of the mapping.
// Returns true when nothing is mapped or when msync() succeeds.
bool MMapFile::sync_file()
{
    const bool has_mapping = (NULL != data_) && (size_ > 0);
    if (!has_mapping)
    {
        return true;
    }
    return 0 == msync(data_, size_, MS_ASYNC);
}
// Creates the initial MAP_SHARED mapping of fd_ starting at file offset 0.
// write: true maps read+write, false maps read-only.
// Returns false when fd_ is negative, max_mmap_size_ is 0, the file cannot be
// grown to the chosen size, or mmap() fails (which also resets size_, fd_ and
// data_).
bool MMapFile::map_file(const bool write)
{
    const int prot = write ? (PROT_READ | PROT_WRITE) : PROT_READ;

    if (fd_ < 0 || 0 == mmap_file_option_.max_mmap_size_)
    {
        return false;
    }

    // Use the configured first size unless size_ has already reached the cap.
    size_ = (size_ < mmap_file_option_.max_mmap_size_)
                ? mmap_file_option_.first_mmap_size_
                : mmap_file_option_.max_mmap_size_;

    // The file must be at least as long as the region about to be mapped.
    if (!ensure_file_size(size_))
    {
        fprintf(stderr, "ensure file size failed in map_file,size:%d\n", size_);
        return false;
    }

    void *mapped = mmap(0, size_, prot, MAP_SHARED, fd_, 0);
    if (MAP_FAILED == mapped)
    {
        fprintf(stderr, "map file failed:%s", strerror(errno));
        size_ = 0;
        fd_ = -1;
        data_ = NULL;
        return false;
    }
    data_ = mapped;

    if (debug)
    {
        printf("mmap file successed,fd:%d maped size:%d,data:%p\n", fd_, size_, data_);
    }
    return true;
}
void*MMapFile::get_data()const
{
return this->data_;
}
// Drops the current mapping.
// On success data_ and size_ are cleared, so neither a second call nor the
// destructor touches the address range again; by then the range may belong to
// an unrelated mapping.
// Returns true when the mapping was removed or when nothing was mapped, and
// false when munmap() fails (state is left untouched in that case).
bool MMapFile::munmap_file()
{
    if (NULL == data_)
    {
        return true; // nothing to unmap
    }
    if (munmap(data_, size_) != 0)
    {
        return false;
    }
    data_ = NULL;
    size_ = 0;
    return true;
}
bool MMapFile::remap_file()
{
if(fd_<0||data_==NULL)
{
fprintf(stderr,"mremap not mapped yet\n");
return false;
}
if(size_>=mmap_file_option_.max_mmap_size_)
{
fprintf(stderr,"already mapped max size,now size:%d,max size:%d\n",size_,mmap_file_option_.max_mmap_size_);
return false;
}
int32_t new_size=size_+mmap_file_option_.per_mmap_size_;
if(new_size>mmap_file_option_.max_mmap_size_)
{
new_size=mmap_file_option_.max_mmap_size_;
}
if(!ensure_file_size(new_size))
{
fprintf(stderr,"ensure file size failed in remap_file,size:%d\n",new_size);
return false;
}
if(debug)printf("mremap start.fd:%d,now size:%d,old data:%p\n",fd_,new_size,data_);
void*new_map_data=mremap(data_, size_, new_size, MREMAP_MAYMOVE);
if(MAP_FAILED==new_map_data)
{
fprintf(stderr,"mremap failed,fd:%d ,new size:%d,error desc:%s\n",fd_,new_size,strerror(errno));
return false;
}else
{
if(debug)printf("mremap successed,fd:%d ,new size:%d",fd_,new_size);
}
data_=new_map_data;
size_=new_size;
return true;
}
// Makes sure the file behind fd_ is at least `size` bytes long, extending it
// with ftruncate() when it is shorter. A longer file is left untouched.
// Returns false when fstat() or ftruncate() fails.
bool MMapFile::ensure_file_size(const int32_t size)
{
    struct stat file_stat;
    if (fstat(fd_, &file_stat) < 0)
    {
        fprintf(stderr, "fstat error,error desc : %s\n", strerror(errno));
        return false;
    }

    const bool needs_growth = file_stat.st_size < size;
    if (!needs_growth)
    {
        return true;
    }

    if (0 != ftruncate(fd_, size))
    {
        fprintf(stderr, "ftruncate error,size:%d,error desc:%s\n", size, strerror(errno));
        return false;
    }
    return true;
}
int32_t MMapFile::get_size()const
{
return this->size_;
}
}
}
3.单元测试
#include"common.h"
#include"mmap_file.h"
using namespace std;
using namespace qiniu;
static const mode_t OPEN_MODE=0644;
const static largefile::MMapOption mmap_option={10240000,4096,4096};
// Opens file_name with the given flags and the file-level OPEN_MODE.
// Returns the descriptor, or -errno when open() fails.
int open_file(string file_name, int open_flags)
{
    const int fd = open(file_name.c_str(), open_flags, OPEN_MODE);
    return (fd < 0) ? -errno : fd;
}
int main()
{
const char*filename="./mapfile_test.txt";
int fd = open_file(filename,O_RDWR|O_CREAT|O_LARGEFILE);
if(fd<0)
{
fprintf(stderr,"open file failed filename:%s,error desc:%s\n",filename,strerror(-fd));
return -1;
}
largefile::MMapFile*map_file=new largefile::MMapFile(mmap_option,fd);
bool is_mapped=map_file->map_file(true);
if(is_mapped)
{
map_file->remap_file();
memset(map_file->get_data(),'9',map_file->get_size());
map_file->sync_file();
map_file->munmap_file();
}else
{
fprintf(stderr,"map file faile\n");
}
close(fd);
return 0;
}
4.效果展示
![在这里插入图片描述](https://img-blog.csdnimg.cn/direct/4a6fdbc150734d9e9e715f6ed90ec641.png)
四:文件操作类实现
1.核心API介绍
- strdup(复制字符串)
- char * strdup( const char *s);
strdup()会先用 malloc()配置与参数 s 字符串相同的空间大小,
然后将参数 s 字符串的内容复制到该内存地址,然后把该地址返回。
该地址最后可以利用 free()来释放。
返回一字符串指针,该指针指向复制后的新字符串地址。若返回
NULL 表示内存不足。
- fstat(由文件描述词取得文件状态)
- int fstat(int fildes,struct stat *buf);
fstat()用来将参数 fildes 所指的文件状态,复制到参数 buf 所指的
结构中(struct stat)。fstat()与 stat()作用完全相同,不同处在
于传入的参数为已打开的文件描述词。详细内容请参考 stat()。
执行成功则返回 0,失败返回-1,错误代码存于 errno。
2.头文件
#ifndef QINIU_LARGE_FILE_OP_H_
#define QINIU_LARGE_FILE_OP_H_
#include"common.h"
namespace qiniu
{
namespace largefile
{
// Thin wrapper around a file descriptor that opens lazily and retries
// positional reads and writes.
// The destructor is virtual because the class has virtual member functions and
// is used as a base class; deleting a derived object through a FileOperation*
// would otherwise be undefined behaviour.
// Copying is disabled: the object owns a strdup'ed file name and an open
// descriptor, and a copy would free/close both twice.
class FileOperation
{
public:
    // file_name: path of the file; open_flags: flags handed to open().
    FileOperation(const std::string &file_name, const int open_flags = O_RDWR | O_LARGEFILE);
    virtual ~FileOperation();

    int open_file();   // returns the descriptor, or -errno
    void close_file(); // closes the descriptor if one is open
    int flush_file();  // fsync unless the file was opened with O_SYNC
    int unlink_file(); // closes and removes the file

    // Positional read/write of exactly nbytes at offset.
    virtual int pread_file(char *buf, const int32_t nbytes, const int64_t offset);
    virtual int pwrite_file(const char *buf, const int32_t nbytes, const int64_t offset);

    // Write of exactly nbytes at the current file position.
    int write_file(const char *buf, const int32_t nbytes);

    int64_t get_file_size();
    int ftruncate_file(const int64_t length);
    int seek_file(const int64_t offset);

    int get_fd() const
    {
        return fd_;
    }

protected:
    // Opens the file on demand; returns the descriptor or a negative value.
    int check_file();

protected:
    int fd_;          // -1 (or a negative errno) when not open
    int open_flags;   // flags passed to open()
    char *file_name_; // allocated with strdup(), released with free()

protected:
    static const mode_t OPEN_MODE = 0644;   // permissions for created files
    static const int MAX_DISK_TIMES = 5;    // bound on the I/O retry loops

private:
    // Declared but not defined: copying is not allowed.
    FileOperation(const FileOperation &);
    FileOperation &operator=(const FileOperation &);
};
}
}
#endif
3.cpp文件
#include"file_op.h"
#include"common.h"
namespace qiniu
{
namespace largefile
{
// Stores the open flags and a private copy of the file name. The file is not
// opened here.
FileOperation::FileOperation(const std::string &file_name, const int open_flags)
    : fd_(-1),
      open_flags(open_flags),
      file_name_(strdup(file_name.c_str()))
{
}
// Closes the descriptor if one is open and releases the file name copy.
// Any non-negative fd_ is treated as open: 0 is a valid descriptor, and this
// matches the `fd_<0` test used by close_file().
FileOperation::~FileOperation()
{
    if (fd_ >= 0)
    {
        ::close(fd_);
        fd_ = -1;
    }
    // free(NULL) is a no-op, so no NULL check is needed.
    ::free(file_name_);
    file_name_ = NULL;
}
// (Re)opens the file with the stored flags and OPEN_MODE. A descriptor that
// is already open is closed first.
// Returns the new descriptor, or -errno when open() fails.
int FileOperation::open_file()
{
    // Any non-negative fd_ is open; 0 is a valid descriptor.
    if (fd_ >= 0)
    {
        ::close(fd_);
        fd_ = -1;
    }
    fd_ = ::open(file_name_, open_flags, OPEN_MODE);
    if (fd_ < 0)
    {
        return -errno;
    }
    return fd_;
}
// Closes the descriptor when one is held and marks the object as not open.
void FileOperation::close_file()
{
    if (fd_ >= 0)
    {
        ::close(fd_);
        fd_ = -1;
    }
}
// Returns the file size in bytes from fstat(), opening the file on demand.
// Returns -1 when the file cannot be opened or fstat() fails.
int64_t FileOperation::get_file_size()
{
    const int handle = check_file();
    if (handle < 0)
    {
        return -1;
    }

    struct stat info;
    const bool stat_ok = (0 == fstat(handle, &info));
    return stat_ok ? info.st_size : -1;
}
// Returns fd_, opening the file first when fd_ is negative. The result of
// open_file() (descriptor or -errno) is stored in fd_ either way.
int FileOperation::check_file()
{
    const bool is_open = !(fd_ < 0);
    if (!is_open)
    {
        fd_ = open_file();
    }
    return fd_;
}
// Sets the file length to `length` bytes, opening the file on demand.
// Returns the negative value from check_file() when the file cannot be
// opened, otherwise the result of ftruncate().
int FileOperation::ftruncate_file(const int64_t length)
{
    const int handle = check_file();
    return (handle < 0) ? handle : ftruncate(handle, length);
}
// Moves the file position to the absolute `offset`, opening the file on
// demand. Returns the negative value from check_file() when the file cannot
// be opened, otherwise the lseek() result converted to int.
int FileOperation::seek_file(const int64_t offset)
{
    const int handle = check_file();
    return (handle < 0) ? handle : lseek(handle, offset, SEEK_SET);
}
// Forces buffered data to disk with fsync(). Returns 0 straight away when the
// file was opened with O_SYNC, and the negative value from check_file() when
// the file cannot be opened.
int FileOperation::flush_file()
{
    const bool already_synchronous = (open_flags & O_SYNC) != 0;
    if (already_synchronous)
    {
        return 0;
    }

    const int handle = check_file();
    return (handle < 0) ? handle : fsync(handle);
}
// Closes the descriptor, then removes the file by name.
// Returns the result of unlink().
int FileOperation::unlink_file()
{
    close_file();
    const int rc = ::unlink(file_name_);
    return rc;
}
// Reads exactly nbytes at `offset` into buf, retrying after short reads.
// The loop body runs at most MAX_DISK_TIMES-1 times. EINTR/EAGAIN retry the
// same read, and EBADF drops fd_ so the next pass reopens the file.
// Returns TFS_SUCCESS when everything was read, EXIT_DISK_OPER_INCOMPLETE
// when bytes are still missing (end of file or attempts used up), or -errno
// for any other error.
int FileOperation::pread_file(char *buf, const int32_t nbytes, const int64_t offset)
{
    int32_t remaining = nbytes;
    int64_t position = offset;
    char *cursor = buf;

    for (int attempt = 1; remaining > 0 && attempt < MAX_DISK_TIMES; ++attempt)
    {
        if (check_file() < 0)
        {
            return -errno;
        }

        const int32_t got = ::pread64(fd_, cursor, remaining, position);
        if (got < 0)
        {
            const int err = errno;
            if (EINTR == err || EAGAIN == err)
            {
                continue;
            }
            if (EBADF == err)
            {
                fd_ = -1; // force a reopen on the next pass
                continue;
            }
            return -err;
        }
        if (0 == got)
        {
            break; // end of file
        }

        remaining -= got;
        cursor += got;
        position += got;
    }

    if (0 != remaining)
    {
        return EXIT_DISK_OPER_INCOMPLETE;
    }
    return TFS_SUCCESS;
}
// Writes exactly nbytes from buf at `offset`, retrying after short writes.
// The loop body runs at most MAX_DISK_TIMES-1 times. EINTR/EAGAIN retry the
// same write, and EBADF drops fd_ so the next pass reopens the file.
// Returns TFS_SUCCESS when everything was written, EXIT_DISK_OPER_INCOMPLETE
// when bytes remain after the attempts are used up, or -errno for any other
// error.
int FileOperation::pwrite_file(const char *buf, const int32_t nbytes, const int64_t offset)
{
    int32_t remaining = nbytes;
    int64_t position = offset;
    const char *cursor = buf;

    for (int attempt = 1; remaining > 0 && attempt < MAX_DISK_TIMES; ++attempt)
    {
        if (check_file() < 0)
        {
            return -errno;
        }

        const int32_t put = ::pwrite64(fd_, cursor, remaining, position);
        if (put < 0)
        {
            const int err = errno;
            if (EINTR == err || EAGAIN == err)
            {
                continue;
            }
            if (EBADF == err)
            {
                fd_ = -1; // force a reopen on the next pass
                continue;
            }
            return -err;
        }

        remaining -= put;
        cursor += put;
        position += put;
    }

    if (0 != remaining)
    {
        return EXIT_DISK_OPER_INCOMPLETE;
    }
    return TFS_SUCCESS;
}
// Writes exactly nbytes from buf at the current file position, retrying after
// short writes. The loop body runs at most MAX_DISK_TIMES-1 times.
// EINTR/EAGAIN retry the same write, and EBADF drops fd_ so the next pass
// reopens the file.
// Returns TFS_SUCCESS, EXIT_DISK_OPER_INCOMPLETE when bytes remain, or -errno
// for any other error.
int FileOperation::write_file(const char *buf, const int32_t nbytes)
{
    int32_t remaining = nbytes;
    const char *cursor = buf;

    for (int attempt = 1; remaining > 0 && attempt < MAX_DISK_TIMES; ++attempt)
    {
        if (check_file() < 0)
        {
            return -errno;
        }

        const int32_t put = write(fd_, cursor, remaining);
        if (put < 0)
        {
            const int err = errno;
            if (EINTR == err || EAGAIN == err)
            {
                continue;
            }
            if (EBADF == err)
            {
                fd_ = -1; // force a reopen on the next pass
                continue;
            }
            return -err;
        }

        remaining -= put;
        cursor += put;
    }

    if (0 != remaining)
    {
        return EXIT_DISK_OPER_INCOMPLETE;
    }
    return TFS_SUCCESS;
}
}
}
4.单元测试
#include"file_op.h"
#include"common.h"
using namespace std;
using namespace qiniu;
// Smoke test for FileOperation: positional write, positional read-back, then
// a write at the current file position.
// The object has automatic storage and main returns instead of calling
// exit(), so the destructor releases the file name and descriptor on every
// path.
int main(void)
{
    const char *filename = "file_op.txt";
    largefile::FileOperation fileOP(filename, O_CREAT | O_RDWR | O_LARGEFILE);

    int fd = fileOP.open_file();
    if (fd < 0)
    {
        fprintf(stderr, "open file %s failed. reason:%s\n", filename, strerror(-fd));
        return -1;
    }

    char buffer[65];
    memset(buffer, '8', 64);
    int ret = fileOP.pwrite_file(buffer, 64, 1024);
    if (ret < 0)
    {
        fprintf(stderr, "pwirte file %s failed. reason:%s\n", filename, strerror(-ret));
    }

    memset(buffer, 0, 64);
    ret = fileOP.pread_file(buffer, 64, 1024);
    if (ret < 0)
    {
        fprintf(stderr, "pread file %s failed. reason:%s\n", filename, strerror(-ret));
    }
    else
    {
        buffer[64] = '\0'; // terminate before printing as a C string
        printf("read:%s\n", buffer);
    }

    memset(buffer, '9', 64);
    ret = fileOP.write_file(buffer, 64);
    if (ret < 0)
    {
        fprintf(stderr, "write file %s failed. reason:%s\n", filename, strerror(-ret));
    }

    fileOP.close_file();
    return 0;
}
5.效果展示
![在这里插入图片描述](https://img-blog.csdnimg.cn/direct/1d7738c989334dc395d11d15dd9ea829.png)
五:文件映射操作类实现
- 这是集中上面几种类的一个组合,继承自文件操作类,成员变量是文件映射类,这个类集中了具体的功能实现
1.头文件
#ifndef QINIU_LARGE_FILE_MMAPFILE_OP_H_
#define QINIU_LARGE_FILE_MMAPFILE_OP_H_
#include"common.h"
#include"file_op.h"
#include"mmap_file.h"
namespace qiniu
{
namespace largefile
{
// FileOperation that can also serve reads and writes from a memory mapping of
// the same file. Owns the MMapFile created by mmap_file().
class MMapFileOperation : public FileOperation
{
public:
    MMapFileOperation(const std::string &file_name,
                      const int open_flags = O_CREAT | O_RDWR | O_LARGEFILE)
        : FileOperation(file_name, open_flags),
          map_file_(NULL),
          is_mapped_(false)
    {
    }

    ~MMapFileOperation()
    {
        // Deleting a NULL pointer is a no-op, so no explicit check is needed.
        delete map_file_;
        map_file_ = NULL;
    }

    // Mapping control.
    int mmap_file(const MMapOption &mmap_option);
    int munmap_file();
    void *get_map_data() const;

    // I/O; served from the mapping when possible.
    int pread_file(char *buf, const int32_t size, const int64_t offset);
    int pwrite_file(const char *buf, const int32_t size, const int64_t offset);
    int flush_file();

private:
    MMapFile *map_file_; // owned; NULL until mmap_file() is called
    bool is_mapped_;     // true while map_file_ holds a live mapping
};
}
}
#endif
2.cpp文件
#include"mmap_file_op.h"
#include"common.h"
static int debug =1;
namespace qiniu
{
namespace largefile
{
// Maps the file for reading and writing according to mmap_option.
// Returns TFS_ERROR when the option is inconsistent (max below first, or
// max <= 0), when the file cannot be opened, or when the mapping fails.
// Returns TFS_SUCCESS when the file is mapped, including when it already was.
int MMapFileOperation::mmap_file(const MMapOption &mmap_option)
{
    const bool bad_option =
        (mmap_option.max_mmap_size_ < mmap_option.first_mmap_size_) ||
        (mmap_option.max_mmap_size_ <= 0);
    if (bad_option)
    {
        return TFS_ERROR;
    }

    const int fd = check_file();
    if (fd < 0)
    {
        fprintf(stderr, "MMapFileOperation::mmap_file-checking file failed!");
        return TFS_ERROR;
    }

    if (!is_mapped_)
    {
        // Replace any leftover mapper from an earlier failed attempt.
        delete map_file_;
        map_file_ = new MMapFile(mmap_option, fd);
        is_mapped_ = map_file_->map_file(true);
    }

    return is_mapped_ ? TFS_SUCCESS : TFS_ERROR;
}
// Destroys the mapping object, if any.
// map_file_ is reset to NULL after the delete; otherwise the destructor (or a
// later mmap_file() call) would delete the same object a second time.
// Always returns TFS_SUCCESS.
int MMapFileOperation::munmap_file()
{
    if (is_mapped_ && map_file_ != NULL)
    {
        delete map_file_;
        map_file_ = NULL;
        is_mapped_ = false;
    }
    return TFS_SUCCESS;
}
// Returns the start address of the mapping, or NULL when the file is not
// mapped.
void *MMapFileOperation::get_map_data() const
{
    return is_mapped_ ? map_file_->get_data() : NULL;
}
// Reads `size` bytes at `offset` into buf.
// When the file is mapped and the range lies outside the mapping, the mapping
// is grown once. The remap no longer depends on the debug flag: previously it
// sat inside the debug block and was skipped whenever debug was off.
// A range that fits in the mapping is copied from memory; anything else,
// including a negative offset or size, goes to FileOperation::pread_file.
// Returns TFS_SUCCESS for a memory copy, otherwise the base-class result.
int MMapFileOperation::pread_file(char *buf, const int32_t size, const int64_t offset)
{
    // Negative values must never reach the pointer arithmetic or memcpy below.
    if (is_mapped_ && size >= 0 && offset >= 0)
    {
        if ((offset + size) > map_file_->get_size())
        {
            if (debug)
            {
                fprintf(stdout, "MMapFileOperation pread, size:%d, offset:%" __PRI64_PREFIX "d, map file size:%d, need remap\n", size, offset, map_file_->get_size());
            }
            map_file_->remap_file();
        }
        if ((offset + size) <= map_file_->get_size())
        {
            // get_data() is re-read here because a remap may move the mapping.
            memcpy(buf, static_cast<char *>(map_file_->get_data()) + offset, size);
            return TFS_SUCCESS;
        }
    }
    return FileOperation::pread_file(buf, size, offset);
}
// Writes `size` bytes from buf at `offset`.
// When the file is mapped and the range lies outside the mapping, the mapping
// is grown once. The remap no longer depends on the debug flag: previously it
// sat inside the debug block and was skipped whenever debug was off.
// A range that fits in the mapping is copied into memory; anything else,
// including a negative offset or size, goes to FileOperation::pwrite_file.
// Returns TFS_SUCCESS for a memory copy, otherwise the base-class result.
int MMapFileOperation::pwrite_file(const char *buf, const int32_t size, const int64_t offset)
{
    // Negative values must never reach the pointer arithmetic or memcpy below.
    if (is_mapped_ && size >= 0 && offset >= 0)
    {
        if ((offset + size) > map_file_->get_size())
        {
            if (debug)
            {
                fprintf(stdout, "MMapFileOperation pwrite,size:%d,offset:%" __PRI64_PREFIX "d,map file size:%d,need remap\n", size, offset, map_file_->get_size());
            }
            map_file_->remap_file();
        }
        if ((offset + size) <= map_file_->get_size())
        {
            // get_data() is re-read here because a remap may move the mapping.
            memcpy(static_cast<char *>(map_file_->get_data()) + offset, buf, size);
            return TFS_SUCCESS;
        }
    }
    return FileOperation::pwrite_file(buf, size, offset);
}
// Flushes pending data. When mapped, syncs the mapping and returns
// TFS_SUCCESS or TFS_ERROR; otherwise delegates to FileOperation::flush_file.
int MMapFileOperation::flush_file()
{
    if (!is_mapped_)
    {
        return FileOperation::flush_file();
    }
    return map_file_->sync_file() ? TFS_SUCCESS : TFS_ERROR;
}
}
}
3.单元测试
#include"mmap_file_op.h"
using namespace std;
using namespace qiniu;
const static largefile::MMapOption mmap_option={10240000,4096,4096};
int main(void)
{
int ret=0;
const char*filename = "mmap_file_op.txt";
largefile::MMapFileOperation *mmfo=new largefile::MMapFileOperation(filename);
int fd=mmfo->open_file();
if(fd<0)
{
fprintf(stderr,"open file %s failed. reason:%s\n",filename,strerror(-fd));
exit(-1);
}
ret=mmfo->mmap_file(mmap_option);
if(ret==largefile:: TFS_ERROR)
{
fprintf(stderr,"mmap_file failed. reason:%s\n",strerror(errno));
mmfo->close_file();
exit(-2);
}
char buffer[128+1];
memset(buffer,'6',128);
ret = mmfo ->pwrite_file(buffer,128,8);
memset(buffer,0,128);
mmfo->pread_file(buffer,128,8);
buffer[128]='\0';
printf("read:%s\n",buffer);
ret= mmfo->flush_file();
if(ret==largefile::TFS_ERROR)
{
fprintf(stderr,"flush file failed. reason:%s\n",strerror(errno));
}
ret = mmfo ->pwrite_file(buffer,128,4000);
mmfo->close_file();
return 0;
}
- 测试的效果和上面本质没有差别,这次这个是整合了两个类。
六:索引文件类实现
1.索引文件核心头文件(BlockInfo 块信息)
// Per-block bookkeeping record. A default-constructed record has every field
// equal to zero.
struct BlockInfo
{
    uint32_t block_id_;
    int32_t version_;
    int32_t file_count_;
    int32_t size_;
    int32_t del_file_count_;
    int32_t del_size_;
    uint32_t seq_no_;

    BlockInfo()
        : block_id_(0),
          version_(0),
          file_count_(0),
          size_(0),
          del_file_count_(0),
          del_size_(0),
          seq_no_(0)
    {
    }

    // Field-by-field equality.
    inline bool operator==(const BlockInfo &rhs) const
    {
        if (block_id_ != rhs.block_id_) return false;
        if (version_ != rhs.version_) return false;
        if (file_count_ != rhs.file_count_) return false;
        if (size_ != rhs.size_) return false;
        if (del_file_count_ != rhs.del_file_count_) return false;
        if (del_size_ != rhs.del_size_) return false;
        return seq_no_ == rhs.seq_no_;
    }
};
2.索引文件核心头文件(MetaInfo 索引文件信息)
// Index entry for one stored file: its id, where its bytes live (offset and
// size), and the offset of the next entry in the same chain.
// Other code in this file reads and writes sizeof(MetaInfo) raw bytes of this
// struct, so the member layout must not change.
struct MetaInfo
{
public:
    // Creates an entry with every field zero.
    MetaInfo()
    {
        init();
    }

    MetaInfo(const uint64_t file_id, const int32_t in_offset, const int32_t file_size, const int32_t next_meta_offset_)
    {
        fileid_ = file_id;
        location_.inner_offset_ = in_offset;
        location_.size_ = file_size;
        this->next_meta_offset_ = next_meta_offset_;
    }

    // Member-wise copy; the previous memcpy onto `this` is not well-defined
    // for a type that declares its own copy constructor.
    MetaInfo(const MetaInfo &meta_info)
    {
        copy_from(meta_info);
    }

    // Assignment now returns *this on every path. The copy path previously
    // fell off the end of the function, which is undefined behaviour.
    MetaInfo &operator=(const MetaInfo &meta_info)
    {
        if (this != &meta_info)
        {
            copy_from(meta_info);
        }
        return *this;
    }

    // Same as assignment, but asserts that source and destination differ.
    MetaInfo &clone(const MetaInfo &meta_info)
    {
        assert(this != &meta_info);
        copy_from(meta_info);
        return *this;
    }

    bool operator==(const MetaInfo &rhs) const
    {
        return fileid_ == rhs.fileid_ && location_.inner_offset_ == rhs.location_.inner_offset_ &&
               location_.size_ == rhs.location_.size_ && next_meta_offset_ == rhs.next_meta_offset_;
    }

    // The hash key is the file id.
    uint64_t get_key() const
    {
        return fileid_;
    }
    void set_key(const uint64_t key)
    {
        fileid_ = key;
    }

    uint64_t get_file_id() const
    {
        return fileid_;
    }
    void set_file_id(const uint64_t file_id)
    {
        fileid_ = file_id;
    }

    int32_t get_offset() const
    {
        return location_.inner_offset_;
    }
    void set_offset(const int32_t offset)
    {
        location_.inner_offset_ = offset;
    }

    int32_t get_size() const
    {
        return location_.size_;
    }
    void set_size(const int32_t file_size)
    {
        location_.size_ = file_size;
    }

    int32_t get_next_meta_offset() const
    {
        return next_meta_offset_;
    }
    void set_next_meta_offset(const int32_t offset)
    {
        next_meta_offset_ = offset;
    }

private:
    uint64_t fileid_;
    struct
    {
        int32_t inner_offset_;
        int32_t size_;
    } location_;
    int32_t next_meta_offset_;

private:
    void init()
    {
        fileid_ = 0;
        location_.inner_offset_ = 0;
        location_.size_ = 0;
        next_meta_offset_ = 0;
    }

    // Copies every field from `other`.
    void copy_from(const MetaInfo &other)
    {
        fileid_ = other.fileid_;
        location_.inner_offset_ = other.location_.inner_offset_;
        location_.size_ = other.location_.size_;
        next_meta_offset_ = other.next_meta_offset_;
    }
};
3.索引处理类
#ifndef QINUI_LARGE_INDEX_HANDLE_H_
#define QINUI_LARGE_INDEX_HANDLE_H_
#include"common.h"
#include"mmap_file_op.h"
namespace qiniu
{
namespace largefile
{
struct IndexHander
{
public:
IndexHander()
{
memset(this,0,sizeof(IndexHander));
}
BlockInfo block_info_;
int32_t bucket_size;
int32_t data_file_offset_;
int32_t index_file_size_;
int32_t free_head_offset;
};
// Manages the memory-mapped index file of one block.
class IndexHandle
{
public:
    IndexHandle(const std::string &base_path, const uint32_t main_block_id);
    ~IndexHandle();

    // Creates a fresh index file, then maps it.
    int create(const int32_t logic_block_id, const int32_t bucket_size, const MMapOption map_option);
    // Maps and validates an existing index file.
    int load(const int32_t logic_block_id, const int32_t bucket_size, const MMapOption map_option);

    // Header at the start of the mapped index.
    // The cast target is IndexHander (the header struct). It previously named
    // IndexHandle, this class, so header fields such as free_head_offset were
    // not reachable through the returned pointer.
    IndexHander *index_header()
    {
        return reinterpret_cast<IndexHander *>(file_op_->get_map_data());
    }

    // The block info is the first member of the header, so it shares the
    // mapping's start address.
    BlockInfo *block_info()
    {
        return reinterpret_cast<BlockInfo *>(file_op_->get_map_data());
    }

    // Number of hash buckets recorded in the mapped header.
    int32_t bucket_size() const
    {
        return reinterpret_cast<IndexHander *>(file_op_->get_map_data())->bucket_size;
    }

private:
    MMapFileOperation *file_op_; // owned
    bool is_load_;               // true once create() or load() succeeded
};
}
}
#include"common.h"
#include"index_handle.h"
#include<sstream>
static int debug = 1;
namespace qiniu
{
namespace largefile
{
// Builds the index path as base_path + INDEX_DIR_PREFIX + main_block_id and
// prepares the file operation object. The file is not opened here.
// The path is taken with str() rather than operator>>, which would stop at
// the first whitespace character and truncate such a path.
IndexHandle::IndexHandle(const std::string &base_path, const uint32_t main_block_id)
    : file_op_(NULL), is_load_(false)
{
    std::stringstream tmp_stream;
    tmp_stream << base_path << INDEX_DIR_PREFIX << main_block_id;
    const std::string index_path = tmp_stream.str();

    file_op_ = new MMapFileOperation(index_path, O_CREAT | O_RDWR | O_LARGEFILE);
}
// Releases the file operation object.
IndexHandle::~IndexHandle()
{
    // Deleting a NULL pointer is a no-op, so no explicit check is needed.
    delete file_op_;
    file_op_ = NULL;
}
int IndexHandle::create(const int32_t logic_block_id,const int32_t bucket_size,const MMapOption map_option)
{
int ret=TFS_SUCCESS;
if(debug) printf("create index,block id:%u,bucket size:%d,max_mmap_size:%d,first mmap size:%d, per mmap size:%d\n",
logic_block_id,bucket_size,map_option.max_mmap_size_,map_option.first_mmap_size_,map_option.per_mmap_size_);
if(is_load_)
{
return EXIT_INDEX_ALREADY_LOADED_ERROR;
}
int64_t file_size=file_op_->get_file_size();
if(file_size<0)
{
return TFS_ERROR;
}else if(file_size==0)
{
IndexHander i_header;
i_header.block_info_.block_id_=logic_block_id;
i_header.block_info_.seq_no_=1;
i_header.bucket_size=bucket_size;
i_header.index_file_size_=sizeof(IndexHander)+bucket_size*sizeof(int32_t);
char*init_data=new char[i_header.index_file_size_];
memcpy(init_data,&i_header,sizeof(IndexHander));
memset(init_data+sizeof(IndexHander),0,i_header.index_file_size_-sizeof(IndexHander));
ret=file_op_->pwrite_file(init_data,i_header.index_file_size_,0);
delete[] init_data;
init_data=NULL;
if(ret!=(TFS_SUCCESS))
{
return ret;
}
ret=file_op_->flush_file();
if(ret!=(TFS_SUCCESS))
{
return ret;
}
}else
{
return EXIT_META_UNEXPECT_FOUND_ERROR;
}
ret=file_op_->mmap_file(map_option);
if(ret!=TFS_SUCCESS)
{
return ret;
}
is_load_=true;
return TFS_SUCCESS;
}
int IndexHandle::load(const int32_t logic_block_id,const int32_t _bucket_size,const MMapOption map_option)
{
int ret=TFS_SUCCESS;
if(is_load_)
{
return EXIT_INDEX_ALREADY_LOADED_ERROR;
}
int64_t file_size=file_op_->get_file_size();
if(file_size<0)
{
return file_size;
}else if(file_size==0)
{
return EXIT_INDEX_CORRUPT_ERROR;
}
MMapOption tmp_map_option=map_option;
if(file_size>tmp_map_option.first_mmap_size_&&file_size<=tmp_map_option.max_mmap_size_)
{
tmp_map_option.first_mmap_size_=file_size;
}
ret = file_op_->mmap_file(tmp_map_option);
if(ret!=TFS_SUCCESS)
{
return ret;
}
if(0==bucket_size()||0==block_info()->block_id_)
{
fprintf(stderr,"index corrupt error.block id:%u,bucket size:%d\n",block_info()->block_id_,bucket_size());
return EXIT_INDEX_CORRUPT_ERROR;
}
int32_t index_file_size=sizeof(IndexHander)+bucket_size()*sizeof(int32_t);
if(file_size<index_file_size)
{
fprintf(stderr,"index corrupt error,block id:%u,bucket size:%d,file size:%ld,index file size:%d\n",block_info()->block_id_,bucket_size(),file_size,index_file_size);
return EXIT_INDEX_CORRUPT_ERROR;
}
if(logic_block_id!=block_info()->block_id_)
{
fprintf(stderr,"block id conflict. blockid:%u,index blockid:%u\n",logic_block_id,block_info()->block_id_);
return EXIT_BLOCKID_CONFLICT_ERROR;
}
if(_bucket_size!=bucket_size())
{
return EXIT_BUCKET_CONFIGURE_ERROR;
}
is_load_=true;
return TFS_SUCCESS;
}
}
}
4.单元测试
- 淘宝文件系统核心的原理就是在内存中申请一个大块内存,然后通过将文件映射到内存中,一个大内存块可以划分成很多的小块进行管理,每个小块就是一个文件,每个文件一一对应一个索引文件,索引文件保存着主块的相关信息。而索引文件是通过哈希表进行存储的,可以进行快速查找。
#include"common.h"
#include"file_op.h"
#include"index_handle.h"
#include<sstream>
using namespace std;
using namespace qiniu;
const static largefile::MMapOption mmap_option={1024000,4096,4096};
const static uint32_t main_blocksize=1024*1024*64;
const static uint32_t bucket_size=1000;
static int32_t block_id_=1;
static int debug =1;
// Creates the main block file and its index for a block id read from stdin.
// The success path now releases both objects. They were previously leaked,
// so the index mapping's destructor never ran.
int main(int argc, char **argv)
{
    int32_t ret = largefile::TFS_SUCCESS;
    std::string mainblock_path;
    std::string index_path;

    cout << "Type your bockid:" << endl;
    cin >> block_id_;
    if (block_id_ < 1)
    {
        cerr << "Invalid bockid,exit." << endl;
        exit(-1);
    }

    // Step 1: create the main block file and give it its full size.
    std::stringstream tmp_stream;
    tmp_stream << "." << largefile::MAINBLOCK_DIR_PREFIX << block_id_;
    tmp_stream >> mainblock_path;
    largefile::FileOperation *mainblock = new largefile::FileOperation(mainblock_path, O_RDWR | O_LARGEFILE | O_CREAT);
    ret = mainblock->ftruncate_file(main_blocksize);
    if (ret != 0)
    {
        fprintf(stderr, "create main block %s failed.reason: %s \n", mainblock_path.c_str(), strerror(errno));
        delete mainblock;
        exit(-2);
    }

    // Step 2: create the index file.
    largefile::IndexHandle *index_handle = new largefile::IndexHandle(".", block_id_);
    if (debug)
    {
        printf("Init index ....\n");
    }
    ret = index_handle->create(block_id_, bucket_size, mmap_option);
    if (ret != largefile::TFS_SUCCESS)
    {
        fprintf(stderr, "create index %d failed.\n", block_id_);
        delete mainblock;
        delete index_handle;
        exit(-1);
    }

    mainblock->close_file();
    delete mainblock;
    delete index_handle;
    return 0;
}
5.效果展示
![在这里插入图片描述](https://img-blog.csdnimg.cn/direct/9310d2f906064395ac10c50dcf09d8ed.png)
七:脚本测试
1.块写入测试
#include"common.h"
#include"file_op.h"
#include"index_handle.h"
#include<sstream>
using namespace std;
using namespace qiniu;
const static largefile::MMapOption mmap_option={1024000,4096,4096};
const static uint32_t main_blocksize=1024*1024*64;
const static uint32_t bucket_size=1000;
static int32_t block_id_=1;
static int debug =1;
// Writes one 4096-byte record into the main block and registers it in the
// index, for a block id read from stdin.
int main(int argc, char **argv)
{
    int32_t ret = largefile::TFS_SUCCESS;
    std::string mainblock_path;
    std::string index_path;

    cout << "Type your bockid:" << endl;
    cin >> block_id_;
    if (block_id_ < 1)
    {
        cerr << "Invalid bockid,exit." << endl;
        exit(-1);
    }

    // Step 1: load the index.
    largefile::IndexHandle *index_handle = new largefile::IndexHandle(".", block_id_);
    if (debug)
    {
        printf("load index ....\n");
    }
    ret = index_handle->load(block_id_, bucket_size, mmap_option);
    if (ret != largefile::TFS_SUCCESS)
    {
        fprintf(stderr, "load index %d failed.\n", block_id_);
        delete index_handle;
        exit(-2);
    }

    // Step 2: write the payload into the main block.
    std::stringstream tmp_stream;
    tmp_stream << "." << largefile::MAINBLOCK_DIR_PREFIX << block_id_;
    tmp_stream >> mainblock_path;
    largefile::FileOperation *mainblock = new largefile::FileOperation(mainblock_path, O_RDWR | O_LARGEFILE | O_CREAT);

    char buffer[4096];
    memset(buffer, '6', sizeof(buffer));
    int32_t data_offset = index_handle->get_block_data_offset();
    uint32_t file_no = index_handle->block_info()->seq_no_;

    // Assign first, compare second. The old `ret=a!=b` form stored the
    // comparison result (0 or 1) in ret because != binds tighter than =.
    ret = mainblock->pwrite_file(buffer, sizeof(buffer), data_offset);
    if (ret != largefile::TFS_SUCCESS)
    {
        fprintf(stderr, "write to main block failed.ret:%d,reason:%s\n", ret, strerror(errno));
        mainblock->close_file();
        delete mainblock;
        delete index_handle;
        exit(-3);
    }

    // Step 3: record the new file in the index.
    largefile::MetaInfo meta;
    meta.set_file_id(file_no);
    meta.set_offset(data_offset);
    meta.set_size(sizeof(buffer));
    ret = index_handle->write_segment_meta(meta.get_key(), meta);
    if (ret == largefile::TFS_SUCCESS)
    {
        // Advance the data offset and counters, then flush the index.
        index_handle->commit_block_data_offset(sizeof(buffer));
        index_handle->update_block_info(largefile::C_OPER_INSERT, sizeof(buffer));
        ret = index_handle->flush();
        if (ret != largefile::TFS_SUCCESS)
        {
            fprintf(stderr, "flush mainblock %d failed. file no:%u\n", block_id_, file_no);
        }
    }
    else
    {
        fprintf(stderr, "write_segement_meta mainblock %d failed. file no:%u\n", block_id_, file_no);
    }

    if (ret != largefile::TFS_SUCCESS)
    {
        fprintf(stderr, "write to mainblock %d failed. file no:%u\n", block_id_, file_no);
    }
    else
    {
        if (debug)
        {
            printf("write successfully.file no:%u,block_id:%d\n", file_no, block_id_);
        }
    }

    mainblock->close_file();
    delete mainblock;
    delete index_handle;
    return 0;
}
- 效果展示
![在这里插入图片描述](https://img-blog.csdnimg.cn/direct/1ab06498caa64b41a3164c2061bd2b98.png)
2.测试加载功能
#include "common.h"
#include "file_op.h"
#include "index_handle.h"
#include <sstream>
using namespace std;
using namespace qiniu;
const static largefile::MMapOption mmap_option = {1024000, 4096, 4096};
const static uint32_t main_blocksize = 1024 * 1024 * 64;
const static uint32_t bucket_size = 1000;
static int32_t block_id_ = 1;
static int debug = 1;
int main(int argc, char **argv)
{
int32_t ret = largefile::TFS_SUCCESS;
std::string mainblock_path;
std::string index_path;
cout << "Type your bockid:" << endl;
cin >> block_id_;
if (block_id_ < 1)
{
cerr << "Invalid bockid,exit." << endl;
exit(-1);
}
largefile::IndexHandle *index_handle = new largefile::IndexHandle(".", block_id_);
if (debug)
{
printf("load index ....\n");
}
ret = index_handle->load(block_id_, bucket_size, mmap_option);
if (ret != largefile::TFS_SUCCESS)
{
fprintf(stderr, "load index %d failed.\n", block_id_);
delete index_handle;
exit(-2);
}
delete index_handle;
return 0;
}
- 效果展示
![在这里插入图片描述](https://img-blog.csdnimg.cn/direct/e58eba38c28c4cb994e611207d1efd96.png)
3.读取内存中的块
#include "common.h"
#include "file_op.h"
#include "index_handle.h"
#include <sstream>
using namespace std;
using namespace qiniu;
const static largefile::MMapOption mmap_option = {1024000, 4096, 4096};
const static uint32_t main_blocksize = 1024 * 1024 * 64;
const static uint32_t bucket_size = 1000;
static int32_t block_id_ = 1;
static int debug = 1;
// Looks up a file id in the index of a block and prints the bytes stored for
// it in the main block. Both ids are read from stdin.
// Every exit path now releases what it allocated. The read buffer and, on
// some early exits, the index handle were previously leaked.
int main(int argc, char **argv)
{
    int32_t ret = largefile::TFS_SUCCESS;
    std::string mainblock_path;
    std::string index_path;

    cout << "Type your bockid:" << endl;
    cin >> block_id_;
    if (block_id_ < 1)
    {
        cerr << "Invalid bockid,exit." << endl;
        exit(-1);
    }

    // Step 1: load the index.
    largefile::IndexHandle *index_handle = new largefile::IndexHandle(".", block_id_);
    if (debug)
    {
        printf("load index ....\n");
    }
    ret = index_handle->load(block_id_, bucket_size, mmap_option);
    if (ret != largefile::TFS_SUCCESS)
    {
        fprintf(stderr, "load index %d failed.\n", block_id_);
        delete index_handle;
        exit(-2);
    }

    // Step 2: find the entry for the requested file.
    uint64_t file_id = 0;
    cout << "Type your file_id:" << endl;
    cin >> file_id;
    if (file_id < 1)
    {
        cerr << "Invalid fileid,exit." << endl;
        delete index_handle;
        exit(-2);
    }
    largefile::MetaInfo meta;
    ret = index_handle->read_segment_meta(file_id, meta);
    if (ret != largefile::TFS_SUCCESS)
    {
        fprintf(stderr, "read_segment_meta error. file_id:%lu,ret:%d\n", file_id, ret);
        delete index_handle;
        exit(-3);
    }

    // Step 3: read the payload from the main block.
    std::stringstream tmp_stream;
    tmp_stream << "." << largefile::MAINBLOCK_DIR_PREFIX << block_id_;
    tmp_stream >> mainblock_path;
    largefile::FileOperation *mainblock = new largefile::FileOperation(mainblock_path, O_RDWR);

    char *buffer = new char[meta.get_size() + 1]; // +1 for the terminating NUL
    ret = mainblock->pread_file(buffer, meta.get_size(), meta.get_offset());
    if (ret != largefile::TFS_SUCCESS)
    {
        fprintf(stderr, "read to main block failed.ret:%d,reason:%s\n", ret, strerror(errno));
        delete[] buffer;
        mainblock->close_file();
        delete mainblock;
        delete index_handle;
        exit(-3);
    }
    buffer[meta.get_size()] = '\0';
    printf("read: %s\n", buffer);

    delete[] buffer;
    mainblock->close_file();
    delete mainblock;
    delete index_handle;
    return 0;
}
- 效果展示
![在这里插入图片描述](https://img-blog.csdnimg.cn/direct/c94d56b9cfea4cc5a81b14c155d22217.png)
4.块删除测试
#include "common.h"
#include "file_op.h"
#include "index_handle.h"
#include <sstream>
using namespace std;
using namespace qiniu;
const static largefile::MMapOption mmap_option = {1024000, 4096, 4096};
const static uint32_t main_blocksize = 1024 * 1024 * 64;
const static uint32_t bucket_size = 1000;
static int32_t block_id_ = 1;
static int debug = 1;
// Removes the index entry of a file id from a block's index. Both ids are
// read from stdin.
// A failed delete now ends the program with an error code. Previously the
// error message was followed by "delete successfully!" and a normal flush.
int main(int argc, char **argv)
{
    int32_t ret = largefile::TFS_SUCCESS;
    std::string mainblock_path;
    std::string index_path;

    cout << "Type your bockid:" << endl;
    cin >> block_id_;
    if (block_id_ < 1)
    {
        cerr << "Invalid bockid,exit." << endl;
        exit(-1);
    }

    // Step 1: load the index.
    largefile::IndexHandle *index_handle = new largefile::IndexHandle(".", block_id_);
    if (debug)
    {
        printf("load index ....\n");
    }
    ret = index_handle->load(block_id_, bucket_size, mmap_option);
    if (ret != largefile::TFS_SUCCESS)
    {
        fprintf(stderr, "load index %d failed.\n", block_id_);
        delete index_handle;
        exit(-2);
    }

    // Step 2: delete the entry of the requested file.
    uint64_t file_id = 0;
    cout << "Type your file_id:" << endl;
    cin >> file_id;
    if (file_id < 1)
    {
        cerr << "Invalid fileid,exit." << endl;
        delete index_handle;
        exit(-2);
    }
    ret = index_handle->delete_segment_meta(file_id);
    if (ret != largefile::TFS_SUCCESS)
    {
        fprintf(stderr, "delete index failed. file_id:%lu,ret:%d\n", file_id, ret);
        delete index_handle;
        exit(-3);
    }
    printf("delete successfully!\n");

    // Step 3: flush the updated index.
    ret = index_handle->flush();
    if (ret != largefile::TFS_SUCCESS)
    {
        fprintf(stderr, "flush mainblock %d failed. file no:%lu\n", block_id_, file_id);
        delete index_handle;
        exit(-4);
    }

    delete index_handle;
    return 0;
}
八:可重用结点设置
int32_t IndexHandle:: delete_segment_meta(const uint64_t key)
{
int32_t current_offset = 0, previous_offset = 0;
int ret = hash_find(key, current_offset, previous_offset);
if(ret!=largefile::TFS_SUCCESS)
{
return ret;
}
MetaInfo meta_info;
ret = file_op_->pread_file(reinterpret_cast<char *>(&meta_info), sizeof(MetaInfo), current_offset);
if(largefile:: TFS_SUCCESS!=ret)
{
return ret;
}
int32_t next_pos = meta_info.get_next_meta_offset();
if(previous_offset==0)
{
int32_t slot = static_cast<uint32_t>(key) % bucket_size();
bucket_slot()[slot] = next_pos;
}else
{
MetaInfo pre_meta_info;
ret = file_op_->pread_file(reinterpret_cast<char *>(&pre_meta_info), sizeof(MetaInfo), previous_offset);
if (largefile::TFS_SUCCESS != ret)
{
return ret;
}
pre_meta_info.set_next_meta_offset(next_pos);
ret = file_op_->pwrite_file(reinterpret_cast<char *>(&pre_meta_info), sizeof(MetaInfo), previous_offset);
if (largefile::TFS_SUCCESS != ret)
{
return ret;
}
}
meta_info.set_next_meta_offset(free_head_offset());
ret = file_op_->pwrite_file(reinterpret_cast<char *>(&meta_info), sizeof(MetaInfo), current_offset);
if (largefile::TFS_SUCCESS != ret)
{
return ret;
}
index_header()->free_head_offset = current_offset;
update_block_info(largefile::C_OPER_DELETE, meta_info.get_size());
return largefile::TFS_SUCCESS;
}
int32_t IndexHandle::hash_insert(const uint64_t key, int32_t previous_offset, MetaInfo &meta)
{
int ret = TFS_SUCCESS;
MetaInfo tmp_meta_info;
int32_t current_offset = 0;
int32_t slot = static_cast<uint32_t>(key) % bucket_size();
if(free_head_offset()!=0)
{
ret = file_op_->pread_file(reinterpret_cast<char *>(&tmp_meta_info), sizeof(MetaInfo), free_head_offset());
if(TFS_SUCCESS!=ret)
{
return ret;
}
current_offset = index_header()->free_head_offset;
index_header()->free_head_offset = tmp_meta_info.get_next_meta_offset();
}else
{
current_offset = index_header()->index_file_size_;
index_header()->index_file_size_ += sizeof(MetaInfo);
}
meta.set_next_meta_offset(0);
ret = file_op_->pwrite_file(reinterpret_cast<const char *>(&meta), sizeof(MetaInfo), current_offset);
if(TFS_SUCCESS!=ret)
{
index_header()->index_file_size_ -= sizeof(MetaInfo);
return ret;
}
if(0!=previous_offset)
{
ret = file_op_->pread_file(reinterpret_cast<char *>(&meta),sizeof(MetaInfo),previous_offset);
if (TFS_SUCCESS != ret)
{
index_header()->index_file_size_ -= sizeof(MetaInfo);
return ret;
}
tmp_meta_info.set_next_meta_offset(current_offset);
ret = file_op_->pread_file(reinterpret_cast<char *>(&meta), sizeof(MetaInfo), previous_offset);
if (TFS_SUCCESS != ret)
{
index_header()->index_file_size_ -= sizeof(MetaInfo);
return ret;
}
}else
{
bucket_slot()[slot] = current_offset;
}
return TFS_SUCCESS;
}