leveldb-expand接口设计
项目地址:https://github.com/google/leveldb
接口设计
- 类型定义
// Record type: a (key, value) pair of strings.
typedef std::pair<std::string, std::string> RecordType;
- 前缀匹配:
// Prefix match: fetches n records whose key matches the given prefix.
//   prefix:     input; the key prefix to match.
//   entry_list: output; receives the records that match the prefix.
//   n:          input; number of records to match. The default value 0
//               disables the limit, i.e. every record matching the prefix
//               is returned.
// Returns a Status object.
virtual Status GetByPrefix( const Slice& prefix,
std::vector<RecordType>& entry_list,
int n = 0 );
接口名
:GetByPrefix
接口功能
:根据prefix指定的前缀,获取最多n条匹配该前缀的记录。
参数说明
:
- prefix:传入参数,指定要匹配的前缀。
- entry_list:传出参数,存储匹配该前缀的n条记录。
- n:传入参数,指定要匹配的记录条数。默认值为0,表示该限制失效,即匹配所有符合前缀的记录。
返回值
:返回Status对象
- 条件匹配:
// Conditional match: fetches the records that satisfy the prefix together
// with the key and value filter conditions.
//   prefix:       input; the key prefix to match. An empty string disables
//                 this condition.
//   key_filter:   input; filter condition applied to the key. An empty
//                 string disables this condition.
//   value_filter: input; filter condition applied to the value. An empty
//                 string disables this condition.
//   entry_list:   output; receives the records that satisfy the conditions.
//   n:            input; number of records to match. The default value 0
//                 disables the limit, i.e. every matching record is returned.
// Returns a Status object.
virtual Status GetByCondition( const Slice& prefix,
const Slice& key_filter,
const Slice& value_filter,
std::vector<RecordType>& entry_list,
int n = 0);
接口名
:GetByCondition
接口功能
:根据key和value的过滤条件,以及前缀,匹配符合以上条件的记录。
参数说明
:
- prefix:传入参数,指定要匹配的前缀。如果传入空串,则该条件失效。
- key_filter:传入参数,指定key的过滤条件。如果传入空串,则该条件失效。
- value_filter:传入参数,指定value的过滤条件。如果传入空串,则该条件失效。
- entry_list:传出参数,存储匹配条件的记录。
- n:传入参数,指定要匹配的记录条数。默认值为0,表示该限制失效,即匹配所有符合条件的记录。
返回值
:返回Status对象
- 范围匹配
// Range match: starting from the given key, fetches the following n records.
//   key:        input; the starting point. If the key does not exist in the
//               DB, the first element greater than key is used as the
//               starting point.
//   n:          input; number of elements to fetch. The n records include
//               the record for key itself.
//   entry_list: output; receives the fetched records.
// Returns a Status object.
virtual Status GetByRange( const Slice& key,
int n,
std::vector<RecordType>& entry_list);
接口名
:GetByRange
接口功能
:以给定的key为起点,获取之后的n条记录。
参数说明
:
- key:传入参数,指定起点。如果DB中key不存在,则以比key大的第一个元素为起点。
- n:传入参数,指定获取的元素个数。获取的n条记录包含key本身。
- entry_list:传出参数,存储获取的记录。
返回值
:返回Status对象
测试程序
- 前缀匹配
使用场景:当DB中key由多个feature构成时,eg: key = uid__::__timestamp,按照前缀进行匹配,获取存储记录。
key | value |
---|---|
5205266179__::__20170304 | 李易峰 |
2938729192__::__20170304 | 李易峰 |
5205266179__::__20171204 | 杨幂 |
9287192817__::__20170304 | 李易峰 |
5205266179__::__20170804 | 杜兰特 |
#include <cassert>
#include <string>
#include <vector>
#include "leveldb/db.h"
int main(void){
leveldb::DB* db;
leveldb::Options options;
options.create_if_missing = true;
leveldb::Status status = leveldb::DB::Open(options, "/tmp/testdb", &db);
assert(status.ok());
leveldb::Slice prefix = "5205266179";
std::vector<RecordType> entry_list;
status = leveldb::DB::GetByPrefix( prefix, entry_list ); // 默认形式,获取符合该前缀的所有记录。
// assess the entry_list
// code remain to finish
leveldb::Slice prefix = "2938729192";
std::vector<RecordType> entry_list1;
int n = 10;
status = leveldb::DB::GetByPrefix( prefix, entry_list1, n );// 指定要匹配的记录条数
// assess the entry_list1
// code remain to finish
return 0;
}
- 条件匹配:
使用场景:当DB中key和value由多个feature构成时,eg:
key = uid__pv__timestamp, value = query__timestamp.可以按照key的前缀条件和过滤条件,筛选符合条件的key,同时按照value的过滤条件,对value进行过滤。获取符合以上条件的记录。
key | value |
---|---|
5205266179__pv1__20170304 | 李易峰_uv1 |
5205266179__pv1__20171204 | 杨幂_uv1 |
5205266179__pv1__20171205 | 杜兰特_uv1 |
3656728192__pv2__20170305 | 李易峰_uv2 |
2938271928__pv3__20170804 | 李易峰_uv3 |
#include <cassert>
#include <string>
#include <vector>
#include "leveldb/db.h"
int main(void){
leveldb::DB* db;
leveldb::Options options;
options.create_if_missing = true;
leveldb::Status status = leveldb::DB::Open(options, "/tmp/testdb", &db);
assert(status.ok());
leveldb::Slice prefix = ""; // 空串,代表前缀失效
leveldb::Slice key_filter = "201712";
leveldb::Slice value_filter = "李易峰";
std::vector<RecordType> entry_list;
status = leveldb::DB::GetByCondition( prefix, key_filter, value_filter, entry_list);
// assess the entry_list
// code remain to finish
// 给出前缀情形
leveldb::Slice prefix = "5205266179__pv1";
key_filter = "201712";
value_filter = "李易峰";
int n = 10;
std::vector<RecordType> entry_list1;
status = leveldb::DB::GetByCondition( prefix, key_filter, value_filter, entry_list1, 10 );
// assess the entry_list
// code remain to finish
return 0;
}
- 范围匹配
使用场景:当需要在某个范围内查找的时候,给定范围的起点,以及个数,获取这个范围内的记录。
key | value |
---|---|
5205266179__::__20170304 | 李易峰 |
2938729192__::__20170304 | 李易峰 |
5205266179__::__20171204 | 杨幂 |
9287192817__::__20170304 | 李易峰 |
5205266179__::__20170804 | 杜兰特 |
#include <cassert>
#include <string>
#include <vector>
#include "leveldb/db.h"
int main(void){
leveldb::DB* db;
leveldb::Options options;
options.create_if_missing = true;
leveldb::Status status = leveldb::DB::Open(options, "/tmp/testdb", &db);
assert(status.ok());
const leveldb::Slice key = 5205266179__::__20170304;
int n = 10;
std::vector<RecordType> entry_list;
statys = leveldb::DB::GetByRange(key, n, entry_list);
// assess the entry_list
// code remain to finish
return 0;
}
源码修改
- /include/leveldb/db.h
在db.h当中增加GetBatch外部接口。
// Author: kang
// Mail: likang@tju.edu.cn
// Batch lookup entry point of the DB interface. Declared pure virtual, so
// concrete DB classes must implement it.
//   options:     read options for the lookup.
//   key:         the key to look up.
//   record_list: output; receives the (key, value) records that were found.
// Returns a Status object.
virtual Status GetBatch(const ReadOptions& options,
const Slice& key,
std::vector<RecordType>& record_list) = 0;
- /include/leveldb/record_type.h
在/include/leveldb/目录下增加头文件record_type.h,包括记录类型以及回调状态。
/*************************************************************************
> File Name: record_type.h
> Author: Kang
> Mail:likang@tju.edu.cn
************************************************************************/
#ifndef STORAGE_LEVELDB_INCLUDE_RECORD_TYPE_H_
#define STORAGE_LEVELDB_INCLUDE_RECORD_TYPE_H_
#include <utility>
#include <string>
#include <vector>
namespace leveldb {
typedef std::pair<std::string, std::string> RecordType;
enum SaverState {
kNotFound,
kFound,
kDeleted,
kCorrupt,
};
struct Saver {
SaverState state;
const Comparator* ucmp;
Slice user_key;
std::string* value;
};
struct SaverBatch {
SaverState state;
const Comparator* ucmp;
Slice user_key;
std::vector<RecordType>* precord_list;
};
} // leveldb
#endif // STORAGE_LEVELDB_INCLUDE_RECORD_TYPE_H_
- /db/db_impl.h
在/db/db_impl.h当中增加GetBatch外部接口。
// Author: kang
// Mail: likang@tju.edu.cn
// DBImpl's declaration of the batch lookup.
//   options:     read options; options.snapshot, when set, selects the
//                sequence number the lookup reads at.
//   key:         the key to look up.
//   record_list: output; receives the (key, value) records that were found.
// Returns a Status object.
virtual Status GetBatch(const ReadOptions& options,
const Slice& key,
std::vector<RecordType>& record_list);
- /db/db_impl.cc
在/db/db_impl.cc当中增加GetBatch实现代码。
// Author: kang
// Mail: likang@tju.edu.cn
// Looks up `key` in the active memtable, the immutable memtable (if any) and
// the table files of the current version, appending every record found to
// record_list.
//   options:     read options; options.snapshot, when set, selects the
//                sequence number to read at, otherwise the last sequence
//                number is used.
//   key:         the user key to look up.
//   record_list: output; matching (key, value) records are appended.
// Returns OK when at least one record was appended by this call; otherwise
// the status produced by the table-file lookup (NotFound, or an error).
Status DBImpl::GetBatch(const ReadOptions& options,
                        const Slice& key,
                        std::vector<RecordType>& record_list) {
  Status s;
  MutexLock l(&mutex_);
  SequenceNumber snapshot;
  if (options.snapshot != NULL) {
    snapshot = reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_;
  } else {
    snapshot = versions_->LastSequence();
  }

  // Pin the memtables and the current version so they stay alive while the
  // mutex is released below.
  MemTable* mem = mem_;
  MemTable* imm = imm_;
  Version* current = versions_->current();
  mem->Ref();
  if (imm != NULL) imm->Ref();
  current->Ref();

  // Remember how many records the caller already had, so that we can tell
  // whether this call contributed anything.
  const size_t records_before = record_list.size();
  Version::GetStats stats;

  // Unlock while reading from files and memtables
  {
    mutex_.Unlock();
    LookupKey lkey(key, snapshot);
    // All three sources are consulted unconditionally: the memtable first,
    // then the immutable memtable (if any), then the table files.
    mem->GetBatch(lkey, record_list, &s);
    if (imm != NULL) {
      imm->GetBatch(lkey, record_list, &s);
    }
    s = current->GetBatch(options, lkey, record_list, &stats);
    mutex_.Lock();
  }

  // The table-file lookup reports NotFound when no file holds the key, which
  // would otherwise hide records that were found in the memtables.
  if (s.IsNotFound() && record_list.size() > records_before) {
    s = Status::OK();
  }

  // NOTE: the seek statistics gathered in `stats` are not applied here;
  // current->UpdateStats(stats) / MaybeScheduleCompaction() are deliberately
  // not invoked for batch lookups.

  mem->Unref();
  if (imm != NULL) imm->Unref();
  current->Unref();
  return s;
}  // GetBatch
- /db/memtable.h
在/db/memtable.h当中增加GetBatch外部接口。
// Batch lookup in the memtable.
//   key:         the lookup key.
//   record_list: output; receives the (key, value) records that were found.
//   s:           output status pointer.
// Returns a bool; the implementation returns true when at least one record
// was appended to record_list.
bool GetBatch(const LookupKey& key, std::vector<RecordType>& record_list, Status* s);
- /db/memtable.cc
在/db/memtable.cc当中增加GetBatch实现。
// Collects every memtable entry whose user key equals key.user_key().
//   key:         lookup key; its memtable_key() is the seek target.
//   record_list: output; one (user key, value) record is appended for each
//                value entry found.
//   s:           set to NotFound when a deletion entry for the key is seen;
//                left untouched otherwise.
// Returns true if at least one record was appended.
bool MemTable::GetBatch(const LookupKey& key,
                        std::vector<RecordType>& record_list, Status* s) {
  Slice memkey = key.memtable_key();
  Table::Iterator iter(&table_);
  iter.Seek(memkey.data());
  bool flag = false;
  while (iter.Valid()) {
    // entry format is:
    //    klength  varint32
    //    userkey  char[klength]
    //    tag      uint64
    //    vlength  varint32
    //    value    char[vlength]
    // Check that it belongs to same user key.  We do not check the
    // sequence number since the Seek() call above should have skipped
    // all entries with overly large sequence numbers.
    const char* entry = iter.key();
    uint32_t key_length;
    const char* key_ptr = GetVarint32Ptr(entry, entry + 5, &key_length);
    if (comparator_.comparator.user_comparator()->Compare(
            Slice(key_ptr, key_length - 8),
            key.user_key()) != 0) {
      // Reached an entry for a different user key: nothing more to collect.
      break;
    }

    // Correct user key
    const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8);
    switch (static_cast<ValueType>(tag & 0xff)) {
      case kTypeValue: {
        Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
        record_list.push_back(
            RecordType(key.user_key().ToString(), v.ToString()));
        flag = true;
        // Without this break the value case fell through into the deletion
        // case and reported NotFound for every successful hit.
        break;
      }
      case kTypeDeletion:
        *s = Status::NotFound(Slice());
        break;
    }
    iter.Next();
  }  // while
  return flag;
}
} // namespace leveldb
- /db/version_set.h
在/db/version_set.h当中增加GetBatch接口。
// Author: kang
// Mail: likang@tju.edu.cn
// Batch lookup over the table files of this version.
//   key:         the lookup key.
//   record_list: output; receives the (key, value) records that were found.
//   stats:       output; filled with seek statistics for the lookup.
// Returns a Status object.
Status GetBatch(const ReadOptions&, const LookupKey& key, std::vector<RecordType>& record_list,
GetStats* stats);
- /db/version_set.cc
在/db/version_set.cc当中增加GetBatch实现。
// Author: kang
// Mail: likang@tju.edu.cn
// Searches the table files of this version, level by level, for the user key
// of `k`. Each candidate file is queried through table_cache_->GetBatch with
// a SaverBatch whose precord_list points at record_list.
//   options:     read options, forwarded to the table cache.
//   k:           lookup key; both its internal key and user key are used.
//   record_list: output; the SaverBatch handed to the table cache points at it.
//   stats:       output; seek_file / seek_file_level are reset on entry and
//                set to the first file read once a second file is consulted.
// Returns the table-cache status if it is not OK; otherwise returns as soon
// as a file's lookup ends in kFound (that status), kDeleted (NotFound) or
// kCorrupt (Corruption). Returns NotFound when no file produced a result.
Status Version::GetBatch(const ReadOptions& options,
const LookupKey& k,
std::vector<RecordType>& record_list,
GetStats* stats){
Slice ikey = k.internal_key();
Slice user_key = k.user_key();
const Comparator* ucmp = vset_->icmp_.user_comparator();
Status s;
stats->seek_file = NULL;
stats->seek_file_level = -1;
FileMetaData* last_file_read = NULL;
int last_file_read_level = -1;
// We can search level-by-level since entries never hop across
// levels. Therefore we are guaranteed that if we find data
// in an smaller level, later levels are irrelevant.
std::vector<FileMetaData*> tmp;
FileMetaData* tmp2;
for (int level = 0; level < config::kNumLevels; level++) {
size_t num_files = files_[level].size();
if (num_files == 0) continue;
// Get the list of files to search in this level
FileMetaData* const* files = &files_[level][0];
if (level == 0) {
// Level-0 files may overlap each other. Find all files that
// overlap user_key and process them in order from newest to oldest.
tmp.reserve(num_files);
for (uint32_t i = 0; i < num_files; i++) {
FileMetaData* f = files[i];
if (ucmp->Compare(user_key, f->smallest.user_key()) >= 0 &&
ucmp->Compare(user_key, f->largest.user_key()) <= 0) {
tmp.push_back(f);
}
}
if (tmp.empty()) continue;
std::sort(tmp.begin(), tmp.end(), NewestFirst);
files = &tmp[0];
num_files = tmp.size();
} else {
// Binary search to find earliest index whose largest key >= ikey.
uint32_t index = FindFile(vset_->icmp_, files_[level], ikey);
if (index >= num_files) {
files = NULL;
num_files = 0;
} else {
tmp2 = files[index];
if (ucmp->Compare(user_key, tmp2->smallest.user_key()) < 0) {
// All of "tmp2" is past any data for user_key
files = NULL;
num_files = 0;
} else {
// At most one file is searched on levels above 0.
files = &tmp2;
num_files = 1;
}
}
}
// Query each candidate file selected for this level.
for (uint32_t i = 0; i < num_files; ++i) {
if (last_file_read != NULL && stats->seek_file == NULL) {
// We have had more than one seek for this read. Charge the 1st file.
stats->seek_file = last_file_read;
stats->seek_file_level = last_file_read_level;
}
FileMetaData* f = files[i];
last_file_read = f;
last_file_read_level = level;
// Fresh per-file lookup state; the table code updates saver.state.
SaverBatch saver;
saver.state = kNotFound;
saver.ucmp = ucmp;
saver.user_key = user_key;
saver.precord_list = &record_list;
s = vset_->table_cache_->GetBatch(options, f->number, f->file_size,
ikey, &saver);
if (!s.ok()) {
return s;
}
switch (saver.state) {
case kNotFound:
break; // Keep searching in other files
case kFound:
return s;
case kDeleted:
s = Status::NotFound(Slice()); // Use empty error message for speed
return s;
case kCorrupt:
s = Status::Corruption("corrupted key for ", user_key);
return s;
}
}
}
return Status::NotFound(Slice()); // Use an empty error message for speed
}// GetBatch
- /db/table_cache.h
在/db/table_cache.h当中增加GetBatch接口。
// Author: kang
// Mail: likang@tju.edu.cn
// Batch lookup of internal key `k` in the table file identified by
// file_number / file_size.
//   options:     read options for the lookup.
//   file_number: number of the table file to search.
//   file_size:   size of that table file.
//   k:           the key to seek to.
//   arg:         opaque pointer forwarded to the table-level lookup.
// Returns a Status object.
Status GetBatch(const ReadOptions& options,
uint64_t file_number,
uint64_t file_size,
const Slice& k,
void* arg);
- /db/table_cache.cc
在/db/table_cache.cc当中增加GetBatch实现。
// Author: likang
// Mail: likang@tju.edu.cn
// Locates the table for (file_number, file_size) through FindTable and
// forwards the batch lookup of `k` to Table::InternalGetBatch, passing `arg`
// through untouched. The cache handle is released once the lookup has run.
// Returns FindTable's status on failure, otherwise the lookup's status.
Status TableCache::GetBatch(const ReadOptions& options,
                            uint64_t file_number,
                            uint64_t file_size,
                            const Slice& k,
                            void* arg) {
  Cache::Handle* cache_handle = NULL;
  Status status = FindTable(file_number, file_size, &cache_handle);
  if (!status.ok()) {
    // No handle was obtained, so there is nothing to release.
    return status;
  }

  TableAndFile* entry =
      reinterpret_cast<TableAndFile*>(cache_->Value(cache_handle));
  status = entry->table->InternalGetBatch(options, k, arg);
  cache_->Release(cache_handle);
  return status;
}
- /include/leveldb/table.h
在/include/leveldb/table.h当中增加InternalGetBatch接口。
// Batch lookup inside this table, starting at `key`.
//   key: the key to seek to.
//   arg: opaque pointer; the implementation casts it to SaverBatch*.
// Returns a Status object.
Status InternalGetBatch(
const ReadOptions&, const Slice& key,
void* arg);
- /table/table.cc
在/table/table.cc当中增加InternalGetBatch实现。
// Author: likang
// Mail: likang@tju.edu.cn
// Collects from this table the value entries whose user key equals the user
// key stored in the SaverBatch passed through `arg`.
//   options: read options, forwarded to the block reader.
//   k:       internal key to seek to in the index block and in data blocks.
//   arg:     must point to a SaverBatch. Its state is set to kFound,
//            kDeleted or kCorrupt according to the last entry examined, and
//            each value entry is appended to *precord_list as
//            (user key, value).
// The scan stops at the first entry that is corrupt, is a deletion, or
// belongs to a different user key, and at the first block read error.
// Returns the first non-OK iterator status encountered, otherwise OK.
Status Table::InternalGetBatch(const ReadOptions& options, const Slice& k,
                               void* arg) {
  Status s;
  SaverBatch* saver = reinterpret_cast<SaverBatch*>(arg);
  Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
  iiter->Seek(k);

  // Set once the scan must not continue into further data blocks.
  bool done = false;
  while (!done && iiter->Valid()) {
    Slice handle_value = iiter->value();
    FilterBlockReader* filter = rep_->filter;
    BlockHandle handle;
    if (filter != NULL &&
        handle.DecodeFrom(&handle_value).ok() &&
        !filter->KeyMayMatch(handle.offset(), k)) {
      // Not found
      break;
    }

    Iterator* block_iter = BlockReader(this, options, iiter->value());
    for (block_iter->Seek(k); block_iter->Valid(); block_iter->Next()) {
      ParsedInternalKey parsed_key;
      if (!ParseInternalKey(block_iter->key(), &parsed_key)) {
        saver->state = kCorrupt;
        done = true;
        break;
      }
      if (saver->ucmp->Compare(parsed_key.user_key, saver->user_key) != 0) {
        // Entries are ordered by key, so nothing after this one can match.
        done = true;
        break;
      }
      if (parsed_key.type != kTypeValue) {
        saver->state = kDeleted;
        done = true;
        break;
      }
      saver->state = kFound;
      // Store the user key rather than the internal key, which carries an
      // 8-byte sequence/type tag, so that records look the same as those
      // produced by the memtable lookup.
      const Slice v = block_iter->value();
      saver->precord_list->push_back(
          RecordType(parsed_key.user_key.ToString(), v.ToString()));
    }

    s = block_iter->status();
    delete block_iter;
    if (!s.ok()) {
      // Keep the block error instead of letting a later block overwrite it.
      break;
    }
    if (!done) {
      // The block was exhausted while still matching; the run of entries
      // for this user key may continue in the next block.
      iiter->Next();
    }
  }  // while

  if (s.ok()) {
    s = iiter->status();
  }
  delete iiter;
  return s;
}