既有适合小白学习的零基础资料,也有适合3年以上经验的小伙伴深入学习提升的进阶课程,涵盖了95%以上C/C++开发知识点,真正体系化!
由于文件比较多,这里只是将部分目录截图出来,全套包含大厂面经、学习笔记、源码讲义、实战项目、大纲路线、讲解视频,并且后续会持续更新
/**
 * Looks up the document stored under a business key.
 *
 * The key is first translated to an internal doc id via FindDocIdByKey();
 * the forward index is then queried with that id.
 *
 * @param key  Key of the document to look up.
 * @return The forward index's result for the mapped doc id, or nullptr when
 *         FindDocIdByKey() reports that the key has no mapping.
 */
Doc *Find(_Key key) {
    // Initialised (as DoErase() does) so a failed lookup never leaves an
    // indeterminate value behind.
    uint32 doc_id = 0;
    if (!FindDocIdByKey(key, doc_id)) {
        return nullptr;
    }
    return GetForwardIndex()->Find(doc_id);
}
Insert
/**
 * Inserts a document under the next free doc id and records the
 * key -> doc id mapping.
 *
 * When _IsImport is set, the inverted-index and vector-index updates are
 * skipped here.
 *
 * @param key  Key of the new document.
 * @param doc  Document handed to the forward index.
 * @return true once the document has been recorded.
 *         NOTE(review): the results of the forward-index Insert() and of
 *         key_doc_mapping_.insert() are not inspected - confirm whether they
 *         can fail.
 */
bool DoInsert(_Key key, Doc *doc) {
    // Store the document in the forward index under the current doc id.
    GetForwardIndex()->Insert(cur_doc_id_, doc);
    key_doc_mapping_.insert(key, cur_doc_id_);
    // Update the secondary indexes.
    if (!_IsImport) {
        UpdateInvertedIndex<_IsImport>(cur_doc_id_, doc);
        UpdateVectorIndex(cur_doc_id_, doc);
    }
    cur_doc_id_++;  // advance to the next doc id
    // The original fell off the end of a bool function (undefined behaviour).
    return true;
}
Update (重要!!!)
/**
 * Applies a JSON update to the document stored under `key`.
 *
 * The update is merged into the existing document with Json2Doc(). If the
 * document changed (or Json2Doc() reports that indexed fields changed) and
 * _IsImport is not set, the document is moved to a fresh doc id through
 * DoChangeDocId().
 *
 * @param key           Key of the document to update.
 * @param update_value  JSON value carrying the new field values.
 * @return false when the key is unknown, when Json2Doc() fails, or when
 *         DoChangeDocId() fails; true otherwise.
 */
bool DoUpdate(_Key key, rapidjson::Value &update_value) {
    s::Doc *doc = Find(key);
    // Find() returns NULL for an unknown key; do not hand a null document to
    // the helpers below.
    if (doc == nullptr) {
        return false;
    }
    bool is_doc_update = _IsImport || IsDocUpdate(doc, update_value);
    bool is_build_index_update = false;
    if (!Json2Doc(is_build_index_update, doc, update_value, true, false, is_doc_update)) {
        return false;
    }
    OverrideKeyField(key, doc);
    if ((is_doc_update || is_build_index_update) && !_IsImport) {
        // Document-level update: re-register the document under a new doc id.
        return DoChangeDocId(key, doc);
    }
    return true;
}
// 变更docid
/**
 * Moves the document stored under `key` from its current doc id to
 * cur_doc_id_, then updates the inverted and vector indexes for the new id.
 *
 * @param key  Key whose doc id is being replaced.
 * @param doc  Document passed to the index update helpers.
 * @return false when the key has no mapping or the forward index refuses the
 *         key change; true otherwise.
 */
bool DoChangeDocId(_Key &key, s::Doc *doc) {
    // The original used doc_id without declaring it.
    uint32 doc_id = 0;
    if (!key_doc_mapping_.find(key, doc_id)) {
        return false;
    }
    erase_doc_set_.remove(doc_id);
    key_doc_mapping_.erase(key);
    // NOTE(review): if ChangeKey() fails, the key mapping has already been
    // erased and is not restored - confirm this is intended.
    if (!GetForwardIndex()->ChangeKey(doc_id, cur_doc_id_)) {
        return false;
    }
    key_doc_mapping_.insert(key, cur_doc_id_);
    // Update the secondary indexes for the new doc id.
    UpdateInvertedIndex<_IsImport>(cur_doc_id_, doc);
    UpdateVectorIndex(cur_doc_id_, doc);
    // Advance to the next doc id.
    cur_doc_id_++;
    return true;
}
Erase
/**
 * Removes the document stored under `key`.
 *
 * @param key  Key of the document to remove.
 * @return true when the key was mapped and the document was removed,
 *         false when the key has no mapping.
 */
bool DoErase(_Key key) {
    uint32 mapped_id = 0;
    // Unknown key: nothing to remove.
    if (!key_doc_mapping_.find(key, mapped_id)) {
        return false;
    }

    erase_doc_set_.remove(mapped_id);
    key_doc_mapping_.erase(key);
    GetForwardIndex()->Erase(mapped_id);
    size_--;
    return true;
}
三、检索流程
query检索
语法树
BooleanQuery
NumericRangeQuery
PhraseQuery
PrefixQuery
TermQuery
向量检索
四、增量写入
五、索引构建
六、插件系统
1. 插件系统元素
插件宏定义
/**
 * Defines a helper class whose constructor registers a new `cls` object
 * under the string "name" with `mgr_name::GetInstance()`, and defines one
 * object of that helper class so the registration runs when that object is
 * constructed.
 *
 * The last line deliberately has no trailing backslash: a continuation there
 * would splice whatever follows the macro into its replacement text.
 */
#define CTOOL_PLUGIN_REGISTER(mgr_name, name, cls) \
    class RegisterPlugin##name##cls { \
    public: \
        RegisterPlugin##name##cls() { \
            mgr_name::GetInstance()->Register(#name, new cls); \
        } \
    }; \
    RegisterPlugin##name##cls ___g_cRegisterPlugin##name##cls;
插件基础类
/**
 * Base class of all plugins. A registered object acts as a prototype:
 * PluginMgr calls NewInstance() on it to create working instances.
 */
class Plugin : public SupportErrorMsg {
public:
    // PluginMgr deletes plugins through base-class pointers, so the
    // destructor must be virtual.
    virtual ~Plugin() {}

    // Process-wide initialisation. PluginMgr::GlobalInit() calls this once
    // on every registered prototype.
    virtual bool GlobalInit() {
        return true;
    }

    // Per-instance initialisation. PluginMgr::NewInstance() calls this on
    // each freshly created instance and discards the instance when it
    // returns false.
    virtual bool Init() {
        return true;
    }

    // Clears this plugin's intermediate results.
    virtual void Reset() {
    }

    // Creates a new plugin object.
    virtual Plugin *NewInstance() = 0;
};
插件管理器
/\*\*
\* 插件管理器(线程安全)
\*/
template<class \_Tp>
class PluginMgr {
public:
~PluginMgr() {
for(auto &kv : class_map_) {
delete(kv.second);
}
}
// 获取新实例
_Tp \*NewInstance(string name) {
string temp_name = strings::ToLowerCase(name);
typename map<string, _Tp \*>::iterator itr = class_map_.find(temp_name);
if (itr != class_map_.end()) {
_Tp \*tp = (_Tp \*) itr->second->NewInstance();
if (tp->Init()) {
// 这里要实始化
return tp;
}
delete tp;
}
return NULL;
}
// 注册插件
void Register(string name, _Tp \*cls) {
string temp_name = strings::ToLowerCase(name);
ScopeLock scope\_lock(lock_);
class_map_[temp_name] = cls;
}
//全局初始化
bool GlobalInit() {
ScopeLock scope\_lock(lock_);
typename map<string, _Tp \*>::iterator itr = class_map_.begin();
for (; itr != class_map_.end(); itr++) {
if (!itr->second->GlobalInit()) {
return false;
}
}
return true;
}
static PluginMgr<_Tp> \*GetInstance() {
static SingletonHolder<PluginMgr<_Tp> > inst;
return inst.Get();
}
protected:
SpinLock lock_;
map<string, _Tp \*> class_map_;
};
2. NS_SRV插件
插件注册
// Registers `cls` under `name` with ns::SearchSrvPluginMgr by forwarding to
// CTOOL_PLUGIN_REGISTER.
#define NS_SRV_PLUGIN_REGISTER(name, cls) CTOOL_PLUGIN_REGISTER(ns::SearchSrvPluginMgr, name, cls)
继承插件类
/**
 * Common base of search-service plugins. Declares the PluginType tags used
 * to tell plugin flavours apart.
 */
class SearchSrvPlugin : public Plugin {
public:
    // Plugin flavours; numbering starts at 1.
    enum PluginType {
        kSearchSrvPluginTypeJson = 1,
        kSearchSrvPluginTypeBuff = 2
    };

    SearchSrvPlugin() {}

    virtual ~SearchSrvPlugin() {}
};
/**
 * Search-service plugin of the JSON flavour: subclasses implement Process()
 * to fill a rapidjson response for an RPC request.
 */
class SearchSrvJsonPlugin : public SearchSrvPlugin {
public:
    // Identifies this plugin as the JSON flavour.
    virtual PluginType Type() { return kSearchSrvPluginTypeJson; }

    // Plugin-specific request handler, implemented by each concrete plugin.
    virtual bool Process(RpcRequest &rpc_request,
                         rapidjson::Value &response,
                         rapidjson::Value::AllocatorType &allocator) = 0;

protected:
    ThreadLocal<SearchSrvJsonPluginVar> plugin_var_;
};
// Plugin manager specialised for search-service plugins.
using SearchSrvPluginMgr = PluginMgr<SearchSrvPlugin>;
3. 写服务插件
4. 过滤器插件
5. 分词插件
6. function插件
7. 构建插件
七、分词系统
八、网络框架
1. RPC服务接口类
/**
 * Abstract RPC server. Channels deliver decoded requests to Process(), which
 * hands them to the subclass-specific _Process(); the threading model
 * (_Start/_Stop/_Process) is supplied by subclasses.
 */
class RpcServer : public Runnable {
public:
    // The constructor and service API must be public: subclasses such as
    // ThreadPoolRpcServer construct this base, and channels/clients call
    // these methods. In the original they were private by default.
    RpcServer(RpcChannel *rpc_channel, RpcProcessor *rpc_processor);
    void AddChannel(RpcChannel *rpc_channel);  // adds another message channel
    virtual bool Start();                      // starts the service
    virtual void Stop();                       // stops the service

protected:
    virtual bool _Process(EventLoop *el, Socket &socket,
                          RpcRequest *rpc_request,
                          RpcProtocalCodec *rpc_protocal_codec) = 0;
    virtual bool _Start() = 0;
    virtual void _Stop() = 0;
    bool Process(EventLoop *el, Socket &socket, RpcRequest *rpc_request,
                 RpcProtocalCodec *rpc_protocal_codec);
    bool CallProcessor(RpcRequest &rpc_request, RpcResponse &rpc_response);
    // Used as the thread-pool callback that handles one request.
    bool ProcessRpcRequest(EventLoop *el, Socket &socket,
                           RpcRequest *rpc_request,
                           RpcProtocalCodec *rpc_protocal_codec);
    void Run();

protected:
    RpcServerEventListener *rpc_server_event_listener_;
    RpcChannelList rpc_channel_list_;
    hash_map<string, RpcProcessor *> specify_processor_map_;  // processors bound to specific messages
    friend class RpcChannel;
};
Process
/**
 * Entry point used by channels: forwards the decoded request to the
 * subclass-specific _Process() and returns its result.
 *
 * The original assigned the result to an undeclared `ret` and never
 * returned it.
 */
bool RpcServer::Process(EventLoop *el, Socket &socket,
                        RpcRequest *rpc_request,
                        RpcProtocalCodec *rpc_protocal_codec) {
    return _Process(el, socket, rpc_request, rpc_protocal_codec);
}
ProcessRpcRequest
调用CallProcessor处理rpc请求获取返回数据,然后调用socket函数直接写回结果
// Abbreviated excerpt of RpcServer::ProcessRpcRequest (braces and the
// declarations of rpc_response / out_rpc_memory_pkg are not shown).
// Visible steps: run the request through CallProcessor(), encode the response
// into out_rpc_memory_pkg, then write the encoded bytes to the socket while
// holding a lock selected by the socket's fd.
// NOTE(review): `ret` and the result of WriteN() are not used in the lines
// shown - confirm how failures are reported.
bool RpcServer::ProcessRpcRequest(EventLoop \*el, Socket &socket,
RpcRequest \*rpc_request,
RpcProtocalCodec \*rpc_protocal_codec)
bool ret = CallProcessor(\*rpc_request, rpc_response); // handle the RPC request and obtain the response
rpc_protocal_codec->EncodeResponse(\*rpc_request, rpc_response, \*out_rpc_memory_pkg.Get());
// The lock is picked from socket_lock_list_ by fd modulo CTOOL_RPC_SOCKET_LOCK_SIZE.
ScopeLock scope\_lock(socket_lock_list_[socket.GetFd() % CTOOL_RPC_SOCKET_LOCK_SIZE]);
socket.WriteN(out_rpc_memory_pkg->GetBuff(), out_rpc_memory_pkg->GetDataSize());
CallProcessor
// Abbreviated excerpt of RpcServer::CallProcessor: looks up the processor
// registered for the request name, then lets it handle the request.
// NOTE(review): request_name and rpc_processor are not declared in the lines
// shown, the lookup result is not checked, and `ret` is never returned -
// confirm against the full source.
bool RpcServer::CallProcessor(RpcRequest &rpc_request, RpcResponse &rpc_response){
specify_processor_map_.find(request_name, rpc_processor) // look up the processor for this request
bool ret = rpc_processor->ProcessRpcRequest(rpc_request, rpc_response); // invoke the handler
}
2. 线程池模型 RPC服务器
/**
 * RpcServer variant that hands each request to a thread pool.
 */
class ThreadPoolRpcServer : public RpcServer {
public:
    // Builds a thread-pool based RPC server. Public so the server can be
    // constructed by callers (it was private by default in the original).
    ThreadPoolRpcServer(RpcChannel *rpc_channel, RpcProcessor *rpc_processor,
                        int32 work_thread_num);
    // Sets the thread event listener.
    void SetThreadEventListener(ThreadEventListener *thread_event_listener);

protected:
    virtual bool _Start();
    virtual void _Stop();
    virtual bool _Process(EventLoop *el, Socket &socket,
                          RpcRequest *rpc_request,
                          RpcProtocalCodec *rpc_protocal_codec);

public:
    ThreadEventListener *thread_event_listener_;
    ThreadPool *thread_pool_;
};
_Process
创建一个rpc请求,扔到线程池中处理。
/**
 * Wraps the request in an RpcThreadRequest and submits it to the thread
 * pool.
 *
 * @return true after the request has been submitted.
 *         NOTE(review): the result of ThreadPool::ProcessRequest() is not
 *         visible here - if it can fail, propagate that instead, and decide
 *         who then releases rpc_thread_request.
 */
bool _Process(EventLoop *el, Socket &socket,
              RpcRequest *rpc_request,
              RpcProtocalCodec *rpc_protocal_codec) {
    // The original was missing the terminating ';' on this statement.
    RpcThreadRequest *rpc_thread_request = new RpcThreadRequest(this,
                                                                rpc_protocal_codec,
                                                                rpc_request,
                                                                socket, el);
    thread_pool_->ProcessRequest(rpc_thread_request);
    // The original fell off the end of a bool function.
    return true;
}
_Start()
// Abbreviated excerpt of _Start(): the surrounding loop over
// rpc_channel_list_ is not shown.
rpc_channel_list_[i]->Start() // start each RPC channel
thread_pool_->Start(); // start the thread pool
3. RpcChannel
/**
 * Abstract message channel. A channel owns a protocol codec and forwards
 * decoded requests to the RpcServer it is attached to.
 */
class RpcChannel : public SocketEventListener {
public:
    // Public so that RpcServer can start/stop its channels (private by
    // default in the original).
    virtual bool Start() = 0;
    virtual void Stop() = 0;
    // Hands a decoded request to rpc_server_->Process().
    void CallRpcServerProcess(EventLoop *el, Socket &socket,
                              RpcRequest *rpc_request,
                              RpcProtocalCodec *rpc_protocal_codec);

protected:
    RpcProtocalCodec *rpc_protocal_codec_;
    RpcServer *rpc_server_;
};
4. TcpRpcChannel
/**
 * TCP implementation of RpcChannel: owns the listening socket and a list of
 * TcpAcceptElThread objects created in Start().
 *
 * NOTE(review): Start() reads io_thread_num_, which is not among the members
 * shown here - confirm where it is declared.
 */
class TcpRpcChannel : public RpcChannel {
public:
    // Listen on a single port.
    TcpRpcChannel(RpcProtocalCodec *rpc_protocal_codec, string ip, int32 port,
                  int32 io_thread_num);
    // Variant taking a port range [begin_port, end_port].
    TcpRpcChannel(RpcProtocalCodec *rpc_protocal_codec, string ip,
                  int32 begin_port, int32 end_port, int32 io_thread_num);
    virtual bool Start();
    virtual void Stop();
    int32 GetListenPort();
    const char *GetListenIp();
    void SetClientSocketOpt(const SocketOpt &opt);
    // Socket event callbacks. The Socket parameters were misleadingly named
    // `opt` in the original declarations.
    bool OnConnected(EventLoop *el, Socket &socket);
    bool OnData(EventLoop *el, Socket &socket);
    bool OnTimeout(EventLoop *el, Socket &socket);
    void OnDisconnect(EventLoop *el, Socket &socket);

protected:
    string ip_;
    int32 listen_port_;
    AcceptElBalancer accept_el_balancer_;
    Socket server_socket_;
    vector<TcpAcceptElThread *> el_thread_list_;
    SocketOpt socket_opt_;
};
Start()
创建服务端socket,创建一堆TcpAcceptElThread
bool TcpRpcChannel::Start() {
server_socket_.TcpServer(listen_port_, ip_)
for (int32 i = 0; i < io_thread_num_; i++) {
el_thread_list_.push\_back(new TcpAcceptElThread(server_socket_.GetFd(), this));
}
for (int32 i = 0; i < io_thread_num_; i++) {
el_thread_list_[i]->Start();
}
}
OnData
读取socket数据,解析生成RpcRequest
/**
 * Decodes one RpcRequest from the socket and forwards it to the RPC server.
 *
 * @return always true.
 * NOTE(review): the decoded request is forwarded without a null check -
 * confirm what DecodeRequest() returns for incomplete or malformed input.
 */
bool TcpRpcChannel::OnData(EventLoop *el, Socket &socket) {
    RpcRequest *rpc_request = rpc_protocal_codec_->DecodeRequest(socket);
    CallRpcServerProcess(el, socket, rpc_request, rpc_protocal_codec_);
    return true;
}
CallRpcServerProcess
回调处理rpc数据
/**
 * Passes a decoded request on to the attached RPC server's Process().
 * The server's return value is discarded.
 */
void RpcChannel::CallRpcServerProcess(EventLoop *el, Socket &socket,
                                      RpcRequest *rpc_request,
                                      RpcProtocalCodec *rpc_protocal_codec) {
    rpc_server_->Process(el, socket, rpc_request, rpc_protocal_codec);
}
5. ElThread
/**
 * A thread that runs an event loop and dispatches its fd events to a
 * SocketEventListener.
 *
 * NOTE(review): OnReadableEvent() calls OnServerReadableEvent(), which is
 * only declared in TcpAcceptElThread in the code shown - confirm how it is
 * declared on this class.
 */
class ElThread : public Runnable, public ELCallback {
public:
    // Event-loop callbacks and thread control; public so that the event loop
    // and the owning channel can call them (private by default in the
    // original).
    virtual bool OnReadableEvent(EventLoop *el, int32 fd);
    virtual bool OnTimeoutEvent(EventLoop *el, int32 fd);
    virtual void OnNeedDelEvent(EventLoop *el, int32 fd);
    bool Start();
    void Stop();
    virtual void Run();

protected:
    EventLoop *el_;
    SocketEventListener *socket_event_listener_;
    int32 server_fd_;
    SocketOpt socket_opt_;
    Thread thread_;
};
OnReadableEvent
判断fd是不是server_fd_,如果是调用OnServerReadableEvent函数,TcpAccept生成新的fd,然后调用OnConnected函数,将fd加入epoll中;如果是其它fd,调用OnData函数,处理数据
/**
 * Readable-event dispatcher.
 *
 * A readable listening fd means a pending connection and is handled by
 * OnServerReadableEvent(); any other fd carries client data and is handed to
 * the listener's OnData().
 *
 * The original excerpt left both braces open and had no else branch, so the
 * listening fd would also have been passed to OnData().
 *
 * @return true after handling the listening fd; otherwise the listener's
 *         OnData() result.
 */
bool ElThread::OnReadableEvent(EventLoop *el, int32 fd) {
    if (fd == server_fd_) {
        OnServerReadableEvent(el, fd);
        return true;
    }
    Socket socket(fd);
    return socket_event_listener_->OnData(el, socket);
}
6. TcpAcceptElThread
TcpAcceptElThread 是 ElThread 的子类(派生类)
/**
 * ElThread subclass that accepts TCP connections on the listening fd.
 */
class TcpAcceptElThread : public ElThread {
    // Accepts one pending connection on `fd`.
    bool TcpAccept(EventLoop *el, int32 fd);
    // Called when the listening fd becomes readable. The original repeated
    // the class name on this in-class declaration (extra qualification),
    // which is ill-formed.
    void OnServerReadableEvent(EventLoop *el, int32 fd);
};
OnServerReadableEvent
/**
 * Handles a readable listening fd by accepting a connection through
 * TcpAccept(). The accept result is discarded.
 */
void TcpAcceptElThread::OnServerReadableEvent(EventLoop *el, int32 fd) {
    TcpAccept(el, fd);
}
TcpAccept
/**
 * Accepts one connection on the listening fd, notifies the listener through
 * OnConnected(), and registers the client fd for readable events with this
 * thread as the callback.
 *
 * The original excerpt left its braces open and had no return statement.
 *
 * @return false when the accept fails or the listener rejects the
 *         connection; otherwise the result of AddFd().
 * NOTE(review): nothing here closes the client socket when the listener
 * rejects it - confirm who is responsible for that.
 */
bool TcpAcceptElThread::TcpAccept(EventLoop *el, int32 fd) {
    ctool::Socket server(fd);
    ctool::Socket client;
    string client_addr;
    if (!server.TcpAccept(socket_opt_, client, client_addr)) {
        return false;
    }
    if (!socket_event_listener_->OnConnected(el, client)) {
        return false;
    }
    return el->AddFd(client.GetFd(), EL_READABLE, this);
}
7. EventLoop
8. EventLoopEpoll
/**
 * epoll-based EventLoop implementation.
 */
class EventLoopEpoll : public EventLoop {
public:
    // Public so that event-loop users can register fds and wait (private by
    // default in the original).
    // Registers `el_callback` for events on `fd`.
    virtual bool AddFd(int32 fd, int32 event_mask, ELCallback *el_callback,
                       int32 timeout_sec);
    virtual bool DelFd(int32 fd);
    // Waits for events; the timeout defaults to 1000 ms.
    virtual int32 Wait(int32 timeout_ms = 1 * 1000);
    // Dispatches one fd's events to its registered callback.
    void Callback(int32 fd, int32 event_mask);

private:
    int32 fd_;  // passed as the epoll fd to epoll_ctl/epoll_wait
    struct epoll_event *epoll_events_;
    EventFd *event_fd_;  // indexed by fd in AddFd()
};
AddFd
// Abbreviated excerpt of EventLoopEpoll::AddFd (return type, braces, the
// setup of `ee` and the error branch are not shown).
// Visible steps: add `fd` to the epoll instance, record the fd and its
// callback in the EventFd slot at index `fd`, and remember the fd in
// used_fd_list_.
EventLoopEpoll::AddFd(int32 fd, int32 event_mask, ELCallback \*el_callback,
int32 timeout_sec)
epoll\_ctl(fd_, EPOLL_CTL_ADD, fd, &ee) == -1)
// NOTE(review): event_fd_ is indexed directly by fd; no bounds check is
// visible in this excerpt.
EventFd \*pEventFd = event_fd_ + fd;
pEventFd->fd = fd;
pEventFd->el_callback = el_callback;
used_fd_list_.push\_back(fd);
Wait
通过 epoll_wait 阻塞等待事件,再根据返回的事件类型调用对应的处理函数
// Abbreviated excerpt of EventLoopEpoll::Wait: block in epoll_wait, then
// translate each returned epoll event into an EL_* mask and dispatch it.
// NOTE(review): the declaration/reset of `mask` is not shown; it must start
// at 0 for every event, otherwise bits leak from one fd to the next.
num_events = epoll\_wait(fd_, epoll_events_, max_open_sock_num_, timeout_ms)
for (int32 j = 0; j < num_events; j++){
struct epoll\_event \*e = epoll_events_ + j;
if (e->events & EPOLLIN)
mask |= EL_READABLE;
if (e->events & EPOLLOUT)
mask |= EL_WRITABLE;
// Error and hang-up conditions are reported as EL_WRITABLE only.
// NOTE(review): the Callback excerpt shown in this article handles only
// EL_READABLE - confirm that ERR/HUP on a read-only fd is still noticed.
if (e->events & EPOLLERR)
mask |= EL_WRITABLE;
if (e->events & EPOLLHUP)
mask |= EL_WRITABLE;
Callback(e->data.fd, mask);
}
Callback(int32 fd, int32 event_mask)
调用事件句柄的回调函数,根据可读、可写事件
// Abbreviated excerpt of EventLoopEpoll::Callback: for a readable event,
// invoke the fd's registered callback (the lookup of event_fd is not shown).
if (event_mask & EL_READABLE)
event_fd->el_callback->OnReadableEvent(this, fd)
九、内存管理
内存分配器
RCU无锁数据结构
rcu_vector
template<class \_Tp>
class rcu\_vector : public rcu {
public:
typedef rcu_vector<_Tp> self_type;
typedef rcu_vector_iterator<_Tp> iterator;
protected:
size_t size_;
char \*buf_;
size_t capacity_;
}
recapacity函数
- 调用内存管理器,分配新的内存空间
- memcpy将数据复制到新的空间
- vector buf指针切换指向新的内存空间
- 调用free函数延迟释放老的内存。
// Grows the backing buffer so that it can hold at least `new_size` elements.
// A request that does not exceed the current capacity is a no-op.
// Steps: allocate a new buffer, memcpy the existing elements into it, switch
// buf_ to the new buffer, release the old buffer, then update capacity_.
// MEMORY_BARRIER separates those steps; the statement order is significant
// and must not be changed.
// NOTE(review): the elements are moved with memcpy, which presumes _Tp can
// be relocated bytewise - confirm for the element types in use.
// NOTE(review): the result of S_MALLOC is not checked for failure.
void recapacity(size_t new_size) {
// size_t is unsigned, so this only triggers for new_size == 0.
if (new_size <= 0)
new_size = 1;
if (new_size <= capacity_)
return;
size_t capacity = new_size;
Allocator \*allocator = GetAllocator();
char \*new_buf = (char \*) S\_MALLOC(allocator, capacity \* sizeof(_Tp));
if (size() > 0) {
memcpy(new_buf, buf_, sizeof(_Tp) \* size_);
}
char \*old_buf = buf_;
MEMORY_BARRIER
// Switch buf_ to the new buffer.
buf_ = new_buf;
MEMORY_BARRIER
// The article describes this as a delayed free of the old buffer; the third
// argument (true) presumably requests that - confirm against S_FREE.
S\_FREE(allocator, old_buf, true);
MEMORY_BARRIER
capacity_ = capacity;
}
push_back函数
bool push\_back(const _Tp &value) {
if (size_ + 1 > capacity_) {
recapacity(size_ < 1000 ? size_ \* 2 : size_ + 1000);
}
char \*buf = buf_ + sizeof(_Tp) \* size_;
new(buf) \_Tp();
\*((_Tp \*) buf) = value;
size_++;
return true;
}
网上学习资料一大堆,但如果学到的知识不成体系,遇到问题时只是浅尝辄止,不再深入研究,那么很难做到真正的技术提升。
一个人可以走得很快,但一群人才能走得更远!不论你是正从事IT行业的老鸟或是对IT行业感兴趣的新人,都欢迎加入我们的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!
f = buf_;
MEMORY_BARRIER
buf_ = new_buf;
MEMORY_BARRIER
S_FREE(allocator, old_buf, true);
MEMORY_BARRIER
capacity_ = capacity;
}
###### push\_back函数
bool push_back(const Tp &value) {
if (size + 1 > capacity_) {
recapacity(size_ < 1000 ? size_ * 2 : size_ + 1000);
}
char *buf = buf_ + sizeof(Tp) * size;
new(buf) _Tp();
*((Tp *) buf) = value;
size++;
return true;
}
[外链图片转存中...(img-36BPLoJ9-1715563609911)]
[外链图片转存中...(img-sF4edAeg-1715563609912)]
**网上学习资料一大堆,但如果学到的知识不成体系,遇到问题时只是浅尝辄止,不再深入研究,那么很难做到真正的技术提升。**
**[需要这份系统化资料的朋友,可以戳这里获取](https://bbs.csdn.net/topics/618668825)**
**一个人可以走得很快,但一群人才能走得更远!不论你是正从事IT行业的老鸟或是对IT行业感兴趣的新人,都欢迎加入我们的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!**