1,使用std::set和std::unordered_map实现高性能排行榜
1,排行榜中的一项可以抽象为如下数据结构
using uint = unsigned int;

// One leaderboard entry.
// Ordering: higher score first; on equal score the smaller time first;
// on equal score and time the smaller id first.
struct RankUnit {
    uint id{0};     // unique identifier
    uint score{0};  // points
    uint time{0};   // time the entry made the board, in seconds

    RankUnit(uint id_, uint s, uint t) : id(id_), score(s), time(t) {}

    bool operator<(const RankUnit& rhs) const {
        if (score != rhs.score) {
            return score > rhs.score;  // descending by score
        }
        if (time != rhs.time) {
            return time < rhs.time;  // ascending by time
        }
        return id < rhs.id;  // final tie-break
    }
};
2,测试代码,完整实现 rankTest.cpp
#include <chrono>
#include <exception>
#include <iostream>
#include <map>
#include <random>
#include <set>
#include <string>
#include <unordered_map>
// Default leaderboard capacity.
static constexpr int TOPN = 1024;

using uint = unsigned int;
using ulong = unsigned long;

// One leaderboard entry as used by the test program.
// Ordering: higher score first; on equal score the smaller time first;
// on equal score and time the smaller id first.
struct RankUnit {
    uint id{0};      // unique identifier
    ulong score{0};  // leaderboard points; uint should be enough in real use
    ulong time{0};   // time of the last score change; real use is typically seconds, where uint is enough

    RankUnit(uint id_, ulong s, ulong t) : id(id_), score(s), time(t) {}

    bool operator<(const RankUnit& rhs) const {
        if (score != rhs.score) {
            return score > rhs.score;  // descending by score
        }
        if (time != rhs.time) {
            return time < rhs.time;  // ascending by time
        }
        return id < rhs.id;  // final tie-break
    }

    bool operator==(const RankUnit& r) const {
        if (id != r.id) {
            return false;
        }
        if (time != r.time) {
            return false;
        }
        return score == r.score;
    }
};
class RankBase {
public:
inline void insert(const RankUnit& k, bool sorted = false) {
auto add = [&]() {
if (!sorted) {
auto pr = ranks_.emplace(k);
if (pr.second) {
rankIdx_.emplace(k.id, &(*pr.first));
}
} else {
auto it = ranks_.emplace_hint(ranks_.end(), k);
if (it != ranks_.end()) {
rankIdx_.emplace(k.id, &(*it));
}
}
};
if (ranks_.size() < topn_) {
add();
} else if (ranks_.size() == topn_) {
auto& last = *ranks_.rbegin();
if (last < k) return;
rankIdx_.erase(last.id);
ranks_.erase(last);
add();
}
}
inline void update(const RankUnit& k) {
auto it = rankIdx_.find(k.id);
if (it != rankIdx_.end()) {
ranks_.erase(*it->second);
rankIdx_.erase(it);
}
insert(k);
}
inline void setTop(uint top) {
topn_ = top;
}
inline void testShow() {
int i = 0;
for (const auto& r : ranks_) {
if (i++ > 10) {
break;
}
std::cout << r.id << " " << r.score << " " << r.time << std::endl;
}
}
inline void testSorted(const RankBase& other, bool sorted) {
ranks_.clear();
rankIdx_.clear();
for (auto& rank : other.ranks_) {
insert(rank, sorted);
}
}
private:
std::set<RankUnit> ranks_;
std::unordered_map<uint, const RankUnit*> rankIdx_;
//std::map<uint, const RankUnit*> rankIdx_;
uint topn_{TOPN};
};
// Scope timer: records a time point on construction and, on destruction,
// prints the elapsed time to std::cout as "<microseconds>us" followed by a
// newline.
class RunningTime {
public:
    // steady_clock is monotonic, so the measured interval cannot be skewed
    // by wall-clock adjustments the way a system_clock interval can.
    RunningTime() : _start(std::chrono::steady_clock::now()) {}

    ~RunningTime() {
        const auto elapsed = std::chrono::steady_clock::now() - _start;
        const auto us = std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count();
        std::cout << us << "us" << std::endl;
    }

private:
    std::chrono::steady_clock::time_point _start;  // moment the timer was created
};
// Returns the system clock's time since its epoch, in whole milliseconds.
ulong msNowTime() {
    using std::chrono::milliseconds;
    const auto sinceEpoch = std::chrono::system_clock::now().time_since_epoch();
    const milliseconds elapsed = std::chrono::duration_cast<milliseconds>(sinceEpoch);
    return elapsed.count();
}
// Returns a uniformly distributed integer in the closed range [0, max].
// The engine is seeded once from std::random_device and reused across calls.
ulong randNum(ulong max) {
    static std::default_random_engine engine(std::random_device{}());
    return std::uniform_int_distribution<ulong>(0, max)(engine);
}
// Benchmark driver.
// Usage: rankTest <topn>
// Fills a board with <topn> random entries, then times rebuilding a second
// board from it twice: once with plain insertion, once with hinted insertion.
// Returns 1 when the argument is missing, not a number, or negative.
int main(int argc, char** argv) {
    if (argc < 2) {
        std::cerr << "usage: rankTest <topn>" << std::endl;
        return 1;
    }
    int parsed = 0;
    try {
        parsed = std::stoi(argv[1]);
    } catch (const std::exception&) {
        // stoi throws on non-numeric or out-of-range text; report instead of aborting.
        std::cerr << "invalid topn: " << argv[1] << std::endl;
        return 1;
    }
    if (parsed < 0) {
        // A negative count would wrap to a huge unsigned value.
        std::cerr << "topn must not be negative: " << argv[1] << std::endl;
        return 1;
    }
    const uint testTop = static_cast<uint>(parsed);
    std::cout << "topn = " << testTop << std::endl;

    const ulong maxScore = static_cast<ulong>(1e10);
    RankBase rank;
    {
        RunningTime rt;  // times the random fill
        rank.setTop(testTop);
        for (uint i = 0; i < testTop; ++i) {
            rank.insert({i, randNum(maxScore), msNowTime()});
        }
    }
    //rank.testShow();

    RankBase sorted;
    sorted.setTop(testTop);
    {
        std::cout << "insert ";
        RunningTime rt;  // times the plain-insert rebuild
        sorted.testSorted(rank, false);
    }
    {
        std::cout << "insert sorted ";
        RunningTime rt;  // times the hinted-insert rebuild
        sorted.testSorted(rank, true);
    }
    //sorted.testShow();
    return 0;
}
3,linux上,g++开O3优化,编译
g++ -O3 rankTest.cpp -o rankTest
测试结果,运行时间以微秒为单位
排序2千万个元素,用时41s,大约需要4GB内存。如果上榜的数据已经排好序(比如在启动服务的时候从数据库或者配置文件读入已排序数据的场景),使用emplace_hint指定插入位置,效率会高一点点。
使用如下代码,可以交换sorted和unsort位置进行对比测试,结果会更精准。
// Comparison snippet: each measurement uses its own freshly constructed board.
// `rank` and `testTop` are not declared here; presumably they come from the
// enclosing main() of the test program.
// In both scopes `rt` is declared after the board, so it is destroyed first:
// the printed time is taken before the board itself is destroyed.
{
// Hinted insertion: testSorted() is called with sorted = true.
RankBase sorted;
sorted.setTop(testTop);
std::cout << "insert sorted ";
RunningTime rt;
sorted.testSorted(rank, true);
}
{
// Plain insertion: testSorted() is called with sorted = false.
RankBase unsort;
unsort.setTop(testTop);
std::cout << "insert ";
RunningTime rt;
unsort.testSorted(rank, false);
}
2,使用std::vector和std::map(std::unordered_map)实现的排行榜
局部修改,快速定位排名,适用于数据量小的实时排行榜。若有topn的上榜限制,可提前设置std::vector的容量 _data.reserve(topn)。
template <typename K, typename Val>
struct VectorMapRank {
std::map<K, int> _keyPos;
std::vector<Val> _data;
// Stores `value` under `key`.
//   key    - lookup key; _keyPos maps it to a 1-based position in _data.
//   value  - entry to store.
//   change - when true, update(key, up) is called afterwards to move the
//            entry to its place.
//   up     - direction forwarded to update().
// A key that is already present has its stored value overwritten in place.
// Appending a second element for the same key would leave an orphaned slot
// in _data whose key still resolves through _keyPos, and the position
// bookkeeping in update() would then be overwritten with wrong values.
void add(const K& key, const Val& value, bool change, bool up = true) {
    const auto existing = _keyPos.find(key);
    const bool present = existing != _keyPos.end()
        && existing->second > 0
        && existing->second <= static_cast<int>(_data.size());
    if (present) {
        _data[existing->second - 1] = value;
    } else {
        _data.emplace_back(value);
        _keyPos[key] = static_cast<int>(_data.size());  // positions are 1-based
    }
    if (change) {
        update(key, up);
    }
}
void update(const K& key, bool up = true) {
auto itr = _keyPos.find(key);
if (itr != _keyPos.end()) {
if (itr->second <= 0 || itr->second > (int)_data.size())
return;
if (up) {
for (int i = itr->second - 1; i > 0; --i) {
if (_data[i].cmpKey > _data[i - 1].cmpKey) {
Val tmp;
tmp = _data[i];
_data[i] = _data[i - 1];
_data[i - 1] = tmp;
_keyPos[_data[i].key] = i + 1;
_keyPos[_data[i - 1].key] = i;
} else {
break;
}
}
} else {
for (int i = itr->second - 1; i < (int)_data.size() - 1; ++i) {
if (_data[i].cmpKey < _data[i + 1].cmpKey) {
Val tmp;
tmp = _data[i];
_data[i] = _data[i + 1];
_data[i + 1] = tmp;
_keyPos[_data[i].key] = i + 1;
_keyPos[_data[i + 1].key] = i + 2;
} else {
break;
}
}
}
}
}
// Looks up `key`.
// On success returns a pointer to the stored value and writes its 1-based
// position to `pos`. When the key is unknown or its recorded position is
// out of range, returns nullptr and leaves `pos` at 0.
Val* getValAndPos(const K& key, int& pos) {
    pos = 0;
    const auto found = _keyPos.find(key);
    if (found == _keyPos.end()) {
        return nullptr;
    }
    const int rank = found->second;
    const bool inRange = rank > 0 && rank <= (int)_data.size();
    if (!inRange) {
        return nullptr;
    }
    pos = rank;
    return &(_data[rank - 1]);
}
// Returns a pointer to the value stored under `key`, or nullptr when the key
// is unknown or its recorded position is out of range.
Val* getVal(const K& key) {
    int ignoredPos = 0;  // position is not needed here
    return getValAndPos(key, ignoredPos);
}
// Removes every entry together with its key-to-position mapping.
void clear() {
    _data.clear();
    _keyPos.clear();
}