源码路径
src\bthread\timer_thread.h
src\bthread\timer_thread.cpp
基本成员
bool _started; // set to true once start() has successfully created the thread
butil::atomic<bool> _stop; // stop flag: set by stop_and_join(), checked by schedule() and run()
TimerThreadOptions _options; // options copied in by start(); num_buckets sizes _buckets
Bucket* _buckets; // array of buckets holding the tasks waiting to be executed
internal::FastPthreadMutex _mutex; // lock protecting _nearest_run_time (_nsignals is also bumped under it)
int64_t _nearest_run_time; // earliest known run time, in microseconds; int64 max means "none"
// the futex for wake up timer thread. can't use _nearest_run_time because
// it's 64-bit.
int _nsignals;
pthread_t _thread; // the thread that schedules and executes tasks (runs run())
构造与析构
// Builds an idle TimerThread: not started, stop flag cleared, no buckets
// allocated, no signals issued, and the nearest run time set to the largest
// int64 value.
TimerThread::TimerThread()
    : _started(false),
      _stop(false),
      _buckets(NULL),
      _nearest_run_time(std::numeric_limits<int64_t>::max()),
      _nsignals(0),
      _thread(0) {}
// Stops the timer thread first, then releases the bucket array.
TimerThread::~TimerThread() {
    // run() walks _buckets, so ask the thread to stop before freeing them.
    stop_and_join();

    Bucket* const doomed_buckets = _buckets;
    _buckets = NULL;
    delete [] doomed_buckets;
}
析构时,调用了stop_and_join:
// Asks the timer thread to stop and, unless the caller is the timer thread
// itself, wakes it up and blocks until it has exited.
// NOTE(review): _started is not cleared after the join, so a second call
// (e.g. an explicit call followed by the destructor) would pthread_join the
// same thread again -- confirm callers never do that.
void TimerThread::stop_and_join() {
// Raise the stop flag first so that schedule() rejects new tasks and the
// loop in run() terminates on its next check.
_stop.store(true, butil::memory_order_relaxed);
if (_started) {
{
BAIDU_SCOPED_LOCK(_mutex);
// trigger pull_again and wakeup TimerThread
_nearest_run_time = 0;
++_nsignals;
}
// Joining from the timer thread itself would wait on the caller, so the
// wake-up and join are skipped in that case.
if (pthread_self() != _thread) {
// stop_and_join was not called from a running task.
// wake up the timer thread in case it is sleeping.
futex_wake_private(&_nsignals, 1);
pthread_join(_thread, NULL);
}
}
}
- _stop 是原子成员(butil::atomic<bool>),store 是其原子写入方法(butil::atomic 在启用 C++11 时继承自 std::atomic,否则继承自 boost::atomic,可参考:atomic),这里以 relaxed 内存序把 true 写入 _stop,作为停止标志。
- futex_wake_private 用于唤醒 1 个在 _nsignals 上等待的 thread(_nsignals 是供 futex 等待/唤醒使用的计数字,作用类似信号计数,并不是 POSIX 信号量)
- pthread_join 是 POSIX 线程接口(不是 TaskGroup::join),它阻塞等待 _thread(即执行 run 的定时器线程)退出;只有当调用者不是定时器线程自身时才会唤醒并 join,避免线程等待自己
启动:start
int TimerThread::start(const TimerThreadOptions* options_in) {
if (_started) {
return 0;
}
if (options_in) {
_options = *options_in;
}
if (_options.num_buckets == 0) {
LOG(ERROR) << "num_buckets can't be 0";
return EINVAL;
}
if (_options.num_buckets > 1024) {
LOG(ERROR) << "num_buckets=" << _options.num_buckets << " is too big";
return EINVAL;
}
_buckets = new (std::nothrow) Bucket[_options.num_buckets];
if (NULL == _buckets) {
LOG(ERROR) << "Fail to new _buckets";
return ENOMEM;
}
const int ret = pthread_create(&_thread, NULL, TimerThread::run_this, this);
if (ret) {
return ret;
}
_started = true;
return 0;
}
这里只会start 一次,主要工作是初始化桶数组,然后创建thread 来执行 run_this 方法,也即 run 方法。
void* TimerThread::run_this(void* arg) {
static_cast<TimerThread*>(arg)->run();
return NULL;
}
添加调度任务:schedule
// Registers fn(arg) to be run at `abstime`.
//
// Returns the id produced by the chosen bucket, or INVALID_TASK_ID when the
// timer thread has not been started or is stopping.
TimerThread::TaskId TimerThread::schedule(
    void (*fn)(void*), void* arg, const timespec& abstime) {
    if (_stop.load(butil::memory_order_relaxed) || !_started) {
        // Not add tasks when TimerThread is about to stop.
        return INVALID_TASK_ID;
    }

    // Hashing by pthread id is better for cache locality.
    Bucket& bucket =
        _buckets[butil::fmix64(pthread_numeric_id()) % _options.num_buckets];
    const Bucket::ScheduleResult result = bucket.schedule(fn, arg, abstime);
    if (!result.earlier) {
        // The bucket already held an earlier task; no wake-up is needed.
        return result.task_id;
    }

    // The task is the earliest in its bucket; re-check against the global
    // nearest run time under the lock before waking the timer thread.
    const int64_t run_time = butil::timespec_to_microseconds(abstime);
    bool need_wakeup = false;
    {
        BAIDU_SCOPED_LOCK(_mutex);
        need_wakeup = (run_time < _nearest_run_time);
        if (need_wakeup) {
            _nearest_run_time = run_time;
            ++_nsignals;
        }
    }
    // Wake outside the critical section, as the lock is no longer needed.
    if (need_wakeup) {
        futex_wake_private(&_nsignals, 1);
    }
    return result.task_id;
}
fn 是调度的方法,arg 是方法的参数
先判断是否已停止或尚未启动(!_started):load 是原子类型的读取方法,用于获得 _stop 的当前值;两个条件任一成立就直接返回 INVALID_TASK_ID。
用thread id获得桶下标,然后 调用 TimerThread::Bucket::schedule 方法:
// Creates a task for fn(arg) due at `abstime` and links it into this bucket.
//
// Returns { task id, earlier } where `earlier` is true when the new task's
// run time is smaller than this bucket's previous _nearest_run_time. When no
// Task can be obtained from the resource pool the result is
// { INVALID_TASK_ID, false }.
TimerThread::Bucket::ScheduleResult
TimerThread::Bucket::schedule(void (*fn)(void*), void* arg,
                              const timespec& abstime) {
    butil::ResourceId<Task> slot_id;
    Task* task = butil::get_resource<Task>(&slot_id);
    if (task == NULL) {
        ScheduleResult result = { INVALID_TASK_ID, false };
        return result;
    }
    task->next = NULL;
    task->fn = fn;
    task->arg = arg;
    task->run_time = butil::timespec_to_microseconds(abstime);
    uint32_t version = task->version.load(butil::memory_order_relaxed);
    if (version == 0) {  // skip 0.
        task->version.fetch_add(2, butil::memory_order_relaxed);
        version = 2;
    }
    // The id packs the slot and the version current at scheduling time;
    // unschedule() and run_and_delete() compare against that version.
    const TaskId id = make_task_id(slot_id, version);
    task->task_id = id;
    bool earlier = false;
    {
        // _mutex, _task_head and _nearest_run_time here are the bucket's own
        // members: push the task at the head of the bucket's list.
        BAIDU_SCOPED_LOCK(_mutex);
        task->next = _task_head;
        _task_head = task;
        if (task->run_time < _nearest_run_time) {
            _nearest_run_time = task->run_time;
            earlier = true;
        }
    }
    ScheduleResult result = { id, earlier };
    return result;
}
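这里主要是从资源池(butil::get_resource)取得一个 Task 并填好字段,然后在桶自己的锁保护下把 task 插到该桶任务链表的头部,并更新该桶的最近调度时间。随后将任务 id 和"是否比桶内已有任务更早"的信息返回到外层 schedule;外层在 TimerThread 的 _mutex 下与全局 _nearest_run_time 二次比较确认后,唤醒可能正在休眠的 TimerThread。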
取消调度任务:unschedule
// Cancels the task identified by `task_id`.
//
// Returns 0 when the task was cancelled before it started, 1 when the stored
// version shows it is currently running, and -1 otherwise (the slot cannot
// be resolved, or the stored version matches neither state).
int TimerThread::unschedule(TaskId task_id) {
    const butil::ResourceId<Task> slot = slot_of_task_id(task_id);
    Task* const target = butil::address_resource(slot);
    if (NULL == target) {
        LOG(ERROR) << "Invalid task_id=" << task_id;
        return -1;
    }

    const uint32_t scheduled_version = version_of_task_id(task_id);
    uint32_t observed_version = scheduled_version;
    // This CAS is rarely contended, should be fast.
    // The acquire fence is paired with release fence in Task::run_and_delete
    // to make sure that we see all changes brought by fn(arg).
    const bool cancelled = target->version.compare_exchange_strong(
        observed_version, scheduled_version + 2,
        butil::memory_order_acquire);
    if (cancelled) {
        return 0;
    }
    // On failure the CAS stored the actual version in observed_version;
    // scheduled_version + 1 is the value run_and_delete() sets while running.
    if (observed_version == scheduled_version + 1) {
        return 1;
    }
    return -1;
}
获取此task的版本号,用CAS 将其在原值基础上加2. 替换成功直接返回,否则 expected_version 会获得实际存储的值。
返回0,表示task 还未执行且已被成功取消;返回1,表示任务正在执行中;返回-1,表示任务不存在(task_id 无效,或该任务已经执行完毕、已被取消过)。
到这里还有个问题没解决,就是调度的run做了什么……
调度任务执行:run
// Main loop of the timer thread. Each iteration pulls newly scheduled tasks
// out of the buckets into a local heap, runs the tasks that are due, and then
// waits on the _nsignals futex until the next task is due or a wake-up
// arrives. The loop ends once _stop is set.
void TimerThread::run() {
run_worker_startfn();
#ifdef BAIDU_INTERNAL
logging::ComlogInitializer comlog_initializer;
#endif
int64_t last_sleep_time = butil::gettimeofday_us();
BT_VLOG << "Started TimerThread=" << pthread_self();
// Heap of pending tasks ordered with task_greater, so that tasks[0] is the
// task to run next (i.e. a min-heap on run_time).
std::vector<Task*> tasks;
tasks.reserve(4096);
// (Part of the function is elided in this excerpt; nscheduled, ntriggered,
// busy_seconds and task_greater used below are not declared in the visible
// code.)
……
while (!_stop.load(butil::memory_order_relaxed)) { // keep going until stop is requested
// Reset the global nearest run time; schedule() lowers it again (and bumps
// _nsignals) when an earlier task is added from now on.
{
BAIDU_SCOPED_LOCK(_mutex);
_nearest_run_time = std::numeric_limits<int64_t>::max();
}
// Pull the tasks out of every bucket and push those that were not
// unscheduled into the heap.
// NOTE(review): p->next is read after p->try_delete(); if try_delete()
// releases the task this reads a released object -- confirm.
for (size_t i = 0; i < _options.num_buckets; ++i) {
Bucket& bucket = _buckets[i];
for (Task* p = bucket.consume_tasks(); p != NULL;
p = p->next, ++nscheduled) {
if (!p->try_delete()) { // remove the task if it's unscheduled
tasks.push_back(p);
std::push_heap(tasks.begin(), tasks.end(), task_greater);
}
}
}
bool pull_again = false;
while (!tasks.empty()) {
Task* task1 = tasks[0]; // the about-to-run task
if (task1->try_delete()) { // the task was unscheduled: drop it from the heap
std::pop_heap(tasks.begin(), tasks.end(), task_greater);
tasks.pop_back();
continue;
}
if (butil::gettimeofday_us() < task1->run_time) { // not due yet
break;
}
{
BAIDU_SCOPED_LOCK(_mutex);
if (task1->run_time > _nearest_run_time) {
// A task with an earlier run time was scheduled meanwhile;
// pull from the buckets again before running anything.
pull_again = true;
break;
}
}
std::pop_heap(tasks.begin(), tasks.end(), task_greater);
tasks.pop_back();
if (task1->run_and_delete()) { // run the earliest task; true means fn was called
++ntriggered;
}
}
if (pull_again) { // restart the iteration to pull tasks again
BT_VLOG << "pull again, tasks=" << tasks.size();
continue;
}
// The realtime to wait for.
int64_t next_run_time = std::numeric_limits<int64_t>::max();
if (tasks.empty()) {
next_run_time = std::numeric_limits<int64_t>::max();
} else {
next_run_time = tasks[0]->run_time; // run time of the next pending task
}
int expected_nsignals = 0;
{
BAIDU_SCOPED_LOCK(_mutex);
// An earlier task was scheduled since the buckets were pulled: skip the
// wait and go pull it. Otherwise publish the time we are about to sleep
// until and snapshot _nsignals for the futex wait below.
if (next_run_time > _nearest_run_time) {
continue;
} else {
_nearest_run_time = next_run_time;
expected_nsignals = _nsignals;
}
}
// A NULL timeout is passed when there is no pending task.
timespec* ptimeout = NULL;
timespec next_timeout = { 0, 0 };
const int64_t now = butil::gettimeofday_us();
if (next_run_time != std::numeric_limits<int64_t>::max()) {
next_timeout = butil::microseconds_to_timespec(next_run_time - now);
ptimeout = &next_timeout;
}
busy_seconds += (now - last_sleep_time) / 1000000.0;
// Wait for the computed timeout; schedule() and stop_and_join() bump
// _nsignals and call futex_wake_private to end the wait early.
// NOTE(review): presumably returns at once if _nsignals already differs
// from expected_nsignals (usual futex semantics) -- confirm.
futex_wait_private(&_nsignals, expected_nsignals, ptimeout);
last_sleep_time = butil::gettimeofday_us();
}
BT_VLOG << "Ended TimerThread=" << pthread_self();
}
代码有点长,主要内容:
- 用最小堆tasks 存放未执行的task
- 从桶中取出未执行的task放到tasks,并按调度时间维护成堆:std::push_heap 默认(使用 operator<)构建的是最大堆,这里传入 task_greater 作为比较函数,使 run_time 最小的 task 位于堆顶 tasks[0],也就是最小堆。
- 将已被取消(unschedule)的任务移出最小堆(try_delete 返回 true 的情况)。
- 如果_nearest_run_time(当前任务最早时间)比第一个task时间还早,要重新拉取一次
- 取出堆顶已到执行时间且未被取消的任务,调用run_and_delete 方法执行
- 下一个最早的未调度任务的run_time 更新到_nearest_run_time
- 计算下次任务还有多久到来,就在_nsignals上 wait 多久(futex_wait_private),如果 schedule 又添加了新任务,可能新任务的时间更早,所以会唤醒一次,参考上面schedule 代码。
任务执行
bool TimerThread::Task::run_and_delete() {
const uint32_t id_version = version_of_task_id(task_id);
uint32_t expected_version = id_version;
// This CAS is rarely contended, should be fast.
if (version.compare_exchange_strong(
expected_version, id_version + 1, butil::memory_order_relaxed)) {
fn(arg);
// The release fence is paired with acquire fence in
// TimerThread::unschedule to make changes of fn(arg) visible.
version.store(id_version + 2, butil::memory_order_release);
butil::return_resource(slot_of_task_id(task_id));
return true;
} else if (expected_version == id_version + 2) {
// already unscheduled.
butil::return_resource(slot_of_task_id(task_id));
return false;
} else {
// Impossible.
LOG(ERROR) << "Invalid version=" << expected_version
<< ", expecting " << id_version + 2;
return false;
}
}
就是尝试将任务的版本号换成原始版本号+1,表示执行中;如果成功了,就执行fn。如果失败,expected_version 获得当前版本号,可以知道task处于什么状态。