某司的产品多年来一直使用一套鉴权模块,所有的核心函数都要经过该模块鉴权。该模块多年来都表现正常,直到最近在客户的 Docker 环境中出现了原因不明的延迟。
大致逻辑如下(略去若干无关代码):
class AuthVerifier
{
private:
mutable std::recursive_mutex mutex;
hw_handle_t handle; // hardware handle
std::atomic<bool> last_result; // most recent verfication result
struct timestamp
{
std::chrono::time_point<std::chrono::high_resolution_clock> verif_ts;
}ts;
public:
typedef std::lock_guard<std::recursive_mutex> lock_guard;
static const int check_duration = 60; // 1 minute
checker():handle(0),last_result(false)
{
}
int verify()
{
std::chrono::high_resolution_clock::time_point now = std::chrono::high_resolution_clock::now();
if(last_result)
{
if(std::chrono::duration_cast<std::chrono::seconds>(now - ts.verif_ts).count() < check_duration)
{
return true;
}
}
lock_guard lock(mutex);
if(handle)
{
hw_verify_end(handle);
}
handle = 0;
hw_status status = hw_verify_begin(&handle);
if(HW_STATUS_OK == status)
{
last_result= true;
ts.verif_ts= now;
}
else
{
last_result= false;
}
return last_result;
}
}
// Single verifier instance for this translation unit.
static AuthVerifier authv;

/**
 * Returns the AuthVerifier instance defined in this file.
 */
AuthVerifier & get_verifier()
{
    return authv;
}
/**
 * Reports whether the hardware authorization check currently passes.
 *
 * @return true when AuthVerifier::verify() reports success.
 */
bool isAuthorized()
{
    // No locking here: the verifier's mutex is a private data member, and
    // verify() acquires it itself before touching the hardware handle.
    return get_verifier().verify() != 0;
}
由于客户环境的问题,不得不在代码中加入各种日志文件、调试信息。在各个分支上加入日志、调试信息后,经过多次测试,发现问题出在hw_verify_end。该问题仅在客户程序中出现,我方提供的示例中无法复现。
与硬件提供商反复沟通并尝试各种方法之后,发现无法绕过 hw_verify_end。与同事讨论后,决定用线程的方式规避这个问题:在 AuthVerifier 类内部实现一个工作线程,在构造函数中先进行一次硬件检查(hw_verify_begin)并启动该线程;在析构函数中结束线程,并结束硬件检查(hw_verify_end);另外,添加一个接口,用于读取该类以原子变量保存的鉴权结果 last_result。
class AuthVerifier
{
private:
bool working;
std::thread worker;
FILE* log;
mutable std::recursive_mutex mutex;
hw_handle_t handle; // hardware handle
std::atomic<bool> last_result; // most recent verfication result
struct timestamp
{
std::chrono::time_point<std::chrono::high_resolution_clock> verify_ts;
}ts;
public:
typedef std::lock_guard<std::recursive_mutex> lock_guard;
static const int verify_duration = 60; // 1 minute
AuthVerifier(): handle(0), working(true)
{
log = fopen("auth_log.txt", "w");
if (NULL==log)
{
printf("cannot create log file");
}
std::chrono::high_resolution_clock::time_point now = std::chrono::high_resolution_clock::now();
ts.verify_ts = now;
hw_status_t status = hw_verify_begin(&handle);
last_result = false;
if(HW_STATUS_OK == status)
{
last_result = true;
}
worker = std::thread(&checker::start, this);
}
~AuthVerifier()
{
working = false;
worker.join();
if(handle)
{
hw_status_t status = hw_verify_end(handle);
if(HW_STATUS_OK != status)
{
fprintf(stdout, "hw verify end error: %d\n", status);
fflush(stdout);
}
}
}
bool get_last_result()
{
return last_result;
}
void start()
{
while(working)
{
if(last_result)
{
std::chrono::high_resolution_clock::time_point now = std::chrono::high_resolution_clock::now();
if(std::chrono::duration_cast<std::chrono::seconds>(now - ts.protection_check).count() > check_duration)
{
if(handle)
{
hw_status_t status = hw_verify_begin(handle);
if(HW_STATUS_OK != status)
{
fprintf(stdout, "hw verify begin error %d\n", status);
fflush(stdout);
fprintf(log, "%d\n", status);
fflush(log);
}
}
handle = 0;
hw_status_t status = hw_verify_begin(&handle);
if(HW_STATUS_OK == status)
{
last_result = true;
ts.verify_ts = now;
}
else
{
last_result = false;
}
}
}
else
{
if(handle)
{
hw_status_t status = hw_verify_end(handle);
if(HW_STATUS_OK != status)
{
fprintf(stdout, "hw verify end error: %d\n", status);
fflush(stdout);
fprintf(log, "%d\n", status);
fflush(log);
}
}
handle = 0;
hw_status_t status = hw_verify_begin(&handle);
if(HW_STATUS_OK == status)
{
last_result = true;
std::chrono::high_resolution_clock::time_point now = std::chrono::high_resolution_clock::now();
ts.verify_ts = now;
}
else
{
last_result = false;
}
}
}
}
void set_working(bool working)
{
this->working = working;
}
}
// Single verifier instance for this translation unit.
static AuthVerifier authv;

/**
 * Returns the AuthVerifier instance defined in this file.
 */
AuthVerifier & get_verifier()
{
    return authv;
}
bool isAuthorized()
{
AuthVerifier& authv = get_checker();
return auth.get_last_result();
}
放到客户环境跑了一下,没有再听说延迟了。