某业务进程多年来启动一直正常,但最近在一台新服务器上部署时,启动必core。
如下例代码所示,进程启动时core, core堆栈如下,看堆栈是字符串相关操作触发的core:
再看CDeviceStateProcessor::Init()的代码,core的代码行又没有字符串相关的操作
于是又用gdb逐行跟踪调试ReadSnapShotForDevType这个函数,发现函数内的语句都能正常执行完,但执行完函数的最后一行return true后进程就core了。
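百思不得其解,后来走读如下函数代码,才发现是使用了未初始化的指针导致的:局部指针pHeadInfo声明后没有指向任何有效内存,就被直接作为fread的目标缓冲区(即原文标红的行)。由于指针未初始化,其值是不确定的,往其指向的内存写入数据属于未定义行为:可能立即触发内存访问异常,也可能先悄悄破坏栈或堆上的其他数据,直到之后(例如函数返回、局部对象析构时)才core。此时core堆栈的信息也有可能是受到破坏的或不符合代码逻辑的(如本例截图所示在字符串析构时core,但事实上代码没有对字符串的异常操作)。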
bool CacheBlock::ReadSnapShotForDevType(const char * snap_file_path, uint8_t* pDeviceSpeedType)
{
if (pDeviceSpeedType == NULL)
{
MYLOG_ERROR(g_logger,"pDeviceSpeedType is NULL");
return false;
}
m_devTypeBuffer = pDeviceSpeedType;
MYLOG_INFO(g_logger, "Loading snapshot file: %s.", snap_file_path);
uint64_t * buffer = new uint64_t[_capacity]; // 分配读快照缓冲区
if (NULL == buffer)
{
MYLOG_ERROR(g_logger, "new buffer fail! size=%d", _capacity);
return false;
}
boost::scoped_array<uint64_t> sbuf(buffer); // 智能指针负责删除buffer
// 读快照只发生在初始化阶段,可以不必加锁操作缓存数据
struct stat file_stat;
if (stat(snap_file_path, &file_stat))
{
MYLOG_INFO(g_logger, "Warning check snapshot file's stat: %s, %s", snap_file_path,
strerror(errno));
return false;
}
// Check file size
if ((file_stat.st_size - sizeof(HEAD_INFO)) % sizeof(uint64_t))
{
MYLOG_ERROR(g_logger, "Error corrupted snapshot file: %s", snap_file_path);
return false;
}
FILE *fp = fopen(snap_file_path, "r");
if (fp == NULL)
{
MYLOG_ERROR(g_logger, "Error open snapshot file: %s, %s", snap_file_path, strerror(errno));
return false;
}
int unread_record_count = (file_stat.st_size - sizeof(HEAD_INFO)) / sizeof(uint64_t); // 等待读取的剩余记录条数
time_t dwNow = time(0);
//首先获取快照文件中的快照头部数据,获取其中的最后一次写入快照文件的时间,若时间距离当前时间超过1天,则认为快照文件无效,返回错误
HEAD_INFO* pHeadInfo;
if (fread(pHeadInfo, sizeof(HEAD_INFO), 1, fp) != (size_t)1)
{
MYLOG_ERROR(g_logger,"failed to read the head info of the devType snap file");
if (fclose(fp))
{
MYLOG_ERROR(g_logger,"Error close snapshot file: %s, %s", snap_file_path, strerror(errno));
}
return false;
}
MYLOG_DEBUG(g_logger,"last time save the dev type snap file is: %llu", pHeadInfo->modiTime);
if (pHeadInfo->modiTime > time(0) || time(0) - pHeadInfo->modiTime > 60*60*24)
{
MYLOG_ERROR(g_logger,"SnapShot file is invalid, last time write:%llu, now:%llu", pHeadInfo->modiTime, time(0));
return false;
}
int validDevTypeCount = 0;
while (unread_record_count > 0)
{
int read_count = MIN(unread_record_count, _capacity); // 本次需读取的记录条数
if (fread(buffer, sizeof(uint64_t), read_count, fp) != (size_t)read_count)
{
MYLOG_ERROR(g_logger, "Error load snapshot file: %s, %s", snap_file_path, strerror(errno));
if (fclose(fp))
{
MYLOG_ERROR(g_logger, "Error close snapshot file: %s, %s", snap_file_path, strerror(errno));
}
return false;
}
for (int i = 0; i < read_count; ++i)
{
// 只把合法的快照数据读入缓存块的相应位置,不给缓存块内留空洞
if (buffer[i] < _capacity)
{
pDeviceSpeedType[buffer[i]] = SLOWSPEED_DEVICE;
validDevTypeCount++;
}
}
unread_record_count -= _cache_block_size;
}
if (fclose(fp))
{
MYLOG_ERROR(g_logger, "Error close snapshot file: %s, %s", snap_file_path, strerror(errno));
return false;
}
MYLOG_INFO(g_logger, "Retrieving devType data from snapshot file successfully. records_count=%d.", validDevTypeCount);
return true;
}