近日接到任务,公司一套UNIX下的C++程序每隔一段时间会出现僵死,处理变慢,重复处理数据的情况。这种情况延续了2年之久,每次出问题都是由运维人员进行手动重启处理;由于涉及到电信用户的计费,无法在生产环境中主动复现该问题。
该程序在用户态24小时运行,整体架构使用C++的面向对象特性,使用多态实现多个子类各自独立的业务处理,采用多进程单线程的模式。近日刚好碰到一次程序处理变慢并重复处理数据的情况,对日志进行了详细的截取分析,将ACE的ERROR日志,TRC日志结合起来,定位了处理函数退出的地方,并与重启后的处理成功日志进行对比,将问题定位到类DCCustPriceSyn的GetAllPricePlan()成员函数中。由于这个程序经历了不同程序员的手,代码变得比较杂乱,并且业务代码中没有任何catch异常的地方,只在顶层catch了一个unknown异常。
下面贴一段模块测试的代码
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <list>
#include <map>
#include <vector>
using namespace std;
// OTL build configuration. OTL_ORA10G is defined before the header is included,
// presumably to select the Oracle 10g back end — confirm against the OTL docs.
#define OTL_ORA10G
#include "otlv4.h"
// NOTE(review): the three OTL_BIGINT* macros below are defined AFTER otlv4.h has
// already been included, so any #ifdef on them inside the header was evaluated
// without them. If 64-bit integer support was intended, they presumably need to
// move above the #include — confirm. __int64, _atoi64 and _i64toa also look like
// MSVC-specific names; verify they exist on the UNIX toolchain before relying on them.
#define OTL_BIGINT __int64
#define OTL_STR_TO_BIGINT(str,n) \
{ \
n=_atoi64(str); \
}
#define OTL_BIGINT_TO_STR(n,str) \
{ \
_i64toa(n,str,10); \
}
// Connect string passed to otl_connect::rlogon() by DCOra.
// NOTE(review): this appears to embed database credentials in source code —
// confirm, and consider loading it from configuration instead.
const char* my_pConnectOcs = "query_ods/avpx09@hbocs";
// Element type of STCustPlan::lBindInfo: an object type code paired with an object id.
// No constructor is defined, so both members hold indeterminate values until
// the code that creates an instance assigns them.
struct STBindInfo
{
char ObjType[4]; // object type code; presumably a NUL-terminated 3-character string — TODO confirm
long ObjID; // object id
};
// Element type of STCustPlan::vecServEffInfo.
// The constructor puts every member into a known state so that a
// default-constructed value can be copied into a container safely.
struct STServEffInfo
{
    long lnServID;     // 0 until assigned
    char EffDate[24];  // effective date as a NUL-terminated string; empty until assigned
    long OfrDetailID;  // -1 until assigned
    int nFlag;         // flag = 0: the user has never ordered this product instance

    STServEffInfo()
    {
        lnServID = 0;
        // Clear the WHOLE date buffer. The size must be sizeof(EffDate):
        // sizeof(lnServID) is only the size of a long and would leave the
        // tail of the 24-byte buffer uninitialized.
        memset(EffDate, 0, sizeof(EffDate));
        OfrDetailID = -1;
        nFlag = 0;
    }
};
// One customer price-plan row together with its bind list and
// service effective-date entries.
//
// WARNING: this type owns a std::list and a std::vector, so it is NOT a plain
// data block. Never memset()/memcpy() a whole STCustPlan — doing so overwrites
// the containers' internal pointers (undefined behaviour). Reset an instance
// by assigning a freshly constructed one, or field by field.
struct STCustPlan
{
    // Gives every member a defined value. SourceType, lnPrdId, lnOfrType and
    // OfrDetailID keep the defaults they always had (2, -1, 1, 0); every other
    // numeric member starts at 0 and every character buffer starts as an empty
    // string, so no member is ever read while indeterminate.
    STCustPlan()
        : CustPlanID(0),
          CustPlanSeqID(0),
          CustID(0),
          PringPlanID(0),
          OfrID(0),
          SourceType(2),
          Priority(0),
          RegionID(0),
          BelongObjID(0),
          lnPricingLev(0),
          lnPrdId(-1),
          lnOfrType(1),
          lnOperSerialNbr(0),
          OfrDetailID(0)
    {
        memset(State, 0, sizeof(State));
        memset(ExpDate, 0, sizeof(ExpDate));
        memset(EffDate, 0, sizeof(EffDate));
        memset(ModifyTime, 0, sizeof(ModifyTime));
        memset(BelongObjType, 0, sizeof(BelongObjType));
    }

    long CustPlanID;
    long CustPlanSeqID;
    long CustID;
    long PringPlanID;
    long OfrID;
    long SourceType;
    char State[4];
    long Priority;
    char ExpDate[24];
    char EffDate[24];
    char ModifyTime[24];
    long RegionID;
    long BelongObjID;
    long lnPricingLev;
    char BelongObjType[5];
    long lnPrdId;
    long lnOfrType;
    long lnOperSerialNbr;
    long OfrDetailID;
    list<STBindInfo> lBindInfo;
    vector<STServEffInfo> vecServEffInfo;
};
class DCOra
{
public:
DCOra();
~DCOra();
public:
otl_connect db_ocs;
int m_nPriority;
public:
int GetAllPricePlan(long BelongObjID, list<STCustPlan>& lAllPricePlan);
int getOfrPriority(long lnPricingPlanId, char *szBelongObjType, long& lnOfrId, long& lnPriority);
void CheckDb()
{
char sqlCheck[256] = "\0";
int connCheck = 0;
otl_stream osCheck;
strcpy(sqlCheck," select 1 from dual ");
try
{
osCheck.open(1,sqlCheck,db_ocs);
osCheck.close();
connCheck = 1;
}
catch(otl_exception &p)
{
printf("::CheckDbConn", __FILE__, __LINE__, " p.stm_text:%s--p.msg:%s--p.code:%d \n",p.stm_text,p.msg,p.code);
}
//重连数据库
try
{
switch(connCheck)
{
case 1:
{
db_ocs.logoff();
db_ocs.rlogon(my_pConnectOcs);
db_ocs.auto_commit_off();
break;
}
default:
break;
}
}
catch(otl_exception& p)
{
printf("::CheckDbConn", __FILE__, __LINE__, " p.stm_text:%s--p.msg:%s--p.code:%d \n",p.stm_text,p.msg,p.code);
}
}
};
// Initializes the OTL environment and logs on with my_pConnectOcs, with
// auto-commit switched off. A failed logon is reported on stdout and swallowed,
// so the object may exist without a usable connection.
DCOra::DCOra()
    : m_nPriority(0)  // was never initialized although GetAllPricePlan() reads it;
                      // 0 keeps the computed priority — NOTE(review): confirm the intended default
{
    try
    {
        otl_connect::otl_initialize(1); // initialize OCI environment
        db_ocs.rlogon(my_pConnectOcs);
        db_ocs.auto_commit_off();
    }
    catch (otl_exception& p)
    {
        // intercept OTL exceptions; the tag, file and line belong inside the
        // format string, otherwise the error details are silently dropped
        printf("::DCRatable %s:%d Connect Error: (%s) (%s) (%s)\n",
               __FILE__, __LINE__, p.msg, p.stm_text, p.var_info);
    }
}
// Commits outstanding work and logs off.
// Both calls can report failure by throwing otl_exception; an exception must
// never leave a destructor, so failures are logged and swallowed here.
DCOra::~DCOra()
{
    try
    {
        db_ocs.commit();
        db_ocs.logoff();
    }
    catch (otl_exception& p)
    {
        printf("::~DCOra %s:%d p.stm_text:%s--p.msg:%s--p.code:%d \n",
               __FILE__, __LINE__, p.stm_text, p.msg, p.code);
    }
}
int DCOra::GetAllPricePlan(long BelongObjID, list<STCustPlan>& lAllPricePlan)
{
cout<<"GetAllPricePlan:Begin to get all price plan according :"<<BelongObjID<<endl;
char my_str[1024];
otl_stream os;
lAllPricePlan.clear();
STCustPlan sTempPlan;
STServEffInfo sServEffInfo;
int nRet;
long lnTmpPriority = 0;
strcpy(my_str," select CUST_PRICE_PLAN_ID,CUST_PRICE_PLAN_SEQ_ID,CUST_ID,PRICING_PLAN_ID,BELONG_OBJECT_TYPE,to_char(EFF_DATE),to_char(EXP_DATE), "
" STATE,PARTITION_ID_REGION,to_char(MODIFY_TIME),SEQ_ID from ls65_sid.cust_price_plan_t@to_sid "
" where BELONG_OBJECT_ID = :A<long> order by CUST_PRICE_PLAN_ID,CUST_PRICE_PLAN_SEQ_ID DESC ");
os.open(1,my_str,db_ocs);
os << BelongObjID;
cout<<my_str<<endl;
for( ;os.eof() == 0; )
{
memset(&sTempPlan, 0, sizeof(sTempPlan));
os >> sTempPlan.CustPlanID >> sTempPlan.CustPlanSeqID >> sTempPlan.CustID >> sTempPlan.PringPlanID >> sTempPlan.BelongObjType >> sTempPlan.EffDate >> sTempPlan.ExpDate >> sTempPlan.State >> sTempPlan.RegionID >> sTempPlan.ModifyTime >> lnTmpPriority;
sTempPlan.BelongObjID = BelongObjID;
cout<<"$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"<<endl;
printf("eff_date is:%s!\n", sTempPlan.EffDate);
printf("exp_date is:%s!\n", sTempPlan.ExpDate);
cout<<"$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"<<endl;
if (strcmp(sTempPlan.BelongObjType, "80C") == 0)
{
cout<<"into 80C"<<endl;
return 0;
}
if (strcmp(sTempPlan.BelongObjType, "80A") == 0)
{
strcmp(sServEffInfo.EffDate, sTempPlan.EffDate);
}
sTempPlan.vecServEffInfo.push_back(sServEffInfo);
list<STCustPlan>::iterator iter = lAllPricePlan.begin();
nRet = getOfrPriority(sTempPlan.PringPlanID, sTempPlan.BelongObjType, sTempPlan.OfrID, sTempPlan.Priority);
if (nRet < 0)
{
printf("getOfrPriority PringPlanID =%d is failed", sTempPlan.PringPlanID);
return nRet;
}
if (m_nPriority == 1)//seq_id覆盖
{
sTempPlan.Priority = lnTmpPriority;
}
for (; iter != lAllPricePlan.end(); ++iter)
{
if (sTempPlan.CustPlanID == iter->CustPlanID)
{
break;
}
}
if (iter == lAllPricePlan.end())
lAllPricePlan.push_back(sTempPlan);
}
printf("GetAllPricePlan:get all price plan according:%ld.it has %d price plan!", BelongObjID, lAllPricePlan.size());
os.close();
return 0;
}
// Looks up the OCS offer id and its calculation priority for a pricing plan,
// then adds a type-dependent offset to the priority.
//
//   lnPricingPlanId   value bound to :A in the lookup query
//   szBelongObjType   "80I" adds 400000, "80J" 500000, "80R" 300000;
//                     "80A", any other value, or NULL adds 600000
//   lnOfrId           receives ocs_ofr_id of the last row returned
//   lnPriority        receives CALC_PRIORITY of the last row returned, plus the offset
//
// If the query returns no rows, lnOfrId is left untouched and the offset is
// added to whatever lnPriority held on entry.
// NOTE(review): this function always returns 0, so the caller's `nRet < 0`
// branch can never fire — confirm whether "no row found" should be an error.
// OTL errors are not caught here; otl_exception propagates to the caller.
int DCOra::getOfrPriority(long lnPricingPlanId, char *szBelongObjType, long& lnOfrId, long& lnPriority)
{
    printf("DCCustPriceSyn::getOfrPriority QuerySyn1508 PricingPlanId = %ld ", lnPricingPlanId);

    otl_stream os;
    os.open(1,
            "select a.ocs_ofr_id ,c.CALC_PRIORITY from mid_pricing_plan_t a "
            " ,tb_prd_ofr_detail b,tb_prd_ofr c where a.sid_pricing_plan_id=:A<long> and a.ocs_ofr_id = b.ofr_id and b.ofr_id = c.ofr_id ",
            db_ocs);
    os << lnPricingPlanId;
    while (os.eof() == 0)
    {
        os >> lnOfrId >> lnPriority;
    }

    // "80A" and every unrecognised type share the same offset, so it is the
    // default; a NULL type pointer is treated as unrecognised rather than
    // being passed to strcmp().
    long lnOffset = 600000;
    if (szBelongObjType != NULL)
    {
        if (strcmp(szBelongObjType, "80I") == 0)
        {
            lnOffset = 400000;
        }
        else if (strcmp(szBelongObjType, "80J") == 0)
        {
            lnOffset = 500000;
        }
        else if (strcmp(szBelongObjType, "80R") == 0)
        {
            lnOffset = 300000;
        }
    }
    lnPriority += lnOffset;

    // lnPriority is a long: %ld, not %d
    printf("OfrID = %ld,Priority=%ld\n", lnOfrId, lnPriority);
    os.close();
    return 0;
}
int main()
{
DCOra *p = new DCOra();
list<STCustPlan> lAllPricePlan;
p->CheckDb();
//while(TRUE)
//{
p->GetAllPricePlan(19042175676,lAllPricePlan);
//}
delete p;
p = NULL;
return 0;
}
业务逻辑处理是从程序中粘贴而来,不难看出,业务处理中有几处不当。结构体sServEffInfo并没有赋值,却被push_back到sTempPlan.vecServEffInfo中,传给外部进行其他操作:
if (strcmp(sTempPlan.BelongObjType, "80A") == 0)
{
strcmp(sServEffInfo.EffDate, sTempPlan.EffDate);
}
这段中,进入if后却进行了字符串比较操作,这显然是前任作者的笔误,按照逻辑此处应该是strcpy赋值操作。通过分析日志,发现执行到
printf("OfrID = %ld,Priority=%d\n", lnOfrId, lnPriority);
之后,程序便抛出了无法识别的异常,而且没有打印GetAllPricePlan:get all price plan according...。这说明程序还没有退出GetAllPricePlan()中的for循环就抛出了异常,出问题的地方大概在:
if (nRet < 0)
{
printf("getOfrPriority PringPlanID =%d is failed", sTempPlan.PringPlanID);
return nRet;
}
if (m_nPriority == 1)//seq_id覆盖
{
sTempPlan.Priority = lnTmpPriority;
}
for (; iter != lAllPricePlan.end(); ++iter)
{
if (sTempPlan.CustPlanID == iter->CustPlanID)
{
break;
}
}
if (iter == lAllPricePlan.end())
lAllPricePlan.push_back(sTempPlan);
或者:
memset(&sTempPlan, 0, sizeof(sTempPlan));
os >> sTempPlan.CustPlanID >> sTempPlan.CustPlanSeqID >> sTempPlan.CustID >> sTempPlan.PringPlanID >> sTempPlan.BelongObjType >> sTempPlan.EffDate >> sTempPlan.ExpDate >> sTempPlan.State >> sTempPlan.RegionID >> sTempPlan.ModifyTime >> lnTmpPriority;
sTempPlan.BelongObjID = BelongObjID;
这个时候重点来了,我们注意到
memset(&sTempPlan, 0, sizeof(sTempPlan));
这个memset操作,本意是要初始化sTempPlan这个结构体。对于只包含基本类型和字符数组的结构体,这样做本身是没有问题的,但是!!!sTempPlan结构体中,还储存了一个list,一个vector
list<STBindInfo> lBindInfo;
vector<STServEffInfo> vecServEffInfo;
显然,如果结构体中存在容器等带有构造函数的类类型成员(即非POD类型),是不允许使用memset进行初始化的:这样做属于未定义行为,会把容器内部维护的指针直接清零,破坏容器的内存结构,导致内存泄露,最终程序会core掉。正确的做法是在构造函数中逐个初始化普通成员,对容器成员则调用clear()。
对内存的操作一定要谨慎,谨慎,再谨慎。