工作中需要完成一个工具,其主要用途是向指定的服务器和端口发送 HTTP 请求。为了提高性能,工具采用多线程的方式,并且在每个线程内使用 libcurl 的异步(multi)接口。代码如下,其中的注释记录了写代码过程中遇到的问题。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdarg.h>
#include <pthread.h>
#include <signal.h>
#include <curl/curl.h>
#include <iostream>
#include <sstream>
#include <string>
using namespace std;
// Query strings loaded from the query file: up to 100000 entries, each held in a 2048-byte buffer.
// Do not declare a variable this large inside a function; it can easily crash (core dump)
// as soon as the function is entered.
char globalQueryContext[100000][2048];
// Log file name; overridden by -l. Not read anywhere else in the code shown here.
string globalLogFile = "access.log";
// Target host of every request; overridden by -h.
string globalHost = "i3114.se.shyc2.qihoo.net";
// Target port of every request; overridden by -p.
string globalPort = "6351";
// File the queries are read from; overridden by -q.
string globalQueryFile = "query.txt";
// Stop flag: set by the signal handler and by the timer thread, polled by the worker loops.
volatile bool globalStop = false;
// Number of worker threads; overridden by -t.
static int globalThreadNumber = 3;
// Signal handler (registered in main for SIGINT and SIGTERM): only raises the
// global stop flag so that the polling loops can wind down.
void sig_handle(int /*signalNumber*/)
{
    globalStop = true;
}
// Per-thread argument block handed to each worker thread.
struct thread_arg
{
    int sequence;          // index of the worker thread; -1 until assigned
    int successRequest;    // result written back by the worker
    int totalQueryNumber;  // total number of queries shared by all workers

    // Starts with no thread index and zeroed counters.
    thread_arg()
        : sequence(-1),
          successRequest(0),
          totalQueryNumber(0)
    {
    }
};
// libcurl write callback: appends the received chunk, followed by a newline
// (std::endl), to the stringstream passed through `stream`.
//
// contents: start of the received bytes (not NUL-terminated)
// size, nmemb: the chunk is size * nmemb bytes long
// stream: must point to a stringstream
// Returns the number of bytes consumed, i.e. size * nmemb.
size_t write_response(void *contents, size_t size, size_t nmemb, void *stream )
{
    const size_t byteCount = size * nmemb;
    stringstream *sink = static_cast<stringstream*>(stream);
    const string chunk(static_cast<const char*>(contents), byteCount);
    (*sink) << chunk << endl;
    return byteCount;
}
// Builds the full request URL "http://<host>:<port>/mod_qsrch/warmup?kw=<query>"
// into the caller-supplied buffer and echoes it, with its length, to stdout.
//
// completedQuery: destination buffer
// queryContext:   query text appended after "kw=" (used as is, no escaping)
// capacity:       size of completedQuery in bytes; defaults to 2048, the buffer
//                 size used by every caller in this file
// Returns completedQuery, or NULL when an argument is invalid or formatting fails.
//
// The previous implementation chained strcat() calls without any bound, so a
// long query plus the URL prefix could write past the end of the buffer.
// snprintf() never writes more than `capacity` bytes; an overlong URL is
// truncated (and reported) instead of corrupting memory.
char *getCompletedQuery(char* const completedQuery, const char *queryContext, size_t capacity = 2048)
{
    if(completedQuery == NULL || queryContext == NULL || capacity == 0)
        return NULL;
    const int needed = snprintf(completedQuery, capacity,
                                "http://%s:%s/mod_qsrch/warmup?kw=%s",
                                globalHost.c_str(), globalPort.c_str(), queryContext);
    if(needed < 0)
    {
        completedQuery[0] = '\0';
        return NULL;
    }
    if(static_cast<size_t>(needed) >= capacity)
        cout << "query url truncated to " << capacity - 1 << " bytes" << endl;
    cout << string(completedQuery) << ":" << strlen(completedQuery) << endl;
    return completedQuery;
}
// Configures one easy handle: disables libcurl's use of signals, sets the
// request URL and routes the response body through write_response().
//
// curl:           easy handle to configure
// completedQuery: full request URL
// response:       stream whose address is handed to the write callback; it has
//                 to stay alive for as long as the transfer can deliver data
void setCurlEasy(CURL *curl, const char *completedQuery, stringstream& response)
{
    stringstream *responseSink = &response;
    curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
    curl_easy_setopt(curl, CURLOPT_URL, completedQuery);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_response);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, responseSink);
}
// Prints the outcome of a blocking transfer: "OK" for CURLE_OK, otherwise the
// libcurl error text. Returns true only for CURLE_OK.
bool dealWithCurlCode(CURLcode code)
{
    const bool succeeded = (code == CURLE_OK);
    if(succeeded)
        cout << "OK" << endl;
    else
        cout << string(curl_easy_strerror(code)) << endl;
    return succeeded;
}
bool asyncSetCurlEasy(const char queryContext[][2048],
char completedQuerys[][2048],
stringstream responses[],
int queryBeginPosition,
size_t queryNumber,
CURLM *curlm
)
{
if(completedQuerys == NULL || responses == NULL || curlm == NULL)
return false;
for(size_t i = 0; !globalStop &&i < queryNumber; ++i)
{
CURL *curl = curl_easy_init();
if(curl)
{
getCompletedQuery(completedQuerys[i], queryContext[queryBeginPosition + i]);
setCurlEasy(curl, completedQuerys[i], responses[i]);
curl_multi_add_handle(curlm, curl);
}
else
return false;
}
return true;
}
// Drains the completion messages of a multi handle, logs each finished
// transfer, detaches and destroys its easy handle, and returns how many
// transfers finished with CURLE_OK.
//
// Returns 0 when curlm is NULL. Draining stops early once the stop flag is set.
//
// Fixes over the previous version:
//  - CURLINFO_RESPONSE_CODE writes a long; passing the address of an int
//    overwrote neighbouring stack memory on LP64 platforms.
//  - The CURLMsg is invalidated by curl_multi_remove_handle(), so the handle
//    and result are copied out first instead of reading msg afterwards.
//  - A missing effective URL is no longer streamed as a NULL char pointer.
//  - Only transfers whose result is CURLE_OK are counted as successful;
//    previously every finished transfer was counted.
int asyncDealWithCurlCode(CURLM *curlm)
{
    if(curlm == NULL)
        return 0;
    int leftMsg = 0;
    int sucessCurl = 0;
    CURLMsg* msg = NULL;
    while(!globalStop && (msg = curl_multi_info_read(curlm, &leftMsg)) != NULL)
    {
        if(msg->msg != CURLMSG_DONE)
            continue;
        CURL *finished = msg->easy_handle;
        const CURLcode result = msg->data.result;

        long httpStatusCode = 0;
        curl_easy_getinfo(finished, CURLINFO_RESPONSE_CODE, &httpStatusCode);
        char *effectiveUrl = NULL;
        curl_easy_getinfo(finished, CURLINFO_EFFECTIVE_URL, &effectiveUrl);
        cout << "url: " << (effectiveUrl != NULL ? effectiveUrl : "(unknown)")
             << " status: " << httpStatusCode << " "
             << curl_easy_strerror(result) << endl;
        if(result == CURLE_OK)
            sucessCurl++;

        // effectiveUrl points into the easy handle, so clean up only after logging.
        curl_multi_remove_handle(curlm, finished);
        curl_easy_cleanup(finished);
    }
    return sucessCurl;
}
int asyncSendRequestAndGetResponse(const char queryContexts[][2048],
int queryBeginPosition,
size_t queryNumber)
{
char completedQuerys[queryNumber][2048];
stringstream responses[queryNumber];
CURLM *curlm = curl_multi_init();
//TODO 对curlm进行判断
if(!asyncSetCurlEasy(queryContexts, completedQuerys, responses, queryBeginPosition, queryNumber, curlm))
{
cout << "asyncSetCurlEasy error"<< endl;
return 0;
}
int runningCurls = 0;
do{
curl_multi_wait(curlm, NULL, 0, 2000, NULL);
curl_multi_perform(curlm, &runningCurls);
}while(runningCurls > 0 && !globalStop);
int sucessRequest = asyncDealWithCurlCode(curlm);
curl_multi_cleanup(curlm);
return sucessRequest;
}
bool sendRequestAndGetResponse(const char* queryContext)
{
CURL *curl = curl_easy_init();
if(curl)
{
char completedQuery[2048] = {0};
stringstream response;
getCompletedQuery(completedQuery, queryContext);
setCurlEasy(curl, completedQuery, response);
CURLcode code = curl_easy_perform(curl);
curl_easy_cleanup(curl);
return dealWithCurlCode(code);
}
else
return false;
}
// Reads up to `queryNumber` lines from the file at `path` into `queryContexts`
// (one query per line, line terminator removed) and echoes each one to stdout.
//
// Returns the number of queries stored; 0 when the file cannot be opened or
// the arguments are invalid.
//
// Fixes over the previous version:
//  - The last character was removed unconditionally, which cut off a real
//    character whenever a line had no trailing '\n' (last line of a file
//    without a final newline, or a line longer than the buffer).
//  - The remainder of an overlong line is discarded instead of being stored
//    as a separate bogus query.
//  - A trailing '\r' (CRLF files) is removed as well.
int getQueryContext(const string& path, char queryContexts[][2048], int queryNumber)
{
    if(queryContexts == NULL || queryNumber <= 0)
        return 0;
    FILE* fd = fopen(path.c_str(), "r");
    if(!fd)
    {
        cout << "open file " << path << " failed!"<< endl;
        return 0;
    }
    int index = 0;
    while(!globalStop && index < queryNumber && fgets(queryContexts[index], 2048, fd) != NULL)
    {
        char *line = queryContexts[index];
        size_t length = strlen(line);
        if(length > 0 && line[length - 1] == '\n')
        {
            line[--length] = '\0';
        }
        else
        {
            // No newline was read: either end of file or the line did not fit.
            // Skip whatever is left of this line.
            int ch;
            while((ch = fgetc(fd)) != EOF && ch != '\n')
            {
            }
        }
        if(length > 0 && line[length - 1] == '\r')
            line[--length] = '\0';
        cout << "query" << index << ":" << line << endl;
        index++;
    }
    fclose(fd);
    return index;
}
void *doWarmBySendQueryFormFile(void *arg)
{
struct thread_arg *queryMsg = (thread_arg*)arg;
int averageQueryNumber = queryMsg->totalQueryNumber / globalThreadNumber;
int queryBeginPosition = queryMsg->sequence * averageQueryNumber;
if(queryMsg->sequence == globalThreadNumber - 1)
averageQueryNumber = queryMsg->totalQueryNumber - averageQueryNumber * queryMsg->sequence;
cout << "thread " << queryMsg->sequence << " query begin position is " << queryBeginPosition << " query number is " << averageQueryNumber << endl;
queryMsg->successRequest = asyncSendRequestAndGetResponse(globalQueryContext, queryBeginPosition, averageQueryNumber);
return NULL;
}
// Prints the command-line synopsis to stdout.
// pname: program name (argv[0]); a NULL pointer is printed as an empty name.
//
// Fix: the program name used to run straight into the first option
// ("prog-p port ..."); a separating space is now printed.
void usage(const char* pname)
{
    cout << (pname != NULL ? pname : "") <<
         " -p port "
         "-h host "
         "-q input_file "
         "-l log_file "
         "-t thread_number" << endl;
}
// Timer thread: detaches itself, blocks every signal except SIGALRM for this
// thread, sleeps for ten minutes and then raises the global stop flag.
// `arg` is unused. Always returns NULL.
//
// Fixes two lines that did not compile: sigfillset() was called with two
// arguments where sigdelset() was meant, and nanosleep() was given "%delay"
// instead of "&delay".
void *time_worker(void *arg)
{
    (void)arg;
    pthread_detach(pthread_self());
    struct timespec delay;
    delay.tv_sec = 10 * 60;
    delay.tv_nsec = 0;
    sigset_t mask;
    sigfillset(&mask);
    sigdelset(&mask, SIGALRM);
    pthread_sigmask(SIG_BLOCK, &mask, NULL);// block signals in this thread
    nanosleep(&delay, NULL);
    globalStop = true;
    cout << "time to exit" << endl;
    return NULL;
}
// Parses `text` as a strictly positive decimal integer that fits in an int.
// Returns false (leaving *value untouched) for empty input, trailing garbage,
// zero, negative or out-of-range values.
static bool parsePositiveInt(const char *text, int *value)
{
    if(text == NULL || value == NULL)
        return false;
    char *end = NULL;
    const long parsed = strtol(text, &end, 10);
    if(end == text || *end != '\0')
        return false;
    if(parsed <= 0 || parsed != static_cast<long>(static_cast<int>(parsed)))
        return false;
    *value = static_cast<int>(parsed);
    return true;
}

// Parses the command line and stores the results in the global settings:
//   -p port, -h host, -q query file, -l log file, -t thread number.
// Returns false, after printing the usage text, on an unknown option or an
// invalid thread number.
//
// Fix: the thread number was converted with atoi(), which silently turned
// garbage into 0 and accepted negative values; both later break the
// allocation and partitioning code in main and the worker threads.
bool parseParameters(int argc, char *argv[])
{
    int c;
    while((c = getopt(argc, argv, "h:p:q:l:t:")) != -1)
    {
        switch(c)
        {
        case 'p':
            globalPort = optarg;
            break;
        case 'h':
            globalHost = optarg;
            break;
        case 'q':
            globalQueryFile = optarg;
            break;
        case 'l':
            globalLogFile = optarg;
            break;
        case 't':
            if(!parsePositiveInt(optarg, &globalThreadNumber))
            {
                cout << "invalid thread number: " << optarg << endl;
                usage(argv[0]);
                return false;
            }
            break;
        default:
            usage(argv[0]);
            return false;
        }
    }
    return true;
}
// Program entry: initialises libcurl, parses the command line, loads the
// queries, starts the worker threads and the timer thread, waits for the
// workers and cleans up.
//
// Fixes over the previous version:
//  - "thread_t twid" did not compile; the type is pthread_t.
//  - The thread count is validated and capped at the number of queries, so no
//    worker is started with an empty share.
//  - The NULL checks after new[] were removed: a failing new[] throws
//    std::bad_alloc rather than returning NULL, so they could never trigger.
int main(int argc, char* argv[])
{
    CURLcode code = curl_global_init(CURL_GLOBAL_ALL);
    if(code != CURLE_OK)
    {
        cout << "curl_global_init error" << endl;
        exit(-1);
    }
    if(!parseParameters(argc, argv))
    {
        cout << "parse parameters error" << endl;
        exit(-1);
    }
    if(globalThreadNumber <= 0)
    {
        cout << "thread number must be positive" << endl;
        exit(-1);
    }
    signal(SIGINT, sig_handle);// install the signal handlers
    signal(SIGTERM, sig_handle);
    int queryNumber = getQueryContext(globalQueryFile, globalQueryContext, 100000);
    if(!queryNumber)
    {
        cout << "read query from file failed" << endl;
        exit(-1);
    }
    if(globalThreadNumber > queryNumber)
        globalThreadNumber = queryNumber;
    pthread_t *tids = new pthread_t[globalThreadNumber];
    thread_arg *thr_args = new thread_arg[globalThreadNumber];
    for(int i = 0; i < globalThreadNumber; ++i)
    {
        thr_args[i].totalQueryNumber = queryNumber;
        thr_args[i].sequence = i;
        if(pthread_create(&tids[i], NULL, doWarmBySendQueryFormFile, &thr_args[i]) != 0)
        {
            cout << "create curl thread" << i << " error" << endl;
            exit(-1);
        }
    }
    pthread_t twid;
    if(pthread_create(&twid, NULL, time_worker, NULL) != 0)
    {
        cout << "create time worker thread error"<< endl;
        exit(-1);
    }
    for(int i = 0; i < globalThreadNumber; ++i)
    {
        pthread_join(tids[i], NULL);// the main thread waits for every worker before exiting
    }
    delete[] tids;
    delete[] thr_args;
    curl_global_cleanup();
    return 0;
}
写完上述代码,发现其中还是有一些问题:
- 代码的书写规范有问题
- 没有对请求成功和失败结果的统计
- 函数行数太多
- 可以将上述方法封装成一个类
- 在异步的同时也可以控制并发
后来通过改进就变成了下面的代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdarg.h>
#include <pthread.h>
#include <signal.h>
#include <curl/curl.h>
#include <iostream>
#include <vector>
#include <string>
#include <fstream>
using namespace std;
// Fully built request URLs; filled by splitQuery() in main and read by the worker threads.
vector<string> globalCompletedQuerys;
// Target host of every request; overridden by -h.
string globalHost = "localhost";
// Target port of every request; overridden by -p.
string globalPort = "6351";
// File the queries are read from; overridden by -i.
string globalQueryFile = "query.txt";
// Stop flag: set by the signal handler and by the timer thread, polled by the worker loops.
volatile bool globalStop = false;
// Number of worker threads; overridden by -t.
static int globalThreadNumber = 3;
// Concurrency value copied into every worker's threadArg; overridden by -c.
static int globalConcurrence = 1;
// Upper bound on the number of queries read from the query file; overridden by -n.
static long globalQueryNumber = 1;
// Signal handler (registered in main for SIGINT and SIGTERM): raises the
// global stop flag and prints "stop ".
//
// Fix: the message used to be printed through cout, which is not
// async-signal-safe and can deadlock or corrupt stream state when the signal
// interrupts another stream operation. write(2) is async-signal-safe and
// emits the same bytes.
void sigHandle(int)
{
    globalStop = true;
    static const char notice[] = "stop \n";
    const ssize_t written = write(STDOUT_FILENO, notice, sizeof(notice) - 1);
    (void)written;  // nothing useful can be done about a failed write here
}
// Pair of counters for failed and successful requests.
// Contains no locking of its own.
class statisticsRequest
{
public:
    // Both counters start at zero.
    statisticsRequest()
        : failedRequest(0),
          successRequest(0)
    {
    }

    // Adds one to the failure counter.
    void increaseFailedRequest()
    {
        ++failedRequest;
    }

    // Adds one to the success counter.
    void increaseSucessRequest()
    {
        ++successRequest;
    }

    // Current value of the failure counter.
    int getFailedRequest()
    {
        return failedRequest;
    }

    // Current value of the success counter.
    int getSuccessRequest()
    {
        return successRequest;
    }

private:
    int failedRequest;
    int successRequest;
};
// Per-thread argument block handed to each worker thread.
struct threadArg
{
    int sequence;                 // index of the worker thread; -1 until assigned
    int concurrence;              // concurrency value passed on to the sending loop
    int totalQueryNumber;         // total number of queries shared by all workers
    statisticsRequest * statsReq; // counters this worker reports into

    // Starts with no thread index, a concurrency of 1, no queries and no counters.
    threadArg()
        : sequence(-1),
          concurrence(1),
          totalQueryNumber(0),
          statsReq(NULL)
    {
    }
};
size_t writeResponse(void *contents, size_t size, size_t nmemb, void *stream)
{
string data((const char *)contents, (size_t)size * nmemb);
statisticsRequest *request = (statisticsRequest *)stream;
if(data.substr(0, 9) == "rsp_ec: 0")
{
request->increaseSucessRequest();
}
else if(data.substr(0, 6) == "rsp_ec" && data.substr(0, 9) != "rsp_ec: 0")
{
request->increaseFailedRequest();
}
return size * nmemb;
}
// Builds the full request URL "http://<host>:<port>/mod_qsrch/warmup?kw=<query>".
//
// queryContext:   query text appended after "kw=" (used as is)
// completedQuery: output; any previous content is replaced
void getCompletedQuery(const string& queryContext, string& completedQuery)
{
    // Replace the old content first, then append piece by piece.
    completedQuery.assign("http://");
    completedQuery.append(globalHost)
                  .append(":")
                  .append(globalPort)
                  .append("/mod_qsrch/warmup?kw=")
                  .append(queryContext);
}
// Configures one easy handle: disables libcurl's use of signals, requests no
// body (CURLOPT_NOBODY), sets the URL and routes the response headers through
// writeResponse().
//
// curl:           easy handle to configure
// completedQuery: full request URL
// response:       counters handed to the header callback
void setCurlEasy(CURL *curl, const string &completedQuery, statisticsRequest *response)
{
    const char *url = completedQuery.c_str();
    void *headerSink = static_cast<void *>(response);
    curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
    curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);
    curl_easy_setopt(curl, CURLOPT_URL, url);
    curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, writeResponse);
    curl_easy_setopt(curl, CURLOPT_HEADERDATA, headerSink);
}
bool asyncSetCurlEasy(const vector<string>& completedQuerys,
int queryBeginPosition,
size_t queryNumber,
CURLM *curlm,
statisticsRequest* statsReq)
{
if(completedQuerys.empty() || statsReq == NULL || curlm == NULL)
return false;
for(size_t i = 0; !globalStop && i < queryNumber; ++i)
{
CURL *curl = curl_easy_init();
if(curl)
{
setCurlEasy(curl, completedQuerys[queryBeginPosition + i], statsReq);
curl_multi_add_handle(curlm, curl);
}
else
return false;
}
return true;
}
// Drains the completion messages of a multi handle and releases the easy
// handle of every finished transfer. Does nothing when curlm is NULL; stops
// early once the stop flag is set.
//
// Fixes over the previous version:
//  - Finished easy handles were never detached or destroyed, so every request
//    leaked its handle for the lifetime of the process.
//  - The status code was fetched into an int although CURLINFO_RESPONSE_CODE
//    writes a long; the values were only used by a commented-out log line, so
//    the lookups were dropped. When re-adding diagnostics, query
//    CURLINFO_RESPONSE_CODE into a long, before the handle is cleaned up.
void asyncDealWithCurlCode(CURLM *curlm)
{
    if(curlm == NULL)
        return;
    int leftMsg = 0;
    CURLMsg* msg = NULL;
    while(!globalStop && (msg = curl_multi_info_read(curlm, &leftMsg)) != NULL)
    {
        if(msg->msg != CURLMSG_DONE)
            continue;
        // Copy the handle out first: msg is invalidated by curl_multi_remove_handle().
        CURL *finished = msg->easy_handle;
        curl_multi_remove_handle(curlm, finished);
        curl_easy_cleanup(finished);
    }
}
// Appends one full request URL to `completedQuerys` for every entry of
// `queryContexts`, in the same order.
void splitQuery(const vector<string> &queryContexts, vector<string> &completedQuerys)
{
    for(size_t i = 0; i < queryContexts.size(); ++i)
    {
        string url;
        getCompletedQuery(queryContexts[i], url);
        completedQuerys.push_back(url);
    }
}
void asyncSendRequestAndGetResponse(const vector<string>& completedQuerys,
int sequence,
int queryBeginPosition,
int concur,
int queryNumber,
statisticsRequest *statsReq)
{
CURLM *curlm = curl_multi_init();
if(curlm == NULL)
{
cout << "init curl multi failed" << endl;
return;
}
if(concur > queryNumber)
concur = queryNumber;
int runningCurls = 0, queryStart = queryBeginPosition;
int queryEndPosition = queryBeginPosition + queryNumber;
int sendedRequest = queryBeginPosition - queryStart;
int receivedResponse = statsReq->getFailedRequest() + statsReq->getSuccessRequest();
asyncSetCurlEasy(completedQuerys, queryBeginPosition, concur, curlm, statsReq);
queryBeginPosition += concur;
do{
curl_multi_perform(curlm, &runningCurls);
asyncDealWithCurlCode(curlm);
sendedRequest = queryBeginPosition - queryStart;
receivedResponse = statsReq->getFailedRequest() + statsReq->getSuccessRequest();
if(queryBeginPosition != queryEndPosition && sendedRequest - receivedResponse < concur)
{
int curlNumberToAdd = concur - (sendedRequest - receivedResponse);
if(queryBeginPosition + curlNumberToAdd < queryEndPosition)
{
asyncSetCurlEasy(completedQuerys, queryBeginPosition, curlNumberToAdd, curlm, statsReq);
queryBeginPosition += curlNumberToAdd;
}
else if(queryBeginPosition < queryEndPosition)
{
asyncSetCurlEasy(completedQuerys, queryBeginPosition, queryEndPosition - queryBeginPosition, curlm, statsReq);
queryBeginPosition += queryEndPosition - queryBeginPosition;
}
}
curl_multi_wait(curlm, NULL, 0, 200000, NULL);
//cout << pthread_self() << " receivedResponse: " << receivedResponse << "queryNumber :" << queryNumber << endl;
}while(receivedResponse < queryNumber && !globalStop);
curl_multi_cleanup(curlm);
return;
}
// Reads up to `num` whitespace-separated tokens from the file at `path` and
// appends each one to `queryContexts`.
// Returns the number of tokens appended (0 when nothing could be read).
int getQueryContext(const string& path, vector<string>& queryContexts, int num)
{
    ifstream input(path.c_str());
    int loaded = 0;
    for(string token; !globalStop && loaded < num && (input >> token); )
    {
        if(token.empty())
            continue;
        queryContexts.push_back(token);
        ++loaded;
    }
    // The stream closes the file when it goes out of scope.
    return loaded;
}
void *doWarmBySendQueryFormFile(void *arg)
{
struct threadArg *queryMsg = (threadArg*)arg;
int averageQueryNumber = queryMsg->totalQueryNumber / globalThreadNumber;
int queryBeginPosition = queryMsg->sequence * averageQueryNumber;
if(queryMsg->sequence == globalThreadNumber - 1)
averageQueryNumber = queryMsg->totalQueryNumber - averageQueryNumber * queryMsg->sequence;
//cout << "thread " << queryMsg->sequence << " query begin position is " << queryBeginPosition << " query number is " << averageQueryNumber << "concurrence is " << queryMsg->concurrence << endl;
asyncSendRequestAndGetResponse(globalCompletedQuerys, queryMsg->sequence, queryBeginPosition, queryMsg->concurrence, averageQueryNumber, queryMsg->statsReq);
return NULL;
}
// Prints the command-line synopsis to stdout.
// pname: program name (argv[0]); a NULL pointer is printed as an empty name.
//
// Fixes: the program name used to run straight into the first option, and the
// -n and -c options accepted by parseParameters() were not listed.
void usage(const char* pname)
{
    cout << (pname != NULL ? pname : "") <<
         " -p port "
         "-h host "
         "-i input_file "
         "-t thread_number "
         "-n query_number "
         "-c concurrence" << endl;
}
// Timer thread: detaches itself, blocks every signal except SIGALRM for this
// thread, sleeps for ten minutes and then raises the global stop flag.
// `arg` is unused. Always returns NULL.
void *timeWorker(void *arg)
{
    (void)arg;
    pthread_detach(pthread_self());

    // Block everything but SIGALRM in this thread.
    sigset_t blocked;
    sigfillset(&blocked);
    sigdelset(&blocked, SIGALRM);
    pthread_sigmask(SIG_BLOCK, &blocked, NULL);

    struct timespec sleepTime;
    sleepTime.tv_nsec = 0;
    sleepTime.tv_sec = 10 * 60;
    nanosleep(&sleepTime, NULL);

    globalStop = true;
    cout << "time to exit" << endl;
    return NULL;
}
// Parses `text` as a strictly positive decimal number.
// Returns false (leaving *value untouched) for empty input, trailing garbage,
// zero or negative values.
static bool parsePositiveLong(const char *text, long *value)
{
    if(text == NULL || value == NULL)
        return false;
    char *end = NULL;
    const long parsed = strtol(text, &end, 10);
    if(end == text || *end != '\0' || parsed <= 0)
        return false;
    *value = parsed;
    return true;
}

// Parses the command line and stores the results in the global settings:
//   -p port, -h host, -i query file, -t thread number,
//   -n number of queries to read, -c concurrency.
// Returns false, after printing the usage text, on an unknown option or an
// invalid numeric value.
//
// Fix: the numeric options were converted with atoi(), which silently turned
// garbage into 0 and accepted negative values. They are now required to be
// positive, and -t / -c must also fit in an int.
bool parseParameters(int argc, char *argv[])
{
    int c;
    while((c = getopt(argc, argv, "h:p:i:t:n:c:")) != -1)
    {
        long number = 0;
        switch(c)
        {
        case 'p':
            globalPort = optarg;
            break;
        case 'h':
            globalHost = optarg;
            break;
        case 'i':
            globalQueryFile = optarg;
            break;
        case 't':
        case 'c':
            if(!parsePositiveLong(optarg, &number)
               || number != static_cast<long>(static_cast<int>(number)))
            {
                cout << "invalid value for -" << static_cast<char>(c) << ": " << optarg << endl;
                usage(argv[0]);
                return false;
            }
            if(c == 't')
                globalThreadNumber = static_cast<int>(number);
            else
                globalConcurrence = static_cast<int>(number);
            break;
        case 'n':
            if(!parsePositiveLong(optarg, &number))
            {
                cout << "invalid value for -n: " << optarg << endl;
                usage(argv[0]);
                return false;
            }
            globalQueryNumber = number;
            break;
        default:
            usage(argv[0]);
            return false;
        }
    }
    return true;
}
// Adds the counters of every worker (one statisticsRequest per thread) to the
// two totals. The totals are added to, not reset, so callers initialise them.
void getStatisticsRequest(statisticsRequest statsReqs[], int *totalSuccessRequest, int *totalFailedRequest)
{
    int threadIndex = 0;
    while(threadIndex < globalThreadNumber)
    {
        statisticsRequest &perThread = statsReqs[threadIndex];
        *totalSuccessRequest += perThread.getSuccessRequest();
        *totalFailedRequest += perThread.getFailedRequest();
        ++threadIndex;
    }
}
// Program entry: initialises libcurl, parses the command line, loads the
// queries and builds their URLs, starts the worker threads and the timer
// thread, waits for the workers, prints the totals and cleans up.
//
// Fixes over the previous version:
//  - Every worker was told to share globalQueryNumber (the requested maximum)
//    instead of the number of queries actually read, so a query file with
//    fewer entries than -n made the workers index past the end of the URL
//    table.
//  - The thread count is validated before it is used as an array size.
//  - The NULL checks after new[] were removed: a failing new[] throws
//    std::bad_alloc rather than returning NULL, so they could never trigger.
int main(int argc, char* argv[])
{
    CURLcode code = curl_global_init(CURL_GLOBAL_ALL);
    if(code != CURLE_OK)
    {
        cout << "curl_global_init error" << endl;
        exit(-1);
    }
    if(!parseParameters(argc, argv))
    {
        cout << "parse parameters error" << endl;
        exit(-1);
    }
    if(globalThreadNumber <= 0)
    {
        cout << "thread number must be positive" << endl;
        exit(-1);
    }
    signal(SIGINT, sigHandle);
    signal(SIGTERM, sigHandle);
    vector<string> queryContexts;
    int queryNumber = getQueryContext(globalQueryFile, queryContexts, globalQueryNumber);
    if(!queryNumber)
    {
        cout << "read query from file failed" << endl;
        exit(-1);
    }
    splitQuery(queryContexts, globalCompletedQuerys);
    pthread_t *tids = new pthread_t[globalThreadNumber];
    threadArg *thr_args = new threadArg[globalThreadNumber]();
    statisticsRequest *statsReqs = new statisticsRequest[globalThreadNumber]();
    for(int i = 0; i < globalThreadNumber; ++i)
    {
        thr_args[i].sequence = i;
        thr_args[i].statsReq = &statsReqs[i];
        thr_args[i].concurrence = globalConcurrence;
        // Share out only the queries that were really loaded.
        thr_args[i].totalQueryNumber = queryNumber;
        if(pthread_create(&tids[i], NULL, doWarmBySendQueryFormFile, &thr_args[i]) != 0)
        {
            cout << "create curl thread" << i << " error" << endl;
            exit(-1);
        }
    }
    pthread_t twid;
    if(pthread_create(&twid, NULL, timeWorker, NULL) != 0)
    {
        cout << "create time worker thread error"<< endl;
        exit(-1);
    }
    for(int i = 0; i < globalThreadNumber; ++i)
    {
        pthread_join(tids[i], NULL);
    }
    int totalSuccessRequest = 0;
    int totalFailedRequest = 0;
    getStatisticsRequest(statsReqs, &totalSuccessRequest, &totalFailedRequest);
    cout << "total request : " << totalSuccessRequest + totalFailedRequest << endl
         << "totalSuccessRequest is " << totalSuccessRequest << endl
         << "totalFailedRequest is " << totalFailedRequest << endl;
    delete[] statsReqs;
    delete[] thr_args;
    delete[] tids;
    curl_global_cleanup();
    return 0;
}
但是上述代码并没有很好地解决前面提出的问题,主要只是改进了并发控制,而且实现得并不完美。更理想的方式是在其中使用 nanosleep 函数来控制时间,所以还需要继续改进。