In a previous blog post, I shared a C++ program that fetches real-time stock quotes by visiting a URL at a specified time interval. However, when we engage in statistical analysis of financial data, we often need not only real-time data for a single stock but also historical data for multiple stocks. I therefore spent some time improving the previous program to meet this need. The two scripts below implement the following functions:
1) create a class to handle URL links, so that it is convenient to repeatedly visit different links with little modification to the program
2) use multi-threading to visit dozens of links simultaneously, which saves much time when fetching data for many stocks
Notes on the web data: Sina finance does not seem to provide historical data, so the program uses Yahoo finance instead. Yahoo finance allows customizing the type and parameters of the requested stock data, but this requires knowledge of the Yahoo finance API.
Program A: use a class to organize the program
/***********************************************************************
OS: Linux
C++ Library: CURL, sudo apt-get install libcurl4-gnutls-dev
Description: visit web page and download it to local file.
Notes: workable, create one Class so that it can repeatedly visit
different URLs, encoding and error examination chunk has been disabled
************************************************************************/
#include <stdio.h>
#include <curl/curl.h>
#include <unistd.h>
using namespace std;
/**
 * stokequote: thin wrapper around libcurl for downloading a quote page.
 *
 * The class exposes a single stateless static helper, so it carries no
 * data members. (The original private members were removed: `char ofile[]`
 * is an ill-formed flexible-array member in standard C++, `char scode`
 * had the wrong type for a stock-code string, and neither was ever used.)
 */
class stokequote
{
public:
    // Fetch the page at URL `scode` and write the raw body to the
    // local file `ofile`, overwriting any existing content.
    static void getquote(const char *scode, const char *ofile);
};
void stokequote::getquote(const char *scode, const char *ofile)
{ //initialization variable
CURL *curl;
FILE *fp;
CURLcode res;
//name of url and name of datafile
const char *url=scode;
const char *outfilename=ofile;
//initialization
curl=curl_easy_init();
if(curl)
{//open file
fp=fopen(outfilename,"wb"); //rewrite model
//various options for curl
curl_easy_setopt(curl, CURLOPT_URL, url);
//curl_easy_setopt(curl, CURLOPT_ENCODING, ""); //encode to remove disordered characters, but seems not work
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
res = curl_easy_perform(curl);
//if(res != CURLE_OK) //error check
//fprintf(stderr, "curl_easy_perform() failed: %s\n",curl_easy_strerror(res));
//clear
curl_easy_cleanup(curl);
fclose(fp);}
}
int main()
{
//real time quotes of one single stock, grabbing the web every 400 microseconds
char url[]="http://hq.sinajs.cn/list=sh601006";
char file[]="sh601006.csv";
stokequote squote;
for (size_t i=0;i<1;i++) //the total times of grabbing web content
{squote.getquote(url,file);
usleep(400);}
//historical quotes of one single stock, this needs refer to Yahoo API
char url2[]="http://table.finance.yahoo.com/table.csv?s=601857.ss";
char file2[]="sh601857.csv";
squote.getquote(url2,file2);
return (0);
}
Program B: multi-threading introduced
/***********************************************************************
OS: Linux
C++ Library: CURL, sudo apt-get install libcurl4-gnutls-dev
Description: visit web pages and download them to local files (multi-threading version)
USE: NUMT is the number of simultaneous URLs; the urls array stores the URLs
Notes: workable; the local data filename is the last 8 characters of the URL
************************************************************************/
/* A multi-threaded example that uses pthreads extensively to fetch
* X remote files at once */
#include <stdio.h>
#include <string.h>
#include <pthread.h>
#include <curl/curl.h>
using namespace std;
// Number of URLs fetched in parallel: one worker thread per URL.
#define NUMT 3
// List of URLs to fetch. Per the header note, each download's local
// filename is derived from its URL (the last 8 characters) — verify
// against pull_one_url().
const char * const urls[NUMT]={
"http://hq.sinajs.cn/list=sh601006",
"http://hq.sinajs.cn/list=sh601857",
"http://table.finance.yahoo.com/table.csv?s=601857.ss"
};
static void *pull_one_url(void *url)
{ CURL *curl;
FILE *fp;
CURLcode res;
//init
curl=curl_easy_init();
if(curl)
{//open file
fp=fopen(fname,"w+b");
//various options for curl
curl_easy_setopt(curl, CURLOPT_URL, url);
res = curl_easy_perform(curl);
//clear
curl_easy_cleanup(curl);
fclose(fp);}
return NULL;
}
/* Program B driver: spawn one worker thread per URL, wait for all of
 * them to finish, then tear libcurl down. curl_global_init() must run
 * before any thread touches libcurl, and curl_global_cleanup() only
 * after the last worker has exited. */
int main(int argc, char **argv)
{
    pthread_t tid[NUMT];
    // global libcurl initialization, before any threads are started
    curl_global_init(CURL_GLOBAL_ALL);
    for (int idx = 0; idx < NUMT; idx++)
    {
        int rc = pthread_create(&tid[idx], NULL, pull_one_url,
                                (void *)urls[idx]);
        if (rc != 0)
            fprintf(stderr, "Couldn't run thread number %d, errno %d\n", idx, rc);
        else
            fprintf(stderr, "Thread %d, gets %s\n", idx, urls[idx]);
    }
    // wait for every worker thread to terminate
    for (int idx = 0; idx < NUMT; idx++)
    {
        pthread_join(tid[idx], NULL);
        fprintf(stderr, "Thread %d terminated\n", idx);
    }
    curl_global_cleanup();
    return 0;
}