常用makefile格式

# Build rules for the lq-client program (single translation unit: source.cpp).
#OBJS=handledata.o source.o
OBJS=source.o

# `main` and `clean` never produce files with those names; declaring them
# phony keeps a stray file called "main" or "clean" from disabling the rule.
.PHONY: main clean

# Default goal, kept under its historical name; the real work is the
# lq-client file target so the link step only reruns when an object changed.
main: lq-client

# Link step: libraries belong here, not on the compile-only step below.
lq-client: $(OBJS)
	g++ -g -o lq-client $(OBJS) -l curl -l pthread -std=c++11
#handledata.o:handledata.c
#	g++ -c -Wall handledata.c -I /usr/local/include/curl -std=c++11
source.o: source.cpp
	g++ -g -c -Wall source.cpp -std=c++11
clean:
	rm -f lq-client $(OBJS)

http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/Market_Center.getHQNodeData?page=1&num=3000&sort=symbol&asc=1&node=sz_a&symbol=&_s_r_a=page

#include <stdio.h>
//#include <unistd.h>//access
//#include <fcntl.h>//access
#include <stdint.h> //intptr_t
#include <stdlib.h>//malloc \relloc
#include <string.h>//strlen\strcpy\strcat
#include <time.h> //time
#include <iconv.h> //iconv_open
#include <pthread.h> //pthread
#include <sys/time.h>

#include <iostream> //cout
#include <map>
#include  <regex> //正则
#include  <sstream> //stream
#include <string>//string
#include <vector>

#include <curl/curl.h>

using namespace std;
// Growable byte buffer that WriteMemoryCallback appends received data to.
struct MemoryStruct 
{
    char *memory;   // heap buffer, grown with realloc as data arrives
    size_t size;    // number of payload bytes stored in memory (the trailing NUL is not counted)
};

// libcurl write callback: appends each received chunk to the MemoryStruct
// passed through userp and keeps the accumulated buffer NUL-terminated.
// libcurl invokes it repeatedly while a transfer is running, once per chunk.
//
// contents  start of the received bytes (not NUL-terminated)
// size      element size reported by libcurl
// nmemb     element count; the chunk is size * nmemb bytes long
// userp     the MemoryStruct registered with CURLOPT_WRITEDATA
//
// Returns the number of bytes consumed (size * nmemb), or 0 when the buffer
// could not be grown, which makes libcurl abort the transfer.
static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp)
{
    const size_t realsize = size * nmemb;
    struct MemoryStruct *mem = static_cast<struct MemoryStruct *>(userp);

    // Grow through a temporary: on failure realloc leaves the old block
    // untouched, so the caller can still free mem->memory instead of leaking it.
    char *grown = static_cast<char *>(realloc(mem->memory, mem->size + realsize + 1));
    if (grown == NULL)
    {
        /* out of memory! */
        printf("not enough memory (realloc returned NULL)\n");
        return 0;
    }
    mem->memory = grown;

    memcpy(mem->memory + mem->size, contents, realsize);
    mem->size += realsize;
    mem->memory[mem->size] = 0;

    return realsize;
}

// Converts inlen bytes at inbuf from from_charset to to_charset using iconv
// and writes the result to outbuf (capacity outlen bytes).
//
// outbuf is zero-filled first, so the result is NUL-terminated whenever the
// converted text is shorter than outlen, and is well defined even on failure.
//
// Returns 0 on success, -1 when the conversion descriptor cannot be opened
// or iconv reports an error (invalid input sequence, output buffer too small).
int code_convert(const char *from_charset, const char *to_charset, char *inbuf, size_t inlen,
        char *outbuf, size_t outlen) {
    if (outbuf != NULL && outlen > 0)
        memset(outbuf, 0, outlen);

    // iconv_open signals failure with (iconv_t)-1, not with a null descriptor.
    const iconv_t cd = iconv_open(to_charset, from_charset);
    if (cd == (iconv_t) -1)
        return -1;

    // iconv advances the pointers it is given; work on local copies.
    char *in_cursor = inbuf;
    char *out_cursor = outbuf;
    const size_t rc = iconv(cd, &in_cursor, &inlen, &out_cursor, &outlen);

    // Close on every path so a failed conversion does not leak the descriptor.
    iconv_close(cd);

    return rc == (size_t) -1 ? -1 : 0;
}

// Converts inlen bytes of UTF-8 text at inbuf to GB2312 in outbuf
// (capacity outlen bytes). Returns code_convert's result.
int u2g(char *inbuf, size_t inlen, char *outbuf, size_t outlen) {
    // Mutable copies: since C++11 a string literal may not be bound to a
    // non-const char*, which is what code_convert's charset parameters take.
    char from_charset[] = "utf-8";
    char to_charset[] = "gb2312";
    return code_convert(from_charset, to_charset, inbuf, inlen, outbuf, outlen);
}

// Converts inlen bytes of GB2312 text at inbuf to UTF-8 in outbuf
// (capacity outlen bytes). Returns code_convert's result.
int g2u(char *inbuf, size_t inlen, char *outbuf, size_t outlen) {
    // Mutable copies: since C++11 a string literal may not be bound to a
    // non-const char*, which is what code_convert's charset parameters take.
    char from_charset[] = "gb2312";
    char to_charset[] = "utf-8";
    return code_convert(from_charset, to_charset, inbuf, inlen, outbuf, outlen);
}

// Releases the malloc-family buffer *ppBuf and resets the caller's pointer to
// NULL so it cannot be freed twice. A null ppBuf is ignored.
void FreeStrBuf(char **ppBuf)
{
    if (ppBuf == NULL)
        return;

    free(*ppBuf);   // free(NULL) is a no-op, so no separate check is needed
    *ppBuf = NULL;
}

// Releases the malloc-family int buffer *ppBuf and resets the caller's pointer
// to NULL so it cannot be freed twice. A null ppBuf is ignored.
void FreeStrBuf(int **ppBuf)
{
    if (ppBuf == NULL)
        return;

    free(*ppBuf);   // free(NULL) is a no-op, so no separate check is needed
    *ppBuf = NULL;
}

// Process-wide mutex; main initializes it before creating the worker threads
// and destroys it after joining them.
pthread_mutex_t sum_mutex;// mutual-exclusion lock
// Record describing one share; at present it only carries the share's name.
class shares_data
{
public:
    // A freshly constructed record has an empty name.
    shares_data() : name() {}

    std::string name;
};

/*
class op_class
{
public:
    op_class()
    {
        cur_num = 0;
        shares_map.clear();
    }
    int cur_num;
    std::map<int, class shares_data>  shares_map;
};
*/


// Fills `next` with the KMP failure table of pattern T:
//   next[0] = -1, and for i >= 1 next[i] is the length of the longest proper
//   prefix of T[0..i-1] that is also a suffix of it.
//
// T     pattern; an empty pattern leaves `next` untouched.
// next  output table; grown to T.size() entries when it is smaller. Entries
//       beyond T.size() are left as they were.
inline void NEXT(const std::string &T, std::vector<int> &next) {
    if (T.empty())
        return;
    if (next.size() < T.size())
        next.resize(T.size());

    next[0] = -1;
    for (std::string::size_type i = 1; i < T.size(); ++i) {
        int j = next[i - 1];
        // Test j >= 0 first so T[j] is never read with j == -1.
        while (j >= 0 && T[i - 1] != T[j])
            j = next[j];
        // j == -1 (no border) yields 0; otherwise the matched border grows by one.
        next[i] = j + 1;
    }
}
// Reports whether pattern T occurs in text S.
//
// Returns 1 when T is found anywhere in S and 0 otherwise. Despite the
// "count" wording this has always been a found/not-found flag, because the
// search stops at the first occurrence.
//
// An empty T is treated as "not found" (returns 0).
//
// std::string::find performs the search, so S is scanned once rather than
// being rescanned from every start index.
inline std::string::size_type KMP(const std::string &S, const std::string &T) {
    if (T.empty())
        return 0;
    return S.find(T) != std::string::npos ? 1 : 0;
}




// Shared result table keyed by share code: getdata inserts into it and main
// reports its size once every worker thread has been joined.
std::map<std::string, std::string>  shares_map;
//std::map<std::string, class shares_data>  shares_map;

void*  getdata(void * num)
{
    // 对传入的参数进行强制类型转换,由无类型指针变为整形数指针,然后再读取
    //这里面对num的修改是对其副本的修改
    int num_int = 0;
    num_int =   *((int*) &num);

    char *prelink = "http://q.10jqka.com.cn/index/index/board/all/field/zdf/order/desc/page/";
    char *suflink = "/ajax/1/";

    char buf[10] = { NULL };
    sprintf(buf, "%d", num);
    char *charnum = buf;
    char *templink = (char*) malloc(strlen(charnum) + strlen(suflink) + 1);
    strcpy(templink, charnum);
    strcat(templink, suflink);

    char *totallink = (char*) malloc(strlen(prelink) + strlen(templink) + 1);
    strcpy(totallink, prelink);
    strcat(totallink, templink);
    //printf("%s\r\n", totallink);

    //shares_data shares_data_obj;
    //std::pair<string, class shares_data> shares_pair(num_int, shares_data_obj);

    CURL *curl;
    CURLcode res;

    struct MemoryStruct chunk;
    chunk.memory = (char*) malloc(1);  /* will be grown as needed by the realloc above */
    chunk.size = 0;    /* no data at this point */

    curl = curl_easy_init();
    // const char *filename ="/home/cpp/data.txt";

    if (curl)
    {

        curl_easy_setopt(curl, CURLOPT_URL, totallink);
        /* example.com is redirected, so we tell libcurl to follow redirection */
        curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, 10);
        //网页采用gzip压缩
        curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "gzip");
        //指定回调函数  
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
        //这个变量可作为接收或传递数据的作用  
        curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) &chunk);

        /* Perform the request, res will get the return code */
        res = curl_easy_perform(curl);
        /* Check for errors */
        if (res != CURLE_OK)
            fprintf(stderr, "curl_easy_perform() failed: %s\n",
                curl_easy_strerror(res));

        /* always cleanup */
        curl_easy_cleanup(curl);

    }
    //printf("size is %d\n\r\n", chunk.size);
    // printf("%s",chunk.memory); 
    char * databuf = 0;
    databuf = (char*) malloc((int) 2 * (chunk.size));
    g2u(chunk.memory, chunk.size, databuf, 2 * (chunk.size));
    //printf("%d", sizeof(databuf));
    //printf("%s", databuf);
    //pthread_mutex_lock(&sum_mutex);
    std::string line;
    //line = databuf;
    //std::string data_string(databuf);
    std::stringstream stream;
    stream << databuf;
    enum STATEFLAG 
    {
        SHARES_ID,
        SHARES_NAME,
    };
    STATEFLAG linecurflag = SHARES_ID;//当前状态标记


    std::pair<std::string, std::string> shares_pair;
    std::string shares_code = "";
    std::string shares_name = "";
        while (std::getline(stream, line))
        {
            switch (linecurflag)
            {
            case SHARES_ID:
            {
                //std::string pattern_1("/\" target = \"_blank\">([0-9])+");
                //std::regex rgx_1(pattern_1);
                //std::smatch readresults_1;
                //if (std::regex_search(line, readresults_1, rgx_1))//regex_search只匹配第一个符合的
                //std::cout << line<<std::endl;
                if (KMP(line, " target=\"_blank\">"))
                {
            linecurflag = SHARES_NAME;
            shares_code = line.substr(89,6);
                   // std::cout<<shares_code<<std::endl;
        }
                break;
            }
        case SHARES_NAME:
        {
            shares_pair.first = shares_code;
            shares_pair.second = shares_code;
            shares_map.insert(shares_pair);
            shares_code = "";
            shares_name = "";
            linecurflag = SHARES_ID;
            break;
        }
            default:
            {
                continue;
            }
            }

        }
    //shares_map.insert(shares_pair);
    //pthread_mutex_unlock(&sum_mutex);

    free(databuf);
    free(chunk.memory);

    FreeStrBuf(&totallink);
    FreeStrBuf(&templink);

    pthread_exit(0);
}




int main(void)
{
    curl_global_init(CURL_GLOBAL_DEFAULT);

    unsigned long start_time, end_time;
    time_t t;
    start_time = time(&t);

    pthread_t thread_id;
    std::vector<pthread_t> thread_id_list;
    thread_id_list.clear();

    pthread_mutex_init(&sum_mutex,NULL);
    shares_map.clear();

    int num = 1;
    int endpages = 147; //最终页数
    for (; num <= endpages; ++num)
    {
        pthread_create(&thread_id, NULL, getdata, (void*) num);//传入到参数必须强转为void*类型,即无类型指针,且不能为引用以防被修改
        thread_id_list.push_back(thread_id);
    }
    for (int i = 0; i < thread_id_list.size(); ++i)
    {
        pthread_join(thread_id_list[i], NULL);
    }

    curl_global_cleanup();
    pthread_mutex_destroy(&sum_mutex);//注销锁

    for (std::map<std::string, std::string>::iterator  it = shares_map.begin(); it != shares_map.end();++it)
    {
        //printf("The int is:%d\n", it->first);
    }
    printf("The num of pages is:%d\n", shares_map.size());

    end_time = time(&t);
    printf("The start time is:%d\n", start_time);
    printf("The end time is:%d\n", end_time);
    printf( "The total time used is:%d\n" ,(end_time - start_time));

  return 0;
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值