代码如下:
Crawl.cpp
/*Crawl.cpp */
#include "Crawl.h"
using namespace std;
/* Constructs a CCrawl object; the constructor body performs no work. */
CCrawl::CCrawl() {}
/* Destroys a CCrawl object; the destructor body performs no work. */
CCrawl::~CCrawl() {}
/*
 * Writes one block of data to the stream fp.
 *
 * The signature matches a libcurl header/write callback; presumably it is
 * installed as such by fetch() (registration is not visible here - confirm).
 *
 *   ptr   - start of the data to write
 *   size  - size in bytes of one element
 *   nmemb - number of elements pointed to by ptr
 *   fp    - destination stream, already opened for writing
 *
 * Returns the number of bytes actually written. When every element is
 * written this equals size * nmemb; a short or failed write yields a smaller
 * value so the caller can detect the error instead of it being silently
 * reported as success.
 */
size_t CCrawl::save_header(void *ptr, size_t size, size_t nmemb, FILE *fp){
    /* fwrite reports the count of complete elements written, not bytes. */
    const size_t written = fwrite(ptr, size, nmemb, fp);
    return written * size;
}
/*
 * Appends a chunk of incoming data to a growable, NUL-terminated buffer.
 *
 * The signature matches a libcurl write callback; presumably it is installed
 * as such by fetch() (registration is not visible here - confirm).
 *
 *   contents - start of the incoming chunk
 *   size     - size in bytes of one element
 *   nmemb    - number of elements in the chunk
 *   userp    - pointer to the MemoryStruct that accumulates the data
 *
 * Returns the number of bytes consumed (size * nmemb), or 0 when the buffer
 * could not be enlarged. On failure the existing buffer is left untouched.
 */
size_t CCrawl::WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp)
{
    const size_t chunkBytes = size * nmemb;
    struct MemoryStruct *buffer = static_cast<struct MemoryStruct *>(userp);

    /* Reserve room for the new chunk plus one byte for the trailing NUL. */
    char *grown = static_cast<char *>(realloc(buffer->memory, buffer->size + chunkBytes + 1));
    if (!grown) {
        /* Out of memory: report it and signal failure to the caller. */
        printf("not enough memory (realloc returned NULL)\n");
        return 0;
    }

    buffer->memory = grown;
    /* Copy the chunk right after the data already stored. */
    memcpy(grown + buffer->size, contents, chunkBytes);
    buffer->size += chunkBytes;
    /* Keep the buffer usable as a C string. */
    grown[buffer->size] = 0;

    return chunkBytes;
}
int CCrawl::fetch(string strUrl, char **fileBuf )
{
char *fBuf = "\nI still don't know why could happen this thing ..\n";
*fileBuf = fBuf;
// return 0;
FILE *fp;
if (!(fp = fopen("htmheader.txt", "wb"))) {
printf ("fopen error\n");
return -1;
}
CURL *curl_handle;
CURLcode res;
struct MemoryStruct chunk;
struct MemoryStruct DataChunk;
// struct MemoryStruct *me;
// me = &chunk;
chunk.memory = (char *)malloc(1); /* will be grown as needed by the real