// Wraps text in single quotes for /bin/sh so it is passed to the command as
// one literal argument; embedded single quotes are emitted as '\''.
static string ShellQuote(const string &text)
{
    string quoted = "'";
    for (string::size_type i = 0; i < text.size(); ++i)
    {
        if (text[i] == '\'')
        {
            quoted.append("'\\''");
        }
        else
        {
            quoted.push_back(text[i]);
        }
    }
    quoted.push_back('\'');
    return quoted;
}

/**
 * Fetches a web page by running wget and returns the downloaded text.
 *
 * The page is saved in the current working directory under the name formed by
 * everything after the last '/' of the URL, read back into memory, and the
 * file is then deleted again.
 *
 * Line terminators are not kept: the lines of the file are concatenated
 * directly, as callers of this function have always received them.
 *
 * @param url  address of the page to download
 * @return the page text, or an empty string when the URL has no file-name
 *         part (it is empty or ends in '/'), when wget reports a failure, or
 *         when the downloaded file cannot be opened
 */
string GetHtmlByWget(string url)
{
    // Name of the file to download. When the URL contains no '/',
    // find_last_of() yields npos and npos + 1 wraps to 0, so the whole URL is used.
    string fileName = url.substr(url.find_last_of("/") + 1);
    if (fileName == "")
    {
        return "";
    }

    // fileName cannot contain '/', so prefixing "./" keeps the download in the
    // current directory and stops a leading '-' from being parsed as an option.
    const string localPath = "./" + fileName;
    const string quotedPath = ShellQuote(localPath);

    // -q suppresses progress output. -O pins the output file so that we read
    // exactly what wget wrote (left to itself wget may pick "index.html" or
    // "name.1"). "--" ends option parsing before the caller-supplied URL.
    string strCom = "wget -q -O ";
    strCom.append(quotedPath);
    strCom.append(" -- ");
    strCom.append(ShellQuote(url));
    const int status = system(strCom.c_str());

    string strHtml = "";
    if (status == 0)
    {
        ifstream fin(localPath.c_str());
        string line;
        // getline on a string has no fixed line-length limit, so very long
        // lines no longer abort the read part-way through the page.
        while (getline(fin, line))
        {
            strHtml.append(line);
        }
    }

    // Remove the downloaded file whether or not the transfer succeeded;
    // -f keeps rm silent when there is nothing to delete.
    strCom = "rm -f -- ";
    strCom.append(quotedPath);
    system(strCom.c_str());

    return strHtml;
}
第二种方法是使用 socket 来获取网页源码。
C++代码
/**
 * Fetches the source of a web page with an HTTP GET request.
 *
 * NOTE(review): the body of this function is missing from the source, so no
 * request is made yet. Presumably it is meant to issue the GET over a socket
 * - TODO restore the implementation and confirm.
 *
 * @param url  address of the page to fetch (currently unused)
 * @return an empty string until the implementation is restored
 */
string GetHtmlByGet(string url)
{
    (void)url;
    // A function returning string must return a value on every path; flowing
    // off the end of an empty body is undefined behaviour.
    return "";
}
- 使用libcurl
-
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <curl/curl.h>
-
/* Capacity of the receive buffer in bytes, not counting the terminating NUL. */
#define MAX_BUF 65536

/* Receive buffer; write_data() keeps it NUL-terminated after every append. */
char wr_buf[MAX_BUF+1];
/* Number of bytes currently stored in wr_buf. */
int wr_index;

/*
 * Write callback for libcurl: appends one chunk of received data to wr_buf.
 *
 * buffer  points to the received bytes (not NUL-terminated)
 * size    size of one element
 * nmemb   number of elements; the chunk is size * nmemb bytes long
 * userp   pointer to an int error flag, set to 1 when the chunk is rejected;
 *         may be NULL, in which case no flag is written
 *
 * Returns the number of bytes stored, i.e. size * nmemb, on success. Returns 0
 * without touching wr_buf when the chunk does not fit in the remaining space.
 */
size_t write_data( void *buffer, size_t size, size_t nmemb, void *userp )
{
    size_t room;
    size_t segsize;

    /* A corrupted index must never be used to address the buffer. */
    if ( wr_index < 0 || wr_index > MAX_BUF ) {
        if ( userp != NULL ) *(int *)userp = 1;
        return 0;
    }

    room = (size_t)MAX_BUF - (size_t)wr_index;

    /*
     * Reject the chunk if size * nmemb exceeds the remaining room. Dividing
     * instead of multiplying keeps the comparison safe even when the product
     * would overflow size_t.
     */
    if ( size != 0 && nmemb > room / size ) {
        if ( userp != NULL ) *(int *)userp = 1;
        return 0;
    }

    segsize = size * nmemb;

    /* Append the chunk after the data already collected. */
    memcpy( (void *)&wr_buf[wr_index], buffer, segsize );

    wr_index += (int)segsize;

    /* Keep the buffer usable as a C string; index MAX_BUF is the spare byte. */
    wr_buf[wr_index] = 0;

    return segsize;
}
-
-
-
int main( void ) -
{ -
CURL *curl; -
CURLcode ret; -
int wr_error; -
-
wr_error = 0; -
wr_index = 0; -
-
-
curl = curl_easy_init(); -
if (!curl) { -
printf("couldn't init curl\n"); -
return 0; -
} -
-
-
curl_easy_setopt( curl, CURLOPT_URL, "www.exampledomain.com" ); -
-
-
curl_easy_setopt( curl, CURLOPT_WRITEDATA, (void *)&wr_error ); -
curl_easy_setopt( curl, CURLOPT_WRITEFUNCTION, write_data ); -
-
-
ret = curl_easy_perform( curl ); -
-
printf( "ret = %d (write_error = %d)\n", ret, wr_error ); -
-
-
if ( ret == 0 ) printf( "%s\n", wr_buf ); -
-
curl_easy_cleanup( curl ); -
-
return 0; -
}
- #include
-