#include "pch.h"
#include<windows.h>
#include<Wininet.h>
#include<iostream>
#include<fstream>
#include<string>
#pragma comment(lib,"WinInet.lib")
using namespace std;
// Small WinInet wrapper: opens an internet session and a URL on construction,
// closes both handles on destruction, and offers CatchData() to fetch the
// resource.
//
// The object owns its HINTERNET handles, so it is non-copyable: a copy would
// make two objects close the same handles.
class web
{
public:
    // Opens the WinInet session and the given URL. Failures are reported on
    // stdout.
    web(string url);
    // Closes the URL handle and the session handle.
    ~web();

    // Copying would duplicate handle ownership and lead to a double close.
    web(const web&) = delete;
    web& operator=(const web&) = delete;

    // Fetches the resource and stores it in "test.txt".
    // Returns 0 on success and a non-zero code on failure.
    int CatchData();

private:
    HINTERNET hINet = NULL;     // session handle returned by InternetOpen
    HINTERNET hHttpFile = NULL; // resource handle returned by InternetOpenUrl
    char szSizeBuffer[32];      // receives the Content-Length header as text
    DWORD dwLengthSizeBuffer = sizeof(szSizeBuffer); // in/out size passed to HttpQueryInfo
    string url;                 // address requested by this object
};
// Opens a WinInet session and then the requested URL.
//
// @param url  Address of the resource to open.
//
// On failure a message is printed to stdout and the affected handle stays
// NULL. If the session itself cannot be created, the URL is not opened at all
// (previously InternetOpenUrl was still called with a NULL session handle and
// a second, misleading error was printed).
web::web(string url)
    : hINet(NULL), hHttpFile(NULL), url(url)
{
    // InternetOpen initializes the application's use of WinInet.
    hINet = InternetOpen("IE6.0", INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL, 0);
    if (!hINet)
    {
        cout << "InternetOpen fail" << endl;
        return; // no session: opening the URL cannot succeed
    }

    // InternetOpenUrl connects to the server and prepares the resource for reading.
    hHttpFile = InternetOpenUrl(hINet, this->url.c_str(), NULL, 0, 0, 0);
    if (!hHttpFile)
    {
        cout << "error open url" << endl;
    }
}
// Releases the WinInet handles owned by this object.
//
// The URL handle is closed before the session handle it was opened from.
// Handles that were never opened (NULL) are skipped instead of being passed
// to InternetCloseHandle, and each handle is reset after closing.
web::~web()
{
    if (hHttpFile)
    {
        InternetCloseHandle(hHttpFile);
        hHttpFile = NULL;
    }
    if (hINet)
    {
        InternetCloseHandle(hINet);
        hINet = NULL;
    }
}
int web::CatchData()
{
BOOL bQuery = HttpQueryInfo(hHttpFile,
HTTP_QUERY_CONTENT_LENGTH,
szSizeBuffer,
&dwLengthSizeBuffer, NULL); //得到关于文件的信息,将接收到的资源大小存到szSizeBuffer当中,以字节为单位。
if (bQuery == false)
{
InternetCloseHandle(hINet);
cout << "error query info" << endl;
return 3;
}
int FileSize = atol(szSizeBuffer); //atol函数把字符串转换成长整型数
string revData;
revData.resize(FileSize);//根据得到的网页资源的大小信息来重新分配缓冲区字符串的大小
DWORD dwBytesRead;
BOOL bRead = InternetReadFile(hHttpFile, &revData[0], FileSize, &dwBytesRead); //web浏览器将在InternetReadFile上循环 ,不停地从Internet上读入数据块。
if (!bRead)
{
cout << "error to read file" << endl;
return 4;
}
ofstream out_file("test.txt");
out_file << revData; //输出到文件
cout << "抓取成功!\n" << endl;
system("pause");
return 0;
}
// Program entry point: downloads the Sina home page through the web class.
// The status code from CatchData() becomes the process exit code, so a failed
// download is visible to the caller instead of always exiting with 0.
int main()
{
    web w("https://www.sina.com.cn/");
    return w.CatchData();
}