#include <windows.h>
#include <wininet.h>

#include <cstddef>
#include <fstream>
#include <iostream>
#include <string>

#pragma comment (lib, "wininet.lib")
//参数说明:Url 网址
//rst 获取的html信息保存的变量
//keepPath 保存的文件路径
//说明:如果存在保存路径 ,rst将为“”
bool _GetFromURL(const char *Url, std::string &rst, const std::string keepPath = "")
{
bool isGet = false;
HINTERNET hSession = InternetOpenA("RookIE/1.0", INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL, 0);
if (hSession != NULL)
{
HINTERNET handle2 = InternetOpenUrlA(hSession, Url, NULL, 0, INTERNET_FLAG_DONT_CACHE, 0);
if (handle2 != NULL)
{
char Temp[MAXBLOCKSIZE + 1]; //预留以为保留0
ULONG Number = 1;
std::ofstream ofs(keepPath);// "d:\\baidu.txt");
int step = 0;
while (Number > 0)
{
isGet = true;
InternetReadFile(handle2, Temp, MAXBLOCKSIZE - 1, &Number);
if (!Number) //add -- 2017-11-17
{
step = 0;
break;
}
Temp[Number] = 0; //将末尾附上截断符号0
//因为网址内容一般都为UTF-8形式,所以需要转换字节才能正常显示
char * Buff = StringAct::ConvertUtf8ToGBK(Temp);
if (ofs)
ofs << Buff;
else
rst += Buff;
StringAct::freeChar(Buff);
}
if (ofs)
ofs.close();
InternetCloseHandle(handle2);
handle2 = NULL;
}
InternetCloseHandle(hSession);
hSession = NULL;
}
return isGet;
}
要点:1.char Temp[MAXBLOCKSIZE + 1]; //预留1位存放结尾的0
使得Temp[Number] = 0; 不会发生溢出(越界写入)
2.Temp[Number] = 0; //将末尾附上截断符号0
保证了拷贝正确的字符串长度
3.StringAct::ConvertUtf8ToGBK(Temp); //转码操作 ---思考下:1.如何转码? 2.并不是所有网页都是UTF-8,那么何时才需要转码 ----可在评论区回复
使得网页的html内容能够正常被程序员看到