// Note: the code is long, but it is reusable.
#include<iostream>
#include<stdlib.h>
#include<string>
#include<fstream>
#include<cassert>
#include<windows.h>
#include<Wininet.h>
#include<vector>
#include<tchar.h>
#include<ctime>
#pragma comment(lib,"Wininet.lib")
// Base URL of the crawled question pages; a numeric question id is appended to it.
#define DEF_URL "https://zhidao.baidu.com/question/"
// Path of the text file the crawler writes to and the question/answer loop reads from.
#define DEF_SAVEPOS "MyAnswer.txt"
/// Singleton that owns one input stream and one output stream used to read
/// and write the save file. Obtain the single instance via GetFileManager().
class MyFileManager
{
public:
    /// Returns the unique manager instance, constructing it on first use.
    static MyFileManager* GetFileManager()
    {
        // A function-local static is initialised exactly once (thread-safe
        // since C++11) and destroyed at program exit, so both streams are
        // flushed and closed instead of being leaked by a never-deleted `new`.
        static MyFileManager Instance;
        return &Instance;
    }

    MyFileManager(const MyFileManager&) = delete;
    MyFileManager& operator=(const MyFileManager&) = delete;

    /// Opens `file` for reading (creating it when missing) and closes it again.
    void Read(const std::string& file)
    {
        CheckIn(file);
        CloseIn();
    }

    /// Appends the single character `c` to the currently open output stream.
    /// `file` is not used; the destination is whatever CheckOut() opened.
    void Write(const std::string& file, char c)
    {
        (void)file;
        OutFile.put(c);
    }

    /// Scans the currently open input stream for a line equal to `qua` and
    /// returns the line that follows it.
    ///
    /// Only the first 100 lines are examined. When no line matches, the
    /// stream ends or cannot be read, or no line follows the match, the
    /// fallback reply "人家不知道呢" is returned. An empty `qua` matches
    /// immediately, so the first line of the stream is returned.
    /// `file` is not used; the stream must have been opened with CheckIn().
    std::string Search(const std::string& file, const std::string& qua)
    {
        (void)file;
        const std::string Unknown = "人家不知道呢";
        const int MaxLines = 100;

        std::string Line;
        int LinesRead = 0;
        while (Line != qua)
        {
            // Stop as soon as the stream is exhausted instead of calling
            // getline over and over on a failed stream.
            if (LinesRead >= MaxLines || !std::getline(InFile, Line))
            {
                return Unknown;
            }
            ++LinesRead;
        }

        std::string Answer;
        if (!std::getline(InFile, Answer))
        {
            // The match was the last line: there is no answer to return.
            return Unknown;
        }
        return Answer;
    }

    /// Opens `file` on the input stream. A missing file is created empty and
    /// then opened. Prints a diagnostic when the stream was already open
    /// (open() fails but is_open() still reports the previous file).
    void CheckIn(const std::string& file)
    {
        InFile.open(file);
        if (!InFile.is_open())
        {
            // Create the file that was asked for, then open it so callers
            // get a usable (empty) stream.
            CreateSaveFile(file);
            InFile.open(file);
        }
        else if (!InFile)
        {
            std::cout << "存在未关闭输入流" << std::endl;
        }
    }

    /// Closes the input stream.
    void CloseIn()
    {
        InFile.close();
    }

    /// Opens `file` on the output stream (default mode, which truncates) and
    /// prints a diagnostic when the stream ends up in a failed state.
    void CheckOut(const std::string& file)
    {
        OutFile.open(file);
        if (!OutFile)
        {
            std::cout << "存在未关闭输出流" << std::endl;
        }
    }

    /// Closes the output stream.
    void CloseOut()
    {
        OutFile.close();
    }

    /// Checks whether writing is permitted: returns true when the first line
    /// of `file` is empty or unreadable, false (with a message) when it
    /// already holds data.
    bool CheckPermission(const std::string& file)
    {
        CheckIn(file);
        std::string FirstLine;
        std::getline(InFile, FirstLine);
        CloseIn();
        if (FirstLine.empty())
        {
            return true;
        }
        std::cout << "存在数据,禁止写入" << std::endl;
        return false;
    }

    /// Creates `file` as an empty file (defaults to DEF_SAVEPOS) and reports
    /// success on the console when the file could be opened.
    void CreateSaveFile(const std::string& file = DEF_SAVEPOS)
    {
        std::ofstream Created(file);
        if (Created.is_open())
        {
            std::cout << "创造成功" << std::endl;
        }
    }

private:
    std::ifstream InFile;  // input file stream
    std::ofstream OutFile; // output file stream

    MyFileManager() {}
};
// File-wide handle to the MyFileManager singleton, obtained once during
// static initialisation and used by the crawler and the question/answer code.
MyFileManager* OpenFileManager = MyFileManager::GetFileManager();// file manager pointer
/// Singleton front-end of the console chat: sets the console colour and
/// forwards questions to the global file manager.
class Group
{
public:
    /// Hands back the one shared Group, allocating it on the first request.
    static Group* GetGroupObject()
    {
        if (group != nullptr)
        {
            return group;
        }
        group = new Group();
        return group;
    }

    /// Runs the console command "color 0a" through system().
    void SetColor()
    {
        const char* const Command = "color 0a";
        system(Command);
    }

    /// Returns whatever the file manager's Search() yields for `qua` in the
    /// save file DEF_SAVEPOS.
    std::string GetGroupAnswer(std::string qua)
    {
        std::string Reply = OpenFileManager->Search(DEF_SAVEPOS, qua);
        return Reply;
    }

private:
    static Group* group; // shared instance; nullptr until first requested

    Group() {}
};
Group* Group::group = nullptr;
int StartCrawling(int max)//开始爬取
{
if (!OpenFileManager->CheckPermission(DEF_SAVEPOS))
{
return 0;
}
time_t StartTime = time(0);
std::vector<char> WebCode;//所有源码字节//使用标准库中的容器访问较快,用char*访问字符集很慢,当然效果是一样的
OpenFileManager->CheckOut(DEF_SAVEPOS);
int Time=0;
for (; Time < max; Time++)
{
int UrlLess = 11230416+Time*10;//网址后缀
std::string CurrtUrl = DEF_URL+std::to_string(UrlLess);
TCHAR SzUrl[100];
_stprintf_s(SzUrl, _T("%S"), CurrtUrl.c_str());
HINTERNET Net1 = InternetOpen(NULL, INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL, NULL);
if (Net1 == NULL)
{
InternetCloseHandle(Net1);
return 0;
}
HINTERNET Net2 = InternetOpenUrl(Net1, SzUrl, NULL, NULL, INTERNET_FLAG_NO_CACHE_WRITE, NULL);
if (Net2 == NULL)
{
InternetCloseHandle(Net2);
InternetCloseHandle(Net1);
return 0;
}
DWORD DwMaxDataLength = 500;
PBYTE PBuff = (PBYTE)malloc(DwMaxDataLength*sizeof(TCHAR));
if (PBuff == NULL)
{
InternetCloseHandle(Net2);
InternetCloseHandle(Net1);
return 0;
}
DWORD DwReadDataLength = NULL;
BOOL bReta = TRUE;
do
{
ZeroMemory(PBuff, DwMaxDataLength*sizeof(TCHAR));
bReta = InternetReadFile(Net2, PBuff, DwMaxDataLength, &DwReadDataLength);
for (DWORD dw = 0; dw < DwReadDataLength; dw++)
{
WebCode.push_back(PBuff[dw]);
}
} while (DwReadDataLength!=NULL);
std::vector<char>::iterator i;
for (i = WebCode.begin(); i != WebCode.end(); i++)
{
if (*i == 't'&&*(i + 1) == 'i'&&*(i + 2) == 't'&&*(i + 3) == 'l'&&*(i + 4) == 'e')//捕获titile//如果用string 或者字符数组可以直接用正则表达式
{
std::vector<char>::iterator j = i + 6;
while (*j !='<'&&*j!='_')
{
if ((*j == '百'&&*(j + 1) == '度'&&*(j + 2) == '知'&&*(j + 3) == '道')||(*(j + 1) == '百'&&*(j + 2) == '度'&&*(j + 3) == '知'&&*(j + 4) == '道'))
{
break;
}
else
{
if (*j!=' '&&*j!='\n'){
OpenFileManager->Write(DEF_SAVEPOS, *j);
}
j++;
}
}
OpenFileManager->Write(DEF_SAVEPOS, '\n');
break;
}
}
for (i = WebCode.begin(); i != WebCode.end(); i++)
{
if (*i == 'a'&&*(i + 1) == 'r'&&*(i + 2) == 'r'&&*(i + 3) == 'o'&&*(i + 4) == 'w'&&*(i + 5) == 'd'&&*(i + 6) == 'o'&&*(i + 7) == 'w'&&*(i + 8) == 'n')//捕获arrowdown
{
std::vector<char>::iterator j = i + 32;
while (*j != '<')
{
if (*j != ' '&&*j != '\n'){
OpenFileManager->Write(DEF_SAVEPOS, *j);
}
j++;
}
OpenFileManager->Write(DEF_SAVEPOS, '\n');
OpenFileManager->Write(DEF_SAVEPOS, '\n');
break;
}
}
if (i==WebCode.end())
{
char *NullAnswer = "未找到答案\n";
while (*NullAnswer!='\0')
{
OpenFileManager->Write(DEF_SAVEPOS, *NullAnswer);
NullAnswer++;
}
}
WebCode.clear();//一定要清掉,要不然下一次查找会定位到本次的结果
system("cls");
std::cout << "已完成爬取:" << Time << "/" << max << std::endl;
}
if (Time==max)
{
time_t EndTime = time(0);
system("cls");
std::cout << "已完成100%";
if (EndTime - StartTime>3600)
{
std::cout << " 耗时:" << (EndTime - StartTime) / 3600 << "h" << (EndTime - StartTime)%3600/ 60 << "min" << (EndTime - StartTime) % 3600 % 60 << std::endl;
}
else if (EndTime - StartTime>60)
{
std::cout << " 耗时:" << (EndTime - StartTime) / 60 << "min" << (EndTime - StartTime) % 60 << "s" << std::endl;
}
else
{
std::cout << " 耗时:" << EndTime - StartTime << "s" << std::endl;
}
}
OpenFileManager->CloseOut();
return 0;
}
/// Program entry point: sets the console colour, crawls ten pages into the
/// save file, then answers typed questions from that file until the user
/// enters "关闭" or standard input ends.
int main()
{
    Group *Talk = Group::GetGroupObject();
    Talk->SetColor();
    StartCrawling(10);
    std::string qua;
    std::cout << "简单问答(1对1)" << std::endl;
    while (qua != "关闭")
    {
        OpenFileManager->CheckIn(DEF_SAVEPOS);
        std::cout << "提问:";
        // A failed read (EOF, closed or broken stdin) leaves `qua` unchanged,
        // so without this check the loop would never terminate.
        const bool GotQuestion = static_cast<bool>(std::cin >> qua);
        if (GotQuestion)
        {
            std::cout << "小水:";
            std::cout << (Talk->GetGroupAnswer(qua));
            std::cout << std::endl << std::endl;
        }
        OpenFileManager->CloseIn();
        if (!GotQuestion)
        {
            break;
        }
    }
    return 0;
}