根据需要完善main()函数,以及修改正则表达式
如果运行不成功,可能是网站的源码与正则表达式不符,需要稍微修改正则表达式;也可能是别的问题。不懂的话欢迎留言。
-
#include <iostream> #include <string> #include <queue> #include <fstream> #include <regex> #include <urlmon.h> #pragma comment(lib,"Urlmon.lib") using namespace std; queue<string> weds; //存放解析出的网站链接 queue<string> imgs; //存放解析出的图片链接 bool writeFile(const string wedpath, const string filepath); //将wedpath请求结果写入文件(文件路径为filepath)中 void getWedpathfromFile(const string wedpath, const string filepath); //从文件中读出跳转网址 void getImgpathfromFile(const string wedpath, const string filepath); string utos(const unsigned un); unsigned bitnum(const unsigned un); int main() { string wedpath = "";//<----------------------------只适合一些网站(我是根据某个网站写的,具体哪个网站不方便透露) string filepath = "test.txt"; if (!writeFile(wedpath, filepath)) cout << "下载失败!" << endl; cout << "正在打印爬取的图片链接..." << endl; getImgpathfromFile(wedpath, filepath); string name = "img_"; unsigned _no = 1; while (!imgs.empty()) { //这里是给图片取名字---------------最值得我装逼的地方 string no = utos(_no); name += no; name += ".jpg"; HRESULT ret = URLDownloadToFileA(NULL, imgs.front().c_str(), name.c_str(), 0, NULL); //下载网页到文件中 if (ret != S_OK) cout << imgs.front() << "下载失败!" << endl; else cout << imgs.front() << "下载成功!" << endl; imgs.pop(); int pos = bitnum(_no); name.erase(name.end() - 4 - pos, name.end());//将.jpg删掉 _no++; // Sleep(1000); //防止下载太快难处理 } } bool writeFile(const string wedpath, const string filepath) { HRESULT ret = URLDownloadToFileA(NULL,wedpath.c_str(),filepath.c_str(), 0, NULL); //下载网页到文件中 if (ret != S_OK) return false; else return true; } void getWedpathfromFile(const string wedpath, const string filepath) { ifstream ifs; ifs.open(filepath); if (!ifs.is_open()) { cout << "打开 " << filepath << " 失败!" << endl; return; } cout << "开始读文件..." << endl; ifs.seekg(0, ios::end); int size = ifs.tellg(); //得到文件大小 string str; str.resize(size); ifs.seekg(0, ios::beg); ifs.read((char*)str.c_str(), size); ifs.close(); cout << endl; cout << "完成读取!" << endl; cout << "开始匹配爬取网页跳转链接......" 
<< endl; //将链接存放到weds中 smatch weds_result; //保存匹配结果 regex wed_pattern(R"dem(<div.class="pic"><a.href=["'](.*?)["'].target)dem"); //匹配模式变量 auto beg = str.cbegin(); auto end = str.cend(); for (; regex_search(beg, end, weds_result, wed_pattern); beg = weds_result.suffix().first) { string str = wedpath + weds_result.str(1);//把相对路径还原成绝对路径 weds.push(str); } } void getImgpathfromFile(const string wedpath, const string filepath) { ifstream ifs; ifs.open(filepath); if (!ifs.is_open()) { cout << "打开 " << filepath << " 失败!" << endl; return; } cout << "开始读文件..." << endl; ifs.seekg(0, ios::end); int size = ifs.tellg(); string str; str.resize(size); ifs.seekg(0, ios::beg); ifs.read((char*)str.c_str(), size); ifs.close(); cout << endl; cout << "完成读取!" << endl; cout << "开始匹配爬取图片链接......" << endl; //将链接存放到imags中 smatch imgs_result; //保存匹配结果 regex img_pattern(R"dem(<img src="(.*?)")dem"); //匹配模式变量 auto beg = str.cbegin(); auto end = str.cend(); for (; regex_search(beg, end, imgs_result, img_pattern); beg = imgs_result.suffix().first) { string str = imgs_result.str(1);//把相对路径还原成绝对路径 imgs.push(str); } } string utos(const unsigned un) { unsigned temp = un; unsigned bit; string result; while (temp) { bit = temp % 10; char ch = (bit + 48); result.insert(result.begin(), ch); temp /= 10; } return result; } unsigned bitnum(const unsigned un) { if (un == 0) return 1; unsigned temp = un; unsigned result = 0; while (temp) { result++; temp /= 10; } return result; }