C++爬虫项目爬取图片-CSDN博客

本文链接：https://blog.csdn.net/A1521315qwss/article/details/108178996

本文介绍了一个使用C++实现的爬虫项目，旨在爬取网页中的图片。但需要注意，部分网站由于存在反爬机制，可能无法成功获取图片。

摘要由CSDN通过智能技术生成

C++爬虫项目：爬取网页中的图片。

值得注意的是，有些网站设有反爬机制，这类网站上的图片通常无法直接爬取下来。
在这里插入图片描述
主要代码文件

main.cpp文件里面的代码

#include "CHttp.h"
#include <urlmon.h>

#pragma comment(lib, "urlmon.lib")

// Work queue of page URLs still to be fetched. StartCatch() seeds it with the
// start URL and drains it one page at a time.
queue<string> q;
// Work queue of image URLs waiting to be downloaded; drained by loadImage().
// NOTE(review): presumably filled by CHttp::AnalyseHtml — confirm in CHttp.
queue<string> p;

// Crawl loop: fetches and analyses every page URL queued in `q`, starting from `url`.
void StartCatch(string url);
// Downloads every image URL currently queued in `p` into the ./image directory.
void loadImage();

int main()
{
   
	cout << "*****************************************" << endl << endl;
	cout << "           欢迎使用网络爬虫系统          " << endl;
	cout << "              开发者：admin              " << endl << endl;
	cout << "*****************************************" << endl << endl;

	//创建一个文件夹,点表示当前目录
	CreateDirectory("./image", NULL);

	//从键盘输入一个起始url
		string url;
	//cout<<"请输入起始url:";
	cin>>url;
	//url = "http://desk.zol.com.cn/";//爬的是这个网站，可自行修改
	//开始抓取
	StartCatch(url);

	system("pause");
	return 0;
}

// Runs the crawl: seeds the page queue `q` with `url`, then processes pages
// until the queue is empty. For every page it fetches the HTML with a GET
// request, passes it to CHttp::AnalyseHtml, and then downloads whatever image
// URLs are waiting in `p`.
// NOTE(review): the loop only continues past the first page if AnalyseHtml
// pushes newly found links onto `q` — confirm in CHttp.
void StartCatch(string url)
{
	q.push(url);

	for (; !q.empty(); )
	{
		// Take the next page off the front of the queue.
		string pageUrl = q.front();
		q.pop();

		// A fresh CHttp object is used for each page.
		CHttp client;
		string pageHtml = client.FetchGet(pageUrl);
		client.AnalyseHtml(pageHtml);

		// Download the images queued so far before moving to the next page.
		loadImage();
	}
}


//下载图片的线程
static int num = 0;
void loadImage()
{
   
	while (!p.empty())
	{
   
		string currenturl = p.front();
		p.pop();
		char Name[20] = {
    0 };
		num++;
		sprintf_s(Name, "./image/%d.jpg", num);

		if (S_OK == URLDownloadToFile(NULL, currenturl.c_str(), Name, 0, 0))
		{
   
			cout << "download ok" << endl;
			if (num == 24)//爬24张就结束了，也可以去掉这句话
			{
   
				exit(0);
			}
		}
		else
		{
   
			cout << "download error" << endl;
		}
	}

}

CHttp.h 文件里面的代码