# Import the required modules
import os
import requests
import re
import threading
from fake_useragent import UserAgent
from bs4 import BeautifulSoup
# Build request headers with a random User-Agent
headers = {
    "User-Agent": UserAgent().random
}
# Crawl the target listing pages
def crawl(n):  # n is the number of pages to crawl
    v = 2  # start from page 2 (see the note after this function)
    while v <= n:
        url = "http://pic.netbian.com/4kfengjing/index_" + str(v) + ".html"
        res = requests.get(url=url, headers=headers)
        html = res.content.decode('gbk')  # the site serves GBK-encoded pages
        bs = BeautifulSoup(html, "html.parser")
        con = bs.select(".clearfix li")
        for i in con:
            # each <li> holds one <a> linking to the image's detail page
            img_url = "http://pic.netbian.com" + i.a.get('href')
            th = threading.Thread(target=crawlImg, args=(img_url,))  # one thread per image
            th.start()
        v = v + 1
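# Note (an assumption about the site's URL scheme): the first listing page is
# served as plain index.html with no numeric suffix, and index_1.html does not
# exist, which is presumably why crawl() starts at page 2. A hypothetical
# helper that also covers page 1 could look like this:
def page_url(v):
    base = "http://pic.netbian.com/4kfengjing/"
    return base if v == 1 else base + "index_" + str(v) + ".html"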
# Parse a detail page and download its image
def crawlImg(url):
    res = requests.get(url=url, headers=headers)
    html = res.content.decode('gbk')
    bs = BeautifulSoup(html, "html.parser")
    img_name = bs.select(".view .photo-hd h1")[0].text
    img_name = re.sub(r'[\\/:*?"<>|]', '_', img_name)  # strip characters invalid in filenames
    img_url = "http://pic.netbian.com" + bs.select(".view .photo-pic #img img")[0].get('src')
    print(img_url, img_name)
    con = requests.get(img_url, headers=headers)
    # Save the image into the img folder, creating the folder if it is missing
    os.makedirs('img', exist_ok=True)
    with open('img/' + img_name + '.jpg', 'wb') as f:
        f.write(con.content)
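# Alternative sketch: crawl() starts one unbounded thread per image, which can
# open many connections at once. A bounded pool from the standard library caps
# the concurrency; crawl_pooled() below is a hypothetical drop-in for crawl()
# that reuses crawlImg() above (max_workers=8 is an arbitrary choice, not from
# the original code).
from concurrent.futures import ThreadPoolExecutor

def crawl_pooled(n):
    with ThreadPoolExecutor(max_workers=8) as pool:
        for v in range(2, n + 1):
            url = "http://pic.netbian.com/4kfengjing/index_" + str(v) + ".html"
            res = requests.get(url=url, headers=headers)
            html = res.content.decode('gbk')
            bs = BeautifulSoup(html, "html.parser")
            for i in bs.select(".clearfix li"):
                pool.submit(crawlImg, "http://pic.netbian.com" + i.a.get('href'))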
# Run the crawler
if __name__ == "__main__":
    crawl(10)  # argument is the number of pages to crawl