先声明一下画师通的网址(https://www.huashi6.com),这里面有大量的二次元图片。总有一款适合你,但小孩子才做选择题,我们全要!!!
进入画师通
爬取效果
爬取代码
import os
from urllib.parse import urljoin

import requests
from lxml import etree
class Dmimg:
    """Crawler that downloads illustrations from huashi6.com into ``img/``.

    Typical use is ``Dmimg().run()``: it builds the list of page URLs,
    fetches each page, extracts the image links under the ``imgTooles``
    element and saves every image as ``img/<counter>.<jpg|png>``.
    """

    # Seconds to wait for any single HTTP request before giving up, so one
    # stalled connection cannot hang the whole crawl.
    TIMEOUT = 10
    # Directory (relative to the working directory) that receives the images.
    IMG_DIR = "img"

    def __init__(self):
        # Headers sent with every request.
        #
        # The header name must be spelled exactly "User-Agent"; the earlier
        # "User - Agent" spelling (with spaces) is a different, unknown header,
        # so the browser identity was never actually presented to the server.
        #
        # The former "If - Modified - Since" / "If - None - Match" entries are
        # intentionally not carried over: with their misspelled names they had
        # no effect, and with correct names they would ask the server for
        # "304 Not Modified" replies with empty bodies, which is the opposite
        # of what a crawler wants.
        #
        # NOTE(review): the Cookie is a hard-coded browser session copied
        # verbatim; its value looks whitespace-mangled and is probably expired.
        # Confirm whether it is needed at all and move it out of the source.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
            "Cookie": "UM_distinctid = 1712e065ee94a5 - 0fe79487749dc4 - f313f6d - 144000 - 1712e065eea910;hstud = u2ltte469895f389;auth_tk = MGRiNjZhODQxODE3NGM4ZTllMmFmYzQyODhjZGNhZTZvbzd2bl8yODcwNQ ==;Hm_lvt_a3e2ff554f3229fd90bcfe77f75b9806 = 1585615106, 1585615135;Hm_lpvt_a3e2ff554f3229fd90bcfe77f75b9806 = 1585651165",
        }
        # Number of images saved so far; also used as the next file name.
        # (The attribute name "conut" is kept as-is for compatibility.)
        self.conut = 0

    def get_url_list(self):
        """Return the page URLs to crawl.

        Returns:
            A list of strings: the ``/share`` page first, followed by
            ``/draw/<i>`` for every ``i`` from 1000 to 9999 inclusive.
        """
        url_list = ["https://www.huashi6.com/share"]
        url_list.extend(
            "https://www.huashi6.com/draw/{}".format(i) for i in range(1000, 10000)
        )
        return url_list

    def get_img_url(self, url_list):
        """Download every image referenced on one page.

        Args:
            url_list: URL of a single page. Despite the historical name this
                is one URL string, not a list.

        Returns:
            None. Each saved image increments ``self.conut``. Network, HTTP,
            parse and file errors are reported on stdout and skipped so that
            the caller's crawl loop keeps going.
        """
        print(url_list)
        try:
            page = requests.get(url_list, headers=self.headers, timeout=self.TIMEOUT)
            page.raise_for_status()
        except requests.RequestException as exc:
            print("访问失败", exc)
            return

        if not page.content:
            # Nothing to parse; an empty document would make the parser fail.
            return
        try:
            tree = etree.HTML(page.content)
        except etree.LxmlError as exc:
            print("访问失败", exc)
            return
        if tree is None:
            return

        for src in tree.xpath('//*[@id="imgTooles"]/div/img/@src'):
            # Resolve relative and protocol-relative links against the page;
            # absolute links pass through unchanged.
            url_img = urljoin(url_list, src)
            name = "png" if "png" in url_img else "jpg"
            try:
                img = requests.get(url_img, headers=self.headers, timeout=self.TIMEOUT)
                # Do not store an HTML error page under an image file name.
                img.raise_for_status()
                os.makedirs(self.IMG_DIR, exist_ok=True)
                path = os.path.join(self.IMG_DIR, "{}.{}".format(self.conut, name))
                with open(path, "wb") as f:
                    f.write(img.content)
            except (requests.RequestException, OSError) as exc:
                # One bad image must not abort the remaining ones on the page.
                print("写入失败", exc)
                continue
            # Report success only after the bytes are actually on disk.
            print("写入成功")
            self.conut += 1

    def run(self):
        """Crawl every page from ``get_url_list`` in order."""
        # 1. Build the list of pages to crawl.
        # 2. Visit each page and download the images it links to.
        for url in self.get_url_list():
            self.get_img_url(url)
if __name__ == "__main__":
    # Script entry point: build the crawler and start the full crawl.
    crawler = Dmimg()
    crawler.run()