import concurrent.futures
import os
import threading

import parsel
import requests
# HTTP headers sent with page (HTML) requests to www.netbian.com.
# The cookie and user-agent values are fixed strings captured from a browser
# session; long values are split across adjacent literals for readability only.
headers = {
    'Cookie': (
        '__cfduid=d06f453df5c4252eb0aac3e9280e5b0b01606983134; '
        'Hm_lvt_14b14198b6e26157b7eba06b390ab763=1606983135; '
        'xygkqecookieinforecord=%2C12-23053%2C; '
        'Hm_lpvt_14b14198b6e26157b7eba06b390ab763=1606984094'
    ),
    'Host': 'www.netbian.com',
    'Referer': 'http://www.netbian.com/1920x1080/index.htm',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/81.0.4044.138 Safari/537.36'
    ),
}
# HTTP headers sent when downloading image files from img.netbian.com.
headers1 = {
    'Cookie': (
        'Hm_lvt_14b14198b6e26157b7eba06b390ab763=1590498708; '
        '__cfduid=d7b17b08368a78ab8d8d6a42d580c62f01606985472'
    ),
    'Host': 'img.netbian.com',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/70.0.3538.25 Safari/537.36 '
        'Core/1.70.3823.400 QQBrowser/10.7.4307.400'
    ),
}

# Seconds to wait on the server before a request is abandoned. Without a
# timeout, requests waits indefinitely and a stalled connection would tie up
# one of the worker threads forever.
REQUEST_TIMEOUT = 10

# Directory (relative to the working directory) the wallpapers are written to.
IMAGE_DIR = 'img'

# Maps characters that cannot appear in a file name (path separators and the
# characters Windows reserves) to underscores.
_UNSAFE_FILENAME_CHARS = str.maketrans({char: '_' for char in '\\/:*?"<>|'})


def get_response(html_url):
    """Fetch a page using the page-request headers.

    :param html_url: URL to download.
    :return: the ``requests.Response`` returned by ``requests.get``.
    """
    return requests.get(url=html_url, headers=headers, timeout=REQUEST_TIMEOUT)


def get_response1(html_url):
    """Fetch a resource using the image-host headers (used to save wallpapers).

    :param html_url: URL to download.
    :return: the ``requests.Response`` returned by ``requests.get``.
    """
    return requests.get(url=html_url, headers=headers1, timeout=REQUEST_TIMEOUT)


def get_parsing(html_data):
    """Wrap HTML text in a selector so it can be queried with CSS.

    :param html_data: HTML source text.
    :return: a ``parsel.Selector`` built from ``html_data``.
    """
    return parsel.Selector(html_data)


def save(img_url, name):
    """Download one image and write it to ``img/<name>.jpg``.

    Nothing is downloaded when either argument is missing (the selectors in
    ``get_img_url`` return ``None`` when a page has no matching element).

    :param img_url: URL of the image file.
    :param name: image title, used as the file name (unsafe characters are
        replaced with underscores).
    :return: None
    """
    if not img_url or not name:
        print('skipped (missing image URL or title):', img_url, name)
        return

    # The target directory is created on demand; exist_ok keeps this safe
    # when several worker threads reach this point at the same time.
    os.makedirs(IMAGE_DIR, exist_ok=True)
    filename = os.path.join(
        IMAGE_DIR, name.translate(_UNSAFE_FILENAME_CHARS) + '.jpg'
    )

    img_content = get_response1(img_url).content
    with open(filename, mode='wb') as f:
        f.write(img_content)
    print('正在保存:', name)
    print(img_url)


def _fetch_selector(page_url):
    """Download ``page_url`` and return a selector over its decoded HTML."""
    response = get_response(page_url)
    # Decode the body with the encoding detected from its content rather
    # than the one requests derived from the HTTP headers.
    response.encoding = response.apparent_encoding
    return get_parsing(response.text)


def get_img_url(page_url):
    """Read the image URL and title from a page, then save the image.

    Uses the ``src`` and ``title`` attributes of the first ``.pic a img``
    element on the page.

    :param page_url: URL of the page that shows a single wallpaper.
    :return: None
    """
    selector = _fetch_selector(page_url)
    img_url = selector.css('.pic a img::attr(src)').get()
    name = selector.css('.pic a img::attr(title)').get()
    save(img_url, name)


def main(url):
    """Save every wallpaper linked from one listing page.

    :param url: URL of a listing page.
    :return: None
    """
    selector = _fetch_selector(url)
    lis = selector.css('.list ul li a::attr(href)').getall()
    # Discard the third and fourth links (presumably non-wallpaper entries
    # in the listing -- TODO confirm). Slice deletion does not raise when the
    # page yields fewer links than expected.
    del lis[2:4]
    for li in lis:
        get_img_url('http://www.netbian.com' + li)


if __name__ == '__main__':
    # Listing pages 2..49 are crawled; the un-numbered first page
    # (index.htm) is not part of this range.
    page_urls = [
        'http://www.netbian.com/1920x1080/index_{}.htm'.format(page)
        for page in range(2, 50)
    ]
    # The context manager waits for all submitted work before exiting.
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        futures = {executor.submit(main, url): url for url in page_urls}
        # Exceptions raised inside a worker are stored on its future and
        # would otherwise vanish silently, so report them here.
        for future in concurrent.futures.as_completed(futures):
            error = future.exception()
            if error is not None:
                print('failed:', futures[future], repr(error))