python壁纸推荐_Python爬取优质高清壁纸网站:彼岸

import concurrent.futures
import os
import threading

import parsel
import requests

# Request headers sent when fetching HTML pages from www.netbian.com.
headers = {
    'Cookie': '__cfduid=d06f453df5c4252eb0aac3e9280e5b0b01606983134; Hm_lvt_14b14198b6e26157b7eba06b390ab763=1606983135; xygkqecookieinforecord=%2C12-23053%2C; Hm_lpvt_14b14198b6e26157b7eba06b390ab763=1606984094',
    'Host': 'www.netbian.com',
    'Referer': 'http://www.netbian.com/1920x1080/index.htm',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',
}

# Request headers sent when downloading image files from img.netbian.com.
headers1 = {
    'Cookie': 'Hm_lvt_14b14198b6e26157b7eba06b390ab763=1590498708; __cfduid=d7b17b08368a78ab8d8d6a42d580c62f01606985472',
    'Host': 'img.netbian.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3823.400 QQBrowser/10.7.4307.400',
}

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block a worker thread indefinitely.
REQUEST_TIMEOUT = 10

# Directory (relative to the working directory) where images are written.
IMG_DIR = 'img'


def get_response(html_url):
    """Fetch a page with the HTML-site headers.

    :param html_url: URL of the page to request.
    :return: the ``requests`` response object.
    """
    return requests.get(url=html_url, headers=headers, timeout=REQUEST_TIMEOUT)


def get_response1(html_url):
    """Fetch a resource with the image-host headers (used to download wallpapers).

    :param html_url: URL of the image to request.
    :return: the ``requests`` response object.
    """
    return requests.get(url=html_url, headers=headers1, timeout=REQUEST_TIMEOUT)


def get_parsing(html_data):
    """Wrap HTML text in a ``parsel.Selector`` for CSS queries.

    :param html_data: HTML source text.
    :return: a ``parsel.Selector`` built from ``html_data``.
    """
    return parsel.Selector(html_data)


def save(img_url, name):
    """Download an image and write it to ``img/<name>.jpg``.

    :param img_url: URL of the image file; if ``None`` nothing is saved.
    :param name: title used as the file name; if ``None`` nothing is saved.
    :return: None
    """
    # The CSS lookups in get_img_url return None when the page has no
    # matching element; skip instead of failing on string concatenation.
    if img_url is None or name is None:
        print('跳过: 缺少图片地址或标题')
        return

    # Create the target directory on first use; exist_ok keeps this safe
    # when several worker threads reach this point at the same time.
    os.makedirs(IMG_DIR, exist_ok=True)
    # os.path.join yields 'img\\<name>.jpg' on Windows (as before) and a
    # valid path on other platforms.
    filename = os.path.join(IMG_DIR, name + '.jpg')

    img_content = get_response1(img_url).content
    with open(filename, mode='wb') as f:
        f.write(img_content)
    print('正在保存:', name)
    print(img_url)


def get_img_url(page_url):
    """Extract the image address and title from a detail page and save the image.

    :param page_url: URL of a wallpaper detail page.
    :return: None
    """
    response = get_response(page_url)
    # Let requests guess the charset from the body so the title decodes correctly.
    response.encoding = response.apparent_encoding
    selector = get_parsing(response.text)
    img_url = selector.css('.pic a img::attr(src)').get()
    name = selector.css('.pic a img::attr(title)').get()
    save(img_url, name)


def main(url):
    """Process one listing page: visit every linked detail page and save its image.

    :param url: URL of a listing page.
    :return: None
    """
    response = get_response(url)
    response.encoding = response.apparent_encoding
    selector = get_parsing(response.text)
    lis = selector.css('.list ul li a::attr(href)').getall()
    # Drop the entries at positions 2 and 3 (presumably non-wallpaper links;
    # confirm against the live page). Slice deletion matches two pop(2)
    # calls but does not raise when the list is shorter than expected.
    del lis[2:4]
    for li in lis:
        page_url = 'http://www.netbian.com' + li
        get_img_url(page_url)


if __name__ == '__main__':
    # The context manager waits for all submitted work, like shutdown().
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        futures = {}
        for page in range(2, 50):
            url = 'http://www.netbian.com/1920x1080/index_{}.htm'.format(page)
            futures[executor.submit(main, url)] = url
        # Exceptions raised inside a worker are stored on the future and
        # would otherwise vanish silently; report them per page.
        for future in concurrent.futures.as_completed(futures):
            error = future.exception()
            if error is not None:
                print('抓取失败:', futures[future], error)

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值