import requests
from bs4 import BeautifulSoup
cought = 1  # Running file-number counter: save() names each image "<cought>.jpg" and then increments it.
def send_requests(timeout=10):
    """Fetch the hero-list page from pvp.qq.com.

    Args:
        timeout: Value passed to ``requests.get`` as its ``timeout``
            (seconds). Without one, the call can block indefinitely on an
            unresponsive server.

    Returns:
        The ``requests.Response`` for the hero-list URL.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx status (raised by ``raise_for_status``).
    """
    url = 'https://pvp.qq.com/web201605/herolist.shtml'
    # Browser-like user agent sent with the request.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 SLBrowser/8.0.0.3161 SLBChan/105'}
    resp = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an HTTP error instead of handing an error page to the parser.
    resp.raise_for_status()
    return resp
def parser_html():
    """Collect image URLs from the page markup stored in the module-level ``html``.

    Finds every ``<ul>`` whose class attribute is ``"herolist clearfix"`` and
    reads the ``src`` attribute of each ``<img>`` inside it.

    Returns:
        list[str]: Image URLs in document order. ``<img>`` tags without a
        ``src`` attribute are skipped.
    """
    bs = BeautifulSoup(html, "lxml")
    photo_list = []
    for ul in bs.find_all("ul", class_="herolist clearfix"):
        for img in ul.find_all("img"):
            src = img.get("src")
            if not src:
                # No src attribute: nothing to download, and "https:" + None
                # would raise TypeError.
                continue
            if src.startswith(("http://", "https://")):
                # Already absolute; prefixing would yield "https:https://...".
                photo_list.append(src)
            else:
                # Scheme-less (e.g. protocol-relative "//host/path") value:
                # prepend the scheme as before.
                photo_list.append("https:" + src)
    return photo_list
def save(url1):
    """Write one downloaded image to ``<cought>.jpg`` and advance the counter.

    Args:
        url1: Response object whose ``content`` attribute holds the image
            bytes.
    """
    global cought  # module-level counter; gives each image its own file name
    filename = f"{cought}.jpg"
    # Binary mode: the payload is raw bytes, not text.
    with open(filename, mode='wb') as out:
        out.write(url1.content)
    print("图片爬取完毕")
    cought = cought + 1
def jpg_request(timeout=10):
    """Request every URL in the module-level ``photos_list`` and save each image.

    Each URL is printed, fetched with ``requests.get``, and the response is
    passed to ``save``. Responses with a non-success status are skipped so
    that an HTTP error body is not written to disk under a ``.jpg`` name.

    Args:
        timeout: Value passed to ``requests.get`` as its ``timeout``
            (seconds). Without one, a single stalled download would hang the
            whole run.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    # Browser-like user agent sent with every image request.
    headers1 = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 SLBrowser/8.0.0.3161 SLBChan/105'}
    for url_photo in photos_list:
        print(url_photo)
        response_photo = requests.get(url_photo, headers=headers1, timeout=timeout)
        if not response_photo.ok:
            # 4xx/5xx: report and move on rather than saving the error page.
            print(f"skipped {url_photo}: HTTP {response_photo.status_code}")
            continue
        save(response_photo)
if __name__ == '__main__':
    # Fetch the page once and parse it once. The original called
    # send_requests() and parser_html() twice each and discarded the first
    # result, costing a redundant network request and a redundant parse.
    # `html` and `photos_list` stay module-level names because parser_html()
    # and jpg_request() read them as globals.
    html = send_requests().text
    photos_list = parser_html()
    jpg_request()
使用 BeautifulSoup(漂亮汤)批量爬取图片
最新推荐文章于 2023-10-18 22:20:58 发布