这篇文章为源码分享，爬虫的分析过程请阅读对应的分析文章。
源码
import os

import requests
from bs4 import BeautifulSoup
# Browser-style User-Agent string; it is passed as the request headers of
# every HTTP call made in this script.
_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; WOW64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/63.0.3239.26 Safari/537.36 '
    'Core/1.63.5958.400 SLBrowser/10.0.3533.400'
)
headers = {'user-agent': _USER_AGENT}
def GetSoup(url, timeout=10):
    """Download *url* and return its HTML parsed into a BeautifulSoup tree.

    The response text is decoded as GBK before it is handed to the parser.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` object built with the ``lxml`` parser.

    Raises:
        requests.RequestException: If the connection fails, times out, or
            the server answers with a 4xx/5xx status.
    """
    resp = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an HTTP error instead of parsing an error page as if
    # it were the requested document.
    resp.raise_for_status()
    resp.encoding = 'gbk'
    soup = BeautifulSoup(resp.text, 'lxml')
    return soup
# Equipment: download the icon of every item found on the item list page.
url2 = "https://pvp.qq.com/web201605/item.shtml"
# Hard-coded output folder. It is created up front so that open() below
# does not fail merely because the folder is missing.
save_dir = 'C:/Users/TTODS/Desktop/王者荣耀/装备'
os.makedirs(save_dir, exist_ok=True)
soup = GetSoup(url2)
alldiv2 = soup.find_all('a', href='#none')
for div in alldiv2:
    # Look the <img> up once. Anchors matching href='#none' that carry no
    # image (or an image without src/alt) are skipped instead of raising.
    img = div.find('img')
    if img is None or not img.has_attr('src') or not img.has_attr('alt'):
        continue
    # The src value is appended to the scheme to form the download URL.
    url3 = 'https:' + img['src']
    title = img['alt']
    print('正在下载%s.jpg.....' % title)
    try:
        resp2 = requests.get(url3, headers=headers, timeout=10)
        # Do not save an HTTP error body under a .jpg name.
        resp2.raise_for_status()
    except requests.RequestException as err:
        # One failed icon should not abort the remaining downloads.
        print('下载%s.jpg失败: %s' % (title, err))
        continue
    # The context manager closes the file even if the write raises.
    with open('%s/装备-%s.jpg' % (save_dir, title), 'wb') as f:
        f.write(resp2.content)