# 爬取英雄所在标签 (Scrape the tags that contain each hero)
import re

import bs4
import requests
from bs4 import BeautifulSoup
def main():
    """Download the hero list page and save every hero link tag to a file.

    Fetches the hero list page, collects every tag whose ``href`` matches
    the pattern ``herodetail``, and writes the markup of each tag on its
    own line to ``../file/out.txt``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the output file cannot be opened or written.
    """
    url = u'https://pvp.qq.com/web201605/herolist.shtml'

    # A timeout keeps the script from hanging forever on a stalled
    # connection; raise_for_status() stops an error page from being parsed
    # and silently producing an empty output file.
    html = requests.get(url=url, timeout=10)
    html.raise_for_status()

    # Setting ``encoding`` on the response does not affect ``content`` (raw
    # bytes), so the declared encoding is handed to the parser explicitly.
    html.encoding = 'gbk'
    bs = bs4.BeautifulSoup(
        markup=html.content,
        features='lxml',
        from_encoding=html.encoding,
    )
    hero_list = bs.find_all(href=re.compile('herodetail'))

    # 'utf' is an alias Python accepts for UTF-8.  The context manager
    # closes the file even if a write raises.
    with open(file='../file/out.txt', mode='w', encoding='utf') as out:
        out.writelines(str(tag) + '\n' for tag in hero_list)


if __name__ == '__main__':
    main()
# 得到英雄详情页与图片地址 (Get each hero's detail-page URL and image URL)
import bs4
from bs4 import BeautifulSoup
def main():
    """Turn the saved hero tags into a name / detail URL / image URL list.

    Reads ``../file/out.txt`` line by line and parses each line as HTML.
    From every line it takes the ``href`` of the first ``<a>`` tag, the
    ``src`` of the ``<img>`` inside that anchor, and the ``alt`` text of the
    first ``<img>``.  One line per hero is then written to
    ``../file/out1.txt`` in the form ``<name> <detail URL> <image URL>``.

    Lines that contain no ``<a>`` tag, or whose anchor holds no ``<img>``
    (for example blank lines), are skipped instead of aborting the run.

    Raises:
        OSError: If either file cannot be opened, read, or written.
    """
    # Prefixes prepended to the scraped values to form full URLs.
    href = 'https://pvp.qq.com/web201605/'
    # NOTE(review): presumably the scraped ``src`` values are
    # protocol-relative ("//host/...") -- confirm against the input file.
    img = 'https:'

    # Collect everything first so the output file is only created once the
    # whole input has been read successfully.
    heroes = []
    with open(file='../file/out.txt', mode='r', encoding='utf-8') as f:
        for line in f:
            bs = bs4.BeautifulSoup(markup=line, features='lxml')
            anchor = bs.a
            if anchor is None or anchor.img is None:
                # Nothing to extract from this line.
                continue
            heroes.append((bs.img['alt'], anchor['href'], anchor.img['src']))

    # The context manager closes the file even if a write raises.
    with open(file='../file/out1.txt', mode='w', encoding='utf-8') as out:
        out.writelines(
            ' '.join((str(name), href + str(detail), img + str(src))) + '\n'
            for name, detail, src in heroes
        )


if __name__ == '__main__':
    main()