Practice projects for using requests:
It is recommended to learn the urllib library first; requests will then be much easier to understand (a short comparison sketch follows the install command below).
Before writing any code, don't forget to install the third-party library.
Enter the following in the Terminal:
pip install requests
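If you have already used urllib, the quickest way to see what requests adds is to fetch the same page both ways. This is a minimal sketch, not part of either project; the URL is simply the wallpaper site scraped in Project 1, and any reachable page would work the same way:
from urllib import request as urllib_request
import requests

url = 'http://www.netbian.com/index.htm'

# urllib: open the URL, read the raw bytes, decode manually
with urllib_request.urlopen(url) as resp:
    html_from_urllib = resp.read().decode('gbk')

# requests: one call; decoding and status handling are built in
resp = requests.get(url)
resp.encoding = 'gbk'
html_from_requests = resp.text

print(len(html_from_urllib), len(html_from_requests))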
Project 1: Wallpaper scraping
import requests
from lxml import etree
import time
import os
# 1. Build the list of Netbian (netbian.com) wallpaper listing page URLs
def get_url():
    url_list = ['http://www.netbian.com/index.htm']
    for i in range(2, 5):
        url_list.append(f'http://www.netbian.com/index_{i}.htm')
    return url_list

# url_list = get_url()
# print(url_list)

# 2. Send the request and return the raw response body
def get_response(url):
    time.sleep(0.5)
    response = requests.get(url)
    content = response.content
    return content

# 3. Parse the listing page and extract each image's URL and name
def get_content(content):
    html_str = etree.HTML(content.decode('gbk'))
    content_list = []
    li_list = html_str.xpath(".//div[@class='list']/ul/li")
    for li in li_list:
        item = {}
        item['name'] = li.xpath('./a/b/text()')
        item['img_href'] = li.xpath('./a/img/@src')
        if item['name']:
            content_list.append(item)
    return content_list

# 4. Download and save the images
def download_img(content_list):
    for content in content_list:
        img_name = content['name'][0]
        path_img = './img'
        if not os.path.exists(path_img):
            os.mkdir(path_img)
        with open(path_img + f"/{img_name}.jpg", "wb") as f:
            f.write(get_response(content['img_href'][0]))
            print(img_name + ' downloaded successfully')

# 5. Main routine: fetch each listing page and download its images
def run():
    url_list = get_url()
    for url in url_list:
        content = get_response(url)
        content_list = get_content(content)
        download_img(content_list)

if __name__ == '__main__':
    run()
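The get_response above covers the happy path only. Many sites reject the default requests User-Agent or stall on slow connections, so a more defensive variant is often useful. This is a minimal sketch under those assumptions; the header value and the 10-second timeout are illustrative choices, and on failure it returns empty bytes, so a caller would want to skip empty results before parsing:
import time
import requests

def get_response(url):
    # Identify as a regular browser; some sites block the default requests User-Agent
    headers = {'User-Agent': 'Mozilla/5.0'}
    time.sleep(0.5)
    try:
        # timeout prevents hanging indefinitely on a slow or dead connection
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        return response.content
    except requests.RequestException as e:
        print(f'request failed for {url}: {e}')
        return b''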
Project 2: Image scraping (Honor of Kings hero skins)
import requests
from bs4 import BeautifulSoup
from urllib import parse
import os
class Skin(object):
    def __init__(self):
        # JSON data describing the Honor of Kings heroes and their skins
        self.hero_url = 'https://pvp.qq.com/web201605/js/herolist.json'
        # Common URL prefix for every hero's detail page
        self.base_url = 'https://pvp.qq.com/web201605/herodetail/'
        # URL suffix for the current hero's detail page
        self.detail_url = ''
        # Folder for storing the downloaded images
        self.img_folder = 'skin'
        # Common URL prefix for the skin images
        self.skin_url = 'https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/'
        # URL suffix for the current skin image
        self.skin_detail_url = ''

    def get_hero(self):
        """Fetch the hero JSON data."""
        request = requests.get(self.hero_url)
        # requests has a built-in JSON decoder that parses the JSON body into Python objects
        hero_list = request.json()
        return hero_list

    def get_hero_skin(self, hero_name, hero_no):
        """Parse the skin section of a hero's detail page and download the images."""
        url = parse.urljoin(self.base_url, self.detail_url)
        request = requests.get(url)
        request.encoding = 'gbk'
        html = request.text
        # Locate the node that carries the skin information
        soup = BeautifulSoup(html, 'lxml')
        skin_list = soup.select('.pic-pf-list3')
        for skin_info in skin_list:
            # The skin names are stored in the data-imgname attribute, separated by '|'
            img_names = skin_info.attrs['data-imgname']
            name_list = img_names.split('|')
            skin_no = 1
            # Download each skin image in turn
            for skin_name in name_list:
                self.skin_detail_url = '%s/%s-bigskin-%s.jpg' % (hero_no, hero_no, skin_no)
                skin_no += 1
                img_name = hero_name + '-' + skin_name + '.jpg'
                self.download_skin(img_name)

    def download_skin(self, img_name):
        """Download a single skin image."""
        img_url = parse.urljoin(self.skin_url, self.skin_detail_url)
        request = requests.get(img_url)
        if request.status_code == 200:
            print('download-%s' % img_name)
            img_path = os.path.join(self.img_folder, img_name)
            # open() as a context manager closes the file automatically, so no explicit close() is needed
            # 'wb' opens the file for binary writing: it is created if missing and overwritten if it already exists
            with open(img_path, 'wb') as img:
                img.write(request.content)
        else:
            print('img error!')

    def make_folder(self):
        """Create the image storage folder if it does not already exist."""
        if not os.path.exists(self.img_folder):
            os.mkdir(self.img_folder)

    def run(self):
        """Script entry point."""
        self.make_folder()
        hero_list = self.get_hero()
        for hero in hero_list:
            hero_no = str(hero['ename'])
            self.detail_url = hero_no + '.shtml'
            hero_name = hero['cname']
            self.get_hero_skin(hero_name, hero_no)

# Program entry point
if __name__ == '__main__':
    skin = Skin()
    skin.run()
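Before running the full scraper, it can be worth inspecting the hero JSON by hand to confirm that the fields Skin.run() relies on are still present. This is a minimal sketch, assuming the herolist.json endpoint still returns a list of objects with ename and cname keys:
import requests

hero_url = 'https://pvp.qq.com/web201605/js/herolist.json'
resp = requests.get(hero_url)
# .json() uses requests' built-in JSON decoder and returns plain Python lists/dicts
hero_list = resp.json()
print(len(hero_list), 'heroes found')
# Show the first few entries and the fields used by Skin.run()
for hero in hero_list[:3]:
    print(hero['ename'], hero['cname'])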