# 爬虫第一天 (Web scraping, day 1)
# 作业 (Homework)
import requests
path = "https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js"
def get_name_id(path):
    """Fetch the hero-list JSON feed and return every hero's id.

    Args:
        path: URL of the hero_list.js JSON endpoint.

    Returns:
        list: the ``heroId`` value of every entry under the ``hero`` key.
    """
    response = requests.get(path)
    result = response.json()
    list_name_id = [hero["heroId"] for hero in result["hero"]]
    print(list_name_id)
    # Bug fix: the original only printed the list and implicitly returned
    # None, so the caller's load_skins(list_name_id) received None.
    return list_name_id
# Collect every hero id once, up front.
list_name_id = get_name_id(path)
def load_skins(list_name_id):
    """Download every skin image for the given hero ids into LOL_heroes_skins/.

    Args:
        list_name_id: iterable of heroId values, e.g. from get_name_id().

    Side effects:
        Writes ``<skin name>.jpg`` files under ``LOL_heroes_skins/`` and
        prints each image URL, the payload type, and a running save count.
    """
    import os

    # Bug fix: the original never created the output directory, so every
    # open() raised FileNotFoundError and was silently skipped.
    os.makedirs("LOL_heroes_skins", exist_ok=True)
    count = 0
    for hero_id in list_name_id:
        response = requests.get(
            f"https://game.gtimg.cn/images/lol/act/img/js/hero/{hero_id}.js"
        )
        skins = response.json()["skins"]
        # Distinct loop variables — the original reused `i` for both loops.
        for skin in skins:
            img_url = skin["mainImg"]
            skin_name = skin["name"]
            print(img_url)
            # Chroma entries have an empty mainImg; nothing to download.
            if img_url == "":
                continue
            img_bytes = requests.get(img_url).content
            print(type(img_bytes))
            try:
                # A skin name containing "/" still yields an invalid path;
                # keep the original best-effort skip in that case.
                with open(f"LOL_heroes_skins/{skin_name}.jpg", "wb") as skin_file:
                    skin_file.write(img_bytes)
            except FileNotFoundError:
                continue
            count += 1
            print(count)
# Kick off the bulk skin download for every collected hero id.
load_skins(list_name_id)
# 基础知识 (Basics)
# Fetch the hero-list endpoint and override the guessed text encoding.
response = requests.get("https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js")
# NOTE(review): presumably the site serves GB2312/GBK text — confirm before
# relying on response.text; .json()/.content are unaffected by this setting.
response.encoding = "gb2312"
"""
打印数据
获取请求结果对应的文本数据 - 爬网页
print(response.text)
获取二进制格式的请求结果 - 下载图片,视频和音频的结果
print(response.content)
获取请求结果转换的结果 - json接口,js接口在谷歌浏览器中如何获取,js中大多数是获取列表类型的数据
print(response.json())
"""
# 反爬虫手段 (Anti-scraping countermeasures)
import requests
# Request headers: a real browser User-Agent defeats the simplest bot checks.
# Bug fix: the original had no comma after the User-Agent value, so the bare
# string on the next line was implicitly concatenated onto it, corrupting
# the header. That note is now a comment.
headers = {
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
    # 添加cookie键值对 (add cookie key/value pairs here when needed)
}
# Example request sent with browser-like headers (Douban Top 250).
response = requests.get("https://movie.douban.com/top250", headers=headers)
# 下载图片 (Downloading images)
import requests
# Download one image: fetch the raw bytes and write them to disk.
response = requests.get("地址")
img_bytes = response.content
print(type(img_bytes))
with open("img/j.jpg", "wb") as img_file:
    img_file.write(img_bytes)