# 爬虫第一天 (Web scraping, day 1)
# 作业 (Homework)
import requests
path = "https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js"
def get_name_id(path):
    """Fetch the hero-list JSON feed and return every hero's id.

    Args:
        path: URL of the hero_list.js JSON endpoint.

    Returns:
        list: the ``heroId`` value of every entry under the ``hero`` key.
    """
    response = requests.get(path)
    result = response.json()
    list_name_id = [hero["heroId"] for hero in result["hero"]]
    print(list_name_id)
    # Bug fix: the original only printed the list and implicitly returned
    # None, so the caller's load_skins(list_name_id) received None.
    return list_name_id
# Collect every hero id once, up front.
list_name_id = get_name_id(path)
def load_skins(list_name_id):
    """Download every skin image for the given hero ids into LOL_heroes_skins/.

    Args:
        list_name_id: iterable of heroId values, e.g. from get_name_id().

    Side effects:
        Writes ``<skin name>.jpg`` files under ``LOL_heroes_skins/`` and
        prints each image URL, the payload type, and a running save count.
    """
    import os

    # Bug fix: the original never created the output directory, so every
    # open() raised FileNotFoundError and was silently skipped.
    os.makedirs("LOL_heroes_skins", exist_ok=True)
    count = 0
    for hero_id in list_name_id:
        response = requests.get(
            f"https://game.gtimg.cn/images/lol/act/img/js/hero/{hero_id}.js"
        )
        skins = response.json()["skins"]
        # Distinct loop variables — the original reused `i` for both loops.
        for skin in skins:
            img_url = skin["mainImg"]
            skin_name = skin["name"]
            print(img_url)
            # Chroma entries have an empty mainImg; nothing to download.
            if img_url == "":
                continue
            img_bytes = requests.get(img_url).content
            print(type(img_bytes))
            try:
                # A skin name containing "/" still yields an invalid path;
                # keep the original best-effort skip in that case.
                with open(f"LOL_heroes_skins/{skin_name}.jpg", "wb") as skin_file:
                    skin_file.write(img_bytes)
            except FileNotFoundError:
                continue
            count += 1
            print(count)
# Kick off the bulk skin download for every collected hero id.
load_skins(list_name_id)
# 基础知识 (Basics)
# Fetch the hero-list endpoint and override the guessed text encoding.
response = requests.get("https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js")
# NOTE(review): presumably the site serves GB2312/GBK text — confirm before
# relying on response.text; .json()/.content are unaffected by this setting.
response.encoding = "gb2312"
"""
打印数据
获取请求结果对应的文本数据 - 爬网页
print(response.text)
获取二进制格式的请求结果 - 下载图片,视频和音频的结果
print(response.content)
获取请求结果转换的结果 - json接口,js接口在谷歌浏览器中如何获取,js中大多数是获取列表类型的数据
print(response.json())
"""
# 反爬虫手段 (Anti-scraping countermeasures)
import requests
# Request headers: a real browser User-Agent defeats the simplest bot checks.
# Bug fix: the original had no comma after the User-Agent value, so the bare
# string on the next line was implicitly concatenated onto it, corrupting
# the header. That note is now a comment.
headers = {
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
    # 添加cookie键值对 (add cookie key/value pairs here when needed)
}
# Example request sent with browser-like headers (Douban Top 250).
response = requests.get("https://movie.douban.com/top250", headers=headers)
# 下载图片 (Downloading images)
import requests
# Download one image: fetch the raw bytes and write them to disk.
response = requests.get("地址")
img_bytes = response.content
print(type(img_bytes))
with open("img/j.jpg", "wb") as img_file:
    img_file.write(img_bytes)