数据接口和selenium
数据接口
# Download every skin image for hero id 1 via the data API.
response = requests.get('https://game.gtimg.cn/images/lol/act/img/js/hero/1.js')
result = response.json()
for x in result['skins']:
    name = x['name']
    # Some skins have an empty 'mainImg'; fall back to 'chromaImg'
    # (same handling as the later snippets in this file).
    img_url = x['mainImg'] or x['chromaImg']
    # Use a separate name so the outer `response` is not shadowed.
    img_res = requests.get(img_url)
    # 'with' guarantees the file handle is closed even if the write fails.
    with open(f'files/{name}.img', 'wb') as f:
        f.write(img_res.content)
获取一个英雄的皮肤
import requests
def download(img: str, name: str):
    """Fetch the image at URL *img* and save it as skins/<name>.jpg."""
    content = requests.get(img).content
    with open(f'skins/{name}.jpg', 'wb') as file:
        file.write(content)
if __name__ == '__main__':
    # Fetch the skin list for hero id 1, then download each skin image.
    hero_data = requests.get('https://game.gtimg.cn/images/lol/act/img/js/hero/1.js').json()
    for skin in hero_data['skins']:
        # An empty 'mainImg' falls back to 'chromaImg'.
        skin_url = skin['mainImg'] or skin['chromaImg']
        download(skin_url, skin['name'])
获取所有英雄的皮肤
import requests
import os
# 1.获取所有英雄的id
def get_all_hero_id():
    """Return the heroId of every champion from the hero-list API."""
    hero_list_url = 'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js'
    data = requests.get(hero_list_url).json()
    ids = []
    for hero in data['hero']:
        ids.append(hero['heroId'])
    return ids
def get_one_hero_skins(hero_id: str):
    """Download every skin image for the hero with the given id.

    Images are written to 所有英雄的皮肤/<hero name>/<skin name>.jpg.
    """
    # 1. Request the data for the given hero.
    url = f'https://game.gtimg.cn/images/lol/act/img/js/hero/{hero_id}.js'
    result = requests.get(url).json()
    # 2. Create the folder for this hero.
    hero_name = result['hero']['name']
    # makedirs(..., exist_ok=True) also creates the missing parent folder;
    # os.mkdir would raise FileNotFoundError the first time this runs,
    # before '所有英雄的皮肤' exists.
    os.makedirs(f'所有英雄的皮肤/{hero_name}', exist_ok=True)
    # 3. Download all of this hero's skins.
    for skin in result['skins']:
        skin_name = skin['name'].replace('/', '')  # skin names may contain '/'
        # Some skins have an empty 'mainImg'; fall back to 'chromaImg'.
        skin_img = skin['mainImg'] or skin['chromaImg']
        res = requests.get(skin_img)
        with open(f'所有英雄的皮肤/{hero_name}/{skin_name}.jpg', 'wb') as f:
            f.write(res.content)
    print('下载完成!')
selenium的基本用法
# Basic selenium usage.
from selenium.webdriver import Chrome

# Create the browser object. (Per the original notes: if it is a global
# variable, the browser will not close automatically.)
b = Chrome()

# Open the page that contains the data you want to scrape.
b.get('https://movie.douban.com/top250')

# Get the page source — always the DOM as currently loaded in the browser.
print(b.page_source)

# Close the browser.
b.close()
selenium控制浏览器的基本行为
from selenium.webdriver import Chrome
# NOTE: the find_element_by_* helpers were removed in Selenium 4;
# use find_element(By..., ...) instead.
from selenium.webdriver.common.by import By
from time import sleep

b = Chrome()
b.get('https://www.jd.com')

# Type into the search box.
# 1) locate the input box
input_tag = b.find_element(By.ID, 'key')
# 2) type into it — the trailing '\n' submits the search
input_tag.send_keys('电脑\n')
sleep(5)
print(b.page_source)

# Click a button.
# 1) locate the tag to click
btn = b.find_element(By.CSS_SELECTOR, '#navitems-group2 .b')
# 2) click it
btn.click()

input('是否结束:')
b.close()
京东商品
from selenium.webdriver import Chrome
# find_element_by_* was removed in Selenium 4; use find_element(By..., ...).
from selenium.webdriver.common.by import By
from time import sleep
from bs4 import BeautifulSoup
import csv


def _parse_goods(page_source: str) -> list:
    """Extract [name, price] pairs from one JD search-result page."""
    soup = BeautifulSoup(page_source, 'lxml')
    rows = []
    for div in soup.select('#J_goodsList>ul>li>div.gl-i-wrap'):
        price = div.select_one('.p-price i').text
        name = div.select_one('.p-name em').text
        rows.append([name, price])
    return rows


b = Chrome()
b.get('https://www.jd.com')
input_tag = b.find_element(By.ID, 'key')
input_tag.send_keys('手机\n')
sleep(1)

# Parse the first page of results.
all_data = _parse_goods(b.page_source)

# Click "next page".
# NOTE(review): '#navitems-group2 .b' is the same selector used for the
# nav button in the earlier snippet — confirm it really targets the
# next-page control on the search-results page.
next_btn = b.find_element(By.CSS_SELECTOR, '#navitems-group2 .b')
next_btn.click()
sleep(1)

# Parse the second page of results.
all_data.extend(_parse_goods(b.page_source))

# 'with' guarantees the CSV file is closed even if a write fails.
with open('jd/手机.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['介绍', '价格'])
    writer.writerows(all_data)

input('是否结束:')
b.close()
切换选项卡
-
基本操作
# Switching between browser tabs with selenium.
from selenium.webdriver import Chrome
# Selenium 4 removed find_element(s)_by_*; use find_element(s)(By..., ...).
from selenium.webdriver.common.by import By
from time import sleep
from bs4 import BeautifulSoup

b = Chrome()                    # create the browser
b.get('https://www.cnki.net/')  # open CNKI
# Locate the search box and search for '数据分析' — the '\n' presses Enter.
search_tag = b.find_element(By.ID, 'txt_SearchText')
search_tag.send_keys('数据分析\n')
sleep(1)  # always wait a moment after the page switches

# Get all clickable result links. Tags you intend to click or type into
# must be obtained through the browser, not through BeautifulSoup.
all_result = b.find_elements(By.CSS_SELECTOR, '.result-table-list .name>a')
# Clicking the first result opens a NEW tab.
all_result[0].click()
sleep(1)

# NOTE: the browser object keeps pointing at the tab it started on until
# you switch explicitly; b.window_handles lists every open tab.
# (2) switch to the newly opened tab (the last handle)
b.switch_to.window(b.window_handles[-1])

# (3) parse the abstract on the detail page
soup = BeautifulSoup(b.page_source, 'lxml')
result = soup.select_one('#ChDivSummary').text
print(result)
# Closing the current window does not change which window b points to.
b.close()

# (4) go back to the first window and click the next search result.
b.switch_to.window(b.window_handles[0])
all_result[1].click()
sleep(1)
b.switch_to.window(b.window_handles[-1])
soup = BeautifulSoup(b.page_source, 'lxml')
result = soup.select_one('#ChDivSummary').text
print(result)
input('结束:')
b.close()