day23-数据接口和selenium基础
1、头条热搜数据接口
import requests

# Fetch the Toutiao hot-search board as JSON.
# NOTE(review): the `_signature` query parameter is session/time-bound and
# will expire, after which the endpoint may return empty data — refresh it
# from a live browser session if that happens.
url = 'https://www.toutiao.com/hot-event/hot-board/?origin=toutiao_pc&_signature=_02B4Z6wo00f01CXsvRQAAIDDsTrmGY3d-owlyLmAAGvQfVEUvSZXHWWX51Yu5mR2FrzIN9H9XFtAvojAuS2-tiIR7k8DPgQSzy2j8g0E4EmTogbDi-82CRSzGYqBIS-xXKXSH1J51RUNlPXA9f'
# timeout so a dead endpoint cannot hang the script forever
response = requests.get(url, timeout=30)
# print(response.json())

# Each entry of 'data' is one hot-search item; 'Title' (capital T) is the
# key used by this API.
for x in response.json()['data']:
    print(x['Title'])
2、LOL一个英雄的皮肤
import requests
def download_image(name, img_url: str):
    """Download *img_url* and save it as ``files/<name>.<ext>``.

    The extension is whatever follows the last '.' in the URL — fine for
    the bare-extension CDN URLs used here, but a query string would leak
    into the filename (acceptable for this exercise).

    :param name: file name to save under (without extension)
    :param img_url: direct URL of the image
    """
    # timeout so one stuck download cannot hang the whole run
    response = requests.get(img_url, timeout=30)
    with open(f'files/{name}.{img_url.split(".")[-1]}', 'wb') as f:
        f.write(response.content)
    print('下载成功!')
def get_one_hero_skin():
    """Fetch the skin list for LOL hero id 1 and download every skin image.

    Uses the public gtimg JSON endpoint; falls back to ``chromaImg`` when a
    skin has no ``mainImg``.
    """
    url = 'https://game.gtimg.cn/images/lol/act/img/js/hero/1.js'
    response = requests.get(url, timeout=30)
    result = response.json()
    for skin in result['skins']:
        name = skin['name']
        # Some skins only ship a chroma image; prefer the main image.
        img_url = skin['mainImg']
        if not img_url:
            img_url = skin['chromaImg']
        if not img_url:
            # Neither URL present — skip instead of requesting an empty URL.
            continue
        download_image(name, img_url)


if __name__ == '__main__':
    get_one_hero_skin()
3、selenium基本用法
from selenium.webdriver import Chrome
import time

# 1) Create a browser instance.
#    Chrome() finds chromedriver on PATH; Chrome(path) uses an explicit
#    driver executable.
#    NOTE(review): the positional driver-path argument was removed in
#    Selenium 4 — there you must use service=Service('chromedriver.exe').
b = Chrome('chromedriver.exe')

# 2) Open a page (a JD search-result page).
b.get('https://search.jd.com/Search?keyword=%E9%B8%A1%E7%88%AA&enc=utf-8&wq=%E9%B8%A1%E7%88%AA&pvid=7fd65567ffdd4eaca06cc2d5330af6bc')

# 3) Grab the rendered page source (includes JS-generated content, unlike
#    a plain requests.get of the same URL).
html = b.page_source
print(html)

time.sleep(10)
b.close()
4、selenium基本操作
from selenium.webdriver import Chrome
import time
b = Chrome('chromedriver.exe')
b.get('https://www.jd.com/')

# 1) Locate the search input. Lookup methods:
#      find_element_by_id(id) / find_element_by_class_name(cls)
#      find_element_by_css_selector(css) / find_element_by_xpath(xpath)
#    Each has a matching find_elements_* variant that returns a list.
search = b.find_element_by_id('key')

# 2) Type into the box; sending '\n' presses Enter and submits the search.
search.send_keys('手机')
search.send_keys('\n')  # press Enter to run the search

# 3) Alternatively, click the search button explicitly.
search_btn = b.find_element_by_css_selector('#search>div>div.form>button')
search_btn.click()

time.sleep(2)
# Advance to the next result page.
next_btn = b.find_element_by_class_name('pn-next')
next_btn.click()

# Keep the browser open until the user presses Enter.
input('关闭:')
b.close()
5、京东商品selenium实操
from selenium.webdriver import Chrome
import time
from bs4 import BeautifulSoup
import csv
import os
def analysis_data(html):
    """Parse one JD search-result page and append rows to files/京东手机.csv.

    Each product row is ``[name, price, shop]``. The CSV header is written
    only when the file does not exist yet.

    :param html: full page source of a JD search-result page
    """
    soup = BeautifulSoup(html, 'lxml')
    goods_li = soup.select('#J_goodsList li.gl-item')
    all_data = []
    for li in goods_li:
        # Ad/placeholder items can miss one of these nodes; skip the item
        # rather than abort the whole page. Narrowed from a bare `except`:
        # select_one(...) returning None raises AttributeError, a missing
        # attrs entry raises KeyError.
        try:
            price = li.select_one('.p-price').text.strip()
            name = li.select_one('.p-name>a').attrs['title']
            shop = li.select_one('.p-shop a').attrs['title']
        except (AttributeError, KeyError):
            # The original printed `name` here, but `name` is unbound when
            # the price lookup is the one that failed — print only the
            # error marker.
            print('----------解析出错!-----------')
            continue
        all_data.append([name, price, shop])

    # Header row only on first creation of the file.
    result = os.path.exists('files/京东手机.csv')
    with open('files/京东手机.csv', 'a', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        if not result:
            writer.writerow(['商品名称', '价格', '店铺名'])
        writer.writerows(all_data)
    print('获取成功!')
def get_net_data():
    """Scrape the first 5 pages of JD search results for '手机'.

    Opens jd.com, submits the search, then alternately scrapes a page
    (via ``analysis_data``) and clicks the "next page" button.
    """
    b = Chrome('chromedriver.exe')
    # Open the JD home page.
    b.get('https://www.jd.com/')
    # Type the query; the trailing '\n' submits the search.
    b.find_element_by_id('key').send_keys('手机\n')
    for page in range(5):
        time.sleep(2)  # crude wait for the result page to render
        # Scrape and parse the current page.
        analysis_data(b.page_source)
        # Click "next" only between pages — the original also clicked it
        # after the final scrape, which is wasted work and can raise if
        # the button is absent on the last page.
        if page < 4:
            b.find_element_by_class_name('pn-next').click()
    # input('end:')
    b.close()


if __name__ == '__main__':
    get_net_data()