# 数据接口 (data endpoint: LoL hero skins JSON)
import requests
# Fetch the hero-detail JSON (contains the 'skins' list) for hero id 1
# from the official League of Legends data endpoint.
res = requests.get(
    url='https://game.gtimg.cn/images/lol/act/img/js/hero/1.js?ts=2767550',
    headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36'
    },
    timeout=10,  # don't hang forever on a stalled connection
)
# Fail fast with a clear HTTPError instead of letting .json() raise an
# obscure decode error on an error page.
res.raise_for_status()
result = res.json()
def down_jpg(url: str, name: str) -> None:
    """Download an image and save it as files/anni/<name>.jpg.

    Args:
        url: direct URL of the image to download.
        name: file name (without extension) to save under.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    import os

    res = requests.get(
        url=url,
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36'
        },
        timeout=10,  # avoid hanging indefinitely on one image
    )
    res.raise_for_status()
    # Make sure the target directory exists before writing (original code
    # assumed it was pre-created).
    os.makedirs('files/anni', exist_ok=True)
    with open(f'files/anni/{name}.jpg', 'wb') as f:
        f.write(res.content)  # fixed typo: was bound to 'contect'
if __name__ == '__main__':
    import os

    # Create the output directory up front. The original ran
    # os.mkdir('') AFTER the downloads, which raises FileNotFoundError
    # (empty path) and creates nothing.
    os.makedirs('files/anni', exist_ok=True)

    for skin in result['skins']:
        name_hero = skin['name']
        # Some skins have an empty 'mainImg'; fall back to the chroma image.
        url = skin['mainImg'] or skin['chromaImg']
        down_jpg(url, name_hero)
    print('下载完成')
# selenium基础用法 (selenium basics)
# Selenium basics: open Douban Top250, dump the fully rendered page
# source, then close the window.
from selenium.webdriver import Chrome

browser = Chrome()
browser.get('https://movie.douban.com/top250')
browser.maximize_window()
print(browser.page_source)
browser.close()
# selenium控制浏览器的基本行为 (basic browser actions with selenium)
# Basic browser control on jd.com: type a search query, then click a
# navigation link.
from time import sleep

from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By

b = Chrome()
b.get('https://www.jd.com')
b.maximize_window()

# find_element_by_id / find_element_by_css_selector were removed in
# Selenium 4 — use the By locator API instead.
input_tag = b.find_element(By.ID, 'key')
input_tag.send_keys('电脑\n')  # trailing '\n' submits the search box
sleep(3)

button_tag = b.find_element(By.CSS_SELECTOR, '#navitems-group2 #nav-jdww>a')
button_tag.click()

input(':')  # keep the browser open until the user presses Enter
b.close()
# 切换选项卡 (switching browser tabs)
# Tab switching: search CNKI, open the first two results (each click
# spawns a new tab), scrape each paper's abstract, and return to the
# results tab in between.
from bs4 import BeautifulSoup
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from time import sleep

b = Chrome()
b.get('https://www.cnki.net/')

# Selenium 4 removed find_element_by_* — use the By locator API.
search_tag = b.find_element(By.ID, 'txt_SearchText')
search_tag.send_keys('数据分析\n')
sleep(1)

all_result = b.find_elements(By.CSS_SELECTOR, '.result-table-list .name>a')

# The original duplicated this scrape sequence verbatim for results 0 and
# 1, and its final b.close() ran on an already-closed tab handle (raising
# NoSuchWindowException). Loop instead, always returning focus to the
# results tab after closing the paper tab.
for link in all_result[:2]:
    link.click()
    b.switch_to.window(b.window_handles[-1])  # focus the new tab
    soup = BeautifulSoup(b.page_source, 'lxml')
    result = soup.select_one('#ChDivSummary').text
    print(result)
    sleep(2)
    b.close()
    b.switch_to.window(b.window_handles[0])  # back to the results tab

input('结束 :')
b.close()
# 某某知网 (CNKI scraper)
import csv
from bs4 import BeautifulSoup
from selenium.webdriver import Chrome
from time import sleep
def down_data(b):
    """Scrape the abstract of the currently focused paper page and append
    it as one row to files/中国知网.csv; writes '无' when the page has no
    abstract element."""
    soup = BeautifulSoup(b.page_source, 'lxml')
    try:
        result = soup.select_one('#ChDivSummary').text
    except AttributeError:
        # select_one returned None — page has no abstract block.
        result = '无'
    print(result)
    # newline='' lets the csv module control line endings itself.
    with open('files/中国知网.csv', 'a', encoding='utf-8', newline='') as out:
        csv.writer(out).writerow([result])
    sleep(2)  # crude rate limit between paper pages
def go_next(b):
    """Process the tab opened by the most recent result click.

    Focuses the newest tab, scrapes and saves its abstract via
    down_data, closes it, then returns focus to the original results
    tab so the next result link can be clicked. The call order is
    significant: close() acts on the currently focused window.
    """
    b.switch_to.window(b.window_handles[-1])
    down_data(b)
    b.close()
    b.switch_to.window(b.window_handles[0])
def get_all_data(b):
    """Open every result link on the current CNKI results page and save
    each paper's abstract (via go_next -> down_data)."""
    # Local import keeps this change self-contained; find_elements_by_*
    # was removed in Selenium 4, so use the By locator API.
    from selenium.webdriver.common.by import By

    b.maximize_window()
    all_result = b.find_elements(By.CSS_SELECTOR, '.result-table-list .name>a')
    print(all_result)
    sleep(2)
    # Iterate the elements directly instead of range(len(...)).
    for link in all_result:
        link.click()   # opens the paper in a new tab
        go_next(b)     # scrape it and return to this results tab
if __name__ == '__main__':
    # Selenium 4 removed find_element_by_* — use the By locator API.
    from selenium.webdriver.common.by import By

    b = Chrome()
    b.get('https://www.cnki.net/')

    search_tag = b.find_element(By.ID, 'txt_SearchText')
    search_tag.send_keys('数据分析\n')  # '\n' triggers the search
    sleep(3)

    # Scrape the first results page, then advance through two more pages.
    get_all_data(b)
    for _ in range(2):
        # The last link in the pager block is the "next page" button.
        pager_links = b.find_elements(By.CSS_SELECTOR, '#gridTable>.pages>a')
        pager_links[-1].click()
        sleep(3)  # wait for the next page of results to render
        get_all_data(b)

    input(':')  # keep the browser open until the user presses Enter
    b.close()