# Request the Douban Books home page with a desktop-browser User-Agent and
# print the HTTP response headers that come back.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/95.0.4638.69 Safari/537.36'
    ),
}
response = requests.get(url='https://book.douban.com/', headers=headers)
# Force UTF-8 for any later decoding of the body via response.text.
response.encoding = 'utf-8'
print(response.headers)
# 网页获取练习
import requests
from bs4 import BeautifulSoup
import time
from re import *
import csv
# Scrape the books listed in the slide list on the book.douban.com home page
# and save each book's title, cover image URL and parsed details to a CSV file.
import os

headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
}
# Without a timeout, requests may wait indefinitely on a stalled connection.
request_timeout = 10

# Collect the detail-page link of every book in the home-page slide list.
response = requests.get('https://book.douban.com/', headers=headers, timeout=request_timeout)
soup = BeautifulSoup(response.text, 'lxml')
all_li = soup.select(' .slide-list>ul:nth-child(2)>li')
all_url = [li.select_one(' .cover>a').attrs['href'] for li in all_li]

# Visit each detail page and extract the fields of interest.
all_message = []
for url in all_url:
    time.sleep(0.5)  # pause between consecutive requests
    response = requests.get(url, headers=headers, timeout=request_timeout)
    soup = BeautifulSoup(response.text, 'lxml')
    name = soup.select_one('#wrapper>h1>span').text
    # Kept separate from the loop variable `url` (the page address).
    cover_url = soup.select_one('#mainpic>.nbg>img').attrs['src']
    # Strip all whitespace so the labelled fields run together for the regex.
    info = sub(r'\s+', '', soup.select_one('#info').text)
    # findall returns a list of tuples (one per match); the groups for
    # 出品方 / 译者 / 页数 are optional and are empty strings when absent.
    message = findall(
        r'作者:(.+?)出版社:(.+?)(出品方:.+?)?(译者:.+?)?出版年:(.+?)(页数:.+?)?定价:(.+?)装帧:(.+?)ISBN:(.+)',
        info)
    all_message.append([name, cover_url, message])

# Make sure the output directory exists so the scraped data is not lost to a
# FileNotFoundError at the very end of the run.
os.makedirs('files', exist_ok=True)
# newline='' is required by the csv module to avoid blank rows on Windows;
# an explicit encoding keeps the Chinese text independent of the locale.
with open('files/书籍数据.csv', 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(['书名', '地址', '详细信息'])
    writer.writerows(all_message)
# 数据接口
import requests
import os
def create_folder(path):
    """Create the directory *path* if it does not already exist.

    Missing parent directories are created as well. Calling this for a
    directory that already exists is a no-op.

    Raises:
        FileExistsError: if *path* exists but is not a directory.
    """
    # exist_ok=True avoids the race between checking for the directory and
    # creating it that a separate os.path.exists() test would have.
    os.makedirs(path, exist_ok=True)
def get_data():
    """Download the select and ban audio clips of every hero.

    Fetches the hero list JSON from game.gtimg.cn, then for each entry under
    its ``hero`` key creates ``所有英雄/<name>/`` and stores the files behind
    the ``selectAudio`` and ``banAudio`` URLs as ``selectAudio.ogg`` and
    ``banAudio.ogg`` in that folder.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
    }
    # Without a timeout, requests may wait indefinitely on a stalled connection.
    timeout = 10
    res = requests.get('https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js?ts=2736478', headers=headers, timeout=timeout)
    heroes = res.json()['hero']
    create_folder('所有英雄')
    for x in heroes:
        hero_dir = f'所有英雄/{x["name"]}'
        create_folder(hero_dir)
        # The JSON key doubles as the output file name.
        for key in ('selectAudio', 'banAudio'):
            audio_url = x[key]
            if not audio_url:
                # An empty URL would make requests.get raise; nothing to save.
                continue
            response = requests.get(audio_url, timeout=timeout)
            # Context manager guarantees the file is flushed and closed.
            with open(f'{hero_dir}/{key}.ogg', 'wb') as audio_file:
                audio_file.write(response.content)


get_data()
# 课后作业
import requests
import os
def create_folder(path):
    """Create the directory *path* if it does not already exist.

    Missing parent directories are created as well. Calling this for a
    directory that already exists is a no-op.

    Raises:
        FileExistsError: if *path* exists but is not a directory.
    """
    # exist_ok=True avoids the race between checking for the directory and
    # creating it that a separate os.path.exists() test would have.
    os.makedirs(path, exist_ok=True)
def get_data():
    """Download the audio clips and skin icons of every hero.

    Fetches the hero list JSON from game.gtimg.cn, then for each entry under
    its ``hero`` key:

    * creates ``所有英雄/<name>/``;
    * stores the files behind the ``selectAudio`` and ``banAudio`` URLs as
      ``selectAudio.ogg`` and ``banAudio.ogg``;
    * fetches the per-hero JSON (addressed by ``heroId``) and saves every
      non-empty ``iconImg`` of its ``skins`` list as ``1.jpg``, ``2.jpg``, ...
      in list order.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
    }
    # Without a timeout, requests may wait indefinitely on a stalled connection.
    timeout = 10
    res = requests.get('https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js?ts=2736478', headers=headers, timeout=timeout)
    heroes = res.json()['hero']
    create_folder('所有英雄')
    for x in heroes:
        hero_dir = f'所有英雄/{x["name"]}'
        create_folder(hero_dir)

        # The JSON key doubles as the output file name.
        for key in ('selectAudio', 'banAudio'):
            audio_url = x[key]
            if not audio_url:
                # An empty URL would make requests.get raise; nothing to save.
                continue
            response = requests.get(audio_url, timeout=timeout)
            # Context manager guarantees the file is flushed and closed.
            with open(f'{hero_dir}/{key}.ogg', 'wb') as audio_file:
                audio_file.write(response.content)

        res = requests.get(f'https://game.gtimg.cn/images/lol/act/img/js/hero/{x["heroId"]}.js?ts=2736506', timeout=timeout)
        # Keep only skins that actually carry an icon URL.
        picture = [skin['iconImg'] for skin in res.json()['skins'] if skin['iconImg']]
        # Number the files from 1. The name is built with a single f-string so
        # a '%' inside the hero name cannot be misread as a format specifier.
        for count, icon_url in enumerate(picture, start=1):
            response = requests.get(icon_url, timeout=timeout)
            with open(f'{hero_dir}/{count}.jpg', 'wb') as image_file:
                image_file.write(response.content)


get_data()