# 1. Baidu products
# Requirement: save the Baidu "all products" page to the local disk.
import requests

response = requests.get(url='https://www.baidu.com/more/')
# print(response.text)
# print(response.content.decode())

# Save an image: the body is binary, so write with mode 'wb' (no encoding).
# NOTE: was misnamed `img_url` — it is a Response object, not a URL.
img_response = requests.get('https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1603206945778&di=9ea4d35a4622a99c97cc398a5fede3c5&imgtype=0&src=http%3A%2F%2Ffile02.16sucai.com%2Fd%2Ffile%2F2014%2F0419%2Fd9f4710e211cd8bce6b8ef361b805fd3.jpg')
with open('songshu.jpg', 'wb') as f:
    f.write(img_response.content)
# 2. Sina news
# Requirement: save a Sina news search-result page to the local disk.
import requests

# Base search URL; the query string is supplied separately via `params`,
# which requests URL-encodes for us.
base_url = 'http://search.sina.com.cn/'
param = {
    'q': 'java',      # search keyword
    'c': 'news',      # search channel (news)
    'from': 'index',
}
response = requests.get(url=base_url, params=param)
print(response.url)
# Save locally:
# with open('xinlang.html', 'w', encoding='utf8') as fp:
#     fp.write(response.text)
# 3. Baidu search
# Requirement: save whichever page the user searches for to the local disk.
import requests

wd = input('请输入想搜索的内容:')
base_url = 'https://www.baidu.com/s'
param = {
    'wd': wd,  # user-supplied search keyword
}
# A browser User-Agent is needed so Baidu serves the normal result page
# instead of an anti-bot/verification response.
head = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36'
}
response = requests.get(url=base_url, params=param, headers=head)
print(response.url)
# print(response.request.headers)
with open('python.html', 'w', encoding='utf8') as fp:
    fp.write(response.text)
# 4. Hupu news
# Requirement: fetch the first five pages of Hupu news and save each locally.
import requests

# Paging pattern observed in the browser:
#   https://voice.hupu.com/news?category=all&page=1
#   https://voice.hupu.com/news?category=all&page=2
#   ... up to page=5
head = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36'
}
params = {
    'category': 'all'
}
for page in range(1, 6):
    params['page'] = page
    # BUG FIX: the listing lives under /news (see pattern above); the bare
    # domain root ignores the category/page query parameters.
    response = requests.get(url='https://voice.hupu.com/news', params=params, headers=head)
    with open(f'hupu{page}.html', 'w', encoding='utf8') as fp:
        fp.write(response.text)
# 5. Baidu translate
# Requirement: print every suggested translation for a user-entered word.
import requests

kw = input('查询:')
# 'x-requested-with: XMLHttpRequest' marks the request as AJAX, which the
# /sug endpoint expects; without it the API may not answer normally.
# NOTE: was misspelled `headres` and — the actual bug — never passed to
# requests.post, so these headers were silently dropped.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
    'x-requested-with': 'XMLHttpRequest'
}
data = {
    'kw': kw  # word to look up
}
# BUG FIX: send the headers with the POST request.
response = requests.post(url='https://fanyi.baidu.com/sug', data=data, headers=headers)
# The endpoint returns JSON: {'errno': ..., 'data': [{'k': word, 'v': translations}, ...]}
data_list = response.json()
print(data_list)
content = data_list['data']
for entry in content:
    # 'k' is the matched word, 'v' its translations.
    print(entry['k'], entry['v'])
# 6. Baidu Tieba
# Requirement: fetch pages of the LOL tieba forum and save each one locally.
import requests

# Paging pattern: `pn` grows in steps of 50 per page
#   page 1: https://tieba.baidu.com/f?kw=lol&ie=utf-8&pn=0
#   page 2: https://tieba.baidu.com/f?kw=lol&ie=utf-8&pn=50
#   page 3: https://tieba.baidu.com/f?kw=lol&ie=utf-8&pn=100  ... etc.
# FIX: the dict literals below previously relied on accidental implicit
# string concatenation ('user-agent'': ''Mozilla…'') — same values, sane quoting.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
}
param = {
    'ie': 'utf-8',
    'kw': 'LOL',
}
for i in range(1, 11):
    param['pn'] = (i - 1) * 50  # page i starts at record offset (i-1)*50
    response = requests.get(url='https://tieba.baidu.com/f', params=param, headers=headers)
    with open(f'tieba{i}.html', 'w', encoding='utf8') as fp:
        fp.write(response.text)
# 7. WeChat mini-program community
# Requirement: fetch the first 10 pages of the article list.
import requests

params = {
    'mod': 'list',   # portal.php module: the article list view
    'catid': '1',    # category id
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36'
}
for page in range(1, 11):
    params['page'] = page
    response = requests.get(url='http://www.wxapp-union.com/portal.php', params=params, headers=headers)
    # Only the resolved URL is printed here; pages are not yet saved to disk.
    print(response.url)