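# Topics covered in this exercise:
#   - requests.Session objects
#   - for loops
#   - regular expressions
# Create a session object and use it to fetch the Xueqiu hot list.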
import requests
import re
headers = {
    'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
}
home_url = 'https://xueqiu.com/'
ta_url = 'https://xueqiu.com/statuses/hot/listV3.json?page=4&last_id=296363112'

# Seconds to wait on each HTTP request. requests applies no timeout unless
# one is passed, so without this a stalled connection would block forever.
REQUEST_TIMEOUT = 10

# Non-greedy match of everything between a "<" and the next ">" on one line.
# Compiled once here instead of being re-evaluated for every post.
TAG_PATTERN = re.compile(r'<.*?>')


def strip_tags(text):
    """Return *text* with every ``<...>`` span replaced by an empty string.

    Args:
        text: The string to clean.

    Returns:
        The input with all matches of ``<.*?>`` removed.
    """
    return TAG_PATTERN.sub('', text)


def fetch_hot_posts():
    """Fetch the hot-list JSON and return the value stored under ``'list'``.

    The home page is requested first on the same session, and its response
    is discarded -- presumably so the session picks up cookies that the JSON
    endpoint needs (TODO confirm).

    Returns:
        The object found under the ``'list'`` key of the decoded JSON body.

    Raises:
        requests.HTTPError: If the hot-list request returns an error status.
        KeyError: If the decoded JSON has no ``'list'`` key.
    """
    # The context manager closes the session (and its pooled connections)
    # even if one of the requests raises.
    with requests.Session() as session:
        session.get(home_url, headers=headers, timeout=REQUEST_TIMEOUT)
        res = session.get(ta_url, headers=headers, timeout=REQUEST_TIMEOUT)
        # Fail with a clear HTTP error instead of a JSON/KeyError later on.
        res.raise_for_status()
        res_data = res.json()  # decoded JSON body; indexed like a dict below
        return res_data['list']


def main():
    """Print the author, raw title and tag-stripped text of each hot post."""
    for post in fetch_hot_posts():
        # Each entry is indexed like a dict describing one post.
        screen_name = post['user']['screen_name']  # author's user name
        raw_title = post['rawTitle']
        text = strip_tags(post['text'])
        print(screen_name, raw_title)
        print(text)
        # Blank line between posts in the console output.
        print()


if __name__ == "__main__":
    main()