=_= 最近经常出现类似的学生作业，难度也不高，适合初学者学习参考一下。
-
目标接口
- GET https://www.ixigua.com/tlb/comment/article/v5/tab_comments/
-
接口参数
- offset 是翻页参数：0 为第一页，10 为第二页，20 为第三页，以此类推；每次请求只返回十条数据。
- 接口响应数据解析
- 目标数据都在响应 JSON 的 data 数组中，每个元素的 comment 字段包含 user_name、create_time 和 text。
上代码
import requests
import datetime
def timestamp_to_date(timestamp, format='%Y年%m月%d日 %H:%M:%S'):
    """Render a Unix timestamp as a formatted local-time string.

    Args:
        timestamp: Seconds since the epoch, as accepted by
            ``datetime.datetime.fromtimestamp``.
        format: ``strftime`` pattern used to render the result.

    Returns:
        The formatted date/time string.
    """
    # fromtimestamp() without a tz argument yields naive local time.
    return datetime.datetime.fromtimestamp(timestamp).strftime(format)
# HTTP headers passed to requests.get() by get_page(); the values imitate a
# desktop Chrome/Edge browser session.
headers = {
'accept': 'application/json, text/plain, */*',
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
# The id in this referer is the same value get_page() sends as group_id/item_id.
'referer': 'https://www.ixigua.com/6915270027096621576?id=6915270107413348871&logTag=784c499bbd6145d15b01',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0',
}
def get_page(page, df):
    """Fetch one page of comments and print each of them.

    Args:
        page: Value for the ``offset`` query parameter (0, 10, 20, ...); each
            request asks for 10 comments (``count=10``).
        df: Optional table that collects the comments. When it is not
            ``None``, one ``[user_name, formatted time, text]`` row is
            appended through ``df.loc[len(df)]`` (presumably a three-column
            pandas DataFrame -- confirm against the caller). Pass ``None``
            to only print.

    Returns:
        The number of comments found on this page. ``0`` means the response
        carried no ``data`` entries, which callers may treat as the end of
        the comment list.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or when the
            request exceeds the timeout.
    """
    response = requests.get(
        f'https://www.ixigua.com/tlb/comment/article/v5/tab_comments/?tab_index=0&count=10&offset={page}&group_id=6915270107413348871&item_id=6915270107413348871&aid=1768&msToken=Ki_9bRzC1L5EsyoRr_-TnG6M7KjctG0Yh-EL5Kf9JqI2Zl8PwWTz6-NLGqqACdwTm6KvGfSzcpslGN0HpS_36WfTh72XWrYOW18splwEgubOu_eyEe6b&X-Bogus=DFSzswVO/cxAN95mtU88ytkX95zU&_signature=_02B4Z6wo00001xgRRIQAAIDDLzWA2aSEROcYEUAAAKBt3IB0Yj7z-NvxFFnGPzKrjWzQCy2T4XjLaAHlLEDZD0qwyeIp6xiQE5g3TKwr8nX9Zty99xUw97N-cboToFwRuoB4raDmjPyE0Mbodc',
        headers=headers,
        # Without a timeout requests waits forever on a stalled connection.
        timeout=10,
    )
    # Surface HTTP errors instead of trying to parse an error page as JSON.
    response.raise_for_status()

    # 'data' may be absent or null (e.g. past the last page); treat both as
    # an empty page rather than crashing with KeyError/TypeError.
    results = response.json().get('data') or []
    for item in results:
        comment = item['comment']
        created = timestamp_to_date(comment['create_time'])
        if df is not None:
            df.loc[len(df)] = [comment['user_name'], created, comment['text']]
        print(f"用户: {comment['user_name']} 在{created} 说: {comment['text']}")
    return len(results)
if __name__ == '__main__':
    # Page through the comments: offsets 0, 10, ..., 2990 (300 requests).
    for offset in range(0, 3000, 10):
        get_page(offset, None)