import requests
import json
import re
# Scrape question titles and links from a Guokr "ask/highlight" listing page
# and save them as a JSON array of {"title": ..., "link": ...} objects.

# Browser-like User-Agent header sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'
}

# Listing page to fetch (page 1).
link = 'https://www.guokr.com/ask/highlight/?page=1'

# Captures (href, title) from each <h2><a ...>title</a></h2> entry.
# Both groups are non-greedy (`.*?`) so that several entries on one line are
# matched separately; the trailing `\n?` only allows the closing </a> to sit
# on the line after the title -- it does not affect greediness.
patt = re.compile(r' <h2><a target="_blank" href="(.*?)">(.*?\n?)</a></h2>')

# Destination file for the JSON output.
output_path = r'C:\Users\DELL\Desktop\python_wd\文本信息\\' + '果壳问答' + '.json'


def extract_questions(html):
    """Return the question entries found in *html*.

    Args:
        html: Page source as a string.

    Returns:
        A list of dicts with keys ``'title'`` and ``'link'``, in document
        order. Titles have surrounding whitespace (including the optional
        newline the pattern may capture) stripped. The list is empty when
        nothing matches.
    """
    return [
        {'title': title.strip(), 'link': href}
        for href, title in patt.findall(html)
    ]


def main():
    """Fetch the listing page, extract the entries and write them as JSON.

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-success HTTP status.
        OSError: if the output file cannot be written.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    resp = requests.get(link, headers=headers, timeout=10)
    # Fail loudly instead of parsing an error page.
    resp.raise_for_status()

    json_list = extract_questions(resp.text)
    # ensure_ascii=False keeps the Chinese text readable in the output file.
    json_content = json.dumps(json_list, ensure_ascii=False)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(json_content)


if __name__ == '__main__':
    main()
# 果壳问答 (Guokr Q&A)
# 最新推荐文章于 2020-05-03 18:43:32 发布 (blog page footer: latest recommended article published 2020-05-03 18:43:32)