from openpyxl import Workbook
# Step 1: create a workbook.
wb = Workbook()

# Step 2: get the active worksheet.
# Note: the active worksheet object must be obtained first, otherwise
# nothing can be written.
ws = wb.active

# Step 3: write the column headers "title" and "link" as a row.
# The headers are string literals; the bare names title/link are not
# defined at this point and would raise NameError.
ws.append(['title', 'link'])
# Store the workbook with save().
wb.save('新闻.xlsx')

# Step 4: set the content of a cell.
ws['A1'] = "新的值"

# Save to an explicit path (file name included).
# Raw string so the backslashes stay literal; in a normal string "\t"
# would be turned into a TAB character and corrupt the path.
wb.save(r"D:\test_code\练习\新建.xlsx")
print(type(wb))
# 爬取代码
import os
import requests
import re
from jsonpath import jsonpath
from openpyxl import workbook
# Workbook shared by the functions below; rows go into its active sheet.
wb = workbook.Workbook()
ws = wb.active
# First row holds the column headers.
ws.append(['标题', '链接'])
def get_data(url, params=None):
    """Send a GET request to ``url`` and return the decoded JSON body.

    Args:
        url: Endpoint to request.
        params: Optional query parameters. When omitted, the module-level
            ``add`` dict is used, which is what existing callers rely on.

    Returns:
        The parsed JSON payload, or ``None`` when the request fails, the
        server answers with an error status, or the body is not valid JSON.

    Raises:
        NameError: If ``params`` is omitted and no module-level ``add``
            exists (a programming error, deliberately not swallowed).
    """
    headers = {
        "Cookie": "bbs_sid=e1tu5kp8d7f9ulrab3k3prdafr; cookie_test=FgWb0AuDkkS_2F_2BVDXlxvGO2e9_2Bra29FiSvIElDQjapyCh4MoW; Hm_lvt_4ab5ca5f7f036f4a4747f1836fffe6f2=1696498192; Hm_lpvt_4ab5ca5f7f036f4a4747f1836fffe6f2=1696498192",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.47",
    }
    if params is None:
        # Fall back to the query dict defined in the script's __main__ section.
        params = add
    try:
        # A timeout keeps a stalled connection from hanging the script forever.
        r = requests.get(url, headers=headers, params=params, timeout=10)
        print(r.status_code)
        r.raise_for_status()
        print("请求成功!")
        return r.json()
    except (requests.RequestException, ValueError):
        # RequestException covers connection errors, timeouts and HTTP error
        # statuses; ValueError covers a body that is not valid JSON.
        print("请求失败!")
        return None
def parse_data(data):
    """Print every title/URL pair found anywhere inside ``data``.

    Args:
        data: Decoded JSON payload (nested dicts/lists) to search.

    Returns:
        None. Pairs are printed; nothing is printed when either the
        ``title`` or the ``url`` lookup finds no match.
    """
    titles = jsonpath(data, '$..title')
    links = jsonpath(data, '$..url')
    # jsonpath() returns False instead of an empty list when nothing matches;
    # feeding that to zip() raises "'bool' object is not iterable".
    if not titles or not links:
        return
    for title, link in zip(titles, links):
        print(title)
        print(link)
        print(f"{'='*30}")
        # save_data(title, link)
        # NOTE(review): the call above was disabled by the original author with
        # a remark about a "bool is not iterable" error; presumably that came
        # from the unguarded jsonpath() result - confirm before re-enabling.
def save_data(title, link):
    """Append one ``[title, link]`` row to the sheet and save the workbook."""
    row = [title, link]
    ws.append(row)
    # The file is rewritten after each appended row.
    wb.save('新闻.xlsx')
if __name__ == "__main__":
    url = 'https://i.news.qq.com/trpc.qqnews_web.kv_srv.kv_srv_http_proxy/list'
    # Query parameters; get_data() reads this module-level dict by name.
    add = {
        'sub_srv_id': '24hours',
        'srv_id': 'pc',
        'offset': '0',
        'limit': '20',
        'strategy': '1',
        'ext': '{"pool":["top","hot"],"is_filter":7,"check_type":true}',
    }
    # Request offsets 0, 20, ..., 180: ten requests with a limit of 20 each.
    for i in range(0, 181, 20):
        add['offset'] = str(i)
        h = get_data(url)
        # get_data() yields None when the request fails; skip parsing then
        # instead of crashing, and carry on with the next offset.
        if h is not None:
            parse_data(h)
        # NOTE: i is the request offset, not a 1-based page index.
        print(f'第{i}页')