# Python web-scraping exercise: fetch the publication time of budget documents (python爬虫实战-获取预算文件公开时间)
import requests
import json
import pandas as pd
# HTTP headers sent with every request; the User-Agent mimics a desktop
# Chrome browser.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/78.0.3904.108 Safari/537.36"
    ),
}
# Value sent as the ``pid`` query parameter of the department-list request.
pid = 160
# Number of leading entries of the department list to skip.
starnum = 0
def getDepartment(pid, starnum):
    """Fetch the department list for ``pid`` and collect budget data for each.

    Requests the ``getdepartmentname.do`` endpoint, skips the first
    ``starnum`` entries of the decoded JSON response, and calls
    ``getBudgetFinal`` with the ``iid`` of every remaining entry.

    Args:
        pid: Value sent as the ``pid`` query parameter.
        starnum: Number of leading entries to skip. Zero or a negative
            value skips nothing.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If an entry has no ``iid`` field.
    """
    url = "http://yjsgk.jsczt.cn/front/department/getdepartmentname.do"
    # A timeout keeps an unresponsive server from hanging the script forever.
    res = requests.get(
        url,
        params={"pid": str(pid)},
        headers=headers,
        timeout=30,
    )
    # Fail with a clear HTTP error instead of a confusing JSON decode error.
    res.raise_for_status()
    # Assumes the endpoint returns a JSON array of objects -- TODO confirm.
    departments = json.loads(res.content)
    # Clamp at 0 so a negative starnum skips nothing rather than slicing
    # from the end of the list.
    for department in departments[max(starnum, 0):]:
        getBudgetFinal(department["iid"])
# Column-oriented accumulator: each key maps to its own, initially empty list.
dict1 = {column: [] for column in ('标题', '预算公开时间')}
def getBudgetFinal(iid):
    """Record the first two budget entries of one department and save them.

    Requests page 1 of the ``itemsandpag.do`` endpoint for the given group,
    appends the ``b_title`` and ``realTime`` of at most the first two items
    under ``budgetTemplates`` to the module-level ``dict1``, then calls
    ``write_to_excel``.

    Args:
        iid: Value sent as the ``groupid`` query parameter.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If ``budgetTemplates``, ``b_title`` or ``realTime`` is
            missing from the response.
    """
    url = "http://yjsgk.jsczt.cn/front/budgetfinal/itemsandpag.do"
    query = {"page_num": 1, "groupid": str(iid), "typeid": 2}
    # A timeout keeps an unresponsive server from hanging the script forever.
    res = requests.get(url, params=query, headers=headers, timeout=30)
    # Fail with a clear HTTP error instead of a confusing JSON decode error.
    res.raise_for_status()
    json_res = json.loads(res.content)
    # Only the first two entries are kept.
    # Assumes "budgetTemplates" is a JSON array -- TODO confirm.
    for data in json_res["budgetTemplates"][:2]:
        # Read both fields before appending so a missing key cannot leave
        # the two columns with different lengths.
        b_title = data["b_title"]
        realTime = data["realTime"]
        dict1['标题'].append(b_title)
        dict1['预算公开时间'].append(realTime)
    # Rewrite the spreadsheet after each department (presumably so partial
    # results are kept if a later request fails).
    write_to_excel()
def write_to_excel():
    """Write the contents of ``dict1`` to ``./预算公开文件时间.xlsx``.

    Builds a DataFrame from the module-level ``dict1`` and saves it without
    the index column.
    """
    pd.DataFrame(data=dict1).to_excel('./预算公开文件时间.xlsx', index=False)
# Start the crawl only when this file is executed as a script.
if __name__ == "__main__":
    getDepartment(pid, starnum)