import requests
import json.encoder
import pandas as pd
# 获取数据
def getData():
    """Download one table from data.stats.gov.cn and flatten it for CSV export.

    Sends a ``QueryData`` request to ``easyquery.htm`` with ``dbcode=fsnd``,
    ``rowcode=reg``, ``colcode=sj`` and the ``zb`` dimension fixed to
    ``A020101``, then converts the JSON reply into a header plus data rows.

    Returns:
        tuple: ``(column, temp_contents, name)``.
            ``column`` is the header list: ``name`` followed by the ``cname``
            of every column node.
            ``temp_contents`` is a list of rows, each one the row node's
            ``cname`` followed by that row's values.
            ``name`` is the ``cname`` of the first node of the first
            dimension in the reply.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the response body is not valid JSON.
        KeyError, IndexError: if the reply does not have the expected layout.
    """
    headers = {
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'X-Requested-With': 'XMLHttpRequest',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.116 Safari/537.36',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Referer': 'https://data.stats.gov.cn/easyquery.htm?cn=E0103',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }
    params = (
        ('m', 'QueryData'),
        ('dbcode', 'fsnd'),
        ('rowcode', 'reg'),
        ('colcode', 'sj'),
        ('wds', '[{"wdcode":"zb","valuecode":"A020101"}]'),
        ('dfwds', '[]'),
        # NOTE(review): looks like a millisecond timestamp captured once and
        # hard-coded -- confirm whether the server needs a fresh value.
        ('k1', '1625471171166'),
    )

    # 1. Fetch the data.
    # NOTE(review): verify=False disables TLS certificate verification.
    # Presumably this works around certificate problems on this host; confirm
    # before relying on the downloaded data.
    response = requests.get(
        'https://data.stats.gov.cn/easyquery.htm',
        headers=headers,
        params=params,
        verify=False,
        timeout=30,  # never block forever on an unresponsive server
    )
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()

    # 2. Parse the data and 3. shape it into header + rows.
    return _parse_query_result(json.loads(response.content))


def _parse_query_result(data):
    """Turn a decoded QueryData reply into ``(column, temp_contents, name)``."""
    returndata = data['returndata']
    wdnodes = returndata['wdnodes']
    datanodes = returndata['datanodes']

    name = wdnodes[0]['nodes'][0]['cname']
    row_nodes = wdnodes[1]['nodes']
    col_nodes = wdnodes[2]['nodes']

    # Header: the table name followed by one label per column node.
    column = [name] + [node['cname'] for node in col_nodes]

    # Body: datanodes is consumed as a flat list, one full row after another
    # (assumes the server returns values in row-major order -- TODO confirm).
    # The row width comes from the reply itself rather than a fixed 10, so
    # tables with a different number of columns stay aligned with the header.
    width = len(col_nodes)
    temp_contents = []
    for row_no, row in enumerate(row_nodes):
        start = row_no * width
        values = [
            datanodes[i]['data']['data'] for i in range(start, start + width)
        ]
        temp_contents.append([row['cname']] + values)

    return column, temp_contents, name
# 保存数据
def save_csv(path, column, temp_contents, encoding='gbk'):
    """Write tabular data to a CSV file, replacing any existing file.

    Args:
        path: Destination file path.
        column: List of column headers.
        temp_contents: List of rows; each row is a list with one value per
            header in ``column``.
        encoding: Text encoding of the output file. Defaults to ``'gbk'``,
            the encoding this function has always used; pass another codec
            (for example ``'utf-8-sig'``) when the data contains characters
            GBK cannot represent.
    """
    df = pd.DataFrame(temp_contents, columns=column)
    # index=False keeps pandas' automatic row index out of the file.
    df.to_csv(path, mode='w', index=False, encoding=encoding)
if __name__ == "__main__":
    # Script entry point: download the table, then store it next to the
    # script in a CSV file named after the table itself.
    column, temp_contents, name = getData()
    path = ''.join((name, '.csv'))
    save_csv(path, column, temp_contents)
# 爬取国家统计数据_GDP
# 最新推荐文章于 2023-06-13 09:20:52 发布