1. Install Python from the official website
2. Replace the parameters in the script below (at minimum the appKey, the Cookie, and the csv_dir output path) and save it as a .py file
3. The script:
import json
import requests
import pandas as pd


def get_data(page):
    # Fetch one page of records from the Shenzhen open-data API; returns the parsed JSON,
    # or an empty dict if the request fails so the caller can simply skip that page.
    url = "https://opendata.sz.gov.cn/api/29200_00403602/1/service.xhtml"
    try:
        header = {
            'Accept': '*/*',
            'Accept-Language': 'zh-CN,zh;q=0.8',
            'Connection': 'keep-alive',
            'Host': 'opendata.sz.gov.cn',
            'Origin': 'https://opendata.sz.gov.cn',
            'Referer': 'https://opendata.sz.gov.cn/maintenance/personal/toApiTest',
            'Cookie': '_trs_uv=k1q8o8my_2368_4sr9; JSESSIONID=bb524432-c11d-4154-a813-7aefbc5a9f2d',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36',
        }
        formdata = {
            "page": page,                                  # page number to request
            "row": "1000",                                 # rows per page
            "appKey": "f322e1a4222c44fdbb5d29ab1be1b0a0",  # replace with your own appKey
        }
        response = requests.get(url, params=formdata, headers=header)
        if response.status_code == 200:
            return json.loads(response.text)
        return {}
    except Exception as e:
        print(e)
        print(page)
        return {}


def main_spider():
    requests.adapters.DEFAULT_RETRIES = 5
    for i in range(0, 10000):
        page = str(i)
        json_data = get_data(page)
        if json_data and 'data' in json_data:
            df = pd.DataFrame.from_dict(json_data['data'], orient='columns')
            # Append each page to one CSV; no header row so consecutive pages concatenate cleanly.
            df.to_csv(csv_dir + '深圳营运车辆.csv', mode='a', index=False, header=False)


if __name__ == "__main__":
    csv_dir = 'D:\\data\\csv\\'  # replace with your own output directory
    main_spider()
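Because the script appends every page to the same CSV without writing a header row, pandas should not treat the first data row as column names when you load the file later. A minimal read-back sketch (the path assumes the csv_dir used above):

import pandas as pd

df = pd.read_csv('D:\\data\\csv\\深圳营运车辆.csv', header=None)
print(len(df))  # number of records downloaded so far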
4. Install the dependency libraries
pip install pandas -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com
pip install requests -i http://pypi.douban.com/simple --trusted-host=pypi.douban.com
If the install fails, upgrade whatever the error message tells you to (I forget exactly what it was, but pip will print a hint).
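If the hint is about pip itself, the usual upgrade command is:

python -m pip install --upgrade pip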
I don't know why I can only scrape about 1,000,000 records even though the site shows there are more; if anyone can explain this, please do.
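1,000,000 records at 1,000 rows per page is exactly page 1,000, so one guess (not confirmed) is that the API stops serving data past a fixed page limit. A quick way to check, reusing get_data from the script above, is to print how many rows come back for pages on either side of that boundary:

# Diagnostic sketch: assumes get_data returns a dict with a 'data' list on success
# and an empty dict otherwise, as in the script above.
for i in range(995, 1006):
    page_data = get_data(str(i))
    rows = len(page_data.get('data', []))
    print('page', i, ':', rows, 'rows')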