# -*- coding: utf-8 -*-
import ast
import json
import math
import urllib

import numpy as np
import pandas as pd
import requests
import shapefile
# Constants for the coordinate conversion to WGS84.
# NOTE(review): x_pi is not referenced by any code visible in this file.
x_pi = 3.14159265358979324 * 3000.0 / 180.0
pi = 3.1415926535897932384626 # pi, the circle constant
a = 6378245.0 # semi-major axis
ee = 0.00669342162296594323 # eccentricity squared
def gcj02_to_wgs84(lng, lat):
    """
    Convert a GCJ02 ("Mars coordinate system") position to GPS84.

    The conversion script originates from GitHub; thanks to its author.

    The function evaluates the offset terms from ``_transformlat`` and
    ``_transformlng`` at ``(lng - 105.0, lat - 35.0)``, rescales them using
    the module constants ``a``, ``ee`` and ``pi``, and returns the input
    reflected about the shifted point (``2 * value - (value + offset)``).

    :param lng: longitude in the GCJ02 coordinate system
    :param lat: latitude in the GCJ02 coordinate system
    :return: two-element list ``[lng, lat]`` with the converted coordinate
    """
    # NOTE: the original carried a commented-out ``out_of_china`` bounds
    # check; it stays disabled, so the offset is applied to every input.
    shifted_lng = lng - 105.0
    shifted_lat = lat - 35.0
    lat_offset = _transformlat(shifted_lng, shifted_lat)
    lng_offset = _transformlng(shifted_lng, shifted_lat)

    lat_rad = lat / 180.0 * pi
    sin_lat = math.sin(lat_rad)
    magic = 1 - ee * sin_lat * sin_lat
    sqrt_magic = math.sqrt(magic)

    # Rescale the raw offsets with the ellipsoid constants.
    lat_offset = (lat_offset * 180.0) / ((a * (1 - ee)) / (magic * sqrt_magic) * pi)
    lng_offset = (lng_offset * 180.0) / (a / sqrt_magic * math.cos(lat_rad) * pi)

    shifted_point_lat = lat + lat_offset
    shifted_point_lng = lng + lng_offset
    return [lng * 2 - shifted_point_lng, lat * 2 - shifted_point_lat]
def _transformlat(lng, lat):
    """
    Compute the raw latitude offset term used by ``gcj02_to_wgs84``.

    The result is a polynomial in ``lng`` and ``lat`` plus three sinusoidal
    correction terms; the caller rescales it before use.

    :param lng: longitude argument (the caller passes ``lng - 105.0``)
    :param lat: latitude argument (the caller passes ``lat - 35.0``)
    :return: the unscaled latitude offset as a float
    """
    polynomial = (-100.0 + 2.0 * lng + 3.0 * lat + 0.2 * lat * lat
                  + 0.1 * lng * lat + 0.2 * math.sqrt(math.fabs(lng)))
    lng_wave = (20.0 * math.sin(6.0 * lng * pi)
                + 20.0 * math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    lat_wave_fast = (20.0 * math.sin(lat * pi)
                     + 40.0 * math.sin(lat / 3.0 * pi)) * 2.0 / 3.0
    lat_wave_slow = (160.0 * math.sin(lat / 12.0 * pi)
                     + 320 * math.sin(lat * pi / 30.0)) * 2.0 / 3.0
    # Summed left to right so the floating-point result matches the
    # step-by-step accumulation this replaces.
    return polynomial + lng_wave + lat_wave_fast + lat_wave_slow
def _transformlng(lng, lat):
    """
    Compute the raw longitude offset term used by ``gcj02_to_wgs84``.

    The result is a polynomial in ``lng`` and ``lat`` plus three sinusoidal
    correction terms in ``lng``; the caller rescales it before use.

    :param lng: longitude argument (the caller passes ``lng - 105.0``)
    :param lat: latitude argument (the caller passes ``lat - 35.0``)
    :return: the unscaled longitude offset as a float
    """
    polynomial = (300.0 + lng + 2.0 * lat + 0.1 * lng * lng
                  + 0.1 * lng * lat + 0.1 * math.sqrt(math.fabs(lng)))
    wave_fast = (20.0 * math.sin(6.0 * lng * pi)
                 + 20.0 * math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    wave_mid = (20.0 * math.sin(lng * pi)
                + 40.0 * math.sin(lng / 3.0 * pi)) * 2.0 / 3.0
    wave_slow = (150.0 * math.sin(lng / 12.0 * pi)
                 + 300.0 * math.sin(lng / 30.0 * pi)) * 2.0 / 3.0
    # Summed left to right so the floating-point result matches the
    # step-by-step accumulation this replaces.
    return polynomial + wave_fast + wave_mid + wave_slow
# Fetch the raw data
def _record_failure(line_name):
    """Append a failure note for *line_name* to ``./error.txt``."""
    # Explicit encoding and a trailing newline keep the log readable no
    # matter what the platform default encoding is.
    with open(r"./error.txt", mode="a", encoding="utf-8") as f:
        f.write("{}数据爬取失败\n".format(line_name))


def get_single_line(mykey, city_name, line_name):
    """
    Download one subway/bus line from the AMap ``bus/linename`` endpoint.

    The first matching line is flattened into a single row and appended
    (without header) to ``./<city_name>_subway_data.csv``. Any failure is
    printed and logged to ``./error.txt`` instead of being raised, so a
    caller looping over many lines keeps going.

    :param mykey: AMap web-service API key
    :param city_name: name of the city to search in
    :param line_name: name of the line, e.g. "11路" or "地铁一号线"
    :return: None
    """
    print("开始获取{}数据".format(line_name))
    # Passing the values as ``params`` lets requests escape them correctly.
    query = {
        "s": "rsv3",
        "extensions": "all",
        "key": mykey,
        "output": "json",
        "city": city_name,
        "offset": "1",
        "keywords": line_name,
        "platform": "JS",
    }
    try:
        response = requests.get(
            "https://restapi.amap.com/v3/bus/linename",
            params=query,
            timeout=30,  # never hang forever on a stalled connection
        )
        buslines = response.json()["buslines"]
        if not buslines:
            print("内容返回失败")
            _record_failure(line_name)
            return
        print("服务器成功返回数据")

        line = buslines[0]
        # Insertion order defines the CSV column order expected by readers.
        dt = {"line_name": line["name"]}
        for field in (
            "polyline",      # coordinates along the line, used to draw it
            "start_time",    # earliest operating time
            "end_time",      # latest operating time
            "start_stop",    # name of the first station
            "end_stop",      # name of the last station
            "distance",      # length of the line
            "basic_price",   # starting fare
            "total_price",   # full-trip fare
        ):
            dt[field] = line[field]
        # Station names and their coordinates, in line order.
        dt["station_name"] = [stop["name"] for stop in line["busstops"]]
        dt["station_coords"] = [stop["location"] for stop in line["busstops"]]

        # Save the row
        data = pd.DataFrame([dt])
        data.to_csv(r"./{}_subway_data.csv".format(city_name), mode="a",
                    header=False, encoding="utf_8_sig")
        print("{}数据爬取成功".format(line_name))
    except (requests.RequestException, ValueError, KeyError, IndexError,
            TypeError, OSError) as err:
        # Network errors, non-JSON or unexpectedly shaped responses, and
        # file-write problems are all logged as a failed download.
        print("{}数据爬取失败".format(line_name))
        print(repr(err))
        _record_failure(line_name)
# Build the station data
def get_station():
    """
    Export the stations of every downloaded line to a point shapefile.

    Reads ``./<city_name>_subway_data.csv`` (``city_name`` is the
    module-level global set by the script entry point), expands the
    per-line station lists, keeps the first occurrence of each station
    name, converts the coordinates with ``gcj02_to_wgs84`` and writes
    ``./<city_name>/station.shp``.

    :return: None
    """
    columns = ["id", "line_name", "polyline", "start_time", "end_time",
               "start_stop", "end_stop", "distance", "basic_price",
               "total_price", "station_name", "station_coords"]
    csv_path = r"./{}_subway_data.csv".format(city_name)
    # The downloader writes the CSV as utf_8_sig; gbk is kept as a fallback
    # for files that were re-saved in that encoding.
    try:
        data = pd.read_csv(csv_path, encoding="utf_8_sig", engine="python",
                           names=columns)
    except UnicodeDecodeError:
        data = pd.read_csv(csv_path, encoding="gbk", engine="python",
                           names=columns)
    print(data)

    stations = []
    seen_names = set()
    for line_name, raw_names, raw_coords in zip(
            data["line_name"], data["station_name"], data["station_coords"]):
        # Both columns hold the text form of a Python list of strings.
        names = ast.literal_eval(raw_names)
        coords = ast.literal_eval(raw_coords)
        for station_name, coord in zip(names, coords):
            # Drop repeated stations, keeping the first line they appear on.
            if station_name in seen_names:
                continue
            seen_names.add(station_name)
            lng, lat = coord.split(",")[:2]
            # Convert once and reuse both components.
            lng84, lat84 = gcj02_to_wgs84(float(lng), float(lat))
            stations.append((line_name, station_name, lng, lat, lng84, lat84))

    # Write the shapefile; the context manager closes it even on error.
    # NOTE(review): the previous code passed encode="gbk" to record(), which
    # pyshp appears to ignore when positional values are given, so the
    # writer's default text encoding is what was (and still is) used.
    with shapefile.Writer(r"./{}/station.shp".format(city_name)) as w:
        for field_name in ("line_name", "station_name", "lng", "lat",
                           "lng84", "lat84"):
            w.field(field_name, "C")
        for line_name, station_name, lng, lat, lng84, lat84 in stations:
            w.point(lng84, lat84)
            w.record(line_name, station_name, lng, lat, lng84, lat84)
# Build the line data
def get_line():
    """
    Export every downloaded line to a polyline shapefile.

    Reads ``./<city_name>_subway_data.csv`` (``city_name`` is the
    module-level global set by the script entry point), converts each
    polyline vertex with ``gcj02_to_wgs84`` and writes
    ``./<city_name>/line.shp`` with one record per line.

    :return: None
    """
    columns = ["id", "line_name", "polyline", "start_time", "end_time",
               "start_stop", "end_stop", "distance", "basic_price",
               "total_price", "station_name", "station_coords"]
    csv_path = r"./{}_subway_data.csv".format(city_name)
    # The downloader writes the CSV as utf_8_sig; gbk is kept as a fallback
    # for files that were re-saved in that encoding.
    try:
        data = pd.read_csv(csv_path, encoding="utf_8_sig", engine="python",
                           names=columns)
    except UnicodeDecodeError:
        data = pd.read_csv(csv_path, encoding="gbk", engine="python",
                           names=columns)

    # Write the shapefile; the context manager closes it even on error.
    # NOTE(review): the previous code passed encode="gbk" to record(), which
    # pyshp appears to ignore when positional values are given, so the
    # writer's default text encoding is what was (and still is) used.
    with shapefile.Writer(r"./{}/line.shp".format(city_name)) as w:
        for field_name in ("line_name", "start_time", "end_time",
                           "start_stop", "end_stop", "distance",
                           "basic_price", "total_price"):
            w.field(field_name, "C")
        for row in data.itertuples(index=False):
            # "lng,lat;lng,lat;..." -> list of converted [lng, lat] pairs.
            path = []
            for point in row.polyline.split(";"):
                if not point:
                    continue  # tolerate a trailing or doubled separator
                lng, lat = point.split(",")[:2]
                path.append(gcj02_to_wgs84(float(lng), float(lat)))
            w.line([path])
            w.record(row.line_name, row.start_time, row.end_time,
                     row.start_stop, row.end_stop, row.distance,
                     row.basic_price, row.total_price)
if __name__ == "__main__":
    # Script configuration. ``city_name`` is also read as a global by
    # get_station() and get_line().
    key = "***"  # enter your API key here
    city_name = "武汉市"  # enter the name of the target city here
    all_line_name = ['1号线']  # enter the subway line names to download here

    for line_name in all_line_name:
        get_single_line(key, city_name, line_name)

    # These calls are disabled here; enable them to build the shapefiles
    # from the downloaded CSV.
    # get_station()
    # get_line()
# Python crawler: fetching subway-line vector data
# First published 2021-11-13 21:42:57