这是《地图时空大数据爬取》第6章的内容,这篇博客主要是抓取一下行政区划数据,最小是能抓到区县的行政区划数据。然后书里是用arcpy和arcgis再加上python一起来处理的,有些麻烦,我统一用python来处理了,改写了下代码,因为很简单,就不多说了,直接放代码。
import basics
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import urllib
from urllib.parse import quote
import string
from shapely.geometry import Point
from shapely.geometry import Polygon
from shapely.geometry import LineString
import pyproj
# 功能:采集行政区域边界
# 返回保存边界信息的BoundryWithAttr对象
def getDistrictBoundry(ak,citycode):
districtBoundryUrl = "http://restapi.amap.com/v3/config/district?key="+ak+"&keywords="+citycode+"&subdistrict=0&extensions=all"
print(districtBoundryUrl)
districtBoundryUrl = quote(districtBoundryUrl, safe=string.printable)
json_obj = urllib.request.urlopen(districtBoundryUrl)
# json_obj = response.read().decode('utf-8','ignore').replace(u'\xa9', u'')
json_data=json.load(json_obj)
districts=json_data['districts']
try:
polyline = districts[0]['polyline']
centerLon=districts[0]['center'].split(',')[0]
centerLat=districts[0]['center'].split(',')[1]
except Exception as e:
print("error!")
pointscoords=polyline.split(';')
point=basics.PointWithAttr(0,centerLon,centerLat,"行政区域",citycode)
districtBoundry=basics.BoundryWithAttr(point,pointscoords)
return districtBoundry
ak = "放上你的ak就行"
citycodes={'上城区':'330102','下城区':'330103','江干区':'330104',"拱墅区":'330105',"西湖区":'330106','滨江区':'330108',
'萧山区':'330109','余杭区':'330110','富阳区':'330111','临安区':'330112','桐庐县':'330122','淳安县':'330127',
'建德市':'330182'}
polygon_list = []
centerX_list = []
centerY_list = []
for citycode in citycodes.keys():
districtBoundry = getDistrictBoundry(ak,citycode)
tmp = districtBoundry.boundrycoords
for i in range(len(tmp)):
tmp[i] = [float(tmp[i].split(',')[0]) , float(tmp[i].split(',')[1])]
tmp = Polygon(tmp)
polygon_list.append(tmp)
centerX_list.append(districtBoundry.point.lon)
centerY_list.append(districtBoundry.point.lat)
# 将爬取得到的杭州市行政区保存为shp文件
gdf = {'geometry':polygon_list , 'centerLon':centerX_list , 'centerLat':centerY_list ,
'name':citycodes.keys(),'adacode':citycodes.values()}
gdf = gpd.GeoDataFrame(gdf , crs = None)
gdf
geometry | centerLon | centerLat | name | adacode | |
---|---|---|---|---|---|
0 | POLYGON ((120.18806 30.25779, 120.18787 30.253... | 120.171465 | 30.250236 | 上城区 | 330102 |
1 | POLYGON ((120.19986 30.35099, 120.20082 30.349... | 120.172763 | 30.276271 | 下城区 | 330103 |
2 | POLYGON ((120.18806 30.25779, 120.18862 30.264... | 120.202633 | 30.266603 | 江干区 | 330104 |
3 | POLYGON ((120.19986 30.35099, 120.19912 30.350... | 120.150053 | 30.314697 | 拱墅区 | 330105 |
4 | POLYGON ((119.99634 30.18154, 120.00131 30.188... | 120.147376 | 30.272934 | 西湖区 | 330106 |
5 | POLYGON ((120.22106 30.23767, 120.22396 30.236... | 120.21062 | 30.206615 | 滨江区 | 330108 |
6 | POLYGON ((120.72195 30.28632, 120.70566 30.271... | 120.27069 | 30.162932 | 萧山区 | 330109 |
7 | POLYGON ((119.77202 30.54643, 119.77072 30.544... | 120.301737 | 30.421187 | 余杭区 | 330110 |
8 | POLYGON ((119.99634 30.18154, 119.99900 30.182... | 119.949869 | 30.049871 | 富阳区 | 330111 |
9 | POLYGON ((119.23637 29.95097, 119.23640 29.951... | 119.715101 | 30.231153 | 临安区 | 330112 |
10 | POLYGON ((119.44009 30.08720, 119.44179 30.087... | 119.685045 | 29.797437 | 桐庐县 | 330122 |
11 | POLYGON ((118.89741 30.01674, 118.89785 30.016... | 119.044276 | 29.604177 | 淳安县 | 330127 |
12 | POLYGON ((119.76530 29.59641, 119.76496 29.594... | 119.279089 | 29.472284 | 建德市 | 330182 |
gdf.plot()
gdf.to_file('.\\杭州市行政规划\\杭州市行政区划.shp', encoding="utf-8")
有一个问题无法解决,网上有人遇到这个问题,但是没有给出解决方案,一旦我把投影更改成epsg:4326之后,就无法成功将gdf写入到shp or geojson等文件中去了。若有大佬解决该问题还请教一下解决方案,谢谢。
print(gdf.crs)
None
gdf.crs = 'epsg:4326'
gdf.crs
<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
gdf.to_file('.\\杭州市行政规划\\杭州市行政区划.shp', encoding="utf-8")
---------------------------------------------------------------------------
CRSError Traceback (most recent call last)
<ipython-input-92-231f1b2525d8> in <module>
----> 1 gdf.to_file('.\\杭州市行政规划\\杭州市行政区划.shp', encoding="utf-8")
E:\Anaconda\lib\site-packages\geopandas\geodataframe.py in to_file(self, filename, driver, schema, index, **kwargs)
744 from geopandas.io.file import _to_file
745
--> 746 _to_file(self, filename, driver, schema, index, **kwargs)
747
748 def set_crs(self, crs=None, epsg=None, inplace=False, allow_override=False):
E:\Anaconda\lib\site-packages\geopandas\io\file.py in _to_file(df, filename, driver, schema, index, mode, crs, **kwargs)
253 crs_wkt = crs.to_wkt("WKT1_GDAL")
254 with fiona.open(
--> 255 filename, mode=mode, driver=driver, crs_wkt=crs_wkt, schema=schema, **kwargs
256 ) as colxn:
257 colxn.writerecords(df.iterfeatures())
E:\Anaconda\lib\site-packages\fiona\env.py in wrapper(*args, **kwargs)
398 def wrapper(*args, **kwargs):
399 if local._env:
--> 400 return f(*args, **kwargs)
401 else:
402 if isinstance(args[0], str):
E:\Anaconda\lib\site-packages\fiona\__init__.py in open(fp, mode, driver, schema, crs, encoding, layer, vfs, enabled_drivers, crs_wkt, **kwargs)
275 c = Collection(path, mode, crs=crs, driver=driver, schema=this_schema,
276 encoding=encoding, layer=layer, enabled_drivers=enabled_drivers, crs_wkt=crs_wkt,
--> 277 **kwargs)
278 else:
279 raise ValueError(
E:\Anaconda\lib\site-packages\fiona\collection.py in __init__(self, path, mode, driver, schema, crs, encoding, layer, vsi, archive, enabled_drivers, crs_wkt, ignore_fields, ignore_geometry, **kwargs)
153 self._check_schema_driver_support()
154 if crs_wkt or crs:
--> 155 self._crs_wkt = crs_to_wkt(crs_wkt or crs)
156
157 self._driver = driver
fiona\_crs.pyx in fiona._crs.crs_to_wkt()
CRSError: Invalid input to create CRS: GEOGCRS["WGS 84",DATUM["World Geodetic System 1984",ELLIPSOID["WGS 84",6378137,298.257223563,LENGTHUNIT["metre",1]]],PRIMEM["Greenwich",0,ANGLEUNIT["degree",0.0174532925199433]],CS[ellipsoidal,2],AXIS["geodetic latitude (Lat)",north,ORDER[1],ANGLEUNIT["degree",0.0174532925199433]],AXIS["geodetic longitude (Lon)",east,ORDER[2],ANGLEUNIT["degree",0.0174532925199433]],USAGE[SCOPE["unknown"],AREA["World"],BBOX[-90,-180,90,180]],ID["EPSG",4326]]