Numpy互操作-iObjects Python with JupyterHub for K8s
Numpy是Python科学计算的常用库,也在机器学习领域具有重要的作用。Numpy主要实现矩阵运算功能,这里的教程将地理空间数据集与Numpy实现相互转换,从而可以将Python的更多通用库用于地理空间分析,包括矢量和栅格数据的转换。进一步,可以利用Pandas和GeoPandas的相关功能。
from iobjectspy import (datasetvector_to_numpy_array,
numpy_array_to_datasetvector,
datasetraster_to_numpy_array,
numpy_array_to_datasetraster)
import numpy as np
import os
# 设置示例数据路径
#example_data_dir = ''
example_data_dir = '/home/jovyan/data/smdata/'
# 设置结果输出路径
out_dir = os.path.join(example_data_dir, 'out')
if not os.path.exists(out_dir):
os.makedirs(out_dir)
def vector_to_numpy_test():
"""读取矢量数据集 Town_P 到 numpy 数组中"""
narray1 = datasetvector_to_numpy_array(os.path.join(example_data_dir, 'example_data.udb/Landuse_R'), export_spatial=True)
print(narray1.dtype)
print(narray1[:10])
narray = datasetvector_to_numpy_array(os.path.join(example_data_dir, 'example_data.udb/Town_P'), export_spatial=True)
if narray is None:
print('读取矢量数据集到 numpy 数组失败')
else:
print('ndarray.ndim : ' + str(narray.ndim))
print('ndarray.dtype : ' + str(narray.dtype))
print(narray[:10])
print(narray['SmX'][:10])
print('读取矢量数据集到 numpy 数组成功')
xy_array = np.c_[narray['SmX'], narray['SmY']][:10]
print(xy_array.ndim)
print(xy_array.dtype)
print(xy_array)
try:
# 使用 hdbscan 做层次聚类,并将结果在 matplotlib 显示
% matplotlib inline
from hdbscan import HDBSCAN
import matplotlib.pyplot as plt
xy_c = np.c_[narray['SmX'], narray['SmY']]
hdb = HDBSCAN(min_cluster_size=10).fit(xy_c)
hdb_labels = hdb.labels_
n_clusters_hdb_ = len(set(hdb_labels)) - (1 if -1 in hdb_labels else 0)
hdb_unique_labels = set(hdb_labels)
hdb_colors = plt.cm.Spectral(np.linspace(0, 1, len(hdb_unique_labels)))
fig = plt.figure(figsize=plt.figaspect(1))
hdb_axis = fig.add_subplot('111')
for k, col in zip(hdb_unique_labels, hdb_colors):
if k == -1:
col = 'k'
hdb_axis.plot(xy_c[hdb_labels == k, 0], xy_c[hdb_labels == k, 1], 'o', markerfacecolor=col,
markeredgecolor='k', markersize=6)
hdb_axis.set_title('HDBSCAN\nEstimated number of clusters: %d' % n_clusters_hdb_)
plt.show()
except ImportError:
pass
def numpy_to_vector_test():
"""写入数据到矢量数据集中"""
narray = np.empty(10, dtype=[('ID', 'int32'), ('X', 'float64'), ('Y', 'float64'), ('NAME', 'U10')])
narray[0] = 1, 116.380351, 39.93393099, '什刹海'
narray[1] = 2, 116.365305, 39.89622499, '广安门内'
narray[2] = 3, 116.427342, 39.89467499, '崇文门外'
narray[3] = 4, 116.490881, 39.96567299, '酒仙桥'
narray[4] = 5, 116.447486, 39.93767799, '三里屯'
narray[5] = 6, 116.347435, 40.08078599, '回龙观'
narray[6] = 7, 116.407196, 39.83895899, '大红门'
narray[7] = 8, 116.396915, 39.88371499, '天桥'
narray[8] = 9, 116.334522, 40.03594199, '清河'
narray[9] = 10, 116.03008, 39.87852799, '潭柘寺'
print(narray)
result = numpy_array_to_datasetvector(narray, os.path.join(out_dir, 'out_numpy_array.udb'), x_col='X', y_col='Y')
if result is not None:
if isinstance(result, str):
if isinstance(result, str):
print('从 numpy 数组中写入数据到矢量数据集 %s 成功' % result)
else:
print('从 numpy 数组中写入数据到矢量数据集 %s 成功' % result.name)
else:
print('从 numpy 数组中写入数据到矢量数据集失败')
def raster_to_numpy_test():
"""从栅格数据 DEM 中读取数据到 numpy 数组中"""
narray = datasetraster_to_numpy_array(os.path.join(example_data_dir, 'example_data.udb/DEM'))
if narray is None:
print('读取栅格数据集到 numpy 数组失败')
else:
print('ndarray.ndim : ' + str(narray.ndim))
print('ndarray.dtype : ' + str(narray.dtype))
print(narray)
print('读取栅格数据集到 numpy 数组成功')
numpy_array_to_datasetraster(narray, 0.001, 0.001, os.path.join(out_dir, 'out_numpy_array.udb'), as_grid=False)
def numpy_to_raster_test():
"""从 numpy 的数组二进制文件中加载数据,写道栅格数据集中"""
narray = np.load(os.path.join(example_data_dir, 'dem.npy'))
print(narray)
result = numpy_array_to_datasetraster(narray, 0.001, 0.001, os.path.join(example_data_dir, 'out_numpy_array.udb'),
as_grid=True)
if result is not None:
if isinstance(result, str):
print('从 numpy 数组中写入数据到栅格数据集 %s 成功' % result)
else:
print('从 numpy 数组中写入数据到栅格数据集 %s 成功' % result.name)
else:
print('从 numpy 数组中写入数据到栅格数据集失败')
if __name__ == '__main__':
# 读取矢量数据集到 numpy 数组成功
vector_to_numpy_test()
# 从 numpy 数组中写入数据到矢量数据集中成功
numpy_to_vector_test()
# 读取栅格数据集到 numpy 数组成功
raster_to_numpy_test()
# 从 numpy 数组中写入数据到栅格数据集中成功
numpy_to_raster_test()
[('LANDTYPE', '<U4'), ('Area', '<f4'), ('Area_1', '<i2'), ('SmX', '<f8'), ('SmY', '<f8'), ('SmPerimeter', '<f8'), ('SmArea', '<f8')]
[('用材林', 132., 132, 116.47779337, 40.87251703, 0.75917921, 1.40894401e-02)
('用材林', 97., 97, 116.6540059 , 40.94696274, 0.4945153 , 1.03534475e-02)
('灌丛', 36., 36, 116.58451795, 40.98712283, 0.25655489, 3.89923745e-03)
('灌丛', 36., 36, 116.89611418, 40.76792703, 0.59237713, 3.81791878e-03)
('用材林', 1., 1, 116.37943683, 40.91435429, 0.03874328, 7.08450886e-05)
('灌丛', 126., 126, 116.49117083, 40.78302383, 0.53664074, 1.34577856e-02)
('用材林', 83., 83, 116.69943237, 40.74456848, 0.39696365, 8.83225363e-03)
('用材林', 128., 128, 116.8129727 , 40.69116153, 0.56949408, 1.35877743e-02)
('用材林', 29., 29, 116.24543769, 40.71076092, 0.30082509, 3.07221559e-03)
('灌丛', 467., 467, 116.43290772, 40.50875567, 1.91745792, 4.95537433e-02)]
ndarray.ndim : 1
ndarray.dtype : [('NAME', '<U9'), ('SmX', '<f8'), ('SmY', '<f8')]
[('百尺竿乡', 115.917748, 39.53525099) ('什刹海', 116.380351, 39.93393099)
('月坛', 116.344828, 39.91476099) ('广安门内', 116.365305, 39.89622499)
('牛街', 116.36388 , 39.88680299) ('崇文门外', 116.427342, 39.89467499)
('永定门外', 116.402249, 39.86559299) ('崔各庄', 116.515447, 39.99966499)
('小关', 116.411727, 39.97737199) ('潘家园', 116.467911, 39.87179299)]
[115.917748 116.380351 116.344828 116.365305 116.36388 116.427342
116.402249 116.515447 116.411727 116.467911]
读取矢量数据集到 numpy 数组成功
2
float64
[[115.917748 39.53525099]
[116.380351 39.93393099]
[116.344828 39.91476099]
[116.365305 39.89622499]
[116.36388 39.88680299]
[116.427342 39.89467499]
[116.402249 39.86559299]
[116.515447 39.99966499]
[116.411727 39.97737199]
[116.467911 39.87179299]]
[( 1, 116.380351, 39.93393099, '什刹海')
( 2, 116.365305, 39.89622499, '广安门内')
( 3, 116.427342, 39.89467499, '崇文门外')
( 4, 116.490881, 39.96567299, '酒仙桥') ( 5, 116.447486, 39.93767799, '三里屯')
( 6, 116.347435, 40.08078599, '回龙观') ( 7, 116.407196, 39.83895899, '大红门')
( 8, 116.396915, 39.88371499, '天桥') ( 9, 116.334522, 40.03594199, '清河')
(10, 116.03008 , 39.87852799, '潭柘寺')]
从 numpy 数组中写入数据到矢量数据集 NewDataset 成功
DEM
ndarray.ndim : 2
ndarray.dtype : int16
[[ 360 357 353 ... 1373 1320 1265]
[ 349 353 355 ... 1354 1292 1259]
[ 353 358 356 ... 1375 1341 1319]
...
[ 741 756 745 ... 819 836 860]
[ 700 713 718 ... 821 848 872]
[ 684 696 696 ... 828 854 880]]
读取栅格数据集到 numpy 数组成功
[[ 360 357 353 ... 1373 1320 1265]
[ 349 353 355 ... 1354 1292 1259]
[ 353 358 356 ... 1375 1341 1319]
...
[ 741 756 745 ... 819 836 860]
[ 700 713 718 ... 821 848 872]
[ 684 696 696 ... 828 854 880]]
从 numpy 数组中写入数据到栅格数据集 NewDataset 成功