PostgreSQL - 01 PostgreSQL + PostGIS + Docker 空间计算!判断坐标点是否在某个区域中 POINT MULTIPOLYGON ST_Contains

代码仓库

所有东西已经打包放到GitHhub,需要的小伙伴可以直接拉取。顺手可以帮忙点个Star!
https://github.com/turbo-duck/postgre-gis

请添加图片描述

背景介绍

我们有一批坐标点和一批区域点。
比如: 100万个点,和10万个区域块。
我们遇到了这样的需求:这100万个点,分别属于哪个区域块?

初始解决

通过空间的计算方法,我们用 O(n2)的方式:

# 伪代码
for poi in poi_list:
	for aoi in aoi_list:
		# 做一些空间计算
		result = compute(aoi, poi)
		if result:
			# 在这个空间
			save()
		else:
			# 不在这个空间
			other()

但是这样的计算时间实在是太久,于是我们用空间数据库进行优化,于是有了下文:
O(n2) -> O(n),之前需要跑三天三夜的结果,现在只需要短短的几秒就可以跑完。

Docker

FROM postgis/postgis:latest
ENV POSTGRES_DB=space
ENV POSTGRES_USER=postgres
ENV POSTGRES_PASSWORD=123123
EXPOSE 5432
COPY init-db.sh /docker-entrypoint-initdb.d/

init-db.sh

#!/bin/bash
set -e
psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL
    CREATE EXTENSION IF NOT EXISTS postgis;
EOSQL

psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL
  CREATE TABLE "public"."aoi" (
    "name" varchar(500) COLLATE "pg_catalog"."default" NOT NULL,
    "geom" geometry(GEOMETRY) NOT NULL
  );
  ALTER TABLE "public"."aoi"
    OWNER TO "postgres";
  CREATE INDEX "geom" ON "public"."aoi" USING btree (
    "geom" "public"."btree_geometry_ops" ASC NULLS LAST
  );
  CREATE INDEX "name" ON "public"."aoi" USING btree (
    "name" COLLATE "pg_catalog"."default" "pg_catalog"."text_ops" ASC NULLS LAST
  );
  CREATE INDEX "name_geom" ON "public"."aoi" USING btree (
    "name" COLLATE "pg_catalog"."default" "pg_catalog"."text_ops" ASC NULLS LAST,
    "geom" "public"."btree_geometry_ops" ASC NULLS LAST
  );
EOSQL

psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL
  CREATE TABLE "public"."poi" (
    "name" varchar(500) COLLATE "pg_catalog"."default" NOT NULL,
    "geom" geometry(GEOMETRY) NOT NULL
  );
  ALTER TABLE "public"."poi"
    OWNER TO "postgres";
  CREATE INDEX "geom_copy1" ON "public"."poi" USING btree (
    "geom" "public"."btree_geometry_ops" ASC NULLS LAST
  );
  CREATE INDEX "name_copy1" ON "public"."poi" USING btree (
    "name" COLLATE "pg_catalog"."default" "pg_catalog"."text_ops" ASC NULLS LAST
  );
  CREATE INDEX "name_geom_copy1" ON "public"."poi" USING btree (
    "name" COLLATE "pg_catalog"."default" "pg_catalog"."text_ops" ASC NULLS LAST,
    "geom" "public"."btree_geometry_ops" ASC NULLS LAST
  );
EOSQL

写入 AOI

请添加图片描述

import pandas as pd
import psycopg2
from openpyxl import Workbook
import re


wb = Workbook()
ac = wb.active
line = ['name', 'data', 'origin']
ac.append(line)
# 是否出现错误
error_flag = 0
conn = psycopg2.connect(database="space", user="postgres", password="123123", host="localhost")
cur = conn.cursor()
aoi_file = pd.read_excel("./aoi.xlsx", engine='openpyxl')
line_num = aoi_file.shape[0]

# 清空表
clean_sql = "DELETE FROM aoi WHERE 1=1"
cur.execute(clean_sql)
conn.commit()

for i in range(0, line_num):
    get_name = aoi_file.iat[i, 0]
    '''
    {"type":
        "Polygon",
        "coordinates": [[[
            121.342079,31.417835],[121.342105,31.417812],[121.34214,31.417802],
            [121.342177,31.4178],[121.343274,31.417851],[121.343306,31.41786],[121.343333,31.417879],
            [121.343354,31.417911],[121.343364,31.417945],[121.343424,31.419663],[121.343423,31.419707],
            [121.343414,31.419746],[121.343384,31.419832],[121.343351,31.419903],[121.343318,31.419927],
            [121.343276,31.419931],[121.341537,31.419355],[121.341508,31.419332],[121.341486,31.419301],
            [121.341486,31.419268],[121.341496,31.41923],[121.342056,31.417871],[121.342079,31.417835
        ]]]}
    '''
    get_geom = aoi_file.iat[i, 1]
    origin_geom = get_geom
    get_geom = re.sub(' ', '', str(get_geom))
    get_geom = re.sub('MultiPolygon', 'Polygon', str(get_geom))
    get_geom = re.sub("\[\[\[\[", '[[[', str(get_geom))
    get_geom = re.sub("\]\]\]\]", ']]]', str(get_geom))
    get_geom = re.sub('\{', ' ', str(get_geom))
    get_geom = re.sub('\}', ' ', str(get_geom))
    get_geom = re.sub('"type":"Polygon"', "", str(get_geom))
    get_geom = re.sub('"coordinates":\[\[\[', '', str(get_geom))
    get_geom = re.sub('\]\]\]', '', str(get_geom))
    get_geom = re.sub(' ', '', str(get_geom))
    get_geom = re.sub(',', ' ', str(get_geom))
    get_geom = re.sub('\] \[', ',', str(get_geom))
    print(get_name)
    # INSERT INTO geometries VALUES(
    # 'B0FFG1YVX3',
    # 'MULTIPOLYGON(((120.394543 36.113915,120.39454 36.113898,120.394539 36.113869,120.394548
    # 36.113844,120.394586 36.113812,120.396095 36.11299,120.396118 36.112986,120.396133 36.112988
    # ,120.39615 36.112998,120.39712 36.114184,120.39713 36.114219,120.397129 36.114262,120.397118 36.114297
    # ,120.397101 36.114325,120.397075 36.114359,120.397043 36.114389,120.396994 36.114425,120.39584 36.115065,
    # 120.395818 36.11507,120.395793 36.115071,120.395762 36.115068,120.395719 36.115058,120.395686 36.115043,
    # 120.395661 36.115029,120.394543 36.113915)))');
    sql = "INSERT INTO aoi VALUES('" + str(get_name) + "'," + \
        "'MULTIPOLYGON(((" + str(get_geom) + ")))');"
    try:
        cur.execute(sql)
        conn.commit()
    except Exception as e:
        error_flag = 1
        conn.rollback()
        line = [str(get_name), str(get_geom), str(origin_geom)]
        ac.append(line)
        print(e)
        print(sql)
conn.close()

# 0 no error | 1 have error
if error_flag == 1:
    try:
        wb.save("./error_data.xlsx")
    except PermissionError:
        wb.save("./new_error_data.xlsx")

写入 POI

请添加图片描述

import pandas as pd
import psycopg2
import re

conn = psycopg2.connect(database="space", user="postgres", password="123123", host="localhost")
cur = conn.cursor()
poi_file = pd.read_excel("./poi.xlsx", engine='openpyxl')
line_num = poi_file.shape[0]

# 清空表
clean_sql = "DELETE FROM poi WHERE 1=1"
cur.execute(clean_sql)
conn.commit()

for i in range(0, line_num):
    get_name = poi_file.iat[i, 0]
    '''
    121.56042,31.20459
    '''
    get_poi = poi_file.iat[i, 1]
    origin_poi = get_poi
    get_poi = re.sub(" ", "", str(get_poi))
    get_poi = re.sub(",", " ", str(get_poi))
    get_poi = re.sub(",", " ", str(get_poi))
    print(get_name)
    if str(get_name) == 'nan':
        continue
    # INSERT INTO geometries VALUES(
    # 'B0FFG1YVX3',
    # 'MULTIPOLYGON(((120.394543 36.113915,120.39454 36.113898,120.394539 36.113869,120.394548
    # 36.113844,120.394586 36.113812,120.396095 36.11299,120.396118 36.112986,120.396133 36.112988
    # ,120.39615 36.112998,120.39712 36.114184,120.39713 36.114219,120.397129 36.114262,120.397118 36.114297
    # ,120.397101 36.114325,120.397075 36.114359,120.397043 36.114389,120.396994 36.114425,120.39584 36.115065,
    # 120.395818 36.11507,120.395793 36.115071,120.395762 36.115068,120.395719 36.115058,120.395686 36.115043,
    # 120.395661 36.115029,120.394543 36.113915)))');
    sql = "INSERT INTO poi VALUES('" + str(get_name) + "'," + \
            "'POINT(" + str(get_poi) + ")');"
    try:
        cur.execute(sql)
    except Exception as e:
        print(e)
        print(f"sql => {sql}")
        print(f"get_name => {get_name} | get_poi => {get_poi}")
        exit(0)
conn.commit()
conn.close()

判断 POI IN AOI

请添加图片描述

import psycopg2
from openpyxl import Workbook

wb = Workbook()
ac = wb.active
line = ['poi_name', 'aoi_name']
ac.append(line)

conn = psycopg2.connect(database="nyc", user="postgres", password="123123", host="localhost")
cur = conn.cursor()
# 建立索引
# sql = "CREATE INDEX aoi_idx ON aoi USING GIST (geom);"
# cur.execute(sql)
# conn.commit()
# 查询结果
sql = "SELECT aoi.name, poi.name FROM aoi, poi where ST_Contains(aoi.geom, poi.geom);"
print("正在判断···")
cur.execute(sql)
rows = cur.fetchall()
for row in rows:
    result_aoi = row[0]
    result_poi = row[1]
    line = [str(result_poi), str(result_aoi)]
    ac.append(line)
    print(result_poi)
conn.close()
print("save xlsx")
wb.save("./poiInAoiResult.xlsx")
print("done")

  • 10
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
要搭建PostgreSQL PostGIS空间数据引擎,需按照以下步骤进行操作。 首先,确保已经安装了PostgreSQL数据库服务器。可以从官方网站上下载并安装最新版本的PostgreSQL。 完成安装后,打开命令行终端并切换到PostgreSQL的安装目录下的bin文件夹。在命令行输入以下命令来启动PostgreSQL: ``` pg_ctl -D 数据库存储路径 start ``` 数据库存储路径是指定数据库文件将保存的目录路径。将其替换为你想要使用的路径。 启动后,会在命令行显示一些数据库连接信息,包括端口号和数据库用户名等。 接下来,需要创建一个新的数据库。在命令行输入以下命令: ``` createdb -U 数据库用户名 -h localhost -p 端口号 新数据库名称 ``` 将数据库用户名、端口号和新数据库名称替换为真实值。 创建数据库后,需要在其安装PostGIS扩展。在命令行输入以下命令来连接到新数据库: ``` psql -U 数据库用户名 -h localhost -p 端口号 新数据库名称 ``` 进入数据库后,运行以下命令来创建PostGIS扩展: ``` CREATE EXTENSION postgis; ``` 该命令将在数据库创建PostGIS所需的表和函数。安装完成后,就可以在数据库存储和查询空间数据了。 最后,可以使用数据库管理工具,如pgAdmin等,连接到数据库进行操作和管理。可以使用这些工具来创建数据表、导入和导出空间数据,以及执行空间查询等操作。 总结一下,搭建PostgreSQL PostGIS空间数据引擎的主要步骤是:安装PostgreSQL,启动数据库服务器,创建新数据库,安装PostGIS扩展,然后使用数据库管理工具进行操作和管理。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值