"""将线上缺的点自动修复"""
import time
import pandas as pd
import os
from db_operator.postgresql import PostgreOp
from conf.path_config import data_dir
from api.sql.repair_sql import LineMissingPointRepair
from common.log_utils import get_logger
import re
re_linestring = re.compile(r'\d+\.*\d+')
logger = get_logger(__name__)
class LineMissingPoint(PostgreOp):
def __init__(self, project_id):
super(LineMissingPoint, self).__init__()
self.project_id = project_id
self.repair_sql = LineMissingPointRepair()
def execute(self):
self.line_missing_point_repair()
def line_missing_point_repair(self):
path = os.path.join(data_dir, self.project_id, 'check', 'missing_point_online_check')
pipe_data = pd.read_csv(os.path.join(path, 'missing_point_online_check.csv'))
# 检查为空的话就不需要修复
if not pipe_data.empty:
node_geom_sql = self.repair_sql.select_node_geom(table_name='dt_node_test', project_id=self.project_id)
node_res = self.query(node_geom_sql)
node_data = pd.DataFrame(node_res)
node_data['x_y'] = node_data[['xcoordinate', 'ycoordinate']].apply(tuple, axis=1)
node_geom_set = set(list(node_data['x_y']))
# node_geom_set = {(12893617.999, 4730216.758), (12893364.546, 4728780.312), (12895225.749, 4730928.182),
# (12893661.839, 4730856.111)}
x_source = []
y_source = []
x_target = []
y_target = []
for i in pipe_data['geom']:
multilinestring_list = tuple(map(float, re.findall(re_linestring, i)))
x_source.append(multilinestring_list[0])
y_source.append(multilinestring_list[1])
x_target.append(multilinestring_list[2])
y_target.append(multilinestring_list[3])
pipe_data.loc[:, 'x_source'] = x_source
pipe_data.loc[:, 'y_source'] = y_source
pipe_data.loc[:, 'x_target'] = x_target
pipe_data.loc[:, 'y_target'] = y_target
pipe_data['x_y_source'] = pipe_data[['x_source', 'y_source']].apply(tuple, axis=1)
pipe_data['x_y_target'] = pipe_data[['x_target', 'y_target']].apply(tuple, axis=1)
pipe_geom_set = set(list(pipe_data['x_y_source']) + list(pipe_data['x_y_target']))
# 需要进行线上缺点的坐标
geom_need_repair = list(pipe_geom_set.difference(node_geom_set))
# 将需要进行线上缺点的坐标分开为x、y形式,方便后续新的node表插入数据
x, y = map(list, zip(*geom_need_repair))
# 将source、target和他们的坐标组合成dict的形式,得到线坐标去重后的所有坐标
geom_source_dup = pipe_data[['source', 'x_y_source']].set_index('source').to_dict()['x_y_source']
geom_target_dup = pipe_data[['target', 'x_y_target']].set_index('target').to_dict()['x_y_target']
z = {**geom_source_dup, **geom_target_dup}
# 找出geom对应的source或者target,用作新的点表中的gid
node_new_gid = []
for i in geom_need_repair:
node_new_gid.append(list(filter(lambda k: z.get(k) == i, z.keys())))
node_new_gid = sum(node_new_gid, [])
# 组合新的点表数据方便后续插入点表中
node_new_data = pd.DataFrame({"gid": node_new_gid, "xcoordinate": x, "ycoordinate": y})
node_new_data['geom'] = node_new_data[['xcoordinate', 'ycoordinate']].apply(
lambda z: f"POINT({z.xcoordinate} {z.ycoordinate})", axis=1)
node_new_data['projectid'] = [self.project_id] * node_new_data.shape[0]
node_new_data['type'] = ['普通节点'] * node_new_data.shape[0]
node_new_data['dno'] = [12] * node_new_data.shape[0]
# 将数据插入表中
node_data_dict_insert = node_new_data.to_dict(orient='records')
node_columns_str = '({})'.format(', '.join(node_new_data.columns))
node_col_str2 = ''
for dic in node_data_dict_insert:
c = f"""({dic['gid']},{dic['dno']},'{dic['projectid']}',{dic['xcoordinate']},{dic['ycoordinate']},'{dic['type']}'
ST_GeomFromText('{dic['geom']}', 3857)),"""
node_col_str2 += c
node_sql_batch2 = "INSERT INTO dt_node_test {} VALUES ".format(node_columns_str) + node_col_str2[:-1]
self.insert(node_sql_batch2)
self.close()
else:
logger.debug('不存在需要修复的数据')
if __name__ == '__main__':
start_time = time.time()
LineMissingPoint('31196140-2a5f-11ed-bae8-aa1234567890').execute()
end_time = time.time()
print('')
print('程序累计用时{}秒'.format(end_time - start_time))
线上缺点修复
于 2023-05-09 15:19:58 首次发布