逆地理编码-离线版-part1

在数据处理过程中经常遇到经纬度需要解析成地址的问题,即逆地理编码。

如果为了获取高精度的数据,可以采用百度or高德的逆地理编码接口,但是个人请求量受限,企业版限额会高一些。

那有没有不限额的逆地理编码方案呢?

本文提供一种离线方案,实现经纬转地址信息的方案,精度为乡镇街道粒度,响应速度单核4ms/每条,能满足一般的逆地理编码需求。

本文分模块提供如下代码。

程序主入口为 getGeoInfo,具体参考如下代码。

from s2sphere import CellId

from s2sphere import LatLng

import GeoUtils
import S2Utils, LineUtils
from geo_obj import *
# pip --trusted-host pypi.tuna.tsinghua.edu.cn install s2sphere
from data_loader import boundaryAdminCell, boundaryIndex, boundaryData, adminData, streetData, cityBusinessArea, \
    cityLevelData

min_level = 12


def init(needBoundary=True, needArea=False, needStreet=True, needCityLevel=False):
    """
     按需初始化数据, 离线数据处理可以不加载该方法
    :param needBoundary:   是否需要加载边界数据,用于经纬度转换省市区县
    :param needArea:       是否需要加载商圈数据
    :param needStreet:     是否需要加载街道数据
    :param needCityLevel:  是否需要加载城市级别数据
    :return:
    """

    adminData
    if needBoundary:
        boundaryData
        boundaryIndex
        boundaryAdminCell

    if needStreet:
        streetData
    if needArea:
        cityBusinessArea
    if needCityLevel:
        cityLevelData


def determineAdminCode(lonIn: float, latIn: float, coordSys: CoordinateSystem = CoordinateSystem.GCJ02):
    gcj02LonLat = GeoUtils.toGCJ02(lonIn, latIn, coordSys)
    lon = gcj02LonLat[0]
    lat = gcj02LonLat[1]
    # print(lon, lat)
    # lon = 112.989382
    # lat = 28.147062

    s2LatLng = LatLng.from_degrees(lat, lon)
    id1 = CellId.from_lat_lng(s2LatLng).parent(min_level).id()

    id2 = CellId.from_lat_lng(s2LatLng).parent(min_level - 2).id()
    if GeoUtils.outOfChina(lon, lat):
        return -1
    elif boundaryAdminCell.get(id1) is not None:
        return boundaryAdminCell.get(id1)
    elif boundaryAdminCell.get(id2) is not None:
        return boundaryAdminCell.get(id2)
    else:
        keys = []
        maxLevel = 2000  # 必须大于2000m,否则会出现格子半径过小选择错误问题
        # 最远距离 为新疆哈密市80公里
        while len(keys) == 0 and maxLevel < 200000:
            newKeys = S2Utils.getCellId(s2LatLng, maxLevel, min_level)
            # newKeys = []
            for key in newKeys:
                if boundaryIndex.get(key) is not None:
                    # print("flatmap", key, maxLevel, boundaryIndex.get(key))
                    keys.extend(boundaryIndex.get(key))

            maxLevel = maxLevel * 2

        def fun_(key, s2LatLng):
            # 修改成 getutils distance
            # return (key, CellId(key).to_lat_lng().get_distance(s2LatLng))
            # return key, GeoUtils.get_earth_distance(CellId(key).to_lat_lng(), s2LatLng)
            latlng = CellId(key).to_lat_lng()
            dis = GeoUtils.get_earth_dist(latlng, s2LatLng)
            return key, dis

        # print('newKeys', newKeys)
        if len(keys) > 0:
            # lines1 = map(lambda key: fun_(key, s2LatLng),newKeys)
            lines1 = []
            for key in keys:
                lines1.append(fun_(key, s2LatLng))
            lines1.sort(key=lambda x: x[1])
            # take 5
            lines1_tak5 = lines1[0:5]
            # print("lines1_tak5 ", lines1_tak5)
            res1 = []
            # flatmap
            for startPoint in lines1_tak5:
                if boundaryData.get(startPoint[0]) is not None:
                    values = boundaryData.get(startPoint[0])
                    for value in values:
                        res1.extend(
                            [(startPoint[0], value[0], value[1], True), (startPoint[0], value[0], value[2], False)])
            lines1 = []
            for line in res1:
                start = CellId(line[0]).to_lat_lng()
                end = CellId(line[1]).to_lat_lng()
                dis = LineUtils.pointToLineDis(start.lng().degrees, start.lat().degrees, end.lng().degrees,
                                               end.lat().degrees, lon, lat)
                lines1.append((((start.lng().degrees, start.lat().degrees), (end.lng().degrees, end.lat().degrees),
                                line[2], line[3]), dis))
            # print("lines1 ", lines1)
            minDis = min([line[-1] for line in lines1])
            lines = [line[0] for line in lines1 if line[-1] == minDis]
            from itertools import groupby
            result = groupby(sorted(lines), key=lambda x: x[0])
            max_th = 0
            tp = []
            for key, group in result:
                group_list = list(group)
                if len(group_list) > max_th:
                    tp = group_list
                    max_th = len(group_list)
                elif len(group_list) == max_th:
                    pass
                    # tp.extend(group_list)
                # print(key, len(group_list))
            # print("lines ", tp)

            if len(tp) == 1:  # 国内海外边界
                line1 = tp[0]
                start = line1[0]
                end = line1[1]
                # 三点用行列式判断旋转方向
                angle = (start[0] - lon) * (end[1] - lat) - (end[0] - lon) * (start[1] - lat)
                if (angle < 0) == line1[3]:
                    return line1[2]
                else:
                    return -1
            elif len(tp) == 2:  # 两条线段,如果终点不同,则一定是国内和海外,并且点到线段距离最短点为起点,终点相同,则为国内两个区域边界
                line1 = tp[0]
                line2 = tp[-1]
                #  终点相同,为国内两个相邻区域,终点不同,为国界线
                start = line1[0] if (line1[1].__eq__(line2[1])) else line2[1]
                end = line1[1]
                #  三点用行列式判断旋转方向
                angle = (start[0] - lon) * (end[1] - lat) - (end[0] - lon) * (start[1] - lat)
                if (angle < 0) == line1[3]:
                    return line1[2]
                elif line1[1] == line2[1] and line1[3] != line2[3]:
                    return line2[2]
                else:
                    return -1
            else:  # 多区域顶点 判断
                #
                res1 = groupby(sorted(tp, key=lambda x: int(x[2])), key=lambda x: int(x[2]))
                res2 = []
                for key, group in res1:
                    group_list = list(group)
                    # print(len(group_list),group_list)
                    # continue
                    line1 = [s for s in group_list if s[3] == True][0]
                    line2 = [s for s in group_list if s[3] != True][0]
                    start = line2[1]
                    end = line1[1]
                    point = line1[0]
                    dis1 = LineUtils.lineDis(start[0], start[1], point[0], point[1])
                    dis2 = LineUtils.lineDis(end[0], end[1], point[0], point[1])
                    if dis1 > dis2:
                        start = (
                        point[0] + dis2 / dis1 * (start[0] - point[0]), point[1] + dis2 / dis1 * (start[1] - point[1]))
                    else:
                        end = (
                        point[0] + dis1 / dis2 * (end[0] - point[0]), point[1] + dis1 / dis2 * (end[1] - point[1]))
                    angle = (start[0] - lon) * (end[1] - lat) - (end[0] - lon) * (start[1] - lat)
                    res2.append((key, angle))
                res2 = sorted(res2, key=lambda x: x[1])
                return res2[0][0]
        else:
            return -1
    return -1


def determineAdmin(lon: float, lat: float, needStreet=False, coordSys: CoordinateSystem = CoordinateSystem.GCJ02):
    wgs84LonLat = GeoUtils.toWGS84(lon, lat, coordSys)
    code = determineAdminCode(wgs84LonLat[0], wgs84LonLat[1])
    if code != -1:
        district = adminData.get(code)
        if district.level == 'district':
            city = adminData.get(district.parentId)
        else:
            city = district

        if city.level == 'city':
            province = adminData.get(city.parentId)
        else:
            province = city

        street_code = 0
        street_name = ""
        min_dis = 100000000
        if needStreet:
            if len(district.children) > 0:
                for s in district.children:
                    value = streetData.get(s)
                    dis = GeoUtils.distance(value.center, Location(wgs84LonLat[0], wgs84LonLat[1]))
                    if min_dis > dis:
                        min_dis = dis
                        street_code = value.id
                        street_name = value.name

        if street_code > 0:
            return Admin.createStreet(province.name, city.name, district.name, street_name, province.id, city.id,
                                      district.id, street_code, district.center)
        else:
            return Admin.createDistrict(province.name, city.name, district.name, province.id, city.id, district.id,
                                        district.center)
    else:
        return Admin.createOversea()
    return code


def getCityLevelByAdmin(admin: Admin):
    return getCityLevel(str(admin.cityCode))


def getCityLevel(adcode_or_name: str):

    cityLevel = cityLevelData.get(adcode_or_name)
    if cityLevel is not None:
        return cityLevel
    return "未知"


def getGeoInfo(lng,lat):
    """

    :param lng:
    :param lat:
    :return:
     country: str,
     province: str,
     city: str,
     district: str,
     town: str,
     level: str,
     countryCode: str,
     provinceCode: int,
     cityCode: int,
     districtCode: int,
     townCode: int,
     center: Location
     adcode_or_name:
    """
    res = determineAdmin(lng, lat, needStreet=True, coordSys=CoordinateSystem.GCJ02)
    cityLevel = getCityLevelByAdmin(res)
    return cityLevel,res.country,res.province,res.city,res.district,res.town,res.level


if __name__ == '__main__':
    # res = determineAdmin(112.989382, 28.147062, CoordinateSystem.WGS84)
    init(needArea=True, needCityLevel=True)

    # res = determineAdmin(120.152983, 36.119759, coordSys=CoordinateSystem.GCJ02)
    # res = determineAdmin(116.9565868378, 39.6513677208, needStreet=True, coordSys=CoordinateSystem.WGS84)
    # res = determineAdmin(85.5670166016, 41.5548386631, needStreet=True, coordSys=CoordinateSystem.WGS84)
    # print(res)
    for e in [
        (87.6, 43.8),  # 新疆
        (114.2,22.3),  # 香港
        (118.4,24.8),  # 福建
    ]:
        lng, lat = e
        print('start', e)
        res = determineAdmin(lng, lat, needStreet=True, coordSys=CoordinateSystem.GCJ02)
        cityLevel = getCityLevelByAdmin(res)
        print(cityLevel, res)

    # 85.5670166016, 41.5548386631

参考:

1、本文主要参考GitHub: lnglat2Geo项目,该项目为scala版本。
本人花费大量时间改造成python版,请移步:https://github.com/matiji66/py-lnglat2Geo.git
2、s2-geometry:更高级的地理编码,采用球体编码,geohash采用二维编码
详见官网:https://s2geometry.io/
针对S2Geometry解释: https://blog.csdn.net/qq_43777978/article/details/116800460
3、s2sphere :python implementation of s2-geometry library
详见:https://s2sphere.readthedocs.io/en/latest/
4、逆地理编码线上测试:
https://developer.amap.com/demo/javascript-api/example/geocoder/regeocoding
5、【特征工程】处理经纬度的9种方法/技巧(转载)
https://blog.csdn.net/weixin_43615654/article/details/103495379

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

mtj66

看心情

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值