如何通过业务地址规范化后构建标准地址库-Java版

如何自动化构建中文标准地址库

地名作为最常用的社会公共信息,不仅与人们的日常生活息息相关,而且是政府行政行为、经济建设不可缺少的基础信息资源。在国家信息化体系中,地名是不可或缺的重要节点和桥梁,在信息传递中发挥着重要作用。

源码学习
https://gitee.com/addresstool/address

上干货——直接撸代码

一、构建中文地址库

		// Create the table that collects the raw address records.
		DataTable data = new DataTable();

		// Record 5: a complete room-level address (building 9, unit 1, room 1001).
		// "province" and "city" are deliberately left out; the sample output shows
		// them being filled in by the completion step further down.
		HashMap<String,String> unitOneRoom = new HashMap<>();
		unitOneRoom.put("county","江宁区");
		unitOneRoom.put("town","汤山街道");
		unitOneRoom.put("community","中前社区");
		unitOneRoom.put("aoi","大明湖畔");
		unitOneRoom.put("alias_aois","乾清宫");
		unitOneRoom.put("sub_aoi","北苑");
		unitOneRoom.put("road","宏运大道");
		unitOneRoom.put("road_no","123");
		unitOneRoom.put("alias_roads","天地大道#金山大道:9");  // road aliases
		unitOneRoom.put("building","9");
		unitOneRoom.put("unit","1");
		unitOneRoom.put("room","1001");
		unitOneRoom.put("id","5");
		data.addAddressDic(unitOneRoom);

		// Record 6: identical to record 5 except that it sits in unit 2.
		HashMap<String,String> unitTwoRoom = new HashMap<>();
		unitTwoRoom.put("county","江宁区");
		unitTwoRoom.put("town","汤山街道");
		unitTwoRoom.put("community","中前社区");
		unitTwoRoom.put("aoi","大明湖畔");
		unitTwoRoom.put("alias_aois","乾清宫");
		unitTwoRoom.put("sub_aoi","北苑");
		unitTwoRoom.put("road","宏运大道");
		unitTwoRoom.put("road_no","123");
		unitTwoRoom.put("alias_roads","天地大道#金山大道:9");  // road aliases
		unitTwoRoom.put("building","9");
		unitTwoRoom.put("unit","2");
		unitTwoRoom.put("room","1001");
		unitTwoRoom.put("id","6");
		data.addAddressDic(unitTwoRoom);

		// Records 7 and 8: only building/unit/room, with no district, road or AOI.
		// In the sample output they no longer appear after the filter step.
		HashMap<String,String> bareRoomA = new HashMap<>();
		bareRoomA.put("building","9");
		bareRoomA.put("unit","2");
		bareRoomA.put("room","1001");
		bareRoomA.put("id","7");
		data.addAddressDic(bareRoomA);

		HashMap<String,String> bareRoomB = new HashMap<>();
		bareRoomB.put("building","9");
		bareRoomB.put("unit","2");
		bareRoomB.put("room","1001");
		bareRoomB.put("id","8");
		data.addAddressDic(bareRoomB);

		// Stage 0: dump the records exactly as they were entered.
		System.out.println("原始地址信息");
		data.printData();

		// Stage 1: filter out junk addresses, then dump again.
		System.out.println("剔除垃圾地址");
		data.addressFilter();
		data.printData();

		// Stage 2: complete the administrative divisions, then dump again.
		System.out.println("补全行政区");
		data.completion();
		data.printData();

		// Stage 3: produce the final standard address table; the sample output
		// additionally contains building, unit, sub_aoi and aoi level records here.
		System.out.println("标准地址表  最终成果");
		data.addressFix();
		data.printData();

数据打印

原始地址信息
5={room_id=5, town=汤山街道, county=江宁区, community=中前社区, type=room, alias_roads=天地大道#金山大道:9, building=9, room=1001, unit=1, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=5}
6={room_id=6, town=汤山街道, county=江宁区, community=中前社区, type=room, alias_roads=天地大道#金山大道:9, building=9, room=1001, unit=2, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=6}
7={room_id=7, unit=2, id=7, type=room, building=9, room=1001}
8={room_id=8, unit=2, id=8, type=room, building=9, room=1001}
剔除垃圾地址
5={room_id=5, town=汤山街道, county=江宁区, community=中前社区, type=room, alias_roads=天地大道#金山大道:9, building=9, room=1001, unit=1, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=5, is_address=1}
6={room_id=6, town=汤山街道, county=江宁区, community=中前社区, type=room, alias_roads=天地大道#金山大道:9, building=9, room=1001, unit=2, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=6, is_address=1}
补全行政区
5={room_id=5, town=汤山街道, city=南京市, county=江宁区, community=中前社区, type=room, alias_roads=天地大道#金山大道:9, building=9, room=1001, unit=1, province=江苏省, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=5, is_address=1}
6={room_id=6, town=汤山街道, city=南京市, county=江宁区, community=中前社区, type=room, alias_roads=天地大道#金山大道:9, building=9, room=1001, unit=2, province=江苏省, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=6, is_address=1}
标准地址表  最终成果
5={room_id=5, building_id=5_bld, subaoi_id=6_unit_sub, town=汤山街道, city=南京市, county=江宁区, community=中前社区, type=room, aoi_id=6_unit_sub_aoi, alias_roads=天地大道#金山大道:9, building=9, room=1001, unit=1, province=江苏省, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=5, is_address=1, unit_id=5_unit}
6={room_id=6, building_id=5_bld, subaoi_id=6_unit_sub, town=汤山街道, city=南京市, county=江宁区, community=中前社区, type=room, aoi_id=6_unit_sub_aoi, alias_roads=天地大道#金山大道:9, building=9, room=1001, unit=2, province=江苏省, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=6, is_address=1, unit_id=6_unit}
5_bld={building_id=5_bld, subaoi_id=6_unit_sub, town=汤山街道, city=南京市, county=江宁区, community=中前社区, type=building, aoi_id=6_unit_sub_aoi, alias_roads=天地大道#金山大道:9, building=9, province=江苏省, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=5_bld, is_address=1}
6_unit_sub={subaoi_id=6_unit_sub, town=汤山街道, city=南京市, county=江宁区, community=中前社区, type=sub_aoi, aoi_id=6_unit_sub_aoi, alias_roads=天地大道#金山大道:9, province=江苏省, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=6_unit_sub, is_address=1}
6_unit_sub_aoi={town=汤山街道, city=南京市, county=江宁区, community=中前社区, type=aoi, aoi_id=6_unit_sub_aoi, alias_roads=天地大道#金山大道:9, province=江苏省, road=宏运大道, road_no=123, alias_aois=乾清宫, aoi=大明湖畔, id=6_unit_sub_aoi, is_address=1}
6_unit={building_id=5_bld, subaoi_id=6_unit_sub, town=汤山街道, city=南京市, county=江宁区, community=中前社区, type=unit, aoi_id=6_unit_sub, alias_roads=天地大道#金山大道:9, building=9, unit=2, province=江苏省, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=6_unit, is_address=1, unit_id=6_unit}
5_unit={building_id=5_bld, subaoi_id=6_unit_sub, town=汤山街道, city=南京市, county=江宁区, community=中前社区, type=unit, aoi_id=6_unit_sub, alias_roads=天地大道#金山大道:9, building=9, unit=1, province=江苏省, road=宏运大道, road_no=123, alias_aois=乾清宫, sub_aoi=北苑, aoi=大明湖畔, id=5_unit, is_address=1, unit_id=5_unit}

如上方打印结果所示,程序已自动完成垃圾地址过滤和正常地址的行政区划补全,最终生成中文标准地址库。

二、业务地址关联标准地址库

		AddressTool ss = new AddressTool();
		// Load the processed address library (the `data` table built in part one)
		// into the AddressTool so business addresses can be matched against it.
		data.initData(ss);

		// Sample lookups. In the recorded results the first three all resolve to
		// record 5, even when the input carries a road name ("花果山大道") that is
		// not in the library; the last one uses the road alias "金山大道" and
		// resolves to the AOI-level record.
		System.out.println(ss.getStdAddress("大明湖畔北苑9-1-1001"));
		System.out.println(ss.getStdAddress("花果山大道大明湖畔北苑9-1-1001"));
		System.out.println(ss.getStdAddress("花果山大道大明湖畔9-1-1001"));
		System.out.println(ss.getStdAddress("金山大道大明湖畔"));

		// Rough throughput check: timestamp, 200,000 lookups, timestamp.
		SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd 'at' HH:mm:ss z");
		Date date = new Date();
		// Label both timestamps so the console output matches the
		// "开始时间..." / "结束时间..." lines shown in the recorded results.
		System.out.println("开始时间" + formatter.format(date));
		for (int i = 0; i < 200000; i++) {
			ss.getStdAddress("金山大道大明湖畔9-1-1001");
		}
		System.out.println(ss.getStdAddress("大明湖畔9-1-1001"));
		date = new Date();
		System.out.println("结束时间" + formatter.format(date));

结果打印

{5=江苏省南京市江宁区汤山街道中前社区宏运大道123号大明湖畔北苑91单元1001}
{5=江苏省南京市江宁区汤山街道中前社区宏运大道123号大明湖畔北苑91单元1001}
{5=江苏省南京市江宁区汤山街道中前社区宏运大道123号大明湖畔北苑91单元1001}
{6_unit_sub_aoi=江苏省南京市江宁区汤山街道中前社区宏运大道123号大明湖畔}
开始时间2024-03-26 at 13:51:26 CST
{5=江苏省南京市江宁区汤山街道中前社区宏运大道123号大明湖畔北苑91单元1001}
结束时间2024-03-26 at 13:51:34 CST

实测速度约25000条/秒(上述20万次查询耗时约8秒)
使用中有问题或者建议,欢迎联系邮箱addresstool@163.com

java资源下载

https://download.csdn.net/download/u011024436/89035851

源码学习
https://gitee.com/addresstool/address

  • 3
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

addresstool

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值