这段代码的作用是:将不规范的地址(例如"北京海淀区西二旗xx院xx号"、"石家庄桥西区xx路xx号")清洗为规范的"省市县 + 街道楼门号"格式的结构化地址数据。
先上代码:
// Cleans the free-form address held in `address` into province / city / county / street parts.
// Regions are located through their short alias (Alias) and reported by their official name (Name).
// Yields result.BadRequest() when no region can be resolved, otherwise result.Success(...).
// NOTE(review): `address`, `result`, `Entities`, `region` and `Address` are declared outside this snippet.

// The lookups below need at least a two-character prefix; reject anything shorter
// instead of letting Substring throw.
if (string.IsNullOrEmpty(address) || address.Length < 2)
{
    return result.BadRequest();
}

// Prefixes used for the province / city lookups, clamped to the address length.
var provKey = address.Substring(0, 2);
var cityKey = address.Length < 4 ? address : address.Substring(0, 4);

using (var context = new Entities())
{
    // Match the province-level region (Grade == 0) against the first two characters.
    var provs = context.region.Where(r => r.Grade == 0);
    var prov = provs.SingleOrDefault(p => p.Alias.Contains(provKey));

    region city;
    if (prov == null)
    {
        // Province unknown: look for a city-level region (Grade == 1) whose alias
        // appears in the first four characters, then derive the province from it.
        city = (from c in context.region.Where(r => r.Grade == 1 && cityKey.Contains(r.Alias))
                let index = address.IndexOf(c.Alias, StringComparison.OrdinalIgnoreCase)
                where index >= 0
                orderby index
                select c).FirstOrDefault();
        if (city == null)
        {
            return result.BadRequest();
        }

        prov = context.region.Single(r => r.ID == city.ParentId);
    }
    else
    {
        // Province known: pick the child region whose alias occurs earliest in the address.
        city = (from c in context.region.Where(r => r.ParentId == prov.ID)
                let index = address.IndexOf(c.Alias, StringComparison.OrdinalIgnoreCase)
                where index >= 0
                orderby index
                select c).FirstOrDefault();
    }

    region county;
    if (city == null)
    {
        // City unknown: search every county under the known province,
        // then derive the city from the county that matched.
        var citys = context.region.Where(r => r.ParentId == prov.ID);
        county = (from c in context.region.Where(r => citys.Any(parent => parent.ID == r.ParentId))
                  let index = address.IndexOf(c.Alias, StringComparison.OrdinalIgnoreCase)
                  where index >= 0
                  orderby index
                  select c).FirstOrDefault();
        if (county == null)
        {
            return result.BadRequest();
        }

        city = context.region.Single(r => r.ID == county.ParentId);
    }
    else
    {
        // City known: pick the child county whose alias occurs earliest in the address.
        county = (from c in context.region.Where(r => r.ParentId == city.ID)
                  let index = address.IndexOf(c.Alias, StringComparison.OrdinalIgnoreCase)
                  where index >= 0
                  orderby index
                  select c).FirstOrDefault();
    }

    // Strip the province / city / county prefix. Everything before the deepest matched
    // region is dropped, then the region text itself is removed from the front.
    var deepest = county ?? city;
    var idx = address.IndexOf(deepest.Alias, StringComparison.OrdinalIgnoreCase);
    var tail = idx < 0 ? address : address.Substring(idx);

    string street;
    if (tail.StartsWith(deepest.Name, StringComparison.OrdinalIgnoreCase))
    {
        // The address spells out the official name (e.g. "桥西区").
        street = tail.Substring(deepest.Name.Length);
    }
    else if (tail.StartsWith(deepest.Alias, StringComparison.OrdinalIgnoreCase))
    {
        // Only the short alias was written (e.g. "桥西"); remove it as well so it
        // does not leak into the street part.
        street = tail.Substring(deepest.Alias.Length);
    }
    else
    {
        street = tail;
    }

    var parsed = new Address
    {
        Prov = prov.Name,
        City = city.Name,
        County = county?.Name,
        Street = street
    };
    return result.Success(parsed);
}
原理很简单,就是利用简称查找匹配,最后输出标准名称。
不足之处是:如果地址中最后一级(县级)行政区划不是按正式名称填写的(例如只写了简称),输出的街道楼门号中就无法去掉这一级区划的文字。
如原字符串为石家庄桥西xx路xx号,输出格式为:
省:河北省
市:石家庄市
县:桥西区
街道:桥西xx路xx号
不知道各位博友有没有好办法可以去掉这里的桥西二字。