用户输入的地址不一定规范,例如习惯性地省略“省”、“市”、“区”等关键字,此时按照正则匹配很容易查找不到正确的地址。
以下代码按照用户输入的先后顺序,逐级对相同的词组进行匹配,可靠性与适配性大大提高,记录于此以供参考:
def _match_weight(region_list, address):
    """Score how well ``address`` matches one row of region names.

    ``region_list`` is the row's region names ordered from the broadest
    level to the narrowest (e.g. ``['中国', '', '天津市', '河东区']``).
    Each level is compared against the start of the not-yet-consumed part
    of ``address``; a level counts as matched when the two share a common
    prefix of at least two characters.  A matched level adds its position
    in ``region_list`` to the score, so deeper levels weigh more and the
    first level (index 0) contributes nothing.
    """
    weight = 0
    remaining = address
    for level, region in enumerate(region_list):
        if not region:
            continue
        prefix = os.path.commonprefix([region, remaining])
        if len(prefix) >= 2:
            weight += level
            # Drop only the leading match.  Removing every occurrence
            # (str.replace) would also erase a later level that repeats the
            # same text, e.g. the second "吉林" in "吉林吉林".
            remaining = remaining[len(prefix):]
    return weight


def get_area_code(biz_address_code):
    """Look up the region code that best matches a free-form address.

    The address is compared against every row of the first sheet of
    ``static/省市区编号对照表.xlsx``: column 1 holds the comma-separated
    region names and column 0 holds the code for that row.  The row with
    the highest match weight wins; on a tie the earliest row is kept.

    Args:
        biz_address_code: Address text as typed by the user.  Suffixes
            such as "省"/"市"/"区" may be omitted, because only a common
            prefix of two or more characters per level is required.

    Returns:
        A ``(code, error_info)`` tuple.  On success ``code`` is the matched
        code as a string of digits and ``error_info`` is ``None``.  On
        failure ``code`` is falsy (``None`` for invalid input) and
        ``error_info`` is a user-facing message.

    Raises:
        ValueError: If the matched code cell cannot be converted with
            ``int()``.
    """
    # Reject empty or non-string input before touching the spreadsheet.
    if not biz_address_code or not isinstance(biz_address_code, str):
        error_info = '根据 开户银行省市:“{}” ,获取省市编码失败,请按规范填写!'.format(biz_address_code)
        return None, error_info

    # NOTE(review): xlrd 2.0+ no longer reads .xlsx files -- confirm the
    # xlrd version pinned by this project.
    comparison_table = os.path.join(BASE_PATH, 'static', '省市区编号对照表.xlsx')
    workbook = xlrd.open_workbook(comparison_table)
    table = workbook.sheets()[0]

    best_code = None
    best_weight = 0
    for code, regions in zip(table.col_values(0), table.col_values(1)):
        if not isinstance(regions, str):
            # Non-text cells cannot be split into region names.
            continue
        weight = _match_weight(regions.split(','), biz_address_code)
        if weight > best_weight:
            best_weight = weight
            best_code = code

    if not best_code:
        error_info = '根据 开户银行省市:“{}”,获取省市编码失败,请按规范填写!'.format(biz_address_code)
        return best_code, error_info

    # Presumably the code cell is numeric and read back as a float such as
    # 120102.0 (verify against the workbook); int() drops the ".0".
    return str(int(best_code)), None
省市区编号对照表.xlsx 是记录了所有地区编号对应信息的 Excel 表格,如下图所示: