import re
import pandas as pd
import os
import glob
# Area names that are stripped out of an extracted building name.
_AREA_NAMES = ('蔡屋围', '清水河', '笋岗')

# Floor / block / wing / tower qualifiers stripped after the area names.
# The tokens are removed one after another, so the order is significant and
# is kept exactly as in the original replace chain.
_UNIT_QUALIFIERS = (
    '一楼', '二楼', '三楼', '四楼', '五楼', '六楼', '七楼', '八楼', '九楼',
    'A栋', 'B栋', 'C栋', 'D栋',
    '1栋', '2栋', '3栋', '4栋', '5栋', '6栋', '7栋', '8栋', '9栋',
    '北楼', '南楼', '西楼', '东楼',
    'A座', 'B座', 'C座', 'D座',
    '1号楼', '2号楼', '3号楼', '4号楼', '5号楼',
    '6号楼', '7号楼', '8号楼', '9号楼', '10号楼',
)

# Primary pattern: everything after a "号" up to the last building keyword.
_AFTER_NUMBER_RE = re.compile(r"号(.*(座|大厦|栋|楼|中心|广场))")
# Fallback pattern: everything after a side/corner/sub-district/road/community
# marker up to the last building keyword (plus an optional "<letter>座").
_AFTER_MARKER_RE = re.compile(
    r"(侧|角|街道|路|社区)(.*(座|大厦|栋|楼|中心|广场)+([A-Z]座)?)"
)
_DIGIT_FLOOR_RE = re.compile(r'\d+楼')
_DIGIT_STOREY_RE = re.compile(r'\d+层')
_THROUGH_COMMUNITY_RE = re.compile(r'.*社区')


def _strip_tokens(text, tokens):
    """Remove every occurrence of each token from text, one token at a time."""
    for token in tokens:
        text = text.replace(token, '')
    return text


def building_discriminate(s):
    """Extract a building name from an address string.

    Two patterns are tried in order:

    1. The text following a "号" and ending at the last building keyword
       (座, 大厦, 栋, 楼, 中心, 广场).
    2. Otherwise, the text following one of 侧, 角, 街道, 路, 社区 and ending
       at the last building keyword.

    The captured text is then cleaned: numeric floor suffixes ("<digits>楼",
    and for the fallback also "<digits>层" and anything up to "社区") are
    removed, followed by the area names and the floor/block/tower qualifiers
    listed in the module-level tables.

    Args:
        s: The address to analyse.

    Returns:
        The cleaned building name (possibly an empty string), or the literal
        string 'NULL' when ``s`` is not a ``str`` or neither pattern matches.
    """
    # The original relied on a bare ``except`` to turn the TypeError raised
    # for non-string input into 'NULL'; keep that result, but explicitly.
    if not isinstance(s, str):
        return 'NULL'

    found = _AFTER_NUMBER_RE.search(s)
    if found is not None:
        name = _DIGIT_FLOOR_RE.sub('', found.group(1))
        return _strip_tokens(name, _AREA_NAMES + _UNIT_QUALIFIERS)

    found = _AFTER_MARKER_RE.search(s)
    if found is None:
        return 'NULL'

    name = _DIGIT_FLOOR_RE.sub('', found.group(2))
    name = _DIGIT_STOREY_RE.sub('', name)
    name = _THROUGH_COMMUNITY_RE.sub('', name)
    # The fallback additionally drops any leftover "社区" right after the
    # area names, matching the position it had in the original chain.
    return _strip_tokens(name, _AREA_NAMES + ('社区',) + _UNIT_QUALIFIERS)
# Collect every .xlsx workbook in the target directory, then process each one.
xlsx_paths = glob.glob('C:/Users/win10/Documents/*.xlsx')
for file_path in xlsx_paths:
    # Load the workbook into a DataFrame.
    df = pd.read_excel(file_path)
    # Derive the building name from the address column; na_action='ignore'
    # leaves missing addresses as NaN instead of passing them to the function.
    building_column = df['企业地址'].map(building_discriminate, na_action='ignore')
    # Attach the result as a new column.
    df['楼宇'] = building_column
    # Write the DataFrame back over the original workbook, without the index.
    df.to_excel(file_path, index=False)
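# Python: identifying and extracting building names from addresses
# (Scraped page residue: "latest recommended article published 2024-03-15 11:12:35")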