# 1. 固定格式求地点 — parse the location out of a well-formed address string
# Sample input: a list of address strings to parse.
# Alternative inputs noted by the author: data_zhuti_qumin_tq[1], the full
# sentence '投诉长沙开区橄榄城小区车位违建', or several addresses at once, e.g.
# ["徐汇区虹漕路461号58号楼5楼", "泉州市洛江区万安塘西工业区", "朝阳区北苑华贸城"].
location_str = ['长沙开区橄榄城小区']
import addressparser
# transform() receives the list of address strings; its result is printed below.
# NOTE(review): presumably a table with one row per input address — confirm
# against the addressparser documentation.
df = addressparser.transform(location_str)
print(df)
# 结果 (output of print(df) above; the output itself is not included here)
# 2. 一段文本求地点 — extract the location from a free-text sentence
import paddlehub as hub

# Load PaddleHub's 'lac' module; its results expose parallel 'word' and 'tag'
# lists for every input text.
lac = hub.Module(name='lac')

# Free-text sentences to analyse. More strings can be added to the list,
# e.g. '衡阳县关市网吧常年收留未成年人上网'.
test_text = ['投诉长沙开区橄榄城小区车位违建']
inputs = {'text': test_text}
results = lac.lexical_analysis(data=inputs)

for result in results:
    print(result['word'])
    print(result['tag'])
    # Keep only the words tagged LOC and glue them into one location string.
    loc = ''.join(
        word for word, tag in zip(result['word'], result['tag']) if tag == 'LOC'
    )
    print(loc)
# 3. 正则表达式相似文本求地点 — extract the location for similar texts, refined with regular expressions
import re
import paddlehub as hub
import addressparser
# Load PaddleHub's 'lac' module once at module level; location() below calls
# its lexical_analysis() method.
lac = hub.Module(name='lac')
# Matches everything up to and including a city (市) or county (县) suffix.
_CITY_COUNTY_RE = re.compile(r'[^市]+市|[^县]+县')
# Matches fragments ending in town (镇), village (村), district/estate (区)
# or street (街).
_LOCAL_PLACE_RE = re.compile(r'[^镇]+镇|[^村]+村|[^区]+区|[^街]+街')


def _extract_loc_words(text):
    """Return the concatenation of all words LAC tags as LOC in ``text``."""
    results = lac.lexical_analysis(data={'text': [text]})
    return ''.join(
        word
        for result in results
        for word, tag in zip(result['word'], result['tag'])
        if tag == 'LOC'
    )


def _simplify_address(address):
    """Reduce a detailed address to its town/village/district/street parts.

    City and county prefixes are removed first. The remaining fragments are
    de-duplicated while keeping their order of appearance, so the result is
    the same on every run. An address with none of the recognised suffixes
    yields an empty string.
    """
    remainder = _CITY_COUNTY_RE.sub('', address)
    # dict.fromkeys de-duplicates like set() but preserves insertion order.
    fragments = dict.fromkeys(_LOCAL_PLACE_RE.findall(remainder))
    return ''.join(fragments)


def location(id):
    """Return a condensed location string for the rows with the given ID.

    Reads the module-level ``biaoge2`` table, selects the rows whose ``'ID'``
    column equals ``id`` and runs LAC over the ``'主题'`` (subject) text of at
    most the first five of them (``head()``). All LOC-tagged words are
    concatenated, parsed with ``addressparser.transform`` and returned as
    city (``'市'``) + district (``'区'``) + simplified detailed address.

    Args:
        id: Value compared against ``biaoge2['ID']``. The name shadows the
            ``id`` builtin but is kept so existing callers keep working.

    Returns:
        The combined location string, or ``''`` when no location words were
        found in the selected subjects.
    """
    # Filter once instead of rebuilding the selection on every iteration.
    subjects = list(biaoge2[biaoge2['ID'] == id]['主题'].head())
    loc_sum = ''.join(_extract_loc_words(text) for text in subjects)
    if not loc_sum:
        # Nothing to parse: no matching rows or no LOC-tagged words.
        return ''

    parsed = addressparser.transform([loc_sum])
    # Work on the address string itself; going through str(list(...)) would
    # leak list punctuation ("['") into the regex matches.
    detail = _simplify_address(str(parsed['地址'].iloc[0]))
    combined = parsed['市'] + parsed['区'] + detail
    return combined.iloc[0]
# Disabled debug output of the raw LAC segmentation:
# print(result['word'])
# print(result['tag'])
# Example call: resolve the location for the rows whose ID equals 1.
# The return value is not stored or printed here.
location(1)