import editdistance
def editdance_cluster(text_lists, thred=1):
    """Greedily group texts that are within a small edit distance of each other.

    Texts are processed in input order. Each text joins the first existing
    cluster that contains at least one member whose
    ``editdistance.distance`` to the text is ``<= thred``; if no cluster
    qualifies, the text starts a new cluster of its own.

    The result is order-dependent: a text is only compared against clusters
    built from earlier texts, and two clusters are never merged afterwards.
    Clusters are sets, so repeated identical texts collapse into one member.

    Args:
        text_lists: Iterable of hashable texts (e.g. strings) to cluster.
        thred: Maximum edit distance for a text to join a cluster.

    Returns:
        A list of sets, one per cluster that ended up with more than one
        distinct member. Single-member clusters are dropped.

    Side effects:
        Prints the total number of clusters and the number of returned
        clusters to standard output.
    """
    clusters = []
    for text in text_lists:
        for cluster in clusters:
            # any() stops at the first close-enough member; the set is only
            # modified once that scan is over, never while it is iterated.
            if any(
                editdistance.distance(text, member) <= thred
                for member in cluster
            ):
                cluster.add(text)
                break
        else:
            # No existing cluster accepted the text.
            clusters.append({text})

    valid_clusters = [cluster for cluster in clusters if len(cluster) > 1]
    print('total clusters:{}, valid clusters:{}'.format(len(clusters), len(valid_clusters)))
    return valid_clusters
if __name__ == "__main__":
    # Demo run: cluster a few sample strings with the default threshold.
    text_lists = [
        '粤A12345',
        '粤A12045',
        '粤A12046',
        '粤A0',
    ]
    print('bigin cluster...')
    valid_clusters = editdance_cluster(text_lists)
    # Show at most the first ten multi-member clusters.
    print(valid_clusters[:10])