Python set
set() 函数创建一个无序不重复元素集,可进行关系测试,删除重复数据,还可以计算交集、差集、并集等。
注:set 是无序的,不支持通过索引(index)访问元素。
今天读取两个文件,将其内容合并并去重。文件格式为:每行两列,以制表符(\t)分隔。
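下面的程序曾在第29行报错 'set' object is not callable(即当前程序中被注释掉的行)。出现这个错误通常有两种原因:一是把变量命名为 set,覆盖了内置函数 set;二是对一个 set 对象本身加括号进行了调用。这里属于后者:outlines 已经是一个 set 对象,list(outlines()) 中的 outlines() 试图把它当作函数调用,于是报错。改为现在的程序(不再对 set 对象加括号调用)后,就可以成功运行了。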
# Merge the two files found under `aminerpa` and assign an id to every entity.
# Input directory: passed to merge_entity2id as the location of the source files.
aminerpa = "/Users/sun/PycharmProjects/meta_process/net_dbis"
# Output directory: passed to merge_entity2id as the location of the result file.
savepath = "/Users/sun/PycharmProjects/meta_process/data/dbis"
def _read_entity_pairs(path):
    """Return the first two tab-separated fields of every non-blank line.

    The fields are collected into one flat list in file order, duplicates
    included.  A non-blank line with fewer than two fields raises IndexError.
    """
    entities = []
    with open(path, 'r') as fin:
        for line in fin:
            arr = line.strip().split("\t")
            if arr == [""]:
                # Blank or whitespace-only line (e.g. a trailing empty line).
                continue
            entities.append(arr[0])
            entities.append(arr[1])
    return entities


def merge_entity2id(amipa, savepa):
    """Merge the entities of two relation files and assign each one an id.

    Reads ``author_paper.txt`` and ``paper_conf.txt`` from the directory
    *amipa*, takes the first two tab-separated columns of every line, removes
    duplicates and writes one ``<entity><TAB><id>`` line per unique entity to
    ``entity2id.txt`` in the directory *savepa*.

    Ids start at 0 and follow the order in which entities first appear
    (``author_paper.txt`` first, then ``paper_conf.txt``), so repeated runs on
    the same input produce the same file.

    As progress output, the entities of the first input line, the total number
    of collected fields and the number of unique entities are printed.

    Args:
        amipa: directory containing the two input files.
        savepa: existing directory that receives ``entity2id.txt``.

    Raises:
        IOError/OSError: if an input file cannot be read or the output file
            cannot be written.
        IndexError: if a non-blank input line has fewer than two columns.
    """
    import os  # local import: the file's import section is not part of this block

    entity1 = os.path.join(amipa, "author_paper.txt")
    entity2 = os.path.join(amipa, "paper_conf.txt")
    result = os.path.join(savepa, "entity2id.txt")

    # Read both inputs before touching the output file, so a missing or
    # malformed input does not leave a truncated entity2id.txt behind.
    outline = _read_entity_pairs(entity1)
    if outline:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(outline[:2])
    outline.extend(_read_entity_pairs(entity2))
    print(len(outline))

    # NOTE: an earlier attempt, `outlines = set(outline); out = list(outlines())`,
    # failed with "'set' object is not callable" because `outlines()` tries to
    # call the set object itself.
    # De-duplicate with a set for O(1) membership tests while keeping
    # first-appearance order; iterating a set directly yields an arbitrary
    # order, which would make the assigned ids unstable.
    seen = set()
    outlines = []
    for entity in outline:
        if entity not in seen:
            seen.add(entity)
            outlines.append(entity)
    print(len(outlines))

    with open(result, 'w') as fr:
        for index, entity in enumerate(outlines):
            fr.write(entity + "\t" + str(index) + "\n")
# Script entry point: run the merge with the module-level directories.
if __name__ == "__main__":
    merge_entity2id(amipa=aminerpa, savepa=savepath)