# Drop the ratings in rating_add_user whose item does not appear in the knowledge graph (KG).
def user_rating(
    rating_path="./mvlens.txt",
    index_path="./item_index2entity_id.txt",
    output_path="movielensRating.txt",
):
    """Keep only the rating records whose item is listed in the index file.

    Every non-blank line of ``rating_path`` is split on whitespace and its
    first four columns are taken as user, item, rating and time.  A record
    is kept when its item column equals the first column of some line in
    ``index_path``.  Kept records are written to ``output_path`` as four
    tab-separated columns, one record per line, in their original order.

    Three lines are printed to stdout: the number of rating records read,
    the completion message, and the number of records kept.

    Args:
        rating_path: Whitespace-separated rating file to filter.
        index_path: File whose first column lists the items to keep.
        output_path: Destination file; it is overwritten.

    Raises:
        IndexError: If a non-blank rating line has fewer than four columns.
        OSError: If one of the files cannot be opened.
        UnicodeDecodeError: If a used column is not valid UTF-8.
    """
    # Inputs are read as bytes and split on ASCII whitespace; only the
    # columns that are actually used get decoded.
    with open(index_path, "rb") as index_file:
        # A set gives O(1) membership tests, so the filter below is linear
        # in the number of ratings instead of ratings x items.
        known_items = {
            columns[0].decode("utf-8")
            for columns in map(bytes.split, index_file)
            if columns  # ignore blank lines
        }

    kept_lines = []
    total = 0
    with open(rating_path, "rb") as rating_file:
        for raw_line in rating_file:
            columns = raw_line.split()
            if not columns:
                continue  # a blank (e.g. trailing) line is not a record
            total += 1
            record = [column.decode("utf-8") for column in columns[:4]]
            if len(record) < 4:
                raise IndexError(
                    f"{rating_path}: expected at least 4 columns, "
                    f"got {len(record)}: {raw_line!r}"
                )
            if record[1] in known_items:
                kept_lines.append("\t".join(record) + "\n")
    print(total)

    # The output is opened only after both inputs were read completely.
    with open(output_path, "w", encoding="utf-8") as output_file:
        output_file.writelines(kept_lines)

    print("合并已完成")
    print(len(kept_lines))
# Script entry point: the filter runs as soon as this module is loaded.
user_rating()
# Python: joining the data of two tables (keeping the rows common to both)
# Latest recommended article published on 2024-07-05 11:38:44