import re
import time
# Compare a full roster of names against a list of people who have completed
# a task, and report who is still missing.
#
# Inputs (UTF-8 text, one entry per line):
#   zong.txt - the original data set: every person who is expected to finish.
#   xin.txt  - the new data set: people who have finished. It also contains
#              non-name lines (presumably headers/ranks/timestamps copied from
#              a ranking page - confirm against a real export), which are
#              filtered out below.

ROSTER_PATH = "zong.txt"
SUBMISSIONS_PATH = "xin.txt"

# How long the script waits before exiting, in seconds.
EXIT_DELAY_SECONDS = 10

# Phrases that mark a line of the new data set as useless (not a name).
_NOISE_MARKERS = (
    "X",
    "青年大学习",
    "安徽省青年大学习",
    "参与人数排行榜",
    "姓名",
    "时间",
)

# A line is noise if it contains an ASCII digit or any marker phrase.
# NOTE: this must be an alternation, not a "[...]" character class. A character
# class would match any *single character* of the phrases and would therefore
# throw away real names that merely contain e.g. "大", "年" or "名".
_NOISE_PATTERN = re.compile(
    "[0-9]|" + "|".join(re.escape(marker) for marker in _NOISE_MARKERS)
)


def read_names(path):
    """Return the non-blank lines of the UTF-8 text file *path*.

    Only the trailing newline is removed from each line, so names are compared
    exactly as written. Blank (empty or whitespace-only) lines are skipped so
    that a stray empty line is not counted as a person.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(path, "r", encoding="utf-8") as handle:
        stripped = (line.strip("\n") for line in handle)
        return [name for name in stripped if name.strip()]


def clean_submissions(lines):
    """Return the entries of *lines* that are not noise.

    An entry is dropped when it contains a digit or one of the marker phrases
    in ``_NOISE_MARKERS``; everything else is kept, in the original order.
    """
    return [line for line in lines if _NOISE_PATTERN.search(line) is None]


def find_missing(roster, completed):
    """Return the entries of *roster* that do not appear in *completed*.

    The order (and any duplicates) of *roster* are preserved. Matching is by
    exact string equality.
    """
    done = set(completed)  # O(1) membership instead of rescanning a list
    return [name for name in roster if name not in done]


def main():
    """Read both data sets, print the three counts and the missing names."""
    # Read the original data.
    roster = read_names(ROSTER_PATH)
    print("总共有:", len(roster), "人")

    # Read the new data and drop the useless lines.
    completed = clean_submissions(read_names(SUBMISSIONS_PATH))
    print("完成有:", len(completed), "人")

    # Find the roster entries that are absent from the new data set.
    missing = find_missing(roster, completed)
    print("未完成有:", len(missing), "人")
    print("分别是", missing)

    # Delay for 10 s before exiting, then print the measured elapsed time.
    started = time.time()
    time.sleep(EXIT_DELAY_SECONDS)
    finished = time.time()
    print(finished - started)


if __name__ == "__main__":
    main()