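# This code is meant to be used in combination with code 11.
# The number of positive-example queries (those labelled 1) needs to be cut by half.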
"""Thin out the positive examples of a sentence-pair training set.

Reads three line-aligned files (``a.toks``, ``b.toks``, ``sim.txt``), drops a
bounded number of records whose label is ``1`` and appends the remaining
records to three output files (``a``, ``b``, ``c``).
"""
import os

# Directory holding the line-aligned a.toks / b.toks / sim.txt input files.
TRAIN_DIR = '/home/xbwang/croschangetrain/data/sick/train'
# Directory receiving the filtered output files a / b / c.
OUT_DIR = '/home/xbwang/Desktop'
# Label text (line ending stripped) that marks a positive example.
POSITIVE_LABEL = '1'
# Upper bound on how many positive examples are dropped.
MAX_REMOVED = 2001


def _read_lines(path):
    """Return every line of the text file at *path*, line endings included."""
    with open(path, 'r') as handle:
        return handle.readlines()


def _append_lines(path, lines):
    """Append *lines* verbatim to the file at *path*, creating it if needed."""
    with open(path, 'a') as handle:
        handle.writelines(lines)


def _drop_positives(records, max_removed):
    """Return *records* without the first *max_removed* positive examples.

    Each record is a ``(sentence_a, sentence_b, label)`` tuple of raw lines.
    A new list is built instead of popping from the list being scanned, so
    every record is examined exactly once and none is skipped.
    """
    kept = []
    removed = 0
    for record in records:
        # strip() so a final line without a newline (or a CRLF line) still
        # counts as a positive label.
        if removed < max_removed and record[2].strip() == POSITIVE_LABEL:
            removed += 1
            continue
        kept.append(record)
    return kept


def main(train_dir=TRAIN_DIR, out_dir=OUT_DIR, max_removed=MAX_REMOVED):
    """Filter the training triples and append the survivors to the outputs.

    Args:
        train_dir: directory containing ``a.toks``, ``b.toks`` and ``sim.txt``.
        out_dir: directory in which ``a``, ``b`` and ``c`` are appended to.
        max_removed: maximum number of label-``1`` records to drop; records
            are dropped in file order until this cap is reached.

    Raises:
        ValueError: if the three input files do not have the same number of
            lines, since the records could not be aligned.
    """
    sents_a = _read_lines(os.path.join(train_dir, 'a.toks'))
    sents_b = _read_lines(os.path.join(train_dir, 'b.toks'))
    labels = _read_lines(os.path.join(train_dir, 'sim.txt'))

    if not (len(sents_a) == len(sents_b) == len(labels)):
        raise ValueError(
            'input files are not line-aligned: %d / %d / %d lines'
            % (len(sents_a), len(sents_b), len(labels))
        )

    # Keep the three fields as a tuple rather than joining them with a
    # separator string, so sentence text can never be mistaken for a delimiter.
    kept = _drop_positives(list(zip(sents_a, sents_b, labels)), max_removed)

    # Outputs are opened in append mode: re-running adds to existing files.
    _append_lines(os.path.join(out_dir, 'a'), [rec[0] for rec in kept])
    _append_lines(os.path.join(out_dir, 'b'), [rec[1] for rec in kept])
    _append_lines(os.path.join(out_dir, 'c'), [rec[2] for rec in kept])


if __name__ == '__main__':
    main()