这道题有难度,要把这三个文件合并到一起。
文件一
1 161514631 T C|0.132632|(272,140,47,16)
2 222301193 A C|0.078624|(260,115,23,9)
3 89259567 A C|0.043716|(200,150,8,8)
4 55979552 G A|0.211921|(236,121,59,37)
6 26032069 C G|0.078212|(247,83,23,5)
6 93973579 A G|0.066667|(96,30,5,4)
7 87168642 G C|0.062016|(84,37,4,4)
文件二
2 222301193 A C|0.181507|(312,166,78,28)
6 93973579 A G|0.168981|(282,77,52,21)
14 102551631 T G|0.291411|(195,36,72,23)
19 54410004 C T|0.486056|(96,33,70,52)
20 30671857 C A|0.130909|(336,142,42,30)
文件三
2 222301193 A C|0.181507|(312,166,78,28)
17 78867580 C T|0.223464|(296,121,83,37)
19 2207712 C T|0.093567|(89,66,8,8)
19 54410004 C T|0.486056|(96,33,70,52)
20 30671857 C A|0.130909|(336,142,42,30)
要求合并成这样子:
Chr Position Ref test03_1 test03_2 test03_3
1 161514631 T C|0.132632|(272,140,47,16) - -
2 222301193 A C|0.078624|(260,115,23,9) C|0.181507|(312,166,78,28) C|0.181507|(312,166,78,28)
3 89259567 A C|0.043716|(200,150,8,8) - -
4 55979552 G A|0.211921|(236,121,59,37) - -
6 26032069 C G|0.078212|(247,83,23,5) - -
6 93973579 A G|0.066667|(96,30,5,4) G|0.168981|(282,77,52,21) -
7 87168642 G C|0.062016|(84,37,4,4) - -
14 102551631 T - G|0.291411|(195,36,72,23) -
19 54410004 C - T|0.486056|(96,33,70,52) T|0.486056|(96,33,70,52)
20 30671857 C - A|0.130909|(336,142,42,30) A|0.130909|(336,142,42,30)
17 78867580 C - - T|0.223464|(296,121,83,37)
19 2207712 C - - T|0.093567|(89,66,8,8)
我用了逐个合并的方法:每合并一个文件,就生成一个新的字典。
就像吃糖葫芦,一个一个地吞。
# Paths of the three input tables that the script below reads and merges.
file1, file2, file3 = (
    'c:/Test3_1.info',
    'c:/Test3_2.info',
    'c:/Test3_3.info',
)

# Accumulator that later receives the keys of every input table.
list_all = {}

# Earlier drafts, kept for reference:
# list_title = []
# list_2d = {{'1':'2'}:'3'}
list_2d = {}
def read(file):
    """Parse a tab-separated table into a dictionary.

    Every non-blank line is stripped and split on tabs. The first three
    fields, re-joined with tabs, form the key; the fourth field is the value.
    Blank lines (for example a trailing empty line) are skipped instead of
    crashing the parser.

    Args:
        file: Path of the text file to read.

    Returns:
        dict: Key -> value in file order. When a key occurs more than once,
        the last occurrence wins.

    Raises:
        IndexError: If a non-blank line has fewer than four fields.
    """
    dictt = {}
    # The context manager closes the file even if parsing raises.
    with open(file, "r") as f:
        for raw in f:
            stripped = raw.strip()
            if not stripped:
                continue
            arr = stripped.split("\t")
            dictt["\t".join(arr[:3])] = arr[3]
    return dictt
# Merge the three input tables into one tab-separated table.
#
# Fixes relative to the previous version of this script:
#   * it used list_all_2 / list_all_3 without ever defining them (NameError);
#   * it matched rows with substring tests (`if k in key`), which can attach a
#     value to the wrong row when one key is contained in another;
#   * chaining string keys left an empty column (double tab) between the
#     first and second value columns.

# One {key: value} mapping per input file, in output-column order.
list1 = read(file1)
list2 = read(file2)
list3 = read(file3)
tables = (list1, list2, list3)

# Union of all keys in first-seen order: rows of file 1 first, then rows that
# first appear in file 2, then rows that first appear in file 3.
all_keys = list(dict.fromkeys(key for table in tables for key in table))

# One output row per key; "-" marks a file that has no record for that key.
rows = [
    "\t".join([key] + [table.get(key, "-") for table in tables])
    for key in all_keys
]

for row in rows:
    print(row)

with open("t3.txt", "w") as f:
    f.write("Chr\tPosition\tRef\ttest03_1\ttest03_2\ttest03_3\n")
    f.writelines(row + "\n" for row in rows)
8.14 更新:更好的解法,改用函数来组织代码(严格来说是过程式写法,并非函数式编程)。
但是感觉还是不够模块化,
以后能不能用 class 写一下?
import os
import re
# Directory that holds the Test3_*.info input files.
# NOTE(review): the original literal was cut off after 'c:/Users' and never
# closed, which is a SyntaxError. Confirm the intended full directory.
path = 'c:/Users'
# Work inside that directory so the bare file names from os.listdir() can be
# opened directly.
os.chdir(path)
# Directory entries; find_file() filters them down to the input files.
road = os.listdir(path)
def find_file(road):
    """Return the ``Test3_<n>.info`` names found in *road*, ordered by ``<n>``.

    Only names that match the pattern in full are kept. Previously the
    pattern was searched as a substring with an unescaped dot, so a name such
    as ``Test3_1.info.bak`` produced a second ``Test3_1.info`` entry and
    ``Test3_1xinfo`` was accepted. Indices with more than one digit are now
    accepted and sorted numerically, so ``Test3_10.info`` comes after
    ``Test3_2.info``.

    Args:
        road: Iterable of file names (for example the result of
            ``os.listdir``).

    Returns:
        list[str]: Matching names sorted by their numeric index.
    """
    regex = re.compile(r'Test3_(\d+)\.info')
    matches = (regex.fullmatch(name) for name in road)
    # Sort on (index, name) so the order is numeric and deterministic.
    numbered = sorted((int(m.group(1)), m.group(0)) for m in matches if m)
    return [name for _, name in numbered]
def read(file):
    """Read a tab-separated table and return its raw lines and parsed records.

    Every non-blank line is stripped and split on tabs. The first three
    fields, re-joined with tabs, form the key; the fourth field is the value.
    Blank lines are kept in the raw line list but skipped when parsing, so a
    trailing empty line no longer raises IndexError.

    Args:
        file: Path of the text file to read.

    Returns:
        list: ``[lines, records]`` where ``lines`` is the list returned by
        ``readlines()`` and ``records`` maps key -> value in file order
        (the last occurrence of a duplicate key wins).

    Raises:
        IndexError: If a non-blank line has fewer than four fields.
    """
    # The context manager closes the file even if reading raises.
    with open(file, "r") as f:
        line = f.readlines()

    dictt = {}
    for raw in line:
        stripped = raw.strip()
        if not stripped:
            continue
        arr = stripped.split("\t")
        dictt["\t".join(arr[:3])] = arr[3]
    return [line, dictt]
def update(file=None):
    """Build the empty result table keyed by every record of every input.

    The input files are discovered with ``find_file(road)``; each is parsed
    with ``read`` and its keys are collected in first-seen order.

    Args:
        file: Unused. It was already ignored before; it is now optional so
            the function can be called without inventing an argument, while
            existing ``update(x)`` calls keep working.

    Returns:
        dict: Every key found in any input file, mapped to ``''`` so that
        value columns can be appended to it later.
    """
    dict_all = {}
    for name in find_file(road):
        # Only the keys matter here; values are filled in per file later.
        for key in read(name)[1]:
            dict_all.setdefault(key, '')
    return dict_all


# The call used to pass the undefined name `file`, which raises NameError on
# Python 3. The argument was never used, so it is simply dropped.
dict_all = update()
def add(road):
    """Append one value column per input file to every row of ``dict_all``.

    For each file returned by ``find_file(road)``, every key of the
    module-level ``dict_all`` gets a tab followed by that file's value for
    the key, or ``-`` when the file has no such key. ``dict_all`` is modified
    in place.

    Args:
        road: Directory listing passed through to ``find_file``.

    Returns:
        dict: The same ``dict_all`` object, after all columns were appended.
    """
    for name in find_file(road):
        table = read(name)[1]
        for key in dict_all:
            dict_all[key] += '\t' + table.get(key, '-')
    return dict_all


# NOTE: this rebinds the builtin name `all` at module level.
all = add(road)
# Write the merged table to tt3.txt.
# Previously only the header line was written (with stray trailing tabs) and
# the merged rows held in `all` never reached the file.
with open("tt3.txt", "w") as f:
    # One value column per input file, named after the file without its extension.
    columns = ["Chr", "Position", "Ref"]
    columns += [name.split('.')[0] for name in find_file(road)]
    f.write("\t".join(columns) + "\n")
    # Each value string already starts with a tab, so key + values is a full row.
    for key, values in all.items():
        f.write(key + values + "\n")
感觉代码可以维护了!