NCBI EST 文库格式转换
#!/usr/bin/env python
# -*- coding: utf-8 -*-
def _library_id(number):
    """Return the ``LIBEST_`` identifier for *number*, zero-padded to 6 digits."""
    # One format spec replaces the three hand-written prefix branches, so IDs
    # below 100 are padded to the same width as every other ID.
    return 'LIBEST_%06d' % number


def _scan_headers(rows):
    """Return ``(total, wanted)`` computed from the header rows of *rows*.

    A header row is any row that does not contain the text ``'Lib'``; its
    second-to-last whitespace-separated token is read as an integer count.
    *total* is the sum of those counts, and *wanted* is the set of row
    indices covered by each header (the ``count`` rows that follow it).
    """
    total = 0
    wanted = set()  # set, not list: membership is tested once per row later
    for index, row in enumerate(rows):
        if 'Lib' in row:
            continue
        count = int(row.split()[-2])
        total += count
        wanted.update(range(index + 1, index + count + 1))
    return total, wanted


def convert_lines(lines):
    """Yield the converted output lines for the raw input *lines*.

    The first item yielded is the total of all header counts (as a string).
    Each following item is one tab-separated record::

        LIBEST_<6-digit id> <TAB> tissue <TAB> last field <TAB> middle fields

    where ``tissue`` is the first token of the most recent header row, the
    id is the integer found after the first four characters of the record's
    first token (presumably a ``Lib.`` prefix -- confirm against the input
    data), and the middle fields are joined with single spaces.

    Whitespace-only lines are ignored; previously any such line (for
    example a trailing blank line) raised ``IndexError``.

    Raises:
        IndexError: if a header row has fewer than two tokens.
        ValueError: if a header count or a record id is not an integer.
    """
    rows = [row for row in lines if row.strip()]
    total, wanted = _scan_headers(rows)
    yield str(total)

    tissue = ''
    for index, row in enumerate(rows):
        if 'Lib' not in row:
            tissue = row.split()[0]
        if index not in wanted:
            continue
        fields = row.split()
        number = int(fields[0][4:])
        yield '%s\t%s\t%s\t%s' % (
            _library_id(number),
            tissue,
            fields[-1],
            ' '.join(fields[1:-1]),
        )


def main(path='1.txt'):
    """Convert the file at *path* and print the result to standard output."""
    with open(path, 'r') as handle:
        lines = handle.readlines()
    for out_line in convert_lines(lines):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(out_line)


if __name__ == '__main__':
    main()