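# Sample rows, presumably from the input table 'result_1.txt'
# (columns: gene_name start end strand CDS_name):
#   chr1AL-3889054 2870 3970 + Traes_1AL_0007E2761.1
#   chr1AL-3889054 3220 3970 + Traes_1AL_0007E2761.2
#   chr1AL-3877729 317 552 + Traes_1AL_002FAE6E8.1
#   chr1AL-3947740 1 401 - Traes_1AL_00485097A.1
#   chr1AL-3947740 1 233 - Traes_1AL_00485097A.2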
from Bio import SeqIO
# Number of bases to include upstream of each feature.
UPSTREAM_FLANK = 2000


def flank_window(start, end, strand, flank=UPSTREAM_FLANK):
    """Return Python slice bounds covering a feature plus its upstream flank.

    Args:
        start: 1-based inclusive start coordinate of the feature.
        end: 1-based inclusive end coordinate of the feature.
        strand: '+' or '-'.
        flank: number of upstream bases to add (default 2000).

    Returns:
        A ``(lo, hi)`` tuple usable as ``sequence[lo:hi]``, or ``None`` when
        the strand is neither '+' nor '-'.
    """
    if strand == '+':
        # Upstream lies before the feature; clamp at the sequence start so a
        # feature closer than `flank` bases to the edge keeps what is there.
        return max(0, start - 1 - flank), end
    if strand == '-':
        # Upstream lies after the feature. Slicing clamps at the sequence
        # end, so no explicit length check is needed. (The original tested
        # `end <= 2000` here and then returned the whole rest of the
        # sequence, which is unrelated to how much sequence follows `end`.)
        return start - 1, end + flank
    return None


def main(fasta_path='URGI_HC.fasta', table_path='result_1.txt'):
    """Print each listed feature plus upstream flank as a FASTA record.

    Every non-blank line of `table_path` must hold five whitespace-separated
    fields: gene_name, start, end, strand, CDS_name. `gene_name` is looked up
    in the FASTA index; minus-strand features are reverse-complemented.
    Names missing from the index are reported as '<name> not found'.

    Raises:
        ValueError: if a non-blank line does not have exactly five fields or
            its coordinates are not integers.
    """
    record_dict = SeqIO.index(fasta_path, "fasta")
    try:
        with open(table_path) as table:
            for line in table:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                gene_name, start, end, strand, CDS_name = fields
                if gene_name not in record_dict:
                    print('%s not found' % gene_name)
                    continue
                window = flank_window(int(start), int(end), strand)
                if window is None:
                    # Unknown strand symbol: nothing is emitted, as before.
                    continue
                lo, hi = window
                seq = record_dict[gene_name][lo:hi].seq
                if strand == '-':
                    seq = seq.reverse_complement()
                # Single-argument print(...) behaves the same on Python 2
                # and Python 3.
                print('>%s\n%s' % (CDS_name, seq))
    finally:
        record_dict.close()


if __name__ == "__main__":
    main()