#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Flatten a GFF3 alignment file and print its mRNA/exon records sorted.

The input is read group by group, where a group is everything up to a
``###`` line.  Only the ``mRNA`` and ``exon`` records of a group are kept.
Groups are ordered by the sequence name, start and end of their first kept
record, and every kept record is printed on its own line with its
whitespace-separated fields joined by single spaces.
"""
import sys

GFF_PATH = '147389_transcript.fa.gff3'
TMP_PATH = '2.txt'
KEPT_TYPES = ('mRNA', 'exon')
RECORD_END = '@'


def collect_transcripts(gff_path, tmp_path):
    """Write the kept records of every ``###``-terminated group on one line.

    Each kept record is written stripped and followed by a tab-delimited
    ``@`` marker; a ``###`` line ends the current output line.

    Args:
        gff_path: path of the GFF3 file to read.
        tmp_path: path of the intermediate file to (over)write.
    """
    with open(gff_path, 'r') as gff, open(tmp_path, 'w') as tmp:
        for line in gff:
            record = line.strip()
            if record.startswith('###'):
                tmp.write('\n')
                continue
            # Only lines that *start* with '#' are comments/directives; a '#'
            # inside the attribute column must not drop the record.
            if not record or record.startswith('#'):
                continue
            fields = record.split()
            # Short lines have no feature-type column to inspect.
            if len(fields) >= 3 and fields[2] in KEPT_TYPES:
                tmp.write(record + '\t' + RECORD_END + '\t')


def load_sorted_rows(tmp_path):
    """Read the intermediate file and return its rows in sorted order.

    Args:
        tmp_path: path of the file written by ``collect_transcripts``.

    Returns:
        A list of token lists, one per non-empty line, sorted by the first
        token and the integer values of the fourth and fifth tokens.
    """
    with open(tmp_path, 'r') as tmp:
        rows = [line.split() for line in tmp]
    # A group without any kept record leaves an empty line behind; it has no
    # coordinates to sort on, so it is skipped.
    rows = [row for row in rows if row]
    return sorted(rows, key=lambda row: (row[0], int(row[3]), int(row[4])))


def format_rows(rows):
    """Return the printable text for *rows*.

    Tokens are joined by single spaces and every ``@`` marker ends the
    current line.

    Args:
        rows: token lists as returned by ``load_sorted_rows``.

    Returns:
        The text to print, as a single string.
    """
    lines = []
    for row in rows:
        record = []
        for token in row:
            if token == RECORD_END:
                # Joining with the newline keeps the space in front of it,
                # matching the output of the Python 2 statements
                # ``print m,`` followed by ``print '\n',``.
                lines.append(' '.join(record + ['\n']))
                record = []
            else:
                record.append(token)
        if record:
            # Tokens after the last marker; not produced by
            # collect_transcripts, but never silently dropped.
            lines.append(' '.join(record) + '\n')
    return ''.join(lines)


def main(gff_path=GFF_PATH, tmp_path=TMP_PATH):
    """Run the whole pipeline and write the result to standard output."""
    collect_transcripts(gff_path, tmp_path)
    sys.stdout.write(format_rows(load_sorted_rows(tmp_path)))


if __name__ == '__main__':
    main()
输入文件格式(GMAP 输出的 GFF3,每组比对记录以 ### 行结束):
##gff-version 3
# Generated by GMAP version 2016-06-03 using call: gmapl.avx2 -D /data2/masw_data/ -d NRGenome --trim-end-exons=10 -t 8 --canonical-mode=2 --allow-close-indels=2 -B 5 -f 2 -n 1 --min-trimmed-coverage=0.5 --min-identity=0.90 147389_transcript.fa
chr3D NRGenome gene 256151590 256152989 . - . ID=gi|478413622|gb|GAEF01000004.1|.path1;Name=gi|478413622|gb|GAEF01000004.1|
chr3D NRGenome mRNA 256151590 256152989 . - . ID=gi|478413622|gb|GAEF01000004.1|.mrna1;Name=gi|478413622|gb|GAEF01000004.1|;Parent=gi|478413622|gb|GAEF01000004.1|.path1;coverage=99.7;identity=97.3;matches=611;mismatches=9;indels=8;unknowns=0
chr3D NRGenome exon 256152745 256152989 99 - . ID=gi|478413622|gb|GAEF01000004.1|.mrna1.exon1;Name=gi|478413622|gb|GAEF01000004.1|;Parent=gi|478413622|gb|GAEF01000004.1|.mrna1;Target=gi|478413622|gb|GAEF01000004.1| 3 247 +
chr3D NRGenome exon 256151590 256151964 96 - . ID=gi|478413622|gb|GAEF01000004.1|.mrna1.exon2;Name=gi|478413622|gb|GAEF01000004.1|;Parent=gi|478413622|gb|GAEF01000004.1|.mrna1;Target=gi|478413622|gb|GAEF01000004.1| 248 630 +
chr3D NRGenome CDS 256152745 256152988 99 - 0 ID=gi|478413622|gb|GAEF01000004.1|.mrna1.cds1;Name=gi|478413622|gb|GAEF01000004.1|;Parent=gi|478413622|gb|GAEF01000004.1|.mrna1;Target=gi|478413622|gb|GAEF01000004.1| 4 247 +
chr3D NRGenome CDS 256151826 256151964 93 - 1 ID=gi|478413622|gb|GAEF01000004.1|.mrna1.cds2;Name=gi|478413622|gb|GAEF01000004.1|;Parent=gi|478413622|gb|GAEF01000004.1|.mrna1;Target=gi|478413622|gb|GAEF01000004.1| 248 393 +
###
chr6B NRGenome gene 393753187 393758971 . + . ID=gi|478413624|gb|GAEF01000002.1|.path1;Name=gi|478413624|gb|GAEF01000002.1|
chr6B NRGenome mRNA 393753187 393758971 . + . ID=gi|478413624|gb|GAEF01000002.1|.mrna1;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.path1;coverage=100.0;identity=97.2;matches=1041;mismatches=23;indels=7;unknowns=0
chr6B NRGenome exon 393753187 393753348 100 + . ID=gi|478413624|gb|GAEF01000002.1|.mrna1.exon1;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 1 162 +
chr6B NRGenome exon 393753915 393754065 98 + . ID=gi|478413624|gb|GAEF01000002.1|.mrna1.exon2;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 163 313 +
chr6B NRGenome exon 393754389 393754500 97 + . ID=gi|478413624|gb|GAEF01000002.1|.mrna1.exon3;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 314 425 +
chr6B NRGenome exon 393754907 393754973 97 + . ID=gi|478413624|gb|GAEF01000002.1|.mrna1.exon4;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 426 492 +
chr6B NRGenome exon 393755046 393755095 98 + . ID=gi|478413624|gb|GAEF01000002.1|.mrna1.exon5;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 493 542 +
chr6B NRGenome exon 393755173 393755233 98 + . ID=gi|478413624|gb|GAEF01000002.1|.mrna1.exon6;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 543 603 +
chr6B NRGenome exon 393757730 393757814 98 + . ID=gi|478413624|gb|GAEF01000002.1|.mrna1.exon7;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 604 688 +
chr6B NRGenome exon 393758596 393758971 95 + . ID=gi|478413624|gb|GAEF01000002.1|.mrna1.exon8;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 689 1071 +
chr6B NRGenome CDS 393755180 393755233 98 + 0 ID=gi|478413624|gb|GAEF01000002.1|.mrna1.cds1;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 550 603 +
chr6B NRGenome CDS 393757730 393757814 98 + 0 ID=gi|478413624|gb|GAEF01000002.1|.mrna1.cds2;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 604 688 +
chr6B NRGenome CDS 393758596 393758734 91 + 1 ID=gi|478413624|gb|GAEF01000002.1|.mrna1.cds3;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 689 834 +
###
chr1B NRGenome gene 538352839 538354283 . + . ID=gi|478413623|gb|GAEF01000003.1|.path1;Name=gi|478413623|gb|GAEF01000003.1|
chr1B NRGenome mRNA 538352839 538354283 . + . ID=gi|478413623|gb|GAEF01000003.1|.mrna1;Name=gi|478413623|gb|GAEF01000003.1|;Parent=gi|478413623|gb|GAEF01000003.1|.path1;coverage=99.9;identity=95.1;matches=641;mismatches=22;indels=11;unknowns=0
chr1B NRGenome exon 538352839 538353126 97 + . ID=gi|478413623|gb|GAEF01000003.1|.mrna1.exon1;Name=gi|478413623|gb|GAEF01000003.1|;Parent=gi|478413623|gb|GAEF01000003.1|.mrna1;Target=gi|478413623|gb|GAEF01000003.1| 2 289 +
chr1B NRGenome exon 538353906 538354283 93 + . ID=gi|478413623|gb|GAEF01000003.1|.mrna1.exon2;Name=gi|478413623|gb|GAEF01000003.1|;Parent=gi|478413623|gb|GAEF01000003.1|.mrna1;Target=gi|478413623|gb|GAEF01000003.1| 290 672 +
chr1B NRGenome CDS 538353046 538353126 98 + 0 ID=gi|478413623|gb|GAEF01000003.1|.mrna1.cds1;Name=gi|478413623|gb|GAEF01000003.1|;Parent=gi|478413623|gb|GAEF01000003.1|.mrna1;Target=gi|478413623|gb|GAEF01000003.1| 209 289 +
chr1B NRGenome CDS 538353906 538353995 96 + 0 ID=gi|478413623|gb|GAEF01000003.1|.mrna1.cds2;Name=gi|478413623|gb|GAEF01000003.1|;Parent=gi|478413623|gb|GAEF01000003.1|.mrna1;Target=gi|478413623|gb|GAEF01000003.1| 290 379 +
###
chr3D NRGenome gene 256154707 256157108 . - . ID=gi|478413625|gb|GAEF01000001.1|.path1;Name=gi|478413625|gb|GAEF01000001.1|
chr3D NRGenome mRNA 256154707 256157108 . - . ID=gi|478413625|gb|GAEF01000001.1|.mrna1;Name=gi|478413625|gb|GAEF01000001.1|;Parent=gi|478413625|gb|GAEF01000001.1|.path1;coverage=100.0;identity=98.7;matches=591;mismatches=8;indels=0;unknowns=0
chr3D NRGenome exon 256157043 256157108 100 - . ID=gi|478413625|gb|GAEF01000001.1|.mrna1.exon1;Name=gi|478413625|gb|GAEF01000001.1|;Parent=gi|478413625|gb|GAEF01000001.1|.mrna1;Target=gi|478413625|gb|GAEF01000001.1| 1 66 +
chr3D NRGenome exon 256156361 256156457 100 - . ID=gi|478413625|gb|GAEF01000001.1|.mrna1.exon2;Name=gi|478413625|gb|GAEF01000001.1|;Parent=gi|478413625|gb|GAEF01000001.1|.mrna1;Target=gi|478413625|gb|GAEF01000001.1| 67 163 +
chr3D NRGenome exon 256155580 256155743 96 - . ID=gi|478413625|gb|GAEF01000001.1|.mrna1.exon3;Name=gi|478413625|gb|GAEF01000001.1|;Parent=gi|478413625|gb|GAEF01000001.1|.mrna1;Target=gi|478413625|gb|GAEF01000001.1| 164 327 +
chr3D NRGenome exon 256155344 256155455 99 - . ID=gi|478413625|gb|GAEF01000001.1|.mrna1.exon4;Name=gi|478413625|gb|GAEF01000001.1|;Parent=gi|478413625|gb|GAEF01000001.1|.mrna1;Target=gi|478413625|gb|GAEF01000001.1| 328 439 +
chr3D NRGenome exon 256154872 256154938 100 - . ID=gi|478413625|gb|GAEF01000001.1|.mrna1.exon5;Name=gi|478413625|gb|GAEF01000001.1|;Parent=gi|478413625|gb|GAEF01000001.1|.mrna1;Target=gi|478413625|gb|GAEF01000001.1| 440 506 +
chr3D NRGenome exon 256154707 256154799 97 - . ID=gi|478413625|gb|GAEF01000001.1|.mrna1.exon6;Name=gi|478413625|gb|GAEF01000001.1|;Parent=gi|478413625|gb|GAEF01000001.1|.mrna1;Target=gi|478413625|gb|GAEF01000001.1| 507 599 +
chr3D NRGenome CDS 256155580 256155658 93 - 0 ID=gi|478413625|gb|GAEF01000001.1|.mrna1.cds1;Name=gi|478413625|gb|GAEF01000001.1|;Parent=gi|478413625|gb|GAEF01000001.1|.mrna1;Target=gi|478413625|gb|GAEF01000001.1| 249 327 +
chr3D NRGenome CDS 256155344 256155455 99 - 1 ID=gi|478413625|gb|GAEF01000001.1|.mrna1.cds2;Name=gi|478413625|gb|GAEF01000001.1|;Parent=gi|478413625|gb|GAEF01000001.1|.mrna1;Target=gi|478413625|gb|GAEF01000001.1| 328 439 +
chr3D NRGenome CDS 256154914 256154938 100 - 2 ID=gi|478413625|gb|GAEF01000001.1|.mrna1.cds3;Name=gi|478413625|gb|GAEF01000001.1|;Parent=gi|478413625|gb|GAEF01000001.1|.mrna1;Target=gi|478413625|gb|GAEF01000001.1| 440 464 +
###
chr6B NRGenome gene 393753187 393755138 . + . ID=gi|478413621|gb|GAEF01000005.1|.path1;Name=gi|478413621|gb|GAEF01000005.1|
chr6B NRGenome mRNA 393753187 393755138 . + . ID=gi|478413621|gb|GAEF01000005.1|.mrna1;Name=gi|478413621|gb|GAEF01000005.1|;Parent=gi|478413621|gb|GAEF01000005.1|.path1;coverage=100.0;identity=98.3;matches=575;mismatches=10;indels=0;unknowns=0
chr6B NRGenome exon 393753187 393753348 100 + . ID=gi|478413621|gb|GAEF01000005.1|.mrna1.exon1;Name=gi|478413621|gb|GAEF01000005.1|;Parent=gi|478413621|gb|GAEF01000005.1|.mrna1;Target=gi|478413621|gb|GAEF01000005.1| 1 162 +
chr6B NRGenome exon 393753915 393754065 98 + . ID=gi|478413621|gb|GAEF01000005.1|.mrna1.exon2;Name=gi|478413621|gb|GAEF01000005.1|;Parent=gi|478413621|gb|GAEF01000005.1|.mrna1;Target=gi|478413621|gb|GAEF01000005