GFF3文件按照染色体位置排序

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Sort the mRNA/exon records of a GFF3 file by chromosome position.

Records are grouped by the '###' directive lines in the input. Each group
keeps its internal record order and is positioned by the
(seqid, start, end) of its first kept record, which in the expected input
is the mRNA line. Only 'mRNA' and 'exon' features are kept; every other
feature type and every comment/directive line is dropped.

Differences from the earlier version of this script:
  * groups are held in memory, so the scratch file '2.txt' is no longer
    written;
  * records are emitted exactly as read (columns are not re-joined with
    spaces and no trailing space is added);
  * blank lines and empty groups are skipped instead of raising IndexError;
  * a '#' inside a record (e.g. in the attributes column) no longer causes
    the record to be dropped.
"""
import sys

# Input path used when no path is given on the command line.
DEFAULT_GFF3_PATH = '147389_transcript.fa.gff3'

# Feature types (column 3) that are written to the output.
KEPT_FEATURE_TYPES = ('mRNA', 'exon')


def _group_records(lines):
    """Return a list of groups, each a list of kept record lines.

    A group is closed by a line starting with '###'; a final group that is
    not followed by '###' is still returned. Groups with no kept records
    are discarded.
    """
    groups = []
    current = []
    for raw in lines:
        line = raw.strip()
        if not line:
            continue
        if line.startswith('#'):
            # Only '###' ends a group; other comments/directives are ignored.
            if line.startswith('###') and current:
                groups.append(current)
                current = []
            continue
        # Whitespace split (not '\t') so space-delimited copies also parse.
        fields = line.split()
        if len(fields) > 2 and fields[2] in KEPT_FEATURE_TYPES:
            current.append(line)
    if current:
        groups.append(current)
    return groups


def _group_key(group):
    """Return (seqid, start, end) of the first record in *group*.

    Raises ValueError if that record has fewer than five columns or
    non-integer coordinates.
    """
    seqid, _source, _type, start, end = group[0].split()[:5]
    return seqid, int(start), int(end)


def sort_gff3(lines):
    """Return the kept records of *lines* as a flat, position-sorted list.

    *lines* is any iterable of GFF3 text lines (an open file works).
    Seqids compare as plain strings, start/end compare numerically, and
    groups with equal keys keep their input order.
    """
    ordered = sorted(_group_records(lines), key=_group_key)
    return [record for group in ordered for record in group]


def main(path=DEFAULT_GFF3_PATH):
    """Read the GFF3 file at *path* and write the sorted records to stdout."""
    with open(path, 'r') as handle:
        records = sort_gff3(handle)
    for record in records:
        # sys.stdout.write keeps the script runnable on Python 2 and 3.
        sys.stdout.write(record + '\n')


if __name__ == '__main__':
    # Optional first argument overrides the default input path.
    main(*sys.argv[1:2])

输入文件格式(GMAP 生成的 GFF3 示例):


##gff-version   3
# Generated by GMAP version 2016-06-03 using call:  gmapl.avx2 -D /data2/masw_data/ -d NRGenome --trim-end-exons=10 -t 8 --canonical-mode=2 --allow-close-indels=2 -B 5 -f 2 -n 1 --min-trimmed-coverage=0.5 --min-identity=0.90 147389_transcript.fa
chr3D    NRGenome    gene    256151590    256152989    .    -    .    ID=gi|478413622|gb|GAEF01000004.1|.path1;Name=gi|478413622|gb|GAEF01000004.1|
chr3D    NRGenome    mRNA    256151590    256152989    .    -    .    ID=gi|478413622|gb|GAEF01000004.1|.mrna1;Name=gi|478413622|gb|GAEF01000004.1|;Parent=gi|478413622|gb|GAEF01000004.1|.path1;coverage=99.7;identity=97.3;matches=611;mismatches=9;indels=8;unknowns=0
chr3D    NRGenome    exon    256152745    256152989    99    -    .    ID=gi|478413622|gb|GAEF01000004.1|.mrna1.exon1;Name=gi|478413622|gb|GAEF01000004.1|;Parent=gi|478413622|gb|GAEF01000004.1|.mrna1;Target=gi|478413622|gb|GAEF01000004.1| 3 247 +
chr3D    NRGenome    exon    256151590    256151964    96    -    .    ID=gi|478413622|gb|GAEF01000004.1|.mrna1.exon2;Name=gi|478413622|gb|GAEF01000004.1|;Parent=gi|478413622|gb|GAEF01000004.1|.mrna1;Target=gi|478413622|gb|GAEF01000004.1| 248 630 +
chr3D    NRGenome    CDS    256152745    256152988    99    -    0    ID=gi|478413622|gb|GAEF01000004.1|.mrna1.cds1;Name=gi|478413622|gb|GAEF01000004.1|;Parent=gi|478413622|gb|GAEF01000004.1|.mrna1;Target=gi|478413622|gb|GAEF01000004.1| 4 247 +
chr3D    NRGenome    CDS    256151826    256151964    93    -    1    ID=gi|478413622|gb|GAEF01000004.1|.mrna1.cds2;Name=gi|478413622|gb|GAEF01000004.1|;Parent=gi|478413622|gb|GAEF01000004.1|.mrna1;Target=gi|478413622|gb|GAEF01000004.1| 248 393 +
###
chr6B    NRGenome    gene    393753187    393758971    .    +    .    ID=gi|478413624|gb|GAEF01000002.1|.path1;Name=gi|478413624|gb|GAEF01000002.1|
chr6B    NRGenome    mRNA    393753187    393758971    .    +    .    ID=gi|478413624|gb|GAEF01000002.1|.mrna1;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.path1;coverage=100.0;identity=97.2;matches=1041;mismatches=23;indels=7;unknowns=0
chr6B    NRGenome    exon    393753187    393753348    100    +    .    ID=gi|478413624|gb|GAEF01000002.1|.mrna1.exon1;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 1 162 +
chr6B    NRGenome    exon    393753915    393754065    98    +    .    ID=gi|478413624|gb|GAEF01000002.1|.mrna1.exon2;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 163 313 +
chr6B    NRGenome    exon    393754389    393754500    97    +    .    ID=gi|478413624|gb|GAEF01000002.1|.mrna1.exon3;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 314 425 +
chr6B    NRGenome    exon    393754907    393754973    97    +    .    ID=gi|478413624|gb|GAEF01000002.1|.mrna1.exon4;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 426 492 +
chr6B    NRGenome    exon    393755046    393755095    98    +    .    ID=gi|478413624|gb|GAEF01000002.1|.mrna1.exon5;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 493 542 +
chr6B    NRGenome    exon    393755173    393755233    98    +    .    ID=gi|478413624|gb|GAEF01000002.1|.mrna1.exon6;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 543 603 +
chr6B    NRGenome    exon    393757730    393757814    98    +    .    ID=gi|478413624|gb|GAEF01000002.1|.mrna1.exon7;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 604 688 +
chr6B    NRGenome    exon    393758596    393758971    95    +    .    ID=gi|478413624|gb|GAEF01000002.1|.mrna1.exon8;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 689 1071 +
chr6B    NRGenome    CDS    393755180    393755233    98    +    0    ID=gi|478413624|gb|GAEF01000002.1|.mrna1.cds1;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 550 603 +
chr6B    NRGenome    CDS    393757730    393757814    98    +    0    ID=gi|478413624|gb|GAEF01000002.1|.mrna1.cds2;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 604 688 +
chr6B    NRGenome    CDS    393758596    393758734    91    +    1    ID=gi|478413624|gb|GAEF01000002.1|.mrna1.cds3;Name=gi|478413624|gb|GAEF01000002.1|;Parent=gi|478413624|gb|GAEF01000002.1|.mrna1;Target=gi|478413624|gb|GAEF01000002.1| 689 834 +
###
chr1B    NRGenome    gene    538352839    538354283    .    +    .    ID=gi|478413623|gb|GAEF01000003.1|.path1;Name=gi|478413623|gb|GAEF01000003.1|
chr1B    NRGenome    mRNA    538352839    538354283    .    +    .    ID=gi|478413623|gb|GAEF01000003.1|.mrna1;Name=gi|478413623|gb|GAEF01000003.1|;Parent=gi|478413623|gb|GAEF01000003.1|.path1;coverage=99.9;identity=95.1;matches=641;mismatches=22;indels=11;unknowns=0
chr1B    NRGenome    exon    538352839    538353126    97    +    .    ID=gi|478413623|gb|GAEF01000003.1|.mrna1.exon1;Name=gi|478413623|gb|GAEF01000003.1|;Parent=gi|478413623|gb|GAEF01000003.1|.mrna1;Target=gi|478413623|gb|GAEF01000003.1| 2 289 +
chr1B    NRGenome    exon    538353906    538354283    93    +    .    ID=gi|478413623|gb|GAEF01000003.1|.mrna1.exon2;Name=gi|478413623|gb|GAEF01000003.1|;Parent=gi|478413623|gb|GAEF01000003.1|.mrna1;Target=gi|478413623|gb|GAEF01000003.1| 290 672 +
chr1B    NRGenome    CDS    538353046    538353126    98    +    0    ID=gi|478413623|gb|GAEF01000003.1|.mrna1.cds1;Name=gi|478413623|gb|GAEF01000003.1|;Parent=gi|478413623|gb|GAEF01000003.1|.mrna1;Target=gi|478413623|gb|GAEF01000003.1| 209 289 +
chr1B    NRGenome    CDS    538353906    538353995    96    +    0    ID=gi|478413623|gb|GAEF01000003.1|.mrna1.cds2;Name=gi|478413623|gb|GAEF01000003.1|;Parent=gi|478413623|gb|GAEF01000003.1|.mrna1;Target=gi|478413623|gb|GAEF01000003.1| 290 379 +
###
chr3D    NRGenome    gene    256154707    256157108    .    -    .    ID=gi|478413625|gb|GAEF01000001.1|.path1;Name=gi|478413625|gb|GAEF01000001.1|
chr3D    NRGenome    mRNA    256154707    256157108    .    -    .    ID=gi|478413625|gb|GAEF01000001.1|.mrna1;Name=gi|478413625|gb|GAEF01000001.1|;Parent=gi|478413625|gb|GAEF01000001.1|.path1;coverage=100.0;identity=98.7;matches=591;mismatches=8;indels=0;unknowns=0
chr3D    NRGenome    exon    256157043    256157108    100    -    .    ID=gi|478413625|gb|GAEF01000001.1|.mrna1.exon1;Name=gi|478413625|gb|GAEF01000001.1|;Parent=gi|478413625|gb|GAEF01000001.1|.mrna1;Target=gi|478413625|gb|GAEF01000001.1| 1 66 +
chr3D    NRGenome    exon    256156361    256156457    100    -    .    ID=gi|478413625|gb|GAEF01000001.1|.mrna1.exon2;Name=gi|478413625|gb|GAEF01000001.1|;Parent=gi|478413625|gb|GAEF01000001.1|.mrna1;Target=gi|478413625|gb|GAEF01000001.1| 67 163 +
chr3D    NRGenome    exon    256155580    256155743    96    -    .    ID=gi|478413625|gb|GAEF01000001.1|.mrna1.exon3;Name=gi|478413625|gb|GAEF01000001.1|;Parent=gi|478413625|gb|GAEF01000001.1|.mrna1;Target=gi|478413625|gb|GAEF01000001.1| 164 327 +
chr3D    NRGenome    exon    256155344    256155455    99    -    .    ID=gi|478413625|gb|GAEF01000001.1|.mrna1.exon4;Name=gi|478413625|gb|GAEF01000001.1|;Parent=gi|478413625|gb|GAEF01000001.1|.mrna1;Target=gi|478413625|gb|GAEF01000001.1| 328 439 +
chr3D    NRGenome    exon    256154872    256154938    100    -    .    ID=gi|478413625|gb|GAEF01000001.1|.mrna1.exon5;Name=gi|478413625|gb|GAEF01000001.1|;Parent=gi|478413625|gb|GAEF01000001.1|.mrna1;Target=gi|478413625|gb|GAEF01000001.1| 440 506 +
chr3D    NRGenome    exon    256154707    256154799    97    -    .    ID=gi|478413625|gb|GAEF01000001.1|.mrna1.exon6;Name=gi|478413625|gb|GAEF01000001.1|;Parent=gi|478413625|gb|GAEF01000001.1|.mrna1;Target=gi|478413625|gb|GAEF01000001.1| 507 599 +
chr3D    NRGenome    CDS    256155580    256155658    93    -    0    ID=gi|478413625|gb|GAEF01000001.1|.mrna1.cds1;Name=gi|478413625|gb|GAEF01000001.1|;Parent=gi|478413625|gb|GAEF01000001.1|.mrna1;Target=gi|478413625|gb|GAEF01000001.1| 249 327 +
chr3D    NRGenome    CDS    256155344    256155455    99    -    1    ID=gi|478413625|gb|GAEF01000001.1|.mrna1.cds2;Name=gi|478413625|gb|GAEF01000001.1|;Parent=gi|478413625|gb|GAEF01000001.1|.mrna1;Target=gi|478413625|gb|GAEF01000001.1| 328 439 +
chr3D    NRGenome    CDS    256154914    256154938    100    -    2    ID=gi|478413625|gb|GAEF01000001.1|.mrna1.cds3;Name=gi|478413625|gb|GAEF01000001.1|;Parent=gi|478413625|gb|GAEF01000001.1|.mrna1;Target=gi|478413625|gb|GAEF01000001.1| 440 464 +
###
chr6B    NRGenome    gene    393753187    393755138    .    +    .    ID=gi|478413621|gb|GAEF01000005.1|.path1;Name=gi|478413621|gb|GAEF01000005.1|
chr6B    NRGenome    mRNA    393753187    393755138    .    +    .    ID=gi|478413621|gb|GAEF01000005.1|.mrna1;Name=gi|478413621|gb|GAEF01000005.1|;Parent=gi|478413621|gb|GAEF01000005.1|.path1;coverage=100.0;identity=98.3;matches=575;mismatches=10;indels=0;unknowns=0
chr6B    NRGenome    exon    393753187    393753348    100    +    .    ID=gi|478413621|gb|GAEF01000005.1|.mrna1.exon1;Name=gi|478413621|gb|GAEF01000005.1|;Parent=gi|478413621|gb|GAEF01000005.1|.mrna1;Target=gi|478413621|gb|GAEF01000005.1| 1 162 +
chr6B    NRGenome    exon    393753915    393754065    98    +    .    ID=gi|478413621|gb|GAEF01000005.1|.mrna1.exon2;Name=gi|478413621|gb|GAEF01000005.1|;Parent=gi|478413621|gb|GAEF01000005.1|.mrna1;Target=gi|478413621|gb|GAEF01000005
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值