XML处理相关——学习一些处理脚本(2)——(待完善)

def main():
    #initialization
    n=0 # total number of query seq
    align_mi=0
    align_m=0    


    args=ParseArg()
    miRNA_result=open(args.mi_xml)
    mRNA_result=open(args.m_xml)
    miRNA_records=NCBIXML.parse(miRNA_result)
    mRNA_records=NCBIXML.parse(mRNA_result)
    output=open(args.output,'w')
    
    
    # E-values
    if args.evalue==0:
        evalue_mi=1e-5
        evalue_m=1e-15
    else:
        evalue_mi=float(args.evalue[0])
        evalue_m=float(args.evalue[1])
    #izip()是属于itertools库的一个函数,在Python3中已经取消(AttributeError: module 'itertools' has no attribute 'izip')
    for mi_record,m_record in itertools.izip(miRNA_records,mRNA_records):
        temp_output=''
        mi_indic=0 # whether there are miRNA alignment
        m_indic=0  # whether there are mRNA alignment
        mi_end=150  #shortest miRNA aligned end in query sequence
        n=n+1
        if (mi_record.query!=m_record.query):
            print >>sys.stderr,"The two query seqs from miRNA and mRNA results are not matched!"
            break
        temp_output=mi_record.query+'\n'
        for alignment in mi_record.alignments:
            for hsp in alignment.hsps:
                if hsp.expect < evalue_mi:
                    mi_indic=1
                    line="\t".join (str(f) for f in [hsp.query_start,hsp.query_end,alignment.title,hsp.sbjct,hsp.sbjct_start,hsp.sbjct_end,hsp.expect,hsp.score])
                    temp_output=temp_output+line+'\n'
                    if mi_end>max(hsp.query_start,hsp.query_end):
                        mi_end=max(hsp.query_start,hsp.query_end)
        
        if mi_indic==0:
            mi_end=0

        for alignment in m_record.alignments:
            for hsp in alignment.hsps:
                if (hsp.expect < evalue_m) and (min(hsp.query_start,hsp.query_end)>mi_end):
                    m_indic=1
                    line="\t".join (str(f) for f in [hsp.query_start,hsp.query_end,alignment.title,hsp.sbjct,hsp.sbjct_start,hsp.sbjct_end,hsp.expect,hsp.score])
                    temp_output=temp_output+line+'\n'
        if mi_indic+m_indic>=2:
            output.write(temp_output)
        if mi_indic==1:
            align_mi+=1
        if m_indic==1:
            align_m+=1
    print n,align_mi,align_m
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值