perl novel可变剪接识别(3)

本文介绍如何识别Perl小说(novel)可变剪接的不同类型,包括原理和分类方法。特别指出,intron retained 1类型依赖于覆盖度判断,而intergenic和other则分别代表基因间区域和无法识别的剪接形式。
摘要由CSDN通过智能技术生成

好吧,该到最后的时刻了,识别novel可变剪接的类型,首先要在原理上判断类型的情况,把类型画出来比较好一些:

以上即为大致的分类,不过有两点要说明的:intron retained 1用tophat的结果无法实现,这个看覆盖度即可;另外还有两个分类:intergenic和other,顾名思义基因间区域内的和无法识别的,当然也可以按照其他分类,就不多说了,就以此为例吧:

#!/usr/bin/env perl
use warnings;
use strict;

my %hash;
open GTF, $ARGV[0] or die $!;
while(<GTF>)
{
	chomp;
	my @tmp = split;
	my ($a, $b) = (split /-/, $tmp[2])[0,1];
	$hash{$tmp[0]}{$tmp[1]}{'pn'} = $tmp[3];
	$hash{$tmp[0]}{$tmp[1]}{'rs'} = $a;
	$hash{$tmp[0]}{$tmp[1]}{'re'} = $b;
	$hash{$tmp[0]}{$tmp[1]}{'type'} = $tmp[4];
	my @a = split /,/, $tmp[5];
	my @b = split /,/, $tmp[6];
	@{$hash{$tmp[0]}{$tmp[1]}{'start'}} = @a;
	@{$hash{$tmp[0]}{$tmp[1]}{'end'}} = @b;
}
close GTF;

open NOVEL, $ARGV[1] or die $!;
print "#chr\tsplice_start\tsplice_end\tsplice_strand\tsplice_reads\tsplice_type\tgene_id\tgene_strand\tgene_type\n";
while(<NOVEL>)
{
	chomp;
	my @tmp = split;
	my $js = $tmp[1] + $tmp[5];
	my $je = $tmp[2] - $tmp[6] + 1;
	my ($chr, $read, $jpn) = @tmp[0,3,4];
	my $flag = 0;
	foreach my $t(keys %{$hash{$chr}})
	{
		if($je < $hash{$chr}{$t}{'rs'} or $js > $hash{$chr}{$t}{'re'})
		{
			next;
		}else{
			$flag = 1;
			if($js < $hash{$chr}{$t}{'rs'} and $je >= $hash{$chr}{$t}{'rs'})
			{
				if($hash{$chr}{$t}{'pn'} eq '+')
				{
					if($hash{$chr}{$t}{'type'} eq 0)
					{
						print "$chr\t$js\t$je\t$jpn\t$read\tOther\t$t\t+\tNonCoding\n";
					}else{
						print "$chr\t$js\t$je\t$jpn\t$read\t5UTR\t$t\t+\tCoding\n";
					}
				}else{
					if($hash{$chr}{$t}{'type'} eq 0)
					{
						print "$chr\t$js\t$je\t$jpn\t$read\tOther\t$t\t-\tNonCoding\n";
					}else{
						print "$chr\t$js\t$je\t$jpn\t$read\t3UTR\t$t\t-\tCoding\n";
					}
				}
				last;
			}elsif($js <= $hash{$chr}{$t}{'re'} and $je > $hash{$chr}{$t}{'re'}){
				if($hash{$chr}{$t}{'pn'} eq '+')
				{
					if($hash{$chr}{$t}{'type'} eq 0)
					{
						print "$chr\t$js\t$je\t$jpn\t$read\tOther\t$t\t+\tNonCoding\n";
					}else{
						print "$chr\t$js\t$je\t$jpn\t$read\t3UTR\t$t\t+\tCoding\n";
					}
				}else{
					if($hash{$chr}{$t}{'type'} eq 0)
					{
						print "$chr\t$js\t$je\t$jpn\t$read\tOther\t$t\t-\tNonCoding\n";
					}else{
						print "$chr\t$js\t$je\t$jpn\t$read\t5UTR\t$t\t-\tCoding\n";
					}
				}
				last;
			}else{
				my ($js_stat, $je_stat) = (0, 0);
				foreach my $i(@{$hash{$chr}{$t}{'end'}})
				{
					$js_stat = 1 if($js eq $i);
				}
				foreach my $j(@{$hash{$chr}{$t}{'start'}})
				{
					$je_stat = 1 if($je eq $j);
				}
				if($js_stat eq 1 and $je_stat eq 1)
				{
					if($hash{$chr}{$t}{'type'} eq 0)
					{
						print "$chr\t$js\t$je\t$jpn\t$read\tES\t$t\t$hash{$chr}{$t}{'pn'}\tNonCoding\n";
					}else{
						print "$chr\t$js\t$je\t$jpn\t$read\tES\t$t\t$hash{$chr}{$t}{'pn'}\tCoding\n";
					}
				}elsif($js_stat eq 1){
					if($hash{$chr}{$t}{'pn'} eq '+')
					{
						if($hash{$chr}{$t}{'type'} eq 0)
						{
							print "$chr\t$js\t$je\t$jpn\t$read\t3S\t$t\t+\tNonCoding\n";
						}else{
							print "$chr\t$js\t$je\t$jpn\t$read\t3S\t$t\t+\tCoding\n";
						}
					}else{
						if($hash{$chr}{$t}{'type'} eq 0)
						{
							print "$chr\t$js\t$je\t$jpn\t$read\t5S\t$t\t-\tNonCoding\n";
						}else{
							print "$chr\t$js\t$je\t$jpn\t$read\t5S\t$t\t-\tCoding\n";
						}
					}
				}elsif($je_stat eq 1){
					if($hash{$chr}{$t}{'pn'} eq '+')
					{
						if($hash{$chr}{$t}{'type'} eq 0)
						{
							print "$chr\t$js\t$je\t$jpn\t$read\t5S\t$t\t+\tNonCoding\n";
						}else{
							print "$chr\t$js\t$je\t$jpn\t$read\t5S\t$t\t+\tCoding\n";
						}
					}else{
						if($hash{$chr}{$t}{'type'} eq 0)
						{
							print "$chr\t$js\t$je\t$jpn\t$read\t3S\t$t\t-\tNonCoding\n";
						}else{
							print "$chr\t$js\t$je\t$jpn\t$read\t3S\t$t\t-\tCoding\n";
						}
					}
				}else{
					my $f2 = 0;
					for(my $i = 0; $i < @{$hash{$chr}{$t}{'end'}}; $i ++)
					{
						if($js >= $hash{$chr}{$t}{'start'}->[$i] and $je <= $hash{$chr}{$t}{'end'}->[$i])
						{
							$f2 = 1;
							if($hash{$chr}{$t}{'type'} eq 0)
							{
								print "$chr\t$js\t$je\t$jpn\t$read\tIR\t$t\t$hash{$chr}{$t}{'pn'}\tNonCoding\n";
							}else{
								print "$chr\t$js\t$je\t$jpn\t$read\tIR\t$t\t$hash{$chr}{$t}{'pn'}\tCoding\n";
							}
							last;
						}
					}
					if($f2 eq 0)
					{
						if($hash{$chr}{$t}{'type'} eq 0)
						{
							print "$chr\t$js\t$je\t$jpn\t$read\tOther\t$t\t$hash{$chr}{$t}{'pn'}\tNonCoding\n";
						}else{
							print "$chr\t$js\t$je\t$jpn\t$read\tOther\t$t\t$hash{$chr}{$t}{'pn'}\tCoding\n";
						}
					}
				}
				last;
			}
		}
	}
	if($flag eq 0)
	{
		print "$chr\t$js\t$je\t$jpn\t$read\tIntergenic\t.\t.\t.\n";
	}
}
close NOVEL;


 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值