use Perl analysis BLAST data


use Perl analysis BLAST data


#!/usr/bin/perl
use 5.010;
use strict;
say "Demo : use perl process BLAST's data : search a large sequence ,and find the sepcial sequence .";
my $REPORT_FILE = "report.txt";
my $blast_file = $ARGV[0] || 'todo';	# what's || ?

unless (-e $blast_file){
	die "$0 : ERROR : missing file :$blast_file";
}else{
	say "get $blast_file okay !";
}

my ($query_src , $sbjct_src);

open(IN,$blast_file) or die "$0,ERROR : $blast_file : $!";
while(my $line =<IN>){
	chomp $line;
	say "processing line $ : $line.";
	
	my @words = split /\s+/, $line;
	if($line =~ /^Query/){	
		$query_src .= $words[2];
	}elsif($line =~ /^Sbjct/){
		$sbjct_src .= $words[2];
	}
}
close IN;

my @patterns =('gagcc','caat','cat','tcggga','-');
my (%query_counts,%sbjct_counts);
foreach my $pattern (@patterns){
	while ($query_src =~ /$pattern/g){
		$query_counts{$pattern}++;	say "find $pattern for query!";
	}
	while ($sbjct_src =~ /$pattern/g){
		$sbjct_counts{$pattern}++;	say "find $pattern for sbjct!";
	}
}

open (OUT, ">$REPORT_FILE") or die "$0: ERROR: Can't write $REPORT_FILE";
print OUT "Sequence Report\n","Run by Yummy on ",scalar localtime, "\n",
	"\nNOTE: In the following reports, a dash (-) represents \n",
	"        missing data in the chromosomal sequence \n\n",
	"Total length of 'Query' sequence: ",length $query_src , " characters\n", "Results for 'Query':\n";
foreach my $key (sort @patterns){
	print OUT "\t'$key' seen $query_counts{$key}\n";
}
print OUT "\nTotal length of 'Sbjct' sequence: ",length $sbjct_src, " characters\n","Results for 'Sbjct':\n";
foreach my $key (sort @patterns){
	print OUT "\t'$key' seen $sbjct_counts{$key}\n";
}
close OUT;
__END__

and

BLAST.data (just a demo )

Query: 1165 gagcccaggagttcaagaccagcctgggtaacatgatgaaacc-tcgtctctac 1217
Sbjct: 11895 gagctcaggagtttgagaccagcctggggaacacggtgaaaccctgtctctac 11843
Query: 1170 caggagttcaagaccagcctg 1190
Sbjct: 69962 caggag-ttcaagaccagcctg 69942
Query: 1106 tggtggctcacacctgcaatcccagcact 1134
Sbjct: 77363 tggtggctcacgcctgtaatcccagcact 77335
 for more information, you can have a reference from 《Developing_Bioinformatics_Computer_Skills》. 

by Yummy on Tue Feb  7 23:06:18 2012

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值