use Perl analysis BLAST data
#!/usr/bin/perl
use 5.010;
use strict;
say "Demo : use perl process BLAST's data : search a large sequence ,and find the sepcial sequence .";
my $REPORT_FILE = "report.txt";
my $blast_file = $ARGV[0] || 'todo'; # what's || ?
unless (-e $blast_file){
die "$0 : ERROR : missing file :$blast_file";
}else{
say "get $blast_file okay !";
}
my ($query_src , $sbjct_src);
open(IN,$blast_file) or die "$0,ERROR : $blast_file : $!";
while(my $line =<IN>){
chomp $line;
say "processing line $ : $line.";
my @words = split /\s+/, $line;
if($line =~ /^Query/){
$query_src .= $words[2];
}elsif($line =~ /^Sbjct/){
$sbjct_src .= $words[2];
}
}
close IN;
my @patterns =('gagcc','caat','cat','tcggga','-');
my (%query_counts,%sbjct_counts);
foreach my $pattern (@patterns){
while ($query_src =~ /$pattern/g){
$query_counts{$pattern}++; say "find $pattern for query!";
}
while ($sbjct_src =~ /$pattern/g){
$sbjct_counts{$pattern}++; say "find $pattern for sbjct!";
}
}
open (OUT, ">$REPORT_FILE") or die "$0: ERROR: Can't write $REPORT_FILE";
print OUT "Sequence Report\n","Run by Yummy on ",scalar localtime, "\n",
"\nNOTE: In the following reports, a dash (-) represents \n",
" missing data in the chromosomal sequence \n\n",
"Total length of 'Query' sequence: ",length $query_src , " characters\n", "Results for 'Query':\n";
foreach my $key (sort @patterns){
print OUT "\t'$key' seen $query_counts{$key}\n";
}
print OUT "\nTotal length of 'Sbjct' sequence: ",length $sbjct_src, " characters\n","Results for 'Sbjct':\n";
foreach my $key (sort @patterns){
print OUT "\t'$key' seen $sbjct_counts{$key}\n";
}
close OUT;
__END__
and
BLAST.data (just a demo )
Query: 1165 gagcccaggagttcaagaccagcctgggtaacatgatgaaacc-tcgtctctac 1217
Sbjct: 11895 gagctcaggagtttgagaccagcctggggaacacggtgaaaccctgtctctac 11843
Query: 1170 caggagttcaagaccagcctg 1190
Sbjct: 69962 caggag-ttcaagaccagcctg 69942
Query: 1106 tggtggctcacacctgcaatcccagcact 1134
Sbjct: 77363 tggtggctcacgcctgtaatcccagcact 77335
for more information, you can have a reference from 《Developing_Bioinformatics_Computer_Skills》.
by Yummy on Tue Feb 7 23:06:18 2012