多发点无事之作得了:
#!/usr/bin/env perl
use warnings;
use strict;
die "Usage: perl $0 <fasta>\n" if(@ARGV ne 1);
open FA, $ARGV[0] or die $!;
$/ = "\n>";
my ($sl, $sg, $sc, %hash);
while(<FA>)
{
chomp;
s/^>//;
my @fa = split /\n/;
my ($id) = $fa[0] =~ /^(\S+)/;
my ($length, $g, $c);
if(defined $fa[1])
{
for(my $i = 1; $i < @fa; $i ++)
{
$length += length($fa[$i]);
my $gtmp = $fa[$i] =~ tr/Gg/Gg/;
my $ctmp = $fa[$i] =~ tr/Cc/Cc/;
$g += $gtmp;
$c += $ctmp;
}
}else{
warn "no sequences in $id\n";
}
$hash{$id} = $length;
$sl += $length;
$sg += $g;
$sc += $c;
}
my $ratio = sprintf("%.4f", ($sg + $sc) / $sl);
print "total_size: $sl\ttotal_G: $sg\ttotal_C: $sc\tGC_ratio: $ratio\n";
my @fasta = sort {$hash{$b} <=> $hash{$a}} keys %hash;
my $num = @fasta;
my $al = sprintf("%d", $sl / $num);
print "total_number: $num\tmax_length: $hash{$fasta[0]}\tmin_length: $hash{$fasta[-1]}\taverage_length: $al\n";
my $n50 = int($num * 0.5) - 1;
my $n50_n = map {$_} @fasta[0..$n50];
print "N50_size: $hash{$fasta[$n50]}\tN50_number: $n50_n\n";