rename.pl
#!/usr/bin/perl -w
use strict;
use Getopt::Long;
# Help text printed when a required option is missing or the command line
# cannot be parsed.
my $usage = <<_USAGE_;
usage : perl $0 -i fasta file -o output
_USAGE_
# -i : FASTA file to read.
# -o : FASTA file to write; a second file, "<output>.idChangeList", is
#      created next to it.
my ($input, $output);
# GetOptions returns false after reporting a bad command line (for example
# an unknown option); stop with the usage text instead of carrying on.
GetOptions(
    "i=s" => \$input,
    "o=s" => \$output
) or die $usage;
die $usage if (!$input || !$output);
# Three-argument open keeps the mode apart from the file name, so a name with
# a leading '>' or '|' or with surrounding whitespace is opened literally.
open(my $in, '<', $input) or die "$!:$input\n";
open(my $out, '>', $output) or die "$!:$output\n";
open(my $out2, '>', "$output.idChangeList") or die "$!:$output.idChangeList\n";
# Rewrite the FASTA read from $in. Every header line becomes
# ">gene_<n>|<length>", where <n> counts the headers from 1 and <length> is
# the run of digits after the last '|' of the original header. Other lines
# are copied to $out unchanged. Each "old header<TAB>new header" pair is
# written to $out2.
my $n = 1;
while (<$in>){
    chomp;
    # Strip carriage returns before the blank-line test so that the empty
    # lines of a CRLF file are skipped as well.
    s/\r//g;
    next if ($_ eq '');
    if (/^>/){
        # Take the length from the match result itself: after a failed match
        # $1 would not describe this header, and a malformed header would be
        # written without any error.
        my ($length) = /\|(\d+)$/
            or die "header without a trailing |<digits> field at line " . $. . " of $input:$_\n";
        my $newID = ">gene_$n|$length";
        print $out "$newID\n";
        print $out2 "$_\t$newID\n";
        $n++;
    }else{
        print $out "$_\n";
    }
}
# A bare "close" acts on the currently selected handle (STDOUT), so each
# handle is closed by name. Buffered write errors only show up at close time,
# hence the checks on the two output handles.
close $in;
close($out) or die "$!:$output\n";
close($out2) or die "$!:$output.idChangeList\n";
getNRNucl.pl
#!/usr/bin/perl -w
use strict;
use Getopt::Long;
# Help text printed when -i is missing or the command line cannot be parsed.
my $usage = <<_USAGE_;
usage :
version 1.0
getNrNucl.pl -i P_GC
_USAGE_
# -i : directory prefix for every file this script reads and writes
#      (all.nr.prot.fa, all.rename.nr.prot.fa.idChangeList, all.nucl.fa,
#      all.nr.nucl.fa).
my $P_GC;
# GetOptions returns false after reporting a bad command line (for example
# an unknown option); stop with the usage text instead of carrying on.
GetOptions(
    "i=s" => \$P_GC
) or die $usage;
die $usage if (!$P_GC);
# Collect the IDs found in all.nr.prot.fa: for each header line of the form
# ">ID|rest", the text between '>' and the first '|' becomes a key of
# %nrGeneListP (value 1).
my %nrGeneListP;
# Three-argument open: the path is used literally, whatever characters
# $P_GC contains.
open(my $pgl, '<', "$P_GC/all.nr.prot.fa") or die "$!:$P_GC/all.nr.prot.fa\n";
while (<$pgl>){
    chomp;
    s/\r//g;
    # Only lines that match ">ID|rest" yield a key. A header without a '|'
    # is skipped rather than stored with its leading '>' still attached.
    if (/^>(.+?)\|.+/){
        $nrGeneListP{$1} = 1;
    }
}
close $pgl;
# Load the ID mapping from all.rename.nr.prot.fa.idChangeList. Each line
# holds two tab-separated headers of the form ">ID|rest"; the ID of the first
# header becomes a key of %idExchange and the ID of the second its value.
my %idExchange;
# Three-argument open: the path is used literally, whatever characters
# $P_GC contains.
open(my $ie, '<', "$P_GC/all.rename.nr.prot.fa.idChangeList") or die "$!:$P_GC/all.rename.nr.prot.fa.idChangeList\n";
while (<$ie>){
    chomp;
    s/\r//g;
    next if ($_ eq '');
    my @entries = split /\t/;
    # Read both IDs from the match results. Relying on $1 after an unchecked
    # match would reuse the capture of an earlier match when a field is
    # malformed and record a wrong mapping.
    my ($ri, $ni);
    if (@entries >= 2){
        ($ri) = $entries[0] =~ /^>(.+?)\|.+/;
        ($ni) = $entries[1] =~ /^>(.+?)\|.+/;
    }
    die "malformed line " . $. . " in $P_GC/all.rename.nr.prot.fa.idChangeList:$_\n"
        unless (defined $ri && defined $ni);
    $idExchange{$ri} = $ni;
}
close $ie;
# Write to all.nr.nucl.fa the records of all.nucl.fa whose ID is a key of
# %nrGeneListP. The ID is the header text between '>' and the first '|'.
# A kept record gets the header ">" . $idExchange{ID} . "|" . <digits>, where
# <digits> is the digit run captured after a '|' by the header pattern below.
open(my $ng, '<', "$P_GC/all.nucl.fa") or die "$!:$P_GC/all.nucl.fa\n";
open(my $ngn, '>', "$P_GC/all.nr.nucl.fa") or die "$!:$P_GC/all.nr.nucl.fa\n";
# 1 while the lines of a kept record are being read, 0 otherwise.
my $flag = 0;
while (<$ng>){
    chomp;
    # Strip carriage returns before the blank-line test so that the empty
    # lines of a CRLF file are not copied into a kept record.
    s/\r//g;
    next if ($_ eq '');
    if (/^>/){
        # Every header starts a new record, so the decision is reset here.
        # Otherwise a header that fails the pattern below would inherit the
        # keep/drop state of the record before it.
        $flag = 0;
        if (/^>(.+?\|(\d+))/){
            my $id = $1;
            my $len = $2;
            $id =~ s/(\|.+)//;
            if ($nrGeneListP{$id}){
                # Without a mapping the new header would read ">|<digits>".
                defined $idExchange{$id}
                    or die "no renamed ID for '$id' (line " . $. . " of $P_GC/all.nucl.fa)\n";
                $flag = 1;
                $_ = ">$idExchange{$id}|$len";
            }
        }
    }
    if ($flag == 1){
        print $ngn "$_\n";
    }
}
close $ng;
# Buffered write errors only show up at close time.
close($ngn) or die "$!:$P_GC/all.nr.nucl.fa\n";