根据丰度从nr 注释结果提取每个基因的注释信息
nr.output
gene_1000002|288 288 1 216 75.0 95 1 72 310 2.5e-25 62 72 86.1 WP_019151153.1 WP_019151153.1 hypothetical protein [Alistipes senegalensis] 62 10 0 0 124.0 86.1
gene_1000002|288 288 1 216 75.0 95 1 72 307 5.5e-25 62 72 86.1 WP_010266016.1 WP_010266016.1 hypothetical protein [Alistipes timonensis] 59 13 0 0 122.9 81.9
gene_1000002|288 288 1 216 75.0 95 1 72 306 7.2e-25 62 72 86.1 WP_064974447.1 WP_064974447.1 hypothetical protein [Alistipes sp. Marseille-P2431] 58 14 0 0 122.5 80.6
Donor3.nr.output.forTA
gene_1000007|1197 2.4e-199 WP_008627072.1 1197
gene_1000007|1197 1.2e-198 WP_008619774.1 1197
gene_1000007|1197 3.4e-193 CCZ03104.1 1197
gene_1000007|1197 7.3e-164 CDB11117.1 1197
gene_1000007|1197 9.5e-164 WP_071148079.1 1197
gene_1000007|1197 2.9e-160 WP_005848635.1 1197
gene_1000007|1197 3.7e-160 WP_005846104.1 1197
gene_1000007|1197 3.7e-160 WP_008671032.1 1197
#!/usr/bin/perl -w
use strict;
use Getopt::Long;
# Usage text printed (via die) when a required option is missing or invalid.
my $usage = <<_USAGE_;
usage :
version 1.0
getDataFromList.pl -i inputList -l log -td TMP_DONE -d target -p perl dir -q query column -t target column -o output dir -m mark
_USAGE_

# Command-line options:
#   -i   input list file; its query column holds the keys to look up
#   -d   target table file; its target column holds the keys to index
#   -q   1-based column number of the key in the input list
#   -t   1-based column number of the key in the target table
#   -o   output directory
#   -m   mark inserted into the output file name (<sample>.<mark>.output)
#   -l, -td, -p   required, but not referenced by the code visible in this file
my ($inputList, $totalRunLog, $TMP_DONE, $target, $queryColumn, $targetColumn, $perl_dir, $outputDir, $mark);
GetOptions(
    "i=s"  => \$inputList,
    "d=s"  => \$target,
    "l=s"  => \$totalRunLog,
    "td=s" => \$TMP_DONE,
    "q=s"  => \$queryColumn,
    "t=s"  => \$targetColumn,
    "o=s"  => \$outputDir,
    "p=s"  => \$perl_dir,
    "m=s"  => \$mark
);
die $usage
    if (!$inputList || !$totalRunLog || !$TMP_DONE || !$target
        || !$queryColumn || !$targetColumn || !$outputDir || !$perl_dir);

# Column numbers must be positive integers. Anything else would turn into a
# negative or nonsensical array index after the decrement below and silently
# select the wrong column.
for my $column ($queryColumn, $targetColumn) {
    die $usage unless $column =~ /\A[0-9]+\z/ && $column > 0;
}

# -m stays optional, but the mark is interpolated into the output file name,
# so give it a defined value to avoid "uninitialized" warnings.
$mark = '' unless defined $mark;

# Convert the 1-based column numbers to 0-based array indices.
$queryColumn--;
$targetColumn--;
#my @targets = split/\s+/, $targetLine;
#foreach my $target (@targets){
# $target =~ /.+\/(.+)\./;
# my $mark = $1;
# Index the target table by its key column.
# %target maps each key (the value found in column $targetColumn) to an array
# ref holding every complete line that carries that key, in file order.
my %target;
# Three-argument open: the path is always treated as a plain file name, never
# as a mode or pipe specification.
open(my $target_fh, '<', $target) or die "$!:$target\n";
while (my $line = <$target_fh>) {
    chomp $line;
    # Remove carriage returns BEFORE the blank-line test so that empty lines
    # in CRLF files are skipped as well.
    $line =~ tr/\r//d;
    next if $line eq '';

    my @entries = split /\t/, $line;
    my $key     = $entries[$targetColumn];
    # A row with fewer columns than the key column has no key; skip it rather
    # than indexing it under an undefined hash key.
    next unless defined $key;

    # push autovivifies the array ref the first time a key is seen.
    push @{ $target{$key} }, $line;
}
close $target_fh;
# Sample name = file name of the input list (directories removed) up to its
# first dot, e.g. "dir/Donor3.nr.output.forTA" -> "Donor3".  Unlike a pattern
# that insists on a '/' and a '.', this also handles a bare file name and a
# file name without any extension.
my ($sample) = $inputList =~ m{\A (?:.*/)? ([^/]+?) (?:\.[^/]*)? \z}xs;
die "cannot derive a sample name from input list path:$inputList\n"
    unless defined $sample;

my $output_path = "$outputDir/$sample.$mark.output";

# Three-argument open keeps both paths from being interpreted as open modes.
open(my $in,  '<', $inputList)   or die "$!:$inputList\n";
open(my $out, '>', $output_path) or die "$!:$output_path\n";

# For every key in the input list, emit all target lines indexed under that
# key.  A key that occurs several times in the list is emitted each time.
while (my $line = <$in>) {
    chomp $line;
    # Strip carriage returns before the blank-line test (CRLF input).
    $line =~ tr/\r//d;
    next if $line eq '';

    my @entries = split /\t/, $line;
    my $key     = $entries[$queryColumn];
    # Skip rows too short to contain the key column, and keys with no
    # entry in the target index.
    next unless defined $key && $target{$key};

    print {$out} "$_\n" for @{ $target{$key} };
}
close $in;
# Buffered write errors (e.g. disk full) only surface when the handle closes.
close $out or die "$!:$output_path\n";
#}