TCGA rsem 计算

#################################准备对应物种index文件
rsem-prepare-reference --gtf Macaca_mulatta.MMUL_1.85.chr_fixed.gtf --bowtie-path /usr/bin Macaca_mulatta.MMUL_1.dna.toplevel.fa RSEM.index
#################################计算rsem值
rsem-calculate-expression -p 8 --bowtie-path /usr/bin --paired-end zhongliu1_merged.fq zhongliu2_merged.fq /home/ansanqi/RNAseq/daimonkey/rsem_caculate/old/RSEM.index  Rsem_zhongliu_merged


rsem-calculate-expression -p 8 --bowtie-path /usr/bin --paired-end zhengchang_HL7TWCCXX_L4_1.clean.fq zhengchang_HL7TWCCXX_L4_2.clean.fq /home/ansanqi/RNAseq/daimonkey/rsem_caculate/old/RSEM.index  Rsem_zhengchang


rsem-calculate-expression -p 8 --bowtie-path /usr/bin --paired-end s04112-zhongliu_HW22FCCXX_L7_1.clean.fq s04112-zhongliu_HW22FCCXX_L7_2.clean.fq /home/ansanqi/RNAseq/daimonkey/rsem_caculate/old/RSEM.index  Rsem_s04112-zhongliu


##################################四分位标准化
chmod +x NORMAL.pl
perl NORMAL.pl -c 5 -q 75 -t 1000 -o rsem.genes.normalized_results.zhengchang
Rsem_zhengchang.genes.results


perl NORMAL.pl -c 5 -q 75 -t 1000 -o rsem.genes.normalized_results.Rsem_s04112-zhongliu Rsem_s04112-zhongliu.genes.results


perl NORMAL.pl -c 5 -q 75 -t 1000 -o rsem.genes.normalized_results.Rsem_zhongliu_merged Rsem_zhongliu_merged.genes.results
#################################
#!/usr/bin/perl


use strict;
use Getopt::Long;


my $out = '-';
my $q = 75;
my @col;
my @also;
my $names = 1;
my $target = 1000;
my $skip = 0;
my $min=1;
GetOptions("quant=i"=>\$q, "target=i"=>\$target, "col=i@"=>\@col, "out=s"=>\$out, "also=i@"=>\@also, "skip=i"=>\$skip, "min=i"=>\$min);


my $in = shift @ARGV;


die usage() unless $in && @col;


open(OUT, ($out eq '-') ? '<&STDOUT' : ">$out") || die "Can't open $out\n";
open(IN, ($in eq '-') ? '<&STDIN' : $in) || die "Can't open $in\n";


@also = (1) if !@also && !grep {$_ eq '1'} @col;


map {$_--} @col;
map {$_--} @also;


my @d;
my $cnt = 0;
my $head ='';
while(<IN>) {
        if ($skip) {
                --$skip;
                $head .= $_;
                next;
        }
        chomp;
        my @f = split /\t/;
        if ($col[0] eq '-2') {
                @col = (1..$#f);
        }
        for (@col) {
                push @{$d[$_]}, $f[$_];
        }
        for (@also) {
                push @{$d[$_]}, $f[$_];
        }
        ++$cnt;
}
for (@col) {
        my @t = grep {$_>=$min} @{$d[$_]};
        @t = sort {$a <=> $b} @t;
        my $t=quantile(\@t, $q/100);
        for (@{$d[$_]}) {
                $_= sprintf "%.4f", $target*$_/$t;
        }
}


my @out = (sort {$a <=> $b} (@col, @also));


print OUT $head;


for (my $i=0;$i<$cnt;++$i) {
        for my $j (@out) {
                print OUT "\t" unless $j == $out[0];
                print OUT $d[$j][$i];
        }
        print OUT "\n";
}




sub usage {
<<EOF;
Usage: $0 -c COL [opts] FILE
Returns an upper quartile normalization of data in column(s) COL
of file FILE.
Col is 1-based, zeroes are ignores when calculating upper quartile
Options:
   -c|col COL    normalize this column of data (can specify more than once, or -1 for all but first col)
   -q|quant INT  quantile to use (75)
   -t|target INT target to use (1000)
   -a|also COL   output these columns also
   -o|out FILE   output to this file instead of stdout
   -m|min INT    minimum value (1)
   -s|skip INT   skip header rows
EOF
}


sub quantile {
        my ($a,$p) = @_;
        my $l = scalar(@{$a});
        my $t = ($l-1)*$p;
        my $v=$a->[int($t)];
        if ($t > int($t)) {
                return $v + $p * ($a->[int($t)+1] - $v);
        } else {
                return $v;
        }
}
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值