head bed.v4.ncrna
# Preview the second line of v4.stats: every whitespace-separated field from
# index 6 onward is printed on its own line, prefixed with its 1-based number.
# The whole print argument is wrapped in parentheses because
# `print ($i - 5) . ...` parses as `print($i - 5)` followed by a discarded
# concatenation, which drops the name and the newline.
head -2 v4.stats | tail -1 | perl -ne '@t= split(/\s/, $_); chomp @t; for($i = 6; $i < @t; $i ++){ print(($i - 5) . $t[$i] . "\n");} '
# Per organism directory: write srcNames (fields 7 .. NF-1 of the second line
# of v4.stats, one per line) and source (one row per original id found in the
# joined tracking columns).
for org in bosTau6 galGal3 tair10 dm3 ce10 danRer7 sacCer3
do
# Skip an organism whose directory cannot be entered. Carrying on would write
# srcNames/source into the current directory, and the trailing `cd ..` would
# then climb out of the working tree for every later iteration.
cd "$org" || continue
head -2 v4.stats | tail -1 | awk '{ for (i = 7; i < NF; i++){ print $i;}}' > srcNames
# For every joined column from index 5 up to (not including) the last one whose
# value is not "-": split on commas, take the text between the first two "|",
# strip a leading "<8 digits>_" prefix, and print
# <first field> TAB <column index - 4> TAB <id>.
cat bed.v4.ncrna | cut -f 4 | perl "$CMD/leftJoin.pl" - 1 v4.tracking 1| perl -ne '@t = split(/\t/, $_); chomp @t; $tid = $t[0]; for($i = 5; $i < (@t - 1); $i ++){ if($t[$i] ne "-"){ @oIds = split(/,/,$t[$i]); for($j = 0; $j < @oIds; $j ++){ $oIds[$j] =~ /.*?\|(.*?)\|/; $oId1 = $1; $oId1 =~ s/^\d{8}_//; print $tid . "\t" . ($i - 4) . "\t" . $oId1 . "\n";} } }' > source
cd ..
done
ln -s ds srcNames
cat bed.v4.ncrna | cut -f 4 | perl $CMD/leftJoin.pl - 1 v4.2.tracking 1| perl -ne '@t = split(/\t/, $_); chomp @t; $tid = $t[0]; for($i = 5; $i < @t ; $i ++){ if($t[$i] ne "-"){ @oIds = split(/,/,$t[$i]); for($j = 0; $j < @oIds; $j ++){ $oIds[$j] =~ /.*?\|(.*?)\|/; $oId1 = $1; $oId1 =~ s/^\d{8}_//; print $tid . "\t" . ($i - 4) . "\t" . $oId1 . "\n";} } }' > source
cat bed.v4.ncrna | cut -f 4 | perl $CMD/leftJoin.pl - 1 v4.tracking 1| perl -ne '@t = split(/\t/, $_); chomp @t; $tid = $t[0]; for($i = 5; $i < @t ; $i ++){ if($t[$i] ne "-"){ @oIds = split(/,/,$t[$i]); for($j = 0; $j < @oIds; $j ++){ $oIds[$j] =~ /.*?\|(.*?)\|/; $oId1 = $1; $oId1 =~ s/^\d{8}_//; print $tid . "\t" . ($i - 4) . "\t" . $oId1 . "\n";} } }' > source
grep n343060 hglnc_loci2.tracking
# Per organism directory: write db/rna.bed and db/gene.bed.
# Each input BED row is joined (leftJoin.pl, column 4 against column 2 of the
# id file) and re-emitted with columns 1-3 and 5-12 unchanged; column 4 is
# replaced by "NON" + organism code + "T" (transcripts) or "G" (genes) + the
# joined field at index 12, left-padded with zeros to six characters.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
# Skip an organism whose directory cannot be entered; otherwise db/rna.bed and
# db/gene.bed of the current directory would be overwritten and `cd ..` would
# leave the working tree.
cd "$org" || continue
# The organism name is spliced into the perl source as a double-quoted shell
# word so that it cannot be word-split or glob-expanded on the way in.
cat bed.v4.ncrna | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 4 id.v4_tcons 2 | perl -ne ' @t = split(/\t/,$_); $org="'"$org"'"; %ORGID = ("hg19" => "HSA","mm9" => "MMU","dm3" => "DME","galGal3" => "GGA","ce10" => "CEL","danRer7" => "DRE","sacCer3" => "SCE","tair10" => "ATH", "bosTau6" => "BTA" ); $nId = "NON".$ORGID{$org}."T" . substr("000000", 0, 6 - length($t[12]) ) .$t[12]; print $t[0] . "\t" . $t[1] . "\t" .$t[2] . "\t" . $nId . "\t" .$t[4] . "\t" . $t[5] . "\t". $t[6] . "\t" . $t[7] . "\t". $t[8] . "\t" . $t[9] . "\t". $t[10] . "\t" . $t[11] . "\n" ; ' > db/rna.bed
cat loci.combined.gtf.loci.bed | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 4 id.v4_xloc 2 | perl -ne ' @t = split(/\t/,$_); $org="'"$org"'"; %ORGID = ("hg19" => "HSA","mm9" => "MMU","dm3" => "DME","galGal3" => "GGA","ce10" => "CEL","danRer7" => "DRE","sacCer3" => "SCE","tair10" => "ATH", "bosTau6" => "BTA" ); $nId = "NON".$ORGID{$org}."G" . substr("000000", 0, 6 - length($t[12]) ) .$t[12]; print $t[0] . "\t" . $t[1] . "\t" .$t[2] . "\t" . $nId . "\t" .$t[4] . "\t" . $t[5] . "\t". $t[6] . "\t" . $t[7] . "\t". $t[8] . "\t" . $t[9] . "\t". $t[10] . "\t" . $t[11] . "\n" ; ' > db/gene.bed
cd ..
done
# ce10 sacCer3 galGal3
# Delete the numbered chunk files fa.0 .. fa.59 and bed.v4.ncrna0 .. bed.v4.ncrna59.
i=0
while (( i < 60 )); do
rm fa.$i bed.v4.ncrna$i
i=$(( i + 1 ))
done
# Flatten fa.v4.bed into seq.v4: a ">name" line is emitted as "name<TAB>" with
# no newline, every other line is emitted followed by a newline.
cat fa.v4.bed | perl -ne 'chomp ; if(/>(.*)/){ print $1 . "\t"; }else{ print $_ . "\n"; }' > seq.v4
# Join the names in column 4 of bed.v4.ncrna against seq.v4 and keep fields 2-3
# of the join output, then replace seq.v4 with the result.
cat bed.v4.ncrna | cut -f 4 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 seq.v4 1 | cut -f 2,3 > seq.v4.1;
mv seq.v4.1 seq.v4
# Rows of source whose joined srcNames field ($5) is "v3.gtf" are joined against
# seq.v3; fields 1 and 4 of that join are written to seq.v4.v3.
# NOTE(review): `head source` feeds only the first 10 lines of source into this
# pipeline; that looks like a leftover from a trial run - confirm whether the
# full file was intended.
head source | perl $CMD/leftJoin.pl - 2 srcNames 1 | awk '$5 == "v3.gtf"{ print $1 "\t" $3;}' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 2 seq.v3 1 | cut -f 1,4 > seq.v4.v3
# Where the join found a seq.v4.v3 row ($1 == $3) print field 4 of the joined
# row, otherwise keep field 2 from seq.v4; then replace seq.v4 with the result.
perl $CMD/leftJoin.pl seq.v4 1 seq.v4.v3 1 | awk '{ if($1 == $3) { print $1 "\t" $4 ; }else{ print $1 "\t" $2 ;}}' > seq.v4.v3.2
mv seq.v4.v3.2 seq.v4
# Prefix every line of srcNames with its line number and a tab (rewritten in place
# via srcNames.2).
cat srcNames | awk '{ print NR "\t" $0; }' > srcNames.2; mv srcNames.2 srcNames;
wc -l */seq.v4.v3
head */seq.v4
bash $CMD/combinedGtfToLociBed.sh loci.combined.gtf
cat bed.v4.ncrna | awk '{ print NR "\t" $4;}' > id.v4_tcons
cat loci.combined.gtf.loci.bed | awk '{ print NR "\t" $4;}' > id.v4_xloc
mkdir db
# Preview of the first lines of bed.v4.ncrna joined (column 4) to id.v4_tcons (column 2).
# NOTE(review): `print $` is an incomplete awk expression (no field number after
# the `$`), so awk rejects this program; the intended column cannot be
# recovered from this line alone.
head bed.v4.ncrna | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 4 id.v4_tcons 2 | awk '{ print $ }'
head NcFuncAnno.result.xcy | perl -ne 'chomp; if(/XLOC/){ print $_ . "\t"; }else{ print $_ . "\n"; }' | perl -ne 'chomp; $_ =~ s/\tGO:\d+\(0\)//g;print $_ . "\n" ';
# Summary counts over bed.v4.lncrna.
# Column 10 (presumably the BED block/exon count - confirm): histogram of values.
cat bed.v4.lncrna | cut -f 10 | sort -n | uniq -c | less
# Rows whose column 10 is 6..10 (strictly greater than 5 and less than 11).
cat bed.v4.lncrna | cut -f 10 | awk '$1 > 5 && $1 < 11' | wc -l
# Rows whose column 10 is greater than 10.
cat bed.v4.lncrna | cut -f 10 | awk '$1 > 10' | wc -l
# Length bins, using field 2 of the getBedSeqLength.sh output:
# <200, [200,1000), [1000,2500), [2500,5000), [5000,10000), >=10000.
cat bed.v4.lncrna | getBedSeqLength.sh - | awk '$2 < 200' | wc -l
cat bed.v4.lncrna | getBedSeqLength.sh - | awk '$2 >= 200 && $2 < 1000' | wc -l
cat bed.v4.lncrna | getBedSeqLength.sh - | awk '$2 >= 1000 && $2 < 2500' | wc -l
cat bed.v4.lncrna | getBedSeqLength.sh - | awk '$2 >= 2500 && $2 < 5000' | wc -l
cat bed.v4.lncrna | getBedSeqLength.sh - | awk '$2 >= 5000 && $2 < 10000' | wc -l
cat bed.v4.lncrna | getBedSeqLength.sh - | awk '$2 >= 10000 ' | wc -l
cat NcFuncAnno.result.xcy | perl -ne 'chomp; if(/XLOC/){ print $_ . "\t"; }else{ print $_ . "\n"; }' | perl -ne 'chomp; $_ =~ s/\tGO:\d+\(0\)//g;print $_ . "\n" ' | awk '$2' > func.xloc
# danRer7 sacCer3 tair10 bosTau6 hg19 mm9
cat bed.v4.ncrna | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 4 id.v4_tcons 2 | perl -ne ' @t = split(/\t/,$_); $org="'mm9'"; print $org . "\n";%ORGID = ("hg19" => "HSA","mm9" => "MMU","dm3" => "DME","galGal3" => "GGA","ce10" => "CEL","danRer7" => "DRE","sacCer3" => "SCE","tair10" => "ATH", "bosTau6" => "BTA" ); $nId = "NON".$ORGID{$org}."T" . substr("000000", 0, 6 - length($t[12]) ) .$t[12]; print $t[0] . "\t" . $t[1] . "\t" .$t[2] . "\t" . $nId . "\t" .$t[4] . "\t" . $t[5] . "\t". $t[6] . "\t" . $t[7] . "\t". $t[8] . "\t" . $t[9] . "\t". $t[10] . "\t" . $t[11] . "\n" ; ' > db/rna.bed
cat loci.combined.gtf.loci.bed | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 4 id.v4_xloc 2 | perl -ne ' @t = split(/\t/,$_); $org="'mm9'"; print $org . "\n";%ORGID = ("hg19" => "HSA","mm9" => "MMU","dm3" => "DME","galGal3" => "GGA","ce10" => "CEL","danRer7" => "DRE","sacCer3" => "SCE","tair10" => "ATH", "bosTau6" => "BTA" ); $nId = "NON".$ORGID{$org}."G" . substr("000000", 0, 6 - length($t[12]) ) .$t[12]; print $t[0] . "\t" . $t[1] . "\t" .$t[2] . "\t" . $nId . "\t" .$t[4] . "\t" . $t[5] . "\t". $t[6] . "\t" . $t[7] . "\t". $t[8] . "\t" . $t[9] . "\t". $t[10] . "\t" . $t[11] . "\n" ; ' > db/gene.bed
# Per organism directory: write db/generalInfo, whose last column comes from a
# join against cnci.v4.
# An earlier draft header listing all nine organisms stood directly above the
# real one without its own `do`; two consecutive `for` headers are a shell
# syntax error, so the draft is kept only as a comment.
#for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
for org in tair10 bosTau6 hg19 mm9
do
# Skip an organism whose directory cannot be entered; otherwise the output would
# land in the wrong db/ directory and `cd ..` would leave the working tree.
cd "$org" || continue
# Paste the getBedSeqLength.sh output next to db/rna.bed, keep row number, BED
# columns 1,2,3,6,10,11,12 and pasted field 14, join on id.v4_tcons (column 1)
# and then on cnci.v4 (join field 11 against column 1), and keep fields 1-9 and 13.
cat db/rna.bed | getBedSeqLength.sh - | paste db/rna.bed - | awk '{print NR "\t" $1 "\t" $2 "\t" $3 "\t" $6 "\t" $10 "\t" $11 "\t" $12 "\t" $14}' | perl "$CMD/leftJoin.pl" - 1 id.v4_tcons 1 | perl "$CMD/leftJoin.pl" - 11 cnci.v4 1 | cut -f 1,2,3,4,5,6,7,8,9,13 > db/generalInfo
cd ..
done
# Per organism directory: write db/generalInfo with a constant 0 in the last
# column (no cnci.v4 join is performed for these organisms).
for org in galGal3 dm3 ce10 danRer7 sacCer3
do
# Skip an organism whose directory cannot be entered; otherwise the output would
# land in the wrong db/ directory and `cd ..` would leave the working tree.
cd "$org" || continue
cat db/rna.bed | getBedSeqLength.sh - | paste db/rna.bed - | awk '{print NR "\t" $1 "\t" $2 "\t" $3 "\t" $6 "\t" $10 "\t" $11 "\t" $12 "\t" $14 "\t0" }' > db/generalInfo
cd ..
done
cat v4.fa | perl -ne 'chomp ; if(/>(.*)/){ print $1 . "\t"; }else{ print $_ . "\n"; }' > seq.v4
cat db/rna.bed | getBedSeqLength.sh - | paste db/rna.bed - | head | awk '{print NR "\t" $1 "\t" $2 "\t" $3 "\t" $6 "\t" $10 "\t" $11 "\t" $12 "\t" $14}' | perl $CMD/leftJoin.pl - 1 id.v4_tcons 1 | perl $CMD/leftJoin.pl - 11 cnci.v4 1 | cut -f 1,2,3,4,5,6,7,8,9,13
cat db/rna.bed | getBedSeqLength.sh - | paste db/rna.bed - | awk '{print NR "\t" $1 "\t" $2 "\t" $3 "\t" $6 "\t" $10 "\t" $11 "\t" $12 "\t" $14}' | perl $CMD/leftJoin.pl - 1 id.v4_tcons 1 | perl $CMD/leftJoin.pl - 11 cnci.v4 1 | cut -f 1,2,3,4,5,6,7,8,9,13 > generalInfo
cat seq.v4 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl id.v4_tcons 2 - 1 | awk '{print $1 "\t" $4;}' > db/seq
# Per organism directory: join id.v4_tcons (column 2) with seq.v4 (column 1) and
# write fields 1 and 4 of the joined rows to db/seq.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
# Skip an organism whose directory cannot be entered; otherwise db/seq of the
# current directory would be overwritten and `cd ..` would leave the working tree.
cd "$org" || continue
cat seq.v4 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl id.v4_tcons 2 - 1 | awk '{print $1 "\t" $4;}' > db/seq
cd ..
done
cat source | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_tcons 2 | awk '{ print $4 "\t" $2 "\t" $3;}' > db/src
# Per organism directory: join source (column 1) with id.v4_tcons (column 2),
# write joined field 4 followed by source fields 2 and 3 to db/src, and copy
# srcNames into db/.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
# Skip an organism whose directory cannot be entered; otherwise db/src of the
# current directory would be overwritten and `cd ..` would leave the working tree.
cd "$org" || continue
cat source | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_tcons 2 | awk '{ print $4 "\t" $2 "\t" $3;}' > db/src
cp srcNames db/srcNames
cd ..
done
# Build gtf.lnc.disp: names from loci.combined.gtf (column 4 of the gtf2Bed.pl
# output) are joined to loci.tracking, the text between the first two "|" of
# joined field index 5 is extracted, and that key list is handed to rmByKey.pl
# together with bed.v4.lncrna (column 4); the result is converted to GTF.
# NOTE(review): rmByKey.pl presumably drops the bed.v4.lncrna rows whose key is
# in the list - confirm against the helper.
cat loci.combined.gtf | gtf2Bed.pl - | cut -f 4 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking 1 | perl -ne ' @t = split(/\t/,$_); $t[5] =~ /.*?\|(.*?)\|/; print $1 . "\n";' | perl $CMD/rmByKey.pl - bed.v4.lncrna 4 | bedToGtf.sh - > gtf.lnc.disp
# Compare gtf.lnc.disp against loci.combined.gtf; outputs use the prefix "disp".
cuffcompare -r loci.combined.gtf -C gtf.lnc.disp -o disp
# Count the (did, rid) pairs extracted from disp.tracking: rid is everything
# after the first "|" of field index 2, did is the text between the first two
# "|" of field index 4.
cat disp.tracking | perl -ne '@t = split(/\t/,$_); $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n"; ' | wc -l
######cat disp.tracking | perl -ne '@t = split(/\t/,$_); $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n"; '
# Scratch file literally named "1": BED form of gtf.lnc.disp, consumed by the
# rmByKey.pl call on the next line.
cat gtf.lnc.disp | gtf2Bed.pl - > 1
# Second round: the did values found above are passed as keys to rmByKey.pl with
# scratch file "1" (column 4); the result becomes gtf.lnc.disp2.
cat disp.tracking | perl -ne '@t = split(/\t/,$_); $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n"; ' | cut -f 1 | perl /leofs/noncode/NONCODEv4/cmds/rmByKey.pl - 1 4 | bedToGtf.sh - > gtf.lnc.disp2
cuffcompare -r loci.combined.gtf -C gtf.lnc.disp2 -o disp2
# id_disp_contain: (did, id) pairs, where rid is resolved through loci.tracking
# to the text between the first two "|" of joined field index 6. The disp round
# creates the file, the disp2 round appends to it.
cat disp.tracking | perl -ne '@t = split(/\t/,$_); $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n"; ' | perl $CMD/leftJoin.pl - 2 loci.tracking 1 | perl -ne '@t = split(/\t/,$_); $t[6] =~ /.*?\|(.*?)\|/; $rid = $1; print $t[0]. "\t" . $rid . "\n"; ' > id_disp_contain
cat disp2.tracking | perl -ne '@t = split(/\t/,$_); $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n"; ' | perl $CMD/leftJoin.pl - 2 loci.tracking 1 | perl -ne '@t = split(/\t/,$_); $t[6] =~ /.*?\|(.*?)\|/; $rid = $1; print $t[0]. "\t" . $rid . "\n"; ' >> id_disp_contain
mv id_disp_contain id.disp_contain
#head loci.combined.gtf | grep -Po 'gene_id ".*?"; transcript_id ".*?";'
#gene_id "XLOC_000001"; transcript_id "TCONS_00000001";
# rnaInGene: for each distinct adjacent (gene_id, transcript_id) pair in
# loci.combined.gtf, the transcript is resolved through loci.tracking (text
# between the first two "|" of joined field index 6) and printed with its gene_id.
cat loci.combined.gtf | grep -Po 'gene_id ".*?"; transcript_id ".*?";' | uniq | perl -pe 's/gene_id "(.*?)"; transcript_id "(.*?)";/$2\t$1/' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking 1 | perl -ne ' @t = split(/\t/,$_); $t[6] =~ /.*?\|(.*?)\|/; print $1 . "\t" . $t[1]. "\n";' > rnaInGene
# Append rows for the ids in id.disp_contain (fields 1 and 4 of the join).
# NOTE(review): rnaInGene is read by leftJoin.pl and appended to in the same
# pipeline; this is only safe if leftJoin.pl reads rnaInGene completely before
# producing output - confirm.
perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl id.disp_contain 2 rnaInGene 1 | cut -f 1,4 >> rnaInGene
cut -f 2 rnaInGene | sort | uniq -c | awk '{print $1;}' | sort -n | uniq -c | less
cut -f 2 rnaInGene | sort | uniq -c | awk '{print $1;}' | sort -n | uniq -c | wc -l
# Histogram of rnaInGene column 2 occurrence counts; the frequency column of the
# last 44 histogram rows is passed to sum.pl (presumably to total it - confirm).
# The frequency is extracted with awk: `uniq -c` separates count and value with
# spaces, so `cut -f 1` (tab-delimited) passed every line through unchanged.
cut -f 2 rnaInGene | sort | uniq -c | awk '{print $1;}' | sort -n | uniq -c | awk '{print $1;}' | tail -44 | perl /leofs/noncode/NONCODEv4/cmds/sum.pl -
cat source | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 2 srcNames 1 | grep ensembl | wc -l
# Count rows of source (joined to srcNames on column 2) per organism, by
# matching the joined line against a source name. In all three loops an
# organism whose directory cannot be entered is skipped, so that no count is
# taken from the wrong directory and `cd ..` cannot leave the working tree.

# Lines matching "ensembl": one count per organism.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
cd "$org" || continue
cat source | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 2 srcNames 1 | grep ensembl | wc -l
cd ..
done
# Lines matching "refseq": per-organism counts are piped to sum.pl.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
cd "$org" || continue
cat source | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 2 srcNames 1 | grep refseq | wc -l
cd ..
done | perl "$CMD/sum.pl" -
# Lines matching none of "refseq", "ensembl", "v3": per-organism counts are
# piped to sum.pl.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
cd "$org" || continue
cat source | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 2 srcNames 1 | grep -v refseq | grep -v ensembl | grep -v v3 | wc -l
cd ..
done | perl "$CMD/sum.pl" -
# Ensembl:141194
# Refseq:35445
# Literature:118148
# Per organism directory: run two cuffcompare rounds (disp, disp2) and build
# id.disp_contain and rnaInGene.
for org in galGal3 dm3 ce10 sacCer3 tair10 bosTau6
do
# Skip an organism whose directory cannot be entered; otherwise every output
# below would be written into the wrong directory and `cd ..` would leave the
# working tree.
cd "$org" || continue
# gtf.lnc.disp: key list derived from loci.combined.gtf / loci.tracking is
# handed to rmByKey.pl together with bed.v4.lncrna (column 4), then converted
# to GTF.
cat loci.combined.gtf | gtf2Bed.pl - | cut -f 4 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking 1 | perl -ne ' @t = split(/\t/,$_); $t[5] =~ /.*?\|(.*?)\|/; print $1 . "\n";' | perl "$CMD/rmByKey.pl" - bed.v4.lncrna 4 | bedToGtf.sh - > gtf.lnc.disp
cuffcompare -r loci.combined.gtf -C gtf.lnc.disp -o disp
# Number of (did, rid) pairs extracted from disp.tracking.
cat disp.tracking | perl -ne '@t = split(/\t/,$_); $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n"; ' | wc -l
######cat disp.tracking | perl -ne '@t = split(/\t/,$_); $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n"; '
# Scratch file literally named "1", consumed by the rmByKey.pl call below.
cat gtf.lnc.disp | gtf2Bed.pl - > 1
cat disp.tracking | perl -ne '@t = split(/\t/,$_); $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n"; ' | cut -f 1 | perl /leofs/noncode/NONCODEv4/cmds/rmByKey.pl - 1 4 | bedToGtf.sh - > gtf.lnc.disp2
cuffcompare -r loci.combined.gtf -C gtf.lnc.disp2 -o disp2
# id_disp_contain: created from the disp round, appended to from the disp2 round.
cat disp.tracking | perl -ne '@t = split(/\t/,$_); $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n"; ' | perl "$CMD/leftJoin.pl" - 2 loci.tracking 1 | perl -ne '@t = split(/\t/,$_); $t[6] =~ /.*?\|(.*?)\|/; $rid = $1; print $t[0]. "\t" . $rid . "\n"; ' > id_disp_contain
cat disp2.tracking | perl -ne '@t = split(/\t/,$_); $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n"; ' | perl "$CMD/leftJoin.pl" - 2 loci.tracking 1 | perl -ne '@t = split(/\t/,$_); $t[6] =~ /.*?\|(.*?)\|/; $rid = $1; print $t[0]. "\t" . $rid . "\n"; ' >> id_disp_contain
mv id_disp_contain id.disp_contain
#head loci.combined.gtf | grep -Po 'gene_id ".*?"; transcript_id ".*?";'
#gene_id "XLOC_000001"; transcript_id "TCONS_00000001";
# rnaInGene: transcript (resolved through loci.tracking) paired with its gene_id.
cat loci.combined.gtf | grep -Po 'gene_id ".*?"; transcript_id ".*?";' | uniq | perl -pe 's/gene_id "(.*?)"; transcript_id "(.*?)";/$2\t$1/' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking 1 | perl -ne ' @t = split(/\t/,$_); $t[6] =~ /.*?\|(.*?)\|/; print $1 . "\t" . $t[1]. "\n";' > rnaInGene
# NOTE(review): rnaInGene is both read and appended to here; safe only if
# leftJoin.pl reads it completely before producing output - confirm.
perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl id.disp_contain 2 rnaInGene 1 | cut -f 1,4 >> rnaInGene
cd ..
done
# Per organism directory: translate both columns of rnaInGene through the id
# tables (column 1 via id.v4_tcons, column 2 via id.v4_xloc; each join matches
# column 2 of the id file) and write the resulting pairs to db/rnaInGene.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
# Skip an organism whose directory cannot be entered; otherwise db/rnaInGene of
# the current directory would be overwritten and `cd ..` would leave the tree.
cd "$org" || continue
cat rnaInGene | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_tcons 2 | awk '{print $3 "\t" $2}' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 2 id.v4_xloc 2 | awk '{print $1 "\t" $3}' > db/rnaInGene
cd ..
done
head loci.loci | cut -f 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.a 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.eo 1| perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.io 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.l 1 | awk -F "\t" '{ if($2){print 1;}else{print 0;} if($3){print 1;}else{print 0;} if($3){print 1;}else{print 0;} if($4){print 1;}else{print 0;} }'
head loci.loci | cut -f 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.a 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.eo 1| perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.io 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.l 1 | awk -F "\t" 'BEGIN{tmp="";}{ if($2){tmp=$tmp"1";}else{print 0;} if($3){print 1;}else{print 0;} if($3){print 1;}else{print 0;} if($4){print 1;}else{print 0;} }END{print $tmp}'
cat loci.loci | cut -f 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.a 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.eo 1| perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.io 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.l 1 | awk -F "\t" '{ tmp=""; if($2){tmp=tmp"1";}else{tmp=tmp"0";} if($3){tmp=tmp"1";}else{tmp=tmp"0";} if($4){tmp=tmp"1";}else{tmp=tmp"0";} if($5){tmp=tmp"1";}else{tmp=tmp"0"; } print $1"\t"tmp;}'
# Per organism directory: write db/gene_class, a four-character 0/1 string per
# locus, sorted numerically by id.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
# Skip an organism whose directory cannot be entered; otherwise db/gene_class of
# the current directory would be overwritten and `cd ..` would leave the tree.
cd "$org" || continue
# Column 1 of loci.loci is joined in turn with loci.a, loci.eo, loci.io and
# loci.l. For tab-separated fields 2..5 of the joined row, a "1" is appended
# when awk treats the field as true and a "0" otherwise (an empty field and a
# numeric zero both count as false). The flag string is then keyed by the id
# from id.v4_xloc.
cat loci.loci | cut -f 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.a 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.eo 1| perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.io 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.l 1 | awk -F "\t" '{ tmp=""; for(i = 2; i <= 5; i++){ if($i){tmp=tmp"1";}else{tmp=tmp"0";} } print $1"\t"tmp;}' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_xloc 2 | awk '{ print $3 "\t" $2 ;}' | sort -nk 1 > db/gene_class
cd ..
done
# Per organism directory: write db/gene_info (row number, BED columns 1-3,
# pasted field 14, BED column 6, and the number of db/rnaInGene rows per gene).
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
# Skip an organism whose directory cannot be entered; otherwise the scratch file
# and db/gene_info would be written in the wrong place and `cd ..` would leave
# the working tree.
cd "$org" || continue
# Scratch file literally named "2"; it is the first input of the leftJoin.pl
# call on the next line.
cat db/gene.bed | getBedSeqLength.sh - | paste db/gene.bed - | awk '{ print NR "\t" $1 "\t" $2 "\t" $3 "\t" $14 "\t" $6 }' >2
# Count occurrences of each value in column 2 of db/rnaInGene, join the counts
# onto scratch file "2" by its column 1, and keep fields 1-6 and 8.
cat db/rnaInGene | cut -f 2 | sort -n | uniq -c | awk '{ print $2 "\t" $1;}' | perl "$CMD/leftJoin.pl" 2 1 - 1 | cut -f 1,2,3,4,5,6,8 > db/gene_info
cd ..
done
# Per organism directory: move lncRNA.bed into db/.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
# Skip an organism whose directory cannot be entered; otherwise `mv` would act
# on the wrong directory and `cd ..` would leave the working tree.
cd "$org" || continue
# NOTE(review): the command that creates lncRNA.bed is commented out below, so
# the `mv` relies on the file already existing from an earlier run - confirm.
#cat db/rna.bed | getBedSeqLength.sh - | awk '$2 > 200 { print $1;}' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 db/rna.bed 4 | cut -f 2,3,4,5,6,7,8,9,10,11,12,13 > lncRNA.bed
mv lncRNA.bed db
cd ..
done
tar cvzf v4.tgz bosTau6/db ce10/db danRer7/db dm3/db galGal3/db hg19/db mm9/db sacCer3/db tair10/db
# /data/tmp/v4/bosTau6/db
# Create the human_* tables in the ncrna database and bulk-load them.
# NOTE(review): every table is named human_*, but every LOAD DATA path below
# points at /data/tmp/v4/bosTau6/db/ - confirm which organism was intended.
# Quoting note: each statement is a shell single-quoted string, so an inner '0'
# closes and reopens that string and the server receives DEFAULT 0 without SQL
# quotes (also for the char column oId).
hgsql -e 'show tables;' ncrna
hgsql -e 'show create table HSAG;' ncrna
# human_gene: one row per gene, loaded from db/gene_info.
hgsql -e 'CREATE TABLE `human_gene` (
`id` int(6) NOT NULL DEFAULT '0',
`chr` char(20) DEFAULT NULL,
`start` int(10) DEFAULT NULL,
`end` int(10) DEFAULT NULL,
`length` int(7) DEFAULT NULL,
`strand` char(1) DEFAULT NULL,
`transcriptCount` int(2) DEFAULT NULL,
PRIMARY KEY (`id`)
);' ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/gene_info" into table human_gene ' ncrna
# human_rnaInGene: (rid, gid) pairs keyed by rid, loaded from db/rnaInGene.
hgsql -e 'CREATE TABLE `human_rnaInGene` (
`rid` int(6) NOT NULL DEFAULT '0',
`gid` int(6) NOT NULL DEFAULT '0',
PRIMARY KEY (`rid`)
);' ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/rnaInGene" into table human_rnaInGene ' ncrna
# human_src: dropped and recreated without a primary key, loaded from db/src.
hgsql -e 'drop TABLE `human_src`' ncrna
hgsql -e 'CREATE TABLE `human_src` (
`id` int(6) NOT NULL DEFAULT '0' ,
`src` int(2) NOT NULL DEFAULT '0',
`oId` char(25) NOT NULL DEFAULT '0'
);' ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/src" into table human_src ' ncrna
# human_gene_class: id plus a char(4) value, loaded from db/gene_class.
hgsql -e 'CREATE TABLE `human_gene_class` (
`id` int(6) NOT NULL DEFAULT '0' ,
`oId` char(4) NOT NULL DEFAULT '0',
PRIMARY KEY (`id`)
);' ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/gene_class" into table human_gene_class ' ncrna
# human_generalInfo: ten columns, loaded from db/generalInfo.
hgsql -e 'CREATE TABLE `human_generalInfo`(
`id` int(6) NOT NULL DEFAULT '0',
`chr` char(20) DEFAULT NULL,
`bedStart` int(10) DEFAULT NULL,
`bedEnd` int(10) DEFAULT NULL,
`strand` char(1) DEFAULT NULL,
`exonCount` int(3) DEFAULT NULL,
`exonLen` text,
`exonStart` text,
`length` int(6) DEFAULT NULL,
`cnci` decimal(15,7) DEFAULT NULL,
PRIMARY KEY (`id`)
);' ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/generalInfo" into table human_generalInfo ' ncrna
# human_seq: dropped and recreated, loaded from db/seq.
hgsql -e 'drop TABLE `human_seq`; ' ncrna
hgsql -e 'CREATE TABLE `human_seq` (
`id` int(10) NOT NULL,
`seq` mediumtext,
PRIMARY KEY (`id`)
);' ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/seq" into table human_seq ' ncrna
# human_srcNames: id to source-name lookup, loaded from db/srcNames.
hgsql -e 'CREATE TABLE `human_srcNames` (
`id` int(10) NOT NULL,
`srcName` char(30)
);' ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/srcNames" into table human_srcNames ' ncrna
# Select all rows of the organism's <prefix>_src table for one id. The comma
# that stood between the table name and "where" made the statement invalid SQL.
$sql = "SELECT * from ".$org_prefix[$org]."_src where id = $id ;";
# | Field | Type | Null | Key | Default | Extra |
# +-------+----------+------+-----+---------+-------+
# | id | int(6) | NO | | 0 | |
# | src | int(2) | NO | | 0 | |
# | oId | char(25) | NO | | 0 | |
# +-------+----------+------+-----+---------+-------+
# | Field | Type | Null | Key | Default | Extra |
# +---------+----------+------+-----+---------+-------+
# | id | int(10) | NO | | NULL | |
# | srcName | char(30) | YES | | NULL | |
# +---------+----------+------+-----+---------+-------+
# srcName_id
cat expr/fpkm.loci | perl -ne '@t = split(/\t/,$_); print $t[0]."\t"; for($i = 1; $i < (@t -1); $i ++){print $t[$i] . ",";} print $t[$i] ;' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_xloc 2 | awk '{ print $3 "\t" $2;}' | sort -nk 1 > db/expr_profile
#head expr/fpkm.rna | perl -ne '@t = split(/\t/,$_); print $t[0]."\t"; for($i = 1; $i < (@t -1); $i ++){print $t[$i] . ",";} print $t[$i] ;' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_xloc 2 | awk '{ print $3 "\t" $2;}' | sort -nk 1 > db/expr_profile
#head expr/fpkm.rna | perl -ne '@t = split(/\t/,$_); print $t[0]."\t"; for($i = 1; $i < (@t -1); $i ++){print $t[$i] . ",";} print $t[$i] ;' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking 1
#head expr/fpkm.rna | perl -ne '@t = split(/\t/,$_); print $t[0]."\t"; for($i = 1; $i < (@t -1); $i ++){print $t[$i] . ",";} print $t[$i] ;' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking 1 | perl -ne '@t = split(/\t/,$_); $t[6] =~ /.*?\|(.*?)\|/; print $1 . "\t". $t[1] . "\n";' | perl $CMD/leftJoin.pl
# expr.rna: each expr/fpkm.rna row is collapsed to "<field 0><TAB><remaining
# fields joined by commas>", joined to loci.tracking on column 1, and re-keyed
# by the text between the first two "|" of joined field index 6.
cat expr/fpkm.rna | perl -ne '@t = split(/\t/,$_); print $t[0]."\t"; for($i = 1; $i < (@t -1); $i ++){print $t[$i] . ",";} print $t[$i] ;' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking 1 | perl -ne '@t = split(/\t/,$_); $t[6] =~ /.*?\|(.*?)\|/; print $1 . "\t". $t[1] . "\n";' > expr.rna
# Add rows for the ids in id.disp_contain (fields 1 and 4 of the join with expr.rna).
perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl id.disp_contain 2 expr.rna 1 | cut -f 1,4 > expr.rna.2
cat expr.rna.2 >> expr.rna
# Add an all-zero profile for the bed.v4.lncrna rows that rmByKey.pl returns for
# the key list of names already in expr.rna.
# NOTE(review): the zero profile is hard-coded to 22 values; confirm that this
# matches the number of expression columns in expr/fpkm.rna.
cat expr.rna | cut -f 1 | perl /leofs/noncode/NONCODEv4/cmds/rmByKey.pl - bed.v4.lncrna 4 | awk '{print $4"\t0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0"}' > expr.rna.3
cat expr.rna.3 >> expr.rna
# Re-key by the id from id.v4_tcons and sort numerically into db/expr_profile.rna.
cat expr.rna | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_tcons 2 | awk '{ print $3 "\t" $2;}' | sort -nk 1 > db/expr_profile.rna
cat func.xloc | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_xloc 2 | perl -ne '@t = split(/\t/,$_); print $t[@t - 2]."\t"; for($i = 1; $i < (@t - 4); $i ++){$t[$i] =~ /(GO:\d+)\(/; print $1 . ",";} $t[$i] =~ /(GO:\d+)\(/; print $1 . "\n" ;' > db/function
cp mm9/db/expr_profile* mm9/db/function expr_func/mouse/
scp expr_func.tgz root@112.124.13.146:/data/tmp/
# adipose,adrenal,brain,brain_R,breast,colon,foreskin,heart,hela_R,HLF_1,HLF_2,kidney,liver,liver_R,lung,lymphNode,ovary,placenta_R,prostate,skeltalMuscle,testes,testes_R,thyroid,whiteBloodCell
# heart,hippocampus,liver,lung,spleen,thymus
scp fpkm_imgs/* root@112.124.13.146:/data/var/www/html/NONCODE/images/HSAG/fpkm_imgs
# +----+-----+-----------------+
# | id | src | oId |
# +----+-----+-----------------+
# | 1 | 9 | ENST00000456328 |
# | 1 | 10 | NR_046018 |
# | 2 | 9 | ENST00000515242 |
# | 3 | 9 | ENST00000518655 |
# | 4 | 9 | ENST00000450305 |
# | 5 | 11 | n345739 |
# | 6 | 3 | brain_Pred9 |
# | 7 | 3 | brain_Pred15 |
# | 8 | 11 | n385920 |
# | 9 | 11 | n345943 |
# +----+-----+-----------------+
# Print the SQL statements that export, per organism, the ids whose source name
# is 'ensembl' or 'refseq' into /data/tmp/<source>.<table prefix>.
#
# The display-name -> table-prefix mapping lives in an associative array:
# names such as D.Melanogaster contain a dot, so they are neither valid shell
# variable names (`D.Melanogaster=dm` is run as a command) nor usable with
# `${!org}` indirect expansion.
declare -A ORG_TABLE_PREFIX=(
  [Chicken]=chicken
  [D.Melanogaster]=dm
  [A.Thaliana]=tair
  [Cow]=cow
  [C.Elegans]=celegans
  [Zebrafish]=zebrafish
  [Human]=human
  [Mouse]=mouse
  [Yeast]=yeast
)
# Output order; associative arrays do not preserve insertion order.
ORG_DISPLAY_NAMES=(Chicken D.Melanogaster A.Thaliana Cow C.Elegans Zebrafish Human Mouse Yeast)

# Outputs: two SELECT ... INTO OUTFILE statements per organism on stdout
# (ensembl first, then refseq).
print_src_export_sql() {
  local org prefix src
  for org in "${ORG_DISPLAY_NAMES[@]}"; do
    prefix=${ORG_TABLE_PREFIX[$org]}
    for src in ensembl refseq; do
      printf "select a.id, a.oId, '%s' from %s_src as a left join %s_srcNames as b on (a.src = b.id) where b.srcName = '%s' into outfile '/data/tmp/%s.%s';\n" \
        "$org" "$prefix" "$prefix" "$src" "$src" "$prefix"
    done
  done
}
print_src_export_sql
cat id.v4_xloc | cut -f 2 | perl -pe 's/XLOC_0*//' | perl -ne 'chomp; $file1 = "expr/fpkm_imgs/".$_ . ".png"; $file2 = "expr/fpkm_imgs/O".$. .".png"; `mv $file1 $file2`; '
cat expr/fpkm.loci | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_xloc 2 | perl -ne '@t = split(/\t/,$_); print $t[@t - 2] . "\t"; for($i = 1; $i < (@t - 3); $i ++){ print $t[$i] . "\t";} print $t[$i] . "\n" ; ' > expr/fpkm.loci.v4
cd expr
scp fpkm.loci.v4 header root@112.124.13.146:/data/var/www/html/NONCODE/images/MMUG
cat expr/fpkm.rna | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_tcons 2 | perl -ne '@t = split(/\t/,$_); print $t[@t - 2] . "\t"; for($i = 1; $i < (@t - 3); $i ++){ print $t[$i] . "\t";} print $t[$i] . "\n" ; ' > expr/fpkm.rna.v4
cd expr
scp fpkm.rna.v4 header root@112.124.13.146:/data/var/www/html/NONCODE/images/MMUT
cd /leofs/noncode/NONCODEv4/mm9/expr
# Append every organism's db/seq to fa.v4 as FASTA: the header is
# ">NON" + organism code + "T" + field 0 left-padded with zeros to six
# characters, followed by field 1 (which still carries the input newline).
# Output is appended (>>), so an existing fa.v4 is extended, not replaced.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
# The organism name reaches perl through the environment instead of being
# spliced unquoted into the program text, and the lookup table and header
# prefix are built once in BEGIN rather than for every input line.
cat "$org/db/seq" | NONCODE_ORG="$org" perl -ne 'BEGIN { %orgId = ( "hg19" => "HSA","mm9" => "MMU","dm3" => "DME","galGal3" => "GGA","ce10" => "CEL","danRer7" => "DRE","sacCer3" => "SCE","tair10" => "ATH", "bosTau6" => "BTA" ); $head = ">NON" . $orgId{$ENV{NONCODE_ORG}} . "T"; } @t = split(/\t/,$_); print $head . substr("000000", 0, 6 - length($t[0]) ) . $t[0] . "\n" . $t[1]; ' >> fa.v4
done
formatdb -p F -n noncodev4 -i fa.v4 -t "ncRNAs in NONCODEv4"
awk "NR>=223409 " fa.v4 | head
grep 139878 id.v4_tcons
# /data/var/www/html/NONCODE/images/HSAG
# Preview the second line of v4.stats: every whitespace-separated field from
# index 6 onward is printed on its own line, prefixed with its 1-based number.
# The whole print argument is wrapped in parentheses because
# `print ($i - 5) . ...` parses as `print($i - 5)` followed by a discarded
# concatenation, which drops the name and the newline.
head -2 v4.stats | tail -1 | perl -ne '@t= split(/\s/, $_); chomp @t; for($i = 6; $i < @t; $i ++){ print(($i - 5) . $t[$i] . "\n");} '
for org in bosTau6 galGal3 tair10 dm3 ce10 danRer7 sacCer3
do
cd $org
head -2 v4.stats | tail -1 | awk '{ for (i = 7; i < NF; i++){ print $i;}}' > srcNames
cat bed.v4.ncrna | cut -f 4 | perl $CMD/leftJoin.pl - 1 v4.tracking 1| perl -ne '@t = split(/\t/, $_); chomp @t; $tid = $t[0]; for($i = 5; $i < (@t - 1); $i ++){ if($t[$i] ne "-"){ @oIds = split(/,/,$t[$i]); for($j = 0; $j < @oIds; $j ++){ $oIds[$j] =~ /.*?\|(.*?)\|/; $oId1 = $1; $oId1 =~ s/^\d{8}_//; print $tid . "\t" . ($i - 4) . "\t" . $oId1 . "\n";} } }' > source
cd ..
done
ln -s ds srcNames
cat bed.v4.ncrna | cut -f 4 | perl $CMD/leftJoin.pl - 1 v4.2.tracking 1| perl -ne '@t = split(/\t/, $_); chomp @t; $tid = $t[0]; for($i = 5; $i < @t ; $i ++){ if($t[$i] ne "-"){ @oIds = split(/,/,$t[$i]); for($j = 0; $j < @oIds; $j ++){ $oIds[$j] =~ /.*?\|(.*?)\|/; $oId1 = $1; $oId1 =~ s/^\d{8}_//; print $tid . "\t" . ($i - 4) . "\t" . $oId1 . "\n";} } }' > source
cat bed.v4.ncrna | cut -f 4 | perl $CMD/leftJoin.pl - 1 v4.tracking 1| perl -ne '@t = split(/\t/, $_); chomp @t; $tid = $t[0]; for($i = 5; $i < @t ; $i ++){ if($t[$i] ne "-"){ @oIds = split(/,/,$t[$i]); for($j = 0; $j < @oIds; $j ++){ $oIds[$j] =~ /.*?\|(.*?)\|/; $oId1 = $1; $oId1 =~ s/^\d{8}_//; print $tid . "\t" . ($i - 4) . "\t" . $oId1 . "\n";} } }' > source
grep n343060 hglnc_loci2.tracking
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
cd $org;
cat bed.v4.ncrna | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 4 id.v4_tcons 2 | perl -ne ' @t = split(/\t/,$_); $org="'$org'"; %ORGID = ("hg19" => "HSA","mm9" => "MMU","dm3" => "DME","galGal3" => "GGA","ce10" => "CEL","danRer7" => "DRE","sacCer3" => "SCE","tair10" => "ATH", "bosTau6" => "BTA" ); $nId = "NON".$ORGID{$org}."T" . substr("000000", 0, 6 - length($t[12]) ) .$t[12]; print $t[0] . "\t" . $t[1] . "\t" .$t[2] . "\t" . $nId . "\t" .$t[4] . "\t" . $t[5] . "\t". $t[6] . "\t" . $t[7] . "\t". $t[8] . "\t" . $t[9] . "\t". $t[10] . "\t" . $t[11] . "\n" ; ' > db/rna.bed
cat loci.combined.gtf.loci.bed | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 4 id.v4_xloc 2 | perl -ne ' @t = split(/\t/,$_); $org="'$org'"; %ORGID = ("hg19" => "HSA","mm9" => "MMU","dm3" => "DME","galGal3" => "GGA","ce10" => "CEL","danRer7" => "DRE","sacCer3" => "SCE","tair10" => "ATH", "bosTau6" => "BTA" ); $nId = "NON".$ORGID{$org}."G" . substr("000000", 0, 6 - length($t[12]) ) .$t[12]; print $t[0] . "\t" . $t[1] . "\t" .$t[2] . "\t" . $nId . "\t" .$t[4] . "\t" . $t[5] . "\t". $t[6] . "\t" . $t[7] . "\t". $t[8] . "\t" . $t[9] . "\t". $t[10] . "\t" . $t[11] . "\n" ; ' > db/gene.bed
cd ..
done
# ce10 sacCer3 galGal3
for((i=0; i < 60 ; i ++))do rm fa.$i bed.v4.ncrna$i; done
cat fa.v4.bed | perl -ne 'chomp ; if(/>(.*)/){ print $1 . "\t"; }else{ print $_ . "\n"; }' > seq.v4
cat bed.v4.ncrna | cut -f 4 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 seq.v4 1 | cut -f 2,3 > seq.v4.1;
mv seq.v4.1 seq.v4
head source | perl $CMD/leftJoin.pl - 2 srcNames 1 | awk '$5 == "v3.gtf"{ print $1 "\t" $3;}' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 2 seq.v3 1 | cut -f 1,4 > seq.v4.v3
perl $CMD/leftJoin.pl seq.v4 1 seq.v4.v3 1 | awk '{ if($1 == $3) { print $1 "\t" $4 ; }else{ print $1 "\t" $2 ;}}' > seq.v4.v3.2
mv seq.v4.v3.2 seq.v4
cat srcNames | awk '{ print NR "\t" $0; }' > srcNames.2; mv srcNames.2 srcNames;
wc -l */seq.v4.v3
head */seq.v4
bash $CMD/combinedGtfToLociBed.sh loci.combined.gtf
cat bed.v4.ncrna | awk '{ print NR "\t" $4;}' > id.v4_tcons
cat loci.combined.gtf.loci.bed | awk '{ print NR "\t" $4;}' > id.v4_xloc
mkdir db
head bed.v4.ncrna | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 4 id.v4_tcons 2 | awk '{ print $ }'
head NcFuncAnno.result.xcy | perl -ne 'chomp; if(/XLOC/){ print $_ . "\t"; }else{ print $_ . "\n"; }' | perl -ne 'chomp; $_ =~ s/\tGO:\d+\(0\)//g;print $_ . "\n" ';
cat bed.v4.lncrna | cut -f 10 | sort -n | uniq -c | less
cat bed.v4.lncrna | cut -f 10 | awk '$1 > 5 && $1 < 11' | wc -l
cat bed.v4.lncrna | cut -f 10 | awk '$1 > 10' | wc -l
cat bed.v4.lncrna | getBedSeqLength.sh - | awk '$2 < 200' | wc -l
cat bed.v4.lncrna | getBedSeqLength.sh - | awk '$2 >= 200 && $2 < 1000' | wc -l
cat bed.v4.lncrna | getBedSeqLength.sh - | awk '$2 >= 1000 && $2 < 2500' | wc -l
cat bed.v4.lncrna | getBedSeqLength.sh - | awk '$2 >= 2500 && $2 < 5000' | wc -l
cat bed.v4.lncrna | getBedSeqLength.sh - | awk '$2 >= 5000 && $2 < 10000' | wc -l
cat bed.v4.lncrna | getBedSeqLength.sh - | awk '$2 >= 10000 ' | wc -l
cat NcFuncAnno.result.xcy | perl -ne 'chomp; if(/XLOC/){ print $_ . "\t"; }else{ print $_ . "\n"; }' | perl -ne 'chomp; $_ =~ s/\tGO:\d+\(0\)//g;print $_ . "\n" ' | awk '$2' > func.xloc
# danRer7 sacCer3 tair10 bosTau6 hg19 mm9
cat bed.v4.ncrna | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 4 id.v4_tcons 2 | perl -ne ' @t = split(/\t/,$_); $org="'mm9'"; print $org . "\n";%ORGID = ("hg19" => "HSA","mm9" => "MMU","dm3" => "DME","galGal3" => "GGA","ce10" => "CEL","danRer7" => "DRE","sacCer3" => "SCE","tair10" => "ATH", "bosTau6" => "BTA" ); $nId = "NON".$ORGID{$org}."T" . substr("000000", 0, 6 - length($t[12]) ) .$t[12]; print $t[0] . "\t" . $t[1] . "\t" .$t[2] . "\t" . $nId . "\t" .$t[4] . "\t" . $t[5] . "\t". $t[6] . "\t" . $t[7] . "\t". $t[8] . "\t" . $t[9] . "\t". $t[10] . "\t" . $t[11] . "\n" ; ' > db/rna.bed
cat loci.combined.gtf.loci.bed | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 4 id.v4_xloc 2 | perl -ne ' @t = split(/\t/,$_); $org="'mm9'"; print $org . "\n";%ORGID = ("hg19" => "HSA","mm9" => "MMU","dm3" => "DME","galGal3" => "GGA","ce10" => "CEL","danRer7" => "DRE","sacCer3" => "SCE","tair10" => "ATH", "bosTau6" => "BTA" ); $nId = "NON".$ORGID{$org}."G" . substr("000000", 0, 6 - length($t[12]) ) .$t[12]; print $t[0] . "\t" . $t[1] . "\t" .$t[2] . "\t" . $nId . "\t" .$t[4] . "\t" . $t[5] . "\t". $t[6] . "\t" . $t[7] . "\t". $t[8] . "\t" . $t[9] . "\t". $t[10] . "\t" . $t[11] . "\n" ; ' > db/gene.bed
# Per organism directory: write db/generalInfo, whose last column comes from a
# join against cnci.v4.
# An earlier draft header listing all nine organisms stood directly above the
# real one without its own `do`; two consecutive `for` headers are a shell
# syntax error, so the draft is kept only as a comment.
#for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
for org in tair10 bosTau6 hg19 mm9
do
# Skip an organism whose directory cannot be entered; otherwise the output would
# land in the wrong db/ directory and `cd ..` would leave the working tree.
cd "$org" || continue
# Paste the getBedSeqLength.sh output next to db/rna.bed, keep row number, BED
# columns 1,2,3,6,10,11,12 and pasted field 14, join on id.v4_tcons (column 1)
# and then on cnci.v4 (join field 11 against column 1), and keep fields 1-9 and 13.
cat db/rna.bed | getBedSeqLength.sh - | paste db/rna.bed - | awk '{print NR "\t" $1 "\t" $2 "\t" $3 "\t" $6 "\t" $10 "\t" $11 "\t" $12 "\t" $14}' | perl "$CMD/leftJoin.pl" - 1 id.v4_tcons 1 | perl "$CMD/leftJoin.pl" - 11 cnci.v4 1 | cut -f 1,2,3,4,5,6,7,8,9,13 > db/generalInfo
cd ..
done
for org in galGal3 dm3 ce10 danRer7 sacCer3
do
cd $org;
cat db/rna.bed | getBedSeqLength.sh - | paste db/rna.bed - | awk '{print NR "\t" $1 "\t" $2 "\t" $3 "\t" $6 "\t" $10 "\t" $11 "\t" $12 "\t" $14 "\t0" }' > db/generalInfo
cd ..
done
cat v4.fa | perl -ne 'chomp ; if(/>(.*)/){ print $1 . "\t"; }else{ print $_ . "\n"; }' > seq.v4
cat db/rna.bed | getBedSeqLength.sh - | paste db/rna.bed - | head | awk '{print NR "\t" $1 "\t" $2 "\t" $3 "\t" $6 "\t" $10 "\t" $11 "\t" $12 "\t" $14}' | perl $CMD/leftJoin.pl - 1 id.v4_tcons 1 | perl $CMD/leftJoin.pl - 11 cnci.v4 1 | cut -f 1,2,3,4,5,6,7,8,9,13
cat db/rna.bed | getBedSeqLength.sh - | paste db/rna.bed - | awk '{print NR "\t" $1 "\t" $2 "\t" $3 "\t" $6 "\t" $10 "\t" $11 "\t" $12 "\t" $14}' | perl $CMD/leftJoin.pl - 1 id.v4_tcons 1 | perl $CMD/leftJoin.pl - 11 cnci.v4 1 | cut -f 1,2,3,4,5,6,7,8,9,13 > generalInfo
cat seq.v4 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl id.v4_tcons 2 - 1 | awk '{print $1 "\t" $4;}' > db/seq
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
cd $org;
cat seq.v4 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl id.v4_tcons 2 - 1 | awk '{print $1 "\t" $4;}' > db/seq
cd ..
done
cat source | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_tcons 2 | awk '{ print $4 "\t" $2 "\t" $3;}' > db/src
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
cd $org;
cat source | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_tcons 2 | awk '{ print $4 "\t" $2 "\t" $3;}' > db/src
cp srcNames db/srcNames
cd ..
done
# Single-directory run of the "disp" pipeline (run in the current directory).
# Step 1: take the transcript names (BED column 4) of loci.combined.gtf, look
# them up in loci.tracking, extract the id between the first two '|' of the
# 6th joined field, and pass those ids to rmByKey.pl with bed.v4.lncrna and
# key column 4 (presumably removes the matching rows — TODO confirm
# rmByKey.pl semantics). The resulting BED is converted to GTF.
cat loci.combined.gtf | gtf2Bed.pl - | cut -f 4 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking 1 | perl -ne ' @t = split(/\t/,$_); $t[5] =~ /.*?\|(.*?)\|/; print $1 . "\n";' | perl $CMD/rmByKey.pl - bed.v4.lncrna 4 | bedToGtf.sh - > gtf.lnc.disp
# Step 2: compare gtf.lnc.disp against loci.combined.gtf as reference; output
# files are prefixed "disp".
cuffcompare -r loci.combined.gtf -C gtf.lnc.disp -o disp
# Sanity check: count the (did, rid) pairs, where did is the id between the
# first two '|' of tracking field 5 and rid is the text after the first '|'
# of tracking field 3.
cat disp.tracking | perl -ne '@t = split(/\t/,$_); $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n"; ' | wc -l
######cat disp.tracking | perl -ne '@t = split(/\t/,$_); $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n"; '
# Step 3: second round. `1` is a scratch BED file holding gtf.lnc.disp; the
# dids reported in round one are passed to rmByKey.pl against it (key column 4)
# and the result is compared again (prefix "disp2").
cat gtf.lnc.disp | gtf2Bed.pl - > 1
cat disp.tracking | perl -ne '@t = split(/\t/,$_); $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n"; ' | cut -f 1 | perl /leofs/noncode/NONCODEv4/cmds/rmByKey.pl - 1 4 | bedToGtf.sh - > gtf.lnc.disp2
cuffcompare -r loci.combined.gtf -C gtf.lnc.disp2 -o disp2
# Step 4: for both rounds, join the (did, rid) pairs to loci.tracking on rid
# and replace rid by the id between the first two '|' of the 7th joined
# field. Round one creates id_disp_contain, round two appends to it.
cat disp.tracking | perl -ne '@t = split(/\t/,$_); $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n"; ' | perl $CMD/leftJoin.pl - 2 loci.tracking 1 | perl -ne '@t = split(/\t/,$_); $t[6] =~ /.*?\|(.*?)\|/; $rid = $1; print $t[0]. "\t" . $rid . "\n"; ' > id_disp_contain
cat disp2.tracking | perl -ne '@t = split(/\t/,$_); $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n"; ' | perl $CMD/leftJoin.pl - 2 loci.tracking 1 | perl -ne '@t = split(/\t/,$_); $t[6] =~ /.*?\|(.*?)\|/; $rid = $1; print $t[0]. "\t" . $rid . "\n"; ' >> id_disp_contain
mv id_disp_contain id.disp_contain
# Step 5: transcript -> gene table. The two comment lines below show the
# grep command used to inspect the attribute text and a sample of its output.
#head loci.combined.gtf | grep -Po 'gene_id ".*?"; transcript_id ".*?";'
#gene_id "XLOC_000001"; transcript_id "TCONS_00000001";
# Emits "<id from tracking field>\t<gene_id>" for every transcript in the GTF.
cat loci.combined.gtf | grep -Po 'gene_id ".*?"; transcript_id ".*?";' | uniq | perl -pe 's/gene_id "(.*?)"; transcript_id "(.*?)";/$2\t$1/' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking 1 | perl -ne ' @t = split(/\t/,$_); $t[6] =~ /.*?\|(.*?)\|/; print $1 . "\t" . $t[1]. "\n";' > rnaInGene
# Add the ids from id.disp_contain, taking the gene of the transcript they
# were matched to (joined columns 1 and 4).
# NOTE(review): rnaInGene is read by leftJoin.pl and appended to in the same
# pipeline; this is only safe if leftJoin.pl loads the file completely before
# producing output — confirm.
perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl id.disp_contain 2 rnaInGene 1 | cut -f 1,4 >> rnaInGene
# Ad-hoc checks on rnaInGene. The first `uniq -c` counts transcripts per gene
# (column 2); the second `uniq -c` turns those counts into a histogram
# "<number of genes> <transcripts per gene>".
cut -f 2 rnaInGene | sort | uniq -c | awk '{print $1;}' | sort -n | uniq -c | less
# Number of distinct transcripts-per-gene values.
cut -f 2 rnaInGene | sort | uniq -c | awk '{print $1;}' | sort -n | uniq -c | wc -l
# Sum over the last 44 histogram rows.
# NOTE(review): `uniq -c` output contains no tabs, so `cut -f 1` passes each
# line through unchanged; the result depends on how sum.pl parses
# "<count> <value>" lines — confirm.
cut -f 2 rnaInGene | sort | uniq -c | awk '{print $1;}' | sort -n | uniq -c | cut -f 1 | tail -44 | perl /leofs/noncode/NONCODEv4/cmds/sum.pl -
# Count the rows of `source` whose joined srcNames line contains "ensembl"
# (current directory only).
cat source | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 2 srcNames 1 | grep ensembl | wc -l
# Print, one line per organism, the number of `source` rows whose joined
# srcNames line contains "ensembl".
# Each organism runs in a subshell so that a missing directory is reported by
# cd and skipped, rather than counting in the wrong directory and then
# leaving the tree through `cd ..`.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
  (
    cd "$org" || exit
    perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 2 srcNames 1 < source \
      | grep -c ensembl
  )
done
# Count, per organism, the `source` rows whose joined srcNames line contains
# "refseq", and pipe the per-organism counts to sum.pl.
# Each organism runs in a subshell so that a missing directory is reported by
# cd (on stderr) and skipped, rather than counting in the wrong directory and
# then leaving the tree through `cd ..`.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
  (
    cd "$org" || exit
    perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 2 srcNames 1 < source \
      | grep -c refseq
  )
done | perl "$CMD/sum.pl" -
# Count, per organism, the `source` rows whose joined srcNames line contains
# none of "refseq", "ensembl" or "v3", and pipe the per-organism counts to
# sum.pl.
# Each organism runs in a subshell so that a missing directory is reported by
# cd (on stderr) and skipped, rather than counting in the wrong directory and
# then leaving the tree through `cd ..`.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
  (
    cd "$org" || exit
    perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 2 srcNames 1 < source \
      | grep -vc -e refseq -e ensembl -e v3
  )
done | perl "$CMD/sum.pl" -
Ensembl:141194
Refseq:35445
Literature:118148
# Per-organism "disp" pipeline; writes id.disp_contain and rnaInGene in each
# organism directory.
# NOTE(review): danRer7, hg19 and mm9 are not in this list — presumably they
# were processed by hand beforehand; confirm.
#
# Changes against the earlier form of this loop:
#   * each organism runs in a subshell, so a failed cd skips that organism
#     instead of running everything in the wrong directory;
#   * the final step writes to rnaInGene.2 and then appends, instead of
#     reading and appending rnaInGene in the same pipeline;
#   * the two perl programs that were repeated verbatim are stored once.
for org in galGal3 dm3 ce10 sacCer3 tair10 bosTau6
do
  (
    cd "$org" || exit

    # cuffcompare tracking line -> "did<TAB>rid": did is the id between the
    # first two '|' of field 5, rid the text after the first '|' of field 3.
    disp_pairs='@t = split(/\t/,$_); $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n"; '
    # Joined line -> "did<TAB>id between the first two '|' of field 7".
    to_orig_id='@t = split(/\t/,$_); $t[6] =~ /.*?\|(.*?)\|/; $rid = $1; print $t[0]. "\t" . $rid . "\n"; '

    # Step 1: ids of the transcripts in loci.combined.gtf are passed to
    # rmByKey.pl with bed.v4.lncrna (key column 4); the result becomes GTF.
    gtf2Bed.pl - < loci.combined.gtf | cut -f 4 \
      | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking 1 \
      | perl -ne ' @t = split(/\t/,$_); $t[5] =~ /.*?\|(.*?)\|/; print $1 . "\n";' \
      | perl "$CMD/rmByKey.pl" - bed.v4.lncrna 4 | bedToGtf.sh - > gtf.lnc.disp

    # Step 2: first comparison against the combined loci; report pair count.
    cuffcompare -r loci.combined.gtf -C gtf.lnc.disp -o disp
    perl -ne "$disp_pairs" < disp.tracking | wc -l

    # Step 3: second comparison after passing the round-one dids to
    # rmByKey.pl. `1` is a scratch BED copy of gtf.lnc.disp.
    gtf2Bed.pl - < gtf.lnc.disp > 1
    perl -ne "$disp_pairs" < disp.tracking | cut -f 1 \
      | perl /leofs/noncode/NONCODEv4/cmds/rmByKey.pl - 1 4 | bedToGtf.sh - > gtf.lnc.disp2
    cuffcompare -r loci.combined.gtf -C gtf.lnc.disp2 -o disp2

    # Step 4: collect "did<TAB>original id" for both rounds.
    perl -ne "$disp_pairs" < disp.tracking \
      | perl "$CMD/leftJoin.pl" - 2 loci.tracking 1 | perl -ne "$to_orig_id" > id_disp_contain
    perl -ne "$disp_pairs" < disp2.tracking \
      | perl "$CMD/leftJoin.pl" - 2 loci.tracking 1 | perl -ne "$to_orig_id" >> id_disp_contain
    mv id_disp_contain id.disp_contain

    # Step 5: transcript -> gene table. The attribute text matched below
    # looks like: gene_id "XLOC_000001"; transcript_id "TCONS_00000001";
    grep -Po 'gene_id ".*?"; transcript_id ".*?";' loci.combined.gtf | uniq \
      | perl -pe 's/gene_id "(.*?)"; transcript_id "(.*?)";/$2\t$1/' \
      | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking 1 \
      | perl -ne ' @t = split(/\t/,$_); $t[6] =~ /.*?\|(.*?)\|/; print $1 . "\t" . $t[1]. "\n";' > rnaInGene

    # Step 6: add the ids from id.disp_contain with the gene of the
    # transcript they were matched to. Written to rnaInGene.2 first so
    # rnaInGene is never read while it is being appended to.
    perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl id.disp_contain 2 rnaInGene 1 \
      | cut -f 1,4 > rnaInGene.2
    cat rnaInGene.2 >> rnaInGene
  )
done
# For every organism, write db/rnaInGene from rnaInGene in two lookup steps:
# first against id.v4_tcons (keeping joined columns 3 and 2), then against
# id.v4_xloc (keeping joined columns 1 and 3).
# NOTE(review): presumably this replaces the original transcript and gene ids
# by their numeric v4 ids — confirm against the id.v4_* file layout.
# Each organism runs in a subshell so a failed cd skips that organism instead
# of running in the wrong directory and then leaving the tree via `cd ..`.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
  (
    cd "$org" || exit
    perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_tcons 2 < rnaInGene \
      | awk '{print $3 "\t" $2}' \
      | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 2 id.v4_xloc 2 \
      | awk '{print $1 "\t" $3}' > db/rnaInGene
  )
done
# Three successive drafts of the per-locus class flags. Each one joins the
# locus ids (column 1 of loci.loci) against loci.a, loci.eo, loci.io and
# loci.l in turn and tests which joined columns are non-empty.
#
# Draft 1 (first 10 loci): prints one 0/1 per line.
# NOTE(review): $3 is tested twice and $5 never, so the four lines do not
# correspond to the four joined files; the third command below is the
# corrected form.
head loci.loci | cut -f 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.a 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.eo 1| perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.io 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.l 1 | awk -F "\t" '{ if($2){print 1;}else{print 0;} if($3){print 1;}else{print 0;} if($3){print 1;}else{print 0;} if($4){print 1;}else{print 0;} }'
# Draft 2 (first 10 loci): attempt to accumulate the flags in `tmp`.
# NOTE(review): in awk `$tmp` is a field reference, not the variable, so this
# does not build the intended string; superseded by the next command.
head loci.loci | cut -f 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.a 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.eo 1| perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.io 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.l 1 | awk -F "\t" 'BEGIN{tmp="";}{ if($2){tmp=$tmp"1";}else{print 0;} if($3){print 1;}else{print 0;} if($3){print 1;}else{print 0;} if($4){print 1;}else{print 0;} }END{print $tmp}'
# Final form (all loci): prints "<locus id>\t<4 flags>", one flag character
# (1 = non-empty, 0 = empty) for each of joined columns 2..5.
cat loci.loci | cut -f 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.a 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.eo 1| perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.io 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.l 1 | awk -F "\t" '{ tmp=""; if($2){tmp=tmp"1";}else{tmp=tmp"0";} if($3){tmp=tmp"1";}else{tmp=tmp"0";} if($4){tmp=tmp"1";}else{tmp=tmp"0";} if($5){tmp=tmp"1";}else{tmp=tmp"0"; } print $1"\t"tmp;}'
# For every organism, write db/gene_class: one line per locus holding the
# id taken from id.v4_xloc (joined column 3) and a 4-character flag string.
# The flag string has one character per lookup file, in the order loci.a,
# loci.eo, loci.io, loci.l: 1 if the joined column is non-empty, 0 otherwise.
# Each organism runs in a subshell so a failed cd skips that organism instead
# of running in the wrong directory and then leaving the tree via `cd ..`.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
  (
    cd "$org" || exit
    cut -f 1 loci.loci \
      | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.a 1 \
      | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.eo 1 \
      | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.io 1 \
      | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.l 1 \
      | awk -F "\t" '{ tmp=""; if($2){tmp=tmp"1";}else{tmp=tmp"0";} if($3){tmp=tmp"1";}else{tmp=tmp"0";} if($4){tmp=tmp"1";}else{tmp=tmp"0";} if($5){tmp=tmp"1";}else{tmp=tmp"0"; } print $1"\t"tmp;}' \
      | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_xloc 2 \
      | awk '{ print $3 "\t" $2 ;}' \
      | sort -nk 1 > db/gene_class
  )
done
# For every organism, write db/gene_info.
#   1. Scratch file `2` (a regular file literally named "2"): one row per line
#      of db/gene.bed with the row number, BED columns 1-3, column 14 of the
#      pasted line and BED column 6. Column 14 comes from the
#      getBedSeqLength.sh output pasted next to the BED line (presumably the
#      length — TODO confirm that script's output layout).
#   2. Transcripts per gene are counted from column 2 of db/rnaInGene and
#      joined onto `2`; columns 1-6 and 8 of the joined line are kept.
# Each organism runs in a subshell so a failed cd skips that organism instead
# of running in the wrong directory and then leaving the tree via `cd ..`.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
  (
    cd "$org" || exit
    getBedSeqLength.sh - < db/gene.bed | paste db/gene.bed - \
      | awk '{ print NR "\t" $1 "\t" $2 "\t" $3 "\t" $14 "\t" $6 }' > 2
    cut -f 2 db/rnaInGene | sort -n | uniq -c | awk '{ print $2 "\t" $1;}' \
      | perl "$CMD/leftJoin.pl" 2 1 - 1 | cut -f 1,2,3,4,5,6,8 > db/gene_info
  )
done
# Move each organism's lncRNA.bed into its db/ directory.
# The command that generated lncRNA.bed is kept below, commented out, so
# lncRNA.bed must already exist in each directory when this loop runs.
# Each organism runs in a subshell so a failed cd skips that organism instead
# of moving a file from the wrong directory and then leaving the tree via
# `cd ..`.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
  (
    cd "$org" || exit
    #cat db/rna.bed | getBedSeqLength.sh - | awk '$2 > 200 { print $1;}' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 db/rna.bed 4 | cut -f 2,3,4,5,6,7,8,9,10,11,12,13 > lncRNA.bed
    mv lncRNA.bed db
  )
done
# Bundle the db/ directory of all nine organisms into one gzip-compressed
# archive, v4.tgz.
tar cvzf v4.tgz bosTau6/db ce10/db danRer7/db dm3/db galGal3/db hg19/db mm9/db sacCer3/db tair10/db
/data/tmp/v4/bosTau6/db
# Create the per-organism tables in database `ncrna` and bulk-load them from
# the files under /data/tmp/v4/bosTau6/db.
# NOTE(review): every table here is named human_* while every load path
# points at bosTau6 — confirm which organism was intended.
# Quoting note: each SQL statement is a single-quoted shell string, so the
# inner '0' closes and reopens that string and MySQL receives DEFAULT 0
# without quotes (this also applies to the char column human_src.oId).

# Inspect the existing schema first.
hgsql -e 'show tables;' ncrna
hgsql -e 'show create table HSAG;' ncrna
# Gene table, loaded from db/gene_info.
hgsql -e 'CREATE TABLE `human_gene` (
`id` int(6) NOT NULL DEFAULT '0',
`chr` char(20) DEFAULT NULL,
`start` int(10) DEFAULT NULL,
`end` int(10) DEFAULT NULL,
`length` int(7) DEFAULT NULL,
`strand` char(1) DEFAULT NULL,
`transcriptCount` int(2) DEFAULT NULL,
PRIMARY KEY (`id`)
);' ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/gene_info" into table human_gene ' ncrna
# Transcript -> gene table, loaded from db/rnaInGene.
hgsql -e 'CREATE TABLE `human_rnaInGene` (
`rid` int(6) NOT NULL DEFAULT '0',
`gid` int(6) NOT NULL DEFAULT '0',
PRIMARY KEY (`rid`)
);' ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/rnaInGene" into table human_rnaInGene ' ncrna
# Source table, loaded from db/src. It has no primary key. The drop fails if
# the table does not exist yet.
hgsql -e 'drop TABLE `human_src`' ncrna
hgsql -e 'CREATE TABLE `human_src` (
`id` int(6) NOT NULL DEFAULT '0' ,
`src` int(2) NOT NULL DEFAULT '0',
`oId` char(25) NOT NULL DEFAULT '0'
);' ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/src" into table human_src ' ncrna
# Gene class table, loaded from db/gene_class; the 4-character value is
# stored in the column named oId.
hgsql -e 'CREATE TABLE `human_gene_class` (
`id` int(6) NOT NULL DEFAULT '0' ,
`oId` char(4) NOT NULL DEFAULT '0',
PRIMARY KEY (`id`)
);' ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/gene_class" into table human_gene_class ' ncrna
# Per-transcript general information, loaded from db/generalInfo.
hgsql -e 'CREATE TABLE `human_generalInfo`(
`id` int(6) NOT NULL DEFAULT '0',
`chr` char(20) DEFAULT NULL,
`bedStart` int(10) DEFAULT NULL,
`bedEnd` int(10) DEFAULT NULL,
`strand` char(1) DEFAULT NULL,
`exonCount` int(3) DEFAULT NULL,
`exonLen` text,
`exonStart` text,
`length` int(6) DEFAULT NULL,
`cnci` decimal(15,7) DEFAULT NULL,
PRIMARY KEY (`id`)
);' ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/generalInfo" into table human_generalInfo ' ncrna
# Sequence table, loaded from db/seq. The drop fails if the table does not
# exist yet.
hgsql -e 'drop TABLE `human_seq`; ' ncrna
hgsql -e 'CREATE TABLE `human_seq` (
`id` int(10) NOT NULL,
`seq` mediumtext,
PRIMARY KEY (`id`)
);' ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/seq" into table human_seq ' ncrna
# Source name lookup table, loaded from db/srcNames. It has no primary key.
hgsql -e 'CREATE TABLE `human_srcNames` (
`id` int(10) NOT NULL,
`srcName` char(30)
);' ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/srcNames" into table human_srcNames ' ncrna
$sql = "SELECT * from ".$org_prefix[$org]."_src where id = $id ;";
| Field | Type | Null | Key | Default | Extra |
+-------+----------+------+-----+---------+-------+
| id | int(6) | NO | | 0 | |
| src | int(2) | NO | | 0 | |
| oId | char(25) | NO | | 0 | |
+-------+----------+------+-----+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+---------+----------+------+-----+---------+-------+
| id | int(10) | NO | | NULL | |
| srcName | char(30) | YES | | NULL | |
+---------+----------+------+-----+---------+-------+
srcName_id
# Gene-level expression profile: keep column 1 of expr/fpkm.loci, join the
# remaining columns with commas into one field, pass the result through
# leftJoin.pl against id.v4_xloc, and write "<joined column 3>\t<profile>"
# sorted numerically on the first column.
cat expr/fpkm.loci | perl -ne '@t = split(/\t/,$_); print $t[0]."\t"; for($i = 1; $i < (@t -1); $i ++){print $t[$i] . ",";} print $t[$i] ;' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_xloc 2 | awk '{ print $3 "\t" $2;}' | sort -nk 1 > db/expr_profile
#head expr/fpkm.rna | perl -ne '@t = split(/\t/,$_); print $t[0]."\t"; for($i = 1; $i < (@t -1); $i ++){print $t[$i] . ",";} print $t[$i] ;' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_xloc 2 | awk '{ print $3 "\t" $2;}' | sort -nk 1 > db/expr_profile
#head expr/fpkm.rna | perl -ne '@t = split(/\t/,$_); print $t[0]."\t"; for($i = 1; $i < (@t -1); $i ++){print $t[$i] . ",";} print $t[$i] ;' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking 1
#head expr/fpkm.rna | perl -ne '@t = split(/\t/,$_); print $t[0]."\t"; for($i = 1; $i < (@t -1); $i ++){print $t[$i] . ",";} print $t[$i] ;' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking 1 | perl -ne '@t = split(/\t/,$_); $t[6] =~ /.*?\|(.*?)\|/; print $1 . "\t". $t[1] . "\n";' | perl $CMD/leftJoin.pl
# Transcript-level profile, step 1: same comma-joining for expr/fpkm.rna,
# then look each transcript up in loci.tracking and emit
# "<id between the first two '|' of joined field 7>\t<profile>".
cat expr/fpkm.rna | perl -ne '@t = split(/\t/,$_); print $t[0]."\t"; for($i = 1; $i < (@t -1); $i ++){print $t[$i] . ",";} print $t[$i] ;' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking 1 | perl -ne '@t = split(/\t/,$_); $t[6] =~ /.*?\|(.*?)\|/; print $1 . "\t". $t[1] . "\n";' > expr.rna
# Step 2: ids listed in id.disp_contain get the profile of the transcript
# they were matched to (joined columns 1 and 4); appended to expr.rna via an
# intermediate file.
perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl id.disp_contain 2 expr.rna 1 | cut -f 1,4 > expr.rna.2
cat expr.rna.2 >> expr.rna
# Step 3: the ids collected so far are passed to rmByKey.pl with
# bed.v4.lncrna (key column 4); every row it returns gets an all-zero profile.
# NOTE(review): the zero profile is hard-coded to 22 values — it must match
# the number of expression columns in expr/fpkm.rna; confirm per organism.
cat expr.rna | cut -f 1 | perl /leofs/noncode/NONCODEv4/cmds/rmByKey.pl - bed.v4.lncrna 4 | awk '{print $4"\t0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0"}' > expr.rna.3
cat expr.rna.3 >> expr.rna
# Step 4: look the ids up in id.v4_tcons, keep "<joined column 3>\t<profile>"
# and sort numerically on the first column.
cat expr.rna | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_tcons 2 | awk '{ print $3 "\t" $2;}' | sort -nk 1 > db/expr_profile.rna
# Function table: for every line of func.xloc joined to id.v4_xloc, print the
# second-to-last joined field, a tab, then the GO ids ("GO:<digits>" directly
# followed by "(") extracted from fields 2 onward, comma-separated.
cat func.xloc | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_xloc 2 | perl -ne '@t = split(/\t/,$_); print $t[@t - 2]."\t"; for($i = 1; $i < (@t - 4); $i ++){$t[$i] =~ /(GO:\d+)\(/; print $1 . ",";} $t[$i] =~ /(GO:\d+)\(/; print $1 . "\n" ;' > db/function
# Stage the mouse expression profiles and function table under
# expr_func/mouse/.
cp mm9/db/expr_profile* mm9/db/function expr_func/mouse/
# Copy expr_func.tgz to the remote host.
# NOTE(review): the command that creates expr_func.tgz is not recorded here.
scp expr_func.tgz root@112.124.13.146:/data/tmp/
adipose,adrenal,brain,brain_R,breast,colon,foreskin,heart,hela_R,HLF_1,HLF_2,kidney,liver,liver_R,lung,lymphNode,ovary,placenta_R,prostate,skeltalMuscle,testes,testes_R,thyroid,whiteBloodCell
heart,hippocampus,liver,lung,spleen,thymus
scp fpkm_imgs/* root@112.124.13.146:/data/var/www/html/NONCODE/images/HSAG/fpkm_imgs
+----+-----+-----------------+
| id | src | oId |
+----+-----+-----------------+
| 1 | 9 | ENST00000456328 |
| 1 | 10 | NR_046018 |
| 2 | 9 | ENST00000515242 |
| 3 | 9 | ENST00000518655 |
| 4 | 9 | ENST00000450305 |
| 5 | 11 | n345739 |
| 6 | 3 | brain_Pred9 |
| 7 | 3 | brain_Pred15 |
| 8 | 11 | n385920 |
| 9 | 11 | n345943 |
+----+-----+-----------------+
#######################################
# Print the SQL statements that export, for every organism, the ids and
# original ids whose source is 'ensembl' and 'refseq' into
# /data/tmp/<source>.<table prefix>.
#
# The display-name -> table-prefix mapping is an associative array because
# names such as "D.Melanogaster", "A.Thaliana" and "C.Elegans" contain a dot:
# they are not valid shell variable names, so plain `Name=value` assignments
# and `${!org}` indirection fail for them.
# Arguments: none
# Outputs:   18 SQL statements on stdout (ensembl then refseq per organism)
#######################################
print_src_export_sql() {
  local -A org_prefix=(
    [Chicken]=chicken
    [D.Melanogaster]=dm
    [A.Thaliana]=tair
    [Cow]=cow
    [C.Elegans]=celegans
    [Zebrafish]=zebrafish
    [Human]=human
    [Mouse]=mouse
    [Yeast]=yeast
  )
  local org prefix src_name
  for org in Chicken D.Melanogaster A.Thaliana Cow C.Elegans Zebrafish Human Mouse Yeast
  do
    prefix=${org_prefix[$org]}
    for src_name in ensembl refseq
    do
      printf '%s\n' "select a.id, a.oId, '$org' from ${prefix}_src as a left join ${prefix}_srcNames as b on (a.src = b.id) where b.srcName = '$src_name' into outfile '/data/tmp/$src_name.$prefix';"
    done
  done
}
print_src_export_sql
# Rename the per-gene FPKM images: for each row of id.v4_xloc, column 2 with
# the "XLOC_" prefix and leading zeros stripped gives the current file name
# (<n>.png); it is moved to O<row number>.png, where the row number is perl's
# `$.` input line counter.
# NOTE(review): the "O" prefix presumably keeps renamed files from colliding
# with not-yet-renamed <n>.png files — confirm.
cat id.v4_xloc | cut -f 2 | perl -pe 's/XLOC_0*//' | perl -ne 'chomp; $file1 = "expr/fpkm_imgs/".$_ . ".png"; $file2 = "expr/fpkm_imgs/O".$. .".png"; `mv $file1 $file2`; '
# Gene-level FPKM table keyed by the second-to-last joined field (taken from
# id.v4_xloc), followed by the tab-separated values of joined fields 2 up to
# the fourth-from-last.
cat expr/fpkm.loci | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_xloc 2 | perl -ne '@t = split(/\t/,$_); print $t[@t - 2] . "\t"; for($i = 1; $i < (@t - 3); $i ++){ print $t[$i] . "\t";} print $t[$i] . "\n" ; ' > expr/fpkm.loci.v4
# Upload the table and its `header` file to the MMUG image directory.
cd expr
scp fpkm.loci.v4 header root@112.124.13.146:/data/var/www/html/NONCODE/images/MMUG
# Same for the transcript-level table (ids from id.v4_tcons), uploaded to
# MMUT.
# NOTE(review): the expr/... paths below assume the shell is back in the
# organism directory, but no `cd ..` is recorded after the `cd expr` above.
cat expr/fpkm.rna | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_tcons 2 | perl -ne '@t = split(/\t/,$_); print $t[@t - 2] . "\t"; for($i = 1; $i < (@t - 3); $i ++){ print $t[$i] . "\t";} print $t[$i] . "\n" ; ' > expr/fpkm.rna.v4
cd expr
scp fpkm.rna.v4 header root@112.124.13.146:/data/var/www/html/NONCODE/images/MMUT
cd /leofs/noncode/NONCODEv4/mm9/expr
# Append every organism's sequences to fa.v4 in FASTA form. Each line of
# <org>/db/seq is "<numeric id>\t<sequence>" and becomes
#   >NON<3-letter organism code>T<id zero-padded to 6 digits>
#   <sequence>
# The organism is passed to perl through the environment rather than spliced
# into the program text, and the id is padded with sprintf: the earlier
# substr("000000", 0, 6 - length(id)) form yields a negative length for ids
# longer than 6 digits, which trims the pad from the end instead of dropping
# it entirely.
# NOTE: fa.v4 is appended to, so remove it first when rebuilding from scratch.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
  ORG="$org" perl -ne 'BEGIN { %orgId = ( "hg19" => "HSA","mm9" => "MMU","dm3" => "DME","galGal3" => "GGA","ce10" => "CEL","danRer7" => "DRE","sacCer3" => "SCE","tair10" => "ATH", "bosTau6" => "BTA" ); } @t = split(/\t/,$_); print ">NON" . $orgId{$ENV{ORG}} . "T" . sprintf("%06d", $t[0]) . "\n" . $t[1]; ' < "$org/db/seq" >> fa.v4
done
# Build a BLAST database named "noncodev4" from fa.v4; `-p F` marks the input
# as nucleotide rather than protein.
formatdb -p F -n noncodev4 -i fa.v4 -t "ncRNAs in NONCODEv4"
# Spot checks: show the first lines of fa.v4 from line 223409 onward, and
# look up id 139878 in id.v4_tcons.
awk "NR>=223409 " fa.v4 | head
grep 139878 id.v4_tcons
/data/var/www/html/NONCODE/images/HSAG