NONCODE 入库 (loading the NONCODE v4 data into the database)

# Preview the first records of the ncRNA BED file.
head bed.v4.ncrna


# Take the second line of v4.stats and print every whitespace-separated field
# from the 7th (Perl index 6) onward, each prefixed with its 1-based position.
# The concatenation is wrapped in print(...): written as `print ($i - 5) . ...`
# Perl takes `($i - 5)` as the complete argument list, prints only the number
# and discards the rest of the expression.
head -2 v4.stats | tail -1 | perl -ne '@t = split(/\s/, $_); chomp @t; for ($i = 6; $i < @t; $i++) { print(($i - 5) . $t[$i] . "\n"); }'




# For each organism directory, write srcNames (the source names) and source
# (transcript name, source index, original id).
for org in bosTau6 galGal3 tair10 dm3 ce10 danRer7 sacCer3
do
  # Skip the organism when its directory is missing; otherwise the commands
  # would run in the wrong directory and `cd ..` would climb out of the tree.
  cd "$org" || continue

  # srcNames: fields 7 .. NF-1 of the second line of v4.stats, one per line.
  head -2 v4.stats | tail -1 | awk '{ for (i = 7; i < NF; i++) { print $i; } }' > srcNames

  # source: join the BED names (column 4) with v4.tracking, then for every
  # joined field from index 5 up to (but excluding) the last one that is not
  # "-", split it on commas, take the text between the first two "|" of each
  # item, drop a leading 8-digit prefix plus "_", and print
  # name <TAB> (field index - 4) <TAB> id.
  cut -f 4 bed.v4.ncrna \
    | perl "$CMD"/leftJoin.pl - 1 v4.tracking 1 \
    | perl -ne '
        @t = split(/\t/, $_);
        chomp @t;
        $tid = $t[0];
        for ($i = 5; $i < (@t - 1); $i++) {
          if ($t[$i] ne "-") {
            @oIds = split(/,/, $t[$i]);
            for ($j = 0; $j < @oIds; $j++) {
              $oIds[$j] =~ /.*?\|(.*?)\|/;
              $oId1 = $1;
              $oId1 =~ s/^\d{8}_//;
              print $tid . "\t" . ($i - 4) . "\t" . $oId1 . "\n";
            }
          }
        }
      ' > source
  cd ..
done


# Create a symbolic link named srcNames that points at ds.
ln -s ds srcNames




# Build `source` from v4.2.tracking: join the BED names (column 4) with the
# tracking file, then for every joined field from index 5 onward that is not
# "-", split it on commas, take the text between the first two "|" of each
# item, strip a leading 8-digit prefix plus "_", and print
# name <TAB> (field index - 4) <TAB> id.
cat  bed.v4.ncrna  | cut -f 4 | perl $CMD/leftJoin.pl - 1 v4.2.tracking 1| perl -ne '@t = split(/\t/, $_); chomp @t; $tid = $t[0]; for($i = 5; $i < @t ; $i ++){  if($t[$i] ne "-"){ @oIds = split(/,/,$t[$i]); for($j = 0; $j < @oIds; $j ++){ $oIds[$j] =~ /.*?\|(.*?)\|/;   $oId1 = $1; $oId1 =~ s/^\d{8}_//; print $tid . "\t" . ($i - 4) . "\t" . $oId1 . "\n";} } }' > source


# Same pipeline against v4.tracking. It writes to the same `source` file, so
# it replaces the output of the command above.
cat bed.v4.ncrna  | cut -f 4 | perl $CMD/leftJoin.pl - 1 v4.tracking 1| perl -ne '@t = split(/\t/, $_); chomp @t; $tid = $t[0]; for($i = 5; $i < @t ; $i ++){  if($t[$i] ne "-"){ @oIds = split(/,/,$t[$i]); for($j = 0; $j < @oIds; $j ++){ $oIds[$j] =~ /.*?\|(.*?)\|/;   $oId1 = $1; $oId1 =~ s/^\d{8}_//; print $tid . "\t" . ($i - 4) . "\t" . $oId1 . "\n";} } }' > source






# Look up the rows of hglnc_loci2.tracking that mention id n343060.
grep n343060 hglnc_loci2.tracking 






for org in  galGal3  dm3 ce10 danRer7 sacCer3 tair10 bosTau6  hg19 mm9
do
cd $org;
cat   bed.v4.ncrna |  perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 4 id.v4_tcons 2 | perl -ne ' @t = split(/\t/,$_); $org="'$org'"; %ORGID = ("hg19" => "HSA","mm9" => "MMU","dm3" => "DME","galGal3" => "GGA","ce10" => "CEL","danRer7" => "DRE","sacCer3" => "SCE","tair10" => "ATH", "bosTau6" => "BTA" );   $nId = "NON".$ORGID{$org}."T" . substr("000000", 0, 6 - length($t[12]) ) .$t[12]; print $t[0] . "\t" .  $t[1] . "\t" .$t[2] . "\t" .  $nId . "\t" .$t[4] . "\t" .  $t[5] . "\t". $t[6] . "\t" .  $t[7] . "\t".  $t[8] . "\t" .  $t[9] . "\t". $t[10] . "\t" .  $t[11] . "\n" ; ' > db/rna.bed
cat loci.combined.gtf.loci.bed  |  perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 4 id.v4_xloc  2 | perl -ne ' @t = split(/\t/,$_); $org="'$org'";  %ORGID = ("hg19" => "HSA","mm9" => "MMU","dm3" => "DME","galGal3" => "GGA","ce10" => "CEL","danRer7" => "DRE","sacCer3" => "SCE","tair10" => "ATH", "bosTau6" => "BTA" );   $nId = "NON".$ORGID{$org}."G" . substr("000000", 0, 6 - length($t[12]) ) .$t[12]; print $t[0] . "\t" .  $t[1] . "\t" .$t[2] . "\t" .  $nId . "\t" .$t[4] . "\t" .  $t[5] . "\t". $t[6] . "\t" .  $t[7] . "\t".  $t[8] . "\t" .  $t[9] . "\t". $t[10] . "\t" .  $t[11] . "\n" ; ' > db/gene.bed


cd ..
done




ce10 sacCer3 galGal3 


# Delete the numbered chunk files fa.0..fa.59 and bed.v4.ncrna0..bed.v4.ncrna59.
i=0
while (( i < 60 )); do
  rm  fa.$i bed.v4.ncrna$i
  i=$(( i + 1 ))
done


# Flatten fa.v4.bed into seq.v4: a line containing ">" contributes the text
# after it followed by a tab; any other line is emitted followed by a newline.
perl -ne 'chomp; print(/>(.*)/ ? $1 . "\t" : $_ . "\n");' fa.v4.bed > seq.v4


# Join the names from column 4 of bed.v4.ncrna against seq.v4, keep columns
# 2 and 3 of the joined rows, then replace seq.v4 with the result.
cut -f 4 bed.v4.ncrna \
  | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 seq.v4 1 \
  | cut -f 2,3 > seq.v4.1
mv seq.v4.1 seq.v4


# seq.v4.v3: for every row of `source` whose joined source name is "v3.gtf",
# look the original id up in seq.v3 and keep columns 1 and 4 of the result
# (transcript name and, presumably, the v3 sequence).
# All of `source` is read. Feeding only `head source` (the first ten rows)
# would leave the persisted file covering almost none of the transcripts.
cat source | perl $CMD/leftJoin.pl - 2  srcNames 1  |  awk '$5 == "v3.gtf"{ print $1 "\t"  $3;}' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 2 seq.v3 1  | cut -f 1,4  > seq.v4.v3


# Join seq.v4 with seq.v4.v3 on column 1. Where the join matched (column 3
# repeats column 1) emit column 4, otherwise keep column 2. The result then
# replaces seq.v4.
perl $CMD/leftJoin.pl seq.v4 1 seq.v4.v3 1 \
  | awk '{ print $1 "\t" ($1 == $3 ? $4 : $2) }' > seq.v4.v3.2
mv seq.v4.v3.2 seq.v4


# Prefix every line of srcNames with its line number and a tab, in place.
awk '{ print NR "\t" $0 }' srcNames > srcNames.2
mv srcNames.2 srcNames


# Line counts of seq.v4.v3 in every subdirectory.
wc -l  */seq.v4.v3


# First lines of seq.v4 in every subdirectory.
head  */seq.v4




# Run the combinedGtfToLociBed.sh helper on loci.combined.gtf.
# Presumably this produces loci.combined.gtf.loci.bed, which later commands
# read - TODO confirm against the helper.
bash $CMD/combinedGtfToLociBed.sh loci.combined.gtf 
 


# id.v4_tcons: line number <TAB> column 4 of bed.v4.ncrna.
awk '{ print NR "\t" $4 }' bed.v4.ncrna > id.v4_tcons


# id.v4_xloc: line number <TAB> column 4 of loci.combined.gtf.loci.bed.
awk '{ print NR "\t" $4 }' loci.combined.gtf.loci.bed > id.v4_xloc


# Directory that receives the database load files.
mkdir db


# Preview the id join: the first rows of bed.v4.ncrna joined (column 4)
# against column 2 of id.v4_tcons. awk prints each joined record in full.
# The earlier program, `print $`, was an incomplete field reference and does
# not parse.
head bed.v4.ncrna |  perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 4 id.v4_tcons 2 | awk '{ print $0 }'








# Preview of the function-annotation reshaping on the first ten lines: a line
# containing "XLOC" is followed by a tab instead of a newline (so the next
# line is appended to it), then every "<TAB>GO:<digits>(0)" item is removed.
head NcFuncAnno.result.xcy | perl -ne 'chomp; if(/XLOC/){ print $_ . "\t"; }else{ print $_ . "\n"; }' | perl -ne 'chomp; $_ =~ s/\tGO:\d+\(0\)//g;print $_ . "\n" ';




# Distribution of the values in column 10 of bed.v4.lncrna, paged.
cut -f 10 bed.v4.lncrna | sort -n | uniq -c | less
# Number of records whose column 10 is between 6 and 10 inclusive.
cut -f 10 bed.v4.lncrna | awk '$1 > 5 && $1 < 11' | wc -l
# Number of records whose column 10 is greater than 10.
cut -f 10 bed.v4.lncrna | awk '$1 > 10' | wc -l








# Count the records of bed.v4.lncrna per length bin, using column 2 of the
# getBedSeqLength.sh output. One count is printed per bin, in this order.
for length_filter in \
  '$2 < 200' \
  '$2 >= 200 && $2 < 1000' \
  '$2 >= 1000 && $2 < 2500' \
  '$2 >= 2500 && $2 < 5000' \
  '$2 >= 5000 && $2 < 10000' \
  '$2 >= 10000 '
do
  cat bed.v4.lncrna | getBedSeqLength.sh - | awk "$length_filter" | wc -l
done


# func.xloc: append the line after each "XLOC" line to it (tab-separated),
# remove every "<TAB>GO:<digits>(0)" item, and keep only the rows that still
# have a non-empty, non-zero second field.
cat NcFuncAnno.result.xcy | perl -ne 'chomp; if(/XLOC/){ print $_ . "\t"; }else{ print $_ . "\n"; }' | perl -ne 'chomp; $_ =~ s/\tGO:\d+\(0\)//g;print $_ . "\n" ' |  awk '$2'  > func.xloc




danRer7 sacCer3 tair10 bosTau6  hg19 mm9
# One-off mm9 run: write db/rna.bed with NONMMUT accessions in column 4.
# The leftover debug `print $org` is gone; it wrote an extra "mm9" line before
# every record into the BED output. Padding uses "0" x N so that ids longer
# than six characters are left unpadded instead of getting a truncated zero
# string from substr with a negative length.
cat bed.v4.ncrna \
  | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 4 id.v4_tcons 2 \
  | perl -ne '
      BEGIN {
        $org = "mm9";
        %ORGID = ("hg19" => "HSA", "mm9" => "MMU", "dm3" => "DME",
                  "galGal3" => "GGA", "ce10" => "CEL", "danRer7" => "DRE",
                  "sacCer3" => "SCE", "tair10" => "ATH", "bosTau6" => "BTA");
      }
      @t = split(/\t/, $_);
      $nId = "NON" . $ORGID{$org} . "T" . ("0" x (6 - length($t[12]))) . $t[12];
      print join("\t", @t[0 .. 2], $nId, @t[4 .. 11]) . "\n";
    ' > db/rna.bed


# Same for the loci: write db/gene.bed with NONMMUG accessions in column 4.
cat loci.combined.gtf.loci.bed \
  | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 4 id.v4_xloc 2 \
  | perl -ne '
      BEGIN {
        $org = "mm9";
        %ORGID = ("hg19" => "HSA", "mm9" => "MMU", "dm3" => "DME",
                  "galGal3" => "GGA", "ce10" => "CEL", "danRer7" => "DRE",
                  "sacCer3" => "SCE", "tair10" => "ATH", "bosTau6" => "BTA");
      }
      @t = split(/\t/, $_);
      $nId = "NON" . $ORGID{$org} . "G" . ("0" x (6 - length($t[12]))) . $t[12];
      print join("\t", @t[0 .. 2], $nId, @t[4 .. 11]) . "\n";
    ' > db/gene.bed
# db/generalInfo for the organisms that have a cnci.v4 file.
# The earlier full-list header is kept only as a comment. Two consecutive
# `for` headers before a single `do` are a shell syntax error.
#for org in  galGal3  dm3 ce10 danRer7 sacCer3 tair10 bosTau6  hg19 mm9
for org in tair10 bosTau6 hg19 mm9
do
  # Skip a missing directory instead of running in the wrong place.
  cd "$org" || continue
  # Columns written: row number, BED columns 1, 2, 3, 6, 10, 11, 12, the
  # length (column 14 after pasting the getBedSeqLength.sh output), and
  # column 13 of the row after both joins (the field taken from cnci.v4).
  cat db/rna.bed | getBedSeqLength.sh  - | paste db/rna.bed - \
    | awk '{print NR "\t" $1 "\t" $2 "\t" $3 "\t" $6 "\t" $10 "\t" $11 "\t" $12 "\t" $14}' \
    | perl $CMD/leftJoin.pl - 1 id.v4_tcons 1 \
    | perl $CMD/leftJoin.pl -  11   cnci.v4  1 \
    | cut -f 1,2,3,4,5,6,7,8,9,13 > db/generalInfo
  cd ..
done




# db/generalInfo for the remaining organisms: the same nine columns as the
# CNCI variant, with a constant 0 written as the tenth column.
for org in galGal3 dm3 ce10 danRer7 sacCer3
do
  # Skip a missing directory; otherwise `cd ..` would climb out of the tree.
  cd "$org" || continue
  cat db/rna.bed | getBedSeqLength.sh  - | paste db/rna.bed - \
    | awk '{print NR "\t" $1 "\t" $2 "\t" $3 "\t" $6 "\t" $10 "\t" $11 "\t" $12 "\t" $14 "\t0" }' > db/generalInfo
  cd ..
done


# Flatten v4.fa into seq.v4: a line containing ">" contributes the text after
# it followed by a tab; every other line is printed followed by a newline.
# NOTE(review): a record whose sequence spans several lines would end up on
# several output lines - presumably the input has one line per sequence.
cat v4.fa | perl -ne 'chomp ; if(/>(.*)/){ print $1 . "\t"; }else{ print $_ . "\n";  }' > seq.v4


# Preview (first ten rows) of the generalInfo pipeline: row number, BED
# columns 1, 2, 3, 6, 10, 11, 12 and the pasted length column, joined with
# id.v4_tcons and cnci.v4; columns 1-9 and 13 are shown.
cat db/rna.bed | getBedSeqLength.sh  - | paste db/rna.bed - | head | awk '{print NR "\t" $1 "\t" $2 "\t" $3 "\t" $6 "\t" $10 "\t" $11 "\t" $12 "\t" $14}' |  perl $CMD/leftJoin.pl - 1 id.v4_tcons 1  |  perl $CMD/leftJoin.pl -  11   cnci.v4  1 | cut -f 1,2,3,4,5,6,7,8,9,13   


# Full run of the same pipeline.
# NOTE(review): this writes ./generalInfo, while the per-organism loops write
# db/generalInfo - confirm which location is intended.
 cat db/rna.bed | getBedSeqLength.sh  - | paste db/rna.bed - |  awk '{print NR "\t" $1 "\t" $2 "\t" $3 "\t" $6 "\t" $10 "\t" $11 "\t" $12 "\t" $14}' |  perl $CMD/leftJoin.pl - 1 id.v4_tcons 1  |  perl $CMD/leftJoin.pl -  11   cnci.v4  1 | cut -f 1,2,3,4,5,6,7,8,9,13  > generalInfo


# db/seq: join id.v4_tcons (column 2) with seq.v4 (column 1, read from stdin)
# and keep columns 1 and 4 of the joined rows (presumably numeric id and
# sequence).
cat seq.v4 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl  id.v4_tcons 2 - 1 | awk '{print $1 "\t" $4;}' > db/seq


# Write db/seq for every organism: id.v4_tcons joined (column 2) with seq.v4
# (column 1), keeping columns 1 and 4 of the joined rows.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
  # Skip a missing directory instead of running in the wrong place.
  cd "$org" || continue
  cat seq.v4 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl  id.v4_tcons 2 - 1 | awk '{print $1 "\t" $4;}' > db/seq
  cd ..
done


# db/src: join `source` (column 1) with id.v4_tcons (column 2) and print
# joined column 4 followed by source columns 2 and 3.
cat source  | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_tcons 2 | awk '{ print $4 "\t" $2 "\t" $3;}' > db/src


# Write db/src and copy srcNames into db/ for every organism.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
  # Skip a missing directory instead of running in the wrong place.
  cd "$org" || continue
  # Joined column 4 (from id.v4_tcons) followed by source columns 2 and 3.
  cat source  | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_tcons 2 | awk '{ print $4 "\t" $2 "\t" $3;}' > db/src
  cp srcNames db/srcNames
  cd ..
done


# Round 1. From loci.combined.gtf take the names (BED column 4), join them
# with loci.tracking, extract the text between the first two "|" of joined
# field 6, and pass those ids as keys to rmByKey.pl against column 4 of
# bed.v4.lncrna. The result is converted to GTF as gtf.lnc.disp.
# NOTE(review): rmByKey.pl is not shown; presumably it drops the rows whose
# key column matches - confirm.
cat loci.combined.gtf | gtf2Bed.pl - | cut -f 4 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking 1 | perl -ne ' @t = split(/\t/,$_);  $t[5] =~ /.*?\|(.*?)\|/; print $1 . "\n";' | perl $CMD/rmByKey.pl - bed.v4.lncrna 4 | bedToGtf.sh - > gtf.lnc.disp
# Compare gtf.lnc.disp against loci.combined.gtf as reference, with output
# prefix "disp". disp.tracking is read below.
cuffcompare -r loci.combined.gtf -C gtf.lnc.disp -o disp




# Count the (id from field 5, id from field 3) pairs found in disp.tracking.
cat disp.tracking | perl -ne '@t = split(/\t/,$_);  $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n";  ' | wc -l




######cat disp.tracking | perl -ne '@t = split(/\t/,$_);  $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n";  '


# Round 2. Write gtf.lnc.disp as BED into a scratch file literally named "1",
# run rmByKey.pl on its column 4 with the ids already paired in round 1, and
# compare the remainder again (output prefix "disp2").
cat gtf.lnc.disp | gtf2Bed.pl - > 1 
cat disp.tracking | perl -ne '@t = split(/\t/,$_);  $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n";  ' | cut -f 1 | perl /leofs/noncode/NONCODEv4/cmds/rmByKey.pl - 1 4 | bedToGtf.sh - > gtf.lnc.disp2
cuffcompare -r loci.combined.gtf -C gtf.lnc.disp2 -o disp2


# Collect the pairs of both rounds. The second id of each pair is joined with
# loci.tracking and replaced by the text between the first two "|" of joined
# field 7. Round 2 is appended to round 1, then the file is renamed.
cat disp.tracking | perl -ne '@t = split(/\t/,$_);  $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n";  ' | perl $CMD/leftJoin.pl - 2 loci.tracking 1 | perl -ne '@t = split(/\t/,$_);  $t[6] =~ /.*?\|(.*?)\|/; $rid = $1; print $t[0]. "\t" . $rid . "\n"; ' > id_disp_contain
cat disp2.tracking | perl -ne '@t = split(/\t/,$_);  $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n";  ' | perl $CMD/leftJoin.pl - 2 loci.tracking 1 | perl -ne '@t = split(/\t/,$_);  $t[6] =~ /.*?\|(.*?)\|/; $rid = $1; print $t[0]. "\t" . $rid . "\n"; ' >> id_disp_contain
mv id_disp_contain id.disp_contain


# Scratch check of the attribute pattern used below:
#head loci.combined.gtf | grep -Po 'gene_id ".*?"; transcript_id ".*?";'


# Example of a matched attribute string:
#gene_id "XLOC_000001"; transcript_id "TCONS_00000001";


# rnaInGene: extract (transcript_id, gene_id) pairs from loci.combined.gtf,
# drop adjacent duplicates, join the transcript id with loci.tracking and
# print the text between the first two "|" of joined field 7 followed by the
# gene id.
cat loci.combined.gtf | grep -Po 'gene_id ".*?"; transcript_id ".*?";' | uniq  | perl -pe 's/gene_id "(.*?)"; transcript_id "(.*?)";/$2\t$1/' |  perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking 1 | perl -ne ' @t = split(/\t/,$_);  $t[6] =~ /.*?\|(.*?)\|/; print  $1 . "\t" . $t[1].  "\n";'    > rnaInGene
# Append the ids listed in id.disp_contain, each with the gene found by
# joining its second column against rnaInGene (columns 1 and 4 of the join).
perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl id.disp_contain 2 rnaInGene 1 | cut -f 1,4 >> rnaInGene




# Histogram of how often each value occurs in column 2 of rnaInGene: count
# the occurrences per value, then count how often each count occurs.
cut -f 2 rnaInGene | sort | uniq -c  | awk '{print $1;}' | sort -n | uniq -c | less
# Number of distinct counts in that histogram.
cut -f 2 rnaInGene | sort | uniq -c  | awk '{print $1;}' | sort -n | uniq -c | wc -l
# Sum over the last 44 histogram rows.
# NOTE(review): `cut -f 1` splits on tabs while `uniq -c` separates count and
# value with spaces, so whole lines probably reach sum.pl - confirm that
# sum.pl copes. The 44 is a fixed row count, presumably derived from the
# `wc -l` result above - TODO confirm.
cut -f 2 rnaInGene | sort | uniq -c  | awk '{print $1;}' | sort -n | uniq -c | cut -f 1 | tail -44 | perl /leofs/noncode/NONCODEv4/cmds/sum.pl -




# Number of `source` rows whose joined srcNames row mentions "ensembl".
cat source | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 2 srcNames 1  | grep ensembl | wc -l


# Per organism, print the number of `source` rows whose joined srcNames row
# mentions "ensembl".
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
  # Skip a missing directory instead of counting in the wrong place.
  cd "$org" || continue
  cat source | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 2 srcNames 1  | grep ensembl | wc -l
  cd ..
done


# Sum over all organisms of the `source` rows whose joined srcNames row
# mentions "refseq". The per-organism counts are piped into sum.pl.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
  # Skip a missing directory so it cannot distort the total.
  cd "$org" || continue
  cat source | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 2 srcNames 1  | grep refseq | wc -l
  cd ..
done | perl $CMD/sum.pl -


# Sum over all organisms of the `source` rows whose joined srcNames row
# mentions none of "refseq", "ensembl" or "v3".
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
  # Skip a missing directory so it cannot distort the total.
  cd "$org" || continue
  cat source | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 2 srcNames 1  | grep -v refseq  | grep -v ensembl | grep -v  v3 | wc -l
  cd ..
done | perl $CMD/sum.pl -






Ensembl:141194
Refseq:35445
Literature:118148


# For each listed organism, run the two cuffcompare rounds that produce
# id.disp_contain and then build rnaInGene (transcript <TAB> gene).
for org in galGal3 dm3 ce10 sacCer3 tair10 bosTau6
do
  # Skip a missing directory. Without the guard every command below would run
  # in the parent directory and `cd ..` would then climb out of the tree.
  cd "$org" || continue

  # Round 1: ids taken from loci.tracking (text between the first two "|" of
  # joined field 6) are passed as keys to rmByKey.pl against column 4 of
  # bed.v4.lncrna. The result becomes gtf.lnc.disp and is compared against
  # loci.combined.gtf (output prefix "disp").
  # NOTE(review): rmByKey.pl is not shown; presumably it drops matching rows.
  cat loci.combined.gtf | gtf2Bed.pl - | cut -f 4 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking 1 | perl -ne ' @t = split(/\t/,$_);  $t[5] =~ /.*?\|(.*?)\|/; print $1 . "\n";' | perl $CMD/rmByKey.pl - bed.v4.lncrna 4 | bedToGtf.sh - > gtf.lnc.disp
  cuffcompare -r loci.combined.gtf -C gtf.lnc.disp -o disp

  # Number of (id from field 5, id from field 3) pairs in disp.tracking.
  cat disp.tracking | perl -ne '@t = split(/\t/,$_);  $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n";  ' | wc -l

  ######cat disp.tracking | perl -ne '@t = split(/\t/,$_);  $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n";  '

  # Round 2: gtf.lnc.disp as BED goes to a scratch file literally named "1".
  # The ids paired in round 1 are used as keys on its column 4, and the
  # remainder is compared again (output prefix "disp2").
  cat gtf.lnc.disp | gtf2Bed.pl - > 1
  cat disp.tracking | perl -ne '@t = split(/\t/,$_);  $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n";  ' | cut -f 1 | perl /leofs/noncode/NONCODEv4/cmds/rmByKey.pl - 1 4 | bedToGtf.sh - > gtf.lnc.disp2
  cuffcompare -r loci.combined.gtf -C gtf.lnc.disp2 -o disp2

  # Pairs of both rounds, with the second id replaced by the text between the
  # first two "|" of joined loci.tracking field 7.
  cat disp.tracking | perl -ne '@t = split(/\t/,$_);  $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n";  ' | perl $CMD/leftJoin.pl - 2 loci.tracking 1 | perl -ne '@t = split(/\t/,$_);  $t[6] =~ /.*?\|(.*?)\|/; $rid = $1; print $t[0]. "\t" . $rid . "\n"; ' > id_disp_contain
  cat disp2.tracking | perl -ne '@t = split(/\t/,$_);  $t[2] =~ /.*?\|(.*)/; $rid = $1; $t[4] =~ /.*?\|(.*?)\|/; $did = $1; print $did . "\t" . $rid . "\n";  ' | perl $CMD/leftJoin.pl - 2 loci.tracking 1 | perl -ne '@t = split(/\t/,$_);  $t[6] =~ /.*?\|(.*?)\|/; $rid = $1; print $t[0]. "\t" . $rid . "\n"; ' >> id_disp_contain
  mv id_disp_contain id.disp_contain

  #head loci.combined.gtf | grep -Po 'gene_id ".*?"; transcript_id ".*?";'
  #gene_id "XLOC_000001"; transcript_id "TCONS_00000001";

  # rnaInGene: (transcript_id, gene_id) pairs from the GTF, with the
  # transcript id replaced through loci.tracking. The ids from
  # id.disp_contain are then appended with the gene of their partner.
  cat loci.combined.gtf | grep -Po 'gene_id ".*?"; transcript_id ".*?";' | uniq  | perl -pe 's/gene_id "(.*?)"; transcript_id "(.*?)";/$2\t$1/' |  perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking 1 | perl -ne ' @t = split(/\t/,$_);  $t[6] =~ /.*?\|(.*?)\|/; print  $1 . "\t" . $t[1].  "\n";'    > rnaInGene
  perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl id.disp_contain 2 rnaInGene 1 | cut -f 1,4 >> rnaInGene

  cd ..
done




# db/rnaInGene per organism: translate both columns of rnaInGene through the
# id tables (transcript via id.v4_tcons, gene via id.v4_xloc).
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
  # Skip a missing directory instead of running in the wrong place.
  cd "$org" || continue
  cat rnaInGene  | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_tcons 2 | awk '{print $3 "\t" $2}' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 2 id.v4_xloc 2 | awk '{print $1 "\t" $3}' > db/rnaInGene
  cd ..
done




# Draft 1 (first ten loci): join the locus id against loci.a, loci.eo, loci.io
# and loci.l, then print one 0/1 flag per line for each joined column.
# The flags test $2..$5, matching the final command below. The earlier draft
# tested $3 twice and never looked at $5.
head loci.loci  | cut -f 1 |  perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.a 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.eo 1| perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.io 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.l 1 | awk -F "\t" '{ if($2){print 1;}else{print 0;}  if($3){print 1;}else{print 0;}   if($4){print 1;}else{print 0;}  if($5){print 1;}else{print 0;} }'


# Draft 2 (first ten loci): collect the four flags into one string per locus.
# awk variables are written without "$". `$tmp` is a field reference, so the
# earlier draft concatenated whole input records instead of flags.
head loci.loci  | cut -f 1 |  perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.a 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.eo 1| perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.io 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.l 1 | awk -F "\t" '{ tmp=""; if($2){tmp=tmp"1";}else{tmp=tmp"0";} if($3){tmp=tmp"1";}else{tmp=tmp"0";} if($4){tmp=tmp"1";}else{tmp=tmp"0";} if($5){tmp=tmp"1";}else{tmp=tmp"0";} print tmp; }'


# Final form (all loci): locus id <TAB> four-character 0/1 string, one
# character per joined file in the order loci.a, loci.eo, loci.io, loci.l.
cat loci.loci | cut -f 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.a 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.eo 1| perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.io 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.l 1 | awk -F "\t" '{ tmp=""; if($2){tmp=tmp"1";}else{tmp=tmp"0";} if($3){tmp=tmp"1";}else{tmp=tmp"0";} if($4){tmp=tmp"1";}else{tmp=tmp"0";} if($5){tmp=tmp"1";}else{tmp=tmp"0"; } print $1"\t"tmp;}'


# db/gene_class per organism: a four-character 0/1 string per locus (one flag
# per joined file loci.a, loci.eo, loci.io, loci.l), keyed by the id looked up
# in id.v4_xloc and sorted numerically.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
  # Skip a missing directory instead of running in the wrong place.
  cd "$org" || continue
  cat loci.loci | cut -f 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.a 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.eo 1| perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.io 1 | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.l 1 | awk -F "\t" '{ tmp=""; if($2){tmp=tmp"1";}else{tmp=tmp"0";} if($3){tmp=tmp"1";}else{tmp=tmp"0";} if($4){tmp=tmp"1";}else{tmp=tmp"0";} if($5){tmp=tmp"1";}else{tmp=tmp"0"; } print $1"\t"tmp;}' |  perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_xloc 2 | awk '{  print $3 "\t" $2 ;}' | sort -nk 1 > db/gene_class
  cd ..
done


# db/gene_info per organism: row number, BED columns 1-3, length, strand, and
# the number of db/rnaInGene rows that reference the gene.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
  # Skip a missing directory instead of running in the wrong place.
  cd "$org" || continue

  # Scratch file literally named "2": row number, BED columns 1, 2, 3, the
  # pasted length column ($14) and column 6.
  cat db/gene.bed | getBedSeqLength.sh - | paste db/gene.bed - | awk '{ print NR "\t" $1 "\t" $2 "\t" $3 "\t" $14  "\t" $6  }' > 2

  # Count the db/rnaInGene rows per value of column 2 and join the counts
  # onto scratch file "2" by its first column.
  cat db/rnaInGene  | cut -f 2  | sort -n | uniq -c  | awk '{ print $2 "\t" $1;}' | perl $CMD/leftJoin.pl 2 1 - 1 | cut -f 1,2,3,4,5,6,8 > db/gene_info

  cd ..
done
# Move each organism's lncRNA.bed into its db/ directory. The command that
# generated lncRNA.bed is kept below, commented out as in the original log.
for org in galGal3 dm3 ce10 danRer7 sacCer3 tair10 bosTau6 hg19 mm9
do
  # Skip a missing directory instead of moving files in the wrong place.
  cd "$org" || continue
  #cat db/rna.bed  | getBedSeqLength.sh - | awk '$2 > 200 { print $1;}' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 db/rna.bed 4 | cut -f 2,3,4,5,6,7,8,9,10,11,12,13 > lncRNA.bed
  mv lncRNA.bed db
  cd ..
done




# Pack the db/ directory of all nine organisms into v4.tgz (gzip, verbose).
tar cvzf v4.tgz bosTau6/db ce10/db danRer7/db dm3/db galGal3/db hg19/db mm9/db sacCer3/db tair10/db






/data/tmp/v4/bosTau6/db




# List the tables of the ncrna database.
hgsql -e 'show tables;' ncrna 


# Show the CREATE TABLE statement of the existing HSAG table.
hgsql -e 'show create table HSAG;' ncrna 


# Create the human_gene table and bulk-load it from the gene_info file.
# The DDL is held in a quoted here-document so it reaches hgsql verbatim.
# Inside the former single-quoted -e argument each inner '0' ended and
# reopened the shell string, so the quotes were stripped before MySQL saw them.
# NOTE(review): the table is named human_* but the data path is bosTau6 -
# confirm that this pairing is intended.
human_gene_ddl=$(cat <<'SQL'
CREATE TABLE `human_gene` (
  `id` int(6) NOT NULL DEFAULT '0',
  `chr` char(20) DEFAULT NULL,
  `start` int(10) DEFAULT NULL,
  `end` int(10) DEFAULT NULL,
  `length` int(7) DEFAULT NULL,
  `strand` char(1) DEFAULT NULL,
  `transcriptCount` int(2) DEFAULT NULL,
  PRIMARY KEY (`id`)
);
SQL
)
hgsql -e "$human_gene_ddl" ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/gene_info" into table human_gene ' ncrna


# Create the human_rnaInGene table (transcript id -> gene id) and bulk-load it.
# The DDL is held in a quoted here-document so the '0' defaults keep their
# quotes. In a single-quoted -e argument the shell strips them.
# NOTE(review): human_* table, bosTau6 data path - confirm intended.
human_rnaInGene_ddl=$(cat <<'SQL'
CREATE TABLE `human_rnaInGene` (
  `rid` int(6) NOT NULL DEFAULT '0',
  `gid` int(6) NOT NULL DEFAULT '0',
  PRIMARY KEY (`rid`)
);
SQL
)
hgsql -e "$human_rnaInGene_ddl" ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/rnaInGene" into table human_rnaInGene ' ncrna


# Recreate the human_src table (transcript id, source index, original id) and
# bulk-load it.
# The DDL is held in a quoted here-document so the '0' defaults keep their
# quotes. In a single-quoted -e argument the shell strips them, which hands
# the char column oId a bare numeric default.
# NOTE(review): human_* table, bosTau6 data path - confirm intended.
hgsql -e 'drop TABLE `human_src`' ncrna
human_src_ddl=$(cat <<'SQL'
CREATE TABLE `human_src` (
  `id` int(6) NOT NULL DEFAULT '0' ,
  `src` int(2) NOT NULL DEFAULT '0',
  `oId` char(25) NOT NULL DEFAULT '0'
);
SQL
)
hgsql -e "$human_src_ddl" ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/src" into table human_src ' ncrna


# Create the human_gene_class table (gene id, 4-character class string) and
# bulk-load it.
# The DDL is held in a quoted here-document so the '0' defaults keep their
# quotes. In a single-quoted -e argument the shell strips them.
# NOTE(review): human_* table, bosTau6 data path - confirm intended.
human_gene_class_ddl=$(cat <<'SQL'
CREATE TABLE `human_gene_class` (
  `id` int(6) NOT NULL DEFAULT '0' ,
   `oId` char(4) NOT NULL DEFAULT '0',
   PRIMARY KEY (`id`)
);
SQL
)
hgsql -e "$human_gene_class_ddl" ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/gene_class" into table human_gene_class ' ncrna


# Create the human_generalInfo table (one row per transcript) and bulk-load it.
# The DDL is held in a quoted here-document so the '0' default keeps its
# quotes. In a single-quoted -e argument the shell strips them.
# NOTE(review): human_* table, bosTau6 data path - confirm intended.
human_generalInfo_ddl=$(cat <<'SQL'
CREATE TABLE `human_generalInfo`(
  `id` int(6) NOT NULL DEFAULT '0',
  `chr` char(20) DEFAULT NULL,
  `bedStart` int(10) DEFAULT NULL,
  `bedEnd` int(10) DEFAULT NULL,
  `strand` char(1) DEFAULT NULL,
  `exonCount` int(3) DEFAULT NULL,
  `exonLen` text,
  `exonStart` text,
  `length` int(6) DEFAULT NULL,
  `cnci` decimal(15,7) DEFAULT NULL,
  PRIMARY KEY (`id`)
);
SQL
)
hgsql -e "$human_generalInfo_ddl" ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/generalInfo" into table human_generalInfo ' ncrna
# Recreate the human_seq table (id, sequence text) and bulk-load it from the
# seq file.
# NOTE(review): human_* table, bosTau6 data path - confirm intended.
hgsql -e 'drop TABLE `human_seq`; ' ncrna
hgsql -e 'CREATE TABLE `human_seq` (
    `id` int(10) NOT NULL,
  `seq` mediumtext,
  PRIMARY KEY (`id`)
);' ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/seq" into table human_seq ' ncrna 


# Create the human_srcNames table (source index, source name) and bulk-load
# it from the srcNames file.
# NOTE(review): human_* table, bosTau6 data path - confirm intended.
hgsql -e 'CREATE TABLE `human_srcNames` (
  `id` int(10) NOT NULL,
  `srcName` char(30) 
  );' ncrna
hgsql -e 'load data infile "/data/tmp/v4/bosTau6/db/srcNames" into table human_srcNames ' ncrna 






// Select every source row of one transcript id from the organism's _src table.
// The stray comma that preceded "where" left the FROM list dangling, which is
// invalid SQL, so it is removed.
$sql = "SELECT * from ".$org_prefix[$org]."_src  where id = $id ;";


| Field | Type     | Null | Key | Default | Extra |
+-------+----------+------+-----+---------+-------+
| id    | int(6)   | NO   |     | 0       |       |
| src   | int(2)   | NO   |     | 0       |       |
| oId   | char(25) | NO   |     | 0       |       |
+-------+----------+------+-----+---------+-------+


| Field   | Type     | Null | Key | Default | Extra |
+---------+----------+------+-----+---------+-------+
| id      | int(10)  | NO   |     | NULL    |       |
| srcName | char(30) | YES  |     | NULL    |       |
+---------+----------+------+-----+---------+-------+
srcName_id 


 




# db/expr_profile: turn each row of expr/fpkm.loci into
# "name <TAB> comma-separated values" (fields 2..last joined with commas),
# replace the name by the id found through id.v4_xloc (joined column 3) and
# sort numerically by that id.
cat expr/fpkm.loci | perl -ne '@t = split(/\t/,$_);  print $t[0]."\t"; for($i = 1; $i < (@t -1); $i ++){print $t[$i] . ",";} print $t[$i] ;' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_xloc 2  | awk '{ print $3 "\t" $2;}' | sort -nk 1 > db/expr_profile
# Scratch variants of the same idea for expr/fpkm.rna, kept commented out:
#head expr/fpkm.rna | perl -ne '@t = split(/\t/,$_);  print $t[0]."\t"; for($i = 1; $i < (@t -1); $i ++){print $t[$i] . ",";} print $t[$i] ;' | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_xloc 2  | awk '{ print $3 "\t" $2;}' | sort -nk 1 > db/expr_profile
#head expr/fpkm.rna | perl -ne '@t = split(/\t/,$_);  print $t[0]."\t"; for($i = 1; $i < (@t -1); $i ++){print $t[$i] . ",";} print $t[$i] ;' |  perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking  1


#head expr/fpkm.rna | perl -ne '@t = split(/\t/,$_);  print $t[0]."\t"; for($i = 1; $i < (@t -1); $i ++){print $t[$i] . ",";} print $t[$i] ;' |  perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking  1 | perl -ne '@t = split(/\t/,$_); $t[6] =~ /.*?\|(.*?)\|/; print $1 . "\t". $t[1] . "\n";' | perl $CMD/leftJoin.pl


# expr.rna, step 1: rows of expr/fpkm.rna as "name <TAB> comma-separated
# values", with the name replaced by the text between the first two "|" of
# joined loci.tracking field 7.
cat expr/fpkm.rna | perl -ne '@t = split(/\t/,$_);  print $t[0]."\t"; for($i = 1; $i < (@t -1); $i ++){print $t[$i] . ",";} print $t[$i] ;' |  perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 loci.tracking  1 | perl -ne '@t = split(/\t/,$_); $t[6] =~ /.*?\|(.*?)\|/; print $1 . "\t". $t[1] . "\n";' > expr.rna
# Step 2: the ids in id.disp_contain receive the value list of the id in
# their second column (columns 1 and 4 of the join). Those rows are appended.
perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl id.disp_contain 2 expr.rna 1 | cut -f 1,4  > expr.rna.2
cat expr.rna.2 >> expr.rna
 
# Step 3: rows produced by rmByKey.pl from bed.v4.lncrna (keys: the ids
# already in expr.rna) get a fixed all-zero value list and are appended.
# NOTE(review): the zero list has a hard-coded length; it presumably has to
# match the number of value columns in expr/fpkm.rna - TODO confirm.
cat expr.rna | cut -f 1 | perl /leofs/noncode/NONCODEv4/cmds/rmByKey.pl - bed.v4.lncrna 4 | awk '{print $4"\t0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0"}' > expr.rna.3
cat expr.rna.3 >> expr.rna




# db/expr_profile.rna: replace the name in expr.rna by the id found through
# id.v4_tcons (joined column 3) and sort numerically by that id.
cat expr.rna | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_tcons 2  | awk '{ print $3 "\t" $2;}' | sort -nk 1 > db/expr_profile.rna




# db/function: join func.xloc with id.v4_xloc, print the second-to-last
# joined field (the id) and then the "GO:<digits>" part of every annotation
# field, joined with commas.
cat func.xloc  | perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_xloc 2  | perl -ne '@t = split(/\t/,$_);  print $t[@t - 2]."\t"; for($i = 1; $i < (@t - 4); $i ++){$t[$i] =~ /(GO:\d+)\(/; print $1 . ",";} $t[$i] =~ /(GO:\d+)\(/; print $1 . "\n" ;' > db/function


# Collect the mouse expression profiles and function table in expr_func/mouse/.
cp mm9/db/expr_profile* mm9/db/function expr_func/mouse/


# Upload the expr_func.tgz archive to /data/tmp/ on the remote host.
scp expr_func.tgz root@112.124.13.146:/data/tmp/


adipose,adrenal,brain,brain_R,breast,colon,foreskin,heart,hela_R,HLF_1,HLF_2,kidney,liver,liver_R,lung,lymphNode,ovary,placenta_R,prostate,skeltalMuscle,testes,testes_R,thyroid,whiteBloodCell


heart,hippocampus,liver,lung,spleen,thymus




# Upload every file in fpkm_imgs/ to the HSAG image directory on the remote host.
scp fpkm_imgs/* root@112.124.13.146:/data/var/www/html/NONCODE/images/HSAG/fpkm_imgs






+----+-----+-----------------+
| id | src | oId             |
+----+-----+-----------------+
|  1 |   9 | ENST00000456328 |
|  1 |  10 | NR_046018       |
|  2 |   9 | ENST00000515242 |
|  3 |   9 | ENST00000518655 |
|  4 |   9 | ENST00000450305 |
|  5 |  11 | n345739         |
|  6 |   3 | brain_Pred9     |
|  7 |   3 | brain_Pred15    |
|  8 |  11 | n385920         |
|  9 |  11 | n345943         |
+----+-----+-----------------+




# Table-name prefix used in the ncrna database for each organism label.
# An associative array is required: labels such as "D.Melanogaster" contain a
# dot, so `D.Melanogaster=dm` is not a valid assignment and ${!org}
# indirection fails on them.
declare -A ORG_TABLE_PREFIX=(
  [Chicken]=chicken
  [D.Melanogaster]=dm
  [A.Thaliana]=tair
  [Cow]=cow
  [C.Elegans]=celegans
  [Zebrafish]=zebrafish
  [Human]=human
  [Mouse]=mouse
  [Yeast]=yeast
)

#######################################
# Print the SQL that exports, per organism, the (id, oId, organism label) rows
# whose source name is "ensembl" and then "refseq" into
# /data/tmp/<source>.<prefix>.
# Globals:   ORG_TABLE_PREFIX (read)
# Arguments: none
# Outputs:   two SELECT ... INTO OUTFILE statements per organism on stdout
#######################################
print_source_export_sql() {
  local org prefix src
  for org in Chicken D.Melanogaster A.Thaliana Cow C.Elegans Zebrafish Human Mouse Yeast; do
    prefix=${ORG_TABLE_PREFIX[$org]}
    for src in ensembl refseq; do
      printf '%s\n' "select a.id, a.oId, '$org' from ${prefix}_src as a left join ${prefix}_srcNames as b  on (a.src = b.id) where b.srcName = '$src' into outfile '/data/tmp/$src.${prefix}';"
    done
  done
}

print_source_export_sql


# Rename the per-locus images: for each line of id.v4_xloc, column 2 with
# "XLOC_" and leading zeros removed gives the current file name
# (expr/fpkm_imgs/<number>.png) and the input line number gives the new name
# (expr/fpkm_imgs/O<line number>.png). The rename is done by shelling out to mv.
cat id.v4_xloc | cut -f 2 |  perl -pe 's/XLOC_0*//' | perl -ne 'chomp; $file1 = "expr/fpkm_imgs/".$_ . ".png"; $file2 = "expr/fpkm_imgs/O".$. .".png"; `mv $file1 $file2`; '








# expr/fpkm.loci.v4: rows of expr/fpkm.loci with the name replaced by the id
# from id.v4_xloc (second-to-last joined field), followed by the original
# value columns.
cat expr/fpkm.loci |  perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_xloc 2 | perl -ne '@t = split(/\t/,$_); print $t[@t - 2] . "\t"; for($i = 1; $i < (@t - 3); $i ++){  print $t[$i] . "\t";} print $t[$i] . "\n" ; ' > expr/fpkm.loci.v4
# Upload from inside expr/ in a subshell so the working directory of the
# following commands is unchanged. With a plain `cd expr` the next pipeline's
# expr/... paths and its own `cd expr` would no longer resolve.
( cd expr && scp  fpkm.loci.v4 header   root@112.124.13.146:/data/var/www/html/NONCODE/images/MMUG )




# Same for the transcripts: expr/fpkm.rna with ids from id.v4_tcons.
cat expr/fpkm.rna |  perl /leofs/noncode/NONCODEv4/cmds/leftJoin.pl - 1 id.v4_tcons 2 | perl -ne '@t = split(/\t/,$_); print $t[@t - 2] . "\t"; for($i = 1; $i < (@t - 3); $i ++){  print $t[$i] . "\t";} print $t[$i] . "\n" ; ' > expr/fpkm.rna.v4
( cd expr && scp  fpkm.rna.v4 header   root@112.124.13.146:/data/var/www/html/NONCODE/images/MMUT )


# Change to the mm9 expression directory.
cd /leofs/noncode/NONCODEv4/mm9/expr


for org in  galGal3  dm3 ce10 danRer7 sacCer3 tair10 bosTau6  hg19 mm9


do


cat $org/db/seq | perl -ne '@t = split(/\t/,$_);  %orgId = (  "hg19" => "HSA","mm9" => "MMU","dm3" => "DME","galGal3" => "GGA","ce10" => "CEL","danRer7" => "DRE","sacCer3" => "SCE","tair10" => "ATH", "bosTau6" => "BTA" ); print ">NON" . $orgId{'$org'}. "T" . substr("000000", 0, 6 - length($t[0]) ) . $t[0] . "\n" . $t[1];  ' >> fa.v4  


done




# Format fa.v4 as a nucleotide BLAST database named noncodev4 (-p F: not
# protein) with the given title.
formatdb -p F   -n noncodev4 -i fa.v4 -t "ncRNAs in NONCODEv4"




# Spot check: show the first ten lines of fa.v4 starting at line 223409.
awk "NR>=223409 " fa.v4  | head


# Spot check: look up 139878 in the transcript id table.
grep  139878  id.v4_tcons




/data/var/www/html/NONCODE/images/HSAG

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值