【生物信息学】使用mRNA作为ref时,由bam格式计算rpkm 的 cpp程序

main.cpp

#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <algorithm>
#include <map>
#include <set>
#include <cmath>
#include <inttypes.h>
#include <boost/lexical_cast.hpp>
#include <boost/algorithm/string.hpp>
#include "rpkm.h"

using namespace std;

void usage()
{
   cout << "./rpkm <input.sh_sort.sam> <genome.fa.fai> <output> " << endl;
   cout << "  -h           get help information"   << endl;
   exit (0);
}

int main(int argc, char *argv[])
{
   int c;
   while ( (c=getopt(argc,argv,"a:p:h")) != -1 )
   {
      switch(c)
      {
         case 'h' : usage();break;
         default : usage();
      }
   }
   if (argc < 3) usage();
   string sam_file = argv[optind++];
   string fai_file = argv[optind++];

   map<string,int>   GenSize;
   map<string,int>   GenRead;
   load_fai(fai_file,GenSize);

   ifstream infile;
   infile.open(sam_file.c_str());
   if (! infile )
   {
      cerr << "fail to open input file" << sam_file;
   }
   string lineStr;
   int total_line;

   while ( getline(infile,lineStr,'\n' ))
   {
      vector<string> lineVec;
      ++total_line;
      boost::split(lineVec,lineStr,boost::is_any_of("\t"),boost::token_compress_on);
      if (lineVec[5] == "*"){
         continue;
      }
      GenRead[lineVec[2]]++;
   }
   infile.close();
	cout << "Gene\trpkm\treads\tGene_length\ttotal_reads" << endl;
	cal_rpkm(GenRead,GenSize,total_line);
}


rpkm.h

#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <algorithm>
#include <map>
#include <set>
#include <cmath>
#include <inttypes.h>
#include <boost/lexical_cast.hpp>
#include <boost/algorithm/string.hpp>

void load_fai(std::string &matrix_file,std::map<std::string,int> &GenSize);
void cal_rpkm(std::map<std::string,int> &GenRead, std::map<std::string,int> &GenSize,int total);


rpkm.cpp

#include "rpkm.h"
using namespace std;
void load_fai(string &matrix_file, map<string,int> &GenSize){
	ifstream infile;
   infile.open(matrix_file.c_str());
   if (! infile ) {
      cerr << "fail to open input file " << matrix_file << endl;
      exit(0);
   }
   string lineStr;

   while (getline(infile,lineStr,'\n')){
      if (lineStr[0] == ' ' || lineStr[0] == '\n'){
         continue;
      }

      vector<string> lineVec;
      boost::split(lineVec,lineStr, boost::is_any_of(":, \t\n"), boost::token_compress_on);
      GenSize[lineVec[0]] =  boost::lexical_cast<int>(lineVec[1]);
   }
   infile.close();
}

void cal_rpkm(map<string,int> &GenRead, map<string,int> &GenSize,int total){
   map<string,int>::const_iterator map_size;
   for (map_size=GenSize.begin();map_size!=GenSize.end();map_size++){
      int _size = map_size->second;
      int _read = 0;
   	map<string,int>::iterator map_read;
		map_read = GenRead.find(map_size->first) ;
		if ( map_read != GenRead.end() ){
			_read = GenRead[map_size->first];
		}
		double rpkm = log( (double)_read ) + log( (double)100000000 ) - log( (double)_size ) - log( (double)total );
      cout << map_size->first << "\t" << exp(rpkm) << "\t" << _read << "\t" << _size << "\t" << total << endl;
   }
}



Makefile

cc=g++
exe=rpkm_bam
obj=main.o rpkm.o

$(exe):$(obj)
	   $(cc) -o $(exe) $(obj)
main.o:main.cpp rpkm.h
	   $(cc) -c main.cpp
MTP.o:rpkm.cpp rpkm.h
	   $(cc) -c MTP.cpp
clean:
	   rm -rf *.o $(exe)



  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值