C++使用rdkit示例

配置环境

conda create -n rdkit_dev
conda activate rdkit_dev #如果激活不了,先source activate
conda install -c conda-forge cmake rdkit eigen

C++ 文件示例

实现一个简单程序:从csv文件的第二列(CleanSmiles)逐行读取SMILES字符串,输出RDKit重新生成的SMILES及该分子的原子个数:

#include <algorithm>
#include <exception>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include <boost/timer/timer.hpp>

#include <GraphMol/Depictor/RDDepictor.h>
#include <GraphMol/FileParsers/MolSupplier.h>
#include <GraphMol/FileParsers/MolWriters.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <RDGeneral/RDLog.h>

int main(){
    std::ifstream csv_data("../smi.csv", std::ios::in);
    std::string line;

    if (!csv_data.is_open())
    {
        std::cout << "Error: opening file fail" << std::endl;
        std::exit(1);
    }else{
        std::cout << "opening file succeed!" << std::endl;
    }
    
    std::istringstream sin;         //将整行字符串line读入到字符串istringstream中
    std::vector<std::string> words; //声明一个字符串向量
    std::string word;
    // 读取标题行
    std::getline(csv_data, line);

    while (std::getline(csv_data, line))
    {
        sin.clear();
        sin.str(line);
        words.clear();
        while (std::getline(sin, word, ',')) //将字符串流sin中的字符读到field字符串中,以逗号为分隔符
        {
            words.push_back(word); //将每一格中的数据逐个push
            // std::cout << word;
            // std::cout << atol(word.c_str());
        }
        RDKit::ROMol *mol = RDKit::SmilesToMol( words[1] );
        std::cout<<RDKit::MolToSmiles( *mol )<<std::endl;
        std::cout<<"num of atoms: "<<mol->getNumAtoms()<<std::endl;
        std::cout << std::endl;
        
    }
    csv_data.close();


  return 0;

}

数据文件

其中,smi.csv文件为:

md5,CleanSmiles,Main_id
aa,CCOC(=O)C1CC1(C(=O)OCC)c1cccc(C(F)(F)F)n1,1
57,CC(C)(C)c1ccc(S(=O)(=O)c2ccccn2)c(Br)c1,2
fa,COc1ccc2nc(-c3cccnc3)nc(Nc3cc(C(F)(F)F)n[nH]3)c2c1,3
61,COC(=O)CC(=O)Nc1cc(C(=O)O)ccc1NCCc1ccc(OC)c(OC)c1,4

CMakeLists.txt

cmake_minimum_required(VERSION 3.18)    # Minimum CMake version needed to configure this project

project(cal_atom)   # Set the project name

set(CMAKE_CXX_STANDARD 14)  # Request the C++14 language standard
set(CMAKE_CXX_STANDARD_REQUIRED True)   # Treat the requested standard as a hard requirement

find_package(RDKit REQUIRED)    # Locate the external RDKit package; configuration fails if it is missing

add_executable(cal_atom cal_atom.cpp)   # Build the executable cal_atom from the source file cal_atom.cpp

target_link_libraries(cal_atom RDKit::SmilesParse)  # Link the target against RDKit's SmilesParse library

build

定义好CMakeLists.txt后,新建build目录,进入build,执行cmake生成Makefile文件

(rdkit_dev) wangzhe@user-NF5468M6:~/example$ mkdir build
(rdkit_dev) wangzhe@user-NF5468M6:~/example$ cd build
(rdkit_dev) wangzhe@user-NF5468M6:~/example/build$ cmake ..
-- The C compiler identification is GNU 7.5.0
-- The CXX compiler identification is GNU 7.5.0
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Check for working C compiler: /usr/bin/cc - skipped
-- Detecting C compile features
-- Detecting C compile features - done
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /usr/bin/c++ - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed
-- Looking for pthread_create in pthreads
-- Looking for pthread_create in pthreads - not found
-- Looking for pthread_create in pthread
-- Looking for pthread_create in pthread - found
-- Found Threads: TRUE  
-- Found Boost: /home/wangzhe/anaconda3/lib/cmake/Boost-1.73.0/BoostConfig.cmake (found suitable version "1.73.0", minimum required is "1.73.0")  
-- Configuring done
-- Generating done
-- Build files have been written to: /home/wangzhe/example/build
(rdkit_dev) wangzhe@user-NF5468M6:~/example/build$ ls
CMakeCache.txt  CMakeFiles  cmake_install.cmake  Makefile

make

make生成可执行文件,然后执行

(rdkit_dev) wangzhe@user-NF5468M6:~/example/build$ make cal_atom
[ 50%] Building CXX object CMakeFiles/cal_atom.dir/cal_atom.cpp.o
[100%] Linking CXX executable cal_atom
[100%] Built target cal_atom
(rdkit_dev) wangzhe@user-NF5468M6:~/example/build$ ./cal_atom 
opening file succeed!
CCOC(=O)C1CC1(C(=O)OCC)c1cccc(C(F)(F)F)n1
num of atoms: 23

CC(C)(C)c1ccc(S(=O)(=O)c2ccccn2)c(Br)c1
num of atoms: 20

COc1ccc2nc(-c3cccnc3)nc(Nc3cc(C(F)(F)F)n[nH]3)c2c1
num of atoms: 28

COC(=O)CC(=O)Nc1cc(C(=O)O)ccc1NCCc1ccc(OC)c(OC)c1
num of atoms: 30

目录结构

.
├── build
│   ├── cal_atom
│   ├── CMakeCache.txt
│   ├── CMakeFiles
│   ├── cmake_install.cmake
│   └── Makefile
├── cal_atom.cpp
├── CMakeLists.txt
└── smi.csv

参考

https://greglandrum.github.io/rdkit-blog/posts/2021-07-24-setting-up-a-cxx-dev-env.html
C++读写CSV文件
rdkit C++文档

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值