配置环境
# Create a conda environment named rdkit_dev, activate it, and install
# cmake, rdkit and eigen from the conda-forge channel.
conda create -n rdkit_dev
conda activate rdkit_dev # if activation fails, run `source activate` first
conda install -c conda-forge cmake rdkit eigen
C++ 文件示例
实现一个简单程序:从csv文件的第二列(CleanSmiles)读取SMILES字符串,然后输出RDKit生成的SMILES及其原子个数:
#include <algorithm>
#include <exception>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include <boost/timer/timer.hpp>

#include <GraphMol/Depictor/RDDepictor.h>
#include <GraphMol/FileParsers/MolSupplier.h>
#include <GraphMol/FileParsers/MolWriters.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <RDGeneral/RDLog.h>
int main(){
std::ifstream csv_data("../smi.csv", std::ios::in);
std::string line;
if (!csv_data.is_open())
{
std::cout << "Error: opening file fail" << std::endl;
std::exit(1);
}else{
std::cout << "opening file succeed!" << std::endl;
}
std::istringstream sin; //将整行字符串line读入到字符串istringstream中
std::vector<std::string> words; //声明一个字符串向量
std::string word;
// 读取标题行
std::getline(csv_data, line);
while (std::getline(csv_data, line))
{
sin.clear();
sin.str(line);
words.clear();
while (std::getline(sin, word, ',')) //将字符串流sin中的字符读到field字符串中,以逗号为分隔符
{
words.push_back(word); //将每一格中的数据逐个push
// std::cout << word;
// std::cout << atol(word.c_str());
}
RDKit::ROMol *mol = RDKit::SmilesToMol( words[1] );
std::cout<<RDKit::MolToSmiles( *mol )<<std::endl;
std::cout<<"num of atoms: "<<mol->getNumAtoms()<<std::endl;
std::cout << std::endl;
}
csv_data.close();
return 0;
}
数据文件
其中,smi.csv文件为:
md5,CleanSmiles,Main_id
aa,CCOC(=O)C1CC1(C(=O)OCC)c1cccc(C(F)(F)F)n1,1
57,CC(C)(C)c1ccc(S(=O)(=O)c2ccccn2)c(Br)c1,2
fa,COc1ccc2nc(-c3cccnc3)nc(Nc3cc(C(F)(F)F)n[nH]3)c2c1,3
61,COC(=O)CC(=O)Nc1cc(C(=O)O)ccc1NCCc1ccc(OC)c(OC)c1,4
CMakeLists.txt
# Build script for the cal_atom example: one executable built from
# cal_atom.cpp and linked against RDKit's SmilesParse library.
cmake_minimum_required(VERSION 3.18) # minimum CMake version accepted for this project
project(cal_atom) # project name
# NOTE(review): newer RDKit releases may need a higher standard (e.g. 17) — confirm against the installed version.
set(CMAKE_CXX_STANDARD 14) # C++ standard to compile with
set(CMAKE_CXX_STANDARD_REQUIRED True) # treat the requested standard as a hard requirement
find_package(RDKit REQUIRED) # locate the external RDKit package; REQUIRED makes configuration fail if it is missing
add_executable(cal_atom cal_atom.cpp) # build the executable cal_atom from the source file cal_atom.cpp
target_link_libraries(cal_atom RDKit::SmilesParse) # link the target against the RDKit SmilesParse library
build
定义好CMakeLists.txt后,新建build目录,进入build,执行cmake生成Makefile文件
(rdkit_dev) wangzhe@user-NF5468M6:~/example$ mkdir build
(rdkit_dev) wangzhe@user-NF5468M6:~/example$ cd build
(rdkit_dev) wangzhe@user-NF5468M6:~/example/build$ cmake ..
-- The C compiler identification is GNU 7.5.0
-- The CXX compiler identification is GNU 7.5.0
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Check for working C compiler: /usr/bin/cc - skipped
-- Detecting C compile features
-- Detecting C compile features - done
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /usr/bin/c++ - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed
-- Looking for pthread_create in pthreads
-- Looking for pthread_create in pthreads - not found
-- Looking for pthread_create in pthread
-- Looking for pthread_create in pthread - found
-- Found Threads: TRUE
-- Found Boost: /home/wangzhe/anaconda3/lib/cmake/Boost-1.73.0/BoostConfig.cmake (found suitable version "1.73.0", minimum required is "1.73.0")
-- Configuring done
-- Generating done
-- Build files have been written to: /home/wangzhe/example/build
(rdkit_dev) wangzhe@user-NF5468M6:~/example/build$ ls
CMakeCache.txt CMakeFiles cmake_install.cmake Makefile
make
make生成可执行文件,然后执行
(rdkit_dev) wangzhe@user-NF5468M6:~/example/build$ make cal_atom
[ 50%] Building CXX object CMakeFiles/cal_atom.dir/cal_atom.cpp.o
[100%] Linking CXX executable cal_atom
[100%] Built target cal_atom
(rdkit_dev) wangzhe@user-NF5468M6:~/example/build$ ./cal_atom
opening file succeed!
CCOC(=O)C1CC1(C(=O)OCC)c1cccc(C(F)(F)F)n1
num of atoms: 23
CC(C)(C)c1ccc(S(=O)(=O)c2ccccn2)c(Br)c1
num of atoms: 20
COc1ccc2nc(-c3cccnc3)nc(Nc3cc(C(F)(F)F)n[nH]3)c2c1
num of atoms: 28
COC(=O)CC(=O)Nc1cc(C(=O)O)ccc1NCCc1ccc(OC)c(OC)c1
num of atoms: 30
目录结构
.
├── build
│ ├── cal_atom
│ ├── CMakeCache.txt
│ ├── CMakeFiles
│ ├── cmake_install.cmake
│ └── Makefile
├── cal_atom.cpp
├── CMakeLists.txt
└── smi.csv
参考
https://greglandrum.github.io/rdkit-blog/posts/2021-07-24-setting-up-a-cxx-dev-env.html
C++读写CSV文件
rdkit C++文档

4487

被折叠的 条评论
为什么被折叠?



