这种方法只能解决非常简单的转换;更难的 SMILES 应该是无法直接转换的,可能很多人都使用神经网络解决,我暂时还没仔细看,后面再仔细看吧...
在线转换网站:
网址一:
NCI/CADD Chemical Identifier Resolver
网址二:
简单的转换:
单个转换:
import urllib.error
import urllib.parse
import urllib.request
# URL templates for the NCI/CADD Chemical Identifier Resolver. The `{}` slot
# takes the percent-encoded structure identifier; the last path segment selects
# the representation to return. HTTPS is used so requests are not sent in
# plaintext, matching the `https://` endpoint used by the requests-based script
# later in this file.
SMILES_URL_TEMPLATE = 'https://cactus.nci.nih.gov/chemical/structure/{}/smiles'
IUPAC_URL_TEMPLATE = 'https://cactus.nci.nih.gov/chemical/structure/{}/iupac_name'
def retrieve(url, timeout=30):
    """Fetch *url* and return the raw response body as bytes.

    Args:
        url: The URL to open.
        timeout: Seconds to wait on blocking socket operations before giving
            up. Without a timeout a stalled connection would block forever.

    Returns:
        The undecoded response body.

    Raises:
        urllib.error.HTTPError: The server answered with an error status.
        urllib.error.URLError: The connection could not be established.
        OSError: A socket operation failed or timed out while reading.
    """
    with urllib.request.urlopen(url, timeout=timeout) as response:
        return response.read()
def iupac2smiles(iupac):
    """Look up the SMILES string for an IUPAC name.

    The name is percent-encoded with ``urllib.parse.quote``, substituted into
    ``SMILES_URL_TEMPLATE``, fetched through ``retrieve`` and decoded as UTF-8.

    Args:
        iupac: The IUPAC name to resolve.

    Returns:
        The decoded response body as a ``str``.
    """
    url = SMILES_URL_TEMPLATE.format(urllib.parse.quote(iupac))
    body = retrieve(url)
    return body.decode('utf-8')
def smiles2iupac(smiles):
    """Look up the IUPAC name for a SMILES string.

    The SMILES is percent-encoded with ``urllib.parse.quote``, substituted
    into ``IUPAC_URL_TEMPLATE``, fetched through ``retrieve`` and decoded as
    UTF-8.

    Args:
        smiles: The SMILES string to resolve.

    Returns:
        The decoded response body as a ``str``.
    """
    url = IUPAC_URL_TEMPLATE.format(urllib.parse.quote(smiles))
    body = retrieve(url)
    return body.decode('utf-8')
# Conversion direction: 'iupac2smiles' or 'smiles2iupac'.
change_direction = 'smiles2iupac'

# Each direction maps to (converter, source label, destination label). An
# unrecognised direction leaves convert/src/dst all set to None.
_DIRECTIONS = {
    'iupac2smiles': (iupac2smiles, 'iupac', 'smiles'),
    'smiles2iupac': (smiles2iupac, 'smiles', 'iupac'),
}
convert, src, dst = _DIRECTIONS.get(change_direction, (None, None, None))
# Compound identifier to convert.
input_compound_name = "CCCC"
try:
    output_compound_name = convert(input_compound_name)
except urllib.error.HTTPError as e:
    # The resolver answered with an HTTP error status; report the status code.
    print(f'[{src}] {input_compound_name}\n: error {e.code}')
else:
    # Print the converted identifier.
    print(output_compound_name)
csv转换:
1、
# -*- coding: utf8 -*-
import urllib.error
import urllib.parse
import urllib.request
import pandas as pd
# URL templates for the NCI/CADD Chemical Identifier Resolver. The `{}` slot
# takes the percent-encoded structure identifier; the last path segment selects
# the representation to return. HTTPS is used so requests are not sent in
# plaintext, matching the `https://` endpoint used by the requests-based script
# later in this file.
SMILES_URL_TEMPLATE = 'https://cactus.nci.nih.gov/chemical/structure/{}/smiles'
IUPAC_URL_TEMPLATE = 'https://cactus.nci.nih.gov/chemical/structure/{}/iupac_name'
def retrieve(url, timeout=30):
    """Fetch *url* and return the raw response body as bytes.

    Args:
        url: The URL to open.
        timeout: Seconds to wait on blocking socket operations before giving
            up. Without a timeout a single stalled connection would block the
            whole CSV batch forever.

    Returns:
        The undecoded response body.

    Raises:
        urllib.error.HTTPError: The server answered with an error status.
        urllib.error.URLError: The connection could not be established.
        OSError: A socket operation failed or timed out while reading.
    """
    with urllib.request.urlopen(url, timeout=timeout) as response:
        return response.read()
def iupac2smiles(iupac):
    """Look up the SMILES string for an IUPAC name.

    The name is percent-encoded with ``urllib.parse.quote``, substituted into
    ``SMILES_URL_TEMPLATE``, fetched through ``retrieve`` and decoded as UTF-8.

    Args:
        iupac: The IUPAC name to resolve.

    Returns:
        The decoded response body as a ``str``.
    """
    encoded_name = urllib.parse.quote(iupac)
    response_bytes = retrieve(SMILES_URL_TEMPLATE.format(encoded_name))
    return response_bytes.decode('utf-8')
def smiles2iupac(smiles):
    """Look up the IUPAC name for a SMILES string.

    The SMILES is percent-encoded with ``urllib.parse.quote``, substituted
    into ``IUPAC_URL_TEMPLATE``, fetched through ``retrieve`` and decoded as
    UTF-8.

    Args:
        smiles: The SMILES string to resolve.

    Returns:
        The decoded response body as a ``str``.
    """
    encoded_smiles = urllib.parse.quote(smiles)
    response_bytes = retrieve(IUPAC_URL_TEMPLATE.format(encoded_smiles))
    return response_bytes.decode('utf-8')
# Conversion direction: 'iupac2smiles' or 'smiles2iupac'.
change_direction = 'iupac2smiles'

# Each direction maps to (converter, source label, destination label). An
# unrecognised direction leaves convert/src/dst all set to None.
_DIRECTIONS = {
    'iupac2smiles': (iupac2smiles, 'iupac', 'smiles'),
    'smiles2iupac': (smiles2iupac, 'smiles', 'iupac'),
}
convert, src, dst = _DIRECTIONS.get(change_direction, (None, None, None))
def my_function(input_string):
    """Convert one CSV cell with the module-level ``convert`` callable.

    Every failure is reported on stdout and mapped to the sentinel
    ``"Error Process!"``, so that one bad row does not abort the surrounding
    ``Series.apply`` and discard the rest of the batch.

    Args:
        input_string: The compound identifier to convert.

    Returns:
        The converted identifier, or ``"Error Process!"`` when the input is
        not text or the request fails.
    """
    error_template = '[{src}] {original}\n: error {error}'

    # Empty CSV cells are read by pandas as float NaN, and urllib.parse.quote
    # raises TypeError on non-text input; skip such cells without a request.
    if not isinstance(input_string, (str, bytes)):
        print(error_template.format(src=src, original=input_string, error='input is not a string'))
        return "Error Process!"

    try:
        output_compound_name = convert(input_string)
    except urllib.error.HTTPError as e:
        # The resolver answered with an HTTP error status.
        print(error_template.format(src=src, original=input_string, error=e.code))
        return "Error Process!"
    except OSError as e:
        # URLError, timeouts and connection resets are all OSError subclasses.
        print(error_template.format(src=src, original=input_string, error=e))
        return "Error Process!"

    # Print the converted identifier.
    print(output_compound_name)
    return output_compound_name
# Input CSV and the path the augmented copy is written to.
csv_file = 'data/new_csv_file.csv'
new_csv_file = 'data/new_csv_file111.csv'

# Load the input, convert every 'Eng_IUPAC' cell and store the results in the
# 'IUPAC' column.
df = pd.read_csv(csv_file)
df = df.assign(IUPAC=df['Eng_IUPAC'].apply(my_function))

# Save the modified table without the index column.
df.to_csv(new_csv_file, index=False)
print("处理完成!")
2、 IUPAC2smiles
注意:这里直接使用本地的windows环境运行,不要使用WSL,因为可能联网很慢
这个比较好,上面那个容易出错:
# -*- coding: utf8 -*-
import pandas as pd
import requests
# Resolver URL template: {0} is the structure identifier and {1} the requested
# representation.
opsin = 'https://cactus.nci.nih.gov/chemical/structure/{0}/{1}'


def change2smiles(input_IUPAC):
    """Convert an IUPAC name to SMILES through the resolver, best-effort.

    Args:
        input_IUPAC: The IUPAC name to convert.

    Returns:
        The response text on success, or ``"Error process!"`` when the input
        cannot be processed or the request fails.
    """
    try:
        # Escape '#' by hand — presumably because an unescaped '#' would be
        # parsed as the start of a URL fragment and cut off the identifier.
        input_IUPAC = input_IUPAC.replace('#', '%23')
        # Target representation.
        rep = 'smiles'
        url = opsin.format(input_IUPAC, rep)
        # Time out so that one stalled request cannot hang the whole batch.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        output_smiles = response.text
        print(output_smiles)
        return output_smiles
    except Exception:
        # Deliberately broad so one bad row never aborts the batch, but no
        # longer a bare `except:`, so Ctrl-C and SystemExit still propagate.
        print("GO WRONG: ", input_IUPAC)
        return "Error process!"
# Input CSV and the file that receives the converted rows.
csv_file = 'data/new_csv_file.csv'
new_csv_file = 'data/IUPAC2smiles.csv'
df = pd.read_csv(csv_file)


def process_row(row):
    """Convert one row and append it to ``new_csv_file`` immediately.

    The row's 'Eng_IUPAC' value is passed to ``change2smiles`` and the result
    is stored under 'IUPAC'. The row is then appended to the output file as a
    single line with no header and no index, so rows already processed are on
    disk even if a later row fails.

    Args:
        row: One row of the input table, as passed by ``DataFrame.apply``
            with ``axis=1``.
    """
    row['IUPAC'] = change2smiles(row['Eng_IUPAC'])
    # Turn the row into a one-row frame so it is written as one CSV line.
    single_row = row.to_frame().T
    single_row.to_csv(new_csv_file, mode='a', header=False, index=False)


# Process the rows one by one, writing each as soon as it is converted.
df.apply(process_row, axis=1)
print("处理完成!")
复杂的转换:
STOUT: SMILES to IUPAC names using neural machine translation(Journal of Cheminformatics)
Transformer-Based Molecular Generative Model for Antiviral Drug
Design....