IUPAC和SMILES的相互转换

这种方法只能解决非常简单的转换;更复杂的SMILES与IUPAC名称之间应该无法用这种方式直接转换,可能很多人都是使用神经网络来解决的。暂时还没仔细看,后面再仔细看吧...

在线转换网站:

网址一:

NCI/CADD Chemical Identifier Resolver

网址二:

IUPAC to SMILES

简单的转换: 

单个转换:

import urllib.error
import urllib.parse
import urllib.request

# Endpoints of the NCI/CADD Chemical Identifier Resolver (CACTUS); '{}' is
# replaced with the URL-quoted structure identifier. HTTPS is used so that
# queries are not sent in clear text, matching the https:// CACTUS URL that
# the requests-based script in this file already uses.
SMILES_URL_TEMPLATE = 'https://cactus.nci.nih.gov/chemical/structure/{}/smiles'
IUPAC_URL_TEMPLATE = 'https://cactus.nci.nih.gov/chemical/structure/{}/iupac_name'

def retrieve(url, timeout=None):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address passed to ``urllib.request.urlopen``.
        timeout: Optional number of seconds to wait on blocking network
            operations. ``None`` (the default) issues exactly the same
            ``urlopen(url)`` call as before, so existing callers are
            unaffected.

    Returns:
        The complete response body as ``bytes``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen``; ``HTTPError``
            (a subclass) is raised for HTTP error statuses.
    """
    # Forward the timeout only when requested so the default call is unchanged.
    extra = {} if timeout is None else {'timeout': timeout}
    with urllib.request.urlopen(url, **extra) as response:
        return response.read()

def iupac2smiles(iupac):
    """Look up the SMILES string for an IUPAC name via the remote resolver.

    The name is percent-encoded, substituted into ``SMILES_URL_TEMPLATE``,
    fetched with ``retrieve`` and the response bytes are decoded as UTF-8.
    Exceptions raised by ``retrieve`` propagate to the caller.
    """
    url = SMILES_URL_TEMPLATE.format(urllib.parse.quote(iupac))
    raw = retrieve(url)
    return raw.decode('utf-8')

def smiles2iupac(smiles):
    """Look up the IUPAC name for a SMILES string via the remote resolver.

    The SMILES text is percent-encoded, substituted into
    ``IUPAC_URL_TEMPLATE``, fetched with ``retrieve`` and the response bytes
    are decoded as UTF-8. Exceptions raised by ``retrieve`` propagate to the
    caller.
    """
    url = IUPAC_URL_TEMPLATE.format(urllib.parse.quote(smiles))
    raw = retrieve(url)
    return raw.decode('utf-8')


# Conversion direction to run: 'iupac2smiles' or 'smiles2iupac'.
change_direction = 'smiles2iupac'

# Each direction maps to (converter function, source format, target format).
_DIRECTIONS = {
    'iupac2smiles': (iupac2smiles, 'iupac', 'smiles'),
    'smiles2iupac': (smiles2iupac, 'smiles', 'iupac'),
}
# An unrecognised direction leaves all three names set to None.
convert, src, dst = _DIRECTIONS.get(change_direction, (None, None, None))

try:
    # Compound identifier to convert.
    input_compound_name = "CCCC"
    output_compound_name = convert(input_compound_name)
    # Show the converted identifier.
    print(output_compound_name)
except urllib.error.HTTPError as http_err:
    # The resolver answered with an HTTP error status; report its code.
    print('[{src}] {original}\n: error {error}'.format(
        src=src,
        original=input_compound_name,
        error=http_err.code,
    ))

csv转换:

1、

# -*- coding: utf8 -*-

import urllib.error
import urllib.parse
import urllib.request
import pandas as pd

# Endpoints of the NCI/CADD Chemical Identifier Resolver (CACTUS); '{}' is
# replaced with the URL-quoted structure identifier. HTTPS is used so that
# queries are not sent in clear text, matching the https:// CACTUS URL that
# the requests-based script in this file already uses.
SMILES_URL_TEMPLATE = 'https://cactus.nci.nih.gov/chemical/structure/{}/smiles'
IUPAC_URL_TEMPLATE = 'https://cactus.nci.nih.gov/chemical/structure/{}/iupac_name'

def retrieve(url, timeout=None):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address passed to ``urllib.request.urlopen``.
        timeout: Optional number of seconds to wait on blocking network
            operations. ``None`` (the default) issues exactly the same
            ``urlopen(url)`` call as before, so existing callers are
            unaffected.

    Returns:
        The complete response body as ``bytes``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen``; ``HTTPError``
            (a subclass) is raised for HTTP error statuses.
    """
    # Forward the timeout only when requested so the default call is unchanged.
    extra = {} if timeout is None else {'timeout': timeout}
    with urllib.request.urlopen(url, **extra) as response:
        return response.read()

def iupac2smiles(iupac):
    """Look up the SMILES string for an IUPAC name via the remote resolver.

    The name is percent-encoded, substituted into ``SMILES_URL_TEMPLATE``,
    fetched with ``retrieve`` and the response bytes are decoded as UTF-8.
    Exceptions raised by ``retrieve`` propagate to the caller.
    """
    url = SMILES_URL_TEMPLATE.format(urllib.parse.quote(iupac))
    raw = retrieve(url)
    return raw.decode('utf-8')

def smiles2iupac(smiles):
    """Look up the IUPAC name for a SMILES string via the remote resolver.

    The SMILES text is percent-encoded, substituted into
    ``IUPAC_URL_TEMPLATE``, fetched with ``retrieve`` and the response bytes
    are decoded as UTF-8. Exceptions raised by ``retrieve`` propagate to the
    caller.
    """
    url = IUPAC_URL_TEMPLATE.format(urllib.parse.quote(smiles))
    raw = retrieve(url)
    return raw.decode('utf-8')


# Conversion direction to run: 'iupac2smiles' or 'smiles2iupac'.
change_direction = 'iupac2smiles'

# Each direction maps to (converter function, source format, target format).
_DIRECTIONS = {
    'iupac2smiles': (iupac2smiles, 'iupac', 'smiles'),
    'smiles2iupac': (smiles2iupac, 'smiles', 'iupac'),
}
# An unrecognised direction leaves all three names set to None.
convert, src, dst = _DIRECTIONS.get(change_direction, (None, None, None))

def my_function(input_string):
    """Convert one compound identifier without letting a bad row abort a batch.

    Args:
        input_string: Identifier to pass to the module-level ``convert``
            callable. Non-string values (e.g. NaN for an empty CSV cell)
            and blank strings are not sent to the resolver.

    Returns:
        The converted identifier on success, otherwise the sentinel string
        ``"Error Process!"``.
    """
    # Non-text cells would otherwise raise inside urllib.parse.quote and stop
    # the whole DataFrame.apply run; report them like any other failed row.
    if not isinstance(input_string, str) or not input_string.strip():
        print('skipped non-text or empty input: {!r}'.format(input_string))
        return "Error Process!"

    try:
        output_compound_name = convert(input_string)
    except urllib.error.HTTPError as e:
        # The resolver answered with an HTTP error status.
        print('[{src}] {original}\n: error {error}'.format(src=src, original=input_string, error=e.code))
        return "Error Process!"
    except OSError as e:
        # URLError, socket timeouts and connection resets are all OSError
        # subclasses; a transient network problem should cost one row only.
        print('[{src}] {original}\n: error {error}'.format(
            src=src, original=input_string, error=getattr(e, 'reason', e)))
        return "Error Process!"

    # Show and return the converted identifier.
    print(output_compound_name)
    return output_compound_name


# Input and output CSV locations.
csv_file = 'data/new_csv_file.csv'
new_csv_file = 'data/new_csv_file111.csv'

# Load the input table.
df = pd.read_csv(csv_file)

# Run the converter on every "Eng_IUPAC" value and keep the results in the
# "IUPAC" column.
converted = df['Eng_IUPAC'].apply(my_function)
df['IUPAC'] = converted

# Write the augmented table to the new file, without the index column.
df.to_csv(new_csv_file, index=False)

print("处理完成!")

2、 IUPAC2smiles

注意:这里请直接在本地的Windows环境中运行,不要使用WSL,因为WSL下联网可能很慢

这个比较好,上面那个容易出错:

# -*- coding: utf8 -*-


import pandas as pd
import requests


# URL pattern of the CACTUS structure resolver: {0} is the URL-quoted
# identifier and {1} the requested output representation.
opsin = 'https://cactus.nci.nih.gov/chemical/structure/{0}/{1}'


def change2smiles(input_IUPAC, timeout=60):
    """Resolve an IUPAC name to SMILES, returning a sentinel on failure.

    Args:
        input_IUPAC: Name to resolve. Non-string values (e.g. NaN for an
            empty CSV cell) and blank strings are reported as failures
            without contacting the server.
        timeout: Seconds ``requests.get`` may wait before giving up, so a
            stalled connection cannot hang a batch run indefinitely.

    Returns:
        The response text on success, otherwise ``"Error process!"``.
    """
    # Local import keeps this function self-contained in this script.
    from urllib.parse import quote

    if not isinstance(input_IUPAC, str) or not input_IUPAC.strip():
        print("GO WRONG: ", input_IUPAC)
        return "Error process!"

    # Target representation.
    rep = 'smiles'
    # quote() escapes '#' (as %23) along with every other character that is
    # not safe inside a URL path segment, e.g. '?', '%' and spaces.
    url = opsin.format(quote(input_IUPAC), rep)
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException:
        # Covers HTTP error statuses, timeouts and connection failures, while
        # letting KeyboardInterrupt/SystemExit stop a long batch as expected.
        print("GO WRONG: ", input_IUPAC)
        return "Error process!"

    output_smiles = response.text
    print(output_smiles)
    return output_smiles



# Location of the input CSV, loaded into a DataFrame right away.
csv_file = 'data/new_csv_file.csv'
df = pd.read_csv(filepath_or_buffer=csv_file)

# 定义边处理函数并边写入CSV文件
def process_row(row):
    """Convert one row and append it to the output CSV immediately.

    Args:
        row: A single DataFrame row (a ``Series``) containing an
            "Eng_IUPAC" entry.

    The result of ``change2smiles`` is stored under "IUPAC" and the row is
    appended, without header or index, to the module-level ``new_csv_file``,
    so rows already processed are on disk if a later row fails.
    """
    # Work on a copy: pandas does not support functions that mutate the
    # object handed to them by DataFrame.apply.
    result = row.copy()
    result['IUPAC'] = change2smiles(row['Eng_IUPAC'])

    # A Series becomes a single-row frame after transposing, which to_csv
    # then appends as one CSV line.
    result.to_frame().transpose().to_csv(new_csv_file, mode='a', header=False, index=False)


# Destination CSV; process_row appends one line to it per input row.
new_csv_file = 'data/IUPAC2smiles.csv'

# Row-wise pass over the frame: each row is converted and written at once.
df.apply(func=process_row, axis='columns')

print("处理完成!")


复杂的转换:

STOUT: SMILES to IUPAC names using neural machine translation | Journal of Cheminformatics | Full Text

Transformer-Based Molecular Generative Model for Antiviral Drug Design....

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Pengsen Ma

太谢谢了

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值