# Convert a three-letter amino-acid sequence into a one-letter sequence
# 1. Amino-acid lookup data
# 2. Conversion code
#%% 氨基酸的3 to 1
#导入package
import pandas as pd
from pandas import DataFrame as df
import numpy as np
import re
# Path to the amino-acid lookup table (CSV).
# Written as a raw string so the backslashes are literal; the original mixed
# an unrecognised escape ("\T") with doubled backslashes. The resulting path
# value is unchanged.
Amino_acid_path = r"D:\Topic\alphafold\Amino_acid.csv"
# Load the table and flatten it into a plain list of rows (one inner list per
# CSV data row) so that columns can be addressed by position.
amino_acid = np.array(pd.read_csv(Amino_acid_path)).tolist()
#%% Convert the amino-acid lookup table into a dict
# Column 0 of each row becomes the key and column 1 the value (presumably
# three-letter code -> one-letter code; confirm against the CSV layout).
# If a key appears in several rows, the last row wins.
amino_map = {row[0]: row[1] for row in amino_acid}
#%% Read the three-letter amino-acid sequence
# Raw string keeps the backslashes literal instead of relying on unrecognised
# escapes ("\T", "\d"); the path value is unchanged.
amino_path = r"D:\Topic\demodata.txt"
# Read the entire sequence file into a single string.
with open(amino_path, 'r', encoding='utf-8') as f:
    aminofile = f.read()
# Insert a space in front of every uppercase ASCII letter, then split on
# whitespace, so that a new token begins at each uppercase letter
# (e.g. "AlaGlyVal" -> ["Ala", "Gly", "Val"]).
aminofileList = re.sub(r"([A-Z])", r" \1", aminofile).split()
#%%
# Convert to the one-letter sequence: look up every token in amino_map and
# concatenate the results in order. A token that is missing from the table
# raises KeyError.
amino_one = "".join(amino_map[token] for token in aminofileList)
# Save the one-letter sequence to disk.
# Raw string keeps the backslashes literal instead of relying on unrecognised
# escapes ("\T", "\d"); the path value is unchanged.
amino_one_path = r"D:\Topic\dataresult.fasta"
# Only the sequence text is written; no ">" header line is added.
# The encoding is explicit so the output does not depend on the platform
# default, matching the UTF-8 used when the input file is read.
with open(amino_one_path, 'w', encoding='utf-8') as f:
    f.write(amino_one)