#! /usr/bin/python3.5
#-*- coding:UTF-8 -*-
# python 2.x需要指定编码方式
'''
http://www.rcsb.org/pdb/explore/explore.do?structureId=1A26
读取蛋白质结构数据库文件.pdb中的氨基酸,并转化为1个字符的氨基酸名
'''
# Three-letter residue names and their one-letter codes, kept as parallel
# tuples: lib3[i] corresponds to lib1[i].
lib3 = ("GLY", "ALA", "VAL", "LEU", "ILE", "PRO", "PHE", "TYR", "TRP", "SER",
        "THR", "CYS", "MET", "ASN", "GLN", "ASP", "GLU", "LYS", "ARG", "HIS")
lib1 = ('G', 'A', 'V', 'L', 'I', 'P', 'F', 'Y', 'W', 'S',
        'T', 'C', 'M', 'N', 'Q', 'D', 'E', 'K', 'R', 'H')
# Direct lookup table built from the two tuples above.
RESIDUE_CODES = dict(zip(lib3, lib1))


def seqres_to_letters(record, verbose=False):
    """Translate one line of a PDB file into one-letter residue codes.

    Args:
        record: A single line of the file, with or without its line ending.
        verbose: When true, print the remaining text before each field is
            consumed ("line=...") and every translated residue
            ("NAME: letter").

    Returns:
        None when the first space-delimited field is not ``SEQRES``;
        otherwise the concatenated one-letter codes of every field found in
        ``RESIDUE_CODES``. Fields that are not in the table (the record
        name, serial number, chain id, residue count, and any residue name
        outside the 20 listed) are skipped.
    """
    rest = record.strip()
    if rest.partition(' ')[0] != 'SEQRES':
        return None

    letters = []
    while rest:
        if verbose:
            print("line=%s" % rest)
        # Take the next field; runs of spaces between fields are dropped by
        # stripping the remainder.
        token, _, rest = rest.partition(' ')
        rest = rest.strip()
        letter = RESIDUE_CODES.get(token)
        if letter is not None:
            letters.append(letter)
            if verbose:
                # Label with the field just translated (the last field of a
                # record used to be labelled with the previous one).
                print("{0}: {1}".format(token, letter))
    return ''.join(letters)


def main(pdb_path="1a26.pdb", out_path='fout.txt'):
    """Write the one-letter sequence of every SEQRES record to a text file.

    Each SEQRES line of ``pdb_path`` produces exactly one line in
    ``out_path``, even when its last residue is not in the table; all other
    lines are ignored.

    Args:
        pdb_path: PDB file to read.
        out_path: Text file to create or overwrite.

    Raises:
        OSError: If either file cannot be opened.
    """
    # The input is opened first so a missing PDB file does not leave an
    # empty output file behind; both files are closed even on error.
    with open(pdb_path, "r") as fin, open(out_path, 'w') as fout:
        for record in fin:
            letters = seqres_to_letters(record, verbose=True)
            if letters is not None:
                fout.write(letters + '\n')


if __name__ == "__main__":
    main()
C++ map版:
/***********************************************************************
> File Name: 1a26.cpp
> Author: ims
> Created Time: 2017年05月17日 星期三 15时51分20秒
************************************************************************/
#include<fstream>
#include<iostream>
#include<map>
#include<string>
using namespace std;
// Remove leading and trailing whitespace from s in place and return s.
// Besides ' ', this also trims '\t', '\r', '\n', '\v' and '\f', so the '\r'
// that getline() leaves on CRLF-terminated files is not glued to the last
// field of a line.
std::string& strip(std::string & s)
{
    static const char* const kWhitespace=" \t\r\n\v\f";
    const std::string::size_type first=s.find_first_not_of(kWhitespace);
    if (first==std::string::npos)
    {
        // Empty or whitespace-only input: nothing is left.
        s.clear();
        return s;
    }
    // Trim the tail first so the index found above stays valid.
    s.erase(s.find_last_not_of(kWhitespace)+1);
    s.erase(0,first);
    return s;
}
// Read 1a26.pdb, translate the three-letter residue names found in its
// SEQRES records into one-letter codes and write them, without separators,
// to 1a26.out.pdb. Every remaining line and every translated residue is also
// echoed to stdout. Returns 1 if either file cannot be opened, 0 otherwise.
int main()
{
    // Three-letter residue name -> one-letter code.
    const std::map<std::string,char> mymap={{"ALA",'A'},{"CYS",'C'},{"ASP",'D'},{"GLU",'E'},{"PHE",'F'},{"GLY",'G'},{"HIS",'H'},{"ILE",'I'},{"LYS",'K'},{"LEU",'L'},{"MET",'M'},{"ASN",'N'},{"PRO",'P'},{"GLN",'Q'},{"ARG",'R'},{"SER",'S'},{"THR",'T'},{"VAL",'V'},{"TRP",'W'},{"TYR",'Y'}};

    std::ifstream fin("1a26.pdb");
    if(!fin.is_open())
    {
        std::cout<<"open file failed!"<<std::endl;
        return 1;
    }
    // Created only after the input is known to exist, so a failed run does
    // not leave an empty output file behind.
    std::ofstream fout("1a26.out.pdb");
    if(!fout.is_open())
    {
        std::cout<<"open file failed!"<<std::endl;
        return 1;
    }

    std::string line;
    while( std::getline(fin,line) )
    {
        strip(line);
        // Only lines whose first field is SEQRES are sequence records; a
        // line that merely mentions "SEQRES" elsewhere is skipped.
        if(line.substr(0,line.find(" "))!="SEQRES")
        {
            continue;
        }
        while(true)
        {
            strip(line);
            std::cout<<line<<std::endl;
            // size_type rather than int, so the comparison with npos below
            // does not depend on a narrowing conversion.
            const std::string::size_type pos=line.find(" ");
            // With no space left, substr(0,npos) yields the whole remainder,
            // i.e. the last field of the record.
            const std::string token=line.substr(0,pos);
            const auto it=mymap.find(token);
            if(it!=mymap.end())
            {
                std::cout<<token<<" : "<<it->second<<std::endl;
                fout<<it->second;
            }
            if(pos==std::string::npos)
            {
                break;
            }
            line.erase(0,pos+1);
        }
    }
    // Both streams are closed by their destructors.
    return 0;
}
C++版本:
/***********************************************************************
> File Name: 1a26.cpp
> Author: ims
> Created Time: 2017年05月17日 星期三 15时51分20秒
************************************************************************/
#include<fstream>
#include<iostream>
#include<array>
#include<string>
using namespace std;
// Trim whitespace from both ends of s in place and return s.
// The trimmed set is ' ', '\t', '\r', '\n', '\v' and '\f'; trimming '\r'
// keeps the last field of a line intact when the file uses CRLF line
// endings, because getline() only removes the '\n'.
std::string& strip(std::string & s)
{
    const char* const blanks=" \t\r\n\v\f";
    const std::string::size_type last=s.find_last_not_of(blanks);
    if (last==std::string::npos)
    {
        // Nothing but whitespace (or nothing at all) in s.
        s.clear();
        return s;
    }
    s.erase(last+1);
    s.erase(0,s.find_first_not_of(blanks));
    return s;
}
// Read 1a26.pdb, translate the three-letter residue names found in its
// SEQRES records into one-letter codes and write them to 1a26.out.pdb, one
// output line per SEQRES record. Progress is echoed to stdout. Returns 1 if
// either file cannot be opened, 0 otherwise.
int main()
{
    // Parallel tables: lib3[i] is the three-letter name of lib1[i].
    const std::array<std::string,20> lib1={"G","A","V","L","I","P","F","Y","W","S","T","C","M","N","Q","D","E","K","R","H"};
    const std::array<std::string,20> lib3={"GLY","ALA","VAL","LEU","ILE","PRO","PHE","TYR","TRP","SER","THR","CYS","MET","ASN","GLN","ASP","GLU","LYS","ARG","HIS"};

    // One-letter code for a three-letter name, or nullptr when the name is
    // not in lib3.
    const auto lookup=[&lib1,&lib3](const std::string& name) -> const std::string*
    {
        for(std::size_t i=0;i<lib3.size();++i)
        {
            if(lib3[i]==name)
            {
                return &lib1[i];
            }
        }
        return nullptr;
    };

    std::ifstream fin("1a26.pdb");
    if(fin.is_open())
    {
        std::cout<<"open succeed!"<<std::endl;
    }
    else
    {
        std::cout<<"open file failed!"<<std::endl;
        return 1;
    }
    // Created only after the input is known to exist, so a failed run does
    // not leave an empty output file behind.
    std::ofstream fout("1a26.out.pdb");
    if(!fout.is_open())
    {
        std::cout<<"open file failed!"<<std::endl;
        return 1;
    }

    std::string line;
    while( std::getline(fin,line) )
    {
        strip(line);
        // Only lines whose first field is SEQRES are sequence records; a
        // line that merely mentions "SEQRES" elsewhere is skipped.
        if(line.substr(0,line.find(" "))!="SEQRES")
        {
            continue;
        }
        while(true)
        {
            strip(line);
            // size_type rather than int, so npos is represented exactly.
            const std::string::size_type site=line.find(" ");
            const bool lastField=(site==std::string::npos);
            // substr(0,npos) yields the whole remainder for the last field.
            const std::string token=line.substr(0,site);
            if(!lastField)
            {
                // The remaining text is echoed for every field but the last.
                std::cout<<line<<std::endl;
            }
            if(const std::string* code=lookup(token))
            {
                std::cout<<token<<" : "<<*code<<std::endl;
                fout<<*code;
            }
            if(lastField)
            {
                // End the output line for every record, including records
                // whose last field is not a known residue, so consecutive
                // records are never joined.
                fout<<'\n';
                break;
            }
            line.erase(0,site+1);
        }
    }
    // Both streams are closed by their destructors.
    return 0;
}