1、 PROFphd结果
1) 提取结果,从文件夹中批量复制文件至另一文件夹
法一 find dir -name '*.c' | xargs -i cp {} dir_a
法二
#!/bin/sh
for AA in `find . -name "*.c"`
do
    cp -r $AA /a
done
2)检查结果
结果有16526个,但理论结果16623,少了97个。
其中,选择序列
>2B5DX
再运行一次 ../prof 2B5DX fileRdb=2B5DX.out sec
错误如下:
*** ERROR prof: line number of error=106
*** ERROR failed prof:full:doOne dbfile=./2B5DX, chain=*!
*** ERROR msg from where it failed:
*** ERROR doOne: assHack1994 failed
*** ERROR assHack1994: SYSTEM call:
/picb/home40/cyd/dys/backup/PROFphd/prof-tmp/prof/embl/phd.pl ./2B5DX sec exePhd=/picb/home40/cyd/dys/backup/PROFphd/prof-tmp/prof/bin/phd1994.UNKNOWN exeHsspFilter=/picb/home40/cyd/dys/backup/PROFphd/prof-tmp/prof/bin/filter_hssp.UNKNOWN exeHsspFilterPl=/picb/home40/cyd/dys/backup/PROFphd/prof-tmp/prof/scr/hssp_filter.pl exeHsspFilterPack=/picb/home40/cyd/dys/backup/PROFphd/prof-tmp/prof/scr/pack/hssp_filter.pm exeConvertSeq=/picb/home40/cyd/dys/backup/PROFphd/prof-tmp/prof/bin/convert_seq.UNKNOWN exeCopf=/picb/home40/cyd/dys/backup/PROFphd/prof-tmp/prof/scr/copf.pl exeCopfPack=/picb/home40/cyd/dys/backup/PROFphd/prof-tmp/prof/scr/pack/copf.pm exeConvHssp2saf=/picb/home40/cyd/dys/backup/PROFphd/prof-tmp/prof/embl/scr/conv_hssp2saf.pl ARCH=UNKNOWN filePhd=PROF14172.phdEMBL fileRdb=PROF14172.rdbPhdEMBL fileNotHtm=PROF14172.notHtm
failed producing output file=PROF14172.rdbPhdEMBL
1) 检查序列是否含非氨基酸字符(B、J、O、U、X、Z)
结果只发现X,但并非是预测出错的序列所特有。
2) 长度检查:出错的序列并非最长或最短
方法:用Excel找出错误序列
2、 安装prof
1) 根据readme安装出错
usage.c: In function `usage': usage.c:36: error: missing terminating " character usage.c:37: error: `o' undeclared (first use in this function) usage.c:37: error: (Each undeclared identifier is reported only once usage.c:37: error: for each function it appears in.) usage.c:37: error: `filename' undeclared (first use in this function) usage.c:37: error: syntax error before "for" usage.c:37: error: stray '/' in program usage.c:37: error: missing terminating " character usage.c:39: error: missing terminating " character usage.c:40: error: syntax error before "for" usage.c:40: error: stray '/' in program usage.c:40: error: missing terminating " character usage.c:43: error: missing terminating " character usage.c:44: error: `p' undeclared (first use in this function) usage.c:44: error: `Output' undeclared (first use in this function) usage.c:44: error: syntax error before "profil" usage.c:44: error: stray '/' in program usage.c:44: error: missing terminating " character usage.c:46: error: missing terminating " character usage.c:47: error: syntax error before "profil" usage.c:47: error: stray '/' in program usage.c:47: error: missing terminating " character make: *** [usage.o] Error 1 |
2) 修改usage.c 为:
#include<stdio.h>
#include<stdlib.h>
#include "Prof.h"
/*
 * Print the Prof command-line help to stdout, followed by one diagnostic
 * line for each required input file that is missing.
 *
 * Reads the identifiers mode, file_clu, file_psi and file_seq, which are not
 * declared in this function (presumably they come from Prof.h -- confirm).
 *
 * The newline escape must be written "\n"; the earlier "/n" printed the two
 * literal characters '/' and 'n' and left the whole help text on one line.
 */
void usage(void)
{
    printf("In automatic mode :\n");
    printf("usage is Prof -A -v -d -m 0 -i [filename in fasta format] -b [database name for psiblast] -o [filename for the output]\n");
    printf("Or if you want a casp format as output\n");
    printf("usage is Prof -c -v -d -m 0 -i [filename in fasta format] -b [database name for psiblast] -o [filename for the output]\n");
    printf("\n------------------------------------------------------\n\n");
    printf("In non-automatic mode :\n");
    printf("usage is Prof -A -v -d -m 1 -a [Output from clustalw in aln format] -p [Output profil form psi-blast] -o [filename for the output]\n");
    printf("Or if you want a casp format as output\n");
    printf("usage is Prof -c -v -d -m 1 -a [Output from clustalw in aln format] -p [Output profil form psi-blast] -o [filename for the output]\n");
    printf("\n\n");

    /* Mode 1 additionally needs the alignment (-a) and the profile (-p). */
    if (mode == 1)
    {
        if (!file_clu)
            printf("file of multiple alignement in aln format not found. With option [-m 1] need option -a as well.\n");
        if (!file_psi)
            printf("file of profil from psi-blast not found. With option [-m 1] need option -p as well.\n");
    }

    /* This check runs whatever the value of mode. */
    if (!file_seq)
        printf("file of sequence not found. With option [-m 0] need file of sequence in fasta format.\n");

    return;
}
3) 安装blastpgp
注意:
1、 .cshrc只在csh下起作用,bash下起作用的是.bashrc
2、 .bash_profile与.bashrc作用的区别(zz)
1. /etc/profile:此文件为系统的每个用户设置环境信息,当用户第一次登录时,该文件被执行.并从/etc/profile.d目录的配置文件中搜集shell的设置.
2. /etc/bashrc:为每一个运行bash shell的用户执行此文件.当bash shell被打开时,该文件被读取.
3. ~/.bash_profile:每个用户都可使用该文件输入专用于自己使用的shell信息,当用户登录时,该文件仅仅执行一次!默认情况下,它设置一些环境变量,执行用户的.bashrc文件.
4. ~/.bashrc:该文件包含专用于你的bash shell的bash信息,当登录时以及每次打开新的shell时,该文件被读取.
5. ~/.bash_logout:当每次退出系统(退出bash shell)时,执行该文件.
6. /etc/profile中设定的变量(全局)可以作用于任何用户,而~/.bashrc等中设定的变量(局部)只能继承/etc/profile中的变量,他们是"父子"关系.
7. ~/.bash_profile 是交互式、login 方式进入 bash 运行的;~/.bashrc 是交互式 non-login 方式进入 bash 运行的,通常二者设置大致相同,所以通常前者会调用后者。
3、 整合ssprod的结果:
1) 提取预测结果
> 1
MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG
CEEEEEECCCCEEEEECCCCCEHHHHHHHHHHHHCCCHHHEEEEECCEECCCCCECHHHCCCCCCEEEEEECCCCC
> 2
TLGALEFSLLYDQDNSNLQCIIRAKGLKPMDSNGLADPYVKLHLPGASKSNLRTKLRNTRNPVWNETLQYHGITEEDMQRKTLRISVCDEDKFGHNEFIGETRFSLKKLKANQRKNFNICLERVI
CCCEEEEEEEECCCCCCEEEEEEECCCCCCCCCCCCCCEEEEEECCCCCCCCCEEEECCCCCCECEEEEECCCCHHHHCCCEEEEEEEECCCCCCCCEEEEEEEECCCCCCCCCCCHHHHHHHHC
#include <stdio.h>
#include <fstream>
#include <iostream>
#include <string>

// Copies the prediction line of every record in "ssprod_result" to
// "ssprod_ss", one prediction per output line.
//
// A record is three non-blank lines: header, amino-acid sequence, predicted
// secondary structure. Input is read line by line rather than token by
// token, because a header such as "> 1" contains a space and would count as
// two tokens, shifting which entries get selected.
//
// Returns 0 on success, 1 if either file cannot be opened.
int main()
{
    std::ifstream result("ssprod_result");
    std::ofstream ss("ssprod_ss");
    if (!result || !ss) {
        std::cerr << "cannot open ssprod_result for reading or ssprod_ss for writing" << std::endl;
        return 1;
    }

    std::string line;
    int i = 1;  // 1-based index of the current non-blank line
    // Looping on getline itself stops exactly at end of input; testing
    // eof() before reading would run one extra iteration on a failed read.
    while (std::getline(result, line)) {
        // Drop trailing blanks and a possible '\r' from CRLF files.
        const std::string::size_type last = line.find_last_not_of(" \t\r");
        if (last == std::string::npos) {
            continue;  // blank lines are not part of any record
        }
        line.erase(last + 1);

        if (i % 3 == 0) {  // every third line is the prediction
            ss << line << '\n';
        }
        ++i;
    }
    return 0;
}
2) 整合数据combine
同前combine.cpp
#include <stdio.h>
#include <fstream>
#include <iostream>
#include <string>

// Merges four parallel input files into "ssprod_final_result.txt".
//
// For each whitespace-delimited entry of name.txt, one entry is read from
// each of protein.txt, re_sturcture.txt and ssprod_ss, and the four values
// are written on four consecutive lines: name, sequence, real structure,
// prediction.
//
// Returns 0 on success, 1 if a file cannot be opened or if one of the
// companion files runs out of entries before name.txt does.
int main()
{
    std::ifstream name("name.txt");
    std::ifstream aa("protein.txt");
    std::ifstream real_structure("re_sturcture.txt");
    std::ifstream prediction("ssprod_ss");
    std::ofstream combine("ssprod_final_result.txt");
    if (!name || !aa || !real_structure || !prediction || !combine) {
        std::cerr << "cannot open one of the input files or ssprod_final_result.txt" << std::endl;
        return 1;
    }

    std::string name_str, aa_str, real_structure_str, prediction_str;
    // Looping on the extraction itself keeps the last record even when
    // name.txt has no trailing newline; testing eof() after the read drops it.
    while (name >> name_str) {
        const bool complete = (aa >> aa_str)
                           && (real_structure >> real_structure_str)
                           && (prediction >> prediction_str);
        if (!complete) {
            // Stop rather than write the previous record's values again.
            std::cerr << "missing sequence/structure/prediction for " << name_str << std::endl;
            return 1;
        }
        combine << name_str << '\n'
                << aa_str << '\n'
                << real_structure_str << '\n'
                << prediction_str << '\n';
    }
    return 0;
}