结构域预测软件interproscan提供多种输出格式,出于后期分析的需要,选用了gff3格式。我比较喜欢结合数据库进行分析,所以先要把数据导入数据库。
我之前用Qt写好了界面,所以只要在菜单里添加一个QAction,再在主窗口类中添加对应的槽函数就可以了。这里给出我解析文件的槽函数。
void MainWindow::on_interproToMyDB(){
PfamToMyDBDlg * dbDlg = new PfamToMyDBDlg(this);
dbDlg->exec();
QFile qinFile(dbDlg->filename.toStdString().c_str());
// QFile qinFile("mypfam.gff3");
QStringList fullpath = dbDlg->filename.split(QRegExp("/"));
QString path;
for(int i = 0; i < fullpath.size() - 1; i++){
path.append(fullpath[i]);
path.append("/");
}
// QMessageBox::information(this, "ERROR2", path);
//QFile qoutFile( "a2.sql");
QFile qoutFile( path+dbDlg->name+".sql");
// QMessageBox::information(this, "ERROR3", dbDlg->filename.toStdString().c_str());
// QMessageBox::information(this, "ERROR4", dbDlg->name);
if (!qoutFile.open(QIODevice::ReadWrite | QIODevice::Text))
{
QMessageBox::information(this, "ERROR1", "failed to open");
return;
}
if(!qinFile.open(QIODevice::ReadOnly | QIODevice::Text))
{
QMessageBox::information(this, "ERROR2", "failed to open");
return ;
}
QTextStream myTextStream(&qoutFile);
while (!qinFile.atEnd())
{
QByteArray line = qinFile.readLine();
line[line.size()-1]='\0';
QString str(line);
if(str[0]=='#'||str.size() == 0)
{
continue;
}
if(!str.startsWith("WP_")){
break;
}
QStringList sections = str.split(QRegExp("\t"));
if(!sections[sections.size()-1].startsWith("Name")){
continue;
}
QStringList sections2 = sections[sections.size()-1].split(QRegExp(";"));
QStringList sections3 = sections2[2].split(QRegExp(" "));
QString SQL;
if(sections3.size() != 3 || sections2.size() != 6 ||\
!sections3[0].startsWith("Target=") || \
!sections2[0].startsWith("Name") ||\
!sections2[1].startsWith("signature_desc") ||\
!sections2[3].startsWith("status")||\
!sections2[4].startsWith("ID")||\
!sections2[5].startsWith("date")\
){
QMessageBox::warning(this, "ERROR", QString("format is wrong here, please add manully:\n")+sections[sections.size()-1].toStdString().c_str());
continue;
}
SQL.sprintf("insert into res_smart(Name, signature_desc, Target,start , stop, status, ID_SEQ, date, Uid)values(\"%s\", \"%s\", '%s', '%s' , '%s', '%s', '%s', '%s',(select Id from project_abbr where name = '%s'));", \
sections2[0].split(QRegExp("="))[1].toStdString().c_str() ,\
sections2[1].split(QRegExp("="))[1].toStdString().c_str() ,\
sections3[0].split(QRegExp("="))[1].toStdString().c_str() ,\
sections3[1].toStdString().c_str() ,\
sections3[2].toStdString().c_str() ,\
sections2[3].split(QRegExp("="))[1].toStdString().c_str() ,\
sections2[4].split(QRegExp("="))[1].toStdString().c_str() ,\
sections2[5].split(QRegExp("="))[1].toStdString().c_str() ,\
dbDlg->name.toStdString().c_str());
myTextStream<<SQL<<'\n';
}
qoutFile.close();
qinFile.close();
}
PfamToMyDBDlg继承自QDialog,也一并给出
#ifndef PFAMTOMYDBDLG_H
#define PFAMTOMYDBDLG_H
#include <QDialog>
#include <QPushButton>
#include <QLineEdit>
#include <QLabel>
#include <QComboBox>
class PfamToMyDBDlg : public QDialog
{
Q_OBJECT
public:
QString filename;
QString name;
public:
explicit PfamToMyDBDlg(QWidget *parent = 0);
private:
QLabel *filenameLabel, *nameLabel;
QLineEdit *filenameEdit, *nameEdit;
QPushButton *openFileBtn, *exeBtn;
QComboBox *nameComBox;
signals:
public slots:
void on_openFile();
void on_exe();
};
#endif // PFAMTOMYDBDLG_H
#include "pfamtomydbdlg.h"
#include <QHBoxLayout>
#include <QVBoxLayout>
#include <QMessageBox>
#include <QFont>
#include <QString>
#include <QFileDialog>
#include <QDebug>
// Builds the dialog: a "Filename" row (label, line edit, "open" button), a
// "name" row (label, editable combo box pre-filled with species names) and an
// "execute" button underneath.
PfamToMyDBDlg::PfamToMyDBDlg(QWidget *parent) :
    QDialog(parent)
{
    // QFont is a value type and setFont() takes a copy, so a stack object is
    // sufficient (a heap-allocated font was never freed).
    QFont labelFont;
    labelFont.setBold(true);
    labelFont.setPointSize(24);

    // --- file-name row ---
    filenameLabel = new QLabel(tr("Filename"));
    filenameLabel->setFont(labelFont);
    filenameLabel->setStyleSheet("color:red");
    filenameEdit = new QLineEdit;
    openFileBtn = new QPushButton("open");
    connect(openFileBtn, SIGNAL(clicked()), this, SLOT(on_openFile()));

    // --- name row ---
    nameLabel = new QLabel(tr("name"));
    nameLabel->setFont(labelFont);
    nameLabel->setStyleSheet("color:red");

    // The name is entered through the combo box; no separate line edit is
    // shown. Previously a parentless QLineEdit was created here and never
    // placed in a layout, so nothing ever deleted it.
    nameEdit = 0;

    nameComBox = new QComboBox;
    nameComBox->setEditable(true);
    // NOTE(review): several entries look like misspellings ("Dyadobactor",
    // "Fibrisome", "byssophilla"). They are kept verbatim because the chosen
    // text is used as a lookup key elsewhere; confirm against the stored
    // names before correcting them.
    static const char *const speciesNames[] = {
        "Cytophaga hutchinsonii",
        "Dyadobacter fermentans",
        "Dyadobactor tibetensis",
        "Fibrella aestuarina",
        "Fibrisome limi",
        "Fibrobacter succinogenes",
        "Runella slithyformis",
        "Leadbetterella byssophilla",
        "Sporocytophaga myxococcoides",
        "Spirosoma linguale"
    };
    const int speciesCount = sizeof(speciesNames) / sizeof(speciesNames[0]);
    for (int i = 0; i < speciesCount; ++i) {
        nameComBox->addItem(speciesNames[i]);
    }

    exeBtn = new QPushButton("execute");
    connect(exeBtn, SIGNAL(clicked()), this, SLOT(on_exe()));

    // --- layout ---
    QHBoxLayout *filenameLayout = new QHBoxLayout;
    filenameLayout->addWidget(filenameLabel);
    filenameLayout->addWidget(filenameEdit);
    filenameLayout->addWidget(openFileBtn);

    QHBoxLayout *nameLayout = new QHBoxLayout;
    nameLayout->addWidget(nameLabel);
    nameLayout->addWidget(nameComBox);

    QVBoxLayout *globalLayout = new QVBoxLayout;
    globalLayout->addLayout(filenameLayout);
    globalLayout->addLayout(nameLayout);
    globalLayout->addWidget(exeBtn);
    this->setLayout(globalLayout);
}
// Slot for the "execute" button: stores the trimmed file name and project
// name in the public members and finishes the dialog.
void PfamToMyDBDlg::on_exe()
{
    filename = filenameEdit->text().trimmed();
    name = nameComBox->currentText().trimmed();
    // accept() rather than close(): exec() then returns QDialog::Accepted, so
    // a caller can tell "execute pressed" apart from the window being
    // dismissed.
    this->accept();
}
void PfamToMyDBDlg::on_openFile()
{
QString temp = QFileDialog::getOpenFileName(this, "open", "c:/desktop/", "files(*.*)");
filenameEdit->setText(temp);
}
要提取的部分不包括文件末尾的序列。中间部分的特点是以制表符区分不同字段,所以如果不做复杂分析的话,可以将这部分结果直接拷贝到Excel中进行分析。