使用linux批量引物设计,对组装结果中GAP左右批量设计引物

#!/usr/bin/perl -w

use strict;

use Cwd qw(abs_path getcwd);

use Getopt::Long;

use Data::Dumper;

use FindBin qw($Bin $Script);

use File::Basename qw(basename dirname);

use Bio::SeqIO;

my $BEGIN_TIME=time();

my $version="1.0";

#

------------------------------------------------------------------

# GetOptions

#

------------------------------------------------------------------

my

($fa,$flank,$epcrfa,$od,$cut,$outprefix,$PRIMER_NUM_RETURN,$PRIMER_OPT_SIZE,$PRIMER_MIN_SIZE,$PRIMER_MAX_SIZE,$PRIMER_PRODUCT_SIZE_RANGE,$PRIMER_MAX_NS_ACCEPTED);

GetOptions(

"help|?"

=>\&USAGE,

"fa=s"=>\$fa,

"flank=s"=>\$flank,

"PRIMER_MIN_SIZE=s"=>\$PRIMER_MIN_SIZE,

"PRIMER_OPT_SIZE=s"=>\$PRIMER_OPT_SIZE,

"PRIMER_MAX_SIZE=s"=>\$PRIMER_MAX_SIZE,

"PRIMER_NUM_RETURN=s"=>\$PRIMER_NUM_RETURN,

"PRIMER_PRODUCT_SIZE_RANGE=s"=>\$PRIMER_PRODUCT_SIZE_RANGE,

"epcrfa=s"=>\$epcrfa,

"od=s"=>\$od,

"prefix=s"=>\$outprefix,

) or

&USAGE;

&USAGE unless($fa);

sub USAGE {

my $usage=<

Program: $0

Version: $version

Contact: huang2002003\@126.com

Discription:

Usage:

Options:

-fa Input fa file, forced

-flank  100

cut 100

-PRIMER_MIN_SIZE 18 Minimum acceptable

length of a primer. Must be greater than 0 and less than or equal

to

PRIMER_MAX_SIZE

-PRIMER_OPT_SIZE 20 Optimum length (in

bases) of a primer. Primer3 will attempt to pick primers close to

this length.

-PRIMER_MAX_SIZE 23 Maximum acceptable

length (in bases) of a primer. Currently this parameter cannot

be

larger than 35. This limit is

governed by maximum oligo size for which primer3's

melting-temperature

is valid.

-PRIMER_PRODUCT_SIZE_RANGE 100-300 The

associated values specify the lengths of the product that the

user

wants the

primers to create, and is a space separated list of elements of the

form

-PRIMER_NUM_RETURN 1 Total num primer to

search

-epcrfa  run E-PCR to

filter multi mapped primers; required;

-od Key of output dir,default cwd;

-prefix defoult demo;

-h Help

USAGE

print $usage;

exit 1;

}

$PRIMER_MIN_SIZE||=18;

$PRIMER_OPT_SIZE||=20;

$PRIMER_MAX_SIZE||=23;

$PRIMER_NUM_RETURN||=1;

$PRIMER_MAX_NS_ACCEPTED||=1;

$PRIMER_PRODUCT_SIZE_RANGE||="100-300";

$od||=getcwd;

$flank||=100;

$od=abs_path($od);

unless(-d $od){ mkdir $od;}

$outprefix||="SSR";

$fa=abs_path($fa);

$epcrfa||=$fa;

$epcrfa=abs_path($epcrfa);

#################################################################

my%ref=();

my%ref_len=();

my$in  = Bio::SeqIO->new(-file => "$fa"

,

-format => 'Fasta');

while ( my $seq = $in->next_seq() ) {

$ref{$seq->id}=$seq;

$ref_len{$seq->id}=$seq->length;

}

$in->close();

############find SSR by misa#######################

print "perl /biosoft/MISA/my_misa1.pl $fa $od/$outprefix.misa

$od/$outprefix.misa.statistics\n";

system("perl /biosoft/MISA/my_misa1.pl $fa $od/$outprefix.misa

$od/$outprefix.misa.statistics");

###########################primer design

###############################

open IN,"$od/$outprefix.misa" or die "$!";

open OUT,">$od/primer3.input" or die "$!";

my%ssr_pos=(); #ssr相关信息

my%SSR=(); # SSR type

while(my$line=){

chomp $line;

my@tmp=split(/\t/,$line);

next if ($tmp[0] eq "ID");

my$ID="$tmp[0]_$tmp[5]_$tmp[6]_$tmp[4]";

$ssr_pos{$ID}="$tmp[0]\t$tmp[5]\t$tmp[6]\t$tmp[4]\t$tmp[2]";

my$seq=&get_target_fa($tmp[0],$tmp[5]-$flank,$tmp[6]+$flank);

#print $seq."\n";

my$tar=0;

if(($tmp[5]-$flank)<=0){

$tar=$tmp[5];

}else{

$tar=$flank;

}

if ($seq){

$SSR{$ID}=$tmp[3];

print OUT "SEQUENCE_ID=$ID\n";

print OUT "SEQUENCE_TEMPLATE=$seq\n";

printf OUT ("SEQUENCE_TARGET=%d,%d\n",$tar+1,$tmp[4]);

print OUT "SPRIMER_TASK=pick_detection_primers\n";

print OUT "PRIMER_PICK_LEFT_PRIMER=1\n";

print OUT "PRIMER_PICK_RIGHT_PRIMER=1\n";

print OUT "PRIMER_NUM_RETURN=$PRIMER_NUM_RETURN\n";

print OUT "PRIMER_MAX_END_STABILITY=250\n";

print OUT "PRIMER_MIN_SIZE=$PRIMER_MIN_SIZE\n";

print OUT "PRIMER_OPT_SIZE=$PRIMER_OPT_SIZE\n";

print OUT "PRIMER_MAX_SIZE=$PRIMER_MAX_SIZE\n";

print OUT "PRIMER_MAX_NS_ACCEPTED=1\n";

print OUT "PRIMER_MIN_GC=40.0\nPRIMER_MAX_GC=60.0\n";

print OUT

"PRIMER_PRODUCT_SIZE_RANGE=$PRIMER_PRODUCT_SIZE_RANGE\n";

printf OUT

("SEQUENCE_INTERNAL_EXCLUDED_REGION=%d,%d\n",$tar+1,$tmp[4]);

print OUT "=\n";

}

}

close(OUT);

close(IN);

print "/biosoft/Primer3/primer3-2.3.7/src/primer3_core

-p3_settings_file=/biosoft/Primer3/primer3-2.3.7/default_settings.txt

-output=$od/$outprefix.primers

$od/primer3.input\n";

system("/biosoft/Primer3/primer3-2.3.7/src/primer3_core

-p3_settings_file=/biosoft/Primer3/primer3-2.3.7/default_settings.txt

-output=$od/$outprefix.primers

$od/primer3.input");

###############################e-PCR

#####################

my%Pseq=();

$/="=\n";

open IN ,"$od/$outprefix.primers" or die "$!";

open OUT,">$od/$outprefix.primers.xls" or die "$!";

print OUT

"#SEQUENCE_ID\tPRIMER_LEFT_SEQUENCE\tPRIMER_RIGHT_SEQUENCE\tPRIMER_PAIR_PRODUCT_SIZE\tPRIMER_LEFT_TM\tPRIMER_RIGHT_TM\tPRIMER_LEFT_GC_PERCENT\tPRIMER_RIGHT_GC_PERCENT\tSSR\tSSR_TYPE\n";

while(my$line=){

chomp$line;

my@field=split(/\n/,$line);

my%primers=&get_hash(@field);

next if  !

defined($primers{"PRIMER_PAIR_NUM_RETURNED"}) or

$primers{"PRIMER_PAIR_NUM_RETURNED"}==0;

if($primers{"PRIMER_PAIR_NUM_RETURNED"}>=1){

for my$i (0..($primers{"PRIMER_PAIR_NUM_RETURNED"}-1)){

$Pseq{"$primers{SEQUENCE_ID}"}{$i}=[$primers{"PRIMER_LEFT_${i}_SEQUENCE"},$primers{"PRIMER_RIGHT_${i}_SEQUENCE"}];

print OUT

join("\t",("$primers{SEQUENCE_ID}_${i}",$primers{"PRIMER_LEFT_${i}_SEQUENCE"},$primers{"PRIMER_RIGHT_${i}_SEQUENCE"},$primers{"PRIMER_PAIR_${i}_PRODUCT_SIZE"},$primers{"PRIMER_LEFT_${i}_TM"},$primers{"PRIMER_RIGHT_${i}_TM"},$primers{"PRIMER_LEFT_${i}_GC_PERCENT"},$primers{"PRIMER_RIGHT_${i}_GC_PERCENT"},$SSR{$primers{"SEQUENCE_ID"}}))."\n";

}

}

}

$/="\n";

close(OUT);

print "/biosoft/e-PCR/Linux-x86_64/e-PCR  -w9

-f 1 -m100 $od/$outprefix.primers.xls D=100-400 $epcrfa N=1 G=1 T=3

> $od/$outprefix.epcr.out\n";

system("/biosoft/e-PCR/Linux-x86_64/e-PCR

-w9 -f 1 -m100 $od/$outprefix.primers.xls

D=100-400 $epcrfa N=1 G=1 T=3 > $od/$outprefix.epcr.out");

######################################################################################

open IN ,"$od/$outprefix.epcr.out" or die "$!";

open OUT,">$od/$outprefix.primers.result.xls" or die

"$!";

my %P=();

while(my$line=){

chomp$line;

my@tmp=split(/\t/,$line);

next if $tmp[6]+$tmp[7] >1;

$tmp[5]=~s#/.*$##;

my$ssr_id=$tmp[1];

my$num=0;

($ssr_id,$num)=($ssr_id=~/(.*)_(\d+)$/);

$P{$tmp[1]}{"$tmp[1]_$tmp[3]_$tmp[4]"}=[$tmp[1],$ssr_pos{$ssr_id},$Pseq{$ssr_id}{$num}->[0],$Pseq{$ssr_id}{$num}->[1],$tmp[5],@tmp[6..$#tmp]];

}

close(IN);

print OUT

"#GAP_ID\tCHR\tGAP_START\tGAP_END\tGAP_LENGTH\tGAP_TYPE\tPRIMER_LEFT_SEQUENCE\tPRIMER_RIGHT_SEQUENCE\tPRIMER_PAIR_PRODUCT_SIZE/PREDICT_RANGE\tMISM\tGAPS\tPRIMER_LEFT_TM\tPRIMER_RIGHT_TM\tPRIMER_LEFT_GC_PERCENT\tPRIMER_RIGHT_GC_PERCENT\tGAP\n";

open OUT1,">$od/$outprefix.NOprimers.result.xls" or die

"$!";

for my $id (sort keys %SSR){

for my$i(0..($PRIMER_NUM_RETURN-1)){

if (exists $P{"${id}_$i"}){

my@ps=(keys %{$P{"${id}_$i"}});

if(@ps ==1){

print OUT join("\t",@{$P{"${id}_$i"}{$ps[0]}})."\n";

}

}else{

print OUT1 "${id} not primers\n";

}

}

}

close(OUT);

close(OUT1);

open OUT,">$od/readme.txt" or die "$!";

my $readme=<

建议使用editplus打开本文件;

*primers.result.xls

#SEQUENCE_ID-- 引物ID

(命名规则:GAP所在来源序列_GAP所在位置start_GAP所在位置end_GAP长度_备用引物编号 ; )

PRIMER_LEFT_SEQUENCE-- 左引物

PRIMER_RIGHT_SEQUENCE-- 右引物

PRIMER_PAIR_PRODUCT_SIZE/PREDICT_RANGE--

MISM--  Total number of mismatches for two

primers(ePCR)

GAPS--  Total number of gaps for two

primers(ePCR)

PRIMER_LEFT_TM-- 退火温度

PRIMER_RIGHT_TM-- 退火温度

PRIMER_LEFT_GC_PERCENT-- GC含量

PRIMER_RIGHT_GC_PERCENT-- GC含量

GAP-- GAP类型

方法:

2.提取GAP左右及其左右序列,用primer3设计引物[2]

3.用Epcr ,在fasta上电子PCR,去除有多处比较的引物,保证引物扩增的特异性[3]

[1] MISA:Exploiting EST databases for the development and

characterization of gene-derived SSR-markers in barley (Hordeum

vulgare L.).

[2] Primer3: Untergasser A, Cutcutache I, Koressaar T, Ye J,

Faircloth BC, Remm M, Rozen SG (2012) Primer3 - new capabilities

and interfaces. Nucleic Acids Research 40(15):e115

[3] http://www.ncbi.nlm.nih.gov/tools/epcr/

README

print OUT $readme;

close(OUT);

################################################################################

sub get_hash{

my@info=@_;

my%result=();

for my$i(@info){

next if $i=~/^\s*$/;

my($k,$v)=split(/=/,$i);

$result{$k}=$v;

}

return %result;

}

sub get_target_fa(){

my $id=shift;

my $posU=shift;

my $posD=shift;

if($posU<=0){

$posU=1;

}

my $seqobj=$ref{$id};

my $len=$ref_len{$id};

return $seqobj->subseq($posU,$len) if $posD>$len;

my $tri="";

$tri=$seqobj->subseq($posU,$posD);

return $tri;

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值