使用linux批量引物设计,对组装结果中GAP左右批量设计引物

最新推荐文章于 2024-03-18 10:28:05 发布

xiaoxiao wang

最新推荐文章于 2024-03-18 10:28:05 发布

阅读量481

点赞数

文章标签：使用linux批量引物设计

#!/usr/bin/perl -w

use strict;

use Cwd qw(abs_path getcwd);

use Getopt::Long;

use Data::Dumper;

use FindBin qw($Bin $Script);

use File::Basename qw(basename dirname);

use Bio::SeqIO;

my $BEGIN_TIME=time();

my $version="1.0";

------------------------------------------------------------------

# GetOptions

------------------------------------------------------------------

($fa,$flank,$epcrfa,$od,$cut,$outprefix,$PRIMER_NUM_RETURN,$PRIMER_OPT_SIZE,$PRIMER_MIN_SIZE,$PRIMER_MAX_SIZE,$PRIMER_PRODUCT_SIZE_RANGE,$PRIMER_MAX_NS_ACCEPTED);

GetOptions(

"help|?"

=>\&USAGE,

"fa=s"=>\$fa,

"flank=s"=>\$flank,

"PRIMER_MIN_SIZE=s"=>\$PRIMER_MIN_SIZE,

"PRIMER_OPT_SIZE=s"=>\$PRIMER_OPT_SIZE,

"PRIMER_MAX_SIZE=s"=>\$PRIMER_MAX_SIZE,

"PRIMER_NUM_RETURN=s"=>\$PRIMER_NUM_RETURN,

"PRIMER_PRODUCT_SIZE_RANGE=s"=>\$PRIMER_PRODUCT_SIZE_RANGE,

"epcrfa=s"=>\$epcrfa,

"od=s"=>\$od,

"prefix=s"=>\$outprefix,

) or

&USAGE;

&USAGE unless($fa);

sub USAGE {

my $usage=<

Program: $0

Version: $version

Contact: huang2002003\@126.com

Discription:

Usage:

Options:

-fa Input fa file, forced

-flank 100

cut 100

-PRIMER_MIN_SIZE 18 Minimum acceptable

length of a primer. Must be greater than 0 and less than or equal

PRIMER_MAX_SIZE

-PRIMER_OPT_SIZE 20 Optimum length (in

bases) of a primer. Primer3 will attempt to pick primers close to

this length.

-PRIMER_MAX_SIZE 23 Maximum acceptable

length (in bases) of a primer. Currently this parameter cannot

larger than 35. This limit is

governed by maximum oligo size for which primer3's

melting-temperature

is valid.

-PRIMER_PRODUCT_SIZE_RANGE 100-300 The

associated values specify the lengths of the product that the

user

wants the

primers to create, and is a space separated list of elements of the

form

-PRIMER_NUM_RETURN 1 Total num primer to

-epcrfa run E-PCR to

filter multi mapped primers; required;

-od Key of output dir,default cwd;

-prefix defoult demo;

-h Help

USAGE

print $usage;

exit 1;

}

$PRIMER_MIN_SIZE||=18;

$PRIMER_OPT_SIZE||=20;

$PRIMER_MAX_SIZE||=23;

$PRIMER_NUM_RETURN||=1;

$PRIMER_MAX_NS_ACCEPTED||=1;

$PRIMER_PRODUCT_SIZE_RANGE||="100-300";

$od||=getcwd;

$flank||=100;

$od=abs_path($od);

unless(-d $od){ mkdir $od;}

$outprefix||="SSR";

$fa=abs_path($fa);

$epcrfa||=$fa;

$epcrfa=abs_path($epcrfa);

#################################################################

my%ref=();

my%ref_len=();

my$in = Bio::SeqIO->new(-file => "$fa"

-format => 'Fasta');

while ( my $seq = $in->next_seq() ) {

$ref{$seq->id}=$seq;

$ref_len{$seq->id}=$seq->length;

}

$in->close();

############find SSR by misa#######################

print "perl /biosoft/MISA/my_misa1.pl $fa $od/$outprefix.misa

$od/$outprefix.misa.statistics\n";

system("perl /biosoft/MISA/my_misa1.pl $fa $od/$outprefix.misa

$od/$outprefix.misa.statistics");

###########################primer design

###############################

open IN,"$od/$outprefix.misa" or die "$!";

open OUT,">$od/primer3.input" or die "$!";

my%ssr_pos=(); #ssr相关信息

my%SSR=(); # SSR type

while(my$line=){

chomp $line;

my@tmp=split(/\t/,$line);

next if ($tmp[0] eq "ID");

my$ID="$tmp[0]_$tmp[5]_$tmp[6]_$tmp[4]";

$ssr_pos{$ID}="$tmp[0]\t$tmp[5]\t$tmp[6]\t$tmp[4]\t$tmp[2]";

my$seq=&get_target_fa($tmp[0],$tmp[5]-$flank,$tmp[6]+$flank);

#print $seq."\n";

my$tar=0;

if(($tmp[5]-$flank)<=0){

$tar=$tmp[5];

}else{

$tar=$flank;

}

if ($seq){

$SSR{$ID}=$tmp[3];

print OUT "SEQUENCE_ID=$ID\n";

print OUT "SEQUENCE_TEMPLATE=$seq\n";

printf OUT ("SEQUENCE_TARGET=%d,%d\n",$tar+1,$tmp[4]);

print OUT "SPRIMER_TASK=pick_detection_primers\n";

print OUT "PRIMER_PICK_LEFT_PRIMER=1\n";

print OUT "PRIMER_PICK_RIGHT_PRIMER=1\n";

print OUT "PRIMER_NUM_RETURN=$PRIMER_NUM_RETURN\n";

print OUT "PRIMER_MAX_END_STABILITY=250\n";

print OUT "PRIMER_MIN_SIZE=$PRIMER_MIN_SIZE\n";

print OUT "PRIMER_OPT_SIZE=$PRIMER_OPT_SIZE\n";

print OUT "PRIMER_MAX_SIZE=$PRIMER_MAX_SIZE\n";

print OUT "PRIMER_MAX_NS_ACCEPTED=1\n";

print OUT "PRIMER_MIN_GC=40.0\nPRIMER_MAX_GC=60.0\n";

print OUT

"PRIMER_PRODUCT_SIZE_RANGE=$PRIMER_PRODUCT_SIZE_RANGE\n";

printf OUT

("SEQUENCE_INTERNAL_EXCLUDED_REGION=%d,%d\n",$tar+1,$tmp[4]);

print OUT "=\n";

}

close(OUT);

close(IN);

print "/biosoft/Primer3/primer3-2.3.7/src/primer3_core

-p3_settings_file=/biosoft/Primer3/primer3-2.3.7/default_settings.txt

-output=$od/$outprefix.primers

$od/primer3.input\n";

system("/biosoft/Primer3/primer3-2.3.7/src/primer3_core

-p3_settings_file=/biosoft/Primer3/primer3-2.3.7/default_settings.txt

-output=$od/$outprefix.primers

$od/primer3.input");

###############################e-PCR

#####################

my%Pseq=();

$/="=\n";

open IN ,"$od/$outprefix.primers" or die "$!";

open OUT,">$od/$outprefix.primers.xls" or die "$!";

print OUT

"#SEQUENCE_ID\tPRIMER_LEFT_SEQUENCE\tPRIMER_RIGHT_SEQUENCE\tPRIMER_PAIR_PRODUCT_SIZE\tPRIMER_LEFT_TM\tPRIMER_RIGHT_TM\tPRIMER_LEFT_GC_PERCENT\tPRIMER_RIGHT_GC_PERCENT\tSSR\tSSR_TYPE\n";

while(my$line=){

chomp$line;

my@field=split(/\n/,$line);

my%primers=&get_hash(@field);

next if !

defined($primers{"PRIMER_PAIR_NUM_RETURNED"}) or

$primers{"PRIMER_PAIR_NUM_RETURNED"}==0;

if($primers{"PRIMER_PAIR_NUM_RETURNED"}>=1){

for my$i (0..($primers{"PRIMER_PAIR_NUM_RETURNED"}-1)){

$Pseq{"$primers{SEQUENCE_ID}"}{$i}=[$primers{"PRIMER_LEFT_${i}_SEQUENCE"},$primers{"PRIMER_RIGHT_${i}_SEQUENCE"}];

print OUT

join("\t",("$primers{SEQUENCE_ID}_${i}",$primers{"PRIMER_LEFT_${i}_SEQUENCE"},$primers{"PRIMER_RIGHT_${i}_SEQUENCE"},$primers{"PRIMER_PAIR_${i}_PRODUCT_SIZE"},$primers{"PRIMER_LEFT_${i}_TM"},$primers{"PRIMER_RIGHT_${i}_TM"},$primers{"PRIMER_LEFT_${i}_GC_PERCENT"},$primers{"PRIMER_RIGHT_${i}_GC_PERCENT"},$SSR{$primers{"SEQUENCE_ID"}}))."\n";

}

$/="\n";

close(OUT);

print "/biosoft/e-PCR/Linux-x86_64/e-PCR -w9

-f 1 -m100 $od/$outprefix.primers.xls D=100-400 $epcrfa N=1 G=1 T=3

> $od/$outprefix.epcr.out\n";

system("/biosoft/e-PCR/Linux-x86_64/e-PCR

-w9 -f 1 -m100 $od/$outprefix.primers.xls

D=100-400 $epcrfa N=1 G=1 T=3 > $od/$outprefix.epcr.out");

######################################################################################

open IN ,"$od/$outprefix.epcr.out" or die "$!";

open OUT,">$od/$outprefix.primers.result.xls" or die

"$!";

my %P=();

while(my$line=){

chomp$line;

my@tmp=split(/\t/,$line);

next if $tmp[6]+$tmp[7] >1;

$tmp[5]=~s#/.*$##;

my$ssr_id=$tmp[1];

my$num=0;

($ssr_id,$num)=($ssr_id=~/(.*)_(\d+)$/);

$P{$tmp[1]}{"$tmp[1]_$tmp[3]_$tmp[4]"}=[$tmp[1],$ssr_pos{$ssr_id},$Pseq{$ssr_id}{$num}->[0],$Pseq{$ssr_id}{$num}->[1],$tmp[5],@tmp[6..$#tmp]];

}

close(IN);

print OUT

"#GAP_ID\tCHR\tGAP_START\tGAP_END\tGAP_LENGTH\tGAP_TYPE\tPRIMER_LEFT_SEQUENCE\tPRIMER_RIGHT_SEQUENCE\tPRIMER_PAIR_PRODUCT_SIZE/PREDICT_RANGE\tMISM\tGAPS\tPRIMER_LEFT_TM\tPRIMER_RIGHT_TM\tPRIMER_LEFT_GC_PERCENT\tPRIMER_RIGHT_GC_PERCENT\tGAP\n";

open OUT1,">$od/$outprefix.NOprimers.result.xls" or die

"$!";

for my $id (sort keys %SSR){

for my$i(0..($PRIMER_NUM_RETURN-1)){

if (exists $P{"${id}_$i"}){

my@ps=(keys %{$P{"${id}_$i"}});

if(@ps ==1){

print OUT join("\t",@{$P{"${id}_$i"}{$ps[0]}})."\n";

}

}else{

print OUT1 "${id} not primers\n";

}