/mnt/lustre/user/wubin/01.Program/Scripts/01.script/GeneLab/check_EGFR_790_797.pl
#!/usr/bin/perl -w
use strict;
use Getopt::Long;
my ($read_len, $chr, $pos1, $ref1, $alt1, $pos2, $ref2, $alt2);
my $usage =<<qq;
perl $0 /mnt/lustre/user/wubin/03.Clinical_Data/11.PanCancer/BGI688_Local_20210901/pancancer688__21SSP2100168/Cancer/4.MarkDup/pancancer688__21SSP2100168__Cancer.markdup.bam \
--chr chr7
--pos1 55249071
--pos2 55249092
--ref1 C
--alt1 T
--ref2 G
--alt2 C
--read_len 93
qq
GetOptions('read_len=i' => \$read_len,
'chr=s' => \$chr,
'pos1=i' => \$pos1,
'pos2=i' => \$pos2,
'ref1=s' => \$ref1,
'alt1=s' => \$alt1,
'ref2=s' => \$ref2,
'alt2=s' => \$alt2);
die "$usage\n" unless (@ARGV>=1);
$chr ||= 'chr7';
$pos1 ||= 55249071;
$pos2 ||= 55249092;
$ref1 ||= 'C';
$alt1 ||= 'T';
$ref2 ||= 'G';
$alt2 ||= 'C';
$read_len ||= 93;
if (abs($pos2-$pos1)+1 > $read_len) {
die "chr:$pos2-chr:$pos1: read_length: $read_len\nthe 2 sites are separated too far to determine the cis/trans relation\n";
}
my $bam = shift;
#bam文件中,如果fq文件中的某条reads被比对到了负链上,则会被反向互补到正链上来,这样bam文件中的序列总是正链上的
my ($depth_pos1,$depth_pos2, $cis_count, $trans_pos1_count, $trans_pos2_count) = (0,0,0,0,0);
open IN, "samtools view $bam |" || die $!;
while(<IN>){
my @line = split /\t/;
my ($flag, $chr_tmp, $start, $seq) = @line[1..3,9];
my $end = $start + length($seq) -1;
if(($flag & 4) == 4){next;} #没比对上
if($chr_tmp ne $chr){next;} #染色体/contig都与目标位点不一致
if (in_expanse($pos1, $start, $end)){$depth_pos1++;} # $pos1的深度
if (in_expanse($pos2, $start, $end)){$depth_pos2++;} # $pos2的深度
#只要有一个位点不在read坐标范围内,就不计算cis, trans
if(not in_expanse($pos1, $start, $end)){next;} # $pos1不在该条序列的范围内
if(not in_expanse($pos2, $start, $end)){next;} # $pos2不在该条序列的范围内
my $base1_in_seq = get_base($pos1, $start, $seq);
my $base2_in_seq = get_base($pos2, $start, $seq);
if($base1_in_seq eq $alt1 && $base2_in_seq eq $alt2){$cis_count++;}
if($base1_in_seq eq $alt1 && $base2_in_seq eq $ref2){$trans_pos1_count++;}
if($base1_in_seq eq $ref1 && $base2_in_seq eq $alt2){$trans_pos2_count++;}
# if($base1_in_seq eq $ref1 && $base2_in_seq eq $ref2){} #两位位点都没有突变
}
close IN;
print "Depth:\t$depth_pos1\t$depth_pos2\n";
print "Cis\t$chr:$pos1$ref1>$alt1\t$chr:$pos2$ref2>$alt2\t$cis_count\n";
print "Trans\t$chr:$pos1$ref1>$alt1\t$chr:$pos2$ref2>$alt2\t$trans_pos1_count|$trans_pos2_count\n";
sub get_base{
my ($pos, $start, $seq) = @_;
my $base = substr($seq, $pos-$start, 1);
return $base;
}
sub in_expanse{
my ($pos, $start, $end) = @_;
if($start >= $end){($start, $end) = ($end, $start);} #其实用不着
if($pos >=$start && $pos<=$end){
return 1;
}else{
return 0;
}
}