# Here is a sample of the kind of data we are working with:
##reference=file:///home/wl/Data/data/yeast/ref/S288C_R64.fasta
##source=SelectVariants
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT d90_2_1_Clean d90_2_2_Clean d90_2_4_Clean d90_2_6_Clean d90_2_7_Clean d90_3_11_Clean d90_3_12_Clean d90_3_13_Clean d90_3_14_Clean d90_3_2_Clean d90_3_3_Clean d90_3_4_Clean d90_3_6_Clean d90_3_7_Clean d90_3_9_Clean
#I 65 . C A 214.64 PASS AC=10;AF=0.357;AN=28;BaseQRankSum=3.713;DP=93;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=7.8615;InbreedingCoeff=-0.4411;MQ=51.26;MQ0=0;MQRankSum=-3.460;QD=3.46;ReadPosRankSum=0.591;SB=-0.02;comp2 GT:AD:DP:GQ:PL 0/1:3,2:6:48.17:48,0,64 0/1:4,2:6:12.64:13,0,106 0/0:5,0:5:15.03:0,15,150 0/1:8,4:12:60.60:61,0,211 ./. 0/1:5,1:6:15.42:15,0,119 0/0:3,1:4:6.02:0,6,64 0/1:2,2:4:24.58:25,0,26 0/0:3,1:4:3.01:0,3,32 0/1:10,4:14:60.46:60,0,227 0/1:3,1:4:21.41:21,0,76 0/0:11,2:13:0.45:0,0,254 0/0:2,3:5:3.01:0,3,32 0/1:4,2:6:21.40:21,0,83 1/1:1,2:4:2.99:24,3,0
#I 74 . C G 25.82 SNPFilter AC=3;AF=0.107;AN=28;BaseQRankSum=1.463;DP=110;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=6.7908;InbreedingCoeff=-0.1875;MQ=50.37;MQ0=0;MQRankSum=-1.866;QD=1.61;ReadPosRankSum=0.338;SB=9.61;comp2 GT:AD:DP:GQ:PL 0/0:7,0:7:18.04:0,18,193 0/0:12,0:12:30.06:0,30,303 0/0:5,0:6:15.03:0,15,150 0/0:14,0:14:39.08:0,39,398 ./. 0/1:4,2:6:34.35:34,0,101 0/1:4,1:5:26.48:26,0,26 0/0:3,1:4:6.02:0,6,65 0/0:4,0:4:3.01:0,3,32 0/0:12,1:14:1.52:0,2,302 0/1:3,1:5:23.49:23,0,48 0/0:15,1:16:39.09:0,39,409 0/0:5,1:6:6.01:0,6,63 0/0:6,0:6:12.03:0,12,128 0/0:5,0:5:6:0,6,56
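# Although there is a lot of data, every line follows the same fixed column order. The code below exploits that ordering: it splits each line on whitespace into an array and then prints only the fields we need.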
use strict;
use warnings;

# Convert a VCF file into a flat per-sample genotype report.
#
# Input : the VCF file at $input_path.
# Output: rows appended to $output_path, space-separated.
#   - For the "#CHROM ..." header line: one row holding the header names of
#     columns 0, 1, 3, 4 and 5 (CHROM, POS, REF, ALT, QUAL in the sample
#     header), with a leading and a trailing space.
#   - For every data line and every sample column whose genotype starts with
#     0/0, 0/1 or 1/1: one row "<sample> <CHROM> <POS> <REF> <ALT> <QUAL> <code>"
#     where <code> is S, H or Y respectively.
#   - "##" meta lines and any other genotype (e.g. "./.") produce no output.
# Dies if either file cannot be opened or the report cannot be flushed.

my $input_path  = "f:\\perl\\yeast_set2.S288C.stampy.pe.recal.variants.filter.vcf";
my $output_path = "f:\\perl\\a.csv";

# Index of the first per-sample column; columns 0-8 are the fixed VCF fields
# (CHROM POS ID REF ALT QUAL FILTER INFO FORMAT).
my $first_sample_column = 9;

# Columns copied into every report row.
my @report_columns = (0, 1, 3, 4, 5);

# Genotype prefix (first three characters of the sample field) => report code.
my %code_for = (
    '0/0' => 'S',
    '0/1' => 'H',
    '1/1' => 'Y',
);

open(my $vcf, '<', $input_path)
    or die("can not open the file! ($input_path: $!)");

# Open the report once, in append mode, instead of once per written row.
open(my $report, '>>', $output_path)
    or die("can not open the file! ($output_path: $!)");

# Column names from the most recent "#CHROM ..." header line; used to label
# each sample column.
my @category;

LINE:
while (my $line = <$vcf>) {

    # "##" meta-information lines carry nothing we report.
    next LINE if $line =~ /^##/;

    # Split on single whitespace characters, as the original did; trailing
    # empty fields (from the newline) are dropped by split.
    my @fields = split /\s/, $line;

    if ($line =~ /^#/) {
        @category = @fields;
        my @names = map { defined $_ ? $_ : '' } @category[@report_columns];
        print {$report} ' ', join(' ', @names), " \n";
        next LINE;
    }

    # Walk every sample column actually present on this line rather than a
    # hard-coded upper bound, so files with any number of samples work.
    for my $column ($first_sample_column .. $#fields) {

        # Classify THIS sample's call (the original tested column 9 for the
        # 0/1 and 1/1 cases, mislabelling every other sample).
        my $code = $code_for{ substr($fields[$column], 0, 3) };
        next if !defined $code;

        my $sample = defined $category[$column] ? $category[$column] : '';
        my @values = map { defined $_ ? $_ : '' } @fields[@report_columns];

        # Plain print: the data must not be interpreted as a printf format.
        print {$report} join(' ', $sample, @values, $code), "\n";
    }
}

close($vcf);

# Buffered write errors only surface at close, so check it.
close($report)
    or die("can not close the file! ($output_path: $!)");