1.rename方便后续处理。
2.trim_galore去接头。
#这个软件使用之前要先安装fastqc和cutadapt
ls -d OV*|while read OV; do echo $OV;
trim_galore -q 20 --phred33 --paired -a AGATCGGAAGAGCACACGTCT -a2 GATCGTCGGACTGTAGAACTCTGAAC \
-e 0.01 -o /data/XXXXX/task_OV/01trim/ ${OV}/*_R1.fastq.gz ${OV}/*_R2.fastq.gz>>${OV}.log; done
3.fastqc看去接头情况
fastqc -o /data/XXXXX/task_OV/02fastqc -t 10 *.fq.gz
multiqc -o /data/XXXXX/task_OV/02fastqc/multiqc *_fastqc.zip
4.bwa比对
for(( i=1; i<157; i++ ))
do
bwa mem /data/XXXXX/WGS/02hg19/chrom.37.fa \
OV${i}_R1_val_1.fq.gz \
OV${i}_R2_val_2.fq.gz > /data/XXXXX/task_OV/03bwa/OV${i}.sam
done
5.samtools
#SAM2BAM
for(( i=1; i<157; i++ ))
do
samtools view -bS OV${i}.sam > /data/XXXXX/task_OV/04samtools/OV${i}.bam
done
#SORT
for(( i=10; i<157; i++ ))
do
samtools sort OV${i}.bam /data/XXXXX/task_OV/05sorted/OV${i}.sort
done
#INDEX
for(( i=1; i<157; i++ ))
do
samtools index OV${i}.sort.bam
done
6.bedtools
bedtools multicov -f 0.9 -bams OV{1..157}.sort.bam -bed /data/XXXXX/task_LE-miR/03-2miRSeq/hsa.37note_mature.gff > differmiR.txt
7.delete repeat
sort rowname.txt |uniq -d >repeat.name.txt
#repeat
setwd("C:\\Users\\dell-xps\\Desktop")
repeat.name<-read.table("repeat.name.txt", header=TRUE)
differ <- read.table("differ2.txt", header=TRUE)
rowname <- read.table("rowname.txt", header=TRUE)
j<-NULL
for(i in 1:6708){
a <- as.vector(repeat.name[i,1])
h <- 0
g <- NULL
for(k in 1:99236){
b <- as.vector(rowname[k,1])
if(a==b){
c <- differ[k,]
h <- h+c
}
g <- cbind(a,h)
g <- as.vector(g)
}
j <- rbind(j,g)
}
write.table(j, "serum.iso.repeat.txt")
repeat.iso<-read.table("repeat.txt", header=TRUE)
differ <- read.table("differ.txt", header=TRUE)
for(i in 1:6708){
a <- as.vector(repeat.iso[i,1])
for(k in 1:99236){
b <- as.vector(differ[k,1])
if(a==b){
differ[k,]<-repeat.iso[i,]
}
}
}
write.table(differ, "serum.iso.repeat.plus.txt")
8.RPM+QC+NORMALIZATION+COMBAT+TEST(看foldchange)
#RPM
setwd("C:\\User