raw.s03.singlet <- raw.s03[, raw.s03@meta.data[, DF.name.s03]=="Singlet"]#saveRDS(raw.s03.singlet,"~/Li/data/raw.s03.singlet")
raw.s03.singlet
# An object of class Seurat# 53255 features across 2785 samples within 2 assays# Active assay: SCT (21358 features, 3000 variable features)# 1 other assay present: RNA# 2 dimensional reductions calculated: pca, umap
2.2 Calculate QC
# mitochondrial ratio
raw.s03.singlet[["percent.mt"]]<- PercentageFeatureSet(object = raw.s03.singlet, pattern ="^MT-", assay ="RNA")# number of genes detected per UMI - represent the complexity of our dataset (more genes detected per UMI, more complex our data)
raw.s03.singlet[["log10GenesPerUMI"]]<- log10(raw.s03.singlet$nFeature_RNA) / log10(raw.s03.singlet$nCount_RNA)# 可视化质控指标# 一般可视化以下三个参数即可:"nFeature_RNA", "nCount_RNA", "percent.mt"
VlnPlot(raw.s03.singlet, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol =3, pt.size=0.05)# 2.2.1 Cell-level filtering-------------------------------------------------# 筛选参考阈值# nUMI > 500# nGene > 250# log10GenesPerUMI > 0.8# mitoRatio < 0.2
filteredData_s03 <- subset(x = raw.s03.singlet,
subset=(nCount_RNA >=500)&(nFeature_RNA >=250)&(log10GenesPerUMI >0.80)&(percent.mt >0.20)&(percent.mt <30))
filteredData_s03
# An object of class Seurat# 53255 features across 2721 samples within 2 assays# Active assay: SCT (21358 features, 3000 variable features)# 1 other assay present: RNA# 2 dimensional reductions calculated: pca, umap
dim(filteredData_s03)# 21358 2721# 2.2.2 Gene-level filtering-------------------------------------------------# 首先,我们将去除所有细胞中零表达的基因。此外,我们将根据流行程度执行一些过滤。如果一个基因只在少数细胞中表达,它就没有特别的意义。对于我们的数据,我们选择只保留在3个或3个以上细胞中表达的基因# Extract counts
counts.s03 <- GetAssayData(object = filteredData_s03, slot ="counts", assay ="RNA")# Output a logical vector for every gene on whether the more than zero counts per cell
nonzero.s03 <- counts.s03 >0# Sums all TRUE values and returns TRUE if more than 3 TRUE values per gene
keep_genes.s03 <- Matrix::rowSums(nonzero.s03)>=3# Only keeping those genes expressed in more than 3 cells
filtered_counts.s03 <- counts.s03[keep_genes.s03, ]# Reassign to filtered Seurat object
clean.s03 <- CreateSeuratObject(filtered_counts.s03, meta.data = filteredData_s03@meta.data)# clean.s03# An object of class Seurat# 23511 features across 2721 samples within 1 assay# Active assay: RNA (23511 features, 0 variable features)
dim(clean.s03)# 23511 2721# saveRDS(clean.s03, file = "~/Li/data/clean.s03.RDS")
# 【s04的过滤】#2.2remove Doublets==================================================
library(DoubletFinder)
raw.s04 <- SCTransform(raw.s04, verbose = F) %>% RunPCA(npcs =30, verbose = F)# pdf("Li/figures/elbow-10X-s04.pdf")# ElbowPlot(raw.s04,ndims = 30)# dev.off()# 选定dims为25
raw.s04 <- RunUMAP(raw.s04, dims=1:25,verbose = F) %>% FindNeighbors(reduction ="pca", dims =1:25)# saveRDS(raw.s04,"~/Li/data/raw.s04")# checkRes <- FindClusters(object = raw.s04, resolution = c(seq(.1,1.4,.2)), verbose = F)# pdf(file = "~/Li/figures/QC-checkres-s04.pdf", height = 8, width = 10, onefile = TRUE)# clustree(checkRes@meta.data,prefix = "SCT_snn_res.")# dev.off()
raw.s04<- FindClusters(raw.s04, resolution =0.7)# Optimize the parameters
sweep.res.list.s04 <- paramSweep_v3(raw.s04, PCs =1:25, sct = T)# Use log transform
sweep.stats.s04 <- summarizeSweep(sweep.res.list.s04, GT = F)# save(sweep.stats, file = "data/excludeP4B.obj/clean.data/sweepStats-10X.RData")# Show the best parameter
bcmvn.s04 <- find.pK(sweep.stats.s04)# Extract the best pK
pK_bcmvn.s04 <- bcmvn.s04$pK[which.max(bcmvn.s04$BCmetric)] %>% as.character() %>% as.numeric()
pK_bcmvn.s04
## [1] 0.04# Estimate the percentage of homotypic doublets
homotypic.prop.s04 <- modelHomotypic(raw.s04$seurat_clusters)# 10x的双细胞率为0.9%/1000 cell= 0.009/1000
*3000=0.027
DoubletRate <- 0.027
nExp_poi.s04 <- round(DoubletRate*ncol(raw.s04))# Adjust for homotypic doublets
nExp_poi.adj.s04 <- round(nExp_poi.s04*(1-homotypic.prop.s04))
raw.s04 <- doubletFinder_v3(raw.s04, PCs =1:25, pN =0.25, pK = pK_bcmvn.s04, nExp = nExp_poi.adj.s04, reuse.pANN = F, sct = T)
saveRDS(raw.s04, file="~/Li/data/raw.s04-db.RDS")# Present the res, classification info is saved in meta.data
DF.name.s04 <- colnames(raw.s04@meta.data)[grepl("DF.classification", colnames(raw.s04@meta.data))]# Visualization# pdf(file = "~/Li/figures/doublet-res.s04.pdf", height = 5, width = 12, onefile = TRUE)
cowplot::plot_grid(ncol =2, DimPlot(raw.s04, group.by ="orig.ident") + NoAxes(),DimPlot(raw.s04, group.by = DF.name.s04) + NoAxes())
raw.s04.singlet <- raw.s04[, raw.s04@meta.data[, DF.name.s04]=="Singlet"]
saveRDS(raw.s04.singlet,"~/Li/data/raw.s04.singlet")
raw.s04.singlet
# An object of class Seurat# 53255 features across 2795 samples within 2 assays# Active assay: SCT (22377 features, 3000 variable features)# 1 other assay present: RNA# 2 dimensional reductions calculated: pca, umap# 2.3 Calculate QC-------------------------------------------------------------# mitochondrial ratio
raw.s04.singlet[["percent.mt"]]<- PercentageFeatureSet(object = raw.s04.singlet, pattern ="^MT-", assay ="RNA")# number of genes detected per UMI - represent the complexity of our dataset (more genes detected per UMI, more complex our data)
raw.s04.singlet[["log10GenesPerUMI"]]<- log10(raw.s04.singlet$nFeature_RNA) / log10(raw.s04.singlet$nCount_RNA)# 可视化质控指标# 一般可视化以下三个参数即可:"nFeature_RNA", "nCount_RNA", "percent.mt"
VlnPlot(raw.s04.singlet, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol =3, pt.size=0.05)# 2.3.1 Cell-level filtering-------------------------------------------------# 筛选参考阈值# nUMI > 500# nGene > 250# log10GenesPerUMI > 0.8# mitoRatio < 0.2
filteredData_s04 <- subset(x = raw.s04.singlet,
subset=(nCount_RNA >=500)&(nFeature_RNA >=250)&(log10GenesPerUMI >0.80)&(percent.mt >0.20)&(percent.mt <30))
filteredData_s04
# An object of class Seurat# 55545 features across 2779 samples within 2 assays# Active assay: SCT (22377 features, 3000 variable features)# 1 other assay present: RNA# 2 dimensional reductions calculated: pca, umap
dim(filteredData_s04)# 22377 2779# 2.3.2 Gene-level filtering-------------------------------------------------# 首先,我们将去除所有细胞中零表达的基因。此外,我们将根据流行程度执行一些过滤。如果一个基因只在少数细胞中表达,它就没有特别的意义。对于我们的数据,我们选择只保留在3个或3个以上细胞中表达的基因# Extract counts
counts.s04 <- GetAssayData(object = filteredData_s04, slot ="counts", assay ="RNA")# Output a logical vector for every gene on whether the more than zero counts per cell
nonzero.s04 <- counts.s04 >0# Sums all TRUE values and returns TRUE if more than 3 TRUE values per gene
keep_genes.s04 <- Matrix::rowSums(nonzero.s04)>=3# Only keeping those genes expressed in more than 3 cells
filtered_counts.s04<- counts.s04[keep_genes.s04, ]# Reassign to filtered Seurat object
clean.s04 <- CreateSeuratObject(filtered_counts.s04, meta.data = filteredData_s04@meta.data)# clean.s04# An object of class Seurat# 24825 features across 2779 samples within 1 assay# Active assay: RNA (23511 features, 0 variable features)
dim(clean.s04)# 24825 2779
saveRDS(clean.s04, file="~/Li/data/clean.s04.RDS")
clean.s03$source<- "WT"
clean.s04$source<- "WT+h"
split_seurat <- SplitObject(mergedData, split.by="source")# Select the most variable features to use for integration
integ_features <- SelectIntegrationFeatures(object.list=split_seurat,nfeatures=3000)# Prepare the SCT list object for integration
split_seurat <- PrepSCTIntegration(object.list = split_seurat,
anchor.features = integ_features)# Perform CCA, find the best buddies or anchors and filter incorrect anchors. # Find best buddies - can take a while to run
integ_anchors <- FindIntegrationAnchors(object.list = split_seurat,
normalization.method ="SCT",
anchor.features = integ_features)# Integrate across conditions
integrated.s <- IntegrateData(anchorset = integ_anchors, normalization.method ="SCT")
integrated.s <- RunPCA(integrated.s, npcs =30, verbose = F) %>% RunUMAP(dims =1:30, reduction ="pca") %>% RunTSNE(dims =1:30)
integrated.s
#An object of class Seurat#52465 features across 5500 samples within 3 assays#Active assay: integrated (3000 features, 3000 variable features)# 2 other assays present: RNA, SCT# 3 dimensional reductions calculated: pca, umap, tsne
pdf("~/Li/figures/integrated_umap.pdf")
DimPlot(integrated.s, group.by ="source", reduction ="umap")
dev.off()