运行日记1

Lemonstan

已于 2022-11-06 01:19:29 修改

阅读量893

点赞数

文章标签： r语言

于 2022-11-05 03:49:54 首次发布

本文链接：https://blog.csdn.net/Lemonstan/article/details/127699253

版权

运行：Ctrl + Enter

设置语言

#Set Console language into English
Sys.setenv(LANGUAGE = "en")

加载(library)

library(Seurat) 
library(sctransform)
library(SeuratWrappers) 
library(CellChat) 
library(copykat)
library(tximport)
library(scran)
library(monocle3)  
library(CoGAPS)
library(harmony)    
library(liger)
library(Nebulosa)  
library(glmpca)
library(schex)
library(ggplot2)
library(biomaRt)
library(tidyverse)
library(scCATCH)
library(dplyr)
library(cowplot)
library(BiocManager)
library(CellPlot)
library(patchwork)
library(AnnotationHub)
library(BiocGenerics)
library(parallel)
library(SingleR) 
library(celldex) 
library(scRNAseq) 
library(Matrix)
library(RCurl)
library(scales)
library(devtools)
library(BiocManager)
library(KEGGREST)
library(KEGGgraph)
library(mygene)
library(plotrix)
library(EnhancedVolcano)
library(magrittr)
library(DESeq2)
library(SingleCellExperiment)
library(ggrepel)
library(ggpubr)
library(ggthemes)
library(multtest)
library(metap)
library(plotly)
library(clusterProfiler)
library(DOSE)
library(readxl)
library(enrichplot)
library(ggnewscale)
library(data.table)
library(igraph)
library(pathview)
library(ggupset)
library(europepmc)
library(rgl)
library(arsenal)
library(VennDetail)
library(venn)
library(ggvenn)
library(ggalt)

load("D:/Training/.RData")

Healthy1_data <- Read10X(data.dir = "D:/Training/1. Datasets/Healthy Kidney/GSE131685_RAW/GSM4145204/")
#as(<dgTMatrix>, "dgCMatrix") is deprecated since Matrix 1.5-0; do as(., "CsparseMatrix") instead

Healthy1 <- CreateSeuratObject(counts = Healthy1_data,  min.cell = 3 ,min.festures = 200,  project = "Healthy",assay = "RNA")
Healthy1

An object of class Seurat 
17419 features across 8164 samples within 1 assay 
Active assay: RNA (17419 features, 0 variable features)

Healthy2_data <- Read10X(data.dir = "D:/Training/1. Datasets/Healthy Kidney/GSE131685_RAW/GSM4145205/")
Healthy2 <- CreateSeuratObject(counts = Healthy2_data,  min.cell = 3 ,min.festures = 200,  project = "Healthy",assay = "RNA")
Healthy2

An object of class Seurat 
17255 features across 6499 samples within 1 assay 
Active assay: RNA (17255 features, 0 variable features)


```r
#An object of class Seurat 
#17255 features across 6499 samples within 1 assay 
#Active assay: RNA (17255 features, 0 variable features)

An object of class Seurat
17255 features across 6499 samples within 1 assay
Active assay: RNA (17255 features, 0 variable features)

Healthy3_data <- Read10X(data.dir = "D:/Training/1. Datasets/Healthy Kidney/GSE131685_RAW/GSM4145206/")
Healthy3 <- CreateSeuratObject(counts = Healthy3_data,  min.cell = 3 ,min.festures = 200, project = "Healthy",assay = "RNA")
Healthy3

Healthy <- merge(Healthy1, y = c(Healthy2,Healthy3),add.cell.ids = c ("Healthy1","Healthy2","Healthy3"),project = "Healthy")
Healthy

Error in RowMergeMatricesList(mat_list = all.mat, mat_rownames = all.rownames,  :
std::bad_alloc

#QC and selecting cells for further analysis
mito.genes <- grep(pattern = "^MT-", x = rownames(Healthy@assays[["RNA"]]), value = TRUE)
percent.mito <- Matrix::colSums(Healthy@assays[["RNA"]][mito.genes, ])/Matrix::colSums(Healthy@assays[["RNA"]])

# AddMetaData adds columns to object@meta.data, and is a great place to
# stash QC stats
#Seurat v2 function, but shows compatibility in Seurat v3
Healthy <- AddMetaData(object = Healthy, metadata = percent.mito, col.name = "percent.mito")

#in case the above function does not work simply do:
Healthy$percent.mito <- percent.mito
VlnPlot(object = Healthy, features = c("nFeature_RNA", "nCount_RNA", "percent.mito"), ncol = 3)

# GenePlot is typically used to visualize gene-gene relationships, but can
# be used for anything calculated by the object, i.e. columns in
# object@meta.data, PC scores etc.  Since there is a rare subset of cells
# with an outlier level of high mitochondrial percentage and also low UMI
# content, we filter these as well
par(mfrow = c(1, 2))
P1 <- FeatureScatter(object = Healthy, feature1 = "nCount_RNA", feature2 = "percent.mito")
P2 <- FeatureScatter(object = Healthy, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
P1+P2

#store mitochondrial percentage in object meta data
Healthy <- PercentageFeatureSet(Healthy, pattern = "^MT-", col.name = "percent.mt")
#Apply sctransform normalization
#Note that this single command replaces NormalizeData, ScaleData, and FindVariableFeatures.
#Transformed data will be available in the SCT assay, which is set as the default after running sctransform
#During normalization, we can also remove confounding sources of variation, for example, mitochondrial mapping percentage
#run sctransform
Healthy <- SCTransform(Healthy, vars.to.regress = "percent.mt", verbose = T)

#Cluster cells
#These are now standard steps in the Seurat workflow for visualization and clustering
Healthy <- RunPCA(Healthy, verbose = FALSE)
Healthy <- RunUMAP(Healthy, dims = 1:30, verbose = FALSE)
Healthy <- FindNeighbors(Healthy, dims = 1:30, verbose = FALSE)
Healthy <- FindClusters(Healthy, verbose = FALSE, resolution = 0.6)
#Perform dimensionality reduction by PCA and UMAP embedding
DimPlot(Healthy, label = T, label.box = T, label.size = 6,
        pt.size = 0.5)

Healthy.cluster.cell_numbers <- table(Healthy@active.ident)
view(Healthy.cluster.cell_numbers)
write.csv(Healthy.cluster.cell_numbers,
          "D:/Training/3. Figures/1.Healthy Cluster Cell numbers.csv", quote = F)
Healthy.Cluster.markers <- FindAllMarkers(Healthy, only.pos = T, min.pct = 0.25, thresh.use = 0.25)
Healthy.Cluster.markers %>% group_by(cluster) %>% top_n(2, avg_log2FC)
View(Healthy.Cluster.markers)
write.csv(Healthy.Cluster.markers,
          "D:/Training/3. Figures/1.Healthy Cluster markers.csv", quote = F)

#Choose Top10 gene markers for each cluster
Healthy.Cluster.markers_TOP10 <- Healthy.Cluster.markers %>% group_by(cluster) %>% top_n(10, avg_log2FC)
view(Healthy.Cluster.markers_TOP10)
write.csv(Healthy.Cluster.markers_TOP10,
          "D:/Training/3. Figures/1.Healthy Cluster markers.TOP10.csv", quote = F )
######zoom the right corner window berfore run
DoHeatmap(Healthy, Healthy.Cluster.markers_TOP10$gene)

#Choose Top5 gene markers for each cluster
Healthy.Cluster.markers_TOP5 <- Healthy.Cluster.markers %>% group_by(cluster) %>% top_n(5, avg_log2FC)
view(Healthy.Cluster.markers_TOP5)
write.csv(Healthy.Cluster.markers_TOP5,
          "D:/Training/3. Figures/1.Healthy Cluster markers.TOP5.csv", quote = F )
DoHeatmap(Healthy, Healthy.Cluster.markers_TOP5$gene)

#B cell marker: MS4A1
#Neutrophil marker: MNDA, BCL2A1
#T cell marker: CD3D 
feature_genes <- c("MS4A1", "MNDA", "BCL2A1","CD3D")
FeaturePlot(Healthy, features = feature_genes, ncol = 2)

VlnPlot(Healthy, features = feature_genes, ncol = 2, pt.size = 0)

p_Healthy <- plot_density(Healthy, features = feature_genes)
p_Healthy + plot_layout(ncol = 2)

#Cluster 13 is B cell
#Cluster 9 is Neutrophil
#Cluster 10,12 is T cell
#assign cell names / label cell clusters
current.cluster.ids <- c(9, 13, 10 , 12)

new.cluster.ids <- c("Neutrophil", "B cell" , "T cell","T cell")

Healthy@active.ident <- plyr::mapvalues(x = Healthy@active.ident, from = current.cluster.ids, to = new.cluster.ids)

DimPlot(Healthy, label = T, label.box = T, label.size = 6, pt.size = 0.5)
# DimPlot(Healthy, label = TRUE, pt.size = 0.5) Healthy.cluster.cell_numbers <- table(Healthy@active.ident)

P.Healthy <- DimPlot(Healthy, label = TRUE, pt.size = 0.5, 
                     label.box = T, label.size = 4) 
P.Healthy

DimPlot(Healthy, label = T, label.box = T, label.size = 6, pt.size = 0.5)