Single-cell RNA sequencing reveals epithelial cells driving brain metastasis in lung adenocarcinoma

最新推荐文章于 2024-10-01 17:54:24 发布
科研Go
最新推荐文章于 2024-10-01 17:54:24 发布
阅读量614
点赞数 10
文章标签：服务器 r语言 github
本文链接：https://blog.csdn.net/KeYan_Go/article/details/135560412
版权
# 创建一个示例数据框
# GSE131907_Lung_Cancer_raw_UMI_matrix <- readRDS("F:/.meta brain/GSE131907_Lung_Cancer_raw_UMI_matrix.rds")
# data <- as.data.frame(GSE131907_Lung_Cancer_raw_UMI_matrix)
# save(data, file = "GSE131907_Lung_Cancer_RAW.rda")

# load("GSE131907_Lung_Cancer_RAW.rda")

# #normal
# normal_cols <- grepl("_LUNG_N", names(data))
# # 选择包含"_LUNG_N"的列
# normal <- data[,normal_cols]
# save(normal, file = "normal.rda")
load("normal.rda")

#PT_LNM_BM
# PT_LNM_BM_cols <- grepl("_LUNG_T|EBUS_|NS_|BRONCHO_", names(data))
# # 选择包含"--"的列
# PT_LNM_BM <- data[,PT_LNM_BM_cols]
# save(PT_LNM_BM, file = "PT_LNM_BM.rda")
load("PT_LNM_BM.rda")

# #tumor
# tumor_cols <- grepl("_LUNG_T|EBUS_06|EBUS_28|EBUS_49|BRONCHO_58", names(data))
# # 选择包含"--"的列
# tumor <- data[,tumor_cols]
# save(tumor, file = "tumor.rda")
load("tumor.rda")

# #Metastatic lymph node
# meta_ln_cols <- grepl("EBUS_10|EBUS_12|EBUS_13|EBUS_15|EBUS_19|EBUS_51|BRONCHO_11", names(data))
# # 选择包含"--"的列
# meta_ln <- data[,meta_ln_cols]
# save(meta_ln, file = "meta_ln.rda")
load("meta_ln.rda")

# #Normal lymph node
# normal_ln_cols <- grepl("_LN_",names(data))
# # 选择包含"NS"的列
# normal_ln <- data[,normal_ln_cols]
# save(normal_ln, file = "normal_ln.rda")
load("normal_ln.rda")

# #Pleural effusion
# EFFUSION_cols <- grepl("_EFFUSION_",names(data))
# # 选择包含"NS"的列
# EFFUSION <- data[,EFFUSION_cols]
# save(EFFUSION, file = "EFFUSION.rda")
load("EFFUSION.rda")

# #meta_brain
# ns_cols <- grepl("_NS_",names(data))
# # 选择包含"NS"的列
# meta_brain <- data[,ns_cols]
# save(meta_brain, file = "meta_brain.rda")
load("meta_brain.rda")


#install.packages("Seurat")

#if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#BiocManager::install("GSVA")
#BiocManager::install("GSEABase")
#BiocManager::install("limma")
#BiocManager::install("SingleR")
#BiocManager::install("celldex")
#BiocManager::install("monocle")

load("normal.rda")
load("tumor.rda")
load("meta_ln.rda")
load("normal_ln.rda")
load("EFFUSION.rda")
load("meta_brain.rda")


###################################01.数据前期处理与矫正###################################
#引用包
library(ggplot2)
library(limma)
library(Seurat)
library(dplyr)
library(magrittr)
library(celldex)
library(SingleR)
library(monocle)

logFCfilter=1               #logFC的过滤条件
adjPvalFilter=0.05          #校正后的pvalue过滤条件


#读取文件并处理
exp <- meta_brain
dimnames=list(rownames(exp),colnames(exp))
data=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames)
data=avereps(data)

#将矩阵转换为Seurat对象，并对数据进行过滤
pbmc=CreateSeuratObject(counts = data,project = "seurat", min.cells=3, min.features=50, names.delim = "_")
#使用PercentageFeatureSet函数计算线粒体基因百分比c[["percent.mt"]] <- PercentageFeatureSet(object = pbmc, pattern = "^MT-")
#绘制特征基因小提琴图
(file="01.featureViolin.pdf", width=10, height=6)
VlnPlot(object = pbmc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
dev.off()
pbmc=subset(x = pbmc, subset = nFeature_RNA > 50 & percent.mt < 5)    

#
pdf(file="01.featureCor.pdf",width=10,height=6)
plot1 <- FeatureScatter(object = pbmc, feature1 = "nCount_RNA", feature2 = "percent.mt",pt.size=1.5)
plot2 <- FeatureScatter(object = pbmc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA",,pt.size=1.5)
CombinePlots(plots = list(plot1, plot2))
dev.off()

#
pbmc <- NormalizeData(object = pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(object = pbmc, selection.method = "vst", nfeatures = 1500)
#
top10 <- head(x = VariableFeatures(object = pbmc), 10)
pdf(file="01.featureVar.pdf",width=10,height=6)
plot1 <- VariableFeaturePlot(object = pbmc)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
CombinePlots(plots = list(plot1, plot2))
dev.off()


###################################02.PCA###################################
##PCA
pbmc=ScaleData(pbmc)      
pbmc=RunPCA(object= pbmc,npcs = 20,pc.genes=VariableFeatures(object = pbmc))     #PCA????

#
pdf(file="02.pcaGene.pdf",width=10,height=8)
VizDimLoadings(object = pbmc, dims = 1:4, reduction = "pca",nfeatures = 20)
dev.off()

#
pdf(file="02.PCA.pdf",width=6.5,height=6)
DimPlot(object = pbmc, reduction = "pca")
dev.off()

#
pdf(file="02.pcaHeatmap.pdf",width=10,height=8)
DimHeatmap(object = pbmc, dims = 1:4, cells = 500, balanced = TRUE,nfeatures = 30,ncol=2)
dev.off()

#
pbmc <- JackStraw(object = pbmc, num.replicate = 100)
pbmc <- ScoreJackStraw(object = pbmc, dims = 1:15)
pdf(file="02.pcaJackStraw.pdf",width=8,height=6)
JackStrawPlot(object = pbmc, dims = 1:15)
dev.off()


###################################03.UMAP聚类分析和Marker基因###################################
##UMAP聚类分析
pbmc <- FindNeighbors(pbmc, dims = 1:20)       #前10个维度
pbmc <- FindClusters(object = pbmc, resolution = 0.5)   #对细胞分组，对细胞标准模块化，resolution越小cluster越少
head(Idents(pbmc),5)
# pbmc <- RunTSNE(object = pbmc, dims = 1:10)             ##TSNE聚类
# 
# pdf(file="03.TSNE.pdf",width=6.5,height=6)
# TSNEPlot(object = pbmc, pt.size = 0.2, label = TRUE)    ##TSNE可视化
# dev.off()
# write.table(pbmc$seurat_clusters,file="03.tsneCluster.txt",quote=F,sep="\t",col.names=F)

##############################################################################
pbmc <- RunUMAP(object = pbmc, dims = 1:30)          #UMAP聚类
pdf(file="03.UMAP.pdf",width=17,height=14)
DimPlot(object = pbmc, reduction = "umap", pt.size = 0.2, label = TRUE)   #可视化
dev.off()
write.table(pbmc$seurat_clusters,file="03.umapCluster.txt",quote=F,sep="\t",col.names=F)


##查找每个cluster差异基因
pbmc.markers <- FindAllMarkers(object = pbmc,
                               only.pos = FALSE,
                               min.pct = 0.25,
                               logfc.threshold = logFCfilter)
sig.markers=pbmc.markers[(abs(as.numeric(as.vector(pbmc.markers$avg_log2FC)))>logFCfilter & as.numeric(as.vector(pbmc.markers$p_val_adj))<adjPvalFilter),]
write.table(sig.markers,file="03.clusterMarkers.txt",sep="\t",row.names=F,quote=F)



#marker
showGenes = c('CD3D', 'CD3E',               # T cells
              'CD79A','MS4A1',              # B cells
              'NKG7','KLRD1',               # NK_cells
              'CD68','CD14',                # Macrophage_cells
              'TPSAB1','TPSB2',             # Mast_cells,
              'CD1C','CD1A',                # DC_cells
              'PDGFRA','COL1A1',            # Fibroblast_cells
              'CLDN5','VWF',                # Endothelial_cells
              'EPCAM','KRT19'               # Epithelial_cells
)

FeaturePlot(object = pbmc, features = 'ENG', cols = c("snow2", "red"))


#marker
showGenes1 = c('CD3D',             # T cells
              'CD79A',              # B cells
              'NKG7',               # NK_cells
              'CD68',           # Macrophage_cells
              'TPSAB1',           # Mast_cells,
              'CD1C',          # DC_cells
              'COL1A1',        # Fibroblast_cells
              'CLDN5',               # Endothelial_cells
              'EPCAM'           # Epithelial_cells
)
#绘制Marker在各个cluster的散点图
pdf(file="03.markerScatter_9.pdf",width=20,height=20)
FeaturePlot(object = pbmc, features = showGenes1, cols = c("snow2", "red"))
dev.off()

#绘制Marker在各个cluster的散点图
pdf(file="03.markerScatter.pdf",width=20,height=20)
FeaturePlot(object = pbmc, features = showGenes, cols = c("snow2", "red"))
dev.off()

top10 <- pbmc.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
#绘制Marker在各个cluster的热图
pdf(file="03.umapHeatmap.pdf",width=12,height=9)
DoHeatmap(object = pbmc, features = top10$gene) + NoLegend()
dev.off()

#绘制marker小提琴图
pdf(file="03.markerViolin.pdf",width=10,height=6)
VlnPlot(object = pbmc, features = row.names(sig.markers)[1:2])
dev.off()

#绘制Marker在各个cluster的气泡图
pdf(file="03.markerBubble.pdf",width=20,height=12)
cluster10Marker=showGenes
DotPlot(object = pbmc, features = cluster10Marker)
dev.off()

###################################04.SingleR 注释细胞类型###################################
counts<-pbmc@assays$RNA@counts
clusters<-pbmc@meta.data$seurat_clusters
ann=pbmc@meta.data$orig.ident

#####初标记#####初标记#####初标记#####初标记#####初标记#####初标记#####初标记#####初标记#####初标记#####
ref=celldex::HumanPrimaryCellAtlasData()
singler=SingleR(test=counts, ref =ref,
                labels=ref$label.main, clusters = clusters)

clusterAnn=as.data.frame(singler)
clusterAnn=cbind(id=row.names(clusterAnn), clusterAnn)
clusterAnn=clusterAnn[,c("id", "labels")]

singler2=SingleR(test=counts, ref =ref,
                 labels=ref$label.main)
cellAnn=as.data.frame(singler2)
cellAnn=cbind(id=row.names(cellAnn), cellAnn)
cellAnn=cellAnn[,c("id", "labels")]
write.table(cellAnn, file="04.cellAnn_raw.txt", quote=F, sep="\t", row.names=F)

#####个性标记####个性标记####个性标记####个性标记####个性标记####个性标记####个性标记####个性标记####
celltype=data.frame(ClusterID=0:23,
                    celltype= 0:23) 

# 'CD3D', 'CD3E',               # T cells
# 'CD79A','MS4A1',              # B cells
# 'NKG7','KLRD1'                # NK_cells
# 'CD68','CD14',                # Macrophage cells
# 'TPSAB1','TPSB2',             # Mast cells,
# 'CD1C','CD1A'                 # DC_cells
# 'PDGFRA','COL1A1',            # Fibroblast cells
# 'CLDN5','VWF',                # Endothelial cells
# 'EPCAM','KRT19',              # Epithelial cells



# 'CSF3R','FCGR3B',             # Neutrophils

FeaturePlot(object = pbmc, features = 'PTPRC', cols = c("snow2", "red"))

celltype[celltype$ClusterID %in% c(0,1,2,16,19),2]='T_cells'
celltype[celltype$ClusterID %in% c(3,8,23),2]='B_cells'
celltype[celltype$ClusterID %in% c(5,7),2]='NK_cells'
celltype[celltype$ClusterID %in% c(4,10,18),2]='Macrophage_cells'
celltype[celltype$ClusterID %in% c(11,21,22),2]='Monocyte'
celltype[celltype$ClusterID %in% c(9),2]='DC_cells'
celltype[celltype$ClusterID %in% c(6,20),2]='Fibroblast_cells'
celltype[celltype$ClusterID %in% c(12,13,14,17),2]='Epithelial_cells'
celltype[celltype$ClusterID %in% c(15),2]='Endothelial_cells'


write.table(celltype,file="04.clusterAnn.txt",quote=F,sep="\t", row.names=F)



# colors <-c('#E5D2DD', '#53A85F', '#F1BB72', '#F3B1A0', '#D6E7A3',
#            '#57C3F3','#E95C59', '#E59CC4', '#AB3282',  '#BD956A', 
#            '#9FA3A8', '#E0D4CA',  '#C5DEBA', '#F7F398','#C1E6F3',
#            '#6778AE', '#91D0BE', '#B53E2B','#712820', '#DCC1DD', '#CCE0F5')

table(celltype$celltype)
mycolors =c("#3176B7","#F78000",'#EEAD0E',"#CE2820","#9265C1",
            "#885649","#DD76C5","#3FA116","#BBBE00","#41BED1", '#CCE0F5')



#cluster注释结果可视化
newLabels <- celltype$celltype
names(newLabels) <- levels(pbmc)
pbmc <- RenameIdents(pbmc, newLabels)


#细胞类型主观排序
cell_type <- c('T_cells','B_cells','NK_cells','Macrophage_cells','Monocyte',
               'DC_cells','Fibroblast_cells','Epithelial_cells','Endothelial_cells'
               )

TB <- subset(pbmc,idents = cell_type)
levels(TB) <- cell_type


pdf(file="04.UMAP5.pdf",width=20,height=20)
UMAPPlot(object = TB,group.by = "ident",cols = mycolors, pt.size =0.5, label = TRUE) +
  theme(legend.position = "bottom") +
  theme(text = element_text(size = 30))#UMAP可视化
dev.off()

table(TB@active.ident)

##cluster注释结果差异化
pbmc.markers=FindAllMarkers(object = TB,
                            only.pos = FALSE,
                            min.pct = 0.25,
                            logfc.threshold = logFCfilter)
sig.cellMarkers=pbmc.markers[(abs(as.numeric(as.vector(pbmc.markers$avg_log2FC)))>logFCfilter & as.numeric(as.vector(pbmc.markers$p_val_adj))<adjPvalFilter),]
write.table(sig.cellMarkers,file="04.cellMarkers.txt",sep="\t",row.names=F,quote=F)


#绘制Marker在各个cluster的散点图
pdf(file="04.markerScatter.pdf",width=15,height=14)
FeaturePlot(object = TB, features = showGenes, cols = c("snow2", "red"))
dev.off()

top10 <- pbmc.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)

#绘制Marker在各个cluster的热图
pdf(file="04.umapHeatmap.pdf",width=12,height=9)
DoHeatmap(object = TB, features = top10$gene) + NoLegend()
dev.off()


pdf(file="04.markerViolin.pdf",width=20,height=25)
VlnPlot(object = TB,fill.by = "ident",cols = mycolors, 
        features = showGenes,pt.size = 0,stack = T,flip = T) + 
  theme(text = element_text(size = 30))+
  NoLegend()
dev.off()

#绘制Marker在各个cluster的气泡图
pdf(file="04.markerBubble.pdf",width=10,height=15)
cluster10Marker=showGenes
DotPlot(object = TB, features = cluster10Marker,dot.scale = 10)+
  theme(axis.text.x = element_text(face = "bold",angle = 90,color = 'black'),
        panel.background = element_rect(fill = 'transparent',color = 'black',size = 1))+
  coord_flip()
dev.off()

#############################################################################
############取出一部分05.cluster用来亚分群######################################
#############################################################################

# save(list=ls(),file='subset_cluster_input.RData')
load('subset_cluster_input.RData')

table(pbmc@active.ident)

sub_pbmc<-subset(pbmc, idents = "Epithelial_cells")


#保存细胞亚型
cellAnn=as.data.frame(sub_pbmc@active.ident)
cellAnn=cbind(id=row.names(cellAnn), cellAnn)
write.table(cellAnn, file="05.Epi_cellAnn_input.txt", quote=F, sep="\t", row.names=F)


pdf(file="05.Epi_subset.pdf",width=8,height=8)
DimPlot(sub_pbmc, reduction = "umap",group.by = "ident",
        cols = mycolors,label = TRUE, pt.size = 0.5)
dev.off()

#########################################################################################################
######################09.恶行突变分析 ###################################################################
######################09.恶行突变分析 ###################################################################
#########################################################################################################
#09.恶行突变分析

#下载copykat包
# options(repos = c(
#   CRAN = "https://cloud.r-project.org",
#   BioCsoft = "https://bioconductor.org/packages/3.14/bioc",
#   BioCann = "https://bioconductor.org/packages/3.14/data/annotation",
#   BioCexp = "https://bioconductor.org/packages/3.14/data/experiment",
#   BioCextra = "https://bioconductor.org/packages/3.14/extra",
#   bioCviews = "https://bioconductor.org/packages/3.14/view"))
# devtools::install_github("navinlabcode/copykat")

cellAnn <- read.table('05.Epi_cellAnn_input.txt', sep = "\t", header = TRUE)
load("tumor_BRO58_EBUS.rda")
tumor <- PT_exp
tmp <- intersect(cellAnn$id,colnames(tumor))
df <- subset(tumor,select = tmp)

library(copykat)
library(Seurat)
# install.packages("coda")
# install.packages("mcmc")
library(mcmc)
library(coda)

library(ggplot2)
library(limma)
library(Seurat)
library(dplyr)
library(magrittr)
library(celldex)
library(SingleR)


logFCfilter=1               #logFC的过滤条件
adjPvalFilter=0.05          #校正后的pvalue过滤条件

#读取文件并处理
exp <- df
dimnames=list(rownames(exp),colnames(exp))
data=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames)
rm(meta_brain)
rm(exp)
rm(dimnames)
data=avereps(data)

counts<-data

# sc_cnv = copykat(rawmat = counts,ngene.chr = 5,sam.name = 'Epi_PT',n.cores = 1)
# save(sc_cnv,file = 'Epi_PT_sc_cnv.rds')
load('Epi_PT_sc_cnv.rds')

#copykat后续分析
#https://www.jianshu.com/p/78089451cc0e

# 整合预测结果到Seurat对象中
all_cells <- sc_cnv$prediction[sc_cnv$prediction$copykat.pred != 'not.defined',]

# 提取CopyKAT预测的恶性/非恶性细胞counts矩阵
sc_counts <- as.matrix(counts)[,all_cells$cell.names]

# 标准的Seurat流程
CRC.data <- Matrix::Matrix(sc_counts)
single_RNA<- CreateSeuratObject(counts = CRC.data,
                                project = "pbmc3k",
                                min.cells = 5, 
                                min.features = 500)

# 导入metadata信息
single_RNA[['group_copykat']] <- all_cells[match(rownames(single_RNA@meta.data),all_cells$cell.names),2]

# 标准化数据及鉴定高变基因 (feature selection)
single_RNA<- NormalizeData(single_RNA) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = rownames(single_RNA)) # if 选择do.center = FALSE,得到值均为正


# 降维找邻近基因和cluster
single_RNA<- RunPCA(single_RNA,npcs = 30, features = VariableFeatures(object = single_RNA)) 
single_RNA <- FindNeighbors(single_RNA, dims = 1:20)       #前10个维度
single_RNA <- FindClusters(object = single_RNA, resolution = 0.5)   #对细胞分组，对细胞标准模块化，resolution越小cluster越少
head(Idents(single_RNA),5)
single_RNA <- RunUMAP(object = single_RNA, dims = 1:30) 

write.table(single_RNA$seurat_clusters,file="09.umapCluster.txt",quote=F,sep="\t",col.names=F)

##查找每个cluster差异基因
subb_pbmc.markers <- FindAllMarkers(object = single_RNA,
                                    only.pos = FALSE,
                                    min.pct = 0.25,
                                    logfc.threshold = logFCfilter)
sig_subb_pbmc_markers=subb_pbmc.markers[(abs(as.numeric(as.vector(subb_pbmc.markers$avg_log2FC)))>logFCfilter & as.numeric(as.vector(subb_pbmc.markers$p_val_adj))<adjPvalFilter),]
write.table(sig_subb_pbmc_markers,file="09.clusterMarkers.txt",sep="\t",row.names=F,quote=F)


colors <-c('#B22222','#91D0BE','#FFE4C4','#FF9912',"#4169E1",
           '#968175','#3D9140','#CAFF70','#57C3F3','#E59CC4',
           '#AB3282','#BD956A','#F3B1A0','#E0D4CA','#9933FA', 
           '#C1E6F3','#B53E2B','#FF4500','#DCC1DD','#E3170D',
           '#CCE0F5','#CCC9E6','#625D9E','#68A180','#3A6963')

plot1 <- DimPlot(single_RNA, reduction = "umap",cols = colors,label = TRUE,label.box = TRUE,repel = TRUE,pt.size = 0.2)

plot2 <- DimPlot(single_RNA, reduction = "umap",cols = colors,group.by = 'group_copykat',label = TRUE,label.box = TRUE,pt.size = 0.2)

pdf(file="10.copykat.pdf",width=15,height=6)
plot1 + plot2
dev.off()

###################################################################################
##该代码通过10.monocle2对选取的细胞类型进行轨迹绘制################################
##该代码通过10.monocle2对选取的细胞类型进行轨迹绘制################################
##该代码通过10.monocle2对选取的细胞类型进行轨迹绘制################################
###################################################################################
# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# BiocManager::install("monocle")

##载入monocle包
# install.packages("densityClust")
devtools::load_all("C:\\Users\\10097\\AppData\\Local\\R\\win-library\\4.2\\monocle")

library(Seurat)
library(dplyr)
library(Matrix)


##取需要做轨迹的细胞群
plot1
sub_pbmc <- subset(single_RNA, idents=c(0:14))

##生成用于轨迹构建的基因，此处选的每个cluster的marker基因
markers <- FindAllMarkers(sub_pbmc, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
top10 <- markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)

# save(sub_pbmc,file="10.sub.Robj") ##保存选取的细胞群


##将Seurat对象定义成monocle所需数据（monocle构建CDS需要3个矩阵：cellData、phenoData、featureData）
seurat_object <- sub_pbmc
data <- as(as.matrix(seurat_object@assays$RNA@counts), 'sparseMatrix')##此处需要提供原始的UMI值。 
pd <- new('AnnotatedDataFrame', data = seurat_object@meta.data)
fData <- data.frame(gene_short_name = row.names(data), row.names = row.names(data))
fd <- new('AnnotatedDataFrame', data = fData)

#用Seurat定义好的各参数构建monocle cds
cds <- newCellDataSet(data,
                      phenoData = pd,
                      featureData = fd,
                      lowerDetectionLimit = 0.5,##lowerDetectionLimit = 0.5可以自己调整
                      expressionFamily = negbinomial.size()); 

cds <- estimateSizeFactors(cds)#计算SizeFactor
cds <- estimateDispersions(cds)#计算Dispersions
# save(cds,file="10.cds.Robj")
# load("10.cds.Robj")
#####对细胞进行拟时间排序
ordering_genes_used<-as.matrix(top10)#指定建轨迹所用基因
cds <- setOrderingFilter(cds, ordering_genes = ordering_genes_used)
cds <- reduceDimension(cds, max_components = 2, method = 'DDRTree',auto_param_selection = F)##降维
cds <- orderCells(cds)


# save(cds,file="10.orderCells.Robj")
# load("10.orderCells.Robj")
write.table(pData(cds),file="10.my_pseudotime.txt")
# save(list = ls(),file = 'monocle_all.rda')

##########################################################################
load("monocle_all.rda")                 #从这里开始
##########################################################################

#保存树枝的细胞轨迹图
pdf(file="10.trajectory.State.pdf",width=6.5,height=6)
plot_cell_trajectory(cds,color_by = "State")
dev.off()
#保存时间的细胞轨迹图
pdf(file="10.trajectory.Pseudotime.pdf",width=6.5,height=6)
plot_cell_trajectory(cds,color_by = "Pseudotime")
dev.off()
# #保存细胞名称的细胞轨迹图
# pdf(file="10.trajectory.cellType.pdf",width=6.5,height=6)
# plot_cell_trajectory(cds,color_by = "celltype")
# dev.off()

colors <-c('#B22222','#91D0BE','#FFE4C4','#FF9912',"#4169E1",
           '#968175','#3D9140','#CAFF70','#57C3F3','#E59CC4',
           '#AB3282','#BD956A','#F3B1A0','#E0D4CA','#9933FA', 
           '#C1E6F3','#B53E2B','#FF4500','#DCC1DD','#E3170D',
           '#CCE0F5','#CCC9E6','#625D9E','#68A180','#3A6963')

#保存聚类的细胞轨迹图
pdf(file="10.trajectory.cluster.pdf",width=10,height=10)
plot_cell_trajectory(cds, color_by = "seurat_clusters",cell_size =0.7)+
  scale_colour_manual(values = colors)+
  theme(axis.title = element_text(size=18),axis.text = element_text(size=18,colour ='black'),
        legend.title = element_text(size=18),
        legend.text = element_text(size=15),
        legend.position = c(0.2,0.9))+
  guides(color = guide_legend(nrow =3))
dev.off()

#轨迹图分面显示
pdf(file="10.trajectory.cluster_fenmian.pdf",width=50,height=10)
plot_cell_trajectory(cds, color_by = "seurat_clusters",cell_size =0.7)+
  scale_colour_manual(values = colors)+
  theme(axis.title = element_text(size=18),axis.text = element_text(size=18,colour ='black'),
        legend.title = element_text(size=18),
        legend.text = element_text(size=15),
        legend.position = c(0.5,0.9))+
  guides(color = guide_legend(nrow =3))+
  facet_wrap("~seurat_clusters", nrow = 1)
dev.off()

write.table(top10, file = "10.top10.csv", sep = ",", col.names = NA,
            qmethod = "double")


#查看cds数据
pData(cds)

##如果发现monocle定义的轨迹起点方向不对，可以自己重新用state数值指定起点重新构建轨迹(要根据自己轨迹情况调整)
#cds <- orderCells(cds,root=3)
#plot_cell_trajectory(cds, color_by = "Pseudotime")

##如果轨迹没有分支，起点反了通过reverse=T调整
#cds <- orderCells(cds,reverse=T)

###展示感兴趣基因在轨迹上表达量
plot_cell_trajectory(cds, markers="AKAP12", cell_size=0.2, use_color_gradient=T) + scale_color_gradient2(low="gray",mid="blue",high="red")

##计算差异基因并保存对象()
my_pseudotime_de <- differentialGeneTest(cds,fullModelFormulaStr = "~sm.ns(Pseudotime)") #耗内存和时间

# save(my_pseudotime_de,file="10.my_pseudotime_de.Robj")
# load("10.my_pseudotime_de.Robj")
# save(list=ls(),file='monocle_all.rda')
######################################################################
load('monocle_all.rda')                     #从这里开始
##################################################################

##将q值小于10^-5的差异基因保存，当然也可以不过滤全部保存,同时用这些基因绘制热图
c<-subset(my_pseudotime_de, qval < 10^-3)

write.table(c[order(c[,4]),],file="10.my_pseudotime_de1.xls",sep="\t")


##如需要调整绘制热图基因集，可以调整p值或者自行指定基因集 ，如q值小于10^-10如下
c<-subset(my_pseudotime_de, qval < 10^-10)

###展示感兴趣基因热图(一个方向走)
plot_pseudotime_heatmap(cds[row.names(c),], num_clusters = 4,use_gene_short_name = TRUE,show_rownames = TRUE)


#显著差异基因按热图结果排序并保存
Time_diff_sig <- c[,c("gene_short_name", "pval", "qval")]
write.csv(Time_diff_sig, "10.Time_diff_sig.csv", row.names = F)
marker_genes <- row.names(subset(fData(cds), gene_short_name %in% 
                                   c('AKAP12','AKR1B10')))
diff_test_res <- differentialGeneTest(cds[marker_genes,], fullModelFormulaStr = "~sm.ns(Pseudotime)")

# Time_genes <- diff_test_res %>% pull(gene_short_name) %>% as.character()
sig_gene_names <- row.names(subset(diff_test_res, qval < 0.1))

pdf(file="10.plot_pseudotime_heatmap.pdf",width=20,height=30)
plot_pseudotime_heatmap(cds[sig_gene_names,],num_clusters = 4,cores = 1,show_rownames = T)
dev.off()

###展示感兴趣基因拟时间表达变化
##定义展示基因
used <- row.names(subset(fData(cds),gene_short_name %in% 
                           c("CD79A", "NKG7", "HLA-DRA", "CCL5")))
cds_subset <- cds[used,]

###展示感兴趣基因拟时间表达变化(一个方向走)
p1 <- plot_genes_in_pseudotime(cds_subset,color_by="seurat_clusters")
p2 <-plot_genes_in_pseudotime(cds_subset, color_by = "Pseudotime")

p1+p2

p3 <- plot_genes_jitter(cds_subset,grouping = "State", color_by = "State")
p4 <- plot_genes_violin(cds_subset,grouping = "State", color_by = "State")

p3+p4




#################细胞通讯#############################################################################
#install.packages("Seurat")

#if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#BiocManager::install("GSVA")
#BiocManager::install("GSEABase")
#BiocManager::install("limma")
#BiocManager::install("SingleR")
#BiocManager::install("celldex")
#BiocManager::install("monocle")

load("tumor_BRO58_EBUS.rda")
tumor <- PT_exp

###################################01.数据前期处理与矫正###################################
#引用包
library(ggplot2)
library(limma)
library(Seurat)
library(dplyr)
library(magrittr)
library(celldex)
library(SingleR)


logFCfilter=1               #logFC的过滤条件
adjPvalFilter=0.05          #校正后的pvalue过滤条件

#读取文件并处理
exp <- tumor
dimnames=list(rownames(exp),colnames(exp))
data=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames)
rm(tumor)
rm(exp)
rm(dimnames)
data=avereps(data)

# save(data,file='data.rda')
load("data.rda")

#将矩阵转换为Seurat对象，并对数据进行过滤
pbmc=CreateSeuratObject(counts = data,
                        project = "pbmc3k", 
                        min.cells=5,      #基因至少在多少个细胞中有表达
                        min.features=500) #至少表达多少个基因

#使用PercentageFeatureSet函数计算线粒体基因百分比
pbmc[["percent.mt"]] <- PercentageFeatureSet(object = pbmc, pattern = "^MT-")

#过滤细胞
pbmc=subset(x = pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 5000 & percent.mt < 5)    #对数据进行过滤

#对数据进行标准化
pbmc <- NormalizeData(pbmc)
#提取细胞间变异系数较大的基因
pbmc <- FindVariableFeatures(object = pbmc, selection.method = "vst", nfeatures = 2000)
#输出特征方差图
top10 <- head(VariableFeatures(pbmc), 10)

###################################02.PCA主成分分析###################################
##PCA分析
pbmc=ScaleData(pbmc)          #PCA降维之前的标准处理步骤
pbmc=RunPCA(object= pbmc,npcs = 30 ,pc.genes=VariableFeatures(object = pbmc))     #PCA分析

###################################03.UMAP聚类分析和Marker基因###################################
##UMAP聚类分析
pbmc <- FindNeighbors(pbmc, dims = 1:20)       #前10个维度
pbmc <- FindClusters(object = pbmc, resolution = 0.5)   #对细胞分组，对细胞标准模块化，resolution越小cluster越少
head(Idents(pbmc),5)

pbmc <- RunUMAP(object = pbmc, dims = 1:30)          #UMAP聚类
pdf(file="03.UMAP.pdf",width=17,height=14)
DimPlot(object = pbmc, reduction = "umap", pt.size = 0.2, label = TRUE)   #可视化
dev.off()
write.table(pbmc$seurat_clusters,file="03.umapCluster.txt",quote=F,sep="\t",col.names=F)


##查找每个cluster差异基因
pbmc.markers <- FindAllMarkers(object = pbmc,
                               only.pos = FALSE,
                               min.pct = 0.25,
                               logfc.threshold = logFCfilter)
sig.markers=pbmc.markers[(abs(as.numeric(as.vector(pbmc.markers$avg_log2FC)))>logFCfilter & as.numeric(as.vector(pbmc.markers$p_val_adj))<adjPvalFilter),]
write.table(sig.markers,file="03.clusterMarkers.txt",sep="\t",row.names=F,quote=F)



#marker
showGenes = c('CD3D', 'CD3E',               # T cells
              'CD79A','MS4A1',              # B cells
              'NKG7','KLRD1',               # NK_cells
              'CD68','CD14',                # Macrophage_cells
              'TPSAB1','TPSB2',             # Mast_cells,
              'CD1C','CD1A',                # DC_cells
              'PDGFRA','COL1A1',            # Fibroblast_cells
              'CLDN5','VWF',                # Endothelial_cells
              'EPCAM','KRT19'               # Epithelial_cells
)

FeaturePlot(object = pbmc, features = 'ENG', cols = c("snow2", "red"))


###################################04.SingleR 注释细胞类型###################################
counts<-pbmc@assays$RNA@counts
clusters<-pbmc@meta.data$seurat_clusters
ann=pbmc@meta.data$orig.ident

#####初标记#####初标记#####初标记#####初标记#####初标记#####初标记#####初标记#####初标记#####初标记#####
# ref=celldex::HumanPrimaryCellAtlasData()
# singler=SingleR(test=counts, ref =ref,
#                 labels=ref$label.main, clusters = clusters)
# 
# clusterAnn=as.data.frame(singler)
# clusterAnn=cbind(id=row.names(clusterAnn), clusterAnn)
# clusterAnn=clusterAnn[,c("id", "labels")]

#####个性标记####个性标记####个性标记####个性标记####个性标记####个性标记####个性标记####个性标记####
celltype=data.frame(ClusterID=0:23,
                    celltype= 0:23) 

# 'CD3D', 'CD3E',               # T cells
# 'CD79A','MS4A1',              # B cells
# 'NKG7','KLRD1'                # NK_cells
# 'CD68','CD14',                # Macrophage cells
# 'TPSAB1','TPSB2',             # Mast cells,
# 'CD1C','CD1A'                 # DC_cells
# 'PDGFRA','COL1A1',            # Fibroblast cells
# 'CLDN5','VWF',                # Endothelial cells
# 'EPCAM','KRT19',              # Epithelial cells



# 'CSF3R','FCGR3B',             # Neutrophils

FeaturePlot(object = pbmc, features = 'COL1A1', cols = c("snow2", "red"))

celltype[celltype$ClusterID %in% c(0,1,2,16,19),2]='T_cells'
celltype[celltype$ClusterID %in% c(3,8,23),2]='B_cells'
celltype[celltype$ClusterID %in% c(5,7),2]='NK_cells'
celltype[celltype$ClusterID %in% c(4,10,18),2]='Macrophage_cells'
celltype[celltype$ClusterID %in% c(11,21,22),2]='Monocyte'
celltype[celltype$ClusterID %in% c(9),2]='DC_cells'
celltype[celltype$ClusterID %in% c(6,20),2]='Fibroblast_cells'
celltype[celltype$ClusterID %in% c(12,13,14,17),2]='Epithelial_cells'
celltype[celltype$ClusterID %in% c(15),2]='Endothelial_cells'


write.table(celltype,file="04.clusterAnn.txt",quote=F,sep="\t", row.names=F)



# colors <-c('#E5D2DD', '#53A85F', '#F1BB72', '#F3B1A0', '#D6E7A3',
#            '#57C3F3','#E95C59', '#E59CC4', '#AB3282',  '#BD956A', 
#            '#9FA3A8', '#E0D4CA',  '#C5DEBA', '#F7F398','#C1E6F3',
#            '#6778AE', '#91D0BE', '#B53E2B','#712820', '#DCC1DD', '#CCE0F5')

table(celltype$celltype)
mycolors =c("#3176B7","#F78000",'#EEAD0E',"#CE2820","#9265C1",
            "#885649","#DD76C5","#3FA116","#BBBE00","#41BED1", '#CCE0F5')



#cluster注释结果可视化
newLabels <- celltype$celltype
names(newLabels) <- levels(pbmc)
pbmc <- RenameIdents(pbmc, newLabels)


#细胞类型主观排序
cell_type <- c('T_cells','B_cells','NK_cells','Macrophage_cells','Monocyte',
               'DC_cells','Fibroblast_cells','Epithelial_cells','Endothelial_cells'
)

TB <- subset(pbmc,idents = cell_type)
levels(TB) <- cell_type


#####（一）创建cellchat对象####################################################
#pbmc3k里的seurat_annotations有一些NA注释，过滤掉
data.input = TB@assays$RNA@data
meta.data =  as.data.frame(TB@active.ident)
meta.data$seurat_annotations <- meta.data$`TB@active.ident`
data.input = data.input[,row.names(meta.data)]

#######################################################################################
#######################################################################################
#######################################################################################
# save(list = ls(),file = 'PT_cell_communication.rda')
load('PT_cell_communication.rda')

library(ggplot2)
library(limma)
library(Seurat)
library(dplyr)
library(magrittr)
library(celldex)
library(SingleR)
library(SeuratData)
library(patchwork) #最强大的拼图包
library(CellChat)
library(ggalluvial)
library(svglite)


#构建CellChat数据对象
cellchat <- createCellChat(object = data.input, 
                           meta = meta.data, 
                           group.by = "seurat_annotations")


cellchat
summary(cellchat)
str(cellchat)

#把metadata信息加到CellChat对象
identity = data.frame(group =meta.data$seurat_annotations, 
                      row.names = row.names(meta.data)) 

cellchat <- addMeta(cellchat, meta = identity, meta.name = "labels")
cellchat <- setIdent(cellchat, ident.use = "labels") # set "labels" as default cell identity
levels(cellchat@idents) # show factor levels of the cell labels


groupSize <- as.numeric(table(cellchat@idents)) # number of cells in each cell group

#导入配受体数据库
# 选择合适的物种，可选CellChatDB.human, CellChatDB.mouse
CellChatDB <- CellChatDB.human
#查看数据库的组成比例
showDatabaseCategory(CellChatDB)
# 查看数据库具体信息
CellChatDB$interaction[1:4,1:4]
head(CellChatDB$cofactor)
head(CellChatDB$complex)
head(CellChatDB$geneInfo)
CellChatDB.use <- subsetDB(CellChatDB, search = "Secreted Signaling")#取出相应分类用作分析数据库
cellchat@DB <- CellChatDB.use#将数据库内容载入cellchat对象中，相当于设置好接下来要参考的数据库

#数据预处理
cellchat <- subsetData(cellchat)#取出表达数据
cellchat <- identifyOverExpressedGenes(cellchat)#寻找高表达的基因#
cellchat <- identifyOverExpressedInteractions(cellchat)#寻找高表达的通路
cellchat <- projectData(cellchat, PPI.human)#投影到PPI，储存上一步的结果到cellchat@LR$LRsig

# save(list = ls(),file = 'PT_cell_communication_2.rda')
load('PT_cell_communication_2.rda')

library(ggplot2)
library(limma)
library(Seurat)
library(dplyr)
library(magrittr)
library(celldex)
library(SingleR)
library(SeuratData)
library(patchwork) #最强大的拼图包
library(CellChat)
library(ggalluvial)
library(svglite)
library(echarts4r)
library(highcharter)
library(htmlwidgets)
library(networkD3)
#####（二）推断配受体通讯网络##########################################################
# 1.在配受体水平上计算细胞通讯
#相互作用推断
#如果所研究的生物过程中已知的信号通路无法预测，用户可以尝试truncatedMean来改变计算每个细胞组平均基因表达的方法
#对于population. size参数的解读：是否考虑所有测序细胞中每组细胞的比例，
#population. size = FALSE：如果分析分选富集的单细胞，以消除潜在的人为因素细胞群的大小。
#population. size = TRUE，如果分析非分选富集的单细胞转录组，原因是丰富的细胞群往往比稀少的细胞群发送集体更强的信号

cellchat <- computeCommunProb(cellchat,population.size = F)

# Filter out the cell-cell communication if there are only few number of cells in certain cell groups
cellchat <- filterCommunication(cellchat, min.cells = 10)




#将推断的结果提取出来
df.net <- subsetCommunication(cellchat)#将细胞通讯预测结果以数据框的形式取出
#install.packages("DT"),DT就是展示数据框的一种工具，可以调节展示的参数等
library(DT)
DT::datatable(df.net)
write.csv(df.net,'01.df.net.csv')


#通过计算链路数量或汇总通信概率来计算细胞间的聚合通信网络
cellchat <- computeCommunProbPathway(cellchat)
cellchat <- aggregateNet(cellchat)

##数据可视化
levels(cellchat@idents)
cellchat <- aggregateNet(cellchat)#计算细胞-细胞聚合通信网络
groupSize <- as.numeric(table(cellchat@idents))
groupSize

# #组图，1行2列
# par(mfrow = c(1,2), xpd=TRUE)
#使用圆图显示任意两个细胞群之间的相互作用数量或总相互作用强度(权重)。
#颜色和source是一致的,圈圈的大小是每个细胞群细胞的数量
pdf(file="1.cell_communication_Number_of_interactions.pdf",width=10,height=10)
netVisual_circle(cellchat@net$count, vertex.weight = groupSize, weight.scale = T,
                 label.edge= F, title.name = "Number of interactions")
dev.off()


#总相互作用强度
pdf(file="1.cell_communication_weights.pdf",width=10,height=10)
netVisual_circle(cellchat@net$weight, vertex.weight = groupSize, weight.scale = T, 
                 label.edge= F, title.name = "Interaction weights/strength")
dev.off()

net <- as.data.frame(cellchat@net$weight)
write.csv(net,file = 'net.csv')
x <- as.matrix(net)
heatmap(x)

#检查每种细胞和其他细胞的互作情况
mat <- cellchat@net$weight
# par(mfrow = c(3,4), xpd=TRUE)

pdf(file="2.cell_communication.pdf",width=10,height=10)

for (i in 1:nrow(mat)) {
  mat2 <- matrix(0, nrow = nrow(mat), ncol = ncol(mat), dimnames = dimnames(mat))
  mat2[i,] <- mat[i,]
  netVisual_circle(mat2, vertex.weight = groupSize, weight.scale = T, arrow.width = 0.2,
                   arrow.size = 0.2, edge.weight.max = max(mat), title.name = rownames(mat)[i])
}

dev.off()
######截止############截止############截止############截止############截止######
######截止############截止############截止############截止############截止######
######截止############截止############截止############截止############截止######