单细胞测序(二)

例 分析头颈部鳞状细胞癌组织在感染或未感染人乳头瘤病毒免疫谱系的转录特征

(一)原始数据的获取和预处理

在NCBI的GEO DataSets搜索有关人类研究的鳞状细胞癌实验研究结果,选取存储序列号为GSE139324的数据作为原始数据来源。选择其中的GSM4138110、GSM4138112、GSM4138113、GSM4138156、GSM4138158、GSM4138160样本,其中样本均来源于头颈部鳞状细胞癌患者,采样自外周血单核细胞,前三个样本人乳头瘤病毒阴性,后三个样本人乳头瘤病毒阳性。在GEO Accession页面下载各个样本的细胞代码 barcodes.tsv.gz、基因ID genes.tsv.gz和表达矩阵 matrix.mtx.gz用于后续研究。

图1 GEO数据库GSE139324数据集

最终整理成六个文件夹并且统一修改文件名,放在工作目录下

图2 整理下载数据及命名

首先,读取下载数据的R代码如下:

library(Seurat)

library(tidyverse)

library(dplyr)

library(org.Hs.eg.db)

library(clusterProfiler)

library(patchwork)

library(clustree)

library(pheatmap)

library(SingleR)

library(magrittr)

library(ggrepel)

library(reshape2)

library(GSEABase)

library(clusterProfiler)

library(msigdbr)

library(GSEA)

library(GSVA)

library(magrittr)#加载需要的R包

dir=c("HPV_NEG1/","HPV_NEG2/","HPV_NEG3/","HPV_POS1/","HPV_POS2","HPV_POS3")

names(dir)=c('NEG1','NEG2','NEG3',"POS1","POS2","POS3")

scRNAlist<-list()

for(i in 1:length(dir)){

  counts<-Read10X(data.dir=dir[i])#读取单细胞数据

  scRNAlist[[i]]<-CreateSeuratObject(counts,min.cells=3,min.features=300)}#创建Seurat对象

for(i in 1:length(dir)){

  scRNAlist[[i]][["percent.mt"]]<-PercentageFeatureSet(scRNAlist[[i]],pattern="^MT-")#计算每个细胞的线粒体基因含量

scRNAlist[[i]][["percent.ribo"]]<-PercentageFeatureSet(scRNAlist[[i]],pattern="^RP[SL][[:digit:]]")#计算每个细胞核糖体基因含量

  scRNAlist[[i]][["percent.red"]]<-PercentageFeatureSet(scRNAlist[[i]],pattern="^HB-")#计算每个细胞红细胞基因含量

  scRNAlist[[i]]<-subset(scRNAlist[[i]],#设置相应的阈值过滤低质量细胞

               subset=percent.mt<=20&

               percent.ribo<=100&

               percent.red<=20)}

for(i in 1:length(scRNAlist)){

  scRNAlist[[i]]<-NormalizeData(scRNAlist[[i]])#数据归一化

scRNAlist[[i]]<-FindVariableFeatures(scRNAlist[[i]],selection.method="vst",nfeatures=3000)}#寻找高变基因

scRNA.anchors<-FindIntegrationAnchors(object.list=scRNAlist,anchor.features=3000)

scRNA1<-IntegrateData(anchorset=scRNA.anchors)#采用锚定点整合多个样本

scRNA2<-merge(scRNAlist[[1]],c(scRNAlist[[2]],scRNAlist[[3]],scRNAlist[[4]],scRNAlist[[5]],scRNAlist[[6]]))

(二)降维聚类及细胞类群注释

DefaultAssay(scRNA1)<-"integrated"#调整active.ident

scRNA1<-ScaleData(scRNA1)#数据标准化

scRNA1<-RunPCA(scRNA1,features=VariableFeatures(scRNA1))#PCA降维

plot1<-ElbowPlot(scRNA1,ndims=50,reduction="pca")#根据“肘部法则”,选取转折处的主成分数量

plot1

图3 PCA降维主成分分布比例

scRNA1<-RunPCA(scRNA1,npcs=16,verbose=T)#选择主成分为16重新降维

scRNA1<-FindNeighbors(scRNA1,reduction="pca",dims=1:10)

scRNA1<-FindClusters(scRNA1,resolution=seq(0.4,1.2,by=0.1))#调整参数进行聚类

clustree(scRNA1)#查看各参数的分组情况进行选择

图4 细胞亚群聚类参数分布

scRNA1<-FindClusters(scRNA1,resolution=0.7)#选择resolution=0.7重新聚类

scRNA1<-RunUMAP(scRNA1,reduction="pca",dims=1:10)

scRNA1<-RunTSNE(scRNA1,dims=1:10)

DefaultAssay(scRNA1)<-"RNA"

load("ref_Human_all.RData")#加载人类参考marker基因

refdata<-ref_Human_all

scRNA1<-JoinLayers(scRNA1)

testdata<-GetAssayData(scRNA1,slot="data")#提取表达矩阵

clusters<-scRNA1@meta.data$seurat_clusters

cellpred<-SingleR(test=testdata,ref=refdata,labels=refdata$label.main,

               method="cluster",clusters=clusters,

               assay.type.test="logcounts",assay.type.ref="logcounts")#细胞类群注释

celltype<-data.frame(ClusterID=rownames(cellpred),celltype=cellpred$labels,stringsAsFactors=FALSE)

scRNA1@meta.data$celltype="NA"

for(i in 1:nrow(celltype)){

scRNA1@meta.data[which(scRNA1@meta.data$seurat_clusters==celltype$ClusterID[i]),'celltype']<-celltype$celltype[i]}

DimPlot(scRNA1,reduction="umap",group.by="celltype",label=T)

图5 umap细胞亚群聚类

DimPlot(scRNA1,reduction="tsne",group.by="celltype",label=T)

图6 tSNE细胞亚群聚类

(三)Marker基因展示

Idents(scRNA1)=scRNA1@meta.data$celltype#调整active.ident为细胞类群

degs<-FindAllMarkers(scRNA1,logfc.threshold=0.5,

                   test.use="roc",

                   return.thresh=0.25,

                   min.pct=0.3,only.pos=T)#寻找marker基因

degs_sig<-degs%>%filter(pct.1>0.3&power>0.25)%>%filter(cluster!="other")%>%arrange(cluster,-power)

degs_top20<-degs_sig%>%group_by(cluster)%>%top_n(20,power)%>%top_n(20,avg_diff)%>%arrange(cluster,-power)#选取前20的marker基因

表1 marker基因展示(部分)

gene

myAUC

avg_diff

power

avg_log2FC

pct.1

pct.2

cluster

CD79A

0.9830

2.6721

0.9660

7.5486

0.9690

0.0220

B_cell

FTL

0.9820

2.1252

0.9640

3.1327

1.0000

0.9750

Monocyte

CST3

0.9820

2.9469

0.9640

5.7309

0.9710

0.1680

Monocyte

CD74

0.9820

2.0616

0.9640

3.0619

1.0000

0.7900

B_cell

CTSS

0.9780

2.3691

0.9560

4.0504

0.9880

0.4300

Monocyte

LYZ

0.9750

3.8927

0.9500

6.5578

0.9760

0.2800

Monocyte

NKG7

0.9730

2.4126

0.9460

3.7102

0.9930

0.3280

NK_cell

FTH1

0.9720

1.6950

0.9440

2.5087

0.9980

0.9810

Monocyte

TYROBP

0.9630

1.9013

0.9260

2.9926

0.9920

0.3910

Monocyte

S100A6

0.9630

1.5368

0.9260

2.3251

0.9970

0.8920

Monocyte

LST1

0.9610

2.5829

0.9220

5.0680

0.9430

0.1910

Monocyte

S100A9

0.9600

4.0490

0.9200

6.4097

0.9740

0.4570

Monocyte

GZMB

0.9520

2.2850

0.9040

4.0168

0.9570

0.1490

NK_cell

CD37

0.9510

1.3778

0.9020

2.2023

0.9920

0.7600

B_cell

MS4A1

0.9500

2.0753

0.9000

7.7078

0.9050

0.0120

B_cell

CD79B

0.9430

2.0574

0.8860

4.9544

0.9110

0.1020

B_cell

avgData<-testdata[degs_top20$gene,]%>%#计算每个基因在细胞类群的平均表达量

  apply(1,function(x){

    tapply(x,scRNA1$celltype,mean)#计算平均表达量

  })%>%t

phData<-MinMax(scale(avgData),-2,2)#z-score标准化用于绘制热图

rownames(phData)<-degs_top20$gene

phres<-pheatmap(phData,color=colorRampPalette(c("darkblue","white","red3"))(100),

     scale="row",cluster_rows=F,cluster_cols=F,show_rownames=T)

图7 marker基因初步展示

(四)细胞比例展示

x=scRNA1@meta.data$celltype

Clusters<-table(scRNA1@meta.data$celltype,scRNA1@meta.data$orig.ident)%>%melt()

colnames(Clusters)<-c("Celltype","Sample","Number")

cluster=c("B_cell","Monocyte","NK_cell","T_cells")

Clusters$Celltype<-factor(Clusters$Celltype,levels=cluster)

sample_color<-c("#fccccb","#bdb5e1","#b0d992","#f9d580")

p<-ggplot(data=Clusters,aes(x=Number,y=Sample,fill=Celltype))+

  geom_bar(stat="identity",width=0.8,position="fill")+

  scale_fill_manual(values=sample_color)+

  theme_bw()+

  theme(panel.grid=element_blank())+

  labs(x="",y="Sample")+

  theme(axis.text.y=element_text(size=12,colour="black"))+

  theme(axis.text.x=element_text(size=12,colour="black"))

p

图8 各样本细胞比例分布

(五)各细胞类群差异分析

type=c("Monocyte","T_cells","NK_cell","B_cell")

r.deg=data.frame()

table(scRNA1@meta.data$orig.ident)

for(i in 1:4){#对各细胞类群进行差异分析

  Idents(scRNA1)="celltype"

deg=FindMarkers(scRNA1,ident.1=c("NEG1","NEG2","NEG3"),ident.2=c("POS1","POS2","POS3"),group.by="orig.ident",subset.ident=type[i])

  deg$celltype=type[i]

  deg$unm=i-1

  r.deg=rbind(deg,r.deg)}

r.deg$gene=rownames(r.deg)

r.deg<-subset(r.deg,p_val_adj<0.05&abs(avg_log2FC)>0.5)#筛选差异基因

r.deg$trend<-as.factor(ifelse(r.deg$avg_log2FC>0,'Up','Down'))#定义上调下调

r.deg$significance<-as.factor(ifelse(r.deg$p_val_adj<0.01,'Highly','Lowly'))#定义显著性

r.deg$sig_trend<-paste0(r.deg$trend," ",r.deg$significance)

r.deg$unm=r.deg$unm%>%as.vector(.)%>%as.numeric(.)

background<-r.deg%>%group_by(unm)%>%summarise(Min=min(avg_log2FC)-0.2,Max=max(avg_log2FC)+0.2)%>%as.data.frame()#绘制浅灰色背景

background$Celltype<-factor(type)

background$start<-background_position$unm-0.4

background$end<-background_position$unm+0.4

cluster_bar_position<-background_position#绘制细胞类群标注

cluster_bar_position$start<-cluster_bar_position$unm-0.5

cluster_bar_position$end<-cluster_bar_position$unm+0.5

cluster_bar_position$unm%<>%factor(.,levels=c(0:max(as.vector(.))))

cols<-c("Up Highly"="#ff0000","Down Highly"="#0000ff","Up Lowly"="black","Down Lowly"="black")

cols_cluster<-c("0"="#e3716e","1"="#eca680","2"="#7ac7e2","3"="#f7df87")

p1=ggplot()+geom_rect(data=background,aes(xmin=start,xmax=end,ymin=Min,ymax=Max),

           fill="#525252",alpha=0.1)+###添加灰色背景色

  geom_jitter(data=r.deg,aes(x=unm,y=avg_log2FC,colour=sig_trend),

           size=2,position=position_jitter(seed=1))+

scale_x_continuous(breaks=seq(0,3,by=1),labels=c("Monocyte","T_cells","NK_cell","B_cell"))+scale_color_manual(values=cols)+

geom_rect(data=cluster_bar_position,aes(xmin=start,xmax=end,ymin=-0.4,ymax=0.4,fill=unm),color="black",alpha=1,show.legend=F)+scale_fill_manual(values=cols_cluster)+

  labs(x="Cluster",y="Average log2FC")+theme_bw()+

  theme(axis.text.y=element_text(colour='black',size=14),

       axis.text.x=element_text(colour='black',size=12,vjust=75))

p1

图9 各细胞差异分析结果

富集分析

for (i in 1:4){

  i=4

  Idents(scRNA1)="celltype"

deg=FindMarkers(scRNA1,ident.1=c("NEG1","NEG2","NEG3"),ident.2=c("POS1","POS2","POS3"),group.by="orig.ident",subset.ident=type[i])

  deg$celltype=type[i]

  deg$unm=i-1

  degs.list=rownames(deg)

  df<-bitr(degs.list,fromType="SYMBOL",toType=c("ENTREZID"),OrgDb=org.Hs.eg.db)

eg=enrichGO(gene=degs.list,OrgDb=org.Hs.eg.db,keyType="SYMBOL",ont="all",pvalueCutoff=0.5,qvalueCutoff=0.5)

barplot(eg,x="GeneRatio",color="p.adjust",showCategory=8,split="ONTOLOGY")+facet_grid(ONTOLOGY~.,scale='free')

  dev.print(png,file=paste0(cluster[i],"-go.png"),width=2000,height=2500,res=300)

ek<-enrichKEGG(gene=df$ENTREZID,organism="hsa",pvalueCutoff=0.05,qvalueCutoff=0.05)

  barplot(ek, x = "GeneRatio", color = "p.adjust",showCategory =15)

  dev.print(png, file = paste0(cluster[i],"-kegg.png"), width = 2000, height = 2500, res = 300)

}

图10 各细胞GO和KEGG通路富集分析结果

左侧为GO功能注释结果,右侧为KEGG通路富集分析结果

在GSEA官网下载通路的参考数据集进行注释:

图11 GSEA官网

kegggmt2<-read.gmt("c2.all.v2023.2.Hs.symbols.gmt")

kegg_list=split(kegggmt2$gene,kegggmt2$term)

x=AverageExpression(scRNA1)

exp=x[["RNA"]]

exp1=as.matrix(exp)

m_df<-msigdbr(species="Homo sapiens",category="C2",subcategory="KEGG")

fgsea_sets<-m_df%>%split(x=.$gene_symbol,f=.$gs_name)

es.max<-gsva(exp1,fgsea_sets,mx.diff=FALSE)

pheatmap::pheatmap(es.max[1:20,],

                 cluster_rows=T,

                 cluster_cols=T,

                 show_colnames=T,

                 scale="row",               color=colorRampPalette(c("#Fc8002","white","#AAAAAA","#1663a9"))(100))

图12 GSEA富集分析热图

  • 18
    点赞
  • 17
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值