################# 进化树分析绘图 ###################
setwd('/biodata/06.personalized_analysis/AJFX2230314041_SSR_20230602/test/')
library(ggplot2)
library(ape)
library(ggtree)
library(ggthemes)
library(phylogram)
library(dendextend)
library(treeio)
library(ggtree)
library(dplyr)
args = commandArgs(T)
tree.dend <- read.dendrogram(file = args[1]) # 不能用Y叔叔的read.tree,属性会变
gpnum = args[2] # 分组数
#### 根据树文件分组
group_tree = data.frame(cutree(tree.dend, k = gpnum, order_clusters_as_data = T))
group_tree$Sample = rownames(group_tree)
group_tree$Group = paste0('Cluster',group_tree[,1])
group_tree = group_tree[,-1]
write.table(group_tree,'group_tree.xls',sep = '\t',quote = F,row.names = F)
Taxainfo = sapply(1:gpnum, function(x){
clu = paste0('Cluster',x)
clus = group_tree[group_tree$Group == clu,'Sample']
return(c(head(clus,1), tail(clus,1)))
}
)
Taxainfo
# 绘制环状热图
# 根据一个样本计算文字标签
# res <- cirp %>% filter(variable == 'GSM111122')
res <- group_tree
# 添加文字标签
# 计算角度
res$ang <- seq(from = (360/nrow(res)) / 1.5,
to = (1.5* (360/nrow(res))) - 360,
length.out = nrow(res)) + 80
# 根据角度确定朝向的方向
res$hjust <- 0
res$hjust[which(res$ang < -90)] <- 1
res$ang[which(res$ang < -90)] <- (180+res$ang)[which(res$ang < -90)]
# 查看结果
head(res,3)
cg = res %>% group_by(Group) %>% summarise(median(ang),median(hjust))
#cg$ang = -(cg$`median(ang)`+8)
cg$ang = -(cg$`median(ang)`+ 360/nrow(res)*5)
# 读取分组信息
group_file = group_tree
# group_file <- read.table('popmap.tmp',row.names = 1)
# colnames(group_file) = c('Sample','Group')
tree = read.tree(args[1])
# 按类分组
groupInfo <- split(row.names(group_file), as.factor(group_file$Group)) # 是0的是ggtree软件bug? ,树根?
# 将分组信息添加到树中
tree <- groupOTU(tree, groupInfo)
# 绘制进化树
output = 'tree'
Cladelist = paste0("Clade",c('I','II' ,'III','IV' ,'V' ,'VI','VII' ,'VIII' ,'IX'))
Clusterlist = paste0("Cluster",1:9)
Colorlist = RColorBrewer::brewer.pal(9,'Set1')
names(Colorlist) = Clusterlist
hed = 'ggtree(tree, layout = "circular", ladderize = FALSE, branch.length = "none",aes(color=group)) +
geom_point2(aes(subset=!isTip,fill=group),shape=21,size=2,show.legend = F)+
#geom_text(aes(label=node))+
scale_color_manual(values = Colorlist) +
# scale_color_manual(values = c(Clusterlist=Colorlist)) +
scale_fill_manual(values = Colorlist) +
geom_tiplab2(color="black", size=2.6) + theme(legend.position = "none")'
for (i in 1:gpnum) {
ts = paste0('geom_strip(taxa1 = Taxainfo[1,',i,'],
taxa2 = Taxainfo[2,',i,'],
hjust="center",align=T,offset=1.4,offset.text=0.5,extend=0,barsize = 1.5,
alpha=0.6,label="',Cladelist[i],'",angle=',cg$ang[i],',
color="',Colorlist[i],'")')
hed = paste(hed,ts,sep = '+')
}
p = eval(parse(text = hed))
p
ggsave(paste(output,"_K",gpnum,".png",sep=""),dpi = 300,width = 10,height = 10)
ggsave(paste(output,"_K",gpnum,".pdf",sep=""),width = 10,height = 10)
############ pca分析绘图 #########
num2name = read.table('num2name',header = F)
num2name$group = stringr::str_split(num2name$V2,'-',simplify = T)[,1]
#group = read.table('group_tree.xls',header = T)
pca = read.table('PCA.xls',header = F)
pca = merge(pca,num2name,by.x = "V1", by.y = "V1")
colnames(pca) <- c('num',paste0("PC",1:10),'Sample','group')
if(F){
# 计算每个主成分的贡献率(手动计算)
variances <- pca$sdev^2
total_var <- sum(variances)
contributions <- variances / total_var
# 打印每个主成分的贡献率(手动计算)
print(contributions)
var(pca$x[,1])/sum(var(pca$x[,1]),var(pca$x[,2]),var(pca$x[,3]),var(pca$x[,4]))
contributions1 = var(pca[,3])/sum(apply(pca, 2, var)[3:ncol(pca)])
contributions2 = var(pca[,4])/sum(apply(pca, 2, var)[3:ncol(pca)])
}
draw_axis_line <- function(length_x,length_y,tick_step=NULL,lab_step=NULL){
axis_x_begin <- -1 * length_x
axis_x_end <- length_x
axis_y_begin <- -1 * length_y
axis_y_end <- length_y
if (missing(tick_step))
tick_step <- 10
if (is.null(lab_step))
lab_step <- 10 #axis ticks data
tick_x_frame <- data.frame(ticks=seq(axis_x_begin,axis_x_end,by=tick_step))
tick_y_frame <- data.frame(ticks=seq(axis_y_begin,axis_y_end,by=tick_step)) #axis labels data
lab_x_frame <- subset(data.frame(lab=seq(axis_x_begin,axis_x_end,by=lab_step),zero=0),lab!=0)
lab_y_frame <- subset(data.frame(lab=seq(axis_y_begin,axis_y_end,by=lab_step),zero=0),lab!=0) #set tick length
tick_x_length <- 20/length(tick_x_frame$ticks)/2
tick_y_length <- 20/length(tick_y_frame$ticks)/2 #set zero point
data <- data.frame(x=0,y=0)
library(ggplot2)
p <- ggplot(pca, aes(x=pca$PC1,y=pca$PC2)) + geom_point(aes(color=group),size=2) +
#scale_color_brewer(palette = 'Set1') +
#draw axis line
geom_segment(y=0,yend=0,x=axis_x_begin,xend=axis_x_end,size=0.5) +
geom_segment(x=0,xend=0,y=axis_y_begin,yend=axis_y_end,size=0.5) + #draw x ticks
geom_segment(data=tick_x_frame,aes(x=ticks,xend=ticks,y=0,yend=0 + tick_x_length)) + #draw y ticks
geom_segment(data=tick_y_frame,aes(x=0,xend=0 + tick_y_length,y=ticks,yend=ticks)) + #labels
geom_text(data=lab_x_frame,aes(x=lab,y=zero,label=lab),vjust=1.5) +
geom_text(data=lab_y_frame,aes(x=zero,y=lab,label=lab),hjust=1.5) +
theme_void() + theme(legend.position = c(0.7,0.2))
return(p)
}
draw_axis_line(80,80)
ggsave('PCA.pdf',width = 8,height = 8)
ggsave('PCA.png',width = 8,height = 8,units = 'in',dpi = 300)
进化树+PCA的R定制化绘图参考代码
于 2023-06-05 17:21:24 首次发布