## 记录代码,保存备用
## 绘制小麦多环境曼哈顿图组合图(CMplot或许更快更好懂,可参考)
输入数据格式如下图:(E1,等可替换不同性状)
#加载所需R包
library(ggplot2)
library(dplyr)
#读取数据
data <- readxl::read_xlsx("G:\\data\\发芽率.xlsx", sheet = "Sheet1")
# 定义函数,将数据中Chr数字转换为小麦染色体命名方式
convert_chr <- function(chr_number) {
group <- (chr_number - 1) %/% 3 + 1
letter <- c("A", "B", "D")[(chr_number - 1) %% 3 + 1]
chr_name <- paste0(group, letter)
return(chr_name)
}
# 将Chr列转换为小麦染色体命名方式
data$Chr <- sapply(data$Chr, convert_chr)
# 将数据转换为所需格式
data_melt <- reshape2::melt(data,
id.vars = c("SNP", "Chr", "Pos"),
variable.name = "ENV", value.name = "P")
data_melt$ENV <- as.character(data_melt$ENV) #将ENV列转为字符串
# 设置颜色向量,用于区分不同的环境(性状),个数同环境个数
env_colors <- c("#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b",
"#e377c2", "#7f7f7f", "#bcbd22", "#17becf", "#aec7e8", "#ffbb78")
####--------多环境(性状)曼哈顿组合图---------#### 定义绘图函数
plot.man.multi <- function(d, traits, colors, alpha=1e-3) {
# 计算显著性阈值的-log10形式
a <- if (is.numeric(alpha)) -log10(alpha) else NULL
# 根据染色体和位置排序数据
ord <- with(d, order(Chr, Pos))
d <- d[ord,]
# 提取染色体的唯一值
chr <- unique(d$Chr)
# 计算p值的-log10形式,并添加索引列
d <- within(d, {
x <- NA
y <- -log10(P)
idx <- rep.int(seq_along(chr), times=tapply(Pos, Chr, length))
})
# 计算每个SNP在基因组中的位置
first <- 0
for (i in unique(d$idx)) {
w <- d$idx == i
Pos <- d[w, "Pos"]
Pos <- Pos - min(Pos) + 1
d[w, "x"] <- first + Pos
first <- first + max(Pos)
}
# 计算轴刻度和绘图范围
xtick <- with(d, tapply(x, idx, function(x) sum(range(x))/2))
ytick <- pretty(d$y)
xlim <- range(d$x, na.rm = TRUE)
ylim <- range(d$y, a, na.rm = TRUE)
# 防止xlim和ylim出现无限值
if (any(is.infinite(xlim))) {
xlim <- c(0, max(d$x, na.rm = TRUE))
}
if (any(is.infinite(ylim))) {
ylim <- c(0, max(d$y, a, na.rm = TRUE))
}
xlim <- c(-floor(max(xlim)) * 0.005, ceiling(max(xlim)) * 1.005)
ylim <- c(0, ceiling(max(ylim)) * 1.01)
# 绘制空白图表
plot(NA, xaxt="n", yaxt="n", bty="l", xaxs="i", yaxs="i", xlim=xlim, ylim=ylim, xlab=NA, ylab=NA)
axis(1, at=xtick, cex.axis=0.8, mgp=c(0,0.2,0), tcl=-0.2, labels=chr, gap.axis=0)
axis(2, at=ytick, cex.axis=0.8, mgp=c(0,0.5,0), tcl=-0.2, las=1)
title(xlab="Chromosome", line=1.5)
title(ylab=expression(-log[10](italic(p))), line=1.5)
# 创建颜色映射
color_map <- setNames(colors, traits)
# 绘制每个性状的数据点
for (trait in traits) {
d_trait <- subset(d, ENV == trait)
for (i in unique(d_trait$idx)) {
w <- d_trait$idx == i
points(d_trait[w, "x"], d_trait[w, "y"], col=color_map[[trait]], pch=20, cex=0.5)
}
}
# 绘制显著性阈值线
if (is.numeric(a))
abline(h=a, col="red")
# 添加图例
legend_labels <- c("ENV", traits)
legend_colors <- c(NA, colors)
legend("topleft", legend=legend_labels, col=legend_colors, pch=c(NA, rep(20, length(traits))), horiz=TRUE, cex=0.8, bty="n", x.intersp=0.5)
invisible()
}
traits <- unique(data_melt$ENV)
colors <- env_colors
tiff("man.tif", width=20, height=10, units="cm", pointsize=10, res=300, compression="lzw")
par(mar=c(3,3,0.3,0.3), lend=2)
try(plot.man.multi(data_melt, traits, colors))
dev.off()