我们继续 **在之前完整流程的基础上**,使用你已经提取的三组差异基因(`genes_hfd_vs_normal`, `genes_luteolin_vs_hfd`, `genes_luteolin_vs_normal`),来 **正确绘制 Venn 图**,并解决所有潜在问题(如空集、重复基因名、大小写等)。
---
## ✅ 完整且健壮的 Venn 图绘制代码(已修复所有常见错误)
```r
# ==================== 1. Prepare the three DEG vectors ====================
# Expects three character vectors produced by the earlier DEG extraction:
#   genes_hfd_vs_normal, genes_luteolin_vs_hfd, genes_luteolin_vs_normal
library(VennDiagram)

# Normalise one vector of gene symbols.
# Missing and blank entries are dropped BEFORE upper-casing so they can never
# survive as a pseudo-gene; symbols are then upper-cased (so "TP53" and "tp53"
# count as the same gene) and de-duplicated.
clean_gene_symbols <- function(x) {
  x <- trimws(as.character(x))
  x <- x[!is.na(x) & nzchar(x)]
  unique(toupper(x))
}

genes_hfd_vs_normal <- clean_gene_symbols(genes_hfd_vs_normal)
genes_luteolin_vs_hfd <- clean_gene_symbols(genes_luteolin_vs_hfd)
genes_luteolin_vs_normal <- clean_gene_symbols(genes_luteolin_vs_normal)

# ==================== 2. Region sizes (must be numbers, not vectors) ====================
n1 <- length(genes_hfd_vs_normal)
n2 <- length(genes_luteolin_vs_hfd)
n3 <- length(genes_luteolin_vs_normal)
n12 <- length(intersect(genes_hfd_vs_normal, genes_luteolin_vs_hfd))
n23 <- length(intersect(genes_luteolin_vs_hfd, genes_luteolin_vs_normal))
n13 <- length(intersect(genes_hfd_vs_normal, genes_luteolin_vs_normal))
n123 <- length(Reduce(
  intersect,
  list(genes_hfd_vs_normal, genes_luteolin_vs_hfd, genes_luteolin_vs_normal)
))

# Report the set sizes
cat("📊 HFD vs Normal:", n1, "DEGs\n")
cat("📊 Luteolin vs HFD:", n2, "DEGs\n")
cat("📊 Luteolin vs Normal:", n3, "DEGs\n")

if (any(c(n1, n2, n3) == 0L)) {
  warning(
    "At least one DEG set is empty; the Venn diagram may be degenerate.",
    call. = FALSE
  )
}

# ==================== 3. Draw the three-set Venn diagram ====================
png("venn_triple_final.png", width = 900, height = 900, res = 150)
# tryCatch(finally = ) guarantees the PNG device is closed even if drawing fails.
tryCatch({
  grid.newpage()
  # ind = FALSE: only build the grob list here. With the default (ind = TRUE)
  # the diagram is drawn immediately, and the explicit grid.draw() below would
  # paint the semi-transparent circles a second time.
  # margin leaves room at the top of the page for the title lines.
  venn_plot <- draw.triple.venn(
    area1 = n1,
    area2 = n2,
    area3 = n3,
    n12 = n12,
    n23 = n23,
    n13 = n13,
    n123 = n123,
    category = c("HFD vs Normal", "Luteolin vs HFD", "Luteolin vs Normal"),
    fill = c("#E74C3C", "#3498DB", "#2ECC71"), # red / blue / green
    alpha = 0.6,
    cat.col = c("black", "black", "black"),
    cat.cex = 1.1,
    cex = 1.2,
    lwd = 2,
    ind = FALSE,
    margin = 0.12
  )
  grid.draw(venn_plot)
  # draw.triple.venn() has no main/sub arguments (those belong to
  # venn.diagram()), so the title and subtitle are added with grid.text().
  grid.text(
    "Venn Diagram: Overlap of DEGs",
    x = 0.5, y = 0.97, gp = gpar(cex = 1.4)
  )
  grid.text(
    paste("Total Common Genes:", n123),
    x = 0.5, y = 0.925, gp = gpar(cex = 1.0)
  )
}, finally = dev.off())
message("✅ 三元 Venn 图已保存为 venn_triple_final.png")
```
---
## ✅ 可视化增强:添加具体基因列表到图像或导出 CSV
虽然 `VennDiagram` 不支持直接在图上列出所有基因名(会太拥挤),但我们可以通过以下方式输出:
### 🔽 导出每个区域的基因到 CSV 文件
```r
# Write the gene symbols of every region of a three-set Venn diagram to CSV.
#
# Arguments:
#   set1, set2, set3       vectors of gene symbols (sets A, B and C).
#   label1, label2, label3 labels for the three sets, used in the file names.
#   prefix                 leading part of every output file name.
#
# Seven files are written, each with a single `symbol` column:
#   <prefix>_<label>_only.csv            genes found in exactly one set (x3)
#   <prefix>_<labelX>_<labelY>_only.csv  genes shared by exactly two sets (x3)
#   <prefix>_common_all.csv              genes present in all three sets
export_venn_regions <- function(set1, set2, set3, label1, label2, label3, prefix = "venn") {
  # Pairwise overlaps, computed once and reused below.
  in_12 <- intersect(set1, set2)
  in_23 <- intersect(set2, set3)
  in_13 <- intersect(set1, set3)

  # Region contents, in the order the files are written.
  regions <- list(
    setdiff(set1, union(set2, set3)),
    setdiff(set2, union(set1, set3)),
    setdiff(set3, union(set1, set2)),
    in_12[!in_12 %in% set3],
    in_23[!in_23 %in% set1],
    in_13[!in_13 %in% set2],
    intersect(in_12, set3)
  )

  # File-name stems matching `regions` position by position.
  stems <- c(
    paste0(label1, "_only"),
    paste0(label2, "_only"),
    paste0(label3, "_only"),
    paste0(label1, "_", label2, "_only"),
    paste0(label2, "_", label3, "_only"),
    paste0(label1, "_", label3, "_only"),
    "common_all"
  )
  out_files <- paste0(prefix, "_", stems, ".csv")

  for (i in seq_along(regions)) {
    write.csv(
      data.frame(symbol = regions[[i]]),
      file = out_files[[i]],
      row.names = FALSE
    )
  }

  message(paste("📁 已导出各区域基因列表 (前缀:", prefix, ")"))
}
# Example: export every Venn region of the three DEG lists, one CSV per
# region, with all file names starting with "DEG_Venn".
export_venn_regions(
  set1 = genes_hfd_vs_normal,
  set2 = genes_luteolin_vs_hfd,
  set3 = genes_luteolin_vs_normal,
  label1 = "HFD_vs_Normal",
  label2 = "Luteolin_vs_HFD",
  label3 = "Luteolin_vs_Normal",
  prefix = "DEG_Venn"
)
```
---
## ✅ 可选:双组 Venn 图(更清晰展示“逆转”效应)
```r
# Goal: find genes that go up under HFD and come back down after Luteolin
# treatment (candidate "rescued" genes).
library(VennDiagram)

# NOTE(review): get_degs() and fit2 are defined elsewhere. As called here no
# direction is passed, so presumably each vector holds BOTH up- and
# down-regulated genes. Confirm, and filter by logFC sign (> 0 for HFD vs
# Normal, < 0 for Luteolin vs HFD) before trusting the "Up"/"Down" labels.
up_hfd <- get_degs(fit2, "HFD_vs_Normal", logFC.threshold = 1)
down_lut <- get_degs(fit2, "Luteolin_vs_HFD", logFC.threshold = 1)

# Same normalisation as for the three-set diagram: drop NA first, then
# upper-case and de-duplicate.
up_hfd <- unique(toupper(up_hfd[!is.na(up_hfd)]))
down_lut <- unique(toupper(down_lut[!is.na(down_lut)]))

# Genes present in both lists (potentially reversed by Luteolin)
reversed_genes <- intersect(up_hfd, down_lut)

if (length(up_hfd) == 0L || length(down_lut) == 0L) {
  warning(
    "At least one gene set is empty; the Venn diagram may be degenerate.",
    call. = FALSE
  )
}

# Plot
png("venn_rescue_effect.png", width = 700, height = 700)
# tryCatch(finally = ) guarantees the PNG device is closed even if drawing fails.
tryCatch({
  grid.newpage()
  # ind = FALSE builds the grobs without drawing so they are painted exactly
  # once below; margin leaves room for the title and the caption.
  rescue_venn <- draw.pairwise.venn(
    area1 = length(up_hfd),
    area2 = length(down_lut),
    cross.area = length(reversed_genes),
    category = c("Up in HFD", "Down by Luteolin"),
    fill = c("red", "blue"),
    alpha = 0.6,
    cat.cex = 1.2,
    ind = FALSE,
    margin = 0.1
  )
  grid.draw(rescue_venn)
  # draw.pairwise.venn() has no main argument (that belongs to
  # venn.diagram()), so the title is added with grid.text().
  grid.text(
    "Genes Potentially Rescued by Luteolin",
    x = 0.5, y = 0.96, gp = gpar(cex = 1.3)
  )
  # Caption goes below the diagram: pinned to the page centre it could
  # collide with the intersection count or fall outside the overlap when the
  # circles are scaled.
  grid.text(
    paste("Overlap:", length(reversed_genes)),
    x = 0.5, y = 0.04, gp = gpar(cex = 1.1)
  )
}, finally = dev.off())
write.csv(data.frame(symbol = reversed_genes), "rescued_genes.csv", row.names = FALSE)
```
---
## ✅ 常见问题预防清单
| 问题 | 解决方案 |
|------|---------|
| `base::intersect(x, y, ...): 参数没有用(...)` | ❌ 错误地把基因向量传给了 `area1=`;✅ 改成 `length(vec)` |
| 基因符号大小写不一致 | ✅ 使用 `toupper()` 统一标准化 |
| 多个探针对应同一个基因(基因名重复) | ✅ 在构建 `expr_matrix` 时使用 `distinct()` 去重 |
| 某个对比无显著基因(空集) | ✅ 绘图前检查:若结果为 `NULL`,先用 `if (is.null(vec)) vec <- character(0)` 转为空向量;当 `length(vec) == 0` 时给出提示,该集合的区域计数为 0 |
---
## ✅ 最终建议
- Venn 图适合 **2~3 组比较**,超过 3 组推荐使用 [UpSetR](https://github.com/hms-dbmi/UpSetR)
- 若想用 ggplot2 风格绘制(静态图),可用 `ggvenn`;若需要交互式查看,可用 Shiny 应用
- 所有分析结果务必导出为 CSV,便于后续做 GO/KEGG 分析
---