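# Environment setup (run these in the shell, not inside R):
#   source activate r-4.1.3
#   R --max-ppsize=500000
# On Linux, start R with the --max-ppsize=500000 option; otherwise the run
# fails with a memory overflow error.
# On Windows, use the following commands instead:
#   options("expressions"=20000)
#   memory.limit(size=8000000)
# Input file: 24组织t-SNE作图数据.csv
# ------------ Verified working run ------------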
library(tidyverse)
library(palmerpenguins)
library(Rtsne)
#参考教程https://zhuanlan.zhihu.com/p/352173349
#转置教程https://blog.csdn.net/lichangzai/article/details/41285205?spm=1001.2101.3001.6661.1&utm_medium=distribute.pc_relevant_t0.none-task-blog-2%7Edefault%7ECTRLIST%7Edefault-1-41285205-blog-106292361.pc_relevant_multi_platform_whitelistv3&depth_1-utm_source=distribute.pc_relevant_t0.none-task-blog-2%7Edefault%7ECTRLIST%7Edefault-1-41285205-blog-106292361.pc_relevant_multi_platform_whitelistv3&utm_relevant_index=1
# Read the raw table, attach a tissue label to every sample, transpose it so
# that each row is one sample, and export the result to data1.csv.
#
# NOTE: a plain t() on its own is not enough; the table has to be transposed
# and exported this way, otherwise later steps fail with:
#   Error in complete.cases(object) :
#     long vectors not supported yet: complete_cases.c:192
# Inspect the exported data1.csv after running this step.

# The input file contains Chinese text, hence the GBK encoding.
data_input <- read.csv(
  "24组织t-SNE作图数据.csv",
  header = TRUE,
  fileEncoding = "GBK"
)

# Tissue label of each sample. cbind() below pairs the labels positionally
# with the rows of the transposed table, so they must be listed in the same
# order as the sample columns of the input file.
# Alternative 8-label set (unused):
# class <- c("Bark", "Cambium", "Leaf", "Xylem", "Phloem", "Root",
#            "Immature_xylem", "Apex")
class <- c(
  "bark", "OL", "MX", "EL", "pistil", "cambium",
  "long1", "long2", "long3", "long4", "long5", "long6",
  "YL", "IMX", "apex", "phloem", "root",
  "short1", "short2", "short3", "short4", "short5", "short6",
  "petiole"
)

# Use the first column of the input as row names so only measurements remain.
data1 <- data.frame(data_input, row.names = 1)

# Transpose: rows become samples, columns become the former row names.
t1 <- t(data1)
t2 <- as.data.frame(t1)
rownames(t2) <- NULL # drop the sample names; rows get automatic numbering

# Fail loudly instead of letting cbind() recycle or misalign the labels.
if (length(class) != nrow(t2)) {
  stop(
    "Number of class labels (", length(class),
    ") does not match the number of samples (", nrow(t2), ").",
    call. = FALSE
  )
}

# The label column is named `class`; later steps read it as data$class.
t3 <- as.data.frame(cbind(class = class, t2))
write.table(t3, "data1.csv", row.names = FALSE, col.names = TRUE, sep = ",")
# Reload the exported table (first column: class label, remaining columns:
# features), clean it and standardize the feature columns.
data <- read.csv(file = "data1.csv", header = TRUE)

# Replace missing values with 0; choose another strategy if it suits the data
# better.
data[is.na(data)] <- 0

# Work on the feature columns only, so the column mask below lines up with the
# table it is applied to (the label column is never touched).
feature_data <- data[, -1, drop = FALSE]
numeric_columns <- vapply(feature_data, is.numeric, logical(1))
if (!all(numeric_columns)) {
  message(
    "Dropping ", sum(!numeric_columns), " non-numeric feature column(s)."
  )
}
numeric_data <- data.matrix(feature_data[, numeric_columns, drop = FALSE])

# Standardize every feature column (center to mean 0, scale to sd 1).
scaled_numeric_data <- scale(numeric_data)

# A constant column has sd 0 and becomes NaN after scaling; such columns carry
# no information and would break the downstream analysis, so remove them.
finite_columns <- colSums(!is.finite(scaled_numeric_data)) == 0
if (!all(finite_columns)) {
  message(
    "Dropping ", sum(!finite_columns),
    " feature column(s) that cannot be standardized (zero variance)."
  )
  scaled_numeric_data <- scaled_numeric_data[, finite_columns, drop = FALSE]
}

# Label column first, standardized features after it, so that
# data_standardized[, -1] yields exactly the numeric feature matrix.
data_standardized <- cbind(
  data[, 1, drop = FALSE],
  as.data.frame(scaled_numeric_data)
)
# Fix the random seed before the t-SNE call.
set.seed(321)

# Feature matrix for t-SNE: everything except the first (class label) column.
tsne_input <- as.matrix(data_standardized[, -1])

# Run t-SNE down to two dimensions.
tsne_out <- Rtsne(
  tsne_input,
  max_iter = 1000,
  theta = 0.0,
  perplexity = 3,
  pca = TRUE,
  dims = 2
)
# Collect the embedding coordinates into a data frame (columns V1 and V2).
tsne_result <- as.data.frame(tsne_out$Y)

# Attach the class label of each sample to its embedded coordinates.
tsne_result$Class <- data$class

# One colour per distinct class, so the palette follows the data instead of
# being hard-coded to 24.
mainPalette <- rainbow(length(unique(tsne_result$Class)))

# Build the scatter plot before opening the device, so that an error while
# constructing it cannot leave a PDF device open.
tsne_plot <- ggplot(tsne_result, aes(x = V1, y = V2, color = Class)) +
  geom_point(size = 1.25) +
  scale_color_manual(values = mainPalette) +
  labs(
    title = "t-SNE_analysis",
    x = "tSNE1",
    y = "tSNE2"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

# NOTE(review): the file name says "perplexity2_3d" although the plot is
# two-dimensional; confirm the intended name.
pdf("perplexity2_3d.pdf", width = 8, height = 8)
# Explicit print(): a ggplot object is only drawn automatically at top level,
# so without it the PDF is empty when the script is run through source().
print(tsne_plot)
dev.off()