今天遇到一个可视化问题:如何绘制分组散点图,并为每组添加趋势线?需要的操作包括数据导入、转换、整合(宽转长)和可视化。
一、可视化结果查看
结果基本满足我的要求,但还有不足的地方:
1.拟合直线不平滑(不直);
2.没有直接显示直线公式、R²及p值。
二、数据导入
我习惯用csv格式导入数据。
# Import the data from CSV.
# Fix: the path string was missing its closing quote, so the line did not parse.
# stringsAsFactors = TRUE keeps text columns (variety, Treatment) as factors,
# matching the str() output shown for the reshaped data; R >= 4.0 no longer
# converts strings to factors by default.
TDwx0505 <- read.csv("C:/Users/D.csv", stringsAsFactors = TRUE)
head(TDwx0505)

# Plot number and replicate are read as numbers; treat them as categorical.
TDwx0505$plot <- as.factor(TDwx0505$plot)
TDwx0505$rep <- as.factor(TDwx0505$rep)

str(TDwx0505)    # column types (numeric, factor)
class(TDwx0505)  # object class (data frame, vector, matrix)
head(TDwx0505)   # layout of the first rows
> plot variety Treatment rep TDW TDAG DWr DWs DWp TWDseed
1 7914 DD199 NTD 1 842.6 719.6 123.00 418.9 300.7 295.2
2 7916 DD199 NTD 2 540.4 461.6 78.75 250.4 211.3 231.1
3 7918 DD199 NTD 3 655.5 564.5 90.99 280.5 284.0 223.8
4 7923 DD199 NTDR 1 586.3 500.6 85.70 310.1 190.5 181.7
5 7921 DD199 NTDR 2 524.8 441.4 83.42 255.3 186.1 200.0
6 7919 DD199 NTDR 3 619.6 506.2 113.37 248.1 258.1 221.4
##
三、数据整合(宽转长)
library(reshape2)
# Reshape wide -> long: stack the five biomass columns (columns 5-9, i.e.
# TDW, TDAG, DWr, DWs, DWp) into one "Biomass" column labelled by "Biogroup".
# Fixes:
#   * the identifier columns (1-4) must be included in `data`, otherwise they
#     cannot be kept as id variables;
#   * melt() takes the identifiers through `id.vars`; `ID=` is not one of its
#     arguments and was silently ignored.
TDwx0505_DW <- melt(
  data = TDwx0505[, 1:9],
  id.vars = c("plot", "variety", "Treatment", "rep"),
  variable.name = "Biogroup",
  value.name = "Biomass"
)

# melt() stacks the measure columns one after another, so repeating TWDseed
# once per stacked column (5 times) keeps every Biomass value paired with the
# seed weight of its own plot. Stored as a plain numeric vector rather than a
# one-column matrix, so str() reports it as `num` instead of `num [1:120, 1]`.
TDwx0505_DW$TWDseed <- rep(TDwx0505$TWDseed, times = 5)
> str(TDwx0505_DW)
'data.frame': 120 obs. of 7 variables:
$ plot : Factor w/ 24 levels "7901","7902",..: 14 16 18 23 21 19 2 4 6 11 ...
$ variety : Factor w/ 2 levels "DD199","YG2009": 1 1 1 1 1 1 1 1 1 1 ...
$ Treatment: Factor w/ 4 levels "NTD","NTDR","TD",..: 1 1 1 2 2 2 3 3 3 4 ...
$ rep : Factor w/ 3 levels "1","2","3": 1 2 3 1 2 3 1 2 3 1 ...
$ Biogroup : Factor w/ 5 levels "TDW","TDAG","DWr",..: 1 1 1 1 1 1 1 1 1 1 ...
$ Biomass : num 843 540 655 586 525 ...
$ TWDseed : num [1:120, 1] 295 231 224 182 200 ...
> head(TDwx0505_DW)
plot variety Treatment rep Biogroup Biomass TWDseed
1 7914 DD199 NTD 1 TDW 842.64 295.17
2 7916 DD199 NTD 2 TDW 540.39 231.08
3 7918 DD199 NTD 3 TDW 655.47 223.79
4 7923 DD199 NTDR 1 TDW 586.32 181.67
5 7921 DD199 NTDR 2 TDW 524.82 200.01
6 7919 DD199 NTDR 3 TDW 619.62 221.42
四、ggplot2作图
library(ggplot2)
library(ggpmisc)
# Fix the display order of the biomass groups (legend and scale order).
TDwx0505_DW$Biogroup <- factor(
  TDwx0505_DW$Biogroup,
  ordered = TRUE,
  levels = c("TDW", "TDAG", "DWs", "DWp", "DWr")
)

# Drop the "TT" treatment from both the wide and the long table.
TDwx0505 <- subset(TDwx0505, Treatment != "TT")
TDwx0505_DW <- subset(TDwx0505_DW, Treatment != "TT")

# One colour/shape/fill per biomass group, shared by points, lines and labels.
group_colours <- c("red", "green", "blue", "orange", "purple")

# Scatter plot of seed weight against biomass, one linear fit per group.
# Changes from the previous version:
#   * `span` was removed: it only affects loess smoothing and is ignored when
#     method = "lm";
#   * the model formula is stated explicitly so the line and the label use the
#     same fit;
#   * ggpmisc::stat_poly_eq() prints the fitted equation, R^2 and p-value of
#     each group directly on the panel.
ggplot(
  data = TDwx0505_DW,
  aes(x = Biomass, y = TWDseed,
      color = Biogroup, shape = Biogroup, fill = Biogroup)
) +
  geom_point(size = 2) +
  geom_smooth(method = "lm", formula = y ~ x, linetype = 1, se = FALSE) +
  stat_poly_eq(
    aes(label = paste(after_stat(eq.label),
                      after_stat(rr.label),
                      after_stat(p.value.label),
                      sep = "~~~")),
    formula = y ~ x,
    parse = TRUE,
    size = 3
  ) +
  scale_shape_manual(values = 21:25) +
  scale_fill_manual(values = group_colours) +
  scale_color_manual(values = group_colours) +
  ylab(expression(paste("SW "," ( ","g"," ",m^-2," )"))) +
  xlab(expression(paste("DW"," ( ","g"," ",m^-2," )"))) +
  expand_limits(y = c(150, 450)) +
  scale_y_continuous(breaks = c(150, 250, 350, 450)) +
  mytheme  # user-defined theme object, defined elsewhere
# Linear regression check: seed weight modelled on root and above-ground
# biomass, fitted on the wide table.
fit <- lm(formula = TWDseed ~ DWr + TDAG, data = TDwx0505)
summary(fit)  # coefficient table, R-squared, F-test
coef(fit)     # fitted coefficients only