题目描述
目录
一、身高
1.条形图
按身高区间统计人数,并绘制条形图。按照常理,身高应当大致符合正态分布,但我们班男生和女生的身高差距较大,方差也大,所以图中出现了两个峰。
# Load ggplot2
library(ggplot2)

# Create data: height intervals (in display order) and the head count of each
height_bins <- c(
  "160~165", "166~170", "171~175", "176~180", "181~185", "185以上"
)
data <- data.frame(
  # Explicit factor levels pin the bar order to the order written above
  # instead of relying on how the labels happen to sort as text.
  区间 = factor(height_bins, levels = height_bins),
  人数 = c(2, 7, 2, 10, 3, 2)
)

# Barplot: stat = "identity" makes the bar heights come straight from 人数.
# The original call was missing its closing parenthesis and did not parse.
ggplot(data, aes(x = 区间, y = 人数)) +
  geom_bar(stat = "identity")
2.顺序的折线图
将身高从低到高排序后绘制折线图,可以看到曲线整体较为均匀地上升;其中 178 附近出现了一段平台,说明这一身高的人数最多。
# Libraries
library(ggplot2)

# Heights listed in ascending order; xValue is the position of each value
# in that ordered sequence (1, 2, ..., 26).
yValue <- c(
  162, 164, 167, 168, 170, 170, 170, 170, 170, 172, 175, 178, 178,
  178, 178, 178, 178, 179, 179, 180, 180, 181, 181, 185, 186, 190
)
xValue <- seq_along(yValue)
data <- data.frame(xValue = xValue, yValue = yValue)

# Line plot of height against its position in the ordered sequence
line_layer <- geom_line()
ggplot(data = data, mapping = aes(x = xValue, y = yValue)) + line_layer
二、体重
1.条形图
可以看到,体重大致呈正态分布,这也与现实中的情况相符。
# Load ggplot2
library(ggplot2)

# Weight intervals and the number of people counted in each interval
interval_labels <- c("45~50", "50~60", "60~70", "70~75", "75以上")
head_counts <- c(2, 8, 12, 2, 2)
data <- data.frame(区间 = interval_labels, 人数 = head_counts)

# Bar plot whose bar heights are read directly from the 人数 column
bar_layer <- geom_bar(stat = "identity")
ggplot(data = data, mapping = aes(x = 区间, y = 人数)) + bar_layer
三、相关系数
1.身高和体重的散点图
可以看到,体重和身高大致呈线性关系,在数据的两端尤为明显。但由于我们的数据是男女混合的,而男生和女生对体重可能有不同的追求,因此这种线性关系并不十分严格。
# library
library(ggplot2)

# Weight and height vectors of equal length; the i-th weight is plotted
# against the i-th height.
weight_values <- c(
  63, 71, 80, 68, 56, 63, 65, 70, 63, 55, 80, 70, 70,
  60, 55, 47, 55, 47, 68.5, 55, 75, 65, 65, 66, 56, 60
)
height_values <- c(
  180, 178, 181, 172, 178, 170, 170, 190, 179, 170, 185, 178, 170,
  179, 164, 162, 170, 168, 186, 175, 178, 178, 178, 181, 167, 180
)
data <- data.frame(体重 = weight_values, 身高 = height_values)

# Basic scatter plot: weight on the x axis, height on the y axis
ggplot(data = data, mapping = aes(x = 体重, y = 身高)) + geom_point()
2.身高和体重的热力图
通过颜色的深浅来表现各变量之间相关系数的大小。
# Quick display of the capabilities of GGally for assessing the correlation
# between variables
library(GGally)

# Create data: var1 and var2 hold the same numbers as the weight (体重) and
# height (身高) columns used for the scatter plot.
data <- data.frame(
  var1 = c(
    63, 71, 80, 68, 56, 63, 65, 70, 63, 55, 80, 70, 70,
    60, 55, 47, 55, 47, 68.5, 55, 75, 65, 65, 66, 56, 60
  ),
  var2 = c(
    180, 178, 181, 172, 178, 170, 170, 190, 179, 170, 185, 178, 170,
    179, 164, 162, 170, 168, 186, 175, 178, 178, 178, 181, 167, 180
  )
)

# Derived columns: the square of var1 and its negation. `^` replaces the
# legacy `**` spelling (the parser treats both as the same operator) and
# `<-` replaces `=` for assignment.
data$v4 <- data$var1^2
data$v5 <- -(data$var1^2)

# Check correlation between variables
# cor(data)

# Nice visualization of correlations. `method` is a two-element vector:
# which observations to use ("everything") and the correlation
# coefficient to compute ("pearson").
ggcorr(data, method = c("everything", "pearson"))
library(corrplot)
library(RColorBrewer)

# Weight (体重) and height (身高) columns, one row per observation
data <- data.frame(
  体重 = c(
    63, 71, 80, 68, 56, 63, 65, 70, 63, 55, 80, 70, 70,
    60, 55, 47, 55, 47, 68.5, 55, 75, 65, 65, 66, 56, 60
  ),
  身高 = c(
    180, 178, 181, 172, 178, 170, 170, 190, 179, 170, 185, 178, 170,
    179, 164, 162, 170, 168, 186, 175, 178, 178, 178, 181, 167, 180
  )
)

# Correlation matrix of the two columns, and an 8-colour RdYlBu palette
corr_matrix <- cor(data)
palette_colors <- brewer.pal(n = 8, name = "RdYlBu")

# Draw the upper triangle only, with variables ordered by hierarchical
# clustering
corrplot(
  corr_matrix,
  type = "upper",
  order = "hclust",
  col = palette_colors
)