# ggplot2 must be attached first: it provides qplot() and bundles the
# diamonds data set used throughout this script.
library(ggplot2)

# Load the diamonds data set that ships with ggplot2.
data(diamonds)

# Per-column summary of the data set.
summary(diamonds)

# Open the help page for more information about the diamonds data set.
?diamonds

# Print the whole color column.
diamonds$color
# Histogram of price with a bin width of 20 and the y axis limited to 0-600.
# The bare column name is used (instead of diamonds$price) so that qplot()
# resolves it inside `data` and the axis is labelled "price".
price_hist <- qplot(
  x = price,
  data = diamonds,
  binwidth = 20,
  ylim = c(0, 600)
)
print(price_hist)

# Mean price, then counts of diamonds priced below 500, below 250 and at or
# above 15000 (summary() of a logical vector tabulates FALSE/TRUE).
mean(diamonds$price)
summary(diamonds$price < 500)
summary(diamonds$price < 250)
summary(diamonds$price >= 15000)

# Save the generated figure. The plot is passed explicitly so the saved image
# does not depend on which plot happened to be drawn last.
ggsave("priceHistogram.png", plot = price_hist)
# Show the structure of the data set.
str(diamonds)

# Number of diamonds in each cut category.
qplot(x = cut, data = diamonds)

# Price distribution split by cut quality, one panel per cut.
qplot(x = price, data = diamonds) + facet_wrap(~cut)

# Summary statistics of price within each cut.
by(diamonds$price, diamonds$cut, summary)
# diamonds$cut: Fair
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 337 2050 3282 4359 5206 18574
# -----------------------------------------------------------------------------------
# diamonds$cut: Good
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 327 1145 3050 3929 5028 18788
# -----------------------------------------------------------------------------------
# diamonds$cut: Very Good
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 336 912 2648 3982 5373 18818
# -----------------------------------------------------------------------------------
# diamonds$cut: Premium
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 326 1046 3185 4584 6296 18823
# -----------------------------------------------------------------------------------
# diamonds$cut: Ideal
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 326 878 1810 3458 4678 18806
# Compare the raw price distribution with its log10(price + 1) and
# square-root transforms, stacked in a single column.
p1 <- qplot(x = price, data = diamonds)
p2 <- qplot(x = log10(price + 1), data = diamonds)
p3 <- qplot(x = sqrt(price), data = diamonds)

# grid.arrange() belongs to the gridExtra package, which is not attached in
# the visible part of this script, so it is called through its namespace.
gridExtra::grid.arrange(p1, p2, p3, ncol = 1)
# Box plots of price for each cut category.
qplot(x = cut, y = price, data = diamonds, geom = "boxplot")

# Same plot after dropping rows whose cut is NA.
qplot(
  x = cut,
  y = price,
  data = subset(diamonds, !is.na(cut)),
  geom = "boxplot"
)

# Box plots of price for each color, with rows whose color is NA dropped.
qplot(
  x = color,
  y = price,
  data = subset(diamonds, !is.na(color)),
  geom = "boxplot"
)

# Summary statistics of price within each color.
by(diamonds$price, diamonds$color, summary)
# diamonds$color: D
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 357 911 1838 3170 4214 18693
# -----------------------------------------------------------------------------------
# diamonds$color: E
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 326 882 1739 3077 4003 18731
# -----------------------------------------------------------------------------------
# diamonds$color: F
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 342 982 2344 3725 4868 18791
# -----------------------------------------------------------------------------------
# diamonds$color: G
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 354 931 2242 3999 6048 18818
# -----------------------------------------------------------------------------------
# diamonds$color: H
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 337 984 3460 4487 5980 18803
# -----------------------------------------------------------------------------------
# diamonds$color: I
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 334 1120 3730 5092 7202 18823
# -----------------------------------------------------------------------------------
# diamonds$color: J
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 335 1860 4234 5324 7695 18710
# Price against carat, with points coloured by diamond color.
qplot(carat, price, data = diamonds, colour = color)

# Price per carat for each color, first as plain box plots, then
# semi-transparent with outline and fill mapped to color.
qplot(color, price / carat, data = diamonds, geom = "boxplot")
qplot(
  color, price / carat,
  data = diamonds,
  geom = "boxplot",
  alpha = I(1 / 5),
  colour = color,
  fill = color
)

# Frequency polygons of carat per color, first with a bin width of 1.01.
qplot(
  x = carat,
  data = diamonds,
  color = color,
  binwidth = 1.01,
  geom = "freqpoly"
)

# Tabulate how often each carat value occurs.
table(diamonds$carat)

# Same frequency polygons; a bin width of 0.1 is used here.
qplot(
  x = carat,
  data = diamonds,
  color = color,
  binwidth = 0.1,
  geom = "freqpoly"
)
# Jitter plot of price per carat for each color.
# The data set is passed as `diamonds`: no object named `data` is assigned in
# the visible script, so `data = data` would have handed qplot() the data()
# function instead of a data frame.
qplot(
  color, price / carat,
  data = diamonds,
  geom = "jitter",
  alpha = I(1 / 5)
)
# tidyr - a package for reshaping the layout of data
# dplyr - a package that helps transform tidy tabular data