# Save the most recently generated plot to an image file.
# ggsave currently recognises the extensions eps/ps, tex (pictex),
# pdf, jpeg, tiff, png, bmp, svg and wmf (Windows only).
ggsave(filename = 'priceHistogram.png')
# Faceting syntax reference. These are templates with placeholder names,
# kept as comments so they are not evaluated when the script runs:
#   facet_wrap(formula)                 e.g. facet_wrap(~variable)
#   facet_grid(formula)                 e.g. facet_grid(vertical ~ horizontal)
# Load the tab-separated file into `pf`. Two alternatives are shown; the
# second assignment simply overwrites the result of the first.
# Option 1: read.csv with an explicit tab separator.
pf <- read.csv(file = 'pseudo_facebook.tsv', sep = '\t')
# Option 2: read.delim, which uses a tab separator by default.
pf <- read.delim(file = 'pseudo_facebook.tsv')
# Faceting: plot dob_day in a separate panel for each dob_month value,
# three panels per row, with an x-axis tick at every integer from 1 to 31.
qplot(data = pf, x = dob_day) +
  facet_wrap(~dob_month, ncol = 3) +
  scale_x_continuous(breaks = seq_len(31))
# Limit the x axis to the range 0-1000, shown two ways.
# Option 1: pass the limits directly to qplot through `xlim`.
qplot(data = pf, x = friend_count, xlim = c(0, 1000))
# Option 2: add a continuous x scale that carries the same limits.
qplot(data = pf, x = friend_count) +
  scale_x_continuous(limits = c(0, 1000))
# Set the width of each bin (10) and place an x-axis tick every 50
# between 0 and 1000.
qplot(data = pf, x = friend_count, binwidth = 10) +
  scale_x_continuous(
    breaks = seq(from = 0, to = 1000, by = 50),
    limits = c(0, 1000)
  )
# Split the friend_count histogram into one panel per gender value.
qplot(data = pf, x = friend_count, binwidth = 10) +
  facet_wrap(~gender) +
  scale_x_continuous(
    breaks = seq(from = 0, to = 1000, by = 50),
    limits = c(0, 1000)
  )
# Drop NA values: rows with a missing gender are filtered out before the
# per-gender histogram is drawn.
qplot(
  data = subset(x = pf, subset = !is.na(gender)),
  x = friend_count,
  binwidth = 10
) +
  facet_wrap(~gender) +
  scale_x_continuous(
    breaks = seq(from = 0, to = 1000, by = 50),
    limits = c(0, 1000)
  )
# Summary statistics (including the mean) of friend_count for each gender:
# by() applies summary() to the friend_count values of every gender group.
by(pf$friend_count, pf$gender, summary)
# Console output recorded when these notes were taken:
#> pf$gender: female
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#>       0      37      96     242     244    4923
#> ---------------------------------------------------------
#> pf$gender: male
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#>       0      27      74     165     182    4917
#
# Analysis: women have a higher friend_count than men; both the median and
# the mean are larger. Note that the median is the more reliable and stable
# measure here: friend_count is right-skewed, so a few extreme values on the
# right easily pull the mean upward, whereas the median is not affected.

# Summary statistics of tenure; the NA's column counts the missing values.
summary(pf$tenure)
# Console output recorded when these notes were taken:
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's
#>     0.0   226.0   412.0   537.9   675.0  3139.0       2
# Set colors: black outlines and a '#099dd9' (blue) fill. Wrapping the values
# in I() makes qplot use them as literal colors rather than as data to map.
qplot(data = pf, x = tenure,
      fill = I('#099dd9'), color = I('black'))
# Histogram of tenure expressed in years (tenure / 365; tenure is presumably
# recorded in days). Bins are a quarter of a year wide, and the x axis is
# limited to 0-7 with a tick at every whole year.
qplot(data = pf, x = tenure / 365, binwidth = 0.25,
      fill = I('#f79420'), color = I('black')) +
  scale_x_continuous(
    limits = c(0, 7),
    breaks = seq(from = 0, to = 7, by = 1)
  )
# Add x-axis and y-axis labels to the tenure-in-years histogram.
qplot(x = tenure/365, data = pf, binwidth = .25,
      xlab = 'Number of years using Facebook',
      ylab = 'Number of users in sample',
      color = I('black'), fill = I('#f79420')) +
  scale_x_continuous(breaks = seq(0, 7, 1), limits = c(0, 7))

# Summary statistics of age.
summary(pf$age)
# Console output recorded when these notes were taken:
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#>   13.00   20.00   28.00   37.28   50.00  113.00
# Histogram of age with one-unit bins, black outlines and a '#5760ab' fill;
# x-axis ticks are placed every 5 units from 0 up to 110.
qplot(data = pf, x = age, binwidth = 1,
      fill = I('#5760ab'), color = I('black')) +
  scale_x_continuous(breaks = seq(from = 0, to = 113, by = 5))
# When faceting, let the axes adjust per panel. Note that scales = "free"
# frees both the x and the y axis; "free_y" would free only the y axis.
qplot(data = diamonds, x = price) +
  facet_wrap(~cut, scales = "free")
# When faceting, let the axes adjust per panel (scales = "free" frees both
# x and y) and apply a log10 transformation to the x axis.
# For long-tailed distributions you can add a ggplot layer such as
# scale_x_log10() to transform the variable.
qplot(data = diamonds, x = price / carat) +
  facet_wrap(~cut, scales = "free") +
  scale_x_log10()