# Seed the RNG so the simulated x values are reproducible.
set.seed(1234)
# Toy data: group1 is five 1s followed by five 2s, group2 alternates
# "a"/"b", and x holds ten standard-normal draws.
smalldat <- data.frame(
  group1 = rep(c(1L, 2L), each = 5L),
  group2 = rep_len(c("a", "b"), length.out = 10L),
  x = rnorm(n = 10L)
)
# Convert the data.frame into a data.table.
library(data.table)
smalldat <- as.data.table(smalldat)
# Attach group-level summaries to the raw rows: `:=` adds a column by
# reference, and with `by` the group mean is repeated on every row of
# its group.
# Mean of x within each level of group1.
smalldat[, `:=`(aggGroup1 = mean(x)), by = "group1"]
# Mean of x within each group1 x group2 combination.
smalldat[, `:=`(aggGroup1.2 = mean(x)), by = c("group1", "group2")]
# plyr offers a comparable split-apply-combine workflow.
library(plyr)
# Build one summary table per grouping: the mean of x by group1, and the
# mean of x by each group1 x group2 combination.
aggdat1 <- ddply(
  smalldat, "group1", summarise,
  aggGroup1plyr = mean(x)
)
aggdat12 <- ddply(
  smalldat, c("group1", "group2"), summarise,
  aggGroup1.1plyr = mean(x)
)
# Attach the summaries to the raw rows with plyr's join(), used here
# much like merge().
smalldat <- join(smalldat, aggdat1, by = "group1", type = "left")
smalldat <- join(smalldat, aggdat12, by = c("group1", "group2"), type = "left")
# Show the combined data.
smalldat
# Remove duplicate records with respect to the grouping fields.
# Set the key - this sorts the data by group1, then group2.
setkeyv(smalldat, c("group1", "group2"))
# Keep one row per key combination and drop the raw variable x.
# `by` must be given explicitly: since data.table 1.9.8, unique() compares
# all columns by default instead of only the key columns, so without it
# rows that differ only in x would all be kept.
uniqdat <- subset(unique(smalldat, by = key(smalldat)), select = -x)
# Show the de-duplicated data.
uniqdat