线性回归:
# Linear regression demo: simulate y = x + Gaussian noise, then fit y ~ x.
x <- seq_len(10)
y <- x + rnorm(10)          # true slope 1, N(0, 1) noise
fit <- lm(y ~ x)
summary(fit)                # inspect coefficients and fit quality
关联挖掘:
library(arules)
# A minimal "basket"-format transaction file: one transaction per line,
# items separated by commas. Item names are kept consistent (no stray
# spaces) so read.transactions with sep = "," parses them as intended.
data <- paste("item1,item2", "item2,item3", sep = "\n")
# Fixed: the original had mismatched quotes ("demo_basket').
write(data, file = "demo_basket")
tr <- read.transactions("demo_basket", format = "basket", sep = ",")
data("Adult")  # the Adult data set shipped with arules
# Fixed: the original quoted the whole call (apriori("Adult, ...)),
# which is a syntax error; Adult is an object, not a string.
rules <- apriori(Adult, parameter = list(supp = 0.5, conf = 0.9, target = "rules"))
聚类分析:
# Simulate two 2-D Gaussian clouds (means 0 and 1, sd 0.3), stack them,
# and recover the two groups with k-means.
x <- rbind(
  matrix(rnorm(100, sd = 0.3), ncol = 2),
  matrix(rnorm(100, mean = 1, sd = 0.3), ncol = 2)
)
cl <- kmeans(x, 2)                               # cluster into 2 groups
plot(x, col = cl$cluster)                        # points coloured by cluster
points(cl$centers, col = 1:2, pch = 8, cex = 2)  # mark the two centres
分类:
# Classification demo: fit an SVM to the iris data (package e1071).
library(e1071)
data(iris)
x <- subset(iris, select = -Species)  # feature columns only
y <- iris$Species                     # class labels
model <- svm(x, y)                    # train with default settings
summary(model)                        # model parameters
pred <- predict(model, x)             # in-sample predictions
table(pred, y)                        # confusion matrix
R实现的item-based CF推荐算法。
(注:此处原文为代码编辑器的行号栏 1–24,属转录残留;该段 item-based CF 代码本身未包含在此片段中。)
除去注释,有效代码只有16行。其中大量运用了向量化的函数与处理方式,所以没有任何的显式循环结构;关于向量化更详细的叙述可参见 R 官方文档中对向量化运算的介绍(原文链接在转录中丢失)。
注:该代码实现的只是最基本算法,仅作参考,不承诺在大规模与复杂数据环境下的实用性。
源数据文件data.dat的内容如下所列(注:下一行的换行符在转录中丢失,原文件每行为一条 user_id,subject_id 记录,例如开头几行应为 1,1 / 1,3 / 1,7 / 1,13 / 2,2 …):
user_id,subject_id1,11,31,71,132,22,52,62,72,92,102,113,13,23,33,43,73,93,105,136,16,36,46,56,86,108,18,28,38,58,68,78,89,1310,1211,211,311,411,611,811,911,1312,1213,313,613,715,415,1215,1316,216,316,416,716,817,217,317,417,517,617,717,817,917,1017,1118,218,319,219,319,519,619,919,1019,1119,1220,120,320,420,720,1321,121,621,821,921,1121,1221,1322,623,223,423,923,1224,124,524,925,225,625,1025,1126,226,326,827,327,627,1227,1328,128,228,328,528,728,928,1028,1128,1228,1329,129,229,329,429,529,629,729,829,929,1030,630,730,930,1331,631,1132,132,533,233,1334,334,734,834,934,1034,1335,335,435,535,635,736,236,336,436,636,736,836,936,1136,1236,1338,541,141,341,441,541,641,741,1142,242,342,742,842,942,1042,1143,243,643,1043,1143,12
### Forward Stepwise Regression in R out0 = lm(y ~ 1) ### fit the initial model (intercept only) out = step(out0,direction="forward",trace=TRUE,scope= y ~ x1 + x2 + x3 + x4 + x5) summary(out) ### lasso library(lars) out = lars(x,y) ### x is a matrix of covariates summary(out) plot(out)
### Gamma Ray Data kernreg = function(y,x,h,newx){ ### kernel regression n = nrow(x) d = ncol(x) m = nrow(newx) f = rep(0,m) for(i in 1:m){ tmp = sqrt(apply((matrix(newx[i,],n,d,byrow=TRUE) - x)^2,1,sum)) w = exp(-tmp^2/(2*h^2)) f[i] = sum(w*y)/sum(w) } return(f) } Cv = function(y,x,H){ ### cross validation huge = mean(y^2)*10 n = nrow(x) d = ncol(x) m = length(H) cv = rep(0,m) for(j in 1:m){ h = H[j] for(i in 1:n){ tmp = sqrt(apply((matrix(x[i,],n,d,byrow=TRUE) - x)^2,1,sum)) w = exp(-tmp^2/(2*h^2)) w = w/sum(w) f = sum(w*y) if(w[i] == 1)cv[j] = cv[j] + huge if(w[i] < 1)cv[j] = cv[j] + ((y[i] - f)/(1-w[i]))^2 } } cv = cv/n return(cv) }