转载来源
R函数式的列表(Lisp表达方式)
Emacs 的 REPL 开发体验(C-x C-e),爽到根本停不下来!
R函数式的列表(Lisp表达方式)
Emacs C-x C-e
执行R的S表达式 lambda let if plot Reduce Filter Map vector factor list array data.frame (函数内赋值参数用: x=123) matrix (函数内赋值参数用: x=123) csv 表格数据文件 table记录频数的方法(每一类) round & prop.table & table计算频率百分比 summary 总结数据特征,极值, 细胞核的3种特征: 最小, 最大, 平均值,中间值等 min & max 标准化数值型数据,以便确保在标准的范围内 lapply表格数据每一个数据单元都执行某个操作: 相当于map了,结果变成了list列表 一元线性回归 knn regression bayes str 查看dataframe特征 & 类型 & 总数, 数据轮廓 summary总结某列数据的Min/Max,Median,Mean等 head看数据前几个值,tail-log 评估模型的性能: gmodels/CrossTable c50决策树 neuralnet svm kmeans R宏%>% 特征选择Boruta 特征选择Caret 直方图hist 散点图pairs
Emacs C-x C-e
执行R的S表达式
el-get-install ESS
C-c C-k: 打开 R 的 REPL
C-c C-l: 把当前文件缓冲区 eval 到 REPL 里面
C-x C-e: 执行光标前的 S 表达式 (fun r lisp!)
(defun ess-eval-sexp (vis)
  "Send the balanced expression that ends just before point to ESS.
VIS is the raw prefix argument; it is forwarded unchanged to
`ess-eval-region'.  Point is restored afterwards."
  (interactive "P")
  (save-excursion
    ;; Step back over the preceding sexp to find where it starts ...
    (backward-sexp)
    (let ((start (point)))
      ;; ... then forward again so that point sits at its end.
      (forward-sexp)
      ;; Pass the bounds in (start, end) order.
      (ess-eval-region start (point) vis "Eval sexp"))))

;; Bind the key in the ESS buffer's local keymap only.  Binding it in
;; `global-map' from this hook would replace the global `C-x C-e' command
;; (`eval-last-sexp') in every buffer as soon as one ESS buffer is opened.
(add-hook 'ess-mode-hook
          (lambda ()
            (local-set-key (kbd "C-x C-e") 'ess-eval-sexp)))
lambda
(function (y) (function (x) ('+' (x, y))))
((function (x) x) (1 ))
let
((function (x, y=(function (i) ('*' (i, 2 ))) ) (y (x))) (2 ))
(library (magrittr))
((c (1 , 2 , 3 )) %>% (function (x) (Map ((function (x) ('+' (x, 100 ))), x)))
%>% (function (x) (Reduce ('+' , x)) ) )
((function (x, y=('*' (x, 2 ))) y) (100 ))
# Ordinary least squares via the normal equations, in prefix-call style.
#   y  : response values (vector or one-column matrix)
#   x  : predictors; anything as.matrix() accepts
#   mx : x coerced to a matrix (default argument, evaluated on first use)
#   cx : mx with a leading column of 1s named Intercept
# Returns the one-column matrix b that solves (cx' cx) b = cx' y.
# The system is solved directly with solve(A, b) rather than by forming the
# explicit inverse solve(A) and multiplying, which loses accuracy when
# cx' cx is ill-conditioned.
((function (y, x, mx=(as.matrix (x)), cx=(cbind (Intercept=1 , mx)))
    (solve ((crossprod (cx)), (crossprod (cx, y)))) ) -> reg)
(reg (y=(launch$distress_ct), x=(launch [3 ])))
if
('if' (0 , ('==' (1 , 1 )), ('==' (2 , 1 ))))
plot
('plot' (('rnorm' (10 )), ('rnorm' (10 ))))
('plot' (('rnorm' (10 )), ('rnorm' (10 )), type='b' ))
Reduce
(Reduce ('*' , 1 :10 ))
Filter
((function (x) ('if' (('%%' (x, 2 )), x, 0 ))) (2 ))
(Filter ((function (x) ('if' (('%%' (x, 2 )), x, 0 ))), 1 :10 ))
Map
(Map ((function (x) ('+' (x, 100 ))), 1 :3 ))
[[1 ]]
[1 ] 101
[[2 ]]
[1 ] 102
[[3 ]]
[1 ] 103
vector
(c (1 , 1 , 3 ))
((c (1 , 8 , 3 )) [2 ])
((c ("A" , "B" , "C" )) -> defvar)
factor
(factor ((c ("1" , "1" , "3" , "11" , "9" , "8" )), levels=(c ("A" , "B" , "C" , "AA" , "BB" , "CC" ))))
[1 ] <NA > <NA > <NA > <NA > <NA > <NA >
Levels: A B C AA BB CC
((factor (wbcd$diagnosis, levels=(c ("B" , "M" )), labels=(c ("良性肿块" , "恶性肿块" )))) -> wbcd$diagnosis)
diagnosis radius_mean texture_mean perimeter_mean area_mean smoothness_mean
1 恶性肿块 17.990 10.38 122.80 1001.0 0.11840
2 恶性肿块 20.570 17.77 132.90 1326.0 0.08474
21 良性肿块 13.080 15.71 85.63 520.0 0.10750
list
(list (11 , "aa" , FALSE ))
[[1 ]]
[1 ] 11
[[2 ]]
[1 ] "aa"
[[3 ]]
[1 ] FALSE
array
(1 :12 )
(array (1 :12 ))
(array (1 :12 , (c (2 , 3 , 2 ))))
data.frame (函数内赋值参数用: x=123)
((data.frame (
ID=(c (11 ,12 ,13 )),
Name=(c ("Devin" ,"Edward" ,"Wenli" )),
Gender=(c ("M" ,"M" ,"F" )),
Birthdate=(c ("1984-12-29" ,"1983-5-6" ,"1986-8-8" )))) -> pt_data)
ID Name Gender Birthdate
1 11 Devin M 1984-12-29
2 12 Edward M 1983-5-6
3 13 Wenli F 1986-8-8
(pt_data [1 , 2 ])
[1 ] Devin
Levels: Devin Edward Wenli
(pt_data [,3 ])
[1 ] M M F
Levels: F M
((pt_data [-1 ]) [-2 ])
Name Birthdate
1 Devin 1984-12-29
2 Edward 1983-5-6
3 Wenli 1986-8-8
(pt_data$Birthdate)
[1] 1984-12-29 1983-5-6 1986-8-8
Levels: 1983-5-6 1984-12-29 1986-8-8
(pt_data [2 :3 ])
Name Gender
1 Devin M
2 Edward M
3 Wenli F
matrix (函数内赋值参数用: x=123)
(matrix ((c (1 , 2 , 1 , 3 , 5 , 8 )), nrow=2 ))
[,1 ] [,2 ] [,3 ]
[1 ,] 1 1 5
[2 ,] 2 3 8
(matrix ((c (1 , 2 , 1 , 3 , 5 , 8 )), ncol=2 ))
[,1 ] [,2 ]
[1 ,] 1 3
[2 ,] 2 5
[3 ,] 1 8
(matrix ((c (1 , 2 , 4 , 3 )), ncol=1 ))
[,1 ]
[1 ,] 1
[2 ,] 2
[3 ,] 4
[4 ,] 3
(matrix ((c (1 , 2 , 4 , 3 )), nrow=1 ))
[,1 ] [,2 ] [,3 ] [,4 ]
[1 ,] 1 2 4 3
(cbind ((c (1 , 1 , 1 )), (c (1 , 0 , 1 )), (c (0 , 1 , 0 ))))
(t (matrix ((c (1 , 2 , 1 , 3 , 5 , 8 )), ncol=2 )))
('*' (10 , (matrix ((c (1 , 2 , 1 , 3 , 5 , 8 )), ncol=2 ))))
('+' ((matrix ((c (9 , 2 , 3 , 8 , 1 , 4 )), ncol=2 )),
(matrix ((c (0 , 3 , 5 , 3 , 7 , 2 )), ncol=2 ))))
('%*%' ((matrix ((c (1 , 4 , 3 , 0 , 1 , 2 )), ncol=2 )),
(matrix ((c (7 , 8 )), ncol=1 ))))
(solve (matrix ((c (1 , 4 , 3 , 0 , 1 , 2 , 1 , 6 , 8 )), ncol=3 )))
csv 表格数据文件
(write.csv (pt_data, file="my-data-frame.csv" ))
"" ,"ID" ,"Name" ,"Gender" ,"Birthdate"
"1" ,11 ,"Devin" ,"M" ,"1984-12-29"
"2" ,12 ,"Edward" ,"M" ,"1983-5-6"
"3" ,13 ,"Wenli" ,"F" ,"1986-8-8"
(read.csv ("my-data-frame.csv" ))
X ID Name Gender Birthdate
1 1 11 Devin M 1984-12-29
2 2 12 Edward M 1983-5-6
3 3 13 Wenli F 1986-8-8
((read.csv ("http://127.0.0.1:8003/wisc_bc_data.csv" , stringsAsFactors=FALSE )) -> wbcd)
table记录频数的方法(每一类)
(table (wbcd$diagnosis))
round & prop.table & table计算频率百分比
(round (('*' ((prop.table (table (wbcd$diagnosis))) ,100 )), digits=1 ))
summary 总结数据特征,极值, 细胞核的3种特征: 最小, 最大, 平均值,中间值等
(summary ((wbcd [(c ("radius_mean" , "area_mean" , "smoothness_mean" ))])))
radius_mean area_mean smoothness_mean
Min. : 6.981 Min. : 143.5 Min. :0.05263
1st Qu.:11.700 1st Qu.: 420.3 1st Qu.:0.08637
Median :13.370 Median : 551.1 Median :0.09587
Mean :14.127 Mean : 654.9 Mean :0.09636
3rd Qu.:15.780 3rd Qu.: 782.7 3rd Qu.:0.10530
Max. :28.110 Max. :2501.0 Max. :0.16340
min & max 标准化数值型数据,以便确保在标准的范围内
# Min-max scaling: subtract the smallest value of x from every element and
# divide by the spread between the largest and smallest value.
# The inner lambda plays the role of a `let`, naming min(x) and max(x)
# before they are used.
((function (x)
    ((function (lo, hi) ('/' (('-' (x, lo)), ('-' (hi, lo)))))
     ((min (x)), (max (x))))) -> normalize)
(normalize ((c (10 , 20 , 30 , 40 , 50 ))))
lapply表格数据每一个数据单元都执行某个操作: 相当于map了,结果变成了list列表
(lapply ((wbcd [2 :31 ]), normalize))
$radius_mean
[1 ] 0.52103744 0.64314449 0.60149557 0.21009040 0.62989256 0.25883856
...
$texture_mean
[1 ] 0.02265810 0.27257355 0.39026040 0.36083869 0.15657761 0.20257017
...
((as.data.frame ((lapply ((wbcd [2 :31 ]), normalize)))) -> wbcd_n)
radius_mean texture_mean perimeter_mean area_mean smoothness_mean
1 0.52103744 0.02265810 0.54598853 0.36373277 0.59375282
2 0.64314449 0.27257355 0.61578329 0.50159067 0.28987993
一元线性回归
(1 :10 -> x)
(('+' (x, (rnorm (10 , 0 , 1 )))) -> y)
((lm (y ~ x)) -> fit)
(summary (fit))
(library (class))
((knn (train=wbcd_train, test=wbcd_test, cl=wbcd_train_labels, k=21 )) -> wbcd_test_pred)
(library (gmodels))
(CrossTable (x=wbcd_test_labels, y=wbcd_test_pred, prop.chisq=FALSE ))
str 查看dataframe特征 & 类型 & 总数, 数据轮廓
(str (credit))
(summary (credit$months_loan_duration))
Min. 1st Qu. Median Mean 3rd Qu. Max.
4.0 12.0 18.0 20.9 24.0 72.0
head看数据前几个值,tail-log
(head (credit_rand$amount))
[1 ] 2346 2030 1082 2631 3069 1333
评估模型的性能: gmodels/CrossTable
(library (gmodels))
(CrossTable (x=wbcd_test_labels, y=wbcd_test_pred, prop.chisq=FALSE ))
(library (C50))
((C5.0 ((credit_train [-17 ]), credit_train$default)) -> credit_model)
((predict (credit_model, credit_test)) -> credit_pred)
(CrossTable (credit_test$default, credit_pred, prop.chisq=FALSE , prop.c=FALSE , prop.r=FALSE , dnn=(c ('actual default' , 'predicted default' ))))
(library (neuralnet))
# Fit a network predicting strength from the eight mix/age predictors.
((neuralnet (strength ~ cement + slag + ash + water + superplastic + coarseagg + fineagg + age, data=concrete_train)) -> concrete_model)
# Run the test-set predictors through the fitted network. This step was
# missing: model_results was read below without ever being created.
# Columns are picked by the names used in the formula, not by position.
((neuralnet::compute (concrete_model, (concrete_test [(c ("cement" , "slag" , "ash" , "water" , "superplastic" , "coarseagg" , "fineagg" , "age" ))]))) -> model_results)
((model_results$net.result) -> predicted_strength)
# Correlation between predicted and observed strength on the test set.
(cor (predicted_strength, concrete_test$strength))
(library (kernlab))
((ksvm (letter ~ ., data=letters_train, kernel="vanilladot" )) -> letter_classifier)
((predict (letter_classifier, letters_test)) -> letter_predictions)
(round (('*' ((prop.table (table ('==' (letter_predictions, letters_test$letter)))) ,100 )), digits=1 ))
((teens [5 :40 ]) -> interests)
((as.data.frame (lapply (interests, scale))) -> interests_z)
((kmeans (interests_z, 5 )) -> teen_clusters)
(teen_clusters$size)
(teen_clusters$centers)
R宏%>%
(library (magrittr))
(1 %>% (function (x) ('+' (x, 100 )))
%>% (function (x) (print (x))) )
(library (tm))
(library (magrittr))
# Build a sorted term-frequency vector from raw text with the tm package.
#   text : character vector; it is wrapped in a VectorSource and a Corpus.
# Pipeline: lower-case -> strip punctuation -> strip numbers -> drop stop
# words -> term-document matrix -> plain matrix -> row sums, largest first.
# The minimum word length is set through `wordLengths`; the old
# `minWordLength` control entry is not recognised by tm versions that
# provide content_transformer(), so the default minimum of 3 characters
# would apply instead of 1.
((function (text)
    (text
        %>% (function (st) (Corpus ((VectorSource (st)))))
        %>% (function (corpus) (tm_map (corpus, (content_transformer (tolower)))))
        %>% (function (corpus) (tm_map (corpus, removePunctuation)))
        %>% (function (corpus) (tm_map (corpus, removeNumbers)))
        %>% (function (corpus) (tm_map (corpus, removeWords, (c (stopwords("SMART" ), "thy" , "thou" , "thee" , "the" , "and" , "but" )))))
        %>% (function (corpus) (TermDocumentMatrix (corpus, control=(list (wordLengths=(c (1 , Inf )))))))
        %>% (function (mydtm) (as.matrix (mydtm)))
        %>% (function (m) (sort ((rowSums (m)), decreasing=TRUE ))) )) -> getTermMatrix)
(getTermMatrix ("The Clojure Programming Language. Clojure is a dynamic, general-purpose programming" ))
(cor (insurance [(c ("age" , "bmi" , "children" , "charges" ))]))
(library (psych))
((lm (charges ~ age + children + bmi + sex + smoker + region, data=insurance)) -> ins_model)
(summary (ins_model))
(library (Boruta))
((Boruta (Classes~., data=(train [,-348 ]))) -> Boruta.mod)
(png ("Boruta_selection.png" , width=4000 ,height=1600 ))
(plot (Boruta.mod, las="2" ))
(dev.off ())
(library (magrittr))
(library (dplyr))
(train %>%
(function (data) (select (data, zakończyć,zdjęcie,należeć,naprawdę,polski,kobieta,sierpień,zobaczyć,dotyczyć,szczęście,mężczyzna,europejski)))
-> train_Boruta)
(save (train_Boruta, file="train_Boruta.rda" ))
importance绘图
(library (caret))
(library (rpart))
(library (e1071))
((trainControl (method="repeatedcv" , number=10 ,repeats=3 )) -> control)
((train (churn~., data=trainset, method="rpart" ,preProcess="scale" , trControl=control)) -> model)
((varImp (model, scale=FALSE )) -> importance)
(plot (importance))
直方图hist
(hist (insurance$charges))
散点图pairs
(pairs (insurance [(c ("age" , "bmi" , "children" , "charges" ))]))