R的简单实用代码记录
赋值
>n<-10
>n
[1]10
#或
>20->n
>n
[1]20
#或
>assign("n",30)
>n
[1]30
#或
>n=40
>n
[1]40
也可以直接输入函数表达式
>((10+2)*5-2^4)/4
[1]11
>sqrt(3)+exp(-2)
[1]1.867386
#代表注释开始
在线帮助
>help.start()
#关于R的基本知识的学习
>help(fun)
#或
>?fun
#显示该函数的帮助页面
>help("char")
#显示具有特殊语法意义字符的“char”帮助页面
>help("bs", try.all.packages=TRUE)
>help("bs", package="splines")
#在特定包或者全部包内搜索该函数
>apropos(fun)
或
>apropos("fun")
#找出所有名字中含有fun的函数,(只在已载入的包中),
#如果fun不是完整的函数名称,则第一条命令会出错。
>help.search("char")
#得到所有在帮助页面的含有字符char的函数,搜索范围比apropos更广泛
>find(char)
>find("char")
#得到命名为char的函数所在的程序包
>args(fun)
>args("fun")
#得到名为fun的函数的自变量列表
R包的代码安装
>install.packages("ggplot2")
#字符串必须被双引号包围,R包的名称需要是字符串。
#同时选择dependencies 是TRUE(已默认)
>installed.packages()
#显示已安装R包
>install.packages("devtools")
包的载入
>library(devtools)
#也可以直接点击packages的包前的选项格。console上会显示这一代码。
>remove.packages("name")#卸载R包
>.libPaths()#路径查询
>devtools::install_github("user/repo")#从GitHub上面下载选用该函数(devtools包),参数格式为"用户名/仓库名"
生成数据序列
#向量:每个数据都有其自己的位置。(index)
#区别于集合的无序性
>identical(a,b)#使用该函数进行向量数据是否完全一致的区分。
#[]的功能:从向量中提取相应的元素。
#c:combines
#在[]中加入-就是去除相应的元素
>c(2,4,56,33)
#c函数可以combine字符串,数值串。
>c(1,2,"ABC")
[1] "1" "2" "ABC"
数值型向量
> vector1 <- c(1:3,6:10)
> vector1
[1] 1 2 3 6 7 8 9 10
> x <- 1
> class(x)
[1] "numeric"
> is.integer(x)
[1] FALSE
#在R中1也只被视作数值型,不视作整型
> 3/2
[1] 1.5
#R中的除法不做整数取整,两个整数相除得到的是数值型(numeric)结果
> y <- 1:3
> class(y)
[1] "integer"
#整型的生成
seq,rep函数的调用
> seq(from = 1, to = 5, by =1 )
[1] 1 2 3 4 5
> seq(from = 1, to = 5, by =0.5 )
[1] 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0
> seq(from = 10, to = 1, by =-1 )
[1] 10 9 8 7 6 5 4 3 2 1
> seq(from = 10, to = 1, length.out=8 )
[1] 10.000000 8.714286 7.428571 6.142857 4.857143 3.571429 2.285714 1.000000
> seq(1,10,len=5)
[1] 1.00 3.25 5.50 7.75 10.00
> seq(1,10,along.with=5)
[1] 1
> seq(1,10,along.with=1:3)
[1] 1.0 5.5 10.0
#seq函数生成数值型向量的调用,by表示步长,length.out是分为几份。
> rep(c(1,3),times=5)
[1] 1 3 1 3 1 3 1 3 1 3
> rep(c(1,3),each=5)
[1] 1 1 1 1 1 3 3 3 3 3
> rep(c(1,3),len=9)
[1] 1 3 1 3 1 3 1 3 1
> x <- rep(1:2,times=4)
> length(x)
[1] 8
#rep函数的调用生成重复的数值型向量
逻辑型向量
> logit <-rep (c(TRUE,FALSE),len=5)
> sum(logit)
[1] 3
#逻辑型向量在运算时被当作数值(TRUE=1,FALSE=0),可以用sum函数对其求和,得到TRUE的个数
> logit2 <- c(100>99,100>101)
> class(logit2)
[1] "logical"
> logit2
[1] TRUE FALSE
> logit2 <- c(100>99,100=1)
Error: unexpected '=' in "logit2 <- c(100>99,100="
> logit2 <- c(100>99,100==1)
> logit2
[1] TRUE FALSE
#使用两个==来进行逻辑判断
> logit4 <- c(100!=1)
> logit4
[1] TRUE
#不等于使用!=来表示
> logit4 <- c(100>90&100>110)
> logit4
[1] FALSE
> logit5<- c(100>90|100>110)
> logit5
[1] TRUE
#或与且的表示
逻辑表达式的使用
> x <- seq(1,100,length=20)
> x
[1] 1.000000 6.210526 11.421053 16.631579
[5] 21.842105 27.052632 32.263158 37.473684
[9] 42.684211 47.894737 53.105263 58.315789
[13] 63.526316 68.736842 73.947368 79.157895
[17] 84.368421 89.578947 94.789474 100.000000
> index <- x>80
> index
[1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[9] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[17] TRUE TRUE TRUE TRUE
> x[index]
[1] 84.36842 89.57895 94.78947 100.00000
#方括号返回TRUE的值
> which(x>80)
[1] 17 18 19 20
> x[which(x>80)]
[1] 84.36842 89.57895 94.78947 100.00000
#which返回满足条件(值为TRUE)的元素的位置(下标),再用[]即可取出相应数值
> which(x>80&x<90)
[1] 17 18
> x[which(x>80&x<90)]
[1] 84.36842 89.57895
> x[x>80&x<90]#且
[1] 84.36842 89.57895
> x[x>80|x<90]#或
[1] 1.000000 6.210526 11.421053 16.631579
[5] 21.842105 27.052632 32.263158 37.473684
[9] 42.684211 47.894737 53.105263 58.315789
[13] 63.526316 68.736842 73.947368 79.157895
[17] 84.368421 89.578947 94.789474 100.000000
字符串
> string <- c("abc","1","2")
> string
[1] "abc" "1" "2"
> class(string)
[1] "character"
> is.character(string)
[1] TRUE
letters&LETTERS
> letters
[1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m"
[14] "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"
> LETTERS
[1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M"
[14] "N" "O" "P" "Q" "R" "S" "T" "U" "V" "W" "X" "Y" "Z"
#更快地生成向量
> letters[1:5]
[1] "a" "b" "c" "d" "e"
> LETTERS[24:28]
[1] "X" "Y" "Z" NA NA
#NA:not available
> y <-1:5
> y
[1] 1 2 3 4 5
> y[6]
[1] NA
因子型向量(分类变量)
有序/无序
> my_fac <- factor(x=rep(c(1,2),times=5),levels = c(1,2),labels = c("Male","Female"))
> my_fac
[1] Male Female Male Female Male Female Male
[8] Female Male Female
Levels: Male Female
> class(my_fac)
[1] "factor"
> my_fac2 <- factor(LETTERS[1:5],labels = letters[1:5])
> my_fac2
[1] a b c d e
Levels: a b c d e
> my_fac3 <- factor(1:5,labels = letters[1:5])
> my_fac3
[1] a b c d e
Levels: a b c d e
> my_fac4 <- gl(n=2,k=5,labels = c("Control","Treatment"))
> my_fac4
[1] Control Control Control Control Control
[6] Treatment Treatment Treatment Treatment Treatment
Levels: Control Treatment
#n:水平的个数,k:每个因子重复的次数。
> my_fac5 <- gl(n=2,k=1,labels = c("Control","Treatment"))
> my_fac5
[1] Control Treatment
Levels: Control Treatment
> my_fac5 <- gl(n=2,k=1,length=8,labels = c("Control","Treatment"))
> my_fac5
[1] Control Treatment Control Treatment Control
[6] Treatment Control Treatment
Levels: Control Treatment
互相转换
> temp_string <- c("A","B","AB","O")
> my_fac6 <- as.factor(temp_string)#字符串转为因子
> my_fac6
[1] A B AB O
Levels: A AB B O
> as.character(my_fac6)#因子转为字符串
[1] "A" "B" "AB" "O"
> x <- 1:5
> x
[1] 1 2 3 4 5
> as.factor(x)
[1] 1 2 3 4 5
Levels: 1 2 3 4 5
> as.character(x)
[1] "1" "2" "3" "4" "5"
判断有几个水平
> nlevels(my_fac6)
[1] 4
> levels(my_fac6)
[1] "A" "AB" "B" "O"
分类变量转换为哑变量
> my_fac7 <- relevel(my_fac6, ref="B")
#ref=reference
> my_fac7
[1] A B AB O
Levels: B A AB O
> my_fac6
[1] A B AB O
Levels: A AB B O
因子命名排序及其校正
> x <- c("placebo","10mg","20mg","50mg")
> my_order_fac <- factor(x,ordered = TRUE)
> my_order_fac
[1] placebo 10mg 20mg 50mg
Levels: 10mg < 20mg < 50mg < placebo
#解决方案
> x <- c("0mg","10mg","20mg","50mg")
> my_order_fac <- factor(x,ordered = TRUE)
> my_order_fac
[1] 0mg 10mg 20mg 50mg
Levels: 0mg < 10mg < 20mg < 50mg
#或者是使用package DescTools
> x <- c('P', '10mg', '20mg', '50mg')
> my_order_fac <- factor(x, ordered = TRUE)
> library(DescTools)
> my_order_fac2 <- reorder.factor(my_order_fac, new.order = x)
> my_order_fac2
[1] P 10mg 20mg 50mg
Levels: P < 10mg < 20mg < 50mg
列表与矩阵
> my_list <- list(1,2,3,"R","nihaooo",TRUE,FALSE)
> my_list
[[1]]
[1] 1
[[2]]
[1] 2
[[3]]
[1] 3
[[4]]
[1] "R"
[[5]]
[1] "nihaooo"
[[6]]
[1] TRUE
[[7]]
[1] FALSE
> my_list2 <- list(1:10,letters[1:5])
> my_list2
[[1]]
[1] 1 2 3 4 5 6 7 8 9 10
[[2]]
[1] "a" "b" "c" "d" "e"
#多种数据类型的列表
> my_list2[[2]]
[1] "a" "b" "c" "d" "e"
> my_list2[[2]][1]
[1] "a"
#列表的定位
> my_list3 <- list(1:10,letters[1:5],list(11:14,LETTERS[1:5]))
> my_list3
[[1]]
[1] 1 2 3 4 5 6 7 8 9 10
[[2]]
[1] "a" "b" "c" "d" "e"
[[3]]
[[3]][[1]]
[1] 11 12 13 14
[[3]][[2]]
[1] "A" "B" "C" "D" "E"
> my_list3[[3]][[2]][1]
[1] "A"
#列表的相应元素的定位
矩阵
> my_matrix <- matrix(data = 1:6,nrow = 2,byrow = TRUE)
> my_matrix
[,1] [,2] [,3]
[1,] 1 2 3
[2,] 4 5 6
#nrow:行数,byrow默认是FALSE,默认按列排序
> my_matrix2 <- matrix(data = 1:10, nrow = 5)
> my_matrix2
[,1] [,2]
[1,] 1 6
[2,] 2 7
[3,] 3 8
[4,] 4 9
[5,] 5 10
> my_matrix3 <- matrix(data = 2, nrow = 3,ncol=4)
> my_matrix3
[,1] [,2] [,3] [,4]
[1,] 2 2 2 2
[2,] 2 2 2 2
[3,] 2 2 2 2
> my_matrix4 <- matrix(data =letters[1:3], nrow = 2,ncol=4)
#Warning message:
#In matrix(data = letters[1:3], nrow = 2, ncol = 4) :
#数据长度[3]不是矩阵行数[2]的整倍
#该位置只报警告但是仍然出矩阵
> my_matrix4
[,1] [,2] [,3] [,4]
[1,] "a" "c" "b" "a"
[2,] "b" "a" "c" "b"
> my_matrix5 <- matrix(data = 1:12, nrow = 3, ncol = 4,
+ dimnames = list(c('A','B','C'),c('V1','V2','V3','V4')))
> my_matrix5
V1 V2 V3 V4
A 1 4 7 10
B 2 5 8 11
C 3 6 9 12
> t(my_matrix5)#转置
A B C
V1 1 2 3
V2 4 5 6
V3 7 8 9
V4 10 11 12
> my_matrix6 <- matrix(data=c(1:6,letters[1:6]), nrow = 3, ncol = 4)
> my_matrix6
[,1] [,2] [,3] [,4]
[1,] "1" "4" "a" "d"
[2,] "2" "5" "b" "e"
[3,] "3" "6" "c" "f"
数组是多维的矩阵
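> my_array <- array(1:16, dim = c(2,4,2))
> dim(my_array)
[1] 2 4 2
> my_array
, , 1
[,1] [,2] [,3] [,4]
[1,] 1 3 5 7
[2,] 2 4 6 8
, , 2
[,1] [,2] [,3] [,4]
[1,] 9 11 13 15
[2,] 10 12 14 16
> dim(my_array) <- c(4,2,2)
> my_array
, , 1
[,1] [,2]
[1,] 1 5
[2,] 2 6
[3,] 3 7
[4,] 4 8
, , 2
[,1] [,2]
[1,] 9 13
[2,] 10 14
[3,] 11 15
[4,] 12 16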
#改变相关维度
#命名
> my_array2 <- array(1:16, dim = c(4,2,2), dimnames = list(c(LETTERS[1:4]),c('col1','col2'),c('first','second')))
> my_array2
, , first
col1 col2
A 1 5
B 2 6
C 3 7
D 4 8
, , second
col1 col2
A 9 13
B 10 14
C 11 15
D 12 16
#相关位置的数值提取
> my_matrix <- matrix(data = 1:8, nrow = 4)
> my_matrix
[,1] [,2]
[1,] 1 5
[2,] 2 6
[3,] 3 7
[4,] 4 8
> my_matrix [4,2]
[1] 8
> my_matrix[3,]
[1] 3 7
> my_matrix[,1]
[1] 1 2 3 4
> my_array2[2,2,1]
[1] 6
> my_array2[,2,1]
A B C D
5 6 7 8
数据框
> my_df <- data.frame(name=c('TOM','ENDY','MARRY'),age=c(24,25,26),height=c(15,20,30))
> my_df
name age height
1 TOM 24 15
2 ENDY 25 20
3 MARRY 26 30
#命名时,以数字开头(数字加英语,如1abc)不合法,纯数字不合法,带有特殊字符不合法,!下划线合法!
#大小写是区别的
> dim(my_df)
[1] 3 3
> View(my_df)
#可以看得到数据的表格样式