R语言入门

R语言的数据结构

对象的五种基本类型:
(1)字符(character)
(2)数值(numeric: real numbers)
(3)整数(integer)
(4)复数(complex: 1+2i)
(5)逻辑(logical: TRUE / FALSE)

> x <- 1  //赋值
> x
[1] 1
> 
> class(x)  
[1] "numeric"
> x <- 2L   //整数后面加大写L表示为整数型
> class(x)
[1] "integer"
> y <- "hello world"
> class(y)
[1] "character"
> t <- TRUE
> x <- 1+2i
> class(x)
[1] "complex"
> 

对象的属性:
(1)名称
(2)维度
(3)类型
(4)长度

数据结构:

(1)向量:只能包含同一种类型的对象

# vector 

x <- vector("character", length = 10)//创建一个长度为10的字符型向量

x1 <- 1:4//x1为1到4的一个长为4的向量

x2 <- c(1,2,3,4)//直接创建一个向量

x3 <- c(TRUE,10,"a")
//系统会自动把这三个不同类型的元素转换为同一类型(这里全部转换为字符型)
x4 <- c("a","b","c")
as.numeric(x4)
as.logical()
as.character()
//强制转换

class(x1)


names(x1) <- c("a","b","c","d")//给变量起名字
> x1
a b c d 
1 2 3 4 

(2)矩阵

——向量+维度属性(整数向量:nrow,ncol)

#Matrix
//矩阵的创建
x <- matrix(1:6, nrow = 3, ncol = 2)
x
dim(x)
attributes(x)

y <- 1:6  //先创建一个向量,再赋值维度属性
dim(y) <- c(2,3)
y


y2 <- matrix(1:6, nrow = 2, ncol = 3)

rbind(y,y2)//按行拼接
cbind(y,y2)//按列拼接



//执行步骤
> x <- matrix(nrow = 3, ncol = 2)
> x
     [,1] [,2]
[1,]   NA   NA
[2,]   NA   NA
[3,]   NA   NA
> x <- matrix(1:6, nrow = 3, ncol = 2)
> x
     [,1] [,2]
[1,]    1    4
[2,]    2    5
[3,]    3    6
> dim(x)
[1] 3 2
> attributes(x)
$dim
[1] 3 2

> y <- 1:6
> dim(y) <- c(2,3)
> y
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
> y2 <- matrix(1:6, nrow = 2, ncol = 3)
> rbind(y,y2)
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
[3,]    1    3    5
[4,]    2    4    6
> cbind(y,y2)
     [,1] [,2] [,3] [,4] [,5] [,6]
[1,]    1    3    5    1    3    5
[2,]    2    4    6    2    4    6
> 

(3)数组
——与矩阵类似,但是维度可以大于2

#array
//数组的创建
x <- array(1:24, dim = c(4,6))


x <- array(1:24, dim = c(2,3,4))
x

执行步骤:

> x <- array(1:24, dim = c(4,6))
> x
     [,1] [,2] [,3] [,4] [,5] [,6]
[1,]    1    5    9   13   17   21
[2,]    2    6   10   14   18   22
[3,]    3    7   11   15   19   23
[4,]    4    8   12   16   20   24
> x <- array(1:24, dim = c(2,3,4))
> x
, , 1

     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6

, , 2

     [,1] [,2] [,3]
[1,]    7    9   11
[2,]    8   10   12

, , 3

     [,1] [,2] [,3]
[1,]   13   15   17
[2,]   14   16   18

, , 4

     [,1] [,2] [,3]
[1,]   19   21   23
[2,]   20   22   24

(4)列表

#list
//列表的创建
l <- list("a", 2, 10L, 3+4i, TRUE)


l2 <- list(a=1, b=2, c=3)

l3 <- list(c(1,2,3), c(4,5,6,7))

x <- matrix(1:6, nrow = 2, ncol = 3)
dimnames(x) <- list(c("a","b"), c("c","d","e"))

执行步骤:

> l <- list("a", 2, 10L, 3+4i, TRUE)
> l
[[1]]
[1] "a"

[[2]]
[1] 2

[[3]]
[1] 10

[[4]]
[1] 3+4i

[[5]]
[1] TRUE

> 
> 
> l2 <- list(a=1, b=2, c=3)
> l2
$a
[1] 1

$b
[1] 2

$c
[1] 3

> l3 <- list(c(1,2,3), c(4,5,6,7))
> 
> 
> l3
[[1]]
[1] 1 2 3

[[2]]
[1] 4 5 6 7

> x <- matrix(1:6, nrow = 2, ncol = 3)
> x
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
> dimnames(x) <- list(c("a","b"), c("c","d","e"))
> x
  c d e
a 1 3 5
b 2 4 6
> 

(5)因子(factor)

——分类数据/有序 vs. 无序

——整数型向量+标签(label)(优于整数向量)
Male/Female vs. 1/2
常用于 lm( ),glm( )

//创建因子
> x <- factor(c("female","female","male","male","female"))
> x
[1] female female male   male   female
Levels: female male


> x <- factor(c("female","female","male","male","female"), levels = c("male","female"))
> x
[1] female female male   male   female
Levels: male female

//查看因子
> table(x)
x
  male female 
     2      3 
> unclass(x)
[1] 2 2 1 1 2
attr(,"levels")
[1] "male"   "female"
> class(unclass(x))
[1] "integer"

(6)缺失值(missing value)

——NA/NaN:NaN属于NA,NA不属于NaN
——NA有类型属性:integer NA,character NA等
——is.na()/is.nan()

(8) 数据框
——存储表格数据
——视为各元素长度相同的列表

  • 每个元素代表一列数据
  • 每个元素的长度代表行数
  • 元素类型可以不同
//创建一个数据框

> df <- data.frame(id = c(1,2,3,4), name = c("a","b","c","d"), gender = c(TRUE,TRUE,FALSE,FALSE))
> df
  id name gender
1  1    a   TRUE
2  2    b   TRUE
3  3    c  FALSE
4  4    d  FALSE
//查看行列
> nrow(df)
[1] 4
> ncol(df)
[1] 3
> df2 <- data.frame(id = c(1,2,3,4), score = c(80,90,86,100))
> df2
  id score
1  1    80
2  2    90
3  3    86
4  4   100
//转化为矩阵
> data.matrix(df2)
     id score
[1,]  1    80
[2,]  2    90
[3,]  3    86
[4,]  4   100
> 

(9)时间和日期

——日期:Date

  • 距离1970-01-01的天数/date()/Sys.Date()/weekdays()/months()/quarters()
//查询当前时间
> x <-date()
> x
[1] "Sat Feb 18 20:32:20 2017"
> class(x)
[1] "character"
//查询当前时间(表示方式不同)
> x2 <- Sys.Date()
> x2
[1] "2017-02-18"
> class(x2)
[1] "Date"
//创建一个时间
> x3 <- as.Date("2016-01-01")
> x3
[1] "2016-01-01"
//查询时间的当前属性
> class(x3)
[1] "Date"
> weekdays(x3)
[1] "星期五"
> months(x3)
[1] "一月"
> quarters(x3)  //季度
[1] "Q1"
> julian(x3)    //距离1970年过去了多少天
[1] 16801
attr(,"origin")
[1] "1970-01-01"
//时间运算
> x4 <- as.Date("2017-01-01")
> x4
[1] "2017-01-01"
> x4-x3
Time difference of 366 days
> as.numeric(x4-x3)
[1] 366

——时间:POSIXct / POSIXlt

  • 距离1970-01-01的秒数 / Sys.time()
  • POSIXct: 以数值形式存储(秒数),常用来存入数据框
  • POSIXlt:列表,还包含星期、年、月、日等信息
//get time
> x <- Sys.time()
> x
[1] "2017-02-18 20:47:07 CST"
> class(x)
[1] "POSIXct" "POSIXt" 
> p <- as.POSIXlt(x)
> p
[1] "2017-02-18 20:47:07 CST"
> class(p)
[1] "POSIXlt" "POSIXt" 
//去掉类型,查看当前列表的属性
> names(unclass(p))
 [1] "sec"    "min"    "hour"   "mday"   "mon"    "year"   "wday"   "yday"  
 [9] "isdst"  "zone"   "gmtoff"
 //用变量名$属性  就能查看指定属性
> p$sec
[1] 7.927575
//规范化时间输出格式
> x1 <- "一月 1, 2015 01:01"
> strptime(x1,"%B %d, %Y %H:%M")
[1] "2015-01-01 01:01:00 CST"
> 

总结

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值