R语言入门

最新推荐文章于 2024-08-06 01:00:00 发布

sunniy27

最新推荐文章于 2024-08-06 01:00:00 发布

阅读量461

点赞数

分类专栏： r 文章标签： r语言

本文链接：https://blog.csdn.net/sunniy27/article/details/55671439

版权

r 专栏收录该内容

4 篇文章 0 订阅

订阅专栏

R语言的数据结构

对象的五种基本类型：
（1）字符（character）
（2）数值(numeric: real numbers)
（3）整数(integer)
（4）复数(complex: 1+2i)
（5）逻辑(logical: TRUE / FALSE)

> x <- 1  //赋值
> x
[1] 1
> 
> class(x)  
[1] "numeric"
> x <- 2L   //整数后面加大写L表示为整数型
> class(x)
[1] "integer"
> y <- "hello world"
> class(y)
[1] "character"
> t <- TRUE
> x <- 1+2i
> class(x)
[1] "complex"
>

对象的属性：
（1）名称
（2）维度
（3）类型
（4）长度

数据结构：

（1）向量：只能包含同一种类型的对象

# vector 

x <- vector("character", length = 10)//创建一个长度为10的字符型向量

x1 <- 1:4//x1为1到4的一个长为4的向量

x2 <- c(1,2,3,4)//直接创建一个向量

x3 <- c(TRUE,10,"a")
# R 会自动把这三个不同类型的元素强制转换为同一类型（此处全部转换为字符型："TRUE" "10" "a"）
x4 <- c("a","b","c")
as.numeric(x4)
as.logical()
as.character()
//强制转换

class(x1)


names(x1) <- c("a","b","c","d")//给变量起名字
> x1
a b c d 
1 2 3 4

（2）矩阵

——向量+维度属性（整数向量：nrow，ncol）

#Matrix
//矩阵的创建
x <- matrix(1:6, nrow = 3, ncol = 2)
x
dim(x)
attributes(x)

y <- 1:6  //先创建一个向量，再赋值维度属性
dim(y) <- c(2,3)
y


y2 <- matrix(1:6, nrow = 2, ncol = 3)

rbind(y,y2)//按行拼接
cbind(y,y2)//按列拼接



//执行步骤
> x <- matrix(nrow = 3, ncol = 2)
> x
     [,1] [,2]
[1,]   NA   NA
[2,]   NA   NA
[3,]   NA   NA
> x <- matrix(1:6, nrow = 3, ncol = 2)
> x
     [,1] [,2]
[1,]    1    4
[2,]    2    5
[3,]    3    6
> dim(x)
[1] 3 2
> attributes(x)
$dim
[1] 3 2

> y <- 1:6
> dim(y) <- c(2,3)
> y
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
> y2 <- matrix(1:6, nrow = 2, ncol = 3)
> rbind(y,y2)
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
[3,]    1    3    5
[4,]    2    4    6
> cbind(y,y2)
     [,1] [,2] [,3] [,4] [,5] [,6]
[1,]    1    3    5    1    3    5
[2,]    2    4    6    2    4    6
>

（3）数组
——与矩阵类似，但是维度可以大于2

#array
//数组的创建
x <- array(1:24, dim = c(4,6))


x <- array(1:24, dim = c(2,3,4))
x

执行步骤：

> x <- array(1:24, dim = c(4,6))
> x
     [,1] [,2] [,3] [,4] [,5] [,6]
[1,]    1    5    9   13   17   21
[2,]    2    6   10   14   18   22
[3,]    3    7   11   15   19   23
[4,]    4    8   12   16   20   24
> x <- array(1:24, dim = c(2,3,4))
> x
, , 1

     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6

, , 2

     [,1] [,2] [,3]
[1,]    7    9   11
[2,]    8   10   12

, , 3

     [,1] [,2] [,3]
[1,]   13   15   17
[2,]   14   16   18

, , 4

     [,1] [,2] [,3]
[1,]   19   21   23
[2,]   20   22   24

（4）列表

#list
//列表的创建
l <- list("a", 2, 10L, 3+4i, TRUE)


l2 <- list(a=1, b=2, c=3)

l3 <- list(c(1,2,3), c(4,5,6,7))

x <- matrix(1:6, nrow = 2, ncol = 3)
dimnames(x) <- list(c("a","b"), c("c","d","e"))

执行步骤：

> l <- list("a", 2, 10L, 3+4i, TRUE)
> l
[[1]]
[1] "a"

[[2]]
[1] 2

[[3]]
[1] 10

[[4]]
[1] 3+4i

[[5]]
[1] TRUE

> 
> 
> l2 <- list(a=1, b=2, c=3)
> l2
$a
[1] 1

$b
[1] 2

$c
[1] 3

> l3 <- list(c(1,2,3), c(4,5,6,7))
> 
> 
> l3
[[1]]
[1] 1 2 3

[[2]]
[1] 4 5 6 7

> x <- matrix(1:6, nrow = 2, ncol = 3)
> x
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
> dimnames(x) <- list(c("a","b"), c("c","d","e"))
> x
  c d e
a 1 3 5
b 2 4 6
>

（5）因子（factor）

——分类数据/有序 vs. 无序

——整数型向量+标签（label）（优于整数向量）
Male/Female vs. 1/2
常用于 lm( ),glm( )

//创建因子
> x <- factor(c("female","female","male","male","female"))
> x
[1] female female male   male   female
Levels: female male


> x <- factor(c("female","female","male","male","female"), levels = c("male","female"))
> x
[1] female female male   male   female
Levels: male female

//查看因子
> table(x)
x
  male female 
     2      3 
> unclass(x)
[1] 2 2 1 1 2
attr(,"levels")
[1] "male"   "female"
> class(unclass(x))
[1] "integer"

（6）缺失值（missing value）

——NA/NaN：NaN属于NA（is.na(NaN) 为 TRUE），NA不属于NaN（is.nan(NA) 为 FALSE）
——NA有类型属性：integer NA，character NA等
——is.na()/is.nan()

(8) 数据框
——存储表格数据
——视为各元素长度相同的列表

每个元素代表一列数据
每个元素的长度代表行数
元素类型可以不同

//创建一个数据框

> df <- data.frame(id = c(1,2,3,4), name = c("a","b","c","d"), gender = c(TRUE,TRUE,FALSE,FALSE))
> df
  id name gender
1  1    a   TRUE
2  2    b   TRUE
3  3    c  FALSE
4  4    d  FALSE
//查看行列
> nrow(df)
[1] 4
> ncol(df)
[1] 3
> df2 <- data.frame(id = c(1,2,3,4), score = c(80,90,86,100))
> df2
  id score
1  1    80
2  2    90
3  3    86
4  4   100
//转化为矩阵
> data.matrix(df2)
     id score
[1,]  1    80
[2,]  2    90
[3,]  3    86
[4,]  4   100
>

（9）时间和日期

——日期：Date

距离1970-01-01的天数/date()/Sys.Date()/weekdays()/months()/quarters()

//查询当前时间
> x <-date()
> x
[1] "Sat Feb 18 20:32:20 2017"
> class(x)
[1] "character"
//查询当前时间（表示方式不同）
> x2 <- Sys.Date()
> x2
[1] "2017-02-18"
> class(x2)
[1] "Date"
//创建一个时间
> x3 <- as.Date("2016-01-01")
> x3
[1] "2016-01-01"
//查询时间的当前属性
> class(x3)
[1] "Date"
> weekdays(x3)
[1] "星期五"
> months(x3)
[1] "一月"
> quarters(x3)  //季度
[1] "Q1"
> julian(x3)    //距离1970年过去了多少天
[1] 16801
attr(,"origin")
[1] "1970-01-01"
//时间运算
> x4 <- as.Date("2017-01-01")
> x4
[1] "2017-01-01"
> x4-x3
Time difference of 366 days
> as.numeric(x4-x3)
[1] 366

——时间：POSIXct / POSIXlt

距离1970-01-01的秒数 / Sys.time()
POSIXct: 数值（距离1970-01-01的秒数），常用来存入数据框
POSIXlt：列表，还包含星期、年、月、日等信息

//get time
> x <- Sys.time()
> x
[1] "2017-02-18 20:47:07 CST"
> class(x)
[1] "POSIXct" "POSIXt" 
> p <- as.POSIXlt(x)
> p
[1] "2017-02-18 20:47:07 CST"
> class(p)
[1] "POSIXlt" "POSIXt" 
//去掉类型，查看当前列表的属性
> names(unclass(p))
 [1] "sec"    "min"    "hour"   "mday"   "mon"    "year"   "wday"   "yday"  
 [9] "isdst"  "zone"   "gmtoff"
 //用变量名$属性  就能查看指定属性
> p$sec
[1] 7.927575
//规范化时间输出格式
> x1 <- "一月 1, 2015 01:01"
> strptime(x1,"%B %d, %Y %H:%M")
[1] "2015-01-01 01:01:00 CST"
>

sunniy27

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
R语言入门

R语言的数据结构对象的五种基本类型：（1）字符（character）（2）数值(numeric: real numbers) （3）整数(integer) （4）复数(complex: 1+2i) （5）逻辑(logical: TRUE / FALSE)> x <- 1 //赋值> x[1] 1> > class(x) [1] "numeric"> x <- 2L
复制链接

扫一扫

专栏目录