R语言学习 day_3

  • 常规数据处理

  • 部分向量处理函数

    | Name | structure | function |
    | order() | | 按元素取值由小到大的顺序显示向量x各元素的序号 |
    | unique() | | 去除向量中的重复项 |
    | rev() | | 反转函数,反转向量元素的排列顺序 |
    | sort() | sort(a,decreasing=) | 排序函数,默认升序 |
    | duplicated() | | 判断元素是否有重复 |
    | diff() | diff(目标向量,lag=) | 向量差分函数 |

    矩阵处理:

    1、选取矩阵子集

    利用行列序号选取

    **#利用行列序号选取**
    #利用[row,col]切片选取
    x=matrix(1:25,ncol=5) 
    > x[2,5] #选取第二行第五列的元素
    [1] 22
    > x[3,] #选取第三行的所有元素
    [1]  3  8 13 18 23
    > x[,5] #选取第五列的所有元素
    [1] 21 22 23 24 25
    > x[,-5] #利用负号剔除第五列,选取其余所有列,返回结果保留矩阵结构
         [,1] [,2] [,3] [,4]
    [1,]    1    6   11   16
    [2,]    2    7   12   17
    [3,]    3    8   13   18
    [4,]    4    9   14   19
    [5,]    5   10   15   20
    > x[2,c(3,4)] #利用向量实现多个元素选取
    [1] 12 17
    

    利用行列序号矩阵选取

    #利用行列序号矩阵选取矩阵子集
    > x=matrix(-12:12,ncol=5)
    > x
         [,1] [,2] [,3] [,4] [,5]
    [1,]  -12   -7   -2    3    8
    [2,]  -11   -6   -1    4    9
    [3,]  -10   -5    0    5   10
    [4,]   -9   -4    1    6   11
    [5,]   -8   -3    2    7   12
    > index=cbind(c(1,2,5),c(3,4,4));index
         [,1] [,2]
    [1,]    1    3
    [2,]    2    4
    [3,]    5    4
    > x[index] #选取(1,3)、(2,4)、(5,4)的元素
    [1] -2  4  7
    

    利用向量序号选取

    #利用向量序号选取矩阵子集
    > x=matrix
    > x=matrix(0:35,ncol=6)
    > x[1]
    [1] 0
    > x[5]
    [1] 4
    > x[6]
    [1] 5
    > x[20:35]
     [1] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
    

    利用逻辑条件选取

    #利用逻辑条件选取子集
    > x=matrix(1:36,ncol=6)
    > y=x>16;y #返回逻辑值矩阵y
          [,1]  [,2]  [,3] [,4] [,5] [,6]
    [1,] FALSE FALSE FALSE TRUE TRUE TRUE
    [2,] FALSE FALSE FALSE TRUE TRUE TRUE
    [3,] FALSE FALSE FALSE TRUE TRUE TRUE
    [4,] FALSE FALSE FALSE TRUE TRUE TRUE
    [5,] FALSE FALSE  TRUE TRUE TRUE TRUE
    [6,] FALSE FALSE  TRUE TRUE TRUE TRUE
    > x[y] #返回符合y条件的x值
     [1] 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
    > x[median(x)] #median()表示取中值;此处median(x)=18.5,作为下标时被截取为18,即选取x的第18个元素
    [1] 18
    > x[median(x)]=0 #将下标为median(x)(18.5,截取为18)的元素赋值为0,并非把x的中值改为0
    > x
         [,1] [,2] [,3] [,4] [,5] [,6]
    [1,]    1    7   13   19   25   31
    [2,]    2    8   14   20   26   32
    [3,]    3    9   15   21   27   33
    [4,]    4   10   16   22   28   34
    [5,]    5   11   17   23   29   35
    [6,]    6   12    0   24   30   36
    
  • 数据框的选取

    关于数据子集的选取,

    利用data()引用数据集,

    利用[]切片选取,

    利用$符号可以选取指定字段的数据。

    head()、subset()、tail()、names()等函数,结合逻辑条件表达式,可以实现条件选取

    > data(longley);str(longley) #获取数据集
    'data.frame':	16 obs. of  7 variables:
     $ GNP.deflator: num  83 88.5 88.2 89.5 96.2 ...
     $ GNP         : num  234 259 258 285 329 ...
     $ Unemployed  : num  236 232 368 335 210 ...
     $ Armed.Forces: num  159 146 162 165 310 ...
     $ Population  : num  108 109 110 111 112 ...
     $ Year        : int  1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 ...
     $ Employed    : num  60.3 61.1 60.2 61.2 63.2 ...
    > longley[1:3,] #选取前3行
         GNP.deflator     GNP Unemployed Armed.Forces Population Year Employed
    1947         83.0 234.289      235.6        159.0    107.608 1947   60.323
    1948         88.5 259.426      232.5        145.6    108.632 1948   61.122
    1949         88.2 258.054      368.2        161.6    109.773 1949   60.171
    > head(longley,3) #选取指定数据集的前3行
         GNP.deflator     GNP Unemployed Armed.Forces Population Year Employed
    1947         83.0 234.289      235.6        159.0    107.608 1947   60.323
    1948         88.5 259.426      232.5        145.6    108.632 1948   61.122
    1949         88.2 258.054      368.2        161.6    109.773 1949   60.171
    > head(longley,5)
         GNP.deflator     GNP Unemployed Armed.Forces Population Year Employed
    1947         83.0 234.289      235.6        159.0    107.608 1947   60.323
    1948         88.5 259.426      232.5        145.6    108.632 1948   61.122
    1949         88.2 258.054      368.2        161.6    109.773 1949   60.171
    1950         89.5 284.599      335.1        165.0    110.929 1950   61.187
    1951         96.2 328.975      209.9        309.9    112.075 1951   63.221
    > tail(longley,2) #tail(),选取最后两行
         GNP.deflator     GNP Unemployed Armed.Forces Population Year Employed
    1961        115.7 518.173      480.6        257.2    127.852 1961   69.331
    1962        116.9 554.894      400.7        282.7    130.081 1962   70.551
    > head(longley) #默认前6行
         GNP.deflator     GNP Unemployed Armed.Forces Population Year Employed
    1947         83.0 234.289      235.6        159.0    107.608 1947   60.323
    1948         88.5 259.426      232.5        145.6    108.632 1948   61.122
    1949         88.2 258.054      368.2        161.6    109.773 1949   60.171
    1950         89.5 284.599      335.1        165.0    110.929 1950   61.187
    1951         96.2 328.975      209.9        309.9    112.075 1951   63.221
    1952         98.1 346.999      193.2        359.4    113.270 1952   63.639
    
    > names(longley) #显示字段名称
    [1] "GNP.deflator" "GNP"          "Unemployed"   "Armed.Forces" "Population"  
    [6] "Year"         "Employed"
    > longley$Year #选取指定字段
     [1] 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962
    > head(longley["GNP"],3) #选取指定数据集的指定字段的前三行数据,保留数据表结构
             GNP
    1947 234.289
    1948 259.426
    1949 258.054
    > head(longley$GNP,3) #注意与前一种选法区别,结果结构不一样
    [1] 234.289 259.426 258.054
    > longley[[2]]
     [1] 234.289 259.426 258.054 284.599 328.975 346.999 365.385 363.112 397.469 419.180
    [11] 442.769 444.546 482.704 502.601 518.173 554.894
    > longley[2]
             GNP
    1947 234.289
    1948 259.426
    1949 258.054
    1950 284.599
    1951 328.975
    1952 346.999
    1953 365.385
    1954 363.112
    1955 397.469
    1956 419.180
    1957 442.769
    1958 444.546
    1959 482.704
    1960 502.601
    1961 518.173
    1962 554.894
    > #后者保留了结构
    > head(longley[2],3) #利用[]+索引的方式选择指定字段的数据
             GNP
    1947 234.289
    1948 259.426
    1949 258.054
    > longley[1:3,c("GNP","Population")] #利用向量函数,实现多字段指定行数数据选取
             GNP Population
    1947 234.289    107.608
    1948 259.426    108.632
    1949 258.054    109.773
    > Y1960=longley[1960,]
    > Y1960=longley[1960,];Y1960
       GNP.deflator GNP Unemployed Armed.Forces Population Year Employed
    NA           NA  NA         NA           NA         NA   NA       NA
    > Y1960=longley["1960",];Y1960
         GNP.deflator     GNP Unemployed Armed.Forces Population Year Employed
    1960        114.2 502.601      393.1        251.4    125.368 1960   69.564
    #注意加入""的区别:不加引号时1960被当作行序号(数据只有16行,超出范围,返回NA行);加引号时"1960"按行名匹配
    
    > longley[c("1955","1960")] #缺少逗号时按列名选取,数据框中没有这两个列名,会报错undefined columns selected
    > longley[c("1955","1960"),]
         GNP.deflator     GNP Unemployed Armed.Forces Population Year Employed
    1955        101.2 397.469      290.4        304.8    117.388 1955   66.019
    1960        114.2 502.601      393.1        251.4    125.368 1960   69.564
    > subset(longley,GNP>350&Population>110)
         GNP.deflator     GNP Unemployed Armed.Forces Population Year Employed
    1953         99.0 365.385      187.0        354.7    115.094 1953   64.989
    1954        100.0 363.112      357.8        335.0    116.219 1954   63.761
    1955        101.2 397.469      290.4        304.8    117.388 1955   66.019
    1956        104.6 419.180      282.2        285.7    118.734 1956   67.857
    1957        108.4 442.769      293.6        279.8    120.445 1957   68.169
    1958        110.8 444.546      468.1        263.7    121.950 1958   66.513
    1959        112.6 482.704      381.3        255.2    123.366 1959   68.655
    1960        114.2 502.601      393.1        251.4    125.368 1960   69.564
    1961        115.7 518.173      480.6        257.2    127.852 1961   69.331
    1962        116.9 554.894      400.7        282.7    130.081 1962   70.551
    
  • 字段添加

    可以通过rbind或者cbind实现向指定数据集的行数据或者列数据添加

    > data(longley) 
    > gnpPop=round(longley[,"GNP"]/longley[,"Population"],2)
    > longley=cbind(longley,gnp.Pop=gnpPop)
    > head(longley)
         GNP.deflator     GNP Unemployed Armed.Forces Population Year Employed gnp.Pop
    1947         83.0 234.289      235.6        159.0    107.608 1947   60.323    2.18
    1948         88.5 259.426      232.5        145.6    108.632 1948   61.122    2.39
    1949         88.2 258.054      368.2        161.6    109.773 1949   60.171    2.35
    1950         89.5 284.599      335.1        165.0    110.929 1950   61.187    2.57
    1951         96.2 328.975      209.9        309.9    112.075 1951   63.221    2.94
    1952         98.1 346.999      193.2        359.4    113.270 1952   63.639    3.06
    
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值