Notes for R study

最新推荐文章于 2024-12-08 22:26:02 发布

shipengfei92

最新推荐文章于 2024-12-08 22:26:02 发布

阅读量2.2k

点赞数

分类专栏： R 文章标签：数据 R

本文链接：https://blog.csdn.net/shipengfei92/article/details/45129745

版权

R 专栏收录该内容

5 篇文章

订阅专栏

本文介绍R语言中的数据操作技巧，包括数据框的合并、子集选取、样本抽取等实用方法，并展示如何通过SQL语法简化复杂的数据处理流程。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

Code

source code

# 4.9 Merging data frames ----

# One row per author. I() keeps `surname` as a plain character column so it
# can be matched against `books$name` below.
authors <- data.frame(
  surname = I(c("Tukey", "Venables", "Tierney", "Ripley", "McNeil")),
  nationality = c("US", "Australia", "US", "UK", "Australia"),
  deceased = c("yes", rep("no", times = 4))
)

authors

# One row per book. "Ripley" appears twice and "R Core" has no entry in
# `authors`, which is what distinguishes the two joins below.
books <- data.frame(
  name = I(c(
    "Tukey", "Venables", "Tierney",
    "Ripley", "Ripley", "McNeil", "R Core"
  )),
  title = c(
    "Exploratory Data Analysis",
    "Modern Applied Statistics ...",
    "LISP-STAT",
    "Spatial Statistics", "Stochastic Simulation",
    "Interactive Data Analysis",
    "An Introduction to R"
  ),
  other.author = c(
    NA, "Ripley", NA, NA, NA, NA,
    "Venables & Smith"
  )
)

books

# Inner join: only surnames present in both data frames are kept.
m1 <- merge(x = authors, y = books, by.x = "surname", by.y = "name")
m1

# Full outer join: unmatched rows from either side are kept and padded with NA.
m2 <- merge(
  x = authors, y = books,
  by.x = "surname", by.y = "name",
  all.x = TRUE, all.y = TRUE
)
m2

# 4.10 Subsetting datasets ----

# NOTE(review): `leadership` is not defined in this script; it is presumably
# created by the sourced file below -- confirm against 4-1.R.
source('~/4-1.R')

# Keep columns by name.
myvars <- c("item1", "item2", "item3", "item4", "item5")
newdata <- leadership[myvars]
newdata

# The same five column names, generated instead of typed out.
myvars2 <- paste0("item", 1:5)
myvars2

# Drop columns by name: build a logical mask over the column names and negate.
myvars3 <- names(leadership) %in% c("item3", "item4")
newdata <- leadership[!myvars3]
newdata

# Drop columns by position (negative indices exclude columns 8 and 9).
newdata <- leadership[c(-8, -9)]
newdata

# Keep the first three rows.
newdata <- leadership[1:3, ]
newdata

# Select rows by condition. The columns are looked up with with() rather than
# attach(): attach() only adds the data frame to the search path, so any
# global variable named `gender` or `age` silently takes precedence over the
# column, and repeated attach() calls pile up on the search path.
with(leadership, which(gender == "M" & age > 30))
# which() drops NA conditions, so rows with a missing age are excluded ...
newdata <- leadership[with(leadership, which(gender == "M" & age > 30)), ]
# ... whereas a raw logical index would return an all-NA row for each NA
# condition.
newdata2 <- leadership[with(leadership, gender == "M" & age > 30), ]
newdata
newdata2

# subset() evaluates both the row condition and `select` inside the data
# frame, so bare column names and name ranges work.
newdata <- subset(leadership, age >= 35 | age < 24, select = c(item2, item3))
newdata
newdata <- subset(leadership, age >= 35 | age < 24, select = gender:item4)
newdata

# Random sample of three rows without replacement. seq_len() is used instead
# of 1:nrow() because 1:0 counts down to c(1, 0) for an empty data frame.
newdata <- leadership[sample(seq_len(nrow(leadership)), 3, replace = FALSE), ]
newdata

#######4.11########
library(sqldf)

# All one-carburetor cars, ordered by fuel economy. row.names = TRUE carries
# the car names through as row names of the result.
newdf <- sqldf(
  "select * from mtcars where carb=1 order by mpg",
  row.names = TRUE
)
newdf

# Mean mpg and displacement per gear count for 4- and 6-cylinder cars.
# paste() joins the two fragments with a single space, giving the same query
# text as a one-line string while keeping the source lines short.
newdf <- sqldf(
  paste(
    "select avg(mpg) as avg_mpg ,avg(disp) as avg_disp,gear",
    "from mtcars where cyl in (4,6) group by gear"
  ),
  row.names = TRUE
)
newdf

console result

> source('~/4-2.R', echo=TRUE)

> #######4.9######
> authors <- data.frame(
+   surname = I(c("Tukey", "Venables", "Tierney", "Ripley", "McNeil")),
+   nationality = c("US", "Austral ..." ... [TRUNCATED] 

> authors
   surname nationality deceased
1    Tukey          US      yes
2 Venables   Australia       no
3  Tierney          US       no
4   Ripley          UK       no
5   McNeil   Australia       no

> books <- data.frame(
+   name = I(c("Tukey", "Venables", "Tierney",
+              "Ripley", "Ripley", "McNeil", "R Core")),
+   title = c("Explorat ..." ... [TRUNCATED] 

> books
      name                         title     other.author
1    Tukey     Exploratory Data Analysis             <NA>
2 Venables Modern Applied Statistics ...           Ripley
3  Tierney                     LISP-STAT             <NA>
4   Ripley            Spatial Statistics             <NA>
5   Ripley         Stochastic Simulation             <NA>
6   McNeil     Interactive Data Analysis             <NA>
7   R Core          An Introduction to R Venables & Smith

> m1 <- merge(authors, books, by.x = "surname", by.y = "name")

> m1
   surname nationality deceased                         title other.author
1   McNeil   Australia       no     Interactive Data Analysis         <NA>
2   Ripley          UK       no            Spatial Statistics         <NA>
3   Ripley          UK       no         Stochastic Simulation         <NA>
4  Tierney          US       no                     LISP-STAT         <NA>
5    Tukey          US      yes     Exploratory Data Analysis         <NA>
6 Venables   Australia       no Modern Applied Statistics ...       Ripley

> m2 <-merge(authors, books, by.x = "surname", by.y = "name", all = TRUE)

> m2
   surname nationality deceased                         title     other.author
1   McNeil   Australia       no     Interactive Data Analysis             <NA>
2   R Core        <NA>     <NA>          An Introduction to R Venables & Smith
3   Ripley          UK       no            Spatial Statistics             <NA>
4   Ripley          UK       no         Stochastic Simulation             <NA>
5  Tierney          US       no                     LISP-STAT             <NA>
6    Tukey          US      yes     Exploratory Data Analysis             <NA>
7 Venables   Australia       no Modern Applied Statistics ...           Ripley

> #######4.10######
> source('~/4-1.R')
The following objects are masked _by_ .GlobalEnv:

    age, country, date, gender

The following objects are masked from leadership (pos = 9):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 10):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 11):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 12):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 13):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 14):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 15):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID


> myvars <- c("item1","item2","item3","item4","item5")

> newdata <- leadership[myvars]

> newdata
  item1 item2 item3 item4 item5
1     5     4     5     5     5
2     3     5     2     5     5
3     3     5     5     5     2
4     3     3     4    NA    NA
5     2     2     1     2     1

> myvars2 <- paste("item",1:5,sep = "")

> myvars2
[1] "item1" "item2" "item3" "item4" "item5"

> myvars3 <- names(leadership) %in% c("item3","item4")

> newdata <- leadership[!myvars3]

> newdata
  managerID       date country gender age item1 item2 item5 agecat
1         1 2008-10-24      US      M  32     5     4     5  young
2         2 2008-10-28      US      F  45     3     5     5  young
3         3 2008-10-01      UK      F  25     3     5     2  young
4         4 2008-10-12      UK      M  39     3     3    NA  young
5         5 2009-05-01      UK      F  NA     2     2     1   <NA>

> newdata <- leadership[c(-8,-9)]

> newdata
  managerID       date country gender age item1 item2 item5 agecat
1         1 2008-10-24      US      M  32     5     4     5  young
2         2 2008-10-28      US      F  45     3     5     5  young
3         3 2008-10-01      UK      F  25     3     5     2  young
4         4 2008-10-12      UK      M  39     3     3    NA  young
5         5 2009-05-01      UK      F  NA     2     2     1   <NA>

> newdata <- leadership[1:3,]

> newdata
  managerID       date country gender age item1 item2 item3 item4 item5 agecat
1         1 2008-10-24      US      M  32     5     4     5     5     5  young
2         2 2008-10-28      US      F  45     3     5     2     5     5  young
3         3 2008-10-01      UK      F  25     3     5     5     5     2  young

> attach(leadership)
The following objects are masked _by_ .GlobalEnv:

    age, country, date, gender

The following objects are masked from leadership (pos = 9):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 10):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 11):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 12):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 13):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 14):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 15):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID


> which(gender=="M"&age>30)
[1] 1 4

> newdata <- leadership[which(gender=="M"&age>30),]

> newdata2 <- leadership[gender=="M"&age>30,]

> newdata
  managerID       date country gender age item1 item2 item3 item4 item5 agecat
1         1 2008-10-24      US      M  32     5     4     5     5     5  young
4         4 2008-10-12      UK      M  39     3     3     4    NA    NA  young

> newdata2
  managerID       date country gender age item1 item2 item3 item4 item5 agecat
1         1 2008-10-24      US      M  32     5     4     5     5     5  young
4         4 2008-10-12      UK      M  39     3     3     4    NA    NA  young

> detach(leadership)

> newdata <- subset(leadership,age>=35 |age<24,select = c(item2,item3))

> newdata
  item2 item3
2     5     2
4     3     4

> newdata <- subset(leadership,age>=35 |age<24,select = gender:item4)

> newdata
  gender age item1 item2 item3 item4
2      F  45     3     5     2     5
4      M  39     3     3     4    NA

> newdata <- leadership[sample(1:nrow(leadership),3,replace = FALSE),]

> newdata
  managerID       date country gender age item1 item2 item3 item4 item5 agecat
5         5 2009-05-01      UK      F  NA     2     2     1     2     1   <NA>
4         4 2008-10-12      UK      M  39     3     3     4    NA    NA  young
1         1 2008-10-24      US      M  32     5     4     5     5     5  young

> #######4.11########
> library(sqldf)

> newdf <- sqldf("select * from mtcars where carb=1 order by mpg",row.names = TRUE)

> newdf
                mpg cyl  disp  hp drat    wt  qsec vs am gear carb
Valiant        18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
Hornet 4 Drive 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
Toyota Corona  21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
Datsun 710     22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
Fiat X1-9      27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
Fiat 128       32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
Toyota Corolla 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1

> newdf <- sqldf("select avg(mpg) as avg_mpg ,avg(disp) as avg_disp,gear from mtcars where cyl in (4,6) group by gear",row.names = TRUE)

> newdf
   avg_mpg avg_disp gear
1 20.33333 201.0333    3
2 24.53333 123.0167    4
3 25.36667 120.1333    5

Functions

Merge

Merge Two Data Frames

Description

Merge two data frames by common columns or row names, or do other versions of database join operations.

Usage

merge(x, y, …)

Default S3 method:

merge(x, y, …)

S3 method for class ‘data.frame’

merge(x, y, by = intersect(names(x), names(y)),
      by.x = by, by.y = by, all = FALSE, all.x = all, all.y = all,
      sort = TRUE, suffixes = c(".x", ".y"),
      incomparables = NULL, ...)

paste

Concatenate Strings

Description

Concatenate vectors after converting to character.

Usage

paste (..., sep = " ", collapse = NULL)
paste0(..., collapse = NULL)

cbind {base} R Documentation

Combine R Objects by Rows or Columns

Description

Take a sequence of vector, matrix or data-frame arguments and combine them by columns (cbind) or rows (rbind), respectively. These are generic functions with methods for other R classes.

Usage

cbind(..., deparse.level = 1)
rbind(..., deparse.level = 1)

match {base}

Value Matching

Description

match returns a vector of the positions of (first) matches of its first argument in its second.

%in% is a more intuitive interface as a binary operator, which returns a logical vector indicating if there is a match or not for its left operand.

Usage

match(x, table, nomatch = NA_integer_, incomparables = NULL)

x %in% table

which

Which indices are TRUE?

Description

Give the TRUE indices of a logical object, allowing for array indices.

Usage

which(x, arr.ind = FALSE, useNames = TRUE)
arrayInd(ind, .dim, .dimnames = NULL, useNames = FALSE)
Arguments

x

a logical vector or array. NAs are allowed and omitted (treated as if FALSE).

arr.ind

logical; should array indices be returned when x is an array?

ind

integer-valued index vector, as resulting from which(x).

.dim

dim(.) integer vector

.dimnames

optional list of character dimnames(.), of which only .dimnames[[1]] is used.

useNames

logical indicating if the value of arrayInd() should have (non-null) dimnames at all.

subset {base}

Subsetting Vectors, Matrices and Data Frames

Description

Return subsets of vectors, matrices or data frames which meet conditions.

Usage

subset(x, …)

Default S3 method:

subset(x, subset, …)

S3 method for class ‘matrix’

subset(x, subset, select, drop = FALSE, …)

S3 method for class ‘data.frame’

subset(x, subset, select, drop = FALSE, …)

sample {base}

Random Samples and Permutations

Description

sample takes a sample of the specified size from the elements of x using either with or without replacement.

Usage

sample(x, size, replace = FALSE, prob = NULL)

sample.int(n, size = n, replace = FALSE, prob = NULL)

R & Rstudio update

首先更新源：
在/etc/apt/sources.list中加入：
deb http://cran.rstudio.com/bin/linux/ubuntu trusty/

然后进行更新：

apt-get update
apt-get install r-base

然后重新安装 rstudio-server：

dpkg -r rstudio-server
gdebi rstudio-server-0.98.1103-amd64.deb

再重新打开即可使用。