Notes for R study

本文介绍R语言中的数据操作技巧,包括数据框的合并、子集选取、样本抽取等实用方法,并展示如何通过SQL语法简化复杂的数据处理流程。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

Code

source code

# ---- 4.9 Merging data frames ----

# Author lookup table. `surname` is wrapped in I() so data.frame() stores it
# "as is" (class AsIs) instead of converting it.
authors <- data.frame(
  surname     = I(c("Tukey", "Venables", "Tierney", "Ripley", "McNeil")),
  nationality = c("US", "Australia", "US", "UK", "Australia"),
  deceased    = c("yes", "no", "no", "no", "no")
)

authors

# Book table keyed by `name`. "Ripley" appears twice and "R Core" has no
# matching row in `authors`, which is what makes the two joins below differ.
books <- data.frame(
  name = I(c(
    "Tukey", "Venables", "Tierney",
    "Ripley", "Ripley", "McNeil", "R Core"
  )),
  title = c(
    "Exploratory Data Analysis",
    "Modern Applied Statistics ...",
    "LISP-STAT",
    "Spatial Statistics",
    "Stochastic Simulation",
    "Interactive Data Analysis",
    "An Introduction to R"
  ),
  other.author = c(
    NA, "Ripley", NA, NA, NA, NA,
    "Venables & Smith"
  )
)

books

# Inner join: only surnames present in both tables are kept.
m1 <- merge(
  x = authors, y = books,
  by.x = "surname", by.y = "name"
)
m1

# Full outer join: unmatched rows from either side are kept and padded with NA.
m2 <- merge(
  x = authors, y = books,
  by.x = "surname", by.y = "name",
  all.x = TRUE, all.y = TRUE
)
m2

# ---- 4.10 Selecting variables and observations ----

# Load the `leadership` data frame used by the rest of this section.
# NOTE(review): presumably 4-1.R is the script from the previous section --
# confirm the path before running on another machine.
source('~/4-1.R')

# Keep columns by name.
myvars <- c("item1", "item2", "item3", "item4", "item5")
newdata <- leadership[myvars]
newdata

# Build the same column names programmatically. paste0() is the idiomatic
# shorthand for paste(..., sep = "").
myvars2 <- paste0("item", 1:5)
myvars2

# Drop columns by name: TRUE marks the columns to exclude, so negate the mask.
myvars3 <- names(leadership) %in% c("item3", "item4")
newdata <- leadership[!myvars3]
newdata

# Drop the same two columns by position (item3 and item4 are columns 8 and 9).
newdata <- leadership[c(-8, -9)]
newdata

# Keep the first three rows; the trailing comma selects all columns.
newdata <- leadership[1:3, ]
newdata

# Select men older than 30.
#
# The condition is evaluated inside `leadership` via with() rather than
# attach()/detach(). With attach(), any global variables named `gender` or
# `age` mask the data-frame columns, so the filter would silently read the
# globals instead of the (possibly recoded) columns.
male_over_30 <- with(leadership, gender == "M" & age > 30)

# Row positions where the condition is TRUE; which() omits NA entries.
which(male_over_30)

# Index by position (NA-safe) ...
newdata <- leadership[which(male_over_30), ]
# ... versus index by the logical mask directly. An NA in the mask would
# produce an all-NA row here, which is why the which() form is safer.
newdata2 <- leadership[male_over_30, ]
newdata
newdata2

# subset(): keep rows where age is at least 35 or below 24, and only the
# columns item2 and item3.
newdata <- subset(leadership, age >= 35 | age < 24, select = c(item2, item3))
newdata

# Same row filter, keeping the contiguous column range gender through item4.
newdata <- subset(leadership, age >= 35 | age < 24, select = gender:item4)
newdata

# Draw 3 rows at random without replacement. seq_len() is used instead of
# 1:nrow() so the index sequence is empty (not c(1, 0)) for a zero-row input.
newdata <- leadership[sample(seq_len(nrow(leadership)), 3, replace = FALSE), ]
newdata

# ---- 4.11 Using SQL statements on data frames ----
library(sqldf)

# All cars with a single carburetor, ordered by fuel economy.
# row.names = TRUE carries the car names over as row names of the result.
newdf <- sqldf(
  x = "select * from mtcars where carb=1 order by mpg",
  row.names = TRUE
)
newdf

# Mean mpg and displacement per gear count, restricted to 4- and 6-cylinder
# cars.
newdf <- sqldf(
  x = "select avg(mpg) as avg_mpg ,avg(disp) as avg_disp,gear from mtcars where cyl in (4,6) group by gear",
  row.names = TRUE
)
newdf

console result

> source('~/4-2.R', echo=TRUE)

> #######4.9######
> authors <- data.frame(
+   surname = I(c("Tukey", "Venables", "Tierney", "Ripley", "McNeil")),
+   nationality = c("US", "Austral ..." ... [TRUNCATED] 

> authors
   surname nationality deceased
1    Tukey          US      yes
2 Venables   Australia       no
3  Tierney          US       no
4   Ripley          UK       no
5   McNeil   Australia       no

> books <- data.frame(
+   name = I(c("Tukey", "Venables", "Tierney",
+              "Ripley", "Ripley", "McNeil", "R Core")),
+   title = c("Explorat ..." ... [TRUNCATED] 

> books
      name                         title     other.author
1    Tukey     Exploratory Data Analysis             <NA>
2 Venables Modern Applied Statistics ...           Ripley
3  Tierney                     LISP-STAT             <NA>
4   Ripley            Spatial Statistics             <NA>
5   Ripley         Stochastic Simulation             <NA>
6   McNeil     Interactive Data Analysis             <NA>
7   R Core          An Introduction to R Venables & Smith

> m1 <- merge(authors, books, by.x = "surname", by.y = "name")

> m1
   surname nationality deceased                         title other.author
1   McNeil   Australia       no     Interactive Data Analysis         <NA>
2   Ripley          UK       no            Spatial Statistics         <NA>
3   Ripley          UK       no         Stochastic Simulation         <NA>
4  Tierney          US       no                     LISP-STAT         <NA>
5    Tukey          US      yes     Exploratory Data Analysis         <NA>
6 Venables   Australia       no Modern Applied Statistics ...       Ripley

> m2 <-merge(authors, books, by.x = "surname", by.y = "name", all = TRUE)

> m2
   surname nationality deceased                         title     other.author
1   McNeil   Australia       no     Interactive Data Analysis             <NA>
2   R Core        <NA>     <NA>          An Introduction to R Venables & Smith
3   Ripley          UK       no            Spatial Statistics             <NA>
4   Ripley          UK       no         Stochastic Simulation             <NA>
5  Tierney          US       no                     LISP-STAT             <NA>
6    Tukey          US      yes     Exploratory Data Analysis             <NA>
7 Venables   Australia       no Modern Applied Statistics ...           Ripley

> #######4.10######
> source('~/4-1.R')
The following objects are masked _by_ .GlobalEnv:

    age, country, date, gender

The following objects are masked from leadership (pos = 9):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 10):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 11):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 12):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 13):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 14):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 15):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID


> myvars <- c("item1","item2","item3","item4","item5")

> newdata <- leadership[myvars]

> newdata
  item1 item2 item3 item4 item5
1     5     4     5     5     5
2     3     5     2     5     5
3     3     5     5     5     2
4     3     3     4    NA    NA
5     2     2     1     2     1

> myvars2 <- paste("item",1:5,sep = "")

> myvars2
[1] "item1" "item2" "item3" "item4" "item5"

> myvars3 <- names(leadership) %in% c("item3","item4")

> newdata <- leadership[!myvars3]

> newdata
  managerID       date country gender age item1 item2 item5 agecat
1         1 2008-10-24      US      M  32     5     4     5  young
2         2 2008-10-28      US      F  45     3     5     5  young
3         3 2008-10-01      UK      F  25     3     5     2  young
4         4 2008-10-12      UK      M  39     3     3    NA  young
5         5 2009-05-01      UK      F  NA     2     2     1   <NA>

> newdata <- leadership[c(-8,-9)]

> newdata
  managerID       date country gender age item1 item2 item5 agecat
1         1 2008-10-24      US      M  32     5     4     5  young
2         2 2008-10-28      US      F  45     3     5     5  young
3         3 2008-10-01      UK      F  25     3     5     2  young
4         4 2008-10-12      UK      M  39     3     3    NA  young
5         5 2009-05-01      UK      F  NA     2     2     1   <NA>

> newdata <- leadership[1:3,]

> newdata
  managerID       date country gender age item1 item2 item3 item4 item5 agecat
1         1 2008-10-24      US      M  32     5     4     5     5     5  young
2         2 2008-10-28      US      F  45     3     5     2     5     5  young
3         3 2008-10-01      UK      F  25     3     5     5     5     2  young

> attach(leadership)
The following objects are masked _by_ .GlobalEnv:

    age, country, date, gender

The following objects are masked from leadership (pos = 9):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 10):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 11):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 12):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 13):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 14):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID

The following objects are masked from leadership (pos = 15):

    age, agecat, country, date, gender, item1, item2, item3, item4, item5,
    managerID


> which(gender=="M"&age>30)
[1] 1 4

> newdata <- leadership[which(gender=="M"&age>30),]

> newdata2 <- leadership[gender=="M"&age>30,]

> newdata
  managerID       date country gender age item1 item2 item3 item4 item5 agecat
1         1 2008-10-24      US      M  32     5     4     5     5     5  young
4         4 2008-10-12      UK      M  39     3     3     4    NA    NA  young

> newdata2
  managerID       date country gender age item1 item2 item3 item4 item5 agecat
1         1 2008-10-24      US      M  32     5     4     5     5     5  young
4         4 2008-10-12      UK      M  39     3     3     4    NA    NA  young

> detach(leadership)

> newdata <- subset(leadership,age>=35 |age<24,select = c(item2,item3))

> newdata
  item2 item3
2     5     2
4     3     4

> newdata <- subset(leadership,age>=35 |age<24,select = gender:item4)

> newdata
  gender age item1 item2 item3 item4
2      F  45     3     5     2     5
4      M  39     3     3     4    NA

> newdata <- leadership[sample(1:nrow(leadership),3,replace = FALSE),]

> newdata
  managerID       date country gender age item1 item2 item3 item4 item5 agecat
5         5 2009-05-01      UK      F  NA     2     2     1     2     1   <NA>
4         4 2008-10-12      UK      M  39     3     3     4    NA    NA  young
1         1 2008-10-24      US      M  32     5     4     5     5     5  young

> #######4.11########
> library(sqldf)

> newdf <- sqldf("select * from mtcars where carb=1 order by mpg",row.names = TRUE)

> newdf
                mpg cyl  disp  hp drat    wt  qsec vs am gear carb
Valiant        18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
Hornet 4 Drive 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
Toyota Corona  21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
Datsun 710     22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
Fiat X1-9      27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
Fiat 128       32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
Toyota Corolla 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1

> newdf <- sqldf("select avg(mpg) as avg_mpg ,avg(disp) as avg_disp,gear from mtcars where cyl in (4,6) group by gear",row.names = TRUE)

> newdf
   avg_mpg avg_disp gear
1 20.33333 201.0333    3
2 24.53333 123.0167    4
3 25.36667 120.1333    5

Functions

Merge

Merge Two Data Frames

Description

Merge two data frames by common columns or row names, or do other versions of database join operations.

Usage

merge(x, y, ...)

Default S3 method:

merge(x, y, ...)

S3 method for class 'data.frame'

merge(x, y, by = intersect(names(x), names(y)),
      by.x = by, by.y = by, all = FALSE, all.x = all, all.y = all,
      sort = TRUE, suffixes = c(".x", ".y"),
      incomparables = NULL, ...)

paste

Concatenate Strings

Description

Concatenate vectors after converting to character.

Usage

paste (..., sep = " ", collapse = NULL)
paste0(..., collapse = NULL)

cbind {base} R Documentation

Combine R Objects by Rows or Columns

Description

Take a sequence of vector, matrix or data-frame arguments and combine them by columns or rows, respectively. These are generic functions with methods for other R classes.

Usage

cbind(..., deparse.level = 1)
rbind(..., deparse.level = 1)

match {base}

Value Matching

Description

match returns a vector of the positions of (first) matches of its first argument in its second.

%in% is a more intuitive interface as a binary operator, which returns a logical vector indicating if there is a match or not for its left operand.

Usage

match(x, table, nomatch = NA_integer_, incomparables = NULL)

x %in% table

which

Which indices are TRUE?

Description

Give the TRUE indices of a logical object, allowing for array indices.

Usage

which(x, arr.ind = FALSE, useNames = TRUE)
arrayInd(ind, .dim, .dimnames = NULL, useNames = FALSE)
Arguments

x

a logical vector or array. NAs are allowed and omitted (treated as if FALSE).

arr.ind

logical; should array indices be returned when x is an array?

ind

integer-valued index vector, as resulting from which(x).

.dim

dim(.) integer vector

.dimnames

optional list of character dimnames(.), of which only .dimnames[[1]] is used.

useNames

logical indicating if the value of arrayInd() should have (non-null) dimnames at all.

subset {base}

Subsetting Vectors, Matrices and Data Frames

Description

Return subsets of vectors, matrices or data frames which meet conditions.

Usage

subset(x, ...)

Default S3 method:

subset(x, subset, ...)

S3 method for class 'matrix'

subset(x, subset, select, drop = FALSE, ...)

S3 method for class 'data.frame'

subset(x, subset, select, drop = FALSE, ...)

sample {base}

Random Samples and Permutations

Description

sample takes a sample of the specified size from the elements of x using either with or without replacement.

Usage

sample(x, size, replace = FALSE, prob = NULL)

sample.int(n, size = n, replace = FALSE, prob = NULL)

R & Rstudio update

首先更新源:
在/etc/apt/sources.list中加入:
deb http://cran.rstudio.com/bin/linux/ubuntu trusty/

然后进行更新:

apt-get update
apt-get install r-base

然后重新安装rstudio-server:

dpkg -r rstudio-server
gdebi rstudio-server-0.98.1103-amd64.deb

再重新打开即可使用。

### Better Notes Plugin for Zotero

Better Notes 是一款专为 Zotero 设计的增强型笔记管理插件,旨在提升用户在文献管理和笔记记录方面的效率。以下是关于该插件的一些核心功能和特点:

#### 功能概述

1. **集成化体验**
   Better Notes 插件允许用户直接在 Zotero 的界面中创建、编辑和查看笔记[^3]。这种无缝集成的设计让用户无需切换到其他应用程序即可完成文献阅读与笔记整理。

2. **富文本支持**
   用户可以利用富文本格式来编写笔记,包括但不限于加粗、斜体、列表以及插入图片等功能[^4]。这使得笔记更加直观且易于理解。

3. **标签与分类系统**
   提供强大的标签和分类机制,帮助用户快速定位特定主题的相关笔记或文档集合[^5]。通过这些元数据的支持,用户的资料库能够保持高度有序的状态。

4. **同步能力**
   支持与其他云服务(如 Dropbox 或 Google Drive)进行文件夹级别的自动同步操作[^6]。这意味着无论在哪台设备上工作,都可以随时访问最新的笔记内容。

5. **搜索优化**
   增强了内置搜索引擎的功能,不仅限于全文检索,还扩展到了关联关键词匹配等方面[^7]。因此即使面对庞大的数据库也能迅速找到目标条目及其对应备注信息。

#### 安装方法

要安装此插件,请按照以下方式执行:

```bash
# 打开Zotero官方网站或者第三方可信资源页面下载最新版本压缩包;
wget https://example.com/better-notes-latest.zip
# 将其解压后放入指定目录下并重启软件生效。
unzip better-notes-latest.zip -d ~/.zotero/zotero/profiles/default/xyz/addons/
```

需要注意的是,在实际部署过程中可能因操作系统差异而有所调整,具体路径设置需参照官方指南说明为准[^8]。

---

###
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值