文章目录
1. R 语言编程结构
R是一种块状结构程序语言,这也是C、C++、Python、Perl等ALGOL编程语言家族的风格。块(block)由大括号划分,不过当块只包含一条语句时大括号可以省略。程序语句由换行符或者分号分隔。
本节介绍了R语言编程的特点和基本结构
1.1 控制语句
1.1.1 循环
for (variable in vector) {
}
## ex.1
x <- c(5,12,13)
for (n in x) print(n^2)
i <- 1
while (i <= 10) i <- i+4
i
i <- 1
while (TRUE) {
i <- i+4
if (i>10) break
}
print(i)
i <- 1
repeat{
i <- i+4
if (i>10) break
}
i
## ex.2
sim <- function(nreps){
commdata <- list()
commdata$countabsamcomm <- 0
for (rep in 1:nreps) {
commdata$whosleft <- 1:20
commdata$numabchosen <- 0
commdata <- choosecomm(commdata,5)
if (commdata$numabchosen >0) next
commdata <- choosecomm(commdata,4)
if (commdata$numabchosen >0) next
commdata <- choosecomm(commdata,3)
}
print(commdata$countabsamecomm/nreps)
}
1.1.2 非向量集合的循环
u <- matrix(c(1,2,3,1,2,4),nrow = 3)
u
v <- matrix(c(8,12,20,15,10,2),nrow = 3)
v
for (m in c('u','v')) {
z <- get(m)
print(lm(z[,2] ~ z[,1]))
}
1.1.3 if-else 循环
r <- 3
if (r == 4) {
x <- 1
} else {
x <- 6
y <- 5
}
x <- 2
y <- if(x == 2) x else x+1
y
1.2 算数和逻辑运算符
x + y 加法
x - y 减法
x * y 乘法
x / y 除法
x ^ y 乘幂
x %% y 模运算
x %/% y 整数除法
x == y 判断是否相等
x <= y 判断是否小于等于
x >= y 判断是否大于等于
x && y 标量的逻辑“与”运算
x || y 标量的逻辑“或”运算
x & y 向量的逻辑“与”运算(x, y以及运算结果均是向量)
x | y 向量的逻辑“或”运算(x, y以及运算结果均是向量)
!x 逻辑非
1 < 2
(1<2) * (3<4)
(1<2) * (3<4) * (5<1)
(1<2) == 1
1.3 参数的默认值
在R中运行函数名字就会出现函数参数和函数主体,包括默认参数
read.table
function (file, header = FALSE, sep = "", quote = "\"'", dec = ".",
numerals = c("allow.loss", "warn.loss", "no.loss"), row.names,
col.names, as.is = !stringsAsFactors, na.strings = "NA",
colClasses = NA, nrows = -1, skip = 0, check.names = TRUE,
fill = !blank.lines.skip, strip.white = FALSE, blank.lines.skip = TRUE,
comment.char = "#", allowEscapes = FALSE, flush = FALSE,
stringsAsFactors = FALSE, fileEncoding = "", encoding = "unknown",
text, skipNul = FALSE)
{
if (missing(file) && !missing(text)) {
file <- textConnection(text, encoding = "UTF-8")
encoding <- "UTF-8"
on.exit(close(file))
}
if (is.character(file)) {
file <- if (nzchar(fileEncoding))
file(file, "rt", encoding = fileEncoding)
else file(file, "rt")
on.exit(close(file))
}
if (!inherits(file, "connection"))
stop("'file' must be a character string or connection")
if (!isOpen(file, "rt")) {
open(file, "rt")
on.exit(close(file))
}
pbEncoding <- if (encoding %in% c("", "bytes", "UTF-8"))
encoding
else "bytes"
numerals <- match.arg(numerals)
if (skip > 0L)
readLines(file, skip)
nlines <- n0lines <- if (nrows < 0L)
5
else min(5L, (header + nrows))
lines <- .External(C_readtablehead, file, nlines, comment.char,
blank.lines.skip, quote, sep, skipNul)
if (encoding %in% c("UTF-8", "latin1"))
Encoding(lines) <- encoding
nlines <- length(lines)
if (!nlines) {
if (missing(col.names))
stop("no lines available in input")
rlabp <- FALSE
cols <- length(col.names)
}
else {
if (all(!nzchar(lines)))
stop("empty beginning of file")
if (nlines < n0lines && file == 0L) {
pushBack(c(lines, lines, ""), file, encoding = pbEncoding)
on.exit((clearPushBack(stdin())))
}
else pushBack(c(lines, lines), file, encoding = pbEncoding)
first <- scan(file, what = "", sep = sep, quote = quote,
nlines = 1, quiet = TRUE, skip = 0, strip.white = TRUE,
blank.lines.skip = blank.lines.skip, na.strings = character(0),
comment.char = comment.char, allowEscapes = allowEscapes,
encoding = encoding, skipNul = skipNul)
col1 <- if (missing(col.names))
length(first)
else length(col.names)
col <- numeric(nlines - 1L)
if (nlines > 1L)
for (i in seq_along(col)) col[i] <- length(scan(file,
what = "", sep = sep, quote = quote, nlines = 1,
quiet = TRUE, skip = 0, strip.white = strip.white,
blank.lines.skip = blank.lines.skip, comment.char = comment.char,
allowEscapes = allowEscapes, encoding = encoding,
skipNul = skipNul))
cols <- max(col1, col)
rlabp <- (cols - col1) == 1L
if (rlabp && missing(header))
header <- TRUE
if (!header)
rlabp <- FALSE
if (header) {
.External(C_readtablehead, file, 1L, comment.char,
blank.lines.skip, quote, sep, skipNul)
if (missing(col.names))
col.names <- first
else if (length(first) != length(col.names))
warning("header and 'col.names' are of different lengths")
}
else if (missing(col.names))
col.names <- paste0("V", 1L:cols)
if (length(col.names) + rlabp < cols)
stop("more columns than column names")
if (fill && length(col.names) > cols)
cols <- length(col.names)
if (!fill && cols > 0L && length(col.names) > cols)
stop("more column names than columns")
if (cols == 0L)
stop("first five rows are empty: giving up")
}
if (check.names)
col.names <- make.names(col.names, unique = TRUE)
if (rlabp)
col.names <- c("row.names", col.names)
nmColClasses <- names(colClasses)
if (is.null(nmColClasses)) {
if (length(colClasses) < cols)
colClasses <- rep_len(colClasses, cols)
}
else {
tmp <- rep_len(NA_character_, cols)
names(tmp) <- col.names
i <- match(nmColClasses, col.names, 0L)
if (any(i <= 0L))
warning("not all columns named in 'colClasses' exist")
tmp[i[i > 0L]] <- colClasses[i > 0L]
colClasses <- tmp
}
what <- rep.int(list(""), cols)
names(what) <- col.names
colClasses[colClasses %in% c("real", "double")] <- "numeric"
known <- colClasses %in% c("logical", "integer", "numeric",
"complex", "character", "raw")
what[known] <- lapply(colClasses[known], do.call, list(0))
what[colClasses %in% "NULL"] <- list(NULL)
keep <- !sapply(what, is.null)
data <- scan(file = file, what = what, sep = sep, quote = quote,
dec = dec, nmax = nrows, skip = 0, na.strings = na.strings,
quiet = TRUE, fill = fill, strip.white = strip.white,
blank.lines.skip = blank.lines.skip, multi.line = FALSE,
comment.char = comment.char, allowEscapes = allowEscapes,
flush = flush, encoding = encoding, skipNul = skipNul)
nlines <- length(data[[which.max(keep)]])
if (cols != length(data)) {
warning("cols = ", cols, " != length(data) = ", length(data),
domain = NA)
cols <- length(data)
}
if (is.logical(as.is)) {
as.is <- rep_len(as.is, cols)
}
else if (is.numeric(as.is)) {
if (any(as.is < 1 | as.is > cols))
stop("invalid numeric 'as.is' expression")
i <- rep.int(FALSE, cols)
i[as.is] <- TRUE
as.is <- i
}
else if (is.character(as.is)) {
i <- match(as.is, col.names, 0L)
if (any(i <= 0L))
warning("not all columns named in 'as.is' exist")
i <- i[i > 0L]
as.is <- rep.int(FALSE, cols)
as.is[i] <- TRUE
}
else if (length(as.is) != cols)
stop(gettextf("'as.is' has the wrong length %d != cols = %d",
length(as.is), cols), domain = NA)
do <- keep & !known
if (rlabp)
do[1L] <- FALSE
for (i in (1L:cols)[do]) {
data[[i]] <- if (is.na(colClasses[i]))
type.convert(data[[i]], as.is = as.is[i], dec = dec,
numerals = numerals, na.strings = character(0L))
else if (colClasses[i] == "factor")
as.factor(data[[i]])
else if (colClasses[i] == "Date")
as.Date(data[[i]])
else if (colClasses[i] == "POSIXct")
as.POSIXct(data[[i]])
else methods::as(data[[i]], colClasses[i])
}
compactRN <- TRUE
if (missing(row.names)) {
if (rlabp) {
row.names <- data[[1L]]
data <- data[-1L]
keep <- keep[-1L]
compactRN <- FALSE
}
else row.names <- .set_row_names(as.integer(nlines))
}
else if (is.null(row.names)) {
row.names <- .set_row_names(as.integer(nlines))
}
else if (is.character(row.names)) {
compactRN <- FALSE
if (length(row.names) == 1L) {
rowvar <- (1L:cols)[match(col.names, row.names, 0L) ==
1L]
row.names <- data[[rowvar]]
data <- data[-rowvar]
keep <- keep[-rowvar]
}
}
else if (is.numeric(row.names) && length(row.names) == 1L) {
compactRN <- FALSE
rlabp <- row.names
row.names <- data[[rlabp]]
data <- data[-rlabp]
keep <- keep[-rlabp]
}
else stop("invalid 'row.names' specification")
data <- data[keep]
if (is.object(row.names) || !(is.integer(row.names)))
row.names <- as.character(row.names)
if (!compactRN) {
if (length(row.names) != nlines)
stop("invalid 'row.names' length")
if (anyDuplicated(row.names))
stop("duplicate 'row.names' are not allowed")
if (anyNA(row.names))
stop("missing values in 'row.names' are not allowed")
}
class(data) <- "data.frame"
attr(data, "row.names") <- row.names
data
}
<bytecode: 0x0000020a9f090278>
<environment: namespace:utils>
1.4 返回值
return()
function(){
t <- function(x) return(x^2)
return(t)
}
1.5 函数都是对象
function是R中内置的函数,功能就是创建函数, x是参数列表,如下例子
g <- function(x){ # function是R中内置的函数,功能就是创建函数, x是参数列表
return(x+1)吧 ## 函数体
}
g
formals(g) # 参数列表
body(g) # 函数体
formals(abline)
body(abline)
page(abline) ## 在外接读取软件中查看 函数
z <- NULL
for (i in 1:10) if (i %%2 == 0) z <- c(z,i)
z
使用函数所组成的列表做循环
g1 <- function(x) return(sin(x))
g2 <- function(x) return(sqrt(x^2+1))
g3 <- function(x) return(2*x-1)
plot(c(0,1),c(-1,1.5))
for (f in c(g1,g2,g3)) plot(f,0,1,add=T)
1.6 环境和变量作用域的问题
R中一个函数不仅包括参数和函数体,还包括环境(envrionment)
1.6.1 顶层环境
w <- 12
f <- function(y){
d <- 8
h <- function(){
return(d*(w+y))
}
return(h())
}
environment(f)
mean
ls()
ls.str()
1.6.2 变量作用域的层次
# h()对于f()来说是局部的,在顶层环境中不可见
f <- function(y){
d <- 8
h <- function(){
return(d*(w+y))
}
print(environment(h))
return(h())
}
f(2)
1.6.3 关于ls的进一步讨论
f <- function(y){
d <- 8
return(h(d,y))
}
h <- function(dee,yyy){
print(ls())
print(ls(envir = parent.frame(n = 1)))
return(dee*(w+yyy))
}
f(2)
1.6.4 函数几乎没有副作用,不会改变全局变量
1.6.5 扩展:显示调用框的函数
showfram <- function(upn){
# determine the proper environment
if (upn < 0) {
env <- .GlobalEnv
} else {
env <- parent.frame(n = upn + 1)
}
# get the list of variable names
vars <- ls(envir = env)
# for each variable name, print its value
for (vr in vars) {
vrg <- get(vr,envir = env)
if (!is.function(vrg)){
cat(vr, ":\n", sep = '')
print(vrg)
}
}
}
g <- function(aa){
b <- 2
showfram(0)
showfram(1)
aab <- h(aa+b)
return(aab)
}
f <- function(){
a <- 1
return(g(a)+a)
}
h <- function(aaa){
c <- 3
return(aaa+c)
}
f()
1.7 R语言中没有指针
x.sort() ## Python 中可以直接改写函数中的参数
R中只能重新对x进行赋值
x <- c(12,13,5)
x <- sort(x)
x
1.8 向上级层次进行写操作
超赋值运算符 <<- 或函数assign()
two <- function(u){
u <<- 2*u
z <- 2*z
}
x <- 1
z <- 3
u
u <- 2
u
two <- function(u){
assign('u',2*u, pos = .GlobalEnv)
z <- 2*z
}
x <- 1
z <- 2
two(x)
x
u
闭包
counter <- function(){
ctr <- 0
f <- function(){
ctr <<- ctr + 1
cat('this count currently has value', ctr, '\n')
}
return(f)
}
c1 <- counter()
c2 <- counter()
c1 ## 拷贝函数给c1
c2 ## 拷贝函数给c2
c1() ## 拷贝函数给c1
c1() ## 第二次拷贝函数给c1
c1()
c2()
c2()
c2()
1.9 递归
ex.1 ## Quicksort
qs <- function(x){
if (length(x) <= 1) return(x)
pivot <- x[1]
therest <- x[-1]
sv1 <- therest[therest < pivot]
sv2 <- therest[therest >= pivot]
sv1 <- qs(sv1)
sv2 <- qs(sv2)
return(c(sv1,pivot,sv2))
}
x <- c(1,5,3,4,8,4)
qs(x)
ex.2 ## 二叉查找树
# routines to create trees and insert items into them are included
# below;a deletion routine is left to the reader as an exercise
# storage is in a matrix,say m,one row per node of the tree;if row
# i contains(u,v,w),then node i stores the value w,and has left and
# right links to rows u and v;null links have the value NA
# the tree is represented as a list(mat,nxt,inc),where mat is the
# matrix,nxt is the next empty row to be used,and inc is the number of
# rows of expansion to be allocated whenever the matrix becomes full
#print sorted tree via in-order traversal
printtree <-function(hdidx,tr){
left <-trsmat[hdidx,1]
if(!is.na(left)) printtree(left,tr)
print(tr$mat[hdidx,3]) # print root
right <- tr$mat[hdidx,2]
if(!is.na(right)) printtree(right,tr)
}
#initializes a storage matrix,with initial stored value firstval
newtree <-function(firstval,inc){
m<-matrix(rep(NA,inc*3),nrow=inc,ncol=3)
m[1,3]<-firstval
return(list(mat=m,nxt=2,inc=inc))
}
#inserts newval into the subtree of tr,with the subtree's root being
#at index hdidx;note that return value must be reassigned to tr by the
#caller(including ins()itself,due to recursion)
ins <- function(hdidx,tr,newval){
#which direction will this new node go,left or right?
dir <- if(newval <= tr$mat[hdidx,3]) 1 else 2
#if null link in that direction,place the new node here,otherwise#recurse
if(is.na(tr$mat[hdidx,dir])){
newidx <- tr$nxt #where new node goes
#check for room to add a new element
if (tr$nxt == nrow(tr$mat)+1) {
tr$mat <-
rbind(tr$mat, matrix(rep(NA,tr$inc*3),nrow=tr$inc,ncol=3))
}
#insert new tree node
tr$mat[newidx,3] <- newval
#link to the new node
tr$mat[hdidx,dir] <- newidx
tr$nxt <- tr$nxt+1 #ready for next insert
return(tr)
} else tr <- ins(tr$mat[hdidx,dir],tr,newval)
}
x <- newtree(8,3)
x
x <- ins(1,x,5)
x
x <- ins(1,x,6)
x
x <- ins(1,x,2)
x
x <- ins(1,x,20)
x
1.10 置换函数
x <- c(1,2,4)
names(x)
names(x) <- c('a','b','ab')
names(x)
x
x <- 'names<-'(x,value=c('a','c','ab'))
x
置换函数
g(u) <- v
u <- 'g<-' (u,value=v)
x[3] <- 8 ## 赋值符号左边不是变量名,所以它也是置换语句
x
x <- c(8,88,5,12,13)
x[3]
'['(x,3)
x <- '[<-'(x,2:3,value=99:100)
x
等价于
x[2:3] <- 99:100
x
1.11 写函数代码的工具
1.11.1 文本编辑器和集成开发环境
可以使用vim,Emacs,甚至Notepad或txt编辑器等编写R脚本,之后使用source()即可读入R中
source('xy.R')
1.11.2 edit() 函数
f1 <- function(x){
return(x+1)
}
f1 <- edit(f1)
1.12 创建自己的二元运算符
'%2a+b%' <- function(a,b) return(2*a+b)
3 %2a+b% 5
1.13 匿名函数
直接调用function(),而不给对象命名,就为匿名函数
z <- matrix(c(1,2,3,4,5,6),nrow = 3)
z
y <- apply(z, 1, function(x) x/c(2,8))
y
参考 R语言编程艺术 https://book.douban.com/subject/24699632/?qq-pf-to=pcqq.c2c