作为一个开源软件,R的一个非常大的优点就是我们可以随意查看所有算法的源代码,在对这些源代码进行分析的过程中不仅可以加深对算法的认识,而且可以大步提高对R语言的掌握程度。所以接下来我重点写点关于各统计方法的R语言源代码的解释。今天先对如何查看源代码做点介绍。
最直接的方法当然是直接键入函数,大部分函数源代码就可以直接显现出来
> fivenum
function (x, na.rm = TRUE)
{
xna <- is.na(x)
if (na.rm)
x <- x[!xna]
else if (any(xna))
return(rep.int(NA, 5))
x <- sort(x)
n <- length(x)
if (n == 0)
rep.int(NA, 5)
else {
n4 <- floor((n + 3)/2)/2
d <- c(1, n4, (n + 1)/2, n + 1 - n4, n)
0.5 * (x[floor(d)] + x[ceiling(d)])
}
}
<bytecode: 0x01f32270>
<environment: namespace:stats>
还有些函数直接键入出不来源代码,主要是因为R是面向对象设计的程序语言,不同的对象,计算方式也不同,所以要通过methods()来进一步定义具体的查看对象
> mean
function (x, ...)
UseMethod("mean")
<bytecode: 0x019a54e4>
<environment: namespace:base>
不行,用methods查看
> methods(mean)
[1] mean.data.frame mean.Date mean.default mean.difftime
[5] mean.POSIXct mean.POSIXlt
任意选择一个进行查看
> mean.data.frame
function (x, ...)
{
msg <- "mean(<data.frame>) is deprecated.\n Use colMeans() or sapply(*, mean) instead."
warning(paste(msg, collapse = ""), call. = FALSE, domain = NA)
sapply(X = x, FUN = mean, ...)
}
<bytecode: 0x0260c92c>
<environment: namespace:base>
查看包里的函数过程一样,例如查看神经网络的源代码过程如下
> library(nnet)
> nnet
function (x, ...)
UseMethod("nnet")
<bytecode: 0x0180d6f4>
<environment: namespace:nnet>
> methods(nnet)
[1] nnet.default nnet.formula
> nnet.default
function (x, y, weights, size, Wts, mask = rep(TRUE, length(wts)),
linout = FALSE, entropy = FALSE, softmax = FALSE, censored = FALSE,
skip = FALSE, rang = 0.7, decay = 0, maxit = 100, Hess = FALSE,
trace = TRUE, MaxNWts = 1000, abstol = 1e-04, reltol = 1e-08,
...)
{
net <- NULL
x <- as.matrix(x)
y <- as.matrix(y)
if (any(is.na(x)))
stop("missing values in 'x'")
if (any(is.na(y)))
stop("missing values in 'y'")
if (dim(x)[1L] != dim(y)[1L])
stop("nrows of 'x' and 'y' must match")
if (linout && entropy)
stop("entropy fit only for logistic units")
if (softmax) {
linout <- TRUE
entropy <- FALSE
}
if (censored) {
linout <- TRUE
entropy <- FALSE
softmax <- TRUE
}
net$n <- c(dim(x)[2L], size, dim(y)[2L])
net$nunits <- as.integer(1L + sum(net$n))
net$nconn <- rep(0, net$nunits + 1L)
net$conn <- numeric(0L)
net <- norm.net(net)
if (skip)
net <- add.net(net, seq(1L, net$n[1L]), seq(1L + net$n[1L] +
net$n[2L], net$nunits - 1L))
if ((nwts <- length(net$conn)) == 0)
stop("no weights to fit")
if (nwts > MaxNWts)
stop(gettextf("too many (%d) weights", nwts), domain = NA)
nsunits <- net$nunits
if (linout)
nsunits <- net$nunits - net$n[3L]
net$nsunits <- nsunits
net$decay <- decay
net$entropy <- entropy
if (softmax && NCOL(y) < 2L)
stop("'softmax = TRUE' requires at least two response categories")
net$softmax <- softmax
net$censored <- censored
if (missing(Wts))
if (rang > 0)
wts <- runif(nwts, -rang, rang)
else wts <- rep(0, nwts)
else wts <- Wts
if (length(wts) != nwts)
stop("weights vector of incorrect length")
if (length(mask) != length(wts))
stop("incorrect length of 'mask'")
if (trace) {
cat("# weights: ", length(wts))
nw <- sum(mask != 0)
if (nw < length(wts))
cat(" (", nw, " variable)\n", sep = "")
else cat("\n")
flush.console()
}
if (length(decay) == 1L)
decay <- rep(decay, length(wts))
.C(VR_set_net, as.integer(net$n), as.integer(net$nconn),
as.integer(net$conn), as.double(decay), as.integer(nsunits),
as.integer(entropy), as.integer(softmax), as.integer(censored))
ntr <- dim(x)[1L]
nout <- dim(y)[2L]
if (missing(weights))
weights <- rep(1, ntr)
if (length(weights) != ntr || any(weights < 0))
stop("invalid weights vector")
Z <- as.double(cbind(x, y))
storage.mode(weights) <- "double"
tmp <- .C(VR_dovm, as.integer(ntr), Z, weights, as.integer(length(wts)),
wts = as.double(wts), val = double(1), as.integer(maxit),
as.logical(trace), as.integer(mask), as.double(abstol),
as.double(reltol), ifail = integer(1L))
net$value <- tmp$val
net$wts <- tmp$wts
net$convergence <- tmp$ifail
tmp <- matrix(.C(VR_nntest, as.integer(ntr), Z, tclass = double(ntr *
nout), as.double(net$wts))$tclass, ntr, nout)
dimnames(tmp) <- list(rownames(x), colnames(y))
net$fitted.values <- tmp
tmp <- y - tmp
dimnames(tmp) <- list(rownames(x), colnames(y))
net$residuals <- tmp
.C(VR_unset_net)
if (entropy)
net$lev <- c("0", "1")
if (softmax)
net$lev <- colnames(y)
net$call <- match.call()
if (Hess)
net$Hessian <- nnetHess(net, x, y, weights)
class(net) <- "nnet"
net
}
<bytecode: 0x01e37238>
<environment: namespace:nnet>