R 笔记

最新推荐文章于 2022-12-04 22:28:24 发布

ninuxGithub

最新推荐文章于 2022-12-04 22:28:24 发布

阅读量485

点赞数

分类专栏： r 文章标签： r

本文链接：https://blog.csdn.net/shen19920619/article/details/54925253

版权

r 专栏收录该内容

8 篇文章 0 订阅

订阅专栏

begin note

调用命令：R CMD BATCH D:\RWORKSPACE\CMD_TEST.R （注意 CMD BATCH 都要大写；在区分大小写的系统上，命令名 R 也必须大写）

ls(): 列出所有的变量名称

ls(pattern ='v'): 根据pattern匹配

rm('xxx') 删除变量

rm(list=ls()) 删除所有的变量
> ls()
character(0)

集合转数组：

# Console transcript: build a 3 x 3 x 2 array from two vectors and label its
# dimensions.
# NOTE(review): the dimnames list is given as (column.names, row.names, ...),
# so the "COLn" labels end up on the first dimension (rows) and the "ROWn"
# labels on the second (columns), as the printed output below shows. Swap the
# first two list entries if rows should be labelled ROW1..ROW3.
> vector1 <- c(5,9,3)
> vector2 <- c(10,11,12,13,14,15)
> column.names <- c("COL1","COL2","COL3")
> row.names <- c("ROW1","ROW2","ROW3")
> matrix.names <- c("Matrix1","Matrix2")
> result <- array(c(vector1,vector2),dim=c(3,3,2),dimnames = list(column.names,row.names,matrix.names))
> print(result)
, , Matrix1

ROW1 ROW2 ROW3
COL1 5 10 13
COL2 9 11 14
COL3 3 12 15

, , Matrix2

ROW1 ROW2 ROW3
COL1 5 10 13
COL2 9 11 14
COL3 3 12 15

> # Print the third row of the second matrix of the array.
> print(result[3,,2])
ROW1 ROW2 ROW3
3 12 15
>
> # Print the element in the 1st row and 3rd column of the 1st matrix.
> print(result[1,3,1])
[1] 13
>
> # Print the 2nd Matrix.
> print(result[,,2])
ROW1 ROW2 ROW3
COL1 5 10 13
COL2 9 11 14
COL3 3 12 15
数组的操作：

# Build two 3 x 3 x 2 arrays from vectors of different lengths, take the
# second 3 x 3 slice of each, and add the two slices element-wise.
# array() recycles its data to fill all 18 cells, so shorter inputs wrap
# around.

# Input vectors for the first array (9 values in total).
vector1 <- c(5, 9, 3)
vector2 <- c(10, 11, 12, 13, 14, 15)
array1 <- array(c(vector1, vector2), dim = c(3, 3, 2))

# Input vectors for the second array (12 values in total).
vector3 <- c(9, 1, 0)
vector4 <- c(6, 0, 11, 3, 14, 1, 2, 6, 9)
# Fixed: array2 was previously built from vector1/vector2 again, which left
# vector3/vector4 unused and made the "sum" simply twice array1.
array2 <- array(c(vector3, vector4), dim = c(3, 3, 2))

# Extract the second matrix (third-dimension index 2) from each array.
matrix1 <- array1[, , 2]
matrix2 <- array2[, , 2]

# Add the matrices element-wise.
result <- matrix1 + matrix2
print(result)

# Two input vectors whose lengths differ.
vector1 <- c(5, 9, 3)
vector2 <- c(10, 11, 12, 13, 14, 15)

# Fill a 3 x 3 x 2 array from the concatenated vectors and display it.
new.array <- array(data = c(vector1, vector2), dim = c(3, 3, 2))
print(new.array)

# Sum over margin 1: one total per row index, pooled across every column
# and across both matrices.
result <- apply(X = new.array, MARGIN = 1, FUN = sum)
print(result)

# Plain character vector of compass directions.
data <- c(
  "East", "West", "East", "North", "North", "East",
  "West", "West", "West", "East", "North"
)
print(data)
# A character vector is not a factor, so this prints FALSE.
print(is.factor(data))

# Convert the character vector into a factor.
factor_data <- factor(x = data)
print(factor_data)
# Check whether the converted object is a factor; this prints TRUE.
print(is.factor(factor_data))

# Create the vectors for the data frame.
height <- c(132, 151, 162, 139, 166, 147, 122)
weight <- c(48, 49, 66, 53, 67, 52, 40)
gender <- c("male", "male", "female", "female", "male", "female", "male")

# Create the data frame.
# stringsAsFactors = TRUE is passed explicitly: its default changed to FALSE
# in R 4.0.0, and without it the character column would stay character and
# the factor check below would print FALSE.
input_data <- data.frame(height, weight, gender, stringsAsFactors = TRUE)
print(input_data)

# Test whether the gender column (not row) of the data frame is a factor.
print(is.factor(input_data$gender))

# Print the gender column to see its levels.
print(input_data$gender)

# Compass directions as a character vector.
data <- c(
  "East", "West", "East", "North", "North", "East",
  "West", "West", "West", "East", "North"
)

# Turn the vector into a factor and show it.
factor_data <- factor(x = data)
print(factor_data)

# Rebuild the factor with an explicit level order; the values are unchanged,
# only the order of the levels differs.
new_order_data <- factor(
  x = factor_data,
  levels = c("East", "West", "North")
)
print(new_order_data)

gl(n, k, labels)

以下是所使用的参数的说明：

n 是一个整数来给出级别数
k 是一个整数给出重复的数量
labels 为所得到的因子级别标签的向量。

示例

# Generate a factor with 3 levels, each repeated 4 times in a row, using the
# given city names as level labels.
v <- gl(
  n = 3,
  k = 4,
  labels = c("Tampa", "Seattle", "Boston")
)
print(v)

创建数据帧

# Build the employee table; string columns are kept as character vectors.
emp.data <- data.frame(
  emp_id = 1:5,
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(c(
    "2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27"
  )),
  stringsAsFactors = FALSE
)
# Show the whole table.
print(emp.data)

# Show the structure of the table.
str(emp.data)

# Show per-column summary statistics.
print(summary(emp.data))

# Extract specific columns into a new data frame.
result <- data.frame(emp.data$emp_name, emp.data$salary)
print(result)

# Extract the first two rows with all columns.
result <- emp.data[1:2, ]
print(result)

# Extract rows 3 and 5, restricted to columns 2 and 4.
result <- emp.data[c(3, 5), c(2, 4)]
print(result)

# Add the "dept" column to the table.
emp.data$dept <- c("IT", "Operations", "IT", "HR", "Finance")
v <- emp.data
print(v)

# Append new employee records to an existing employee table with rbind().

# Create the first data frame.
emp.data <- data.frame(
  emp_id = 1:5,
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(c(
    "2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27"
  )),
  dept = c("IT", "Operations", "IT", "HR", "Finance"),
  stringsAsFactors = FALSE
)

# Create the second data frame holding the rows to append. It has the same
# columns as the first one.
emp.newdata <- data.frame(
  emp_id = 6:8,
  emp_name = c("Rasmi", "Pranab", "Tusar"),
  salary = c(578.0, 722.5, 632.8),
  start_date = as.Date(c("2013-05-21", "2013-07-30", "2014-06-17")),
  # Fixed: the last value was misspelled "Fianance", which created a
  # department distinct from the "Finance" used in the first table.
  dept = c("IT", "Operations", "Finance"),
  stringsAsFactors = FALSE
)

# Bind the two data frames row-wise.
emp.finaldata <- rbind(emp.data, emp.newdata)
print(emp.finaldata)

install.packages(file_name_with_path, repos = NULL, type="source")

# Install the package named "XML" from a local archive file instead of a
# repository (repos = NULL).
# NOTE(review): the archive is a .zip but type="source" is requested; confirm
# that this matches the kind of package file on the target platform.
install.packages("E:/XML_3.98-1.3.zip", repos = NULL, type="source")

# Demonstrate cbind() (bind as columns) and rbind() (bind as rows).

# Create vector objects.
city <- c("Tampa", "Seattle", "Hartford", "Denver")
state <- c("FL", "WA", "CT", "CO")
# ZIP codes are identifiers, not quantities, so they are stored as strings.
# Fixed: the numeric literal 06161 is read as the number 6161, which dropped
# the leading zero.
zipcode <- c("33602", "98104", "06161", "80294")

# Bind the three vectors as columns. For plain vectors cbind() produces a
# character matrix with one column per vector, not a data frame.
addresses <- cbind(city, state, zipcode)

# Print a header.
cat("# # # # The First data frame\n")

# Print the column-bound addresses.
print(addresses)

# Create a data frame with the same columns.
new.address <- data.frame(
  city = c("Lowry", "Charlotte"),
  state = c("CO", "FL"),
  zipcode = c("80230", "33949"),
  stringsAsFactors = FALSE
)

# Print a header.
cat("# # # The Second data frame\n")

# Print the data frame.
print(new.address)

# Bind the rows of both objects; because one argument is a data frame the
# result is a data frame.
all.addresses <- rbind(addresses, new.address)

# Print a header.
cat("# # # The combined data frame\n")

# Print the result.
print(all.addresses)

melt and cast

熔化和转换

R 语言编程中很有趣的一点，是可以分多个步骤改变数据的形状，直到得到所需的结构。用于完成这类操作的函数是 melt() 和 cast()。

我们使用 "MASS" 库中名为 ships 的数据集。

# Load the MASS library and print the `ships` data set used in the
# melt/cast examples.
library(MASS)
print(ships)

当我们上面的代码执行时，它产生以下结果：

   type year period service incidents
1     A   60     60     127         0
2     A   60     75      63         0
3     A   65     60    1095         3
4     A   65     75    1095         4
5     A   70     60    1512         6
.............
.............
8     A   75     75    2244        11
9     B   60     60   44882        39
10    B   60     75   17176        29
11    B   65     60   28609        58
............
............
17    C   60     60    1179         1
18    C   60     75     552         1
19    C   65     60     781         0
............
............

融化数据

现在我们融化数据：以 type 和 year 作为标识列，把其余所有列转换成多行。

# Melt `ships` into long form: "type" and "year" are kept as identifier
# columns and every other column becomes variable/value rows.
# NOTE(review): melt() is not defined anywhere in this file; presumably the
# reshape package must be loaded first (library(reshape)) — confirm.
molten.ships <- melt(ships, id = c("type","year"))
print(molten.ships)

当我们上面的代码执行时，它产生以下结果：

    type year  variable value
1      A   60    period    60
2      A   60    period    75
3      A   65    period    60
4      A   65    period    75
............
............
9      B   60    period    60
10     B   60    period    75
11     B   65    period    60
12     B   65    period    75
13     B   70    period    60
...........
...........
41     A   60   service   127
42     A   60   service    63
43     A   65   service  1095
...........
...........
70     D   70   service  1208
71     D   75   service     0
72     D   75   service  2051
73     E   60   service    45
74     E   60   service     0
75     E   65   service   789
...........
...........
101    C   70 incidents     6
102    C   70 incidents     2
103    C   75 incidents     0
104    C   75 incidents     1
105    D   60 incidents     0
106    D   60 incidents     0
...........
...........

转换数据

我们可以把融化后的数据重新转换成新的形式，得到每种类型的船在每个年份的汇总。这通过 cast() 函数完成。

# Recast the molten data: one row per type/year combination and one column
# per variable, aggregating the values with sum.
# NOTE(review): cast() is not defined anywhere in this file; presumably it
# comes from the reshape package, which is never loaded here — confirm.
recasted.ship <- cast(molten.ships, type+year~variable,sum)
print(recasted.ship)

当我们上面的代码执行时，它产生以下结果：

   type year period service incidents
1     A   60    135     190         0
2     A   65    135    2190         7
3     A   70    135    4865        24
4     A   75    135    2244        11
5     B   60    135   62058        68
6     B   65    135   48979       111
7     B   70    135   20163        56
8     B   75    135    7117        18
9     C   60    135    1731         2
10    C   65    135    1457         1
11    C   70    135    2731         8
12    C   75    135     274         1
13    D   60    135     356         0
14    D   65    135     480         0
15    D   70    135    1557        13
16    D   75    135    2051         4
17    E   60    135      45         0
18    E   65    135    1226        14
19    E   70    135    3318        17
20    E   75    135     542         1

读一个CSV文件

以下是 read.csv()函数的一个简单的例子，它读取在当前工作目录的可用的 CSV 文件：

# Read input.csv into a data frame and print it.
data <- read.csv("input.csv")
print(data)

data <- read.csv("input.csv")# Inspect the shape (rows and columns) of data.

# Confirm the result is a data frame, then print its column and row counts.
print(is.data.frame(data))
print(ncol(data))
print(nrow(data))

# Get the details of the record(s) having the maximum salary.
retval <- subset(data, salary == max(salary))
print(retval)

# Create a data frame.
data <- read.csv("input.csv")

info <- subset(data, salary > 600 & dept == "IT")# Employees with salary above 600 who are in the IT department.
print(info)

# Create a data frame.
data <- read.csv("input.csv")

retval <- subset(data, as.Date(start_date) > as.Date("2014-01-01"))# Records whose start_date is after 2014-01-01.
print(retval)

# Load the packages required to read XML files.
library("XML")
library("methods")

# Convert the input xml file to a data frame.
xmldataframe <- xmlToDataFrame("input.xml")# Load the data contained in the XML file.
print(xmldataframe)

# Load the package required to read JSON files.
library("rjson")

# Give the input file name to the function.
result <- fromJSON(file="input.json")

# Print the result.
print(result)

# Load the package required to read JSON files.
library("rjson")

# Give the input file name to the function.
result <- fromJSON(file="input.json")

# Convert JSON file to a data frame.
json_data_frame <- as.data.frame(result)# Convert the parsed JSON content into a data frame.

print(json_data_frame)

# Query the "actor" table of the MySQL sample database "sakila" and print the
# first rows.
# NOTE(review): MySQL(), dbConnect() and the other db* functions are not
# defined in this file; presumably the RMySQL package must be loaded first —
# confirm.

# Create a connection object to the MySQL database.
mysqlconnection <- dbConnect(
  MySQL(),
  user = "root",
  password = "",
  dbname = "sakila",
  host = "localhost"
)

# List the tables available in this database.
dbListTables(mysqlconnection)

# Send the SQL query that selects all rows of the "actor" table.
result <- dbSendQuery(mysqlconnection, "select * from actor")

# Store the result in an R data frame object. n = 5 limits the fetch to the
# first 5 rows.
data.frame <- fetch(result, n = 5)
# Fixed: this previously printed the undefined name `data.fame`.
print(data.frame)

# Release the pending result set and close the connection so that no
# database resources are left open.
dbClearResult(result)
dbDisconnect(mysqlconnection)

# Return the most frequent element of `v` (its statistical mode).
# When several values tie for the highest count, the one that appears first
# in `v` is returned.
getmode <- function(v) {
  distinct_values <- unique(v)
  occurrences <- tabulate(match(v, distinct_values))
  distinct_values[which.max(occurrences)]
}

# Sample numeric data to analyse.
v <- c(2, 1, 2, 3, 1, 2, 3, 4, 1, 5, 5, 3, 2, 3)

# Run the user-defined mode function on the sample and show its answer.
result <- getmode(v)
print(result)

K线图：

# Draw a monthly candlestick (K-line) chart for the symbol '^SSEC'.
library(quantmod)
# Request the '^SSEC' series from the 'yahoo' source, from 2015-1-1 up to
# today.
sse<-getSymbols('^SSEC', from='2015-1-1',to=Sys.Date(), src='yahoo')
# NOTE(review): SSEC is never assigned in this file; presumably getSymbols()
# creates it as a side effect — confirm.
SSEC.m <- to.monthly(SSEC)
# Show the last rows of the monthly series.
tail(SSEC.m)
# Plot the monthly series with the white theme.
candleChart(SSEC.m,theme = 'white')