R语言基础编程技巧汇编 - 6

最新推荐文章于 2022-06-22 08:37:42 发布

SimonLiu

最新推荐文章于 2022-06-22 08:37:42 发布

阅读量3.2k

点赞数 1

分类专栏： R语言文章标签： r语言数据分析数据挖掘机器学习

本文链接：https://blog.csdn.net/liu7788414/article/details/44587213

版权

R语言专栏收录该内容

30 篇文章 8 订阅

订阅专栏

1. 在图形上不显示科学计数法

# Draw a scatter plot whose axis tick labels are written out in full
# instead of R's default scientific notation (1e+06, 2e+06, ...).
x <- seq(1000000, 10000000, by = 1000000)
y <- 2 * x

# Suppress the default axes so that custom ones can be drawn below.
plot(x, y, axes = FALSE)

# format(scientific = FALSE) renders the values as fixed-notation strings.
x1 <- format(x, scientific = FALSE)
y1 <- format(y, scientific = FALSE)

# Argument spelled out as `labels`; the original `label =` only worked
# through partial argument matching.
axis(1, at = x, labels = x1)
axis(2, at = y, labels = y1)

2. 立体坐标图

# Visualise three zero-mean normal distributions (sd = 1, 2, 3) as a 3-D
# point cloud: x = standard deviation, y = sampled value, z = density at y.
# library() stops with an error if the package is missing, whereas
# require() would only return FALSE and let later calls fail.
library(scatterplot3d)

x1 <- 1
x2 <- 2
x3 <- 3
n <- 10000

# n random draws from each distribution.
y1 <- rnorm(n, 0, x1)
y2 <- rnorm(n, 0, x2)
y3 <- rnorm(n, 0, x3)

x <- rep(c(x1, x2, x3), each = n)
y <- c(y1, y2, y3)
z <- c(dnorm(y1, 0, x1), dnorm(y2, 0, x2), dnorm(y3, 0, x3))
ma.dat <- data.frame(x = x, y = y, z = z)

var3d <- scatterplot3d(ma.dat, type = "p", lwd = 2, pch = 46, box = FALSE,
                       x.ticklabs = c(1, NA, 2, NA, 3), scale.y = 1)

# Probability density at the mean (y = 0) of each distribution, drawn as
# vertical drop lines. dnorm(0, 0, sd) equals 1 / (sd * sqrt(2 * pi)).
var3d$points3d(c(x1, x2, x3), c(0, 0, 0), dnorm(0, 0, c(x1, x2, x3)),
               col = "blue", type = "h", pch = " ")

# Baseline parallel to the x axis at y = 0 (the mean) and z = 0.
var3d$points3d(c(x1, x2, x3), c(0, 0, 0), c(0, 0, 0),
               col = "blue", type = "l", pch = "")

# Compare normal distributions that share mean 0 but have standard
# deviations 1, 2 and 3.
x1 <- 1
x2 <- 2
x3 <- 3
n <- 10000  # draws per distribution (was repeated as a literal)

y1 <- rnorm(n, 0, x1)
y2 <- rnorm(n, 0, x2)
y3 <- rnorm(n, 0, x3)

x <- rep(c(x1, x2, x3), each = n)
y <- c(y1, y2, y3)
z <- c(dnorm(y1, 0, x1), dnorm(y2, 0, x2), dnorm(y3, 0, x3))
ma.dat <- data.frame(x = x, y = y, z = z)

# xlim is an axis range, i.e. c(min, max); the original passed c(1, 2, 3).
scatterplot3d(ma.dat, type = "p", lwd = 2, pch = 46, box = FALSE,
              xlim = c(1, 3), scale.y = 1)

方差不同正态分布.png (12.96KB)

3. 指数分布的QQ图

# Q-Q plot of a sample against the exponential distribution, using
# qqPlot() from the qualityTools package.
# Install only when the package is absent, so that re-running the script
# does not reinstall it every time.
if (!requireNamespace("qualityTools", quietly = TRUE)) {
  install.packages("qualityTools")
}
library(qualityTools)

# 100 draws from an exponential distribution with the default rate.
x <- rexp(100)
qqPlot(x, "exponential")

4. edit和fix函数的区别

只用edit(X)在窗口中编辑修改X，关闭窗口后X没有改变还是原来的值；

应该使用 Y <- edit(X)，把修改后的值赋给Y，Y就是你想要的修改结果；

使用fix(X)在窗口中编辑修改X，关闭窗口后X就是你修改后的值，不必使用Y <- fix(X);

5. 用xlsx包读取Excel文件

xlsx包可以读取、写入Excel2007/2003文件并支持格式的设置。简单地来说，将Excel读取为数据框，以及将数据框写入为Excel文件都不是问题，而更加强大的是它能处理 Excel中的格式，比如合并单元格，设置列的宽度，设置字体和颜色等等。

如果只需要基本的读取/写入操作，那么其中的read.xlsx()和write.xlsx()应该就能满足大部分的需求了，其用法也很简单，看看帮助文档就了解了。此外，还有两个相应的函数read.xlsx2()和write.xlsx2()，按作者的话说，这两个函数使用了不同的实现方式，效率上会更高一些。

除了基本的读写操作之外，xlsx包还能进行格式方面的设置。下面是一个简单的例子，说明了如何创建工作簿和工作表，如何操作单元格等。

# Build a small formatted workbook with the xlsx package: a merged, styled
# title row plus a set of black-filled cells whose positions come from `ind`.

# Cell positions, read from a remote tab-separated file. The first two
# columns are used below as row and column indices.
ind <- read.table(url("http://yixuan.cos.name/cn/wp-content/uploads/2012/01/ind.txt"), sep = "\t")

library(xlsx)

# Create a new workbook
wb <- createWorkbook()

# Create a new sheet with a name
sheet1 <- createSheet(wb, "第一页")

# Set the zoom ratio when you open the Excel file
setZoom(sheet1, 50, 100)

# Set the width of columns
setColumnWidth(sheet1, 1:100, 2.8)

# Create rows
rows <- createRow(sheet1, 1:40)

# Create cells for each row
cells <- createCell(rows, 1:73)

# Merge the first row into one cell
addMergedRegion(sheet1, 1, 1, 1, 73)

# Create the style for the title cell
title_cell_style <- CellStyle(
  wb,
  alignment = Alignment(horizontal = "ALIGN_CENTER"),
  font = Font(wb, "blue", 50, isBold = TRUE)
)

# Create the style for black cells
black_cell_style <- CellStyle(
  wb,
  border = Border(),
  fill = Fill(foregroundColor = "black")
)

# Get the first row
first_row <- getRows(sheet1, 1)

# Get the title cell from the first row
title_cell <- getCells(first_row, 1)[[1]]

# Set the value of the title cell
setCellValue(title_cell, "Read/Write Excel!")

# Set the style of the title cell
setCellStyle(title_cell, title_cell_style)

# Set the style of black cells.
# NOTE(review): the published text read `ind[, 1] 3, ind[, 2] 5`, which does
# not parse; the `+` operators appear to have been lost and are restored here
# as row/column offsets -- confirm against the original author's post.
tmp <- mapply(
  function(x, y) setCellStyle(cells[[x, y]], black_cell_style),
  ind[, 1] + 3,
  ind[, 2] + 5
)

# Save the workbook into a file
saveWorkbook(wb, "test.xlsx")

xlsx包是目前功能最全的操作Excel的R包，它只依赖于Java环境和rJava、xlsxjars两个包，在多种平台下都能运行。

6. 绘制球体

# Draw a sphere of radius r as a coloured point cloud with rgl.
library(rgl)
library(MASS)
library(evd)

N1 <- 100
N2 <- 100
A <- seq(0, 2 * pi, length.out = N1)  # azimuth angle
B <- seq(0, pi, length.out = N2)      # polar angle
r <- 2

# Spherical -> Cartesian coordinates; rows index B, columns index A.
x <- outer(r * sin(B), cos(A), "*")
y <- outer(r * sin(B), sin(A), "*")
# z must be r * cos(B) for the points to satisfy x^2 + y^2 + z^2 = r^2.
# The original `r - cos(B)` gave an ellipsoid squashed along z and centred
# at z = r. z is expanded to the same N2 x N1 shape as x and y so that no
# implicit recycling is needed.
z <- outer(r * cos(B), rep(1, N1), "*")

# col=sample(rainbow(N2),N2)
plot3d(x, y, z, col = rainbow(100), axes = FALSE, xlab = "", ylab = "", zlab = "")

# Render a globe: a 50 x 50 latitude/longitude grid mapped onto a sphere
# and covered with the world texture that ships with rgl.
# `length.out` is spelled out; the original `len =` relied on partial
# argument matching.
lat <- matrix(seq(90, -90, length.out = 50) * pi / 180, 50, 50, byrow = TRUE)
long <- matrix(seq(-180, 180, length.out = 50) * pi / 180, 50, 50)

r <- 6378.1 # radius of Earth in km

# Latitude/longitude (in radians) -> Cartesian coordinates.
x <- r * cos(lat) * cos(long)
y <- r * cos(lat) * sin(long)
z <- r * sin(lat)

open3d()

# The position vectors are also passed as the surface normals.
persp3d(x, y, z, col = "white",
        texture = system.file("textures/worldsmall.png", package = "rgl"),
        specular = "black", axes = FALSE, box = FALSE,
        xlab = "", ylab = "", zlab = "",
        normal_x = x, normal_y = y, normal_z = z)

7. 热图

# Heatmap of a random 20 x 10 matrix with shifted blocks.
# pheatmap() lives in the pheatmap package, which the original snippet
# never loaded.
library(pheatmap)

# Create test matrix
test <- matrix(rnorm(200), 20, 10)

odd_cols <- seq(1, 10, 2)
even_cols <- seq(2, 10, 2)

# Shift selected row/column blocks so the heatmap shows visible structure.
test[1:10, odd_cols] <- test[1:10, odd_cols] + 3
test[11:20, even_cols] <- test[11:20, even_cols] + 2
test[15:20, even_cols] <- test[15:20, even_cols] + 4

colnames(test) <- paste0("Test", 1:10)
rownames(test) <- paste0("Gene", 1:20)

# Draw heatmaps
pheatmap(test)

8. 绘制有很多置信区间的图形

# Plot 100 simulated intervals as vertical segments with blue end points,
# plus a horizontal reference line at 15.
low <- rnorm(100, 14, 1)
up <- rnorm(100, 16, 1)

# Order each pair so the lower bound never exceeds the upper bound, even
# when the two random draws cross. pmin()/pmax() do this element-wise
# without a row-wise apply().
CIlow <- pmin(low, up)
CIup <- pmax(low, up)
CIs <- data.frame(low = CIlow, up = CIup)

# Empty canvas sized to hold every interval with a small margin.
plot(0, xlim = c(0, 100), ylim = c(min(CIs) - 0.2, max(CIs) + 0.2), type = "n")

# One vertical segment per interval, drawn in a single vectorized call.
# seq_len() stays empty for zero rows, unlike 1:nrow().
idx <- seq_len(nrow(CIs))
segments(x0 = idx, y0 = CIs$low, x1 = idx, y1 = CIs$up)
points(x = rep(idx, 2), y = c(CIs$low, CIs$up), pch = 16, col = "blue")

abline(h = 15)

9. 使用rvest包抓取网络数据示例

rvest实乃利器，RCurl和XML相形见绌。

以《Automated Data Collection with R》第十章的数据下载为例，比较一下两者的优劣。

数据源自英国政府的一个新闻网站，网址是：

https://www.gov.uk/government/announcements?keywords=&announcement_type_option=press-releases&topics[]=all&departments[]=all&world_locations[]=all&from_date=&to_date=01%2F07%2F2010

选择2010年7月1日以前英国政府相关部门所发的新闻，总计749篇，包含在19个网页中。

数据提取的第一个任务就是将这749篇新闻的链接路径保存在本地硬盘上。浏览网页，发现除了第一个网页外，其它18个网页都是有规律的，这样提取网页标题就分两步走：

library(rvest)   # web scraping
library(stringr) # string handling

# Goal: collect the link of every press release listed on 19 result pages
# and store each link in its own file.

# URL of the first results page
url <- 'https://www.gov.uk/government/announcements?keywords=&announcement_type_option=press-releases&topics[]=all&departments[]=all&world_locations[]=all&from_date=&to_date=01%2F07%2F2010'

# "h3 a" is the CSS selector the browser inspector reports for the headline
# links. read_html() replaces html(), which was removed from rvest.
# html_attr("href") returns only the link target, whereas html_attrs()
# returns every attribute of each node.
first <- url %>%
  read_html() %>%
  html_nodes("h3 a") %>%
  html_attr("href")

# URLs of the other 18 pages (page = 2..19); str_c() is vectorized over the
# page numbers, so no sapply() is needed.
others <- str_c("https://www.gov.uk/government/announcements?announcement_type_option=press-releases&departments%5B%5D=all&from_date=&keywords=&page=", 2:19, "&to_date=01%2F07%2F2010&topics%5B%5D=all&world_locations%5B%5D=all")

# Return the headline links found on the x-th entry of `others`.
myfun <- function(x) {
  others[x] %>%
    read_html() %>%
    html_nodes("h3 a") %>%
    html_attr("href")
}

# lapply() always yields a list, whatever the number of links per page.
doc <- lapply(seq_along(others), myfun)

# First page plus the 18 others: 19 pages in total.
dat <- c(first, unlist(doc))

# The scraped hrefs are site-relative; prepend the host.
dat <- str_c("https://www.gov.uk", dat)

# Create a directory and store each link in its own file.
dir.create("F:/Press_Releases", showWarnings = FALSE)

# Write only the i-th link into file i; the original wrote the whole `dat`
# vector into every file.
for (i in seq_along(dat)) {
  write(dat[i], file = str_c("F:/Press_Releases/", i, ".html"))
}

# Check the result (outputs below are those reported in the original post).
length(list.files("F:/Press_Releases"))
# [1] 749

list.files("F:/Press_Releases")[1:3]
# [1] "1.html"   "10.html"  "100.html"

10. 对向量每n个数进行某种操作

问题:

# Problem statement: for a vector zz, compute the sum of every run of three
# consecutive elements (zz[i] + zz[i + 1] + zz[i + 2]).
zz <- rnorm(300)
n <- length(zz)

# Preallocate the result. The original started from `c = 0` and grew the
# vector by one element per iteration.
c <- numeric(n - 2)

for (i in seq_len(n - 2)) {
  c[i] <- zz[i] + zz[i + 1] + zz[i + 2]
}

要求把zz中每相邻三个元素加在一起（即窗口长度为3的滑动求和）。

方法一:

可以用filter函数实现,下列代码表示

# Moving sum of width 3 via a convolution filter with weights (1, 1, 1).
# Qualified with stats:: because dplyr, when attached, masks filter().
# With the default sides = 2 the window is centred, so the first and last
# elements of the result are NA.
stats::filter(zz, rep(1, 3))

方法二:

用zoo包的rollapply函数

# rollapply() is provided by the zoo package, which must be loaded first.
library(zoo)

# Apply sum() over a sliding window of 3 consecutive elements.
rollapply(zz, 3, sum)

11. 把factor转换成数值

# Encode the letters of a word as a factor whose levels are the 26
# lowercase letters, then show the factor.
ff <- factor(strsplit("statistics", "")[[1]], levels = letters)
ff
# [1] s t a t i s t i c s
# Levels: a b c d e f g h i j k l m n o p q r s t u v w x y z

# as.integer() gives the position of each element's level in `letters`.
as.integer(ff)
# [1] 19 20  1 20  9 19 20  9  3 19

12. 两个矩阵相乘的符号

*是表示两个矩阵中对应的元素的乘积，

%*%表示矩阵理论中的两个矩阵的乘积，要求第一个矩阵的列数与第二个矩阵的行数相等。

13. 用scan函数让用户输入参数

# Read numeric values typed at the console. In the sample session below,
# an empty line at the third prompt ends the input.
user_input <- scan(file = "", what = double())
# 1: 20
# 2: 30
# 3:
# Read 2 items

# Show what was read.
user_input
# [1] 20 30

14. 在ggplot2中使用windows字体

很多杂志在发表文章的时候要求用Helvetica、Arial或者Times New Roman的字体，而R软件做图的时候并没有提供这些字体。今天在研究这个问题的时候发现一个讨论贴很好：

http://stackoverflow.com/questions/4094094/modifying-fonts-in-ggplot2

使用windowsFonts这个函数就可以达到要求，如下是我自己为了画线性回归而写的一个theme:

library(ggplot2)

# Register Windows fonts under short family names so themes can refer to
# them. windowsFonts()/windowsFont() exist only on Windows builds of R.
# NOTE(review): the published text had "HelveticaCE 55 Roman" and
# "Times NewRoman"; the spacing is restored here -- confirm the Helvetica
# face name matches the font installed on the target machine.
windowsFonts(
  HEL = windowsFont("Helvetica CE 55 Roman"),
  RMN = windowsFont("Times New Roman"),
  ARL = windowsFont("Arial")
)

# Theme for publication-style regression plots. theme_update() returns the
# previous theme, kept in `old_theme` so it can be restored with theme_set().
# theme_text()/theme_segment()/theme_blank() no longer exist in ggplot2;
# element_text()/element_line()/element_blank() are their replacements.
old_theme <- theme_update(
  plot.title = element_text(family = "ARL", size = 18, face = "bold", colour = "black"),
  axis.title.x = element_text(family = "HEL", size = 15, colour = "black"),
  axis.title.y = element_text(family = "HEL", size = 15, angle = 90, colour = "black"),
  axis.text.x = element_text(family = "RMN", size = 11, colour = "black"),
  axis.text.y = element_text(family = "RMN", size = 11, colour = "black"),
  axis.ticks = element_line(colour = "black"),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.background = element_blank(),
  # ggplot2 >= 3.4.0 prefers `linewidth` for lines; `size` is kept here so
  # the snippet also runs on older versions.
  axis.line = element_line(size = 1)
)

运行如上的程序，直接调用函数画图：

运行的效果：

15. 求逆矩阵

如果X是个可逆的方阵，则solve(X)就是它的逆矩阵。

SimonLiu

关注

1
点赞
踩
13

收藏

觉得还不错? 一键收藏
0
评论
R语言基础编程技巧汇编 - 6

1. 在图形上不显示科学计数法x y plot(x, y,axes = FALSE)x1 y1 axis(1,at = x,label=x1)axis(2,at = y,label=y1) 2. 立体坐标图require(scatterplot3d) x1 n y1 y2 y3 xy z
复制链接

扫一扫