读书笔记_第二十三章

最新推荐文章于 2020-08-17 21:59:01 发布

weixin_41560991

最新推荐文章于 2020-08-17 21:59:01 发布

阅读量394

点赞数

分类专栏： R R 语言实战文章标签： R R 语言实战读书笔记

本文链接：https://blog.csdn.net/weixin_41560991/article/details/86741491

版权

R 同时被 2 个专栏收录

25 篇文章 4 订阅

订阅专栏

R 语言实战

23 篇文章 1 订阅

订阅专栏

#lattice 高级绘图
#像ggplot2一样，lattice图形有它自己的语法，提供了对基础图形的替代方案
#lattice包提供了用于可视化单变量，多变量数据的一整套图形系统
#许多用户转向使用lattice包是因为它能很容易的生成网格图形
#网格图形能够展示变量的分布或变量之间的关系，每幅图代表了一个或多个变量的水平

#lattice提供单因素图：点图，核密度图，直方图，条形图，箱线图
#提供二元图：散点图，条形图，平行箱线图
#多元图：3D图，散点图矩阵

#y~x|A*B
#小写字母代表数值型变量
#大写字母代表分类型变量(因子)
#竖线左侧，y~x，其中y,x代表主要变量，主要变量对应x,y轴定义
#竖线右侧，A*B,其中A,B代表调节变量

#对于单变量图，用~x代替y~x，实际只用到一个轴
#对于3D图，用z~x*y代替y~x，实际用到3个轴
#对于多变量图，用数据框来代替y~x.
#注意：调节变量总是可选的

#~x|A,表示因子A每个水平的数值变量x
#y~x|A*B,表示在给定因子A和B的水平后，数值变量y和x的关系，A*B表示不同水平的所有组合
#A~x,表示在纵轴上的分类变量A,和横轴上的数值变量x
#~x,表示数值型变量x

#范例
library(lattice)

str(singer) # voice.part is stored as a factor

# Histogram of height conditioned on voice part.
# Formula ~height | voice.part: height is the primary variable on the x axis
# and voice.part is the conditioning variable, so one histogram is drawn for
# each level of voice.part.
histogram(
  ~ height | voice.part,
  data = singer,
  xlab = "Height (inches)",
  main = "Distribution of Heights by Voice Pitch"
)
# Reading the result: each level of the conditioning variable gets its own
# panel, and the strip of each panel carries that level's label.

# The same conditioning as a kernel density plot, kept as a trellis object.
mygraph <- densityplot(
  ~ height | voice.part,
  data = singer,
  xlab = "Height (inches)",
  main = "Distribution of Heights by Voice Pitch"
)
mygraph # displaying the stored graph object
plot(mygraph) # displaying the stored graph object explicitly

# update() adjusts how an existing trellis object is displayed.
# `mygraph` is left unchanged; `newgraph` is the new, restyled graph object.
newgraph <- update(
  mygraph,
  lwd = 2,
  jitter = 0.05,
  cex = 0.8,
  pch = 16,
  col = "red"
)

# Data preparation.
# Build a labelled copy of mtcars instead of attach()-ing the data set.
# With attach(), the factor assignments created global `gear`/`cyl` objects
# that outlive detach() and shadow the data columns; re-running the section
# then rebuilt the factors from already-labelled values and gave all-NA
# factors. Passing `data =` keeps every lookup explicit and repeatable.
mtcars_labeled <- transform(
  mtcars,
  gear = factor(gear, levels = c(3, 4, 5),
                labels = c("3 gears", "4 gears", "5 gears")),
  cyl = factor(cyl, levels = c(4, 6, 8),
               labels = c("4 cylinders", "6 cylinders", "8 cylinders"))
)

# Kernel density plot.
# ~mpg: a single-variable plot.
densityplot(~ mpg,
            data = mtcars_labeled,
            main = "Density Plot",
            xlab = "Miles per Gallon")

# ~mpg | cyl: one density plot per level of cyl.
densityplot(~ mpg | cyl,
            data = mtcars_labeled,
            main = "Density Plot by Number of Cylinders",
            xlab = "Miles per Gallon")

# Box plots.
# cyl ~ mpg | gear: cyl on the y axis, mpg on the x axis, one panel per
# level of gear.
bwplot(cyl ~ mpg | gear,
       data = mtcars_labeled,
       main = "Box Plots by Cylinders and Gears",
       xlab = "Miles per Gallon")

# Scatter plots.
# mpg ~ wt | cyl * gear: mpg on the y axis, wt on the x axis, one panel for
# every combination of the cyl and gear levels.
xyplot(mpg ~ wt | cyl * gear,
       data = mtcars_labeled,
       main = "Scatter Plots by Cylinders and Gears",
       xlab = "Car Weight",
       ylab = "Miles per Gallon")

# 3D scatter plots.
# mpg is the vertical (z) axis; wt and qsec are the two axes of the base.
# One 3D scatter plot is drawn per level of cyl.
cloud(mpg ~ wt * qsec | cyl,
      data = mtcars_labeled,
      main = "3D Scatter Plots by Cylinders")

# Dot plots.
dotplot(cyl ~ mpg | gear,
        data = mtcars_labeled,
        main = "Dot Plots by Number of Gears and Cylinders",
        xlab = "Miles Per Gallon")

# Scatter plot matrix.
# A data frame takes the place of the y ~ x formula. Columns 1, 3, 4, 5, 6
# are numeric in both mtcars and the labelled copy, so mtcars is used as is.
splom(mtcars[c(1, 3, 4, 5, 6)],
      main = "Scatter Plot Matrix for mtcars Data")

#调节变量
#lattice绘图的一个强大特征是可以增加调节变量
#如果存在一个调节变量，就可以绘制出对应每个水平的面板图
#如果存在两个调节变量，就可以绘制给定两个变量每个水平的任意组合的面板图
#包括两个以上调节变量的图就不怎么有用了

#通常情况下，调节变量是因子

#对于连续变量应该如何操作？
#方法一，利用cut()函数将连续的变量转化成离散的变量
#方法二，利用lattice包将数据转换成shingle的数据结构，分成一系列可能重叠的范围

#myshingle <- equal.count(x,number=n,overlap=proportion)
#将连续变量x分成n个间隔，重叠比例为proportion

# Cut engine displacement into 3 intervals with no overlap (a shingle).
displacement <- equal.count(mtcars$disp, overlap = 0, number = 3)
class(displacement) # returns "shingle"
displacement
# The original values are shown under "Data", followed by:
# Intervals:
#     min   max count
# 1  71.0 145.1    11
# 2 146.6 275.9    10
# 3 300.9 472.1    11
plot(displacement) # graphical view: the three ranges do not overlap at all

xyplot(
  mpg ~ wt | displacement,
  data = mtcars,
  xlab = "Weight",
  ylab = "Miles per Gallon",
  main = "Miles per Gallon vs. Weight by Engine Displacement",
  aspect = 1.5, # aspect ratio, height / width
  layout = c(3, 1) # three columns in one row
)

#面板函数

# When no panel function is given, xyplot() uses panel.xyplot ...
xyplot(mpg ~ wt | displacement, data = mtcars)
# ... so naming it explicitly is equivalent. The argument must be spelled
# `panel`; a misspelled name is not matched to it and the call silently
# falls back to the default instead of using the function supplied.
xyplot(mpg ~ wt | displacement, data = mtcars, panel = panel.xyplot)

#范例自定义面板函数

# Custom panel function: filled points, rugs, grid lines and a regression
# line. It is called with the x and y values of one panel.
mypanel <- function(x, y) {
  # Scatter points; pch = 19 selects a filled circle.
  panel.xyplot(x, y, pch = 19)
  # Rugs on both axes. panel.rug(x, FALSE) would add one to the horizontal
  # axis only, panel.rug(FALSE, y) to the vertical axis only.
  panel.rug(x, y)
  # Horizontal and vertical grid lines.
  panel.grid(h = -1, v = -1)
  # Regression line: red (col), standard thickness (lwd = 1), line type 2
  # (lty). See help(panel.lmline).
  panel.lmline(x, y, col = "red", lwd = 1, lty = 2)
}

# Weight vs. mileage per displacement interval, drawn with the custom
# panel function `mypanel`.
xyplot(
  mpg ~ wt | displacement,
  data = mtcars,
  panel = mypanel,
  main = "Miles per Gallon vs. Weight by Engine Displacement",
  xlab = "Weight",
  ylab = "Miles per Gallon",
  aspect = 1.5,
  layout = c(3, 1)
)

#范例自定义面板函数和额外选项的xyplot()
library(lattice)
# Recode am (0/1) as a labelled factor stored in a new column.
mtcars$transmission <- factor(
  mtcars$am,
  labels = c("Automatic", "Manual"),
  levels = c(0, 1)
)
# Custom panel function: grid, points, loess curve and a mean line.
# Every call below is applied to each panel with that panel's x and y.
panel.smoother <- function(x, y) {
  panel.grid(h = -1, v = -1) # grid lines
  panel.xyplot(x, y) # scatter points
  panel.loess(x, y) # nonparametric fitted curve
  # Horizontal line at the mean of y within this panel, not the mean of the
  # whole sample.
  panel.abline(h = mean(y), lwd = 2, lty = 2, col = "darkgreen")
}

# Mileage vs. displacement per transmission type, drawn with the custom
# panel function `panel.smoother`.
xyplot(
  mpg ~ disp | transmission,
  data = mtcars,
  main = "MPG vs Displacement by Transmission Type",
  sub = "Dotted lines are Group Means",
  xlab = "Displacement",
  ylab = "Miles per Gallon",
  panel = panel.smoother,
  # Red tick annotations (axis numbers and tick marks) at 80% of the default
  # size. Use scales = list(x = list(), y = list()) to give the horizontal
  # and vertical axes separate options.
  scales = list(cex = 0.8, col = "red"),
  aspect = 1
)

#分组变量
library(lattice)
# Recode am (0/1) as a labelled factor used as the grouping variable.
mtcars$transmission <- factor(mtcars$am, levels = c(0, 1),
                              labels = c("Automatic", "Manual"))

# Without `groups`, all observations are pooled and a single density curve is
# drawn. With `groups = transmission`, each group gets its own curve, but the
# curves share one panel instead of being split into separate panels.
# The argument is spelled out as `groups`; the shorter `group` only worked
# through partial argument matching.
densityplot(
  ~ mpg,
  data = mtcars,
  groups = transmission,
  main = "MPG Distribution by Transmission Type",
  xlab = "Miles per Gallon",
  # No legend is drawn by default. auto.key = TRUE creates a basic legend and
  # places it above the plot. The custom list below instead puts the legend
  # on the right, stacks the groups in a single column (one group per row),
  # and adds a legend title.
  auto.key = list(space = "right", columns = 1, title = "Transmission")
)

#范例带有分组变量和自定义图例的核密度估计
library(lattice)
# Recode am (0/1) as a labelled factor used as the grouping variable.
mtcars$transmission <- factor(mtcars$am, levels = c(0, 1),
                              labels = c("Automatic", "Manual"))

colors <- c("red", "blue") # colour per group
lines <- c(1, 2) # line type per group
points <- c(16, 17) # point symbol per group
# The first element of each vector applies to the first level of the grouping
# variable, the second element to the second level, and so on.

# Custom legend.
# The title component must be named `title`; the previous spelling `tile` is
# not a recognised key component, so the legend title was never set.
key.trans <- list(
  title = "Transmission", # legend title
  space = "bottom", # legend position
  columns = 2, # one column per group, so all groups sit side by side in a row
  text = list(levels(mtcars$transmission)), # text labels
  points = list(pch = points, col = colors), # point symbols
  lines = list(col = colors, lty = lines), # line types
  cex.title = 1, # size of the legend title
  cex = 0.9 # size of the legend entries
)

# `groups` is spelled out in full; `group` only worked through partial
# argument matching.
densityplot(
  ~ mpg,
  data = mtcars,
  groups = transmission,
  main = "MPG Distribution by Transmission Type",
  xlab = "Miles per Gallon",
  pch = points,
  lty = lines,
  col = colors,
  lwd = 2,
  jitter = 0.005,
  key = key.trans
)

#带有分组和调节变量以及自定义图例的xyplot函数
library(lattice)
colors <- "darkgreen"
symbols <- c(1:12) # one symbol per level of CO2$Plant (12 levels)
linetype <- c(1:3)

key.species <- list(
  title = "Plant",
  space = "right",
  text = list(levels(CO2$Plant)),
  # 12 point symbols; the single colour is too short and is recycled.
  points = list(pch = symbols, col = colors)
)

# Type * Treatment: one panel for every combination of the levels of Type and
# Treatment.
# levels(CO2$Type) returns "Quebec" "Mississippi" (2 levels).
# levels(CO2$Treatment) returns "nonchilled" "chilled" (2 levels).
# unique(CO2$conc) returns 95 175 250 350 500 675 1000, the CO2
# concentrations.
# `groups` is spelled out in full; `group` only worked through partial
# argument matching.
xyplot(
  uptake ~ conc | Type * Treatment,
  data = CO2,
  groups = Plant,
  type = "o",
  # If the `symbols` object is missing or its name is misspelled, the call
  # fails with: error using packet 3, attempt to replicate an object of type
  # 'closure'.
  pch = symbols,
  col = colors,
  lty = linetype,
  # Main title; \n splits it over two lines.
  main = "Carbon Dioxide Uptake\nin Grass Plants",
  # Mathematical annotation in the axis labels:
  # frac(x, y) draws x over y; frac("umol", "m^2") cannot be run on its own
  # and reads like umol/m^2; italic(...) sets it in italics.
  ylab = expression(paste("Uptake",
                          bgroup("(", italic(frac("umol", "m^2")), ")"))),
  xlab = expression(paste("Concentration",
                          bgroup("(", italic(frac(mL, L)), ")"))),
  sub = "Grass Species:Echinochloa crus-galli", # subtitle
  key = key.species
)
# Reading the result:
# The plot shows CO2 uptake for the 12 plants in levels(CO2$Plant) at the 7
# concentrations in unique(CO2$conc).
# 6 plants come from Quebec and 6 from Mississippi.
# For each origin, 3 plants were studied chilled and 3 nonchilled.

#图形参数
#par()函数用来查看并设置默认的，原生图形系统的图形参数
#但对lattice图形不起作用
#lattice 函数使用的图形系统默认设置包含在一个大的列表对象中，
#可以通过trellis.par.get()函数获得并通过trellis.par.set()函数更改
#show.settings()函数能直观的展示当前的图形设置

# Display the current (default) graphical settings.
show.settings()
# Save them as one large list object.
mysettings <- trellis.par.get()
# List the components of that list.
names(mysettings)

mysettings$superpose.symbol
# $pch
# [1] 1 1 1 1 1 1 1
# The default symbol is 1 for every level of a grouping variable.
mysettings$superpose.symbol$pch

# Change the default (a two-level list).
# Symbol 1 (open circle) stands for the first level of the grouping variable,
# symbol 2 (open triangle) for the second level, and so on up to a tenth
# level. The change stays in effect until the graphics device is closed.
# The component name must be written in full: `$` partially matches when
# reading, but assigning to the abbreviated `superpose.symbo` created a new
# list element and left superpose.symbol unchanged.
mysettings$superpose.symbol$pch <- c(1:10)
show.settings() # only the local list was assigned; nothing has changed yet
trellis.par.set(mysettings)

#自定义图形条带

# Example: default appearance.
# The strip of the first conditioning variable is peach coloured, the second
# light green, and the third light blue.
histogram(
  ~ height | voice.part,
  data = singer,
  xlab = "Height (inches)",
  main = "Distribution of Height by voice Pitch"
)

# bg = "lightgrey" turns the strip light grey.
# par.strip.text controls the look of the strip text:
#   col = "black"  black text
#   cex = 0.8      20% smaller
#   font = 3       italic (1 normal, 2 bold, 3 italic, 4 bold italic)
histogram(
  ~ height | voice.part,
  data = singer,
  xlab = "Height (inches)",
  main = "Distribution of Height by voice Pitch",
  strip = strip.custom(
    bg = "lightgrey",
    par.strip.text = list(font = 3, cex = 0.8, col = "black")
  )
)

# Read or change the strip background colours through `mysettings`.
mysettings$strip.background$col
# returns [1] "#ffe5cc" "#ccffcc" "#ccffff" "#cce6ff" "#ffccff"
#         [6] "#ffcccc" "#ffffcc"

# Light grey strip for the first conditioning variable, light green for the
# second.
mysettings$strip.background$col <- c("lightgrey", "lightgreen")
trellis.par.set(mysettings)

#页面布局

# Prepare the graph objects.
graph1 <- histogram(
  ~ height | voice.part,
  data = singer,
  main = "Heights of Choral Singers by Voice Part"
)
graph2 <- bwplot(height ~ voice.part, data = singer)

# Example 1: the `split` argument, split = c(x, y, nx, ny).
# It divides the page into a grid of rows and columns and puts the graph in
# one cell of that grid:
#   x, y  column and row position of the graph within the nx-by-ny grid
#   nx    number of columns
#   ny    number of rows

# One column (nx = 1) and two rows (ny = 2); the first graph goes in column
# 1, row 1.
plot(graph1, split = c(1, 1, 1, 2))

# Same grid; the second graph goes in column 1, row 2.
# plot() starts a new page by default; newpage = FALSE suppresses that, so
# the first graph ends up directly above the second.
plot(graph2, newpage = FALSE, split = c(1, 2, 1, 2))

# Example 2: the `position` argument, position = c(xmin, ymin, xmax, ymax).
# The page is a rectangle whose x and y both run from 0 to 1, with the origin
# (0, 0) at the bottom left.

# x range 0 to 1 fills the full width; y range 0.3 to 1 starts at 0.3, just
# above graph2, and extends to the top.
plot(graph1, position = c(0, 0.3, 1, 1))

# x range 0 to 1 fills the full width; y range 0 to 0.3 takes only a small
# strip next to the origin.
plot(graph2, newpage = FALSE, position = c(0, 0, 1, 0.3))

#改变面板顺序

# Example 1.
levels(singer$voice.part)
# returns
# [1] "Bass 2"    "Bass 1"    "Tenor 2"   "Tenor 1"
# [5] "Alto 2"    "Alto 1"    "Soprano 2" "Soprano 1"
# A level's name and its position index refer to the same panel.

# Original panel order, following levels(singer$voice.part):
#   bottom row: 1 "Bass 2", 2 "Bass 1", 3 "Tenor 2", 4 "Tenor 1"
#   top row:    5 "Alto 2", 6 "Alto 1", 7 "Soprano 2", 8 "Soprano 1"
histogram(
  ~ height | voice.part,
  data = singer,
  main = "Heights of Choral Singers by Voice Part"
)

# After reordering the panels:
#   bottom row: 2 "Bass 1", 4 "Tenor 1", 6 "Alto 1", 8 "Soprano 1"
#   top row:    1 "Bass 2", 3 "Tenor 2", 5 "Alto 2", 7 "Soprano 2"
histogram(
  ~ height | voice.part,
  data = singer,
  main = "Heights of Choral Singers by Voice Part",
  index.cond = list(c(2, 4, 6, 8, 1, 3, 5, 7))
)

#范例二
#带有分组和调节变量以及自定义图例的xyplot函数
library(lattice)
colors <- "darkgreen"
symbols <- c(1:12) # one symbol per level of CO2$Plant (12 levels)
linetype <- c(1:3)

key.species <- list(
  title = "Plant",
  space = "right",
  text = list(levels(CO2$Plant)),
  # 12 point symbols; the single colour is too short and is recycled.
  points = list(pch = symbols, col = colors)
)

# levels(CO2$Type) returns "Quebec" "Mississippi":
#   1 "Quebec", 2 "Mississippi"
# levels(CO2$Treatment) returns "nonchilled" "chilled":
#   1 "nonchilled", 2 "chilled"
# `groups` is spelled out in full; `group` only worked through partial
# argument matching.
xyplot(
  uptake ~ conc | Type * Treatment,
  data = CO2,
  groups = Plant,
  type = "o",
  # If the `symbols` object is missing or its name is misspelled, the call
  # fails with: error using packet 3, attempt to replicate an object of type
  # 'closure'.
  pch = symbols,
  col = colors,
  lty = linetype,
  # Main title; \n splits it over two lines.
  main = "Carbon Dioxide Uptake\nin Grass Plants",
  # Mathematical annotation in the axis labels:
  # frac(x, y) draws x over y; frac("umol", "m^2") cannot be run on its own
  # and reads like umol/m^2; italic(...) sets it in italics.
  ylab = expression(paste("Uptake",
                          bgroup("(", italic(frac("umol", "m^2")), ")"))),
  xlab = expression(paste("Concentration",
                          bgroup("(", italic(frac(mL, L)), ")"))),
  sub = "Grass Species:Echinochloa crus-galli", # subtitle
  key = key.species
)
# Reading the result.
# The original order corresponds to index.cond = list(c(1, 2), c(1, 2)),
# written as (Type index, Treatment index):
#   bottom row: (1, 1), (2, 1)
#   top row:    (1, 2), (2, 2)

# Same plot with the panels of the second conditioning variable (Treatment)
# swapped through index.cond.
# `groups` is spelled out in full; `group` only worked through partial
# argument matching.
xyplot(
  uptake ~ conc | Type * Treatment,
  data = CO2,
  groups = Plant,
  type = "o",
  # If the `symbols` object is missing or its name is misspelled, the call
  # fails with: error using packet 3, attempt to replicate an object of type
  # 'closure'.
  pch = symbols,
  col = colors,
  lty = linetype,
  # Main title; \n splits it over two lines.
  main = "Carbon Dioxide Uptake\nin Grass Plants",
  # Mathematical annotation in the axis labels:
  # frac(x, y) draws x over y; frac("umol", "m^2") cannot be run on its own
  # and reads like umol/m^2; italic(...) sets it in italics.
  ylab = expression(paste("Uptake",
                          bgroup("(", italic(frac("umol", "m^2")), ")"))),
  xlab = expression(paste("Concentration",
                          bgroup("(", italic(frac(mL, L)), ")"))),
  sub = "Grass Species:Echinochloa crus-galli", # subtitle
  key = key.species,
  index.cond = list(c(1, 2), c(2, 1))
)
# With index.cond = list(c(1, 2), c(2, 1)), written as
# (Type index, Treatment index):
#   bottom row: (1, 2), (2, 2)
#   top row:    (1, 1), (2, 1)

weixin_41560991

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
读书笔记_第二十三章

#lattice 高级绘图#像ggplot2一样，lattice图形有它自己的语法，提供了对基础图形的替代方案#lattice包提供了用于可视化单变量，多变量数据的一整套图形系统#许多用户转向使用lattice包是因为它能很容易的生成网格图形#网格图形能够展示变量的分布或变量之间的关系，每幅图代表了一个或多个变量的水平#lattice提供单因素图：点图，核密度图，直方图，条形图，箱线图...
复制链接

扫一扫

专栏目录