数据常见图表可视化(R语言)

注:R语言中 `<-` 和 `=` 在顶层都可以用于赋值,且都在当前环境中赋值,作用域并无区别;区别在于 `=` 出现在函数调用的参数位置时表示参数传递而不是赋值,因此赋值建议统一使用 `<-`(向外层/全局环境赋值的是 `<<-`)。

读取数据集 想复现的同学私聊我获取数据集

# Load the comma-separated cars2 data set from the parent directory;
# character columns are converted to factors.
cars2 <- read.csv(
  "../cars2.txt",
  stringsAsFactors = TRUE
)

汽车重量直方图

# Use a single-panel layout for the figure.
par(mfrow = c(1, 1))

# Histogram of car weight; `breaks = 30` is a suggested number of bins.
hist(
  cars2$weight,
  breaks = 30,
  xlim = c(0, 5000),
  ylim = c(0, 40),
  col = "blue",
  border = "black",
  xlab = "Weight",
  ylab = "Counts",
  main = "Histogram of Car Weights"
)

# Draw a solid black frame around the plot region.
box(which = "plot", lty = "solid", col = "black")

 创建散点图

# Scatterplot of MPG against weight, drawn as filled blue points (pch 16).
plot(
  cars2$weight,
  cars2$mpg,
  type = "p",
  pch = 16,
  col = "blue",
  xlim = c(0, 5000),
  ylim = c(0, 600),
  xlab = "Weight",
  ylab = "MPG",
  main = "scatterplot of MPG by Weight"
)

# Overlay the same points in black using the current default plotting symbol,
# which outlines each filled point.
points(
  cars2$weight,
  cars2$mpg,
  type = "p",
  col = "black"
)

 算箱图值

# Descriptive statistics of car weight (the values a box plot is built from).
#
# NOTE(review): this script only reads `cars2`; `cars` is never loaded here.
# Presumably it comes from a companion data file loaded beforehand -- confirm.
# Without it, `cars` resolves to the built-in datasets::cars, which has no
# `weight` column, so fail loudly instead of printing NA/Inf results.
if (!exists("cars") || !("weight" %in% names(cars))) {
  stop(
    "`cars` has no `weight` column; load the cars data set before running ",
    "this section.",
    call. = FALSE
  )
}

# Five-number summary plus the mean:
# Min, 1st quartile, Median, Mean, 3rd quartile, Max.
summary(cars$weight)
# Recorded output from the original run:
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#    1613    2246    2835    3005    3664    4997

# Individual statistics: mean, median, number of observations, and
# sample standard deviation.
mean(cars$weight)
median(cars$weight)
length(cars$weight)
sd(cars$weight)

变换 min-max规范化;Z-score标准化;小数定标规范化

# ---- Min-max normalization ----
# Rescale weight linearly so the minimum maps to 0 and the maximum to 1.
mi <- min(cars$weight)
ma <- max(cars$weight)
# Wrapping the assignment in parentheses also prints the result.
(minmax.weight <- (cars$weight - mi) / (ma - mi))

# ---- Z-score standardization ----
# Centre on the mean and divide by the sample standard deviation.
m <- mean(cars$weight)
s <- sd(cars$weight)
(z.weight <- (cars$weight - m) / s)

# ---- Decimal scaling ----
# Divide by a fixed power of ten (10^4).
d.weight <- cars$weight / 1e4

# Side-by-side histograms: raw weight (left) and its z-score (right).
par(mfrow = c(1, 2))

# Left panel: raw weight.
hist(
  cars$weight,
  breaks = 20,
  xlim = c(1000, 5000),
  xlab = "Weight",
  ylab = "Counts",
  main = "Histogram of Weight"
)
box(which = "plot", lty = "solid", col = "black")

# Right panel: z-score standardized weight.
hist(
  z.weight,
  breaks = 20,
  xlim = c(-2, 3),
  xlab = "Z-score of Weight",
  ylab = "Counts",
  main = "Histogram Z-score of Weight"
)
box(which = "plot", lty = "solid", col = "black")

 #倾斜程度

# Skewness statistic of weight: 3 * (mean - median) / standard deviation.
3 *
  (mean(cars$weight) - median(cars$weight)) /
  sd(cars$weight)
# Recorded result: 0.5998638 > 0,
# i.e. the data are right-skewed (positive skew).

# Transformations toward normality: square root, natural log, and
# inverse square root, each with its skewness statistic.

# Skewness statistic used throughout: 3 * (mean - median) / sd.
pearson_skew <- function(v) {
  3 * (mean(v) - median(v)) / sd(v)
}

# Skewness of the untransformed weight, printed for reference.
pearson_skew(cars$weight)

# Transformed copies of weight.
sqrt.weight <- sqrt(cars$weight)
ln.weight <- log(cars$weight)
invsqrt.weight <- 1 / sqrt(cars$weight)

# Skewness of each transformed variable.
sqrt.weight_skew <- pearson_skew(sqrt.weight)
ln.weight_skew <- pearson_skew(ln.weight)
invsqrt.weight_skew <- pearson_skew(invsqrt.weight)

# Histogram of the inverse-square-root weight with a normal curve overlaid.
par(mfrow = c(1, 1))

# Simulate a large normal sample with the same mean and standard deviation
# as the transformed data; its kernel density is the reference curve.
x <- rnorm(1000000, mean = mean(invsqrt.weight), sd = sd(invsqrt.weight))

# `probability = TRUE` puts the histogram on the density scale so the overlaid
# curve is comparable. The argument was previously misspelled `boder`, which
# raised "not a graphical parameter" warnings and never set the border colour.
# NOTE(review): the y axis is a density here, although it is labelled "Counts".
hist(
  invsqrt.weight,
  breaks = 30,
  xlim = c(0.0125, 0.0275),
  col = "lightblue",
  probability = TRUE,
  border = "black",
  xlab = "Inverse Square Root of Weight",
  ylab = "Counts",
  main = "Histogram of Inverse Square Root of Weight"
)
box(which = "plot", lty = "solid", col = "black")

# Overlay the density of the simulated normal sample in red.
lines(density(x), col = "red")

 #正态Q-Q图

# Normal Q-Q plot of the inverse-square-root weight; `datax = TRUE` puts the
# data on the x axis, so `ylim` applies to the other axis.
qqnorm(
  invsqrt.weight,
  datax = TRUE,
  ylim = c(0.01, 0.03),
  col = "red",
  main = "Normal Q—QPlot Inverse Square Root of Weight"
)

# Reference line, drawn with the same axis orientation.
qqline(
  invsqrt.weight,
  datax = TRUE,
  col = "blue"
)

 #数据逆变换

# Undo the inverse-square-root transformation for the first weight value.
x <- cars$weight[1]   # original value
y <- 1 / sqrt(x)      # transformed value
de_x <- 1 / y^2       # back-transformed value; should equal x

x
y
de_x
# Recorded console output from the original run (kept as comments so the
# script still parses):
# > x
# [1] 4209
# > y
# [1] 0.01541383
# > de_x
# [1] 4209

# Print the table sorted in ascending order of the mpg column.
cars[order(cars$mpg), ]

# ---- Duplicate records ----

# Logical vector, one entry per row: TRUE where the row repeats an earlier row.
duplicated(cars)
# Index of the first duplicated row, or 0 when there are no duplicates.
anyDuplicated(cars)

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

夏曦儿

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值