###为一级标题,前后各空一行;##为二级标题,前空一行;#为三级标题,不空行。
# Inspect and set the working directory
# Relative output paths (the PNG files saved later in this script) resolve
# against this directory.
getwd()
setwd("D:\\R")
# Install readxl only when it is missing, then load it
# An unconditional install.packages() call re-downloads the package on every
# run and fails when no network is available.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)
# Clear all variables from the workspace
rm(list = ls())
### Task 1: data consolidation
# Read every workbook of one stock and stack them into a single table.
#   stock_code  : folder name under D:\数据 (e.g. "000031")
#   extra_files : how many numbered workbooks "股票行情(1).xlsx" ...
#                 "股票行情(n).xlsx" follow the unnumbered first file
# Returns the row-wise concatenation of all workbooks, first file first.
# All files are read into a list and bound once, instead of growing the
# table with rbind() on every loop iteration.
read_stock_quotes <- function(stock_code, extra_files = 30) {
  stock_dir <- paste0("D:\\数据\\", stock_code, "\\")
  # Guard the zero case explicitly: paste0() would otherwise recycle an
  # empty index vector to "" and invent a file name.
  numbered_files <- if (extra_files > 0) {
    paste0(stock_dir, "股票行情(", seq_len(extra_files), ").xlsx")
  } else {
    character(0)
  }
  file_paths <- c(paste0(stock_dir, "股票行情.xlsx"), numbered_files)
  workbooks <- lapply(file_paths, read_xlsx, col_names = TRUE)
  do.call(rbind, workbooks)
}
## Consolidate the first stock
stock_000031 <- read_stock_quotes("000031")
## Consolidate the second stock
stock_000931 <- read_stock_quotes("000931")
### Task 2: statistical analysis
## Data checks and preprocessing
# Tabulate cells by missing-value status (FALSE = missing, TRUE = present)
table(!is.na(stock_000031))
table(!is.na(stock_000931))
# Inspect the dimensions of each data set
dim(stock_000031)
dim(stock_000931)
# Sort by trading date, ascending
stock_000031 <- stock_000031[order(stock_000031$交易日期), ]
stock_000931 <- stock_000931[order(stock_000931$交易日期), ]
# Shorten the dates and strip thousands separators
# Removes the "2023-" prefix from the trading date and the "," separators
# from the volume and turnover columns, returning the modified table.
# gsub() is vectorised, so each column is cleaned in a single call covering
# every row, whatever the row count, rather than looping over a fixed
# number of rows.
strip_quote_formatting <- function(quotes) {
  quotes$交易日期 <- gsub("2023-", "", quotes$交易日期)
  quotes$`成交量(万股)` <- gsub(",", "", quotes$`成交量(万股)`)
  quotes$`成交金额(万元)` <- gsub(",", "", quotes$`成交金额(万元)`)
  quotes
}
stock_000031 <- strip_quote_formatting(stock_000031)
stock_000931 <- strip_quote_formatting(stock_000931)
# Bind short names to the columns analysed below
# (suffix dyc = taken from stock_000031, zgc = taken from stock_000931)
# Trading dates, kept as they are stored in the table
data_dyc <- stock_000031[["交易日期"]]
data_zgc <- stock_000931[["交易日期"]]
# Closing price
price_dyc <- as.numeric(stock_000031[["今收"]])
price_zgc <- as.numeric(stock_000931[["今收"]])
# Daily percentage change
limit_dyc <- as.numeric(stock_000031[["涨跌幅(%)"]])
limit_zgc <- as.numeric(stock_000931[["涨跌幅(%)"]])
# Trading volume
quantity_dyc <- as.numeric(stock_000031[["成交量(万股)"]])
quantity_zgc <- as.numeric(stock_000931[["成交量(万股)"]])
# Turnover
money_dyc <- as.numeric(stock_000031[["成交金额(万元)"]])
money_zgc <- as.numeric(stock_000931[["成交金额(万元)"]])
## Descriptive statistics, using the daily closing price as the example
# Mean
mean(price_dyc)
mean(price_zgc)
# Frequency table
table(price_dyc)
table(price_zgc)
# Mode (label of the most frequent value)
names(which.max(table(price_dyc)))
names(which.max(table(price_zgc)))
# Maximum
max(price_dyc)
max(price_zgc)
# Minimum
min(price_dyc)
min(price_zgc)
# Median
median(price_dyc)
median(price_zgc)
# Range (spread), computed two ways: max minus min, then via range()
max(price_dyc) - min(price_dyc)
max(price_zgc) - min(price_zgc)
diff(range(price_dyc))
diff(range(price_zgc))
# Variance
var(price_dyc)
var(price_zgc)
# Standard deviation, computed two ways: root of the variance, then sd()
sqrt(var(price_dyc))
sqrt(var(price_zgc))
sd(price_dyc)
sd(price_zgc)
# Quartiles
quantile(price_dyc, probs = 0.25)
quantile(price_dyc, probs = 0.5)
quantile(price_dyc, probs = 0.75)
quantile(price_zgc, probs = 0.25)
quantile(price_zgc, probs = 0.5)
quantile(price_zgc, probs = 0.75)
# Minimum, 25% quantile, median, mean, 75% quantile and maximum in one call
summary(price_dyc)
summary(price_zgc)
# Correlation of the closing price with volume and with turnover
cor(price_dyc, quantity_dyc)
cor(price_dyc, money_dyc)
cor(price_zgc, quantity_zgc)
cor(price_zgc, money_zgc)
### Task 3: data visualisation
## Time-series charts
# Draw one closing-price panel on the current device.
#   price      : numeric vector of daily closing prices
#   dates      : labels for the x axis, one per element of price
#   main_title : panel title
# The panel shows the price line plus three horizontal reference lines: the
# mean price (black), 110% of the mean (red) and 90% of the mean (green).
# The series length is taken from the data instead of a fixed row count.
plot_price_trend <- function(price, dates, main_title) {
  n_days <- length(price)
  day_index <- seq_len(n_days)
  avg_price <- mean(price)
  plot(price, type = "l",
       xlim = c(1, n_days), ylim = c(0.8 * avg_price, 1.2 * avg_price),
       xaxt = "n", las = 1, xlab = "", ylab = "")
  lines(x = day_index, y = rep(avg_price, n_days))
  text(x = 10, y = avg_price, labels = "均价")
  lines(x = day_index, y = rep(1.1 * avg_price, n_days), col = "red")
  text(x = 10, y = 1.1 * avg_price, labels = "涨10%", col = "red")
  lines(x = day_index, y = rep(0.9 * avg_price, n_days), col = "green")
  text(x = 10, y = 0.9 * avg_price, labels = "跌10%", col = "green")
  title(xlab = "日期", ylab = "价格", line = 2.1)
  title(main = main_title, line = 0.5)
  # The default x axis is suppressed above (xaxt = "n"); label it with dates
  axis(1, day_index, dates)
  invisible(NULL)
}
# Save the figure
png("时间序列图.png")
# Lay out two stacked panels
par(mfrow = c(2, 1), mar = c(4, 5, 1.5, 3))
# First stock (closing price)
plot_price_trend(price_dyc, data_dyc, "大悦城股价变化趋势")
# Second stock (closing price); uses the numeric price vector, consistent
# with the first panel, rather than the raw table column
plot_price_trend(price_zgc, data_zgc, "中关村股价变化趋势")
# Close the device
dev.off()
## Histograms
# Draw one histogram panel with bar-count labels and custom titles.
#   values     : numeric vector to bin
#   x_upper    : upper x-axis limit (the lower limit is 1)
#   y_upper    : upper y-axis limit (the lower limit is 1)
#   main_title : panel title
#   x_title    : x-axis title; the y-axis title is always "天数"
draw_day_histogram <- function(values, x_upper, y_upper, main_title, x_title) {
  hist(values, labels = TRUE, las = 1, xlab = "", ylab = "", main = "",
       xlim = c(1, x_upper), ylim = c(1, y_upper))
  title(main = main_title, line = 0.5)
  title(xlab = x_title, ylab = "天数", line = 2.1)
  invisible(NULL)
}
# Save the figure
png("直方图.png")
# Lay out a 2 x 2 grid of panels
par(mfrow = c(2, 2), mar = c(4, 5, 1.5, 3))
# First stock (volume, then turnover)
draw_day_histogram(quantity_dyc, 10000, 100,
                   "大悦城成交量分布图", "成交量(万股)")
draw_day_histogram(money_dyc, 35000, 100,
                   "大悦城成交金额分布图", "成交金额(万元)")
# Second stock (volume, then turnover)
draw_day_histogram(quantity_zgc, 5000, 100,
                   "中关村成交量分布图", "成交量(万股)")
draw_day_histogram(money_zgc, 30000, 150,
                   "中关村成交金额分布图", "成交金额(万元)")
# Close the device
dev.off()
## Pie charts
# Draw one pie of rising / flat / falling days.
#   changes    : numeric vector of daily percentage changes
#   main_title : panel title
# Days are counted with vectorised comparisons over the whole vector, so the
# result does not depend on a fixed number of rows. Missing values are
# dropped first; a scalar `if` on NA would otherwise stop the script.
# Slice labels show each category's share rounded to a whole percent.
# Invisibly returns the counts in the order rise, flat, fall.
draw_move_pie <- function(changes, main_title) {
  changes <- changes[!is.na(changes)]
  day_counts <- c(sum(changes > 0), sum(changes == 0), sum(changes < 0))
  shares <- round(day_counts / sum(day_counts) * 100)
  slice_labels <- paste0(c("rise", "flat", "fall"), ":", shares, "%")
  pie(day_counts, labels = slice_labels, cex = 0.9,
      radius = 1, edges = 5000, main = "", clockwise = TRUE)
  title(main = main_title, line = 0.1)
  invisible(day_counts)
}
# Save the figure
png("饼图.png")
# Lay out two side-by-side panels
par(mfrow = c(1, 2), mar = c(4, 4, 5, 3))
# First stock (daily percentage change)
draw_move_pie(limit_dyc, "大悦城涨跌情况")
# Second stock (daily percentage change)
draw_move_pie(limit_zgc, "中关村涨跌情况")
# Close the device
dev.off()
# R语言与金融计量
# 最新推荐文章于 2024-06-11 17:35:52 发布