【金融统计】
数据获取并计算收益
获取股票数据的一种方法是使用quantmod包中的getSymbols命令。这种方式主要受网络条件的影响,偶尔可能无法成功获取数据。
# Set the working directory for this assignment
setwd("/Users/zz/Desktop/金融统计学/第一次上机作业")
library(quantmod)
# Download the AMZN price data; with auto.assign = FALSE the result is
# returned and stored under the name chosen here
data.AMZN <- getSymbols(
  "AMZN",
  from = "2015-12-31",
  to = "2022-6-30",
  auto.assign = FALSE
)
# Inspect the class of the downloaded object
class(data.AMZN)
[1] "xts" "zoo"
# Plot the AMZN closing-price column as a time series
plot(data.AMZN$AMZN.Close)
上图为AMZN从2015年12月31日到2022年6月29日的收盘价折线图,可以看出从2015年到2021年基本处于上升状态,在2022年有下降趋势。
dim(data.AMZN)# Dimensions of the downloaded data (rows, columns)
[1] 1635 6
summary(data.AMZN)# Summary statistics for the index and each column
Index AMZN.Open AMZN.High AMZN.Low AMZN.Close AMZN.Volume AMZN.Adjusted
Min. :2015-12-31 Min. : 23.90 Min. : 24.68 Min. : 23.70 Min. : 24.10 Min. : 17626000 Min. : 24.10
1st Qu.:2017-08-15 1st Qu.: 49.49 1st Qu.: 49.76 1st Qu.: 49.01 1st Qu.: 49.34 1st Qu.: 57445000 1st Qu.: 49.34
Median :2019-04-02 Median : 89.39 Median : 89.94 Median : 88.36 Median : 89.29 Median : 73414000 Median : 89.29
Mean :2019-03-31 Mean : 96.34 Mean : 97.40 Mean : 95.15 Mean : 96.28 Mean : 84781961 Mean : 96.28
3rd Qu.:2020-11-11 3rd Qu.:151.89 3rd Qu.:154.11 3rd Qu.:149.66 3rd Qu.:151.67 3rd Qu.:100235000 3rd Qu.:151.67
Max. :2022-06-29 Max. :187.20 Max. :188.65 Max. :184.84 Max. :186.57 Max. :331300000 Max. :186.57
# Keep a copy under the short name `wk`; the weekly columns printed below
# carry this name as their prefix (wk.Open, wk.High, ...)
wk <- data.AMZN
# Aggregate the price data to weekly frequency
data.weekly <- to.weekly(wk)
# Show the first three rows and the last row
data.weekly[c(seq_len(3), nrow(data.weekly)), ]
wk.Open wk.High wk.Low wk.Close wk.Volume wk.Adjusted
2015-12-31 34.3040 34.3875 33.7945 33.7945 74992000 33.7945
2016-01-08 32.8145 32.8860 30.2605 30.3525 661082000 30.3525
2016-01-15 30.6240 31.2995 28.2650 28.5090 645868000 28.5090
2022-06-29 117.0900 117.9800 106.9100 108.9200 203451400 108.9200
# Keep a copy under the short name `mo`; the monthly columns printed below
# carry this name as their prefix (mo.Open, mo.High, ...)
mo <- data.AMZN
# Aggregate the price data to monthly frequency
data.monthly <- to.monthly(mo)
# Show the first three rows and the last row
data.monthly[c(seq_len(3), nrow(data.monthly)), ]
mo.Open mo.High mo.Low mo.Close mo.Volume mo.Adjusted
12 2015 34.3040 34.3875 33.7945 33.7945 74992000 33.7945
1 2016 32.8145 32.8860 27.3590 29.3500 2604018000 29.3500
2 2016 28.9075 29.0900 23.7000 27.6260 2482896000 27.6260
6 2022 122.2560 128.9900 101.4300 108.9200 1669921700 108.9200
ls()# List the objects currently in the workspace
# Remove every object from the workspace before the next part.
# NOTE(review): this does not detach loaded packages, and wiping the
# workspace from inside a script is generally discouraged - confirm that
# this reset is intended.
rm(list=ls())
[1] "all_graph" "AMZN" "AVG" "Close.Prices" "data.AMZN" "data.GSPC" "data.IBM" "data.monthly"
[9] "data.weekly" "date" "df" "df1" "df2" "df3" "df4" "df5"
[17] "df6" "f" "FF" "FF.raw" "mo" "multi.df" "p3" "process"
[25] "process2" "s_" "s380" "stocks" "symbols" "title1" "title2" "wk"
[33] "y" "y.range"
#数据包导入
library(dplyr)
library(pedquant)
library(purrr)
library(ggplot2)
library(gridExtra)
# Tickers of the six stocks to download
symbols <- c(
  "300244.SZ", "600987.SH", "600766.SH",
  "601857.SH", "300089.SZ", "002616.SZ"
)
# Reminder: `%>%` is the pipe operator; it hands the object on its left to
# the function call on its right
# Download the data for all six tickers from source "163"
stocks <- md_stock(
  symbols,
  from = "2015-12-31",
  to = "2022-6-30",
  source = "163"
)
# Append return columns to one stock table.
#
# tbl: a table with at least the columns date, close and unit.
# Returns a tibble holding date, close and unit plus
#   lag1 - the close of the preceding row,
#   sr   - simple return, (close - lag1) / lag1,
#   lr   - log return, log(close) - log(lag1).
# The first row has no preceding close, so lag1, sr and lr are NA there.
process2 <- function(tbl) {
  prices <- select(as_tibble(tbl), date, close, unit)
  mutate(
    prices,
    lag1 = lag(close),
    sr = (close - lag1) / lag1,
    lr = log(close) - log(lag1)
  )
}
# Compute the return columns for every downloaded stock and print the result
map(stocks, process2)
# Draw two line charts for one stock on a single page: close against date
# and log return (column `lr`) against date, arranged with grid.arrange().
#
# tbl: a table providing the columns date, close and lr.
# Returns the value of grid.arrange().
all_graph <- function(tbl) {
  price_panel <- ggplot(tbl, aes(date, close)) +
    geom_line()
  return_panel <- ggplot(tbl, aes(date, lr)) +
    geom_line() +
    labs(x = "", y = "log Return") +
    scale_y_continuous(labels = scales::percent_format())
  grid.arrange(price_panel, return_panel)
}
# Reset the base-graphics layout to a single panel
par(mfrow = c(1, 1))
# Compute the returns of every stock, then draw the two-panel chart for each
map(map(stocks, process2), all_graph)
以上为所获取数据的收盘价及对数收益率的可视化结果,可以看出对数收益率的波动总体上还是比较大的。
# Download the data for one symbol and append simple and log returns.
#
# symbol: ticker passed to md_stock().
# from, to: start and end date of the requested range.
# source: data source passed to md_stock().
# Returns a tibble built from the first element of the md_stock() result,
# holding date, close and unit plus
#   lag1 - the close of the preceding row,
#   sr   - simple return, (close - lag1) / lag1,
#   lr   - log return, log(close) - log(lag1).
process <- function(symbol,
                    from = "2020-12-31",
                    to = "2022-06-30",
                    source = "yahoo") {
  downloaded <- md_stock(symbol = symbol, from = from, to = to, source = source)
  prices <- select(as_tibble(downloaded[[1]]), date, close, unit)
  mutate(
    prices,
    lag1 = lag(close),
    sr = (close - lag1) / lag1,
    lr = log(close) - log(lag1)
  )
}
# Returns for symbol 000009.SH from source "163", using process()'s default
# date range.
# NOTE(review): that default range starts at 2020-12-31, whereas `stocks`
# was requested from 2015-12-31 - confirm the shorter benchmark window is
# intended.
s380 <- process("000009.SH", source = "163")
# Return tables for every element of `stocks`
s_ <- map(stocks, process2)
# Build the matrix of log returns: one column per stock (df1..df6) plus a
# seventh column with their equal-weight average.
#
# Each series is aligned BY DATE against the dates of the fourth stock, which
# is the date axis paired with column 7 further down. The earlier version
# took the first 1579 rows of every series by position; whenever one stock
# has a different set of dates (for example a trading suspension), positional
# rows refer to different days and the average mixes returns from different
# dates. match() returns NA for a date a stock does not have, so that stock's
# return is NA on that day.
df1 <- s_[[1]]$lr[match(s_[[4]]$date, s_[[1]]$date)]
df2 <- s_[[2]]$lr[match(s_[[4]]$date, s_[[2]]$date)]
df3 <- s_[[3]]$lr[match(s_[[4]]$date, s_[[3]]$date)]
df4 <- s_[[4]]$lr[match(s_[[4]]$date, s_[[4]]$date)]
df5 <- s_[[5]]$lr[match(s_[[4]]$date, s_[[5]]$date)]
df6 <- s_[[6]]$lr[match(s_[[4]]$date, s_[[6]]$date)]
df <- cbind(df1, df2, df3, df4, df5, df6)
# Equal-weight average of the six log returns. NA is propagated on purpose:
# a day on which any stock has no return yields NA, as it did before.
df <- cbind(df, rowMeans(df[, 1:6, drop = FALSE]))
# Pair the dates of the fourth stock with the portfolio log return stored in
# column 7 of `df`
AVG <- mutate(
  select(as_tibble(stocks[[4]]), date),
  fin_r = df[, 7]
)
# Visualisation: portfolio log return (default line colour) against the log
# return of s380 drawn in red.
# The colour is set OUTSIDE aes(). Inside aes() the constant "red" is treated
# as a data value mapped through the colour scale, so the line is drawn in
# the default palette colour with a legend entry labelled "red" instead of
# actually being red.
p3 <- ggplot(AVG, aes(date, fin_r)) +
  geom_line() +
  geom_line(data = s380, aes(date, lr), color = "red") +
  labs(x = "", y = "log Return") +
  scale_y_continuous(labels = scales::percent_format())
# Print the plot
p3
根据以上结果可以看出,组合的对数收益率波动更大。