【金融统计】
多个证券收益比较
# Download daily price history from Yahoo Finance.
# auto.assign = FALSE makes getSymbols() return the price series itself; with
# the default (auto.assign = TRUE) the data is assigned to a variable named
# after the symbol and only the symbol name is returned, so stocks1..stocks3
# would hold strings rather than prices.
stocks1 <- getSymbols(
  "AMZN",
  src = "yahoo", from = "2015-01-01", to = "2022-09-16",
  auto.assign = FALSE
)
stocks2 <- getSymbols(
  "IBM",
  src = "yahoo", from = "2015-01-01", to = "2022-09-16",
  auto.assign = FALSE
)
# The S&P 500 index is listed on Yahoo Finance as "^GSPC" (leading caret).
stocks3 <- getSymbols(
  "^GSPC",
  src = "yahoo", from = "2015-01-01", to = "2022-09-16",
  auto.assign = FALSE
)
其中,雅虎(YHOO)股票已经退市,因此无法取得其数据,下文只比较 AMZN、IBM 和 GSPC 三者。
# Save each downloaded object to its own CSV file in the working directory.
# NOTE(review): the reader code further down expects a `Date` column in these
# files; confirm that the files written here actually contain one.
invisible(Map(
  function(prices, csv_path) write.csv(prices, file = csv_path),
  list(stocks1, stocks2, stocks3),
  c("AMZN Yahoo.csv", "IBM Yahoo.csv", "GSPC Yahoo.csv")
))
AMZN数据处理:
# Read "AMZN Yahoo.csv"; the first row supplies the column names.
data.AMZN <- read.csv(file = "AMZN Yahoo.csv", header = TRUE)
# Parse the Date column (text in year/month/day form) into Date values.
date <- as.Date(data.AMZN$Date, format = "%Y/%m/%d")
# Drop the first column, put the parsed dates in front, and sort by date.
data.AMZN <- cbind(date, data.AMZN[, -1])
data.AMZN <- data.AMZN[order(data.AMZN$date), ]
# Build an xts object: columns 2-7 are the data, the dates are the index.
data.AMZN <- xts(x = data.AMZN[, 2:7], order.by = data.AMZN$date)
# Name the six columns.
names(data.AMZN) <- paste0(
  "AMZN.",
  c("Close", "Open", "High", "Low", "Volume", "Adjusted")
)
数据预览
# Show the first three rows and the last row.
data.AMZN[c(seq_len(3), nrow(data.AMZN)), ]
AMZN.Close AMZN.Open AMZN.High AMZN.Low AMZN.Volume AMZN.Adjusted
2010-12-31 " 9.00" " 9.10" " 9.12" " 8.98" "69.08M" "-1.53%"
2011-01-03 " 9.21" " 9.07" " 9.30" " 9.06" "106.68M" "2.33%"
2011-01-04 " 9.25" " 9.31" " 9.38" " 9.19" "100.70M" "0.43%"
2022-10-03 "115.88" "113.82" "116.81" "112.47" "50.66M" "2.55%"
IBM数据处理:
# Read "IBM Yahoo.csv"; the first row supplies the column names.
data.IBM <- read.csv(file = "IBM Yahoo.csv", header = TRUE)
# Parse the Date column (text in year/month/day form) into Date values.
date <- as.Date(data.IBM$Date, format = "%Y/%m/%d")
# Drop the first column, put the parsed dates in front, and sort by date.
data.IBM <- cbind(date, data.IBM[, -1])
data.IBM <- data.IBM[order(data.IBM$date), ]
# Build an xts object: columns 2-7 are the data, the dates are the index.
data.IBM <- xts(x = data.IBM[, 2:7], order.by = data.IBM$date)
# Name the six columns.
names(data.IBM) <- paste0(
  "IBM.",
  c("Close", "Open", "High", "Low", "Volume", "Adjusted")
)
# Show the first three rows and the last row.
data.IBM[c(seq_len(3), nrow(data.IBM)), ]
IBM.Close IBM.Open IBM.High IBM.Low IBM.Volume IBM.Adjusted
2010-12-31 "146.76" "146.73" "147.07" "145.96" "2.97M" "0.06%"
2011-01-03 "147.48" "147.21" "148.20" "147.14" "4.60M" "0.49%"
2011-01-04 "147.64" "147.56" "148.22" "146.64" "5.06M" "0.11%"
2022-10-03 "121.51" "120.06" "122.20" "119.63" "4.24M" "2.27%"
GSPC数据处理:
# Read "GSPC Yahoo.csv"; the first row supplies the column names.
data.GSPC <- read.csv(file = "GSPC Yahoo.csv", header = TRUE)
# Parse the Date column (text in year/month/day form) into Date values.
date <- as.Date(data.GSPC$Date, format = "%Y/%m/%d")
# Drop the first column, put the parsed dates in front, and sort by date.
data.GSPC <- cbind(date, data.GSPC[, -1])
data.GSPC <- data.GSPC[order(data.GSPC$date), ]
# Build an xts object: columns 2-7 are the data, the dates are the index.
data.GSPC <- xts(x = data.GSPC[, 2:7], order.by = data.GSPC$date)
# Name the six columns.
names(data.GSPC) <- paste0(
  "GSPC.",
  c("Close", "Open", "High", "Low", "Volume", "Adjusted")
)
# Show the first three rows and the last row.
data.GSPC[c(seq_len(3), nrow(data.GSPC)), ]
GSPC.Close GSPC.Open GSPC.High GSPC.Low GSPC.Volume GSPC.Adjusted
2010-12-31 "1,257.64" "1,256.76" "1,259.34" "1,254.19" NA "-0.02%"
2011-01-03 "1,271.87" "1,257.62" "1,276.17" "1,257.62" NA "1.13%"
2011-01-04 "1,270.20" "1,272.95" "1,274.12" "1,262.66" NA "-0.13%"
2022-10-03 "3,678.43" "3,609.78" "3,698.35" "3,604.93" NA "2.59%"
# Combine the closing-price columns of AMZN, GSPC and IBM (in that order)
# into a single object.
Close.Prices <- cbind(
  data.AMZN[, "AMZN.Close"],
  data.GSPC[, "GSPC.Close"],
  data.IBM[, "IBM.Close"]
)
# Show the first three rows and the last row.
Close.Prices[c(seq_len(3), nrow(Close.Prices)), ]
AMZN.Close GSPC.Close IBM.Close
2010-12-31 " 9.00" "1,257.64" "146.76"
2011-01-03 " 9.21" "1,271.87" "147.48"
2011-01-04 " 9.25" "1,270.20" "147.64"
2022-10-03 "115.88" "3,678.43" "121.51"
# Turn the merged prices into a plain data frame whose first column is the
# time index of Close.Prices.
multi.df <- data.frame(
  index(Close.Prices),
  data.frame(Close.Prices),
  check.names = FALSE
)
# Short column names, and row labels running 1..n in place of the dates.
names(multi.df) <- c("date", "AMZN", "GSPC", "IBM")
rownames(multi.df) <- seq(1, nrow(multi.df), by = 1)
# Show the first three rows and the last row.
multi.df[c(seq_len(3), nrow(multi.df)), ]
date AMZN GSPC IBM
1 2010-12-31 9.00 1,257.64 146.76
2 2011-01-03 9.21 1,271.87 147.48
3 2011-01-04 9.25 1,270.20 147.64
2960 2022-10-03 115.88 3,678.43 121.51
为比较三种证券(AMZN、IBM、GSPC)的收益水平,我们可以累积每日价格随时间的变化。在程序实现时,可以为每种证券创建一个指数(index),其值等于该证券每天的价格除以该证券在2010年12月31日的价格。
# AMZN index: every close divided by the first close in the table.
multi.df$AMZN.idx <- local({
  amzn_close <- as.numeric(multi.df$AMZN)
  amzn_close / amzn_close[1]
})
# Inspect how the GSPC column is stored.
str(multi.df$GSPC)
chr [1:2960] "1,257.64" "1,271.87" "1,270.20" "1,276.56" "1,273.85" "1,271.50" "1,269.75" "1,274.48" "1,285.96" "1,283.76" ...
结果显示数据为文本型(并且带有千位分隔符“,”),但其内容实际上都是浮点数,因此考虑转换为数值型。
# The GSPC prices are text with thousands separators (e.g. "1,257.64").
# as.numeric() on that text yields NA, and as.numeric() on a factor returns
# the integer level codes instead of the prices, so neither gives usable
# values. Remove the commas first, then convert to numeric.
multi.df$GSPC <- as.numeric(
  gsub(",", "", as.character(multi.df$GSPC), fixed = TRUE)
)
# Confirm that the column now holds numeric prices.
str(multi.df$GSPC)
# GSPC index: every close divided by the first close in the table.
multi.df$GSPC.idx <- multi.df$GSPC / multi.df$GSPC[1]
# Inspect how the IBM column is stored.
str(multi.df$IBM)
chr [1:2960] "146.76" "147.48" "147.64" "147.05" "148.66" "147.93" "147.64" "147.28" "149.10" "148.82" "150.00" "150.65" ...
结果显示数据仍然为文本型,但是其中都是浮点数据,因此考虑转换为数值型
# Convert the IBM prices from text to numbers. Going through factor() is
# wrong here: as.numeric() on a factor returns the integer level codes, not
# the prices. Any thousands separators are stripped before converting.
multi.df$IBM <- as.numeric(
  gsub(",", "", as.character(multi.df$IBM), fixed = TRUE)
)
# Confirm that the column now holds numeric prices.
str(multi.df$IBM)
根据结果显示,该列已转化为数值型。需要注意:若对因子直接使用 as.numeric(),得到的是因子水平的整数编码而不是原始价格,此时应先用 as.character() 还原为文本再转换。
# IBM index: every close divided by the first close in the table.
# The prices are recovered via as.character() so the result is correct
# whether the column is stored as text, as a factor, or already as numbers
# (as.numeric() on a factor would return level codes, not prices).
multi.df$IBM.idx <- local({
  ibm_close <- as.numeric(
    gsub(",", "", as.character(multi.df$IBM), fixed = TRUE)
  )
  ibm_close / ibm_close[1]
})
# Print with 5 significant digits.
options(digits = 5)
# Show the first three rows and the last row.
multi.df[c(1:3, nrow(multi.df)), ]
date AMZN GSPC IBM AMZN.idx GSPC.idx IBM.idx
1 2010-12-31 9.00 1,257.64 146.76 1.0000 1.0000 1.00000
2 2011-01-03 9.21 1,271.87 147.48 1.0233 1.2000 1.02136
3 2011-01-04 9.25 1,270.20 147.64 1.0278 1.1579 1.02621
2960 2022-10-03 115.88 3,678.43 121.51 12.8756 26.2737 0.25534
# Set the number of printed significant digits back to 7.
options(digits = 7)
# Plot the value of a $1 investment in each security over time.
# The S&P 500 index series sets up the axes; no ylim is given here, so the
# y-axis follows the GSPC series only.
# The title is built from three lines joined with "\n": the original single
# string had lost its line breaks ("inAMZN", "IndexDecember"), and its end
# date (2013) did not agree with the other plot titles in this file.
plot(
  x = multi.df$date, y = multi.df$GSPC.idx, type = "l",
  xlab = "Date", ylab = "Value of Investment ($)",
  col = "black", lty = 1, lwd = 2,
  main = paste(
    "Value of $1 Investment in",
    "AMZN, IBM, and the S&P 500 Index",
    "December 31, 2010 - Sep 16, 2022",
    sep = "\n"
  )
)
lines(x = multi.df$date, y = multi.df$AMZN.idx, col = "black", lty = 2, lwd = 1)
lines(x = multi.df$date, y = multi.df$IBM.idx, col = "red", lty = 2, lwd = 1)
# lines(x=multi.df$date,y=multi.df$YHOO.idx,col="gray",lty=1,lwd=1)
# Reference line at 1, the starting value of every index.
abline(h = 1, lty = 1, col = "black")
# Legend entries use the same colours, line types and widths as the lines.
legend(
  "topleft",
  c("AMZN", "IBM", "S&P 500 Index"),
  col = c("black", "red", "black"),
  lty = c(2, 2, 1),
  lwd = c(1, 1, 2)
)
根据结果可以看出,IBM的投资价值最低。同时三者在2022年的趋势都是向下。
# Columns 5-7 of multi.df (the index columns added above).
y <- multi.df[, 5:7]
# Smallest and largest value across those columns, ignoring missing values;
# used as the common y-axis range of the later plots.
y.range <- range(y, na.rm = TRUE)
# Display the range.
y.range
[1] 9.708738e-04 6.170667e+01
# Count the missing values in each column (a compact check; printing
# is.na(multi.df) directly would dump one logical value per cell).
colSums(is.na(multi.df))
# Drop rows that contain missing values. The result has to be assigned back:
# calling na.omit() without assignment leaves multi.df unchanged.
multi.df <- na.omit(multi.df)
# Same chart as before, but with the y-axis set to y.range so that all
# series fit inside the plotting area.
# The title is built from three lines joined with "\n": the original single
# string had lost its line breaks ("inAMZN", "IndexDecember").
plot(
  x = multi.df$date, y = multi.df$GSPC.idx, type = "l",
  xlab = "Date", ylim = y.range, ylab = "Value of Investment ($)",
  col = "black", lty = 1, lwd = 2,
  main = paste(
    "Value of $1 Investment in",
    "AMZN, IBM, and the S&P 500 Index",
    "December 31, 2010 - Sep 16, 2022",
    sep = "\n"
  )
)
lines(x = multi.df$date, y = multi.df$AMZN.idx, col = "black", lty = 2, lwd = 1)
lines(x = multi.df$date, y = multi.df$IBM.idx, col = "red", lty = 2, lwd = 1)
# lines(x=multi.df$date,y=multi.df$YHOO.idx,col="red",lty=1,lwd=1)
# Reference line at 1, the starting value of every index.
abline(h = 1, lty = 1, col = "black")
# Legend entries use the same colours, line types and widths as the lines.
legend(
  "topleft",
  c("AMZN", "IBM", "S&P 500 Index"),
  col = c("black", "red", "black"),
  lty = c(2, 2, 1),
  lwd = c(1, 1, 2)
)
# Panel chart: one panel per security, with that security drawn in bold
# black and the other two in gray for context.

# Reserve three lines of outer margin at the top for a shared title and
# split the device into a 2 x 2 grid. par() returns the previous values of
# the settings it changes; keep them so they can be restored at the end.
old_par <- par(oma = c(0, 0, 3, 0), mfrow = c(2, 2))

# Draw one panel.
#   focus:  name of the multi.df column to highlight (black, lwd = 2)
#   others: names of the multi.df columns drawn in gray as background
#   main:   panel title
# All panels share y.range as y-axis limits. The highlighted series is
# drawn last so the gray lines cannot cover it.
draw_panel <- function(focus, others, main) {
  plot(
    x = multi.df$date, y = multi.df[[others[1]]],
    xlab = "", ylab = "", ylim = y.range,
    type = "l", col = "gray", main = main
  )
  for (col_name in others[-1]) {
    lines(x = multi.df$date, y = multi.df[[col_name]], col = "gray")
  }
  lines(x = multi.df$date, y = multi.df[[focus]], col = "black", lwd = 2)
  # Reference line at 1, the starting value of every index.
  abline(h = 1)
  invisible(NULL)
}

draw_panel("AMZN.idx", c("IBM.idx", "GSPC.idx"), main = "Amazon Stock")
draw_panel("GSPC.idx", c("IBM.idx", "AMZN.idx"), main = "S&P 500 Index")
draw_panel("IBM.idx", c("AMZN.idx", "GSPC.idx"), main = "IBM Stock")

# Shared two-line title written into the outer margin.
title1 <- "Value of $1 Invested in Amazon, IBM and the Market"
title2 <- "December 31, 2010 - Sep 16, 2022"
title(main = paste(title1, "\n", title2), outer = TRUE)

# Put the graphics settings back so later plots are not forced into the
# 2 x 2 layout.
par(old_par)