python
import pmdarima as pm
from pmdarima.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime #数据索引改为时间
import numpy as np
from numpy import diff
import statsmodels.api as sm #acf,pacf图
from statsmodels.tsa.stattools import adfuller #adf检验
from pandas.plotting import autocorrelation_plot
from statsmodels.tsa.arima_model import ARIMA
##读取数据
data = pd.read_csv(r'C:\Users\13771\Desktop\outpatient expenses-days.CSV',index_col=u'TONGJIRQ')
data = pd.DataFrame(data)
temp = np.array(data["FEIYONG"])
##ADF检验
t = adfuller(temp)
print('The ADF Statistic of bls yield: %f' % t[0])
print('The p value of bls: %f' % t[1])
#自动定阶
pmax = int(len(data) / 10) #一般阶数不超过 length /10
qmax = int(len(data) / 10)
print(pmax)
model = pm.auto_arima(temp, start_p=1, start_q=1,
information_criterion='aic',
#test='adf', # use adftest to find optimal 'd'
max_p=pmax, max_q=qmax, # maximum p and q
m=1, # frequency of series
d=None, # let model determine 'd'
seasonal=False, # No Seasonality
start_P=0,
D=0,
trace=True,
error_action='ignore',
suppress_warnings=True,
stepwise=True)
# print(model)
print(model.summary())
#输出
forecasts = model.predict(n_periods=10)
print(forecasts)
R
#load data
CRB<-read.csv(file='C:/Users/13771/Desktop/1.csv', header=TRUE, sep=",")
library(do)
library(dplyr)
sapply(CRB,typeof)
#select data
CRB <- subset(CRB, ORGCODE==330784002,
select = c(TONGJIRQ,BINGLISHU))
CRB<-CRB[order(CRB$TONGJIRQ),]
#test diff
library(tseries)
data<-CRB$BINGLISHU
i<-3
#lag差分阶数,differences差分次数(也即differences=2<=>diff(diff(X)))
while(adf.test(data)$p.value>0.05&i>=0)
{data<-diff(data,differences=1);
i=i-1}
if(i >0){
print(paste("第",3-i,"次差分后平稳"))
}else print('该序列不适用ARIMA')
#自动定阶,自动定阶可以实现对季节因素的定阶
library(forecast)
#Model1
f1<-auto.arima(result,trace = TRUE)
f2<-arima(result,order = c(3,1,2))
f3<-forecast(f2,h=90,level=c(99.5))
f3
plot(f3)
arima(result, order = c(3,1,2))$model$bic
BIC(arima(result, order = c(3,1,2)))
#diff1<-diff(result,differences=1)
#adf.test(diff1)
plot(diff1)
#数据处理(目前发现最细粒度是周,所以粒度为天时,不必进行TS处理)
#CRB=ts(CRB,frequency=12,start=c(1990,1))
##数据转换成ts
##[,2]是只读取第2列数据,start是在数据上增加起始时间,frequency是单位时间内观测值的频数(频率)。
#tsdisplay(data,xlab=“year”,ylab=“Retail index”)
#显示自相关系数ACF,偏自相关系数PACF,与数据原始图像。
#如果自相关系数显示y和时刻有强烈相关性,那么进行差分处理
#data_d1=diff(data,1)
#tsdisplay(data_d1,xlab=“year”,ylab=“Retail index”)
#tsdisplay(residuals(fit1)) #显示残差
#diff(x, lag = 2,differences = 2)
help(diff)
#空值处理
#plot
plot(CRB$BINGLISHU)
#异常值处理
#plot(paixu$jine[2:38])
#result<-paixu$jine[2:38]
result<-CRB$BINGLISHU
#平稳性检验,还需设置自动差分
#(psymonitor)
library(tseries)
help(ts)
adf.test(result)
diff1<-diff(result,differences=1)
adf.test(diff1)
plot(diff1)
#adfuller(result)
#ADF(result)
#自动定阶,自动定阶可以实现对季节因素的定阶
library(forecast)
#Model1
f1<-auto.arima(result,trace = TRUE)
f2<-arima(result,order = c(3,1,2))
f3<-forecast(f2,h=90,level=c(99.5))
f3
plot(f3)
arima(result, order = c(3,1,2))$model$bic
BIC(arima(result, order = c(3,1,2)))
#model2
f11<-auto.arima(result,trace = TRUE)
f22<-arima(result,order = c(1,0,1))
f33<-forecast(f22,h=1,level=c(99.5))
plot(f33)
f33
help(arima)
#model3
acf(diff1, lag.max=20)
acf(diff1, lag.max=20,plot='false')
pacf(diff1, lag.max=20) # 绘制偏相关图
pacf(diff1, lag.max=20, plot=FALSE)
#参数比较
Box.test(f2$residuals, lag=20, type="Ljung-Box")
Box.test(f22$residuals, lag=20, type="Ljung-Box")