数据挖掘1

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams.update({'font.size': 20})
In [2]:
weather_data = pd.read_csv("../data/3/Summary of Weather.csv")
c:\users\skd621\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3020: DtypeWarning: Columns (7,8,18,25) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
In [3]:
weather_data = weather_data.loc[:,["STA","Date","MeanTemp"]]
In [4]:
weather_data.head()
Out[4]:
STA    Date    MeanTemp
0    10001    1942-7-1    23.888889
1    10001    1942-7-2    25.555556
2    10001    1942-7-3    24.444444
3    10001    1942-7-4    24.444444
4    10001    1942-7-5    24.444444
In [5]:
weather_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119040 entries, 0 to 119039
Data columns (total 3 columns):
STA         119040 non-null int64
Date        119040 non-null object
MeanTemp    119040 non-null float64
dtypes: float64(1), int64(1), object(1)
memory usage: 2.7+ MB
选择一个气象站,分析其温度变化的时间特性
In [6]:
weather_palmyra = weather_data[weather_data.STA == 33023]
weather_palmyra['Date'] = pd.to_datetime(weather_palmyra['Date'])
plt.figure(figsize=(16,10))
plt.plot(weather_palmyra.Date,weather_palmyra.MeanTemp)
plt.title("Mean Temperature of MAISON BLANCHE")
plt.xlabel("Date")
plt.ylabel("Mean Temperature")
plt.show()
c:\users\skd621\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [7]:
timeSeries = weather_palmyra.loc[:, ["Date","MeanTemp"]]
timeSeries.index = timeSeries.Date
timeSeries = timeSeries.drop("Date",axis=1)
In [21]:
rolmean = timeSeries.rolling(6).mean()
rolstd = timeSeries.rolling(6).std()
plt.figure(figsize=(22,10))   
orig = plt.plot(timeSeries, 'r-',label='Original')
mean = plt.plot(rolmean, 'b', label='Rolling Mean',marker='+', markersize=12)
std = plt.plot(rolstd, 'g--', label = 'Rolling Std')
plt.xlabel("Date")
plt.ylabel("Mean Temperature")
plt.title('Rolling Mean & Standard Deviation')
plt.legend()
plt.show()

In [22]:
from statsmodels.tsa.stattools import adfuller
# res = adfuller(timeSeries.MeanTemp)
res = adfuller(timeSeries.MeanTemp, autolag='AIC')
print('Test statistic: %.4f; p-value: %.4f'%(res[0], res[1]))
print("Critical Values: ",res[4])
Test statistic: -1.9031; p-value: 0.3306
Critical Values:  {'1%': -3.4369994990319355, '5%': -2.8644757356011743, '10%': -2.5683331327427803}
c:\users\skd621\anaconda3\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools
In [34]:
def check_DF(timeSeries):
    res = adfuller(timeSeries.MeanTemp, autolag='AIC')
    print('Test statistic:%.4f;p-value: %.4f'%(res[0],res[1]))
    print("Critical Values: ",res[4])
def check_mean_std(timeSeries):
    rolmean = timeSeries.rolling(6).mean()
    rolstd = timeSeries.rolling(6).std()
    plt.figure(figsize=(22,10))   
    orig = plt.plot(timeSeries, 'r-',label='Original')
    mean = plt.plot(rolmean, 'b', label='Rolling Mean',marker='+', markersize=10)
    std = plt.plot(rolstd, 'g', label = 'Rolling Std',marker='o', markersize=3)
    plt.xlabel("Date")
    plt.ylabel("Mean Temperature")
    plt.title('Rolling Mean & Standard Deviation')
    plt.legend()
    plt.show()
In [25]:
timeSeries_diff = timeSeries - timeSeries.shift(periods=1)
In [26]:
plt.figure(figsize=(16,12))
plt.plot(timeSeries_diff)
plt.title("Differencing method") 
plt.xlabel("Date")
plt.ylabel("D

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值