pandas练习

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
# Load the CSV into a DataFrame and preview its first three rows.
# NOTE(review): absolute, machine-specific path — point it at wherever the CSV actually lives.
df = pd.read_csv('/Users/PycharmProjects/newwork/BeijingPM20100101_20151231.csv')
df.head(3)
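|   | No | year | month | day | hour | season | PM_Dongsi | PM_Dongsihuan | PM_Nongzhanguan | PM_US Post | DEWP | HUMI | PRES | TEMP | cbwd | Iws | precipitation | Iprec |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 2010 | 1 | 1 | 0 | 4 | NaN | NaN | NaN | NaN | -21.0 | 43.0 | 1021.0 | -11.0 | NW | 1.79 | 0.0 | 0.0 |
| 1 | 2 | 2010 | 1 | 1 | 1 | 4 | NaN | NaN | NaN | NaN | -21.0 | 47.0 | 1020.0 | -12.0 | NW | 4.92 | 0.0 | 0.0 |
| 2 | 3 | 2010 | 1 | 1 | 2 | 4 | NaN | NaN | NaN | NaN | -21.0 | 43.0 | 1019.0 | -11.0 | NW | 6.71 | 0.0 | 0.0 |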
# Merge the separate year/month/day/hour columns into a single hourly index.
# NOTE(review): newer pandas releases are reported to deprecate building a
# PeriodIndex from field keywords — confirm against the installed version.
_parts = {key: df[key] for key in ('year', 'month', 'day', 'hour')}
period = pd.PeriodIndex(freq='H', **_parts)
type(period)
pandas.core.indexes.period.PeriodIndex
# Display the combined index built from the year/month/day/hour columns.
print(period)
PeriodIndex(['2010-01-01 00:00', '2010-01-01 01:00', '2010-01-01 02:00',
             '2010-01-01 03:00', '2010-01-01 04:00', '2010-01-01 05:00',
             '2010-01-01 06:00', '2010-01-01 07:00', '2010-01-01 08:00',
             '2010-01-01 09:00',
             ...
             '2015-12-31 14:00', '2015-12-31 15:00', '2015-12-31 16:00',
             '2015-12-31 17:00', '2015-12-31 18:00', '2015-12-31 19:00',
             '2015-12-31 20:00', '2015-12-31 21:00', '2015-12-31 22:00',
             '2015-12-31 23:00'],
            dtype='period[H]', length=52584, freq='H')
# Attach the unified time index to the frame as a new 'datatime' column
# (this adds the column to df in place), then preview the first five rows.
df['datatime'] = period
df.head(5)
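|   | No | year | month | day | hour | season | PM_Dongsi | PM_Dongsihuan | PM_Nongzhanguan | PM_US Post | DEWP | HUMI | PRES | TEMP | cbwd | Iws | precipitation | Iprec | datatime |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 2010 | 1 | 1 | 0 | 4 | NaN | NaN | NaN | NaN | -21.0 | 43.0 | 1021.0 | -11.0 | NW | 1.79 | 0.0 | 0.0 | 2010-01-01 00:00 |
| 1 | 2 | 2010 | 1 | 1 | 1 | 4 | NaN | NaN | NaN | NaN | -21.0 | 47.0 | 1020.0 | -12.0 | NW | 4.92 | 0.0 | 0.0 | 2010-01-01 01:00 |
| 2 | 3 | 2010 | 1 | 1 | 2 | 4 | NaN | NaN | NaN | NaN | -21.0 | 43.0 | 1019.0 | -11.0 | NW | 6.71 | 0.0 | 0.0 | 2010-01-01 02:00 |
| 3 | 4 | 2010 | 1 | 1 | 3 | 4 | NaN | NaN | NaN | NaN | -21.0 | 55.0 | 1019.0 | -14.0 | NW | 9.84 | 0.0 | 0.0 | 2010-01-01 03:00 |
| 4 | 5 | 2010 | 1 | 1 | 4 | 4 | NaN | NaN | NaN | NaN | -20.0 | 51.0 | 1018.0 | -12.0 | NW | 12.97 | 0.0 | 0.0 | 2010-01-01 04:00 |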
df.set_index('datatime',inplace=True)    # make 'datatime' the row index (modifies df in place)
df.head(5)
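| datatime | No | year | month | day | hour | season | PM_Dongsi | PM_Dongsihuan | PM_Nongzhanguan | PM_US Post | DEWP | HUMI | PRES | TEMP | cbwd | Iws | precipitation | Iprec |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2010-01-01 00:00 | 1 | 2010 | 1 | 1 | 0 | 4 | NaN | NaN | NaN | NaN | -21.0 | 43.0 | 1021.0 | -11.0 | NW | 1.79 | 0.0 | 0.0 |
| 2010-01-01 01:00 | 2 | 2010 | 1 | 1 | 1 | 4 | NaN | NaN | NaN | NaN | -21.0 | 47.0 | 1020.0 | -12.0 | NW | 4.92 | 0.0 | 0.0 |
| 2010-01-01 02:00 | 3 | 2010 | 1 | 1 | 2 | 4 | NaN | NaN | NaN | NaN | -21.0 | 43.0 | 1019.0 | -11.0 | NW | 6.71 | 0.0 | 0.0 |
| 2010-01-01 03:00 | 4 | 2010 | 1 | 1 | 3 | 4 | NaN | NaN | NaN | NaN | -21.0 | 55.0 | 1019.0 | -14.0 | NW | 9.84 | 0.0 | 0.0 |
| 2010-01-01 04:00 | 5 | 2010 | 1 | 1 | 4 | 4 | NaN | NaN | NaN | NaN | -20.0 | 51.0 | 1018.0 | -12.0 | NW | 12.97 | 0.0 | 0.0 |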
# Plot every available hourly 'PM_US Post' reading (rows with a missing value removed),
# using the running position of each reading as the x coordinate.
data = df['PM_US Post'].dropna()
plt.figure(figsize=(20, 8), dpi=80)
_x = data.index
_y = data.values
plt.plot(range(len(_x)), _y)

# The hourly series is too long to read comfortably, so down-sample it to 7-day means.
# Two adjustments keep the aggregation correct:
#   * A PeriodIndex is converted to a DatetimeIndex first. Resampling the
#     PeriodIndex directly put a whole month into the first bin and left the
#     following 7-day bins empty.
#   * Only numeric columns are averaged. The text column 'cbwd' has no mean, and
#     recent pandas versions raise on it instead of silently dropping it.
# mean() ignores NaN values inside each bin.
if isinstance(df.index, pd.PeriodIndex):
    df = df.to_timestamp()
df = df.select_dtypes(include='number').resample('7D').mean()
df.head(5)
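| datatime | No | year | month | day | hour | season | PM_Dongsi | PM_Dongsihuan | PM_Nongzhanguan | PM_US Post | DEWP | HUMI | PRES | TEMP | Iws | precipitation | Iprec |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2010-01-01 | 372.5 | 2010.0 | 1.0 | 16.0 | 11.5 | 4.0 | NaN | NaN | NaN | 90.40367 | -17.013441 | 46.449597 | 1028.009409 | -6.162634 | 41.227325 | 0.015054 | 0.177688 |
| 2010-01-08 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2010-01-15 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2010-01-22 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2010-01-29 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |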
# Take the 'PM_US Post' column of the resampled frame, drop the bins that have
# no value, and split it into tick labels (_x) and plotted values (_y).
data = df['PM_US Post'].dropna()
_x, _y = data.index, data.values
plt.figure(dpi=80, figsize=(20, 8))
<Figure size 1600x640 with 0 Axes>




<Figure size 1600x640 with 0 Axes>
# Draw the series against its running position, then label every 10th position
# with the matching index entry, rotated 45 degrees.
_positions = range(len(_x))
plt.plot(_positions, _y)
plt.xticks(_positions[::10], list(_x)[::10], rotation=45)
plt.show()

png

两组数据对比:

# Compare two PM2.5 series from the same CSV on one chart:
# 'PM_US Post' and 'PM_Nongzhanguan', both averaged over 7-day bins.
# NOTE(review): absolute, machine-specific path — point it at wherever the CSV actually lives.
df = pd.read_csv('/Users/marvinking/PycharmProjects/newwork/BeijingPM20100101_20151231.csv')

# Merge the separate year/month/day/hour columns into one hourly index.
# NOTE(review): newer pandas releases are reported to deprecate building a
# PeriodIndex from field keywords — confirm against the installed version.
period = pd.PeriodIndex(year=df['year'], month=df['month'], day=df['day'], hour=df['hour'], freq='H')

df['datatime'] = period
df.set_index('datatime', inplace=True)

# Down-sample to 7-day means.
#   * to_timestamp() swaps the PeriodIndex for a DatetimeIndex, so each bin
#     really spans seven days.
#   * select_dtypes() keeps only numeric columns: the text column 'cbwd' has no
#     mean, and recent pandas versions raise on it instead of dropping it.
df = df.to_timestamp().select_dtypes(include='number').resample('7D').mean()

# NaN bins are kept on purpose: both series then share the same index, so the
# same x position means the same week on both lines.
data = df['PM_US Post']
data_cn = df['PM_Nongzhanguan']
_x = [i.strftime("%Y%m%d") for i in data.index]
_x_china = [i.strftime("%Y%m%d") for i in data_cn.index]
_y = data.values
_y_china = data_cn.values

plt.figure(figsize=(20, 8), dpi=80)
plt.plot(range(len(_x)), _y, label="US_POST", alpha=0.7)
plt.plot(range(len(_x_china)), _y_china, label="CN_POST", alpha=0.7)

# Label every 10th bin with its start date (YYYYMMDD), rotated 45 degrees.
plt.xticks(range(0, len(_x_china), 10), _x_china[::10], rotation=45)

plt.legend(loc="best")

plt.show()

在这里插入图片描述


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值