Python Data Science Packages, Day 3 (Stock Data Analysis, Time Event Log)

Stock Data Analysis

For details, see https://github.com/kamidox/stock-analysis

Here we assume the data has already been downloaded and saved in the yahoo-data directory.
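Before reading anything, it can help to confirm what is actually in that directory; a minimal sketch (the directory name comes from the text above, and 002001.csv is the file used in the next step):

```python
import os
import glob

datadir = 'yahoo-data'
# List the downloaded CSV files so we know which stock codes are available.
csv_files = sorted(glob.glob(os.path.join(datadir, '*.csv')))
print(len(csv_files), csv_files[:5])
print(os.path.exists(os.path.join(datadir, '002001.csv')))   # the file analyzed below
```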

Analyzing the Range of Price Swings

%matplotlib inline
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
datadir = 'yahoo-data'
fname = '002001.csv'
data = pd.read_csv(os.path.join(datadir, fname), index_col='Date', parse_dates=True)
data
                Open      High       Low     Close     Volume  Adj Close
Date
2016-05-20  18.82000  19.46000  18.71000  19.34000   13265400   19.34000
2016-05-19  19.12000  19.52000  18.90000  18.98000   12581300   18.98000
2016-05-18  19.50000  20.10000  18.83000  19.23000   22042500   19.23000
2016-05-17  19.73000  20.23000  19.65000  19.77000   20469800   19.77000
2016-05-16  19.43000  19.64000  19.20000  19.62000   10963200   19.62000
...              ...       ...       ...       ...        ...        ...
2004-06-30  18.25998  19.99998  18.25998  19.02999   54561100    1.06414
2004-06-29  20.27999  20.27999  20.27999  20.27999    3475700    1.13404
2004-06-28  22.52999  22.99998  22.52999  22.52999    7168200    1.25986
2004-06-25  28.10001  29.99997  23.99997  25.02999  177992600    1.39966

3057 rows × 6 columns

# Use resample to resample the adjusted close price
adj_price = data['Adj Close']
adj_price
Date
2016-05-20    19.34000
2016-05-19    18.98000
2016-05-18    19.23000
2016-05-17    19.77000
2016-05-16    19.62000
2016-05-13    19.40000
2016-05-12    19.88000
2016-05-11    19.28000
2016-05-10    19.07000
2016-05-09    18.67000
2016-05-06    18.70000
2016-05-05    20.49000
2016-05-04    20.42000
2016-05-03    20.99000
2016-05-02    20.61000
2016-04-29    20.61000
2016-04-28    20.40000
2016-04-27    21.44000
2016-04-26    21.13000
2016-04-25    20.92000
2016-04-22    21.71000
2016-04-21    21.42000
2016-04-20    19.97000
2016-04-19    20.55000
2016-04-18    20.91000
2016-04-15    20.71000
2016-04-14    20.40000
2016-04-13    20.50000
2016-04-12    20.49000
2016-04-11    21.57000
                ...   
2004-08-05     0.98194
2004-08-04     0.99704
2004-08-03     0.99201
2004-08-02     0.98474
2004-07-30     0.97914
2004-07-29     0.97691
2004-07-28     0.94671
2004-07-27     0.91148
2004-07-26     0.93944
2004-07-23     0.96293
2004-07-22     0.95230
2004-07-21     0.98418
2004-07-20     0.99872
2004-07-19     1.00934
2004-07-16     0.97300
2004-07-15     0.93441
2004-07-14     0.94448
2004-07-13     0.93944
2004-07-12     0.91875
2004-07-09     0.97747
2004-07-08     0.97858
2004-07-07     0.96628
2004-07-06     0.99816
2004-07-05     0.99201
2004-07-02     0.98865
2004-07-01     1.04960
2004-06-30     1.06414
2004-06-29     1.13404
2004-06-28     1.25986
2004-06-25     1.39966
Name: Adj Close, dtype: float64
resampled = adj_price.resample('M').ohlc()   # older pandas: adj_price.resample('m', how='ohlc')
resampled
                open      high       low     close
Date
2004-06-30   1.39966   1.39966   1.06414   1.06414
2004-07-31   1.04960   1.04960   0.91148   0.97914
2004-08-31   0.98474   0.99704   0.77951   0.80244
2004-09-30   0.80244   0.96069   0.74876   0.91596
2004-10-31   0.91596   1.00263   0.81083   0.84270
...              ...       ...       ...       ...
2016-01-31  17.44000  17.44000  13.10000  14.01000
2016-02-29  14.15000  19.28000  13.94000  19.00000
2016-03-31  20.20000  21.20000  17.53000  21.20000
2016-04-30  20.38000  21.78000  19.97000  20.61000
2016-05-31  20.61000  20.99000  18.67000  19.34000

144 rows × 4 columns

(resampled.high - resampled.low) / resampled.low
Date
2004-06-30    0.315297
2004-07-31    0.151534
2004-08-31    0.279060
2004-09-30    0.283041
2004-10-31    0.236548
2004-11-30    0.099666
2004-12-31    0.147270
2005-01-31    0.253885
2005-02-28    0.266861
2005-03-31    0.269335
2005-04-30    0.129689
2005-05-31    0.303518
2005-06-30    0.257886
2005-07-31    0.139452
2005-08-31    0.261976
2005-09-30    0.075246
2005-10-31    0.340526
2005-11-30    0.131639
2005-12-31    0.071430
2006-01-31    0.141301
2006-02-28    0.173618
2006-03-31    0.151135
2006-04-30    0.127417
2006-05-31    0.268993
2006-06-30    0.166040
2006-07-31    0.247421
2006-08-31    0.150091
2006-09-30    0.093990
2006-10-31    0.156016
2006-11-30    0.163225
                ...   
2013-12-31    0.076190
2014-01-31    0.128657
2014-02-28    0.115819
2014-03-31    0.213381
2014-04-30    0.153409
2014-05-31    0.071102
2014-06-30    0.084388
2014-07-31    0.189517
2014-08-31    0.086587
2014-09-30    0.037749
2014-10-31    0.086667
2014-11-30    0.058001
2014-12-31    0.098119
2015-01-31    0.097480
2015-02-28    0.057931
2015-03-31    0.161143
2015-04-30    0.215367
2015-05-31    0.305863
2015-06-30    0.639591
2015-07-31    0.239018
2015-08-31    0.496459
2015-09-30    0.230830
2015-10-31    0.140940
2015-11-30    0.158687
2015-12-31    0.200514
2016-01-31    0.331298
2016-02-29    0.383070
2016-03-31    0.209355
2016-04-30    0.090636
2016-05-31    0.124264
Freq: M, dtype: float64
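To see which months swung the most, the monthly amplitude can simply be sorted; a minimal sketch reusing the resampled OHLC frame from above:

```python
# Monthly swing: (high - low) / low, then pick the largest values.
amplitude = (resampled.high - resampled.low) / resampled.low
print(amplitude.nlargest(5))   # the five most volatile months
print(amplitude.mean())        # average monthly swing
```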

Growth Curve

# 600690.ss 000951.sz 002001.sz
stockid = '600690.sz'
stockfile = '600690.csv'
ds = pd.read_csv(os.path.join('yahoo-data', stockfile), index_col='Date', parse_dates=True)
ds.head()
            Open  High   Low  Close    Volume  Adj Close
Date
2016-05-20  8.74  9.15  8.74   9.14  55390400       9.14
2016-05-19  8.84  9.05  8.81   8.84  34785900       8.84
2016-05-18  8.82  8.93  8.65   8.88  44254300       8.88
2016-05-17  9.08  9.08  8.82   8.83  42392200       8.83
2016-05-16  8.90  9.08  8.80   9.07  59749500       9.07
adj_price = ds['Adj Close']
adj_price.plot(figsize=(8, 6))

[Figure: adjusted close price of 600690 over time]

Growth Multiples

Maximum growth multiple and maximum annualized compound growth rate

Compare the lowest and highest adjusted closing prices to get the growth multiple and the annualized compound growth rate between them. This reflects the return on the stock in the best-case scenario (buy at the bottom, sell at the top).

# Maximum growth multiple (highest adjusted close / lowest adjusted close)
total_max_growth = adj_price.max() / adj_price.min()
total_max_growth
1113.2977809591985
# Maximum annualized compound growth rate
min_date = adj_price.idxmin()   # date of the lowest price (argmin in the original, which behaved like idxmin in old pandas)
max_date = adj_price.idxmax()   # date of the highest price
max_growth_per_year = total_max_growth ** (1.0 / (max_date.year - min_date.year))
max_growth_per_year
1.3966150915746656

In other words, this takes roughly the 20th root, since about 20 years separate the lowest and the highest price.
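Whole calendar years are a coarse measure of the holding period; a minimal sketch of the same calculation using the exact number of days between the two dates (the 365.25-day average year length is an assumed approximation):

```python
# Annualize using the exact span between the lowest- and highest-price dates.
years_exact = abs((max_date - min_date).days) / 365.25   # assumed average year length
cagr_exact = total_max_growth ** (1.0 / years_exact)
print(years_exact, cagr_exact)
```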

Current growth multiple and compound growth rate

Compare the closing price when the stock was listed with the current closing price to get the total growth multiple and the annualized compound growth rate.

# Current overall growth multiple (the CSV is ordered newest first,
# so iloc[0] is the latest price and iloc[-1] the price at listing)
total_growth = adj_price.iloc[0] / adj_price.iloc[-1]   # .ix in the original; removed in newer pandas
total_growth
180.205047318612
# Annualized compound growth rate
old_date = adj_price.index[-1]
now_date = adj_price.index[0]
growth_per_year = total_growth ** (1.0 / (now_date.year - old_date.year))
growth_per_year
1.2533628673066715
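The calculation above depends on the file being ordered newest first; a minimal sketch that removes that dependency by sorting the index first (sort_index is standard pandas):

```python
# Sort chronologically so iloc[0] is the listing date and iloc[-1] is the latest date.
prices = adj_price.sort_index()
total_growth2 = prices.iloc[-1] / prices.iloc[0]
growth_per_year2 = total_growth2 ** (1.0 / (prices.index[-1].year - prices.index[0].year))
print(total_growth2, growth_per_year2)
```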

Average Annualized Growth Rate

Compute each year's growth rate and then average them. The same can be done with monthly growth rates, which reveals variation over shorter periods; a sketch of both follows the output below.

# Convert the index to yearly periods; because the rows are ordered newest first,
# .first() within each year picks that year's most recent trading-day price.
price_in_years = adj_price.to_period(freq='A').groupby(level=0).first()
price_in_years
Date
1993    0.03573
1994    0.02459
1995    0.07254
1996    0.27879
1997    0.69135
1998    0.50219
1999    0.48011
2000    0.80252
2001    0.78662
2002    0.53786
2003    0.60910
2004    0.56913
2005    0.60712
2006    1.50079
2007    3.80700
2008    1.67358
2009    4.82062
2010    5.76779
2011    3.70347
2012    5.72073
2013    8.85739
2014    8.96458
2015    9.92000
2016    9.14000
Freq: A-DEC, Name: Adj Close, dtype: float64

This takes the first row of each group, i.e. each year's most recent price (the rows are ordered newest first).

price_in_years.plot(figsize=(8,6))
<matplotlib.axes.AxesSubplot at 0x11160dfd0>

[Figure: line chart of the year-end adjusted close for each year]

The key point here:

When computing the annual growth rate, diff must be divided by the previous year's price (i.e. how much the price rose on top of the previous year's price), not by the current year's price.

diff = price_in_years.diff()                    # year-over-year change
rate_in_years = diff / (price_in_years - diff)  # divide by the previous year's price
rate_in_years
Date
1993         NaN
1994   -0.311783
1995    1.949980
1996    2.843259
1997    1.479824
1998   -0.273610
1999   -0.043967
2000    0.671534
2001   -0.019813
2002   -0.316239
2003    0.132451
2004   -0.065621
2005    0.066751
2006    1.471982
2007    1.536664
2008   -0.560394
2009    1.880424
2010    0.196483
2011   -0.357905
2012    0.544695
2013    0.548297
2014    0.012102
2015    0.106577
2016   -0.078629
Freq: A-DEC, Name: Adj Close, dtype: float64
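The same series can be obtained with pct_change, which divides each change by the previous value; averaging it, or redoing the calculation at monthly granularity, gives the average growth rates mentioned above. A minimal sketch (resample('M') and pct_change are standard pandas; no new data is assumed):

```python
# Year-over-year rate, equivalent to diff / previous value.
rate_in_years_alt = price_in_years.pct_change()
print(rate_in_years_alt.mean())                   # average annual growth rate

# The same idea at monthly granularity for a finer-grained view.
price_in_months = adj_price.resample('M').last()  # month-end adjusted close
print(price_in_months.pct_change().mean())        # average monthly growth rate
```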

```python
rate_in_years.plot(kind='bar', figsize=(8,6))
```

[Figure: bar chart of year-over-year growth rates]

Time Event Log

A personal time-tracking tool. Key points:

  • Use dida365.com as the GTD tool
  • Record each event's category and time spent in a fixed format, e.g. "[探索发现] 体验 iMac 开发环境 [3h]"
  • Export the data
  • Analyze the data

Reading the Data

Read in and parse the exported data.

%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt

The following code fixes garbled Chinese characters in matplotlib plots:

from matplotlib.pylab import mpl

mpl.rcParams['font.sans-serif'] = ['Arial Unicode MS']  # use a default font that contains Chinese glyphs
mpl.rcParams['axes.unicode_minus'] = False  # keep the minus sign from being rendered as a box in saved figures

header=3 skips the first three lines of the file, which are not CSV data; _date_parser is a custom date-parsing function.

def _date_parser(dstr):
    return pd.Timestamp(dstr).date()

data = pd.read_csv('data/dida365.csv', header=3, index_col='Due Date', parse_dates=True, date_parser=_date_parser)
data.head()
             List Name                              Title
Due Date
2016-05-24        自我成长        [编程] javascript exercism [1h]
2016-05-23        自我成长      [编程] javascript exercism [0.5h]
2016-05-23        自我成长       [编程] clojure ring request [2h]
2016-05-22        自我成长         [编程] clojure ring 入门 [30m]
2016-05-22        自我成长        [探索发现] 体验 iMac 开发环境 [3h]

                                              Content Is Checklist Reminder
Due Date
2016-05-24                                        NaN            N      NaN
2016-05-23                                        NaN            N      NaN
2016-05-23                      阅读 ring.util.request 源码            N      NaN
2016-05-22                                        NaN            N      NaN
2016-05-22  iMac 的屏幕体验很棒,但使用非SSD硬盘速度上和mpb想着非常多。            N      NaN

           Repeat  Priority  Status            Completed Time             Order
Due Date
2016-05-24    NaN         0       2  2016-05-25T14:15:10+0000  -235295488344064
2016-05-23    NaN         0       2  2016-05-24T15:59:08+0000  -234195976716288
2016-05-23    NaN         0       2  2016-05-24T15:58:56+0000  -233096465088512
2016-05-22    NaN         0       2  2016-05-23T15:03:24+0000  -231996953460736
2016-05-22    NaN         0       2  2016-05-23T14:33:35+0000  -230897441832960

                 Timezone Is All Day
Due Date
2016-05-24  Asia/Shanghai       True
2016-05-23  Asia/Shanghai       True
2016-05-23  Asia/Shanghai       True
2016-05-22  Asia/Shanghai       True
2016-05-22  Asia/Shanghai       True

Data Cleaning

  • Keep only events that are completed or achieved, i.e. events with Status != 0
  • Only the List Name and Title fields are needed

df = data[data['Status'] != 0].loc[:, ['List Name', 'Title']].copy()  # .copy() so new columns can be added later without a SettingWithCopyWarning
df.head()
             List Name                              Title
Due Date
2016-05-24        自我成长        [编程] javascript exercism [1h]
2016-05-23        自我成长      [编程] javascript exercism [0.5h]
2016-05-23        自我成长       [编程] clojure ring request [2h]
2016-05-22        自我成长         [编程] clojure ring 入门 [30m]
2016-05-22        自我成长        [探索发现] 体验 iMac 开发环境 [3h]

Parsing the Data

Parse each event's category and the time spent on it.

import re

def parse_tag(value):
    # Extract the leading "[category]" tag from the title; default to '其他' (other).
    m = re.match(r'^(\[(.*?)\])?.*$', value)
    if m and m.group(2):
        return m.group(2)
    else:
        return '其他'

def parse_duration(value):
    # Extract the trailing "[1h]" / "[30m]" duration and normalize it to hours.
    m = re.match(r'^.+?\[(.*?)([hm]?)\]$', value)
    if m:
        dur = 0
        try:
            dur = float(m.group(1))
        except Exception as e:
            print('parse duration error: \n%s' % e)
        if m.group(2) == 'm':
            dur = dur / 60.0
        return dur
    else:
        return 0
    
titles = df['Title']
df['Tag'] = titles.map(parse_tag)
df['Duration'] = titles.map(parse_duration)
df.head()
             List Name                              Title     Tag  Duration
Due Date
2016-05-24        自我成长        [编程] javascript exercism [1h]      编程       1.0
2016-05-23        自我成长      [编程] javascript exercism [0.5h]      编程       0.5
2016-05-23        自我成长       [编程] clojure ring request [2h]      编程       2.0
2016-05-22        自我成长         [编程] clojure ring 入门 [30m]      编程       0.5
2016-05-22        自我成长        [探索发现] 体验 iMac 开发环境 [3h]    探索发现       3.0
df.count()
List Name    232
Title        232
Tag          232
Duration     232
dtype: int64
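It is worth sanity-checking the two parsers before trusting the aggregated numbers; a minimal sketch (the first two titles come from the data above, the third is a made-up, untagged example):

```python
samples = ['[编程] clojure ring 入门 [30m]',
           '[探索发现] 体验 iMac 开发环境 [3h]',
           '没有标签的记录']                     # hypothetical untagged entry
for s in samples:
    print(s, '->', parse_tag(s), parse_duration(s))
# Expected: 编程 0.5, 探索发现 3.0, and 其他 0 for the untagged entry.
```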

Start and End Dates

start_date = df.index.min().date()
start_date
datetime.date(2015, 12, 2)
end_date = df.index.max().date()
end_date
datetime.date(2016, 5, 24)

Data Analysis

Time Overview

How much time do I invest in myself per day on average? -> total hours / total number of days

end_date - start_date
datetime.timedelta(174)
df['Duration'].sum() 
482.19999999999999
df['Duration'].sum() / (end_date - start_date).days
2.7712643678160918

Energy Allocation

tag_list = df.groupby(['Tag']).sum()
tag_list
        Duration
Tag
写作          49.0
探索发现        54.5
机器学习        33.5
电影          50.8
编程         243.4
阅读          51.0
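The percentage split that the pie chart will show can also be printed directly; a minimal sketch:

```python
# Share of total logged hours per category, as fractions of 1.
share = tag_list['Duration'] / tag_list['Duration'].sum()
print(share.sort_values(ascending=False))
```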
In the autopct format string, %% escapes a literal % sign.
tag_list['Duration'].plot(kind='pie', figsize=(8, 8), fontsize=16, autopct='%1.2f%%')
<matplotlib.axes.AxesSubplot at 0x10e5b4f10>

[Figure: pie chart of time share by category]

Focus

The ability to keep learning a single skill over a long period.

programming = df[df['Tag'] == '编程']
programming.head()
             List Name                              Title    Tag  Duration
Due Date
2016-05-24        自我成长        [编程] javascript exercism [1h]     编程       1.0
2016-05-23        自我成长      [编程] javascript exercism [0.5h]     编程       0.5
2016-05-23        自我成长       [编程] clojure ring request [2h]     编程       2.0
2016-05-22        自我成长         [编程] clojure ring 入门 [30m]     编程       0.5
2016-05-22        自我成长      [编程] javascript exercism [0.5h]     编程       0.5
to_period makes the resampled index display as month periods (e.g. 2016-05) instead of month-end timestamps.
programming['Duration'].resample('M').sum().to_period('M').plot(kind='bar', figsize=(8, 8), fontsize=16)  # older pandas: programming.resample('m', how='sum')
<matplotlib.axes.AxesSubplot at 0x111352d50>

[Figure: bar chart of monthly hours spent on 编程]

# Why not use df.pivot() directly? Because the row index has duplicates, e.g. 2016-05-23.
date_tags = df.reset_index().groupby(['Due Date', 'Tag'])[['Duration']].sum()  # keep only the numeric Duration column
date_tags
                     Duration
Due Date   Tag
2015-12-02 写作            3.0
2015-12-04 阅读            3.0
2015-12-06 写作            4.0
           机器学习          3.0
2015-12-07 写作            1.0
2015-12-08 机器学习          1.0
           编程            4.0
...                        ...
2016-05-22 探索发现          3.0
           编程            1.0
2016-05-23 编程            2.5
2016-05-24 编程            1.0

187 rows × 1 columns
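An alternative to the groupby-then-pivot route is pivot_table, which aggregates duplicate (date, tag) pairs itself; a minimal sketch:

```python
# pivot_table sums duplicate (date, tag) pairs directly, giving the same wide table.
dates_alt = df.reset_index().pivot_table(index='Due Date', columns='Tag',
                                         values='Duration', aggfunc='sum')
print(dates_alt.head())
```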

# Use the tags as column labels
dates = date_tags.reset_index().pivot(index='Due Date', columns='Tag', values='Duration')
dates
Tag          写作  探索发现  机器学习   电影   编程   阅读
Due Date
2015-12-02   3.0    NaN    NaN   NaN   NaN   NaN
2015-12-04   NaN    NaN    NaN   NaN   NaN   3.0
2015-12-06   4.0    NaN    3.0   NaN   NaN   NaN
2015-12-07   1.0    NaN    NaN   NaN   NaN   NaN
2015-12-08   NaN    NaN    1.0   NaN   4.0   NaN
...          ...    ...    ...   ...   ...   ...
2016-05-22   NaN    3.0    NaN   NaN   1.0   NaN
2016-05-23   NaN    NaN    NaN   NaN   2.5   NaN
2016-05-24   NaN    NaN    NaN   NaN   1.0   NaN

133 rows × 6 columns

# Fill in every calendar day, so the days with no study time become visible
full_dates = dates.reindex(pd.date_range(start_date, end_date)).fillna(0)
full_dates
Tag          写作  探索发现  机器学习   电影   编程   阅读
2015-12-02   3.0    0.0    0.0   0.0   0.0   0.0
2015-12-03   0.0    0.0    0.0   0.0   0.0   0.0
2015-12-04   0.0    0.0    0.0   0.0   0.0   3.0
2015-12-05   0.0    0.0    0.0   0.0   0.0   0.0
2015-12-06   4.0    0.0    3.0   0.0   0.0   0.0
...          ...    ...    ...   ...   ...   ...
2016-05-22   0.0    3.0    0.0   0.0   1.0   0.0
2016-05-23   0.0    0.0    0.0   0.0   2.5   0.0
2016-05-24   0.0    0.0    0.0   0.0   1.0   0.0

175 rows × 6 columns
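The filled-in calendar also answers the question raised in the comment above, i.e. how many days had no study time at all; a minimal sketch:

```python
# Days where the total logged time is zero, i.e. no study recorded.
idle_days = full_dates[full_dates.sum(axis=1) == 0]
print(len(idle_days), 'out of', len(full_dates), 'days with no logged time')
```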

# Draw the stacked daily bar chart
full_dates.plot(kind='bar', stacked=True, figsize=(16, 8))
<matplotlib.axes.AxesSubplot at 0x112dffdd0>

[Figure: stacked bar chart of daily hours by category]

full_dates.resample('M').sum().to_period('M').plot(kind='bar', stacked=True, figsize=(8, 8))  # older pandas: .resample('m', how='sum')
<matplotlib.axes.AxesSubplot at 0x112e0c650>

[Figure: stacked bar chart of monthly hours by category]
