股票数据分析
具体详见 https://github.com/kamidox/stock-analysis
这里假设数据已经下载下来,并且保存在 yahoo-data 目录下。
分析波动幅度
# Jupyter magic: render matplotlib figures inline in the notebook.
%matplotlib inline
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
# Directory holding the downloaded Yahoo Finance daily CSV files.
datadir = 'yahoo-data'
fname = '002001.csv'
# Index by the 'Date' column as datetimes so we can resample by time.
data = pd.read_csv(os.path.join(datadir, fname), index_col='Date', parse_dates=True)
data
Open | High | Low | Close | Volume | Adj Close | |
---|---|---|---|---|---|---|
Date | ||||||
2016-05-20 | 18.82000 | 19.46000 | 18.71000 | 19.34000 | 13265400 | 19.34000 |
2016-05-19 | 19.12000 | 19.52000 | 18.90000 | 18.98000 | 12581300 | 18.98000 |
2016-05-18 | 19.50000 | 20.10000 | 18.83000 | 19.23000 | 22042500 | 19.23000 |
2016-05-17 | 19.73000 | 20.23000 | 19.65000 | 19.77000 | 20469800 | 19.77000 |
2016-05-16 | 19.43000 | 19.64000 | 19.20000 | 19.62000 | 10963200 | 19.62000 |
2016-05-13 | 19.70000 | 19.94000 | 19.27000 | 19.40000 | 15655100 | 19.40000 |
2016-05-12 | 18.81000 | 19.95000 | 18.71000 | 19.88000 | 19814300 | 19.88000 |
2016-05-11 | 19.50000 | 19.85000 | 19.12000 | 19.28000 | 23742200 | 19.28000 |
2016-05-10 | 18.86000 | 19.17000 | 18.23000 | 19.07000 | 20858200 | 19.07000 |
2016-05-09 | 18.62000 | 18.85000 | 17.89000 | 18.67000 | 22525400 | 18.67000 |
2016-05-06 | 20.49000 | 20.49000 | 18.69000 | 18.70000 | 40962500 | 18.70000 |
2016-05-05 | 20.15000 | 20.57000 | 20.01000 | 20.49000 | 15216600 | 20.49000 |
2016-05-04 | 20.80000 | 21.10000 | 20.20000 | 20.42000 | 22555400 | 20.42000 |
2016-05-03 | 20.60000 | 20.99000 | 19.90000 | 20.99000 | 35319300 | 20.99000 |
2016-05-02 | 20.61000 | 20.61000 | 20.61000 | 20.61000 | 0 | 20.61000 |
2016-04-29 | 20.20000 | 20.77000 | 19.85000 | 20.61000 | 17845800 | 20.61000 |
2016-04-28 | 21.49000 | 21.50000 | 19.60000 | 20.40000 | 41130000 | 20.40000 |
2016-04-27 | 21.25000 | 22.08000 | 21.12000 | 21.44000 | 31398800 | 21.44000 |
2016-04-26 | 20.90000 | 21.27000 | 20.41000 | 21.13000 | 19271100 | 21.13000 |
2016-04-25 | 21.61000 | 21.61000 | 20.57000 | 20.92000 | 19571900 | 20.92000 |
2016-04-22 | 21.44000 | 21.92000 | 21.01000 | 21.71000 | 27411200 | 21.71000 |
2016-04-21 | 20.06000 | 21.97000 | 19.81000 | 21.42000 | 45631800 | 21.42000 |
2016-04-20 | 20.68000 | 21.30000 | 19.42000 | 19.97000 | 34444400 | 19.97000 |
2016-04-19 | 20.91000 | 21.23000 | 20.35000 | 20.55000 | 16482800 | 20.55000 |
2016-04-18 | 20.55000 | 21.20000 | 19.99000 | 20.91000 | 24520700 | 20.91000 |
2016-04-15 | 20.38000 | 20.73000 | 20.30000 | 20.71000 | 16271200 | 20.71000 |
2016-04-14 | 20.60000 | 20.83000 | 19.92000 | 20.40000 | 26698500 | 20.40000 |
2016-04-13 | 20.62000 | 21.17000 | 20.49000 | 20.50000 | 25012500 | 20.50000 |
2016-04-12 | 21.50000 | 21.50000 | 20.10000 | 20.49000 | 29331800 | 20.49000 |
2016-04-11 | 21.94000 | 22.30000 | 21.54000 | 21.57000 | 24064600 | 21.57000 |
... | ... | ... | ... | ... | ... | ... |
2004-08-05 | 17.90003 | 18.10002 | 17.52999 | 17.56000 | 8750200 | 0.98194 |
2004-08-04 | 17.86002 | 18.01999 | 17.49998 | 17.83001 | 10656000 | 0.99704 |
2004-08-03 | 17.58001 | 18.12002 | 17.58001 | 17.73998 | 12815400 | 0.99201 |
2004-08-02 | 17.44996 | 17.88003 | 17.21997 | 17.61002 | 10593500 | 0.98474 |
2004-07-30 | 17.47997 | 17.80000 | 17.20997 | 17.50999 | 12284500 | 0.97914 |
2004-07-29 | 17.15003 | 17.90003 | 16.99998 | 17.46997 | 15769400 | 0.97691 |
2004-07-28 | 16.32001 | 16.95997 | 16.13003 | 16.92996 | 5868400 | 0.94671 |
2004-07-27 | 16.90004 | 16.99998 | 15.99998 | 16.30000 | 7869100 | 0.91148 |
2004-07-26 | 17.19996 | 17.21997 | 16.55000 | 16.80000 | 7509400 | 0.93944 |
2004-07-23 | 17.01999 | 17.24998 | 16.62002 | 17.21997 | 8398200 | 0.96293 |
2004-07-22 | 17.71997 | 17.94996 | 16.91004 | 17.02999 | 8947200 | 0.95230 |
2004-07-21 | 17.93996 | 18.16004 | 17.59001 | 17.60002 | 9462400 | 0.98418 |
2004-07-20 | 17.99998 | 18.32001 | 17.74998 | 17.86002 | 16607900 | 0.99872 |
2004-07-19 | 17.39003 | 18.33001 | 17.23998 | 18.05000 | 23611100 | 1.00934 |
2004-07-16 | 16.51999 | 17.67004 | 16.45997 | 17.40003 | 20279700 | 0.97300 |
2004-07-15 | 16.81000 | 17.22997 | 16.50999 | 16.70997 | 9443200 | 0.93441 |
2004-07-14 | 16.69997 | 16.90004 | 16.15004 | 16.89003 | 9665300 | 0.94448 |
2004-07-13 | 16.12002 | 16.80000 | 15.80000 | 16.80000 | 10745200 | 0.93944 |
2004-07-12 | 17.35002 | 17.35002 | 16.20997 | 16.42996 | 17923100 | 0.91875 |
2004-07-09 | 17.63003 | 18.17996 | 17.31000 | 17.47997 | 10998800 | 0.97747 |
2004-07-08 | 17.17004 | 17.66004 | 17.10002 | 17.49998 | 7263400 | 0.97858 |
2004-07-07 | 17.80000 | 17.80000 | 17.13003 | 17.27999 | 13155000 | 0.96628 |
2004-07-06 | 17.80000 | 18.24998 | 17.51999 | 17.85002 | 17018000 | 0.99816 |
2004-07-05 | 17.49998 | 17.93996 | 17.12002 | 17.73998 | 17707600 | 0.99201 |
2004-07-02 | 18.76999 | 19.10002 | 16.99998 | 17.67996 | 37618100 | 0.98865 |
2004-07-01 | 18.90003 | 19.35002 | 18.65003 | 18.76999 | 32947700 | 1.04960 |
2004-06-30 | 18.25998 | 19.99998 | 18.25998 | 19.02999 | 54561100 | 1.06414 |
2004-06-29 | 20.27999 | 20.27999 | 20.27999 | 20.27999 | 3475700 | 1.13404 |
2004-06-28 | 22.52999 | 22.99998 | 22.52999 | 22.52999 | 7168200 | 1.25986 |
2004-06-25 | 28.10001 | 29.99997 | 23.99997 | 25.02999 | 177992600 | 1.39966 |
3057 rows × 6 columns
## 使用 resample 针对复权收盘价进行重采样
# Work with the 'Adj Close' column (Yahoo's adjusted close price).
adj_price = data['Adj Close']
adj_price
Date
2016-05-20 19.34000
2016-05-19 18.98000
2016-05-18 19.23000
2016-05-17 19.77000
2016-05-16 19.62000
2016-05-13 19.40000
2016-05-12 19.88000
2016-05-11 19.28000
2016-05-10 19.07000
2016-05-09 18.67000
2016-05-06 18.70000
2016-05-05 20.49000
2016-05-04 20.42000
2016-05-03 20.99000
2016-05-02 20.61000
2016-04-29 20.61000
2016-04-28 20.40000
2016-04-27 21.44000
2016-04-26 21.13000
2016-04-25 20.92000
2016-04-22 21.71000
2016-04-21 21.42000
2016-04-20 19.97000
2016-04-19 20.55000
2016-04-18 20.91000
2016-04-15 20.71000
2016-04-14 20.40000
2016-04-13 20.50000
2016-04-12 20.49000
2016-04-11 21.57000
...
2004-08-05 0.98194
2004-08-04 0.99704
2004-08-03 0.99201
2004-08-02 0.98474
2004-07-30 0.97914
2004-07-29 0.97691
2004-07-28 0.94671
2004-07-27 0.91148
2004-07-26 0.93944
2004-07-23 0.96293
2004-07-22 0.95230
2004-07-21 0.98418
2004-07-20 0.99872
2004-07-19 1.00934
2004-07-16 0.97300
2004-07-15 0.93441
2004-07-14 0.94448
2004-07-13 0.93944
2004-07-12 0.91875
2004-07-09 0.97747
2004-07-08 0.97858
2004-07-07 0.96628
2004-07-06 0.99816
2004-07-05 0.99201
2004-07-02 0.98865
2004-07-01 1.04960
2004-06-30 1.06414
2004-06-29 1.13404
2004-06-28 1.25986
2004-06-25 1.39966
Name: Adj Close, dtype: float64
# Resample the daily adjusted close into monthly OHLC bars.
# NOTE: resample(..., how='ohlc') was removed in pandas 0.18+;
# the method-chain form below is the supported equivalent.
resampled = adj_price.resample('M').ohlc()
resampled
open | high | low | close | |
---|---|---|---|---|
Date | ||||
2004-06-30 | 1.39966 | 1.39966 | 1.06414 | 1.06414 |
2004-07-31 | 1.04960 | 1.04960 | 0.91148 | 0.97914 |
2004-08-31 | 0.98474 | 0.99704 | 0.77951 | 0.80244 |
2004-09-30 | 0.80244 | 0.96069 | 0.74876 | 0.91596 |
2004-10-31 | 0.91596 | 1.00263 | 0.81083 | 0.84270 |
2004-11-30 | 0.82201 | 0.89471 | 0.81362 | 0.82201 |
2004-12-31 | 0.81810 | 0.85389 | 0.74428 | 0.74428 |
2005-01-31 | 0.74428 | 0.76497 | 0.61008 | 0.61008 |
2005-02-28 | 0.61399 | 0.77784 | 0.61399 | 0.77784 |
2005-03-31 | 0.76497 | 0.78007 | 0.61455 | 0.62797 |
2005-04-30 | 0.65425 | 0.69172 | 0.61231 | 0.63580 |
2005-05-31 | 0.63580 | 0.82441 | 0.63245 | 0.78681 |
2005-06-30 | 0.78681 | 0.86418 | 0.68701 | 0.85695 |
2005-07-31 | 0.83164 | 0.93361 | 0.81935 | 0.93361 |
2005-08-31 | 0.93361 | 0.93361 | 0.73980 | 0.73980 |
2005-09-30 | 0.73907 | 0.78536 | 0.73040 | 0.75498 |
2005-10-31 | 0.75498 | 1.01207 | 0.75498 | 0.97953 |
2005-11-30 | 0.96543 | 1.06305 | 0.93939 | 1.06088 |
2005-12-31 | 1.03810 | 1.05763 | 0.98712 | 1.02725 |
2006-01-31 | 1.02725 | 1.13031 | 0.99037 | 1.12922 |
2006-02-28 | 1.12922 | 1.12922 | 0.96217 | 0.97627 |
2006-03-31 | 1.01098 | 1.09885 | 0.95458 | 1.08692 |
2006-04-30 | 1.11729 | 1.19973 | 1.06414 | 1.08475 |
2006-05-31 | 1.08475 | 1.37654 | 1.08475 | 1.28434 |
2006-06-30 | 1.34509 | 1.47356 | 1.26373 | 1.47356 |
2006-07-31 | 1.49225 | 1.69542 | 1.35914 | 1.35914 |
2006-08-31 | 1.36147 | 1.37782 | 1.19801 | 1.31477 |
2006-09-30 | 1.26573 | 1.35914 | 1.24237 | 1.35914 |
2006-10-31 | 1.35914 | 1.43620 | 1.24237 | 1.25639 |
2006-11-30 | 1.27974 | 1.31477 | 1.13028 | 1.20034 |
... | ... | ... | ... | ... |
2013-12-31 | 12.44789 | 12.96701 | 12.04900 | 12.21995 |
2014-01-31 | 12.21995 | 12.21995 | 10.82698 | 11.04228 |
2014-02-28 | 11.04228 | 12.32119 | 11.04228 | 11.38418 |
2014-03-31 | 11.42217 | 12.74544 | 10.50407 | 12.19459 |
2014-04-30 | 12.29593 | 14.13842 | 12.25794 | 12.40990 |
2014-05-31 | 12.40990 | 13.15656 | 12.28320 | 12.61372 |
2014-06-30 | 12.61372 | 12.68281 | 11.69582 | 12.22879 |
2014-07-31 | 12.53476 | 14.55809 | 12.23866 | 14.38043 |
2014-08-31 | 14.04485 | 15.11080 | 13.90667 | 14.21264 |
2014-09-30 | 14.38043 | 14.92327 | 14.38043 | 14.88379 |
2014-10-31 | 14.88379 | 16.08792 | 14.80483 | 14.80483 |
2014-11-30 | 14.94301 | 14.94301 | 14.12381 | 14.79496 |
2014-12-31 | 14.90353 | 16.12740 | 14.68639 | 14.97262 |
2015-01-31 | 14.97262 | 16.33466 | 14.88379 | 15.17989 |
2015-02-28 | 14.68639 | 15.14041 | 14.31134 | 15.05158 |
2015-03-31 | 15.45625 | 17.63749 | 15.18976 | 16.97621 |
2015-04-30 | 16.96634 | 20.60833 | 16.95647 | 19.05875 |
2015-05-31 | 19.05875 | 23.30281 | 17.84476 | 23.30281 |
2015-06-30 | 25.07939 | 28.02061 | 17.09000 | 17.09000 |
2015-07-31 | 17.09000 | 19.18000 | 15.48000 | 16.16000 |
2015-08-31 | 16.58000 | 19.02000 | 12.71000 | 15.23000 |
2015-09-30 | 14.98000 | 15.57000 | 12.65000 | 13.41000 |
2015-10-31 | 13.41000 | 15.30000 | 13.41000 | 15.22000 |
2015-11-30 | 14.74000 | 16.94000 | 14.62000 | 15.70000 |
2015-12-31 | 15.85000 | 18.68000 | 15.56000 | 17.44000 |
2016-01-31 | 17.44000 | 17.44000 | 13.10000 | 14.01000 |
2016-02-29 | 14.15000 | 19.28000 | 13.94000 | 19.00000 |
2016-03-31 | 20.20000 | 21.20000 | 17.53000 | 21.20000 |
2016-04-30 | 20.38000 | 21.78000 | 19.97000 | 20.61000 |
2016-05-31 | 20.61000 | 20.99000 | 18.67000 | 19.34000 |
144 rows × 4 columns
# Monthly amplitude: (high - low) relative to the monthly low.
(resampled.high - resampled.low) / resampled.low
Date
2004-06-30 0.315297
2004-07-31 0.151534
2004-08-31 0.279060
2004-09-30 0.283041
2004-10-31 0.236548
2004-11-30 0.099666
2004-12-31 0.147270
2005-01-31 0.253885
2005-02-28 0.266861
2005-03-31 0.269335
2005-04-30 0.129689
2005-05-31 0.303518
2005-06-30 0.257886
2005-07-31 0.139452
2005-08-31 0.261976
2005-09-30 0.075246
2005-10-31 0.340526
2005-11-30 0.131639
2005-12-31 0.071430
2006-01-31 0.141301
2006-02-28 0.173618
2006-03-31 0.151135
2006-04-30 0.127417
2006-05-31 0.268993
2006-06-30 0.166040
2006-07-31 0.247421
2006-08-31 0.150091
2006-09-30 0.093990
2006-10-31 0.156016
2006-11-30 0.163225
...
2013-12-31 0.076190
2014-01-31 0.128657
2014-02-28 0.115819
2014-03-31 0.213381
2014-04-30 0.153409
2014-05-31 0.071102
2014-06-30 0.084388
2014-07-31 0.189517
2014-08-31 0.086587
2014-09-30 0.037749
2014-10-31 0.086667
2014-11-30 0.058001
2014-12-31 0.098119
2015-01-31 0.097480
2015-02-28 0.057931
2015-03-31 0.161143
2015-04-30 0.215367
2015-05-31 0.305863
2015-06-30 0.639591
2015-07-31 0.239018
2015-08-31 0.496459
2015-09-30 0.230830
2015-10-31 0.140940
2015-11-30 0.158687
2015-12-31 0.200514
2016-01-31 0.331298
2016-02-29 0.383070
2016-03-31 0.209355
2016-04-30 0.090636
2016-05-31 0.124264
Freq: M, dtype: float64
增长曲线
# Candidate stock ids: 600690.ss 000951.sz 002001.sz
stockid = '600690.sz'
stockfile = '600690.csv'
# Load the daily history for this stock, indexed by trading date.
ds = pd.read_csv(os.path.join('yahoo-data', stockfile), index_col='Date', parse_dates=True)
ds.head()
Open | High | Low | Close | Volume | Adj Close | |
---|---|---|---|---|---|---|
Date | ||||||
2016-05-20 | 8.74 | 9.15 | 8.74 | 9.14 | 55390400 | 9.14 |
2016-05-19 | 8.84 | 9.05 | 8.81 | 8.84 | 34785900 | 8.84 |
2016-05-18 | 8.82 | 8.93 | 8.65 | 8.88 | 44254300 | 8.88 |
2016-05-17 | 9.08 | 9.08 | 8.82 | 8.83 | 42392200 | 8.83 |
2016-05-16 | 8.90 | 9.08 | 8.80 | 9.07 | 59749500 | 9.07 |
# Plot the adjusted close over the whole listing history.
adj_price = ds['Adj Close']
adj_price.plot(figsize=(8, 6))
增长倍数
最大增长倍数及最大年化复合增长率
计算最低价和最高价之间的收盘价比较,以及增长的倍数和年化复合增长率,这个反映的是一个股票最好的情况下的投资收益情况。
# Maximum growth multiple: highest price divided by lowest price over
# the whole listing period (the best possible buy-low/sell-high ratio).
total_max_growth = adj_price.max() / adj_price.min()
total_max_growth
1113.2977809591985
# Maximum annualized compound growth rate, from the date of the lowest
# price to the date of the highest price.
# NOTE: Series.argmin/argmax now return integer positions in modern
# pandas; idxmin/idxmax return the index labels (Timestamps) whose
# .year attribute is needed below.
min_date = adj_price.idxmin()
max_date = adj_price.idxmax()
max_growth_per_year = total_max_growth ** (1.0 / (max_date.year - min_date.year))
max_growth_per_year
1.3966150915746656
求20次开方
当前增长倍数及复合增长率
计算上市时的收盘价与当前的收盘价比较,增长的倍数和年化复合增长率。
# Overall growth multiple: the latest close (row 0 — the CSV is
# newest-first) divided by the close at listing time (last row).
# NOTE: .ix was removed from pandas; .iloc is the positional equivalent.
total_growth = adj_price.iloc[0] / adj_price.iloc[-1]
total_growth
180.205047318612
# Annualized compound growth rate from listing date to now.
old_date = adj_price.index[-1]  # earliest trading day (CSV is newest-first)
now_date = adj_price.index[0]   # most recent trading day
growth_per_year = total_growth ** (1.0 / (now_date.year - old_date.year))
growth_per_year
1.2533628673066715
平均年化增长率
计算每年的增长率,然后再求平均值。也可以计算每月的增长率,再求平均值,可以看到更短的一些周期变化。
# First recorded price of each calendar year: convert the index to
# annual periods, then take the first entry in each year group.
price_in_years = adj_price.to_period(freq='A').groupby(level=0).first()
price_in_years
Date
1993 0.03573
1994 0.02459
1995 0.07254
1996 0.27879
1997 0.69135
1998 0.50219
1999 0.48011
2000 0.80252
2001 0.78662
2002 0.53786
2003 0.60910
2004 0.56913
2005 0.60712
2006 1.50079
2007 3.80700
2008 1.67358
2009 4.82062
2010 5.76779
2011 3.70347
2012 5.72073
2013 8.85739
2014 8.96458
2015 9.92000
2016 9.14000
Freq: A-DEC, Name: Adj Close, dtype: float64
取每组第一个数据
price_in_years.plot(figsize=(8,6))
<matplotlib.axes.AxesSubplot at 0x11160dfd0>
这里的关键信息:
计算年化收益率时,diff 应该要除以前一年的价格,即在前一年的价格的基础上上涨了多少,而不是在当前年的价格。
# Year-over-year growth rate. The change must be divided by the
# PREVIOUS year's price; note that price - diff is exactly the series
# shifted by one year.
diff = price_in_years.diff()
rate_in_years = diff / price_in_years.shift(1)
rate_in_years
Date
1993 NaN
1994 -0.311783
1995 1.949980
1996 2.843259
1997 1.479824
1998 -0.273610
1999 -0.043967
2000 0.671534
2001 -0.019813
2002 -0.316239
2003 0.132451
2004 -0.065621
2005 0.066751
2006 1.471982
2007 1.536664
2008 -0.560394
2009 1.880424
2010 0.196483
2011 -0.357905
2012 0.544695
2013 0.548297
2014 0.012102
2015 0.106577
2016 -0.078629
Freq: A-DEC, Name: Adj Close, dtype: float64
```python
rate_in_years.plot(kind='bar', figsize=(8,6))
时间事件日志
个人时间统计工具。要点:
- 使用 dida365.com 来作为 GTD 工具
- 使用特殊格式记录事件类别和花费的时间,如: “[探索发现] 体验 iMac 开发环境 [3h]”
- 导出数据
- 分析数据
读取数据
分析并读取数据
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
以下代码是为了解决中文乱码
# Configure matplotlib so Chinese (CJK) labels render correctly.
from matplotlib.pylab import mpl
mpl.rcParams['font.sans-serif'] = ['Arial Unicode MS']  # default font with CJK glyphs
mpl.rcParams['axes.unicode_minus'] = False  # keep '-' rendering correctly in saved figures
header=3表示跳过头部3行,因为头部3行不是csv数据,_date_parser是创建的自己的数据解析函数
def _date_parser(dstr):
return pd.Timestamp(dstr).date()
# header=3: the first three lines of the export are not CSV data.
# _date_parser normalizes each 'Due Date' string to a plain date.
data = pd.read_csv('data/dida365.csv', header=3, index_col='Due Date', parse_dates=True, date_parser=_date_parser)
data.head()
数据清洗
- 只关心已完成或已达成的事件,即
status != 0
的事件 - 只需要
List Name
和Title
字段
# Keep completed events only (Status != 0) and just the two columns
# needed for the analysis.
df = data.loc[data['Status'] != 0, ['List Name', 'Title']]
df.head()
List Name | Title | |
---|---|---|
Due Date | ||
2016-05-24 | 自我成长 | [编程] javascript exercism [1h] |
2016-05-23 | 自我成长 | [编程] javascript exercism [0.5h] |
2016-05-23 | 自我成长 | [编程] clojure ring request [2h] |
2016-05-22 | 自我成长 | [编程] clojure ring 入门 [30m] |
2016-05-22 | 自我成长 | [探索发现] 体验 iMac 开发环境 [3h] |
List Name | Title | Content | Is Checklist | Reminder | Repeat | Priority | Status | Completed Time | Order | Timezone | Is All Day | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Due Date | ||||||||||||
2016-05-24 | 自我成长 | [编程] javascript exercism [1h] | NaN | N | NaN | NaN | 0 | 2 | 2016-05-25T14:15:10+0000 | -235295488344064 | Asia/Shanghai | True |
2016-05-23 | 自我成长 | [编程] javascript exercism [0.5h] | NaN | N | NaN | NaN | 0 | 2 | 2016-05-24T15:59:08+0000 | -234195976716288 | Asia/Shanghai | True |
2016-05-23 | 自我成长 | [编程] clojure ring request [2h] | 阅读 ring.util.request 源码\r | N | NaN | NaN | 0 | 2 | 2016-05-24T15:58:56+0000 | -233096465088512 | Asia/Shanghai | True |
2016-05-22 | 自我成长 | [编程] clojure ring 入门 [30m] | NaN | N | NaN | NaN | 0 | 2 | 2016-05-23T15:03:24+0000 | -231996953460736 | Asia/Shanghai | True |
2016-05-22 | 自我成长 | [探索发现] 体验 iMac 开发环境 [3h] | iMac 的屏幕体验很棒,但使用非SSD硬盘速度上和mpb想着非常多。\r | N | NaN | NaN | 0 | 2 | 2016-05-23T14:33:35+0000 | -230897441832960 | Asia/Shanghai | True |
数据解析
解析事件类别和和花费的时间
import re
def parse_tag(value):
    """Return the leading ``[tag]`` of an event title, or ``'其他'``.

    Titles look like "[编程] some description [1h]"; the first bracketed
    token is the category tag. Missing or empty tags fall back to '其他'.
    """
    matched = re.match(r'^(\[(.*?)\])?.*$', value)
    tag = matched.group(2) if matched else None
    return tag if tag else '其他'
def parse_duration(value):
    """Parse the trailing ``[Nh]``/``[Nm]`` duration of a title into hours.

    Returns 0 when the title carries no duration or the number cannot
    be parsed. Minute values ('m' suffix) are converted to hours.

    BUGFIX: the original ``except e:`` referenced an undefined name, so
    any float-parse failure raised ``NameError`` instead of being
    handled; catch ``ValueError`` explicitly.
    """
    m = re.match(r'^.+?\[(.*?)([hm]?)\]$', value)
    if not m:
        return 0
    dur = 0
    try:
        dur = float(m.group(1))
    except ValueError as e:
        print('parse duration error: \n%s' % e)
    if m.group(2) == 'm':
        # minutes -> hours
        dur = dur / 60.0
    return dur
# Derive the Tag and Duration columns from the raw event titles.
titles = df['Title']
df['Tag'] = titles.map(parse_tag)
df['Duration'] = titles.map(parse_duration)
df.head()
List Name | Title | Tag | Duration | |
---|---|---|---|---|
Due Date | ||||
2016-05-24 | 自我成长 | [编程] javascript exercism [1h] | 编程 | 1.0 |
2016-05-23 | 自我成长 | [编程] javascript exercism [0.5h] | 编程 | 0.5 |
2016-05-23 | 自我成长 | [编程] clojure ring request [2h] | 编程 | 2.0 |
2016-05-22 | 自我成长 | [编程] clojure ring 入门 [30m] | 编程 | 0.5 |
2016-05-22 | 自我成长 | [探索发现] 体验 iMac 开发环境 [3h] | 探索发现 | 3.0 |
df.count()
List Name 232
Title 232
Tag 232
Duration 232
dtype: int64
起始终止时间
start_date = df.index.min().date()
start_date
datetime.date(2015, 12, 2)
end_date = df.index.max().date()
end_date
datetime.date(2016, 5, 24)
数据分析
时间总览
平均每天投资在自己身上的时间是多少?-> 全部时间 / 总天数
end_date - start_date
datetime.timedelta(174)
df['Duration'].sum()
482.19999999999999
df['Duration'].sum() / (end_date - start_date).days
2.7712643678160918
精力分配
# Total hours spent per tag. Select the Duration column explicitly:
# modern pandas no longer silently drops non-numeric columns on
# .sum(), so summing the whole frame would try to "sum" the Title
# strings as well.
tag_list = df.groupby(['Tag'])[['Duration']].sum()
tag_list
Duration | |
---|---|
Tag | |
写作 | 49.0 |
探索发现 | 54.5 |
机器学习 | 33.5 |
电影 | 50.8 |
编程 | 243.4 |
阅读 | 51.0 |
tag_list['Duration'].plot(kind='pie', figsize=(8, 8), fontsize=16, autopct='%1.2f%%')
<matplotlib.axes.AxesSubplot at 0x10e5b4f10>
专注力
长时间学习某项技能的能力
# Focus on the '编程' (programming) events only.
programming = df[df['Tag'] == '编程']
programming.head()
List Name | Title | Tag | Duration | |
---|---|---|---|---|
Due Date | ||||
2016-05-24 | 自我成长 | [编程] javascript exercism [1h] | 编程 | 1.0 |
2016-05-23 | 自我成长 | [编程] javascript exercism [0.5h] | 编程 | 0.5 |
2016-05-23 | 自我成长 | [编程] clojure ring request [2h] | 编程 | 2.0 |
2016-05-22 | 自我成长 | [编程] clojure ring 入门 [30m] | 编程 | 0.5 |
2016-05-22 | 自我成长 | [编程] javascript exercism [0.5h] | 编程 | 0.5 |
# Monthly programming hours as a bar chart.
# NOTE: resample(..., how='sum') was removed in pandas 0.18+; chain
# .sum() instead, and aggregate only the numeric Duration column.
programming[['Duration']].resample('M').sum().to_period('M').plot(kind='bar', figsize=(8, 8), fontsize=16)
<matplotlib.axes.AxesSubplot at 0x111352d50>
# Why not df.pivot() directly? Because the date index has duplicate
# entries (e.g. two events on 2016-05-23), so aggregate by
# (date, tag) first.
date_tags = df.reset_index().groupby(['Due Date', 'Tag']).sum()
date_tags
Duration | ||
---|---|---|
Due Date | Tag | |
2015-12-02 | 写作 | 3.0 |
2015-12-04 | 阅读 | 3.0 |
2015-12-06 | 写作 | 4.0 |
机器学习 | 3.0 | |
2015-12-07 | 写作 | 1.0 |
2015-12-08 | 机器学习 | 1.0 |
编程 | 4.0 | |
2015-12-09 | 写作 | 4.0 |
2015-12-10 | 探索发现 | 0.5 |
编程 | 5.5 | |
2015-12-11 | 写作 | 1.5 |
编程 | 4.0 | |
阅读 | 4.0 | |
2015-12-12 | 写作 | 2.0 |
机器学习 | 1.5 | |
2015-12-13 | 编程 | 6.0 |
2015-12-14 | 阅读 | 1.0 |
2015-12-15 | 机器学习 | 2.5 |
阅读 | 1.0 | |
2015-12-16 | 探索发现 | 1.0 |
机器学习 | 1.5 | |
编程 | 3.0 | |
阅读 | 1.0 | |
2015-12-17 | 机器学习 | 2.0 |
2015-12-18 | 写作 | 1.5 |
机器学习 | 1.0 | |
编程 | 3.0 | |
2015-12-19 | 探索发现 | 7.0 |
阅读 | 0.5 | |
2015-12-20 | 写作 | 1.0 |
... | ... | ... |
2016-04-24 | 编程 | 3.5 |
2016-04-25 | 编程 | 3.0 |
2016-04-26 | 编程 | 3.0 |
2016-04-29 | 编程 | 2.0 |
2016-04-30 | 编程 | 2.0 |
2016-05-01 | 编程 | 3.0 |
2016-05-02 | 编程 | 2.0 |
2016-05-03 | 编程 | 2.0 |
2016-05-04 | 编程 | 3.0 |
2016-05-05 | 编程 | 4.0 |
2016-05-06 | 编程 | 4.0 |
2016-05-07 | 编程 | 4.0 |
2016-05-08 | 编程 | 4.0 |
2016-05-09 | 编程 | 4.0 |
2016-05-10 | 编程 | 4.0 |
2016-05-11 | 编程 | 2.0 |
2016-05-12 | 编程 | 3.0 |
2016-05-13 | 探索发现 | 1.0 |
编程 | 3.0 | |
2016-05-14 | 探索发现 | 1.0 |
编程 | 5.0 | |
2016-05-15 | 编程 | 1.0 |
2016-05-17 | 编程 | 3.0 |
2016-05-18 | 编程 | 2.0 |
2016-05-19 | 编程 | 1.0 |
2016-05-20 | 编程 | 4.0 |
2016-05-22 | 探索发现 | 3.0 |
编程 | 1.0 | |
2016-05-23 | 编程 | 2.5 |
2016-05-24 | 编程 | 1.0 |
187 rows × 1 columns
# Pivot the tags into columns: one row per day, one column per tag.
dates = date_tags.reset_index().pivot(index='Due Date', columns='Tag', values='Duration')
dates
Tag | 写作 | 探索发现 | 机器学习 | 电影 | 编程 | 阅读 |
---|---|---|---|---|---|---|
Due Date | ||||||
2015-12-02 | 3.0 | NaN | NaN | NaN | NaN | NaN |
2015-12-04 | NaN | NaN | NaN | NaN | NaN | 3.0 |
2015-12-06 | 4.0 | NaN | 3.0 | NaN | NaN | NaN |
2015-12-07 | 1.0 | NaN | NaN | NaN | NaN | NaN |
2015-12-08 | NaN | NaN | 1.0 | NaN | 4.0 | NaN |
2015-12-09 | 4.0 | NaN | NaN | NaN | NaN | NaN |
2015-12-10 | NaN | 0.5 | NaN | NaN | 5.5 | NaN |
2015-12-11 | 1.5 | NaN | NaN | NaN | 4.0 | 4.0 |
2015-12-12 | 2.0 | NaN | 1.5 | NaN | NaN | NaN |
2015-12-13 | NaN | NaN | NaN | NaN | 6.0 | NaN |
2015-12-14 | NaN | NaN | NaN | NaN | NaN | 1.0 |
2015-12-15 | NaN | NaN | 2.5 | NaN | NaN | 1.0 |
2015-12-16 | NaN | 1.0 | 1.5 | NaN | 3.0 | 1.0 |
2015-12-17 | NaN | NaN | 2.0 | NaN | NaN | NaN |
2015-12-18 | 1.5 | NaN | 1.0 | NaN | 3.0 | NaN |
2015-12-19 | NaN | 7.0 | NaN | NaN | NaN | 0.5 |
2015-12-20 | 1.0 | 4.0 | NaN | NaN | NaN | NaN |
2015-12-21 | NaN | NaN | NaN | NaN | NaN | 0.5 |
2015-12-22 | NaN | 2.0 | NaN | NaN | 8.0 | NaN |
2015-12-23 | NaN | 1.0 | NaN | NaN | NaN | NaN |
2015-12-24 | NaN | NaN | NaN | NaN | NaN | 0.5 |
2015-12-25 | 2.0 | NaN | NaN | NaN | NaN | 1.5 |
2015-12-26 | NaN | NaN | NaN | NaN | 2.0 | 1.0 |
2015-12-29 | NaN | NaN | NaN | NaN | NaN | 2.0 |
2015-12-30 | NaN | NaN | NaN | NaN | NaN | 1.0 |
2016-01-01 | NaN | NaN | NaN | NaN | NaN | 5.0 |
2016-01-02 | NaN | NaN | NaN | NaN | 2.0 | 2.0 |
2016-01-03 | NaN | NaN | NaN | NaN | 3.5 | NaN |
2016-01-04 | NaN | NaN | NaN | NaN | 6.5 | NaN |
2016-01-05 | 2.0 | 2.0 | NaN | NaN | NaN | NaN |
... | ... | ... | ... | ... | ... | ... |
2016-04-21 | NaN | 2.0 | NaN | NaN | 5.0 | NaN |
2016-04-22 | NaN | NaN | NaN | NaN | 6.0 | 2.0 |
2016-04-23 | NaN | NaN | NaN | NaN | 3.0 | NaN |
2016-04-24 | NaN | NaN | NaN | NaN | 3.5 | NaN |
2016-04-25 | NaN | NaN | NaN | NaN | 3.0 | NaN |
2016-04-26 | NaN | NaN | NaN | NaN | 3.0 | NaN |
2016-04-29 | NaN | NaN | NaN | NaN | 2.0 | NaN |
2016-04-30 | NaN | NaN | NaN | NaN | 2.0 | NaN |
2016-05-01 | NaN | NaN | NaN | NaN | 3.0 | NaN |
2016-05-02 | NaN | NaN | NaN | NaN | 2.0 | NaN |
2016-05-03 | NaN | NaN | NaN | NaN | 2.0 | NaN |
2016-05-04 | NaN | NaN | NaN | NaN | 3.0 | NaN |
2016-05-05 | NaN | NaN | NaN | NaN | 4.0 | NaN |
2016-05-06 | NaN | NaN | NaN | NaN | 4.0 | NaN |
2016-05-07 | NaN | NaN | NaN | NaN | 4.0 | NaN |
2016-05-08 | NaN | NaN | NaN | NaN | 4.0 | NaN |
2016-05-09 | NaN | NaN | NaN | NaN | 4.0 | NaN |
2016-05-10 | NaN | NaN | NaN | NaN | 4.0 | NaN |
2016-05-11 | NaN | NaN | NaN | NaN | 2.0 | NaN |
2016-05-12 | NaN | NaN | NaN | NaN | 3.0 | NaN |
2016-05-13 | NaN | 1.0 | NaN | NaN | 3.0 | NaN |
2016-05-14 | NaN | 1.0 | NaN | NaN | 5.0 | NaN |
2016-05-15 | NaN | NaN | NaN | NaN | 1.0 | NaN |
2016-05-17 | NaN | NaN | NaN | NaN | 3.0 | NaN |
2016-05-18 | NaN | NaN | NaN | NaN | 2.0 | NaN |
2016-05-19 | NaN | NaN | NaN | NaN | 1.0 | NaN |
2016-05-20 | NaN | NaN | NaN | NaN | 4.0 | NaN |
2016-05-22 | NaN | 3.0 | NaN | NaN | 1.0 | NaN |
2016-05-23 | NaN | NaN | NaN | NaN | 2.5 | NaN |
2016-05-24 | NaN | NaN | NaN | NaN | 1.0 | NaN |
133 rows × 6 columns
# Reindex onto a continuous daily range so days with no learning
# activity show up as all-zero rows.
full_dates = dates.reindex(pd.date_range(start_date, end_date)).fillna(0)
full_dates
Tag | 写作 | 探索发现 | 机器学习 | 电影 | 编程 | 阅读 |
---|---|---|---|---|---|---|
2015-12-02 | 3.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 |
2015-12-03 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 |
2015-12-04 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 3.0 |
2015-12-05 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 |
2015-12-06 | 4.0 | 0.0 | 3.0 | 0 | 0.0 | 0.0 |
2015-12-07 | 1.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 |
2015-12-08 | 0.0 | 0.0 | 1.0 | 0 | 4.0 | 0.0 |
2015-12-09 | 4.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 |
2015-12-10 | 0.0 | 0.5 | 0.0 | 0 | 5.5 | 0.0 |
2015-12-11 | 1.5 | 0.0 | 0.0 | 0 | 4.0 | 4.0 |
2015-12-12 | 2.0 | 0.0 | 1.5 | 0 | 0.0 | 0.0 |
2015-12-13 | 0.0 | 0.0 | 0.0 | 0 | 6.0 | 0.0 |
2015-12-14 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 1.0 |
2015-12-15 | 0.0 | 0.0 | 2.5 | 0 | 0.0 | 1.0 |
2015-12-16 | 0.0 | 1.0 | 1.5 | 0 | 3.0 | 1.0 |
2015-12-17 | 0.0 | 0.0 | 2.0 | 0 | 0.0 | 0.0 |
2015-12-18 | 1.5 | 0.0 | 1.0 | 0 | 3.0 | 0.0 |
2015-12-19 | 0.0 | 7.0 | 0.0 | 0 | 0.0 | 0.5 |
2015-12-20 | 1.0 | 4.0 | 0.0 | 0 | 0.0 | 0.0 |
2015-12-21 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.5 |
2015-12-22 | 0.0 | 2.0 | 0.0 | 0 | 8.0 | 0.0 |
2015-12-23 | 0.0 | 1.0 | 0.0 | 0 | 0.0 | 0.0 |
2015-12-24 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.5 |
2015-12-25 | 2.0 | 0.0 | 0.0 | 0 | 0.0 | 1.5 |
2015-12-26 | 0.0 | 0.0 | 0.0 | 0 | 2.0 | 1.0 |
2015-12-27 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 |
2015-12-28 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 |
2015-12-29 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 2.0 |
2015-12-30 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 1.0 |
2015-12-31 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 |
... | ... | ... | ... | ... | ... | ... |
2016-04-25 | 0.0 | 0.0 | 0.0 | 0 | 3.0 | 0.0 |
2016-04-26 | 0.0 | 0.0 | 0.0 | 0 | 3.0 | 0.0 |
2016-04-27 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 |
2016-04-28 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 |
2016-04-29 | 0.0 | 0.0 | 0.0 | 0 | 2.0 | 0.0 |
2016-04-30 | 0.0 | 0.0 | 0.0 | 0 | 2.0 | 0.0 |
2016-05-01 | 0.0 | 0.0 | 0.0 | 0 | 3.0 | 0.0 |
2016-05-02 | 0.0 | 0.0 | 0.0 | 0 | 2.0 | 0.0 |
2016-05-03 | 0.0 | 0.0 | 0.0 | 0 | 2.0 | 0.0 |
2016-05-04 | 0.0 | 0.0 | 0.0 | 0 | 3.0 | 0.0 |
2016-05-05 | 0.0 | 0.0 | 0.0 | 0 | 4.0 | 0.0 |
2016-05-06 | 0.0 | 0.0 | 0.0 | 0 | 4.0 | 0.0 |
2016-05-07 | 0.0 | 0.0 | 0.0 | 0 | 4.0 | 0.0 |
2016-05-08 | 0.0 | 0.0 | 0.0 | 0 | 4.0 | 0.0 |
2016-05-09 | 0.0 | 0.0 | 0.0 | 0 | 4.0 | 0.0 |
2016-05-10 | 0.0 | 0.0 | 0.0 | 0 | 4.0 | 0.0 |
2016-05-11 | 0.0 | 0.0 | 0.0 | 0 | 2.0 | 0.0 |
2016-05-12 | 0.0 | 0.0 | 0.0 | 0 | 3.0 | 0.0 |
2016-05-13 | 0.0 | 1.0 | 0.0 | 0 | 3.0 | 0.0 |
2016-05-14 | 0.0 | 1.0 | 0.0 | 0 | 5.0 | 0.0 |
2016-05-15 | 0.0 | 0.0 | 0.0 | 0 | 1.0 | 0.0 |
2016-05-16 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 |
2016-05-17 | 0.0 | 0.0 | 0.0 | 0 | 3.0 | 0.0 |
2016-05-18 | 0.0 | 0.0 | 0.0 | 0 | 2.0 | 0.0 |
2016-05-19 | 0.0 | 0.0 | 0.0 | 0 | 1.0 | 0.0 |
2016-05-20 | 0.0 | 0.0 | 0.0 | 0 | 4.0 | 0.0 |
2016-05-21 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 |
2016-05-22 | 0.0 | 3.0 | 0.0 | 0 | 1.0 | 0.0 |
2016-05-23 | 0.0 | 0.0 | 0.0 | 0 | 2.5 | 0.0 |
2016-05-24 | 0.0 | 0.0 | 0.0 | 0 | 1.0 | 0.0 |
175 rows × 6 columns
# Stacked bar chart: hours per tag for every single day.
full_dates.plot(kind='bar', stacked=True, figsize=(16, 8))
<matplotlib.axes.AxesSubplot at 0x112dffdd0>
# Monthly totals per tag, stacked.
# NOTE: resample(..., how='sum') was removed in pandas 0.18+; chain
# the .sum() method instead.
full_dates.resample('M').sum().to_period('M').plot(kind='bar', stacked=True, figsize=(8, 8))
<matplotlib.axes.AxesSubplot at 0x112e0c650>