Python数据分析——数据筛选实例【茅台】

# tushare supplies the market data; pandas is used for the analysis.
import tushare as ts
import pandas as pd
from pandas import DataFrame, Series
In [3]:
# Download the K-line (price/volume) history for stock code 600519,
# starting from 1999-01-01.
df = ts.get_k_data('600519',start='1999-01-01')
In [5]:
# Save df to a local CSV file.
df.to_csv('./maotai.csv')
In [9]:
# Reload the saved CSV, parsing the 'date' column as datetimes and using
# it as the row index of the frame.
df = pd.read_csv('./maotai.csv', index_col='date', parse_dates=['date'])
# The CSV was written with its original index, which comes back as an
# 'Unnamed: 0' column; it carries no information, so drop it.
df = df.drop(columns='Unnamed: 0')
df.head(5)
Out[9]:
             open  close   high    low     volume    code
date
2001-08-27  5.392  5.554  5.902  5.132  406318.00  600519
2001-08-28  5.467  5.759  5.781  5.407  129647.79  600519
2001-08-29  5.777  5.684  5.781  5.640   53252.75  600519
2001-08-30  5.668  5.796  5.860  5.624   48013.06  600519
2001-08-31  5.804  5.782  5.877  5.749   23231.48  600519
In [26]:
# List every date on which the close was more than 3% above the open.
# `s` is a boolean mask aligned with df's date index.
s = (df['close'] - df['open']) / df['open'] > 0.03
df.loc[s].index
Out[26]:
DatetimeIndex(['2001-08-27', '2001-08-28', '2001-09-10', '2001-12-21',
               '2002-01-18', '2002-01-31', '2003-01-14', '2003-10-29',
               '2004-01-05', '2004-01-14',
               ...
               '2018-08-27', '2018-09-18', '2018-09-26', '2018-10-19',
               '2018-10-31', '2018-11-13', '2018-12-28', '2019-01-15',
               '2019-02-11', '2019-03-01'],
              dtype='datetime64[ns]', name='date', length=294, freq=None)
In [20]:
# Flag every date on which the open was more than 2% below the previous
# day's close. shift(1) moves each close down one row so that it lines up
# with the following day's open.
s1 = (df['open'] - df['close'].shift(1)) / df['close'].shift(1) < -0.02
s1
Out[20]:
date
2001-08-27    False
2001-08-28    False
2001-08-29    False
2001-08-30    False
2001-08-31    False
2001-09-03    False
2001-09-04    False
2001-09-05    False
2001-09-06    False
2001-09-07    False
2001-09-10    False
2001-09-11    False
2001-09-12     True
2001-09-13    False
2001-09-14    False
2001-09-17    False
2001-09-18    False
2001-09-19    False
2001-09-20    False
2001-09-21    False
2001-09-24    False
2001-09-25    False
2001-09-26    False
2001-09-27    False
2001-09-28    False
2001-10-08    False
2001-10-09    False
2001-10-10    False
2001-10-11    False
2001-10-12    False
              ...  
2019-01-16    False
2019-01-17    False
2019-01-18    False
2019-01-21    False
2019-01-22    False
2019-01-23    False
2019-01-24    False
2019-01-25    False
2019-01-28    False
2019-01-29    False
2019-01-30    False
2019-01-31    False
2019-02-01    False
2019-02-11    False
2019-02-12    False
2019-02-13    False
2019-02-14    False
2019-02-15    False
2019-02-18    False
2019-02-19    False
2019-02-20    False
2019-02-21    False
2019-02-22    False
2019-02-25    False
2019-02-26    False
2019-02-27    False
2019-02-28    False
2019-03-01    False
2019-03-04    False
2019-03-05    False
Length: 4174, dtype: bool
In [23]:
# Dates of the rows for which the s1 mask is True.
df.loc[s1].index
Out[23]:
DatetimeIndex(['2001-09-12', '2002-06-26', '2002-12-13', '2004-07-01',
               '2004-10-29', '2006-08-21', '2006-08-23', '2007-01-25',
               '2007-02-01', '2007-02-06', '2007-03-19', '2007-05-21',
               '2007-05-30', '2007-06-05', '2007-07-27', '2007-09-05',
               '2007-09-10', '2008-03-13', '2008-03-17', '2008-03-25',
               '2008-03-27', '2008-04-22', '2008-04-23', '2008-04-29',
               '2008-05-13', '2008-06-10', '2008-06-13', '2008-06-24',
               '2008-06-27', '2008-08-11', '2008-08-19', '2008-09-23',
               '2008-10-10', '2008-10-15', '2008-10-16', '2008-10-20',
               '2008-10-23', '2008-10-27', '2008-11-06', '2008-11-12',
               '2008-11-20', '2008-11-21', '2008-12-02', '2009-02-27',
               '2009-03-25', '2009-08-13', '2010-04-26', '2010-04-30',
               '2011-08-05', '2012-03-27', '2012-08-10', '2012-11-22',
               '2012-12-04', '2012-12-24', '2013-01-16', '2013-01-25',
               '2013-09-02', '2014-04-25', '2015-01-19', '2015-05-25',
               '2015-07-03', '2015-07-08', '2015-07-13', '2015-08-24',
               '2015-09-02', '2015-09-15', '2017-11-17', '2018-02-06',
               '2018-02-09', '2018-03-23', '2018-03-28', '2018-07-11',
               '2018-10-11', '2018-10-24', '2018-10-25', '2018-10-29',
               '2018-10-30'],
              dtype='datetime64[ns]', name='date', freq=None)
In [33]:
# Scenario: starting from 2010-01-01, buy one lot on the first trading day
# of every month and sell all holdings on the last trading day of every
# year. What is the profit to date?
# Restrict the data to 2010 through 2019; .loc makes it explicit that the
# year strings select rows of the datetime index.
data = df.loc['2010':'2019']
data.head()
Out[33]:
               open    close     high      low    volume    code
date
2010-01-04  109.760  108.446  109.760  108.044  44304.88  600519
2010-01-05  109.116  108.127  109.441  107.846  31513.18  600519
2010-01-06  107.840  106.417  108.165  106.129  39889.03  600519
2010-01-07  106.417  104.477  106.691  103.302  48825.55  600519
2010-01-08  104.655  103.379  104.655  102.167  36702.09  600519
In [35]:
# Resample the data by calendar month, keeping the first row of each month
# (the month's first trading day). The resulting rows are labelled with the
# month-end date, not the actual trading date.
# NOTE(review): newer pandas releases reportedly deprecate the 'M' alias in
# favour of 'ME' — confirm against the installed version.
monthes = data.resample('M').first()
Out[35]:
               open    close     high      low    volume    code
date
2010-01-31  109.760  108.446  109.760  108.044  44304.88  600519
2010-02-28  107.769  107.776  108.216  106.576  29655.94  600519
2010-03-31  106.219  106.085  106.857  105.925  21734.74  600519
2010-04-30  101.324  102.141  102.422  101.311  23980.83  600519
2010-05-31   81.676   82.091   82.678   80.974  23975.16  600519
2010-06-30   84.075   84.637   85.166   83.278  23525.57  600519
2010-07-31   81.586   81.057   81.586   80.725   7449.69  600519
2010-08-31   89.296   92.465   93.567   89.296  42965.73  600519
2010-09-30  102.288  101.052  103.834  100.420  25589.00  600519
2010-10-31  108.858  111.776  113.045  108.858  31608.00  600519
2010-11-30  105.122  105.483  106.217  104.478  49658.00  600519
2010-12-31  130.759  130.372  133.980  128.839  38016.00  600519
2011-01-31  120.388  119.487  121.033  117.619  60462.00  600519
2011-02-28  114.656  116.124  117.136  114.334  17758.00  600519
2011-03-31  115.113  115.068  115.899  114.527  23059.00  600519
2011-04-30  115.931  115.596  116.813  115.010  18227.00  600519
2011-05-31  117.876  119.718  120.324  117.232  41270.00  600519
2011-06-30  131.467  135.197  135.255  130.772  39093.00  600519
2011-07-31  137.895  137.272  137.895  136.033  15810.00  600519
2011-08-31  148.519  145.689  148.519  145.274  18572.00  600519
2011-09-30  153.798  151.491  154.357  150.789  25689.00  600519
2011-10-31  136.735  134.672  137.394  134.407  14283.00  600519
2011-11-30  145.546  148.418  148.640  145.066  27147.00  600519
2011-12-31  152.724  152.466  154.149  150.625  33785.00  600519
2012-01-31  137.179  132.716  138.089  132.523  33878.00  600519
2012-02-29  133.382  133.347  135.044  132.437  24824.00  600519
2012-03-31  145.789  145.116  147.186  144.980  16920.00  600519
2012-04-30  140.474  147.566  147.852  140.474  40960.00  600519
2012-05-31  161.893  161.878  162.301  159.027  35339.00  600519
2012-06-30  170.489  172.187  174.071  169.565  38504.00  600519
...             ...      ...      ...      ...       ...     ...
2016-10-31  289.945  294.268  295.142  289.945  21103.00  600519
2016-11-30  308.828  307.594  309.847  306.496  19512.00  600519
2016-12-31  310.819  311.810  315.277  309.537  27941.00  600519
2017-01-31  324.689  324.961  327.331  323.261  20763.00  600519
2017-02-28  336.073  336.898  339.162  335.102  20936.00  600519
2017-03-31  344.912  346.378  347.923  344.815  21944.00  600519
2017-04-30  374.595  378.480  383.657  373.954  40789.00  600519
2017-05-31  399.791  400.257  403.093  397.440  23291.00  600519
2017-06-30  429.804  436.390  437.040  428.357  40604.00  600519
2017-07-31  460.595  447.161  460.595  444.762  49628.00  600519
2017-08-31  474.266  473.517  476.238  470.716  27501.00  600519
2017-09-30  483.337  488.248  489.332  483.170  31451.00  600519
2017-10-31  517.196  520.381  528.052  512.819  34891.00  600519
2017-11-30  612.188  614.288  622.698  610.551  43435.00  600519
2017-12-31  629.078  613.637  630.044  611.448  47192.00  600519
2018-01-31  690.200  693.996  700.218  680.232  49612.00  600519
2018-02-28  756.262  747.122  756.558  742.379  50582.00  600519
2018-03-31  717.808  731.582  736.394  713.637  44794.00  600519
2018-04-30  670.480  670.539  681.326  664.673  32039.00  600519
2018-05-31  650.760  658.480  659.624  636.029  70259.00  600519
2018-06-30  740.614  734.679  744.410  728.417  36177.00  600519
2018-07-31  734.520  711.550  739.330  703.000  37558.00  600519
2018-08-31  731.400  714.940  732.300  714.110  25237.00  600519
2018-09-30  652.000  666.210  667.670  650.800  30179.00  600519
2018-10-31  715.410  686.150  719.000  686.150  82745.00  600519
2018-11-30  555.000  563.000  585.500  551.250  98106.00  600519
2018-12-31  589.000  601.200  605.000  584.770  83414.00  600519
2019-01-31  609.980  598.980  612.000  595.010  62286.00  600519
2019-02-28  697.040  692.670  699.000  689.610  30520.00  600519
2019-03-31  761.500  789.300  790.000  761.000  63840.00  600519

111 rows × 6 columns

In [36]:
# Resample the data by calendar year, keeping the last row of each year
# (the year's last trading day). Rows are labelled with the year-end date.
# NOTE(review): the final (2019) row appears to come from a partial year,
# so it is presumably not a real year-end sell price — confirm before
# using it in the profit calculation.
years = data.resample('Y').last()
Out[36]:
               open    close     high      low   volume    code
date
2010-12-31  117.103  118.469  118.701  116.620  46084.0  600519
2011-12-31  138.039  138.468  139.600  136.105  29460.0  600519
2012-12-31  155.208  152.087  156.292  150.144  51914.0  600519
2013-12-31   93.188   96.480   97.179   92.061  57546.0  600519
2014-12-31  157.642  161.056  161.379  157.132  46269.0  600519
2015-12-31  207.487  207.458  208.704  207.106  19673.0  600519
2016-12-31  317.239  324.563  325.670  317.239  34687.0  600519
2017-12-31  707.948  687.725  716.329  681.918  76038.0  600519
2018-12-31  563.300  590.010  596.400  560.000  63678.0  600519
2019-12-31  785.000  779.780  789.550  775.880  44830.0  600519

转载于:https://www.cnblogs.com/bilx/p/11611899.html

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值