获取数据
通过 pandas_datareader 来获取沪深300的股票数据;为此,先用爬虫从网络上抓取沪深300的成份股名单。
import os
import pandas as pd
import pandas_datareader as web
import pickle
import requests
import bs4 as bs
import matplotlib.pyplot as plt
import numpy as np
# import tushare
def save_hs300_tickers():
    """Scrape the CSI 300 (沪深300) constituent list from Chinese Wikipedia,
    convert each stock code to a Yahoo-style ticker, and cache the list to
    ``hs300tickers.pickle``.

    Returns:
        list[str]: tickers such as ``"600000.SS"`` (Shanghai — codes that
        start with "6") or ``"000001.SZ"`` (Shenzhen — all other codes).

    Raises:
        requests.HTTPError: if the Wikipedia page cannot be fetched.
    """
    # resp = requests.get('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    resp = requests.get(
        'https://zh.wikipedia.org/wiki/%E6%B2%AA%E6%B7%B1300#%E6%88%90%E4%BB%BD%E8%82%A1%E5%88%97%E8%A1%A8')
    # Fail fast on a 4xx/5xx response instead of parsing an error page.
    resp.raise_for_status()
    soup = bs.BeautifulSoup(resp.text, "lxml")
    table = soup.find('table', {
        'class': "wikitable collapsible sortable"})
    tickers = []
    for row in table.find_all('tr')[1:]:
        # .strip() removes the trailing newline Wikipedia leaves in each
        # <td>; without it the exchange suffix is appended after a "\n"
        # (e.g. "600000\n.SS"), producing unusable tickers.
        ticker = row.find('td').text.strip()
        if not ticker:
            # Skip empty/malformed rows instead of raising IndexError below.
            continue
        if ticker[0] == "6":
            ticker = ticker + ".SS"
        else:
            ticker = ticker + ".SZ"
        tickers.append(ticker)
    with open("hs300tickers.pickle", "wb") as f:
        pickle.dump(tickers, f)
    print(tickers)
    return tickers
之后,根据上一步保存的成份股名单逐只下载历史行情数据。
def get_data_from_yahoo(reload_hs300=False):
if reload_hs300:
tickers = save_hs300_tickers()
else:
with open("hs300tickers.pickle", "rb") as f:
tickers = pickle.load(f)
if not os.path.exists('stock_dfs'):
os.makedirs('stock_dfs')
start_date = dt.datetime(2010, 1, 1)
end_date = dt.datetime(