import numpy as np
import pandas as pd
from urllib2 import urlopen
from bs4 import BeautifulSoup
def get_year_range(code):
    """Return the years offered by the Sina market-history page of a stock.

    Downloads the history page for ``code`` and reads the ``<option>``
    entries of its ``<select name="year">`` drop-down.

    Args:
        code: Stock code; interpolated into the page URL with ``%s``.

    Returns:
        A list holding the ``.string`` of every option tag, in page order.
        The list is empty when the page has no ``year`` drop-down, so a
        single unexpected page no longer aborts the caller with an
        ``IndexError``.
    """
    url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vMS_MarketHistory/stockid/%s.phtml' % code
    response = urlopen(url)
    try:
        content = response.read()
    finally:
        # Release the connection even when the read fails part-way.
        response.close()

    soup = BeautifulSoup(content)
    year_select = soup.find('select', attrs={'name': 'year'})
    if year_select is None:
        return []
    return [option.string for option in year_select.findAll('option')]
# Column names of the CSV written by get_data, in output order.
_PRICE_COLUMNS = ['Date', 'Open', 'Close', 'High', 'Low', 'ShareVolume', 'DollarVolume']


def _fetch_quarter_rows(code, year, season):
    """Download one year/season page and return its price rows as tuples.

    Each tuple is ordered like ``_PRICE_COLUMNS``.  Raises whatever the
    download or the parsing raises (for example ``IndexError`` when the page
    has no ``FundHoldSharesTable`` table or a row has fewer than 7 cells).
    """
    url = ('http://vip.stock.finance.sina.com.cn/corp/go.php/vMS_MarketHistory/stockid/'
           + str(code) + '.phtml?year=' + year + '&jidu=' + str(season))
    response = urlopen(url)
    try:
        html = response.read()
    finally:
        # Release the connection even when the read fails part-way.
        response.close()

    soup = BeautifulSoup(html, from_encoding='GB2312')
    table = soup.find_all('table', attrs={'id': 'FundHoldSharesTable'})[0]

    quarter_rows = []
    # The first two <tr> elements are skipped.
    # NOTE(review): presumably the table title and the column headers - confirm.
    for tr in table.findAll('tr')[2:]:
        cells = [td.get_text(strip=True) for td in tr.findAll('td')]
        # Cells are read as date, open, high, close, low, share volume,
        # dollar volume; high and close are swapped here so the tuple
        # matches the Open/Close/High/Low order of _PRICE_COLUMNS.
        quarter_rows.append((cells[0], cells[1], cells[3], cells[2],
                             cells[4], cells[5], cells[6]))
    return quarter_rows


def get_data(code):
    """Download the full price history of one stock and save it as CSV.

    For every year reported by ``get_year_range(code)`` and every season
    1-4 (sent as the ``jidu`` query parameter), the history table is scraped
    and its rows are collected.  The result is written to
    ``./data/price_<code>.csv`` with the columns in ``_PRICE_COLUMNS``; all
    values are kept as the text found in the page.

    Downloading is best effort: a year/season whose page cannot be fetched
    or parsed is skipped as a whole.  Only ``Exception`` subclasses are
    swallowed, so Ctrl-C and ``SystemExit`` still stop the run.

    Args:
        code: Stock code; converted with ``str()`` wherever it is used.

    Returns:
        None.  The function prints a completion line and writes the CSV.
    """
    import os

    records = []
    for year in get_year_range(code):
        for season in range(1, 5):
            try:
                quarter_rows = _fetch_quarter_rows(code, year, season)
            except Exception:
                # Deliberately silent: failing quarters are simply left out.
                continue
            # Added only after the whole quarter parsed, so a failing page
            # never contributes a partial quarter.
            records.extend(quarter_rows)

    # Build the frame once instead of calling DataFrame.append per quarter
    # (quadratic copying, and removed in pandas 2.0).
    df = pd.DataFrame(records, columns=_PRICE_COLUMNS)

    print('Finish All: ' + str(code) + '!')
    filedestiny = './data/price_' + str(code) + '.csv'
    # Create the output directory on first use so the download is not lost
    # to a missing folder.
    out_dir = os.path.dirname(filedestiny)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    df.to_csv(filedestiny, encoding='utf-8', index=False)
# Driver: read the stock list and download the price history of each code.
# 'stocklist.csv' must provide a ``Code`` column.
ratingdata = pd.read_csv('stocklist.csv')

# Missing codes are dropped on the Series *before* de-duplicating:
# np.unique returns a plain ndarray, which has no dropna() method.
# Each code is then rendered as an integer left-padded with zeros to 6 digits.
stockcode = [str(int(raw)).zfill(6)
             for raw in np.unique(ratingdata.Code.dropna())]

# Only the codes from position 1932 onward are processed.
# NOTE(review): presumably a resume point after an interrupted run - confirm,
# and set to 0 for a full download.
stockcodesub = stockcode[1932:]
for code in stockcodesub:
    get_data(code)
# Download Chinese Stock Data from Sina Finance
# (Scraped page footer: latest recommended article published 2023-11-09 10:56:45)