# Download Chinese stock data from Sina Finance

import numpy as np
import pandas as pd
from urllib2 import urlopen
from bs4 import BeautifulSoup 

def get_year_range(code):
    """Return the years listed on Sina's market-history page for a stock.

    Downloads the vMS_MarketHistory page for ``code`` and reads the
    ``<option>`` entries of its ``<select name="year">`` element.

    Args:
        code: Stock code, interpolated into the page URL.

    Returns:
        A list holding the ``.string`` of every year ``<option>``, in page
        order. An entry is None when bs4 cannot reduce that option to a
        single string.

    Raises:
        IndexError: If the page contains no ``<select name="year">``.
    """
    url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vMS_MarketHistory/stockid/%s.phtml' % code
    rsp = urlopen(url)
    try:
        content = rsp.read()
    finally:
        # Release the connection even if the read fails part-way.
        rsp.close()

    soup = BeautifulSoup(content)
    selects = soup.find_all('select', attrs={'name': 'year'})
    if not selects:
        # Same exception type as indexing the empty result would raise, but
        # with a message that says which stock page was unusable.
        raise IndexError('no year <select> found on the history page for %s' % code)
    return [option.string for option in selects[0].find_all('option')]
       
_HISTORY_URL = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vMS_MarketHistory/stockid/'

# Order in which the first seven cells of a table row are read, and the
# order in which the columns are written to the CSV file.
_PAGE_COLUMNS = ['Date', 'Open', 'High', 'Close', 'Low', 'ShareVolume', 'DollarVolume']
_CSV_COLUMNS = ['Date', 'Open', 'Close', 'High', 'Low', 'ShareVolume', 'DollarVolume']


def _fetch_quarter(codestr, year, jidu):
    """Download one year/quarter page and return its rows as a DataFrame.

    Any failure (network error, missing table, short row, ``year`` being
    None) propagates as an exception; the caller decides how to handle it.
    """
    url = _HISTORY_URL + codestr + '.phtml?year=' + year + '&jidu=' + jidu
    rsp = urlopen(url)
    try:
        html = rsp.read()
    finally:
        # Release the connection even if the read fails part-way.
        rsp.close()

    soup = BeautifulSoup(html, from_encoding='GB2312')
    tables = soup.find_all('table', attrs={'id': 'FundHoldSharesTable'})

    records = []
    # The first two <tr> rows are skipped (presumably title and header rows).
    for row in tables[0].find_all('tr')[2:]:
        cells = row.find_all('td')
        # Index each cell explicitly so a row with fewer than seven cells
        # raises IndexError and the whole quarter is rejected.
        records.append([cells[i].get_text(strip=True)
                        for i in range(len(_PAGE_COLUMNS))])
    return pd.DataFrame(records, columns=_PAGE_COLUMNS)[_CSV_COLUMNS]


def get_data(code):
    """Download the full price history of one stock and save it as CSV.

    For every year returned by ``get_year_range(code)`` and every quarter
    1-4, the matching history page is fetched and parsed. Quarters that fail
    for any reason are skipped. The combined table is written to
    ``./data/price_<code>.csv`` (UTF-8, no index column) and a completion
    message is printed. The ``./data`` directory is not created here.

    Args:
        code: Stock code; converted with ``str()`` wherever it is used as
            text.

    Returns:
        None.

    Raises:
        Whatever ``get_year_range`` raises; that call is not guarded.
    """
    import logging

    log = logging.getLogger(__name__)
    codestr = str(code)

    frames = []
    for year in get_year_range(code):
        for season in range(1, 5):
            jidu = str(season)
            try:
                frames.append(_fetch_quarter(codestr, year, jidu))
            except Exception as exc:
                # Best-effort scrape: skip a bad quarter, but do not swallow
                # KeyboardInterrupt/SystemExit the way a bare except would.
                log.debug('Skipping %s year=%s quarter=%s: %s',
                          codestr, year, jidu, exc)
                continue

    # Concatenate once instead of growing the frame inside the loop;
    # pd.concat rejects an empty list, so fall back to an empty frame.
    df = pd.concat(frames) if frames else pd.DataFrame()

    print('Finish All: ' + codestr + '!')
    filedestiny = './data/price_' + codestr + '.csv'
    df.to_csv(filedestiny, encoding='utf-8', index=False)

# Driver: read the stock codes from the 'Code' column of stocklist.csv and
# download the price history of each one.
ratingdata = pd.read_csv('stocklist.csv')

# Drop missing codes before calling np.unique: np.unique returns a plain
# ndarray, which has no .dropna() method. np.unique also sorts the codes, so
# the start offset below refers to the same position on every run.
# Each code is rendered as an integer string, left-padded with zeros to six
# characters.
stockcode = [str(int(raw)).zfill(6)
             for raw in np.unique(ratingdata.Code.dropna())]

# Position in the sorted code list at which downloading starts.
# NOTE(review): 1932 looks like a resume point left over from an earlier,
# interrupted run -- confirm, and set to 0 to download every code.
START_INDEX = 1932
stockcodesub = stockcode[START_INDEX:]

for code in stockcodesub:
    get_data(code)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值