爬取新浪股票财务数据

# coding=utf-8
import HTMLParser

import urllib2
import sys

# File-system encoding reported by the interpreter.
# NOTE(review): this name shadows the built-in ``type`` and is not referenced
# anywhere else in the visible code -- confirm before renaming or removing it.
type = sys.getfilesystemencoding()
# Financial-summary fields handled by this script:
# Report period end date
# Net assets per share
# Earnings per share
# Cash content per share
# Capital reserve per share
# Total fixed assets
# Total current assets
# Total assets
# Total long-term liabilities
# Revenue from main operations
# Financial expenses
# Net profit
class Stock:
    """Financial-summary figures of one reporting period, kept as strings.

    An instance is built from a single comma-separated line of 22 items.
    Item 0 is the period end date; most of the remaining values are preceded
    by a label item, which is why only some positions are read.  A value
    equal to '-' (the placeholder for "no data") is stored as '0'.

    Attributes (all strings):
        day:     period end date with the dashes removed
        mgzjc:   net assets per share
        mgsy:    earnings per share
        mgxjhl:  cash content per share
        mgjbgjj: capital reserve per share
        gdzchj:  total fixed assets
        ldzchj:  total current assets
        zchj:    total assets
        cqfzhj:  total long-term liabilities
        zyywsr:  revenue from main operations
        cwfy:    financial expenses
        jlr:     net profit
    """

    # (attribute name, position of its value in the split line).  Positions
    # 18 and 19 are adjacent because no label item sits between them.
    _FIELD_POSITIONS = (
        ("mgzjc", 2),
        ("mgsy", 4),
        ("mgxjhl", 6),
        ("mgjbgjj", 8),
        ("gdzchj", 10),
        ("ldzchj", 12),
        ("zchj", 14),
        ("cqfzhj", 16),
        ("zyywsr", 18),
        ("cwfy", 19),
        ("jlr", 21),
    )

    def __init__(self, line):
        """Populate the record from one comma-separated ``line``.

        Raises:
            IndexError: if ``line`` has fewer than 22 comma-separated items.
        """
        items = line.split(",")

        # The date is the only field that is also normalised: its dashes
        # are stripped, e.g. "2018-03-31" -> "20180331".
        raw_day = items[0]
        if raw_day == '-':
            self.day = '0'
        else:
            self.day = raw_day.replace("-", "")

        for attr_name, position in self._FIELD_POSITIONS:
            value = items[position]
            if value == '-':
                value = '0'
            setattr(self, attr_name, value)

    def __repr__(self):
        """Return all fields as ``name:value`` pairs spread over two lines."""
        # The second line starts with eight spaces; they are part of the
        # emitted text.
        template = (
            "day:%s,mgzjc:%s,mgsy:%s,mgxjhl:%s,mgjbgjj:%s,gdzchj:%s,ldzchj:%s,zchj:%s,\n"
            "        cqfzhj:%s,zyywsr:%s,cwfy:%s,jlr:%s"
        )
        values = [self.day]
        for attr_name, _position in self._FIELD_POSITIONS:
            values.append(getattr(self, attr_name))
        return template % tuple(values)

class stock_parser(HTMLParser.HTMLParser):
    """Collect the text of every ``<td>`` whose first attribute is ``align``.

    After ``feed()`` has been called, ``self.data`` holds the captured text
    chunks in document order.
    """

    def __init__(self):
        # Explicit base-class call, as in the original code.
        HTMLParser.HTMLParser.__init__(self)
        self.handledtags = ['td']
        # Name of the tag currently being captured, or None outside of one.
        self.processing = None
        self.data = []

    def handle_starttag(self, tag, attrs):
        """Start capturing when a handled tag opens with ``align`` first."""
        if tag not in self.handledtags:
            return
        if not attrs:
            return
        first_attr_name = attrs[0][0]
        if first_attr_name == 'align':
            self.processing = tag

    def handle_data(self, data):
        """Record ``data`` while inside a captured cell; ignore it otherwise."""
        if not self.processing:
            return
        self.data.append(data)

    def handle_endtag(self, tag):
        """Stop capturing once the tag being captured is closed."""
        if self.processing == tag:
            self.processing = None

def parse_data(urldata):
    """Turn the HTML of a financial-summary page into ``Stock`` records.

    The captured table cells are cleaned (commas, the currency unit and
    CR/LF pairs removed) and grouped 22 at a time; each complete group is
    joined with commas and handed to ``Stock``.  Cells left over after the
    last complete group are discarded.

    Args:
        urldata: HTML text of the page.

    Returns:
        A list of ``Stock`` objects in page order.
    """
    cells_per_record = 22

    extractor = stock_parser()
    extractor.feed(urldata)

    cleaned = [
        cell.replace(",", "").replace("元", "").replace("\r\n", "")
        for cell in extractor.data
    ]

    # Only whole records are converted; a trailing partial one is dropped.
    usable = len(cleaned) - len(cleaned) % cells_per_record
    return [
        Stock(",".join(cleaned[start:start + cells_per_record]))
        for start in range(0, usable, cells_per_record)
    ]


def get_stock(stock_code):
    """Download and parse the Sina financial-summary page of one stock.

    Args:
        stock_code: Stock code string that is interpolated into the page
            URL, e.g. "002122".

    Returns:
        The list of ``Stock`` records produced by ``parse_data``.

    Raises:
        Any error raised by ``urllib2.urlopen`` or by decoding the response
        as GBK is propagated to the caller.
    """
    headers = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
    url = "http://vip.stock.finance.sina.com.cn/corp/go.php/vFD_FinanceSummary/stockid/%(stock_code)s.phtml?qq-pf-to=pcqq.c2c" % {'stock_code': stock_code}
    req = urllib2.Request(url=url, headers=headers)

    # Always release the connection, even if reading fails.
    response = urllib2.urlopen(req)
    try:
        raw = response.read()
    finally:
        response.close()

    # The response is decoded as GBK; re-encode as UTF-8 for the parser.
    page = unicode(raw, 'GBK').encode('UTF-8')

    # Empty table cells contain only the "&nbsp;" entity, which the parser
    # does not report as cell text.  Turning it into "-" gives every cell a
    # text value, and "-" is the placeholder Stock maps to '0'.  Replacing
    # plain spaces instead would corrupt the markup itself
    # ("<td align=...>" -> "<td-align=...>") so that no cell is captured.
    page = page.replace("&nbsp;", "-")

    return parse_data(page)

if __name__ == '__main__':
    # Demo run: fetch the financial summary of stock 002122 and print one
    # record per reporting period.
    for record in get_stock("002122"):
        print(record)

打印结果

day:20180331,mgzjc:3.9257,mgsy:-0.0198,mgxjhl:-0.2345,mgjbgjj:0.866,gdzchj:0,ldzchj:4102410000.00,zchj:8769580000.00,
        cqfzhj:2287790000.00,zyywsr:407660000.00,cwfy:63448800.00,jlr:-23579600.00
day:20171231,mgzjc:3.9596,mgsy:0.1076,mgxjhl:-0.5408,mgjbgjj:0.866,gdzchj:0,ldzchj:5597130000.00,zchj:9599200000.00,
        cqfzhj:2251640000.00,zyywsr:2544910000.00,cwfy:110869000.00,jlr:127774000.00
day:20170930,mgzjc:3.7981,mgsy:0.0315,mgxjhl:0.0256,mgjbgjj:0.8639,gdzchj:0,ldzchj:4179400000.00,zchj:7940650000.00,
        cqfzhj:434155000.00,zyywsr:1891130000.00,cwfy:24835500.00,jlr:37413300.00
day:20170630,mgzjc:3.7977,mgsy:0.0299,mgxjhl:-0.044,mgjbgjj:0.8654,gdzchj:0,ldzchj:3515640000.00,zchj:6672430000.00,
        cqfzhj:422083000.00,zyywsr:1237070000.00,cwfy:12539000.00,jlr:35469500.00
day:20170331,mgzjc:3.7869,mgsy:0.014,mgxjhl:0.0328,mgjbgjj:0.866,gdzchj:0,ldzchj:3738750000.00,zchj:6707870000.00,
        cqfzhj:442561000.00,zyywsr:604797000.00,cwfy:5217530.00,jlr:16600700.00
day:20161231,mgzjc:3.756,mgsy:-0.2123,mgxjhl:0.7535,mgjbgjj:0.866,gdzchj:0,ldzchj:3741660000.00,zchj:6648370000.00,
        cqfzhj:457945000.00,zyywsr:2158280000.00,cwfy:16807600.00,jlr:-252162000.00
day:20160930,mgzjc:3.9971,mgsy:0.024,mgxjhl:0.5512,mgjbgjj:0.8657,gdzchj:0,ldzchj:3832880000.00,zchj:6865150000.00,
        cqfzhj:484412000.00,zyywsr:1613520000.00,cwfy:21471000.00,jlr:28486400.00
day:20160630,mgzjc:3.9839,mgsy:0.0205,mgxjhl:0.2452,mgjbgjj:0.8657,gdzchj:0,ldzchj:4337500000.00,zchj:7355360000.00,
        cqfzhj:650189000.00,zyywsr:1109380000.00,cwfy:19810900.00,jlr:24399500.00
day:20160331,mgzjc:4.0109,mgsy:0.01,mgxjhl:0.1103,mgjbgjj:0.8657,gdzchj:0,ldzchj:4504310000.00,zchj:7521450000.00,
        cqfzhj:672886000.00,zyywsr:519708000.00,cwfy:9121710.00,jlr:11900100.00
day:20151231,mgzjc:3.999,mgsy:0.0397,mgxjhl:0.1782,mgjbgjj:0.8657,gdzchj:0,ldzchj:4466900000.00,zchj:7555120000.00,
        cqfzhj:698397000.00,zyywsr:2098700000.00,cwfy:51714300.00,jlr:47213000.00
day:20150930,mgzjc:3.9833,mgsy:0.0337,mgxjhl:0.003,mgjbgjj:0.8657,gdzchj:0,ldzchj:4565040000.00,zchj:7568470000.00,
        cqfzhj:741528000.00,zyywsr:1696520000.00,cwfy:33369000.00,jlr:40090600.00
  • 3
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值