Fetching Real-Time Stock Data with Python

I recently started learning web scraping and put together a small script that queries real-time quotes for a given stock code through Baidu (many sites block scrapers, so I went through Baidu; I won't go into the details here).

The prep work is the usual routine: capture the requests (pressing F12 in the browser is enough), locate the corresponding URL, and then start crawling. Exciting!

import time
import urllib2
import cookielib
from bs4 import BeautifulSoup

check_url = r"https://www.baidu.com/s?wd={ID}"
check_id_list = ["000001", "399001", "300749", "600016", "601939", "601098"]

def get_sys_time():
    return time.strftime('%Y-%m-%d_%H_%M_%S', time.localtime(time.time()))

def get_info_from_baidu():
    cookie = cookielib.CookieJar()  # used to store cookies
    # create an OpenerDirector instance, which handles cookies automatically
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))

    # the opener adds these User-Agent and Connection headers to every request
    opener.addheaders = [('User-Agent',
                          'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.100 Safari/537.36'),
                         ('Connection', 'keep-alive')]

    urllib2.install_opener(opener)

    info_dict = {}
    for check_id in check_id_list:
        url = check_url.replace("{ID}", check_id)
        request = urllib2.Request(url)
        response = opener.open(request)
        home_page = response.read()
        soup = BeautifulSoup(home_page, 'lxml')
        stock_node = soup.find('div', class_='op-stockdynamic-moretab-cur')
        spans = stock_node.find_all('span')
        # first span: latest price; last span: change percentage
        info = spans[0].get_text().ljust(10) + spans[-1].get_text().ljust(16)
        info_dict[check_id] = info
    opener.close()
    cookie.clear()
    return info_dict

if __name__ == '__main__':
    print ">>> %s <<<" % get_sys_time()
    infos = get_info_from_baidu()
    for key, item in infos.items():
        print key, item

Just hit the target URL and parse the returned page to get the real-time info we want. A quick run of the script printed the expected quotes. Exciting...
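
To make the parsing step concrete, here is a minimal, self-contained sketch of the extraction logic run against hand-written HTML. The class name op-stockdynamic-moretab-cur is taken from the script above, but the surrounding markup and the numbers are only an illustrative guess at what Baidu's result card looks like, not the real page.

from bs4 import BeautifulSoup

# hypothetical markup: a simplified stand-in for Baidu's stock result card
sample_html = """
<div class="op-stockdynamic-moretab-cur">
    <span>3052.79</span>
    <span>+28.28</span>
    <span>+0.94%</span>
</div>
"""

soup = BeautifulSoup(sample_html, 'lxml')
node = soup.find('div', class_='op-stockdynamic-moretab-cur')
spans = node.find_all('span')
# same rule as the script: first span = latest price, last span = change percentage
print spans[0].get_text().ljust(10) + spans[-1].get_text().ljust(16)
# -> 3052.79   +0.94%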

However, the excitement didn't last long. After a few days I noticed something was off: for some stock codes the returned info was several days old, from a previous close (the market was clearly up, yet the script showed it down, which was quite confusing). I dug around for a while without much progress; my guess is it's most likely related to the query URL, but with my limited experience I couldn't pin it down. If anyone knows what's going on, pointers are welcome.

Since that road was rough, I had to switch to another interface. After some searching I found the Sina quote API, and a quick test showed it works quite well. A detailed description of the interface can be found at https://blog.csdn.net/fangquan1980/article/details/80006762
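
For reference, the interface returns a single line of JavaScript assigning a comma-separated quote string. The field layout below (name, today's open, previous close, current price, ...) follows the commonly documented format from the linked post; the sample line itself is made up for illustration, so verify the indices against a real response.

# illustrative response from http://hq.sinajs.cn/list=sh600016 (the values are made up)
sample = 'var hq_str_sh600016="StockName,6.300,6.280,6.350,6.360,6.270,...";'

fields = sample.split('"')[1].split(",")
name = fields[0]         # stock name
today_open = fields[1]   # today's opening price
prev_close = fields[2]   # previous close
current = fields[3]      # latest price

print name, prev_close, current
# -> StockName 6.280 6.350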

With the interface found, on with the crawling...

import time
import urllib2
import cookielib
from collections import OrderedDict

check_url = "http://hq.sinajs.cn/list={ID}"
check_id_list = ["sh000001", "sz399001","sz399006", "sh600016", "sh601939", "sh601098", "sz300749"]

def get_sys_time():
    return time.strftime('%Y-%m-%d_%H_%M_%S', time.localtime(time.time()))

def get_info_from_feedback(basic_info):
    # keep only the text between the first and the last comma,
    # then split it into the individual quote fields
    start_index = basic_info.find(",") + 1
    end_index = basic_info.rfind(",")
    info = basic_info[start_index:end_index]
    info_list = info.split(",")

    yesterday = info_list[1]             # previous close
    now = round(float(info_list[2]), 2)  # latest price
    gap = round(now - float(yesterday), 2)
    percent = round((gap / float(yesterday)) * 100, 2)
    if gap > 0:
        gap = "+" + str(gap)
        percent = "+" + str(percent)

    return "%s%s(%s%%)" % (str(now).ljust(10), str(gap), str(percent))

def get_info_from_sina():
    info_dict = OrderedDict()  # keep the stock codes in insertion order

    cookie = cookielib.CookieJar()  # used to store cookies
    # create an OpenerDirector instance, which handles cookies automatically
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
    # the opener adds these User-Agent and Connection headers to every request
    opener.addheaders = [('User-Agent',
                          'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.100 Safari/537.36'),
                         ('Connection', 'keep-alive')]
    urllib2.install_opener(opener)

    for check_id in check_id_list:
        url = check_url.replace("{ID}", check_id)
        request = urllib2.Request(url)
        response = opener.open(request)
        basic_info = response.read()
        info_dict[check_id] = get_info_from_feedback(basic_info)

    opener.close()
    cookie.clear()

    return info_dict

if __name__ == '__main__':
    print ">>> %s <<<"%get_sys_time()
    infos = get_info_from_sina()
    for key, item in infos.items():
        print key, item

This time it really works: the data is real-time and the requests are fast, which is exactly what I needed. No more sneaking peeks at my phone.
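
As a small usage note, the script can be wrapped in a simple polling loop so the quotes refresh on their own. This sketch reuses get_sys_time and get_info_from_sina from the script above, and the 30-second interval is just an illustrative choice.

import time

if __name__ == '__main__':
    # refresh the quotes every 30 seconds until interrupted with Ctrl+C
    while True:
        print ">>> %s <<<" % get_sys_time()
        for key, item in get_info_from_sina().items():
            print key, item
        time.sleep(30)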

Summary: this was only about getting real-time stock data; I haven't yet done a systematic study of web scraping, so more to come. The lagging Baidu data and the related URL question still need further investigation, so I'm marking them here for now.

 
