用Python爬中国银行指定日期九点三十分汇率

最新推荐文章于 2022-11-23 16:57:49 发布

置顶 y_aaaa

最新推荐文章于 2022-11-23 16:57:49 发布

阅读量2.3k

点赞数 4

文章标签： Python 银行定点汇率爬虫

本文链接：https://blog.csdn.net/y_aaaa/article/details/103143970

版权

需求描述

公司财务需要中国银行每天 9 点 30 分之后发布的第一条汇率数据。由于网上的汇率接口提供的都是实时汇率，无法按指定时间点查询，因此改为从中国银行外汇牌价页面 (https://srh.bankofchina.com/search/whpj/search_cn.jsp) 抓取数据。

环境搭建

项目采用 Python 3.6。

依赖安装

pip install requests
pip install lxml

代码实现

import requests
from lxml import etree
import re
import math

def getRate(dateb):
    """Collect one quotation per configured currency for the given day.

    For every active entry in ``pjcodes`` this calls ``crow`` with ``dateb``
    as both the start and the end date and, when a row comes back, converts
    it into a dictionary.

    Args:
        dateb: The query date, passed unchanged to ``crow`` (the demo at the
            bottom of this file uses the form ``'2019-11-18'``).

    Returns:
        A list of dicts, one per currency for which ``crow`` returned a row,
        with these keys:
            currency  - currency name
            rate_xhr  - spot exchange buying rate
            rate_xcr  - cash buying rate
            rate_xhc  - spot exchange selling rate
            rate_xcc  - cash selling rate
            rate_zs   - Bank of China conversion rate
            dateb     - publication time
    """
    # "key" is the currency name and "value" is the Bank of China currency
    # code (it can be captured from the site with the browser dev tools, F12).
    # Only "key" is sent with the query below.
    pjcodes = [
        {"key": "美元", "value": 1316},
        {"key": '港币', "value": 1315},
        {"key": "日元", "value": 1323},
        {"key": "欧元", "value": 1326},
        {"key": '英镑', "value": 1314},
        {"key": "加拿大元", "value": 1324},
        {"key": "澳大利亚元", "value": 1325},
        {"key": "瑞士法郎", "value": 1317},
        {"key": "新加坡元", "value": 1375},
        # Further currencies, currently disabled:
        # {"key": "瑞典克朗", "value": 1320},
        # {"key": "丹麦克朗", "value": 1321},
        # {"key": "挪威克朗", "value": 1322},
        # {"key": "德国马克", "value": 1318},
        # {"key": "法国法郎", "value": 1319},
        # {"key": "澳门元", "value": 1327},
        # {"key": "菲律宾比索", "value": 1328},
        # {"key": "泰国铢", "value": 1329},
        # {"key": "新西兰元", "value": 1330},
        # {"key": "韩元", "value": 1331},
        # {"key": "卢布", "value": 1843},
        # {"key": "林吉特", "value": 2890},
        # {"key": "新台币", "value": 2895},
        # {"key": "西班牙比塞塔", "value": 1370},
        # {"key": "意大利里拉", "value": 1371},
        # {"key": "荷兰盾", "value": 1372},
        # {"key": "比利时法郎", "value": 1373},
        # {"key": "芬兰马克", "value": 1374},
        # {"key": "印尼卢比", "value": 3030},
        # {"key": "巴西里亚尔", "value": 3253},
        # {"key": "阿联酋迪拉姆", "value": 3899},
        # {"key": "印度卢比", "value": 3900},
        # {"key": "南非兰特", "value": 3901},
        # {"key": "沙特里亚尔", "value": 4418},
        # {"key": "土耳其里拉", "value": 4560}
    ]

    # Output keys, listed in the same order as the columns of a quotation row.
    field_names = (
        'currency',
        'rate_xhr',
        'rate_xcr',
        'rate_xhc',
        'rate_xcc',
        'rate_zs',
        'dateb',
    )

    results = []
    for entry in pjcodes:
        row = crow(dateb, dateb, entry['key'], None)
        if len(row) == 0:
            # No qualifying quotation for this currency; leave it out.
            continue
        print("正在获取"+row[0]+" 汇率")
        results.append(
            {name: row[position] for position, name in enumerate(field_names)}
        )
    print("---------------------------")
    return results

# 返回html对象
def getHtml(erectDate, nothing, pjname, page, timeout=10):
    """POST a query to the Bank of China FX quotation search page.

    Args:
        erectDate: Start date of the query.
        nothing: End date of the query (callers in this file pass the same
            day as ``erectDate``).
        pjname: Currency identifier sent in the ``pjname`` form field.
        page: Number of the result page to request.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.post``. Without a timeout a stalled connection
            would block the caller indefinitely.

    Returns:
        A ``(html, text)`` tuple: the response body parsed with
        ``etree.HTML`` and the raw response text.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    # Form fields sent in the POST body.
    pyload = {
        "erectDate": erectDate,
        "nothing": nothing,
        "pjname": pjname,
        "page": page,
        "head": "head_620.js",
        "bottom": "ottom_591.js",
    }

    # Browser-like headers so the request is not rejected by the site.
    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "Origin": "https://srh.bankofchina.com",
        "Referer": "https://srh.bankofchina.com/search/whpj/search_cn.jsp",
        "Upgrade-Insecure-Requests": "1",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
        "User-Agent": "Mozilla / 5.0(WindowsNT 10.0;WOW64) AppleWebKit / 537.36(KHTML, likeGecko) Chrome / 68.0.3440.106 Safari / 537.36"
    }

    response = requests.post(
        "https://srh.bankofchina.com/search/whpj/search_cn.jsp",
        data=pyload,
        headers=headers,
        timeout=timeout,
    )
    body = response.text
    # Return both the parsed tree (for XPath queries) and the raw text
    # (getPageCount searches it with a regular expression).
    return (etree.HTML(body), body)


def crow(erectDate, nothing, pjname, page):
    """Find the earliest quotation published at or after 09:30.

    The result pages list quotations newest first, so the earliest ones are
    on the last page. The search therefore starts at ``page`` (the last page
    when ``page`` is None) and walks towards page 1 until a page contains a
    row timed 09:30 or later.

    Args:
        erectDate: Start date of the query.
        nothing: End date of the query.
        pjname: Currency identifier forwarded to ``getHtml``.
        page: Page to start from, or None to start from the last page as
            reported by ``getPageCount``.

    Returns:
        The cell values of the matching row as a list (an empty ``<td>`` is
        represented by the integer 0), or an empty list when no page has a
        qualifying row.
    """
    if page is None:
        page = getPageCount(erectDate, nothing, pjname)

    while True:
        html = getHtml(erectDate, nothing, pjname, page)[0]
        # Every <tr> of the table inside the "BOC_main publish" div.
        rows = html.xpath('//div[@class="BOC_main publish"]/table/tr')

        candidates = []
        # The first and the last <tr> carry no quotation data.
        for row in rows[1:-1]:
            cells = []
            for td in row.xpath('td'):
                texts = td.xpath('text()')
                # Keep a placeholder for empty cells so that column
                # positions stay aligned.
                cells.append(texts[0] if texts else 0)

            # Column 6 is the publication time; skip rows that lack it or
            # whose time cell is empty instead of failing on them.
            if len(cells) < 7 or not isinstance(cells[6], str):
                continue
            stamp = cells[6]
            try:
                # Characters 11-12 are the hour, 14-15 the minute.
                hour = int(stamp[11:13])
                qualifies = hour > 9 or (hour == 9 and int(stamp[14:16]) >= 30)
            except ValueError:
                continue
            if qualifies:
                candidates.append(cells)

        if candidates:
            # Rows are in descending time order, so the last qualifying row
            # is the earliest one at or after 09:30.
            return candidates[-1]
        if page - 1 <= 0:
            return []
        # Nothing late enough on this page: try the previous (newer) page.
        page -= 1

def getPageCount(erectDate, nothing, pjname):
    """Return the number of result pages for a query.

    Page 1 is fetched and the total record count is read from the
    ``var m_nRecordCount = N`` assignment embedded in the page source.
    The page count is derived from it at 20 records per page.

    Args:
        erectDate: Start date of the query.
        nothing: End date of the query.
        pjname: Currency identifier forwarded to ``getHtml``.

    Returns:
        The page count as an int, never less than 1. When the record count
        cannot be found in the page, 1 is returned so that the caller still
        inspects the first page.
    """
    page_source = getHtml(erectDate, nothing, pjname, 1)[1]
    match = re.search(r"(?<=var m_nRecordCount = )\d+", page_source)
    if match is None:
        # The response does not contain the counter (e.g. an error page).
        return 1
    record_count = int(match.group(0))
    # 20 records per page, rounded up; at least one page.
    return max(1, math.ceil(record_count / 20))

if __name__ == "__main__":
    # Demo run: fetch and print the quotations for 18 November 2019.
    print(getRate('2019-11-18'))

运行效果

正在获取美元 汇率
正在获取港币 汇率
正在获取日元 汇率
正在获取欧元 汇率
正在获取英镑 汇率
正在获取加拿大元 汇率
正在获取澳大利亚元 汇率
正在获取瑞士法郎 汇率
正在获取新加坡元 汇率
---------------------------
[{'currency': '美元', 'rate_xhr': '700.18', 'rate_xcr': '694.49', 'rate_xhc': '703.15', 'rate_xcc': '703.15', 'rate_zs': '700.37', 'dateb': '2019.11.18 09:31:33'}, {'currency': '港币', 'rate_xhr': '89.41', 'rate_xcr': '88.7', 'rate_xhc': '89.77', 'rate_xcc': '89.77', 'rate_zs': '89.46', 'dateb': '2019.11.18 09:31:33'}, {'currency': '日元', 'rate_xhr': '6.4259', 'rate_xcr': '6.2262', 'rate_xhc': '6.4731', 'rate_xcc': '6.4767', 'rate_zs': '6.443', 'dateb': '2019.11.18 09:31:33'}, {'currency': '欧元', 'rate_xhr': '773.11', 'rate_xcr': '749.08', 'rate_xhc': '778.81', 'rate_xcc': '780.54', 'rate_zs': '774.14', 'dateb': '2019.11.18 09:31:33'}, {'currency': '英镑', 'rate_xhr': '903.37', 'rate_xcr': '875.3', 'rate_xhc': '910.02', 'rate_xcc': '912.23', 'rate_zs': '904.8', 'dateb': '2019.11.18 09:31:33'}, {'currency': '加拿大元', 'rate_xhr': '528.98', 'rate_xcr': '512.28', 'rate_xhc': '532.88', 'rate_xcc': '534.17', 'rate_zs': '529.69', 'dateb': '2019.11.18 09:31:33'}, {'currency': '澳大利亚元', 'rate_xhr': '476.5', 'rate_xcr': '461.7', 'rate_xhc': '480.01', 'rate_xcc': '481.18', 'rate_zs': '477.28', 'dateb': '2019.11.18 09:31:33'}, {'currency': '瑞士法郎', 'rate_xhr': '706.46', 'rate_xcr': '684.66', 'rate_xhc': '711.42', 'rate_xcc': '713.76', 'rate_zs': '708.02', 'dateb': '2019.11.18 09:31:33'}, {'currency': '新加坡元', 'rate_xhr': '513.63', 'rate_xcr': '497.78', 'rate_xhc': '517.23', 'rate_xcc': '518.78', 'rate_zs': '514.87', 'dateb': '2019.11.18 09:31:33'}]