2021-09-26

爬取东方财富网异常股票值

这是菜鸡的一篇博客。菜鸡已经立志要成为一名程序员,不想再当咸鱼了。爬取程序借鉴自一位大佬的代码,做了少许改动,可以在 PyCharm 中运行。
借鉴:https://www.cnblogs.com/Iceredtea/p/12164152.html

```python
# This is a sample Python script.

# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
import pandas as pd
import pymysql
import tushare as ts
import time
import requests
import json
from sqlalchemy import create_engine
from multiprocessing import Pool
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# ====================东方财富个股盘口异动数据抓取============================================================================================================
def EMydSpider(param_list):
    """Fetch Eastmoney intraday stock-change records and append them to MySQL.

    Data source page: http://quote.eastmoney.com/changes

    Each entry of ``param_list`` is sent as the query string of one GET
    request to the ``getAllStockChanges`` endpoint. Every record found under
    ``data -> allstock`` in the JSON replies is collected into a DataFrame,
    which is appended to the ``yichang`` table and printed.

    A page that fails to download or parse is reported on stdout and skipped;
    the remaining pages are still processed.

    Args:
        param_list: Sequence of dicts, one set of query parameters per page.

    Returns:
        None.
    """
    columns = ['symbol', 'stk_name', 'trade_date', 'chg_time', 'chg_type', 'chg_value']

    # Today's date, stored alongside every record as the trade date.
    cur_date = time.strftime("%Y%m%d", time.localtime())

    # The endpoint that actually serves the data behind the web page.
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (Khtml, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.5.3738.400"}
    url = "http://push2ex.eastmoney.com/getAllStockChanges?type=8201,8202,8193,4,32,64,8207,8209,8211,8213,8215,8204,8203,8194,8,16,128,8208,8210,8212,8214,8216"

    # Silence urllib3's insecure-request warnings.
    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

    # Download phase: one session is reused for all pages and closed afterwards.
    html_list = []
    with requests.Session() as session:
        for page_no, param in enumerate(param_list, start=1):
            try:
                # The timeout keeps a stalled connection from hanging the worker forever.
                response = session.get(url=url, params=param, headers=header, timeout=10)
                html_list.append(json.loads(response.text))
                print("第%s页东财个股异动数据已抓取" % page_no)
            except (requests.RequestException, ValueError) as spider_error:
                # ValueError covers a reply body that is not valid JSON.
                print("html抓取过程报错,错误信息为:%s" % spider_error)

    print("--------------------------------------")
    print("开始抓取东方财富个股盘口异动网页数据解析")

    # Parse phase: flatten every page into plain row dicts.
    records = []
    for html in html_list:
        try:
            for stock in html['data']['allstock']:
                records.append({
                    'symbol': stock['c'],      # stock code, no exchange suffix
                    'stk_name': stock['n'],    # stock name
                    'trade_date': cur_date,
                    'chg_time': stock['tm'],   # time of the change
                    'chg_type': stock['t'],    # change type code
                    'chg_value': stock['i'],   # change value
                })
        except (KeyError, TypeError) as parse_error:
            # TypeError covers a reply whose 'data' entry is null.
            print("html解析过程报错,错误信息为:%s" % parse_error)

    # Build the frame once from all rows. DataFrame.append was removed in
    # pandas 2.0 and copied the whole frame on every call. Passing `columns`
    # fixes the column order and keeps an empty result well-formed.
    stock_yd = pd.DataFrame(records, columns=columns)

    if not stock_yd.empty:
        # URL format: mysql+pymysql://user:password@host/database
        # NOTE(review): credentials are hard-coded here; consider loading them
        # from configuration instead of source code.
        engine = create_engine('mysql+pymysql://fxq:fxq.520@127.0.0.1/gupiao?charset=utf8')
        try:
            stock_yd.to_sql('yichang', engine, if_exists='append', index=False)
        finally:
            # Release pooled connections; this function may run in a long-lived worker.
            engine.dispose()

    print(stock_yd)
    print("本次存储东方财富个股异动数据%s条" % stock_yd.shape[0])


# ====================主函数====================================================================================================================================

```python
if __name__ == '__main__':
    # Script entry point: build the per-page query parameters, crawl them with
    # a process pool, and report the elapsed wall-clock time.
    print("东方财富个股异动爬虫程序开始执行")
    print("--------------------------------------")
    start = time.time()

    # One query-parameter dict per page index (0-29).
    param_list = [
        {"pageindex": page, "pagesize": '64', "ut": '7eea3edcaed734bea9cbfc24409ed989', "dpt": 'wzchanges'}
        for page in range(0, 30)
    ]

    # Split the pages into one contiguous chunk per worker. Passing the whole
    # list as a single task would leave all but one process idle.
    worker_count = 4
    chunk_size = -(-len(param_list) // worker_count)  # ceiling division
    param_chunks = [
        param_list[offset:offset + chunk_size]
        for offset in range(0, len(param_list), chunk_size)
    ]

    # Process pool; the context manager shuts the workers down on exit.
    with Pool(processes=worker_count) as pool:
        try:
            # map() blocks until every chunk has been crawled and stored.
            pool.map(EMydSpider, param_chunks)
        except Exception as error:
            print("进程执行过程报错,错误信息为:%s"%error)

    end = time.time()
    print('东方财富个股异动爬虫程序共执行%0.2f秒.' % ((end - start)))
    print("东方财富个股异动爬虫程序执行完成")
      symbol stk_name trade_date  chg_time  chg_type  chg_value
0     605298     必得科技   20210926  145557.0    8194.0      49200
1     002564     天沃科技   20210926  145555.0      16.0   5.690000
2     600595    *ST中孚   20210926  145552.0      32.0   4.990000
3     300774      倍杰特   20210926  145543.0    8193.0      70000
4     000663     永安林业   20210926  145542.0      32.0   9.880000
...      ...      ...        ...       ...       ...        ...
1915  300358     楚天科技   20210926  132300.0    8201.0   0.030422



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值