爬取东方财富网异常股票值
这是菜鸡的一篇博客,菜鸡已经立志要成为一名程序员了,不想再当咸鱼了。爬取的程序借鉴自一位大佬,做了少许改动,能在PyCharm中运行
借鉴:https://www.cnblogs.com/Iceredtea/p/12164152.html
# This is a sample Python script.
# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
import pandas as pd
import pymysql
import tushare as ts
import time
import requests
import json
from sqlalchemy import create_engine
from multiprocessing import Pool
from requests.packages.urllib3.exceptions import InsecureRequestWarning
# ====================东方财富个股盘口异动数据抓取============================================================================================================
def EMydSpider(param_list):
    """Fetch Eastmoney stock-change pages and append the records to MySQL.

    Source page: http://quote.eastmoney.com/changes

    Each entry of ``param_list`` is sent as the query string of one GET
    request.  Every record found under ``data.allstock`` in the JSON
    responses becomes one row of the ``yichang`` table.

    Args:
        param_list: Iterable of query-parameter dicts, one per page.

    Returns:
        None.  Progress, the collected frame and the row count are printed.
    """
    columns = ['symbol', 'stk_name', 'trade_date', 'chg_time', 'chg_type', 'chg_value']

    # Today's date is stamped on every row as trade_date.
    cur_date = time.strftime("%Y%m%d", time.localtime())
    # URL layout: mysql+pymysql://<user>:<password>@<host>/<database>
    # NOTE(review): credentials are hard-coded here; consider reading them
    # from the environment or a config file instead.
    engine = create_engine('mysql+pymysql://fxq:fxq.520@127.0.0.1/gupiao?charset=utf8')

    # Endpoint that actually serves the data behind the web page.
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (Khtml, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.5.3738.400"}
    url = "http://push2ex.eastmoney.com/getAllStockChanges?type=8201,8202,8193,4,32,64,8207,8209,8211,8213,8215,8204,8203,8194,8,16,128,8208,8210,8212,8214,8216"

    # Silence urllib3's insecure-request warnings.
    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

    # ---- Download: one request per page, sharing a single session ----------
    html_list = []
    with requests.Session() as session:
        for position, param in enumerate(param_list):
            # Report the page from the request's own "pageindex" so the log
            # stays meaningful when only a slice of pages is passed in; fall
            # back to the position in param_list otherwise.
            try:
                page_no = int(param["pageindex"]) + 1
            except (KeyError, TypeError, ValueError):
                page_no = position + 1
            try:
                # A timeout keeps one stalled connection from hanging the run.
                response = session.get(url=url, params=param, headers=header, timeout=10)
                html_list.append(json.loads(response.text))
                print("第%s页东财个股异动数据已抓取" % page_no)
            except (requests.RequestException, ValueError) as spider_error:
                # ValueError covers a body that is not valid JSON.
                print("html抓取过程报错,错误信息为:%s" % spider_error)

    print("--------------------------------------")
    print("开始抓取东方财富个股盘口异动网页数据解析")

    # ---- Parse: collect plain dicts and build the frame once ---------------
    # (DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.)
    rows = []
    for html in html_list:
        try:
            for stock in html['data']['allstock']:
                rows.append({
                    'symbol': stock['c'],       # stock code, no exchange suffix
                    'stk_name': stock['n'],     # stock name
                    'trade_date': cur_date,
                    'chg_time': stock['tm'],    # time of the change
                    'chg_type': stock['t'],     # change type code
                    'chg_value': stock['i'],    # change value
                })
        except (KeyError, TypeError) as parse_error:
            # A page without the expected structure is reported and skipped.
            print("html解析过程报错,错误信息为:%s" % parse_error)

    stock_yd = pd.DataFrame(rows, columns=columns)

    # ---- Store --------------------------------------------------------------
    if not stock_yd.empty:
        stock_yd.to_sql('yichang', engine, if_exists='append', index=False)
    print(stock_yd)
    print("本次存储东方财富个股异动数据%s条" % stock_yd.shape[0])
# ====================主函数====================================================================================================================================
if __name__ == '__main__':
    # Script entry point: build the page requests, fan them out over a
    # process pool, and report the elapsed time.
    print("东方财富个股异动爬虫程序开始执行")
    print("--------------------------------------")
    start = time.time()

    worker_count = 4

    # One query-parameter dict per result page (pages 0..29).
    param_list = [
        {"pageindex": page, "pagesize": '64', "ut": '7eea3edcaed734bea9cbfc24409ed989', "dpt": 'wzchanges'}
        for page in range(0, 30)
    ]

    # Give each worker its own contiguous slice of pages.  Passing the whole
    # list as a single task would leave all but one process idle.
    chunk_size = -(-len(param_list) // worker_count)  # ceiling division
    page_chunks = [
        param_list[offset:offset + chunk_size]
        for offset in range(0, len(param_list), chunk_size)
    ]

    try:
        # The context manager releases the worker processes once map() returns.
        with Pool(processes=worker_count) as pool:
            pool.map(EMydSpider, page_chunks)
    except Exception as error:
        print("进程执行过程报错,错误信息为:%s" % error)

    end = time.time()
    print('东方财富个股异动爬虫程序共执行%0.2f秒.' % (end - start))
    print("东方财富个股异动爬虫程序执行完成")
symbol stk_name trade_date chg_time chg_type chg_value
0 605298 必得科技 20210926 145557.0 8194.0 49200
1 002564 天沃科技 20210926 145555.0 16.0 5.690000
2 600595 *ST中孚 20210926 145552.0 32.0 4.990000
3 300774 倍杰特 20210926 145543.0 8193.0 70000
4 000663 永安林业 20210926 145542.0 32.0 9.880000
... ... ... ... ... ... ...
1915 300358 楚天科技 20210926 132300.0 8201.0 0.030422