更多精彩内容详见个人量化交易专辑索引
数据示例:
infoCode | stockCode | publishDate | emRatingCode | emRatingValue | emRatingName |
AP201701020232138105 | 2055 | 2017/1/2 | 7 | 3 | 买入 |
AP201701020232138995 | 2718 | 2017/1/3 | 0 | ||
AP201701020232139160 | 600105 | 2017/1/2 | 0 | ||
AP201701020232139321 | 300014 | 2017/1/2 | 6 | 2 | 增持 |
AP201701030232309726 | 538 | 2017/1/3 | 0 | ||
AP201701030232309729 | 300567 | 2017/1/3 | 0 | ||
AP201701030232328903 | 600415 | 2017/1/3 | 0 | ||
AP201701030232334715 | 600458 | 2017/1/3 | 7 | 3 | 买入 |
AP201701030232335344 | 300383 | 2017/1/3 | 0 | ||
代码示例:
1. 在items.py中添加如下代码
import scrapy
class ReportItem(scrapy.Item):
    """One analyst-report rating record scraped from the Eastmoney report API."""
    infoCode = scrapy.Field()      # report identifier, e.g. 'AP201701020232138105'
    stockCode = scrapy.Field()     # code of the stock the report covers
    publishDate = scrapy.Field()   # publication date string, e.g. '2017/1/2'
    emRatingCode = scrapy.Field()  # Eastmoney rating code (0 when no rating given)
    emRatingValue = scrapy.Field() # numeric rating value; empty when unrated
    emRatingName = scrapy.Field()  # rating label, e.g. '买入' / '增持'; empty when unrated
2. 在spiders/report_eastmoney.py中添加如下代码
import scrapy
import re
import random
import time
import urllib
import json
import logging
from urllib.parse import urlencode
import datetime
from reptile.items import ReportItem
class ReportEastmoneySpider(scrapy.Spider):
    """Crawl analyst-report rating records from the Eastmoney report API.

    Pages through https://reportapi.eastmoney.com/report/list and yields
    one ReportItem per report record.
    """
    name = "report_eastmoney"
    allowed_domains = ["reportapi.eastmoney.com"]

    def make_url(self, pageNo, beginTime, endTime):
        """Build the report-list API URL for one result page.

        Args:
            pageNo: 1-based page index.
            beginTime: start date string, 'YYYYMMDD'.
            endTime: end date string, 'YYYYMMDD'.

        Returns:
            Fully encoded request URL (50 records per page).
        """
        params = {
            'pageSize': 50,
            'beginTime': beginTime,
            'endTime': endTime,
            'pageNo': pageNo,
            'qType': 0,
        }
        encoded_params = urlencode(params)
        return f'https://reportapi.eastmoney.com/report/list?{encoded_params}'

    def start_requests(self):
        """Issue the seed request covering the full history up to today."""
        # Far-past begin date so the query spans all available history.
        beginTime = '18000101'
        endTime = datetime.datetime.now().strftime('%Y%m%d')
        url = self.make_url(1, beginTime, endTime)
        # meta pageNo=1 marks the seed request; only the seed fans out the
        # follow-up page requests in parse().
        yield scrapy.Request(
            url,
            meta={"pageNo": 1, "beginTime": beginTime, "endTime": endTime},
        )

    def parse(self, response):
        """Yield one ReportItem per record, then schedule remaining pages.

        Follow-up requests carry meta pageNo=0 so pagination is scheduled
        exactly once, by the seed request.
        """
        payload = json.loads(response.text)
        records = payload['data']
        if not records:
            return
        # Iterate records directly instead of indexing by range(len(...)).
        for record in records:
            item = ReportItem()
            item['infoCode'] = str(record['infoCode'])
            item['stockCode'] = str(record['stockCode'])
            item['publishDate'] = record['publishDate']
            item['emRatingCode'] = str(record['emRatingCode'])
            item['emRatingValue'] = record['emRatingValue']
            item['emRatingName'] = str(record['emRatingName'])
            yield item
        if response.meta['pageNo'] != 0:
            beginTime = response.meta['beginTime']
            endTime = response.meta['endTime']
            # .get() keeps the already-yielded items even if the API response
            # ever omits 'TotalPage' (original code would raise KeyError here).
            total_pages = payload.get('TotalPage', 0)
            # Schedule pages 2..TotalPage; pageNo=0 stops them re-paginating.
            for next_page in range(2, total_pages + 1):
                url = self.make_url(next_page, beginTime, endTime)
                yield scrapy.Request(
                    url,
                    meta={"pageNo": 0, "beginTime": beginTime,
                          "endTime": endTime},
                )
更多历史数据下载: