更多精彩内容详见个人量化交易专辑索引
1. 在items.py中添加如下代码
import scrapy
class CodeItem(scrapy.Item):
    """Container for one scraped stock-code record.

    Each attribute is a ``scrapy.Field`` and is accessed on instances by
    key, e.g. ``item['code']``.
    """

    # Stock code string.
    code = scrapy.Field()
    # Market tag for the listing (the spider in this post stores 'SH' or 'SZ').
    market = scrapy.Field()
    # NOTE(review): not populated by the spider shown in this post -- confirm
    # the intended meaning with whichever pipeline consumes it.
    type = scrapy.Field()
    # Stock display name.
    name = scrapy.Field()
2. 在spiders/codes_cgedt.py中添加如下代码
import scrapy
import re
import logging
from reptile.items import CodeItem
class CodesCgedtSpider(scrapy.Spider):
    """Collect stock codes and names from the two cgedt.com listing pages.

    The first entry of ``start_urls`` is tagged with market ``'SH'`` and the
    second with ``'SZ'``. Every ``<li>`` of the first ``<ul>`` under the
    element with id ``stockcodelist`` is expected to hold link text shaped
    like ``name(code)``; one ``CodeItem`` is yielded per well-formed entry.
    """

    name = "codes_cgedt"
    allowed_domains = ["www.cgedt.com"]
    start_urls = [
        "http://www.cgedt.com/stockcode/hushi.asp",
        "http://www.cgedt.com/stockcode/shenshi.asp",
    ]

    # Compiled once at class creation instead of once per list entry.
    # _CODE_PATTERN captures the text between the first "(" and the next ")";
    # _NAME_PATTERN captures everything before the first "(".
    _CODE_PATTERN = re.compile(r'[(](.*?)[)]', re.S)
    _NAME_PATTERN = re.compile(r'(.*?)[(]', re.S)

    def start_requests(self):
        """Yield one plain ``scrapy.Request`` per URL in ``start_urls``."""
        for url in self.start_urls:
            yield scrapy.Request(url)

    def parse(self, response):
        """Yield a ``CodeItem`` for each stock listed on ``response``.

        The market tag is chosen by comparing ``response.url`` with the
        entries of ``start_urls``; a response from any other URL is logged
        as an error and produces no items. List entries without link text,
        or whose text lacks the ``name(code)`` shape, are skipped with a
        warning instead of aborting the rest of the page.
        """
        if self.start_urls[0] == response.url:
            market = 'SH'
        elif self.start_urls[1] == response.url:
            market = 'SZ'
        else:
            logging.error('unknown url')
            return

        # Node set holding the stock-code list.
        stockcodelist = response.xpath("//*[@id='stockcodelist']/ul[1]")
        for each in stockcodelist.xpath("li"):
            text = each.xpath("a/text()").extract_first()
            if text is None:
                # extract_first() returns None when the <li> has no link text.
                logging.warning('stock entry without link text on %s',
                                response.url)
                continue

            code_match = self._CODE_PATTERN.search(text)
            name_match = self._NAME_PATTERN.search(text)
            if code_match is None or name_match is None:
                logging.warning('unexpected stock entry %r on %s',
                                text, response.url)
                continue

            item = CodeItem()
            item['market'] = market
            item['code'] = code_match.group(1)
            item['name'] = name_match.group(1)
            yield item