直接上代码了,代码里面有注释,大家可以参考参考:
# -*- coding: utf-8 -*-
import scrapy
import time
import json
import os
class GupiaoSpider(scrapy.Spider):
name = 'gupiao'
start_urls = ['http://stock.10jqka.com.cn/']
# 处理响应函数
def parse(self, response):
    """Schedule one follow-up request per stock link found on the start page.

    Selects the anchors in the second column of the ``m-table`` table inside
    the ``rzrq`` div, reads each anchor's text (the stock short name) and
    ``href``, and yields a ``scrapy.Request`` for that link handled by
    ``self.parse_data``.

    Args:
        response: The downloaded page for one of ``start_urls``.

    Yields:
        scrapy.Request: One request per usable anchor. Its ``meta`` carries
        ``text_name`` (the anchor text) and ``seindex`` set to 1
        (presumably a counter consumed by ``parse_data`` - confirm there).
    """
    anchors = response.xpath(
        "//div[@id='rzrq']/table[@class='m-table']/tbody/tr/td[2]/a"
    )
    for anchor in anchors:
        # extract_first() returns None instead of raising IndexError when a
        # node is missing, so one malformed row no longer aborts the rest.
        text_name = anchor.xpath(".//text()").extract_first()
        href_url = anchor.xpath(".//@href").extract_first()
        if text_name is None or not href_url:
            continue

        # NOTE(review): time.sleep blocks the thread running this callback;
        # Scrapy's DOWNLOAD_DELAY setting is the usual way to throttle
        # requests - consider moving the 3 second pause there.
        time.sleep(3)

        # urljoin leaves absolute URLs untouched and resolves relative ones
        # against the page URL, which scrapy.Request requires.
        yield scrapy.Request(
            response.urljoin(href_url),
            callback=self.parse_data,
            meta={'text_name': text_name, "seindex": 1},
        )
# 对