通常有两种方法:一种是使用 meta 进行参数传递,另一种是使用 lambda 进行参数传递。
方法一:
使用meta进行参数传递。
举例如下:
from scrapy.spiders import Spider
import scrapy
import FirmCrawler.items as MI
from sets import Set
import time
import urlparse
class RicohSpider(Spider):
name = "ricoh"
allowed_domain = ["www.ricoh-imaging.com.cn"]
start_urls = [
"http://www.ricoh-imaging.com.cn/ricoh/service_download.html"
]
allsuffix = Set()
timeout = 20
trytimes = 3
headurl = "http://www.ricoh-imaging.com.cn/"
def parse(self, response):
tr_list_1 = response.xpath(".//*[@id='content2']/div[4]/table[1]/tr[position()>1]")
for tr in tr_list_1:
href = tr.xpath("./td[2]/a/@href").extract().pop()
url = urlparse.urljoin(self.headurl, href)
prduct