下列代码
def parse_item(self, response):
    """Yield one ``LianjiaItem`` aggregating every match found in *response*.

    Each field is assigned the complete list returned by its XPath query,
    so a single item is produced per response and every field value is a
    list rather than a scalar.

    Args:
        response: The downloaded page passed in by the crawler.

    Yields:
        A ``LianjiaItem`` whose ``money``, ``area`` and ``location`` fields
        each hold a list of extracted strings.
    """
    sel = Selector(response)
    item = LianjiaItem()
    item['money'] = sel.xpath(
        '//*[@id="content"]/div[1]/div[1]/div[1]/div/span/em/text()'
    ).extract()
    # Match runs of one or more digits. A raw string keeps the backslash
    # intact, and requiring at least one digit avoids empty-string matches.
    item['area'] = sel.xpath(
        '//div[@class="content__list--item--main"]/p[2]/text()[5]'
    ).re(r'\d+')
    item['location'] = sel.xpath(
        '//div[@class="content__list--item--main"]/p[2]/a[1]/text()'
    ).extract()
    yield item
上面的代码返回的 Item 是这样的（下图）：每个字段都是一个列表，包含页面上所有匹配到的值。
如果想让每套房源各自作为一个 Item 返回（下图），
则只需增加一个循环结构，代码如下：
def parse_item(self, response):
    """Yield one ``LianjiaItem`` per listing found in *response*.

    The three XPath queries each return a flat list covering the whole
    page; the lists are then walked in lockstep to build one item per
    position.

    Args:
        response: The downloaded page passed in by the crawler.

    Yields:
        A new ``LianjiaItem`` for each position, with scalar ``money``,
        ``area`` and ``location`` values.
    """
    sel = Selector(response)
    money = sel.xpath(
        '//div[@class="content__list--item--main"]/span/em/text()'
    ).extract()
    # Raw string so that ``\d`` reaches the regex engine unchanged.
    area = sel.xpath(
        '//div[@class="content__list--item--main"]/p[2]/text()[5]'
    ).re(r'\d+')
    location = sel.xpath(
        '//div[@class="content__list--item--main"]/p[2]/a[1]/text()'
    ).extract()
    # zip() stops at the shortest list, so the three lists are assumed to
    # line up one-to-one per listing -- TODO confirm against the page markup.
    for price, size, place in zip(money, area, location):
        # Create a fresh item on every iteration: re-yielding one shared,
        # mutated instance would make all yielded references point at the
        # same object, which ends up holding only the last listing's values.
        item = LianjiaItem()
        item['money'] = price
        item['area'] = size
        item['location'] = place
        yield item