一、完整代码
bk.py
import json
import scrapy
from ScrapyAdvanced.items import HouseItem
class BkSpider(scrapy.Spider):
name = 'bk'
allowed_domains = ['bj.ke.com']
start_urls = ['https://bj.ke.com/ershoufang/']
def parse(self, response):
# house_list_position = response.xpath("//div[@class='address']/div/div/a/text()").extract()
# # 存在脏数据 一个空的脏数据 一个 正常数据
# house_list_info = response.xpath("//div[@class='address']/div[@class='houseInfo']/text()").extract()
house_list_address = response.xpath("//div[@class='address']")
house_list_titles = response.xpath("//div[@class='title']/a/text()").extract()
for i in range(len(house_list_address)):
title = house_list_titles