# -*- coding: utf-8 -*-
import scrapy
from lxml import etree
from hw_project.items import Bj58Item
class Bj58Spider(scrapy.Spider):
    """Spider that scrapes the used-car listing page of bj.58.com.

    Each ``<li>`` under the ``car_list ac_container`` list is turned into a
    ``Bj58Item`` carrying the brand, model, year, mileage, cost, operation
    type and price text found in that entry.
    """

    name = 'bj58'
    allowed_domains = ['bj.58.com']
    start_urls = ['http://bj.58.com/ershouche/?PGTID=0d100000-0000-1ce3-e602-1acefd5f07af&ClickID=4']

    # (item field, XPath relative to a listing <li>, index of the text node to keep)
    _FIELD_PATHS = (
        ('brand', './div[2]/a/h1/font/text()', 0),
        ('type_c', './div[2]/a/h1/text()', 1),
        ('year_c', './div[2]/div[1]/span[1]/text()', 0),
        ('distance', './div[2]/div[1]/span[2]/text()', 0),
        ('cost', './div[2]/div[1]/span[3]/text()', 0),
        ('operation_type', './div[2]/div[1]/span[4]/text()', 0),
        ('price', './div[3]/h3/text()', 0),
    )

    @staticmethod
    def _text_at(node, path, index):
        """Return the ``index``-th result of ``node.xpath(path)``, or None if absent."""
        matches = node.xpath(path)
        return matches[index] if len(matches) > index else None

    def parse(self, response):
        """Yield one ``Bj58Item`` per complete listing found in ``response``.

        Listings that lack any of the expected text nodes (for example ad
        slots or entries with a different layout) are skipped and logged
        instead of raising ``IndexError`` and aborting the rest of the page.
        """
        html_ele = etree.HTML(response.text)
        if html_ele is None:
            # lxml may hand back None when the body holds no parseable markup.
            self.logger.warning('No parseable HTML in response from %s', response.url)
            return

        for li in html_ele.xpath('//ul[@class="car_list ac_container"]/li'):
            values = {
                field: self._text_at(li, path, index)
                for field, path, index in self._FIELD_PATHS
            }

            missing = [field for field, value in values.items() if value is None]
            if missing:
                self.logger.debug(
                    'Skipping listing without field(s): %s', ', '.join(missing)
                )
                continue

            # Drop the CRLF-plus-space run embedded in the model title text.
            values['type_c'] = values['type_c'].replace('\r\n ', '')

            item = Bj58Item()
            for field, value in values.items():
                item[field] = value
            yield item
# Exercise: scraping 58.com listing information
# Latest recommended article published on 2023-06-21 23:02:39