练习:使用 Scrapy 爬取 58 同城(北京)二手车信息

# -*- coding: utf-8 -*-
import scrapy
from lxml import etree
from hw_project.items import Bj58Item
class Bj58Spider(scrapy.Spider):
    """Scrape used-car listings from the bj.58.com ``ershouche`` list page.

    Every ``<li>`` under the ``car_list ac_container`` list is converted into
    one ``Bj58Item``. A row whose markup lacks any expected text node is
    skipped with a warning instead of raising ``IndexError`` and aborting the
    rest of the page.
    """

    name = 'bj58'
    allowed_domains = ['bj.58.com']
    start_urls = ['http://bj.58.com/ershouche/?PGTID=0d100000-0000-1ce3-e602-1acefd5f07af&ClickID=4']

    # (item field, XPath relative to the <li>, index of the wanted text node).
    # Order matters: items are populated in this order.
    _FIELDS = (
        ('brand', './div[2]/a/h1/font/text()', 0),
        # Index 1: the text node wanted here is the second one inside <h1>.
        ('type_c', './div[2]/a/h1/text()', 1),
        ('year_c', './div[2]/div[1]/span[1]/text()', 0),
        ('distance', './div[2]/div[1]/span[2]/text()', 0),
        ('cost', './div[2]/div[1]/span[3]/text()', 0),
        ('operation_type', './div[2]/div[1]/span[4]/text()', 0),
        ('price', './div[3]/h3/text()', 0),
    )

    def parse(self, response):
        """Yield one ``Bj58Item`` per well-formed listing row in *response*.

        Args:
            response: The downloaded listing page; only ``response.text`` and
                ``response.url`` are read.

        Yields:
            ``Bj58Item`` instances with ``brand``, ``type_c``, ``year_c``,
            ``distance``, ``cost``, ``operation_type`` and ``price`` set.
        """
        html_ele = etree.HTML(response.text)
        for li in html_ele.xpath('//ul[@class="car_list ac_container"]/li'):
            pairs = self._extract_fields(li, response.url)
            if pairs is None:
                continue
            item = Bj58Item()
            for field, value in pairs:
                item[field] = value
            yield item

    def _extract_fields(self, li, url):
        """Return ``[(field, text), ...]`` for one row, or ``None`` if incomplete."""
        pairs = []
        for field, xpath, index in self._FIELDS:
            nodes = li.xpath(xpath)
            if len(nodes) <= index:
                # Missing node: report and drop this row only.
                self.logger.warning(
                    'Skipping listing on %s: no value for %r', url, field)
                return None
            value = nodes[index]
            if field == 'type_c':
                # Remove the known padding literal, then trim whatever other
                # surrounding whitespace the markup leaves behind.
                value = value.replace('\r\n                                    ', '').strip()
            pairs.append((field, value))
        return pairs
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值