练习 58信息

最新推荐文章于 2023-06-21 23:02:39 发布

qq_32498963

最新推荐文章于 2023-06-21 23:02:39 发布

阅读量178

点赞数

本文链接：https://blog.csdn.net/qq_32498963/article/details/82081074

版权

# -*- coding: utf-8 -*-
import scrapy
from lxml import etree
from hw_project.items import Bj58Item
class Bj58Spider(scrapy.Spider):
    """Spider that turns each row of the bj.58.com listing page into a Bj58Item.

    The listing is read from the ``<ul class="car_list ac_container">``
    element of the page in ``start_urls`` (presumably used-car listings,
    judging by the ``ershouche`` URL path -- confirm against the site).
    """

    name = 'bj58'
    allowed_domains = ['bj.58.com']
    start_urls = ['http://bj.58.com/ershouche/?PGTID=0d100000-0000-1ce3-e602-1acefd5f07af&ClickID=4']

    # (item field, XPath relative to a listing <li>, index into the XPath result).
    # The order matches the order in which the item fields are populated.
    _FIELD_PATHS = (
        ('brand', './div[2]/a/h1/font/text()', 0),
        # The second text node of <h1> is used for the model.
        ('type_c', './div[2]/a/h1/text()', 1),
        ('year_c', './div[2]/div[1]/span[1]/text()', 0),
        ('distance', './div[2]/div[1]/span[2]/text()', 0),
        ('cost', './div[2]/div[1]/span[3]/text()', 0),
        ('operation_type', './div[2]/div[1]/span[4]/text()', 0),
        ('price', './div[3]/h3/text()', 0),
    )

    def _extract_fields(self, li):
        """Return a ``{field: text}`` dict for one listing row.

        Returns ``None`` when any expected node is absent, so the caller can
        skip the row instead of failing with ``IndexError``.
        """
        values = {}
        for field, path, index in self._FIELD_PATHS:
            matches = li.xpath(path)
            if len(matches) <= index:
                self.logger.debug('Skipping listing row: no value for %r', field)
                return None
            values[field] = matches[index]
        # Drop the line-break-plus-indentation run embedded in the <h1> text.
        values['type_c'] = values['type_c'].replace('\r\n                                    ','')
        return values

    def parse(self, response):
        """Yield one ``Bj58Item`` per complete listing row in ``response``.

        Rows that lack any of the expected nodes are skipped rather than
        aborting the whole page, so one malformed row no longer discards
        the listings that follow it.
        """
        html_ele = etree.HTML(response.text)
        li_list = html_ele.xpath('//ul[@class="car_list ac_container"]/li')
        for li in li_list:
            values = self._extract_fields(li)
            if values is None:
                continue
            item = Bj58Item()
            for field, value in values.items():
                item[field] = value
            yield item

qq_32498963

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
练习 58信息

# -*- coding: utf-8 -*- import scrapy from lxml import etree from hw_project.items import Bj58Item class Bj58Spider(scrapy.Spider): name = 'bj58' allowed_domains = ['bj.58.com'] start_urls...
复制链接

扫一扫