2018/8/14 第一个爬虫程序

最新推荐文章于 2018-12-03 15:12:55 发布

MjForPython

最新推荐文章于 2018-12-03 15:12:55 发布

阅读量258

点赞数

本文链接：https://blog.csdn.net/MjForPython/article/details/81671089

版权

# -*- coding: utf-8 -*-
import scrapy
from ..items import Mjproject1Item
class MjspideroneSpider(scrapy.Spider):
    """Spider that scrapes the ranking table on zuihaodaxue.com.

    Starts from the single page listed in ``start_urls`` (judging by the
    URL, the 2018 university ranking) and reads the rank, school and city
    columns out of the ``tbody.hidden_zhpm`` table body.
    """

    name = 'MJspiderOne'
    allowed_domains = ['www.zuihaodaxue.com']
    start_urls = ["http://www.zuihaodaxue.com/zuihaodaxuepaiming2018.html"]

    def parse(self, response):
        """Yield one ``Mjproject1Item`` per matching table body.

        Each item field (``rank``, ``school``, ``city``) is a list holding
        the text nodes of the corresponding column for every row of that
        table body.
        """
        for spd in response.css('tbody.hidden_zhpm'):
            item = Mjproject1Item()
            # Use XPaths relative to the selected <tbody> ("./...").  An
            # absolute path ("/html/body/...") is evaluated from the document
            # root even when called on a sub-selector, which would ignore the
            # loop variable and tie the spider to the exact nesting of the
            # page's wrapper <div>s.
            # Remember to end the XPath with text() to get the cell's text.
            item['rank'] = spd.xpath('./tr/td[1]/text()').extract()
            # The school name sits inside a <div> within the second cell.
            item['school'] = spd.xpath('./tr/td[2]/div/text()').extract()
            item['city'] = spd.xpath('./tr/td[3]/text()').extract()
            yield item