scrapy

import scrapy


class CnblSpider(scrapy.Spider):
    name = 'cnbl'
    allowed_domains = ['cnblogs.com']
    # Build the 20 start URLs by string formatting. Note that '#p%s' is a URL
    # fragment: fragments are never sent to the server, so every request here
    # actually targets the same page (Scrapy's duplicate filter drops the rest).
    start_urls = ['http://cnblogs.com/pick/#p%s' % p for p in range(1, 21)]

    def parse(self, response):
        # Each post lives in a <div class="post_item">; print its digg count
        for blog in response.xpath('//div[@class="post_item"]'):
            print(blog.xpath('div[@class="digg"]/div[@class="diggit"]/span/text()').extract_first())

Build the start URLs by string formatting; //div[@class="post_item"] selects every div whose class equals post_item, and extract_first() returns the first matching string (or None if nothing matches).
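Selectors like these are easy to try out without running a full crawl. A minimal sketch using Scrapy's Selector class directly; the HTML snippet below is a hypothetical stand-in for the real cnblogs markup:

from scrapy.selector import Selector

html = '''
<div class="post_item">
  <div class="digg"><div class="diggit"><span>42</span></div></div>
</div>
'''

sel = Selector(text=html)
for blog in sel.xpath('//div[@class="post_item"]'):
    # extract_first() returns the first matching string, or None if no match
    print(blog.xpath('div[@class="digg"]/div[@class="diggit"]/span/text()').extract_first())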


import scrapy


class JulySpider(scrapy.Spider):
    name = 'july'
    allowed_domains = ['julyedu.com']
    start_urls = ['https://www.julyedu.com/category/index']

    def parse(self, response):
        # Each course card is a <div class="course_info_box">
        for julyedu_class in response.xpath('//div[@class="course_info_box"]'):
            # Print the course title and its first two "course-info-tip" lines (debug output)
            print(julyedu_class.xpath('a/h4/text()').extract_first())
            print(julyedu_class.xpath('a/p[@class="course-info-tip"][1]/text()').extract_first())
            print(julyedu_class.xpath('a/p[@class="course-info-tip"][2]/text()').extract_first())
            # Yield the title as an item so feed exports can collect it
            yield {'title': julyedu_class.xpath('a/h4/text()').extract_first()}
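Because the spider yields plain dicts, the items can be written straight to a feed file. A minimal sketch of running it from a script rather than the scrapy CLI, assuming the JulySpider class above is importable; the filename courses.json is an arbitrary choice:

from scrapy.crawler import CrawlerProcess

process = CrawlerProcess(settings={
    # FEEDS (Scrapy >= 2.1) writes every yielded item to the given file
    'FEEDS': {'courses.json': {'format': 'json'}},
})
process.crawl(JulySpider)
process.start()  # blocks until the crawl finishes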



import scrapy


class QuoSpider(scrapy.Spider):
    name = 'quo'
    allowed_domains = ['quotes.toscrape.com']
    start_urls = ['http://quotes.toscrape.com/']

    def parse(self, response):
        for sel in response.xpath('//div[@class="quote"]'):
            print(sel.xpath('span[1]/text()').extract_first())
            print(sel.xpath('span[2]/small/text()').extract_first())
            # Yield the quote text and its author as an item
            yield {
                'text': sel.xpath('span[1]/text()').extract_first(),
                'author': sel.xpath('span[2]/small/text()').extract_first()
            }

        # Find the next-page link outside the loop: there is one per page
        next_page = response.xpath('//li[@class="next"]/a/@href').extract_first()
        if next_page is not None:
            # Join the relative href into a full URL and schedule the next page,
            # calling back into parse so pagination continues recursively
            next_page = response.urljoin(next_page)
            yield scrapy.Request(next_page, callback=self.parse)
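Scrapy 1.4 and later also provide response.follow, which accepts a relative href and joins it against the current URL itself, so the explicit urljoin step goes away. A sketch of the pagination tail of parse rewritten that way:

        # Equivalent pagination with response.follow (Scrapy >= 1.4), which
        # resolves relative URLs against response.url automatically
        next_page = response.xpath('//li[@class="next"]/a/@href').extract_first()
        if next_page is not None:
            yield response.follow(next_page, callback=self.parse)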
