Redis、asyncio、aiohttp 和 lxml 爬虫实例

新的事件循环启动方式

# coding=utf-8
import time
import redis
import asyncio
import logging
from lxml import etree
from contextlib import closing
from aiohttp import ClientSession

# Root logger: INFO level, every message prefixed with a timestamp and "[*]".
logging.basicConfig(format="%(asctime)s [*] %(message)s", level=logging.INFO)

# URL template of the listing pages; "{}" is filled in with the page number.
URL = "https://www.fabiaoqing.com/biaoqing/lists/page/{}.html"


# Extract each image's title and link from a listing page.
def parse(source_, red_):
    """Parse one listing page and store every image title/link pair.

    Every ``<a>`` element directly under a ``<div class="tagbqppdiv">`` is
    inspected: its ``title`` attribute is used as the image title and the
    ``data-original`` attribute of its child ``<img>`` as the image link.
    Each pair is written to the hash ``"mixintu"`` through ``red_.hset``
    (field = title, value = link) and logged at INFO level.

    Parsing is best-effort and never raises: an entry that lacks a title or a
    link is skipped with a warning instead of discarding the rest of the
    page, and any other failure is logged together with its traceback.

    Args:
        source_: HTML of the page, in any form ``etree.HTML`` accepts.
        red_: Object exposing ``hset(name, key, value)``.

    Returns:
        None.
    """
    try:
        document = etree.HTML(source_)
        # Guard against a document without a root element (e.g. empty input)
        # so the failure is reported clearly instead of as an AttributeError.
        if document is None:
            logging.error('解析详情页出错!')
            return

        anchors = document.xpath('//div[@class="tagbqppdiv"]/a')
        for index, anchor in enumerate(anchors, start=1):
            titles = anchor.xpath('@title')
            links = anchor.xpath('img/@data-original')
            # One malformed entry must not abort the remaining entries.
            if not titles or not links:
                logging.warning('第 %d 张缺少标题或链接,已跳过', index)
                continue
            title, link = titles[0], links[0]
            red_.hset("mixintu", title, link)
            # Lazy %-args: the logging module does the formatting.
            logging.info('第 %d 张  标题:%r  链接:%r', index, title, link)
    except Exception:
        # Deliberate catch-all boundary: a bad page must not kill the crawl,
        # but the cause is kept by logging the traceback.
        logging.exception('解析详情页出错!')


# Fetch the page source.
async def get_source(url_, red_):
    """Download one page and pass its body to ``parse``.

    A new ``ClientSession`` is opened for the request. Responses with an
    HTTP error status are rejected instead of being parsed as a listing
    page. Client/network errors and timeouts are logged and swallowed so a
    single failing page does not leave an unretrieved exception in its task.
    ``asyncio.CancelledError`` is not caught and propagates to the caller.

    Args:
        url_: URL of the page to download.
        red_: Object forwarded unchanged to ``parse`` as its storage client.

    Returns:
        None.
    """
    # Local import: the module itself only imports ``ClientSession``.
    from aiohttp import ClientError

    try:
        async with ClientSession() as session:
            async with session.get(url_) as response:
                # Raises ClientResponseError (a ClientError) on 4xx/5xx.
                response.raise_for_status()
                source = await response.read()
    except (ClientError, asyncio.TimeoutError) as exc:
        logging.error('请求失败 %s: %r', url_, exc)
        return

    # Parse after the connection has been released.
    parse(source, red_)


async def main():
    """Crawl listing pages 1 to 10 concurrently and log the elapsed time.

    A Redis client backed by a connection pool (localhost:6379, db 1) is
    created and shared by all downloads. The downloads run concurrently;
    any exception raised by one of them is collected and logged instead of
    being left unretrieved inside its task.

    Returns:
        None.
    """
    now = time.time()
    pool = redis.ConnectionPool(host='localhost', port=6379, db=1)
    red = redis.Redis(connection_pool=pool)

    pages = range(1, 11)
    # return_exceptions=True: one failing page does not hide the outcome of
    # the others, and every exception is handed back here for reporting.
    results = await asyncio.gather(
        *(get_source(URL.format(page), red) for page in pages),
        return_exceptions=True,
    )
    for page, result in zip(pages, results):
        if isinstance(result, BaseException):
            logging.error('第 %d 页抓取失败: %r', page, result)

    logging.info(f"总用时:{time.time() - now}")


if __name__ == "__main__":
    # Create the loop explicitly: asyncio.get_event_loop() is deprecated when
    # no loop is running and is an error on the newest Python releases.
    # closing() guarantees loop.close() is called, even if main() raises.
    with closing(asyncio.new_event_loop()) as loop:
        loop.run_until_complete(main())

旧的事件循环启动方式

# coding=utf-8
import asyncio
import logging
import time

import redis
from aiohttp import ClientSession
from lxml import etree

# Root logger: INFO level, every message prefixed with a timestamp and "[*]".
logging.basicConfig(format="%(asctime)s [*] %(message)s", level=logging.INFO)

# URL template of the listing pages; "{}" is filled in with the page number.
URL = "https://www.fabiaoqing.com/biaoqing/lists/page/{}.html"


# Extract each image's title and link from a listing page.
def parse(source_, red_):
    """Parse one listing page and store every image title/link pair.

    Every ``<a>`` element directly under a ``<div class="tagbqppdiv">`` is
    inspected: its ``title`` attribute is used as the image title and the
    ``data-original`` attribute of its child ``<img>`` as the image link.
    Each pair is written to the hash ``"mixintu"`` through ``red_.hset``
    (field = title, value = link) and logged at INFO level.

    Parsing is best-effort and never raises: an entry that lacks a title or a
    link is skipped with a warning instead of discarding the rest of the
    page, and any other failure is logged together with its traceback.

    Args:
        source_: HTML of the page, in any form ``etree.HTML`` accepts.
        red_: Object exposing ``hset(name, key, value)``.

    Returns:
        None.
    """
    try:
        document = etree.HTML(source_)
        # Guard against a document without a root element (e.g. empty input)
        # so the failure is reported clearly instead of as an AttributeError.
        if document is None:
            logging.error('解析详情页出错!')
            return

        anchors = document.xpath('//div[@class="tagbqppdiv"]/a')
        for index, anchor in enumerate(anchors, start=1):
            titles = anchor.xpath('@title')
            links = anchor.xpath('img/@data-original')
            # One malformed entry must not abort the remaining entries.
            if not titles or not links:
                logging.warning('第 %d 张缺少标题或链接,已跳过', index)
                continue
            title, link = titles[0], links[0]
            red_.hset("mixintu", title, link)
            # Lazy %-args: the logging module does the formatting.
            logging.info('第 %d 张  标题:%r  链接:%r', index, title, link)
    except Exception:
        # Deliberate catch-all boundary: a bad page must not kill the crawl,
        # but the cause is kept by logging the traceback.
        logging.exception('解析详情页出错!')


# Fetch the page source.
async def get_source(url_, red_):
    """Download one page and pass its body to ``parse``.

    A new ``ClientSession`` is opened for the request. Responses with an
    HTTP error status are rejected instead of being parsed as a listing
    page. Client/network errors and timeouts are logged and swallowed so a
    single failing page does not leave an unretrieved exception in its task.
    ``asyncio.CancelledError`` is not caught and propagates to the caller.

    Args:
        url_: URL of the page to download.
        red_: Object forwarded unchanged to ``parse`` as its storage client.

    Returns:
        None.
    """
    # Local import: the module itself only imports ``ClientSession``.
    from aiohttp import ClientError

    try:
        async with ClientSession() as session:
            async with session.get(url_) as response:
                # Raises ClientResponseError (a ClientError) on 4xx/5xx.
                response.raise_for_status()
                source = await response.read()
    except (ClientError, asyncio.TimeoutError) as exc:
        logging.error('请求失败 %s: %r', url_, exc)
        return

    # Parse after the connection has been released.
    parse(source, red_)


async def main():
    """Crawl listing pages 1 to 10 concurrently and log the elapsed time.

    A Redis client backed by a connection pool (localhost:6379, db 1) is
    created and shared by all downloads. The downloads run concurrently;
    any exception raised by one of them is collected and logged instead of
    being left unretrieved inside its task.

    Returns:
        None.
    """
    now = time.time()
    pool = redis.ConnectionPool(host='localhost', port=6379, db=1)
    red = redis.Redis(connection_pool=pool)

    pages = range(1, 11)
    # return_exceptions=True: one failing page does not hide the outcome of
    # the others, and every exception is handed back here for reporting.
    results = await asyncio.gather(
        *(get_source(URL.format(page), red) for page in pages),
        return_exceptions=True,
    )
    for page, result in zip(pages, results):
        if isinstance(result, BaseException):
            logging.error('第 %d 页抓取失败: %r', page, result)

    logging.info(f"总用时:{time.time() - now}")


if __name__ == "__main__":
    # Create the loop explicitly: asyncio.get_event_loop() is deprecated when
    # no loop is running and is an error on the newest Python releases.
    event_loop = asyncio.new_event_loop()
    try:
        # Start the loop with this coroutine; the loop stops when it returns.
        event_loop.run_until_complete(main())
    except KeyboardInterrupt:
        # asyncio.Task.all_tasks() was removed in Python 3.9; the module-level
        # asyncio.all_tasks(loop) is its replacement.
        unfinished = asyncio.all_tasks(event_loop)
        for task in unfinished:
            print(task.cancel())
        # Run the loop once more so the cancelled tasks actually receive the
        # CancelledError and run their cleanup before the loop is closed.
        if unfinished:
            event_loop.run_until_complete(
                asyncio.gather(*unfinished, return_exceptions=True)
            )
    finally:
        event_loop.close()

在这里插入图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

迷心兔

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值