a-web-crawler-with-asyncio-coroutines

The callback-based version of the crawler:

import re
import socket
from selectors import DefaultSelector,EVENT_WRITE,EVENT_READ
from urllib.parse import urljoin, urlparse



# Flag polled by loop(); Fetcher.read_response sets it to True once
# urls_todo becomes empty.
stopped = False


def loop():
    """Run the event loop until the module-level ``stopped`` flag is set.

    Each pass blocks in ``selector.select()`` until at least one registered
    file object is ready, then calls the callback stored in each key's
    ``data`` attribute with the ``(key, mask)`` pair returned by the selector.
    """
    while not stopped:
        events = selector.select()
        for event_key, event_mask in events:
            callback = event_key.data
            # The registered callbacks take (key, mask), so pass both through.
            callback(event_key, event_mask)

class Fetcher:
    """Fetch one URL path from xkcd.com using selector callbacks.

    Each stage (connect, send request, read response) registers the next
    stage as a callback on the module-level ``selector``.  Every callback
    takes the ``(key, mask)`` pair produced by ``selector.select()``.
    """

    def __init__(self, url):
        self.response = b''  # raw bytes received so far (headers + body)
        self.url = url       # path to request, e.g. '/'
        self.sock = None     # created in fetch()

    def fetch(self):
        """Start a non-blocking connect and register ``connected``."""
        self.sock = socket.socket()
        self.sock.setblocking(False)
        try:
            self.sock.connect(('xkcd.com', 80))
        except BlockingIOError:
            # Expected: a non-blocking connect returns before it completes.
            pass

        # Register the next callback: move from "connecting" to
        # "send the request" once the socket is reported writable.
        selector.register(self.sock.fileno(), EVENT_WRITE, self.connected)

    def connected(self, key, mask):
        """Send the HTTP request and register ``read_response``."""
        print('connceted!')
        selector.unregister(key.fd)
        request = 'GET {} HTTP/1.0\r\nHost:xkcd.com\r\n\r\n'.format(self.url)
        # NOTE(review): send() may write only part of the data; this assumes
        # the short request fits in one call -- confirm if requests grow.
        self.sock.send(request.encode('ascii'))

        # Register the next callback: move from "request sent" to
        # "wait for the response".
        selector.register(key.fd, EVENT_READ, self.read_response)

    def read_response(self, key, mask):
        """Read one chunk; on end of stream, parse links and queue new ones.

        Sets the module-level ``stopped`` flag when ``urls_todo`` is empty.
        """
        global stopped
        chunk = self.sock.recv(4096)
        if chunk:
            self.response += chunk
            return

        # An empty read means the peer closed the connection: the response
        # is complete, so stop watching the socket and release it.
        selector.unregister(key.fd)
        self.sock.close()

        # Parse the response data into a set of URL paths.
        links = self.parse_links()

        # Only links never seen before get a new Fetcher.
        for link in links.difference(seen_urls):
            urls_todo.add(link)
            Fetcher(link).fetch()

        seen_urls.update(links)
        urls_todo.remove(self.url)
        if not urls_todo:
            stopped = True

    def parse_links(self):
        """Return the set of same-site URL paths linked from the response.

        Looks for ``href`` attributes in the response body, resolves each
        against ``self.url``, drops non-HTTP schemes and foreign hosts, and
        keeps only the path component (query and fragment are discarded).
        """
        # Everything after the first blank line is the body.
        _, _, body = self.response.partition(b'\r\n\r\n')
        text = body.decode('utf-8', errors='replace')

        links = set()
        for href in re.findall(r'''(?i)href=["']?([^\s"'<>]+)''', text):
            try:
                parts = urlparse(urljoin(self.url, href))
            except ValueError:
                # Malformed URL (e.g. a broken IPv6 literal): skip it.
                continue
            if parts.scheme not in ('', 'http', 'https'):
                continue
            host = parts.hostname
            # fetch() always connects to xkcd.com, so other hosts are skipped.
            if host and host.lower() not in ('xkcd.com', 'www.xkcd.com'):
                continue
            links.add(parts.path or '/')
        return links

# One selector instance, used by both the event loop and every Fetcher.
selector = DefaultSelector()

# Global URL bookkeeping; both sets start with just the root path.
urls_todo = {'/'}  # URLs we have yet to fetch
seen_urls = {'/'}  # every URL discovered so far

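But callbacks are hard to debug because of "stack ripping": each callback is invoked directly by the event loop, so a traceback no longer shows the chain of earlier steps that led to it.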

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值