from selectors import DefaultSelector, EVENT_WRITE, EVENT_READ
from socket import socket
from urllib.parse import urlparse
class Future:
    """Placeholder for a value that will be supplied later.

    Callbacks registered with ``add_done_callback`` are invoked with the
    future itself once ``set_result`` is called.

    Attributes:
        result: The value passed to ``set_result`` (``None`` until then).
        call_backs: Every callback registered so far, in registration order.
    """

    def __init__(self):
        self.result = None
        self.call_backs = []
        # Separate flag because ``None`` is itself a legitimate result.
        self._done = False

    def add_done_callback(self, func):
        """Register ``func`` to be called as ``func(future)`` on completion.

        If the result has already been set, ``func`` is called right away
        instead of being silently dropped.
        """
        self.call_backs.append(func)
        if self._done:
            func(self)

    def set_result(self, result):
        """Store ``result`` and invoke every registered callback."""
        self.result = result
        self._done = True
        # Iterate over a snapshot: a callback may register further callbacks
        # on this future, and those are already fired by add_done_callback.
        for call in tuple(self.call_backs):
            call(self)
class Task:
    """Drive a generator-based coroutine that yields future-like objects.

    Each object the coroutine yields must provide ``add_done_callback`` and
    a ``result`` attribute; when it completes, its result is sent back into
    the coroutine.  Coroutines that have not finished yet are tracked in the
    class-level ``_tasks`` list.
    """

    # Shared by all tasks: coroutines that are still running.
    _tasks = []

    def __init__(self, coroutine):
        """Register ``coroutine`` and run it up to its first ``yield``."""
        self.coroutine = coroutine
        self._tasks.append(coroutine)
        # A fresh Future carries ``result = None``, which is exactly what a
        # just-created generator must be sent to start it.
        f = Future()
        self.step(f)

    def _forget(self):
        """Drop this task's coroutine from ``_tasks`` if it is still there."""
        if self.coroutine in self._tasks:
            self._tasks.remove(self.coroutine)

    def step(self, future):
        """Resume the coroutine with ``future.result``.

        Returns the coroutine's return value (after printing it) when the
        coroutine finishes, otherwise ``None`` after subscribing to the next
        yielded future.  Any exception raised by the coroutine propagates to
        the caller, but the coroutine is first removed from ``_tasks`` so a
        failed task is not counted as still pending.
        """
        try:
            next_future = self.coroutine.send(future.result)
        except StopIteration as e:
            self._forget()
            print(e.value)
            return e.value
        except Exception:
            self._forget()
            raise
        next_future.add_done_callback(self.step)
class Loop:
    """Minimal selector-based event loop.

    ``Loop()`` always returns the same instance.  The selector is a class
    attribute, so code may register file descriptors through
    ``Loop.selector`` without creating an instance first.  The ``data``
    registered with each file descriptor must be a zero-argument callable.
    """

    __instance = None
    selector = DefaultSelector()

    def __new__(cls, *args, **kwargs):
        """Create the shared instance on first use and return it afterwards.

        Constructor arguments are accepted but not forwarded:
        ``object.__new__`` raises ``TypeError`` if it is handed extra
        arguments.
        """
        if cls.__instance is None:
            cls.__instance = super().__new__(cls)
        return cls.__instance

    def run_until_complete(self):
        """Dispatch I/O callbacks until ``Task._tasks`` is empty.

        Raises:
            RuntimeError: if tasks are still pending but nothing is
                registered with the selector, since ``select()`` could then
                never wake any of them.
        """
        while Task._tasks:
            if not self.selector.get_map():
                raise RuntimeError(
                    "pending tasks remain but no file descriptor is "
                    "registered with the selector"
                )
            for key, _mask in self.selector.select():
                on_ready = key.data
                on_ready()
class Spider:
    """Fetch one URL over plain HTTP using a non-blocking socket.

    Attributes:
        url: The URL given to the constructor.
        host: The URL's network location, sent as the ``Host`` header.
        path: The URL's path component.
        r: Raw response bytes while fetching; the decoded response body
            (``str``) once ``fetch`` has finished.
    """

    def __init__(self, url):
        parts = urlparse(url)
        self.url = url
        self.host = parts.netloc
        self.path = parts.path
        self.r = b''

    def _address(self):
        """Return the ``(host, port)`` pair to connect to (port defaults to 80)."""
        parts = urlparse(self.url)
        # ``netloc`` may carry a port or userinfo, which is not a valid
        # hostname; fall back to it only when no hostname could be parsed.
        return (parts.hostname or self.host, parts.port or 80)

    def _build_request(self):
        """Return the encoded GET request for this URL."""
        # An empty path would produce an invalid request line.
        target = self.path or "/"
        query = urlparse(self.url).query
        if query:
            target = "{}?{}".format(target, query)
        return "GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n".format(
            target, self.host).encode("utf8")

    @staticmethod
    def _extract_body(raw):
        """Return the text after the first blank line of ``raw``.

        Returns an empty string when no header/body separator is present.
        Bytes that are not valid UTF-8 are replaced rather than raising.
        """
        text = raw.decode("utf8", errors="replace")
        # Split on the FIRST blank line only, so bodies that themselves
        # contain blank lines are kept whole.
        _headers, _sep, body = text.partition('\r\n\r\n')
        return body

    def fetch(self):
        """Generator coroutine that downloads the URL.

        Yields a ``Future`` each time it has to wait for the socket to
        become writable (connection established) or readable (data
        arrived), and must be resumed with that future's result.  Returns
        the response body as ``str`` and stores it in ``self.r``.  The
        socket is closed and unregistered from ``Loop.selector`` however
        the generator exits.
        """
        self.r = b''  # allow fetch() to be run again on the same instance
        sock = socket()
        # Remember the descriptor: fileno() is -1 once the socket is closed.
        fd = sock.fileno()
        registered = False
        try:
            sock.setblocking(False)
            try:
                sock.connect(self._address())
            except BlockingIOError:
                # Expected for a non-blocking connect still in progress.
                pass

            # The callbacks below look up ``f`` when they run, so they
            # always resolve whichever future is currently awaited.
            f = Future()

            def on_writable():
                f.set_result(None)

            Loop.selector.register(fd, EVENT_WRITE, on_writable)
            registered = True
            yield f

            sock.send(self._build_request())
            Loop.selector.unregister(fd)
            registered = False

            def on_readable():
                f.set_result(sock.recv(1024))

            Loop.selector.register(fd, EVENT_READ, on_readable)
            registered = True
            while True:
                f = Future()
                chunk = yield f
                if not chunk:
                    # Empty read: the peer closed the connection.
                    break
                self.r += chunk
        finally:
            if registered:
                Loop.selector.unregister(fd)
            sock.close()

        self.r = self._extract_body(self.r)
        return self.r
if __name__ == '__main__':
    # Start one crawl task for each goods page with id 1..29, then run the
    # event loop until every task has finished.
    url_template = "http://shop.projectsedu.com/goods/{}/"
    for goods_id in range(1, 30):
        crawler = Spider(url_template.format(goods_id))
        Task(crawler.fetch())
    Loop().run_until_complete()