开多线程时,monkey.patch_all() 会把 threading/time 等标准库模块全部打补丁,导致新开的线程被阻塞、无法继续执行;需要对 monkey.patch_all 的调用方式进行处理,解决方案有两种:
1. monkey.patch_all(thread=False) —— 不给 thread/threading 模块打补丁;
2. 在需要休眠的地方直接用 gevent.sleep() 代替 time.sleep()。
from bs4 import BeautifulSoup
import requests
import gevent
import time
from gevent import monkey, pool
from threading import Thread
# Monkey-patch the stdlib so blocking calls (sockets, time.sleep, threading, ...)
# cooperate with gevent greenlets.
# NOTE(review): patching *everything* (including threads) is exactly what makes
# the Thread in __main__ hang — see the notes at the top and bottom of this
# file; the documented workarounds are monkey.patch_all(thread=False) or
# calling gevent.sleep() directly. Left unfixed on purpose: this file
# demonstrates the hang.
monkey.patch_all()
jobs = []   # greenlet handles spawned into the pool (used by the commented-out code below)
links = []  # accumulator for <a> tags collected by get_links()
p = pool.Pool(10)  # cap concurrency at 10 greenlets
# NOTE(review): singular name, but it is a *list* of URLs; the commented-out
# loop in start_process() refers to it as `urls` — presumably the same list.
url = [
'http://www.baidu.com',
]
def get_links(url):
    """Fetch *url* and append every ``<a>`` tag on the page to the
    module-level ``links`` list.

    Only responses with HTTP status 200 are parsed; anything else is
    silently ignored (best-effort crawling, no exception handling).
    """
    # time.sleep(2)
    r = requests.get(url)
    if r.status_code == 200:
        soup = BeautifulSoup(r.text, 'lxml')
        print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
        # BUG FIX: the original `links + soup.find_all('a')` built a new
        # list and immediately discarded it, so `links` never grew.
        # extend() mutates the shared list in place.
        links.extend(soup.find_all('a'))
def start_process():
    """Stand-in workload for the worker thread: pause for two seconds.

    With monkey.patch_all() active, time.sleep is gevent's cooperative
    sleep under the hood — which is what makes the Thread that runs this
    function misbehave (see the notes at the top of the file).
    """
    time.sleep(2)
    # Intended real work (kept for reference, currently disabled):
    # for url in urls:
    #     jobs.append(p.spawn(get_links, url))
    # gevent.joinall(jobs)
if __name__ == '__main__':
    # Run start_process on an OS thread. Because monkey.patch_all() also
    # patched the threading module, join() blocks the gevent hub and the
    # program hangs — the exact behavior this demo illustrates.
    worker = Thread(target=start_process)
    worker.start()
    worker.join()
上面的例子运行时会在 t.join() 处卡住,具体解释见 StackOverflow 上关于 gevent monkey.patch_all 与原生线程冲突的讨论(注:原文未附具体链接)。