对于 I/O 操作来说,多线程和多进程的区别不大(时间主要花在等待 I/O 上,而不是花在 CPU 计算上)。
1.通过Thread类实例化
# 模拟多线程爬虫
import time
import threading


def get_detail_url(url, delay=2):
    """Simulate the crawler step that collects detail-page URLs.

    Prints a start marker, blocks for ``delay`` seconds, then prints an
    end marker.

    Args:
        url: Address of the page to process; not used by this simulation.
        delay: Seconds to sleep in place of real blocking I/O. Defaults to 2.
    """
    print('get detail url start')
    time.sleep(delay)
    print('get detail url end')


def get_detail_html(url, delay=2):
    """Simulate the crawler step that downloads a detail page's HTML.

    Prints a start marker, blocks for ``delay`` seconds, then prints an
    end marker.

    Args:
        url: Address of the page to process; not used by this simulation.
        delay: Seconds to sleep in place of real blocking I/O. Defaults to 2.
    """
    print('get detail html start')
    time.sleep(delay)
    print('get detail html end')
if __name__ == '__main__':
    # Run both simulated crawler steps in their own threads.
    thread1 = threading.Thread(target=get_detail_url, args=('',))
    thread2 = threading.Thread(target=get_detail_html, args=('',))
    start_time = time.time()

    # Daemon threads are stopped when the main thread exits. The flag is set
    # through the `daemon` attribute because Thread.setDaemon() is deprecated
    # since Python 3.10.
    thread1.daemon = True
    thread2.daemon = True

    thread1.start()
    thread2.start()

    # Wait for both threads to finish before the main thread continues.
    thread1.join()
    thread2.join()

    print('Main thread ended.Cost {}'.format(time.time()-start_time))
# 2.通过继承Thread类实现多线程
import time
import threading


class GetDetaiUrl(threading.Thread):
    """Thread that simulates collecting detail-page URLs.

    Args:
        name: Thread name, forwarded to ``threading.Thread``.
        delay: Seconds ``run`` sleeps in place of real blocking I/O.
            Defaults to 2.
    """

    def __init__(self, name, delay=2):
        # Overriding __init__ is optional; doing so allows custom attributes
        # (here: the simulated I/O duration) to be attached to the thread.
        super().__init__(name=name)
        self.delay = delay

    def run(self):
        """Code executed in the new thread once ``start()`` is called."""
        print('get detail url start')
        time.sleep(self.delay)
        print('get detail url end')


class GetDetailHtml(threading.Thread):
    """Thread that simulates downloading a detail page's HTML.

    Args:
        name: Thread name, forwarded to ``threading.Thread``.
        delay: Seconds ``run`` sleeps in place of real blocking I/O.
            Defaults to 4.
    """

    def __init__(self, name, delay=4):
        super().__init__(name=name)
        self.delay = delay

    def run(self):
        """Code executed in the new thread once ``start()`` is called."""
        print('get detail html start')
        time.sleep(self.delay)
        print('get detail html end')
if __name__ == '__main__':
    # Run both Thread subclasses concurrently and time the whole run.
    thread1 = GetDetailHtml(name='get_detail_html')
    thread2 = GetDetaiUrl(name='get_detail_url')
    start_time = time.time()

    # Daemon threads are stopped when the main thread exits. The flag is set
    # through the `daemon` attribute because Thread.setDaemon() is deprecated
    # since Python 3.10.
    thread1.daemon = True
    thread2.daemon = True

    thread1.start()
    thread2.start()

    # Wait for both threads to finish before the main thread continues.
    thread1.join()
    thread2.join()

    print('Main thread ended.Cost {}'.format(time.time()-start_time))