进程Process是程序的一次执行。
每个进程都有自己的地址空间、内存、数据栈以及记录运行轨迹的辅助数据,操作系统管理运行的所有进程,并为这些进程公平分配时间。
线程Thread运行在同一个进程中,共享运行环境。
线程有开始、顺序执行和结束3部分。线程之间可以比进程之间更方便地共享数据和相互通信。
线程一般是并发执行的。
多进程模式最大的优点是稳定性高,因为一个子进程崩溃不会影响主进程和其他子进程。多进程模式的缺点是创建进程的代价大。
多线程模式通常比多进程快一点,但是也快不了多少。多线程模式致命的缺点是任何一个线程挂掉都可能直接造成整个进程崩溃,因为所
有线程共享进程的内存。
为了最高效地利用CPU,同时进行的计算密集型任务的数量应当等于CPU的核心数。
涉及网络、磁盘IO的任务都是IO密集型任务,这类任务的特点是CPU消耗很少,任务的大部分时间都在等待IO操作完成(因为IO的速度
远远低于CPU和内存的速度)。IO密集型任务的任务越多,CPU效率越高,不过有一个限度。
对于IO密集型任务而言,最适合的语言是开发效率高(代码量最少)的语言,脚本语言是首选,C语言最差。
考虑到CPU和IO之间速度差异很大,一个任务在执行的过程中大部分时间都在等待IO操作,单进程单线程模型会导致别的任务无法并行执
行,因此需要多进程模型或多线程模型支持多任务并发执行。
- 同步:一件事接着一件事发生,就像送葬队伍一样。
- 异步:任务是互相独立的,就像派对参与者从不同的车上下来一样。
Python提供了几个用于多线程编程的模块,包括_thread、threading和queue等。
_thread和threading模块允许程序员创建和管理线程。_thread模块提供了基本的线程和锁支持,threading提供了更高级别的、功能更强的线程管理功能。queue模块(Python 2中名为Queue)允许用户创建一个可以用于多个线程之间共享数据的队列数据结构。
多线程
_thread模块
_thread模块不支持守护线程
import _thread
from time import sleep
from datetime import datetime

# %m is the month; the original '%M' (minutes) produced wrong date strings.
date_time_format = '%y-%m-%d %H:%M:%S'


def date_time_str(date_time):
    """Format a datetime as 'yy-mm-dd HH:MM:SS'."""
    return datetime.strftime(date_time, date_time_format)


def loop_one():
    """Worker 1: sleep 4 seconds, printing start/end timestamps."""
    print(f'+++线程一开始于:{date_time_str(datetime.now())}')
    print('+++线程一休眠4秒')
    sleep(4)
    print(f'+++线程一休眠结束,结束于:{date_time_str(datetime.now())}')


def loop_two():
    """Worker 2: sleep 2 seconds, printing start/end timestamps."""
    print(f'***线程二开始时间:{date_time_str(datetime.now())}')
    print('***线程二休眠2秒')
    sleep(2)
    print(f'***线程二休眠结束,结束时间:{date_time_str(datetime.now())}')


def main():
    """Start both workers via _thread and wait long enough for them to finish.

    _thread has no join(); the sleep(6) must outlast the longest worker (4s),
    otherwise the main thread exits and the raw threads die with the process.
    """
    print(f'------所有线程开始时间:{date_time_str(datetime.now())}')
    _thread.start_new_thread(loop_one, ())  # thread 1
    _thread.start_new_thread(loop_two, ())  # thread 2
    sleep(6)
    print(f'------所有线程结束时间:{date_time_str(datetime.now())}')


if __name__ == '__main__':
    main()
import _thread
import time
from time import sleep
from datetime import datetime

loops = [4, 2]
# %m is the month; the original '%M' (minutes) produced wrong date strings.
date_time_format = '%y-%m-%d %H:%M:%S'


def date_time_str(date_time):
    """Format a datetime as 'yy-mm-dd HH:MM:SS'."""
    return datetime.strftime(date_time, date_time_format)


def loop(n_loops, n_sec, lock):
    """Worker: sleep n_sec seconds, then release its lock to signal completion."""
    print(f'线程({n_loops})开始执行:{date_time_str(datetime.now())},先休眠({n_sec})秒')
    sleep(n_sec)
    print(f'线程({n_loops})休眠结束:{date_time_str(datetime.now())}')
    lock.release()  # signal the main thread that this worker is done


def main():
    """Start one raw thread per entry in `loops` and wait on per-thread locks."""
    print(f'------所有线程开始时间------')
    locks = []
    n_loops = range(len(loops))
    for i in n_loops:
        lock = _thread.allocate_lock()
        lock.acquire()  # held until the worker releases it on completion
        locks.append(lock)
    for i in n_loops:
        _thread.start_new_thread(loop, (i, loops[i], locks[i]))
        time.sleep(0.005)  # slight stagger so start messages stay readable
    for i in n_loops:
        # Busy-wait for release; _thread has no join() primitive.
        while locks[i].locked():
            pass
    print(f'------所有线程结束时间:{date_time_str(datetime.now())}')


if __name__ == '__main__':
    main()
threading模块
threading模块支持守护线程。
如果主线程退出时不用等待子线程完成,就要设定这些线程的daemon属性,即在线程Thread.start()开始前,把线程的daemon属性设为True(thread.daemon = True;旧式写法Thread.setDaemon(True)已被弃用),表示这个线程“不重要”。新的子线程会继承父线程的daemon标志,主线程在所有非守护线程退出后才会结束,即进程中没有非守护线程存在时才结束。
Thread类
import _thread
import threading
import time
from time import sleep
from datetime import datetime

loops = [4, 2]
# %m is the month; the original '%M' (minutes) produced wrong date strings.
date_time_format = '%y-%m-%d %H:%M:%S'


def date_time_str(date_time):
    """Format a datetime as 'yy-mm-dd HH:MM:SS'."""
    return datetime.strftime(date_time, date_time_format)


def loop(n_loops, n_sec):
    """Worker: sleep n_sec seconds, printing start/end timestamps."""
    print(f'线程({n_loops})开始执行:{date_time_str(datetime.now())},先休眠({n_sec})秒')
    sleep(n_sec)
    print(f'线程({n_loops})休眠结束:{date_time_str(datetime.now())}')


def main():
    """Create, start and join one threading.Thread per entry in `loops`."""
    print(f'------所有线程开始时间:{date_time_str(datetime.now())}')
    threads = []
    n_loops = range(len(loops))
    for i in n_loops:
        t = threading.Thread(target=loop, args=(i, loops[i]))
        threads.append(t)
    for i in n_loops:
        threads[i].start()
    for i in n_loops:
        threads[i].join()  # block until each worker finishes
    print(f'------所有线程结束时间:{date_time_str(datetime.now())}')


if __name__ == '__main__':
    main()
import _thread
import threading
import time
from time import sleep
from datetime import datetime

loops = [4, 2]
# %m is the month; the original '%M' (minutes) produced wrong date strings.
date_time_format = '%y-%m-%d %H:%M:%S'


class ThreadFunc(object):
    """Callable wrapper bundling a function with its arguments for Thread(target=...)."""

    def __init__(self, func, args, name=''):
        self.name = name
        self.func = func
        self.args = args

    def __call__(self):
        # Thread.start() invokes the target with no arguments; we forward ours.
        self.func(*self.args)


def date_time_str(date_time):
    """Format a datetime as 'yy-mm-dd HH:MM:SS'."""
    return datetime.strftime(date_time, date_time_format)


def loop(n_loops, n_sec):
    """Worker: sleep n_sec seconds, printing start/end timestamps."""
    print(f'线程({n_loops})开始执行:{date_time_str(datetime.now())},先休眠({n_sec})秒')
    sleep(n_sec)
    print(f'线程({n_loops})休眠结束:{date_time_str(datetime.now())}')


def main():
    """Run one thread per entry in `loops`, using ThreadFunc as the target."""
    print(f'------所有线程开始时间:{date_time_str(datetime.now())}')
    threads = []
    n_loops = range(len(loops))
    for i in n_loops:
        t = threading.Thread(target=ThreadFunc(loop, (i, loops[i]), loop.__name__))
        threads.append(t)
    for i in n_loops:
        threads[i].start()
    for i in n_loops:
        threads[i].join()
    print(f'------所有线程结束时间:{date_time_str(datetime.now())}')


if __name__ == '__main__':
    main()
import threading
from time import sleep
from datetime import datetime

loops = [4, 2]
# %m is the month; the original '%M' (minutes) produced wrong date strings.
date_time_format = '%y-%m-%d %H:%M:%S'


class MyThread(threading.Thread):
    """Thread subclass that stores its target's return value for later retrieval."""

    def __init__(self, func, args, name=''):
        threading.Thread.__init__(self)
        self.res = None  # result of func, filled in by run()
        self.name = name
        self.func = func
        self.args = args

    def getResult(self):
        """Return the value produced by run() (None until the thread finishes)."""
        return self.res

    def run(self):
        print(f'starting {self.name} at:{date_time_str(datetime.now())}')
        self.res = self.func(*self.args)
        print(f'{self.name} finished at:{date_time_str(datetime.now())}')


def date_time_str(date_time):
    """Format a datetime as 'yy-mm-dd HH:MM:SS'."""
    return datetime.strftime(date_time, date_time_format)


def loop(n_loops, n_sec):
    """Worker: sleep n_sec seconds, printing start/end timestamps."""
    print(f'线程({n_loops})开始执行:{date_time_str(datetime.now())},先休眠({n_sec})秒')
    sleep(n_sec)
    print(f'线程({n_loops})休眠结束:{date_time_str(datetime.now())}')


def main():
    """Create, start and join one MyThread per entry in `loops`."""
    print(f'------所有线程开始时间:{date_time_str(datetime.now())}')
    threads = []
    n_loops = range(len(loops))
    for i in n_loops:
        t = MyThread(loop, (i, loops[i]), loop.__name__)
        threads.append(t)
    for i in n_loops:
        threads[i].start()
    for i in n_loops:
        threads[i].join()
    print(f'------所有线程结束时间:{date_time_str(datetime.now())}')


if __name__ == '__main__':
    main()
范例
import threading, queue, time, urllib
from urllib import request

# urlopen needs a scheme; the original bare 'www...' raised ValueError for every URL.
BASE_URL = 'http://www.pythontab.com/html/pithonjichu/'
URL_QUEUE = queue.Queue()
for item in range(2, 10):
    url = BASE_URL + str(item) + '.html'
    URL_QUEUE.put(url)


def fetch_url(url_queue):
    """Drain url_queue, fetching each URL; return when the queue is empty."""
    while True:
        try:
            # Non-blocking read: raises queue.Empty when nothing is left,
            # which is the normal termination condition, not an error.
            url_val = url_queue.get_nowait()
        except queue.Empty:
            break
        curr_thread_name = threading.current_thread().name
        print(f"Current Thread Name {curr_thread_name}, Url:{url_val}")
        try:
            response = urllib.request.urlopen(url_val)
            response_code = response.getcode()
        except Exception as ex:
            print(f"ex info is:{ex}")
            continue
        if response_code == 200:
            # 抓取内容的数据处理放在这里
            # Artificial delay to make the concurrency visible.
            time.sleep(1)


if __name__ == '__main__':
    start_time = time.time()
    threads = []
    threads_num = 4
    for num in range(0, threads_num):
        thread = threading.Thread(target=fetch_url, args=(URL_QUEUE,))
        threads.append(thread)
    for item in threads:
        item.start()
    for thread in threads:
        thread.join()
    print(f"All thread done,speed:{(time.time() - start_time)} s")
每一项使用一个线程
import threading


def function(item):
    """Show which item this thread was given."""
    print(item)


items = ('Reykjavik', 'Vien', 'Zadar', 'Venice', 'Wr9claw', 'Bolognia', 'Berlin', 'Slubice', 'New York', 'Dehli',)

if __name__ == '__main__':
    workers = []
    for element in items:
        worker = threading.Thread(target=function, args=[element])
        worker.start()
        workers.append(worker)
    # Join in reverse creation order, emptying the list as we go.
    while workers:
        workers.pop().join()
线程同步
使用Thread对象的Lock和RLock可以实现简单的线程同步,这两个对象都有acquire方法和release方法。对于每次只允许一个线程操作的数
据,可以将操作放到acquire和release方法之间。
锁有两种状态——锁定和未锁定。当一个线程(如set)要访问共享数据时,必须先获得锁定;如果已经有别的线程(如print)获得锁定
了,就让线程set暂停,也就是同步阻塞。
import threading
from time import sleep
from datetime import datetime

# %m is the month; the original '%M' (minutes) produced wrong date strings.
date_time_format = '%y-%m-%d %H:%M:%S'


class MyThread(threading.Thread):
    """Thread that prints timestamps while holding the shared lock."""

    def __init__(self, threadID, name, counter):
        threading.Thread.__init__(self)
        self.name = name
        self.threadID = threadID
        self.counter = counter

    def run(self):
        print(f'开启线程: {self.name}')
        # Acquire the shared lock so only one thread prints at a time.
        threadLock.acquire()
        # The original printed the raw attributes here; print_time was defined
        # but never called — restore the intended lock-protected call.
        print_time(self.name, self.counter, 3)
        # Release the lock so the next thread can proceed.
        threadLock.release()


def date_time_str(date_time):
    """Format a datetime as 'yy-mm-dd HH:MM:SS'."""
    return datetime.strftime(date_time, date_time_format)


def print_time(threadName, delay, counter):
    """Print a timestamp `counter` times, sleeping `delay` seconds between prints."""
    while counter:
        sleep(delay)
        print(f"{threadName}:[{date_time_str(datetime.now())}]")
        counter -= 1


def main():
    """Run two MyThread instances serialized by the shared threadLock."""
    # 创建线程
    thread1 = MyThread(1, "Thread-1", 1)
    thread2 = MyThread(2, "Thread-2", 2)
    # 开启线程
    thread1.start()
    thread2.start()
    # 添加线程到线程列表
    threads.append(thread1)
    threads.append(thread2)
    # 等待所有线程完成
    for t in threads:
        t.join()
    print("退出主线程")


if __name__ == '__main__':
    threadLock = threading.Lock()
    threads = []
    main()
线程优先级队列
queue模块(Python 2中名为Queue)可以用来进行线程间的通信,让各个线程之间共享数据。
Python的queue模块提供了同步的、线程安全的队列类,包括FIFO(先入先出)队列Queue、LIFO(后入先出)队列LifoQueue和优
先级队列PriorityQueue。这些队列都实现了锁原语,能够在多线程中直接使用。可以使用队列实现线程间的同步。
import threading
import queue
from time import sleep


class MyThread(threading.Thread):
    """Worker thread that consumes items from the shared queue until exitFlag is set."""

    def __init__(self, threadID, name, q):
        threading.Thread.__init__(self)
        self.name = name
        self.threadID = threadID
        self.q = q

    def run(self):
        print(f'开启线程: {self.name}')
        process_data(self.name, self.q)
        print(f'退出线程: {self.name}')


def process_data(threadName, q):
    """Consume items from q until the global exitFlag is raised by main().

    The loop must test exitFlag, not q.empty(): the workers are started
    before main() fills the queue, so the original `while not q.empty()`
    made every worker exit immediately, after which main()'s
    `while not workQueue.empty()` spin never terminated (deadlock).
    """
    while not exitFlag:
        queueLock.acquire()
        if not workQueue.empty():
            data = q.get()
            queueLock.release()
            print(f"{threadName} processing {data}")
        else:
            queueLock.release()
            sleep(1)


def main():
    """Start workers, fill the queue, wait for it to drain, then stop the workers."""
    global exitFlag
    exitFlag = 0
    # Lists (not sets) keep the creation and fill order deterministic.
    threadList = ["Thread-1", "Thread-2", "Thread-3"]
    nameList = ["one", "two", "three", "four", "five"]
    threads = []
    threadID = 1
    # 创建新线程
    for tname in threadList:
        thread = MyThread(threadID, tname, workQueue)
        thread.start()
        threads.append(thread)
        threadID += 1
    # 填充队列
    queueLock.acquire()
    for word in nameList:
        workQueue.put(word)
    queueLock.release()
    # 等待队列清空
    while not workQueue.empty():
        pass
    # 通知线程退出
    exitFlag = 1
    # 等待所有线程完成
    for t in threads:
        t.join()
    print('退出主线程')


if __name__ == '__main__':
    queueLock = threading.Lock()
    workQueue = queue.Queue(10)
    main()
使用线程池
import threading, queue

THREAD_POOL_SIZE = 4  # number of worker threads in the pool
params = ('Reykjavik', 'Vien', 'Zadar', 'Venice', 'Wr9claw', 'Bolognia', 'Berlin', 'Slubice', 'New York', 'Dehli',)


def runTask(param):
    """Task body: just print the parameter."""
    print(param)


def ThreadQueue(params_queue):
    """Worker loop: pull parameters off the queue until it is drained."""
    while not params_queue.empty():
        try:
            # block=False raises queue.Empty instead of waiting.
            item = params_queue.get(block=False)
        except queue.Empty:
            break
        else:
            runTask(item)
            # Pair every successful get() with a task_done() so join() can return.
            params_queue.task_done()


if __name__ == '__main__':
    work_queue = queue.Queue()
    for value in params:
        work_queue.put(value)
    pool = [threading.Thread(target=ThreadQueue, args=(work_queue,))
            for _ in range(THREAD_POOL_SIZE)]
    for worker in pool:
        worker.start()
    # Blocks until every queued item has been fetched and marked done.
    work_queue.join()
    while pool:
        pool.pop().join()
以多组参数并行执行函数
import queue
import threading
import time


class MultiThread(object):
    """Run `function` once per element of argsVector, spread over maxThreads threads."""

    def __init__(self, function, argsVector, maxThreads=5, queue_results=False):
        self._function = function
        self._lock = threading.Lock()
        # Store the *bound* __next__ method.  The original called __next__()
        # here, which consumed only the first argument at construction time;
        # _doSome then never raised StopIteration and looped forever.
        self._nextArgs = iter(argsVector).__next__
        self._threadPool = [threading.Thread(target=self._doSome)
                            for i in range(maxThreads)]
        if queue_results:
            self._queue = queue.Queue()
        else:
            self._queue = None

    def _doSome(self):
        # Worker: pull the next argument under the lock, then run the function.
        while True:
            self._lock.acquire()
            try:
                try:
                    args = self._nextArgs()
                except StopIteration:
                    break
            finally:
                self._lock.release()
            result = self._function(args)
            if self._queue is not None:
                self._queue.put((args, result))

    def get(self, *a, **kw):
        """Return one (args, result) pair; raises ValueError if results are not queued."""
        if self._queue is not None:
            return self._queue.get(*a, **kw)
        else:
            raise ValueError('Not queueing results')

    def start(self):
        """Start every thread in the pool."""
        for thread in self._threadPool:
            time.sleep(0)  # yield control so starts interleave
            thread.start()

    def join(self, timeout=None):
        """Join every thread in the pool."""
        for thread in self._threadPool:
            thread.join(timeout)


if __name__ == "__main__":
    import random

    def recite_n_times_table(n):
        for i in range(2, 11):
            print("%d * %d = %d" % (n, i, n * i))
            time.sleep(0.3 + 0.3 * random.random())

    mt = MultiThread(recite_n_times_table, range(2, 11))
    mt.start()
    mt.join()
    print("Well done kids!")
使用双队列(任务队列与结果队列)
import threading, queue

THREAD_POOL_SIZE = 4  # number of worker threads in the pool
params = ('Reykjavik', 'Vien', 'Zadar', 'Venice', 'Wr9claw', 'Bolognia', 'Berlin', 'Slubice', 'New York', 'Dehli',)


def runTask(param):
    """Send side: report the parameter and hand it back."""
    print(param+' send')
    return param


def runResultTask(param):
    """Receive side: report a result pulled off the result queue."""
    print('Result:{} recv'.format(param))


def ThreadQueue(params_queue, result_queue):
    """Worker loop: push each parameter through runTask into result_queue."""
    while not params_queue.empty():
        try:
            # block=False raises queue.Empty instead of waiting.
            job = params_queue.get(block=False)
        except queue.Empty:
            break
        else:
            result_queue.put(runTask(job))
            # Pair every successful get() with a task_done() so join() can return.
            params_queue.task_done()


if __name__ == '__main__':
    task_queue = queue.Queue()
    done_queue = queue.Queue()
    for value in params:
        task_queue.put(value)
    pool = [threading.Thread(target=ThreadQueue, args=(task_queue, done_queue))
            for _ in range(THREAD_POOL_SIZE)]
    for worker in pool:
        worker.start()
    # Blocks until every parameter has been fetched and processed.
    task_queue.join()
    while pool:
        pool.pop().join()
    while not done_queue.empty():
        runResultTask(done_queue.get())
多进程
import os

pid_list = []

if __name__ == '__main__':
    pid_list.append(os.getpid())
    # fork() returns 0 inside the child and the child's pid inside the parent.
    # POSIX only — Windows has no fork system call.
    child_pid = os.fork()
    if child_pid == 0:
        # Child branch: record our (new) pid and report.
        pid_list.append(os.getpid())
        print()
        print('这是一个子进程')
        print('pid:{}'.format(pid_list))
    elif child_pid > 0:
        # Parent branch: our pid is unchanged, so both entries match.
        pid_list.append(os.getpid())
        print()
        print('这是一个主进程')
        print('pid:{}'.format(pid_list))
import multiprocessing
import os

def work(identifier):
    """Report which worker this is and the pid of the process running it."""
    print('process {}, pid: {}'.format(identifier, os.getpid()))

if __name__ == '__main__':
    workers = [multiprocessing.Process(target=work, args=(idx,)) for idx in range(5)]
    for proc in workers:
        proc.start()
    # Join in reverse creation order, emptying the list as we go.
    while workers:
        workers.pop().join()
Pipe
import multiprocessing


class customClass:
    # Placeholder type: shows that arbitrary picklable objects traverse the pipe.
    pass


def work(connection):
    """Receive objects from the pipe until the None sentinel arrives."""
    while True:
        instance = connection.recv()  # object sent from the other end via send()
        # Compare against None explicitly: the original `if instance:` would
        # also stop on legitimate falsy payloads such as 0, '' or {}.
        if instance is not None:
            print('子进程 接收:{}'.format(instance))
        else:
            return


if __name__ == '__main__':
    parent_conn, child_conn = multiprocessing.Pipe()
    child = multiprocessing.Process(target=work, args=(child_conn,))  # 子进程
    for item in (42, 'some', {'one': 1}, customClass(), None):
        print('父进程 发送:{}:'.format(item))
        parent_conn.send(item)  # readable on the other end via recv()
    child.start()
    child.join()
共享内存
import multiprocessing

def f(n, a):
    """Mutate the shared Value and Array in place (runs in the child process)."""
    n.value = 3.1415926
    for idx in range(len(a)):
        a[idx] = -a[idx]

if __name__ == '__main__':
    # Shared-memory ctypes objects (see multiprocessing.sharedctypes):
    # 'f' = single-precision float, 'i' = signed int.
    num = multiprocessing.Value('f', 0.0)
    arr = multiprocessing.Array('i', range(10))
    worker = multiprocessing.Process(target=f, args=(num, arr))
    worker.start()
    worker.join()
    # The parent sees the child's writes because the memory is shared.
    print(num.value)
    print(arr[:])
进程池
import multiprocessing

POOL_SIZE = 4  # number of worker processes
params = ('Reykjavik', 'Vien', 'Zadar', 'Venice', 'Wroe土aw', 'Bolognia', 'Berlin', 'Slubice', 'New York', 'Dehli',)

def fun1(parm):
    """Producer side: announce the parameter and echo it back."""
    print('发送:{}'.format(parm))
    return parm

def fun2(parm):
    """Consumer side: announce the received parameter."""
    print('接收:{}'.format(parm))

if __name__ == '__main__':
    # Process pool: fan the fun1 calls out across POOL_SIZE workers.
    with multiprocessing.Pool(POOL_SIZE) as pool:
        mapped = pool.map(fun1, params)
    for item in mapped:
        fun2(item)
multiprocessing.dummy
import multiprocessing.dummy
import multiprocessing

POOL_SIZE = 4  # number of pool workers
params = ('Reykjavik', 'Vien', 'Zadar', 'Venice', 'Wroe土aw', 'Bolognia', 'Berlin', 'Slubice', 'New York', 'Dehli',)


def fun1(param):
    """Producer side: announce the parameter and echo it back."""
    print('发送:{}'.format(param))
    return param


def fun2(param):
    """Consumer side: announce the received parameter."""
    print('接收:{}'.format(param))


if __name__ == '__main__':
    use_threads = False
    if use_threads:
        # multiprocessing.dummy offers the Pool API backed by threads.
        pool_cls = multiprocessing.dummy.Pool
    else:
        pool_cls = multiprocessing.Pool
    # Context manager terminates the pool; the original leaked its workers.
    with pool_cls(POOL_SIZE) as pool:
        results = pool.map(fun1, params)
    for result in results:
        fun2(result)
异步编程
import asyncio


async def funtion(num):
    """Trivial coroutine used to demonstrate event-loop scheduling."""
    print(num)


if __name__ == '__main__':
    loop = asyncio.new_event_loop()  # create a fresh event loop
    # gather() accepts bare coroutines; asyncio.wait() with coroutines is
    # deprecated since 3.8 and raises TypeError on 3.11+.
    loop.run_until_complete(asyncio.gather(*(funtion(number) for number in range(10))))
    loop.close()
import random
import asyncio


async def funtion(name):
    """Sleep four times for a random fraction of a second, reporting each wait."""
    for _ in range(4):
        time_to_sleep = random.randint(1, 3) / 4
        await asyncio.sleep(time_to_sleep)
        print("{} waited {} seconds".format(name, time_to_sleep))


async def main():
    # gather() accepts bare coroutines; asyncio.wait() with coroutines is
    # deprecated since 3.8 and raises TypeError on 3.11+.
    await asyncio.gather(funtion("foo"), funtion("bar"))


if __name__ == '__main__':
    loop = asyncio.new_event_loop()  # create a fresh event loop
    loop.run_until_complete(main())
    loop.close()
concurrent.futures 的Executor
import concurrent.futures

# 范例1
def loudy_return():
    """Print a marker and return 42, to show the future resolving."""
    print('processing')
    return 42

# The with-statement shuts the executor down when done;
# the original left its worker thread running.
with concurrent.futures.ThreadPoolExecutor(1) as executor:
    future = executor.submit(loudy_return)
    future.result()  # blocks until the submitted call completes
# 范例2
params = ('Reykjavik', 'Vien', 'Zadar', 'Venice', 'Wroe土aw', 'Bolognia', 'Berlin', 'Slubice', 'New York', 'Dehli',)
POOL_SIZE = 4  # number of worker threads


def fun1(parm):
    """Producer side: announce the parameter and echo it back."""
    print('发送:{}'.format(parm))
    return parm


def fun2(parm):
    """Consumer side: announce the received parameter."""
    print('接收:{}'.format(parm))


def main():
    """Map fun1 over params on a thread pool, then feed each result to fun2.

    The original returned pool.map(...) immediately, leaving the fun2 loop
    unreachable (and referencing an undefined `results` name).
    """
    with concurrent.futures.ThreadPoolExecutor(POOL_SIZE) as pool:
        results = pool.map(fun1, params)
    for result in results:
        fun2(result)


if __name__ == '__main__':
    main()
微线程
无线程的多任务协作
import signal
def empty(name):
    """Demo task: an endless generator that reports its name on every step."""
    while True:
        print("<empty process>", name)
        yield None
def terminating(name, maxn):
    """Demo task: count up to maxn, yielding control after each step."""
    for counter in range(maxn):
        print("Here %s, %s out of %s" % (name, counter, maxn))
        yield None
    print("Done with %s, bailing out after %s times" % (name, maxn))
def delay(duration=0.8):
    """Demo task: do nothing for `duration` seconds each time it is resumed."""
    import time
    while True:
        print("<sleep %d>" % duration)
        time.sleep(duration)
        yield None
class GenericScheduler(object):
    """Round-robin scheduler for generator-based cooperative 'threads'."""

    def __init__(self, threads, stop_asap=False):
        # Install a SIGINT handler so Ctrl+C requests an orderly shutdown.
        signal.signal(signal.SIGINT, self.shutdownHandler)
        self.shutdownRequest = False
        self.threads = threads
        self.stop_asap = stop_asap

    def shutdownHandler(self, n, frame):
        """SIGINT handler: record the shutdown request for schedule() to honor."""
        print("Request to shut down.")
        self.shutdownRequest = True

    def schedule(self):
        """Resume each generator in turn until done or shutdown is requested.

        With stop_asap=True the first finished generator stops everything;
        otherwise finished generators are replaced by no-ops until all finish.
        """
        def noop():
            while True:
                yield None

        n = len(self.threads)
        while True:
            for i, thread in enumerate(self.threads):
                try:
                    thread.__next__()
                except StopIteration:
                    if self.stop_asap:
                        return
                    n -= 1
                    if n == 0:
                        return
                    # Original had `self.theads[i]` (typo), which raised
                    # AttributeError as soon as any generator finished while
                    # others were still running.
                    self.threads[i] = noop()
                if self.shutdownRequest:
                    return
if __name__ == '__main__':
# First run: stop_asap=True, so schedule() returns as soon as the
# terminating('fie', 5) generator completes its 5 steps.
s = GenericScheduler([empty('boo'), delay(), empty('foo'), terminating('fie', 5), delay(0.5)], stop_asap=True)
s.schedule()
# Second run: stop_asap=False; the empty()/delay() generators never finish,
# so this loops until Ctrl+C triggers the SIGINT shutdown handler.
s = GenericScheduler([empty('boo'), delay(), empty('foo'), terminating('fie', 5), delay(0.5)], stop_asap=False)
s.schedule()