进程、线程、协程、锁
1、进程
a.进程就相当于一个文件,有自己独立的资源空间,每个进程之间的数据是相互隔离的;线程是最小的执行单元,每个进程中都至少存在一个线程
b.进程开启方式
from multiprocessing import Process


def func(arg):
    """Body executed in the child process; `arg` arrives via Process(args=...)."""
    print(123)


if __name__ == '__main__':
    # args must be a tuple, even for a single argument
    p = Process(target=func, args=('some_arg',))
    p.start()
c.Windows系统和Linux/macOS系统创建进程的方式不同:Windows(spawn方式)会在子进程中重新导入并执行父进程模块的代码,所以必须把创建子进程的代码放在 if __name__ == '__main__': 保护之下;而Linux/macOS(fork方式)创建子进程时直接复制父进程的内存空间,所以不强制需要 if __name__ == '__main__' 保护;
d.父进程等待所有子进程结束之后才结束,尽管父进程代码结束,但是父进程没有结束,父进程在等待子进程结束后回收子进程的资源,整个过程逻辑如下:
- 主进程代码结束
- 子进程结束
- 主进程回收资源
- 主进程结束
e.父进程如何知道子进程结束?
f.开启多个进程
from multiprocessing import Process
import random
import time  # fixed: time was used but never imported


def func(i):
    """Simulate sending one email after a random (<1s) delay."""
    time.sleep(random.random())
    print(f'发送一封邮件{i}')


if __name__ == '__main__':
    procs = []
    for i in range(10):
        p = Process(target=func, args=(i,))
        p.start()
        procs.append(p)
    # join() blocks the parent until that child has finished; joining
    # all children means every mail was sent before the final print
    for p in procs:
        p.join()
    print('所有邮件发送完成')
g.守护进程
from multiprocessing import Process
import time


def son1():
    """Print a heartbeat forever; ends only when the daemon is killed."""
    while True:
        print('is_alive')
        time.sleep(0.5)


if __name__ == '__main__':
    worker = Process(target=son1)
    # daemon must be set before start(); a daemon process is killed
    # as soon as the parent's code finishes
    worker.daemon = True
    worker.start()
    time.sleep(2)
g.Process中的其他方法
- p.is_alive():判断进程是否还活着
- p.terminate():强制结束一个进程,这是一个异步非阻塞语句
h.用类的方式开启进程
from multiprocessing import Process


class Myprocess(Process):
    """Process subclass: the child process executes run()."""

    def __init__(self, x, y):
        self.x = x  # fixed: original had a typo `slef.x`
        self.y = y
        super().__init__()

    def run(self):
        # The method must be named run; it is what the child executes
        # after start()
        print(self.x, self.y)
        print(123)


if __name__ == '__main__':
    mp = Myprocess(1, 2)
    mp.start()
i.购票系统诠释锁的概念
import json
import time
from multiprocessing import Process, Lock

# Shared data file, contents: {"count": <remaining tickets>}
TICKET_FILE = r'/Users/mac/Downloads/多线程/ticket'


def search_tickets(i):
    """Read-only query of the remaining ticket count; no lock needed."""
    with open(TICKET_FILE, 'r', encoding='utf8') as f:
        s_tickets = json.load(f)['count']
    print(f'user{i}余票数:{s_tickets}')


def buy_tickets(i, lock):
    """Read-modify-write of the shared count; must hold the lock so
    only one process at a time can decide and decrement."""
    with lock:
        time.sleep(0.1)
        with open(TICKET_FILE, 'r', encoding='utf8') as f:
            cont = json.load(f)['count']
        if cont > 0:
            print(f'user{i}买到票了')
            cont -= 1
        else:
            print(f'user{i}没买到票')
        time.sleep(0.1)
        with open(TICKET_FILE, 'w', encoding='utf8') as f:
            json.dump({'count': cont}, f)


def task(i, lock):
    search_tickets(i)
    buy_tickets(i, lock)


if __name__ == '__main__':
    # Fixed: the lock is passed explicitly instead of read as a global.
    # The original defined `lock` only under __main__, so the children
    # hit a NameError under the 'spawn' start method (Windows/macOS).
    lock = Lock()
    for i in range(10):
        p = Process(target=task, args=(i, lock))
        p.start()
需要加锁的典型情况:
- 多个进程需要修改共享的数据资源(如同一个文件、同一条数据库记录)时
考虑数据安全的前提下,才考虑效率问题
j.进程之间数据隔离
from multiprocessing import Process

n = 100


def func():
    """Decrement this process's own copy of the module-level n."""
    global n
    n -= 1
    print(n)


if __name__ == '__main__':
    children = []
    for _ in range(10):
        proc = Process(target=func)
        proc.start()
        children.append(proc)
    for proc in children:
        proc.join()
    # Still 100 here: every child decremented its own copy of n,
    # demonstrating that process memory is isolated
    print(n)
result:
99
99
99
99
99
99
99
99
99
99
100
k.进程之间的通信(IPC)
进程之间的通信(IPC)常用队列(Queue)或管道(Pipe)来实现,队列是先进先出的
from multiprocessing import Process, Queue  # fixed: was `multiprocess`


def func(astr, q):
    """Evaluate the expression and send three results back via the queue."""
    # NOTE(review): eval on an arbitrary string is unsafe for untrusted
    # input; acceptable only for this fixed demo expression.
    res = eval(astr)
    q.put(res)
    q.put(res * 2)
    q.put(res * 4)


if __name__ == '__main__':
    q = Queue()
    p = Process(target=func, args=('1+2+5', q))
    p.start()
    # FIFO: values come back in the order they were put
    print(q.get())
    print(q.get())
    print(q.get())
result:
8
16
32 #先进先出
from multiprocessing import Queue
import queue

# Fill a bounded queue to capacity
q = Queue(5)
for value in (1, 2, 3, 4, 5):
    q.put(value)
print('5555')
q.put(6)  # queue is full: put() blocks here forever
print('6666')  # never reached
print(q.get())
print(q.get())
print(q.get())
print(q.get())
result:
5555
#说明q.put(6)队列满时,程序就阻塞了
from multiprocessing import Queue  # fixed: Queue was never imported
import queue

q = Queue(5)
for value in (1, 2, 3, 4, 5):
    q.put(value)
print('5555')
try:
    # put_nowait does not block on a full queue; it raises queue.Full,
    # so the value 6 is simply dropped (data loss)
    q.put_nowait(6)
except queue.Full:
    pass
print('6666')
print(q.get())
print(q.get())
print(q.get())
#说明q.put_nowait(6) #程序不阻塞,但是会出现数据丢失,第六个print(q.get())拿不到值6
from multiprocessing import Queue
import queue

q = Queue(5)
for value in range(1, 6):
    q.put(value)
print('5555')
try:
    q.put_nowait(6)  # full queue: raises queue.Full, 6 is lost
except queue.Full:
    pass
print('6666')
for _ in range(5):
    print(q.get())
# 6 was never stored, so the queue is empty: this get() blocks forever
print(q.get())
from multiprocessing import Queue
import queue

q = Queue(5)
for value in range(1, 6):
    q.put(value)
print('5555')
try:
    q.put_nowait(6)  # full queue: raises queue.Full, 6 is dropped
except queue.Full:
    pass
print('6666')
for _ in range(5):
    print(q.get())
try:
    # On an empty queue, get_nowait raises queue.Empty instead of
    # blocking, so the program finishes normally
    print(q.get_nowait())
except queue.Empty:
    pass
result:
5555
6666
1
2
3
4
5
Process finished with exit code 0
#说明print(q.get_nowait()) #不会对程序造成任何影响,且不会阻塞
l.生产者消费者模型
import random
from multiprocessing import Process, Queue
import time


def producer(q, name, food):
    """Put 10 items on the queue, pausing randomly between items."""
    for i in range(10):
        fd = f'{food}{i}'
        q.put(fd)
        time.sleep(random.random())
        print(f'{name}制造了{fd}')


def consumer(q, name):
    """Consume items until a None sentinel arrives."""
    while True:
        fd = q.get()
        if fd is None:  # fixed idiom: was `fd == None`
            break
        time.sleep(0.5)
        print(f'{name}吃了{fd}')


def task(p_count, c_count):
    q = Queue(10)
    producers = []
    for _ in range(p_count):
        pp = Process(target=producer, args=(q, 'alice', 'noodels'))
        pp.start()
        producers.append(pp)
    for i in range(c_count):
        cp = Process(target=consumer, args=(q, f'mo{i}'))
        cp.start()
    for pp in producers:
        pp.join()
    # After all producers finish, send one None sentinel per consumer
    # so every consumer loop terminates
    for _ in range(c_count):
        q.put(None)


if __name__ == '__main__':
    task(10, 5)
2、线程
a.线程和进程的区别
线程开销小,进程开销大
进程中至少有一个线程
进程是计算机最小的资源分配单位
线程是计算机最小的调度单位
进程之间是数据隔离的
线程之间是数据共享的
多进程可以利用多核
Cpython中多线程不能利用多核
b.开启线程和开启进程的方式一样(可参考上面进程的开启方式),但是不需要 if __name__ == '__main__' 保护
c.Cpython中多线程不能利用多核的原因
是因为Cpython解释器中的垃圾回收机制导致的:垃圾回收机制会对线程中的变量进行引用计数操作,为了防止多线程下计数混乱,解释器在外部加了一把大锁(GIL锁,也称全局解释器锁)。有了GIL锁,同一时刻只有被CPU调度的那一个线程在执行,其他线程只能处于等待状态;当被调度的线程遇到I/O操作或者时间片到期之后,CPU会切换调度其他线程,垃圾回收机制再对新被调度的线程进行计数操作
d.线程中的阻塞join
线程中的阻塞和进程中的阻塞是一样的,都是等待子线程执行完毕后,主线程再继续执行
e.线程中没有terminate()强制结束线程的操作,只能等待线程执行完成后自己结束
f.守护线程
线程中的守护线程是:等所有非守护线程都结束后才结束(区别于守护进程:守护进程随主进程代码结束而结束)
import time
from threading import Thread


def func():
    """Daemon worker: would print 123 five times if allowed to finish."""
    for _ in range(5):
        time.sleep(1)
        print(123)


def func2():
    """Non-daemon worker: prints 456 four times."""
    for _ in range(4):
        time.sleep(1)
        print(456)


t1 = Thread(target=func)
# A daemon thread is only killed once every non-daemon thread
# (including func2's thread) has finished
t1.daemon = True
t1.start()
t2 = Thread(target=func2)
t2.start()
g.线程中的其他方法
1.获取线程id
t.ident  (注意:ident 是属性,不是方法,不要加括号)
或者导入模块
from threading import current_thread
from threading import Thread
from threading import current_thread


def func2():
    # Inside the worker, current_thread() is the child Thread object,
    # carrying all of that thread's information
    t = current_thread()
    print(t)
    print(456)


t2 = Thread(target=func2)
t2.start()
# In the main flow, current_thread() is the main thread; its .ident
# attribute is the thread id
a = current_thread()
b = a.ident
print(a, b)
result:
<Thread(Thread-1, started 123145422258176)>
<_MainThread(MainThread, started 4491701568)> 4491701568
456
2.获取存活的线程个数
from threading import Thread  # fixed: Thread was used but never imported
# NOTE: threading.enumerate shadows the builtin enumerate below
from threading import enumerate, active_count


def func2():
    print(456)


t2 = Thread(target=func2)
t2.start()
print(enumerate())        # list of all live Thread objects
print(len(enumerate()))
print(active_count())     # equivalent to len(enumerate())
k.锁
互斥锁、递归锁、死锁
- 互斥锁:同一把锁(一把锁)不能连续acquire()多次
# A mutex (threading.Lock) cannot be acquired twice in a row by the
# same thread without a release() in between.
from threading import Lock
lock = Lock()
lock.acquire()
print('1234')
lock.acquire()  # second acquire on a held Lock: blocks forever
print('4567')  # never reached
result:
1234 #程序在print(1234)后锁住了
- 死锁:多把锁且交替使用会出现死锁现象
import time  # fixed: time was used but never imported
from threading import Thread, Lock


def func1(name1, lock1, lock2):
    """Takes fork then noodle: acquires lock1 before lock2."""
    lock1.acquire()
    print(f'{name1}拿到叉子')
    lock2.acquire()
    print(f'{name1}拿到面条')
    time.sleep(0.1)
    lock2.release()
    print(f'{name1}放下面条')
    lock1.release()
    print(f'{name1}放下叉子')


def func2(name, lock1, lock2):
    """Takes noodle then fork: acquires the locks in the opposite
    order, which is what makes deadlock possible."""
    lock2.acquire()
    print(f'{name}拿到面条')
    lock1.acquire()
    print(f'{name}拿到叉子')
    time.sleep(0.1)
    lock1.release()
    print(f'{name}放下叉子')
    lock2.release()
    print(f'{name}放下面条')


lock1 = Lock()
lock2 = Lock()
t1 = Thread(target=func1, args=('cc', lock1, lock2))
t1.start()
t2 = Thread(target=func2, args=('jj', lock1, lock2))
t2.start()
t3 = Thread(target=func1, args=('kk', lock1, lock2))
t3.start()
t4 = Thread(target=func1, args=('aa', lock1, lock2))
t4.start()
result:
cc拿到叉子
cc拿到面条
cc放下面条
cc放下叉子
jj拿到面条
kk拿到叉子
#程序卡在此处,出现了死锁现象
3.递归锁:可以acquire多次,但是效率低
4. 死锁现象的解决方法:1、使用递归锁;2、调整代码实现逻辑
import time  # fixed: time was used but never imported
from threading import Thread, RLock


def func1(name1, lock1, lock2):
    lock1.acquire()
    print(f'{name1}拿到叉子')
    lock2.acquire()
    print(f'{name1}拿到面条')
    time.sleep(0.1)
    lock2.release()
    print(f'{name1}放下面条')
    lock1.release()
    print(f'{name1}放下叉子')


def func2(name, lock1, lock2):
    lock2.acquire()
    print(f'{name}拿到面条')
    lock1.acquire()
    print(f'{name}拿到叉子')
    time.sleep(0.1)
    lock1.release()
    print(f'{name}放下叉子')
    lock2.release()
    print(f'{name}放下面条')


# One RLock shared under both names: a thread that already holds it
# may acquire it again, so the mixed acquire order can no longer deadlock
lock1 = lock2 = RLock()
t1 = Thread(target=func1, args=('cc', lock1, lock2))
t1.start()
t2 = Thread(target=func2, args=('jj', lock1, lock2))
t2.start()
t3 = Thread(target=func1, args=('kk', lock1, lock2))
t3.start()
t4 = Thread(target=func1, args=('aa', lock1, lock2))
t4.start()
#递归锁的本质是一个锁,所以可以调整代码如下
import time  # fixed: time was used but never imported
from threading import Thread, Lock


def func1(name1, lock):
    # A single lock guards the whole fork+noodle critical section,
    # so no inconsistent acquire ordering can occur
    lock.acquire()
    print(f'{name1}拿到叉子')
    print(f'{name1}拿到面条')
    time.sleep(0.1)
    lock.release()
    print(f'{name1}放下面条')
    print(f'{name1}放下叉子')


def func2(name, lock):
    lock.acquire()
    print(f'{name}拿到面条')
    print(f'{name}拿到叉子')
    time.sleep(0.1)
    lock.release()
    print(f'{name}放下叉子')
    print(f'{name}放下面条')


lock = Lock()
t1 = Thread(target=func1, args=('cc', lock))
t1.start()
t2 = Thread(target=func2, args=('jj', lock))
t2.start()
t3 = Thread(target=func1, args=('kk', lock))
t3.start()
t4 = Thread(target=func1, args=('aa', lock))
t4.start()
result:
cc拿到叉子
cc拿到面条
cc放下面条
cc放下叉子
jj拿到面条
jj拿到叉子
jj放下叉子
jj放下面条
kk拿到叉子
kk拿到面条
kk放下面条
kk放下叉子
aa拿到叉子
aa拿到面条
aa放下面条
aa放下叉子
Process finished with exit code 0
L.队列
- 先进先出
from queue import Queue
- 后进先出
from queue import LifoQueue  (注意:LifoQueue 在 queue 模块中,不是 threading)
算法中常用到
from queue import LifoQueue

# Last in, first out: gets come back 3, 2, 1
lq = LifoQueue()
for item in (1, 2, 3):
    lq.put(item)
while not lq.empty():
    print(lq.get())
- 优先级队列
from queue import PriorityQueue  (注意:PriorityQueue 在 queue 模块中,不是 threading)
自动按优先级排序,数值小的先出队
from queue import PriorityQueue

# Entries come out smallest-first: (2,'b'), (3,'c'), (15,'a')
pq = PriorityQueue()
for entry in ((15, 'a'), (2, 'b'), (3, 'c')):
    pq.put(entry)
while not pq.empty():
    print(pq.get())
M.池(进程池、线程池)
池:一个池中可以固定开启多个线程或者进程,避免了单独开启多个线程或进程的开销和销毁多个进程和线程的时间
- 进程池
import os  # fixed: os was used but never imported
import random
import time
from concurrent.futures import ProcessPoolExecutor


def func(i):
    """Worker: report its pid, sleep 1-2s, return an id string."""
    print('start', os.getpid())
    time.sleep(random.randint(1, 2))
    print('end', os.getpid())
    return f'{i}*{os.getpid()}'


if __name__ == '__main__':
    pp = ProcessPoolExecutor(5)
    for i in range(10):
        res = pp.submit(func, i)
        # Calling result() right after each submit waits for that task
        # before submitting the next — this serialises the pool
        # (effectively synchronous execution)
        print(res.result())
    # Stop accepting new tasks; waits for submitted tasks to finish
    pp.shutdown()
    print('main')
import os  # fixed: os was used but never imported
import random
import time
from concurrent.futures import ProcessPoolExecutor


def func(i):
    """Worker: report its pid, sleep 1-2s, return an id string."""
    print('start', os.getpid())
    time.sleep(random.randint(1, 2))
    print('end', os.getpid())
    return f'{i}*{os.getpid()}'


if __name__ == '__main__':
    pp = ProcessPoolExecutor(5)
    futures = []
    for i in range(10):
        futures.append(pp.submit(func, i))
    # All tasks are submitted first, so they run concurrently; each
    # result() then only waits for its own task
    for fut in futures:
        print(fut.result())
    pp.shutdown()  # no new tasks; wait for submitted ones to finish
    print('main')
回调函数
from concurrent.futures import ThreadPoolExecutor
import requests


def get_page(url):
    """Fetch a page and return its url plus body text."""
    resp = requests.get(url)
    return {'url': url, 'cont': resp.text}


def callfunc(res):
    # res is a Future; result() blocks until the fetch finishes
    print(res.result()['url'])


tp = ThreadPoolExecutor(5)
url_l = [
    'http://www.baidu.com',
    'http://www.tencent.com',
    'http://www.douban.com',
    'http://www.cnblogs.com',
]
futures = [tp.submit(get_page, url) for url in url_l]
# Without add_done_callback, results are printed in submit order
for fut in futures:
    callfunc(fut)
result:
http://www.baidu.com
http://www.tencent.com
http://www.douban.com
http://www.cnblogs.com
Process finished with exit code 0
#不使用回调函数,返回的结果会根据访问顺序来返回
from concurrent.futures import ThreadPoolExecutor  # fixed: was never imported here
import requests  # fixed: was never imported here


def get_page(url):
    """Fetch a page and return its url plus body text."""
    res = requests.get(url)
    return {'url': url, 'cont': res.text}


def callfunc(res):
    # Invoked with the finished Future as soon as it completes
    print(res.result()['url'])


tp = ThreadPoolExecutor(5)
url_l = [
    'http://www.baidu.com',
    'http://www.tencent.com',
    'http://www.douban.com',
    'http://www.cnblogs.com',
]
for url in url_l:
    r = tp.submit(get_page, url)
    # Callbacks fire in completion order, not submit order
    r.add_done_callback(callfunc)
result:
http://www.baidu.com
http://www.douban.com
http://www.cnblogs.com
http://www.tencent.com
Process finished with exit code 0
#说明回调函数是先得到谁的结果先返回谁
- 线程池:和进程池一模一样
N.协程
a.协程:协程相较于线程来说,开销更小,是由用户级别控制调用的,所以可以人为的避免程序运行过程中比较明显的I/O操作,操作系统可以识别CPU不工作的微小瞬间,从而使多个任务在不停的切换进行,这样使CPU的利用率更高
b.gevent模块
import gevent
import time
from gevent import monkey

monkey.patch_all()  # make blocking calls (time.sleep etc.) cooperative


def eat():
    print('a is eating')
    time.sleep(1)
    print('a finished eat')


def sleep():
    print('b is sleeping')
    time.sleep(1)
    print('b finished sleep')


g1 = gevent.spawn(eat)    # fixed: was gevent.spwan
g2 = gevent.spawn(sleep)  # fixed: was gevent.spwan
# Greenlets run when the current one blocks; joinall waits for both
gevent.joinall([g1, g2])
gevent模块获取返回值
import gevent
import time
from gevent import monkey

monkey.patch_all()  # make blocking calls cooperative


def eat():
    print('a is eating')
    time.sleep(1)
    print('a finished eat')
    return 'a'


def sleep():
    print('b is sleeping')
    time.sleep(1)
    print('b finished sleep')
    return 'b'


g1 = gevent.spawn(eat)    # fixed: was gevent.spwan
g2 = gevent.spawn(sleep)  # fixed: was gevent.spwan
gevent.joinall([g1, g2])
# After joinall, each greenlet's return value is in .value
print(g1.value)
print(g2.value)
协程例子
#服务端
# Server
import socket

import gevent
from gevent import monkey

monkey.patch_all()  # make socket operations cooperative


def chat(conn):
    """Echo each message back upper-cased; one greenlet per client."""
    while True:
        msg = conn.recv(1024).decode('utf-8')
        conn.send(msg.upper().encode('utf-8'))


sk = socket.socket()
sk.bind(('127.0.0.1', 9999))
sk.listen()
# Fixed: the original referenced chat before it was defined (NameError)
# and accepted only once, exiting immediately; loop to serve many clients
while True:
    conn, addr = sk.accept()
    gevent.spawn(chat, conn)
#客户端
import socket
from threading import Thread
def client():
sk = socket.socket()
sk.connect(('127.0.0.1', 9999))
while True:
sk.send('hello'.encode('utf-8'))
print(sk.recv(1024))
for i in range(500):
t = Thread(target=client)
t.start()
o.进程池、线程、协程联合使用实现高并发的思路
import gevent
from gevent import monkey
from threading import Thread
from multiprocessing import Process
import requests
from concurrent.futures import ProcessPoolExecutor

# NOTE(review): patch_all() replaces the original patch_socket();
# requests over https also needs ssl/dns patched — confirm in context
monkey.patch_all()

url = 'https://'


def task():
    """One unit of work: fetch the url 10 times."""
    for _ in range(10):
        requests.get(url)


def g_fun():
    """Coroutine layer: 10 concurrent tasks inside one thread."""
    gevent.joinall([gevent.spawn(task) for _ in range(10)])


def t_fun():
    """Thread layer: 10 threads, each running a batch of coroutines."""
    for _ in range(10):
        t = Thread(target=g_fun)
        t.start()


def pp_func():
    """Process-pool layer: 5 processes driving the thread/coroutine stack."""
    pp = ProcessPoolExecutor(5)
    pp.submit(t_fun)  # fixed: was t_func (NameError — function is t_fun)
    pp.shutdown()