一,多进程
# 多进程
import multiprocessing
import time
def run(name):
    """Wait two seconds, then print a greeting for *name*."""
    pause_seconds = 2
    time.sleep(pause_seconds)
    print("hello,", name)
if __name__ == "__main__":
    # Launch ten worker processes; each one greets a differently numbered name.
    for index in range(10):
        worker = multiprocessing.Process(
            target=run,
            args=("hepengli %s" % index,),
        )
        worker.start()
G:\Python38\python.exe G:/Project1/self_taught/week_10/multi_process.py
hello,hello, hello,hepengli 2 hepengli 3hepengli 0
hello,hello, hepengli 1hepengli 4
hello, hepengli 9hello, hepengli 5
hello, hepengli 7hello, hepengli 6
hello, hepengli 8
Process finished with exit code 0
二,进程间的通讯
不同进程之间的内存是不共享的,要想实现两个进程间的数据交换,可以使用以下的方法:
Queue
# 进程queue
from multiprocessing import Queue, Process
def f(q):
    """Put one sample message on the queue *q*."""
    message = [42, None, "hello"]
    q.put(message)
if __name__ == "__main__":
    channel = Queue()
    child = Process(target=f, args=(channel,))
    child.start()
    # get() blocks until the child has put its message on the queue.
    received = channel.get()
    print(received)  # prints [42, None, 'hello']
    child.join()
G:\Python38\python.exe G:/Project1/self_taught/week_10/process_queue.py
[42, None, 'hello']
Process finished with exit code 0
Pipe
from multiprocessing import Process, Pipe
def f(conn):
    """Send two sample messages over *conn*, print the reply, then close it.

    conn: one end of a ``multiprocessing.Pipe()``.
    """
    # Connection objects are context managers, so the connection is closed
    # even when recv() raises (for example EOFError if the peer went away).
    with conn:
        conn.send([42, None, "hello1"])
        conn.send([42, None, "hello2"])
        print("from for parent:", conn.recv())
if __name__ == "__main__":
    # A pipe has two ends: the parent keeps one and hands the other to the child.
    parent_conn, child_conn = Pipe()
    child = Process(target=f, args=(child_conn,))
    child.start()
    # The child sends exactly two messages; print them in arrival order.
    for _ in range(2):
        print(parent_conn.recv())
    parent_conn.send("nice to meet you!")
    child.join()
G:\Python38\python.exe G:/Project1/self_taught/week_10/pipe_1.py
[42, None, 'hello1']
[42, None, 'hello2']
from for parent: nice to meet you!
Process finished with exit code 0
Manager
import os
from multiprocessing import Process, Manager
def f(d, l):
    """Store three sample entries in *d*, append this process's PID to *l*, then print *l*."""
    samples = ((1, "1"), ("2", 2), (0.25, None))
    for key, value in samples:
        d[key] = value
    pid = os.getpid()
    l.append(pid)
    print(l)
if __name__ == "__main__":
    with Manager() as manager:
        # Manager-backed dict and list that can be passed to and shared between processes.
        d = manager.dict()
        l = manager.list(range(5))
        workers = []
        for _ in range(10):
            worker = Process(target=f, args=(d, l))
            worker.start()
            workers.append(worker)
        # Wait for every worker before reading the shared results.
        for worker in workers:
            worker.join()
        print(d)
        print(l)
G:\Python38\python.exe G:/Project1/self_taught/week_10/manager_1.py
[0, 1, 2, 3, 4, 11640]
[0, 1, 2, 3, 4, 11640, 3320]
[0, 1, 2, 3, 4, 11640, 3320, 1008]
[0, 1, 2, 3, 4, 11640, 3320, 1008, 16872]
[0, 1, 2, 3, 4, 11640, 3320, 1008, 16872, 12760]
[0, 1, 2, 3, 4, 11640, 3320, 1008, 16872, 12760, 10364]
[0, 1, 2, 3, 4, 11640, 3320, 1008, 16872, 12760, 10364, 10692]
[0, 1, 2, 3, 4, 11640, 3320, 1008, 16872, 12760, 10364, 10692, 7308]
[0, 1, 2, 3, 4, 11640, 3320, 1008, 16872, 12760, 10364, 10692, 7308, 18180]
[0, 1, 2, 3, 4, 11640, 3320, 1008, 16872, 12760, 10364, 10692, 7308, 18180, 12352]
{1: '1', '2': 2, 0.25: None}
[0, 1, 2, 3, 4, 11640, 3320, 1008, 16872, 12760, 10364, 10692, 7308, 18180, 12352]
Process finished with exit code 0
进程同步
Without using the lock, output from the different processes is liable to get all mixed up.
如果不使用锁,来自不同进程的输出很容易混淆。
from multiprocessing import Process, Lock
def f(l, i):
    """Print a greeting for *i* while holding the lock *l*.

    l: a lock object shared by all workers (the demo passes a
       ``multiprocessing.Lock()``).
    i: the value to include in the greeting.
    """
    # `with` releases the lock even if print() raises, so a failing worker
    # cannot leave the lock held and block the others forever.
    with l:
        print("hello,world", i)
if __name__ == "__main__":
    lock = Lock()  # a single lock instance handed to every worker
    for num in range(10):
        worker = Process(target=f, args=(lock, num))
        worker.start()
G:\Python38\python.exe G:/Project1/self_taught/week_10/process_lock.py
hello,world 0
hello,world 3
hello,world 1
hello,world 2
hello,world 4
hello,world 8
hello,world 7
hello,world 9
hello,world 5
hello,world 6
Process finished with exit code 0
三,进程池
进程池内部维护一个进程序列,当使用时,就去进程池中获取一个进程,如果进程池序列中没有可供使用的进程,那么程序就会等待,直到进程池中有可用的进程为止。
进程池中的两个方法:
①apply------>同步执行(串行)
②apply_async------->异步执行(并行)
from multiprocessing import Pool
import time, os
def foo(i, delay=1):
    """Simulate a slow task: sleep, print the current PID, and return ``i + 100``.

    i: number to transform.
    delay: seconds to sleep before doing the work. Defaults to the original
        hard-coded 1 second; pass 0 to skip the wait.
    """
    time.sleep(delay)
    print("in process", os.getpid())
    return i + 100
def bar(arg):
    """Print *arg* and the current process ID behind an "exec done" marker."""
    current_pid = os.getpid()
    print("---->exec done:", arg, current_pid)
if __name__ == "__main__":
    pool = Pool(processes=5)  # at most five worker processes in the pool
    print("main process", os.getpid())
    for task_number in range(10):
        # apply() blocks until the call has returned, so the tasks run one by one.
        pool.apply(func=foo, args=(task_number,))
    print("end")
    # The pool has to be closed before it can be joined.
    pool.close()
    pool.join()
结果为:
from multiprocessing import Pool
import time, os
def foo(i, delay=1):
    """Simulate a slow task: sleep, print the current PID, and return ``i + 100``.

    i: number to transform.
    delay: seconds to sleep before doing the work. Defaults to the original
        hard-coded 1 second; pass 0 to skip the wait.
    """
    time.sleep(delay)
    print("in process", os.getpid())
    return i + 100
def bar(arg):
    """Print *arg* and the current process ID behind an "exec done" marker."""
    current_pid = os.getpid()
    print("---->exec done:", arg, current_pid)
if __name__ == "__main__":
    pool = Pool(processes=5)  # at most five worker processes in the pool
    print("main process", os.getpid())
    for i in range(10):
        # apply_async() returns immediately, so up to five tasks run at once.
        # The original discarded every result; registering `bar` as the
        # callback reports each return value of foo() as soon as it is ready.
        pool.apply_async(func=foo, args=(i,), callback=bar)
    print("end")
    # The pool has to be closed before it can be joined; join() then waits
    # for all outstanding tasks to finish.
    pool.close()
    pool.join()
结果为:
四,协程
协程,又称微线程,纤程。英文名Coroutine。协程是一种用户态的轻量级线程。协程拥有自己的寄存器上下文和栈。协程调度切换时,将寄存器上下文和栈保存到其他地方,在切回来的时候,恢复先前保存的寄存器上下文和栈。因此:
协程能保留上一次调用时的状态(即所有局部状态的一个特定组合),每次过程重入时,就相当于进入上一次调用的状态,换种说法:进入上一次离开时所处逻辑流的位置。
协程的好处:
①无需线程上下文切换的开销
②无需原子操作锁定及同步的开销
“原子操作(atomic operation)是不需要synchronized”,所谓原子操作是指不会被线程调度机制打断的操作;这种操作一旦开始,就一直运行到结束,中间不会有任何 context switch(切换到另一个线程)。原子操作可以是一个步骤,也可以是多个操作步骤,但是其顺序是不可以被打乱,或者切割掉只执行部分。视作整体是原子性的核心。
③方便切换控制流,简化编程模型
④高并发+高扩展性+低成本:一个CPU支持上万的协程都不是问题。所以很适合用于高并发处理。
greenlet
greenlet是一个用C实现的协程模块,相比于Python自带的yield,它可以使你在任意函数之间随意切换,而不需要先把这个函数声明为generator。
from greenlet import greenlet
def test1():
    """Print 12, switch to gr2, print 34, then switch to gr2 again."""
    print(12)
    gr2.switch()  # hand control to test2
    print(34)
    gr2.switch()  # resume test2 where it previously switched away


def test2():
    """Print 56, switch back to gr1, then print 78."""
    print(56)
    gr1.switch()
    print(78)


# Wrap each function in a greenlet; nothing runs until the first switch().
gr1, gr2 = greenlet(test1), greenlet(test2)
gr1.switch()
G:\Python38\python.exe G:/Project1/self_taught/week_10/greenlet_1.py
12
56
34
78
Process finished with exit code 0
Gevent
Gevent 是一个第三方库,可以轻松通过gevent实现并发同步或异步编程,在gevent中用到的主要模式是Greenlet。Greenlet全部运行在主程序操作系统进程的内部,但它们被协作式地调度。
# 自动切换
import gevent
def foo():
    """Print a message, sleep 2 seconds via gevent, then print the closing message."""
    print("first running in foo!")
    gevent.sleep(2)
    print("The last one!")


def bar():
    """Print a message, sleep 1 second via gevent, then print again."""
    print("foo first switch to bar!")
    gevent.sleep(1)
    print("fun3 switch to bar!")


def fun3():
    """Print a message, sleep half a second via gevent, then print again."""
    print("bar first switch to fun3!")
    gevent.sleep(0.5)
    print("bar last switch to fun3!")


# Spawn one greenlet per function (in this order) and wait for all of them.
gevent.joinall([gevent.spawn(task) for task in (foo, bar, fun3)])
结果为:
五,爬虫(low)
from urllib import request
def f(url):
    """Download *url* and save the raw response body to ``reptile2.txt``.

    Prints the URL before the request and, once the file has been written,
    the number of bytes received.
    """
    print("GET: %s" % url)
    # Context managers close the HTTP response and the file even on error.
    with request.urlopen(url) as resp:
        data = resp.read()
    # The file handle gets its own name; the original reused `f`, which
    # shadowed this function inside its own body.
    with open("reptile2.txt", "wb") as out_file:
        out_file.write(data)
    print("%s bytes received from %s" % (len(data), url))


f("https://blog.csdn.net/hpl980342791/article/details/111401013")
# 爬虫
# Crawler: sequential vs. gevent-based fetching.
#
# gevent's documentation asks for monkey patching as early as possible,
# before other imports. urllib pulls in socket/ssl, and patching ssl after it
# has been imported triggers a MonkeyPatchWarning, so patch first.
from gevent import monkey

monkey.patch_all()  # make blocking standard-library I/O cooperative

import time  # noqa: E402

import gevent  # noqa: E402
from urllib import request  # noqa: E402


def f(url):
    """Fetch *url* and print how many bytes were received."""
    print("GET: %s" % url)
    # Close the HTTP response as soon as the body has been read.
    with request.urlopen(url) as resp:
        data = resp.read()
    print("%s bytes received from %s" % (len(data), url))


urls = [
    "https://www.python.org/",
    "https://www.yahoo.com/",
    "https://www.github.com/",
]

# Sequential run: one request after another.
time_start = time.time()
for url in urls:
    f(url)
print("synchronization cost:", time.time() - time_start)
print("----"*20)

# Concurrent run: one greenlet per URL, reusing the same list so both halves
# always fetch identical targets.
async_time_start = time.time()
gevent.joinall([gevent.spawn(f, url) for url in urls])
print("asynchronous cost:", time.time() - async_time_start)
G:\Python38\python.exe G:/Project1/self_taught/week_10/reptile_1.py
GET: https://www.python.org/
50510 bytes received from https://www.python.org/
GET: https://www.yahoo.com/
489876 bytes received from https://www.yahoo.com/
GET: https://www.github.com/
185537 bytes received from https://www.github.com/
synchronization cost: 35.19561314582825
--------------------------------------------------------------------------------
GET: https://www.python.org/
GET: https://www.yahoo.com/
GET: https://www.github.com/
50510 bytes received from https://www.python.org/
185537 bytes received from https://www.github.com/
478711 bytes received from https://www.yahoo.com/
asynchronous cost: 28.023334980010986
Process finished with exit code 0
这个运行结果的快慢与网速有很大的关系,但总的来说,异步(并发)方式要更快一些。
异步IO
select 多并发socket
import select
import socket
import queue
# Echo server that multiplexes many client sockets with select().
server = socket.socket()
server.bind(("localhost", 6969))
server.listen(1024)
server.setblocking(False)  # accept() must never block the select loop

msg_dic = {}        # client socket -> queue of data waiting to be sent back
inputs = [server]   # sockets watched for incoming data / new connections
outputs = []        # client sockets that currently have queued data to send


def _close_connection(sock):
    """Remove *sock* from all bookkeeping structures and close it."""
    if sock in outputs:
        outputs.remove(sock)
    if sock in inputs:
        inputs.remove(sock)
    msg_dic.pop(sock, None)
    sock.close()


while True:
    readable, writeable, exceptional = select.select(inputs, outputs, inputs)
    print(readable, writeable, exceptional)

    for r in readable:
        if r is server:
            conn, addr = server.accept()
            print("There's a new link:", addr)
            inputs.append(conn)
            # Queue holding the data that will be echoed back to this client.
            msg_dic[conn] = queue.Queue()
            continue
        try:
            data = r.recv(1024)
        except ConnectionError:
            data = b""  # treat a reset connection like an orderly close
        if not data:
            # b"" means the client disconnected; without this cleanup the
            # socket would be reported readable forever.
            _close_connection(r)
            continue
        print("receive data:", data)
        msg_dic[r].put(data)
        if r not in outputs:  # avoid duplicate entries in the write list
            outputs.append(r)

    for w in writeable:
        if w not in msg_dic:
            continue  # closed earlier in this iteration
        try:
            # get_nowait(): a blocking get() on an empty queue would freeze
            # the whole server.
            data_to_client = msg_dic[w].get_nowait()
        except queue.Empty:
            outputs.remove(w)
            continue
        try:
            w.sendall(data_to_client)  # echo the original data back
        except ConnectionError:
            _close_connection(w)
            continue
        if msg_dic[w].empty():
            # Nothing left to send: stop asking select() about writability.
            outputs.remove(w)

    for e in exceptional:
        # The original called outputs.remove() without an argument and
        # deleted msg_dic[r] instead of the failing socket's entry.
        _close_connection(e)

    if server not in inputs:
        break  # the listening socket itself failed; nothing left to serve
import socket
# Interactive echo client. The `with` block guarantees the socket is closed
# however the loop ends; the original close() call sat after an endless loop
# and could never run.
with socket.socket() as client:
    client.connect(("localhost", 6969))  # connect to the server's port
    while True:
        msg = input("\033[36;1mPlease enter what you want to send>>>:\033[0m").strip()
        if not msg:
            print("\033[31;1mSend content is empty!!!\033[0m")
            continue
        client.sendall(msg.encode("utf-8"))
        data = client.recv(1024)  # read at most 1024 bytes of the reply
        if not data:
            # recv() returns b"" once the server has closed the connection.
            break
        print("receive:", data.decode())
好了,这周的学习内容就到此为止,望各位看客大佬发现有不足或错误能留言相告,臣不胜感激!!!