Python多线程编程深度指南:从基础到高级实践
一、Python多线程基础概念
1.1 线程与进程的区别
| 特性 | 线程 | 进程 |
| --- | --- | --- |
| 内存空间 | 共享同一进程内存 | 独立内存空间 |
| 创建开销 | 较小(栈空间通常几MB) | 较大(独立地址空间,通常几十MB) |
| 通信方式 | 直接共享变量 | IPC(管道、队列等) |
| 切换成本 | 低 | 高 |
| Python实现 | threading 模块 | multiprocessing 模块 |
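两套 API 的形态几乎一致,差别主要体现在内存模型与开销上。下面是一个最小对照示例(仅为示意):
# 对照示例:threading 与 multiprocessing 的接口高度相似
import threading
import multiprocessing

def task(name):
    print(f"Running in {name}")

if __name__ == "__main__":  # multiprocessing 在 Windows 上必须加此保护
    t = threading.Thread(target=task, args=("thread",))          # 共享当前进程内存
    p = multiprocessing.Process(target=task, args=("process",))  # 独立内存空间
    t.start(); p.start()
    t.join(); p.join()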
1.2 GIL(全局解释器锁)的影响
import threading
counter = 0
def increment():
global counter
for _ in range(1000000):
counter += 1
# 创建两个线程
t1 = threading.Thread(target=increment)
t2 = threading.Thread(target=increment)
t1.start()
t2.start()
t1.join()
t2.join()
print(f"Final counter value: {counter}") # 通常不会输出2000000
GIL关键点:
- 每个Python进程只有一个GIL
- 线程必须获取GIL才能执行字节码
- I/O操作会释放GIL
- CPU密集型任务受GIL限制严重(见下方计时演示)
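下面用一个简单的计时对比演示GIL对CPU密集型任务的影响(仅为示意,具体耗时取决于机器与Python版本):
# 计时演示:CPU密集型任务在多线程下难以获得加速
import threading
import time

def cpu_task(n=5_000_000):
    total = 0
    for i in range(n):
        total += i * i
    return total

# 串行执行两次
start = time.perf_counter()
cpu_task(); cpu_task()
serial = time.perf_counter() - start

# 两个线程"并行"执行
start = time.perf_counter()
t1 = threading.Thread(target=cpu_task)
t2 = threading.Thread(target=cpu_task)
t1.start(); t2.start()
t1.join(); t2.join()
threaded = time.perf_counter() - start

# 受GIL影响,两者耗时通常接近,多线程甚至可能更慢
print(f"serial: {serial:.2f}s, threaded: {threaded:.2f}s")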
二、threading模块深度解析
2.1 线程生命周期管理
2.1.1 创建线程的三种方式
# 方法1:直接实例化Thread
def worker(num):
print(f'Worker: {num}')
threads = []
for i in range(5):
t = threading.Thread(target=worker, args=(i,))
threads.append(t)
t.start()
# 方法2:继承Thread类
class MyThread(threading.Thread):
def __init__(self, num):
super().__init__()
self.num = num
def run(self):
print(f'Worker: {self.num}')
for i in range(5):
t = MyThread(i)
t.start()
# 方法3:使用线程池(Python 3.2+)
from concurrent.futures import ThreadPoolExecutor
with ThreadPoolExecutor(max_workers=3) as executor:
executor.map(worker, range(5))
2.2 线程同步原语
2.2.1 Lock(互斥锁)
import threading
shared_resource = []
lock = threading.Lock()
def append_to_list(item):
with lock: # 自动获取和释放锁
shared_resource.append(item)
print(f"Appended {item}")
threads = []
for i in range(10):
t = threading.Thread(target=append_to_list, args=(i,))
threads.append(t)
t.start()
for t in threads:
t.join()
print(shared_resource)
2.2.2 RLock(可重入锁)
rlock = threading.RLock()
def recursive_func(count):
with rlock:
if count > 0:
print(f"Acquiring lock, count = {count}")
recursive_func(count - 1)
recursive_func(5)
2.2.3 Condition(条件变量)
from collections import deque
import threading
import time
import random
queue = deque()
condition = threading.Condition()
def producer():
for i in range(5):
with condition:
item = random.randint(1, 100)
queue.append(item)
print(f"Produced {item}")
condition.notify() # 通知消费者
time.sleep(random.random())
def consumer():
while True:
with condition:
while not queue:
condition.wait() # 等待通知
item = queue.popleft()
print(f"Consumed {item}")
time.sleep(random.random())
producer_thread = threading.Thread(target=producer)
# 消费者是无限循环,设为守护线程随主线程退出,否则程序会挂起
consumer_thread = threading.Thread(target=consumer, daemon=True)
consumer_thread.start()
producer_thread.start()
producer_thread.join()
三、高级线程管理技术
3.1 线程池最佳实践
from concurrent.futures import ThreadPoolExecutor, as_completed
import urllib.request
URLS = [
'https://www.python.org/',
'https://www.google.com/',
'https://www.github.com/'
]
def fetch_url(url):
with urllib.request.urlopen(url) as conn:
        return conn.read()[:100]  # 返回前100个字节
# 方法1:使用map
with ThreadPoolExecutor(max_workers=3) as executor:
for url, data in zip(URLS, executor.map(fetch_url, URLS)):
print(f"{url} page length: {len(data)}")
# 方法2:使用submit/futures
with ThreadPoolExecutor(max_workers=3) as executor:
future_to_url = {
executor.submit(fetch_url, url): url for url in URLS
}
for future in as_completed(future_to_url):
url = future_to_url[future]
try:
data = future.result()
except Exception as exc:
print(f"{url} generated an exception: {exc}")
else:
print(f"{url} page length: {len(data)}")
3.2 线程局部数据
thread_local = threading.local()
def show_thread_data():
try:
val = thread_local.value
except AttributeError:
print("No value set for this thread")
else:
print(f"Value for this thread: {val}")
def worker(value):
thread_local.value = value
show_thread_data()
threads = [
threading.Thread(target=worker, args=(i,))
for i in range(3)
]
for t in threads:
t.start()
for t in threads:
t.join()
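线程局部数据的一个典型用途,是为每个线程维护独立的、非线程安全的资源(如数据库连接或 requests.Session)。下面是一个示意性骨架,其中 create_connection() 是假设的工厂函数,代表任意非线程安全资源:
# 示意:用 threading.local 为每个线程缓存一个独立的"连接"对象
import threading

_local = threading.local()

def create_connection():
    # 假设的工厂函数:这里用普通对象代替真实连接,仅作演示
    return object()

def get_connection():
    # 每个线程第一次调用时创建自己的连接,之后复用
    if not hasattr(_local, "conn"):
        _local.conn = create_connection()
    return _local.conn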
四、I/O密集型任务优化
4.1 多线程下载器实现
import os
import requests
from urllib.parse import urlparse
def download_file(url, chunk_size=8192):
parsed = urlparse(url)
filename = os.path.basename(parsed.path) or "index.html"
    # 加超时避免线程永久阻塞,并用 with 确保连接释放
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:  # 过滤keep-alive产生的空块
                    f.write(chunk)
return filename
def threaded_download(urls, max_workers=4):
with ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = []
for url in urls:
future = executor.submit(download_file, url)
futures.append(future)
print(f"Submitted {url}")
for future in as_completed(futures):
try:
filename = future.result()
print(f"Downloaded {filename}")
except Exception as e:
print(f"Download failed: {e}")
# 使用示例
urls = [
"https://www.python.org/static/img/python-logo.png",
"https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png",
"https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png"
]
threaded_download(urls)
4.2 异步I/O与多线程结合
import asyncio
import time
from concurrent.futures import ThreadPoolExecutor
def blocking_io_task(n):
# 模拟I/O密集型任务
print(f"Start blocking task {n}")
time.sleep(2)
print(f"End blocking task {n}")
return n * 10
async def async_main():
loop = asyncio.get_running_loop()
# 创建线程池
with ThreadPoolExecutor(max_workers=3) as pool:
tasks = []
for i in range(5):
# 将阻塞函数放入线程池执行
task = loop.run_in_executor(pool, blocking_io_task, i)
tasks.append(task)
# 异步等待所有任务完成
results = await asyncio.gather(*tasks)
print(f"All tasks completed: {results}")
asyncio.run(async_main())
五、调试与性能分析
5.1 线程调试技巧
import threading
import sys
import time
import traceback
def worker():
try:
# 模拟工作
time.sleep(1)
# 模拟异常
1 / 0
except Exception:
print("Exception in thread:")
traceback.print_exc(file=sys.stdout)
finally:
print("Thread exiting")
t = threading.Thread(target=worker)
t.start()
# 主线程继续执行其他工作
while t.is_alive():
print("Main thread waiting...")
    t.join(0.1)  # 带超时的等待(最多阻塞0.1秒),期间主线程可做其他工作
print("Main thread exiting")
5.2 线程性能分析
import cProfile
import threading
import io
import pstats
def cpu_bound_task(n):
total = 0
for i in range(n):
total += i**2
return total
def profile_threads():
    profiler = cProfile.Profile()
    def run_task():
        # 注意:多个线程共享同一个 Profile 对象并非线程安全,此处仅作演示;
        # 更稳妥的做法是每个线程使用独立的 Profile 再汇总统计
        profiler.enable()
        cpu_bound_task(10**6)
        profiler.disable()
threads = [threading.Thread(target=run_task) for _ in range(3)]
for t in threads:
t.start()
for t in threads:
t.join()
# 输出分析结果
s = io.StringIO()
ps = pstats.Stats(profiler, stream=s).sort_stats('cumulative')
ps.print_stats()
print(s.getvalue())
profile_threads()
六、常见问题与解决方案
6.1 死锁预防模式
import threading
import contextlib
import time
# 锁资源
lock_a = threading.Lock()
lock_b = threading.Lock()
# worker1/worker2 以相反顺序获取两把锁,并发运行时可能互相等待造成死锁(反例演示)
def worker1():
with lock_a:
print("Worker1 acquired lock A")
time.sleep(0.1) # 模拟处理时间
with lock_b:
print("Worker1 acquired lock B")
def worker2():
with lock_b:
print("Worker2 acquired lock B")
time.sleep(0.1)
with lock_a:
print("Worker2 acquired lock A")
# 安全获取多个锁的上下文管理器
@contextlib.contextmanager
def acquire(*locks):
locks = sorted(locks, key=lambda x: id(x)) # 按对象ID排序获取顺序
acquired = []
try:
for lock in locks:
lock.acquire()
acquired.append(lock)
yield
finally:
for lock in reversed(acquired):
lock.release()
def safe_worker():
with acquire(lock_a, lock_b):
print("Safe worker acquired both locks")
# 正确用法
t1 = threading.Thread(target=safe_worker)
t2 = threading.Thread(target=safe_worker)
t1.start()
t2.start()
t1.join()
t2.join()
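除了按固定顺序加锁,另一种常见的死锁规避手段是带超时的锁获取:拿不到第二把锁就先释放已持有的锁,稍后重试,从而打破循环等待。下面是沿用上文 lock_a、lock_b 的一个示意:
# 示意:带超时的锁获取,失败则回退重试,避免无限期互相等待
import threading
import time

def worker_with_timeout(first, second, name):
    while True:
        with first:
            # 限时尝试获取第二把锁,失败则放弃本轮
            if second.acquire(timeout=0.5):
                try:
                    print(f"{name} acquired both locks")
                    return
                finally:
                    second.release()
        # with 块结束已释放 first,稍作等待后重试,避免活锁
        time.sleep(0.01)

t1 = threading.Thread(target=worker_with_timeout, args=(lock_a, lock_b, "T1"))
t2 = threading.Thread(target=worker_with_timeout, args=(lock_b, lock_a, "T2"))
t1.start(); t2.start()
t1.join(); t2.join()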
6.2 线程安全数据结构
import queue
import threading
import time
from queue import Queue
class SafeCounter:
def __init__(self):
self._value = 0
self._lock = threading.Lock()
def increment(self):
with self._lock:
self._value += 1
def value(self):
with self._lock:
return self._value
def worker(counter, task_queue):
while True:
try:
item = task_queue.get_nowait()
except queue.Empty:
break
# 处理任务
time.sleep(0.01)
counter.increment()
task_queue.task_done()
# 使用示例
counter = SafeCounter()
tasks = Queue()
for i in range(1000):
tasks.put(i)
threads = []
for _ in range(10):
t = threading.Thread(target=worker, args=(counter, tasks))
t.start()
threads.append(t)
tasks.join() # 等待所有任务完成
for t in threads:
t.join()
print(f"Total tasks processed: {counter.value()}")
七、现代Python多线程最佳实践
7.1 使用concurrent.futures高级接口
from concurrent.futures import ThreadPoolExecutor, wait, FIRST_COMPLETED
import random
import time
def simulate_task(task_id):
delay = random.uniform(0.5, 2.0)
time.sleep(delay)
return f"Task {task_id} completed in {delay:.2f}s"
def advanced_thread_pool():
with ThreadPoolExecutor(max_workers=3) as executor:
# 提交多个任务
futures = [executor.submit(simulate_task, i) for i in range(10)]
# 等待至少一个任务完成
done, not_done = wait(futures, return_when=FIRST_COMPLETED)
print(f"{len(done)} task(s) completed, {len(not_done)} remaining")
# 处理已完成任务
for future in done:
print(future.result())
        # 尝试取消剩余任务(注意:已在运行的任务无法被取消,cancel() 会返回 False)
        for future in not_done:
            future.cancel()
        # 等待剩余 future 结束(完成或已取消)
        wait(not_done)
advanced_thread_pool()
7.2 线程与asyncio集成
import asyncio
import concurrent.futures
import time
def blocking_io(n):
print(f"Start blocking I/O {n}")
time.sleep(2) # 模拟I/O操作
print(f"End blocking I/O {n}")
return f"Result from I/O {n}"
async def async_main():
loop = asyncio.get_running_loop()
# 选项1:在默认线程池执行器中运行
result = await loop.run_in_executor(
None, blocking_io, 1)
print(result)
# 选项2:在自定义线程池中运行
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as pool:
tasks = []
for i in range(2, 5):
task = loop.run_in_executor(
pool, blocking_io, i)
tasks.append(task)
results = await asyncio.gather(*tasks)
for res in results:
print(res)
        # 并行执行阻塞任务和协程(注意:此处仍需位于 pool 的 with 块内)
coro_task = asyncio.create_task(asyncio.sleep(1))
io_task = loop.run_in_executor(pool, blocking_io, 5)
done, pending = await asyncio.wait(
{coro_task, io_task},
return_when=asyncio.ALL_COMPLETED)
for task in done:
if task is coro_task:
print("Coroutine completed")
else:
print(f"IO task result: {task.result()}")
asyncio.run(async_main())
八、总结:Python多线程应用指南
8.1 适用场景决策树
选型可按如下思路判断:任务是I/O密集型(大量网络、磁盘等待)→ 用多线程或asyncio;任务是CPU密集型 → 用multiprocessing多进程绕开GIL;两者混合 → 线程池与进程池结合;线程间需要大量共享状态 → 优先多线程并配合锁机制。
8.2 最佳实践清单
- I/O密集型任务优先选择多线程
- 使用 ThreadPoolExecutor 而非直接创建线程
- 共享资源必须使用适当的锁机制
- 避免在受GIL限制的代码中进行CPU密集型计算
- 考虑使用 concurrent.futures 高级接口
- 线程间通信优先使用 queue.Queue(见下方示例)
- 调试时使用 threading.current_thread().name 标识线程
- 复杂场景考虑结合asyncio使用
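作为补充,下面用 queue.Queue 加"结束哨兵"实现一个生产者-消费者骨架,它比手写 Condition 更不易出错(仅为示意):
# 示意:用 queue.Queue + 哨兵值实现线程间通信
import queue
import threading

SENTINEL = object()  # 结束标志
q = queue.Queue()

def producer():
    for i in range(5):
        q.put(i)
    q.put(SENTINEL)  # 通知消费者退出

def consumer():
    while True:
        item = q.get()
        if item is SENTINEL:
            break
        print(f"Consumed {item}")

p = threading.Thread(target=producer)
c = threading.Thread(target=consumer)
p.start(); c.start()
p.join(); c.join()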
通过深入理解Python的多线程特性和GIL机制,开发者可以在适当的场景下有效利用多线程提升程序性能,特别是在I/O密集型应用中。记住,多线程不是万能的,但正确使用时可以显著提高程序效率。