
线程是操作系统能够进行运算调度的最小单位,一个进程内可以包含多个线程,它们共享进程的内存空间。Python 通过 threading 模块支持多线程编程。
一、线程 vs 进程
| 特性 | 线程 | 进程 |
|---|---|---|
| 内存空间 | 共享进程的内存 | 独立的内存空间 |
| 数据共享 | 容易(直接访问全局变量) | 困难(需要 IPC) |
| 创建开销 | 小 | 大 |
| 切换开销 | 小 | 大 |
| GIL 影响 | 受 GIL 限制 | 无影响 |
| 适用场景 | I/O 密集型 | CPU 密集型 |
import threading
import os
# Threads share module-level globals: every worker appends to the same list.
global_data = []


def worker():
    """Append to the shared list, then print this thread's name, the PID and the list."""
    global_data.append(1)
    print(f"线程 {threading.current_thread().name}: PID={os.getpid()}, 数据={global_data}")


threads = [threading.Thread(target=worker) for _ in range(3)]
for t in threads:
    t.start()
for t in threads:
    t.join()
# Output: all threads operate on the one shared global_data list.
二、创建线程的方式
1. 使用 Thread 类
import threading
import time
# Approach 1: hand a target function to Thread.
def worker(name, delay):
    """Print three progress lines for `name`, sleeping `delay` seconds after each."""
    for i in range(3):
        print(f"{name}: 第{i+1}次执行")
        time.sleep(delay)


# Create the threads.
t1 = threading.Thread(target=worker, args=("线程A", 0.5))
t2 = threading.Thread(target=worker, args=("线程B", 0.3))
# Start them.
t1.start()
t2.start()
# Wait for both to finish.
t1.join()
t2.join()
print("所有线程执行完毕")
2. 自定义线程类
import threading
import time
class MyThread(threading.Thread):
    """Thread subclass that prints three progress lines, pausing between them.

    Args:
        name: Thread name, also used as the prefix of each printed line.
        delay: Seconds to sleep after each printed line.
    """

    def __init__(self, name, delay):
        # Let Thread set the name itself instead of assigning it afterwards.
        super().__init__(name=name)
        self.delay = delay

    def run(self):
        """Thread body; executed in the new thread once start() is called."""
        for i in range(3):
            print(f"{self.name}: 第{i+1}次执行")
            time.sleep(self.delay)
# Build both workers, start them in order, then wait for each to finish.
t1, t2 = MyThread("线程A", 0.5), MyThread("线程B", 0.3)
for started in (t1, t2):
    started.start()
for finished in (t1, t2):
    finished.join()
3. 守护线程(Daemon Thread)
import threading
import time
def daemon_worker():
    """Print a heartbeat line once per second; never returns on its own."""
    while True:
        print("守护线程运行中...")
        time.sleep(1)


def normal_worker():
    """Print three numbered lines, one per second."""
    for i in range(3):
        print(f"普通线程: {i}")
        time.sleep(1)


# Daemon thread: it is terminated automatically when the main thread ends.
d = threading.Thread(target=daemon_worker, daemon=True)
n = threading.Thread(target=normal_worker)
d.start()
n.start()
n.join()  # wait for the normal thread only
print("主线程结束")
# The daemon thread is never joined; it stops when the main thread exits.
三、线程同步
1. Lock(互斥锁)
import threading
counter = 0
lock = threading.Lock()


def increment():
    """Add 100000 to the shared counter using explicit acquire()/release()."""
    global counter
    for _ in range(100000):
        lock.acquire()
        try:
            counter += 1
        finally:
            # Always release, even if the protected statement raises.
            lock.release()


# The `with` statement is the more concise form of the same pattern.
def increment_with():
    """Add 100000 to the shared counter, holding the lock via `with`."""
    global counter
    for _ in range(100000):
        with lock:  # acquired on entry, released on exit
            counter += 1
# Ten threads each add 100000 under the lock, so no increments are lost.
threads = [threading.Thread(target=increment) for _ in range(10)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(f"最终值: {counter}")  # expected: 1000000
2. RLock(可重入锁)
import threading
rlock = threading.RLock()


def recursive_func(n):
    """Recurse `n` levels deep, re-acquiring the same RLock at every level."""
    with rlock:  # the owning thread may acquire an RLock repeatedly
        if n > 0:
            recursive_func(n - 1)


recursive_func(5)
print("可重入锁允许同一线程重复获取")
3. Semaphore(信号量)
import threading
import time
# Allow at most 3 threads inside the guarded section at any one time.
semaphore = threading.Semaphore(3)


def worker(name):
    """Hold one semaphore slot for about a second, printing start and end lines."""
    with semaphore:
        print(f"{name} 开始执行")
        time.sleep(1)
        print(f"{name} 执行结束")


threads = [threading.Thread(target=worker, args=(f"线程{i}",)) for i in range(10)]
for t in threads:
    t.start()
for t in threads:
    t.join()
4. Event(事件)
import threading
import time
event = threading.Event()


def waiter():
    """Block until the shared event is set, then report and return."""
    print("等待事件...")
    event.wait()  # blocks until event.set() is called
    print("事件已触发,继续执行")


def setter():
    """Sleep three seconds, then set the shared event."""
    print("3秒后触发事件...")
    time.sleep(3)
    event.set()
    print("事件已触发")


threading.Thread(target=waiter).start()
threading.Thread(target=setter).start()
5. Condition(条件变量)
import threading
import time
condition = threading.Condition()
items = []


def producer():
    """Append 0..4 to `items`, notifying one waiting consumer after each append."""
    for i in range(5):
        with condition:
            items.append(i)
            print(f"生产: {i}")
            condition.notify()  # wake one waiting consumer
        # Sleep outside the `with` so the condition's lock is not held meanwhile.
        time.sleep(0.5)


def consumer():
    """Consume items forever, waiting on the condition while the list is empty."""
    while True:
        with condition:
            while not items:  # re-check after every wake-up (spurious wake-ups)
                print("等待生产...")
                condition.wait()
            item = items.pop(0)
            print(f"消费: {item}")


p = threading.Thread(target=producer)
# Daemon, because consumer() never returns.
c = threading.Thread(target=consumer, daemon=True)
p.start()
c.start()
p.join()
四、线程间通信
1. Queue(队列)
import threading
import queue
import time
def producer(q):
    """Put five messages on `q`, half a second apart, then a None sentinel."""
    for i in range(5):
        q.put(f"消息{i}")
        print(f"生产: {i}")
        time.sleep(0.5)
    q.put(None)  # end-of-stream marker


def consumer(q):
    """Take messages from `q` until the None sentinel arrives."""
    while True:
        msg = q.get()
        if msg is None:
            break
        print(f"消费: {msg}")
        time.sleep(1)


q = queue.Queue()
p = threading.Thread(target=producer, args=(q,))
c = threading.Thread(target=consumer, args=(q,))
p.start()
c.start()
p.join()
c.join()
Queue 类型:
| 类型 | 行为 |
|---|---|
| Queue | FIFO 队列(先进先出) |
| LifoQueue | LIFO 队列(后进先出,栈) |
| PriorityQueue | 优先级队列 |
2. 共享变量(需加锁)
import threading
import time  # required by the time.sleep() calls below

shared_data = []
lock = threading.Lock()


def producer():
    """Append 0..4 to the shared list, taking the lock for each append."""
    for i in range(5):
        with lock:
            shared_data.append(i)
        # Sleep outside the lock so the consumer can take it in between.
        time.sleep(0.1)


def consumer():
    """Poll the shared list forever, popping and printing one item per pass.

    This loop never exits on its own.
    """
    while True:
        with lock:
            if shared_data:
                item = shared_data.pop(0)
                print(f"消费: {item}")
        time.sleep(0.2)
五、线程池
1. 使用 concurrent.futures.ThreadPoolExecutor
from concurrent.futures import ThreadPoolExecutor
import time
def task(n):
    """Sleep for `n` seconds, then return `n * 2`."""
    time.sleep(n)
    return n * 2
def callback(future):
    """Done-callback: print the result of the finished future."""
    print(f"回调结果: {future.result()}")


# Create the thread pool.
with ThreadPoolExecutor(max_workers=4) as executor:
    # Option 1: submit a single task and block on its result.
    future = executor.submit(task, 2)
    result = future.result()
    print(f"单个结果: {result}")

    # Option 2: map over a batch; results keep the input order.
    results = executor.map(task, [1, 2, 3, 4])
    print(f"批量结果: {list(results)}")

    # Option 3: submit plus a done-callback. This must stay inside the `with`
    # block: submitting after the executor has shut down raises RuntimeError.
    future = executor.submit(task, 2)
    future.add_done_callback(callback)
2. 使用 multiprocessing.pool.ThreadPool
from multiprocessing.pool import ThreadPool
def task(n):
    """Return `n * 2`."""
    return n * 2


with ThreadPool(processes=4) as pool:
    # Run the whole batch; pool.map returns a list.
    results = pool.map(task, [1, 2, 3, 4])
    print(results)
六、GIL(全局解释器锁)
GIL 的影响
import threading
import time
def cpu_bound(iterations=50_000_000):
    """CPU-bound task: return the sum of i*i for i in range(iterations).

    Args:
        iterations: Number of loop iterations. The default matches the
            original hard-coded workload.
    """
    s = 0
    for i in range(iterations):
        s += i * i
    return s
def io_bound(delay=0.1):
    """I/O-bound task, simulated by sleeping.

    Args:
        delay: Seconds to sleep. The default matches the original hard-coded value.
    """
    time.sleep(delay)
def _run_threads(target, count):
    """Run `target` in `count` threads, wait for all, and return elapsed seconds."""
    start = time.perf_counter()
    threads = [threading.Thread(target=target) for _ in range(count)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    return time.perf_counter() - start


def benchmark():
    """Time cpu_bound with 4 threads and serially, then io_bound with 40 threads.

    Prints one elapsed-time line per scenario. Uses time.perf_counter(), a
    monotonic high-resolution clock, rather than wall-clock time.time().
    """
    # CPU-bound work in 4 threads.
    print(f"CPU任务(4线程): {_run_threads(cpu_bound, 4):.2f}s")

    # The same CPU-bound work, run four times in the calling thread.
    start = time.perf_counter()
    for _ in range(4):
        cpu_bound()
    print(f"CPU任务(单线程): {time.perf_counter() - start:.2f}s")

    # I/O-bound work in 40 threads.
    print(f"I/O任务(40线程): {_run_threads(io_bound, 40):.2f}s")
典型输出(4核 CPU):
CPU任务(4线程): 12.5s # GIL 导致串行
CPU任务(单线程): 12.3s # 两者差不多
I/O任务(40线程): 0.15s # 线程切换有效
GIL 的释放时机
# The GIL is released in the following situations:
# 1. While performing I/O operations
# 2. While inside time.sleep()
# 3. After the running thread has held it for the switch interval, a duration
#    in seconds (not a bytecode count) tunable via sys.setswitchinterval
# 4. While calling into a C extension (if the extension releases it explicitly)
import sys
print(f"线程切换间隔: {sys.getswitchinterval()} 秒")
总表
| 操作 | 方法 | 说明 |
|---|---|---|
| 创建线程 | Thread(target=func) | 创建线程对象 |
| 启动线程 | t.start() | 开始执行 |
| 等待结束 | t.join() | 阻塞直到线程结束 |
| 守护线程 | daemon=True | 主线程结束自动终止 |
| 互斥锁 | Lock() | 保证互斥访问 |
| 信号量 | Semaphore(n) | 限制并发数量 |
| 事件 | Event() | 线程间信号通知 |
| 队列 | Queue() | 线程安全的数据传递 |
核心点:
- 线程共享内存,数据传递方便但需要加锁
- GIL 限制 CPU 密集型任务的多线程效果
- I/O 密集型任务适合用多线程
- 使用 ThreadPoolExecutor 管理线程池
- 注意死锁和竞态条件
- 守护线程会在主线程结束时自动终止