4.2 Python 多线程编程：threading 模块深度解析

4.2 多线程编程：threading 模块深度解析

- [4.2 多线程编程：threading 模块深度解析](#4.2 多线程编程：threading 模块深度解析)
  - [4.2.1 线程基础与 threading 模块概览](#4.2.1 线程基础与 threading 模块概览)
  - [4.2.2 创建和管理线程](#4.2.2 创建和管理线程)
    - 创建线程的基本方法
    - 线程的生命周期管理
  - [4.2.3 线程同步：锁、信号量和条件变量](#4.2.3 线程同步：锁、信号量和条件变量)
    - 互斥锁（Lock）
    - 可重入锁（RLock）和信号量（Semaphore）
  - [4.2.4 线程间通信：队列和事件](#4.2.4 线程间通信：队列和事件)
    - 使用队列进行线程安全的数据交换
  - [4.2.5 线程局部数据和定时器](#4.2.5 线程局部数据和定时器)
    - 线程局部存储
  - [4.2.6 Python 多线程的局限性：GIL](#4.2.6 Python 多线程的局限性：GIL)
  - [4.2.7 最佳实践和常见陷阱](#4.2.7 最佳实践和常见陷阱)

4.2 多线程编程：threading 模块深度解析

4.2.1 线程基础与 threading 模块概览

在并发编程中，线程是操作系统能够进行运算调度的最小单位。它被包含在进程之中，是进程中的实际运作单位。一个进程可以包含多个线程，这些线程共享进程的内存空间和系统资源。

Python 通过 threading 模块提供了对线程操作的高级封装。与底层 _thread 模块相比，threading 模块功能更完善，接口更友好，是 Python 多线程编程的首选。

线程的比喻

想象一家餐厅的后厨（一个进程）：

主厨是主线程，负责协调工作
切菜工、炒菜工、摆盘师是子线程
所有厨师共享厨房的食材、厨具和空间（共享资源）
他们需要协调使用有限的灶台（同步机制）

4.2.2 创建和管理线程

创建线程的基本方法

Python 提供了两种创建线程的主要方式：

python 复制代码

import threading
import time

# 方法1：通过函数创建线程
def print_numbers(thread_name, count):
    """Print the integers ``0`` .. ``count - 1``, each prefixed with a label.

    Written to be used as a ``threading.Thread`` target; it pauses for
    0.1 seconds after every printed line.

    Args:
        thread_name: Label printed in front of every number.
        count: How many numbers to print, starting from 0.
    """
    for i in range(count):
        line = f"{thread_name}: {i}"
        print(line)
        # Short pause after each line to simulate a bit of work.
        time.sleep(0.1)

# 方法2：通过继承 Thread 类创建线程
class NumberPrinterThread(threading.Thread):
    """Thread subclass that prints a labelled sequence of integers.

    Demonstrates defining a thread by subclassing ``threading.Thread`` and
    overriding ``run`` instead of passing a ``target`` callable.
    """

    def __init__(self, thread_name, count):
        """Store the label and the amount of numbers to print.

        Args:
            thread_name: Label printed in front of every number.
            count: How many numbers to print, starting from 0.
        """
        # The base-class initializer must run before the thread is started.
        super().__init__()
        self.thread_name = thread_name
        self.count = count

    def run(self):
        """Print ``0`` .. ``count - 1`` with the stored label.

        This is the body executed in the new thread once ``start()`` is
        called; it pauses 0.1 seconds after every printed line.
        """
        for i in range(self.count):
            # The label lives on the instance; a bare ``thread_name`` is not
            # defined in this scope and would raise NameError.
            print(f"{self.thread_name}: {i}")
            time.sleep(0.1)

# 使用示例
if __name__ == "__main__":
    # Approach 1: hand a plain function to Thread through ``target``.
    thread1 = threading.Thread(
        name="数字打印线程1",
        target=print_numbers,
        args=("Thread-1", 5),
    )

    # Approach 2: instantiate the Thread subclass directly.
    thread2 = NumberPrinterThread("Thread-2", 5)

    printers = (thread1, thread2)

    # Start both threads before joining either, so they run concurrently.
    for printer in printers:
        printer.start()

    # Block until each thread has finished.
    for printer in printers:
        printer.join()

    print("所有线程执行完成")

线程的生命周期管理

python 复制代码

import threading
import time

def worker(thread_id, duration):
    """Announce the start, sleep for ``duration`` seconds, announce the end.

    Args:
        thread_id: Identifier included in both printed messages.
        duration: Number of seconds to sleep between the two messages.
    """
    start_message = f"线程 {thread_id} 开始执行"
    done_message = f"线程 {thread_id} 执行完成"

    print(start_message)
    # The sleep stands in for the actual work of the thread.
    time.sleep(duration)
    print(done_message)

# 创建并管理多个线程
threads = []
for i in range(3):
    # Each worker sleeps for 2 seconds; start it as soon as it is created.
    thread = threading.Thread(target=worker, args=(i, 2))
    thread.start()
    threads.append(thread)

# Report liveness, name and identifier of every thread while they run.
for i, thread in enumerate(threads):
    status_lines = (
        f"线程 {i} 是否存活: {thread.is_alive()}",
        f"线程 {i} 名称: {thread.name}",
        f"线程 {i} 标识符: {thread.ident}",
    )
    for status_line in status_lines:
        print(status_line)

# Wait for every thread, giving up on each after at most 3 seconds.
for thread in threads:
    thread.join(timeout=3)

print("所有工作线程已完成")

4.2.3 线程同步：锁、信号量和条件变量

当多个线程访问共享资源时，可能产生竞态条件（Race Condition）。Python 提供了多种同步原语来确保线程安全。

互斥锁（Lock）

python 复制代码

import threading
import time

class BankAccount:
    """Bank account whose balance updates are serialized by a lock.

    ``deposit`` and ``withdraw`` each hold ``self.lock`` for their whole
    read-modify-write sequence; the two methods show the context-manager
    and the manual acquire/release styles respectively.
    """

    def __init__(self, initial_balance=0):
        """Create the account.

        Args:
            initial_balance: Starting balance; defaults to 0.
        """
        self.lock = threading.Lock()
        self.balance = initial_balance

    def deposit(self, amount):
        """Add ``amount`` to the balance while holding the lock.

        Args:
            amount: Value to add to the balance.
        """
        # ``with`` acquires the lock and releases it on exit, even on error.
        with self.lock:
            print(f"存款前余额: {self.balance}")
            updated = self.balance + amount
            # Tiny delay between read and write to simulate processing.
            time.sleep(0.001)
            self.balance = updated
            print(f"存款 {amount} 后余额: {self.balance}")

    def withdraw(self, amount):
        """Subtract ``amount`` from the balance if funds are sufficient.

        Prints a notice and leaves the balance untouched when the balance
        is lower than ``amount``.

        Args:
            amount: Value to subtract from the balance.
        """
        # Manual locking: the ``finally`` clause guarantees the release.
        self.lock.acquire()
        try:
            if not self.balance >= amount:
                print("余额不足")
                return

            print(f"取款前余额: {self.balance}")
            remaining = self.balance - amount
            # Tiny delay between read and write to simulate processing.
            time.sleep(0.001)
            self.balance = remaining
            print(f"取款 {amount} 后余额: {self.balance}")
        finally:
            self.lock.release()

# 测试线程安全的银行账户
def test_bank_account():
    """Run five deposits and five withdrawals concurrently on one account.

    Starts from a balance of 1000, runs five 100-unit deposits and five
    50-unit withdrawals in separate threads, waits for all of them and
    prints the final balance.
    """
    account = BankAccount(1000)

    # Deposits come first in the list, so their threads are started first.
    planned_operations = [(account.deposit, 100)] * 5 + [(account.withdraw, 50)] * 5
    all_threads = [
        threading.Thread(target=operation, args=(amount,))
        for operation, amount in planned_operations
    ]

    for thread in all_threads:
        thread.start()

    # Wait for every operation before reading the final balance.
    for thread in all_threads:
        thread.join()

    print(f"最终余额: {account.balance}")


if __name__ == "__main__":
    test_bank_account()

可重入锁（RLock）和信号量（Semaphore）

python 复制代码

import threading
import time

class ResourcePool:
    """Fixed-size pool of integer resources guarded by a semaphore.

    The semaphore limits how many callers may hold a resource at once,
    while the re-entrant lock protects the list of free resources.
    """

    def __init__(self, total_resources):
        """Create a pool holding resources ``0`` .. ``total_resources - 1``.

        Args:
            total_resources: Number of resources in the pool.
        """
        self.semaphore = threading.Semaphore(total_resources)
        # RLock: the same thread may acquire it again without blocking.
        self.lock = threading.RLock()
        self.resources = [*range(total_resources)]

    def acquire_resource(self, worker_id):
        """Block until a resource is free, then take one from the pool.

        Args:
            worker_id: Identifier of the caller, used in the printed messages.

        Returns:
            The resource removed from the end of the free list.
        """
        print(f"工作者 {worker_id} 等待资源...")
        # Blocks while every resource is currently handed out.
        self.semaphore.acquire()

        with self.lock:
            resource = self.resources.pop()
            print(f"工作者 {worker_id} 获得资源 {resource}")
        return resource

    def release_resource(self, worker_id, resource):
        """Put ``resource`` back into the pool and free one semaphore slot.

        Args:
            worker_id: Identifier of the caller, used in the printed message.
            resource: The resource being returned.
        """
        with self.lock:
            self.resources.append(resource)
            print(f"工作者 {worker_id} 释放资源 {resource}")
        # Signal only after the resource is back in the list.
        self.semaphore.release()

def worker(worker_id, pool, work_duration):
    """Borrow a resource from ``pool``, hold it while working, return it.

    The resource is always handed back, even when the work step raises,
    so a failing worker cannot permanently shrink the pool.

    Args:
        worker_id: Identifier passed through to the pool's methods.
        pool: Object providing ``acquire_resource(worker_id)`` and
            ``release_resource(worker_id, resource)``.
        work_duration: Seconds to sleep while holding the resource.
    """
    # Acquire outside the ``try``: if this fails there is nothing to release.
    resource = pool.acquire_resource(worker_id)
    try:
        time.sleep(work_duration)  # simulated work
    finally:
        pool.release_resource(worker_id, resource)

# 测试资源池
def test_resource_pool():
    """Let six worker threads compete for a pool of three resources.

    Each worker holds its resource for 2 seconds; the function returns
    after all workers have finished and a completion message is printed.
    """
    # Only three resources are available for the six workers below.
    pool = ResourcePool(3)

    threads = []
    for worker_id in range(6):
        spawned = threading.Thread(target=worker, args=(worker_id, pool, 2))
        spawned.start()
        threads.append(spawned)

    for spawned in threads:
        spawned.join()

    print("所有工作完成")


if __name__ == "__main__":
    test_resource_pool()

4.2.4 线程间通信：队列和事件

使用队列进行线程安全的数据交换

python 复制代码

import threading
import queue
import time
import random

class ProducerConsumerExample:
    """Producer/consumer demo built on a bounded ``queue.Queue``.

    Producers put items into the queue; consumers take them out until the
    ``producer_done`` event is set and the queue is empty.
    """

    def __init__(self, max_size=5):
        """Create the shared queue and the completion event.

        Args:
            max_size: Capacity of the queue; defaults to 5.
        """
        self.producer_done = threading.Event()
        self.queue = queue.Queue(maxsize=max_size)

    def producer(self, producer_id, item_count):
        """Put ``item_count`` items into the queue, pausing between them.

        Args:
            producer_id: Identifier embedded in item names and messages.
            item_count: Number of items to produce.
        """
        for i in range(item_count):
            item = f"产品-{producer_id}-{i}"
            # Blocking put: waits here while the queue is full.
            self.queue.put(item, block=True)
            print(f"生产者 {producer_id} 生产: {item}")
            pause = random.uniform(0.1, 0.5)
            time.sleep(pause)

        print(f"生产者 {producer_id} 完成生产")

    def consumer(self, consumer_id):
        """Take items from the queue until production is done and it is empty.

        Args:
            consumer_id: Identifier used in the printed messages.
        """
        while True:
            if self.producer_done.is_set() and self.queue.empty():
                break

            try:
                # Wake up at least once per second to re-check the exit test.
                item = self.queue.get(timeout=1)
            except queue.Empty:
                continue

            print(f"消费者 {consumer_id} 消费: {item}")
            processing_time = random.uniform(0.2, 0.8)
            time.sleep(processing_time)
            # Lets ``queue.join()`` account for this item as processed.
            self.queue.task_done()

        print(f"消费者 {consumer_id} 完成消费")

def test_producer_consumer():
    """Run two producers and three consumers over one bounded queue.

    Each producer creates five items. Once both producers have finished,
    the completion event is set, the queue is drained and the consumers
    are joined before a final message is printed.
    """
    pc = ProducerConsumerExample(max_size=3)

    producers = []
    for producer_id in range(2):
        producers.append(threading.Thread(target=pc.producer, args=(producer_id, 5)))

    consumers = []
    for consumer_id in range(3):
        consumers.append(threading.Thread(target=pc.consumer, args=(consumer_id,)))

    # Producers are started first, then consumers.
    for participant in producers + consumers:
        participant.start()

    for producer in producers:
        producer.join()

    # No more items will arrive; consumers may stop once the queue is empty.
    pc.producer_done.set()

    # Block until every queued item has been marked done.
    pc.queue.join()

    for consumer in consumers:
        consumer.join()

    print("生产-消费流程完成")


if __name__ == "__main__":
    test_producer_consumer()

4.2.5 线程局部数据和定时器

线程局部存储

python 复制代码

import threading
import time

# 创建线程局部数据对象
thread_local = threading.local()


def show_thread_data():
    """Print the calling thread's ``thread_local.data``, if it has one.

    Prints a "no data set" message instead when the current thread has
    never assigned ``data`` on the thread-local object.
    """
    try:
        data = thread_local.data
    except AttributeError:
        # Attributes on a ``threading.local`` are per thread; this thread
        # has not assigned ``data``.
        print(f"线程 {threading.current_thread().name} 没有设置数据")
    else:
        print(f"线程 {threading.current_thread().name} 的数据: {data}")

def worker(worker_id):
    """Store a per-thread value and display it before and after a pause.

    Args:
        worker_id: Identifier embedded in the stored value.
    """
    private_value = f"工作者{worker_id}的专属数据"
    thread_local.data = private_value

    show_thread_data()
    # Pause for one second, then show the stored value a second time.
    time.sleep(1)
    show_thread_data()

def test_thread_local():
    """Start three workers that use thread-local data, then wait for them.

    The calling thread also invokes ``show_thread_data`` itself, without
    having stored any value first.
    """
    threads = []
    for worker_id in range(3):
        spawned = threading.Thread(target=worker, args=(worker_id,))
        spawned.start()
        threads.append(spawned)

    # The calling thread has not assigned any thread-local data.
    show_thread_data()

    for spawned in threads:
        spawned.join()


if __name__ == "__main__":
    test_thread_local()

4.2.6 Python 多线程的局限性：GIL

Python 的全局解释器锁（Global Interpreter Lock，GIL）是 CPython 解释器的一个重要限制。GIL 确保任何时候只有一个线程在执行 Python 字节码，因此多线程无法让 CPU 密集型任务在多个核心上并行执行。

GIL 的影响：

对于 I/O 密集型任务：多线程仍然有效，因为线程在等待 I/O 时会释放 GIL
对于 CPU 密集型任务：多线程无法实现真正的并行计算

python 复制代码

import threading
import time

def cpu_bound_task(n):
    """Return the sum of the integers ``0`` .. ``n - 1``.

    The sum is computed with an explicit Python-level loop so that the
    function keeps the interpreter busy for a time proportional to ``n``.

    Args:
        n: Upper bound (exclusive) of the integers to add up.

    Returns:
        The accumulated sum.
    """
    total = 0
    for value in range(n):
        total = total + value
    return total

def io_bound_task(duration):
    """Simulate an I/O wait by sleeping, then return the time waited.

    Args:
        duration: Number of seconds to sleep.

    Returns:
        The ``duration`` argument, unchanged.
    """
    # The sleep stands in for a blocking I/O operation.
    time.sleep(duration)
    return duration

def _time_threads(target, arg, thread_count=4):
    """Run ``target(arg)`` in several threads and return the elapsed seconds.

    Args:
        target: Callable executed by every thread.
        arg: Single positional argument passed to ``target``.
        thread_count: Number of threads to run; defaults to 4.

    Returns:
        Seconds elapsed from before the first thread is created until the
        last one has been joined.
    """
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; unlike time.time it is unaffected by system clock changes.
    started = time.perf_counter()

    threads = []
    for _ in range(thread_count):
        thread = threading.Thread(target=target, args=(arg,))
        threads.append(thread)
        thread.start()

    for thread in threads:
        thread.join()

    return time.perf_counter() - started


def test_gil_impact():
    """Time four threads on a CPU-bound task and on an I/O-bound task.

    Prints the elapsed time of each phase so the two can be compared.
    """
    # CPU-bound phase: four threads each add up 10**7 integers.
    cpu_time = _time_threads(cpu_bound_task, 10**7)
    print(f"CPU 密集型任务耗时: {cpu_time:.2f} 秒")

    # I/O-bound phase: four threads each sleep for one second.
    io_time = _time_threads(io_bound_task, 1)
    print(f"I/O 密集型任务耗时: {io_time:.2f} 秒")


if __name__ == "__main__":
    test_gil_impact()

4.2.7 最佳实践和常见陷阱

避免死锁：按固定顺序获取锁，使用超时机制
合理使用锁粒度：不要过度同步，也不要同步不足
优先使用队列：对于生产者-消费者模式，队列通常比复杂的锁机制更安全
注意异常处理：线程中未捕获的异常不会传播到主线程，可通过 threading.excepthook（Python 3.8+）统一处理

python 复制代码

import logging
import threading
import time

def setup_thread_exception_handling():
    """Install a ``threading.excepthook`` that logs uncaught thread exceptions.

    After this call, an exception that escapes a thread's ``run()`` is
    reported through ``logging.error`` with the thread name, the exception
    type name and the exception value.
    """
    def handle_thread_exception(args):
        """Log one uncaught exception described by the hook's ``args`` object."""
        # The hook may be invoked with ``args.thread`` set to None, so guard
        # the attribute access instead of failing inside the hook itself.
        thread_name = args.thread.name if args.thread is not None else "<unknown>"
        logging.error(
            "线程 %s 发生异常: %s: %s",
            thread_name,
            args.exc_type.__name__,
            args.exc_value,
        )

    threading.excepthook = handle_thread_exception

def problematic_worker(delay=1):
    """Simulate a worker that fails after a short period of work.

    The exception is logged inside the thread and then re-raised, so that
    it leaves the thread uncaught and reaches ``threading.excepthook``.

    Args:
        delay: Seconds to sleep before failing; defaults to 1.

    Raises:
        ValueError: Always, once the simulated work is finished.
    """
    try:
        # Simulated work.
        time.sleep(delay)
        # Simulated failure.
        raise ValueError("模拟的工作异常")
    except Exception as e:
        logging.error(f"线程内部捕获的异常: {e}")
        raise  # re-raise so the installed excepthook sees it as well

def test_exception_handling():
    """Run a failing worker thread with the logging excepthook installed.

    Installs the hook, runs ``problematic_worker`` in its own thread,
    waits for it and then prints a message from the calling thread.
    """
    setup_thread_exception_handling()

    failing_thread = threading.Thread(target=problematic_worker)
    failing_thread.start()
    # join() returns normally here; the worker's exception is not re-raised
    # in the calling thread.
    failing_thread.join()

    print("主线程继续执行")


if __name__ == "__main__":
    test_exception_handling()

通过深入理解 threading 模块的这些核心概念和实践，您将能够编写出健壮、高效的多线程 Python 应用程序，为后续学习更高级的并发编程技术奠定坚实基础。