c++多线程【多线程常见使用以及几个多线程数据结构实现】

一、多线程开启方式

1.1 std::async

作用：

异步执行函数，并返回一个 std::future 用于获取结果。

可以选择执行策略：

std::launch::async：立即在独立线程执行。注意它并不是默认策略：不显式指定策略时，默认值是 std::launch::async | std::launch::deferred，由实现决定采用哪一种。

std::launch::deferred：惰性求值（直到 future::get() 时才执行）。
示例：

cpp 复制代码

#include <iostream>
#include <future>
#include <thread>

// Demo task: prints the id of the thread that executes it and returns 42.
int compute() {
    const std::thread::id current = std::this_thread::get_id();
    std::cout << "Thread ID: " << current << std::endl;
    return 42;
}

// Demonstrates both std::async launch policies with compute().
int main() {
    // std::launch::async: compute() runs on a separate thread. The policy is
    // requested explicitly; when no policy is given, the implementation is
    // free to choose between async and deferred.
    std::future<int> fut1 = std::async(std::launch::async, compute);
    // Fetch the value before starting the output statement, so that whatever
    // compute() prints cannot end up in the middle of the "Result" line.
    const int async_result = fut1.get();
    std::cout << "Result (async): " << async_result << std::endl;

    // std::launch::deferred: compute() runs on the calling thread, inside get().
    std::future<int> fut2 = std::async(std::launch::deferred, compute);
    const int deferred_result = fut2.get();
    std::cout << "Result (deferred): " << deferred_result << std::endl;
}

关键点：

std::async 是最高层的封装，适用于简单的异步任务。

执行策略影响线程行为：

async 会立即启动任务，其行为如同在一个新线程中执行；标准并不保证使用线程池，如果无法创建线程，std::async 会抛出 std::system_error。

deferred 不会创建新线程：任务在首次调用 get() 或 wait() 时，在调用线程内同步执行，调用线程会一直阻塞到任务完成。

由 std::async 以 std::launch::async 策略返回的 std::future 在析构时会阻塞等待任务完成（即使没有调用过 get()，也可能导致调用线程阻塞）；通过 std::promise 或 std::packaged_task 得到的 future 析构时不会阻塞。

1.2 std::promise

作用：

允许手动设置 future 的值或异常（通常是生产者-消费者模式）。

常见用途：

在多线程间设置 future 的结果（如线程间通信）。

配合 std::future 手动控制异步结果的生成。

示例：

cpp 复制代码

#include <iostream>
#include <future>
#include <thread>
#include <stdexcept>

// Producer side of the value demo: pauses for one second, then publishes 42
// through the promise it was handed (taken by value, so it is owned here).
void set_value(std::promise<int> prom) {
    const std::chrono::seconds delay(1);
    std::this_thread::sleep_for(delay);
    prom.set_value(42);
}

// Producer side of the failure demo: stores a std::runtime_error("Oops!") in
// the promise, so the paired future rethrows it from get().
void set_exception(std::promise<int> prom) {
    std::exception_ptr captured;
    try {
        throw std::runtime_error("Oops!");
    } catch (...) {
        // Capture the in-flight exception so it can be handed to the promise.
        captured = std::current_exception();
    }
    prom.set_exception(captured);
}

// Runs both promise demos: one thread delivers a value, the other delivers
// an exception.
int main() {
    std::promise<int> prom1;
    std::future<int> fut1 = prom1.get_future();
    // std::promise is move-only, so it is handed to the thread via std::move.
    std::thread t1(set_value, std::move(prom1));
    // get() blocks until set_value() has stored the result.
    const int value = fut1.get();
    std::cout << "Value: " << value << std::endl;
    t1.join();

    std::promise<int> prom2;
    std::future<int> fut2 = prom2.get_future();
    std::thread t2(set_exception, std::move(prom2));
    try {
        // Call get() before starting the output statement: it rethrows the
        // stored exception, and evaluating it inside the << chain could leave
        // a dangling "Value: " prefix in front of the "Exception:" line.
        const int unexpected = fut2.get();
        std::cout << "Value: " << unexpected << std::endl;
    } catch (const std::exception& e) {
        std::cout << "Exception: " << e.what() << std::endl;
    }
    t2.join();
}

关键点：

std::promise 和 std::future 是一对：

promise 是写入端（生产端）。

future 是读取端（消费端）。

promise 只能被 move（不能被拷贝）。

promise 通过 set_value() 或 set_exception() 通知 future。

std::future 的 get() 只能调用一次（调用之后该 future 不再有效），同一个 future 对象也不能被多个线程同时访问；如果需要多个线程都获取结果，应使用 std::shared_future（每个线程持有自己的副本）。

1.3 std::packaged_task

作用：

将函数/可调用对象包装成一个异步任务，并绑定 std::future 获取结果。

比 std::async 更灵活，允许控制任务的执行时机（如放到线程池）。

示例：

cpp 复制代码

#include <iostream>
#include <future>
#include <thread>
#include <queue>

// Stand-in for real work: always produces 42.
int compute() {
    const int answer = 42;
    return answer;
}

// Wraps compute() in a std::packaged_task, runs it on a worker thread and
// prints the value delivered through the task's future.
int main() {
    // Constructing the task only stores the callable; nothing runs yet.
    std::packaged_task<int()> job(compute);
    auto result = job.get_future();

    // The task is move-only. Invoking it (here on a new thread) stores the
    // return value in the state shared with the future.
    std::thread worker(std::move(job));
    worker.join();

    const int value = result.get();
    std::cout << "Result: " << value << std::endl;
}

更复杂的线程池模拟：

cpp 复制代码

#include <queue>
#include <vector>
#include <thread>
#include <functional>

// Minimal fixed-size thread pool.
//
// Worker threads pull std::function<void()> jobs from a shared FIFO queue.
// enqueue() wraps a callable in a std::packaged_task and returns the matching
// std::future. The destructor lets the workers finish every job that is
// already queued and then joins them, so futures returned by enqueue()
// always become ready.
class ThreadPool {
    std::queue<std::function<void()>> tasks;
    std::vector<std::thread> workers;
    bool running = true;  // guarded by mtx
public:
    // Starts n worker threads.
    ThreadPool(unsigned int n) {
        for (unsigned int i = 0; i < n; ++i) {
            workers.emplace_back([this] {
                for (;;) {
                    std::function<void()> task;
                    {
                        std::unique_lock<std::mutex> lock(mtx);
                        cv.wait(lock, [this] { return !tasks.empty() || !running; });
                        // The predicate held, so an empty queue means shutdown
                        // was requested. Jobs still queued at shutdown are run
                        // first; dropping them would break their promises.
                        if (tasks.empty()) return;
                        task = std::move(tasks.front());
                        tasks.pop();
                    }
                    task();  // run the job without holding the lock
                }
            });
        }
    }

    ThreadPool(const ThreadPool&) = delete;
    ThreadPool& operator=(const ThreadPool&) = delete;

    // Requests shutdown, wakes every worker and waits for all of them.
    ~ThreadPool() {
        {
            // The flag must change under the mutex: otherwise a worker could
            // test the predicate, miss the notification and sleep forever.
            std::lock_guard<std::mutex> lock(mtx);
            running = false;
        }
        cv.notify_all();
        for (auto& worker : workers) {
            worker.join();
        }
    }

    // Queues f for execution on a worker thread.
    // Returns a future for f's return value; an exception thrown by f is
    // stored in the future and rethrown by get().
    template<typename F>
    auto enqueue(F&& f) -> std::future<decltype(f())> {
        using ResultType = decltype(f());
        // packaged_task is move-only while std::function requires a copyable
        // callable, so the task is shared through a shared_ptr.
        auto task = std::make_shared<std::packaged_task<ResultType()>>(
            std::forward<F>(f));
        std::future<ResultType> fut = task->get_future();
        {
            std::lock_guard<std::mutex> lock(mtx);
            tasks.emplace([task] { (*task)(); });
        }
        cv.notify_one();
        return fut;
    }

private:
    std::mutex mtx;              // protects tasks and running
    std::condition_variable cv;  // signalled on new work and on shutdown
};

// Submits one slow job to a four-thread pool and prints its result.
int main() {
    ThreadPool pool(4);

    // The job pauses for one second and then produces 42.
    auto slow_answer = [] {
        std::this_thread::sleep_for(std::chrono::seconds(1));
        return 42;
    };
    auto fut = pool.enqueue(slow_answer);

    std::cout << "Result: " << fut.get() << std::endl;
}

二、stl相关多线程函数实现以及注解

2.1 sorter 实现

cpp 复制代码

#include <algorithm>
#include <atomic>
#include <chrono>
#include <future>
#include <list>
#include <mutex>
#include <thread>
#include <vector>

// 并行快速排序模板类
// 使用多线程技术加速快速排序算法
template <typename T>
struct sorter {
  // 排序块结构：包含待排序数据和对应的promise
  struct chunk_to_sort {
    std::list<T> data;                   // 待排序的数据块
    std::promise<std::list<T>> promise;  // 用于异步返回排序结果的promise
  };

  // 线程安全的栈，用于存储待排序的数据块
  // 多个工作线程可以从这个栈中获取任务
  thread_safe_stack<chunk_to_sort> chunks;

  // 工作线程池，存储所有正在运行的线程
  std::vector<std::thread> threads;

  // 最大线程数：硬件并发数减1（为主线程保留一个核心）
  unsigned const max_thread_count;

  // 原子标志，用于通知所有工作线程停止运行
  std::atomic<bool> end_of_data;

  // 构造函数：初始化最大线程数和结束标志
  sorter()
      : max_thread_count(std::thread::hardware_concurrency() -
                         1),   // 获取硬件并发能力
        end_of_data(false) {}  // 初始化结束标志为false

  // 析构函数：确保所有线程正确退出
  ~sorter() {
    end_of_data = true;  // 设置结束标志，通知所有线程停止
    for (unsigned i = 0; i < threads.size(); ++i) {
      threads[i].join();  // 等待所有线程完成
    }
  }

  // 尝试从栈中获取一个排序块并进行排序
  void try_sort_chunk() {
    // 从线程安全栈中弹出一个排序块
    boost::shared_ptr<chunk_to_sort> chunk = chunks.pop();
    if (chunk) {
      // 如果成功获取到排序块，则进行排序
      sort_chunk(chunk);
    }
  }

  // 核心排序算法：递归实现并行快速排序
  std::list<T> do_sort(std::list<T>& chunk_data) {
    // 基本情况：如果数据为空，直接返回
    if (chunk_data.empty()) {
      return chunk_data;
    }

    // 将chunk_data的第一个元素移动到result的开头
    std::list<T> result;
    result.splice(result.begin(), chunk_data, chunk_data.begin());

    // 把result的第一个元素作为基准值
    T const& partition_val = *result.begin();

    // 使用partition算法将数据分为两部分：小于基准值和大于等于基准值
    typename std::list<T>::iterator divide_point =
        std::partition(chunk_data.begin(), chunk_data.end(),
                       [&](T& const& val) { return val < partition_val; });

    // 创建新的排序块来存储小于基准值的部分
    chunk_to_sort new_lower_chunk;
    new_lower_chunk.data.splice(new_lower_chunk.data.end(), chunk_data,
                                chunk_data.begin(), divide_point);

    // 获取future以便异步获取排序结果
    std::future<std::list<T>> new_lower = new_lower_chunk.promise.get_future();

    // 将新的排序块推入线程安全栈，等待其他线程处理
    chunks.push(std::move(new_lower_chunk));

    // 如果当前线程数未达到上限，创建新的工作线程
    if (threads.size() < max_thread_count) {
      threads.push_back(std::thread(&sorter<T>::sort_thread, this));
    }

    // 递归处理大于等于基准值的部分（在当前线程中）
    std::list<T> new_higher(do_sort(chunk_data));

    // 将大于基准值的部分合并到结果中
    result.splice(result.end(), new_higher);

    // 等待小于基准值的部分排序完成
    // 在等待期间，当前线程会帮助处理其他排序任务
    while (new_lower.wait_for(std::chrono::seconds(0)) !=
           std::future_status::ready) {
      try_sort_chunk();  // 尝试处理其他排序任务
    }

    // 获取小于基准值的排序结果并合并到最终结果中
    result.splice(result.begin(), new_lower.get());
    return result;
  }

  // 通过promise进行任务处理
  void sort_chunk(boost::shared_ptr<chunk_to_sort> const& chunk) {
    chunk->promise.set_value(do_sort(chunk->data));
  }

  // 工作线程的主函数：不断尝试获取排序任务
  void sort_thread() {
    // 只要没有收到结束信号，就继续工作
    // std::this_thread::yield();  // 知道是否有工作，它只是：
    // 无论当前情况如何，都礼貌地让出CPU时间片，给其他线程执行机会
    while (!end_of_data) {
      try_sort_chunk();
      std::this_thread::yield();
    }
  }

  /////while (!done) {
  //     if (任务队列不为空) {  // 显式检查
  //         处理任务();
  //     } else {
  //         std::this_thread::yield();  // 没有工作才让出
  //     }
  // }
};

// 并行快速排序的接口函数
// 用户可以通过这个函数使用并行快速排序
template <typename T>
std::list<T> parallel_quick_sort(std::list<T> input) {
  // 如果输入为空，直接返回
  if (input.empty()) {
    return input;
  }

  // 创建排序器实例
  sorter<T> s;

  // 开始并行排序并返回结果
  return s.do_sort(input);
}

2.2 parallel_accumulate（并行版 std::accumulate）

cpp 复制代码

#include <future>
#include <list>
#include <thread>
#include <vector>

// Scope guard for a vector of threads: on destruction it joins every thread
// in the referenced vector that is still joinable. The vector is not owned
// and must outlive the guard.
class join_threads {
  std::vector<std::thread>& threads;

 public:
  explicit join_threads(std::vector<std::thread>& threads_)
      : threads(threads_) {}
  ~join_threads() {
    for (std::thread& worker : threads) {
      // Skip entries that hold no running thread (e.g. default-constructed).
      if (!worker.joinable()) continue;
      worker.join();
    }
  }
};

// Function object that sums the range [first, last), starting from a
// value-initialized T.
template <typename Iterator, typename T>
struct accumulate_block {
  T operator()(Iterator first, Iterator last) {
    T const zero = T();
    return std::accumulate(first, last, zero);
  }
};

template <typename Iterator, typename T>
T parallel_accumulate(Iterator first, Iterator last, T init) {
  unsigned long const length = std::distance(first, last);

  if (!length) return init;
  unsigned long const min_per_thread = 25;
  unsigned long const max_threads =
      (length + min_per_thread - 1) / min_per_thread;
  unsigned long const hardware_threads = std::thread::hardware_concurrency();
  unsigned long const num_threads =
      std::min(hardware_threads != 0 ? hardware_threads : 2, max_threads);
  unsigned long const block_size = length / num_threads;

  // 用于存储线程以及每个线程的返回值
  std::vector<std::future<T>> futures(num_threads - 1);
  std::vector<std::thread> threads(num_threads - 1);
  // join函数在析构时等待所有线程完成
  join_threads joiner(threads);
  Iterator block_start = first;
  // 还有一个线程是主线程，计算最后一个块
  for (unsigned long i = 0; i < (num_threads - 1); ++i) {
    Iterator block_end = block_start;
    // std::advance 函数将迭代器 block_end 向前移动 block_size 个位置
    // 迭代器增大的方向
    std::advance(block_end, block_size);
    std::packaged_task<T(Iterator, Iterator)> task(
        accumulate_block<Iterator, T>());
    // 设置线程以及返回值
    futures[i] = task.get_future();
    threads[i] = std::thread(std::move(task), block_start, block_end);
    block_start = block_end;
  }

  // 直接显示计算最后一个块的结果
  T last_result = accumulate_block<Iterator, T>()(block_start, last);

  // 获取线程的返回值并累加
  T result = init;
  for (unsigned long i = 0; i < (num_threads - 1); ++i) {
    result += futures[i].get();
  }

  // 叠加上主线程的结果
  result += last_result;
  return result;
}

async 版本

cpp 复制代码

#include <thread>

// Recursive divide-and-conquer sum of [first, last), seeded with init.
//
// Ranges of at most 25 elements are summed directly. Longer ranges are split
// in half: the first half (which carries init) is handed to std::async, the
// second half is processed by the calling thread.
template <typename Iterator, typename T>
T parallel_accumulate(Iterator first, Iterator last, T init) {
  unsigned long const max_chunk_size = 25;
  unsigned long const length = std::distance(first, last);

  if (length > max_chunk_size) {
    Iterator mid_point = first;
    std::advance(mid_point, length / 2);

    auto first_half = std::async(parallel_accumulate<Iterator, T>, first,
                                 mid_point, init);
    T second_half = parallel_accumulate(mid_point, last, T());
    return first_half.get() + second_half;
  }

  // Small enough to sum in place.
  return std::accumulate(first, last, init);
}

2.3 parallel_find

cpp 复制代码

#include <future>
#include <list>
#include <thread>
#include <vector>

// Searches [first, last) for an element equal to match, using several
// threads. Returns an iterator to a matching element, or last when there is
// none. An exception raised during the search is rethrown to the caller.
//
// The range is split into blocks; worker threads search all but the final
// block, which the calling thread searches itself. The first thread to find
// a match stores it in a shared promise and raises done_flag so the other
// threads stop early.
template <typename Iterator, typename MatchType>
Iterator parallel_find(Iterator first, Iterator last, MatchType match) {
  struct find_element {
    void operator()(Iterator begin, Iterator end, MatchType match,
                    std::promise<Iterator>* result,
                    std::atomic<bool>* done_flag) {
      try {
        // Stop as soon as any thread has reported a result.
        for (; (begin != end) && (!done_flag->load()); ++begin) {
          if (*begin == match) {
            result->set_value(begin);
            done_flag->store(true);
            return;
          }
        }
      } catch (...) {
        try {
          result->set_exception(std::current_exception());
          done_flag->store(true);
        } catch (...) {
          // A promise accepts only one result. If another thread has already
          // set it, set_exception() throws; that is deliberately ignored.
        }
      }
    }
  };

  unsigned long const length = std::distance(first, last);
  if (!length) return last;

  unsigned long const min_per_thread = 25;
  unsigned long const max_threads =
      (length + min_per_thread - 1) / min_per_thread;
  unsigned long const hardware_threads = std::thread::hardware_concurrency();

  // Assume 2 hardware threads when the value is reported as 0.
  unsigned long const num_threads =
      std::min(hardware_threads != 0 ? hardware_threads : 2, max_threads);

  unsigned long const block_size = length / num_threads;
  // Shared by all searching threads: the promise carries the result (a match
  // or an exception) and done_flag tells the other threads to stop.
  std::promise<Iterator> result;
  std::atomic<bool> done_flag(false);

  std::vector<std::thread> threads(num_threads - 1);

  {
    // Leaving this scope joins all workers, so every thread has finished
    // before done_flag and result are inspected below.
    join_threads joiner(threads);
    Iterator block_start = first;
    for (unsigned long i = 0; i < (num_threads - 1); ++i) {
      Iterator block_end = block_start;
      std::advance(block_end, block_size);
      threads[i] = std::thread(find_element(), block_start, block_end, match,
                               &result, &done_flag);
      block_start = block_end;
    }

    // The calling thread searches the final block.
    find_element()(block_start, last, match, &result, &done_flag);
  }

  // No thread reported anything: there is no match.
  if (!done_flag.load()) {
    return last;
  }

  // get() returns the stored iterator or rethrows the stored exception.
  return result.get_future().get();
}

async 版本

cpp 复制代码

#include <future>
#include <list>
#include <thread>
#include <vector>

// Recursive worker behind the std::async-based parallel_find.
//
// Searches [first, last) for an element equal to match and returns an
// iterator to it, or last when this range yields nothing. done is shared by
// all branches of the recursion: it is set when a match is found or an
// exception escapes, and makes the other branches stop early.
//
// Ranges shorter than 50 elements are scanned directly. Longer ranges are
// split in half: the second half is searched via std::async, the first half
// by the calling thread.
template <typename Iterator, typename MatchType>
Iterator parallel_find_impl(Iterator first, Iterator last, MatchType match,
                            std::atomic<bool>& done) {
  try {
    unsigned long const length = std::distance(first, last);
    unsigned long const min_per_thread = 25;
    if (length < (2 * min_per_thread)) {
      for (; (first != last) && !done.load(); ++first) {
        if (*first == match) {
          done = true;
          return first;
        }
      }
      return last;
    } else {
      // std::advance instead of `first + n`, so that iterators without
      // random access (e.g. std::list) are supported as well.
      Iterator mid_point = first;
      std::advance(mid_point, length / 2);
      std::future<Iterator> async_result =
          std::async(&parallel_find_impl<Iterator, MatchType>, mid_point, last,
                     match, std::ref(done));
      Iterator const direct_result =
          parallel_find_impl(first, mid_point, match, done);
      // mid_point is the "not found" value of the first half; only then is
      // the result of the second half needed.
      return (direct_result == mid_point) ? async_result.get() : direct_result;
    }
  } catch (...) {
    // Make the remaining branches stop before propagating the exception.
    done = true;
    throw;
  }
}

// Entry point of the std::async-based search: owns the shared "done" flag
// and starts the recursive search over [first, last).
template <typename Iterator, typename MatchType>
Iterator parallel_find(Iterator first, Iterator last, MatchType match) {
  std::atomic<bool> match_found{false};
  return parallel_find_impl(first, last, match, match_found);
}

2.4 parallel_for_each

cpp 复制代码

#include <future>
#include <list>
#include <thread>
#include <vector>

// Joins, on destruction, every thread of the referenced vector that is still
// joinable. The guard only refers to the vector; it does not own it.
class join_threads {
  std::vector<std::thread>& threads;

 public:
  explicit join_threads(std::vector<std::thread>& threads_)
      : threads(threads_) {}
  ~join_threads() {
    for (auto& pending : threads) {
      if (pending.joinable()) pending.join();
    }
  }
};

// Applies f to every element of [first, last), using several threads.
//
// The range is cut into blocks: worker threads process all but the last
// block, which the calling thread handles itself. The thread count is capped
// by the hardware concurrency (2 when reported as 0) and by a minimum of 25
// elements per thread.
template <typename Iterator, typename Func>
void parallel_for_each(Iterator first, Iterator last, Func f) {
  unsigned long const length = std::distance(first, last);
  if (length == 0) return;

  unsigned long const min_per_thread = 25;
  unsigned long const hardware_threads = std::thread::hardware_concurrency();
  unsigned long const max_threads =
      (length + min_per_thread - 1) / min_per_thread;
  unsigned long const num_threads =
      std::min(hardware_threads != 0 ? hardware_threads : 2, max_threads);
  unsigned long const block_size = length / num_threads;
  unsigned long const worker_count = num_threads - 1;

  // Declaration order matters: joiner is destroyed first and joins the
  // workers before the thread and future vectors go away.
  std::vector<std::future<void>> futures(worker_count);
  std::vector<std::thread> threads(worker_count);
  join_threads joiner(threads);

  Iterator block_start = first;
  for (unsigned long i = 0; i != worker_count; ++i) {
    Iterator block_end = block_start;
    std::advance(block_end, block_size);

    // packaged_task wraps the per-block work, so its outcome (including an
    // exception) becomes available through the future.
    std::packaged_task<void(void)> task(
        [block_start, block_end, f]() {
          std::for_each(block_start, block_end, f);
        });
    futures[i] = task.get_future();

    // The task is move-only; the new thread takes ownership of it.
    threads[i] = std::thread(std::move(task));
    block_start = block_end;
  }

  // The calling thread processes the final block.
  std::for_each(block_start, last, f);

  // get() blocks until the block is done and rethrows any exception the
  // worker raised; drop this loop if exceptions need not be propagated.
  for (std::future<void>& finished : futures) {
    finished.get();
  }
}

async 版本

cpp 复制代码

#include <future>
#include <list>
#include <thread>
#include <vector>

// Applies f to every element of [first, last) by recursive halving.
//
// Ranges shorter than 50 elements are processed directly with
// std::for_each. Longer ranges are split in half: the first half is handed
// to std::async, the second half is processed by the calling thread. An
// exception thrown while processing the first half is rethrown by get().
template <typename Iterator, typename Func>
void parallel_for_each(Iterator first, Iterator last, Func f) {
  unsigned long const length = std::distance(first, last);

  if (!length) return;

  unsigned long const min_per_thread = 25;

  if (length < (2 * min_per_thread)) {
    std::for_each(first, last, f);
  } else {
    // std::advance instead of `first + n`, so that iterators without random
    // access (e.g. std::list) are supported as well.
    Iterator mid_point = first;
    std::advance(mid_point, length / 2);
    // First half: asynchronous, recursive.
    std::future<void> first_half =
        std::async(&parallel_for_each<Iterator, Func>, first, mid_point, f);
    // Second half: on the calling thread.
    parallel_for_each(mid_point, last, f);

    // Wait for the first half and propagate its exception, if any.
    first_half.get();
  }
}