一、简介
本文介绍如何使用 `thrust::sort()` 函数,对 `device_vector<int>` 容器中的数据,或者 `int*` 指针所指向的设备内存(显存)中的数据进行排序。
二、示例代码
1. 排序 `device_vector<int>` 中的数据
main.cu
文件内容:
cpp
#include <iostream>
#include <cuda_runtime.h>
#include <thrust/device_vector.h>
#include <thrust/device_ptr.h>
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include <algorithm>
#include <random>
#include <chrono>
using namespace std;
int main()
{
int min = 0, max = 100;
random_device seed; // 硬件生成随机数种子
ranlux48 engine(seed()); // 利用种子生成随机数引擎
uniform_int_distribution<> distrib(min, max); // 设置随机数范围,并为均匀分布
int n = 10;
thrust::host_vector<int> data_host(n);
thrust::device_vector<int> data_device(n);
/* 在host_vector上生成随机数 */
for (int i = 0; i < n; i++)
{
data_host[i] = distrib(engine);
}
std::cout << "Before sorting:\n";
for (int i = 0; i < n; i++)
{
std::cout << data_host[i] << ",";
}
std::cout << "\n";
/* 在device端进行排序 */
data_device = data_host; // 将host端的数据拷贝到device端
thrust::sort(data_device.begin(), data_device.end()); // 进行排序
data_host = data_device; // 将排序完成后的device端的数据拷贝回host端
std::cout << "After sorting:\n";
for (int i = 0; i < n; i++)
{
std::cout << data_host[i] << ",";
}
std::cout << "\n";
return 0;
}
CMakeLists.txt
文件内容:
cpp
cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
# CUDA is enabled as a first-class language, so CMake locates the CUDA compiler
# itself and compiles .cu sources directly; the legacy FindCUDA module
# (find_package(CUDA) / cuda_add_executable) is not needed.
project(HelloWorld LANGUAGES CXX CUDA)
# Language standard for C++ sources
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Language standard for CUDA (.cu) sources; CMAKE_CXX_STANDARD does not cover them
set(CMAKE_CUDA_STANDARD 14)
# Build target
add_executable(HelloWorld main.cu)
程序输出结果:
cpp
Before sorting:
85,73,8,0,77,68,93,31,85,76,
After sorting:
0,8,31,68,73,76,77,85,85,93,
2. 排序 `int*` 指针所指向的数据
main.cu
文件内容:
cpp
#include <iostream>
#include <cuda_runtime.h>
#include <thrust/device_vector.h>
#include <thrust/device_ptr.h>
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include <algorithm>
#include <random>
#include <chrono>
using namespace std;
int main()
{
int min = 0, max = 100;
random_device seed; // 硬件生成随机数种子
ranlux48 engine(seed()); // 利用种子生成随机数引擎
uniform_int_distribution<> distrib(min, max); // 设置随机数范围,并为均匀分布
int n = 10;
int *data_host = new int[n];
/* 生成随机数 */
for (int i = 0; i < n; i++)
{
data_host[i] = distrib(engine);
}
std::cout << "Befor sorting:\n";
for (int i = 0; i < n; i++)
{
std::cout << data_host[i] << ",";
}
std::cout << "\n";
/*数据拷贝到device端*/
int *data_device = nullptr;
cudaMalloc((void **)&data_device, n * sizeof(int));
cudaMemcpy(data_device, data_host, n * sizeof(int), cudaMemcpyHostToDevice);
/*使用thrust::sort()排序*/
// thrust::sort()只支持迭代器,因此需要将int*指针转为device_ptr<int>容器类型
thrust::device_ptr<int> t_ptr(data_device);
thrust::sort(t_ptr, t_ptr + n);
/*数据拷贝回host端*/
cudaMemcpy(data_host, data_device, n * sizeof(int), cudaMemcpyDeviceToHost);
std::cout << "After sorting:\n";
for (int i = 0; i < n; i++)
{
std::cout << data_host[i] << ",";
}
std::cout << "\n";
/*释放内存*/
delete[] data_host;
cudaFree(data_device);
return 0;
}
CMakeLists.txt
文件内容同上。
程序输出结果:
cpp
Befor sorting:
5,82,71,3,90,69,8,0,23,81,
After sorting:
0,3,5,8,23,69,71,81,82,90,
三、自定义排序
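如需自定义排序规则,可以向 `thrust::sort()` 传入比较函数对象作为第三个参数,例如 `thrust::sort(v.begin(), v.end(), thrust::greater<int>())`(需包含 `<thrust/functional.h>`)即可按降序排序。更多自定义排序的用法请参考 thrust 官方代码仓库:NVIDIA/thrust。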