OpenCL 学习(3)---- OpenCL 第一个程序

目录

OpenCL 开发流程

OpenCL 编程的标准开发流程如下:

  1. 查询平台(platform)和设备(device),选择需要的计算设备
  2. 创建上下文(context)
  3. 创建命令队列(command queue)
  4. 创建和编译程序对象(program)
  5. 创建内核对象(kernel)和内存对象(buffer),设置内核参数(kernel argument)
  6. 执行内核
  7. 将计算结果拷贝回主机端,并释放 OpenCL 资源

基本流程如下所示:

参考实例
c 复制代码
/*
 * Number of float elements in each vector used by the demo.
 *
 * Declared as an enumeration constant so that it is an integer constant
 * expression in C: arrays sized with it are ordinary fixed-size arrays
 * instead of variable-length arrays.
 */
enum { ARRAY_SIZE = 100 };

/*
 * OpenCL C source of the "vector_add" kernel, stored as one string (the
 * adjacent literals below are concatenated by the compiler).
 *
 * Each work-item takes its index in dimension 0 (get_global_id(0)) and
 * writes a[gid] + b[gid] to result[gid]. The kernel performs no bounds
 * check, so the global work size must not exceed the length of the buffers.
 */
static const char *kernel_function_vec_add =
		"__kernel void vector_add(global const float *a, global const float *b, global float *result)"
		"{                                                                                           "
		"int gid = get_global_id(0);                                                                 "
		"result[gid] = a[gid] + b[gid];                                                              "
		"}                                                                                           ";

/**
 * Adds two ARRAY_SIZE-element float vectors on an OpenCL device and prints
 * every input pair together with its sum.
 *
 * Flow: take the first platform, pick a GPU device (falling back to a CPU
 * device), create a context and a command queue, build the vector_add kernel
 * from kernel_function_vec_add, create the buffers, run the kernel and read
 * the result back to the host.
 *
 * All OpenCL objects created here are released before returning, on both the
 * success path and every failure path.
 *
 * @param argc unused
 * @param argv unused
 * @return EXIT_SUCCESS (0) when the kernel ran and the result was read back,
 *         EXIT_FAILURE if any step failed.
 */
int demoVectorAddOptimizeImpl(int argc, char* argv[]) {
	(void)argc;
	(void)argv;

	int status = EXIT_FAILURE;
	cl_int errNum;

	/*
	 * Every handle starts out as NULL and is declared before the first
	 * "goto cleanup", so the cleanup code can run from any point and only
	 * releases what was actually created.
	 */
	cl_uint numPlatforms = 0;
	cl_platform_id firstPlatformId = NULL;
	cl_device_id device_id = NULL;
	cl_context context = NULL;
	cl_command_queue commandQueue = NULL;
	cl_program program = NULL;
	cl_kernel kernel = NULL;
	cl_mem input_mem0 = NULL;
	cl_mem input_mem1 = NULL;
	cl_mem output_mem = NULL;

	/* One work-item per element; each work-group holds a single work-item. */
	size_t globalWorkSize[1] = { ARRAY_SIZE };
	size_t localWorkSize[1] = { 1 };

	/* prepare input data */
	float result[ARRAY_SIZE];
	float a[ARRAY_SIZE];
	float b[ARRAY_SIZE];

	for (int i = 0; i < ARRAY_SIZE; i++) {
		a[i] = (float)i + 0.123;
		b[i] = (float)(i * 2) + 0.345;
	}

	/* Only the first reported platform is used. */
	errNum = clGetPlatformIDs(1, &firstPlatformId, &numPlatforms);
	if (errNum != CL_SUCCESS || numPlatforms == 0) {
		printf("Failed to find any OpenCL platforms.");
		goto cleanup;
	}

	/* Prefer a GPU device; fall back to a CPU device of the same platform. */
	errNum = clGetDeviceIDs(firstPlatformId, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
	if (errNum != CL_SUCCESS) {
		printf("There is no GPU, trying CPU... \n");
		errNum = clGetDeviceIDs(firstPlatformId, CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
		if (errNum != CL_SUCCESS) {
			printf("There is NO GPU or CPU \n");
			goto cleanup;
		}
	}

	context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &errNum);
	if (errNum != CL_SUCCESS || context == NULL) {
		printf("create context error\n");
		goto cleanup;
	}

	commandQueue = clCreateCommandQueueWithProperties(context, device_id, NULL, NULL);
	if (commandQueue == NULL) {
		printf("Failed to create commandQueue for device 0 \n");
		goto cleanup;
	}

	program = clCreateProgramWithSource(context, 1, &kernel_function_vec_add, NULL, NULL);
	if (program == NULL) {
		printf("Failed to create CL program from source. \n");
		goto cleanup;
	}

	errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
	if (errNum != CL_SUCCESS) {
		char buildLog[16384];

		/* Keep the buffer printable even if the log query itself fails. */
		buildLog[0] = '\0';
		clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buildLog), buildLog, NULL);
		printf("Error in kernel:%s \n", buildLog);
		goto cleanup;
	}

	kernel = clCreateKernel(program, "vector_add", NULL);
	if (kernel == NULL) {
		printf("Failed to create kernel \n");
		goto cleanup;
	}

	/* The input buffers are initialised from the host arrays at creation time. */
	input_mem0 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * ARRAY_SIZE, a, NULL);
	input_mem1 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * ARRAY_SIZE, b, NULL);
	output_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * ARRAY_SIZE, NULL, NULL);
	if (input_mem0 == NULL || input_mem1 == NULL || output_mem == NULL) {
		printf("Error creating memory objects. \n");
		goto cleanup;
	}

	/* Argument order matches the kernel signature: a, b, result. */
	errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_mem0);
	errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &input_mem1);
	errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &output_mem);
	if (errNum != CL_SUCCESS) {
		printf("Error setting kernel arguments.\n");
		goto cleanup;
	}

	/* execute the kernel */
	errNum = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
	if (errNum != CL_SUCCESS) {
		printf("Error queuing kernel for execution. \n");
		goto cleanup;
	}

	/* Blocking read (CL_TRUE): result is filled in when the call returns. */
	errNum = clEnqueueReadBuffer(commandQueue, output_mem, CL_TRUE, 0, ARRAY_SIZE * sizeof(float), result, 0, NULL, NULL);
	if (errNum != CL_SUCCESS) {
		printf("Error reading result buffer. \n");
		goto cleanup;
	}

	for (int i = 0; i < ARRAY_SIZE; i++) {
		printf("a[%d]=%f b[%d]=%f result[%d]=%f\n", i, a[i], i, b[i], i, result[i]);
	}
	printf("Executed program succesfully. \n");
	status = EXIT_SUCCESS;

cleanup:
	/* Reached on success and on every failure; NULL handles are skipped. */
	if (input_mem0) {
		clReleaseMemObject(input_mem0);
	}
	if (input_mem1) {
		clReleaseMemObject(input_mem1);
	}
	if (output_mem) {
		clReleaseMemObject(output_mem);
	}
	if (commandQueue) {
		clReleaseCommandQueue(commandQueue);
	}
	if (kernel) {
		clReleaseKernel(kernel);
	}
	if (program) {
		clReleaseProgram(program);
	}
	if (context) {
		clReleaseContext(context);
	}

	return status;
}
相关推荐
wrj的博客12 小时前
python环境安装
python·学习·环境配置
优雅的潮叭12 小时前
c++ 学习笔记之 chrono库
c++·笔记·学习
星火开发设计12 小时前
C++ 数组:一维数组的定义、遍历与常见操作
java·开发语言·数据结构·c++·学习·数组·知识
星幻元宇VR12 小时前
走进公共安全教育展厅|了解安全防范知识
学习·安全·虚拟现实
知识分享小能手12 小时前
Oracle 19c入门学习教程,从入门到精通, Oracle 表空间与数据文件管理详解(9)
数据库·学习·oracle
浅念-14 小时前
C语言小知识——指针(3)
c语言·开发语言·c++·经验分享·笔记·学习·算法
hkNaruto14 小时前
【AI】AI学习笔记:LangGraph 与 LangChain的关系以及系统性学习路线选择
笔记·学习·langchain
jrlong14 小时前
DataWhale大模型基础与量化微调task3学习笔记(第 5章:深入大模型架构_MoE 架构解析)
笔记·学习
wdfk_prog16 小时前
[Linux]学习笔记系列 --[drivers][base]map
linux·笔记·学习
浅念-16 小时前
链表经典面试题目
c语言·数据结构·经验分享·笔记·学习·算法