OpenCL 学习(3)---- OpenCL 第一个程序

目录

OpenCL 开发流程

OpenCL 编程的标准开发流程如下:

  1. 查询平台(platform)和设备(device),选择需要的计算设备
  2. 创建上下文(context)
  3. 创建命令队列(command queue)
  4. 创建和编译程序对象(program)
  5. 创建内核对象和内存对象(buffer),设置内核参数(kernel Arg)
  6. 执行内核
  7. 数据拷贝回主机端

基本流程如下所示:

参考实例
c 复制代码
/* Number of float elements in each input vector and in the result vector. */
static const int ARRAY_SIZE = 100;

/*
 * OpenCL C source of the "vector_add" kernel, passed to
 * clCreateProgramWithSource() and compiled at run time.
 *
 * Each work-item takes its global id along dimension 0 and stores
 * a[gid] + b[gid] into result[gid]. The kernel performs no bounds check, so
 * the global work size must not exceed the number of elements in the buffers.
 *
 * The adjacent string literals are concatenated by the compiler; there are no
 * embedded newlines, so the whole kernel reaches the OpenCL compiler as a
 * single line of source.
 */
static const char *kernel_function_vec_add =
		"__kernel void vector_add(global const float *a, global const float *b, global float *result)"
		"{                                                                                           "
		"int gid = get_global_id(0);                                                                 "
		"result[gid] = a[gid] + b[gid];                                                              "
		"}                                                                                           ";

/**
 * Adds two ARRAY_SIZE-element float vectors on an OpenCL device and prints
 * every input pair together with its sum.
 *
 * Flow: take the first platform, pick a GPU device (falling back to a CPU
 * device), create a context and a command queue, build
 * kernel_function_vec_add, create the "vector_add" kernel, bind two input
 * buffers and one output buffer, enqueue the kernel over ARRAY_SIZE
 * work-items and read the output back with a blocking read.
 *
 * Every failure path goes through the cleanup label, so OpenCL objects that
 * were already created are released before returning.
 *
 * @param argc unused
 * @param argv unused
 * @return 0 on success, EXIT_FAILURE if any OpenCL step fails.
 */
int demoVectorAddOptimizeImpl(int argc, char* argv[]) {
	(void)argc;
	(void)argv;

	int status = EXIT_FAILURE;
	cl_int errNum;

	/*
	 * All handles are declared up front and start out NULL so that the
	 * cleanup path can tell which objects were actually created, and so that
	 * no goto jumps over an initialized declaration.
	 */
	cl_uint numPlatforms = 0;
	cl_platform_id firstPlatformId = NULL;
	cl_device_id device_id = NULL;
	cl_context context = NULL;
	cl_command_queue commandQueue = NULL;
	cl_program program = NULL;
	cl_kernel kernel = NULL;
	cl_mem input_mem0 = NULL;
	cl_mem input_mem1 = NULL;
	cl_mem output_mem = NULL;

	/* One work-item per element; each work-group holds a single work-item. */
	size_t globalWorkSize[1] = { ARRAY_SIZE };
	size_t localWorkSize[1] = { 1 };

	/* Prepare input data. */
	float result[ARRAY_SIZE];
	float a[ARRAY_SIZE];
	float b[ARRAY_SIZE];

	for (int i = 0; i < ARRAY_SIZE; i++) {
		a[i] = (float)i + 0.123;
		b[i] = (float)(i * 2) + 0.345;
	}

	/* Step 1: platform and device. Only the first platform is considered. */
	errNum = clGetPlatformIDs(1, &firstPlatformId, &numPlatforms);
	if (errNum != CL_SUCCESS || numPlatforms == 0) {
		printf("Failed to find any OpenCL platforms.\n");
		goto cleanup;
	}

	errNum = clGetDeviceIDs(firstPlatformId, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
	if (errNum != CL_SUCCESS) {
		printf("There is no GPU, trying CPU... \n");
		errNum = clGetDeviceIDs(firstPlatformId, CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
		if (errNum != CL_SUCCESS) {
			printf("There is NO GPU or CPU \n");
			goto cleanup;
		}
	}

	/* Step 2: context. */
	context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &errNum);
	if (errNum != CL_SUCCESS || context == NULL) {
		printf("create context error\n");
		goto cleanup;
	}

	/* Step 3: command queue with default properties. */
	commandQueue = clCreateCommandQueueWithProperties(context, device_id, NULL, NULL);
	if (commandQueue == NULL) {
		printf("Failed to create commandQueue for device 0 \n");
		goto cleanup;
	}

	/* Step 4: create and build the program from the embedded kernel source. */
	program = clCreateProgramWithSource(context, 1, &kernel_function_vec_add, NULL, NULL);
	if (program == NULL) {
		printf("Failed to create CL program from source. \n");
		goto cleanup;
	}

	errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
	if (errNum != CL_SUCCESS) {
		char buildLog[16384];
		/* Keep the buffer printable even if the log query itself fails. */
		buildLog[0] = '\0';
		clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buildLog), buildLog, NULL);
		printf("Error in kernel:%s \n", buildLog);
		goto cleanup;
	}

	/* Step 5: kernel object, device buffers and kernel arguments. */
	kernel = clCreateKernel(program, "vector_add", NULL);
	if (kernel == NULL) {
		printf("Failed to create kernel \n");
		goto cleanup;
	}

	/* Inputs are copied from host memory at creation; the output starts uninitialized. */
	input_mem0 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float)*ARRAY_SIZE, a, NULL);
	input_mem1 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float)*ARRAY_SIZE, b, NULL);
	output_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float)*ARRAY_SIZE, NULL, NULL);
	if (input_mem0 == NULL || input_mem1 == NULL || output_mem == NULL) {
		printf("Error creating memory objects. \n");
		goto cleanup;
	}

	errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_mem0);
	errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &input_mem1);
	errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &output_mem);
	if (errNum != CL_SUCCESS) {
		printf("Error setting kernel arguments.\n");
		goto cleanup;
	}

	/* Step 6: execute the kernel. */
	errNum = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
	if (errNum != CL_SUCCESS) {
		printf("Error queuing kernel for execution. \n");
		goto cleanup;
	}

	/* Step 7: blocking read (CL_TRUE) copies the result back to the host. */
	errNum = clEnqueueReadBuffer(commandQueue, output_mem, CL_TRUE, 0, ARRAY_SIZE * sizeof(float), result, 0, NULL, NULL);
	if (errNum != CL_SUCCESS) {
		printf("Error reading result buffer. \n");
		goto cleanup;
	}

	for (int i = 0; i < ARRAY_SIZE; i++) {
		printf("a[%d]=%f b[%d]=%f result[%d]=%f\n", i, a[i], i, b[i], i, result[i]);
	}
	printf("Executed program succesfully. \n");
	status = 0;

cleanup:
	/* Release only what was created; handles never assigned are still NULL. */
	if (input_mem0)
		clReleaseMemObject(input_mem0);
	if (input_mem1)
		clReleaseMemObject(input_mem1);
	if (output_mem)
		clReleaseMemObject(output_mem);
	if (commandQueue)
		clReleaseCommandQueue(commandQueue);
	if (kernel)
		clReleaseKernel(kernel);
	if (program)
		clReleaseProgram(program);
	if (context)
		clReleaseContext(context);

	return status;
}
相关推荐
西岸行者9 天前
学习笔记:SKILLS 能帮助更好的vibe coding
笔记·学习
悠哉悠哉愿意9 天前
【单片机学习笔记】串口、超声波、NE555的同时使用
笔记·单片机·学习
别催小唐敲代码9 天前
嵌入式学习路线
学习
毛小茛10 天前
计算机系统概论——校验码
学习
babe小鑫10 天前
大专经济信息管理专业学习数据分析的必要性
学习·数据挖掘·数据分析
winfreedoms10 天前
ROS2知识大白话
笔记·学习·ros2
在这habit之下10 天前
Linux Virtual Server(LVS)学习总结
linux·学习·lvs
我想我不够好。10 天前
2026.2.25监控学习
学习
im_AMBER10 天前
Leetcode 127 删除有序数组中的重复项 | 删除有序数组中的重复项 II
数据结构·学习·算法·leetcode
CodeJourney_J10 天前
从“Hello World“ 开始 C++
c语言·c++·学习