OpenCL 学习(3)---- OpenCL 第一个程序

/* Element count of each host array (a, b, result) and of the matching device
 * buffers; also used as the 1-D global work size for the kernel launch. */
static const int ARRAY_SIZE = 100;

/*
 * OpenCL C source of the vector_add kernel, handed to
 * clCreateProgramWithSource() as a single NUL-terminated string.
 *
 * Each work-item reads its global id in dimension 0 and stores
 * a[gid] + b[gid] into result[gid]. The kernel performs no bounds check,
 * so the global work size must not exceed the element count of the buffers.
 *
 * The adjacent literals are concatenated without newlines, so the whole
 * kernel is one source line padded with spaces.
 */
static const char *kernel_function_vec_add =
		"__kernel void vector_add(global const float *a, global const float *b, global float *result)"
		"{                                                                                           "
		"int gid = get_global_id(0);                                                                 "
		"result[gid] = a[gid] + b[gid];                                                              "
		"}                                                                                           ";

/**
 * Add two ARRAY_SIZE-element float vectors with the vector_add OpenCL kernel
 * and print every input pair together with its sum.
 *
 * Steps: pick the first platform, pick a GPU device (falling back to a CPU
 * device), create a context and command queue, build the kernel from
 * kernel_function_vec_add, upload the inputs, launch one work-item per
 * element, and read the result back with a blocking read.
 *
 * Every OpenCL object created along the way is released on both the success
 * path and every failure path via the single cleanup label.
 *
 * @param argc unused
 * @param argv unused
 * @return EXIT_SUCCESS when the kernel ran and the result was read back,
 *         EXIT_FAILURE on any OpenCL error.
 */
int demoVectorAddOptimizeImpl(int argc, char* argv[]) {
	(void)argc;
	(void)argv;

	/* All handles start as NULL so the cleanup section can run from any
	 * point, and so no goto ever jumps across an initialisation. */
	int status = EXIT_FAILURE;
	cl_int errNum = CL_SUCCESS;
	cl_uint numPlatforms = 0;
	cl_platform_id firstPlatformId = NULL;
	cl_device_id device_id = NULL;
	cl_context context = NULL;
	cl_command_queue commandQueue = NULL;
	cl_program program = NULL;
	cl_kernel kernel = NULL;
	cl_mem input_mem0 = NULL;
	cl_mem input_mem1 = NULL;
	cl_mem output_mem = NULL;
	size_t globalWorkSize[1] = { ARRAY_SIZE };

	/* prepare input data */
	float result[ARRAY_SIZE];
	float a[ARRAY_SIZE];
	float b[ARRAY_SIZE];

	for (int i = 0; i < ARRAY_SIZE; i++) {
		a[i] = (float)i + 0.123;
		b[i] = (float)(i * 2) + 0.345;
	}

	errNum = clGetPlatformIDs(1, &firstPlatformId, &numPlatforms);
	if (errNum != CL_SUCCESS || numPlatforms == 0) {
		printf("Failed to find any OpenCL platforms.");
		goto cleanup;
	}

	errNum = clGetDeviceIDs(firstPlatformId, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
	if (errNum != CL_SUCCESS) {
		printf("There is no GPU, trying CPU... \n");
		errNum = clGetDeviceIDs(firstPlatformId, CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
		if (errNum != CL_SUCCESS) {
			printf("There is NO GPU or CPU \n");
			goto cleanup;
		}
	}

	context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &errNum);
	if (errNum != CL_SUCCESS || context == NULL) {
		printf("create context error\n");
		goto cleanup;
	}

	commandQueue = clCreateCommandQueueWithProperties(context, device_id, NULL, NULL);
	if (commandQueue == NULL) {
		printf("Failed to create commandQueue for device 0 \n");
		goto cleanup;
	}

	program = clCreateProgramWithSource(context, 1, &kernel_function_vec_add, NULL, NULL);
	if (program == NULL) {
		printf("Failed to create CL program from source. \n");
		goto cleanup;
	}

	errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
	if (errNum != CL_SUCCESS) {
		char buildLog[16384];
		/* Keep the buffer a valid string even if the log query itself fails. */
		buildLog[0] = '\0';
		clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buildLog), buildLog, NULL);
		printf("Error in kernel:%s \n", buildLog);
		goto cleanup;
	}

	kernel = clCreateKernel(program, "vector_add", NULL);
	if (kernel == NULL) {
		printf("Failed to create kernel \n");
		goto cleanup;
	}

	/* Inputs are copied from host memory at creation time; the output buffer
	 * is left uninitialised and filled by the kernel. */
	input_mem0 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * ARRAY_SIZE, a, NULL);
	input_mem1 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * ARRAY_SIZE, b, NULL);
	output_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * ARRAY_SIZE, NULL, NULL);
	if (input_mem0 == NULL || input_mem1 == NULL || output_mem == NULL) {
		printf("Error creating memory objects. \n");
		goto cleanup;
	}

	errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_mem0);
	errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &input_mem1);
	errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &output_mem);
	if (errNum != CL_SUCCESS) {
		printf("Error setting kernel arguments.\n");
		goto cleanup;
	}

	/* Execute the kernel: one work-item per element. A NULL local work size
	 * lets the OpenCL implementation choose the work-group size instead of
	 * forcing groups of a single work-item. */
	errNum = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL);
	if (errNum != CL_SUCCESS) {
		printf("Error queuing kernel for execution. \n");
		goto cleanup;
	}

	/* Blocking read (CL_TRUE): result[] is fully populated when this returns. */
	errNum = clEnqueueReadBuffer(commandQueue, output_mem, CL_TRUE, 0, ARRAY_SIZE * sizeof(float), result, 0, NULL, NULL);
	if (errNum != CL_SUCCESS) {
		printf("Error reading result buffer. \n");
		goto cleanup;
	}

	for (int i = 0; i < ARRAY_SIZE; i++) {
		printf("a[%d]=%f b[%d]=%f result[%d]=%f\n", i, a[i], i, b[i], i, result[i]);
	}
	printf("Executed program succesfully. \n");
	status = EXIT_SUCCESS;

cleanup:
	/* Release only what was actually created; handles never assigned are
	 * still NULL at this point. */
	if (input_mem0)
		clReleaseMemObject(input_mem0);
	if (input_mem1)
		clReleaseMemObject(input_mem1);
	if (output_mem)
		clReleaseMemObject(output_mem);
	if (commandQueue)
		clReleaseCommandQueue(commandQueue);
	if (kernel)
		clReleaseKernel(kernel);
	if (program)
		clReleaseProgram(program);
	if (context)
		clReleaseContext(context);

	return status;
}
OpenCL 学习(3)---- OpenCL 第一个程序

目录

OpenCL 开发流程

参考实例