CUDA学习笔记3——图像卷积实现

分别采用GPU、CPU对图像进行Sobel滤波处理

cpp 复制代码
#include <stdio.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include<math.h>
#include <malloc.h> 
#include <opencv2/opencv.hpp>

#include <stdlib.h>

// Threads per block along each axis (BLOCK_SIZE x BLOCK_SIZE threads per block).
#define BLOCK_SIZE 16


// Sobel edge filter, GPU version.
//
// Expected launch: a 2D grid of 2D blocks with one thread per pixel, where the
// grid covers at least Width x Height threads. Any block shape works because
// the pixel coordinate is derived from blockDim/blockIdx/threadIdx.
//
// in     - single-channel 8-bit image, rows packed back to back (row stride == Width)
// out    - buffer of the same layout; only interior pixels are written, the
//          one-pixel border is left untouched, so the caller must initialise it
// Height - number of image rows
// Width  - number of image columns
//
// Each interior pixel receives (|Gx| + |Gy|) / 2, saturated to 255.
__global__ void sobel_gpu(unsigned char* in, unsigned char* out, const int Height, const int Width)
{
	const int x = blockDim.x * blockIdx.x + threadIdx.x;
	// Row index: block offset is blockDim.y * blockIdx.y (a product, not a sum),
	// otherwise rows are skipped/duplicated whenever blockDim.y != 1.
	const int y = blockDim.y * blockIdx.y + threadIdx.y;

	// Threads on the image border or past the image (grid tail) have no full
	// 3x3 neighbourhood and do nothing.
	if (x <= 0 || x >= Width - 1 || y <= 0 || y >= Height - 1)
		return;

	// Row pointers of the 3x3 neighbourhood; size_t keeps the offset math out of int range issues.
	const unsigned char* up = in + (size_t)(y - 1) * Width;
	const unsigned char* mid = up + Width;
	const unsigned char* down = mid + Width;

	// Horizontal gradient: left column minus right column, weights 1-2-1.
	const int Gx = (up[x - 1] + 2 * mid[x - 1] + down[x - 1]) - (up[x + 1] + 2 * mid[x + 1] + down[x + 1]);
	// Vertical gradient: top row minus bottom row, weights 1-2-1.
	const int Gy = (up[x - 1] + 2 * up[x] + up[x + 1]) - (down[x - 1] + 2 * down[x] + down[x + 1]);

	// The halved sum can reach 765, so clamp before narrowing to 8 bits
	// instead of letting the store wrap modulo 256.
	const int mag = (abs(Gx) + abs(Gy)) / 2;
	out[(size_t)y * Width + x] = (unsigned char)(mag > 255 ? 255 : mag);
}

//Sobel滤波  CPU实现
// Sobel edge filter, CPU reference implementation.
//
// srcImg - input image; rows are read as unsigned char, one byte per pixel
//          (assumes a single-channel 8-bit Mat - the caller in this file passes one)
// dstImg - output image; passed by value, and the caller relies on the writes
//          showing up in its own Mat (cv::Mat copies share the pixel buffer)
// Height - number of rows to process
// Width  - number of columns to process
//
// Interior pixels receive (|Gx| + |Gy|) / 2, saturated to 255. The one-pixel
// border of dstImg is not written.
void sobel_cpu(cv::Mat srcImg, cv::Mat dstImg, int Height, int Width)
{
	for (int i = 1; i < Height - 1; i++)
	{
		// Rows above, at and below the current output row.
		const unsigned char* dataUp = srcImg.ptr<unsigned char>(i - 1);
		const unsigned char* data = srcImg.ptr<unsigned char>(i);
		const unsigned char* dataDown = srcImg.ptr<unsigned char>(i + 1);
		unsigned char* out = dstImg.ptr<unsigned char>(i);
		for (int j = 1; j < Width - 1; j++)
		{
			// Horizontal gradient: right column minus left column, weights 1-2-1.
			const int Gx = (dataUp[j + 1] + 2 * data[j + 1] + dataDown[j + 1]) - (dataUp[j - 1] + 2 * data[j - 1] + dataDown[j - 1]);
			// Vertical gradient: top row minus bottom row, weights 1-2-1.
			const int Gy = (dataUp[j - 1] + 2 * dataUp[j] + dataUp[j + 1]) - (dataDown[j - 1] + 2 * dataDown[j] + dataDown[j + 1]);
			// The halved sum can reach 765; clamp so strong edges do not wrap
			// around modulo 256 when stored as 8 bits.
			const int mag = (abs(Gx) + abs(Gy)) / 2;
			out[j] = (unsigned char)(mag > 255 ? 255 : mag);
		}
	}
}


// Prints a diagnostic for a failed CUDA runtime call; returns true when err is cudaSuccess.
static bool cudaSucceeded(cudaError_t err, const char* what)
{
	if (err == cudaSuccess)
		return true;
	fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
	return false;
}

// Loads photo16.jpg, converts it to grayscale, blurs it, runs the Sobel filter
// on both the GPU and the CPU, saves both results as PNG files and shows them.
// Returns 0 on success, 1 if the image cannot be read or a CUDA call fails.
int main()
{
	cv::Mat src = cv::imread("photo16.jpg");
	if (src.empty())
	{
		// imread signals failure with an empty Mat; cvtColor would throw on it.
		fprintf(stderr, "failed to read photo16.jpg\n");
		return 1;
	}

	cv::Mat grayImg, gaussImg;
	cv::cvtColor(src, grayImg, cv::COLOR_BGR2GRAY);
	cv::GaussianBlur(grayImg, gaussImg, cv::Size(3, 3), 0, 0, cv::BORDER_DEFAULT);
	// The raw copy to the device below treats the pixels as one packed buffer.
	if (!gaussImg.isContinuous())
		gaussImg = gaussImg.clone();

	const int height = src.rows;
	const int width = src.cols;

	// Output images, zero-filled so the unprocessed border is black.
	cv::Mat dst_gpu(height, width, CV_8UC1, cv::Scalar(0));
	cv::Mat dst_cpu(height, width, CV_8UC1, cv::Scalar(0));

	// Device buffer size in bytes; computed in size_t so large images do not overflow int.
	const size_t memsize = (size_t)height * (size_t)width * sizeof(unsigned char);

	// Device input / output buffers. NULL-initialised so cudaFree is safe on every path.
	unsigned char* in_gpu = NULL;
	unsigned char* out_gpu = NULL;

	dim3 threadsPreBlock(BLOCK_SIZE, BLOCK_SIZE);
	// Ceil-division so the grid covers the whole image when the size is not a multiple of the block.
	dim3 blocksPreGrid((width + threadsPreBlock.x - 1) / threadsPreBlock.x, (height + threadsPreBlock.y - 1) / threadsPreBlock.y);

	bool ok = cudaSucceeded(cudaMalloc((void**)&in_gpu, memsize), "cudaMalloc(in_gpu)")
		&& cudaSucceeded(cudaMalloc((void**)&out_gpu, memsize), "cudaMalloc(out_gpu)")
		// The kernel only writes interior pixels; clear the buffer so the border
		// copied back into dst_gpu is zero like dst_cpu instead of uninitialised memory.
		&& cudaSucceeded(cudaMemset(out_gpu, 0, memsize), "cudaMemset(out_gpu)")
		&& cudaSucceeded(cudaMemcpy(in_gpu, gaussImg.data, memsize, cudaMemcpyHostToDevice), "cudaMemcpy(host to device)");

	if (ok)
	{
		sobel_gpu <<<blocksPreGrid, threadsPreBlock>>> (in_gpu, out_gpu, height, width);
		// A launch does not return an error itself; query it explicitly. The
		// blocking device-to-host copy then waits for the kernel and reports
		// any failure that happened while it ran.
		ok = cudaSucceeded(cudaGetLastError(), "sobel_gpu launch")
			&& cudaSucceeded(cudaMemcpy(dst_gpu.data, out_gpu, memsize, cudaMemcpyDeviceToHost), "cudaMemcpy(device to host)");
	}

	// Device buffers are no longer needed once the result is on the host.
	cudaFree(in_gpu);
	cudaFree(out_gpu);
	if (!ok)
		return 1;

	// CPU reference result.
	sobel_cpu(gaussImg, dst_cpu, height, width);

	cv::imwrite("dst_cpu_save.png", dst_cpu);
	cv::imwrite("dst_gpu_save.png", dst_gpu);

	//cv::namedWindow("src", cv::WINDOW_NORMAL);
	cv::imshow("src", src);
	//cv::namedWindow("dst_cpu", cv::WINDOW_NORMAL);
	cv::imshow("dst_cpu", dst_cpu);
	//cv::namedWindow("dst_gpu", cv::WINDOW_NORMAL);
	cv::imshow("dst_gpu", dst_gpu);
	cv::waitKey();

	return 0;
}



相关推荐
云雾J视界1 小时前
多Stream并发实战:用流水线技术将AIGC服务P99延迟压降63%
aigc·api·cpu·stream·gpu·cuda·多并发
碧海潮生_CC2 天前
【CUDA笔记】05 使用 AMGX 实现泊松图像编辑
笔记·cuda
Stara05112 天前
基于WSL 2在Windows 11 构建深度学习开发环境 —— 以Ubuntu、Anaconda、PyCharm及GPU支持为核心
pytorch·ubuntu·windows 11·cuda·anaconda·wsl 2·python
碧海潮生_CC6 天前
【CUDA笔记】04 CUDA 归约, 原子操作,Warp 交换
笔记·cuda
fpcc10 天前
并行编程实战——CUDA编程的流的优先级
c++·cuda
碧海潮生_CC11 天前
【CUDA笔记】03 CUDA GPU 架构与一般的程序优化思路(下)
笔记·架构·cuda
中医正骨葛大夫12 天前
一文解决如何在Pycharm中创建cuda深度学习环境?
pytorch·深度学习·pycharm·软件安装·cuda·anaconda·配置环境
lvxiangyu1117 天前
wsl2 ubuntu24 opengl 无法使用nvidia显卡 解决方法记录
wsl·cuda·opengl
李昊哲小课17 天前
wsl ubuntu24.04 cuda13 cudnn9 pytorch 显卡加速
人工智能·pytorch·python·cuda·cudnn
wanzhong233319 天前
CUDA学习2-CPU和GPU的性能优化
深度学习·gpu·cuda·高性能计算