cuda 图像处理之sobel边缘检测

news/2024/7/21 5:02:29 标签: 图像处理

贴上源代码吧，是在 Linux 下调试通过的，包含源码和 CMakeLists.txt。

  • 环境配置

配置好cuda pkg_config

配置好opencv pkg_config

之后的CMakeLists.txt会使用

  • 源码
#include "cuda_runtime.h"
#include <cuda.h>
#include <device_functions.h>
#include <opencv2/opencv.hpp>
#include <iostream>
#include <chrono>

using namespace std;
using namespace cv;

// GPU Sobel edge detector.
//
// Each thread processes the single pixel (x, y) derived from its 2D block and
// thread indices, so the launch must use a 2D grid that covers at least
// imgWidth x imgHeight threads. `in` and `out` are indexed as tightly packed
// row-major 8-bit images (row stride == imgWidth).
//
// 3x3 neighbourhood naming used below:
//  x0 x1 x2
//  x3 x4 x5
//  x6 x7 x8
//
// Parameters:
//   in        - input image, imgHeight * imgWidth bytes
//   out       - output image, imgHeight * imgWidth bytes; every pixel is written
//   imgHeight - number of rows
//   imgWidth  - number of columns
__global__ void sobel_gpu(unsigned char *in, unsigned char *out, int imgHeight, int imgWidth)
{
    const int x = threadIdx.x + blockDim.x * blockIdx.x;
    const int y = threadIdx.y + blockDim.y * blockIdx.y;

    // The grid is rounded up to whole blocks, so threads past the image edge
    // have nothing to do.
    if (x >= imgWidth || y >= imgHeight)
    {
        return;
    }

    const int index = y * imgWidth + x;

    // Border pixels have no complete 3x3 neighbourhood. Write an explicit 0 so
    // the output never contains whatever was left in the device buffer.
    if (x == 0 || y == 0 || x == imgWidth - 1 || y == imgHeight - 1)
    {
        out[index] = 0;
        return;
    }

    const int x0 = in[(y - 1) * imgWidth + x - 1];
    const int x1 = in[(y - 1) * imgWidth + x];
    const int x2 = in[(y - 1) * imgWidth + x + 1];

    const int x3 = in[y * imgWidth + x - 1];
    const int x5 = in[y * imgWidth + x + 1];

    const int x6 = in[(y + 1) * imgWidth + x - 1];
    const int x7 = in[(y + 1) * imgWidth + x];
    const int x8 = in[(y + 1) * imgWidth + x + 1];

    // Horizontal and vertical Sobel responses (the centre pixel has weight 0).
    const int Gx = x0 + 2 * x3 + x6 - x2 - 2 * x5 - x8;
    const int Gy = x0 + 2 * x1 + x2 - x6 - 2 * x7 - x8;

    // |Gx| and |Gy| can each reach 4 * 255, so the averaged magnitude may
    // exceed 255; clamp instead of letting the store wrap around.
    const int magnitude = (abs(Gx) + abs(Gy)) / 2;
    out[index] = static_cast<unsigned char>(magnitude > 255 ? 255 : magnitude);
}

// CPU Sobel edge detector (reference implementation for the GPU kernel).
//
// Reads 8-bit pixels from srcImg and writes the averaged gradient magnitude
// (|Gx| + |Gy|) / 2 into dstImg for every interior pixel. The outermost rows
// and columns of dstImg are not written, so the caller decides their value.
//
// Parameters:
//   srcImg    - input image, accessed as rows of unsigned char
//   dstImg    - output image, accessed as rows of unsigned char; the Mat is
//               passed by value but the pixels are written through ptr(), so
//               the caller's image receives the result
//   imgHeight - number of rows to process
//   imgWidth  - number of columns to process
void sobel_cpu(Mat srcImg, Mat dstImg, int imgHeight, int imgWidth)
{
    for (int row = 1; row < imgHeight - 1; row++)
    {
        const unsigned char *dataUp = srcImg.ptr<unsigned char>(row - 1);
        const unsigned char *data = srcImg.ptr<unsigned char>(row);
        const unsigned char *dataDown = srcImg.ptr<unsigned char>(row + 1);
        unsigned char *out = dstImg.ptr<unsigned char>(row);

        for (int col = 1; col < imgWidth - 1; col++)
        {
            // Right column minus left column.
            const int Gx = (dataUp[col + 1] + 2 * data[col + 1] + dataDown[col + 1])
                         - (dataUp[col - 1] + 2 * data[col - 1] + dataDown[col - 1]);
            // Top row minus bottom row.
            const int Gy = (dataUp[col - 1] + 2 * dataUp[col] + dataUp[col + 1])
                         - (dataDown[col - 1] + 2 * dataDown[col] + dataDown[col + 1]);

            // The averaged magnitude can exceed 255; saturate rather than
            // letting the conversion to unsigned char wrap around.
            out[col] = saturate_cast<unsigned char>((abs(Gx) + abs(Gy)) / 2);
        }
    }
}

// Runs the Sobel filter on "1.jpg" once on the CPU and once on the GPU,
// prints the time taken by each stage, and writes both results as BMP files.
// Returns 0 on success and 1 if the image cannot be read or a CUDA call fails.
int main()
{
    // Read the image with OpenCV; flag 0 loads it as single-channel grayscale.
    Mat grayImg = imread("1.jpg", 0);
    if (grayImg.empty())
    {
        std::cerr << "failed to read image: 1.jpg" << std::endl;
        return 1;
    }

    int imgWidth = grayImg.cols;
    int imgHeight = grayImg.rows;

    // Denoise the grayscale image before edge detection.
    Mat gaussImg;
    GaussianBlur(grayImg, gaussImg, Size(3,3), 0, 0, BORDER_DEFAULT);
    // The raw-pointer copy to the device below needs tightly packed rows.
    if (!gaussImg.isContinuous())
    {
        gaussImg = gaussImg.clone();
    }

    // dst_cpu, dst_gpu: zero-initialised result images.
    Mat dst_cpu(imgHeight, imgWidth, CV_8UC1, Scalar(0));
    Mat dst_gpu(imgHeight, imgWidth, CV_8UC1, Scalar(0));

    // Milliseconds between two time points, at microsecond resolution.
    auto elapsedMs = [](std::chrono::steady_clock::time_point from,
                        std::chrono::steady_clock::time_point to) -> double {
        return std::chrono::duration_cast<std::chrono::microseconds>(to - from).count() / 1000.0;
    };

    // Reports a failed CUDA call on stderr; returns true when the call succeeded.
    auto cudaOk = [](cudaError_t err, const char *what) -> bool {
        if (err != cudaSuccess)
        {
            std::cerr << what << " failed: " << cudaGetErrorString(err) << std::endl;
            return false;
        }
        return true;
    };

    // sobel_cpu
    auto start = std::chrono::steady_clock::now();
    sobel_cpu(gaussImg, dst_cpu, imgHeight, imgWidth);
    auto end = std::chrono::steady_clock::now();
    std::cout << "cpu process time: " << elapsedMs(start, end) << "ms" << std::endl;

    // Allocate the device-side input and output buffers.
    // Widen before multiplying so large images cannot overflow int.
    size_t num = static_cast<size_t>(imgHeight) * imgWidth * sizeof(unsigned char);
    unsigned char *in_gpu = nullptr;
    unsigned char *out_gpu = nullptr;

    bool ok = cudaOk(cudaMalloc((void **)&in_gpu, num), "cudaMalloc(in_gpu)")
           && cudaOk(cudaMalloc((void **)&out_gpu, num), "cudaMalloc(out_gpu)");

    // Grid and block dimensions: 32x32 = 1024 threads per block, which must not
    // exceed 1024. Each grid axis is the ceil-division of the image extent by
    // the block extent along that same axis.
    dim3 threadsPerBlock(32,32);
    dim3 blocksPerGrid((imgWidth + threadsPerBlock.x - 1) / threadsPerBlock.x,
                       (imgHeight + threadsPerBlock.y - 1) / threadsPerBlock.y);

    if (ok)
    {
        start = std::chrono::steady_clock::now();
        ok = cudaOk(cudaMemcpy(in_gpu, gaussImg.data, num, cudaMemcpyHostToDevice),
                    "cudaMemcpy(host->device)");
        end = std::chrono::steady_clock::now();
        std::cout << "cpu->gpu copy time: " << elapsedMs(start, end) << "ms ";
    }

    if (ok)
    {
        start = std::chrono::steady_clock::now();
        sobel_gpu<<<blocksPerGrid, threadsPerBlock>>>(in_gpu, out_gpu, imgHeight, imgWidth);
        // A launch returns before the kernel has run: pick up launch errors,
        // then wait for completion so the measured time covers the execution.
        ok = cudaOk(cudaGetLastError(), "sobel_gpu launch")
          && cudaOk(cudaDeviceSynchronize(), "sobel_gpu execution");
        end = std::chrono::steady_clock::now();
        std::cout << " gpu process time: " << elapsedMs(start, end) << "ms ";
    }

    if (ok)
    {
        start = std::chrono::steady_clock::now();
        ok = cudaOk(cudaMemcpy(dst_gpu.data, out_gpu, num, cudaMemcpyDeviceToHost),
                    "cudaMemcpy(device->host)");
        end = std::chrono::steady_clock::now();
        std::cout << " gpu->cpu copy time: " << elapsedMs(start, end) << "ms" << std::endl;
    }

    // Save the results (uncomment imshow to display them instead).
    //imshow("gpu", dst_gpu);
    //imshow("cpu", dst_cpu);
    if (ok)
    {
        imwrite("gpu_process.bmp", dst_gpu);
    }
    imwrite("cpu_process.bmp", dst_cpu);

    // cudaFree accepts a null pointer, so this is safe after a failed allocation.
    cudaFree(in_gpu);
    cudaFree(out_gpu);

    return ok ? 0 : 1;
}
  • CMakeLists.txt
# Build script for the CUDA Sobel demo: compiles gpu_sobel.cu with nvcc through
# the FindCUDA module and links it against the CUDA runtime and OpenCV.
cmake_minimum_required(VERSION 2.6)

project(gpu_sobel)

add_definitions(-std=c++11)
add_definitions(-DAPI_EXPORTS)
# option() takes <variable> "<help text>" [value]; the help string is required
# for OFF to be read as the value rather than as the description.
option(CUDA_USE_STATIC_CUDA_RUNTIME "Link against the static CUDA runtime" OFF)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Debug)

find_package(CUDA REQUIRED)

include_directories(${PROJECT_SOURCE_DIR}/include)
# NOTE: machine-specific CUDA install location; adjust to the local toolkit.
include_directories(/usr/local/cuda-11.1/include)
link_directories(/usr/local/cuda-11.1/lib64)


# NOTE: machine-specific OpenCV install location; adjust to the local install.
set(OpenCV_DIR /home/szfhy/lib/opencv4.4.0/lib/cmake/opencv4)
# REQUIRED makes configuration stop here if OpenCV is missing instead of
# failing later at link time.
find_package(OpenCV REQUIRED)
# find_package(OpenCV 4.4.0  REQUIRED)
include_directories(/home/szfhy/lib/opencv4.4.0/include)
link_directories(/home/szfhy/lib/opencv4.4.0/lib)
include_directories(${OpenCV_INCLUDE_DIRS})

# The executable target is named gpu_soble (sic); the name is kept so the
# produced binary does not change.
cuda_add_executable(gpu_soble gpu_sobel.cu)

target_link_libraries(gpu_soble cudart)
target_link_libraries(gpu_soble ${OpenCV_LIBS})
  • 边缘检测效果

  • 运行时间对比


http://www.niftyadmin.cn/n/1414215.html

相关文章

[转]LINQ(1):初识 LINQ

为什么需要LINQ？ 面向对象的编程语言已经成为企业应用开发的重要工具，ADO.NET对关系数据提供了一种方便的接口，但还不是一种面向对象的方法。例如，下面的伪代码： User u newUser(); //代表user表的一个User类u.LoginNa…

LeetCode 29 Divide Two Integers(两个整数相除)(*)

翻译 不用乘法、除法、取余操作，将两个数相除。如果它溢出了，返回MAX_INT 原文 Divide two integers without using multiplication, division and mod operator.If it is overflow, return MAX_INT. 代码 一心扑到了递归上，可惜没能写出来……

[转]VS.NET 2008 beta2中文版试用

原文地址:http://www.cnblogs.com/zxsoft/archive/2007/09/02/878629.html 8月30日知道了VS2008中文版发布的消息，就去下载了。31日就下载好了，可今天才有时间运行一下测试。发现真的是强大了很多啊！不说别的，光看了看新建窗口。支…

基于深度学习的图像语义分割技术概述之背景与深度网络架构

图像语义分割正在逐渐成为计算机视觉及机器学习研究人员的研究热点。大量应用需要精确、高效的分割机制，如：自动驾驶、室内导航、及虚拟/增强现实系统。这种需求与机器视觉方面的深度学习领域的目标一致，包括语义分割或场景理解。本文对多种应…

浅谈C语言参数可变函数的实现

1.需要的头文件：stdarg.h 需要的宏：va_start(a,b) va_arg(a,b) va_end(a) 需要的类型别名: va_list 2.基本用法 (1)写函数头 1 return_type function_name(first_argument,...); eg1： 1 int fun1（int a,...）; …

ubuntu安装MobaXterm和WPS

文章目录 ubuntu安装MobaXtermi386 架构wine操作步骤 ubuntu安装WPS操作步骤WPS版本知识补充 ubuntu安装MobaXterm i386 架构 sudo dpkg --add-architecture i386 是一个Linux系统中的命令，用于添加一个新的架构（architecture）支持到当前系统…

cuda 软硬件相关概念

软件概念 thread 计算任务的基本单元，每个thread完成一个任务, 运行在一个sp,或者cudacore上。 block 完整独立运行的最小单元，运行在一个SM中（一个SM中可以跑多个block），SM级别的调度单位 grid 一堆block组成grid…

微软免费图书 Introducing Microsoft LINQ 的翻译

译者翻译链接的地址http://www.cnblogs.com/hanxianlong/archive/2008/01/15/translating-ms-linq.html