Linux测试处理fps为30、1920*1080、一分钟的视频性能

前置条件

模拟fps为30、1920*1080、一分钟的视频

项目CMakeLists.txt

cmakelists 复制代码
# Build script for the grayscale-conversion benchmark executable.
cmake_minimum_required(VERSION 3.30)
project(testOpenGl)

# The sources use C++11 features (std::thread, raw string literals), so fail
# at configure time instead of silently decaying to an older standard.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

add_executable(testOpenGl main.cpp
        testOpenCl.cpp
        testOpenCl.h
        TestCpp.cpp
        TestCpp.h
        TestCppThread.cpp
        TestCppThread.h
        TestSIMD.cpp
        TestSIMD.h)

# Locate OpenCL
find_package(OpenCL REQUIRED)

# Locate the platform thread library. TestCppThread.cpp uses std::thread,
# which must be linked explicitly on toolchains where libc does not bundle it.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)

# Add the OpenCL headers and link OpenCL plus the thread library
target_include_directories(testOpenGl PRIVATE ${OpenCL_INCLUDE_DIRS})
target_link_libraries(testOpenGl PRIVATE ${OpenCL_LIBRARIES} Threads::Threads)

# Detect SIMD support and add the matching compile option
include(CheckCXXCompilerFlag)

check_cxx_compiler_flag("-mavx" COMPILER_SUPPORTS_AVX)
check_cxx_compiler_flag("-mavx2" COMPILER_SUPPORTS_AVX2)

# Prefer AVX2, fall back to AVX, and abort configuration if neither flag is accepted.
if(COMPILER_SUPPORTS_AVX2)
    target_compile_options(testOpenGl PRIVATE -mavx2)
elseif (COMPILER_SUPPORTS_AVX)
    target_compile_options(testOpenGl PRIVATE -mavx)
else ()
    message(FATAL_ERROR "AVX or AVX2 is not supported by compiler")
endif ()

C++代码

cpp 复制代码
//
// Created by lai on 2025/1/17.
//

#include "TestCpp.h"

#include <iostream>
#include <vector>
#include <random>
#include <chrono>

// Converts a packed 8-bit RGB frame to 8-bit grayscale.
// `input` is read as width * height consecutive R,G,B triplets and `output`
// receives one byte per pixel. Both vectors are indexed directly, so the
// caller must size them beforehand.
void to_gray(const std::vector<unsigned char>& input, std::vector<unsigned char>& output, int width, int height) {
    int px = 0;
    while (px < width * height) {
        const int base = px * 3;  // first byte of this pixel's RGB triplet
        const unsigned char red = input[base];
        const unsigned char green = input[base + 1];
        const unsigned char blue = input[base + 2];
        // Weighted channel sum; the cast drops the fractional part.
        output[px] = static_cast<unsigned char>(0.299f * red + 0.587f * green + 0.114f * blue);
        ++px;
    }
}
// Single-threaded baseline benchmark: converts 1800 simulated 1920x1080 RGB
// frames (30 fps for 60 seconds) to grayscale with to_gray() and reports the
// total and per-frame wall-clock time. The measured interval also covers the
// per-frame random data generation.
void TestCpp::runTest() {
    using Clock = std::chrono::high_resolution_clock;

    const int width = 1920;                 // frame width in pixels
    const int height = 1080;                // frame height in pixels
    const int fps = 30;                     // frames per second
    const int duration = 60;                // clip length in seconds
    const int frameCount = fps * duration;  // total number of frames

    // One reusable RGB source frame (3 bytes per pixel) and one gray destination frame.
    std::vector<unsigned char> rgbFrame(width * height * 3);
    std::vector<unsigned char> grayFrame(width * height);

    std::random_device seedSource;
    std::mt19937 engine(seedSource());
    std::uniform_int_distribution<> byteDist(0, 255);

    const auto begin = Clock::now();

    int frameNo = 0;  // 1-based number of the frame just processed
    while (frameNo < frameCount) {
        // Refill the source frame with fresh random channel values.
        for (std::size_t idx = 0; idx < rgbFrame.size(); ++idx) {
            rgbFrame[idx] = byteDist(engine);
        }

        to_gray(rgbFrame, grayFrame, width, height);
        ++frameNo;

        // Report progress on the first frame of every block of 30.
        if ((frameNo - 1) % 30 == 0) {
            std::cout << "Processed frame: " << frameNo << "/" << frameCount << std::endl;
        }
    }

    const auto finish = Clock::now();
    const std::chrono::duration<double> elapsed = finish - begin;
    const double elapsedSeconds = elapsed.count();

    // Summary of the whole run.
    std::cout << "Processed " << frameCount << " frames in " << elapsedSeconds << " seconds." << std::endl;
    std::cout << "Average time per frame: " << (elapsedSeconds / frameCount) << " seconds." << std::endl;
}

C++多线程

cpp 复制代码
//
// Created by lai on 2025/1/17.
//

#include "TestCppThread.h"


#include <iostream>
#include <vector>
#include <random>
#include <chrono>
#include <thread>

// Grayscale conversion for one slice of a frame, so several threads can each
// handle a part of the image. Converts the pixels with indices in [start, end);
// `width` and `height` are accepted but not used by the body.
void to_gray_chunk(const std::vector<unsigned char>& input, std::vector<unsigned char>& output, int width, int height, int start, int end) {
    int px = start;
    while (px < end) {
        const int base = px * 3;  // first byte of this pixel's RGB triplet
        const unsigned char red = input[base];
        const unsigned char green = input[base + 1];
        const unsigned char blue = input[base + 2];
        // Weighted channel sum; the cast drops the fractional part.
        output[px] = static_cast<unsigned char>(0.299f * red + 0.587f * green + 0.114f * blue);
        ++px;
    }
}

// Multi-threaded benchmark: converts 1800 simulated 1920x1080 RGB frames
// (30 fps for 60 seconds) to grayscale, splitting every frame into one
// contiguous pixel range per worker thread, and reports the total and
// per-frame wall-clock time. The measured interval also covers the per-frame
// random data generation and the creation/joining of the worker threads.
void TestCppThread::runTest() {
    const int width = 1920;                 // frame width in pixels
    const int height = 1080;                // frame height in pixels
    const int fps = 30;                     // frames per second
    const int duration = 60;                // clip length in seconds
    const int frameCount = fps * duration;  // total number of frames
    const int pixelCount = width * height;

    // hardware_concurrency() returns 0 when the value cannot be determined;
    // fall back to a single worker so the chunk-size division is well defined.
    const unsigned int reportedThreads = std::thread::hardware_concurrency();
    const int numThreads = (reportedThreads == 0) ? 1 : static_cast<int>(reportedThreads);
    const int chunkSize = pixelCount / numThreads;  // pixels handled by each worker

    // One reusable RGB source frame (3 bytes per pixel) and one gray destination frame.
    std::vector<unsigned char> inputFrame(pixelCount * 3);
    std::vector<unsigned char> outputFrame(pixelCount);

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<> dis(0, 255);

    // Reused across frames so the vector's storage is allocated only once.
    std::vector<std::thread> threads;
    threads.reserve(numThreads);

    auto startTime = std::chrono::high_resolution_clock::now();

    for (int frame = 0; frame < frameCount; ++frame) {
        // Refill the source frame with fresh random channel values.
        for (auto& pixel : inputFrame) {
            pixel = dis(gen);
        }

        // Launch one worker per pixel range.
        for (int t = 0; t < numThreads; ++t) {
            const int start = t * chunkSize;
            // The last worker also takes the remainder left by the integer division.
            const int end = (t == numThreads - 1) ? pixelCount : (start + chunkSize);
            threads.emplace_back(to_gray_chunk, std::cref(inputFrame), std::ref(outputFrame), width, height, start, end);
        }

        // Wait for every worker before the input frame is overwritten again.
        for (auto& worker : threads) {
            worker.join();
        }
        threads.clear();

        // Report progress on the first frame of every block of 30.
        if (frame % 30 == 0) {
            std::cout << "Processed frame: " << frame + 1 << "/" << frameCount << std::endl;
        }
    }

    auto endTime = std::chrono::high_resolution_clock::now();
    double elapsedTime = std::chrono::duration<double>(endTime - startTime).count();

    // Summary of the whole run.
    std::cout << "Processed " << frameCount << " frames in " << elapsedTime << " seconds." << std::endl;
    std::cout << "Average time per frame: " << (elapsedTime / frameCount) << " seconds." << std::endl;

}

CPU版本的Opencl

cmake中添加

cmakelist 复制代码
# Locate the OpenCL package; configuration fails if it is not found
find_package(OpenCL REQUIRED)

# Add the OpenCL headers and link the OpenCL library into the testOpenGl target
target_include_directories(testOpenGl PRIVATE ${OpenCL_INCLUDE_DIRS})
target_link_libraries(testOpenGl PRIVATE ${OpenCL_LIBRARIES})

测试代码

cpp 复制代码
//
// Created by lai on 2025/1/16.
//
#include "testOpenCl.h"

#include <chrono>
#include <CL/cl.h>
#include <iostream>
#include <vector>
#include <random>

// OpenCL C source of the grayscale kernel, handed to clCreateProgramWithSource()
// and built at run time by TestOpenCl::runTests().
// Each work-item converts one packed RGB pixel (3 bytes) into a single gray
// byte; work-items whose global id is >= width * height do nothing.
// The text inside the raw string, including its comments, is runtime data
// passed to the OpenCL compiler and is therefore left exactly as written.
const char* kernelSource = R"(
__kernel void to_gray(
    __global unsigned char* input,
    __global unsigned char* output,
    const int width,
    const int height)
{
    int id = get_global_id(0);  // 每个线程处理一个像素
    if (id < width * height) {
        int offset = id * 3;  // RGB 分量
        unsigned char r = input[offset];
        unsigned char g = input[offset + 1];
        unsigned char b = input[offset + 2];
        // 灰度公式
        output[id] = (unsigned char)(0.299f * r + 0.587f * g + 0.114f * b);
    }
}
)";
void TestOpenCl::runTests() {
    const int width = 1920;         // 视频宽度
    const int height = 1080;        // 视频高度
    const int fps = 30;             // 帧率
    const int duration = 60;        // 视频持续时间(秒)
    const int frameCount = fps * duration; // 总帧数

    // 模拟视频帧数据:随机生成每帧的 RGB 数据
    std::vector<unsigned char> inputFrame(width * height * 3);
    std::vector<unsigned char> outputFrame(width * height);

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<> dis(0, 255);

    // 初始化 OpenCL
    cl_int err;
    cl_platform_id platform;
    clGetPlatformIDs(1, &platform, nullptr);

    cl_device_id device;
    clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, nullptr);

    cl_context context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err);
    cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err);

    cl_program program = clCreateProgramWithSource(context, 1, &kernelSource, nullptr, &err);
    clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);

    cl_kernel kernel = clCreateKernel(program, "to_gray", &err);

    // 创建 OpenCL 缓冲区
    cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, inputFrame.size(), nullptr, &err);
    cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, outputFrame.size(), nullptr, &err);

    // 开始处理
    auto startTime = std::chrono::high_resolution_clock::now();

    for (int frame = 0; frame < frameCount; ++frame) {
        // 随机生成模拟的 RGB 数据
        for (auto& pixel : inputFrame) {
            pixel = dis(gen);
        }

        // 写入数据到 OpenCL 缓冲区
        clEnqueueWriteBuffer(queue, inputBuffer, CL_TRUE, 0, inputFrame.size(), inputFrame.data(), 0, nullptr, nullptr);

        // 设置内核参数
        clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputBuffer);
        clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputBuffer);
        clSetKernelArg(kernel, 2, sizeof(int), &width);
        clSetKernelArg(kernel, 3, sizeof(int), &height);

        // 定义工作区大小
        size_t globalSize = width * height;

        // 执行内核
        clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &globalSize, nullptr, 0, nullptr, nullptr);

        // 读取处理后的灰度数据
        clEnqueueReadBuffer(queue, outputBuffer, CL_TRUE, 0, outputFrame.size(), outputFrame.data(), 0, nullptr, nullptr);

        // 打印进度
        if (frame % 30 == 0) {
            std::cout << "Processed frame: " << frame + 1 << "/" << frameCount << std::endl;
        }
    }

    auto endTime = std::chrono::high_resolution_clock::now();
    double elapsedTime = std::chrono::duration<double>(endTime - startTime).count();

    // 打印处理时间
    std::cout << "Processed " << frameCount << " frames in " << elapsedTime << " seconds." << std::endl;
    std::cout << "Average time per frame: " << (elapsedTime / frameCount) << " seconds." << std::endl;

    // 释放 OpenCL 资源
    clReleaseMemObject(inputBuffer);
    clReleaseMemObject(outputBuffer);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);
}

内存对齐的SIMD指令集

cmake添加

cmakelists 复制代码
# Detect SIMD support and add the matching compile option
include(CheckCXXCompilerFlag)

check_cxx_compiler_flag("-mavx" COMPILER_SUPPORTS_AVX)
check_cxx_compiler_flag("-mavx2" COMPILER_SUPPORTS_AVX2)

# Prefer AVX2, fall back to AVX, and abort configuration if neither flag is accepted.
if(COMPILER_SUPPORTS_AVX2)
    target_compile_options(testOpenGl PRIVATE -mavx2)
elseif (COMPILER_SUPPORTS_AVX)
    target_compile_options(testOpenGl PRIVATE -mavx)
else ()
    message(FATAL_ERROR "AVX or AVX2 is not supported by compiler")
endif ()
cpp 复制代码
//
// Created by lai on 2025/1/17.
//

#include "TestSIMD.h"

#include <iostream>
#include <vector>
#include <random>
#include <chrono>
#include <immintrin.h> // SIMD 指令集
#include <cstdlib>  // 用于posix_memalign

// Converts a packed 8-bit RGB frame to 8-bit grayscale, 8 pixels per AVX
// iteration, with a scalar loop for the remaining (width * height) % 8 pixels.
//
// input  - width * height consecutive R,G,B byte triplets; never read past
//          3 * width * height bytes.
// output - receives exactly width * height bytes, one per pixel.
//
// Both paths compute 0.299*R + 0.587*G + 0.114*B in single precision and
// truncate the result, so the vector and scalar parts agree bit for bit.
// Neither pointer needs any particular alignment.
//
// Only AVX/SSE2 instructions are used. The target attribute (GCC/Clang) lets
// the function compile whichever of -mavx / -mavx2 the build selected; it must
// still only be executed on an AVX-capable CPU.
__attribute__((target("avx")))
void to_gray_simd(const unsigned char* input, unsigned char* output, int width, int height) {
    const int pixelCount = width * height;
    const __m256 scale_r = _mm256_set1_ps(0.299f); // weight of the red channel
    const __m256 scale_g = _mm256_set1_ps(0.587f); // weight of the green channel
    const __m256 scale_b = _mm256_set1_ps(0.114f); // weight of the blue channel

    int i = 0;
    for (; i <= pixelCount - 8; i += 8) {
        const unsigned char* px = input + static_cast<std::size_t>(i) * 3;

        // De-interleave: widen each channel of the 8 pixels into its own
        // vector of 32-bit lanes. The pixels are 3 bytes apart, so a plain
        // 32-byte load would mix the channels instead of separating them.
        const __m256i pixel_r = _mm256_setr_epi32(px[0], px[3], px[6], px[9], px[12], px[15], px[18], px[21]);
        const __m256i pixel_g = _mm256_setr_epi32(px[1], px[4], px[7], px[10], px[13], px[16], px[19], px[22]);
        const __m256i pixel_b = _mm256_setr_epi32(px[2], px[5], px[8], px[11], px[14], px[17], px[20], px[23]);

        // Convert to float for the weighted sum.
        const __m256 r_f = _mm256_cvtepi32_ps(pixel_r);
        const __m256 g_f = _mm256_cvtepi32_ps(pixel_g);
        const __m256 b_f = _mm256_cvtepi32_ps(pixel_b);

        // Same operation order as the scalar expression: (r + g) + b.
        const __m256 gray_f = _mm256_add_ps(
            _mm256_add_ps(_mm256_mul_ps(r_f, scale_r), _mm256_mul_ps(g_f, scale_g)),
            _mm256_mul_ps(b_f, scale_b));

        // Truncate toward zero, matching static_cast<unsigned char> in the tail loop.
        const __m256i gray_i = _mm256_cvttps_epi32(gray_f);

        // Narrow 8 x int32 -> 8 x int16 -> 8 x uint8 (lane order is preserved)
        // and store exactly 8 bytes so nothing beyond these pixels is written.
        const __m128i low4 = _mm256_castsi256_si128(gray_i);
        const __m128i high4 = _mm256_extractf128_si256(gray_i, 1);
        const __m128i gray16 = _mm_packs_epi32(low4, high4);
        const __m128i gray8 = _mm_packus_epi16(gray16, gray16);
        _mm_storel_epi64(reinterpret_cast<__m128i*>(output + i), gray8);
    }

    // Scalar tail for the pixels that do not fill a whole group of 8.
    for (; i < pixelCount; ++i) {
        const std::size_t offset = static_cast<std::size_t>(i) * 3;
        const unsigned char r = input[offset];
        const unsigned char g = input[offset + 1];
        const unsigned char b = input[offset + 2];
        output[i] = static_cast<unsigned char>(0.299f * r + 0.587f * g + 0.114f * b);
    }
}

// SIMD benchmark: converts 1800 simulated 1920x1080 RGB frames (30 fps for
// 60 seconds) to grayscale with to_gray_simd() on 32-byte-aligned buffers and
// reports the total and per-frame wall-clock time. The measured interval also
// covers the per-frame random data generation.
// Prints "memory allocation failed" to stderr and returns early if either
// buffer cannot be allocated.
void TestSIMD::runTest() {
    const int width = 1920;                 // frame width in pixels
    const int height = 1080;                // frame height in pixels
    const int fps = 30;                     // frames per second
    const int duration = 60;                // clip length in seconds
    const int frameCount = fps * duration;  // total number of frames
    const size_t size = width * height * 3 * sizeof(unsigned char);

    // Allocate both frames with posix_memalign on a 32-byte boundary.
    // Both buffers use the same byte count (3 bytes per pixel).
    const size_t alignment = 32;
    void* rawInput = nullptr;
    void* rawOutput = nullptr;
    const int resultInput = posix_memalign(&rawInput, alignment, size);
    const int resultOutput = posix_memalign(&rawOutput, alignment, size);
    if (resultInput != 0 || resultOutput != 0) {
        std::cerr << "memory allocation failed" << std::endl;
        // Give back whichever allocation did succeed.
        if (resultInput == 0) std::free(rawInput);
        if (resultOutput == 0) std::free(rawOutput);
        return;
    }
    unsigned char* inputFrame = static_cast<unsigned char*>(rawInput);
    unsigned char* outputFrame = static_cast<unsigned char*>(rawOutput);

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<> dis(0, 255);

    auto startTime = std::chrono::high_resolution_clock::now();

    for (int frame = 0; frame < frameCount; ++frame) {
        // Refill the source frame with fresh random channel values.
        for (int i = 0; i < width * height * 3; ++i) {
            inputFrame[i] = dis(gen);
        }

        // Grayscale conversion under test.
        to_gray_simd(inputFrame, outputFrame, width, height);

        // Report progress on the first frame of every block of 30.
        if (frame % 30 == 0) {
            std::cout << "Processed frame: " << frame + 1 << "/" << frameCount << std::endl;
        }
    }

    auto endTime = std::chrono::high_resolution_clock::now();
    double elapsedTime = std::chrono::duration<double>(endTime - startTime).count();

    // Summary of the whole run.
    std::cout << "Processed " << frameCount << " frames in " << elapsedTime << " seconds." << std::endl;
    std::cout << "Average time per frame: " << (elapsedTime / frameCount) << " seconds." << std::endl;

    // Memory from posix_memalign is released with free().
    std::free(inputFrame);
    std::free(outputFrame);
}

结论

复制代码
C++
Processed 1800 frames in 251.789 seconds.
Average time per frame: 0.139883 seconds.

C++ thread
Processed 1800 frames in 229.571 seconds.
Average time per frame: 0.12754 seconds.

CPU版本POCL的OPENCL
Processed 1800 frames in 233.25 seconds.
Average time per frame: 0.129583 seconds.

SIMD 内存对齐以后
Processed 1800 frames in 191.015 seconds.
Average time per frame: 0.106119 seconds.

SIMD的性能明显优于其他几项,但是还需要测试GPU版本的OpenCL以及多线程结合指令集优化对性能的提升。注意:以上耗时均包含每帧随机生成RGB数据的时间。

相关推荐
IC 见路不走1 小时前
LeetCode 第91题:解码方法
linux·运维·服务器
翻滚吧键盘1 小时前
查看linux中steam游戏的兼容性
linux·运维·游戏
小能喵1 小时前
Kali Linux Wifi 伪造热点
linux·安全·kali·kali linux
汀沿河2 小时前
8.1 prefix Tunning与Prompt Tunning模型微调方法
linux·运维·服务器·人工智能
zly35002 小时前
centos7 ping127.0.0.1不通
linux·运维·服务器
菜包eo2 小时前
基于二维码的视频合集高效管理与分发技术
音视频
文浩(楠搏万)2 小时前
用OBS Studio录制WAV音频,玩转语音克隆和文本转语音!
大模型·音视频·tts·wav·obs·声音克隆·语音录制
小哥山水之间2 小时前
基于dropbear实现嵌入式系统ssh服务端与客户端完整交互
linux
ldj20203 小时前
2025 Centos 安装PostgreSQL
linux·postgresql·centos
翻滚吧键盘3 小时前
opensuse tumbleweed上安装显卡驱动
linux