高速图像采集系统架构与实现

高速图像数据传输与存储实现方案

1. 系统架构设计

1.1 整体架构

复制代码

图像传感器 → CSI/MIPI接口 → ISP处理 → V4L2驱动 → 用户空间
                                             ↓
                                     内存映射/DMA传输
                                             ↓
                                       存储/处理管道

2. 设备树高速传输配置

2.1 高速MIPI-CSI2配置

dts 复制代码

// OV13850 image sensor node at I2C address 0x36.
camera_sensor: ov13850@36 {
    compatible = "ovti,ov13850";
    reg = <0x36>;

    // External clock input ("xvclk"), 24 MHz
    clocks = <&camera_clk 0>;
    clock-names = "xvclk";
    clock-frequency = <24000000>;

    // Power supplies
    avdd-supply = <&cam_avdd_2v8>;
    dovdd-supply = <&cam_dovdd_1v8>;
    dvdd-supply = <&cam_dvdd_1v2>;

    // MIPI configuration
    // NOTE(review): csi-lane-count / csi-lane-speed are not part of the
    // generic video-interfaces binding; confirm the sensor driver reads them.
    csi-lane-count = <4>;
    csi-lane-speed = <1500000000>; // 1.5Gbps/lane

    port {
        // The label is required: the CSI-2 receiver's input endpoint points
        // back at this node through <&camera_sensor_endpoint>.
        camera_sensor_endpoint: endpoint {
            remote-endpoint = <&mipi_csi2_in>;
            data-lanes = <1 2 3 4>;
            clock-lanes = <0>;
            // Five entries: presumably the clock lane followed by the four
            // data lanes, all non-inverted.
            lane-polarities = <0 0 0 0 0>;
        };
    };
};

// MIPI CSI-2 receiver node: one 0x1000-byte register window at 0xff0d0000,
// one level-high SPI interrupt, and two clocks named "bus" and "mod".
// NOTE(review): confirm that the compatible string and the register base
// belong to the same SoC.
mipi_csi2: csi2@ff0d0000 {
    compatible = "allwinner,sun50i-a64-mipi-csi2";
    reg = <0xff0d0000 0x1000>;
    interrupts = <GIC_SPI 50 IRQ_TYPE_LEVEL_HIGH>;
    
    clocks = <&ccu CLK_BUS_CSI>, <&ccu CLK_CSI>;
    clock-names = "bus", "mod";
    
    ports {
        #address-cells = <1>;
        #size-cells = <0>;
        
        // Port 0: input side, linked to the sensor over four data lanes.
        // The sensor-side endpoint node must carry the
        // camera_sensor_endpoint label for this phandle to resolve.
        port@0 {
            reg = <0>;
            mipi_csi2_in: endpoint {
                remote-endpoint = <&camera_sensor_endpoint>;
                data-lanes = <1 2 3 4>;
            };
        };
        
        // Port 1: output side.
        // NOTE(review): isp_input is not defined in the visible source;
        // presumably it is the ISP's input endpoint.
        port@1 {
            reg = <1>;
            mipi_csi2_out: endpoint {
                remote-endpoint = <&isp_input>;
            };
        };
    };
};

3. 内核驱动高速实现

3.1 零拷贝DMA配置

c 复制代码

#include <linux/dma-buf.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
#include <linux/scatterlist.h>
#include <media/v4l2-device.h>
#include <media/v4l2-subdev.h>
#include <media/videobuf2-v4l2.h>

#define MAX_BUFFERS 8
#define FRAME_SIZE (1920 * 1080 * 2)  // YUYV格式

struct high_speed_device {
    struct vb2_queue queue;
    struct dma_chan *dma_chan;
    dma_addr_t dma_addr[MAX_BUFFERS];
    void *vaddr[MAX_BUFFERS];
    size_t buffer_size;
};

// DMA连续内存分配
static int alloc_dma_buffers(struct high_speed_device *hs_dev)
{
    struct device *dev = hs_dev->v4l2_dev.dev;
    int i;
    
    for (i = 0; i < MAX_BUFFERS; i++) {
        // 分配DMA连续内存
        hs_dev->vaddr[i] = dma_alloc_coherent(dev, 
                                            FRAME_SIZE,
                                            &hs_dev->dma_addr[i],
                                            GFP_KERNEL | __GFP_ZERO);
        if (!hs_dev->vaddr[i]) {
            dev_err(dev, "Failed to allocate DMA buffer %d\n", i);
            return -ENOMEM;
        }
        
        // 配置DMA分散聚集列表
        sg_init_table(&hs_dev->sg[i], 1);
        sg_dma_address(&hs_dev->sg[i]) = hs_dev->dma_addr[i];
        sg_dma_len(&hs_dev->sg[i]) = FRAME_SIZE;
    }
    return 0;
}

3.2 高性能V4L2驱动实现

c 复制代码

// videobuf2 operation table for the capture queue: binds each vb2 callback
// slot to this driver's handler.
// NOTE(review): of the high_speed_* handlers named here, only
// high_speed_start_streaming is defined in the visible part of this file,
// and it is defined after this table. Confirm that prototypes for all
// handlers are declared before this point.
static const struct vb2_ops high_speed_vb2_ops = {
    .queue_setup = high_speed_queue_setup,
    .buf_init = high_speed_buf_init,
    .buf_prepare = high_speed_buf_prepare,
    .buf_queue = high_speed_buf_queue,
    .buf_cleanup = high_speed_buf_cleanup,
    .start_streaming = high_speed_start_streaming,
    .stop_streaming = high_speed_stop_streaming,
    // Not defined in this file; presumably the stock videobuf2 wait
    // helpers — TODO confirm they exist in the target kernel version.
    .wait_prepare = vb2_ops_wait_prepare,
    .wait_finish = vb2_ops_wait_finish,
};

static int high_speed_start_streaming(struct vb2_queue *vq, unsigned int count)
{
    struct high_speed_device *hs_dev = vb2_get_drv_priv(vq);
    struct dma_async_tx_descriptor *desc;
    dma_cookie_t cookie;
    int i, ret;
    
    // 配置DMA传输
    for (i = 0; i < count; i++) {
        desc = dmaengine_prep_slave_sg(hs_dev->dma_chan,
                                     &hs_dev->sg[i],
                                     1,
                                     DMA_DEV_TO_MEM,
                                     DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
        if (!desc) {
            dev_err(hs_dev->dev, "Failed to prepare DMA descriptor\n");
            return -EIO;
        }
        
        desc->callback = high_speed_dma_callback;
        desc->callback_param = hs_dev;
        
        cookie = dmaengine_submit(desc);
        if (dma_submit_error(cookie)) {
            dev_err(hs_dev->dev, "Failed to submit DMA\n");
            return -EIO;
        }
    }
    
    // 启动DMA传输
    dma_async_issue_pending(hs_dev->dma_chan);
    
    // 启动传感器流输出
    ret = sensor_start_streaming(hs_dev->sensor);
    if (ret) {
        dev_err(hs_dev->dev, "Failed to start sensor streaming\n");
        return ret;
    }
    
    return 0;
}

// DMA完成回调
static void high_speed_dma_callback(void *data)
{
    struct high_speed_device *hs_dev = data;
    struct vb2_buffer *vb;
    
    // 找到完成的缓冲区
    vb = &hs_dev->bufs[hs_dev->current_buf]->vb2_buf;
    
    // 更新时间戳
    vb->timestamp = ktime_get_ns();
    
    // 标记缓冲区完成
    vb2_buffer_done(vb, VB2_BUF_STATE_DONE);
    
    // 准备下一个缓冲区
    hs_dev->current_buf = (hs_dev->current_buf + 1) % hs_dev->num_bufs;
}

4. 用户空间高速采集实现

4.1 零拷贝内存映射方案

c++ 复制代码

#include <linux/videodev2.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

// One capture buffer as tracked on the user-space side.
struct buffer {
    void *start;    // address returned by mmap() for this buffer
    size_t length;  // mapping length in bytes, copied from v4l2_buffer.length
    int fd;         // not referenced by any code visible in this file
};

// Owns a V4L2 capture device and its memory-mapped streaming buffers.
//
// Buffers are requested with V4L2_MEMORY_MMAP because they are mapped with
// mmap() on the offset reported by VIDIOC_QUERYBUF; V4L2_MEMORY_DMABUF would
// instead require the application to import dma-buf file descriptors.
// The destructor unmaps every buffer and closes the device, so the object is
// non-copyable.
class HighSpeedCamera {
private:
    int fd;
    struct buffer *buffers;
    unsigned int n_buffers;

    // Copying would make two objects unmap/close the same resources.
    HighSpeedCamera(const HighSpeedCamera&) = delete;
    HighSpeedCamera& operator=(const HighSpeedCamera&) = delete;

    // Unmap every mapped buffer and free the bookkeeping array.
    void releaseBuffers() {
        if (buffers != NULL) {
            for (unsigned int i = 0; i < n_buffers; ++i) {
                if (buffers[i].start != NULL) {
                    munmap(buffers[i].start, buffers[i].length);
                }
            }
            delete[] buffers;
        }
        buffers = NULL;
        n_buffers = 0;
    }

public:
    // Opens the device in non-blocking read/write mode.
    // Throws std::runtime_error if the device cannot be opened.
    HighSpeedCamera(const char* device = "/dev/video0")
        : fd(-1), buffers(NULL), n_buffers(0) {
        fd = open(device, O_RDWR | O_NONBLOCK, 0);
        if (fd == -1) {
            throw std::runtime_error("Cannot open device");
        }
    }

    ~HighSpeedCamera() {
        releaseBuffers();
        if (fd != -1) {
            close(fd);
        }
    }

    // Configures YUYV capture at width x height and the requested frame
    // rate, then requests, maps and queues buffer_count buffers (the driver
    // may grant a different number).
    // Returns false on the first failing step; mappings created up to that
    // point are released again.
    bool setupStreaming(unsigned int width, unsigned int height,
                       unsigned int fps, unsigned int buffer_count = 8) {
        // Drop mappings left over from an earlier call.
        releaseBuffers();

        // Set the capture format
        struct v4l2_format fmt = {};
        fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        fmt.fmt.pix.width = width;
        fmt.fmt.pix.height = height;
        fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV;
        fmt.fmt.pix.field = V4L2_FIELD_NONE;

        if (ioctl(fd, VIDIOC_S_FMT, &fmt) == -1) {
            return false;
        }

        // Set the frame rate (time per frame = 1 / fps)
        struct v4l2_streamparm parm = {};
        parm.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        parm.parm.capture.timeperframe.numerator = 1;
        parm.parm.capture.timeperframe.denominator = fps;

        if (ioctl(fd, VIDIOC_S_PARM, &parm) == -1) {
            return false;
        }

        // Request driver-allocated buffers
        struct v4l2_requestbuffers req = {};
        req.count = buffer_count;
        req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        req.memory = V4L2_MEMORY_MMAP;

        if (ioctl(fd, VIDIOC_REQBUFS, &req) == -1 || req.count == 0) {
            return false;
        }

        n_buffers = req.count;
        // Value-initialised so that unmapped entries have start == NULL.
        buffers = new buffer[n_buffers]();

        // Map and queue every buffer
        for (unsigned int i = 0; i < n_buffers; ++i) {
            struct v4l2_buffer buf = {};
            buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
            buf.memory = V4L2_MEMORY_MMAP;
            buf.index = i;

            if (ioctl(fd, VIDIOC_QUERYBUF, &buf) == -1) {
                releaseBuffers();
                return false;
            }

            void* mapped = mmap(NULL, buf.length,
                                PROT_READ | PROT_WRITE,
                                MAP_SHARED,
                                fd, buf.m.offset);
            if (mapped == MAP_FAILED) {
                releaseBuffers();
                return false;
            }

            buffers[i].start = mapped;
            buffers[i].length = buf.length;
            buffers[i].fd = -1;

            // Queue the buffer so the driver can fill it
            if (ioctl(fd, VIDIOC_QBUF, &buf) == -1) {
                releaseBuffers();
                return false;
            }
        }

        return true;
    }

    // Starts capture; returns false if VIDIOC_STREAMON fails.
    bool startStreaming() {
        enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        return ioctl(fd, VIDIOC_STREAMON, &type) != -1;
    }

    // Waits up to two seconds for a frame, passes the mapped frame data to
    // processor without copying it, then queues the buffer again.
    // Returns false on select() error or timeout, on a failed
    // dequeue/requeue, or if the driver reports an unknown buffer index.
    // If processor throws, the buffer is queued again before the exception
    // propagates.
    bool captureFrame(std::function<void(void* data, size_t size)> processor) {
        fd_set fds;
        struct timeval tv;
        int r;

        FD_ZERO(&fds);
        FD_SET(fd, &fds);

        tv.tv_sec = 2;
        tv.tv_usec = 0;

        r = select(fd + 1, &fds, NULL, NULL, &tv);

        if (r == -1) {
            return false;
        }

        if (r == 0) {
            return false; // timeout
        }

        struct v4l2_buffer buf = {};
        buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        buf.memory = V4L2_MEMORY_MMAP;

        if (ioctl(fd, VIDIOC_DQBUF, &buf) == -1) {
            return false;
        }

        if (buffers == NULL || buf.index >= n_buffers) {
            return false;
        }

        // Process the frame in place (zero-copy access to the mapping)
        try {
            processor(buffers[buf.index].start, buf.bytesused);
        } catch (...) {
            // Do not strand the buffer outside the driver's queue.
            ioctl(fd, VIDIOC_QBUF, &buf);
            throw;
        }

        // Queue the buffer again
        if (ioctl(fd, VIDIOC_QBUF, &buf) == -1) {
            return false;
        }

        return true;
    }
};

4.2 多线程高速存储方案

c++ 复制代码

#include <thread>
#include <queue>
#include <mutex>
#include <condition_variable>
#include <atomic>

// Writes queued frames to a file on a dedicated background thread.
//
// queueFrame() only stores the pointer and size; the data is read later by
// the worker thread, so the memory must stay valid until the worker has
// called notifyBufferReady() for it.
// NOTE(review): notifyBufferReady() is not defined in the visible source.
class FrameStorageManager {
private:
    std::queue<std::pair<void*, size_t>> frameQueue;
    std::mutex queueMutex;
    std::condition_variable queueCond;
    std::atomic<bool> running{false};
    std::atomic<size_t> droppedFrames{0};
    std::thread storageThread;
    int outputFd;

    // Write the whole range, continuing after short writes.
    // Returns false as soon as write() reports an error or makes no progress.
    bool writeAll(const void* data, size_t size) {
        const char* cursor = static_cast<const char*>(data);
        size_t remaining = size;

        while (remaining > 0) {
            ssize_t written = write(outputFd, cursor, remaining);
            if (written <= 0) {
                return false;
            }
            cursor += written;
            remaining -= static_cast<size_t>(written);
        }
        return true;
    }

    // Worker loop: sleeps until a frame is queued or shutdown is requested.
    // Frames still queued at shutdown are written before the thread exits.
    void storageWorker() {
        for (;;) {
            std::pair<void*, size_t> frame;

            {
                std::unique_lock<std::mutex> lock(queueMutex);
                queueCond.wait(lock, [this]() {
                    return !frameQueue.empty() || !running;
                });

                // An empty queue after wake-up means shutdown was requested.
                if (frameQueue.empty()) break;

                frame = frameQueue.front();
                frameQueue.pop();
            }

            // Write straight to the file (could be made asynchronous)
            if (!writeAll(frame.first, frame.second)) {
                ++droppedFrames;
            }

            // Tell the producer the buffer can be reused
            notifyBufferReady(frame.first);
        }
    }

public:
    // Creates/truncates the output file and starts the worker thread.
    // Throws std::runtime_error if the file cannot be opened.
    FrameStorageManager(const std::string& filename) {
        outputFd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0644);
        if (outputFd == -1) {
            throw std::runtime_error("Cannot open output file");
        }

        running = true;
        try {
            storageThread = std::thread(&FrameStorageManager::storageWorker, this);
        } catch (...) {
            // The destructor will not run, so release the file here.
            running = false;
            close(outputFd);
            throw;
        }
    }

    // Requests shutdown, waits for the worker to drain the queue, then
    // closes the file.
    ~FrameStorageManager() {
        {
            // Change the flag under the mutex: otherwise the worker could
            // test its predicate, miss the notification and sleep forever.
            std::lock_guard<std::mutex> lock(queueMutex);
            running = false;
        }
        queueCond.notify_all();
        if (storageThread.joinable()) {
            storageThread.join();
        }
        if (outputFd != -1) {
            close(outputFd);
        }
    }

    // Queue one frame for writing; returns immediately.
    void queueFrame(void* data, size_t size) {
        {
            std::lock_guard<std::mutex> lock(queueMutex);
            frameQueue.push({data, size});
        }
        queueCond.notify_one();
    }

    // Number of frames that could not be written completely.
    size_t droppedFrameCount() const {
        return droppedFrames.load();
    }
};

5. 性能优化技术

5.1 内存池优化

c++ 复制代码

// Pre-allocated pool of page-aligned memory blocks.
//
// All blocks have the same size: the requested block size rounded up to a
// multiple of the 4096-byte alignment, because aligned_alloc() is only
// specified for sizes that are a multiple of the alignment.
// Blocks held by the pool are freed by the destructor; blocks that are
// handed out at that time remain the caller's responsibility.
// The class does no locking.
class MemoryPool {
private:
    enum { kAlignment = 4096 };

    std::vector<void*> pool;
    size_t block_size;
    size_t pool_size;

    // Copying would make two pools free the same blocks.
    MemoryPool(const MemoryPool&) = delete;
    MemoryPool& operator=(const MemoryPool&) = delete;

    // Round size up to a non-zero multiple of the alignment.
    static size_t roundUpToAlignment(size_t size) {
        size_t rounded = ((size + kAlignment - 1) / kAlignment) * kAlignment;
        return rounded != 0 ? rounded : static_cast<size_t>(kAlignment);
    }

public:
    // Pre-allocates up to pool_size blocks; allocations that fail are
    // skipped, so the pool may start with fewer blocks.
    MemoryPool(size_t block_size, size_t pool_size)
        : block_size(roundUpToAlignment(block_size)), pool_size(pool_size) {

        for (size_t i = 0; i < this->pool_size; ++i) {
            void* block = aligned_alloc(kAlignment, this->block_size);
            if (block) {
                pool.push_back(block);
            }
        }
    }

    ~MemoryPool() {
        for (size_t i = 0; i < pool.size(); ++i) {
            free(pool[i]);
        }
    }

    // Returns a pooled block, or a freshly allocated one when the pool is
    // empty. Returns a null pointer only if that fresh allocation fails.
    void* allocate() {
        if (pool.empty()) {
            return aligned_alloc(kAlignment, block_size);
        }

        void* block = pool.back();
        pool.pop_back();
        return block;
    }

    // Returns a block to the pool, or frees it when the pool is full.
    // A null pointer is ignored so that it can never be handed out later.
    void deallocate(void* block) {
        if (block == NULL) {
            return;
        }

        if (pool.size() < pool_size) {
            pool.push_back(block);
        } else {
            free(block);
        }
    }
};

5.2 直接IO存储优化

c++ 复制代码

// Writes frames to a file opened with O_DIRECT, bypassing the page cache.
// The file descriptor is closed by the destructor, so the object is
// non-copyable.
class DirectIOStorage {
private:
    int fd;
    size_t block_size;

    // Copying would make two objects close the same descriptor.
    DirectIOStorage(const DirectIOStorage&) = delete;
    DirectIOStorage& operator=(const DirectIOStorage&) = delete;

public:
    // Creates/truncates filename for direct I/O.
    // block_size is the alignment unit passed to the align helpers.
    // Throws std::runtime_error if the file cannot be opened.
    DirectIOStorage(const std::string& filename, size_t block_size = 4096)
        : fd(-1), block_size(block_size) {

        // O_DIRECT bypasses the page cache
        fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT, 0644);
        if (fd == -1) {
            throw std::runtime_error("Cannot open file with O_DIRECT");
        }
    }

    ~DirectIOStorage() {
        if (fd != -1) {
            close(fd);
        }
    }

    // Writes one frame; returns true only if the full aligned size was
    // written by a single write() call.
    // NOTE(review): align_data() and align_size() are not defined in the
    // visible source. If align_size() rounds size up, write() reads past the
    // end of a buffer that is only `size` bytes long, and if align_data()
    // merely rounds the pointer, the wrong bytes are written. Callers should
    // pass buffers that are already block-aligned and padded — confirm.
    bool writeFrame(void* data, size_t size) {
        // Make sure address and length satisfy the direct-I/O alignment
        void* aligned_data = align_data(data, block_size);
        size_t aligned_size = align_size(size, block_size);

        ssize_t written = write(fd, aligned_data, aligned_size);
        return written == (ssize_t)aligned_size;
    }
};

6. 系统调优配置

6.1 内核参数优化

bash 复制代码

# Page-cache writeback thresholds, in bytes. These knobs control when dirty
# pages are flushed to storage; they do not size any DMA buffer.
# NOTE(review): the kernel documents a minimum of two pages for dirty_bytes
# and ignores smaller values, so 2048 is unlikely to take effect. Confirm
# the intended thresholds (the numbers may have been meant as KiB or MiB).
echo 2048 > /proc/sys/vm/dirty_bytes
echo 1024 > /proc/sys/vm/dirty_background_bytes

# Exempt this shell (and the processes it starts) from the OOM killer.
# oom_score_adj replaces the deprecated oom_adj; -1000 corresponds to the
# old -17 ("never kill").
echo -1000 > /proc/$$/oom_score_adj

# Switch this shell to real-time SCHED_FIFO at priority 99.
# NOTE(review): priority 99 can starve kernel threads; consider a lower value.
chrt -f -p 99 $$

6.2 中断亲和性设置

bash 复制代码

# Bind the video-related interrupts to specific CPU cores.
# NOTE(review): IRQ numbers 56 and 57 are hard-coded; check /proc/interrupts
# on the target board. The written value is presumably a hexadecimal CPU
# bitmask (2 -> CPU1, 4 -> CPU2) — confirm.
echo 2 > /proc/irq/56/smp_affinity  # camera interrupt
echo 4 > /proc/irq/57/smp_affinity  # DMA interrupt

7. 性能监控

7.1 实时性能统计

c++ 复制代码

#include <chrono>

// Prints capture throughput (frames per second and megabits per second)
// roughly once per second.
class PerformanceMonitor {
private:
    // Monotonic clock: unaffected by wall-clock adjustments.
    std::chrono::steady_clock::time_point start_time;
    size_t frame_count;
    size_t total_bytes;

public:
    // Starts the first measurement window at construction time with zeroed
    // counters.
    PerformanceMonitor()
        : start_time(std::chrono::steady_clock::now()),
          frame_count(0),
          total_bytes(0) {}

    // Records one captured frame of `bytes` bytes. Once at least one second
    // has passed since the window started, prints the rates for that window
    // and starts a new one.
    void frameCaptured(size_t bytes) {
        frame_count++;
        total_bytes += bytes;

        auto now = std::chrono::steady_clock::now();
        // Fractional seconds, so the rates are not skewed by truncation.
        double elapsed = std::chrono::duration<double>(now - start_time).count();

        if (elapsed >= 1.0) {
            double fps = frame_count / elapsed;
            double mbps = (total_bytes * 8.0) / (elapsed * 1000000.0);

            printf("FPS: %.2f, Bitrate: %.2f Mbps\n", fps, mbps);

            // Start a new measurement window
            frame_count = 0;
            total_bytes = 0;
            start_time = now;
        }
    }
};

该方案可实现高速图像数据的零拷贝传输与高效存储，适用于工业检测、高速摄影、科学成像等高性能应用场景。