高速图像数据传输与存储实现方案
1. 系统架构设计
1.1 整体架构
图像传感器 → CSI/MIPI接口 → ISP处理 → V4L2驱动 → 用户空间
↓
内存映射/DMA传输
↓
存储/处理管道
2. 设备树高速传输配置
2.1 高速MIPI-CSI2配置
dts
// OV13850 image sensor on I2C address 0x36, feeding the MIPI CSI-2 receiver.
camera_sensor: ov13850@36 {
	compatible = "ovti,ov13850";
	reg = <0x36>;

	// External master clock (24 MHz)
	clocks = <&camera_clk 0>;
	clock-names = "xvclk";
	clock-frequency = <24000000>;

	// Power rails
	avdd-supply = <&cam_avdd_2v8>;
	dovdd-supply = <&cam_dovdd_1v8>;
	dvdd-supply = <&cam_dvdd_1v2>;

	// MIPI link configuration
	// NOTE(review): csi-lane-count / csi-lane-speed are not generic
	// video-interfaces properties; confirm the sensor driver parses them.
	csi-lane-count = <4>;
	csi-lane-speed = <1500000000>; // 1.5 Gbps per lane

	port {
		// The label is required: the receiver endpoint (mipi_csi2_in)
		// refers back to this node as &camera_sensor_endpoint.
		camera_sensor_endpoint: endpoint {
			remote-endpoint = <&mipi_csi2_in>;
			data-lanes = <1 2 3 4>;
			clock-lanes = <0>;
			// One entry for the clock lane plus one per data lane
			lane-polarities = <0 0 0 0 0>;
		};
	};
};
// MIPI CSI-2 receiver: port@0 takes the sensor link, port@1 forwards to the ISP.
// NOTE(review): the compatible string names an Allwinner A64 block while the
// register base is 0xff0d0000 - confirm both against the target SoC's binding.
mipi_csi2: csi2@ff0d0000 {
compatible = "allwinner,sun50i-a64-mipi-csi2";
reg = <0xff0d0000 0x1000>;
interrupts = <GIC_SPI 50 IRQ_TYPE_LEVEL_HIGH>;
// Two clocks, named "bus" and "mod" in that order
clocks = <&ccu CLK_BUS_CSI>, <&ccu CLK_CSI>;
clock-names = "bus", "mod";
ports {
#address-cells = <1>;
#size-cells = <0>;
// Input port: 4-lane link whose remote end is &camera_sensor_endpoint
port@0 {
reg = <0>;
mipi_csi2_in: endpoint {
remote-endpoint = <&camera_sensor_endpoint>;
data-lanes = <1 2 3 4>;
};
};
// Output port: linked to &isp_input, which is defined outside this snippet
port@1 {
reg = <1>;
mipi_csi2_out: endpoint {
remote-endpoint = <&isp_input>;
};
};
};
};
3. 内核驱动高速实现
3.1 零拷贝DMA配置
c
#include <linux/dma-mapping.h>
#include <linux/dma-buf.h>
#include <linux/scatterlist.h>
#define MAX_BUFFERS 8
#define FRAME_SIZE (1920 * 1080 * 2) // YUYV格式
struct high_speed_device {
struct vb2_queue queue;
struct dma_chan *dma_chan;
dma_addr_t dma_addr[MAX_BUFFERS];
void *vaddr[MAX_BUFFERS];
size_t buffer_size;
};
// DMA连续内存分配
static int alloc_dma_buffers(struct high_speed_device *hs_dev)
{
struct device *dev = hs_dev->v4l2_dev.dev;
int i;
for (i = 0; i < MAX_BUFFERS; i++) {
// 分配DMA连续内存
hs_dev->vaddr[i] = dma_alloc_coherent(dev,
FRAME_SIZE,
&hs_dev->dma_addr[i],
GFP_KERNEL | __GFP_ZERO);
if (!hs_dev->vaddr[i]) {
dev_err(dev, "Failed to allocate DMA buffer %d\n", i);
return -ENOMEM;
}
// 配置DMA分散聚集列表
sg_init_table(&hs_dev->sg[i], 1);
sg_dma_address(&hs_dev->sg[i]) = hs_dev->dma_addr[i];
sg_dma_len(&hs_dev->sg[i]) = FRAME_SIZE;
}
return 0;
}
3.2 高性能V4L2驱动实现
c
static const struct vb2_ops high_speed_vb2_ops = {
.queue_setup = high_speed_queue_setup,
.buf_init = high_speed_buf_init,
.buf_prepare = high_speed_buf_prepare,
.buf_queue = high_speed_buf_queue,
.buf_cleanup = high_speed_buf_cleanup,
.start_streaming = high_speed_start_streaming,
.stop_streaming = high_speed_stop_streaming,
.wait_prepare = vb2_ops_wait_prepare,
.wait_finish = vb2_ops_wait_finish,
};
static int high_speed_start_streaming(struct vb2_queue *vq, unsigned int count)
{
struct high_speed_device *hs_dev = vb2_get_drv_priv(vq);
struct dma_async_tx_descriptor *desc;
dma_cookie_t cookie;
int i, ret;
// 配置DMA传输
for (i = 0; i < count; i++) {
desc = dmaengine_prep_slave_sg(hs_dev->dma_chan,
&hs_dev->sg[i],
1,
DMA_DEV_TO_MEM,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!desc) {
dev_err(hs_dev->dev, "Failed to prepare DMA descriptor\n");
return -EIO;
}
desc->callback = high_speed_dma_callback;
desc->callback_param = hs_dev;
cookie = dmaengine_submit(desc);
if (dma_submit_error(cookie)) {
dev_err(hs_dev->dev, "Failed to submit DMA\n");
return -EIO;
}
}
// 启动DMA传输
dma_async_issue_pending(hs_dev->dma_chan);
// 启动传感器流输出
ret = sensor_start_streaming(hs_dev->sensor);
if (ret) {
dev_err(hs_dev->dev, "Failed to start sensor streaming\n");
return ret;
}
return 0;
}
// DMA完成回调
static void high_speed_dma_callback(void *data)
{
struct high_speed_device *hs_dev = data;
struct vb2_buffer *vb;
// 找到完成的缓冲区
vb = &hs_dev->bufs[hs_dev->current_buf]->vb2_buf;
// 更新时间戳
vb->timestamp = ktime_get_ns();
// 标记缓冲区完成
vb2_buffer_done(vb, VB2_BUF_STATE_DONE);
// 准备下一个缓冲区
hs_dev->current_buf = (hs_dev->current_buf + 1) % hs_dev->num_bufs;
}
4. 用户空间高速采集实现
4.1 零拷贝内存映射方案
c++
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/select.h>
#include <unistd.h>

#include <linux/videodev2.h>

#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <stdexcept>
#include <string>
#include <vector>
// One memory-mapped capture buffer as seen from user space.
struct buffer {
void *start;    // address returned by mmap() for this buffer
size_t length;  // buffer length in bytes, as reported by VIDIOC_QUERYBUF
int fd;         // not used by the code visible in this file
};
// V4L2 capture helper using memory-mapped (zero-copy) streaming I/O.
//
// The device is opened non-blocking in the constructor. setupStreaming()
// negotiates format and frame rate and maps the driver's buffers,
// startStreaming() starts capture, and captureFrame() hands one frame at a
// time to a callback. All resources are released by the destructor.
class HighSpeedCamera {
private:
    int fd = -1;
    struct buffer *buffers = nullptr;
    unsigned int n_buffers = 0;
    bool streaming = false;

    // ioctl() that transparently restarts when interrupted by a signal.
    int xioctl(unsigned long request, void *arg) {
        int r;
        do {
            r = ioctl(fd, request, arg);
        } while (r == -1 && errno == EINTR);
        return r;
    }

    // Unmap every mapped buffer and free the bookkeeping array.
    void releaseBuffers() {
        if (buffers != nullptr) {
            for (unsigned int i = 0; i < n_buffers; ++i) {
                if (buffers[i].start != nullptr && buffers[i].start != MAP_FAILED) {
                    munmap(buffers[i].start, buffers[i].length);
                }
            }
            delete[] buffers;
            buffers = nullptr;
        }
        n_buffers = 0;
    }

public:
    // Opens `device` read/write in non-blocking mode.
    // Throws std::runtime_error if the device cannot be opened.
    HighSpeedCamera(const char* device = "/dev/video0") {
        fd = open(device, O_RDWR | O_NONBLOCK, 0);
        if (fd == -1) {
            throw std::runtime_error("Cannot open device");
        }
    }

    // The object owns a file descriptor and mappings; copying would double-free.
    HighSpeedCamera(const HighSpeedCamera&) = delete;
    HighSpeedCamera& operator=(const HighSpeedCamera&) = delete;

    ~HighSpeedCamera() {
        stopStreaming();
        releaseBuffers();
        if (fd != -1) {
            close(fd);
        }
    }

    // Configures YUYV capture at width x height and `fps`, then requests,
    // maps and queues up to `buffer_count` driver buffers.
    // Returns false on any failure; nothing stays mapped in that case.
    bool setupStreaming(unsigned int width, unsigned int height,
                        unsigned int fps, unsigned int buffer_count = 8) {
        if (fps == 0 || buffer_count == 0) {
            return false;  // fps is used as a denominator below
        }

        // Allow re-configuration: drop whatever an earlier call set up.
        stopStreaming();
        releaseBuffers();

        // Pixel format
        struct v4l2_format fmt = {};
        fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        fmt.fmt.pix.width = width;
        fmt.fmt.pix.height = height;
        fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV;
        fmt.fmt.pix.field = V4L2_FIELD_NONE;
        if (xioctl(VIDIOC_S_FMT, &fmt) == -1) {
            return false;
        }

        // Frame interval = 1 / fps
        struct v4l2_streamparm parm = {};
        parm.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        parm.parm.capture.timeperframe.numerator = 1;
        parm.parm.capture.timeperframe.denominator = fps;
        if (xioctl(VIDIOC_S_PARM, &parm) == -1) {
            return false;
        }

        // Buffers are mapped through QUERYBUF offsets below, which is the
        // V4L2_MEMORY_MMAP protocol (DMABUF buffers are passed by fd instead).
        struct v4l2_requestbuffers req = {};
        req.count = buffer_count;
        req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        req.memory = V4L2_MEMORY_MMAP;
        if (xioctl(VIDIOC_REQBUFS, &req) == -1 || req.count == 0) {
            return false;
        }

        // Value-initialised so releaseBuffers() can tell mapped from unmapped.
        buffers = new buffer[req.count]();
        n_buffers = req.count;

        for (unsigned int i = 0; i < n_buffers; ++i) {
            struct v4l2_buffer buf = {};
            buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
            buf.memory = V4L2_MEMORY_MMAP;
            buf.index = i;
            if (xioctl(VIDIOC_QUERYBUF, &buf) == -1) {
                releaseBuffers();
                return false;
            }

            void* mapped = mmap(NULL, buf.length,
                                PROT_READ | PROT_WRITE,
                                MAP_SHARED,
                                fd, buf.m.offset);
            if (mapped == MAP_FAILED) {
                releaseBuffers();
                return false;
            }
            buffers[i].start = mapped;
            buffers[i].length = buf.length;
            buffers[i].fd = -1;

            // Hand the buffer to the driver
            if (xioctl(VIDIOC_QBUF, &buf) == -1) {
                releaseBuffers();
                return false;
            }
        }
        return true;
    }

    // Starts capture. Returns false if VIDIOC_STREAMON fails.
    bool startStreaming() {
        enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        if (xioctl(VIDIOC_STREAMON, &type) == -1) {
            return false;
        }
        streaming = true;
        return true;
    }

    // Stops capture if it is running. Returns false if VIDIOC_STREAMOFF fails.
    bool stopStreaming() {
        if (!streaming || fd == -1) {
            return true;
        }
        enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        streaming = false;
        return xioctl(VIDIOC_STREAMOFF, &type) != -1;
    }

    // Waits up to 2 seconds for a frame, passes the mapped data and its
    // byte count to `processor`, then re-queues the buffer.
    // Returns false on timeout or on any ioctl failure. The data pointer is
    // only valid during the callback, because the buffer is handed back to
    // the driver right after it returns.
    bool captureFrame(std::function<void(void* data, size_t size)> processor) {
        if (fd < 0 || fd >= FD_SETSIZE) {
            return false;  // FD_SET is undefined outside this range
        }

        fd_set fds;
        struct timeval tv;
        int r;
        do {
            FD_ZERO(&fds);
            FD_SET(fd, &fds);
            tv.tv_sec = 2;
            tv.tv_usec = 0;
            r = select(fd + 1, &fds, NULL, NULL, &tv);
        } while (r == -1 && errno == EINTR);
        if (r <= 0) {
            return false;  // error (-1) or timeout (0)
        }

        struct v4l2_buffer buf = {};
        buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        buf.memory = V4L2_MEMORY_MMAP;
        if (xioctl(VIDIOC_DQBUF, &buf) == -1) {
            return false;
        }
        if (buf.index >= n_buffers) {
            return false;  // never index past the mapped array
        }

        // Zero-copy access to the frame. If the callback throws, the buffer
        // is still returned to the driver before the exception propagates.
        try {
            processor(buffers[buf.index].start, buf.bytesused);
        } catch (...) {
            xioctl(VIDIOC_QBUF, &buf);
            throw;
        }

        // Give the buffer back to the driver
        if (xioctl(VIDIOC_QBUF, &buf) == -1) {
            return false;
        }
        return true;
    }
};
4.2 多线程高速存储方案
c++
#include <thread>
#include <queue>
#include <mutex>
#include <condition_variable>
#include <atomic>
// Writes queued frames to a file on a dedicated worker thread.
//
// queueFrame() stores only the pointer and size; the memory must stay valid
// until the worker has written it and called notifyBufferReady() for it.
class FrameStorageManager {
private:
    std::queue<std::pair<void*, size_t>> frameQueue;
    std::mutex queueMutex;
    std::condition_variable queueCond;
    std::atomic<bool> running{false};
    std::atomic<size_t> failedWrites{0};
    std::thread storageThread;
    int outputFd;

    // Writes all `size` bytes, continuing after short writes and EINTR.
    static bool writeAll(int fd, const void* data, size_t size) {
        const char* cursor = static_cast<const char*>(data);
        while (size > 0) {
            ssize_t n = write(fd, cursor, size);
            if (n < 0) {
                if (errno == EINTR) {
                    continue;
                }
                return false;
            }
            if (n == 0) {
                return false;  // no progress; avoid spinning forever
            }
            cursor += n;
            size -= static_cast<size_t>(n);
        }
        return true;
    }

    // Worker loop: runs until shutdown was requested AND the queue is empty,
    // so frames queued before destruction are still written.
    void storageWorker() {
        for (;;) {
            std::pair<void*, size_t> frame;
            {
                std::unique_lock<std::mutex> lock(queueMutex);
                queueCond.wait(lock, [this]() {
                    return !frameQueue.empty() || !running;
                });
                if (frameQueue.empty()) {
                    break;  // only reachable once running is false
                }
                frame = frameQueue.front();
                frameQueue.pop();
            }
            // Synchronous write (could be replaced by asynchronous I/O)
            if (!writeAll(outputFd, frame.first, frame.second)) {
                ++failedWrites;
            }
            // Tell the producer the buffer can be reused
            notifyBufferReady(frame.first);
        }
    }

public:
    // Creates/truncates `filename` and starts the worker thread.
    // Throws std::runtime_error if the file cannot be opened.
    FrameStorageManager(const std::string& filename) {
        outputFd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0644);
        if (outputFd == -1) {
            throw std::runtime_error("Cannot open output file");
        }
        running = true;
        try {
            storageThread = std::thread(&FrameStorageManager::storageWorker, this);
        } catch (...) {
            // The destructor will not run, so release the descriptor here
            running = false;
            close(outputFd);
            throw;
        }
    }

    ~FrameStorageManager() {
        {
            // Change the flag under the mutex: otherwise the worker could test
            // the predicate, miss the notification and sleep forever.
            std::lock_guard<std::mutex> lock(queueMutex);
            running = false;
        }
        queueCond.notify_all();
        if (storageThread.joinable()) {
            storageThread.join();
        }
        if (outputFd != -1) {
            close(outputFd);
        }
    }

    // Queues `size` bytes at `data` for writing and wakes the worker.
    void queueFrame(void* data, size_t size) {
        {
            std::lock_guard<std::mutex> lock(queueMutex);
            frameQueue.push({data, size});
        }
        queueCond.notify_one();
    }

    // Number of frames that could not be written completely so far.
    size_t failedWriteCount() const {
        return failedWrites.load();
    }
};
5. 性能优化技术
5.1 内存池优化
c++
// 预分配内存池
// Fixed-size pool of 4096-byte-aligned blocks.
//
// Up to `pool_size` blocks are pre-allocated and recycled. When the pool is
// empty, allocate() falls back to a fresh allocation. Blocks that are handed
// out are owned by the caller until returned through deallocate(); blocks
// still pooled are freed by the destructor. Not thread-safe.
class MemoryPool {
private:
    static constexpr size_t kAlignment = 4096;

    std::vector<void*> pool;
    size_t block_size;  // rounded up to a multiple of kAlignment
    size_t pool_size;

    // aligned_alloc() requires the size to be a multiple of the alignment.
    static size_t roundUp(size_t bytes) {
        if (bytes == 0) {
            return kAlignment;
        }
        return (bytes + kAlignment - 1) / kAlignment * kAlignment;
    }

public:
    // Pre-allocates up to `pool_size` blocks of at least `block_size` bytes.
    // Blocks whose allocation fails are skipped.
    MemoryPool(size_t block_size, size_t pool_size)
        : block_size(roundUp(block_size)), pool_size(pool_size) {
        pool.reserve(pool_size);
        for (size_t i = 0; i < pool_size; ++i) {
            // this-> is required: the parameter shadows the rounded member
            void* block = aligned_alloc(kAlignment, this->block_size);
            if (block) {
                pool.push_back(block);
            }
        }
    }

    // The pool owns raw allocations; a copy would free them twice.
    MemoryPool(const MemoryPool&) = delete;
    MemoryPool& operator=(const MemoryPool&) = delete;

    ~MemoryPool() {
        for (void* block : pool) {
            free(block);
        }
    }

    // Returns a pooled block, or a fresh one if the pool is empty.
    // May return nullptr when a fresh allocation fails.
    void* allocate() {
        if (pool.empty()) {
            return aligned_alloc(kAlignment, block_size);
        }
        void* block = pool.back();
        pool.pop_back();
        return block;
    }

    // Returns `block` to the pool, or frees it if the pool is already full.
    // nullptr is ignored so it can never be handed out by allocate().
    void deallocate(void* block) {
        if (block == nullptr) {
            return;
        }
        if (pool.size() < pool_size) {
            pool.push_back(block);
        } else {
            free(block);
        }
    }
};
5.2 直接IO存储优化
c++
// Appends frames to a file opened with O_DIRECT (bypassing the page cache).
//
// O_DIRECT needs the buffer address and the transfer length aligned to the
// block size. Frames that already satisfy this are written in place; any
// other frame is copied into an aligned bounce buffer and zero-padded up to
// the next multiple of block_size, so every record in the file occupies a
// whole number of blocks.
class DirectIOStorage {
private:
    int fd;
    size_t block_size;
    void* bounce = nullptr;   // aligned staging buffer, grown on demand
    size_t bounce_size = 0;

    size_t alignUp(size_t bytes) const {
        return (bytes + block_size - 1) / block_size * block_size;
    }

    bool isAligned(const void* p) const {
        return reinterpret_cast<std::uintptr_t>(p) % block_size == 0;
    }

    // Writes all `len` bytes, continuing after short writes and EINTR.
    bool writeAll(const char* p, size_t len) {
        while (len > 0) {
            ssize_t n = write(fd, p, len);
            if (n < 0) {
                if (errno == EINTR) {
                    continue;
                }
                return false;
            }
            if (n == 0) {
                return false;
            }
            p += n;
            len -= static_cast<size_t>(n);
        }
        return true;
    }

public:
    // Creates/truncates `filename` with O_DIRECT.
    // Throws std::runtime_error if block_size is not a non-zero power of two
    // or if the file cannot be opened.
    DirectIOStorage(const std::string& filename, size_t block_size = 4096)
        : fd(-1), block_size(block_size) {
        if (block_size == 0 || (block_size & (block_size - 1)) != 0) {
            throw std::runtime_error("block_size must be a non-zero power of two");
        }
        // O_DIRECT bypasses the page cache
        fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT, 0644);
        if (fd == -1) {
            throw std::runtime_error("Cannot open file with O_DIRECT");
        }
    }

    // The object owns a descriptor and a heap buffer.
    DirectIOStorage(const DirectIOStorage&) = delete;
    DirectIOStorage& operator=(const DirectIOStorage&) = delete;

    ~DirectIOStorage() {
        if (fd != -1) {
            close(fd);
        }
        free(bounce);
    }

    // Writes one frame. Returns true when the whole (padded) record reached
    // the file. An empty frame writes nothing and succeeds.
    bool writeFrame(void* data, size_t size) {
        if (size == 0) {
            return true;
        }
        if (data == nullptr) {
            return false;
        }

        const size_t padded = alignUp(size);

        // Fast path: already suitable for O_DIRECT, no copy needed
        if (padded == size && isAligned(data)) {
            return writeAll(static_cast<const char*>(data), size);
        }

        // Slow path: stage through an aligned buffer. Never read past `size`
        // bytes of the caller's frame; the tail is zero padding.
        if (bounce_size < padded) {
            void* grown = aligned_alloc(block_size, padded);
            if (grown == nullptr) {
                return false;
            }
            free(bounce);
            bounce = grown;
            bounce_size = padded;
        }
        memcpy(bounce, data, size);
        memset(static_cast<char*>(bounce) + size, 0, padded - size);
        return writeAll(static_cast<const char*>(bounce), padded);
    }
};
6. 系统调优配置
6.1 内核参数优化
bash
# Page-cache writeback thresholds for sustained frame writes.
# These knobs control dirty page-cache limits; they do not size DMA buffers.
# Values below two pages are ignored by the kernel, so the previous
# 2048 / 1024 byte settings had no effect.
echo 268435456 > /proc/sys/vm/dirty_bytes             # 256 MiB: writers block above this
echo 67108864 > /proc/sys/vm/dirty_background_bytes   # 64 MiB: background flush starts here
# Exempt this shell from the OOM killer.
# oom_score_adj replaces the deprecated oom_adj; -1000 corresponds to the old -17.
echo -1000 > /proc/$$/oom_score_adj
# Run this shell under SCHED_FIFO at priority 99.
# Note: 99 is the highest real-time priority and can starve kernel threads.
chrt -f -p 99 $$
6.2 中断亲和性设置
bash
# Pin the video-related interrupts to dedicated CPU cores.
# IRQ numbers differ between boards and boots (check /proc/interrupts);
# override CAMERA_IRQ / DMA_IRQ in the environment when they are not 56 / 57.
CAMERA_IRQ="${CAMERA_IRQ:-56}"
DMA_IRQ="${DMA_IRQ:-57}"
# smp_affinity takes a hexadecimal CPU bitmask
echo 2 > "/proc/irq/${CAMERA_IRQ}/smp_affinity"   # mask 0x2 = CPU1, camera interrupt
echo 4 > "/proc/irq/${DMA_IRQ}/smp_affinity"      # mask 0x4 = CPU2, DMA interrupt
7. 性能监控
7.1 实时性能统计
c++
#include <chrono>
// Prints frame rate and bit rate roughly once per second.
//
// Call frameCaptured() for every frame. Once at least one second has passed
// since the current window started, the averages for that window are
// printed and a new window begins.
class PerformanceMonitor {
private:
    // Monotonic clock: wall-clock adjustments cannot distort the interval
    typedef std::chrono::steady_clock Clock;

    Clock::time_point start_time;
    size_t frame_count;
    size_t total_bytes;

public:
    // Starts the first window now, with zeroed counters.
    PerformanceMonitor()
        : start_time(Clock::now()), frame_count(0), total_bytes(0) {}

    // Records one captured frame of `bytes` bytes.
    void frameCaptured(size_t bytes) {
        frame_count++;
        total_bytes += bytes;

        const Clock::time_point now = Clock::now();
        // Fractional seconds: truncating to whole seconds and dividing
        // integers would skew the reported rates.
        const double elapsed =
            std::chrono::duration<double>(now - start_time).count();

        if (elapsed >= 1.0) {
            double fps = frame_count / elapsed;
            double mbps = (total_bytes * 8.0) / (elapsed * 1000000.0);
            printf("FPS: %.2f, Bitrate: %.2f Mbps\n", fps, mbps);

            // Begin a new window
            frame_count = 0;
            total_bytes = 0;
            start_time = now;
        }
    }

    // Frames counted in the current (not yet reported) window.
    size_t pendingFrames() const { return frame_count; }

    // Bytes counted in the current (not yet reported) window.
    size_t pendingBytes() const { return total_bytes; }
};
该方案可实现高速图像数据的零拷贝传输与高效存储,适用于工业检测、高速摄影、科学成像等高性能应用场景。