cpp
/* Number of worker threads (and therefore chunks) that parallel_memcpy() splits one copy into. */
#define NUM_THREADS 4
/* Describes one chunk of a copy: the work item passed to a single thread_copy() worker. */
typedef struct {
uint8_t *dst;       /* start of the destination region for this chunk */
const uint8_t *src; /* start of the source region for this chunk */
size_t size;        /* number of bytes to copy from src to dst */
} ThreadData;
/*
 * pthread start routine: copies one chunk.
 *
 * arg must point to a ThreadData describing the chunk; job->size bytes are
 * copied from job->src to job->dst with memcpy, so the two regions must not
 * overlap. Always returns NULL.
 */
void *thread_copy(void *arg)
{
    const ThreadData *job = (const ThreadData *)arg;

    (void)memcpy(job->dst, job->src, job->size);
    return NULL;
}
/*
 * Copy `size` bytes from `src` to `dst` using up to NUM_THREADS worker threads.
 *
 * The range is split into NUM_THREADS contiguous chunks of size / NUM_THREADS
 * bytes; the last chunk additionally takes the size % NUM_THREADS leftover
 * bytes. Each chunk is copied by thread_copy(), which uses memcpy, so `dst`
 * and `src` must not overlap. The function returns only after every chunk has
 * been copied.
 *
 * dst  - destination buffer, at least `size` bytes
 * src  - source buffer, at least `size` bytes
 * size - number of bytes to copy; 0 is a no-op
 */
void parallel_memcpy(uint8_t *dst, const uint8_t *src, size_t size) {
    pthread_t threads[NUM_THREADS];
    ThreadData thread_data[NUM_THREADS];
    int started[NUM_THREADS]; /* non-zero once threads[i] is a joinable thread */
    size_t chunk_size = size / NUM_THREADS;
    size_t remaining = size % NUM_THREADS;

    /* Nothing to do; also avoids handing possibly-null pointers to memcpy. */
    if (size == 0) {
        return;
    }

    /* Fewer bytes than threads: every chunk but the last would be empty, so
     * spawning threads is pure overhead. Copy directly instead. */
    if (chunk_size == 0) {
        memcpy(dst, src, size);
        return;
    }

    for (int i = 0; i < NUM_THREADS; i++) {
        thread_data[i].dst = dst + (size_t)i * chunk_size;
        thread_data[i].src = src + (size_t)i * chunk_size;
        thread_data[i].size = chunk_size;
        if (i == NUM_THREADS - 1) {
            /* The last chunk absorbs the bytes left over by the division. */
            thread_data[i].size += remaining;
        }

        started[i] = (pthread_create(&threads[i], NULL, thread_copy, &thread_data[i]) == 0);
        if (!started[i]) {
            /* Thread creation failed (e.g. resource limits): threads[i] is
             * not a valid thread ID, so copy this chunk in the calling
             * thread rather than silently leaving it uncopied. */
            memcpy(thread_data[i].dst, thread_data[i].src, thread_data[i].size);
        }
    }

    /* Join only the threads that were actually created; joining an
     * uninitialized pthread_t is undefined behaviour. */
    for (int i = 0; i < NUM_THREADS; i++) {
        if (started[i]) {
            pthread_join(threads[i], NULL);
        }
    }
}
/* Example call: copy _FRAME_BIG_BUFF_SIZE bytes from YuvBigImage into
 * g_if_v_ctl._big_yuv_frame_buf.frame_vir_addr, treating both as byte buffers.
 * NOTE(review): presumably both buffers are at least _FRAME_BIG_BUFF_SIZE bytes
 * and do not overlap - confirm at the real call site. */
parallel_memcpy((uint8_t *)g_if_v_ctl._big_yuv_frame_buf.frame_vir_addr, (const uint8_t *)YuvBigImage, _FRAME_BIG_BUFF_SIZE);
利用多线程加速大块数据的拷贝过程。
最好只在多核、大数据块的情况下使用该操作,否则创建和等待线程的开销可能会超过拷贝本身节省的时间,起到反作用。
也可以探索使用专用指令集(例如 SIMD 指令)进行拷贝的方式,也有不错的效果。