手写数字识别:从零实现一个卷积神经网络(CNN)

前言

上一篇文章我们实现了一个简单的全连接神经网络。但全连接网络有个问题:输入是一维向量,会丢失图像的空间结构信息。

比如一张28×28的手写数字图片,拉成一维后,相邻像素之间的关系就丢失了。这就是为什么识别图像需要卷积神经网络(CNN)。

今天,我们用C语言从零实现一个完整的CNN:

· 实现卷积层、池化层、全连接层

· 搭建一个用于识别手写数字的模型(本文代码只实现前向传播,"训练"部分为模拟演示,不包含反向传播和权重更新)

· 理解卷积、池化、感受野等核心概念

· 完整实现,可运行


一、CNN的核心原理

  1. 为什么需要卷积?

| 方法 | 参数量 | 特点 |
| --- | --- | --- |
| 全连接(28×28→128) | 784×128=100,352 | 参数量爆炸,过拟合 |
| 卷积(3×3卷积核) | 3×3=9 | 参数共享,平移不变性 |

  2. 卷积核如何工作?

```

输入图像(5×5) 卷积核(3×3) 输出特征图(3×3)

┌─────────────────┐ ┌─────────┐ ┌─────────┐

│ 1 1 1 0 0 │ │ 1 0 1 │ │ 4 3 4 │

│ 0 1 1 1 0 │ × │ 0 1 0 │ = → │ 2 4 3 │

│ 0 0 1 1 1 │ │ 1 0 1 │ │ 2 3 4 │

│ 0 0 1 1 0 │ └─────────┘ └─────────┘

│ 0 1 1 0 0 │

└─────────────────┘

计算: 1×1 + 1×0 + 1×1 + 0×0 + 1×1 + 1×0 + 0×1 + 0×0 + 1×1 = 4

```

  3. CNN的标准结构

```

输入(28×28×1)

卷积层(3×3×32) + ReLU → 特征图(26×26×32)

最大池化(2×2) → 特征图(13×13×32)

卷积层(3×3×64) + ReLU → 特征图(11×11×64)

最大池化(2×2) → 特征图(5×5×64)

展平 → 1600个特征

全连接层(128) + ReLU

全连接层(10) + Softmax

```


二、完整代码实现

  1. 基本数据结构

```c

#include <stdio.h>

#include <stdlib.h>

#include <math.h>

#include <string.h>

#include <time.h>

#include <assert.h>

// Three-dimensional float tensor.
// All elements live in one flat buffer holding height * width * channels
// values; tensor_get()/tensor_set() define how (h, w, c) maps into it.
typedef struct {
    int height;     // number of rows
    int width;      // number of columns
    int channels;   // number of channel planes
    float *data;    // flat element buffer
} tensor_t;

// Allocate a zero-filled tensor of the given shape.
//
// height, width, channels: extents of the tensor; none may be negative.
//
// Returns a tensor owned by the caller (release it with tensor_free), or
// NULL when a dimension is negative, the element count does not fit in
// size_t, or memory is exhausted.
tensor_t *tensor_create(int height, int width, int channels) {
    if (height < 0 || width < 0 || channels < 0) {
        return NULL;
    }

    // Multiply step by step in size_t so the element count cannot wrap.
    const size_t limit = (size_t)-1;
    size_t count = (size_t)height;
    if (width != 0 && count > limit / (size_t)width) {
        return NULL;
    }
    count *= (size_t)width;
    if (channels != 0 && count > limit / (size_t)channels) {
        return NULL;
    }
    count *= (size_t)channels;

    tensor_t *t = malloc(sizeof *t);
    if (!t) {
        return NULL;
    }

    // calloc(0, ...) is allowed to return NULL, so always request at least
    // one element; that keeps "NULL" meaning "allocation failed".
    t->data = calloc(count ? count : 1, sizeof *t->data);
    if (!t->data) {
        free(t);
        return NULL;
    }

    t->height = height;
    t->width = width;
    t->channels = channels;
    return t;
}

// Release a tensor together with its element buffer.
// Passing NULL is allowed and does nothing.
void tensor_free(tensor_t *t) {
    if (t == NULL) {
        return;
    }
    free(t->data);
    free(t);
}

// Read the element at row h, column w of channel c.
// The buffer is laid out channel by channel, each channel row by row.
// No bounds checking is performed.
float tensor_get(tensor_t *t, int h, int w, int c) {
    int row = c * t->height + h;      // row index counted across all channels
    int offset = row * t->width + w;  // position inside the flat buffer
    return t->data[offset];
}

// Store val at row h, column w of channel c.
// Uses the same channel-by-channel, row-by-row layout as tensor_get.
// No bounds checking is performed.
void tensor_set(tensor_t *t, int h, int w, int c, float val) {
    int row = c * t->height + h;      // row index counted across all channels
    int offset = row * t->width + w;  // position inside the flat buffer
    t->data[offset] = val;
}

// Fill every element with a pseudo-random value drawn uniformly from
// [-scale, +scale], where scale = sqrt(2 / element_count).
// Values come from rand(), so the sequence depends on the current srand seed.
// NOTE(review): the article labels this "Xavier initialization"; a
// sqrt(2 / n) factor is more commonly associated with He initialization --
// confirm which scheme is intended.
void tensor_random(tensor_t *t) {
    int count = t->height * t->width * t->channels;
    float scale = sqrt(2.0f / count);

    int idx = 0;
    while (idx < count) {
        t->data[idx] = ((float)rand() / RAND_MAX) * 2.0f * scale - scale;
        idx++;
    }
}

// Copy the elements of src into dst.
//
// The number of elements copied is limited to what dst can hold, so a
// destination smaller than the source is filled completely instead of being
// overrun. When both tensors have the same element count this is a full copy.
// Nothing is copied if either tensor (or its buffer) is NULL or if src and
// dst are the same tensor.
void tensor_copy(tensor_t *src, tensor_t *dst) {
    if (!src || !dst || src == dst || !src->data || !dst->data) {
        return;
    }

    size_t src_count = (size_t)src->height * (size_t)src->width * (size_t)src->channels;
    size_t dst_count = (size_t)dst->height * (size_t)dst->width * (size_t)dst->channels;
    size_t count = src_count < dst_count ? src_count : dst_count;

    memcpy(dst->data, src->data, count * sizeof *dst->data);
}

```

  2. 卷积层实现

```c

// 卷积层结构

typedef struct {

tensor_t **weights; // 卷积核 [out_channels][in_channels][kernel_h][kernel_w]

tensor_t *bias; // 偏置 [out_channels]

int kernel_h;

int kernel_w;

int stride;

int padding;

int in_channels;

int out_channels;

tensor_t *input; // 输入缓存

tensor_t *output; // 输出

tensor_t *delta; // 误差项

} conv_layer_t;

// Build a convolution layer with randomly initialised kernels and biases.
//
// in_channels / out_channels: channel counts, must be positive.
// kernel_h / kernel_w:        kernel size, must be positive.
// stride:                     window step, must be positive (it is used as a
//                             divisor by conv_output_size).
// padding:                    zero border width, must not be negative.
//
// Returns the new layer, or NULL if an argument is invalid or an allocation
// fails. On failure everything allocated so far is released.
conv_layer_t *conv_create(int in_channels, int out_channels,
                          int kernel_h, int kernel_w,
                          int stride, int padding) {
    if (in_channels <= 0 || out_channels <= 0 ||
        kernel_h <= 0 || kernel_w <= 0 ||
        stride <= 0 || padding < 0) {
        return NULL;
    }

    conv_layer_t *layer = malloc(sizeof *layer);
    if (!layer) {
        return NULL;
    }

    layer->in_channels = in_channels;
    layer->out_channels = out_channels;
    layer->kernel_h = kernel_h;
    layer->kernel_w = kernel_w;
    layer->stride = stride;
    layer->padding = padding;
    layer->bias = NULL;
    layer->input = NULL;
    layer->output = NULL;
    layer->delta = NULL;

    // calloc so that unfilled slots are NULL and the failure path can free
    // the array uniformly.
    layer->weights = calloc((size_t)out_channels, sizeof *layer->weights);
    if (!layer->weights) {
        goto fail;
    }

    for (int o = 0; o < out_channels; o++) {
        layer->weights[o] = tensor_create(kernel_h, kernel_w, in_channels);
        if (!layer->weights[o] || !layer->weights[o]->data) {
            goto fail;
        }
        tensor_random(layer->weights[o]);
    }

    layer->bias = tensor_create(out_channels, 1, 1);
    if (!layer->bias || !layer->bias->data) {
        goto fail;
    }
    tensor_random(layer->bias);

    return layer;

fail:
    if (layer->weights) {
        for (int o = 0; o < out_channels; o++) {
            tensor_free(layer->weights[o]);  // NULL slots are ignored
        }
        free(layer->weights);
    }
    tensor_free(layer->bias);
    free(layer);
    return NULL;
}

// Compute the spatial size of the convolution output for an in_h x in_w
// input: (in + 2 * padding - kernel) / stride + 1 per axis, using integer
// division. Results are written through out_h and out_w.
void conv_output_size(conv_layer_t *layer, int in_h, int in_w, int *out_h, int *out_w) {
    int border = 2 * layer->padding;
    int rows = (in_h + border - layer->kernel_h) / layer->stride + 1;
    int cols = (in_w + border - layer->kernel_w) / layer->stride + 1;

    *out_h = rows;
    *out_w = cols;
}

// Forward pass of a convolution layer.
//
// Remembers the input pointer in layer->input, makes sure layer->output has
// the right spatial size (allocating or re-allocating it when needed), then
// writes, for every output channel and position, the sum of
// input * kernel products over the window plus that channel's bias.
// Window positions that fall outside the input are skipped, which is
// equivalent to zero padding.
void conv_forward(conv_layer_t *layer, tensor_t *input) {
    int rows, cols;

    layer->input = input;
    conv_output_size(layer, input->height, input->width, &rows, &cols);

    // Throw away an output buffer whose spatial size no longer matches.
    if (layer->output &&
        (layer->output->height != rows || layer->output->width != cols)) {
        tensor_free(layer->output);
        layer->output = NULL;
    }
    if (!layer->output) {
        layer->output = tensor_create(rows, cols, layer->out_channels);
    }

    for (int oc = 0; oc < layer->out_channels; oc++) {
        for (int oy = 0; oy < rows; oy++) {
            for (int ox = 0; ox < cols; ox++) {
                // Top-left corner of the window in input coordinates; it is
                // negative while the window overlaps the padded border.
                int y0 = oy * layer->stride - layer->padding;
                int x0 = ox * layer->stride - layer->padding;
                float acc = 0;

                for (int ic = 0; ic < layer->in_channels; ic++) {
                    for (int ky = 0; ky < layer->kernel_h; ky++) {
                        for (int kx = 0; kx < layer->kernel_w; kx++) {
                            int y = y0 + ky;
                            int x = x0 + kx;

                            // Padded positions contribute nothing.
                            if (y < 0 || y >= input->height ||
                                x < 0 || x >= input->width) {
                                continue;
                            }

                            float pixel = tensor_get(input, y, x, ic);
                            float weight = tensor_get(layer->weights[oc], ky, kx, ic);
                            acc += pixel * weight;
                        }
                    }
                }

                acc += tensor_get(layer->bias, oc, 0, 0);
                tensor_set(layer->output, oy, ox, oc, acc);
            }
        }
    }
}

// ReLU activation: output[i] = input[i] when it is greater than zero,
// otherwise 0. The number of elements processed is taken from input.
void relu_forward(tensor_t *input, tensor_t *output) {
    int count = input->height * input->width * input->channels;

    for (int k = 0; k < count; k++) {
        float x = input->data[k];
        if (x > 0) {
            output->data[k] = x;
        } else {
            output->data[k] = 0;
        }
    }
}

// Backward step of ReLU: zero every entry of delta whose corresponding
// input value is less than or equal to zero; other entries are left as is.
void relu_backward(tensor_t *input, tensor_t *delta) {
    int count = input->height * input->width * input->channels;

    for (int k = 0; k < count; k++) {
        if (!(input->data[k] <= 0)) {
            continue;  // unit was active, gradient passes through
        }
        delta->data[k] = 0;
    }
}

```

  3. 池化层实现

```c

// Max-pooling layer: window geometry plus the buffers used by pool_forward.
typedef struct {
    int pool_h;         // window height
    int pool_w;         // window width
    int stride;         // step between neighbouring windows

    tensor_t *input;    // last tensor passed to pool_forward (pointer only)
    tensor_t *output;   // pooled result, allocated by pool_forward
    tensor_t *max_pos;  // per output element: row * input_width + column of
                        // the selected maximum (kept for backpropagation)
} pool_layer_t;

// Create a max-pooling layer.
//
// pool_h / pool_w: window size, must be positive.
// stride:          window step, must be positive (pool_forward divides by it).
//
// Returns the new layer with no buffers allocated yet, or NULL if an
// argument is invalid or memory is exhausted.
pool_layer_t *pool_create(int pool_h, int pool_w, int stride) {
    if (pool_h <= 0 || pool_w <= 0 || stride <= 0) {
        return NULL;
    }

    pool_layer_t *layer = malloc(sizeof *layer);
    if (!layer) {
        return NULL;
    }

    layer->pool_h = pool_h;
    layer->pool_w = pool_w;
    layer->stride = stride;
    layer->input = NULL;
    layer->output = NULL;
    layer->max_pos = NULL;
    return layer;
}

// Forward pass of max pooling.
//
// Stores the input pointer in layer->input, then for every channel and every
// window writes the window maximum to layer->output and the flat position of
// that maximum (row * input_width + column) to layer->max_pos. On ties the
// first element in row-major window order wins.
//
// The buffers are (re)allocated whenever the required shape differs from the
// existing one. If the input is smaller than the pooling window, or an
// allocation fails, the buffers are released and output/max_pos are left
// NULL.
void pool_forward(pool_layer_t *layer, tensor_t *input) {
    layer->input = input;

    // Integer division truncates toward zero, so without this check an input
    // smaller than the window would report a 1x1 output and read past the
    // input buffer.
    int fits = input->height >= layer->pool_h && input->width >= layer->pool_w;
    int out_h = fits ? (input->height - layer->pool_h) / layer->stride + 1 : 0;
    int out_w = fits ? (input->width - layer->pool_w) / layer->stride + 1 : 0;

    // Discard buffers left over from a call with a different input shape.
    if (layer->output &&
        (!fits ||
         layer->output->height != out_h ||
         layer->output->width != out_w ||
         layer->output->channels != input->channels)) {
        tensor_free(layer->output);
        tensor_free(layer->max_pos);
        layer->output = NULL;
        layer->max_pos = NULL;
    }
    if (!fits) {
        return;
    }

    if (!layer->output) {
        layer->output = tensor_create(out_h, out_w, input->channels);
        layer->max_pos = tensor_create(out_h, out_w, input->channels);
    }
    if (!layer->output || !layer->max_pos ||
        !layer->output->data || !layer->max_pos->data) {
        // Allocation failed: never write through a NULL buffer.
        tensor_free(layer->output);
        tensor_free(layer->max_pos);
        layer->output = NULL;
        layer->max_pos = NULL;
        return;
    }

    for (int c = 0; c < input->channels; c++) {
        for (int oy = 0; oy < out_h; oy++) {
            for (int ox = 0; ox < out_w; ox++) {
                int y0 = oy * layer->stride;
                int x0 = ox * layer->stride;

                // Seed with the first window element instead of a fixed
                // sentinel so arbitrarily negative inputs are handled.
                float best = tensor_get(input, y0, x0, c);
                int best_y = y0;
                int best_x = x0;

                for (int py = 0; py < layer->pool_h; py++) {
                    for (int px = 0; px < layer->pool_w; px++) {
                        float val = tensor_get(input, y0 + py, x0 + px, c);
                        if (val > best) {
                            best = val;
                            best_y = y0 + py;
                            best_x = x0 + px;
                        }
                    }
                }

                tensor_set(layer->output, oy, ox, c, best);

                // The position is stored in a float tensor.
                int pos = best_y * input->width + best_x;
                tensor_set(layer->max_pos, oy, ox, c, (float)pos);
            }
        }
    }
}

```

  4. 全连接层

```c

// Fully connected layer: parameters and per-call buffers.
typedef struct {
    float **weights;  // weight matrix, weights[out][in]
    float *bias;      // one bias per output
    float *input;     // copy of the most recent input vector (in_size values)
    float *output;    // most recent output vector (out_size values)
    float *delta;     // error term (out_size values)

    int in_size;      // length of the input vector
    int out_size;     // length of the output vector
} fc_layer_t;

// Create a fully connected layer mapping in_size inputs to out_size outputs.
//
// Weights are drawn uniformly from [-0.05, 0.05] using rand(); biases and
// the input/output/delta buffers start at zero.
//
// Returns the new layer, or NULL if a size is not positive or an allocation
// fails. On failure everything allocated so far is released.
fc_layer_t *fc_create(int in_size, int out_size) {
    if (in_size <= 0 || out_size <= 0) {
        return NULL;
    }

    fc_layer_t *layer = malloc(sizeof *layer);
    if (!layer) {
        return NULL;
    }

    layer->in_size = in_size;
    layer->out_size = out_size;
    layer->bias = NULL;
    layer->input = NULL;
    layer->output = NULL;
    layer->delta = NULL;

    // calloc so that rows not yet allocated are NULL on the failure path.
    layer->weights = calloc((size_t)out_size, sizeof *layer->weights);
    if (!layer->weights) {
        goto fail;
    }

    for (int i = 0; i < out_size; i++) {
        layer->weights[i] = malloc(sizeof **layer->weights * (size_t)in_size);
        if (!layer->weights[i]) {
            goto fail;
        }
        for (int j = 0; j < in_size; j++) {
            layer->weights[i][j] = ((float)rand() / RAND_MAX) * 0.1 - 0.05;
        }
    }

    layer->bias = calloc((size_t)out_size, sizeof *layer->bias);
    layer->input = calloc((size_t)in_size, sizeof *layer->input);
    layer->output = calloc((size_t)out_size, sizeof *layer->output);
    layer->delta = calloc((size_t)out_size, sizeof *layer->delta);
    if (!layer->bias || !layer->input || !layer->output || !layer->delta) {
        goto fail;
    }

    return layer;

fail:
    if (layer->weights) {
        for (int i = 0; i < out_size; i++) {
            free(layer->weights[i]);
        }
        free(layer->weights);
    }
    free(layer->bias);
    free(layer->input);
    free(layer->output);
    free(layer->delta);
    free(layer);
    return NULL;
}

// Forward pass of a fully connected layer.
// Saves a copy of the input in layer->input, then computes
// output[o] = bias[o] + sum_k weights[o][k] * input[k] for every output.
void fc_forward(fc_layer_t *layer, float *input) {
    memcpy(layer->input, input, layer->in_size * sizeof(float));

    for (int o = 0; o < layer->out_size; o++) {
        float acc = layer->bias[o];

        for (int k = 0; k < layer->in_size; k++) {
            acc += layer->weights[o][k] * input[k];
        }

        layer->output[o] = acc;
    }
}

// Softmax

void softmax_fc(float *input, float *output, int size) {

float max_val = input[0];

for (int i = 1; i < size; i++) {

if (input[i] > max_val) max_val = input[i];

}

float sum = 0;

for (int i = 0; i < size; i++) {

output[i] = expf(input[i] - max_val);

sum += output[i];

}

for (int i = 0; i < size; i++) {

output[i] /= sum;

}

}

```

  5. 展平和整体网络

```c

// Copy every element of a 3-D tensor into a flat float array, in storage
// order. output must have room for height * width * channels floats.
void flatten(tensor_t *input, float *output) {
    int count = input->height * input->width * input->channels;
    memcpy(output, input->data, count * sizeof(float));
}

// The whole network: two convolution + pooling stages followed by two fully
// connected layers.
typedef struct {
    conv_layer_t *conv1;  // first convolution
    pool_layer_t *pool1;  // pooling after conv1
    conv_layer_t *conv2;  // second convolution
    pool_layer_t *pool2;  // pooling after conv2
    fc_layer_t *fc1;      // hidden fully connected layer
    fc_layer_t *fc2;      // output fully connected layer
    float *fc1_input;     // scratch buffer holding the flattened pool2 output
} cnn_net_t;

// Build the network used in this article:
//
//   Conv1  28x28x1  -> 26x26x32  (3x3 kernel, stride 1, no padding)
//   Pool1  26x26x32 -> 13x13x32  (2x2 window, stride 2)
//   Conv2  13x13x32 -> 11x11x64  (3x3 kernel, stride 1, no padding)
//   Pool2  11x11x64 -> 5x5x64    (2x2 window, stride 2)
//   Fc1    1600     -> 128
//   Fc2    128      -> 10
//
// Returns the network, or NULL when the network object, the flatten buffer
// or any layer could not be created.
cnn_net_t *cnn_create(void) {
    enum {
        CONV1_FILTERS = 32,
        CONV2_FILTERS = 64,
        KERNEL_SIZE = 3,
        POOL_SIZE = 2,
        POOL2_SIDE = 5,  // spatial size left after Pool2 for a 28x28 input
        FLAT_SIZE = POOL2_SIDE * POOL2_SIDE * CONV2_FILTERS,  // 1600
        HIDDEN_SIZE = 128,
        NUM_CLASSES = 10
    };

    cnn_net_t *net = malloc(sizeof *net);
    if (!net) {
        return NULL;
    }

    // Creation order is kept as conv1, conv2, fc1, fc2 so the sequence of
    // rand() draws used for initialisation is unchanged.
    net->conv1 = conv_create(1, CONV1_FILTERS, KERNEL_SIZE, KERNEL_SIZE, 1, 0);
    net->pool1 = pool_create(POOL_SIZE, POOL_SIZE, POOL_SIZE);
    net->conv2 = conv_create(CONV1_FILTERS, CONV2_FILTERS, KERNEL_SIZE, KERNEL_SIZE, 1, 0);
    net->pool2 = pool_create(POOL_SIZE, POOL_SIZE, POOL_SIZE);
    net->fc1 = fc_create(FLAT_SIZE, HIDDEN_SIZE);
    net->fc2 = fc_create(HIDDEN_SIZE, NUM_CLASSES);
    net->fc1_input = malloc(sizeof *net->fc1_input * FLAT_SIZE);

    if (!net->conv1 || !net->pool1 || !net->conv2 || !net->pool2 ||
        !net->fc1 || !net->fc2 || !net->fc1_input) {
        // NOTE(review): this file defines no per-layer destructors, so layers
        // that were created before the failure are not released here. Only
        // reachable when memory is exhausted.
        free(net->fc1_input);
        free(net);
        return NULL;
    }

    return net;
}

// Run one forward pass and return the predicted class (0..9).
//
// Pipeline: Conv1 -> ReLU -> Pool1 -> Conv2 -> ReLU -> Pool2 -> flatten ->
// Fc1 -> ReLU -> Fc2 -> softmax -> argmax.
//
// ReLU is applied in place on each convolution output. This avoids
// allocating and freeing a temporary tensor on every call, and it means the
// `input` pointer remembered by each pooling layer refers to a buffer that
// stays alive (the convolution layer's own output) rather than to a
// temporary that has already been freed.
//
// Returns -1 if a layer produced no output buffer.
int cnn_forward(cnn_net_t *net, tensor_t *input) {
    enum { NUM_CLASSES = 10 };

    // Conv1 + ReLU + Pool1
    conv_forward(net->conv1, input);
    if (!net->conv1->output) {
        return -1;
    }
    relu_forward(net->conv1->output, net->conv1->output);
    pool_forward(net->pool1, net->conv1->output);
    if (!net->pool1->output) {
        return -1;
    }

    // Conv2 + ReLU + Pool2
    conv_forward(net->conv2, net->pool1->output);
    if (!net->conv2->output) {
        return -1;
    }
    relu_forward(net->conv2->output, net->conv2->output);
    pool_forward(net->pool2, net->conv2->output);
    if (!net->pool2->output) {
        return -1;
    }

    // Flatten
    flatten(net->pool2->output, net->fc1_input);

    // Fc1 + ReLU
    fc_forward(net->fc1, net->fc1_input);
    for (int i = 0; i < net->fc1->out_size; i++) {
        net->fc1->output[i] = net->fc1->output[i] > 0 ? net->fc1->output[i] : 0;
    }

    // Fc2 + softmax
    fc_forward(net->fc2, net->fc1->output);
    float probs[NUM_CLASSES];
    softmax_fc(net->fc2->output, probs, NUM_CLASSES);

    // Argmax; on ties the lowest class index wins.
    int pred = 0;
    float max_prob = probs[0];
    for (int i = 1; i < NUM_CLASSES; i++) {
        if (probs[i] > max_prob) {
            max_prob = probs[i];
            pred = i;
        }
    }

    return pred;
}

```


三、训练和测试

```c

// 交叉熵损失

float cross_entropy_loss(float *pred, int label) {

return -logf(pred[label] + 1e-8);

}

// Fill channel 0 of a 28x28 image with synthetic data standing in for MNIST
// (real data would have to be loaded separately).
//
// Every pixel gets uniform noise in [0, 0.5]. Pixels closer than 8 to the
// point (14 + (label - 4) * 2, 14) get an extra 0.5, so the label moves a
// bright disc up or down. Values are capped at 1. One rand() call is made
// per pixel, in row-major order.
void generate_mock_data(tensor_t *image, int label) {
    // The disc centre depends only on the label, not on the pixel.
    const int center_h = 14 + (label - 4) * 2;
    const int center_w = 14;

    for (int h = 0; h < 28; h++) {
        int dh = h - center_h;

        for (int w = 0; w < 28; w++) {
            int dw = w - center_w;

            float val = ((float)rand() / RAND_MAX) * 0.5;
            float dist = sqrt(dh * dh + dw * dw);

            if (dist < 8) {
                val += 0.5;
            }
            if (val > 1) {
                val = 1;
            }

            tensor_set(image, h, w, 0, val);
        }
    }
}

int main() {

printf("=== CNN手写数字识别 ===\n\n");

srand(time(NULL));

cnn_net_t *net = cnn_create();

printf("网络创建完成\n");

printf("Conv1: 1×28×28 → 32×26×26\n");

printf("Pool1: 32×26×26 → 32×13×13\n");

printf("Conv2: 32×13×13 → 64×11×11\n");

printf("Pool2: 64×11×11 → 64×5×5\n");

printf("FC1: 1600 → 128\n");

printf("FC2: 128 → 10\n\n");

// 模拟训练

printf("开始训练(模拟)...\n");

int epochs = 5;

int train_size = 1000;

tensor_t *train_image = tensor_create(28, 28, 1);

for (int epoch = 0; epoch < epochs; epoch++) {

int correct = 0;

for (int i = 0; i < train_size; i++) {

int label = rand() % 10;

generate_mock_data(train_image, label);

int pred = cnn_forward(net, train_image);

if (pred == label) correct++;

}

printf("Epoch %d/%d, Train Acc: %.2f%%\n",

epoch + 1, epochs, 100.0 * correct / train_size);

}

// 测试

printf("\n=== 测试 ===\n");

int test_size = 200;

int correct = 0;

tensor_t *test_image = tensor_create(28, 28, 1);

for (int i = 0; i < test_size; i++) {

int label = rand() % 10;

generate_mock_data(test_image, label);

int pred = cnn_forward(net, test_image);

if (pred == label) correct++;

}

printf("准确率: %.2f%%\n", 100.0 * correct / test_size);

// 清理

tensor_free(train_image);

tensor_free(test_image);

// 注意:需要完整的清理函数释放所有内存

return 0;

}

```


四、CNN vs 全连接网络对比

| 特性 | 全连接网络 | CNN |
| --- | --- | --- |
| 参数量 | 巨大 | 小 |
| 平移不变性 | 无 | 有 |
| 空间结构 | 丢失 | 保留 |
| 训练速度 | 慢 | 快 |
| 准确率(图像) | 约90% | 约99% |


五、总结

通过这篇文章,你学会了:

· 卷积神经网络的核心原理

· 卷积层、池化层、全连接层的实现

· CNN如何处理图像的空间结构

· 完整的CNN网络搭建

CNN是图像识别领域的基础。虽然工业界使用PyTorch/TensorFlow,但用C语言实现能帮你彻底理解每个细节。

下一篇预告:《循环神经网络(RNN):让AI拥有记忆》


评论区分享一下你想用CNN做什么~

相关推荐
BestOrNothing_20152 小时前
C++零基础到工程实战(5.1):初识函数—定义调用、参数返回值、栈区内存与变量作用域分析
c++·生命周期·作用域·变量·函数·栈内存
阿文的代码库2 小时前
如何在C++中使用标准库的智能指针
开发语言·c++·算法
郝学胜-神的一滴2 小时前
Qt 高级开发 008: 使用QSetting记住上次打开路径
开发语言·c++·qt·开源软件
kyle~3 小时前
机器人感知 --- 多相机传感时间误差分析
linux·c++·数码相机·机器人·ros2·传感器
周末也要写八哥3 小时前
C++变参模板之空参包的特殊情况
java·开发语言·c++
蝈理塘(/_\)大怨种3 小时前
c++ 入门基础
开发语言·c++
weixin_386468963 小时前
openharmony 6.0编译rk3568过程记录
c语言·c++·git·python·vim·harmonyos·openharmony
雪度娃娃3 小时前
转向现代C++——优先选用别名声明,而非 typedef
开发语言·c++