前言
你有没有想过:AI程序是怎么识别手写数字的?怎么判断一张图是猫还是狗?
很多人觉得神经网络高深莫测,必须用Python、TensorFlow、PyTorch。但其实,神经网络的数学原理并不复杂——加减乘除 + 激活函数。
今天,我们用纯C语言从零实现一个简单的神经网络:
· 实现神经元、层、前向传播、反向传播
· 训练一个能识别手写数字的模型(MNIST)
· 不依赖任何第三方库
· 彻底搞懂神经网络的核心原理
一、神经网络的核心原理
- 一个神经元长什么样
```
输入 x1 ────┐
            │
输入 x2 ────┼───► Σ ───► 激活函数 ───► 输出 y
            │
输入 x3 ────┘
y = 激活( w1*x1 + w2*x2 + w3*x3 + b )
```
· 权重 w:每个输入的重要性
· 偏置 b:神经元的阈值
· 激活函数:引入非线性(sigmoid/ReLU/tanh)
- 三层神经网络结构
```
输入层      隐藏层       输出层
(784个)     (128个)      (10个)
x1   ──┐
       │    ┌──┐
x2   ──┼────│h1│────┐
       │    └──┘    │    ┌──┐
...  ──┼────────────┼────│y1│
       │    ┌──┐    │    └──┘
x784 ──┴────│h2│────┘
            └──┘
```
· 输入层:28×28=784个像素值
· 隐藏层:128个神经元,学习特征
· 输出层:10个神经元,对应数字0-9的概率
- 训练过程
```
输入 → 前向传播 → 预测值 → 计算损失 → 反向传播 → 更新权重 → 重复
```
· 前向传播:输入经过网络得到预测
· 损失函数:衡量预测与真实值的差距
· 反向传播:计算每个权重的梯度
· 梯度下降:更新权重,减小损失
二、完整代码实现
- 矩阵运算基础
```c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <time.h>
// Dense 2-D matrix of floats; element (r, c) lives at data[r * cols + c].
typedef struct {
int rows;      // number of rows
int cols;      // number of columns (also the row stride used for indexing)
float *data;   // element buffer holding rows * cols floats, allocated by mat_create
} matrix_t;
/**
 * Allocate a rows x cols matrix whose element buffer is zero-filled.
 *
 * @param rows  number of rows (must be >= 0)
 * @param cols  number of columns (must be >= 0)
 * @return      a new matrix owned by the caller (release it with mat_free),
 *              or NULL when a dimension is negative, the element count does
 *              not fit in size_t, or an allocation fails.
 */
matrix_t *mat_create(int rows, int cols) {
    if (rows < 0 || cols < 0) {
        return NULL;
    }

    // Multiply in size_t instead of int and verify the product by dividing
    // it back, so huge dimensions cannot silently wrap around.
    size_t count = (size_t)rows * (size_t)cols;
    if (cols != 0 && count / (size_t)cols != (size_t)rows) {
        return NULL;
    }

    matrix_t *mat = malloc(sizeof *mat);
    if (mat == NULL) {
        return NULL;
    }

    mat->rows = rows;
    mat->cols = cols;
    mat->data = calloc(count, sizeof *mat->data);
    // calloc may legitimately return NULL for a zero-element request, so
    // only treat NULL as a failure when elements were actually requested.
    if (mat->data == NULL && count != 0) {
        free(mat);
        return NULL;
    }
    return mat;
}
/* Release a matrix together with its element buffer; NULL is ignored. */
void mat_free(matrix_t *mat) {
    if (mat == NULL) {
        return;
    }
    free(mat->data);
    free(mat);
}
/*
 * Overwrite every element with a pseudo-random value from rand(), spread
 * uniformly over [-scale, +scale] where scale = sqrt(2 / (rows + cols)).
 *
 * NOTE(review): the article calls this Xavier initialization; the textbook
 * uniform Xavier/Glorot bound is sqrt(6 / (fan_in + fan_out)), so this range
 * is narrower than that variant.
 */
void mat_random(matrix_t *mat) {
    const int count = mat->rows * mat->cols;
    float scale = sqrt(2.0f / (mat->rows + mat->cols));

    for (int n = 0; n < count; n++) {
        float unit = (float)rand() / RAND_MAX;   // uniform in [0, 1]
        mat->data[n] = unit * 2.0f * scale - scale;
    }
}
/*
 * Matrix product C = A * B on row-major matrices.
 *
 * A is read as (A->rows x A->cols) and B as (A->cols x B->cols); results are
 * stored into C using C->cols as the row stride. No shapes are checked, so
 * the caller must pass compatible matrices. C must not share its buffer with
 * A or B, because outputs are written while inputs are still being read.
 */
void mat_mul(matrix_t *A, matrix_t *B, matrix_t *C) {
    for (int row = 0; row < A->rows; row++) {
        const int a_base = row * A->cols;
        const int c_base = row * C->cols;

        for (int col = 0; col < B->cols; col++) {
            float acc = 0;
            // Dot product of row `row` of A with column `col` of B.
            for (int k = 0; k < A->cols; k++) {
                acc += A->data[a_base + k] * B->data[k * B->cols + col];
            }
            C->data[c_base + col] = acc;
        }
    }
}
/*
 * Element-wise sum C = A + B over the A->rows * A->cols elements of A.
 * B and C are assumed to hold at least that many elements; nothing is
 * checked. C may be the same matrix as A or B (each element is read before
 * it is written).
 */
void mat_add(matrix_t *A, matrix_t *B, matrix_t *C) {
    const int count = A->rows * A->cols;
    int n = 0;

    while (n < count) {
        C->data[n] = A->data[n] + B->data[n];
        n++;
    }
}
/*
 * Build and return the transpose of mat as a freshly created
 * (mat->cols x mat->rows) matrix. The caller owns the result and must
 * release it with mat_free.
 */
matrix_t *mat_transpose(matrix_t *mat) {
    matrix_t *res = mat_create(mat->cols, mat->rows);
    const int total = mat->rows * mat->cols;

    // Walk the source linearly and scatter each element to its mirrored
    // position; when cols is 0 the loop body never runs, so the division
    // below is safe.
    for (int idx = 0; idx < total; idx++) {
        int r = idx / mat->cols;
        int c = idx % mat->cols;
        res->data[c * res->cols + r] = mat->data[idx];
    }
    return res;
}
/*
 * Copy all src->rows * src->cols elements of src into dst's buffer.
 * Only the element data is copied (dst's rows/cols are left untouched), and
 * dst must already have room for that many floats.
 */
void mat_copy(matrix_t *src, matrix_t *dst) {
    size_t count = (size_t)(src->rows * src->cols);
    size_t bytes = count * sizeof(float);

    memcpy(dst->data, src->data, bytes);
}
```
- 激活函数
```c
// Sigmoid激活函数
float sigmoid(float x) {
return 1.0f / (1.0f + expf(-x));
}
/*
 * Derivative of the sigmoid expressed in terms of its OUTPUT: the argument
 * x is the already-activated value s = sigmoid(z), and the result is
 * s * (1 - s). Do not pass the pre-activation z.
 */
float sigmoid_derivative(float x) {
    const float complement = 1.0f - x;
    return x * complement;
}
/* Rectified linear unit: returns x when it is positive, otherwise 0. */
float relu(float x) {
    if (x > 0) {
        return x;
    }
    return 0;
}
/*
 * Slope of ReLU: 1 for a positive argument, 0 otherwise. Because ReLU's
 * output is positive exactly when its input is, this gives the same answer
 * whether it is handed the pre-activation or the activated value.
 */
float relu_derivative(float x) {
    if (x > 0) {
        return 1;
    }
    return 0;
}
/*
 * In-place softmax over every element of mat: afterwards the elements are
 * non-negative and sum to 1. An empty matrix is left untouched.
 *
 * The largest element is subtracted before exponentiating. This does not
 * change the mathematical result, but it keeps expf from overflowing to
 * infinity (which would turn the normalisation into inf / inf = NaN) and
 * guarantees the denominator is at least 1.
 */
void softmax(matrix_t *mat) {
    const int count = mat->rows * mat->cols;
    if (count <= 0) {
        return;
    }

    float peak = mat->data[0];
    for (int i = 1; i < count; i++) {
        if (mat->data[i] > peak) {
            peak = mat->data[i];
        }
    }

    float sum = 0.0f;
    for (int i = 0; i < count; i++) {
        mat->data[i] = expf(mat->data[i] - peak);
        sum += mat->data[i];
    }

    for (int i = 0; i < count; i++) {
        mat->data[i] /= sum;
    }
}
```
- 网络层定义
```c
// 全连接层
typedef struct {
matrix_t *weights; // 权重矩阵
matrix_t *bias; // 偏置向量
matrix_t *input; // 输入(缓存,用于反向传播)
matrix_t *output; // 输出
matrix_t *delta; // 误差项
int input_size;
int output_size;
int (*activation)(float);
int (*activation_derivative)(float);
} layer_t;
/**
 * Create a fully connected layer with randomly initialised weights and bias.
 *
 * @param input_size   number of inputs to the layer
 * @param output_size  number of neurons (outputs)
 * @param use_relu     non-zero selects relu/relu_derivative, zero selects
 *                     sigmoid/sigmoid_derivative
 * @return             a new layer owned by the caller (release it with
 *                     layer_free), or NULL if any allocation fails; nothing
 *                     is leaked on failure.
 */
layer_t *layer_create(int input_size, int output_size, int use_relu) {
    layer_t *layer = malloc(sizeof *layer);
    if (layer == NULL) {
        return NULL;
    }

    layer->input_size = input_size;
    layer->output_size = output_size;
    layer->weights = mat_create(output_size, input_size);
    layer->bias = mat_create(output_size, 1);
    layer->input = mat_create(input_size, 1);
    layer->output = mat_create(output_size, 1);
    layer->delta = mat_create(output_size, 1);

    if (layer->weights == NULL || layer->bias == NULL ||
        layer->input == NULL || layer->output == NULL ||
        layer->delta == NULL) {
        // mat_free ignores NULL, so every member can be released
        // unconditionally no matter which allocation failed.
        mat_free(layer->weights);
        mat_free(layer->bias);
        mat_free(layer->input);
        mat_free(layer->output);
        mat_free(layer->delta);
        free(layer);
        return NULL;
    }

    // Random starting values for both weights and bias.
    mat_random(layer->weights);
    mat_random(layer->bias);

    layer->activation = use_relu ? relu : sigmoid;
    layer->activation_derivative = use_relu ? relu_derivative : sigmoid_derivative;
    return layer;
}
/* Release a layer and the five matrices it owns; NULL is ignored. */
void layer_free(layer_t *layer) {
    if (layer == NULL) {
        return;
    }

    mat_free(layer->weights);
    mat_free(layer->bias);
    mat_free(layer->input);
    mat_free(layer->output);
    mat_free(layer->delta);
    free(layer);
}
```
- 神经网络定义
```c
// Feed-forward network: an ordered array of fully connected layers plus the step size used when updating them.
typedef struct {
layer_t **layers;      // array of num_layers layer pointers, input side first
int num_layers;        // number of entries in layers (net_create stores its num_layers argument minus one)
float learning_rate;   // factor applied to gradients when net_backward updates weights and biases
} neural_net_t;
/**
 * Build a network from a list of layer widths.
 *
 * @param layer_sizes    array of num_layers widths: input width first, then
 *                       each subsequent layer's width
 * @param num_layers     number of entries in layer_sizes (must be >= 2, so
 *                       that at least one fully connected layer exists)
 * @param learning_rate  step size stored in the network
 * @return               a new network owned by the caller (release it with
 *                       net_free), or NULL on invalid arguments or
 *                       allocation failure; nothing is leaked on failure.
 *
 * Consecutive widths i and i + 1 define layer i, so num_layers widths yield
 * num_layers - 1 layers. Every layer except the last is created with ReLU;
 * the last one is created with the sigmoid callbacks.
 */
neural_net_t *net_create(int *layer_sizes, int num_layers, float learning_rate) {
    if (layer_sizes == NULL || num_layers < 2) {
        return NULL;
    }

    neural_net_t *net = malloc(sizeof *net);
    if (net == NULL) {
        return NULL;
    }

    net->num_layers = num_layers - 1;
    net->learning_rate = learning_rate;
    net->layers = calloc((size_t)net->num_layers, sizeof *net->layers);
    if (net->layers == NULL) {
        free(net);
        return NULL;
    }

    for (int i = 0; i < net->num_layers; i++) {
        int use_relu = (i < net->num_layers - 1);
        net->layers[i] = layer_create(layer_sizes[i], layer_sizes[i + 1], use_relu);
        if (net->layers[i] == NULL) {
            // Unwind the layers that were already built.
            while (i-- > 0) {
                layer_free(net->layers[i]);
            }
            free(net->layers);
            free(net);
            return NULL;
        }
    }
    return net;
}
/* Release a network, every layer it holds, and the layer array; NULL is ignored. */
void net_free(neural_net_t *net) {
    if (net == NULL) {
        return;
    }

    layer_t **slot = net->layers;
    for (int remaining = net->num_layers; remaining > 0; remaining--) {
        layer_free(*slot);
        slot++;
    }
    free(net->layers);
    free(net);
}
```
- 前向传播
```c
/*
 * Forward pass: feed `input` through every layer in order, leaving each
 * layer's cached input and its output filled in. After the call the last
 * layer's output holds a softmax probability distribution.
 *
 * Hidden layers compute activation(W * x + b). The last layer computes
 * softmax(W * x + b) directly on the raw scores: net_backward seeds the
 * output error with (output - target), which is the cross-entropy gradient
 * with respect to the values fed into softmax. Squashing those values
 * through the layer's sigmoid first would make that gradient wrong and
 * would confine the softmax inputs to (0, 1), capping the largest of ten
 * probabilities at roughly e / (e + 9), i.e. about 0.23.
 */
void net_forward(neural_net_t *net, matrix_t *input) {
    matrix_t *current_input = input;
    const int last = net->num_layers - 1;

    for (int i = 0; i <= last; i++) {
        layer_t *layer = net->layers[i];

        // Keep a copy of the input; backpropagation needs it for dW.
        mat_copy(current_input, layer->input);

        // y = W * x + b
        mat_mul(layer->weights, current_input, layer->output);
        mat_add(layer->output, layer->bias, layer->output);

        if (i < last) {
            // Hidden layer: element-wise activation.
            for (int j = 0; j < layer->output_size; j++) {
                layer->output->data[j] = layer->activation(layer->output->data[j]);
            }
        } else {
            // Output layer: turn the raw scores into probabilities.
            softmax(layer->output);
        }

        current_input = layer->output;
    }
}
```
- 反向传播
```c
/*
 * Backward pass with an immediate gradient-descent step.
 *
 * Expects net_forward to have just run so that every layer's cached input
 * and output are current. `target` supplies one expected value per output
 * neuron.
 *
 * For each layer, from last to first:
 *   1. the error is propagated to the previous layer using this layer's
 *      weights as they were during the forward pass, then scaled by the
 *      previous layer's activation derivative (fed its activated output);
 *   2. only afterwards are this layer's weights and bias updated:
 *      W -= lr * delta * input^T,  b -= lr * delta.
 * Doing step 2 before step 1 would push the error through weights that no
 * longer match the forward pass and give a gradient for the wrong network.
 *
 * Gradients are applied element by element, so no temporary matrices are
 * allocated and the function cannot fail on memory exhaustion.
 */
void net_backward(neural_net_t *net, matrix_t *target) {
    // Output-layer error: prediction minus target.
    layer_t *output_layer = net->layers[net->num_layers - 1];
    for (int i = 0; i < output_layer->output_size; i++) {
        output_layer->delta->data[i] = output_layer->output->data[i] - target->data[i];
    }

    for (int l = net->num_layers - 1; l >= 0; l--) {
        layer_t *layer = net->layers[l];
        const int n_out = layer->output_size;
        const int n_in = layer->input_size;
        const float *delta = layer->delta->data;
        const float *x = layer->input->data;
        float *w = layer->weights->data;
        float *b = layer->bias->data;

        // 1. prev_delta = (W^T * delta) .* f'(prev_output), using the
        //    weights that produced the forward pass.
        if (l > 0) {
            layer_t *prev_layer = net->layers[l - 1];
            for (int j = 0; j < n_in; j++) {
                float sum = 0;
                for (int i = 0; i < n_out; i++) {
                    sum += w[i * n_in + j] * delta[i];
                }
                prev_layer->delta->data[j] =
                    sum * prev_layer->activation_derivative(prev_layer->output->data[j]);
            }
        }

        // 2. Gradient-descent step: dW[i][j] = delta[i] * input[j], db[i] = delta[i].
        for (int i = 0; i < n_out; i++) {
            for (int j = 0; j < n_in; j++) {
                w[i * n_in + j] -= net->learning_rate * (delta[i] * x[j]);
            }
            b[i] -= net->learning_rate * delta[i];
        }
    }
}
/*
 * Train on a single sample: run the forward pass, run the backward pass
 * (which updates the parameters), and return the cross-entropy loss
 *     -sum_k target[k] * log(output[k] + 1e-8)
 * computed from the last layer's output. The small constant guards against
 * log(0).
 */
float net_train(neural_net_t *net, matrix_t *input, matrix_t *target) {
    net_forward(net, input);
    net_backward(net, target);

    const matrix_t *probs = net->layers[net->num_layers - 1]->output;
    float loss = 0;
    int k = 0;
    while (k < target->rows) {
        loss -= target->data[k] * logf(probs->data[k] + 1e-8);
        k++;
    }
    return loss;
}
/*
 * Run a forward pass on `input` and return the index of the largest value
 * in the last layer's output. Ties resolve to the lowest index.
 */
int net_predict(neural_net_t *net, matrix_t *input) {
    net_forward(net, input);

    const matrix_t *scores = net->layers[net->num_layers - 1]->output;
    int best = 0;
    for (int k = 1; k < scores->rows; k++) {
        if (scores->data[k] > scores->data[best]) {
            best = k;
        }
    }
    return best;
}
```
- MNIST数据加载(简化版)
```c
// One MNIST example: an image paired with its label vector.
typedef struct {
matrix_t *image;   // pixel column vector; sample_create allocates it as 784 x 1
matrix_t *label;   // label column vector; sample_create allocates it as 10 x 1
} mnist_sample_t;
/**
 * Allocate an empty MNIST sample: a 784 x 1 image vector (28 x 28 pixels)
 * and a 10 x 1 label vector.
 *
 * @return a new sample owned by the caller (release it with sample_free),
 *         or NULL if any allocation fails; nothing is leaked on failure.
 */
mnist_sample_t *sample_create(void) {
    mnist_sample_t *sample = malloc(sizeof *sample);
    if (sample == NULL) {
        return NULL;
    }

    sample->image = mat_create(784, 1);
    sample->label = mat_create(10, 1);
    if (sample->image == NULL || sample->label == NULL) {
        // mat_free ignores NULL, so both members can be released blindly.
        mat_free(sample->image);
        mat_free(sample->label);
        free(sample);
        return NULL;
    }
    return sample;
}
/*
 * Release a sample and both matrices it owns. NULL is ignored, matching
 * the behaviour of mat_free, layer_free and net_free.
 */
void sample_free(mnist_sample_t *sample) {
    if (sample == NULL) {
        return;
    }
    mat_free(sample->image);
    mat_free(sample->label);
    free(sample);
}
/*
 * Write a one-hot encoding of `label` into the first 10 elements of
 * onehot: element `label` becomes 1.0f and the other nine become 0.0f.
 * A label outside 0..9 leaves all ten elements at 0.0f.
 */
void label_to_onehot(int label, matrix_t *onehot) {
    for (int digit = 0; digit < 10; digit++) {
        onehot->data[digit] = 0.0f;
    }
    if (label >= 0 && label < 10) {
        onehot->data[label] = 1.0f;
    }
}
/*
 * Divide every element of image by 255 in place, which maps pixel values
 * in 0..255 onto the range [0, 1].
 */
void normalize_image(matrix_t *image) {
    const int count = image->rows * image->cols;
    int n = 0;

    while (n < count) {
        image->data[n] = image->data[n] / 255.0f;
        n++;
    }
}
```
三、训练代码
```c
/*
 * Demo driver: builds a 784-128-10 network, trains it for a few epochs and
 * reports an accuracy figure.
 *
 * The "data" is random noise with random labels (no real MNIST files are
 * loaded), so the reported accuracy is expected to hover around chance.
 * Replace the random fills with real image/label loading to train a useful
 * model.
 *
 * One sample buffer is allocated up front and refilled for every training
 * and test example instead of being allocated and freed on each iteration.
 */
int main(void) {
    enum {
        INPUT_SIZE = 784,      // 28 x 28 pixels
        HIDDEN_SIZE = 128,
        OUTPUT_SIZE = 10,      // digits 0-9
        EPOCHS = 10,
        TRAIN_SAMPLES = 1000,  // samples per epoch in this demo
        TEST_SAMPLES = 100
    };

    printf("=== 手写数字识别神经网络 ===\n\n");
    srand((unsigned)time(NULL));

    // Network shape: 784 inputs -> 128 hidden -> 10 outputs.
    int layer_sizes[] = {INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE};
    int num_layers = (int)(sizeof layer_sizes / sizeof layer_sizes[0]);
    neural_net_t *net = net_create(layer_sizes, num_layers, 0.01f);
    mnist_sample_t *sample = sample_create();
    if (net == NULL || sample == NULL) {
        fprintf(stderr, "内存分配失败\n");
        net_free(net);
        if (sample != NULL) {
            sample_free(sample);
        }
        return EXIT_FAILURE;
    }

    printf("网络结构: 输入层784 → 隐藏层128 → 输出层10\n");
    printf("学习率: %.3f\n\n", net->learning_rate);

    printf("开始训练...\n");
    for (int epoch = 0; epoch < EPOCHS; epoch++) {
        float total_loss = 0;
        for (int i = 0; i < TRAIN_SAMPLES; i++) {
            // Placeholder for real image loading: random pixels in [0, 1].
            for (int j = 0; j < INPUT_SIZE; j++) {
                sample->image->data[j] = ((float)rand() / RAND_MAX);
            }
            int true_label = rand() % OUTPUT_SIZE;
            label_to_onehot(true_label, sample->label);
            total_loss += net_train(net, sample->image, sample->label);
        }
        printf("Epoch %d/%d, Loss: %.4f\n", epoch + 1, EPOCHS, total_loss / TRAIN_SAMPLES);
    }

    printf("\n=== 测试 ===\n");
    int correct = 0;
    for (int i = 0; i < TEST_SAMPLES; i++) {
        // Placeholder test data, generated the same way as the training data.
        for (int j = 0; j < INPUT_SIZE; j++) {
            sample->image->data[j] = ((float)rand() / RAND_MAX);
        }
        int true_label = rand() % OUTPUT_SIZE;
        int pred = net_predict(net, sample->image);
        if (pred == true_label) {
            correct++;
        }
    }
    printf("准确率: %.2f%%\n", 100.0 * correct / TEST_SAMPLES);

    sample_free(sample);
    net_free(net);
    return 0;
}
```
四、C语言神经网络的优缺点
| 优点 | 缺点 |
| --- | --- |
| 无依赖,可移植 | 手动写矩阵运算,代码量大 |
| 理解底层原理 | 没有GPU加速 |
| 性能可控 | 缺少高级优化器(Adam等) |
| 适合嵌入式设备 | 开发效率低 |
五、进一步优化方向
- 批归一化(Batch Normalization)
- Dropout防止过拟合
- Adam优化器替代SGD
- 卷积层(CNN)实现
- GPU加速(OpenCL/CUDA)
六、总结
通过这篇文章,你学会了:
· 神经网络的核心原理(前向传播、反向传播、梯度下降)
· 纯C语言实现完整的神经网络
· 矩阵运算、激活函数、损失函数
· 网络训练的基本流程
虽然工业界用Python+深度学习框架,但用C语言实现一遍能帮你彻底理解底层原理。这个知识在嵌入式AI、边缘计算领域依然很有价值。
下一篇预告:《从零实现一个卷积神经网络:手写数字识别实战》
评论区分享一下你对AI的理解~