摘要
线性映射是机器学习领域中最基础且至关重要的数学概念之一。作为向量空间之间的结构保持变换,线性映射为理解机器学习算法提供了坚实的数学基础。本文将深入探讨线性映射的核心原理、数学性质及其在实际应用中的实现。
1. 线性映射的数学定义与核心特性
1.1 形式化定义
设 $V$ 和 $W$ 是域 $F$ 上的两个向量空间,映射 $T: V \rightarrow W$ 被称为线性映射,当且仅当满足以下两个条件:
可加性 :
$$T(\mathbf{u} + \mathbf{v}) = T(\mathbf{u}) + T(\mathbf{v}) \quad \forall \mathbf{u}, \mathbf{v} \in V$$
齐次性 :
$$T(c\mathbf{v}) = cT(\mathbf{v}) \quad \forall \mathbf{v} \in V, \forall c \in F$$
1.2 基本性质推导
- 零向量保持 :$T(\mathbf{0}_V) = \mathbf{0}_W$
- 负向量保持 :$T(-\mathbf{v}) = -T(\mathbf{v})$
- 线性组合保持 :$T(c_1\mathbf{v}_1 + c_2\mathbf{v}_2 + \cdots + c_k\mathbf{v}_k) = c_1T(\mathbf{v}_1) + c_2T(\mathbf{v}_2) + \cdots + c_kT(\mathbf{v}_k)$
c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/*
 * Dense matrix of doubles kept in one contiguous row-major buffer:
 * element (i, j) is stored at data[i * cols + j].
 */
typedef struct {
    double* data; /* heap buffer holding rows * cols elements, NULL when empty */
    int rows;
    int cols;
} Matrix;

/*
 * Allocate a rows x cols matrix with every element set to 0.0.
 *
 * The element count is formed in size_t and checked before multiplying, so
 * large dimensions cannot overflow int arithmetic; calloc additionally
 * rejects a byte count that does not fit in size_t.
 *
 * Returns the matrix by value. When either dimension is not positive, or the
 * allocation fails, the result is an empty matrix: data == NULL and
 * rows == cols == 0. The buffer is released with free_matrix().
 */
Matrix create_matrix(int rows, int cols) {
    Matrix mat = { NULL, 0, 0 };

    if (rows <= 0 || cols <= 0) {
        return mat;
    }

    /* Refuse element counts that would wrap around in size_t. */
    if ((size_t)cols > (size_t)-1 / (size_t)rows) {
        return mat;
    }

    mat.data = calloc((size_t)rows * (size_t)cols, sizeof *mat.data);
    if (mat.data == NULL) {
        return mat;
    }

    /* calloc yields all-bits-zero; store 0.0 explicitly so the guarantee does
     * not depend on the floating-point representation. */
    for (size_t k = 0; k < (size_t)rows * (size_t)cols; k++) {
        mat.data[k] = 0.0;
    }

    mat.rows = rows;
    mat.cols = cols;
    return mat;
}
/*
 * Release the element buffer referenced by mat.
 *
 * The struct is received by value, so the caller's copy keeps its (now
 * stale) data pointer and must not be used after this call.
 */
void free_matrix(Matrix mat) {
    double* buffer = mat.data;

    free(buffer);
}
/*
 * Compute result = A * B for row-major matrices.
 *
 * A is (A.rows x A.cols), B is (B.rows x B.cols); the product is written
 * into the caller-provided result buffer, which must already be allocated
 * with shape (A.rows x B.cols).
 *
 * The operands are validated before any element is touched: a NULL buffer or
 * a shape mismatch (A.cols != B.rows, or result not A.rows x B.cols) would
 * otherwise index outside the buffers. In that case a diagnostic is written
 * to stderr and result is left unmodified.
 *
 * NOTE(review): result is assumed not to share its buffer with A or B;
 * entries are written while the operands are still being read.
 */
void matrix_multiply(Matrix A, Matrix B, Matrix result) {
    if (A.data == NULL || B.data == NULL || result.data == NULL) {
        fprintf(stderr, "matrix_multiply: NULL matrix buffer\n");
        return;
    }
    if (A.cols != B.rows || result.rows != A.rows || result.cols != B.cols) {
        fprintf(stderr,
                "matrix_multiply: incompatible shapes (%dx%d) * (%dx%d) -> (%dx%d)\n",
                A.rows, A.cols, B.rows, B.cols, result.rows, result.cols);
        return;
    }

    for (int i = 0; i < A.rows; i++) {
        for (int j = 0; j < B.cols; j++) {
            /* Dot product of row i of A with column j of B. */
            double sum = 0.0;
            for (int k = 0; k < A.cols; k++) {
                sum += A.data[i * A.cols + k] * B.data[k * B.cols + j];
            }
            result.data[i * result.cols + j] = sum;
        }
    }
}
/*
 * Demo of additivity for the map T(x) = A * x: computes T(v1 + v2) and
 * T(v1) + T(v2) for a fixed 2x2 matrix and two fixed vectors, then prints
 * both results so they can be compared.
 */
int main() {
    enum { DIM = 2 };
    const double a_init[DIM * DIM] = { 2.0, 1.0, 1.0, 3.0 };
    const double v1_init[DIM] = { 1.0, 2.0 };
    const double v2_init[DIM] = { 3.0, 1.0 };

    Matrix A = create_matrix(DIM, DIM);
    for (int k = 0; k < DIM * DIM; k++) {
        A.data[k] = a_init[k];
    }

    Matrix v1 = create_matrix(DIM, 1);
    Matrix v2 = create_matrix(DIM, 1);
    for (int k = 0; k < DIM; k++) {
        v1.data[k] = v1_init[k];
        v2.data[k] = v2_init[k];
    }

    /* Left-hand side input: the vector v1 + v2. */
    Matrix v_sum = create_matrix(DIM, 1);
    for (int k = 0; k < DIM; k++) {
        v_sum.data[k] = v1.data[k] + v2.data[k];
    }

    Matrix image_of_sum = create_matrix(DIM, 1);
    Matrix sum_of_images = create_matrix(DIM, 1);
    Matrix image_v1 = create_matrix(DIM, 1);
    Matrix image_v2 = create_matrix(DIM, 1);

    matrix_multiply(A, v_sum, image_of_sum);
    matrix_multiply(A, v1, image_v1);
    matrix_multiply(A, v2, image_v2);

    /* Right-hand side: add the two images component by component. */
    for (int k = 0; k < DIM; k++) {
        sum_of_images.data[k] = image_v1.data[k] + image_v2.data[k];
    }

    printf("T(v1 + v2) = [%.2f, %.2f]\n", image_of_sum.data[0], image_of_sum.data[1]);
    printf("T(v1) + T(v2) = [%.2f, %.2f]\n", sum_of_images.data[0], sum_of_images.data[1]);

    Matrix owned[] = { A, v1, v2, v_sum, image_of_sum, sum_of_images, image_v1, image_v2 };
    for (size_t k = 0; k < sizeof owned / sizeof owned[0]; k++) {
        free_matrix(owned[k]);
    }
    return 0;
}
2. 线性映射的矩阵表示与几何解释
2.1 矩阵表示理论
对于有限维向量空间,在选定基之后,任何线性映射都可以用矩阵来表示。设 $T: \mathbb{R}^n \rightarrow \mathbb{R}^m$ 是线性映射,则存在 $m \times n$ 矩阵 $A$ 使得:
$$T(\mathbf{x}) = A\mathbf{x}$$
c
#include <stdio.h>
#include <math.h>
#define PI 3.14159265358979323846
/* A point (equivalently, a vector) in the plane. */
typedef struct {
    double x;
    double y;
} Point2D;

/*
 * Replace each of the first `count` entries of `points`, in place, with
 * matrix * point, where the point is treated as the column vector (x, y).
 *
 * Both output coordinates are computed from a snapshot of the original
 * point, so the new x never leaks into the computation of the new y.
 * A count of zero or less leaves the array untouched.
 */
void apply_linear_transform(Point2D* points, int count, double matrix[2][2]) {
    Point2D* p = points;

    for (int remaining = count; remaining > 0; remaining--, p++) {
        const Point2D before = *p;

        p->x = matrix[0][0] * before.x + matrix[0][1] * before.y;
        p->y = matrix[1][0] * before.x + matrix[1][1] * before.y;
    }
}
/*
 * Print `label`, a colon, and then the first `count` points as
 * "(x,y)" pairs with two decimals each, all on one line ending in a newline.
 */
void print_points(Point2D* points, int count, const char* label) {
    int index = 0;

    printf("%s: ", label);
    while (index < count) {
        const Point2D* current = &points[index];

        printf("(%.2f,%.2f) ", current->x, current->y);
        index++;
    }
    printf("\n");
}
/*
 * Demo: applies a scaling, a rotation by PI/4 and a horizontal shear to
 * copies of the same triangle outline and prints the original together
 * with the three transformed versions.
 */
int main() {
    enum { VERTEX_COUNT = 4 };

    /* The first vertex is repeated at the end of the list. */
    Point2D triangle[VERTEX_COUNT] = { {0, 0}, {1, 0}, {0.5, 1}, {0, 0} };

    const double angle = PI / 4;
    const double c = cos(angle);
    const double s = sin(angle);

    double scale_matrix[2][2] = { {2.0, 0.0}, {0.0, 1.5} };
    double rotation_matrix[2][2] = { {c, -s}, {s, c} };
    double shear_matrix[2][2] = { {1.0, 0.5}, {0.0, 1.0} };

    /* Each transform works in place, so give every one its own copy. */
    Point2D scaled[VERTEX_COUNT];
    Point2D rotated[VERTEX_COUNT];
    Point2D sheared[VERTEX_COUNT];
    for (int i = 0; i < VERTEX_COUNT; i++) {
        scaled[i] = triangle[i];
        rotated[i] = triangle[i];
        sheared[i] = triangle[i];
    }

    apply_linear_transform(scaled, VERTEX_COUNT, scale_matrix);
    apply_linear_transform(rotated, VERTEX_COUNT, rotation_matrix);
    apply_linear_transform(sheared, VERTEX_COUNT, shear_matrix);

    print_points(triangle, VERTEX_COUNT, "Original");
    print_points(scaled, VERTEX_COUNT, "Scaled");
    print_points(rotated, VERTEX_COUNT, "Rotated");
    print_points(sheared, VERTEX_COUNT, "Sheared");
    return 0;
}
3. 核与像的空间理论
3.1 核心概念
核 :
$$\text{Ker}(T) = \{\mathbf{v} \in V \mid T(\mathbf{v}) = \mathbf{0}\}$$
像 :
$$\text{Im}(T) = \{T(\mathbf{v}) \in W \mid \mathbf{v} \in V\}$$
3.2 秩-零化度定理
$$\text{dim}(\text{Ker}(T)) + \text{dim}(\text{Im}(T)) = \text{dim}(V)$$
c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/* Entries whose magnitude does not exceed this are treated as zero when a
 * pivot is chosen. */
#define EPSILON 1e-10

/*
 * Numerical rank of a rows x cols matrix given as an array of row pointers.
 *
 * The input is copied, and the copy is reduced to row-echelon form by
 * Gaussian elimination with partial pivoting: in every column the remaining
 * entry of largest magnitude is used as the pivot. Picking the largest entry
 * (rather than the first one above the tolerance) keeps the elimination
 * factors at most 1 in magnitude, so rounding errors are not amplified into
 * spurious non-zero rows.
 *
 * Returns the number of pivots found, 0 for a NULL matrix or non-positive
 * dimensions, and -1 if the working copy cannot be allocated. The caller's
 * matrix is never modified.
 */
int matrix_rank(double** matrix, int rows, int cols) {
    if (matrix == NULL || rows <= 0 || cols <= 0) {
        return 0;
    }

    double** work = calloc((size_t)rows, sizeof *work);
    if (work == NULL) {
        return -1;
    }
    for (int i = 0; i < rows; i++) {
        work[i] = calloc((size_t)cols, sizeof *work[i]);
        if (work[i] == NULL) {
            /* Undo the rows allocated so far before reporting failure. */
            while (i-- > 0) {
                free(work[i]);
            }
            free(work);
            return -1;
        }
        for (int j = 0; j < cols; j++) {
            work[i][j] = matrix[i][j];
        }
    }

    int rank = 0;
    for (int col = 0; col < cols && rank < rows; col++) {
        /* Partial pivoting: largest magnitude among the unreduced rows. */
        int pivot_row = rank;
        double pivot_mag = fabs(work[rank][col]);
        for (int row = rank + 1; row < rows; row++) {
            const double mag = fabs(work[row][col]);
            if (mag > pivot_mag) {
                pivot_mag = mag;
                pivot_row = row;
            }
        }

        /* Negated comparison so a NaN column is skipped as well. */
        if (!(pivot_mag > EPSILON)) {
            continue;
        }

        if (pivot_row != rank) {
            double* swapped = work[rank];
            work[rank] = work[pivot_row];
            work[pivot_row] = swapped;
        }

        const double pivot = work[rank][col];
        for (int i = rank + 1; i < rows; i++) {
            const double factor = work[i][col] / pivot;

            /* The pivot column is cleared exactly instead of by subtraction. */
            work[i][col] = 0.0;
            for (int j = col + 1; j < cols; j++) {
                work[i][j] -= factor * work[rank][j];
            }
        }
        rank++;
    }

    for (int i = 0; i < rows; i++) {
        free(work[i]);
    }
    free(work);
    return rank;
}
/*
 * Demo of the rank-nullity relation: builds a fixed 3x3 matrix whose second
 * row is twice the first, computes its rank with matrix_rank(), derives the
 * nullity as cols - rank, and prints both next to the domain dimension.
 */
int main() {
    static const double entries[3][3] = {
        { 1.0, 2.0, 3.0 },
        { 2.0, 4.0, 6.0 },
        { 1.0, 1.0, 1.0 },
    };
    int rows = 3, cols = 3;

    double** A = malloc(rows * sizeof *A);
    for (int r = 0; r < rows; r++) {
        A[r] = malloc(cols * sizeof *A[r]);
    }
    for (int r = 0; r < rows; r++) {
        for (int c = 0; c < cols; c++) {
            A[r][c] = entries[r][c];
        }
    }

    const int rank = matrix_rank(A, rows, cols);
    const int nullity = cols - rank;

    printf("Rank: %d\n", rank);
    printf("Nullity: %d\n", nullity);
    printf("Rank + Nullity: %d\n", rank + nullity);
    printf("Domain Dimension: %d\n", cols);

    for (int r = 0; r < rows; r++) {
        free(A[r]);
    }
    free(A);
    return 0;
}
4. 机器学习中的线性映射应用
4.1 基础线性模型
线性回归、逻辑回归等基础模型的核心都是线性映射:模型先计算 $W\mathbf{x} + \mathbf{b}$(带偏置项时严格来说是仿射映射),逻辑回归再在其输出上施加 sigmoid 非线性函数。
c
#include <stdio.h>
#include <stdlib.h>
/*
 * Parameters of the map output = weights * input + bias.
 */
typedef struct {
    double** weights; /* output_dim row pointers, each row has input_dim entries */
    double* bias;     /* output_dim entries */
    int input_dim;
    int output_dim;
} LinearModel;

/*
 * Build a model with every weight set to 0.1 and every bias set to 0.0.
 *
 * Every allocation is checked. If any of them fails, everything allocated so
 * far is released again, so a failed call does not leak.
 *
 * Returns the model by value. When either dimension is not positive or an
 * allocation fails, the result is an empty model: weights == NULL,
 * bias == NULL and both dimensions 0. Release with free_linear_model().
 */
LinearModel create_linear_model(int input_dim, int output_dim) {
    LinearModel model = { NULL, NULL, 0, 0 };

    if (input_dim <= 0 || output_dim <= 0) {
        return model;
    }

    double** weights = calloc((size_t)output_dim, sizeof *weights);
    double* bias = calloc((size_t)output_dim, sizeof *bias);
    int rows_allocated = 0;

    if (weights == NULL || bias == NULL) {
        goto fail;
    }

    for (int i = 0; i < output_dim; i++) {
        weights[i] = calloc((size_t)input_dim, sizeof *weights[i]);
        if (weights[i] == NULL) {
            goto fail;
        }
        rows_allocated++;
        for (int j = 0; j < input_dim; j++) {
            weights[i][j] = 0.1;
        }
        bias[i] = 0.0;
    }

    model.weights = weights;
    model.bias = bias;
    model.input_dim = input_dim;
    model.output_dim = output_dim;
    return model;

fail:
    /* Only the rows that were actually obtained are freed. */
    for (int i = 0; i < rows_allocated; i++) {
        free(weights[i]);
    }
    free(weights);
    free(bias);
    return model;
}
/*
 * Release all storage referenced by model: each of the output_dim weight
 * rows, then the row-pointer array, then the bias vector.
 *
 * The struct is received by value, so the caller's copy keeps its stale
 * pointers and must not be used afterwards.
 */
void free_linear_model(LinearModel model) {
    int row = model.output_dim;

    while (row-- > 0) {
        free(model.weights[row]);
    }
    free(model.bias);
    free(model.weights);
}
/*
 * Evaluate output = weights * input + bias for one sample.
 *
 * Reads input_dim values from `input` and writes output_dim values to
 * `output`. Each output entry is seeded with its bias and the products are
 * then accumulated directly into that entry, one input at a time.
 */
void forward_pass(LinearModel model, double* input, double* output) {
    const int n_out = model.output_dim;
    const int n_in = model.input_dim;

    for (int row = 0; row < n_out; row++) {
        double* y = &output[row];

        *y = model.bias[row];
        for (int col = 0; col < n_in; col++) {
            *y += model.weights[row][col] * input[col];
        }
    }
}
/*
 * Demo: builds a 5-input / 3-output linear model, runs it on a batch of two
 * generated input vectors (entry j of sample i is (i + j) * 0.1) and prints
 * the three outputs of every sample.
 */
int main() {
    const int input_size = 5;
    const int output_size = 3;
    const int batch_size = 2;

    LinearModel model = create_linear_model(input_size, output_size);

    double** inputs = malloc(batch_size * sizeof *inputs);
    double** outputs = malloc(batch_size * sizeof *outputs);
    for (int sample = 0; sample < batch_size; sample++) {
        inputs[sample] = malloc(input_size * sizeof *inputs[sample]);
        outputs[sample] = malloc(output_size * sizeof *outputs[sample]);
        for (int feature = 0; feature < input_size; feature++) {
            inputs[sample][feature] = (sample + feature) * 0.1;
        }
    }

    for (int sample = 0; sample < batch_size; sample++) {
        forward_pass(model, inputs[sample], outputs[sample]);

        printf("Batch %d output: ", sample);
        for (int k = 0; k < output_size; k++) {
            printf("%.3f ", outputs[sample][k]);
        }
        printf("\n");
    }

    for (int sample = 0; sample < batch_size; sample++) {
        free(inputs[sample]);
        free(outputs[sample]);
    }
    free(inputs);
    free(outputs);
    free_linear_model(model);
    return 0;
}
5. 奇异值分解与线性映射
c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/*
 * Print a rows x cols matrix given as an array of row pointers: one line per
 * row, each entry formatted as "%8.4f" followed by a space.
 */
void matrix_print(double** A, int rows, int cols) {
    int r = 0;

    while (r < rows) {
        int c = 0;

        while (c < cols) {
            printf("%8.4f ", A[r][c]);
            c++;
        }
        printf("\n");
        r++;
    }
}
void svd_2x2(double A[2][2], double U[2][2], double S[2], double Vt[2][2]) {
double a = A[0][0], b = A[0][1], c = A[1][0], d = A[1][1];
double M = a*a + b*b + c*c + d*d;
double sqrt_M = sqrt(M);
double sqrt_M2_4ad_4bc = sqrt(M*M - 4*(a*d - b*c)*(a*d - b*c));
S[0] = sqrt((M + sqrt_M2_4ad_4bc) / 2);
S[1] = sqrt((M - sqrt_M2_4ad_4bc) / 2);
double theta = 0.5 * atan2(2*(a*b + c*d), a*a + c*c - b*b - d*d);
double phi = 0.5 * atan2(2*(a*c + b*d), a*a + b*b - c*c - d*d);
U[0][0] = cos(phi); U[0][1] = -sin(phi);
U[1][0] = sin(phi); U[1][1] = cos(phi);
Vt[0][0] = cos(theta); Vt[0][1] = sin(theta);
Vt[1][0] = -sin(theta); Vt[1][1] = cos(theta);
}
/*
 * Demo: decomposes the fixed matrix [[3, 1], [1, 3]] with svd_2x2() and
 * prints the matrix, its two singular values, U and Vt.
 */
int main() {
    double A[2][2] = { {3, 1}, {1, 3} };
    double U[2][2];
    double S[2];
    double Vt[2][2];

    svd_2x2(A, U, S, Vt);

    printf("Original matrix A:\n");
    for (int r = 0; r < 2; r++) {
        printf("%.4f %.4f\n", A[r][0], A[r][1]);
    }

    printf("\nSingular values: %.4f, %.4f\n", S[0], S[1]);

    printf("\nLeft singular vectors U:\n");
    for (int r = 0; r < 2; r++) {
        printf("%.4f %.4f\n", U[r][0], U[r][1]);
    }

    printf("\nRight singular vectors Vt:\n");
    for (int r = 0; r < 2; r++) {
        printf("%.4f %.4f\n", Vt[r][0], Vt[r][1]);
    }
    return 0;
}
6. 数值稳定性与性能优化
c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/*
 * Square matrix of doubles stored as `size` separately allocated rows of
 * `size` entries each; element (i, j) is data[i][j].
 */
typedef struct {
    double** data; /* array of row pointers, NULL when empty */
    int size;      /* number of rows and of columns */
} SquareMatrix;

/*
 * Allocate a size x size matrix with every element set to 0.0.
 *
 * Every allocation is checked; if one fails, the rows obtained so far and
 * the row-pointer array are released again, so a failed call does not leak.
 *
 * Returns the matrix by value. When size is not positive or an allocation
 * fails, the result is an empty matrix: data == NULL and size == 0.
 * Release with free_square_matrix().
 */
SquareMatrix create_square_matrix(int size) {
    SquareMatrix mat = { NULL, 0 };

    if (size <= 0) {
        return mat;
    }

    double** rows = calloc((size_t)size, sizeof *rows);
    if (rows == NULL) {
        return mat;
    }

    for (int i = 0; i < size; i++) {
        rows[i] = calloc((size_t)size, sizeof *rows[i]);
        if (rows[i] == NULL) {
            while (i-- > 0) {
                free(rows[i]);
            }
            free(rows);
            return mat;
        }
        /* Store 0.0 explicitly rather than relying on all-bits-zero. */
        for (int j = 0; j < size; j++) {
            rows[i][j] = 0.0;
        }
    }

    mat.data = rows;
    mat.size = size;
    return mat;
}
/*
 * Release every row of mat and then the array of row pointers.
 *
 * The struct is received by value, so the caller's copy keeps its stale
 * pointer and must not be used afterwards.
 */
void free_square_matrix(SquareMatrix mat) {
    int row = mat.size;

    while (row-- > 0) {
        free(mat.data[row]);
    }
    free(mat.data);
}
/*
 * Condition number of A in the infinity norm:
 *     cond(A) = ||A||_inf * ||A^-1||_inf,
 * where ||.||_inf is the largest absolute row sum.
 *
 * The norm of A alone says nothing about conditioning, so the inverse is
 * computed as well: the augmented matrix [A | I] is reduced by Gauss-Jordan
 * elimination with partial pivoting, after which its right half holds A^-1.
 *
 * Returns:
 *   - cond(A) for an invertible matrix,
 *   - INFINITY when a pivot column is entirely zero (A is singular),
 *   - NAN when the work buffer cannot be allocated,
 *   - 0.0 for an empty matrix (size <= 0 or data == NULL).
 * A is not modified.
 */
double matrix_condition_number(SquareMatrix A) {
    const int n = A.size;

    if (n <= 0 || A.data == NULL) {
        return 0.0;
    }

    /* Row-major n x 2n work area: left half A, right half identity. */
    const size_t width = 2 * (size_t)n;
    double* work = calloc((size_t)n, width * sizeof *work);
    if (work == NULL) {
        return NAN;
    }

    double norm_a = 0.0;
    for (int i = 0; i < n; i++) {
        double* row = work + (size_t)i * width;
        double row_sum = 0.0;

        for (int j = 0; j < n; j++) {
            row[j] = A.data[i][j];
            row[n + j] = (i == j) ? 1.0 : 0.0;
            row_sum += fabs(A.data[i][j]);
        }
        if (row_sum > norm_a) {
            norm_a = row_sum;
        }
    }

    for (int col = 0; col < n; col++) {
        /* Partial pivoting: largest magnitude at or below the diagonal. */
        int pivot_row = col;
        double best = fabs(work[(size_t)col * width + (size_t)col]);
        for (int r = col + 1; r < n; r++) {
            const double mag = fabs(work[(size_t)r * width + (size_t)col]);
            if (mag > best) {
                best = mag;
                pivot_row = r;
            }
        }

        /* No usable pivot: singular (the negated test also catches NaN). */
        if (!(best > 0.0)) {
            free(work);
            return INFINITY;
        }

        double* prow = work + (size_t)col * width;
        if (pivot_row != col) {
            double* other = work + (size_t)pivot_row * width;
            for (size_t j = 0; j < width; j++) {
                const double tmp = prow[j];
                prow[j] = other[j];
                other[j] = tmp;
            }
        }

        const double pivot = prow[col];
        for (size_t j = 0; j < width; j++) {
            prow[j] /= pivot;
        }

        /* Clear this column in every other row (above and below). */
        for (int r = 0; r < n; r++) {
            if (r == col) {
                continue;
            }
            double* target = work + (size_t)r * width;
            const double factor = target[col];
            if (factor == 0.0) {
                continue;
            }
            for (size_t j = 0; j < width; j++) {
                target[j] -= factor * prow[j];
            }
        }
    }

    double norm_inv = 0.0;
    for (int i = 0; i < n; i++) {
        const double* inv_row = work + (size_t)i * width + (size_t)n;
        double row_sum = 0.0;

        for (int j = 0; j < n; j++) {
            row_sum += fabs(inv_row[j]);
        }
        if (row_sum > norm_inv) {
            norm_inv = row_sum;
        }
    }

    free(work);
    return norm_a * norm_inv;
}
/*
 * Return a newly allocated copy of A with lambda added to every diagonal
 * entry, i.e. A + lambda * I. A itself is left unchanged; the caller owns
 * the returned matrix.
 */
SquareMatrix ridge_regression(SquareMatrix A, double lambda) {
    const int n = A.size;
    SquareMatrix result = create_square_matrix(n);

    for (int row = 0; row < n; row++) {
        double* dst = result.data[row];
        const double* src = A.data[row];

        for (int col = 0; col < n; col++) {
            dst[col] = src[col];
        }
        /* Shift only the diagonal entry of this row. */
        dst[row] += lambda;
    }
    return result;
}
/*
 * Demo: builds the nearly singular matrix [[1, 1], [1, 1.0001]], prints the
 * value returned by matrix_condition_number() for it, then prints the
 * matrix returned by ridge_regression() with lambda = 0.01.
 */
int main() {
    enum { N = 2 };
    static const double entries[N][N] = {
        { 1.0, 1.0 },
        { 1.0, 1.0001 },
    };

    SquareMatrix ill_conditioned = create_square_matrix(N);
    for (int r = 0; r < N; r++) {
        for (int c = 0; c < N; c++) {
            ill_conditioned.data[r][c] = entries[r][c];
        }
    }

    const double cond_number = matrix_condition_number(ill_conditioned);
    printf("Condition number: %.2e\n", cond_number);

    SquareMatrix regularized = ridge_regression(ill_conditioned, 0.01);
    printf("Regularized matrix:\n");
    for (int r = 0; r < N; r++) {
        for (int c = 0; c < N; c++) {
            printf("%.6f ", regularized.data[r][c]);
        }
        printf("\n");
    }

    free_square_matrix(ill_conditioned);
    free_square_matrix(regularized);
    return 0;
}
结论
线性映射作为机器学习数学基础的核心构件,为理解复杂模型提供了统一的数学框架。通过掌握线性映射的理论性质和实际实现,开发者能够设计出更高效、稳定的机器学习系统。本文提供的C语言实现展示了线性映射在实践中的具体应用,为工程实践提供了可靠参考。