本文使用 GitHub 上比较热门的开源项目 DdddOcr(带带弟弟 OCR,通用验证码离线本地识别)提供的模型进行验证码文字识别。项目地址:ddddocr/ddddocr at master · sml2h3/ddddocr · GitHub
common.onnx模型信息:

部署代码:
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using OpenCvSharp;
using OpenCvSharp.Extensions;
using OpenVinoSharp;
using Size = OpenCvSharp.Size;
namespace WinFormOCR
{
/// <summary>
/// Captcha text recognizer that compiles an ONNX model with OpenVINO on the CPU device
/// and decodes its output with greedy CTC decoding.
/// </summary>
/// <remarks>
/// The engine owns native OpenVINO handles (core, compiled model, infer request), so it
/// must be disposed when no longer needed. A single infer request is reused for every
/// call to <see cref="Recognize"/>.
/// </remarks>
public class SimpleOCREngine : IDisposable
{
    private Core _core;
    private CompiledModel _compiledModel;
    private InferRequest _inferRequest;
    private List<char> _charset;

    // Height (in pixels) every input image is resized to before inference.
    private readonly int _targetHeight = 64;

    private bool _isInitialized = false;
    private bool _disposed = false;

    /// <summary>
    /// Creates the engine, compiling the model and loading the character set.
    /// </summary>
    /// <param name="modelPath">Path to the model file passed to OpenVINO.</param>
    /// <param name="charsetPath">Path to a text file with one character per line.</param>
    /// <exception cref="Exception">
    /// Thrown when any initialization step fails; the original error is the inner exception.
    /// </exception>
    public SimpleOCREngine(string modelPath, string charsetPath)
    {
        Initialize(modelPath, charsetPath);
    }

    /// <summary>
    /// Performs the actual initialization. On failure, any native object that was already
    /// created is released, because the caller never receives an instance it could dispose.
    /// </summary>
    private void Initialize(string modelPath, string charsetPath)
    {
        try
        {
            // 1. Initialize OpenVINO.
            _core = new Core();

            // 2. Load and compile the model.
            if (!File.Exists(modelPath))
            {
                throw new FileNotFoundException($"模型文件不存在: {modelPath}");
            }
            _compiledModel = _core.compile_model(modelPath, "CPU");

            // 3. Load the character set.
            LoadCharset(charsetPath);

            // 4. Create the reusable inference request.
            _inferRequest = _compiledModel.create_infer_request();
            _isInitialized = true;
        }
        catch (Exception ex)
        {
            ReleaseNativeResources();
            throw new Exception($"OCR引擎初始化失败: {ex.Message}", ex);
        }
    }

    /// <summary>
    /// Reads the character set file into <c>_charset</c>, one entry per non-empty line.
    /// </summary>
    /// <remarks>
    /// The literal lines <c>\n</c> and <c>\t</c> are mapped to the newline and tab characters;
    /// for any other line only its first character is used. Empty lines are skipped. A
    /// <c>'\0'</c> placeholder is inserted at index 0 (unless already present) so that index 0
    /// lines up with the CTC blank class that <c>DecodeOutput</c> skips.
    /// </remarks>
    /// <exception cref="FileNotFoundException">The character set file does not exist.</exception>
    /// <exception cref="InvalidDataException">The file contains no usable characters.</exception>
    private void LoadCharset(string charsetPath)
    {
        if (!File.Exists(charsetPath))
        {
            throw new FileNotFoundException($"字符集文件不存在: {charsetPath}");
        }

        var charset = new List<char>();
        foreach (var line in File.ReadAllLines(charsetPath))
        {
            if (string.IsNullOrEmpty(line))
            {
                continue;
            }

            // Escaped control characters are stored as two-character sequences in the file.
            if (line == "\\n")
            {
                charset.Add('\n');
            }
            else if (line == "\\t")
            {
                charset.Add('\t');
            }
            else
            {
                charset.Add(line[0]);
            }
        }

        // An empty charset would make every recognition silently return "".
        if (charset.Count == 0)
        {
            throw new InvalidDataException($"字符集文件为空: {charsetPath}");
        }

        // Make sure index 0 is the CTC blank placeholder.
        if (charset[0] != '\0')
        {
            charset.Insert(0, '\0');
        }

        _charset = charset;
    }

    /// <summary>
    /// Recognizes the text in <paramref name="image"/>.
    /// </summary>
    /// <param name="image">Input image with 1, 3 (BGR) or 4 (BGRA) channels. Not modified or disposed.</param>
    /// <returns>The decoded text, upper-cased.</returns>
    /// <exception cref="ObjectDisposedException">The engine has been disposed.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
    /// <exception cref="Exception">
    /// Preprocessing or inference failed; the original error is the inner exception.
    /// </exception>
    public string Recognize(Mat image)
    {
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(SimpleOCREngine));
        }
        if (!_isInitialized)
        {
            throw new Exception("OCR引擎未初始化");
        }
        if (image == null)
        {
            throw new ArgumentNullException(nameof(image));
        }

        try
        {
            // 1. Preprocess the image, 2. run inference and decode.
            float[] processedImage = PreprocessImage(image);
            return Inference(processedImage);
        }
        catch (Exception ex)
        {
            throw new Exception($"OCR识别失败: {ex.Message}", ex);
        }
    }

    /// <summary>
    /// Converts the image to a single-channel float buffer: grayscale, resized to
    /// <c>_targetHeight</c> rows with the aspect ratio preserved, scaled by 1/255.
    /// </summary>
    /// <returns>Row-major pixel data of length <c>targetWidth * _targetHeight</c>.</returns>
    private float[] PreprocessImage(Mat image)
    {
        if (image.Empty())
        {
            throw new ArgumentException("输入图像为空", nameof(image));
        }

        // All temporaries wrap native memory; the using blocks release them even on failure.
        using (Mat gray = new Mat())
        using (Mat resized = new Mat())
        using (Mat normalized = new Mat())
        {
            // Convert to grayscale.
            switch (image.Channels())
            {
                case 3:
                    Cv2.CvtColor(image, gray, ColorConversionCodes.BGR2GRAY);
                    break;
                case 4:
                    Cv2.CvtColor(image, gray, ColorConversionCodes.BGRA2GRAY);
                    break;
                default:
                    image.CopyTo(gray);
                    break;
            }

            // Compute the target width that keeps the aspect ratio; never let it collapse to 0.
            double scale = (double)_targetHeight / gray.Height;
            int targetWidth = Math.Max(1, (int)(gray.Width * scale));

            // Resize to the target height.
            Cv2.Resize(gray, resized, new Size(targetWidth, _targetHeight));

            // Normalize to [0, 1].
            // NOTE(review): the upstream Python implementation appears to apply an additional
            // (x - 0.5) / 0.5 step for its bundled models - confirm the input range this model expects.
            resized.ConvertTo(normalized, MatType.CV_32FC1, 1 / 255.0);

            float[] data = new float[targetWidth * _targetHeight];
            Marshal.Copy(normalized.Data, data, 0, data.Length);
            return data;
        }
    }

    /// <summary>
    /// Feeds the preprocessed pixels to the model as a [1, 1, height, width] tensor and
    /// decodes the output tensor into text.
    /// </summary>
    private string Inference(float[] inputData)
    {
        // Input shape is [1, 1, _targetHeight, width]; width follows from the buffer length.
        int width = inputData.Length / _targetHeight;
        int[] inputShape = { 1, 1, _targetHeight, width };

        using (var inputTensor = _inferRequest.get_input_tensor())
        {
            inputTensor.set_shape(new Shape(inputShape));
            inputTensor.set_data(inputData);

            _inferRequest.infer();

            using (var outputTensor = _inferRequest.get_output_tensor())
            {
                var outputData = outputTensor.get_data<float>((int)outputTensor.get_size());
                var outputShape = outputTensor.get_shape();

                // Output is read as [seqLen, 1, numClasses]: dimension 0 is the number of
                // time steps and dimension 2 the number of classes.
                int seqLen = (int)outputShape[0];
                int numClasses = (int)outputShape[2];

                return DecodeOutput(outputData, seqLen, numClasses);
            }
        }
    }

    /// <summary>
    /// Greedy CTC decoding: takes the arg-max class at every time step, drops blanks
    /// (class 0) and collapses consecutive repeats, then maps class indices to characters.
    /// </summary>
    /// <param name="outputData">Flattened scores, <paramref name="numClasses"/> values per time step.</param>
    /// <param name="seqLen">Number of time steps.</param>
    /// <param name="numClasses">Number of classes per time step.</param>
    /// <returns>The decoded, upper-cased text. Indices outside the charset are ignored.</returns>
    private string DecodeOutput(float[] outputData, int seqLen, int numClasses)
    {
        if ((long)seqLen * numClasses > outputData.Length)
        {
            throw new ArgumentException("模型输出数据长度与输出形状不匹配", nameof(outputData));
        }

        var text = new System.Text.StringBuilder();
        int previousIndex = -1; // no class seen yet

        for (int t = 0; t < seqLen; t++)
        {
            // Arg-max over the classes of this time step (first maximum wins).
            int rowStart = t * numClasses;
            int bestIndex = 0;
            float bestValue = float.MinValue;
            for (int c = 0; c < numClasses; c++)
            {
                float value = outputData[rowStart + c];
                if (value > bestValue)
                {
                    bestValue = value;
                    bestIndex = c;
                }
            }

            // Skip the blank class and consecutive duplicates. A blank between two equal
            // classes resets previousIndex, so genuinely repeated characters are kept.
            bool isBlank = bestIndex == 0;
            bool isRepeat = bestIndex == previousIndex;
            if (!isBlank && !isRepeat && bestIndex < _charset.Count)
            {
                // Invariant casing keeps the result independent of the current culture.
                text.Append(char.ToUpperInvariant(_charset[bestIndex]));
            }
            previousIndex = bestIndex;
        }

        return text.ToString();
    }

    /// <summary>
    /// Releases the native OpenVINO objects in reverse order of creation and clears the
    /// fields so a second call is a no-op.
    /// </summary>
    private void ReleaseNativeResources()
    {
        _inferRequest?.Dispose();
        _inferRequest = null;
        _compiledModel?.Dispose();
        _compiledModel = null;
        _core?.Dispose();
        _core = null;
    }

    /// <summary>
    /// Releases all native resources. Safe to call more than once; after disposal
    /// <see cref="Recognize"/> throws <see cref="ObjectDisposedException"/>.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;
        _isInitialized = false;
        ReleaseNativeResources();
    }
}
}
使用方法:
// `src` is the captcha image (an OpenCvSharp Mat) supplied by the caller.
// The engine holds native OpenVINO resources, so dispose it once recognition is finished.
string str;
using (SimpleOCREngine simpleOCR = new SimpleOCREngine("C:\\common.onnx", "classes.txt"))
{
    str = simpleOCR.Recognize(src);
}