C# OpenvinoSharp部署DDDDOCR验证码识别模型

本文使用 GitHub 上比较火的开源项目 DdddOcr（带带弟弟 OCR，通用验证码离线本地识别）所提供的模型，进行验证码文字识别。项目地址：ddddocr/ddddocr at master · sml2h3/ddddocr · GitHub

common.onnx模型信息：输入为单通道灰度图，形状为 [1, 1, 64, width]（高度固定为 64，宽度随图片按比例变化）；输出形状为 [seqlen, 1, 8210]。

部署代码：

using System;

using System.Collections.Generic;

using System.Drawing;

using System.Drawing.Imaging;

using System.IO;

using System.Linq;

using System.Runtime.InteropServices;

using OpenCvSharp;

using OpenCvSharp.Extensions;

using OpenVinoSharp;

using Size = OpenCvSharp.Size;

namespace WinFormOCR
{
    /// <summary>
    /// Minimal captcha OCR engine: compiles an ONNX model for the OpenVINO "CPU" device,
    /// feeds it a grayscale image resized to a fixed height, and decodes the output with
    /// greedy CTC decoding against a character set loaded from a text file.
    /// </summary>
    public class SimpleOCREngine : IDisposable
    {
        private Core _core;
        private CompiledModel _compiledModel;
        private InferRequest _inferRequest;
        private List<char> _charset;

        // Height every input image is resized to; the width follows the aspect ratio.
        private readonly int _targetHeight = 64;

        private bool _isInitialized = false;

        /// <summary>
        /// Creates the engine and loads the model and character set.
        /// </summary>
        /// <param name="modelPath">Path of the model file to compile.</param>
        /// <param name="charsetPath">Path of the character set file (one character per line).</param>
        /// <exception cref="InvalidOperationException">
        /// Initialization failed; the original error is available as the inner exception.
        /// </exception>
        public SimpleOCREngine(string modelPath, string charsetPath)
        {
            Initialize(modelPath, charsetPath);
        }

        /// <summary>
        /// Validates the input files, compiles the model, loads the character set and
        /// creates the inference request.
        /// </summary>
        private void Initialize(string modelPath, string charsetPath)
        {
            try
            {
                // Check the model file before allocating any native OpenVINO object.
                if (!File.Exists(modelPath))
                {
                    throw new FileNotFoundException($"模型文件不存在: {modelPath}");
                }

                // 1. Initialize OpenVINO.
                _core = new Core();

                // 2. Load and compile the model.
                _compiledModel = _core.compile_model(modelPath, "CPU");

                // 3. Load the character set.
                LoadCharset(charsetPath);

                // 4. Create the inference request.
                _inferRequest = _compiledModel.create_infer_request();

                _isInitialized = true;
            }
            catch (Exception ex)
            {
                // The constructor is about to throw, so the caller never receives an
                // instance it could dispose: release whatever was created so far.
                Dispose();
                throw new InvalidOperationException($"OCR引擎初始化失败: {ex.Message}", ex);
            }
        }

        /// <summary>
        /// Reads the character set file. Each non-empty line contributes its first
        /// character; the literal lines <c>\n</c> and <c>\t</c> map to newline and tab.
        /// Index 0 is reserved for the CTC blank.
        /// </summary>
        /// <exception cref="FileNotFoundException">The character set file does not exist.</exception>
        private void LoadCharset(string charsetPath)
        {
            if (!File.Exists(charsetPath))
            {
                throw new FileNotFoundException($"字符集文件不存在: {charsetPath}");
            }

            var charset = new List<char>();

            foreach (var line in File.ReadAllLines(charsetPath))
            {
                if (string.IsNullOrEmpty(line))
                {
                    continue;
                }

                // Escaped control characters are stored as two-character sequences.
                if (line == "\\n")
                {
                    charset.Add('\n');
                }
                else if (line == "\\t")
                {
                    charset.Add('\t');
                }
                else
                {
                    charset.Add(line[0]);
                }
            }

            // Make sure the first entry is the blank symbol (CTC blank).
            if (charset.Count > 0 && charset[0] != '\0')
            {
                charset.Insert(0, '\0');
            }

            _charset = charset;
        }

        /// <summary>
        /// Recognizes the text contained in a captcha image.
        /// </summary>
        /// <param name="image">Image with 1, 3 (BGR) or 4 (BGRA) channels.</param>
        /// <returns>The decoded text, converted to upper case.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="image"/> contains no pixels.</exception>
        /// <exception cref="InvalidOperationException">
        /// The engine is not initialized (or was disposed), or recognition failed.
        /// </exception>
        public string Recognize(Mat image)
        {
            if (!_isInitialized)
            {
                throw new InvalidOperationException("OCR引擎未初始化");
            }

            if (image == null)
            {
                throw new ArgumentNullException(nameof(image));
            }

            // An empty Mat has zero height, which would make the resize scale infinite.
            if (image.Empty())
            {
                throw new ArgumentException("输入图像为空", nameof(image));
            }

            try
            {
                // 1. Preprocess the image.
                float[] processedImage = PreprocessImage(image);

                // 2. Run inference and decode.
                return Inference(processedImage);
            }
            catch (Exception ex)
            {
                throw new InvalidOperationException($"OCR识别失败: {ex.Message}", ex);
            }
        }

        /// <summary>
        /// Converts the image to grayscale, resizes it to the target height while keeping
        /// the aspect ratio, and scales the pixel values by 1/255.
        /// </summary>
        /// <returns>Row-major float pixels, <c>targetWidth * _targetHeight</c> values.</returns>
        private float[] PreprocessImage(Mat image)
        {
            // All temporaries are released even when an OpenCV call throws.
            using (Mat gray = new Mat())
            using (Mat resized = new Mat())
            using (Mat normalized = new Mat())
            {
                // Convert to grayscale.
                int channels = image.Channels();
                if (channels == 3)
                {
                    Cv2.CvtColor(image, gray, ColorConversionCodes.BGR2GRAY);
                }
                else if (channels == 4)
                {
                    Cv2.CvtColor(image, gray, ColorConversionCodes.BGRA2GRAY);
                }
                else
                {
                    image.CopyTo(gray);
                }

                // Compute the target width, keeping the aspect ratio; never below 1 pixel.
                double scale = (double)_targetHeight / gray.Height;
                int targetWidth = Math.Max(1, (int)(gray.Width * scale));

                // Resize to the target height (64 pixels).
                Cv2.Resize(gray, resized, new Size(targetWidth, _targetHeight));

                // Normalize to [0, 1].
                // NOTE(review): the upstream ddddocr Python pipeline appears to normalize
                // further to [-1, 1] for its bundled models - confirm against the reference.
                resized.ConvertTo(normalized, MatType.CV_32FC1, 1 / 255.0);

                float[] data = new float[targetWidth * _targetHeight];
                Marshal.Copy(normalized.Data, data, 0, data.Length);
                return data;
            }
        }

        /// <summary>
        /// Runs the model on the preprocessed pixels and decodes the output.
        /// </summary>
        /// <param name="inputData">Pixels produced by <see cref="PreprocessImage"/>.</param>
        private string Inference(float[] inputData)
        {
            // The input shape is [1, 1, height, width].
            int width = inputData.Length / _targetHeight;
            int[] inputShape = { 1, 1, _targetHeight, width };

            using (var inputTensor = _inferRequest.get_input_tensor())
            {
                inputTensor.set_shape(new Shape(inputShape));
                inputTensor.set_data(inputData);

                // Run inference.
                _inferRequest.infer();

                using (var outputTensor = _inferRequest.get_output_tensor())
                {
                    var outputData = outputTensor.get_data<float>((int)outputTensor.get_size());
                    var outputShape = outputTensor.get_shape();

                    // The output shape is [seqlen, 1, numClasses].
                    int seqLen = (int)outputShape[0];
                    int numClasses = (int)outputShape[2];

                    return DecodeOutput(outputData, seqLen, numClasses);
                }
            }
        }

        /// <summary>
        /// Greedy CTC decoding: takes the highest-scoring class at each time step,
        /// collapses consecutive repeats and drops blanks (class index 0).
        /// </summary>
        /// <param name="outputData">Scores laid out as <c>[t * numClasses + c]</c>.</param>
        /// <param name="seqLen">Number of time steps.</param>
        /// <param name="numClasses">Number of classes per time step.</param>
        /// <returns>The decoded text, converted to upper case.</returns>
        private string DecodeOutput(float[] outputData, int seqLen, int numClasses)
        {
            if ((long)seqLen * numClasses > outputData.Length)
            {
                throw new ArgumentException("输出数据长度与输出形状不一致", nameof(outputData));
            }

            var resultChars = new List<char>();

            // -1 never matches a real class index, so the first step is never a "repeat".
            int previous = -1;

            for (int t = 0; t < seqLen; t++)
            {
                // Find the best-scoring class for this time step.
                int best = 0;
                float bestScore = float.MinValue;
                int rowStart = t * numClasses;

                for (int c = 0; c < numClasses; c++)
                {
                    float score = outputData[rowStart + c];
                    if (score > bestScore)
                    {
                        bestScore = score;
                        best = c;
                    }
                }

                // Emit only on a change to a non-blank class. The previous index is
                // updated on EVERY step, blanks included, so that a character repeated
                // across a blank ("A", blank, "A") is emitted twice.
                if (best != previous && best != 0 && best < _charset.Count)
                {
                    resultChars.Add(char.ToUpper(_charset[best]));
                }

                previous = best;
            }

            return new string(resultChars.ToArray());
        }

        /// <summary>
        /// Releases the OpenVINO objects owned by the engine. Safe to call more than once;
        /// afterwards <see cref="Recognize"/> reports the engine as not initialized.
        /// </summary>
        public void Dispose()
        {
            _isInitialized = false;

            _inferRequest?.Dispose();
            _inferRequest = null;

            _compiledModel?.Dispose();
            _compiledModel = null;

            _core?.Dispose();
            _core = null;
        }
    }
}

使用方法：

// `src` is the captcha image as an OpenCvSharp Mat, provided by the surrounding code.
// The engine implements IDisposable, so it is wrapped in `using` to release it after use.
string str;
using (var simpleOCR = new SimpleOCREngine("C:\\common.onnx", "classes.txt"))
{
    str = simpleOCR.Recognize(src);
}