Github Copilot 实战: 从零开始用AI写一个OCR工具 (3)

源码

https://github.com/densen2014/Blazor100/tree/master/AI/MiOcr

添加一个屏幕截图功能,显示截图起始点,结束点,截图区域,按键ESC取消截图

这里AI就比较中规中矩,很快就能得到我要的功能了.下面只简单贴一下代码

ScreenCaptureWindow.xaml

复制代码
<Window x:Class="MiOcr.ScreenCaptureWindow"
        xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
        xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
        WindowStyle="None" AllowsTransparency="True" Background="#01000000"
        Topmost="True" ShowInTaskbar="False" WindowState="Maximized">
    <Canvas x:Name="CaptureCanvas">
        <TextBlock x:Name="StartCoordText"
               Foreground="Yellow"
               Background="#80000000"
               FontSize="14"
               Visibility="Collapsed"/>
        <TextBlock x:Name="CurrentCoordText"
               Foreground="Yellow"
               Background="#80000000"
               FontSize="14"
               Visibility="Collapsed"/>
        <TextBlock x:Name="SizeText"
               Foreground="Yellow"
               Background="#80000000"
               FontSize="14"
               Visibility="Collapsed"/>
    </Canvas>

</Window>

ScreenCaptureWindow.xaml.cs

复制代码
using System.Windows;
using System.Windows.Controls;
using System.Windows.Input;
using System.Windows.Interop;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Shapes;

namespace MiOcr;

public partial class ScreenCaptureWindow : Window
{
    public Rect SelectedRect { get; private set; }
    public BitmapSource? CapturedImage { get; private set; }

    private System.Windows.Point? _start;
    private Rectangle? _rectShape;

    public ScreenCaptureWindow()
    {
        InitializeComponent();
        MouseLeftButtonDown += OnMouseDown;
        MouseMove += OnMouseMove;
        MouseLeftButtonUp += OnMouseUp;
        Cursor = Cursors.Cross;
        PreviewKeyDown += ScreenCaptureWindow_PreviewKeyDown;
        Focusable = true;
        Loaded += (s, e) => Keyboard.Focus(this);
    }

    private void ScreenCaptureWindow_PreviewKeyDown(object sender, KeyEventArgs e)
    {
        if (e.Key == Key.Escape)
        {
            CapturedImage = null;
            DialogResult = false;
            Close();
        }
    }

    private void PositionTextBlocks(double x, double y, double w, double h)
    {
        double margin = 8;
        double canvasWidth = CaptureCanvas.ActualWidth;
        double canvasHeight = CaptureCanvas.ActualHeight;

        // 先测量文本大小
        StartCoordText.Measure(new Size(double.PositiveInfinity, double.PositiveInfinity));
        SizeText.Measure(new Size(double.PositiveInfinity, double.PositiveInfinity));
        double startW = StartCoordText.DesiredSize.Width;
        double startH = StartCoordText.DesiredSize.Height;
        double sizeW = SizeText.DesiredSize.Width;
        double sizeH = SizeText.DesiredSize.Height;

        // 1. 左上优先
        double startX = x - startW - margin;
        double startY = y - startH - margin;
        if (startX >= 0 && startY >= 0)
        {
            Canvas.SetLeft(StartCoordText, startX);
            Canvas.SetTop(StartCoordText, startY);
            Canvas.SetLeft(SizeText, startX);
            Canvas.SetTop(SizeText, startY + startH + 4);
            return;
        }

        // 2. 右上
        startX = x + w + margin;
        startY = y - startH - margin;
        if (startX + startW <= canvasWidth && startY >= 0)
        {
            Canvas.SetLeft(StartCoordText, startX);
            Canvas.SetTop(StartCoordText, startY);
            Canvas.SetLeft(SizeText, startX);
            Canvas.SetTop(SizeText, startY + startH + 4);
            return;
        }

        // 3. 左下
        startX = x - startW - margin;
        startY = y + h + margin;
        if (startX >= 0 && startY + startH + sizeH + 4 <= canvasHeight)
        {
            Canvas.SetLeft(StartCoordText, startX);
            Canvas.SetTop(StartCoordText, startY);
            Canvas.SetLeft(SizeText, startX);
            Canvas.SetTop(SizeText, startY + startH + 4);
            return;
        }

        // 4. 右下
        startX = x + w + margin;
        startY = y + h + margin;
        if (startX + startW <= canvasWidth && startY + startH + sizeH + 4 <= canvasHeight)
        {
            Canvas.SetLeft(StartCoordText, startX);
            Canvas.SetTop(StartCoordText, startY);
            Canvas.SetLeft(SizeText, startX);
            Canvas.SetTop(SizeText, startY + startH + 4);
            return;
        }

        // 5. 屏幕内兜底
        Canvas.SetLeft(StartCoordText, Math.Max(margin, Math.Min(canvasWidth - startW - margin, x)));
        Canvas.SetTop(StartCoordText, Math.Max(margin, Math.Min(canvasHeight - startH - margin, y)));
        Canvas.SetLeft(SizeText, Math.Max(margin, Math.Min(canvasWidth - sizeW - margin, x)));
        Canvas.SetTop(SizeText, Math.Max(margin, Math.Min(canvasHeight - sizeH - margin, y + startH + 4)));
    }

    private void OnMouseDown(object sender, MouseButtonEventArgs e)
    {
        _start = e.GetPosition(this);
        _rectShape = new Rectangle
        {
            Stroke = Brushes.Red,
            StrokeThickness = 2,
            Fill = new SolidColorBrush(Color.FromArgb(40, 0, 0, 255))
        };
        CaptureCanvas.Children.Add(_rectShape);
        Canvas.SetLeft(_rectShape, _start.Value.X);
        Canvas.SetTop(_rectShape, _start.Value.Y);

        StartCoordText.Text = $"起点: ({(int)_start.Value.X}, {(int)_start.Value.Y})";
        StartCoordText.Visibility = Visibility.Visible;
        CurrentCoordText.Text = $"当前: ({(int)_start.Value.X}, {(int)_start.Value.Y})";
        CurrentCoordText.Visibility = Visibility.Visible;
        SizeText.Text = $"大小: 0 x 0";
        SizeText.Visibility = Visibility.Visible;
        // 初始位置
        PositionTextBlocks(_start.Value.X, _start.Value.Y, 0, 0);
    }

    private void OnMouseMove(object sender, MouseEventArgs e)
    {
        if (_start.HasValue && _rectShape != null)
        {
            var pos = e.GetPosition(this);
            double x = Math.Min(_start.Value.X, pos.X);
            double y = Math.Min(_start.Value.Y, pos.Y);
            double w = Math.Abs(_start.Value.X - pos.X);
            double h = Math.Abs(_start.Value.Y - pos.Y);
            Canvas.SetLeft(_rectShape, x);
            Canvas.SetTop(_rectShape, y);
            _rectShape.Width = w;
            _rectShape.Height = h;

            // 更新当前点坐标
            CurrentCoordText.Text = $"当前: ({(int)pos.X}, {(int)pos.Y})";
            Canvas.SetLeft(CurrentCoordText, pos.X + 2);
            Canvas.SetTop(CurrentCoordText, pos.Y + 2);

            // 更新区域大小
            SizeText.Text = $"大小: {(int)w} x {(int)h}";
            // 动态调整文本位置
            PositionTextBlocks(x, y, w, h);
        }
    }

    private void OnMouseUp(object sender, MouseButtonEventArgs e)
    {
        if (_start.HasValue && _rectShape != null)
        {
            var end = e.GetPosition(this);
            double x = Math.Min(_start.Value.X, end.X);
            double y = Math.Min(_start.Value.Y, end.Y);
            double w = Math.Abs(_start.Value.X - end.X);
            double h = Math.Abs(_start.Value.Y - end.Y);
            SelectedRect = new Rect(x, y, w, h);

            // 隐藏坐标
            StartCoordText.Visibility = Visibility.Collapsed;
            CurrentCoordText.Visibility = Visibility.Collapsed;

            // 隐藏区域大小
            SizeText.Visibility = Visibility.Collapsed;

            // 截图
            CapturedImage = CaptureScreenArea(SelectedRect);
            DialogResult = true;
            Close();
        }
    }

    private BitmapSource CaptureScreenArea(Rect rect)
    {
        double dpiScale = NativeMethods.GetDpiScale(this);

        int x = (int)(rect.X * dpiScale);
        int y = (int)(rect.Y * dpiScale);
        int w = (int)(rect.Width * dpiScale);
        int h = (int)(rect.Height * dpiScale);

        IntPtr hdcSrc = NativeMethods.GetDC(IntPtr.Zero);
        IntPtr hdcDest = NativeMethods.CreateCompatibleDC(hdcSrc);
        IntPtr hBitmap = NativeMethods.CreateCompatibleBitmap(hdcSrc, w, h);
        IntPtr hOld = NativeMethods.SelectObject(hdcDest, hBitmap);

        NativeMethods.BitBlt(hdcDest, 0, 0, w, h, hdcSrc, x, y, 0x00CC0020); // SRCCOPY

        NativeMethods.SelectObject(hdcDest, hOld);
        NativeMethods.DeleteDC(hdcDest);
        NativeMethods.ReleaseDC(IntPtr.Zero, hdcSrc);

        try
        {
            var source = Imaging.CreateBitmapSourceFromHBitmap(
                hBitmap, IntPtr.Zero, Int32Rect.Empty, BitmapSizeOptions.FromEmptyOptions());
            source.Freeze();
            return source;
        }
        finally
        {
            NativeMethods.DeleteObject(hBitmap);
        }
    } 

}

截图api,不走system.draw

NativeMethods.cs

复制代码
using System.Windows;

namespace MiOcr;

public static class NativeMethods
{
    [System.Runtime.InteropServices.DllImport("gdi32.dll")]
    public static extern bool DeleteObject(IntPtr hObject);

    [System.Runtime.InteropServices.DllImport("user32.dll")]
    public static extern IntPtr GetDC(IntPtr hWnd);

    [System.Runtime.InteropServices.DllImport("user32.dll")]
    public static extern int ReleaseDC(IntPtr hWnd, IntPtr hDC);

    [System.Runtime.InteropServices.DllImport("gdi32.dll")]
    public static extern IntPtr CreateCompatibleDC(IntPtr hdc);

    [System.Runtime.InteropServices.DllImport("gdi32.dll")]
    public static extern IntPtr CreateCompatibleBitmap(IntPtr hdc, int nWidth, int nHeight);

    [System.Runtime.InteropServices.DllImport("gdi32.dll")]
    public static extern IntPtr SelectObject(IntPtr hdc, IntPtr hgdiobj);

    [System.Runtime.InteropServices.DllImport("gdi32.dll")]
    public static extern bool BitBlt(IntPtr hdcDest, int nXDest, int nYDest, int nWidth, int nHeight,
        IntPtr hdcSrc, int nXSrc, int nYSrc, int dwRop);

    [System.Runtime.InteropServices.DllImport("gdi32.dll")]
    public static extern bool DeleteDC(IntPtr hdc);

    /// <summary>
    /// 获取指定窗口的 DPI 缩放比例。
    /// </summary>
    /// <param name="window"></param>
    /// <returns></returns>
    public static double GetDpiScale(Window window)
    {
        var source = PresentationSource.FromVisual(window);
        if (source?.CompositionTarget != null)
        {
            return source.CompositionTarget.TransformToDevice.M11; // X 方向缩放
        }
        return 1.0;
    }
}

最后主界面添加按钮

复制代码
        <Button Width="140" Height="44"
                Background="#4361EE"
                Foreground="White"
                FontWeight="SemiBold"
                FontSize="16" 
                HorizontalAlignment="Left"
                VerticalAlignment="Top"
                Margin="180,20,20,20"
                Click="CaptureScreenButton_Click">
            <Button.Effect>
                <DropShadowEffect ShadowDepth="1" BlurRadius="8" Opacity="0.25" Color="#4361EE"/>
            </Button.Effect>
            <Button.Content>
                <StackPanel Orientation="Horizontal" VerticalAlignment="Center">
                    <TextBlock Text="&#xE8A7;" 
                       FontFamily="Segoe MDL2 Assets"
                       FontSize="20"
                       Margin="0,0,8,0"
                       VerticalAlignment="Center"/>
                    <TextBlock Text="截屏"
                       FontSize="16"
                       VerticalAlignment="Center"/>
                </StackPanel>
            </Button.Content>
        </Button>

执行OCR初始化检查模型是否存在, 不存在则回调一个正在初始化提示给UI, 免得用户等待不耐烦

可以通过先检查模型文件是否已存在本地,如果不存在则通知UI"正在初始化/下载模型",再进行下载。你可以这样实现:

  1. 检查模型本地路径是否存在。

  2. 如果不存在,调用UI回调(如事件、Action、委托等)提示"正在初始化"。

  3. 然后异步下载模型。
    假设你用 Action ? onStatusChanged 作为回调参数,代码如下:

    // 获取模型本地路径
    var modelInfo = OnlineFullModels.ChineseV3;
    string modelDir = modelInfo.LocalDir;
    if (!Directory.Exists(modelDir) || !File.Exists(Path.Combine(modelDir, "model.pdmodel")))
    {
    onStatusChanged?.Invoke("正在初始化OCR模型,请稍候...");
    }

A: modelInfo.LocalDir; 不存在

A: CreateLocalAsync 方法不存在, 改为异步超时回调UI

这个步骤花了一点时间,因为原本的库就不存在这个功能,所以最终走到这样的代码

明白。你可以通过以下方式实现"异步超时回调UI":

• 在下载模型前,先启动一个定时任务(如 Task.Delay),如果模型下载还未完成且超时,则回调 UI 提示"正在初始化"。

• 如果模型下载提前完成,则取消提示。

下面是推荐实现方式:

复制代码
public async Task<(List<string> strings, PaddleOcrResult result)> StartOCR(
    Mat src,
    Action<string>? onStatusChanged = null,
    int timeoutMs = 1500)
{
    PaddleOcrResult result;
    var resultText = new List<string>();
    var modelInfo = OnlineFullModels.ChineseV3;

    // 启动超时提示任务
    using var cts = new CancellationTokenSource();
    var timeoutTask = Task.Delay(timeoutMs, cts.Token)
        .ContinueWith(t =>
        {
            if (!t.IsCanceled)
            {
                onStatusChanged?.Invoke("正在初始化OCR模型,请稍候...");
            }
        });

    // 下载模型(如果已存在会跳过下载)
    FullOcrModel model = await modelInfo.DownloadAsync();
    cts.Cancel(); // 下载完成,取消超时提示

    using (PaddleOcrAll all = new(model)
    {
        AllowRotateDetection = true,
        Enable180Classification = true,
    })
    {
        Stopwatch sw = Stopwatch.StartNew();
        result = all.Run(src);
        Console.WriteLine($"elapsed={sw.ElapsedMilliseconds} ms");
        Console.WriteLine("Detected all texts: \n" + result.Text);
        foreach (PaddleOcrResultRegion region in result.Regions)
        {
            Console.WriteLine($"Text: {region.Text}, Score: {region.Score}, RectCenter: {region.Rect.Center}, RectSize: {region.Rect.Size}, Angle: {region.Rect.Angle}");
            resultText.Add(region.Text);
        }
    }
    src.Dispose();
    return (resultText, result);
}

说明:

• 增加了 timeoutMs 参数(默认1.5秒),可根据需要调整。

• 如果模型下载超过指定时间,UI 会收到"正在初始化OCR模型,请稍候..."提示。

• 如果模型提前下载完成,则不会回调提示。

这样可以优雅地实现异步超时回调UI,无需依赖不存在的 CreateLocalAsync 方法。

StartOCR 处理一下 onStatusChanged 回调, 下载初始化模型反馈友好提示

可以将 PaddleOCRService.StartOCR 的 onStatusChanged 回调参数传递到 UI 层,并在模型下载耗时时友好提示用户。

下面是推荐做法:

  1. PaddleOCRService.cs 已实现超时回调(见前述建议)。

  2. MainWindow.xaml.cs 需要在调用 StartOCR 时传递回调,并在 UI 上显示提示。
    步骤一:修改 RunOcrAndDraw,传递 onStatusChanged
    将 RunOcrAndDraw 方法修改如下,利用 Dispatcher.Invoke 保证 UI 线程安全:

    private async Task RunOcrAndDraw(byte[] imageBytes)
    {
    ProcessingText.Visibility = Visibility.Visible;
    ProcessingText.Text = "正在识别...";

    复制代码
     var ocr = new PaddleOCRService();
     var results = await ocr.StartOCR(imageBytes, msg =>
     {
         // 友好提示,确保在UI线程
         Dispatcher.Invoke(() =>
         {
             ProcessingText.Text = msg;
             ProcessingText.Visibility = Visibility.Visible;
         });
     });
    
     ProcessingText.Visibility = Visibility.Collapsed;
    
     if (results.strings == null || results.strings.Count == 0)
     {
         OcrTextBox.Text = "未识别到文本";
         return;
     }
    
     OcrTextBox.Text = string.Join(Environment.NewLine, results.result.Regions.Select(r => r.Text));
     _lastOcrResult = results.result;
     var drawed = DrawOcrResultsOnImage(_currentImage, results.result);
     MainImage.Source = drawed;

    }

步骤二:确保 PaddleOCRService.cs 的 StartOCR 支持 onStatusChanged

如前所述,PaddleOCRService.cs 需如下实现:

复制代码
public async Task<(List<string> strings, PaddleOcrResult result)> StartOCR(
    Mat src,
    Action<string>? onStatusChanged = null,
    int timeoutMs = 1500)
{
    PaddleOcrResult result;
    var resultText = new List<string>();
    var modelInfo = OnlineFullModels.ChineseV3;

    using var cts = new CancellationTokenSource();
    var timeoutTask = Task.Delay(timeoutMs, cts.Token)
        .ContinueWith(t =>
        {
            if (!t.IsCanceled)
            {
                onStatusChanged?.Invoke("正在初始化OCR模型,请稍候...");
            }
        });

    FullOcrModel model = await modelInfo.DownloadAsync();
    cts.Cancel();

    using (PaddleOcrAll all = new(model)
    {
        AllowRotateDetection = true,
        Enable180Classification = true,
    })
    {
        Stopwatch sw = Stopwatch.StartNew();
        result = all.Run(src);
        foreach (PaddleOcrResultRegion region in result.Regions)
        {
            resultText.Add(region.Text);
        }
    }
    src.Dispose();
    return (resultText, result);
}

这样,用户在模型首次下载或初始化时会看到"正在初始化OCR模型,请稍候...",其余时间显示"正在识别...",体验更友好。

回顾

项目目标

本项目旨在利用 Github Copilot 辅助开发,从零实现一个基于 AI 的 OCR(光学字符识别)工具。项目采用 .NET 9 和 WPF 技术栈,集成了 PaddleOCR 作为核心识别引擎,实现了图片文字识别、区域选择、结果高亮与复制等实用功能。

主要技术与依赖

• 开发语言与平台:C#,.NET 9,WPF

• OCR引擎:Sdcb.OpenVINO.PaddleOCR

• 图像处理:OpenCvSharp

• 界面交互:WPF,支持拖拽、粘贴、截图等多种图片输入方式

• AI辅助开发:Github Copilot 提供代码建议与自动补全

核心功能

  1. 图片输入
    • 支持文件选择、拖拽、粘贴、屏幕截图等多种方式加载图片。
  2. OCR识别
    • 调用 PaddleOCR 进行文字识别,支持中文、英文等多语种。
    • 识别结果实时显示,支持区域高亮和文字复制。
  3. 用户体验优化
    • 首次模型下载时,异步回调 UI,友好提示"正在初始化OCR模型,请稍候...",避免用户等待时无响应。
    • 识别过程有进度提示,提升交互体验。
  4. 结果交互
    • 支持鼠标框选图片区域,提取并复制选中区域的文字。
    • 右键点击可复制单个识别文本。

关键实现思路

• AI驱动开发:通过 Copilot 自动生成代码骨架、方法实现和注释,大幅提升开发效率。

• 异步与回调:模型下载和识别过程均为异步,UI 通过回调及时反馈进度和状态。

• 图像与坐标映射:实现了图片与控件坐标的精准映射,保证高亮和选区准确。

总结

本项目充分发挥了 Github Copilot 在 AI 辅助开发中的优势,实现了一个功能完善、交互友好的 OCR 工具。开发过程中 Copilot 提供了大量代码建议,极大提升了开发效率和代码质量。项目结构清晰,易于扩展,适合 AI+开发实战学习与参考。