C# OpenCvSharp DNN 实现百度网盘AI大赛-表格检测第2名方案第一部分-表格边界框检测

目录

说明

效果

模型

项目

代码

frmMain.cs

YoloDet.cs

参考

下载

其他


说明

百度网盘AI大赛-表格检测的第2名方案。

该算法包含表格边界框检测、表格分割和表格方向识别三个部分。首先,ppyoloe-plus-x 对边界框进行预测,并对置信度较高的表格边界框(box)进行裁剪。裁剪后的单个表格实例会送入到DBNet中进行语义分割,分割结果通过opencv轮廓处理获得表格关键点(point)。之后,我们根据DBNet计算的关键点在裁剪后的单个表格实例上绘制表格边界。最后,PP-LCNet结合表格边界先验和表格实例图像,对表格的方向进行预测,并根据之前定义的几何轮廓点与语义轮廓点的对应关系,将几何轮廓点映射为语义轮廓点。

本文使用C# OpenCvSharp DNN 实现百度网盘AI大赛-表格检测第2名方案第一部分-表格边界框检测。

效果

模型

Model Properties


date:2024-10-28T13:52:42.181333

description:Ultralytics YOLO11l model trained on coco.yaml

author:Ultralytics

version:8.3.23

task:detect

license:AGPL-3.0 License (https://ultralytics.com/license)

docs:https://docs.ultralytics.com

stride:32

batch:1

imgsz:[928, 928]

names:{0: 'table'}


Inputs


name:images

tensor:Float[1, 3, 928, 928]


Outputs


name:output0

tensor:Float[1, 5, 17661]


项目

代码

frmMain.cs

using OpenCvSharp;

using System;

using System.Collections.Generic;

using System.Drawing;

using System.Windows.Forms;

namespace OpenCvSharp_DNN_Demo
{
    /// <summary>
    /// Main window of the demo. Lets the user pick an image, runs the table
    /// detector on it, draws the detected boxes and writes each detected
    /// region to "&lt;index&gt;.jpg" in the working directory.
    /// </summary>
    public partial class frmMain : Form
    {
        public frmMain()
        {
            InitializeComponent();
        }

        // Detector instance; created in Form1_Load.
        YoloDet obj_detector;

        // Filter for the open-file dialog. The original listed "*.tiff" twice;
        // the second entry is now "*.tif" so both TIFF extensions are offered.
        string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tiff;*.tif;*.png";

        // Path of the image to run detection on; empty means "nothing selected".
        string image_path = "";

        // Timestamps taken right before and right after inference.
        DateTime dt1 = DateTime.Now;
        DateTime dt2 = DateTime.Now;

        // Most recently opened image.
        Mat image;

        /// <summary>
        /// Opens a file dialog and loads the chosen image into the left picture box.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            // The dialog owns native resources, so dispose it deterministically.
            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.Filter = fileFilter;
                if (ofd.ShowDialog() != DialogResult.OK)
                {
                    return;
                }
                image_path = ofd.FileName;
            }

            pictureBox1.Image = null;
            pictureBox2.Image = null;
            textBox1.Text = "";

            pictureBox1.Image = new Bitmap(image_path);

            // Release the native buffer of the previously opened image before replacing it.
            if (image != null)
            {
                image.Dispose();
            }
            image = new Mat(image_path);
        }

        /// <summary>
        /// Loads the detection model and, when present, the bundled sample image.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            string obj_model_path = "model/yolo_obj_det.onnx";

            obj_detector = new YoloDet(obj_model_path);

            // Only preselect the sample when it exists; otherwise image_path stays
            // empty and the detect button does nothing until an image is chosen.
            string sample_path = "test_img/real5.jpg";
            if (System.IO.File.Exists(sample_path))
            {
                image_path = sample_path;
                pictureBox1.Image = new Bitmap(image_path);
            }
        }

        /// <summary>
        /// Runs detection on the current image, shows the annotated result and
        /// saves one cropped image per detection for the next pipeline stage.
        /// </summary>
        private unsafe void button2_Click(object sender, EventArgs e)
        {
            if (image_path == "")
            {
                return;
            }
            textBox1.Text = "检测中,请稍等......";
            pictureBox2.Image = null;
            // Let the status text repaint before the blocking inference call.
            Application.DoEvents();

            using (Mat src = new Mat(image_path))
            {
                dt1 = DateTime.Now;
                List<Bbox> result = obj_detector.infer(src);
                dt2 = DateTime.Now;

                // Draw on a copy so the crops below come from the clean source image.
                using (Mat draw_img = src.Clone())
                {
                    for (int i = 0; i < result.Count; i++)
                    {
                        Rect r = Rect.FromLTRB(result[i].xmin, result[i].ymin, result[i].xmax, result[i].ymax);

                        // A degenerate box cannot be cropped; skip it instead of throwing.
                        if (r.Width <= 0 || r.Height <= 0)
                        {
                            continue;
                        }

                        // Label each box with its own score (the original always used result[0]).
                        Cv2.PutText(draw_img, $"table:{result[i].score:P0}", new OpenCvSharp.Point(r.TopLeft.X, r.TopLeft.Y - 10), HersheyFonts.HersheySimplex, 8, Scalar.Red, 8);
                        Cv2.Rectangle(draw_img, r, Scalar.Red, thickness: 8);

                        // Crop and save the region as input for the next detection step.
                        using (Mat crop_img = new Mat(src, r))
                        {
                            Cv2.ImWrite(i + ".jpg", crop_img);
                        }
                    }

                    // The stream is intentionally left open: a Bitmap built from a
                    // stream needs that stream for as long as the Bitmap is in use.
                    pictureBox2.Image = new Bitmap(draw_img.ToMemoryStream());
                }
            }

            textBox1.Text = "推理耗时:" + (dt2 - dt1).TotalMilliseconds + "ms";
        }

        /// <summary>Shows the result image through Common.ShowNormalImg.</summary>
        private void pictureBox2_DoubleClick(object sender, EventArgs e)
        {
            Common.ShowNormalImg(pictureBox2.Image);
        }

        /// <summary>Shows the source image through Common.ShowNormalImg.</summary>
        private void pictureBox1_DoubleClick(object sender, EventArgs e)
        {
            Common.ShowNormalImg(pictureBox1.Image);
        }
    }
}

using OpenCvSharp;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Windows.Forms;

namespace OpenCvSharp_DNN_Demo
{
    /// <summary>
    /// Main window of the demo. Lets the user pick an image, runs the table
    /// detector on it, draws the detected boxes and writes each detected
    /// region to "&lt;index&gt;.jpg" in the working directory.
    /// </summary>
    public partial class frmMain : Form
    {
        public frmMain()
        {
            InitializeComponent();
        }

        // Detector instance; created in Form1_Load.
        YoloDet obj_detector;

        // Filter for the open-file dialog. The original listed "*.tiff" twice;
        // the second entry is now "*.tif" so both TIFF extensions are offered.
        string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tiff;*.tif;*.png";

        // Path of the image to run detection on; empty means "nothing selected".
        string image_path = "";

        // Timestamps taken right before and right after inference.
        DateTime dt1 = DateTime.Now;
        DateTime dt2 = DateTime.Now;

        // Most recently opened image.
        Mat image;

        /// <summary>
        /// Opens a file dialog and loads the chosen image into the left picture box.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            // The dialog owns native resources, so dispose it deterministically.
            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.Filter = fileFilter;
                if (ofd.ShowDialog() != DialogResult.OK)
                {
                    return;
                }
                image_path = ofd.FileName;
            }

            pictureBox1.Image = null;
            pictureBox2.Image = null;
            textBox1.Text = "";

            pictureBox1.Image = new Bitmap(image_path);

            // Release the native buffer of the previously opened image before replacing it.
            if (image != null)
            {
                image.Dispose();
            }
            image = new Mat(image_path);
        }

        /// <summary>
        /// Loads the detection model and, when present, the bundled sample image.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            string obj_model_path = "model/yolo_obj_det.onnx";

            obj_detector = new YoloDet(obj_model_path);

            // Only preselect the sample when it exists; otherwise image_path stays
            // empty and the detect button does nothing until an image is chosen.
            string sample_path = "test_img/real5.jpg";
            if (System.IO.File.Exists(sample_path))
            {
                image_path = sample_path;
                pictureBox1.Image = new Bitmap(image_path);
            }
        }

        /// <summary>
        /// Runs detection on the current image, shows the annotated result and
        /// saves one cropped image per detection for the next pipeline stage.
        /// </summary>
        private unsafe void button2_Click(object sender, EventArgs e)
        {
            if (image_path == "")
            {
                return;
            }
            textBox1.Text = "检测中,请稍等......";
            pictureBox2.Image = null;
            // Let the status text repaint before the blocking inference call.
            Application.DoEvents();

            using (Mat src = new Mat(image_path))
            {
                dt1 = DateTime.Now;
                List<Bbox> result = obj_detector.infer(src);
                dt2 = DateTime.Now;

                // Draw on a copy so the crops below come from the clean source image.
                using (Mat draw_img = src.Clone())
                {
                    for (int i = 0; i < result.Count; i++)
                    {
                        Rect r = Rect.FromLTRB(result[i].xmin, result[i].ymin, result[i].xmax, result[i].ymax);

                        // A degenerate box cannot be cropped; skip it instead of throwing.
                        if (r.Width <= 0 || r.Height <= 0)
                        {
                            continue;
                        }

                        // Label each box with its own score (the original always used result[0]).
                        Cv2.PutText(draw_img, $"table:{result[i].score:P0}", new OpenCvSharp.Point(r.TopLeft.X, r.TopLeft.Y - 10), HersheyFonts.HersheySimplex, 8, Scalar.Red, 8);
                        Cv2.Rectangle(draw_img, r, Scalar.Red, thickness: 8);

                        // Crop and save the region as input for the next detection step.
                        using (Mat crop_img = new Mat(src, r))
                        {
                            Cv2.ImWrite(i + ".jpg", crop_img);
                        }
                    }

                    // The stream is intentionally left open: a Bitmap built from a
                    // stream needs that stream for as long as the Bitmap is in use.
                    pictureBox2.Image = new Bitmap(draw_img.ToMemoryStream());
                }
            }

            textBox1.Text = "推理耗时:" + (dt2 - dt1).TotalMilliseconds + "ms";
        }

        /// <summary>Shows the result image through Common.ShowNormalImg.</summary>
        private void pictureBox2_DoubleClick(object sender, EventArgs e)
        {
            Common.ShowNormalImg(pictureBox2.Image);
        }

        /// <summary>Shows the source image through Common.ShowNormalImg.</summary>
        private void pictureBox1_DoubleClick(object sender, EventArgs e)
        {
            Common.ShowNormalImg(pictureBox1.Image);
        }
    }
}

YoloDet.cs

using OpenCvSharp;
using OpenCvSharp.Dnn;
using System;
using System.Collections.Generic;
using System.Linq;

namespace OpenCvSharp_DNN_Demo
{
    /// <summary>
    /// Table bounding-box detector that runs an ONNX model through OpenCV DNN.
    /// </summary>
    internal class YoloDet
    {

        Net model;

        // Target size handed to Common.ResizePad (only element 0 is used).
        int[] resize_shape = new int[2] { 928, 928 };

        /// <summary>Loads the ONNX network from <paramref name="model_path"/>.</summary>
        public YoloDet(string model_path)
        {
            model = CvDnn.ReadNetFromOnnx(model_path);
        }

        /// <summary>
        /// Detects tables in <paramref name="srcimg"/>.
        /// </summary>
        /// <param name="srcimg">Source image; it is not modified or disposed here
        /// (assuming Common.ResizePad returns a new Mat — TODO confirm).</param>
        /// <param name="score">Minimum confidence for a candidate; also passed to NMS as its score threshold.</param>
        /// <returns>Boxes kept after non-maximum suppression, in source-image pixel coordinates.</returns>
        /// <exception cref="InvalidOperationException">The model output has fewer than 5 values per candidate.</exception>
        unsafe public List<Bbox> infer(Mat srcimg, float score = 0.4f)
        {
            int ori_h = srcimg.Rows;
            int ori_w = srcimg.Cols;

            // ---- pre-processing ----
            int new_w = 0;
            int new_h = 0;
            int left = 0;
            int top = 0;

            List<Rect> boxes = new List<Rect>();
            List<float> scores = new List<float>();
            Mat[] outs = new Mat[1] { new Mat() };

            try
            {
                Mat img = Common.ResizePad(srcimg, resize_shape[0], ref new_w, ref new_h, ref left, ref top);
                try
                {
                    // Scale pixel values to [0, 1] as 32-bit float before building the blob.
                    img.ConvertTo(img, MatType.CV_32FC3, 1.0 / 255.0);
                    using (Mat blob = CvDnn.BlobFromImage(img))
                    {
                        model.SetInput(blob);

                        // Run the network and collect its output.
                        string[] outBlobNames = model.GetUnconnectedOutLayersNames().ToArray();
                        model.Forward(outs, outBlobNames);
                    }
                }
                finally
                {
                    // Never dispose the caller's image, even if the helper handed it back.
                    if (!ReferenceEquals(img, srcimg))
                    {
                        img.Dispose();
                    }
                }

                // ---- post-processing ----
                float x_factor = (float)ori_w / new_w;
                float y_factor = (float)ori_h / new_h;

                // Read the layout from the tensor itself ([1, channels, rows]; this
                // model reports [1, 5, 17661]) instead of hard-coding it.
                int channels = outs[0].Size(1);
                int rows = outs[0].Size(2);
                if (channels < 5)
                {
                    throw new InvalidOperationException("Unexpected model output: expected at least 5 values per candidate, got " + channels + ".");
                }

                // View the raw output as channels x rows, then transpose so that each
                // row holds one candidate: cx, cy, w, h, score.
                using (Mat raw = new Mat(channels, rows, MatType.CV_32F, outs[0].Data))
                using (Mat result_mat = raw.T())
                {
                    for (int i = 0; i < rows; i++)
                    {
                        // Raw pointer access; Mat.At<T> is noticeably slower in this loop.
                        float* ptr = (float*)(result_mat.Ptr(i).ToPointer());
                        float max_score = ptr[4];
                        if (max_score < score)
                        {
                            continue;
                        }

                        float x = ptr[0];
                        float y = ptr[1];
                        float w = ptr[2];
                        float h = ptr[3];

                        // Undo the padding offset and the resize to get source coordinates.
                        float fx = (x - w / 2 - left) * x_factor;
                        float fy = (y - h / 2 - top) * y_factor;

                        int xmin = Math.Max((int)fx, 0);
                        int ymin = Math.Max((int)fy, 0);

                        // Derive the size from the unclamped right/bottom edge so that
                        // clamping the origin to 0 does not push the far edge outward.
                        int box_w = (int)(fx + w * x_factor) - xmin;
                        int box_h = (int)(fy + h * y_factor) - ymin;
                        if (box_w <= 0 || box_h <= 0)
                        {
                            continue;
                        }

                        boxes.Add(new Rect(xmin, ymin, box_w, box_h));
                        scores.Add(max_score);
                    }
                }
            }
            finally
            {
                outs[0].Dispose();
            }

            List<Bbox> bboxes = new List<Bbox>();
            if (boxes.Count == 0)
            {
                // Nothing passed the threshold; skip NMS entirely.
                return bboxes;
            }

            int[] indices;
            CvDnn.NMSBoxes(boxes, scores, score, 0.4f, out indices);
            int num_keep = indices.Length;
            for (int i = 0; i < num_keep; i++)
            {
                int ind = indices[i];
                // Clamp the right/bottom edge to the last valid pixel of the source image.
                bboxes.Add(new Bbox(boxes[ind].X, boxes[ind].Y, Math.Min(boxes[ind].X + boxes[ind].Width, ori_w - 1), Math.Min(boxes[ind].Y + boxes[ind].Height, ori_h - 1), scores[ind]));
            }
            return bboxes;
        }
    }
}

/*
 
 Model Properties
-------------------------
date:2024-10-28T13:52:42.181333
description:Ultralytics YOLO11l model trained on coco.yaml
author:Ultralytics
version:8.3.23
task:detect
license:AGPL-3.0 License (https://ultralytics.com/license)
docs:https://docs.ultralytics.com
stride:32
batch:1
imgsz:[928, 928]
names:{0: 'table'}
---------------------------------------------------------------

Inputs
-------------------------
name:images
tensor:Float[1, 3, 928, 928]
---------------------------------------------------------------

Outputs
-------------------------
name:output0
tensor:Float[1, 5, 17661]
---------------------------------------------------------------

 */

参考

https://github.com/hpc203/TableDetection
 
https://aistudio.baidu.com/projectdetail/5398861?searchKeyword=%E8%A1%A8%E6%A0%BC%E6%A3%80%E6%B5%8B%E5%A4%A7%E8%B5%9B&searchTab=ALL

下载

源码下载

其他

C# OnnxRuntime 第二部分-表格分割-CSDN博客

C# OpenCvSharp DNN 第三部分-表格方向识别-CSDN博客

相关推荐
ai产品老杨几秒前
提前对风险进行预警并实施管控,运用AI技术将管理推向新时代的智慧地产开源了。
vue.js·人工智能·安全·开源·音视频
Trouvaille ~12 分钟前
【机器学习】解构概率,重构世界:贝叶斯定理与智能世界的暗语
人工智能·python·深度学习·神经网络·机器学习·ai·概率论
梭七y24 分钟前
【笔记】记录对自主实现一个神经网络的步骤的理解
人工智能·笔记·神经网络
MavenTalk26 分钟前
微信小程序TTS解决方案
人工智能·微信小程序·小程序·tts·ai语音合成
CM莫问43 分钟前
<论文>如何构建一个更鲁棒的Bert?
人工智能·深度学习·算法·语言模型·自然语言处理·大模型·roberta
杨过过儿44 分钟前
【使用LLM搭建系统】1语言模型,提问范式与 Token
人工智能·语言模型·自然语言处理
墨绿色的摆渡人1 小时前
用 Python 从零开始创建神经网络(十七):回归(Regression)
开发语言·人工智能·python·深度学习·神经网络·回归
春末的南方城市1 小时前
单幅图像合成 360° 3D 场景的新方法:PanoDreamer,可同时生成全景图像和相应的深度信息。
人工智能·3d·aigc·controlnet·图像生成
赛丽曼2 小时前
Pandas
人工智能·python·pandas