目录
说明
百度网盘AI大赛-表格检测的第2名方案。
该算法包含表格边界框检测、表格分割和表格方向识别三个部分,首先,ppyoloe-plus-x 对边界框进行预测,并对置信度较高的表格边界框(box)进行裁剪。裁剪后的单个表格实例会送入到DBNet中进行语义分割,分割结果通过opencv轮廓处理获得表格关键点(point)。之后,我们根据DBNet计算的关键点在裁剪后的单个表格实例上绘制表格边界。最后,PP-LCNet结合表格边界先验和表格实例图像,对表格的方向进行预测,并根据之前定义的几何轮廓点与语义轮廓点的对应关系,将几何轮廓点映射为语义轮廓点。
本文使用C# OpenCvSharp DNN 实现百度网盘AI大赛-表格检测第2名方案第一部分-表格边界框检测。
效果
模型
Model Properties
date:2024-10-28T13:52:42.181333
description:Ultralytics YOLO11l model trained on coco.yaml
author:Ultralytics
version:8.3.23
task:detect
license:AGPL-3.0 License (https://ultralytics.com/license)
docs:https://docs.ultralytics.com
stride:32
batch:1
imgsz:[928, 928]
names:{0: 'table'}
Inputs
name:images
tensor:Float[1, 3, 928, 928]
Outputs
name:output0
tensor:Float[1, 5, 17661]
项目
代码
frmMain.cs
using OpenCvSharp;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Windows.Forms;
namespace OpenCvSharp_DNN_Demo
{
/// <summary>
/// Main window of the table-detection demo. The user picks an image, the form runs
/// <c>YoloDet</c> on it, draws every detected table box on a copy of the image and
/// writes one cropped image per box for the next pipeline stage.
/// </summary>
public partial class frmMain : Form
{
    public frmMain()
    {
        InitializeComponent();
    }

    // Detector instance; created in Form1_Load.
    YoloDet obj_detector;
    // Filter for the open-file dialog. "*.tiff" was listed twice in the original;
    // the duplicate is replaced by "*.tif" so both TIFF extensions are offered.
    string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tif;*.tiff;*.png";
    // Path of the image that button2_Click will run detection on.
    string image_path = "";
    // Timestamps taken right before and right after inference.
    DateTime dt1 = DateTime.Now;
    DateTime dt2 = DateTime.Now;
    // Most recently selected image, loaded as a Mat.
    Mat image;

    /// <summary>
    /// Lets the user choose an image file, resets the result views and shows the
    /// chosen image in <c>pictureBox1</c>.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        using (OpenFileDialog ofd = new OpenFileDialog())
        {
            ofd.Filter = fileFilter;
            if (ofd.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            pictureBox1.Image = null;
            pictureBox2.Image = null;
            textBox1.Text = "";

            image_path = ofd.FileName;
            pictureBox1.Image = new Bitmap(image_path);

            // Release the native buffer of the previously selected image before replacing it.
            image?.Dispose();
            image = new Mat(image_path);
        }
    }

    /// <summary>
    /// Loads the ONNX detector and shows the bundled sample image.
    /// </summary>
    private void Form1_Load(object sender, EventArgs e)
    {
        string obj_model_path = "model/yolo_obj_det.onnx";
        obj_detector = new YoloDet(obj_model_path);

        image_path = "test_img/real5.jpg";
        pictureBox1.Image = new Bitmap(image_path);
    }

    /// <summary>
    /// Runs detection on the current image, draws each detected box with its own
    /// score, saves each crop as "&lt;index&gt;.jpg" (relative path) and reports the
    /// inference time in <c>textBox1</c>.
    /// </summary>
    private void button2_Click(object sender, EventArgs e)
    {
        if (image_path == "")
        {
            return;
        }

        textBox1.Text = "检测中,请稍等......";
        pictureBox2.Image = null;
        // Let the status text repaint before the blocking inference call.
        Application.DoEvents();

        using (Mat src = new Mat(image_path))
        {
            // A file that cannot be decoded yields an empty Mat; do not feed it to the detector.
            if (src.Empty())
            {
                textBox1.Text = "图片读取失败:" + image_path;
                return;
            }

            dt1 = DateTime.Now;
            List<Bbox> result = obj_detector.infer(src);
            dt2 = DateTime.Now;

            // Draw on a copy so the crops below are taken from the untouched source.
            using (Mat draw_img = src.Clone())
            {
                for (int i = 0; i < result.Count; i++)
                {
                    Bbox box = result[i];

                    // Skip degenerate boxes: they cannot be turned into a valid ROI.
                    if (box.xmax <= box.xmin || box.ymax <= box.ymin)
                    {
                        continue;
                    }

                    Rect r = Rect.FromLTRB(box.xmin, box.ymin, box.xmax, box.ymax);

                    // Label with the score of THIS box (the original always printed result[0].score).
                    Cv2.PutText(draw_img, $"table:{box.score:P0}", new OpenCvSharp.Point(r.TopLeft.X, r.TopLeft.Y - 10), HersheyFonts.HersheySimplex, 8, Scalar.Red, 8);
                    Cv2.Rectangle(draw_img, r, Scalar.Red, thickness: 8);

                    // Crop and save the table region as input for the next detection stage.
                    using (Mat crop_img = new Mat(src, r))
                    {
                        Cv2.ImWrite(i + ".jpg", crop_img);
                    }
                }

                pictureBox2.Image = new Bitmap(draw_img.ToMemoryStream());
            }
        }

        textBox1.Text = "推理耗时:" + (dt2 - dt1).TotalMilliseconds + "ms";
    }

    /// <summary>Passes the result image to <c>Common.ShowNormalImg</c>.</summary>
    private void pictureBox2_DoubleClick(object sender, EventArgs e)
    {
        Common.ShowNormalImg(pictureBox2.Image);
    }

    /// <summary>Passes the source image to <c>Common.ShowNormalImg</c>.</summary>
    private void pictureBox1_DoubleClick(object sender, EventArgs e)
    {
        Common.ShowNormalImg(pictureBox1.Image);
    }
}
}
using OpenCvSharp;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Windows.Forms;
namespace OpenCvSharp_DNN_Demo
{
/// <summary>
/// Main window of the table-detection demo. The user picks an image, the form runs
/// <c>YoloDet</c> on it, draws every detected table box on a copy of the image and
/// writes one cropped image per box for the next pipeline stage.
/// </summary>
public partial class frmMain : Form
{
    public frmMain()
    {
        InitializeComponent();
    }

    // Detector instance; created in Form1_Load.
    YoloDet obj_detector;
    // Filter for the open-file dialog. "*.tiff" was listed twice in the original;
    // the duplicate is replaced by "*.tif" so both TIFF extensions are offered.
    string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tif;*.tiff;*.png";
    // Path of the image that button2_Click will run detection on.
    string image_path = "";
    // Timestamps taken right before and right after inference.
    DateTime dt1 = DateTime.Now;
    DateTime dt2 = DateTime.Now;
    // Most recently selected image, loaded as a Mat.
    Mat image;

    /// <summary>
    /// Lets the user choose an image file, resets the result views and shows the
    /// chosen image in <c>pictureBox1</c>.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        using (OpenFileDialog ofd = new OpenFileDialog())
        {
            ofd.Filter = fileFilter;
            if (ofd.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            pictureBox1.Image = null;
            pictureBox2.Image = null;
            textBox1.Text = "";

            image_path = ofd.FileName;
            pictureBox1.Image = new Bitmap(image_path);

            // Release the native buffer of the previously selected image before replacing it.
            image?.Dispose();
            image = new Mat(image_path);
        }
    }

    /// <summary>
    /// Loads the ONNX detector and shows the bundled sample image.
    /// </summary>
    private void Form1_Load(object sender, EventArgs e)
    {
        string obj_model_path = "model/yolo_obj_det.onnx";
        obj_detector = new YoloDet(obj_model_path);

        image_path = "test_img/real5.jpg";
        pictureBox1.Image = new Bitmap(image_path);
    }

    /// <summary>
    /// Runs detection on the current image, draws each detected box with its own
    /// score, saves each crop as "&lt;index&gt;.jpg" (relative path) and reports the
    /// inference time in <c>textBox1</c>.
    /// </summary>
    private void button2_Click(object sender, EventArgs e)
    {
        if (image_path == "")
        {
            return;
        }

        textBox1.Text = "检测中,请稍等......";
        pictureBox2.Image = null;
        // Let the status text repaint before the blocking inference call.
        Application.DoEvents();

        using (Mat src = new Mat(image_path))
        {
            // A file that cannot be decoded yields an empty Mat; do not feed it to the detector.
            if (src.Empty())
            {
                textBox1.Text = "图片读取失败:" + image_path;
                return;
            }

            dt1 = DateTime.Now;
            List<Bbox> result = obj_detector.infer(src);
            dt2 = DateTime.Now;

            // Draw on a copy so the crops below are taken from the untouched source.
            using (Mat draw_img = src.Clone())
            {
                for (int i = 0; i < result.Count; i++)
                {
                    Bbox box = result[i];

                    // Skip degenerate boxes: they cannot be turned into a valid ROI.
                    if (box.xmax <= box.xmin || box.ymax <= box.ymin)
                    {
                        continue;
                    }

                    Rect r = Rect.FromLTRB(box.xmin, box.ymin, box.xmax, box.ymax);

                    // Label with the score of THIS box (the original always printed result[0].score).
                    Cv2.PutText(draw_img, $"table:{box.score:P0}", new OpenCvSharp.Point(r.TopLeft.X, r.TopLeft.Y - 10), HersheyFonts.HersheySimplex, 8, Scalar.Red, 8);
                    Cv2.Rectangle(draw_img, r, Scalar.Red, thickness: 8);

                    // Crop and save the table region as input for the next detection stage.
                    using (Mat crop_img = new Mat(src, r))
                    {
                        Cv2.ImWrite(i + ".jpg", crop_img);
                    }
                }

                pictureBox2.Image = new Bitmap(draw_img.ToMemoryStream());
            }
        }

        textBox1.Text = "推理耗时:" + (dt2 - dt1).TotalMilliseconds + "ms";
    }

    /// <summary>Passes the result image to <c>Common.ShowNormalImg</c>.</summary>
    private void pictureBox2_DoubleClick(object sender, EventArgs e)
    {
        Common.ShowNormalImg(pictureBox2.Image);
    }

    /// <summary>Passes the source image to <c>Common.ShowNormalImg</c>.</summary>
    private void pictureBox1_DoubleClick(object sender, EventArgs e)
    {
        Common.ShowNormalImg(pictureBox1.Image);
    }
}
}
YoloDet.cs
using OpenCvSharp;
using OpenCvSharp.Dnn;
using System;
using System.Collections.Generic;
using System.Linq;
namespace OpenCvSharp_DNN_Demo
{
/// <summary>
/// Single-class ("table") YOLO detector running an ONNX model through OpenCV DNN.
/// </summary>
internal class YoloDet
{
    // Loaded network.
    Net model;
    // Network input size; only the first entry is passed to Common.ResizePad.
    int[] resize_shape = new int[2] { 928, 928 };

    /// <summary>Loads the ONNX model from <paramref name="model_path"/>.</summary>
    public YoloDet(string model_path)
    {
        model = CvDnn.ReadNetFromOnnx(model_path);
    }

    /// <summary>
    /// Detects tables in <paramref name="srcimg"/>.
    /// </summary>
    /// <param name="srcimg">Source image; box coordinates are returned in its pixel space.</param>
    /// <param name="score">Minimum confidence a candidate needs to be kept (also passed to NMS).</param>
    /// <returns>Boxes surviving NMS, clipped to the image; empty when nothing is found.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="srcimg"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="srcimg"/> is empty.</exception>
    /// <exception cref="InvalidOperationException">The model output has fewer than 5 attributes per candidate.</exception>
    public unsafe List<Bbox> infer(Mat srcimg, float score = 0.4f)
    {
        if (srcimg == null)
        {
            throw new ArgumentNullException(nameof(srcimg));
        }
        if (srcimg.Empty())
        {
            throw new ArgumentException("Input image is empty.", nameof(srcimg));
        }

        int ori_h = srcimg.Rows;
        int ori_w = srcimg.Cols;

        List<Rect> boxes = new List<Rect>();
        List<float> scores = new List<float>();

        // ---- img_preprocess ----
        int new_w = 0;
        int new_h = 0;
        int left = 0;
        int top = 0;
        Mat img = Common.ResizePad(srcimg, resize_shape[0], ref new_w, ref new_h, ref left, ref top);
        Mat net_out = new Mat();
        try
        {
            // Scale pixel values to [0, 1] as float.
            img.ConvertTo(img, MatType.CV_32FC3, 1.0 / 255.0);

            // Run the network and read its (single) output.
            using (Mat blob = CvDnn.BlobFromImage(img))
            {
                model.SetInput(blob);
                string[] outBlobNames = model.GetUnconnectedOutLayersNames().ToArray();
                model.Forward(new Mat[1] { net_out }, outBlobNames);
            }

            // ---- img_postprocess ----
            // Factors mapping coordinates of the resized (unpadded) image back to the source.
            float x_factor = (float)ori_w / new_w;
            float y_factor = (float)ori_h / new_h;

            // Output is [1, attrs, candidates] (e.g. [1, 5, 17661]); read the shape from the
            // tensor instead of hard-coding it so other input sizes keep working.
            int attrs = net_out.Size(1);
            int rows = net_out.Size(2);
            if (attrs < 5)
            {
                throw new InvalidOperationException("Unexpected model output: expected at least 5 attributes per candidate, got " + attrs + ".");
            }

            // Attribute-major layout: attribute a of candidate i sits at data[a * rows + i].
            // Reading it directly avoids building and transposing a temporary Mat.
            // NOTE(review): like the original, this assumes a contiguous CV_32F output.
            float* data = (float*)net_out.Data.ToPointer();
            for (int i = 0; i < rows; i++)
            {
                float max_score = data[4 * rows + i];
                if (max_score < score)
                {
                    continue;
                }

                // Centre-format box in padded network-input coordinates.
                float x = data[i];
                float y = data[rows + i];
                float w = data[2 * rows + i];
                float h = data[3 * rows + i];

                // Remove the padding offset, rescale, and clip BOTH corners to the image.
                // Clamping only the top-left corner would stretch the box right/down.
                int xmin = Math.Max((int)((x - w / 2 - left) * x_factor), 0);
                int ymin = Math.Max((int)((y - h / 2 - top) * y_factor), 0);
                int xmax = Math.Min((int)((x + w / 2 - left) * x_factor), ori_w - 1);
                int ymax = Math.Min((int)((y + h / 2 - top) * y_factor), ori_h - 1);

                // Nothing of the box is left inside the image.
                if (xmax <= xmin || ymax <= ymin)
                {
                    continue;
                }

                boxes.Add(new Rect(xmin, ymin, xmax - xmin, ymax - ymin));
                scores.Add(max_score);
            }
        }
        finally
        {
            net_out.Dispose();
            // Guard in case ResizePad hands back the caller's own Mat — it must stay usable.
            if (!ReferenceEquals(img, srcimg))
            {
                img.Dispose();
            }
        }

        List<Bbox> bboxes = new List<Bbox>();
        if (boxes.Count == 0)
        {
            return bboxes;
        }

        int[] indices;
        CvDnn.NMSBoxes(boxes, scores, score, 0.4f, out indices);

        for (int i = 0; i < indices.Length; i++)
        {
            Rect kept = boxes[indices[i]];
            // Boxes are already clipped to the image, so right/bottom need no further clamping.
            bboxes.Add(new Bbox(kept.X, kept.Y, kept.X + kept.Width, kept.Y + kept.Height, scores[indices[i]]));
        }
        return bboxes;
    }
}
}
/*
Model Properties
-------------------------
date:2024-10-28T13:52:42.181333
description:Ultralytics YOLO11l model trained on coco.yaml
author:Ultralytics
version:8.3.23
task:detect
license:AGPL-3.0 License (https://ultralytics.com/license)
docs:https://docs.ultralytics.com
stride:32
batch:1
imgsz:[928, 928]
names:{0: 'table'}
---------------------------------------------------------------
Inputs
-------------------------
name:images
tensor:Float[1, 3, 928, 928]
---------------------------------------------------------------
Outputs
-------------------------
name:output0
tensor:Float[1, 5, 17661]
---------------------------------------------------------------
*/
参考
https://github.com/hpc203/TableDetection
https://aistudio.baidu.com/projectdetail/5398861?searchKeyword=%E8%A1%A8%E6%A0%BC%E6%A3%80%E6%B5%8B%E5%A4%A7%E8%B5%9B&searchTab=ALL