
INSID3 是发表在 CVPR 2026 (Oral) 的一种免训练(training-free)的上下文分割方法。它基于一个冻结的 DINOv3 视觉编码器,无需任何微调或辅助模块,仅通过一张带掩码的参考图和一张目标图,就能分割出目标图中的同类物体。
核心思想
-
利用 DINOv3 的密集特征:从参考和目标图像中提取深层特征。
-
发现并修正位置偏差:DINOv3 的特征会受绝对图像位置影响(例如图片左侧的响应过强)。INSID3 通过 SVD 分析噪声图像,找出一个低维的"位置子空间",然后将特征投影到其正交补上,从而去除位置信息,保留语义。
-
轻量后处理:在去偏特征的基础上,进行原型匹配、聚类和聚合,生成最终分割掩码。
主要优点
-
完全无需训练:不更新任何参数,开箱即用。
-
单骨干网络:不需要分割解码器、额外模型或先验知识。
-
通用性强:适用于物体级、部件级、医学、水下、航拍等多种领域。
-
性能优异:在多个基准上达到 SOTA,且比许多参数化方法更小、更快。
典型用法(Python 示例)
python
model = build_insid3()
model.set_reference("ref_cat.jpg", "ref_cat_mask.png")
model.set_target("target_cat.jpg")
pred_mask = model.segment()
附加能力
-
语义对应:通过去偏特征匹配参考关键点到目标图像,提升匹配准确率。
-
可调参数:支持 CRF 细化、不同 DINOv3 规模(Small/Base/Large)等。
总之,INSID3 是首个仅靠一个冻结的 DINOv3 骨干完成高质量上下文分割的方法,揭示了 DINOv3 特征的空间偏差并给出了简洁有效的修正方案。
开源项目地址:https://github.com/visinf/INSID3
C# OpenvinoSharp部署代码(以DINOv3 vits16为例)
依赖项
用到的库如下,通过nuget安装即可
JYPPX.OpenVINO.CSharp.API 3.2.2
OpenVINO.runtime.win 2026.0.0
OpenCvSharp4.Windows 4.13.0.20260302
OpenCvSharp4.Extensions 4.13.0.20260330
模型链接:模型链接
onnx模型信息
输入:
name: imgs
tensor: float32[batch_size,3,512,512]
输出:
name: f_norm
tensor: float32[batch_size,Divf_norm_dim_1,Divf_norm_dim_2,Divf_norm_dim_3]
name: f_debias
tensor: float32[batch_size,Divf_debias_dim_1,Divf_debias_dim_2,Divf_debias_dim_3]
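如果 onnx 模型是静态导出的话,输入输出的各维度为固定值(原文此处的示意图缺失;按下文代码中的 reshape 与 FeatC/FeatH/FeatW 推断:输入 imgs 为 float32[2,3,512,512],输出 f_norm、f_debias 均为 float32[2,384,32,32],请以实际导出的模型为准)。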

C#完整部署代码
Form1.cs
cs
using OpenCvSharp;
using OpenVinoSharp;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace INSID3_WinForm
{
public partial class Form1 : Form
{
// ---------- State ----------
// Reference image, target image and reference mask. All stay null until the
// load / generate handlers assign them.
private Mat refImage = null;
private Mat tgtImage = null;
private Mat refMask = null;
// Polygon vertices clicked on pictureBox1, stored in control coordinates.
private List<System.Drawing.Point> polyPoints = new List<System.Drawing.Point>();
// Rectangle occupied by the displayed reference image inside pictureBox1
// (written by DisplayImage through its ref parameter).
private Rectangle imageRect = Rectangle.Empty;
// OpenVINO objects
private Core ovCore;
private Model ovModel;
private CompiledModel ovCompiledModel;
private InferRequest ovInferRequest;
// Model parameters (ViT-S/16)
private const int ModelSize = 512;// side length of the square model input; original note: 1024
private const int FeatC = 384; // feature dimension of the small model
private const int FeatH = 32;// feature map height; original note: 64
private const int FeatW = 32;// feature map width; original note: 64
// Per-channel mean / std used by LetterboxAndNormalize after the BGR -> RGB conversion.
private readonly float[] mean = { 0.485f, 0.456f, 0.406f };
private readonly float[] std = { 0.229f, 0.224f, 0.225f };
// Threshold parameters read by LocateCandidates and SeedAndAggregate.
private float CandidateSigma = 0.5f;
private float PixelSimSigma = 0.5f;
private float ClusterScoreRatio = 0.6f;
// Builds the form by running the designer-generated initialization.
public Form1()
{
InitializeComponent();
}
/// <summary>
/// Form load handler: wires the polygon-drawing handlers of pictureBox1 and then
/// loads, reshapes and compiles the encoder model with OpenVINO.
/// The segment button is enabled only when the model was initialized successfully.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void Form1_Load(object sender, EventArgs e)
{
    // Drawing a polygon does not need the model, so wire these handlers before any
    // early return; previously a missing model also disabled mouse annotation.
    pictureBox1.MouseDown += PictureBox1_MouseDown;
    pictureBox1.Paint += PictureBox1_Paint;

    // Device the precision hint is set for, the model is compiled for, and the log reports.
    const string device = "GPU.0";

    string modelPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "model", "insid3_encoder_small.onnx");
    if (!File.Exists(modelPath))
    {
        MessageBox.Show($"模型不存在: {modelPath}");
        btnSegment.Enabled = false;
        return;
    }

    try
    {
        ovCore = new Core();
        ovModel = ovCore.read_model(modelPath);

        // Fix the input to a batch of two images (reference + target) of ModelSize x ModelSize.
        PartialShape partialShape = new PartialShape([2, 3, ModelSize, ModelSize]);
        ovModel.reshape(partialShape);

        ovCore.set_property(device, new Dictionary<string, string>() { { "INFERENCE_PRECISION_HINT", "f32" } });

        // Compile explicitly for the device named in the log message instead of the
        // default device, so the reported device is the one actually used.
        ovCompiledModel = ovCore.compile_model(ovModel, device);
        ovInferRequest = ovCompiledModel.create_infer_request();

        LogMessage($"✅ 模型加载成功 ({device})");
        btnSegment.Enabled = true;
    }
    catch (Exception ex)
    {
        MessageBox.Show($"OpenVINO 初始化失败: {ex.Message}");
        btnSegment.Enabled = false;
    }
}
// ======================== UI 事件 ========================
/// <summary>
/// Lets the user pick the reference image, shows it in pictureBox1 and resets the
/// polygon and the reference mask that belonged to the previous reference image.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void btnLoadRef_Click(object sender, EventArgs e)
{
    using var ofd = new OpenFileDialog();
    ofd.Filter = "图像|*.jpg;*.jpeg;*.png;*.bmp";
    if (ofd.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    // Cv2.ImRead does not throw on failure; it returns an empty Mat, which would
    // later produce an invalid resize in DisplayImage.
    Mat loaded = Cv2.ImRead(ofd.FileName);
    if (loaded == null || loaded.Empty())
    {
        loaded?.Dispose();
        MessageBox.Show($"无法读取图像: {ofd.FileName}");
        return;
    }

    // Release the native memory of the images being replaced.
    refImage?.Dispose();
    refImage = loaded;
    refMask?.Dispose();
    refMask = null;
    polyPoints.Clear();

    DisplayImage(refImage, pictureBox1, ref imageRect);
    LogMessage($"参考图: {Path.GetFileName(ofd.FileName)}");
    pictureBox1.Invalidate();
}
/// <summary>
/// Lets the user pick the target image and shows it in pictureBox2.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void btnLoadTarget_Click(object sender, EventArgs e)
{
    using var ofd = new OpenFileDialog();
    ofd.Filter = "图像|*.jpg;*.jpeg;*.png;*.bmp";
    if (ofd.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    // Cv2.ImRead signals failure with an empty Mat rather than an exception.
    Mat loaded = Cv2.ImRead(ofd.FileName);
    if (loaded == null || loaded.Empty())
    {
        loaded?.Dispose();
        MessageBox.Show($"无法读取图像: {ofd.FileName}");
        return;
    }

    tgtImage?.Dispose();
    tgtImage = loaded;

    // Swap the displayed bitmap and release the one it replaces.
    var previous = pictureBox2.Image;
    pictureBox2.Image = MatToBitmap(tgtImage);
    previous?.Dispose();

    LogMessage($"目标图: {Path.GetFileName(ofd.FileName)}");
}
/// <summary>
/// Loads a mask image from disk as grayscale and letterboxes it to the model input
/// size using the reference image dimensions. Requires a reference image.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void btnLoadMask_Click(object sender, EventArgs e)
{
    if (refImage == null)
    {
        MessageBox.Show("先加载参考图");
        return;
    }

    using var ofd = new OpenFileDialog();
    ofd.Filter = "掩码|*.png;*.jpg;*.bmp";
    if (ofd.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    using var mask = Cv2.ImRead(ofd.FileName, ImreadModes.Grayscale);
    // An unreadable file yields an empty Mat; resizing it would throw inside OpenCV.
    if (mask.Empty())
    {
        MessageBox.Show($"无法读取掩码: {ofd.FileName}");
        return;
    }

    Mat letterboxed = LetterboxMask(mask, refImage.Width, refImage.Height);
    refMask?.Dispose();
    refMask = letterboxed;

    pictureBox1.Invalidate();
    LogMessage($"掩码: {Path.GetFileName(ofd.FileName)}");
}
/// <summary>
/// Rasterizes the polygon clicked on pictureBox1 into a ModelSize x ModelSize binary
/// mask (255 inside, 0 outside) expressed in letterboxed model coordinates.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void btnGenMask_Click(object sender, EventArgs e)
{
    bool polygonReady = refImage != null && polyPoints.Count >= 3;
    if (!polygonReady)
    {
        MessageBox.Show("至少绘制3个点");
        return;
    }

    // Control -> image scale, and image -> letterboxed model scale with its padding.
    float displayScale = (float)imageRect.Width / refImage.Width;
    float modelScale = ModelSize / (float)Math.Max(refImage.Width, refImage.Height);
    int scaledW = (int)(refImage.Width * modelScale);
    int scaledH = (int)(refImage.Height * modelScale);
    int padX = (ModelSize - scaledW) / 2;
    int padY = (ModelSize - scaledH) / 2;

    var polygon = new List<OpenCvSharp.Point>(polyPoints.Count);
    foreach (System.Drawing.Point click in polyPoints)
    {
        // Control coordinates -> reference image coordinates.
        var onImage = new PointF((click.X - imageRect.X) / displayScale, (click.Y - imageRect.Y) / displayScale);
        // Reference image coordinates -> model (letterboxed) coordinates.
        polygon.Add(new OpenCvSharp.Point((int)(onImage.X * modelScale + padX), (int)(onImage.Y * modelScale + padY)));
    }

    refMask = new Mat(ModelSize, ModelSize, MatType.CV_8UC1, Scalar.All(0));
    Cv2.FillPoly(refMask, new List<List<OpenCvSharp.Point>> { polygon }, Scalar.All(255));

    pictureBox1.Invalidate();
    LogMessage($"生成掩码 ({polyPoints.Count} 点)");
}
/// <summary>
/// Runs the whole segmentation pipeline (preprocess, OpenVINO inference, post-process)
/// on a worker thread and shows the resulting overlay in pictureBox2.
/// </summary>
/// <remarks>
/// This is an <c>async void</c> event handler, so every failure is caught here:
/// an exception escaping it would be unobservable and would leave the button disabled.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private async void btnSegment_Click(object sender, EventArgs e)
{
    if (refImage == null || refMask == null || tgtImage == null)
    {
        MessageBox.Show("请确保已加载参考图、掩码和目标图");
        return;
    }

    // Work on private copies so that loading another image or mask while the
    // worker is running cannot change or release the data it is reading.
    using Mat reference = refImage.Clone();
    using Mat referenceMask = refMask.Clone();
    using Mat target = tgtImage.Clone();

    btnSegment.Enabled = false;
    LogMessage("开始推理...");

    try
    {
        Mat resultMask = await Task.Run(() =>
        {
            var sw = Stopwatch.StartNew();

            // Pre-processing: letterbox + normalize both images and stack them as a batch of two.
            float[] refIn = LetterboxAndNormalize(reference, out _, out _, out _, out _, out _, out _);
            float[] tgtIn = LetterboxAndNormalize(target, out int dxT, out int dyT, out int nwT, out int nhT, out int tgtOw, out int tgtOh);
            float[] combined = new float[2 * 3 * ModelSize * ModelSize];
            Buffer.BlockCopy(refIn, 0, combined, 0, refIn.Length * sizeof(float));
            Buffer.BlockCopy(tgtIn, 0, combined, refIn.Length * sizeof(float), tgtIn.Length * sizeof(float));
            TimeSpan preT = sw.Elapsed;
            sw.Restart();

            // OpenVINO inference
            var inputTensor = ovInferRequest.get_input_tensor();
            inputTensor.shape = new Shape([2, 3, ModelSize, ModelSize]);
            inputTensor.set_data(combined);
            ovInferRequest.infer();
            var outputTensor1 = ovInferRequest.get_tensor(ovModel.outputs()[0].get_any_name());
            var outputTensor2 = ovInferRequest.get_tensor(ovModel.outputs()[1].get_any_name());
            float[] fNorm = outputTensor1.get_data<float>((int)outputTensor1.size);
            float[] fDebias = outputTensor2.get_data<float>((int)outputTensor2.size);
            TimeSpan infT = sw.Elapsed;
            sw.Restart();

            // Post-processing back to a mask at the target image resolution.
            Mat mask = PostProcess(fNorm, fDebias, referenceMask, dxT, dyT, nwT, nhT, tgtOw, tgtOh);
            TimeSpan postT = sw.Elapsed;

            LogMessage($"前处理:{preT.TotalMilliseconds:F0}ms 推理:{infT.TotalMilliseconds:F0}ms 后处理:{postT.TotalMilliseconds:F0}ms");
            return mask;
        });

        if (resultMask != null)
        {
            using (resultMask)
            using (Mat overlay = OverlayMask(target, resultMask))
            {
                // Swap the displayed bitmap and release the one it replaces.
                var previous = pictureBox2.Image;
                pictureBox2.Image = MatToBitmap(overlay);
                previous?.Dispose();
            }
            LogMessage("分割完成");
        }
    }
    catch (Exception ex)
    {
        LogMessage($"分割失败: {ex.Message}");
        MessageBox.Show($"分割失败: {ex.Message}");
    }
    finally
    {
        // Always give the button back, including after a failure.
        btnSegment.Enabled = true;
    }
}
/// <summary>
/// Saves the image currently shown in pictureBox2 as a PNG file chosen by the user.
/// Does nothing when no image is displayed or the dialog is cancelled.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void btnSave_Click(object sender, EventArgs e)
{
    if (pictureBox2.Image == null)
    {
        return;
    }

    using (var dialog = new SaveFileDialog())
    {
        dialog.Filter = "PNG图片|*.png";
        bool confirmed = dialog.ShowDialog() == DialogResult.OK;
        if (confirmed)
        {
            pictureBox2.Image.Save(dialog.FileName, ImageFormat.Png);
        }
    }
}
// ======================== 图像辅助 ========================
/// <summary>
/// Shows <paramref name="img"/> in <paramref name="pb"/> scaled to fit while keeping the
/// aspect ratio, and reports where the image lies inside the control.
/// </summary>
/// <param name="img">Image to display. A null or empty image leaves the control untouched.</param>
/// <param name="pb">Picture box that receives the resized bitmap.</param>
/// <param name="rect">
/// Receives the centered rectangle of the image in the control's client coordinates,
/// or <see cref="Rectangle.Empty"/> when there is nothing to display.
/// </param>
private void DisplayImage(Mat img, PictureBox pb, ref Rectangle rect)
{
    if (img == null || img.Empty())
    {
        // Avoids dividing by a zero width/height below.
        rect = Rectangle.Empty;
        return;
    }

    // Use the client area, not the outer size: with a border the drawable area is
    // smaller than Width x Height, and mouse coordinates are client coordinates,
    // so the outer size would shift the click-to-image mapping.
    System.Drawing.Size client = pb.ClientSize;
    float scale = Math.Min((float)client.Width / img.Width, (float)client.Height / img.Height);

    // Never request a zero-sized resize (very small control or extreme aspect ratio).
    int dw = Math.Max(1, (int)(img.Width * scale));
    int dh = Math.Max(1, (int)(img.Height * scale));
    rect = new Rectangle((client.Width - dw) / 2, (client.Height - dh) / 2, dw, dh);

    using var resized = new Mat();
    Cv2.Resize(img, resized, new OpenCvSharp.Size(dw, dh));

    // Swap the displayed bitmap and release the one it replaces.
    var previous = pb.Image;
    pb.Image = MatToBitmap(resized);
    previous?.Dispose();
}
/// <summary>
/// Converts an OpenCV image into a GDI+ bitmap by encoding it to an in-memory stream.
/// </summary>
/// <param name="mat">Image to convert.</param>
/// <returns>A bitmap that owns its pixel data and does not depend on any stream.</returns>
private Bitmap MatToBitmap(Mat mat)
{
    using var ms = mat.ToMemoryStream();
    // A Bitmap created from a stream needs that stream to stay open for the bitmap's
    // whole lifetime. The stream is disposed when this method returns, so hand out an
    // independent copy instead of the stream-backed instance.
    using var streamBacked = new Bitmap(ms);
    return new Bitmap(streamBacked);
}
/// <summary>
/// Resizes a mask with nearest-neighbour interpolation and centers it on a zero-filled
/// ModelSize x ModelSize single-channel canvas, using the letterbox geometry of an
/// image of size <paramref name="imgW"/> x <paramref name="imgH"/>.
/// </summary>
/// <param name="mask">Source mask.</param>
/// <param name="imgW">Width of the image the mask belongs to.</param>
/// <param name="imgH">Height of the image the mask belongs to.</param>
/// <returns>A new ModelSize x ModelSize CV_8UC1 Mat owned by the caller.</returns>
private Mat LetterboxMask(Mat mask, int imgW, int imgH)
{
    // The longer image side is mapped onto ModelSize; the shorter one is padded.
    int longestSide = Math.Max(imgW, imgH);
    float ratio = ModelSize / (float)longestSide;
    int scaledW = (int)(imgW * ratio);
    int scaledH = (int)(imgH * ratio);
    var placement = new Rect((ModelSize - scaledW) / 2, (ModelSize - scaledH) / 2, scaledW, scaledH);

    using var scaledMask = new Mat();
    Cv2.Resize(mask, scaledMask, new OpenCvSharp.Size(scaledW, scaledH), 0, 0, InterpolationFlags.Nearest);

    var letterboxed = new Mat(ModelSize, ModelSize, MatType.CV_8UC1, Scalar.All(0));
    Mat target = letterboxed[placement];
    scaledMask.CopyTo(target);
    return letterboxed;
}
/// <summary>
/// Letterboxes an image onto a black ModelSize x ModelSize canvas, converts it from
/// BGR to RGB and returns it as a normalized planar (channel-major) float array.
/// </summary>
/// <param name="src">Source image.</param>
/// <param name="dx">Receives the horizontal padding of the resized image on the canvas.</param>
/// <param name="dy">Receives the vertical padding of the resized image on the canvas.</param>
/// <param name="nw">Receives the width of the resized image.</param>
/// <param name="nh">Receives the height of the resized image.</param>
/// <param name="ow">Receives the original image width.</param>
/// <param name="oh">Receives the original image height.</param>
/// <returns>Array of length 3 * ModelSize * ModelSize holding (value / 255 - mean) / std per channel.</returns>
private float[] LetterboxAndNormalize(Mat src, out int dx, out int dy, out int nw, out int nh, out int ow, out int oh)
{
    ow = src.Width;
    oh = src.Height;

    // The longer side is mapped onto ModelSize; the remainder is split as padding.
    float ratio = ModelSize / (float)Math.Max(ow, oh);
    nw = (int)(ow * ratio);
    nh = (int)(oh * ratio);
    dx = (ModelSize - nw) / 2;
    dy = (ModelSize - nh) / 2;

    using var scaled = new Mat();
    using var padded = new Mat(ModelSize, ModelSize, MatType.CV_8UC3, Scalar.All(0));
    using var rgbImage = new Mat();
    Cv2.Resize(src, scaled, new OpenCvSharp.Size(nw, nh));
    scaled.CopyTo(padded[new Rect(dx, dy, nw, nh)]);
    Cv2.CvtColor(padded, rgbImage, ColorConversionCodes.BGR2RGB);

    int planeSize = ModelSize * ModelSize;
    float[] tensor = new float[3 * planeSize];
    for (int row = 0; row < ModelSize; row++)
    {
        for (int col = 0; col < ModelSize; col++)
        {
            // Read each pixel once and scatter its channels into the three planes.
            Vec3b pixel = rgbImage.At<Vec3b>(row, col);
            int pixelIndex = row * ModelSize + col;
            for (int channel = 0; channel < 3; channel++)
            {
                float val = pixel[channel] / 255f;
                tensor[channel * planeSize + pixelIndex] = (val - mean[channel]) / std[channel];
            }
        }
    }
    return tensor;
}
// ======================== 后处理核心 ========================
/// <summary>
/// Turns the two encoder outputs into a mask at the original target resolution:
/// prototype matching, candidate localization, clustering, seed-and-aggregate,
/// upsampling to the model input size and undoing the letterbox.
/// </summary>
/// <param name="fNorm">Flattened first model output for the batch (reference first, target second).</param>
/// <param name="fDebias">Flattened second model output for the batch (reference first, target second).</param>
/// <param name="refMask">Reference mask in letterboxed model coordinates.</param>
/// <param name="dx">Horizontal letterbox padding of the target image.</param>
/// <param name="dy">Vertical letterbox padding of the target image.</param>
/// <param name="nw">Width of the resized target image inside the letterbox.</param>
/// <param name="nh">Height of the resized target image inside the letterbox.</param>
/// <param name="ow">Original target image width.</param>
/// <param name="oh">Original target image height.</param>
/// <returns>Mask of size <paramref name="ow"/> x <paramref name="oh"/>.</returns>
private Mat PostProcess(float[] fNorm, float[] fDebias, Mat refMask, int dx, int dy, int nw, int nh, int ow, int oh)
{
    const int ReferenceSlot = 0;
    const int TargetSlot = 1;

    // Split the flat outputs into per-patch feature vectors [H*W][C].
    float[][] referenceNormalized = To2DFeatures(fNorm, ReferenceSlot);
    float[][] referenceDebiased = To2DFeatures(fDebias, ReferenceSlot);
    float[][] targetNormalized = To2DFeatures(fNorm, TargetSlot);
    float[][] targetDebiased = To2DFeatures(fDebias, TargetSlot);

    float[] patchMask = ResizeMaskTo64(refMask);
    float[] prototype = ComputePrototype(referenceDebiased, patchMask);

    // Forward similarity: dot product of every target patch with the reference prototype.
    int patchCount = FeatH * FeatW;
    float[] forwardSim = new float[patchCount];
    for (int patch = 0; patch < patchCount; patch++)
    {
        float[] feature = targetDebiased[patch];
        float acc = 0;
        for (int ch = 0; ch < FeatC; ch++)
        {
            acc += feature[ch] * prototype[ch];
        }
        forwardSim[patch] = acc;
    }

    bool[] candidates = LocateCandidates(referenceDebiased, patchMask, targetDebiased, forwardSim);
    int[] clusters = ClusterFeatures(targetNormalized, 0.6f);
    bool[] patchResult = SeedAndAggregate(candidates, clusters, targetNormalized, targetDebiased, prototype, forwardSim);

    float[] modelSizedMask = BilinearUpsample(patchResult, FeatH, FeatW, ModelSize, ModelSize);
    return RecoverOriginalMask(modelSizedMask, dx, dy, nw, nh, ow, oh);
}
/// <summary>
/// Extracts one batch entry from a flat channel-major buffer and transposes it into
/// one feature vector per spatial position.
/// </summary>
/// <param name="data">Flat buffer indexed as batch, channel, then spatial position.</param>
/// <param name="batchIdx">Index of the batch entry to extract.</param>
/// <returns>Jagged array of FeatH * FeatW vectors, each of length FeatC.</returns>
private float[][] To2DFeatures(float[] data, int batchIdx)
{
    int positions = FeatH * FeatW;
    int batchStart = batchIdx * FeatC * positions;

    var vectors = new float[positions][];
    for (int pos = 0; pos < positions; pos++)
    {
        vectors[pos] = new float[FeatC];
    }

    // Walk the buffer channel plane by channel plane and scatter into the vectors.
    for (int ch = 0; ch < FeatC; ch++)
    {
        int planeStart = batchStart + ch * positions;
        for (int pos = 0; pos < positions; pos++)
        {
            vectors[pos][ch] = data[planeStart + pos];
        }
    }
    return vectors;
}
/// <summary>
/// Downsamples a mask to the feature-map resolution (FeatW x FeatH) with
/// nearest-neighbour interpolation and binarizes it.
/// </summary>
/// <param name="mask">Single-channel 8-bit mask.</param>
/// <returns>Row-major array of length FeatH * FeatW with 1 where the value exceeds 128, otherwise 0.</returns>
private float[] ResizeMaskTo64(Mat mask)
{
    using var featureSized = new Mat();
    Cv2.Resize(mask, featureSized, new OpenCvSharp.Size(FeatW, FeatH), 0, 0, InterpolationFlags.Nearest);

    float[] binary = new float[FeatH * FeatW];
    for (int idx = 0; idx < binary.Length; idx++)
    {
        int row = idx / FeatW;
        int col = idx % FeatW;
        bool foreground = featureSized.At<byte>(row, col) > 128;
        binary[idx] = foreground ? 1f : 0f;
    }
    return binary;
}
/// <summary>
/// Averages the feature vectors whose mask value exceeds 0.5 and L2-normalizes the mean.
/// </summary>
/// <param name="features">One feature vector of length FeatC per position.</param>
/// <param name="mask">Per-position mask values, aligned with <paramref name="features"/>.</param>
/// <returns>
/// Prototype of length FeatC. All zeros when no position is selected; left
/// unnormalized when its norm is not above 1e-6.
/// </returns>
private float[] ComputePrototype(float[][] features, float[] mask)
{
    float[] prototype = new float[FeatC];

    int[] selected = Enumerable.Range(0, mask.Length).Where(idx => mask[idx] > 0.5f).ToArray();
    foreach (int idx in selected)
    {
        float[] feature = features[idx];
        for (int ch = 0; ch < FeatC; ch++)
        {
            prototype[ch] += feature[ch];
        }
    }

    if (selected.Length == 0)
    {
        return prototype;
    }

    // Mean over the selected positions.
    float invCount = 1f / selected.Length;
    for (int ch = 0; ch < FeatC; ch++)
    {
        prototype[ch] *= invCount;
    }

    // L2 normalization, skipped for a (near-)zero vector.
    float length = (float)Math.Sqrt(prototype.Sum(v => v * v));
    if (length > 1e-6f)
    {
        for (int ch = 0; ch < FeatC; ch++)
        {
            prototype[ch] /= length;
        }
    }
    return prototype;
}
/// <summary>
/// Marks target patches as candidates. A patch qualifies when its forward similarity
/// exceeds mean + CandidateSigma * std of all forward similarities and its largest
/// dot product with any reference foreground patch is positive.
/// </summary>
/// <param name="refDeb">Reference feature vectors, one per patch.</param>
/// <param name="refMask">Reference mask at patch resolution (foreground where the value exceeds 0.5).</param>
/// <param name="tgtDeb">Target feature vectors, one per patch.</param>
/// <param name="simFwd">Forward similarity of each target patch.</param>
/// <returns>Candidate flag per target patch (FeatH * FeatW entries).</returns>
private bool[] LocateCandidates(float[][] refDeb, float[] refMask, float[][] tgtDeb, float[] simFwd)
{
    int patchCount = FeatH * FeatW;

    // Adaptive threshold from the statistics of the forward similarities.
    float mean = simFwd.Average();
    float std = (float)Math.Sqrt(simFwd.Select(v => (v - mean) * (v - mean)).Average());
    float threshold = mean + CandidateSigma * std;

    // Indices of the reference foreground patches.
    var foreground = new List<int>();
    for (int idx = 0; idx < patchCount; idx++)
    {
        if (refMask[idx] > 0.5f)
        {
            foreground.Add(idx);
        }
    }

    bool[] candidates = new bool[patchCount];
    for (int patch = 0; patch < patchCount; patch++)
    {
        bool passesForward = simFwd[patch] > threshold;
        if (!passesForward)
        {
            continue;
        }

        // Best match of this target patch among the reference foreground patches.
        float[] targetFeature = tgtDeb[patch];
        float best = float.MinValue;
        foreach (int fg in foreground)
        {
            float[] referenceFeature = refDeb[fg];
            float dot = 0;
            for (int ch = 0; ch < FeatC; ch++)
            {
                dot += referenceFeature[ch] * targetFeature[ch];
            }
            if (dot > best)
            {
                best = dot;
            }
        }
        candidates[patch] = best > 0;
    }
    return candidates;
}
/// <summary>
/// Clusters feature vectors with k-means (K = 6, k-means++ seeding with a fixed random
/// seed, 10 iterations) on L2-normalized copies of the inputs.
/// </summary>
/// <param name="features">Feature vectors to cluster; all are expected to share the length of the first one.</param>
/// <param name="tau">Not used by this implementation; kept so existing callers keep compiling.</param>
/// <returns>
/// One label per input vector. Clusters with fewer than 30 members get label -1; the
/// remaining clusters are renumbered 0, 1, ... in order of first appearance.
/// An empty input yields an empty array.
/// </returns>
private int[] ClusterFeatures(float[][] features, float tau)
{
    const int K = 6;
    const int Iterations = 10;
    const int MinClusterSize = 30;

    int N = features.Length;
    int[] labels = new int[N];
    if (N == 0)
    {
        // Nothing to cluster; also avoids indexing an empty array when seeding.
        return labels;
    }
    int dim = features[0].Length;

    static float SquaredDistance(float[] a, float[] b)
    {
        float dist = 0;
        for (int d = 0; d < a.Length; d++)
        {
            float diff = a[d] - b[d];
            dist += diff * diff;
        }
        return dist;
    }

    // L2-normalized working copies of the inputs.
    float[][] normFeats = new float[N][];
    for (int i = 0; i < N; i++)
    {
        float norm = (float)Math.Sqrt(features[i].Sum(v => v * v));
        normFeats[i] = features[i].Select(v => v / (norm + 1e-8f)).ToArray();
    }

    // k-means++ seeding with a fixed seed so results are reproducible.
    // Centroids are CLONED: they are overwritten in place during the update step,
    // and sharing the array with a data point would corrupt that point's features.
    Random rand = new Random(42);
    float[][] centroids = new float[K][];
    centroids[0] = (float[])normFeats[rand.Next(N)].Clone();
    for (int k = 1; k < K; k++)
    {
        float[] dists = new float[N];
        float sum = 0;
        for (int i = 0; i < N; i++)
        {
            float minDist = float.MaxValue;
            for (int c = 0; c < k; c++)
            {
                float dist = SquaredDistance(normFeats[i], centroids[c]);
                if (dist < minDist) minDist = dist;
            }
            dists[i] = minDist;
            sum += minDist;
        }

        // Sample proportionally to the squared distance. Default to the last point so
        // that float rounding (running total ending just below r) cannot leave the
        // centroid unassigned.
        float r = (float)rand.NextDouble() * sum;
        int chosen = N - 1;
        float cum = 0;
        for (int i = 0; i < N; i++)
        {
            cum += dists[i];
            if (cum >= r) { chosen = i; break; }
        }
        centroids[k] = (float[])normFeats[chosen].Clone();
    }

    // Lloyd iterations: assign to the nearest centroid, then recompute the means.
    for (int iter = 0; iter < Iterations; iter++)
    {
        for (int i = 0; i < N; i++)
        {
            int best = 0;
            float bestDist = float.MaxValue;
            for (int k = 0; k < K; k++)
            {
                float dist = SquaredDistance(normFeats[i], centroids[k]);
                if (dist < bestDist) { bestDist = dist; best = k; }
            }
            labels[i] = best;
        }

        int[] counts = new int[K];
        float[][] sums = new float[K][];
        for (int k = 0; k < K; k++) sums[k] = new float[dim];
        for (int i = 0; i < N; i++)
        {
            int k = labels[i];
            counts[k]++;
            for (int d = 0; d < dim; d++) sums[k][d] += normFeats[i][d];
        }
        for (int k = 0; k < K; k++)
        {
            // An empty cluster keeps its previous centroid.
            if (counts[k] == 0) continue;
            float inv = 1f / counts[k];
            for (int d = 0; d < dim; d++) centroids[k][d] = sums[k][d] * inv;
        }
    }

    // Drop small clusters and renumber the rest compactly in order of first appearance.
    int[] sizes = new int[K];
    for (int i = 0; i < N; i++) sizes[labels[i]]++;
    int[] map = new int[K];
    for (int k = 0; k < K; k++) map[k] = -1;
    int nextLabel = 0;
    for (int i = 0; i < N; i++)
    {
        int l = labels[i];
        if (sizes[l] < MinClusterSize) { labels[i] = -1; continue; }
        if (map[l] == -1) map[l] = nextLabel++;
        labels[i] = map[l];
    }
    return labels;
}
// Selects the final target patches by scoring clusters against a seed cluster.
//
// Parameters:
//   candidate - per-patch candidate flags (see LocateCandidates)
//   labels    - per-patch cluster label, -1 for patches without a cluster
//   tgtNorm   - target feature vectors used for the intra-image (cluster-to-seed) similarity
//   tgtDeb    - target feature vectors used for the cross-image (cluster-to-reference) similarity
//   refProto  - reference prototype vector
//   simFwd    - per-patch forward similarity to the reference prototype
//
// Returns a flag per patch (FeatH * FeatW entries); all false when no candidate
// patch belongs to a cluster.
private bool[] SeedAndAggregate(bool[] candidate, int[] labels, float[][] tgtNorm, float[][] tgtDeb, float[] refProto, float[] simFwd)
{
int N = FeatH * FeatW;
// A patch is "matched" when it is a candidate and belongs to a cluster.
bool[] matched = new bool[N];
for (int i = 0; i < N; i++) if (candidate[i] && labels[i] >= 0) matched[i] = true;
// Map every cluster label that contains at least one matched patch to a compact index 0..K-1.
var lblToK = new Dictionary<int, int>();
int K = 0;
for (int i = 0; i < N; i++) if (matched[i] && !lblToK.ContainsKey(labels[i])) lblToK[labels[i]] = K++;
if (K == 0) return new bool[N];
// Per cluster: number of matched patches and sum of their tgtDeb features.
int[] mCounts = new int[K];
float[][] debSum = new float[K][];
for (int k = 0; k < K; k++) debSum[k] = new float[FeatC];
for (int i = 0; i < N; i++)
{
if (matched[i] && lblToK.TryGetValue(labels[i], out int k))
{
mCounts[k]++;
for (int c = 0; c < FeatC; c++) debSum[k][c] += tgtDeb[i][c];
}
}
// Mean tgtDeb feature of the matched patches of each cluster (not normalized).
float[][] debProtos = new float[K][];
for (int k = 0; k < K; k++)
{
debProtos[k] = new float[FeatC];
if (mCounts[k] > 0)
{
float inv = 1f / mCounts[k];
for (int c = 0; c < FeatC; c++) debProtos[k][c] = debSum[k][c] * inv;
}
}
// Per cluster: number of ALL its patches, matched or not.
int[] vCounts = new int[K];
for (int i = 0; i < N; i++) if (labels[i] >= 0 && lblToK.TryGetValue(labels[i], out int k)) vCounts[k]++;
// Dot product of each cluster prototype with the reference prototype.
float[] crossSim = new float[K];
for (int k = 0; k < K; k++)
{
float d = 0;
for (int c = 0; c < FeatC; c++) d += debProtos[k][c] * refProto[c];
crossSim[k] = d;
}
// The seed is the cluster with the highest cross similarity (first one wins on ties).
int seedK = 0;
float mx = float.MinValue;
for (int k = 0; k < K; k++) if (crossSim[k] > mx) { mx = crossSim[k]; seedK = k; }
// Mean tgtNorm feature over all patches of each cluster, then L2-normalized.
float[][] normSum = new float[K][];
for (int k = 0; k < K; k++) normSum[k] = new float[FeatC];
for (int i = 0; i < N; i++)
if (labels[i] >= 0 && lblToK.TryGetValue(labels[i], out int k))
for (int c = 0; c < FeatC; c++) normSum[k][c] += tgtNorm[i][c];
float[][] normProtos = new float[K][];
for (int k = 0; k < K; k++)
{
normProtos[k] = new float[FeatC];
if (vCounts[k] > 0)
{
float inv = 1f / vCounts[k];
for (int c = 0; c < FeatC; c++) normProtos[k][c] = normSum[k][c] * inv;
}
float n = (float)Math.Sqrt(normProtos[k].Sum(v => v * v)) + 1e-8f;
for (int c = 0; c < FeatC; c++) normProtos[k][c] /= n;
}
// Similarity of every cluster prototype to the seed cluster prototype.
float[] intraSim = new float[K];
float[] seedP = normProtos[seedK];
for (int k = 0; k < K; k++)
{
float d = 0;
for (int c = 0; c < FeatC; c++) d += seedP[c] * normProtos[k][c];
intraSim[k] = d;
}
// Mean forward similarity over all patches of each cluster.
float[] crossSum = new float[K];
for (int i = 0; i < N; i++)
if (labels[i] >= 0 && lblToK.TryGetValue(labels[i], out int k))
crossSum[k] += simFwd[i];
float[] crossScore = new float[K];
for (int k = 0; k < K; k++) crossScore[k] = vCounts[k] > 0 ? crossSum[k] / vCounts[k] : 0f;
// Fraction of each cluster's patches that are matched; the seed cluster is forced to 1.
float[] areaW = new float[K];
for (int k = 0; k < K; k++) areaW[k] = vCounts[k] > 0 ? (float)mCounts[k] / vCounts[k] : 0f;
areaW[seedK] = 1.0f;
// Combined cluster score: product of the three terms above.
float[] combined = new float[K];
for (int k = 0; k < K; k++) combined[k] = crossScore[k] * intraSim[k] * areaW[k];
// Per-patch threshold: mean + PixelSimSigma * std of the forward similarities.
float meanSim = simFwd.Average();
float stdSim = (float)Math.Sqrt(simFwd.Select(v => (v - meanSim) * (v - meanSim)).Average());
float pixelThresh = meanSim + PixelSimSigma * stdSim;
// Per-cluster threshold: a fraction (ClusterScoreRatio) of the best combined score.
// NOTE(review): when the best combined score is not positive and the ratio lies in (0, 1),
// the strict comparison below fails even for the best cluster, so the result is empty;
// confirm that this is intended.
float bestComb = combined.Max();
float clusterThresh = bestComb * ClusterScoreRatio;
// Keep a patch when its cluster passes the cluster threshold and the patch itself
// passes the per-patch threshold.
bool[] final = new bool[N];
for (int i = 0; i < N; i++)
{
if (labels[i] >= 0 && lblToK.TryGetValue(labels[i], out int k))
if (combined[k] > clusterThresh && simFwd[i] > pixelThresh)
final[i] = true;
}
return final;
}
/// <summary>
/// Resamples a boolean mask to a new resolution with bilinear interpolation using
/// half-pixel centers; true is treated as 1 and false as 0.
/// </summary>
/// <param name="mask">Row-major source mask with <paramref name="sh"/> * <paramref name="sw"/> entries.</param>
/// <param name="sh">Source height.</param>
/// <param name="sw">Source width.</param>
/// <param name="dh">Destination height.</param>
/// <param name="dw">Destination width.</param>
/// <returns>Row-major array of <paramref name="dh"/> * <paramref name="dw"/> values in [0, 1].</returns>
private float[] BilinearUpsample(bool[] mask, int sh, int sw, int dh, int dw)
{
    float[] res = new float[dh * dw];
    for (int ty = 0; ty < dh; ty++)
    {
        // Clamp the source coordinate to the valid sample range. Without the clamp a
        // negative coordinate near the top/left border gives a negative fractional
        // part, i.e. weights outside [0, 1] (extrapolation) and results outside [0, 1].
        float sy = Math.Min(sh - 1, Math.Max(0f, (ty + 0.5f) * sh / dh - 0.5f));
        int y0 = (int)Math.Floor(sy);
        int y1 = Math.Min(sh - 1, y0 + 1);
        float wy = sy - y0;

        for (int tx = 0; tx < dw; tx++)
        {
            float sx = Math.Min(sw - 1, Math.Max(0f, (tx + 0.5f) * sw / dw - 0.5f));
            int x0 = (int)Math.Floor(sx);
            int x1 = Math.Min(sw - 1, x0 + 1);
            float wx = sx - x0;

            float v00 = mask[y0 * sw + x0] ? 1f : 0f;
            float v10 = mask[y0 * sw + x1] ? 1f : 0f;
            float v01 = mask[y1 * sw + x0] ? 1f : 0f;
            float v11 = mask[y1 * sw + x1] ? 1f : 0f;

            res[ty * dw + tx] = (1 - wx) * (1 - wy) * v00 + wx * (1 - wy) * v10 + (1 - wx) * wy * v01 + wx * wy * v11;
        }
    }
    return res;
}
/// <summary>
/// Binarizes a model-sized soft mask at 0.5, crops away the letterbox padding and
/// resizes the crop to the original image size with nearest-neighbour interpolation.
/// </summary>
/// <param name="m1024">Row-major soft mask with ModelSize * ModelSize entries.</param>
/// <param name="dx">Horizontal letterbox padding.</param>
/// <param name="dy">Vertical letterbox padding.</param>
/// <param name="nw">Width of the image region inside the letterbox.</param>
/// <param name="nh">Height of the image region inside the letterbox.</param>
/// <param name="ow">Original image width.</param>
/// <param name="oh">Original image height.</param>
/// <returns>New CV_8UC1 Mat of size <paramref name="ow"/> x <paramref name="oh"/> with values 0 or 255.</returns>
private Mat RecoverOriginalMask(float[] m1024, int dx, int dy, int nw, int nh, int ow, int oh)
{
    using var binary = new Mat(ModelSize, ModelSize, MatType.CV_8UC1);
    for (int idx = 0; idx < ModelSize * ModelSize; idx++)
    {
        int row = idx / ModelSize;
        int col = idx % ModelSize;
        byte value = m1024[idx] > 0.5f ? (byte)255 : (byte)0;
        binary.At<byte>(row, col) = value;
    }

    // View onto the region that holds the actual image, without the padding.
    var imageRegion = new Rect(dx, dy, nw, nh);
    using var withoutPadding = new Mat(binary, imageRegion);

    var restored = new Mat();
    Cv2.Resize(withoutPadding, restored, new OpenCvSharp.Size(ow, oh), 0, 0, InterpolationFlags.Nearest);
    return restored;
}
/// <summary>
/// Returns a copy of <paramref name="src"/> in which every pixel covered by the mask
/// (mask value above 128) has its third channel raised by 100, saturating at 255.
/// </summary>
/// <param name="src">Three-channel 8-bit image; it is not modified.</param>
/// <param name="mask">Single-channel 8-bit mask, read at the same coordinates as <paramref name="src"/>.</param>
/// <returns>A new Mat owned by the caller.</returns>
private Mat OverlayMask(Mat src, Mat mask)
{
    Mat tinted = src.Clone();
    int rows = tinted.Height;
    int cols = tinted.Width;

    for (int row = 0; row < rows; row++)
    {
        for (int col = 0; col < cols; col++)
        {
            bool covered = mask.At<byte>(row, col) > 128;
            if (!covered)
            {
                continue;
            }

            Vec3b pixel = tinted.At<Vec3b>(row, col);
            int boosted = Math.Min(255, pixel[2] + 100);
            pixel[2] = (byte)boosted;
            tinted.Set(row, col, pixel);
        }
    }
    return tinted;
}
/// <summary>
/// Appends a line to the log text box, marshalling to the UI thread when called
/// from another thread.
/// </summary>
/// <param name="msg">Text to append; a line break is added after it.</param>
private void LogMessage(string msg)
{
    void AppendLine() => textBox1.AppendText(msg + Environment.NewLine);

    if (!InvokeRequired)
    {
        AppendLine();
        return;
    }
    Invoke(new Action(AppendLine));
}
// ---------- 鼠标绘制 ----------
/// <summary>
/// Edits the polygon on the reference image: a left click inside the displayed image
/// appends a vertex, a right click removes the vertex closest to the cursor.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Mouse event data in control coordinates.</param>
private void PictureBox1_MouseDown(object sender, MouseEventArgs e)
{
    bool insideImage = refImage != null && imageRect.Contains(e.Location);
    if (!insideImage)
    {
        return;
    }

    switch (e.Button)
    {
        case MouseButtons.Left:
            polyPoints.Add(e.Location);
            pictureBox1.Invalidate();
            break;

        case MouseButtons.Right when polyPoints.Count > 0:
            // Find the vertex with the smallest squared distance to the cursor.
            int nearest = -1;
            float nearestDist = float.MaxValue;
            for (int i = 0; i < polyPoints.Count; i++)
            {
                System.Drawing.Point vertex = polyPoints[i];
                float dist = (vertex.X - e.X) * (vertex.X - e.X) + (vertex.Y - e.Y) * (vertex.Y - e.Y);
                if (dist < nearestDist)
                {
                    nearestDist = dist;
                    nearest = i;
                }
            }
            if (nearest >= 0)
            {
                polyPoints.RemoveAt(nearest);
            }
            pictureBox1.Invalidate();
            break;
    }
}
/// <summary>
/// Paints the annotation overlay of pictureBox1: red outlines of the current reference
/// mask and, on top, the polygon vertices and connecting lines drawn by the user.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Paint event data providing the graphics surface.</param>
private void PictureBox1_Paint(object sender, PaintEventArgs e)
{
    Graphics surface = e.Graphics;

    bool maskAvailable = refMask != null && !refMask.Empty() && imageRect != Rectangle.Empty;
    if (maskAvailable)
    {
        // Letterbox geometry of the reference image inside the model-sized mask.
        float ratio = ModelSize / (float)Math.Max(refImage.Width, refImage.Height);
        int scaledW = (int)(refImage.Width * ratio);
        int scaledH = (int)(refImage.Height * ratio);
        var imageRegion = new Rect((ModelSize - scaledW) / 2, (ModelSize - scaledH) / 2, scaledW, scaledH);

        // Crop the padding away and bring the mask to the displayed image size.
        using var maskRegion = new Mat(refMask, imageRegion);
        using var displayMask = new Mat();
        Cv2.Resize(maskRegion, displayMask, new OpenCvSharp.Size(imageRect.Width, imageRect.Height), 0, 0, InterpolationFlags.Nearest);
        Cv2.Threshold(displayMask, displayMask, 128, 255, ThresholdTypes.Binary);
        Cv2.FindContours(displayMask, out var contours, out _, RetrievalModes.External, ContourApproximationModes.ApproxSimple);

        using var outlinePen = new Pen(Color.Red, 2);
        foreach (var contour in contours)
        {
            if (contour.Length < 2)
            {
                continue;
            }

            // Shift the contour from image-local to control coordinates.
            var outline = new System.Drawing.Point[contour.Length];
            for (int i = 0; i < contour.Length; i++)
            {
                outline[i] = new System.Drawing.Point(imageRect.X + contour[i].X, imageRect.Y + contour[i].Y);
            }
            surface.DrawPolygon(outlinePen, outline);
        }
    }

    if (polyPoints.Count == 0)
    {
        return;
    }

    using var linePen = new Pen(Color.Green, 3);
    using var vertexBrush = new SolidBrush(Color.Lime);
    foreach (System.Drawing.Point vertex in polyPoints)
    {
        surface.FillEllipse(vertexBrush, vertex.X - 4, vertex.Y - 4, 8, 8);
    }
    if (polyPoints.Count > 1)
    {
        surface.DrawLines(linePen, polyPoints.ToArray());
    }
}
}
}
Form1.designer.cs
cs
namespace INSID3_WinForm
{
partial class Form1
{
private System.ComponentModel.IContainer components = null;
private System.Windows.Forms.PictureBox pictureBox1;
private System.Windows.Forms.PictureBox pictureBox2;
private System.Windows.Forms.Button btnLoadRef;
private System.Windows.Forms.Button btnLoadTarget;
private System.Windows.Forms.Button btnLoadMask;
private System.Windows.Forms.Button btnGenMask;
private System.Windows.Forms.Button btnSegment;
private System.Windows.Forms.Button btnSave;
private System.Windows.Forms.TextBox textBox1;
/// <summary>
/// Releases the resources held by the form.
/// </summary>
/// <param name="disposing">True when managed resources should be released as well.</param>
protected override void Dispose(bool disposing)
{
    if (disposing)
    {
        // The component container may never have been created.
        components?.Dispose();
    }
    base.Dispose(disposing);
}
// Designer-generated initialization: creates the two picture boxes, the six buttons and
// the log text box, sets their layout properties, hooks the button Click handlers and
// the form Load handler, and adds the controls to the form.
// Layout is suspended while the controls are configured and resumed at the end.
private void InitializeComponent()
{
pictureBox1 = new PictureBox();
pictureBox2 = new PictureBox();
btnLoadRef = new Button();
btnLoadTarget = new Button();
btnLoadMask = new Button();
btnGenMask = new Button();
btnSegment = new Button();
btnSave = new Button();
textBox1 = new TextBox();
((System.ComponentModel.ISupportInitialize)pictureBox1).BeginInit();
((System.ComponentModel.ISupportInitialize)pictureBox2).BeginInit();
SuspendLayout();
//
// pictureBox1
//
pictureBox1.BackColor = Color.White;
pictureBox1.BorderStyle = BorderStyle.FixedSingle;
pictureBox1.Location = new Point(12, 12);
pictureBox1.Name = "pictureBox1";
pictureBox1.Size = new Size(555, 518);
pictureBox1.SizeMode = PictureBoxSizeMode.Zoom;
pictureBox1.TabIndex = 0;
pictureBox1.TabStop = false;
//
// pictureBox2
//
pictureBox2.BackColor = Color.White;
pictureBox2.BorderStyle = BorderStyle.FixedSingle;
pictureBox2.Location = new Point(596, 12);
pictureBox2.Name = "pictureBox2";
pictureBox2.Size = new Size(555, 518);
pictureBox2.SizeMode = PictureBoxSizeMode.Zoom;
pictureBox2.TabIndex = 1;
pictureBox2.TabStop = false;
//
// btnLoadRef
//
btnLoadRef.Location = new Point(13, 580);
btnLoadRef.Name = "btnLoadRef";
btnLoadRef.Size = new Size(90, 30);
btnLoadRef.TabIndex = 2;
btnLoadRef.Text = "加载参考图";
btnLoadRef.UseVisualStyleBackColor = true;
btnLoadRef.Click += btnLoadRef_Click;
//
// btnLoadTarget
//
btnLoadTarget.Location = new Point(109, 580);
btnLoadTarget.Name = "btnLoadTarget";
btnLoadTarget.Size = new Size(90, 30);
btnLoadTarget.TabIndex = 3;
btnLoadTarget.Text = "加载目标图";
btnLoadTarget.UseVisualStyleBackColor = true;
btnLoadTarget.Click += btnLoadTarget_Click;
//
// btnLoadMask
//
btnLoadMask.Location = new Point(205, 580);
btnLoadMask.Name = "btnLoadMask";
btnLoadMask.Size = new Size(90, 30);
btnLoadMask.TabIndex = 4;
btnLoadMask.Text = "加载掩码";
btnLoadMask.UseVisualStyleBackColor = true;
btnLoadMask.Click += btnLoadMask_Click;
//
// btnGenMask
//
btnGenMask.Location = new Point(301, 580);
btnGenMask.Name = "btnGenMask";
btnGenMask.Size = new Size(110, 30);
btnGenMask.TabIndex = 5;
btnGenMask.Text = "从多边形生成掩码";
btnGenMask.UseVisualStyleBackColor = true;
btnGenMask.Click += btnGenMask_Click;
//
// btnSegment
//
btnSegment.Location = new Point(12, 641);
btnSegment.Name = "btnSegment";
btnSegment.Size = new Size(140, 30);
btnSegment.TabIndex = 6;
btnSegment.Text = "开始分割";
btnSegment.UseVisualStyleBackColor = true;
btnSegment.Click += btnSegment_Click;
//
// btnSave
//
btnSave.Location = new Point(158, 641);
btnSave.Name = "btnSave";
btnSave.Size = new Size(100, 30);
btnSave.TabIndex = 7;
btnSave.Text = "保存结果";
btnSave.UseVisualStyleBackColor = true;
btnSave.Click += btnSave_Click;
//
// textBox1
//
textBox1.Location = new Point(596, 559);
textBox1.Multiline = true;
textBox1.Name = "textBox1";
textBox1.ScrollBars = ScrollBars.Vertical;
textBox1.Size = new Size(555, 134);
textBox1.TabIndex = 8;
//
// Form1
//
AutoScaleDimensions = new SizeF(7F, 17F);
AutoScaleMode = AutoScaleMode.Font;
ClientSize = new Size(1167, 705);
Controls.Add(textBox1);
Controls.Add(btnSave);
Controls.Add(btnSegment);
Controls.Add(btnGenMask);
Controls.Add(btnLoadMask);
Controls.Add(btnLoadTarget);
Controls.Add(btnLoadRef);
Controls.Add(pictureBox2);
Controls.Add(pictureBox1);
Name = "Form1";
Text = "INSID3 分割 (ViT-S/16)";
Load += Form1_Load;
((System.ComponentModel.ISupportInitialize)pictureBox1).EndInit();
((System.ComponentModel.ISupportInitialize)pictureBox2).EndInit();
ResumeLayout(false);
PerformLayout();
}
}
}
以上是全部完整代码