基于TensorFlow.js和COCO-SSD模型的实时目标检测网络应用程序

实现流程

访问用户的桌面录屏并且显示视频源（位置居中）。
对视频源进行实时目标检测。
在检测到的目标周围绘制边界框，并用它们的类别和检测置信度进行标记。
在视频源下方显示一个唯一检测到的目标列表，显示目标类别和首次检测到的时间。
确保每个目标类别只列出一次，不管它被检测到多少次。
使用每秒2帧的检测频率来平衡性能和响应性。
包括屏幕录制访问和模型加载的错误处理。
为应用程序设计一个干净、现代的外观，并具有响应式设计。
将所有必要的HTML、CSS和JavaScript包含在一个单一的自包含文件中。
为TensorFlow.js和COCO-SSD模型库使用CDN链接。请提供完整可运行的HTML文件，其中包含内联CSS和JavaScript。

效果图

代码

html 复制代码

<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>实时目标检测</title>
    <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs"></script>
    <script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/coco-ssd"></script>
    <style>
        /* Page shell: a vertical flex column that centers every section horizontally. */
        body {
            font-family: Arial, sans-serif;
            display: flex;
            flex-direction: column;
            align-items: center;
            background-color: #f0f0f0;
            margin: 0;
            padding: 20px;
        }
        h1 {
            color: #333;
        }
        /* Positioning context for the overlay canvas (#output). */
        #videoContainer {
            position: relative;
            margin-bottom: 20px;
        }
        /* Pins the canvas to the container's top-left corner so it sits on top of the video. */
        #output {
            position: absolute;
            top: 0;
            left: 0;
        }
        /* Card holding the list of detected classes; fills the width up to 600px. */
        #detectionsList {
            background-color: white;
            border-radius: 8px;
            padding: 20px;
            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
            max-width: 600px;
            width: 100%;
        }
        #detectionsList h2 {
            margin-top: 0;
        }
        /* Strip default bullets and indentation from the list. */
        #detectionsList ul {
            list-style-type: none;
            padding: 0;
        }
        /* One row per detected class. */
        #detectionsList li {
            margin-bottom: 10px;
            padding: 10px;
            background-color: #f9f9f9;
            border-radius: 4px;
        }
        /* Error banner text shown when capture or model loading fails. */
        #error {
            color: red;
            margin-top: 20px;
        }
    </style>
</head>
<body>
    <h1>实时目标检测</h1>
    <!-- The canvas is absolutely positioned over the video (see the #output rule) and shares its 640x480 size. -->
    <div id="videoContainer">
        <video id="video" width="640" height="480" autoplay muted></video>
        <canvas id="output" width="640" height="480"></canvas>
    </div>
    <!-- The script fills #detectedObjects with one <li> per detected class. -->
    <div id="detectionsList">
        <h2>检测到的目标</h2>
        <ul id="detectedObjects"></ul>
    </div>
    <!-- The script writes capture and model-loading error messages here. -->
    <div id="error"></div>

    <script>
        // Cached references to the elements declared in the page markup.
        const video = document.getElementById('video');
        const output = document.getElementById('output');
        // 2D context of the overlay canvas; frames, boxes and labels are painted through it.
        const ctx = output.getContext('2d');
        const detectedObjects = document.getElementById('detectedObjects');
        const errorDiv = document.getElementById('error');

        // Detection model instance; assigned by loadModel().
        let model;
        // Maps each detected class name to the time string of its first detection.
        let detections = new Map();

        /**
         * Requests a screen-capture stream, attaches it to the page's video
         * element, and waits for the video metadata to load.
         *
         * On failure the message is shown in the error element and the original
         * error is rethrown.
         *
         * @returns {Promise<HTMLVideoElement>} The video element once its metadata is available.
         */
        async function setupCamera() {
            try {
                const displayStream = await navigator.mediaDevices.getDisplayMedia({ video: true });
                video.srcObject = displayStream;

                // Resolves from the metadata handler, after which the frame size is known.
                const metadataReady = new Promise((done) => {
                    video.onloadedmetadata = () => {
                        done(video);
                    };
                });
                return metadataReady;
            } catch (captureError) {
                errorDiv.textContent = `无法访问屏幕录制：${captureError.message}`;
                throw captureError;
            }
        }

        /**
         * Loads the COCO-SSD model and stores it in the shared `model` variable.
         *
         * On failure the message is shown in the error element and the original
         * error is rethrown.
         *
         * @returns {Promise<void>}
         */
        async function loadModel() {
            try {
                const loadedModel = await cocoSsd.load();
                model = loadedModel;
            } catch (loadError) {
                errorDiv.textContent = `无法加载模型：${loadError.message}`;
                throw loadError;
            }
        }

        /**
         * Runs one detection pass on the current video frame and schedules the next.
         *
         * Each pass redraws the frame onto the overlay canvas, outlines and labels
         * every prediction, and records the first-seen time of any class not yet in
         * `detections`. Errors are logged and do not stop the loop.
         *
         * @returns {Promise<void>} Resolves once this pass is drawn and the next one is scheduled.
         */
        async function detectObjects() {
            // 500 ms between passes, i.e. two detections per second (2 FPS).
            const detectionIntervalMs = 500;

            try {
                const predictions = await model.detect(video);
                const { width: canvasWidth, height: canvasHeight } = ctx.canvas;
                ctx.clearRect(0, 0, canvasWidth, canvasHeight);
                ctx.drawImage(video, 0, 0, canvasWidth, canvasHeight);

                // Boxes come back in the pixel space of the source frame
                // (videoWidth x videoHeight), which for screen capture is usually
                // larger than the canvas. Scale them so they line up with the
                // frame that was just stretched onto the canvas. Fall back to 1
                // when the intrinsic size is not known.
                const scaleX = video.videoWidth > 0 ? canvasWidth / video.videoWidth : 1;
                const scaleY = video.videoHeight > 0 ? canvasHeight / video.videoHeight : 1;

                for (const prediction of predictions) {
                    const [boxX, boxY, boxWidth, boxHeight] = prediction.bbox;
                    const x = boxX * scaleX;
                    const y = boxY * scaleY;

                    ctx.strokeStyle = '#00FFFF';
                    ctx.lineWidth = 2;
                    ctx.strokeRect(x, y, boxWidth * scaleX, boxHeight * scaleY);

                    ctx.fillStyle = '#00FFFF';
                    ctx.font = '16px Arial';
                    // Keep the label on the canvas when the box touches the top edge.
                    const labelY = y > 10 ? y - 5 : 10;
                    ctx.fillText(`${prediction.class} (${Math.round(prediction.score * 100)}%)`, x, labelY);

                    // Only the first sighting of a class is recorded.
                    if (!detections.has(prediction.class)) {
                        const timestamp = new Date().toLocaleTimeString();
                        detections.set(prediction.class, timestamp);
                        updateDetectionsList();
                    }
                }
            } catch (error) {
                console.error('检测对象时出错：', error);
            }

            setTimeout(detectObjects, detectionIntervalMs);
        }

        /**
         * Rebuilds the on-page list of detected classes from the `detections` map,
         * one list item per class, in insertion order.
         */
        function updateDetectionsList() {
            // Drop the previous items before re-rendering the whole list.
            detectedObjects.innerHTML = '';

            for (const [label, firstSeenAt] of detections) {
                const entry = document.createElement('li');
                entry.textContent = `${label} - 首次检测时间: ${firstSeenAt}`;
                detectedObjects.appendChild(entry);
            }
        }

        /**
         * Application entry point: starts screen capture, then loads the model,
         * then starts the detection loop. Any initialization failure is logged
         * and the loop is not started.
         *
         * @returns {Promise<void>}
         */
        async function run() {
            // Steps run strictly in order; the first failure aborts the rest.
            const initSteps = [setupCamera, loadModel];

            try {
                for (const step of initSteps) {
                    await step();
                }
                detectObjects();
            } catch (initError) {
                console.error('应用程序初始化失败：', initError);
            }
        }

        // Start the application as soon as this script executes.
        run();
    </script>
</body>
</html>