Developer Questions Collection

How can I run pose estimation inference on a video instead of a webcam using Teachable Machine in JavaScript?

2021-08-04
806

I'm trying to run a pose classification model I trained on a video file, but apart from the documentation they provide for running inference on webcam input, I can't figure out how to get it working with anything else. I'm running the model with the sample JavaScript code they provide. I added an HTML video element and specified width and height parameters, but I can't work out how to make the prediction work inside the async function predict(). I commented out the webcam setup from the original code and changed all the webcam.canvas references to video.canvas. I'm still fairly new to JavaScript, but the model only exports to TensorFlow.js, so that's what I have to work with :)

Here is my code:

<video id="video" style="width: 540; height: 360;" muted>
    <source src="GTSolo2.mp4" type="video/mp4" />
</video>
<div>Teachable Machine Pose Model</div>
<br>
<button type="button" onclick="init()">Start</button>
<div><canvas id="canvas"></canvas></div>
<div id="label-container"></div>
<script src="https://cdn.jsdelivr.net/npm/@tensorflow/[email protected]/dist/tf.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/@teachablemachine/[email protected]/dist/teachablemachine-pose.min.js"></script>
<!-- AJA adds the osc lib-->
<script type="text/javascript" src="node_modules/osc-js/lib/osc.min.js"></script>
<script type="text/javascript">


    // More API functions here:
    // https://github.com/googlecreativelab/teachablemachine-community/tree/master/libraries/pose

    // the link to your model provided by Teachable Machine export panel
    const URL = "https://teachablemachine.withgoogle.com/models/rpV37-6o2/";
    let model, webcam, ctx, labelContainer, maxPredictions, video;

    async function init() {
        const modelURL = URL + "model.json";
        const metadataURL = URL + "metadata.json";

        // load the model and metadata
        // Refer to tmImage.loadFromFiles() in the API to support files from a file picker
        // Note: the pose library adds a tmPose object to your window (window.tmPose)
        model = await tmPose.load(modelURL, metadataURL);
        maxPredictions = model.getTotalClasses();

        // Convenience function to setup a webcam
        // const width = 540;
        // const height = 360;
        // const flip = false; // whether to flip the webcam
        // webcam = new tmPose.Webcam(width, height, flip); // width, height, flip - this should actually ideally be a square
        // await webcam.setup(); // request access to the webcam
        // await webcam.play();

        video = document.getElementById("video");
        video.width = 540;
        video.height = 360;
        video.addEventListener('play', loop );
        video.play();
        window.requestAnimationFrame(loop);

        // append/get elements to the DOM
        const canvas = document.getElementById("canvas");
        //canvas.width = width; canvas.height = height;
        ctx = canvas.getContext("2d");
        labelContainer = document.getElementById("label-container");
        for (let i = 0; i < maxPredictions; i++) { // and class labels
            labelContainer.appendChild(document.createElement("div"));
        }
    }

    async function loop(timestamp) {
        //webcam.update();
        await predict();
        window.requestAnimationFrame(loop);
    }

    async function predict() {
        // Prediction #1: run input through posenet
        // estimatePose can take in an image, video or canvas html element
        const { pose, posenetOutput } = await model.estimatePose(video.canvas); //EDIT THIS ELEMENT TO RUN A VIDEO INSTEAD OF WEBCAM

        // Prediction 2: run input through teachable machine classification model
        const prediction = await model.predict(posenetOutput); // Add maxPredictions here?

        for (let i = 0; i < maxPredictions; i++) {
            const classPrediction =
                prediction[i].className + ": " + prediction[i].probability.toFixed(2);
        }

        // finally draw the poses
        drawPose(pose);
    }

    function drawPose(pose) {
        if (video.canvas) {
            ctx.drawImage(video.canvas, 0, 0);
            // draw the keypoints and skeleton
            if (pose) {
                const minPartConfidence = 0.5;
                tmPose.drawKeypoints(pose.keypoints, minPartConfidence, ctx);
                tmPose.drawSkeleton(pose.keypoints, minPartConfidence, ctx);
            }
        }
    }
</script>

Here is the error that gets thrown:

teachablemachine-pose.min.js:58 Uncaught (in promise) TypeError: Cannot read property 'height' of undefined
    at y (teachablemachine-pose.min.js:58)
    at Object.e.padAndResizeTo (teachablemachine-pose.min.js:58)
    at t.<anonymous> (teachablemachine-pose.min.js:51)
    at teachablemachine-pose.min.js:34
    at Object.next (teachablemachine-pose.min.js:34)
    at teachablemachine-pose.min.js:34
    at new Promise (<anonymous>)
    at r (teachablemachine-pose.min.js:34)
    at t.estimatePoseOutputs (teachablemachine-pose.min.js:51)
    at t.<anonymous> (teachablemachine-pose.min.js:51)
2 Answers

There is no canvas property on HTMLVideoElement. You probably just need to pass the video element in directly. I'm guessing the original webcam object held a reference to a canvas element, but if you want to use a video, I think it would look like this:

const { pose, posenetOutput } = await model.estimatePose(video);
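Building on that answer, here is a minimal sketch (not the answerer's code) of how the asker's predict() and drawPose() could look once the video element is passed in directly. It assumes the init() from the question is otherwise unchanged. Note that the drawImage call in drawPose() has the same problem and also needs the video element rather than video.canvas, and that the prediction loop should write each class string into the label container (the question's loop built the string but never displayed it):

async function predict() {
    // estimatePose accepts an image, video, or canvas element,
    // so the HTMLVideoElement can be passed straight in
    const { pose, posenetOutput } = await model.estimatePose(video);

    // run the PoseNet output through the Teachable Machine classifier
    const prediction = await model.predict(posenetOutput);
    for (let i = 0; i < maxPredictions; i++) {
        labelContainer.childNodes[i].innerHTML =
            prediction[i].className + ": " + prediction[i].probability.toFixed(2);
    }

    drawPose(pose);
}

function drawPose(pose) {
    // draw the current video frame, then overlay the detected keypoints
    ctx.drawImage(video, 0, 0, ctx.canvas.width, ctx.canvas.height);
    if (pose) {
        const minPartConfidence = 0.5;
        tmPose.drawKeypoints(pose.keypoints, minPartConfidence, ctx);
        tmPose.drawSkeleton(pose.keypoints, minPartConfidence, ctx);
    }
}

For the overlay to line up with the frame, the commented-out canvas sizing in init() should also be restored, e.g. canvas.width = video.width; canvas.height = video.height;.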

posit labs
2022-12-21
<video id="video" style="width: 540; height: 360;" muted>
    <source src="GTSolo2.mp4" type="video/mp4" />
</video>

The video's src attribute can't simply be set to "GTSolo2.mp4"; let the user select the file with a file picker instead:

<input id="selectVideo" type="file" accept="video/*"/>
<video id="video" src="" preload autoplay loop muted controls> 
</video>
<script>
const video = document.getElementById('video');
const selectVideo = document.getElementById('selectVideo');
selectVideo.onchange = function (event) {
  var target = event.target || window.event.srcElement;
  var files = target.files;
  if (files && files.length) {
    var file = files[0];
    if (video.canPlayType(file.type)!="")
        video.src = URL.createObjectURL(file);
    else
        console.log("The file type is not supported.");
  }
}
</script>

Please refer to my code.
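A caveat when combining this picker with the question's code: the model and prediction loop shouldn't start until the selected video is actually playable. A rough sketch, replacing the onchange handler above and assuming the init() function from the question is defined on the same page:

selectVideo.onchange = function (event) {
  const file = event.target.files && event.target.files[0];
  if (!file || video.canPlayType(file.type) === "") return;

  // point the video at the chosen file, then kick off the model
  // loading and prediction loop once enough data is buffered
  video.src = URL.createObjectURL(file);
  video.addEventListener('canplay', () => init(), { once: true });
};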

Chungi Fu
2021-08-07