使用 React Native 和 Tensorflow.js 对实时视频进行预测
我已经设置了我的 React Native 应用,所有 unimodules 和软件包的安装和配置都按预期工作。依赖关系等没有问题。
现在我想实现一个 TensorFlow 模型,这个模型是我通过 Google 的 teachablemachine 训练的,我不明白如何将它与相机一起使用,因为我想实时处理帧,就像 TensorFlow React Native API 文档所说的那样。 这是我在网上找到的代码,我会用我的模型更改它,但问题是它只在用户拍照时检测模型。我希望我的相机能够实时理解模型,就像人脸检测、条形码扫描仪一样。
Main.js
import React, {useRef, useEffect, useState} from 'react';
import {View, StyleSheet, Dimensions} from 'react-native';
import {
getModel,
convertBase64ToTensor,
startPrediction,
} from '../../helpers/tensor-helper';
import {Camera} from 'expo-camera';
import * as tf from '@tensorflow/tfjs';
import '@tensorflow/tfjs-react-native';
import {
cameraWithTensors,
bundleResourceIO,
} from '@tensorflow/tfjs-react-native';
const TensorCamera = cameraWithTensors(Camera);
/**
 * Camera screen from the question: loads a bundled Layers model and tries to
 * run it on the frames streamed by TensorCamera.
 *
 * This is the listing that produced the log output quoted further down; the
 * NOTE(review) comments mark the places that explain that output.
 */
const Main = () => {
const [model, setModel] = useState();
const [prediction, setPredictions] = useState();
const cameraRef = useRef(null);
// Plain locals declared in the component body (not refs or state).
let requestAnimationFrameId = 0;
let frameCount = 0;
// Run a prediction on every Nth frame; 1 means every frame.
let makePredictionsEveryNFrame = 1;
const modelJson = require('../../model/model.json');
const modelWeights = require('../../model/weights.bin');
// Local loader; it shadows the `getModel` imported from tensor-helper above.
// Resolves to the loaded model, or to undefined when loading fails (the
// error is only logged).
const getModel = async () => {
try {
await tf.ready();
const model = await tf.loadLayersModel(
bundleResourceIO(modelJson, modelWeights),
);
return model;
} catch (error) {
console.log('Could not load model', error);
}
};
useEffect(() => {
// NOTE(review): getModel() is async and is not awaited here, so the value
// placed in state is a Promise rather than the loaded model.
setModel(getModel());
}, []);
// Cancels the pending animation frame in the effect cleanup.
useEffect(() => {
return () => {
cancelAnimationFrame(requestAnimationFrameId);
};
}, [requestAnimationFrameId]);
// Receives the tensor stream from TensorCamera's onReady and starts the loop.
const handleCameraStream = tensors => {
if (!tensors) {
// Only logs; execution continues into the loop below.
console.log('Image not found!');
}
const loop = async () => {
if (frameCount % makePredictionsEveryNFrame === 0) {
// Pull the next frame tensor from the stream.
const imageTensor = tensors.next().value;
if (model) {
const results = await startPrediction(model, imageTensor);
setPredictions(results);
// Logs the `prediction` state captured by this closure, not `results`.
console.log(`prediction: ${JSON.stringify(prediction)}`);
}
// NOTE(review): this disposes `tensors` (the stream object), not the
// `imageTensor` that was just read from it.
tf.dispose(tensors);
}
frameCount += 1;
frameCount = frameCount % makePredictionsEveryNFrame;
// Schedule the next iteration and remember its id for cancellation.
requestAnimationFrameId = requestAnimationFrame(loop);
};
console.log(`prediction: ${JSON.stringify(prediction)}`);
// loop() is async and is not awaited, so the log line after it runs without
// waiting for the first prediction.
loop();
console.log(`prediction: ${JSON.stringify(prediction)}`);
};
// Camera texture size per platform.
// NOTE(review): `Platform` is not among the names imported from
// 'react-native' in this listing.
let textureDims;
if (Platform.OS === 'ios') {
textureDims = {
height: 1920,
width: 1080,
};
} else {
textureDims = {
height: 1200,
width: 1600,
};
}
// NOTE(review): `styles` is not defined anywhere in this listing.
return (
<View style={styles.container}>
<TensorCamera
ref={cameraRef}
// Standard Camera props
style={styles.camera}
type={Camera.Constants.Type.back}
flashMode={Camera.Constants.FlashMode.off}
// Tensor related props
cameraTextureHeight={textureDims.height}
cameraTextureWidth={textureDims.width}
// NOTE(review): the error quoted below requires the width to be a multiple
// of 4 when depth is 3; 50 is not.
resizeHeight={50}
resizeWidth={50}
resizeDepth={3}
onReady={tensors => handleCameraStream(tensors)}
autorender={true}
/>
</View>
);
};
export default Main;
helpers/tensor-helper.js:
import * as tf from '@tensorflow/tfjs';
import {bundleResourceIO, decodeJpeg} from '@tensorflow/tfjs-react-native';
import * as tfc from '@tensorflow/tfjs-core';
import {Base64Binary} from '../utils/utils';
// Height and width used when reshaping a decoded image into a 4D tensor.
const BITMAP_DIMENSION = 224;
// Bundled model topology and weights, passed to bundleResourceIO when loading.
const modelJson = require('../model/model.json');
const modelWeights = require('../model/weights.bin');
// Number of colour channels used as the last dimension of the reshaped tensor.
// 0: channel from JPEG-encoded image
// 1: gray scale
// 3: RGB image
const TENSORFLOW_CHANNEL = 3;
/**
 * Loads the bundled Layers model once the TensorFlow.js backend is ready.
 *
 * @returns {Promise<object|undefined>} The loaded model, or undefined when
 *   loading fails (the error is logged, not rethrown).
 */
export const getModel = async () => {
  let loadedModel;
  try {
    await tf.ready();
    const ioHandler = bundleResourceIO(modelJson, modelWeights);
    loadedModel = await tf.loadLayersModel(ioHandler);
  } catch (error) {
    console.log('Could not load model', error);
  }
  return loadedModel;
};
/**
 * Decodes a base64-encoded JPEG into a 4D tensor of shape
 * [1, BITMAP_DIMENSION, BITMAP_DIMENSION, TENSORFLOW_CHANNEL].
 *
 * @param {string} base64 - Base64 string of a JPEG image.
 * @returns {Promise<object|undefined>} The reshaped tensor, or undefined when
 *   decoding or reshaping fails (the error is logged, not rethrown).
 */
export const convertBase64ToTensor = async base64 => {
  try {
    const jpegBytes = Base64Binary.decode(base64);
    // Decode with the same channel count the reshape below expects, instead
    // of repeating the literal 3, so the two can never drift apart.
    const decodedImage = decodeJpeg(jpegBytes, TENSORFLOW_CHANNEL);
    // Add the leading batch dimension the model expects.
    return decodedImage.reshape([
      1,
      BITMAP_DIMENSION,
      BITMAP_DIMENSION,
      TENSORFLOW_CHANNEL,
    ]);
  } catch (error) {
    console.log('Could not convert base64 string to tesor', error);
    return undefined;
  }
};
/**
 * Question's version of the prediction helper: calls the model on `tensor`
 * and returns a typed array. Errors are logged and result in undefined.
 */
export const startPrediction = async (model, tensor) => {
try {
// predict against the model
const output = await model.predict(tensor);
// return typed array
// NOTE(review): `output` is never read; the returned data comes from
// `tfc.tensor()` called with no arguments, not from the prediction above.
return tfc.tensor().dataSync();
} catch (error) {
console.log('Error predicting from tesor image', error);
}
};
我编辑了文件并得到以下输出:
LOG prediction: undefined
LOG prediction: undefined
WARN Possible Unhandled Promise Rejection (id: 1):
Error: When using targetShape.depth=3, targetShape.width must be a multiple of 4. Alternatively do not call detectGLCapabilities()
fromTexture@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:267911:24
nextFrameGenerator$@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:268598:67
tryCatch@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26537:23
invoke@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26710:32
loop$@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:126503:43
tryCatch@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26537:23
invoke@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26710:32
tryCatch@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26537:23
invoke@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26610:30
http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26640:19
tryCallTwo@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:31390:9
doResolve@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:31554:25
Promise@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:31413:14
callInvokeWithMethodAndArg@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26639:33
enqueue@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26644:157
async@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26661:69
loop@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:126494:42
handleCameraStream@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:126535:11
onReady@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:126572:34
onGLContextCreate$@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:268641:37
tryCatch@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26537:23
invoke@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26710:32
__callImmediates@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:3317:35
http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:3096:34
__guard@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:3300:15
flushedQueue@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:3095:21
flushedQueue@[native code]
invokeCallbackAndReturnFlushedQueue@[native code]
好的,我之前做过这个(去年),所以我可能忘记了一些东西,但你可以参考 这里 的代码,使用 Expo 并对实时视频源进行预测,请原谅非常糟糕的代码(我现在写了更好的代码)。
无论如何,这是一个关于你需要做的事情的简单更新,主要是关于 `handleCameraStream()`。您将需要运行两个不同的 `useEffect` 钩子:一个用于最初加载模型,另一个用于取消动画帧;在持续进行预测时,您需要用到这些动画帧。
将模型设置为状态,然后您可以使用 `model` 从文件中的任何部分访问它。我也对 `predictions` 做了同样的操作。
我还添加了每 `N` 帧进行一次预测的功能:将 `makePredictionsEveryNFrames` 设置为 `1` 时,`TensorCamera` 输出的每一帧张量都会被传给预测函数。进行预测后,您还需要使用 `tf.dispose()` 释放张量。函数 `loop()` 需要无限运行,以便持续对后续的帧进行预测。
/**
 * Screen component that runs the model on frames streamed by TensorCamera.
 *
 * The model is loaded once on mount and kept in state. Every Nth frame from
 * the camera stream is passed to `startPrediction`, and the result is stored
 * in `predictions` for rendering.
 */
const Main = () => {
  const [model, setModel] = useState();
  const [predictions, setPredictions] = useState();
  let requestAnimationFrameId = 0;
  let frameCount = 0;
  // Run a prediction on every Nth frame; 1 means every frame.
  const makePredictionsEveryNFrames = 1;

  useEffect(() => {
    // An effect callback may not be async (it must return a cleanup function
    // or nothing), so the await lives in an inner async function.
    let isMounted = true;
    const loadModel = async () => {
      const loadedModel = await getModel();
      // Skip the state update if the component unmounted while loading.
      if (isMounted) {
        setModel(loadedModel);
      }
    };
    loadModel();
    return () => {
      isMounted = false;
    };
  }, []);

  useEffect(() => {
    // Stop the prediction loop when the component goes away.
    return () => {
      cancelAnimationFrame(requestAnimationFrameId);
    };
  }, [requestAnimationFrameId]);

  /**
   * Starts the prediction loop for the tensor stream supplied by
   * TensorCamera's onReady callback.
   *
   * NOTE(review): `model` is the value captured by the render whose handler
   * is invoked; if onReady fires before the model has loaded it stays
   * undefined inside the loop. Presumably TensorCamera should only be
   * mounted once `model` is set - confirm against the render code.
   */
  const handleCameraStream = (tensors) => {
    if (!tensors) {
      console.log("Image not found!");
      // Nothing to iterate over; starting the loop would only throw.
      return;
    }

    const loop = async () => {
      if (frameCount % makePredictionsEveryNFrames === 0) {
        const imageTensor = tensors.next().value;
        try {
          if (model && imageTensor) {
            const results = await startPrediction(model, imageTensor);
            setPredictions(results);
          }
        } finally {
          // Release the frame that was just consumed, not the stream itself.
          if (imageTensor) {
            tf.dispose(imageTensor);
          }
        }
      }
      frameCount = (frameCount + 1) % makePredictionsEveryNFrames;
      requestAnimationFrameId = requestAnimationFrame(loop);
    };

    loop();
  };
}
我更新了 `getModel()`,使其在模型加载完成后返回模型,这样我们就可以将其设置为状态。
/**
 * Loads the bundled Layers model once the TensorFlow.js backend is ready.
 *
 * @returns {Promise<object|undefined>} The loaded model, or undefined when
 *   loading fails (the error is logged, not rethrown).
 */
export const getModel = async () => {
  let loadedModel;
  try {
    await tf.ready();
    const ioHandler = bundleResourceIO(modelJson, modelWeights);
    loadedModel = await tf.loadLayersModel(ioHandler);
  } catch (error) {
    console.log("Could not load model", error);
  }
  return loadedModel;
};
因此,您只需要访问 `predictions` 并呈现它们即可。
编辑 1 :
回顾代码,`startPrediction` 函数存在一些问题:您实际上并没有返回模型的预测结果,而且需要一次对一批图像进行预测。
/**
 * Runs the model on one batch and returns the prediction values.
 *
 * @param {object} model - Loaded model exposing `predict(tensor, config)`.
 * @param {object} tensor - Batched input tensor, e.g. shape (1, 224, 224, 3).
 * @returns {Promise<object|undefined>} Typed array with the model output, or
 *   undefined when prediction fails (the error is logged, not rethrown).
 */
export const startPrediction = async (model, tensor) => {
  let output;
  try {
    // predict() is synchronous and returns the output tensor(s).
    output = model.predict(tensor, {batchSize: 1});
    // Read the values asynchronously so the render loop is not blocked while
    // the backend finishes computing.
    return await output.data();
  } catch (error) {
    console.log('Error predicting from tesor image', error);
    return undefined;
  } finally {
    // This runs once per camera frame; without disposing the output tensor
    // every call would leak backend memory.
    if (output) {
      tf.dispose(output);
    }
  }
};
编辑 2 :
查看模型输入形状(此处),预期输入形状为 `(batch_size, 224, 224, 3)`,但您传入的图像为 `(batch_size, 50, 50, 3)`。因此,请尝试将参数 `resizeWidth` 和 `resizeHeight` 更新为 `224`。
<TensorCamera
ref={cameraRef}
// Standard Camera props
style={styles.camera}
type={Camera.Constants.Type.back}
flashMode={Camera.Constants.FlashMode.off}
// Tensor related props
cameraTextureHeight={textureDims.height}
cameraTextureWidth={textureDims.width}
// Size of the tensor produced for each frame: 224 x 224 x 3, matching the
// (batch_size, 224, 224, 3) input shape the model expects. 224 is also a
// multiple of 4, which the error quoted earlier requires when depth is 3.
resizeHeight={224}
resizeWidth={224}
resizeDepth={3}
// Hands the tensor stream to the prediction loop.
onReady={tensors => handleCameraStream(tensors)}
autorender={true}
/>
除此之外,您还需要在把张量传给模型进行预测之前,将 3D 张量转换为 4D 张量(也就是扩展出一个维度)。请把 `handleCameraStream` 函数也更新为下面这样。张量的大小为 `(224, 224, 3)`,扩展第一个维度后,它将是 `(1, 224, 224, 3)`。
/**
 * Starts the prediction loop for the tensor stream supplied by TensorCamera's
 * onReady callback. Every Nth frame is expanded to a batch of one, passed to
 * `startPrediction`, and the result is stored via `setPredictions`.
 *
 * NOTE(review): `model` is the value captured when this handler was created;
 * if onReady fires before the model has loaded it stays undefined inside the
 * loop. Presumably TensorCamera should only be mounted once `model` is set -
 * confirm against the render code.
 *
 * @param {object} tensors - Iterator yielding one image tensor per frame.
 */
const handleCameraStream = (tensors) => {
  if (!tensors) {
    console.log("Image not found!");
    // Nothing to iterate over; starting the loop would only throw.
    return;
  }

  const loop = async () => {
    if (frameCount % makePredictionsEveryNFrames === 0) {
      const imageTensor = tensors.next().value;
      // Declared outside the `if` so the cleanup below can see it.
      let imageTensorReshaped;
      try {
        if (model && imageTensor) {
          // Add the batch dimension: (224, 224, 3) -> (1, 224, 224, 3).
          // JavaScript has no keyword arguments, so the axis is positional.
          imageTensorReshaped = imageTensor.expandDims(0);
          const results = await startPrediction(model, imageTensorReshaped);
          setPredictions(results);
        }
      } finally {
        // Release both the batched copy and the original frame; otherwise
        // memory grows with every frame.
        if (imageTensorReshaped) {
          tf.dispose(imageTensorReshaped);
        }
        if (imageTensor) {
          tf.dispose(imageTensor);
        }
      }
    }
    frameCount = (frameCount + 1) % makePredictionsEveryNFrames;
    requestAnimationFrameId = requestAnimationFrame(loop);
  };

  loop();
};