How can I use it with Sentis 2.1.1?
Is it possible to update it for Unity Sentis 2.1.1?
What do I need to modify?
Hello, did you manage to make it work?
Hi, unfortunately not yet.
Hello, I think I managed to do it (somehow). I don't have my PC with me right now to share the code, but I'll try to do so later… just remind me if I forget.
Hi Saydrk217, any news on the code?
Here is the full code from a project I am working on where I need to implement a custom object detector, but it should also work with YOLOv8. The main differences between the earlier versions of Sentis and this new one are:
- replace TensorFloat and TensorInt with Tensor<float> and Tensor<int>
- read from tensors with ReadbackAndClone() after PeekOutput() (see the short before/after sketch below)
- change the Functional API calls as shown in the code (there are several examples of how to set this up), because the new one uses a different logic
- there are some other changes, so you should definitely check the full changelog on the official Sentis webpage
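For example, here is a minimal before/after sketch of the readback change (the engine/worker and the output name are just the ones used in the full script below, so adapt them to your own setup):
// Sentis 1.x style (older API), shown only for comparison:
// TensorFloat coordsOld = engine.PeekOutput("output_0") as TensorFloat;
// coordsOld.MakeReadable();                    // pull the data back to the CPU
// float xOld = coordsOld[0, 0];
// Sentis 2.x style (new API):
Tensor<float> coordsGPU = engine.PeekOutput("output_0") as Tensor<float>;
using Tensor<float> coords = coordsGPU.ReadbackAndClone(); // CPU copy that you must dispose
float x = coords[0, 0];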
Good luck :)
using System.Collections.Generic;
using Unity.Sentis;
using UnityEngine;
using UnityEngine.UI;
using UnityEngine.Video;
using Lays = Unity.Sentis.Layers;
using System.IO;
using FF = Unity.Sentis.Functional;
using Unity.VisualScripting;
using UnityEngine.UIElements;
using Image = UnityEngine.UI.Image;
using UnityEngine.Android;
using System;
using UnityEngine.Timeline;
public class RunSSAFT : MonoBehaviour
{
public Camera mainCamera;
public List<string> ObjectLabels;
public List<GameObject> ObjectsPrefs;
public Dictionary<string, GameObject> spawnablePrefabs = new Dictionary<string, GameObject>();
public float fixedDepth = -500.0f; // Depth at which the object should float
public Material blendMaterial;
public RenderTexture someRenderTexture;
// Drag the sentis file here
public ModelAsset asset;
const string modelName = "SSAFT.onnx";
//const string videoName = "vid.mp4";
// Link the classes.txt here:
public TextAsset labelsAsset;
// Create a Raw Image in the scene and link it here:
public RawImage displayImage;
// Link to a bounding box sprite or texture here:
public Sprite borderSprite;
public Texture2D borderTexture;
// Link to the font for the labels:
public Font font;
const BackendType backend = BackendType.GPUCompute;
private Transform displayLocation;
private Worker engine;
private string[] labels;
private RenderTexture targetRT;
public WebCamTexture webcamTexture;
//Image size for the model
private const int imageWidth = 640;
private const int imageHeight = 640;
private VideoPlayer video;
List<GameObject> boxPool = new();
List<GameObject> ObjectPool = new();
[SerializeField, Range(0, 1)] float iouThreshold = 0.5f;
[SerializeField, Range(0, 1)] float scoreThreshold = 0.5f;
int maxOutputBoxes = 64;
public List<string> desiredClasses = new List<string> { "Coffee cup", "Computer mouse", "Computer keyboard",
"Cutting board", "Scissors" };
Tensor<float> centersToCorners;
//bounding box data
public struct BoundingBox
{
public float centerX;
public float centerY;
public float width;
public float height;
public string label;
public float lastUpdatedTime; // Timestamp of the last update
}
private void Awake()
{
displayImage.material = blendMaterial;
Application.targetFrameRate = 60;
Screen.orientation = ScreenOrientation.LandscapeLeft;
//Parse neural net labels
labels = labelsAsset.text.Split('\n');
LoadModel();
targetRT = new RenderTexture(imageWidth, imageHeight, 0);
//Create image to display video
displayLocation = displayImage.transform;
SetupWebcam();
SetupInput();
if (borderSprite == null)
{
borderSprite = Sprite.Create(borderTexture, new Rect(0, 0, borderTexture.width, borderTexture.height), new Vector2(borderTexture.width / 2, borderTexture.height / 2));
}
}
void Start()
{
// Request camera permission for Android
if (Application.platform == RuntimePlatform.Android)
{
if (!Permission.HasUserAuthorizedPermission(Permission.Camera))
{
Permission.RequestUserPermission(Permission.Camera);
}
}
for (int i = 0; i < ObjectLabels.Count; i++)
{
spawnablePrefabs.Add(ObjectLabels[i], ObjectsPrefs[i]);
}
}
void SetupWebcam()
{
webcamTexture = new WebCamTexture();
displayImage.texture = webcamTexture;
webcamTexture.Play();
}
void LoadModel()
{
//Load model
//var model1 = ModelLoader.Load(Path.Join(Application.streamingAssetsPath, modelName));
var model1 = ModelLoader.Load(asset);
centersToCorners = new Tensor<float>(new TensorShape(4, 4),
new float[]
{
1, 0, 1, 0,
0, 1, 0, 1,
-0.5f, 0, 0.5f, 0,
0, -0.5f, 0, 0.5f
});
//Here we transform the output of the model1 by feeding it through a Non-Max-Suppression layer.
var graph = new FunctionalGraph();
var input = graph.AddInput(model1,0);
var modelOutput = Functional.Forward(model1, input)[0];
var boxCoords = modelOutput[0, 0..4, ..].Transpose(0, 1); //shape=(8400,4)
var allScores = modelOutput[0, 4.., ..]; //shape=(80,8400)
var scores = FF.ReduceMax(allScores, 0); //shape=(8400)
var classIDs = FF.ArgMax(allScores, 0); //shape=(8400)
var boxCorners = FF.MatMul(boxCoords, Functional.Constant(centersToCorners));
var indices = FF.NMS(boxCorners, scores, iouThreshold, scoreThreshold); //shape=(N)
var indices2 = indices.Unsqueeze(-1).BroadcastTo(new int[] { 4 });//shape=(N,4)
var coords = FF.Gather(boxCoords, 0, indices2); //shape=(N,4)
var labelIDs = FF.Gather(classIDs, 0, indices); //shape=(N)
model1 = graph.Compile(coords,labelIDs);
//Create engine to run model
engine = new Worker(model1,backend);
}
void SetupInput()
{
video = gameObject.AddComponent<VideoPlayer>();
video.renderMode = VideoRenderMode.APIOnly;
video.source = VideoSource.Url;
//video.url = Path.Join(Application.streamingAssetsPath, videoName);
video.isLooping = true;
video.Play();
}
private void Update()
{
if (webcamTexture.didUpdateThisFrame)
{
ExecuteML();
}
/*
if (Input.GetKeyDown(KeyCode.Escape))
{
Application.Quit();
}*/
blendMaterial.SetTexture("_MainTex", webcamTexture);
blendMaterial.SetTexture("_OverlayTex", someRenderTexture);
}
public class BoundingBoxHolder : MonoBehaviour
{
public BoundingBox boundingBox;
}
public void ExecuteML()
{
ClearAnnotations();
if (webcamTexture && webcamTexture.width > 100)
{
// Scaling might be necessary if the webcam's aspect ratio differs
float aspect = webcamTexture.width * 1f / webcamTexture.height;
Graphics.Blit(webcamTexture, targetRT, new Vector2(1f / aspect, 1), new Vector2(0, 0));
displayImage.texture = targetRT;
}
else return;
using var input = TextureConverter.ToTensor(targetRT, imageWidth, imageHeight, 3);
engine.Schedule(input);
var outputGPU = engine.PeekOutput("output_0") as Tensor<float>;
var labelIDsGPU = engine.PeekOutput("output_1") as Tensor<int>;
using var output = outputGPU.ReadbackAndClone();     // CPU copy of the box coordinates; disposed at end of method
using var labelIDs = labelIDsGPU.ReadbackAndClone(); // CPU copy of the class indices
float displayWidth = displayImage.rectTransform.rect.width;
float displayHeight = displayImage.rectTransform.rect.height;
float scaleX = displayWidth / imageWidth;
float scaleY = displayHeight / imageHeight;
int boxesFound = output.shape[0];
//Draw the bounding boxes
for (int n = 0; n < Mathf.Min(boxesFound, 200); n++)
{
string detectedLabel = labels[labelIDs[n]];
if (desiredClasses.Contains(detectedLabel))
{
var box = new BoundingBox
{
centerX = output[n, 0] * scaleX - displayWidth / 2,
centerY = output[n, 1] * scaleY - displayHeight / 2,
width = output[n, 2] * scaleX,
height = output[n, 3] * scaleY,
label = labels[labelIDs[n]],
};
DrawBox(box, n, displayHeight * 0.05f);
}
}
}
Vector3 CalculateScaleAndDepth(float width, float height)
{
// Constants to define how scale and depth relate to the bounding box size
float baseScale = 0.5f; // Base scale factor
float depthFactor = 0.1f; // Factor to calculate depth from size
// Calculate scale as a function of the average of width and height
float scale = baseScale * (width + height) / 2;
// Calculate depth inversely proportional to the size of the bounding box
float depth = fixedDepth - depthFactor * (width + height);
return new Vector3(scale, scale, depth);
}
public void DrawBox(BoundingBox box, int id, float fontSize)
{
GameObject panel;
GameObject object3d;
if (id < boxPool.Count)
{
panel = boxPool[id];
panel.SetActive(true);
object3d = ObjectPool[id];
object3d.SetActive(true);
//EnsureRotationScript(object3d);
}
else
{
panel = CreateNewBox(Color.yellow);
object3d = CreateNewObject3d(box);
}
object3d.SetActive(true);
// Update object3d to correct prefab each time
if (spawnablePrefabs.ContainsKey(box.label))
{
GameObject correctPrefab = spawnablePrefabs[box.label];
if (object3d.name != correctPrefab.name + "(Clone)") // Check if correct prefab is already used
{
Destroy(object3d); // Remove wrong prefab
object3d = Instantiate(correctPrefab, displayLocation); // Create correct prefab
ObjectPool[id] = object3d; // Update ObjectPool with correct object
}
}
if (!spawnablePrefabs.ContainsKey(box.label))
{
Debug.LogError("Prefab not found for label: " + box.label);
return; // Skip spawning if prefab is missing
}
float verticalOffset = (box.height * 0.5f) + 65;
Vector3 scaleAndDepth = CalculateScaleAndDepth(box.width, box.height);
// Set box and object position and size
panel.transform.localPosition = new Vector3(box.centerX, -box.centerY);
object3d.transform.localPosition = new Vector3(box.centerX, -box.centerY + verticalOffset, -500); // Ensure it's visible and at correct depth
object3d.transform.localScale = new Vector3(scaleAndDepth.x, scaleAndDepth.x, scaleAndDepth.x);
RectTransform rt = panel.GetComponent<RectTransform>();
rt.sizeDelta = new Vector2(box.width, box.height);
// Set label text
Text label = panel.GetComponentInChildren<Text>();
label.text = box.label;
label.fontSize = (int)fontSize;
}
public GameObject CreateNewBox(Color color)
{
//Create the box and set image
var panel = new GameObject("ObjectBox");
panel.AddComponent<CanvasRenderer>();
UnityEngine.UI.Image img = panel.AddComponent<UnityEngine.UI.Image>();
img.color = color;
img.sprite = borderSprite;
img.type = Image.Type.Sliced;
panel.transform.SetParent(displayLocation, false);
//Create the label
var text = new GameObject("ObjectLabel");
text.AddComponent<CanvasRenderer>();
text.transform.SetParent(panel.transform, false);
Text txt = text.AddComponent<Text>();
txt.font = font;
txt.color = color;
txt.fontSize = 40;
txt.horizontalOverflow = HorizontalWrapMode.Overflow;
RectTransform rt2 = text.GetComponent<RectTransform>();
rt2.offsetMin = new Vector2(20, rt2.offsetMin.y);
rt2.offsetMax = new Vector2(0, rt2.offsetMax.y);
rt2.offsetMin = new Vector2(rt2.offsetMin.x, 0);
rt2.offsetMax = new Vector2(rt2.offsetMax.x, 30);
rt2.anchorMin = new Vector2(0, 0);
rt2.anchorMax = new Vector2(1, 1);
boxPool.Add(panel);
return panel;
}
public GameObject CreateNewObject3d(BoundingBox box)
{
//Instantiate the prefab that matches this label
Debug.Log(box.label);
var object3d = Instantiate(spawnablePrefabs[box.label]);
object3d.transform.SetParent(displayLocation, false);
ObjectPool.Add(object3d);
return object3d;
}
public void ClearAnnotations()
{
foreach (var box in boxPool)
{
box.SetActive(false);
}
foreach (var obj in ObjectPool)
{
obj.SetActive(false);
}
}
private void OnDestroy()
{
// Dispose resources and stop webcam
webcamTexture.Stop();
centersToCorners?.Dispose();
engine?.Dispose();
}
}
Thanks a lot!!
Hello GiusScot.
The code has been updated to work with Sentis 2.1.2 (tested in Unity 6).
Small and Nano sizes of YOLO model versions 9, 11, and 12 are now also included.
Please note that the provided code simply demonstrates how to run YOLO models and can be further improved. The provided .onnx models were exported with the nms=False option, which makes them all compatible with the provided NMS processing block that is assembled at runtime using the Functional API and attached to the output of the models. The runtime YOLO code can be significantly simplified if the result of this attachment is serialized offline to a .sentis model file and then used as a runtime asset. This will also eliminate the model optimization step which is currently performed at runtime.
Specifically, right after the model compilation in LoadModel() you can optionally quantize it and save it to disk, i.e.:
var model = graph.Compile(coords, labelIDs);
ModelQuantizer.QuantizeWeights(QuantizationType.Float16, ref model); // optional
ModelWriter.Save(Path.Join(Application.streamingAssetsPath, "combined-model.sentis"), model);
After that, the result of the Functional API code executed inside LoadModel() will already be optimized and serialized to the generated combined-model.sentis file. You can then put this .sentis file into the Assets folder and drag it into the Model Asset field instead of the pre-NMS .onnx model previously used to generate the combined result. You then no longer need to call LoadModel() at runtime and can just do
worker = new Worker(ModelLoader.Load(modelAsset), backend);
instead, because the generated combined-model.sentis will already contain the NMS block functionality.
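As a rough sketch (assuming the asset field of the script above now references the serialized combined-model.sentis file), LoadModel() then collapses to something like:
void LoadModel()
{
    // The serialized model already contains the NMS post-processing,
    // so no FunctionalGraph has to be built or compiled at runtime.
    engine = new Worker(ModelLoader.Load(asset), backend);
}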
The reason the provided models are not preprocessed this way in advance (which would probably be convenient) is that this preprocessing also bakes the selected iouThreshold and scoreThreshold into the model, making them non-configurable after serialization. Keeping the attachment step at runtime also makes it possible to implement custom optimizations for the NMS processing block, which would be impossible with a combined model.