Commit 50589b7
Parent(s): 192d6ae

Update to Sentis 2.1.1

Files changed:
- README.md (+2, -3)
- RunPhi15.cs (+20, -20)
README.md (CHANGED)

@@ -5,8 +5,7 @@ pipeline_tag: text-generation
 ---
 
 
-# Phi 1.5 Model in Unity Sentis (Version 1.
-*Version 1.3.0 Sentis files are not compatible with Sentis 1.5.0 and need to be recreated/downloaded
+# Phi 1.5 Model in Unity Sentis (Version 2.1.1)
 
 This is the [Microsoft Phi 1.5](https://huggingface.co/microsoft/phi-1_5) model checked to run on Unity 2023. Phi 1.5 is a Large Language Model that was trained on synthesized data. Please see their page for more information about the model and license.
 The model has 1.3 billion parameters.

@@ -14,7 +13,7 @@ The model has 1.3 billion parameters.
 
 ## How to Use
 * Create a new scene in Unity 2023
-* Install `com.unity.sentis` version `1.
+* Install `com.unity.sentis` version `2.1.1` and `com.unity.nuget.newtonsoft-json` packages
 * Add the RunPhi15.cs file to the Main Camera
 * Put `phi15.sentis`, `vocab.json` and `merges.txt` in the Assets/StreamingAssets folder
 * Adjust some of the variables such as the `outputText` string to set the prompt
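For orientation, the setup steps in the updated README map onto the Sentis 2.1 API roughly as in the sketch below. The class name and field layout are illustrative only, not the actual script; the real logic lives in RunPhi15.cs, whose diff follows.

```csharp
using System.IO;
using Unity.Sentis;
using UnityEngine;

// Minimal sketch (not the actual RunPhi15.cs): load the model that the README
// says to place in Assets/StreamingAssets and create a Sentis 2.1 worker for it.
public class LoadPhi15Sketch : MonoBehaviour
{
    Worker engine;

    void Start()
    {
        string path = Path.Combine(Application.streamingAssetsPath, "phi15.sentis");
        Model model = ModelLoader.Load(path);

        // Sentis 2.x creates workers directly with new Worker(...) rather than through a factory.
        engine = new Worker(model, BackendType.GPUCompute);
    }

    void OnDestroy() => engine?.Dispose();
}
```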
RunPhi15.cs (CHANGED)

@@ -45,7 +45,7 @@ public class RunPhi15: MonoBehaviour
     //Store the vocabulary
     string[] tokens;
 
-
+    Worker engine;
 
     int currentToken = 0;
     int[] outputTokens = new int[maxTokens];

@@ -76,16 +76,14 @@ public class RunPhi15: MonoBehaviour
         int outputIndex = model1.outputs.Count - 1;
         //var model1 = ModelLoader.Load(asset);
         //Create a new model to select the random token:
-
-
-
-
-
-
-            (model1.inputs[0], InputDef.Int(new TensorShape()))
-        );
+        FunctionalGraph graph = new FunctionalGraph();
+        FunctionalTensor input_0 = graph.AddInput<int>(new TensorShape(1, maxTokens));
+        FunctionalTensor input_1 = graph.AddInput<int>(new TensorShape(1));
+        FunctionalTensor row = Functional.Select(Functional.Forward(model1, input_0)[outputIndex], 1, input_1);
+        FunctionalTensor output = Functional.Multinomial(predictability * row, 1);
+        Model model2 = graph.Compile(output);
 
-        engine =
+        engine = new Worker(model2, backend);
 
         DecodePrompt(outputString);
 

@@ -103,17 +101,19 @@ public class RunPhi15: MonoBehaviour
 
     void RunInference()
     {
-        using var tokensSoFar = new
-        using var index = new
-
-        engine.
-
-
+        using var tokensSoFar = new Tensor<int>(new TensorShape(1, maxTokens), outputTokens);
+        using var index = new Tensor<int>(new TensorShape(1));
+        index[0] = currentToken;
+        engine.SetInput("input_0", tokensSoFar);
+        engine.SetInput("input_1", index);
+        engine.Schedule();
+
+        var probs = engine.PeekOutput() as Tensor<int>;
         //Debug.Log(probs.shape);
 
-        probs.
-
-        int ID =
+        probs.CompleteAllPendingOperations();
+        var result = probs.ReadbackAndClone();
+        int ID = result[0];
 
         //shift window down if got to the end
         if (currentToken >= maxTokens - 1)

@@ -266,4 +266,4 @@ public class RunPhi15: MonoBehaviour
         engine?.Dispose();
     }
 
-}
+}
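Taken together, the added lines give the Sentis 2.1 inference path shown in the condensed sketch below. This is just the diff's new code gathered in one place with comments; `model1`, `outputIndex`, `maxTokens`, `predictability`, `backend`, `outputTokens` and `currentToken` are fields and locals defined elsewhere in RunPhi15.cs.

```csharp
// Build a small functional model that wraps model1 and samples one token.
FunctionalGraph graph = new FunctionalGraph();
FunctionalTensor input_0 = graph.AddInput<int>(new TensorShape(1, maxTokens)); // token window
FunctionalTensor input_1 = graph.AddInput<int>(new TensorShape(1));            // position of the current token

// Run the language model, pick the output row for the current position,
// and sample one token ID; 'predictability' scales the scores before sampling.
FunctionalTensor row = Functional.Select(Functional.Forward(model1, input_0)[outputIndex], 1, input_1);
FunctionalTensor output = Functional.Multinomial(predictability * row, 1);
Model model2 = graph.Compile(output);
engine = new Worker(model2, backend);

// Each generation step: feed the tokens so far plus the current index,
// schedule the worker, then read the sampled token ID back on the CPU.
using var tokensSoFar = new Tensor<int>(new TensorShape(1, maxTokens), outputTokens);
using var index = new Tensor<int>(new TensorShape(1));
index[0] = currentToken;
engine.SetInput("input_0", tokensSoFar);
engine.SetInput("input_1", index);
engine.Schedule();

var probs = engine.PeekOutput() as Tensor<int>;
probs.CompleteAllPendingOperations();
var result = probs.ReadbackAndClone();
int ID = result[0];
```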