From 331b7d9ba6a881324c22120573233f1aa54c3778 Mon Sep 17 00:00:00 2001
From: Jacob Bohlin <jacob.bohlin@arm.com>
Date: Thu, 12 Dec 2024 17:13:06 +0000
Subject: [PATCH] MLBEDSW-10129 Preserve input/output order for raw format

This patch ensures that the order of input and output tensors is
preserved from the source model when serialising to raw format.

Previously the tensors were ordered according to the execution order of
their consumer operations.

Change-Id: Ia5a32362a5fa2eb0217a1b69015795258c238000
Signed-off-by: Jacob Bohlin <jacob.bohlin@arm.com>
---
 ethosu/regor/bindings/python/py_regor.cpp | 12 +++++-
 ethosu/regor/compiler/raw_writer.cpp      | 46 ++++++++++-----------
 ethosu/regor/test/test_raw_writer.cpp     | 49 +++++++++++------------
 ethosu/vela/rawdata_writer.py             |  4 ++
 test/test_ethos_u_vela.py                 |  5 +++
 5 files changed, 66 insertions(+), 50 deletions(-)
diff --git a/ethosu/regor/bindings/python/py_regor.cpp b/ethosu/regor/bindings/python/py_regor.cpp
index 8046d50a..4c3b34c9 100644
--- a/ethosu/regor/bindings/python/py_regor.cpp
+++ b/ethosu/regor/bindings/python/py_regor.cpp
@@ -172,6 +172,7 @@ struct PyRegorCompiledRawModel : PyRegorCompiledModel
     PyRegorCompiledRawModelNonConstantTensor scratch_fast;
     std::vector<PyRegorCompiledRawModelNonConstantTensor> inputs;
     std::vector<PyRegorCompiledRawModelNonConstantTensor> outputs;
+    std::vector<PyRegorCompiledRawModelNonConstantTensor> variables;
 };
 
 struct PyRegorCompiledTFLiteModel : PyRegorCompiledModel
@@ -460,6 +461,14 @@ private:
                         shape.insert(shape.end(), header.tensor.output.shape, header.tensor.output.shape + 4);
                         raw.outputs.emplace_back(region, address, size, element_size, shape);
                         break;
+                    case regor_raw_tensor_header_t::RAW_TENSOR_TYPE_VARIABLE:
+                        region = header.tensor.variable.region;
+                        address = header.tensor.variable.address;
+                        size = header.tensor.variable.size;
+                        element_size = header.tensor.variable.element_size;
+                        shape.insert(shape.end(), header.tensor.variable.shape, header.tensor.variable.shape + 4);
+                        raw.variables.emplace_back(region, address, size, element_size, shape);
+                        break;
                     default:
                         break;
                 }
@@ -556,7 +565,8 @@ PYBIND11_MODULE(regor, m)
         .def_readwrite("scratch", &PyRegorCompiledRawModel::scratch, "The compiled model scratch area")
         .def_readwrite("scratch_fast", &PyRegorCompiledRawModel::scratch_fast, "The compiled model scratch fast area")
         .def_readwrite("inputs", &PyRegorCompiledRawModel::inputs, "The compiled model inputs")
-        .def_readwrite("outputs", &PyRegorCompiledRawModel::outputs, "The compiled model outputs");
+        .def_readwrite("outputs", &PyRegorCompiledRawModel::outputs, "The compiled model outputs")
+        .def_readwrite("variables", &PyRegorCompiledRawModel::variables, "The compiled model variables");
 
     py::class_<PyRegorCompiledTFLiteModel, PyRegorCompiledModel>(m, "CompiledTFLiteModel", "A Regor-compiled TFLite model")
         .def(py::init<>())
diff --git a/ethosu/regor/compiler/raw_writer.cpp b/ethosu/regor/compiler/raw_writer.cpp
index f0777198..dc39d61a 100644
--- a/ethosu/regor/compiler/raw_writer.cpp
+++ b/ethosu/regor/compiler/raw_writer.cpp
@@ -65,12 +65,6 @@ std::vector<std::pair<std::unique_ptr<const uint8_t[]>, size_t>> RawWriter::Seri
         throw std::invalid_argument("RawWriter expects graph with 1 CustomNpuOp");
     }
 
-    const auto &graphInputs = graph->Inputs();
-    std::unordered_set<std::shared_ptr<Tensor>> inputsSet(graphInputs.begin(), graphInputs.end());
-
-    const auto &graphOutputs = graph->Outputs();
-    std::unordered_set<std::shared_ptr<Tensor>> outputsSet(graphOutputs.begin(), graphOutputs.end());
-
     // ethos_u_command_stream in TFLite format
     auto commandStreamTensorConnection = customNpuOp->Input(MakeTensorUsage(TensorUsage::Params, 0));
     auto commandStreamTensor = commandStreamTensorConnection->tensor.get();
@@ -98,36 +92,40 @@ std::vector<std::pair<std::unique_ptr<const uint8_t[]>, size_t>> RawWriter::Seri
         SerialiseScratchFastTensor(stagingTensor, tensor_address_map.at(stagingTensor));
     }
 
-    for ( const auto &[tensorUsage, tensorConnection] : customNpuOp->Inputs().pairs() )
+    // Serialise input tensors
+    for ( const auto &input : graph->Inputs() )
     {
-        auto inputTensor = tensorConnection.tensor.get();
-        if ( graph->IsPersistent(inputTensor) )
-        {
-            // Serialise variable (input) tensor
-            SerialiseVariableTensor(inputTensor, tensor_address_map.at(inputTensor));
-        }
-        else if ( IsIFM(tensorUsage) && !inputTensor->IsConstant() )
+        const Tensor *tensor = input.get();
+        auto tensorUsage = customNpuOp->UsageOfTensor(tensor);
+        if ( IsIFM(tensorUsage) && !tensor->IsConstant() )
         {
-            // Serialise input tensor
-            SerialiseInputTensor(inputTensor, tensor_address_map.at(inputTensor));
+            SerialiseInputTensor(tensor, tensor_address_map.at(tensor));
         }
     }
 
-    for ( const auto &[tensorUsage, tensorConnection] : customNpuOp->Outputs().pairs() )
+    // Serialise output tensors
+    for ( const auto &output : graph->Outputs() )
     {
-        auto outputTensor = tensorConnection.tensor.get();
-        if ( graph->IsPersistent(outputTensor) )
+        const Tensor *tensor = output.get();
+        auto tensorUsage = customNpuOp->UsageOfTensor(tensor);
+        if ( IsOFM(tensorUsage) )
         {
-            // Serialise variable (output) tensor
-            SerialiseVariableTensor(outputTensor, tensor_address_map.at(outputTensor));
+            SerialiseOutputTensor(tensor, tensor_address_map.at(tensor));
         }
-        else if ( IsOFM(tensorUsage) )
+    }
+
+    // Serialise persistent tensors
+    for ( const auto &persistent : graph->Persistent() )
+    {
+        const Tensor *tensor = persistent.get();
+        auto tensorUsage = customNpuOp->UsageOfTensor(tensor);
+        if ( (tensorUsage != TensorUsage::None) )
         {
-            // Serialise output tensor
-            SerialiseOutputTensor(outputTensor, tensor_address_map.at(outputTensor));
+            SerialiseVariableTensor(tensor, tensor_address_map.at(tensor));
         }
     }
 
+
     return std::move(_raw);
 }
 
diff --git a/ethosu/regor/test/test_raw_writer.cpp b/ethosu/regor/test/test_raw_writer.cpp
index 8261c7c1..6838b73f 100644
--- a/ethosu/regor/test/test_raw_writer.cpp
+++ b/ethosu/regor/test/test_raw_writer.cpp
@@ -52,14 +52,14 @@ TEST_CASE("raw_writer")
     const auto input = std::make_shared<Tensor>("input_1", DataType::Int8, Shape({1, 1, 8}));
     REQUIRE_FALSE(input->IsConstant());
 
-    // Build variable tensor
-    const auto variable1 = std::make_shared<Tensor>("variable_1", DataType::Int8, Shape({1, 1, 1, 9}));
-    REQUIRE_FALSE(variable1->IsConstant());
-
     // Build output tensor
     const auto output = std::make_shared<Tensor>("output_1", DataType::Int8, Shape({1, 1, 1, 10}));
     REQUIRE_FALSE(output->IsConstant());
 
+    // Build variable tensor
+    const auto variable1 = std::make_shared<Tensor>("variable_1", DataType::Int8, Shape({1, 1, 1, 9}));
+    REQUIRE_FALSE(variable1->IsConstant());
+
     // Build another variable tensor
     const auto variable2 = std::make_shared<Tensor>("variable_2", DataType::Int8, Shape({1, 1, 1, 11}));
     REQUIRE_FALSE(variable2->IsConstant());
@@ -175,7 +175,6 @@ TEST_CASE("raw_writer")
         REQUIRE(header.tensor.scratch_fast.region == 2);
         REQUIRE(header.tensor.scratch_fast.address == 77);
     }
-
     // Check input
     {
         // Check blob size
@@ -197,7 +196,7 @@ TEST_CASE("raw_writer")
         REQUIRE(header.tensor.input.shape[3] == 8);
     }
 
-    // Check (input) variable
+    // Check output
     {
         // Check blob size
         size_t dataSize = sizeof(regor_raw_tensor_header_t);
@@ -207,18 +206,18 @@ TEST_CASE("raw_writer")
         auto &data = blobs[5].first;
         regor_raw_tensor_header_t header;
         std::copy_n(data.get(), sizeof(header), reinterpret_cast<uint8_t *>(&header));
-        REQUIRE(header.type == regor_raw_tensor_header_t::RAW_TENSOR_TYPE_VARIABLE);
-        REQUIRE(header.tensor.input.size == 9);
-        REQUIRE(header.tensor.input.region == 1);
-        REQUIRE(header.tensor.input.address == 11);
-        REQUIRE(header.tensor.input.element_size == 1);
-        REQUIRE(header.tensor.input.shape[0] == 1);
-        REQUIRE(header.tensor.input.shape[1] == 1);
-        REQUIRE(header.tensor.input.shape[2] == 1);
-        REQUIRE(header.tensor.input.shape[3] == 9);
+        REQUIRE(header.type == regor_raw_tensor_header_t::RAW_TENSOR_TYPE_OUTPUT);
+        REQUIRE(header.tensor.output.size == 10);
+        REQUIRE(header.tensor.output.region == 1);
+        REQUIRE(header.tensor.output.address == 99);
+        REQUIRE(header.tensor.output.element_size == 1);
+        REQUIRE(header.tensor.output.shape[0] == 1);
+        REQUIRE(header.tensor.output.shape[1] == 1);
+        REQUIRE(header.tensor.output.shape[2] == 1);
+        REQUIRE(header.tensor.output.shape[3] == 10);
     }
 
-    // Check output
+    // Check (input) variable
     {
         // Check blob size
         size_t dataSize = sizeof(regor_raw_tensor_header_t);
@@ -228,15 +227,15 @@ TEST_CASE("raw_writer")
         auto &data = blobs[6].first;
         regor_raw_tensor_header_t header;
         std::copy_n(data.get(), sizeof(header), reinterpret_cast<uint8_t *>(&header));
-        REQUIRE(header.type == regor_raw_tensor_header_t::RAW_TENSOR_TYPE_OUTPUT);
-        REQUIRE(header.tensor.output.size == 10);
-        REQUIRE(header.tensor.output.region == 1);
-        REQUIRE(header.tensor.output.address == 99);
-        REQUIRE(header.tensor.output.element_size == 1);
-        REQUIRE(header.tensor.output.shape[0] == 1);
-        REQUIRE(header.tensor.output.shape[1] == 1);
-        REQUIRE(header.tensor.output.shape[2] == 1);
-        REQUIRE(header.tensor.output.shape[3] == 10);
+        REQUIRE(header.type == regor_raw_tensor_header_t::RAW_TENSOR_TYPE_VARIABLE);
+        REQUIRE(header.tensor.input.size == 9);
+        REQUIRE(header.tensor.input.region == 1);
+        REQUIRE(header.tensor.input.address == 11);
+        REQUIRE(header.tensor.input.element_size == 1);
+        REQUIRE(header.tensor.input.shape[0] == 1);
+        REQUIRE(header.tensor.input.shape[1] == 1);
+        REQUIRE(header.tensor.input.shape[2] == 1);
+        REQUIRE(header.tensor.input.shape[3] == 9);
     }
 
     // Check (output) variable
diff --git a/ethosu/vela/rawdata_writer.py b/ethosu/vela/rawdata_writer.py
index 153def44..2bcc12e9 100644
--- a/ethosu/vela/rawdata_writer.py
+++ b/ethosu/vela/rawdata_writer.py
@@ -102,4 +102,8 @@ def write_rawdata_output_from_model(filename, model):
         output_elem_size=[t.element_size for t in model.outputs],
         output_region=[t.region for t in model.outputs],
         output_offset=[t.address for t in model.outputs],
+        variable_shape=[t.shape for t in model.variables],
+        variable_elem_size=[t.element_size for t in model.variables],
+        variable_region=[t.region for t in model.variables],
+        variable_offset=[t.address for t in model.variables],
     )
diff --git a/test/test_ethos_u_vela.py b/test/test_ethos_u_vela.py
index 58bf71c6..9cd9dcd3 100644
--- a/test/test_ethos_u_vela.py
+++ b/test/test_ethos_u_vela.py
@@ -122,6 +122,11 @@ def test_ethos_u_vela_with_regor_raw_output(tmp_path):
     assert raw["output_elem_size"][0] == 1
     assert raw["output_region"][0] == 1
     assert raw["output_offset"][0] >= 0
+    # Model contains no variables, check that the values are empty arrays.
+    assert raw["variable_shape"].size == 0
+    assert raw["variable_elem_size"].size == 0
+    assert raw["variable_region"].size == 0
+    assert raw["variable_offset"].size == 0
 
 
 def test_regor():
-- 
GitLab