From 3a9736a4039adbec81c2f746a3bb0e2a18ba7290 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johan=20Alfv=C3=A9n?= <johan.alfven@arm.com>
Date: Tue, 20 May 2025 13:57:51 +0200
Subject: [PATCH] MLBEDSW-10840: MLCE: Add support for RELU_0_TO_1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add support for RELU_0_TO_1 for both Vela and Regor
- Update SUPPORTED_OPS.md

Change-Id: Icd8a3bb35008c20d8cd2118c0e11cefcfcbe1cd9
Signed-off-by: Johan Alfvén <johan.alfven@arm.com>
---
 CHANGELOG.md                                  |  6 ++++--
 SUPPORTED_OPS.md                              | 20 ++++++++++---------
 .../ethosu55/ethos_u55_performance.cpp        |  2 +-
 .../ethosu85/ethos_u85_performance.cpp        |  2 +-
 ethosu/regor/compiler/op_type.cpp             |  1 +
 ethosu/regor/compiler/op_type.hpp             |  6 ++++--
 .../regor/compiler/tflite_graph_optimiser.cpp |  5 +++++
 ethosu/regor/tflite/tflite_mapping.cpp        |  4 +++-
 .../tflite/tflite_supported_operators_u55.cpp |  3 ++-
 .../tflite/tflite_supported_operators_u85.cpp |  3 ++-
 ethosu/vela/npu_performance.py                |  4 ++--
 ethosu/vela/operation.py                      |  5 ++++-
 ethosu/vela/tflite_supported_operators.py     |  1 +
 ethosu/vela/vela.py                           |  1 +
 14 files changed, 42 insertions(+), 21 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 717ff7cf..47bd81aa 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,13 +22,15 @@ main feature changes, interface changes and reported defects that have been
 fixed.  The version numbering adheres to the
 [semantic versioning](https://semver.org/) scheme.
 
-## Release 4.3.0 - 25/04/2025
+## Release 4.3.0 - 20/05/2025
 
 **Main feature changes:**
 
 * Extended operator support:
   * Ethos-U85:
-    * TFLite: LOG
+    * TFLite: LOG and RELU_0_TO_1
+  * Ethos-U55/Ethos-U65:
+    * TFLite: RELU_0_TO_1
 
 **Interface changes:**
 
diff --git a/SUPPORTED_OPS.md b/SUPPORTED_OPS.md
index 5febbaa8..42a030e7 100644
--- a/SUPPORTED_OPS.md
+++ b/SUPPORTED_OPS.md
@@ -19,7 +19,7 @@ limitations under the License.
 # Supported Ops
 
 This file was automatically generated by Vela using the `--supported-ops-report` parameter.  
-Vela version: `4.2.0`
+Vela version: `4.3.0rc2.dev0+gfddbad1d.d20250520`
 
 This file complies with
 [**Gitiles Markdown syntax**](https://gerrit.googlesource.com/gitiles/+/HEAD/Documentation/markdown.md)
@@ -62,6 +62,7 @@ Please check the supported operator list for your chosen runtime for further inf
 | QUANTIZE | [Generic](#tflite-generic-constraints) |
 | RELU | [Generic](#tflite-generic-constraints) |
 | RELU6 | [Generic](#tflite-generic-constraints) |
+| RELU_0_TO_1 | [Generic](#tflite-generic-constraints) |
 | RELU_N1_TO_1 | [Generic](#tflite-generic-constraints) |
 | RESHAPE | [Generic](#tflite-generic-constraints), [Specific](#ethos-u55-and-ethos-u65-tflite-reshape-constraints) |
 | RESIZE_BILINEAR | [Generic](#tflite-generic-constraints), [Specific](#ethos-u55-and-ethos-u65-tflite-resize_bilinear-constraints) |
@@ -119,6 +120,7 @@ Please check the supported operator list for your chosen runtime for further inf
 | QUANTIZE | [Generic](#tflite-generic-constraints) |
 | RELU | [Generic](#tflite-generic-constraints) |
 | RELU6 | [Generic](#tflite-generic-constraints) |
+| RELU_0_TO_1 | [Generic](#tflite-generic-constraints) |
 | RELU_N1_TO_1 | [Generic](#tflite-generic-constraints) |
 | RESHAPE | [Generic](#tflite-generic-constraints), [Specific](#ethos-u85-tflite-reshape-constraints) |
 | RESIZE_BILINEAR | [Generic](#tflite-generic-constraints), [Specific](#ethos-u85-tflite-resize_bilinear-constraints) |
@@ -162,7 +164,7 @@ This is a list of constraints that most operators must satisfy in order to be sc
 - Tensor dimensions must be in the range [1, 65535]
 - Per-axis quantization is only supported for the following op types: CONV_2D, DEPTHWISE_CONV_2D, FULLY_CONNECTED, TRANSPOSE_CONV
 - IFM Tensor batch size must be 1 - [FULLY_CONNECTED, RESHAPE, SHAPE, SLICE, SOFTMAX, SPLIT, SPLIT_V, SQUEEZE, STRIDED_SLICE, UNPACK]
-- The fused activation function (if present) must be one of type: LOGISTIC, RELU, RELU6, RELU_N1_TO_1, TANH
+- The fused activation function (if present) must be one of type: LOGISTIC, RELU, RELU6, RELU_0_TO_1, RELU_N1_TO_1, TANH
 - If a fused activation function is present, the Output tensor must be one of type: int16, int8, uint8
 
 ## Ethos-U55 and Ethos-U65 Specific Operator constraints
@@ -661,22 +663,22 @@ This is a list of constraints that the LEAKY_RELU operator must satisfy in order
 - At least one Input's shape must match the OFM's shape
 - IFM and OFM data types must match
 
-### Ethos-U85 TFLite LOG Constraints
+### Ethos-U85 TFLite MAXIMUM Constraints
 
-This is a list of constraints that the LOG operator must satisfy in order to be scheduled on the NPU.
+This is a list of constraints that the MAXIMUM operator must satisfy in order to be scheduled on the NPU.
 
 - At least one Input's shape must match the OFM's shape
 - IFM and OFM data types must match
-- IFM must be int8 or int16
+- Both Input quantization parameters must match OFM quantization parameters
+- Broadcasting is only allowed for rank indices with dimension 1, from either IFM1 or IFM2
 
-### Ethos-U85 TFLite MAXIMUM Constraints
+### Ethos-U85 TFLite LOG Constraints
 
-This is a list of constraints that the MAXIMUM operator must satisfy in order to be scheduled on the NPU.
+This is a list of constraints that the LOG operator must satisfy in order to be scheduled on the NPU.
 
 - At least one Input's shape must match the OFM's shape
 - IFM and OFM data types must match
-- Both Input quantization parameters must match OFM quantization parameters
-- Broadcasting is only allowed for rank indices with dimension 1, from either IFM1 or IFM2
+- IFM must be int8 or int16
 
 ### Ethos-U85 TFLite MAX_POOL_2D Constraints
 
diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_performance.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_performance.cpp
index bd6afa5f..d150264b 100644
--- a/ethosu/regor/architecture/ethosu55/ethos_u55_performance.cpp
+++ b/ethosu/regor/architecture/ethosu55/ethos_u55_performance.cpp
@@ -499,7 +499,7 @@ EthosU55ElementCycles EthosU55Performance::EstimateOutputCyclesPerElement(const
         {
             activationPerfIndex = 0;
         }
-        else if ( fusedOp.type == OpType::Relu || fusedOp.type == OpType::Relu6 || fusedOp.type == OpType::ReluN1To1 )
+        else if ( fusedOp.type == OpType::Relu || fusedOp.type == OpType::Relu0To1 || fusedOp.type == OpType::Relu6 || fusedOp.type == OpType::ReluN1To1 )
         {
             activationPerfIndex = 1;
         }
diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_performance.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_performance.cpp
index 0c7c0909..0b9fedf4 100644
--- a/ethosu/regor/architecture/ethosu85/ethos_u85_performance.cpp
+++ b/ethosu/regor/architecture/ethosu85/ethos_u85_performance.cpp
@@ -408,7 +408,7 @@ EthosU85ElementCycles EthosU85Performance::EstimateOutputCyclesPerElement(const
         {
             activationPerfIndex = 0;
         }
-        else if ( fusedOp.type == OpType::Relu || fusedOp.type == OpType::Relu6 || fusedOp.type == OpType::ReluN1To1 )
+        else if ( fusedOp.type == OpType::Relu || fusedOp.type == OpType::Relu0To1 || fusedOp.type == OpType::Relu6 || fusedOp.type == OpType::ReluN1To1 )
         {
             activationPerfIndex = 1;
         }
diff --git a/ethosu/regor/compiler/op_type.cpp b/ethosu/regor/compiler/op_type.cpp
index 3d90d4b0..0a37f270 100644
--- a/ethosu/regor/compiler/op_type.cpp
+++ b/ethosu/regor/compiler/op_type.cpp
@@ -153,6 +153,7 @@ BEGIN_ENUM_TABLE(regor::OpType)
     ADD_ENUM_NAME(Range)
     ADD_ENUM_NAME(Rank)
     ADD_ENUM_NAME(Relu)
+    ADD_ENUM_NAME(Relu0To1)
     ADD_ENUM_NAME(Relu6)
     ADD_ENUM_NAME(ReluN1To1)
     ADD_ENUM_NAME(ReluN)
diff --git a/ethosu/regor/compiler/op_type.hpp b/ethosu/regor/compiler/op_type.hpp
index 77c8ef5f..512b0d18 100644
--- a/ethosu/regor/compiler/op_type.hpp
+++ b/ethosu/regor/compiler/op_type.hpp
@@ -164,6 +164,7 @@ enum class OpType : uint16_t
     Range,
     Rank,
     Relu,
+    Relu0To1,
     Relu6,
     ReluN1To1,
     ReluN,
@@ -211,8 +212,9 @@ inline std::string OpTypeToString(const OpType type)
 
 constexpr inline bool IsActivation(OpType opType)
 {
-    return opType == OpType::Relu || opType == OpType::Relu6 || opType == OpType::ReluN || opType == OpType::ReluN1To1 ||
-           opType == OpType::Prelu || opType == OpType::Clamp || opType == OpType::Sigmoid || opType == OpType::Tanh || opType == OpType::LUT;
+    return opType == OpType::Relu || opType == OpType::Relu0To1 || opType == OpType::Relu6 || opType == OpType::ReluN ||
+           opType == OpType::ReluN1To1 || opType == OpType::Prelu || opType == OpType::Clamp ||
+           opType == OpType::Sigmoid || opType == OpType::Tanh || opType == OpType::LUT;
 }
 
 constexpr inline bool IsUnaryElementwise(OpType opType)
diff --git a/ethosu/regor/compiler/tflite_graph_optimiser.cpp b/ethosu/regor/compiler/tflite_graph_optimiser.cpp
index 78fb7a95..e4a4db81 100644
--- a/ethosu/regor/compiler/tflite_graph_optimiser.cpp
+++ b/ethosu/regor/compiler/tflite_graph_optimiser.cpp
@@ -2878,6 +2878,11 @@ Operation *TFLiteGraphOptimiser::ClampActivations(Graph *const graph, Operation
     {
         quant.quantMin = {Quantize(0, quant)};
     }
+    else if ( opType == OpType::Relu0To1 )
+    {
+        quant.quantMin = {Quantize(0, quant)};
+        quant.quantMax = {Quantize(1, quant)};
+    }
     else if ( opType == OpType::Relu6 )
     {
         quant.quantMin = {Quantize(0, quant)};
diff --git a/ethosu/regor/tflite/tflite_mapping.cpp b/ethosu/regor/tflite/tflite_mapping.cpp
index f4b4eec0..4026456f 100644
--- a/ethosu/regor/tflite/tflite_mapping.cpp
+++ b/ethosu/regor/tflite/tflite_mapping.cpp
@@ -199,7 +199,8 @@ const std::map<tflite::BuiltinOperator, OpType> TfLiteMapping::_builtinOperatorT
     {tflite::BuiltinOperator::SEGMENT_SUM,                      OpType::SegmentSum},
     {tflite::BuiltinOperator::BATCH_MATMUL,                     OpType::BatchMatMul},
     {tflite::BuiltinOperator::CUMSUM,                           OpType::Cumsum},
-    {tflite::BuiltinOperator::REDUCE_ALL,                       OpType::ReduceAll}
+    {tflite::BuiltinOperator::REDUCE_ALL,                       OpType::ReduceAll},
+    {tflite::BuiltinOperator::RELU_0_TO_1,                      OpType::Relu0To1}
     // clang-format on
 };
 
@@ -557,6 +558,7 @@ const std::multimap<OpType, TensorUsage> TfLiteMapping::_inputTensorIndices = {
     {OpType::ReduceSum,                         TensorUsage::IFM0},
     {OpType::ReduceSum,                         TensorUsage::Params},
     {OpType::Relu,                              TensorUsage::IFM0},
+    {OpType::Relu0To1,                          TensorUsage::IFM0},
     {OpType::Relu6,                             TensorUsage::IFM0},
     {OpType::ReluN1To1,                         TensorUsage::IFM0},
     {OpType::ReluN,                             TensorUsage::IFM0},
diff --git a/ethosu/regor/tflite/tflite_supported_operators_u55.cpp b/ethosu/regor/tflite/tflite_supported_operators_u55.cpp
index 156f46ee..1c9fdc3c 100644
--- a/ethosu/regor/tflite/tflite_supported_operators_u55.cpp
+++ b/ethosu/regor/tflite/tflite_supported_operators_u55.cpp
@@ -44,8 +44,9 @@ TfLiteSupportedOperatorsU55::TfLiteSupportedOperatorsU55(IArchitectureConstraint
         OpType::MaxPool,
         OpType::Mul,
         OpType::Relu,
-        OpType::ReluN1To1,
+        OpType::Relu0To1,
         OpType::Relu6,
+        OpType::ReluN1To1,
         OpType::Reshape,
         OpType::Softmax,
         OpType::Tanh,
diff --git a/ethosu/regor/tflite/tflite_supported_operators_u85.cpp b/ethosu/regor/tflite/tflite_supported_operators_u85.cpp
index db231e77..b01d9809 100644
--- a/ethosu/regor/tflite/tflite_supported_operators_u85.cpp
+++ b/ethosu/regor/tflite/tflite_supported_operators_u85.cpp
@@ -43,8 +43,9 @@ TfLiteSupportedOperatorsU85::TfLiteSupportedOperatorsU85(IArchitectureConstraint
         OpType::MaxPool,
         OpType::Mul,
         OpType::Relu,
-        OpType::ReluN1To1,
+        OpType::Relu0To1,
         OpType::Relu6,
+        OpType::ReluN1To1,
         OpType::Reshape,
         OpType::ResizeBilinear,
         OpType::Softmax,
diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py
index 32d0749a..5b54e022 100644
--- a/ethosu/vela/npu_performance.py
+++ b/ethosu/vela/npu_performance.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2020-2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -292,7 +292,7 @@ def _estimate_output_cycles_per_element(arch, op_type: Op, faf_type: Op, query:
 
     if faf_type in (Op.Sigmoid, Op.Tanh, Op.LUT):
         activation_perf_index = 0
-    elif faf_type in (Op.Relu, Op.Relu6, Op.ReluN1To1):
+    elif faf_type in (Op.Relu, Op.Relu6, Op.Relu0To1, Op.ReluN1To1):
         activation_perf_index = 1
     else:
         activation_perf_index = 2
diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py
index f9bf7fe6..ba88b10d 100644
--- a/ethosu/vela/operation.py
+++ b/ethosu/vela/operation.py
@@ -373,7 +373,7 @@ class Op(Enum):
         return self.info.block_type == NpuBlockType.ElementWise and not self.info.is_unary
 
     def is_relu_op(self):
-        return self in (Op.Relu, Op.Relu6, Op.ReluN1To1, Op.ReluN, Op.Clip, Op.Clamp)
+        return self in (Op.Relu, Op.Relu6, Op.Relu0To1, Op.ReluN1To1, Op.ReluN, Op.Clip, Op.Clamp)
 
     def is_activation_op(self):
         return self.is_relu_op() or self in (Op.Tanh, Op.Sigmoid, Op.Softmax, Op.LUT, Op.HardSwish)
@@ -454,6 +454,9 @@ def create_activation_function(op_type: Op, min=None, max=None) -> ActivationFun
     elif op_type == Op.Relu6:
         act.min = 0.0
         act.max = 6.0
+    elif op_type == Op.Relu0To1:
+        act.min = 0.0
+        act.max = 1.0
     elif op_type == Op.ReluN1To1:
         act.min = -1.0
         act.max = 1.0
diff --git a/ethosu/vela/tflite_supported_operators.py b/ethosu/vela/tflite_supported_operators.py
index 1fd284b4..776f534c 100644
--- a/ethosu/vela/tflite_supported_operators.py
+++ b/ethosu/vela/tflite_supported_operators.py
@@ -126,6 +126,7 @@ class TFLiteSupportedOperators:
         (
             Op.Relu,
             Op.Relu6,
+            Op.Relu0To1,
             Op.ReluN1To1,
             Op.Clip,
         )
diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py
index 55591e02..3f2d0c2d 100755
--- a/ethosu/vela/vela.py
+++ b/ethosu/vela/vela.py
@@ -396,6 +396,7 @@ Please check the supported operator list for your chosen runtime for further inf
 | QUANTIZE | [Generic](#tflite-generic-constraints) |
 | RELU | [Generic](#tflite-generic-constraints) |
 | RELU6 | [Generic](#tflite-generic-constraints) |
+| RELU_0_TO_1 | [Generic](#tflite-generic-constraints) |
 | RELU_N1_TO_1 | [Generic](#tflite-generic-constraints) |
 | RESHAPE | [Generic](#tflite-generic-constraints), [Specific](#ethos-u85-tflite-reshape-constraints) |
 | RESIZE_BILINEAR | [Generic](#tflite-generic-constraints), [Specific](#ethos-u85-tflite-resize_bilinear-constraints) |
-- 
GitLab