From 14f2efcabfa41b064dab7dd85f525f7d8be1bbc3 Mon Sep 17 00:00:00 2001
From: Alexander Bengtsson
Date: Mon, 13 Jan 2025 13:12:59 +0100
Subject: [PATCH] MLBEDSW-10227: Additional checks for rescale IFM-fusing

- Add 3 missing checks when performing IFM rescale-fusing on binary
  elementwise operations:
  1. Pass both the rescale in/out dtypes and the operation in/out dtypes
     to SupportsFusedRescale. All 4 are required to determine whether an
     operation can be fused.
  2. When performing IFM-fusing, the fused tensor must not be a graph
     output.
  3. When checking whether binary elementwise operations can IFM-fuse,
     the compiler must also check the following for the second input:
     * the input/output unsigned attributes
     * that the fused tensor is not a graph output.

Change-Id: I82ed1d07f14d48c70c8a94b9579be20200029f95
Signed-off-by: Alexander Bengtsson
---
 .../architecture/architecture_constraints.hpp |  4 +-
 .../ethosu55/ethos_u55_constraints.cpp        | 22 +++++----
 .../ethosu55/ethos_u55_constraints.hpp        |  3 +-
 .../ethosu85/ethos_u85_constraints.cpp        | 22 +++++----
 .../ethosu85/ethos_u85_constraints.hpp        |  3 +-
 ethosu/regor/compiler/graphir_optimiser.cpp   | 46 +++++++++++--------
 6 files changed, 57 insertions(+), 43 deletions(-)

diff --git a/ethosu/regor/architecture/architecture_constraints.hpp b/ethosu/regor/architecture/architecture_constraints.hpp
index f1171ea8..c5e24beb 100644
--- a/ethosu/regor/architecture/architecture_constraints.hpp
+++ b/ethosu/regor/architecture/architecture_constraints.hpp
@@ -96,8 +96,8 @@ public:
     virtual ~IArchitectureConstraints() = default;
 
     virtual bool SupportsReverse(OpType opType, ReverseType reverseTypeMask) = 0;
-    virtual bool SupportsFusedRescale(OpType opType, TensorUsage tensorUsage, DataType fromType, DataType toType,
-        const Quantization &quantization) = 0;
+    virtual bool SupportsFusedRescale(OpType opType, TensorUsage tensorUsage, DataType rescaleFromType,
+        DataType rescaleToType, DataType opFromType, DataType opToType, const Quantization &quantization) = 0;
     virtual bool SupportsRescale(DataType fromType, DataType toType) = 0;
     virtual TransposeSupport SupportsTranspose(OpType opType, TransposeType transposeType) = 0;
     virtual bool SupportsAccumulatorSaveRestore() = 0;
diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp
index 435e5de7..a97ca2bf 100644
--- a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp
+++ b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp
@@ -54,21 +54,22 @@ bool EthosU55Constraints::SupportsReverse(OpType opType, ReverseType reverseType
     return reverseTypeMask == ReverseType::None;
 }
 
-bool EthosU55Constraints::SupportsFusedRescale(
-    OpType opType, TensorUsage tensorUsage, DataType fromType, DataType toType, const Quantization &quantization)
+bool EthosU55Constraints::SupportsFusedRescale(OpType opType, TensorUsage tensorUsage, DataType rescaleFromType,
+    DataType rescaleToType, DataType opFromType, DataType opToType, const Quantization &quantization)
 {
     auto npuOp = ArchEthosU55::GetHWOp(opType);
     bool globalScale = quantization.scales.size() == 1;
-    int fromBits = DataTypeSizeBits(fromType);
-    int toBits = DataTypeSizeBits(toType);
     bool isUnitScale = quantization.IsUnitScale();
 
     if ( tensorUsage == TensorUsage::IFM )
     {
+        int fromBits = DataTypeSizeBits(rescaleFromType);
+        int toBits = DataTypeSizeBits(opToType);
         if ( npuOp == EthosU55NpuOp::Elementwise && globalScale )
         {
-            bool fromTypeSupported = IsInteger(fromType) && (fromBits == 8 || fromBits == 16);
-            bool toTypeSupported = (IsInteger(toType) && (toBits == 8 || toBits == 16)) || toType == DataType::Int32;
+            bool fromTypeSupported = IsInteger(rescaleFromType) && (fromBits == 8 || fromBits == 16);
+            bool toTypeSupported = (IsInteger(opToType) && (toBits == 8 || toBits == 16)) || opToType == DataType::Int32;
+
             // TODO MLBEDSW-10115: Support full 32-bit (advanced) rescale (with nonzero shift)
             // For now only allow 16-bit (simple) rescale
             auto &qs = quantization.scales.front();
@@ -76,10 +77,10 @@ bool EthosU55Constraints::SupportsFusedRescale(
 
             // Make sure the rescale can be done without clipping
             int64_t zp = quantization.zeroPoints.front();
-            int64_t value = (zp < 0 ? int64_t(IntegerMax(fromType)) : IntegerMin(fromType));
+            int64_t value = (zp < 0 ? int64_t(IntegerMax(rescaleFromType)) : IntegerMin(rescaleFromType));
             value = value - zp;
             value = (value * qs.scale) >> qs.shift;
-            bool noClipping = value >= IntegerMin(toType) && value <= int64_t(IntegerMax(toType));
+            bool noClipping = value >= IntegerMin(rescaleToType) && value <= int64_t(IntegerMax(rescaleToType));
 
             if ( opType == OpType::Add || opType == OpType::Sub )
             {
@@ -94,6 +95,7 @@ bool EthosU55Constraints::SupportsFusedRescale(
     }
     else if ( tensorUsage == TensorUsage::OFM )
     {
+        int fromBits = DataTypeSizeBits(opFromType);
         if ( npuOp == EthosU55NpuOp::Convolution || npuOp == EthosU55NpuOp::Depthwise || npuOp == EthosU55NpuOp::Pooling ||
              npuOp == EthosU55NpuOp::VectorProduct )
         {
@@ -101,8 +103,8 @@ bool EthosU55Constraints::SupportsFusedRescale(
         }
         else if ( npuOp == EthosU55NpuOp::Elementwise && globalScale )
         {
-            bool fromTypeSupported = (IsInteger(fromType) && (fromBits == 8 || fromBits == 16)) || fromType == DataType::Int32;
-            if ( fromType == DataType::Int32 )
+            bool fromTypeSupported = (IsInteger(opFromType) && (fromBits == 8 || fromBits == 16)) || opFromType == DataType::Int32;
+            if ( opFromType == DataType::Int32 )
             {
                 // For 32-bit operations scale is not applied but shift is
                 return quantization.scales.front().scale == 1;
diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.hpp b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.hpp
index 045125f5..b0355f65 100644
--- a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.hpp
+++ b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.hpp
@@ -31,7 +31,8 @@ public:
     bool SupportsMatMul(OpType opType) override;
     TransposeSupport SupportsTranspose(OpType opType, TransposeType transposeType) override;
     bool SupportsReverse(OpType opType, ReverseType reverseTypeMask) override;
-    bool SupportsFusedRescale(OpType opType, TensorUsage tensorUsage, DataType fromType, DataType toType, const Quantization &quantization) override;
+    bool SupportsFusedRescale(OpType opType, TensorUsage tensorUsage, DataType rescaleFromType, DataType rescaleToType,
+        DataType opFromType, DataType opToType, const Quantization &quantization) override;
     bool SupportsRescale(DataType fromType, DataType toType) override;
     bool SupportsAccumulatorSaveRestore() override { return false; }
     bool SupportsGather(OpType opType) override;
diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.cpp
index c64550aa..b24ebb40 100644
--- a/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.cpp
+++ b/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.cpp
@@ -83,31 +83,31 @@ bool EthosU85Constraints::SupportsReverse(OpType opType, ReverseType reverseType
     return true;
 }
 
-bool EthosU85Constraints::SupportsFusedRescale(
-    OpType opType, TensorUsage tensorUsage, DataType fromType, DataType toType, const Quantization &quantization)
+bool EthosU85Constraints::SupportsFusedRescale(OpType opType, TensorUsage tensorUsage, DataType rescaleFromType,
+    DataType rescaleToType, DataType opFromType, DataType opToType, const Quantization &quantization)
 {
     auto npuOp = ArchEthosU85::GetHWOp(opType);
     bool globalScale = quantization.scales.size() == 1;
-    int fromBits = DataTypeSizeBits(fromType);
-    int toBits = DataTypeSizeBits(toType);
     bool isUnitScale = quantization.IsUnitScale();
 
     if ( tensorUsage == TensorUsage::IFM )
    {
+        int fromBits = DataTypeSizeBits(rescaleFromType);
+        int toBits = DataTypeSizeBits(opToType);
         if ( npuOp == EthosU85NpuOp::Elementwise && globalScale )
         {
-            bool fromTypeSupported = (IsInteger(fromType) && fromBits == 8) || fromType == DataType::Int16;
-            bool toTypeSupported = (IsInteger(toType) && (toBits == 8 || toBits == 16)) || toType == DataType::Int32;
+            bool fromTypeSupported = (IsInteger(rescaleFromType) && fromBits == 8) || rescaleFromType == DataType::Int16;
+            bool toTypeSupported = (IsInteger(opToType) && (toBits == 8 || toBits == 16)) || opToType == DataType::Int32;
             auto &qs = quantization.scales.front();
             // Make sure shift is valid
             if ( qs.shift < 0 || qs.shift > 63 ) return false;
 
             // Make sure the rescale can be done without clipping
             int64_t zp = quantization.zeroPoints.front();
-            int64_t value = (zp < 0 ? int64_t(IntegerMax(fromType)) : IntegerMin(fromType));
+            int64_t value = (zp < 0 ? int64_t(IntegerMax(rescaleFromType)) : IntegerMin(rescaleFromType));
             value = value - zp;
             value = (value * qs.scale) >> qs.shift;
-            bool noClipping = value >= IntegerMin(toType) && value <= int64_t(IntegerMax(toType));
+            bool noClipping = value >= IntegerMin(rescaleToType) && value <= int64_t(IntegerMax(rescaleToType));
 
             if ( opType == OpType::Div || opType == OpType::Mul )
             {
@@ -122,6 +122,8 @@ bool EthosU85Constraints::SupportsFusedRescale(
     }
     else if ( tensorUsage == TensorUsage::OFM )
     {
+        int fromBits = DataTypeSizeBits(opFromType);
+        int toBits = DataTypeSizeBits(rescaleToType);
         if ( npuOp == EthosU85NpuOp::Convolution || npuOp == EthosU85NpuOp::Depthwise || npuOp == EthosU85NpuOp::Pooling ||
              npuOp == EthosU85NpuOp::VectorProduct )
         {
@@ -134,8 +136,8 @@ bool EthosU85Constraints::SupportsFusedRescale(
         }
         else if ( npuOp == EthosU85NpuOp::Elementwise && globalScale )
         {
-            bool fromTypeSupported = (IsInteger(fromType) && (fromBits == 8 || fromBits == 16)) || fromType == DataType::Int32;
-            if ( opType == OpType::Mul && fromTypeSupported && fromType == DataType::Int32 )
+            bool fromTypeSupported = (IsInteger(opFromType) && (fromBits == 8 || fromBits == 16)) || opFromType == DataType::Int32;
+            if ( opType == OpType::Mul && fromTypeSupported && opFromType == DataType::Int32 )
             {
                 return quantization.scales.front().scale == 1;  // Only shift supported
             }
diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.hpp b/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.hpp
index bd939523..eab0a264 100644
--- a/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.hpp
+++ b/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.hpp
@@ -31,7 +31,8 @@ public:
     bool SupportsMatMul(OpType opType) override;
     TransposeSupport SupportsTranspose(OpType opType, TransposeType transposeType) override;
    bool SupportsReverse(OpType opType, ReverseType reverseTypeMask) override;
-    bool SupportsFusedRescale(OpType opType, TensorUsage tensorUsage, DataType fromType, DataType toType, const Quantization &quantization) override;
+    bool SupportsFusedRescale(OpType opType, TensorUsage tensorUsage, DataType rescaleFromType, DataType rescaleToType,
+        DataType opFromType, DataType opToType, const Quantization &quantization) override;
     bool SupportsRescale(DataType fromType, DataType toType) override;
     bool SupportsAccumulatorSaveRestore() override { return true; }
     bool SupportsGather(OpType opType) override;
diff --git a/ethosu/regor/compiler/graphir_optimiser.cpp b/ethosu/regor/compiler/graphir_optimiser.cpp
index 590bbc8a..eb103f68 100644
--- a/ethosu/regor/compiler/graphir_optimiser.cpp
+++ b/ethosu/regor/compiler/graphir_optimiser.cpp
@@ -860,12 +860,6 @@ Operation *GraphIrOptimiser::FuseRescale(Graph *const graph, Operation *const op
     auto ifmConn = operation->Input(TensorUsage::IFM);
     auto producer = ifmConn->tensor->Writers().size() == 1 ? ifmConn->tensor->Writers().front() : nullptr;
 
-    if ( IsTensorInVector(graph->Outputs(), ifmConn->tensor.get()) )
-    {
-        // If ifm is graph output, fusing it not possible
-        return returnOp;
-    }
-
     // Convert scales to have 0 shift if possible, since this can improve fusing for Ethos-U55/65
     auto ConvertedScales = [](const TensorConnection *conn)
     {
@@ -881,9 +875,11 @@ Operation *GraphIrOptimiser::FuseRescale(Graph *const graph, Operation *const op
     };
 
     // Check if there is only one consumer of the output of the rescale and try to fuse to that operation.
-    // Note: For input fusing we cannot have an output zero point on the Rescale operation (since the
+    // Do not fuse if the connecting tensor is also a graph-output.
+    // Note (ZeroPoints): For input fusing we cannot have an output zero point on the Rescale operation (since the
     // zero point is applied before scaling on inputs), however input zero point is fine.
-    if ( ofmConn->tensor->Readers().size() == 1 && ofmConn->quantization.zeroPoints == Quantization::Unit().zeroPoints )
+    if ( ofmConn->tensor->Readers().size() == 1 && ofmConn->quantization.zeroPoints == Quantization::Unit().zeroPoints &&
+         !IsTensorInVector(graph->Outputs(), ofmConn->tensor.get()) )
     {
         // Propagate rescaling to input of next op
         auto consumer = ofmConn->tensor->Readers().front();
@@ -898,36 +894,46 @@ Operation *GraphIrOptimiser::FuseRescale(Graph *const graph, Operation *const op
         {
             if ( ifm.second.tensor == ofmConn->tensor )
             {
+                auto consumerOfmCon = consumer->Output(TensorUsage::OFM);
                 // This is the input of the next operation that consumes the rescaled values,
                 // check that this input does not already have scaling and that fusing is allowed
                 // by the constraints of the architecture.
                 if ( ifm.second.quantization.EqualScales(Quantization::Unit()) &&
                      _constraints->SupportsFusedRescale(consumer->Type(), TensorUsage::IFM, ifmConn->tensor->Type(),
-                         ofmConn->tensor->Type(), ifmQuant) )
+                         ofmConn->tensor->Type(), ifm.second.tensor->Type(), consumerOfmCon->tensor->Type(), ifmQuant) )
                 {
                     // If the consumer is a binary elementwise make sure that both inputs have
                     // the same data type after fusing.
                     if ( IsBinaryElementwise(consumer->Type()) )
                     {
-                        auto otherIfmCon = consumer->Input(
+                        auto consumerOtherIfmCon = consumer->Input(
                             ofmConn->tensor.get() == consumer->IFM(0) ? TensorUsage::IFM1 : TensorUsage::IFM0);
+
                         auto otherProducer =
-                            otherIfmCon->tensor->Writers().size() == 1 ? otherIfmCon->tensor->Writers().front() : nullptr;
+                            consumerOtherIfmCon->tensor->Writers().size() == 1 ?
+                                consumerOtherIfmCon->tensor->Writers().front() :
+                                nullptr;
+
                         // Both ifms must have same type after fusing (no rescale fused to this input case)
-                        bool sameType = otherIfmCon->tensor->Type() == ifmConn->tensor->Type();
+                        bool sameType = consumerOtherIfmCon->tensor->Type() == ifmConn->tensor->Type();
                         // Is there a Rescale for the other ifm
                         if ( otherProducer && otherProducer->Type() == OpType::Rescale )
                         {
                             // Check if the other ifm rescale can be fused
+                            auto opSign = otherProducer->Attribute();
+                            bool otherFusedTensorInGraphOutputs = IsTensorInVector(
+                                graph->Outputs(), consumerOtherIfmCon->tensor.get());
+                            bool otherRescaleUnsigned = opSign ? (opSign->input_unsigned || opSign->output_unsigned) : false;
+
                             auto otherIfmQuant = otherProducer->Input(TensorUsage::IFM)->quantization;
                             otherIfmQuant.scales = ConvertedScales(otherProducer->Output(TensorUsage::OFM));
-
-                            if ( otherIfmCon->quantization.EqualScales(Quantization::Unit()) &&
+                            if ( consumerOtherIfmCon->quantization.EqualScales(Quantization::Unit()) && !otherRescaleUnsigned && !otherFusedTensorInGraphOutputs &&
                                  _constraints->SupportsFusedRescale(consumer->Type(), TensorUsage::IFM,
-                                     otherProducer->IFM(0)->Type(), otherProducer->OFM()->Type(), otherIfmQuant) )
+                                     otherProducer->IFM(0)->Type(), otherProducer->OFM()->Type(),
+                                     consumerOtherIfmCon->tensor->Type(), consumerOfmCon->tensor->Type(), otherIfmQuant) )
                             {
-                                // and if so that both ifms will have the same type, either when this rescale is
-                                // fused or not.
+                                // If other input can be fused, check that both ifms will have the same type,
+                                // either when this rescale is fused or not.
                                 sameType = sameType || otherProducer->IFM(0)->Type() == ifmConn->tensor->Type();
                             }
                         }
@@ -954,8 +960,10 @@ Operation *GraphIrOptimiser::FuseRescale(Graph *const graph, Operation *const op
     if ( returnOp == operation && producer &&
          producer->Output(TensorUsage::OFM)->quantization.EqualScales(Quantization::Unit()) &&
         ifmConn->quantization.zeroPoints == Quantization::Unit().zeroPoints &&
-        _constraints->SupportsFusedRescale(
-            producer->Type(), TensorUsage::OFM, producer->IFM(0)->Type(), ofmConn->tensor->Type(), ofmQuant) )
+        // fused tensor cannot be in graph-outputs
+        !IsTensorInVector(graph->Outputs(), ifmConn->tensor.get()) &&
+        _constraints->SupportsFusedRescale(producer->Type(), TensorUsage::OFM, ifmConn->tensor->Type(),
+            ofmConn->tensor->Type(), producer->IFM(0)->Type(), producer->OFM()->Type(), ofmQuant) )
     {
         // Propagate rescaling to output of previous op
         producer->CopyOutput(TensorUsage::OFM, *ofmConn);
-- 
GitLab
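
For context, below is a minimal standalone sketch of the "no clipping" pre-check that both
constraint implementations perform before allowing an IFM rescale to be fused: the extreme
input value of the rescale's input type is zero-point corrected, scaled and shifted, and the
result must still fit in the rescale's output type. The MinOf/MaxOf helpers are simplified
stand-ins for the IntegerMin/IntegerMax utilities used in the patch, and the scale, shift and
zero-point values in main() are illustrative only, not taken from the compiler.

#include <cstdint>
#include <iostream>

// Simplified stand-ins for the IntegerMin/IntegerMax helpers (signed types only)
static int64_t MinOf(int bits) { return -(int64_t(1) << (bits - 1)); }
static int64_t MaxOf(int bits) { return (int64_t(1) << (bits - 1)) - 1; }

// True if rescaling the worst-case input value does not clip in the output type
static bool RescaleFitsWithoutClipping(int fromBits, int toBits, int64_t scale, int shift, int64_t zeroPoint)
{
    // Pick the extreme input value that lands furthest from zero after zero-point correction
    int64_t value = (zeroPoint < 0) ? MaxOf(fromBits) : MinOf(fromBits);
    value -= zeroPoint;               // apply the input zero point
    value = (value * scale) >> shift; // apply the (simple) rescale
    return value >= MinOf(toBits) && value <= MaxOf(toBits);
}

int main()
{
    std::cout << std::boolalpha
              << RescaleFitsWithoutClipping(8, 16, 3, 1, -128) << "\n"    // 382 fits in int16: true
              << RescaleFitsWithoutClipping(8, 8, 512, 0, -128) << "\n";  // 130560 clips in int8: false
    return 0;
}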