From bd67fb570b447fe3ab02b6db865c281530f4ba8d Mon Sep 17 00:00:00 2001
From: Jacob Bohlin <jacob.bohlin@arm.com>
Date: Tue, 8 Jul 2025 17:33:28 +0100
Subject: [PATCH] MLBEDSW-10948 Fix output diff for TOSA binary elementwise

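If the output tensor of a Rescale was used as both inputs to a binary
elementwise operation, the quantization parameters were only reflected
in one of the IFM tensor connections after fusing the rescale.

The consumer input was replaced while the consumer's inputs were still
being iterated over. Defer the replacement until after the loop.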

Change-Id: I84168abe038012e6210f8fb9d4a020d2a0c16633
Signed-off-by: Jacob Bohlin <jacob.bohlin@arm.com>
---
 ethosu/regor/compiler/graphir_optimiser.cpp | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/ethosu/regor/compiler/graphir_optimiser.cpp b/ethosu/regor/compiler/graphir_optimiser.cpp
index 2f80e6ae..1dea160a 100644
--- a/ethosu/regor/compiler/graphir_optimiser.cpp
+++ b/ethosu/regor/compiler/graphir_optimiser.cpp
@@ -1161,7 +1161,7 @@ Operation *GraphIrOptimiser::FuseRescale(Graph *const graph, Operation *const op
             auto ifmQuant = ifmConn->quantization;
             // Normalize scales to shift 0 if possible
             ifmQuant.scales = ConvertedScales(ofmConn);
-
+            bool replaceInput = false;
             for ( auto ifm : consumer->Inputs().pairs() )
             {
                 if ( ifm.second.tensor == ofmConn->tensor )
@@ -1213,13 +1213,19 @@ Operation *GraphIrOptimiser::FuseRescale(Graph *const graph, Operation *const op
                             // avoid performing this fuse operation.
                             if ( !sameType ) break;
                         }
-                        ReplaceConsumerInput(nullptr, ofmConn->tensor->Readers(), ofmConn->tensor.get(), ifmConn->tensor);
                         ifm.second.quantization = ifmQuant;
                         consumer->Input(ifm.first)->Set(ofmConn->rounding);
                         returnOp = consumer.get();
+                        replaceInput = true;
                     }
                 }
             }
+            if ( replaceInput )
+            {
+                // This is done outside the loop to avoid modifying the consumer inputs while they are being iterated
+                // over.
+                ReplaceConsumerInput(nullptr, ofmConn->tensor->Readers(), ofmConn->tensor.get(), ifmConn->tensor);
+            }
         }
         // If the rescale could not be fused to the consumer of the output of the rescale, check if there
         // is only one producer of the input to the rescale operation. If this input has no zero point
-- 
GitLab