From 44fc603f94e6ed941d2827ae409a17985d29a292 Mon Sep 17 00:00:00 2001
From: Jacob Bohlin <jacob.bohlin@arm.com>
Date: Mon, 2 Jun 2025 17:17:51 +0100
Subject: [PATCH] MLBEDSW-10719 Refactor GetScaleFactor()

The function is architecture-agnostic but was only used for Ethos-U85.
This commit makes a few refactors:
* Refactor it into a common function to be used by all architectures.
* Re-use the function in more places.
* Add a `reducedPrecision` flag to the function.

Change-Id: I8c50b91cfc6790cc767eb30e030d6ba940d5d3d2
Signed-off-by: Jacob Bohlin <jacob.bohlin@arm.com>
---
 ethosu/regor/architecture/ethos_u_scaling.hpp | 10 +++-
 .../ethosu55/ethos_u55_scaling.cpp            | 34 ++++--------
 .../ethos_u85_register_cs_generator.cpp       |  2 +-
 .../ethosu85/ethos_u85_scaling.cpp            | 52 ++++++-------------
 .../ethosu85/ethos_u85_scaling.hpp            |  3 +-
 ethosu/regor/compiler/quantization.hpp        |  2 +
 6 files changed, 39 insertions(+), 64 deletions(-)
diff --git a/ethosu/regor/architecture/ethos_u_scaling.hpp b/ethosu/regor/architecture/ethos_u_scaling.hpp
index 0be3db12..42736792 100644
--- a/ethosu/regor/architecture/ethos_u_scaling.hpp
+++ b/ethosu/regor/architecture/ethos_u_scaling.hpp
@@ -1,5 +1,5 @@
 //
-// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
 //
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -20,6 +20,7 @@
 
 #include "common/data_type.hpp"
 #include "common/scaling.hpp"
+#include "compiler/high_level_command_stream.hpp"
 #include "compiler/op_type.hpp"
 #include "compiler/quantization.hpp"
 
@@ -40,4 +41,11 @@ void SimplifiedElementwiseAddSubScale(double input1Scale, double input2Scale, do
 Quantization RescalePerChannel(const Quantization &ifmQuant, const Quantization &weightQuant,
     const Quantization &ofmQuant, const DataType scaleDataType, const DataType ifmDataType, OpType opType);
 
+static inline double GetScaleFactor(HLCOperation *op, bool reducedPrecision = false)
+{
+    float ifmScale = op->ifm[0].quantization.Scale().Dequantize();
+    float ofmScale = op->ofm.quantization.Scale().Dequantize();
+    return reducedPrecision ? (ifmScale / ofmScale) : (static_cast<double>(ifmScale) / static_cast<double>(ofmScale));
+}
+
 }  // namespace regor
diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_scaling.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_scaling.cpp
index 4af22546..cca8011a 100644
--- a/ethosu/regor/architecture/ethosu55/ethos_u55_scaling.cpp
+++ b/ethosu/regor/architecture/ethosu55/ethos_u55_scaling.cpp
@@ -39,18 +39,6 @@ void AdvancedElementwiseAddSubScale(double input1Scale, double input2Scale, doub
     input1Rescale = QuantizedScale(ifm1Rescale);
 }
 
-float GetScale(const Quantization *quant)
-{
-    if ( quant != nullptr && quant->scales.size() != 0 )
-    {
-        return float(quant->scales[0].Dequantize());
-    }
-    else
-    {
-        return 1.0f;
-    }
-}
-
 }  // namespace
 
 void RescaleElementwise(HLCOperation *op)
@@ -59,6 +47,7 @@ void RescaleElementwise(HLCOperation *op)
     Quantization *ifm1Quant = &op->ifm[0].quantization;
     Quantization *ifm2Quant = ifmCnt == 2 ? &op->ifm[1].quantization : nullptr;
     Quantization *ofmQuant = &op->ofm.quantization;
+    assert(ifm1Quant && ofmQuant);
 
     if ( ifm1Quant->type == QuantizationType::EXPLICIT && ofmQuant->type == QuantizationType::EXPLICIT &&
          (ifm2Quant == nullptr || ifm2Quant->type == QuantizationType::EXPLICIT) )
@@ -68,9 +57,9 @@ void RescaleElementwise(HLCOperation *op)
 
     QuantizedScale outScale(1, 0);
 
-    double ifm1Scale = GetScale(ifm1Quant);
-    double ifm2Scale = GetScale(ifm2Quant);
-    double ofmScale = GetScale(ofmQuant);
+    double ifm1Scale = ifm1Quant->Scale().Dequantize();
+    double ifm2Scale = ifm2Quant ? ifm2Quant->Scale().Dequantize() : 1.0;
+    double ofmScale = ofmQuant->Scale().Dequantize();
 
     DataType ifmDataType = op->ifm[0].dataType;
     OpType opType = op->type;
@@ -187,6 +176,7 @@ void RescalePooling(HLCOperation *op, bool isNoOp)
 
     Quantization *ifm1Quant = &op->ifm[0].quantization;
     Quantization *ofmQuant = &op->ofm.quantization;
+    assert(ifm1Quant && ofmQuant);
     uint32_t scale = 1;
     int shift = 0;
     DataType ifmDataType = op->ifm[0].dataType;
@@ -202,8 +192,8 @@ void RescalePooling(HLCOperation *op, bool isNoOp)
 
     if ( !ifm1Quant->scales.empty() && !ofmQuant->scales.empty() )
     {
-        double ifmScale = GetScale(ifm1Quant);
-        double ofmScale = GetScale(ofmQuant);
+        double ifmScale = ifm1Quant->Scale().Dequantize();
+        double ofmScale = ofmQuant->Scale().Dequantize();
         auto actType = op->subOps.empty() ? opType : op->subOps[0].type;
         if ( actType == OpType::Sigmoid || actType == OpType::Tanh )
         {
@@ -240,29 +230,27 @@ void RescalePooling(HLCOperation *op, bool isNoOp)
         }
         else if ( opType == OpType::MemoryCopy )
         {
-            double rescale = ifmScale / ofmScale;
             // In case of concat or other memory operation, rescaling might be needed.
             // The scale is maximised, to get maximum precision
-            QuantizePoolingScaleMaxPrecision(op->kernel.ElementsWH(), rescale, scale, shift, 32);
+            QuantizePoolingScaleMaxPrecision(op->kernel.ElementsWH(), GetScaleFactor(op), scale, shift, 32);
         }
         else if ( opType == OpType::Quantize )
         {
             // Quantize operations need double-precision scaling
-            QuantizedScale quantScale(ifmScale / ofmScale);
+            QuantizedScale quantScale(GetScaleFactor(op));
             scale = uint32_t(quantScale.scale);
             shift = quantScale.shift;
         }
         else if ( isNoOp )
         {
-            QuantizedScale quantScale(float(ifmScale) / float(ofmScale));
+            QuantizedScale quantScale(GetScaleFactor(op, /* reducedPrecision */ true));
             scale = uint32_t(quantScale.scale);
             shift = quantScale.shift;
         }
         else
         {
             // Normal pooling operation, without need for special scaling
-            double rescale = ifmScale / ofmScale;
-            QuantizePoolingScale(op->kernel.ElementsWH(), rescale, 0, scale, shift, 32);
+            QuantizePoolingScale(op->kernel.ElementsWH(), GetScaleFactor(op), 0, scale, shift, 32);
         }
     }
     ofmQuant->scales.clear();
diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp
index 607eac86..e0ae155e 100644
--- a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp
+++ b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp
@@ -696,7 +696,7 @@ void EthosU85RCSGenerator::GenerateOFMScalingForPooling(HLCOperation *poolOp, bo
     {
         uint32_t scale = 1;
         int shift = 0;
-        QuantizePoolingScale(poolOp->kernel.ElementsWH(), ethosU85Scaling::GetScaleFactor(poolOp), 0, scale, shift, 31);
+        QuantizePoolingScale(poolOp->kernel.ElementsWH(), GetScaleFactor(poolOp), 0, scale, shift, 31);
         ofmScale = QuantizedScale(int32_t(scale), shift);
     }
     else if ( poolOp->type == OpType::ArgMax && useGlobalScale )
diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_scaling.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_scaling.cpp
index cffe836a..f24e50e6 100644
--- a/ethosu/regor/architecture/ethosu85/ethos_u85_scaling.cpp
+++ b/ethosu/regor/architecture/ethosu85/ethos_u85_scaling.cpp
@@ -27,6 +27,7 @@
 
 namespace regor::ethosU85Scaling
 {
+
 namespace
 {
 void AdvancedElementwiseAddSubScale(double input1Scale, double input2Scale, double outputScale, int bitDepth,
@@ -39,37 +40,15 @@ void AdvancedElementwiseAddSubScale(double input1Scale, double input2Scale, doub
     input1Rescale = QuantizedScale(ifm1Rescale);
     input2Rescale = QuantizedScale(ifm2Rescale);
 }
-
-float GetScale(const Quantization *quant)
-{
-    if ( quant != nullptr && quant->scales.size() != 0 )
-    {
-        // Use single precision to match reference
-        return float(quant->scales[0].Dequantize());
-    }
-    else
-    {
-        return 1.0f;
-    }
-}
-
 }  // namespace
 
-double GetScaleFactor(HLCOperation *op)
-{
-    Quantization *ifmQuant = &op->ifm[0].quantization;
-    Quantization *ofmQuant = &op->ofm.quantization;
-    double ifmScale = GetScale(ifmQuant);
-    double ofmScale = GetScale(ofmQuant);
-    return ifmScale / ofmScale;
-}
-
 void RescaleConvolution(HLCOperation *op)
 {
     int ifmCnt = int(op->ifm.size());
     Quantization *ifm1Quant = &op->ifm[0].quantization;
     Quantization *ifm2Quant = ifmCnt == 2 ? &op->ifm[1].quantization : nullptr;
     Quantization *ofmQuant = &op->ofm.quantization;
+    assert(ifm1Quant && ofmQuant);
 
     if ( ofmQuant->type == QuantizationType::EXPLICIT )
     {
@@ -78,9 +57,9 @@ void RescaleConvolution(HLCOperation *op)
 
     QuantizedScale outScale(1, 0);
 
-    double ifm1Scale = GetScale(ifm1Quant);
-    double ifm2Scale = GetScale(ifm2Quant);
-    double ofmScale = GetScale(ofmQuant);
+    double ifm1Scale = ifm1Quant->Scale().Dequantize();
+    double ifm2Scale = ifm2Quant ? ifm2Quant->Scale().Dequantize() : 1.0;
+    double ofmScale = ofmQuant->Scale().Dequantize();
 
     DataType ifmDataType = op->ifm[0].dataType;
     OpType opType = op->type;
@@ -114,6 +93,7 @@ void RescaleElementwise(HLCOperation *op)
     Quantization *ifm1Quant = &op->ifm[0].quantization;
     Quantization *ifm2Quant = ifmCnt == 2 ? &op->ifm[1].quantization : nullptr;
     Quantization *ofmQuant = &op->ofm.quantization;
+    assert(ifm1Quant && ofmQuant);
 
     if ( ifm1Quant->type == QuantizationType::EXPLICIT && ofmQuant->type == QuantizationType::EXPLICIT &&
          (ifm2Quant == nullptr || ifm2Quant->type == QuantizationType::EXPLICIT) )
@@ -125,9 +105,9 @@ void RescaleElementwise(HLCOperation *op)
     QuantizedScale input2Scale(1, 0);
     QuantizedScale outScale(1, 0);
 
-    double ifm1Scale = GetScale(ifm1Quant);
-    double ifm2Scale = GetScale(ifm2Quant);
-    double ofmScale = GetScale(ofmQuant);
+    double ifm1Scale = ifm1Quant->Scale().Dequantize();
+    double ifm2Scale = ifm2Quant ? ifm2Quant->Scale().Dequantize() : 1.0;
+    double ofmScale = ofmQuant->Scale().Dequantize();
 
     DataType ifmDataType = op->ifm[0].dataType;
     OpType opType = op->type;
@@ -203,6 +183,7 @@ void RescalePooling(HLCOperation *op, bool isNoOp)
 {
     Quantization *ifm1Quant = &op->ifm[0].quantization;
     Quantization *ofmQuant = &op->ofm.quantization;
+    assert(ifm1Quant && ofmQuant);
     uint32_t scale = 1;
     int shift = 0;
     DataType ifmDataType = op->ifm[0].dataType;
@@ -221,10 +202,9 @@ void RescalePooling(HLCOperation *op, bool isNoOp)
     }
     else if ( !ifm1Quant->scales.empty() && !ofmQuant->scales.empty() )
     {
-        double ifmScale = GetScale(ifm1Quant);
-        double ofmScale = GetScale(ofmQuant);
         if ( opType == OpType::Sigmoid || opType == OpType::Tanh )
         {
+            double ifmScale = ifm1Quant->Scale().Dequantize();
             assert(ifmDataType == DataType::Int16);
             double rescale = 0x3000 * ifmScale;
             // Calculate scale and shift for the output scale of 1/(3*4096)
@@ -252,29 +232,27 @@ void RescalePooling(HLCOperation *op, bool isNoOp)
         }
         else if ( opType == OpType::MemoryCopy )
         {
-            double rescale = ifmScale / ofmScale;
             // In the case of concat or other memory operation, rescaling might be needed.
             // The scale is maximised, to get maximum precision
-            QuantizePoolingScaleMaxPrecision(op->kernel.ElementsWH(), rescale, scale, shift, 31);
+            QuantizePoolingScaleMaxPrecision(op->kernel.ElementsWH(), GetScaleFactor(op), scale, shift, 31);
         }
         else if ( opType == OpType::Quantize )
         {
             // Quantize operations need double-precision scaling
-            QuantizedScale quantScale(ifmScale / ofmScale);
+            QuantizedScale quantScale(GetScaleFactor(op));
             scale = uint32_t(quantScale.scale);
             shift = quantScale.shift;
         }
         else if ( isNoOp )
         {
-            QuantizedScale quantScale(float(ifmScale) / float(ofmScale));
+            QuantizedScale quantScale(GetScaleFactor(op, /* reducedPrecision */ true));
             scale = uint32_t(quantScale.scale);
             shift = quantScale.shift;
         }
         else
         {
             // Normal pooling operation, without need for special scaling
-            double rescale = ifmScale / ofmScale;
-            QuantizePoolingScale(op->kernel.ElementsWH(), rescale, 0, scale, shift, 31);
+            QuantizePoolingScale(op->kernel.ElementsWH(), GetScaleFactor(op), 0, scale, shift, 31);
         }
     }
     ofmQuant->scales.clear();
diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_scaling.hpp b/ethosu/regor/architecture/ethosu85/ethos_u85_scaling.hpp
index 5125a020..ff906455 100644
--- a/ethosu/regor/architecture/ethosu85/ethos_u85_scaling.hpp
+++ b/ethosu/regor/architecture/ethosu85/ethos_u85_scaling.hpp
@@ -1,5 +1,5 @@
 //
-// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
 //
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -30,6 +30,5 @@ namespace ethosU85Scaling
 void RescalePooling(HLCOperation *op, bool isNoOp);
 void RescaleConvolution(HLCOperation *op);
 void RescaleElementwise(HLCOperation *op);
-double GetScaleFactor(HLCOperation *op);
 }  // namespace ethosU85Scaling
 }  // namespace regor
diff --git a/ethosu/regor/compiler/quantization.hpp b/ethosu/regor/compiler/quantization.hpp
index 19c8caf4..7612de33 100644
--- a/ethosu/regor/compiler/quantization.hpp
+++ b/ethosu/regor/compiler/quantization.hpp
@@ -87,6 +87,8 @@ public:
         }
         return *this;
     }
+
+    const QuantizedScale &Scale() const { return scales.empty() ? QuantizedScale::Unit() : scales.front(); }
 };
 
 inline int64_t Quantize(float value, const Quantization &quant)
-- 
GitLab