From 44fc603f94e6ed941d2827ae409a17985d29a292 Mon Sep 17 00:00:00 2001 From: Jacob Bohlin Date: Mon, 2 Jun 2025 17:17:51 +0100 Subject: [PATCH] MLBEDSW-10719 Refactor GetScaleFactor() Function is architecture agnostic but was only used for Ethos-U85. This commit makes a few refactors: * Refactor to common function to be used by all architectures. * Re-use the function in more places. * Added `reducedPrecision` flag to the function. Change-Id: I8c50b91cfc6790cc767eb30e030d6ba940d5d3d2 Signed-off-by: Jacob Bohlin --- ethosu/regor/architecture/ethos_u_scaling.hpp | 10 +++- .../ethosu55/ethos_u55_scaling.cpp | 34 ++++-------- .../ethos_u85_register_cs_generator.cpp | 2 +- .../ethosu85/ethos_u85_scaling.cpp | 52 ++++++------------- .../ethosu85/ethos_u85_scaling.hpp | 3 +- ethosu/regor/compiler/quantization.hpp | 2 + 6 files changed, 39 insertions(+), 64 deletions(-) diff --git a/ethosu/regor/architecture/ethos_u_scaling.hpp b/ethosu/regor/architecture/ethos_u_scaling.hpp index 0be3db12..42736792 100644 --- a/ethosu/regor/architecture/ethos_u_scaling.hpp +++ b/ethosu/regor/architecture/ethos_u_scaling.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -20,6 +20,7 @@ #include "common/data_type.hpp" #include "common/scaling.hpp" +#include "compiler/high_level_command_stream.hpp" #include "compiler/op_type.hpp" #include "compiler/quantization.hpp" @@ -40,4 +41,11 @@ void SimplifiedElementwiseAddSubScale(double input1Scale, double input2Scale, do Quantization RescalePerChannel(const Quantization &ifmQuant, const Quantization &weightQuant, const Quantization &ofmQuant, const DataType scaleDataType, const DataType ifmDataType, OpType opType); +static inline double GetScaleFactor(HLCOperation *op, bool reducedPrecision = false) +{ + float ifmScale = op->ifm[0].quantization.Scale().Dequantize(); + 
float ofmScale = op->ofm.quantization.Scale().Dequantize(); + return reducedPrecision ? (ifmScale / ofmScale) : (static_cast<double>(ifmScale) / static_cast<double>(ofmScale)); +} + } // namespace regor diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_scaling.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_scaling.cpp index 4af22546..cca8011a 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_scaling.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_scaling.cpp @@ -39,18 +39,6 @@ void AdvancedElementwiseAddSubScale(double input1Scale, double input2Scale, doub input1Rescale = QuantizedScale(ifm1Rescale); } -float GetScale(const Quantization *quant) -{ - if ( quant != nullptr && quant->scales.size() != 0 ) - { - return float(quant->scales[0].Dequantize()); - } - else - { - return 1.0f; - } -} - } // namespace void RescaleElementwise(HLCOperation *op) @@ -59,6 +47,7 @@ void RescaleElementwise(HLCOperation *op) Quantization *ifm1Quant = &op->ifm[0].quantization; Quantization *ifm2Quant = ifmCnt == 2 ? &op->ifm[1].quantization : nullptr; Quantization *ofmQuant = &op->ofm.quantization; + assert(ifm1Quant && ofmQuant); if ( ifm1Quant->type == QuantizationType::EXPLICIT && ofmQuant->type == QuantizationType::EXPLICIT && (ifm2Quant == nullptr || ifm2Quant->type == QuantizationType::EXPLICIT) ) @@ -68,9 +57,9 @@ void RescaleElementwise(HLCOperation *op) QuantizedScale outScale(1, 0); - double ifm1Scale = GetScale(ifm1Quant); - double ifm2Scale = GetScale(ifm2Quant); - double ofmScale = GetScale(ofmQuant); + double ifm1Scale = ifm1Quant->Scale().Dequantize(); + double ifm2Scale = ifm2Quant ?
ifm2Quant->Scale().Dequantize() : 1.0; + double ofmScale = ofmQuant->Scale().Dequantize(); DataType ifmDataType = op->ifm[0].dataType; OpType opType = op->type; @@ -187,6 +176,7 @@ void RescalePooling(HLCOperation *op, bool isNoOp) Quantization *ifm1Quant = &op->ifm[0].quantization; Quantization *ofmQuant = &op->ofm.quantization; + assert(ifm1Quant && ofmQuant); uint32_t scale = 1; int shift = 0; DataType ifmDataType = op->ifm[0].dataType; @@ -202,8 +192,8 @@ void RescalePooling(HLCOperation *op, bool isNoOp) if ( !ifm1Quant->scales.empty() && !ofmQuant->scales.empty() ) { - double ifmScale = GetScale(ifm1Quant); - double ofmScale = GetScale(ofmQuant); + double ifmScale = ifm1Quant->Scale().Dequantize(); + double ofmScale = ofmQuant->Scale().Dequantize(); auto actType = op->subOps.empty() ? opType : op->subOps[0].type; if ( actType == OpType::Sigmoid || actType == OpType::Tanh ) { @@ -240,29 +230,27 @@ void RescalePooling(HLCOperation *op, bool isNoOp) } else if ( opType == OpType::MemoryCopy ) { - double rescale = ifmScale / ofmScale; // In case of concat or other memory operation, rescaling might be needed. 
// The scale is maximised, to get maximum precision - QuantizePoolingScaleMaxPrecision(op->kernel.ElementsWH(), rescale, scale, shift, 32); + QuantizePoolingScaleMaxPrecision(op->kernel.ElementsWH(), GetScaleFactor(op), scale, shift, 32); } else if ( opType == OpType::Quantize ) { // Quantize operations need double-precision scaling - QuantizedScale quantScale(ifmScale / ofmScale); + QuantizedScale quantScale(GetScaleFactor(op)); scale = uint32_t(quantScale.scale); shift = quantScale.shift; } else if ( isNoOp ) { - QuantizedScale quantScale(float(ifmScale) / float(ofmScale)); + QuantizedScale quantScale(GetScaleFactor(op, /* reducedPrecision */ true)); scale = uint32_t(quantScale.scale); shift = quantScale.shift; } else { // Normal pooling operation, without need for special scaling - double rescale = ifmScale / ofmScale; - QuantizePoolingScale(op->kernel.ElementsWH(), rescale, 0, scale, shift, 32); + QuantizePoolingScale(op->kernel.ElementsWH(), GetScaleFactor(op), 0, scale, shift, 32); } } ofmQuant->scales.clear(); diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp index 607eac86..e0ae155e 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp @@ -696,7 +696,7 @@ void EthosU85RCSGenerator::GenerateOFMScalingForPooling(HLCOperation *poolOp, bo { uint32_t scale = 1; int shift = 0; - QuantizePoolingScale(poolOp->kernel.ElementsWH(), ethosU85Scaling::GetScaleFactor(poolOp), 0, scale, shift, 31); + QuantizePoolingScale(poolOp->kernel.ElementsWH(), GetScaleFactor(poolOp), 0, scale, shift, 31); ofmScale = QuantizedScale(int32_t(scale), shift); } else if ( poolOp->type == OpType::ArgMax && useGlobalScale ) diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_scaling.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_scaling.cpp index cffe836a..f24e50e6 
100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_scaling.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_scaling.cpp @@ -27,6 +27,7 @@ namespace regor::ethosU85Scaling { + namespace { void AdvancedElementwiseAddSubScale(double input1Scale, double input2Scale, double outputScale, int bitDepth, @@ -39,37 +40,15 @@ void AdvancedElementwiseAddSubScale(double input1Scale, double input2Scale, doub input1Rescale = QuantizedScale(ifm1Rescale); input2Rescale = QuantizedScale(ifm2Rescale); } - -float GetScale(const Quantization *quant) -{ - if ( quant != nullptr && quant->scales.size() != 0 ) - { - // Use single precision to match reference - return float(quant->scales[0].Dequantize()); - } - else - { - return 1.0f; - } -} - } // namespace -double GetScaleFactor(HLCOperation *op) -{ - Quantization *ifmQuant = &op->ifm[0].quantization; - Quantization *ofmQuant = &op->ofm.quantization; - double ifmScale = GetScale(ifmQuant); - double ofmScale = GetScale(ofmQuant); - return ifmScale / ofmScale; -} - void RescaleConvolution(HLCOperation *op) { int ifmCnt = int(op->ifm.size()); Quantization *ifm1Quant = &op->ifm[0].quantization; Quantization *ifm2Quant = ifmCnt == 2 ? &op->ifm[1].quantization : nullptr; Quantization *ofmQuant = &op->ofm.quantization; + assert(ifm1Quant && ofmQuant); if ( ofmQuant->type == QuantizationType::EXPLICIT ) { @@ -78,9 +57,9 @@ void RescaleConvolution(HLCOperation *op) QuantizedScale outScale(1, 0); - double ifm1Scale = GetScale(ifm1Quant); - double ifm2Scale = GetScale(ifm2Quant); - double ofmScale = GetScale(ofmQuant); + double ifm1Scale = ifm1Quant->Scale().Dequantize(); + double ifm2Scale = ifm2Quant ? ifm2Quant->Scale().Dequantize() : 1.0; + double ofmScale = ofmQuant->Scale().Dequantize(); DataType ifmDataType = op->ifm[0].dataType; OpType opType = op->type; @@ -114,6 +93,7 @@ void RescaleElementwise(HLCOperation *op) Quantization *ifm1Quant = &op->ifm[0].quantization; Quantization *ifm2Quant = ifmCnt == 2 ? 
&op->ifm[1].quantization : nullptr; Quantization *ofmQuant = &op->ofm.quantization; + assert(ifm1Quant && ofmQuant); if ( ifm1Quant->type == QuantizationType::EXPLICIT && ofmQuant->type == QuantizationType::EXPLICIT && (ifm2Quant == nullptr || ifm2Quant->type == QuantizationType::EXPLICIT) ) @@ -125,9 +105,9 @@ void RescaleElementwise(HLCOperation *op) QuantizedScale input2Scale(1, 0); QuantizedScale outScale(1, 0); - double ifm1Scale = GetScale(ifm1Quant); - double ifm2Scale = GetScale(ifm2Quant); - double ofmScale = GetScale(ofmQuant); + double ifm1Scale = ifm1Quant->Scale().Dequantize(); + double ifm2Scale = ifm2Quant ? ifm2Quant->Scale().Dequantize() : 1.0; + double ofmScale = ofmQuant->Scale().Dequantize(); DataType ifmDataType = op->ifm[0].dataType; OpType opType = op->type; @@ -203,6 +183,7 @@ void RescalePooling(HLCOperation *op, bool isNoOp) { Quantization *ifm1Quant = &op->ifm[0].quantization; Quantization *ofmQuant = &op->ofm.quantization; + assert(ifm1Quant && ofmQuant); uint32_t scale = 1; int shift = 0; DataType ifmDataType = op->ifm[0].dataType; @@ -221,10 +202,9 @@ void RescalePooling(HLCOperation *op, bool isNoOp) } else if ( !ifm1Quant->scales.empty() && !ofmQuant->scales.empty() ) { - double ifmScale = GetScale(ifm1Quant); - double ofmScale = GetScale(ofmQuant); if ( opType == OpType::Sigmoid || opType == OpType::Tanh ) { + double ifmScale = ifm1Quant->Scale().Dequantize(); assert(ifmDataType == DataType::Int16); double rescale = 0x3000 * ifmScale; // Calculate scale and shift for the output scale of 1/(3*4096) @@ -252,29 +232,27 @@ void RescalePooling(HLCOperation *op, bool isNoOp) } else if ( opType == OpType::MemoryCopy ) { - double rescale = ifmScale / ofmScale; // In the case of concat or other memory operation, rescaling might be needed. 
// The scale is maximised, to get maximum precision - QuantizePoolingScaleMaxPrecision(op->kernel.ElementsWH(), rescale, scale, shift, 31); + QuantizePoolingScaleMaxPrecision(op->kernel.ElementsWH(), GetScaleFactor(op), scale, shift, 31); } else if ( opType == OpType::Quantize ) { // Quantize operations need double-precision scaling - QuantizedScale quantScale(ifmScale / ofmScale); + QuantizedScale quantScale(GetScaleFactor(op)); scale = uint32_t(quantScale.scale); shift = quantScale.shift; } else if ( isNoOp ) { - QuantizedScale quantScale(float(ifmScale) / float(ofmScale)); + QuantizedScale quantScale(GetScaleFactor(op, /* reducedPrecision */ true)); scale = uint32_t(quantScale.scale); shift = quantScale.shift; } else { // Normal pooling operation, without need for special scaling - double rescale = ifmScale / ofmScale; - QuantizePoolingScale(op->kernel.ElementsWH(), rescale, 0, scale, shift, 31); + QuantizePoolingScale(op->kernel.ElementsWH(), GetScaleFactor(op), 0, scale, shift, 31); } } ofmQuant->scales.clear(); diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_scaling.hpp b/ethosu/regor/architecture/ethosu85/ethos_u85_scaling.hpp index 5125a020..ff906455 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_scaling.hpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_scaling.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -30,6 +30,5 @@ namespace ethosU85Scaling void RescalePooling(HLCOperation *op, bool isNoOp); void RescaleConvolution(HLCOperation *op); void RescaleElementwise(HLCOperation *op); -double GetScaleFactor(HLCOperation *op); } // namespace ethosU85Scaling } // namespace regor diff --git a/ethosu/regor/compiler/quantization.hpp b/ethosu/regor/compiler/quantization.hpp index 19c8caf4..7612de33 100644 --- a/ethosu/regor/compiler/quantization.hpp +++ 
b/ethosu/regor/compiler/quantization.hpp @@ -87,6 +87,8 @@ public: } return *this; } + + const QuantizedScale &Scale() const { return scales.empty() ? QuantizedScale::Unit() : scales.front(); } }; inline int64_t Quantize(float value, const Quantization &quant) -- GitLab