From 6770537febd0a97b8b2f8562aa4ac392d468e972 Mon Sep 17 00:00:00 2001 From: Johan Gunnarsson Date: Wed, 2 Jul 2025 15:01:41 +0200 Subject: [PATCH] MLBEDSW-10899: Handle RESCALE with output unsigned properly Networks with a RESCALE that writes to graph output and has output_unsigned set lost the unsignedness of the output tensor when the ReinterpretCast op was handled. This caused wrong OFM clipping and zero point. This patch changes the following: * Don't add REINTERPRETCAST before/after RESCALE. Instead, keep the RESCALE and its attributes and deal with the unsigned input/output when lowering SchedIR to HLC. * Reset all buffers when cloning IFM. This is important when cloning IFM with the purpose of creating a new intermediate tensor. If the IFM is constant, the cloned tensor will inherit the constant data as well, which is not desired in most cases. Signed-off-by: Johan Gunnarsson Change-Id: Ie0adbdfbee18e92568acb1d1f44637fbdd309e8d --- ethosu/regor/common/data_type.hpp | 5 ++ ethosu/regor/compiler/graphir_optimiser.cpp | 54 +------------------ ethosu/regor/compiler/graphir_optimiser.hpp | 1 - .../high_level_command_stream_generator.cpp | 30 +++++++++++ ethosu/regor/compiler/optimiser_utils.cpp | 3 +- ethosu/regor/compiler/scheduler.cpp | 5 ++ ethosu/regor/compiler/scheduler_decompose.cpp | 6 +++ .../regor/compiler/tflite_graph_optimiser.cpp | 2 + 8 files changed, 52 insertions(+), 54 deletions(-) diff --git a/ethosu/regor/common/data_type.hpp b/ethosu/regor/common/data_type.hpp index bb02cb1c..6c40f13d 100644 --- a/ethosu/regor/common/data_type.hpp +++ b/ethosu/regor/common/data_type.hpp @@ -226,6 +226,11 @@ inline std::string DataTypeToString(const DataType type) return EnumToString(type); } +inline constexpr DataType DataTypeSetSignedness(DataType type, bool setSigned) +{ + return (type & ~unsigned(DataType::Signed)) | (setSigned ? 
DataType::Signed : DataType::None); +} + inline constexpr bool IsInteger(DataType type) { return (type & DataType::Int) == DataType::Int; diff --git a/ethosu/regor/compiler/graphir_optimiser.cpp b/ethosu/regor/compiler/graphir_optimiser.cpp index efa5e3c4..2f80e6ae 100644 --- a/ethosu/regor/compiler/graphir_optimiser.cpp +++ b/ethosu/regor/compiler/graphir_optimiser.cpp @@ -99,6 +99,7 @@ Tensor *GraphIrOptimiser::ConvertBool8Tensors(Graph *graph, Tensor *tensor) { // Replace the OFM of ops producing the graph output tensor std::shared_ptr newTensor = tensor->Clone(); + newTensor->SetBuffer(nullptr); newTensor->SetName(newTensor->Name() + "_int8"); std::shared_ptr graphOutputTensor = tensor->shared_from_this(); ReplaceProducerOutput(graphOutputTensor->Writers(), graphOutputTensor.get(), newTensor); @@ -681,58 +682,6 @@ Operation *GraphIrOptimiser::RewriteRescaleInputs(Graph *const, Operation *const return returnOp; } -Operation *GraphIrOptimiser::RemoveRescaleUnsignedAttribute(Graph *const, Operation *const operation) -{ - OpType opType = operation->Type(); - if ( opType == OpType::Rescale ) - { - auto signAttr = operation->Attribute(); - if ( signAttr->input_unsigned ) - { - const auto &ifmConn = operation->Input(TensorUsage::IFM0); - DataType ifmType = ifmConn->tensor->Type(); - auto newIfmType = ifmType & ~unsigned(DataType::Signed); - - // Create a reinterpret OP to reinterpret the input as unsigned - auto reinterpretOp = std::make_shared(OpType::ReinterpretCast); - - // Create an unsigned data type tensor for the reinterpret OP - std::shared_ptr unsignedTensor = ifmConn->tensor->Clone(); - unsignedTensor->ChangeType(newIfmType); - - // Connect the reinterpret OP between the rescale and the rescale IFM - reinterpretOp->CopyInput(TensorUsage::IFM, *ifmConn); - reinterpretOp->ConnectOutput(TensorUsage::OFM, unsignedTensor); - - // Connect the rescale OP input to the unsigned data type tensor - operation->ConnectInput(TensorUsage::IFM, unsignedTensor); - 
signAttr->input_unsigned = false; - } - if ( signAttr->output_unsigned ) - { - const auto &ofmConn = operation->Output(TensorUsage::OFM); - DataType ofmType = ofmConn->tensor->Type(); - auto newOfmType = ofmType & ~unsigned(DataType::Signed); - - // Create a reinterpret OP to reinterpret the input as unsigned - auto reinterpretOp = std::make_shared(OpType::ReinterpretCast); - - // Create an unsigned data type tensor for the reinterpret OP - std::shared_ptr unsignedTensor = ofmConn->tensor->Clone(); - unsignedTensor->ChangeType(newOfmType); - - // Connect the reinterpret OP between the rescale and the rescale OFM - reinterpretOp->ConnectInput(TensorUsage::IFM, unsignedTensor); - reinterpretOp->CopyOutput(TensorUsage::OFM, *ofmConn); - - // Connect the rescale OP output to the unsigned data type tensor - operation->ConnectOutput(TensorUsage::OFM, unsignedTensor); - signAttr->output_unsigned = false; - } - } - return operation; -} - /* * Lower Rescale into one (or more) 32-bit elementwise MUL operations. 
* Multipliers are moved to a constant-tensor, while the shift value is keps as ofm-quantization @@ -1723,6 +1672,7 @@ Operation *GraphIrOptimiser::RewriteReduceSum(Graph *const graph, Operation *con // Temporary tensor between ReduceSum and Sub std::shared_ptr reduceSumTens = ofmConn->tensor->Clone(); + reduceSumTens->SetBuffer(nullptr); reduceSumTens->SetName(ofmConn->tensor->Name() + "_reducesum"); reduceSumTens->ChangeType(DataType::Int32); reduceSumTens->SetStorageShape(ofmConn->shape); diff --git a/ethosu/regor/compiler/graphir_optimiser.hpp b/ethosu/regor/compiler/graphir_optimiser.hpp index e36c308e..9b36d2af 100644 --- a/ethosu/regor/compiler/graphir_optimiser.hpp +++ b/ethosu/regor/compiler/graphir_optimiser.hpp @@ -130,7 +130,6 @@ private: { &GraphIrOptimiser::ConvertZeroPointTensors, &GraphIrOptimiser::RewriteRescaleInputs, - &GraphIrOptimiser::RemoveRescaleUnsignedAttribute, &GraphIrOptimiser::FuseRescale, // First pass fuse all possible ifm and ofm rescales } }, diff --git a/ethosu/regor/compiler/high_level_command_stream_generator.cpp b/ethosu/regor/compiler/high_level_command_stream_generator.cpp index b3efcf5c..48900207 100644 --- a/ethosu/regor/compiler/high_level_command_stream_generator.cpp +++ b/ethosu/regor/compiler/high_level_command_stream_generator.cpp @@ -289,6 +289,11 @@ static HLCSubOperation MakeSubOperation(const std::unique_ptrType(); auto lutConn = schedOp->TryInput(TensorUsage::LUT); size_t ifms = 0; + sign_attr_t *signAttr = nullptr; + if ( schedOp->Type() == OpType::Rescale && schedOp->HasAttribute() ) + { + signAttr = schedOp->Attribute(); + } for ( const auto &input : schedOp->inputs.pairs() ) { if ( IsIFM(input.first) || GetUsageType(input.first) == TensorUsage::Scratch ) @@ -308,9 +313,19 @@ static HLCSubOperation MakeSubOperation(const std::unique_ptrdataType = DataTypeSetSignedness(at->dataType, !signAttr->input_unsigned); + } } } MakeFeatureMap(TensorUsage::OFM, schedOp->OFM(), hlcSubOp.ofm); + if ( signAttr ) + { + // Fixup 
OFM datatype signedness for rescale ops + hlcSubOp.ofm.dataType = DataTypeSetSignedness(hlcSubOp.ofm.dataType, !signAttr->output_unsigned); + } hlcSubOp.srcId = schedOp->Uid(); @@ -340,6 +355,11 @@ static std::shared_ptr MakeOperation(SchedulerOperation *schedOp, op->config = opInfo->Config(); op->srcId = schedOp->Uid(); size_t ifms = 0; + sign_attr_t *signAttr = nullptr; + if ( schedOp->Type() == OpType::Rescale && schedOp->HasAttribute() ) + { + signAttr = schedOp->Attribute(); + } for ( const auto &input : schedOp->inputs.pairs() ) { if ( IsIFM(input.first) || GetUsageType(input.first) == TensorUsage::Scratch ) @@ -358,9 +378,19 @@ static std::shared_ptr MakeOperation(SchedulerOperation *schedOp, at = op->ifm.emplace(op->ifm.end()); } MakeFeatureMap(input.first, &input.second, *at); + if ( signAttr ) + { + // Fixup IFM datatype signedness for rescale ops + at->dataType = DataTypeSetSignedness(at->dataType, !signAttr->input_unsigned); + } } } MakeFeatureMap(TensorUsage::OFM, schedOp->OFM(), op->ofm); + if ( signAttr ) + { + // Fixup OFM datatype signedness for rescale ops + op->ofm.dataType = DataTypeSetSignedness(op->ofm.dataType, !signAttr->output_unsigned); + } #ifndef NDEBUG op->name = schedOp->OFM()->tensor->Name(); diff --git a/ethosu/regor/compiler/optimiser_utils.cpp b/ethosu/regor/compiler/optimiser_utils.cpp index f52fa0e1..bcf90fda 100644 --- a/ethosu/regor/compiler/optimiser_utils.cpp +++ b/ethosu/regor/compiler/optimiser_utils.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -38,6 +38,7 @@ bool IsTensorInVector(const std::vector> &tensorVec, con std::shared_ptr InsertCopyOpAfterTensor(const std::shared_ptr &ifm, const Quantization &quantization) { std::shared_ptr copyTensor = ifm->Clone(); + copyTensor->SetBuffer(nullptr); auto copyOp = 
std::make_shared(OpType::MemoryCopy); copyOp->ConnectInput(TensorUsage::IFM0, ifm).Set(quantization); auto name = ifm->Name(); diff --git a/ethosu/regor/compiler/scheduler.cpp b/ethosu/regor/compiler/scheduler.cpp index d2b6ca13..77eda599 100644 --- a/ethosu/regor/compiler/scheduler.cpp +++ b/ethosu/regor/compiler/scheduler.cpp @@ -309,6 +309,11 @@ int Scheduler::UpdateSchedulerTensor(TensorUsage usage, SchedulerConnection *con ArchOperatorQuery query; Set(query.ifm[0], consumer->TryIFM(0)); Set(query.ifm[1], consumer->TryIFM(1)); + if ( consumer->Type() == OpType::Rescale && consumer->HasAttribute() ) + { + const auto attr = consumer->Attribute(); + query.ifm[0].type = DataTypeSetSignedness(query.ifm[0].type, !attr->input_unsigned); + } query.transposeMask = consumer->OFM()->transpose; if ( _arch->Constraints()->OperatorQuery(consumer->Type(), &query, &req).Any(QueryResult::Native) ) { diff --git a/ethosu/regor/compiler/scheduler_decompose.cpp b/ethosu/regor/compiler/scheduler_decompose.cpp index 11c549f9..0c43470f 100644 --- a/ethosu/regor/compiler/scheduler_decompose.cpp +++ b/ethosu/regor/compiler/scheduler_decompose.cpp @@ -39,6 +39,12 @@ Flags OperatorQuery(Architecture *arch, const SchedulerOperation *s Set(query.ifm[0], schedOp->IFM(0)); Set(query.ifm[1], schedOp->TryIFM(1)); Set(query.ofm, ofmConn); + if ( schedOp->Type() == OpType::Rescale && schedOp->HasAttribute() ) + { + const auto attr = schedOp->Attribute(); + query.ifm[0].type = DataTypeSetSignedness(query.ifm[0].type, !attr->input_unsigned); + query.ofm.type = DataTypeSetSignedness(query.ofm.type, !attr->output_unsigned); + } const auto weights = schedOp->TryInput(TensorUsage::Weights); const auto scales = schedOp->TryInput(TensorUsage::Scales); const bool constantWeights = weights && weights->tensor && weights->tensor->IsConstant(); diff --git a/ethosu/regor/compiler/tflite_graph_optimiser.cpp b/ethosu/regor/compiler/tflite_graph_optimiser.cpp index 9fd7a2a7..fdd716c3 100644 --- 
a/ethosu/regor/compiler/tflite_graph_optimiser.cpp +++ b/ethosu/regor/compiler/tflite_graph_optimiser.cpp @@ -185,6 +185,7 @@ Operation *TFLiteGraphOptimiser::ConvertLeakyRelu16bit(TensorConnection &ifmConn // Create Minimum(IFM, 0) std::shared_ptr zeroTens = CreateConstTensor("zero_const", ifmConn.tensor->Type(), 0); std::shared_ptr fmNegative = ifmConn.tensor->Clone(); + fmNegative->SetBuffer(nullptr); auto minOp = std::make_shared(OpType::Minimum); minOp->CopyInput(TensorUsage::IFM0, ifmConn); minOp->ConnectInput(TensorUsage::IFM1, zeroTens).Set(ifmConn.quantization); @@ -2109,6 +2110,7 @@ Operation *TFLiteGraphOptimiser::ConvertPrelu(Graph *const graph, Operation *con std::shared_ptr zeroTens = CreateConstTensor("zero_const", ifmConn->tensor->Type(), 0); std::shared_ptr fmNegative = ifmConn->tensor->Clone(); + fmNegative->SetBuffer(nullptr); std::shared_ptr fmAlpha = ofmConn->tensor->Clone(); std::shared_ptr fmScaled = ofmConn->tensor->Clone(); -- GitLab