From 3f5c91046a59a94d87d30ba415098bb2790664a8 Mon Sep 17 00:00:00 2001 From: Philip Hall Date: Mon, 27 Jan 2025 12:22:48 +0000 Subject: [PATCH] MLBEDSW-10106: Fix bitflags casting issue. An obscure cast issue occurred during another implementation where attempting to cast the bitflags object causes it to take the implicit boolean value, rather than the explicit unsigned cast. - This commit prevents such casts by removing the implicit boolean cast. - Added specific flag-test operator, resulting in an actual boolean value. - Allowed double-negation semantics (!!) to check for non-zero flags. Signed-off-by: Philip Hall Change-Id: Ide581e840a0c848be68bbc3249518ab901ce480b --- ethosu/regor/architecture/architecture.hpp | 4 ++-- .../ethosu85/ethos_u85_performance.cpp | 7 +++---- .../ethos_u85_register_cs_generator.cpp | 4 ++-- .../ethosu85/ethos_u85_weight_encoder.cpp | 10 +++++----- ethosu/regor/common/bit_flags.hpp | 6 ++++-- ethosu/regor/compiler/scheduler.cpp | 20 +++++++++---------- .../regor/tflite/custom_operator_ethosu.hpp | 4 ++-- 7 files changed, 28 insertions(+), 27 deletions(-) diff --git a/ethosu/regor/architecture/architecture.hpp b/ethosu/regor/architecture/architecture.hpp index 1b6a8a1b..7546e762 100644 --- a/ethosu/regor/architecture/architecture.hpp +++ b/ethosu/regor/architecture/architecture.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -119,7 +119,7 @@ struct MemArea struct hash { - size_t operator()(const MemArea &memArea) const { return size_t(memArea.memory) | size_t(memArea.usage); } + size_t operator()(const MemArea &memArea) const { return size_t(memArea.memory) | unsigned(memArea.usage); } }; }; diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_performance.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_performance.cpp index a6785cf1..302f70b5 
100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_performance.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_performance.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -595,9 +595,8 @@ ElementAccess EthosU85Performance::ElementTransferToBytes(const PerformanceQuery int64_t EthosU85Performance::WeightDecodeCycles( const PerformanceQuery &, const WeightStats &weights, Flags format, ArchitectureMemory *weightsMemory) { - using WF = Flags; int weightsPerCycle; - if ( format & WF(WeightFormat::Fast) ) + if ( format % WeightFormat::Fast ) { weightsPerCycle = (weights.distinctWeights < 16) ? 64 : 32; } @@ -617,7 +616,7 @@ int64_t EthosU85Performance::WeightDecodeCycles( _nextId = -1; } - MemChannel channel = (format & WeightFormat::Fast) ? MemChannel::FastWeight : MemChannel::Weight; + MemChannel channel = (format % WeightFormat::Fast) ? MemChannel::FastWeight : MemChannel::Weight; int64_t dmaCycles = int64_t(float(weights.encodedSize) / ChannelBW(weightsMemory, channel)); dmaCycles += weightsMemory->ReadLatency(); return std::max(decodeCycles, dmaCycles); diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp index 8aa3501f..9412c0b0 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp @@ -1336,8 +1336,8 @@ void EthosU85RCSGenerator::GenerateWeights(const HLCStripe *stripe, MemoryAccess EthosU85OpConfig *config = static_cast(stripe->operation->config); - auto wgtFormat = (weights->format & WeightFormat::Fast) ? weight_format::FWD : weight_format::SWD; - auto wgtSparsity = (weights->format & WeightFormat::Sparse2_4) ? 
weight_sparsity::SPARSE_2_4 : weight_sparsity::NONE; + auto wgtFormat = (weights->format % WeightFormat::Fast) ? weight_format::FWD : weight_format::SWD; + auto wgtSparsity = (weights->format % WeightFormat::Sparse2_4) ? weight_sparsity::SPARSE_2_4 : weight_sparsity::NONE; Emit(isa::npu_set_weight_format_t(wgtFormat, wgtSparsity)); int depth = stripe->weightRangeDepth; diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_weight_encoder.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_weight_encoder.cpp index 16bd0719..6357a29b 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_weight_encoder.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_weight_encoder.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -119,7 +119,7 @@ std::unique_ptr EthosU85WeightEncoder::GetEncodingConfig( int EthosU85WeightEncoder::StreamsRequired(IWeightEncodingConfig *config, const Shape & /*weightShape*/, int &scaleStreamsRequired) { scaleStreamsRequired = 1; - return config->Format() & WeightFormat::Fast ? 1 : _arch->_cores; + return config->Format() % WeightFormat::Fast ? 
1 : _arch->_cores; } static int EncodeBias32(int64_t bias, int32_t scale, int shift, uint8_t data[10]) @@ -588,8 +588,8 @@ std::unique_ptr EthosU85WeightEncoder::GetWeightSource( int ofmUBlockDepth = cfg->ofmUBlock.Depth(); int ifmBitDepth = DataTypeSizeBits(cfg->ifmType); - bool isFast = cfg->Format() & WeightFormat::Fast; - bool isSparse = cfg->Format() & WeightFormat::Sparse2_4; + bool isFast = cfg->Format() % WeightFormat::Fast; + bool isSparse = cfg->Format() % WeightFormat::Sparse2_4; if ( weightType == DataType::UInt8 ) { @@ -757,7 +757,7 @@ Quantization EthosU85WeightEncoder::MakeExplicit(const Quantization &ifmQ, const WeightsInfo EthosU85WeightEncoder::EncodeWeights(IWeightEncodingConfig *config, IWeightSource *source, std::vector &result) { EthosUEncodingConfig *cfg = static_cast(config); - auto fn = (cfg->Format() & WeightFormat::Fast) ? mle_encode_fwd_proxy : mle_encode_proxy; + auto fn = (cfg->Format() % WeightFormat::Fast) ? mle_encode_fwd_proxy : mle_encode_proxy; unsigned flags = MLW_ENCODE_FLAG_NONE; if ( cfg->Format().All(WeightFormat::Fast, WeightFormat::Sparse2_4) ) flags |= MLW_ENCODE_NO_PALETTE_LUT; auto res = fn(source, 128 * 1024, result, flags); diff --git a/ethosu/regor/common/bit_flags.hpp b/ethosu/regor/common/bit_flags.hpp index 435c7ef0..dce06ac6 100644 --- a/ethosu/regor/common/bit_flags.hpp +++ b/ethosu/regor/common/bit_flags.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021, 2023-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021, 2023-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -74,7 +74,6 @@ public: bool operator==(ENUM val) const { return _raw == TYPE(val); } bool operator!=(ENUM val) const { return _raw != TYPE(val); } bool operator<(ENUM val) const { return _raw < TYPE(val); } - operator bool() const { return _raw != 0; } operator ENUM() const { return ENUM(_raw); } explicit operator unsigned() const { return unsigned(_raw); } @@ -123,6 
+122,9 @@ public: Flags operator|(ENUM val) const { return Flags(ENUM(_raw | TYPE(val))); } Flags operator^(ENUM val) const { return Flags(ENUM(_raw ^ TYPE(val))); } Flags operator~() const { return Flags(ENUM(~_raw)); } + // Flag presence check. Use as if an infix replacement for '&' for testing flags. + bool operator%(ENUM val) const { return (_raw & TYPE(val)) != 0; } + bool operator!() const { return _raw == 0; } // Extract non-bitfield item unsigned GetUInt(ENUM offset, int bits) { return (_raw >> int(offset)) & ((1u << bits) - 1u); } diff --git a/ethosu/regor/compiler/scheduler.cpp b/ethosu/regor/compiler/scheduler.cpp index 8d1e181d..214ba7cc 100644 --- a/ethosu/regor/compiler/scheduler.cpp +++ b/ethosu/regor/compiler/scheduler.cpp @@ -500,7 +500,7 @@ std::unique_ptr MaybeGetSparsityConfig(regor::Architecture { using WF = Flags; std::unique_ptr blockConfigSparse; - if ( supportedFormat & WF(WeightFormat::Sparse2_4) ) + if ( supportedFormat % WeightFormat::Sparse2_4 ) { blockConfigSparse = GetOpConfig(arch, op, ifmShape, ifm2Shape, ofmShape, WF(WeightFormat::Default, WeightFormat::Sparse2_4)); } @@ -524,12 +524,12 @@ WeightScaleEncoding Scheduler::EncodeBestWeightFormat( auto perfSparse = EstimateOpPerformanceForSparsity(op, blockConfigSparse.get(), op->OFM()->SliceShape().Depth()); if ( perfSparse.opCycles > perfDefault.opCycles ) { - supportedFormats &= ~WF(WeightFormat::Sparse2_4); + supportedFormats.Unset(WeightFormat::Sparse2_4); } } - else if ( supportedFormats & WeightFormat::Sparse2_4 ) + else if ( supportedFormats % WeightFormat::Sparse2_4 ) { // No block config available for sparse 2_4, so disable. 
- supportedFormats &= ~WF(WeightFormat::Sparse2_4); + supportedFormats.Unset(WeightFormat::Sparse2_4); } std::vector encodingResults; @@ -546,9 +546,9 @@ WeightScaleEncoding Scheduler::EncodeBestWeightFormat( for ( auto weightFormat : formatList ) { if ( (weightFormat & supportedFormats) != weightFormat ) continue; - bool checkFastDecoder = !(weightFormat & WF(WeightFormat::Fast)) && (supportedFormats & WF(WeightFormat::Fast)); + bool checkFastDecoder = !(weightFormat % WeightFormat::Fast) && (supportedFormats % WeightFormat::Fast); - auto *blockConfig = (weightFormat & WF(WeightFormat::Sparse2_4)) ? blockConfigSparse.get() : blockConfigDefault.get(); + auto *blockConfig = (weightFormat % WeightFormat::Sparse2_4) ? blockConfigSparse.get() : blockConfigDefault.get(); if ( !blockConfig ) { throw std::runtime_error("Failed to find block configuration\n"); @@ -565,15 +565,15 @@ WeightScaleEncoding Scheduler::EncodeBestWeightFormat( if ( checkFastDecoder && !UseFastDecoder(_arch, op, _options.optimizationStrategy, encoding.weightScales.npuWeightsTensor.get()) ) { - supportedFormats &= ~WF(WeightFormat::Fast); + supportedFormats.Unset(WeightFormat::Fast); } encodingResults.emplace_back(std::move(encoding)); } catch ( const WeightEncodeException & ) { - if ( weightFormat & WF(WeightFormat::Sparse2_4) ) + if ( weightFormat % WeightFormat::Sparse2_4 ) { - supportedFormats &= ~WF(WeightFormat::Sparse2_4); + supportedFormats.Unset(WeightFormat::Sparse2_4); } continue; } @@ -581,7 +581,7 @@ WeightScaleEncoding Scheduler::EncodeBestWeightFormat( assert(!encodingResults.empty()); auto bestEncoding = ChooseBestWeightFormat(_arch, op, _options.optimizationStrategy, encodingResults); bestEncoding.blockConfig = - (bestEncoding.weightScales.npuWeightsTensor->config->Format() & WF(WeightFormat::Sparse2_4)) ? std::move(blockConfigSparse) : std::move(blockConfigDefault); + (bestEncoding.weightScales.npuWeightsTensor->config->Format() % WeightFormat::Sparse2_4) ? 
std::move(blockConfigSparse) : std::move(blockConfigDefault); return bestEncoding; } diff --git a/ethosu/regor/tflite/custom_operator_ethosu.hpp b/ethosu/regor/tflite/custom_operator_ethosu.hpp index dcf911d8..ec4c7eba 100644 --- a/ethosu/regor/tflite/custom_operator_ethosu.hpp +++ b/ethosu/regor/tflite/custom_operator_ethosu.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021, 2023-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021, 2023-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -222,7 +222,7 @@ private: const auto allocation = tensor->AllocationSizeBytes(); const auto size = tensor->srcTensor->View().Buffer()->Size(); - assert(tensor->memArea.usage & MemUsage::ReadOnly); + assert(tensor->memArea.usage % MemUsage::ReadOnly); assert((offset >= 0) && (allocation >= 0)); // Has been allocated assert((offset + allocation) <= Address(_readOnlyBuffer->Size())); // Allocation fits in buffer assert(size <= allocation); // Tensor fits in allocation -- GitLab