From f91d6f4aa3178581b151d526a64e5b14da4148fa Mon Sep 17 00:00:00 2001 From: Philip Hall Date: Wed, 5 Mar 2025 12:47:17 +0000 Subject: [PATCH 1/2] MLBEDSW-10106: Update Ethos-U55 MatMul performance stats - Updated performance calculations for the Ethos-U55 MatMul implementation. This is required to maintain the Ethos-U55/Ethos-U85 abstraction (both must return a result) when using the performance interface. - Fixed incomplete implementation of encoded weights byte transfer values. - Replaced manual datatype-related scaling with the DataType scaling functions. Signed-off-by: Philip Hall Change-Id: I7c8deb4e2740518874530786481d4ef57822bac4 -- GitLab From 3d8a9f2622a2d4ecef0637da89a2bee24760ed28 Mon Sep 17 00:00:00 2001 From: Philip Hall Date: Thu, 20 Feb 2025 11:37:58 +0000 Subject: [PATCH 2/2] MLBEDSW-10550: Streamline HLCS buffer access - Removes an unnecessary level of indirection from the HLCS buffer representation, which is only used to access scalar constants. - Allows Buffer to hold single-value constants without an additional allocation. Signed-off-by: Philip Hall Change-Id: I3b32f13b3f71a21a38b83bf7e9e89b67cddf68c5 --- .../ethos_u55_register_cs_generator.cpp | 12 ++-- .../ethos_u85_register_cs_generator.cpp | 14 ++--- ethosu/regor/common/buffer_view.hpp | 60 ++++++++++++++----- .../compiler/high_level_command_stream.hpp | 2 +- .../high_level_command_stream_generator.cpp | 5 +- ethosu/regor/compiler/scheduler_decompose.cpp | 2 +- 6 files changed, 63 insertions(+), 32 deletions(-) diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp index d1942579..098f9757 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp @@ -436,26 +436,26 @@ uint32_t EthosU55RCSGenerator::ToRegion(const MemArea &memArea) // quantized value in scalarValue. 
bool EthosU55RCSGenerator::IsScalar(const HLCFeatureMap &fm, int32_t &scalarValue) { - const auto &view = fm.bufferView; + const auto &buffer = fm.constBuffer; // A 1-sized feature map in constant memory is a scalar - bool isScalar = fm.shape.Elements() == 1 && view.HasBuffer(); + bool isScalar = fm.shape.Elements() == 1 && buffer; if ( isScalar ) { if ( fm.dataType == DataType::Int8 ) { - scalarValue = view.Values()[0]; + scalarValue = buffer->Data()[0]; } else if ( fm.dataType == DataType::UInt8 ) { - scalarValue = view.Values()[0]; + scalarValue = buffer->Data()[0]; } else if ( fm.dataType == DataType::Int16 ) { - scalarValue = view.Values()[0]; + scalarValue = buffer->Data()[0]; } else if ( fm.dataType == DataType::UInt16 ) { - scalarValue = view.Values()[0]; + scalarValue = buffer->Data()[0]; } else { // Unsupported scalar value diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp index 6a41e69e..ea4100bd 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp @@ -591,30 +591,30 @@ uint32_t EthosU85RCSGenerator::ToRegion(const MemArea &memArea) // quantized value in scalarValue. 
bool EthosU85RCSGenerator::IsScalar(const HLCFeatureMap &fm, int32_t &scalarValue) { - const auto &view = fm.bufferView; + const auto &buffer = fm.constBuffer; // A 1-sized feature map in constant memory is a scalar - bool isScalar = fm.shape.Elements() == 1 && view.HasBuffer(); + bool isScalar = fm.shape.Elements() == 1 && buffer; if ( isScalar ) { if ( fm.dataType == DataType::Int8 ) { - scalarValue = view.Values()[0]; + scalarValue = buffer->Data()[0]; } else if ( fm.dataType == DataType::UInt8 ) { - scalarValue = view.Values()[0]; + scalarValue = buffer->Data()[0]; } else if ( fm.dataType == DataType::Int16 ) { - scalarValue = view.Values()[0]; + scalarValue = buffer->Data()[0]; } else if ( fm.dataType == DataType::UInt16 ) { - scalarValue = view.Values()[0]; + scalarValue = buffer->Data()[0]; } else if ( fm.dataType == DataType::Int32 ) { - scalarValue = view.Values()[0]; + scalarValue = buffer->Data()[0]; } else { // Unsupported scalar value diff --git a/ethosu/regor/common/buffer_view.hpp b/ethosu/regor/common/buffer_view.hpp index f23208fd..494172fc 100644 --- a/ethosu/regor/common/buffer_view.hpp +++ b/ethosu/regor/common/buffer_view.hpp @@ -72,6 +72,15 @@ class Buffer : public std::enable_shared_from_this #undef TYPE_FUNC }; + // Data storage method + enum Placement : uint8_t + { + Remote, + LocalConst, + LocalAlloc, + LocalVector + }; + template struct IsSupportedIntegral { @@ -98,6 +107,7 @@ class Buffer : public std::enable_shared_from_this { void *data; const void *cdata; + uint64_t constValue; }; private: @@ -105,7 +115,7 @@ private: size_t _sizeBytes = 0; const uint32_t _typeHash; const uint32_t _utypeHash; - bool _isLocal = false; + Placement _placement = Placement::Remote; LocalStorage _localStorage; DeleteFunc _deleter = nullptr; Hash128 _dataHash; @@ -114,6 +124,23 @@ public: Buffer(const Buffer &) = delete; Buffer &operator=(const Buffer &) = delete; + template + struct ConstValue + { + TYPE _value; + ConstValue(TYPE value) : _value(value) {} + 
}; + + template::value, int> = 0> + Buffer(const ConstValue &value) : + _typeHash(TypeHash::value), _utypeHash(TypeHash>::value) + { + _refData.constValue = uint64_t(value._value); + _sizeBytes = sizeof(TYPE); + _placement = Placement::LocalConst; + _dataHash.v32[0] = uint32_t(value._value); + } + template::value, int> = 0> Buffer(size_t sizeElements, const TYPE *buffer = nullptr, bool alias = false) : _typeHash(TypeHash::value), _utypeHash(TypeHash>::value) @@ -129,27 +156,18 @@ public: } _refData.data = ref; _deleter = &Buffer::DeleteArray; + _placement = Placement::LocalAlloc; } else { assert(alias && buffer); _refData.cdata = buffer; + _placement = Placement::Remote; } Rehash(); } - template::value, int> = 0> - Buffer(std::unique_ptr ptr) : - _typeHash(TypeHash::value), _utypeHash(TypeHash>::value) - { - _refData.data = ptr.release(); - _sizeBytes = sizeof(TYPE); - _deleter = &Buffer::Delete; - - Rehash(); - } - template::value, int> = 0> Buffer(std::unique_ptr ptr, int sizeElements) : _typeHash(TypeHash::value), _utypeHash(TypeHash>::value) @@ -159,6 +177,7 @@ public: assert(INT_MAX / int(sizeof(TYPE)) >= sizeElements); _sizeBytes = sizeof(TYPE) * sizeElements; _deleter = &Buffer::DeleteArray; + _placement = Placement::LocalAlloc; Rehash(); } @@ -170,7 +189,7 @@ public: new (&GetLocalVector()) std::vector(std::move(buffer)); _deleter = &Buffer::DeleteVector; _refData.data = &GetLocalVector(); - _isLocal = true; + _placement = Placement::LocalVector; Rehash(); } @@ -189,7 +208,7 @@ public: { // Follow strict reinterpret_cast type aliasing rules assert(IsByte::value || (TypeHash>::value == _utypeHash)); - if ( _isLocal ) + if ( _placement == Placement::LocalVector ) { if constexpr ( IsByte::value ) { @@ -221,6 +240,11 @@ public: } } } + else if ( _placement == Placement::LocalConst ) + { + assert(false && "Writing to const value"); + return reinterpret_cast(&_refData.constValue); + } else { assert(_deleter && "reading const buffer as non-const"); @@ -232,7 
+256,7 @@ public: template const T *Data() const { - if ( _isLocal ) + if ( _placement == Placement::LocalVector ) { // Follow strict reinterpret_cast type aliasing rules assert(IsByte::value || (TypeHash>::value == _utypeHash)); @@ -270,6 +294,10 @@ public: } } } + else if ( _placement == Placement::LocalConst ) + { + return reinterpret_cast(&_refData.constValue); + } else { assert(uintptr_t(_deleter ? _refData.data : _refData.cdata) % alignof(T) == 0); @@ -279,7 +307,7 @@ public: int Size() const { - if ( _isLocal ) + if ( _placement == Placement::LocalVector ) { switch ( _typeHash ) { diff --git a/ethosu/regor/compiler/high_level_command_stream.hpp b/ethosu/regor/compiler/high_level_command_stream.hpp index 648509fa..d6c87868 100644 --- a/ethosu/regor/compiler/high_level_command_stream.hpp +++ b/ethosu/regor/compiler/high_level_command_stream.hpp @@ -74,7 +74,7 @@ struct HLCFeatureMap Shape shape; Shape strides; MemArea memArea; - BufferView bufferView; + std::shared_ptr constBuffer; Quantization quantization; Point2i stepXY = {1, 1}; Address address = -1; diff --git a/ethosu/regor/compiler/high_level_command_stream_generator.cpp b/ethosu/regor/compiler/high_level_command_stream_generator.cpp index 6974e4b7..bd3ded4c 100644 --- a/ethosu/regor/compiler/high_level_command_stream_generator.cpp +++ b/ethosu/regor/compiler/high_level_command_stream_generator.cpp @@ -247,7 +247,10 @@ static void MakeFeatureMap(TensorUsage usage, const SchedulerConnection *schedCo fm.usage = usage; fm.address = schedTens->AllocatedAddress(); fm.quantization = schedConn->quantization; - fm.bufferView = schedTens->bufferView; + if ( schedTens->bufferView.HasBuffer() ) + { + fm.constBuffer = schedTens->bufferView.Buffer()->shared_from_this(); + } fm.strides = GetStrides(fm); fm.stepXY = schedConn->stepXY; fm.transpose = schedConn->transpose; diff --git a/ethosu/regor/compiler/scheduler_decompose.cpp b/ethosu/regor/compiler/scheduler_decompose.cpp index deb1aada..c8918d00 100644 --- 
a/ethosu/regor/compiler/scheduler_decompose.cpp +++ b/ethosu/regor/compiler/scheduler_decompose.cpp @@ -969,7 +969,7 @@ std::vector> DecomposeConv3D(Architecture *a // Create SchedulerTensor for 0 (no) bias auto bias0 = bias->tensor->Clone(); - auto bias0buf = std::make_shared(std::make_unique(0)); + auto bias0buf = std::make_shared(Buffer::ConstValue(0)); assert(DataTypeStorageSizeBits(bias0->dataType) <= int(8 * sizeof(int64_t))); bias0->bufferView = BufferView(bias0buf, 0, DataTypeStorageSizeBits(bias0->dataType), {1}, {}); bias0->storageShape = bias0->bufferView.ViewShape(); -- GitLab