From ae708a2f3d8bd3ad940800698d3ab65581555946 Mon Sep 17 00:00:00 2001 From: Johan Gunnarsson Date: Mon, 24 Mar 2025 16:27:11 +0100 Subject: [PATCH] MLBEDSW-10573: Fix buffered weights and unbuffered scales When two convolution ops share the same weight tensor, but have different bias tensor or output scaling, we can end up with a convolution op that has buffered weights and unbuffered scales. In this case we didn't properly handle depth slicing of the scales tensor. The SCALE_BASE register was always set to the first slice of encoded scales. Also change high level command stream log output to print addresses as hex values. Signed-off-by: Johan Gunnarsson Change-Id: I59c067e36e9c67c84b885bfa72380354c3707591 --- .../ethos_u55_register_cs_generator.cpp | 32 +++++++++++++------ .../ethos_u55_register_cs_generator.hpp | 2 +- .../ethos_u85_register_cs_generator.cpp | 22 +++++++------ .../ethos_u85_register_cs_generator.hpp | 2 +- .../compiler/high_level_command_stream.hpp | 6 ++-- 5 files changed, 40 insertions(+), 24 deletions(-) diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp index e7fd2bef..96d51b3e 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp @@ -241,19 +241,23 @@ void EthosU55RCSGenerator::Emit(uint64_t instr) _emit.Emit(instr); } -int EthosU55RCSGenerator::GetDoubleBufferOffset(HLCWeights *weights, int rangeIndex) +int EthosU55RCSGenerator::GetBufferOffset(HLCWeights *weights, const WeightRange &range) { - int doubleBufferOffset = 0; + int bufferOffset = 0; if ( weights->buffering == Buffering::Double ) { assert(weights->subStreams > 0); - int depthIndex = rangeIndex / weights->subStreams; + int depthIndex = range.index / weights->subStreams; if ( depthIndex % 2 == 1 ) { - doubleBufferOffset = weights->doubleBufferOffset; + bufferOffset = weights->doubleBufferOffset; } } - return doubleBufferOffset; + else if ( weights->buffering == Buffering::None ) + { + bufferOffset = range.offset; + } + return bufferOffset; } void EthosU55RCSGenerator::CheckAddressRange(ArchitectureMemory *memory, Address address, int size) @@ -1067,23 +1071,27 @@ void EthosU55RCSGenerator::GenerateWeights(const HLCStripe *stripe, MemoryAccess { return; } + + // Handle WEIGHT int depth = stripe->weightRangeDepth; Emit(isa::npu_set_weight_region_t(ToRegion(weights->memArea))); auto item0 = weights->encodedRanges.find(WeightKey(0, depth)); assert(item0 != weights->encodedRanges.end()); auto &range0 = item0->second; - int doubleBufferOffset = GetDoubleBufferOffset(weights, range0.index); - Address address = weights->address + range0.weightOffset + doubleBufferOffset; + int bufferOffset = GetBufferOffset(weights, range0); + Address address = weights->address + range0.weightOffset + bufferOffset; int length = RoundAway(range0.weightBytes, 16); CheckAddressRange(weights->memArea.memory, address, length); Emit(isa::npu_set_weight_base_t(address)); Emit(isa::npu_set_weight_length_t(length)); memoryAccesses.emplace_back(AccessDirection::Read, weights->memArea, address, address + length); + + // Handle WEIGHT1 auto item1 = weights->encodedRanges.find(WeightKey(1, depth)); if ( item1 != weights->encodedRanges.end() ) { auto &range1 = item1->second; - Address address1 = weights->address + RoundAway(range0.TotalBytes(), 16) + range1.weightOffset + doubleBufferOffset; + Address address1 = weights->address + RoundAway(range0.TotalBytes(), 16) + range1.weightOffset + bufferOffset; int length1 = RoundAway(range1.weightBytes, 16); CheckAddressRange(weights->memArea.memory, address1, length1); Emit(isa::npu_set_weight1_base_t(address1)); @@ -1105,18 +1113,22 @@ void EthosU55RCSGenerator::GenerateScales(const HLCStripe *stripe, MemoryAccesse assert(!stripe->operation->weights); return; } + + // Handle SCALES int depth = stripe->weightRangeDepth; Emit(isa::npu_set_scale_region_t(ToRegion(scales->memArea))); auto item0 = scales->encodedRanges.find(WeightKey(0, depth)); assert(item0 != scales->encodedRanges.end()); auto &range0 = item0->second; - int doubleBufferOffset = GetDoubleBufferOffset(scales, range0.index); - Address address = scales->address + doubleBufferOffset; + int bufferOffset = GetBufferOffset(scales, range0); + Address address = scales->address + bufferOffset; int length = RoundAway(range0.scaleBytes, 16); CheckAddressRange(scales->memArea.memory, address, length); Emit(isa::npu_set_scale_base_t(address)); Emit(isa::npu_set_scale_length_t(length)); memoryAccesses.emplace_back(AccessDirection::Read, scales->memArea, address, address + length); + + // Handle SCALES1 auto item1 = scales->encodedRanges.find(WeightKey(1, depth)); if ( item1 != scales->encodedRanges.end() ) { diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.hpp b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.hpp index 233b90ba..ced304a6 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.hpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.hpp @@ -143,7 +143,7 @@ protected: void Emit(uint32_t instr); void Emit(uint64_t instr); - static int GetDoubleBufferOffset(HLCWeights *weights, int rangeIndex); + static int GetBufferOffset(HLCWeights *weights, const WeightRange &range); static void CheckAddressRange(ArchitectureMemory *memory, Address address, int size); static void CheckAddresses(const HLCFeatureMap &fm); // Calculates the rolling buffer address of the given coordinate. diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp index ea4100bd..208b203b 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp @@ -404,19 +404,23 @@ void EthosU85RCSGenerator::Emit(uint64_t instr) } -int EthosU85RCSGenerator::GetDoubleBufferOffset(HLCWeights *weights, int rangeIndex) +int EthosU85RCSGenerator::GetBufferOffset(HLCWeights *weights, const WeightRange &range) { - int doubleBufferOffset = 0; + int bufferOffset = 0; if ( weights->buffering == Buffering::Double ) { assert(weights->subStreams > 0); - int depthIndex = rangeIndex / weights->subStreams; + int depthIndex = range.index / weights->subStreams; if ( depthIndex % 2 == 1 ) { - doubleBufferOffset = weights->doubleBufferOffset; + bufferOffset = weights->doubleBufferOffset; } } - return doubleBufferOffset; + else if ( weights->buffering == Buffering::None ) + { + bufferOffset = range.offset; + } + return bufferOffset; } @@ -1359,8 +1363,8 @@ void EthosU85RCSGenerator::GenerateWeights(const HLCStripe *stripe, MemoryAccess if ( item != weights->encodedRanges.end() ) { const auto &range = item->second; - int doubleBufferOffset = GetDoubleBufferOffset(weights, range.index); - address = weights->address + offset + range.weightOffset + doubleBufferOffset; + int bufferOffset = GetBufferOffset(weights, range); + address = weights->address + offset + range.weightOffset + bufferOffset; length = RoundAway(range.weightBytes, 16); CheckAddressRange(weights->memArea.memory, address, length); memoryAccesses.emplace_back(AccessDirection::Read, weights->memArea, address, address + length); @@ -1405,8 +1409,8 @@ void EthosU85RCSGenerator::GenerateScales(const HLCStripe *stripe, MemoryAccesse auto item0 = scales->encodedRanges.find(WeightKey(0, depth)); assert(item0 != scales->encodedRanges.end()); auto &range0 = item0->second; - int doubleBufferOffset = GetDoubleBufferOffset(scales, range0.index); - Address address = scales->address + doubleBufferOffset; + int bufferOffset = GetBufferOffset(scales, range0); + Address address = scales->address + bufferOffset; int length = RoundAway(range0.scaleBytes, 16); CheckAddressRange(scales->memArea.memory, address, length); diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.hpp b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.hpp index d3790624..8c7729da 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.hpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.hpp @@ -127,7 +127,7 @@ protected: void Emit(uint32_t instr); void Emit(uint64_t instr); - static int GetDoubleBufferOffset(HLCWeights *weights, int rangeIndex); + static int GetBufferOffset(HLCWeights *weights, const WeightRange &range); static void CheckAddressRange(ArchitectureMemory *memory, Address address, int size); static void CheckAddresses(const HLCFeatureMap &fm); // Calculates the rolling buffer address of the given coordinate. diff --git a/ethosu/regor/compiler/high_level_command_stream.hpp b/ethosu/regor/compiler/high_level_command_stream.hpp index d6c87868..463aea56 100644 --- a/ethosu/regor/compiler/high_level_command_stream.hpp +++ b/ethosu/regor/compiler/high_level_command_stream.hpp @@ -86,7 +86,7 @@ struct HLCFeatureMap std::string ToString() const { - return fmt::format("[{}], format: {}, {}:{}, address: {}", shape.ToString(), format, memArea.memory->Name(), + return fmt::format("[{}], format: {}, {}:{}, address: 0x{:x}", shape.ToString(), format, memArea.memory->Name(), memArea.usage.ToString(), address); } }; @@ -106,7 +106,7 @@ struct HLCWeights std::string ToString() const { - return fmt::format("{} ranges, buffering: {}, {}:{}, address: {}, format: {}", encodedRanges.size(), + return fmt::format("{} ranges, buffering: {}, {}:{}, address: 0x{:x}, format: {}", encodedRanges.size(), int(buffering), memArea.memory->Name(), memArea.usage, address, format.ToString()); } }; @@ -272,7 +272,7 @@ public: std::string ToString() const override { - return fmt::format("DMA src: {}:{}, address: {}, dest: {}:{}, address: {}, sizes: ({}), length: {}", + return fmt::format("DMA src: {}:{}, address: 0x{:x}, dest: {}:{}, address: 0x{:x}, sizes: ({}), length: {}", srcMemArea.memory->Name(), srcMemArea.usage, srcAddress, destMemArea.memory->Name(), destMemArea.usage, destAddress, sizes ? sizes.ToString() : "N/A", std::to_string(length)); } -- GitLab