diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp index 96d51b3ea21d77a285e8af54ca032dfc07dce19c..cfbcd53abf043040f504472a208195a054eb95ec 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp @@ -241,23 +241,19 @@ void EthosU55RCSGenerator::Emit(uint64_t instr) _emit.Emit(instr); } -int EthosU55RCSGenerator::GetBufferOffset(HLCWeights *weights, const WeightRange &range) +int EthosU55RCSGenerator::GetDoubleBufferOffset(HLCWeights *weights, int rangeIndex) { - int bufferOffset = 0; + int doubleBufferOffset = 0; if ( weights->buffering == Buffering::Double ) { assert(weights->subStreams > 0); - int depthIndex = range.index / weights->subStreams; + int depthIndex = rangeIndex / weights->subStreams; if ( depthIndex % 2 == 1 ) { - bufferOffset = weights->doubleBufferOffset; + doubleBufferOffset = weights->doubleBufferOffset; } } - else if ( weights->buffering == Buffering::None ) - { - bufferOffset = range.offset; - } - return bufferOffset; + return doubleBufferOffset; } void EthosU55RCSGenerator::CheckAddressRange(ArchitectureMemory *memory, Address address, int size) @@ -1078,8 +1074,8 @@ void EthosU55RCSGenerator::GenerateWeights(const HLCStripe *stripe, MemoryAccess auto item0 = weights->encodedRanges.find(WeightKey(0, depth)); assert(item0 != weights->encodedRanges.end()); auto &range0 = item0->second; - int bufferOffset = GetBufferOffset(weights, range0); - Address address = weights->address + range0.weightOffset + bufferOffset; + int doubleBufferOffset = GetDoubleBufferOffset(weights, range0.index); + Address address = weights->address + range0.weightOffset + doubleBufferOffset; int length = RoundAway(range0.weightBytes, 16); CheckAddressRange(weights->memArea.memory, address, length); Emit(isa::npu_set_weight_base_t(address)); @@ -1091,7 +1087,7 @@ 
void EthosU55RCSGenerator::GenerateWeights(const HLCStripe *stripe, MemoryAccess if ( item1 != weights->encodedRanges.end() ) { auto &range1 = item1->second; - Address address1 = weights->address + RoundAway(range0.TotalBytes(), 16) + range1.weightOffset + bufferOffset; + Address address1 = weights->address + RoundAway(range0.TotalBytes(), 16) + range1.weightOffset + doubleBufferOffset; int length1 = RoundAway(range1.weightBytes, 16); CheckAddressRange(weights->memArea.memory, address1, length1); Emit(isa::npu_set_weight1_base_t(address1)); @@ -1120,8 +1116,18 @@ void EthosU55RCSGenerator::GenerateScales(const HLCStripe *stripe, MemoryAccesse auto item0 = scales->encodedRanges.find(WeightKey(0, depth)); assert(item0 != scales->encodedRanges.end()); auto &range0 = item0->second; - int bufferOffset = GetBufferOffset(scales, range0); - Address address = scales->address + bufferOffset; + Address address = scales->address; + if ( scales->buffering == Buffering::None ) + { + // Unbuffered: address is the buffer with the encoded weights for all slices; add this slice's offset + address += range0.offset; + } + else + { + // Buffered: address is the buffer in fast storage with the encoded weights of one (if single buffered) + // or two (if double buffered) slices; a double buffer adds doubleBufferOffset for odd depth indices + address += GetDoubleBufferOffset(scales, range0.index); + } int length = RoundAway(range0.scaleBytes, 16); CheckAddressRange(scales->memArea.memory, address, length); Emit(isa::npu_set_scale_base_t(address)); diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.hpp b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.hpp index ced304a617bafa95c9eb4b1500398f0e1381d5ed..233b90ba316c89e9c84ab89f0cc5afbfb1c350ee 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.hpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.hpp @@ -143,7 +143,7 @@ protected: void Emit(uint32_t instr); void Emit(uint64_t 
instr); - static int GetBufferOffset(HLCWeights *weights, const WeightRange &range); + static int GetDoubleBufferOffset(HLCWeights *weights, int rangeIndex); static void CheckAddressRange(ArchitectureMemory *memory, Address address, int size); static void CheckAddresses(const HLCFeatureMap &fm); // Calculates the rolling buffer address of the given coordinate. diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp index 208b203b04df0676f3ec9c09593ff67eaee0fd2f..be931cf746287dc7cfc01c4d02e2f83847d970b4 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp @@ -404,23 +404,19 @@ void EthosU85RCSGenerator::Emit(uint64_t instr) } -int EthosU85RCSGenerator::GetBufferOffset(HLCWeights *weights, const WeightRange &range) +int EthosU85RCSGenerator::GetDoubleBufferOffset(HLCWeights *weights, int rangeIndex) { - int bufferOffset = 0; + int doubleBufferOffset = 0; if ( weights->buffering == Buffering::Double ) { assert(weights->subStreams > 0); - int depthIndex = range.index / weights->subStreams; + int depthIndex = rangeIndex / weights->subStreams; if ( depthIndex % 2 == 1 ) { - bufferOffset = weights->doubleBufferOffset; + doubleBufferOffset = weights->doubleBufferOffset; } } - else if ( weights->buffering == Buffering::None ) - { - bufferOffset = range.offset; - } - return bufferOffset; + return doubleBufferOffset; } @@ -1363,8 +1359,8 @@ void EthosU85RCSGenerator::GenerateWeights(const HLCStripe *stripe, MemoryAccess if ( item != weights->encodedRanges.end() ) { const auto &range = item->second; - int bufferOffset = GetBufferOffset(weights, range); - address = weights->address + offset + range.weightOffset + bufferOffset; + int doubleBufferOffset = GetDoubleBufferOffset(weights, range.index); + address = weights->address + offset + range.weightOffset + 
doubleBufferOffset; length = RoundAway(range.weightBytes, 16); CheckAddressRange(weights->memArea.memory, address, length); memoryAccesses.emplace_back(AccessDirection::Read, weights->memArea, address, address + length); @@ -1409,8 +1405,18 @@ void EthosU85RCSGenerator::GenerateScales(const HLCStripe *stripe, MemoryAccesse auto item0 = scales->encodedRanges.find(WeightKey(0, depth)); assert(item0 != scales->encodedRanges.end()); auto &range0 = item0->second; - int bufferOffset = GetBufferOffset(scales, range0); - Address address = scales->address + bufferOffset; + Address address = scales->address; + if ( scales->buffering == Buffering::None ) + { + // Unbuffered: address is the buffer with the encoded weights for all slices; add this slice's offset + address += range0.offset; + } + else + { + // Buffered: address is the buffer in fast storage with the encoded weights of one (if single buffered) + // or two (if double buffered) slices; a double buffer adds doubleBufferOffset for odd depth indices + address += GetDoubleBufferOffset(scales, range0.index); + } int length = RoundAway(range0.scaleBytes, 16); CheckAddressRange(scales->memArea.memory, address, length); diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.hpp b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.hpp index 8c7729da9572bb594014005defd31b59a9800655..d37906246c9ec5f09e348767a60cbf7896690a99 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.hpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.hpp @@ -127,7 +127,7 @@ protected: void Emit(uint32_t instr); void Emit(uint64_t instr); - static int GetBufferOffset(HLCWeights *weights, const WeightRange &range); + static int GetDoubleBufferOffset(HLCWeights *weights, int rangeIndex); static void CheckAddressRange(ArchitectureMemory *memory, Address address, int size); static void CheckAddresses(const HLCFeatureMap &fm); // Calculates the rolling buffer address of the given coordinate. 
diff --git a/ethosu/regor/architecture/weight_encoder.hpp b/ethosu/regor/architecture/weight_encoder.hpp index 1db9d71e14240178d95225a3a76353784c88e215..bb3efaad3e5013f663448d844f4a8033c2c7accf 100644 --- a/ethosu/regor/architecture/weight_encoder.hpp +++ b/ethosu/regor/architecture/weight_encoder.hpp @@ -36,11 +36,11 @@ namespace regor /// struct WeightRange { - int offset = 0; - int scaleBytes = 0; - int weightOffset = 0; - int weightBytes = 0; - int index = 0; + int offset = 0; // Byte offset to this slice in the encoded weights + int scaleBytes = 0; // Size in bytes of the scales in this slice + int weightOffset = 0; // Byte offset to the weights in the encoded weights for this slice + int weightBytes = 0; // Size in bytes of the weights in this slice + int index = 0; // The slice number in this op; selects the double-buffer offset int TotalBytes() const { return scaleBytes + weightBytes; } };