From 5d5632e637583f88a37d81f89e188d9621818e0d Mon Sep 17 00:00:00 2001 From: Philip Hall Date: Wed, 5 Mar 2025 12:47:17 +0000 Subject: [PATCH 1/2] MLBEDSW-10106: Update Ethos-U55 MatMul performance stats - Updated performance calculations for the Ethos-U55 MatMul implementation. This is required to maintain the Ethos-U55/Ethos-U85 abstraction (both must return a result) when using the performance interface. - Fixed incomplete implementation of encoded weights byte transfer values. - Replaced manual datatype-related scaling with the DataType scaling functions. Signed-off-by: Philip Hall Change-Id: I7c8deb4e2740518874530786481d4ef57822bac4 -- GitLab From 55b3949e4b9b3dcf8bb3c614b8141c1618a31f7c Mon Sep 17 00:00:00 2001 From: Philip Hall Date: Wed, 12 Mar 2025 17:17:37 +0000 Subject: [PATCH 2/2] MLBEDSW-10532: Fix transpose block dependency Ethos-U55 transpose adjusts the FM strides independently of the signalled areas in order to perform the transform. This means that the block dependency calculations are only valid for the simple transposes (or none). This commit forces block dependency to zero for all other transposes. Signed-off-by: Philip Hall Change-Id: Ic6a545f1691253a88e33b4aed9da4b622b6c9438 --- .../ethosu55/ethos_u55_register_cs_generator.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp index 3bd11ed5..e7fd2bef 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp @@ -681,9 +681,18 @@ int EthosU55RCSGenerator::CalcBlockDep(const HLCStripe *prevStripe, const HLCStr { return 0; } + const auto &op = stripe->operation; const auto &prevOp = prevStripe->operation; - const auto &prevOfm = prevOp->ofm; + const auto &prevOfm = !prevOp->subOps.empty() ? 
prevOp->subOps.back().ofm : prevOp->ofm; + + // Multi-pass transposes may overlap because the implementation adjusts + // the input/output strides independently of the OFM area. + if ( !IsNone(prevOfm.transpose) && (prevOfm.transpose != TransposeType::NWHC) ) + { + return 0; + } + if ( _arch->_shram.reservedEndBanks == 0 ) { // SHRAM has no reserved LUT banks -- GitLab