From c932624f8b2a2ee4d5ae869c50a58e0715f99b41 Mon Sep 17 00:00:00 2001 From: Jacob Bohlin Date: Fri, 2 May 2025 11:58:54 +0100 Subject: [PATCH] MLBEDSW-10756 Fix chaining issue with fusing transpose with activation Change-Id: Iec466c83c207822c48b2c1746fe39f46f4541a72 Signed-off-by: Jacob Bohlin --- .../regor/architecture/ethosu85/ethos_u85.cpp | 6 +++++ .../ethos_u85_register_cs_generator.cpp | 25 +++++++++++++------ ethosu/regor/compiler/scheduler_packing.cpp | 14 ++++++++--- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85.cpp index c070b529..49432315 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85.cpp @@ -1421,6 +1421,12 @@ bool EthosU85OpGroup::Fuse(const ArchitectureOpGroupQuery &op, const std::vector return false; } + if ( _chainLength > 1 && !IsActivation(op.type) ) + { + // TODO MLBEDSW-10769: support fusing Transpose and Reverse chained ops + return false; + } + if ( dependsOn.size() > 1 ) { // Can only fuse with one op diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp index 564322a4..c5d1fea8 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp @@ -968,14 +968,25 @@ void EthosU85RCSGenerator::GenerateActivation(const HLCStripe *stripe, MemoryAcc } else if ( op->subOps.size() > 0 ) { - // Check if the first subOp is a fused activation. - auto &subOp = op->subOps[0]; - if ( opGroup->IsFused(subOp.ifm[0].uid) && IsActivation(subOp.type) ) + for ( auto &subOp : op->subOps ) { - // Fused activation - activationOp = &subOp; - // Use subOp ifm datatype to calculate clip range - clipDataType = subOp.ifm[0].dataType; + if ( opGroup->IsFused(subOp.ifm[0].uid) ) + { + if ( IsActivation(subOp.type) ) + { + // Fused activation + activationOp = &subOp; + // Use subOp ifm datatype to calculate clip range + clipDataType = subOp.ifm[0].dataType; + // There can be only one fused activation + break; + } + } + else + { + // subOp is a chained op - which means this stripe doesn't have any fused activation + break; + } } } diff --git a/ethosu/regor/compiler/scheduler_packing.cpp b/ethosu/regor/compiler/scheduler_packing.cpp index 82a77fc9..60377ce9 100644 --- a/ethosu/regor/compiler/scheduler_packing.cpp +++ b/ethosu/regor/compiler/scheduler_packing.cpp @@ -262,6 +262,7 @@ void SchedulerPacking::SchedulerPacking::PackOperations() // Pack any future ops that will fit auto prevOp = primaryOp; + auto lastNonFusedOp = primaryOp; // Try chaining subsequent ops into the primary while ( cur != _schedList.end() ) @@ -281,10 +282,10 @@ void SchedulerPacking::SchedulerPacking::PackOperations() LOG_TRACE1("Added {} (key {}) to {} (key {})\n", OpTypeToString(nextOp->Type()), key, OpTypeToString(prevOp->Type()), prevOpKey); - // Replace previous op's OFM by nextOp's OFM + // Replace the last non-fused op's OFM by nextOp's OFM if nextOp is fused if ( IsActivation(nextOp->Type()) ) { - auto *ofmConn = prevOp->OFM(); + auto *ofmConn = lastNonFusedOp->OFM(); ofmConn->tensor = nextOp->OFM()->tensor; ofmConn->SetType(nextOp->OFM()->Type()); ofmConn->quantization.quantMin = nextOp->Output(TensorUsage::OFM)->quantization.quantMin; @@ -292,7 +293,7 @@ void SchedulerPacking::SchedulerPacking::PackOperations() } else if ( nextOp->Type() == OpType::Transpose ) { - auto *ofmConn = primaryOp->OFM(); + auto *ofmConn = lastNonFusedOp->OFM(); ofmConn->tensor = nextOp->OFM()->tensor; ofmConn->SetType(nextOp->OFM()->Type()); ofmConn->shape = nextOp->OFM()->shape; @@ -300,12 +301,17 @@ void SchedulerPacking::SchedulerPacking::PackOperations() } else if ( nextOp->Type() == OpType::Reverse ) { - auto *ofmConn = primaryOp->OFM(); + auto *ofmConn = lastNonFusedOp->OFM(); ofmConn->tensor = nextOp->OFM()->tensor; ofmConn->SetType(nextOp->OFM()->Type()); ofmConn->shape = nextOp->OFM()->shape; ofmConn->reverse = nextOp->OFM()->reverse; } + else + { + // This is a non-fused op - update the tracking variable + lastNonFusedOp = nextOp; + } primaryOp->AddSubOp(std::move(*cur)); prevOpKey = key; -- GitLab