diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85.cpp index c070b5297c65e483a4eb2d16e06b097765ed85f9..494323158f192db1fd856ef6505cf530b9499ea0 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85.cpp @@ -1421,6 +1421,12 @@ bool EthosU85OpGroup::Fuse(const ArchitectureOpGroupQuery &op, const std::vector return false; } + if ( _chainLength > 1 && !IsActivation(op.type) ) + { + // TODO MLBEDSW-10769: support fusing Transpose and Reverse chained ops + return false; + } + if ( dependsOn.size() > 1 ) { // Can only fuse with one op diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp index 564322a4395e80e974c653ea757309dca691d588..c5d1fea8fab2bc4bcc2c56817ae4fff60e9ed833 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp @@ -968,14 +968,25 @@ void EthosU85RCSGenerator::GenerateActivation(const HLCStripe *stripe, MemoryAcc } else if ( op->subOps.size() > 0 ) { - // Check if the first subOp is a fused activation. - auto &subOp = op->subOps[0]; - if ( opGroup->IsFused(subOp.ifm[0].uid) && IsActivation(subOp.type) ) + for ( auto &subOp : op->subOps ) { - // Fused activation - activationOp = &subOp; - // Use subOp ifm datatype to calculate clip range - clipDataType = subOp.ifm[0].dataType; + if ( opGroup->IsFused(subOp.ifm[0].uid) ) + { + if ( IsActivation(subOp.type) ) + { + // Fused activation + activationOp = &subOp; + // Use subOp ifm datatype to calculate clip range + clipDataType = subOp.ifm[0].dataType; + // There can be only one fused activation + break; + } + } + else + { + // subOp is a chained op - which means this stripe doesn't have any fused activation + break; + } } } diff --git a/ethosu/regor/compiler/scheduler_packing.cpp b/ethosu/regor/compiler/scheduler_packing.cpp index 82a77fc992f42bfe767603d4cde868d4d91df33e..60377ce9459f4161d36fe0ead377e1274691ef4c 100644 --- a/ethosu/regor/compiler/scheduler_packing.cpp +++ b/ethosu/regor/compiler/scheduler_packing.cpp @@ -262,6 +262,7 @@ void SchedulerPacking::SchedulerPacking::PackOperations() // Pack any future ops that will fit auto prevOp = primaryOp; + auto lastNonFusedOp = primaryOp; // Try chaining subsequent ops into the primary while ( cur != _schedList.end() ) @@ -281,10 +282,10 @@ void SchedulerPacking::SchedulerPacking::PackOperations() LOG_TRACE1("Added {} (key {}) to {} (key {})\n", OpTypeToString(nextOp->Type()), key, OpTypeToString(prevOp->Type()), prevOpKey); - // Replace previous op's OFM by nextOp's OFM + // Replace the last non-fused op's OFM by nextOp's OFM if nextOp is fused if ( IsActivation(nextOp->Type()) ) { - auto *ofmConn = prevOp->OFM(); + auto *ofmConn = lastNonFusedOp->OFM(); ofmConn->tensor = nextOp->OFM()->tensor; ofmConn->SetType(nextOp->OFM()->Type()); ofmConn->quantization.quantMin = nextOp->Output(TensorUsage::OFM)->quantization.quantMin; @@ -292,7 +293,7 @@ void SchedulerPacking::SchedulerPacking::PackOperations() } else if ( nextOp->Type() == OpType::Transpose ) { - auto *ofmConn = primaryOp->OFM(); + auto *ofmConn = lastNonFusedOp->OFM(); ofmConn->tensor = nextOp->OFM()->tensor; ofmConn->SetType(nextOp->OFM()->Type()); ofmConn->shape = nextOp->OFM()->shape; @@ -300,12 +301,17 @@ void SchedulerPacking::SchedulerPacking::PackOperations() } else if ( nextOp->Type() == OpType::Reverse ) { - auto *ofmConn = primaryOp->OFM(); + auto *ofmConn = lastNonFusedOp->OFM(); ofmConn->tensor = nextOp->OFM()->tensor; ofmConn->SetType(nextOp->OFM()->Type()); ofmConn->shape = nextOp->OFM()->shape; ofmConn->reverse = nextOp->OFM()->reverse; } + else + { + // This is a non-fused op - update the tracking variable + lastNonFusedOp = nextOp; + } primaryOp->AddSubOp(std::move(*cur)); prevOpKey = key;