From 911fb5fb23c3a90428187ea1323370380889f64f Mon Sep 17 00:00:00 2001 From: Fredrik Svedberg Date: Tue, 11 Mar 2025 15:57:54 +0100 Subject: [PATCH] MLBEDSW-9902 Decomposition large strides/dims DepthwiseConv2D Add support for stride > 3 and large FM dimensions by calling the block and stride decomposition functions from the DepthwiseConv2D decomposition. Also make sure the AxisOrder for the weights is in a format handled by the decomposition functions. Change-Id: I26bea579d86ca00a4b8af66275eb3a686e510064 Signed-off-by: Fredrik Svedberg --- ethosu/regor/compiler/scheduler_decompose.cpp | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/ethosu/regor/compiler/scheduler_decompose.cpp b/ethosu/regor/compiler/scheduler_decompose.cpp index 81234b4a..9dafb6ce 100644 --- a/ethosu/regor/compiler/scheduler_decompose.cpp +++ b/ethosu/regor/compiler/scheduler_decompose.cpp @@ -1082,6 +1082,7 @@ std::vector> DecomposeDepthwiseConv2D(Archit auto subWeightOffset = weightsShape.WithZeros().WithDepth(multiplier); auto subWeightsConn = subOp->Input(TensorUsage::Weights); subWeightsConn->tensor = Slice(weightsConn->tensor.get(), subWeightOffset, subWeightsShape, subWeightsReadShape); + // Tensor is now in AxisOrder::HWCM with M=1 subWeightsConn->tensor->srcTensor->SetAxisOrder(AxisOrder::HWCM); subWeightsConn->tensor->consumers.push_back(subOp.get()); subWeightsConn->shape = subWeightsShape; @@ -1124,19 +1125,52 @@ std::vector> DecomposeDepthwiseConv2D(Archit } return result; } + if ( CanRunOnHardware(arch, op.get()) ) { UpdatePaddingAndIfmOffset(op.get()); result.emplace_back(std::move(op)); return result; } + + // If weight tensor is in AxisOrder::HWCM (with M=1, since depthMultiplier=1 at this point), + // reshape and set AxisOrder::IHWO with I=1, since the rest of the decomposition code + // only handles weight tensors with AxisOrder::OHWI or AxisOrder::IHWO + if ( weightsConn->tensor->srcTensor->AxisOrder() == AxisOrder::HWCM ) + { + auto 
weightShapeIHWO = weightsConn->SliceShape().Permute(0x0321); + weightsConn->tensor->srcTensor->SetAxisOrder(AxisOrder::IHWO); + weightsConn->tensor->bufferView = weightsConn->tensor->bufferView.Reshape(weightShapeIHWO); + weightsConn->tensor->storageShape = weightShapeIHWO; + weightsConn->shape = weightShapeIHWO; + } + auto &dilation = kernel->Dilation(); if ( dilation.x > 1 || dilation.y > 1 ) { return HandleDilation(arch, std::move(op), DecomposeDepthwiseConv2D); } - // TODO: MLBEDSW-8783 Decompose convolutions with large stride + try + { + if ( auto newBlockShape = NewOfmBlockShape(arch, op.get()) ) + { + return DecomposeBlocks(arch, std::move(op), newBlockShape, DecomposeDepthwiseConv2D); + } + } + catch ( const DecompositionFailure & ) + { + UpdatePaddingAndIfmOffset(op.get()); + result.emplace_back(std::move(op)); + return result; + } + + if ( arch->Constraints()->SupportsAccumulatorSaveRestore() && + op->Input(TensorUsage::Weights)->tensor->IsConstant() && op->Kernel()->Stride().AreaXY() > 1 ) + { + return DecomposeForStrides(arch, std::move(op), DecomposeDepthwiseConv2D); + } // If we get here, decomposition has failed, the resulting operations will be executed on CPU + UpdatePaddingAndIfmOffset(op.get()); result.emplace_back(std::move(op)); return result; } -- GitLab