From ab36eec4b03d9685a2e0b8c3a372db3abb27501e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Davidsson?=
Date: Wed, 20 Nov 2024 11:06:25 +0100
Subject: [PATCH] MLBEDSW-9759: Decompose batch for AvgPool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add decomposition for AvgPool, handling batch > 1.
- Convert padding to offsets for AvgPool and MaxPool decomposition.

Change-Id: I47faaaddedb0295abc084e3e966daa53817c2586
Signed-off-by: Björn Davidsson
---
 ethosu/regor/compiler/scheduler_decompose.cpp | 59 +++++++++++++++----
 ethosu/regor/compiler/scheduler_decompose.hpp |  1 +
 ethosu/regor/compiler/scheduler_packing.cpp   |  3 +
 3 files changed, 50 insertions(+), 13 deletions(-)

diff --git a/ethosu/regor/compiler/scheduler_decompose.cpp b/ethosu/regor/compiler/scheduler_decompose.cpp
index c3c74440..b16aa14e 100644
--- a/ethosu/regor/compiler/scheduler_decompose.cpp
+++ b/ethosu/regor/compiler/scheduler_decompose.cpp
@@ -245,6 +245,7 @@ bool CanDecompose(Architecture *, const SchedulerOperation *schedOp)
     if ( schedOp->Type() == OpType::ArgMax ) return true;
     if ( schedOp->Type() == OpType::Reverse ) return true;
     if ( schedOp->Type() == OpType::Transpose ) return true;
+    if ( schedOp->Type() == OpType::AvgPool ) return true;
     if ( schedOp->Type() == OpType::MaxPool ) return true;
     if ( schedOp->Type() == OpType::Resize ) return true;
     return false;
@@ -1812,33 +1813,65 @@ std::vector<std::unique_ptr<SchedulerOperation>> DecomposeTranspose(Architecture
     return result;
 }
 
-std::vector<std::unique_ptr<SchedulerOperation>> DecomposeMaxPool(Architecture *arch, std::unique_ptr<SchedulerOperation> op)
+std::vector<std::unique_ptr<SchedulerOperation>> DecomposeAvgPool(Architecture *arch, std::unique_ptr<SchedulerOperation> op)
 {
     std::vector<std::unique_ptr<SchedulerOperation>> result;
-    auto ofmConn = op->Output(TensorUsage::OFM);
-    auto &ofmShape = ofmConn->SliceShape();
+    auto *ofmConn = op->Output(TensorUsage::OFM);
+    auto *ifmConn = op->Input(TensorUsage::IFM);
+    const auto &ofmShape = ofmConn->SliceShape();
+    const auto &ifmShape = ifmConn->SliceShape();
     auto &ofmSlice = ofmConn->slice;
-    auto ifmConn = op->Input(TensorUsage::IFM);
-    auto &ifmShape = ifmConn->SliceShape();
     auto &ifmSlice = ifmConn->slice;
-
+    auto *kernel = op->Kernel();
+    auto &padding = kernel->Padding();
     ofmSlice.Initialize(ofmShape.WithZeros(), ofmShape);
-    ifmSlice.Initialize(ifmShape.WithZeros(), ifmShape);
-
-    if ( auto ifm2Conn = op->TryInput(TensorUsage::IFM1) )
+    ifmSlice.Initialize(ifmShape.WithZeros().WithHW(-padding.Top(), -padding.Left()), ifmShape);
+    auto ofmRank = ofmShape.Size();
+    if ( ofmRank > 3 && (ofmShape.Elements() > ofmShape.Height() * ofmShape.Width() * ofmShape.Depth()) )
     {
-        auto &ifm2Shape = ifm2Conn->shape;
-        auto &ifm2Slice = ifm2Conn->slice;
+        return DecomposeLeadingDimensions(ofmRank - 3, arch, std::move(op), DecomposeAvgPool);
+    }
 
-        ifm2Slice.Initialize(ifm2Shape.WithZeros(), ifm2Shape);
+    if ( !NeedsDecompose(arch, op.get()) )
+    {
+        UpdatePaddingAndIfmOffset(op.get());
+        result.emplace_back(std::move(op));
+        return result;
     }
+    // Decomposition for large dimensions & strides is needed here.
+    // If we get here, decomposition has failed, the resulting operations will be executed on CPU
+    UpdatePaddingAndIfmOffset(op.get());
+    result.emplace_back(std::move(op));
+    return result;
+}
+std::vector<std::unique_ptr<SchedulerOperation>> DecomposeMaxPool(Architecture *arch, std::unique_ptr<SchedulerOperation> op)
+{
+    std::vector<std::unique_ptr<SchedulerOperation>> result;
+    auto *ofmConn = op->Output(TensorUsage::OFM);
+    auto *ifmConn = op->Input(TensorUsage::IFM);
+    const auto &ofmShape = ofmConn->SliceShape();
+    const auto &ifmShape = ifmConn->SliceShape();
+    auto &ofmSlice = ofmConn->slice;
+    auto &ifmSlice = ifmConn->slice;
+    auto *kernel = op->Kernel();
+    auto &padding = kernel->Padding();
+    ofmSlice.Initialize(ofmShape.WithZeros(), ofmShape);
+    ifmSlice.Initialize(ifmShape.WithZeros().WithHW(-padding.Top(), -padding.Left()), ifmShape);
 
     auto ofmRank = ofmShape.Size();
     if ( ofmRank > 3 && (ofmShape.Elements() > ofmShape.Height() * ofmShape.Width() * ofmShape.Depth()) )
     {
         return DecomposeLeadingDimensions(ofmRank - 3, arch, std::move(op), DecomposeMaxPool);
     }
-
+    if ( !NeedsDecompose(arch, op.get()) )
+    {
+        UpdatePaddingAndIfmOffset(op.get());
+        result.emplace_back(std::move(op));
+        return result;
+    }
+    // Decomposition for large dimensions & strides is needed here.
+    // If we get here, decomposition has failed, the resulting operations will be executed on CPU
+    UpdatePaddingAndIfmOffset(op.get());
     result.emplace_back(std::move(op));
     return result;
 }
diff --git a/ethosu/regor/compiler/scheduler_decompose.hpp b/ethosu/regor/compiler/scheduler_decompose.hpp
index baf3d81e..7dca942d 100644
--- a/ethosu/regor/compiler/scheduler_decompose.hpp
+++ b/ethosu/regor/compiler/scheduler_decompose.hpp
@@ -44,6 +44,7 @@ std::vector<std::unique_ptr<SchedulerOperation>> DecomposeMatmul(Architecture *a
 std::vector<std::unique_ptr<SchedulerOperation>> DecomposeReduce(Architecture *arch, std::unique_ptr<SchedulerOperation> op);
 std::vector<std::unique_ptr<SchedulerOperation>> DecomposeReverse(Architecture *arch, std::unique_ptr<SchedulerOperation> op);
 std::vector<std::unique_ptr<SchedulerOperation>> DecomposeTranspose(Architecture *arch, std::unique_ptr<SchedulerOperation> op);
+std::vector<std::unique_ptr<SchedulerOperation>> DecomposeAvgPool(Architecture *arch, std::unique_ptr<SchedulerOperation> op);
 std::vector<std::unique_ptr<SchedulerOperation>> DecomposeMaxPool(Architecture *arch, std::unique_ptr<SchedulerOperation> op);
 std::vector<std::unique_ptr<SchedulerOperation>> DecomposeResize(Architecture *arch, std::unique_ptr<SchedulerOperation> op);
 std::vector<std::unique_ptr<SchedulerOperation>> LegaliseResize(Architecture *arch, std::unique_ptr<SchedulerOperation> op);
diff --git a/ethosu/regor/compiler/scheduler_packing.cpp b/ethosu/regor/compiler/scheduler_packing.cpp
index 550f8df2..3de2f3df 100644
--- a/ethosu/regor/compiler/scheduler_packing.cpp
+++ b/ethosu/regor/compiler/scheduler_packing.cpp
@@ -691,6 +691,9 @@ std::vector<std::unique_ptr<SchedulerOperation>> SchedulerPacking::DecomposeSche
         case OpType::Transpose:
            result = DecomposeTranspose(_arch, std::move(op));
            break;
+        case OpType::AvgPool:
+           result = DecomposeAvgPool(_arch, std::move(op));
+           break;
         case OpType::MaxPool:
            result = DecomposeMaxPool(_arch, std::move(op));
            break;
--
GitLab
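
The decomposition above follows a pattern that is easier to see in isolation: split the leading (batch) dimension into single-batch sub-operations, and fold the kernel's top/left padding into a negative IFM read offset so the sub-operations carry offsets instead of padding attributes. The standalone C++ sketch below illustrates that pattern only; Slice, PoolOp, PaddingToOffset and DecomposeBatch are simplified, hypothetical stand-ins, not the regor SchedulerOperation/Kernel API used in the patch.

// Illustration only: simplified stand-in types, not regor code.
#include <cstdio>
#include <vector>

struct Slice
{
    int offsetN = 0, offsetH = 0, offsetW = 0;  // read/write start per axis
    int sizeN = 0, sizeH = 0, sizeW = 0;        // extent per axis
};

struct PoolOp
{
    Slice ifm;  // input feature map slice
    Slice ofm;  // output feature map slice
    int padTop = 0, padLeft = 0;
};

// Fold explicit top/left padding into a negative IFM offset, mirroring the idea
// behind the patch's WithHW(-padding.Top(), -padding.Left()) slice offset and
// UpdatePaddingAndIfmOffset: the op starts reading "above and to the left" of
// the tensor instead of carrying padding attributes.
static void PaddingToOffset(PoolOp &op)
{
    op.ifm.offsetH -= op.padTop;
    op.ifm.offsetW -= op.padLeft;
    op.padTop = 0;
    op.padLeft = 0;
}

// Split an op with batch N > 1 into N single-batch ops, analogous to handing
// the leading dimension to a per-batch decomposition.
static std::vector<PoolOp> DecomposeBatch(const PoolOp &op)
{
    std::vector<PoolOp> result;
    for ( int n = 0; n < op.ofm.sizeN; ++n )
    {
        PoolOp sub = op;
        sub.ifm.offsetN = op.ifm.offsetN + n;
        sub.ofm.offsetN = op.ofm.offsetN + n;
        sub.ifm.sizeN = 1;
        sub.ofm.sizeN = 1;
        PaddingToOffset(sub);
        result.push_back(sub);
    }
    return result;
}

int main()
{
    PoolOp op;
    op.ifm = {0, 0, 0, 4, 16, 16};  // N=4, 16x16 input
    op.ofm = {0, 0, 0, 4, 16, 16};  // same-size output (stride 1, SAME padding)
    op.padTop = 1;
    op.padLeft = 1;

    for ( const PoolOp &sub : DecomposeBatch(op) )
    {
        std::printf("batch %d: ifm HW offset (%d, %d), pad (%d, %d)\n", sub.ofm.offsetN,
            sub.ifm.offsetH, sub.ifm.offsetW, sub.padTop, sub.padLeft);
    }
    return 0;
}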