From a3fc80da41812bcadb0be15ea2d8570490b49d7f Mon Sep 17 00:00:00 2001 From: William Isaksson Date: Tue, 18 Mar 2025 17:12:46 +0100 Subject: [PATCH] MLBEDSW-9291: Support basic resize bilinear for Ethos-U55/U65 -Adds limited support for resize bilinear with align corners and half pixel centers both set to false, and analogous for TOSA. Change-Id: I013759e0cb23f3ebc9037d3b41b0f449256a673a Signed-off-by: William Isaksson --- .../regor/architecture/ethosu55/ethos_u55.cpp | 1 - .../ethosu55/ethos_u55_constraints.cpp | 10 +++ ethosu/regor/compiler/scheduler_decompose.cpp | 87 ++++++++++++++++++- ethosu/regor/compiler/scheduler_decompose.hpp | 1 + ethosu/regor/compiler/scheduler_packing.cpp | 16 +++- .../regor/compiler/tflite_graph_optimiser.cpp | 5 -- .../tflite/tflite_supported_operators_u55.cpp | 69 +++++++++++++++ .../tflite/tflite_supported_operators_u55.hpp | 1 + .../tflite/tflite_supported_operators_u85.cpp | 4 +- 9 files changed, 180 insertions(+), 14 deletions(-) diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55.cpp index 39c486da..150890bf 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55.cpp @@ -693,7 +693,6 @@ EthosU55NpuOp ArchEthosU55::GetHWOp(OpType type) {OpType::AvgPool, EthosU55NpuOp::Pooling}, {OpType::QuantizedAvgPool, EthosU55NpuOp::Pooling}, {OpType::QuantizedMaxPool, EthosU55NpuOp::Pooling}, - {OpType::ResizeBilinear, EthosU55NpuOp::Pooling}, {OpType::ReduceSum, EthosU55NpuOp::ReduceSum}, {OpType::Rescale, EthosU55NpuOp::Pooling}, {OpType::Tile, EthosU55NpuOp::Dma}, diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp index 5bb3c4c4..fd657dcd 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp @@ -323,6 +323,16 @@ Flags 
EthosU55Constraints::OperatorQuery(OpType opType, const ArchO result.Set(QueryResult::HasRequirements); } } + if ( opType == OpType::Resize ) + { + if ( req ) + { + req->req = ArchRequirement::Decompose; + req->substitution = OpType::AvgPool; + } + result.Set(QueryResult::HasRequirements); + return result; + } // TransposeConv2D and Conv3D are legalized during decomposition if ( opType == OpType::TransposeConv2D || opType == OpType::Conv3D ) diff --git a/ethosu/regor/compiler/scheduler_decompose.cpp b/ethosu/regor/compiler/scheduler_decompose.cpp index 41167bbb..81527ea4 100644 --- a/ethosu/regor/compiler/scheduler_decompose.cpp +++ b/ethosu/regor/compiler/scheduler_decompose.cpp @@ -236,6 +236,7 @@ bool CanDecompose(Architecture *, const SchedulerOperation *schedOp) if ( schedOp->Type() == OpType::TransposeConv2D ) return true; if ( DecomposeAsElementwise(schedOp->Type()) || schedOp->Type() == OpType::MemoryCopy ) return true; if ( schedOp->Type() == OpType::MatMul ) return true; + if ( schedOp->Type() == OpType::Resize ) return true; if ( schedOp->Type() == OpType::ReduceSum ) return true; if ( schedOp->Type() == OpType::ReduceMin ) return true; if ( schedOp->Type() == OpType::ReduceMax ) return true; @@ -1280,6 +1281,88 @@ std::vector> DecomposeTransposeConv2D(Archit return result; } +// TODO: Move this to run prior to decomposition. +std::vector> LegaliseResize(Architecture *arch, std::unique_ptr op) +{ + // Convert ResizeBilinear/NearestNeighbor to a number of kernel 1x1 average pools with nearest neighbor x2 upScaling + // and a final average pool with a kernel size that depends upon the resize ops upScaling factor (x2, x4 or x8). The + // maximum upscale factor is limited to x8 because of the limit 8x8 kernel size limit for average pool with padding. 
+ + std::vector> result; + + auto ifmConn = op->Input(TensorUsage::IFM); + auto ofmConn = op->Output(TensorUsage::OFM); + assert(ifmConn); + assert(ofmConn); + + auto *attr = op->Attribute(); + auto upscaleH = attr->scaleY.n; + auto upscaleW = attr->scaleX.n; + auto remainingUpscale = std::max(upscaleW, upscaleH); + bool canLegalise = true; + + ArchRequirements req{}; + OperatorQuery(arch, op.get(), &req); + auto reqScale = QuantizedScale(1, IntLog2(attr->scaleX.n * attr->scaleY.n)); + + + if ( !IsPowerOfTwo(remainingUpscale) || remainingUpscale > 8 || remainingUpscale < 2 ) + { + canLegalise = false; + } + else if ( (upscaleH == 1 && ifmConn->shape.Height() != 1) || (upscaleW == 1 && ifmConn->shape.Width() != 1) ) + { + canLegalise = false; + } + else if ( ofmConn->quantization.scales[0] != reqScale ) + { + canLegalise = false; + } + + if ( !canLegalise ) + { + result.emplace_back(std::move(op)); + return result; + } + + auto ofmShape = ofmConn->shape; + auto ifmShape = ifmConn->shape; + + ofmConn->tensor->dataType = ifmConn->tensor->dataType; + ifmConn->resamplingMode = ArchResampling::Nearest; + // Perform 2x upScaling up to the last required + while ( remainingUpscale > 2 ) + { + auto newOp = std::make_unique(OpType::AvgPool); + *newOp->ConnectInput(TensorUsage::IFM, ifmConn->tensor) = *ifmConn; + std::shared_ptr tens = ofmConn->tensor->Clone(); + auto shape = ofmShape.WithHW(ifmConn->shape.Height() * std::min(2, upscaleH), ifmConn->shape.Width() * std::min(2, upscaleW)); + tens->storageShape = shape; + ifmConn = newOp->ConnectOutput(TensorUsage::OFM, tens); + ifmConn->quantization = Quantization::Unit(); + ifmConn->shape = shape; + ifmConn->resamplingMode = ArchResampling::Nearest; + auto kernel = Kernel::UnitKernel(); + newOp->SetKernel(&kernel); + result.emplace_back(std::move(newOp)); + + remainingUpscale /= 2; + } + + // Perform last 2x upScaling and post-processing. 
+ ifmConn->resamplingMode = ArchResampling::Nearest; + auto newOp = std::make_unique(OpType::AvgPool); + *newOp->ConnectInput(TensorUsage::IFM, ifmConn->tensor) = *ifmConn; + + Kernel kernel = Kernel::UnitKernel().WithPadding({0, 0, upscaleH - 1, upscaleW - 1, 0, 0}).WithSize({upscaleW, upscaleH}); + newOp->SetKernel(&kernel); + ofmConn->quantization = Quantization::Unit(); + ofmConn->rounding = RoundMode::AUTO; + *newOp->ConnectOutput(TensorUsage::OFM, ofmConn->tensor) = *ofmConn; + result.emplace_back(std::move(newOp)); + return result; +} + std::vector> DecomposeElementwise(Architecture *arch, std::unique_ptr op) { std::vector> result; @@ -1603,7 +1686,7 @@ std::vector> DecomposeTranspose(Architecture const auto &ifmShape = ifmConn->SliceShape(); const auto axes = ifmShape.Size(); - auto req = ArchRequirements(); + ArchRequirements req{}; auto qResult = OperatorQuery(arch, op.get(), &req); bool decomposeMask = false; bool decomposeAxes = false; @@ -1773,7 +1856,7 @@ std::vector> DecomposeResize(Architecture *a ofmSlice.Initialize(ofmShape.WithZeros(), ofmShape); ifmSlice.Initialize(ifmShape.WithZeros(), ifmShape); - auto req = ArchRequirements(); + ArchRequirements req{}; auto qResult = OperatorQuery(arch, op.get(), &req); bool decomposeLeadingDims = false; if ( qResult.Any(QueryResult::HasRequirements) && req.req.Any(ArchRequirement::Decompose) ) diff --git a/ethosu/regor/compiler/scheduler_decompose.hpp b/ethosu/regor/compiler/scheduler_decompose.hpp index a872ee82..baf3d81e 100644 --- a/ethosu/regor/compiler/scheduler_decompose.hpp +++ b/ethosu/regor/compiler/scheduler_decompose.hpp @@ -46,6 +46,7 @@ std::vector> DecomposeReverse(Architecture * std::vector> DecomposeTranspose(Architecture *arch, std::unique_ptr op); std::vector> DecomposeMaxPool(Architecture *arch, std::unique_ptr op); std::vector> DecomposeResize(Architecture *arch, std::unique_ptr op); +std::vector> LegaliseResize(Architecture *arch, std::unique_ptr op); // Operator query helpers diff --git 
a/ethosu/regor/compiler/scheduler_packing.cpp b/ethosu/regor/compiler/scheduler_packing.cpp index 2810c792..a6ff98b3 100644 --- a/ethosu/regor/compiler/scheduler_packing.cpp +++ b/ethosu/regor/compiler/scheduler_packing.cpp @@ -615,6 +615,8 @@ std::unique_ptr SchedulerPacking::MakeSchedulerOperation(Ope std::vector> SchedulerPacking::DecomposeSchedulerOperation(std::unique_ptr op) { std::vector> result; + ArchRequirements req{}; + switch ( op->Type() ) { case OpType::Conv2D: @@ -648,15 +650,23 @@ std::vector> SchedulerPacking::DecomposeSche case OpType::Reverse: result = DecomposeReverse(_arch, std::move(op)); break; + case OpType::Resize: + OperatorQuery(_arch, op.get(), &req); + if ( req.substitution == OpType::AvgPool ) + { + result = LegaliseResize(_arch, std::move(op)); + } + else + { + result = DecomposeResize(_arch, std::move(op)); + } + break; case OpType::Transpose: result = DecomposeTranspose(_arch, std::move(op)); break; case OpType::MaxPool: result = DecomposeMaxPool(_arch, std::move(op)); break; - case OpType::Resize: - result = DecomposeResize(_arch, std::move(op)); - break; default: if ( DecomposeAsElementwise(op->Type()) || op->Type() == OpType::MemoryCopy ) { diff --git a/ethosu/regor/compiler/tflite_graph_optimiser.cpp b/ethosu/regor/compiler/tflite_graph_optimiser.cpp index 1b6468a7..4ac1eaa7 100644 --- a/ethosu/regor/compiler/tflite_graph_optimiser.cpp +++ b/ethosu/regor/compiler/tflite_graph_optimiser.cpp @@ -1090,11 +1090,6 @@ Operation *TFLiteGraphOptimiser::ConvertResize(Graph *const graph, Operation *co Operation *returnOp = operation; OpType opType = operation->Type(); - if ( _constraints->OperatorQuery(OpType::Resize).Any(QueryResult::Unsupported) ) - { - // Only run if HW has native Resize support - return returnOp; - } if ( opType == OpType::ResizeBilinear || opType == OpType::ResizeNearestNeighbor ) { auto ifmConn = operation->Input(TensorUsage::IFM); diff --git a/ethosu/regor/tflite/tflite_supported_operators_u55.cpp 
b/ethosu/regor/tflite/tflite_supported_operators_u55.cpp index a6252e5b..f49ccdb8 100644 --- a/ethosu/regor/tflite/tflite_supported_operators_u55.cpp +++ b/ethosu/regor/tflite/tflite_supported_operators_u55.cpp @@ -67,6 +67,7 @@ TfLiteSupportedOperatorsU55::TfLiteSupportedOperatorsU55(IArchitectureConstraint OpType::Tile, OpType::ExpandDims, OpType::ReduceSum, + OpType::ResizeBilinear, OpType::Rsqrt, OpType::Pack, OpType::Unpack, @@ -103,6 +104,7 @@ TfLiteSupportedOperatorsU55::TfLiteSupportedOperatorsU55(IArchitectureConstraint &TfLiteSupportedOperatorsU55::ConstraintUnrolledKernelStride, &TfLiteSupportedOperatorsU55::ConstraintMatmul, &TfLiteSupportedOperatorsU55::ConstraintTranspose, + &TfLiteSupportedOperatorsU55::ConstraintResize, }; } @@ -143,6 +145,73 @@ bool TfLiteSupportedOperatorsU55::ConstraintBroadcastShapes(const Operation *op) return true; } +bool TfLiteSupportedOperatorsU55::ConstraintResize(const Operation *op) +{ + if ( op->Type() != OpType::ResizeBilinear ) + { + return true; + } + auto ifmConn = op->Input(TensorUsage::IFM); + auto ofmConn = op->Output(TensorUsage::OFM); + assert(ifmConn); + assert(ofmConn); + Shape ifmShape = Shape::PadAxes(ifmConn->shape, 4, 1); + Shape ofmShape = Shape::PadAxes(ofmConn->shape, 4, 1); + + if ( ifmShape.Height() == ofmShape.Height() && ifmShape.Width() == ofmShape.Width() ) + { + return true; + } + if ( ifmShape.Height() == 1 && ifmShape.Width() == 1 ) + { + return true; + } + + const auto *passthrough = static_cast(op->Passthrough()); + assert(passthrough); + const auto *opt = passthrough->builtin_options_as_ResizeBilinearOptions(); + assert(opt); + if ( opt->align_corners() ) + { + Failure(op, "Align Corners attribute is true", "Align Corners must be false"); + return false; + } + if ( opt->half_pixel_centers() ) + { + Failure(op, "Half Pixel Centers attribute is true", "Half Pixel Centers must be false"); + return false; + } + std::string constraint = + "If not (IFM H == IFM W == 1) and not IFM Shape == OFM 
Shape\n" + "\tIf W upScale != H upScale:\n" + "\t\tOFM W or H must be 1, and scaling in the dim that is must also be 1\n" + "\tIF W upScale == H upScale \n" + "\t\tupScale needs to be one of: 2x/4x/8x"; + + int hUpscale = ofmShape.Height() / ifmShape.Height(); + int wUpscale = ofmShape.Width() / ifmShape.Width(); + + if ( hUpscale != wUpscale ) + { + if ( !((ofmShape.Height() == 1 && hUpscale == 1) || (ofmShape.Width() == 1 && wUpscale == 1)) ) + { + Failure(op, + fmt::format("HW upScaling is not equal and operation has unsupported parameter combination ofm h={}, h up-scale={}, ofm w={}, w up-scale={}.", + ofmShape.Height(), hUpscale, ofmShape.Width(), wUpscale), + constraint); + return false; + } + } + else if ( !((ifmShape.Height() == 1 && ifmShape.Width() == 1) || (ofmShape.Height() % (2 * ifmShape.Height()) == 0 && hUpscale > 1 && hUpscale <= 8)) ) + { + Failure(op, + fmt::format("Scaling matches and operation has unsupported scaling={}", float(ofmShape.Height()) / ifmShape.Height()), constraint); + return false; + } + return true; +} + + bool TfLiteSupportedOperatorsU55::ConstraintReverse(const Operation *op) { if ( op->Type() != OpType::Reverse && op->Type() != OpType::ReverseV2 ) diff --git a/ethosu/regor/tflite/tflite_supported_operators_u55.hpp b/ethosu/regor/tflite/tflite_supported_operators_u55.hpp index 1daa1743..fe855925 100644 --- a/ethosu/regor/tflite/tflite_supported_operators_u55.hpp +++ b/ethosu/regor/tflite/tflite_supported_operators_u55.hpp @@ -50,5 +50,6 @@ private: bool ConstraintArgMaxAxis(const Operation *op); bool ConstraintArgMaxOverflow(const Operation *op); // TODO: Remove after MLBEDSW-9758: TOSA MaxPool decomp bool ConstraintTranspose(const Operation *op); + bool ConstraintResize(const Operation *op); }; } // namespace regor diff --git a/ethosu/regor/tflite/tflite_supported_operators_u85.cpp b/ethosu/regor/tflite/tflite_supported_operators_u85.cpp index 335c3fa6..9b9fc635 100644 --- 
a/ethosu/regor/tflite/tflite_supported_operators_u85.cpp +++ b/ethosu/regor/tflite/tflite_supported_operators_u85.cpp @@ -235,7 +235,7 @@ bool TfLiteSupportedOperatorsU85::ConstraintResizeBilinear(const Operation *op) "if IFM HxW > 1x1\n" "\tand ALIGN_CORNERS:\n" "\t\tOFM W-1 and H-1 must be a power-of-two integer-multiple of IFM W-1 and H-1\n" - "\tor HALF_PIXEL_CENTERS:\n" + "\telse:\n" "\t\tOFM W and H must be a power-of-two integer-multiple of IFM W and H\n"; OpType opType = op->Type(); if ( opType != OpType::ResizeBilinear ) @@ -250,7 +250,6 @@ bool TfLiteSupportedOperatorsU85::ConstraintResizeBilinear(const Operation *op) int width_d = ifmConn->shape.Width(); int height_n = ofmConn->shape.Height(); int height_d = ifmConn->shape.Height(); - bool halfPixelCenters = false; bool alignCorners = false; const tflite::Operator *passthrough = static_cast(op->Passthrough()); assert(passthrough); @@ -263,7 +262,6 @@ bool TfLiteSupportedOperatorsU85::ConstraintResizeBilinear(const Operation *op) const auto *opt = passthrough->builtin_options_as_ResizeBilinearOptions(); assert(opt); alignCorners = opt->align_corners(); - halfPixelCenters = opt->half_pixel_centers(); if ( alignCorners ) { -- GitLab