From a3fc80da41812bcadb0be15ea2d8570490b49d7f Mon Sep 17 00:00:00 2001
From: William Isaksson <william.isaksson@arm.com>
Date: Tue, 18 Mar 2025 17:12:46 +0100
Subject: [PATCH] MLBEDSW-9291: Support basic resize bilinear for Ethos-U55/U65

- Adds limited support for resize bilinear with align corners and half
pixel centers both set to false, and analogously for TOSA.

Change-Id: I013759e0cb23f3ebc9037d3b41b0f449256a673a
Signed-off-by: William Isaksson <william.isaksson@arm.com>
---
 .../regor/architecture/ethosu55/ethos_u55.cpp |  1 -
 .../ethosu55/ethos_u55_constraints.cpp        | 10 +++
 ethosu/regor/compiler/scheduler_decompose.cpp | 87 ++++++++++++++++++-
 ethosu/regor/compiler/scheduler_decompose.hpp |  1 +
 ethosu/regor/compiler/scheduler_packing.cpp   | 16 +++-
 .../regor/compiler/tflite_graph_optimiser.cpp |  5 --
 .../tflite/tflite_supported_operators_u55.cpp | 69 +++++++++++++++
 .../tflite/tflite_supported_operators_u55.hpp |  1 +
 .../tflite/tflite_supported_operators_u85.cpp |  4 +-
 9 files changed, 180 insertions(+), 14 deletions(-)
diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55.cpp
index 39c486da..150890bf 100644
--- a/ethosu/regor/architecture/ethosu55/ethos_u55.cpp
+++ b/ethosu/regor/architecture/ethosu55/ethos_u55.cpp
@@ -693,7 +693,6 @@ EthosU55NpuOp ArchEthosU55::GetHWOp(OpType type)
         {OpType::AvgPool, EthosU55NpuOp::Pooling},
         {OpType::QuantizedAvgPool, EthosU55NpuOp::Pooling},
         {OpType::QuantizedMaxPool, EthosU55NpuOp::Pooling},
-        {OpType::ResizeBilinear, EthosU55NpuOp::Pooling},
         {OpType::ReduceSum, EthosU55NpuOp::ReduceSum},
         {OpType::Rescale, EthosU55NpuOp::Pooling},
         {OpType::Tile, EthosU55NpuOp::Dma},
diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp
index 5bb3c4c4..fd657dcd 100644
--- a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp
+++ b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp
@@ -323,6 +323,16 @@ Flags<QueryResult> EthosU55Constraints::OperatorQuery(OpType opType, const ArchO
             result.Set(QueryResult::HasRequirements);
         }
     }
+    if ( opType == OpType::Resize )
+    {
+        if ( req )
+        {
+            req->req = ArchRequirement::Decompose;
+            req->substitution = OpType::AvgPool;
+        }
+        result.Set(QueryResult::HasRequirements);
+        return result;
+    }
 
     // TransposeConv2D and Conv3D are legalized during decomposition
     if ( opType == OpType::TransposeConv2D || opType == OpType::Conv3D )
diff --git a/ethosu/regor/compiler/scheduler_decompose.cpp b/ethosu/regor/compiler/scheduler_decompose.cpp
index 41167bbb..81527ea4 100644
--- a/ethosu/regor/compiler/scheduler_decompose.cpp
+++ b/ethosu/regor/compiler/scheduler_decompose.cpp
@@ -236,6 +236,7 @@ bool CanDecompose(Architecture *, const SchedulerOperation *schedOp)
     if ( schedOp->Type() == OpType::TransposeConv2D ) return true;
     if ( DecomposeAsElementwise(schedOp->Type()) || schedOp->Type() == OpType::MemoryCopy ) return true;
     if ( schedOp->Type() == OpType::MatMul ) return true;
+    if ( schedOp->Type() == OpType::Resize ) return true;
     if ( schedOp->Type() == OpType::ReduceSum ) return true;
     if ( schedOp->Type() == OpType::ReduceMin ) return true;
     if ( schedOp->Type() == OpType::ReduceMax ) return true;
@@ -1280,6 +1281,88 @@ std::vector<std::unique_ptr<SchedulerOperation>> DecomposeTransposeConv2D(Archit
     return result;
 }
 
+// TODO: Move this to run prior to decomposition.
+// Legalises a Resize operation into a chain of AvgPool operations.
+//  arch: architecture handed to OperatorQuery.
+//  op:   the Resize operation to rewrite.
+// Returns the new operations in execution order, or a vector holding only `op` when it cannot be legalised.
+std::vector<std::unique_ptr<SchedulerOperation>> LegaliseResize(Architecture *arch, std::unique_ptr<SchedulerOperation> op)
+{
+    // Convert ResizeBilinear/NearestNeighbor to a number of kernel 1x1 average pools with nearest neighbor x2 upScaling
+    // and a final average pool with a kernel size that depends upon the resize ops upScaling factor (x2, x4 or x8). The
+    // maximum upscale factor is limited to x8 because of the 8x8 kernel size limit for average pool with padding.
+
+    std::vector<std::unique_ptr<SchedulerOperation>> result;
+
+    auto ifmConn = op->Input(TensorUsage::IFM);
+    auto ofmConn = op->Output(TensorUsage::OFM);
+    assert(ifmConn);
+    assert(ofmConn);
+
+    auto *attr = op->Attribute<resize_attr_t>();
+    assert(attr);
+    auto upscaleH = attr->scaleY.n;
+    auto upscaleW = attr->scaleX.n;
+    auto remainingUpscale = std::max(upscaleW, upscaleH);
+
+    // NOTE(review): neither the query result nor `req` is read below - confirm whether this call is still needed.
+    ArchRequirements req{};
+    OperatorQuery(arch, op.get(), &req);
+    auto reqScale = QuantizedScale(1, IntLog2(attr->scaleX.n * attr->scaleY.n));
+
+    // The largest scale factor must be x2, x4 or x8.
+    const bool badFactor = !IsPowerOfTwo(remainingUpscale) || remainingUpscale > 8 || remainingUpscale < 2;
+    // An axis that is not upscaled must have an IFM extent of 1.
+    const bool badAxis =
+        (upscaleH == 1 && ifmConn->shape.Height() != 1) || (upscaleW == 1 && ifmConn->shape.Width() != 1);
+    // The first OFM scale must equal reqScale; an OFM without any scale is rejected instead of being indexed.
+    const auto &ofmScales = ofmConn->quantization.scales;
+    const bool badScale = ofmScales.empty() || ofmScales[0] != reqScale;
+    if ( badFactor || badAxis || badScale )
+    {
+        // Not legalisable: hand the operation back unchanged.
+        result.emplace_back(std::move(op));
+        return result;
+    }
+
+    auto ofmShape = ofmConn->shape;
+
+    ofmConn->tensor->dataType = ifmConn->tensor->dataType;
+    ifmConn->resamplingMode = ArchResampling::Nearest;
+    // Perform 2x upScaling up to the last required
+    while ( remainingUpscale > 2 )
+    {
+        auto newOp = std::make_unique<SchedulerOperation>(OpType::AvgPool);
+        *newOp->ConnectInput(TensorUsage::IFM, ifmConn->tensor) = *ifmConn;
+        std::shared_ptr<SchedulerTensor> tens = ofmConn->tensor->Clone();
+        // std::min keeps an axis whose scale factor is 1 at its current extent.
+        auto shape = ofmShape.WithHW(ifmConn->shape.Height() * std::min(2, upscaleH), ifmConn->shape.Width() * std::min(2, upscaleW));
+        tens->storageShape = shape;
+        ifmConn = newOp->ConnectOutput(TensorUsage::OFM, tens);
+        ifmConn->quantization = Quantization::Unit();
+        ifmConn->shape = shape;
+        ifmConn->resamplingMode = ArchResampling::Nearest;
+        auto kernel = Kernel::UnitKernel();
+        newOp->SetKernel(&kernel);
+        result.emplace_back(std::move(newOp));
+
+        remainingUpscale /= 2;
+    }
+
+    // Perform last 2x upScaling and post-processing. ifmConn already requests Nearest resampling at this point.
+    auto newOp = std::make_unique<SchedulerOperation>(OpType::AvgPool);
+    *newOp->ConnectInput(TensorUsage::IFM, ifmConn->tensor) = *ifmConn;
+
+    // The kernel size is the full upscale factor per axis, with (factor - 1) of padding derived from it.
+    Kernel kernel = Kernel::UnitKernel().WithPadding({0, 0, upscaleH - 1, upscaleW - 1, 0, 0}).WithSize({upscaleW, upscaleH});
+    newOp->SetKernel(&kernel);
+    ofmConn->quantization = Quantization::Unit();
+    ofmConn->rounding = RoundMode::AUTO;
+    *newOp->ConnectOutput(TensorUsage::OFM, ofmConn->tensor) = *ofmConn;
+    result.emplace_back(std::move(newOp));
+    return result;
+}
+
 std::vector<std::unique_ptr<SchedulerOperation>> DecomposeElementwise(Architecture *arch, std::unique_ptr<SchedulerOperation> op)
 {
     std::vector<std::unique_ptr<SchedulerOperation>> result;
@@ -1603,7 +1686,7 @@ std::vector<std::unique_ptr<SchedulerOperation>> DecomposeTranspose(Architecture
     const auto &ifmShape = ifmConn->SliceShape();
     const auto axes = ifmShape.Size();
 
-    auto req = ArchRequirements();
+    ArchRequirements req{};
     auto qResult = OperatorQuery(arch, op.get(), &req);
     bool decomposeMask = false;
     bool decomposeAxes = false;
@@ -1773,7 +1856,7 @@ std::vector<std::unique_ptr<SchedulerOperation>> DecomposeResize(Architecture *a
     ofmSlice.Initialize(ofmShape.WithZeros(), ofmShape);
     ifmSlice.Initialize(ifmShape.WithZeros(), ifmShape);
 
-    auto req = ArchRequirements();
+    ArchRequirements req{};
     auto qResult = OperatorQuery(arch, op.get(), &req);
     bool decomposeLeadingDims = false;
     if ( qResult.Any(QueryResult::HasRequirements) && req.req.Any(ArchRequirement::Decompose) )
diff --git a/ethosu/regor/compiler/scheduler_decompose.hpp b/ethosu/regor/compiler/scheduler_decompose.hpp
index a872ee82..baf3d81e 100644
--- a/ethosu/regor/compiler/scheduler_decompose.hpp
+++ b/ethosu/regor/compiler/scheduler_decompose.hpp
@@ -46,6 +46,7 @@ std::vector<std::unique_ptr<SchedulerOperation>> DecomposeReverse(Architecture *
 std::vector<std::unique_ptr<SchedulerOperation>> DecomposeTranspose(Architecture *arch, std::unique_ptr<SchedulerOperation> op);
 std::vector<std::unique_ptr<SchedulerOperation>> DecomposeMaxPool(Architecture *arch, std::unique_ptr<SchedulerOperation> op);
 std::vector<std::unique_ptr<SchedulerOperation>> DecomposeResize(Architecture *arch, std::unique_ptr<SchedulerOperation> op);
+std::vector<std::unique_ptr<SchedulerOperation>> LegaliseResize(Architecture *arch, std::unique_ptr<SchedulerOperation> op);
 
 
 // Operator query helpers
diff --git a/ethosu/regor/compiler/scheduler_packing.cpp b/ethosu/regor/compiler/scheduler_packing.cpp
index 2810c792..a6ff98b3 100644
--- a/ethosu/regor/compiler/scheduler_packing.cpp
+++ b/ethosu/regor/compiler/scheduler_packing.cpp
@@ -615,6 +615,8 @@ std::unique_ptr<SchedulerOperation> SchedulerPacking::MakeSchedulerOperation(Ope
 std::vector<std::unique_ptr<SchedulerOperation>> SchedulerPacking::DecomposeSchedulerOperation(std::unique_ptr<SchedulerOperation> op)
 {
     std::vector<std::unique_ptr<SchedulerOperation>> result;
+    ArchRequirements req{};
+
     switch ( op->Type() )
     {
         case OpType::Conv2D:
@@ -648,15 +650,23 @@ std::vector<std::unique_ptr<SchedulerOperation>> SchedulerPacking::DecomposeSche
         case OpType::Reverse:
             result = DecomposeReverse(_arch, std::move(op));
             break;
+        case OpType::Resize:
+            OperatorQuery(_arch, op.get(), &req);
+            if ( req.substitution == OpType::AvgPool )
+            {
+                result = LegaliseResize(_arch, std::move(op));
+            }
+            else
+            {
+                result = DecomposeResize(_arch, std::move(op));
+            }
+            break;
         case OpType::Transpose:
             result = DecomposeTranspose(_arch, std::move(op));
             break;
         case OpType::MaxPool:
             result = DecomposeMaxPool(_arch, std::move(op));
             break;
-        case OpType::Resize:
-            result = DecomposeResize(_arch, std::move(op));
-            break;
         default:
             if ( DecomposeAsElementwise(op->Type()) || op->Type() == OpType::MemoryCopy )
             {
diff --git a/ethosu/regor/compiler/tflite_graph_optimiser.cpp b/ethosu/regor/compiler/tflite_graph_optimiser.cpp
index 1b6468a7..4ac1eaa7 100644
--- a/ethosu/regor/compiler/tflite_graph_optimiser.cpp
+++ b/ethosu/regor/compiler/tflite_graph_optimiser.cpp
@@ -1090,11 +1090,6 @@ Operation *TFLiteGraphOptimiser::ConvertResize(Graph *const graph, Operation *co
     Operation *returnOp = operation;
     OpType opType = operation->Type();
 
-    if ( _constraints->OperatorQuery(OpType::Resize).Any(QueryResult::Unsupported) )
-    {
-        // Only run if HW has native Resize support
-        return returnOp;
-    }
     if ( opType == OpType::ResizeBilinear || opType == OpType::ResizeNearestNeighbor )
     {
         auto ifmConn = operation->Input(TensorUsage::IFM);
diff --git a/ethosu/regor/tflite/tflite_supported_operators_u55.cpp b/ethosu/regor/tflite/tflite_supported_operators_u55.cpp
index a6252e5b..f49ccdb8 100644
--- a/ethosu/regor/tflite/tflite_supported_operators_u55.cpp
+++ b/ethosu/regor/tflite/tflite_supported_operators_u55.cpp
@@ -67,6 +67,7 @@ TfLiteSupportedOperatorsU55::TfLiteSupportedOperatorsU55(IArchitectureConstraint
         OpType::Tile,
         OpType::ExpandDims,
         OpType::ReduceSum,
+        OpType::ResizeBilinear,
         OpType::Rsqrt,
         OpType::Pack,
         OpType::Unpack,
@@ -103,6 +104,7 @@ TfLiteSupportedOperatorsU55::TfLiteSupportedOperatorsU55(IArchitectureConstraint
         &TfLiteSupportedOperatorsU55::ConstraintUnrolledKernelStride,
         &TfLiteSupportedOperatorsU55::ConstraintMatmul,
         &TfLiteSupportedOperatorsU55::ConstraintTranspose,
+        &TfLiteSupportedOperatorsU55::ConstraintResize,
     };
 }
 
@@ -143,6 +145,73 @@ bool TfLiteSupportedOperatorsU55::ConstraintBroadcastShapes(const Operation *op)
     return true;
 }
 
+// Supported-operator check for ResizeBilinear; reports a Failure and returns false when the op is not supported.
+bool TfLiteSupportedOperatorsU55::ConstraintResize(const Operation *op)
+{
+    if ( op->Type() != OpType::ResizeBilinear )
+    {
+        return true;
+    }
+    auto ifmConn = op->Input(TensorUsage::IFM);
+    auto ofmConn = op->Output(TensorUsage::OFM);
+    assert(ifmConn);
+    assert(ofmConn);
+    Shape ifmShape = Shape::PadAxes(ifmConn->shape, 4, 1);
+    Shape ofmShape = Shape::PadAxes(ofmConn->shape, 4, 1);
+    // Identity resizes (same height AND width) and 1x1 IFMs are accepted without inspecting the attributes.
+    const bool sameSize = ifmShape.Height() == ofmShape.Height() && ifmShape.Width() == ofmShape.Width();
+    const bool unitIfm = ifmShape.Height() == 1 && ifmShape.Width() == 1;
+    if ( sameSize || unitIfm )
+    {
+        return true;
+    }
+
+    const auto *passthrough = static_cast<const tflite::Operator *>(op->Passthrough());
+    assert(passthrough);
+    const auto *opt = passthrough->builtin_options_as_ResizeBilinearOptions();
+    assert(opt);
+    if ( opt->align_corners() )
+    {
+        Failure(op, "Align Corners attribute is true", "Align Corners must be false");
+        return false;
+    }
+    if ( opt->half_pixel_centers() )
+    {
+        Failure(op, "Half Pixel Centers attribute is true", "Half Pixel Centers must be false");
+        return false;
+    }
+    std::string constraint =
+        "If not (IFM H == IFM W == 1) and not IFM Shape == OFM Shape\n"
+        "\tIf W upScale != H upScale:\n"
+        "\t\tOFM W or H must be 1, and scaling in the dim that is must also be 1\n"
+        "\tIF W upScale == H upScale \n"
+        "\t\tupScale needs to be one of: 2x/4x/8x";
+    // The truncated ratios select the branch and feed the messages; acceptance needs an exact x2/x4/x8 factor.
+    int hUpscale = ofmShape.Height() / ifmShape.Height();
+    int wUpscale = ofmShape.Width() / ifmShape.Width();
+    auto exactScale = [](int in, int out) { return out == 2 * in || out == 4 * in || out == 8 * in; };
+    if ( hUpscale != wUpscale )
+    {
+        const bool hOnly = ofmShape.Width() == 1 && wUpscale == 1 && exactScale(ifmShape.Height(), ofmShape.Height());
+        const bool wOnly = ofmShape.Height() == 1 && hUpscale == 1 && exactScale(ifmShape.Width(), ofmShape.Width());
+        if ( !(hOnly || wOnly) )
+        {
+            Failure(op,
+                fmt::format("HW upScaling is not equal and operation has unsupported parameter combination ofm h={}, h up-scale={}, ofm w={}, w up-scale={}.",
+                    ofmShape.Height(), hUpscale, ofmShape.Width(), wUpscale),
+                constraint);
+            return false;
+        }
+    }
+    else if ( !(exactScale(ifmShape.Height(), ofmShape.Height()) && exactScale(ifmShape.Width(), ofmShape.Width())) )
+    {
+        Failure(op,
+            fmt::format("Scaling matches and operation has unsupported scaling={}", float(ofmShape.Height()) / ifmShape.Height()), constraint);
+        return false;
+    }
+    return true;
+}
+
 bool TfLiteSupportedOperatorsU55::ConstraintReverse(const Operation *op)
 {
     if ( op->Type() != OpType::Reverse && op->Type() != OpType::ReverseV2 )
diff --git a/ethosu/regor/tflite/tflite_supported_operators_u55.hpp b/ethosu/regor/tflite/tflite_supported_operators_u55.hpp
index 1daa1743..fe855925 100644
--- a/ethosu/regor/tflite/tflite_supported_operators_u55.hpp
+++ b/ethosu/regor/tflite/tflite_supported_operators_u55.hpp
@@ -50,5 +50,6 @@ private:
     bool ConstraintArgMaxAxis(const Operation *op);
     bool ConstraintArgMaxOverflow(const Operation *op);  // TODO: Remove after MLBEDSW-9758: TOSA MaxPool decomp
     bool ConstraintTranspose(const Operation *op);
+    bool ConstraintResize(const Operation *op);
 };
 }  // namespace regor
diff --git a/ethosu/regor/tflite/tflite_supported_operators_u85.cpp b/ethosu/regor/tflite/tflite_supported_operators_u85.cpp
index 335c3fa6..9b9fc635 100644
--- a/ethosu/regor/tflite/tflite_supported_operators_u85.cpp
+++ b/ethosu/regor/tflite/tflite_supported_operators_u85.cpp
@@ -235,7 +235,7 @@ bool TfLiteSupportedOperatorsU85::ConstraintResizeBilinear(const Operation *op)
         "if IFM HxW > 1x1\n"
         "\tand ALIGN_CORNERS:\n"
         "\t\tOFM W-1 and H-1 must be a power-of-two integer-multiple of IFM W-1 and H-1\n"
-        "\tor HALF_PIXEL_CENTERS:\n"
+        "\telse:\n"
         "\t\tOFM W and H must be a power-of-two integer-multiple of IFM W and H\n";
     OpType opType = op->Type();
     if ( opType != OpType::ResizeBilinear )
@@ -250,7 +250,6 @@ bool TfLiteSupportedOperatorsU85::ConstraintResizeBilinear(const Operation *op)
     int width_d = ifmConn->shape.Width();
     int height_n = ofmConn->shape.Height();
     int height_d = ifmConn->shape.Height();
-    bool halfPixelCenters = false;
     bool alignCorners = false;
     const tflite::Operator *passthrough = static_cast<const tflite::Operator *>(op->Passthrough());
     assert(passthrough);
@@ -263,7 +262,6 @@ bool TfLiteSupportedOperatorsU85::ConstraintResizeBilinear(const Operation *op)
     const auto *opt = passthrough->builtin_options_as_ResizeBilinearOptions();
     assert(opt);
     alignCorners = opt->align_corners();
-    halfPixelCenters = opt->half_pixel_centers();
 
     if ( alignCorners )
     {
-- 
GitLab