From a25debfc1d1a1f151ce7d65d339eeb3ad4d10eb8 Mon Sep 17 00:00:00 2001
From: Philip Hall <philip.hall@arm.com>
Date: Thu, 1 May 2025 15:12:03 +0100
Subject: [PATCH] MLBEDSW-10754: Make kernel object usage more consistent

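This commit rationalises the use of the kernel object to make
the unit kernel simpler to use, and ensures that called
functions take the kernel with the correct constness to
prevent accidental modification.

Summary of changes:
 * Kernel::UnitKernel() returns a reference to a single static
   const instance instead of constructing a new object on
   every call.
 * SchedulerOperation::SetKernel() takes a const reference
   rather than a pointer, so callers no longer need a named
   temporary just to take its address.
 * SchedulerOperation::Kernel() returns a pointer to const and
   falls back to the unit kernel when no kernel has been set;
   the _kernel member is now private.
 * The architecture query structs and
   Scheduler::GetStripeInputRequirement() hold/take the kernel
   as a pointer to const.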

Signed-off-by: Philip Hall <philip.hall@arm.com>
Change-Id: I27c63feb8e876f359b434a916bed50f41c97f411
---
 ethosu/regor/architecture/architecture.hpp    |  6 ++---
 .../architecture/architecture_constraints.hpp |  2 +-
 ethosu/regor/compiler/kernel.hpp              |  8 +++++--
 ethosu/regor/compiler/scheduler.cpp           |  2 +-
 ethosu/regor/compiler/scheduler.hpp           |  2 +-
 ethosu/regor/compiler/scheduler_decompose.cpp | 23 ++++++++-----------
 ethosu/regor/compiler/scheduler_operation.hpp |  6 ++---
 ethosu/regor/compiler/scheduler_packing.cpp   |  2 +-
 .../regor/test/test_scheduler_decompose.cpp   |  6 +----
 ethosu/regor/test/util.cpp                    |  2 +-
 10 files changed, 27 insertions(+), 32 deletions(-)
diff --git a/ethosu/regor/architecture/architecture.hpp b/ethosu/regor/architecture/architecture.hpp
index 39654377..b517ea2f 100644
--- a/ethosu/regor/architecture/architecture.hpp
+++ b/ethosu/regor/architecture/architecture.hpp
@@ -204,7 +204,7 @@ struct ArchitectureConfigQuery
     Shape ifmShape[2];
     int ifmBits;
     int ofmBits;
-    Kernel *kernel;
+    const Kernel *kernel;
     int lutBytes;
     bool scaled;
     ArchResampling ifmResampling;
@@ -227,7 +227,7 @@ struct ArchitectureConfigQuery
 struct PerformanceQuery
 {
     OpType type;
-    Kernel *kernel;
+    const Kernel *kernel;
     ArchitectureOpConfig *config;
     Shape ifmShape[2];
     ArchitectureMemory *ifmMemory[2];
@@ -259,7 +259,7 @@ struct WeightStats
 struct FusionQuery
 {
     OpType type;
-    Kernel *kernel = nullptr;
+    const Kernel *kernel = nullptr;
     Shape ifm2Shape;
     ArchitectureMemory *ifm2Memory = nullptr;
     DataType ifm2Type;
diff --git a/ethosu/regor/architecture/architecture_constraints.hpp b/ethosu/regor/architecture/architecture_constraints.hpp
index 7003e0a9..a695aee1 100644
--- a/ethosu/regor/architecture/architecture_constraints.hpp
+++ b/ethosu/regor/architecture/architecture_constraints.hpp
@@ -52,7 +52,7 @@ struct ArchOperatorQuery
     ArchFM ofm;
     ReverseType reverseMask = ReverseType::None;
     TransposeType transposeMask = TransposeType::None;
-    Kernel *kernel = nullptr;
+    const Kernel *kernel = nullptr;
     ~ArchOperatorQuery(){};
 };
 
diff --git a/ethosu/regor/compiler/kernel.hpp b/ethosu/regor/compiler/kernel.hpp
index a0b8f8f0..6c7a5ad4 100644
--- a/ethosu/regor/compiler/kernel.hpp
+++ b/ethosu/regor/compiler/kernel.hpp
@@ -1,5 +1,5 @@
 //
-// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
 //
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -159,7 +159,11 @@ public:
             _dilation.x, _dilation.y, _padding.ToString());
     }
 
-    static Kernel UnitKernel() { return Kernel({1, 1}, {1, 1}, {1, 1}); }
+    static const Kernel &UnitKernel()
+    {
+        static const Kernel s_kernel({1, 1}, {1, 1}, {1, 1});
+        return s_kernel;
+    }
 };
 
 static inline int RequiredInputSize(int value, int stride, int border, int upscale, int rounding = 0)
diff --git a/ethosu/regor/compiler/scheduler.cpp b/ethosu/regor/compiler/scheduler.cpp
index 9beb52fd..6558ea5e 100644
--- a/ethosu/regor/compiler/scheduler.cpp
+++ b/ethosu/regor/compiler/scheduler.cpp
@@ -153,7 +153,7 @@ std::shared_ptr<Schedule> Scheduler::Process()
     return chosenSchedule;
 }
 
-Point2i Scheduler::GetStripeInputRequirement(const Shape &ofmShape, Kernel *kernel, const Point2i &ifmStep, ArchResampling resampling)
+Point2i Scheduler::GetStripeInputRequirement(const Shape &ofmShape, const Kernel *kernel, const Point2i &ifmStep, ArchResampling resampling)
 {
     int rounding;
     int upscale = _arch->UpscaleAndRounding(resampling, rounding);
diff --git a/ethosu/regor/compiler/scheduler.hpp b/ethosu/regor/compiler/scheduler.hpp
index eb32f60f..20ab84a4 100644
--- a/ethosu/regor/compiler/scheduler.hpp
+++ b/ethosu/regor/compiler/scheduler.hpp
@@ -316,7 +316,7 @@ private:
 
     Address CreateSchedulerRepresentation();
 
-    Point2i GetStripeInputRequirement(const Shape &ofmShape, Kernel *kernel, const Point2i &ifmStep, ArchResampling resampling);
+    Point2i GetStripeInputRequirement(const Shape &ofmShape, const Kernel *kernel, const Point2i &ifmStep, ArchResampling resampling);
 
     std::unique_ptr<SchedulerOpInfo> CreateSchedulerOpInfo(SchedulerOperation *op, const Shape &ofmStripeShape,
         const std::unique_ptr<SchedulerOpInfo> &parentInfo = nullptr);
diff --git a/ethosu/regor/compiler/scheduler_decompose.cpp b/ethosu/regor/compiler/scheduler_decompose.cpp
index 0623f295..c2ae7462 100644
--- a/ethosu/regor/compiler/scheduler_decompose.cpp
+++ b/ethosu/regor/compiler/scheduler_decompose.cpp
@@ -54,8 +54,7 @@ static std::unique_ptr<SchedulerOperation> MakeMemCopy(const std::shared_ptr<Sch
     assert(ofmSlice == nullptr || ofmSlice->shape + ofmSlice->offset <= dest->storageShape);
     auto op = std::make_unique<SchedulerOperation>(OpType::MemoryCopy);
 
-    auto kernel = Kernel({1, 1}, {1, 1}, {1, 1});
-    op->SetKernel(&kernel);
+    op->SetKernel(Kernel::UnitKernel());
 
     auto ofmConn = op->AddOutput(TensorUsage::OFM);
     ofmConn->tensor = dest;
@@ -98,8 +97,7 @@ static std::unique_ptr<SchedulerOperation> MakeTransposeOp(
     auto ofmConn = op->AddOutput(TensorUsage::OFM);
     assert(ifmConn->Type() == ofmConn->Type());
 
-    auto kernel = Kernel({1, 1}, {1, 1}, {1, 1});
-    op->SetKernel(&kernel);
+    op->SetKernel(Kernel::UnitKernel());
 
     const auto attr = op->Attribute<transpose_attr_t>();
     attr->perm = perm;
@@ -136,7 +134,7 @@ MakeSubOperation(const SchedulerOperation *schedOp, const Kernel *newKernel = nu
     assert(schedOp->SubOps().empty());
     assert(schedOp->Parent() == nullptr);
     auto subOp = std::make_unique<SchedulerOperation>(type != OpType::None ? type : schedOp->Type());
-    subOp->SetKernel(newKernel ? newKernel : schedOp->Kernel());
+    subOp->SetKernel(newKernel ? *newKernel : *schedOp->Kernel());
     subOp->SetHasScaling(schedOp->HasScaling());
     subOp->_srcKey = schedOp->_srcKey;
     subOp->SetPrimaryIfmIndex(schedOp->PrimaryIfmIndex());
@@ -459,7 +457,7 @@ static void UpdatePaddingAndIfmOffset(SchedulerOperation *op)
     ifmSlice.offset = ifmSlice.offset.WithHeight(newHeight).WithWidth(newWidth);
     auto newPadding = Margin(topPad, leftPad, padding.Bottom(), padding.Right());
     auto newKernel = kernel->WithPadding(newPadding);
-    op->SetKernel(&newKernel);
+    op->SetKernel(newKernel);
 }
 
 // Return a slice of a tensor
@@ -558,9 +556,9 @@ static Shape NewOfmBlockShape(Architecture *arch, SchedulerOperation *op)
     // Get block config for the op after decomposition to smaller kernel
     // Avoids problems where a block config can't be found as ifm gets too big for RAM
     auto minKernel = kernel.WithSize({1, 1}).WithStride({1, 1});
-    op->SetKernel(&minKernel);
+    op->SetKernel(minKernel);
     auto config = GetOpConfig(arch, op);
-    op->SetKernel(&kernel);
+    op->SetKernel(kernel);
     assert(config && "No config found.");
     if ( !config ) throw DecompositionFailure("No config found");
     auto HW = config->OptimalStripeGranule();
@@ -1273,7 +1271,7 @@ std::vector<std::unique_ptr<SchedulerOperation>> DecomposeTransposeConv2D(Archit
         weightsConn->tensor->consumers.push_back(op.get());
         Kernel newKernel = kernel->WithStride({1, 1});
         op->_type = OpType::Conv2D;
-        op->SetKernel(&newKernel);
+        op->SetKernel(newKernel);
         result.emplace_back(std::move(op));
     }
     else
@@ -1346,8 +1344,6 @@ std::vector<std::unique_ptr<SchedulerOperation>> LegaliseResize(Architecture *ar
         ifmConn->quantization = Quantization::Unit();
         ifmConn->shape = shape;
         ifmConn->resamplingMode = ArchResampling::Nearest;
-        auto kernel = Kernel::UnitKernel();
-        newOp->SetKernel(&kernel);
         result.emplace_back(std::move(newOp));
 
         remainingUpscale /= 2;
@@ -1359,7 +1355,7 @@ std::vector<std::unique_ptr<SchedulerOperation>> LegaliseResize(Architecture *ar
     *newOp->ConnectInput(TensorUsage::IFM, ifmConn->tensor) = *ifmConn;
 
     Kernel kernel = Kernel::UnitKernel().WithPadding({0, 0, upscaleH - 1, upscaleW - 1, 0, 0}).WithSize({upscaleW, upscaleH});
-    newOp->SetKernel(&kernel);
+    newOp->SetKernel(kernel);
     ofmConn->quantization = Quantization::Unit();
     ofmConn->rounding = RoundMode::AUTO;
     *newOp->ConnectOutput(TensorUsage::OFM, ofmConn->tensor) = *ofmConn;
@@ -1565,8 +1561,7 @@ static std::vector<std::unique_ptr<SchedulerOperation>> SwapAxes(Architecture *a
 
         // Create SchedulerOperation
         auto op = std::make_unique<SchedulerOperation>(OpType::Transpose);
-        Kernel kernel({1, 1} /* size */, {1, 1} /* stride */, {1, 1} /* dilation */);
-        op->SetKernel(&kernel);
+        op->SetKernel(Kernel::UnitKernel());
         auto ifmConn = op->AddInput(TensorUsage::IFM);
         auto ofmConn = op->AddOutput(TensorUsage::OFM);
 
diff --git a/ethosu/regor/compiler/scheduler_operation.hpp b/ethosu/regor/compiler/scheduler_operation.hpp
index d88d9a4f..43a0a21a 100644
--- a/ethosu/regor/compiler/scheduler_operation.hpp
+++ b/ethosu/regor/compiler/scheduler_operation.hpp
@@ -177,11 +177,11 @@ class SchedulerOperation : public Attributable
 {
     friend class SchedulerPacking;
     friend class Scheduler;
+    std::unique_ptr<class Kernel> _kernel;
 
 public:
     OpType _type;
     int _index = -1;  // Execution index
-    std::unique_ptr<class Kernel> _kernel;
     bool _npuOp = false;
     bool _hasScaling = false;
     void *_srcKey = nullptr;
@@ -214,8 +214,8 @@ public:
     bool IsNpuOp() const { return _npuOp; }
     void SetNpuOp(bool npuOp) { _npuOp = npuOp; }
 
-    class Kernel *Kernel() const { return _kernel.get(); }
-    void SetKernel(const class Kernel *kernel) { _kernel = std::make_unique<class Kernel>(*kernel); }
+    const class Kernel *Kernel() const { return _kernel ? _kernel.get() : &regor::Kernel::UnitKernel(); }
+    void SetKernel(const class Kernel &kernel) { _kernel = std::make_unique<class Kernel>(kernel); }
 
     bool HasScaling() const { return _hasScaling; }
     void SetHasScaling(bool hasScaling) { _hasScaling = hasScaling; }
diff --git a/ethosu/regor/compiler/scheduler_packing.cpp b/ethosu/regor/compiler/scheduler_packing.cpp
index 3de2f3df..82a77fc9 100644
--- a/ethosu/regor/compiler/scheduler_packing.cpp
+++ b/ethosu/regor/compiler/scheduler_packing.cpp
@@ -532,7 +532,7 @@ std::unique_ptr<SchedulerOperation> SchedulerPacking::MakeSchedulerOperation(Ope
 
     std::unique_ptr<SchedulerOperation> schedOp = std::make_unique<SchedulerOperation>(op->Type());
 
-    schedOp->SetKernel(op->Kernel());
+    schedOp->SetKernel(*op->Kernel());
     schedOp->SetHasScaling(op->HasScaling());
     schedOp->SetAttributes(op->AttributeRef());
     schedOp->_srcKey = op;
diff --git a/ethosu/regor/test/test_scheduler_decompose.cpp b/ethosu/regor/test/test_scheduler_decompose.cpp
index c0c5cf6b..7c4d327d 100644
--- a/ethosu/regor/test/test_scheduler_decompose.cpp
+++ b/ethosu/regor/test/test_scheduler_decompose.cpp
@@ -1,5 +1,5 @@
 //
-// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
 //
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -40,8 +40,6 @@ std::unique_ptr<SchedulerOperation> CreateOperation(OpType opType, Shape ifmShap
     std::unique_ptr<SchedulerOperation> op = CreateSchedulerOperation(
         opType, TensorUsage::IFM0, ifm1, TensorUsage::IFM1, ifm2, TensorUsage::OFM, ofm);
 
-    // set default kernel
-    op->_kernel = std::make_unique<class Kernel>(Point2i(1, 1), Point2i(1, 1), Point2i(1, 1));
     return op;
 }
 
@@ -52,8 +50,6 @@ std::unique_ptr<SchedulerOperation> CreateOperation(OpType opType, Shape ifmShap
 
     std::unique_ptr<SchedulerOperation> op = CreateSchedulerOperation(opType, TensorUsage::IFM0, ifm1, TensorUsage::OFM, ofm);
 
-    // set default kernel
-    op->_kernel = std::make_unique<class Kernel>(Point2i(1, 1), Point2i(1, 1), Point2i(1, 1));
     return op;
 }
 
diff --git a/ethosu/regor/test/util.cpp b/ethosu/regor/test/util.cpp
index 1bd614ca..7ee08773 100644
--- a/ethosu/regor/test/util.cpp
+++ b/ethosu/regor/test/util.cpp
@@ -225,7 +225,7 @@ std::unique_ptr<SchedulerOperation> CreateSchedulerOperation(OpType opType, Tens
     s_ops.add_op(op);
 
     auto schedOp = std::make_unique<SchedulerOperation>(opType);
-    schedOp->SetKernel(op->Kernel());
+    schedOp->SetKernel(*op->Kernel());
     schedOp->_srcKey = static_cast<void *>(op.get());
     // ifm
     auto *ifmConn = schedOp->AddInput(ifmUsage);
-- 
GitLab