diff --git a/ethosu/regor/architecture/architecture.hpp b/ethosu/regor/architecture/architecture.hpp
index 39654377c85bfa49860b7a1205eae49357d8b4bd..b517ea2fad6822ea08881d419440bda6fee8e152 100644
--- a/ethosu/regor/architecture/architecture.hpp
+++ b/ethosu/regor/architecture/architecture.hpp
@@ -204,7 +204,7 @@ struct ArchitectureConfigQuery
     Shape ifmShape[2];
     int ifmBits;
     int ofmBits;
-    Kernel *kernel;
+    const Kernel *kernel;
     int lutBytes;
     bool scaled;
     ArchResampling ifmResampling;
@@ -227,7 +227,7 @@ struct ArchitectureConfigQuery
 struct PerformanceQuery
 {
     OpType type;
-    Kernel *kernel;
+    const Kernel *kernel;
     ArchitectureOpConfig *config;
     Shape ifmShape[2];
     ArchitectureMemory *ifmMemory[2];
@@ -259,7 +259,7 @@ struct WeightStats
 struct FusionQuery
 {
     OpType type;
-    Kernel *kernel = nullptr;
+    const Kernel *kernel = nullptr;
     Shape ifm2Shape;
     ArchitectureMemory *ifm2Memory = nullptr;
     DataType ifm2Type;
diff --git a/ethosu/regor/architecture/architecture_constraints.hpp b/ethosu/regor/architecture/architecture_constraints.hpp
index 7003e0a943dce172ed05509f65029711de301a0a..a695aee187e939d97de71284a4bfa3797d4a374d 100644
--- a/ethosu/regor/architecture/architecture_constraints.hpp
+++ b/ethosu/regor/architecture/architecture_constraints.hpp
@@ -52,7 +52,7 @@ struct ArchOperatorQuery
     ArchFM ofm;
     ReverseType reverseMask = ReverseType::None;
     TransposeType transposeMask = TransposeType::None;
-    Kernel *kernel = nullptr;
+    const Kernel *kernel = nullptr;
     ~ArchOperatorQuery(){};
 };
 
diff --git a/ethosu/regor/compiler/kernel.hpp b/ethosu/regor/compiler/kernel.hpp
index a0b8f8f07b22a9ee72cbff12f21e72c62153b376..6c7a5ad4c96382068f9e63046fe037fc455ec8ee 100644
--- a/ethosu/regor/compiler/kernel.hpp
+++ b/ethosu/regor/compiler/kernel.hpp
@@ -1,5 +1,5 @@
 //
-// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
 //
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -159,7 +159,11 @@ public:
             _dilation.x, _dilation.y, _padding.ToString());
     }
 
-    static Kernel UnitKernel() { return Kernel({1, 1}, {1, 1}, {1, 1}); }
+    static const Kernel &UnitKernel()
+    {
+        static const Kernel s_unitKernel({1, 1}, {1, 1}, {1, 1});  // constructed on first call, then shared by every caller
+        return s_unitKernel;
+    }
 };
 
 static inline int RequiredInputSize(int value, int stride, int border, int upscale, int rounding = 0)
diff --git a/ethosu/regor/compiler/scheduler.cpp b/ethosu/regor/compiler/scheduler.cpp
index 9beb52fdc99723fadd76a994fb02ef281fea7fed..6558ea5e6776cc62d23f58bb9fad69dde7d08f3b 100644
--- a/ethosu/regor/compiler/scheduler.cpp
+++ b/ethosu/regor/compiler/scheduler.cpp
@@ -153,7 +153,7 @@ std::shared_ptr<Schedule> Scheduler::Process()
     return chosenSchedule;
 }
 
-Point2i Scheduler::GetStripeInputRequirement(const Shape &ofmShape, Kernel *kernel, const Point2i &ifmStep, ArchResampling resampling)
+Point2i Scheduler::GetStripeInputRequirement(const Shape &ofmShape, const Kernel *kernel, const Point2i &ifmStep, ArchResampling resampling)
 {
     int rounding;
     int upscale = _arch->UpscaleAndRounding(resampling, rounding);
diff --git a/ethosu/regor/compiler/scheduler.hpp b/ethosu/regor/compiler/scheduler.hpp
index eb32f60fa8731bc11db63dcf42cfcd503282b54c..20ab84a4ff11275121869c68b55f80015204d863 100644
--- a/ethosu/regor/compiler/scheduler.hpp
+++ b/ethosu/regor/compiler/scheduler.hpp
@@ -316,7 +316,7 @@ private:
 
     Address CreateSchedulerRepresentation();
 
-    Point2i GetStripeInputRequirement(const Shape &ofmShape, Kernel *kernel, const Point2i &ifmStep, ArchResampling resampling);
+    Point2i GetStripeInputRequirement(const Shape &ofmShape, const Kernel *kernel, const Point2i &ifmStep, ArchResampling resampling);
 
     std::unique_ptr<SchedulerOpInfo> CreateSchedulerOpInfo(SchedulerOperation *op, const Shape &ofmStripeShape,
         const std::unique_ptr<SchedulerOpInfo> &parentInfo = nullptr);
diff --git a/ethosu/regor/compiler/scheduler_decompose.cpp b/ethosu/regor/compiler/scheduler_decompose.cpp
index 0623f2959d90c2ea2a87a13cc6e811fc09653572..c2ae7462d65afbe143125c0f3ec1329995ce3f33 100644
--- a/ethosu/regor/compiler/scheduler_decompose.cpp
+++ b/ethosu/regor/compiler/scheduler_decompose.cpp
@@ -54,8 +54,7 @@ static std::unique_ptr<SchedulerOperation> MakeMemCopy(const std::shared_ptr<Sch
     assert(ofmSlice == nullptr || ofmSlice->shape + ofmSlice->offset <= dest->storageShape);
     auto op = std::make_unique<SchedulerOperation>(OpType::MemoryCopy);
 
-    auto kernel = Kernel({1, 1}, {1, 1}, {1, 1});
-    op->SetKernel(&kernel);
+    op->SetKernel(Kernel::UnitKernel());
 
     auto ofmConn = op->AddOutput(TensorUsage::OFM);
     ofmConn->tensor = dest;
@@ -98,8 +97,7 @@ static std::unique_ptr<SchedulerOperation> MakeTransposeOp(
     auto ofmConn = op->AddOutput(TensorUsage::OFM);
     assert(ifmConn->Type() == ofmConn->Type());
 
-    auto kernel = Kernel({1, 1}, {1, 1}, {1, 1});
-    op->SetKernel(&kernel);
+    op->SetKernel(Kernel::UnitKernel());
 
     const auto attr = op->Attribute<transpose_attr_t>();
     attr->perm = perm;
@@ -136,7 +134,7 @@ MakeSubOperation(const SchedulerOperation *schedOp, const Kernel *newKernel = nu
     assert(schedOp->SubOps().empty());
     assert(schedOp->Parent() == nullptr);
     auto subOp = std::make_unique<SchedulerOperation>(type != OpType::None ? type : schedOp->Type());
-    subOp->SetKernel(newKernel ? newKernel : schedOp->Kernel());
+    subOp->SetKernel(newKernel ? *newKernel : *schedOp->Kernel());
     subOp->SetHasScaling(schedOp->HasScaling());
     subOp->_srcKey = schedOp->_srcKey;
     subOp->SetPrimaryIfmIndex(schedOp->PrimaryIfmIndex());
@@ -459,7 +457,7 @@ static void UpdatePaddingAndIfmOffset(SchedulerOperation *op)
     ifmSlice.offset = ifmSlice.offset.WithHeight(newHeight).WithWidth(newWidth);
     auto newPadding = Margin(topPad, leftPad, padding.Bottom(), padding.Right());
     auto newKernel = kernel->WithPadding(newPadding);
-    op->SetKernel(&newKernel);
+    op->SetKernel(newKernel);
 }
 
 // Return a slice of a tensor
@@ -558,9 +556,9 @@ static Shape NewOfmBlockShape(Architecture *arch, SchedulerOperation *op)
     // Get block config for the op after decomposition to smaller kernel
     // Avoids problems where a block config can't be found as ifm gets too big for RAM
     auto minKernel = kernel.WithSize({1, 1}).WithStride({1, 1});
-    op->SetKernel(&minKernel);
+    op->SetKernel(minKernel);
     auto config = GetOpConfig(arch, op);
-    op->SetKernel(&kernel);
+    op->SetKernel(kernel);
     assert(config && "No config found.");
     if ( !config ) throw DecompositionFailure("No config found");
     auto HW = config->OptimalStripeGranule();
@@ -1273,7 +1271,7 @@ std::vector<std::unique_ptr<SchedulerOperation>> DecomposeTransposeConv2D(Archit
         weightsConn->tensor->consumers.push_back(op.get());
         Kernel newKernel = kernel->WithStride({1, 1});
         op->_type = OpType::Conv2D;
-        op->SetKernel(&newKernel);
+        op->SetKernel(newKernel);
         result.emplace_back(std::move(op));
     }
     else
@@ -1346,8 +1344,6 @@ std::vector<std::unique_ptr<SchedulerOperation>> LegaliseResize(Architecture *ar
         ifmConn->quantization = Quantization::Unit();
         ifmConn->shape = shape;
         ifmConn->resamplingMode = ArchResampling::Nearest;
-        auto kernel = Kernel::UnitKernel();
-        newOp->SetKernel(&kernel);
         result.emplace_back(std::move(newOp));
 
         remainingUpscale /= 2;
@@ -1359,7 +1355,7 @@ std::vector<std::unique_ptr<SchedulerOperation>> LegaliseResize(Architecture *ar
     *newOp->ConnectInput(TensorUsage::IFM, ifmConn->tensor) = *ifmConn;
 
     Kernel kernel = Kernel::UnitKernel().WithPadding({0, 0, upscaleH - 1, upscaleW - 1, 0, 0}).WithSize({upscaleW, upscaleH});
-    newOp->SetKernel(&kernel);
+    newOp->SetKernel(kernel);
     ofmConn->quantization = Quantization::Unit();
     ofmConn->rounding = RoundMode::AUTO;
     *newOp->ConnectOutput(TensorUsage::OFM, ofmConn->tensor) = *ofmConn;
@@ -1565,8 +1561,7 @@ static std::vector<std::unique_ptr<SchedulerOperation>> SwapAxes(Architecture *a
 
         // Create SchedulerOperation
         auto op = std::make_unique<SchedulerOperation>(OpType::Transpose);
-        Kernel kernel({1, 1} /* size */, {1, 1} /* stride */, {1, 1} /* dilation */);
-        op->SetKernel(&kernel);
+        op->SetKernel(Kernel::UnitKernel());
         auto ifmConn = op->AddInput(TensorUsage::IFM);
         auto ofmConn = op->AddOutput(TensorUsage::OFM);
 
diff --git a/ethosu/regor/compiler/scheduler_operation.hpp b/ethosu/regor/compiler/scheduler_operation.hpp
index d88d9a4fd30a7fa31ef098a4bbcb49dce66c87a7..43a0a21a41d1b3537475d2af7a3f5dcce68886f7 100644
--- a/ethosu/regor/compiler/scheduler_operation.hpp
+++ b/ethosu/regor/compiler/scheduler_operation.hpp
@@ -177,11 +177,11 @@ class SchedulerOperation : public Attributable
 {
     friend class SchedulerPacking;
     friend class Scheduler;
+    std::unique_ptr<class Kernel> _kernel;
 
 public:
     OpType _type;
     int _index = -1;  // Execution index
-    std::unique_ptr<class Kernel> _kernel;
     bool _npuOp = false;
     bool _hasScaling = false;
     void *_srcKey = nullptr;
@@ -214,8 +214,8 @@ public:
     bool IsNpuOp() const { return _npuOp; }
     void SetNpuOp(bool npuOp) { _npuOp = npuOp; }
 
-    class Kernel *Kernel() const { return _kernel.get(); }
-    void SetKernel(const class Kernel *kernel) { _kernel = std::make_unique<class Kernel>(*kernel); }
+    const class Kernel *Kernel() const { return _kernel == nullptr ? &regor::Kernel::UnitKernel() : _kernel.get(); }  // never null
+    void SetKernel(const class Kernel &kernel) { _kernel = std::make_unique<class Kernel>(kernel); }  // keeps its own copy
 
     bool HasScaling() const { return _hasScaling; }
     void SetHasScaling(bool hasScaling) { _hasScaling = hasScaling; }
diff --git a/ethosu/regor/compiler/scheduler_packing.cpp b/ethosu/regor/compiler/scheduler_packing.cpp
index 3de2f3df4fb3e1c9262908672dd863ad770cff5b..82a77fc992f42bfe767603d4cde868d4d91df33e 100644
--- a/ethosu/regor/compiler/scheduler_packing.cpp
+++ b/ethosu/regor/compiler/scheduler_packing.cpp
@@ -532,7 +532,7 @@ std::unique_ptr<SchedulerOperation> SchedulerPacking::MakeSchedulerOperation(Ope
 
     std::unique_ptr<SchedulerOperation> schedOp = std::make_unique<SchedulerOperation>(op->Type());
 
-    schedOp->SetKernel(op->Kernel());
+    schedOp->SetKernel(*op->Kernel());
     schedOp->SetHasScaling(op->HasScaling());
     schedOp->SetAttributes(op->AttributeRef());
     schedOp->_srcKey = op;
diff --git a/ethosu/regor/test/test_scheduler_decompose.cpp b/ethosu/regor/test/test_scheduler_decompose.cpp
index c0c5cf6bf2962b642355005ca3061e7d5d7a96e2..7c4d327de22b3efb9532793413276c6127fd0e5f 100644
--- a/ethosu/regor/test/test_scheduler_decompose.cpp
+++ b/ethosu/regor/test/test_scheduler_decompose.cpp
@@ -1,5 +1,5 @@
 //
-// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
 //
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -40,8 +40,6 @@ std::unique_ptr<SchedulerOperation> CreateOperation(OpType opType, Shape ifmShap
     std::unique_ptr<SchedulerOperation> op = CreateSchedulerOperation(
         opType, TensorUsage::IFM0, ifm1, TensorUsage::IFM1, ifm2, TensorUsage::OFM, ofm);
 
-    // set default kernel
-    op->_kernel = std::make_unique<class Kernel>(Point2i(1, 1), Point2i(1, 1), Point2i(1, 1));
     return op;
 }
 
@@ -52,8 +50,6 @@ std::unique_ptr<SchedulerOperation> CreateOperation(OpType opType, Shape ifmShap
 
     std::unique_ptr<SchedulerOperation> op = CreateSchedulerOperation(opType, TensorUsage::IFM0, ifm1, TensorUsage::OFM, ofm);
 
-    // set default kernel
-    op->_kernel = std::make_unique<class Kernel>(Point2i(1, 1), Point2i(1, 1), Point2i(1, 1));
     return op;
 }
 
diff --git a/ethosu/regor/test/util.cpp b/ethosu/regor/test/util.cpp
index 1bd614caa7b30d02b0e8fc09a7b3535eb2412069..7ee087735a73d8d563b8d4a3fbe01faba75fdbfb 100644
--- a/ethosu/regor/test/util.cpp
+++ b/ethosu/regor/test/util.cpp
@@ -225,7 +225,7 @@ std::unique_ptr<SchedulerOperation> CreateSchedulerOperation(OpType opType, Tens
     s_ops.add_op(op);
 
     auto schedOp = std::make_unique<SchedulerOperation>(opType);
-    schedOp->SetKernel(op->Kernel());
+    schedOp->SetKernel(*op->Kernel());
     schedOp->_srcKey = static_cast<void *>(op.get());
     // ifm
     auto *ifmConn = schedOp->AddInput(ifmUsage);