diff --git a/ethosu/regor/architecture/architecture.hpp b/ethosu/regor/architecture/architecture.hpp index 39654377c85bfa49860b7a1205eae49357d8b4bd..b517ea2fad6822ea08881d419440bda6fee8e152 100644 --- a/ethosu/regor/architecture/architecture.hpp +++ b/ethosu/regor/architecture/architecture.hpp @@ -204,7 +204,7 @@ struct ArchitectureConfigQuery Shape ifmShape[2]; int ifmBits; int ofmBits; - Kernel *kernel; + const Kernel *kernel; int lutBytes; bool scaled; ArchResampling ifmResampling; @@ -227,7 +227,7 @@ struct ArchitectureConfigQuery struct PerformanceQuery { OpType type; - Kernel *kernel; + const Kernel *kernel; ArchitectureOpConfig *config; Shape ifmShape[2]; ArchitectureMemory *ifmMemory[2]; @@ -259,7 +259,7 @@ struct WeightStats struct FusionQuery { OpType type; - Kernel *kernel = nullptr; + const Kernel *kernel = nullptr; Shape ifm2Shape; ArchitectureMemory *ifm2Memory = nullptr; DataType ifm2Type; diff --git a/ethosu/regor/architecture/architecture_constraints.hpp b/ethosu/regor/architecture/architecture_constraints.hpp index 7003e0a943dce172ed05509f65029711de301a0a..a695aee187e939d97de71284a4bfa3797d4a374d 100644 --- a/ethosu/regor/architecture/architecture_constraints.hpp +++ b/ethosu/regor/architecture/architecture_constraints.hpp @@ -52,7 +52,7 @@ struct ArchOperatorQuery ArchFM ofm; ReverseType reverseMask = ReverseType::None; TransposeType transposeMask = TransposeType::None; - Kernel *kernel = nullptr; + const Kernel *kernel = nullptr; ~ArchOperatorQuery(){}; }; diff --git a/ethosu/regor/compiler/kernel.hpp b/ethosu/regor/compiler/kernel.hpp index a0b8f8f07b22a9ee72cbff12f21e72c62153b376..6c7a5ad4c96382068f9e63046fe037fc455ec8ee 100644 --- a/ethosu/regor/compiler/kernel.hpp +++ b/ethosu/regor/compiler/kernel.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -159,7 
+159,11 @@ public: _dilation.x, _dilation.y, _padding.ToString()); } - static Kernel UnitKernel() { return Kernel({1, 1}, {1, 1}, {1, 1}); } + static const Kernel &UnitKernel() + { + static const Kernel s_kernel({1, 1}, {1, 1}, {1, 1}); + return s_kernel; + } }; static inline int RequiredInputSize(int value, int stride, int border, int upscale, int rounding = 0) diff --git a/ethosu/regor/compiler/scheduler.cpp b/ethosu/regor/compiler/scheduler.cpp index 9beb52fdc99723fadd76a994fb02ef281fea7fed..6558ea5e6776cc62d23f58bb9fad69dde7d08f3b 100644 --- a/ethosu/regor/compiler/scheduler.cpp +++ b/ethosu/regor/compiler/scheduler.cpp @@ -153,7 +153,7 @@ std::shared_ptr Scheduler::Process() return chosenSchedule; } -Point2i Scheduler::GetStripeInputRequirement(const Shape &ofmShape, Kernel *kernel, const Point2i &ifmStep, ArchResampling resampling) +Point2i Scheduler::GetStripeInputRequirement(const Shape &ofmShape, const Kernel *kernel, const Point2i &ifmStep, ArchResampling resampling) { int rounding; int upscale = _arch->UpscaleAndRounding(resampling, rounding); diff --git a/ethosu/regor/compiler/scheduler.hpp b/ethosu/regor/compiler/scheduler.hpp index eb32f60fa8731bc11db63dcf42cfcd503282b54c..20ab84a4ff11275121869c68b55f80015204d863 100644 --- a/ethosu/regor/compiler/scheduler.hpp +++ b/ethosu/regor/compiler/scheduler.hpp @@ -316,7 +316,7 @@ private: Address CreateSchedulerRepresentation(); - Point2i GetStripeInputRequirement(const Shape &ofmShape, Kernel *kernel, const Point2i &ifmStep, ArchResampling resampling); + Point2i GetStripeInputRequirement(const Shape &ofmShape, const Kernel *kernel, const Point2i &ifmStep, ArchResampling resampling); std::unique_ptr CreateSchedulerOpInfo(SchedulerOperation *op, const Shape &ofmStripeShape, const std::unique_ptr &parentInfo = nullptr); diff --git a/ethosu/regor/compiler/scheduler_decompose.cpp b/ethosu/regor/compiler/scheduler_decompose.cpp index 
0623f2959d90c2ea2a87a13cc6e811fc09653572..c2ae7462d65afbe143125c0f3ec1329995ce3f33 100644 --- a/ethosu/regor/compiler/scheduler_decompose.cpp +++ b/ethosu/regor/compiler/scheduler_decompose.cpp @@ -54,8 +54,7 @@ static std::unique_ptr MakeMemCopy(const std::shared_ptrshape + ofmSlice->offset <= dest->storageShape); auto op = std::make_unique(OpType::MemoryCopy); - auto kernel = Kernel({1, 1}, {1, 1}, {1, 1}); - op->SetKernel(&kernel); + op->SetKernel(Kernel::UnitKernel()); auto ofmConn = op->AddOutput(TensorUsage::OFM); ofmConn->tensor = dest; @@ -98,8 +97,7 @@ static std::unique_ptr MakeTransposeOp( auto ofmConn = op->AddOutput(TensorUsage::OFM); assert(ifmConn->Type() == ofmConn->Type()); - auto kernel = Kernel({1, 1}, {1, 1}, {1, 1}); - op->SetKernel(&kernel); + op->SetKernel(Kernel::UnitKernel()); const auto attr = op->Attribute(); attr->perm = perm; @@ -136,7 +134,7 @@ MakeSubOperation(const SchedulerOperation *schedOp, const Kernel *newKernel = nu assert(schedOp->SubOps().empty()); assert(schedOp->Parent() == nullptr); auto subOp = std::make_unique(type != OpType::None ? type : schedOp->Type()); - subOp->SetKernel(newKernel ? newKernel : schedOp->Kernel()); + subOp->SetKernel(newKernel ? 
*newKernel : *schedOp->Kernel()); subOp->SetHasScaling(schedOp->HasScaling()); subOp->_srcKey = schedOp->_srcKey; subOp->SetPrimaryIfmIndex(schedOp->PrimaryIfmIndex()); @@ -459,7 +457,7 @@ static void UpdatePaddingAndIfmOffset(SchedulerOperation *op) ifmSlice.offset = ifmSlice.offset.WithHeight(newHeight).WithWidth(newWidth); auto newPadding = Margin(topPad, leftPad, padding.Bottom(), padding.Right()); auto newKernel = kernel->WithPadding(newPadding); - op->SetKernel(&newKernel); + op->SetKernel(newKernel); } // Return a slice of a tensor @@ -558,9 +556,9 @@ static Shape NewOfmBlockShape(Architecture *arch, SchedulerOperation *op) // Get block config for the op after decomposition to smaller kernel // Avoids problems where a block config can't be found as ifm gets too big for RAM auto minKernel = kernel.WithSize({1, 1}).WithStride({1, 1}); - op->SetKernel(&minKernel); + op->SetKernel(minKernel); auto config = GetOpConfig(arch, op); - op->SetKernel(&kernel); + op->SetKernel(kernel); assert(config && "No config found."); if ( !config ) throw DecompositionFailure("No config found"); auto HW = config->OptimalStripeGranule(); @@ -1273,7 +1271,7 @@ std::vector> DecomposeTransposeConv2D(Archit weightsConn->tensor->consumers.push_back(op.get()); Kernel newKernel = kernel->WithStride({1, 1}); op->_type = OpType::Conv2D; - op->SetKernel(&newKernel); + op->SetKernel(newKernel); result.emplace_back(std::move(op)); } else @@ -1346,8 +1344,6 @@ std::vector> LegaliseResize(Architecture *ar ifmConn->quantization = Quantization::Unit(); ifmConn->shape = shape; ifmConn->resamplingMode = ArchResampling::Nearest; - auto kernel = Kernel::UnitKernel(); - newOp->SetKernel(&kernel); result.emplace_back(std::move(newOp)); remainingUpscale /= 2; @@ -1359,7 +1355,7 @@ std::vector> LegaliseResize(Architecture *ar *newOp->ConnectInput(TensorUsage::IFM, ifmConn->tensor) = *ifmConn; Kernel kernel = Kernel::UnitKernel().WithPadding({0, 0, upscaleH - 1, upscaleW - 1, 0, 0}).WithSize({upscaleW, 
upscaleH}); - newOp->SetKernel(&kernel); + newOp->SetKernel(kernel); ofmConn->quantization = Quantization::Unit(); ofmConn->rounding = RoundMode::AUTO; *newOp->ConnectOutput(TensorUsage::OFM, ofmConn->tensor) = *ofmConn; @@ -1565,8 +1561,7 @@ static std::vector> SwapAxes(Architecture *a // Create SchedulerOperation auto op = std::make_unique(OpType::Transpose); - Kernel kernel({1, 1} /* size */, {1, 1} /* stride */, {1, 1} /* dilation */); - op->SetKernel(&kernel); + op->SetKernel(Kernel::UnitKernel()); auto ifmConn = op->AddInput(TensorUsage::IFM); auto ofmConn = op->AddOutput(TensorUsage::OFM); diff --git a/ethosu/regor/compiler/scheduler_operation.hpp b/ethosu/regor/compiler/scheduler_operation.hpp index d88d9a4fd30a7fa31ef098a4bbcb49dce66c87a7..43a0a21a41d1b3537475d2af7a3f5dcce68886f7 100644 --- a/ethosu/regor/compiler/scheduler_operation.hpp +++ b/ethosu/regor/compiler/scheduler_operation.hpp @@ -177,11 +177,11 @@ class SchedulerOperation : public Attributable { friend class SchedulerPacking; friend class Scheduler; + std::unique_ptr _kernel; public: OpType _type; int _index = -1; // Execution index - std::unique_ptr _kernel; bool _npuOp = false; bool _hasScaling = false; void *_srcKey = nullptr; @@ -214,8 +214,8 @@ public: bool IsNpuOp() const { return _npuOp; } void SetNpuOp(bool npuOp) { _npuOp = npuOp; } - class Kernel *Kernel() const { return _kernel.get(); } - void SetKernel(const class Kernel *kernel) { _kernel = std::make_unique(*kernel); } + const class Kernel *Kernel() const { return _kernel ? 
_kernel.get() : &regor::Kernel::UnitKernel(); } + void SetKernel(const class Kernel &kernel) { _kernel = std::make_unique(kernel); } bool HasScaling() const { return _hasScaling; } void SetHasScaling(bool hasScaling) { _hasScaling = hasScaling; } diff --git a/ethosu/regor/compiler/scheduler_packing.cpp b/ethosu/regor/compiler/scheduler_packing.cpp index 3de2f3df4fb3e1c9262908672dd863ad770cff5b..82a77fc992f42bfe767603d4cde868d4d91df33e 100644 --- a/ethosu/regor/compiler/scheduler_packing.cpp +++ b/ethosu/regor/compiler/scheduler_packing.cpp @@ -532,7 +532,7 @@ std::unique_ptr SchedulerPacking::MakeSchedulerOperation(Ope std::unique_ptr schedOp = std::make_unique(op->Type()); - schedOp->SetKernel(op->Kernel()); + schedOp->SetKernel(*op->Kernel()); schedOp->SetHasScaling(op->HasScaling()); schedOp->SetAttributes(op->AttributeRef()); schedOp->_srcKey = op; diff --git a/ethosu/regor/test/test_scheduler_decompose.cpp b/ethosu/regor/test/test_scheduler_decompose.cpp index c0c5cf6bf2962b642355005ca3061e7d5d7a96e2..7c4d327de22b3efb9532793413276c6127fd0e5f 100644 --- a/ethosu/regor/test/test_scheduler_decompose.cpp +++ b/ethosu/regor/test/test_scheduler_decompose.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -40,8 +40,6 @@ std::unique_ptr CreateOperation(OpType opType, Shape ifmShap std::unique_ptr op = CreateSchedulerOperation( opType, TensorUsage::IFM0, ifm1, TensorUsage::IFM1, ifm2, TensorUsage::OFM, ofm); - // set default kernel - op->_kernel = std::make_unique(Point2i(1, 1), Point2i(1, 1), Point2i(1, 1)); return op; } @@ -52,8 +50,6 @@ std::unique_ptr CreateOperation(OpType opType, Shape ifmShap std::unique_ptr op = CreateSchedulerOperation(opType, TensorUsage::IFM0, ifm1, TensorUsage::OFM, ofm); - // set default kernel - op->_kernel = std::make_unique(Point2i(1, 1), Point2i(1, 1), Point2i(1, 
1)); return op; } diff --git a/ethosu/regor/test/util.cpp b/ethosu/regor/test/util.cpp index 1bd614caa7b30d02b0e8fc09a7b3535eb2412069..7ee087735a73d8d563b8d4a3fbe01faba75fdbfb 100644 --- a/ethosu/regor/test/util.cpp +++ b/ethosu/regor/test/util.cpp @@ -225,7 +225,7 @@ std::unique_ptr CreateSchedulerOperation(OpType opType, Tens s_ops.add_op(op); auto schedOp = std::make_unique(opType); - schedOp->SetKernel(op->Kernel()); + schedOp->SetKernel(*op->Kernel()); schedOp->_srcKey = static_cast(op.get()); // ifm auto *ifmConn = schedOp->AddInput(ifmUsage);