From a25debfc1d1a1f151ce7d65d339eeb3ad4d10eb8 Mon Sep 17 00:00:00 2001 From: Philip Hall Date: Thu, 1 May 2025 15:12:03 +0100 Subject: [PATCH] MLBEDSW-10754: Make kernel object usage more consistent This commit rationalises the use of the kernel object to make it simpler to use the unit kernel, and ensures that called functions have the correct kernel constness to prevent accidental modification. Signed-off-by: Philip Hall Change-Id: I27c63feb8e876f359b434a916bed50f41c97f411 --- ethosu/regor/architecture/architecture.hpp | 6 ++--- .../architecture/architecture_constraints.hpp | 2 +- ethosu/regor/compiler/kernel.hpp | 8 +++++-- ethosu/regor/compiler/scheduler.cpp | 2 +- ethosu/regor/compiler/scheduler.hpp | 2 +- ethosu/regor/compiler/scheduler_decompose.cpp | 23 ++++++++----------- ethosu/regor/compiler/scheduler_operation.hpp | 6 ++--- ethosu/regor/compiler/scheduler_packing.cpp | 2 +- .../regor/test/test_scheduler_decompose.cpp | 6 +---- ethosu/regor/test/util.cpp | 2 +- 10 files changed, 27 insertions(+), 32 deletions(-) diff --git a/ethosu/regor/architecture/architecture.hpp b/ethosu/regor/architecture/architecture.hpp index 39654377..b517ea2f 100644 --- a/ethosu/regor/architecture/architecture.hpp +++ b/ethosu/regor/architecture/architecture.hpp @@ -204,7 +204,7 @@ struct ArchitectureConfigQuery Shape ifmShape[2]; int ifmBits; int ofmBits; - Kernel *kernel; + const Kernel *kernel; int lutBytes; bool scaled; ArchResampling ifmResampling; @@ -227,7 +227,7 @@ struct ArchitectureConfigQuery struct PerformanceQuery { OpType type; - Kernel *kernel; + const Kernel *kernel; ArchitectureOpConfig *config; Shape ifmShape[2]; ArchitectureMemory *ifmMemory[2]; @@ -259,7 +259,7 @@ struct WeightStats struct FusionQuery { OpType type; - Kernel *kernel = nullptr; + const Kernel *kernel = nullptr; Shape ifm2Shape; ArchitectureMemory *ifm2Memory = nullptr; DataType ifm2Type; diff --git a/ethosu/regor/architecture/architecture_constraints.hpp 
b/ethosu/regor/architecture/architecture_constraints.hpp index 7003e0a9..a695aee1 100644 --- a/ethosu/regor/architecture/architecture_constraints.hpp +++ b/ethosu/regor/architecture/architecture_constraints.hpp @@ -52,7 +52,7 @@ struct ArchOperatorQuery ArchFM ofm; ReverseType reverseMask = ReverseType::None; TransposeType transposeMask = TransposeType::None; - Kernel *kernel = nullptr; + const Kernel *kernel = nullptr; ~ArchOperatorQuery(){}; }; diff --git a/ethosu/regor/compiler/kernel.hpp b/ethosu/regor/compiler/kernel.hpp index a0b8f8f0..6c7a5ad4 100644 --- a/ethosu/regor/compiler/kernel.hpp +++ b/ethosu/regor/compiler/kernel.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -159,7 +159,11 @@ public: _dilation.x, _dilation.y, _padding.ToString()); } - static Kernel UnitKernel() { return Kernel({1, 1}, {1, 1}, {1, 1}); } + static const Kernel &UnitKernel() + { + static const Kernel s_kernel({1, 1}, {1, 1}, {1, 1}); + return s_kernel; + } }; static inline int RequiredInputSize(int value, int stride, int border, int upscale, int rounding = 0) diff --git a/ethosu/regor/compiler/scheduler.cpp b/ethosu/regor/compiler/scheduler.cpp index 9beb52fd..6558ea5e 100644 --- a/ethosu/regor/compiler/scheduler.cpp +++ b/ethosu/regor/compiler/scheduler.cpp @@ -153,7 +153,7 @@ std::shared_ptr Scheduler::Process() return chosenSchedule; } -Point2i Scheduler::GetStripeInputRequirement(const Shape &ofmShape, Kernel *kernel, const Point2i &ifmStep, ArchResampling resampling) +Point2i Scheduler::GetStripeInputRequirement(const Shape &ofmShape, const Kernel *kernel, const Point2i &ifmStep, ArchResampling resampling) { int rounding; int upscale = _arch->UpscaleAndRounding(resampling, rounding); diff --git a/ethosu/regor/compiler/scheduler.hpp b/ethosu/regor/compiler/scheduler.hpp index eb32f60f..20ab84a4 
100644 --- a/ethosu/regor/compiler/scheduler.hpp +++ b/ethosu/regor/compiler/scheduler.hpp @@ -316,7 +316,7 @@ private: Address CreateSchedulerRepresentation(); - Point2i GetStripeInputRequirement(const Shape &ofmShape, Kernel *kernel, const Point2i &ifmStep, ArchResampling resampling); + Point2i GetStripeInputRequirement(const Shape &ofmShape, const Kernel *kernel, const Point2i &ifmStep, ArchResampling resampling); std::unique_ptr CreateSchedulerOpInfo(SchedulerOperation *op, const Shape &ofmStripeShape, const std::unique_ptr &parentInfo = nullptr); diff --git a/ethosu/regor/compiler/scheduler_decompose.cpp b/ethosu/regor/compiler/scheduler_decompose.cpp index 0623f295..c2ae7462 100644 --- a/ethosu/regor/compiler/scheduler_decompose.cpp +++ b/ethosu/regor/compiler/scheduler_decompose.cpp @@ -54,8 +54,7 @@ static std::unique_ptr MakeMemCopy(const std::shared_ptrshape + ofmSlice->offset <= dest->storageShape); auto op = std::make_unique(OpType::MemoryCopy); - auto kernel = Kernel({1, 1}, {1, 1}, {1, 1}); - op->SetKernel(&kernel); + op->SetKernel(Kernel::UnitKernel()); auto ofmConn = op->AddOutput(TensorUsage::OFM); ofmConn->tensor = dest; @@ -98,8 +97,7 @@ static std::unique_ptr MakeTransposeOp( auto ofmConn = op->AddOutput(TensorUsage::OFM); assert(ifmConn->Type() == ofmConn->Type()); - auto kernel = Kernel({1, 1}, {1, 1}, {1, 1}); - op->SetKernel(&kernel); + op->SetKernel(Kernel::UnitKernel()); const auto attr = op->Attribute(); attr->perm = perm; @@ -136,7 +134,7 @@ MakeSubOperation(const SchedulerOperation *schedOp, const Kernel *newKernel = nu assert(schedOp->SubOps().empty()); assert(schedOp->Parent() == nullptr); auto subOp = std::make_unique(type != OpType::None ? type : schedOp->Type()); - subOp->SetKernel(newKernel ? newKernel : schedOp->Kernel()); + subOp->SetKernel(newKernel ? 
*newKernel : *schedOp->Kernel()); subOp->SetHasScaling(schedOp->HasScaling()); subOp->_srcKey = schedOp->_srcKey; subOp->SetPrimaryIfmIndex(schedOp->PrimaryIfmIndex()); @@ -459,7 +457,7 @@ static void UpdatePaddingAndIfmOffset(SchedulerOperation *op) ifmSlice.offset = ifmSlice.offset.WithHeight(newHeight).WithWidth(newWidth); auto newPadding = Margin(topPad, leftPad, padding.Bottom(), padding.Right()); auto newKernel = kernel->WithPadding(newPadding); - op->SetKernel(&newKernel); + op->SetKernel(newKernel); } // Return a slice of a tensor @@ -558,9 +556,9 @@ static Shape NewOfmBlockShape(Architecture *arch, SchedulerOperation *op) // Get block config for the op after decomposition to smaller kernel // Avoids problems where a block config can't be found as ifm gets too big for RAM auto minKernel = kernel.WithSize({1, 1}).WithStride({1, 1}); - op->SetKernel(&minKernel); + op->SetKernel(minKernel); auto config = GetOpConfig(arch, op); - op->SetKernel(&kernel); + op->SetKernel(kernel); assert(config && "No config found."); if ( !config ) throw DecompositionFailure("No config found"); auto HW = config->OptimalStripeGranule(); @@ -1273,7 +1271,7 @@ std::vector> DecomposeTransposeConv2D(Archit weightsConn->tensor->consumers.push_back(op.get()); Kernel newKernel = kernel->WithStride({1, 1}); op->_type = OpType::Conv2D; - op->SetKernel(&newKernel); + op->SetKernel(newKernel); result.emplace_back(std::move(op)); } else @@ -1346,8 +1344,6 @@ std::vector> LegaliseResize(Architecture *ar ifmConn->quantization = Quantization::Unit(); ifmConn->shape = shape; ifmConn->resamplingMode = ArchResampling::Nearest; - auto kernel = Kernel::UnitKernel(); - newOp->SetKernel(&kernel); result.emplace_back(std::move(newOp)); remainingUpscale /= 2; @@ -1359,7 +1355,7 @@ std::vector> LegaliseResize(Architecture *ar *newOp->ConnectInput(TensorUsage::IFM, ifmConn->tensor) = *ifmConn; Kernel kernel = Kernel::UnitKernel().WithPadding({0, 0, upscaleH - 1, upscaleW - 1, 0, 0}).WithSize({upscaleW, 
upscaleH}); - newOp->SetKernel(&kernel); + newOp->SetKernel(kernel); ofmConn->quantization = Quantization::Unit(); ofmConn->rounding = RoundMode::AUTO; *newOp->ConnectOutput(TensorUsage::OFM, ofmConn->tensor) = *ofmConn; @@ -1565,8 +1561,7 @@ static std::vector> SwapAxes(Architecture *a // Create SchedulerOperation auto op = std::make_unique(OpType::Transpose); - Kernel kernel({1, 1} /* size */, {1, 1} /* stride */, {1, 1} /* dilation */); - op->SetKernel(&kernel); + op->SetKernel(Kernel::UnitKernel()); auto ifmConn = op->AddInput(TensorUsage::IFM); auto ofmConn = op->AddOutput(TensorUsage::OFM); diff --git a/ethosu/regor/compiler/scheduler_operation.hpp b/ethosu/regor/compiler/scheduler_operation.hpp index d88d9a4f..43a0a21a 100644 --- a/ethosu/regor/compiler/scheduler_operation.hpp +++ b/ethosu/regor/compiler/scheduler_operation.hpp @@ -177,11 +177,11 @@ class SchedulerOperation : public Attributable { friend class SchedulerPacking; friend class Scheduler; + std::unique_ptr _kernel; public: OpType _type; int _index = -1; // Execution index - std::unique_ptr _kernel; bool _npuOp = false; bool _hasScaling = false; void *_srcKey = nullptr; @@ -214,8 +214,8 @@ public: bool IsNpuOp() const { return _npuOp; } void SetNpuOp(bool npuOp) { _npuOp = npuOp; } - class Kernel *Kernel() const { return _kernel.get(); } - void SetKernel(const class Kernel *kernel) { _kernel = std::make_unique(*kernel); } + const class Kernel *Kernel() const { return _kernel ? 
_kernel.get() : &regor::Kernel::UnitKernel(); } + void SetKernel(const class Kernel &kernel) { _kernel = std::make_unique(kernel); } bool HasScaling() const { return _hasScaling; } void SetHasScaling(bool hasScaling) { _hasScaling = hasScaling; } diff --git a/ethosu/regor/compiler/scheduler_packing.cpp b/ethosu/regor/compiler/scheduler_packing.cpp index 3de2f3df..82a77fc9 100644 --- a/ethosu/regor/compiler/scheduler_packing.cpp +++ b/ethosu/regor/compiler/scheduler_packing.cpp @@ -532,7 +532,7 @@ std::unique_ptr SchedulerPacking::MakeSchedulerOperation(Ope std::unique_ptr schedOp = std::make_unique(op->Type()); - schedOp->SetKernel(op->Kernel()); + schedOp->SetKernel(*op->Kernel()); schedOp->SetHasScaling(op->HasScaling()); schedOp->SetAttributes(op->AttributeRef()); schedOp->_srcKey = op; diff --git a/ethosu/regor/test/test_scheduler_decompose.cpp b/ethosu/regor/test/test_scheduler_decompose.cpp index c0c5cf6b..7c4d327d 100644 --- a/ethosu/regor/test/test_scheduler_decompose.cpp +++ b/ethosu/regor/test/test_scheduler_decompose.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -40,8 +40,6 @@ std::unique_ptr CreateOperation(OpType opType, Shape ifmShap std::unique_ptr op = CreateSchedulerOperation( opType, TensorUsage::IFM0, ifm1, TensorUsage::IFM1, ifm2, TensorUsage::OFM, ofm); - // set default kernel - op->_kernel = std::make_unique(Point2i(1, 1), Point2i(1, 1), Point2i(1, 1)); return op; } @@ -52,8 +50,6 @@ std::unique_ptr CreateOperation(OpType opType, Shape ifmShap std::unique_ptr op = CreateSchedulerOperation(opType, TensorUsage::IFM0, ifm1, TensorUsage::OFM, ofm); - // set default kernel - op->_kernel = std::make_unique(Point2i(1, 1), Point2i(1, 1), Point2i(1, 1)); return op; } diff --git a/ethosu/regor/test/util.cpp b/ethosu/regor/test/util.cpp index 1bd614ca..7ee08773 100644 --- 
a/ethosu/regor/test/util.cpp +++ b/ethosu/regor/test/util.cpp @@ -225,7 +225,7 @@ std::unique_ptr CreateSchedulerOperation(OpType opType, Tens s_ops.add_op(op); auto schedOp = std::make_unique(opType); - schedOp->SetKernel(op->Kernel()); + schedOp->SetKernel(*op->Kernel()); schedOp->_srcKey = static_cast(op.get()); // ifm auto *ifmConn = schedOp->AddInput(ifmUsage); -- GitLab