From af5f7df9748be6943c5bdd49e47e1568e53712d6 Mon Sep 17 00:00:00 2001
From: Jacob Bohlin
Date: Tue, 21 Jan 2025 09:21:13 +0000
Subject: [PATCH] MLBEDSW-10274 Add TOSA Pad support for Ethos-U55/U65

Change-Id: I91e1fdc69807b0a8702663932944b327f4728a1e
Signed-off-by: Jacob Bohlin
---
 .../architecture/architecture_constraints.hpp |  1 +
 .../ethosu55/ethos_u55_constraints.hpp        |  1 +
 .../ethosu85/ethos_u85_constraints.hpp        |  1 +
 ethosu/regor/compiler/graphir_optimiser.cpp   | 88 ++++++++++++++-----
 ethosu/regor/compiler/graphir_optimiser.hpp   |  6 +-
 5 files changed, 74 insertions(+), 23 deletions(-)

diff --git a/ethosu/regor/architecture/architecture_constraints.hpp b/ethosu/regor/architecture/architecture_constraints.hpp
index c5e24beb..dab6adc1 100644
--- a/ethosu/regor/architecture/architecture_constraints.hpp
+++ b/ethosu/regor/architecture/architecture_constraints.hpp
@@ -103,6 +103,7 @@ public:
     virtual bool SupportsAccumulatorSaveRestore() = 0;
     virtual bool SupportsLeakyRelu(bool quantized, DataType type) = 0;
     virtual bool SupportsNegativeStrides() = 0;
+    virtual bool SupportsNot() = 0;
 
     bool CanExecute(const ExecutionQuery &query)
     {
diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.hpp b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.hpp
index b0355f65..b091ee5a 100644
--- a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.hpp
+++ b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.hpp
@@ -43,6 +43,7 @@ public:
     bool SupportsCast(OpType opType, DataType ifmType, DataType ofmType) override;
     bool SupportsNonMatchingShapes(const Shape &ifmShape, const Shape &ifm2Shape, const Shape &ofmShape) override;
     bool SupportsNegativeStrides() override { return true; };
+    bool SupportsNot() override { return false; };
 
 private:
     ArchEthosU55 *_arch;
diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.hpp b/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.hpp
index eab0a264..fcd6a369 100644
--- a/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.hpp
+++ b/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.hpp
@@ -43,6 +43,7 @@ public:
     bool SupportsCast(OpType opType, DataType ifmType, DataType ofmType) override;
     bool SupportsNonMatchingShapes(const Shape &ifmShape, const Shape &ifm2Shape, const Shape &ofmShape) override;
     bool SupportsNegativeStrides() override { return false; };
+    bool SupportsNot() override { return true; };
 
 private:
     ArchEthosU85 *_arch;
diff --git a/ethosu/regor/compiler/graphir_optimiser.cpp b/ethosu/regor/compiler/graphir_optimiser.cpp
index 30add616..5236b0c3 100644
--- a/ethosu/regor/compiler/graphir_optimiser.cpp
+++ b/ethosu/regor/compiler/graphir_optimiser.cpp
@@ -675,7 +675,20 @@ Operation *GraphIrOptimiser::RewriteRescale(Graph *const, Operation *const opera
     return returnOp;
 }
 
-// Rewrite TOSA PAD to number of MemoryCopy ops
+Operation *GraphIrOptimiser::MakeFillOperation(TensorConnection *const ofmConn, const Shape &ofmShape,
+    const TensorSlice &ofmSlice, std::shared_ptr<Tensor> padTensor, OpType opType)
+{
+    auto fillOp = std::make_shared<Operation>(opType);
+    auto &ifmConn = fillOp->ConnectInput(TensorUsage::IFM, padTensor);
+    if ( opType == OpType::MemoryCopy )
+    {
+        ifmConn.Set(ofmSlice.shape);
+    }
+    fillOp->CopyOutput(TensorUsage::OFM, *ofmConn);
+    fillOp->Output(TensorUsage::OFM)->Set(ofmShape).Set(ofmSlice).Set(RoundMode::NATURAL);
+    return fillOp.get();
+}
+
 Operation *GraphIrOptimiser::RewritePad(Graph *const, Operation *const operation)
 {
     Operation *returnOp = operation;
@@ -684,48 +697,79 @@ Operation *GraphIrOptimiser::RewritePad(Graph *const, Operation *const operation
 {
     const auto &ifmConn = operation->Input(TensorUsage::IFM0);
     const auto &ofmConn = operation->Output(TensorUsage::OFM);
+    const Shape ofmShape = ofmConn->shape;
     const auto &paramsConn = operation->Input(TensorUsage::Params);
     const auto &attr = operation->Attribute<pad_attr_t>();
-    const double pad_const = attr->pad_const;
-    const int not_pad_const = ~int(pad_const);
+    const int padConst = int(attr->pad_const);
 
     // Decode the padding before and after each dimension as two shapes
     Shape paddingBefore = TensorToShape(paramsConn->tensor.get(), paramsConn->shape.Width(), 2, 0);
     Shape paddingAfter = TensorToShape(paramsConn->tensor.get(), paramsConn->shape.Width(), 2, 1);
 
+    OpType fillOpType;
+    std::shared_ptr<Tensor> padTensor;
+    DataType dataType = ofmConn->tensor->Type();
+    if ( _constraints->SupportsNot() )
+    {
+        // Native support for elementwise Not can be utilized to broadcast a single scalar value
+        // to the whole area to be filled.
+        fillOpType = OpType::Not;
+        padTensor = CreateConstTensor("pad_const", dataType, ~padConst);
+    }
+    else
+    {
+        // Fallback case - find the largest required pad area and create a constant of that size
+        // filled with the padding value. Then memcopy slices of this tensor to the different
+        // axes to be padded.
+        int maxElements = 0;
+        for ( int axis = 0; axis < ofmShape.Size(); axis++ )
+        {
+            int padElements = (ofmShape.Elements() / ofmShape[axis]) * std::max(paddingBefore[axis], paddingAfter[axis]);
+            maxElements = std::max(maxElements, padElements);
+        }
+
+        fillOpType = OpType::MemoryCopy;
+        int bits = DataTypeSizeBits(dataType);
+        // Mask out the bits from the original constant to force a zero extension regardless
+        // of signedness.
+        uint32_t fillPattern = uint32_t(padConst) & (~0u >> std::max(32 - bits, 0));
+        // Then replicate the bits from the original constant to the rest of the 32-bit value if needed.
+        // So for example the 8-bit value -2 (0xfe) is replicated to 0xfefefefe, while the 16-bit value
+        // -2 (0xfffe) becomes 0xfffefffe.
+        if ( bits < 16 )
+        {
+            fillPattern |= fillPattern << 8;
+        }
+        if ( bits < 32 )
+        {
+            fillPattern |= fillPattern << 16;
+        }
+        std::vector<uint32_t> buffer(DivRoundUp(DataTypeStorageSizeBytes(dataType, maxElements), 4), fillPattern);
+        const Shape padShape = Shape(maxElements);
+        padTensor = CreateConstTensor("pad_const", dataType, std::make_shared<Buffer>(std::move(buffer)), &padShape);
+    }
+
     for ( int axis = 0; axis < ifmConn->shape.Size(); axis++ )
     {
         // Reshape the IFM/OFM/padding to a 3D shape (HWC) where W dimension is the dimension to pad
         Shape newIfmShape = ReshapeTo3DAroundAxis(ifmConn->shape, axis);
-        Shape newOfmShape = ReshapeTo3DAroundAxis(ofmConn->shape, axis);
+        Shape newOfmShape = ReshapeTo3DAroundAxis(ofmShape, axis);
         Shape newPaddingBefore = ReshapeTo3DAroundAxis(paddingBefore, axis, 0);
 
         const int padBefore = paddingBefore[axis];
         if ( padBefore )
         {
-            Shape newOfmSliceOffset = newPaddingBefore.WithWidth(0);
-            Shape newOfmSliceShape = newOfmShape.WithWidth(padBefore);
-
-            // Fill padded elements with pad_const
-            auto fillOp = std::make_shared<Operation>(OpType::Not);
-            fillOp->ConnectInput(TensorUsage::IFM, CreateConstTensor("pad_const", ifmConn->tensor->Type(), not_pad_const));
-            fillOp->CopyOutput(TensorUsage::OFM, *ofmConn);
-            fillOp->Output(TensorUsage::OFM)->Set(newOfmShape).Set({newOfmSliceOffset, newOfmSliceShape}).Set(RoundMode::NATURAL);
-            RecordOptimisation(operation, fillOp.get());
+            TensorSlice newOfmSlice = {newPaddingBefore.WithWidth(0), newOfmShape.WithWidth(padBefore)};
+            auto fillOp = MakeFillOperation(ofmConn, newOfmShape, newOfmSlice, padTensor, fillOpType);
+            RecordOptimisation(operation, fillOp);
         }
 
         const int padAfter = paddingAfter[axis];
         if ( padAfter )
         {
-            Shape newOfmSliceOffset = newPaddingBefore.WithWidth(padBefore + newIfmShape.Width());
-            Shape newOfmSliceShape = newOfmShape.WithWidth(padAfter);
-
-            // Fill padded elements with pad_const
-            auto fillOp = std::make_shared<Operation>(OpType::Not);
-            fillOp->ConnectInput(TensorUsage::IFM, CreateConstTensor("pad_const", ifmConn->tensor->Type(), not_pad_const));
-            fillOp->CopyOutput(TensorUsage::OFM, *ofmConn);
-            fillOp->Output(TensorUsage::OFM)->Set(newOfmShape).Set({newOfmSliceOffset, newOfmSliceShape}).Set(RoundMode::NATURAL);
-            RecordOptimisation(operation, fillOp.get());
+            TensorSlice newOfmSlice = {newPaddingBefore.WithWidth(padBefore + newIfmShape.Width()), newOfmShape.WithWidth(padAfter)};
+            auto fillOp = MakeFillOperation(ofmConn, newOfmShape, newOfmSlice, padTensor, fillOpType);
+            RecordOptimisation(operation, fillOp);
         }
     }
diff --git a/ethosu/regor/compiler/graphir_optimiser.hpp b/ethosu/regor/compiler/graphir_optimiser.hpp
index 86dec4a3..8a14c700 100644
--- a/ethosu/regor/compiler/graphir_optimiser.hpp
+++ b/ethosu/regor/compiler/graphir_optimiser.hpp
@@ -1,5 +1,5 @@
 //
-// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates
+// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates
 //
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -73,6 +73,10 @@ private:
     void MoveToConsumer(const Operation *const operation, Operation *const cons);
     Operation *MoveSplitSliceToConsumer(Graph *const, Operation *const operation);
    Operation *UnrollConv(Graph *const, Operation *const operation);
+    // Utility/Helper methods
+    Operation *MakeFillOperation(TensorConnection *const ofmConn, const Shape &ofmShape, const TensorSlice &ofmSlice,
+        std::shared_ptr<Tensor> padTensor, OpType opType);
+
     // The graph optimisation steps.
     // Order matters, array of rewrites processed in order.
     // clang-format off
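
A note on the Not-based fill above: the "pad_const" tensor holds the
bitwise-inverted constant (~padConst), so the elementwise Not restores the
original pad value while broadcasting the scalar across the whole OFM slice.
A minimal sketch of that round trip, in plain C++ with no regor types
(illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main()
    {
        int8_t padConst = -2;
        int8_t stored = int8_t(~padConst);  // value placed in the const "pad_const" tensor
        int8_t filled = int8_t(~stored);    // what the elementwise Not writes per element
        assert(filled == padConst);         // the round trip restores the pad constant
        return 0;
    }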
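Likewise, the MemoryCopy fallback builds its constant buffer from 32-bit
words, so the element-wide pad value has to be zero-extended and then
replicated across the word, exactly as the patch's comment describes. A
standalone sketch of that bit-replication step (the helper name
ReplicateFillPattern is made up for illustration):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Splat an 8/16/32-bit pad constant across a 32-bit fill word.
    static uint32_t ReplicateFillPattern(int padConst, int bits)
    {
        // Mask to the element width so the sign extension of padConst
        // does not leak into the upper lanes.
        uint32_t fillPattern = uint32_t(padConst) & (~0u >> std::max(32 - bits, 0));
        if ( bits < 16 ) fillPattern |= fillPattern << 8;   // 8 -> 16 bits
        if ( bits < 32 ) fillPattern |= fillPattern << 16;  // 16 -> 32 bits
        return fillPattern;
    }

    int main()
    {
        assert(ReplicateFillPattern(-2, 8) == 0xfefefefeu);   // 0xfe replicated
        assert(ReplicateFillPattern(-2, 16) == 0xfffefffeu);  // 0xfffe replicated
        assert(ReplicateFillPattern(-2, 32) == 0xfffffffeu);  // already full width
        return 0;
    }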