From 1e3e465339af3be57fd85c8f29f6f491d8960ce9 Mon Sep 17 00:00:00 2001 From: Jacob Bohlin Date: Tue, 21 Jan 2025 12:10:54 +0000 Subject: [PATCH] MLBEDSW-10274 Lower TFLite Pad to TOSA Pad Avoid having separate handling of TFLite Pad now that GraphIR lowering supports all Ethos-U targets. TFLite graph optimiser will now convert TFLite Pad to TOSA Pad which is then target-dependently lowered in GraphIR optimiser. Change-Id: Id133173173721556f85a4231184bc70018333fe2 Signed-off-by: Jacob Bohlin --- ethosu/regor/common/buffer_view.hpp | 27 ++++++ .../regor/compiler/tflite_graph_optimiser.cpp | 96 +++---------------- .../regor/compiler/tflite_graph_optimiser.hpp | 11 +-- ethosu/regor/tflite/tflite_reader.cpp | 9 +- 4 files changed, 52 insertions(+), 91 deletions(-) diff --git a/ethosu/regor/common/buffer_view.hpp b/ethosu/regor/common/buffer_view.hpp index 3dc282cd..70acb123 100644 --- a/ethosu/regor/common/buffer_view.hpp +++ b/ethosu/regor/common/buffer_view.hpp @@ -655,6 +655,33 @@ public: return start + _baseOffset; } + template<typename TYPE> + BufferReader<TYPE> Values(DataType dataType) + { + switch ( dataType ) + { + case DataType::Int8: + return Values<int8_t, TYPE>(); + case DataType::UInt8: + return Values<uint8_t, TYPE>(); + case DataType::Int16: + return Values<int16_t, TYPE>(); + case DataType::UInt16: + return Values<uint16_t, TYPE>(); + case DataType::Int32: + return Values<int32_t, TYPE>(); + case DataType::UInt32: + return Values<uint32_t, TYPE>(); + case DataType::Int64: + return Values<int64_t, TYPE>(); + case DataType::UInt64: + return Values<uint64_t, TYPE>(); + default: + assert(false && "Unexpected DataType"); + return Values<TYPE, TYPE>(); + } + } + const class Buffer *Buffer() const { return _buffer.get(); } }; diff --git a/ethosu/regor/compiler/tflite_graph_optimiser.cpp b/ethosu/regor/compiler/tflite_graph_optimiser.cpp index ed0af7bd..8364f5a6 100644 --- a/ethosu/regor/compiler/tflite_graph_optimiser.cpp +++ b/ethosu/regor/compiler/tflite_graph_optimiser.cpp @@ -2595,7 +2595,7 @@ Operation *TFLiteGraphOptimiser::ReplacePadByExplicitPadding(Graph *const graph, // Potential for future 
optimization: in certain cases also Pad+AvgPool can be handled // by changing to Depthwise. auto padOp = operation->IFM(0)->Writers()[0].get(); - if ( padOp->Type() != OpType::Pad ) + if ( padOp->Type() != OpType::Pad || padOp->Attribute()->pad_const != 0 ) { return operation; } @@ -2640,92 +2640,26 @@ Operation *TFLiteGraphOptimiser::ReplacePadByExplicitPadding(Graph *const graph, return operation; } -void TFLiteGraphOptimiser::MakeMemoryCopyForPad( - const char *name, const Operation *operation, TensorConnection *ofmConn, const Shape &shape, const Shape &offset) -{ - auto dtype = ofmConn->tensor->Type(); - std::vector zeroBuf(DataTypeStorageSizeBytes(dtype, shape.Elements())); - std::fill(zeroBuf.begin(), zeroBuf.end(), uint8_t(ofmConn->quantization.zeroPoints[0])); - - auto zeroTens = CreateConstTensor(ofmConn->tensor->Name() + "/" + name, dtype, std::make_shared(std::move(zeroBuf)), &shape); - auto op = std::make_shared(OpType::MemoryCopy); - - op->ConnectInput(TensorUsage::IFM0, zeroTens).Set(ofmConn->quantization); - op->ConnectOutput(TensorUsage::OFM, ofmConn->tensor) - .Set(ofmConn->shape) - .Set(ofmConn->quantization) - .Set({offset, shape}) - .Set(RoundMode::NATURAL); - RecordOptimisation(operation, op.get()); -} - -// Rewrites PAD operator to a MemoryCopy that copies the IFM to the OFM -// + up to 4 MemoryCopy operators that fill the OFM with zeros at the borders. 
-// This is done as fall-back for the PAD operators that remain after ReplacePadByExplicitPadding -Operation *TFLiteGraphOptimiser::ConvertPad(Graph *const graph, Operation *const operation) +// Lower PadV2 to TOSA Pad +Operation *TFLiteGraphOptimiser::ConvertPadV2(Graph *const graph, Operation *const operation) { UNUSED(graph); - if ( operation->Type() != OpType::Pad ) + if ( operation->Type() == OpType::PadV2 ) { - return operation; - } - const auto &ifmConn = operation->Input(TensorUsage::IFM0); - const auto &ifmShape = ifmConn->shape; - const auto &ofmConn = operation->Output(TensorUsage::OFM); - const auto &ofmShape = ofmConn->shape; - const auto &paramsConn = operation->Input(TensorUsage::Params); + auto padOp = std::make_shared<Operation>(OpType::Pad); + padOp->CopyInput(TensorUsage::IFM, *operation->Input(TensorUsage::IFM)); + padOp->CopyInput(TensorUsage::Params, *operation->Input(TensorUsage::Params0)); + padOp->CopyOutput(TensorUsage::OFM, *operation->Output(TensorUsage::OFM)); - BufferReader padValues = GetPadValuesFromTensor(paramsConn->tensor); - int numPadValues = paramsConn->tensor->View().Elements(); - int top = GetPadValue(padValues, numPadValues, PadAxis::Top); - int bottom = GetPadValue(padValues, numPadValues, PadAxis::Bottom); - int left = GetPadValue(padValues, numPadValues, PadAxis::Left); - int right = GetPadValue(padValues, numPadValues, PadAxis::Right); - int near = GetPadValue(padValues, numPadValues, PadAxis::Near); - int far = GetPadValue(padValues, numPadValues, PadAxis::Far); + const auto &attr = padOp->Attribute(); + const auto padConstTens = operation->Input(TensorUsage::Params1)->tensor; + attr->pad_const = padConstTens->View().Values<int32_t>(padConstTens->Type())[0]; - // Create MemoryCopy op that copies IFM to the right place inside the OFM - Shape shp0 = ofmShape.WithZeros(); - auto mainOp = MakeMemoryCopyForConcat(ofmConn, ifmConn, shp0.WithHeight(top).WithWidth(left).WithDepth(near)); - RecordOptimisation(operation, mainOp.get()); - // Add 
operations that fill the borders of the OFM - if ( top > 0 ) - { - Shape shape = ofmShape.WithHeight(top); - MakeMemoryCopyForPad("top", operation, ofmConn, shape, shp0); - } - if ( bottom > 0 ) - { - Shape shape = ofmShape.WithHeight(bottom); - Shape offset = shp0.WithHeight(ofmShape.Height() - bottom); - MakeMemoryCopyForPad("bottom", operation, ofmConn, shape, offset); - } - if ( left > 0 ) - { - Shape shape = ifmShape.WithWidth(left).WithDepth(ofmShape.Depth()); - Shape offset = shp0.WithHeight(top); - MakeMemoryCopyForPad("left", operation, ofmConn, shape, offset); - } - if ( right > 0 ) - { - Shape shape = ifmShape.WithWidth(right).WithDepth(ofmShape.Depth()); - Shape offset = shp0.WithHeight(top).WithWidth(ofmShape.Width() - right); - MakeMemoryCopyForPad("right", operation, ofmConn, shape, offset); - } - if ( near > 0 ) - { - Shape shape = ifmShape.WithDepth(near); - Shape offset = shp0.WithHeight(top).WithWidth(left); - MakeMemoryCopyForPad("near", operation, ofmConn, shape, offset); - } - if ( far > 0 ) - { - Shape shape = ifmShape.WithDepth(far); - Shape offset = shp0.WithHeight(top).WithWidth(left).WithDepth(ofmShape.Depth() - far); - MakeMemoryCopyForPad("far", operation, ofmConn, shape, offset); + RecordOptimisation(operation, padOp.get()); + operation->Disconnect(); + return padOp.get(); } - operation->Disconnect(); - return mainOp.get(); + return operation; } void TFLiteGraphOptimiser::MakeMemoryCopyForMirrorPad(const Operation *operation, TensorConnection *ifmConn, const Shape &readShape, diff --git a/ethosu/regor/compiler/tflite_graph_optimiser.hpp b/ethosu/regor/compiler/tflite_graph_optimiser.hpp index bc78d113..3cf68bdb 100644 --- a/ethosu/regor/compiler/tflite_graph_optimiser.hpp +++ b/ethosu/regor/compiler/tflite_graph_optimiser.hpp @@ -162,13 +162,8 @@ private: // This is the most efficient way to implement PAD, but cannot be done for all pad sizes. 
Operation *ReplacePadByExplicitPadding(Graph *const graph, Operation *const operation); - void MakeMemoryCopyForPad(const char *name, const Operation *operation, TensorConnection *ofmConn, - const Shape &shape, const Shape &offset); - - // Rewrites PAD operator to a MemoryCopy that copies the IFM to the OFM - // + up to 4 MemoryCopy operators that fill the OFM with zeros at the borders. - // This is done as fall-back for the PAD operators that remain after ReplacePadByExplicitPadding - Operation *ConvertPad(Graph *const graph, Operation *const operation); + // Lower PadV2 to TOSA Pad + Operation *ConvertPadV2(Graph *const graph, Operation *const operation); void MakeMemoryCopyForMirrorPad(const Operation *operation, TensorConnection *ifmConn, const Shape &readShape, const Shape &readOffset, TensorConnection *ofmConn, const Shape &writeShape, const Shape &writeOffset, ReverseType reverseAxis); @@ -249,12 +244,12 @@ public: &TFLiteGraphOptimiser::ConvertResize, &TFLiteGraphOptimiser::ConvertTranspose, &TFLiteGraphOptimiser::ConvertMirrorPad, + &TFLiteGraphOptimiser::ConvertPadV2, } }, { {}, { - &TFLiteGraphOptimiser::ConvertPad, &TFLiteGraphOptimiser::ConvertZeroPoint, } }, diff --git a/ethosu/regor/tflite/tflite_reader.cpp b/ethosu/regor/tflite/tflite_reader.cpp index eb2514ce..328a831c 100644 --- a/ethosu/regor/tflite/tflite_reader.cpp +++ b/ethosu/regor/tflite/tflite_reader.cpp @@ -727,6 +727,12 @@ void TfLiteReader::ParseOperatorOptions(const std::shared_ptr &operat } break; + case tflite::BuiltinOptions::PadOptions: + { + operation->Attribute()->pad_const = 0; + } + break; + case tflite::BuiltinOptions::ResizeBilinearOptions: case tflite::BuiltinOptions::ResizeNearestNeighborOptions: break; @@ -745,7 +751,7 @@ void TfLiteReader::ParseOperatorOptions(const std::shared_ptr &operat case tflite::BuiltinOptions::NONE: case tflite::BuiltinOptions::HardSwishOptions: case tflite::BuiltinOptions::MaximumMinimumOptions: - case tflite::BuiltinOptions::PadOptions: + case 
tflite::BuiltinOptions::PadV2Options: case tflite::BuiltinOptions::DequantizeOptions: case tflite::BuiltinOptions::QuantizeOptions: case tflite::BuiltinOptions::TransposeOptions: @@ -775,7 +781,6 @@ void TfLiteReader::ParseOperatorOptions(const std::shared_ptr &operat case tflite::BuiltinOptions::CastOptions: case tflite::BuiltinOptions::LessOptions: case tflite::BuiltinOptions::NegOptions: - case tflite::BuiltinOptions::PadV2Options: case tflite::BuiltinOptions::GreaterOptions: case tflite::BuiltinOptions::GreaterEqualOptions: case tflite::BuiltinOptions::LessEqualOptions: -- GitLab