From 0eac8b088c9b46fdaa55261c864b563d9c8c2809 Mon Sep 17 00:00:00 2001 From: Fredrik Svedberg Date: Thu, 3 Apr 2025 17:02:41 +0200 Subject: [PATCH 1/3] MLBEDSW-10662 Fix tensor connections in decompose The scheduler tensor connections were not always updated correctly in scheduler decompose. Added functions to SchedulerOperation for easier management of connections. Change-Id: I2d54f026e2adbf9659c3c6719e1b1c6bb57a3a93 Signed-off-by: Fredrik Svedberg --- ethosu/regor/compiler/scheduler_decompose.cpp | 36 +++++++------------ ethosu/regor/compiler/scheduler_operation.hpp | 29 ++++++++++----- ethosu/regor/compiler/scheduler_packing.cpp | 2 +- 3 files changed, 35 insertions(+), 32 deletions(-) diff --git a/ethosu/regor/compiler/scheduler_decompose.cpp b/ethosu/regor/compiler/scheduler_decompose.cpp index a5b91100..38cfad2e 100644 --- a/ethosu/regor/compiler/scheduler_decompose.cpp +++ b/ethosu/regor/compiler/scheduler_decompose.cpp @@ -76,7 +76,7 @@ static std::unique_ptr MakeMemCopy(const std::shared_ptrshape = Shape::PadAxes(ofmConn->tensor->storageShape, 4, 1); ofmConn->tensor->producers.push_back(op.get()); - auto ifmConn = op->AddInput(TensorUsage::IFM, source); + auto ifmConn = op->ConnectInput(TensorUsage::IFM, source); if ( ifmConn->tensor->dataType == DataType::Int64 ) { // Copy int64 data as int32 data with 2 x C by cloning source tensor ifmConn->tensor = std::make_shared(*source); @@ -875,8 +875,8 @@ std::vector> DecomposeConv3D(Architecture *a if ( KD > 1 ) { auto offset = subOpWeights->shape.WithZeros().With(1, kd); - subOpWeights->tensor = Slice(subOpWeights->tensor.get(), offset, subOpWeights->shape.With(1, 1)); - subOpWeights->tensor->consumers.push_back(subOp.get()); + auto subOpWeightsSlice = Slice(subOpWeights->tensor.get(), offset, subOpWeights->shape.With(1, 1)); + subOp->ConnectInput(TensorUsage::Weights, subOpWeightsSlice); } // New weight shape auto subOpWeightShape = subOpWeights->shape.Erase(1); @@ -903,7 +903,7 @@ std::vector> 
DecomposeConv3D(Architecture *a // Setup SchedulerTensor for 0 input ifm0->uid = GenerateUniqueId(); ifm0->dataType = subOpIfm->tensor->dataType; - ifm0->memArea = subOp->Input(TensorUsage::Scales)->tensor->memArea; + ifm0->memArea = arch->ReadonlyMemory(); ifm0->format = TensorFormat::NHWC; const auto bufSize = ifm0shape.Elements(); const auto &zeroPoints = subOpIfm->quantization.zeroPoints; @@ -924,8 +924,7 @@ std::vector> DecomposeConv3D(Architecture *a ifm0->bufferView = BufferView(ifm0buf, 0, DataTypeStorageSizeBits(ifm0->dataType), ifm0shape, {}); ifm0->storageShape = ifm0->bufferView.ViewShape(); } - subOpIfm->tensor = ifm0; - subOpIfm->tensor->consumers.push_back(subOp.get()); + subOp->ConnectInput(TensorUsage::IFM, ifm0); subOpIfm->shape = ifm0shape; subOpIfm->slice.offset = ifm0shape.WithZeros(); subOpIfm->slice.shape = ifm0shape; @@ -950,27 +949,22 @@ std::vector> DecomposeConv3D(Architecture *a if ( subOp != conv2dSubOps.begin() ) { // Acc source ifm2 for all but first subop - (*subOp)->AddInput(TensorUsage::IFM1, acc)->shape = acc->storageShape; + (*subOp)->ConnectInput(TensorUsage::IFM1, acc)->shape = acc->storageShape; (*subOp)->SetAccumulatorMode({AccumulatorSource::Ifm2, true}); } if ( *subOp != tail ) { // Remove scaling and bias and set ofm = acc tensor // (used as acc input for next op) for all but last subop - auto subOpOfm = (*subOp)->OFM(); + auto subOpOfm = (*subOp)->ConnectOutput(TensorUsage::OFM, acc); auto subOpIfm = (*subOp)->IFM(0); auto subOpWeights = (*subOp)->Input(TensorUsage::Weights); - auto subOpBias = (*subOp)->Input(TensorUsage::Scales); - subOpOfm->tensor = acc; - subOpOfm->tensor->producers.push_back((*subOp).get()); + auto subOpBias = (*subOp)->ConnectInput(TensorUsage::Scales, bias0); subOpOfm->shape = acc->storageShape; subOpOfm->slice.offset = subOpOfm->shape.WithZeros(); subOpOfm->quantization.scales = {QuantizedScale::Unit()}; subOpIfm->quantization.scales = {QuantizedScale::Unit()}; subOpWeights->quantization.scales = 
{QuantizedScale::Unit()}; - subOpBias->tensor->RemoveReader((*subOp).get()); - subOpBias->tensor = bias0; - subOpBias->tensor->consumers.push_back((*subOp).get()); subOpBias->shape = bias0->storageShape; } } @@ -1051,11 +1045,10 @@ std::vector> DecomposeDepthwiseConv2D(Archit auto subWeightsReadShape = Shape(kernel->Size().y, kernel->Size().x, subOfmDepth, depthMultiplier); auto subWeightsShape = subWeightsReadShape.WithDepth(1); auto subWeightOffset = weightsShape.WithZeros().WithDepth(multiplier); - auto subWeightsConn = subOp->Input(TensorUsage::Weights); - subWeightsConn->tensor = Slice(weightsConn->tensor.get(), subWeightOffset, subWeightsShape, subWeightsReadShape); + auto subWeights = Slice(weightsConn->tensor.get(), subWeightOffset, subWeightsShape, subWeightsReadShape); + auto subWeightsConn = subOp->ConnectInput(TensorUsage::Weights, subWeights); // Tensor is now in AxisOrder::HWCM with M=1 subWeightsConn->tensor->srcTensor->SetAxisOrder(AxisOrder::HWCM); - subWeightsConn->tensor->consumers.push_back(subOp.get()); subWeightsConn->shape = subWeightsShape; subWeightsConn->quantization = SliceQ(subWeightsConn->quantization, multiplier, depthMultiplier); if ( biasShape.Depth() > 1 ) @@ -1063,16 +1056,13 @@ std::vector> DecomposeDepthwiseConv2D(Archit auto subBiasReadShape = Shape(subOfmDepth, depthMultiplier); auto subBiasShape = Shape(subBiasReadShape.WithDepth(1), biasShape.Size(), 1); auto subBiasOffset = biasShape.WithZeros().WithDepth(multiplier); - auto subBiasConn = subOp->Input(TensorUsage::Scales); - subBiasConn->tensor = Slice(biasConn->tensor.get(), subBiasOffset, subBiasShape, subBiasReadShape); + auto subBias = Slice(biasConn->tensor.get(), subBiasOffset, subBiasShape, subBiasReadShape); + auto subBiasConn = subOp->ConnectInput(TensorUsage::Scales, subBias); subBiasConn->tensor->bufferView = subBiasConn->tensor->bufferView.Reshape({subOfmDepth}); - subBiasConn->tensor->consumers.push_back(subOp.get()); subBiasConn->shape = 
biasShape.WithDepth(subOfmDepth); subBiasConn->quantization = SliceQ(subBiasConn->quantization, multiplier, depthMultiplier); } - auto subOfmConn = subOp->Output(TensorUsage::OFM); - subOfmConn->tensor = transposedOfm; - subOfmConn->tensor->producers.push_back(subOp.get()); + auto subOfmConn = subOp->ConnectOutput(TensorUsage::OFM, transposedOfm); subOfmConn->shape = transposedOfm->storageShape; subOfmConn->slice.offset = ofmShape.WithZeros().WithBatch(multiplier); subOfmConn->slice.shape = ofmShape.WithDepth(subOfmDepth); diff --git a/ethosu/regor/compiler/scheduler_operation.hpp b/ethosu/regor/compiler/scheduler_operation.hpp index 391ee668..12976f01 100644 --- a/ethosu/regor/compiler/scheduler_operation.hpp +++ b/ethosu/regor/compiler/scheduler_operation.hpp @@ -90,6 +90,12 @@ public: consumers.erase(end, consumers.end()); } + void RemoveWriter(const SchedulerOperation *op) + { + auto end = std::remove(producers.begin(), producers.end(), op); + producers.erase(end, producers.end()); + } + void SetAddress(Address address) { assert(address >= 0); @@ -220,15 +226,14 @@ public: void SetAttributes(const Attributes &attr) { _attr = attr; } // Input connections - SchedulerConnection *AddInput(TensorUsage usage, const std::shared_ptr &tensor = {}) + SchedulerConnection *AddInput(TensorUsage usage) { return &inputs[usage]; } + SchedulerConnection *ConnectInput(TensorUsage usage, const std::shared_ptr &tensor) { - auto &conn = inputs[usage]; - if ( tensor ) - { - conn.tensor = tensor; - tensor->consumers.push_back(this); - } - return &conn; + auto conn = &inputs[usage]; + if ( conn->tensor && conn->tensor != tensor ) conn->tensor->RemoveReader(this); + conn->tensor = tensor; + tensor->consumers.push_back(this); + return conn; } const SchedulerConnection *TryInput(TensorUsage usage) const { return inputs.try_ref(usage); } @@ -270,6 +275,14 @@ public: // Output connections SchedulerConnection *AddOutput(TensorUsage usage) { return &outputs[usage]; } + SchedulerConnection 
*ConnectOutput(TensorUsage usage, const std::shared_ptr &tensor) + { + auto conn = &outputs[usage]; + if ( conn->tensor && conn->tensor != tensor ) conn->tensor->RemoveWriter(this); + conn->tensor = tensor; + tensor->producers.push_back(this); + return conn; + } SchedulerConnection *TryOutput(TensorUsage usage) { return outputs.try_ref(usage); } SchedulerConnection *Output(TensorUsage usage) { return &outputs.at(usage); } diff --git a/ethosu/regor/compiler/scheduler_packing.cpp b/ethosu/regor/compiler/scheduler_packing.cpp index e0942de4..8ed4d9e5 100644 --- a/ethosu/regor/compiler/scheduler_packing.cpp +++ b/ethosu/regor/compiler/scheduler_packing.cpp @@ -590,7 +590,7 @@ std::unique_ptr SchedulerPacking::MakeSchedulerOperation(Ope if ( req.req.Any(ArchRequirement::ScratchTensor) && req.scratch.size ) { auto scratchTensor = std::make_shared(req.scratch.type, req.scratch.size, req.scratch.format); - SchedulerConnection *scratchConn = schedOp->AddInput(TensorUsage::Scratch0, scratchTensor); + SchedulerConnection *scratchConn = schedOp->ConnectInput(TensorUsage::Scratch0, scratchTensor); scratchConn->shape = req.scratch.size; scratchTensor->memArea = _arch->FeatureMapMemory(); } -- GitLab From 15c70072f31405b3450b0b44f9c1eb673b00a61c Mon Sep 17 00:00:00 2001 From: Fredrik Svedberg Date: Tue, 1 Apr 2025 17:02:56 +0200 Subject: [PATCH 2/3] MLBEDSW-10662 Fix DecomposeTransposeConv2D batch decompose Batch decompose of TransposeConv2D called the regular Conv2D decompose function. 
Change-Id: I0bd71f28d5eda261376488b4f716d80b318d9619 Signed-off-by: Fredrik Svedberg --- ethosu/regor/compiler/scheduler_decompose.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ethosu/regor/compiler/scheduler_decompose.cpp b/ethosu/regor/compiler/scheduler_decompose.cpp index 38cfad2e..35cf4423 100644 --- a/ethosu/regor/compiler/scheduler_decompose.cpp +++ b/ethosu/regor/compiler/scheduler_decompose.cpp @@ -1180,7 +1180,6 @@ static std::shared_ptr ReverseHW(SchedulerTensor *tensor) { assert(tensor->IsConstant()); assert(tensor->producers.size() == 0); - assert(tensor->consumers.size() == 1); switch ( tensor->dataType ) { @@ -1218,7 +1217,7 @@ std::vector> DecomposeTransposeConv2D(Archit if ( ofmShape.Batch() > 1 ) { - return DecomposeLeadingDimensions(1, arch, std::move(op), DecomposeConv2D); + return DecomposeLeadingDimensions(1, arch, std::move(op), DecomposeTransposeConv2D); } // Convert TransposeConv2D to Conv2D by -- GitLab From 81312f0e66d96dfae7f84e1eb91e89d5ce5d957a Mon Sep 17 00:00:00 2001 From: Fredrik Svedberg Date: Tue, 1 Apr 2025 17:04:05 +0200 Subject: [PATCH 3/3] MLBEDSW-10662 Add missing TFLite DataType mappings Some DataType mappings were missing in tflite_mapping.cpp Change-Id: Ib5068dd947057a184d493b134db4aee720e302b8 Signed-off-by: Fredrik Svedberg --- ethosu/regor/tflite/tflite_mapping.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ethosu/regor/tflite/tflite_mapping.cpp b/ethosu/regor/tflite/tflite_mapping.cpp index d29420b7..769e2bbf 100644 --- a/ethosu/regor/tflite/tflite_mapping.cpp +++ b/ethosu/regor/tflite/tflite_mapping.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021, 2023-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021, 2023-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -47,7 +47,9 @@ const std::map TfLiteMapping::_tensorTypeToDataTyp {tflite::TensorType::RESOURCE, DataType::Resource}, 
{tflite::TensorType::VARIANT, DataType::Variant}, {tflite::TensorType::UINT32, DataType::UInt32}, - {tflite::TensorType::UINT16, DataType::UInt16} + {tflite::TensorType::UINT16, DataType::UInt16}, + {tflite::TensorType::INT4, DataType::Int4Packed8}, + {tflite::TensorType::BFLOAT16, DataType::BFloat16}, // clang-format on }; -- GitLab