From 8e57c7605377627e5faf885240cd27c1619e7a97 Mon Sep 17 00:00:00 2001 From: William Isaksson Date: Sat, 7 Jun 2025 23:50:32 +0200 Subject: [PATCH] MLBEDSW-10890: Add ext key tracking for TOSA-networks -Adds ext key tracking for TOSA networks. This means tracking the order of operators in the serialized input file Change-Id: I01b9438b503fde02e7071c5e8cd8b1f1483a9b3e Signed-off-by: William Isaksson --- ethosu/regor/compiler/compiler.cpp | 19 +++- ethosu/regor/compiler/graph_builder.cpp | 8 ++ ethosu/regor/compiler/graph_builder.hpp | 4 +- ethosu/regor/compiler/graph_optimiser.cpp | 47 ++++---- ethosu/regor/compiler/graph_optimiser.hpp | 2 +- ethosu/regor/compiler/graph_optimiser_db.hpp | 12 +- ethosu/regor/compiler/graphir_optimiser.cpp | 65 ++++++----- ethosu/regor/compiler/lstm.cpp | 2 +- ethosu/regor/compiler/network_performance.cpp | 6 +- ethosu/regor/compiler/operation.cpp | 1 + ethosu/regor/compiler/operation.hpp | 5 + ethosu/regor/compiler/softmax.cpp | 4 +- .../regor/compiler/tflite_graph_optimiser.cpp | 106 +++++++++--------- .../compiler/tflite_graph_optimiser_tp.cpp | 2 +- ethosu/regor/include/graphapi.hpp | 3 +- ethosu/regor/tflite/tflite_reader.cpp | 2 +- ethosu/regor/tosa/tosa_reader.cpp | 1 + 17 files changed, 156 insertions(+), 133 deletions(-) diff --git a/ethosu/regor/compiler/compiler.cpp b/ethosu/regor/compiler/compiler.cpp index 85d73b0e..56427ec9 100644 --- a/ethosu/regor/compiler/compiler.cpp +++ b/ethosu/regor/compiler/compiler.cpp @@ -345,6 +345,20 @@ bool Compiler::BuildNetwork(const char *entryGraph) _entryPoint = graph.get(); } _graphs.push_back(std::move(graph)); + if ( _optDb ) + { + for ( const auto &op : builder._operations ) + { + if ( auto it = builder._uidToExt.find(op->Uid()); it != builder._uidToExt.end() ) + { + _optDb->SourceOp(op.get(), it->second); + } + else + { + _optDb->SourceOp(op.get()); + } + } + } } if ( _graphs.empty() ) @@ -392,11 +406,12 @@ void Compiler::RecordNPUOp(const NPUOperation &npuOp, const CmdRanges &cmdRanges if ( opMap.try_get(std::get<0>(cmd), scheduleOp) ) { assert(scheduleOp); - _optDb->AddCommand(scheduleOp->_srcKey, streamId, std::get<2>(cmd) - 1, std::get<0>(cmd)); + auto op = static_cast(scheduleOp->_srcKey); + _optDb->AddCommand(op->Uid(), streamId, std::get<2>(cmd) - 1, std::get<0>(cmd)); } else { - _optDb->AddCommand(nullptr, streamId, std::get<2>(cmd) - 1, std::get<0>(cmd)); + _optDb->AddCommand(INVALID_UID, streamId, std::get<2>(cmd) - 1, std::get<0>(cmd)); } } } diff --git a/ethosu/regor/compiler/graph_builder.cpp b/ethosu/regor/compiler/graph_builder.cpp index 7a11736a..0ff963c8 100644 --- a/ethosu/regor/compiler/graph_builder.cpp +++ b/ethosu/regor/compiler/graph_builder.cpp @@ -197,6 +197,7 @@ GraphApi::GraphOperation *GraphBuilder::CreateOp(tosa::Op tosaType, const GraphK } auto op = std::make_shared(type); + if ( kernel ) { op->SetKernel(std::make_unique(kernel)); @@ -542,6 +543,13 @@ void GraphBuilder::SetAxisStrides([[maybe_unused]] GraphTensor *graphTensor, [[m assert(axisStrides == nullptr && "Not currently implemented"); } +void GraphBuilder::SetExternalId(GraphOperation *graphOp, int extId) +{ + auto op = static_cast(graphOp); + + _uidToExt[op->Uid()] = extId; +} + void GraphBuilder::FreeUnconnected() { try diff --git a/ethosu/regor/compiler/graph_builder.hpp b/ethosu/regor/compiler/graph_builder.hpp index bc5a294a..932f550e 100644 --- a/ethosu/regor/compiler/graph_builder.hpp +++ b/ethosu/regor/compiler/graph_builder.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2022-2024 Arm Limited 
and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2022-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -59,6 +59,7 @@ protected: std::vector> _outputs; std::vector> _persistent; std::vector> _buffers; + std::unordered_map _uidToExt; public: GraphBuilder(const std::string &name); @@ -89,6 +90,7 @@ public: void SetZeroPoint(GraphOperation *op, GraphTensorUsage usage, double zeroPoint) override; void SetAxisOrder(GraphTensor *graphTensor, GraphApi::AxisOrder order) override; void SetAxisStrides(GraphTensor *graphTensor, const GraphApi::GraphShape *axisStrides) override; + void SetExternalId(GraphOperation *graphOp, int extId) override; // Utility const std::string &Name() const { return _graphName; } uint32_t SyntaxVersion() const { return _syntaxVersion; } diff --git a/ethosu/regor/compiler/graph_optimiser.cpp b/ethosu/regor/compiler/graph_optimiser.cpp index 0f7b0789..4a456a6f 100644 --- a/ethosu/regor/compiler/graph_optimiser.cpp +++ b/ethosu/regor/compiler/graph_optimiser.cpp @@ -118,7 +118,6 @@ Operation *GraphOptimiser::RecordOperation(Graph *const graph, Operation *const UNUSED(graph); if ( _db ) { - // TODO: implement ext key tracking for TOSA Networks. _db->SourceOp(operation); } return operation; @@ -130,7 +129,7 @@ Operation *GraphOptimiser::RecordOptimisation(Graph *const graph, Operation *con // Remaining ops probably reference themselves if ( _db ) { - _db->AddOptimised(operation, operation); + _db->AddOptimised(*operation, operation); } return operation; } @@ -170,7 +169,7 @@ Operation *GraphOptimiser::RemoveReshape(Graph *const graph, Operation *const op ifmConn = operation->Input(TensorUsage::IFM0); ifm = ifmConn->tensor.get(); returnOp = copyOp.get(); - RecordOptimisation(operation, returnOp); + RecordOptimisation(*operation, returnOp); // Reshape still needs to be removed. 
} @@ -206,11 +205,11 @@ Operation *GraphOptimiser::RemoveReshape(Graph *const graph, Operation *const op return returnOp; } -void GraphOptimiser::RecordOptimisation(const Operation *operation, const Operation *op) +void GraphOptimiser::RecordOptimisation(UniqueId fromId, const Operation *op) { if ( _db ) { - _db->AddOptimised(operation, op); + _db->AddOptimised(fromId, op); } } @@ -332,15 +331,15 @@ Database *OptimiserDatabase::Get() return _db; } -int OptimiserDatabase::SourceId(const void *op) +int OptimiserDatabase::SourceId(UniqueId uid) { // lookup op in optimised - auto pos = _optimised.find(op); + auto pos = _optimised.find(uid); if ( pos != std::end(_optimised) ) { return std::get<0>(pos->second); } - else if ( auto ptr = _source.find(op); ptr != std::end(_source) ) + else if ( auto ptr = _source.find(uid); ptr != std::end(_source) ) { // op is original-op return ptr->second; @@ -348,10 +347,10 @@ int OptimiserDatabase::SourceId(const void *op) return 0; } -int OptimiserDatabase::OptimisedId(const void *op) +int OptimiserDatabase::OptimisedId(UniqueId uid) { // lookup op in optimised - auto pos = _optimised.find(op); + auto pos = _optimised.find(uid); if ( pos != std::end(_optimised) ) { return std::get<1>(pos->second); @@ -362,13 +361,13 @@ int OptimiserDatabase::OptimisedId(const void *op) int OptimiserDatabase::SourceOp(const Operation *op, int ext_key) { // Op may be a source op or originate from optimised ops in previous graph optimisation pass - auto id = SourceId(op); + auto id = SourceId(*op); if ( id != 0 ) { return id; } _sourceId++; - _source.emplace(op, _sourceId); + _source.emplace(*op, _sourceId); auto k = op->Kernel()->Size(); auto o = Shape::PadAxes(op->OFM()->StorageShape(), 3, 1); @@ -378,24 +377,24 @@ int OptimiserDatabase::SourceOp(const Operation *op, int ext_key) return _sourceId; } -void OptimiserDatabase::AddOptimised(const void *from, const Operation *to) +void OptimiserDatabase::AddOptimised(UniqueId fromId, const Operation *to) { assert(to); // Locate the source operation Id (if any) int sourceId = 0; - if ( from != nullptr ) + if ( fromId != INVALID_UID ) { // Look for source op in optimised list first and use that op's parent // (source replacement doesn't matter) - auto pos = _optimised.find(from); + auto pos = _optimised.find(fromId); if ( pos != _optimised.end() ) { sourceId = std::get<0>(pos->second); } else { - auto srcPos = _source.find(from); + auto srcPos = _source.find(fromId); if ( srcPos != _source.end() ) { sourceId = srcPos->second; @@ -404,7 +403,7 @@ void OptimiserDatabase::AddOptimised(const void *from, const Operation *to) } _optId++; - _optimised[to] = std::tuple(sourceId, _optId); + _optimised[*to] = std::tuple(sourceId, _optId); auto k = to->Kernel()->Size(); Shape o = Shape::PadAxes(to->OFM()->StorageShape(), 3, 1); @@ -420,18 +419,10 @@ void OptimiserDatabase::AddSubOp(UniqueId primaryUid, UniqueId subOpUid) _db->AddRow(_groupTable, subOpUid, {std::to_string(primaryUid)}); } -void OptimiserDatabase::AddCommand(void *key, int stream, int cmdIndex, UniqueId schedId) +void OptimiserDatabase::AddCommand(UniqueId opId, int stream, int cmdIndex, UniqueId schedId) { - auto pos = _optimised.find(key); - if ( key && pos != _optimised.end() ) - { - int optId = std::get<1>(pos->second); - _db->AddRow(_cmdTable, 0, {std::to_string(4 * cmdIndex), std::to_string(stream), std::to_string(optId), std::to_string(schedId)}); - } - else - { - _db->AddRow(_cmdTable, 0, {std::to_string(4 * cmdIndex), std::to_string(stream), "0", 
std::to_string(schedId)}); - } + auto optID = OptimisedId(opId); + _db->AddRow(_cmdTable, 0, {std::to_string(4 * cmdIndex), std::to_string(stream), std::to_string(optID), std::to_string(schedId)}); } int OptimiserDatabase::AddStream() diff --git a/ethosu/regor/compiler/graph_optimiser.hpp b/ethosu/regor/compiler/graph_optimiser.hpp index f4c799c5..200c4623 100644 --- a/ethosu/regor/compiler/graph_optimiser.hpp +++ b/ethosu/regor/compiler/graph_optimiser.hpp @@ -247,7 +247,7 @@ public: Operation *RecordOperation(Graph *const graph, Operation *const operation); Operation *RecordOptimisation(Graph *const graph, Operation *const operation); Operation *RemoveReshape(Graph *const graph, Operation *const operation); - void RecordOptimisation(const Operation *operation, const Operation *op); + void RecordOptimisation(UniqueId fromId, const Operation *op); void PrintGraph(const Graph *graph, const std::string &label) const; void PrintQuantization(const Graph *graph, const std::string &label) const; virtual ~GraphOptimiser() = default; diff --git a/ethosu/regor/compiler/graph_optimiser_db.hpp b/ethosu/regor/compiler/graph_optimiser_db.hpp index da057722..5711e122 100644 --- a/ethosu/regor/compiler/graph_optimiser_db.hpp +++ b/ethosu/regor/compiler/graph_optimiser_db.hpp @@ -44,18 +44,18 @@ private: int _groupTable = 0; int _cmdTable = 0; int _streamTable = 0; - std::unordered_map _source; - std::unordered_map> _optimised; + std::unordered_map _source; + std::unordered_map> _optimised; public: OptimiserDatabase(Database *db); Database *Get(); - int SourceId(const void *op); - int OptimisedId(const void *op); + int SourceId(UniqueId uid); + int OptimisedId(UniqueId uid); int SourceOp(const Operation *op, int ext_key = -1); - void AddOptimised(const void *from, const Operation *to); + void AddOptimised(UniqueId fromId, const Operation *to); void AddSubOp(UniqueId primaryUid, UniqueId subOpUid); - void AddCommand(void *key, int stream, int cmdIndex, UniqueId id); + void AddCommand(UniqueId opId, int stream, int cmdIndex, UniqueId id); int AddStream(); }; diff --git a/ethosu/regor/compiler/graphir_optimiser.cpp b/ethosu/regor/compiler/graphir_optimiser.cpp index 21326689..dc67759a 100644 --- a/ethosu/regor/compiler/graphir_optimiser.cpp +++ b/ethosu/regor/compiler/graphir_optimiser.cpp @@ -424,7 +424,7 @@ Operation *GraphIrOptimiser::RewriteConst(Graph *const graph, Operation *const o identityOp->CopyOutput(TensorUsage::OFM, *ofmConn); returnOp = identityOp.get(); - RecordOptimisation(operation, returnOp); + RecordOptimisation(*operation, returnOp); operation->Disconnect(); } return returnOp; @@ -633,7 +633,7 @@ Operation *GraphIrOptimiser::RewriteRescale(Graph *const, Operation *const opera signAttr->input_unsigned = false; ifm32Tens->ChangeType(DataType::Int32); - RecordOptimisation(operation, castOp.get()); + RecordOptimisation(*operation, castOp.get()); operation->ConnectInput(TensorUsage::IFM, ifm32Tens); ifmType = DataType::Int32; } @@ -696,7 +696,7 @@ Operation *GraphIrOptimiser::RewriteRescale(Graph *const, Operation *const opera auto mulOp = CreateRescalingMul(startChannel, endChannel, scales, shift); auto mulAttr = mulOp->Attribute(); mulAttr->output_unsigned = signAttr->output_unsigned; - RecordOptimisation(operation, mulOp.get()); + RecordOptimisation(*operation, mulOp.get()); // reset scales and startChannel startChannel = endChannel; @@ -713,7 +713,7 @@ Operation *GraphIrOptimiser::RewriteRescale(Graph *const, Operation *const opera auto mulOp = CreateRescalingMul(startChannel, endChannel, 
scales, shift); auto mulAttr = mulOp->Attribute(); mulAttr->output_unsigned = signAttr->output_unsigned; - RecordOptimisation(operation, mulOp.get()); + RecordOptimisation(*operation, mulOp.get()); returnOp = mulOp.get(); operation->Disconnect(); } @@ -875,7 +875,7 @@ Operation *GraphIrOptimiser::RewritePad(Graph *const, Operation *const operation { TensorSlice newOfmSlice = {zeroShape, newOfmShape.With(padAxis, padBefore)}; auto fillOp = MakeFillOperation(ofmConn, newOfmShape, newOfmSlice, padTensor); - RecordOptimisation(operation, fillOp); + RecordOptimisation(*operation, fillOp); } const int padAfter = paddingAfter[axis]; @@ -883,7 +883,7 @@ Operation *GraphIrOptimiser::RewritePad(Graph *const, Operation *const operation { TensorSlice newOfmSlice = {zeroShape.With(padAxis, newOfmShape[padAxis] - padAfter), newOfmShape.With(padAxis, padAfter)}; auto fillOp = MakeFillOperation(ofmConn, newOfmShape, newOfmSlice, padTensor); - RecordOptimisation(operation, fillOp); + RecordOptimisation(*operation, fillOp); } } @@ -892,7 +892,7 @@ Operation *GraphIrOptimiser::RewritePad(Graph *const, Operation *const operation copyOp->CopyInput(TensorUsage::IFM, *ifmConn); copyOp->CopyOutput(TensorUsage::OFM, *ofmConn); copyOp->Output(TensorUsage::OFM)->Set({paddingBefore, ifmShape}).Set(RoundMode::NATURAL); - RecordOptimisation(operation, copyOp.get()); + RecordOptimisation(*operation, copyOp.get()); returnOp = copyOp.get(); // Remove original pad @@ -981,7 +981,7 @@ Operation *GraphIrOptimiser::UnrollConv(Graph *const, Operation *const operation op->CopyInput(TensorUsage::Scales, *scalesConn); op->CopyOutput(TensorUsage::OFM, *ofmConn); op->Output(TensorUsage::OFM)->Set(ofmSlice); - RecordOptimisation(operation, op.get()); + RecordOptimisation(*operation, op.get()); returnOp = op.get(); } @@ -1132,7 +1132,6 @@ Operation *GraphIrOptimiser::FuseRescale(Graph *const graph, Operation *const op } if ( returnOp != operation ) { - RecordOptimisation(operation, returnOp); operation->Disconnect(); } return returnOp; @@ -1246,25 +1245,25 @@ Operation *GraphIrOptimiser::RewriteCast(Graph *const, Operation *const operatio const auto castOp1 = std::make_shared(OpType::Cast); castOp1->CopyInput(TensorUsage::IFM, *ifmConn); castOp1->ConnectOutput(TensorUsage::OFM, intermediate32Bit); - RecordOptimisation(operation, castOp1.get()); + RecordOptimisation(*operation, castOp1.get()); // Create reinterpret cast op to reinterpret to 16 bit, double size const auto reinterpretOp1 = std::make_shared(OpType::ReinterpretCast); reinterpretOp1->ConnectInput(TensorUsage::IFM, intermediate32Bit); reinterpretOp1->ConnectOutput(TensorUsage::OFM, intermediate16Bit2xSize); - RecordOptimisation(operation, reinterpretOp1.get()); + RecordOptimisation(*operation, reinterpretOp1.get()); // Create additional cast op const auto castOp2 = std::make_shared(OpType::Cast); castOp2->ConnectInput(TensorUsage::IFM, intermediate16Bit2xSize).Set(ifmConn->shape.WithDepth(2 * c)); castOp2->ConnectOutput(TensorUsage::OFM, intermediate32Bit2xSize).Set(ifmConn->shape.WithDepth(2 * c)); - RecordOptimisation(operation, castOp2.get()); + RecordOptimisation(*operation, castOp2.get()); // Create the final reinterpret cast to reinterpret the result as an int64 tensor const auto reinterpretOp2 = std::make_shared(OpType::ReinterpretCast); reinterpretOp2->ConnectInput(TensorUsage::IFM, intermediate32Bit2xSize).Set(ifmConn->shape.WithDepth(2 * c)); reinterpretOp2->CopyOutput(TensorUsage::OFM, *ofmConn); - RecordOptimisation(operation, reinterpretOp2.get()); + 
RecordOptimisation(*operation, reinterpretOp2.get()); ofmConn->quantization = Quantization::Unit(); operation->Disconnect(); @@ -1279,7 +1278,7 @@ Operation *GraphIrOptimiser::RewriteCast(Graph *const, Operation *const operatio newOp->CopyInput(TensorUsage::IFM0, *ifmConn); newOp->ConnectInput(TensorUsage::IFM1, CreateConstTensor("const_one", int8_t(1))); newOp->CopyOutput(TensorUsage::OFM, *ofmConn); - RecordOptimisation(operation, newOp.get()); + RecordOptimisation(*operation, newOp.get()); operation->Disconnect(); returnOp = newOp.get(); } @@ -1290,7 +1289,7 @@ Operation *GraphIrOptimiser::RewriteCast(Graph *const, Operation *const operatio newOp->CopyInput(TensorUsage::IFM0, *ifmConn); newOp->ConnectInput(TensorUsage::IFM1, CreateConstTensor("const_zero", ifmConn->tensor->Type(), 0)); newOp->CopyOutput(TensorUsage::OFM, *ofmConn); - RecordOptimisation(operation, newOp.get()); + RecordOptimisation(*operation, newOp.get()); operation->Disconnect(); returnOp = newOp.get(); } @@ -1300,7 +1299,7 @@ Operation *GraphIrOptimiser::RewriteCast(Graph *const, Operation *const operatio auto copyOp = std::make_shared(OpType::Add); ReplaceOperation(operation, copyOp.get()); copyOp->ConnectInput(TensorUsage::IFM1, CreateConstTensor("const_zero", ifmConn->tensor->Type(), 0)); - RecordOptimisation(operation, copyOp.get()); + RecordOptimisation(*operation, copyOp.get()); returnOp = copyOp.get(); // Set max range to disable clipping @@ -1389,7 +1388,7 @@ Operation *GraphIrOptimiser::RewriteConcat(Graph *const graph, Operation *const copyOp->CopyOutput(TensorUsage::OFM, *ofmConn); copyOp->Output(TensorUsage::OFM)->Set({ofmSliceOffset, ifmConn.shape}); copyOp->Output(TensorUsage::OFM)->Set(RoundMode::NATURAL); - RecordOptimisation(operation, copyOp.get()); + RecordOptimisation(*operation, copyOp.get()); returnOp = copyOp.get(); ofmSliceOffset[axis] += ifmConn.shape[axis]; @@ -1418,7 +1417,7 @@ Operation *GraphIrOptimiser::RewriteSlice(Graph *const graph, Operation *const o copyOp->Input(TensorUsage::IFM)->Set({begin, size}); copyOp->CopyOutput(TensorUsage::OFM, *ofmConn); copyOp->Output(TensorUsage::OFM)->Set(RoundMode::NATURAL); - RecordOptimisation(operation, copyOp.get()); + RecordOptimisation(*operation, copyOp.get()); returnOp = copyOp.get(); operation->Disconnect(); } @@ -1442,7 +1441,7 @@ Operation *GraphIrOptimiser::RewriteNegate(Graph *const graph, Operation *const newOp->CopyInput(TensorUsage::IFM1, *ifmConn); newOp->CopyOutput(TensorUsage::OFM, *ofmConn); newOp->Output(TensorUsage::OFM)->Set(RoundMode::NATURAL); - RecordOptimisation(operation, newOp.get()); + RecordOptimisation(*operation, newOp.get()); returnOp = newOp.get(); operation->Disconnect(); } @@ -1469,7 +1468,7 @@ Operation *GraphIrOptimiser::RewriteSelect(Graph *const graph, Operation *const auto addOp = CreateAdd(selectorConn->tensor, CreateConstTensor("const_zero", DataType::Int8, 0), selectorConn->quantization, Quantization::Unit(), Quantization::Unit(), ofmConn->tensor->Type()); selectorConn = addOp->Output(TensorUsage::OFM); - RecordOptimisation(operation, addOp); + RecordOptimisation(*operation, addOp); } // Break down SELECT(selector, a, b) into OR(AND(a, selector), AND_NOT(b, selector)) @@ -1481,9 +1480,9 @@ Operation *GraphIrOptimiser::RewriteSelect(Graph *const graph, Operation *const andNotOp->Output(TensorUsage::OFM)->tensor, ofmConn->quantization, ofmConn->quantization, ofmConn->quantization, ofmConn->tensor->Type()); orOp->CopyOutput(TensorUsage::OFM, *ofmConn); - RecordOptimisation(operation, andOp); - 
RecordOptimisation(operation, andNotOp); - RecordOptimisation(operation, orOp); + RecordOptimisation(*operation, andOp); + RecordOptimisation(*operation, andNotOp); + RecordOptimisation(*operation, orOp); returnOp = orOp; // Remove old select op @@ -1531,7 +1530,7 @@ Operation *GraphIrOptimiser::RewriteReduceSum(Graph *const graph, Operation *con transposeOp->CopyInput(TensorUsage::IFM, *ifmConn); transposeOp->Input(TensorUsage::IFM)->Set(ifmShape3D).Set(Quantization::Unit()); transposeOp->ConnectOutput(TensorUsage::OFM, transposeTens); - RecordOptimisation(operation, transposeOp.get()); + RecordOptimisation(*operation, transposeOp.get()); // Create ReduceSum op auto reduceSumOp = std::make_shared(OpType::ReduceSum); @@ -1540,7 +1539,7 @@ Operation *GraphIrOptimiser::RewriteReduceSum(Graph *const graph, Operation *con reduceSumOp->ConnectInput(TensorUsage::IFM, transposeTens).Set(ifmConn->quantization).Set(ifmConn->rounding); reduceSumOp->CopyOutput(TensorUsage::OFM, *ofmConn); reduceSumOp->Output(TensorUsage::OFM)->Set(transposeTens->StorageShape().WithDepth(1)).Set(ofmConn->rounding); - RecordOptimisation(operation, reduceSumOp.get()); + RecordOptimisation(*operation, reduceSumOp.get()); returnOp = reduceSumOp.get(); // Remove old ReduceSum op @@ -1569,7 +1568,7 @@ Operation *GraphIrOptimiser::RewriteReduceSum(Graph *const graph, Operation *con subOp->ConnectInput(TensorUsage::IFM1, zpTens); subOp->CopyOutput(TensorUsage::OFM, *ofmConn); subOp->Output(TensorUsage::OFM)->Set(ofmConn->rounding); - RecordOptimisation(operation, subOp.get()); + RecordOptimisation(*operation, subOp.get()); returnOp = subOp.get(); // Connect temporary tensor to reduceSum and remove the zero point @@ -1615,7 +1614,7 @@ Operation *GraphIrOptimiser::RewriteReduceSum(Graph *const graph, Operation *con convOp->ConnectInput(TensorUsage::Scales, biasTens).Set(biasQuant); convOp->CopyOutput(TensorUsage::OFM, *ofmConn); convOp->Output(TensorUsage::OFM)->Set(ifmShape4D.WithDepth(1)).Set(ofmConn->rounding); - RecordOptimisation(operation, convOp.get()); + RecordOptimisation(*operation, convOp.get()); returnOp = convOp.get(); // Remove old ReduceSum op @@ -1716,7 +1715,7 @@ Operation *GraphIrOptimiser::RewriteTile(Graph *const, Operation *const operatio "multiples", DataType::Int32, std::make_shared(newMultiples.size(), newMultiples.data())); tileOp->ConnectInput(TensorUsage::Params, newParamtensor); - RecordOptimisation(operation, tileOp.get()); + RecordOptimisation(*operation, tileOp.get()); returnOp = tileOp.get(); inputConn = tileOp->Output(TensorUsage::OFM); @@ -1759,7 +1758,7 @@ Operation *GraphIrOptimiser::MergeTransposes(Graph *const graph, Operation *cons newOp->CopyOutput(TensorUsage::OFM, *ofmConn); operation->Disconnect(); returnOp = newOp.get(); - RecordOptimisation(operation, returnOp); + RecordOptimisation(*operation, returnOp); } // Disconnect from surrounding ops, if this is a graph input // or output it remains untouched. 
@@ -1958,7 +1957,7 @@ Operation *GraphIrOptimiser::RewriteMatmul(Graph *const graph, Operation *const ifm1Conn->tensor->Type(), transposedIfm1Shape); transposeOp->ConnectInput(TensorUsage::IFM0, ifm1Conn->tensor).Set(ifm1Shape); transposeOp->ConnectOutput(TensorUsage::OFM, transposedIfm1); - RecordOptimisation(operation, transposeOp.get()); + RecordOptimisation(*operation, transposeOp.get()); // replace IFM2 with transposed output operation->ConnectInput(TensorUsage::IFM1, transposedIfm1).Set(ifm1Conn->quantization); @@ -1996,7 +1995,7 @@ Operation *GraphIrOptimiser::RewriteDepthwise(Graph *const graph, Operation *con ReplaceOperation(operation, newOp.get()); newOp->Output(TensorUsage::OFM)->Set(ofm->rounding); returnOp = newOp.get(); - RecordOptimisation(operation, returnOp); + RecordOptimisation(*operation, returnOp); } } @@ -2084,7 +2083,7 @@ Operation *GraphIrOptimiser::RewriteTransposeConvOFMPadding(Graph *const graph, dwOp->CopyOutput(TensorUsage::OFM, *ofmConn); dwOp->Output(TensorUsage::OFM)->Set(padSlice).Set(rounding); - RecordOptimisation(operation, dwOp.get()); + RecordOptimisation(*operation, dwOp.get()); }; // Positive output-padding is handled by adjusting the write slice of the OFM @@ -2374,7 +2373,7 @@ Operation *GraphIrOptimiser::RewriteResize(Graph *const, Operation *const operat auto copyOp = std::make_shared(OpType::Add); ReplaceOperation(operation, copyOp.get()); copyOp->ConnectInput(TensorUsage::IFM1, zeroTensor).Set(copyOp->Input(TensorUsage::IFM)->quantization); - RecordOptimisation(operation, copyOp.get()); + RecordOptimisation(*operation, copyOp.get()); return copyOp.get(); } diff --git a/ethosu/regor/compiler/lstm.cpp b/ethosu/regor/compiler/lstm.cpp index f112770e..9cb9dd1b 100644 --- a/ethosu/regor/compiler/lstm.cpp +++ b/ethosu/regor/compiler/lstm.cpp @@ -52,7 +52,7 @@ void LSTM::RecordOptimisation(Operation *op) { if ( _db ) { - _db->AddOptimised(_lstmOp, op); + _db->AddOptimised(*_lstmOp, op); } } diff --git a/ethosu/regor/compiler/network_performance.cpp b/ethosu/regor/compiler/network_performance.cpp index 1e935557..085d9bec 100644 --- a/ethosu/regor/compiler/network_performance.cpp +++ b/ethosu/regor/compiler/network_performance.cpp @@ -280,9 +280,9 @@ void NetworkPerformance::AddToDatabase(const PerformanceResult &perf, SchedulerO { opName = conn->tensor->srcTensor->Name(); } - - int sourceId = optDb->SourceId(schedOp->_srcKey); - int optId = optDb->OptimisedId(schedOp->_srcKey); + auto op = static_cast(schedOp->_srcKey); + int sourceId = optDb->SourceId(*op); + int optId = optDb->OptimisedId(*op); row = { std::to_string(sourceId), std::to_string(optId), diff --git a/ethosu/regor/compiler/operation.cpp b/ethosu/regor/compiler/operation.cpp index d8bdb4ac..1aa57eee 100644 --- a/ethosu/regor/compiler/operation.cpp +++ b/ethosu/regor/compiler/operation.cpp @@ -34,6 +34,7 @@ Operation::Operation(OpType opType) : _type(opType) { // Default 1x1 kernel for ops without a kernel _kernel = std::make_unique(Point2i(1, 1), Point2i(1, 1), Point2i(1, 1)); + _uid = GenerateUniqueId(); } Operation::Operation(const Operation &op) : Operation(op._type) diff --git a/ethosu/regor/compiler/operation.hpp b/ethosu/regor/compiler/operation.hpp index f30a8a10..88eb6dc9 100644 --- a/ethosu/regor/compiler/operation.hpp +++ b/ethosu/regor/compiler/operation.hpp @@ -141,6 +141,7 @@ private: OpType _type; std::unique_ptr _kernel; const void *_passthrough = nullptr; // Original flatbuffer description of this op (if it was loaded from one) + UniqueId _uid; public: Operation(OpType 
opType); @@ -151,6 +152,10 @@ public: public: OpType Type() const { return _type; } + + UniqueId Uid() const { return _uid; } + operator UniqueId() const { return _uid; } + const ordered_map &Outputs() const { return _outputs; } const ordered_map &Inputs() const { return _inputs; } diff --git a/ethosu/regor/compiler/softmax.cpp b/ethosu/regor/compiler/softmax.cpp index f8663ff8..e87a4521 100644 --- a/ethosu/regor/compiler/softmax.cpp +++ b/ethosu/regor/compiler/softmax.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -219,7 +219,7 @@ void Softmax::RecordOptimisation(Operation *const operation, Operation *op) { if ( _db ) { - _db->AddOptimised(operation, op); + _db->AddOptimised(*operation, op); } } diff --git a/ethosu/regor/compiler/tflite_graph_optimiser.cpp b/ethosu/regor/compiler/tflite_graph_optimiser.cpp index 78b1bed0..dc15f09e 100644 --- a/ethosu/regor/compiler/tflite_graph_optimiser.cpp +++ b/ethosu/regor/compiler/tflite_graph_optimiser.cpp @@ -149,7 +149,7 @@ Operation *TFLiteGraphOptimiser::ConvertLeakyRelu16bit(TensorConnection &ifmConn // Maximum(alpha * IFM, identity * IFM) auto fmAlpha = CreateConstTensor("lrelu_alpha", int16_t(scalar)); auto alphaMulOp = MakeMulWithConstTensor("alpha", ifmConn, ofmConn, fmAlpha, alphaQuant); - RecordOptimisation(operation, alphaMulOp); + RecordOptimisation(*operation, alphaMulOp); TensorConnection *identityConn = &ifmConn; if ( !IsScalingValidAndEqual(ifmConn, ofmConn) ) @@ -162,7 +162,7 @@ Operation *TFLiteGraphOptimiser::ConvertLeakyRelu16bit(TensorConnection &ifmConn identityQuant.scales[0] = {1, 0}; auto fmIdentity = CreateConstTensor("lrelu_ident", int16_t(1)); auto identityMulOp = MakeMulWithConstTensor("identity", ifmConn, ofmConn, fmIdentity, identityQuant); - RecordOptimisation(operation, identityMulOp); + RecordOptimisation(*operation, identityMulOp); identityConn = identityMulOp->Output(TensorUsage::OFM); } @@ -171,7 +171,7 @@ Operation *TFLiteGraphOptimiser::ConvertLeakyRelu16bit(TensorConnection &ifmConn // Maximum(positive * alpha, positive) = positive auto maxOp = MakeOperation(OpType::Maximum, alphaMulOp->Output(TensorUsage::OFM), identityConn, &ofmConn); maxOp->Input(TensorUsage::IFM)->Set(ofmConn.quantization); - RecordOptimisation(operation, maxOp); + RecordOptimisation(*operation, maxOp); returnOp = maxOp; } else @@ -190,12 +190,12 @@ Operation *TFLiteGraphOptimiser::ConvertLeakyRelu16bit(TensorConnection &ifmConn minOp->CopyInput(TensorUsage::IFM0, ifmConn); minOp->ConnectInput(TensorUsage::IFM1, zeroTens).Set(ifmConn.quantization); minOp->ConnectOutput(TensorUsage::OFM, fmNegative).Set(ifmConn.quantization).Set(RoundMode::DBL); - RecordOptimisation(operation, minOp.get()); + RecordOptimisation(*operation, minOp.get()); // create Mul(alpha) auto fmAlpha = CreateConstTensor("lrelu_alpha", int16_t(scalar)); auto alphaMulOp = MakeMulWithConstTensor("alpha", *minOp->Output(TensorUsage::OFM), ofmConn, fmAlpha, alphaQuant); - RecordOptimisation(operation, alphaMulOp); + RecordOptimisation(*operation, alphaMulOp); // create ReLU(IFM) to Select (and scale) values > 0 std::shared_ptr fmScaled = ofmConn.tensor->Clone(); @@ -204,14 +204,14 @@ Operation *TFLiteGraphOptimiser::ConvertLeakyRelu16bit(TensorConnection &ifmConn reluOp->ConnectOutput(TensorUsage::OFM, fmScaled).Set(ofmConn.quantization); 
reluOp->Output(TensorUsage::OFM)->quantization.quantMin.push_back(ofmConn.quantization.zeroPoints[0]); reluOp->Output(TensorUsage::OFM)->Set(RoundMode::DBL); - RecordOptimisation(operation, reluOp.get()); + RecordOptimisation(*operation, reluOp.get()); // Create Add(Relu, Mul) to add scaled and alpha-multiplied values auto addOp = std::make_shared(OpType::Add); addOp->CopyInput(TensorUsage::IFM0, *reluOp->Output(TensorUsage::OFM)); addOp->CopyInput(TensorUsage::IFM1, *alphaMulOp->Output(TensorUsage::OFM)); addOp->CopyOutput(TensorUsage::OFM, ofmConn); - RecordOptimisation(operation, addOp.get()); + RecordOptimisation(*operation, addOp.get()); returnOp = addOp.get(); } return returnOp; @@ -566,14 +566,14 @@ Operation *TFLiteGraphOptimiser::ConvertExpToLUT(Graph *const graph, Operation * { returnOp = ConvertToLUT8( operation, [](double x) -> float { return expf(float(x)); }, "Exp"); - RecordOptimisation(operation, returnOp); + RecordOptimisation(*operation, returnOp); operation->Disconnect(); } else if ( ifmType == DataType::Int16 ) { returnOp = ConvertToInterpolatingLUT16( operation, [](double x) -> float { return expf(float(x)); }, "Exp16(interp)"); - RecordOptimisation(operation, returnOp); + RecordOptimisation(*operation, returnOp); operation->Disconnect(); } return returnOp; @@ -603,14 +603,14 @@ Operation *TFLiteGraphOptimiser::ConvertLogToLUT(Graph *const graph, Operation * { returnOp = ConvertToLUT8( operation, [&](double x) -> float { return x <= 0.0f ? minVal : std::log(float(x)); }, "Log"); - RecordOptimisation(operation, returnOp); + RecordOptimisation(*operation, returnOp); operation->Disconnect(); } else if ( ifmType == DataType::Int16 ) { returnOp = ConvertToInterpolatingLUT16( operation, [&](double x) -> float { return x <= 0.0f ? minVal : std::log(float(x)); }, "Log16(interp)"); - RecordOptimisation(operation, returnOp); + RecordOptimisation(*operation, returnOp); operation->Disconnect(); } return returnOp; @@ -782,7 +782,7 @@ Operation *TFLiteGraphOptimiser::RewriteStridedSlice(Graph *const graph, Operati copyOp->Input(TensorUsage::IFM)->Set({sliceOffset, sliceShape, sliceStride}); copyOp->CopyOutput(TensorUsage::OFM, *ofmConn); copyOp->Output(TensorUsage::OFM)->Set(Shape::DivRoundUp(sliceShape, sliceStride)); - RecordOptimisation(operation, copyOp.get()); + RecordOptimisation(*operation, copyOp.get()); returnOp = copyOp.get(); // Remove original op @@ -826,7 +826,7 @@ Operation *TFLiteGraphOptimiser::RewriteUnpack(Graph *const graph, Operation *co assert(sliceOffset >= ifmConn->shape.WithZeros()); attr->size = sliceShape; attr->begin = sliceOffset; - RecordOptimisation(operation, sliceOp.get()); + RecordOptimisation(*operation, sliceOp.get()); returnOp = sliceOp.get(); // Offset of next slice @@ -885,7 +885,7 @@ Operation *TFLiteGraphOptimiser::ConvertReverse(Graph *const graph, Operation *c auto *attr = reverseOp->Attribute(); attr->axis = axis; inputConn = reverseOp->Output(TensorUsage::OFM); - RecordOptimisation(operation, reverseOp.get()); + RecordOptimisation(*operation, reverseOp.get()); returnOp = reverseOp.get(); } @@ -1000,7 +1000,7 @@ Operation *TFLiteGraphOptimiser::ConvertGather(Graph *const graph, Operation *co auto gatherOp = std::make_shared(OpType::Gather); ReplaceOperation(operation, gatherOp.get()); gatherOp->Output(TensorUsage::OFM)->Set(RoundMode::DBL); - RecordOptimisation(operation, gatherOp.get()); + RecordOptimisation(*operation, gatherOp.get()); returnOp = gatherOp.get(); } @@ -1076,7 +1076,7 @@ Operation *TFLiteGraphOptimiser::ConvertScatter(Graph 
*const graph, Operation *c // Remove TFLite ScatterNd op operation->Disconnect(); - RecordOptimisation(operation, scatterOp.get()); + RecordOptimisation(*operation, scatterOp.get()); returnOp = scatterOp.get(); } @@ -1204,7 +1204,7 @@ Operation *TFLiteGraphOptimiser::ConvertResize(Graph *const graph, Operation *co quant.scales[0] = QuantizedScale(1, 0); resizeOp->Input(TensorUsage::IFM)->Set(quant); - RecordOptimisation(operation, resizeOp.get()); + RecordOptimisation(*operation, resizeOp.get()); returnOp = resizeOp.get(); operation->Disconnect(); } @@ -1283,7 +1283,7 @@ Operation *TFLiteGraphOptimiser::ConvertReduceMinMaxAnyAll(Graph *const graph, O const auto ofmShape = prevConn->shape.With(axis, 1); const auto ofmTensor = std::make_shared(ofmName, ofmType, ofmShape); reduceOp->ConnectOutput(TensorUsage::OFM, ofmTensor).Set(prevConn->quantization).Set(RoundMode::NATURAL); - RecordOptimisation(operation, reduceOp.get()); + RecordOptimisation(*operation, reduceOp.get()); returnOp = reduceOp.get(); prevOp = reduceOp.get(); @@ -1360,7 +1360,7 @@ Operation *TFLiteGraphOptimiser::RewriteBatchMatMul(Graph *const, Operation *con // Add Transpose op, ifm: 1,n,W,C -> 1,n,C,W ifmReshaped = Shape(1, ifmReshaped.Height(), ifmReshaped.Depth(), ifmReshaped.Width()); auto op = CreateTransposeForMatMul(ifm->tensor, ifmReshaped); - RecordOptimisation(operation, op); + RecordOptimisation(*operation, op); ifmTensor = op->Output(TensorUsage::OFM)->tensor; } @@ -1372,7 +1372,7 @@ Operation *TFLiteGraphOptimiser::RewriteBatchMatMul(Graph *const, Operation *con // Add Transpose op, ifm2: 1,n,W,C -> 1,n,C,W ifm2Reshaped = Shape(1, ifm2Reshaped.Height(), ifm2Reshaped.Depth(), ifm2Reshaped.Width()); auto op = CreateTransposeForMatMul(ifm2->tensor, ifm2Reshaped); - RecordOptimisation(operation, op); + RecordOptimisation(*operation, op); ifm2Tensor = op->Output(TensorUsage::OFM)->tensor; } @@ -1385,7 +1385,7 @@ Operation *TFLiteGraphOptimiser::RewriteBatchMatMul(Graph *const, Operation *con newOp->CopyOutput(TensorUsage::OFM, *ofm); newOp->Output(TensorUsage::OFM)->Set(ofmReshaped).Set(rounding); returnOp = newOp.get(); - RecordOptimisation(operation, returnOp); + RecordOptimisation(*operation, returnOp); operation->Disconnect(); } return returnOp; @@ -1410,7 +1410,7 @@ Operation *TFLiteGraphOptimiser::RewriteFullyConnectDynamic(Graph *const, Operat // ifm2Transposed is both a reshape from N,1,1,C to 1,1,N,C and then a transpose to 1,1,C,N auto ifm2Transposed = Shape(1, 1, ifm2->shape.Depth(), ifm2->shape.Batch()); auto transposeOp = CreateTransposeForMatMul(ifm2->tensor, ifm2Transposed); - RecordOptimisation(operation, transposeOp); + RecordOptimisation(*operation, transposeOp); auto ifm2Tensor = transposeOp->Output(TensorUsage::OFM)->tensor; auto matMulOp = std::make_shared(OpType::MatMul); @@ -1420,7 +1420,7 @@ Operation *TFLiteGraphOptimiser::RewriteFullyConnectDynamic(Graph *const, Operat matMulOp->ConnectInput(TensorUsage::IFM1, ifm2Tensor).Set(ifm2Transposed).Set(ifm2->quantization).Set(ifm2->slice); matMulOp->ConnectOutput(TensorUsage::OFM, ofm->tensor).Set(ofmShape).Set(ofm->quantization).Set(ofm->slice).Set(rounding); - RecordOptimisation(operation, matMulOp.get()); + RecordOptimisation(*operation, matMulOp.get()); returnOp = matMulOp.get(); operation->Disconnect(); @@ -1487,7 +1487,7 @@ Operation *TFLiteGraphOptimiser::RewriteSquaredDifference(Graph *const, Operatio mulOp->Output(TensorUsage::OFM)->quantization.scales.push_back(QuantizedScale(1, quantizedRealInput1.shift)); 
mulOp->Output(TensorUsage::OFM)->quantization.type = QuantizationType::EXPLICIT; auto ifmScaled = mulOp->Output(TensorUsage::OFM); - RecordOptimisation(operation, mulOp); + RecordOptimisation(*operation, mulOp); // Convert ifm2 to 32 bit castOp = CreateCastToInt32(ifm2Conn); @@ -1495,7 +1495,7 @@ Operation *TFLiteGraphOptimiser::RewriteSquaredDifference(Graph *const, Operatio castOp->Output(TensorUsage::OFM)->quantization.scales.clear(); castOp->Output(TensorUsage::OFM)->quantization.scales.push_back(QuantizedScale(1 << leftShift, 0)); castOp->Output(TensorUsage::OFM)->quantization.type = QuantizationType::EXPLICIT; - RecordOptimisation(operation, castOp); + RecordOptimisation(*operation, castOp); // Scale/shift ifm2 (for 32-bit operations, scale is not applied but shift is) mulOp = CreateMul(castOp->Output(TensorUsage::OFM)->tensor, input2MultiplierConst, noScaleQuant, noScaleQuant, noScaleQuant); @@ -1504,19 +1504,19 @@ Operation *TFLiteGraphOptimiser::RewriteSquaredDifference(Graph *const, Operatio mulOp->Output(TensorUsage::OFM)->quantization.scales.push_back(QuantizedScale(1, quantizedRealInput2.shift)); mulOp->Output(TensorUsage::OFM)->quantization.type = QuantizationType::EXPLICIT; auto ifm2Scaled = mulOp->Output(TensorUsage::OFM); - RecordOptimisation(operation, mulOp); + RecordOptimisation(*operation, mulOp); // Calculate the raw diff auto subOp = CreateSub(ifmScaled->tensor, ifm2Scaled->tensor, noScaleQuant, noScaleQuant, noScaleQuant); subOp->Output(TensorUsage::OFM)->Set(RoundMode::DBL); auto rawDiff = subOp->Output(TensorUsage::OFM); - RecordOptimisation(operation, subOp); + RecordOptimisation(*operation, subOp); // Calculate the squared diff mulOp = CreateMul(rawDiff->tensor, rawDiff->tensor, noScaleQuant, noScaleQuant, noScaleQuant); mulOp->Output(TensorUsage::OFM)->Set(RoundMode::DBL); auto squaredRaw = mulOp->Output(TensorUsage::OFM); - RecordOptimisation(operation, mulOp); + RecordOptimisation(*operation, mulOp); // Scale/shift ofm ((for 32-bit operations, scale is not applied but shift is) returnOp = CreateMul(squaredRaw->tensor, outputMultiplierConst, noScaleQuant, noScaleQuant, ofmConn->quantization); @@ -1525,7 +1525,7 @@ Operation *TFLiteGraphOptimiser::RewriteSquaredDifference(Graph *const, Operatio returnOp->Output(TensorUsage::OFM)->quantization.scales.clear(); returnOp->Output(TensorUsage::OFM)->quantization.scales.push_back(QuantizedScale(1, quantizedRealOutput.shift)); returnOp->Output(TensorUsage::OFM)->quantization.type = QuantizationType::EXPLICIT; - RecordOptimisation(operation, returnOp); + RecordOptimisation(*operation, returnOp); operation->Disconnect(); } @@ -1581,7 +1581,7 @@ Operation *TFLiteGraphOptimiser::RewriteSpaceToBatchConvBatchToSpace(Graph *cons if ( _supportedOps->Check(newOp.get()) ) { returnOp = newOp.get(); - RecordOptimisation(operation, returnOp); + RecordOptimisation(*operation, returnOp); // Disconnect matched pattern prevOp->Disconnect(); nextOp->Disconnect(); @@ -1861,7 +1861,7 @@ Operation *TFLiteGraphOptimiser::ConvertMeanOps(Graph *const, Operation *const o auto op = MakeDepthwiseMeanOp(ifmConn, ifmShape4D, readShape, readOffset, intermediateShape, w, kh, ofmConn->tensor->Name(), weightTensor, biasTensor, oneScaleQuant, weightQuant, oneScaleQuantZp0); - RecordOptimisation(operation, op); + RecordOptimisation(*operation, op); if ( i > 0 ) { @@ -1872,7 +1872,7 @@ Operation *TFLiteGraphOptimiser::ConvertMeanOps(Graph *const, Operation *const o op->Output(TensorUsage::OFM)->quantization.scales.clear(); 
op->Output(TensorUsage::OFM)->quantization.scales.push_back(QuantizedScale(1, 0)); op->Output(TensorUsage::OFM)->quantization.type = QuantizationType::EXPLICIT; - RecordOptimisation(operation, op); + RecordOptimisation(*operation, op); } accTensor = op->Output(TensorUsage::OFM)->tensor; } @@ -1905,7 +1905,7 @@ Operation *TFLiteGraphOptimiser::ConvertMeanOps(Graph *const, Operation *const o outQuant.scales.push_back({1, outputShift}); outQuant.type = QuantizationType::EXPLICIT; op->ConnectOutput(TensorUsage::OFM, ofmConn->tensor).Set(intermediateShape).Set(outQuant); - RecordOptimisation(operation, op); + RecordOptimisation(*operation, op); operation->Disconnect(); returnOp = op; } @@ -1958,7 +1958,7 @@ Operation *TFLiteGraphOptimiser::ConvertTanhSigmoidToLUT(Graph *const, Operation if ( operation != returnOp ) { - RecordOptimisation(operation, returnOp); + RecordOptimisation(*operation, returnOp); operation->Disconnect(); } @@ -2050,7 +2050,7 @@ Operation *TFLiteGraphOptimiser::ConvertPrelu(Graph *const graph, Operation *con auto *attr = lreluOp->Attribute(); attr->alpha = scaledAlphaMin; returnOp = lreluOp.get(); - RecordOptimisation(operation, returnOp); + RecordOptimisation(*operation, returnOp); operation->Disconnect(); return returnOp; } @@ -2078,7 +2078,7 @@ Operation *TFLiteGraphOptimiser::ConvertPrelu(Graph *const graph, Operation *con .Set(ofmConn->quantization) .Set(ofmConn->slice) .Set(RoundMode::DBL); - RecordOptimisation(operation, mulAlpha.get()); + RecordOptimisation(*operation, mulAlpha.get()); TensorConnection *alphaConn = mulAlpha->Output(TensorUsage::OFM); TensorConnection *identityConn = ifmConn; @@ -2096,7 +2096,7 @@ Operation *TFLiteGraphOptimiser::ConvertPrelu(Graph *const graph, Operation *con oneTens = CreateConstTensor("one_const", int8_t(1)); } auto mulIdentity = MakeMulWithConstTensor("rescaled", *ifmConn, *ofmConn, oneTens, Quantization::Unit()); - RecordOptimisation(operation, mulIdentity); + RecordOptimisation(*operation, mulIdentity); identityConn = mulIdentity->Output(TensorUsage::OFM); } // Create Maximum operation that combines identity and alphaConn @@ -2105,7 +2105,7 @@ Operation *TFLiteGraphOptimiser::ConvertPrelu(Graph *const graph, Operation *con maxOp->CopyInput(TensorUsage::IFM1, *identityConn); maxOp->CopyOutput(TensorUsage::OFM, *ofmConn); maxOp->Output(TensorUsage::OFM)->Set(RoundMode::DBL); - RecordOptimisation(operation, maxOp.get()); + RecordOptimisation(*operation, maxOp.get()); returnOp = maxOp.get(); operation->Disconnect(); return returnOp; @@ -2138,14 +2138,14 @@ Operation *TFLiteGraphOptimiser::ConvertPrelu(Graph *const graph, Operation *con minOp->ConnectInput(TensorUsage::IFM1, zeroTens).Set(noScaleQuant); minOp->ConnectOutput(TensorUsage::OFM, fmNegative).Set(ifmConn->quantization); minOp->Output(TensorUsage::OFM)->Set(RoundMode::DBL); - RecordOptimisation(operation, minOp.get()); + RecordOptimisation(*operation, minOp.get()); // and multiply with alpha tensor auto mulAlpha = std::make_shared(OpType::Mul); mulAlpha->CopyInput(TensorUsage::IFM0, *minOp->Output(TensorUsage::OFM)); mulAlpha->CopyInput(TensorUsage::IFM1, *params); mulAlpha->ConnectOutput(TensorUsage::OFM, fmAlpha).Set(ofmConn->quantization).Set(RoundMode::DBL); - RecordOptimisation(operation, mulAlpha.get()); + RecordOptimisation(*operation, mulAlpha.get()); // Select (and scale) values > 0 auto reluOp = std::make_shared(OpType::Relu); @@ -2153,7 +2153,7 @@ Operation *TFLiteGraphOptimiser::ConvertPrelu(Graph *const graph, Operation *con 
reluOp->ConnectOutput(TensorUsage::OFM, fmScaled).Set(ofmConn->quantization); reluOp->Output(TensorUsage::OFM)->quantization.quantMin.push_back(ofmConn->quantization.zeroPoints[0]); reluOp->Output(TensorUsage::OFM)->Set(RoundMode::DBL); - RecordOptimisation(operation, reluOp.get()); + RecordOptimisation(*operation, reluOp.get()); // Add scaled and alpha multiplied values auto addOp = std::make_shared(OpType::Add); @@ -2161,7 +2161,7 @@ Operation *TFLiteGraphOptimiser::ConvertPrelu(Graph *const graph, Operation *con addOp->ConnectInput(TensorUsage::IFM1, fmScaled).Set(unitQuantOfmZp); addOp->CopyOutput(TensorUsage::OFM, *ofmConn); addOp->Output(TensorUsage::OFM)->Set(unitQuantOfmZp).Set(RoundMode::DBL); - RecordOptimisation(operation, addOp.get()); + RecordOptimisation(*operation, addOp.get()); returnOp = addOp.get(); operation->Disconnect(); } @@ -2205,14 +2205,14 @@ Operation *TFLiteGraphOptimiser::ConvertLeakyRelu(Graph *const graph, Operation // alpha == 0 can be converted to ReLU auto reluOp = MakeOperation(OpType::Relu, ifmConn, nullptr, ofmConn); reluOp->Output(TensorUsage::OFM)->quantization.quantMin.push_back(ofmConn->quantization.zeroPoints[0]); - RecordOptimisation(operation, reluOp); + RecordOptimisation(*operation, reluOp); returnOp = reluOp; } else if ( alpha == -1 ) { // alpha == -1 can be converted to Abs auto absOp = MakeOperation(OpType::Abs, ifmConn, nullptr, ofmConn); - RecordOptimisation(operation, absOp); + RecordOptimisation(*operation, absOp); returnOp = absOp; } else if ( (ifm->Type() == DataType::Int8 || ifm->Type() == DataType::UInt8) ) @@ -2220,7 +2220,7 @@ Operation *TFLiteGraphOptimiser::ConvertLeakyRelu(Graph *const graph, Operation // convert to 8-bit LUT assert(ifm->Type() == ofm->Type()); returnOp = Convert8bitLeakyReluToLUT(graph, operation, alpha); - RecordOptimisation(operation, returnOp); + RecordOptimisation(*operation, returnOp); } else if ( alpha < 0 || isConvertedPrelu || !_constraints->SupportsElementwiseLeakyRelu(!IsScalingValidAndEqual(*ifmConn, *ofmConn), ifm->Type()) ) @@ -2425,7 +2425,7 @@ Operation *TFLiteGraphOptimiser::ConvertRSqrtToLUT(Graph *const graph, Operation if ( operation != returnOp ) { - RecordOptimisation(operation, returnOp); + RecordOptimisation(*operation, returnOp); operation->Disconnect(); } @@ -2471,7 +2471,7 @@ Operation *TFLiteGraphOptimiser::ConvertPadV2(Graph *const graph, Operation *con int zeroPoint = ofmConn->quantization.IsValid() ? 
static_cast(ofmConn->quantization.zeroPoints[0]) : 0; attr->pad_const = padConstTens->View().Values(padConstTens->Type())[0] - zeroPoint; - RecordOptimisation(operation, padOp.get()); + RecordOptimisation(*operation, padOp.get()); operation->Disconnect(); return padOp.get(); } @@ -2492,7 +2492,7 @@ void TFLiteGraphOptimiser::MakeMemoryCopyForMirrorPad(const Operation *operation .Set(RoundMode::NATURAL) .Set(reverseAxis); - RecordOptimisation(operation, op.get()); + RecordOptimisation(*operation, op.get()); } Operation *TFLiteGraphOptimiser::ConvertMirrorPad(Graph *const graph, Operation *const operation) @@ -2522,7 +2522,7 @@ Operation *TFLiteGraphOptimiser::ConvertMirrorPad(Graph *const graph, Operation // Create MemoryCopy op that copies IFM to the right place inside the OFM Shape zeroShape = ofmShape.WithZeros(); auto mainOp = MakeMemoryCopyForConcat(ofmConn, ifmConn, zeroShape.WithHeight(top).WithWidth(left)); - RecordOptimisation(operation, mainOp.get()); + RecordOptimisation(*operation, mainOp.get()); // Add operations that fill the borders of the OFM if ( top > 0 ) @@ -2673,7 +2673,7 @@ Operation *TFLiteGraphOptimiser::LegalizeAsymmetricQuantization(Graph *const gra ifmConn->quantization = unitQuant; weightConn->quantization = unitQuant; - RecordOptimisation(operation, zpCorrectOp.get()); + RecordOptimisation(*operation, zpCorrectOp.get()); returnOp = zpCorrectOp.get(); } } @@ -2708,7 +2708,7 @@ Operation *TFLiteGraphOptimiser::LegalizeAsymmetricQuantization(Graph *const gra operation->ConnectOutput(TensorUsage::OFM, intermediateTensor).Set(ofmConn->shape).Set(ofmQuantNoZP); - RecordOptimisation(operation, zpCorrectOp.get()); + RecordOptimisation(*operation, zpCorrectOp.get()); returnOp = zpCorrectOp.get(); } @@ -2935,10 +2935,10 @@ Operation *TFLiteGraphOptimiser::ConvertConvolutionGroup(Graph *const graph, Ope .Set(ofmSlice) .Set(convGroupOp->Output(TensorUsage::OFM)->quantization); - RecordOptimisation(operation, convGroupOp.get()); + RecordOptimisation(*operation, convGroupOp.get()); } - RecordOptimisation(operation, concatOp.get()); + RecordOptimisation(*operation, concatOp.get()); operation->Disconnect(); return concatOp.get(); } diff --git a/ethosu/regor/compiler/tflite_graph_optimiser_tp.cpp b/ethosu/regor/compiler/tflite_graph_optimiser_tp.cpp index fe7676b6..cc08184f 100644 --- a/ethosu/regor/compiler/tflite_graph_optimiser_tp.cpp +++ b/ethosu/regor/compiler/tflite_graph_optimiser_tp.cpp @@ -142,7 +142,7 @@ Operation *TFLiteGraphOptimiser::ConvertHardSwishToLUT(Graph *const graph, Opera if ( operation != returnOp ) { - RecordOptimisation(operation, returnOp); + RecordOptimisation(*operation, returnOp); operation->Disconnect(); } diff --git a/ethosu/regor/include/graphapi.hpp b/ethosu/regor/include/graphapi.hpp index e25b1ca2..ff215ea0 100644 --- a/ethosu/regor/include/graphapi.hpp +++ b/ethosu/regor/include/graphapi.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2022-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2022-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -208,6 +208,7 @@ struct IGraphBuilder virtual void SetZeroPoint(GraphOperation *op, GraphTensorUsage tensor, double zeroPoint) = 0; virtual void SetAxisOrder(GraphTensor *tensor, AxisOrder order) = 0; virtual void SetAxisStrides(GraphTensor *tensor, const GraphShape *axisStrides) = 0; + virtual void SetExternalId(GraphOperation *op, int extId) = 0; }; } // namespace GraphApi diff --git a/ethosu/regor/tflite/tflite_reader.cpp 
b/ethosu/regor/tflite/tflite_reader.cpp index d0e17bcf..7778eb80 100644 --- a/ethosu/regor/tflite/tflite_reader.cpp +++ b/ethosu/regor/tflite/tflite_reader.cpp @@ -895,7 +895,7 @@ void TfLiteReader::UnFuseActivation(const std::shared_ptr &operation, activation->ConnectInput(TensorUsage::IFM, intermediate_tensor).Set(quantization); if ( optDb ) { - optDb->AddOptimised(operation.get(), activation.get()); + optDb->AddOptimised(*operation, activation.get()); } } diff --git a/ethosu/regor/tosa/tosa_reader.cpp b/ethosu/regor/tosa/tosa_reader.cpp index 902dde18..1f79b4a7 100644 --- a/ethosu/regor/tosa/tosa_reader.cpp +++ b/ethosu/regor/tosa/tosa_reader.cpp @@ -529,6 +529,7 @@ void TosaReader::LoadGraphs(const tosaFb::TosaGraph *model, std::listCreateOp(TosaMapping::FBOpToOp(tosa_operator.op()), kernelPtr); + builder->SetExternalId(op, tosa_op_index); builder_assert(op, "Failed to create operation"); // Fix op Attributes -- GitLab
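For reviewers, a rough, self-contained model of the ext-key flow this patch introduces: the TOSA reader records each operator's position in the serialized input file through the new IGraphBuilder::SetExternalId hook, GraphBuilder keeps a UniqueId-to-ext-key map (_uidToExt), and Compiler::BuildNetwork forwards the recorded key to OptimiserDatabase::SourceOp, falling back to the default ext_key of -1 when no key was recorded. The types below (Op, BuilderModel, the free SourceOp function, UniqueId as int) are simplified stand-ins, not the real GraphBuilder/Operation/OptimiserDatabase classes; only the data flow mirrors the patch.

// Simplified model of the ext-key tracking added in this change (C++17).
// Real classes are stubbed; comments point at the files they stand in for.
#include <cstdio>
#include <unordered_map>
#include <vector>

using UniqueId = int;                              // stand-in for the compiler's UniqueId type

struct Op { UniqueId uid; };                       // stand-in for Operation (operation.hpp), exposing Uid()

struct BuilderModel                                // stand-in for GraphBuilder (graph_builder.hpp/.cpp)
{
    std::vector<Op> operations;
    std::unordered_map<UniqueId, int> uidToExt;    // mirrors GraphBuilder::_uidToExt
    void SetExternalId(const Op &op, int extId) { uidToExt[op.uid] = extId; }
};

// Stand-in for OptimiserDatabase::SourceOp(const Operation *op, int ext_key = -1)
void SourceOp(const Op &op, int extKey = -1)
{
    std::printf("op uid=%d ext_key=%d\n", op.uid, extKey);
}

int main()
{
    BuilderModel builder;

    // Reader side (cf. tosa_reader.cpp): ext key is the operator's index in the serialized file
    for ( int tosaOpIndex = 0; tosaOpIndex < 3; tosaOpIndex++ )
    {
        Op op{100 + tosaOpIndex};
        builder.operations.push_back(op);
        builder.SetExternalId(op, tosaOpIndex);
    }
    builder.operations.push_back(Op{200});         // an op created without a recorded ext key

    // Compiler side (cf. Compiler::BuildNetwork): forward the ext key if one was recorded
    for ( const auto &op : builder.operations )
    {
        if ( auto it = builder.uidToExt.find(op.uid); it != builder.uidToExt.end() )
        {
            SourceOp(op, it->second);
        }
        else
        {
            SourceOp(op);                          // defaults to ext_key = -1
        }
    }
    return 0;
}

The patch also switches OptimiserDatabase's _source/_optimised maps, AddOptimised and AddCommand from raw pointer keys to UniqueId keys (with INVALID_UID for commands that have no matching op), so all lookups go through Operation::Uid() rather than pointer identity.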