diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp index 0d6b3f8b9b6b36642cc4de3143af41fdfbab8d02..840d7dd34f9dc3c243c3b47e1a1fa2b0839e28e8 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp @@ -1724,8 +1724,8 @@ void EthosU55RCSGenerator::PrepareCommand(int index, HighLevelCommand *cmd, Temp } -std::vector EthosU55RCSGenerator::GenerateCommandStream(std::vector> &highLevelCommandStream, - std::vector> *cmdRanges, bool verbose) +std::vector EthosU55RCSGenerator::GenerateCommandStream( + std::vector> &highLevelCommandStream, CmdRanges *cmdRanges, bool verbose) { _emit.Clear(); _stripeToLutSlot.clear(); @@ -1784,7 +1784,7 @@ std::vector EthosU55RCSGenerator::GenerateCommandStream(std::vectorIsStripe() ) { - cmdRanges->emplace_back(static_cast(cmd.get())->operation->_srcKey, emitStart, _emit.Position()); + cmdRanges->emplace_back(static_cast(cmd.get())->operation->_srcId, emitStart, _emit.Position()); } cmdIndex++; } diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.hpp b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.hpp index 8c01bff9805cf6aee46f761ec05b84142876d91e..2e1fdebd8e5dfa76bda5f8cf9047e33b5a91c6f3 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.hpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -272,7 +272,7 @@ protected: public: std::vector GenerateCommandStream(std::vector> &highLevelCommandStream, - std::vector> *cmdRanges, bool verbose) override; + CmdRanges *cmdRanges, bool verbose) override; static uint32_t IdRegister(); static bool IsSupportedElementwise(const OpType opType); diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp index 72dacd2e825eb56ef9b7c31ce34c4e1e25dfe5e4..2fc0e2201cbeddb9d2b9f76f4ed2c3b953279534 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2023-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2023-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -1973,7 +1973,7 @@ std::shared_ptr EthosU85RCSGenerator::MakeStripeForSubOp(HLCStripe *s op->type = subOp.type; op->ifm = subOp.ifm; op->ofm = subOp.ofm; - op->_srcKey = subOp._srcKey; + op->_srcId = subOp._srcId; if ( IsLUTType(subOp.type) ) { op->parameters.lut = subOp.parameters.lut; @@ -2003,8 +2003,7 @@ std::shared_ptr EthosU85RCSGenerator::MakeStripeForSubOp(HLCStripe *s } bool EthosU85RCSGenerator::GenerateOpGroup(HLCStripe *stripe, HLCStripe *prevOp, MemoryAccesses &memoryAccesses, - std::deque &outstandingDmaAccesses, std::vector> &debugInfo, - std::vector> *cmdRanges) + std::deque &outstandingDmaAccesses, std::vector> &debugInfo, CmdRanges *cmdRanges) { assert(stripe->opGroup != nullptr); EthosU85OpGroup *opGroup = static_cast(stripe->opGroup); @@ -2066,7 +2065,7 @@ bool EthosU85RCSGenerator::GenerateOpGroup(HLCStripe *stripe, HLCStripe *prevOp, // Return command mapping information to the caller if ( cmdRanges ) { - cmdRanges->emplace_back(stripe->operation->_srcKey, emitStart, _emit.Position()); + cmdRanges->emplace_back(stripe->operation->_srcId, emitStart, _emit.Position()); } if ( isChained ) @@ -2177,8 +2176,8 @@ void EthosU85RCSGenerator::GenerateDMA(const HLCDMA *dma, MemoryAccesses &memory } } -std::vector EthosU85RCSGenerator::GenerateCommandStream(std::vector> &highLevelCommandStream, - std::vector> *cmdRanges, bool verbose) +std::vector EthosU85RCSGenerator::GenerateCommandStream( + std::vector> &highLevelCommandStream, CmdRanges *cmdRanges, bool verbose) { _emit.Clear(); _stripeToLutSlot.clear(); diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.hpp b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.hpp index 96a403f095b5c63d92ddf8471a1981c1050d2c72..3811d2da024622e7c3ce553b182252f9d9934f4e 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.hpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -231,7 +231,7 @@ protected: bool GenerateStripe(HLCStripe *stripe, MemoryAccesses &memoryAccesses); std::shared_ptr MakeStripeForSubOp(HLCStripe *stripe, HLCSubOperation &subOp); bool GenerateOpGroup(HLCStripe *stripe, HLCStripe *prevOp, MemoryAccesses &memoryAccesses, std::deque &outstandingDmaAccesses, - std::vector> &debugInfo, std::vector> *cmdRanges); + std::vector> &debugInfo, CmdRanges *cmdRanges); // Generates register commands for DMA operations void GenerateDMA(const HLCDMA *dma, MemoryAccesses &memoryAccesses); @@ -242,7 +242,7 @@ protected: public: std::vector GenerateCommandStream(std::vector> &highLevelCommandStream, - std::vector> *cmdRanges, bool verbose) override; + CmdRanges *cmdRanges, bool verbose) override; static uint32_t ConfigRegister(int macs, int cmdStreamVersion, int numAxiSram, int numAxiExt, int numWd, int product); static bool IsSupportedElementwise(const OpType opType); static uint32_t IdRegister(); diff --git a/ethosu/regor/architecture/register_command_stream_generator.hpp b/ethosu/regor/architecture/register_command_stream_generator.hpp index 5f4dc1d505039a54fd5462a07380fa4658358d49..98022d3b5baaf9e95fe43f133bc6d55f5d7e14ef 100644 --- a/ethosu/regor/architecture/register_command_stream_generator.hpp +++ b/ethosu/regor/architecture/register_command_stream_generator.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -29,12 +29,14 @@ namespace regor { +using CmdRanges = std::vector>; + class IRegisterCommandStreamGenerator { public: virtual ~IRegisterCommandStreamGenerator() = default; - virtual std::vector GenerateCommandStream(std::vector> &highLevelCommandStream, - std::vector> *genRanges, bool verbose) = 0; + virtual std::vector GenerateCommandStream( + std::vector> &highLevelCommandStream, CmdRanges *cmdRanges, bool verbose) = 0; virtual void PrintCommandStream(const std::vector &stream, std::vector> &debugInfo) = 0; }; diff --git a/ethosu/regor/common/ordered_map.hpp b/ethosu/regor/common/ordered_map.hpp index 35c1703d5c4314ff3f9d9f0b1571f8389d6417cd..d1dea42dfbf72a589d769866cb2b34b13534490c 100644 --- a/ethosu/regor/common/ordered_map.hpp +++ b/ethosu/regor/common/ordered_map.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021, 2023-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021, 2023-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -96,18 +96,17 @@ protected: // - The traversal order list struct Node { - typename std::aligned_storage::type value; // Untyped value storage (place - // first) - KEY key; // Map key - INDEXER order_next = NODE_UNLINKED; // order forwards traversal - INDEXER order_prev = NODE_UNLINKED; // order backwards traversal + alignas(alignof(VALUE)) uint8_t value[sizeof(VALUE)]; // Untyped value storage (place first) + KEY key; // Map key + INDEXER order_next = NODE_UNLINKED; // order forwards traversal + INDEXER order_prev = NODE_UNLINKED; // order backwards traversal INDEXER hash_next = HASH_FREE; // Same hash collision relocation (-2=free, -1=used/end, otherwise=next bucket) Node() = default; - VALUE &Value() { return *reinterpret_cast(&value); } + VALUE &Value() { return *reinterpret_cast(reinterpret_cast(&value)); } - const VALUE &Value() const { return *reinterpret_cast(&value); } + const VALUE &Value() const { return *reinterpret_cast(reinterpret_cast(&value)); } void copy_links(Node &other) { diff --git a/ethosu/regor/compiler/compiler.cpp b/ethosu/regor/compiler/compiler.cpp index e81547df4ae96aeac3eacfcb4fafd9b5c1907576..4ef542327d1c7436ef85db0118ed4e0255659026 100644 --- a/ethosu/regor/compiler/compiler.cpp +++ b/ethosu/regor/compiler/compiler.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -349,18 +349,35 @@ bool Compiler::BuildNetwork(const char *entryGraph) return true; } -void Compiler::RecordSubOps(const std::vector> &scheduleOps) +void Compiler::RecordNPUOp(const NPUOperation &npuOp, const CmdRanges &cmdRanges) { - if ( _optDb ) + assert(_optDb); + const auto &scheduleOps = npuOp.Operations(); + ordered_map opMap(scheduleOps.size()); + + // Record scheduler operations + for ( const auto &scheduleOp : scheduleOps ) { - for ( auto &scheduleOp : scheduleOps ) + opMap.emplace(scheduleOp->Uid(), scheduleOp.get()); + + // Add subOps to DB + std::vector subOpKeys; + for ( auto &subOp : scheduleOp->SubOps() ) { - std::vector subOpKeys; - for ( auto &subOp : scheduleOp->SubOps() ) - { - subOpKeys.push_back(subOp->_srcKey); - } - _optDb->AddSubOps(scheduleOp->_srcKey, subOpKeys); + subOpKeys.push_back(subOp->_srcKey); + } + _optDb->AddSubOps(scheduleOp->_srcKey, subOpKeys); + } + + // Record command stream op ranges for this NPU op + int streamId = _optDb->AddStream(); + for ( auto const &cmd : cmdRanges ) + { + SchedulerOperation *scheduleOp; + if ( opMap.try_get(std::get<0>(cmd), scheduleOp) ) + { + assert(scheduleOp); + _optDb->AddCommand(scheduleOp->_srcKey, streamId, std::get<2>(cmd) - 1); } } } @@ -415,9 +432,6 @@ std::unique_ptr Compiler::CompileGraph(std::unique_ptr &graph, SchedulerPacking packing(_architecture.get(), _schedulerOptions.disabled.All(SchedulerFeature::Grouping)); auto scheduleOps = packing.Process(graph.get()); - // Add subOps to debugDB - RecordSubOps(scheduleOps); - // Schedule the linearised operation sequence Scheduler scheduler(_architecture.get(), _schedulerOptions, "graph", scheduleOps); std::shared_ptr schedule; @@ -467,7 +481,7 @@ std::unique_ptr Compiler::CompileGraph(std::unique_ptr &graph, auto highLevelCommandStream = hlcsGenerator.GenerateCommandStream(npuOp, schedule.get(), _compilerOptions.verboseHighLevelCommandStream); // Generate LLCS for output - std::vector> cmdRanges; + CmdRanges cmdRanges; auto registerCommandStream = _architecture->RegisterCommandStreamGenerator()->GenerateCommandStream( highLevelCommandStream, &cmdRanges, _compilerOptions.verboseRegisterCommandStream); @@ -479,12 +493,9 @@ std::unique_ptr Compiler::CompileGraph(std::unique_ptr &graph, if ( _optDb ) { - int streamId = _optDb->AddStream(); - for ( auto const &cmd : cmdRanges ) - { - _optDb->AddCommand(std::get<0>(cmd), streamId, std::get<2>(cmd) - 1); - } + RecordNPUOp(*npuOp, cmdRanges); } + try { customOperatorBuilder.Serialise(graphOp, npuOp, registerCommandStream); diff --git a/ethosu/regor/compiler/compiler.hpp b/ethosu/regor/compiler/compiler.hpp index 260cd3c637e2a1e7cf2328aa7bb5633cb57549da..ddf91b117065265a9bc857338ae9514493cad89a 100644 --- a/ethosu/regor/compiler/compiler.hpp +++ b/ethosu/regor/compiler/compiler.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -21,6 +21,7 @@ #include "common/common.hpp" #include "architecture/architecture.hpp" +#include "architecture/register_command_stream_generator.hpp" #include "database.hpp" #include "graph.hpp" #include "graph_builder.hpp" @@ -124,7 +125,7 @@ public: private: bool BuildNetwork(const char *entryGraph); - void RecordSubOps(const std::vector> &scheduleOps); + void RecordNPUOp(const NPUOperation &npuOp, const CmdRanges &cmdRanges); std::unique_ptr CompileGraph(std::unique_ptr &graph, IncrementalLinearAllocator &readOnlyAllocator, std::unordered_map &tensorAddressMap); diff --git a/ethosu/regor/compiler/high_level_command_stream.hpp b/ethosu/regor/compiler/high_level_command_stream.hpp index f652278855d8d46bf0dc9b643b21a11df58a00f1..9f248defe6764543a1d4fcda6eedc944b9a293b4 100644 --- a/ethosu/regor/compiler/high_level_command_stream.hpp +++ b/ethosu/regor/compiler/high_level_command_stream.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -158,7 +158,7 @@ struct HLCSubOperation std::vector ifm; HLCFeatureMap ofm; HLCParameters parameters = {}; - void *_srcKey = nullptr; + UniqueId _srcId = 0; }; /// diff --git a/ethosu/regor/compiler/high_level_command_stream_generator.cpp b/ethosu/regor/compiler/high_level_command_stream_generator.cpp index 4e5100d995eaa8cf01bdff7f1c5a46e1c2ac4ff8..904f6a7ecd53e51bff98fb581d80530af847f332 100644 --- a/ethosu/regor/compiler/high_level_command_stream_generator.cpp +++ b/ethosu/regor/compiler/high_level_command_stream_generator.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -291,7 +291,7 @@ static HLCSubOperation MakeSubOperation(const std::unique_ptrOFM(), hlcSubOp.ofm); - hlcSubOp._srcKey = schedOp->_srcKey; + hlcSubOp._srcId = schedOp->Uid(); if ( schedOp->Type() == OpType::LeakyRelu ) { @@ -317,7 +317,7 @@ static std::shared_ptr MakeOperation(SchedulerOperation *schedOp, op->type = schedOp->Type(); op->kernel = *schedOp->Kernel(); op->config = opInfo->Config(); - op->_srcKey = schedOp->_srcKey; + op->_srcId = schedOp->Uid(); for ( int i = 0; i < MAX_NUM_IFM; ++i ) {