From a2b644770207d69b2d475aee50ea30002657277a Mon Sep 17 00:00:00 2001 From: Johan Gunnarsson Date: Tue, 25 Mar 2025 16:36:58 +0100 Subject: [PATCH] MLBEDSW-10589: Fix the "NPU only" tensor condition in graph packing An "NPU only" tensor means a tensor that has only NPU producers and only NPU consumers. The logic was wrong as it allowed CPU consumers; this led to some CPU tensors being added as placeholders and missing from the resulting graph. Also changed the supported ops unit test to silence log output. Signed-off-by: Johan Gunnarsson Change-Id: I511d7bb2419c17dc4230eb791a0a84cd3ce7dedb --- ethosu/regor/compiler/graph_packing.cpp | 8 +- ethosu/regor/test/test_graph_packing.cpp | 79 +++++++++++++++++-- .../test/test_tflite_supported_operators.cpp | 7 ++ ethosu/regor/test/util.hpp | 13 +++ 4 files changed, 98 insertions(+), 9 deletions(-) diff --git a/ethosu/regor/compiler/graph_packing.cpp b/ethosu/regor/compiler/graph_packing.cpp index 5503a336..93b19775 100644 --- a/ethosu/regor/compiler/graph_packing.cpp +++ b/ethosu/regor/compiler/graph_packing.cpp @@ -214,9 +214,9 @@ void GraphPacking::ConnectTensors(Operation *op, const std::unique_ptrsrcTensor; if ( oldTensor ) { - const bool isConsumedByNPU = std::any_of(schedTensor->consumers.begin(), schedTensor->consumers.end(), isNpuOp); + const bool isConsumedByNPUOnly = std::all_of(schedTensor->consumers.begin(), schedTensor->consumers.end(), isNpuOp); const bool isProducedByNPUOnly = std::all_of(schedTensor->producers.begin(), schedTensor->producers.end(), isNpuOp); - if ( isConsumedByNPU && isProducedByNPUOnly && !schedTensor->isGraphInput && !schedTensor->isPersistent ) + if ( isConsumedByNPUOnly && isProducedByNPUOnly && !schedTensor->isGraphInput && !schedTensor->isPersistent ) { // Remember NPU only tensors so we can add them as placeholder later npuOnly.insert(oldTensor.get()); @@ -247,9 +247,9 @@ void GraphPacking::ConnectTensors(Operation *op, const std::unique_ptrsrcTensor; if ( oldTensor ) { - const 
bool isProducedByNPU = std::any_of(schedTensor->producers.begin(), schedTensor->producers.end(), isNpuOp); + const bool isProducedByNPUOnly = std::all_of(schedTensor->producers.begin(), schedTensor->producers.end(), isNpuOp); const bool isConsumedByNPUOnly = std::all_of(schedTensor->consumers.begin(), schedTensor->consumers.end(), isNpuOp); - if ( isProducedByNPU && isConsumedByNPUOnly && !schedTensor->isGraphOutput && !schedTensor->isPersistent ) + if ( isProducedByNPUOnly && isConsumedByNPUOnly && !schedTensor->isGraphOutput && !schedTensor->isPersistent ) { // Remember NPU only tensors so we can add them as placeholder later npuOnly.insert(oldTensor.get()); diff --git a/ethosu/regor/test/test_graph_packing.cpp b/ethosu/regor/test/test_graph_packing.cpp index 463e4141..7ff3b8dc 100644 --- a/ethosu/regor/test/test_graph_packing.cpp +++ b/ethosu/regor/test/test_graph_packing.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -85,6 +85,8 @@ TEST_CASE("test_graph_packing") REQUIRE(newGraph->ScheduledOrder().size() == 1); REQUIRE(newGraph->Inputs().size() == 1); REQUIRE(newGraph->Outputs().size() == 1); + REQUIRE(newGraph->Persistent().size() == 0); + REQUIRE(newGraph->Placeholder().size() == 0); // Check packing REQUIRE(npuOps[0].second->Operations().size() == 3); @@ -129,6 +131,7 @@ TEST_CASE("test_graph_packing") REQUIRE(newGraph->Inputs().size() == 1); REQUIRE(newGraph->Outputs().size() == 1); REQUIRE(newGraph->Persistent().size() == 1); + REQUIRE(newGraph->Placeholder().size() == 0); // Check packing REQUIRE(npuOps[0].second->Operations().size() == 3); @@ -175,6 +178,8 @@ TEST_CASE("test_graph_packing") REQUIRE(newGraph->ScheduledOrder().size() == 3); REQUIRE(newGraph->Inputs().size() == 2); REQUIRE(newGraph->Outputs().size() == 1); + REQUIRE(newGraph->Persistent().size() == 0); + 
REQUIRE(newGraph->Placeholder().size() == 0); // Check new graph operations REQUIRE(newGraph->ScheduledOrder()[0]->Type() == OpType::AvgPool); @@ -231,6 +236,7 @@ TEST_CASE("test_graph_packing") REQUIRE(newGraph->Inputs().size() == 2); REQUIRE(newGraph->Outputs().size() == 1); REQUIRE(newGraph->Persistent().size() == 1); + REQUIRE(newGraph->Placeholder().size() == 0); // Check new graph operations REQUIRE(newGraph->ScheduledOrder()[0]->Type() == OpType::AvgPool); @@ -259,7 +265,7 @@ TEST_CASE("test_graph_packing") // Check new graph I/O REQUIRE(newGraph->IsInput(newGraph->ScheduledOrder()[0]->IFM(0))); - REQUIRE(newGraph->IsPersistent(newGraph->ScheduledOrder()[1]->Input(TensorUsage::Params)->tensor.get())); + REQUIRE(newGraph->IsPersistent(newGraph->ScheduledOrder()[1]->Input(TensorUsage::Params)->tensor.get())); // var1 REQUIRE(newGraph->IsOutput(newGraph->ScheduledOrder()[2]->OFM())); } @@ -284,6 +290,8 @@ TEST_CASE("test_graph_packing") REQUIRE(newGraph->ScheduledOrder().size() == 2); REQUIRE(newGraph->Inputs().size() == 1); REQUIRE(newGraph->Outputs().size() == 1); + REQUIRE(newGraph->Persistent().size() == 0); + REQUIRE(newGraph->Placeholder().size() == 0); // Check new graph I/O REQUIRE(newGraph->IsInput(newGraph->ScheduledOrder()[0]->IFM(0))); @@ -331,12 +339,12 @@ TEST_CASE("test_graph_packing") REQUIRE(newGraph->Inputs().size() == 1); REQUIRE(newGraph->Outputs().size() == 1); REQUIRE(newGraph->Persistent().size() == 1); - + REQUIRE(newGraph->Placeholder().size() == 0); // Check new graph I/O REQUIRE(newGraph->IsInput(newGraph->ScheduledOrder()[0]->IFM(0))); REQUIRE(newGraph->IsOutput(newGraph->ScheduledOrder()[1]->OFM())); - REQUIRE(newGraph->IsPersistent(newGraph->ScheduledOrder()[1]->IFM(1))); + REQUIRE(newGraph->IsPersistent(newGraph->ScheduledOrder()[1]->IFM(1))); // var1 // Check new graph operations REQUIRE(newGraph->ScheduledOrder()[0]->Type() == OpType::CustomNpuOp); @@ -380,11 +388,14 @@ TEST_CASE("test_graph_packing") 
REQUIRE(newGraph->ScheduledOrder().size() == 3); REQUIRE(newGraph->Inputs().size() == 2); REQUIRE(newGraph->Outputs().size() == 1); + REQUIRE(newGraph->Persistent().size() == 0); + REQUIRE(newGraph->Placeholder().size() == 1); // Check new graph I/O REQUIRE(newGraph->IsInput(newGraph->ScheduledOrder()[0]->IFM(0))); REQUIRE(newGraph->IsInput(newGraph->ScheduledOrder()[1]->IFM(0))); REQUIRE(newGraph->IsOutput(newGraph->ScheduledOrder()[2]->OFM())); + REQUIRE(newGraph->IsPlaceholder(newGraph->ScheduledOrder()[0]->OFM())); // tens2 // Check new graph operations REQUIRE(newGraph->ScheduledOrder()[0]->Type() == OpType::CustomNpuOp); @@ -438,12 +449,14 @@ TEST_CASE("test_graph_packing") REQUIRE(newGraph->Inputs().size() == 2); REQUIRE(newGraph->Outputs().size() == 1); REQUIRE(newGraph->Persistent().size() == 1); + REQUIRE(newGraph->Placeholder().size() == 1); // Check new graph I/O REQUIRE(newGraph->IsInput(newGraph->ScheduledOrder()[0]->IFM(0))); REQUIRE(newGraph->IsInput(newGraph->ScheduledOrder()[1]->IFM(0))); REQUIRE(newGraph->IsOutput(newGraph->ScheduledOrder()[2]->OFM())); - REQUIRE(newGraph->IsPersistent(newGraph->ScheduledOrder()[0]->Input(TensorUsage::IFM1)->tensor.get())); + REQUIRE(newGraph->IsPersistent(newGraph->ScheduledOrder()[0]->IFM(1))); // var1 + REQUIRE(newGraph->IsPlaceholder(newGraph->ScheduledOrder()[0]->OFM())); // tens2 // Check new graph operations REQUIRE(newGraph->ScheduledOrder()[0]->Type() == OpType::CustomNpuOp); @@ -472,4 +485,60 @@ TEST_CASE("test_graph_packing") REQUIRE(newGraph->ScheduledOrder()[1]->OFM() == newGraph->ScheduledOrder()[2]->IFM(1)); REQUIRE(newGraph->ScheduledOrder()[2]->OFM() == newGraph->Outputs()[0].get()); } + + SECTION("Mixed NPU/CPU consumers") + { + std::vector> ops; + ops.push_back(CreateSchedulerOperation(true, TensorUsage::IFM, tens1, TensorUsage::OFM, tens2)); + ops.push_back(CreateSchedulerOperation(false, TensorUsage::IFM, tens2, TensorUsage::OFM, tens4)); + ops.push_back(CreateSchedulerOperation(true, 
TensorUsage::IFM, tens2, TensorUsage::OFM, tens3)); + + auto oldGraph = std::make_unique(GraphNotation::TFLite); + oldGraph->AddInput(tens1->srcTensor); + tens1->isGraphInput = true; + oldGraph->AddOutput(tens3->srcTensor); + tens3->isGraphOutput = true; + oldGraph->AddOutput(tens4->srcTensor); + tens4->isGraphOutput = true; + + std::vector>> npuOps; + auto newGraph = PackScheduleToGraph(npuOps, ops, tensorAddressMap, oldGraph.get()); + + REQUIRE(npuOps.size() == 2); + REQUIRE(ops.size() == 0); + REQUIRE(newGraph->ScheduledOrder().size() == 3); + REQUIRE(newGraph->Inputs().size() == 1); + REQUIRE(newGraph->Outputs().size() == 2); + REQUIRE(newGraph->Persistent().size() == 0); + REQUIRE(newGraph->Placeholder().size() == 0); + + // Check new graph I/O + REQUIRE(newGraph->IsInput(newGraph->ScheduledOrder()[0]->IFM(0))); + REQUIRE(newGraph->IsOutput(newGraph->ScheduledOrder()[1]->OFM())); + REQUIRE(newGraph->IsOutput(newGraph->ScheduledOrder()[2]->OFM())); + + // Check new graph operations + REQUIRE(newGraph->ScheduledOrder()[0]->Type() == OpType::CustomNpuOp); + REQUIRE(newGraph->ScheduledOrder()[0]->Inputs().size() == 1); + REQUIRE(newGraph->ScheduledOrder()[0]->Inputs().contains(TensorUsage::IFM)); + REQUIRE(newGraph->ScheduledOrder()[0]->Outputs().size() == 1); + REQUIRE(newGraph->ScheduledOrder()[0]->Outputs().contains(TensorUsage::OFM)); + REQUIRE(newGraph->ScheduledOrder()[1]->Type() == OpType::AvgPool); + REQUIRE(newGraph->ScheduledOrder()[1]->Inputs().size() == 1); + REQUIRE(newGraph->ScheduledOrder()[1]->Inputs().contains(TensorUsage::IFM)); + REQUIRE(newGraph->ScheduledOrder()[1]->Outputs().size() == 1); + REQUIRE(newGraph->ScheduledOrder()[1]->Outputs().contains(TensorUsage::OFM)); + REQUIRE(newGraph->ScheduledOrder()[2]->Type() == OpType::CustomNpuOp); + REQUIRE(newGraph->ScheduledOrder()[2]->Inputs().size() == 1); + REQUIRE(newGraph->ScheduledOrder()[2]->Inputs().contains(TensorUsage::IFM)); + REQUIRE(newGraph->ScheduledOrder()[2]->Outputs().size() == 1); 
+ REQUIRE(newGraph->ScheduledOrder()[2]->Outputs().contains(TensorUsage::OFM)); + + // Check new graph connections + REQUIRE(newGraph->ScheduledOrder()[0]->IFM(0) == newGraph->Inputs()[0].get()); + REQUIRE(newGraph->ScheduledOrder()[0]->OFM() == newGraph->ScheduledOrder()[1]->IFM(0)); + REQUIRE(newGraph->ScheduledOrder()[0]->OFM() == newGraph->ScheduledOrder()[2]->IFM(0)); + REQUIRE(newGraph->ScheduledOrder()[1]->OFM() == newGraph->Outputs()[1].get()); + REQUIRE(newGraph->ScheduledOrder()[2]->OFM() == newGraph->Outputs()[0].get()); + } } diff --git a/ethosu/regor/test/test_tflite_supported_operators.cpp b/ethosu/regor/test/test_tflite_supported_operators.cpp index 66a0a5d7..2970e346 100644 --- a/ethosu/regor/test/test_tflite_supported_operators.cpp +++ b/ethosu/regor/test/test_tflite_supported_operators.cpp @@ -17,6 +17,7 @@ // #include "common/common.hpp" +#include "common/logging.hpp" #include "architecture/ethosu55/ethos_u55.hpp" #include "architecture/ethosu65/ethos_u65.hpp" @@ -76,6 +77,8 @@ std::shared_ptr CreateOperation(OpType opType, Shape ifmShape, DataTy TEST_CASE("Supported operators Common") { + DisableLogging disableLogging; + std::shared_ptr arch = CreateArchDefault(256); std::string err = "noerror"; arch->CheckConfiguration(err); @@ -450,6 +453,8 @@ TEST_CASE("Supported operators Common") TEST_CASE("Supported operators EthosU55") { + DisableLogging disableLogging; + std::shared_ptr arch = CreateArchDefault(256); std::string err = "noerror"; arch->CheckConfiguration(err); @@ -564,6 +569,8 @@ TEST_CASE("Supported operators EthosU55") TEST_CASE("Supported operators EthosU85") { + DisableLogging disableLogging; + std::shared_ptr arch = CreateArchDefault(256); std::string err = "noerror"; arch->CheckConfiguration(err); diff --git a/ethosu/regor/test/util.hpp b/ethosu/regor/test/util.hpp index 8e605b36..247329af 100644 --- a/ethosu/regor/test/util.hpp +++ b/ethosu/regor/test/util.hpp @@ -18,6 +18,8 @@ #pragma once +#include "common/logging.hpp" + #include 
"architecture/architecture.hpp" #include "compiler/graph.hpp" #include "compiler/scheduler.hpp" @@ -25,6 +27,17 @@ using namespace regor; +// Helpers for common +// ----------------------------- + +// Disable logging until scope exit +struct DisableLogging +{ + unsigned filterMask; + DisableLogging() : filterMask(Logging::Out.FilterMask()) { Logging::Out.SetFilterMask(0u); } + ~DisableLogging() { Logging::Out.SetFilterMask(filterMask); } +}; + // Helpers for Architecture // ----------------------------- // Creates a default test-config -- GitLab