diff --git a/ethosu/regor/compiler/network_performance.cpp b/ethosu/regor/compiler/network_performance.cpp index 1f2db8e915b69a55003f49f1504eb560794d740a..1e935557c8ba97a425f61f9d06e7ab03954fc1f3 100644 --- a/ethosu/regor/compiler/network_performance.cpp +++ b/ethosu/regor/compiler/network_performance.cpp @@ -52,14 +52,15 @@ PerformanceResult NetworkPerformance::Measure(Schedule *schedule, OptimiserDatab std::unordered_set regions( {_arch->ReadonlyMemory(), _arch->FeatureMapMemory(), _arch->LUTMemory(), _arch->StagingMemory()}); std::unordered_set tensorUids; - int opTable = 0; + int perfTable = 0; int perfDebugTable = 0; + int perfDebugConnectivityTable = 0; if ( optDb ) { db = optDb->Get(); _arch->Performance()->InitDatabase(db); - opTable = db->AddTable("perf"); + perfTable = db->AddTable("perf"); std::vector columns = { "source_id", "optimised_id", @@ -75,7 +76,7 @@ PerformanceResult NetworkPerformance::Measure(Schedule *schedule, OptimiserDatab std::string label = mem->Name() + "_ac"; columns.push_back(label); } - db->AddColumns(opTable, columns); + db->AddColumns(perfTable, columns); perfDebugTable = db->AddTable("perf_debug"); @@ -174,6 +175,14 @@ PerformanceResult NetworkPerformance::Measure(Schedule *schedule, OptimiserDatab } db->AddColumns(perfDebugTable, std::move(columns)); + + perfDebugConnectivityTable = db->AddTable("perf_debug_conn"); + + columns = { + "input_op_id", + "input_index", + }; + db->AddColumns(perfDebugConnectivityTable, std::move(columns)); } for ( auto const &schedOp : _ops ) @@ -196,7 +205,7 @@ PerformanceResult NetworkPerformance::Measure(Schedule *schedule, OptimiserDatab } if ( optDb != nullptr ) { - AddToDatabase(perf, schedOp.get(), cost, opTable, perfDebugTable, memories, optDb); + AddToDatabase(perf, schedOp.get(), cost, perfTable, perfDebugTable, perfDebugConnectivityTable, memories, optDb); } performance += perf; prevOp = schedOp.get(); @@ -208,7 +217,7 @@ PerformanceResult NetworkPerformance::Measure(Schedule *schedule, OptimiserDatab perf = ProcessOpPerformance(subOp.get(), cost, schedule, prevOp, prevCost, memories); if ( optDb != nullptr ) { - AddToDatabase(perf, subOp.get(), cost, opTable, perfDebugTable, memories, optDb); + AddToDatabase(perf, subOp.get(), cost, perfTable, perfDebugTable, perfDebugConnectivityTable, memories, optDb); } if ( !IsActivation(subOp->Type()) ) { @@ -257,8 +266,8 @@ PerformanceResult NetworkPerformance::ProcessOpPerformance(SchedulerOperation *s } -void NetworkPerformance::AddToDatabase(const PerformanceResult &perf, SchedulerOperation *schedOp, SchedulerOpInfo *cost, - int opTable, int perfDebugTable, const std::unordered_set &memories, OptimiserDatabase *optDb) +void NetworkPerformance::AddToDatabase(const PerformanceResult &perf, SchedulerOperation *schedOp, SchedulerOpInfo *cost, int perfTable, + int perfDebugTable, int perfDebugConnectivityTable, const std::unordered_set &memories, OptimiserDatabase *optDb) { // Per-layer calculations assert(optDb != nullptr); @@ -290,7 +299,7 @@ void NetworkPerformance::AddToDatabase(const PerformanceResult &perf, SchedulerO row.push_back(std::to_string(perf.memory.at(mem).accessCycles)); } - db->AddRow(opTable, schedOp->Uid(), std::move(row)); + db->AddRow(perfTable, schedOp->Uid(), std::move(row)); row = {}; auto shapeToStrings = [&row](const std::vector &shape) @@ -298,7 +307,7 @@ void NetworkPerformance::AddToDatabase(const PerformanceResult &perf, SchedulerO std::transform(shape.begin(), shape.end(), std::back_inserter(row), [](int n) -> std::string { return n ? std::to_string(n) : ""; }); }; - // clang-format off + // FM shapes shapeToStrings(ReshapeToNHWC(schedOp->IFM(0)->shape).ToList()); shapeToStrings(ReshapeToNHWC(schedOp->TryIFM(1) ? schedOp->IFM(1)->shape : Shape()).ToList()); @@ -312,6 +321,7 @@ void NetworkPerformance::AddToDatabase(const PerformanceResult &perf, SchedulerO shapeToStrings(ReshapeToNHWC(schedOp->TryIFM(1) ? cost->stripeInput[1] : Shape()).ToList()); shapeToStrings(ReshapeToNHWC(cost->stripe).ToList()); + // clang-format off row.insert(row.end(), { // FM Memory fmt::format("{}", schedOp->IFM(0)->tensor->memArea.memory->Name()), @@ -373,6 +383,7 @@ void NetworkPerformance::AddToDatabase(const PerformanceResult &perf, SchedulerO std::to_string(schedOp->Kernel()->Stride3D().z), }); // clang-format on + for ( const auto mem : memories ) { // Add read/write transferEfficiencies for all memories @@ -397,6 +408,35 @@ void NetworkPerformance::AddToDatabase(const PerformanceResult &perf, SchedulerO } db->AddRow(perfDebugTable, schedOp->Uid(), std::move(row)); + + // Store graph connectivity + if ( perfDebugConnectivityTable ) + { + for ( auto [usage, ifmConn] : schedOp->inputs.pairs() ) + { + if ( !IsIFM(usage) ) continue; + + const auto index = GetUsageIndex(usage); + if ( ifmConn.tensor->isGraphInput ) + { + db->AddRow(perfDebugConnectivityTable, schedOp->Uid(), {std::to_string(-1), std::to_string(index)}); + } + for ( auto &prod : ifmConn.tensor->producers ) + { + db->AddRow(perfDebugConnectivityTable, schedOp->Uid(), {std::to_string(prod->Uid()), std::to_string(index)}); + } + } + + for ( auto [usage, ofmConn] : schedOp->outputs.pairs() ) + { + if ( !IsOFM(usage) ) continue; + + if ( ofmConn.tensor->isGraphOutput ) + { + db->AddRow(perfDebugConnectivityTable, -2, {std::to_string(schedOp->Uid()), std::to_string(0)}); + } + } + } } diff --git a/ethosu/regor/compiler/network_performance.hpp b/ethosu/regor/compiler/network_performance.hpp index b379e0e1af49b135edf941898c4a8c9ff13aa430..4e41ff8a84b5f46f7e0ebbf3ee078d2f1e265060 100644 --- a/ethosu/regor/compiler/network_performance.hpp +++ b/ethosu/regor/compiler/network_performance.hpp @@ -143,8 +143,8 @@ private: SchedulerOperation *prevOp, SchedulerOpInfo *prevCost, const std::unordered_set &memories); PerformanceResult EstimateFullOpPerformance( SchedulerOperation *schedOp, SchedulerOpInfo *cost, SchedulerOperation *prevOp, SchedulerOpInfo *prevCost); - void AddToDatabase(const PerformanceResult &perf, SchedulerOperation *schedOp, SchedulerOpInfo *cost, int opTable, - int perfDebugTable, const std::unordered_set &memories, OptimiserDatabase *optDb); + void AddToDatabase(const PerformanceResult &perf, SchedulerOperation *schedOp, SchedulerOpInfo *cost, int opTable, int perfDebugTable, + int perfDebugConnectivityTable, const std::unordered_set &memories, OptimiserDatabase *optDb); }; diff --git a/ethosu/regor/compiler/scheduler_operation.hpp b/ethosu/regor/compiler/scheduler_operation.hpp index 3b5ce98f2207a26345950f750e871943b15f3791..0ced982c94acff01d08c4ea4435b7671a629172f 100644 --- a/ethosu/regor/compiler/scheduler_operation.hpp +++ b/ethosu/regor/compiler/scheduler_operation.hpp @@ -84,6 +84,8 @@ public: std::shared_ptr Clone() const { auto clone = std::make_shared(*this); + clone->isGraphInput = false; // Cloned tensor is never graph input + clone->isGraphOutput = false; // Cloned tensor is never graph output clone->uid = GenerateUniqueId(); clone->equivalenceId = GenerateUniqueId(); clone->consumers.clear(); diff --git a/ethosu/regor/compiler/tflite_graph_optimiser.cpp b/ethosu/regor/compiler/tflite_graph_optimiser.cpp index 5570a264610f4fb2765ff1e42ad5d444ae53c28a..78b1bed011fda7292e95ec202eb3d3b22e94b27b 100644 --- a/ethosu/regor/compiler/tflite_graph_optimiser.cpp +++ b/ethosu/regor/compiler/tflite_graph_optimiser.cpp @@ -1581,6 +1581,7 @@ Operation *TFLiteGraphOptimiser::RewriteSpaceToBatchConvBatchToSpace(Graph *cons if ( _supportedOps->Check(newOp.get()) ) { returnOp = newOp.get(); + RecordOptimisation(operation, returnOp); // Disconnect matched pattern prevOp->Disconnect(); nextOp->Disconnect();