From cd96dfdc069870977304d866e973e9e1afff6610 Mon Sep 17 00:00:00 2001
From: Fredrik Svedberg <fredrik.svedberg@arm.com>
Date: Mon, 19 May 2025 15:57:28 +0200
Subject: [PATCH] MLBEDSW-10809 Fix Ethos-U55 RSCG assert

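The LUT size was not accounted for when selecting the block
configuration for operations with fused LUT activations on Ethos-U55.

Op groups now record a UsesLUT requirement when a LUT op is added to
them, and the scheduler uses that requirement to set lutBytes in the
op config query instead of checking for a LUT input on the scheduled
op itself.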

Change-Id: Ic9b070f9949d8eb9385299aeb2d24960bd16147c
Signed-off-by: Fredrik Svedberg <fredrik.svedberg@arm.com>
---
 ethosu/regor/architecture/architecture.hpp       | 7 +++++++
 ethosu/regor/architecture/ethosu55/ethos_u55.cpp | 4 ++++
 ethosu/regor/architecture/ethosu55/ethos_u55.hpp | 2 ++
 ethosu/regor/architecture/ethosu85/ethos_u85.cpp | 3 +++
 ethosu/regor/architecture/ethosu85/ethos_u85.hpp | 2 ++
 ethosu/regor/compiler/scheduler.cpp              | 3 ++-
 6 files changed, 20 insertions(+), 1 deletion(-)
diff --git a/ethosu/regor/architecture/architecture.hpp b/ethosu/regor/architecture/architecture.hpp
index b5d354aa..299f1bb8 100644
--- a/ethosu/regor/architecture/architecture.hpp
+++ b/ethosu/regor/architecture/architecture.hpp
@@ -184,9 +184,16 @@ struct ArchitectureOpGroupQuery
 class ArchitectureOpGroup
 {
 public:
+    enum class Requirement
+    {
+        None = 0,
+        UsesLUT = 1,
+    };
+
     virtual ~ArchitectureOpGroup() = default;
     virtual int Add(const ArchitectureOpGroupQuery &op, const std::vector<int> &dependsOn = {}) = 0;
     virtual bool NeedsAllocation(UniqueId tensorUID) = 0;
+    virtual Flags<Requirement> Requirements() = 0;
 };
 
 enum class ArchAccumulatorSource : uint8_t
diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55.cpp
index d488e56e..f9d30105 100644
--- a/ethosu/regor/architecture/ethosu55/ethos_u55.cpp
+++ b/ethosu/regor/architecture/ethosu55/ethos_u55.cpp
@@ -783,8 +783,12 @@ int EthosU55OpGroup::Add(const ArchitectureOpGroupQuery &op, const std::vector<i
     _opsInternal[_opsCount].dependsOn = dependsOn;
     _opsCount++;
 
+    // Update requirements
+    if ( op.type == OpType::LUT ) _requirements.Set(Requirement::UsesLUT);
+
     return key;
 }
+
 bool EthosU55OpGroup::NeedsAllocation(UniqueId tensorUID)
 {
     return _fusedTensors.count(tensorUID) == 0;
diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55.hpp b/ethosu/regor/architecture/ethosu55/ethos_u55.hpp
index 5b3de60e..76b7f20a 100644
--- a/ethosu/regor/architecture/ethosu55/ethos_u55.hpp
+++ b/ethosu/regor/architecture/ethosu55/ethos_u55.hpp
@@ -132,10 +132,12 @@ private:
     std::array<InternalOpInfo, 2> _opsInternal;
     int _opsCount = 0;
     std::unordered_set<UniqueId> _fusedTensors;
+    Flags<Requirement> _requirements = Requirement::None;
 
 public:
     int Add(const ArchitectureOpGroupQuery &op, const std::vector<int> &dependsOn = {}) override;
     bool NeedsAllocation(UniqueId TensorUID) override;
+    Flags<Requirement> Requirements() override { return _requirements; };
 };
 
 /// <summary>
diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85.cpp
index a11d2b1e..40070b88 100644
--- a/ethosu/regor/architecture/ethosu85/ethos_u85.cpp
+++ b/ethosu/regor/architecture/ethosu85/ethos_u85.cpp
@@ -1602,6 +1602,9 @@ int EthosU85OpGroup::Add(const ArchitectureOpGroupQuery &op, const std::vector<i
     _opsInternal[_opsCount].dependsOn = dependsOn;
     _opsCount++;
 
+    // Update requirements
+    if ( op.type == OpType::LUT ) _requirements.Set(Requirement::UsesLUT);
+
     return key;
 }
 
diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85.hpp b/ethosu/regor/architecture/ethosu85/ethos_u85.hpp
index 5f734239..7a0a5766 100644
--- a/ethosu/regor/architecture/ethosu85/ethos_u85.hpp
+++ b/ethosu/regor/architecture/ethosu85/ethos_u85.hpp
@@ -114,6 +114,7 @@ class EthosU85OpGroup : public ArchitectureOpGroup
 
 private:
     ArchEthosU85 *_arch;
+    Flags<Requirement> _requirements = Requirement::None;
     std::array<OpInfo, 8> _ops;
     std::array<InternalOpInfo, 8> _opsInternal;
     std::unordered_map<UniqueId, int> _tensorCbMap;
@@ -133,6 +134,7 @@ public:
     EthosU85OpGroup(ArchEthosU85 *arch) : _arch(arch){};
     int Add(const ArchitectureOpGroupQuery &op, const std::vector<int> &dependsOn = {}) override;
     bool NeedsAllocation(UniqueId tensorUID) override;
+    Flags<Requirement> Requirements() override { return _requirements; };
 
 protected:
     int ChainingBuffer(UniqueId tensorUID);
diff --git a/ethosu/regor/compiler/scheduler.cpp b/ethosu/regor/compiler/scheduler.cpp
index c0509540..fc61376b 100644
--- a/ethosu/regor/compiler/scheduler.cpp
+++ b/ethosu/regor/compiler/scheduler.cpp
@@ -415,6 +415,7 @@ std::unique_ptr<ArchitectureOpConfig> GetOpConfig(Architecture *arch, SchedulerO
     const Shape &ifm2Shape, const Shape &ofmShape, WeightFormat wgtFormat)
 {
     assert(op->IsNpuOp());
+    using OpGroupReq = ArchitectureOpGroup::Requirement;
 
     SchedulerConnection *ifm = op->IFM(0);
     SchedulerConnection *ifm2 = op->TryIFM(1);
@@ -427,7 +428,7 @@ std::unique_ptr<ArchitectureOpConfig> GetOpConfig(Architecture *arch, SchedulerO
     query.ifmBits = DataTypeSizeBits(ifm->Type());
     query.ofmBits = DataTypeSizeBits(ofm->Type());
     query.kernel = op->Kernel();
-    query.lutBytes = op->TryInput(TensorUsage::LUT) ? 2048 : 0;
+    query.lutBytes = op->OpGroup()->Requirements().Any(OpGroupReq::UsesLUT) ? 2048 : 0;
     query.scaled = op->HasScaling();
     query.ifmResampling = ifm->resamplingMode;
     query.ofmShape = query.ofmShape.Unpermute(uint32_t(ofm->transpose));
-- 
GitLab