diff --git a/ethosu/regor/architecture/architecture.hpp b/ethosu/regor/architecture/architecture.hpp index b5d354aa0e85ff4f199a5b1f97f5b6ce16293eb4..299f1bb8e0c0fcdc5aff31645b808724c847414e 100644 --- a/ethosu/regor/architecture/architecture.hpp +++ b/ethosu/regor/architecture/architecture.hpp @@ -184,9 +184,16 @@ struct ArchitectureOpGroupQuery class ArchitectureOpGroup { public: + enum class Requirement + { + None = 0, + UsesLUT = 1, + }; + virtual ~ArchitectureOpGroup() = default; virtual int Add(const ArchitectureOpGroupQuery &op, const std::vector &dependsOn = {}) = 0; virtual bool NeedsAllocation(UniqueId tensorUID) = 0; + virtual Flags Requirements() = 0; }; enum class ArchAccumulatorSource : uint8_t diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55.cpp index d488e56ee51162b05d00f9d099c88b3cf3bbd298..f9d30105e7ee59e89e500880f1d174d511faa924 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55.cpp @@ -783,8 +783,12 @@ int EthosU55OpGroup::Add(const ArchitectureOpGroupQuery &op, const std::vector _opsInternal; int _opsCount = 0; std::unordered_set _fusedTensors; + Flags _requirements = Requirement::None; public: int Add(const ArchitectureOpGroupQuery &op, const std::vector &dependsOn = {}) override; bool NeedsAllocation(UniqueId TensorUID) override; + Flags Requirements() override { return _requirements; }; }; /// diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85.cpp index a11d2b1e5f11e4e0f385e3d5f0a393bd05e718c2..40070b88d90cfc4e7a761103dc5c3049f6809cf4 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85.cpp @@ -1602,6 +1602,9 @@ int EthosU85OpGroup::Add(const ArchitectureOpGroupQuery &op, const std::vector _requirements = Requirement::None; std::array _ops; std::array _opsInternal; std::unordered_map _tensorCbMap; @@ -133,6 +134,7 @@ public: EthosU85OpGroup(ArchEthosU85 *arch) : _arch(arch){}; int Add(const ArchitectureOpGroupQuery &op, const std::vector &dependsOn = {}) override; bool NeedsAllocation(UniqueId tensorUID) override; + Flags Requirements() override { return _requirements; }; protected: int ChainingBuffer(UniqueId tensorUID); diff --git a/ethosu/regor/compiler/scheduler.cpp b/ethosu/regor/compiler/scheduler.cpp index c0509540aad2ee10e80d2317832f7be65849a4ff..fc61376b3fa3f342ca239ae1d0ecd9ac96846591 100644 --- a/ethosu/regor/compiler/scheduler.cpp +++ b/ethosu/regor/compiler/scheduler.cpp @@ -415,6 +415,7 @@ std::unique_ptr GetOpConfig(Architecture *arch, SchedulerO const Shape &ifm2Shape, const Shape &ofmShape, WeightFormat wgtFormat) { assert(op->IsNpuOp()); + using OpGroupReq = ArchitectureOpGroup::Requirement; SchedulerConnection *ifm = op->IFM(0); SchedulerConnection *ifm2 = op->TryIFM(1); @@ -427,7 +428,7 @@ std::unique_ptr GetOpConfig(Architecture *arch, SchedulerO query.ifmBits = DataTypeSizeBits(ifm->Type()); query.ofmBits = DataTypeSizeBits(ofm->Type()); query.kernel = op->Kernel(); - query.lutBytes = op->TryInput(TensorUsage::LUT) ? 2048 : 0; + query.lutBytes = op->OpGroup()->Requirements().Any(OpGroupReq::UsesLUT) ? 2048 : 0; query.scaled = op->HasScaling(); query.ifmResampling = ifm->resamplingMode; query.ofmShape = query.ofmShape.Unpermute(uint32_t(ofm->transpose));