From d884d7dc81ddb4e553c60282b67e38a17b40db34 Mon Sep 17 00:00:00 2001 From: Philip Hall Date: Mon, 27 Jan 2025 12:22:48 +0000 Subject: [PATCH] MLBEDSW-10106: Prerequisite work for Ethos-U55 matmul. Update constraints query mechanism to allow the architecture to return additional requirements for executing operators (including allocating scratch tensors). - Added matmul handling stubs for Ethos-U55. - Fixed tflite Resize operator check. - Fixed missing Sigmoid/LUT conversion checks. - Fixed bisecting search failure test in graph builder. Signed-off-by: Philip Hall Change-Id: I8a612f709cc2d846caf13ec8b18d309c4cc66753 --- .../architecture/architecture_constraints.hpp | 56 ++++++++- .../regor/architecture/ethosu55/ethos_u55.cpp | 5 +- .../ethosu55/ethos_u55_constraints.cpp | 108 ++++++++++++++++- .../ethosu55/ethos_u55_constraints.hpp | 10 +- .../ethosu55/ethos_u55_performance.cpp | 10 +- .../ethos_u55_register_cs_generator.cpp | 4 + .../regor/architecture/ethosu85/ethos_u85.cpp | 24 ++-- .../ethosu85/ethos_u85_constraints.cpp | 109 +++++++++++++++--- .../ethosu85/ethos_u85_constraints.hpp | 2 +- ethosu/regor/common/common.hpp | 24 ++++ ethosu/regor/compiler/graph_builder.cpp | 22 +--- ethosu/regor/compiler/graphir_optimiser.cpp | 10 +- ethosu/regor/compiler/operation_util.hpp | 12 +- ethosu/regor/compiler/scheduler.cpp | 22 +++- ethosu/regor/compiler/scheduler_decompose.cpp | 26 ++++- ethosu/regor/compiler/scheduler_decompose.hpp | 16 ++- ethosu/regor/compiler/scheduler_operation.hpp | 25 +++- ethosu/regor/compiler/scheduler_packing.cpp | 26 +++++ ethosu/regor/compiler/tensor_properties.hpp | 2 + .../regor/compiler/tflite_graph_optimiser.cpp | 45 +++++--- ethosu/regor/tflite/tflite_reader.cpp | 1 + 21 files changed, 458 insertions(+), 101 deletions(-) diff --git a/ethosu/regor/architecture/architecture_constraints.hpp b/ethosu/regor/architecture/architecture_constraints.hpp index dab6adc1..7cc9c9f1 100644 --- a/ethosu/regor/architecture/architecture_constraints.hpp 
+++ b/ethosu/regor/architecture/architecture_constraints.hpp @@ -79,6 +79,40 @@ struct ExecutionQuery bool quantScalingInvalidOrUnequal = false; }; +struct ArchOperatorQuery +{ + ArchFM ifm[2]; + ArchFM ofm; + ReverseType reverseMask = ReverseType::None; + TransposeType transposeMask = TransposeType::None; + struct + { + ResizeSupportQuery resize; + } specific; + ~ArchOperatorQuery(){}; +}; + +enum class ArchRequirement +{ + None = 0, + ScratchTensor = 1, + OutputFormat = 2, + OpSubstitution = 4, +}; + +struct ArchRequirements +{ + Flags req; + struct + { + Shape size; + DataType type = DataType::None; + TensorFormat format = TensorFormat::Unknown; + } scratch; + TensorFormat ofmFormat = TensorFormat::Unknown; + OpType substitution = OpType::None; +}; + enum class TransposeSupport { None, @@ -87,6 +121,23 @@ enum class TransposeSupport Any = NHWC | NHCWB16, }; +// Results for operator queries can return a combination of the +// following flags. +// Native - Operator supported natively in some or all cases (see other flags). +// Constrained - Not all operator cases have support (detailed queries may fail). +// HasRequirements - Cases are supported if architecture requirements are met. 
+enum class QueryResult +{ + None = 0, + Unsupported = 1, + Native = 2, + Constrained = 4, + HasRequirements = 8, + NativeHasReq = Native | HasRequirements, + NativeConstrained = Native | Constrained, + NativeConstrainedHasReq = Native | Constrained | HasRequirements, +}; + /// /// Architecture capabilties query /// @@ -104,6 +155,7 @@ public: virtual bool SupportsLeakyRelu(bool quantized, DataType type) = 0; virtual bool SupportsNegativeStrides() = 0; virtual bool SupportsNot() = 0; + virtual Flags OperatorQuery(OpType opType, const ArchOperatorQuery *query, ArchRequirements *req = nullptr) = 0; bool CanExecute(const ExecutionQuery &query) { @@ -133,6 +185,9 @@ public: case OpType::Cast: valid = SupportsCast(query.opType, query.ifmType, query.ofmType); break; + case OpType::Resize: + valid = SupportsResize(query.resizeQuery); + break; default: break; } @@ -147,7 +202,6 @@ protected: virtual bool SupportsMatMul(OpType opType) = 0; virtual bool SupportsGather(OpType opType) = 0; virtual bool SupportsScatter(OpType opType) = 0; - virtual bool SupportsSigmoidTanhLutInt16(OpType opType) = 0; virtual bool SupportsResize(const ResizeSupportQuery &query) = 0; virtual bool SupportsArgMax(OpType opType) = 0; virtual bool SupportsCast(OpType opType, DataType ifmType, DataType ofmType) = 0; diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55.cpp index 15a7ba8c..71a68f24 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -634,6 +634,7 @@ EthosU55NpuOp ArchEthosU55::GetHWOp(OpType type) {OpType::Rescale, EthosU55NpuOp::Pooling}, {OpType::Tile, EthosU55NpuOp::Dma}, {OpType::Transpose, EthosU55NpuOp::Compound}, + {OpType::MatMul, 
EthosU55NpuOp::Compound}, }; auto pos = toNpuOp.find(type); if ( pos != toNpuOp.end() ) @@ -812,7 +813,7 @@ bool EthosU55OpGroup::CanRunOnNPU(const ArchitectureOpGroupQuery &op) if ( npuOp != EthosU55NpuOp::Elementwise ) { if ( op.type == OpType::LUT || op.type == OpType::MemoryCopy || op.type == OpType::Rescale || - op.type == OpType::Tile || op.type == OpType::Transpose ) + op.type == OpType::Tile || op.type == OpType::Transpose || npuOp == EthosU55NpuOp::Compound ) { // TODO: LUT operations end up here due to UseAvgPoolNop although the rules are not the same as // for a Pooling operation, so skip checks for now. return true; diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp index a97ca2bf..021959a2 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp @@ -23,6 +23,30 @@ namespace regor { +// Unsupported operators - must be sorted ascending +static constexpr OpType s_unsupportedU55[] = { + OpType::None, + OpType::ArgMax, + OpType::Gather, + OpType::Scatter, + OpType::Resize, + OpType::Cast, +}; + +static_assert(is_sorted(s_unsupportedU55), "list must be sorted"); + +// Short query +static constexpr std::pair s_shortU55[] = { + {OpType::Transpose, QueryResult::NativeConstrained}, +}; + +static_assert(is_sorted(s_shortU55, [](const auto &a, const auto &b) { return a.first < b.first; }), "list must be sorted"); + + +EthosU55Constraints::EthosU55Constraints(ArchEthosU55 *arch) : _arch(arch) +{ +} + bool EthosU55Constraints::SupportsLeakyRelu(bool quantized, DataType type) { return quantized == false && type == DataType::Int16; @@ -155,12 +179,6 @@ bool EthosU55Constraints::SupportsResize(const ResizeSupportQuery &query) return false; } -bool EthosU55Constraints::SupportsSigmoidTanhLutInt16(OpType opType) -{ - UNUSED(opType); - return false; -} - bool EthosU55Constraints::SupportsArgMax(OpType 
opType) { UNUSED(opType); @@ -174,9 +192,87 @@ bool EthosU55Constraints::SupportsCast(OpType opType, DataType ifmType, DataType UNUSED(ofmType); return false; } + bool EthosU55Constraints::SupportsNonMatchingShapes(const Shape &ifmShape, const Shape &ifm2Shape, const Shape &ofmShape) { return (ifmShape == ofmShape) || (ifm2Shape && (ifm2Shape == ofmShape)); } + +Flags EthosU55Constraints::OperatorQuery(OpType opType, const ArchOperatorQuery *query, ArchRequirements *req) +{ + // Check unsupported operator list before further checks + auto posUnsupported = std::equal_range(std::begin(s_unsupportedU55), std::end(s_unsupportedU55), opType); + if ( posUnsupported.first != posUnsupported.second ) + { + return QueryResult::Unsupported; + } + + // Short query (no additional detail) + if ( !query ) + { + auto posShort = std::equal_range(std::begin(s_shortU55), std::end(s_shortU55), + std::pair{opType, {}}, [](const auto &a, const auto &b) { return a.first < b.first; }); + if ( posShort.first != posShort.second ) + { + return posShort.first->second; + } + return QueryResult::Native; + } + + // Float types always unsupported + if ( (query->ifm[0].shape && IsFloat(query->ifm[0].type)) || (query->ifm[1].shape && IsFloat(query->ifm[1].type)) || + (query->ofm.shape && IsFloat(query->ofm.type)) ) + { + return QueryResult::Unsupported; + } + + // Reverse never supported + if ( query->reverseMask != ReverseType::None ) + { + return QueryResult::Unsupported; + } + + // Detailed operator queries + if ( !IsNone(query->transposeMask) ) + { + if ( opType == OpType::Transpose ) + { + if ( query->transposeMask == TransposeType::NWHC || query->transposeMask == TransposeType::NHCW || + query->transposeMask == TransposeType::NCWH ) + { + if ( req ) req->ofmFormat = TensorFormat::NHWC; + return QueryResult::NativeConstrainedHasReq; + } + } + return QueryResult::Unsupported; + } + + if ( opType == OpType::MatMul ) + { + if ( req ) + { + req->req = ArchRequirement::ScratchTensor; + 
req->scratch.size = query->ofm.shape; + req->scratch.type = DataType::Int32; + req->scratch.format = TensorFormat::NHWC; + } + return QueryResult::Unsupported; + } + else if ( (opType == OpType::Sigmoid) || (opType == OpType::Tanh) ) + { + if ( query->ifm[0].type != DataType::Int16 ) + { + if ( req ) + { + req->req = ArchRequirement::OpSubstitution; + req->substitution = OpType::LUT; + } + return QueryResult::NativeHasReq; + } + } + return QueryResult::Native; +} + + } // namespace regor diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.hpp b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.hpp index b091ee5a..b9ed473c 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.hpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.hpp @@ -24,8 +24,11 @@ namespace regor class EthosU55Constraints : public IArchitectureConstraints { +private: + ArchEthosU55 *_arch = nullptr; + public: - EthosU55Constraints(ArchEthosU55 *arch) : _arch(arch) {} + EthosU55Constraints(ArchEthosU55 *arch); bool SupportsLeakyRelu(bool quantized, DataType type) override; bool SupportsMatMul(OpType opType) override; @@ -37,16 +40,13 @@ public: bool SupportsAccumulatorSaveRestore() override { return false; } bool SupportsGather(OpType opType) override; bool SupportsScatter(OpType opType) override; - bool SupportsSigmoidTanhLutInt16(OpType opType) override; bool SupportsResize(const ResizeSupportQuery &query) override; bool SupportsArgMax(OpType opType) override; bool SupportsCast(OpType opType, DataType ifmType, DataType ofmType) override; bool SupportsNonMatchingShapes(const Shape &ifmShape, const Shape &ifm2Shape, const Shape &ofmShape) override; bool SupportsNegativeStrides() override { return true; }; bool SupportsNot() override { return false; }; - -private: - ArchEthosU55 *_arch; + Flags OperatorQuery(OpType opType, const ArchOperatorQuery *query, ArchRequirements *req) override; }; } // namespace regor diff --git 
a/ethosu/regor/architecture/ethosu55/ethos_u55_performance.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_performance.cpp index be5a3b90..9032c504 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_performance.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_performance.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -98,7 +98,7 @@ CycleCost EthosU55Performance::MeasureCycleCost(const PerformanceQuery &query, c else if ( npuOp == EthosU55NpuOp::Compound ) { // TODO: Measure variable-implementation ops - assert(query.type == OpType::Transpose); + assert(query.type == OpType::Transpose || query.type == OpType::MatMul); cycles.opCycles = EstimateMinimumMemoryCycles(query); } else @@ -566,7 +566,11 @@ ElementAccess EthosU55Performance::MeasureElementAccess(const PerformanceQuery & else if ( query.type == OpType::Transpose ) { access.ifmRead[0] = query.ifmShape[0].Elements(); - access.ofmWrite = query.ofmShape.Elements(); + } + else if ( query.type == OpType::MatMul ) + { + access.ifmRead[0] = query.ifmShape[0].Elements(); + access.ifmRead[1] = query.ifmShape[1].Elements(); } else { diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp index e04aac3c..11469d1e 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp @@ -1752,6 +1752,10 @@ void EthosU55RCSGenerator::PrepareCommand(int index, HighLevelCommand *cmd, Temp InsertTransposeCommand(stripe, temps, emitted); return; } + else if ( op->type == OpType::MatMul ) + { + return; // Delete until implemented + } else if ( _arch->_shram.reservedEndBanks == 0 ) { // LUT is overwritten by SHRAM accumulator buffers; clear 
slots diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85.cpp index dff971ff..28e23e70 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85.cpp @@ -1456,14 +1456,13 @@ bool EthosU85OpGroup::Fuse(const ArchitectureOpGroupQuery &op, const std::vector return false; } - // Can't fuse a transpose type that's not supported by primaryOp in opgroup - if ( _arch->_constraints->SupportsTranspose(_ops[0].type, op.ofm.transpose) == TransposeSupport::None ) - { - return false; - } + EthosU85Constraints *constraints = static_cast(_arch->_constraints.get()); - // Can't fuse a reverse type that's not supported by primaryOp in opgroup - if ( !_arch->_constraints->SupportsReverse(_ops[0].type, op.ofm.reverse) ) + // Can't fuse a transpose or reverse type that's not supported by primaryOp in opgroup + ArchOperatorQuery query; + query.reverseMask = op.ofm.reverse; + query.transposeMask = op.ofm.transpose; + if ( !constraints->OperatorQuery(_ops[0].type, &query, nullptr).Any(QueryResult::Native) ) { return false; } @@ -1780,15 +1779,14 @@ bool EthosU85OpGroup::CanRunOnNPU(const ArchitectureOpGroupQuery &op) return true; } - if ( op.type == OpType::Transpose ) + ArchOperatorQuery query; + query.transposeMask = op.ofm.transpose; + query.reverseMask = op.ofm.reverse; + if ( !_arch->_constraints->OperatorQuery(OpType::MemoryCopy, &query, nullptr).Any(QueryResult::Native) ) { - return _arch->_constraints->SupportsTranspose(OpType::MemoryCopy, op.ofm.transpose) != TransposeSupport::None; + return false; } - if ( op.type == OpType::Reverse ) - { - return _arch->_constraints->SupportsReverse(OpType::MemoryCopy, op.ofm.reverse); - } auto map = s_opDataTypeSupport.find(npuOp); if ( map == s_opDataTypeSupport.end() ) { diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.cpp index b24ebb40..6ab78def 
100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.cpp @@ -23,6 +23,21 @@ namespace regor { + +// Unsupported operators - must be sorted ascending +static constexpr OpType s_unsupportedU85[] = {OpType::None}; + +static_assert(is_sorted(s_unsupportedU85), "list must be sorted"); + + +// Short query +static constexpr std::pair s_shortU85[] = { + {OpType::Transpose, QueryResult::Native}, +}; + +static_assert(is_sorted(s_shortU85, [](const auto &a, const auto &b) { return a.first < b.first; }), "list must be sorted"); + + bool EthosU85Constraints::SupportsLeakyRelu(bool /*quantized*/, DataType /*type*/) { return true; @@ -41,7 +56,7 @@ bool EthosU85Constraints::SupportsMatMul(OpType opType) TransposeSupport EthosU85Constraints::SupportsTranspose(OpType opType, TransposeType transposeType) { - if ( IsNone(transposeType) ) return TransposeSupport::Any; + if ( transposeType == TransposeType::None ) return TransposeSupport::Any; EthosU85NpuOp npuOp = ArchEthosU85::GetHWOp(opType); if ( npuOp == EthosU85NpuOp::None || npuOp == EthosU85NpuOp::Resize || npuOp == EthosU85NpuOp::Dma ) @@ -184,11 +199,6 @@ bool EthosU85Constraints::SupportsScatter(OpType opType) return true; } -bool EthosU85Constraints::SupportsSigmoidTanhLutInt16(OpType opType) -{ - return (opType == OpType::Sigmoid || opType == OpType::Tanh); -} - bool EthosU85Constraints::SupportsArgMax(OpType opType) { EthosU85NpuOp npuOp = ArchEthosU85::GetHWOp(opType); @@ -226,60 +236,64 @@ bool EthosU85Constraints::SupportsResize(const ResizeSupportQuery &query) int d_w = query.scaleX.d; int n_h = query.scaleY.n; int d_h = query.scaleY.d; - bool supported = true; if ( n_h > 2048 ) { LOG_WARN("Resize height scale numerator ({}) exceeds maximum size (2048).\n", n_h); - supported = false; + return false; } if ( n_w > 2048 ) { LOG_WARN("Resize width scale numerator ({}) exceeds maximum size (2048).\n", n_w); - supported = false; + return 
false; } if ( query.offsetY >= n_h || query.offsetY < -n_h ) { LOG_WARN("Resize height offset: {} is outside the valid range [-height_numerator, height_numerator) = [{}, {})\n", query.offsetY, -n_h, n_h); - supported = false; + return false; } if ( query.offsetX >= n_w || query.offsetX < -n_w ) { LOG_WARN("Resize width offset: {} is outside the valid range [-with_numerator, width_numerator) = [{}, {})\n", query.offsetX, -n_w, n_w); - supported = false; + return false; } if ( query.mode == ArchResizeMode::Bilinear ) { - // Get scale fractions and verify that scale-factor is a power of two. + if ( d_w == 0 || d_h == 0 ) + { + LOG_WARN("ResizeBilinear w/h divisors can't be zero\n"); + return false; + } + // Get scale fractions and verify that scale-factor is a power of two. if ( n_w % d_w != 0 ) { LOG_WARN("ResizeBilinear width scale-factor is not an integer: {}/{}\n", n_w, d_w); - supported = false; + return false; } if ( n_h % d_h != 0 ) { LOG_WARN("ResizeBilinear height scale-factor is not an integer: {}/{}\n", n_h, d_h); - supported = false; + return false; } int scale_w = n_w / d_w; int scale_h = n_h / d_h; if ( !IsPowerOfTwo(scale_w) ) { LOG_WARN("ResizeBilinear width scale-factor is not a power of two: {}\n", double(n_w) / d_w); - supported = false; + return false; } if ( !IsPowerOfTwo(scale_h) ) { LOG_WARN("ResizeBilinear height scale-factor is not a power of two: {}\n", double(n_h) / d_h); - supported = false; + return false; } - return supported; } - return supported; + + return true; } bool EthosU85Constraints::SupportsCast(OpType opType, DataType ifmType, DataType ofmType) @@ -292,4 +306,63 @@ bool EthosU85Constraints::SupportsNonMatchingShapes(const Shape &ifmShape, const return true; } + +Flags EthosU85Constraints::OperatorQuery(OpType opType, const ArchOperatorQuery *query, ArchRequirements *req) +{ + // Check unsupported operator list first + auto posUnsupported = std::equal_range(std::begin(s_unsupportedU85), std::end(s_unsupportedU85), opType); + if 
( posUnsupported.first != posUnsupported.second ) + { + return QueryResult::Unsupported; + } + + // Short query (no additional detail) + if ( !query ) + { + auto posShort = std::equal_range(std::begin(s_shortU85), std::end(s_shortU85), + std::pair{opType, {}}, [](const auto &a, const auto &b) { return a.first < b.first; }); + if ( posShort.first != posShort.second ) + { + return posShort.first->second; + } + return QueryResult::Native; + } + + // Float types always unsupported + if ( (query->ifm[0].shape && IsFloat(query->ifm[0].type)) || (query->ifm[1].shape && IsFloat(query->ifm[1].type)) || + (query->ofm.shape && IsFloat(query->ofm.type)) ) + { + return QueryResult::Unsupported; + } + + if ( query->transposeMask != TransposeType::None ) + { + TransposeSupport tmp = SupportsTranspose(opType, query->transposeMask); + if ( tmp == TransposeSupport::None ) return QueryResult::Unsupported; + } + + if ( query->reverseMask != ReverseType::None ) + { + if ( !SupportsReverse(opType, query->reverseMask) ) return QueryResult::Unsupported; + } + + // Operator specific + if ( opType == OpType::Resize ) + { + if ( !query->specific.resize.ifmShape ) return QueryResult::Unsupported; // TODO: remove from ResizeQuery + if ( !SupportsResize(query->specific.resize) ) return QueryResult::Unsupported; + } + else if ( (opType == OpType::Sigmoid) || (opType == OpType::Tanh) ) + { + if ( req ) + { + req->req = ArchRequirement::OpSubstitution; + req->substitution = OpType::LUT; + } + return QueryResult::NativeHasReq; + } + + return QueryResult::Native; +} + } // namespace regor diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.hpp b/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.hpp index fcd6a369..784d9266 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.hpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.hpp @@ -37,13 +37,13 @@ public: bool SupportsAccumulatorSaveRestore() override { return true; } bool
SupportsGather(OpType opType) override; bool SupportsScatter(OpType opType) override; - bool SupportsSigmoidTanhLutInt16(OpType opType) override; bool SupportsResize(const ResizeSupportQuery &query) override; bool SupportsArgMax(OpType opType) override; bool SupportsCast(OpType opType, DataType ifmType, DataType ofmType) override; bool SupportsNonMatchingShapes(const Shape &ifmShape, const Shape &ifm2Shape, const Shape &ofmShape) override; bool SupportsNegativeStrides() override { return false; }; bool SupportsNot() override { return true; }; + Flags OperatorQuery(OpType opType, const ArchOperatorQuery *query, ArchRequirements *req) override; private: ArchEthosU85 *_arch; diff --git a/ethosu/regor/common/common.hpp b/ethosu/regor/common/common.hpp index 878374cd..62a709d1 100644 --- a/ethosu/regor/common/common.hpp +++ b/ethosu/regor/common/common.hpp @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -165,4 +166,27 @@ static constexpr uint32_t PlatformTypeHash() return FNVHashBytes(p, int(e - p)); } +// version not constexpr until C++20 +template +constexpr bool is_sorted(const TYPE (&list)[SIZE], LESS func) +{ + if constexpr ( SIZE > 1 ) + { + const TYPE *v = list; + for ( size_t i = 1; i < SIZE; i++ ) + { + if ( func(list[i], *v) ) return false; + v = list + i; + } + } + return true; +} + +template +constexpr bool is_sorted(const TYPE (&list)[SIZE]) +{ + return is_sorted(list, std::less()); +} + + } // namespace regor diff --git a/ethosu/regor/compiler/graph_builder.cpp b/ethosu/regor/compiler/graph_builder.cpp index 29013452..3dd3ec06 100644 --- a/ethosu/regor/compiler/graph_builder.cpp +++ b/ethosu/regor/compiler/graph_builder.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2022-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2022-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -133,25 +133,13 @@ static constexpr std::pair s_aTypeMapp }; // clang-format on 
-template -constexpr bool is_sorted(const std::pair (&list)[SIZE]) -{ - A v = list[0].first; - for ( size_t i = 1; i < SIZE; i++ ) - { - if ( list[i].first < v ) return false; - v = list[i].first; - } - return true; -} - -static_assert(is_sorted(s_aTosaMapping), "TOSA mapping must be sorted"); +static_assert(is_sorted(s_aTosaMapping, [](const auto &a, const auto &b) { return a.first < b.first; }), "TOSA mapping must be sorted"); bool map_tosa_op(tosa::Op op, regor::OpType &tosaOp) { auto pos = std::equal_range(std::begin(s_aTosaMapping), std::end(s_aTosaMapping), std::pair(op, {}), [](const auto &a, const auto &b) { return a.first < b.first; }); - if ( pos.first == std::end(s_aTosaMapping) ) + if ( pos.first == pos.second ) { return false; } @@ -160,14 +148,14 @@ bool map_tosa_op(tosa::Op op, regor::OpType &tosaOp) return true; } -static_assert(is_sorted(s_aTypeMapping), "Type mapping must be sorted"); +static_assert(is_sorted(s_aTypeMapping, [](const auto &a, const auto &b) { return a.first < b.first; }), "Type mapping must be sorted"); bool map_data_type(GraphApi::GraphDataType type, regor::DataType &out) { auto pos = std::equal_range(std::begin(s_aTypeMapping), std::end(s_aTypeMapping), std::pair(type, {}), [](const auto &a, const auto &b) { return a.first < b.first; }); - if ( pos.first == std::end(s_aTypeMapping) ) + if ( pos.first == pos.second ) { return false; } diff --git a/ethosu/regor/compiler/graphir_optimiser.cpp b/ethosu/regor/compiler/graphir_optimiser.cpp index ddcca184..1ac16c86 100644 --- a/ethosu/regor/compiler/graphir_optimiser.cpp +++ b/ethosu/regor/compiler/graphir_optimiser.cpp @@ -1603,8 +1603,9 @@ Operation *GraphIrOptimiser::MergeTransposes(Graph *const graph, Operation *cons Shape finalMapping = prevMapping.Permute(unsigned(curTranspose)); TransposeType mergedTranspose = TransposeTypeFromShape(finalMapping); - // The single merged transpose is supported - if ( _constraints->SupportsTranspose(OpType::Transpose, mergedTranspose) != 
TransposeSupport::None ) + ArchOperatorQuery query; + query.transposeMask = mergedTranspose; + if ( _constraints->OperatorQuery(OpType::Transpose, &query, nullptr).Any(QueryResult::Native) ) { // Change the transpose attribute on the preceding transpose and remove this one prevAttr->perm = finalMapping; @@ -1650,8 +1651,9 @@ Operation *GraphIrOptimiser::RearrangeTranspose(Graph *const graph, Operation *c Shape perm = attr->perm; // Don't bother with rearrangement if transpose type is already supported - auto transposeType = TransposeTypeFromShape(perm); - if ( _constraints->SupportsTranspose(OpType::Transpose, transposeType) != TransposeSupport::None ) + ArchOperatorQuery query; + query.transposeMask = TransposeTypeFromShape(perm); + if ( _constraints->OperatorQuery(OpType::Transpose, &query, nullptr).Any(QueryResult::Native) ) { return returnOp; } diff --git a/ethosu/regor/compiler/operation_util.hpp b/ethosu/regor/compiler/operation_util.hpp index 1bf62b5a..2d32aa56 100644 --- a/ethosu/regor/compiler/operation_util.hpp +++ b/ethosu/regor/compiler/operation_util.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -282,4 +282,14 @@ inline bool IsScalingValidAndEqual(const TensorConnection &a, const TensorConnec #undef FOR_ALL_INT_TYPES +inline ArchFM &Set(ArchFM &fm, const Tensor *src) +{ + if ( src ) + { + fm.type = src->Type(); + fm.shape = src->StorageShape(); + } + return fm; +} + } // namespace regor diff --git a/ethosu/regor/compiler/scheduler.cpp b/ethosu/regor/compiler/scheduler.cpp index 214ba7cc..9a5ea29a 100644 --- a/ethosu/regor/compiler/scheduler.cpp +++ b/ethosu/regor/compiler/scheduler.cpp @@ -230,10 +230,15 @@ static int UpdateSchedulerTensor(Architecture *arch, TensorUsage usage, Schedule } else if ( producer->Type() == OpType::Transpose ) { - TransposeSupport 
supported = arch->Constraints()->SupportsTranspose(OpType::Transpose, producer->OFM()->transpose); - if ( supported == TransposeSupport::NHWC ) + ArchRequirements req; + ArchOperatorQuery query; + query.transposeMask = producer->OFM()->transpose; + if ( arch->Constraints()->OperatorQuery(OpType::Transpose, &query, &req).Any(QueryResult::Native) ) { - tensor->needsLinearFormat = true; + if ( req.ofmFormat == TensorFormat::NHWC ) + { + tensor->needsLinearFormat = true; + } } } @@ -270,10 +275,15 @@ static int UpdateSchedulerTensor(Architecture *arch, TensorUsage usage, Schedule } else if ( consumer->Type() == OpType::Transpose ) { - TransposeSupport supported = arch->Constraints()->SupportsTranspose(OpType::Transpose, consumer->OFM()->transpose); - if ( supported == TransposeSupport::NHWC ) + ArchRequirements req; + ArchOperatorQuery query; + query.transposeMask = consumer->OFM()->transpose; + if ( arch->Constraints()->OperatorQuery(OpType::Transpose, &query, &req).Any(QueryResult::Native) ) { - tensor->needsLinearFormat = true; + if ( req.ofmFormat == TensorFormat::NHWC ) + { + tensor->needsLinearFormat = true; + } } } diff --git a/ethosu/regor/compiler/scheduler_decompose.cpp b/ethosu/regor/compiler/scheduler_decompose.cpp index 50cc6714..f5292271 100644 --- a/ethosu/regor/compiler/scheduler_decompose.cpp +++ b/ethosu/regor/compiler/scheduler_decompose.cpp @@ -20,7 +20,6 @@ #include "common/logging.hpp" -#include "architecture/architecture_constraints.hpp" #include "shape_util.hpp" #include @@ -206,6 +205,17 @@ bool CanRunOnHardware(Architecture *arch, const SchedulerOperation *schedOp) { auto &ofmShape = schedOp->OFM()->SliceShape(); if ( ofmShape.Size() > 2 && ofmShape.Elements() > ofmShape.Width() * ofmShape.Depth() ) return false; + + const auto ofmConn = schedOp->OFM(); + ArchOperatorQuery query; + Set(query.ifm[0], schedOp->IFM(0)); + Set(query.ifm[1], schedOp->IFM(1)); + Set(query.ofm, ofmConn); + query.transposeMask = ofmConn->transpose; + if ( 
!arch->Constraints()->OperatorQuery(OpType::MatMul, &query, nullptr).Any(QueryResult::Native) ) + { + return false; + } } if ( IsConvolution(schedOp->Type()) ) { @@ -220,8 +230,13 @@ bool CanRunOnHardware(Architecture *arch, const SchedulerOperation *schedOp) auto &ofmShape = schedOp->OFM()->SliceShape(); if ( ofmShape.Size() > 3 && ofmShape.Elements() > ofmShape.Width() * ofmShape.Height() * ofmShape.Depth() ) return false; - if ( arch->Constraints()->SupportsTranspose(schedOp->Type(), schedOp->OFM()->transpose) == TransposeSupport::None ) + + ArchOperatorQuery query; + query.transposeMask = schedOp->OFM()->transpose; + if ( !arch->Constraints()->OperatorQuery(OpType::Transpose, &query, nullptr).Any(QueryResult::Native) ) + { return false; + } } auto *ifm = schedOp->TryIFM(0); auto *ifm2 = schedOp->TryIFM(1); @@ -1381,9 +1396,12 @@ std::vector> DecomposeTranspose(Architecture const auto &ifmShape = ifmConn->SliceShape(); const auto axes = ifmShape.Size(); + ArchOperatorQuery query; + query.transposeMask = ofmConn->transpose; + bool supported = arch->Constraints()->OperatorQuery(OpType::Transpose, &query, nullptr).Any(QueryResult::Native); + // We can handle all transpositions in a 3D shape - if ( (axes < 4 || ifmShape.Elements() == ifmShape.Height() * ifmShape.Width() * ifmShape.Depth()) && - arch->Constraints()->SupportsTranspose(op->Type(), ofmConn->transpose) != TransposeSupport::None ) + if ( (axes < 4 || ifmShape.Elements() == ifmShape.Height() * ifmShape.Width() * ifmShape.Depth()) && supported ) { for ( int axis = 0; axis < axes; axis++ ) { diff --git a/ethosu/regor/compiler/scheduler_decompose.hpp b/ethosu/regor/compiler/scheduler_decompose.hpp index fee68181..81c16930 100644 --- a/ethosu/regor/compiler/scheduler_decompose.hpp +++ b/ethosu/regor/compiler/scheduler_decompose.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // 
// SPDX-License-Identifier: Apache-2.0 // @@ -18,6 +18,7 @@ #pragma once +#include "architecture/architecture_constraints.hpp" #include "graph.hpp" #include "scheduler_operation.hpp" @@ -43,4 +44,17 @@ std::vector> DecomposeReduce(Architecture *a std::vector> DecomposeReverse(Architecture *arch, std::unique_ptr op); std::vector> DecomposeTranspose(Architecture *arch, std::unique_ptr op); + +// Operator query helpers +inline ArchFM &Set(ArchFM &fm, const SchedulerConnection *conn) +{ + if ( conn ) + { + fm.type = conn->tensor->dataType; + fm.shape = conn->slice.shape ? conn->slice.shape : conn->shape; + fm.format = conn->tensor->format; + } + return fm; +} + } // namespace regor diff --git a/ethosu/regor/compiler/scheduler_operation.hpp b/ethosu/regor/compiler/scheduler_operation.hpp index ea463633..f1462d07 100644 --- a/ethosu/regor/compiler/scheduler_operation.hpp +++ b/ethosu/regor/compiler/scheduler_operation.hpp @@ -50,7 +50,7 @@ public: MemArea memArea; Shape storageShape; BufferView bufferView; - DataType dataType; + DataType dataType = DataType::None; bool hasCPUReaders = false; bool hasCPUWriters = false; bool isGraphInput = false; @@ -63,6 +63,14 @@ public: std::vector producers; std::vector consumers; + SchedulerTensor() {} + + SchedulerTensor(DataType type, const Shape &shape, TensorFormat fmt = TensorFormat::Unknown) : + format(fmt), storageShape(shape), dataType(type) + { + this->uid = GenerateUniqueId(); + } + void SetAddress(Address address) { assert(allocatedAddress == -1 && address >= 0); @@ -193,7 +201,16 @@ public: void SetAttributes(const Attributes &attr) { _attr = attr; } // Input connections - SchedulerConnection *AddInput(TensorUsage usage) { return &inputs[usage]; } + SchedulerConnection *AddInput(TensorUsage usage, const std::shared_ptr &tensor = {}) + { + auto &conn = inputs[usage]; + if ( tensor ) + { + conn.tensor = tensor; + tensor->consumers.push_back(this); + } + return &conn; + } const SchedulerConnection *TryInput(TensorUsage 
usage) const { return inputs.try_ref(usage); } SchedulerConnection *TryInput(TensorUsage usage) { return inputs.try_ref(usage); } @@ -242,7 +259,7 @@ public: for ( const auto &item : list->pairs() ) { auto usage = item.first & TensorUsage::TypeMask; - if ( usage == TensorUsage::IFM || usage == TensorUsage::OFM || usage == TensorUsage::LUT ) + if ( usage == TensorUsage::IFM || usage == TensorUsage::OFM || usage == TensorUsage::LUT || usage == TensorUsage::Scratch ) { if ( _opGroup == nullptr || _opGroup->NeedsAllocation(item.second.tensor->uid) ) { @@ -259,7 +276,7 @@ public: for ( const auto &item : list->pairs() ) { auto usage = item.first & TensorUsage::TypeMask; - if ( usage == TensorUsage::IFM || usage == TensorUsage::OFM || usage == TensorUsage::LUT ) + if ( usage == TensorUsage::IFM || usage == TensorUsage::OFM || usage == TensorUsage::LUT || usage == TensorUsage::Scratch ) { if ( _opGroup == nullptr || _opGroup->NeedsAllocation(item.second.tensor->uid) ) { diff --git a/ethosu/regor/compiler/scheduler_packing.cpp b/ethosu/regor/compiler/scheduler_packing.cpp index 4a9e6cc1..dfa3c657 100644 --- a/ethosu/regor/compiler/scheduler_packing.cpp +++ b/ethosu/regor/compiler/scheduler_packing.cpp @@ -125,6 +125,7 @@ void SchedulerPacking::FilterOperations(const std::vector &executio for ( Operation *op : executionList ) { auto schedOp = MakeSchedulerOperation(op, graph); + if ( NeedsDecompose(_arch, schedOp.get()) ) { auto schedOps = DecomposeSchedulerOperation(std::move(schedOp)); @@ -521,6 +522,31 @@ std::unique_ptr SchedulerPacking::MakeSchedulerOperation(Ope schedOp->SetPrimaryIfmIndex(1); } } + + // Check that the Architecture understands what do to with this operator + const auto ofmConn = schedOp->OFM(); + const auto ifm0Conn = schedOp->TryIFM(0); + const auto ifm1Conn = schedOp->TryIFM(1); + ArchOperatorQuery query; + Set(query.ifm[0], ifm0Conn); + Set(query.ifm[1], ifm1Conn); + Set(query.ofm, ofmConn); + query.reverseMask = ofmConn->reverse; + 
query.transposeMask = ofmConn->transpose; + query.specific.resize = {}; + + ArchRequirements req; + if ( _arch->Constraints()->OperatorQuery(op->Type(), &query, &req).Any(QueryResult::Native) ) + { + // Operator requires a scratch tensor + if ( req.req.Any(ArchRequirement::ScratchTensor) && req.scratch.size ) + { + auto scratchTensor = std::make_shared(req.scratch.type, req.scratch.size, req.scratch.format); + SchedulerConnection *scratchConn = schedOp->AddInput(TensorUsage::Scratch0, scratchTensor); + scratchConn->shape = req.scratch.size; + } + } + return schedOp; } diff --git a/ethosu/regor/compiler/tensor_properties.hpp b/ethosu/regor/compiler/tensor_properties.hpp index ac206e92..6ab0cf10 100644 --- a/ethosu/regor/compiler/tensor_properties.hpp +++ b/ethosu/regor/compiler/tensor_properties.hpp @@ -48,6 +48,7 @@ enum class TensorUsage : uint32_t Params = 0x05, LUT = 0x06, State = 0x07, + Scratch = 0x08, UserDefined = 0x1E, Last, TypeMask = 0x1F, @@ -59,6 +60,7 @@ enum class TensorUsage : uint32_t Params0 = Params, Params1 = 0x100 | Params, Params2 = 0x200 | Params, + Scratch0 = Scratch, }; DECLARE_ENUM_AS_FLAGS(TensorUsage) diff --git a/ethosu/regor/compiler/tflite_graph_optimiser.cpp b/ethosu/regor/compiler/tflite_graph_optimiser.cpp index 0a4e6b50..ed0af7bd 100644 --- a/ethosu/regor/compiler/tflite_graph_optimiser.cpp +++ b/ethosu/regor/compiler/tflite_graph_optimiser.cpp @@ -2049,26 +2049,41 @@ Operation *TFLiteGraphOptimiser::ConvertTanhSigmoidToLUT(Graph *const, Operation auto ifmConn = operation->Input(TensorUsage::IFM0); auto ifm = ifmConn->tensor.get(); - if ( ifm->Type() == DataType::Int16 && (opType == OpType::Sigmoid || opType == OpType::Tanh) ) + if ( !(opType == OpType::Sigmoid || opType == OpType::Tanh) ) { - ExecutionQuery query{}; - query.opType = opType; - if ( _constraints->CanExecute(query) ) - { - returnOp = ConvertTanhSigmoidToLUT16(operation); - } - } - else if ( opType == OpType::Sigmoid ) - { - returnOp = ConvertToLUT8(operation, 
ClampSigmoid8, "sigmoid"); + return returnOp; } - else if ( opType == OpType::Tanh ) + + ArchOperatorQuery query; + Set(query.ifm[0], ifm); + Set(query.ofm, operation->OFM()); + ArchRequirements req; + auto qresult = _constraints->OperatorQuery(opType, &query, &req); + assert(qresult.Any(QueryResult::Native)); + + if ( qresult.Any(QueryResult::HasRequirements) ) { - returnOp = ConvertToLUT8( - operation, [](double x) -> double { return std::tanh(x); }, "tanh"); + if ( req.req.Any(ArchRequirement::OpSubstitution) && (req.substitution == OpType::LUT) ) + { + if ( ifm->Type() == DataType::Int16 ) + { + returnOp = ConvertTanhSigmoidToLUT16(operation); + } + else + { + if ( opType == OpType::Tanh ) + { + returnOp = ConvertToLUT8( + operation, [](double x) -> double { return std::tanh(x); }, "tanh"); + } + else + { + returnOp = ConvertToLUT8(operation, ClampSigmoid8, "sigmoid"); + } + } + } } - if ( operation != returnOp ) { RecordOptimisation(operation, returnOp); diff --git a/ethosu/regor/tflite/tflite_reader.cpp b/ethosu/regor/tflite/tflite_reader.cpp index e5ad5f34..eb2514ce 100644 --- a/ethosu/regor/tflite/tflite_reader.cpp +++ b/ethosu/regor/tflite/tflite_reader.cpp @@ -1052,6 +1052,7 @@ ExecutionQuery TfLiteReader::OperationToExecQuery(const Operation &operation) } case OpType::ResizeBilinear: case OpType::ResizeNearestNeighbor: + query.opType = OpType::Resize; query.resizeQuery = CalculateResizeSupportQuery(operation); break; default: -- GitLab