diff --git a/ethosu/regor/architecture/architecture.hpp b/ethosu/regor/architecture/architecture.hpp index c188d83384286aeb9ffbca455d8c219583459328..02a6b7e299d03526cbcff03fbdc01664233aec87 100644 --- a/ethosu/regor/architecture/architecture.hpp +++ b/ethosu/regor/architecture/architecture.hpp @@ -186,9 +186,6 @@ public: virtual ~ArchitectureOpGroup() = default; virtual int Add(const ArchitectureOpGroupQuery &op, const std::vector &dependsOn = {}) = 0; virtual bool NeedsAllocation(UniqueId tensorUID) = 0; - -protected: - virtual bool CanRunOnNPU(const ArchitectureOpGroupQuery &op) = 0; }; enum class ArchAccumulatorSource : uint8_t diff --git a/ethosu/regor/architecture/architecture_constraints.hpp b/ethosu/regor/architecture/architecture_constraints.hpp index e08b1c24dae038b48336832d57d990406bdb2004..adad6f59962dd5bff74575320bf0ac30b87b9868 100644 --- a/ethosu/regor/architecture/architecture_constraints.hpp +++ b/ethosu/regor/architecture/architecture_constraints.hpp @@ -51,16 +51,29 @@ struct ArchOperatorQuery ArchFM ofm; ReverseType reverseMask = ReverseType::None; TransposeType transposeMask = TransposeType::None; + Kernel *kernel = nullptr; ~ArchOperatorQuery(){}; }; enum class ArchRequirement { None = 0, - ScratchTensor = 1, - OpSubstitution = 2, - OutputFormat = 4, - InputFormat = 8, + ScratchTensor = 1 << 0, + OutputFormat = 1 << 1, + InputFormat = 1 << 2, + OpSubstitution = 1 << 3, + Decompose = 1 << 4, +}; + +enum class ArchProperty +{ + None = 0, + TensorAxis = 1 << 0, + TensorDims = 1 << 1, + KernelStride = 1 << 2, + KernelDilation = 1 << 3, + DepthMultiplier = 1 << 4, + TransposeMask = 1 << 5, }; struct ArchRequirements @@ -76,6 +89,7 @@ struct ArchRequirements TensorFormat ifm1Format = TensorFormat::Unknown; TensorFormat ofmFormat = TensorFormat::Unknown; OpType substitution = OpType::None; + Flags decomposeProps; }; enum class TransposeSupport @@ -98,10 +112,8 @@ enum class QueryResult Native = 2, Constrained = 4, HasRequirements = 8, - Decompose = 
16, NativeHasReq = Native | HasRequirements, NativeConstrained = Native | Constrained, - NativeDecompose = Native | Decompose, NativeConstrainedHasReq = Native | Constrained | HasRequirements, }; @@ -112,15 +124,18 @@ class IArchitectureConstraints { public: virtual ~IArchitectureConstraints() = default; - virtual bool SupportsFusedReverse(OpType opType, ReverseType reverseTypeMask) = 0; virtual bool SupportsFusedRescale(OpType opType, TensorUsage tensorUsage, DataType rescaleFromType, DataType rescaleToType, DataType opFromType, DataType opToType, const Quantization &quantization) = 0; - virtual TransposeSupport SupportsFusedTranspose(OpType opType, TransposeType transposeType) = 0; virtual bool SupportsAccumulatorSaveRestore() = 0; virtual bool SupportsNegativeStrides() = 0; virtual bool SupportsElementwiseLeakyRelu(bool quantized, DataType type) = 0; virtual bool SupportsRescale(DataType fromType, DataType toType) = 0; virtual Flags OperatorQuery(OpType opType, const ArchOperatorQuery *query = nullptr, ArchRequirements *req = nullptr) = 0; + +private: + virtual bool SupportedDtypes(OpType opType, DataType ifmType, DataType ifm2Type, DataType ofmType) = 0; + virtual bool SupportsFusedReverse(OpType opType, ReverseType reverseTypeMask) = 0; + virtual TransposeSupport SupportsFusedTranspose(OpType opType, TransposeType transposeType) = 0; }; } // namespace regor diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55.cpp index 39c8112043116ac64d1cb038050bc13ae7dee0eb..39c486daf7b7c842b9019a203618e8254ad3b3b0 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55.cpp @@ -98,9 +98,6 @@ static const int s_SHRAMElementBits[] = { static_assert(std::size(s_SHRAMElementBits) == int(SHRAM_Last) + 1, "Bad element mapping"); -// max size for tensor axes -const static Shape MAX_SHAPE(nullptr, 8, 65536); - ArchEthosU55::ArchEthosU55() : _subkernelMax(8, 8, 65536), 
_ofmBlockMax(32, 64, 128) { _weightEncoder = std::make_unique(this); @@ -742,12 +739,6 @@ int EthosU55OpGroup::Add(const ArchitectureOpGroupQuery &op, const std::vector 0 && !IsActivation(op.type) ) { // Can only fuse with activation @@ -800,178 +791,4 @@ bool EthosU55OpGroup::NeedsAllocation(UniqueId tensorUID) return _fusedTensors.count(tensorUID) == 0; } -// Table of allowed ifm/ofm data type combinations for each HWOp -static const std::unordered_map>> s_opDataTypeSupport = { - {EthosU55NpuOp::Convolution, // HWOp - { - // IFM data type | OFM data type(s) - {DataType::UInt8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, - {DataType::Int8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, - {DataType::Int16, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, - }}, - {EthosU55NpuOp::Depthwise, - { - {DataType::UInt8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, - {DataType::Int8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, - {DataType::Int16, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, - }}, - {EthosU55NpuOp::VectorProduct, - { - {DataType::UInt8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, - {DataType::Int8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, - {DataType::Int16, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, - }}, - {EthosU55NpuOp::Pooling, - { - {DataType::UInt8, {DataType::UInt8}}, - {DataType::Int8, {DataType::Int8}}, - {DataType::Int16, {DataType::Int16}}, - }}, - {EthosU55NpuOp::ReduceSum, - { - {DataType::UInt8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, - {DataType::Int8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, - {DataType::Int16, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, - {DataType::Int32, {DataType::UInt8, DataType::Int8, DataType::Int16, 
DataType::Int32}}, - }}, -}; - -bool EthosU55OpGroup::CanRunOnNPU(const ArchitectureOpGroupQuery &op) -{ - EthosU55NpuOp npuOp = ArchEthosU55::GetHWOp(op.type); - - if ( IsFloat(op.ifm[0].type | op.ifm[1].type | op.ofm.type) ) - { - return false; - } - - if ( npuOp == EthosU55NpuOp::None || npuOp > EthosU55NpuOp::Compound ) - { - return false; - } - - auto k = op.kernel; - if ( k->Stride().x > 3 || k->Stride().y > 3 ) - { - return false; - } - - if ( k->Dilation().x > 2 || k->Dilation().y > 2 ) - { - return false; - } - - if ( k->DepthMultiplier() > 1 ) - { - return false; - } - - // Validate that input/outputs shapes don't overflow - if ( npuOp != EthosU55NpuOp::Dma ) - { - const auto &ifmShape = op.ifm[0].shape; - const auto &ofmShape = op.ofm.shape; - if ( ifmShape.GreaterMask(MAX_SHAPE) != 0 ) - { - return false; - } - if ( ofmShape.GreaterMask(MAX_SHAPE) != 0 ) - { - return false; - } - if ( op.inputs > 1 ) - { - const auto &ifm2Shape = op.ifm[1].shape; - if ( ifm2Shape.GreaterMask(MAX_SHAPE) != 0 ) - { - return false; - } - } - } - - // Check allowed ifm/ofm type mapping - if ( npuOp != EthosU55NpuOp::Elementwise ) - { - if ( op.type == OpType::LUT || op.type == OpType::MemoryCopy || op.type == OpType::Rescale || - op.type == OpType::Tile || op.type == OpType::Transpose || npuOp == EthosU55NpuOp::Compound ) - { // TODO: LUT operations end up here due to UseAvgPoolNop although the rules are not the same as - // for a Pooling operation, so skip checks for now. 
- return true; - } - - auto map = s_opDataTypeSupport.find(npuOp); - if ( map == s_opDataTypeSupport.end() ) - { - assert(false && "Data type mapping for HWOp missing"); - return false; - } - auto &typeMap = map->second; - auto ifmEntry = typeMap.find(op.ifm[0].type); - if ( ifmEntry == typeMap.end() ) - { // Unsupported ifm data type - return false; - } - auto &ofmTypes = ifmEntry->second; - if ( 0 == std::count(ofmTypes.begin(), ofmTypes.end(), op.ofm.type) ) - { // Unsupported ofm data type - return false; - } - } - else - { - std::vector validIfmTypes; - std::vector validOfmTypes; - switch ( op.type ) - { - case OpType::Add: - case OpType::Sub: - case OpType::Mul: - { - validIfmTypes = {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}; - validOfmTypes = validIfmTypes; - } - break; - case OpType::Minimum: - case OpType::Maximum: - case OpType::LeakyRelu: - case OpType::Abs: - { - validIfmTypes = {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}; - validOfmTypes = {op.ifm[0].type}; - } - break; - case OpType::CLZ: - case OpType::SHL: - case OpType::Asr: - { - validIfmTypes = {DataType::Int32}; - validOfmTypes = {DataType::Int32}; - if ( op.type == OpType::Asr ) - { - validOfmTypes.insert(validOfmTypes.begin(), {DataType::UInt8, DataType::Int8, DataType::Int16}); - } - } - break; - default: - assert(false && "Unkown elementwise type"); - break; - } - - if ( 0 == std::count(validIfmTypes.begin(), validIfmTypes.end(), op.ifm[0].type) ) - { // Unsupported ifm data type - return false; - } - if ( IsBinaryElementwise(op.type) && op.ifm[1].type != op.ifm[0].type ) - { // ifm2 data type must match ifm data type - return false; - } - if ( 0 == std::count(validOfmTypes.begin(), validOfmTypes.end(), op.ofm.type) ) - { // Unsupported ofm data type - return false; - } - } - - return true; -} - } // namespace regor diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55.hpp b/ethosu/regor/architecture/ethosu55/ethos_u55.hpp index 
f8e606449b8ed7fe8921b18ca43b86f560f91fe8..5b3de60e9b7873266265778e7ba5725eebe6b244 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55.hpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55.hpp @@ -136,9 +136,6 @@ private: public: int Add(const ArchitectureOpGroupQuery &op, const std::vector &dependsOn = {}) override; bool NeedsAllocation(UniqueId TensorUID) override; - -protected: - bool CanRunOnNPU(const ArchitectureOpGroupQuery &op) override; }; /// diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp index 7f2101c96d6779ba33a0a160dacd9a8fc107b61f..03fd3d7b918d175f5accf79ca9e611f742f8fe9a 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp @@ -23,27 +23,42 @@ namespace regor { -// Unsupported operators - must be sorted ascending -static constexpr OpType s_unsupportedU55[] = { - OpType::None, - OpType::ArgMax, - OpType::Not, - OpType::Gather, - OpType::Scatter, - OpType::Resize, - OpType::Cast, -}; - -static_assert(is_sorted(s_unsupportedU55), "list must be sorted"); - -// Short query -static constexpr std::pair s_shortU55[] = { - {OpType::Transpose, QueryResult::NativeConstrained}, +// Table of allowed ifm/ofm data type combinations for each HWOp +static const std::unordered_map>> s_opDataTypeSupport = { + {EthosU55NpuOp::Convolution, // HWOp + { + // IFM data type | OFM data type(s) + {DataType::UInt8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + {DataType::Int8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + {DataType::Int16, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + }}, + {EthosU55NpuOp::Depthwise, + { + {DataType::UInt8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + {DataType::Int8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + {DataType::Int16, 
{DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + }}, + {EthosU55NpuOp::VectorProduct, + { + {DataType::UInt8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + {DataType::Int8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + {DataType::Int16, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + }}, + {EthosU55NpuOp::Pooling, + { + {DataType::UInt8, {DataType::UInt8}}, + {DataType::Int8, {DataType::Int8}}, + {DataType::Int16, {DataType::Int16}}, + }}, + {EthosU55NpuOp::ReduceSum, + { + {DataType::UInt8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + {DataType::Int8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + {DataType::Int16, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + {DataType::Int32, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + }}, }; -static_assert(is_sorted(s_shortU55, [](const auto &a, const auto &b) { return a.first < b.first; }), "list must be sorted"); - - EthosU55Constraints::EthosU55Constraints(ArchEthosU55 *arch) : _arch(arch) { } @@ -157,40 +172,192 @@ bool EthosU55Constraints::SupportsRescale(DataType fromType, DataType toType) return true; } +bool EthosU55Constraints::SupportedDtypes(OpType opType, DataType ifmType, DataType ifm2Type, DataType ofmType) +{ + auto npuOp = _arch->GetHWOp(opType); + if ( IsFloat(ifmType | ifm2Type | ofmType) ) + { + return false; + } + + if ( _arch->UseAvgPoolNop(opType) ) + { + // The rules for UseAvgPoolNop are not the same as for a Pooling operation, so skip checks for now + return true; + } + + if ( npuOp == EthosU55NpuOp::Compound || npuOp == EthosU55NpuOp::Dma ) + { + return true; + } + + // Check allowed ifm/ofm type mapping + if ( npuOp != EthosU55NpuOp::Elementwise ) + { + auto map = s_opDataTypeSupport.find(npuOp); + if ( map == s_opDataTypeSupport.end() ) + { + assert(false && "Data type mapping for HWOp missing"); + 
return false; + } + auto &typeMap = map->second; + auto ifmEntry = typeMap.find(ifmType); + if ( ifmEntry == typeMap.end() ) + { // Unsupported ifm data type + return false; + } + auto &ofmTypes = ifmEntry->second; + if ( 0 == std::count(ofmTypes.begin(), ofmTypes.end(), ofmType) ) + { // Unsupported ofm data type + return false; + } + } + else + { + std::vector validIfmTypes; + std::vector validOfmTypes; + switch ( opType ) + { + case OpType::Add: + case OpType::Sub: + case OpType::Mul: + { + validIfmTypes = {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}; + validOfmTypes = validIfmTypes; + } + break; + case OpType::Minimum: + case OpType::Maximum: + case OpType::LeakyRelu: + case OpType::Abs: + { + validIfmTypes = {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}; + validOfmTypes = {ifmType}; + } + break; + case OpType::CLZ: + case OpType::SHL: + case OpType::Asr: + { + validIfmTypes = {DataType::Int32}; + validOfmTypes = {DataType::Int32}; + if ( opType == OpType::Asr ) + { + validOfmTypes.insert(validOfmTypes.begin(), {DataType::UInt8, DataType::Int8, DataType::Int16}); + } + } + break; + default: + assert(false && "Unkown elementwise type"); + break; + } + if ( 0 == std::count(validIfmTypes.begin(), validIfmTypes.end(), ifmType) ) + { // Unsupported ifm data type + return false; + } + if ( IsBinaryElementwise(opType) && ifm2Type != ifmType ) + { // ifm2 data type must match ifm data type + return false; + } + if ( 0 == std::count(validOfmTypes.begin(), validOfmTypes.end(), ofmType) ) + { // Unsupported ofm data type + return false; + } + } + return true; +} + Flags EthosU55Constraints::OperatorQuery(OpType opType, const ArchOperatorQuery *query, ArchRequirements *req) { - // Check unsupported operator list before further checks - auto posUnsupported = std::equal_range(std::begin(s_unsupportedU55), std::end(s_unsupportedU55), opType); - if ( posUnsupported.first != posUnsupported.second ) + Flags result = 
QueryResult::Native; + static constexpr int32_t MAX_AXIS = (1 << 16); + + // Check hardware-required substitutions first + if ( (opType == OpType::Sigmoid) || (opType == OpType::Tanh) ) + { + if ( query->ifm[0].type != DataType::Int16 ) + { + if ( req ) + { + req->req = ArchRequirement::OpSubstitution; + req->substitution = OpType::LUT; + } + result.Set(QueryResult::HasRequirements); + } + } + + // Check direct native support of the opType + auto npuOp = _arch->GetHWOp(opType); + if ( npuOp == EthosU55NpuOp::None ) { return QueryResult::Unsupported; } + else if ( npuOp == EthosU55NpuOp::Dma ) + { + return result; + } // Short query (no additional detail) if ( !query ) { - auto posShort = std::equal_range(std::begin(s_shortU55), std::end(s_shortU55), - std::pair{opType, {}}, [](const auto &a, const auto &b) { return a.first < b.first; }); - if ( posShort.first != posShort.second ) - { - return posShort.first->second; - } - return QueryResult::Native; + // more detailed query might fail + return QueryResult::NativeConstrained; } - // Float types always unsupported - if ( (query->ifm[0].shape && IsFloat(query->ifm[0].type)) || (query->ifm[1].shape && IsFloat(query->ifm[1].type)) || - (query->ofm.shape && IsFloat(query->ofm.type)) ) + const auto &ifmShape = query->ifm[0].shape; + const auto &ifm2Shape = query->ifm[1].shape; + const auto &ofmShape = query->ofm.shape; + bool typeInfo = (query->ifm[0].type != DataType::None && query->ofm.type != DataType::None); + bool shapeInfo = (ifmShape && ofmShape); + + if ( !typeInfo || !shapeInfo || !query->kernel ) { - return QueryResult::Unsupported; + // missing detail, more detailed queries might fail + result.Set(QueryResult::Constrained); } - // Reverse never supported - if ( query->reverseMask != ReverseType::None ) + // Validate DataTypes + if ( typeInfo && !SupportedDtypes(opType, query->ifm[0].type, query->ifm[1].type, query->ofm.type) ) { return QueryResult::Unsupported; } + // Validate tensor-shapes + if ( shapeInfo ) + 
{ + for ( const auto &s : {ifmShape, ifm2Shape, ofmShape} ) + { + if ( !s ) continue; + auto shape = Shape::PadAxes(s, 4, 1); + // validate that leading dimensions are unit + for ( int i = 0; i < shape.Size() - 3; i++ ) + { + if ( shape[i] > 1 ) + { + if ( req ) + { + req->req.Set(ArchRequirement::Decompose); + req->decomposeProps.Set(ArchProperty::TensorDims); + } + result.Set(QueryResult::HasRequirements); + } + } + // validate that HWC are within valid range + for ( int i = shape.Size() - 3; i < shape.Size(); i++ ) + { + if ( shape[i] > MAX_AXIS ) + { + if ( req ) + { + req->req.Set(ArchRequirement::Decompose); + req->decomposeProps.Set(ArchProperty::TensorAxis); + } + result.Set(QueryResult::HasRequirements); + } + } + } + } + // Detailed operator queries if ( !IsNone(query->transposeMask) ) { @@ -205,9 +372,27 @@ Flags EthosU55Constraints::OperatorQuery(OpType opType, const ArchO req->ifmFormat = TensorFormat::NHWC; req->ofmFormat = TensorFormat::NHWC; } - return QueryResult::NativeConstrainedHasReq; + result.Set(QueryResult::HasRequirements); + } + else + { + // supported with decomposition requirements + if ( req ) + { + req->req.Set(ArchRequirement::Decompose); + req->decomposeProps.Set(ArchProperty::TransposeMask); + } + result.Set(QueryResult::HasRequirements); } } + else + { + return QueryResult::Unsupported; + } + } + + if ( query->reverseMask != ReverseType::None ) + { return QueryResult::Unsupported; } @@ -226,22 +411,50 @@ Flags EthosU55Constraints::OperatorQuery(OpType opType, const ArchO req->ifm1Format = TensorFormat::NHWC; // IFM1 and OFM are depth-sliced req->ofmFormat = TensorFormat::NHWC; // and cannot be addressed if B16 } - return QueryResult::NativeHasReq; + result.Set(QueryResult::HasRequirements); } - else if ( (opType == OpType::Sigmoid) || (opType == OpType::Tanh) ) + + // kernel constraint-checks + if ( query->kernel ) { - if ( query->ifm[0].type != DataType::Int16 ) + auto k = query->kernel; + if ( k->Stride().x > 3 || k->Stride().y > 
3 ) { if ( req ) { - req->req = ArchRequirement::OpSubstitution; - req->substitution = OpType::LUT; + req->req.Set(ArchRequirement::Decompose); + req->decomposeProps.Set(ArchProperty::KernelStride); } - return QueryResult::NativeHasReq; + result.Set(QueryResult::HasRequirements); + } + + if ( k->Dilation().x > 2 || k->Dilation().y > 2 ) + { + if ( req ) + { + req->req.Set(ArchRequirement::Decompose); + req->decomposeProps.Set(ArchProperty::KernelDilation); + } + result.Set(QueryResult::HasRequirements); + } + + if ( k->DepthMultiplier() > 1 ) + { + if ( req ) + { + req->req.Set(ArchRequirement::Decompose); + req->decomposeProps.Set(ArchProperty::DepthMultiplier); + } + result.Set(QueryResult::HasRequirements); } } - return QueryResult::Native; -} + else + { + // no kernel provided, more detailed queries might fail + result.Set(QueryResult::Constrained); + } + return result; +} } // namespace regor diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.hpp b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.hpp index a15b3f47181605ebd75466048c2a14a84705b74a..045c18a5ebfacc73b68389e027d5c102d8084ad0 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.hpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.hpp @@ -30,15 +30,18 @@ private: public: EthosU55Constraints(ArchEthosU55 *arch); - bool SupportsFusedReverse(OpType opType, ReverseType reverseTypeMask) override; bool SupportsFusedRescale(OpType opType, TensorUsage tensorUsage, DataType rescaleFromType, DataType rescaleToType, DataType opFromType, DataType opToType, const Quantization &quantization) override; - TransposeSupport SupportsFusedTranspose(OpType opType, TransposeType transposeType) override; bool SupportsAccumulatorSaveRestore() override { return false; } bool SupportsNegativeStrides() override { return true; }; bool SupportsElementwiseLeakyRelu(bool quantized, DataType type) override; bool SupportsRescale(DataType fromType, DataType toType) override; 
Flags OperatorQuery(OpType opType, const ArchOperatorQuery *query, ArchRequirements *req) override; + +protected: + bool SupportedDtypes(OpType opType, DataType ifmType, DataType ifm2Type, DataType ofmType) override; + bool SupportsFusedReverse(OpType opType, ReverseType reverseTypeMask) override; + TransposeSupport SupportsFusedTranspose(OpType opType, TransposeType transposeType) override; }; } // namespace regor diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85.cpp index 5ff354547bd5d3da4695882caf737ffdfd1cae8b..3312c0ba8a491a1e4ece1bec26d89f355251a806 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85.cpp @@ -83,8 +83,6 @@ static const ArchEthosU85::AcceleratorConfig s_EthosU85Configs[] = { constexpr int CB_SLOTS = 6; constexpr int BRICK_ELEMENTS = 16; -// max size for tensors -const static Shape MAX_SHAPE(nullptr, 8, 65536); constexpr int ACC_DEPTH_GRANULE = 16; // Accumulator depth granularity enum class ElementwiseUsage @@ -243,7 +241,8 @@ Flags ArchEthosU85::SupportedWeightFormat(OpType op) bool ArchEthosU85::UseAvgPoolNop(OpType type) { - return IsActivation(type) || type == OpType::Quantize || type == OpType::MemoryCopy || type == OpType::Transpose || type == OpType::Reverse; + return IsActivation(type) || type == OpType::Quantize || type == OpType::MemoryCopy || type == OpType::Transpose || + type == OpType::Reverse || type == OpType::Rescale; } bool ArchEthosU85::UseNullPool(OpType opType, int bits) @@ -1339,12 +1338,10 @@ EthosU85NpuOp ArchEthosU85::GetHWOp(OpType type) {OpType::ReduceMax, EthosU85NpuOp::ReduceMinMax}, {OpType::ReduceAny, EthosU85NpuOp::ReduceMinMax}, {OpType::ReduceAll, EthosU85NpuOp::ReduceMinMax}, - // TODO MLBEDSW-7986 add none pooling {OpType::Resize, EthosU85NpuOp::Resize}, {OpType::Gather, EthosU85NpuOp::Dma}, {OpType::Scatter, EthosU85NpuOp::Dma}, {OpType::Tile, EthosU85NpuOp::Dma}, - {OpType::Rescale, 
EthosU85NpuOp::Pooling}, }; auto pos = toNpuOp.find(type); @@ -1580,12 +1577,6 @@ int EthosU85OpGroup::Add(const ArchitectureOpGroupQuery &op, const std::vector>> s_opDataTypeSupport = { - {EthosU85NpuOp::Convolution, - { - {DataType::UInt8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, - {DataType::Int8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, - {DataType::Int16, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, - }}, - {EthosU85NpuOp::Depthwise, - { - {DataType::UInt8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, - {DataType::Int8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, - {DataType::Int16, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, - }}, - {EthosU85NpuOp::VectorProduct, - { - {DataType::UInt8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, - {DataType::Int8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, - {DataType::Int16, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, - }}, - {EthosU85NpuOp::Pooling, - { - {DataType::Bool8, {DataType::Bool8, DataType::Int32, DataType::Int64}}, - {DataType::UInt8, {DataType::UInt8, DataType::Int32, DataType::Int64}}, - {DataType::Int8, {DataType::Int8, DataType::Int32, DataType::Int64}}, - {DataType::Int16, {DataType::Int16}}, - }}, - {EthosU85NpuOp::ReduceMinMax, - { - {DataType::Bool8, {DataType::Bool8, DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, - {DataType::UInt8, {DataType::Bool8, DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, - {DataType::Int8, {DataType::Bool8, DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, - {DataType::Int16, {DataType::Bool8, DataType::UInt8, DataType::Int8, 
DataType::Int16, DataType::Int32}}, - {DataType::Int32, {DataType::Bool8, DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, - }}, - {EthosU85NpuOp::ReduceSum, - { - {DataType::UInt8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, - {DataType::Int8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, - {DataType::Int16, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, - {DataType::Int32, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, - }}, - {EthosU85NpuOp::ArgMax, - { - {DataType::Bool8, {DataType::Int32, DataType::Int64}}, - {DataType::UInt8, {DataType::Int32, DataType::Int64}}, - {DataType::Int8, {DataType::Int32, DataType::Int64}}, - {DataType::Int16, {DataType::Int32, DataType::Int64}}, - }}, - {EthosU85NpuOp::Dma, - { - {DataType::Bool8, {DataType::Bool8}}, - {DataType::UInt8, {DataType::UInt8}}, - {DataType::Int8, {DataType::Int8}}, - {DataType::Int16, {DataType::Int16}}, - {DataType::Int32, {DataType::Int32}}, - }}, - {EthosU85NpuOp::Resize, - { - {DataType::UInt8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, - {DataType::Int8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, - {DataType::Int16, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, - }}, -}; - -bool EthosU85OpGroup::CanRunOnNPU(const ArchitectureOpGroupQuery &op) -{ - EthosU85NpuOp npuOp = ArchEthosU85::GetHWOp(op.type); - - if ( IsFloat(op.ifm[0].type | op.ifm[1].type | op.ofm.type) ) - { - return false; - } - - if ( npuOp == EthosU85NpuOp::None ) - { - return false; - } - - auto k = op.kernel; - if ( k->Stride().x > 3 || k->Stride().y > 3 ) - { - return false; - } - - if ( k->Dilation().x > 2 || k->Dilation().y > 2 ) - { - return false; - } - - if ( k->DepthMultiplier() > 1 ) - { - return false; - } - - switch ( npuOp ) - { - case EthosU85NpuOp::Convolution: - case 
EthosU85NpuOp::Depthwise: - case EthosU85NpuOp::VectorProduct: - case EthosU85NpuOp::Pooling: - case EthosU85NpuOp::ReduceMinMax: - case EthosU85NpuOp::ReduceSum: - case EthosU85NpuOp::ArgMax: - case EthosU85NpuOp::Elementwise: - case EthosU85NpuOp::Resize: - case EthosU85NpuOp::Dma: - break; - default: - assert(false && "Unrecognized HWOp"); - return false; - } - - // Validate that input/outputs shapes don't overflow - if ( npuOp != EthosU85NpuOp::Dma ) - { - const auto &ifmShape = op.ifm[0].shape; - const auto &ofmShape = op.ofm.shape; - - if ( ifmShape.GreaterMask(MAX_SHAPE) != 0 ) - { - return false; - } - if ( ofmShape.GreaterMask(MAX_SHAPE) != 0 ) - { - return false; - } - if ( op.inputs > 1 ) - { - const auto &ifm2Shape = op.ifm[1].shape; - if ( ifm2Shape.GreaterMask(MAX_SHAPE) != 0 ) - { - return false; - } - } - } - - // Check allowed ifm/ofm data type mapping - if ( npuOp != EthosU85NpuOp::Elementwise ) - { - if ( op.type == OpType::LUT || op.type == OpType::MemoryCopy || op.type == OpType::Rescale || op.type == OpType::Tile ) - { // TODO: LUT operations end up here due to UseAvgPoolNop although the rules are not the same as - // for a Pooling operation, so skip checks for now. 
- return true; - } - - if ( op.type == OpType::Transpose || op.type == OpType::Reverse ) - { - ArchOperatorQuery query; - query.transposeMask = op.ofm.transpose; - query.reverseMask = op.ofm.reverse; - return _arch->_constraints->OperatorQuery(OpType::MemoryCopy, &query, nullptr).Any(QueryResult::Native); - } - - auto map = s_opDataTypeSupport.find(npuOp); - if ( map == s_opDataTypeSupport.end() ) - { - assert(false && "Data type mapping for HWOp missing"); - return false; - } - auto &typeMap = map->second; - auto ifmEntry = typeMap.find(op.ifm[0].type); - if ( ifmEntry == typeMap.end() ) - { // Unsupported ifm data type - return false; - } - auto &ofmTypes = ifmEntry->second; - if ( 0 == std::count(ofmTypes.begin(), ofmTypes.end(), op.ofm.type) ) - { // Unsupported ofm data type - return false; - } - } - else - { - // TODO: Elementwise - } - - return true; -} - } // namespace regor diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85.hpp b/ethosu/regor/architecture/ethosu85/ethos_u85.hpp index c4cbbad2d695631ae7d04b0c3c32468a158b3301..d23e64f74ab020a9794fb5a580a336c9ed94195e 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85.hpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85.hpp @@ -136,7 +136,6 @@ public: bool NeedsAllocation(UniqueId tensorUID) override; protected: - bool CanRunOnNPU(const ArchitectureOpGroupQuery &op) override; int ChainingBuffer(UniqueId tensorUID); bool IsChained(UniqueId tensorUID); bool IsFused(UniqueId tensorUID); diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.cpp index 535ede5ca4e0390d4d75603a34a3fe4b125ff5cb..8a2ea87c076fe4331afe72f01a451dc8db5aad85 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.cpp @@ -21,22 +21,69 @@ #include "ethos_u85.hpp" #include "ethos_u85_register_cs_generator.hpp" +#include + namespace regor { -// Unsupported operators - 
must be sorted ascending -static constexpr OpType s_unsupportedU85[] = {OpType::None}; - -static_assert(is_sorted(s_unsupportedU85), "list must be sorted"); - - -// Short query -static constexpr std::pair s_shortU85[] = { - {OpType::Transpose, QueryResult::Native}, +// TODO: This table is from the EthosU55/U65 Embedded NPU Interface Specification, it's not completely valid for +// Ethos U85 since the allowed data types depend on ifm/ofm as well as selected acc and scaling. +static const std::unordered_map>> s_opDataTypeSupport = { + {EthosU85NpuOp::Convolution, + { + {DataType::UInt8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, + {DataType::Int8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, + {DataType::Int16, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, + }}, + {EthosU85NpuOp::Depthwise, + { + {DataType::UInt8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, + {DataType::Int8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, + {DataType::Int16, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, + }}, + {EthosU85NpuOp::VectorProduct, + { + {DataType::UInt8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, + {DataType::Int8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, + {DataType::Int16, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, + }}, + {EthosU85NpuOp::Pooling, + { + {DataType::Bool8, {DataType::Bool8, DataType::Int32, DataType::Int64}}, + {DataType::UInt8, {DataType::UInt8, DataType::Int32, DataType::Int64}}, + {DataType::Int8, {DataType::Int8, DataType::Int32, DataType::Int64}}, + {DataType::Int16, {DataType::Int16}}, + }}, + {EthosU85NpuOp::ReduceMinMax, + { + {DataType::Bool8, {DataType::Bool8, 
DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + {DataType::UInt8, {DataType::Bool8, DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + {DataType::Int8, {DataType::Bool8, DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + {DataType::Int16, {DataType::Bool8, DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + {DataType::Int32, {DataType::Bool8, DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + }}, + {EthosU85NpuOp::ReduceSum, + { + {DataType::UInt8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + {DataType::Int8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + {DataType::Int16, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + {DataType::Int32, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32}}, + }}, + {EthosU85NpuOp::ArgMax, + { + {DataType::Bool8, {DataType::Int32, DataType::Int64}}, + {DataType::UInt8, {DataType::Int32, DataType::Int64}}, + {DataType::Int8, {DataType::Int32, DataType::Int64}}, + {DataType::Int16, {DataType::Int32, DataType::Int64}}, + }}, + {EthosU85NpuOp::Resize, + { + {DataType::UInt8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, + {DataType::Int8, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, + {DataType::Int16, {DataType::UInt8, DataType::Int8, DataType::Int16, DataType::Int32, DataType::Int64}}, + }}, }; -static_assert(is_sorted(s_shortU85, [](const auto &a, const auto &b) { return a.first < b.first; }), "list must be sorted"); - TransposeSupport EthosU85Constraints::SupportsFusedTranspose(OpType opType, TransposeType transposeType) { if ( transposeType == TransposeType::None ) return TransposeSupport::Any; @@ -162,11 +209,68 @@ bool EthosU85Constraints::SupportsRescale(DataType fromType, DataType toType) return fromType != DataType::UInt16; } +bool 
EthosU85Constraints::SupportedDtypes(OpType opType, DataType ifmType, DataType ifm2Type, DataType ofmType) +{ + auto npuOp = _arch->GetHWOp(opType); + if ( IsFloat(ifmType | ifm2Type | ofmType) ) + { + return false; + } + + if ( _arch->UseAvgPoolNop(opType) ) + { + // The rules for UseAvgPoolNop are not the same as for a Pooling operation, skip checks for now + return true; + } + + if ( npuOp != EthosU85NpuOp::Elementwise ) + { + auto map = s_opDataTypeSupport.find(npuOp); + if ( map == s_opDataTypeSupport.end() ) + { + assert(false && "Data type mapping for HWOp missing"); + return false; + } + auto &typeMap = map->second; + auto ifmEntry = typeMap.find(ifmType); + if ( ifmEntry == typeMap.end() ) + { + // Unsupported ifm data type + return false; + } + auto &ofmTypes = ifmEntry->second; + if ( 0 == std::count(ofmTypes.begin(), ofmTypes.end(), ofmType) ) + { + // Unsupported ofm data type + return false; + } + } + else + { + // TODO elementwise + } + return true; +} + Flags EthosU85Constraints::OperatorQuery(OpType opType, const ArchOperatorQuery *query, ArchRequirements *req) { - // Check unsupported operator list first - auto posUnsupported = std::equal_range(std::begin(s_unsupportedU85), std::end(s_unsupportedU85), opType); - if ( posUnsupported.first != std::end(s_unsupportedU85) ) + Flags result = QueryResult::Native; + static constexpr int32_t MAX_AXIS = (1 << 16); + + // Check hardware-required substitutions first + if ( (opType == OpType::Sigmoid) || (opType == OpType::Tanh) ) + { + if ( req ) + { + req->req = ArchRequirement::OpSubstitution; + req->substitution = OpType::LUT; + } + result.Set(QueryResult::HasRequirements); + } + + // Check direct native support of the opType + auto npuOp = _arch->GetHWOp(opType); + if ( npuOp == EthosU85NpuOp::None ) { return QueryResult::Unsupported; } @@ -174,52 +278,151 @@ Flags EthosU85Constraints::OperatorQuery(OpType opType, const ArchO // Short query (no additional detail) if ( !query ) { - auto posShort = 
std::equal_range(std::begin(s_shortU85), std::end(s_shortU85), - std::pair{opType, {}}, [](const auto &a, const auto &b) { return a.first < b.first; }); - if ( posShort.first != std::end(s_shortU85) ) + // more detailed query might fail + return QueryResult::NativeConstrained; + } + + // Fusing checks + if ( query->transposeMask != TransposeType::None ) + { + TransposeSupport tmp = SupportsFusedTranspose(opType, query->transposeMask); + if ( tmp == TransposeSupport::None ) + { + if ( opType == OpType::Transpose ) + { + // unsupported mask for standalone transpose, requires decomposition (flagged even if req is null) + if ( req ) + { + req->req.Set(ArchRequirement::Decompose); + req->decomposeProps.Set(ArchProperty::TransposeMask); + } + result.Set(QueryResult::HasRequirements); + } + else + { + // unsupported transpose-fusing + return QueryResult::Unsupported; + } + } + } + if ( query->reverseMask != ReverseType::None ) + { + if ( !SupportsFusedReverse(opType, query->reverseMask) ) { - return posShort.first->second; + return QueryResult::Unsupported; } - return QueryResult::Native; } - // Float types always unsupported - if ( (query->ifm[0].shape && IsFloat(query->ifm[0].type)) || (query->ifm[1].shape && IsFloat(query->ifm[1].type)) || - (query->ofm.shape && IsFloat(query->ofm.type)) ) + if ( npuOp == EthosU85NpuOp::Dma ) { - return QueryResult::Unsupported; + return result; } - if ( query->transposeMask != TransposeType::None ) + const auto &ifmShape = query->ifm[0].shape; + const auto &ifm2Shape = query->ifm[1].shape; + const auto &ofmShape = query->ofm.shape; + bool typeInfo = (query->ifm[0].type != DataType::None && query->ofm.type != DataType::None); + bool shapeInfo = (ifmShape && ofmShape); + + if ( !typeInfo || !shapeInfo || !query->kernel ) { - TransposeSupport tmp = SupportsFusedTranspose(opType, query->transposeMask); - if ( tmp == TransposeSupport::None ) return QueryResult::Unsupported; + // missing detail, more detailed queries might fail + result.Set(QueryResult::Constrained); } 
- if ( query->reverseMask != ReverseType::None ) + // Validate dataTypes + if ( typeInfo && !SupportedDtypes(opType, query->ifm[0].type, query->ifm[1].type, query->ofm.type) ) { - if ( !SupportsFusedReverse(opType, query->reverseMask) ) return QueryResult::Unsupported; + return QueryResult::Unsupported; } - // Operator specific - if ( (opType == OpType::Sigmoid) || (opType == OpType::Tanh) ) + // Validate tensor-shapes + if ( shapeInfo ) { - if ( req ) + for ( const auto &s : {ifmShape, ifm2Shape, ofmShape} ) { - req->req = ArchRequirement::OpSubstitution; - req->substitution = OpType::LUT; + if ( !s ) continue; + auto shape = Shape::PadAxes(s, 4, 1); + // validate that leading dimensions are unit + for ( int i = 0; i < shape.Size() - 3; i++ ) + { + if ( shape[i] > 1 ) + { + if ( req ) + { + req->req.Set(ArchRequirement::Decompose); + req->decomposeProps.Set(ArchProperty::TensorDims); + } + result.Set(QueryResult::HasRequirements); + } + } + // validate that HWC are within valid range + for ( int i = shape.Size() - 3; i < shape.Size(); i++ ) + { + if ( shape[i] > MAX_AXIS ) + { + if ( req ) + { + req->req.Set(ArchRequirement::Decompose); + req->decomposeProps.Set(ArchProperty::TensorAxis); + } + result.Set(QueryResult::HasRequirements); + } + } + } + } + + // Detailed operator queries + if ( opType == OpType::MatMul ) + { + // Constrain Matmul height to 1 + if ( ofmShape.Size() > 2 && ofmShape.Height() > 1 ) + { + if ( req ) + { + req->req.Set(ArchRequirement::Decompose); + req->decomposeProps.Set(ArchProperty::TensorAxis); + } + result.Set(QueryResult::HasRequirements); } - return QueryResult::NativeHasReq; } - else if ( opType == OpType::MatMul ) + + // kernel constraint-checks + if ( query->kernel ) { - if ( (query->ofm.shape.Size() >= 2) && query->ofm.shape.Elements() > query->ofm.shape.ElementsWC() ) + auto k = query->kernel; + if ( k->Stride().x > 3 || k->Stride().y > 3 ) { - return QueryResult::NativeDecompose; + if ( req ) + { + 
req->req.Set(ArchRequirement::Decompose); + req->decomposeProps.Set(ArchProperty::KernelStride); + } + result.Set(QueryResult::HasRequirements); + } + + if ( k->Dilation().x > 2 || k->Dilation().y > 2 ) + { + if ( req ) + { + req->req.Set(ArchRequirement::Decompose); + req->decomposeProps.Set(ArchProperty::KernelDilation); + } + result.Set(QueryResult::HasRequirements); + } + + if ( k->DepthMultiplier() > 1 ) + { + if ( req ) + { + req->req.Set(ArchRequirement::Decompose); + req->decomposeProps.Set(ArchProperty::DepthMultiplier); + } + result.Set(QueryResult::HasRequirements); } } - return QueryResult::Native; + return result; } } // namespace regor diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.hpp b/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.hpp index c193d6d3f583d815ec8bc0e999ee0c5a2f0757a6..b682369832c2ea2f1322f5da63dbe6db9f6c4725 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.hpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.hpp @@ -38,6 +38,9 @@ public: bool SupportsElementwiseLeakyRelu(bool quantized, DataType type) override { return true; }; bool SupportsRescale(DataType fromType, DataType toType) override; Flags OperatorQuery(OpType opType, const ArchOperatorQuery *query, ArchRequirements *req) override; + +protected: + bool SupportedDtypes(OpType opType, DataType ifmType, DataType ifm2Type, DataType ofmType) override; }; } // namespace regor diff --git a/ethosu/regor/compiler/graphir_optimiser.cpp b/ethosu/regor/compiler/graphir_optimiser.cpp index 7d0c68d8f133b06ee76e80862deb0618c416d934..962e86aa8a46bc9889888c8d15c40148b3f8ba3a 100644 --- a/ethosu/regor/compiler/graphir_optimiser.cpp +++ b/ethosu/regor/compiler/graphir_optimiser.cpp @@ -1627,6 +1627,7 @@ Operation *GraphIrOptimiser::MergeTransposes(Graph *const graph, Operation *cons // Can't merge if both apply quantization bool prevHasQuant = prevConn->quantization.IsValid() && !prevConn->quantization.IsUnitScale(); + if ( 
opHasQuant && prevHasQuant ) return returnOp; // Examine previous op's transpose @@ -1643,16 +1644,22 @@ Operation *GraphIrOptimiser::MergeTransposes(Graph *const graph, Operation *cons TransposeType mergedTranspose = TransposeTypeFromShape(finalMapping); ArchOperatorQuery query; + ArchRequirements req; query.transposeMask = mergedTranspose; - if ( _constraints->OperatorQuery(OpType::Transpose, &query, nullptr).Any(QueryResult::Native) ) + if ( _constraints->OperatorQuery(OpType::Transpose, &query, &req).Any(QueryResult::Native) ) { - // Change the transpose attribute on the preceding transpose and remove this one - prevAttr->perm = finalMapping; - TensorConnection &newConn = prevOp->ConnectOutput(TensorUsage::OFM, ofm); - newConn.Set(ofmConn->slice).Set(ofmConn->reverse).Set(ofmConn->shape); - if ( !prevHasQuant && opHasQuant ) newConn.Set(ofmConn->quantization); - operation->Disconnect(); - return prevOp; + // only merge the transpose if the new mask is natively supported + // without mask-decomp + if ( !req.decomposeProps.Any(ArchProperty::TransposeMask) ) + { + // Change the transpose attribute on the preceding transpose and remove this one + prevAttr->perm = finalMapping; + TensorConnection &newConn = prevOp->ConnectOutput(TensorUsage::OFM, ofm); + newConn.Set(ofmConn->slice).Set(ofmConn->reverse).Set(ofmConn->shape); + if ( !prevHasQuant && opHasQuant ) newConn.Set(ofmConn->quantization); + operation->Disconnect(); + return prevOp; + } } } } diff --git a/ethosu/regor/compiler/scheduler_decompose.cpp b/ethosu/regor/compiler/scheduler_decompose.cpp index bd02b2417ab76c00e73fec5068b119e376cd7e52..836f590deaed73b5763abf8a46037a202b662353 100644 --- a/ethosu/regor/compiler/scheduler_decompose.cpp +++ b/ethosu/regor/compiler/scheduler_decompose.cpp @@ -30,9 +30,22 @@ namespace regor static constexpr int MAX_DIM = 65536; -bool NeedsDecompose(Architecture *arch, const SchedulerOperation *schedOp) +Flags OperatorQuery(Architecture *arch, const SchedulerOperation 
*schedOp, ArchRequirements *req) +{ + ArchOperatorQuery query{}; + const SchedulerConnection *ofmConn = schedOp->OFM(); + Set(query.ifm[0], schedOp->IFM(0)); + Set(query.ifm[1], schedOp->TryIFM(1)); + Set(query.ofm, ofmConn); + query.transposeMask = ofmConn->transpose; + query.reverseMask = ofmConn->reverse; + query.kernel = schedOp->Kernel(); + return arch->Constraints()->OperatorQuery(schedOp->Type(), &query, req); +} + +bool ShouldDecompose(Architecture *arch, const SchedulerOperation *schedOp) { - return CanDecompose(arch, schedOp) && !CanRunOnHardware(arch, schedOp); + return CanDecompose(arch, schedOp) && NeedsDecompose(arch, schedOp); } static std::unique_ptr MakeMemCopy(const std::shared_ptr &source, @@ -193,70 +206,27 @@ static std::unique_ptr GetOpConfig(Architecture *arch, con return arch->GetOpConfig(schedOp->Type(), qConfig); } -bool CanRunOnHardware(Architecture *arch, const SchedulerOperation *schedOp) +bool NeedsDecompose(Architecture *arch, const SchedulerOperation *schedOp) { - regor::ArchitectureOpGroupQuery qOpGroup{}; - if ( DecomposeAsElementwise(schedOp->Type()) || schedOp->Type() == OpType::MemoryCopy ) + ArchRequirements req{}; + Flags result = OperatorQuery(arch, schedOp, &req); + // Assert complete query + assert(result.Any(QueryResult::Constrained) == false && "Constrained result from complete OperatorQuery"); + if ( result.Any(QueryResult::Unsupported) ) { - auto &ofmShape = schedOp->OFM()->SliceShape(); - if ( ofmShape.Size() > 3 && ofmShape.Elements() > ofmShape.Width() * ofmShape.Height() * ofmShape.Depth() ) - return false; + // Operations completely unsupported by HW should not be decomposed + return false; } - if ( schedOp->Type() == OpType::MatMul ) + if ( result.Any(QueryResult::HasRequirements) ) { - const auto ofmConn = schedOp->OFM(); - ArchOperatorQuery query; - Set(query.ifm[0], schedOp->IFM(0)); - Set(query.ifm[1], schedOp->IFM(1)); - Set(query.ofm, ofmConn); - query.transposeMask = ofmConn->transpose; - if ( 
(arch->Constraints()->OperatorQuery(OpType::MatMul, &query, nullptr) & QueryResult::NativeDecompose) != QueryResult::Native ) + if ( req.req.Any(ArchRequirement::Decompose) ) { - return false; + return true; } + // Has requirements, but none of them is decomposition-related } - if ( IsConvolution(schedOp->Type()) || IsPooling(schedOp->Type()) ) - { - auto &ofmShape = schedOp->OFM()->SliceShape(); - if ( ofmShape.Size() > 3 && ofmShape.Batch() > 1 ) return false; - } - if ( schedOp->Type() == OpType::Transpose ) - { - auto &ifmShape = schedOp->IFM(0)->SliceShape(); - if ( ifmShape.Size() > 3 && ifmShape.Elements() > ifmShape.Width() * ifmShape.Height() * ifmShape.Depth() ) - return false; - auto &ofmShape = schedOp->OFM()->SliceShape(); - if ( ofmShape.Size() > 3 && ofmShape.Elements() > ofmShape.Width() * ofmShape.Height() * ofmShape.Depth() ) - return false; - - ArchOperatorQuery query; - query.transposeMask = schedOp->OFM()->transpose; - if ( !arch->Constraints()->OperatorQuery(OpType::Transpose, &query, nullptr).Any(QueryResult::Native) ) - { - return false; - } - } - auto *ifm = schedOp->TryIFM(0); - auto *ifm2 = schedOp->TryIFM(1); - auto *ofm = schedOp->TryOFM(); - if ( !ifm || !ofm ) return false; - qOpGroup.type = schedOp->Type(); - qOpGroup.kernel = schedOp->Kernel(); - qOpGroup.ifm[0].key = ifm->tensor->uid; - qOpGroup.ifm[0].type = ifm->tensor->dataType; - qOpGroup.ifm[0].shape = ifm->SliceShape(); - if ( ifm2 ) - { - qOpGroup.ifm[1].key = ifm2->tensor->uid; - qOpGroup.ifm[1].type = ifm2->tensor->dataType; - qOpGroup.ifm[1].shape = ifm2->SliceShape(); - } - qOpGroup.ofm.key = ofm->tensor->uid; - qOpGroup.ofm.type = ofm->tensor->dataType; - qOpGroup.ofm.shape = ofm->SliceShape(); - qOpGroup.ofm.transpose = ofm->transpose; - if ( arch->CreateOpGroup(qOpGroup) == nullptr ) return false; - return GetOpConfig(arch, schedOp) != nullptr; + // Decomposition is also needed when no OpConfig can be found for the op + return !GetOpConfig(arch, schedOp); } bool CanDecompose(Architecture *, const SchedulerOperation 
*schedOp) @@ -810,7 +780,7 @@ std::vector> DecomposeConv2D(Architecture *a { return DecomposeLeadingDimensions(1, arch, std::move(op), DecomposeConv2D); } - if ( CanRunOnHardware(arch, op.get()) ) + if ( !NeedsDecompose(arch, op.get()) ) { UpdatePaddingAndIfmOffset(op.get()); result.emplace_back(std::move(op)); @@ -1128,7 +1098,7 @@ std::vector> DecomposeDepthwiseConv2D(Archit return result; } - if ( CanRunOnHardware(arch, op.get()) ) + if ( !NeedsDecompose(arch, op.get()) ) { UpdatePaddingAndIfmOffset(op.get()); result.emplace_back(std::move(op)); @@ -1621,12 +1591,106 @@ std::vector> DecomposeTranspose(Architecture const auto &ifmShape = ifmConn->SliceShape(); const auto axes = ifmShape.Size(); - ArchOperatorQuery query; - query.transposeMask = ofmConn->transpose; - bool supported = arch->Constraints()->OperatorQuery(OpType::Transpose, &query, nullptr).Any(QueryResult::Native); + auto req = ArchRequirements(); + auto qResult = OperatorQuery(arch, op.get(), &req); + bool decomposeMask = false; + bool decomposeAxes = false; + bool decomposeLeadingDims = false; + + if ( qResult.Any(QueryResult::HasRequirements) && req.req.Any(ArchRequirement::Decompose) ) + { + decomposeMask = req.decomposeProps.Any(ArchProperty::TransposeMask); + decomposeAxes = req.decomposeProps.Any(ArchProperty::TensorAxis); + decomposeLeadingDims = req.decomposeProps.Any(ArchProperty::TensorDims); + } + + if ( decomposeMask || decomposeLeadingDims ) + { + // Decompose unsupported transpose-masks or large IFM-dimensions + // by unrolling the transpose-mask into many 3D-transpose operations. 
+ + // We can handle TransposeType::None as an elementwise, because it's basically a memory copy + if ( ofmConn->transpose == TransposeType::None ) + { + LOG_TRACE1("DecomposeTranspose: Decomposing as elementwise\n"); + return DecomposeElementwise(arch, std::move(op)); + } + + assert(ifmConn->slice.offset.IsEmpty() && ifmConn->slice.shape.IsEmpty()); + assert(ofmConn->slice.offset.IsEmpty() && ofmConn->slice.shape.IsEmpty()); + + // Decompose a transpose by performing a selection sort of the axes. Each swap in the selection sort algorithm + // expands to one or more transpose ops. + // + // Example: + // + // Input shape: [ 3, 7, 11, 13] + // Permutation vector: [ 1, 3, 0, 2] + // Sort order: [ 2, 0, 3, 1] + // Output shape: [ 7, 13, 3, 11] + // + // Selection sort swaps: + // + // Swap 1: Pos 0 <-> Pos 1: [7, 3, 11, 13] + // Swap 2: Pos 1 <-> Pos 3: [7, 13, 11, 3] + // Swap 3: Pos 2 <-> Pos 3: [7, 13, 3, 11] + + // Calculate sort order + Shape order(nullptr, axes); + uint32_t mask = uint32_t(ofmConn->transpose); + for ( int i = axes - 1; i >= 0; i-- ) + { + const int pos = axes - 1 - (mask & 0xF); + order[pos] = i; + mask = mask >> 4; + } + + auto shape = ifmConn->shape; + + LOG_TRACE1("DecomposeTranspose: Sort order ({})\n", order.ToString()); + LOG_TRACE1("DecomposeTranspose: Initial shape ({})\n", shape.ToString()); + + for ( int axis = 0; axis < axes; axis++ ) + { + // Check if axis is already in the right place + if ( order[axis] == axis ) continue; + + // Find where the axis is + int i; + for ( i = axis + 1; i < axes; i++ ) + if ( order[i] == axis ) break; + assert(i < axes); + + // Move axis to right place + LOG_TRACE1("DecomposeTranspose: Swap {} <-> {}\n", axis, i); + auto tail = !result.empty() ? 
result.back()->OFM() : ifmConn; + auto subOps = SwapAxes(arch, shape, tail, axis, i); + result.insert(result.end(), std::make_move_iterator(subOps.begin()), std::make_move_iterator(subOps.end())); + std::swap(order[axis], order[i]); + LOG_TRACE1("DecomposeTranspose: Shape is now ({})\n", shape.ToString()); + } + + LOG_TRACE1("DecomposeTranspose: Final shape ({})\n", shape.ToString()); + + assert(!result.empty()); + + const auto &lastTensor = result.back()->OFM()->tensor; + for ( auto &subOp : result ) + { + auto ofm = subOp->OFM(); + if ( ofm->tensor == lastTensor ) + { + // Adjust so that the last output is written to the original OFM + ofm->tensor = ofmConn->tensor; + ofm->tensor->producers.push_back(subOp.get()); + ofm->quantization = ofmConn->quantization; + } + } + return result; + } // We can handle all transpositions in a 3D shape - if ( (axes < 4 || ifmShape.Elements() == ifmShape.Height() * ifmShape.Width() * ifmShape.Depth()) && supported ) + if ( decomposeAxes ) { for ( int axis = 0; axis < axes; axis++ ) { @@ -1641,90 +1705,11 @@ std::vector> DecomposeTranspose(Architecture return DecomposeLargeAxis(axis, MAX_DIM, arch, std::move(op), DecomposeTranspose); } } - - // No decomposition required - result.push_back(std::move(op)); - return result; - } - - // We can handle TransposeType::None as an elementwise, because it's basically a memory copy - if ( ofmConn->transpose == TransposeType::None ) - { - LOG_TRACE1("DecomposeTranspose: Decomposing as elementwise\n"); - return DecomposeElementwise(arch, std::move(op)); } - assert(ifmConn->slice.offset.IsEmpty() && ifmConn->slice.shape.IsEmpty()); - assert(ofmConn->slice.offset.IsEmpty() && ofmConn->slice.shape.IsEmpty()); - - // Decompose a transpose by peforming a selection sort of the axes. Each swap in the selection sort algorithm - // expands to one or more transpose ops. 
- // - // Example: - // - // Input shape: [ 3, 7, 11, 13] - // Permutation vector: [ 1, 3, 0, 2] - // Sort order: [ 2, 0, 3, 1] - // Output shape: [ 7, 13, 3, 11] - // - // Selection sort swaps: - // - // Swap 1: Pos 0 <-> Pos 1: [7, 3, 11, 13] - // Swap 2: Pos 1 <-> Pos 3: [7, 13, 11, 3] - // Swap 3: Pos 2 <-> Pos 3: [7, 13, 3, 11] - - // Calculate sort order - Shape order(nullptr, axes); - uint32_t mask = uint32_t(ofmConn->transpose); - for ( int i = axes - 1; i >= 0; i-- ) - { - const int pos = axes - 1 - (mask & 0xF); - order[pos] = i; - mask = mask >> 4; - } - - auto shape = ifmConn->shape; - - LOG_TRACE1("DecomposeTranspose: Sort order ({})\n", order.ToString()); - LOG_TRACE1("DecomposeTranspose: Initial shape ({})\n", shape.ToString()); - - for ( int axis = 0; axis < axes; axis++ ) - { - // Check if axis is already in the right place - if ( order[axis] == axis ) continue; - - // Find where the axis is - int i; - for ( i = axis + 1; i < axes; i++ ) - if ( order[i] == axis ) break; - assert(i < axes); - - // Move axis to right place - LOG_TRACE1("DecomposeTranspose: Swap {} <-> {}\n", axis, i); - auto tail = !result.empty() ? 
result.back()->OFM() : ifmConn; - auto subOps = SwapAxes(arch, shape, tail, axis, i); - result.insert(result.end(), std::make_move_iterator(subOps.begin()), std::make_move_iterator(subOps.end())); - std::swap(order[axis], order[i]); - LOG_TRACE1("DecomposeTranspose: Shape is now ({})\n", shape.ToString()); - } - - LOG_TRACE1("DecomposeTranspose: Final shape ({})\n", shape.ToString()); - - assert(!result.empty()); - - const auto &lastTensor = result.back()->OFM()->tensor; - for ( auto &subOp : result ) - { - auto ofm = subOp->OFM(); - if ( ofm->tensor == lastTensor ) - { - // Adjust to that last output is written to the original OFM - ofm->tensor = ofmConn->tensor; - ofm->tensor->producers.push_back(subOp.get()); - ofm->quantization = ofmConn->quantization; - } - } + // No decomposition required + result.push_back(std::move(op)); return result; } diff --git a/ethosu/regor/compiler/scheduler_decompose.hpp b/ethosu/regor/compiler/scheduler_decompose.hpp index 20c392a70ea53dbf6a67e32331dfdc7461de48ce..8853c7979ade75de9f4f26d720f9509237b25847 100644 --- a/ethosu/regor/compiler/scheduler_decompose.hpp +++ b/ethosu/regor/compiler/scheduler_decompose.hpp @@ -31,9 +31,9 @@ class DecompositionFailure : public std::runtime_error public: DecompositionFailure(const std::string &what = "") : std::runtime_error(what) {} }; - +Flags OperatorQuery(Architecture *arch, const SchedulerOperation *schedOp, ArchRequirements *req); +bool ShouldDecompose(Architecture *arch, const SchedulerOperation *schedOp); bool NeedsDecompose(Architecture *arch, const SchedulerOperation *schedOp); -bool CanRunOnHardware(Architecture *arch, const SchedulerOperation *schedOp); bool CanDecompose(Architecture *arch, const SchedulerOperation *schedOp); std::vector> DecomposeConv2D(Architecture *arch, std::unique_ptr op); std::vector> DecomposeConv3D(Architecture *arch, std::unique_ptr op); @@ -53,7 +53,7 @@ inline ArchFM &Set(ArchFM &fm, const SchedulerConnection *conn) if ( conn ) { fm.type = 
conn->tensor->dataType; - fm.shape = conn->slice.shape ? conn->slice.shape : conn->shape; + fm.shape = conn->SliceShape(); fm.format = conn->tensor->format; } return fm; diff --git a/ethosu/regor/compiler/scheduler_packing.cpp b/ethosu/regor/compiler/scheduler_packing.cpp index 0007053b0bf7a2788a0ed368dffc2ceb8d1d9e18..e0942de450611a3c790474522d082e3fd62c8d36 100644 --- a/ethosu/regor/compiler/scheduler_packing.cpp +++ b/ethosu/regor/compiler/scheduler_packing.cpp @@ -105,7 +105,7 @@ bool IsConnected(const SchedulerOperation &first, const SchedulerOperation &seco } // namespace SchedulerPacking::SchedulerPacking(Architecture *arch, bool disableChaining) : - _arch(arch), _disableChaining(disableChaining) + _arch(arch), _constraints(arch->Constraints()), _disableChaining(disableChaining) { } @@ -122,6 +122,8 @@ std::vector> SchedulerPacking::Process(const FilterOperations(executionList, graph); + PrePackOperations(); + PackOperations(); ReorderOperations(); @@ -136,7 +138,7 @@ void SchedulerPacking::FilterOperations(const std::vector &executio { auto schedOp = MakeSchedulerOperation(op, graph); - if ( NeedsDecompose(_arch, schedOp.get()) ) + if ( ShouldDecompose(_arch, schedOp.get()) ) { auto schedOps = DecomposeSchedulerOperation(std::move(schedOp)); _schedList.insert( @@ -183,6 +185,35 @@ ArchitectureOpGroupQuery SchedulerPacking::CreateOpGroupQuery(const SchedulerOpe return query; } +void SchedulerPacking::SchedulerPacking::PrePackOperations() +{ + // Determine if each operation can run on NPU + for ( auto &schedOp : _schedList ) + { + ArchRequirements oReq{}; + Flags result = OperatorQuery(_arch, schedOp.get(), &oReq); + // Assert complete query + assert(result.Any(QueryResult::Constrained) == false && "Constrained result from complete OperatorQuery"); + if ( result.Any(QueryResult::Native) ) + { + // TODO MLBEDSW-10643: This should be a direct-check against QueryResult::Native + // HasRequirements at this point should result in CPU-fallback + if ( 
result.Any(QueryResult::HasRequirements) && oReq.req.Any(ArchRequirement::Decompose) ) + { + schedOp->SetNpuOp(false); + } + else + { + schedOp->SetNpuOp(true); + } + } + else + { + schedOp->SetNpuOp(false); + } + } +} + void SchedulerPacking::SchedulerPacking::PackOperations() { LOG_TRACE1("Scheduler Packing (of {0} Ops)\n", _schedList.size()); @@ -203,17 +234,14 @@ void SchedulerPacking::SchedulerPacking::PackOperations() cur++; - LOG_TRACE1("Creating new group with {}\n", OpTypeToString(primaryOp->Type())); - - auto op0 = CreateOpGroupQuery(primaryOp); - - // Try to create OpGroup - auto group = _arch->CreateOpGroup(op0); - // OpGroup is nullptr if op can't run on NPU - if ( group ) + if ( primaryOp->IsNpuOp() ) { - primaryOp->SetNpuOp(true); + LOG_TRACE1("Creating new group with {}\n", OpTypeToString(primaryOp->Type())); + auto op0 = CreateOpGroupQuery(primaryOp); + // Try to create OpGroup + auto group = _arch->CreateOpGroup(op0); + assert(group); // First op in group has key 0 int prevOpKey = 0; @@ -238,7 +266,6 @@ void SchedulerPacking::SchedulerPacking::PackOperations() LOG_TRACE1("Can't add next op\n"); break; } - nextOp->SetNpuOp(true); nextOp->SetParent(primaryOp); nextOp->SetOpGroupKey(key); @@ -384,6 +411,12 @@ int SchedulerPacking::CanPack(const SchedulerOperation *schedOp, const Scheduler assert(prevOFM && "primary/prev op must have OFM"); assert(ifmTensor && "next op must have IFM"); + // can't pack CPU operations + if ( !schedOp->IsNpuOp() ) + { + return 0; + } + // Previous op in execution order doesn't connect to this one if ( prevOFM != ifmTensor && prevOFM != ifm2Tensor ) { @@ -548,6 +581,7 @@ std::unique_ptr SchedulerPacking::MakeSchedulerOperation(Ope Set(query.ofm, ofmConn); query.reverseMask = ofmConn->reverse; query.transposeMask = ofmConn->transpose; + query.kernel = schedOp->Kernel(); ArchRequirements req; if ( _arch->Constraints()->OperatorQuery(op->Type(), &query, &req).Any(QueryResult::Native) ) diff --git 
a/ethosu/regor/compiler/scheduler_packing.hpp b/ethosu/regor/compiler/scheduler_packing.hpp index 78abbc139ec4dafe11a820ba5ab056a2636bc0ae..4062185168d0be15a55741e3f0f519b7049a757d 100644 --- a/ethosu/regor/compiler/scheduler_packing.hpp +++ b/ethosu/regor/compiler/scheduler_packing.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -21,6 +21,7 @@ #include "common/common.hpp" #include "common/logging.hpp" +#include "architecture/architecture_constraints.hpp" #include "common/shape.hpp" #include "graph.hpp" #include "operation.hpp" @@ -45,6 +46,7 @@ class SchedulerPacking { protected: Architecture *_arch = nullptr; + IArchitectureConstraints *_constraints = nullptr; bool _disableChaining = false; std::vector> _schedList; std::unordered_map> _tensorMap; @@ -56,8 +58,13 @@ public: std::vector> Process(const Graph *graph); private: + // Decomposes operations void FilterOperations(const std::vector &executionList, const Graph *graph); + // Determines NPU/CPU-target + void PrePackOperations(); + // Performs fusing/chaining void PackOperations(); + // Reorders CPU-operations void ReorderOperations(); int CanPack(const SchedulerOperation *schedOp, const SchedulerOperation *prevOp, const SchedulerOperation *op, const int prevOpKey) const; @@ -67,6 +74,7 @@ private: std::vector> DecomposeSchedulerOperation(std::unique_ptr op); ArchResampling ResamplingMode(TensorUsage usage, OpType opType) const; ArchitectureOpGroupQuery CreateOpGroupQuery(const SchedulerOperation *schedOp) const; + ArchOperatorQuery CreateOperatorQuery(const SchedulerOperation *schedOp) const; }; } // namespace regor