From a7c609044e7966f00266d8da641ac0aef4f17e5d Mon Sep 17 00:00:00 2001
From: Alexander Bengtsson
Date: Tue, 3 Dec 2024 16:33:24 +0100
Subject: [PATCH] MLBEDSW-9431: Convert unsupported Rescales to elementwise Mul

- Handle Rescale fallback by converting to elementwise Mul
- Add support for int32 input to Rescales for Ethos-U55 and Ethos-U65

Change-Id: I0043f0b015f5dd52ddb8d4112e2c2ddf67a25dd7
Signed-off-by: Alexander Bengtsson
---
 .../architecture/architecture_constraints.hpp |   1 +
 .../ethosu55/ethos_u55_constraints.cpp        |   6 +
 .../ethosu55/ethos_u55_constraints.hpp        |   1 +
 .../ethosu85/ethos_u85_constraints.cpp        |   7 +
 .../ethosu85/ethos_u85_constraints.hpp        |   1 +
 ethosu/regor/compiler/graphir_optimiser.cpp   | 121 +++++++++++++++++-
 ethosu/regor/compiler/graphir_optimiser.hpp   |   4 +-
 7 files changed, 139 insertions(+), 2 deletions(-)

diff --git a/ethosu/regor/architecture/architecture_constraints.hpp b/ethosu/regor/architecture/architecture_constraints.hpp
index f2bde6a0..70931bcf 100644
--- a/ethosu/regor/architecture/architecture_constraints.hpp
+++ b/ethosu/regor/architecture/architecture_constraints.hpp
@@ -95,6 +95,7 @@ public:
     virtual bool SupportsReverse(OpType opType, ReverseType reverseTypeMask) = 0;
     virtual bool SupportsFusedRescale(OpType opType, TensorUsage tensorUsage, DataType fromType, DataType toType, const Quantization &quantization) = 0;
+    virtual bool SupportsRescale(DataType fromType, DataType toType) = 0;
     virtual bool SupportsTranspose(OpType opType, TransposeType transposeType) = 0;
     virtual bool SupportsAccumulatorSaveRestore() = 0;
diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp
index 29bf2b5b..7bdea0d1 100644
--- a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp
+++ b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp
@@ -113,6 +113,12 @@ bool EthosU55Constraints::SupportsFusedRescale(
     return false;
 }
 
+bool EthosU55Constraints::SupportsRescale(DataType fromType, DataType toType)
+{
+    UNUSED(toType);
+    return DataTypeSizeBits(fromType) <= 16;
+}
+
 bool EthosU55Constraints::SupportsGather(OpType opType)
 {
     UNUSED(opType);
diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.hpp b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.hpp
index 23e3f59c..b12223c7 100644
--- a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.hpp
+++ b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.hpp
@@ -32,6 +32,7 @@ public:
     bool SupportsTranspose(OpType opType, TransposeType transposeType) override;
     bool SupportsReverse(OpType opType, ReverseType reverseTypeMask) override;
     bool SupportsFusedRescale(OpType opType, TensorUsage tensorUsage, DataType fromType, DataType toType, const Quantization &quantization) override;
+    bool SupportsRescale(DataType fromType, DataType toType) override;
     bool SupportsAccumulatorSaveRestore() override { return false; }
     bool SupportsGather(OpType opType) override;
     bool SupportsScatter(OpType opType) override;
diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.cpp
index 11b58b3c..339b3cd4 100644
--- a/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.cpp
+++ b/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.cpp
@@ -146,6 +146,13 @@ bool EthosU85Constraints::SupportsFusedRescale(
     return false;
 }
 
+bool EthosU85Constraints::SupportsRescale(DataType fromType, DataType toType)
+{
+    UNUSED(fromType);
+    UNUSED(toType);
+    return true;
+}
+
 bool EthosU85Constraints::SupportsGather(OpType opType)
 {
     EthosU85NpuOp npuOp = ArchEthosU85::GetHWOp(opType);
diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.hpp b/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.hpp
index ae3f2593..b06795e5 100644
--- a/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.hpp
+++ b/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.hpp
@@ -32,6 +32,7 @@ public:
     bool SupportsTranspose(OpType opType, TransposeType transposeType) override;
     bool SupportsReverse(OpType opType, ReverseType reverseTypeMask) override;
     bool SupportsFusedRescale(OpType opType, TensorUsage tensorUsage, DataType fromType, DataType toType, const Quantization &quantization) override;
+    bool SupportsRescale(DataType fromType, DataType toType) override;
     bool SupportsAccumulatorSaveRestore() override { return true; }
     bool SupportsGather(OpType opType) override;
     bool SupportsScatter(OpType opType) override;
diff --git a/ethosu/regor/compiler/graphir_optimiser.cpp b/ethosu/regor/compiler/graphir_optimiser.cpp
index 2d5e84bb..c96c4858 100644
--- a/ethosu/regor/compiler/graphir_optimiser.cpp
+++ b/ethosu/regor/compiler/graphir_optimiser.cpp
@@ -490,7 +490,7 @@ Operation *GraphIrOptimiser::RewriteFullyConnected(Graph *const graph, Operation
     return returnOp;
 }
 
-Operation *GraphIrOptimiser::RewriteRescale(Graph *const, Operation *const operation)
+Operation *GraphIrOptimiser::RewriteRescaleInputs(Graph *const, Operation *const operation)
 {
     Operation *returnOp = operation;
     OpType opType = operation->Type();
@@ -530,6 +530,125 @@ Operation *GraphIrOptimiser::RewriteRescale(Graph *const, Operation *const opera
     return returnOp;
 }
 
+/*
+ * Lower 32-bit Rescale into one (or more) elementwise MUL operations.
+ * Multipliers are moved to a constant tensor, while the shift value is kept as OFM quantization.
+ *
+ *   IFM (32-bit)              IFM (32-bit)   Multipliers (32-bit)
+ *        |                             \       /
+ *     Rescale        --->               MUL
+ *        |                               |
+ *   OFM (any format)              OFM (any format)
+ *
+ * Global scaling (one global multiplier):
+ *     Converted into one MUL operation.
+ *
+ * Per-channel scaling (one multiplier per channel):
+ *     The algorithm will attempt to adjust scales to a common shift representation
+ *     to pack consecutive channels into the same MUL operation.
+ *     This can be done as long as the adjustment can be made without precision loss.
+ *     Worst case, per-channel scaling is handled with one MUL operation per channel.
+ */
+Operation *GraphIrOptimiser::RewriteRescale(Graph *const, Operation *const operation)
+{
+    Operation *returnOp = operation;
+    OpType opType = operation->Type();
+    if ( opType == OpType::Rescale )
+    {
+        const auto &ifmConn = operation->Input(TensorUsage::IFM0);
+        const auto &ofmConn = operation->Output(TensorUsage::OFM);
+        const Quantization &quant = ofmConn->quantization;
+        DataType ifmType = ifmConn->tensor->Type();
+        DataType ofmType = ofmConn->tensor->Type();
+        const auto attr = operation->Attribute<rescale_attr_t>();
+        if ( attr->input_unsigned )
+        {
+            ifmType = ifmType & ~unsigned(DataType::Signed);
+        }
+        if ( attr->output_unsigned )
+        {
+            ofmType = ofmType & ~unsigned(DataType::Signed);
+        }
+        if ( ifmType == DataType::Int32 && !_constraints->SupportsRescale(ifmType, ofmType) )
+        {
+            auto CreateRescalingMul = [ifmConn, ofmConn](int startChannel, int endChannel, std::vector<int32_t> &scales, int shift)
+            {
+                Shape sliceOffset = ifmConn->shape.WithZeros().WithDepth(startChannel);
+                Shape sliceShape = ifmConn->shape.WithDepth(endChannel - startChannel);
+                TensorSlice slice{sliceOffset, sliceShape};
+
+                auto mulOp = std::make_shared<Operation>(OpType::Mul);
+                auto buf = std::make_shared<Buffer>(scales.size(), scales.data());
+                auto scaleTensor = CreateConstTensor(fmt::format("multipliers_{}_{}", startChannel, endChannel - 1), DataType::Int32, buf);
+
+                Quantization scaleQuant = Quantization::Unit();
+                scaleQuant.type = QuantizationType::EXPLICIT;
+
+                Quantization ifmQuant = ifmConn->quantization;
+                ifmQuant.scales.clear();
+                ifmQuant.scales.push_back({1, 0});
+                ifmQuant.type = QuantizationType::EXPLICIT;
+
+                Quantization ofmQuant = ofmConn->quantization;
+                ofmQuant.scales.clear();
+                ofmQuant.scales.push_back({1, shift});
+                ofmQuant.type = QuantizationType::EXPLICIT;
+
+                mulOp->ConnectInput(TensorUsage::IFM1, scaleTensor);
+                mulOp->CopyInput(TensorUsage::IFM0, *ifmConn);
+                mulOp->CopyOutput(TensorUsage::OFM, *ofmConn);
+
+                mulOp->Input(TensorUsage::IFM1)->Set(scaleQuant);
+                mulOp->Input(TensorUsage::IFM0)->Set(ifmQuant).Set(slice);
+                mulOp->Output(TensorUsage::OFM)->Set(ofmQuant).Set(slice);
+                return mulOp;
+            };
+
+            // Use the first channel's shift value as the reference shift and try to adjust
+            // the multipliers to pack as many consecutive channels as possible into the same MUL operation
+            int shift = quant.scales[0].shift;
+            std::vector<int32_t> scales;
+            int startChannel = 0;
+            for ( auto qscale : quant.scales )
+            {
+                int shiftDiff = qscale.shift - shift;
+                // Try to right-shift the scale without precision loss.
+                // This can be done if the scale is evenly divisible by 2^shiftDiff
+                if ( (shiftDiff >= 0) && (qscale.scale % (1UL << shiftDiff) == 0) )
+                {
+                    scales.push_back(qscale.scale >> shiftDiff);
+                }
+                else
+                {
+                    // Could not adjust the scale without precision loss.
+                    // Create elementwise mul operation to handle all the previous scales
+                    int endChannel = startChannel + scales.size();
+                    auto mulOp = CreateRescalingMul(startChannel, endChannel, scales, shift);
+                    mulOp->SetRounding(operation->Rounding());
+                    RecordOptimisation(operation, mulOp.get());
+
+                    // reset scales and startChannel
+                    startChannel = endChannel;
+                    scales.clear();
+                    scales.push_back(qscale.scale);
+
+                    // update target shift to the current shift-value
+                    shift = qscale.shift;
+                }
+            }
+
+            // Emit the final mul operation (or the only one for global scaling)
+            int endChannel = ifmConn->shape.Depth();
+            auto mulOp = CreateRescalingMul(startChannel, endChannel, scales, shift);
+            mulOp->SetRounding(operation->Rounding());
+            RecordOptimisation(operation, mulOp.get());
+            returnOp = mulOp.get();
+            operation->Disconnect();
+        }
+    }
+    return returnOp;
+}
+
 // Rewrite TOSA PAD to number of MemoryCopy ops
 Operation *GraphIrOptimiser::RewritePad(Graph *const, Operation *const operation)
 {
diff --git a/ethosu/regor/compiler/graphir_optimiser.hpp b/ethosu/regor/compiler/graphir_optimiser.hpp
index 3ff0b410..cd3720ff 100644
--- a/ethosu/regor/compiler/graphir_optimiser.hpp
+++ b/ethosu/regor/compiler/graphir_optimiser.hpp
@@ -49,6 +49,7 @@ private:
     Tensor *ConvertInt4Tensors(Graph *graph, Tensor *tensor);
    Operation *RewriteFullyConnected(Graph *const graph, Operation *const operation);
    Operation *FixupPoolStrides(Graph *const, Operation *const operation);
+    Operation *RewriteRescaleInputs(Graph *const graph, Operation *const operation);
     Operation *RewriteRescale(Graph *const graph, Operation *const operation);
     Operation *RewritePad(Graph *const graph, Operation *const operation);
     Operation *FuseRescale(Graph *const graph, Operation *const operation);
@@ -110,7 +111,7 @@ private:
         {},
         {
             &GraphIrOptimiser::ConvertAttributes,
-            &GraphIrOptimiser::RewriteRescale,
+            &GraphIrOptimiser::RewriteRescaleInputs,
             &GraphIrOptimiser::FuseRescale,  // First pass fuse all possible ifm and ofm rescales
         }
     },
@@ -124,6 +125,7 @@ private:
         {},
         {
             &GraphIrOptimiser::ConvertAttributes,
+            &GraphIrOptimiser::RewriteRescale,
             &GraphIrOptimiser::ConvertResizeOffsets,
             &GraphIrOptimiser::RewriteFullyConnected,
             &GraphIrOptimiser::FixupPoolStrides,
--
GitLab
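
To make the per-channel packing in RewriteRescale easier to follow, below is a minimal standalone C++ sketch of the grouping heuristic described in the comment block of the patch. The QScale and Group types, the PackChannels helper and the example values are illustrative assumptions only and are not part of the regor code base; in the patch itself each resulting group corresponds to one elementwise MUL, with the adjusted multipliers placed in a constant IFM1 tensor and the shared shift carried as the OFM quantization.

// Standalone sketch of the channel-packing heuristic used when lowering a
// per-channel Rescale to elementwise MULs. Hypothetical types for this
// sketch only; not regor API.
#include <cstdint>
#include <cstdio>
#include <vector>

struct QScale { int32_t scale; int shift; };   // per-channel multiplier and right-shift
struct Group  { int start; int end; int shift; std::vector<int32_t> scales; };

// Pack consecutive channels into groups that can share one OFM shift.
// A channel joins the current group if its scale can be right-shifted by
// shiftDiff = (channel shift - group shift) without precision loss, i.e. if
// the scale is evenly divisible by 2^shiftDiff.
static std::vector<Group> PackChannels(const std::vector<QScale> &perChannel)
{
    std::vector<Group> groups;
    Group current{0, 0, perChannel[0].shift, {}};  // assumes at least one channel
    for ( int ch = 0; ch < int(perChannel.size()); ch++ )
    {
        const QScale &q = perChannel[ch];
        int shiftDiff = q.shift - current.shift;
        if ( shiftDiff >= 0 && q.scale % (int64_t(1) << shiftDiff) == 0 )
        {
            // Lossless adjustment: q.scale * 2^-q.shift == (q.scale >> shiftDiff) * 2^-current.shift
            current.scales.push_back(q.scale >> shiftDiff);
        }
        else
        {
            // Close the group [start, ch) and restart with this channel's shift
            current.end = ch;
            groups.push_back(current);
            current = Group{ch, 0, q.shift, {q.scale}};
        }
    }
    current.end = int(perChannel.size());
    groups.push_back(current);
    return groups;
}

int main()
{
    // Example per-channel scales: channels 0-2 can all be expressed at shift 14
    // (40 * 2^-15 == 20 * 2^-14, 64 * 2^-16 == 16 * 2^-14); channel 3 has an odd
    // scale at shift 15 and forces a new group, which channel 4 then joins.
    std::vector<QScale> perChannel = {{16384, 14}, {40, 15}, {64, 16}, {31, 15}, {100, 15}};
    for ( const Group &g : PackChannels(perChannel) )
    {
        std::printf("MUL over channels [%d, %d), ofm shift %d, multipliers:", g.start, g.end, g.shift);
        for ( int32_t s : g.scales ) std::printf(" %d", int(s));
        std::printf("\n");
    }
    return 0;
}

For this example data the sketch emits one MUL covering channels 0-2 with shift 14 and multipliers 16384, 20, 16, and a second MUL covering channels 3-4 with shift 15 and multipliers 31, 100. In the worst case, where no adjacent channels can share a representable shift, the lowering degenerates to one MUL per channel, as the comment in the patch notes.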