diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp index 49c664855409b32ea13d755d49648534b775566e..e44f5a766f8336e9833136b3883943bd43946ae0 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -124,8 +124,15 @@ bool EthosU55Constraints::SupportsFusedRescale( bool EthosU55Constraints::SupportsRescale(DataType fromType, DataType toType) { - UNUSED(toType); - return DataTypeSizeBits(fromType) <= 16; + if ( DataTypeSizeBits(toType) > 16 ) + { + return false; + } + if ( DataTypeSizeBits(fromType) > 16 ) + { + return false; + } + return true; } bool EthosU55Constraints::SupportsGather(OpType opType) diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp index 840d7dd34f9dc3c243c3b47e1a1fa2b0839e28e8..7d49aa4942dde54ad5e88432ad098ad8a1918bbe 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp @@ -433,6 +433,10 @@ bool EthosU55RCSGenerator::IsScalar(const HLCFeatureMap &fm, int32_t &scalarValu { scalarValue = view.Values()[0]; } + else if ( fm.dataType == DataType::UInt16 ) + { + scalarValue = view.Values()[0]; + } else { // Unsupported scalar value isScalar = false; diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.cpp index 888942d750b9e26f8276cac7bccbce12d7090c25..c64550aa71fbe7f36e579c0ebe4019b0c87f027f 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.cpp +++ 
b/ethosu/regor/architecture/ethosu85/ethos_u85_constraints.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -156,9 +156,8 @@ bool EthosU85Constraints::SupportsFusedRescale( bool EthosU85Constraints::SupportsRescale(DataType fromType, DataType toType) { - UNUSED(fromType); UNUSED(toType); - return true; + return fromType != DataType::UInt16; } bool EthosU85Constraints::SupportsGather(OpType opType) diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp index 2fc0e2201cbeddb9d2b9f76f4ed2c3b953279534..8aa3501f875880dcbd8c1c0aab16dcf62aeccaf3 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp @@ -600,6 +600,10 @@ bool EthosU85RCSGenerator::IsScalar(const HLCFeatureMap &fm, int32_t &scalarValu { scalarValue = view.Values()[0]; } + else if ( fm.dataType == DataType::UInt16 ) + { + scalarValue = view.Values()[0]; + } else if ( fm.dataType == DataType::Int32 ) { scalarValue = view.Values()[0]; diff --git a/ethosu/regor/compiler/attributes.cpp b/ethosu/regor/compiler/attributes.cpp index 7ceda1a78f7b090c5569fd02807659465dac763d..659745ab1f427e0eb47fd719d31175c3ff799ccc 100644 --- a/ethosu/regor/compiler/attributes.cpp +++ b/ethosu/regor/compiler/attributes.cpp @@ -46,6 +46,7 @@ DynamicRef CreateAttribute(uint32_t reducedHash) CASE_MAKE_ATTR_INSTANCE(pad_attr_t); CASE_MAKE_ATTR_INSTANCE(pooling_attr_t); CASE_MAKE_ATTR_INSTANCE(rescale_attr_t); + CASE_MAKE_ATTR_INSTANCE(sign_attr_t); CASE_MAKE_ATTR_INSTANCE(resize_attr_t); CASE_MAKE_ATTR_INSTANCE(slice_attr_t); CASE_MAKE_ATTR_INSTANCE(softmax_attr_t); diff --git a/ethosu/regor/compiler/attributes.hpp b/ethosu/regor/compiler/attributes.hpp 
index 5feb982522f9c67f26c2d8541d32af6acc438eeb..4ce8260e4ee19d7ea171c23407346fea5b742230 100644 --- a/ethosu/regor/compiler/attributes.hpp +++ b/ethosu/regor/compiler/attributes.hpp @@ -112,19 +112,25 @@ struct clamp_attr_t END_FIELD_TABLE() }; +struct sign_attr_t +{ + bool input_unsigned; + bool output_unsigned; + BEGIN_FIELD_TABLE(sign_attr_t) + ATTR_FIELD(input_unsigned, 0) + ATTR_FIELD(output_unsigned, 1) + END_FIELD_TABLE() +}; + struct rescale_attr_t { bool scale32; bool double_round; bool per_channel; - bool input_unsigned; - bool output_unsigned; BEGIN_FIELD_TABLE(rescale_attr_t) ATTR_FIELD(scale32, 0) ATTR_FIELD(double_round, 1) ATTR_FIELD(per_channel, 2) - ATTR_FIELD(input_unsigned, 3) - ATTR_FIELD(output_unsigned, 4) END_FIELD_TABLE() }; diff --git a/ethosu/regor/compiler/graphir_optimiser.cpp b/ethosu/regor/compiler/graphir_optimiser.cpp index aafa001773b36527371801b187eb27a2844f58c7..2b44bd02ea2aeea00a27f7c9a68d64855f66c8ff 100644 --- a/ethosu/regor/compiler/graphir_optimiser.cpp +++ b/ethosu/regor/compiler/graphir_optimiser.cpp @@ -526,14 +526,16 @@ Operation *GraphIrOptimiser::RewriteRescaleInputs(Graph *const, Operation *const } /* - * Lower 32-bit Rescale into one (or more) elementwise MUL operations. - * Multipliers are moved to a constant-tensor, while the shift value is kept as ofm-quantization + * Lower Rescale into one (or more) 32-bit elementwise MUL operations. 
+ * Multipliers are moved to a constant-tensor, while the shift value is kept as ofm-quantization * - * IFM (32-bit) IFM (32-bit) Multipliers (32-bit) + * Cast to 32-bit (if necessary) + * | + * IFM IFM (32-bit) Multipliers (32-bit) * | \ / * Rescale ---> MUL * | | - * OFM (any format) OFM (any format) + * OFM OFM * * Global-scaling (one global multiplier): * Converted into one MUL operation @@ -555,15 +557,41 @@ Operation *GraphIrOptimiser::RewriteRescale(Graph *const, Operation *const opera const Quantization &quant = ofmConn->quantization; DataType ifmType = ifmConn->tensor->Type(); DataType ofmType = ofmConn->tensor->Type(); - const auto attr = operation->Attribute(); - if ( attr->input_unsigned ) + const auto rescaleAttr = operation->Attribute(); + auto signAttr = operation->Attribute(); + if ( signAttr->input_unsigned ) { ifmType = ifmType & ~unsigned(DataType::Signed); } - if ( attr->output_unsigned ) + if ( signAttr->output_unsigned ) { ofmType = ofmType & ~unsigned(DataType::Signed); } + if ( ifmType != DataType::Int32 && !_constraints->SupportsRescale(ifmType, ofmType) ) + { + // create cast op to convert to 32-bit ifm + if ( ifmConn->tensor->Type() != DataType::Int32 ) + { + auto castOp = std::make_shared(OpType::Cast); + std::shared_ptr ifm32Tens = ifmConn->tensor->Clone(); + + castOp->ConnectInput(TensorUsage::IFM, ifmConn->tensor).quantization.zeroPoints = ifmConn->quantization.zeroPoints; + ifmConn->quantization.zeroPoints.clear(); + ifmConn->quantization.zeroPoints.push_back(0); + + castOp->ConnectOutput(TensorUsage::OFM, ifm32Tens); + auto castAttr = castOp->Attribute(); + + // move input_unsigned to cast input + castAttr->input_unsigned = signAttr->input_unsigned; + signAttr->input_unsigned = false; + + ifm32Tens->ChangeType(DataType::Int32); + RecordOptimisation(operation, castOp.get()); + operation->ConnectInput(TensorUsage::IFM, ifm32Tens); + ifmType = DataType::Int32; + } + } if ( ifmType == DataType::Int32 && 
!_constraints->SupportsRescale(ifmType, ofmType) ) { auto CreateRescalingMul = [ifmConn, ofmConn](int startChannel, int endChannel, std::vector &scales, int shift) @@ -620,6 +648,8 @@ Operation *GraphIrOptimiser::RewriteRescale(Graph *const, Operation *const opera // Create elementwise mul operation to handle all the previous scales int endChannel = startChannel + scales.size(); auto mulOp = CreateRescalingMul(startChannel, endChannel, scales, shift); + auto mulAttr = mulOp->Attribute(); + mulAttr->output_unsigned = signAttr->output_unsigned; RecordOptimisation(operation, mulOp.get()); // reset scales and startChannel @@ -635,6 +665,8 @@ Operation *GraphIrOptimiser::RewriteRescale(Graph *const, Operation *const opera // Emit the final mul operation (or the only one for global scaling) int endChannel = ifmConn->shape.Depth(); auto mulOp = CreateRescalingMul(startChannel, endChannel, scales, shift); + auto mulAttr = mulOp->Attribute(); + mulAttr->output_unsigned = signAttr->output_unsigned; RecordOptimisation(operation, mulOp.get()); returnOp = mulOp.get(); operation->Disconnect(); @@ -817,7 +849,8 @@ Operation *GraphIrOptimiser::FuseRescale(Graph *const graph, Operation *const op if ( opType == OpType::Rescale ) { auto *attr = operation->Attribute(); - if ( attr && (attr->input_unsigned || attr->output_unsigned) ) + auto *signAttr = operation->Attribute(); + if ( signAttr && (signAttr->input_unsigned || signAttr->output_unsigned) ) { // These type of rescales needs special handling and cannot be fused return returnOp; @@ -1047,6 +1080,14 @@ Operation *GraphIrOptimiser::RewriteCast(Graph *const, Operation *const operatio copyOpConn->quantization.quantMin = {std::numeric_limits::min()}; copyOpConn->quantization.quantMax = {std::numeric_limits::max()}; } + + // Copy sign attribute to new operation + if ( operation->HasAttribute() ) + { + auto signAttr = operation->Attribute(); + auto newAttr = returnOp->Attribute(); + *newAttr = *signAttr; + } } return returnOp; } diff 
--git a/ethosu/regor/compiler/high_level_command_stream_generator.cpp b/ethosu/regor/compiler/high_level_command_stream_generator.cpp index 904f6a7ecd53e51bff98fb581d80530af847f332..b760a42007ae0303c1d519f014948a8148710193 100644 --- a/ethosu/regor/compiler/high_level_command_stream_generator.cpp +++ b/ethosu/regor/compiler/high_level_command_stream_generator.cpp @@ -437,21 +437,25 @@ static std::shared_ptr MakeOperation(SchedulerOperation *schedOp, op->parameters.tile.multiplier = multiples[axis]; } break; - case OpType::Rescale: + default: + break; + } + + if ( schedOp->HasAttribute() ) + { + const auto *attr = schedOp->Attribute(); + if ( attr->input_unsigned ) { - const auto *rescale = schedOp->Attribute(); - if ( rescale->input_unsigned ) - { - op->ifm[0].dataType = op->ifm[0].dataType & ~unsigned(DataType::Signed); - } - if ( rescale->output_unsigned ) + op->ifm[0].dataType = op->ifm[0].dataType & ~unsigned(DataType::Signed); + if ( op->ifm.size() > 1 ) { - op->ofm.dataType = op->ofm.dataType & ~unsigned(DataType::Signed); + op->ifm[1].dataType = op->ifm[1].dataType & ~unsigned(DataType::Signed); } } - break; - default: - break; + if ( attr->output_unsigned ) + { + op->ofm.dataType = op->ofm.dataType & ~unsigned(DataType::Signed); + } } return op; } diff --git a/ethosu/regor/include/graphapi_attr.hpp b/ethosu/regor/include/graphapi_attr.hpp index 1a1cb300df2992215ca42f3c14e3e73ef54db8ed..441604e889a9f1fc39e3d436a4edeeaeba953767 100644 --- a/ethosu/regor/include/graphapi_attr.hpp +++ b/ethosu/regor/include/graphapi_attr.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2022-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2022-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -84,8 +84,9 @@ enum class OpAttr : uint32_t RESCALE_SCALE32 = GRAPHAPI_MAKE_ATTR(rescale, bool, 0), RESCALE_DOUBLE_ROUND = GRAPHAPI_MAKE_ATTR(rescale, bool, 1), RESCALE_PER_CHANNEL = GRAPHAPI_MAKE_ATTR(rescale, 
bool, 2), - RESCALE_INPUT_UNSIGNED = GRAPHAPI_MAKE_ATTR(rescale, bool, 3), - RESCALE_OUTPUT_UNSIGNED = GRAPHAPI_MAKE_ATTR(rescale, bool, 4), + // Sign + RESCALE_INPUT_UNSIGNED = GRAPHAPI_MAKE_ATTR(sign, bool, 0), + RESCALE_OUTPUT_UNSIGNED = GRAPHAPI_MAKE_ATTR(sign, bool, 1), // Mul MUL_SHIFT = GRAPHAPI_MAKE_ATTR(mul, int32, 0), // Asr diff --git a/ethosu/regor/tosa/tosa_argument_checks.cpp b/ethosu/regor/tosa/tosa_argument_checks.cpp index a442418902bee9eff25e37b245a77cfe3477deea..b56af12951630d4f486792f7192bf130aefe01af 100644 --- a/ethosu/regor/tosa/tosa_argument_checks.cpp +++ b/ethosu/regor/tosa/tosa_argument_checks.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2023-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2023-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -256,7 +256,7 @@ bool ResolveAndValidateArgument(const regor::Operation *op, const Argument *argu if ( !expectedType ) return false; if ( op->Type() == OpType::Rescale ) { - auto *attr = op->Attribute(); + auto *attr = op->Attribute(); if ( ((argument->category == Category::Input && argument->name == "input" && attr->input_unsigned) || (argument->category == Category::Output && argument->name == "output" && attr->output_unsigned)) ) { diff --git a/ethosu/regor/tosa/tosa_error_checks.cpp b/ethosu/regor/tosa/tosa_error_checks.cpp index 6cab044ff9c4fb76259891749cc7dcf235e56eab..3ee9a4b666da6aed07f9f703d68cecb87aabdf23 100644 --- a/ethosu/regor/tosa/tosa_error_checks.cpp +++ b/ethosu/regor/tosa/tosa_error_checks.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2023-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2023-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -1720,7 +1720,7 @@ void ErrorIfCheck_1wbutqm1lq6qy(const regor::Operation *op, [[maybe_unused]] con auto bits = DataTypeSizeBits(op->IFM(0)->Type()); auto &zp = 
op->Input(TensorUsage::IFM)->quantization.zeroPoints; auto input_zp = zp.empty() ? 0 : zp[0]; - auto *attr = op->Attribute(); + auto *attr = op->Attribute(); if ( bits != 8 && (bits != 16 || !attr->input_unsigned) && input_zp != 0 ) throw std::invalid_argument(constraint); } @@ -1731,7 +1731,7 @@ void ErrorIfCheck_2x883ovw61v55(const regor::Operation *op, [[maybe_unused]] con auto bits = DataTypeSizeBits(op->OFM()->Type()); auto &zp = op->Output(TensorUsage::OFM)->quantization.zeroPoints; auto output_zp = zp.empty() ? 0 : zp[0]; - auto *attr = op->Attribute(); + auto *attr = op->Attribute(); if ( bits != 8 && (bits != 16 || !attr->output_unsigned) && output_zp != 0 ) throw std::invalid_argument(constraint); } @@ -1743,7 +1743,7 @@ void ErrorIfCheck_7yfu5xo1ii36(const regor::Operation *op, [[maybe_unused]] cons auto bits = DataTypeSizeBits(op->IFM(0)->Type()); auto &zp = op->Input(TensorUsage::IFM)->quantization.zeroPoints; auto input_zp = zp.empty() ? 0 : zp[0]; - auto *attr = op->Attribute(); + auto *attr = op->Attribute(); if ( bits == 16 && attr->input_unsigned && input_zp != 0 && input_zp != 32768 ) throw std::invalid_argument(constraint); } @@ -1755,7 +1755,7 @@ void ErrorIfCheck_3kc0n1wjhehqz(const regor::Operation *op, [[maybe_unused]] con auto bits = DataTypeSizeBits(op->OFM()->Type()); auto &zp = op->Output(TensorUsage::OFM)->quantization.zeroPoints; auto output_zp = zp.empty() ? 
0 : zp[0]; - auto *attr = op->Attribute(); + auto *attr = op->Attribute(); if ( bits == 16 && attr->output_unsigned && output_zp != 0 && output_zp != 32768 ) throw std::invalid_argument(constraint); } @@ -1775,7 +1775,7 @@ void ErrorIfCheck_23cyq2l8quj8p(const regor::Operation *op, [[maybe_unused]] con static constexpr char constraint[] = "ERROR_IF(in_t == i16_t && out_t == i32_t && input_unsigned)"; auto in_t = op->IFM(0)->Type(); auto out_t = op->OFM()->Type(); - auto *attr = op->Attribute(); + auto *attr = op->Attribute(); if ( DataTypeSizeBits(in_t) == 16 && DataTypeSizeBits(out_t) == 32 && attr->input_unsigned ) throw std::invalid_argument(constraint); } @@ -1786,7 +1786,7 @@ void ErrorIfCheck_13bcaagzywlqq(const regor::Operation *op, [[maybe_unused]] con static constexpr char constraint[] = "ERROR_IF(in_t == i32_t && out_t == i16_t && output_unsigned)"; auto in_t = op->IFM(0)->Type(); auto out_t = op->OFM()->Type(); - auto *attr = op->Attribute(); + auto *attr = op->Attribute(); if ( DataTypeSizeBits(in_t) == 32 && DataTypeSizeBits(out_t) == 16 && attr->output_unsigned ) throw std::invalid_argument(constraint); }