diff --git a/ethosu/regor/common/scaling.cpp b/ethosu/regor/common/scaling.cpp index fd2c3364f9ee78f8d10bffdd3e93ddb2efecc8a2..1032a10267d16ad86a3d69516122bea6e229b68c 100644 --- a/ethosu/regor/common/scaling.cpp +++ b/ethosu/regor/common/scaling.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -44,10 +44,20 @@ QuantizedScale::QuantizedScale(double scale_, bool reduced) if ( reduced ) scale = ClampToType(scale); shift = leftShift - exponent; // if shift is out of bounds [0,63], try to get back within bounds - if ( shift > 63 && scale > std::exp2(shift - 63) ) + if ( shift > 63 ) { - scale = scale >> (shift - 63); - shift = 63; + if ( scale > std::exp2(shift - 63) ) + { + scale = scale >> (shift - 63); + shift = 63; + } + else + { + // Not possible to get back within bounds, set scale and shift to 0 + // as the shift would shift away all relevant bits anyway. + scale = 0; + shift = 0; + } } else if ( shift < 0 && scale < std::exp2(shift + 32) ) { diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py index 3918e8475bd7a29cf0a31004df0cd81675131f55..1834638dfa30bbb805b1b535ea5e822af8ed7996 100644 --- a/ethosu/vela/tflite_graph_optimiser.py +++ b/ethosu/vela/tflite_graph_optimiser.py @@ -593,7 +593,7 @@ def convert_argmax_to_depthwise_conv_and_max_pool(op: Operation, arch, nng) -> O # To extract 7 least significant bits and swap reverse index back to real index using a LUT activation, we set # the base value to c-1 and slope to -128. The 16-bit LUT uses a table of 32-bit values where the top 16 bits # represent the slope and bottom 16 bits the base which are used to interpolate the activation value. - slope = (-128 & 0xFFFF) << 16 # Top 16 bits of 32 bit LUT table value + slope = np.uint32((-128 & 0xFFFF) << 16) # Top 16 bits of 32 bit LUT table value base = c - 1 # Bottom 16 bits of the LUT table value lut_tensor = create_const_tensor( "maxpool_LUT_extract_7_LSB", @@ -2535,7 +2535,7 @@ def convert_mean_to_depthwise_conv(op, arch, nng): shift = round_down_log2(num_elements_in_axis) shift = min(shift, 32) shift = min(shift, 31 + output_shift) - output_multiplier = (output_multiplier << shift) // num_elements_in_axis + output_multiplier = np.int32((np.int64(output_multiplier) << shift) // num_elements_in_axis) output_shift = output_shift - shift # Convert to vela representation shift