diff --git a/ethosu/regor/common/scaling.cpp b/ethosu/regor/common/scaling.cpp
index fd2c3364f9ee78f8d10bffdd3e93ddb2efecc8a2..1032a10267d16ad86a3d69516122bea6e229b68c 100644
--- a/ethosu/regor/common/scaling.cpp
+++ b/ethosu/regor/common/scaling.cpp
@@ -1,5 +1,5 @@
 //
-// SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+// SPDX-FileCopyrightText: Copyright 2021-2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
 //
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -44,10 +44,20 @@ QuantizedScale::QuantizedScale(double scale_, bool reduced)
     if ( reduced ) scale = ClampToType<int16_t>(scale);
     shift = leftShift - exponent;
     // if shift is out of bounds [0,63], try to get back within bounds
-    if ( shift > 63 && scale > std::exp2(shift - 63) )
+    if ( shift > 63 )
     {
-        scale = scale >> (shift - 63);
-        shift = 63;
+        if ( scale > std::exp2(shift - 63) )
+        {
+            scale = scale >> (shift - 63);
+            shift = 63;
+        }
+        else
+        {
+            // The shift cannot be brought back within bounds. Set scale and shift to 0,
+            // since a shift this large would shift away all relevant bits of scale anyway.
+            scale = 0;
+            shift = 0;
+        }
     }
     else if ( shift < 0 && scale < std::exp2(shift + 32) )
     {
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index 3918e8475bd7a29cf0a31004df0cd81675131f55..1834638dfa30bbb805b1b535ea5e822af8ed7996 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -593,7 +593,7 @@ def convert_argmax_to_depthwise_conv_and_max_pool(op: Operation, arch, nng) -> O
         # To extract 7 least significant bits and swap reverse index back to real index using a LUT activation, we set
         # the base value to c-1 and slope to -128. The 16-bit LUT uses a table of 32-bit values where the top 16 bits
         # represent the slope and bottom 16 bits the base which are used to interpolate the activation value.
-        slope = (-128 & 0xFFFF) << 16  # Top 16 bits of 32 bit LUT table value
+        slope = np.uint32((-128 & 0xFFFF) << 16)  # Top 16 bits of 32 bit LUT table value
         base = c - 1  # Bottom 16 bits of the LUT table value
         lut_tensor = create_const_tensor(
             "maxpool_LUT_extract_7_LSB",
@@ -2535,7 +2535,7 @@ def convert_mean_to_depthwise_conv(op, arch, nng):
         shift = round_down_log2(num_elements_in_axis)
         shift = min(shift, 32)
         shift = min(shift, 31 + output_shift)
-        output_multiplier = (output_multiplier << shift) // num_elements_in_axis
+        output_multiplier = np.int32((np.int64(output_multiplier) << shift) // num_elements_in_axis)
         output_shift = output_shift - shift
 
         # Convert to vela representation shift