diff --git a/ethosu/regor/architecture/ethos_u_scaling.cpp b/ethosu/regor/architecture/ethos_u_scaling.cpp
index 9ef4bdd1c18887441838a3a482b2d0aa6d046688..fad664e95caa7efb55e128a36cc433d52df4ff1a 100644
--- a/ethosu/regor/architecture/ethos_u_scaling.cpp
+++ b/ethosu/regor/architecture/ethos_u_scaling.cpp
@@ -82,7 +82,6 @@ Quantization RescalePerChannel(const Quantization &ifmQuant, const Quantization
     quantResult.quantMin = ofmQuant.quantMin;
     quantResult.quantMax = ofmQuant.quantMax;
     quantResult.dimension = ofmQuant.dimension;
-    quantResult.forceZeroPoint = ofmQuant.forceZeroPoint;
 
     if ( !ifmQuant.scales.empty() && !ofmQuant.scales.empty() && !weightQuant.scales.empty() )
     {
diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp
index 75db63dd117b77da45ac3b60c6b9eeb3d399b200..dbb4b37a3cae952235410f16582bd33dcb9f3bd9 100644
--- a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp
+++ b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp
@@ -427,19 +427,6 @@ uint32_t EthosU55RCSGenerator::ToRegion(const MemArea &memArea)
     return uint32_t(region);
 }
 
-bool EthosU55RCSGenerator::UseZeroPoint0(OpType opType, const HLCFeatureMap &fm, bool isOFM)
-{
-    if ( fm.quantization.forceZeroPoint )
-    {
-        return false;
-    }
-    if ( fm.quantization.zeroPoints.empty() || (fm.dataType == DataType::Int32 && !isOFM) )
-    {
-        return true;
-    }
-    return opType == OpType::AvgPool || opType == OpType::ResizeBilinear || opType == OpType::CLZ || opType == OpType::SHL;
-}
-
 // Checks if the feature map is a scalar, and if so, returns the
 // quantized value in scalarValue.
 bool EthosU55RCSGenerator::IsScalar(const HLCFeatureMap &fm, int32_t &scalarValue)
@@ -969,7 +956,7 @@ void EthosU55RCSGenerator::GenerateIFM(OpType opType, const HLCFeatureMap &fm, c
     Emit(isa::npu_set_ifm_stride_c_t(strides.Depth()));
     // IFM_ZERO_POINT register
     auto &quant = fm.quantization;
-    uint32_t zp = UseZeroPoint0(opType, fm, false) ? 0 : uint32_t(quant.zeroPoints[0]);
+    uint32_t zp = quant.zeroPoints.empty() ? 0 : uint32_t(quant.zeroPoints[0]);
     Emit(isa::npu_set_ifm_zero_point_t(zp));
 }
 
@@ -1002,7 +989,7 @@ void EthosU55RCSGenerator::GenerateIFM2(OpType opType, const HLCFeatureMap &fm,
     }
     // IFM2_ZERO_POINT register
     auto &quant = fm.quantization;
-    uint32_t zp = UseZeroPoint0(opType, fm, false) ? 0 : uint32_t(quant.zeroPoints[0]);
+    uint32_t zp = quant.zeroPoints.empty() ? 0 : uint32_t(quant.zeroPoints[0]);
     Emit(isa::npu_set_ifm2_zero_point_t(zp));
 }
 
@@ -1033,7 +1020,7 @@ void EthosU55RCSGenerator::GenerateOFM(OpType opType, const HLCFeatureMap &fm, c
     Emit(isa::npu_set_ofm_stride_c_t(strides.Depth()));
     // OFM_ZERO_POINT register
     auto &quant = fm.quantization;
-    uint32_t zp = UseZeroPoint0(opType, fm, true) ? 0 : uint32_t(quant.zeroPoints[0]);
+    uint32_t zp = quant.zeroPoints.empty() ? 0 : uint32_t(quant.zeroPoints[0]);
     Emit(isa::npu_set_ofm_zero_point_t(zp));
 }
 
diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.hpp b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.hpp
index fb271ccffac0bd14421cd76f74d6d6582aee0742..ec3c6d4c3415ef23bb923ef585feff2b0db9f6a6 100644
--- a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.hpp
+++ b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.hpp
@@ -151,7 +151,6 @@ protected:
     MemoryAccess ToMemoryAccess(const HLCFeatureMap &fm, const Box &area, AccessDirection direction);
     // Returns region number used in NPU_SET_..._REGION
     uint32_t ToRegion(const MemArea &memArea);
-    static bool UseZeroPoint0(OpType opType, const HLCFeatureMap &fm, bool isOFM);
     // Checks if the feature map is a scalar, and if so, returns the
     // quantized value in scalarValue.
     static bool IsScalar(const HLCFeatureMap &fm, int32_t &scalarValue);
diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85.cpp
index 1c6b6b1d0b1e288a9db52bd68ccf0acca03a85fd..0bc746bea5a0dfbd0230097f4a4fbdd02daac0a4 100644
--- a/ethosu/regor/architecture/ethosu85/ethos_u85.cpp
+++ b/ethosu/regor/architecture/ethosu85/ethos_u85.cpp
@@ -1375,13 +1375,15 @@ bool EthosU85OpGroup::CanStartChain(const ArchitectureOpGroupQuery &op)
 {
     OpType opType = op.type;
     EthosU85NpuOp npuOp = ArchEthosU85::GetHWOp(opType);
+    if ( npuOp == EthosU85NpuOp::None || npuOp == EthosU85NpuOp::Resize || npuOp == EthosU85NpuOp::Dma )
+    {
+        return false;
+    }
     if ( npuOp == EthosU85NpuOp::Pooling && _arch->UseNullPool(opType, DataTypeSizeBits(op.ifm[0].type)) )
     {
         return false;
     }
-    return (
-        npuOp == EthosU85NpuOp::Convolution || npuOp == EthosU85NpuOp::Depthwise ||
-        npuOp == EthosU85NpuOp::Elementwise || npuOp == EthosU85NpuOp::Pooling || npuOp == EthosU85NpuOp::VectorProduct);
+    return true;
 }
 
 int EthosU85OpGroup::ExternalIfms(const ArchitectureOpGroupQuery &op)
diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp
index 6e653dde884aebadebfc6d333ad7518a98e40dac..b4fcea89116dcbdac2119d7b379c211f9314381b 100644
--- a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp
+++ b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.cpp
@@ -562,24 +562,6 @@ uint32_t EthosU85RCSGenerator::ToRegion(const MemArea &memArea)
     return uint32_t(region);
 }
 
-bool EthosU85RCSGenerator::UseZeroPoint0(OpType opType, const HLCFeatureMap &fm, bool isOFM)
-{
-    if ( fm.quantization.forceZeroPoint )
-    {
-        return false;
-    }
-    if ( fm.quantization.zeroPoints.empty() || (DataTypeSizeBits(fm.dataType) >= 32 && !isOFM) )
-    {
-        return true;
-    }
-    if ( opType == OpType::ArgMax && isOFM )
-    {
-        return true;
-    }
-    return opType == OpType::AvgPool || opType == OpType::Resize || opType == OpType::CLZ || opType == OpType::SHL || opType == OpType::Div;
-}
-
-
 // Checks if the feature map is a scalar, and if so, returns the
 // quantized value in scalarValue.
 bool EthosU85RCSGenerator::IsScalar(const HLCFeatureMap &fm, int32_t &scalarValue)
@@ -1228,7 +1210,7 @@ void EthosU85RCSGenerator::GenerateIFM(OpType opType, const HLCFeatureMap &fm, c
     }
     // IFM_ZERO_POINT register
     auto &quant = fm.quantization;
-    uint32_t zp = UseZeroPoint0(opType, fm, false) ? 0 : uint32_t(quant.zeroPoints[0]);
+    uint32_t zp = quant.zeroPoints.empty() ? 0 : uint32_t(quant.zeroPoints[0]);
     // ifm zero-point is force-emitted if ofm is chained
     Emit(isa::npu_set_ifm_zero_point_t(zp));
 
@@ -1272,7 +1254,7 @@ void EthosU85RCSGenerator::GenerateIFM2(
     }
     // IFM2_ZERO_POINT register
     auto &quant = fm.quantization;
-    uint32_t zp = UseZeroPoint0(opType, fm, false) ? 0 : uint32_t(quant.zeroPoints[0]);
+    uint32_t zp = quant.zeroPoints.empty() ? 0 : uint32_t(quant.zeroPoints[0]);
     // ifm zero-point is force-emitted if ofm is chained
     Emit(isa::npu_set_ifm2_zero_point_t(zp));
 
@@ -1316,7 +1298,7 @@ void EthosU85RCSGenerator::GenerateOFM(OpType opType, const HLCFeatureMap &fm, c
     }
     // OFM_ZERO_POINT register
     auto &quant = fm.quantization;
-    uint32_t zp = UseZeroPoint0(opType, fm, true) ? 0 : uint32_t(quant.zeroPoints[0]);
+    uint32_t zp = quant.zeroPoints.empty() ? 0 : uint32_t(quant.zeroPoints[0]);
     Emit(isa::npu_set_ofm_zero_point_t(zp));
 }
 
diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.hpp b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.hpp
index 6b5d48fe078c6e0edeb3c3b29fd9d420b1d79a27..96a403f095b5c63d92ddf8471a1981c1050d2c72 100644
--- a/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.hpp
+++ b/ethosu/regor/architecture/ethosu85/ethos_u85_register_cs_generator.hpp
@@ -135,7 +135,6 @@ protected:
     MemoryAccess ToMemoryAccess(const HLCFeatureMap &fm, const Box &area, AccessDirection direction);
     // Returns region number used in NPU_SET_..._REGION
     uint32_t ToRegion(const MemArea &memArea);
-    static bool UseZeroPoint0(OpType opType, const HLCFeatureMap &fm, bool isOFM);
     // Checks if the feature map is a scalar, and if so, returns the
     // quantized value in scalarValue.
     static bool IsScalar(const HLCFeatureMap &fm, int32_t &scalarValue);
diff --git a/ethosu/regor/compiler/graph_builder.cpp b/ethosu/regor/compiler/graph_builder.cpp
index 24a4353bc3c59bde64fc996cbfec082bb00328cb..12e98657662ef85b3013516ba4d240241e19c4e9 100644
--- a/ethosu/regor/compiler/graph_builder.cpp
+++ b/ethosu/regor/compiler/graph_builder.cpp
@@ -540,7 +540,6 @@ void GraphBuilder::SetZeroPoint(GraphOperation *graphOp, GraphTensorUsage tensor
     if ( conn )
     {
         conn->quantization.zeroPoints = {int64_t(zeroPoint)};
-        conn->quantization.forceZeroPoint = true;
     }
 }
 
diff --git a/ethosu/regor/compiler/graphir_optimiser.cpp b/ethosu/regor/compiler/graphir_optimiser.cpp
index c96c485853ff613f07b252faf32de8c6eff816d1..bde354e8a8cdce04f25018513798a72d9f64d00d 100644
--- a/ethosu/regor/compiler/graphir_optimiser.cpp
+++ b/ethosu/regor/compiler/graphir_optimiser.cpp
@@ -1310,52 +1310,27 @@ Operation *GraphIrOptimiser::RewriteReduceSum(Graph *const graph, Operation *con
     else
     {
         const int64_t zp = ifmConn->quantization.zeroPoints.empty() ? 0 : ifmConn->quantization.zeroPoints[0];
-        const Shape &padInputShape = ifmConn->shape;
-        const Shape padOutputShape = Shape::RoundAway(padInputShape, padInputShape.WithOnes().WithDepth(8));
-        const Shape padPaddings = padOutputShape - padInputShape;
-
-        if ( zp != 0 && padPaddings.GreaterMask(padPaddings.WithZeros()) )
+        if ( zp != 0 )
         {
-            // Replace ReduceSum (zp != 0, depth % 8 != 0, axis = C) with 1x1 Conv2D:
-            //
-            // 1. Reshape to 3D shape (HWC) where C dimension is the dimension to reduce.
-            // 2. 1x1 Conv2D (1x1x1xC weights): HxWxC -> HxWx1.
-
-            // Reshape to 4D shape (NHWC) where C dimension is the dimension to reduce
-            const Shape ifmShape3D = ReshapeTo3D(Shape::PadAxes(ifmConn->shape, 3, 1), {ifmConn->shape.Size() - 2, 1, 1});
-            const Shape ifmShape4D = Shape::PadAxes(ifmShape3D, 4, 1);
-
-            // Create an identity 1x1x1xC weights tensor
-            auto weightsBuffer = std::make_shared<Buffer>(std::vector<int8_t>(ifmShape4D.Depth(), 1));
-            auto weightsTens = CreateConstTensor("weights", DataType::Int8, weightsBuffer);
-            weightsTens->SetStorageShape({1, 1, 1, ifmShape4D.Depth()});
-            weightsTens->SetAxisOrder(AxisOrder::OHWI);
-            auto weightsQuant = ifmConn->quantization;
-            weightsQuant.quantMin = {IntegerMin(DataType::Int8)};
-            weightsQuant.quantMax = {IntegerMax(DataType::Int8)};
-            weightsQuant.zeroPoints = {0};
-            weightsQuant.scales = {{1, 0}};  // Identity
-
-            // Create an identity bias tensor
-            auto biasTens = CreateConstTensor("bias", DataType::Int32, 0);
-            auto biasQuant = ifmConn->quantization;
-            biasQuant.zeroPoints = {0};
-
-            // Replace ReduceSum with a 1x1 Conv2D
-            Kernel kernel({1, 1}, {1, 1}, {1, 1});
-            auto convOp = std::make_shared<Operation>(OpType::Conv2D);
-            convOp->SetKernel(std::make_unique<Kernel>(kernel));
-            convOp->CopyInput(TensorUsage::IFM, *ifmConn);
-            convOp->Input(TensorUsage::IFM)->Set(ifmShape4D);
-            convOp->ConnectInput(TensorUsage::Weights, weightsTens).Set(weightsQuant);
-            convOp->ConnectInput(TensorUsage::Scales, biasTens).Set(biasQuant);
-            convOp->CopyOutput(TensorUsage::OFM, *ofmConn);
-            convOp->Output(TensorUsage::OFM)->Set(ifmShape4D.WithDepth(1));
-            RecordOptimisation(operation, convOp.get());
-            returnOp = convOp.get();
-
-            // Remove old ReduceSum op
-            operation->Disconnect();
+            // Replace ReduceSum (zp != 0) with ReduceSum->Sub(zp):
+
+            // Temporary tensor between ReduceSum and Sub
+            std::shared_ptr<Tensor> reduceSumTens = ofmConn->tensor->Clone();
+            reduceSumTens->SetName(ofmConn->tensor->Name() + "_reducesum");
+            reduceSumTens->ChangeType(DataType::Int32);
+
+            // Sub op with zero point
+            auto zpTens = CreateConstTensor("zero_point", DataType::Int32, int(ifmConn->shape.Depth() * zp));
+            auto subOp = std::make_shared<Operation>(OpType::Sub);
+            subOp->ConnectInput(TensorUsage::IFM, reduceSumTens);
+            subOp->ConnectInput(TensorUsage::IFM1, zpTens);
+            subOp->CopyOutput(TensorUsage::OFM, *ofmConn);
+            RecordOptimisation(operation, subOp.get());
+            returnOp = subOp.get();
+
+            // Connect temporary tensor to reduceSum and remove the zero point
+            operation->ConnectOutput(TensorUsage::OFM, reduceSumTens).Set(Quantization::Unit());
+            ifmConn->quantization.zeroPoints[0] = 0;
         }
         else if ( ifmConn->shape.Size() > 3 )
         {
diff --git a/ethosu/regor/compiler/quantization.cpp b/ethosu/regor/compiler/quantization.cpp
index 0bc4d4c89d891693f0bfbc1c296f50a550bcf681..d508662818aece9aaa5547089b4ccae6994c4efa 100644
--- a/ethosu/regor/compiler/quantization.cpp
+++ b/ethosu/regor/compiler/quantization.cpp
@@ -30,14 +30,14 @@ std::string Quantization::ToString() const
     {
         scale.push_back(fmt::format("(scale:{}, shift:{})", s.scale, s.shift));
     }
-    return fmt::format("scale: [{}], zero_point: [{}], quantMin: [{}], quantMax: [{}], dimension: {}, force_zero_point: {}",
-        fmt::join(scale, ", "), fmt::join(zeroPoints, ", "), fmt::join(quantMin, ", "), fmt::join(quantMax, ", "), dimension, forceZeroPoint);
+    return fmt::format("scale: [{}], zero_point: [{}], quantMin: [{}], quantMax: [{}], dimension: {}",
+        fmt::join(scale, ", "), fmt::join(zeroPoints, ", "), fmt::join(quantMin, ", "), fmt::join(quantMax, ", "), dimension);
 }
 
 bool Quantization::operator==(const Quantization &rhs) const
 {
-    return std::tie(scales, zeroPoints, quantMin, quantMax, dimension, forceZeroPoint) ==
-           std::tie(rhs.scales, rhs.zeroPoints, rhs.quantMin, rhs.quantMax, rhs.dimension, rhs.forceZeroPoint);
+    return std::tie(scales, zeroPoints, quantMin, quantMax, dimension) ==
+           std::tie(rhs.scales, rhs.zeroPoints, rhs.quantMin, rhs.quantMax, rhs.dimension);
 }
 
 bool Quantization::operator!=(const Quantization &rhs) const
diff --git a/ethosu/regor/compiler/quantization.hpp b/ethosu/regor/compiler/quantization.hpp
index e9fd6313b0f6069f381109a9d28e4fcf68cdd9af..a79212d08f9fb0603c6dee464fe90ebd24645717 100644
--- a/ethosu/regor/compiler/quantization.hpp
+++ b/ethosu/regor/compiler/quantization.hpp
@@ -42,7 +42,6 @@ public:
     std::vector<int64_t> quantMin;
     std::vector<int64_t> quantMax;
     int dimension = 0;
-    bool forceZeroPoint = false;
 
 public:
     Quantization() = default;
@@ -71,7 +70,6 @@ public:
             quantMin = other.quantMin;
             quantMax = other.quantMax;
             dimension = other.dimension;
-            forceZeroPoint = other.forceZeroPoint;
         }
         return *this;
     }
@@ -86,7 +84,6 @@ public:
            quantMin = std::move(other.quantMin);
            quantMax = std::move(other.quantMax);
            dimension = other.dimension;
-            forceZeroPoint = other.forceZeroPoint;
        }
        return *this;
    }
diff --git a/ethosu/regor/compiler/scheduler_decompose.cpp b/ethosu/regor/compiler/scheduler_decompose.cpp
index 5fd1c251c8824d25ab9616df656b8a5cb847fe42..8adcd1c20cb09136975c1a2513df4126ec736b2f 100644
--- a/ethosu/regor/compiler/scheduler_decompose.cpp
+++ b/ethosu/regor/compiler/scheduler_decompose.cpp
@@ -1267,7 +1267,6 @@ static std::vector<std::unique_ptr<SchedulerOperation>> SwapAxes(Architecture *a
     // Connect input/output
     Quantization unitQuantZp = Quantization::Unit();
     unitQuantZp.zeroPoints = tail->quantization.zeroPoints;
-    unitQuantZp.forceZeroPoint = tail->quantization.forceZeroPoint;
     ifmConn->tensor = tail->tensor;
     ifmConn->tensor->consumers.push_back(op.get());
     ifmConn->shape = ifmShape;
diff --git a/ethosu/regor/compiler/tflite_graph_optimiser.cpp b/ethosu/regor/compiler/tflite_graph_optimiser.cpp
index a8a6ec610ada6c75f9f413ffb54cf05a781f3e46..ee079d83773425e4d0cadc9ae424376d28bad6a1 100644
--- a/ethosu/regor/compiler/tflite_graph_optimiser.cpp
+++ b/ethosu/regor/compiler/tflite_graph_optimiser.cpp
@@ -2758,6 +2758,31 @@ Operation *TFLiteGraphOptimiser::ConvertPad(Graph *const graph, Operation *const
     return mainOp.get();
 }
 
+Operation *TFLiteGraphOptimiser::ConvertZeroPoint(Graph *const graph, Operation *const operation)
+{
+    UNUSED(graph);
+    auto opType = operation->Type();
+    bool zeroPoint0ForType =
+        opType == OpType::AvgPool || opType == OpType::Resize || opType == OpType::CLZ || opType == OpType::SHL || opType == OpType::Div;
+
+    for ( auto [usage, ifmConn] : operation->Inputs().pairs() )
+    {
+        if ( IsIFM(usage) )
+        {
+            if ( zeroPoint0ForType || DataTypeSizeBits(ifmConn.tensor->Type()) >= 32 )
+                ifmConn.quantization.zeroPoints.clear();
+        }
+    }
+    for ( auto [usage, ofmConn] : operation->Outputs().pairs() )
+    {
+        if ( IsOFM(usage) )
+        {
+            if ( zeroPoint0ForType || opType == OpType::ArgMax ) ofmConn.quantization.zeroPoints.clear();
+        }
+    }
+    return operation;
+}
+
 TFLiteGraphOptimiser::TFLiteGraphOptimiser(IArchitectureConstraints *constraints, const GraphOptimiserOptions &options, OptimiserDatabase *db) :
         GraphOptimiser(constraints, options, db)
 {
diff --git a/ethosu/regor/compiler/tflite_graph_optimiser.hpp b/ethosu/regor/compiler/tflite_graph_optimiser.hpp
index 90da7cc524856b3b6037ec2afe8d1bf95061b473..ef4535802e8ffc6d5aa6da8ce81cfc97992a2a43 100644
--- a/ethosu/regor/compiler/tflite_graph_optimiser.hpp
+++ b/ethosu/regor/compiler/tflite_graph_optimiser.hpp
@@ -165,6 +165,9 @@ private:
     // This is done as fall-back for the PAD operators that remain after ReplacePadByExplicitPadding
     Operation *ConvertPad(Graph *const graph, Operation *const operation);
 
+    // Rewrites zero point as expected by reference
+    Operation *ConvertZeroPoint(Graph *const graph, Operation *const operation);
+
 public:
     // The graph optimisation steps.
     // Order matters, array of rewrites processed in order.
@@ -238,6 +241,7 @@ public:
             {},
             {
                 &TFLiteGraphOptimiser::ConvertPad,
+                &TFLiteGraphOptimiser::ConvertZeroPoint,
             }
         },
         {
diff --git a/ethosu/regor/test/test_graphir_optimiser.cpp b/ethosu/regor/test/test_graphir_optimiser.cpp
index e32122b0132bbebee797e03b021240ec040516e2..e5893162cc7205a46848112db84eb65ccca78a4d 100644
--- a/ethosu/regor/test/test_graphir_optimiser.cpp
+++ b/ethosu/regor/test/test_graphir_optimiser.cpp
@@ -67,6 +67,7 @@ TEST_CASE("test_graphir_optimiser - constant propagation")
         graph->GetAllOperations(allOps);
         REQUIRE(allOps.size() == 2);
 
+        REQUIRE(bool(optimiser));
         optimiser->Process(graph.get());
 
         allOps.clear();
@@ -112,6 +113,7 @@
         graph->GetAllOperations(allOps);
         REQUIRE(allOps.size() == 3);
 
+        REQUIRE(bool(optimiser));
         optimiser->Process(graph.get());
 
         allOps.clear();
@@ -126,3 +128,50 @@
         }
     }
 }
+
+TEST_CASE("test_graphir_optimiser - ReduceSum")
+{
+    // Create arch
+    auto arch = CreateArchDefault();
+    std::string err = "noerror";
+    arch->CheckConfiguration(err);
+    REQUIRE(err == "noerror");
+
+    SECTION("Zero point")
+    {
+        constexpr int ZP = 10;
+
+        auto graph = [&]()
+        {
+            std::vector<std::shared_ptr<Operation>> ops;
+            auto ifm = CreateTensor("IFM", Shape(1, 4, 4, 25), DataType::Int8);
+            auto ofm = CreateTensor("OFM", ifm->StorageShape().WithDepth(1), DataType::Int8);
+            auto op = CreateOperation(OpType::ReduceSum, TensorUsage::IFM, ifm, TensorUsage::OFM, ofm);
+            op->Input(TensorUsage::IFM)->quantization.zeroPoints.push_back(ZP);
+            op->Attribute<axis_attr_t>()->axis = ifm->StorageShape().Size() - 1;
+            ops.push_back(std::move(op));
+
+            // Create graph with ops
+            return CreateGraph(ops);
+        }();
+
+        GraphOptimiserOptions options;
+        auto optimiser = GraphOptimiser::MakeGraphOptimiser(graph->Notation(), arch->Constraints(), options, nullptr);
+
+        REQUIRE(bool(optimiser));
+        optimiser->Process(graph.get());
+
+        SchedulerPacking packing(arch.get(), false);
+        auto scheduleOps = packing.Process(graph.get());
+
+        REQUIRE(scheduleOps.size() == 1);
+        REQUIRE(scheduleOps[0]->SubOps().size() == 1);
+        REQUIRE(scheduleOps[0]->SubOps()[0]->IFM(1)->tensor->IsConstant());
+        REQUIRE(scheduleOps[0]->SubOps()[0]->IFM(1)->tensor->bufferView.Elements() == 1);
+        REQUIRE(scheduleOps[0]->SubOps()[0]->IFM(1)->tensor->bufferView.StrideBytes() == sizeof(int32_t));
+        auto view = scheduleOps[0]->SubOps()[0]->IFM(1)->tensor->bufferView.Values<int32_t>();
+        REQUIRE(view[0] == scheduleOps[0]->IFM(0)->shape.Depth() * ZP);
+        if ( scheduleOps[0]->IFM(0)->quantization.zeroPoints.size() > 0 )
+            REQUIRE(scheduleOps[0]->IFM(0)->quantization.zeroPoints[0] == 0);
+    }
+}
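A standalone sketch (not part of the patch) of the arithmetic behind the new ReduceSum rewrite, assuming the usual affine quantisation real = scale * (q - zp): summing C quantised values with the zero point subtracted per element is the same as a plain integer sum followed by subtracting C * zp, which is exactly the constant the new Sub operation carries (ifmConn->shape.Depth() * zp). The helper names below are illustrative only.

#include <cassert>
#include <cstdint>
#include <vector>

// Reference behaviour: the zero point is subtracted from every element before summing.
int32_t ReferenceReduceSum(const std::vector<int8_t> &q, int32_t zp)
{
    int32_t acc = 0;
    for ( int8_t v : q ) acc += int32_t(v) - zp;
    return acc;
}

// Rewritten form: plain integer ReduceSum with the zero point cleared,
// followed by Sub(depth * zp), mirroring the zpTens constant in RewriteReduceSum.
int32_t RewrittenReduceSum(const std::vector<int8_t> &q, int32_t zp)
{
    int32_t acc = 0;
    for ( int8_t v : q ) acc += int32_t(v);
    return acc - int32_t(q.size()) * zp;
}

int main()
{
    const std::vector<int8_t> channel = {12, -3, 7, 10};  // hypothetical channel values
    const int32_t zp = 10;                                // same ZP as the new unit test
    assert(ReferenceReduceSum(channel, zp) == RewrittenReduceSum(channel, zp));
    return 0;
}

This is also what the new test checks: for Shape(1, 4, 4, 25) and ZP = 10, the constant IFM1 of the generated Sub holds shape.Depth() * ZP = 250, and the ReduceSum input zero point is reset to 0.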
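With UseZeroPoint0() removed, the op-type-specific zero-point handling lives in the new ConvertZeroPoint pass, while the register CS generators simply emit zeroPoints[0] (or 0 when the vector is empty). A condensed sketch of the rule the pass applies; OpKind and the bit-width parameter are simplified stand-ins for regor's OpType and DataTypeSizeBits(), only the decision logic is illustrated.

// Simplified stand-ins for regor's OpType / DataTypeSizeBits(); decision logic only.
enum class OpKind { AvgPool, Resize, CLZ, SHL, Div, ArgMax, Other };

static bool ZeroPoint0ForType(OpKind op)
{
    return op == OpKind::AvgPool || op == OpKind::Resize || op == OpKind::CLZ || op == OpKind::SHL || op == OpKind::Div;
}

// IFM zero points are cleared for the op types above and for 32-bit (or wider) inputs.
bool ClearInputZeroPoint(OpKind op, int ifmBits)
{
    return ZeroPoint0ForType(op) || ifmBits >= 32;
}

// OFM zero points are cleared for the op types above and for ArgMax outputs.
bool ClearOutputZeroPoint(OpKind op)
{
    return ZeroPoint0ForType(op) || op == OpKind::ArgMax;
}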