From 46d88f56902be0706e051c10153ffb7620e01ee3 Mon Sep 17 00:00:00 2001 From: Jacob Bohlin Date: Wed, 19 Feb 2025 18:45:10 +0000 Subject: [PATCH] MLBEDSW-10460 Apply missing zero point to padding constant Also includes: * Zero point fix for PadV2. * Fix OFM slices for paddings. * Added quantization to IFM of the MemoryCopys. Change-Id: I60cc1c2693d38709a42cb2614395aec241687a5d Signed-off-by: Jacob Bohlin --- ethosu/regor/compiler/graphir_optimiser.cpp | 10 ++++++---- ethosu/regor/compiler/tflite_graph_optimiser.cpp | 9 ++++++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/ethosu/regor/compiler/graphir_optimiser.cpp b/ethosu/regor/compiler/graphir_optimiser.cpp index 220e6f31..de1f6f34 100644 --- a/ethosu/regor/compiler/graphir_optimiser.cpp +++ b/ethosu/regor/compiler/graphir_optimiser.cpp @@ -679,7 +679,7 @@ Operation *GraphIrOptimiser::MakeFillOperation(TensorConnection *const ofmConn, const TensorSlice &ofmSlice, std::shared_ptr padTensor) { auto fillOp = std::make_shared(OpType::MemoryCopy); - fillOp->ConnectInput(TensorUsage::IFM, padTensor).Set(ofmSlice.shape); + fillOp->ConnectInput(TensorUsage::IFM, padTensor).Set(ofmSlice.shape).Set(ofmConn->quantization); fillOp->CopyOutput(TensorUsage::OFM, *ofmConn); fillOp->Output(TensorUsage::OFM)->Set(ofmShape).Set(ofmSlice).Set(RoundMode::NATURAL); return fillOp.get(); @@ -696,7 +696,8 @@ Operation *GraphIrOptimiser::RewritePad(Graph *const, Operation *const operation const Shape ofmShape = ofmConn->shape; const auto ¶msConn = operation->Input(TensorUsage::Params); const auto &attr = operation->Attribute(); - const int padConst = int(attr->pad_const); + uint8_t zeroPoint = ofmConn->quantization.IsValid() ? 
uint8_t(ofmConn->quantization.zeroPoints[0]) : 0; + const int padConst = int(attr->pad_const) + zeroPoint; // Decode the padding before and after each dimension as two shapes Shape paddingBefore = TensorToShape(paramsConn->tensor.get(), paramsConn->shape.Width(), 2, 0); @@ -738,6 +739,7 @@ Operation *GraphIrOptimiser::RewritePad(Graph *const, Operation *const operation // (HWC) where W is the dimension to pad. Only use this strategy when necessary since it is often slower. const Shape ifmShape = ifmConn->shape; bool reshapeAndPadW = ifmShape.Size() > 4 || (ifmShape.Size() == 4 && ifmShape.Batch() > 1); + const Shape zeroShape = reshapeAndPadW ? Shape(0, 0, 0) : ofmShape.WithZeros(); for ( int axis = 0; axis < ifmShape.Size(); axis++ ) { Shape newOfmShape = reshapeAndPadW ? ReshapeTo3DAroundAxis(ofmShape, axis) : ofmShape; @@ -746,7 +748,7 @@ Operation *GraphIrOptimiser::RewritePad(Graph *const, Operation *const operation const int padBefore = paddingBefore[axis]; if ( padBefore ) { - TensorSlice newOfmSlice = {newOfmShape.WithZeros(), newOfmShape.With(padAxis, padBefore)}; + TensorSlice newOfmSlice = {zeroShape, newOfmShape.With(padAxis, padBefore)}; auto fillOp = MakeFillOperation(ofmConn, newOfmShape, newOfmSlice, padTensor); RecordOptimisation(operation, fillOp); } @@ -754,7 +756,7 @@ Operation *GraphIrOptimiser::RewritePad(Graph *const, Operation *const operation const int padAfter = paddingAfter[axis]; if ( padAfter ) { - TensorSlice newOfmSlice = {newOfmShape.With(padAxis, newOfmShape[padAxis] - padAfter), newOfmShape.With(padAxis, padAfter)}; + TensorSlice newOfmSlice = {zeroShape.With(padAxis, newOfmShape[padAxis] - padAfter), newOfmShape.With(padAxis, padAfter)}; auto fillOp = MakeFillOperation(ofmConn, newOfmShape, newOfmSlice, padTensor); RecordOptimisation(operation, fillOp); } diff --git a/ethosu/regor/compiler/tflite_graph_optimiser.cpp b/ethosu/regor/compiler/tflite_graph_optimiser.cpp index 8364f5a6..744c272a 100644 --- 
a/ethosu/regor/compiler/tflite_graph_optimiser.cpp +++ b/ethosu/regor/compiler/tflite_graph_optimiser.cpp @@ -2649,11 +2649,14 @@ Operation *TFLiteGraphOptimiser::ConvertPadV2(Graph *const graph, Operation *con auto padOp = std::make_shared(OpType::Pad); padOp->CopyInput(TensorUsage::IFM, *operation->Input(TensorUsage::IFM)); padOp->CopyInput(TensorUsage::Params, *operation->Input(TensorUsage::Params0)); - padOp->CopyOutput(TensorUsage::OFM, *operation->Output(TensorUsage::OFM)); - + const auto &ofmConn = operation->Output(TensorUsage::OFM); + padOp->CopyOutput(TensorUsage::OFM, *ofmConn); const auto &attr = padOp->Attribute(); const auto padConstTens = operation->Input(TensorUsage::Params1)->tensor; - attr->pad_const = padConstTens->View().Values(padConstTens->Type())[0]; + // Subtract the OFM zero point here to counteract the zero point adjustment + // that is applied later during the GraphIR lowering of Pad. + uint8_t zeroPoint = ofmConn->quantization.IsValid() ? uint8_t(ofmConn->quantization.zeroPoints[0]) : 0; + attr->pad_const = padConstTens->View().Values(padConstTens->Type())[0] - zeroPoint; RecordOptimisation(operation, padOp.get()); operation->Disconnect(); -- GitLab