From 46d88f56902be0706e051c10153ffb7620e01ee3 Mon Sep 17 00:00:00 2001 From: Jacob Bohlin Date: Wed, 19 Feb 2025 18:45:10 +0000 Subject: [PATCH] MLBEDSW-10460 Apply missing zero point to padding constant Also includes: * Zero point fix for PadV2. * Fix OFM slices for paddings. * Added quantization to IFM of the MemoryCopys. Change-Id: I60cc1c2693d38709a42cb2614395aec241687a5d Signed-off-by: Jacob Bohlin --- ethosu/regor/compiler/graphir_optimiser.cpp | 10 ++++++---- ethosu/regor/compiler/tflite_graph_optimiser.cpp | 9 ++++++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/ethosu/regor/compiler/graphir_optimiser.cpp b/ethosu/regor/compiler/graphir_optimiser.cpp index 220e6f31..de1f6f34 100644 --- a/ethosu/regor/compiler/graphir_optimiser.cpp +++ b/ethosu/regor/compiler/graphir_optimiser.cpp @@ -679,7 +679,7 @@ Operation *GraphIrOptimiser::MakeFillOperation(TensorConnection *const ofmConn, const TensorSlice &ofmSlice, std::shared_ptr padTensor) { auto fillOp = std::make_shared(OpType::MemoryCopy); - fillOp->ConnectInput(TensorUsage::IFM, padTensor).Set(ofmSlice.shape); + fillOp->ConnectInput(TensorUsage::IFM, padTensor).Set(ofmSlice.shape).Set(ofmConn->quantization); fillOp->CopyOutput(TensorUsage::OFM, *ofmConn); fillOp->Output(TensorUsage::OFM)->Set(ofmShape).Set(ofmSlice).Set(RoundMode::NATURAL); return fillOp.get(); @@ -696,7 +696,8 @@ Operation *GraphIrOptimiser::RewritePad(Graph *const, Operation *const operation const Shape ofmShape = ofmConn->shape; const auto ¶msConn = operation->Input(TensorUsage::Params); const auto &attr = operation->Attribute(); - const int padConst = int(attr->pad_const); + uint8_t zeroPoint = ofmConn->quantization.IsValid() ? 
uint8_t(ofmConn->quantization.zeroPoints[0]) : 0; + const int padConst = int(attr->pad_const) + zeroPoint; // Decode the padding before and after each dimension as two shapes Shape paddingBefore = TensorToShape(paramsConn->tensor.get(), paramsConn->shape.Width(), 2, 0); @@ -738,6 +739,7 @@ Operation *GraphIrOptimiser::RewritePad(Graph *const, Operation *const operation // (HWC) where W is the dimension to pad. Only use this strategy when necessary since it is often slower. const Shape ifmShape = ifmConn->shape; bool reshapeAndPadW = ifmShape.Size() > 4 || (ifmShape.Size() == 4 && ifmShape.Batch() > 1); + const Shape zeroShape = reshapeAndPadW ? Shape(0, 0, 0) : ofmShape.WithZeros(); for ( int axis = 0; axis < ifmShape.Size(); axis++ ) { Shape newOfmShape = reshapeAndPadW ? ReshapeTo3DAroundAxis(ofmShape, axis) : ofmShape; @@ -746,7 +748,7 @@ Operation *GraphIrOptimiser::RewritePad(Graph *const, Operation *const operation const int padBefore = paddingBefore[axis]; if ( padBefore ) { - TensorSlice newOfmSlice = {newOfmShape.WithZeros(), newOfmShape.With(padAxis, padBefore)}; + TensorSlice newOfmSlice = {zeroShape, newOfmShape.With(padAxis, padBefore)}; auto fillOp = MakeFillOperation(ofmConn, newOfmShape, newOfmSlice, padTensor); RecordOptimisation(operation, fillOp); } @@ -754,7 +756,7 @@ Operation *GraphIrOptimiser::RewritePad(Graph *const, Operation *const operation const int padAfter = paddingAfter[axis]; if ( padAfter ) { - TensorSlice newOfmSlice = {newOfmShape.With(padAxis, newOfmShape[padAxis] - padAfter), newOfmShape.With(padAxis, padAfter)}; + TensorSlice newOfmSlice = {zeroShape.With(padAxis, newOfmShape[padAxis] - padAfter), newOfmShape.With(padAxis, padAfter)}; auto fillOp = MakeFillOperation(ofmConn, newOfmShape, newOfmSlice, padTensor); RecordOptimisation(operation, fillOp); } diff --git a/ethosu/regor/compiler/tflite_graph_optimiser.cpp b/ethosu/regor/compiler/tflite_graph_optimiser.cpp index 8364f5a6..744c272a 100644 --- 
a/ethosu/regor/compiler/tflite_graph_optimiser.cpp +++ b/ethosu/regor/compiler/tflite_graph_optimiser.cpp @@ -2649,11 +2649,14 @@ Operation *TFLiteGraphOptimiser::ConvertPadV2(Graph *const graph, Operation *con auto padOp = std::make_shared(OpType::Pad); padOp->CopyInput(TensorUsage::IFM, *operation->Input(TensorUsage::IFM)); padOp->CopyInput(TensorUsage::Params, *operation->Input(TensorUsage::Params0)); - padOp->CopyOutput(TensorUsage::OFM, *operation->Output(TensorUsage::OFM)); - + const auto &ofmConn = operation->Output(TensorUsage::OFM); + padOp->CopyOutput(TensorUsage::OFM, *ofmConn); const auto &attr = padOp->Attribute(); const auto padConstTens = operation->Input(TensorUsage::Params1)->tensor; - attr->pad_const = padConstTens->View().Values(padConstTens->Type())[0]; + // Subtract the OFM zero point here to counteract the zero point adjustment + // that is applied later during the GraphIR lowering of Pad. + uint8_t zeroPoint = ofmConn->quantization.IsValid() ? uint8_t(ofmConn->quantization.zeroPoints[0]) : 0; + attr->pad_const = padConstTens->View().Values(padConstTens->Type())[0] - zeroPoint; RecordOptimisation(operation, padOp.get()); operation->Disconnect(); -- GitLab