From 9a1e9a4dea566b6a154ef080ac69a33c285f1384 Mon Sep 17 00:00:00 2001 From: William Isaksson Date: Sun, 11 May 2025 14:01:42 +0200 Subject: [PATCH] MLBEDSW-10236: Port RB half_pixel_centers support Ethos-U55/U65 Adds support for ResizeBilinear half_pixel_centers with 2x upscaling. Change-Id: I4f2b78f1c2154b94b17ad8f685179ad1849924f9 Signed-off-by: William Isaksson --- .../ethosu55/ethos_u55_constraints.cpp | 3 +- ethosu/regor/compiler/scheduler_decompose.cpp | 182 ++++++++++++++++-- ethosu/regor/compiler/scheduler_packing.cpp | 2 +- .../tflite/tflite_supported_operators_u55.cpp | 32 +-- 4 files changed, 189 insertions(+), 30 deletions(-) diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp index f828d305..c5401a16 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_constraints.cpp @@ -362,8 +362,7 @@ Flags EthosU55Constraints::OperatorQuery(OpType opType, const ArchO } if ( req ) { - req->req = ArchRequirement::Decompose; - req->substitution = OpType::AvgPool; + req->req.Set(ArchRequirement::OpSubstitution, ArchRequirement::Decompose); } return QueryResult::NativeHasReq; } diff --git a/ethosu/regor/compiler/scheduler_decompose.cpp b/ethosu/regor/compiler/scheduler_decompose.cpp index d475c183..3503973e 100644 --- a/ethosu/regor/compiler/scheduler_decompose.cpp +++ b/ethosu/regor/compiler/scheduler_decompose.cpp @@ -1295,10 +1295,160 @@ std::vector> DecomposeTransposeConv2D(Archit return result; } +static bool IsHalfPixelCenters(const resize_attr_t &attr) +{ + if ( attr.scaleY.d == 2 && attr.scaleX.d == 2 && attr.scaleY.n % 2 == 0 && attr.scaleX.n % 2 == 0 ) + { + if ( attr.offset.x == -1 * (attr.scaleX.n / 2 - 1) && attr.offset.y == -1 * (attr.scaleY.n / 2 - 1) ) + { + return true; + } + } + return false; +} + +static std::shared_ptr MakeDepthwiseHPCKernel(Architecture *arch, int channels, int8_t w00, int8_t 
w01, int8_t w10, int8_t w11) +{ + Shape kShape(1, 2, 2, channels); + const auto wSize = kShape.Elements(); + auto buffer = std::make_shared(std::make_unique(wSize), wSize); + BufferView bufferView(buffer, 0, 8, kShape, {}); + auto bufferValues = bufferView.WritableValues(); + for ( int c = 0; c < channels; ++c ) + { + bufferValues[{0, 0, 0, c}] = w00; + bufferValues[{0, 0, 1, c}] = w01; + bufferValues[{0, 1, 0, c}] = w10; + bufferValues[{0, 1, 1, c}] = w11; + } + + auto tens = std::make_shared(DataType::Int8, kShape); + auto srcTensor = std::make_shared("resize_weights", DataType::Int8, kShape); + srcTensor->SetAxisOrder(AxisOrder::IHWO); + + tens->uid = GenerateUniqueId(); + tens->srcTensor = std::move(srcTensor); + tens->memArea = arch->ReadonlyMemory(); + tens->bufferView = std::move(bufferView); + tens->storageShape = tens->bufferView.ViewShape(); + return tens; +} + +static std::vector> +ConvertResizeBilinearHPCToDepthwise(Architecture *arch, std::unique_ptr op) +{ + // Transform Resize Bilinear Half pixel centers -> Reflect pad with border=1 followed by 4 interleaved depthwise ops + std::vector> result; + + auto ifmConn = op->Input(TensorUsage::IFM); + auto ofmConn = op->Output(TensorUsage::OFM); + ifmConn->quantization = Quantization::Unit(); + Shape ifmShape = ifmConn->SliceShape(); + Shape ofmShape = ofmConn->SliceShape(); + int channels = ifmShape.Depth(); + // Kernels to do Bilinear interpolation on interleaved slices of the input feature map + constexpr int8_t kCoeff[4][4] = { + {1, 3, 3, 9}, + {3, 1, 9, 3}, + {3, 9, 1, 3}, + {9, 3, 3, 1}, + }; + + auto tens = ifmConn->tensor; + auto padT = ifmConn->tensor->Clone(); + padT->isGraphInput = false; + padT->storageShape = ifmShape.WithHeight(ifmShape.Height() + 2).WithWidth(ifmShape.Width() + 2); + // Reflect Padding to provide the setup for bilinear half pixel centers interpolation + + // Centre + { + TensorSlice dst = {{0, 1, 1, 0}, ifmShape}; + auto mc = MakeMemCopy(tens, padT, &dst); + ifmConn = 
mc->Output(TensorUsage::OFM); + result.emplace_back(std::move(mc)); + } + // Top and Bottom + auto makeRowCopy = [&](int srcH, int dstH) + { + TensorSlice dst = {{0, dstH, 1, 0}, ifmShape.WithHeight(1)}; + auto mc = MakeMemCopy(tens, padT, &dst); + auto *ifm = mc->IFM(0); + ifm->slice.offset = Shape(0, srcH, 0, 0); + ifm->slice.shape = dst.shape; + ifm->shape = ifmShape; + mc->Output(TensorUsage::OFM)->reverse = ReverseType::H; + return mc; + }; + // Right and Left + auto makeColCopy = [&](int srcW, int dstW) + { + TensorSlice dst = {{0, 0, dstW, 0}, padT->storageShape.WithWidth(1)}; + auto mc = MakeMemCopy(padT, padT, &dst); + auto *ifm = mc->IFM(0); + ifm->slice.offset = Shape(0, 0, srcW, 0); + ifm->slice.shape = dst.shape; + ifm->shape = padT->storageShape; + mc->Output(TensorUsage::OFM)->reverse = ReverseType::W; + + return mc; + }; + result.emplace_back(makeRowCopy(0, 0)); + result.emplace_back(makeRowCopy(ifmShape.Height() - 1, ifmShape.Height() + 1)); + result.emplace_back(makeColCopy(ifmShape.Width(), ifmShape.Width() + 1)); + result.emplace_back(makeColCopy(1, 0)); + + // Create 4 interleaved Depthwise ops to implement Bilinear interpolation with half pixel centers + for ( int tile = 0; tile < 4; ++tile ) + { + int ty = tile / 2; + int tx = tile % 2; + + auto subOp = std::make_unique(OpType::DepthwiseConv2D); + + // IFM + auto ifmSub = subOp->ConnectInput(TensorUsage::IFM, padT); + ifmSub->tensor = padT; + ifmSub->shape = padT->storageShape; + ifmSub->quantization = Quantization::Unit(); + ifmSub->slice = {Shape(0, ty, tx, 0), + Shape(1, padT->storageShape.Height() - 1, padT->storageShape.Width() - 1, ofmShape.Depth())}; + + // Weights + auto W = MakeDepthwiseHPCKernel(arch, channels, kCoeff[tile][0], kCoeff[tile][1], kCoeff[tile][2], kCoeff[tile][3]); + auto wConn = subOp->ConnectInput(TensorUsage::Weights, W); + wConn->shape = W->storageShape; + wConn->quantization = Quantization::Unit(); + + auto biasTensor = std::make_shared(DataType::Int32, 
Shape(1)); + auto bufBias = std::make_shared(Buffer::ConstValue(0)); + biasTensor->memArea = arch->ReadonlyMemory(); + biasTensor->bufferView = BufferView(bufBias, 0, DataTypeStorageSizeBits(biasTensor->dataType), {1}, {}); + biasTensor->storageShape = biasTensor->bufferView.ViewShape(); + subOp->ConnectInput(TensorUsage::Scales, biasTensor); + + // OFM + auto ofmSub = subOp->ConnectOutput(TensorUsage::OFM, ofmConn->tensor); + ofmSub->quantization = Quantization::Unit(); + // Special quantization for bit-exact Resize Bilinear with half pixel centers + // 16x downscale is to normalize the values, and +1 addition is for correct rounding + ofmSub->quantization.scales[0] = {1 + (1 << 31), 35}; + ofmSub->shape = ofmConn->shape; + ofmSub->slice.offset = Shape(0, ty, tx, 0); + ofmSub->slice.shape = ofmConn->shape.WithHW(ofmConn->shape.Height() - 1, ofmConn->shape.Width() - 1); + ofmSub->stepXY = Point2i{2, 2}; + + subOp->SetKernel(Kernel::UnitKernel().WithSize({2, 2})); + + result.emplace_back(std::move(subOp)); + } + + return result; +} + std::vector> LegaliseResize(Architecture *arch, std::unique_ptr op) { // Convert Resize (Bilinear or Nearest) into a sequence of 1×1 AvgPool ops followed by a final - // larger AvgPool / DepthwiseConv2D with kernel up to 8x8. + // larger AvgPool or DepthwiseConv2D with kernel up to 8x8. 
std::vector> result; @@ -1317,18 +1467,12 @@ std::vector> LegaliseResize(Architecture *ar auto ofmShape = ofmConn->shape; auto ifmShape = ifmConn->shape; - - // half_pixel_centers / align_corners pattern match - bool isHalfPixelCenter = false; - if ( attr->scaleY.d == 2 && attr->scaleX.d == 2 && upscaleH % 2 == 0 && upscaleW % 2 == 0 ) + // half pixel centers / align corners pattern match + bool isHalfPixelCenter = IsHalfPixelCenters(*attr); + if ( isHalfPixelCenter ) { upscaleW /= 2; upscaleH /= 2; - - if ( attr->offset.x == -1 * (upscaleW - 1) && attr->offset.y == -1 * (upscaleH - 1) ) - { - isHalfPixelCenter = true; - } } auto remainingUpscale = std::max(upscaleW, upscaleH); @@ -1354,9 +1498,21 @@ std::vector> LegaliseResize(Architecture *ar else if ( attr->mode == tosa::ResizeMode::BILINEAR ) { auto reqScale = QuantizedScale(1, IntLog2(attr->scaleX.n * attr->scaleY.n)); - - if ( ofmConn->quantization.scales[0] != reqScale || isHalfPixelCenter || attr->offset.x != 0 || - attr->offset.y != 0 || attr->scaleX.d != 1 || attr->scaleY.d != 1 ) + // ResizeBilinear has quantization requirement, and this function only legalises ResizeBilinear without half + // pixel centers + if ( ofmConn->quantization.scales[0] == reqScale ) + { + // Only 2x upscale supported for half pixel centers Resize Bilinear + if ( isHalfPixelCenter && attr->scaleX.n == 4 && attr->scaleY.n == 4 ) + { + return ConvertResizeBilinearHPCToDepthwise(arch, std::move(op)); + } + else if ( attr->offset.x != 0 || attr->offset.y != 0 || attr->scaleX.d != 1 || attr->scaleY.d != 1 ) + { + canLegalise = false; + } + } + else { canLegalise = false; } diff --git a/ethosu/regor/compiler/scheduler_packing.cpp b/ethosu/regor/compiler/scheduler_packing.cpp index a1861d3b..a8e19b0e 100644 --- a/ethosu/regor/compiler/scheduler_packing.cpp +++ b/ethosu/regor/compiler/scheduler_packing.cpp @@ -768,7 +768,7 @@ std::vector> SchedulerPacking::DecomposeSche break; case OpType::Resize: OperatorQuery(_arch, op.get(), &req); - 
if ( req.substitution == OpType::AvgPool ) + if ( req.req.Any(ArchRequirement::OpSubstitution) ) { result = LegaliseResize(_arch, std::move(op)); } diff --git a/ethosu/regor/tflite/tflite_supported_operators_u55.cpp b/ethosu/regor/tflite/tflite_supported_operators_u55.cpp index 912965e0..fe9f4e3b 100644 --- a/ethosu/regor/tflite/tflite_supported_operators_u55.cpp +++ b/ethosu/regor/tflite/tflite_supported_operators_u55.cpp @@ -167,7 +167,7 @@ bool TfLiteSupportedOperatorsU55::ConstraintResize(const Operation *op) alignCorners = opt->align_corners(); halfPixelCentersRB = opt->half_pixel_centers(); } - else if ( opType == OpType::ResizeNearestNeighbor ) + else { const auto *opt = passthrough->builtin_options_as_ResizeNearestNeighborOptions(); assert(opt); @@ -201,21 +201,18 @@ bool TfLiteSupportedOperatorsU55::ConstraintResize(const Operation *op) hUpscale = float(ofmShape.Height()) / ifmShape.Height(); wUpscale = float(ofmShape.Width()) / ifmShape.Width(); } - - if ( halfPixelCentersRB ) - { - Failure(op, "Half Pixel Centers attribute is true", "Half Pixel Centers must be false for Resize Bilinear"); - return false; - } std::string constraint = - "If not (IFM H == IFM W == 1) and not IFM Shape == OFM Shape\n" + "If not (IFM H == IFM W == 1) and not IFM Shape == OFM Shape:\n" "\tIf W upScale != H upScale:\n" "\t\tOFM W or H must be 1, and scaling in the dim that is must also be 1\n" - "\tIf align corners:" - "\t\tupScale is definied as OFM H-1 / IFM H - 1" - "\tElse:" - "\t\tupScale is defined as OFM H/IFM H" - "\tupScale needs to be one of: 2x/4x/8x"; + "\tIf align corners:\n" + "\t\tupScale is definied as OFM H-1 / IFM H - 1\n" + "\tElse:\n" + "\t\tupScale is defined as OFM H/IFM H\n" + "\t\tIF Resize Bilinear and half pixel centers:\n" + "\t\t\tupscale needs to be 2x\n" + "\t\tElse:\n" + "\t\t\tupScale needs to be one of: 2x/4x/8x\n"; if ( hUpscale != wUpscale ) @@ -228,11 +225,18 @@ bool TfLiteSupportedOperatorsU55::ConstraintResize(const Operation *op) 
constraint); return false; } + else if ( halfPixelCentersRB ) + { + Failure(op, fmt::format("HW upScaling is not equal and Resize Bilinear has half pixel centers, h up-scale={}, w up-scale={}.", hUpscale, wUpscale), + constraint); + return false; + } } + auto maxUpscale = halfPixelCentersRB ? 2 : 8; auto upscale = std::max(hUpscale, wUpscale); if ( !((ifmShape.Height() == 1 && ifmShape.Width() == 1) || - (std::trunc(upscale) == upscale && IsPowerOfTwo(int(upscale)) && upscale > 1 && upscale <= 8)) ) + (std::trunc(upscale) == upscale && IsPowerOfTwo(int(upscale)) && upscale > 1 && upscale <= maxUpscale)) ) { Failure(op, fmt::format("Scaling matches and operation has unsupported upScaling={}", upscale), constraint); return false; -- GitLab