From dda222bdfdc385447681f7d29889ecfe45a953f2 Mon Sep 17 00:00:00 2001 From: Fredrik Svedberg Date: Mon, 19 May 2025 17:00:21 +0200 Subject: [PATCH] MLBEDSW-10838 Add Shape::ElementsHWC Added Shape::ElementsHWC and replaced occurrences where possible. Change-Id: Iaea05eb108bc6c3130474cca4812aa9e45d44d11 Signed-off-by: Fredrik Svedberg --- ethosu/regor/architecture/ethosu55/ethos_u55.cpp | 2 +- .../ethosu55/ethos_u55_performance.cpp | 6 +++--- .../ethosu55/ethos_u55_register_cs_generator.cpp | 6 +++--- ethosu/regor/architecture/ethosu85/ethos_u85.cpp | 5 +++-- .../ethosu85/ethos_u85_performance.cpp | 6 +++--- ethosu/regor/common/shape.hpp | 14 ++++++++++++-- ethosu/regor/compiler/graphir_optimiser.cpp | 4 ++-- ethosu/regor/compiler/scheduler_decompose.cpp | 8 ++++---- 8 files changed, 31 insertions(+), 20 deletions(-) diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55.cpp index d92b74e7..693884ec 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55.cpp @@ -499,7 +499,7 @@ std::unique_ptr ArchEthosU55::FindBlockConfig(OpType opType, c if ( relativeCost == bestCost ) { Shape coverageShape = Shape::Min(ifmShape, ifmBlock); - float coverage = float(ifmShape.ElementsWH()) / float(coverageShape.ElementsWH()); + float coverage = float(ifmShape.ElementsWH()) / float(std::max(coverageShape.ElementsWH(), 1)); // Small 4x4 IFM constraint found through analysis of networks if ( coverage <= bestCoverage && (height <= 4 && width <= 4) ) { diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_performance.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_performance.cpp index d150264b..fbf37486 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_performance.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_performance.cpp @@ -372,7 +372,7 @@ static int EstimateMemoryTransfer(int cores, bool isRead, ArchitectureMemory *me int zStride = (shape.Width() 
* elementBits * 16) / 8; if ( zStride == block.Depth() ) { - burstLen = elementBits * block.Depth() * block.Width(); + burstLen = elementBits * block.ElementsWC(); } else if ( isRead ) { @@ -390,7 +390,7 @@ static int EstimateMemoryTransfer(int cores, bool isRead, ArchitectureMemory *me { if ( xStride == block.Depth() ) { - burstLen = elementBits * block.Depth() * block.Width(); + burstLen = elementBits * block.ElementsWC(); } else { @@ -401,7 +401,7 @@ static int EstimateMemoryTransfer(int cores, bool isRead, ArchitectureMemory *me { if ( (block.Depth() <= 16) && xStride == block.Depth() ) { - burstLen = elementBits * block.Depth() * block.Width(); + burstLen = elementBits * block.ElementsWC(); } else { diff --git a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp index 2a802a49..c12f32b7 100644 --- a/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp +++ b/ethosu/regor/architecture/ethosu55/ethos_u55_register_cs_generator.cpp @@ -1424,7 +1424,7 @@ void EthosU55RCSGenerator::InsertTransposeCommand(const HLCStripe *stripe, Tempo { depth = 1; slices = ifm.shape.Height(); - ifmStep = ofmStep = ifm.shape.Depth() * ifm.shape.Width() * elementSize; + ifmStep = ofmStep = ifm.shape.ElementsWC() * elementSize; assert(from == 2 && to == 3); } else if ( ofm.transpose == TransposeType::NCWH ) @@ -1468,8 +1468,8 @@ void EthosU55RCSGenerator::InsertTransposeCommand(const HLCStripe *stripe, Tempo // Special case for IFM with sparse strides if ( (slices > 1) && (ofm.transpose == TransposeType::NCWH) ) { - outFM.strides = Shape(1, elementSize, elementSize * ifm.shape.Width() * ifm.shape.Height(), elementSize); - inFM.strides = Shape(1, elementSize * ifm.shape.Width() * ifm.shape.Depth(), elementSize, elementSize); + outFM.strides = Shape(1, elementSize, elementSize * ifm.shape.ElementsWH(), elementSize); + inFM.strides = Shape(1, elementSize * ifm.shape.ElementsWC(), 
elementSize, elementSize); } else if ( ofm.transpose == TransposeType::NWHC ) { diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85.cpp index 4d37a800..a7d5eb52 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85.cpp @@ -690,6 +690,7 @@ Shape ArchEthosU85::AreaFit(const FindConfigCommon &common, const Shape &ofmShap const int accElements = (_accRamSizeBytes * 8) / common.accBits; const int ibElements = (_ifmRamSizeBytes * 8) / common.ifmBits; const int ubAccElements = common.ublock.ElementsWH() * ACC_DEPTH_GRANULE; + assert(ubAccElements); double aspect = double(ofmShape.Height()) / ofmShape.Width(); bool prioritiseDepth = kernel->DilatedWH() == Point2i(1, 1); @@ -698,7 +699,7 @@ Shape ArchEthosU85::AreaFit(const FindConfigCommon &common, const Shape &ofmShap Shape fitShape; double bestMetric = std::numeric_limits<double>::max(); int maxDepth = std::min(std::max(_macs, accElements / ubAccElements), ofmBlockLimit.Depth()); - double ofmArea = prioritiseDepth ? ofmShape.Width() * ofmShape.Depth() : ofmShape.Width() * ofmShape.Height(); + double ofmArea = prioritiseDepth ? 
ofmShape.ElementsWC() : ofmShape.ElementsWH(); for ( int depth = ACC_DEPTH_GRANULE; (depth <= maxDepth); depth += ACC_DEPTH_GRANULE ) { @@ -1143,7 +1144,7 @@ std::unique_ptr ArchEthosU85::FindBlockConfig(OpType opTyp if ( relativeCost == bestCost ) { Shape coverageShape = Shape::Min(ifmShape, ifmBlock); - float coverage = float(ifmShape.ElementsWH()) / float(coverageShape.ElementsWH()); + float coverage = float(ifmShape.ElementsWH()) / float(std::max(coverageShape.ElementsWH(), 1)); // Small 4x4 IFM constraint found through analysis of networks if ( coverage <= bestCoverage && ((height <= 4 && width <= 4) || isMatmul) ) { diff --git a/ethosu/regor/architecture/ethosu85/ethos_u85_performance.cpp b/ethosu/regor/architecture/ethosu85/ethos_u85_performance.cpp index 0b9fedf4..1893df9b 100644 --- a/ethosu/regor/architecture/ethosu85/ethos_u85_performance.cpp +++ b/ethosu/regor/architecture/ethosu85/ethos_u85_performance.cpp @@ -306,7 +306,7 @@ static int64_t EstimateMemoryTransfer(int cores, bool isRead, ArchitectureMemory int zStride = (shape.Width() * elementBits * 16) / 8; if ( zStride == block.Depth() ) { - burstLen = elementBits * block.Depth() * block.Width(); + burstLen = elementBits * block.ElementsWC(); } else if ( isRead ) { @@ -324,7 +324,7 @@ static int64_t EstimateMemoryTransfer(int cores, bool isRead, ArchitectureMemory { if ( xStride == block.Depth() ) { - burstLen = elementBits * block.Depth() * block.Width(); + burstLen = elementBits * block.ElementsWC(); } else { @@ -335,7 +335,7 @@ static int64_t EstimateMemoryTransfer(int cores, bool isRead, ArchitectureMemory { if ( (block.Depth() <= 16) && xStride == block.Depth() ) { - burstLen = elementBits * block.Depth() * block.Width(); + burstLen = elementBits * block.ElementsWC(); } else { diff --git a/ethosu/regor/common/shape.hpp b/ethosu/regor/common/shape.hpp index 0c08229c..88dfc812 100644 --- a/ethosu/regor/common/shape.hpp +++ b/ethosu/regor/common/shape.hpp @@ -485,16 +485,26 @@ public: return 
Point3(TYPE(Height()), TYPE(Width()), TYPE(Depth())); } + int ElementsHWC() const + { + int64_t result = ElementsWC(); + if ( _last >= 2 ) result *= Height(); + assert(result <= std::numeric_limits<int>::max()); + return int(result); + } + int ElementsWH() const { - int64_t result = int64_t(Width()) * Height(); + int64_t result = _last >= 1 ? Width() : 0; + if ( _last >= 2 ) result *= Height(); assert(result <= std::numeric_limits<int>::max()); return int(result); } int ElementsWC() const { - int64_t result = int64_t(Width()) * Depth(); + int64_t result = Depth(); + if ( _last >= 1 ) result *= Width(); assert(result <= std::numeric_limits<int>::max()); return int(result); } diff --git a/ethosu/regor/compiler/graphir_optimiser.cpp b/ethosu/regor/compiler/graphir_optimiser.cpp index 4732e464..9f36244e 100644 --- a/ethosu/regor/compiler/graphir_optimiser.cpp +++ b/ethosu/regor/compiler/graphir_optimiser.cpp @@ -1249,7 +1249,7 @@ Operation *GraphIrOptimiser::OptimiseElementwise(Graph *const, Operation *const { auto ofmShape = operation->Output(TensorUsage::OFM)->shape; if ( ofmShape.Size() > 3 && (ofmShape.Depth() == 1 || ofmShape.Width() == 1 || ofmShape.Height() == 1) && - ofmShape.Elements() > ofmShape.Depth() * ofmShape.Width() * ofmShape.Height() ) + ofmShape.Elements() > ofmShape.ElementsHWC() ) { auto *ofmConn = returnOp->Output(TensorUsage::OFM); auto *ifmConn = returnOp->Input(TensorUsage::IFM0); @@ -1832,7 +1832,7 @@ Operation *GraphIrOptimiser::RewriteMatmul(Graph *const graph, Operation *const // Reshape non-WC axes into height auto ReshapeFunc = [](const Shape &s) { - int height = s.Elements() / (s.Width() * s.Depth()); + int height = s.Elements() / s.ElementsWC(); return Shape(1, height, s.Width(), s.Depth()); }; diff --git a/ethosu/regor/compiler/scheduler_decompose.cpp b/ethosu/regor/compiler/scheduler_decompose.cpp index fe612ec2..d475c183 100644 --- a/ethosu/regor/compiler/scheduler_decompose.cpp +++ b/ethosu/regor/compiler/scheduler_decompose.cpp @@ -1483,7 +1483,7 
@@ std::vector> DecomposeElementwise(Architectu ifm2Slice.Initialize(ifm2Shape.WithZeros(), ifm2Shape); } auto ofmRank = ofmShape.Size(); - if ( ofmRank > 3 && ofmShape.Elements() > ofmShape.Width() * ofmShape.Height() * ofmShape.Depth() ) + if ( ofmRank > 3 && ofmShape.Elements() > ofmShape.ElementsHWC() ) { return DecomposeLeadingDimensions(ofmRank - 3, arch, std::move(op), DecomposeElementwise); } @@ -1514,7 +1514,7 @@ std::vector> DecomposeMatmul(Architecture *a // Decompose Batching auto ofmRank = ofmShape.Size(); - if ( ofmRank > 2 && (ofmShape.Elements() > ofmShape.Width() * ofmShape.Depth()) ) + if ( ofmRank > 2 && ofmShape.Elements() > ofmShape.ElementsWC() ) { return DecomposeLeadingDimensions(ofmRank - 2, arch, std::move(op), DecomposeMatmul); } @@ -2281,7 +2281,7 @@ std::vector> DecomposeAvgPool(Architecture * ofmSlice.Initialize(ofmShape.WithZeros(), ofmShape); ifmSlice.Initialize(ifmShape.WithZeros().WithHW(-padding.Top(), -padding.Left()), ifmShape); auto ofmRank = ofmShape.Size(); - if ( ofmRank > 3 && (ofmShape.Elements() > ofmShape.Height() * ofmShape.Width() * ofmShape.Depth()) ) + if ( ofmRank > 3 && (ofmShape.Elements() > ofmShape.ElementsHWC()) ) { return DecomposeLeadingDimensions(ofmRank - 3, arch, std::move(op), DecomposeAvgPool); } @@ -2437,7 +2437,7 @@ std::vector> DecomposeMaxPool(Architecture * ofmSlice.Initialize(ofmShape.WithZeros(), ofmShape); ifmSlice.Initialize(ifmShape.WithZeros().WithHW(-padding.Top(), -padding.Left()), ifmShape); auto ofmRank = ofmShape.Size(); - if ( ofmRank > 3 && (ofmShape.Elements() > ofmShape.Height() * ofmShape.Width() * ofmShape.Depth()) ) + if ( ofmRank > 3 && (ofmShape.Elements() > ofmShape.ElementsHWC()) ) { return DecomposeLeadingDimensions(ofmRank - 3, arch, std::move(op), DecomposeMaxPool); } -- GitLab