diff --git a/ethosu/regor/compiler/high_level_command_stream_generator.cpp b/ethosu/regor/compiler/high_level_command_stream_generator.cpp index c73a089186feb013d1a935f8a88cca0783560df9..4eb673c62d6c8ce84ee180ca883222e0d7f74086 100644 --- a/ethosu/regor/compiler/high_level_command_stream_generator.cpp +++ b/ethosu/regor/compiler/high_level_command_stream_generator.cpp @@ -671,7 +671,7 @@ void HLCStreamGenerator::GenerateHLCStripeCommands(SchedulerOperation *op, const auto inputArea = TransformWithStridesAndSkirt(outputArea, &strides, ifmConn->stepXY, &skirt, ifmConn->shape, opType, ofmConn->slice.offset, ifmConn->slice.offset, ifmConn->slice.shape, dilatedKernelHeight, upscaling, hlcStripe->padding.top, hlcStripe->padding.bottom, ifmLimit, ofmConn->transpose, accIfm); - if ( ofmConn->stepXY != Point2i{1, 1} || ifmConn->stepXY != Point2i{1, 1} ) + if ( !accIfm && (ofmConn->stepXY != Point2i{1, 1} || ifmConn->stepXY != Point2i{1, 1}) ) { std::tie(inputArea, hlcStripe->padding) = TransformWithInputOutputSteps(inputArea, ifmConn->stepXY, outputArea, ofmConn->stepXY, kernel, hlcStripe->padding, ifmConn->shape); diff --git a/ethosu/regor/compiler/scheduler_decompose.cpp b/ethosu/regor/compiler/scheduler_decompose.cpp index 947f7821ff9b6a2da35373dadc7436ba86f8048c..defa6679c8627c8cacbc02464d05288eb2929ca2 100644 --- a/ethosu/regor/compiler/scheduler_decompose.cpp +++ b/ethosu/regor/compiler/scheduler_decompose.cpp @@ -448,6 +448,14 @@ HandleDilation(Architecture *arch, std::unique_ptr op, const auto *subOfmConn = subOp->Output(TensorUsage::OFM); subOfmConn->slice = std::move(newOfmSlice); subOfmConn->stepXY = ofmStrides; + if ( subOp->AccumulatorMode().source == AccumulatorSource::Ifm2 ) + { + auto *subIfm2Conn = subOp->Input(TensorUsage::IFM1); + subIfm2Conn->slice.shape = subOfmConn->slice.shape; + subIfm2Conn->slice.offset = subIfm2Conn->slice.shape.WithZeros().WithHW( + subOfmConn->slice.offset.Height(), subOfmConn->slice.offset.Width()); + subIfm2Conn->stepXY = subOfmConn->stepXY; + } auto subOps = doDecompose(arch, std::move(subOp)); result.insert(result.end(), std::make_move_iterator(subOps.begin()), std::make_move_iterator(subOps.end())); }