diff --git a/ethosu/regor/compiler/graphir_optimiser.cpp b/ethosu/regor/compiler/graphir_optimiser.cpp
index de1f6f348eec359705e5de12fbc89e1a97c915c9..644b55be1ab27fc512e17b6c80ee51a7c798af2f 100644
--- a/ethosu/regor/compiler/graphir_optimiser.cpp
+++ b/ethosu/regor/compiler/graphir_optimiser.cpp
@@ -2122,6 +2122,7 @@ Operation *GraphIrOptimiser::MoveSplitSliceToConsumer(Graph *const, Operation *c
         auto *consIfm1 = cons->IFM(1);
 
         bool ifmShapeEqual = false;
+        bool bothHaveIfmStride = false;
 
         // Don't move to CPU, Reshape or Tile operations
         // low-level implementation of TILE requires unsliced inputs
@@ -2135,12 +2136,24 @@ Operation *GraphIrOptimiser::MoveSplitSliceToConsumer(Graph *const, Operation *c
             // Check if ifm0 consumer has correct shape
             auto *consIfm0Conn = cons->Input(TensorUsage::IFM0);
             ifmShapeEqual = Shape::IsReducedEqual(consIfm0Conn->shape, ofmConn->shape);
+
+            // Check if both ifm and ifm0 consumer have stride
+            const auto &ifmStride = ifmConn->slice.stride;
+            const auto &conIfmStride = consIfm0Conn->slice.stride;
+            bothHaveIfmStride =
+                ifmStride && ifmStride != ifmStride.WithOnes() && conIfmStride && conIfmStride != conIfmStride.WithOnes();
         }
         else if ( consIfm1 != nullptr && consIfm1 == ofm )
         {
             // Check if ifm1 consumer has correct shape
             auto *consIfm1Conn = cons->Input(TensorUsage::IFM1);
             ifmShapeEqual = Shape::IsReducedEqual(consIfm1Conn->shape, ofmConn->shape);
+
+            // Check if both ifm and ifm1 consumer have stride
+            const auto &ifmStride = ifmConn->slice.stride;
+            const auto &conIfmStride = consIfm1Conn->slice.stride;
+            bothHaveIfmStride =
+                ifmStride && ifmStride != ifmStride.WithOnes() && conIfmStride && conIfmStride != conIfmStride.WithOnes();
         }
 
         // Calculate the consumer transpose type
@@ -2152,7 +2165,7 @@ Operation *GraphIrOptimiser::MoveSplitSliceToConsumer(Graph *const, Operation *c
 
         // We can only move to consumer if there is no transpose on the op that we move to,
         // otherwise the IFM shape may change and transposition will be wrong.
-        if ( Shape::IsReducedEqual(ofmConn->shape, ofm->StorageShape()) && IsNone(consumerTranspose) && ifmShapeEqual )
+        if ( Shape::IsReducedEqual(ofmConn->shape, ofm->StorageShape()) && IsNone(consumerTranspose) && ifmShapeEqual && !bothHaveIfmStride )
         {
             // Split/Slice can be performed by tensor consumer
             MoveToConsumer(operation, cons.get());
diff --git a/ethosu/regor/compiler/operation.hpp b/ethosu/regor/compiler/operation.hpp
index 200a8216aab360f56261ccbd146fcff7959839da..c2a72a9e020ae48f760e4d2675f673aabd43e96e 100644
--- a/ethosu/regor/compiler/operation.hpp
+++ b/ethosu/regor/compiler/operation.hpp
@@ -49,17 +49,37 @@ enum class RoundMode : uint8_t
 struct TensorSlice
 {
     Shape offset;
-    Shape shape;
+    Shape shape;  // Shape before striding
+    Shape stride;
+
+    TensorSlice() {}
+    TensorSlice(const Shape &offset_, const Shape &shape_) : offset(offset_), shape(shape_) {}
+    TensorSlice(const Shape &offset_, const Shape &shape_, const Shape &stride_) :
+            offset(offset_), shape(shape_), stride(stride_)
+    {
+    }
+
     // Initialize a TensorSlice if current offset/shape are invalid
-    void Initialize(const Shape &_offset, const Shape &_shape)
+    void Initialize(const Shape &offset_, const Shape &shape_)
     {
         if ( !shape )
         {
-            shape = _shape;
+            shape = shape_;
         }
         if ( !offset )
         {
-            offset = _offset;
+            offset = offset_;
+        }
+    }
+
+    // Initialize a TensorSlice if current offset/shape/stride are invalid
+    void Initialize(const Shape &offset_, const Shape &shape_, const Shape &stride_)
+    {
+        Initialize(offset_, shape_);
+
+        if ( !stride )
+        {
+            stride = stride_;
         }
     }
 };
diff --git a/ethosu/regor/compiler/scheduler.cpp b/ethosu/regor/compiler/scheduler.cpp
index e0df646da5c83873fd254a684f5077253245d9cb..6c74bf3f05375311e8efef5419787e2f5bc6c519 100644
--- a/ethosu/regor/compiler/scheduler.cpp
+++ b/ethosu/regor/compiler/scheduler.cpp
@@ -205,6 +205,12 @@ int Scheduler::UpdateSchedulerTensor(TensorUsage usage, SchedulerConnection *con
         conn->requireFullTensor = true;
     }
 
+    // Force linear format for read only tensors
+    if ( tensor->IsConstant() )
+    {
+        tensor->needsLinearFormat = true;
+    }
+
     // Force linear output from Reverse for C dimension because brick output from Reverse has special requirements
     if ( IsOFM(usage) && conn->reverse == ReverseType::C )
     {
@@ -216,6 +222,12 @@ int Scheduler::UpdateSchedulerTensor(TensorUsage usage, SchedulerConnection *con
         tensor->needsLinearFormat = true;
     }
 
+    // Force linear format for strided access in the width dimension
+    if ( conn->stepXY.x != 1 )
+    {
+        tensor->needsLinearFormat = true;
+    }
+
     for ( auto producer : tensor->producers )
     {
         // TODO: Gather doesn't support brick format yet (MLBEDSW-8410)
@@ -323,8 +335,8 @@ int Scheduler::UpdateSchedulerTensor(TensorUsage usage, SchedulerConnection *con
         tensor->memArea = _arch->OutputFeatureMapMemory();
     }
 
-    // Set tensor format to NHCWB16 for output FeatureMaps, if possible
-    if ( IsOFM(usage) )
+    // Set tensor format to NHCWB16 for FeatureMaps, if possible
+    if ( IsIFM(usage) || IsOFM(usage) )
     {
         tensor->format = tensor->needsLinearFormat ? TensorFormat::NHWC : TensorFormat::NHCWB16;
     }
diff --git a/ethosu/regor/compiler/scheduler_packing.cpp b/ethosu/regor/compiler/scheduler_packing.cpp
index 3b56b1a3cf5c572b53d52c2ed0177dbf68f685c4..cadef74fad5c41ca0adbdbb0d37b831d33c8856a 100644
--- a/ethosu/regor/compiler/scheduler_packing.cpp
+++ b/ethosu/regor/compiler/scheduler_packing.cpp
@@ -415,12 +415,17 @@ void SchedulerPacking::InitSchedulerConnection(
 {
     schedConn->tensor = tensor;
     // Convert to (minimum) 4D-shapes in scheduler-IR
-    schedConn->slice = {Shape::PadAxes(conn.slice.offset, 4, 0), Shape::PadAxes(conn.slice.shape, 4, 1)};
+    schedConn->slice = {Shape::PadAxes(conn.slice.offset, 4, 0), Shape::PadAxes(conn.slice.shape, 4, 1),
+        Shape::PadAxes(conn.slice.stride, 4, 1)};
     schedConn->shape = Shape::PadAxes(conn.shape, 4, 1);
     schedConn->quantization = conn.quantization;
     schedConn->reverse = conn.reverse;
     schedConn->resamplingMode = ArchResampling::None;
     schedConn->rounding = conn.rounding;
+    if ( schedConn->slice.stride )
+    {
+        schedConn->stepXY = schedConn->slice.stride.WH();
+    }
 }
 
 void SchedulerPacking::InitSchedulerTensor(SchedulerTensor *schedTensor, Tensor *tensor, const Graph *graph)
diff --git a/ethosu/regor/compiler/tflite_graph_optimiser.cpp b/ethosu/regor/compiler/tflite_graph_optimiser.cpp
index 2a1698561d43c9044bec3202ceacd239193477c8..e8fe64432b24a411dbd6265c4e0d18d561e00f41 100644
--- a/ethosu/regor/compiler/tflite_graph_optimiser.cpp
+++ b/ethosu/regor/compiler/tflite_graph_optimiser.cpp
@@ -733,28 +733,24 @@ Operation *TFLiteGraphOptimiser::RewriteStridedSlice(Graph *const graph, Operati
         }
     }
 
-    // TODO MLBEDSW-10165: Handle stride != 1
-    if ( sliceStride != sliceStride.WithOnes() )
+    // TODO MLBEDSW-10165: Handle stride < 0 and other dimensions than H and W
+    if ( sliceStride.LessMask(sliceStride.WithZeros()) ||
+         sliceStride.WithHeight(1).WithWidth(1) != Shape::PadAxes(sliceShape.WithOnes(), 3, 1) )
     {
         returnOp->SetPassthroughOp();
         return returnOp;
     }
 
-    // Adjust resulting shape for stride
-    sliceShape = Shape::DivRoundUp(sliceShape, sliceStride);
-
-    // Create a new SLICE op
-    auto sliceOp = std::make_shared<Operation>(OpType::Slice);
-    sliceOp->CopyInput(TensorUsage::IFM, *ifmConn);
-    sliceOp->CopyOutput(TensorUsage::OFM, *ofmConn);
-    sliceOp->Output(TensorUsage::OFM)->Set(sliceShape);
-    auto *attr = sliceOp->Attribute<slice_attr_t>();
+    // Create a new memory copy op
     assert(sliceOffset + sliceShape <= ifmConn->shape);
     assert(sliceOffset >= ifmConn->shape.WithZeros());
-    attr->size = sliceShape;
-    attr->begin = sliceOffset;
-    RecordOptimisation(operation, sliceOp.get());
-    returnOp = sliceOp.get();
+    auto copyOp = std::make_shared<Operation>(OpType::MemoryCopy);
+    copyOp->CopyInput(TensorUsage::IFM, *ifmConn);
+    copyOp->Input(TensorUsage::IFM)->Set({sliceOffset, sliceShape, sliceStride});
+    copyOp->CopyOutput(TensorUsage::OFM, *ofmConn);
+    copyOp->Output(TensorUsage::OFM)->Set(Shape::DivRoundUp(sliceShape, sliceStride));
+    RecordOptimisation(operation, copyOp.get());
+    returnOp = copyOp.get();
 
     // Remove original op
    operation->Disconnect();
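
Reviewer note (not part of the patch): with the new TensorSlice::stride field, an OFM element at coordinate o reads the IFM element at offset + o * stride, and the OFM shape is the per-axis ceiling division of the sliced shape by the stride (Shape::DivRoundUp in the patch). A minimal standalone sketch of that read pattern, using plain std::vector coordinates instead of regor's Shape class (the helper names below are illustrative, not part of the codebase):

    // strided_slice_sketch.cpp -- offset/shape/stride slice semantics, NHWC axes.
    // Build: g++ -std=c++17 strided_slice_sketch.cpp
    #include <cassert>
    #include <cstdio>
    #include <vector>

    using Coord = std::vector<int>;

    // Per-axis ceiling division: the OFM shape produced by a strided slice.
    static Coord DivRoundUp(const Coord &shape, const Coord &stride)
    {
        Coord out(shape.size());
        for ( size_t i = 0; i < shape.size(); i++ )
        {
            assert(stride[i] > 0);  // negative strides stay passthrough in the patch
            out[i] = (shape[i] + stride[i] - 1) / stride[i];
        }
        return out;
    }

    // IFM coordinate read for a given OFM coordinate.
    static Coord IfmCoord(const Coord &offset, const Coord &stride, const Coord &ofm)
    {
        Coord in(ofm.size());
        for ( size_t i = 0; i < ofm.size(); i++ )
        {
            in[i] = offset[i] + ofm[i] * stride[i];
        }
        return in;
    }

    int main()
    {
        // Slice of shape [1,8,8,4] at offset [0,2,2,0] with stride 2 in H and W.
        Coord offset{0, 2, 2, 0}, shape{1, 8, 8, 4}, stride{1, 2, 2, 1};
        Coord ofmShape = DivRoundUp(shape, stride);         // -> [1,4,4,4]
        Coord in = IfmCoord(offset, stride, {0, 3, 1, 0});  // OFM (0,3,1,0) -> IFM (0,8,4,0)
        printf("ofm: %d %d %d %d\n", ofmShape[0], ofmShape[1], ofmShape[2], ofmShape[3]);
        printf("ifm: %d %d %d %d\n", in[0], in[1], in[2], in[3]);
    }

This is also why the scheduler change hands the H/W step to the feature-map reader via stepXY = stride.WH(), and why stepXY.x != 1 forces linear NHWC: per the patch comment, strided access in the width dimension is not supported with brick (NHCWB16) format.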
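A subtlety in the new three-argument TensorSlice::Initialize: it delegates to the two-argument overload, so each of offset, shape and stride is filled in only while still invalid (an empty Shape is falsy). A standalone mimic with a stand-in shape type, purely to make the fill-only-if-invalid rule concrete (MiniShape/MiniSlice are hypothetical names, not regor types):

    #include <cstdio>
    #include <vector>

    struct MiniShape  // stand-in for regor's Shape: empty == invalid/falsy
    {
        std::vector<int> v;
        explicit operator bool() const { return !v.empty(); }
    };

    struct MiniSlice  // mirrors the patched TensorSlice::Initialize overloads
    {
        MiniShape offset, shape, stride;
        void Initialize(const MiniShape &o, const MiniShape &s)
        {
            if ( !shape ) shape = s;
            if ( !offset ) offset = o;
        }
        void Initialize(const MiniShape &o, const MiniShape &s, const MiniShape &st)
        {
            Initialize(o, s);
            if ( !stride ) stride = st;  // stride filled only when still invalid
        }
    };

    int main()
    {
        MiniSlice slice;
        slice.offset = {{0, 2, 2, 0}};  // already valid: must survive Initialize
        slice.Initialize({{0, 0, 0, 0}}, {{1, 8, 8, 4}}, {{1, 2, 2, 1}});
        printf("offset[1]=%d shape[1]=%d stride[1]=%d\n",  // -> 2, 8, 2
            slice.offset.v[1], slice.shape.v[1], slice.stride.v[1]);
    }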
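Finally, the reworked guard in RewriteStridedSlice lowers a STRIDED_SLICE only when every stride is positive and only the H and W axes actually stride; anything else still becomes a passthrough op. A plain restatement of that predicate, mirroring what LessMask/WithHeight/WithWidth appear to compute (hypothetical helper, assuming NHWC-style trailing H/W/C axes):

    #include <cstdio>
    #include <vector>

    // True when the rewrite must bail out to passthrough, per the patched check:
    // any negative stride, or a stride != 1 on an axis other than H or W.
    static bool MustPassthrough(const std::vector<int> &stride)
    {
        const int n = static_cast<int>(stride.size());
        for ( int i = 0; i < n; i++ )
        {
            if ( stride[i] < 0 ) return true;  // sliceStride.LessMask(sliceStride.WithZeros())
            const bool isHorW = (i == n - 3) || (i == n - 2);  // H or W in ...HWC order
            if ( !isHorW && stride[i] != 1 ) return true;  // WithHeight(1).WithWidth(1) != all-ones
        }
        return false;
    }

    int main()
    {
        printf("%d\n", MustPassthrough({1, 2, 2, 1}));   // 0: strides only in H/W -> lowered
        printf("%d\n", MustPassthrough({2, 1, 1, 1}));   // 1: strided batch -> passthrough
        printf("%d\n", MustPassthrough({1, -2, 1, 1}));  // 1: negative stride -> passthrough
    }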