diff --git a/ethosu/regor/compiler/compiler.cpp b/ethosu/regor/compiler/compiler.cpp
index 3927855b4ccd75acb21958354f83c8c68be8dd04..9cb6c7de65e341186822f17ce4c876bc4e7c0a03 100644
--- a/ethosu/regor/compiler/compiler.cpp
+++ b/ethosu/regor/compiler/compiler.cpp
@@ -512,23 +512,6 @@ std::unique_ptr Compiler::CompileGraph(std::unique_ptr &graph,
         return nullptr;
     }
 
-    // At most 1 CustomNpuOp is supported when compiling with separate IO regions
-    if ( _schedulerOptions.separateIORegions )
-    {
-        std::vector<Operation *> ops;
-        newGraph->GetAllOperations(ops);
-        int customNpuOps = 0;
-        for ( auto op : ops )
-        {
-            if ( op->Type() == OpType::CustomNpuOp ) customNpuOps++;
-        }
-        if ( customNpuOps > 1 )
-        {
-            SetLastError("More than 1 CustomNpuOp is not supported with separate IO regions");
-            return nullptr;
-        }
-    }
-
     auto customOperatorBuilder = CustomOperatorBuilder(_architecture.get(), schedule.get());
     customOperatorBuilder.AllocateScratchTensors(tensorAddressMap);
 
@@ -538,6 +521,9 @@ std::unique_ptr Compiler::CompileGraph(std::unique_ptr &graph,
         auto *graphOp = pair.first;
         const auto *npuOp = pair.second.get();
 
+        // Allocate addresses for IO tensors with an address space that is local for this sequence of NPU ops
+        scheduler.AllocateIOAddresses(schedule.get(), npuOp->Operations());
+
         // Generate HLCS
         auto hlcsGenerator = HLCStreamGenerator();
         auto highLevelCommandStream = hlcsGenerator.GenerateCommandStream(npuOp, schedule.get(), _compilerOptions.verboseHighLevelCommandStream);
diff --git a/ethosu/regor/compiler/scheduler.cpp b/ethosu/regor/compiler/scheduler.cpp
index 6a69f55984d2dec20f3bea5549f1a6a4da7b5b5f..926c0232050a5f365eb214607c1c0eaa62ba001d 100644
--- a/ethosu/regor/compiler/scheduler.cpp
+++ b/ethosu/regor/compiler/scheduler.cpp
@@ -329,7 +329,8 @@ int Scheduler::UpdateSchedulerTensor(TensorUsage usage, SchedulerConnection *con
     }
 
     // Initial criteria (may change)
-    bool cpuTensor = tensor->hasCPUWriters || tensor->hasCPUReaders || tensor->isGraphInput || tensor->isGraphOutput;
+    bool cpuTensor =
+        tensor->hasCPUWriters || tensor->hasCPUReaders || tensor->isGraphInput || tensor->isGraphOutput || tensor->isPersistent;
     conn->requireFullTensor = conn->requireFullTensor || cpuTensor;
     tensor->needsLinearFormat = tensor->needsLinearFormat || cpuTensor || CheckLinearFormatForConcatSplit(tensor);
 
@@ -337,13 +338,9 @@
     {
         tensor->memArea = _arch->CPUMemory();
     }
-    else if ( _options.separateIORegions && !tensor->IsConstant() && cpuTensor && tensor->hasNPUReaders )
+    else if ( _options.separateIORegions && !tensor->IsConstant() && cpuTensor )
     {
-        tensor->memArea = _arch->InputFeatureMapMemory();
-    }
-    else if ( _options.separateIORegions && !tensor->IsConstant() && cpuTensor && tensor->hasNPUWriters )
-    {
-        tensor->memArea = _arch->OutputFeatureMapMemory();
+        tensor->memArea = tensor->hasNPUWriters ? _arch->OutputFeatureMapMemory() : _arch->InputFeatureMapMemory();
     }
 
     // Set tensor format to NHCWB16 for FeatureMaps, if possible
@@ -831,13 +828,7 @@ void Scheduler::MoveConstantData(Schedule *refSchedule)
 bool Scheduler::AllocateAddresses(Schedule *schedule)
 {
     const auto verbose = _options.verboseAllocation;
-    const auto separateIORegions = _options.separateIORegions;
     AllocateTensors(_ops, schedule, _arch->FeatureMapMemory(), TensorAllocator::HillClimb, AlignmentQuantum, verbose);
-    if ( separateIORegions )
-    {
-        AllocateTensors(_ops, schedule, _arch->InputFeatureMapMemory(), TensorAllocator::LinearAlloc, AlignmentQuantum, verbose);
-        AllocateTensors(_ops, schedule, _arch->OutputFeatureMapMemory(), TensorAllocator::LinearAlloc, AlignmentQuantum, verbose);
-    }
     if ( _spilling )
     {
         const auto limit = _options.optimizationStagingLimit;
@@ -905,6 +896,20 @@ void Scheduler::AllocateReadOnlyAddresses(Schedule *schedule, IncrementalLinearA
 }
 
+void Scheduler::AllocateIOAddresses(Schedule *schedule, const std::vector<std::unique_ptr<SchedulerOperation>> &ops)
+{
+    const auto verbose = _options.verboseAllocation;
+    const auto separateIORegions = _options.separateIORegions;
+    if ( separateIORegions )
+    {
+        assert(_arch->InputFeatureMapMemory() != _arch->OutputFeatureMapMemory());
+
+        AllocateTensors(ops, schedule, _arch->InputFeatureMapMemory(), TensorAllocator::LinearAlloc, AlignmentQuantum, verbose);
+        AllocateTensors(ops, schedule, _arch->OutputFeatureMapMemory(), TensorAllocator::LinearAlloc, AlignmentQuantum, verbose);
+    }
+}
+
+
 void Scheduler::UpdateOpMemorySnapshot(Schedule *schedule)
 {
     const auto fastStorage = _arch->StagingMemory();
diff --git a/ethosu/regor/compiler/scheduler.hpp b/ethosu/regor/compiler/scheduler.hpp
index c137540bbab189d0cbb782fda0bdded765e57332..5747b907607ea11e34c48882845173ccbf8e34d8 100644
--- a/ethosu/regor/compiler/scheduler.hpp
+++ b/ethosu/regor/compiler/scheduler.hpp
@@ -302,6 +302,8 @@ public:
 
     void AllocateReadOnlyAddresses(Schedule *schedule, IncrementalLinearAllocator &readOnlyAllocator);
 
+    void AllocateIOAddresses(Schedule *schedule, const std::vector<std::unique_ptr<SchedulerOperation>> &ops);
+
     static PerformanceQuery InitPerfQuery(SchedulerOperation *op, ArchitectureOpConfig *config, int ofm_depth = -1,
         WeightFormat wgtFormat = WeightFormat::Default, SchedulerOpInfo *cost = nullptr);
     static std::vector InitFusionQuery(SchedulerOperation *op);
diff --git a/ethosu/regor/compiler/scheduler_operation.hpp b/ethosu/regor/compiler/scheduler_operation.hpp
index 4cb6456b94e2d0037e91fab45a342616c684b078..391ee668635143f5ac5f8a9ab3df0d20d2074790 100644
--- a/ethosu/regor/compiler/scheduler_operation.hpp
+++ b/ethosu/regor/compiler/scheduler_operation.hpp
@@ -92,7 +92,7 @@ public:
 
     void SetAddress(Address address)
     {
-        assert(allocatedAddress == -1 && address >= 0);
+        assert(address >= 0);
        allocatedAddress = address;
     }
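The sketch below is a minimal, self-contained illustration of what the per-op IO allocation introduced above amounts to; it is not the regor API, and Tensor, Region and AllocateLinear are hypothetical stand-ins for SchedulerTensor, the architecture's IO memory areas and TensorAllocator::LinearAlloc. It shows the effect of calling AllocateIOAddresses once per CustomNpuOp: input and output feature maps are placed in their own regions with linear addresses that restart at zero for every op, instead of being allocated once globally as the removed block in AllocateAddresses did.

```cpp
// Minimal sketch -- NOT the regor API. Tensor, Region and AllocateLinear are
// simplified stand-ins used only to illustrate the idea behind
// Scheduler::AllocateIOAddresses under separate IO regions: each NPU op's IO
// feature maps get linear addresses starting at zero, i.e. an address space
// local to that op's command stream.
#include <cstdint>
#include <initializer_list>
#include <iostream>
#include <string>
#include <vector>

enum class Region { InputFeatureMap, OutputFeatureMap };

struct Tensor
{
    std::string name;
    Region region;
    int64_t size = 0;      // bytes
    int64_t address = -1;  // filled in by AllocateLinear
};

// Linear allocation over the tensors of one NPU op, restricted to one region.
// Addresses restart at zero on every call, which is what makes the address
// space local per op.
int64_t AllocateLinear(std::vector<Tensor *> &tensors, Region region, int64_t alignment)
{
    int64_t top = 0;
    for ( Tensor *t : tensors )
    {
        if ( t->region != region ) continue;
        t->address = top;
        top += (t->size + alignment - 1) & ~(alignment - 1);
    }
    return top;  // high-water mark: region size needed by this op
}

int main()
{
    // Two NPU ops, each with its own IO tensors.
    Tensor ifm0{"op0.ifm", Region::InputFeatureMap, 1000};
    Tensor ofm0{"op0.ofm", Region::OutputFeatureMap, 512};
    Tensor ifm1{"op1.ifm", Region::InputFeatureMap, 2048};
    Tensor ofm1{"op1.ofm", Region::OutputFeatureMap, 256};

    std::vector<std::vector<Tensor *>> npuOps = {{&ifm0, &ofm0}, {&ifm1, &ofm1}};

    // One pair of calls per NPU op, mirroring the two AllocateTensors calls in
    // Scheduler::AllocateIOAddresses invoked from Compiler::CompileGraph for
    // every (graphOp, npuOp) pair.
    for ( auto &ops : npuOps )
    {
        AllocateLinear(ops, Region::InputFeatureMap, 16);
        AllocateLinear(ops, Region::OutputFeatureMap, 16);
    }

    // Every IO tensor ends up at address 0 of its own region because the
    // address space is local to the op, not shared across the whole schedule.
    for ( const Tensor *t : {&ifm0, &ofm0, &ifm1, &ofm1} )
    {
        std::cout << t->name << " @ " << t->address << "\n";
    }
    return 0;
}
```

This per-op restart is also why SetAddress in scheduler_operation.hpp can no longer assert that allocatedAddress is still -1: the same IO tensor may legitimately be assigned an address more than once when it is shared between consecutive NPU ops.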