diff --git a/ethosu/regor/compiler/compiler.cpp b/ethosu/regor/compiler/compiler.cpp
index 9cb6c7de65e341186822f17ce4c876bc4e7c0a03..9b972fd5a3aaba38055c6798dabddc5051f8c294 100644
--- a/ethosu/regor/compiler/compiler.cpp
+++ b/ethosu/regor/compiler/compiler.cpp
@@ -178,7 +178,11 @@ bool Compiler::ParseOptions(const char *text, size_t size)
         }
         else if ( section == "scheduler" )
         {
-            ParseSchedulerOptions(_schedulerOptions, reader);
+            if ( !ParseSchedulerOptions(_schedulerOptions, reader) )
+            {
+                SetLastError(fmt::format("Error parsing [{}]", section));
+                return false;
+            }
         }
         else if ( section == "graph" )
         {
diff --git a/ethosu/regor/compiler/scheduler.cpp b/ethosu/regor/compiler/scheduler.cpp
index 926c0232050a5f365eb214607c1c0eaa62ba001d..112b9486a45101d500793310cf21fba038f7db1a 100644
--- a/ethosu/regor/compiler/scheduler.cpp
+++ b/ethosu/regor/compiler/scheduler.cpp
@@ -50,7 +50,7 @@ namespace regor
 {
 
 constexpr int AllocationQuantum = 16;
-constexpr int AlignmentQuantum = 16;
+constexpr int NPUTensorAlignment = 16;
 
 static Shape GetShapeForFormat(const Shape &shape, TensorFormat format)
 {
@@ -828,11 +828,13 @@ void Scheduler::MoveConstantData(Schedule *refSchedule)
 bool Scheduler::AllocateAddresses(Schedule *schedule)
 {
     const auto verbose = _options.verboseAllocation;
-    AllocateTensors(_ops, schedule, _arch->FeatureMapMemory(), TensorAllocator::HillClimb, AlignmentQuantum, verbose);
+    // If graph input/output tensors are in FeatureMap memory, allocate with the user-specified tensor alignment
+    AllocateTensors(_ops, schedule, _arch->FeatureMapMemory(), TensorAllocator::HillClimb,
+        _options.separateIORegions ? NPUTensorAlignment : _options.cpuTensorAlignment, verbose);
     if ( _spilling )
     {
         const auto limit = _options.optimizationStagingLimit;
-        AllocateTensors(_ops, schedule, _arch->StagingMemory(), TensorAllocator::HillClimb, AlignmentQuantum, verbose, limit);
+        AllocateTensors(_ops, schedule, _arch->StagingMemory(), TensorAllocator::HillClimb, NPUTensorAlignment, verbose, limit);
         return schedule->memoryUsage[_arch->StagingMemory()] <= limit;
     }
 
@@ -891,7 +893,7 @@ void Scheduler::AllocateReadOnlyAddresses(Schedule *schedule, IncrementalLinearA
 {
     auto lrGraph = ReadOnlyLiveRangeGraph(_arch);
     lrGraph.ExtractLiveRangesFromCascades(_ops, schedule, _arch->ReadonlyMemory(), false);
-    auto totalSize = readOnlyAllocator.Allocate(&lrGraph, AlignmentQuantum, _options.verboseAllocation);
+    auto totalSize = readOnlyAllocator.Allocate(&lrGraph, NPUTensorAlignment, _options.verboseAllocation);
     schedule->memoryUsage[_arch->ReadonlyMemory()] = int(totalSize);
 }
 
@@ -904,8 +906,8 @@ void Scheduler::AllocateIOAddresses(Schedule *schedule, const std::vector
         assert(_arch->InputFeatureMapMemory() != _arch->OutputFeatureMapMemory());
-        AllocateTensors(ops, schedule, _arch->InputFeatureMapMemory(), TensorAllocator::LinearAlloc, AlignmentQuantum, verbose);
-        AllocateTensors(ops, schedule, _arch->OutputFeatureMapMemory(), TensorAllocator::LinearAlloc, AlignmentQuantum, verbose);
+        AllocateTensors(ops, schedule, _arch->InputFeatureMapMemory(), TensorAllocator::LinearAlloc, NPUTensorAlignment, verbose);
+        AllocateTensors(ops, schedule, _arch->OutputFeatureMapMemory(), TensorAllocator::LinearAlloc, NPUTensorAlignment, verbose);
     }
 }
 
@@ -1847,7 +1849,7 @@ void Scheduler::PrintSchedule(Schedule *schedule)
 }
 
 
-void ParseSchedulerOptions(SchedulerOptions &opt, IniReader &reader)
+bool ParseSchedulerOptions(SchedulerOptions &opt, IniReader &reader)
 {
     // Parse debug settings
     std::string key;
@@ -1904,9 +1906,21 @@ void ParseSchedulerOptions(SchedulerOptions &opt, IniReader &reader)
         {
             opt.separateIORegions = reader.Get();
         }
+        else if ( key == "cpu_tensor_alignment" )
+        {
+            opt.cpuTensorAlignment = reader.Get();
+        }
 
         reader.End();
     }
+
+    if ( opt.cpuTensorAlignment <= 0 || opt.cpuTensorAlignment % NPUTensorAlignment != 0 )
+    {
+        LOG_ERROR("CPU tensor alignment ({}) must be a positive multiple of {}\n", opt.cpuTensorAlignment, NPUTensorAlignment);
+        return false;
+    }
+
+    return true;
 }
diff --git a/ethosu/regor/compiler/scheduler.hpp b/ethosu/regor/compiler/scheduler.hpp
index 5747b907607ea11e34c48882845173ccbf8e34d8..d327003d10621f4aa991dbb8683abd069d26d716 100644
--- a/ethosu/regor/compiler/scheduler.hpp
+++ b/ethosu/regor/compiler/scheduler.hpp
@@ -66,6 +66,7 @@ struct SchedulerOptions
     bool verboseAllocation = false;
     Flags disabled;
     bool separateIORegions = false;
+    int cpuTensorAlignment = 16;
 };
 
 struct WeightScaleEncoding
@@ -370,6 +371,6 @@ private:
         Shape &ofmShape, Flags supportedFormats);
 };
 
-void ParseSchedulerOptions(SchedulerOptions &opt, IniReader &reader);
+bool ParseSchedulerOptions(SchedulerOptions &opt, IniReader &reader);
 
 } // namespace regor
diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py
index 4ef48ce402d340f6e3430589c3dc2b5d4608748c..686ab4e76080a5fd17b548b4925b2ffa78bdca02 100755
--- a/ethosu/vela/vela.py
+++ b/ethosu/vela/vela.py
@@ -825,6 +825,7 @@ def get_compiler_config(
     disable_buffering: bool,
     cop_format: str,
     separate_io_regions: bool,
+    cpu_tensor_alignment: int,
 ) -> str:
     """Build compiler config file."""
     config = "\n[compiler]\n"
@@ -859,6 +860,7 @@ def get_compiler_config(
     config = config.rstrip("|") + "\n"
     if separate_io_regions:
         config += "separate_io_regions=true\n"
+    config += f"cpu_tensor_alignment={cpu_tensor_alignment}\n"
 
     config += "\n[graph]\n"
     if verbose_graph:
@@ -1063,7 +1065,7 @@ def main(argv: Optional[List[str]] = None) -> int:
         type=int,
         default=Tensor.AllocationQuantum,
         help=(
-            "Controls the allocation byte alignment of cpu tensors including Ethos-U Custom"
+            "Controls the allocation byte alignment of CPU tensors including Ethos-U Custom"
            " operator inputs and outputs (default: %(default)s Bytes)"
        ),
    )
@@ -1228,6 +1230,7 @@ def main(argv: Optional[List[str]] = None) -> int:
         args.disable_buffering,
         args.cop_format,
         args.separate_io_regions,
+        args.cpu_tensor_alignment,
     )
 
     process_regor(
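
Note: with this change, the value of Vela's existing CPU tensor alignment command-line option (parsed into args.cpu_tensor_alignment, default 16 bytes) is forwarded to the regor scheduler through the generated compiler config. A minimal sketch of the resulting [scheduler] section follows; other keys are omitted and the value 64 is illustrative, not taken from the patch:

    [scheduler]
    cpu_tensor_alignment=64

ParseSchedulerOptions() accepts only positive multiples of NPUTensorAlignment (16); any other value is rejected and Compiler::ParseOptions() reports "Error parsing [scheduler]".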