diff --git a/Android.bp b/Android.bp
index 76a43caf97230b28df94ced64b2db6f5403ad31b..14290b9e1c9d0926add9eb57a6c0054187e7c025 100644
--- a/Android.bp
+++ b/Android.bp
@@ -51,6 +51,7 @@ opencl_srcs = [
"src/core/CL/cl_kernels/common/instance_normalization.cl",
"src/core/CL/cl_kernels/common/l2_normalize.cl",
"src/core/CL/cl_kernels/common/mat_mul.cl",
+ "src/core/CL/cl_kernels/common/mat_mul_mmul.cl",
"src/core/CL/cl_kernels/common/mat_mul_quantized.cl",
"src/core/CL/cl_kernels/common/mean_stddev_normalization.cl",
"src/core/CL/cl_kernels/common/memset.cl",
@@ -165,7 +166,9 @@ arm_compute_library_defaults {
"-DARM_COMPUTE_ENABLE_NEON",
"-Wno-unused-parameter",
"-DNO_DOT_IN_TOOLCHAIN",
- "-Wno-implicit-fallthrough"
+ "-Wno-implicit-fallthrough",
+ "-fPIC",
+ "-DACL_INTERNAL_TEST_CKW_IN_DF"
],
rtti: true,
}
@@ -176,6 +179,8 @@ cc_library_static {
proprietary: true,
local_include_dirs: ["build/android-arm64v8a/src/core",
"build/android-arm64v8a/src/core/CL",
+ "compute_kernel_writer/prototype/include",
+ "compute_kernel_writer/prototype",
"src/core/common",
"src/core/helpers",
"src/core/NEON/kernels/arm_gemm",
@@ -185,6 +190,15 @@ cc_library_static {
"src/cpu/kernels/assembly"],
export_include_dirs: [".", "./include"],
srcs: [
+ "compute_kernel_writer/prototype/src/Kernel.cpp",
+ "compute_kernel_writer/prototype/src/KernelArgument.cpp",
+ "compute_kernel_writer/prototype/src/KernelWriter.cpp",
+ "compute_kernel_writer/prototype/src/OperandBase.cpp",
+ "compute_kernel_writer/prototype/src/TensorInfo.cpp",
+ "compute_kernel_writer/prototype/src/TensorOperand.cpp",
+ "compute_kernel_writer/prototype/src/TensorTileSampler.cpp",
+ "compute_kernel_writer/prototype/src/TileInfo.cpp",
+ "compute_kernel_writer/prototype/src/TileOperand.cpp",
"src/c/AclContext.cpp",
"src/c/AclOperator.cpp",
"src/c/AclQueue.cpp",
@@ -313,9 +327,9 @@ cc_library_static {
"src/core/NEON/kernels/arm_conv/depthwise/depthwise_strategies_common.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/generic_quantized_dot_product.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/premultiply.cpp",
"src/core/NEON/kernels/arm_conv/pooling/kernels/cpp_nhwc_1x1_stride_any_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp",
"src/core/NEON/kernels/arm_conv/pooling/pooling_fp32.cpp",
@@ -363,7 +377,6 @@ cc_library_static {
"src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_4x4_3x3.cpp",
"src/core/NEON/kernels/convolution/winograd/output_transforms_fp16.cpp",
"src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp",
- "src/core/NEON/kernels/convolution/winograd/padding.cpp",
"src/core/NEON/kernels/convolution/winograd/weight_transforms/arm_fp32_2x2_3x3.cpp",
"src/core/NEON/kernels/convolution/winograd/weight_transforms/arm_fp32_2x2_5x5.cpp",
"src/core/NEON/kernels/convolution/winograd/weight_transforms/arm_fp32_4x4_3x3.cpp",
@@ -384,8 +397,14 @@ cc_library_static {
"src/core/Version.cpp",
"src/core/helpers/SoftmaxHelpers.cpp",
"src/core/helpers/WindowHelpers.cpp",
+ "src/core/utils/ActivationFunctionUtils.cpp",
"src/core/utils/AssemblyUtils.cpp",
+ "src/core/utils/DataLayoutUtils.cpp",
+ "src/core/utils/DataTypeUtils.cpp",
+ "src/core/utils/FormatUtils.cpp",
+ "src/core/utils/InterpolationPolicyUtils.cpp",
"src/core/utils/ScaleUtils.cpp",
+ "src/core/utils/StringUtils.cpp",
"src/core/utils/helpers/fft.cpp",
"src/core/utils/helpers/tensor_transform.cpp",
"src/core/utils/io/FileHandler.cpp",
@@ -604,6 +623,7 @@ cc_library_static {
"src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp",
"src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp",
"src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp",
+ "src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp",
"src/dynamic_fusion/sketch/attributes/CastAttributes.cpp",
"src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp",
"src/dynamic_fusion/sketch/attributes/Conv2dAttributes.cpp",
@@ -620,6 +640,16 @@ cc_library_static {
"src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp",
"src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp",
"src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp",
@@ -700,6 +730,7 @@ cc_library_static {
"src/gpu/cl/kernels/ClIndirectConv2dKernel.cpp",
"src/gpu/cl/kernels/ClMatMulLowpNativeKernel.cpp",
"src/gpu/cl/kernels/ClMatMulNativeKernel.cpp",
+ "src/gpu/cl/kernels/ClMatMulNativeMMULKernel.cpp",
"src/gpu/cl/kernels/ClMulKernel.cpp",
"src/gpu/cl/kernels/ClPermuteKernel.cpp",
"src/gpu/cl/kernels/ClPool2dKernel.cpp",
@@ -1020,7 +1051,6 @@ cc_library_static {
srcs: [
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_8b_mla.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
diff --git a/BUILD.bazel b/BUILD.bazel
index f1766d9582ca433953cd780649d8662027cb3eda..3a7d941a0e74811dd66794f95788ab9bd6f3c942 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -382,6 +382,7 @@ cc_library(
}),
includes = [
"arm_compute/runtime",
+ "src/core/NEON/kernels/arm_gemm",
"src/core/NEON/kernels/assembly",
"src/core/NEON/kernels/convolution/common",
"src/core/NEON/kernels/convolution/winograd",
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8ab69f73115bc27bda220d6b77582a3d0a73f042..bf029a7e9e15d032c4f6da46f74ccd9f34b0783c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,7 +28,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
list(APPEND CMAKE_MESSAGE_CONTEXT ArmCompute)
project(
ArmCompute
- VERSION 31.0.1
+ VERSION 32.0.0
DESCRIPTION
"The Arm Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A CPU and Arm® Mali™ GPU architectures"
LANGUAGES C CXX ASM)
@@ -197,6 +197,7 @@ target_include_directories(
${CMAKE_CURRENT_SOURCE_DIR}
PRIVATE src
src/cpu/kernels/assembly
+ src/core/NEON/kernels/arm_gemm
src/core/NEON/kernels/assembly
src/core/NEON/kernels/convolution/common
src/core/NEON/kernels/arm_conv/depthwise
@@ -223,6 +224,7 @@ target_include_directories(
${CMAKE_CURRENT_SOURCE_DIR}
PRIVATE src
src/cpu/kernels/assembly
+ src/core/NEON/kernels/arm_gemm
src/core/NEON/kernels/assembly
src/core/NEON/kernels/convolution/common
src/core/NEON/kernels/arm_conv/depthwise
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b11239035a13658c58b48e745baf7f62976313f7..171d101bd11dffb48b592db3b3ae8361ef2d2684 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,4 +1,4 @@
-Please read https://arm-software.github.io/ComputeLibrary/v23.02.1/contribution_guidelines.xhtml
+Please read https://arm-software.github.io/ComputeLibrary/latest/contribution_guidelines.xhtml
Here on github we only publish a snapshot of the main development branch for each release, that's the reason why we don't accept pull requests.
diff --git a/README.md b/README.md
index 8585ddd94fef5c5881b0922b3084b2e80e607086..a8f0def7a18b9ae2a940dcda026142f00c86b9a9 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
> **⚠ Important**
-> From release 22.05: 'master' branch has been replaced with 'main' following our inclusive language update, more information [here](https://arm-software.github.io/ComputeLibrary/v23.05.1/contribution_guidelines.xhtml#S5_0_inc_lang).
+> From release 22.05: 'master' branch has been replaced with 'main' following our inclusive language update, more information [here](https://arm-software.github.io/ComputeLibrary/latest/contribution_guidelines.xhtml#S5_0_inc_lang).
> **⚠ Important**
> From release 22.08: armv7a with Android build will no longer be tested or maintained.
@@ -16,7 +16,7 @@

-# Compute Library 
+# Compute Library 
The Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A, Arm® Neoverse® and Arm® Mali™ GPUs architectures.
@@ -44,7 +44,7 @@ Key Features:
## Documentation
-[](https://arm-software.github.io/ComputeLibrary/v23.05.1)
+[](https://arm-software.github.io/ComputeLibrary/latest)
> Note: The documentation includes the reference API, changelogs, build guide, contribution guide, errata, etc.
@@ -57,24 +57,24 @@ All the binaries can be downloaded from [here](https://github.com/ARM-software/C
| Platform | Operating System | Release archive (Download) |
| -------------- | ---------------- | -------------------------- |
-| Raspberry Pi 4 | Linux 32bit | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-linux-armv7a-neon.tar.gz) |
-| Raspberry Pi 4 | Linux 64bit | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-linux-arm64-v8a-neon.tar.gz) |
-| Odroid N2 | Linux 64bit | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-linux-arm64-v8a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-linux-arm64-v8a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-linux-arm64-v8a-neon-cl.tar.gz) |
-| HiKey960 | Linux 64bit | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-linux-arm64-v8a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-linux-arm64-v8a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-linux-arm64-v8a-neon-cl.tar.gz) |
+| Raspberry Pi 4 | Linux® 32bit | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-armv7a-neon.tar.gz) |
+| Raspberry Pi 4 | Linux® 64bit | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8a-neon.tar.gz) |
+| Odroid N2 | Linux® 64bit | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8a-neon-cl.tar.gz) |
+| HiKey960 | Linux® 64bit | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8a-neon-cl.tar.gz) |
| Architecture | Operating System | Release archive (Download) |
| ------------ | ---------------- | -------------------------- |
-| armv7 | Linux | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-linux-armv7a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-linux-armv7a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-linux-armv7a-neon-cl.tar.gz) |
-| arm64-v8a | Android | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-android-arm64-v8a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-android-arm64-v8a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-android-arm64-v8a-neon-cl.tar.gz) |
-| arm64-v8a | Linux | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-linux-arm64-v8a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-linux-arm64-v8a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-linux-arm64-v8a-neon-cl.tar.gz) |
-| arm64-v8.2-a | Android | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-android-arm64-v8.2-a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-android-arm64-v8.2-a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-android-arm64-v8.2-a-neon-cl.tar.gz) |
-| arm64-v8.2-a | Linux | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-linux-arm64-v8.2-a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-linux-arm64-v8.2-a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.05.1/arm_compute-v23.05.1-bin-linux-arm64-v8.2-a-neon-cl.tar.gz) |
+| armv7 | Linux® | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-armv7a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-armv7a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-armv7a-neon-cl.tar.gz) |
+| arm64-v8a | Android™ | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-android-arm64-v8a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-android-arm64-v8a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-android-arm64-v8a-neon-cl.tar.gz) |
+| arm64-v8a | Linux® | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8a-neon-cl.tar.gz) |
+| arm64-v8.2-a | Android™ | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-android-arm64-v8.2-a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-android-arm64-v8.2-a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-android-arm64-v8.2-a-neon-cl.tar.gz) |
+| arm64-v8.2-a | Linux® | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8.2-a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8.2-a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8.2-a-neon-cl.tar.gz) |
-Please refer to the following link for more pre-built binaries: [](https://github.com/ARM-software/ComputeLibrary/releases/tag/v23.05.1)
+Please refer to the following link for more pre-built binaries: [](https://github.com/ARM-software/ComputeLibrary/releases/tag/v23.08)
Pre-build binaries are generated with the following security / good coding practices related flags:
> -Wall, -Wextra, -Wformat=2, -Winit-self, -Wstrict-overflow=2, -Wswitch-default, -Woverloaded-virtual, -Wformat-security, -Wctor-dtor-privacy, -Wsign-promo, -Weffc++, -pedantic, -fstack-protector-strong
@@ -116,13 +116,13 @@ Pre-build binaries are generated with the following security / good coding pract
## Experimental builds
-**⚠ Important** Bazel and CMake builds are experimental CPU only builds, please see the [documentation](https://arm-software.github.io/ComputeLibrary/v23.05.1/how_to_build.xhtml) for more details.
+**⚠ Important** Bazel and CMake builds are experimental CPU only builds, please see the [documentation](https://arm-software.github.io/ComputeLibrary/latest/how_to_build.xhtml) for more details.
## How to contribute
-Contributions to the Compute Library are more than welcome. If you are interested on contributing, please have a look at our [how to contribute guidelines](https://arm-software.github.io/ComputeLibrary/v23.05.1/contribution_guidelines.xhtml).
+Contributions to the Compute Library are more than welcome. If you are interested on contributing, please have a look at our [how to contribute guidelines](https://arm-software.github.io/ComputeLibrary/latest/contribution_guidelines.xhtml).
### Developer Certificate of Origin (DCO)
Before the Compute Library accepts your contribution, you need to certify its origin and give us your permission. To manage this process we use the Developer Certificate of Origin (DCO) V1.1 (https://developercertificate.org/)
diff --git a/SConscript b/SConscript
index 75aedbf1cf692b4c29f462eab778f11947949251..467f84cb55ae335738975332b2859aa56c10be97 100644
--- a/SConscript
+++ b/SConscript
@@ -31,10 +31,17 @@ import zlib
import json
import codecs
-VERSION = "v23.05.1"
-LIBRARY_VERSION_MAJOR = 31
+from SCons.Warnings import warn, DeprecatedWarning
+
+warn(DeprecatedWarning,
+ "DEPRECATION NOTICE: Legacy libarm_compute_core has been deprecated and is scheduled for removal in 24.02 release."
+ " Link your application only to libarm_compute for core library functionality"
+ )
+
+VERSION = "v23.08"
+LIBRARY_VERSION_MAJOR = 32
LIBRARY_VERSION_MINOR = 0
-LIBRARY_VERSION_PATCH = 1
+LIBRARY_VERSION_PATCH = 0
SONAME_VERSION = str(LIBRARY_VERSION_MAJOR) + "." + str(LIBRARY_VERSION_MINOR) + "." + str(LIBRARY_VERSION_PATCH)
Import('env')
@@ -49,8 +56,6 @@ def build_bootcode_objs(sources):
return obj
-
-
# @brief Create a list of object from a given file list.
#
# @param arch_info A dictionary represents the architecture info such as the
@@ -113,6 +118,24 @@ def build_lib_objects():
return lib_static_objs, lib_shared_objs
+# The built-in SCons Glob() method does not support recursive searching of directories, thus we implement our own:
+def recursive_glob(root_dir, pattern):
+ files = []
+ regex = re.compile(pattern)
+
+ for dirpath, _, filenames in os.walk(root_dir):
+ for f in filenames:
+ f = os.path.join(dirpath, f)
+ if regex.match(f):
+ files.append(f)
+
+ return files
+
+
+def get_ckw_obj_list():
+ cmake_obj_dir = os.path.abspath("prototype/CMakeFiles/ckw_prototype.dir/src")
+    return recursive_glob(root_dir=cmake_obj_dir, pattern=r".*\.o$")
+
def build_library(name, build_env, sources, static=False, libs=[]):
cloned_build_env = build_env.Clone()
@@ -120,9 +143,25 @@ def build_library(name, build_env, sources, static=False, libs=[]):
cloned_build_env["LINKFLAGS"].remove('-pie')
cloned_build_env["LINKFLAGS"].remove('-static-libstdc++')
+ # -- Static Library --
if static:
- obj = cloned_build_env.StaticLibrary(name, source=sources, LIBS = arm_compute_env["LIBS"] + libs)
+ # Recreate the list to avoid mutating the original
+ static_sources = list(sources)
+
+ # Dynamic Fusion has a direct dependency on the Compute Kernel Writer (CKW) subproject, therefore we collect the
+ # built CKW objects to pack into the Compute Library archive.
+ if env['experimental_dynamic_fusion'] and name == "arm_compute-static":
+ static_sources += get_ckw_obj_list()
+
+ obj = cloned_build_env.StaticLibrary(name, source=static_sources, LIBS=arm_compute_env["LIBS"] + libs)
+
+ # -- Shared Library --
else:
+ # Always statically link Compute Library against CKW
+ if env['experimental_dynamic_fusion'] and name == "arm_compute":
+ libs.append('libckw_prototype.a')
+
+ # Add shared library versioning
if env['set_soname']:
obj = cloned_build_env.SharedLibrary(name, source=sources, SHLIBVERSION = SONAME_VERSION, LIBS = arm_compute_env["LIBS"] + libs)
else:
@@ -220,7 +259,8 @@ def create_version_file(target, source, env):
except (OSError, subprocess.CalledProcessError):
git_hash="unknown"
- build_info = "\"arm_compute_version=%s Build options: %s Git hash=%s\"" % (VERSION, vars.args, git_hash.strip())
+ build_options = str(vars.args).replace('"', '\\"')
+    build_info = "\"arm_compute_version=%s Build options: %s Git hash=%s\"" % (VERSION, build_options, git_hash.strip())
with open(target[0].get_path(), "w") as fd:
fd.write(build_info)
@@ -394,6 +434,7 @@ if env['opencl'] and env['embed_kernels']:
'src/core/CL/cl_kernels/common/instance_normalization.cl',
'src/core/CL/cl_kernels/common/l2_normalize.cl',
'src/core/CL/cl_kernels/common/mat_mul.cl',
+ 'src/core/CL/cl_kernels/common/mat_mul_mmul.cl',
'src/core/CL/cl_kernels/common/mat_mul_quantized.cl',
'src/core/CL/cl_kernels/common/mean_stddev_normalization.cl',
'src/core/CL/cl_kernels/common/memset.cl',
@@ -493,7 +534,8 @@ arm_compute_env.Append(CPPDEFINES = [('ARM_COMPUTE_VERSION_MAJOR', LIBRARY_VERSI
# Don't allow undefined references in the libraries:
undefined_flag = '-Wl,-undefined,error' if 'macos' in arm_compute_env["os"] else '-Wl,--no-undefined'
-arm_compute_env.Append(LINKFLAGS=[undefined_flag])
+if not env['thread_sanitizer']:
+ arm_compute_env.Append(LINKFLAGS=[undefined_flag])
arm_compute_env.Append(CPPPATH =[Dir("./src/core/").path] )
if env['os'] != 'openbsd':
@@ -522,7 +564,14 @@ if env['fixed_format_kernels']:
# Experimental files
# Dynamic fusion
if env['experimental_dynamic_fusion']:
- lib_files += filelist['experimental']['dynamic_fusion']
+ lib_files += filelist['experimental']['dynamic_fusion']['common']
+ lib_files += filelist['experimental']['dynamic_fusion']['template_writer']
+
+if "ACL_INTERNAL_TEST_CKW_IN_DF" in env["extra_cxx_flags"]:
+ if not env["experimental_dynamic_fusion"]:
+        print("To use ACL_INTERNAL_TEST_CKW_IN_DF, experimental_dynamic_fusion must be set to 1")
+ Exit(1)
+ lib_files += filelist['experimental']['dynamic_fusion']['ckw_driver']
# Logging files
if env["logging"]:
@@ -548,7 +597,7 @@ custom_operators = []
custom_types = []
custom_layouts = []
-use_custom_ops = env['high_priority'] or env['build_config'];
+use_custom_ops = env['high_priority'] or env['build_config']
if env['high_priority']:
custom_operators = filelist['high_priority']
@@ -574,7 +623,8 @@ lib_files_sve2 = []
if env['neon']:
# build winograd/depthwise sources for either v7a / v8a
- arm_compute_env.Append(CPPPATH = ["src/core/NEON/kernels/convolution/common/",
+ arm_compute_env.Append(CPPPATH = ["src/core/NEON/kernels/arm_gemm",
+ "src/core/NEON/kernels/convolution/common/",
"src/core/NEON/kernels/convolution/winograd/",
"src/core/NEON/kernels/arm_conv/depthwise/",
"src/core/NEON/kernels/arm_conv/pooling/",
@@ -682,7 +732,7 @@ arm_compute_graph_env = arm_compute_env.Clone()
# Build graph libraries
arm_compute_graph_env.Append(CXXFLAGS = ['-Wno-redundant-move', '-Wno-pessimizing-move'])
-arm_compute_graph_a = build_library('arm_compute_graph-static', arm_compute_graph_env, graph_files, static=True, libs = [ arm_compute_a ])
+arm_compute_graph_a = build_library('arm_compute_graph-static', arm_compute_graph_env, graph_files, static=True)
Export('arm_compute_graph_a')
if env['os'] != 'bare_metal' and not env['standalone']:
diff --git a/SConstruct b/SConstruct
index f6c90c3098d2f4793ccef605779800927f74d7d6..68c518a4a0af20a978d9ef0487d3f6e7e41d3282 100644
--- a/SConstruct
+++ b/SConstruct
@@ -25,7 +25,7 @@
import SCons
import json
import os
-from subprocess import check_output
+import subprocess
def version_at_least(version, required):
@@ -125,7 +125,7 @@ vars.AddVariables(
├── datasets
├── fixtures
└── Neon\n""", "", PathVariable.PathAccept),
- BoolVariable("experimental_dynamic_fusion", "Build the experimental dynamic fusion files", False),
+ BoolVariable("experimental_dynamic_fusion", "Build the experimental dynamic fusion files. This option also enables opencl=1 on which it has a direct dependency.", False),
BoolVariable("fixed_format_kernels", "Enable fixed format kernels for GEMM", False),
BoolVariable("mapfile", "Generate a map file", False),
ListVariable("custom_options", "Custom options that can be used to turn on/off features", "none", ["disable_mmla_fp"]),
@@ -133,6 +133,7 @@ vars.AddVariables(
ListVariable("data_layout_support", "Enable a list of data layout to support", "all", ["nhwc", "nchw"]),
("toolchain_prefix", "Override the toolchain prefix; used by all toolchain components: compilers, linker, assembler etc. If unspecified, use default(auto) prefixes; if passed an empty string '' prefixes would be disabled", "auto"),
("compiler_prefix", "Override the compiler prefix; used by just compilers (CC,CXX); further overrides toolchain_prefix for compilers; this is for when the compiler prefixes are different from that of the linkers, archivers etc. If unspecified, this is the same as toolchain_prefix; if passed an empty string '' prefixes would be disabled", "auto"),
+ BoolVariable("thread_sanitizer", "Enable ThreadSanitizer", False),
("extra_cxx_flags", "Extra CXX flags to be appended to the build command", ""),
("extra_link_flags", "Extra LD flags to be appended to the build command", ""),
("compiler_cache", "Command to prefix to the C and C++ compiler (e.g ccache)", ""),
@@ -160,7 +161,7 @@ install_path = env['install_dir']
if not env['install_dir'].startswith('/') and install_path != "":
install_path = "%s/%s" % (build_path, install_path)
-env.Append(LIBPATH = [build_path])
+env.Append(LIBPATH = [build_path, os.path.join(build_path, "prototype")])
Export('env')
Export('vars')
@@ -214,6 +215,10 @@ if env['os'] == 'bare_metal':
print("ERROR: OpenMP and C++11 threads not supported in bare_metal. Use cppthreads=0 openmp=0")
Exit(1)
+if env['experimental_dynamic_fusion']:
+ # Dynamic Fusion on GPU has a direct dependency on OpenCL and Compute Kernel Writer
+ env['opencl'] = 1
+
if env['opencl'] and env['embed_kernels'] and env['compress_kernels'] and env['os'] not in ['android']:
print("Compressed kernels are supported only for android builds")
Exit(1)
@@ -415,12 +420,57 @@ print("Using compilers:")
print("CC", env['CC'])
print("CXX", env['CXX'])
+"""Build the Compute Kernel Writer subproject"""
+if env['experimental_dynamic_fusion']:
+ # Strip ccache prefix from CC and CXX to obtain only the target triple
+    CKW_CC = env['CC'].replace(env['compiler_cache'] + " ", "") if env['compiler_cache'] else env['CC']
+    CKW_CXX = env['CXX'].replace(env['compiler_cache'] + " ", "") if env['compiler_cache'] else env['CXX']
+ CKW_CCACHE = 1 if env['compiler_cache'] else 0
+
+ CKW_BUILD_TYPE = "Debug" if env['debug'] else "Release"
+
+ CKW_ENABLE_OPENCL = env['opencl']
+ CKW_ENABLE_ASSERTS = env['debug'] or env['asserts']
+
+ CKW_PROJECT_DIR = Dir('.').path + "/compute_kernel_writer"
+ CKW_INCLUDE_DIR = CKW_PROJECT_DIR + "/prototype/include"
+ CKW_BUILD_DIR = build_path.replace("#", "")
+
+ CKW_CMAKE_CMD = "CC={CKW_CC} CXX={CKW_CXX} cmake -G \"Unix Makefiles\" " \
+ "-S {CKW_PROJECT_DIR} -B {CKW_BUILD_DIR} " \
+ "-DCMAKE_BUILD_TYPE={CKW_BUILD_TYPE} " \
+ "-DCKW_ENABLE_OPENCL={CKW_ENABLE_OPENCL} " \
+ "-DCKW_ENABLE_ASSERTS={CKW_ENABLE_ASSERTS} " \
+ "-DCKW_BUILD_PROTOTYPE=ON " \
+ "-DCKW_CCACHE={CKW_CCACHE} ".format(CKW_CC=CKW_CC,
+ CKW_CXX=CKW_CXX,
+ CKW_PROJECT_DIR=CKW_PROJECT_DIR,
+ CKW_BUILD_DIR=CKW_BUILD_DIR,
+ CKW_BUILD_TYPE=CKW_BUILD_TYPE,
+ CKW_ENABLE_OPENCL=CKW_ENABLE_OPENCL,
+ CKW_ENABLE_ASSERTS=CKW_ENABLE_ASSERTS,
+ CKW_CCACHE=CKW_CCACHE
+ )
+
+ # Configure CKW static objects with -fPIC (CMAKE_POSITION_INDEPENDENT_CODE) option to enable linking statically to ACL
+ CKW_CMAKE_CONFIGURE_STATIC = CKW_CMAKE_CMD + "-DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON"
+ CKW_CMAKE_BUILD = "cmake --build {CKW_BUILD_DIR} --target ckw_prototype -j{NUM_JOBS}".format(CKW_BUILD_DIR=CKW_BUILD_DIR,
+ NUM_JOBS=GetOption('num_jobs')
+ )
+
+ # Build Compute Kernel Writer Static Library
+ subprocess.check_call(CKW_CMAKE_CONFIGURE_STATIC, stderr=subprocess.STDOUT, shell=True)
+ subprocess.check_call(CKW_CMAKE_BUILD, stderr=subprocess.STDOUT, shell=True)
+
+ # Let ACL know where to find CKW headers
+ env.Append(CPPPATH = CKW_INCLUDE_DIR)
+
if not GetOption("help"):
try:
if env['os'] == 'windows':
- compiler_ver = check_output("clang++ -dumpversion").decode().strip()
+ compiler_ver = subprocess.check_output("clang++ -dumpversion").decode().strip()
else:
- compiler_ver = check_output(env['CXX'].split() + ["-dumpversion"]).decode().strip()
+ compiler_ver = subprocess.check_output(env['CXX'].split() + ["-dumpversion"]).decode().strip()
except OSError:
print("ERROR: Compiler '%s' not found" % env['CXX'])
Exit(1)
@@ -564,6 +614,10 @@ if env['asserts']:
if env['logging']:
env.Append(CPPDEFINES = ['ARM_COMPUTE_LOGGING_ENABLED'])
+if env['thread_sanitizer']:
+ env.Append(CXXFLAGS = ['-fsanitize=thread'])
+ env.Append(LINKFLAGS = ['-fsanitize=thread'])
+
env.Append(CPPPATH = ['#/include', "#"])
env.Append(CXXFLAGS = env['extra_cxx_flags'])
env.Append(LINKFLAGS = env['extra_link_flags'])
diff --git a/arm_compute/BUILD.bazel b/arm_compute/BUILD.bazel
index 641a327d515431d4ab78797781d79424e943f522..d1219015b7599a7d4f28a2c9d32de00b876be2f8 100644
--- a/arm_compute/BUILD.bazel
+++ b/arm_compute/BUILD.bazel
@@ -28,6 +28,7 @@ cc_library(
"*.h",
"*.hpp",
"dynamic_fusion/**/*.h",
+ "function_info/**/*.h",
]),
visibility = ["//visibility:public"],
)
diff --git a/arm_compute/core/CoreTypes.h b/arm_compute/core/CoreTypes.h
new file mode 100644
index 0000000000000000000000000000000000000000..4a48a366518b99cc82893a102ee62360a8f3da10
--- /dev/null
+++ b/arm_compute/core/CoreTypes.h
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_ARM_COMPUTE_CORE_CORETYPES
+#define ACL_ARM_COMPUTE_CORE_CORETYPES
+
+#include "arm_compute/core/Strides.h"
+#include "support/Half.h"
+
+/** CoreTypes.h groups together essential small types that are used across functions */
+
+namespace arm_compute
+{
+/** 16-bit floating point type */
+using half = half_float::half;
+/** Permutation vector */
+using PermutationVector = Strides;
+
+/** Available channels */
+enum class Channel
+{
+ UNKNOWN, /**< Unknown channel format */
+ C0, /**< First channel (used by formats with unknown channel types). */
+ C1, /**< Second channel (used by formats with unknown channel types). */
+ C2, /**< Third channel (used by formats with unknown channel types). */
+ C3, /**< Fourth channel (used by formats with unknown channel types). */
+ R, /**< Red channel. */
+ G, /**< Green channel. */
+ B, /**< Blue channel. */
+ A, /**< Alpha channel. */
+ Y, /**< Luma channel. */
+ U, /**< Cb/U channel. */
+ V /**< Cr/V/Value channel. */
+};
+
+/** Image colour formats */
+enum class Format
+{
+ UNKNOWN, /**< Unknown image format */
+ U8, /**< 1 channel, 1 U8 per channel */
+ S16, /**< 1 channel, 1 S16 per channel */
+ U16, /**< 1 channel, 1 U16 per channel */
+ S32, /**< 1 channel, 1 S32 per channel */
+ U32, /**< 1 channel, 1 U32 per channel */
+ S64, /**< 1 channel, 1 S64 per channel */
+ U64, /**< 1 channel, 1 U64 per channel */
+ BFLOAT16, /**< 16-bit brain floating-point number */
+ F16, /**< 1 channel, 1 F16 per channel */
+ F32, /**< 1 channel, 1 F32 per channel */
+ UV88, /**< 2 channel, 1 U8 per channel */
+ RGB888, /**< 3 channels, 1 U8 per channel */
+ RGBA8888, /**< 4 channels, 1 U8 per channel */
+ YUV444, /**< A 3 plane of 8 bit 4:4:4 sampled Y, U, V planes */
+ YUYV422, /**< A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes */
+ NV12, /**< A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling */
+ NV21, /**< A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 sampling */
+ IYUV, /**< A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes */
+ UYVY422 /**< A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 byte */
+};
+
+/** Available data types */
+enum class DataType
+{
+ UNKNOWN, /**< Unknown data type */
+ U8, /**< unsigned 8-bit number */
+ S8, /**< signed 8-bit number */
+ QSYMM8, /**< quantized, symmetric fixed-point 8-bit number */
+ QASYMM8, /**< quantized, asymmetric fixed-point 8-bit number unsigned */
+ QASYMM8_SIGNED, /**< quantized, asymmetric fixed-point 8-bit number signed */
+ QSYMM8_PER_CHANNEL, /**< quantized, symmetric per channel fixed-point 8-bit number */
+ U16, /**< unsigned 16-bit number */
+ S16, /**< signed 16-bit number */
+ QSYMM16, /**< quantized, symmetric fixed-point 16-bit number */
+ QASYMM16, /**< quantized, asymmetric fixed-point 16-bit number */
+ U32, /**< unsigned 32-bit number */
+ S32, /**< signed 32-bit number */
+ U64, /**< unsigned 64-bit number */
+ S64, /**< signed 64-bit number */
+ BFLOAT16, /**< 16-bit brain floating-point number */
+ F16, /**< 16-bit floating-point number */
+ F32, /**< 32-bit floating-point number */
+ F64, /**< 64-bit floating-point number */
+ SIZET /**< size_t */
+};
+
+/** [DataLayout enum definition] **/
+
+/** Supported tensor data layouts */
+enum class DataLayout
+{
+ UNKNOWN, /**< Unknown data layout */
+ NCHW, /**< Num samples, channels, height, width */
+ NHWC, /**< Num samples, height, width, channels */
+ NCDHW, /**< Num samples, channels, depth, height, width */
+ NDHWC /**< Num samples, depth, height, width, channels */
+};
+/** [DataLayout enum definition] **/
+
+/** Supported tensor data layout dimensions */
+enum class DataLayoutDimension
+{
+ CHANNEL, /**< channel */
+ HEIGHT, /**< height */
+ WIDTH, /**< width */
+ DEPTH, /**< depth */
+ BATCHES /**< batches */
+};
+
+/** Dimension rounding type when down-scaling on CNNs
+ * @note Used in pooling and convolution layer
+ */
+enum class DimensionRoundingType
+{
+ FLOOR, /**< Floor rounding */
+ CEIL /**< Ceil rounding */
+};
+
+class PadStrideInfo
+{
+public:
+ /** Constructor
+ *
+ * @param[in] stride_x (Optional) Stride, in elements, across x. Defaults to 1.
+ * @param[in] stride_y (Optional) Stride, in elements, across y. Defaults to 1.
+ * @param[in] pad_x (Optional) Padding, in elements, across x. Defaults to 0.
+ * @param[in] pad_y (Optional) Padding, in elements, across y. Defaults to 0.
+ * @param[in] round (Optional) Dimensions rounding. Defaults to @ref DimensionRoundingType::FLOOR.
+ */
+ PadStrideInfo(unsigned int stride_x = 1, unsigned int stride_y = 1,
+ unsigned int pad_x = 0, unsigned int pad_y = 0,
+ DimensionRoundingType round = DimensionRoundingType::FLOOR)
+ : _stride(std::make_pair(stride_x, stride_y)),
+ _pad_left(pad_x),
+ _pad_top(pad_y),
+ _pad_right(pad_x),
+ _pad_bottom(pad_y),
+ _round_type(round)
+ {
+ }
+ /** Constructor
+ *
+ * @param[in] stride_x Stride, in elements, across x.
+ * @param[in] stride_y Stride, in elements, across y.
+ * @param[in] pad_left Padding across x on the left, in elements.
+ * @param[in] pad_right Padding across x on the right, in elements.
+ * @param[in] pad_top Padding across y on the top, in elements.
+ * @param[in] pad_bottom Padding across y on the bottom, in elements.
+ * @param[in] round Dimensions rounding.
+ */
+ PadStrideInfo(unsigned int stride_x, unsigned int stride_y,
+ unsigned int pad_left, unsigned int pad_right,
+ unsigned int pad_top, unsigned int pad_bottom,
+ DimensionRoundingType round)
+ : _stride(std::make_pair(stride_x, stride_y)),
+ _pad_left(pad_left),
+ _pad_top(pad_top),
+ _pad_right(pad_right),
+ _pad_bottom(pad_bottom),
+ _round_type(round)
+ {
+ }
+ /** Get the stride.
+ *
+ * @return a pair: stride x, stride y.
+ */
+ std::pair<unsigned int, unsigned int> stride() const
+ {
+ return _stride;
+ }
+ /** Check whether the padding is symmetric.
+ *
+ * @return True if the padding is symmetric.
+ */
+ bool padding_is_symmetric() const
+ {
+ return (_pad_left == _pad_right) && (_pad_top == _pad_bottom);
+ }
+ /** Get the padding.
+ *
+ * @note This should only be used when the padding is symmetric.
+ *
+ * @return a pair: padding left/right, padding top/bottom
+ */
+ std::pair<unsigned int, unsigned int> pad() const
+ {
+ //this accessor should be used only when padding is symmetric
+ ARM_COMPUTE_ERROR_ON(!padding_is_symmetric());
+ return std::make_pair(_pad_left, _pad_top);
+ }
+
+ /** Get the left padding */
+ unsigned int pad_left() const
+ {
+ return _pad_left;
+ }
+ /** Get the right padding */
+ unsigned int pad_right() const
+ {
+ return _pad_right;
+ }
+ /** Get the top padding */
+ unsigned int pad_top() const
+ {
+ return _pad_top;
+ }
+ /** Get the bottom padding */
+ unsigned int pad_bottom() const
+ {
+ return _pad_bottom;
+ }
+
+ /** Get the rounding type */
+ DimensionRoundingType round() const
+ {
+ return _round_type;
+ }
+
+ /** Check whether this has any padding */
+ bool has_padding() const
+ {
+ return (_pad_left != 0 || _pad_top != 0 || _pad_right != 0 || _pad_bottom != 0);
+ }
+
+private:
+ std::pair<unsigned int, unsigned int> _stride;
+ unsigned int _pad_left;
+ unsigned int _pad_top;
+ unsigned int _pad_right;
+ unsigned int _pad_bottom;
+
+ DimensionRoundingType _round_type;
+};
+
+/** Memory layouts for the weights tensor.
+ *
+ * * UNSPECIFIED is used to select kernels that do not run in
+ * variable weights mode.
+ *
+ * * ANY is used to query the kernel database to retrieve any of the
+ * kernels that runs in variable weights mode. Once a kernel is
+ * found, the specific format expected by the kernel can be
+ * retrieved by the user for reordering the weights tensor
+ * accordingly.
+ *
+ * The other values OHWIo{interleave_by}i{block_by} describe the
+ * memory layout of a 4D tensor with layout OHWI that has been
+ * transformed into a 4D tensor with dimensions O'HWI' where:
+ *
+ * O' = first multiple of {interleave_by} s.t. O<=O'
+ * I' = first multiple of {block_by} s.t. I<=I'
+ *
+ * The total size of the dst tensor is O' x H x W x I'
+ *
+ * The access function of the tensor with layout
+ * OHWIo{interleave_by}i{block_by} and size O'HWI' is a 6-parameter
+ * access function, where the 6 parameters are computed as follows:
+ *
+ * x5 = floor(o/{interleave_by}) RANGE [0, O'/{interleave_by} -1] SIZE: O'/{interleave_by}
+ *
+ * x4 = h RANGE [0, H-1] SIZE: H
+ * x3 = w RANGE [0, W-1] SIZE: W
+ * x2 = floor(i/{block_by}) RANGE [0, I'/{block_by} -1] SIZE: I'/{block_by}
+ * x1 = o%{interleave_by} RANGE [0, {interleave_by} -1] SIZE: {interleave_by}
+ * x0 = i%{block_by} RANGE [0, {block_by} -1] SIZE: {block_by}
+ * TOTAL SIZE: O' * H * W * I'
+ *
+ * 4D 6D
+ * ----------------- -----------------------------------
+ * value(o, h, w, i) = x5 * H * W * I' * {interleave_by}
+ * + x4 * W * I' * {interleave_by}
+ * + x3 * I' * {interleave_by}
+ * + x2 * {interleave_by} * {block_by}
+ * + x1 * {block_by}
+ * + x0
+ *
+ * Notice that in arm_gemm the 4D tensor of dimension O'HWI' created
+ * for the OHWIo{interleave_by}i{block_by} format is in reality seen
+ * as a 2D tensor, where the number of rows is O'/{interleave_by}
+ * and the number of columns is {interleave_by} * H * W * I'.
+ *
+ * The postfix *_bf16 is for the memory layout needed for the
+ * fast-mode kernels, in which the weights are passed in bfloat16
+ * format.
+ */
+enum class WeightFormat
+{
+ UNSPECIFIED = 0x1,
+ ANY = 0x2,
+ OHWI = 0x100100,
+ OHWIo2 = 0x100200,
+ OHWIo4 = 0x100400,
+ OHWIo8 = 0x100800,
+ OHWIo16 = 0x101000,
+ OHWIo32 = 0x102000,
+ OHWIo64 = 0x104000,
+ OHWIo128 = 0x108000,
+ OHWIo4i2 = 0x200400,
+ OHWIo4i2_bf16 = 0x200410,
+ OHWIo8i2 = 0x200800,
+ OHWIo8i2_bf16 = 0x200810,
+ OHWIo16i2 = 0x201000,
+ OHWIo16i2_bf16 = 0x201010,
+ OHWIo32i2 = 0x202000,
+ OHWIo32i2_bf16 = 0x202010,
+ OHWIo64i2 = 0x204000,
+ OHWIo64i2_bf16 = 0x204010,
+ OHWIo4i4 = 0x400400,
+ OHWIo4i4_bf16 = 0x400410,
+ OHWIo8i4 = 0x400800,
+ OHWIo8i4_bf16 = 0x400810,
+ OHWIo16i4 = 0x401000,
+ OHWIo16i4_bf16 = 0x401010,
+ OHWIo32i4 = 0x402000,
+ OHWIo32i4_bf16 = 0x402010,
+ OHWIo64i4 = 0x404000,
+ OHWIo64i4_bf16 = 0x404010,
+ OHWIo2i8 = 0x800200,
+ OHWIo4i8 = 0x800400,
+ OHWIo8i8 = 0x800800,
+ OHWIo16i8 = 0x801000,
+ OHWIo32i8 = 0x802000,
+ OHWIo64i8 = 0x804000
+};
+
+} // namespace arm_compute
+#endif /* ACL_ARM_COMPUTE_CORE_CORETYPES */
diff --git a/arm_compute/core/Error.h b/arm_compute/core/Error.h
index c9a0d85f0c24aa2f6582ccfa74edfc1386cdd7e1..0854f2c5274eddb3a5ac5052c4732548719d6d95 100644
--- a/arm_compute/core/Error.h
+++ b/arm_compute/core/Error.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019, 2021 Arm Limited.
+ * Copyright (c) 2016-2019, 2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -204,9 +204,10 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_RETURN_ON_ERROR(status) \
do \
{ \
- if(!bool(status)) \
+ const auto s = status; \
+ if(!bool(s)) \
{ \
- return status; \
+ return s; \
} \
} while(false)
diff --git a/arm_compute/core/ITensorInfo.h b/arm_compute/core/ITensorInfo.h
index 7b0fd1c2b70a539a724f5a362bb6b8967f1af3a3..e7c0b182c69e5f192dafb2d33f8498a815f33d38 100644
--- a/arm_compute/core/ITensorInfo.h
+++ b/arm_compute/core/ITensorInfo.h
@@ -28,7 +28,6 @@
#include "arm_compute/core/Strides.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/Utility.h"
#include "support/ICloneable.h"
@@ -36,6 +35,7 @@
namespace arm_compute
{
+class QuantizationInfo;
// Note: Any changes to the fields of the class below that have setters should be mirrored
// (if possible) in the auto_init_if_empty function in AutoConfiguration.h
diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h
index f637351e265f0aec559d90a588649ba9578ff1b0..305766e8251721935efcd581388951674921ffc7 100644
--- a/arm_compute/core/KernelDescriptors.h
+++ b/arm_compute/core/KernelDescriptors.h
@@ -27,6 +27,7 @@
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/experimental/IPostOp.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
namespace arm_compute
{
diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h
index 0e3d26c515b0f05a4e9ee7c778aca30b3a3a6fd9..790f58a7935ab3f210a1038695506c177e5dbd6f 100644
--- a/arm_compute/core/PixelValue.h
+++ b/arm_compute/core/PixelValue.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_PIXELVALUE_H
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/QuantizationInfo.h"
 #include <cstdint>
diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h
index ddf1342e3227ced70c10a862459edb92987d7bc0..8fa513eee1e61e442eae70589bcbe6166da27fd3 100644
--- a/arm_compute/core/QuantizationInfo.h
+++ b/arm_compute/core/QuantizationInfo.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,13 +24,10 @@
#ifndef ARM_COMPUTE_QUANTIZATION_INFO_H
#define ARM_COMPUTE_QUANTIZATION_INFO_H
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Rounding.h"
+#include "arm_compute/core/utils/misc/Utility.h"
#include "support/ToolchainSupport.h"
-#include "utils/misc/Utility.h"
-#include
-#include
#include
namespace arm_compute
@@ -399,57 +396,6 @@ inline float dequantize_qsymm8(int8_t value, const UniformQuantizationInfo &qinf
return value * qinfo.scale;
}
-inline qasymm8_t qasymm8_hard_swish(qasymm8_t in,
- const UniformQuantizationInfo &qi_in,
- const UniformQuantizationInfo &qi_out)
-{
- float tmp_f = dequantize_qasymm8(in, qi_in);
- tmp_f = tmp_f * ((std::min(std::max((tmp_f + 3), 0.0f), 6.0f)) * 0.166666667f);
- const qasymm8_t tmp = quantize_qasymm8(tmp_f, qi_out);
- return tmp;
-}
-
-inline qasymm8_signed_t qasymm8_signed_hard_swish(qasymm8_signed_t in,
- const UniformQuantizationInfo &qi_in,
- const UniformQuantizationInfo &qi_out)
-{
- float tmp_f = dequantize_qasymm8_signed(in, qi_in);
- tmp_f = tmp_f * ((std::min(std::max((tmp_f + 3), 0.0f), 6.0f)) * 0.166666667f);
- const qasymm8_t tmp = quantize_qasymm8_signed(tmp_f, qi_out);
- return tmp;
-}
-
-inline qasymm8_t qasymm8_leaky_relu(qasymm8_t in,
- const UniformQuantizationInfo &qi_in,
- const UniformQuantizationInfo &qi_out,
- float alpha)
-{
- float tmp_f = dequantize_qasymm8(in, qi_in);
- tmp_f = tmp_f > 0 ? tmp_f : tmp_f * alpha;
- const qasymm8_t tmp = quantize_qasymm8(tmp_f, qi_out);
- return tmp;
-}
-
-inline qasymm8_t qasymm8_logistic(qasymm8_t in,
- const UniformQuantizationInfo &qi_in,
- const UniformQuantizationInfo &qi_out)
-{
- float tmp_f = dequantize_qasymm8(in, qi_in);
- tmp_f = 1.f / (1.f + std::exp(-tmp_f));
- const qasymm8_t tmp = quantize_qasymm8(tmp_f, qi_out);
- return tmp;
-}
-
-inline qasymm8_signed_t qasymm8_signed_logistic(qasymm8_signed_t in,
- const UniformQuantizationInfo &qi_in,
- const UniformQuantizationInfo &qi_out)
-{
- float tmp_f = dequantize_qasymm8_signed(in, qi_in);
- tmp_f = 1.f / (1.f + std::exp(-tmp_f));
- const qasymm8_signed_t tmp = quantize_qasymm8_signed(tmp_f, qi_out);
- return tmp;
-}
-
/** Dequantize a value given a 8-bit symmetric quantization scheme
*
* @param[in] value Value to dequantize
diff --git a/arm_compute/core/TensorInfo.h b/arm_compute/core/TensorInfo.h
index 8436407a7521f975ed681fcf9d62fda2eb705178..e738a797b298ff3d468a8e55f74fa627350e5ccf 100644
--- a/arm_compute/core/TensorInfo.h
+++ b/arm_compute/core/TensorInfo.h
@@ -32,7 +32,6 @@
#include "arm_compute/core/Strides.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
#include
#include
diff --git a/arm_compute/core/TensorShape.h b/arm_compute/core/TensorShape.h
index b6ab9dc75a7392d1efa3ec1ed16fae1971456d86..4c9186ac64cfb5c847692781de6ab0d7f2faed6a 100644
--- a/arm_compute/core/TensorShape.h
+++ b/arm_compute/core/TensorShape.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -106,9 +106,10 @@ public:
*
* @note The upper dimensions of the tensor shape will be shifted down by 1
*
- * @param[in] n Dimension to remove
+ * @param[in] n Dimension to remove
+ * @param[in] apply_dim_correction (Optional) Flag to state whether apply dimension correction (removing trailing dimensions with size of 1) after removing a dimension.
*/
- void remove_dimension(size_t n)
+ void remove_dimension(size_t n, bool apply_dim_correction = true)
{
ARM_COMPUTE_ERROR_ON(_num_dimensions < 1);
ARM_COMPUTE_ERROR_ON(n >= _num_dimensions);
@@ -122,7 +123,10 @@ public:
std::fill(_id.begin() + _num_dimensions, _id.end(), 1);
// Correct number dimensions to ignore trailing dimensions of size 1
- apply_dimension_correction();
+ if(apply_dim_correction)
+ {
+ apply_dimension_correction();
+ }
}
/** Collapse the first n dimensions.
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 8a1d189e8ea6d28040ed4084ba03b589de864012..12d860205ead9c91301fc4af498d248e75e0556b 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -21,19 +21,53 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TYPES_H
-#define ARM_COMPUTE_TYPES_H
+#ifndef ACL_ARM_COMPUTE_CORE_TYPES
+#define ACL_ARM_COMPUTE_CORE_TYPES
+
+/** The following symbols have been moved to:
+ * half
+ * PermutationVector
+ * Format
+ * DataType
+ * DataLayout
+ * DataLayoutDimension
+ * PadStrideInfo
+ * WeightFormat
+ * Channel
+ * DimensionRoundingType
+ */
+#include "arm_compute/core/CoreTypes.h"
+/** The following symbols have been moved to:
+ * ActivationFunction
+ * ActivationLayerInfo
+ */
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+/** The following symbols have been moved to:
+ * ConvolutionInfo
+ */
+#include "arm_compute/function_info/ConvolutionInfo.h"
+/** The following symbols have been moved to:
+ * FullyConnectedLayerInfo
+ */
+#include "arm_compute/function_info/FullyConnectedLayerInfo.h"
+/** The following symbols have been moved to:
+ * GEMMLowpOutputStageType
+ * GEMMLowpOutputStageInfo
+ * GEMMInfo
+ */
+#include "arm_compute/function_info/GEMMInfo.h"
+/** The following symbols have been moved to:
+ * MatMulInfo
+ */
+#include "arm_compute/function_info/MatMulInfo.h"
#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/QuantizationInfo.h"
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Size3D.h"
-#include "arm_compute/core/Strides.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/experimental/IPostOp.h"
#include "arm_compute/core/utils/misc/Macros.h"
#include "support/Bfloat16.h"
-#include "support/Half.h"
#include
#include
@@ -44,62 +78,9 @@
namespace arm_compute
{
-/** 16-bit floating point type */
-using half = half_float::half;
-
-/** Permutation vector */
-using PermutationVector = Strides;
/** Bidirectional strides */
using BiStrides = Coordinates;
-/** Image colour formats */
-enum class Format
-{
- UNKNOWN, /**< Unknown image format */
- U8, /**< 1 channel, 1 U8 per channel */
- S16, /**< 1 channel, 1 S16 per channel */
- U16, /**< 1 channel, 1 U16 per channel */
- S32, /**< 1 channel, 1 S32 per channel */
- U32, /**< 1 channel, 1 U32 per channel */
- BFLOAT16, /**< 16-bit brain floating-point number */
- F16, /**< 1 channel, 1 F16 per channel */
- F32, /**< 1 channel, 1 F32 per channel */
- UV88, /**< 2 channel, 1 U8 per channel */
- RGB888, /**< 3 channels, 1 U8 per channel */
- RGBA8888, /**< 4 channels, 1 U8 per channel */
- YUV444, /**< A 3 plane of 8 bit 4:4:4 sampled Y, U, V planes */
- YUYV422, /**< A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes */
- NV12, /**< A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling */
- NV21, /**< A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 sampling */
- IYUV, /**< A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes */
- UYVY422 /**< A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 byte */
-};
-
-/** Available data types */
-enum class DataType
-{
- UNKNOWN, /**< Unknown data type */
- U8, /**< unsigned 8-bit number */
- S8, /**< signed 8-bit number */
- QSYMM8, /**< quantized, symmetric fixed-point 8-bit number */
- QASYMM8, /**< quantized, asymmetric fixed-point 8-bit number unsigned */
- QASYMM8_SIGNED, /**< quantized, asymmetric fixed-point 8-bit number signed */
- QSYMM8_PER_CHANNEL, /**< quantized, symmetric per channel fixed-point 8-bit number */
- U16, /**< unsigned 16-bit number */
- S16, /**< signed 16-bit number */
- QSYMM16, /**< quantized, symmetric fixed-point 16-bit number */
- QASYMM16, /**< quantized, asymmetric fixed-point 16-bit number */
- U32, /**< unsigned 32-bit number */
- S32, /**< signed 32-bit number */
- U64, /**< unsigned 64-bit number */
- S64, /**< signed 64-bit number */
- BFLOAT16, /**< 16-bit brain floating-point number */
- F16, /**< 16-bit floating-point number */
- F32, /**< 32-bit floating-point number */
- F64, /**< 64-bit floating-point number */
- SIZET /**< size_t */
-};
-
/** Available Sampling Policies */
enum class SamplingPolicy
{
@@ -107,29 +88,6 @@ enum class SamplingPolicy
TOP_LEFT /**< Samples are taken at pixel top left corner */
};
-/** [DataLayout enum definition] **/
-
-/** Supported tensor data layouts */
-enum class DataLayout
-{
- UNKNOWN, /**< Unknown data layout */
- NCHW, /**< Num samples, channels, height, width */
- NHWC, /**< Num samples, height, width, channels */
- NCDHW, /**< Num samples, channels, depth, height, width */
- NDHWC /**< Num samples, depth, height, width, channels */
-};
-/** [DataLayout enum definition] **/
-
-/** Supported tensor data layout dimensions */
-enum class DataLayoutDimension
-{
- CHANNEL, /**< channel */
- HEIGHT, /**< height */
- WIDTH, /**< width */
- DEPTH, /**< depth */
- BATCHES /**< batches */
-};
-
/** Available ConvolutionMethod*/
enum class ConvolutionMethod
{
@@ -457,23 +415,6 @@ using PaddingList = std::vector<PaddingInfo>;
/** Information to produce a tiled version of a Tensor */
 using Multiples = std::vector<uint32_t>;
-/** Available channels */
-enum class Channel
-{
- UNKNOWN, /** Unknown channel format */
- C0, /**< First channel (used by formats with unknown channel types). */
- C1, /**< Second channel (used by formats with unknown channel types). */
- C2, /**< Third channel (used by formats with unknown channel types). */
- C3, /**< Fourth channel (used by formats with unknown channel types). */
- R, /**< Red channel. */
- G, /**< Green channel. */
- B, /**< Blue channel. */
- A, /**< Alpha channel. */
- Y, /**< Luma channel. */
- U, /**< Cb/U channel. */
- V /**< Cr/V/Value channel. */
-};
-
/** Available reduction operations */
enum class ReductionOperation
{
@@ -546,15 +487,6 @@ struct DetectionWindow
float score{ 0.f }; /**< Confidence value for the detection window */
};
-/** Dimension rounding type when down-scaling on CNNs
- * @note Used in pooling and convolution layer
- */
-enum class DimensionRoundingType
-{
- FLOOR, /**< Floor rounding */
- CEIL /**< Ceil rounding */
-};
-
/** Available pooling types */
enum class PoolingType
{
@@ -668,122 +600,6 @@ private:
};
/** Padding and stride information class */
-class PadStrideInfo
-{
-public:
- /** Constructor
- *
- * @param[in] stride_x (Optional) Stride, in elements, across x. Defaults to 1.
- * @param[in] stride_y (Optional) Stride, in elements, across y. Defaults to 1.
- * @param[in] pad_x (Optional) Padding, in elements, across x. Defaults to 0.
- * @param[in] pad_y (Optional) Padding, in elements, across y. Defaults to 0.
- * @param[in] round (Optional) Dimensions rounding. Defaults to @ref DimensionRoundingType::FLOOR.
- */
- PadStrideInfo(unsigned int stride_x = 1, unsigned int stride_y = 1,
- unsigned int pad_x = 0, unsigned int pad_y = 0,
- DimensionRoundingType round = DimensionRoundingType::FLOOR)
- : _stride(std::make_pair(stride_x, stride_y)),
- _pad_left(pad_x),
- _pad_top(pad_y),
- _pad_right(pad_x),
- _pad_bottom(pad_y),
- _round_type(round)
- {
- }
- /** Constructor
- *
- * @param[in] stride_x Stride, in elements, across x.
- * @param[in] stride_y Stride, in elements, across y.
- * @param[in] pad_left Padding across x on the left, in elements.
- * @param[in] pad_right Padding across x on the right, in elements.
- * @param[in] pad_top Padding across y on the top, in elements.
- * @param[in] pad_bottom Padding across y on the bottom, in elements.
- * @param[in] round Dimensions rounding.
- */
- PadStrideInfo(unsigned int stride_x, unsigned int stride_y,
- unsigned int pad_left, unsigned int pad_right,
- unsigned int pad_top, unsigned int pad_bottom,
- DimensionRoundingType round)
- : _stride(std::make_pair(stride_x, stride_y)),
- _pad_left(pad_left),
- _pad_top(pad_top),
- _pad_right(pad_right),
- _pad_bottom(pad_bottom),
- _round_type(round)
- {
- }
- /** Get the stride.
- *
- * @return a pair: stride x, stride y.
- */
- std::pair<unsigned int, unsigned int> stride() const
- {
- return _stride;
- }
- /** Check whether the padding is symmetric.
- *
- * @return True if the padding is symmetric.
- */
- bool padding_is_symmetric() const
- {
- return (_pad_left == _pad_right) && (_pad_top == _pad_bottom);
- }
- /** Get the padding.
- *
- * @note This should only be used when the padding is symmetric.
- *
- * @return a pair: padding left/right, padding top/bottom
- */
- std::pair<unsigned int, unsigned int> pad() const
- {
- //this accessor should be used only when padding is symmetric
- ARM_COMPUTE_ERROR_ON(!padding_is_symmetric());
- return std::make_pair(_pad_left, _pad_top);
- }
-
- /** Get the left padding */
- unsigned int pad_left() const
- {
- return _pad_left;
- }
- /** Get the right padding */
- unsigned int pad_right() const
- {
- return _pad_right;
- }
- /** Get the top padding */
- unsigned int pad_top() const
- {
- return _pad_top;
- }
- /** Get the bottom padding */
- unsigned int pad_bottom() const
- {
- return _pad_bottom;
- }
-
- /** Get the rounding type */
- DimensionRoundingType round() const
- {
- return _round_type;
- }
-
- /** Check whether this has any padding */
- bool has_padding() const
- {
- return (_pad_left != 0 || _pad_top != 0 || _pad_right != 0 || _pad_bottom != 0);
- }
-
-private:
- std::pair<unsigned int, unsigned int> _stride;
- unsigned int _pad_left;
- unsigned int _pad_top;
- unsigned int _pad_right;
- unsigned int _pad_bottom;
-
- DimensionRoundingType _round_type;
-};
-
/** Padding information for 2D operations like Conv2d */
struct Padding2D
{
@@ -1655,214 +1471,6 @@ private:
float _bbox_xform_clip;
};
-/** Activation Layer Information class */
-class ActivationLayerInfo
-{
-public:
- /** Available activation functions */
- enum class ActivationFunction
- {
- LOGISTIC, /**< Logistic ( \f$ f(x) = \frac{1}{1 + e^{-x}} \f$ ) */
- TANH, /**< Hyperbolic tangent ( \f$ f(x) = a \cdot tanh(b \cdot x) \f$ ) */
- RELU, /**< Rectifier ( \f$ f(x) = max(0,x) \f$ ) */
- BOUNDED_RELU, /**< Upper Bounded Rectifier ( \f$ f(x) = min(a, max(0,x)) \f$ ) */
- LU_BOUNDED_RELU, /**< Lower and Upper Bounded Rectifier ( \f$ f(x) = min(a, max(b,x)) \f$ ) */
- LEAKY_RELU, /**< Leaky Rectifier ( \f$ f(x) = \begin{cases} \alpha x & \quad \text{if } x \text{ < 0}\\ x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */
- SOFT_RELU, /**< Soft Rectifier ( \f$ f(x)= log(1+e^x) \f$ ) */
- ELU, /**< Exponential Linear Unit ( \f$ f(x) = \begin{cases} \alpha (exp(x) - 1) & \quad \text{if } x \text{ < 0}\\ x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */
- ABS, /**< Absolute ( \f$ f(x)= |x| \f$ ) */
- SQUARE, /**< Square ( \f$ f(x)= x^2 \f$ )*/
- SQRT, /**< Square root ( \f$ f(x) = \sqrt{x} \f$ )*/
- LINEAR, /**< Linear ( \f$ f(x)= ax + b \f$ ) */
- IDENTITY, /**< Identity ( \f$ f(x)= x \f$ ) */
- HARD_SWISH, /**< Hard-swish ( \f$ f(x) = (x \text{ReLU6}(x+3))/6 = x \min(\max(0,x+3),6)/6 \f$ ) */
- SWISH, /**< Swish ( \f$ f(x) = \frac{x}{1 + e^{-ax}} = x \text{logistic}(ax) \f$ ) */
- GELU /**< GELU ( \f$ f(x) = x * 1/2 * 1 + erf(x / \sqrt{2}) \f$ ) */
- };
-
- /** Lookup table */
- using LookupTable256 = std::array<qasymm8_t, 256>;
-
- ActivationLayerInfo() = default;
- /** Default Constructor
- *
- * @param[in] f The activation function to use.
- * @param[in] a (Optional) The alpha parameter used by some activation functions
- * (@ref ActivationFunction::BOUNDED_RELU, @ref ActivationFunction::LU_BOUNDED_RELU, @ref ActivationFunction::LINEAR, @ref ActivationFunction::TANH).
- * @param[in] b (Optional) The beta parameter used by some activation functions (@ref ActivationFunction::LINEAR, @ref ActivationFunction::LU_BOUNDED_RELU, @ref ActivationFunction::TANH).
- */
- ActivationLayerInfo(ActivationFunction f, float a = 0.0f, float b = 0.0f)
- : _act(f), _a(a), _b(b), _enabled(true)
- {
- }
- /** Get the type of activation function */
- ActivationFunction activation() const
- {
- return _act;
- }
- /** Get the alpha value */
- float a() const
- {
- return _a;
- }
- /** Get the beta value */
- float b() const
- {
- return _b;
- }
- /** Check if initialised */
- bool enabled() const
- {
- return _enabled;
- }
-
-#ifdef __aarch64__
- const LookupTable256 &lut() const
- {
- return _lut;
- }
-
- void init_lut(DataType data_type, const UniformQuantizationInfo &qi_in, const UniformQuantizationInfo &qi_out)
- {
- if(_act == ActivationFunction::HARD_SWISH)
- {
- if(data_type == DataType::QASYMM8)
- {
- qasymm8_hard_swish_populate_table(_lut, qi_in, qi_out);
- }
- else
- {
- qasymm8_signed_hard_swish_populate_table(_lut, qi_in, qi_out);
- }
- }
- else if(_act == ActivationFunction::LEAKY_RELU)
- {
- qasymm8_leaky_relu_populate_table(_lut, qi_in, qi_out, _a);
- }
- else if(_act == ActivationFunction::LOGISTIC)
- {
- if(data_type == DataType::QASYMM8)
- {
- qasymm8_logistic_populate_table(_lut, qi_in, qi_out);
- }
- else
- {
- qasymm8_signed_logistic_populate_table(_lut, qi_in, qi_out);
- }
- }
- }
-#endif // __aarch64__
-
- static inline bool is_lut_supported(ActivationFunction act_func, DataType data_type)
- {
-#ifdef __aarch64__
- switch(act_func)
- {
- case ActivationFunction::HARD_SWISH:
- return data_type == DataType::QASYMM8 || data_type == DataType::QASYMM8_SIGNED;
- case ActivationFunction::LEAKY_RELU:
- return data_type == DataType::QASYMM8;
- case ActivationFunction::LOGISTIC:
- return data_type == DataType::QASYMM8 || data_type == DataType::QASYMM8_SIGNED;
- default:
- return false;
- }
-#else // __aarch64__
- ARM_COMPUTE_UNUSED(act_func);
- ARM_COMPUTE_UNUSED(data_type);
- return false;
-#endif // __aarch64__
- }
-
-private:
- ActivationFunction _act = { ActivationLayerInfo::ActivationFunction::IDENTITY };
- float _a = {};
- float _b = {};
- bool _enabled = { false };
-
-#ifdef __aarch64__
- LookupTable256 _lut = {};
-
- static inline void qasymm8_hard_swish_populate_table(LookupTable256 &lut, const UniformQuantizationInfo &qi_in, const UniformQuantizationInfo &qi_out)
- {
- for(size_t i = 0; i < lut.size(); ++i)
- {
- lut[i] = qasymm8_hard_swish(i, qi_in, qi_out);
- }
- }
-
- static inline void qasymm8_signed_hard_swish_populate_table(LookupTable256 &lut, const UniformQuantizationInfo &qi_in, const UniformQuantizationInfo &qi_out)
- {
- for(size_t i = 0; i < lut.size(); ++i)
- {
- lut[i] = qasymm8_signed_hard_swish(i, qi_in, qi_out);
- }
- }
-
- static inline void qasymm8_leaky_relu_populate_table(LookupTable256 &lut, const UniformQuantizationInfo &qi_in, const UniformQuantizationInfo &qi_out, float alpha)
- {
- for(size_t i = 0; i < lut.size(); ++i)
- {
- lut[i] = qasymm8_leaky_relu(i, qi_in, qi_out, alpha);
- }
- }
-
- static inline void qasymm8_logistic_populate_table(LookupTable256 &lut, const UniformQuantizationInfo &qi_in, const UniformQuantizationInfo &qi_out)
- {
- for(size_t i = 0; i < lut.size(); ++i)
- {
- lut[i] = qasymm8_logistic(i, qi_in, qi_out);
- }
- }
-
- static inline void qasymm8_signed_logistic_populate_table(LookupTable256 &lut, const UniformQuantizationInfo &qi_in, const UniformQuantizationInfo &qi_out)
- {
- for(size_t i = 0; i < lut.size(); ++i)
- {
- lut[i] = qasymm8_signed_logistic(static_cast<qasymm8_signed_t>(i), qi_in, qi_out);
- }
- }
-#endif // __aarch64__
-};
-
-/** Fully connected layer info */
-struct FullyConnectedLayerInfo
-{
- /* Fused-activation parameters */
- ActivationLayerInfo activation_info{}; /**< Fused activation to apply after the matrix multiplication. */
- /* Information about weights */
- DataLayout weights_trained_layout{ DataLayout::NCHW }; /**< Layout that the weights have been trained with. */
- bool transpose_weights{ true }; /**< Transpose weights if true. */
- bool are_weights_reshaped{ false }; /**< @deprecated Reshape the weights tensor if false. */
- bool retain_internal_weights{ false }; /**< Retain internal reshaped weights. */
- bool enable_fast_math{ false }; /**< Enable fast math computation. */
- /* Other parameters */
- bool fp_mixed_precision{ false }; /**< Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. */
-
- /** Sets the weights trained data layout
- *
- * @param[in] layout Data layout that the weights were trained with
- *
- * @return Updated object
- */
- FullyConnectedLayerInfo &set_weights_trained_layout(DataLayout layout)
- {
- weights_trained_layout = layout;
- return *this;
- }
- /** Sets the transpose weights flag
- *
- * @param[in] should_transpose_weights Boolean flag indicating if weights should be transposed
- *
- * @return Updated object
- */
- FullyConnectedLayerInfo &set_transpose_weights(bool should_transpose_weights)
- {
- transpose_weights = should_transpose_weights;
- return *this;
- }
-};
-
/** Normalization Layer Information class */
class NormalizationLayerInfo
{
@@ -1981,96 +1589,6 @@ private:
int32_t _shrink_axis_mask;
};
-/** Memory layouts for the weights tensor.
- *
- * * UNSPECIFIED is used to select kernels that do not run in
- * variable weights mode.
- *
- * * ANY is used to query the kernel database to retrieve any of the
- * kernels that runs in variable weights mode. Once a kernel is
- * found, the specific format expected by the kernel can be
- * retrieved by the user for reordering the weights tensor
- * accordingly.
- *
- * The other values OHWIo{interleave_by}i{block_by} describe the
- * memory layout of a 4D tensor with layout OHWI that has been
- * transformed into a 4D tensor with dimensions O'HWI' where:
- *
- * O' = first multiple of {interleave_by} s.t. O<=O'
- * I' = first multiple of {block_by} s.t. I<=I'
- *
- * The total size of the dst tensor is O' x H x W x I'
- *
- * The access function of the tensor with layout
- * OHWIo{interleave_by}i{block_by} and size O'HWI' is a 6-parameter
- * access function, where the 6 parameters are computed as follows:
- *
- * x5 = floor(o/{interleave_by}) RANGE [0, O'/{interleave_by} -1] SIZE: O'/{interleave_by}
- *
- * x4 = h RANGE [0, H-1] SIZE: H
- * x3 = w RANGE [0, W-1] SIZE: W
- * x2 = floor(i/{block_by}) RANGE [0, I'/{block_by} -1] SIZE: I'/{block_by}
- * x1 = o%{interleave_by} RANGE [0, {interleave_by} -1] SIZE: {interleave_by}
- * x0 = i%{block_by} RANGE [0, {block_by} -1] SIZE: {block_by}
- * TOTAL SIZE: O' * H * W * I'
- *
- * 4D 6D
- * ----------------- -----------------------------------
- * value(o, h, w, i) = x5 * H * W * I' * {interleave_by}
- * + x4 * W * I' * {interleave_by}
- * + x3 * I' * {interleave_by}
- * + x2 * {interleave_by} * {block_by}
- * + x1 * {block_by}
- * + x0
- *
- * Notice that in arm_gemm the 4D tensor of dimension O'HWI' created
- * for the OHWIo{interleave_by}i{block_by} format is in reality seen
- * as a 2D tensor, where the number of rows is O'/{interleave_by}
- * and the number of columns is {interleave_by} * H * W * I'.
- *
- * The postfix *_bf16 is for the memory layout needed for the
- * fast-mode kernels, in which the weights are passed in bfloat16
- * format.
- */
-enum class WeightFormat
-{
- UNSPECIFIED = 0x1,
- ANY = 0x2,
- OHWI = 0x100100,
- OHWIo2 = 0x100200,
- OHWIo4 = 0x100400,
- OHWIo8 = 0x100800,
- OHWIo16 = 0x101000,
- OHWIo32 = 0x102000,
- OHWIo64 = 0x104000,
- OHWIo128 = 0x108000,
- OHWIo4i2 = 0x200400,
- OHWIo4i2_bf16 = 0x200410,
- OHWIo8i2 = 0x200800,
- OHWIo8i2_bf16 = 0x200810,
- OHWIo16i2 = 0x201000,
- OHWIo16i2_bf16 = 0x201010,
- OHWIo32i2 = 0x202000,
- OHWIo32i2_bf16 = 0x202010,
- OHWIo64i2 = 0x204000,
- OHWIo64i2_bf16 = 0x204010,
- OHWIo4i4 = 0x400400,
- OHWIo4i4_bf16 = 0x400410,
- OHWIo8i4 = 0x400800,
- OHWIo8i4_bf16 = 0x400810,
- OHWIo16i4 = 0x401000,
- OHWIo16i4_bf16 = 0x401010,
- OHWIo32i4 = 0x402000,
- OHWIo32i4_bf16 = 0x402010,
- OHWIo64i4 = 0x404000,
- OHWIo64i4_bf16 = 0x404010,
- OHWIo2i8 = 0x800200,
- OHWIo4i8 = 0x800400,
- OHWIo8i8 = 0x800800,
- OHWIo16i8 = 0x801000,
- OHWIo32i8 = 0x802000,
- OHWIo64i8 = 0x804000
-};
// OHWIoi
inline int interleave_by(const WeightFormat wf)
{
@@ -2281,44 +1799,6 @@ private:
bool _broadcast_bias;
};
-struct ConvolutionInfo
-{
- ConvolutionInfo() = default;
- ConvolutionInfo(const PadStrideInfo &pad_stride_info, unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
- : pad_stride_info(pad_stride_info), depth_multiplier(depth_multiplier), act_info(act_info), dilation(dilation)
- {
- }
- PadStrideInfo pad_stride_info{}; /**< Convolution info (Pads, strides,...) */
- unsigned int depth_multiplier{ 1 }; /**< Multiplier to apply to input's depth to retrieve the output depth. Defaults to 1 */
- ActivationLayerInfo act_info{}; /**< Fused activation to apply after convolution. */
- Size2D dilation{ Size2D(1, 1) }; /**< Dilation, in elements, across x and y. Defaults to (1, 1). */
-};
-
-/** GEMMLowp output stage type */
-enum class GEMMLowpOutputStageType
-{
- NONE, /**< No quantization */
- QUANTIZE_DOWN, /**< Quantize using an integer multiplication */
- QUANTIZE_DOWN_FIXEDPOINT, /**< Quantize using a fixed point multiplication */
- QUANTIZE_DOWN_FLOAT /**< Quantize using a floating point multiplication */
-};
-
-/** GEMMLowp output stage info */
-struct GEMMLowpOutputStageInfo
-{
- GEMMLowpOutputStageType type{ GEMMLowpOutputStageType::NONE }; /**< GEMMLowp output stage type */
- int32_t gemmlowp_offset{ 0 }; /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */
- int32_t gemmlowp_multiplier{ 0 }; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
- int32_t gemmlowp_shift{ 0 }; /**< GEMMLowp output stage shift used for quantizing to uint8 */
- int32_t gemmlowp_min_bound{ std::numeric_limits::lowest() }; /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */
- int32_t gemmlowp_max_bound{ std::numeric_limits::max() }; /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */
- std::vector gemmlowp_multipliers{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
- std::vector gemmlowp_shifts{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
- float gemmlowp_real_multiplier{ 0 }; /**< GEMMLowp output stage real multiplier used for quantizing to QASYMM8 */
- bool is_quantized_per_channel{ false }; /**< GEMMLowp quantized per-channel flag */
- DataType output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */
-};
-
/** GEMM LHS (Left Hand Side) matrix information */
struct GEMMLHSMatrixInfo
{
@@ -2351,287 +1831,6 @@ struct GEMMRHSMatrixInfo
};
class ITensorInfo;
-/** GEMM information class. This class stores the necessary information to compute GEMM functions
- *
- * This object also contains the information about how matrix A and matrix B have been reshaped
- *
- */
-class GEMMInfo
-{
-public:
- /** Default constructor */
- GEMMInfo() noexcept
- : _is_a_reshaped(false),
- _is_b_reshaped(false),
- _reshape_b_only_on_first_run(true),
- _depth_output_gemm3d(0),
- _reinterpret_input_as_3d(false),
- _retain_internal_weights(false),
- _gemmlowp_output_stage(),
- _fast_math(false),
- _fp_mixed_precision(false),
- _broadcast_bias(false),
- _pretranspose_A(false),
- _pretranspose_B(false),
- _activation_info(),
- _post_ops(),
- _fixed_format(false),
- _weight_format(arm_compute::WeightFormat::UNSPECIFIED)
- {
- }
- /** Constructor
- *
- * @param[in] is_a_reshaped True if the matrix A has been reshaped
- * @param[in] is_b_reshaped True if the matrix B has been reshaped
- * @param[in] reshape_b_only_on_first_run Reshape matrix B only for the first run
- * @param[in] depth_output_gemm3d (Optional) Depth (third dimension) of the output tensor to be used with the GEMM3D kernel
- * If 0 the output will not be reinterpreted as 3D. Default 0
- * @param[in] reinterpret_input_as_3d (Optional) Reinterpret the input as 3D tensor. (i.e. this flag should be set to true when GEMM is used
- * to perform 1x1 convolutions with the NHWC data layout)
- * @param[in] retain_internal_weights (Optional) Retain the weights tensor from previous run
- * @param[in] gemmlowp_output_stage (Optional) GEMMLowp Output stage info
- * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
- * @param[in] fast_math (Optional) Use a data type of shorter width to improve performance
- * @param[in] broadcast_bias (Optional) Broadcast the shape of the bias tensor from a vector to a matrix.
- * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication
- * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation.
- * @param[in] fixed_format (Optional) Specify the selection of fixed format kernels for variable weights support in GEMM. These kernels expect the weights tensor to be in amemory format that is fixed by the kernel itself. For more information, see arm_compute::WeightFormat.
- * @param[in] weight_format (Optional) arm_gemm:WeightFormat enumeration requested by the user. Default is arm_compute::WeightFormat::UNSPECIFIED.
- */
- GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool retain_internal_weights = false,
- GEMMLowpOutputStageInfo gemmlowp_output_stage = GEMMLowpOutputStageInfo(), bool fp_mixed_precision = false, bool fast_math = false, bool broadcast_bias = false,
- const ActivationLayerInfo &activation_info = ActivationLayerInfo(), const experimental::PostOpList &post_ops = experimental::PostOpList(),
- bool fixed_format = false, arm_compute::WeightFormat weight_format = arm_compute::WeightFormat::UNSPECIFIED) noexcept
- : _is_a_reshaped(is_a_reshaped),
- _is_b_reshaped(is_b_reshaped),
- _reshape_b_only_on_first_run(reshape_b_only_on_first_run),
- _depth_output_gemm3d(depth_output_gemm3d),
- _reinterpret_input_as_3d(reinterpret_input_as_3d),
- _retain_internal_weights(retain_internal_weights),
- _gemmlowp_output_stage(gemmlowp_output_stage),
- _fast_math(fast_math),
- _fp_mixed_precision(fp_mixed_precision),
- _broadcast_bias(broadcast_bias),
- _pretranspose_A(false),
- _pretranspose_B(false),
- _activation_info(activation_info),
- _post_ops(post_ops),
- _fixed_format(fixed_format),
- _weight_format(weight_format)
- {
- }
- /** Flag which specifies if the matrix A has been reshaped
- *
- * @return True if the matrix A has been reshaped
- */
- bool is_a_reshaped() const
- {
- return _is_a_reshaped;
- };
- /** Flag which specifies if the matrix B has been reshaped
- *
- * @return True if the matrix B has been reshaped
- */
- bool is_b_reshaped() const
- {
- return _is_b_reshaped;
- };
- /** Flag which specifies if the reshape of matrix B should executed only for the first
- *
- * @note This flag could be set to TRUE when GEMM is used to accelerate convolution layer
- *
- * @return True if the reshaped of matrix B happens only for the first run
- */
- bool reshape_b_only_on_first_run() const
- {
- return _reshape_b_only_on_first_run;
- };
- /** Depth of the output when GEMM output is reinterpreted as 3D tensor
- *
- * @return the depth of the output tensor
- */
- int depth_output_gemm3d() const
- {
- return _depth_output_gemm3d;
- };
- /** Flag which specifies if the input tensor has to be reinterpreted as 3D
- *
- * @return True if the input tensor has to be reinterpreted as 3D tensor
- */
- bool reinterpret_input_as_3d() const
- {
- return _reinterpret_input_as_3d;
- };
- /** Flag which specifies if the weights tensor has to be retained from previous run
- *
- * @return True if the weights tensor has to be retained
- */
- bool retain_internal_weights() const
- {
- return _retain_internal_weights;
- };
- /** GEMMLowp output stage
- *
- * @return the GEMMLowp output stage info
- */
- GEMMLowpOutputStageInfo gemmlowp_output_stage() const
- {
- return _gemmlowp_output_stage;
- };
- /** Sets GEMMLowp output stage
- *
- * @param[in] output_stage Output stage to set
- */
- void set_gemmlowp_output_stage(GEMMLowpOutputStageInfo &output_stage)
- {
- _gemmlowp_output_stage = output_stage;
- };
- /** Flag which specifies if a wider accumulator should be used.
- *
- * @return True if a wider accumulator has to be used
- */
- bool fp_mixed_precision() const
- {
- return _fp_mixed_precision;
- };
- /** Flag which specifies if a shorter accumulator to be used.
- *
- * @return True if a shorter accumulator has to be used
- */
- bool fast_math() const
- {
- return _fast_math;
- };
- /** Set fast math flag
- *
- * @param[in] fast_math Flag to set
- */
- void set_fast_math(bool fast_math)
- {
- _fast_math = fast_math;
- }
- /** Flag which specifies whether to broadcast the shape of the bias tensor.
- *
- * @return True if the shape of the bias tensor is to be broadcasted.
- */
- bool broadcast_bias() const
- {
- return _broadcast_bias;
- };
- /** Flag which specifies whether A should be pre-transposed if supported.
- *
- * @return True if A should be pre-transposed else false.
- */
- bool pretranspose_A() const
- {
- return _pretranspose_A;
- };
- /** Set pre-transpose A flag
- *
- * @param[in] flag Flag to set
- */
- void set_pretranspose_A(bool flag)
- {
- _pretranspose_A = flag;
- }
- /** Flag which specifies whether b should be pre-transposed if supported.
- *
- * @return True if b should be pre-transposed else false.
- */
- bool pretranspose_B() const
- {
- return _pretranspose_B;
- };
- /** Set pre-transpose b flag
- *
- * @param[in] flag Flag to set
- */
- void set_pretranspose_B(bool flag)
- {
- _pretranspose_B = flag;
- }
- /** Activation layer to apply after the matrix multiplication
- *
- * @return ActivationLayerInfo object
- */
- ActivationLayerInfo activation_info() const
- {
- return _activation_info;
- }
- /** Set activation layer info
- *
- * @param[in] activation_info ActivationLayerInfo object to set
- */
- void set_activation_info(const ActivationLayerInfo &activation_info)
- {
- _activation_info = activation_info;
- }
- /** Post operations to apply after the matrix multiplication
- *
- * @return experimental::PostOpList object
- */
- const experimental::PostOpList &post_ops() const
- {
- return _post_ops;
- }
- /** Set post ops
- *
- * @param[in] post_ops experimental::PostOpList object to set
- */
- void set_post_ops(const experimental::PostOpList &post_ops)
- {
- _post_ops = post_ops;
- }
- /** Flag which specifies if the GEMM operation is running fixed-format kernels.
- *
- * @return True if the GEMM operation is running fixed-format kernel else false.
- */
- bool fixed_format() const
- {
- return _fixed_format;
- }
-
- /** Set fixed-format flag
- *
- * @param[in] fixed_format sets whether or not to use fixed-format kernels
- */
- void set_fixed_format(bool fixed_format)
- {
- _fixed_format = fixed_format;
- }
-
- arm_compute::WeightFormat weight_format() const
- {
- return _weight_format;
- }
-
- /** Set weight format to be used
- *
- * @param[in] weight_format arm_compute::WeightFormat enumeration
- */
- void set_weight_format(arm_compute::WeightFormat weight_format)
- {
- _weight_format = weight_format;
- }
-
-private:
- bool _is_a_reshaped;
- bool _is_b_reshaped;
- bool _reshape_b_only_on_first_run;
- int _depth_output_gemm3d;
- bool _reinterpret_input_as_3d;
- bool _retain_internal_weights;
- GEMMLowpOutputStageInfo _gemmlowp_output_stage;
- bool _fast_math;
- bool _fp_mixed_precision;
- bool _broadcast_bias;
- bool _pretranspose_A;
- bool _pretranspose_B;
- ActivationLayerInfo _activation_info;
- experimental::PostOpList _post_ops;
- bool _fixed_format;
- arm_compute::WeightFormat _weight_format;
-};
/** Winograd information */
struct WinogradInfo
@@ -2713,52 +1912,7 @@ struct IOFormatInfo
bool align_columns;
};
-/** Class for holding information related to matrix multiplication function
- */
-class MatMulInfo
-{
-public:
- /* Get Adjoint LHS flag value */
- bool adj_lhs() const
- {
- return _adj_lhs;
- }
- /* Get Adjoint RHS flag value */
- bool adj_rhs() const
- {
- return _adj_rhs;
- }
- /* Get Fused Activation Layer Info */
- ActivationLayerInfo fused_activation() const
- {
- return _fused_act;
- }
- /* Set Adjoint LHS flag */
- MatMulInfo &adj_lhs(bool adj_lhs)
- {
- _adj_lhs = adj_lhs;
- return *this;
- }
- /* Set Adjoint RHS flag */
- MatMulInfo &adj_rhs(bool adj_rhs)
- {
- _adj_rhs = adj_rhs;
- return *this;
- }
- /* Set Fused Activation Layer Info */
- MatMulInfo &fused_activation(const ActivationLayerInfo &act_info)
- {
- _fused_act = act_info;
- return *this;
- }
-
-private:
- bool _adj_lhs{ false };
- bool _adj_rhs{ false };
- ActivationLayerInfo _fused_act{}; // disabled by default
-};
-
/** Class for holding information related to cropping */
using CropInfo = Padding2D;
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TYPES_H */
+#endif /* ACL_ARM_COMPUTE_CORE_TYPES */
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index a47cfbdec6e19aa8be7038d0c60ccb8ddc3eae59..c5b50167bf6eaecd4b3d2f471dc6ac3877adc3c2 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2022 Arm Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,67 +26,29 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Rounding.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Version.h"
-#include
-#include
-#include
-#include
+#include
#include
#include
#include
#include
#include
#include
-#include
+
+/* Convenience / backwards compatibility includes */
+#include "arm_compute/core/utils/ActivationFunctionUtils.h"
+#include "arm_compute/core/utils/DataLayoutUtils.h"
+#include "arm_compute/core/utils/DataTypeUtils.h"
+#include "arm_compute/core/utils/FormatUtils.h"
+#include "arm_compute/core/utils/InterpolationPolicyUtils.h"
+#include "arm_compute/core/utils/StringUtils.h"
namespace arm_compute
{
class ITensor;
class ITensorInfo;
-
-/** Calculate the rounded up quotient of val / m.
- *
- * @param[in] val Value to divide and round up.
- * @param[in] m Value to divide by.
- *
- * @return the result.
- */
-template
-constexpr auto DIV_CEIL(S val, T m) -> decltype((val + m - 1) / m)
-{
- return (val + m - 1) / m;
-}
-
-/** Computes the smallest number larger or equal to value that is a multiple of divisor.
- *
- * @param[in] value Lower bound value
- * @param[in] divisor Value to compute multiple of.
- *
- * @return the result.
- */
-template
-inline auto ceil_to_multiple(S value, T divisor) -> decltype(((value + divisor - 1) / divisor) * divisor)
-{
- ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0);
- return DIV_CEIL(value, divisor) * divisor;
-}
-
-/** Computes the largest number smaller or equal to value that is a multiple of divisor.
- *
- * @param[in] value Upper bound value
- * @param[in] divisor Value to compute multiple of.
- *
- * @return the result.
- */
-template
-inline auto floor_to_multiple(S value, T divisor) -> decltype((value / divisor) * divisor)
-{
- ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0);
- return (value / divisor) * divisor;
-}
+class ActivationLayerInfo;
/** Load an entire file in memory
*
@@ -97,627 +59,6 @@ inline auto floor_to_multiple(S value, T divisor) -> decltype((value / divisor)
*/
std::string read_file(const std::string &filename, bool binary);
-/** The size in bytes of the data type
- *
- * @param[in] data_type Input data type
- *
- * @return The size in bytes of the data type
- */
-inline size_t data_size_from_type(DataType data_type)
-{
- switch(data_type)
- {
- case DataType::U8:
- case DataType::S8:
- case DataType::QSYMM8:
- case DataType::QASYMM8:
- case DataType::QASYMM8_SIGNED:
- case DataType::QSYMM8_PER_CHANNEL:
- return 1;
- case DataType::U16:
- case DataType::S16:
- case DataType::QSYMM16:
- case DataType::QASYMM16:
- case DataType::BFLOAT16:
- case DataType::F16:
- return 2;
- case DataType::F32:
- case DataType::U32:
- case DataType::S32:
- return 4;
- case DataType::F64:
- case DataType::U64:
- case DataType::S64:
- return 8;
- case DataType::SIZET:
- return sizeof(size_t);
- default:
- ARM_COMPUTE_ERROR("Invalid data type");
- return 0;
- }
-}
-
-/** The size in bytes of the pixel format
- *
- * @param[in] format Input format
- *
- * @return The size in bytes of the pixel format
- */
-inline size_t pixel_size_from_format(Format format)
-{
- switch(format)
- {
- case Format::U8:
- return 1;
- case Format::U16:
- case Format::S16:
- case Format::BFLOAT16:
- case Format::F16:
- case Format::UV88:
- case Format::YUYV422:
- case Format::UYVY422:
- return 2;
- case Format::RGB888:
- return 3;
- case Format::RGBA8888:
- return 4;
- case Format::U32:
- case Format::S32:
- case Format::F32:
- return 4;
- //Doesn't make sense for planar formats:
- case Format::NV12:
- case Format::NV21:
- case Format::IYUV:
- case Format::YUV444:
- default:
- ARM_COMPUTE_ERROR("Undefined pixel size for given format");
- return 0;
- }
-}
-
-/** The size in bytes of the data type
- *
- * @param[in] dt Input data type
- *
- * @return The size in bytes of the data type
- */
-inline size_t element_size_from_data_type(DataType dt)
-{
- switch(dt)
- {
- case DataType::S8:
- case DataType::U8:
- case DataType::QSYMM8:
- case DataType::QASYMM8:
- case DataType::QASYMM8_SIGNED:
- case DataType::QSYMM8_PER_CHANNEL:
- return 1;
- case DataType::U16:
- case DataType::S16:
- case DataType::QSYMM16:
- case DataType::QASYMM16:
- case DataType::BFLOAT16:
- case DataType::F16:
- return 2;
- case DataType::U32:
- case DataType::S32:
- case DataType::F32:
- return 4;
- default:
- ARM_COMPUTE_ERROR("Undefined element size for given data type");
- return 0;
- }
-}
-
-/** Return the data type used by a given single-planar pixel format
- *
- * @param[in] format Input format
- *
- * @return The size in bytes of the pixel format
- */
-inline DataType data_type_from_format(Format format)
-{
- switch(format)
- {
- case Format::U8:
- case Format::UV88:
- case Format::RGB888:
- case Format::RGBA8888:
- case Format::YUYV422:
- case Format::UYVY422:
- return DataType::U8;
- case Format::U16:
- return DataType::U16;
- case Format::S16:
- return DataType::S16;
- case Format::U32:
- return DataType::U32;
- case Format::S32:
- return DataType::S32;
- case Format::BFLOAT16:
- return DataType::BFLOAT16;
- case Format::F16:
- return DataType::F16;
- case Format::F32:
- return DataType::F32;
- //Doesn't make sense for planar formats:
- case Format::NV12:
- case Format::NV21:
- case Format::IYUV:
- case Format::YUV444:
- default:
- ARM_COMPUTE_ERROR("Not supported data_type for given format");
- return DataType::UNKNOWN;
- }
-}
-
-/** Return the plane index of a given channel given an input format.
- *
- * @param[in] format Input format
- * @param[in] channel Input channel
- *
- * @return The plane index of the specific channel of the specific format
- */
-inline int plane_idx_from_channel(Format format, Channel channel)
-{
- switch(format)
- {
- // Single planar formats have a single plane
- case Format::U8:
- case Format::U16:
- case Format::S16:
- case Format::U32:
- case Format::S32:
- case Format::BFLOAT16:
- case Format::F16:
- case Format::F32:
- case Format::UV88:
- case Format::RGB888:
- case Format::RGBA8888:
- case Format::YUYV422:
- case Format::UYVY422:
- return 0;
- // Multi planar formats
- case Format::NV12:
- case Format::NV21:
- {
- // Channel U and V share the same plane of format UV88
- switch(channel)
- {
- case Channel::Y:
- return 0;
- case Channel::U:
- case Channel::V:
- return 1;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::IYUV:
- case Format::YUV444:
- {
- switch(channel)
- {
- case Channel::Y:
- return 0;
- case Channel::U:
- return 1;
- case Channel::V:
- return 2;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- default:
- ARM_COMPUTE_ERROR("Not supported format");
- return 0;
- }
-}
-
-/** Return the channel index of a given channel given an input format.
- *
- * @param[in] format Input format
- * @param[in] channel Input channel
- *
- * @return The channel index of the specific channel of the specific format
- */
-inline int channel_idx_from_format(Format format, Channel channel)
-{
- switch(format)
- {
- case Format::RGB888:
- {
- switch(channel)
- {
- case Channel::R:
- return 0;
- case Channel::G:
- return 1;
- case Channel::B:
- return 2;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::RGBA8888:
- {
- switch(channel)
- {
- case Channel::R:
- return 0;
- case Channel::G:
- return 1;
- case Channel::B:
- return 2;
- case Channel::A:
- return 3;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::YUYV422:
- {
- switch(channel)
- {
- case Channel::Y:
- return 0;
- case Channel::U:
- return 1;
- case Channel::V:
- return 3;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::UYVY422:
- {
- switch(channel)
- {
- case Channel::Y:
- return 1;
- case Channel::U:
- return 0;
- case Channel::V:
- return 2;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::NV12:
- {
- switch(channel)
- {
- case Channel::Y:
- return 0;
- case Channel::U:
- return 0;
- case Channel::V:
- return 1;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::NV21:
- {
- switch(channel)
- {
- case Channel::Y:
- return 0;
- case Channel::U:
- return 1;
- case Channel::V:
- return 0;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::YUV444:
- case Format::IYUV:
- {
- switch(channel)
- {
- case Channel::Y:
- return 0;
- case Channel::U:
- return 0;
- case Channel::V:
- return 0;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- default:
- ARM_COMPUTE_ERROR("Not supported format");
- return 0;
- }
-}
-
-/** Return the number of planes for a given format
- *
- * @param[in] format Input format
- *
- * @return The number of planes for a given image format.
- */
-inline size_t num_planes_from_format(Format format)
-{
- switch(format)
- {
- case Format::U8:
- case Format::S16:
- case Format::U16:
- case Format::S32:
- case Format::U32:
- case Format::BFLOAT16:
- case Format::F16:
- case Format::F32:
- case Format::RGB888:
- case Format::RGBA8888:
- case Format::YUYV422:
- case Format::UYVY422:
- return 1;
- case Format::NV12:
- case Format::NV21:
- return 2;
- case Format::IYUV:
- case Format::YUV444:
- return 3;
- default:
- ARM_COMPUTE_ERROR("Not supported format");
- return 0;
- }
-}
-
-/** Return the number of channels for a given single-planar pixel format
- *
- * @param[in] format Input format
- *
- * @return The number of channels for a given image format.
- */
-inline size_t num_channels_from_format(Format format)
-{
- switch(format)
- {
- case Format::U8:
- case Format::U16:
- case Format::S16:
- case Format::U32:
- case Format::S32:
- case Format::BFLOAT16:
- case Format::F16:
- case Format::F32:
- return 1;
- // Because the U and V channels are subsampled
- // these formats appear like having only 2 channels:
- case Format::YUYV422:
- case Format::UYVY422:
- return 2;
- case Format::UV88:
- return 2;
- case Format::RGB888:
- return 3;
- case Format::RGBA8888:
- return 4;
- //Doesn't make sense for planar formats:
- case Format::NV12:
- case Format::NV21:
- case Format::IYUV:
- case Format::YUV444:
- default:
- return 0;
- }
-}
-
-/** Return the promoted data type of a given data type.
- *
- * @note If promoted data type is not supported an error will be thrown
- *
- * @param[in] dt Data type to get the promoted type of.
- *
- * @return Promoted data type
- */
-inline DataType get_promoted_data_type(DataType dt)
-{
- switch(dt)
- {
- case DataType::U8:
- return DataType::U16;
- case DataType::S8:
- return DataType::S16;
- case DataType::U16:
- return DataType::U32;
- case DataType::S16:
- return DataType::S32;
- case DataType::QSYMM8:
- case DataType::QASYMM8:
- case DataType::QASYMM8_SIGNED:
- case DataType::QSYMM8_PER_CHANNEL:
- case DataType::QSYMM16:
- case DataType::QASYMM16:
- case DataType::BFLOAT16:
- case DataType::F16:
- case DataType::U32:
- case DataType::S32:
- case DataType::F32:
- ARM_COMPUTE_ERROR("Unsupported data type promotions!");
- default:
- ARM_COMPUTE_ERROR("Undefined data type!");
- }
- return DataType::UNKNOWN;
-}
-
-/** Compute the mininum and maximum values a data type can take
- *
- * @param[in] dt Data type to get the min/max bounds of
- *
- * @return A tuple (min,max) with the minimum and maximum values respectively wrapped in PixelValue.
- */
-inline std::tuple get_min_max(DataType dt)
-{
- PixelValue min{};
- PixelValue max{};
- switch(dt)
- {
- case DataType::U8:
- case DataType::QASYMM8:
- {
- min = PixelValue(static_cast(std::numeric_limits::lowest()));
- max = PixelValue(static_cast(std::numeric_limits::max()));
- break;
- }
- case DataType::S8:
- case DataType::QSYMM8:
- case DataType::QASYMM8_SIGNED:
- case DataType::QSYMM8_PER_CHANNEL:
- {
- min = PixelValue(static_cast(std::numeric_limits::lowest()));
- max = PixelValue(static_cast(std::numeric_limits::max()));
- break;
- }
- case DataType::U16:
- case DataType::QASYMM16:
- {
- min = PixelValue(static_cast(std::numeric_limits::lowest()));
- max = PixelValue(static_cast(std::numeric_limits::max()));
- break;
- }
- case DataType::S16:
- case DataType::QSYMM16:
- {
- min = PixelValue(static_cast(std::numeric_limits::lowest()));
- max = PixelValue(static_cast(std::numeric_limits::max()));
- break;
- }
- case DataType::U32:
- {
- min = PixelValue(std::numeric_limits::lowest());
- max = PixelValue(std::numeric_limits::max());
- break;
- }
- case DataType::S32:
- {
- min = PixelValue(std::numeric_limits::lowest());
- max = PixelValue(std::numeric_limits::max());
- break;
- }
- case DataType::BFLOAT16:
- {
- min = PixelValue(bfloat16::lowest());
- max = PixelValue(bfloat16::max());
- break;
- }
- case DataType::F16:
- {
- min = PixelValue(std::numeric_limits::lowest());
- max = PixelValue(std::numeric_limits::max());
- break;
- }
- case DataType::F32:
- {
- min = PixelValue(std::numeric_limits::lowest());
- max = PixelValue(std::numeric_limits::max());
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Undefined data type!");
- }
- return std::make_tuple(min, max);
-}
-
-/** Return true if the given format has horizontal subsampling.
- *
- * @param[in] format Format to determine subsampling.
- *
- * @return True if the format can be subsampled horizontaly.
- */
-inline bool has_format_horizontal_subsampling(Format format)
-{
- return (format == Format::YUYV422 || format == Format::UYVY422 || format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88) ? true : false;
-}
-
-/** Return true if the given format has vertical subsampling.
- *
- * @param[in] format Format to determine subsampling.
- *
- * @return True if the format can be subsampled verticaly.
- */
-inline bool has_format_vertical_subsampling(Format format)
-{
- return (format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88) ? true : false;
-}
-
-/** Adjust tensor shape size if width or height are odd for a given multi-planar format. No modification is done for other formats.
- *
- * @note Adding here a few links discussing the issue of odd size and sharing the same solution:
- * Android Source
- * WebM
- * libYUV
- * YUVPlayer *
- *
- * @param[in, out] shape Tensor shape of 2D size
- * @param[in] format Format of the tensor
- *
- * @return The adjusted tensor shape.
- */
-inline TensorShape adjust_odd_shape(const TensorShape &shape, Format format)
-{
- TensorShape output{ shape };
-
- // Force width to be even for formats which require subsampling of the U and V channels
- if(has_format_horizontal_subsampling(format))
- {
- output.set(0, (output.x() + 1) & ~1U);
- }
-
- // Force height to be even for formats which require subsampling of the U and V channels
- if(has_format_vertical_subsampling(format))
- {
- output.set(1, (output.y() + 1) & ~1U);
- }
-
- return output;
-}
-
-/** Calculate subsampled shape for a given format and channel
- *
- * @param[in] shape Shape of the tensor to calculate the extracted channel.
- * @param[in] format Format of the tensor.
- * @param[in] channel Channel to create tensor shape to be extracted.
- *
- * @return The subsampled tensor shape.
- */
-inline TensorShape calculate_subsampled_shape(const TensorShape &shape, Format format, Channel channel = Channel::UNKNOWN)
-{
- TensorShape output{ shape };
-
- // Subsample shape only for U or V channel
- if(Channel::U == channel || Channel::V == channel || Channel::UNKNOWN == channel)
- {
- // Subsample width for the tensor shape when channel is U or V
- if(has_format_horizontal_subsampling(format))
- {
- output.set(0, output.x() / 2U);
- }
-
- // Subsample height for the tensor shape when channel is U or V
- if(has_format_vertical_subsampling(format))
- {
- output.set(1, output.y() / 2U);
- }
- }
-
- return output;
-}
-
/** Permutes the given dimensions according the permutation vector
*
* @param[in,out] dimensions Dimensions to be permuted.
@@ -837,15 +178,7 @@ QuantizationInfo get_softmax_output_quantization_info(DataType input_type, bool
*
* @return The pair with minimum and maximum values
*/
-std::pair get_quantized_activation_min_max(ActivationLayerInfo act_info, DataType data_type, UniformQuantizationInfo oq_info);
-
-/** Convert a tensor format into a string.
- *
- * @param[in] format @ref Format to be translated to string.
- *
- * @return The string describing the format.
- */
-const std::string &string_from_format(Format format);
+std::pair get_quantized_activation_min_max(const ActivationLayerInfo &act_info, DataType data_type, UniformQuantizationInfo oq_info);
/** Convert a channel identity into a string.
*
@@ -854,34 +187,7 @@ const std::string &string_from_format(Format format);
* @return The string describing the channel.
*/
const std::string &string_from_channel(Channel channel);
-/** Convert a data layout identity into a string.
- *
- * @param[in] dl @ref DataLayout to be translated to string.
- *
- * @return The string describing the data layout.
- */
-const std::string &string_from_data_layout(DataLayout dl);
-/** Convert a data type identity into a string.
- *
- * @param[in] dt @ref DataType to be translated to string.
- *
- * @return The string describing the data type.
- */
-const std::string &string_from_data_type(DataType dt);
-/** Translates a given activation function to a string.
- *
- * @param[in] act @ref ActivationLayerInfo::ActivationFunction to be translated to string.
- *
- * @return The string describing the activation function.
- */
-const std::string &string_from_activation_func(ActivationLayerInfo::ActivationFunction act);
-/** Translates a given interpolation policy to a string.
- *
- * @param[in] policy @ref InterpolationPolicy to be translated to string.
- *
- * @return The string describing the interpolation policy.
- */
-const std::string &string_from_interpolation_policy(InterpolationPolicy policy);
+
/** Translates a given border mode policy to a string.
*
* @param[in] border_mode @ref BorderMode to be translated to string.
@@ -923,7 +229,7 @@ bool is_pool_3d_region_entirely_outside_input(const Pooling3dLayerInfo &info);
*
* @return True if padding is symmetric
*/
-inline bool is_symmetric(const Padding3D& info)
+inline bool is_symmetric(const Padding3D &info)
{
return ((info.left == info.right) && (info.top == info.bottom) && (info.front == info.back));
}
@@ -942,13 +248,7 @@ const std::string &string_from_gemmlowp_output_stage(GEMMLowpOutputStageType out
* @return String representation of the PixelValue through the given data type.
*/
std::string string_from_pixel_value(const PixelValue &value, const DataType data_type);
-/** Convert a string to DataType
- *
- * @param[in] name The name of the data type
- *
- * @return DataType
- */
-DataType data_type_from_name(const std::string &name);
+
/** Stores padding information before configuring a kernel
*
* @param[in] infos list of tensor infos to store the padding info for
@@ -971,170 +271,6 @@ std::unordered_map get_padding_info(std::initi
*/
bool has_padding_changed(const std::unordered_map &padding_map);
-/** Input Stream operator for @ref DataType
- *
- * @param[in] stream Stream to parse
- * @param[out] data_type Output data type
- *
- * @return Updated stream
- */
-inline ::std::istream &operator>>(::std::istream &stream, DataType &data_type)
-{
- std::string value;
- stream >> value;
- data_type = data_type_from_name(value);
- return stream;
-}
-/** Lower a given string.
- *
- * @param[in] val Given string to lower.
- *
- * @return The lowered string
- */
-std::string lower_string(const std::string &val);
-
-/** Raise a given string to upper case
- *
- * @param[in] val Given string to lower.
- *
- * @return The upper case string
- */
-std::string upper_string(const std::string &val);
-
-/** Check if a given data type is of floating point type
- *
- * @param[in] dt Input data type.
- *
- * @return True if data type is of floating point type, else false.
- */
-inline bool is_data_type_float(DataType dt)
-{
- switch(dt)
- {
- case DataType::F16:
- case DataType::F32:
- return true;
- default:
- return false;
- }
-}
-
-/** Check if a given data type is of quantized type
- *
- * @note Quantized is considered a super-set of fixed-point and asymmetric data types.
- *
- * @param[in] dt Input data type.
- *
- * @return True if data type is of quantized type, else false.
- */
-inline bool is_data_type_quantized(DataType dt)
-{
- switch(dt)
- {
- case DataType::QSYMM8:
- case DataType::QASYMM8:
- case DataType::QASYMM8_SIGNED:
- case DataType::QSYMM8_PER_CHANNEL:
- case DataType::QSYMM16:
- case DataType::QASYMM16:
- return true;
- default:
- return false;
- }
-}
-
-/** Check if a given data type is of asymmetric quantized type
- *
- * @param[in] dt Input data type.
- *
- * @return True if data type is of asymmetric quantized type, else false.
- */
-inline bool is_data_type_quantized_asymmetric(DataType dt)
-{
- switch(dt)
- {
- case DataType::QASYMM8:
- case DataType::QASYMM8_SIGNED:
- case DataType::QASYMM16:
- return true;
- default:
- return false;
- }
-}
-
-/** Check if a given data type is of asymmetric quantized signed type
- *
- * @param[in] dt Input data type.
- *
- * @return True if data type is of asymmetric quantized signed type, else false.
- */
-inline bool is_data_type_quantized_asymmetric_signed(DataType dt)
-{
- switch(dt)
- {
- case DataType::QASYMM8_SIGNED:
- return true;
- default:
- return false;
- }
-}
-
-/** Check if a given data type is of symmetric quantized type
- *
- * @param[in] dt Input data type.
- *
- * @return True if data type is of symmetric quantized type, else false.
- */
-inline bool is_data_type_quantized_symmetric(DataType dt)
-{
- switch(dt)
- {
- case DataType::QSYMM8:
- case DataType::QSYMM8_PER_CHANNEL:
- case DataType::QSYMM16:
- return true;
- default:
- return false;
- }
-}
-
-/** Check if a given data type is of per channel type
- *
- * @param[in] dt Input data type.
- *
- * @return True if data type is of per channel type, else false.
- */
-inline bool is_data_type_quantized_per_channel(DataType dt)
-{
- switch(dt)
- {
- case DataType::QSYMM8_PER_CHANNEL:
- return true;
- default:
- return false;
- }
-}
-
-/** Create a string with the float in full precision.
- *
- * @param val Floating point value
- *
- * @return String with the floating point value.
- */
-inline std::string float_to_string_with_full_precision(float val)
-{
- std::stringstream ss;
- ss.precision(std::numeric_limits::max_digits10);
- ss << val;
-
- if(val != static_cast(val))
- {
- ss << "f";
- }
-
- return ss.str();
-}
-
/** Returns the number of elements required to go from start to end with the wanted step
*
* @param[in] start start value
@@ -1149,142 +285,6 @@ inline size_t num_of_elements_in_range(const float start, const float end, const
return size_t(std::ceil((end - start) / step));
}
-/** Returns true if the value can be represented by the given data type
- *
- * @param[in] val value to be checked
- * @param[in] dt data type that is checked
- * @param[in] qinfo (Optional) quantization info if the data type is QASYMM8
- *
- * @return true if the data type can hold the value.
- */
-template
-bool check_value_range(T val, DataType dt, QuantizationInfo qinfo = QuantizationInfo())
-{
- switch(dt)
- {
- case DataType::U8:
- {
- const auto val_u8 = static_cast(val);
- return ((val_u8 == val) && val >= std::numeric_limits::lowest() && val <= std::numeric_limits::max());
- }
- case DataType::QASYMM8:
- {
- double min = static_cast(dequantize_qasymm8(0, qinfo));
- double max = static_cast(dequantize_qasymm8(std::numeric_limits::max(), qinfo));
- return ((double)val >= min && (double)val <= max);
- }
- case DataType::S8:
- {
- const auto val_s8 = static_cast(val);
- return ((val_s8 == val) && val >= std::numeric_limits::lowest() && val <= std::numeric_limits::max());
- }
- case DataType::U16:
- {
- const auto val_u16 = static_cast(val);
- return ((val_u16 == val) && val >= std::numeric_limits::lowest() && val <= std::numeric_limits::max());
- }
- case DataType::S16:
- {
- const auto val_s16 = static_cast(val);
- return ((val_s16 == val) && val >= std::numeric_limits::lowest() && val <= std::numeric_limits::max());
- }
- case DataType::U32:
- {
- const auto val_d64 = static_cast(val);
- const auto val_u32 = static_cast(val);
- return ((val_u32 == val_d64) && val_d64 >= std::numeric_limits::lowest() && val_d64 <= std::numeric_limits::max());
- }
- case DataType::S32:
- {
- const auto val_d64 = static_cast(val);
- const auto val_s32 = static_cast(val);
- return ((val_s32 == val_d64) && val_d64 >= std::numeric_limits::lowest() && val_d64 <= std::numeric_limits::max());
- }
- case DataType::BFLOAT16:
- return (val >= bfloat16::lowest() && val <= bfloat16::max());
- case DataType::F16:
- return (val >= std::numeric_limits::lowest() && val <= std::numeric_limits::max());
- case DataType::F32:
- return (val >= std::numeric_limits::lowest() && val <= std::numeric_limits::max());
- default:
- ARM_COMPUTE_ERROR("Data type not supported");
- return false;
- }
-}
-
-/** Returns the adjusted vector size in case it is less than the input's first dimension, getting rounded down to its closest valid vector size
- *
- * @param[in] vec_size vector size to be adjusted
- * @param[in] dim0 size of the first dimension
- *
- * @return the number of element processed along the X axis per thread
- */
-inline unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
-{
- ARM_COMPUTE_ERROR_ON(vec_size > 16);
-
- if((vec_size >= dim0) && (dim0 == 3))
- {
- return dim0;
- }
-
- while(vec_size > dim0)
- {
- vec_size >>= 1;
- }
-
- return vec_size;
-}
-
-/** Returns the suffix string of CPU kernel implementation names based on the given data type
- *
- * @param[in] data_type The data type the CPU kernel implemetation uses
- *
- * @return the suffix string of CPU kernel implementations
- */
-inline std::string cpu_impl_dt(const DataType &data_type)
-{
- std::string ret = "";
-
- switch(data_type)
- {
- case DataType::F32:
- ret = "fp32";
- break;
- case DataType::F16:
- ret = "fp16";
- break;
- case DataType::U8:
- ret = "u8";
- break;
- case DataType::S16:
- ret = "s16";
- break;
- case DataType::S32:
- ret = "s32";
- break;
- case DataType::QASYMM8:
- ret = "qu8";
- break;
- case DataType::QASYMM8_SIGNED:
- ret = "qs8";
- break;
- case DataType::QSYMM16:
- ret = "qs16";
- break;
- case DataType::QSYMM8_PER_CHANNEL:
- ret = "qp8";
- break;
- case DataType::BFLOAT16:
- ret = "bf16";
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported.");
- }
-
- return ret;
-}
-
#ifdef ARM_COMPUTE_ASSERTS_ENABLED
/** Print consecutive elements to an output stream.
*
diff --git a/arm_compute/core/Validate.h b/arm_compute/core/Validate.h
index e755cacae60f305cf1a94be6880682419ee2cf48..5bffc16f3b56831fed298cc8ac39c582df4e2b5a 100644
--- a/arm_compute/core/Validate.h
+++ b/arm_compute/core/Validate.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,13 @@
#ifndef ARM_COMPUTE_VALIDATE_H
#define ARM_COMPUTE_VALIDATE_H
+#include "arm_compute/core/utils/DataLayoutUtils.h"
+#include "arm_compute/core/utils/DataTypeUtils.h"
#include "arm_compute/core/Error.h"
+#include "arm_compute/core/utils/FormatUtils.h"
#include "arm_compute/core/IKernel.h"
#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/QuantizationInfo.h"
#include "arm_compute/core/Window.h"
#include
@@ -284,6 +288,60 @@ arm_compute::Status error_on_mismatching_dimensions(const char *function, const
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(...) \
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__))
+/** Return true if the given format has horizontal subsampling.
+ *
+ * @param[in] format Format to determine subsampling.
+ *
+ * @return True if the format can be subsampled horizontally.
+ */
+inline bool has_format_horizontal_subsampling(Format format)
+{
+ return (format == Format::YUYV422 || format == Format::UYVY422 || format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88) ? true : false;
+}
+
+/** Return true if the given format has vertical subsampling.
+ *
+ * @param[in] format Format to determine subsampling.
+ *
+ * @return True if the format can be subsampled vertically.
+ */
+inline bool has_format_vertical_subsampling(Format format)
+{
+ return (format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88) ? true : false;
+}
+
+/** Adjust tensor shape size if width or height are odd for a given multi-planar format. No modification is done for other formats.
+ *
+ * @note Adding here a few links discussing the issue of odd size and sharing the same solution:
+ * Android Source
+ * WebM
+ * libYUV
+ * YUVPlayer *
+ *
+ * @param[in, out] shape Tensor shape of 2D size
+ * @param[in] format Format of the tensor
+ *
+ * @return The adjusted tensor shape.
+ */
+inline TensorShape adjust_odd_shape(const TensorShape &shape, Format format)
+{
+ TensorShape output{ shape };
+
+ // Force width to be even for formats which require subsampling of the U and V channels
+ if(has_format_horizontal_subsampling(format))
+ {
+ output.set(0, (output.x() + 1) & ~1U);
+ }
+
+ // Force height to be even for formats which require subsampling of the U and V channels
+ if(has_format_vertical_subsampling(format))
+ {
+ output.set(1, (output.y() + 1) & ~1U);
+ }
+
+ return output;
+}
+
/** Return an error if the passed tensor objects are not even.
*
* @param[in] function Function in which the error occurred.
@@ -316,6 +374,37 @@ arm_compute::Status error_on_tensors_not_even(const char *function, const char *
#define ARM_COMPUTE_RETURN_ERROR_ON_TENSORS_NOT_EVEN(...) \
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_tensors_not_even(__func__, __FILE__, __LINE__, __VA_ARGS__))
+/** Calculate subsampled shape for a given format and channel
+ *
+ * @param[in] shape Shape of the tensor to calculate the extracted channel.
+ * @param[in] format Format of the tensor.
+ * @param[in] channel Channel to create tensor shape to be extracted.
+ *
+ * @return The subsampled tensor shape.
+ */
+inline TensorShape calculate_subsampled_shape(const TensorShape &shape, Format format, Channel channel = Channel::UNKNOWN)
+{
+ TensorShape output{ shape };
+
+ // Subsample shape only for U or V channel
+ if(Channel::U == channel || Channel::V == channel || Channel::UNKNOWN == channel)
+ {
+ // Subsample width for the tensor shape when channel is U or V
+ if(has_format_horizontal_subsampling(format))
+ {
+ output.set(0, output.x() / 2U);
+ }
+
+ // Subsample height for the tensor shape when channel is U or V
+ if(has_format_vertical_subsampling(format))
+ {
+ output.set(1, output.y() / 2U);
+ }
+ }
+
+ return output;
+}
+
/** Return an error if the passed tensor objects are not sub-sampled.
*
* @param[in] function Function in which the error occurred.
@@ -401,9 +490,9 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info_1 == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info_2 == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward(tensor_infos)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...));
- const std::array < const ITensorInfo *, 2 + sizeof...(Ts) > tensors_info_array{ { tensor_info_1, tensor_info_2, std::forward(tensor_infos)... } };
+ const std::array < const ITensorInfo *, 2 + sizeof...(Ts) > tensors_info_array{ { tensor_info_1, tensor_info_2, tensor_infos... } };
ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(std::next(tensors_info_array.cbegin()), tensors_info_array.cend(), [&](const ITensorInfo * tensor_info)
{
return detail::have_different_dimensions((*tensors_info_array.cbegin())->tensor_shape(), tensor_info->tensor_shape(), upper_dim);
@@ -429,7 +518,7 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_1 == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_2 == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward(tensors)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensors...));
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_shapes(function, file, line, upper_dim, tensor_1->info(), tensor_2->info(),
detail::get_tensor_info_t()(tensors)...));
return arm_compute::Status{};
@@ -454,10 +543,10 @@ inline arm_compute::Status error_on_mismatching_data_layouts(const char *functio
const ITensorInfo *tensor_info, Ts... tensor_infos)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward(tensor_infos)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...));
DataLayout &&tensor_data_layout = tensor_info->data_layout();
- const std::array tensors_infos_array{ { std::forward(tensor_infos)... } };
+ const std::array tensors_infos_array{ { tensor_infos... } };
ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(), [&](const ITensorInfo * tensor_info_obj)
{
return tensor_info_obj->data_layout() != tensor_data_layout;
@@ -505,10 +594,10 @@ inline arm_compute::Status error_on_mismatching_data_types(const char *function,
const ITensorInfo *tensor_info, Ts... tensor_infos)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward(tensor_infos)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...));
DataType &&tensor_data_type = tensor_info->data_type();
- const std::array tensors_infos_array{ { std::forward(tensor_infos)... } };
+ const std::array tensors_infos_array{ { tensor_infos... } };
ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(), [&](const ITensorInfo * tensor_info_obj)
{
return tensor_info_obj->data_type() != tensor_data_type;
@@ -531,7 +620,7 @@ inline arm_compute::Status error_on_mismatching_data_types(const char *function,
const ITensor *tensor, Ts... tensors)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward(tensors)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensors...));
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_types(function, file, line, tensor->info(),
detail::get_tensor_info_t()(tensors)...));
return arm_compute::Status{};
diff --git a/arm_compute/core/Window.h b/arm_compute/core/Window.h
index 440b942dcfc95115ac64aa7ca15c80ee78ad680d..8ae859f4b325902744fa67a40228035e6383e775 100644
--- a/arm_compute/core/Window.h
+++ b/arm_compute/core/Window.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020, 2022 Arm Limited.
+ * Copyright (c) 2016-2020, 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -31,7 +31,7 @@
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensorInfo.h"
-#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/utils/math/Math.h"
namespace arm_compute
{
diff --git a/arm_compute/core/experimental/PostOps.h b/arm_compute/core/experimental/PostOps.h
index 4ea90fc348931907d58d849ea7e777498d15b580..a5585bab5d6ea2e777d4e4703a6c30dc53b87adc 100644
--- a/arm_compute/core/experimental/PostOps.h
+++ b/arm_compute/core/experimental/PostOps.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,7 @@
#include "arm_compute/core/experimental/IPostOp.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
#include
@@ -159,4 +160,4 @@ public:
};
} // namespace experimental
} // namespace arm_compute
-#endif //ARM_COMPUTE_EXPERIMENTAL_POSTOPS
\ No newline at end of file
+#endif //ARM_COMPUTE_EXPERIMENTAL_POSTOPS
diff --git a/arm_compute/core/utils/ActivationFunctionUtils.h b/arm_compute/core/utils/ActivationFunctionUtils.h
new file mode 100644
index 0000000000000000000000000000000000000000..1cb66da13de6c17d9413268ca848498141260a12
--- /dev/null
+++ b/arm_compute/core/utils/ActivationFunctionUtils.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CORE_UTILS_ACTIVATIONFUNCTIONUTILS_H
+#define ARM_COMPUTE_CORE_UTILS_ACTIVATIONFUNCTIONUTILS_H
+
+#include "arm_compute/core/Types.h"
+
+#include
+
+namespace arm_compute
+{
+/** Translates a given activation function to a string.
+ *
+ * @param[in] act @ref ActivationLayerInfo::ActivationFunction to be translated to string.
+ *
+ * @return The string describing the activation function.
+ */
+const std::string &string_from_activation_func(const ActivationFunction &act);
+}
+#endif /*ARM_COMPUTE_CORE_UTILS_ACTIVATIONFUNCTIONUTILS_H */
diff --git a/arm_compute/core/utils/DataLayoutUtils.h b/arm_compute/core/utils/DataLayoutUtils.h
new file mode 100644
index 0000000000000000000000000000000000000000..399f55c63f6ad423ef5cac7d26c4c6202ef4629b
--- /dev/null
+++ b/arm_compute/core/utils/DataLayoutUtils.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CORE_UTILS_DATALAYOUTUTILS_H
+#define ARM_COMPUTE_CORE_UTILS_DATALAYOUTUTILS_H
+#include "arm_compute/core/Types.h"
+
+#include
+
+namespace arm_compute
+{
+/** Convert a data layout identity into a string.
+ *
+ * @param[in] dl @ref DataLayout to be translated to string.
+ *
+ * @return The string describing the data layout.
+ */
+const std::string &string_from_data_layout(DataLayout dl);
+}
+#endif /*ARM_COMPUTE_CORE_UTILS_DATALAYOUTUTILS_H */
diff --git a/arm_compute/core/utils/DataTypeUtils.h b/arm_compute/core/utils/DataTypeUtils.h
new file mode 100644
index 0000000000000000000000000000000000000000..cbb409c8a10c7ca10caf28a935fbcba9ce85936c
--- /dev/null
+++ b/arm_compute/core/utils/DataTypeUtils.h
@@ -0,0 +1,525 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H
+#define ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H
+
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+/** The size in bytes of the data type
+ *
+ * @param[in] data_type Input data type
+ *
+ * @return The size in bytes of the data type
+ */
+inline size_t data_size_from_type(DataType data_type)
+{
+ switch(data_type)
+ {
+ case DataType::U8:
+ case DataType::S8:
+ case DataType::QSYMM8:
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QSYMM8_PER_CHANNEL:
+ return 1;
+ case DataType::U16:
+ case DataType::S16:
+ case DataType::QSYMM16:
+ case DataType::QASYMM16:
+ case DataType::BFLOAT16:
+ case DataType::F16:
+ return 2;
+ case DataType::F32:
+ case DataType::U32:
+ case DataType::S32:
+ return 4;
+ case DataType::F64:
+ case DataType::U64:
+ case DataType::S64:
+ return 8;
+ case DataType::SIZET:
+ return sizeof(size_t);
+ default:
+ ARM_COMPUTE_ERROR("Invalid data type");
+ return 0;
+ }
+}
+
+/** The size in bytes of the data type
+ *
+ * @param[in] dt Input data type
+ *
+ * @return The size in bytes of the data type
+ */
+inline size_t element_size_from_data_type(DataType dt)
+{
+ switch(dt)
+ {
+ case DataType::S8:
+ case DataType::U8:
+ case DataType::QSYMM8:
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QSYMM8_PER_CHANNEL:
+ return 1;
+ case DataType::U16:
+ case DataType::S16:
+ case DataType::QSYMM16:
+ case DataType::QASYMM16:
+ case DataType::BFLOAT16:
+ case DataType::F16:
+ return 2;
+ case DataType::U32:
+ case DataType::S32:
+ case DataType::F32:
+ return 4;
+ case DataType::U64:
+ case DataType::S64:
+ return 8;
+ default:
+ ARM_COMPUTE_ERROR("Undefined element size for given data type");
+ return 0;
+ }
+}
+
+/** Return the data type used by a given single-planar pixel format
+ *
+ * @param[in] format Input format
+ *
+ * @return The size in bytes of the pixel format
+ */
+inline DataType data_type_from_format(Format format)
+{
+ switch(format)
+ {
+ case Format::U8:
+ case Format::UV88:
+ case Format::RGB888:
+ case Format::RGBA8888:
+ case Format::YUYV422:
+ case Format::UYVY422:
+ return DataType::U8;
+ case Format::U16:
+ return DataType::U16;
+ case Format::S16:
+ return DataType::S16;
+ case Format::U32:
+ return DataType::U32;
+ case Format::S32:
+ return DataType::S32;
+ case Format::BFLOAT16:
+ return DataType::BFLOAT16;
+ case Format::F16:
+ return DataType::F16;
+ case Format::F32:
+ return DataType::F32;
+ //Doesn't make sense for planar formats:
+ case Format::NV12:
+ case Format::NV21:
+ case Format::IYUV:
+ case Format::YUV444:
+ default:
+ ARM_COMPUTE_ERROR("Not supported data_type for given format");
+ return DataType::UNKNOWN;
+ }
+}
+
+/** Return the promoted data type of a given data type.
+ *
+ * @note If promoted data type is not supported an error will be thrown
+ *
+ * @param[in] dt Data type to get the promoted type of.
+ *
+ * @return Promoted data type
+ */
+inline DataType get_promoted_data_type(DataType dt)
+{
+ switch(dt)
+ {
+ case DataType::U8:
+ return DataType::U16;
+ case DataType::S8:
+ return DataType::S16;
+ case DataType::U16:
+ return DataType::U32;
+ case DataType::S16:
+ return DataType::S32;
+ case DataType::QSYMM8:
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QSYMM8_PER_CHANNEL:
+ case DataType::QSYMM16:
+ case DataType::QASYMM16:
+ case DataType::BFLOAT16:
+ case DataType::F16:
+ case DataType::U32:
+ case DataType::S32:
+ case DataType::F32:
+ ARM_COMPUTE_ERROR("Unsupported data type promotions!");
+ default:
+ ARM_COMPUTE_ERROR("Undefined data type!");
+ }
+ return DataType::UNKNOWN;
+}
+
+/** Compute the minimum and maximum values a data type can take
+ *
+ * @param[in] dt Data type to get the min/max bounds of
+ *
+ * @return A tuple (min,max) with the minimum and maximum values respectively wrapped in PixelValue.
+ */
+inline std::tuple get_min_max(DataType dt)
+{
+ PixelValue min{};
+ PixelValue max{};
+ switch(dt)
+ {
+ case DataType::U8:
+ case DataType::QASYMM8:
+ {
+ min = PixelValue(static_cast(std::numeric_limits::lowest()));
+ max = PixelValue(static_cast(std::numeric_limits::max()));
+ break;
+ }
+ case DataType::S8:
+ case DataType::QSYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QSYMM8_PER_CHANNEL:
+ {
+ min = PixelValue(static_cast(std::numeric_limits::lowest()));
+ max = PixelValue(static_cast(std::numeric_limits::max()));
+ break;
+ }
+ case DataType::U16:
+ case DataType::QASYMM16:
+ {
+ min = PixelValue(static_cast(std::numeric_limits::lowest()));
+ max = PixelValue(static_cast(std::numeric_limits::max()));
+ break;
+ }
+ case DataType::S16:
+ case DataType::QSYMM16:
+ {
+ min = PixelValue(static_cast(std::numeric_limits::lowest()));
+ max = PixelValue(static_cast(std::numeric_limits::max()));
+ break;
+ }
+ case DataType::U32:
+ {
+ min = PixelValue(std::numeric_limits::lowest());
+ max = PixelValue(std::numeric_limits::max());
+ break;
+ }
+ case DataType::S32:
+ {
+ min = PixelValue(std::numeric_limits::lowest());
+ max = PixelValue(std::numeric_limits::max());
+ break;
+ }
+ case DataType::BFLOAT16:
+ {
+ min = PixelValue(bfloat16::lowest());
+ max = PixelValue(bfloat16::max());
+ break;
+ }
+ case DataType::F16:
+ {
+ min = PixelValue(std::numeric_limits::lowest());
+ max = PixelValue(std::numeric_limits::max());
+ break;
+ }
+ case DataType::F32:
+ {
+ min = PixelValue(std::numeric_limits::lowest());
+ max = PixelValue(std::numeric_limits::max());
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Undefined data type!");
+ }
+ return std::make_tuple(min, max);
+}
+
+/** Convert a data type identity into a string.
+ *
+ * @param[in] dt @ref DataType to be translated to string.
+ *
+ * @return The string describing the data type.
+ */
+const std::string &string_from_data_type(DataType dt);
+
+/** Convert a string to DataType
+ *
+ * @param[in] name The name of the data type
+ *
+ * @return DataType
+ */
+DataType data_type_from_name(const std::string &name);
+
+/** Input Stream operator for @ref DataType
+ *
+ * @param[in] stream Stream to parse
+ * @param[out] data_type Output data type
+ *
+ * @return Updated stream
+ */
+inline ::std::istream &operator>>(::std::istream &stream, DataType &data_type)
+{
+ std::string value;
+ stream >> value;
+ data_type = data_type_from_name(value);
+ return stream;
+}
+
+/** Check if a given data type is of floating point type
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of floating point type, else false.
+ */
+inline bool is_data_type_float(DataType dt)
+{
+ switch(dt)
+ {
+ case DataType::F16:
+ case DataType::F32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Check if a given data type is of quantized type
+ *
+ * @note Quantized is considered a super-set of fixed-point and asymmetric data types.
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of quantized type, else false.
+ */
+inline bool is_data_type_quantized(DataType dt)
+{
+ switch(dt)
+ {
+ case DataType::QSYMM8:
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QSYMM8_PER_CHANNEL:
+ case DataType::QSYMM16:
+ case DataType::QASYMM16:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Check if a given data type is of asymmetric quantized type
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of asymmetric quantized type, else false.
+ */
+inline bool is_data_type_quantized_asymmetric(DataType dt)
+{
+ switch(dt)
+ {
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QASYMM16:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Check if a given data type is of asymmetric quantized signed type
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of asymmetric quantized signed type, else false.
+ */
+inline bool is_data_type_quantized_asymmetric_signed(DataType dt)
+{
+ switch(dt)
+ {
+ case DataType::QASYMM8_SIGNED:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Check if a given data type is of symmetric quantized type
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of symmetric quantized type, else false.
+ */
+inline bool is_data_type_quantized_symmetric(DataType dt)
+{
+ switch(dt)
+ {
+ case DataType::QSYMM8:
+ case DataType::QSYMM8_PER_CHANNEL:
+ case DataType::QSYMM16:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Check if a given data type is of per channel type
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of per channel type, else false.
+ */
+inline bool is_data_type_quantized_per_channel(DataType dt)
+{
+ switch(dt)
+ {
+ case DataType::QSYMM8_PER_CHANNEL:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Returns true if the value can be represented by the given data type
+ *
+ * @param[in] val value to be checked
+ * @param[in] dt data type that is checked
+ * @param[in] qinfo (Optional) quantization info if the data type is QASYMM8
+ *
+ * @return true if the data type can hold the value.
+ */
+template <typename T>
+bool check_value_range(T val, DataType dt, QuantizationInfo qinfo = QuantizationInfo())
+{
+ switch(dt)
+ {
+        case DataType::U8:
+        {
+            const auto val_u8 = static_cast<uint8_t>(val);
+            return ((val_u8 == val) && val >= std::numeric_limits<uint8_t>::lowest() && val <= std::numeric_limits<uint8_t>::max());
+        }
+        case DataType::QASYMM8:
+        {
+            double min = static_cast<double>(dequantize_qasymm8(0, qinfo));
+            double max = static_cast<double>(dequantize_qasymm8(std::numeric_limits<uint8_t>::max(), qinfo));
+            return ((double)val >= min && (double)val <= max);
+        }
+        case DataType::S8:
+        {
+            const auto val_s8 = static_cast<int8_t>(val);
+            return ((val_s8 == val) && val >= std::numeric_limits<int8_t>::lowest() && val <= std::numeric_limits<int8_t>::max());
+        }
+        case DataType::U16:
+        {
+            const auto val_u16 = static_cast<uint16_t>(val);
+            return ((val_u16 == val) && val >= std::numeric_limits<uint16_t>::lowest() && val <= std::numeric_limits<uint16_t>::max());
+        }
+        case DataType::S16:
+        {
+            const auto val_s16 = static_cast<int16_t>(val);
+            return ((val_s16 == val) && val >= std::numeric_limits<int16_t>::lowest() && val <= std::numeric_limits<int16_t>::max());
+        }
+        case DataType::U32:
+        {
+            const auto val_d64 = static_cast<double>(val);
+            const auto val_u32 = static_cast<uint32_t>(val);
+            return ((val_u32 == val_d64) && val_d64 >= std::numeric_limits<uint32_t>::lowest() && val_d64 <= std::numeric_limits<uint32_t>::max());
+        }
+        case DataType::S32:
+        {
+            const auto val_d64 = static_cast<double>(val);
+            const auto val_s32 = static_cast<int32_t>(val);
+            return ((val_s32 == val_d64) && val_d64 >= std::numeric_limits<int32_t>::lowest() && val_d64 <= std::numeric_limits<int32_t>::max());
+        }
+        case DataType::BFLOAT16:
+            return (val >= bfloat16::lowest() && val <= bfloat16::max());
+        case DataType::F16:
+            return (val >= std::numeric_limits<half>::lowest() && val <= std::numeric_limits<half>::max());
+        case DataType::F32:
+            return (val >= std::numeric_limits<float>::lowest() && val <= std::numeric_limits<float>::max());
+ default:
+ ARM_COMPUTE_ERROR("Data type not supported");
+ return false;
+ }
+}
+
+/** Returns the suffix string of CPU kernel implementation names based on the given data type
+ *
+ * @param[in] data_type The data type the CPU kernel implemetation uses
+ *
+ * @return the suffix string of CPU kernel implementations
+ */
+inline std::string cpu_impl_dt(const DataType &data_type)
+{
+ std::string ret = "";
+
+ switch(data_type)
+ {
+ case DataType::F32:
+ ret = "fp32";
+ break;
+ case DataType::F16:
+ ret = "fp16";
+ break;
+ case DataType::U8:
+ ret = "u8";
+ break;
+ case DataType::S16:
+ ret = "s16";
+ break;
+ case DataType::S32:
+ ret = "s32";
+ break;
+ case DataType::QASYMM8:
+ ret = "qu8";
+ break;
+ case DataType::QASYMM8_SIGNED:
+ ret = "qs8";
+ break;
+ case DataType::QSYMM16:
+ ret = "qs16";
+ break;
+ case DataType::QSYMM8_PER_CHANNEL:
+ ret = "qp8";
+ break;
+ case DataType::BFLOAT16:
+ ret = "bf16";
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported.");
+ }
+
+ return ret;
+}
+
+}
+#endif /*ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H */
diff --git a/arm_compute/core/utils/FormatUtils.h b/arm_compute/core/utils/FormatUtils.h
new file mode 100644
index 0000000000000000000000000000000000000000..afb0f78255cf806e2ee57ecec3d473906b392529
--- /dev/null
+++ b/arm_compute/core/utils/FormatUtils.h
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CORE_UTILS_FORMATUTILS_H
+#define ARM_COMPUTE_CORE_UTILS_FORMATUTILS_H
+
+#include "arm_compute/core/CoreTypes.h"
+#include "arm_compute/core/Error.h"
+
+namespace arm_compute
+{
+/** The size in bytes of the pixel format
+ *
+ * @param[in] format Input format
+ *
+ * @return The size in bytes of the pixel format
+ */
+inline size_t pixel_size_from_format(Format format)
+{
+ switch(format)
+ {
+ case Format::U8:
+ return 1;
+ case Format::U16:
+ case Format::S16:
+ case Format::BFLOAT16:
+ case Format::F16:
+ case Format::UV88:
+ case Format::YUYV422:
+ case Format::UYVY422:
+ return 2;
+ case Format::RGB888:
+ return 3;
+ case Format::RGBA8888:
+ return 4;
+ case Format::U32:
+ case Format::S32:
+ case Format::F32:
+ return 4;
+ //Doesn't make sense for planar formats:
+ case Format::NV12:
+ case Format::NV21:
+ case Format::IYUV:
+ case Format::YUV444:
+ default:
+ ARM_COMPUTE_ERROR("Undefined pixel size for given format");
+ return 0;
+ }
+}
+
+/** Return the plane index of a given channel given an input format.
+ *
+ * @param[in] format Input format
+ * @param[in] channel Input channel
+ *
+ * @return The plane index of the specific channel of the specific format
+ */
+inline int plane_idx_from_channel(Format format, Channel channel)
+{
+ switch(format)
+ {
+ // Single planar formats have a single plane
+ case Format::U8:
+ case Format::U16:
+ case Format::S16:
+ case Format::U32:
+ case Format::S32:
+ case Format::BFLOAT16:
+ case Format::F16:
+ case Format::F32:
+ case Format::UV88:
+ case Format::RGB888:
+ case Format::RGBA8888:
+ case Format::YUYV422:
+ case Format::UYVY422:
+ return 0;
+ // Multi planar formats
+ case Format::NV12:
+ case Format::NV21:
+ {
+ // Channel U and V share the same plane of format UV88
+ switch(channel)
+ {
+ case Channel::Y:
+ return 0;
+ case Channel::U:
+ case Channel::V:
+ return 1;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::IYUV:
+ case Format::YUV444:
+ {
+ switch(channel)
+ {
+ case Channel::Y:
+ return 0;
+ case Channel::U:
+ return 1;
+ case Channel::V:
+ return 2;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ default:
+ ARM_COMPUTE_ERROR("Not supported format");
+ return 0;
+ }
+}
+
+/** Return the channel index of a given channel given an input format.
+ *
+ * @param[in] format Input format
+ * @param[in] channel Input channel
+ *
+ * @return The channel index of the specific channel of the specific format
+ */
+inline int channel_idx_from_format(Format format, Channel channel)
+{
+ switch(format)
+ {
+ case Format::RGB888:
+ {
+ switch(channel)
+ {
+ case Channel::R:
+ return 0;
+ case Channel::G:
+ return 1;
+ case Channel::B:
+ return 2;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::RGBA8888:
+ {
+ switch(channel)
+ {
+ case Channel::R:
+ return 0;
+ case Channel::G:
+ return 1;
+ case Channel::B:
+ return 2;
+ case Channel::A:
+ return 3;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::YUYV422:
+ {
+ switch(channel)
+ {
+ case Channel::Y:
+ return 0;
+ case Channel::U:
+ return 1;
+ case Channel::V:
+ return 3;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::UYVY422:
+ {
+ switch(channel)
+ {
+ case Channel::Y:
+ return 1;
+ case Channel::U:
+ return 0;
+ case Channel::V:
+ return 2;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::NV12:
+ {
+ switch(channel)
+ {
+ case Channel::Y:
+ return 0;
+ case Channel::U:
+ return 0;
+ case Channel::V:
+ return 1;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::NV21:
+ {
+ switch(channel)
+ {
+ case Channel::Y:
+ return 0;
+ case Channel::U:
+ return 1;
+ case Channel::V:
+ return 0;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::YUV444:
+ case Format::IYUV:
+ {
+ switch(channel)
+ {
+ case Channel::Y:
+ return 0;
+ case Channel::U:
+ return 0;
+ case Channel::V:
+ return 0;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ default:
+ ARM_COMPUTE_ERROR("Not supported format");
+ return 0;
+ }
+}
+
+/** Return the number of planes for a given format
+ *
+ * @param[in] format Input format
+ *
+ * @return The number of planes for a given image format.
+ */
+inline size_t num_planes_from_format(Format format)
+{
+ switch(format)
+ {
+ case Format::U8:
+ case Format::S16:
+ case Format::U16:
+ case Format::S32:
+ case Format::U32:
+ case Format::BFLOAT16:
+ case Format::F16:
+ case Format::F32:
+ case Format::RGB888:
+ case Format::RGBA8888:
+ case Format::YUYV422:
+ case Format::UYVY422:
+ return 1;
+ case Format::NV12:
+ case Format::NV21:
+ return 2;
+ case Format::IYUV:
+ case Format::YUV444:
+ return 3;
+ default:
+ ARM_COMPUTE_ERROR("Not supported format");
+ return 0;
+ }
+}
+
+/** Return the number of channels for a given single-planar pixel format
+ *
+ * @param[in] format Input format
+ *
+ * @return The number of channels for a given image format.
+ */
+inline size_t num_channels_from_format(Format format)
+{
+ switch(format)
+ {
+ case Format::U8:
+ case Format::U16:
+ case Format::S16:
+ case Format::U32:
+ case Format::S32:
+ case Format::BFLOAT16:
+ case Format::F16:
+ case Format::F32:
+ return 1;
+ // Because the U and V channels are subsampled
+ // these formats appear like having only 2 channels:
+ case Format::YUYV422:
+ case Format::UYVY422:
+ return 2;
+ case Format::UV88:
+ return 2;
+ case Format::RGB888:
+ return 3;
+ case Format::RGBA8888:
+ return 4;
+ //Doesn't make sense for planar formats:
+ case Format::NV12:
+ case Format::NV21:
+ case Format::IYUV:
+ case Format::YUV444:
+ default:
+ return 0;
+ }
+}
+
+/** Convert a tensor format into a string.
+ *
+ * @param[in] format @ref Format to be translated to string.
+ *
+ * @return The string describing the format.
+ */
+const std::string &string_from_format(Format format);
+}
+#endif /*ARM_COMPUTE_CORE_UTILS_FORMATUTILS_H */
diff --git a/arm_compute/core/utils/InterpolationPolicyUtils.h b/arm_compute/core/utils/InterpolationPolicyUtils.h
new file mode 100644
index 0000000000000000000000000000000000000000..79f6e3aa5f14ac174d09ff63d1405cd006b74fea
--- /dev/null
+++ b/arm_compute/core/utils/InterpolationPolicyUtils.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CORE_UTILS_INTERPOLATIONPOLICYUTILS_H
+#define ARM_COMPUTE_CORE_UTILS_INTERPOLATIONPOLICYUTILS_H
+
+#include "arm_compute/core/Types.h"
+
+#include <string>
+
+namespace arm_compute
+{
+/** Translates a given interpolation policy to a string.
+ *
+ * @param[in] policy @ref InterpolationPolicy to be translated to string.
+ *
+ * @return The string describing the interpolation policy.
+ */
+const std::string &string_from_interpolation_policy(InterpolationPolicy policy);
+}
+#endif /*ARM_COMPUTE_CORE_UTILS_INTERPOLATIONPOLICYUTILS_H */
diff --git a/arm_compute/core/utils/StringUtils.h b/arm_compute/core/utils/StringUtils.h
new file mode 100644
index 0000000000000000000000000000000000000000..41f29b0901686592ea8e817adb012d917ebda91b
--- /dev/null
+++ b/arm_compute/core/utils/StringUtils.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CORE_UTILS_STRINGUTILS_H
+#define ARM_COMPUTE_CORE_UTILS_STRINGUTILS_H
+
+#include <string>
+#include <vector>
+
+namespace arm_compute
+{
+/** Lower a given string.
+ *
+ * @param[in] val Given string to lower.
+ *
+ * @return The lowered string
+ */
+std::string lower_string(const std::string &val);
+
+/** Raise a given string to upper case
+ *
+ * @param[in] val Given string to lower.
+ *
+ * @return The upper case string
+ */
+std::string upper_string(const std::string &val);
+
+/** Create a string with the float in full precision.
+ *
+ * @param val Floating point value
+ *
+ * @return String with the floating point value.
+ */
+std::string float_to_string_with_full_precision(float val);
+
+/** Join a sequence of strings with separator @p sep
+ *
+ * @param[in] strings Strings to join
+ * @param[in] sep Separator to join consecutive strings in the sequence
+ *
+ * @return std::string
+ */
+std::string join(const std::vector<std::string> strings, const std::string &sep);
+}
+#endif /*ARM_COMPUTE_CORE_UTILS_STRINGUTILS_H */
diff --git a/arm_compute/core/utils/helpers/AdjustVecSize.h b/arm_compute/core/utils/helpers/AdjustVecSize.h
new file mode 100644
index 0000000000000000000000000000000000000000..bbb3048b84c14fdf99427338a30ba0da2a56988a
--- /dev/null
+++ b/arm_compute/core/utils/helpers/AdjustVecSize.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_UTILS_ADJUSTVECSIZE_H
+#define ARM_COMPUTE_UTILS_ADJUSTVECSIZE_H
+
+#include "arm_compute/core/Error.h"
+
+namespace arm_compute
+{
+/** Returns the adjusted vector size in case it is less than the input's first dimension, getting rounded down to its closest valid vector size
+ *
+ * @param[in] vec_size vector size to be adjusted
+ * @param[in] dim0 size of the first dimension
+ *
+ * @return the number of element processed along the X axis per thread
+ */
+inline unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
+{
+ ARM_COMPUTE_ERROR_ON(vec_size > 16);
+
+ if((vec_size >= dim0) && (dim0 == 3))
+ {
+ return dim0;
+ }
+
+ while(vec_size > dim0)
+ {
+ vec_size >>= 1;
+ }
+
+ return vec_size;
+}
+}
+#endif /*ARM_COMPUTE_UTILS_ADJUSTVECSIZE_H */
diff --git a/arm_compute/core/utils/math/Math.h b/arm_compute/core/utils/math/Math.h
new file mode 100644
index 0000000000000000000000000000000000000000..c1dce7ff087a01a9abf6ad6aca2baa291956a030
--- /dev/null
+++ b/arm_compute/core/utils/math/Math.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2017-2018, 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_UTILS_MATH_H
+#define ARM_COMPUTE_UTILS_MATH_H
+
+namespace arm_compute
+{
+/** Calculate the rounded up quotient of val / m.
+ *
+ * @param[in] val Value to divide and round up.
+ * @param[in] m Value to divide by.
+ *
+ * @return the result.
+ */
+template <typename S, typename T>
+constexpr auto DIV_CEIL(S val, T m) -> decltype((val + m - 1) / m)
+{
+ return (val + m - 1) / m;
+}
+
+/** Computes the smallest number larger or equal to value that is a multiple of divisor.
+ *
+ * @param[in] value Lower bound value
+ * @param[in] divisor Value to compute multiple of.
+ *
+ * @return the result.
+ */
+template <typename S, typename T>
+inline auto ceil_to_multiple(S value, T divisor) -> decltype(((value + divisor - 1) / divisor) * divisor)
+{
+ ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0);
+ return DIV_CEIL(value, divisor) * divisor;
+}
+
+/** Computes the largest number smaller or equal to value that is a multiple of divisor.
+ *
+ * @param[in] value Upper bound value
+ * @param[in] divisor Value to compute multiple of.
+ *
+ * @return the result.
+ */
+template <typename S, typename T>
+inline auto floor_to_multiple(S value, T divisor) -> decltype((value / divisor) * divisor)
+{
+ ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0);
+ return (value / divisor) * divisor;
+}
+
+}
+#endif /*ARM_COMPUTE_UTILS_MATH_H */
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index f9352650b62cce52eeb328b221a51361b123af45..4c2037ab8d2be7b83735bf4a49196ef9c2f67bf9 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -28,6 +28,7 @@
#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Utils.h"
+#include "arm_compute/function_info/ConvolutionInfo.h"
#include "arm_compute/runtime/FunctionDescriptors.h"
#include "arm_compute/core/utils/helpers/tensor_transform.h"
@@ -63,7 +64,7 @@ inline TensorShape calculate_reduce_mean_shape(ITensorInfo *input, const Coordin
std::sort(axis_local.begin(), axis_local.begin() + reduction_ops);
for(int i = 0; i < reduction_ops; ++i)
{
- out_shape.remove_dimension(axis_local[i] - i);
+ out_shape.remove_dimension(axis_local[i] - i, false);
}
return out_shape;
}
@@ -404,8 +405,8 @@ inline TensorShape compute_transposed_shape(const ITensorInfo &input)
{
TensorShape shape_transposed{ input.tensor_shape() };
- shape_transposed.set(0, input.dimension(1));
- shape_transposed.set(1, input.dimension(0));
+ shape_transposed.set(0, input.dimension(1), false);
+ shape_transposed.set(1, input.dimension(0), false);
return shape_transposed;
}
@@ -432,8 +433,8 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input,
const int weights_width_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::WIDTH);
const int weights_height_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::HEIGHT);
- unsigned int output_width = 0;
- unsigned int output_height = 0;
+ unsigned int output_width = 0;
+ unsigned int output_height = 0;
std::tie(output_width, output_height) = scaled_dimensions(input_shape[width_idx], input_shape[height_idx],
weights_shape[weights_width_idx], weights_shape[weights_height_idx],
info.pad_stride_info, info.dilation);
@@ -683,8 +684,8 @@ inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &in
const DataLayout data_layout = winograd_info.output_data_layout;
// Compute output shape
- unsigned int output_width = 0;
- unsigned int output_height = 0;
+ unsigned int output_width = 0;
+ unsigned int output_height = 0;
std::tie(output_width, output_height) = scaled_dimensions(input_dimensions.width, input_dimensions.height,
kernel_size.width, kernel_size.height, conv_info);
@@ -724,7 +725,7 @@ inline TensorShape compute_deep_convolution_shape(const TensorShape &input_shape
const unsigned int weights_out_channel = weights_shape[3];
unsigned int output_width = 0;
unsigned int output_height = 0;
- std::tie(output_width, output_height) = scaled_dimensions(input_width, input_height, weights_width, weights_height, conv_info);
+ std::tie(output_width, output_height) = scaled_dimensions(input_width, input_height, weights_width, weights_height, conv_info);
TensorShape output_shape{ input_shape };
output_shape.set(idx_width, output_width);
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h b/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h
index 1ee3c7e3ec25ac34fa459f644b04e71175fe32cd..0b6089973448002da8b0985ddc8867e345af43e5 100644
--- a/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h
+++ b/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADCONTEXT
#include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/TensorInfo.h"
#include <memory>
@@ -56,16 +57,20 @@ enum class GpuLanguage
class GpuWorkloadContext
{
public:
+ class Impl;
+
/** Constructor */
GpuWorkloadContext(CLCompileContext *cl_compile_context);
- /** Allow instances of this class to be copy constructed */
- GpuWorkloadContext(const GpuWorkloadContext &config) = default;
- /** Allow instances of this class to be copied */
- GpuWorkloadContext &operator=(const GpuWorkloadContext &config) = default;
+ /** Destructor */
+ ~GpuWorkloadContext();
+ /** Prohibit instances of this class to be copy constructed */
+ GpuWorkloadContext(const GpuWorkloadContext &config) = delete;
+ /** Prohibit instances of this class to be copied */
+ GpuWorkloadContext &operator=(const GpuWorkloadContext &config) = delete;
/** Allow instances of this class to be move constructed */
- GpuWorkloadContext(GpuWorkloadContext &&config) = default;
+ GpuWorkloadContext(GpuWorkloadContext &&config);
/** Allow instances of this class to be moved */
- GpuWorkloadContext &operator=(GpuWorkloadContext &&config) = default;
+ GpuWorkloadContext &operator=(GpuWorkloadContext &&config);
/** Get @ref GpuLanguage of the context */
GpuLanguage gpu_language() const;
/** Get @ref GpuTarget of the context */
@@ -75,9 +80,33 @@ public:
*/
const CLCompileContext *cl_compile_context() const;
+ /** Create a @ref TensorInfo associated with the workload context.
+ *
+ * @return TensorInfo Newly created tensor info
+ */
+    template <typename... TArgs>
+    TensorInfo create_tensor_info(TArgs &&... args)
+    {
+        auto tensor_info = TensorInfo(std::forward<TArgs>(args)...);
+        register_user_tensor(tensor_info);
+        return tensor_info;
+    }
+
+ /** Get the internal implementation */
+ Impl &implementation();
+
+ /** Get the internal implementation */
+ const Impl &implementation() const;
+
private:
- GpuLanguage _gpu_language{ GpuLanguage::Unknown };
- CLCompileContext *_cl_compile_ctx{ nullptr };
+ /** Set a new ID to the tensor info and register its memory descriptor to the context.
+ *
+ * @param[in,out] tensor_info @ref ITensorInfo to be registered.
+ */
+ void register_user_tensor(ITensorInfo &tensor_info);
+
+ /** Internal implementation */
+    std::unique_ptr<Impl> _impl;
};
} // namespace dynamic_fusion
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h b/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h
index 155df293bf3789195959066c86100cd1f49f1e6b..75c2b1f5289e8f065e7186a3a94523e78f31d8ba 100644
--- a/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h
+++ b/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,6 @@
#ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSKETCH
#define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSKETCH
-#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
#include <memory>
@@ -62,30 +61,8 @@ public:
const Implementation &implementation() const;
/** Get the gpu workload context of this sketch */
const GpuWorkloadContext *gpu_context() const;
- /** Create a @ref TensorInfo associated with the workload sketch.
- *
- * @return TensorInfo Newly created tensor info
- */
-    template <typename... Args>
-    TensorInfo create_tensor_info(Args &&... args)
-    {
-        auto tensor_info = TensorInfo(std::forward<Args>(args)...);
- register_new_tensor(tensor_info);
- return tensor_info;
- }
- /** Create a default @ref TensorInfo associated with the workload sketch
- * It is usually used by user input or output tensors
- *
- * @return TensorInfo Newly created tensor info
- */
- TensorInfo create_tensor_info();
private:
- /** Register a new tensor by setting a new id to it and register its memory descriptor in the sketch
- *
- * @param[in,out] tensor_info @ref ITensorInfo that will be registered
- */
- void register_new_tensor(ITensorInfo &tensor_info);
std::unique_ptr<Implementation> _impl; /**< Internal opaque implementation*/
};
diff --git a/arm_compute/function_info/ActivationLayerInfo.h b/arm_compute/function_info/ActivationLayerInfo.h
new file mode 100644
index 0000000000000000000000000000000000000000..84e962cb3a229f76a33e829b9848ab51a30a22d5
--- /dev/null
+++ b/arm_compute/function_info/ActivationLayerInfo.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_ARM_COMPUTE_FUNCTION_INFO_ACTIVATIONLAYERINFO
+#define ACL_ARM_COMPUTE_FUNCTION_INFO_ACTIVATIONLAYERINFO
+
+#include "arm_compute/core/CoreTypes.h"
+#include "arm_compute/core/QuantizationInfo.h"
+
+#include <array>
+
+namespace arm_compute
+{
+/** Available activation functions */
+enum class ActivationFunction
+{
+ LOGISTIC, /**< Logistic ( \f$ f(x) = \frac{1}{1 + e^{-x}} \f$ ) */
+ TANH, /**< Hyperbolic tangent ( \f$ f(x) = a \cdot tanh(b \cdot x) \f$ ) */
+ RELU, /**< Rectifier ( \f$ f(x) = max(0,x) \f$ ) */
+ BOUNDED_RELU, /**< Upper Bounded Rectifier ( \f$ f(x) = min(a, max(0,x)) \f$ ) */
+ LU_BOUNDED_RELU, /**< Lower and Upper Bounded Rectifier ( \f$ f(x) = min(a, max(b,x)) \f$ ) */
+ LEAKY_RELU, /**< Leaky Rectifier ( \f$ f(x) = \begin{cases} \alpha x & \quad \text{if } x \text{ < 0}\\ x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */
+ SOFT_RELU, /**< Soft Rectifier ( \f$ f(x)= log(1+e^x) \f$ ) */
+ ELU, /**< Exponential Linear Unit ( \f$ f(x) = \begin{cases} \alpha (exp(x) - 1) & \quad \text{if } x \text{ < 0}\\ x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */
+ ABS, /**< Absolute ( \f$ f(x)= |x| \f$ ) */
+ SQUARE, /**< Square ( \f$ f(x)= x^2 \f$ )*/
+ SQRT, /**< Square root ( \f$ f(x) = \sqrt{x} \f$ )*/
+ LINEAR, /**< Linear ( \f$ f(x)= ax + b \f$ ) */
+ IDENTITY, /**< Identity ( \f$ f(x)= x \f$ ) */
+ HARD_SWISH, /**< Hard-swish ( \f$ f(x) = (x \text{ReLU6}(x+3))/6 = x \min(\max(0,x+3),6)/6 \f$ ) */
+ SWISH, /**< Swish ( \f$ f(x) = \frac{x}{1 + e^{-ax}} = x \text{logistic}(ax) \f$ ) */
+ GELU /**< GELU ( \f$ f(x) = x * 1/2 * 1 + erf(x / \sqrt{2}) \f$ ) */
+};
+/** Activation Layer Information class */
+class ActivationLayerInfo
+{
+public:
+ typedef arm_compute::ActivationFunction ActivationFunction;
+
+ /** Lookup table */
+ using LookupTable256 = std::array<qasymm8_t, 256>;
+
+ ActivationLayerInfo() = default;
+ /** Default Constructor
+ *
+ * @param[in] f The activation function to use.
+ * @param[in] a (Optional) The alpha parameter used by some activation functions
+ * (@ref ActivationFunction::BOUNDED_RELU, @ref ActivationFunction::LU_BOUNDED_RELU, @ref ActivationFunction::LINEAR, @ref ActivationFunction::TANH).
+ * @param[in] b (Optional) The beta parameter used by some activation functions (@ref ActivationFunction::LINEAR, @ref ActivationFunction::LU_BOUNDED_RELU, @ref ActivationFunction::TANH).
+ */
+ ActivationLayerInfo(ActivationFunction f, float a = 0.0f, float b = 0.0f)
+ : _act(f), _a(a), _b(b), _enabled(true)
+ {
+ }
+ /** Get the type of activation function */
+ ActivationFunction activation() const
+ {
+ return _act;
+ }
+ /** Get the alpha value */
+ float a() const
+ {
+ return _a;
+ }
+ /** Get the beta value */
+ float b() const
+ {
+ return _b;
+ }
+ /** Check if initialised */
+ bool enabled() const
+ {
+ return _enabled;
+ }
+
+#ifdef __aarch64__
+ const LookupTable256 &lut() const
+ {
+ return _lut;
+ }
+ void setLookupTable256(LookupTable256 &lut)
+ {
+ _lut = std::move(lut);
+ }
+#endif // __aarch64__
+private:
+ ActivationFunction _act = { ActivationLayerInfo::ActivationFunction::IDENTITY };
+ float _a = {};
+ float _b = {};
+ bool _enabled = { false };
+
+#ifdef __aarch64__
+ LookupTable256 _lut = {};
+#endif // __aarch64__
+};
+} // namespace arm_compute
+#endif /* ACL_ARM_COMPUTE_FUNCTION_INFO_ACTIVATIONLAYERINFO */
diff --git a/arm_compute/function_info/ConvolutionInfo.h b/arm_compute/function_info/ConvolutionInfo.h
new file mode 100644
index 0000000000000000000000000000000000000000..c27dc523c868ddf4abb83eb471ab12a9431e6115
--- /dev/null
+++ b/arm_compute/function_info/ConvolutionInfo.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_ARM_COMPUTE_FUNCTION_INFO_CONVOLUTIONINFO
+#define ACL_ARM_COMPUTE_FUNCTION_INFO_CONVOLUTIONINFO
+
+#include "arm_compute/core/CoreTypes.h"
+#include "arm_compute/core/Size2D.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+
+namespace arm_compute
+{
+struct ConvolutionInfo
+{
+ ConvolutionInfo() = default;
+ ConvolutionInfo(const PadStrideInfo &pad_stride_info, unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
+ : pad_stride_info(pad_stride_info), depth_multiplier(depth_multiplier), act_info(act_info), dilation(dilation)
+ {
+ }
+ PadStrideInfo pad_stride_info{}; /**< Convolution info (Pads, strides,...) */
+ unsigned int depth_multiplier{ 1 }; /**< Multiplier to apply to input's depth to retrieve the output depth. Defaults to 1 */
+ ActivationLayerInfo act_info{}; /**< Fused activation to apply after convolution. */
+ Size2D dilation{ Size2D(1, 1) }; /**< Dilation, in elements, across x and y. Defaults to (1, 1). */
+};
+} // namespace arm_compute
+#endif /* ACL_ARM_COMPUTE_FUNCTION_INFO_CONVOLUTIONINFO */
diff --git a/arm_compute/function_info/FullyConnectedLayerInfo.h b/arm_compute/function_info/FullyConnectedLayerInfo.h
new file mode 100644
index 0000000000000000000000000000000000000000..5f5578eaddb01e65e06fceaff2ade332db51dcda
--- /dev/null
+++ b/arm_compute/function_info/FullyConnectedLayerInfo.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_ARM_COMPUTE_FUNCTION_INFO_FULLYCONNECTEDLAYERINFO
+#define ACL_ARM_COMPUTE_FUNCTION_INFO_FULLYCONNECTEDLAYERINFO
+
+#include "arm_compute/core/CoreTypes.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+
+namespace arm_compute
+{
+/** Fully connected layer info */
+struct FullyConnectedLayerInfo
+{
+ /* Fused-activation parameters */
+ ActivationLayerInfo activation_info{}; /**< Fused activation to apply after the matrix multiplication. */
+ /* Information about weights */
+ DataLayout weights_trained_layout{ DataLayout::NCHW }; /**< Layout that the weights have been trained with. */
+ bool transpose_weights{ true }; /**< Transpose weights if true. */
+ bool are_weights_reshaped{ false }; /**< @deprecated Reshape the weights tensor if false. */
+ bool retain_internal_weights{ false }; /**< Retain internal reshaped weights. */
+ bool enable_fast_math{ false }; /**< Enable fast math computation. */
+ /* Other parameters */
+ bool fp_mixed_precision{ false }; /**< Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. */
+
+ /** Sets the weights trained data layout
+ *
+ * @param[in] layout Data layout that the weights were trained with
+ *
+ * @return Updated object
+ */
+ FullyConnectedLayerInfo &set_weights_trained_layout(DataLayout layout)
+ {
+ weights_trained_layout = layout;
+ return *this;
+ }
+ /** Sets the transpose weights flag
+ *
+ * @param[in] should_transpose_weights Boolean flag indicating if weights should be transposed
+ *
+ * @return Updated object
+ */
+ FullyConnectedLayerInfo &set_transpose_weights(bool should_transpose_weights)
+ {
+ transpose_weights = should_transpose_weights;
+ return *this;
+ }
+};
+
+} // namespace arm_compute
+#endif /* ACL_ARM_COMPUTE_FUNCTION_INFO_FULLYCONNECTEDLAYERINFO */
diff --git a/arm_compute/function_info/GEMMInfo.h b/arm_compute/function_info/GEMMInfo.h
new file mode 100644
index 0000000000000000000000000000000000000000..daaf86243afe190b2a50f5b69580e7ab7be82a60
--- /dev/null
+++ b/arm_compute/function_info/GEMMInfo.h
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_ARM_COMPUTE_FUNCTION_INFO_GEMMINFO
+#define ACL_ARM_COMPUTE_FUNCTION_INFO_GEMMINFO
+
+#include "arm_compute/core/CoreTypes.h"
+#include "arm_compute/core/experimental/IPostOp.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include <vector>
+
+namespace arm_compute
+{
+class ITensorInfo;
+/** GEMMLowp output stage type */
+enum class GEMMLowpOutputStageType
+{
+ NONE, /**< No quantization */
+ QUANTIZE_DOWN, /**< Quantize using an integer multiplication */
+ QUANTIZE_DOWN_FIXEDPOINT, /**< Quantize using a fixed point multiplication */
+ QUANTIZE_DOWN_FLOAT /**< Quantize using a floating point multiplication */
+};
+
+/** GEMMLowp output stage info */
+struct GEMMLowpOutputStageInfo
+{
+ GEMMLowpOutputStageType type{ GEMMLowpOutputStageType::NONE }; /**< GEMMLowp output stage type */
+ int32_t gemmlowp_offset{ 0 }; /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */
+ int32_t gemmlowp_multiplier{ 0 }; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
+ int32_t gemmlowp_shift{ 0 }; /**< GEMMLowp output stage shift used for quantizing to uint8 */
+ int32_t gemmlowp_min_bound{ std::numeric_limits<int32_t>::lowest() }; /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */
+ int32_t gemmlowp_max_bound{ std::numeric_limits<int32_t>::max() }; /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */
+ std::vector<int32_t> gemmlowp_multipliers{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
+ std::vector<int32_t> gemmlowp_shifts{}; /**< GEMMLowp output stage shift values used for quantizing to QASYMM8 */
+ float gemmlowp_real_multiplier{ 0 }; /**< GEMMLowp output stage real multiplier used for quantizing to QASYMM8 */
+ bool is_quantized_per_channel{ false }; /**< GEMMLowp quantized per-channel flag */
+ DataType output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */
+};
+/** GEMM information class. This class stores the necessary information to compute GEMM functions
+ *
+ * This object also contains the information about how matrix A and matrix B have been reshaped
+ *
+ */
+class GEMMInfo
+{
+public:
+ /** Default constructor */
+ GEMMInfo() noexcept
+ : _is_a_reshaped(false),
+ _is_b_reshaped(false),
+ _reshape_b_only_on_first_run(true),
+ _depth_output_gemm3d(0),
+ _reinterpret_input_as_3d(false),
+ _retain_internal_weights(false),
+ _gemmlowp_output_stage(),
+ _fast_math(false),
+ _fp_mixed_precision(false),
+ _broadcast_bias(false),
+ _pretranspose_A(false),
+ _pretranspose_B(false),
+ _activation_info(),
+ _post_ops(),
+ _fixed_format(false),
+ _weight_format(arm_compute::WeightFormat::UNSPECIFIED)
+ {
+ }
+ /** Constructor
+ *
+ * @param[in] is_a_reshaped True if the matrix A has been reshaped
+ * @param[in] is_b_reshaped True if the matrix B has been reshaped
+ * @param[in] reshape_b_only_on_first_run Reshape matrix B only for the first run
+ * @param[in] depth_output_gemm3d (Optional) Depth (third dimension) of the output tensor to be used with the GEMM3D kernel
+ * If 0 the output will not be reinterpreted as 3D. Default 0
+ * @param[in] reinterpret_input_as_3d (Optional) Reinterpret the input as 3D tensor. (i.e. this flag should be set to true when GEMM is used
+ * to perform 1x1 convolutions with the NHWC data layout)
+ * @param[in] retain_internal_weights (Optional) Retain the weights tensor from previous run
+ * @param[in] gemmlowp_output_stage (Optional) GEMMLowp Output stage info
+ * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
+ * @param[in] fast_math (Optional) Use a data type of shorter width to improve performance
+ * @param[in] broadcast_bias (Optional) Broadcast the shape of the bias tensor from a vector to a matrix.
+ * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication
+ * @param[in] post_ops (Optional) A sequence of post operations that are performed after the main operation.
+ * @param[in] fixed_format (Optional) Specify the selection of fixed format kernels for variable weights support in GEMM. These kernels expect the weights tensor to be in amemory format that is fixed by the kernel itself. For more information, see arm_compute::WeightFormat.
+ * @param[in] weight_format (Optional) arm_gemm:WeightFormat enumeration requested by the user. Default is arm_compute::WeightFormat::UNSPECIFIED.
+ */
+ GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool retain_internal_weights = false,
+ GEMMLowpOutputStageInfo gemmlowp_output_stage = GEMMLowpOutputStageInfo(), bool fp_mixed_precision = false, bool fast_math = false, bool broadcast_bias = false,
+ const ActivationLayerInfo &activation_info = ActivationLayerInfo(), const experimental::PostOpList<ITensorInfo *> &post_ops = experimental::PostOpList<ITensorInfo *>(),
+ bool fixed_format = false, arm_compute::WeightFormat weight_format = arm_compute::WeightFormat::UNSPECIFIED) noexcept
+ : _is_a_reshaped(is_a_reshaped),
+ _is_b_reshaped(is_b_reshaped),
+ _reshape_b_only_on_first_run(reshape_b_only_on_first_run),
+ _depth_output_gemm3d(depth_output_gemm3d),
+ _reinterpret_input_as_3d(reinterpret_input_as_3d),
+ _retain_internal_weights(retain_internal_weights),
+ _gemmlowp_output_stage(gemmlowp_output_stage),
+ _fast_math(fast_math),
+ _fp_mixed_precision(fp_mixed_precision),
+ _broadcast_bias(broadcast_bias),
+ _pretranspose_A(false),
+ _pretranspose_B(false),
+ _activation_info(activation_info),
+ _post_ops(post_ops),
+ _fixed_format(fixed_format),
+ _weight_format(weight_format)
+ {
+ }
+ /** Flag which specifies if the matrix A has been reshaped
+ *
+ * @return True if the matrix A has been reshaped
+ */
+ bool is_a_reshaped() const
+ {
+ return _is_a_reshaped;
+ };
+ /** Flag which specifies if the matrix B has been reshaped
+ *
+ * @return True if the matrix B has been reshaped
+ */
+ bool is_b_reshaped() const
+ {
+ return _is_b_reshaped;
+ };
+ /** Flag which specifies if the reshape of matrix B should executed only for the first
+ *
+ * @note This flag could be set to TRUE when GEMM is used to accelerate convolution layer
+ *
+ * @return True if the reshaped of matrix B happens only for the first run
+ */
+ bool reshape_b_only_on_first_run() const
+ {
+ return _reshape_b_only_on_first_run;
+ };
+ /** Depth of the output when GEMM output is reinterpreted as 3D tensor
+ *
+ * @return the depth of the output tensor
+ */
+ int depth_output_gemm3d() const
+ {
+ return _depth_output_gemm3d;
+ };
+ /** Flag which specifies if the input tensor has to be reinterpreted as 3D
+ *
+ * @return True if the input tensor has to be reinterpreted as 3D tensor
+ */
+ bool reinterpret_input_as_3d() const
+ {
+ return _reinterpret_input_as_3d;
+ };
+ /** Flag which specifies if the weights tensor has to be retained from previous run
+ *
+ * @return True if the weights tensor has to be retained
+ */
+ bool retain_internal_weights() const
+ {
+ return _retain_internal_weights;
+ };
+ /** GEMMLowp output stage
+ *
+ * @return the GEMMLowp output stage info
+ */
+ GEMMLowpOutputStageInfo gemmlowp_output_stage() const
+ {
+ return _gemmlowp_output_stage;
+ };
+ /** Sets GEMMLowp output stage
+ *
+ * @param[in] output_stage Output stage to set
+ */
+ void set_gemmlowp_output_stage(GEMMLowpOutputStageInfo &output_stage)
+ {
+ _gemmlowp_output_stage = output_stage;
+ };
+ /** Flag which specifies if a wider accumulator should be used.
+ *
+ * @return True if a wider accumulator has to be used
+ */
+ bool fp_mixed_precision() const
+ {
+ return _fp_mixed_precision;
+ };
+ /** Flag which specifies if a shorter accumulator to be used.
+ *
+ * @return True if a shorter accumulator has to be used
+ */
+ bool fast_math() const
+ {
+ return _fast_math;
+ };
+ /** Set fast math flag
+ *
+ * @param[in] fast_math Flag to set
+ */
+ void set_fast_math(bool fast_math)
+ {
+ _fast_math = fast_math;
+ }
+ /** Flag which specifies whether to broadcast the shape of the bias tensor.
+ *
+ * @return True if the shape of the bias tensor is to be broadcasted.
+ */
+ bool broadcast_bias() const
+ {
+ return _broadcast_bias;
+ };
+ /** Flag which specifies whether A should be pre-transposed if supported.
+ *
+ * @return True if A should be pre-transposed else false.
+ */
+ bool pretranspose_A() const
+ {
+ return _pretranspose_A;
+ };
+ /** Set pre-transpose A flag
+ *
+ * @param[in] flag Flag to set
+ */
+ void set_pretranspose_A(bool flag)
+ {
+ _pretranspose_A = flag;
+ }
+ /** Flag which specifies whether b should be pre-transposed if supported.
+ *
+ * @return True if b should be pre-transposed else false.
+ */
+ bool pretranspose_B() const
+ {
+ return _pretranspose_B;
+ };
+ /** Set pre-transpose b flag
+ *
+ * @param[in] flag Flag to set
+ */
+ void set_pretranspose_B(bool flag)
+ {
+ _pretranspose_B = flag;
+ }
+ /** Activation layer to apply after the matrix multiplication
+ *
+ * @return ActivationLayerInfo object
+ */
+ ActivationLayerInfo activation_info() const
+ {
+ return _activation_info;
+ }
+ /** Set activation layer info
+ *
+ * @param[in] activation_info ActivationLayerInfo object to set
+ */
+ void set_activation_info(const ActivationLayerInfo &activation_info)
+ {
+ _activation_info = activation_info;
+ }
+ /** Post operations to apply after the matrix multiplication
+ *
+ * @return experimental::PostOpList object
+ */
+ const experimental::PostOpList<ITensorInfo *> &post_ops() const
+ {
+ return _post_ops;
+ }
+ /** Set post ops
+ *
+ * @param[in] post_ops experimental::PostOpList object to set
+ */
+ void set_post_ops(const experimental::PostOpList<ITensorInfo *> &post_ops)
+ {
+ _post_ops = post_ops;
+ }
+ /** Flag which specifies if the GEMM operation is running fixed-format kernels.
+ *
+ * @return True if the GEMM operation is running fixed-format kernel else false.
+ */
+ bool fixed_format() const
+ {
+ return _fixed_format;
+ }
+
+ /** Set fixed-format flag
+ *
+ * @param[in] fixed_format sets whether or not to use fixed-format kernels
+ */
+ void set_fixed_format(bool fixed_format)
+ {
+ _fixed_format = fixed_format;
+ }
+
+ arm_compute::WeightFormat weight_format() const
+ {
+ return _weight_format;
+ }
+
+ /** Set weight format to be used
+ *
+ * @param[in] weight_format arm_compute::WeightFormat enumeration
+ */
+ void set_weight_format(arm_compute::WeightFormat weight_format)
+ {
+ _weight_format = weight_format;
+ }
+
+private:
+ bool _is_a_reshaped;
+ bool _is_b_reshaped;
+ bool _reshape_b_only_on_first_run;
+ int _depth_output_gemm3d;
+ bool _reinterpret_input_as_3d;
+ bool _retain_internal_weights;
+ GEMMLowpOutputStageInfo _gemmlowp_output_stage;
+ bool _fast_math;
+ bool _fp_mixed_precision;
+ bool _broadcast_bias;
+ bool _pretranspose_A;
+ bool _pretranspose_B;
+ ActivationLayerInfo _activation_info;
+ experimental::PostOpList<ITensorInfo *> _post_ops;
+ bool _fixed_format;
+ arm_compute::WeightFormat _weight_format;
+};
+} //namespace arm_compute
+#endif /* ACL_ARM_COMPUTE_FUNCTION_INFO_GEMMINFO */
diff --git a/arm_compute/function_info/MatMulInfo.h b/arm_compute/function_info/MatMulInfo.h
new file mode 100644
index 0000000000000000000000000000000000000000..cd9ef1f4d957766c9c9a87450a27403210b0ef56
--- /dev/null
+++ b/arm_compute/function_info/MatMulInfo.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_ARM_COMPUTE_FUNCTION_INFO_MATMULINFO
+#define ACL_ARM_COMPUTE_FUNCTION_INFO_MATMULINFO
+
+namespace arm_compute
+{
+/** Class for holding information related to matrix multiplication function
+ */
+class MatMulInfo
+{
+public:
+ /* Get Adjoint LHS flag value */
+ bool adj_lhs() const
+ {
+ return _adj_lhs;
+ }
+ /* Get Adjoint RHS flag value */
+ bool adj_rhs() const
+ {
+ return _adj_rhs;
+ }
+ /* Set Adjoint LHS flag */
+ MatMulInfo &adj_lhs(bool adj_lhs)
+ {
+ _adj_lhs = adj_lhs;
+ return *this;
+ }
+ /* Set Adjoint RHS flag */
+ MatMulInfo &adj_rhs(bool adj_rhs)
+ {
+ _adj_rhs = adj_rhs;
+ return *this;
+ }
+
+private:
+ bool _adj_lhs{ false };
+ bool _adj_rhs{ false };
+};
+} // namespace arm_compute
+#endif /* ACL_ARM_COMPUTE_FUNCTION_INFO_MATMULINFO */
diff --git a/arm_compute/graph/Graph.h b/arm_compute/graph/Graph.h
index d8d3feb1f7f657c6e9a0a09eab6abebc4e1208e1..806d84c3fdb9daa4a50abc272a3928546ea4ee64 100644
--- a/arm_compute/graph/Graph.h
+++ b/arm_compute/graph/Graph.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020,2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,7 +30,6 @@
#include "arm_compute/graph/Types.h"
#include "support/Mutex.h"
-#include "support/ToolchainSupport.h"
#include <map>