diff --git a/Android.bp b/Android.bp
index 14290b9e1c9d0926add9eb57a6c0054187e7c025..768120577050042ad02419034385febd838a8c7e 100644
--- a/Android.bp
+++ b/Android.bp
@@ -28,12 +28,6 @@ opencl_srcs = [
"src/core/CL/cl_kernels/common/elementwise_operation_quantized.cl",
"src/core/CL/cl_kernels/common/elementwise_unary.cl",
"src/core/CL/cl_kernels/common/elementwise_unary_quantized.cl",
- "src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/fp_post_ops_act_eltwise_op_act.h",
- "src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl",
- "src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped.cl",
- "src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped_only_rhs.cl",
- "src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/fp_elementwise_op_helpers.h",
- "src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h",
"src/core/CL/cl_kernels/common/fft.cl",
"src/core/CL/cl_kernels/common/fft_digit_reverse.cl",
"src/core/CL/cl_kernels/common/fft_scale.cl",
@@ -53,6 +47,7 @@ opencl_srcs = [
"src/core/CL/cl_kernels/common/mat_mul.cl",
"src/core/CL/cl_kernels/common/mat_mul_mmul.cl",
"src/core/CL/cl_kernels/common/mat_mul_quantized.cl",
+ "src/core/CL/cl_kernels/common/mat_mul_quantized_mmul.cl",
"src/core/CL/cl_kernels/common/mean_stddev_normalization.cl",
"src/core/CL/cl_kernels/common/memset.cl",
"src/core/CL/cl_kernels/common/minmax_layer.cl",
@@ -73,7 +68,6 @@ opencl_srcs = [
"src/core/CL/cl_kernels/common/select.cl",
"src/core/CL/cl_kernels/common/slice_ops.cl",
"src/core/CL/cl_kernels/common/softmax_layer.cl",
- "src/core/CL/cl_kernels/common/softmax_layer_quantized.cl",
"src/core/CL/cl_kernels/common/stack_layer.cl",
"src/core/CL/cl_kernels/common/tile.cl",
"src/core/CL/cl_kernels/common/transpose.cl",
@@ -218,9 +212,12 @@ cc_library_static {
"src/core/AccessWindowAutoPadding.cpp",
"src/core/AccessWindowStatic.cpp",
"src/core/AccessWindowTranspose.cpp",
+ "src/core/CL/CLCommandBuffer.cpp",
+ "src/core/CL/CLCompatCommandBuffer.cpp",
"src/core/CL/CLCompileContext.cpp",
"src/core/CL/CLHelpers.cpp",
"src/core/CL/CLKernelLibrary.cpp",
+ "src/core/CL/CLMutableCommandBuffer.cpp",
"src/core/CL/CLUtils.cpp",
"src/core/CL/DefaultLWSHeuristics.cpp",
"src/core/CL/ICLKernel.cpp",
@@ -396,6 +393,7 @@ cc_library_static {
"src/core/Validate.cpp",
"src/core/Version.cpp",
"src/core/helpers/SoftmaxHelpers.cpp",
+ "src/core/helpers/Utils.cpp",
"src/core/helpers/WindowHelpers.cpp",
"src/core/utils/ActivationFunctionUtils.cpp",
"src/core/utils/AssemblyUtils.cpp",
@@ -403,6 +401,7 @@ cc_library_static {
"src/core/utils/DataTypeUtils.cpp",
"src/core/utils/FormatUtils.cpp",
"src/core/utils/InterpolationPolicyUtils.cpp",
+ "src/core/utils/Math.cpp",
"src/core/utils/ScaleUtils.cpp",
"src/core/utils/StringUtils.cpp",
"src/core/utils/helpers/fft.cpp",
@@ -485,11 +484,9 @@ cc_library_static {
"src/cpu/kernels/boundingboxtransform/generic/neon/fp32.cpp",
"src/cpu/kernels/boundingboxtransform/generic/neon/impl.cpp",
"src/cpu/kernels/boundingboxtransform/generic/neon/qsymm16.cpp",
- "src/cpu/kernels/cast/generic/neon/bfloat16.cpp",
"src/cpu/kernels/cast/generic/neon/fp16.cpp",
"src/cpu/kernels/crop/generic/neon/fp16.cpp",
"src/cpu/kernels/crop/generic/neon/fp32.cpp",
- "src/cpu/kernels/crop/generic/neon/impl.cpp",
"src/cpu/kernels/crop/generic/neon/integer.cpp",
"src/cpu/kernels/depthwiseconv2d/generic/neon/fp16.cpp",
"src/cpu/kernels/depthwiseconv2d/generic/neon/fp32.cpp",
@@ -497,8 +494,11 @@ cc_library_static {
"src/cpu/kernels/depthwiseconv2d/generic/neon/qasymm8.cpp",
"src/cpu/kernels/depthwiseconv2d/generic/neon/qasymm8_signed.cpp",
"src/cpu/kernels/directconv2d/nchw/all.cpp",
+ "src/cpu/kernels/directconv2d/nchw/fp16.cpp",
+ "src/cpu/kernels/directconv2d/nhwc/neon/fp16.cpp",
"src/cpu/kernels/directconv2d/nhwc/neon/fp32.cpp",
"src/cpu/kernels/directconv2d/nhwc/neon/impl.cpp",
+ "src/cpu/kernels/directconv2d/nhwc/neon/qasymm8.cpp",
"src/cpu/kernels/elementwise_binary/generic/neon/fp16.cpp",
"src/cpu/kernels/elementwise_binary/generic/neon/fp32.cpp",
"src/cpu/kernels/elementwise_binary/generic/neon/integer.cpp",
@@ -506,7 +506,6 @@ cc_library_static {
"src/cpu/kernels/elementwise_binary/generic/neon/qasymm8_signed.cpp",
"src/cpu/kernels/elementwise_unary/generic/neon/fp16.cpp",
"src/cpu/kernels/elementwise_unary/generic/neon/fp32.cpp",
- "src/cpu/kernels/elementwise_unary/generic/neon/impl.cpp",
"src/cpu/kernels/elementwise_unary/generic/neon/integer.cpp",
"src/cpu/kernels/elementwise_unary/generic/neon/q8.cpp",
"src/cpu/kernels/elementwise_unary/generic/neon/qasymm8.cpp",
@@ -515,11 +514,9 @@ cc_library_static {
"src/cpu/kernels/floor/neon/fp32.cpp",
"src/cpu/kernels/fuse_batch_normalization/generic/fp16.cpp",
"src/cpu/kernels/fuse_batch_normalization/generic/fp32.cpp",
- "src/cpu/kernels/fuse_batch_normalization/generic/impl.cpp",
"src/cpu/kernels/fuse_batch_normalization/nchw/all.cpp",
"src/cpu/kernels/fuse_batch_normalization/nhwc/neon/fp16.cpp",
"src/cpu/kernels/fuse_batch_normalization/nhwc/neon/fp32.cpp",
- "src/cpu/kernels/fuse_batch_normalization/nhwc/neon/impl.cpp",
"src/cpu/kernels/gemm_matrix_add/generic/neon/fp16.cpp",
"src/cpu/kernels/gemm_matrix_add/generic/neon/fp32.cpp",
"src/cpu/kernels/gemm_matrix_add/generic/neon/impl.cpp",
@@ -537,11 +534,9 @@ cc_library_static {
"src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp",
"src/cpu/kernels/l2normlayer/generic/neon/fp16.cpp",
"src/cpu/kernels/l2normlayer/generic/neon/fp32.cpp",
- "src/cpu/kernels/l2normlayer/generic/neon/impl.cpp",
"src/cpu/kernels/lut/generic/neon/u8.cpp",
"src/cpu/kernels/maxunpool/generic/neon/fp16.cpp",
"src/cpu/kernels/maxunpool/generic/neon/fp32.cpp",
- "src/cpu/kernels/maxunpool/generic/neon/impl.cpp",
"src/cpu/kernels/maxunpool/generic/neon/qasymm8.cpp",
"src/cpu/kernels/maxunpool/generic/neon/qasymm8_signed.cpp",
"src/cpu/kernels/meanstddevnorm/generic/neon/fp16.cpp",
@@ -555,16 +550,13 @@ cc_library_static {
"src/cpu/kernels/pool2d/neon/qasymm8_signed.cpp",
"src/cpu/kernels/pool3d/neon/fp16.cpp",
"src/cpu/kernels/pool3d/neon/fp32.cpp",
- "src/cpu/kernels/pool3d/neon/impl.cpp",
"src/cpu/kernels/pool3d/neon/qasymm8.cpp",
"src/cpu/kernels/pool3d/neon/qasymm8_signed.cpp",
"src/cpu/kernels/range/generic/neon/fp16.cpp",
"src/cpu/kernels/range/generic/neon/fp32.cpp",
- "src/cpu/kernels/range/generic/neon/impl.cpp",
"src/cpu/kernels/range/generic/neon/integer.cpp",
"src/cpu/kernels/roialign/generic/neon/fp16.cpp",
"src/cpu/kernels/roialign/generic/neon/fp32.cpp",
- "src/cpu/kernels/roialign/generic/neon/impl.cpp",
"src/cpu/kernels/roialign/generic/neon/qasymm8.cpp",
"src/cpu/kernels/roialign/generic/neon/qasymm8_signed.cpp",
"src/cpu/kernels/scale/neon/fp16.cpp",
@@ -573,13 +565,13 @@ cc_library_static {
"src/cpu/kernels/scale/neon/qasymm8_signed.cpp",
"src/cpu/kernels/select/generic/neon/fp16.cpp",
"src/cpu/kernels/select/generic/neon/fp32.cpp",
- "src/cpu/kernels/select/generic/neon/impl.cpp",
"src/cpu/kernels/select/generic/neon/integer.cpp",
"src/cpu/kernels/softmax/generic/neon/fp16.cpp",
"src/cpu/kernels/softmax/generic/neon/fp32.cpp",
"src/cpu/kernels/softmax/generic/neon/impl.cpp",
"src/cpu/kernels/softmax/generic/neon/qasymm8.cpp",
"src/cpu/kernels/softmax/generic/neon/qasymm8_signed.cpp",
+ "src/cpu/kernels/sub/neon/fp16.cpp",
"src/cpu/kernels/sub/neon/qasymm8.cpp",
"src/cpu/kernels/sub/neon/qasymm8_signed.cpp",
"src/cpu/kernels/sub/neon/qsymm16.cpp",
@@ -628,6 +620,7 @@ cc_library_static {
"src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp",
"src/dynamic_fusion/sketch/attributes/Conv2dAttributes.cpp",
"src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp",
+ "src/dynamic_fusion/sketch/attributes/MatMulAttributes.cpp",
"src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp",
"src/dynamic_fusion/sketch/attributes/ReshapeAttributes.cpp",
"src/dynamic_fusion/sketch/attributes/ResizeAttributes.cpp",
@@ -647,8 +640,12 @@ cc_library_static {
"src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDepthwiseConv2d.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp",
@@ -657,6 +654,7 @@ cc_library_static {
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp",
+ "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp",
@@ -666,6 +664,7 @@ cc_library_static {
"src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp",
@@ -729,6 +728,7 @@ cc_library_static {
"src/gpu/cl/kernels/ClIndirectConv2dAddressPrecalculationKernel.cpp",
"src/gpu/cl/kernels/ClIndirectConv2dKernel.cpp",
"src/gpu/cl/kernels/ClMatMulLowpNativeKernel.cpp",
+ "src/gpu/cl/kernels/ClMatMulLowpNativeMMULKernel.cpp",
"src/gpu/cl/kernels/ClMatMulNativeKernel.cpp",
"src/gpu/cl/kernels/ClMatMulNativeMMULKernel.cpp",
"src/gpu/cl/kernels/ClMulKernel.cpp",
@@ -756,6 +756,7 @@ cc_library_static {
"src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp",
"src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp",
"src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp",
+ "src/gpu/cl/kernels/helpers/MatMulKernelHelpers.cpp",
"src/gpu/cl/operators/ClActivation.cpp",
"src/gpu/cl/operators/ClAdd.cpp",
"src/gpu/cl/operators/ClCast.cpp",
@@ -1310,6 +1311,7 @@ cc_library_static {
"src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/a55.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_bf16fp32_dot_16VL/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp16fp32fp16_dot_16VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp32_mla_16VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp32bf16fp32_dot_16VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_s8qa_dot_16VL/generic.cpp",
diff --git a/CMakeLists.txt b/CMakeLists.txt
index bf029a7e9e15d032c4f6da46f74ccd9f34b0783c..9dd3e2cef7b01bdef701b586dcb119646d6fdc07 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,7 +28,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
list(APPEND CMAKE_MESSAGE_CONTEXT ArmCompute)
project(
ArmCompute
- VERSION 32.0.0
+ VERSION 33.0.0
DESCRIPTION
"The Arm Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A CPU and Arm® Mali™ GPU architectures"
LANGUAGES C CXX ASM)
diff --git a/README.md b/README.md
index a8f0def7a18b9ae2a940dcda026142f00c86b9a9..9b06dbeabf145377e4d87bf3a018111f660ef9c9 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@

-# Compute Library 
+# Compute Library 
The Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A, Arm® Neoverse® and Arm® Mali™ GPUs architectures.
@@ -44,7 +44,7 @@ Key Features:
## Documentation
-[](https://arm-software.github.io/ComputeLibrary/latest)
+[](https://arm-software.github.io/ComputeLibrary/latest)
> Note: The documentation includes the reference API, changelogs, build guide, contribution guide, errata, etc.
@@ -57,24 +57,24 @@ All the binaries can be downloaded from [here](https://github.com/ARM-software/C
| Platform | Operating System | Release archive (Download) |
| -------------- | ---------------- | -------------------------- |
-| Raspberry Pi 4 | Linux® 32bit | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-armv7a-neon.tar.gz) |
-| Raspberry Pi 4 | Linux® 64bit | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8a-neon.tar.gz) |
-| Odroid N2 | Linux® 64bit | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8a-neon-cl.tar.gz) |
-| HiKey960 | Linux® 64bit | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8a-neon-cl.tar.gz) |
+| Raspberry Pi 4 | Linux® 32bit | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-linux-armv7a-neon.tar.gz) |
+| Raspberry Pi 4 | Linux® 64bit | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-linux-arm64-v8a-neon.tar.gz) |
+| Odroid N2 | Linux® 64bit | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-linux-arm64-v8a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-linux-arm64-v8a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-linux-arm64-v8a-neon-cl.tar.gz) |
+| HiKey960 | Linux® 64bit | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-linux-arm64-v8a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-linux-arm64-v8a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-linux-arm64-v8a-neon-cl.tar.gz) |
| Architecture | Operating System | Release archive (Download) |
| ------------ | ---------------- | -------------------------- |
-| armv7 | Linux® | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-armv7a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-armv7a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-armv7a-neon-cl.tar.gz) |
-| arm64-v8a | Android™ | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-android-arm64-v8a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-android-arm64-v8a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-android-arm64-v8a-neon-cl.tar.gz) |
-| arm64-v8a | Linux® | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8a-neon-cl.tar.gz) |
-| arm64-v8.2-a | Android™ | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-android-arm64-v8.2-a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-android-arm64-v8.2-a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-android-arm64-v8.2-a-neon-cl.tar.gz) |
-| arm64-v8.2-a | Linux® | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8.2-a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8.2-a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.08/arm_compute-v23.08-bin-linux-arm64-v8.2-a-neon-cl.tar.gz) |
+| armv7 | Linux® | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-linux-armv7a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-linux-armv7a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-linux-armv7a-neon-cl.tar.gz) |
+| arm64-v8a | Android™ | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-android-arm64-v8a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-android-arm64-v8a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-android-arm64-v8a-neon-cl.tar.gz) |
+| arm64-v8a | Linux® | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-linux-arm64-v8a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-linux-arm64-v8a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-linux-arm64-v8a-neon-cl.tar.gz) |
+| arm64-v8.2-a | Android™ | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-android-arm64-v8.2-a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-android-arm64-v8.2-a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-android-arm64-v8.2-a-neon-cl.tar.gz) |
+| arm64-v8.2-a | Linux® | [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-linux-arm64-v8.2-a-neon.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-linux-arm64-v8.2-a-cl.tar.gz) [](https://github.com/ARM-software/ComputeLibrary/releases/download/v23.11/arm_compute-v23.11-bin-linux-arm64-v8.2-a-neon-cl.tar.gz) |
-Please refer to the following link for more pre-built binaries: [](https://github.com/ARM-software/ComputeLibrary/releases/tag/v23.08)
+Please refer to the following link for more pre-built binaries: [](https://github.com/ARM-software/ComputeLibrary/releases/tag/v23.11)
Pre-build binaries are generated with the following security / good coding practices related flags:
> -Wall, -Wextra, -Wformat=2, -Winit-self, -Wstrict-overflow=2, -Wswitch-default, -Woverloaded-virtual, -Wformat-security, -Wctor-dtor-privacy, -Wsign-promo, -Weffc++, -pedantic, -fstack-protector-strong
diff --git a/SConscript b/SConscript
index 467f84cb55ae335738975332b2859aa56c10be97..099ff706add8f8254f9c68af0a8c07a5998e9eca 100644
--- a/SConscript
+++ b/SConscript
@@ -38,8 +38,8 @@ warn(DeprecatedWarning,
" Link your application only to libarm_compute for core library functionality"
)
-VERSION = "v23.08"
-LIBRARY_VERSION_MAJOR = 32
+VERSION = "v23.11"
+LIBRARY_VERSION_MAJOR = 33
LIBRARY_VERSION_MINOR = 0
LIBRARY_VERSION_PATCH = 0
SONAME_VERSION = str(LIBRARY_VERSION_MAJOR) + "." + str(LIBRARY_VERSION_MINOR) + "." + str(LIBRARY_VERSION_PATCH)
@@ -222,7 +222,7 @@ def resolve_includes(target, source, env):
found = pattern.search(line)
if found:
# Only get the header file name and discard the relative path.
- # E.g. "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h" -> "fp_mixed_precision_helpers.h"
+ # E.g. "src/core/CL/cl_kernels/activation_float_helpers.h" -> "activation_float_helpers.h"
include_file = found.group(1).split('/')[-1]
data = files_dict[include_file].file_contents
updated_file.extend(data)
@@ -387,9 +387,6 @@ if env['opencl'] and env['embed_kernels']:
'src/core/CL/cl_kernels/tile_helpers.h',
'src/core/CL/cl_kernels/types.h',
'src/core/CL/cl_kernels/warp_helpers.h',
- 'src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/fp_post_ops_act_eltwise_op_act.h',
- 'src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h',
- 'src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/fp_elementwise_op_helpers.h',
]
# Common kernels
@@ -414,9 +411,6 @@ if env['opencl'] and env['embed_kernels']:
'src/core/CL/cl_kernels/common/elementwise_operation_quantized.cl',
'src/core/CL/cl_kernels/common/elementwise_unary.cl',
'src/core/CL/cl_kernels/common/elementwise_unary_quantized.cl',
- 'src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl',
- 'src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped.cl',
- 'src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped_only_rhs.cl',
'src/core/CL/cl_kernels/common/fft_digit_reverse.cl',
'src/core/CL/cl_kernels/common/fft.cl',
'src/core/CL/cl_kernels/common/fft_scale.cl',
@@ -436,6 +430,7 @@ if env['opencl'] and env['embed_kernels']:
'src/core/CL/cl_kernels/common/mat_mul.cl',
'src/core/CL/cl_kernels/common/mat_mul_mmul.cl',
'src/core/CL/cl_kernels/common/mat_mul_quantized.cl',
+ 'src/core/CL/cl_kernels/common/mat_mul_quantized_mmul.cl',
'src/core/CL/cl_kernels/common/mean_stddev_normalization.cl',
'src/core/CL/cl_kernels/common/memset.cl',
'src/core/CL/cl_kernels/common/minmax_layer.cl',
@@ -456,7 +451,6 @@ if env['opencl'] and env['embed_kernels']:
'src/core/CL/cl_kernels/common/select.cl',
'src/core/CL/cl_kernels/common/slice_ops.cl',
'src/core/CL/cl_kernels/common/softmax_layer.cl',
- 'src/core/CL/cl_kernels/common/softmax_layer_quantized.cl',
'src/core/CL/cl_kernels/common/stack_layer.cl',
'src/core/CL/cl_kernels/common/tile.cl',
'src/core/CL/cl_kernels/common/transpose.cl',
diff --git a/arm_compute/Acl.h b/arm_compute/Acl.h
index 1cd45d5756aeddc9102c730c9d19ae9068b91b8f..3e99a00cbe326deaf9cc130a3d3c1f166b381a2b 100644
--- a/arm_compute/Acl.h
+++ b/arm_compute/Acl.h
@@ -25,7 +25,8 @@
#define ARM_COMPUTE_ACL_H_
#ifdef __cplusplus
-extern "C" {
+extern "C"
+{
#endif /* __cplusplus */
/* Core headers */
diff --git a/arm_compute/Acl.hpp b/arm_compute/Acl.hpp
index 55e04e876d2b529981a7e72d9c620e565728e6b8..6a9d585c142d1bb03e29964f2064b0e798b46849 100644
--- a/arm_compute/Acl.hpp
+++ b/arm_compute/Acl.hpp
@@ -75,7 +75,7 @@ struct ObjectDeleter
#define OBJECT_DELETER(obj, func) \
template <> \
struct ObjectDeleter \
- \
+ \
{ \
static inline AclStatus Destroy(obj v) \
{ \
@@ -171,7 +171,7 @@ protected:
ObjectBase() = default;
protected:
- std::shared_ptr _object{ nullptr }; /**< Library object */
+ std::shared_ptr _object{nullptr}; /**< Library object */
};
/** Equality operator for library object
@@ -221,8 +221,7 @@ public:
* @param[in] status Status returned
* @param[in] msg Error message to be bound with the exception
*/
- Status(StatusCode status, const std::string &msg)
- : _status(status), _msg(msg)
+ Status(StatusCode status, const std::string &msg) : _status(status), _msg(msg)
{
}
/** Returns an explanatory exception message
@@ -266,7 +265,7 @@ private:
*/
static inline void report_status(StatusCode status, const std::string &msg)
{
- if(status != StatusCode::Success)
+ if (status != StatusCode::Success)
{
throw Status(status, msg);
}
@@ -299,7 +298,8 @@ enum class Target
/**< Available execution modes */
enum class ExecutionMode
{
- FastRerun = AclPreferFastRerun, /**< Prefer minimum latency in consecutive runs, might introduce higher startup times */
+ FastRerun =
+ AclPreferFastRerun, /**< Prefer minimum latency in consecutive runs, might introduce higher startup times */
FastStart = AclPreferFastStart, /**< Prefer minimizing startup time */
};
@@ -372,8 +372,7 @@ public:
* @param[in] target Target to create context for
* @param[out] status Status information if requested
*/
- explicit Context(Target target, StatusCode *status = nullptr)
- : Context(target, Options(), status)
+ explicit Context(Target target, StatusCode *status = nullptr) : Context(target, Options(), status)
{
}
/** Constructor
@@ -385,10 +384,11 @@ public:
Context(Target target, const Options &options, StatusCode *status = nullptr)
{
AclContext ctx;
- const auto st = detail::as_enum(AclCreateContext(&ctx, detail::as_cenum(target), &options.copts));
+ const auto st =
+ detail::as_enum(AclCreateContext(&ctx, detail::as_cenum(target), &options.copts));
reset(ctx);
report_status(st, "[Compute Library] Failed to create context");
- if(status)
+ if (status)
{
*status = st;
}
@@ -424,15 +424,13 @@ public:
* As default options, no tuning will be performed, and the number of scheduling units will
* depends on internal device discovery functionality
*/
- Options()
- : opts{ AclTuningModeNone, 0 } {};
+ Options() : opts{AclTuningModeNone, 0} {};
/** Constructor
*
* @param[in] mode Tuning mode to be used
* @param[in] compute_units Number of scheduling units to be used
*/
- Options(TuningMode mode, int32_t compute_units)
- : opts{ detail::as_cenum(mode), compute_units }
+ Options(TuningMode mode, int32_t compute_units) : opts{detail::as_cenum(mode), compute_units}
{
}
@@ -448,8 +446,7 @@ public:
* @param[in] ctx Context to create queue for
* @param[out] status Status information if requested
*/
- explicit Queue(Context &ctx, StatusCode *status = nullptr)
- : Queue(ctx, Options(), status)
+ explicit Queue(Context &ctx, StatusCode *status = nullptr) : Queue(ctx, Options(), status)
{
}
/** Constructor
@@ -466,7 +463,7 @@ public:
const auto st = detail::as_enum(AclCreateQueue(&queue, ctx.get(), &options.opts));
reset(queue);
report_status(st, "[Compute Library] Failed to create queue!");
- if(status)
+ if (status)
{
*status = st;
}
@@ -508,8 +505,7 @@ public:
* @param[in] shape Shape of the tensor
* @param[in] data_type Data type of the tensor
*/
- TensorDescriptor(const std::vector &shape, DataType data_type)
- : _shape(shape), _data_type(data_type)
+ TensorDescriptor(const std::vector &shape, DataType data_type) : _shape(shape), _data_type(data_type)
{
_cdesc.ndims = _shape.size();
_cdesc.shape = _shape.data();
@@ -526,7 +522,7 @@ public:
_cdesc = desc;
_data_type = detail::as_enum(desc.data_type);
_shape.reserve(desc.ndims);
- for(int32_t d = 0; d < desc.ndims; ++d)
+ for (int32_t d = 0; d < desc.ndims; ++d)
{
_shape.emplace_back(desc.shape[d]);
}
@@ -552,9 +548,9 @@ public:
is_same &= _data_type == other._data_type;
is_same &= _shape.size() == other._shape.size();
- if(is_same)
+ if (is_same)
{
- for(uint32_t d = 0; d < _shape.size(); ++d)
+ for (uint32_t d = 0; d < _shape.size(); ++d)
{
is_same &= _shape[d] == other._shape[d];
}
@@ -592,8 +588,7 @@ public:
* @param[in] desc Tensor descriptor to be used
* @param[out] status Status information if requested
*/
- Tensor(Context &ctx, const TensorDescriptor &desc, StatusCode *status = nullptr)
- : Tensor(ctx, desc, true, status)
+ Tensor(Context &ctx, const TensorDescriptor &desc, StatusCode *status = nullptr) : Tensor(ctx, desc, true, status)
{
}
/** Constructor
@@ -609,7 +604,7 @@ public:
const auto st = detail::as_enum(AclCreateTensor(&tensor, ctx.get(), desc.get(), allocate));
reset(tensor);
report_status(st, "[Compute Library] Failed to create tensor!");
- if(status)
+ if (status)
{
*status = st;
}
@@ -646,7 +641,8 @@ public:
*/
StatusCode import(void *handle, ImportType type)
{
- const auto st = detail::as_enum(AclTensorImport(_object.get(), handle, detail::as_cenum(type)));
+ const auto st = detail::as_enum(
+ AclTensorImport(_object.get(), handle, detail::as_cenum(type)));
report_status(st, "[Compute Library] Failed to import external memory to tensor!");
return st;
}
@@ -658,7 +654,7 @@ public:
*/
uint64_t get_size()
{
- uint64_t size{ 0 };
+ uint64_t size{0};
const auto st = detail::as_enum(AclGetTensorSize(_object.get(), &size));
report_status(st, "[Compute Library] Failed to get the size of the tensor");
return size;
@@ -692,13 +688,12 @@ public:
* @param[in] tensor_ Tensor to pack
* @param[in] slot_id_ Slot identification of the tensor in respect with the operator
*/
- PackPair(Tensor *tensor_, int32_t slot_id_)
- : tensor(tensor_), slot_id(slot_id_)
+ PackPair(Tensor *tensor_, int32_t slot_id_) : tensor(tensor_), slot_id(slot_id_)
{
}
- Tensor *tensor{ nullptr }; /**< Tensor object */
- int32_t slot_id{ AclSlotUnknown }; /**< Slot id in respect with the operator */
+ Tensor *tensor{nullptr}; /**< Tensor object */
+ int32_t slot_id{AclSlotUnknown}; /**< Slot id in respect with the operator */
};
public:
@@ -713,7 +708,7 @@ public:
const auto st = detail::as_enum(AclCreateTensorPack(&pack, ctx.get()));
reset(pack);
report_status(st, "[Compute Library] Failure during tensor pack creation");
- if(status)
+ if (status)
{
*status = st;
}
@@ -741,7 +736,7 @@ public:
std::vector slots(size);
std::vector tensors(size);
int i = 0;
- for(auto &p : packed)
+ for (auto &p : packed)
{
slots[i] = p.slot_id;
tensors[i] = AclTensor(p.tensor);
@@ -780,13 +775,17 @@ using ActivationDesc = AclActivationDescriptor;
class Activation : public Operator
{
public:
- Activation(Context &ctx, const TensorDescriptor &src, const TensorDescriptor &dst, const ActivationDesc &desc, StatusCode *status = nullptr)
+ Activation(Context &ctx,
+ const TensorDescriptor &src,
+ const TensorDescriptor &dst,
+ const ActivationDesc &desc,
+ StatusCode *status = nullptr)
{
AclOperator op;
const auto st = detail::as_enum(AclActivation(&op, ctx.get(), src.get(), dst.get(), desc));
reset(op);
report_status(st, "[Compute Library] Failure during Activation operator creation");
- if(status)
+ if (status)
{
*status = st;
}
diff --git a/arm_compute/AclDescriptors.h b/arm_compute/AclDescriptors.h
index a564bd2141b1023bbdccd0e4a5e4afe972213c59..cdaf7c0dc80f45750bf73df5eb3cc0b477c97db7 100644
--- a/arm_compute/AclDescriptors.h
+++ b/arm_compute/AclDescriptors.h
@@ -25,37 +25,38 @@
#define ARM_COMPUTE_ACL_DESCRIPTORS_H_
#ifdef __cplusplus
-extern "C" {
+extern "C"
+{
#endif /** __cplusplus */
-/**< Supported activation types */
-typedef enum
-{
- AclActivationTypeNone = 0, /**< No activation */
- AclIdentity = 1, /**< Identity */
- AclLogistic = 2, /**< Logistic */
- AclTanh = 3, /**< Hyperbolic tangent */
- AclRelu = 4, /**< Rectifier */
- AclBoundedRelu = 5, /**< Upper Bounded Rectifier */
- AclLuBoundedRelu = 6, /**< Lower and Upper Bounded Rectifier */
- AclLeakyRelu = 7, /**< Leaky Rectifier */
- AclSoftRelu = 8, /**< Soft Rectifier */
- AclElu = 9, /**< Exponential Linear Unit */
- AclAbs = 10, /**< Absolute */
- AclSquare = 11, /**< Square */
- AclSqrt = 12, /**< Square root */
- AclLinear = 13, /**< Linear */
- AclHardSwish = 14, /**< Hard-swish */
-} AclActivationType;
+ /**< Supported activation types */
+ typedef enum
+ {
+ AclActivationTypeNone = 0, /**< No activation */
+ AclIdentity = 1, /**< Identity */
+ AclLogistic = 2, /**< Logistic */
+ AclTanh = 3, /**< Hyperbolic tangent */
+ AclRelu = 4, /**< Rectifier */
+ AclBoundedRelu = 5, /**< Upper Bounded Rectifier */
+ AclLuBoundedRelu = 6, /**< Lower and Upper Bounded Rectifier */
+ AclLeakyRelu = 7, /**< Leaky Rectifier */
+ AclSoftRelu = 8, /**< Soft Rectifier */
+ AclElu = 9, /**< Exponential Linear Unit */
+ AclAbs = 10, /**< Absolute */
+ AclSquare = 11, /**< Square */
+ AclSqrt = 12, /**< Square root */
+ AclLinear = 13, /**< Linear */
+ AclHardSwish = 14, /**< Hard-swish */
+ } AclActivationType;
-/**< Activation layer descriptor */
-typedef struct
-{
- AclActivationType type; /**< Activation type */
- float a; /**< Factor &alpha used by some activations */
- float b; /**< Factor &beta used by some activations */
- bool inplace; /**< Hint that src and dst tensors will be the same */
-} AclActivationDescriptor;
+ /**< Activation layer descriptor */
+ typedef struct
+ {
+ AclActivationType type; /**< Activation type */
+ float a; /**< Factor &alpha used by some activations */
+ float b; /**< Factor &beta used by some activations */
+ bool inplace; /**< Hint that src and dst tensors will be the same */
+ } AclActivationDescriptor;
#ifdef __cplusplus
}
#endif /** __cplusplus */
diff --git a/arm_compute/AclEntrypoints.h b/arm_compute/AclEntrypoints.h
index ca3a911f5d4bc52c5f31855155347f83935d8deb..0d4902a3d5002da48010c95faa531c6445a8146d 100644
--- a/arm_compute/AclEntrypoints.h
+++ b/arm_compute/AclEntrypoints.h
@@ -27,10 +27,11 @@
#include "arm_compute/AclTypes.h"
#ifdef __cplusplus
-extern "C" {
+extern "C"
+{
#endif /** __cplusplus */
-/** Create a context object
+ /** Create a context object
*
* Context is responsible for retaining internal information and work as an aggregate service mechanism
*
@@ -46,11 +47,9 @@ extern "C" {
* - @ref AclUnsupportedTarget if the requested target is unsupported
* - @ref AclInvalidArgument if a given argument is invalid
*/
-AclStatus AclCreateContext(AclContext *ctx,
- AclTarget target,
- const AclContextOptions *options);
+ AclStatus AclCreateContext(AclContext *ctx, AclTarget target, const AclContextOptions *options);
-/** Destroy a given context object
+ /** Destroy a given context object
*
* @param[in] ctx A valid context object to destroy
*
@@ -60,9 +59,9 @@ AclStatus AclCreateContext(AclContext *ctx,
* - @ref AclSuccess if functions was completed successfully
* - @ref AclInvalidArgument if the provided context is invalid
*/
-AclStatus AclDestroyContext(AclContext ctx);
+ AclStatus AclDestroyContext(AclContext ctx);
-/** Create an operator queue
+ /** Create an operator queue
*
* Queue is responsible for any scheduling related activities
*
@@ -78,9 +77,9 @@ AclStatus AclDestroyContext(AclContext ctx);
* - @ref AclUnsupportedTarget if the requested target is unsupported
* - @ref AclInvalidArgument if a given argument is invalid
*/
-AclStatus AclCreateQueue(AclQueue *queue, AclContext ctx, const AclQueueOptions *options);
+ AclStatus AclCreateQueue(AclQueue *queue, AclContext ctx, const AclQueueOptions *options);
-/** Wait until all elements on the queue have been completed
+ /** Wait until all elements on the queue have been completed
*
* @param[in] queue Queue to wait on completion
*
@@ -91,9 +90,9 @@ AclStatus AclCreateQueue(AclQueue *queue, AclContext ctx, const AclQueueOptions
* - @ref AclInvalidArgument if the provided queue is invalid
* - @ref AclRuntimeError on any other runtime related error
*/
-AclStatus AclQueueFinish(AclQueue queue);
+ AclStatus AclQueueFinish(AclQueue queue);
-/** Destroy a given queue object
+ /** Destroy a given queue object
*
* @param[in] queue A valid context object to destroy
*
@@ -103,9 +102,9 @@ AclStatus AclQueueFinish(AclQueue queue);
* - @ref AclSuccess if functions was completed successfully
* - @ref AclInvalidArgument if the provided context is invalid
*/
-AclStatus AclDestroyQueue(AclQueue queue);
+ AclStatus AclDestroyQueue(AclQueue queue);
-/** Create a Tensor object
+ /** Create a Tensor object
*
* Tensor is a generalized matrix construct that can represent up to ND dimensionality (where N = 6 for Compute Library)
* The object holds a backing memory along-side to operate on
@@ -121,9 +120,9 @@ AclStatus AclDestroyQueue(AclQueue queue);
* - @ref AclUnsupportedTarget if the requested target is unsupported
* - @ref AclInvalidArgument if a given argument is invalid
*/
-AclStatus AclCreateTensor(AclTensor *tensor, AclContext ctx, const AclTensorDescriptor *desc, bool allocate);
+ AclStatus AclCreateTensor(AclTensor *tensor, AclContext ctx, const AclTensorDescriptor *desc, bool allocate);
-/** Map a tensor's backing memory to the host
+ /** Map a tensor's backing memory to the host
*
* @param[in] tensor Tensor to be mapped
* @param[in, out] handle A handle to the underlying backing memory
@@ -134,9 +133,9 @@ AclStatus AclCreateTensor(AclTensor *tensor, AclContext ctx, const AclTensorDesc
* - @ref AclSuccess if function was completed successfully
* - @ref AclInvalidArgument if a given argument is invalid
*/
-AclStatus AclMapTensor(AclTensor tensor, void **handle);
+ AclStatus AclMapTensor(AclTensor tensor, void **handle);
-/** Unmap the tensor's backing memory
+ /** Unmap the tensor's backing memory
*
* @param[in] tensor tensor to unmap memory from
* @param[in] handle Backing memory to be unmapped
@@ -147,9 +146,9 @@ AclStatus AclMapTensor(AclTensor tensor, void **handle);
* - @ref AclSuccess if function was completed successfully
* - @ref AclInvalidArgument if a given argument is invalid
*/
-AclStatus AclUnmapTensor(AclTensor tensor, void *handle);
+ AclStatus AclUnmapTensor(AclTensor tensor, void *handle);
-/** Import external memory to a given tensor object
+ /** Import external memory to a given tensor object
*
* @param[in, out] tensor Tensor to import memory to
* @param[in] handle Backing memory to be imported
@@ -159,9 +158,9 @@ AclStatus AclUnmapTensor(AclTensor tensor, void *handle);
* - @ref AclSuccess if function was completed successfully
* - @ref AclInvalidArgument if a given argument is invalid
*/
-AclStatus AclTensorImport(AclTensor tensor, void *handle, AclImportMemoryType type);
+ AclStatus AclTensorImport(AclTensor tensor, void *handle, AclImportMemoryType type);
-/** Destroy a given tensor object
+ /** Destroy a given tensor object
*
* @param[in,out] tensor A valid tensor object to be destroyed
*
@@ -171,9 +170,9 @@ AclStatus AclTensorImport(AclTensor tensor, void *handle, AclImportMemoryType ty
* - @ref AclSuccess if function was completed successfully
* - @ref AclInvalidArgument if the provided tensor is invalid
*/
-AclStatus AclDestroyTensor(AclTensor tensor);
+ AclStatus AclDestroyTensor(AclTensor tensor);
-/** Creates a tensor pack
+ /** Creates a tensor pack
*
* Tensor packs are used to create a collection of tensors that can be passed around for operator execution
*
@@ -187,9 +186,9 @@ AclStatus AclDestroyTensor(AclTensor tensor);
* - @ref AclOutOfMemory if there was a failure allocating memory resources
* - @ref AclInvalidArgument if a given argument is invalid
*/
-AclStatus AclCreateTensorPack(AclTensorPack *pack, AclContext ctx);
+ AclStatus AclCreateTensorPack(AclTensorPack *pack, AclContext ctx);
-/** Add a tensor to a tensor pack
+ /** Add a tensor to a tensor pack
*
* @param[in,out] pack Pack to append a tensor to
* @param[in] tensor Tensor to pack
@@ -202,9 +201,9 @@ AclStatus AclCreateTensorPack(AclTensorPack *pack, AclContext ctx);
* - @ref AclOutOfMemory if there was a failure allocating memory resources
* - @ref AclInvalidArgument if a given argument is invalid
*/
-AclStatus AclPackTensor(AclTensorPack pack, AclTensor tensor, int32_t slot_id);
+ AclStatus AclPackTensor(AclTensorPack pack, AclTensor tensor, int32_t slot_id);
-/** A list of tensors to a tensor pack
+ /** A list of tensors to a tensor pack
*
* @param[in,out] pack Pack to append the tensors to
* @param[in] tensors Tensors to append to the pack
@@ -218,9 +217,9 @@ AclStatus AclPackTensor(AclTensorPack pack, AclTensor tensor, int32_t slot_id);
* - @ref AclOutOfMemory if there was a failure allocating memory resources
* - @ref AclInvalidArgument if a given argument is invalid
*/
-AclStatus AclPackTensors(AclTensorPack pack, AclTensor *tensors, int32_t *slot_ids, size_t num_tensors);
+ AclStatus AclPackTensors(AclTensorPack pack, AclTensor *tensors, int32_t *slot_ids, size_t num_tensors);
-/** Destroy a given tensor pack object
+ /** Destroy a given tensor pack object
*
* @param[in,out] pack A valid tensor pack object to destroy
*
@@ -230,9 +229,9 @@ AclStatus AclPackTensors(AclTensorPack pack, AclTensor *tensors, int32_t *slot_i
* - @ref AclSuccess if functions was completed successfully
* - @ref AclInvalidArgument if the provided context is invalid
*/
-AclStatus AclDestroyTensorPack(AclTensorPack pack);
+ AclStatus AclDestroyTensorPack(AclTensorPack pack);
-/** Eager execution of a given operator on a list of inputs and outputs
+ /** Eager execution of a given operator on a list of inputs and outputs
*
* @param[in] op Operator to execute
* @param[in] queue Queue to schedule the operator on
@@ -247,9 +246,9 @@ AclStatus AclDestroyTensorPack(AclTensorPack pack);
* - @ref AclInvalidArgument if a given argument is invalid
* - @ref AclRuntimeError on any other runtime related error
*/
-AclStatus AclRunOperator(AclOperator op, AclQueue queue, AclTensorPack tensors);
+ AclStatus AclRunOperator(AclOperator op, AclQueue queue, AclTensorPack tensors);
-/** Destroy a given operator object
+ /** Destroy a given operator object
*
* @param[in,out] op A valid operator object to destroy
*
@@ -259,7 +258,7 @@ AclStatus AclRunOperator(AclOperator op, AclQueue queue, AclTensorPack tensors);
* - @ref AclSuccess if functions was completed successfully
* - @ref AclInvalidArgument if the provided context is invalid
*/
-AclStatus AclDestroyOperator(AclOperator op);
+ AclStatus AclDestroyOperator(AclOperator op);
#ifdef __cplusplus
}
#endif /* __cplusplus */
diff --git a/arm_compute/AclOpenClExt.h b/arm_compute/AclOpenClExt.h
index ef80fd24434d9bb97e3a6c86a5cd7bc7b497c464..28e918d371d6f6c50ee2d0993f54bacefc60f065 100644
--- a/arm_compute/AclOpenClExt.h
+++ b/arm_compute/AclOpenClExt.h
@@ -37,10 +37,11 @@
#pragma GCC diagnostic pop
#ifdef __cplusplus
-extern "C" {
+extern "C"
+{
#endif /* __cplusplus */
-/** Extract the underlying OpenCL context used by a given Compute Library context object
+ /** Extract the underlying OpenCL context used by a given Compute Library context object
*
* @note @ref AclContext should be of an OpenCL backend target
*
@@ -49,9 +50,9 @@ extern "C" {
*
* @return Status code
*/
-AclStatus AclGetClContext(AclContext ctx, cl_context *opencl_context);
+ AclStatus AclGetClContext(AclContext ctx, cl_context *opencl_context);
-/** Extract the underlying OpenCL device id used by a given Compute Library context object
+ /** Extract the underlying OpenCL device id used by a given Compute Library context object
*
* @note @ref AclContext should be of an OpenCL backend target
*
@@ -60,9 +61,9 @@ AclStatus AclGetClContext(AclContext ctx, cl_context *opencl_context);
*
* @return Status code
*/
-AclStatus AclGetClDevice(AclContext ctx, cl_device_id *opencl_device);
+ AclStatus AclGetClDevice(AclContext ctx, cl_device_id *opencl_device);
-/** Set the underlying OpenCL context to be used by a given Compute Library context object
+ /** Set the underlying OpenCL context to be used by a given Compute Library context object
*
* @note @ref AclContext should be of an OpenCL backend target
*
@@ -71,9 +72,9 @@ AclStatus AclGetClDevice(AclContext ctx, cl_device_id *opencl_device);
*
* @return Status code
*/
-AclStatus AclSetClContext(AclContext ctx, cl_context opencl_context);
+ AclStatus AclSetClContext(AclContext ctx, cl_context opencl_context);
-/** Extract the underlying OpenCL queue used by a given Compute Library queue object
+ /** Extract the underlying OpenCL queue used by a given Compute Library queue object
*
* @note @ref AclQueue should be of an OpenCL backend target
* @note @ref AclQueue refcount should be 0, meaning not used by other objects
@@ -83,9 +84,9 @@ AclStatus AclSetClContext(AclContext ctx, cl_context opencl_context);
*
* @return Status code
*/
-AclStatus AclGetClQueue(AclQueue queue, cl_command_queue *opencl_queue);
+ AclStatus AclGetClQueue(AclQueue queue, cl_command_queue *opencl_queue);
-/** Set the underlying OpenCL queue to be used by a given Compute Library queue object
+ /** Set the underlying OpenCL queue to be used by a given Compute Library queue object
*
* @note @ref AclQueue should be of an OpenCL backend target
* @note opecl_queue needs to be created from the same context that the AclContext that the queue will use
@@ -95,16 +96,16 @@ AclStatus AclGetClQueue(AclQueue queue, cl_command_queue *opencl_queue);
*
* @return Status code
*/
-AclStatus AclSetClQueue(AclQueue queue, cl_command_queue opencl_queue);
+ AclStatus AclSetClQueue(AclQueue queue, cl_command_queue opencl_queue);
-/** Extract the underlying OpenCL memory object by a given Compute Library tensor object
+ /** Extract the underlying OpenCL memory object by a given Compute Library tensor object
*
* @param[in] tensor A valid non-zero tensor
* @param[out] opencl_mem Underlyig OpenCL memory object
*
* @return Status code
*/
-AclStatus AclGetClMem(AclTensor tensor, cl_mem *opencl_mem);
+ AclStatus AclGetClMem(AclTensor tensor, cl_mem *opencl_mem);
#ifdef __cplusplus
}
diff --git a/arm_compute/AclOperators.h b/arm_compute/AclOperators.h
index bfdd7b1b9b1dad5f753c23806b7e14d992e06998..4f6f46e9c8630cd06f19d247e0530bcd05a4a6a8 100644
--- a/arm_compute/AclOperators.h
+++ b/arm_compute/AclOperators.h
@@ -31,10 +31,11 @@
#define ARM_COMPUTE_VALIDATE_OPERATOR_SUPPORT ((AclOperator *)(size_t)-1)
#ifdef __cplusplus
-extern "C" {
+extern "C"
+{
#endif /** __cplusplus */
-/** Create an activation operator
+ /** Create an activation operator
*
* Applies an activation function to a given tensor .
* Compute Library supports a wide list of activation functions @ref AclActivationType.
@@ -75,11 +76,11 @@ extern "C" {
* - @ref AclUnsupportedTarget if operator for the requested target is unsupported
* - @ref AclInvalidArgument if a given argument is invalid
*/
-AclStatus AclActivation(AclOperator *op,
- AclContext ctx,
- const AclTensorDescriptor *src,
- const AclTensorDescriptor *dst,
- const AclActivationDescriptor info);
+ AclStatus AclActivation(AclOperator *op,
+ AclContext ctx,
+ const AclTensorDescriptor *src,
+ const AclTensorDescriptor *dst,
+ const AclActivationDescriptor info);
#ifdef __cplusplus
}
#endif /** __cplusplus */
diff --git a/arm_compute/AclTypes.h b/arm_compute/AclTypes.h
index 368be1292b197d2d3a6fba73a4931f2894aaa732..9a002ad22c8cd1bfd871fe43fc93a4ffb5cddc80 100644
--- a/arm_compute/AclTypes.h
+++ b/arm_compute/AclTypes.h
@@ -28,185 +28,186 @@
#include
#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/**< Opaque Context object */
-typedef struct AclContext_ *AclContext;
-/**< Opaque Queue object */
-typedef struct AclQueue_ *AclQueue;
-/**< Opaque Tensor object */
-typedef struct AclTensor_ *AclTensor;
-/**< Opaque Tensor pack object */
-typedef struct AclTensorPack_ *AclTensorPack;
-/**< Opaque Operator object */
-typedef struct AclOperator_ *AclOperator;
-
-// Capabilities bitfield (Note: if multiple are enabled ComputeLibrary will pick the best possible)
-typedef uint64_t AclTargetCapabilities;
-
-/**< Error codes returned by the public entry-points */
-typedef enum AclStatus : int32_t
-{
- AclSuccess = 0, /**< Call succeeded, leading to valid state for all involved objects/data */
- AclRuntimeError = 1, /**< Call failed during execution */
- AclOutOfMemory = 2, /**< Call failed due to failure to allocate resources */
- AclUnimplemented = 3, /**< Call failed as requested capability is not implemented */
- AclUnsupportedTarget = 4, /**< Call failed as an invalid backend was requested */
- AclInvalidTarget = 5, /**< Call failed as invalid argument was passed */
- AclInvalidArgument = 6, /**< Call failed as invalid argument was passed */
- AclUnsupportedConfig = 7, /**< Call failed as configuration is unsupported */
- AclInvalidObjectState = 8, /**< Call failed as an object has invalid state */
-} AclStatus;
-
-/**< Supported CPU targets */
-typedef enum AclTarget
-{
- AclCpu = 0, /**< Cpu target that uses SIMD extensions */
- AclGpuOcl = 1, /**< OpenCL target for GPU */
-} AclTarget;
-
-/** Execution mode types */
-typedef enum AclExecutionMode
-{
- AclPreferFastRerun = 0, /**< Prioritize performance when multiple iterations are performed */
- AclPreferFastStart = 1, /**< Prioritize performance when a single iterations is expected to be performed */
-} AclExecutionMode;
-
-/** Available CPU capabilities */
-typedef enum AclCpuCapabilities
+extern "C"
{
- AclCpuCapabilitiesAuto = 0, /**< Automatic discovery of capabilities */
-
- AclCpuCapabilitiesNeon = (1 << 0), /**< Enable NEON optimized paths */
- AclCpuCapabilitiesSve = (1 << 1), /**< Enable SVE optimized paths */
- AclCpuCapabilitiesSve2 = (1 << 2), /**< Enable SVE2 optimized paths */
- // Reserve 3, 4, 5, 6
-
- AclCpuCapabilitiesFp16 = (1 << 7), /**< Enable float16 data-type support */
- AclCpuCapabilitiesBf16 = (1 << 8), /**< Enable bfloat16 data-type support */
- // Reserve 9, 10, 11, 12
-
- AclCpuCapabilitiesDot = (1 << 13), /**< Enable paths that use the udot/sdot instructions */
- AclCpuCapabilitiesMmlaInt8 = (1 << 14), /**< Enable paths that use the mmla integer instructions */
- AclCpuCapabilitiesMmlaFp = (1 << 15), /**< Enable paths that use the mmla float instructions */
-
- AclCpuCapabilitiesAll = ~0 /**< Enable all paths */
-} AclCpuCapabilities;
+#endif /* __cplusplus */
-/**< Allocator interface that can be passed to a context */
-typedef struct AclAllocator
-{
- /** Allocate a block of size bytes of memory.
+ /**< Opaque Context object */
+ typedef struct AclContext_ *AclContext;
+ /**< Opaque Queue object */
+ typedef struct AclQueue_ *AclQueue;
+ /**< Opaque Tensor object */
+ typedef struct AclTensor_ *AclTensor;
+ /**< Opaque Tensor pack object */
+ typedef struct AclTensorPack_ *AclTensorPack;
+ /**< Opaque Operator object */
+ typedef struct AclOperator_ *AclOperator;
+
+ // Capabilities bitfield (Note: if multiple are enabled ComputeLibrary will pick the best possible)
+ typedef uint64_t AclTargetCapabilities;
+
+ /**< Error codes returned by the public entry-points */
+ typedef enum AclStatus : int32_t
+ {
+ AclSuccess = 0, /**< Call succeeded, leading to valid state for all involved objects/data */
+ AclRuntimeError = 1, /**< Call failed during execution */
+ AclOutOfMemory = 2, /**< Call failed due to failure to allocate resources */
+ AclUnimplemented = 3, /**< Call failed as requested capability is not implemented */
+ AclUnsupportedTarget = 4, /**< Call failed as an invalid backend was requested */
+ AclInvalidTarget = 5, /**< Call failed as invalid argument was passed */
+ AclInvalidArgument = 6, /**< Call failed as invalid argument was passed */
+ AclUnsupportedConfig = 7, /**< Call failed as configuration is unsupported */
+ AclInvalidObjectState = 8, /**< Call failed as an object has invalid state */
+ } AclStatus;
+
+ /**< Supported CPU targets */
+ typedef enum AclTarget
+ {
+ AclCpu = 0, /**< Cpu target that uses SIMD extensions */
+ AclGpuOcl = 1, /**< OpenCL target for GPU */
+ } AclTarget;
+
+ /** Execution mode types */
+ typedef enum AclExecutionMode
+ {
+ AclPreferFastRerun = 0, /**< Prioritize performance when multiple iterations are performed */
+ AclPreferFastStart = 1, /**< Prioritize performance when a single iteration is expected to be performed */
+ } AclExecutionMode;
+
+ /** Available CPU capabilities */
+ typedef enum AclCpuCapabilities
+ {
+ AclCpuCapabilitiesAuto = 0, /**< Automatic discovery of capabilities */
+
+ AclCpuCapabilitiesNeon = (1 << 0), /**< Enable NEON optimized paths */
+ AclCpuCapabilitiesSve = (1 << 1), /**< Enable SVE optimized paths */
+ AclCpuCapabilitiesSve2 = (1 << 2), /**< Enable SVE2 optimized paths */
+ // Reserve 3, 4, 5, 6
+
+ AclCpuCapabilitiesFp16 = (1 << 7), /**< Enable float16 data-type support */
+ AclCpuCapabilitiesBf16 = (1 << 8), /**< Enable bfloat16 data-type support */
+ // Reserve 9, 10, 11, 12
+
+ AclCpuCapabilitiesDot = (1 << 13), /**< Enable paths that use the udot/sdot instructions */
+ AclCpuCapabilitiesMmlaInt8 = (1 << 14), /**< Enable paths that use the mmla integer instructions */
+ AclCpuCapabilitiesMmlaFp = (1 << 15), /**< Enable paths that use the mmla float instructions */
+
+ AclCpuCapabilitiesAll = ~0 /**< Enable all paths */
+ } AclCpuCapabilities;
+
+ /**< Allocator interface that can be passed to a context */
+ typedef struct AclAllocator
+ {
+ /** Allocate a block of size bytes of memory.
*
* @param[in] user_data User provided data that can be used by the allocator
* @param[in] size Size of the allocation
*
* @return A pointer to the allocated block if successfull else NULL
*/
- void *(*alloc)(void *user_data, size_t size);
- /** Release a block of size bytes of memory.
+ void *(*alloc)(void *user_data, size_t size);
+ /** Release a block of size bytes of memory.
*
* @param[in] user_data User provided data that can be used by the allocator
* @param[in] size Size of the allocation
*/
- void (*free)(void *user_data, void *ptr);
- /** Allocate a block of size bytes of memory.
+ void (*free)(void *user_data, void *ptr);
+ /** Allocate a block of size bytes of memory.
*
* @param[in] user_data User provided data that can be used by the allocator
* @param[in] size Size of the allocation
*
* @return A pointer to the allocated block if successfull else NULL
*/
- void *(*aligned_alloc)(void *user_data, size_t size, size_t alignment);
- /** Allocate a block of size bytes of memory.
+ void *(*aligned_alloc)(void *user_data, size_t size, size_t alignment);
+ /** Allocate a block of size bytes of memory.
*
* @param[in] user_data User provided data that can be used by the allocator
* @param[in] size Size of the allocation
*/
- void (*aligned_free)(void *user_data, void *ptr);
-
- /**< User provided information */
- void *user_data;
-} AclAllocator;
-
-/**< Context options */
-typedef struct AclContextOptions
-{
- AclExecutionMode mode; /**< Execution mode to use */
- AclTargetCapabilities capabilities; /**< Target capabilities */
- bool enable_fast_math; /**< Allow precision loss */
- const char *kernel_config_file; /**< Kernel cofiguration file */
- int32_t max_compute_units; /**< Max compute units that can be used by a queue created from the context.
+ void (*aligned_free)(void *user_data, void *ptr);
+
+ /**< User provided information */
+ void *user_data;
+ } AclAllocator;
+
+ /**< Context options */
+ typedef struct AclContextOptions
+ {
+ AclExecutionMode mode; /**< Execution mode to use */
+ AclTargetCapabilities capabilities; /**< Target capabilities */
+ bool enable_fast_math; /**< Allow precision loss */
+ const char *kernel_config_file; /**< Kernel configuration file */
+ int32_t max_compute_units; /**< Max compute units that can be used by a queue created from the context.
If <=0 the system will use the hw concurency insted */
- AclAllocator *allocator; /**< Allocator to be used by all the memory internally */
-} AclContextOptions;
-
-/**< Supported tuning modes */
-typedef enum
-{
- AclTuningModeNone = 0, /**< No tuning */
- AclRapid = 1, /**< Fast tuning mode, testing a small portion of the tuning space */
- AclNormal = 2, /**< Normal tuning mode, gives a good balance between tuning mode and performance */
- AclExhaustive = 3, /**< Exhaustive tuning mode, increased tuning time but with best results */
-} AclTuningMode;
-
-/**< Queue options */
-typedef struct
-{
- AclTuningMode mode; /**< Tuning mode */
- int32_t compute_units; /**< Compute Units that the queue will deploy */
-} AclQueueOptions;
-
-/**< Supported data types */
-typedef enum AclDataType
-{
- AclDataTypeUnknown = 0, /**< Unknown data type */
- AclUInt8 = 1, /**< 8-bit unsigned integer */
- AclInt8 = 2, /**< 8-bit signed integer */
- AclUInt16 = 3, /**< 16-bit unsigned integer */
- AclInt16 = 4, /**< 16-bit signed integer */
- AclUint32 = 5, /**< 32-bit unsigned integer */
- AclInt32 = 6, /**< 32-bit signed integer */
- AclFloat16 = 7, /**< 16-bit floating point */
- AclBFloat16 = 8, /**< 16-bit brain floating point */
- AclFloat32 = 9, /**< 32-bit floating point */
-} AclDataType;
-
-/**< Supported data layouts for operations */
-typedef enum AclDataLayout
-{
- AclDataLayoutUnknown = 0, /**< Unknown data layout */
- AclNhwc = 1, /**< Native, performant, Compute Library data layout */
- AclNchw = 2, /**< Data layout where width is the fastest changing dimension */
-} AclDataLayout;
-
-/** Type of memory to be imported */
-typedef enum AclImportMemoryType
-{
- AclHostPtr = 0 /**< Host allocated memory */
-} AclImportMemoryType;
-
-/**< Tensor Descriptor */
-typedef struct AclTensorDescriptor
-{
- int32_t ndims; /**< Number or dimensions */
- int32_t *shape; /**< Tensor Shape */
- AclDataType data_type; /**< Tensor Data type */
- int64_t *strides; /**< Strides on each dimension. Linear memory is assumed if nullptr */
- int64_t boffset; /**< Offset in terms of bytes for the first element */
-} AclTensorDescriptor;
-
-/**< Slot type of a tensor */
-typedef enum
-{
- AclSlotUnknown = -1,
- AclSrc = 0,
- AclSrc0 = 0,
- AclSrc1 = 1,
- AclDst = 30,
- AclSrcVec = 256,
-} AclTensorSlot;
+ AclAllocator *allocator; /**< Allocator to be used by all the memory internally */
+ } AclContextOptions;
+
+ /**< Supported tuning modes */
+ typedef enum
+ {
+ AclTuningModeNone = 0, /**< No tuning */
+ AclRapid = 1, /**< Fast tuning mode, testing a small portion of the tuning space */
+ AclNormal = 2, /**< Normal tuning mode, gives a good balance between tuning mode and performance */
+ AclExhaustive = 3, /**< Exhaustive tuning mode, increased tuning time but with best results */
+ } AclTuningMode;
+
+ /**< Queue options */
+ typedef struct
+ {
+ AclTuningMode mode; /**< Tuning mode */
+ int32_t compute_units; /**< Compute Units that the queue will deploy */
+ } AclQueueOptions;
+
+ /**< Supported data types */
+ typedef enum AclDataType
+ {
+ AclDataTypeUnknown = 0, /**< Unknown data type */
+ AclUInt8 = 1, /**< 8-bit unsigned integer */
+ AclInt8 = 2, /**< 8-bit signed integer */
+ AclUInt16 = 3, /**< 16-bit unsigned integer */
+ AclInt16 = 4, /**< 16-bit signed integer */
+ AclUint32 = 5, /**< 32-bit unsigned integer */
+ AclInt32 = 6, /**< 32-bit signed integer */
+ AclFloat16 = 7, /**< 16-bit floating point */
+ AclBFloat16 = 8, /**< 16-bit brain floating point */
+ AclFloat32 = 9, /**< 32-bit floating point */
+ } AclDataType;
+
+ /**< Supported data layouts for operations */
+ typedef enum AclDataLayout
+ {
+ AclDataLayoutUnknown = 0, /**< Unknown data layout */
+ AclNhwc = 1, /**< Native, performant, Compute Library data layout */
+ AclNchw = 2, /**< Data layout where width is the fastest changing dimension */
+ } AclDataLayout;
+
+ /** Type of memory to be imported */
+ typedef enum AclImportMemoryType
+ {
+ AclHostPtr = 0 /**< Host allocated memory */
+ } AclImportMemoryType;
+
+ /**< Tensor Descriptor */
+ typedef struct AclTensorDescriptor
+ {
+ int32_t ndims; /**< Number of dimensions */
+ int32_t *shape; /**< Tensor Shape */
+ AclDataType data_type; /**< Tensor Data type */
+ int64_t *strides; /**< Strides on each dimension. Linear memory is assumed if nullptr */
+ int64_t boffset; /**< Offset in terms of bytes for the first element */
+ } AclTensorDescriptor;
+
+ /**< Slot type of a tensor */
+ typedef enum
+ {
+ AclSlotUnknown = -1,
+ AclSrc = 0,
+ AclSrc0 = 0,
+ AclSrc1 = 1,
+ AclDst = 30,
+ AclSrcVec = 256,
+ } AclTensorSlot;
#ifdef __cplusplus
}
diff --git a/arm_compute/AclUtils.h b/arm_compute/AclUtils.h
index ef5fa427080399e64f3e385ce894676cc1dcec9e..61a93e606040a45794e3bb322720da1126f478a7 100644
--- a/arm_compute/AclUtils.h
+++ b/arm_compute/AclUtils.h
@@ -27,10 +27,11 @@
#include "arm_compute/AclTypes.h"
#ifdef __cplusplus
-extern "C" {
+extern "C"
+{
#endif /** __cplusplus */
-/** Get the size of the existing tensor in byte
+ /** Get the size of the existing tensor in bytes
*
* @note The size isn't based on allocated memory, but based on information in its descriptor (dimensions, data type, etc.).
*
@@ -42,9 +43,9 @@ extern "C" {
* - @ref AclSuccess if function was completed successfully
* - @ref AclInvalidArgument if a given argument is invalid
*/
-AclStatus AclGetTensorSize(AclTensor tensor, uint64_t *size);
+ AclStatus AclGetTensorSize(AclTensor tensor, uint64_t *size);
-/** Get the descriptor of this tensor
+ /** Get the descriptor of this tensor
*
* @param[in] tensor A tensor in interest
* @param[out] desc The descriptor of the tensor
@@ -54,7 +55,7 @@ AclStatus AclGetTensorSize(AclTensor tensor, uint64_t *size);
* - @ref AclSuccess if function was completed successfully
* - @ref AclInvalidArgument if a given argument is invalid
*/
-AclStatus AclGetTensorDescriptor(AclTensor tensor, AclTensorDescriptor *desc);
+ AclStatus AclGetTensorDescriptor(AclTensor tensor, AclTensorDescriptor *desc);
#ifdef __cplusplus
}
diff --git a/arm_compute/AclVersion.h b/arm_compute/AclVersion.h
index 0b05a5e7dc7e666d6bb1ac8b18556c2ac0f680dc..6eed13b924a8fb5c1d7da88cc78c4b22efd53c94 100644
--- a/arm_compute/AclVersion.h
+++ b/arm_compute/AclVersion.h
@@ -25,17 +25,18 @@
#define ARM_COMPUTE_ACL_VERSION_H_
#ifdef __cplusplus
-extern "C" {
+extern "C"
+{
#endif /* __cplusplus */
-/** Semantic versioning information */
-typedef struct AclVersion
-{
- int major; /**< Major version, is increased on API incompatible changes */
- int minor; /**< Minor version, is increased on adding back-ward compatible functionality */
- int patch; /**< Patch version, is increased when doing backward compatible fixes */
- const char *build_info; /**< Build related information */
-} AclVersion;
+ /** Semantic versioning information */
+ typedef struct AclVersion
+ {
+ int major; /**< Major version, is increased on API incompatible changes */
+ int minor; /**< Minor version, is increased on adding backward compatible functionality */
+ int patch; /**< Patch version, is increased when doing backward compatible fixes */
+ const char *build_info; /**< Build related information */
+ } AclVersion;
/**< Major version, is increased on API incompatible changes */
#define ARM_COMPUTE_LIBRARY_VERSION_MAJOR 0
@@ -44,11 +45,11 @@ typedef struct AclVersion
/**< Patch version, is increased when doing backward compatible fixes */
#define ARM_COMPUTE_LIBRARY_VERSION_PATCH 0
-/** Get library's version meta-data
+ /** Get library's version meta-data
*
* @return Version information
*/
-const AclVersion *AclVersionInfo();
+ const AclVersion *AclVersionInfo();
#ifdef __cplusplus
}
diff --git a/arm_compute/core/CL/CLCompileContext.h b/arm_compute/core/CL/CLCompileContext.h
index 60e0f95f8322b404d764561e8baf890ca0a15f2a..dcd3b456701b37c6309f357991b4127602453c2f 100644
--- a/arm_compute/core/CL/CLCompileContext.h
+++ b/arm_compute/core/CL/CLCompileContext.h
@@ -250,8 +250,12 @@ public:
*
* @return The created kernel.
*/
- Kernel create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source,
- const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const;
+ Kernel create_kernel(const std::string &kernel_name,
+ const std::string &program_name,
+ const std::string &program_source,
+ const std::string &kernel_path,
+ const StringSet &build_options_set,
+ bool is_binary) const;
/** Clear the library's cache of binary programs
*/
@@ -323,7 +327,8 @@ private:
* @param[in] program_source Source of the program.
* @param[in] is_binary Flag to indicate if the program source is binary.
*/
- const Program &load_program(const std::string &program_name, const std::string &program_source, bool is_binary) const;
+ const Program &
+ load_program(const std::string &program_name, const std::string &program_source, bool is_binary) const;
/** Generates the build options given a string of user defined ones
*
@@ -343,11 +348,11 @@ private:
*/
std::string stringify_set(const StringSet &s, const std::string &kernel_path) const;
- cl::Context _context; /**< Underlying CL context. */
- CLDevice _device; /**< Underlying CL device. */
+ cl::Context _context; /**< Underlying CL context. */
+ CLDevice _device; /**< Underlying CL device. */
mutable std::map _programs_map; /**< Map with all already loaded program data. */
mutable std::map _built_programs_map; /**< Map with all already built program data. */
- bool _is_wbsm_supported; /**< Support of worksize batch size modifier support boolean*/
+ bool _is_wbsm_supported; /**< Support of worksize batch size modifier support boolean*/
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLCOMPILECONTEXT_H */
diff --git a/arm_compute/core/CL/CLDevice.h b/arm_compute/core/CL/CLDevice.h
index 5e0f86e6d9fe8d4862cf234cd6509adc8f463c5c..ded6bb84938055b53647899429e82b7796f3b342 100644
--- a/arm_compute/core/CL/CLDevice.h
+++ b/arm_compute/core/CL/CLDevice.h
@@ -44,8 +44,7 @@ class CLDevice : public IDevice
{
public:
/** Default Constructor */
- CLDevice()
- : _device(cl::Device()), _options()
+ CLDevice() : _device(cl::Device()), _options()
{
}
@@ -53,8 +52,7 @@ public:
*
* @param[in] cl_device OpenCL device
*/
- CLDevice(const cl::Device &cl_device)
- : _device(), _options()
+ CLDevice(const cl::Device &cl_device) : _device(), _options()
{
_device = cl_device;
@@ -66,13 +64,13 @@ public:
std::string extensions = _device.getInfo();
std::istringstream iss(extensions);
- for(std::string s; iss >> s;)
+ for (std::string s; iss >> s;)
{
_options.extensions.insert(s);
}
// SW workaround for G76
- if(_options.gpu_target == GPUTarget::G76)
+ if (_options.gpu_target == GPUTarget::G76)
{
_options.extensions.insert("cl_arm_integer_dot_product_int8");
}
@@ -153,15 +151,15 @@ public:
*/
std::tuple is_non_uniform_workgroup_supported() const
{
- if(version() == CLVersion::CL30 && get_cl_non_uniform_work_group_supported(_device))
+ if (version() == CLVersion::CL30 && get_cl_non_uniform_work_group_supported(_device))
{
return {true, " -cl-std=CL3.0 "};
}
- else if(version() == CLVersion::CL20)
+ else if (version() == CLVersion::CL20)
{
return {true, " -cl-std=CL2.0 "};
}
- else if(supported("cl_arm_non_uniform_work_group_size"))
+ else if (supported("cl_arm_non_uniform_work_group_size"))
{
return {true, " -cl-arm-non-uniform-work-group-size "};
}
diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
index a162b1c1182cf6623f60232160ee4540e09ce096..1a639e47f92cd35c19d153afcd1a0d57bfd0e36b 100644
--- a/arm_compute/core/CL/CLHelpers.h
+++ b/arm_compute/core/CL/CLHelpers.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2022 Arm Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -179,7 +179,9 @@ bool dot8_acc_supported(const cl::Device &device);
*
* @return True if the configuration is supported
*/
-bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout);
+bool cl_winograd_convolution_layer_supported(const Size2D &output_tile,
+ const Size2D &kernel_size,
+ DataLayout data_layout);
/** Helper function to get the preferred native vector width size for built-in scalar types that can be put into vectors
*
@@ -215,7 +217,9 @@ bool image2d_from_buffer_supported(const cl::Device &device);
*
* @return An opencl kernel
*/
-cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set &build_opts = std::set());
+cl::Kernel create_kernel(const CLCompileContext &ctx,
+ const std::string &kernel_name,
+ const std::set &build_opts = std::set());
/** Creates a suitable LWS hint object for parallel implementations. Sets the number of WG based on the input size.
* If input width is smaller than 128 we can use fewer threads than 8.
@@ -267,5 +271,22 @@ void set_unroll_with_pragma(CLBuildOptions &built_opts, std::initializer_list(max_num_values), _mapping(nullptr)
+ explicit ICLArray(size_t max_num_values) : IArray(max_num_values), _mapping(nullptr)
{
}
@@ -125,5 +124,5 @@ using ICLInt16Array = ICLArray;
using ICLInt32Array = ICLArray;
/** Interface for OpenCL Array of floats. */
using ICLFloatArray = ICLArray;
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_ICLARRAY_H*/
diff --git a/arm_compute/core/CL/ICLTensor.h b/arm_compute/core/CL/ICLTensor.h
index 78d3757e593af9e5d38faa3e646847a429518dc0..8de5423762090b81dd26c5861f6c06eda69c0121 100644
--- a/arm_compute/core/CL/ICLTensor.h
+++ b/arm_compute/core/CL/ICLTensor.h
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_ICLTENSOR_H
#define ARM_COMPUTE_ICLTENSOR_H
-#include "arm_compute/core/ITensor.h"
-
#include "arm_compute/core/CL/CLTypes.h"
+#include "arm_compute/core/ITensor.h"
#include
@@ -34,7 +33,7 @@ namespace cl
{
class Buffer;
class CommandQueue;
-}
+} // namespace cl
namespace arm_compute
{
@@ -113,5 +112,5 @@ private:
};
using ICLImage = ICLTensor;
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_ICLTENSOR_H */
diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h
index 1e1f5291d9690e2c7dbb9d4d8f1b570ba98ae76d..a5c4e39df2d9f7e42238c05e7ceb33afc425de2d 100644
--- a/arm_compute/core/CL/OpenCL.h
+++ b/arm_compute/core/CL/OpenCL.h
@@ -31,8 +31,8 @@
#ifndef ARM_COMPUTE_NO_EXCEPTIONS
#define CL_HPP_ENABLE_EXCEPTIONS
#endif // ARM_COMPUTE_NO_EXCEPTIONS
-#define CL_TARGET_OPENCL_VERSION 300
-#define CL_HPP_TARGET_OPENCL_VERSION 110
+#define CL_TARGET_OPENCL_VERSION 300
+#define CL_HPP_TARGET_OPENCL_VERSION 110
#define CL_HPP_MINIMUM_OPENCL_VERSION 110
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Weffc++"
@@ -40,7 +40,7 @@
#pragma GCC diagnostic ignored "-Wunused-parameter"
#if defined(__GNUG__) && __GNUG__ >= 8
#pragma GCC diagnostic ignored "-Wcatch-value"
-#endif // defined(__GNUG__) && __GNUG__ >= 8
+#endif // defined(__GNUG__) && __GNUG__ >= 8
#include // include new hpp header instead of cl2.hpp
#pragma GCC diagnostic pop
@@ -88,8 +88,7 @@ public:
*/
bool load_default();
-#define DECLARE_FUNCTION_PTR(func_name) \
- std::function func_name##_ptr = nullptr
+#define DECLARE_FUNCTION_PTR(func_name) std::function func_name##_ptr = nullptr
DECLARE_FUNCTION_PTR(clCreateContext);
DECLARE_FUNCTION_PTR(clCreateContextFromType);
@@ -141,6 +140,16 @@ public:
DECLARE_FUNCTION_PTR(clCreateImage);
DECLARE_FUNCTION_PTR(clSetKernelExecInfo);
+ // Command buffer and mutable dispatch command buffer extensions
+ DECLARE_FUNCTION_PTR(clCreateCommandBufferKHR);
+ DECLARE_FUNCTION_PTR(clRetainCommandBufferKHR);
+ DECLARE_FUNCTION_PTR(clReleaseCommandBufferKHR);
+ DECLARE_FUNCTION_PTR(clFinalizeCommandBufferKHR);
+ DECLARE_FUNCTION_PTR(clEnqueueCommandBufferKHR);
+ DECLARE_FUNCTION_PTR(clCommandNDRangeKernelKHR);
+
+ DECLARE_FUNCTION_PTR(clUpdateMutableCommandsKHR);
+
// Third-party extensions
DECLARE_FUNCTION_PTR(clImportMemoryARM);
diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h
index e4cbd9ff9b7626d02009c3edd22ee47627955975..b080a86938e8f808749e49074c0a8ff72d079277 100644
--- a/arm_compute/core/CPP/CPPTypes.h
+++ b/arm_compute/core/CPP/CPPTypes.h
@@ -78,10 +78,10 @@ public:
/* Delete move and copy constructors and assignment operator
s */
- CPUInfo(CPUInfo const &) = delete; // Copy construct
- CPUInfo(CPUInfo &&) = delete; // Move construct
+ CPUInfo(CPUInfo const &) = delete; // Copy construct
+ CPUInfo(CPUInfo &&) = delete; // Move construct
CPUInfo &operator=(CPUInfo const &) = delete; // Copy assign
- CPUInfo &operator=(CPUInfo &&) = delete; // Move assign
+ CPUInfo &operator=(CPUInfo &&) = delete; // Move assign
/** Checks if the cpu model supports fp16.
*
@@ -179,9 +179,9 @@ private:
/** Information about executing thread and CPU. */
struct ThreadInfo
{
- int thread_id{ 0 };
- int num_threads{ 1 };
- const CPUInfo *cpu_info{ nullptr };
+ int thread_id{0};
+ int num_threads{1};
+ const CPUInfo *cpu_info{nullptr};
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CPP_TYPES_H */
diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h
index 00a10555e32a78dc59926e19d4c8e80317e2d4d4..03967a536d6f4ec36194948e2f2afb805728c168 100644
--- a/arm_compute/core/CPP/ICPPKernel.h
+++ b/arm_compute/core/CPP/ICPPKernel.h
@@ -25,9 +25,9 @@
#define ARM_COMPUTE_ICPPKERNEL_H
#include "arm_compute/core/CPP/CPPTypes.h"
+#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/core/IKernel.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/experimental/Types.h"
namespace arm_compute
{
@@ -38,7 +38,7 @@ class ITensor;
class ICPPKernel : public IKernel
{
public:
- static constexpr size_t default_mws = 1; /* Default minimum workload size value - no impact */
+ static constexpr size_t default_mws = 1; /* Default minimum workload size value - no impact */
/** Default destructor */
virtual ~ICPPKernel() = default;
diff --git a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h
index 068b37d80cf4777bca0bd441910fb6acc80cc638..dd91595ea67a8f65401ebaf0523e88570df504cf 100644
--- a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h
@@ -63,8 +63,16 @@ public:
* @param[out] keeps_size (Optional) Number of filtered indices per class tensor of size [num_classes]. Data types supported: U32
* @param[in] info (Optional) BoxNMSLimitInfo information.
*/
- void configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes,
- ITensor *batch_splits_out = nullptr, ITensor *keeps = nullptr, ITensor *keeps_size = nullptr, const BoxNMSLimitInfo info = BoxNMSLimitInfo());
+ void configure(const ITensor *scores_in,
+ const ITensor *boxes_in,
+ const ITensor *batch_splits_in,
+ ITensor *scores_out,
+ ITensor *boxes_out,
+ ITensor *classes,
+ ITensor *batch_splits_out = nullptr,
+ ITensor *keeps = nullptr,
+ ITensor *keeps_size = nullptr,
+ const BoxNMSLimitInfo info = BoxNMSLimitInfo());
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
@@ -74,9 +82,9 @@ public:
void run_nmslimit();
private:
- const ITensor *_scores_in;
- const ITensor *_boxes_in;
- const ITensor *_batch_splits_in;
+ const ITensor *_scores_in;
+ const ITensor *_boxes_in;
+ const ITensor *_batch_splits_in;
ITensor *_scores_out;
ITensor *_boxes_out;
ITensor *_classes;
diff --git a/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h b/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h
index e32b5d8f7bc3f5e18542dcc9b825576e536125b6..d1f7f8670f72d4da40ab772786f8e16edb5c687b 100644
--- a/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_CPP_NONMAXIMUMSUPPRESSIONKERNEL_LAYER_H
#define ARM_COMPUTE_CPP_NONMAXIMUMSUPPRESSIONKERNEL_LAYER_H
-#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h"
namespace arm_compute
{
@@ -65,7 +64,12 @@ public:
* @param[in] iou_threshold The threshold used in non maximum suppression.
*
*/
- void configure(const ITensor *input_bboxes, const ITensor *input_scores, ITensor *output_indices, unsigned int max_output_size, const float score_threshold, const float iou_threshold);
+ void configure(const ITensor *input_bboxes,
+ const ITensor *input_scores,
+ ITensor *output_indices,
+ unsigned int max_output_size,
+ const float score_threshold,
+ const float iou_threshold);
/** Static function to check if given arguments will lead to a valid configuration of @ref CPPNonMaximumSuppressionKernel
*
@@ -77,8 +81,12 @@ public:
* @param[in] iou_threshold The threshold used in non maximum suppression.
*
*/
- static Status validate(const ITensorInfo *input_bboxes, const ITensorInfo *input_scores, const ITensorInfo *output_indices, unsigned int max_output_size,
- const float score_threshold, const float iou_threshold);
+ static Status validate(const ITensorInfo *input_bboxes,
+ const ITensorInfo *input_scores,
+ const ITensorInfo *output_indices,
+ unsigned int max_output_size,
+ const float score_threshold,
+ const float iou_threshold);
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
diff --git a/arm_compute/core/CPP/kernels/CPPTopKVKernel.h b/arm_compute/core/CPP/kernels/CPPTopKVKernel.h
index 1245dbc14c401f1ab7c6be1eeb7d6c7e2f63b868..7326a10e2ffed837c5cd2f20fbd9800183edf497 100644
--- a/arm_compute/core/CPP/kernels/CPPTopKVKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPTopKVKernel.h
@@ -69,7 +69,8 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k);
+ static Status
+ validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k);
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
diff --git a/arm_compute/core/Coordinates.h b/arm_compute/core/Coordinates.h
index f6e1f4d2822d673367e5ddc2e3bd39a6414024bf..d1240bb10a757c00053ef5e21d2708af83af1139 100644
--- a/arm_compute/core/Coordinates.h
+++ b/arm_compute/core/Coordinates.h
@@ -42,8 +42,7 @@ public:
* @param[in] coords Values to initialize the dimensions.
*/
template
- constexpr Coordinates(Ts... coords)
- : Dimensions{ coords... }
+ constexpr Coordinates(Ts... coords) : Dimensions{coords...}
{
}
/** Allow instances of this class to be copy constructed */
@@ -57,5 +56,5 @@ public:
/** Default destructor */
~Coordinates() = default;
};
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_COORDINATES_H*/
diff --git a/arm_compute/core/CoreTypes.h b/arm_compute/core/CoreTypes.h
index 4a48a366518b99cc82893a102ee62360a8f3da10..1a9db1937c5f5cbe5ecf721035d5b915a754fb14 100644
--- a/arm_compute/core/CoreTypes.h
+++ b/arm_compute/core/CoreTypes.h
@@ -25,6 +25,7 @@
#define ACL_ARM_COMPUTE_CORE_CORETYPES
#include "arm_compute/core/Strides.h"
+
#include "support/Half.h"
/** CoreTypes.h groups together essential small types that are used across functions */
@@ -146,9 +147,11 @@ public:
* @param[in] pad_y (Optional) Padding, in elements, across y. Defaults to 0.
* @param[in] round (Optional) Dimensions rounding. Defaults to @ref DimensionRoundingType::FLOOR.
*/
- PadStrideInfo(unsigned int stride_x = 1, unsigned int stride_y = 1,
- unsigned int pad_x = 0, unsigned int pad_y = 0,
- DimensionRoundingType round = DimensionRoundingType::FLOOR)
+ PadStrideInfo(unsigned int stride_x = 1,
+ unsigned int stride_y = 1,
+ unsigned int pad_x = 0,
+ unsigned int pad_y = 0,
+ DimensionRoundingType round = DimensionRoundingType::FLOOR)
: _stride(std::make_pair(stride_x, stride_y)),
_pad_left(pad_x),
_pad_top(pad_y),
@@ -167,9 +170,12 @@ public:
* @param[in] pad_bottom Padding across y on the bottom, in elements.
* @param[in] round Dimensions rounding.
*/
- PadStrideInfo(unsigned int stride_x, unsigned int stride_y,
- unsigned int pad_left, unsigned int pad_right,
- unsigned int pad_top, unsigned int pad_bottom,
+ PadStrideInfo(unsigned int stride_x,
+ unsigned int stride_y,
+ unsigned int pad_left,
+ unsigned int pad_right,
+ unsigned int pad_top,
+ unsigned int pad_bottom,
DimensionRoundingType round)
: _stride(std::make_pair(stride_x, stride_y)),
_pad_left(pad_left),
@@ -243,10 +249,10 @@ public:
private:
std::pair _stride;
- unsigned int _pad_left;
- unsigned int _pad_top;
- unsigned int _pad_right;
- unsigned int _pad_bottom;
+ unsigned int _pad_left;
+ unsigned int _pad_top;
+ unsigned int _pad_right;
+ unsigned int _pad_bottom;
DimensionRoundingType _round_type;
};
diff --git a/arm_compute/core/Dimensions.h b/arm_compute/core/Dimensions.h
index 2ebfcd7f839326f5fa336b4d2850aeaf44c815cb..bb8692d70acf2cd1637174ca8b089aa49b5fcdae 100644
--- a/arm_compute/core/Dimensions.h
+++ b/arm_compute/core/Dimensions.h
@@ -50,8 +50,7 @@ public:
* @param[in] dims Values to initialize the dimensions.
*/
template
- explicit Dimensions(Ts... dims)
- : _id{ { static_cast(dims)... } }, _num_dimensions{ sizeof...(dims) }
+ explicit Dimensions(Ts... dims) : _id{{static_cast(dims)...}}, _num_dimensions{sizeof...(dims)}
{
}
@@ -78,7 +77,7 @@ public:
ARM_COMPUTE_ERROR_ON(dimension >= num_max_dimensions);
_id[dimension] = value;
// Don't increase the number of dimensions if the new dimension is 1
- if(increase_dim_unit || value != 1)
+ if (increase_dim_unit || value != 1)
{
_num_dimensions = std::max(_num_dimensions, dimension + 1);
}
@@ -108,7 +107,7 @@ public:
void increment(size_t dim, T step = 1)
{
ARM_COMPUTE_ERROR_ON(dim >= _num_dimensions);
- if((std::numeric_limits::max() - _id[dim]) >= step)
+ if ((std::numeric_limits::max() - _id[dim]) >= step)
{
_id[dim] += step;
}
@@ -162,7 +161,7 @@ public:
const size_t last = std::min(_num_dimensions, first + n);
- if(last > (first + 1))
+ if (last > (first + 1))
{
// Collapse dimensions into the first
_id[first] = std::accumulate(&_id[first], &_id[last], 1, std::multiplies());
@@ -196,7 +195,7 @@ public:
void remove(size_t idx)
{
ARM_COMPUTE_ERROR_ON(_num_dimensions < 1);
- if(idx >= _num_dimensions)
+ if (idx >= _num_dimensions)
{
return;
}
@@ -262,7 +261,7 @@ protected:
~Dimensions() = default;
std::array _id;
- size_t _num_dimensions{ 0 };
+ size_t _num_dimensions{0};
};
/** Check that given dimensions are equal.
@@ -289,5 +288,5 @@ inline bool operator!=(const Dimensions &lhs, const Dimensions &rhs)
{
return !(lhs == rhs);
}
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_DIMENSIONS_H*/
diff --git a/arm_compute/core/Error.h b/arm_compute/core/Error.h
index 0854f2c5274eddb3a5ac5052c4732548719d6d95..7a7033805a1d29c10723ba13248bdffbc5fe1547 100644
--- a/arm_compute/core/Error.h
+++ b/arm_compute/core/Error.h
@@ -53,8 +53,7 @@ class Status
{
public:
/** Default Constructor **/
- Status()
- : _code(ErrorCode::OK), _error_description(" ")
+ Status() : _code(ErrorCode::OK), _error_description(" ")
{
}
/** Default Constructor
@@ -101,7 +100,7 @@ public:
/** Throws a runtime exception in case it contains a valid error status */
void throw_if_error() const
{
- if(!bool(*this))
+ if (!bool(*this))
{
internal_throw_on_error();
}
@@ -141,7 +140,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] err Error status
*/
[[noreturn]] void throw_error(Status err);
-}
+} // namespace arm_compute
/** To avoid unused variables warnings
*
* This is useful if for example a variable is only used
@@ -156,7 +155,8 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] error_code Error code.
* @param[in] msg Message to encapsulate.
*/
-#define ARM_COMPUTE_CREATE_ERROR(error_code, msg) arm_compute::create_error_msg(error_code, __func__, __FILE__, __LINE__, msg)
+#define ARM_COMPUTE_CREATE_ERROR(error_code, msg) \
+ arm_compute::create_error_msg(error_code, __func__, __FILE__, __LINE__, msg)
/** Creates an error on location with a given message
*
@@ -166,7 +166,8 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] line Line in which the error occurred.
* @param[in] msg Message to display before abandoning.
*/
-#define ARM_COMPUTE_CREATE_ERROR_LOC(error_code, func, file, line, msg) arm_compute::create_error_msg(error_code, func, file, line, msg)
+#define ARM_COMPUTE_CREATE_ERROR_LOC(error_code, func, file, line, msg) \
+ arm_compute::create_error_msg(error_code, func, file, line, msg)
/** Creates an error on location with a given message. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -178,14 +179,14 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] msg Error description message format.
* @param[in] ... List of arguments matching the format description.
*/
-#define ARM_COMPUTE_CREATE_ERROR_LOC_VAR(error_code, func, file, line, msg, ...) \
- do \
- { \
- std::array out{ 0 }; \
- int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
- snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
- arm_compute::create_error(error_code, std::string(out.data())); \
- } while(false)
+#define ARM_COMPUTE_CREATE_ERROR_LOC_VAR(error_code, func, file, line, msg, ...) \
+ do \
+ { \
+ std::array out{0}; \
+ int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
+ snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
+ arm_compute::create_error(error_code, std::string(out.data())); \
+ } while (false)
/** An error is returned with the given description.
*
@@ -195,7 +196,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
do \
{ \
return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, __VA_ARGS__); \
- } while(false)
+ } while (false)
/** Checks if a status contains an error and returns it
*
@@ -205,18 +206,17 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
do \
{ \
const auto s = status; \
- if(!bool(s)) \
+ if (!bool(s)) \
{ \
return s; \
} \
- } while(false)
+ } while (false)
/** Checks if an error value is valid if not throws an exception with the error
*
* @param[in] error Error value to check.
*/
-#define ARM_COMPUTE_THROW_ON_ERROR(error) \
- error.throw_if_error();
+#define ARM_COMPUTE_THROW_ON_ERROR(error) error.throw_if_error();
/** If the condition is true, an error is returned. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -228,28 +228,29 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG_VAR(cond, msg, ...) \
do \
{ \
- if(cond) \
+ if (cond) \
{ \
- std::array out{ 0 }; \
+ std::array out{0}; \
int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", __func__, __FILE__, __LINE__); \
snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
return arm_compute::create_error(arm_compute::ErrorCode::RUNTIME_ERROR, std::string(out.data())); \
} \
- } while(false)
+ } while (false)
/** If the condition is true, an error is returned
*
* @param[in] cond Condition to evaluate.
* @param[in] msg Error description message
*/
-#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg) \
- do \
- { \
- if(cond) \
- { \
- return arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, __func__, __FILE__, __LINE__, msg); \
- } \
- } while(false)
+#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg) \
+ do \
+ { \
+ if (cond) \
+ { \
+ return arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, __func__, __FILE__, __LINE__, \
+ msg); \
+ } \
+ } while (false)
/** If the condition is true, an error is thrown. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -261,17 +262,17 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] msg Error description message format.
* @param[in] ... List of arguments matching the format description.
*/
-#define ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(cond, func, file, line, msg, ...) \
- do \
- { \
- if(cond) \
- { \
- std::array out{ 0 }; \
- int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
- snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
- return arm_compute::create_error(ErrorCode::RUNTIME_ERROR, std::string(out.data())); \
- } \
- } while(false)
+#define ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(cond, func, file, line, msg, ...) \
+ do \
+ { \
+ if (cond) \
+ { \
+ std::array out{0}; \
+ int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
+ snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
+ return arm_compute::create_error(ErrorCode::RUNTIME_ERROR, std::string(out.data())); \
+ } \
+ } while (false)
/** If the condition is true, an error is thrown.
*
@@ -284,18 +285,17 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(cond, func, file, line, msg) \
do \
{ \
- if(cond) \
+ if (cond) \
{ \
return arm_compute::create_error_msg(ErrorCode::RUNTIME_ERROR, func, file, line, msg); \
} \
- } while(false)
+ } while (false)
/** If the condition is true, an error is returned
*
* @param[in] cond Condition to evaluate
*/
-#define ARM_COMPUTE_RETURN_ERROR_ON(cond) \
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, #cond)
+#define ARM_COMPUTE_RETURN_ERROR_ON(cond) ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, #cond)
/** If the condition is true, an error is returned
*
@@ -314,11 +314,12 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] line Line in which the error occurred.
* @param[in] msg Message to display.
*/
-#define ARM_COMPUTE_THROW_ERROR(func, file, line, msg) \
- do \
- { \
- arm_compute::throw_error(arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, func, file, line, msg)); \
- } while(false)
+#define ARM_COMPUTE_THROW_ERROR(func, file, line, msg) \
+ do \
+ { \
+ arm_compute::throw_error( \
+ arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, func, file, line, msg)); \
+ } while (false)
/** Print the given message then throw an std::runtime_error. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -332,11 +333,11 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_THROW_ERROR_VAR(func, file, line, msg, ...) \
do \
{ \
- std::array out{ 0 }; \
- int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
+ std::array out{0}; \
+ int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
arm_compute::throw_error(arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, std::string(out.data()))); \
- } while(false)
+ } while (false)
/** Print the given message then throw an std::runtime_error. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -361,7 +362,8 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] msg Error description message format.
* @param[in] ... List of arguments matching the format description.
*/
-#define ARM_COMPUTE_ERROR_LOC_VAR(func, file, line, msg, ...) ARM_COMPUTE_THROW_ERROR_VAR(func, file, line, msg, __VA_ARGS__) // NOLINT
+#define ARM_COMPUTE_ERROR_LOC_VAR(func, file, line, msg, ...) \
+ ARM_COMPUTE_THROW_ERROR_VAR(func, file, line, msg, __VA_ARGS__) // NOLINT
/** Print the given message then throw an std::runtime_error.
*
@@ -380,11 +382,11 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_EXIT_ON_MSG(cond, msg) \
do \
{ \
- if(cond) \
+ if (cond) \
{ \
ARM_COMPUTE_ERROR(msg); \
} \
- } while(false)
+ } while (false)
/** If the condition is true, the given message is printed and program exits. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -396,27 +398,25 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_EXIT_ON_MSG_VAR(cond, msg, ...) \
do \
{ \
- if(cond) \
+ if (cond) \
{ \
ARM_COMPUTE_ERROR_VAR(msg, __VA_ARGS__); \
} \
- } while(false)
+ } while (false)
#ifdef ARM_COMPUTE_ASSERTS_ENABLED
/** Checks if a status value is valid if not throws an exception with the error
*
* @param[in] status Status value to check.
*/
-#define ARM_COMPUTE_ERROR_THROW_ON(status) \
- status.throw_if_error()
+#define ARM_COMPUTE_ERROR_THROW_ON(status) status.throw_if_error()
/** If the condition is true, the given message is printed and an exception is thrown
*
* @param[in] cond Condition to evaluate.
* @param[in] msg Message to display.
*/
-#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg) \
- ARM_COMPUTE_EXIT_ON_MSG(cond, msg)
+#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg) ARM_COMPUTE_EXIT_ON_MSG(cond, msg)
/** If the condition is true, the given message is printed and an exception is thrown. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -425,8 +425,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] msg Error description message format.
* @param[in] ... List of arguments matching the format description.
*/
-#define ARM_COMPUTE_ERROR_ON_MSG_VAR(cond, msg, ...) \
- ARM_COMPUTE_EXIT_ON_MSG_VAR(cond, msg, __VA_ARGS__)
+#define ARM_COMPUTE_ERROR_ON_MSG_VAR(cond, msg, ...) ARM_COMPUTE_EXIT_ON_MSG_VAR(cond, msg, __VA_ARGS__)
/** If the condition is true, the given message is printed and an exception is thrown.
*
@@ -439,11 +438,11 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_ERROR_ON_LOC_MSG(cond, func, file, line, ...) \
do \
{ \
- if(cond) \
+ if (cond) \
{ \
ARM_COMPUTE_ERROR_LOC_VAR(func, file, line, __VA_ARGS__); \
} \
- } while(false)
+ } while (false)
/** If the condition is true, the given message is printed and an exception is thrown, otherwise value is returned
*
@@ -464,8 +463,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
*
* @param[in] cond Condition to evaluate.
*/
-#define ARM_COMPUTE_ERROR_ON(cond) \
- ARM_COMPUTE_ERROR_ON_MSG(cond, #cond)
+#define ARM_COMPUTE_ERROR_ON(cond) ARM_COMPUTE_ERROR_ON_MSG(cond, #cond)
/** If the condition is true then an error message is printed and an exception thrown
*
diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h
index f19e1e12e054847e1c1edc9820599b79d9869b4b..960201510a126280d562eff4725c11e38b665258 100644
--- a/arm_compute/core/Helpers.h
+++ b/arm_compute/core/Helpers.h
@@ -96,7 +96,6 @@ public:
void reset(size_t dimension);
private:
-
/** Initialize a container iterator for the tensor with the specified number of dimensions, stride, buffer pointer and window.
*
* @param[in] num_dims The number of dimensions.
@@ -112,8 +111,7 @@ private:
class Dimension
{
public:
- constexpr Dimension()
- : _dim_start(0), _stride(0)
+ constexpr Dimension() : _dim_start(0), _stride(0)
{
}
@@ -133,7 +131,7 @@ private:
* @param[in,out] iterators Tensor iterators which will be updated by this function before calling lambda_function.
*/
template
-inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators);
+inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators);
/** Permutes given Dimensions according to a permutation vector
*
@@ -146,7 +144,7 @@ template
inline void permute(Dimensions &dimensions, const PermutationVector &perm)
{
auto dimensions_copy = utility::make_array::num_max_dimensions>(dimensions.begin(), dimensions.end());
- for(unsigned int i = 0; i < perm.num_dimensions(); ++i)
+ for (unsigned int i = 0; i < perm.num_dimensions(); ++i)
{
T dimension_val = (perm[i] < dimensions.num_dimensions()) ? dimensions_copy[perm[i]] : 0;
dimensions.set(i, dimension_val);
@@ -163,7 +161,7 @@ inline void permute(Dimensions &dimensions, const PermutationVector &perm)
inline void permute(TensorShape &shape, const PermutationVector &perm)
{
TensorShape shape_copy = shape;
- for(unsigned int i = 0; i < perm.num_dimensions(); ++i)
+ for (unsigned int i = 0; i < perm.num_dimensions(); ++i)
{
size_t dimension_val = (perm[i] < shape.num_dimensions()) ? shape_copy[perm[i]] : 1;
shape.set(i, dimension_val, false, false); // Avoid changes in _num_dimension
@@ -180,8 +178,11 @@ inline void permute(TensorShape &shape, const PermutationVector &perm)
*
* @return The corresponding valid region
*/
-ValidRegion calculate_valid_region_scale(const ITensorInfo &src_info, const TensorShape &dst_shape,
- InterpolationPolicy interpolate_policy, SamplingPolicy sampling_policy, bool border_undefined);
+ValidRegion calculate_valid_region_scale(const ITensorInfo &src_info,
+ const TensorShape &dst_shape,
+ InterpolationPolicy interpolate_policy,
+ SamplingPolicy sampling_policy,
+ bool border_undefined);
/** Convert a linear index into n-dimensional coordinates.
*
@@ -224,7 +225,8 @@ const std::map> &get_layout_map();
*
* @return The int conversion of the requested data layout index.
*/
-inline size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension);
+inline size_t get_data_layout_dimension_index(const DataLayout &data_layout,
+ const DataLayoutDimension &data_layout_dimension);
/** Get the DataLayoutDimension of a given index and layout.
*
@@ -245,10 +247,17 @@ inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout &dat
*
* @return the number of output tiles along the x and y directions of size "output_tile_size"
*/
-inline Size2D compute_winograd_convolution_tiles(const Size2D &in_dims, const Size2D &kernel_size, const Size2D &output_tile_size, const PadStrideInfo &conv_info)
+inline Size2D compute_winograd_convolution_tiles(const Size2D &in_dims,
+ const Size2D &kernel_size,
+ const Size2D &output_tile_size,
+ const PadStrideInfo &conv_info)
{
- int num_tiles_x = std::ceil((in_dims.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast(output_tile_size.width));
- int num_tiles_y = std::ceil((in_dims.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast(output_tile_size.height));
+ int num_tiles_x =
+ std::ceil((in_dims.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) /
+ static_cast(output_tile_size.width));
+ int num_tiles_y =
+ std::ceil((in_dims.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) /
+ static_cast(output_tile_size.height));
// Clamp in case we provide paddings but we have 1D convolution
num_tiles_x = std::min(num_tiles_x, static_cast(in_dims.width));
@@ -277,7 +286,7 @@ inline T wrap_around(T x, T m)
*/
inline Coordinates &convert_negative_axis(Coordinates &coords, int max_value)
{
- for(unsigned int i = 0; i < coords.num_dimensions(); ++i)
+ for (unsigned int i = 0; i < coords.num_dimensions(); ++i)
{
coords[i] = wrap_around(coords[i], max_value);
}
diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl
index ff902bba203a6c699281705e4a9c1ff60c793afd..60a21e94181c09b3c763a454ff800aefdcae27f6 100644
--- a/arm_compute/core/Helpers.inl
+++ b/arm_compute/core/Helpers.inl
@@ -32,12 +32,9 @@ template
struct IncrementIterators
{
template
- static void unroll(T &&it, Ts &&... iterators)
+ static void unroll(T &&it, Ts &&...iterators)
{
- auto increment = [](T && it)
- {
- it.increment(dimension);
- };
+ auto increment = [](T &&it) { it.increment(dimension); };
utility::for_each(increment, std::forward(it), std::forward(iterators)...);
}
static void unroll()
@@ -50,14 +47,14 @@ template
struct ForEachDimension
{
template
- static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&... iterators)
+ static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&...iterators)
{
const auto &d = w[dim - 1];
- for(auto v = d.start(); v < d.end(); v += d.step(), IncrementIterators < dim - 1 >::unroll(iterators...))
+ for (auto v = d.start(); v < d.end(); v += d.step(), IncrementIterators::unroll(iterators...))
{
id.set(dim - 1, v);
- ForEachDimension < dim - 1 >::unroll(w, id, lambda_function, iterators...);
+ ForEachDimension::unroll(w, id, lambda_function, iterators...);
}
}
};
@@ -66,7 +63,7 @@ template <>
struct ForEachDimension<0>
{
template
- static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&... iterators)
+ static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&...iterators)
{
ARM_COMPUTE_UNUSED(w, iterators...);
lambda_function(id);
@@ -74,31 +71,31 @@ struct ForEachDimension<0>
};
template
-inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
+inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators)
{
w.validate();
- for(unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i)
+ for (unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i)
{
ARM_COMPUTE_ERROR_ON(w[i].step() == 0);
}
Coordinates id;
- ForEachDimension::unroll(w, id, std::forward(lambda_function), std::forward(iterators)...);
+ ForEachDimension::unroll(w, id, std::forward(lambda_function),
+ std::forward(iterators)...);
}
-inline constexpr Iterator::Iterator()
- : _ptr(nullptr), _dims()
+inline constexpr Iterator::Iterator() : _ptr(nullptr), _dims()
{
}
-inline Iterator::Iterator(const ITensor *tensor, const Window &win)
- : Iterator()
+inline Iterator::Iterator(const ITensor *tensor, const Window &win) : Iterator()
{
ARM_COMPUTE_ERROR_ON(tensor == nullptr);
ARM_COMPUTE_ERROR_ON(tensor->info() == nullptr);
- initialize(tensor->info()->num_dimensions(), tensor->info()->strides_in_bytes(), tensor->buffer(), tensor->info()->offset_first_element_in_bytes(), win);
+ initialize(tensor->info()->num_dimensions(), tensor->info()->strides_in_bytes(), tensor->buffer(),
+ tensor->info()->offset_first_element_in_bytes(), win);
}
inline Iterator::Iterator(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &win)
@@ -107,21 +104,22 @@ inline Iterator::Iterator(size_t num_dims, const Strides &strides, uint8_t *buff
initialize(num_dims, strides, buffer, offset, win);
}
-inline void Iterator::initialize(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &win)
+inline void
+Iterator::initialize(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &win)
{
ARM_COMPUTE_ERROR_ON(buffer == nullptr);
_ptr = buffer + offset;
//Initialize the stride for each dimension and calculate the position of the first element of the iteration:
- for(unsigned int n = 0; n < num_dims; ++n)
+ for (unsigned int n = 0; n < num_dims; ++n)
{
_dims[n]._stride = win[n].step() * strides[n];
std::get<0>(_dims)._dim_start += static_cast(strides[n]) * win[n].start();
}
//Copy the starting point to all the dimensions:
- for(unsigned int n = 1; n < Coordinates::num_max_dimensions; ++n)
+ for (unsigned int n = 1; n < Coordinates::num_max_dimensions; ++n)
{
_dims[n]._dim_start = std::get<0>(_dims)._dim_start;
}
@@ -135,7 +133,7 @@ inline void Iterator::increment(const size_t dimension)
_dims[dimension]._dim_start += _dims[dimension]._stride;
- for(unsigned int n = 0; n < dimension; ++n)
+ for (unsigned int n = 0; n < dimension; ++n)
{
_dims[n]._dim_start = _dims[dimension]._dim_start;
}
@@ -157,7 +155,7 @@ inline void Iterator::reset(const size_t dimension)
_dims[dimension]._dim_start = _dims[dimension + 1]._dim_start;
- for(unsigned int n = 0; n < dimension; ++n)
+ for (unsigned int n = 0; n < dimension; ++n)
{
_dims[n]._dim_start = _dims[dimension]._dim_start;
}
@@ -170,9 +168,9 @@ inline Coordinates index2coords(const TensorShape &shape, int index)
ARM_COMPUTE_ERROR_ON_MSG(index < 0 || index >= num_elements, "Index has to be in [0, num_elements]!");
ARM_COMPUTE_ERROR_ON_MSG(num_elements == 0, "Cannot create coordinate from empty shape!");
- Coordinates coord{ 0 };
+ Coordinates coord{0};
- for(int d = shape.num_dimensions() - 1; d >= 0; --d)
+ for (int d = shape.num_dimensions() - 1; d >= 0; --d)
{
num_elements /= shape[d];
coord.set(d, index / num_elements);
@@ -191,7 +189,7 @@ inline int coords2index(const TensorShape &shape, const Coordinates &coord)
int index = 0;
int stride = 1;
- for(unsigned int d = 0; d < coord.num_dimensions(); ++d)
+ for (unsigned int d = 0; d < coord.num_dimensions(); ++d)
{
index += coord[d] * stride;
stride *= shape[d];
@@ -200,9 +198,11 @@ inline int coords2index(const TensorShape &shape, const Coordinates &coord)
return index;
}
-inline size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
+inline size_t get_data_layout_dimension_index(const DataLayout &data_layout,
+ const DataLayoutDimension &data_layout_dimension)
{
- ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN, "Cannot retrieve the dimension index for an unknown layout!");
+ ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN,
+ "Cannot retrieve the dimension index for an unknown layout!");
const auto &dims = get_layout_map().at(data_layout);
const auto &it = std::find(dims.cbegin(), dims.cend(), data_layout_dimension);
ARM_COMPUTE_ERROR_ON_MSG(it == dims.cend(), "Invalid dimension for the given layout.");
@@ -211,7 +211,8 @@ inline size_t get_data_layout_dimension_index(const DataLayout &data_layout, con
inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout &data_layout, const size_t index)
{
- ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN, "Cannot retrieve the layout dimension for an unknown layout!");
+ ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN,
+ "Cannot retrieve the layout dimension for an unknown layout!");
const auto &dims = get_layout_map().at(data_layout);
ARM_COMPUTE_ERROR_ON_MSG(index >= dims.size(), "Invalid index for the given layout.");
return dims[index];
diff --git a/arm_compute/core/IAccessWindow.h b/arm_compute/core/IAccessWindow.h
index 880f6d6b271faf04f7c8232e870cac6c16f2ccb9..9c9fb909150e965bd7ecda2e14c80dc08b83718e 100644
--- a/arm_compute/core/IAccessWindow.h
+++ b/arm_compute/core/IAccessWindow.h
@@ -100,7 +100,10 @@ public:
* @return a valid region.
*
*/
- virtual ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const = 0;
+ virtual ValidRegion compute_valid_region(const Window &window,
+ ValidRegion input_valid_region,
+ bool border_undefined,
+ BorderSize border_size) const = 0;
};
/** Implementation of a rectangular access pattern. */
@@ -161,7 +164,10 @@ public:
* @param[in] border_undefined (Optional) Undefined borders are excluded from the valid region.
* @param[in] border_size (Optional) Size of the border around the XY-plane of the tensor.
*/
- void set_valid_region(const Window &window, const ValidRegion &input_valid_region, bool border_undefined = false, const BorderSize &border_size = BorderSize(0));
+ void set_valid_region(const Window &window,
+ const ValidRegion &input_valid_region,
+ bool border_undefined = false,
+ const BorderSize &border_size = BorderSize(0));
/** Compute the valid region based on access pattern, valid region of the inputs and border mode.
*
@@ -189,7 +195,10 @@ public:
* @return a valid region.
*
*/
- ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
+ ValidRegion compute_valid_region(const Window &window,
+ ValidRegion input_valid_region,
+ bool border_undefined,
+ BorderSize border_size) const override;
bool update_window_if_needed(Window &window) const override;
bool update_padding_if_needed(const Window &window) override;
diff --git a/arm_compute/core/IArray.h b/arm_compute/core/IArray.h
index 6edbc1d5d56037817a84b67a594839f7bcf18bcf..3471fc9a86c4e5c631bdfa011840c6aa708bfc18 100644
--- a/arm_compute/core/IArray.h
+++ b/arm_compute/core/IArray.h
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_IARRAY_H
#include "arm_compute/core/Error.h"
+
#include
#include
@@ -36,14 +37,12 @@ class IArray
{
public:
/** Default constructor */
- IArray()
- : _num_values(0), _max_size(0) {};
+ IArray() : _num_values(0), _max_size(0){};
/** Constructor: initializes an array which can contain up to max_num_points values
*
* @param[in] max_num_values Maximum number of values the array will be able to stored
*/
- IArray(size_t max_num_values)
- : _num_values(0), _max_size(max_num_values)
+ IArray(size_t max_num_values) : _num_values(0), _max_size(max_num_values)
{
}
/** Maximum number of values which can be stored in this array
@@ -73,7 +72,7 @@ public:
bool push_back(const T &val)
{
ARM_COMPUTE_ERROR_ON(0 == _max_size);
- if(_num_values >= max_num_values())
+ if (_num_values >= max_num_values())
{
_num_values = max_num_values() + 1;
return false;
@@ -142,5 +141,5 @@ using IInt16Array = IArray;
using IInt32Array = IArray;
/** Interface for Array of floats. */
using IFloatArray = IArray;
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_IARRAY_H */
diff --git a/arm_compute/core/IKernel.h b/arm_compute/core/IKernel.h
index 98fd18cc9104adce056c1b84b01fe5840ee206b0..403a2c724e8245cb882bfa1a856f671300803f73 100644
--- a/arm_compute/core/IKernel.h
+++ b/arm_compute/core/IKernel.h
@@ -73,5 +73,5 @@ protected:
private:
Window _window;
};
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_IKERNEL_H */
diff --git a/arm_compute/core/ITensor.h b/arm_compute/core/ITensor.h
index 32b93576bddc6461a72052b5df94b31ca175f030..aad831326104a68b71ad466733af4c566a54e3ec 100644
--- a/arm_compute/core/ITensor.h
+++ b/arm_compute/core/ITensor.h
@@ -94,9 +94,9 @@ public:
void mark_as_used() const;
private:
- mutable bool _is_used = { true }; /**< Flag that marks if the tensor is used or not */
+ mutable bool _is_used = {true}; /**< Flag that marks if the tensor is used or not */
};
using IImage = ITensor;
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_ITENSOR_H */
diff --git a/arm_compute/core/ITensorInfo.h b/arm_compute/core/ITensorInfo.h
index e7c0b182c69e5f192dafb2d33f8498a815f33d38..c42f4b57a1ed5fb6e200358b75feae49b585babe 100644
--- a/arm_compute/core/ITensorInfo.h
+++ b/arm_compute/core/ITensorInfo.h
@@ -29,6 +29,7 @@
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/Utility.h"
+
#include "support/ICloneable.h"
#include
@@ -328,23 +329,23 @@ public:
* not broadcast compatible.
*/
template
- static std::pair broadcast_shape_and_valid_region(const Infos &... infos)
+ static std::pair broadcast_shape_and_valid_region(const Infos &...infos)
{
TensorShape bc_shape = TensorShape::broadcast_shape(infos.tensor_shape()...);
- ValidRegion bc_valid_region{ Coordinates(), bc_shape };
+ ValidRegion bc_valid_region{Coordinates(), bc_shape};
- auto broadcast_valid_region = [&bc_valid_region](const ITensorInfo & info)
+ auto broadcast_valid_region = [&bc_valid_region](const ITensorInfo &info)
{
- if(info.num_dimensions() != 0)
+ if (info.num_dimensions() != 0)
{
- for(size_t d = 0; d < bc_valid_region.shape.num_dimensions(); ++d)
+ for (size_t d = 0; d < bc_valid_region.shape.num_dimensions(); ++d)
{
const bool is_broadcast = (info.tensor_shape()[d] == 1);
const int anchor_max = std::max(bc_valid_region.anchor[d], info.valid_region().anchor[d]);
const size_t valid_min = std::min(bc_valid_region.shape[d], info.valid_region().shape[d]);
- if(!is_broadcast || (valid_min == 0))
+ if (!is_broadcast || (valid_min == 0))
{
bc_valid_region.anchor.set(d, anchor_max);
bc_valid_region.shape.set(d, valid_min);
diff --git a/arm_compute/core/ITensorPack.h b/arm_compute/core/ITensorPack.h
index 17b72418621b4f33aeb5176134cb22a437b1fcb1..f456c50769ed0699284b832b58f804fe75981921 100644
--- a/arm_compute/core/ITensorPack.h
+++ b/arm_compute/core/ITensorPack.h
@@ -42,18 +42,16 @@ public:
struct PackElement
{
PackElement() = default;
- PackElement(int id, ITensor *tensor)
- : id(id), tensor(tensor), ctensor(nullptr)
+ PackElement(int id, ITensor *tensor) : id(id), tensor(tensor), ctensor(nullptr)
{
}
- PackElement(int id, const ITensor *ctensor)
- : id(id), tensor(nullptr), ctensor(ctensor)
+ PackElement(int id, const ITensor *ctensor) : id(id), tensor(nullptr), ctensor(ctensor)
{
}
- int id{ -1 };
- ITensor *tensor{ nullptr };
- const ITensor *ctensor{ nullptr };
+ int id{-1};
+ ITensor *tensor{nullptr};
+ const ITensor *ctensor{nullptr};
};
public:
diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h
index 305766e8251721935efcd581388951674921ffc7..168a06a55cfe15e788b606aa73ed7940cc3252fc 100644
--- a/arm_compute/core/KernelDescriptors.h
+++ b/arm_compute/core/KernelDescriptors.h
@@ -21,12 +21,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ACL_ARM_COMPUTE_CORE_KERNELDESCRIPTORS
-#define ACL_ARM_COMPUTE_CORE_KERNELDESCRIPTORS
+#ifndef ACL_ARM_COMPUTE_CORE_KERNELDESCRIPTORS_H
+#define ACL_ARM_COMPUTE_CORE_KERNELDESCRIPTORS_H
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/experimental/IPostOp.h"
#include "arm_compute/function_info/ActivationLayerInfo.h"
namespace arm_compute
@@ -34,24 +33,24 @@ namespace arm_compute
/** Descriptor for FFT scale kernels */
struct FFTScaleKernelInfo
{
- float scale{ 0.f }; /**< Axis to perform the kernel on. */
- bool conjugate{ true }; /**< Flag to conjugate the output/ */
+ float scale{0.f}; /**< Axis to perform the kernel on. */
+ bool conjugate{true}; /**< Flag to conjugate the output */
};
/** Descriptor for FFT digit reverse kernels */
struct FFTDigitReverseKernelInfo
{
- unsigned int axis{ 0 }; /**< Axis to perform the kernel on. */
- bool conjugate{ false }; /**< Flag to conjugate the output/ */
+ unsigned int axis{0}; /**< Axis to perform the kernel on. */
+ bool conjugate{false}; /**< Flag to conjugate the output */
};
/** Descriptor used by the FFT core kernels */
struct FFTRadixStageKernelInfo
{
- unsigned int axis{ 0 }; /**< Axis to run the kernel on. */
- unsigned int radix{ 0 }; /**< Radix to use. */
- unsigned int Nx{ 0 }; /**< Nx coefficient. */
- bool is_first_stage{ false }; /**< Flags if the FFT kernels is the first stage of a decomposed FFT. */
+ unsigned int axis{0}; /**< Axis to run the kernel on. */
+ unsigned int radix{0}; /**< Radix to use. */
+ unsigned int Nx{0}; /**< Nx coefficient. */
+ bool is_first_stage{false}; /**< Flag that indicates whether the FFT kernel is the first stage of a decomposed FFT. */
};
class ITensorInfo;
@@ -59,92 +58,102 @@ class ITensorInfo;
struct GEMMKernelInfo
{
GEMMKernelInfo() = default;
- GEMMKernelInfo(
- unsigned int im,
- unsigned int in,
- unsigned int ik,
- unsigned int idepth_output_gemm3d,
- bool ireinterpret_input_as_3d,
- bool ibroadcast_bias,
- bool ifp_mixed_precision,
- bool ihas_pad_y,
- ActivationLayerInfo iactivation_info,
- int inmult_transpose1xW_width,
- int imult_interleave4x4_height,
- GEMMLHSMatrixInfo ilhs_info,
- GEMMRHSMatrixInfo irhs_info,
- int32_t ina_offset,
- int32_t inb_offset,
- const experimental::PostOpList &ipost_ops = experimental::PostOpList {})
- : m(im), n(in), k(ik), depth_output_gemm3d(idepth_output_gemm3d), reinterpret_input_as_3d(ireinterpret_input_as_3d), broadcast_bias(ibroadcast_bias), fp_mixed_precision(ifp_mixed_precision),
- has_pad_y(ihas_pad_y), activation_info(iactivation_info), mult_transpose1xW_width(inmult_transpose1xW_width), mult_interleave4x4_height(imult_interleave4x4_height), lhs_info(ilhs_info),
- rhs_info(irhs_info), a_offset(ina_offset), b_offset(inb_offset), post_ops(ipost_ops)
+ GEMMKernelInfo(unsigned int im,
+ unsigned int in,
+ unsigned int ik,
+ unsigned int idepth_output_gemm3d,
+ bool ireinterpret_input_as_3d,
+ bool ibroadcast_bias,
+ bool ifp_mixed_precision,
+ bool ihas_pad_y,
+ ActivationLayerInfo iactivation_info,
+ int inmult_transpose1xW_width,
+ int imult_interleave4x4_height,
+ GEMMLHSMatrixInfo ilhs_info,
+ GEMMRHSMatrixInfo irhs_info,
+ int32_t ina_offset,
+ int32_t inb_offset)
+ : m(im),
+ n(in),
+ k(ik),
+ depth_output_gemm3d(idepth_output_gemm3d),
+ reinterpret_input_as_3d(ireinterpret_input_as_3d),
+ broadcast_bias(ibroadcast_bias),
+ fp_mixed_precision(ifp_mixed_precision),
+ has_pad_y(ihas_pad_y),
+ activation_info(iactivation_info),
+ mult_transpose1xW_width(inmult_transpose1xW_width),
+ mult_interleave4x4_height(imult_interleave4x4_height),
+ lhs_info(ilhs_info),
+ rhs_info(irhs_info),
+ a_offset(ina_offset),
+ b_offset(inb_offset)
{
}
- unsigned int m{ 0 }; /**< Number of LHS rows*/
- unsigned int n{ 0 }; /**< Number of RHS columns*/
- unsigned int k{ 0 }; /**< Number of LHS columns or RHS rows */
- unsigned int depth_output_gemm3d{ 0 }; /**< Depth of the output tensor in case is reinterpreted as 3D */
- bool reinterpret_input_as_3d{ false }; /**< Flag used to reinterpret the input as 3D */
- bool broadcast_bias{ false }; /**< Flag used to broadcast the bias addition */
- bool fp_mixed_precision{ false }; /**< Flag used to indicate wider accumulators (32 bit instead of 16 for FP16). */
- bool has_pad_y{ false }; /**< Flag used to indicate if the input/output tensors have internal pad on the y direction */
- ActivationLayerInfo activation_info{}; /**< Activation function to perform after the matrix multiplication */
- int mult_transpose1xW_width{ 1 }; /**< Multiplication factor for the width of the 1xW transposed block */
- int mult_interleave4x4_height{ 1 }; /**< Multiplication factor for the height of the 4x4 interleaved block */
- GEMMLHSMatrixInfo lhs_info{}; /**< LHS matrix information used to retrieve the number of rows processed by each thread */
- GEMMRHSMatrixInfo rhs_info{}; /**< RHS matrix information used for reshaping the RHS matrix */
- int32_t a_offset{ 0 }; /**< Offset to be added to each element of the matrix A */
- int32_t b_offset{ 0 }; /**< Offset to be added to each element of the matrix B */
- GEMMLowpOutputStageInfo output_stage{}; /**< GEMMLowp output stage information */
- experimental::PostOpList post_ops{}; /**< (EXPERIMENTAL_POST_OPS) Specifies a list of post ops to be fused after the main op. Note unsupported post ops would not be executed.
- * If specified, automatically disable the @ref activation_info */
+ unsigned int m{0}; /**< Number of LHS rows*/
+ unsigned int n{0}; /**< Number of RHS columns*/
+ unsigned int k{0}; /**< Number of LHS columns or RHS rows */
+ unsigned int depth_output_gemm3d{0}; /**< Depth of the output tensor in case is reinterpreted as 3D */
+ bool reinterpret_input_as_3d{false}; /**< Flag used to reinterpret the input as 3D */
+ bool broadcast_bias{false}; /**< Flag used to broadcast the bias addition */
+ bool fp_mixed_precision{false}; /**< Flag used to indicate wider accumulators (32 bit instead of 16 for FP16). */
+ bool has_pad_y{
+ false}; /**< Flag used to indicate if the input/output tensors have internal pad on the y direction */
+ ActivationLayerInfo activation_info{}; /**< Activation function to perform after the matrix multiplication */
+ int mult_transpose1xW_width{1}; /**< Multiplication factor for the width of the 1xW transposed block */
+ int mult_interleave4x4_height{1}; /**< Multiplication factor for the height of the 4x4 interleaved block */
+ GEMMLHSMatrixInfo
+ lhs_info{}; /**< LHS matrix information used to retrieve the number of rows processed by each thread */
+ GEMMRHSMatrixInfo rhs_info{}; /**< RHS matrix information used for reshaping the RHS matrix */
+ int32_t a_offset{0}; /**< Offset to be added to each element of the matrix A */
+ int32_t b_offset{0}; /**< Offset to be added to each element of the matrix B */
+ GEMMLowpOutputStageInfo output_stage{}; /**< GEMMLowp output stage information */
};
/** Compute descriptor used by the depthwise convolution native kernel */
struct DWCComputeKernelInfo
{
- unsigned int n0{ 1 }; /**< Number of columns processed by each thread */
- unsigned int m0{ 1 }; /**< Number of rows processed by each thread */
- bool export_input_to_cl_image{ false }; /**< Export input to cl_image */
- bool export_weights_to_cl_image{ false }; /**< Export the weights to cl_image */
+ unsigned int n0{1}; /**< Number of columns processed by each thread */
+ unsigned int m0{1}; /**< Number of rows processed by each thread */
+ bool export_input_to_cl_image{false}; /**< Export input to cl_image */
+ bool export_weights_to_cl_image{false}; /**< Export the weights to cl_image */
};
/** Compute descriptor used by the direct convolution kernel */
struct DirectConvComputeKernelInfo
{
- int32_t m0{ 1 }; /**< Number of rows to be processed by the kernel */
- int32_t n0{ 1 }; /**< Number of columns to be processed by the kernel */
- int32_t k0{ 1 }; /**< Number of partial accumulations to be processed in a single iteration by the kernel */
- bool export_weights_to_cl_image{ false }; /**< Flag to export the weights to cl_image */
- bool export_output_to_cl_image{ false }; /**< Flag to export the output to cl_image */
- bool export_input_to_cl_image{ false }; /**< Flag to export the input to cl_image */
+ int32_t m0{1}; /**< Number of rows to be processed by the kernel */
+ int32_t n0{1}; /**< Number of columns to be processed by the kernel */
+ int32_t k0{1}; /**< Number of partial accumulations to be processed in a single iteration by the kernel */
+ bool export_weights_to_cl_image{false}; /**< Flag to export the weights to cl_image */
+ bool export_output_to_cl_image{false}; /**< Flag to export the output to cl_image */
+ bool export_input_to_cl_image{false}; /**< Flag to export the input to cl_image */
};
/** Descriptor used by the softmax kernels */
struct SoftmaxKernelInfo
{
- float beta{ 1.f }; /**< A scaling factor for the exponent with default value 1.0 */
- bool is_log{ false }; /**< Flag used to perform Log Softmax operation */
- DataType input_data_type{ DataType::UNKNOWN }; /**< Input tensor data type */
- int32_t axis{ 0 }; /**< The dimension in which to apply softmax. */
+ float beta{1.f}; /**< A scaling factor for the exponent with default value 1.0 */
+ bool is_log{false}; /**< Flag used to perform Log Softmax operation */
+ DataType input_data_type{DataType::UNKNOWN}; /**< Input tensor data type */
+ int32_t axis{0}; /**< The dimension in which to apply softmax. */
};
/** Descriptor used by the direct convolution layer output stage kernels */
struct DirectConvolutionLayerOutputStageKernelInfo
{
- int32_t result_fixedpoint_multiplier{ 0 }; /**< Result output stage multiplier used for quantizing */
- int32_t result_shift{ 0 }; /**< Result output stage shift used for quantizing */
- int32_t result_offset_after_shift{ 0 }; /**< Result offset used for quantizing */
- DataType output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */
+ int32_t result_fixedpoint_multiplier{0}; /**< Result output stage multiplier used for quantizing */
+ int32_t result_shift{0}; /**< Result output stage shift used for quantizing */
+ int32_t result_offset_after_shift{0}; /**< Result offset used for quantizing */
+ DataType output_data_type{
+ DataType::UNKNOWN}; /**< Output tensor data type to use if the output is not initialized */
};
struct InstanceNormalizationLayerKernelInfo
{
/** Default constructor */
- InstanceNormalizationLayerKernelInfo()
- : InstanceNormalizationLayerKernelInfo(1.f, 0.f, 1e-12, true)
+ InstanceNormalizationLayerKernelInfo() : InstanceNormalizationLayerKernelInfo(1.f, 0.f, 1e-12, true)
{
}
/** Constructor
@@ -181,10 +190,10 @@ struct GEMMLowpReductionKernelInfo
{
}
- int32_t k{ 0 }; /**< Number of matrix columns/rows */
- bool is_reshaped{ false }; /**< True if the input tensor has been reshaped */
- int32_t scalar{ 0 }; /**< Scalar value to multiply each reduced column/row by */
- bool mul_by_scalar{ false }; /**< True if each column/row reduction has to be multiplied by a scalar value */
+ int32_t k{0}; /**< Number of matrix columns/rows */
+ bool is_reshaped{false}; /**< True if the input tensor has been reshaped */
+ int32_t scalar{0}; /**< Scalar value to multiply each reduced column/row by */
+ bool mul_by_scalar{false}; /**< True if each column/row reduction has to be multiplied by a scalar value */
};
struct ScaleKernelInfo
@@ -206,13 +215,13 @@ struct ScaleKernelInfo
bool use_padding = true,
bool align_corners = false,
DataLayout data_layout = DataLayout::UNKNOWN) noexcept
- : interpolation_policy{ interpolation_policy },
- border_mode{ border_mode },
- constant_border_value{ constant_border_value },
- sampling_policy{ sampling_policy },
- use_padding{ use_padding },
- align_corners{ align_corners },
- data_layout{ data_layout }
+ : interpolation_policy{interpolation_policy},
+ border_mode{border_mode},
+ constant_border_value{constant_border_value},
+ sampling_policy{sampling_policy},
+ use_padding{use_padding},
+ align_corners{align_corners},
+ data_layout{data_layout}
{
}
@@ -228,16 +237,17 @@ struct ScaleKernelInfo
struct MatMulKernelInfo
{
MatMulKernelInfo() = default;
- MatMulKernelInfo(bool adj_lhs, bool adj_rhs, int m0 = 1, int n0 = 1, int k0 = 1, bool export_rhs_to_cl_image = false)
- : adj_lhs{ adj_lhs }, adj_rhs{ adj_rhs }, m0{ m0 }, n0{ n0 }, k0{ k0 }, export_rhs_to_cl_image{ export_rhs_to_cl_image }
+ MatMulKernelInfo(
+ bool adj_lhs, bool adj_rhs, int m0 = 1, int n0 = 1, int k0 = 1, bool export_rhs_to_cl_image = false)
+ : adj_lhs{adj_lhs}, adj_rhs{adj_rhs}, m0{m0}, n0{n0}, k0{k0}, export_rhs_to_cl_image{export_rhs_to_cl_image}
{
}
- bool adj_lhs{ false }; /**< Get Adjoint LHS flag value */
- bool adj_rhs{ false }; /**< Get Adjoint RHS flag value */
- int m0{ 1 }; /**< Number of output rows processed by each work-item*/
- int n0{ 1 }; /**< Number of output columns processed by each work-item*/
- int k0{ 1 }; /**< Number of inner accumulations */
- bool export_rhs_to_cl_image{ false }; /**< Flag to know whether the RHS tensor should be exported to cl_image*/
+ bool adj_lhs{false}; /**< Get Adjoint LHS flag value */
+ bool adj_rhs{false}; /**< Get Adjoint RHS flag value */
+ int m0{1}; /**< Number of output rows processed by each work-item*/
+ int n0{1}; /**< Number of output columns processed by each work-item*/
+ int k0{1}; /**< Number of inner accumulations */
+ bool export_rhs_to_cl_image{false}; /**< Flag to know whether the RHS tensor should be exported to cl_image*/
};
} // namespace arm_compute
-#endif /* ACL_ARM_COMPUTE_CORE_KERNELDESCRIPTORS */
+#endif // ACL_ARM_COMPUTE_CORE_KERNELDESCRIPTORS_H
diff --git a/arm_compute/core/Log.h b/arm_compute/core/Log.h
index bc0ecb802e0d83484db704a4500d13fdaf83a856..03b861f765c34e8c24641a3f540603fe67d59ae4 100644
--- a/arm_compute/core/Log.h
+++ b/arm_compute/core/Log.h
@@ -34,11 +34,11 @@
#define ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER() \
do \
{ \
- if(arm_compute::logging::LoggerRegistry::get().logger("CORE") == nullptr) \
+ if (arm_compute::logging::LoggerRegistry::get().logger("CORE") == nullptr) \
{ \
arm_compute::logging::LoggerRegistry::get().create_reserved_loggers(); \
} \
- } while(false)
+ } while (false)
#else /* ARM_COMPUTE_LOGGING_ENABLED */
#define ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER()
#endif /* ARM_COMPUTE_LOGGING_ENABLED */
@@ -53,7 +53,7 @@
{ \
ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \
ARM_COMPUTE_LOG_MSG("CORE", log_level, msg); \
- } while(false)
+ } while (false)
/** Log a message with format to the core system logger
*
@@ -66,7 +66,7 @@
{ \
ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \
ARM_COMPUTE_LOG_MSG_WITH_FORMAT("CORE", log_level, fmt, __VA_ARGS__); \
- } while(false)
+ } while (false)
/** Log a stream to the core system logger
*
@@ -78,7 +78,7 @@
{ \
ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \
ARM_COMPUTE_LOG_STREAM("CORE", log_level, ss); \
- } while(false)
+ } while (false)
/** Log information level message to the core system logger
*
@@ -89,7 +89,7 @@
{ \
ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \
ARM_COMPUTE_LOG_MSG_CORE(arm_compute::logging::LogLevel::INFO, msg); \
- } while(false)
+ } while (false)
/** Log information level formatted message to the core system logger
*
@@ -101,7 +101,7 @@
{ \
ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \
ARM_COMPUTE_LOG_MSG_WITH_FORMAT_CORE(arm_compute::logging::LogLevel::INFO, #fmt, __VA_ARGS__); \
- } while(false)
+ } while (false)
/** Log information level stream to the core system logger
*
@@ -112,6 +112,6 @@
{ \
ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \
ARM_COMPUTE_LOG_STREAM_CORE(arm_compute::logging::LogLevel::INFO, ss); \
- } while(false)
+ } while (false)
#endif /* ARM_COMPUTE_LOGGING_MACROS_H */
diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h
index 790f58a7935ab3f210a1038695506c177e5dbd6f..0b4df4f2e2e68c8fb49ed4580f4ea1b4212c0dee 100644
--- a/arm_compute/core/PixelValue.h
+++ b/arm_compute/core/PixelValue.h
@@ -24,8 +24,8 @@
#ifndef ARM_COMPUTE_PIXELVALUE_H
#define ARM_COMPUTE_PIXELVALUE_H
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/QuantizationInfo.h"
+#include "arm_compute/core/Types.h"
#include
@@ -36,11 +36,7 @@ class PixelValue
{
public:
/** Default constructor: value initialized to 0 */
- PixelValue() noexcept
- : value
- {
- int64_t(0)
- }
+ PixelValue() noexcept : value{int64_t(0)}
{
}
/** Initialize the union with a pixel value of chosen datatype
@@ -49,10 +45,9 @@ public:
* @param[in] datatype DataType that @p v have to be stored
* @param[in] qinfo (Optional) QuantizationInfo to apply in case of quantized data types to @p v
*/
- PixelValue(double v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo())
- : PixelValue()
+ PixelValue(double v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo()) : PixelValue()
{
- switch(datatype)
+ switch (datatype)
{
case DataType::U8:
value.u8 = static_cast(v);
@@ -112,8 +107,7 @@ public:
*
* @param[in] v S8 value.
*/
- PixelValue(int8_t v)
- : PixelValue()
+ PixelValue(int8_t v) : PixelValue()
{
value.s8 = v;
}
@@ -121,8 +115,7 @@ public:
*
* @param[in] v U8 value.
*/
- PixelValue(uint8_t v)
- : PixelValue()
+ PixelValue(uint8_t v) : PixelValue()
{
value.u8 = v;
}
@@ -130,8 +123,7 @@ public:
*
* @param[in] v U16 value.
*/
- PixelValue(uint16_t v)
- : PixelValue()
+ PixelValue(uint16_t v) : PixelValue()
{
value.u16 = v;
}
@@ -139,8 +131,7 @@ public:
*
* @param[in] v S16 value.
*/
- PixelValue(int16_t v)
- : PixelValue()
+ PixelValue(int16_t v) : PixelValue()
{
value.s16 = v;
}
@@ -148,8 +139,7 @@ public:
*
* @param[in] v U32 value.
*/
- PixelValue(uint32_t v)
- : PixelValue()
+ PixelValue(uint32_t v) : PixelValue()
{
value.u32 = v;
}
@@ -157,8 +147,7 @@ public:
*
* @param[in] v S32 value.
*/
- PixelValue(int32_t v)
- : PixelValue()
+ PixelValue(int32_t v) : PixelValue()
{
value.s32 = v;
}
@@ -167,8 +156,7 @@ public:
*
* @param[in] v U64 value.
*/
- PixelValue(uint64_t v)
- : PixelValue()
+ PixelValue(uint64_t v) : PixelValue()
{
value.u64 = v;
}
@@ -176,8 +164,7 @@ public:
*
* @param[in] v S64 value.
*/
- PixelValue(int64_t v)
- : PixelValue()
+ PixelValue(int64_t v) : PixelValue()
{
value.s64 = v;
}
@@ -185,8 +172,7 @@ public:
*
* @param[in] v F16 value.
*/
- PixelValue(bfloat16 v)
- : PixelValue()
+ PixelValue(bfloat16 v) : PixelValue()
{
value.bf16 = v;
}
@@ -194,8 +180,7 @@ public:
*
* @param[in] v F16 value.
*/
- PixelValue(half v)
- : PixelValue()
+ PixelValue(half v) : PixelValue()
{
value.f16 = v;
}
@@ -203,8 +188,7 @@ public:
*
* @param[in] v F32 value.
*/
- PixelValue(float v)
- : PixelValue()
+ PixelValue(float v) : PixelValue()
{
value.f32 = v;
}
@@ -212,8 +196,7 @@ public:
*
* @param[in] v F64 value.
*/
- PixelValue(double v)
- : PixelValue()
+ PixelValue(double v) : PixelValue()
{
value.f64 = v;
}
@@ -221,23 +204,23 @@ public:
* Use the field corresponding to the image format
*/
union
- {
- uint64_t u64; /**< Single channel U64 */
- int64_t s64; /**< Single channel S64 */
- uint8_t rgb[3]; /**< 3 channels: RGB888 */
- uint8_t yuv[3]; /**< 3 channels: Any YUV format */
- uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */
- double f64; /**< Single channel double */
- float f32; /**< Single channel float 32 */
- half f16; /**< Single channel F16 */
- bfloat16 bf16; /**< Single channel brain floating-point number */
- uint8_t u8; /**< Single channel U8 */
- int8_t s8; /**< Single channel S8 */
- uint16_t u16; /**< Single channel U16 */
- int16_t s16; /**< Single channel S16 */
- uint32_t u32; /**< Single channel U32 */
- int32_t s32; /**< Single channel S32 */
- } value;
+ {
+ uint64_t u64; /**< Single channel U64 */
+ int64_t s64; /**< Single channel S64 */
+ uint8_t rgb[3]; /**< 3 channels: RGB888 */
+ uint8_t yuv[3]; /**< 3 channels: Any YUV format */
+ uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */
+ double f64; /**< Single channel double */
+ float f32; /**< Single channel float 32 */
+ half f16; /**< Single channel F16 */
+ bfloat16 bf16; /**< Single channel brain floating-point number */
+ uint8_t u8; /**< Single channel U8 */
+ int8_t s8; /**< Single channel S8 */
+ uint16_t u16; /**< Single channel U16 */
+ int16_t s16; /**< Single channel S16 */
+ uint32_t u32; /**< Single channel U32 */
+ int32_t s32; /**< Single channel S32 */
+ } value;
/** Interpret the pixel value as a U8
*
* @param[out] v Returned value
diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h
index 8fa513eee1e61e442eae70589bcbe6166da27fd3..471b8c57aba6100f8962e270659ebd252968fd57 100644
--- a/arm_compute/core/QuantizationInfo.h
+++ b/arm_compute/core/QuantizationInfo.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/Rounding.h"
#include "arm_compute/core/utils/misc/Utility.h"
+
#include "support/ToolchainSupport.h"
#include
@@ -41,8 +42,7 @@ using qasymm16_t = uint16_t; /**< 16 bit quantized asymmetric scalar value
struct UniformQuantizationInfo
{
/** Default constructor */
- UniformQuantizationInfo()
- : scale(0.f), offset(0)
+ UniformQuantizationInfo() : scale(0.f), offset(0)
{
}
/** Constructor
@@ -50,8 +50,7 @@ struct UniformQuantizationInfo
* @param[in] scale Quantization scale
* @param[in] offset Quantization offset
*/
- UniformQuantizationInfo(float scale, int32_t offset)
- : scale(scale), offset(offset)
+ UniformQuantizationInfo(float scale, int32_t offset) : scale(scale), offset(offset)
{
}
/** Checks if the scale and offset are both zero */
@@ -69,9 +68,7 @@ class QuantizationInfo
{
public:
/** Default constructor */
- QuantizationInfo() noexcept
- : _scale(),
- _offset()
+ QuantizationInfo() noexcept : _scale(), _offset()
{
}
/** Construct quantization info.
@@ -80,8 +77,7 @@ public:
*
* @param[in] scale Scale.
*/
- QuantizationInfo(float scale)
- : _scale(1, scale), _offset()
+ QuantizationInfo(float scale) : _scale(1, scale), _offset()
{
}
/** Construct quantization info.
@@ -91,8 +87,7 @@ public:
* @param[in] scale Scale.
* @param[in] offset Offset.
*/
- QuantizationInfo(float scale, int offset)
- : _scale(1, scale), _offset(1, offset)
+ QuantizationInfo(float scale, int offset) : _scale(1, scale), _offset(1, offset)
{
}
/** Construct quantization info.
@@ -101,8 +96,7 @@ public:
*
* @param[in] scale Scale.
*/
- QuantizationInfo(std::vector scale)
- : _scale(scale), _offset()
+ QuantizationInfo(std::vector scale) : _scale(scale), _offset()
{
}
/** Construct quantization info.
@@ -112,8 +106,7 @@ public:
* @param[in] scale Scale.
* @param[in] offset Offset.
*/
- QuantizationInfo(std::vector scale, std::vector offset)
- : _scale(scale), _offset(offset)
+ QuantizationInfo(std::vector scale, std::vector offset) : _scale(scale), _offset(offset)
{
}
/** Scale vector accessor
@@ -208,8 +201,7 @@ inline bool operator!=(const UniformQuantizationInfo &lhs, const UniformQuantiza
template
struct Qasymm8QuantizationHelper
{
- static_assert(std::is_same::value
- || std::is_same::value,
+ static_assert(std::is_same::value || std::is_same::value,
"quantized type should be either uint8_t or int8_t.");
/** Quantize a value given a 8-bit asymmetric quantization scheme
@@ -234,9 +226,10 @@ struct Qasymm8QuantizationHelper
*
* @return Quantized value
*/
- static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy)
+ static inline QUANTIZED_TYPE
+ quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy)
{
- if(rounding_policy == RoundingPolicy::TO_NEAREST_UP)
+ if (rounding_policy == RoundingPolicy::TO_NEAREST_UP)
{
return quantize(value, qinfo);
}
@@ -254,7 +247,8 @@ struct Qasymm8QuantizationHelper
*
* @return Quantized value
*/
- static inline QUANTIZED_TYPE quantize(float value, const QuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
+ static inline QUANTIZED_TYPE
+ quantize(float value, const QuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
{
const UniformQuantizationInfo uqinfo = qinfo.uniform();
ARM_COMPUTE_ERROR_ON(uqinfo.scale == 0);
@@ -297,7 +291,8 @@ struct Qasymm8QuantizationHelper
* @return Quantized value
*/
template
-inline uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
+inline uint8_t
+quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
{
return Qasymm8QuantizationHelper::quantize(value, qinfo, rounding_policy);
}
@@ -311,7 +306,9 @@ inline uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPol
* @return Quantized value
*/
template
-inline int8_t quantize_qasymm8_signed(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
+inline int8_t quantize_qasymm8_signed(float value,
+ const INFO_TYPE &qinfo,
+ RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
{
return Qasymm8QuantizationHelper::quantize(value, qinfo, rounding_policy);
}
@@ -441,7 +438,9 @@ inline float dequantize(uint16_t value, float scale, int32_t offset)
*
* @return Quantized value
*/
-inline int16_t quantize_qsymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
+inline int16_t quantize_qsymm16(float value,
+ const UniformQuantizationInfo &qinfo,
+ RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
{
int quantized = arm_compute::round(value / qinfo.scale, rounding_policy);
quantized = arm_compute::utility::clamp(quantized);
@@ -492,7 +491,9 @@ inline float dequantize_qsymm16(int16_t value, const QuantizationInfo &qinfo)
*
* @return Quantized value
*/
-inline uint16_t quantize_qasymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
+inline uint16_t quantize_qasymm16(float value,
+ const UniformQuantizationInfo &qinfo,
+ RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
{
int quantized = arm_compute::round(value / qinfo.scale, rounding_policy) + qinfo.offset;
quantized = arm_compute::utility::clamp(quantized);
@@ -565,7 +566,8 @@ inline float dequantize_qasymm16(uint16_t value, const QuantizationInfo &qinfo)
* z_n = - z_i * s_i / s_o + z_o
*
*/
-inline UniformQuantizationInfo compute_requantization_scale_offset(const UniformQuantizationInfo &uqinfo_in, const UniformQuantizationInfo &uqinfo_out)
+inline UniformQuantizationInfo compute_requantization_scale_offset(const UniformQuantizationInfo &uqinfo_in,
+ const UniformQuantizationInfo &uqinfo_out)
{
float scale_to_apply = uqinfo_out.scale;
int32_t offset_to_apply = uqinfo_out.offset;
diff --git a/arm_compute/core/Rounding.h b/arm_compute/core/Rounding.h
index b6817b5107ae5119dc3a3d3867fad39d3878ff4e..30a5a0fe9dbd91989e2a868e827d6ac65da9d5c6 100644
--- a/arm_compute/core/Rounding.h
+++ b/arm_compute/core/Rounding.h
@@ -42,5 +42,5 @@ enum class RoundingPolicy
* @return Rounded value of the argument x.
*/
int round(float x, RoundingPolicy rounding_policy);
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_ROUNDING_H */
diff --git a/arm_compute/core/Size2D.h b/arm_compute/core/Size2D.h
index f3e9bea4c7f594cba73c56a6f082c203b450ee8d..672b392050152b24baa89ef19305a8bf7b89e899 100644
--- a/arm_compute/core/Size2D.h
+++ b/arm_compute/core/Size2D.h
@@ -41,9 +41,7 @@ public:
* @param[in] w Width of the image or rectangle
* @param[in] h Height of the image or rectangle
*/
- Size2D(size_t w, size_t h) noexcept
- : width(w),
- height(h)
+ Size2D(size_t w, size_t h) noexcept : width(w), height(h)
{
}
/** The area of the image or rectangle calculated as (width * height)
@@ -90,5 +88,5 @@ public:
size_t width = {}; /**< Width of the image region or rectangle */
size_t height = {}; /**< Height of the image region or rectangle */
};
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_SIZE2D_H */
diff --git a/arm_compute/core/Size3D.h b/arm_compute/core/Size3D.h
index 4241ed4f7e0afc4ea68ac44e5a2e7f04eefd4658..e2dc6fe012870c8c06b660a46ff87a0d0dc58eb2 100644
--- a/arm_compute/core/Size3D.h
+++ b/arm_compute/core/Size3D.h
@@ -40,8 +40,7 @@ public:
* @param[in] h Height of the 3D shape or object
* @param[in] d Depth of the 3D shape or object
*/
- Size3D(size_t w, size_t h, size_t d) noexcept
- : width(w), height(h), depth(d)
+ Size3D(size_t w, size_t h, size_t d) noexcept : width(w), height(h), depth(d)
{
}
diff --git a/arm_compute/core/Steps.h b/arm_compute/core/Steps.h
index 208fc4b294f45545240e5860d78ebd9bfcce7cf0..6b261becc0382d3663e8f6e90d968dfb1cbf6218 100644
--- a/arm_compute/core/Steps.h
+++ b/arm_compute/core/Steps.h
@@ -45,8 +45,7 @@ public:
* @param[in] steps Values to initialize the steps.
*/
template
- Steps(Ts... steps)
- : Dimensions{ steps... }
+ Steps(Ts... steps) : Dimensions{steps...}
{
// Initialize empty dimensions to 1
std::fill(_id.begin() + _num_dimensions, _id.end(), 1);
@@ -62,5 +61,5 @@ public:
/** Default destructor */
~Steps() = default;
};
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_STEPS_H*/
diff --git a/arm_compute/core/Strides.h b/arm_compute/core/Strides.h
index b582d066f7174475b849852ec9493d0434ee045f..627b219987b054d5e8ca10a2f03583ce3de70233 100644
--- a/arm_compute/core/Strides.h
+++ b/arm_compute/core/Strides.h
@@ -43,8 +43,7 @@ public:
* @param[in] strides Values to initialize the strides.
*/
template
- constexpr Strides(Ts... strides)
- : Dimensions{ strides... }
+ constexpr Strides(Ts... strides) : Dimensions{strides...}
{
}
/** Allow instances of this class to be copy constructed */
diff --git a/arm_compute/core/SubTensorInfo.h b/arm_compute/core/SubTensorInfo.h
index 21703b0d9359d88d954415f8fc69e7c41061a644..7a3ee2cfd0f2c6588955589d935da03925c05d65 100644
--- a/arm_compute/core/SubTensorInfo.h
+++ b/arm_compute/core/SubTensorInfo.h
@@ -24,10 +24,9 @@
#ifndef ARM_COMPUTE_SUBTENSORINFO_H
#define ARM_COMPUTE_SUBTENSORINFO_H
-#include "arm_compute/core/ITensorInfo.h"
-
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Strides.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
@@ -73,7 +72,7 @@ public:
// Inherited methods overridden:
std::unique_ptr clone() const override;
- ITensorInfo &set_data_type(DataType data_type) override
+ ITensorInfo &set_data_type(DataType data_type) override
{
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
_parent->set_data_type(data_type);
@@ -143,7 +142,7 @@ public:
return _parent->offset_element_in_bytes(_coords);
}
int32_t offset_element_in_bytes(const Coordinates &pos) const override;
- size_t element_size() const override
+ size_t element_size() const override
{
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
return _parent->element_size();
@@ -227,7 +226,7 @@ public:
{
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
// Check if subtensor is valid if parent is configured
- if(_parent->tensor_shape().total_size() != 0)
+ if (_parent->tensor_shape().total_size() != 0)
{
ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(_parent->valid_region(), valid_region);
}
diff --git a/arm_compute/core/TensorInfo.h b/arm_compute/core/TensorInfo.h
index e738a797b298ff3d468a8e55f74fa627350e5ccf..b18f750427a1a57bbac98f469129640fcda1d4ab 100644
--- a/arm_compute/core/TensorInfo.h
+++ b/arm_compute/core/TensorInfo.h
@@ -24,15 +24,14 @@
#ifndef ARM_COMPUTE_TENSORINFO_H
#define ARM_COMPUTE_TENSORINFO_H
-#include "arm_compute/core/ITensorInfo.h"
-
-#include "ITensorInfo.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Strides.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
+#include "ITensorInfo.h"
#include
#include
@@ -112,7 +111,10 @@ public:
* @param[in] data_type Data type to use for each tensor element
* @param[in] quantization_info The quantization settings for the tensor data.
*/
- TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, QuantizationInfo quantization_info);
+ TensorInfo(const TensorShape &tensor_shape,
+ size_t num_channels,
+ DataType data_type,
+ QuantizationInfo quantization_info);
/** Initialize the tensor info with just a format.
*
@@ -136,7 +138,11 @@ public:
* @param[in] offset_first_element_in_bytes Offset in bytes from the beginning of memory allocation to access the first element.
* @param[in] total_size_in_bytes Size in bytes of the memory allocation (including the offset to the first element).
*/
- void init(const TensorShape &tensor_shape, Format format, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes, size_t total_size_in_bytes);
+ void init(const TensorShape &tensor_shape,
+ Format format,
+ const Strides &strides_in_bytes,
+ size_t offset_first_element_in_bytes,
+ size_t total_size_in_bytes);
/** Initialize the tensor info with just a format.
*
@@ -164,8 +170,12 @@ public:
* @param[in] offset_first_element_in_bytes Offset in bytes from the beginning of memory allocation to access the first element.
* @param[in] total_size_in_bytes Size in bytes of the memory allocation (including the offset to the first element).
*/
- void init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes,
- size_t total_size_in_bytes);
+ void init(const TensorShape &tensor_shape,
+ size_t num_channels,
+ DataType data_type,
+ const Strides &strides_in_bytes,
+ size_t offset_first_element_in_bytes,
+ size_t total_size_in_bytes);
/** Initialize the metadata structure for the given tensor shape and single-plane format, (Padding is automatically calculated)
*
* @note The padding used by this method is really conservative so that the tensor can be used for most functions.
@@ -191,19 +201,19 @@ public:
// Inherited methods overridden:
std::unique_ptr clone() const override;
- ITensorInfo &set_data_type(DataType data_type) override;
- ITensorInfo &set_num_channels(int num_channels) override;
- ITensorInfo &set_format(Format format) override;
- ITensorInfo &set_tensor_shape(const TensorShape &shape) override;
- ITensorInfo &set_tensor_dims_state(const TensorDimsState &state) override;
- ITensorInfo &set_quantization_info(const QuantizationInfo &quantization_info) override;
- ITensorInfo &set_data_layout(const DataLayout &data_layout) override;
- ITensorInfo &reset_padding() override;
- bool auto_padding() override;
- ITensorInfo &set_lock_paddings(bool flag) override;
- bool lock_paddings() const override;
- bool extend_padding(const PaddingSize &padding) override;
- size_t dimension(size_t index) const override
+ ITensorInfo &set_data_type(DataType data_type) override;
+ ITensorInfo &set_num_channels(int num_channels) override;
+ ITensorInfo &set_format(Format format) override;
+ ITensorInfo &set_tensor_shape(const TensorShape &shape) override;
+ ITensorInfo &set_tensor_dims_state(const TensorDimsState &state) override;
+ ITensorInfo &set_quantization_info(const QuantizationInfo &quantization_info) override;
+ ITensorInfo &set_data_layout(const DataLayout &data_layout) override;
+ ITensorInfo &reset_padding() override;
+ bool auto_padding() override;
+ ITensorInfo &set_lock_paddings(bool flag) override;
+ bool lock_paddings() const override;
+ bool extend_padding(const PaddingSize &padding) override;
+ size_t dimension(size_t index) const override
{
return _tensor_shape[index];
}
@@ -220,7 +230,7 @@ public:
return _offset_first_element_in_bytes;
}
int32_t offset_element_in_bytes(const Coordinates &pos) const override;
- size_t element_size() const override
+ size_t element_size() const override
{
return data_size_from_type(_data_type) * _num_channels;
}
@@ -266,7 +276,8 @@ public:
}
bool is_dynamic() const override
{
- return std::find(std::cbegin(_dims_state), std::cend(_dims_state), get_dynamic_state_value()) != std::cend(_dims_state);
+ return std::find(std::cbegin(_dims_state), std::cend(_dims_state), get_dynamic_state_value()) !=
+ std::cend(_dims_state);
}
bool are_values_constant() const override
{
@@ -343,11 +354,15 @@ private:
*/
inline bool operator==(const TensorInfo &lhs, const TensorInfo &rhs)
{
- return (lhs._total_size == rhs._total_size) && (lhs._offset_first_element_in_bytes == rhs._offset_first_element_in_bytes) && (lhs._strides_in_bytes == rhs._strides_in_bytes)
- && (lhs._num_channels == rhs._num_channels) && (lhs._tensor_shape == rhs._tensor_shape) && (lhs._dims_state == rhs._dims_state) && (lhs._data_type == rhs._data_type) && (lhs._format == rhs._format)
- && (lhs._is_resizable == rhs._is_resizable) && (lhs._valid_region == rhs._valid_region) && (lhs._padding == rhs._padding) && (lhs._quantization_info == rhs._quantization_info)
- && (lhs._data_layout == rhs._data_layout) && (lhs._are_values_constant == rhs._are_values_constant)
- && (lhs._id == rhs._id);
+ return (lhs._total_size == rhs._total_size) &&
+ (lhs._offset_first_element_in_bytes == rhs._offset_first_element_in_bytes) &&
+ (lhs._strides_in_bytes == rhs._strides_in_bytes) && (lhs._num_channels == rhs._num_channels) &&
+ (lhs._tensor_shape == rhs._tensor_shape) && (lhs._dims_state == rhs._dims_state) &&
+ (lhs._data_type == rhs._data_type) && (lhs._format == rhs._format) &&
+ (lhs._is_resizable == rhs._is_resizable) && (lhs._valid_region == rhs._valid_region) &&
+ (lhs._padding == rhs._padding) && (lhs._quantization_info == rhs._quantization_info) &&
+ (lhs._data_layout == rhs._data_layout) && (lhs._are_values_constant == rhs._are_values_constant) &&
+ (lhs._id == rhs._id);
}
} // namespace arm_compute
#endif /*ARM_COMPUTE_TENSORINFO_H */
diff --git a/arm_compute/core/TensorShape.h b/arm_compute/core/TensorShape.h
index 4c9186ac64cfb5c847692781de6ab0d7f2faed6a..c1707e262f1e2364d5ce58f173dd58e3771cedbf 100644
--- a/arm_compute/core/TensorShape.h
+++ b/arm_compute/core/TensorShape.h
@@ -44,11 +44,10 @@ public:
* @param[in] dims Values to initialize the dimensions.
*/
template
- TensorShape(Ts... dims)
- : Dimensions{ dims... }
+ TensorShape(Ts... dims) : Dimensions{dims...}
{
// Initialize unspecified dimensions to 1
- if(_num_dimensions > 0)
+ if (_num_dimensions > 0)
{
std::fill(_id.begin() + _num_dimensions, _id.end(), 1);
}
@@ -79,7 +78,7 @@ public:
TensorShape &set(size_t dimension, size_t value, bool apply_dim_correction = true, bool increase_dim_unit = true)
{
// Clear entire shape if one dimension is zero
- if(value == 0)
+ if (value == 0)
{
_num_dimensions = 0;
std::fill(_id.begin(), _id.end(), 0);
@@ -94,7 +93,7 @@ public:
Dimensions::set(dimension, value, increase_dim_unit);
// Correct number dimensions to ignore trailing dimensions of size 1
- if(apply_dim_correction)
+ if (apply_dim_correction)
{
apply_dimension_correction();
}
@@ -123,7 +122,7 @@ public:
std::fill(_id.begin() + _num_dimensions, _id.end(), 1);
// Correct number dimensions to ignore trailing dimensions of size 1
- if(apply_dim_correction)
+ if (apply_dim_correction)
{
apply_dimension_correction();
}
@@ -212,26 +211,26 @@ public:
* @return The broadcasted shape or an empty shape if the shapes are not broadcast compatible.
*/
template
- static TensorShape broadcast_shape(const Shapes &... shapes)
+ static TensorShape broadcast_shape(const Shapes &...shapes)
{
TensorShape bc_shape;
- auto broadcast = [&bc_shape](const TensorShape & other)
+ auto broadcast = [&bc_shape](const TensorShape &other)
{
- if(bc_shape.num_dimensions() == 0)
+ if (bc_shape.num_dimensions() == 0)
{
bc_shape = other;
}
- else if(other.num_dimensions() != 0)
+ else if (other.num_dimensions() != 0)
{
- for(size_t d = 0; d < TensorShape::num_max_dimensions; ++d)
+ for (size_t d = 0; d < TensorShape::num_max_dimensions; ++d)
{
const size_t dim_min = std::min(bc_shape[d], other[d]);
const size_t dim_max = std::max(bc_shape[d], other[d]);
- if((dim_min != 1) && (dim_min != dim_max))
+ if ((dim_min != 1) && (dim_min != dim_max))
{
- bc_shape = TensorShape{ 0U };
+ bc_shape = TensorShape{0U};
break;
}
@@ -249,9 +248,9 @@ private:
/** Remove trailing dimensions of size 1 from the reported number of dimensions. */
void apply_dimension_correction()
{
- for(int i = static_cast(_num_dimensions) - 1; i > 0; --i)
+ for (int i = static_cast(_num_dimensions) - 1; i > 0; --i)
{
- if(_id[i] == 1)
+ if (_id[i] == 1)
{
--_num_dimensions;
}
@@ -262,5 +261,5 @@ private:
}
}
};
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_TENSORSHAPE_H*/
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 12d860205ead9c91301fc4af498d248e75e0556b..6b51af17d4bb060805864baf4e4a5d48211d41b7 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ACL_ARM_COMPUTE_CORE_TYPES
-#define ACL_ARM_COMPUTE_CORE_TYPES
+#ifndef ACL_ARM_COMPUTE_CORE_TYPES_H
+#define ACL_ARM_COMPUTE_CORE_TYPES_H
/** The following symbols have been moved to:
* half
@@ -59,14 +59,13 @@
/** The following symbols have been moved to:
* MatMulInfo
*/
-#include "arm_compute/function_info/MatMulInfo.h"
-
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Size3D.h"
#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/experimental/IPostOp.h"
#include "arm_compute/core/utils/misc/Macros.h"
+#include "arm_compute/function_info/MatMulInfo.h"
+
#include "support/Bfloat16.h"
#include
@@ -144,8 +143,7 @@ enum class ComparisonOperation
struct ValidRegion
{
/** Default constructor */
- ValidRegion()
- : anchor{}, shape{}
+ ValidRegion() : anchor{}, shape{}
{
}
@@ -166,8 +164,7 @@ struct ValidRegion
* @param[in] a_shape Shape of the valid region.
*
*/
- ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape)
- : anchor{ an_anchor }, shape{ a_shape }
+ ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape) : anchor{an_anchor}, shape{a_shape}
{
anchor.set_num_dimensions(std::max(anchor.num_dimensions(), shape.num_dimensions()));
}
@@ -180,7 +177,7 @@ struct ValidRegion
*
*/
ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape, size_t num_dimensions)
- : anchor{ an_anchor }, shape{ a_shape }
+ : anchor{an_anchor}, shape{a_shape}
{
ARM_COMPUTE_ERROR_ON(num_dimensions < std::max(anchor.num_dimensions(), shape.num_dimensions()));
anchor.set_num_dimensions(num_dimensions);
@@ -242,32 +239,24 @@ enum class BorderMode
struct BorderSize
{
/** Empty border, i.e. no border */
- constexpr BorderSize() noexcept
- : top{ 0 },
- right{ 0 },
- bottom{ 0 },
- left{ 0 }
+ constexpr BorderSize() noexcept : top{0}, right{0}, bottom{0}, left{0}
{
}
/** Border with equal size around the 2D plane */
- explicit constexpr BorderSize(unsigned int size) noexcept
- : top{ size },
- right{ size },
- bottom{ size },
- left{ size }
+ explicit constexpr BorderSize(unsigned int size) noexcept : top{size}, right{size}, bottom{size}, left{size}
{
}
/** Border with same size for top/bottom and left/right */
constexpr BorderSize(unsigned int top_bottom, unsigned int left_right)
- : top{ top_bottom }, right{ left_right }, bottom{ top_bottom }, left{ left_right }
+ : top{top_bottom}, right{left_right}, bottom{top_bottom}, left{left_right}
{
}
/** Border with different sizes */
constexpr BorderSize(unsigned int top, unsigned int right, unsigned int bottom, unsigned int left)
- : top{ top }, right{ right }, bottom{ bottom }, left{ left }
+ : top{top}, right{right}, bottom{bottom}, left{left}
{
}
@@ -372,7 +361,7 @@ enum class InterpolationPolicy
{
NEAREST_NEIGHBOR, /**< Output values are defined to match the source pixel whose center is nearest to the sample position */
BILINEAR, /**< Output values are defined by bilinear interpolation between the pixels */
- AREA, /**< Output values are determined by averaging the source pixels whose areas fall under the area of the destination pixel, projected onto the source image */
+ AREA, /**< Output values are determined by averaging the source pixels whose areas fall under the area of the destination pixel, projected onto the source image */
};
/** Bilinear Interpolation method used by LKTracker */
@@ -479,12 +468,12 @@ enum class NormType
*/
struct DetectionWindow
{
- uint16_t x{ 0 }; /**< Top-left x coordinate */
- uint16_t y{ 0 }; /**< Top-left y coordinate */
- uint16_t width{ 0 }; /**< Width of the detection window */
- uint16_t height{ 0 }; /**< Height of the detection window */
- uint16_t idx_class{ 0 }; /**< Index of the class */
- float score{ 0.f }; /**< Confidence value for the detection window */
+ uint16_t x{0}; /**< Top-left x coordinate */
+ uint16_t y{0}; /**< Top-left y coordinate */
+ uint16_t width{0}; /**< Width of the detection window */
+ uint16_t height{0}; /**< Height of the detection window */
+ uint16_t idx_class{0}; /**< Index of the class */
+ float score{0.f}; /**< Confidence value for the detection window */
};
/** Available pooling types */
@@ -521,12 +510,28 @@ public:
* @param[in] im_width (Optional) Boxes whose centers (on the x axis) is beyond im_width will be filtered. Defaults to 1
* @param[in] im_height (Optional) Boxes whose centers (on the y axis) is beyond im_height will be filtered. Defaults to 1
*/
- BoxNMSLimitInfo(float score_thresh = 0.05f, float nms = 0.3f,
- int detections = 100, bool soft_nms_enabled = false,
- NMSType soft_nms_method = NMSType::LINEAR,
- float soft_nms_sigma = 0.5f, float soft_nms_min_score_thres = 0.001f, bool suppress_size = false, float min_size = 1.0f, float im_width = 1.0f, float im_height = 1.0f)
- : _score_thresh(score_thresh), _nms(nms), _detections_per_im(detections), _soft_nms_enabled(soft_nms_enabled), _soft_nms_method(soft_nms_method), _soft_nms_sigma(soft_nms_sigma),
- _soft_nms_min_score_thres(soft_nms_min_score_thres), _suppress_size(suppress_size), _min_size(min_size), _im_width(im_width), _im_height(im_height)
+ BoxNMSLimitInfo(float score_thresh = 0.05f,
+ float nms = 0.3f,
+ int detections = 100,
+ bool soft_nms_enabled = false,
+ NMSType soft_nms_method = NMSType::LINEAR,
+ float soft_nms_sigma = 0.5f,
+ float soft_nms_min_score_thres = 0.001f,
+ bool suppress_size = false,
+ float min_size = 1.0f,
+ float im_width = 1.0f,
+ float im_height = 1.0f)
+ : _score_thresh(score_thresh),
+ _nms(nms),
+ _detections_per_im(detections),
+ _soft_nms_enabled(soft_nms_enabled),
+ _soft_nms_method(soft_nms_method),
+ _soft_nms_sigma(soft_nms_sigma),
+ _soft_nms_min_score_thres(soft_nms_min_score_thres),
+ _suppress_size(suppress_size),
+ _min_size(min_size),
+ _im_width(im_width),
+ _im_height(im_height)
{
}
/** Get the score threshold */
@@ -604,14 +609,13 @@ private:
struct Padding2D
{
Padding2D() = default;
- Padding2D(size_t left, size_t right, size_t top, size_t bottom)
- : left(left), right(right), top(top), bottom(bottom)
+ Padding2D(size_t left, size_t right, size_t top, size_t bottom) : left(left), right(right), top(top), bottom(bottom)
{
}
- size_t left = { 0 }; /**< Padding across the width dimension on the left, in elements. */
- size_t right = { 0 }; /**< Padding across the width dimension on the right, in elements. */
- size_t top = { 0 }; /**< Padding across the height dimension on the top, in elements. */
- size_t bottom = { 0 }; /**< Padding across the height dimension on the bottom, in elements. */
+ size_t left = {0}; /**< Padding across the width dimension on the left, in elements. */
+ size_t right = {0}; /**< Padding across the width dimension on the right, in elements. */
+ size_t top = {0}; /**< Padding across the height dimension on the top, in elements. */
+ size_t bottom = {0}; /**< Padding across the height dimension on the bottom, in elements. */
};
/** Padding information for 3D operations like Conv3d */
@@ -631,12 +635,12 @@ struct Padding3D
{
}
- size_t left = { 0 }; /**< Padding across the width dimenstion on the left, in elements. */
- size_t right = { 0 }; /**< Padding across the width dimenstion on the right, in elements. */
- size_t top = { 0 }; /**< Padding across the height dimenstion on the top, in elements. */
- size_t bottom = { 0 }; /**< Padding across the height dimenstion on the bottom, in elements. */
- size_t front = { 0 }; /**< Padding across the depth dimenstion on the front, in elements. */
- size_t back = { 0 }; /**< Padding across the depth dimenstion on the back, in elements. */
+ size_t left = {0}; /**< Padding across the width dimension on the left, in elements. */
+ size_t right = {0}; /**< Padding across the width dimension on the right, in elements. */
+ size_t top = {0}; /**< Padding across the height dimension on the top, in elements. */
+ size_t bottom = {0}; /**< Padding across the height dimension on the bottom, in elements. */
+ size_t front = {0}; /**< Padding across the depth dimension on the front, in elements. */
+ size_t back = {0}; /**< Padding across the depth dimension on the back, in elements. */
};
/** PriorBox layer info */
@@ -668,9 +672,15 @@ public:
* @param[in] img_size (Optional) Image size.
* @param[in] steps (Optional) Step values.
*/
- PriorBoxLayerInfo(const std::vector &min_sizes, const std::vector &variances, float offset, bool flip = true, bool clip = false,
- const std::vector &max_sizes = {}, const std::vector &aspect_ratios = {},
- const Coordinates2D &img_size = Coordinates2D{ 0, 0 }, const std::array &steps = { { 0.f, 0.f } })
+ PriorBoxLayerInfo(const std::vector &min_sizes,
+ const std::vector &variances,
+ float offset,
+ bool flip = true,
+ bool clip = false,
+ const std::vector &max_sizes = {},
+ const std::vector &aspect_ratios = {},
+ const Coordinates2D &img_size = Coordinates2D{0, 0},
+ const std::array &steps = {{0.f, 0.f}})
: _min_sizes(min_sizes),
_variances(variances),
_offset(offset),
@@ -682,22 +692,22 @@ public:
_steps(steps)
{
_aspect_ratios.push_back(1.);
- for(unsigned int i = 0; i < aspect_ratios.size(); ++i)
+ for (unsigned int i = 0; i < aspect_ratios.size(); ++i)
{
float ar = aspect_ratios[i];
bool already_exist = false;
- for(auto ar_new : _aspect_ratios)
+ for (auto ar_new : _aspect_ratios)
{
- if(fabs(ar - ar_new) < 1e-6)
+ if (fabs(ar - ar_new) < 1e-6)
{
already_exist = true;
break;
}
}
- if(!already_exist)
+ if (!already_exist)
{
_aspect_ratios.push_back(ar);
- if(flip)
+ if (flip)
{
_aspect_ratios.push_back(1.f / ar);
}
@@ -751,14 +761,14 @@ public:
}
private:
- std::vector _min_sizes;
- std::vector _variances;
- float _offset;
- bool _flip;
- bool _clip;
- std::vector _max_sizes;
- std::vector _aspect_ratios;
- Coordinates2D _img_size;
+ std::vector _min_sizes;
+ std::vector _variances;
+ float _offset;
+ bool _flip;
+ bool _clip;
+ std::vector _max_sizes;
+ std::vector _aspect_ratios;
+ Coordinates2D _img_size;
std::array _steps;
};
@@ -809,8 +819,16 @@ public:
* @param[in] variance_encoded_in_target (Optional) If true, variance is encoded in target. Otherwise we need to adjust the predicted offset accordingly.Default set to false.
* @param[in] eta (Optional) Eta.
*/
- DetectionOutputLayerInfo(int num_classes, bool share_location, DetectionOutputLayerCodeType code_type, int keep_top_k, float nms_threshold, int top_k = -1, int background_label_id = -1,
- float confidence_threshold = std::numeric_limits::lowest(), bool variance_encoded_in_target = false, float eta = 1)
+ DetectionOutputLayerInfo(int num_classes,
+ bool share_location,
+ DetectionOutputLayerCodeType code_type,
+ int keep_top_k,
+ float nms_threshold,
+ int top_k = -1,
+ int background_label_id = -1,
+ float confidence_threshold = std::numeric_limits::lowest(),
+ bool variance_encoded_in_target = false,
+ float eta = 1)
: _num_classes(num_classes),
_share_location(share_location),
_code_type(code_type),
@@ -924,8 +942,15 @@ public:
* @param[in] detection_per_class (Optional) Number of detection per class. Used in the Regular Non-Max-Suppression. Defaults to 100.
* @param[in] dequantize_scores (Optional) If the scores need to be dequantized. Defaults to true.
*/
- DetectionPostProcessLayerInfo(unsigned int max_detections, unsigned int max_classes_per_detection, float nms_score_threshold, float iou_threshold, unsigned int num_classes,
- std::array scales_values, bool use_regular_nms = false, unsigned int detection_per_class = 100, bool dequantize_scores = true)
+ DetectionPostProcessLayerInfo(unsigned int max_detections,
+ unsigned int max_classes_per_detection,
+ float nms_score_threshold,
+ float iou_threshold,
+ unsigned int num_classes,
+ std::array scales_values,
+ bool use_regular_nms = false,
+ unsigned int detection_per_class = 100,
+ bool dequantize_scores = true)
: _max_detections(max_detections),
_max_classes_per_detection(max_classes_per_detection),
_nms_score_threshold(nms_score_threshold),
@@ -1003,15 +1028,15 @@ public:
}
private:
- unsigned int _max_detections;
- unsigned int _max_classes_per_detection;
- float _nms_score_threshold;
- float _iou_threshold;
- unsigned int _num_classes;
+ unsigned int _max_detections;
+ unsigned int _max_classes_per_detection;
+ float _nms_score_threshold;
+ float _iou_threshold;
+ unsigned int _num_classes;
std::array _scales_values;
- bool _use_regular_nms;
- unsigned int _detection_per_class;
- bool _dequantize_scores;
+ bool _use_regular_nms;
+ unsigned int _detection_per_class;
+ bool _dequantize_scores;
};
/** Pooling Layer Information struct*/
@@ -1241,8 +1266,14 @@ public:
* @param[in] spatial_scale Spatial scale to be applied to the ROI coordinates and dimensions.
* @param[in] sampling_ratio Number of samples to include in each pooling region (if set to zero, a ceil(roi_dims/pooling_dims))
*/
- ROIPoolingLayerInfo(unsigned int pooled_width, unsigned int pooled_height, float spatial_scale, unsigned int sampling_ratio = 0)
- : _pooled_width(pooled_width), _pooled_height(pooled_height), _spatial_scale(spatial_scale), _sampling_ratio(sampling_ratio)
+ ROIPoolingLayerInfo(unsigned int pooled_width,
+ unsigned int pooled_height,
+ float spatial_scale,
+ unsigned int sampling_ratio = 0)
+ : _pooled_width(pooled_width),
+ _pooled_height(pooled_height),
+ _spatial_scale(spatial_scale),
+ _sampling_ratio(sampling_ratio)
{
}
/** Get the pooled width of the layer */
@@ -1289,10 +1320,24 @@ public:
* @param[in] min_size (Optional)Size used to validate the anchors produced. Defaults to 16.
* @param[in] values_per_roi (Optional)Values used to represent a ROI(Region of interest). Defaults to 4.
*/
- GenerateProposalsInfo(float im_width, float im_height, float im_scale, float spatial_scale = 1.0, int pre_nms_topN = 6000, int post_nms_topN = 300, float nms_thres = 0.7, float min_size = 16.0,
+ GenerateProposalsInfo(float im_width,
+ float im_height,
+ float im_scale,
+ float spatial_scale = 1.0,
+ int pre_nms_topN = 6000,
+ int post_nms_topN = 300,
+ float nms_thres = 0.7,
+ float min_size = 16.0,
size_t values_per_roi = 4)
- : _im_height(im_height), _im_width(im_width), _im_scale(im_scale), _spatial_scale(spatial_scale), _pre_nms_topN(pre_nms_topN), _post_nms_topN(post_nms_topN), _nms_thres(nms_thres),
- _min_size(min_size), _values_per_roi(values_per_roi)
+ : _im_height(im_height),
+ _im_width(im_width),
+ _im_scale(im_scale),
+ _spatial_scale(spatial_scale),
+ _pre_nms_topN(pre_nms_topN),
+ _post_nms_topN(post_nms_topN),
+ _nms_thres(nms_thres),
+ _min_size(min_size),
+ _values_per_roi(values_per_roi)
{
}
@@ -1418,11 +1463,20 @@ public:
* @param[in] correct_transform_coords (Optional)Correct bounding box transform coordinates. Defaults to false
* @param[in] bbox_xform_clip (Optional)Minimum bounding box width and height after bounding box transformation in log-space. Defaults to log(1000/16)
*/
- BoundingBoxTransformInfo(float img_width, float img_height, float scale, bool apply_scale = false, const std::array weights = { { 1.f, 1.f, 1.f, 1.f } }, bool correct_transform_coords =
- false,
- float bbox_xform_clip =
- 4.135166556742356f)
- : _img_width(img_width), _img_height(img_height), _scale(scale), _apply_scale(apply_scale), _correct_transform_coords(correct_transform_coords), _weights(weights), _bbox_xform_clip(bbox_xform_clip)
+ BoundingBoxTransformInfo(float img_width,
+ float img_height,
+ float scale,
+ bool apply_scale = false,
+ const std::array weights = {{1.f, 1.f, 1.f, 1.f}},
+ bool correct_transform_coords = false,
+ float bbox_xform_clip = 4.135166556742356f)
+ : _img_width(img_width),
+ _img_height(img_height),
+ _scale(scale),
+ _apply_scale(apply_scale),
+ _correct_transform_coords(correct_transform_coords),
+ _weights(weights),
+ _bbox_xform_clip(bbox_xform_clip)
{
}
@@ -1462,13 +1516,13 @@ public:
}
private:
- float _img_width;
- float _img_height;
- float _scale;
- bool _apply_scale;
- bool _correct_transform_coords;
+ float _img_width;
+ float _img_height;
+ float _scale;
+ bool _apply_scale;
+ bool _correct_transform_coords;
std::array _weights;
- float _bbox_xform_clip;
+ float _bbox_xform_clip;
};
/** Normalization Layer Information class */
@@ -1485,7 +1539,12 @@ public:
* @param[in] is_scaled (Optional) Boolean that specifies if alpha will be scaled by the normalization size or not.
* Should be false to follow [Krichevksy 2012].
*/
- NormalizationLayerInfo(NormType type, uint32_t norm_size = 5, float alpha = 0.0001f, float beta = 0.5f, float kappa = 1.f, bool is_scaled = true)
+ NormalizationLayerInfo(NormType type,
+ uint32_t norm_size = 5,
+ float alpha = 0.0001f,
+ float beta = 0.5f,
+ float kappa = 1.f,
+ bool is_scaled = true)
: _type(type), _norm_size(norm_size), _alpha(alpha), _beta(beta), _kappa(kappa), _is_scaled(is_scaled)
{
}
@@ -1613,7 +1672,12 @@ class WeightsInfo
public:
/** Default constructor */
WeightsInfo()
- : _are_reshaped(false), _kernel_width(0), _kernel_height(0), _num_kernels(0), _retain_internal_weights(false), _weight_format(arm_compute::WeightFormat::UNSPECIFIED)
+ : _are_reshaped(false),
+ _kernel_width(0),
+ _kernel_height(0),
+ _num_kernels(0),
+ _retain_internal_weights(false),
+ _weight_format(arm_compute::WeightFormat::UNSPECIFIED)
{
}
/** Constructor
@@ -1625,9 +1689,18 @@ public:
* @param[in] retain_internal_weights (Optional) True if internal reshaped weights must be retained. Used for reconfiguration purposes. Default is false.
* @param[in] weight_format (Optional) arm_gemm:WeightFormat enumeration requested by the user. Default is arm_compute::WeightFormat::UNSPECIFIED.
*/
- WeightsInfo(bool are_reshaped, unsigned int kernel_width, unsigned int kernel_height, unsigned int num_kernels, bool retain_internal_weights = false,
- arm_compute::WeightFormat weight_format = arm_compute::WeightFormat::UNSPECIFIED)
- : _are_reshaped(are_reshaped), _kernel_width(kernel_width), _kernel_height(kernel_height), _num_kernels(num_kernels), _retain_internal_weights(retain_internal_weights), _weight_format(weight_format)
+ WeightsInfo(bool are_reshaped,
+ unsigned int kernel_width,
+ unsigned int kernel_height,
+ unsigned int num_kernels,
+ bool retain_internal_weights = false,
+ arm_compute::WeightFormat weight_format = arm_compute::WeightFormat::UNSPECIFIED)
+ : _are_reshaped(are_reshaped),
+ _kernel_width(kernel_width),
+ _kernel_height(kernel_height),
+ _num_kernels(num_kernels),
+ _retain_internal_weights(retain_internal_weights),
+ _weight_format(weight_format)
{
}
/** Flag which specifies if the weights tensor has been reshaped.
@@ -1699,7 +1772,14 @@ class GEMMReshapeInfo final
public:
/** Default constructor */
GEMMReshapeInfo()
- : _m(1), _n(1), _k(1), _mult_transpose1xW_width(1), _mult_interleave4x4_height(1), _depth_output_gemm3d(0), _reinterpret_input_as_3d(false), _broadcast_bias(false)
+ : _m(1),
+ _n(1),
+ _k(1),
+ _mult_transpose1xW_width(1),
+ _mult_interleave4x4_height(1),
+ _depth_output_gemm3d(0),
+ _reinterpret_input_as_3d(false),
+ _broadcast_bias(false)
{
}
/** Constructor
@@ -1715,9 +1795,22 @@ public:
* to perform 1x1 convolutions with the NHWC data layout)
* @param[in] broadcast_bias (Optional) Broadcast the shape of the bias tensor from a vector to a matrix.
*/
- GEMMReshapeInfo(int m, int n, int k, int mult_transpose1xW_width = 1, int mult_interleave4x4_height = 1, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool broadcast_bias = false)
- : _m(m), _n(n), _k(k), _mult_transpose1xW_width(mult_transpose1xW_width), _mult_interleave4x4_height(mult_interleave4x4_height), _depth_output_gemm3d(depth_output_gemm3d),
- _reinterpret_input_as_3d(reinterpret_input_as_3d), _broadcast_bias(broadcast_bias)
+ GEMMReshapeInfo(int m,
+ int n,
+ int k,
+ int mult_transpose1xW_width = 1,
+ int mult_interleave4x4_height = 1,
+ int depth_output_gemm3d = 0,
+ bool reinterpret_input_as_3d = false,
+ bool broadcast_bias = false)
+ : _m(m),
+ _n(n),
+ _k(k),
+ _mult_transpose1xW_width(mult_transpose1xW_width),
+ _mult_interleave4x4_height(mult_interleave4x4_height),
+ _depth_output_gemm3d(depth_output_gemm3d),
+ _reinterpret_input_as_3d(reinterpret_input_as_3d),
+ _broadcast_bias(broadcast_bias)
{
}
/** Number of matrix A rows
@@ -1807,11 +1900,11 @@ struct GEMMLHSMatrixInfo
: m0(m), k0(k), v0(v), transpose(trans), interleave(inter)
{
}
- unsigned int m0{ 1 }; /**< Number of rows processed by the matrix multiplication */
- unsigned int k0{ 1 }; /**< Number of partial accumulations performed by the matrix multiplication */
- unsigned int v0{ 1 }; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */
- bool transpose{ true }; /**< True if the (m0xk0) block has to be transposed before been stored */
- bool interleave{ true }; /**< True if the v0 (m0xk0) blocks have to be interleaved in the output row */
+ unsigned int m0{1}; /**< Number of rows processed by the matrix multiplication */
+ unsigned int k0{1}; /**< Number of partial accumulations performed by the matrix multiplication */
+ unsigned int v0{1}; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */
+ bool transpose{true}; /**< True if the (m0xk0) block has to be transposed before being stored */
+ bool interleave{true}; /**< True if the v0 (m0xk0) blocks have to be interleaved in the output row */
};
/** GEMM RHS (Right Hand Side) matrix information */
@@ -1822,12 +1915,13 @@ struct GEMMRHSMatrixInfo
: n0(n), k0(k), h0(h), transpose(trans), interleave(inter), export_to_cl_image(export_to_cl_img)
{
}
- unsigned int n0{ 1 }; /**< Number of columns processed by the matrix multiplication */
- unsigned int k0{ 1 }; /**< Number of partial accumulations performed by the matrix multiplication */
- unsigned int h0{ 1 }; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
- bool transpose{ true }; /**< True if the (k0xn0) block has to be transposed before been stored */
- bool interleave{ true }; /**< True if the h0 (k0xn0) blocks have to be interleaved in the output row */
- bool export_to_cl_image{ false }; /**< True if the reshaped rhs has to be exported to cl_image. n0 must be equal to 4 */
+ unsigned int n0{1}; /**< Number of columns processed by the matrix multiplication */
+ unsigned int k0{1}; /**< Number of partial accumulations performed by the matrix multiplication */
+ unsigned int h0{1}; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
+ bool transpose{true}; /**< True if the (k0xn0) block has to be transposed before being stored */
+ bool interleave{true}; /**< True if the h0 (k0xn0) blocks have to be interleaved in the output row */
+ bool export_to_cl_image{
+ false}; /**< True if the reshaped rhs has to be exported to cl_image. n0 must be equal to 4 */
};
class ITensorInfo;
@@ -1843,16 +1937,23 @@ struct WinogradInfo
* @param[in] conv_info Convolution info (Pads, strides)
* @param[in] data_layout Data layout to use for the output tensor once the convolution has been applied
*/
- WinogradInfo(Size2D output_tile_sz, Size2D kernel_sz, Size2D input_dims, PadStrideInfo conv_info, DataLayout data_layout)
- : output_tile_size(output_tile_sz), kernel_size(kernel_sz), input_dimensions(input_dims), convolution_info(conv_info), output_data_layout(data_layout)
- {
- }
-
- Size2D output_tile_size{}; /**< Width and height of the output tile */
- Size2D kernel_size{}; /**< Width and height of the kernel*/
- Size2D input_dimensions{}; /**< Width and height of the input tensor before the convolution is applied */
- PadStrideInfo convolution_info{}; /**< Convolution info (Pads, strides,...) */
- DataLayout output_data_layout{ DataLayout::NCHW }; /**< Data layout to use for the output tensor once the convolution has been applied (NCHW or NHWC) */
+ WinogradInfo(
+ Size2D output_tile_sz, Size2D kernel_sz, Size2D input_dims, PadStrideInfo conv_info, DataLayout data_layout)
+ : output_tile_size(output_tile_sz),
+ kernel_size(kernel_sz),
+ input_dimensions(input_dims),
+ convolution_info(conv_info),
+ output_data_layout(data_layout)
+ {
+ }
+
+ Size2D output_tile_size{}; /**< Width and height of the output tile */
+ Size2D kernel_size{}; /**< Width and height of the kernel*/
+ Size2D input_dimensions{}; /**< Width and height of the input tensor before the convolution is applied */
+ PadStrideInfo convolution_info{}; /**< Convolution info (Pads, strides,...) */
+ DataLayout output_data_layout{
+ DataLayout::
+ NCHW}; /**< Data layout to use for the output tensor once the convolution has been applied (NCHW or NHWC) */
};
/** IO formatting information class*/
@@ -1915,4 +2016,4 @@ struct IOFormatInfo
/** Class for holding information related to cropping */
using CropInfo = Padding2D;
} // namespace arm_compute
-#endif /* ACL_ARM_COMPUTE_CORE_TYPES */
+#endif // ACL_ARM_COMPUTE_CORE_TYPES_H
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index c5b50167bf6eaecd4b3d2f471dc6ac3877adc3c2..a2146522f76858a8fccbfa4dba06e60b522485fe 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -69,7 +69,7 @@ template
inline void permute_strides(Dimensions &dimensions, const PermutationVector &perm)
{
const auto old_dim = utility::make_array::num_max_dimensions>(dimensions.begin(), dimensions.end());
- for(unsigned int i = 0; i < perm.num_dimensions(); ++i)
+ for (unsigned int i = 0; i < perm.num_dimensions(); ++i)
{
T dimension_val = old_dim[i];
dimensions.set(perm[i], dimension_val);
@@ -87,7 +87,11 @@ inline void permute_strides(Dimensions &dimensions, const PermutationVector &
*
* @return PadStrideInfo for SAME padding
*/
-PadStrideInfo calculate_same_pad(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info, DataLayout data_layout = DataLayout::NCHW, const Size2D &dilation = Size2D(1u, 1u),
+PadStrideInfo calculate_same_pad(TensorShape input_shape,
+ TensorShape weights_shape,
+ PadStrideInfo conv_info,
+ DataLayout data_layout = DataLayout::NCHW,
+ const Size2D &dilation = Size2D(1u, 1u),
const DimensionRoundingType &rounding_type = DimensionRoundingType::FLOOR);
/** Returns expected width and height of the deconvolution's output tensor.
@@ -100,8 +104,10 @@ PadStrideInfo calculate_same_pad(TensorShape input_shape, TensorShape weights_sh
*
* @return A pair with the new width in the first position and the new height in the second.
*/
-std::pair deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height,
- unsigned int kernel_width, unsigned int kernel_height,
+std::pair deconvolution_output_dimensions(unsigned int in_width,
+ unsigned int in_height,
+ unsigned int kernel_width,
+ unsigned int kernel_height,
const PadStrideInfo &pad_stride_info);
/** Returns expected width and height of output scaled tensor depending on dimensions rounding mode.
@@ -115,8 +121,10 @@ std::pair deconvolution_output_dimensions(unsigned i
*
* @return A pair with the new width in the first position and the new height in the second.
*/
-std::pair scaled_dimensions(int width, int height,
- int kernel_width, int kernel_height,
+std::pair scaled_dimensions(int width,
+ int height,
+ int kernel_width,
+ int kernel_height,
const PadStrideInfo &pad_stride_info,
const Size2D &dilation = Size2D(1U, 1U));
@@ -130,9 +138,8 @@ std::pair scaled_dimensions(int width, int height,
*
* @return A pair with the new width in the first position and the new height in the second, returned values can be < 1
*/
-std::pair scaled_dimensions_signed(int width, int height,
- int kernel_width, int kernel_height,
- const PadStrideInfo &pad_stride_info);
+std::pair scaled_dimensions_signed(
+ int width, int height, int kernel_width, int kernel_height, const PadStrideInfo &pad_stride_info);
/** Returns calculated width, height and depth of output scaled tensor depending on dimensions rounding mode.
*
@@ -147,8 +154,12 @@ std::pair scaled_dimensions_signed(int width, int height,
* @return A tuple with the new width in the first position, the new height in the second, and the new depth in the third.
* Returned values can be < 1
*/
-std::tuple scaled_3d_dimensions_signed(int width, int height, int depth,
- int kernel_width, int kernel_height, int kernel_depth,
+std::tuple scaled_3d_dimensions_signed(int width,
+ int height,
+ int depth,
+ int kernel_width,
+ int kernel_height,
+ int kernel_depth,
const Pooling3dLayerInfo &pool3d_info);
/** Check if the given reduction operation should be handled in a serial way.
@@ -178,7 +189,9 @@ QuantizationInfo get_softmax_output_quantization_info(DataType input_type, bool
*
* @return The pair with minimum and maximum values
*/
-std::pair get_quantized_activation_min_max(const ActivationLayerInfo &act_info, DataType data_type, UniformQuantizationInfo oq_info);
+std::pair get_quantized_activation_min_max(const ActivationLayerInfo &act_info,
+ DataType data_type,
+ UniformQuantizationInfo oq_info);
/** Convert a channel identity into a string.
*
@@ -295,26 +308,27 @@ inline size_t num_of_elements_in_range(const float start, const float end, const
* @param[in] element_delim (Optional) Delimeter among the consecutive elements. Defaults to space delimeter
*/
template
-void print_consecutive_elements_impl(std::ostream &s, const T *ptr, unsigned int n, int stream_width = 0, const std::string &element_delim = " ")
+void print_consecutive_elements_impl(
+ std::ostream &s, const T *ptr, unsigned int n, int stream_width = 0, const std::string &element_delim = " ")
{
using print_type = typename std::conditional::value, T, int>::type;
std::ios stream_status(nullptr);
stream_status.copyfmt(s);
- for(unsigned int i = 0; i < n; ++i)
+ for (unsigned int i = 0; i < n; ++i)
{
// Set stream width as it is not a "sticky" stream manipulator
- if(stream_width != 0)
+ if (stream_width != 0)
{
s.width(stream_width);
}
- if(std::is_same::type, half>::value)
+ if (std::is_same::type, half>::value)
{
// We use T instead of print_type here is because the std::is_floating_point returns false and then the print_type becomes int.
s << std::right << static_cast(ptr[i]) << element_delim;
}
- else if(std::is_same::type, bfloat16>::value)
+ else if (std::is_same::type, bfloat16>::value)
{
// We use T instead of print_type here is because the std::is_floating_point returns false and then the print_type becomes int.
s << std::right << float(ptr[i]) << element_delim;
@@ -343,17 +357,17 @@ int max_consecutive_elements_display_width_impl(std::ostream &s, const T *ptr, u
using print_type = typename std::conditional::value, T, int>::type;
int max_width = -1;
- for(unsigned int i = 0; i < n; ++i)
+ for (unsigned int i = 0; i < n; ++i)
{
std::stringstream ss;
ss.copyfmt(s);
- if(std::is_same::type, half>::value)
+ if (std::is_same::type, half>::value)
{
// We use T instead of print_type here is because the std::is_floating_point returns false and then the print_type becomes int.
ss << static_cast(ptr[i]);
}
- else if(std::is_same::type, bfloat16>::value)
+ else if (std::is_same::type, bfloat16>::value)
{
// We use T instead of print_type here is because the std::is_floating_point returns false and then the print_type becomes int.
ss << float(ptr[i]);
@@ -377,7 +391,12 @@ int max_consecutive_elements_display_width_impl(std::ostream &s, const T *ptr, u
* @param[in] stream_width (Optional) Width of the stream. If set to 0 the element's width is used. Defaults to 0.
* @param[in] element_delim (Optional) Delimeter among the consecutive elements. Defaults to space delimeter
*/
-void print_consecutive_elements(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n, int stream_width, const std::string &element_delim = " ");
+void print_consecutive_elements(std::ostream &s,
+ DataType dt,
+ const uint8_t *ptr,
+ unsigned int n,
+ int stream_width,
+ const std::string &element_delim = " ");
/** Identify the maximum width of n consecutive elements.
*
@@ -390,5 +409,5 @@ void print_consecutive_elements(std::ostream &s, DataType dt, const uint8_t *ptr
*/
int max_consecutive_elements_display_width(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n);
#endif /* ARM_COMPUTE_ASSERTS_ENABLED */
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_UTILS_H */
diff --git a/arm_compute/core/Validate.h b/arm_compute/core/Validate.h
index 5bffc16f3b56831fed298cc8ac39c582df4e2b5a..5550560affd059612361a4e96a367060edbe830f 100644
--- a/arm_compute/core/Validate.h
+++ b/arm_compute/core/Validate.h
@@ -24,13 +24,13 @@
#ifndef ARM_COMPUTE_VALIDATE_H
#define ARM_COMPUTE_VALIDATE_H
-#include "arm_compute/core/utils/DataLayoutUtils.h"
-#include "arm_compute/core/utils/DataTypeUtils.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/utils/FormatUtils.h"
#include "arm_compute/core/IKernel.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/QuantizationInfo.h"
+#include "arm_compute/core/utils/DataLayoutUtils.h"
+#include "arm_compute/core/utils/DataTypeUtils.h"
+#include "arm_compute/core/utils/FormatUtils.h"
#include "arm_compute/core/Window.h"
#include
@@ -50,9 +50,9 @@ namespace detail
template
inline bool have_different_dimensions(const Dimensions &dim1, const Dimensions &dim2, unsigned int upper_dim)
{
- for(unsigned int i = upper_dim; i < arm_compute::Dimensions::num_max_dimensions; ++i)
+ for (unsigned int i = upper_dim; i < arm_compute::Dimensions::num_max_dimensions; ++i)
{
- if(dim1[i] != dim2[i])
+ if (dim1[i] != dim2[i])
{
return true;
}
@@ -80,7 +80,7 @@ public:
* @param[in] line Source code line. Used for error reporting.
*/
compare_dimension(const Dimensions &dim, const char *function, const char *file, int line)
- : _dim{ dim }, _function{ function }, _file{ file }, _line{ line }
+ : _dim{dim}, _function{function}, _file{file}, _line{line}
{
}
@@ -111,7 +111,7 @@ inline arm_compute::Status for_each_error(F &&)
}
template
-inline arm_compute::Status for_each_error(F &&func, T &&arg, Ts &&... args)
+inline arm_compute::Status for_each_error(F &&func, T &&arg, Ts &&...args)
{
ARM_COMPUTE_RETURN_ON_ERROR(func(arg));
ARM_COMPUTE_RETURN_ON_ERROR(for_each_error(func, args...));
@@ -148,13 +148,11 @@ struct get_tensor_info_t
* @return Status
*/
template
-inline arm_compute::Status error_on_nullptr(const char *function, const char *file, const int line, Ts &&... pointers)
+inline arm_compute::Status error_on_nullptr(const char *function, const char *file, const int line, Ts &&...pointers)
{
- const std::array pointers_array{ { std::forward(pointers)... } };
- bool has_nullptr = std::any_of(pointers_array.begin(), pointers_array.end(), [&](const void *ptr)
- {
- return (ptr == nullptr);
- });
+ const std::array pointers_array{{std::forward(pointers)...}};
+ bool has_nullptr =
+ std::any_of(pointers_array.begin(), pointers_array.end(), [&](const void *ptr) { return (ptr == nullptr); });
ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(has_nullptr, function, file, line, "Nullptr object!");
return arm_compute::Status{};
}
@@ -178,8 +176,8 @@ inline arm_compute::Status error_on_nullptr(const char *function, const char *fi
*
* @return Status
*/
-arm_compute::Status error_on_mismatching_windows(const char *function, const char *file, const int line,
- const Window &full, const Window &win);
+arm_compute::Status error_on_mismatching_windows(
+ const char *function, const char *file, const int line, const Window &full, const Window &win);
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(f, w) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_windows(__func__, __FILE__, __LINE__, f, w))
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_WINDOWS(f, w) \
@@ -200,8 +198,8 @@ arm_compute::Status error_on_mismatching_windows(const char *function, const cha
*
* @return Status
*/
-arm_compute::Status error_on_invalid_subwindow(const char *function, const char *file, const int line,
- const Window &full, const Window &sub);
+arm_compute::Status error_on_invalid_subwindow(
+ const char *function, const char *file, const int line, const Window &full, const Window &sub);
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_invalid_subwindow(__func__, __FILE__, __LINE__, f, s))
#define ARM_COMPUTE_RETURN_ERROR_ON_INVALID_SUBWINDOW(f, s) \
@@ -220,12 +218,14 @@ arm_compute::Status error_on_invalid_subwindow(const char *function, const char
*
* @return Status
*/
-arm_compute::Status error_on_window_not_collapsable_at_dimension(const char *function, const char *file, const int line,
- const Window &full, const Window &window, const int dim);
+arm_compute::Status error_on_window_not_collapsable_at_dimension(
+ const char *function, const char *file, const int line, const Window &full, const Window &window, const int dim);
#define ARM_COMPUTE_ERROR_ON_WINDOW_NOT_COLLAPSABLE_AT_DIMENSION(f, w, d) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d))
#define ARM_COMPUTE_RETURN_ERROR_ON_WINDOW_NOT_COLLAPSABLE_AT_DIMENSION(f, w, d) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d))
/** Return an error if the passed coordinates have too many dimensions.
*
@@ -239,8 +239,8 @@ arm_compute::Status error_on_window_not_collapsable_at_dimension(const char *fun
*
* @return Status
*/
-arm_compute::Status error_on_coordinates_dimensions_gte(const char *function, const char *file, const int line,
- const Coordinates &pos, unsigned int max_dim);
+arm_compute::Status error_on_coordinates_dimensions_gte(
+ const char *function, const char *file, const int line, const Coordinates &pos, unsigned int max_dim);
#define ARM_COMPUTE_ERROR_ON_COORDINATES_DIMENSIONS_GTE(p, md) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_coordinates_dimensions_gte(__func__, __FILE__, __LINE__, p, md))
#define ARM_COMPUTE_RETURN_ERROR_ON_COORDINATES_DIMENSIONS_GTE(p, md) \
@@ -258,8 +258,8 @@ arm_compute::Status error_on_coordinates_dimensions_gte(const char *function, co
*
* @return Status
*/
-arm_compute::Status error_on_window_dimensions_gte(const char *function, const char *file, const int line,
- const Window &win, unsigned int max_dim);
+arm_compute::Status error_on_window_dimensions_gte(
+ const char *function, const char *file, const int line, const Window &win, unsigned int max_dim);
#define ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(w, md) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_window_dimensions_gte(__func__, __FILE__, __LINE__, w, md))
#define ARM_COMPUTE_RETURN_ERROR_ON_WINDOW_DIMENSIONS_GTE(w, md) \
@@ -277,16 +277,23 @@ arm_compute::Status error_on_window_dimensions_gte(const char *function, const c
* @return Status
*/
template
-arm_compute::Status error_on_mismatching_dimensions(const char *function, const char *file, int line,
- const Dimensions &dim1, const Dimensions &dim2, Ts &&... dims)
+arm_compute::Status error_on_mismatching_dimensions(const char *function,
+ const char *file,
+ int line,
+ const Dimensions &dim1,
+ const Dimensions &dim2,
+ Ts &&...dims)
{
- ARM_COMPUTE_RETURN_ON_ERROR(detail::for_each_error(detail::compare_dimension(dim1, function, file, line), dim2, std::forward(dims)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(detail::for_each_error(detail::compare_dimension(dim1, function, file, line), dim2,
+ std::forward(dims)...));
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(...) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__))
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__))
/** Return true if the given format has horizontal subsampling.
*
@@ -296,7 +303,10 @@ arm_compute::Status error_on_mismatching_dimensions(const char *function, const
*/
inline bool has_format_horizontal_subsampling(Format format)
{
- return (format == Format::YUYV422 || format == Format::UYVY422 || format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88) ? true : false;
+ return (format == Format::YUYV422 || format == Format::UYVY422 || format == Format::NV12 ||
+ format == Format::NV21 || format == Format::IYUV || format == Format::UV88)
+ ? true
+ : false;
}
/** Return true if the given format has vertical subsampling.
@@ -307,7 +317,9 @@ inline bool has_format_horizontal_subsampling(Format format)
*/
inline bool has_format_vertical_subsampling(Format format)
{
- return (format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88) ? true : false;
+ return (format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88)
+ ? true
+ : false;
}
/** Adjust tensor shape size if width or height are odd for a given multi-planar format. No modification is done for other formats.
@@ -325,16 +337,16 @@ inline bool has_format_vertical_subsampling(Format format)
*/
inline TensorShape adjust_odd_shape(const TensorShape &shape, Format format)
{
- TensorShape output{ shape };
+ TensorShape output{shape};
// Force width to be even for formats which require subsampling of the U and V channels
- if(has_format_horizontal_subsampling(format))
+ if (has_format_horizontal_subsampling(format))
{
output.set(0, (output.x() + 1) & ~1U);
}
// Force height to be even for formats which require subsampling of the U and V channels
- if(has_format_vertical_subsampling(format))
+ if (has_format_vertical_subsampling(format))
{
output.set(1, (output.y() + 1) & ~1U);
}
@@ -354,18 +366,20 @@ inline TensorShape adjust_odd_shape(const TensorShape &shape, Format format)
* @return Status
*/
template
-arm_compute::Status error_on_tensors_not_even(const char *function, const char *file, int line,
- const Format &format, const ITensor *tensor1, Ts... tensors)
+arm_compute::Status error_on_tensors_not_even(
+ const char *function, const char *file, int line, const Format &format, const ITensor *tensor1, Ts... tensors)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor1 == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward(tensors)...));
- const std::array < const ITensor *, 1 + sizeof...(Ts) > tensors_info_array{ { tensor1, std::forward(tensors)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(), [&](const ITensor * tensor)
- {
- const TensorShape correct_shape = adjust_odd_shape(tensor->info()->tensor_shape(), format);
- return detail::have_different_dimensions(tensor->info()->tensor_shape(), correct_shape, 2);
- }),
- function, file, line, "Tensor shape has odd dimensions");
+ const std::array tensors_info_array{{tensor1, std::forward(tensors)...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(
+ std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(),
+ [&](const ITensor *tensor)
+ {
+ const TensorShape correct_shape = adjust_odd_shape(tensor->info()->tensor_shape(), format);
+ return detail::have_different_dimensions(tensor->info()->tensor_shape(), correct_shape, 2);
+ }),
+ function, file, line, "Tensor shape has odd dimensions");
return arm_compute::Status{};
}
@@ -382,21 +396,22 @@ arm_compute::Status error_on_tensors_not_even(const char *function, const char *
*
* @return The subsampled tensor shape.
*/
-inline TensorShape calculate_subsampled_shape(const TensorShape &shape, Format format, Channel channel = Channel::UNKNOWN)
+inline TensorShape
+calculate_subsampled_shape(const TensorShape &shape, Format format, Channel channel = Channel::UNKNOWN)
{
- TensorShape output{ shape };
+ TensorShape output{shape};
// Subsample shape only for U or V channel
- if(Channel::U == channel || Channel::V == channel || Channel::UNKNOWN == channel)
+ if (Channel::U == channel || Channel::V == channel || Channel::UNKNOWN == channel)
{
// Subsample width for the tensor shape when channel is U or V
- if(has_format_horizontal_subsampling(format))
+ if (has_format_horizontal_subsampling(format))
{
output.set(0, output.x() / 2U);
}
// Subsample height for the tensor shape when channel is U or V
- if(has_format_vertical_subsampling(format))
+ if (has_format_vertical_subsampling(format))
{
output.set(1, output.y() / 2U);
}
@@ -418,25 +433,32 @@ inline TensorShape calculate_subsampled_shape(const TensorShape &shape, Format f
* @return Status
*/
template
-arm_compute::Status error_on_tensors_not_subsampled(const char *function, const char *file, int line,
- const Format &format, const TensorShape &shape, const ITensor *tensor1, Ts... tensors)
+arm_compute::Status error_on_tensors_not_subsampled(const char *function,
+ const char *file,
+ int line,
+ const Format &format,
+ const TensorShape &shape,
+ const ITensor *tensor1,
+ Ts... tensors)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor1 == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward(tensors)...));
- const TensorShape sub2_shape = calculate_subsampled_shape(shape, format);
- const std::array < const ITensor *, 1 + sizeof...(Ts) > tensors_info_array{ { tensor1, std::forward(tensors)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(), [&](const ITensor * tensor)
- {
- return detail::have_different_dimensions(tensor->info()->tensor_shape(), sub2_shape, 2);
- }),
- function, file, line, "Tensor shape has mismatch dimensions for sub-sampling");
+ const TensorShape sub2_shape = calculate_subsampled_shape(shape, format);
+ const std::array tensors_info_array{{tensor1, std::forward(tensors)...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(
+ std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(),
+ [&](const ITensor *tensor)
+ { return detail::have_different_dimensions(tensor->info()->tensor_shape(), sub2_shape, 2); }),
+ function, file, line, "Tensor shape has mismatch dimensions for sub-sampling");
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(...) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__))
#define ARM_COMPUTE_RETURN_ERROR_ON_TENSORS_NOT_SUBSAMPLED(...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__))
/** Return an error if the passed two tensor infos have different shapes from the given dimension
*
@@ -450,10 +472,15 @@ arm_compute::Status error_on_tensors_not_subsampled(const char *function, const
* @return Status
*/
template
-inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos)
+inline arm_compute::Status error_on_mismatching_shapes(const char *function,
+ const char *file,
+ const int line,
+ const ITensorInfo *tensor_info_1,
+ const ITensorInfo *tensor_info_2,
+ Ts... tensor_infos)
{
- return error_on_mismatching_shapes(function, file, line, 0U, tensor_info_1, tensor_info_2, std::forward(tensor_infos)...);
+ return error_on_mismatching_shapes(function, file, line, 0U, tensor_info_1, tensor_info_2,
+ std::forward(tensor_infos)...);
}
/** Return an error if the passed two tensors have different shapes from the given dimension
*
@@ -467,8 +494,12 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con
* @return Status
*/
template
-inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line,
- const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors)
+inline arm_compute::Status error_on_mismatching_shapes(const char *function,
+ const char *file,
+ const int line,
+ const ITensor *tensor_1,
+ const ITensor *tensor_2,
+ Ts... tensors)
{
return error_on_mismatching_shapes(function, file, line, 0U, tensor_1, tensor_2, std::forward(tensors)...);
}
@@ -485,19 +516,28 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con
* @return Status
*/
template
-inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line,
- unsigned int upper_dim, const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos)
+inline arm_compute::Status error_on_mismatching_shapes(const char *function,
+ const char *file,
+ const int line,
+ unsigned int upper_dim,
+ const ITensorInfo *tensor_info_1,
+ const ITensorInfo *tensor_info_2,
+ Ts... tensor_infos)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info_1 == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info_2 == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...));
- const std::array < const ITensorInfo *, 2 + sizeof...(Ts) > tensors_info_array{ { tensor_info_1, tensor_info_2, tensor_infos... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(std::next(tensors_info_array.cbegin()), tensors_info_array.cend(), [&](const ITensorInfo * tensor_info)
- {
- return detail::have_different_dimensions((*tensors_info_array.cbegin())->tensor_shape(), tensor_info->tensor_shape(), upper_dim);
- }),
- function, file, line, "Tensors have different shapes");
+ const std::array tensors_info_array{
+ {tensor_info_1, tensor_info_2, tensor_infos...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(std::next(tensors_info_array.cbegin()), tensors_info_array.cend(),
+ [&](const ITensorInfo *tensor_info)
+ {
+ return detail::have_different_dimensions(
+ (*tensors_info_array.cbegin())->tensor_shape(),
+ tensor_info->tensor_shape(), upper_dim);
+ }),
+ function, file, line, "Tensors have different shapes");
return arm_compute::Status{};
}
/** Return an error if the passed two tensors have different shapes from the given dimension
@@ -513,14 +553,20 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con
* @return Status
*/
template
-inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line,
- unsigned int upper_dim, const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors)
+inline arm_compute::Status error_on_mismatching_shapes(const char *function,
+ const char *file,
+ const int line,
+ unsigned int upper_dim,
+ const ITensor *tensor_1,
+ const ITensor *tensor_2,
+ Ts... tensors)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_1 == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_2 == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensors...));
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_shapes(function, file, line, upper_dim, tensor_1->info(), tensor_2->info(),
- detail::get_tensor_info_t()(tensors)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ ::arm_compute::error_on_mismatching_shapes(function, file, line, upper_dim, tensor_1->info(), tensor_2->info(),
+ detail::get_tensor_info_t()(tensors)...));
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(...) \
@@ -539,19 +585,18 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con
* @return Status
*/
template
-inline arm_compute::Status error_on_mismatching_data_layouts(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info, Ts... tensor_infos)
+inline arm_compute::Status error_on_mismatching_data_layouts(
+ const char *function, const char *file, const int line, const ITensorInfo *tensor_info, Ts... tensor_infos)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...));
- DataLayout &&tensor_data_layout = tensor_info->data_layout();
- const std::array tensors_infos_array{ { tensor_infos... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(), [&](const ITensorInfo * tensor_info_obj)
- {
- return tensor_info_obj->data_layout() != tensor_data_layout;
- }),
- function, file, line, "Tensors have different data layouts");
+ DataLayout &&tensor_data_layout = tensor_info->data_layout();
+ const std::array tensors_infos_array{{tensor_infos...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(),
+ [&](const ITensorInfo *tensor_info_obj)
+ { return tensor_info_obj->data_layout() != tensor_data_layout; }),
+ function, file, line, "Tensors have different data layouts");
return arm_compute::Status{};
}
/** Return an error if the passed tensors have different data layouts
@@ -565,19 +610,21 @@ inline arm_compute::Status error_on_mismatching_data_layouts(const char *functio
* @return Status
*/
template
-inline arm_compute::Status error_on_mismatching_data_layouts(const char *function, const char *file, const int line,
- const ITensor *tensor, Ts... tensors)
+inline arm_compute::Status error_on_mismatching_data_layouts(
+ const char *function, const char *file, const int line, const ITensor *tensor, Ts... tensors)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward(tensors)...));
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_layouts(function, file, line, tensor->info(),
- detail::get_tensor_info_t()(tensors)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_layouts(
+ function, file, line, tensor->info(), detail::get_tensor_info_t()(tensors)...));
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_LAYOUT(...) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__))
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__))
/** Return an error if the passed two tensor infos have different data types
*
@@ -590,19 +637,18 @@ inline arm_compute::Status error_on_mismatching_data_layouts(const char *functio
* @return Status
*/
template
-inline arm_compute::Status error_on_mismatching_data_types(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info, Ts... tensor_infos)
+inline arm_compute::Status error_on_mismatching_data_types(
+ const char *function, const char *file, const int line, const ITensorInfo *tensor_info, Ts... tensor_infos)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...));
- DataType &&tensor_data_type = tensor_info->data_type();
- const std::array tensors_infos_array{ { tensor_infos... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(), [&](const ITensorInfo * tensor_info_obj)
- {
- return tensor_info_obj->data_type() != tensor_data_type;
- }),
- function, file, line, "Tensors have different data types");
+ DataType &&tensor_data_type = tensor_info->data_type();
+ const std::array tensors_infos_array{{tensor_infos...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(),
+ [&](const ITensorInfo *tensor_info_obj)
+ { return tensor_info_obj->data_type() != tensor_data_type; }),
+ function, file, line, "Tensors have different data types");
return arm_compute::Status{};
}
/** Return an error if the passed two tensors have different data types
@@ -616,19 +662,21 @@ inline arm_compute::Status error_on_mismatching_data_types(const char *function,
* @return Status
*/
template
-inline arm_compute::Status error_on_mismatching_data_types(const char *function, const char *file, const int line,
- const ITensor *tensor, Ts... tensors)
+inline arm_compute::Status error_on_mismatching_data_types(
+ const char *function, const char *file, const int line, const ITensor *tensor, Ts... tensors)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensors...));
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_types(function, file, line, tensor->info(),
- detail::get_tensor_info_t()(tensors)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_types(
+ function, file, line, tensor->info(), detail::get_tensor_info_t()(tensors)...));
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(...) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__))
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__))
/** Return an error if the passed tensor infos have different asymmetric quantized data types or different quantization info
*
@@ -644,28 +692,32 @@ inline arm_compute::Status error_on_mismatching_data_types(const char *function,
* @return Status
*/
template
-inline arm_compute::Status error_on_mismatching_quantization_info(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos)
+inline arm_compute::Status error_on_mismatching_quantization_info(const char *function,
+ const char *file,
+ const int line,
+ const ITensorInfo *tensor_info_1,
+ const ITensorInfo *tensor_info_2,
+ Ts... tensor_infos)
{
DataType &&first_data_type = tensor_info_1->data_type();
const QuantizationInfo first_quantization_info = tensor_info_1->quantization_info();
- if(!is_data_type_quantized(first_data_type))
+ if (!is_data_type_quantized(first_data_type))
{
return arm_compute::Status{};
}
- const std::array < const ITensorInfo *, 1 + sizeof...(Ts) > tensor_infos_array{ { tensor_info_2, std::forward(tensor_infos)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), [&](const ITensorInfo * tensor_info)
- {
- return tensor_info->data_type() != first_data_type;
- }),
- function, file, line, "Tensors have different asymmetric quantized data types");
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), [&](const ITensorInfo * tensor_info)
- {
- return tensor_info->quantization_info() != first_quantization_info;
- }),
- function, file, line, "Tensors have different quantization information");
+ const std::array tensor_infos_array{
+ {tensor_info_2, std::forward(tensor_infos)...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(),
+ [&](const ITensorInfo *tensor_info)
+ { return tensor_info->data_type() != first_data_type; }),
+ function, file, line, "Tensors have different asymmetric quantized data types");
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(
+ std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(),
+ [&](const ITensorInfo *tensor_info)
+ { return tensor_info->quantization_info() != first_quantization_info; }),
+ function, file, line, "Tensors have different quantization information");
return arm_compute::Status{};
}
@@ -683,17 +735,24 @@ inline arm_compute::Status error_on_mismatching_quantization_info(const char *fu
* @return Status
*/
template
-inline arm_compute::Status error_on_mismatching_quantization_info(const char *function, const char *file, const int line,
- const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors)
+inline arm_compute::Status error_on_mismatching_quantization_info(const char *function,
+ const char *file,
+ const int line,
+ const ITensor *tensor_1,
+ const ITensor *tensor_2,
+ Ts... tensors)
{
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_quantization_info(function, file, line, tensor_1->info(), tensor_2->info(),
- detail::get_tensor_info_t()(tensors)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ ::arm_compute::error_on_mismatching_quantization_info(function, file, line, tensor_1->info(), tensor_2->info(),
+ detail::get_tensor_info_t