diff --git a/Android.mk b/Android.mk
index f008840e30de0a7a03303bf9bf078ec9c6da30a0..e83000414f62a04437cbd78160fa46b655650be0 100644
--- a/Android.mk
+++ b/Android.mk
@@ -31,32 +31,39 @@ LOCAL_EXPORT_C_INCLUDES := \
$(ARMNN_SOURCE_UTILS_HEADER_PATH)
LOCAL_C_INCLUDES := \
- $(OPENCL_HEADER_PATH) \
- $(NN_HEADER_PATH) \
- $(ARMNN_HEADER_PATH) \
- $(ARMNN_SOURCE_HEADER_PATH) \
- $(ARMNN_SOURCE_UTILS_HEADER_PATH)
+ $(OPENCL_HEADER_PATH) \
+ $(NN_HEADER_PATH) \
+ $(ARMNN_HEADER_PATH) \
+ $(ARMNN_SOURCE_HEADER_PATH) \
+ $(ARMNN_SOURCE_UTILS_HEADER_PATH)
LOCAL_SRC_FILES := \
+ src/armnnUtils/DotSerializer.cpp \
+ src/armnnUtils/FloatingPointConverter.cpp \
src/armnnUtils/Logging.cpp \
src/armnnUtils/Permute.cpp \
- src/armnnUtils/DotSerializer.cpp \
src/armnn/backends/ArmComputeTensorUtils.cpp \
src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.cpp \
src/armnn/backends/ClWorkloads/ClActivationUint8Workload.cpp \
+ src/armnn/backends/ClWorkloads/ClAdditionBaseWorkload.cpp \
src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.cpp \
+ src/armnn/backends/ClWorkloads/ClAdditionUint8Workload.cpp \
src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.cpp \
src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.cpp \
src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.cpp \
src/armnn/backends/ClWorkloads/ClConstantUint8Workload.cpp \
+ src/armnn/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp \
+ src/armnn/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp \
src/armnn/backends/ClWorkloads/ClConvolution2dBaseWorkload.cpp \
src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp \
src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp \
+ src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp \
src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.cpp \
src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp \
src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.cpp \
src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp \
src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.cpp \
+ src/armnn/backends/ClWorkloads/ClLstmFloat32Workload.cpp \
src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.cpp \
src/armnn/backends/ClWorkloads/ClMergerUint8Workload.cpp \
src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.cpp \
@@ -68,6 +75,7 @@ LOCAL_SRC_FILES := \
src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.cpp \
src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.cpp \
src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.cpp \
+ src/armnn/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp \
src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp \
src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp \
src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.cpp \
@@ -78,14 +86,18 @@ LOCAL_SRC_FILES := \
src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.cpp \
src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.cpp \
src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp \
+ src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp \
+ src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp \
src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp \
src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp \
src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp \
+ src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp \
src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.cpp \
src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp \
src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.cpp \
src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp \
src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp \
+ src/armnn/backends/NeonWorkloads/NeonLstmFloat32Workload.cpp \
src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.cpp \
src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp \
src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.cpp \
@@ -96,6 +108,7 @@ LOCAL_SRC_FILES := \
src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp \
src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.cpp \
src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp \
+ src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp \
src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp \
src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp \
src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.cpp \
@@ -129,6 +142,7 @@ LOCAL_SRC_FILES := \
src/armnn/backends/RefWorkloads/Activation.cpp \
src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.cpp \
src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.cpp \
+ src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.cpp \
src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp \
src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp \
src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.cpp \
@@ -147,21 +161,25 @@ LOCAL_SRC_FILES := \
src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.cpp \
src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp \
src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp \
+ src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.cpp \
+ src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.cpp \
src/armnn/backends/MemCopyWorkload.cpp \
src/armnn/backends/WorkloadData.cpp \
src/armnn/backends/WorkloadFactory.cpp \
- src/armnn/backends/AclBaseMemoryManager.cpp \
src/armnn/layers/ActivationLayer.cpp \
src/armnn/layers/AdditionLayer.cpp \
src/armnn/layers/BatchNormalizationLayer.cpp \
src/armnn/layers/ConstantLayer.cpp \
src/armnn/layers/Convolution2dLayer.cpp \
+ src/armnn/layers/ConvertFp16ToFp32Layer.cpp \
+ src/armnn/layers/ConvertFp32ToFp16Layer.cpp \
src/armnn/layers/DepthwiseConvolution2dLayer.cpp \
src/armnn/layers/FakeQuantizationLayer.cpp \
src/armnn/layers/FloorLayer.cpp \
src/armnn/layers/FullyConnectedLayer.cpp \
src/armnn/layers/InputLayer.cpp \
src/armnn/layers/L2NormalizationLayer.cpp \
+ src/armnn/layers/LstmLayer.cpp \
src/armnn/layers/MemCopyLayer.cpp \
src/armnn/layers/MergerLayer.cpp \
src/armnn/layers/MultiplicationLayer.cpp \
@@ -182,20 +200,33 @@ LOCAL_SRC_FILES := \
src/armnn/InternalTypes.cpp \
src/armnn/Layer.cpp \
src/armnn/LoadedNetwork.cpp \
+ src/armnn/NeonInterceptorScheduler.cpp \
+ src/armnn/NeonTimer.cpp \
src/armnn/Network.cpp \
src/armnn/backends/OutputHandler.cpp \
+ src/armnn/OpenClTimer.cpp \
+ src/armnn/WallClockTimer.cpp \
+ src/armnn/ProfilingEvent.cpp \
src/armnn/Profiling.cpp \
+ src/armnn/JsonPrinter.cpp \
src/armnn/Tensor.cpp \
src/armnn/Utils.cpp \
src/armnn/LayerSupport.cpp \
+ src/armnn/Observable.cpp \
src/armnn/backends/RefLayerSupport.cpp \
src/armnn/backends/ClLayerSupport.cpp \
src/armnn/backends/NeonLayerSupport.cpp \
src/armnn/backends/NeonWorkloadUtils.cpp \
- src/armnn/backends/NeonWorkloadFactory.cpp
+ src/armnn/backends/NeonWorkloadFactory.cpp \
+ src/armnn/memory/BaseMemoryManager.cpp \
+ src/armnn/memory/BlobLifetimeManager.cpp \
+ src/armnn/memory/BlobMemoryPool.cpp \
+ src/armnn/memory/OffsetLifetimeManager.cpp \
+ src/armnn/memory/OffsetMemoryPool.cpp \
+ src/armnn/memory/PoolManager.cpp
LOCAL_STATIC_LIBRARIES := \
- armnn-arm_compute \
+ armnn-arm_compute \
libboost_log \
libboost_system \
libboost_thread
@@ -213,9 +244,20 @@ LOCAL_CFLAGS := \
include $(BUILD_STATIC_LIBRARY)
+###############
+# armnn-tests #
+###############
include $(CLEAR_VARS)
-LOCAL_C_INCLUDES := \
+LOCAL_MODULE := armnn-tests
+LOCAL_MODULE_TAGS := eng optional
+LOCAL_ARM_MODE := arm
+LOCAL_PROPRIETARY_MODULE := true
+
+# Mark source files as dependent on Android.mk
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
+
+LOCAL_C_INCLUDES := \
$(OPENCL_HEADER_PATH) \
$(NN_HEADER_PATH) \
$(ARMNN_HEADER_PATH) \
@@ -230,14 +272,19 @@ LOCAL_CFLAGS := \
-DARMCOMPUTECL_ENABLED \
-DARMCOMPUTENEON_ENABLED
-LOCAL_SRC_FILES := \
+LOCAL_SRC_FILES := \
src/armnn/test/UnitTests.cpp \
src/armnn/test/EndToEndTest.cpp \
src/armnn/test/UtilsTests.cpp \
src/armnn/test/GraphTests.cpp \
src/armnn/test/RuntimeTests.cpp \
src/armnn/test/TensorTest.cpp \
- src/armnn/test/Network_test.cpp \
+ src/armnn/test/NeonTimerTest.cpp \
+ src/armnn/test/NetworkTests.cpp \
+ src/armnn/test/InstrumentTests.cpp \
+ src/armnn/test/OpenClTimerTest.cpp \
+ src/armnn/test/ProfilingEventTest.cpp \
+ src/armnn/test/ObservableTest.cpp \
src/armnn/backends/test/IsLayerSupportedTest.cpp \
src/armnn/backends/test/Reference.cpp \
src/armnn/backends/test/WorkloadDataValidation.cpp \
@@ -259,7 +306,7 @@ LOCAL_STATIC_LIBRARIES := \
libboost_thread \
armnn-arm_compute
-LOCAL_SHARED_LIBRARIES := \
+LOCAL_SHARED_LIBRARIES := \
libbase \
libhidlbase \
libhidltransport \
@@ -271,18 +318,5 @@ LOCAL_SHARED_LIBRARIES := \
android.hidl.memory@1.0 \
libOpenCL
-LOCAL_MODULE := armnn-tests
-
-LOCAL_MODULE_TAGS := eng optional
-
-LOCAL_ARM_MODE := arm
-
-# Mark source files as dependent on Android.mk
-LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
-
-LOCAL_PROPRIETARY_MODULE := true
-
include $(BUILD_EXECUTABLE)
-
-
diff --git a/BuildGuideAndroidNDK.md b/BuildGuideAndroidNDK.md
index 5d6f523632a6129ac9d900947f076c9a6a890397..8b2e2a86ba8590b7fa459da8e8ea0a854175a546 100644
--- a/BuildGuideAndroidNDK.md
+++ b/BuildGuideAndroidNDK.md
@@ -164,8 +164,8 @@ All downloaded or generated files will be saved inside the `~/armnn-devenv` dire
CC=aarch64-linux-android-clang \
CXX_FLAGS="-fPIE -fPIC" \
cmake .. \
- -DCMAKE_SYSTEM_NAME=Linux \
- -DCMAKE_EXE_LINKER_FLAGS=-pie \
+ -DCMAKE_SYSTEM_NAME=Android \
+ -DCMAKE_EXE_LINKER_FLAGS="-pie -llog" \
-DARMCOMPUTE_ROOT=$HOME/armnn-devenv/ComputeLibrary/ \
-DARMCOMPUTE_BUILD_DIR=$HOME/armnn-devenv/ComputeLibrary/build \
-DBOOST_ROOT=$HOME/armnn-devenv/boost/install/ \
@@ -181,11 +181,11 @@ All downloaded or generated files will be saved inside the `~/armnn-devenv` dire
* Push the build results to an Android device and make symbolic links for shared libraries:
```bash
- adb push libarmnnTfParser.so libarmnn.so UnitTests \
- $NDK/sources/cxx-stl/llvm-libc++/libs/arm64-v8a/libc++_shared.so \
- /data/local/tmp/
- adb push $HOME/armnn-devenv/google/arm64_pb_install/lib/libprotobuf.so \
- /data/local/tmp/libprotobuf.so.15.0.1
+ adb push libarmnnTfParser.so /data/local/tmp/
+ adb push libarmnn.so /data/local/tmp/
+ adb push UnitTests /data/local/tmp/
+ adb push $NDK/sources/cxx-stl/llvm-libc++/libs/arm64-v8a/libc++_shared.so /data/local/tmp/
+ adb push $HOME/armnn-devenv/google/arm64_pb_install/lib/libprotobuf.so /data/local/tmp/libprotobuf.so.15.0.1
adb shell 'ln -s libprotobuf.so.15.0.1 /data/local/tmp/libprotobuf.so.15'
adb shell 'ln -s libprotobuf.so.15.0.1 /data/local/tmp/libprotobuf.so'
```
diff --git a/BuildGuideCrossCompilation.md b/BuildGuideCrossCompilation.md
new file mode 100644
index 0000000000000000000000000000000000000000..df015a08f446b6b05b64162736f86799cce9eef0
--- /dev/null
+++ b/BuildGuideCrossCompilation.md
@@ -0,0 +1,265 @@
+# How to Cross-Compile ArmNN on x86_64 for arm64
+
+* [Introduction](#introduction)
+* [Build and install Google's Protobuf library](#buildProtobuf)
+* [Build Caffe for x86_64](#buildCaffe)
+* [Cross-compiling ToolChain](#installCCT)
+* [Build Boost library for arm64](#installBaarch)
+* [Build Compute Library](#buildCL)
+* [Build ArmNN](#buildANN)
+* [Run Unit Tests](#unittests)
+* [Troubleshooting and Errors](#troubleshooting)
+
+
+#### Introduction
+These are the step-by-step instructions for cross-compiling ArmNN on an x86_64 system to target an arm64 system. This build flow has been tested with Ubuntu 16.04.
+The instructions show how to build the ArmNN core library and the Boost, Protobuf, Caffe and Compute Library dependencies necessary for compilation.
+
+#### Build and install Google's Protobuf library
+
+* Get protobuf-all-3.5.1.tar.gz from here: https://github.com/google/protobuf/releases
+* Extract:
+ ```bash
+ tar -zxvf protobuf-all-3.5.1.tar.gz
+ cd protobuf-3.5.1
+ ```
+* Build a native (x86_64) version of the protobuf libraries and compiler (protoc):
+ (Requires curl, autoconf, libtool, and other build dependencies if not previously installed: sudo apt install curl autoconf libtool build-essential g++)
+ ```
+ mkdir x86_64_build
+ cd x86_64_build
+ ../configure --prefix=$HOME/armnn-devenv/google/x86_64_pb_install
+ make install -j16
+ cd ..
+ ```
+* Build the arm64 version of the protobuf libraries:
+ ```
+ mkdir arm64_build
+ cd arm64_build
+ CC=aarch64-linux-gnu-gcc \
+ CXX=aarch64-linux-gnu-g++ \
+ ../configure --host=aarch64-linux \
+ --prefix=$HOME/armnn-devenv/google/arm64_pb_install \
+ --with-protoc=$HOME/armnn-devenv/google/x86_64_pb_install/bin/protoc
+ make install -j16
+ cd ..
+ ```
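+* Optionally, confirm that the resulting library is an arm64 binary (a quick sanity check using the install path configured above):
+ ```bash
+ file $HOME/armnn-devenv/google/arm64_pb_install/lib/libprotobuf.so.15.0.1
+ # expected: ELF 64-bit LSB shared object, ARM aarch64, ...
+ ```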
+
+#### Build Caffe for x86_64
+* The following steps are for Ubuntu 16.04 and are taken from the full Caffe installation documentation: http://caffe.berkeleyvision.org/install_apt.html
+* Install dependencies:
+ ```bash
+ sudo apt-get install libleveldb-dev libsnappy-dev libopencv-dev libhdf5-serial-dev
+ sudo apt-get install --no-install-recommends libboost-all-dev
+ sudo apt-get install libgflags-dev libgoogle-glog-dev liblmdb-dev
+ sudo apt-get install libopenblas-dev
+ sudo apt-get install libatlas-base-dev
+ ```
+* Download Caffe-Master from: https://github.com/BVLC/caffe
+ ```bash
+ git clone https://github.com/BVLC/caffe.git
+ cd caffe
+ cp Makefile.config.example Makefile.config
+ ```
+* Adjust Makefile.config (for example, if using Anaconda Python, or if cuDNN is desired):
+ ```
+ # CPU-only version:
+ CPU_ONLY := 1
+ # Add the hdf5 and protobuf include and library directories (replace $HOME with your actual /home/username dir):
+ INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial/ $HOME/armnn-devenv/google/x86_64_pb_install/include/
+ LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu/hdf5/serial/ $HOME/armnn-devenv/google/x86_64_pb_install/lib/
+ # g++ needs to be version 5:
+ CUSTOM_CXX := g++-5
+ ```
+* Setup environment:
+ ```bash
+ export PATH=$HOME/armnn-devenv/google/x86_64_pb_install/bin/:$PATH
+ export LD_LIBRARY_PATH=$HOME/armnn-devenv/google/x86_64_pb_install/lib/:$LD_LIBRARY_PATH
+ ```
+* Compilation with Make:
+ ```bash
+ make all
+ make test
+ make runtest
+ ```
+ These should all run without errors.
+* caffe.pb.h and caffe.pb.cc will be needed when building ArmNN's Caffe Parser.
+
+#### Cross-compiling ToolChain
+* Install the standard cross-compilation libraries for arm64:
+ ```
+ sudo apt install crossbuild-essential-arm64
+ ```
+#### Build Boost library for arm64
+* Download Boost version 1.64 from http://www.boost.org/doc/libs/1_64_0/more/getting_started/unix-variants.html (version 1.66 is not supported):
+ ```bash
+ tar -zxvf boost_1_64_0.tar.gz
+ cd boost_1_64_0
+ echo "using gcc : arm : aarch64-linux-gnu-g++ ;" > user_config.jam
+ ./bootstrap.sh --prefix=$HOME/armnn-devenv/boost_arm64_install
+ ./b2 install toolset=gcc-arm link=static cxxflags=-fPIC --with-filesystem --with-test --with-log --with-program_options -j32 --user-config=user_config.jam
+ ```
+
+#### Build Compute Library
+* Building the Arm Compute Library:
+ ```bash
+ git clone https://github.com/ARM-software/ComputeLibrary.git
+ cd ComputeLibrary/
+ scons arch=arm64-v8a neon=1 opencl=1 embed_kernels=1 extra_cxx_flags="-fPIC" -j8 internal_only=0
+ ```
+
+#### Build ArmNN
+* Compile ArmNN for arm64:
+ ```bash
+ git clone https://github.com/ARM-software/armnn.git
+ cd armnn
+ mkdir build
+ cd build
+ ```
+
+* Use CMake to configure your build environment: update the following script and run it from the armnn/build directory to set up the ArmNN build:
+ ```bash
+ #!/bin/bash
+ CXX=aarch64-linux-gnu-g++ \
+ CC=aarch64-linux-gnu-gcc \
+ cmake .. \
+ -DARMCOMPUTE_ROOT=$HOME/armnn-devenv/ComputeLibrary \
+ -DARMCOMPUTE_BUILD_DIR=$HOME/armnn-devenv/ComputeLibrary/build/ \
+ -DBOOST_ROOT=$HOME/armnn-devenv/boost_arm64_install/ \
+ -DARMCOMPUTENEON=1 -DARMCOMPUTECL=1 \
+ -DCAFFE_GENERATED_SOURCES=$HOME/armnn-devenv/caffe/build/src \
+ -DBUILD_CAFFE_PARSER=1 \
+ -DPROTOBUF_ROOT=$HOME/armnn-devenv/google/x86_64_pb_install/ \
+ -DPROTOBUF_LIBRARY_DEBUG=$HOME/armnn-devenv/google/arm64_pb_install/lib/libprotobuf.so.15.0.1 \
+ -DPROTOBUF_LIBRARY_RELEASE=$HOME/armnn-devenv/google/arm64_pb_install/lib/libprotobuf.so.15.0.1
+ ```
+* Run the build
+ ```bash
+ make -j32
+ ```
+
+#### Run Unit Tests
+* Copy the build folder to an arm64 Linux machine (an scp sketch is shown at the end of this section)
+* Copy the libprotobuf.so.15.0.1 library file to the build folder
+* cd to the build folder on your arm64 machine and set your LD_LIBRARY_PATH to its current location:
+ ```
+ cd build/
+ export LD_LIBRARY_PATH=`pwd`
+ ```
+* Run the UnitTests:
+ ```
+ ./UnitTests
+ Running 567 test cases...
+
+ *** No errors detected
+ ```
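+* For example, the copy steps above can be done over the network with scp (a sketch; the host name and destination path are placeholders for your own setup):
+ ```bash
+ # copy the ArmNN build folder and the arm64 protobuf library to the target machine
+ scp -r build user@arm64-machine:~/armnn/
+ scp $HOME/armnn-devenv/google/arm64_pb_install/lib/libprotobuf.so.15.0.1 user@arm64-machine:~/armnn/build/
+ ```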
+#### Troubleshooting and Errors:
+#### Error adding symbols: File in wrong format
+* When building ArmNN:
+ ```
+ /usr/local/lib/libboost_log.a: error adding symbols: File in wrong format
+ collect2: error: ld returned 1 exit status
+ CMakeFiles/armnn.dir/build.make:4028: recipe for target 'libarmnn.so' failed
+ make[2]: *** [libarmnn.so] Error 1
+ CMakeFiles/Makefile2:105: recipe for target 'CMakeFiles/armnn.dir/all' failed
+ make[1]: *** [CMakeFiles/armnn.dir/all] Error 2
+ Makefile:127: recipe for target 'all' failed
+ make: *** [all] Error 2
+ ```
+* The Boost libraries are not compiled for the correct architecture; try recompiling them for arm64.
+##
+#### Virtual memory exhausted
+* When compiling the boost libraries:
+ ```bash
+ virtual memory exhausted: Cannot allocate memory
+ ```
+* Not enough memory available to compile. Increase the amount of RAM or swap space available.
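+* For example, swap space can be increased with a swap file (a minimal sketch; the 4 GiB size is only an example, adjust it to suit your machine):
+ ```bash
+ # create and enable a 4 GiB swap file
+ sudo fallocate -l 4G /swapfile
+ sudo chmod 600 /swapfile
+ sudo mkswap /swapfile
+ sudo swapon /swapfile
+ ```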
+
+##
+#### Unrecognized command line option '-m64'
+* When compiling the boost libraries:
+ ```bash
+ aarch64-linux-gnu-g++: error: unrecognized command line option ‘-m64’
+ ```
+* Clean the boost library directory before trying to build with a different architecture:
+ ```bash
+ sudo ./b2 clean
+ ```
+* When b2 is re-run, the configuration checks should show the following for arm64:
+ ```bash
+ - 32-bit : no
+ - 64-bit : yes
+ - arm : yes
+ ```
+
+##
+#### Missing libz.so.1
+* When compiling ArmNN:
+ ```bash
+ /usr/lib/gcc-cross/aarch64-linux-gnu/5/../../../../aarch64-linux-gnu/bin/ld: warning: libz.so.1, needed by /home//armNN/usr/lib64/libprotobuf.so.15.0.0, not found (try using -rpath or -rpath-link)
+ ```
+
+* The arm64 libraries for libz.so.1 are missing; they can be added by adding a second architecture to dpkg and explicitly installing them:
+ ```bash
+ sudo dpkg --add-architecture arm64
+ sudo apt-get update
+ sudo apt-get install zlib1g:arm64
+ sudo ldconfig
+ ```
+* If apt-get update returns 404 errors for the arm64 repos, refer to the 'Unable to install arm64 packages after adding arm64 architecture' section below.
+* Alternatively, the missing arm64 version of libz.so.1 can be downloaded and installed from a .deb package here:
+ https://launchpad.net/ubuntu/wily/arm64/zlib1g/1:1.2.8.dfsg-2ubuntu4
+ ```bash
+ sudo dpkg -i zlib1g_1.2.8.dfsg-2ubuntu4_arm64.deb
+ ```
+##
+#### Unable to install arm64 packages after adding arm64 architecture
+* Running sudo apt-get update should add all of the required arm64 repos, but if it does not, or you are getting 404 errors, the following instructions can be used to add the repos manually:
+* From Ask Ubuntu:
+https://askubuntu.com/questions/430705/how-to-use-apt-get-to-download-multi-arch-library/430718
+* Open /etc/apt/sources.list with your preferred text editor.
+
+* Mark all the current (default) repos as [arch=amd64], e.g.
+ ```bash
+ deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ xenial main restricted
+ ```
+* Then add the following:
+ ```bash
+ deb [arch=arm64] http://ports.ubuntu.com/ xenial main restricted
+ deb [arch=arm64] http://ports.ubuntu.com/ xenial-updates main restricted
+ deb [arch=arm64] http://ports.ubuntu.com/ xenial universe
+ deb [arch=arm64] http://ports.ubuntu.com/ xenial-updates universe
+ deb [arch=arm64] http://ports.ubuntu.com/ xenial multiverse
+ deb [arch=arm64] http://ports.ubuntu.com/ xenial-updates multiverse
+ deb [arch=arm64] http://ports.ubuntu.com/ xenial-backports main restricted universe multiverse
+ ```
+* Update and install again:
+ ```bash
+ sudo apt-get update
+ sudo apt-get install zlib1g:arm64
+ sudo ldconfig
+ ```
+##
+#### Undefined references to google::protobuf:: functions
+* When compiling ArmNN, there are multiple errors of the following type:
+ ```
+ libarmnnCaffeParser.so: undefined reference to `google::protobuf:*
+ ```
+* The protobuf compilation libraries are missing or out of date.
+ Use the command 'protoc --version' to check which version of protobuf is available (version 3.5.1 is required).
+ Follow the instructions above to install protobuf 3.5.1.
+ Note that this will require you to recompile Caffe for x86_64.
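+* For example, to check the protoc version currently on your PATH (assuming the environment setup described above):
+ ```bash
+ protoc --version
+ # expected output for a correct installation: libprotoc 3.5.1
+ ```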
+
+##
+#### Errors on strict-aliasing rules when compiling the Compute Library
+* When compiling the Compute Library there are multiple errors on strict-aliasing rules:
+ ```
+ cc1plus: error: unrecognized command line option ‘-Wno-implicit-fallthrough’ [-Werror]
+ ```
+* Add Werror=0 to the scons command:
+ ```
+ scons arch=arm64-v8a neon=1 opencl=1 embed_kernels=1 extra_cxx_flags="-fPIC" -j8 internal_only=0 Werror=0
+ ```
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f40a21c10a0067f641090fda319ba022af060ca6..c06a869af5b5ad622ad4c4af07bf157fd16eba13 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,6 +15,8 @@ if (BUILD_TESTS)
add_subdirectory(tests)
endif()
+add_subdirectory(samples)
+
# Include the additional cmake files in their own target so that they will appear nicely in IDEs
add_custom_target(AdditionalCMakeFiles SOURCES ${additional_cmake_files})
@@ -31,6 +33,12 @@ list(APPEND armnnUtils_sources
src/armnnUtils/HeapProfiling.hpp
src/armnnUtils/LeakChecking.cpp
src/armnnUtils/LeakChecking.hpp
+ src/armnnUtils/CsvReader.cpp
+ src/armnnUtils/CsvReader.hpp
+ src/armnnUtils/FloatingPointConverter.cpp
+ src/armnnUtils/FloatingPointConverter.hpp
+ src/armnnUtils/VerificationHelpers.hpp
+ src/armnnUtils/VerificationHelpers.cpp
)
if(BUILD_TF_PARSER OR BUILD_CAFFE_PARSER)
list(APPEND armnnUtils_sources
@@ -45,6 +53,8 @@ if(BUILD_CAFFE_PARSER)
set(armnn_caffe_parser_sources)
list(APPEND armnn_caffe_parser_sources
include/armnnCaffeParser/ICaffeParser.hpp
+ src/armnnCaffeParser/RecordByRecordCaffeParser.hpp
+ src/armnnCaffeParser/RecordByRecordCaffeParser.cpp
src/armnnCaffeParser/CaffeParser.hpp
src/armnnCaffeParser/CaffeParser.cpp
${CAFFE_GENERATED_SOURCES}/caffe/proto/caffe.pb.cc
@@ -63,6 +73,30 @@ if(BUILD_CAFFE_PARSER)
target_link_libraries(armnnCaffeParser armnn)
target_link_libraries(armnnCaffeParser ${PROTOBUF_LIBRARIES})
+
+endif()
+
+if(BUILD_ONNX_PARSER)
+ set(armnn_onnx_parser_sources)
+ list(APPEND armnn_onnx_parser_sources
+ include/armnnOnnxParser/IOnnxParser.hpp
+ src/armnnOnnxParser/OnnxParser.hpp
+ src/armnnOnnxParser/OnnxParser.cpp
+ ${ONNX_GENERATED_SOURCES}/onnx/onnx.pb.cc
+ )
+ # The generated onnx protobuf .cc files are not warning clean and we can't fix them.
+ if(COMPILER_IS_GNU_LIKE)
+ set_source_files_properties(${ONNX_GENERATED_SOURCES}/onnx/onnx.pb.cc PROPERTIES COMPILE_FLAGS "-Wno-conversion -Wno-sign-conversion")
+ endif()
+
+ add_library_ex(armnnOnnxParser SHARED ${armnn_onnx_parser_sources})
+
+ target_include_directories(armnnOnnxParser PRIVATE src/armnnUtils)
+
+ target_link_libraries(armnnOnnxParser armnn)
+
+ # Protobuf
+ target_link_libraries(armnnOnnxParser ${PROTOBUF_LIBRARIES})
endif()
if(BUILD_TF_PARSER)
@@ -88,7 +122,25 @@ if(BUILD_TF_PARSER)
target_link_libraries(armnnTfParser ${PROTOBUF_LIBRARIES})
endif()
+if(BUILD_TF_LITE_PARSER)
+ set(armnn_tf_lite_parser_sources)
+ list(APPEND armnn_tf_lite_parser_sources
+ include/armnnTfLiteParser/ITfLiteParser.hpp
+ src/armnnTfLiteParser/TfLiteParser.hpp
+ src/armnnTfLiteParser/TfLiteParser.cpp
+ )
+
+ add_library_ex(armnnTfLiteParser SHARED ${armnn_tf_lite_parser_sources})
+
+ target_include_directories(armnnTfLiteParser PRIVATE src/armnnUtils)
+
+ target_link_libraries(armnnTfLiteParser ${Boost_FILESYSTEM_LIBRARY} ${Boost_THREAD_LIBRARY})
+ target_link_libraries(armnnTfLiteParser armnn ${FLATBUFFERS_LIBRARY})
+endif()
+
# ArmNN source files required for all build options
+include_directories(SYSTEM third-party)
+
list(APPEND armnn_sources
include/armnn/ArmNN.hpp
include/armnn/Descriptors.hpp
@@ -126,9 +178,8 @@ list(APPEND armnn_sources
src/armnn/backends/WorkloadData.cpp
src/armnn/backends/WorkloadFactory.hpp
src/armnn/backends/WorkloadFactory.cpp
- src/armnn/backends/AclBaseMemoryManager.hpp
- src/armnn/backends/AclBaseMemoryManager.cpp
src/armnn/backends/WorkloadInfo.hpp
+ src/armnn/backends/WorkloadUtils.hpp
src/armnn/backends/MemCopyWorkload.cpp
src/armnn/backends/MemCopyWorkload.hpp
src/armnn/backends/RefWorkloads/Broadcast.hpp
@@ -222,6 +273,12 @@ list(APPEND armnn_sources
src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.hpp
src/armnn/backends/RefWorkloads/RefPermuteWorkload.hpp
src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp
+ src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.cpp
+ src/armnn/backends/RefWorkloads/RefLstmFloat32Workload.hpp
+ src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.cpp
+ src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp
+ src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.cpp
+ src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp
src/armnn/layers/LayerCloneBase.hpp
src/armnn/layers/LayerWithParameters.hpp
src/armnn/layers/ActivationLayer.hpp
@@ -234,6 +291,10 @@ list(APPEND armnn_sources
src/armnn/layers/ConstantLayer.cpp
src/armnn/layers/Convolution2dLayer.hpp
src/armnn/layers/Convolution2dLayer.cpp
+ src/armnn/layers/ConvertFp16ToFp32Layer.hpp
+ src/armnn/layers/ConvertFp16ToFp32Layer.cpp
+ src/armnn/layers/ConvertFp32ToFp16Layer.hpp
+ src/armnn/layers/ConvertFp32ToFp16Layer.cpp
src/armnn/layers/DepthwiseConvolution2dLayer.hpp
src/armnn/layers/DepthwiseConvolution2dLayer.cpp
src/armnn/layers/FakeQuantizationLayer.hpp
@@ -246,6 +307,8 @@ list(APPEND armnn_sources
src/armnn/layers/InputLayer.cpp
src/armnn/layers/L2NormalizationLayer.hpp
src/armnn/layers/L2NormalizationLayer.cpp
+ src/armnn/layers/LstmLayer.cpp
+ src/armnn/layers/LstmLayer.hpp
src/armnn/layers/MemCopyLayer.hpp
src/armnn/layers/MemCopyLayer.cpp
src/armnn/layers/MergerLayer.hpp
@@ -268,8 +331,11 @@ list(APPEND armnn_sources
src/armnn/layers/SoftmaxLayer.cpp
src/armnn/layers/SplitterLayer.hpp
src/armnn/layers/SplitterLayer.cpp
+ src/armnn/Half.hpp
src/armnn/InternalTypes.hpp
src/armnn/InternalTypes.cpp
+ src/armnn/JsonPrinter.hpp
+ src/armnn/JsonPrinter.cpp
src/armnn/LayerFwd.hpp
src/armnn/Layer.hpp
src/armnn/Layer.cpp
@@ -279,6 +345,7 @@ list(APPEND armnn_sources
src/armnn/SerializeLayerParameters.cpp
src/armnn/SerializeLayerParameters.hpp
src/armnn/Descriptors.cpp
+ src/armnn/DeviceSpec.hpp
src/armnn/LoadedNetwork.hpp
src/armnn/LoadedNetwork.cpp
src/armnn/Exceptions.cpp
@@ -286,22 +353,35 @@ list(APPEND armnn_sources
src/armnn/Graph.cpp
src/armnn/Network.hpp
src/armnn/Network.cpp
+ src/armnn/NetworkUtils.hpp
src/armnn/backends/OutputHandler.hpp
src/armnn/backends/OutputHandler.cpp
+ src/armnn/ProfilingEvent.cpp
+ src/armnn/ProfilingEvent.hpp
src/armnn/Profiling.cpp
+ src/armnn/Instrument.hpp
+ src/armnn/WallClockTimer.hpp
+ src/armnn/WallClockTimer.cpp
src/armnn/Tensor.cpp
src/armnn/Utils.cpp
src/armnn/LayerSupport.cpp
src/armnn/LayerSupportCommon.hpp
src/armnn/optimizations/All.hpp
+ src/armnn/optimizations/ConvertConstants.hpp
src/armnn/optimizations/MovePermuteUp.hpp
src/armnn/optimizations/Optimization.hpp
src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp
src/armnn/optimizations/OptimizeInversePermutes.hpp
src/armnn/optimizations/PermuteAsReshape.hpp
src/armnn/optimizations/SquashEqualSiblings.hpp
+ src/armnn/optimizations/OptimizeInverseConversions.hpp
+ src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp
src/armnn/Optimizer.hpp
src/armnn/Optimizer.cpp
+ third-party/half/half.hpp
+ src/armnn/IGraphObservable.hpp
+ src/armnn/Observable.hpp
+ src/armnn/Observable.cpp
)
if(ARMCOMPUTENEON)
@@ -322,12 +402,18 @@ if(ARMCOMPUTENEON)
src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.hpp
src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp
src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.hpp
+ src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp
+ src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp
+ src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp
+ src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp
src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp
src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp
src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp
src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp
src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp
src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp
+ src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp
+ src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp
src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.cpp
src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp
src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp
@@ -338,6 +424,8 @@ if(ARMCOMPUTENEON)
src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp
src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp
src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp
+ src/armnn/backends/NeonWorkloads/NeonLstmFloat32Workload.cpp
+ src/armnn/backends/NeonWorkloads/NeonLstmFloat32Workload.hpp
src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.cpp
src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.hpp
src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp
@@ -358,6 +446,8 @@ if(ARMCOMPUTENEON)
src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.hpp
src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp
src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.hpp
+ src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp
+ src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.hpp
src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp
src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp
src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp
@@ -368,7 +458,11 @@ if(ARMCOMPUTENEON)
src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.hpp
src/armnn/backends/NeonWorkloadUtils.cpp
src/armnn/backends/NeonWorkloadUtils.hpp
- src/armnn/backends/NeonTensorHandle.hpp)
+ src/armnn/backends/NeonTensorHandle.hpp
+ src/armnn/NeonInterceptorScheduler.hpp
+ src/armnn/NeonInterceptorScheduler.cpp
+ src/armnn/NeonTimer.hpp
+ src/armnn/NeonTimer.cpp)
endif()
if(ARMCOMPUTECL)
# Additionally include source files for ARM Compute OpenCL backend
@@ -377,8 +471,16 @@ if(ARMCOMPUTECL)
src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.hpp
src/armnn/backends/ClWorkloads/ClActivationUint8Workload.cpp
src/armnn/backends/ClWorkloads/ClActivationUint8Workload.hpp
+ src/armnn/backends/ClWorkloads/ClAdditionBaseWorkload.cpp
+ src/armnn/backends/ClWorkloads/ClAdditionBaseWorkload.hpp
+ src/armnn/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp
+ src/armnn/backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp
+ src/armnn/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp
+ src/armnn/backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp
src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.cpp
src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.hpp
+ src/armnn/backends/ClWorkloads/ClAdditionUint8Workload.cpp
+ src/armnn/backends/ClWorkloads/ClAdditionUint8Workload.hpp
src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.cpp
src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.hpp
src/armnn/backends/ClWorkloads/ClBaseMergerWorkload.hpp
@@ -394,17 +496,20 @@ if(ARMCOMPUTECL)
src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp
src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp
src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp
+ src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp
+ src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp
src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.cpp
src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.hpp
src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp
src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp
- src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionHelper.hpp
src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.cpp
src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.hpp
src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp
src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp
src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.cpp
src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.hpp
+ src/armnn/backends/ClWorkloads/ClLstmFloat32Workload.cpp
+ src/armnn/backends/ClWorkloads/ClLstmFloat32Workload.hpp
src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.cpp
src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.hpp
src/armnn/backends/ClWorkloads/ClMergerUint8Workload.cpp
@@ -427,6 +532,8 @@ if(ARMCOMPUTECL)
src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.hpp
src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.cpp
src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.hpp
+ src/armnn/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp
+ src/armnn/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp
src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp
src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp
src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp
@@ -436,14 +543,29 @@ if(ARMCOMPUTECL)
src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.cpp
src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.hpp
src/armnn/backends/ClWorkloadUtils.hpp
- src/armnn/backends/ClTensorHandle.hpp)
+ src/armnn/backends/ClTensorHandle.hpp
+ src/armnn/OpenClTimer.cpp
+ src/armnn/OpenClTimer.hpp)
endif()
# Files shared by all ARM Compute backends
if(ARMCOMPUTENEON OR ARMCOMPUTECL)
list(APPEND armnn_sources
src/armnn/backends/ArmComputeTensorUtils.hpp
src/armnn/backends/ArmComputeTensorUtils.cpp
- src/armnn/backends/ArmComputeUtils.hpp)
+ src/armnn/backends/ArmComputeUtils.hpp
+ src/armnn/memory/IMemoryPool.hpp
+ src/armnn/memory/BlobMemoryPool.cpp
+ src/armnn/memory/BlobMemoryPool.hpp
+ src/armnn/memory/BlobLifetimeManager.cpp
+ src/armnn/memory/BlobLifetimeManager.hpp
+ src/armnn/memory/PoolManager.cpp
+ src/armnn/memory/PoolManager.hpp
+ src/armnn/memory/BaseMemoryManager.hpp
+ src/armnn/memory/BaseMemoryManager.cpp
+ src/armnn/memory/OffsetMemoryPool.cpp
+ src/armnn/memory/OffsetMemoryPool.hpp
+ src/armnn/memory/OffsetLifetimeManager.cpp
+ src/armnn/memory/OffsetLifetimeManager.hpp)
endif()
# Files used for Streamline-based profiling backend
@@ -459,13 +581,20 @@ target_include_directories(armnn PRIVATE src/armnnUtils)
target_link_libraries(armnn armnnUtils)
target_link_libraries(armnn ${CMAKE_DL_LIBS})
+
install(TARGETS armnn DESTINATION ${CMAKE_INSTALL_PREFIX}/lib)
if(BUILD_CAFFE_PARSER)
install(TARGETS armnnCaffeParser DESTINATION ${CMAKE_INSTALL_PREFIX}/lib)
endif()
+if(BUILD_ONNX_PARSER)
+ install(TARGETS armnnOnnxParser DESTINATION ${CMAKE_INSTALL_PREFIX}/lib)
+endif()
if(BUILD_TF_PARSER)
install(TARGETS armnnTfParser DESTINATION ${CMAKE_INSTALL_PREFIX}/lib)
endif()
+if(BUILD_TF_LITE_PARSER)
+ install(TARGETS armnnTfLiteParser DESTINATION ${CMAKE_INSTALL_PREFIX}/lib)
+endif()
install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_PREFIX}/include)
target_link_libraries(armnn ${Boost_LOG_LIBRARY} ${Boost_THREAD_LIBRARY} ${Boost_SYSTEM_LIBRARY})
@@ -488,14 +617,21 @@ if(BUILD_UNIT_TESTS)
src/armnn/test/UnitTests.hpp
src/armnn/test/EndToEndTest.cpp
src/armnn/test/UtilsTests.cpp
+ src/armnn/test/JsonPrinterTests.cpp
src/armnn/test/GraphTests.cpp
src/armnn/test/OptimizerTests.cpp
+ src/armnn/test/ProfilerTests.cpp
src/armnn/test/RuntimeTests.cpp
src/armnn/test/CreateWorkload.hpp
src/armnn/test/TensorTest.cpp
src/armnn/test/TensorHelpers.hpp
- src/armnn/test/Network_test.cpp
+ src/armnn/test/CsvReaderTest.cpp
+ src/armnn/test/NetworkTests.cpp
+ src/armnn/test/FloatingPointConverterTest.cpp
+ src/armnn/test/ProfilingEventTest.cpp
src/armnn/test/GraphUtils.hpp
+ src/armnn/test/InstrumentTests.cpp
+ src/armnn/test/ObservableTest.cpp
src/armnn/backends/test/IsLayerSupportedTest.cpp
src/armnn/backends/test/IsLayerSupportedTestImpl.hpp
src/armnn/backends/test/Reference.cpp
@@ -504,6 +640,7 @@ if(BUILD_UNIT_TESTS)
src/armnn/backends/test/TensorCopyUtils.cpp
src/armnn/backends/test/LayerTests.hpp
src/armnn/backends/test/LayerTests.cpp
+ src/armnn/backends/test/LayerReleaseConstantDataTest.cpp
src/armnn/backends/test/Conv2dTestImpl.hpp
src/armnn/backends/test/ActivationTestImpl.hpp
src/armnn/backends/test/ActivationFixture.hpp
@@ -522,14 +659,18 @@ if(BUILD_UNIT_TESTS)
list(APPEND unittest_sources
src/armnn/backends/test/ArmComputeNeon.cpp
src/armnn/backends/test/CreateWorkloadNeon.cpp
- src/armnn/test/CreateWorkloadClNeon.hpp)
+ src/armnn/test/CreateWorkloadClNeon.hpp
+ src/armnn/test/NeonTimerTest.cpp)
endif()
if(ARMCOMPUTECL)
list(APPEND unittest_sources
src/armnn/backends/test/ArmComputeCl.cpp
+ src/armnn/backends/test/ClContextControlFixture.hpp
src/armnn/backends/test/CreateWorkloadCl.cpp
- src/armnn/test/CreateWorkloadClNeon.hpp)
+ src/armnn/test/CreateWorkloadClNeon.hpp
+ src/armnn/test/OpenClTimerTest.cpp
+ src/armnn/test/FP16SupportTest.cpp)
endif()
if(ARMCOMPUTENEON OR ARMCOMPUTECL)
@@ -550,6 +691,7 @@ if(BUILD_UNIT_TESTS)
src/armnnTfParser/test/FusedBatchNorm.cpp
src/armnnTfParser/test/Identity.cpp
src/armnnTfParser/test/LocalResponseNormalization.cpp
+ src/armnnTfParser/test/MaximumForLeakyRelu.cpp
src/armnnTfParser/test/Multiplication.cpp
src/armnnTfParser/test/MultiOutput.cpp
src/armnnTfParser/test/PassThru.cpp
@@ -565,10 +707,29 @@ if(BUILD_UNIT_TESTS)
src/armnnTfParser/test/Squeeze.cpp)
endif()
+ if(BUILD_TF_LITE_PARSER)
+ list(APPEND unittest_sources
+ src/armnnTfLiteParser/test/ParserFlatbuffersFixture.hpp
+ src/armnnTfLiteParser/test/AvgPool2D.cpp
+ src/armnnTfLiteParser/test/Conv2D.cpp
+ src/armnnTfLiteParser/test/DepthwiseConvolution2D.cpp
+ src/armnnTfLiteParser/test/Softmax.cpp
+ src/armnnTfLiteParser/test/Squeeze.cpp
+ src/armnnTfLiteParser/test/LoadModel.cpp
+ src/armnnTfLiteParser/test/GetBuffer.cpp
+ src/armnnTfLiteParser/test/OutputShapeOfSqueeze.cpp
+ src/armnnTfLiteParser/test/InputOutputTensorNames.cpp
+ src/armnnTfLiteParser/test/GetTensorIds.cpp
+ src/armnnTfLiteParser/test/GetSubgraphInputsOutputs.cpp
+ src/armnnTfLiteParser/test/GetInputsOutputs.cpp
+ )
+ endif()
+
if(BUILD_CAFFE_PARSER)
list(APPEND unittest_sources
src/armnnCaffeParser/test/TestAdd.cpp
src/armnnCaffeParser/test/TestConcat.cpp
+ src/armnnCaffeParser/test/TestConvolution.cpp
src/armnnCaffeParser/test/TestDropout.cpp
src/armnnCaffeParser/test/TestInputs.cpp
src/armnnCaffeParser/test/TestMul.cpp
@@ -579,19 +740,41 @@ if(BUILD_UNIT_TESTS)
)
endif()
+ if(BUILD_ONNX_PARSER)
+ list(APPEND unittest_sources
+ src/armnnOnnxParser/test/Constructor.cpp
+ src/armnnOnnxParser/test/CreateNetwork.cpp
+ src/armnnOnnxParser/test/ProtoxtFixture.cpp
+ src/armnnOnnxParser/test/Const.cpp
+ src/armnnOnnxParser/test/Pooling.cpp
+ src/armnnOnnxParser/test/Reshape.cpp
+ src/armnnOnnxParser/test/Relu.cpp
+ src/armnnOnnxParser/test/Conv2D.cpp
+ src/armnnOnnxParser/test/Addition.cpp
+ src/armnnOnnxParser/test/FullyConnected.cpp
+ src/armnnOnnxParser/test/GetInputsOutputs.cpp
+ src/armnnOnnxParser/test/BatchNorm.cpp
+ src/armnnOnnxParser/test/DepthConv.cpp
+ )
+ endif()
+
add_executable_ex(UnitTests ${unittest_sources})
target_include_directories(UnitTests PRIVATE src/armnn)
target_include_directories(UnitTests PRIVATE src/armnnUtils)
- if(NOT HEAP_PROFILING AND VALGRIND_FOUND)
- # Valgrind works with gperftools version number <= 2.4
- target_compile_definitions(UnitTests PRIVATE "WITH_VALGRIND=1")
+ if(VALGRIND_FOUND)
+ if(HEAP_PROFILING OR LEAK_CHECKING)
+ message("Valgrind is disabled for heap profiling and leak checking builds.")
+ else()
+ # Valgrind works with gperftools version number <= 2.4
+ target_compile_definitions(UnitTests PRIVATE "WITH_VALGRIND=1")
+ endif()
endif()
target_link_libraries(UnitTests armnn)
target_link_libraries(UnitTests armnnUtils)
target_link_libraries(UnitTests ${CMAKE_THREAD_LIBS_INIT})
- target_link_libraries(UnitTests ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY})
+ target_link_libraries(UnitTests ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY})
if(BUILD_TF_PARSER)
target_link_libraries(UnitTests armnnTfParser)
@@ -601,6 +784,13 @@ if(BUILD_UNIT_TESTS)
target_link_libraries(UnitTests armnnCaffeParser)
endif()
+ if(BUILD_TF_LITE_PARSER)
+ target_link_libraries(UnitTests armnnTfLiteParser)
+ endif()
+
+ if(BUILD_ONNX_PARSER)
+ target_link_libraries(UnitTests armnnOnnxParser)
+ endif()
+
addDllCopyCommands(UnitTests)
endif()
-
diff --git a/README.md b/README.md
index e451cb1754a0997b0fa85ad4928bfeb8a78a39e0..72f5a1faea39358f5e154129a7133ef5e0e836c0 100644
--- a/README.md
+++ b/README.md
@@ -4,14 +4,24 @@ For more information about Arm NN, see:
+There is a getting started guide here using TensorFlow Lite: [TensorFlow Lite Support](src/armnnTfLiteParser/README.md)
+
There is a getting started guide here using Caffe:
+There is a getting started guide here using ONNX: [ONNX Support](src/armnnOnnxParser/README.md)
+
### Build Instructions
Arm tests the build system of Arm NN with the following build environments:
* Android NDK: [How to use Android NDK to build ArmNN](BuildGuideAndroidNDK.md)
-* Cross compilation from x86_64 Ubuntu to arm64 Linux
+* Cross compilation from x86_64 Ubuntu to arm64 Linux: [ArmNN Cross Compilation](BuildGuideCrossCompilation.md)
* Native compilation under arm64 Debian 9
Arm NN is written using portable C++14 and the build system uses [CMake](https://cmake.org/) so it is possible to build for a wide variety of target platforms, from a wide variety of host environments.
+
+The armnn/tests directory contains tests used during ArmNN development. Many of them depend on third-party IP, model protobufs and image files not distributed with ArmNN. The dependencies of some of the tests are available freely on the Internet, for those who wish to experiment.
+
+The 'ExecuteNetwork' program, in armnn/tests/ExecuteNetwork, has no additional dependencies beyond those required by ArmNN and the model parsers. It takes any model and any input tensor, and simply prints out the output tensor. Run with no arguments to see command-line help.
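+
+For example, running it with no arguments prints the command-line help (a sketch, assuming you run it from the folder where the binary was built):
+```bash
+./ExecuteNetwork
+```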
+
+The 'armnn/samples' directory contains SimpleSample.cpp, a very basic example of the ArmNN SDK API in use.
\ No newline at end of file
diff --git a/cmake/GlobalConfig.cmake b/cmake/GlobalConfig.cmake
index 2dbeadaadf53c4ca8f357298c340d55bc5f9b2cb..47bdd5ca326b20e47602296f6685c514fac254ac 100644
--- a/cmake/GlobalConfig.cmake
+++ b/cmake/GlobalConfig.cmake
@@ -1,15 +1,20 @@
option(BUILD_CAFFE_PARSER "Build Caffe parser" OFF)
option(BUILD_TF_PARSER "Build Tensorflow parser" OFF)
+option(BUILD_ONNX_PARSER "Build Onnx parser" OFF)
option(BUILD_UNIT_TESTS "Build unit tests" ON)
option(BUILD_TESTS "Build test applications" OFF)
option(BUILD_FOR_COVERAGE "Use no optimization and output .gcno and .gcda files" OFF)
option(ARMCOMPUTENEON "Build with ARM Compute NEON support" OFF)
option(ARMCOMPUTECL "Build with ARM Compute OpenCL support" OFF)
-option(PROFILING "Build with ArmNN built-in profiling support" OFF)
option(PROFILING_BACKEND_STREAMLINE "Forward the armNN profiling events to DS-5/Streamline as annotations" OFF)
-# options used for heap profiling
+# options used for heap profiling and leak checking
option(HEAP_PROFILING "Build with heap profiling enabled" OFF)
+option(LEAK_CHECKING "Build with leak checking enabled" OFF)
option(GPERFTOOLS_ROOT "Location where the gperftools 'include' and 'lib' folders to be found" Off)
+# options used for tensorflow lite support
+option(BUILD_TF_LITE_PARSER "Build Tensorflow Lite parser" OFF)
+option(TF_LITE_GENERATED_PATH "Tensorflow lite generated C++ schema location" OFF)
+option(FLATBUFFERS_ROOT "Location where the flatbuffers 'include' and 'lib' folders to be found" Off)
include(SelectLibraryConfigurations)
@@ -106,7 +111,7 @@ link_directories(${Boost_LIBRARY_DIR})
find_package (Threads)
# Favour the protobuf passed on command line
-if(BUILD_TF_PARSER OR BUILD_CAFFE_PARSER)
+if(BUILD_TF_PARSER OR BUILD_CAFFE_PARSER OR BUILD_ONNX_PARSER)
find_library(PROTOBUF_LIBRARY_DEBUG NAMES "protobufd"
PATHS ${PROTOBUF_ROOT}/lib
NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH)
@@ -149,6 +154,63 @@ if(BUILD_TF_PARSER)
include_directories(SYSTEM "${TF_GENERATED_SOURCES}")
endif()
+if(BUILD_ONNX_PARSER)
+ add_definitions(-DARMNN_ONNX_PARSER)
+
+ find_path(ONNX_GENERATED_SOURCES "onnx/onnx.pb.cc")
+
+ # C++ headers generated for onnx protobufs
+ include_directories(SYSTEM "${ONNX_GENERATED_SOURCES}")
+endif()
+
+
+# Flatbuffers support for TF Lite
+if(BUILD_TF_LITE_PARSER)
+ find_path(TF_LITE_SCHEMA_INCLUDE_PATH
+ schema_generated.h
+ HINTS ${TF_LITE_GENERATED_PATH})
+
+ if(NOT TF_LITE_SCHEMA_INCLUDE_PATH)
+ message(WARNING
+ "Couldn't find 'schema_generated.h' at ${TF_LITE_GENERATED_PATH}. Disabling Tf Lite support")
+ set(BUILD_TF_LITE_PARSER Off)
+ else()
+ message(STATUS "Tf Lite generated header found at: ${TF_LITE_SCHEMA_INCLUDE_PATH}")
+ endif()
+
+ # verify we have a valid flatbuffers include path
+ find_path(FLATBUFFERS_INCLUDE_PATH flatbuffers/flatbuffers.h
+ HINTS ${FLATBUFFERS_ROOT}/include /usr/local/include /usr/include)
+
+ if(NOT FLATBUFFERS_INCLUDE_PATH)
+ message(WARNING
+ "Couldn't find 'flatbuffers/flatbuffers.h' at ${FLATBUFFERS_ROOT}/include. Disabling Tf Lite support")
+ set(BUILD_TF_LITE_PARSER Off)
+ else()
+ message(STATUS "Flatbuffers headers are located at: ${FLATBUFFERS_INCLUDE_PATH}")
+ endif()
+
+ find_library(FLATBUFFERS_LIBRARY
+ NAMES libflatbuffers.a flatbuffers
+ HINTS ${FLATBUFFERS_ROOT}/lib /usr/local/lib /usr/lib)
+
+ if(NOT FLATBUFFERS_LIBRARY)
+ message(WARNING
+ "Couldn't find flatbuffers library. Disabling Tf Lite support")
+ set(BUILD_TF_LITE_PARSER Off)
+ else()
+ message(STATUS "Flatbuffers library located at: ${FLATBUFFERS_LIBRARY}")
+ endif()
+
+ # Setup includes and libs only if we still want Tf Lite
+ if(BUILD_TF_LITE_PARSER)
+ include_directories(SYSTEM "${TF_LITE_SCHEMA_INCLUDE_PATH}")
+ include_directories(SYSTEM "${FLATBUFFERS_INCLUDE_PATH}")
+ add_definitions(-DARMNN_TF_LITE_PARSER)
+ add_definitions(-DARMNN_TF_LITE_SCHEMA_PATH="${TF_LITE_SCHEMA_INCLUDE_PATH}/schema.fbs")
+ endif()
+endif()
+
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
# ARM Compute
@@ -238,12 +300,7 @@ if(ARMCOMPUTENEON OR ARMCOMPUTECL)
find_path(HALF_INCLUDE half/half.hpp
PATHS ${ARMCOMPUTE_ROOT}/include
NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH)
- include_directories(${HALF_INCLUDE})
-endif()
-
-# Built-in profiler
-if(PROFILING)
- add_definitions(-DARMNN_PROFILING_ENABLED)
+ include_directories(SYSTEM ${HALF_INCLUDE})
endif()
# Streamline annotate
@@ -252,7 +309,7 @@ if(PROFILING_BACKEND_STREAMLINE)
add_definitions(-DARMNN_STREAMLINE_ENABLED)
endif()
-if(HEAP_PROFILING)
+if(HEAP_PROFILING OR LEAK_CHECKING)
# enable heap profiling for everything except for referencetests
if(NOT ${PROJECT_NAME} STREQUAL "referencetests")
find_path(HEAP_PROFILER_INCLUDE gperftools/heap-profiler.h
@@ -265,9 +322,14 @@ if(HEAP_PROFILING)
link_directories(${GPERFTOOLS_ROOT}/lib)
link_libraries(${GPERF_TOOLS_LIBRARY})
- add_definitions("-DARMNN_HEAP_PROFILING_ENABLED=1")
+ if (HEAP_PROFILING)
+ add_definitions("-DARMNN_HEAP_PROFILING_ENABLED=1")
+ endif()
+ if (LEAK_CHECKING)
+ add_definitions("-DARMNN_LEAK_CHECKING_ENABLED=1")
+ endif()
else()
- message("Heap profiling is disabled for referencetests")
+ message("Heap profiling and leak checking are disabled for referencetests")
endif()
else()
# Valgrind only works with gperftools version number <= 2.4
@@ -283,3 +345,6 @@ if(NOT BUILD_TF_PARSER)
message(STATUS "Tensorflow parser support is disabled")
endif()
+if(NOT BUILD_TF_LITE_PARSER)
+ message(STATUS "Tensorflow Lite parser support is disabled")
+endif()
diff --git a/include/armnn/ArmNN.hpp b/include/armnn/ArmNN.hpp
index d1cb7a84881b4507b1f1d3f38bda157edc849721..66697c428b137d38cd649e40a40ffd171af6b304 100644
--- a/include/armnn/ArmNN.hpp
+++ b/include/armnn/ArmNN.hpp
@@ -9,6 +9,7 @@
#include "IRuntime.hpp"
#include "INetwork.hpp"
#include "LayerSupport.hpp"
+#include "LstmParams.hpp"
#include "Tensor.hpp"
#include "Types.hpp"
#include "TypesUtils.hpp"
diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp
index 2595656c70e78d6a3cf01f8d89e308a471ad3716..3cf152befe66cdf99f50569e4874bbc90cecfc12 100644
--- a/include/armnn/Descriptors.hpp
+++ b/include/armnn/Descriptors.hpp
@@ -95,8 +95,8 @@ private:
uint32_t** m_ViewSizes;
};
-// Convenience template to create a OriginsDescriptor to use when creating a Merger layer for performing concatenation
-// of a number of input tensors
+/// Convenience template to create an OriginsDescriptor to use when creating a Merger layer for performing concatenation
+/// of a number of input tensors
template <typename TensorShapeIt>
OriginsDescriptor CreateMergerDescriptorForConcatenation(TensorShapeIt first, TensorShapeIt last,
unsigned int concatenationDimension)
@@ -301,7 +301,35 @@ struct ResizeBilinearDescriptor
struct ReshapeDescriptor
{
+ ReshapeDescriptor()
+ : m_TargetShape()
+ {}
+
+ ReshapeDescriptor(const TensorShape& shape)
+ : m_TargetShape(shape)
+ {}
+
TensorShape m_TargetShape;
};
+// temporary descriptor for Lstm
+struct LstmDescriptor
+{
+ LstmDescriptor()
+ : m_ActivationFunc(1) // 0: None, 1: Relu, 3: Relu6, 4: Tanh, 6: Sigmoid
+ , m_ClippingThresCell(0.0)
+ , m_ClippingThresProj(0.0)
+ , m_CifgEnabled(true)
+ , m_PeepholeEnabled(false)
+ , m_ProjectionEnabled(false)
+ {}
+
+ uint32_t m_ActivationFunc;
+ float m_ClippingThresCell;
+ float m_ClippingThresProj;
+ bool m_CifgEnabled;
+ bool m_PeepholeEnabled;
+ bool m_ProjectionEnabled;
+};
+
}
diff --git a/include/armnn/DescriptorsFwd.hpp b/include/armnn/DescriptorsFwd.hpp
index 58b4bcc62641259095460cf3904ab0c35d5f23e2..8c14614876f21b8c4dbae82dafba9131bea59ae5 100644
--- a/include/armnn/DescriptorsFwd.hpp
+++ b/include/armnn/DescriptorsFwd.hpp
@@ -12,6 +12,7 @@ struct Convolution2dDescriptor;
struct DepthwiseConvolution2dDescriptor;
struct FakeQuantizationDescriptor;
struct FullyConnectedDescriptor;
+struct LstmDescriptor;
struct PermuteDescriptor;
struct NormalizationDescriptor;
struct Pooling2dDescriptor;
diff --git a/include/armnn/Exceptions.hpp b/include/armnn/Exceptions.hpp
index 630c77660deaa05d23297ecc52d5b09d4d1c1943..403fc593b52552ecd8f46b45721fc7e6d73d8816 100644
--- a/include/armnn/Exceptions.hpp
+++ b/include/armnn/Exceptions.hpp
@@ -11,7 +11,38 @@
namespace armnn
{
-// base class for all ArmNN exceptions so that users can filter to just those
+struct CheckLocation
+{
+ const char* m_Function;
+ const char* m_File;
+ unsigned int m_Line;
+
+ CheckLocation(const char* func,
+ const char* file,
+ unsigned int line)
+ : m_Function{func}
+ , m_File{file}
+ , m_Line{line}
+ {
+ }
+
+ std::string AsString() const
+ {
+ std::stringstream ss;
+ ss << " at function " << m_Function
+ << " [" << m_File << ':' << m_Line << "]";
+ return ss.str();
+ }
+
+ std::string FileLine() const
+ {
+ std::stringstream ss;
+ ss << " [" << m_File << ':' << m_Line << "]";
+ return ss.str();
+ }
+};
+
+/// Base class for all ArmNN exceptions so that users can filter to just those.
class Exception : public std::exception
{
public:
@@ -91,4 +122,6 @@ void ConditionalThrowIfNotEqual(const std::string& message,
}
}
-}
+} // namespace armnn
+
+#define CHECK_LOCATION() armnn::CheckLocation(__func__, __FILE__, __LINE__)
diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp
index 5cff810db5ecfda8649e196371979266cd1ad5f2..cefcbfb06cb630db35ac82fa795978526e0da725 100644
--- a/include/armnn/INetwork.hpp
+++ b/include/armnn/INetwork.hpp
@@ -11,6 +11,7 @@
#include "armnn/Types.hpp"
#include <memory>
+#include <vector>
namespace armnn
{
@@ -25,7 +26,8 @@ public:
virtual IOutputSlot* GetConnection() = 0;
protected:
- ~IInputSlot() {} /// Not user deletable
+ /// Not user deletable.
+ ~IInputSlot() {}
};
/// @brief An output connection slot for a layer.
@@ -45,7 +47,8 @@ public:
virtual void Disconnect(IInputSlot& slot) = 0;
protected:
- ~IOutputSlot() {} /// Not user deletable
+ /// Not user deletable.
+ ~IOutputSlot() {}
};
/// @brief Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
@@ -63,9 +66,12 @@ public:
virtual const IOutputSlot& GetOutputSlot(unsigned int index) const = 0;
virtual IOutputSlot& GetOutputSlot(unsigned int index) = 0;
+ virtual std::vector<TensorShape> InferOutputShapes(const std::vector<TensorShape>& inputShapes) const = 0;
+
virtual LayerGuid GetGuid() const = 0;
protected:
- ~IConnectableLayer() {} // Objects are not deletable via the handle
+ /// Objects are not deletable via the handle
+ ~IConnectableLayer() {}
};
using INetworkPtr = std::unique_ptr<INetwork, void(*)(INetwork* network)>;
@@ -81,19 +87,19 @@ public:
virtual Status PrintGraph() = 0;
- /// Add an input layer to the network.
- /// @param id User generated id to uniquely identify a particular input. The same id needs to be specified
+ /// Adds an input layer to the network.
+ /// @param id - User generated id to uniquely identify a particular input. The same id needs to be specified.
/// when passing the inputs to the IRuntime::EnqueueWorkload() function.
- /// @param name Optional name for the layer
- /// @return Interface for configuring the layer.
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
virtual IConnectableLayer* AddInputLayer(LayerBindingId id, const char* name = nullptr) = 0;
- /// Add a 2D convolution layer to the network.
- /// @param convolution2dDescriptor Description of the 2D convolution layer
- /// @param weights Tensor for the weights data.
- /// @param biases (Optional) Tensor for the bias data. Must match the output tensor shape.
- /// @param name Optional name for the layer
- /// @return Interface for configuring the layer.
+ /// Adds a 2D convolution layer to the network.
+ /// @param convolution2dDescriptor - Description of the 2D convolution layer.
+ /// @param weights - Tensor for the weights data.
+ /// @param biases - (Optional) Tensor for the bias data. Must match the output tensor shape.
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
virtual IConnectableLayer* AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
const ConstTensor& weights,
const char* name = nullptr) = 0;
@@ -103,12 +109,12 @@ public:
const ConstTensor& biases,
const char* name = nullptr) = 0;
- /// Add a 2D depthwise convolution layer to the network.
- /// @param convolution2dDescriptor Description of the 2D depthwise convolution layer
- /// @param weights Tensor for the weights data. Expected format: [1, outputChannels, height, width]
- /// @param biases (Optional) Tensor for the bias data. Must match the output tensor shape.
- /// @param name Optional name for the layer
- /// @return Interface for configuring the layer.
+ /// Adds a 2D depthwise convolution layer to the network.
+ /// @param convolution2dDescriptor - Description of the 2D depthwise convolution layer.
+ /// @param weights - Tensor for the weights data. Expected format: [1, outputChannels, height, width].
+ /// @param biases (Optional) - Tensor for the bias data. Must match the output tensor shape.
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
virtual IConnectableLayer* AddDepthwiseConvolution2dLayer(
const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
const ConstTensor& weights,
@@ -120,12 +126,12 @@ public:
const ConstTensor& biases,
const char* name = nullptr) = 0;
- /// Add a fully connected layer to the network.
- /// @param fullyConnectedDescriptor Description of the fully connected layer
- /// @param weights Tensor for the weights data.
- /// @param biases (Optional) Tensor for the bias data.
- /// @param name Optional name for the layer
- /// @return Interface for configuring the layer.
+ /// Adds a fully connected layer to the network.
+ /// @param fullyConnectedDescriptor - Description of the fully connected layer.
+ /// @param weights - Tensor for the weights data.
+ /// @param biases - (Optional) Tensor for the bias data.
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
virtual IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
const ConstTensor& weights,
const char* name = nullptr) = 0;
@@ -135,76 +141,77 @@ public:
const ConstTensor& biases,
const char* name = nullptr) = 0;
- /// Add a permute layer to the network.
- /// @param permuteDescriptor PermuteDescriptor to configure the permute
- /// @param name Optional name for the layer
- /// @return Interface for configuring the layer.
+ /// Adds a permute layer to the network.
+ /// @param permuteDescriptor - PermuteDescriptor to configure the permute.
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
virtual IConnectableLayer* AddPermuteLayer(const PermuteDescriptor& permuteDescriptor,
const char* name = nullptr) = 0;
- /// Add a pooling layer to the network.
- /// @param pooling2dDescriptor Pooling2dDescriptor to configure the pooling
- /// @param name Optional name for the layer
- /// @return Interface for configuring the layer.
+ /// Adds a pooling layer to the network.
+ /// @param pooling2dDescriptor - Pooling2dDescriptor to configure the pooling.
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
virtual IConnectableLayer* AddPooling2dLayer(const Pooling2dDescriptor& pooling2dDescriptor,
const char* name = nullptr) = 0;
- /// Add an activation layer to the network.
- /// @param activationDescriptor ActivationDescriptor to configure the activation
- /// @param name Optional name for the layer
- /// @return Interface for configuring the layer.
+ /// Adds an activation layer to the network.
+ /// @param activationDescriptor - ActivationDescriptor to configure the activation.
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
virtual IConnectableLayer* AddActivationLayer(const ActivationDescriptor& activationDescriptor,
const char* name = nullptr) = 0;
- /// Add a normalization layer to the network.
- /// @param normalizationDescriptor NormalizationDescriptor to configure the normalization
- /// @param name Optional name for the layer
- /// @return Interface for configuring the layer.
+ /// Adds a normalization layer to the network.
+ /// @param normalizationDescriptor - NormalizationDescriptor to configure the normalization.
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
virtual IConnectableLayer* AddNormalizationLayer(const NormalizationDescriptor& normalizationDescriptor,
const char* name = nullptr) = 0;
- /// Add a softmax layer to the network.
- /// @param softmaxDescriptor SoftmaxDescriptor to configure the softmax
- /// @param name Optional name for the layer
- /// @return Interface for configuring the layer.
+ /// Adds a softmax layer to the network.
+ /// @param softmaxDescriptor - SoftmaxDescriptor to configure the softmax.
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
virtual IConnectableLayer* AddSoftmaxLayer(const SoftmaxDescriptor& softmaxDescriptor,
const char* name = nullptr) = 0;
- /// Add a splitter layer to the network.
- /// @param splitterDescriptor WindowsDescriptor to configure the splitting process. Number of Views must be equal to
- /// the number of outputs, and their order must match - e.g. first view corresponds to
- /// the first output, second view to the second output, etc....
- /// @param name Optional name for the layer
- /// @return Interface for configuring the layer.
+ /// Adds a splitter layer to the network.
+ /// @param splitterDescriptor - ViewsDescriptor to configure the splitting process.
+ /// Number of Views must be equal to the number of outputs,
+ /// and their order must match - e.g. first view corresponds to
+ /// the first output, second view to the second output, etc....
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
virtual IConnectableLayer* AddSplitterLayer(const ViewsDescriptor& splitterDescriptor
, const char* name = nullptr) = 0;
- /// Add a merger layer to the network.
- /// @param mergerDescriptor WindowsDescriptor to configure the merging process. Number of Views must be equal to
+ /// Adds a merger layer to the network.
+ /// @param mergerDescriptor - OriginsDescriptor to configure the merging process. Number of Views must be equal to
/// the number of inputs, and their order must match - e.g. first view corresponds to
/// the first input, second view to the second input, etc....
- /// @param name Optional name for the layer
- /// @return Interface for configuring the layer.
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
virtual IConnectableLayer* AddMergerLayer(const OriginsDescriptor& mergerDescriptor,
const char* name = nullptr) = 0;
- /// Add an addition layer to the network.
- /// @param name Optional name for the layer
- /// @return Interface for configuring the layer.
+ /// Adds an addition layer to the network.
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
virtual IConnectableLayer* AddAdditionLayer(const char* name = nullptr) = 0;
- /// Add a multiplication layer to the network.
- /// @param name Optional name for the layer
- /// @return Interface for configuring the layer.
+ /// Adds a multiplication layer to the network.
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
virtual IConnectableLayer* AddMultiplicationLayer(const char* name = nullptr) = 0;
- /// Add a batch normalization layer to the network.
- /// @param mean Pre-calculated mean for each channel
- /// @param variance Pre-calculated variance for each channel
- /// @param beta Per-channel additive factor
- /// @param gamma Per-channel multiplicative factor
- /// @return Interface for configuring the layer.
- /// @param name Optional name for the layer
+ /// Adds a batch normalization layer to the network.
+ /// @param mean - Pre-calculated mean for each channel.
+ /// @param variance - Pre-calculated variance for each channel.
+ /// @param beta - Per-channel additive factor.
+ /// @param gamma - Per-channel multiplicative factor.
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
virtual IConnectableLayer* AddBatchNormalizationLayer(const BatchNormalizationDescriptor& desc,
const ConstTensor& mean,
const ConstTensor& variance,
@@ -212,47 +219,55 @@ public:
const ConstTensor& gamma,
const char* name = nullptr) = 0;
- /// Add a resize bilinear layer to the network.
- /// @param resizeDesc Parameters for the resize operation
- /// @param name Optional name for the layer
- /// @return Interface for configuring the layer
+ /// Adds a resize bilinear layer to the network.
+ /// @param resizeDesc - Parameters for the resize operation.
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
virtual IConnectableLayer* AddResizeBilinearLayer(const ResizeBilinearDescriptor& resizeDesc,
const char* name = nullptr) = 0;
- /// Add an L2 normalization layer to the network.
+ /// Adds an L2 normalization layer to the network.
/// Normalization is performed along dimension 1, but requires a 4d input.
- /// @param name Optional name for the layer
- /// @return Interface for configuring the layer
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
virtual IConnectableLayer* AddL2NormalizationLayer(const char* name = nullptr) = 0;
/// Adds a layer with no inputs and a single output, which always corresponds to
/// the passed in constant tensor.
- /// @param input Tensor to be provided as the only output of the layer. The layer will maintain its own copy of the
- /// tensor data, meaning the memory referenced by @a input can be freed or reused after this function is
- /// called.
- /// @param name Optional name for the layer
- /// @return Interface for configuring the layer
+ /// @param input - Tensor to be provided as the only output of the layer. The layer will maintain
+ /// its own copy of the tensor data, meaning the memory referenced by @a input can
+ /// be freed or reused after this function is called.
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
virtual IConnectableLayer* AddConstantLayer(const ConstTensor& input,
const char* name = nullptr) = 0;
- /// Add a reshape layer to the network.
- /// @param reshapeDescriptor Parameters for the reshape operation
- /// @param name Optional name for the layer
- /// @return Interface for configuring the layer.
+ /// Adds a reshape layer to the network.
+ /// @param reshapeDescriptor - Parameters for the reshape operation.
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
virtual IConnectableLayer* AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor,
const char* name = nullptr) = 0;
- /// Add a floor layer to the network.
- /// @param name Optional name for the layer
- /// @return Interface for configuring the layer.
+ /// Adds a floor layer to the network.
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
virtual IConnectableLayer* AddFloorLayer(const char* name = nullptr) = 0;
- /// Add an output layer to the network.
- /// @param id User generated id to uniquely identify a particular output. The same id needs to be specified
+ /// Adds an output layer to the network.
+ /// @param id - User generated id to uniquely identify a particular output. The same id needs to be specified
/// when passing the outputs to the IRuntime::EnqueueWorkload() function.
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
+ virtual IConnectableLayer* AddOutputLayer(LayerBindingId id, const char* name = nullptr) = 0;
+
+ /// Adds an LSTM layer to the network.
+ /// @param descriptor - Parameters for the LSTM operation.
+ /// @param params - The weight and bias tensors for the LSTM cell.
/// @param name Optional name for the layer
/// @return Interface for configuring the layer.
- virtual IConnectableLayer* AddOutputLayer(LayerBindingId id, const char* name = nullptr) = 0;
+ virtual IConnectableLayer* AddLstmLayer(const LstmDescriptor& descriptor,
+ const LstmInputParams& params,
+ const char* name = nullptr) = 0;
protected:
~INetwork() {}
@@ -268,16 +283,34 @@ public:
virtual Status PrintGraph() = 0;
virtual Status SerializeToDot(std::ostream& stream) const = 0;
+
protected:
~IOptimizedNetwork() {}
};
+struct OptimizerOptions
+{
+ OptimizerOptions() : m_ReduceFp32ToFp16(false) {}
+
+ OptimizerOptions(bool reduceFp32ToFp16)
+ : m_ReduceFp32ToFp16(reduceFp32ToFp16)
+ {
+ }
+
+ // Reduce Fp32 data to Fp16 for faster processing
+ bool m_ReduceFp32ToFp16;
+};
/// Create an optimized version of the network
/// @param network INetwork description of the network to be optimized.
-/// @param deviceSpec The choice of the default computation backend.
+/// @param backendPreferences The list of backends to use, in order of user preference.
+/// @param deviceSpec DeviceSpec object as queried from the runtime. See IRuntime::GetDeviceSpec().
+/// @param options OptimizerOptions object with optimizer configuration options.
/// @return An IOptimizedNetworkPtr interface to the optimized network, throws an exception derived from
/// armnn::Exception if process fails.
-IOptimizedNetworkPtr Optimize(const INetwork& network, const DeviceSpec& deviceSpec);
+IOptimizedNetworkPtr Optimize(const INetwork& network,
+ const std::vector<armnn::Compute>& backendPreferences,
+ const IDeviceSpec& deviceSpec,
+ const OptimizerOptions& options = OptimizerOptions());
} //namespace armnn
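
The OptimizerOptions struct and the backend-preference overload of Optimize() above fit together as in the following sketch. It is illustrative only: INetwork::Create(), the IConnectableLayer slot API, the ActivationDescriptor members and the `runtime` object are assumed from the rest of the ArmNN API and are not part of this change.

// Sketch: build a trivial network and optimize it with explicit backend preferences.
armnn::INetworkPtr network = armnn::INetwork::Create();             // assumed factory

armnn::IConnectableLayer* input  = network->AddInputLayer(0, "input");
armnn::ActivationDescriptor reluDesc;                                // descriptor members assumed
reluDesc.m_Function = armnn::ActivationFunction::ReLu;
armnn::IConnectableLayer* relu   = network->AddActivationLayer(reluDesc, "relu");
armnn::IConnectableLayer* output = network->AddOutputLayer(0, "output");

input->GetOutputSlot(0).Connect(relu->GetInputSlot(0));              // slot API assumed
relu->GetOutputSlot(0).Connect(output->GetInputSlot(0));

// Try GpuAcc first, fall back to CpuRef, and ask the optimizer to reduce Fp32 to Fp16.
std::vector<armnn::Compute> backendPreferences = { armnn::Compute::GpuAcc, armnn::Compute::CpuRef };
armnn::OptimizerOptions optimizerOptions(true);                      // m_ReduceFp32ToFp16 = true
armnn::IOptimizedNetworkPtr optNet =
    armnn::Optimize(*network, backendPreferences, runtime->GetDeviceSpec(), optimizerOptions);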
diff --git a/include/armnn/IProfiler.hpp b/include/armnn/IProfiler.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a28173e5e17dbb48610ceec34f608be2b5f45f71
--- /dev/null
+++ b/include/armnn/IProfiler.hpp
@@ -0,0 +1,38 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <iostream>
+
+namespace armnn
+{
+
+class IProfiler
+{
+public:
+ /// Enables/disables profiling for this profiler.
+ /// @param [in] enableProfiling A flag that indicates whether profiling should be enabled or not.
+ virtual void EnableProfiling(bool enableProfiling) = 0;
+
+ /// Checks whether profiling is enabled.
+ /// Profiling is disabled by default.
+ /// @return true if profiling is enabled, false otherwise.
+ virtual bool IsProfilingEnabled() = 0;
+
+ /// Analyzes the tracked events and writes the results to the given output stream.
+ /// Please refer to the configuration variables in Profiling.cpp to customize the information written.
+ /// @param [out] outStream The stream where to write the profiling results to.
+ virtual void AnalyzeEventsAndWriteResults(std::ostream& outStream) const = 0;
+
+ /// Print stats for events in JSON Format to the given output stream.
+ /// @param [out] outStream The stream where to write the profiling results to.
+ virtual void Print(std::ostream& outStream) const = 0;
+
+protected:
+ ~IProfiler() {}
+};
+
+} // namespace armnn
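
A profiler instance is obtained per loaded network from IRuntime::GetProfiler(), added later in this change. A minimal usage sketch; `runtime` and `networkId` are assumed to come from the normal network-loading path.

// Sketch: enable profiling for a loaded network and dump the collected events.
std::shared_ptr<armnn::IProfiler> profiler = runtime->GetProfiler(networkId);
if (profiler)
{
    profiler->EnableProfiling(true);                  // profiling is disabled by default
    // ... EnqueueWorkload() calls happen here ...
    profiler->AnalyzeEventsAndWriteResults(std::cout);
    profiler->Print(std::cout);                       // JSON-formatted statistics
}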
diff --git a/include/armnn/IRuntime.hpp b/include/armnn/IRuntime.hpp
index a1a3f0fda95c1c38e555291ee34cdd56d3af7499..36efdbdcab75748b7a3d04a4e95b1dbe58924cd3 100644
--- a/include/armnn/IRuntime.hpp
+++ b/include/armnn/IRuntime.hpp
@@ -9,6 +9,7 @@
#include "Types.hpp"
#include "Tensor.hpp"
#include "INetwork.hpp"
+#include "IProfiler.hpp"
#include "TypesUtils.hpp"
namespace armnn
@@ -16,7 +17,7 @@ namespace armnn
using NetworkId = int;
-class IClTunedParameters;
+class IGpuAccTunedParameters;
class IRuntime;
using IRuntimePtr = std::unique_ptr;
@@ -26,66 +27,80 @@ class IRuntime
public:
struct CreationOptions
{
- Compute m_DefaultComputeDevice;
- bool m_UseCpuRefAsFallback;
- /// If set, uses the CL tuned parameters from the given object when executing CL workloads.
+ CreationOptions()
+ : m_GpuAccTunedParameters(nullptr)
+ , m_EnableGpuProfiling(false)
+ {}
+
+ /// If set, uses the GpuAcc tuned parameters from the given object when executing GPU workloads.
/// It will also be updated with new tuned parameters if it is configured to do so.
- IClTunedParameters* m_ClTunedParameters;
-
- CreationOptions(Compute defaultComputeDevice)
- : m_DefaultComputeDevice(defaultComputeDevice)
- , m_UseCpuRefAsFallback(true)
- , m_ClTunedParameters(nullptr)
- {
- }
+ std::shared_ptr<IGpuAccTunedParameters> m_GpuAccTunedParameters;
+
+ // Setting this flag will allow the user to obtain GPU profiling information from the runtime.
+ bool m_EnableGpuProfiling;
};
static IRuntime* CreateRaw(const CreationOptions& options);
static IRuntimePtr Create(const CreationOptions& options);
static void Destroy(IRuntime* runtime);
+ /// Loads a complete network into the IRuntime.
+ /// @param [out] networkIdOut - Unique identifier for the network is returned in this reference.
+ /// @param [in] network - Complete network to load into the IRuntime.
+ /// The runtime takes ownership of the network once passed in.
+ /// @return armnn::Status
+ virtual Status LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr network) = 0;
+
/// Load a complete network into the IRuntime.
/// @param [out] networkIdOut Unique identifier for the network is returned in this reference.
/// @param [in] network Complete network to load into the IRuntime.
+ /// @param [out] errorMessage Error message if there were any errors.
/// The runtime takes ownership of the network once passed in.
/// @return armnn::Status
- virtual Status LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr network) = 0;
+ virtual Status LoadNetwork(NetworkId& networkIdOut,
+ IOptimizedNetworkPtr network,
+ std::string & errorMessage) = 0;
virtual TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const = 0;
virtual TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const = 0;
- // Evaluate network using input in inputTensors, outputs filled into outputTensors
+ /// Evaluates a network using the input supplied in inputTensors and fills outputTensors with the results.
virtual Status EnqueueWorkload(NetworkId networkId,
- const InputTensors& inputTensors,
- const OutputTensors& outputTensors) = 0;
+ const InputTensors& inputTensors,
+ const OutputTensors& outputTensors) = 0;
- /// Unload a network from the IRuntime.
+ /// Unloads a network from the IRuntime.
/// At the moment this only removes the network from the m_Impl->m_Network.
/// This might need more work in the future to be AndroidNN compliant.
- /// @param [in] networkId Unique identifier for the network to be unloaded. Generated in LoadNetwork().
+ /// @param [in] networkId - Unique identifier for the network to be unloaded. Generated in LoadNetwork().
/// @return armnn::Status
virtual Status UnloadNetwork(NetworkId networkId) = 0;
- virtual const DeviceSpec& GetDeviceSpec() const = 0;
+ virtual const IDeviceSpec& GetDeviceSpec() const = 0;
+
+ /// Gets the profiler corresponding to the given network id.
+ /// @param networkId The id of the network for which to get the profile.
+ /// @return A pointer to the requested profiler, or nullptr if not found.
+ virtual const std::shared_ptr<IProfiler> GetProfiler(NetworkId networkId) const = 0;
protected:
~IRuntime() {}
};
-using IClTunedParametersPtr = std::unique_ptr;
+using IGpuAccTunedParametersPtr = std::shared_ptr<IGpuAccTunedParameters>;
-/// Manages a set of Open CL parameters which have been tuned for maximum performance.
-/// Pass an instance of this object to the IRuntime::Create() method (via IRuntime::CreationOptions) to use it
-/// for all CL workload execution.
+/// Manages a set of GpuAcc parameters which have been tuned for maximum performance.
+/// Pass an instance of this object to the IRuntime::Create() method (via IRuntime::CreationOptions) to use it
+/// for all GPU workload execution.
///
/// Can be created in two modes:
-/// - In UseTunedParameters mode the parameters stored in this object are used to execute CL workloads.
-/// - In UpdateTunedParameters mode, additionally, whenever a CL workload is executed for the first time the
+/// - In UseTunedParameters mode, the parameters stored in this object are used to execute GPU workloads.
+/// - In UpdateTunedParameters mode, additionally, whenever a GPU workload is executed for the first time, the
/// optimum parameters will be found and stored in this object. WARNING - This tuning can be slow.
///
-/// The parameters can be loaded from and saved to a file so that you first run a slow initial read-write
+/// The parameters can be loaded from and saved to a file so that you can first run a slow initial read-write
/// execution, save the parameters for later and then run fast read-only executions using the optimised parameters.
-class IClTunedParameters
+class IGpuAccTunedParameters
{
public:
enum class Mode
@@ -96,10 +111,10 @@ public:
/// Creates an IClTunedParameters with the given mode.
/// @{
- static IClTunedParameters* CreateRaw(Mode mode);
- static IClTunedParametersPtr Create(Mode mode);
+ static IGpuAccTunedParameters* CreateRaw(Mode mode);
+ static IGpuAccTunedParametersPtr Create(Mode mode);
/// @}
- static void Destroy(IClTunedParameters* params);
+ static void Destroy(IGpuAccTunedParameters* params);
/// Loads an existing set of tuned parameters from the given file.
/// If there is an error loading the file, an armnn::Exception is thrown.
@@ -110,7 +125,7 @@ public:
virtual void Save(const char* filename) const = 0;
protected:
- virtual ~IClTunedParameters() {};
+ virtual ~IGpuAccTunedParameters() {};
};
}
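
The renamed tuned-parameters interface, the new CreationOptions fields and the error-reporting LoadNetwork() overload plug together roughly as follows. This is a sketch only: the file name is illustrative and `optNet` is assumed to come from armnn::Optimize().

// Sketch: create a runtime that tunes GPU workloads and reports load errors.
armnn::IGpuAccTunedParametersPtr tunedParams =
    armnn::IGpuAccTunedParameters::Create(armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters);

armnn::IRuntime::CreationOptions options;             // defaults: no tuned parameters, profiling off
options.m_GpuAccTunedParameters = tunedParams;
options.m_EnableGpuProfiling    = true;
armnn::IRuntimePtr runtime = armnn::IRuntime::Create(options);

armnn::NetworkId networkId;
std::string errorMessage;
if (runtime->LoadNetwork(networkId, std::move(optNet), errorMessage) != armnn::Status::Success)
{
    std::cerr << "LoadNetwork failed: " << errorMessage << std::endl;
}

// Persist the parameters found during this (slow) tuning run for later read-only use.
tunedParams->Save("gpuacc-tuned-params.bin");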
diff --git a/include/armnn/LayerSupport.hpp b/include/armnn/LayerSupport.hpp
index 43a5756e4a845897d909fa600a57aa9c285ce3ed..c875619949d810fd0132e97fa05c03c46dc3ede4 100644
--- a/include/armnn/LayerSupport.hpp
+++ b/include/armnn/LayerSupport.hpp
@@ -13,6 +13,7 @@ namespace armnn
bool IsActivationSupported(Compute compute,
const TensorInfo& input,
+ const TensorInfo& output,
const ActivationDescriptor& descriptor,
char* reasonIfUnsupported = nullptr,
size_t reasonIfUnsupportedMaxLength = 1024);
@@ -26,6 +27,11 @@ bool IsAdditionSupported(Compute compute,
bool IsBatchNormalizationSupported(Compute compute,
const TensorInfo& input,
+ const TensorInfo& output,
+ const TensorInfo& mean,
+ const TensorInfo& var,
+ const TensorInfo& beta,
+ const TensorInfo& gamma,
const BatchNormalizationDescriptor& descriptor,
char* reasonIfUnsupported = nullptr,
size_t reasonIfUnsupportedMaxLength = 1024);
@@ -35,6 +41,18 @@ bool IsConstantSupported(Compute compute,
char* reasonIfUnsupported = nullptr,
size_t reasonIfUnsupportedMaxLength = 1024);
+bool IsConvertFp16ToFp32Supported(Compute compute,
+ const TensorInfo& input,
+ const TensorInfo& output,
+ char* reasonIfUnsupported = nullptr,
+ size_t reasonIfUnsupportedMaxLength = 1024);
+
+bool IsConvertFp32ToFp16Supported(Compute compute,
+ const TensorInfo& input,
+ const TensorInfo& output,
+ char* reasonIfUnsupported = nullptr,
+ size_t reasonIfUnsupportedMaxLength = 1024);
+
bool IsConvolution2dSupported(Compute compute,
const TensorInfo& input,
const TensorInfo& output,
@@ -46,8 +64,10 @@ bool IsConvolution2dSupported(Compute compute,
bool IsDepthwiseConvolutionSupported(Compute compute,
const TensorInfo& input,
+ const TensorInfo& output,
const DepthwiseConvolution2dDescriptor& descriptor,
const TensorInfo& weights,
+ const TensorInfo& biases,
char* reasonIfUnsupported = nullptr,
size_t reasonIfUnsupportedMaxLength = 1024);
@@ -57,16 +77,35 @@ bool IsInputSupported(Compute compute,
size_t reasonIfUnsupportedMaxLength = 1024);
bool IsFullyConnectedSupported(Compute compute,
- const TensorInfo& input,const
- FullyConnectedDescriptor& descriptor,
+ const TensorInfo& input,
+ const TensorInfo& output,
+ const TensorInfo& weights,
+ const TensorInfo& biases,
+ const FullyConnectedDescriptor& descriptor,
char* reasonIfUnsupported = nullptr,
size_t reasonIfUnsupportedMaxLength = 1024);
bool IsL2NormalizationSupported(Compute compute,
const TensorInfo& input,
+ const TensorInfo& output,
char* reasonIfUnsupported = nullptr,
size_t reasonIfUnsupportedMaxLength = 1024);
+bool IsLstmSupported(Compute compute, const TensorInfo& input, const TensorInfo& outputStateIn,
+ const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer,
+ const TensorInfo& outputStateOut, const TensorInfo& cellStateOut,
+ const TensorInfo& output, const LstmDescriptor& descriptor,
+ const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights,
+ const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights,
+ const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights,
+ const TensorInfo& forgetGateBias, const TensorInfo& cellBias,
+ const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights,
+ const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights,
+ const TensorInfo* inputGateBias, const TensorInfo* projectionWeights,
+ const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights,
+ const TensorInfo* cellToOutputWeights, char* reasonIfUnsupported = nullptr,
+ size_t reasonIfUnsupportedMaxLength = 1024);
+
bool IsMergerSupported(Compute compute,
const std::vector inputs,
const OriginsDescriptor& descriptor,
@@ -76,6 +115,7 @@ bool IsMergerSupported(Compute compute,
bool IsMultiplicationSupported(Compute compute,
const TensorInfo& input0,
const TensorInfo& input1,
+ const TensorInfo& output,
char* reasonIfUnsupported = nullptr,
size_t reasonIfUnsupportedMaxLength = 1024);
@@ -112,6 +152,7 @@ bool IsResizeBilinearSupported(Compute compute,
bool IsSoftmaxSupported(Compute compute,
const TensorInfo& input,
+ const TensorInfo& output,
const SoftmaxDescriptor& descriptor,
char* reasonIfUnsupported = nullptr,
size_t reasonIfUnsupportedMaxLength = 1024);
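
Every IsXxxSupported() query above follows the same pattern: pass the tensors involved plus an optional character buffer that receives the reason when a configuration is rejected. A sketch using the new FP16 conversion query; the tensor shapes are illustrative.

// Sketch: check whether GpuAcc can run an Fp16 -> Fp32 conversion of a 1x8 tensor.
const unsigned int dims[] = { 1, 8 };
armnn::TensorInfo inputInfo (armnn::TensorShape(2, dims), armnn::DataType::Float16);
armnn::TensorInfo outputInfo(armnn::TensorShape(2, dims), armnn::DataType::Float32);

char reason[1024] = { 0 };
bool supported = armnn::IsConvertFp16ToFp32Supported(armnn::Compute::GpuAcc,
                                                     inputInfo,
                                                     outputInfo,
                                                     reason,
                                                     sizeof(reason));
if (!supported)
{
    std::cerr << "ConvertFp16ToFp32 rejected: " << reason << std::endl;
}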
diff --git a/include/armnn/LstmParams.hpp b/include/armnn/LstmParams.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..cfca0df5bb941f0e05d87ca8224e3b6f92b7fb52
--- /dev/null
+++ b/include/armnn/LstmParams.hpp
@@ -0,0 +1,55 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#pragma once
+
+#include "TensorFwd.hpp"
+
+namespace armnn
+{
+
+struct LstmInputParams
+{
+ LstmInputParams()
+ : m_InputToInputWeights(nullptr)
+ , m_InputToForgetWeights(nullptr)
+ , m_InputToCellWeights(nullptr)
+ , m_InputToOutputWeights(nullptr)
+ , m_RecurrentToInputWeights(nullptr)
+ , m_RecurrentToForgetWeights(nullptr)
+ , m_RecurrentToCellWeights(nullptr)
+ , m_RecurrentToOutputWeights(nullptr)
+ , m_CellToInputWeights(nullptr)
+ , m_CellToForgetWeights(nullptr)
+ , m_CellToOutputWeights(nullptr)
+ , m_InputGateBias(nullptr)
+ , m_ForgetGateBias(nullptr)
+ , m_CellBias(nullptr)
+ , m_OutputGateBias(nullptr)
+ , m_ProjectionWeights(nullptr)
+ , m_ProjectionBias(nullptr)
+ {
+ }
+
+ const ConstTensor* m_InputToInputWeights;
+ const ConstTensor* m_InputToForgetWeights;
+ const ConstTensor* m_InputToCellWeights;
+ const ConstTensor* m_InputToOutputWeights;
+ const ConstTensor* m_RecurrentToInputWeights;
+ const ConstTensor* m_RecurrentToForgetWeights;
+ const ConstTensor* m_RecurrentToCellWeights;
+ const ConstTensor* m_RecurrentToOutputWeights;
+ const ConstTensor* m_CellToInputWeights;
+ const ConstTensor* m_CellToForgetWeights;
+ const ConstTensor* m_CellToOutputWeights;
+ const ConstTensor* m_InputGateBias;
+ const ConstTensor* m_ForgetGateBias;
+ const ConstTensor* m_CellBias;
+ const ConstTensor* m_OutputGateBias;
+ const ConstTensor* m_ProjectionWeights;
+ const ConstTensor* m_ProjectionBias;
+};
+
+} // namespace armnn
+
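LstmInputParams is consumed by the INetwork::AddLstmLayer() overload added earlier in this change; any pointer left at nullptr simply means the corresponding optional tensor (for example the projection weights) is not used. A minimal sketch, with the ConstTensor objects assumed to be prepared elsewhere and the descriptor members not shown here.

// Sketch: wire up the mandatory LSTM weights and add the layer to a network.
armnn::LstmInputParams params;                        // every pointer starts as nullptr
params.m_InputToForgetWeights     = &inputToForgetWeights;
params.m_InputToCellWeights       = &inputToCellWeights;
params.m_InputToOutputWeights     = &inputToOutputWeights;
params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
params.m_RecurrentToCellWeights   = &recurrentToCellWeights;
params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
params.m_ForgetGateBias           = &forgetGateBias;
params.m_CellBias                 = &cellBias;
params.m_OutputGateBias           = &outputGateBias;

armnn::LstmDescriptor descriptor;                     // descriptor members not shown in this change
armnn::IConnectableLayer* lstm = network->AddLstmLayer(descriptor, params, "lstm");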
diff --git a/include/armnn/NetworkFwd.hpp b/include/armnn/NetworkFwd.hpp
index 75667fdfd09ae28584830c9df927449d6c95cf92..56aedaf8d41dcc6bf74d21e69e84e245bde807d6 100644
--- a/include/armnn/NetworkFwd.hpp
+++ b/include/armnn/NetworkFwd.hpp
@@ -6,6 +6,7 @@
namespace armnn
{
+struct LstmInputParams;
class INetwork;
class IOptimizedNetwork;
class Graph;
@@ -13,4 +14,4 @@ class IInputSlot;
class IOutputSlot;
class IConnectableLayer;
class IDataLayer;
-}
\ No newline at end of file
+}
diff --git a/include/armnn/Tensor.hpp b/include/armnn/Tensor.hpp
index 910278f33f51579bbe82e77e9d96049506ded807..718dd817c5df11cb90f4a789d386bb29450e8172 100644
--- a/include/armnn/Tensor.hpp
+++ b/include/armnn/Tensor.hpp
@@ -18,7 +18,7 @@ namespace armnn
class TensorShape
{
public:
- /// Empty (invalid) constructor
+ /// Empty (invalid) constructor.
TensorShape();
TensorShape(unsigned int numDimensions, const unsigned int* dimensionSizes);
@@ -53,7 +53,7 @@ private:
class TensorInfo
{
public:
- /// Empty (invalid) constructor
+ /// Empty (invalid) constructor.
TensorInfo();
TensorInfo(const TensorShape& shape, DataType dataType,
@@ -88,7 +88,7 @@ public:
private:
TensorShape m_Shape;
DataType m_DataType;
- /// Scale and offset values used for quantization
+ /// Scale and offset values are used for quantization.
struct Quantization
{
Quantization() : m_Scale(0.f), m_Offset(0) {}
@@ -102,11 +102,11 @@ template
class BaseTensor
{
public:
- /// Empty (invalid) constructor
+ /// Empty (invalid) constructor.
BaseTensor();
/// Constructor from a raw memory pointer.
- /// @param memoryArea Region of CPU-addressable memory where tensor data will be stored. Must be valid while
+ /// @param memoryArea - Region of CPU-addressable memory where tensor data will be stored. Must be valid while
/// workloads are on the fly. Tensor instances do not claim ownership of referenced memory regions, that is,
/// no attempt will be made by ArmNN to free these memory regions automatically.
BaseTensor(const TensorInfo& info, MemoryType memoryArea);
@@ -130,7 +130,7 @@ public:
MemoryType GetMemoryArea() const { return m_MemoryArea; }
protected:
- // protected destructor to stop users from making these
+ // Protected destructor to stop users from making these
// (could still new one on the heap and then leak it...)
~BaseTensor() {}
@@ -144,21 +144,23 @@ private:
class Tensor : public BaseTensor
{
public:
- using BaseTensor::BaseTensor; // Bring in the constructors and assignment operator
+ /// Brings in the constructors and assignment operator.
+ using BaseTensor::BaseTensor;
};
/// A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
class ConstTensor : public BaseTensor
{
public:
- using BaseTensor::BaseTensor; // Bring in the constructors and assignment operator
+ /// Brings in the constructors and assignment operator.
+ using BaseTensor::BaseTensor;
ConstTensor() : BaseTensor() {} // This needs to be redefined explicitly??
- // Can be implicitly constructed from non-const Tensor
+ /// Can be implicitly constructed from non-const Tensor.
ConstTensor(const Tensor& other) : BaseTensor(other.GetInfo(), other.GetMemoryArea()) {}
/// Constructor from a backing container.
- /// @param container An stl-like container type which implements data() and size() methods.
+ /// @param container - An stl-like container type which implements data() and size() methods.
/// Presence of data() and size() is a strong indicator of the continuous memory layout of the container,
/// which is a requirement for Tensor data. Tensor instances do not claim ownership of referenced memory regions,
/// that is, no attempt will be made by ArmNN to free these memory regions automatically.
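
For reference, the container constructor documented above can be used as in this sketch; the shape and data are illustrative, and the caller keeps ownership of the backing memory.

// Sketch: wrap existing float data in a ConstTensor without copying it.
std::vector<float> weightsData(9 * 4, 0.0f);          // must stay alive while workloads may run
const unsigned int dims[] = { 9, 4 };
armnn::TensorInfo weightsInfo(armnn::TensorShape(2, dims), armnn::DataType::Float32);
armnn::ConstTensor weights(weightsInfo, weightsData); // uses the container's data() and size()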
diff --git a/include/armnn/Types.hpp b/include/armnn/Types.hpp
index c9a4bf13e5a1722a61b5f206fd450d2341b2303a..fe1fcb45d2a15755eeda11a997a1bb062df30239 100644
--- a/include/armnn/Types.hpp
+++ b/include/armnn/Types.hpp
@@ -22,9 +22,10 @@ enum class Status
enum class DataType
{
- Float32 = 0,
- QuantisedAsymm8 = 1,
- Signed32 = 2
+ Float16 = 0,
+ Float32 = 1,
+ QuantisedAsymm8 = 2,
+ Signed32 = 3
};
enum class ActivationFunction
@@ -33,7 +34,7 @@ enum class ActivationFunction
TanH = 1,
Linear = 2,
ReLu = 3,
- BoundedReLu = 4, //< min(a, max(b, input))
+ BoundedReLu = 4, ///< min(a, max(b, input))
SoftReLu = 5,
LeakyReLu = 6,
Abs = 7,
@@ -51,16 +52,18 @@ enum class PoolingAlgorithm
///
/// The padding method modifies the output of pooling layers.
/// In both supported methods, the values are ignored (they are
-/// not even zeros which would make a difference for max pooling
+/// not even zeroes, which would make a difference for max pooling
/// a tensor with negative values). The difference between
-/// IgnoreValue and Exclude is that the former count the padding
+/// IgnoreValue and Exclude is that the former counts the padding
/// fields in the divisor of Average and L2 pooling, while
/// Exclude does not.
///
enum class PaddingMethod
{
- IgnoreValue = 0, // The padding fields count, but ignored
- Exclude = 1 // The padding fields don't count and ignored
+ /// The padding fields count, but are ignored
+ IgnoreValue = 0,
+ /// The padding fields don't count and are ignored
+ Exclude = 1
};
enum class NormalizationAlgorithmChannel
@@ -71,8 +74,10 @@ enum class NormalizationAlgorithmChannel
enum class NormalizationAlgorithmMethod
{
- LocalBrightness = 0, /* Krichevsky 2012: Local Brightness Normalization */
- LocalContrast = 1 /* Jarret 2009: Local Contrast Normalization */
+ /// Krichevsky 2012: Local Brightness Normalization
+ LocalBrightness = 0,
+ /// Jarret 2009: Local Contrast Normalization
+ LocalContrast = 1
};
enum class OutputShapeRounding
@@ -83,15 +88,20 @@ enum class OutputShapeRounding
enum class Compute
{
- CpuRef = 0, // CPU Execution: Reference C++ kernels
- CpuAcc = 1, // CPU Execution: NEON: ArmCompute
- GpuAcc = 2, // GPU Execution: OpenCL: ArmCompute
+ /// CPU Execution: Reference C++ kernels
+ CpuRef = 0,
+ /// CPU Execution: NEON: ArmCompute
+ CpuAcc = 1,
+ /// GPU Execution: OpenCL: ArmCompute
+ GpuAcc = 2,
Undefined = 5
};
-struct DeviceSpec
+class IDeviceSpec
{
- Compute DefaultComputeDevice;
+protected:
+ IDeviceSpec() {};
+ virtual ~IDeviceSpec() {};
};
/// Type of identifiers for bindable layers (inputs, outputs).
@@ -105,10 +115,10 @@ public:
using ArrayType = std::array;
using ConstIterator = typename ArrayType::const_iterator;
- /// @param dimMappings Indicates how to translate tensor elements from a given source into the target destination,
+ /// @param dimMappings - Indicates how to translate tensor elements from a given source into the target destination,
/// when source and target potentially have different memory layouts.
///
- /// E.g. For a 4-d tensor laid out in memory with format (Batch Element, Height, Width, Channels),
+ /// E.g. For a 4-d tensor laid out in memory with the format (Batch Element, Height, Width, Channels),
/// which is to be passed as an input to ArmNN, each source dimension is mapped to the corresponding
/// ArmNN dimension. The Batch dimension remains the same (0 -> 0). The source Height dimension is mapped
/// to the location of the ArmNN Height dimension (1 -> 2). Similar arguments are made for the Width and
@@ -152,7 +162,7 @@ private:
SizeType m_NumDimMappings;
};
-// Define LayerGuid type.
+/// Define LayerGuid type.
using LayerGuid = unsigned int;
}
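
The dimMappings example in the comment above, written out concretely. This assumes a PermutationVector constructor that takes the raw mapping array and its length, which is not shown in this hunk.

// Sketch: map an NHWC source tensor (Batch, Height, Width, Channels) onto ArmNN's
// NCHW layout: Batch 0 -> 0, Height 1 -> 2, Width 2 -> 3, Channels 3 -> 1.
const unsigned int mappings[] = { 0, 2, 3, 1 };
armnn::PermutationVector nhwcToArmNN(mappings, 4);    // constructor signature assumed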
diff --git a/include/armnn/TypesUtils.hpp b/include/armnn/TypesUtils.hpp
index c63b653ae34f6a3883330f4efdb7197342276e60..3077ce111f3b9e2f92b54883c2b48f25ae28d596 100644
--- a/include/armnn/TypesUtils.hpp
+++ b/include/armnn/TypesUtils.hpp
@@ -10,6 +10,7 @@
#include
#include
#include
+#include <set>
namespace armnn
{
@@ -89,8 +90,9 @@ constexpr unsigned int GetDataTypeSize(DataType dataType)
{
switch (dataType)
{
- case DataType::Signed32:
- case DataType::Float32: return 4U;
+ case DataType::Float16: return 2U;
+ case DataType::Float32:
+ case DataType::Signed32: return 4U;
case DataType::QuantisedAsymm8: return 1U;
default: return 0U;
}
@@ -107,17 +109,17 @@ constexpr bool StrEqual(const char* strA, const char (&strB)[N])
return isEqual;
}
-constexpr Compute ParseComputeDevice(const char* str)
+constexpr armnn::Compute ParseComputeDevice(const char* str)
{
- if (StrEqual(str, "CpuAcc"))
+ if (armnn::StrEqual(str, "CpuAcc"))
{
return armnn::Compute::CpuAcc;
}
- else if (StrEqual(str, "CpuRef"))
+ else if (armnn::StrEqual(str, "CpuRef"))
{
return armnn::Compute::CpuRef;
}
- else if (StrEqual(str, "GpuAcc"))
+ else if (armnn::StrEqual(str, "GpuAcc"))
{
return armnn::Compute::GpuAcc;
}
@@ -131,59 +133,60 @@ constexpr const char* GetDataTypeName(DataType dataType)
{
switch (dataType)
{
- case DataType::Float32: return "Float32";
+ case DataType::Float16: return "Float16";
+ case DataType::Float32: return "Float32";
case DataType::QuantisedAsymm8: return "Unsigned8";
- case DataType::Signed32: return "Signed32";
- default: return "Unknown";
+ case DataType::Signed32: return "Signed32";
+
+ default:
+ return "Unknown";
}
}
-template
-constexpr DataType GetDataType();
-
-template <>
-constexpr DataType GetDataType()
-{
- return DataType::Float32;
-}
-template <>
-constexpr DataType GetDataType()
-{
- return DataType::QuantisedAsymm8;
-}
+template<typename T>
+struct IsHalfType
+ : std::integral_constant<bool, std::is_floating_point<T>::value && sizeof(T) == 2>
+{};
-template <>
-constexpr DataType GetDataType()
-{
- return DataType::Signed32;
-}
+template<typename T, typename U = T>
+struct GetDataTypeImpl;
template<typename T>
-constexpr bool IsQuantizedType()
+struct GetDataTypeImpl<T, typename std::enable_if_t<IsHalfType<T>::value, T>>
{
- return std::is_integral::value;
-}
-
+ static constexpr DataType Value = DataType::Float16;
+};
-template
-struct ResolveTypeImpl;
+template<>
+struct GetDataTypeImpl<float>
+{
+ static constexpr DataType Value = DataType::Float32;
+};
template<>
-struct ResolveTypeImpl
+struct GetDataTypeImpl<uint8_t>
{
- using Type = uint8_t;
+ static constexpr DataType Value = DataType::QuantisedAsymm8;
};
template<>
-struct ResolveTypeImpl
+struct GetDataTypeImpl<int32_t>
{
- using Type = float;
+ static constexpr DataType Value = DataType::Signed32;
};
-template
-using ResolveType = typename ResolveTypeImpl::Type;
+template<typename T>
+constexpr DataType GetDataType()
+{
+ return GetDataTypeImpl<T>::Value;
+}
+template<typename T>
+constexpr bool IsQuantizedType()
+{
+ return std::is_integral<T>::value;
+}
inline std::ostream& operator<<(std::ostream& os, Status stat)
{
@@ -191,7 +194,23 @@ inline std::ostream& operator<<(std::ostream& os, Status stat)
return os;
}
-inline std::ostream& operator<<(std::ostream& os, Compute compute)
+inline std::ostream& operator<<(std::ostream& os, const std::vector<Compute>& compute)
+{
+ for (const Compute& comp : compute) {
+ os << GetComputeDeviceAsCString(comp) << " ";
+ }
+ return os;
+}
+
+inline std::ostream& operator<<(std::ostream& os, const std::set<Compute>& compute)
+{
+ for (const Compute& comp : compute) {
+ os << GetComputeDeviceAsCString(comp) << " ";
+ }
+ return os;
+}
+
+inline std::ostream& operator<<(std::ostream& os, const Compute& compute)
{
os << GetComputeDeviceAsCString(compute);
return os;
@@ -212,11 +231,11 @@ inline std::ostream & operator<<(std::ostream & os, const armnn::TensorShape & s
return os;
}
-/// Quantize a floating point data type into an 8-bit data type
-/// @param value The value to quantize
-/// @param scale The scale (must be non-zero)
-/// @param offset The offset
-/// @return The quantized value calculated as round(value/scale)+offset
+/// Quantize a floating point data type into an 8-bit data type.
+/// @param value - The value to quantize.
+/// @param scale - The scale (must be non-zero).
+/// @param offset - The offset.
+/// @return - The quantized value calculated as round(value/scale)+offset.
///
template<typename QuantizedType>
inline QuantizedType Quantize(float value, float scale, int32_t offset)
@@ -234,11 +253,11 @@ inline QuantizedType Quantize(float value, float scale, int32_t offset)
return quantizedBits;
}
-/// Dequantize an 8-bit data type into a floating point data type
-/// @param value The value to dequantize
-/// @param scale The scale (must be non-zero)
-/// @param offset The offset
-/// @return The dequantized value calculated as (value-offset)*scale
+/// Dequantize an 8-bit data type into a floating point data type.
+/// @param value - The value to dequantize.
+/// @param scale - The scale (must be non-zero).
+/// @param offset - The offset.
+/// @return - The dequantized value calculated as (value-offset)*scale.
///
template<typename QuantizedType>
inline float Dequantize(QuantizedType value, float scale, int32_t offset)
@@ -249,4 +268,18 @@ inline float Dequantize(QuantizedType value, float scale, int32_t offset)
return dequantized;
}
+template<typename T>
+void VerifyTensorInfoDataType(const armnn::TensorInfo & info)
+{
+ auto expectedType = armnn::GetDataType<T>();
+ if (info.GetDataType() != expectedType)
+ {
+ std::stringstream ss;
+ ss << "Unexpected datatype:" << armnn::GetDataTypeName(info.GetDataType())
+ << " for tensor:" << info.GetShape()
+ << ". The type expected to be: " << armnn::GetDataTypeName(expectedType);
+ throw armnn::Exception(ss.str());
+ }
+}
+
} //namespace armnn
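
The quantisation helpers and the reworked GetDataType<T>() trait behave as in this sketch; the scale and offset values are illustrative.

// Sketch: round-trip a float through the 8-bit asymmetric quantisation helpers.
const float   scale  = 0.5f;
const int32_t offset = 10;

uint8_t q = armnn::Quantize<uint8_t>(3.0f, scale, offset); // round(3.0 / 0.5) + 10 = 16
float   r = armnn::Dequantize(q, scale, offset);           // (16 - 10) * 0.5 = 3.0

// The type traits resolve at compile time.
static_assert(armnn::GetDataType<uint8_t>() == armnn::DataType::QuantisedAsymm8,
              "uint8_t maps to QuantisedAsymm8");
static_assert(armnn::GetDataTypeSize(armnn::DataType::Float16) == 2u,
              "Float16 elements occupy two bytes");

// VerifyTensorInfoDataType<float>(info) throws an armnn::Exception if
// info.GetDataType() is not DataType::Float32.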
diff --git a/include/armnn/Utils.hpp b/include/armnn/Utils.hpp
index 1a0c34baad5e1c1a68cadd992f91bbac5b478c77..4b5cb9892df26bfaed075a5e2ce2ebe48ee6e421 100644
--- a/include/armnn/Utils.hpp
+++ b/include/armnn/Utils.hpp
@@ -4,6 +4,9 @@
//
#pragma once
+#include
+#include "armnn/TypesUtils.hpp"
+
namespace armnn
{
@@ -24,4 +27,4 @@ enum class LogSeverity
/// severity: All log messages that are at this severity level or higher will be printed, others will be ignored.
void ConfigureLogging(bool printToStandardOutput, bool printToDebugOutput, LogSeverity severity);
-}
+} // namespace armnn
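
ConfigureLogging is normally the first ArmNN call an application makes. A minimal sketch, assuming an Info value exists in the LogSeverity enum (its body is not shown in this hunk).

// Sketch: log Info-level and more severe messages to standard output only.
armnn::ConfigureLogging(true  /* printToStandardOutput */,
                        false /* printToDebugOutput */,
                        armnn::LogSeverity::Info);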
diff --git a/include/armnn/Version.hpp b/include/armnn/Version.hpp
index d5f794eb8b116909f1ad4888a38aa004117a2227..1a290d71779dce4e57eb63581f9093b5b649c8a8 100644
--- a/include/armnn/Version.hpp
+++ b/include/armnn/Version.hpp
@@ -9,4 +9,4 @@
// YYYY = 4-digit year number
// MM = 2-digit month number
// PP = 2-digit patch number
-#define ARMNN_VERSION "20180502"
+#define ARMNN_VERSION "20180800"
diff --git a/include/armnnCaffeParser/ICaffeParser.hpp b/include/armnnCaffeParser/ICaffeParser.hpp
index 55fc85052b0ff79bcd10fa6033764dde2c7353cb..0f23a658b25cbba1f7d1a4040b428d6863f2bf0f 100644
--- a/include/armnnCaffeParser/ICaffeParser.hpp
+++ b/include/armnnCaffeParser/ICaffeParser.hpp
@@ -28,28 +28,28 @@ public:
static ICaffeParserPtr Create();
static void Destroy(ICaffeParser* parser);
- /// Create the network from a protobuf text file on disk
+ /// Create the network from a protobuf text file on disk.
virtual armnn::INetworkPtr CreateNetworkFromTextFile(
const char* graphFile,
const std::map& inputShapes,
const std::vector& requestedOutputs) = 0;
- /// Create the network from a protobuf binary file on disk
+ /// Create the network from a protobuf binary file on disk.
virtual armnn::INetworkPtr CreateNetworkFromBinaryFile(
const char* graphFile,
const std::map& inputShapes,
const std::vector& requestedOutputs) = 0;
- /// Create the network directly from protobuf text in a string. Useful for debugging/testing
+ /// Create the network directly from protobuf text in a string. Useful for debugging/testing.
virtual armnn::INetworkPtr CreateNetworkFromString(
const char* protoText,
const std::map& inputShapes,
const std::vector& requestedOutputs) = 0;
- /// Retrieve binding info (layer id and tensor info) for the network input identified by the given layer name
+ /// Retrieve binding info (layer id and tensor info) for the network input identified by the given layer name.
virtual BindingPointInfo GetNetworkInputBindingInfo(const std::string& name) const = 0;
- /// Retrieve binding info (layer id and tensor info) for the network output identified by the given layer name
+ /// Retrieve binding info (layer id and tensor info) for the network output identified by the given layer name.
virtual BindingPointInfo GetNetworkOutputBindingInfo(const std::string& name) const = 0;
protected:
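
The parser calls documented above are typically used together as in this sketch. The file name and output layer name are illustrative, and the exact map/vector element types are assumptions since they are elided in this diff.

// Sketch: parse a Caffe prototxt and look up the binding info of one output.
armnnCaffeParser::ICaffeParserPtr parser = armnnCaffeParser::ICaffeParser::Create();

std::map<std::string, armnn::TensorShape> inputShapes;   // empty: shapes taken from the prototxt
std::vector<std::string> requestedOutputs = { "prob" };

armnn::INetworkPtr network =
    parser->CreateNetworkFromTextFile("model.prototxt", inputShapes, requestedOutputs);

armnnCaffeParser::BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo("prob");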
diff --git a/include/armnnOnnxParser/IOnnxParser.hpp b/include/armnnOnnxParser/IOnnxParser.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c7ec41ec84d1abf83540ca65dc0c6388b2adc2c6
--- /dev/null
+++ b/include/armnnOnnxParser/IOnnxParser.hpp
@@ -0,0 +1,48 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#pragma once
+
+#include
+#include
+
+#include
+#include
+#include