diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index bbf5bec00f724ed177f2e03ded18ae352916c2e5..58f80cc5903240abc7361700573ccac0baff7cea 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -76,32 +76,29 @@ BENCH_BINARY_OP(bitwise_and, uint8_t); BENCH_BINARY_OP(compare_equal_u8, uint8_t); BENCH_BINARY_OP(compare_greater_u8, uint8_t); -template +template static void bench_unary_op(Function f, benchmark::State& state) { bench_functor(state, [f]() { - (void)f(get_source_buffer_a(), - image_width * Channels * sizeof(I), - get_destination_buffer(), - image_width * Channels * sizeof(O), image_width, image_height); + (void)f(get_source_buffer_a(), + image_width * InChannels * sizeof(I), + get_destination_buffer(), + image_width * OutChannels * sizeof(O), image_width, image_height); }); } -#define BENCH_UNARY_OP(name, channels, type) \ - static void name(benchmark::State& state) { \ - bench_unary_op(kleidicv_##name, state); \ - } \ +#define BENCH_UNARY_OP(name, channels, type) \ + static void name(benchmark::State& state) { \ + bench_unary_op(kleidicv_##name, state); \ + } \ BENCHMARK(name) -BENCH_UNARY_OP(rgb_to_yuv_u8, 3, uint8_t); -BENCH_UNARY_OP(rgba_to_yuv_u8, 4, uint8_t); -BENCH_UNARY_OP(bgr_to_yuv_u8, 3, uint8_t); -BENCH_UNARY_OP(bgra_to_yuv_u8, 4, uint8_t); BENCH_UNARY_OP(exp_f32, 1, float); -#define BENCH_UNARY_OP_DIFFERENT_IO_TYPES(name, itype, otype) \ - static void name(benchmark::State& state) { \ - bench_unary_op(kleidicv_##name, state); \ - } \ +#define BENCH_UNARY_OP_DIFFERENT_IO_TYPES(name, itype, otype) \ + static void name(benchmark::State& state) { \ + bench_unary_op(kleidicv_##name, state); \ + } \ BENCHMARK(name) BENCH_UNARY_OP_DIFFERENT_IO_TYPES(float_conversion_f32_s8, float, int8_t); @@ -109,6 +106,31 @@ BENCH_UNARY_OP_DIFFERENT_IO_TYPES(float_conversion_f32_u8, float, uint8_t); BENCH_UNARY_OP_DIFFERENT_IO_TYPES(float_conversion_s8_f32, int8_t, float); BENCH_UNARY_OP_DIFFERENT_IO_TYPES(float_conversion_u8_f32, uint8_t, float); +#define BENCH_UNARY_OP_DIFFERENT_CHANNEL_NUMBER(name, in_channels, \ + out_channels, type) \ + static void name(benchmark::State& state) { \ + bench_unary_op(kleidicv_##name, \ + state); \ + } \ + BENCHMARK(name) + +BENCH_UNARY_OP_DIFFERENT_CHANNEL_NUMBER(rgb_to_yuv_u8, 3, 3, uint8_t); +BENCH_UNARY_OP_DIFFERENT_CHANNEL_NUMBER(rgba_to_yuv_u8, 4, 3, uint8_t); +BENCH_UNARY_OP_DIFFERENT_CHANNEL_NUMBER(bgr_to_yuv_u8, 3, 3, uint8_t); +BENCH_UNARY_OP_DIFFERENT_CHANNEL_NUMBER(bgra_to_yuv_u8, 4, 3, uint8_t); + +BENCH_UNARY_OP_DIFFERENT_CHANNEL_NUMBER(gray_to_rgb_u8, 1, 3, uint8_t); +BENCH_UNARY_OP_DIFFERENT_CHANNEL_NUMBER(gray_to_rgba_u8, 1, 4, uint8_t); + +BENCH_UNARY_OP_DIFFERENT_CHANNEL_NUMBER(rgb_to_bgr_u8, 3, 3, uint8_t); +BENCH_UNARY_OP_DIFFERENT_CHANNEL_NUMBER(rgb_to_rgb_u8, 3, 3, uint8_t); +BENCH_UNARY_OP_DIFFERENT_CHANNEL_NUMBER(rgba_to_bgra_u8, 4, 4, uint8_t); +BENCH_UNARY_OP_DIFFERENT_CHANNEL_NUMBER(rgba_to_rgba_u8, 4, 4, uint8_t); +BENCH_UNARY_OP_DIFFERENT_CHANNEL_NUMBER(rgb_to_bgra_u8, 3, 4, uint8_t); +BENCH_UNARY_OP_DIFFERENT_CHANNEL_NUMBER(rgb_to_rgba_u8, 3, 4, uint8_t); +BENCH_UNARY_OP_DIFFERENT_CHANNEL_NUMBER(rgba_to_bgr_u8, 4, 3, uint8_t); +BENCH_UNARY_OP_DIFFERENT_CHANNEL_NUMBER(rgba_to_rgb_u8, 4, 3, uint8_t); + static void min_max_loc_u8(benchmark::State& state) { bench_functor(state, []() { size_t min_offset, max_offset; @@ -142,20 +164,11 @@ BENCH_SCALE(scale_f32_generic, scale_f32, 1.234, 4.567, float); template static void min_max(F f, benchmark::State& state) { - // Setup - std::vector src; - src.resize(image_width * image_height); - std::mt19937 generator; - std::generate(src.begin(), src.end(), generator); - - T min_value = 0, max_value = 0; - - for (auto _ : state) { - // This code gets benchmarked - auto unused = f(src.data(), image_width * sizeof(T), image_width, - image_height, &min_value, &max_value); - (void)unused; - } + bench_functor(state, [f]() { + T min_value = 0, max_value = 0; + (void)f(get_source_buffer_a(), image_width * sizeof(T), image_width, + image_height, &min_value, &max_value); + }); } #define BENCH_MIN_MAX(name, type) \ @@ -246,6 +259,26 @@ static void gaussian_blur(benchmark::State& state) { (void)kleidicv_filter_context_release(context); } +static void gaussian_blur_3x3_u8_1ch(benchmark::State& state) { + gaussian_blur(state); +} +BENCHMARK(gaussian_blur_3x3_u8_1ch); + +static void gaussian_blur_3x3_u8_3ch(benchmark::State& state) { + gaussian_blur(state); +} +BENCHMARK(gaussian_blur_3x3_u8_3ch); + +static void gaussian_blur_5x5_u8_1ch(benchmark::State& state) { + gaussian_blur(state); +} +BENCHMARK(gaussian_blur_5x5_u8_1ch); + +static void gaussian_blur_5x5_u8_3ch(benchmark::State& state) { + gaussian_blur(state); +} +BENCHMARK(gaussian_blur_5x5_u8_3ch); + static void gaussian_blur_7x7_u8_1ch(benchmark::State& state) { gaussian_blur(state); } @@ -265,3 +298,88 @@ static void gaussian_blur_15x15_u8_3ch(benchmark::State& state) { gaussian_blur(state); } BENCHMARK(gaussian_blur_15x15_u8_3ch); + +template +static void sobel_filter(Function f, benchmark::State& state) { + bench_functor(state, [f]() { + (void)f(get_source_buffer_a(), image_width * sizeof(uint8_t), + get_destination_buffer(), image_width * sizeof(int16_t), + image_width, image_height, 1); + }); +} + +static void sobel_filter_vertical(benchmark::State& state) { + sobel_filter(kleidicv_sobel_3x3_vertical_s16_u8, state); +} +BENCHMARK(sobel_filter_vertical); + +static void sobel_filter_horizontal(benchmark::State& state) { + sobel_filter(kleidicv_sobel_3x3_horizontal_s16_u8, state); +} +BENCHMARK(sobel_filter_horizontal); + +template +static void yuv_sp(Function f, benchmark::State& state) { + bench_functor(state, [f]() { + (void)f(get_source_buffer_a(), image_width * sizeof(uint8_t), + get_source_buffer_b(), + (image_width / 2) * sizeof(uint8_t), + get_destination_buffer(), + image_width * sizeof(uint8_t), image_width, image_height, true); + }); +} + +static void yuv_sp_to_rgb(benchmark::State& state) { + yuv_sp<3>(kleidicv_yuv_sp_to_rgb_u8, state); +} +BENCHMARK(yuv_sp_to_rgb); + +static void yuv_sp_to_bgr(benchmark::State& state) { + yuv_sp<3>(kleidicv_yuv_sp_to_bgr_u8, state); +} +BENCHMARK(yuv_sp_to_bgr); + +static void yuv_sp_to_rgba(benchmark::State& state) { + yuv_sp<4>(kleidicv_yuv_sp_to_rgba_u8, state); +} +BENCHMARK(yuv_sp_to_rgba); + +static void yuv_sp_to_bgra(benchmark::State& state) { + yuv_sp<4>(kleidicv_yuv_sp_to_bgra_u8, state); +} +BENCHMARK(yuv_sp_to_bgra); + +template +static void morphology(Function f, benchmark::State& state) { + kleidicv_morphology_context_t* context = nullptr; + kleidicv_error_t err = kleidicv_morphology_create( + &context, kleidicv_rectangle_t{KernelSize, KernelSize}, + kleidicv_point_t{0, 0}, KLEIDICV_BORDER_TYPE_REPLICATE, + kleidicv_border_values_t{0, 0, 0, 0}, 1, 1, sizeof(T), + kleidicv_rectangle_t{image_width, image_height}); + if (err != KLEIDICV_OK) { + state.SkipWithError("Could not initialize morphology context."); + return; + } + + bench_functor(state, [f, context]() { + (void)f(get_source_buffer_a(), image_width * sizeof(T), + get_destination_buffer(), image_width * sizeof(T), + image_width, image_height, context); + }); + + (void)kleidicv_morphology_release(context); +} + +#define BENCH_MORPHOLOGY(name, kernel_size) \ + static void name##_##kernel_size##x##kernel_size(benchmark::State& state) { \ + morphology(kleidicv_##name##_u8, state); \ + } \ + BENCHMARK(name##_##kernel_size##x##kernel_size) + +BENCH_MORPHOLOGY(dilate, 3); +BENCH_MORPHOLOGY(dilate, 5); +BENCH_MORPHOLOGY(dilate, 17); +BENCH_MORPHOLOGY(erode, 3); +BENCH_MORPHOLOGY(erode, 5); +BENCH_MORPHOLOGY(erode, 17);