From 0cbdb430d587eedef7251a74ebd708f4de88a9e1 Mon Sep 17 00:00:00 2001 From: Viet-Hoa Do Date: Fri, 12 Jul 2024 13:37:21 +0100 Subject: [PATCH] Fix out-of-bound memory write in int4 kernels Signed-off-by: Viet-Hoa Do --- ...atmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm.c | 6 +++--- ...atmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm.c | 6 +++--- ...atmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm.c | 6 +++--- ...atmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm.c | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm.c index d987b2f4..8b649f3f 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm.c @@ -235,13 +235,13 @@ void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm( "mov x23, %x[dst]\n" "cmp x27, #0x1\n" "add x22, x23, %x[dst_stride_row]\n" - "csel x22, x22, x23, GE\n" + "csel x22, x22, x23, GT\n" "cmp x27, #0x2\n" "add x21, x23, %x[dst_stride_row], LSL #1\n" - "csel x21, x21, x22, GE\n" + "csel x21, x21, x22, GT\n" "cmp x27, #0x3\n" "add x20, x21, %x[dst_stride_row]\n" - "csel x20, x20, x21, GE\n" + "csel x20, x20, x21, GT\n" "tbz x25, #1, 6f\n" "st1 { v0.d }[0], [x20], #0x8\n" "st1 { v1.d }[0], [x21], #0x8\n" diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm.c index b93f8147..50e260d8 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm.c @@ -464,13 +464,13 @@ void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm( "mov x23, %x[dst]\n" "cmp x12, #0x1\n" "add x22, x23, %x[dst_stride_row]\n" - "csel x22, x22, x23, GE\n" + "csel x22, x22, x23, GT\n" "cmp x12, #0x2\n" "add x21, x23, %x[dst_stride_row], LSL #1\n" - "csel x21, x21, x22, GE\n" + "csel x21, x21, x22, GT\n" "cmp x12, #0x3\n" "add x20, x21, %x[dst_stride_row]\n" - "csel x20, x20, x21, GE\n" + "csel x20, x20, x21, GT\n" "tbz x25, #1, 16f\n" "st1 { v7.d }[0], [x20], #0x8\n" "st1 { v8.d }[0], [x21], #0x8\n" diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm.c index 1f7d0b8f..6a07803c 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm.c @@ -305,13 +305,13 @@ void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm( "mov x23, %x[dst]\n" "cmp x27, #0x1\n" "add x22, x23, %x[dst_stride_row]\n" - "csel x22, x22, x23, GE\n" + "csel x22, x22, x23, GT\n" "cmp x27, #0x2\n" "add x21, x23, %x[dst_stride_row], LSL #1\n" - "csel x21, x21, x22, GE\n" + "csel x21, x21, x22, GT\n" "cmp x27, #0x3\n" "add x20, x21, %x[dst_stride_row]\n" - "csel x20, x20, x21, GE\n" + "csel x20, x20, x21, GT\n" "tbz x25, #2, 7f\n" "st1 { v5.4s }, [x20], #0x10\n" "st1 { v7.4s }, [x21], #0x10\n" diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm.c index 8bcce742..1bb5d9d4 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm.c @@ -702,13 +702,13 @@ void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm( "mov x23, %x[dst]\n" "cmp x12, #0x1\n" "add x22, x23, %x[dst_stride_row]\n" - "csel x22, x22, x23, GE\n" + "csel x22, x22, x23, GT\n" "cmp x12, #0x2\n" "add x21, x23, %x[dst_stride_row], LSL #1\n" - "csel x21, x21, x22, GE\n" + "csel x21, x21, x22, GT\n" "cmp x12, #0x3\n" "add x20, x21, %x[dst_stride_row]\n" - "csel x20, x20, x21, GE\n" + "csel x20, x20, x21, GT\n" "tbz x25, #2, 19f\n" "st1 { v1.4s }, [x20], #0x10\n" "st1 { v14.4s }, [x21], #0x10\n" -- GitLab