diff --git a/src/common/keywords.txt b/src/common/keywords.txt index 816919c6fc3dae10e7c5c1e6621a22baea2153b0..b979809db5419e98ce250084b9b2013c2c2ab2d5 100644 --- a/src/common/keywords.txt +++ b/src/common/keywords.txt @@ -143,6 +143,8 @@ tdxsetx_tdx_tdx_vd tdxsetxyz_tdx_tdx_vd_vd_vd tdxsety_tdx_tdx_vd tdxsetz_tdx_tdx_vd +trigh_special +trighf_special vabs_vd_vd vabs_vf2_vf2 vabs_vf_vf diff --git a/src/common/misc.h b/src/common/misc.h index 2f21411d5df49f8c9ae0240b709f708a0fd98271..761cd385f7a977ea279d32c5210ee1df2b29bc16 100644 --- a/src/common/misc.h +++ b/src/common/misc.h @@ -150,8 +150,14 @@ // Other bounds +// - cosh(f)(x) and sinh(f)(x) approximation holds up to x equals +#define TRIGHF_BOUND 0x1.6p+6f /* 88.0 */ +// - cosh(f)(x) and sinh(f)(x) should overflow for x higher than +#define ATRIGHF_FLT_MAX 0x1.65a9f8p+6 /* 89.41599 */ +#define ATRIGH_DBL_MAX 0x1.633ce8fb9f87ep+9 /* 710.4759 */ + // - log1p(f)(x) approximation holds up to x equals -#define LOG1PF_BOUND 0x1.2ced32p+126 /* 1.0e+38 */ +#define LOG1PF_BOUND 0x1.2ced32p+126f /* 1.0e+38 */ #define LOG1P_BOUND 0x1.c7b1f3cac7433p+1019 /* 1.0e+307 */ // diff --git a/src/libm-benchmarks/benchlibm.cpp b/src/libm-benchmarks/benchlibm.cpp index 2d29d95b50bd7ebaded1999f07511246ce04f648..f7aeb61d384ecfeb0b25f2c29c4a2a1eaaf68f1f 100644 --- a/src/libm-benchmarks/benchlibm.cpp +++ b/src/libm-benchmarks/benchlibm.cpp @@ -66,4 +66,13 @@ BENCH_DOUBLE_SCALAR(expm1, -700, 700); // pow BENCH_SCALAR_2ARGS(pow, -30, 30); +// ======================HYPERBOLIC======================== +// cosh +BENCH_SINGLE_SCALAR(cosh, -100, 100); +BENCH_DOUBLE_SCALAR(cosh, -700, 700); + +// sinh +BENCH_SINGLE_SCALAR(sinh, -100, 100); +BENCH_DOUBLE_SCALAR(sinh, -700, 700); + BENCHMARK_MAIN(); \ No newline at end of file diff --git a/src/libm-benchmarks/benchsleef.cpp b/src/libm-benchmarks/benchsleef.cpp index 8c7ae8a1e58f2720e1544cfe8ecc5c4a235dde96..eb9f364a6be25d6e7c044b885080c1f898f00401 100644 --- a/src/libm-benchmarks/benchsleef.cpp +++ b/src/libm-benchmarks/benchsleef.cpp @@ -67,4 +67,12 @@ BENCH_DOUBLEP_W_FIX_ULP(expm1, u10, -700, 700); // pow BENCH_ALL_W_FIX_ULP(pow, u10, -30, 30); +// ======================HYPERBOLIC======================== +// cosh +BENCH_ALL_SINGLEP(cosh, -100, 100); +BENCH_ALL_DOUBLEP(cosh, -700, 700); +// sinh +BENCH_ALL_SINGLEP(sinh, -100, 100); +BENCH_ALL_DOUBLEP(sinh, -700, 700); + BENCHMARK_MAIN(); \ No newline at end of file diff --git a/src/libm-tester/tester.c b/src/libm-tester/tester.c index 2a5ed003a8793762c6d59a684d3289f67e898404..2c0fb25261dd2005a0e5939cbb7d082afe13f718 100644 --- a/src/libm-tester/tester.c +++ b/src/libm-tester/tester.c @@ -4021,14 +4021,14 @@ void do_test() { fprintf(stderr, "sinh : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_sinh, child_sinh, d, 1.0); - for(d = -709;d < 709 && success;d += 0.2) checkAccuracy_d(mpfr_sinh, child_sinh, d, 1.0); + for(d = -LOG_DBL_MAX-4;d < LOG_DBL_MAX+4 && success;d += 0.2) checkAccuracy_d(mpfr_sinh, child_sinh, d, 1.0); showResult(success); // fprintf(stderr, "cosh : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_cosh, child_cosh, d, 1.0); - for(d = -709;d < 709 && success;d += 0.2) checkAccuracy_d(mpfr_cosh, child_cosh, d, 1.0); + for(d = -LOG_DBL_MAX-4;d < LOG_DBL_MAX+4 && success;d += 0.2) checkAccuracy_d(mpfr_cosh, child_cosh, d, 1.0); showResult(success); // @@ -4868,7 +4868,7 @@ void do_test() { fprintf(stderr, "sinhf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_sinh, child_sinhf, d, 1.0); if (!enableFlushToZero) { - for(d = -88;d < 88 && success;d += 0.2) checkAccuracy_f(mpfr_sinh, child_sinhf, d, 1.0); + for(d = -TRIGHF_BOUND-4;d < TRIGHF_BOUND+4 && success;d += 0.2) checkAccuracy_f(mpfr_sinh, child_sinhf, d, 1.0); } showResult(success); @@ -4877,7 +4877,7 @@ void do_test() { fprintf(stderr, "coshf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_cosh, child_coshf, d, 1.0); if (!enableFlushToZero) { - for(d = -88;d < 88 && success;d += 0.2) checkAccuracy_f(mpfr_cosh, child_coshf, d, 1.0); + for(d = -TRIGHF_BOUND-4;d < TRIGHF_BOUND+4 && success;d += 0.2) checkAccuracy_f(mpfr_cosh, child_coshf, d, 1.0); } showResult(success); diff --git a/src/libm/sleefdp.c b/src/libm/sleefdp.c index f03fb9ab485796ef7f751e7d40697acaba30c83b..84f7128443591e8791a54346d4ec05cb52ffd42f 100644 --- a/src/libm/sleefdp.c +++ b/src/libm/sleefdp.c @@ -1690,7 +1690,14 @@ EXPORT CONST double xsinh(double x) { d = ddsub_d2_d2_d2(d, ddrec_d2_d2(d)); y = (d.x + d.y) * 0.5; - y = fabsk(x) > 710 ? SLEEF_INFINITY : y; + if (fabsk(x) > ATRIGH_DBL_MAX) { + y = SLEEF_INFINITY; + } else if (fabsk(x) > LOG_DBL_MAX) { + Sleef_double2 dt = expk2(dd(fabsk(x)/2, 0)); + Sleef_double2 dy = ddscale_d2_d2_d(dt, 0.5); + dy = ddmul_d2_d2_d2(dy, dt); + y = dy.x + dy.y; + } y = xisnan(y) ? SLEEF_INFINITY : y; y = mulsign(y, x); y = xisnan(x) ? SLEEF_NAN : y; @@ -1704,7 +1711,14 @@ EXPORT CONST double xcosh(double x) { d = ddadd_d2_d2_d2(d, ddrec_d2_d2(d)); y = (d.x + d.y) * 0.5; - y = fabsk(x) > 710 ? SLEEF_INFINITY : y; + if (fabsk(x) > ATRIGH_DBL_MAX) { + y = SLEEF_INFINITY; + } else if (fabsk(x) > LOG_DBL_MAX) { + Sleef_double2 dt = expk2(dd(fabsk(x)/2, 0)); + Sleef_double2 dy = ddscale_d2_d2_d(dt, 0.5); + dy = ddmul_d2_d2_d2(dy, dt); + y = dy.x + dy.y; + } y = xisnan(y) ? SLEEF_INFINITY : y; y = xisnan(x) ? SLEEF_NAN : y; diff --git a/src/libm/sleefsimddp.c b/src/libm/sleefsimddp.c index fda80c2e2d78c6cdadc1d08d257aca95e6151f14..39035ef8b01374cb7da61c6edbecd730983d9df2 100644 --- a/src/libm/sleefsimddp.c +++ b/src/libm/sleefsimddp.c @@ -2435,13 +2435,32 @@ static INLINE CONST VECTOR_CC vdouble2 expk2(vdouble2 d) FUNC_ATTR { } #if !defined(DETERMINISTIC) +static CONST VECTOR_CC vdouble trigh_special(vdouble d) FUNC_ATTR { + vdouble2 dt = expk2(vcast_vd2_vd_vd(vmul_vd_vd_vd(vabs_vd_vd(d),vcast_vd_d(0.5)), vcast_vd_d(0))); + vdouble2 dy = ddscale_vd2_vd2_vd(dt, vcast_vd_d(0.5)); + dy = ddmul_vd2_vd2_vd2(dy, dt); + return vadd_vd_vd_vd(vd2getx_vd_vd2(dy), vd2gety_vd_vd2(dy)); +} + EXPORT CONST VECTOR_CC vdouble xsinh(vdouble x) FUNC_ATTR { vdouble y = vabs_vd_vd(x); vdouble2 d = expk2(vcast_vd2_vd_vd(y, vcast_vd_d(0))); d = ddsub_vd2_vd2_vd2(d, ddrec_vd2_vd2(d)); y = vmul_vd_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)), vcast_vd_d(0.5)); - y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(710)), visnan_vo_vd(y)), vcast_vd_d(SLEEF_INFINITY), y); + vopmask oflow = vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(ATRIGH_DBL_MAX)); + y = vsel_vd_vo_vd_vd(oflow, vcast_vd_d(SLEEF_INFINITY), y); + + // Fallback to special case subroutine for inputs between LOG_DBL_MAX and ATRIGH_DBL_MAX. + vopmask ocore = vle_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(LOG_DBL_MAX)); + vopmask o = vor_vo_vo_vo(ocore, oflow); + +#if defined(ENABLE_SVESTREAM) + y = vsel_vd_vo_vd_vd(o, y, trigh_special(x)); +#else + if(!LIKELY(vtestallones_i_vo64 (o))) y = vsel_vd_vo_vd_vd(o, y, trigh_special(x)); +#endif + y = vsel_vd_vo_vd_vd(visnan_vo_vd(y), vcast_vd_d(SLEEF_INFINITY), y); y = vmulsign_vd_vd_vd(y, x); y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); @@ -2454,7 +2473,19 @@ EXPORT CONST VECTOR_CC vdouble xcosh(vdouble x) FUNC_ATTR { d = ddadd_vd2_vd2_vd2(d, ddrec_vd2_vd2(d)); y = vmul_vd_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)), vcast_vd_d(0.5)); - y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(710)), visnan_vo_vd(y)), vcast_vd_d(SLEEF_INFINITY), y); + vopmask oflow = vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(ATRIGH_DBL_MAX)); + y = vsel_vd_vo_vd_vd(oflow, vcast_vd_d(SLEEF_INFINITY), y); + + // Fallback to special case subroutine for inputs between LOG_DBL_MAX and ATRIGH_DBL_MAX. + vopmask ocore = vle_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(LOG_DBL_MAX)); + vopmask o = vor_vo_vo_vo(ocore, oflow); + +#if defined(ENABLE_SVESTREAM) + y = vsel_vd_vo_vd_vd(o, y, trigh_special(x)); +#else + if(!LIKELY(vtestallones_i_vo64 (o))) y = vsel_vd_vo_vd_vd(o, y, trigh_special(x)); +#endif + y = vsel_vd_vo_vd_vd(visnan_vo_vd(y), vcast_vd_d(SLEEF_INFINITY), y); y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); return y; diff --git a/src/libm/sleefsimdsp.c b/src/libm/sleefsimdsp.c index c66645e9e3458eb183fa83d48b44e7bfc6fae3d6..81b38d51cd0bae3190e12eae78b02d45e8034246 100644 --- a/src/libm/sleefsimdsp.c +++ b/src/libm/sleefsimdsp.c @@ -2475,14 +2475,32 @@ static INLINE CONST VECTOR_CC vfloat2 expk2f(vfloat2 d) FUNC_ATTR { } #if !defined(DETERMINISTIC) +static CONST VECTOR_CC vfloat trighf_special(vfloat d) FUNC_ATTR { + vfloat2 dt = expk2f(vcast_vf2_vf_vf(vmul_vf_vf_vf(vabs_vf_vf(d),vcast_vf_f(0.5f)), vcast_vf_f(0))); + vfloat2 dy = dfscale_vf2_vf2_vf(dt, vcast_vf_f(0.5f)); + dy = dfmul_vf2_vf2_vf2(dy, dt); + return vadd_vf_vf_vf(vf2getx_vf_vf2(dy), vf2gety_vf_vf2(dy)); +} + EXPORT CONST VECTOR_CC vfloat xsinhf(vfloat x) FUNC_ATTR { vfloat y = vabs_vf_vf(x); vfloat2 d = expk2f(vcast_vf2_vf_vf(y, vcast_vf_f(0))); d = dfsub_vf2_vf2_vf2(d, dfrec_vf2_vf2(d)); y = vmul_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d)), vcast_vf_f(0.5)); - y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(89)), - visnan_vo_vf(y)), vcast_vf_f(SLEEF_INFINITYf), y); + vopmask oflow = vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(ATRIGHF_FLT_MAX)); + y = vsel_vf_vo_vf_vf(oflow, vcast_vf_f(SLEEF_INFINITYf), y); + + // Fallback to special case subroutine for inputs between ATRIGHF_BOUND and ATRIGHF_FLT_MAX. + vopmask ocore = vle_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(TRIGHF_BOUND)); + vopmask o = vor_vo_vo_vo(ocore, oflow); + +#if defined(ENABLE_SVESTREAM) + y = vsel_vf_vo_vf_vf(o, y, trighf_special(x)); +#else + if(!LIKELY(vtestallones_i_vo32 (o))) y = vsel_vf_vo_vf_vf(o, y, trighf_special(x)); +#endif + y = vsel_vf_vo_vf_vf(visnan_vo_vf(y), vcast_vf_f(SLEEF_INFINITYf), y); y = vmulsign_vf_vf_vf(y, x); y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y))); @@ -2495,8 +2513,18 @@ EXPORT CONST VECTOR_CC vfloat xcoshf(vfloat x) FUNC_ATTR { d = dfadd_vf2_vf2_vf2(d, dfrec_vf2_vf2(d)); y = vmul_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d)), vcast_vf_f(0.5)); - y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(89)), - visnan_vo_vf(y)), vcast_vf_f(SLEEF_INFINITYf), y); + vopmask oflow = vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(ATRIGHF_FLT_MAX)); + y = vsel_vf_vo_vf_vf(oflow, vcast_vf_f(SLEEF_INFINITYf), y); + + // Fallback to special case subroutine for inputs between TRIGHF_BOUND and ATRIGHF_FLT_MAX. + vopmask ocore = vle_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(TRIGHF_BOUND)); + vopmask o = vor_vo_vo_vo(ocore, oflow); +#if defined(ENABLE_SVESTREAM) + y = vsel_vf_vo_vf_vf(o, y, trighf_special(x)); +#else + if(!LIKELY(vtestallones_i_vo32 (o))) y = vsel_vf_vo_vf_vf(o, y, trighf_special(x)); +#endif + y = vsel_vf_vo_vf_vf(visnan_vo_vf(y), vcast_vf_f(SLEEF_INFINITYf), y); y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y))); return y; diff --git a/src/libm/sleefsp.c b/src/libm/sleefsp.c index 1df87f4d6e267bc435a8a342d5d5354aa4162bf3..22d055b251e742320de6b798636f67a440e8f871 100644 --- a/src/libm/sleefsp.c +++ b/src/libm/sleefsp.c @@ -1398,7 +1398,14 @@ EXPORT CONST float xsinhf(float x) { d = dfsub_f2_f2_f2(d, dfrec_f2_f2(d)); y = (d.x + d.y) * 0.5f; - y = fabsfk(x) > 89 ? SLEEF_INFINITYf : y; + if (fabsfk(x) > ATRIGHF_FLT_MAX) { + y = SLEEF_INFINITYf; + } else if (fabsfk(x) > TRIGHF_BOUND) { + Sleef_float2 dt = expk2f(df(fabsfk(x)/2, 0)); + Sleef_float2 dy = dfscale_f2_f2_f(dt, 0.5f); + dy = dfmul_f2_f2_f2(dy, dt); + y = dy.x + dy.y; + } y = xisnanf(y) ? SLEEF_INFINITYf : y; y = mulsignf(y, x); y = xisnanf(x) ? SLEEF_NANf : y; @@ -1412,7 +1419,14 @@ EXPORT CONST float xcoshf(float x) { d = dfadd_f2_f2_f2(d, dfrec_f2_f2(d)); y = (d.x + d.y) * 0.5f; - y = fabsfk(x) > 89 ? SLEEF_INFINITYf : y; + if (fabsfk(x) > ATRIGHF_FLT_MAX) { + y = SLEEF_INFINITYf; + } else if (fabsfk(x) > TRIGHF_BOUND) { + Sleef_float2 dt = expk2f(df(fabsfk(x)/2, 0)); + Sleef_float2 dy = dfscale_f2_f2_f(dt, 0.5f); + dy = dfmul_f2_f2_f2(dy, dt); + y = dy.x + dy.y; + } y = xisnanf(y) ? SLEEF_INFINITYf : y; y = xisnanf(x) ? SLEEF_NANf : y;