Add support for the scalar BFCVT instruction (single-precision to BFloat16).

Summary of what this patch does:
  * Assembler::bfcvt / MacroAssembler::Bfcvt emit encoding 0x1e634000 and
    require CPUFeatures::kFP and CPUFeatures::kBF16.
  * The "bfcvt_bs_floatdp1" form is removed from the default decoder visitor
    map and routed explicitly to Disassembler::VisitFPDataProcessing1Source
    and Simulator::SimulateFPConvert; the CPU features auditor learns its
    requirements.
  * utils-vixl gains a BFloat16 raw-bits wrapper type, zero/infinity/default
    NaN constants, IsZero(BFloat16), FPRoundToBFloat16 and FPToBFloat16.
    FPToBFloat16(double, ...) is a stub that calls VIXL_UNIMPLEMENTED().
  * Assembler, CPU-feature, disassembler and simulator tests are added.

NOTE(review): line breaks, indentation and blank context lines in this patch
were reconstructed from a whitespace-collapsed copy. The simulator-aarch64.h
hunk is a whitespace-only change whose original indentation could not be
recovered; confirm it against the upstream commit (or apply with
--ignore-whitespace).

diff --git a/src/aarch64/assembler-aarch64.cc b/src/aarch64/assembler-aarch64.cc
index 56f18b01903841cbb47c13b1611c79ce7343a5a1..22071c71fc4166423b0ff2cbace75ac6b1703b0c 100644
--- a/src/aarch64/assembler-aarch64.cc
+++ b/src/aarch64/assembler-aarch64.cc
@@ -3689,6 +3689,11 @@ void Assembler::fjcvtzs(const Register& rd, const VRegister& vn) {
   Emit(FJCVTZS | Rn(vn) | Rd(rd));
 }
 
+void Assembler::bfcvt(const VRegister& vd, const VRegister& vn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kBF16));
+  VIXL_ASSERT(vd.Is1H() && vn.Is1S());
+  Emit(0x1e634000 | Rn(vn) | Rd(vd));
+}
 
 void Assembler::NEONFPConvertToInt(const Register& rd,
                                    const VRegister& vn,
diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h
index d443cb012cc06d327f23ccc13ceeb9f3cbc2c96f..72dbe605e3ede21112e9692c36909290a2f4d833 100644
--- a/src/aarch64/assembler-aarch64.h
+++ b/src/aarch64/assembler-aarch64.h
@@ -2453,6 +2453,9 @@ class Assembler : public vixl::internal::AssemblerBase {
   // FP convert to unsigned integer, round towards +infinity.
   void fcvtpu(const VRegister& vd, const VRegister& vn);
 
+  // Floating-point convert from single-precision to BFloat16 format.
+  void bfcvt(const VRegister& vd, const VRegister& vn);
+
   // Convert signed integer or fixed point to FP.
   void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);
 
diff --git a/src/aarch64/cpu-features-auditor-aarch64.cc b/src/aarch64/cpu-features-auditor-aarch64.cc
index e0c9fa2f1ef3545a9ca9fd75efafc4d785e05be1..606ac6f4e4b5c0f77bde9d2ec8f7691e648325f9 100644
--- a/src/aarch64/cpu-features-auditor-aarch64.cc
+++ b/src/aarch64/cpu-features-auditor-aarch64.cc
@@ -1953,6 +1953,8 @@ void CPUFeaturesAuditor::Visit(Metadata* metadata, const Instruction* instr) {
        CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEPmull128)},
       {"pmullt_z_zz_q"_h,
        CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEPmull128)},
+      {"bfcvt_bs_floatdp1"_h,
+       CPUFeatures(CPUFeatures::kFP, CPUFeatures::kBF16)},
   };
 
   if (features.count(form_hash_) > 0) {
diff --git a/src/aarch64/decoder-visitor-map-aarch64.h b/src/aarch64/decoder-visitor-map-aarch64.h
index 09cdeaddaae4a0fb88840ca562ef7ab14608c25f..34468624fbab8266a6bff10dcd0b3266202c5095 100644
--- a/src/aarch64/decoder-visitor-map-aarch64.h
+++ b/src/aarch64/decoder-visitor-map-aarch64.h
@@ -1518,7 +1518,6 @@
       {"fcsel_d_floatsel"_h, &VISITORCLASS::VisitFPConditionalSelect},         \
       {"fcsel_h_floatsel"_h, &VISITORCLASS::VisitFPConditionalSelect},         \
       {"fcsel_s_floatsel"_h, &VISITORCLASS::VisitFPConditionalSelect},         \
-      {"bfcvt_bs_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source},    \
       {"fabs_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source},      \
       {"fabs_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source},      \
       {"fabs_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source},      \
diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc
index fa137a6be680636e66383c2632fac38dc10696aa..781bcfb97b0a877f77b5999ac0f16e2d02033196 100644
--- a/src/aarch64/disasm-aarch64.cc
+++ b/src/aarch64/disasm-aarch64.cc
@@ -42,6 +42,7 @@ const Disassembler::FormToVisitorFnMap *Disassembler::GetFormToVisitorFnMap() {
       {"fcvt_hs_floatdp1"_h, &Disassembler::VisitFPDataProcessing1Source},
       {"fcvt_sd_floatdp1"_h, &Disassembler::VisitFPDataProcessing1Source},
       {"fcvt_sh_floatdp1"_h, &Disassembler::VisitFPDataProcessing1Source},
+      {"bfcvt_bs_floatdp1"_h, &Disassembler::VisitFPDataProcessing1Source},
       {"fmov_d_floatdp1"_h, &Disassembler::VisitFPDataProcessing1Source},
       {"fmov_h_floatdp1"_h, &Disassembler::VisitFPDataProcessing1Source},
       {"fmov_s_floatdp1"_h, &Disassembler::VisitFPDataProcessing1Source},
@@ -1975,6 +1976,9 @@ void Disassembler::VisitFPDataProcessing1Source(const Instruction *instr) {
     case "fcvt_hd_floatdp1"_h:
       form = "'Hd, 'Dn";
       break;
+    case "bfcvt_bs_floatdp1"_h:
+      form = "'Hd, 'Sn";
+      break;
   }
   FormatWithDecodedMnemonic(instr, form);
 }
diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h
index 22ddf469ef312006998d5d229a8f9302efd21caa..118ee9019c0aac2181619eabc9f67fe6bdb84ffa 100644
--- a/src/aarch64/macro-assembler-aarch64.h
+++ b/src/aarch64/macro-assembler-aarch64.h
@@ -1190,6 +1190,11 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
     SingleEmissionCheckScope guard(this);
     retab();
   }
+  void Bfcvt(const VRegister& vd, const VRegister& vn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    bfcvt(vd, vn);
+  }
   void Braa(const Register& xn, const Register& xm) {
     VIXL_ASSERT(allow_macro_instructions_);
     SingleEmissionCheckScope guard(this);
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index 2554663945fde1a81c6bfc03d65bcc4fcfab9a79..93b46d8b65b6f9d262388f895634221f029727c5 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -115,6 +115,7 @@ const Simulator::FormToVisitorFnMap* Simulator::GetFormToVisitorFnMap() {
       {"fcvt_sh_floatdp1"_h, &Simulator::SimulateFPConvert},
       {"fcvt_dh_floatdp1"_h, &Simulator::SimulateFPConvert},
       {"fcvt_hd_floatdp1"_h, &Simulator::SimulateFPConvert},
+      {"bfcvt_bs_floatdp1"_h, &Simulator::SimulateFPConvert},
       {"frint32x_d_floatdp1"_h, &Simulator::SimulateFPRoundIntToSize},
       {"frint32x_s_floatdp1"_h, &Simulator::SimulateFPRoundIntToSize},
       {"frint32z_d_floatdp1"_h, &Simulator::SimulateFPRoundIntToSize},
@@ -6744,6 +6745,9 @@ void Simulator::SimulateFPConvert(const Instruction* instr) {
     case "fcvt_hd_floatdp1"_h:
       WriteHRegister(fd, Float16ToRawbits(FPToFloat16(dn, FPTieEven, nan)));
       break;
+    case "bfcvt_bs_floatdp1"_h:
+      WriteHRegister(fd, BFloat16ToRawbits(FPToBFloat16(sn, FPTieEven, nan)));
+      break;
   }
 }
 
diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h
index 0c49a01589f83b270b0d6457d0db064c7bea2b86..d7a973cfd039c6c69bf14c0e7db3f0e1d898baa2 100644
--- a/src/aarch64/simulator-aarch64.h
+++ b/src/aarch64/simulator-aarch64.h
@@ -5521,8 +5521,8 @@ class Simulator : public DecoderVisitor {
           VIXL_ABORT_WITH_MSG("Tried to double free GCS ");
         } else {
           delete gcsptr;
-            // To ensure other tokens remain valid, we do not remove this element
-            // but set it to nullptr instead.
+          // To ensure other tokens remain valid, we do not remove this element
+          // but set it to nullptr instead.
           stacks_[gcs_index] = nullptr;
         }
       }
diff --git a/src/utils-vixl.cc b/src/utils-vixl.cc
index 639a4b1957029265e971e1addaa0cf9bfb5a2ffb..356837d0431aba520015423d994f068aadf9096e 100644
--- a/src/utils-vixl.cc
+++ b/src/utils-vixl.cc
@@ -34,14 +34,19 @@ namespace vixl {
 const double kFP64DefaultNaN = RawbitsToDouble(UINT64_C(0x7ff8000000000000));
 const float kFP32DefaultNaN = RawbitsToFloat(0x7fc00000);
 const Float16 kFP16DefaultNaN = RawbitsToFloat16(0x7e00);
+const BFloat16 kBFP16DefaultNaN = RawbitsToBFloat16(0x7fc0);
 
 // Floating-point zero values.
 const Float16 kFP16PositiveZero = RawbitsToFloat16(0x0);
 const Float16 kFP16NegativeZero = RawbitsToFloat16(0x8000);
+const BFloat16 kBFP16PositiveZero = RawbitsToBFloat16(0x0);
+const BFloat16 kBFP16NegativeZero = RawbitsToBFloat16(0x8000);
 
 // Floating-point infinity values.
 const Float16 kFP16PositiveInfinity = RawbitsToFloat16(0x7c00);
 const Float16 kFP16NegativeInfinity = RawbitsToFloat16(0xfc00);
+const BFloat16 kBFP16PositiveInfinity = RawbitsToBFloat16(0x7f80);
+const BFloat16 kBFP16NegativeInfinity = RawbitsToBFloat16(0xff80);
 const float kFP32PositiveInfinity = RawbitsToFloat(0x7f800000);
 const float kFP32NegativeInfinity = RawbitsToFloat(0xff800000);
 const double kFP64PositiveInfinity =
@@ -57,6 +62,14 @@ bool IsZero(Float16 value) {
 
 uint16_t Float16ToRawbits(Float16 value) { return value.rawbits_; }
 
+bool IsZero(BFloat16 value) {
+  uint16_t bits = BFloat16ToRawbits(value);
+  return (bits == BFloat16ToRawbits(kBFP16PositiveZero) ||
+          bits == BFloat16ToRawbits(kBFP16NegativeZero));
+}
+
+uint16_t BFloat16ToRawbits(BFloat16 value) { return value.rawbits_; }
+
 uint32_t FloatToRawbits(float value) {
   uint32_t bits = 0;
   memcpy(&bits, &value, 4);
@@ -78,6 +91,13 @@ Float16 RawbitsToFloat16(uint16_t bits) {
 }
 
 
+BFloat16 RawbitsToBFloat16(uint16_t bits) {
+  BFloat16 f;
+  f.rawbits_ = bits;
+  return f;
+}
+
+
 float RawbitsToFloat(uint32_t bits) {
   float value = 0.0;
   memcpy(&value, &bits, 4);
@@ -552,4 +572,76 @@ Float16 FPToFloat16(double value,
   return kFP16PositiveZero;
 }
 
+BFloat16 FPToBFloat16(float value,
+                      FPRounding round_mode,
+                      UseDefaultNaN DN,
+                      bool* exception) {
+  // Only the FPTieEven rounding mode is implemented.
+  VIXL_ASSERT(round_mode == FPTieEven);
+  USE(round_mode);
+
+  uint32_t raw = FloatToRawbits(value);
+  int32_t sign = raw >> 31;
+  int32_t exponent =
+      static_cast<int32_t>(ExtractUnsignedBitfield32(30, 23, raw)) - 127;
+  uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw);
+
+  switch (std::fpclassify(value)) {
+    case FP_NAN: {
+      if (IsSignallingNaN(value)) {
+        if (exception != NULL) {
+          *exception = true;
+        }
+      }
+      if (DN == kUseDefaultNaN) return kBFP16DefaultNaN;
+
+      // Convert NaNs as the processor would:
+      //  - The sign is propagated.
+      //  - The payload (mantissa) is transferred as much as possible, except
+      //    that the top bit is forced to '1', making the result a quiet NaN.
+      uint16_t result = (sign == 0) ? BFloat16ToRawbits(kBFP16PositiveInfinity)
+                                    : BFloat16ToRawbits(kBFP16NegativeInfinity);
+      result |= mantissa >> (kFloatMantissaBits - kBFloat16MantissaBits);
+      result |= (1 << 6);  // Force a quiet NaN.
+      return RawbitsToBFloat16(result);
+    }
+
+    case FP_ZERO:
+      return (sign == 0) ? kBFP16PositiveZero : kBFP16NegativeZero;
+
+    case FP_INFINITE:
+      return (sign == 0) ? kBFP16PositiveInfinity : kBFP16NegativeInfinity;
+
+    case FP_NORMAL:
+      // Add the implicit '1' bit to the mantissa.
+      mantissa += (1 << 23);
+      break;
+
+    case FP_SUBNORMAL:
+      // Reduce exponent to account for MSB of mantissa.
+      int32_t leading_mantissa_bits =
+          CountLeadingZeros(mantissa) - (32 - kFloatMantissaBits);
+      exponent -= leading_mantissa_bits;
+      break;
+  }
+
+  // Convert float-to-BFloat16 as the processor would, assuming that FPCR.FZ
+  // (flush-to-zero) is not set.
+  return FPRoundToBFloat16(sign, exponent, mantissa, round_mode);
+}
+
+BFloat16 FPToBFloat16(double value,
+                      FPRounding round_mode,
+                      UseDefaultNaN DN,
+                      bool* exception) {
+  USE(value);
+  USE(round_mode);
+  USE(DN);
+  USE(exception);
+  // TODO: Implement this for correct conversion of doubles to BFloat (without
+  // implicit NaN silencing.)
+  VIXL_UNIMPLEMENTED();
+  return kBFP16PositiveZero;
+}
+
 }  // namespace vixl
diff --git a/src/utils-vixl.h b/src/utils-vixl.h
index ff560ed90985bf2d802ac1484f17be295305f524..d28f1458a29fd99cb678aab21cfd23ac27c56f4e 100644
--- a/src/utils-vixl.h
+++ b/src/utils-vixl.h
@@ -258,9 +258,22 @@ class Float16 {
   uint16_t rawbits_;
 };
 
-// Floating point representation.
 uint16_t Float16ToRawbits(Float16 value);
+Float16 RawbitsToFloat16(uint16_t bits);
+
+class BFloat16 {
+ public:
+  explicit BFloat16(float value);
+  BFloat16() : rawbits_(0) {}
+  friend uint16_t BFloat16ToRawbits(BFloat16 value);
+  friend BFloat16 RawbitsToBFloat16(uint16_t bits);
+
+ protected:
+  uint16_t rawbits_;
+};
 
+uint16_t BFloat16ToRawbits(BFloat16 value);
+BFloat16 RawbitsToBFloat16(uint16_t bits);
 
 uint32_t FloatToRawbits(float value);
 VIXL_DEPRECATED("FloatToRawbits",
@@ -274,8 +287,6 @@ VIXL_DEPRECATED("DoubleToRawbits",
   return DoubleToRawbits(value);
 }
 
-Float16 RawbitsToFloat16(uint16_t bits);
-
 float RawbitsToFloat(uint32_t bits);
 VIXL_DEPRECATED("RawbitsToFloat",
                 inline float rawbits_to_float(uint32_t bits)) {
@@ -1132,6 +1143,8 @@ const unsigned kFloatMantissaBits = 23;
 const unsigned kFloatExponentBits = 8;
 const unsigned kFloat16MantissaBits = 10;
 const unsigned kFloat16ExponentBits = 5;
+const unsigned kBFloat16MantissaBits = 7;
+const unsigned kBFloat16ExponentBits = kFloatExponentBits;
 
 enum FPRounding {
   // The first four values are encodable directly by FPCR.
@@ -1388,6 +1401,16 @@ static inline float FPRoundToFloat(int64_t sign,
   return RawbitsToFloat(bits);
 }
 
+// See FPRound for a description of this function.
+static inline BFloat16 FPRoundToBFloat16(int64_t sign,
+                                         int64_t exponent,
+                                         uint64_t mantissa,
+                                         FPRounding round_mode) {
+  return RawbitsToBFloat16(
+      FPRound<uint16_t, kBFloat16ExponentBits, kBFloat16MantissaBits>(
+          sign, exponent, mantissa, round_mode));
+}
+
 
 float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception = NULL);
 float FPToFloat(double value,
@@ -1408,6 +1431,16 @@ Float16 FPToFloat16(double value,
                     UseDefaultNaN DN,
                     bool* exception = NULL);
 
+BFloat16 FPToBFloat16(float value,
+                      FPRounding round_mode,
+                      UseDefaultNaN DN,
+                      bool* exception = NULL);
+
+BFloat16 FPToBFloat16(double value,
+                      FPRounding round_mode,
+                      UseDefaultNaN DN,
+                      bool* exception = NULL);
+
 // Like static_cast<T>(value), but with specialisations for the Float16 type.
 template <typename T, typename F>
 T StaticCastFPTo(F value) {
diff --git a/test/aarch64/test-assembler-fp-aarch64.cc b/test/aarch64/test-assembler-fp-aarch64.cc
index 22010298626fd700e5296a00b306f594d70c9fad..542224894e73dc13acd49b2517f7ed49375e537d 100644
--- a/test/aarch64/test-assembler-fp-aarch64.cc
+++ b/test/aarch64/test-assembler-fp-aarch64.cc
@@ -4993,5 +4993,55 @@ TEST(default_nan_double) {
   DefaultNaNHelper(qn, qm, qa);
 }
 
+TEST(bfcvt) {
+  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kBF16);
+
+  START();
+  Label done, loop, exit;
+  __ Mov(x11, 0);           // Error count.
+  __ Mov(x12, 0xffff0000);  // Test input.
+
+  __ Bind(&loop);
+  __ Mov(w0, w12);
+  __ Fmov(s0, w0);
+  __ Bfcvt(h0, s0);  // Convert with instruction under test.
+  __ Fmov(w10, s0);
+
+  // Equivalent code for bfcvt, assuming DN=0 and ties-even rounding.
+
+  __ Bic(w1, w0, 0x80000000);           // Clear sign bit.
+  __ Cmp(w1, 0x7f800000);               // Test for NaN.
+  __ Uxth(w1, w0);                      // w1 = low half of input.
+  __ Lsr(w0, w0, 16);                   // w0 = high half of input.
+  __ Cset(w2, gt);                      // Set w2 if NaN.
+  __ Orr(w0, w0, Operand(w2, LSL, 6));  // Quieten NaN using w2.
+  __ B(gt, &done);
+
+  __ Cmp(w1, 0x8000);       // Set flags based on low half.
+  __ And(w1, w0, 1);        // Get bit 16 of input
+  __ Add(w1, w1, w0);       // and add to result
+  __ Cinc(w0, w0, gt);      // Result + 1 (for > 0x8000)
+  __ Csel(w0, w1, w0, eq);  // Select result + 1 (gt case)
+                            // result + ((in >> 16) & 1) (eq case)
+  __ Bind(&done);
+
+  __ Cmp(w0, w10);        // Expected result in w0, actual in w10.
+  __ Cinc(x11, x11, ne);  // Increment error counter on mismatch.
+  uint32_t dec = (1 << 20) + (1 << 14);
+  __ Sub(x12, x12, dec);  // Test every ~10^6 value, covering all exponent
+                          // inputs with many mantissae, infinities, NaNs.
+  __ Cmp(x12, 0);
+  __ B(ge, &loop);  // Loop until input/counter is zero.
+  __ Bind(&exit);
+
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+
+    ASSERT_EQUAL_64(0, x11);
+  }
+}
+
 }  // namespace aarch64
 }  // namespace vixl
diff --git a/test/aarch64/test-cpu-features-aarch64.cc b/test/aarch64/test-cpu-features-aarch64.cc
index c018f49bc39d40b5ca596e328a3079509be7948f..ccd49b707220d5cdee61a3ccfccdc66fc160956a 100644
--- a/test/aarch64/test-cpu-features-aarch64.cc
+++ b/test/aarch64/test-cpu-features-aarch64.cc
@@ -3864,5 +3864,12 @@ TEST_FEAT(pmullb, pmullb(z12.VnQ(), z21.VnD(), z12.VnD()))
 TEST_FEAT(pmullt, pmullt(z12.VnQ(), z21.VnD(), z12.VnD()))
 #undef TEST_FEAT
 
+#define TEST_FEAT(NAME, ASM)                                       \
+  TEST_TEMPLATE(CPUFeatures(CPUFeatures::kFP, CPUFeatures::kBF16), \
+                BF16_##NAME,                                       \
+                ASM)
+TEST_FEAT(bfcvt, bfcvt(h0, s0))
+#undef TEST_FEAT
+
 }  // namespace aarch64
 }  // namespace vixl
diff --git a/test/aarch64/test-disasm-aarch64.cc b/test/aarch64/test-disasm-aarch64.cc
index 7b91739bd2fe2f183e6b4276194c17d671c44e50..24898be0fcea43698cb82a9ffa13f95c5670a25d 100644
--- a/test/aarch64/test-disasm-aarch64.cc
+++ b/test/aarch64/test-disasm-aarch64.cc
@@ -3962,7 +3962,7 @@ TEST(architecture_features) {
   // COMPARE_PREFIX(dci(0x0fc0f000), "bfmlal");  // BFMLAL_asimdelem_F
   // COMPARE_PREFIX(dci(0x2e40fc00), "bfdot");   // BFDOT_asimdsame2_D
   // COMPARE_PREFIX(dci(0x2ec0fc00), "bfmlal");  // BFMLAL_asimdsame2_F_
-  // COMPARE_PREFIX(dci(0x1e634000), "bfcvt");   // BFCVT_BS_floatdp1
+  COMPARE_PREFIX(dci(0x1e634000), "bfcvt");  // BFCVT_BS_floatdp1
   // COMPARE_PREFIX(dci(0x6e40ec00), "bfmmla");  // BFMMLA_asimdsame2_E
 
   // ARMv8.6 - DGH
diff --git a/test/aarch64/test-disasm-neon-aarch64.cc b/test/aarch64/test-disasm-neon-aarch64.cc
index f50e5a607d8deb5a752fdcc4062bf0ceb18ad8c7..bccc6c5c6073fc96a48ebb6c030aebf370882492 100644
--- a/test/aarch64/test-disasm-neon-aarch64.cc
+++ b/test/aarch64/test-disasm-neon-aarch64.cc
@@ -4057,6 +4057,14 @@ TEST(neon_2regmisc_fp16) {
   CLEANUP();
 }
 
+TEST(neon_bfloat16) {
+  SETUP();
+
+  COMPARE_MACRO(Bfcvt(h10, s5), "bfcvt h10, s5");
+
+  CLEANUP();
+}
+
 TEST(neon_acrosslanes) {
   SETUP();
 
diff --git a/test/aarch64/test-simulator-sve-aarch64.cc b/test/aarch64/test-simulator-sve-aarch64.cc
index c3d268f397ff081912bd55a112f95f7095800c5d..867be7cce454a9066c4e8c8405bfa7ac04173534 100644
--- a/test/aarch64/test-simulator-sve-aarch64.cc
+++ b/test/aarch64/test-simulator-sve-aarch64.cc
@@ -1867,5 +1867,81 @@ TEST_SVE(neon_sm4ekey) {
   }
 }
 
+TEST_SVE(fp_bfcvt) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32,
+                          CPUFeatures::kFP,
+                          CPUFeatures::kBF16);
+  START();
+
+  SetInitialMachineState(&masm, kFpInputSet);
+  // state = 0x1e5cbcac
+
+  {
+    ExactAssemblyScope scope(&masm, 20 * kInstructionSize);
+    __ dci(0x1e6340e6);  // bfcvt h6, s7
+    // vl128 state = 0x19138b01
+    __ dci(0x1e6340f6);  // bfcvt h22, s7
+    // vl128 state = 0xd3bc6bbc
+    __ dci(0x1e63417e);  // bfcvt h30, s11
+    // vl128 state = 0xf1da3865
+    __ dci(0x1e6341ce);  // bfcvt h14, s14
+    // vl128 state = 0xb740056d
+    __ dci(0x1e63410f);  // bfcvt h15, s8
+    // vl128 state = 0x8ea2ccc9
+    __ dci(0x1e63439f);  // bfcvt h31, s28
+    // vl128 state = 0xcf994aa7
+    __ dci(0x1e63409e);  // bfcvt h30, s4
+    // vl128 state = 0x24145469
+    __ dci(0x1e63409c);  // bfcvt h28, s4
+    // vl128 state = 0xb1176b99
+    __ dci(0x1e63409d);  // bfcvt h29, s4
+    // vl128 state = 0x9521639c
+    __ dci(0x1e6342b9);  // bfcvt h25, s21
+    // vl128 state = 0xc5e5ab22
+    __ dci(0x1e6342bd);  // bfcvt h29, s21
+    // vl128 state = 0x2adafc0c
+    __ dci(0x1e6342ad);  // bfcvt h13, s21
+    // vl128 state = 0xb22252be
+    __ dci(0x1e6342ac);  // bfcvt h12, s21
+    // vl128 state = 0x28a508e6
+    __ dci(0x1e6342a4);  // bfcvt h4, s21
+    // vl128 state = 0x24744124
+    __ dci(0x1e6342c6);  // bfcvt h6, s22
+    // vl128 state = 0xbe79bb80
+    __ dci(0x1e6342d6);  // bfcvt h22, s22
+    // vl128 state = 0x3c6cf0c2
+    __ dci(0x1e6342d2);  // bfcvt h18, s22
+    // vl128 state = 0x8c2f10b6
+    __ dci(0x1e6342da);  // bfcvt h26, s22
+    // vl128 state = 0x5c115ed4
+    __ dci(0x1e6342db);  // bfcvt h27, s22
+    // vl128 state = 0x6657c63e
+    __ dci(0x1e6342d3);  // bfcvt h19, s22
+    // vl128 state = 0x0948374c
+  }
+
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    uint32_t expected_hashes[] = {
+        0x0948374c,
+        0x3219f32f,
+        0x4571e7d1,
+        0x9de318e3,
+        0x83558fbe,
+    };
+    ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
+                        core.GetSVELaneCount(kQRegSize))],
+                    x0);
+  }
+}
+
 }  // namespace aarch64
 }  // namespace vixl