diff --git a/src/aarch64/assembler-aarch64.cc b/src/aarch64/assembler-aarch64.cc
index 56f18b01903841cbb47c13b1611c79ce7343a5a1..22071c71fc4166423b0ff2cbace75ac6b1703b0c 100644
--- a/src/aarch64/assembler-aarch64.cc
+++ b/src/aarch64/assembler-aarch64.cc
@@ -3689,6 +3689,11 @@ void Assembler::fjcvtzs(const Register& rd, const VRegister& vn) {
   Emit(FJCVTZS | Rn(vn) | Rd(rd));
 }
 
+void Assembler::bfcvt(const VRegister& vd, const VRegister& vn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kBF16));
+  VIXL_ASSERT(vd.Is1H() && vn.Is1S());  // Scalar H destination, S source.
+  Emit(0x1e634000 | Rn(vn) | Rd(vd));   // BFCVT_BS_floatdp1 base encoding.
+}
 
 void Assembler::NEONFPConvertToInt(const Register& rd,
                                    const VRegister& vn,
diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h
index d443cb012cc06d327f23ccc13ceeb9f3cbc2c96f..72dbe605e3ede21112e9692c36909290a2f4d833 100644
--- a/src/aarch64/assembler-aarch64.h
+++ b/src/aarch64/assembler-aarch64.h
@@ -2453,6 +2453,9 @@ class Assembler : public vixl::internal::AssemblerBase {
   // FP convert to unsigned integer, round towards +infinity.
   void fcvtpu(const VRegister& vd, const VRegister& vn);
 
+  // Floating-point convert from single-precision to BFloat16 format.
+  void bfcvt(const VRegister& vd, const VRegister& vn);
+
   // Convert signed integer or fixed point to FP.
   void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);
 
diff --git a/src/aarch64/cpu-features-auditor-aarch64.cc b/src/aarch64/cpu-features-auditor-aarch64.cc
index e0c9fa2f1ef3545a9ca9fd75efafc4d785e05be1..606ac6f4e4b5c0f77bde9d2ec8f7691e648325f9 100644
--- a/src/aarch64/cpu-features-auditor-aarch64.cc
+++ b/src/aarch64/cpu-features-auditor-aarch64.cc
@@ -1953,6 +1953,8 @@ void CPUFeaturesAuditor::Visit(Metadata* metadata, const Instruction* instr) {
          CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEPmull128)},
         {"pmullt_z_zz_q"_h,
          CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEPmull128)},
+        {"bfcvt_bs_floatdp1"_h,
+         CPUFeatures(CPUFeatures::kFP, CPUFeatures::kBF16)},
     };
 
     if (features.count(form_hash_) > 0) {
diff --git a/src/aarch64/decoder-visitor-map-aarch64.h b/src/aarch64/decoder-visitor-map-aarch64.h
index 09cdeaddaae4a0fb88840ca562ef7ab14608c25f..34468624fbab8266a6bff10dcd0b3266202c5095 100644
--- a/src/aarch64/decoder-visitor-map-aarch64.h
+++ b/src/aarch64/decoder-visitor-map-aarch64.h
@@ -1518,7 +1518,6 @@
       {"fcsel_d_floatsel"_h, &VISITORCLASS::VisitFPConditionalSelect},         \
       {"fcsel_h_floatsel"_h, &VISITORCLASS::VisitFPConditionalSelect},         \
       {"fcsel_s_floatsel"_h, &VISITORCLASS::VisitFPConditionalSelect},         \
-      {"bfcvt_bs_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source},    \
       {"fabs_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source},      \
       {"fabs_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source},      \
       {"fabs_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source},      \
diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc
index fa137a6be680636e66383c2632fac38dc10696aa..781bcfb97b0a877f77b5999ac0f16e2d02033196 100644
--- a/src/aarch64/disasm-aarch64.cc
+++ b/src/aarch64/disasm-aarch64.cc
@@ -42,6 +42,7 @@ const Disassembler::FormToVisitorFnMap *Disassembler::GetFormToVisitorFnMap() {
       {"fcvt_hs_floatdp1"_h, &Disassembler::VisitFPDataProcessing1Source},
       {"fcvt_sd_floatdp1"_h, &Disassembler::VisitFPDataProcessing1Source},
       {"fcvt_sh_floatdp1"_h, &Disassembler::VisitFPDataProcessing1Source},
+      {"bfcvt_bs_floatdp1"_h, &Disassembler::VisitFPDataProcessing1Source},
       {"fmov_d_floatdp1"_h, &Disassembler::VisitFPDataProcessing1Source},
       {"fmov_h_floatdp1"_h, &Disassembler::VisitFPDataProcessing1Source},
       {"fmov_s_floatdp1"_h, &Disassembler::VisitFPDataProcessing1Source},
@@ -1975,6 +1976,9 @@ void Disassembler::VisitFPDataProcessing1Source(const Instruction *instr) {
     case "fcvt_hd_floatdp1"_h:
       form = "'Hd, 'Dn";
       break;
+    case "bfcvt_bs_floatdp1"_h:
+      form = "'Hd, 'Sn";
+      break;
   }
   FormatWithDecodedMnemonic(instr, form);
 }
diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h
index 22ddf469ef312006998d5d229a8f9302efd21caa..118ee9019c0aac2181619eabc9f67fe6bdb84ffa 100644
--- a/src/aarch64/macro-assembler-aarch64.h
+++ b/src/aarch64/macro-assembler-aarch64.h
@@ -1190,6 +1190,11 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
     SingleEmissionCheckScope guard(this);
     retab();
   }
+  void Bfcvt(const VRegister& vd, const VRegister& vn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    bfcvt(vd, vn);  // Forwards unchanged to Assembler::bfcvt().
+  }
   void Braa(const Register& xn, const Register& xm) {
     VIXL_ASSERT(allow_macro_instructions_);
     SingleEmissionCheckScope guard(this);
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index 2554663945fde1a81c6bfc03d65bcc4fcfab9a79..93b46d8b65b6f9d262388f895634221f029727c5 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -115,6 +115,7 @@ const Simulator::FormToVisitorFnMap* Simulator::GetFormToVisitorFnMap() {
       {"fcvt_sh_floatdp1"_h, &Simulator::SimulateFPConvert},
       {"fcvt_dh_floatdp1"_h, &Simulator::SimulateFPConvert},
       {"fcvt_hd_floatdp1"_h, &Simulator::SimulateFPConvert},
+      {"bfcvt_bs_floatdp1"_h, &Simulator::SimulateFPConvert},
       {"frint32x_d_floatdp1"_h, &Simulator::SimulateFPRoundIntToSize},
       {"frint32x_s_floatdp1"_h, &Simulator::SimulateFPRoundIntToSize},
       {"frint32z_d_floatdp1"_h, &Simulator::SimulateFPRoundIntToSize},
@@ -6744,6 +6745,9 @@ void Simulator::SimulateFPConvert(const Instruction* instr) {
     case "fcvt_hd_floatdp1"_h:
       WriteHRegister(fd, Float16ToRawbits(FPToFloat16(dn, FPTieEven, nan)));
       break;
+    case "bfcvt_bs_floatdp1"_h:
+      WriteHRegister(fd, BFloat16ToRawbits(FPToBFloat16(sn, FPTieEven, nan)));
+      break;
   }
 }
 
diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h
index 0c49a01589f83b270b0d6457d0db064c7bea2b86..d7a973cfd039c6c69bf14c0e7db3f0e1d898baa2 100644
--- a/src/aarch64/simulator-aarch64.h
+++ b/src/aarch64/simulator-aarch64.h
@@ -5521,8 +5521,8 @@ class Simulator : public DecoderVisitor {
         VIXL_ABORT_WITH_MSG("Tried to double free GCS ");
       } else {
         delete gcsptr;
-	// To ensure other tokens remain valid, we do not remove this element
-	// but set it to nullptr instead.
+        // To ensure other tokens remain valid, we do not remove this element
+        // but set it to nullptr instead.
         stacks_[gcs_index] = nullptr;
       }
     }
diff --git a/src/utils-vixl.cc b/src/utils-vixl.cc
index 639a4b1957029265e971e1addaa0cf9bfb5a2ffb..356837d0431aba520015423d994f068aadf9096e 100644
--- a/src/utils-vixl.cc
+++ b/src/utils-vixl.cc
@@ -34,14 +34,19 @@ namespace vixl {
 const double kFP64DefaultNaN = RawbitsToDouble(UINT64_C(0x7ff8000000000000));
 const float kFP32DefaultNaN = RawbitsToFloat(0x7fc00000);
 const Float16 kFP16DefaultNaN = RawbitsToFloat16(0x7e00);
+const BFloat16 kBFP16DefaultNaN = RawbitsToBFloat16(0x7fc0);
 
 // Floating-point zero values.
 const Float16 kFP16PositiveZero = RawbitsToFloat16(0x0);
 const Float16 kFP16NegativeZero = RawbitsToFloat16(0x8000);
+const BFloat16 kBFP16PositiveZero = RawbitsToBFloat16(0x0);
+const BFloat16 kBFP16NegativeZero = RawbitsToBFloat16(0x8000);
 
 // Floating-point infinity values.
 const Float16 kFP16PositiveInfinity = RawbitsToFloat16(0x7c00);
 const Float16 kFP16NegativeInfinity = RawbitsToFloat16(0xfc00);
+const BFloat16 kBFP16PositiveInfinity = RawbitsToBFloat16(0x7f80);
+const BFloat16 kBFP16NegativeInfinity = RawbitsToBFloat16(0xff80);
 const float kFP32PositiveInfinity = RawbitsToFloat(0x7f800000);
 const float kFP32NegativeInfinity = RawbitsToFloat(0xff800000);
 const double kFP64PositiveInfinity =
@@ -57,6 +62,14 @@ bool IsZero(Float16 value) {
 
 uint16_t Float16ToRawbits(Float16 value) { return value.rawbits_; }
 
+bool IsZero(BFloat16 value) {
+  // +0.0 (0x0000) and -0.0 (0x8000) differ only in the sign bit, so a value is
+  // zero exactly when every bit below the sign bit is clear.
+  return (BFloat16ToRawbits(value) & 0x7fff) == 0;
+}
+
+uint16_t BFloat16ToRawbits(BFloat16 value) { return value.rawbits_; }
+
 uint32_t FloatToRawbits(float value) {
   uint32_t bits = 0;
   memcpy(&bits, &value, 4);
@@ -78,6 +91,13 @@ Float16 RawbitsToFloat16(uint16_t bits) {
 }
 
 
+BFloat16 RawbitsToBFloat16(uint16_t bits) {
+  BFloat16 wrapped;         // Default-constructed with zeroed raw bits.
+  wrapped.rawbits_ = bits;  // Direct access is permitted via friendship.
+  return wrapped;
+}
+
+
 float RawbitsToFloat(uint32_t bits) {
   float value = 0.0;
   memcpy(&value, &bits, 4);
@@ -552,4 +572,76 @@ Float16 FPToFloat16(double value,
   return kFP16PositiveZero;
 }
 
+BFloat16 FPToBFloat16(float value,
+                      FPRounding round_mode,
+                      UseDefaultNaN DN,
+                      bool* exception) {
+  // Converts `value` to BFloat16; flags signalling NaNs through `exception`.
+  // Only the FPTieEven rounding mode is implemented.
+  VIXL_ASSERT(round_mode == FPTieEven);
+  USE(round_mode);
+
+  uint32_t raw = FloatToRawbits(value);
+  int32_t sign = raw >> 31;
+  int32_t exponent =
+      static_cast<int32_t>(ExtractUnsignedBitfield32(30, 23, raw)) - 127;
+  uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw);
+
+  switch (std::fpclassify(value)) {
+    case FP_NAN: {
+      if (IsSignallingNaN(value) && (exception != NULL)) {
+        *exception = true;
+      }
+      if (DN == kUseDefaultNaN) return kBFP16DefaultNaN;
+
+      // Convert NaNs as the processor would:
+      //  - The sign is propagated.
+      //  - The payload (mantissa) is transferred as much as possible, except
+      //    that the top bit is forced to '1', making the result a quiet NaN.
+      uint16_t result = (sign == 0) ? BFloat16ToRawbits(kBFP16PositiveInfinity)
+                                    : BFloat16ToRawbits(kBFP16NegativeInfinity);
+      result |= mantissa >> (kFloatMantissaBits - kBFloat16MantissaBits);
+      result |= (1 << (kBFloat16MantissaBits - 1));  // Force a quiet NaN.
+      return RawbitsToBFloat16(result);
+    }
+
+    case FP_ZERO:
+      return (sign == 0) ? kBFP16PositiveZero : kBFP16NegativeZero;
+
+    case FP_INFINITE:
+      return (sign == 0) ? kBFP16PositiveInfinity : kBFP16NegativeInfinity;
+
+    case FP_NORMAL:
+      // Add the implicit '1' bit to the mantissa.
+      mantissa += (1 << kFloatMantissaBits);
+      break;
+
+    case FP_SUBNORMAL: {  // Braced: this case declares a local.
+      // Reduce exponent to account for MSB of mantissa.
+      int32_t leading_mantissa_bits =
+          CountLeadingZeros(mantissa) - (32 - kFloatMantissaBits);
+      exponent -= leading_mantissa_bits;
+      break;
+    }
+  }
+
+  // Convert float-to-BFloat16 as the processor would, assuming that FPCR.FZ
+  // (flush-to-zero) is not set.
+  return FPRoundToBFloat16(sign, exponent, mantissa, round_mode);
+}
+
+BFloat16 FPToBFloat16(double value,
+                      FPRounding round_mode,
+                      UseDefaultNaN DN,
+                      bool* exception) {
+  USE(value);
+  USE(round_mode);
+  USE(DN);
+  USE(exception);
+  // TODO: Implement this for correct conversion of doubles to BFloat16
+  // (without implicit NaN silencing).
+  VIXL_UNIMPLEMENTED();
+  return kBFP16PositiveZero;
+}
+
 }  // namespace vixl
diff --git a/src/utils-vixl.h b/src/utils-vixl.h
index ff560ed90985bf2d802ac1484f17be295305f524..d28f1458a29fd99cb678aab21cfd23ac27c56f4e 100644
--- a/src/utils-vixl.h
+++ b/src/utils-vixl.h
@@ -258,9 +258,22 @@ class Float16 {
   uint16_t rawbits_;
 };
 
-// Floating point representation.
 uint16_t Float16ToRawbits(Float16 value);
+Float16 RawbitsToFloat16(uint16_t bits);
+
+class BFloat16 {
+ public:
+  explicit BFloat16(float value);  // NOTE(review): confirm this is defined.
+  BFloat16() : rawbits_(0) {}      // Raw bits 0x0000.
+  friend uint16_t BFloat16ToRawbits(BFloat16 value);
+  friend BFloat16 RawbitsToBFloat16(uint16_t bits);
+
+ protected:
+  uint16_t rawbits_;  // Raw 16-bit encoding; see the friends above.
+};
 
+uint16_t BFloat16ToRawbits(BFloat16 value);
+BFloat16 RawbitsToBFloat16(uint16_t bits);
 
 uint32_t FloatToRawbits(float value);
 VIXL_DEPRECATED("FloatToRawbits",
@@ -274,8 +287,6 @@ VIXL_DEPRECATED("DoubleToRawbits",
   return DoubleToRawbits(value);
 }
 
-Float16 RawbitsToFloat16(uint16_t bits);
-
 float RawbitsToFloat(uint32_t bits);
 VIXL_DEPRECATED("RawbitsToFloat",
                 inline float rawbits_to_float(uint32_t bits)) {
@@ -1132,6 +1143,8 @@ const unsigned kFloatMantissaBits = 23;
 const unsigned kFloatExponentBits = 8;
 const unsigned kFloat16MantissaBits = 10;
 const unsigned kFloat16ExponentBits = 5;
+const unsigned kBFloat16MantissaBits = 7;
+const unsigned kBFloat16ExponentBits = kFloatExponentBits;
 
 enum FPRounding {
   // The first four values are encodable directly by FPCR<RMode>.
@@ -1388,6 +1401,16 @@ static inline float FPRoundToFloat(int64_t sign,
   return RawbitsToFloat(bits);
 }
 
+// BFloat16 instantiation of FPRound; see FPRound for a full description.
+static inline BFloat16 FPRoundToBFloat16(int64_t sign,
+                                         int64_t exponent,
+                                         uint64_t mantissa,
+                                         FPRounding round_mode) {
+  return RawbitsToBFloat16(
+      FPRound<uint16_t, kBFloat16ExponentBits, kBFloat16MantissaBits>(
+          sign, exponent, mantissa, round_mode));
+}
+
 
 float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception = NULL);
 float FPToFloat(double value,
@@ -1408,6 +1431,16 @@ Float16 FPToFloat16(double value,
                     UseDefaultNaN DN,
                     bool* exception = NULL);
 
+BFloat16 FPToBFloat16(float value,
+                      FPRounding round_mode,
+                      UseDefaultNaN DN,
+                      bool* exception = NULL);
+
+BFloat16 FPToBFloat16(double value,
+                      FPRounding round_mode,
+                      UseDefaultNaN DN,
+                      bool* exception = NULL);
+
 // Like static_cast<T>(value), but with specialisations for the Float16 type.
 template <typename T, typename F>
 T StaticCastFPTo(F value) {
diff --git a/test/aarch64/test-assembler-fp-aarch64.cc b/test/aarch64/test-assembler-fp-aarch64.cc
index 22010298626fd700e5296a00b306f594d70c9fad..542224894e73dc13acd49b2517f7ed49375e537d 100644
--- a/test/aarch64/test-assembler-fp-aarch64.cc
+++ b/test/aarch64/test-assembler-fp-aarch64.cc
@@ -4993,5 +4993,55 @@ TEST(default_nan_double) {
   DefaultNaNHelper(qn, qm, qa);
 }
 
+TEST(bfcvt) {
+  // Sweeps inputs, checking bfcvt against an integer-only reference sequence.
+  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kBF16);
+
+  START();
+  Label done, loop;
+  __ Mov(x11, 0);           // Error count.
+  __ Mov(x12, 0xffff0000);  // Test input.
+
+  __ Bind(&loop);
+  __ Mov(w0, w12);
+  __ Fmov(s0, w0);
+  __ Bfcvt(h0, s0);  // Convert with instruction under test.
+  __ Fmov(w10, s0);
+
+  // Equivalent code for bfcvt, assuming DN=0 and ties-even rounding.
+
+  __ Bic(w1, w0, 0x80000000);           // Clear sign bit.
+  __ Cmp(w1, 0x7f800000);               // Test for NaN.
+  __ Uxth(w1, w0);                      // w1 = low half of input.
+  __ Lsr(w0, w0, 16);                   // w0 = high half of input.
+  __ Cset(w2, gt);                      // Set w2 if NaN.
+  __ Orr(w0, w0, Operand(w2, LSL, 6));  // Quieten NaN using w2.
+  __ B(gt, &done);
+
+  __ Cmp(w1, 0x8000);       // Set flags based on low half.
+  __ And(w1, w0, 1);        // Get bit 16 of input
+  __ Add(w1, w1, w0);       //  and add to result
+  __ Cinc(w0, w0, gt);      // Result + 1 (for > 0x8000)
+  __ Csel(w0, w1, w0, eq);  // Select result + 1                 (gt case)
+                            //        result + ((in >> 16) & 1)  (eq case)
+  __ Bind(&done);
+
+  __ Cmp(w0, w10);        // Expected result in w0, actual in w10.
+  __ Cinc(x11, x11, ne);  // Increment error counter on mismatch.
+  uint32_t dec = (1 << 20) + (1 << 14);
+  __ Sub(x12, x12, dec);  // Test every ~10^6 value, covering all exponent
+                          //  inputs with many mantissae, infinities, NaNs.
+  __ Cmp(x12, 0);
+  __ B(ge, &loop);  // Loop while the input/counter is non-negative.
+
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+
+    ASSERT_EQUAL_64(0, x11);
+  }
+}
+
 }  // namespace aarch64
 }  // namespace vixl
diff --git a/test/aarch64/test-cpu-features-aarch64.cc b/test/aarch64/test-cpu-features-aarch64.cc
index c018f49bc39d40b5ca596e328a3079509be7948f..ccd49b707220d5cdee61a3ccfccdc66fc160956a 100644
--- a/test/aarch64/test-cpu-features-aarch64.cc
+++ b/test/aarch64/test-cpu-features-aarch64.cc
@@ -3864,5 +3864,12 @@ TEST_FEAT(pmullb, pmullb(z12.VnQ(), z21.VnD(), z12.VnD()))
 TEST_FEAT(pmullt, pmullt(z12.VnQ(), z21.VnD(), z12.VnD()))
 #undef TEST_FEAT
 
+#define TEST_FEAT(NAME, ASM)                                       \
+  TEST_TEMPLATE(CPUFeatures(CPUFeatures::kFP, CPUFeatures::kBF16), \
+                BF16_##NAME,                                       \
+                ASM)
+TEST_FEAT(bfcvt, bfcvt(h0, s0))
+#undef TEST_FEAT
+
 }  // namespace aarch64
 }  // namespace vixl
diff --git a/test/aarch64/test-disasm-aarch64.cc b/test/aarch64/test-disasm-aarch64.cc
index 7b91739bd2fe2f183e6b4276194c17d671c44e50..24898be0fcea43698cb82a9ffa13f95c5670a25d 100644
--- a/test/aarch64/test-disasm-aarch64.cc
+++ b/test/aarch64/test-disasm-aarch64.cc
@@ -3962,7 +3962,7 @@ TEST(architecture_features) {
   // COMPARE_PREFIX(dci(0x0fc0f000), "bfmlal");   // BFMLAL_asimdelem_F
   // COMPARE_PREFIX(dci(0x2e40fc00), "bfdot");   // BFDOT_asimdsame2_D
   // COMPARE_PREFIX(dci(0x2ec0fc00), "bfmlal");   // BFMLAL_asimdsame2_F_
-  // COMPARE_PREFIX(dci(0x1e634000), "bfcvt");   // BFCVT_BS_floatdp1
+  COMPARE_PREFIX(dci(0x1e634000), "bfcvt");  // BFCVT_BS_floatdp1
   // COMPARE_PREFIX(dci(0x6e40ec00), "bfmmla");   // BFMMLA_asimdsame2_E
 
   // ARMv8.6 - DGH
diff --git a/test/aarch64/test-disasm-neon-aarch64.cc b/test/aarch64/test-disasm-neon-aarch64.cc
index f50e5a607d8deb5a752fdcc4062bf0ceb18ad8c7..bccc6c5c6073fc96a48ebb6c030aebf370882492 100644
--- a/test/aarch64/test-disasm-neon-aarch64.cc
+++ b/test/aarch64/test-disasm-neon-aarch64.cc
@@ -4057,6 +4057,14 @@ TEST(neon_2regmisc_fp16) {
   CLEANUP();
 }
 
+TEST(neon_bfloat16) {
+  SETUP();
+
+  COMPARE_MACRO(Bfcvt(h10, s5), "bfcvt h10, s5");  // Scalar H <- S operands.
+
+  CLEANUP();
+}
+
 TEST(neon_acrosslanes) {
   SETUP();
 
diff --git a/test/aarch64/test-simulator-sve-aarch64.cc b/test/aarch64/test-simulator-sve-aarch64.cc
index c3d268f397ff081912bd55a112f95f7095800c5d..867be7cce454a9066c4e8c8405bfa7ac04173534 100644
--- a/test/aarch64/test-simulator-sve-aarch64.cc
+++ b/test/aarch64/test-simulator-sve-aarch64.cc
@@ -1867,5 +1867,81 @@ TEST_SVE(neon_sm4ekey) {
   }
 }
 
+TEST_SVE(fp_bfcvt) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32,
+                          CPUFeatures::kFP,
+                          CPUFeatures::kBF16);
+  START();
+
+  SetInitialMachineState(&masm, kFpInputSet);
+  // state = 0x1e5cbcac
+
+  {
+    ExactAssemblyScope scope(&masm, 20 * kInstructionSize);  // 20 dci words.
+    __ dci(0x1e6340e6);  // bfcvt h6, s7
+    // vl128 state = 0x19138b01
+    __ dci(0x1e6340f6);  // bfcvt h22, s7
+    // vl128 state = 0xd3bc6bbc
+    __ dci(0x1e63417e);  // bfcvt h30, s11
+    // vl128 state = 0xf1da3865
+    __ dci(0x1e6341ce);  // bfcvt h14, s14
+    // vl128 state = 0xb740056d
+    __ dci(0x1e63410f);  // bfcvt h15, s8
+    // vl128 state = 0x8ea2ccc9
+    __ dci(0x1e63439f);  // bfcvt h31, s28
+    // vl128 state = 0xcf994aa7
+    __ dci(0x1e63409e);  // bfcvt h30, s4
+    // vl128 state = 0x24145469
+    __ dci(0x1e63409c);  // bfcvt h28, s4
+    // vl128 state = 0xb1176b99
+    __ dci(0x1e63409d);  // bfcvt h29, s4
+    // vl128 state = 0x9521639c
+    __ dci(0x1e6342b9);  // bfcvt h25, s21
+    // vl128 state = 0xc5e5ab22
+    __ dci(0x1e6342bd);  // bfcvt h29, s21
+    // vl128 state = 0x2adafc0c
+    __ dci(0x1e6342ad);  // bfcvt h13, s21
+    // vl128 state = 0xb22252be
+    __ dci(0x1e6342ac);  // bfcvt h12, s21
+    // vl128 state = 0x28a508e6
+    __ dci(0x1e6342a4);  // bfcvt h4, s21
+    // vl128 state = 0x24744124
+    __ dci(0x1e6342c6);  // bfcvt h6, s22
+    // vl128 state = 0xbe79bb80
+    __ dci(0x1e6342d6);  // bfcvt h22, s22
+    // vl128 state = 0x3c6cf0c2
+    __ dci(0x1e6342d2);  // bfcvt h18, s22
+    // vl128 state = 0x8c2f10b6
+    __ dci(0x1e6342da);  // bfcvt h26, s22
+    // vl128 state = 0x5c115ed4
+    __ dci(0x1e6342db);  // bfcvt h27, s22
+    // vl128 state = 0x6657c63e
+    __ dci(0x1e6342d3);  // bfcvt h19, s22
+    // vl128 state = 0x0948374c
+  }
+
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    uint32_t expected_hashes[] = {  // Indexed by WhichPowerOf2(lane count).
+        0x0948374c,
+        0x3219f32f,
+        0x4571e7d1,
+        0x9de318e3,
+        0x83558fbe,
+    };
+    ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
+                        core.GetSVELaneCount(kQRegSize))],
+                    x0);
+  }
+}
+
 }  // namespace aarch64
 }  // namespace vixl