diff --git a/README.md b/README.md
index 8329d37656d0967dc442330b9f4fe75691eee249..bcbd45bd4bd0918883309ecea3a6b08c3f87ef12 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,16 @@
-VIXL: ARMv8 Runtime Code Generation Library
-===========================================
+VIXL: Arm Runtime Code Generation Library
+=========================================
Contents:
- * Overview
- * Licence
- * Requirements
- * Known limitations
- * Bug reports
- * Usage
-
+ * [Overview](#overview)
+ * [Licence](#licence)
+ * [Requirements](#requirements)
+ * [Versioning](#versioning)
+ * [Supported Arm Architecture Features](#supported-arm-architecture-features)
+ * [Known limitations](#known-limitations)
+ * [Bug reports](#bug-reports)
+ * [Usage](#usage)
Overview
========
@@ -17,21 +18,16 @@ Overview
VIXL contains three components.
1. Programmatic **assemblers** to generate A64, A32 or T32 code at runtime. The
- assemblers abstract some of the constraints of each ISA; for example, most
+ assemblers abstract some of the constraints of each ISA; for example, some
instructions support any immediate.
2. **Disassemblers** that can print any instruction emitted by the assemblers.
- 3. A **simulator** that can simulate any instruction emitted by the A64
+ 3. An **A64 simulator** that can simulate any instruction emitted by the A64
assembler. The simulator allows generated code to be run on another
architecture without the need for a full ISA model.
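+
+As an illustration of how these components fit together, here is a minimal
+sketch that assembles a tiny function and runs it on the simulator. It assumes
+a VIXL build with the AArch64 simulator enabled; the headers and helper names
+below follow the AArch64 examples shipped with VIXL, so treat it as a starting
+point rather than a complete reference.
+
+    #include <cinttypes>
+    #include <cstdio>
+
+    #include "aarch64/macro-assembler-aarch64.h"
+    #include "aarch64/simulator-aarch64.h"
+
+    using namespace vixl::aarch64;
+
+    int main() {
+      MacroAssembler masm;
+      Label entry;
+
+      // Generate a function that returns 42 in x0.
+      masm.Bind(&entry);
+      masm.Mov(x0, 42);
+      masm.Ret();
+      masm.FinalizeCode();
+
+      // Simulate the generated code on the host, then read back x0.
+      Decoder decoder;
+      Simulator simulator(&decoder);
+      simulator.RunFrom(masm.GetLabelAddress<Instruction*>(&entry));
+      printf("x0 = %" PRId64 "\n", simulator.ReadXRegister(0));
+      return 0;
+    }
+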
-The VIXL git repository can be found [on GitHub][vixl].
-
-Build and Test Status
----------------------
+The VIXL git repository can be found [on GitLab][vixl].
- * [](https://ci.linaro.org/job/linaro-art-vixlpresubmit/) Simulator
- * [](https://ci.linaro.org/job/linaro-art-vixlpresubmit-native-armv8/) Native
- * [](https://ci.linaro.org/job/linaro-art-vixlpresubmit-macos/) MacOS
+ Build status: [pipelines](https://gitlab.arm.com/runtimes/vixl/-/pipelines)
Licence
@@ -69,34 +65,42 @@ Refer to the 'Usage' section for details.
Note that in Ubuntu 18.04, clang-tidy-4.0 will only work if the clang-4.0
package is also installed.
+Versioning
+==========
+
+VIXL uses [Semantic Versioning 2.0.0][semver] - see [VERSIONS](VERSIONS.md) for details.
+
Supported Arm Architecture Features
===================================
-| Feature | VIXL CPUFeatures Flag | Notes |
-|------------|-----------------------|---------------------------------|
-| BTI | kBTI | Per-page enabling not supported |
-| DotProd | kDotProduct | |
-| FCMA | kFcma | |
-| FHM | kFHM | |
-| FP16 | kFPHalf, kNEONHalf | |
-| FRINTTS | kFrintToFixedSizedInt | |
-| FlagM | kFlagM | |
-| FlagM2 | kAXFlag | |
-| I8MM | kI8MM | |
-| JSCVT | kJSCVT | |
-| LOR | kLORegions | |
-| LRCPC | kRCpc | |
-| LRCPC2 | kRCpcImm | |
-| LSE | kAtomics | |
-| PAuth | kPAuth, kPAuthGeneric | Not ERETAA, ERETAB |
-| RAS | kRAS | |
-| RDM | kRDM | |
-| SVE | kSVE | |
-| SVE2 | kSVE2 | |
-| SVEBitPerm | kSVEBitPerm | |
-| SVEF32MM | kSVEF32MM | |
-| SVEF64MM | kSVEF64MM | |
-| SVEI8MM | kSVEI8MM | |
+| Feature | VIXL CPUFeatures Flag | Notes |
+|------------|-------------------------------|---------------------------------|
+| BTI | kBTI | Per-page enabling not supported |
+| CSSC | kCSSC | |
+| DotProd | kDotProduct | |
+| FCMA | kFcma | |
+| FHM | kFHM | |
+| FP16 | kFPHalf, kNEONHalf | |
+| FRINTTS | kFrintToFixedSizedInt | |
+| FlagM | kFlagM | |
+| FlagM2 | kAXFlag | |
+| I8MM | kI8MM | |
+| JSCVT | kJSCVT | |
+| LOR | kLORegions | |
+| LRCPC | kRCpc | |
+| LRCPC2 | kRCpcImm | |
+| LSE | kAtomics | |
+| MOPS | kMOPS | |
+| MTE | kMTEInstructions, kMTE, kMTE3 | |
+| PAuth | kPAuth, kPAuthGeneric | Not ERETAA, ERETAB |
+| RAS | kRAS | |
+| RDM | kRDM | |
+| SVE | kSVE | |
+| SVE2 | kSVE2 | |
+| SVEBitPerm | kSVEBitPerm | |
+| SVEF32MM | kSVEF32MM | |
+| SVEF64MM | kSVEF64MM | |
+| SVEI8MM | kSVEI8MM | |
Enable generating code for an architecture feature by combining a flag with
the MacroAssembler's defaults. For example, to generate code for SVE, use
@@ -202,7 +206,7 @@ selection.
Bug reports
===========
-Bug reports may be made in the Issues section of GitHub, or sent to
+Bug reports may be made in the Issues section of GitLab, or sent to
vixl@arm.com. Please provide any steps required to recreate a bug, along with
build environment and host system information.
@@ -257,8 +261,11 @@ aarch32_examples` or `scons aarch64_examples` from the root directory, or use
[cpplint]: https://github.com/google/styleguide/tree/gh-pages/cpplint
"Google's cpplint.py script."
-[vixl]: https://github.com/Linaro/vixl
- "The VIXL repository on GitHub."
+[vixl]: https://gitlab.arm.com/runtimes/vixl
+ "The VIXL repository on GitLab."
+
+[semver]: https://semver.org/spec/v2.0.0.html
+ "Semantic Versioning 2.0.0 Specification"
[getting-started-aarch32]: doc/aarch32/getting-started-aarch32.md
"Introduction to VIXL for AArch32."
diff --git a/doc/aarch64/supported-instructions-aarch64.md b/doc/aarch64/supported-instructions-aarch64.md
index 5919354f517efae40ff0a7ebe1ae983a3c4105c2..75c2ef86cb6999ce64edf08d1077c4b7e368b0f6 100644
--- a/doc/aarch64/supported-instructions-aarch64.md
+++ b/doc/aarch64/supported-instructions-aarch64.md
@@ -7,7 +7,7 @@ operations to the precision required by AArch64 - please check the simulator
source code for details.
#### AArch64 integer instructions ####
-[a](#integer-a) [b](#integer-b) [c](#integer-c) [d](#integer-d) [e](#integer-e) [h](#integer-h) [i](#integer-i) [l](#integer-l) [m](#integer-m) [n](#integer-n) [o](#integer-o) [p](#integer-p) [r](#integer-r) [s](#integer-s) [t](#integer-t) [u](#integer-u) [x](#integer-x)
+[a](#integer-a) [b](#integer-b) [c](#integer-c) [d](#integer-d) [e](#integer-e) [g](#integer-g) [h](#integer-h) [i](#integer-i) [l](#integer-l) [m](#integer-m) [n](#integer-n) [o](#integer-o) [p](#integer-p) [r](#integer-r) [s](#integer-s) [t](#integer-t) [u](#integer-u) [x](#integer-x) [y](#integer-y)
#### AArch64 floating point and NEON instructions ####
[a](#float-a) [b](#float-b) [c](#float-c) [d](#float-d) [e](#float-e) [f](#float-f) [i](#float-i) [l](#float-l) [m](#float-m) [n](#float-n) [o](#float-o) [p](#float-p) [r](#float-r) [s](#float-s) [t](#float-t) [u](#float-u) [x](#float-x) [z](#float-z)
@@ -25,6 +25,13 @@ AArch64 integer instructions
+### ABS ###
+
+Absolute value.
+
+ void abs(const Register& rd, const Register& rn)
+
+
### ADC ###
Add with carry bit.
@@ -46,6 +53,13 @@ Add.
void add(const Register& rd, const Register& rn, const Operand& operand)
+### ADDG ###
+
+Add with Tag.
+
+ void addg(const Register& xd, const Register& xn, int offset, int tag_offset)
+
+
### ADDS ###
Add and update status flags.
@@ -578,6 +592,13 @@ Invert carry flag _(Armv8.4)_.
void cfinv()
+### CHKFEAT ###
+
+Check feature status.
+
+ void chkfeat(const Register& rd)
+
+
### CINC ###
Conditional increment: rd = cond ? rn + 1 : rn.
@@ -627,6 +648,13 @@ Compare.
void cmp(const Register& rn, const Operand& operand)
+### CMPP ###
+
+Compare with Tag.
+
+ void cmpp(const Register& xn, const Register& xm)
+
+
### CNEG ###
Conditional negate: rd = cond ? -rn : rn.
@@ -634,6 +662,181 @@ Conditional negate: rd = cond ? -rn : rn.
void cneg(const Register& rd, const Register& rn, Condition cond)
+### CNT ###
+
+Count bits.
+
+ void cnt(const Register& rd, const Register& rn)
+
+
+### CPYE ###
+
+Memory Copy.
+
+ void cpye(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYEN ###
+
+Memory Copy, reads and writes non-temporal.
+
+ void cpyen(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYERN ###
+
+Memory Copy, reads non-temporal.
+
+ void cpyern(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYEWN ###
+
+Memory Copy, writes non-temporal.
+
+ void cpyewn(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYFE ###
+
+Memory Copy Forward-only.
+
+ void cpyfe(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYFEN ###
+
+Memory Copy Forward-only, reads and writes non-temporal.
+
+ void cpyfen(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYFERN ###
+
+Memory Copy Forward-only, reads non-temporal.
+
+ void cpyfern(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYFEWN ###
+
+Memory Copy Forward-only, writes non-temporal.
+
+ void cpyfewn(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYFM ###
+
+Memory Copy Forward-only.
+
+ void cpyfm(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYFMN ###
+
+Memory Copy Forward-only, reads and writes non-temporal.
+
+ void cpyfmn(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYFMRN ###
+
+Memory Copy Forward-only, reads non-temporal.
+
+ void cpyfmrn(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYFMWN ###
+
+Memory Copy Forward-only, writes non-temporal.
+
+ void cpyfmwn(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYFP ###
+
+Memory Copy Forward-only.
+
+ void cpyfp(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYFPN ###
+
+Memory Copy Forward-only, reads and writes non-temporal.
+
+ void cpyfpn(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYFPRN ###
+
+Memory Copy Forward-only, reads non-temporal.
+
+ void cpyfprn(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYFPWN ###
+
+Memory Copy Forward-only, writes non-temporal.
+
+ void cpyfpwn(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYM ###
+
+Memory Copy.
+
+ void cpym(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYMN ###
+
+Memory Copy, reads and writes non-temporal.
+
+ void cpymn(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYMRN ###
+
+Memory Copy, reads non-temporal.
+
+ void cpymrn(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYMWN ###
+
+Memory Copy, writes non-temporal.
+
+ void cpymwn(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYP ###
+
+Memory Copy.
+
+ void cpyp(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYPN ###
+
+Memory Copy, reads and writes non-temporal.
+
+ void cpypn(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYPRN ###
+
+Memory Copy, reads non-temporal.
+
+ void cpyprn(const Register& rd, const Register& rs, const Register& rn)
+
+
+### CPYPWN ###
+
+Memory Copy, writes non-temporal.
+
+ void cpypwn(const Register& rd, const Register& rs, const Register& rn)
+
+
### CRC32B ###
CRC-32 checksum from byte.
@@ -751,6 +954,13 @@ Conditional select negation: rd = cond ? rn : -rm.
Condition cond)
+### CTZ ###
+
+Count Trailing Zeros.
+
+ void ctz(const Register& rd, const Register& rn)
+
+
### DC ###
@@ -807,6 +1017,43 @@ Extract.
unsigned lsb)
+
+
+### GCSPOPM ###
+
+Guarded Control Stack Pop.
+
+ void gcspopm(const Register& rt)
+
+
+### GCSPUSHM ###
+
+Guarded Control Stack Push.
+
+ void gcspushm(const Register& rt)
+
+
+### GCSSS1 ###
+
+Guarded Control Stack Switch Stack 1.
+
+ void gcsss1(const Register& rt)
+
+
+### GCSSS2 ###
+
+Guarded Control Stack Switch Stack 2.
+
+ void gcsss2(const Register& rt)
+
+
+### GMI ###
+
+Tag Mask Insert.
+
+ void gmi(const Register& xd, const Register& xn, const Register& xm)
+
+
### HINT ###
@@ -839,6 +1086,13 @@ System instruction cache operation.
void ic(InstructionCacheOp op, const Register& rt)
+### IRG ###
+
+Insert Random Tag.
+
+ void irg(const Register& xd, const Register& xn, const Register& xm = xzr)
+
+
### ISB ###
Instruction synchronization barrier.
@@ -1212,6 +1466,13 @@ Atomic exclusive OR on halfword in memory, with Store-release semantics _(Armv8.
void ldeorlh(const Register& rs, const Register& rt, const MemOperand& src)
+### LDG ###
+
+Load Allocation Tag.
+
+ void ldg(const Register& xt, const MemOperand& addr)
+
+
### LDLAR ###
Load LORelease register _(Armv8.1)_.
@@ -2332,6 +2593,20 @@ Signed integer divide.
void sdiv(const Register& rd, const Register& rn, const Register& rm)
+### SETE ###
+
+Memory Set.
+
+ void sete(const Register& rd, const Register& rn, const Register& rs)
+
+
+### SETEN ###
+
+Memory Set, non-temporal.
+
+ void seten(const Register& rd, const Register& rn, const Register& rs)
+
+
### SETF16 ###
Set NZCV flags from register, treated as an 16-bit value _(Armv8.4)_.
@@ -2346,6 +2621,76 @@ Set NZCV flags from register, treated as an 8-bit value _(Armv8.4)_.
void setf8(const Register& rn)
+### SETGE ###
+
+Memory Set with tag setting.
+
+ void setge(const Register& rd, const Register& rn, const Register& rs)
+
+
+### SETGEN ###
+
+Memory Set with tag setting, non-temporal.
+
+ void setgen(const Register& rd, const Register& rn, const Register& rs)
+
+
+### SETGM ###
+
+Memory Set with tag setting.
+
+ void setgm(const Register& rd, const Register& rn, const Register& rs)
+
+
+### SETGMN ###
+
+Memory Set with tag setting, non-temporal.
+
+ void setgmn(const Register& rd, const Register& rn, const Register& rs)
+
+
+### SETGP ###
+
+Memory Set with tag setting.
+
+ void setgp(const Register& rd, const Register& rn, const Register& rs)
+
+
+### SETGPN ###
+
+Memory Set with tag setting, non-temporal.
+
+ void setgpn(const Register& rd, const Register& rn, const Register& rs)
+
+
+### SETM ###
+
+Memory Set.
+
+ void setm(const Register& rd, const Register& rn, const Register& rs)
+
+
+### SETMN ###
+
+Memory Set, non-temporal.
+
+ void setmn(const Register& rd, const Register& rn, const Register& rs)
+
+
+### SETP ###
+
+Memory Set.
+
+ void setp(const Register& rd, const Register& rn, const Register& rs)
+
+
+### SETPN ###
+
+Memory Set, non-temporal.
+
+ void setpn(const Register& rd, const Register& rn, const Register& rs)
+
+
### SMADDL ###
Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
@@ -2356,6 +2701,20 @@ Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
const Register& xa)
+### SMAX ###
+
+Signed Maximum.
+
+ void smax(const Register& rd, const Register& rn, const Operand& op)
+
+
+### SMIN ###
+
+Signed Minimum.
+
+ void smin(const Register& rd, const Register& rn, const Operand& op)
+
+
### SMSUBL ###
Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
@@ -2380,6 +2739,13 @@ Signed long multiply: 32 x 32 -> 64-bit.
void smull(const Register& xd, const Register& wn, const Register& wm)
+### ST2G ###
+
+Store Allocation Tags.
+
+ void st2g(const Register& xt, const MemOperand& addr)
+
+
### STADD ###
Atomic add on word or doubleword in memory, without return. _(Armv8.1)_
@@ -2506,6 +2872,20 @@ Atomic exclusive OR on halfword in memory, with Store-release semantics and with
void steorlh(const Register& rs, const MemOperand& src)
+### STG ###
+
+Store Allocation Tag.
+
+ void stg(const Register& xt, const MemOperand& addr)
+
+
+### STGP ###
+
+Store Allocation Tag and Pair of registers.
+
+ void stgp(const Register& xt1, const Register& xt2, const MemOperand& addr)
+
+
### STLLR ###
Store LORelease register _(Armv8.1)_.
@@ -2913,6 +3293,20 @@ Store exclusive half-word.
void stxrh(const Register& rs, const Register& rt, const MemOperand& dst)
+### STZ2G ###
+
+Store Allocation Tags, Zeroing.
+
+ void stz2g(const Register& xt, const MemOperand& addr)
+
+
+### STZG ###
+
+Store Allocation Tag, Zeroing.
+
+ void stzg(const Register& xt, const MemOperand& addr)
+
+
### SUB ###
Subtract.
@@ -2920,6 +3314,27 @@ Subtract.
void sub(const Register& rd, const Register& rn, const Operand& operand)
+### SUBG ###
+
+Subtract with Tag.
+
+ void subg(const Register& xd, const Register& xn, int offset, int tag_offset)
+
+
+### SUBP ###
+
+Subtract Pointer.
+
+ void subp(const Register& xd, const Register& xn, const Register& xm)
+
+
+### SUBPS ###
+
+Subtract Pointer, setting Flags.
+
+ void subps(const Register& xd, const Register& xn, const Register& xm)
+
+
### SUBS ###
Subtract and update status flags.
@@ -3053,6 +3468,13 @@ System instruction.
void sys(int op1, int crn, int crm, int op2, const Register& xt = xzr)
+### SYSL ###
+
+System instruction with result.
+
+ void sysl(int op, const Register& xt = xzr)
+
+
### TBNZ ###
@@ -3146,6 +3568,20 @@ Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
const Register& xa)
+### UMAX ###
+
+Unsigned Maximum.
+
+ void umax(const Register& rd, const Register& rn, const Operand& op)
+
+
+### UMIN ###
+
+Unsigned Minimum.
+
+ void umin(const Register& rd, const Register& rn, const Operand& op)
+
+
### UMSUBL ###
Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
@@ -3221,6 +3657,15 @@ Strip Pointer Authentication Code of Instruction address in LR _(Armv8.3)_.
void xpaclri()
+
+
+### YIELD ###
+
+Yield.
+
+ void yield()
+
+
AArch64 floating point and NEON instructions
--------------------------------------------
@@ -3276,6 +3721,34 @@ Add across vector.
void addv(const VRegister& vd, const VRegister& vn)
+### AESD ###
+
+AES single round decryption.
+
+ void aesd(const VRegister& vd, const VRegister& vn)
+
+
+### AESE ###
+
+AES single round encryption.
+
+ void aese(const VRegister& vd, const VRegister& vn)
+
+
+### AESIMC ###
+
+AES inverse mix columns.
+
+ void aesimc(const VRegister& vd, const VRegister& vn)
+
+
+### AESMC ###
+
+AES mix columns.
+
+ void aesmc(const VRegister& vd, const VRegister& vn)
+
+
### AND ###
Bitwise and.
@@ -3285,6 +3758,16 @@ Bitwise and.
+### BCAX ###
+
+Bit Clear and exclusive-OR.
+
+ void bcax(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va)
+
+
### BIC ###
Bit clear immediate.
@@ -3445,6 +3928,16 @@ Bitwise eor.
void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+### EOR3 ###
+
+Three-way Exclusive-OR.
+
+ void eor3(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va)
+
+
### EXT ###
Extract vector from pair of vectors.
@@ -4693,6 +5186,13 @@ Rounding add narrow returning high half (second part).
void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+### RAX1 ###
+
+Rotate and Exclusive-OR.
+
+ void rax1(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+
+
### RBIT ###
Reverse bit order.
@@ -4873,6 +5373,104 @@ Signed dot product by element _(Armv8.2)_.
int vm_index)
+### SHA1C ###
+
+SHA1 hash update (choose).
+
+ void sha1c(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+
+
+### SHA1H ###
+
+SHA1 fixed rotate.
+
+ void sha1h(const VRegister& sd, const VRegister& sn)
+
+
+### SHA1M ###
+
+SHA1 hash update (majority).
+
+ void sha1m(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+
+
+### SHA1P ###
+
+SHA1 hash update (parity).
+
+ void sha1p(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+
+
+### SHA1SU0 ###
+
+SHA1 schedule update 0.
+
+ void sha1su0(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+
+
+### SHA1SU1 ###
+
+SHA1 schedule update 1.
+
+ void sha1su1(const VRegister& vd, const VRegister& vn)
+
+
+### SHA256H ###
+
+SHA256 hash update (part 1).
+
+ void sha256h(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+
+
+### SHA256H2 ###
+
+SHA256 hash update (part 2).
+
+ void sha256h2(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+
+
+### SHA256SU0 ###
+
+SHA256 schedule update 0.
+
+ void sha256su0(const VRegister& vd, const VRegister& vn)
+
+
+### SHA256SU1 ###
+
+SHA256 schedule update 1.
+
+ void sha256su1(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+
+
+### SHA512H ###
+
+SHA512 hash update part 1.
+
+ void sha512h(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+
+
+### SHA512H2 ###
+
+SHA512 hash update part 2.
+
+ void sha512h2(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+
+
+### SHA512SU0 ###
+
+SHA512 schedule Update 0.
+
+ void sha512su0(const VRegister& vd, const VRegister& vn)
+
+
+### SHA512SU1 ###
+
+SHA512 schedule Update 1.
+
+ void sha512su1(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+
+
### SHADD ###
Signed halving add.
@@ -4929,6 +5527,84 @@ Shift left by immediate and insert.
void sli(const VRegister& vd, const VRegister& vn, int shift)
+### SM3PARTW1 ###
+
+SM3PARTW1.
+
+ void sm3partw1(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+
+
+### SM3PARTW2 ###
+
+SM3PARTW2.
+
+ void sm3partw2(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+
+
+### SM3SS1 ###
+
+SM3SS1.
+
+ void sm3ss1(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va)
+
+
+### SM3TT1A ###
+
+SM3TT1A.
+
+ void sm3tt1a(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int index)
+
+
+### SM3TT1B ###
+
+SM3TT1B.
+
+ void sm3tt1b(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int index)
+
+
+### SM3TT2A ###
+
+SM3TT2A.
+
+ void sm3tt2a(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int index)
+
+
+### SM3TT2B ###
+
+SM3TT2B.
+
+ void sm3tt2b(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int index)
+
+
+### SM4E ###
+
+SM4 Encode.
+
+ void sm4e(const VRegister& vd, const VRegister& vn)
+
+
+### SM4EKEY ###
+
+SM4 Key.
+
+ void sm4ekey(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+
+
### SMAX ###
Signed maximum.
@@ -6285,6 +6961,16 @@ Unzip vectors (secondary).
+### XAR ###
+
+Exclusive-OR and Rotate.
+
+ void xar(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int rotate)
+
+
### XTN ###
Extract narrow.
diff --git a/test/aarch64/test-disasm-aarch64.cc b/test/aarch64/test-disasm-aarch64.cc
index 60e8d9f6820891fe2c7629fe675d212c31d24360..7b91739bd2fe2f183e6b4276194c17d671c44e50 100644
--- a/test/aarch64/test-disasm-aarch64.cc
+++ b/test/aarch64/test-disasm-aarch64.cc
@@ -3933,33 +3933,28 @@ TEST(architecture_features) {
COMPARE_PREFIX(dci(0xd500405f), "axflag"); // AXFLAG_M_pstate
// ARMv8.5 - MTE
- // COMPARE_PREFIX(dci(0x68800000), "stgp"); // STGP_64_ldstpair_post
- // COMPARE_PREFIX(dci(0x69000000), "stgp"); // STGP_64_ldstpair_off
- // COMPARE_PREFIX(dci(0x69800000), "stgp"); // STGP_64_ldstpair_pre
- // COMPARE_PREFIX(dci(0x91800000), "addg"); // ADDG_64_addsub_immtags
- // COMPARE_PREFIX(dci(0x9ac00000), "subp"); // SUBP_64S_dp_2src
- // COMPARE_PREFIX(dci(0x9ac01000), "irg"); // IRG_64I_dp_2src
- // COMPARE_PREFIX(dci(0x9ac01400), "gmi"); // GMI_64G_dp_2src
- // COMPARE_PREFIX(dci(0xbac00000), "subps"); // SUBPS_64S_dp_2src
- // COMPARE_PREFIX(dci(0xd1800000), "subg"); // SUBG_64_addsub_immtags
- // COMPARE_PREFIX(dci(0xd9200400), "stg"); // STG_64Spost_ldsttags
- // COMPARE_PREFIX(dci(0xd9200800), "stg"); // STG_64Soffset_ldsttags
- // COMPARE_PREFIX(dci(0xd9200c00), "stg"); // STG_64Spre_ldsttags
- // COMPARE_PREFIX(dci(0xd9600000), "ldg"); // LDG_64Loffset_ldsttags
- // COMPARE_PREFIX(dci(0xd9600400), "stzg"); // STZG_64Spost_ldsttags
- // COMPARE_PREFIX(dci(0xd9600800), "stzg"); // STZG_64Soffset_ldsttags
- // COMPARE_PREFIX(dci(0xd9600c00), "stzg"); // STZG_64Spre_ldsttags
- // COMPARE_PREFIX(dci(0xd9a00400), "st2g"); // ST2G_64Spost_ldsttags
- // COMPARE_PREFIX(dci(0xd9a00800), "st2g"); // ST2G_64Soffset_ldsttags
- // COMPARE_PREFIX(dci(0xd9a00c00), "st2g"); // ST2G_64Spre_ldsttags
- // COMPARE_PREFIX(dci(0xd9e00400), "stz2g"); // STZ2G_64Spost_ldsttags
- // COMPARE_PREFIX(dci(0xd9e00800), "stz2g"); // STZ2G_64Soffset_ldsttags
- // COMPARE_PREFIX(dci(0xd9e00c00), "stz2g"); // STZ2G_64Spre_ldsttags
-
- // ARMv8.5 - MTE2
- // COMPARE_PREFIX(dci(0xd9200000), "stzgm"); // STZGM_64bulk_ldsttags
- // COMPARE_PREFIX(dci(0xd9a00000), "stgm"); // STGM_64bulk_ldsttags
- // COMPARE_PREFIX(dci(0xd9e00000), "ldgm"); // LDGM_64bulk_ldsttags
+ COMPARE_PREFIX(dci(0x68800000), "stgp"); // STGP_64_ldstpair_post
+ COMPARE_PREFIX(dci(0x69000000), "stgp"); // STGP_64_ldstpair_off
+ COMPARE_PREFIX(dci(0x69800000), "stgp"); // STGP_64_ldstpair_pre
+ COMPARE_PREFIX(dci(0x91800000), "addg"); // ADDG_64_addsub_immtags
+ COMPARE_PREFIX(dci(0x9ac00000), "subp"); // SUBP_64S_dp_2src
+ COMPARE_PREFIX(dci(0x9ac01000), "irg"); // IRG_64I_dp_2src
+ COMPARE_PREFIX(dci(0x9ac01400), "gmi"); // GMI_64G_dp_2src
+ COMPARE_PREFIX(dci(0xbac00000), "subps"); // SUBPS_64S_dp_2src
+ COMPARE_PREFIX(dci(0xd1800000), "subg"); // SUBG_64_addsub_immtags
+ COMPARE_PREFIX(dci(0xd9200400), "stg"); // STG_64Spost_ldsttags
+ COMPARE_PREFIX(dci(0xd9200800), "stg"); // STG_64Soffset_ldsttags
+ COMPARE_PREFIX(dci(0xd9200c00), "stg"); // STG_64Spre_ldsttags
+ COMPARE_PREFIX(dci(0xd9600000), "ldg"); // LDG_64Loffset_ldsttags
+ COMPARE_PREFIX(dci(0xd9600400), "stzg"); // STZG_64Spost_ldsttags
+ COMPARE_PREFIX(dci(0xd9600800), "stzg"); // STZG_64Soffset_ldsttags
+ COMPARE_PREFIX(dci(0xd9600c00), "stzg"); // STZG_64Spre_ldsttags
+ COMPARE_PREFIX(dci(0xd9a00400), "st2g"); // ST2G_64Spost_ldsttags
+ COMPARE_PREFIX(dci(0xd9a00800), "st2g"); // ST2G_64Soffset_ldsttags
+ COMPARE_PREFIX(dci(0xd9a00c00), "st2g"); // ST2G_64Spre_ldsttags
+ COMPARE_PREFIX(dci(0xd9e00400), "stz2g"); // STZ2G_64Spost_ldsttags
+ COMPARE_PREFIX(dci(0xd9e00800), "stz2g"); // STZ2G_64Soffset_ldsttags
+ COMPARE_PREFIX(dci(0xd9e00c00), "stz2g"); // STZ2G_64Spre_ldsttags
// ARMv8.6 - BF16
// COMPARE_PREFIX(dci(0x0ea16800), "bfcvtn"); // BFCVTN_asimdmisc_4S