diff --git a/.clang-format b/.clang-format index fe1aa1a30d40267b71a2c90b23e16f7cfe473eea..7630990aa07aec9034d438cf9659cbf0998b68a6 100644 --- a/.clang-format +++ b/.clang-format @@ -690,6 +690,13 @@ ForEachMacros: - 'v4l2_m2m_for_each_src_buf' - 'v4l2_m2m_for_each_src_buf_safe' - 'virtio_device_for_each_vq' + - 'vkms_config_for_each_connector' + - 'vkms_config_for_each_crtc' + - 'vkms_config_for_each_encoder' + - 'vkms_config_for_each_plane' + - 'vkms_config_connector_for_each_possible_encoder' + - 'vkms_config_encoder_for_each_possible_crtc' + - 'vkms_config_plane_for_each_possible_crtc' - 'while_for_each_ftrace_op' - 'xa_for_each' - 'xa_for_each_marked' diff --git a/.gitignore b/.gitignore index 5937c74d3dc1e4ca56d93ea0b8d5ff9fc64ec839..f2f63e47fb88686d5d5ab17d480c9301184134a9 100644 --- a/.gitignore +++ b/.gitignore @@ -65,6 +65,7 @@ modules.order /vmlinux.32 /vmlinux.map /vmlinux.symvers +/vmlinux.unstripped /vmlinux-gdb.py /vmlinuz /System.map diff --git a/.mailmap b/.mailmap index b1b709230db8ceb060eb40a4bae22aa844d81523..4f7cd8e231778cdf0ee45fe2d2cba1a554965685 100644 --- a/.mailmap +++ b/.mailmap @@ -31,6 +31,13 @@ Alexander Lobakin Alexander Lobakin Alexander Mikhalitsyn Alexander Mikhalitsyn +Alexander Sverdlin +Alexander Sverdlin +Alexander Sverdlin +Alexander Sverdlin +Alexander Sverdlin +Alexander Sverdlin +Alexander Sverdlin Alexandre Belloni Alexandre Ghiti Alexei Avshalom Lazar @@ -153,7 +160,6 @@ Carlos Bilbao Carlos Bilbao Carlos Bilbao Changbin Du -Changbin Du Chao Yu Chao Yu Chester Lin @@ -271,6 +277,7 @@ Hamza Mahfooz Hanjun Guo Hans Verkuil Hans Verkuil +Harry Yoo <42.hyeyoo@gmail.com> Heiko Carstens Heiko Carstens Heiko Stuebner @@ -282,6 +289,7 @@ Henrik Rydberg Herbert Xu Huacai Chen Huacai Chen +Ike Panhc J. Bruce Fields J. Bruce Fields Jacob Shin @@ -304,7 +312,6 @@ Jan Glauber Jan Kuliga Jarkko Sakkinen Jarkko Sakkinen -Jarkko Sakkinen Jason Gunthorpe Jason Gunthorpe Jason Gunthorpe @@ -542,6 +549,8 @@ Nicolas Pitre Nicolas Pitre Nicolas Saenz Julienne Nicolas Saenz Julienne +Nicolas Schier +Nicolas Schier Niklas Söderlund Nikolay Aleksandrov Nikolay Aleksandrov @@ -686,6 +695,8 @@ Stephen Hemminger Stephen Hemminger Stephen Hemminger Stephen Hemminger +Stephen Smalley +Stephen Smalley Steve Wise Steve Wise Subash Abhinov Kasiviswanathan @@ -759,7 +770,6 @@ Vinod Koul Viresh Kumar Viresh Kumar Viresh Kumar -Viresh Kumar Viresh Kumar Vishnu Dasa Vivek Aknurwar diff --git a/CREDITS b/CREDITS index d8a594aefed61a948d1d6b89edb968434616b2b4..1b77fba6c27ecb793a675f0ed3a8c3b230bc2f5f 100644 --- a/CREDITS +++ b/CREDITS @@ -317,6 +317,10 @@ S: Code 930.5, Goddard Space Flight Center S: Greenbelt, Maryland 20771 S: USA +N: Joel Becker +E: jlbec@evilplan.org +D: configfs + N: Adam Belay E: ambx1@neo.rr.com D: Linux Plug and Play Support @@ -855,6 +859,10 @@ N: John Crispin E: john@phrozen.org D: MediaTek MT7623 Gigabit ethernet support +N: Conor Culhane +E: conor.culhane@silvaco.com +D: Silvaco I3C master driver + N: Laurence Culhane E: loz@holmes.demon.co.uk D: Wrote the initial alpha SLIP code @@ -1895,6 +1903,7 @@ S: Czech Republic N: Seth Jennings E: sjenning@redhat.com D: Creation and maintenance of zswap +D: Creation and maintenace of the zbud allocator N: Jeremy Kerr D: Maintainer of SPU File System @@ -2187,6 +2196,10 @@ D: Various ACPI fixes, keeping correct battery state through suspend D: various lockdep annotations, autofs and other random bugfixes S: Prague, Czech Republic +N: Ishizaki Kou +E: kou.ishizaki@toshiba.co.jp +D: Spidernet driver for PowerPC Cell platforms + N: Gene Kozin E: 74604.152@compuserve.com W: https://www.sangoma.com @@ -2197,6 +2210,9 @@ S: Markham, Ontario S: L3R 8B2 S: Canada +N: Christian Krafft +D: PowerPC Cell support + N: Maxim Krasnyansky E: maxk@qualcomm.com W: http://vtun.sf.net @@ -2389,6 +2405,10 @@ S: ICP vortex GmbH S: Neckarsulm S: Germany +N: Geoff Levand +E: geoff@infradead.org +D: Spidernet driver for PowerPC Cell platforms + N: Phil Lewis E: beans@bucket.ualr.edu D: Promised to send money if I would put his name in the source tree. @@ -3233,6 +3253,10 @@ N: Rui Prior E: rprior@inescn.pt D: ATM device driver for NICStAR based cards +N: Roopa Prabhu +E: roopa@nvidia.com +D: Bridge co-maintainer, vxlan and networking contributor + N: Stefan Probst E: sp@caldera.de D: The Linux Support Team Erlangen, 1993-97 @@ -3646,6 +3670,10 @@ S: 149 Union St. S: Kingston, Ontario S: Canada K7L 2P4 +N: Pravin B Shelar +E: pshelar@ovn.org +D: Open vSwitch maintenance and contributions + N: John Shifflett E: john@geolog.com E: jshiffle@netcom.com @@ -3788,6 +3816,7 @@ N: Dan Streetman E: ddstreet@ieee.org D: Maintenance and development of zswap D: Creation and maintenance of the zpool API +D: Maintenace of the zbud allocator N: Drew Sullivan E: drew@ss.org @@ -4315,6 +4344,7 @@ S: England N: Vitaly Wool E: vitaly.wool@konsulko.com D: Maintenance and development of zswap +D: Maintenance and development of z3fold N: Chris Wright E: chrisw@sous-sol.org diff --git a/Documentation/ABI/README b/Documentation/ABI/README index 8bac9cb09a6de7a50f596abab38318368e592128..ef0e6d11e919c80190286e1f522d9bcb4fd91017 100644 --- a/Documentation/ABI/README +++ b/Documentation/ABI/README @@ -1,4 +1,5 @@ -This directory attempts to document the ABI between the Linux kernel and +This part of the documentation inside Documentation/ABI directory +attempts to document the ABI between the Linux kernel and userspace, and the relative stability of these interfaces. Due to the everchanging nature of Linux, and the differing maturity levels, these interfaces should be used by userspace programs in different ways. diff --git a/Documentation/ABI/obsolete/sysfs-class-cxl b/Documentation/ABI/removed/sysfs-class-cxl similarity index 87% rename from Documentation/ABI/obsolete/sysfs-class-cxl rename to Documentation/ABI/removed/sysfs-class-cxl index 8cba1b626985ea41c234832bee14e1dd507dbbf9..266c413b96e86dcf67edc6b4e6cd8b764209e326 100644 --- a/Documentation/ABI/obsolete/sysfs-class-cxl +++ b/Documentation/ABI/removed/sysfs-class-cxl @@ -1,5 +1,4 @@ -The cxl driver is no longer maintained, and will be removed from the kernel in -the near future. +The cxl driver was removed in 6.15. Please note that attributes that are shared between devices are stored in the directory pointed to by the symlink device/. @@ -10,7 +9,7 @@ For example, the real path of the attribute /sys/class/cxl/afu0.0s/irqs_max is Slave contexts (eg. /sys/class/cxl/afu0.0s): What: /sys/class/cxl//afu_err_buf -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only AFU Error Buffer contents. The contents of this file are @@ -21,7 +20,7 @@ Description: read only What: /sys/class/cxl//irqs_max -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read/write Decimal value of maximum number of interrupts that can be @@ -32,7 +31,7 @@ Description: read/write Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//irqs_min -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Decimal value of the minimum number of interrupts that @@ -42,7 +41,7 @@ Description: read only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//mmio_size -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Decimal value of the size of the MMIO space that may be mmapped @@ -50,7 +49,7 @@ Description: read only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//modes_supported -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only List of the modes this AFU supports. One per line. @@ -58,7 +57,7 @@ Description: read only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//mode -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read/write The current mode the AFU is using. Will be one of the modes @@ -68,7 +67,7 @@ Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//prefault_mode -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read/write Set the mode for prefaulting in segments into the segment table @@ -88,7 +87,7 @@ Description: read/write Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//reset -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: write only Writing 1 here will reset the AFU provided there are not @@ -96,14 +95,14 @@ Description: write only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//api_version -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Decimal value of the current version of the kernel/user API. Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//api_version_compatible -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Decimal value of the lowest version of the userspace API @@ -117,7 +116,7 @@ An AFU may optionally export one or more PCIe like configuration records, known as AFU configuration records, which will show up here (if present). What: /sys/class/cxl//cr/vendor -Date: February 2015 +Date: February 2015, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Hexadecimal value of the vendor ID found in this AFU @@ -125,7 +124,7 @@ Description: read only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//cr/device -Date: February 2015 +Date: February 2015, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Hexadecimal value of the device ID found in this AFU @@ -133,7 +132,7 @@ Description: read only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//cr/class -Date: February 2015 +Date: February 2015, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Hexadecimal value of the class code found in this AFU @@ -141,7 +140,7 @@ Description: read only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//cr/config -Date: February 2015 +Date: February 2015, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only This binary file provides raw access to the AFU configuration @@ -155,7 +154,7 @@ Users: https://github.com/ibm-capi/libcxl Master contexts (eg. /sys/class/cxl/afu0.0m) What: /sys/class/cxl/m/mmio_size -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Decimal value of the size of the MMIO space that may be mmapped @@ -163,14 +162,14 @@ Description: read only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl/m/pp_mmio_len -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Decimal value of the Per Process MMIO space length. Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl/m/pp_mmio_off -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only (not in a guest) @@ -181,21 +180,21 @@ Users: https://github.com/ibm-capi/libcxl Card info (eg. /sys/class/cxl/card0) What: /sys/class/cxl//caia_version -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Identifies the CAIA Version the card implements. Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//psl_revision -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Identifies the revision level of the PSL. Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//base_image -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only (not in a guest) @@ -206,7 +205,7 @@ Description: read only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//image_loaded -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only (not in a guest) @@ -215,7 +214,7 @@ Description: read only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//load_image_on_perst -Date: December 2014 +Date: December 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read/write (not in a guest) @@ -232,7 +231,7 @@ Description: read/write Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//reset -Date: October 2014 +Date: October 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: write only Writing 1 will issue a PERST to card provided there are no @@ -243,7 +242,7 @@ Description: write only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//perst_reloads_same_image -Date: July 2015 +Date: July 2015, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read/write (not in a guest) @@ -257,7 +256,7 @@ Description: read/write Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//psl_timebase_synced -Date: March 2016 +Date: March 2016, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Returns 1 if the psl timebase register is synchronized @@ -265,7 +264,7 @@ Description: read only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//tunneled_ops_supported -Date: May 2018 +Date: May 2018, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Returns 1 if tunneled operations are supported in capi mode, diff --git a/Documentation/ABI/removed/sysfs-class-rfkill b/Documentation/ABI/removed/sysfs-class-rfkill index f25174eafd55aecfeb3c3e49e653a0392ed6954e..20cb688af173b53593f890186d0711058770959e 100644 --- a/Documentation/ABI/removed/sysfs-class-rfkill +++ b/Documentation/ABI/removed/sysfs-class-rfkill @@ -4,7 +4,7 @@ For details to this subsystem look at Documentation/driver-api/rfkill.rst. What: /sys/class/rfkill/rfkill[0-9]+/claim Date: 09-Jul-2007 -KernelVersion v2.6.22 +KernelVersion: v2.6.22 Contact: linux-wireless@vger.kernel.org Description: This file was deprecated because there no longer was a way to claim just control over a single rfkill instance. diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block index 0cceb2badc836b8cbdade543deff71edef0e3da1..3879963f0f01e5f291e40bb7cf3fb638e7a61d6b 100644 --- a/Documentation/ABI/stable/sysfs-block +++ b/Documentation/ABI/stable/sysfs-block @@ -109,6 +109,10 @@ Contact: Martin K. Petersen Description: Indicates whether a storage device is capable of storing integrity metadata. Set if the device is T10 PI-capable. + This flag is set to 1 if the storage media is formatted + with T10 Protection Information. If the storage media is + not formatted with T10 Protection Information, this flag + is set to 0. What: /sys/block//integrity/format @@ -117,6 +121,13 @@ Contact: Martin K. Petersen Description: Metadata format for integrity capable block device. E.g. T10-DIF-TYPE1-CRC. + This field describes the type of T10 Protection Information + that the block device can send and receive. + If the device can store application integrity metadata but + no T10 Protection Information profile is used, this field + contains "nop". + If the device does not support integrity metadata, this + field contains "none". What: /sys/block//integrity/protection_interval_bytes @@ -142,7 +153,17 @@ Date: June 2008 Contact: Martin K. Petersen Description: Number of bytes of integrity tag space available per - 512 bytes of data. + protection_interval_bytes, which is typically + the device's logical block size. + This field describes the size of the application tag + if the storage device is formatted with T10 Protection + Information and permits use of the application tag. + The tag_size is reported in bytes and indicates the + space available for adding an opaque tag to each block + (protection_interval_bytes). + If the device does not support T10 Protection Information + (even if the device provides application integrity + metadata space), this field is set to 0. What: /sys/block//integrity/write_generate @@ -229,6 +250,17 @@ Description: encryption, refer to Documentation/block/inline-encryption.rst. +What: /sys/block//queue/crypto/hw_wrapped_keys +Date: February 2025 +Contact: linux-block@vger.kernel.org +Description: + [RO] The presence of this file indicates that the device + supports hardware-wrapped inline encryption keys, i.e. key blobs + that can only be unwrapped and used by dedicated hardware. For + more information about hardware-wrapped inline encryption keys, + see Documentation/block/inline-encryption.rst. + + What: /sys/block//queue/crypto/max_dun_bits Date: February 2022 Contact: linux-block@vger.kernel.org @@ -267,6 +299,15 @@ Description: use with inline encryption. +What: /sys/block//queue/crypto/raw_keys +Date: February 2025 +Contact: linux-block@vger.kernel.org +Description: + [RO] The presence of this file indicates that the device + supports raw inline encryption keys, i.e. keys that are managed + in raw, plaintext form in software. + + What: /sys/block//queue/dax Date: June 2016 Contact: linux-block@vger.kernel.org diff --git a/Documentation/ABI/stable/sysfs-class-rfkill b/Documentation/ABI/stable/sysfs-class-rfkill index 037979f7dc4bed3bc1e06a61d6aff3cdfd1960b3..67b605e3dd168d61f585d87dbc478ce4d5851efc 100644 --- a/Documentation/ABI/stable/sysfs-class-rfkill +++ b/Documentation/ABI/stable/sysfs-class-rfkill @@ -16,7 +16,7 @@ Description: The rfkill class subsystem folder. What: /sys/class/rfkill/rfkill[0-9]+/name Date: 09-Jul-2007 -KernelVersion v2.6.22 +KernelVersion: v2.6.22 Contact: linux-wireless@vger.kernel.org Description: Name assigned by driver to this key (interface or driver name). Values: arbitrary string. @@ -24,7 +24,7 @@ Values: arbitrary string. What: /sys/class/rfkill/rfkill[0-9]+/type Date: 09-Jul-2007 -KernelVersion v2.6.22 +KernelVersion: v2.6.22 Contact: linux-wireless@vger.kernel.org Description: Driver type string ("wlan", "bluetooth", etc). Values: See include/linux/rfkill.h. @@ -32,7 +32,7 @@ Values: See include/linux/rfkill.h. What: /sys/class/rfkill/rfkill[0-9]+/persistent Date: 09-Jul-2007 -KernelVersion v2.6.22 +KernelVersion: v2.6.22 Contact: linux-wireless@vger.kernel.org Description: Whether the soft blocked state is initialised from non-volatile storage at startup. @@ -44,7 +44,7 @@ Values: A numeric value: What: /sys/class/rfkill/rfkill[0-9]+/state Date: 09-Jul-2007 -KernelVersion v2.6.22 +KernelVersion: v2.6.22 Contact: linux-wireless@vger.kernel.org Description: Current state of the transmitter. This file was scheduled to be removed in 2014, but due to its @@ -67,7 +67,7 @@ Values: A numeric value. What: /sys/class/rfkill/rfkill[0-9]+/hard Date: 12-March-2010 -KernelVersion v2.6.34 +KernelVersion: v2.6.34 Contact: linux-wireless@vger.kernel.org Description: Current hardblock state. This file is read only. Values: A numeric value. @@ -81,7 +81,7 @@ Values: A numeric value. What: /sys/class/rfkill/rfkill[0-9]+/soft Date: 12-March-2010 -KernelVersion v2.6.34 +KernelVersion: v2.6.34 Contact: linux-wireless@vger.kernel.org Description: Current softblock state. This file is read and write. Values: A numeric value. diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node index 402af4b2b905f3e273ec9be14218ba8350d31295..a02707cb7cbc8a77a21c55a04e1d11130fa64b7b 100644 --- a/Documentation/ABI/stable/sysfs-devices-node +++ b/Documentation/ABI/stable/sysfs-devices-node @@ -177,6 +177,12 @@ Description: The cache write policy: 0 for write-back, 1 for write-through, other or unknown. +What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/address_mode +Date: March 2025 +Contact: Dave Jiang +Description: + The address mode: 0 for reserved, 1 for extended-linear. + What: /sys/devices/system/node/nodeX/x86/sgx_total_bytes Date: November 2021 Contact: Jarkko Sakkinen diff --git a/Documentation/ABI/stable/sysfs-devices-system-cpu b/Documentation/ABI/stable/sysfs-devices-system-cpu index 902392d7eddf03e84e1e7cd420e715d2e0239898..cf78bd99f6c860e1566743329448aa22cedd3476 100644 --- a/Documentation/ABI/stable/sysfs-devices-system-cpu +++ b/Documentation/ABI/stable/sysfs-devices-system-cpu @@ -24,12 +24,6 @@ Description: Default value for the Data Stream Control Register (DSCR) on If set by a process it will be inherited by child processes. Values: 64 bit unsigned integer (bit field) -What: /sys/devices/system/cpu/cpuX/topology/physical_package_id -Description: physical package id of cpuX. Typically corresponds to a physical - socket number, but the actual value is architecture and platform - dependent. -Values: integer - What: /sys/devices/system/cpu/cpuX/topology/die_id Description: the CPU die ID of cpuX. Typically it is the hardware platform's identifier (rather than the kernel's). The actual value is @@ -86,10 +80,6 @@ What: /sys/devices/system/cpu/cpuX/topology/die_cpus Description: internal kernel map of CPUs within the same die. Values: hexadecimal bitmask. -What: /sys/devices/system/cpu/cpuX/topology/ppin -Description: per-socket protected processor inventory number -Values: hexadecimal. - What: /sys/devices/system/cpu/cpuX/topology/die_cpus_list Description: human-readable list of CPUs within the same die. The format is like 0-3, 8-11, 14,17. diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index f2ec42949a54d7b293541810fec83c7cfa8e26a3..4a355e6747ae57db899c3f9a3ec9b9aff7a01551 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -246,14 +246,14 @@ Description: Controls whether PRS disable is turned on for the workqueue. capability. What: /sys/bus/dsa/devices/wq./occupancy -Date May 25, 2021 +Date: May 25, 2021 KernelVersion: 5.14.0 Contact: dmaengine@vger.kernel.org Description: Show the current number of entries in this WQ if WQ Occupancy Support bit WQ capabilities is 1. What: /sys/bus/dsa/devices/wq./enqcmds_retries -Date Oct 29, 2021 +Date: Oct 29, 2021 KernelVersion: 5.17.0 Contact: dmaengine@vger.kernel.org Description: Indicate the number of retires for an enqcmds submission on a sharedwq. diff --git a/Documentation/ABI/testing/configfs-usb-gadget-midi2 b/Documentation/ABI/testing/configfs-usb-gadget-midi2 index 0eac3aaba13737abb15e37a18e7e3dd44a3f2147..d76a52e2ca7fd30c774b8a0bfbff868974a08524 100644 --- a/Documentation/ABI/testing/configfs-usb-gadget-midi2 +++ b/Documentation/ABI/testing/configfs-usb-gadget-midi2 @@ -47,7 +47,7 @@ Description: midi1_first_group The first UMP Group number for MIDI 1.0 (0-15) midi1_num_groups The number of groups for MIDI 1.0 (0-16) ui_hint 0: unknown, 1: receiver, 2: sender, 3: both - midi_ci_verison Supported MIDI-CI version number (8 bit) + midi_ci_version Supported MIDI-CI version number (8 bit) is_midi1 Legacy MIDI 1.0 device (0, 1 or 2) sysex8_streams Max number of SysEx8 streams (8 bit) active Active FB flag (0 or 1) diff --git a/Documentation/ABI/testing/debugfs-dwc-pcie b/Documentation/ABI/testing/debugfs-dwc-pcie new file mode 100644 index 0000000000000000000000000000000000000000..92b76f52a40825d4bf66a2f4da4786ada4b631d2 --- /dev/null +++ b/Documentation/ABI/testing/debugfs-dwc-pcie @@ -0,0 +1,157 @@ +What: /sys/kernel/debug/dwc_pcie_/rasdes_debug/lane_detect +Date: February 2025 +Contact: Shradha Todi +Description: (RW) Write the lane number to be checked for detection. Read + will return whether PHY indicates receiver detection on the + selected lane. The default selected lane is Lane0. + +What: /sys/kernel/debug/dwc_pcie_/rasdes_debug/rx_valid +Date: February 2025 +Contact: Shradha Todi +Description: (RW) Write the lane number to be checked as valid or invalid. + Read will return the status of PIPE RXVALID signal of the + selected lane. The default selected lane is Lane0. + +What: /sys/kernel/debug/dwc_pcie_/rasdes_err_inj/ +Date: February 2025 +Contact: Shradha Todi +Description: The "rasdes_err_inj" is a directory which can be used to inject + errors into the system. The possible errors that can be injected + are: + + 1) tx_lcrc - TLP LCRC error injection TX Path + 2) b16_crc_dllp - 16b CRC error injection of ACK/NAK DLLP + 3) b16_crc_upd_fc - 16b CRC error injection of Update-FC DLLP + 4) tx_ecrc - TLP ECRC error injection TX Path + 5) fcrc_tlp - TLP's FCRC error injection TX Path + 6) parity_tsos - Parity error of TSOS + 7) parity_skpos - Parity error on SKPOS + 8) rx_lcrc - LCRC error injection RX Path + 9) rx_ecrc - ECRC error injection RX Path + 10) tlp_err_seq - TLPs SEQ# error + 11) ack_nak_dllp_seq - DLLPS ACK/NAK SEQ# error + 12) ack_nak_dllp - ACK/NAK DLLPs transmission block + 13) upd_fc_dllp - UpdateFC DLLPs transmission block + 14) nak_dllp - Always transmission for NAK DLLP + 15) inv_sync_hdr_sym - Invert SYNC header + 16) com_pad_ts1 - COM/PAD TS1 order set + 17) com_pad_ts2 - COM/PAD TS2 order set + 18) com_fts - COM/FTS FTS order set + 19) com_idl - COM/IDL E-idle order set + 20) end_edb - END/EDB symbol + 21) stp_sdp - STP/SDP symbol + 22) com_skp - COM/SKP SKP order set + 23) posted_tlp_hdr - Posted TLP Header credit value control + 24) non_post_tlp_hdr - Non-Posted TLP Header credit value control + 25) cmpl_tlp_hdr - Completion TLP Header credit value control + 26) posted_tlp_data - Posted TLP Data credit value control + 27) non_post_tlp_data - Non-Posted TLP Data credit value control + 28) cmpl_tlp_data - Completion TLP Data credit value control + 29) duplicate_tlp - Generates duplicate TLPs + 30) nullified_tlp - Generates Nullified TLPs + + (WO) Write to the attribute will prepare controller to inject + the respective error in the next transmission of data. + + Parameter required to write will change in the following ways: + + - Errors 9 and 10 are sequence errors. The write command: + + echo > /sys/kernel/debug/dwc_pcie_/rasdes_err_inj/ + + + Number of errors to be injected + + The difference to add or subtract from natural + sequence number to generate sequence error. + Allowed range from -4095 to 4095 + + - Errors 23 to 28 are credit value error insertions. The write + command: + + echo > /sys/kernel/debug/dwc_pcie_/rasdes_err_inj/ + + + Number of errors to be injected + + The difference to add or subtract from UpdateFC + credit value. Allowed range from -4095 to 4095 + + Target VC number + + - All other errors. The write command: + + echo > /sys/kernel/debug/dwc_pcie_/rasdes_err_inj/ + + + Number of errors to be injected + +What: /sys/kernel/debug/dwc_pcie_/rasdes_event_counters//counter_enable +Date: February 2025 +Contact: Shradha Todi +Description: The "rasdes_event_counters" is the directory which can be used + to collect statistical data about the number of times a certain + event has occurred in the controller. The list of possible + events are: + + 1) EBUF Overflow + 2) EBUF Underrun + 3) Decode Error + 4) Running Disparity Error + 5) SKP OS Parity Error + 6) SYNC Header Error + 7) Rx Valid De-assertion + 8) CTL SKP OS Parity Error + 9) 1st Retimer Parity Error + 10) 2nd Retimer Parity Error + 11) Margin CRC and Parity Error + 12) Detect EI Infer + 13) Receiver Error + 14) RX Recovery Req + 15) N_FTS Timeout + 16) Framing Error + 17) Deskew Error + 18) Framing Error In L0 + 19) Deskew Uncompleted Error + 20) Bad TLP + 21) LCRC Error + 22) Bad DLLP + 23) Replay Number Rollover + 24) Replay Timeout + 25) Rx Nak DLLP + 26) Tx Nak DLLP + 27) Retry TLP + 28) FC Timeout + 29) Poisoned TLP + 30) ECRC Error + 31) Unsupported Request + 32) Completer Abort + 33) Completion Timeout + 34) EBUF SKP Add + 35) EBUF SKP Del + + (RW) Write 1 to enable the event counter and write 0 to disable + the event counter. Read will return whether the counter is + currently enabled or disabled. Counter is disabled by default. + +What: /sys/kernel/debug/dwc_pcie_/rasdes_event_counters//counter_value +Date: February 2025 +Contact: Shradha Todi +Description: (RO) Read will return the current value of the event counter. + To reset the counter, counter should be disabled first and then + enabled back using the "counter_enable" attribute. + +What: /sys/kernel/debug/dwc_pcie_/rasdes_event_counters//lane_select +Date: February 2025 +Contact: Shradha Todi +Description: (RW) Some lanes in the event list are lane specific events. + These include events from 1 to 11, as well as, 34 and 35. Write + the lane number for which you wish the counter to be enabled, + disabled, or value dumped. Read will return the current + selected lane number. Lane0 is selected by default. + +What: /sys/kernel/debug/dwc_pcie_/ltssm_status +Date: February 2025 +Contact: Hans Zhang <18255117159@163.com> +Description: (RO) Read will return the current PCIe LTSSM state in both + string and raw value. diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram index 1ef69e0271f91dbdedd93795c54f8c5f0e3dc012..36c57de0a10ae5bb85866b5f1601e69249d97fe1 100644 --- a/Documentation/ABI/testing/sysfs-block-zram +++ b/Documentation/ABI/testing/sysfs-block-zram @@ -22,14 +22,6 @@ Description: device. The reset operation frees all the memory associated with this device. -What: /sys/block/zram/max_comp_streams -Date: February 2014 -Contact: Sergey Senozhatsky -Description: - The max_comp_streams file is read-write and specifies the - number of backend's zcomp_strm compression streams (number of - concurrent compress operations). - What: /sys/block/zram/comp_algorithm Date: February 2014 Contact: Sergey Senozhatsky diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-cti b/Documentation/ABI/testing/sysfs-bus-coresight-devices-cti index bf2869c413e72253ba25d5ab8b373b55a6c597b6..a97b70f588da8b41e3395636037d63a0f30cf78f 100644 --- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-cti +++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-cti @@ -1,241 +1,241 @@ What: /sys/bus/coresight/devices//enable Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (RW) Enable/Disable the CTI hardware. What: /sys/bus/coresight/devices//powered Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Read) Indicate if the CTI hardware is powered. What: /sys/bus/coresight/devices//ctmid Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Read) Display the associated CTM ID What: /sys/bus/coresight/devices//nr_trigger_cons Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Read) Number of devices connected to triggers on this CTI What: /sys/bus/coresight/devices//triggers/name Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Read) Name of connected device What: /sys/bus/coresight/devices//triggers/in_signals Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Read) Input trigger signals from connected device What: /sys/bus/coresight/devices//triggers/in_types Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Read) Functional types for the input trigger signals from connected device What: /sys/bus/coresight/devices//triggers/out_signals Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Read) Output trigger signals to connected device What: /sys/bus/coresight/devices//triggers/out_types Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Read) Functional types for the output trigger signals to connected device What: /sys/bus/coresight/devices//regs/inout_sel Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (RW) Select the index for inen and outen registers. What: /sys/bus/coresight/devices//regs/inen Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (RW) Read or write the CTIINEN register selected by inout_sel. What: /sys/bus/coresight/devices//regs/outen Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (RW) Read or write the CTIOUTEN register selected by inout_sel. What: /sys/bus/coresight/devices//regs/gate Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (RW) Read or write CTIGATE register. What: /sys/bus/coresight/devices//regs/asicctl Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (RW) Read or write ASICCTL register. What: /sys/bus/coresight/devices//regs/intack Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Write) Write the INTACK register. What: /sys/bus/coresight/devices//regs/appset Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (RW) Set CTIAPPSET register to activate channel. Read back to determine current value of register. What: /sys/bus/coresight/devices//regs/appclear Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Write) Write APPCLEAR register to deactivate channel. What: /sys/bus/coresight/devices//regs/apppulse Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Write) Write APPPULSE to pulse a channel active for one clock cycle. What: /sys/bus/coresight/devices//regs/chinstatus Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Read) Read current status of channel inputs. What: /sys/bus/coresight/devices//regs/choutstatus Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Read) read current status of channel outputs. What: /sys/bus/coresight/devices//regs/triginstatus Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Read) read current status of input trigger signals What: /sys/bus/coresight/devices//regs/trigoutstatus Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Read) read current status of output trigger signals. What: /sys/bus/coresight/devices//channels/trigin_attach Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Write) Attach a CTI input trigger to a CTM channel. What: /sys/bus/coresight/devices//channels/trigin_detach Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Write) Detach a CTI input trigger from a CTM channel. What: /sys/bus/coresight/devices//channels/trigout_attach Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Write) Attach a CTI output trigger to a CTM channel. What: /sys/bus/coresight/devices//channels/trigout_detach Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Write) Detach a CTI output trigger from a CTM channel. What: /sys/bus/coresight/devices//channels/chan_gate_enable Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (RW) Enable CTIGATE for single channel (Write) or list enabled channels through the gate (R). What: /sys/bus/coresight/devices//channels/chan_gate_disable Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Write) Disable CTIGATE for single channel. What: /sys/bus/coresight/devices//channels/chan_set Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Write) Activate a single channel. What: /sys/bus/coresight/devices//channels/chan_clear Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Write) Deactivate a single channel. What: /sys/bus/coresight/devices//channels/chan_pulse Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Write) Pulse a single channel - activate for a single clock cycle. What: /sys/bus/coresight/devices//channels/trigout_filtered Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Read) List of output triggers filtered across all connections. What: /sys/bus/coresight/devices//channels/trig_filter_enable Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (RW) Enable or disable trigger output signal filtering. What: /sys/bus/coresight/devices//channels/chan_inuse Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Read) show channels with at least one attached trigger signal. What: /sys/bus/coresight/devices//channels/chan_free Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Read) show channels with no attached trigger signals. What: /sys/bus/coresight/devices//channels/chan_xtrigs_sel Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (RW) Write channel number to select a channel to view, read to see selected channel number. What: /sys/bus/coresight/devices//channels/chan_xtrigs_in Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Read) Read to see input triggers connected to selected view channel. What: /sys/bus/coresight/devices//channels/chan_xtrigs_out Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Read) Read to see output triggers connected to selected view channel. What: /sys/bus/coresight/devices//channels/chan_xtrigs_reset Date: March 2020 -KernelVersion 5.7 +KernelVersion: 5.7 Contact: Mike Leach or Mathieu Poirier Description: (Write) Clear all channel / trigger programming. diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tpdm b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tpdm index bf710ea6e0efc5fa0081cddcca1974b383fc1117..a341b08ae70bf344c62b11b18775cc295a6f43bd 100644 --- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tpdm +++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tpdm @@ -1,6 +1,6 @@ What: /sys/bus/coresight/devices//integration_test Date: January 2023 -KernelVersion 6.2 +KernelVersion: 6.2 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (Write) Run integration test for tpdm. Integration test @@ -14,7 +14,7 @@ Description: What: /sys/bus/coresight/devices//reset_dataset Date: March 2023 -KernelVersion 6.7 +KernelVersion: 6.7 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (Write) Reset the dataset of the tpdm. @@ -24,7 +24,7 @@ Description: What: /sys/bus/coresight/devices//dsb_trig_type Date: March 2023 -KernelVersion 6.7 +KernelVersion: 6.7 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (RW) Set/Get the trigger type of the DSB for tpdm. @@ -35,7 +35,7 @@ Description: What: /sys/bus/coresight/devices//dsb_trig_ts Date: March 2023 -KernelVersion 6.7 +KernelVersion: 6.7 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (RW) Set/Get the trigger timestamp of the DSB for tpdm. @@ -46,7 +46,7 @@ Description: What: /sys/bus/coresight/devices//dsb_mode Date: March 2023 -KernelVersion 6.7 +KernelVersion: 6.7 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (RW) Set/Get the programming mode of the DSB for tpdm. @@ -60,7 +60,7 @@ Description: What: /sys/bus/coresight/devices//dsb_edge/ctrl_idx Date: March 2023 -KernelVersion 6.7 +KernelVersion: 6.7 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (RW) Set/Get the index number of the edge detection for the DSB @@ -69,7 +69,7 @@ Description: What: /sys/bus/coresight/devices//dsb_edge/ctrl_val Date: March 2023 -KernelVersion 6.7 +KernelVersion: 6.7 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: Write a data to control the edge detection corresponding to @@ -85,7 +85,7 @@ Description: What: /sys/bus/coresight/devices//dsb_edge/ctrl_mask Date: March 2023 -KernelVersion 6.7 +KernelVersion: 6.7 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: Write a data to mask the edge detection corresponding to the index @@ -97,21 +97,21 @@ Description: What: /sys/bus/coresight/devices//dsb_edge/edcr[0:15] Date: March 2023 -KernelVersion 6.7 +KernelVersion: 6.7 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: Read a set of the edge control value of the DSB in TPDM. What: /sys/bus/coresight/devices//dsb_edge/edcmr[0:7] Date: March 2023 -KernelVersion 6.7 +KernelVersion: 6.7 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: Read a set of the edge control mask of the DSB in TPDM. What: /sys/bus/coresight/devices//dsb_trig_patt/xpr[0:7] Date: March 2023 -KernelVersion 6.7 +KernelVersion: 6.7 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (RW) Set/Get the value of the trigger pattern for the DSB @@ -119,7 +119,7 @@ Description: What: /sys/bus/coresight/devices//dsb_trig_patt/xpmr[0:7] Date: March 2023 -KernelVersion 6.7 +KernelVersion: 6.7 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (RW) Set/Get the mask of the trigger pattern for the DSB @@ -127,21 +127,21 @@ Description: What: /sys/bus/coresight/devices//dsb_patt/tpr[0:7] Date: March 2023 -KernelVersion 6.7 +KernelVersion: 6.7 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (RW) Set/Get the value of the pattern for the DSB subunit TPDM. What: /sys/bus/coresight/devices//dsb_patt/tpmr[0:7] Date: March 2023 -KernelVersion 6.7 +KernelVersion: 6.7 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (RW) Set/Get the mask of the pattern for the DSB subunit TPDM. What: /sys/bus/coresight/devices//dsb_patt/enable_ts Date: March 2023 -KernelVersion 6.7 +KernelVersion: 6.7 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (Write) Set the pattern timestamp of DSB tpdm. Read @@ -153,7 +153,7 @@ Description: What: /sys/bus/coresight/devices//dsb_patt/set_type Date: March 2023 -KernelVersion 6.7 +KernelVersion: 6.7 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (Write) Set the pattern type of DSB tpdm. Read @@ -165,7 +165,7 @@ Description: What: /sys/bus/coresight/devices//dsb_msr/msr[0:31] Date: March 2023 -KernelVersion 6.7 +KernelVersion: 6.7 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (RW) Set/Get the MSR(mux select register) for the DSB subunit @@ -173,7 +173,7 @@ Description: What: /sys/bus/coresight/devices//cmb_mode Date: January 2024 -KernelVersion 6.9 +KernelVersion: 6.9 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (Write) Set the data collection mode of CMB tpdm. Continuous change creates CMB data set elements on every CMBCLK edge. @@ -187,7 +187,7 @@ Description: (Write) Set the data collection mode of CMB tpdm. Continuous What: /sys/bus/coresight/devices//cmb_trig_patt/xpr[0:1] Date: January 2024 -KernelVersion 6.9 +KernelVersion: 6.9 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (RW) Set/Get the value of the trigger pattern for the CMB @@ -195,7 +195,7 @@ Description: What: /sys/bus/coresight/devices//cmb_trig_patt/xpmr[0:1] Date: January 2024 -KernelVersion 6.9 +KernelVersion: 6.9 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (RW) Set/Get the mask of the trigger pattern for the CMB @@ -203,21 +203,21 @@ Description: What: /sys/bus/coresight/devices//dsb_patt/tpr[0:1] Date: January 2024 -KernelVersion 6.9 +KernelVersion: 6.9 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (RW) Set/Get the value of the pattern for the CMB subunit TPDM. What: /sys/bus/coresight/devices//dsb_patt/tpmr[0:1] Date: January 2024 -KernelVersion 6.9 +KernelVersion: 6.9 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (RW) Set/Get the mask of the pattern for the CMB subunit TPDM. What: /sys/bus/coresight/devices//cmb_patt/enable_ts Date: January 2024 -KernelVersion 6.9 +KernelVersion: 6.9 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (Write) Set the pattern timestamp of CMB tpdm. Read @@ -229,7 +229,7 @@ Description: What: /sys/bus/coresight/devices//cmb_trig_ts Date: January 2024 -KernelVersion 6.9 +KernelVersion: 6.9 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (RW) Set/Get the trigger timestamp of the CMB for tpdm. @@ -240,7 +240,7 @@ Description: What: /sys/bus/coresight/devices//cmb_ts_all Date: January 2024 -KernelVersion 6.9 +KernelVersion: 6.9 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (RW) Read or write the status of timestamp upon all interface. @@ -252,8 +252,23 @@ Description: What: /sys/bus/coresight/devices//cmb_msr/msr[0:31] Date: January 2024 -KernelVersion 6.9 +KernelVersion: 6.9 Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) Description: (RW) Set/Get the MSR(mux select register) for the CMB subunit TPDM. + +What: /sys/bus/coresight/devices//mcmb_trig_lane +Date: Feb 2025 +KernelVersion 6.15 +Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) +Description: + (RW) Set/Get which lane participates in the output pattern + match cross trigger mechanism for the MCMB subunit TPDM. + +What: /sys/bus/coresight/devices//mcmb_lanes_select +Date: Feb 2025 +KernelVersion 6.15 +Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) +Description: + (RW) Set/Get the enablement of the individual lane. diff --git a/Documentation/ABI/testing/sysfs-bus-counter b/Documentation/ABI/testing/sysfs-bus-counter index 73ac84c0bca76e013f2534efc40c0f0e8d5fb023..3e8259e56d384225ec6e952b6d5d75208f185736 100644 --- a/Documentation/ABI/testing/sysfs-bus-counter +++ b/Documentation/ABI/testing/sysfs-bus-counter @@ -34,6 +34,14 @@ Contact: linux-iio@vger.kernel.org Description: Count data of Count Y represented as a string. +What: /sys/bus/counter/devices/counterX/countY/compare +KernelVersion: 6.15 +Contact: linux-iio@vger.kernel.org +Description: + If the counter device supports compare registers -- registers + used to compare counter channels against a particular count -- + the compare count for channel Y is provided by this attribute. + What: /sys/bus/counter/devices/counterX/countY/capture KernelVersion: 6.1 Contact: linux-iio@vger.kernel.org @@ -301,6 +309,7 @@ Description: What: /sys/bus/counter/devices/counterX/cascade_counts_enable_component_id What: /sys/bus/counter/devices/counterX/external_input_phase_clock_select_component_id +What: /sys/bus/counter/devices/counterX/countY/compare_component_id What: /sys/bus/counter/devices/counterX/countY/capture_component_id What: /sys/bus/counter/devices/counterX/countY/ceiling_component_id What: /sys/bus/counter/devices/counterX/countY/floor_component_id diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index 3f5627a1210a16aca7c18d17131a56491048a0c2..99bb3faf7a0e121a5f6b6a7d76adf6a233a6984e 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -1,5 +1,5 @@ What: /sys/bus/cxl/flush -Date: Januarry, 2022 +Date: January, 2022 KernelVersion: v5.18 Contact: linux-cxl@vger.kernel.org Description: @@ -18,6 +18,24 @@ Description: specification. +What: /sys/bus/cxl/devices/memX/payload_max +Date: December, 2020 +KernelVersion: v5.12 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) Maximum size (in bytes) of the mailbox command payload + registers. Linux caps this at 1MB if the device reports a + larger size. + + +What: /sys/bus/cxl/devices/memX/label_storage_size +Date: May, 2021 +KernelVersion: v5.13 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) Size (in bytes) of the Label Storage Area (LSA). + + What: /sys/bus/cxl/devices/memX/ram/size Date: December, 2020 KernelVersion: v5.12 @@ -33,7 +51,7 @@ Date: May, 2023 KernelVersion: v6.8 Contact: linux-cxl@vger.kernel.org Description: - (RO) For CXL host platforms that support "QoS Telemmetry" + (RO) For CXL host platforms that support "QoS Telemetry" this attribute conveys a comma delimited list of platform specific cookies that identifies a QoS performance class for the volatile partition of the CXL mem device. These @@ -60,7 +78,7 @@ Date: May, 2023 KernelVersion: v6.8 Contact: linux-cxl@vger.kernel.org Description: - (RO) For CXL host platforms that support "QoS Telemmetry" + (RO) For CXL host platforms that support "QoS Telemetry" this attribute conveys a comma delimited list of platform specific cookies that identifies a QoS performance class for the persistent partition of the CXL mem device. These @@ -321,14 +339,13 @@ KernelVersion: v6.0 Contact: linux-cxl@vger.kernel.org Description: (RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it - translates from a host physical address range, to a device local - address range. Device-local address ranges are further split - into a 'ram' (volatile memory) range and 'pmem' (persistent - memory) range. The 'mode' attribute emits one of 'ram', 'pmem', - 'mixed', or 'none'. The 'mixed' indication is for error cases - when a decoder straddles the volatile/persistent partition - boundary, and 'none' indicates the decoder is not actively - decoding, or no DPA allocation policy has been set. + translates from a host physical address range, to a device + local address range. Device-local address ranges are further + split into a 'ram' (volatile memory) range and 'pmem' + (persistent memory) range. The 'mode' attribute emits one of + 'ram', 'pmem', or 'none'. The 'none' indicates the decoder is + not actively decoding, or no DPA allocation policy has been + set. 'mode' can be written, when the decoder is in the 'disabled' state, with either 'ram' or 'pmem' to set the boundaries for the @@ -423,7 +440,7 @@ Date: May, 2023 KernelVersion: v6.5 Contact: linux-cxl@vger.kernel.org Description: - (RO) For CXL host platforms that support "QoS Telemmetry" this + (RO) For CXL host platforms that support "QoS Telemetry" this root-decoder-only attribute conveys a platform specific cookie that identifies a QoS performance class for the CXL Window. This class-id can be compared against a similar "qos_class" @@ -586,3 +603,15 @@ Description: See Documentation/ABI/stable/sysfs-devices-node. access0 provides the number to the closest initiator and access1 provides the number to the closest CPU. + + +What: /sys/bus/cxl/devices/nvdimm-bridge0/ndbusX/nmemY/cxl/dirty_shutdown +Date: Feb, 2025 +KernelVersion: v6.15 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) The device dirty shutdown count value, which is the number + of times the device could have incurred in potential data loss. + The count is persistent across power loss and wraps back to 0 + upon overflow. If this file is not present, the device does not + have the necessary support for dirty tracking. diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index 25d366d452a55293c420300335a0271a28855107..722aa989baac43f694076074b307d134867b4533 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -2268,7 +2268,7 @@ Description: representing the sensor unique ID number. What: /sys/bus/iio/devices/iio:deviceX/filter_type_available -What: /sys/bus/iio/devices/iio:deviceX/in_voltage-voltage_filter_mode_available +What: /sys/bus/iio/devices/iio:deviceX/in_voltage-voltage_filter_type_available KernelVersion: 6.1 Contact: linux-iio@vger.kernel.org Description: @@ -2290,6 +2290,16 @@ Description: * "sinc3+pf2" - Sinc3 + device specific Post Filter 2. * "sinc3+pf3" - Sinc3 + device specific Post Filter 3. * "sinc3+pf4" - Sinc3 + device specific Post Filter 4. + * "wideband" - filter with wideband low ripple passband + and sharp transition band. + +What: /sys/bus/iio/devices/iio:deviceX/filter_type +What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_filter_type +KernelVersion: 6.1 +Contact: linux-iio@vger.kernel.org +Description: + Specifies which filter type apply to the channel. The possible + values are given by the filter_type_available attribute. What: /sys/.../events/in_proximity_thresh_either_runningperiod KernelVersion: 6.6 diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 new file mode 100644 index 0000000000000000000000000000000000000000..d3fad27421d6dcd58a3e0660e71002a96e1af7fb --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 @@ -0,0 +1,20 @@ +What: /sys/bus/iio/devices/iio:deviceX/in_voltage-voltage_filter_mode_available +KernelVersion: 6.2 +Contact: linux-iio@vger.kernel.org +Description: + Reading returns a list with the possible filter modes. + + This ABI is only kept for backwards compatibility and the values + returned are identical to filter_type_available attribute + documented in Documentation/ABI/testing/sysfs-bus-iio. Please, + use filter_type_available like ABI to provide filter options for + new drivers. + +What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_filter_mode +KernelVersion: 6.2 +Contact: linux-iio@vger.kernel.org +Description: + This ABI is only kept for backwards compatibility and the values + returned are identical to in_voltageY-voltageZ_filter_type + attribute documented in Documentation/ABI/testing/sysfs-bus-iio. + Please, use in_voltageY-voltageZ_filter_type for new drivers. diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci index 5da6a14dc326bd651d8cabb795d769f221daa81e..69f952fffec72f1515d144e0cd33cb43267ead49 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci +++ b/Documentation/ABI/testing/sysfs-bus-pci @@ -583,3 +583,32 @@ Description: enclosure-specific indications "specific0" to "specific7", hence the corresponding led class devices are unavailable if the DSM interface is used. + +What: /sys/bus/pci/devices/.../doe_features +Date: March 2025 +Contact: Linux PCI developers +Description: + This directory contains a list of the supported Data Object + Exchange (DOE) features. The features are the file name. + The contents of each file is the raw Vendor ID and data + object feature values. + + The value comes from the device and specifies the vendor and + data object type supported. The lower (RHS of the colon) is + the data object type in hex. The upper (LHS of the colon) + is the vendor ID. + + As all DOE devices must support the DOE discovery feature, + if DOE is supported you will at least see the doe_discovery + file, with this contents: + + # cat doe_features/doe_discovery + 0001:00 + + If the device supports other features you will see other + files as well. For example if CMA/SPDM and secure CMA/SPDM + are supported the doe_features directory will look like + this: + + # ls doe_features + 0001:01 0001:02 doe_discovery diff --git a/Documentation/ABI/testing/sysfs-class-chromeos b/Documentation/ABI/testing/sysfs-class-chromeos index 74ece942722e71ec139f544f0701725f1125c68b..7fa5be6cc774847a889c79aacc31f7a732b3c2dc 100644 --- a/Documentation/ABI/testing/sysfs-class-chromeos +++ b/Documentation/ABI/testing/sysfs-class-chromeos @@ -31,3 +31,23 @@ Date: August 2015 KernelVersion: 4.2 Description: Show the information about the EC software and hardware. + +What: /sys/class/chromeos/cros_ec/usbpdmuxinfo +Date: February 2025 +Description: + Show PD mux status for each typec port with following flags: + - "USB": USB connected + - "DP": DP connected + - "POLARITY": CC line Polarity inverted + - "HPD_IRQ": Hot Plug Detect interrupt is asserted + - "HPD_LVL": Hot Plug Detect level is asserted + - "SAFE": DP is in safe mode + - "TBT": TBT enabled + - "USB4": USB4 enabled + +What: /sys/class/chromeos/cros_ec/ap_mode_entry +Date: February 2025 +Description: + Show if the AP mode entry EC feature is supported. + It indicates whether the EC waits for direction from the AP + to enter Type-C altmodes or USB4 mode. diff --git a/Documentation/ABI/testing/sysfs-driver-amd-sfh b/Documentation/ABI/testing/sysfs-driver-amd-sfh new file mode 100644 index 0000000000000000000000000000000000000000..c053126a83bb329cc579439a84ca71db0023ed6d --- /dev/null +++ b/Documentation/ABI/testing/sysfs-driver-amd-sfh @@ -0,0 +1,13 @@ +What: /sys/bus/pci/drivers/pcie_mp2_amd/*/hpd +Date: April 2025 +Contact: mario.limonciello@amd.com +Description: + Human presence detection (HPD) enable/disable. + When HPD is enabled, the device will be able to detect the + presence of a human and will send an interrupt that can be + used to wake the system from a low power state. + When HPD is disabled, the device will not be able to detect + the presence of a human. + + Access: Read/Write + Valid values: enabled/disabled diff --git a/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd new file mode 100644 index 0000000000000000000000000000000000000000..2a19584d091e484464435ade8ff94c6ad8e3ec00 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd @@ -0,0 +1,13 @@ +What: /sys/bus/hid/drivers/hid-appletb-kbd//mode +Date: September, 2023 +KernelVersion: 6.5 +Contact: linux-input@vger.kernel.org +Description: + The set of keys displayed on the Touch Bar. + Valid values are: + == ================= + 0 Escape key only + 1 Function keys + 2 Media/brightness keys + 3 None + == ================= diff --git a/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc b/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc index c12316dfd9732d7e7908de12c56a75b35a0a01f4..a6e4003649327915a0050f9e69b9b7898f465cf3 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc +++ b/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc @@ -17,7 +17,7 @@ Description: Read only. Returns the firmware version of Intel MAX10 What: /sys/bus/.../drivers/intel-m10-bmc/.../mac_address Date: January 2021 KernelVersion: 5.12 -Contact: Peter Colberg +Contact: Peter Colberg Description: Read only. Returns the first MAC address in a block of sequential MAC addresses assigned to the board that is managed by the Intel MAX10 BMC. It is stored in @@ -28,7 +28,7 @@ Description: Read only. Returns the first MAC address in a block What: /sys/bus/.../drivers/intel-m10-bmc/.../mac_count Date: January 2021 KernelVersion: 5.12 -Contact: Peter Colberg +Contact: Peter Colberg Description: Read only. Returns the number of sequential MAC addresses assigned to the board managed by the Intel MAX10 BMC. This value is stored in FLASH and is mirrored diff --git a/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc-sec-update b/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc-sec-update index 9051695d221170bd82c4e84aa9fda49bf80ad1bf..c69fd3894eb4588e29a14dc483ba2ca3fc14addd 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc-sec-update +++ b/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc-sec-update @@ -1,7 +1,7 @@ What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/sr_root_entry_hash Date: Sep 2022 KernelVersion: 5.20 -Contact: Peter Colberg +Contact: Peter Colberg Description: Read only. Returns the root entry hash for the static region if one is programmed, else it returns the string: "hash not programmed". This file is only @@ -11,7 +11,7 @@ Description: Read only. Returns the root entry hash for the static What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/pr_root_entry_hash Date: Sep 2022 KernelVersion: 5.20 -Contact: Peter Colberg +Contact: Peter Colberg Description: Read only. Returns the root entry hash for the partial reconfiguration region if one is programmed, else it returns the string: "hash not programmed". This file @@ -21,7 +21,7 @@ Description: Read only. Returns the root entry hash for the partial What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/bmc_root_entry_hash Date: Sep 2022 KernelVersion: 5.20 -Contact: Peter Colberg +Contact: Peter Colberg Description: Read only. Returns the root entry hash for the BMC image if one is programmed, else it returns the string: "hash not programmed". This file is only visible if the @@ -31,7 +31,7 @@ Description: Read only. Returns the root entry hash for the BMC image What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/sr_canceled_csks Date: Sep 2022 KernelVersion: 5.20 -Contact: Peter Colberg +Contact: Peter Colberg Description: Read only. Returns a list of indices for canceled code signing keys for the static region. The standard bitmap list format is used (e.g. "1,2-6,9"). @@ -39,7 +39,7 @@ Description: Read only. Returns a list of indices for canceled code What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/pr_canceled_csks Date: Sep 2022 KernelVersion: 5.20 -Contact: Peter Colberg +Contact: Peter Colberg Description: Read only. Returns a list of indices for canceled code signing keys for the partial reconfiguration region. The standard bitmap list format is used (e.g. "1,2-6,9"). @@ -47,7 +47,7 @@ Description: Read only. Returns a list of indices for canceled code What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/bmc_canceled_csks Date: Sep 2022 KernelVersion: 5.20 -Contact: Peter Colberg +Contact: Peter Colberg Description: Read only. Returns a list of indices for canceled code signing keys for the BMC. The standard bitmap list format is used (e.g. "1,2-6,9"). @@ -55,7 +55,7 @@ Description: Read only. Returns a list of indices for canceled code What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/flash_count Date: Sep 2022 KernelVersion: 5.20 -Contact: Peter Colberg +Contact: Peter Colberg Description: Read only. Returns number of times the secure update staging area has been flashed. Format: "%u". diff --git a/Documentation/ABI/testing/sysfs-driver-ufs b/Documentation/ABI/testing/sysfs-driver-ufs index 5fa6655aee84090d302f64bd6a31b277018e230b..ae0191295d29b8135bc6d5c41986bf41eebbd3be 100644 --- a/Documentation/ABI/testing/sysfs-driver-ufs +++ b/Documentation/ABI/testing/sysfs-driver-ufs @@ -1559,3 +1559,48 @@ Description: Symbol - HCMID. This file shows the UFSHCD manufacturer id. The Manufacturer ID is defined by JEDEC in JEDEC-JEP106. The file is read only. + +What: /sys/bus/platform/drivers/ufshcd/*/critical_health +What: /sys/bus/platform/devices/*.ufs/critical_health +Date: February 2025 +Contact: Avri Altman +Description: Report the number of times a critical health event has been + reported by a UFS device. Further insight into the specific + issue can be gained by reading one of: bPreEOLInfo, + bDeviceLifeTimeEstA, bDeviceLifeTimeEstB, + bWriteBoosterBufferLifeTimeEst, and bRPMBLifeTimeEst. + + The file is read only. + +What: /sys/bus/platform/drivers/ufshcd/*/clkscale_enable +What: /sys/bus/platform/devices/*.ufs/clkscale_enable +Date: January 2025 +Contact: Ziqi Chen +Description: + This attribute shows whether the UFS clock scaling is enabled or not. + And it can be used to enable/disable the clock scaling by writing + 1 or 0 to this attribute. + + The attribute is read/write. + +What: /sys/bus/platform/drivers/ufshcd/*/clkgate_enable +What: /sys/bus/platform/devices/*.ufs/clkgate_enable +Date: January 2025 +Contact: Ziqi Chen +Description: + This attribute shows whether the UFS clock gating is enabled or not. + And it can be used to enable/disable the clock gating by writing + 1 or 0 to this attribute. + + The attribute is read/write. + +What: /sys/bus/platform/drivers/ufshcd/*/clkgate_delay_ms +What: /sys/bus/platform/devices/*.ufs/clkgate_delay_ms +Date: January 2025 +Contact: Ziqi Chen +Description: + This attribute shows and sets the number of milliseconds of idle time + before the UFS driver starts to perform clock gating. This can + prevent the UFS from frequently performing clock gating/ungating. + + The attribute is read/write. diff --git a/Documentation/ABI/testing/sysfs-edac-ecs b/Documentation/ABI/testing/sysfs-edac-ecs new file mode 100644 index 0000000000000000000000000000000000000000..87c885c4eb1abd8b60db0dbd907f928599aa5954 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-edac-ecs @@ -0,0 +1,74 @@ +What: /sys/bus/edac/devices//ecs_fruX +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + The sysfs EDAC bus devices //ecs_fruX subdirectory + pertains to the memory media ECS (Error Check Scrub) control + feature, where directory corresponds to a device + registered with the EDAC device driver for the ECS feature. + /ecs_fruX belongs to the media FRUs (Field Replaceable Unit) + under the memory device. + + The sysfs ECS attr nodes are only present if the parent + driver has implemented the corresponding attr callback + function and provided the necessary operations to the EDAC + device driver during registration. + +What: /sys/bus/edac/devices//ecs_fruX/log_entry_type +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The log entry type of how the DDR5 ECS log is reported. + + - 0 - per DRAM. + + - 1 - per memory media FRU. + + - All other values are reserved. + +What: /sys/bus/edac/devices//ecs_fruX/mode +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The mode of how the DDR5 ECS counts the errors. + Error count is tracked based on two different modes + selected by DDR5 ECS Control Feature - Codeword mode and + Row Count mode. If the ECS is under Codeword mode, then + the error count increments each time a codeword with check + bit errors is detected. If the ECS is under Row Count mode, + then the error counter increments each time a row with + check bit errors is detected. + + - 0 - ECS counts rows in the memory media that have ECC errors. + + - 1 - ECS counts codewords with errors, specifically, it counts + the number of ECC-detected errors in the memory media. + + - All other values are reserved. + +What: /sys/bus/edac/devices//ecs_fruX/reset +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (WO) ECS reset ECC counter. + + - 1 - reset ECC counter to the default value. + + - All other values are reserved. + +What: /sys/bus/edac/devices//ecs_fruX/threshold +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) DDR5 ECS threshold count per gigabits of memory cells. + The ECS error count is subject to the ECS Threshold count + per Gbit, which masks error counts less than the Threshold. + + Supported values are 256, 1024 and 4096. + + All other values are reserved. diff --git a/Documentation/ABI/testing/sysfs-edac-memory-repair b/Documentation/ABI/testing/sysfs-edac-memory-repair new file mode 100644 index 0000000000000000000000000000000000000000..0434a3b23ff3ee940d558a3a4e54241618c04f88 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-edac-memory-repair @@ -0,0 +1,206 @@ +What: /sys/bus/edac/devices//mem_repairX +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + The sysfs EDAC bus devices //mem_repairX subdirectory + pertains to the memory media repair features control, such as + PPR (Post Package Repair), memory sparing etc, where + directory corresponds to a device registered with the EDAC + device driver for the memory repair features. + + Post Package Repair is a maintenance operation requests the memory + device to perform a repair operation on its media. It is a memory + self-healing feature that fixes a failing memory location by + replacing it with a spare row in a DRAM device. For example, a + CXL memory device with DRAM components that support PPR features may + implement PPR maintenance operations. DRAM components may support + two types of PPR functions: hard PPR, for a permanent row repair, and + soft PPR, for a temporary row repair. Soft PPR may be much faster + than hard PPR, but the repair is lost with a power cycle. + + The sysfs attributes nodes for a repair feature are only + present if the parent driver has implemented the corresponding + attr callback function and provided the necessary operations + to the EDAC device driver during registration. + + In some states of system configuration (e.g. before address + decoders have been configured), memory devices (e.g. CXL) + may not have an active mapping in the main host address + physical address map. As such, the memory to repair must be + identified by a device specific physical addressing scheme + using a device physical address(DPA). The DPA and other control + attributes to use will be presented in related error records. + +What: /sys/bus/edac/devices//mem_repairX/repair_type +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RO) Memory repair type. For eg. post package repair, + memory sparing etc. Valid values are: + + - ppr - Post package repair. + + - cacheline-sparing + + - row-sparing + + - bank-sparing + + - rank-sparing + + - All other values are reserved. + +What: /sys/bus/edac/devices//mem_repairX/persist_mode +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) Get/Set the current persist repair mode set for a + repair function. Persist repair modes supported in the + device, based on a memory repair function, either is temporary, + which is lost with a power cycle or permanent. Valid values are: + + - 0 - Soft memory repair (temporary repair). + + - 1 - Hard memory repair (permanent repair). + + - All other values are reserved. + +What: /sys/bus/edac/devices//mem_repairX/repair_safe_when_in_use +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RO) True if memory media is accessible and data is retained + during the memory repair operation. + The data may not be retained and memory requests may not be + correctly processed during a repair operation. In such case + repair operation can not be executed at runtime. The memory + must be taken offline. + +What: /sys/bus/edac/devices//mem_repairX/hpa +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) Host Physical Address (HPA) of the memory to repair. + The HPA to use will be provided in related error records. + +What: /sys/bus/edac/devices//mem_repairX/dpa +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) Device Physical Address (DPA) of the memory to repair. + The specific DPA to use will be provided in related error + records. + + In some states of system configuration (e.g. before address + decoders have been configured), memory devices (e.g. CXL) + may not have an active mapping in the main host address + physical address map. As such, the memory to repair must be + identified by a device specific physical addressing scheme + using a DPA. The device physical address(DPA) to use will be + presented in related error records. + +What: /sys/bus/edac/devices//mem_repairX/nibble_mask +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) Read/Write Nibble mask of the memory to repair. + Nibble mask identifies one or more nibbles in error on the + memory bus that produced the error event. Nibble Mask bit 0 + shall be set if nibble 0 on the memory bus produced the + event, etc. For example, CXL PPR and sparing, a nibble mask + bit set to 1 indicates the request to perform repair + operation in the specific device. All nibble mask bits set + to 1 indicates the request to perform the operation in all + devices. Eg. for CXL memory repair, the specific value of + nibble mask to use will be provided in related error records. + For more details, See nibble mask field in CXL spec ver 3.1, + section 8.2.9.7.1.2 Table 8-103 soft PPR and section + 8.2.9.7.1.3 Table 8-104 hard PPR, section 8.2.9.7.1.4 + Table 8-105 memory sparing. + +What: /sys/bus/edac/devices//mem_repairX/min_hpa +What: /sys/bus/edac/devices//mem_repairX/max_hpa +What: /sys/bus/edac/devices//mem_repairX/min_dpa +What: /sys/bus/edac/devices//mem_repairX/max_dpa +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The supported range of memory address that is to be + repaired. The memory device may give the supported range of + attributes to use and it will depend on the memory device + and the portion of memory to repair. + The userspace may receive the specific value of attributes + to use for a repair operation from the memory device via + related error records and trace events, for eg. CXL DRAM + and CXL general media error records in CXL memory devices. + +What: /sys/bus/edac/devices//mem_repairX/bank_group +What: /sys/bus/edac/devices//mem_repairX/bank +What: /sys/bus/edac/devices//mem_repairX/rank +What: /sys/bus/edac/devices//mem_repairX/row +What: /sys/bus/edac/devices//mem_repairX/column +What: /sys/bus/edac/devices//mem_repairX/channel +What: /sys/bus/edac/devices//mem_repairX/sub_channel +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The control attributes for the memory to be repaired. + The specific value of attributes to use depends on the + portion of memory to repair and will be reported to the host + in related error records and be available to userspace + in trace events, such as CXL DRAM and CXL general media + error records of CXL memory devices. + + When readng back these attributes, it returns the current + value of memory requested to be repaired. + + bank_group - The bank group of the memory to repair. + + bank - The bank number of the memory to repair. + + rank - The rank of the memory to repair. Rank is defined as a + set of memory devices on a channel that together execute a + transaction. + + row - The row number of the memory to repair. + + column - The column number of the memory to repair. + + channel - The channel of the memory to repair. Channel is + defined as an interface that can be independently accessed + for a transaction. + + sub_channel - The subchannel of the memory to repair. + + The requirement to set these attributes varies based on the + repair function. The attributes in sysfs are not present + unless required for a repair function. + + For example, CXL spec ver 3.1, Section 8.2.9.7.1.2 Table 8-103 + soft PPR and Section 8.2.9.7.1.3 Table 8-104 hard PPR operations, + these attributes are not required to set. CXL spec ver 3.1, + Section 8.2.9.7.1.4 Table 8-105 memory sparing, these attributes + are required to set based on memory sparing granularity. + +What: /sys/bus/edac/devices//mem_repairX/repair +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (WO) Issue the memory repair operation for the specified + memory repair attributes. The operation may fail if resources + are insufficient based on the requirements of the memory + device and repair function. + + - 1 - Issue the repair operation. + + - All other values are reserved. diff --git a/Documentation/ABI/testing/sysfs-edac-scrub b/Documentation/ABI/testing/sysfs-edac-scrub new file mode 100644 index 0000000000000000000000000000000000000000..c43be90deab4a9b9b8c78b0b5dea8ed8e87cf6c6 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-edac-scrub @@ -0,0 +1,69 @@ +What: /sys/bus/edac/devices//scrubX +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + The sysfs EDAC bus devices //scrubX subdirectory + belongs to an instance of memory scrub control feature, + where directory corresponds to a device/memory + region registered with the EDAC device driver for the + scrub control feature. + + The sysfs scrub attr nodes are only present if the parent + driver has implemented the corresponding attr callback + function and provided the necessary operations to the EDAC + device driver during registration. + +What: /sys/bus/edac/devices//scrubX/addr +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The base address of the memory region to be scrubbed + for on-demand scrubbing. Setting address starts scrubbing. + The size must be set before that. + + The readback addr value is non-zero if the requested + on-demand scrubbing is in progress, zero otherwise. + +What: /sys/bus/edac/devices//scrubX/size +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The size of the memory region to be scrubbed + (on-demand scrubbing). + +What: /sys/bus/edac/devices//scrubX/enable_background +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) Start/Stop background (patrol) scrubbing if supported. + +What: /sys/bus/edac/devices//scrubX/min_cycle_duration +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RO) Supported minimum scrub cycle duration in seconds + by the memory scrubber. + +What: /sys/bus/edac/devices//scrubX/max_cycle_duration +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RO) Supported maximum scrub cycle duration in seconds + by the memory scrubber. + +What: /sys/bus/edac/devices//scrubX/current_cycle_duration +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The current scrub cycle duration in seconds and must be + within the supported range by the memory scrubber. + + Scrub has an overhead when running and that may want to be + reduced by taking longer to do it. diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 3e1630c70d8ae70456de35a627afde1211c8882c..59adb7dc6f9efd89a8477f21086127cd058a43c6 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -347,7 +347,7 @@ Description: Used to control configure extension list: - [c] means add/del cold file extension What: /sys/fs/f2fs//unusable -Date April 2019 +Date: April 2019 Contact: "Daniel Rosenberg" Description: If checkpoint=disable, it displays the number of blocks that are unusable. @@ -355,7 +355,7 @@ Description: If checkpoint=disable, it displays the number of blocks that would be unusable if checkpoint=disable were to be set. What: /sys/fs/f2fs//encoding -Date July 2019 +Date: July 2019 Contact: "Daniel Rosenberg" Description: Displays name and version of the encoding set for the filesystem. If no encoding is set, displays (none) @@ -734,6 +734,7 @@ Description: Support configuring fault injection type, should be FAULT_BLKADDR_VALIDITY 0x000040000 FAULT_BLKADDR_CONSISTENCE 0x000080000 FAULT_NO_SEGMENT 0x000100000 + FAULT_INCONSISTENT_FOOTER 0x000200000 =========================== =========== What: /sys/fs/f2fs//discard_io_aware_gran @@ -828,3 +829,20 @@ Date: November 2024 Contact: "Chao Yu" Description: It controls max read extent count for per-inode, the value of threshold is 10240 by default. + +What: /sys/fs/f2fs/tuning/reclaim_caches_kb +Date: February 2025 +Contact: "Jaegeuk Kim" +Description: It reclaims the given KBs of file-backed pages registered by + ioctl(F2FS_IOC_DONATE_RANGE). + For example, writing N tries to drop N KBs spaces in LRU. + +What: /sys/fs/f2fs//carve_out +Date: March 2025 +Contact: "Daeho Jeong" +Description: For several zoned storage devices, vendors will provide extra space which + was used for device level GC than specs and F2FS can use this space for + filesystem level GC. To do that, we can reserve the space using + reserved_blocks. However, it is not enough, since this extra space should + not be shown to users. So, with this new sysfs node, we can hide the space + by substracting reserved_blocks from total bytes. diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump b/Documentation/ABI/testing/sysfs-kernel-fadump index 2f9daa7ca55be0db403450535e20c78e75791ff8..b64b7622e6fc0333774bf451f19bc9dfb73ccc27 100644 --- a/Documentation/ABI/testing/sysfs-kernel-fadump +++ b/Documentation/ABI/testing/sysfs-kernel-fadump @@ -55,4 +55,5 @@ Date: May 2024 Contact: linuxppc-dev@lists.ozlabs.org Description: read/write This is a special sysfs file available to setup additional - parameters to be passed to capture kernel. + parameters to be passed to capture kernel. For HASH MMU it + is exported only if RMA size higher than 768MB. diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-cma b/Documentation/ABI/testing/sysfs-kernel-mm-cma index dfd755201142f1c8f53416ed82010b316f75ee49..aaf2a5d8b13bd5bc5eca9e3049f4702f23b17e13 100644 --- a/Documentation/ABI/testing/sysfs-kernel-mm-cma +++ b/Documentation/ABI/testing/sysfs-kernel-mm-cma @@ -29,3 +29,16 @@ Date: Feb 2024 Contact: Anshuman Khandual Description: the number of pages CMA API succeeded to release + +What: /sys/kernel/mm/cma//total_pages +Date: Jun 2024 +Contact: Frank van der Linden +Description: + The size of the CMA area in pages. + +What: /sys/kernel/mm/cma//available_pages +Date: Jun 2024 +Contact: Frank van der Linden +Description: + The number of pages in the CMA area that are still + available for CMA allocation. diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-damon b/Documentation/ABI/testing/sysfs-kernel-mm-damon index b057eddefbfc9a85a5a2e8752cb50fb34ea96606..293197f180ade957eed082d09218680e6b8efc8d 100644 --- a/Documentation/ABI/testing/sysfs-kernel-mm-damon +++ b/Documentation/ABI/testing/sysfs-kernel-mm-damon @@ -91,6 +91,36 @@ Description: Writing a value to this file sets the update interval of the DAMON context in microseconds as the value. Reading this file returns the value. +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//monitoring_attrs/intervals/intrvals_goal/access_bp +Date: Feb 2025 +Contact: SeongJae Park +Description: Writing a value to this file sets the monitoring intervals + auto-tuning target DAMON-observed access events ratio within + the given time interval (aggrs in same directory), in bp + (1/10,000). Reading this file returns the value. + +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//monitoring_attrs/intervals/intrvals_goal/aggrs +Date: Feb 2025 +Contact: SeongJae Park +Description: Writing a value to this file sets the time interval to achieve + the monitoring intervals auto-tuning target DAMON-observed + access events ratio (access_bp in same directory) within. + Reading this file returns the value. + +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//monitoring_attrs/intervals/intrvals_goal/min_sample_us +Date: Feb 2025 +Contact: SeongJae Park +Description: Writing a value to this file sets the minimum value of + auto-tuned sampling interval in microseconds. Reading this + file returns the value. + +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//monitoring_attrs/intervals/intrvals_goal/max_sample_us +Date: Feb 2025 +Contact: SeongJae Park +Description: Writing a value to this file sets the maximum value of + auto-tuned sampling interval in microseconds. Reading this + file returns the value. + What: /sys/kernel/mm/damon/admin/kdamonds//contexts//monitoring_attrs/nr_regions/min WDate: Mar 2022 @@ -345,6 +375,20 @@ Description: If 'addr' is written to the 'type' file, writing to or reading from this file sets or gets the end address of the address range for the filter. +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//filters//min +Date: Feb 2025 +Contact: SeongJae Park +Description: If 'hugepage_size' is written to the 'type' file, writing to + or reading from this file sets or gets the minimum size of the + hugepage for the filter. + +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//filters//max +Date: Feb 2025 +Contact: SeongJae Park +Description: If 'hugepage_size' is written to the 'type' file, writing to + or reading from this file sets or gets the maximum size of the + hugepage for the filter. + What: /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//filters//target_idx Date: Dec 2022 Contact: SeongJae Park @@ -365,6 +409,22 @@ Description: Writing 'Y' or 'N' to this file sets whether to allow or reject applying the scheme's action to the memory that satisfies the 'type' and the 'matching' of the directory. +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//core_filters +Date: Feb 2025 +Contact: SeongJae Park +Description: Directory for DAMON core layer-handled DAMOS filters. Files + under this directory works same to those of + /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//filters + directory. + +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//ops_filters +Date: Feb 2025 +Contact: SeongJae Park +Description: Directory for DAMON operations set layer-handled DAMOS filters. + Files under this directory works same to those of + /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//filters + directory. + What: /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//stats/nr_tried Date: Mar 2022 Contact: SeongJae Park diff --git a/Documentation/ABI/testing/sysfs-kernel-reboot b/Documentation/ABI/testing/sysfs-kernel-reboot index 837330fb251134ffdf29cd68f0b2a845b088e5a0..e117aba46be0e8d3cdff3abfb678f8847a726122 100644 --- a/Documentation/ABI/testing/sysfs-kernel-reboot +++ b/Documentation/ABI/testing/sysfs-kernel-reboot @@ -30,3 +30,11 @@ KernelVersion: 5.11 Contact: Matteo Croce Description: Don't wait for any other CPUs on reboot and avoid anything that could hang. + +What: /sys/kernel/reboot/hw_protection +Date: April 2025 +KernelVersion: 6.15 +Contact: Ahmad Fatoum +Description: Hardware protection action taken on critical events like + overtemperature or imminent voltage loss. + Valid values are: reboot shutdown diff --git a/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl b/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl index 65ed3865da62469ba0475ec85c140076fed788eb..09f783fa0a5350ca3ad09d882052d1eac1e20e16 100644 --- a/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl +++ b/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl @@ -150,3 +150,13 @@ Description: The "mfg_lock" sysfs attribute is write-only. A successful write to this attribute will latch the board-level attributes into EEPROM, making them read-only. + +What: /sys/bus/platform/devices/MLNXBF04:00/rtc_battery +Date: June 2025 +KernelVersion: 6.15 +Contact: "Xiangrong Li " +Description: + The "rtc_battery" sysfs attribute is read-only. + A successful read from this attribute returns the status of + the board's RTC battery. The RTC battery status register is + also cleared upon successful read operation. diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index a3942b1036e2514bcf4c2824a5628355fde73850..2192478e83cfcbcc1bc2ed37b06227ca10fa5e94 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -131,7 +131,7 @@ Description: CAUTION: Using it will cause your machine's real-time (CMOS) clock to be set to a random invalid time after a resume. -What; /sys/power/pm_trace_dev_match +What: /sys/power/pm_trace_dev_match Date: October 2010 Contact: James Hogan Description: diff --git a/Documentation/ABI/testing/sysfs-pps-gen-tio b/Documentation/ABI/testing/sysfs-pps-gen-tio new file mode 100644 index 0000000000000000000000000000000000000000..3c34ff17a33569969434ab48aef0c5d78cc7ca14 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-pps-gen-tio @@ -0,0 +1,6 @@ +What: /sys/class/pps-gen/pps-genx/enable +Date: April 2025 +KernelVersion: 6.15 +Contact: Subramanian Mohan +Description: + Enable or disable PPS TIO generator output. diff --git a/Documentation/Makefile b/Documentation/Makefile index 52c6c5a3efa99d43a496cc09e4430e1e023f8c87..63094646df2890a788542a273e4a828a844b2932 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -12,7 +12,7 @@ endif # Check for broken ABI files ifeq ($(CONFIG_WARN_ABI_ERRORS),y) -$(shell $(srctree)/scripts/get_abi.pl validate --dir $(srctree)/Documentation/ABI) +$(shell $(srctree)/scripts/get_abi.py --dir $(srctree)/Documentation/ABI validate) endif # You can set these variables from the command line. diff --git a/Documentation/PCI/endpoint/pci-endpoint.rst b/Documentation/PCI/endpoint/pci-endpoint.rst index 35f82f2d45f5ef155b657e337e1eef51b85e68ac..599763aa01ca9d017b59c2c669be92a850e171c4 100644 --- a/Documentation/PCI/endpoint/pci-endpoint.rst +++ b/Documentation/PCI/endpoint/pci-endpoint.rst @@ -57,11 +57,10 @@ by the PCI controller driver. The PCI controller driver can then create a new EPC device by invoking devm_pci_epc_create()/pci_epc_create(). -* devm_pci_epc_destroy()/pci_epc_destroy() +* pci_epc_destroy() - The PCI controller driver can destroy the EPC device created by either - devm_pci_epc_create() or pci_epc_create() using devm_pci_epc_destroy() or - pci_epc_destroy(). + The PCI controller driver can destroy the EPC device created by + pci_epc_create() using pci_epc_destroy(). * pci_epc_linkup() diff --git a/Documentation/RCU/rcubarrier.rst b/Documentation/RCU/rcubarrier.rst index 6da7f66da2a808005c3f6173d9a0b21d24bd5488..12a7b059654f790313a3e9248b09380ff97f0c9b 100644 --- a/Documentation/RCU/rcubarrier.rst +++ b/Documentation/RCU/rcubarrier.rst @@ -329,10 +329,7 @@ Answer: was first added back in 2005. This is because on_each_cpu() disables preemption, which acted as an RCU read-side critical section, thus preventing CPU 0's grace period from completing - until on_each_cpu() had dealt with all of the CPUs. However, - with the advent of preemptible RCU, rcu_barrier() no longer - waited on nonpreemptible regions of code in preemptible kernels, - that being the job of the new rcu_barrier_sched() function. + until on_each_cpu() had dealt with all of the CPUs. However, with the RCU flavor consolidation around v4.20, this possibility was once again ruled out, because the consolidated diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst index 30080ff6f4062d9c11affa57cb56cc1e9afd0285..d1ccd6039a8c3a6b7a90cf6e8f4b61a0a5f43d17 100644 --- a/Documentation/RCU/stallwarn.rst +++ b/Documentation/RCU/stallwarn.rst @@ -96,6 +96,13 @@ warnings: the ``rcu_.*timer wakeup didn't happen for`` console-log message, which will include additional debugging information. +- A timer issue causes time to appear to jump forward, so that RCU + believes that the RCU CPU stall-warning timeout has been exceeded + when in fact much less time has passed. This could be due to + timer hardware bugs, timer driver bugs, or even corruption of + the "jiffies" global variable. These sorts of timer hardware + and driver bugs are not uncommon when testing new hardware. + - A low-level kernel issue that either fails to invoke one of the variants of rcu_eqs_enter(true), rcu_eqs_exit(true), ct_idle_enter(), ct_idle_exit(), ct_irq_enter(), or ct_irq_exit() on the one diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst index 1ef5784c1b84e272a7f8eaa90137b04a1f73b2fb..53faeed7c1905eaa722a984f123e7d14fed3612f 100644 --- a/Documentation/RCU/whatisRCU.rst +++ b/Documentation/RCU/whatisRCU.rst @@ -971,6 +971,16 @@ unfortunately any spinlock in a ``SLAB_TYPESAFE_BY_RCU`` object must be initialized after each and every call to kmem_cache_alloc(), which renders reference-free spinlock acquisition completely unsafe. Therefore, when using ``SLAB_TYPESAFE_BY_RCU``, make proper use of a reference counter. +If using refcount_t, the specialized refcount_{add|inc}_not_zero_acquire() +and refcount_set_release() APIs should be used to ensure correct operation +ordering when verifying object identity and when initializing newly +allocated objects. Acquire fence in refcount_{add|inc}_not_zero_acquire() +ensures that identity checks happen *after* reference count is taken. +refcount_set_release() should be called after a newly allocated object is +fully initialized and release fence ensures that new values are visible +*before* refcount can be successfully taken by other users. Once +refcount_set_release() is called, the object should be considered visible +by other tasks. (Those willing to initialize their locks in a kmem_cache constructor may also use locking, including cache-friendly sequence locking.) diff --git a/Documentation/admin-guide/LSM/index.rst b/Documentation/admin-guide/LSM/index.rst index ce63be6d64ad6306f6bb157958deb47c5846ffaa..b44ef68f6e4da64fed265c7f4d9a3548cf4ef6e4 100644 --- a/Documentation/admin-guide/LSM/index.rst +++ b/Documentation/admin-guide/LSM/index.rst @@ -48,3 +48,4 @@ subdirectories. Yama SafeSetID ipe + landlock diff --git a/Documentation/admin-guide/LSM/landlock.rst b/Documentation/admin-guide/LSM/landlock.rst new file mode 100644 index 0000000000000000000000000000000000000000..9e61607def087f85c71eff6dd0d321d7516a5d03 --- /dev/null +++ b/Documentation/admin-guide/LSM/landlock.rst @@ -0,0 +1,158 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. Copyright © 2025 Microsoft Corporation + +================================ +Landlock: system-wide management +================================ + +:Author: Mickaël Salaün +:Date: March 2025 + +Landlock can leverage the audit framework to log events. + +User space documentation can be found here: +Documentation/userspace-api/landlock.rst. + +Audit +===== + +Denied access requests are logged by default for a sandboxed program if `audit` +is enabled. This default behavior can be changed with the +sys_landlock_restrict_self() flags (cf. +Documentation/userspace-api/landlock.rst). Landlock logs can also be masked +thanks to audit rules. Landlock can generate 2 audit record types. + +Record types +------------ + +AUDIT_LANDLOCK_ACCESS + This record type identifies a denied access request to a kernel resource. + The ``domain`` field indicates the ID of the domain that blocked the + request. The ``blockers`` field indicates the cause(s) of this denial + (separated by a comma), and the following fields identify the kernel object + (similar to SELinux). There may be more than one of this record type per + audit event. + + Example with a file link request generating two records in the same event:: + + domain=195ba459b blockers=fs.refer path="/usr/bin" dev="vda2" ino=351 + domain=195ba459b blockers=fs.make_reg,fs.refer path="/usr/local" dev="vda2" ino=365 + +AUDIT_LANDLOCK_DOMAIN + This record type describes the status of a Landlock domain. The ``status`` + field can be either ``allocated`` or ``deallocated``. + + The ``allocated`` status is part of the same audit event and follows + the first logged ``AUDIT_LANDLOCK_ACCESS`` record of a domain. It identifies + Landlock domain information at the time of the sys_landlock_restrict_self() + call with the following fields: + + - the ``domain`` ID + - the enforcement ``mode`` + - the domain creator's ``pid`` + - the domain creator's ``uid`` + - the domain creator's executable path (``exe``) + - the domain creator's command line (``comm``) + + Example:: + + domain=195ba459b status=allocated mode=enforcing pid=300 uid=0 exe="/root/sandboxer" comm="sandboxer" + + The ``deallocated`` status is an event on its own and it identifies a + Landlock domain release. After such event, it is guarantee that the + related domain ID will never be reused during the lifetime of the system. + The ``domain`` field indicates the ID of the domain which is released, and + the ``denials`` field indicates the total number of denied access request, + which might not have been logged according to the audit rules and + sys_landlock_restrict_self()'s flags. + + Example:: + + domain=195ba459b status=deallocated denials=3 + + +Event samples +-------------- + +Here are two examples of log events (see serial numbers). + +In this example a sandboxed program (``kill``) tries to send a signal to the +init process, which is denied because of the signal scoping restriction +(``LL_SCOPED=s``):: + + $ LL_FS_RO=/ LL_FS_RW=/ LL_SCOPED=s LL_FORCE_LOG=1 ./sandboxer kill 1 + +This command generates two events, each identified with a unique serial +number following a timestamp (``msg=audit(1729738800.268:30)``). The first +event (serial ``30``) contains 4 records. The first record +(``type=LANDLOCK_ACCESS``) shows an access denied by the domain `1a6fdc66f`. +The cause of this denial is signal scopping restriction +(``blockers=scope.signal``). The process that would have receive this signal +is the init process (``opid=1 ocomm="systemd"``). + +The second record (``type=LANDLOCK_DOMAIN``) describes (``status=allocated``) +domain `1a6fdc66f`. This domain was created by process ``286`` executing the +``/root/sandboxer`` program launched by the root user. + +The third record (``type=SYSCALL``) describes the syscall, its provided +arguments, its result (``success=no exit=-1``), and the process that called it. + +The fourth record (``type=PROCTITLE``) shows the command's name as an +hexadecimal value. This can be translated with ``python -c +'print(bytes.fromhex("6B696C6C0031"))'``. + +Finally, the last record (``type=LANDLOCK_DOMAIN``) is also the only one from +the second event (serial ``31``). It is not tied to a direct user space action +but an asynchronous one to free resources tied to a Landlock domain +(``status=deallocated``). This can be useful to know that the following logs +will not concern the domain ``1a6fdc66f`` anymore. This record also summarize +the number of requests this domain denied (``denials=1``), whether they were +logged or not. + +.. code-block:: + + type=LANDLOCK_ACCESS msg=audit(1729738800.268:30): domain=1a6fdc66f blockers=scope.signal opid=1 ocomm="systemd" + type=LANDLOCK_DOMAIN msg=audit(1729738800.268:30): domain=1a6fdc66f status=allocated mode=enforcing pid=286 uid=0 exe="/root/sandboxer" comm="sandboxer" + type=SYSCALL msg=audit(1729738800.268:30): arch=c000003e syscall=62 success=no exit=-1 [..] ppid=272 pid=286 auid=0 uid=0 gid=0 [...] comm="kill" [...] + type=PROCTITLE msg=audit(1729738800.268:30): proctitle=6B696C6C0031 + type=LANDLOCK_DOMAIN msg=audit(1729738800.324:31): domain=1a6fdc66f status=deallocated denials=1 + +Here is another example showcasing filesystem access control:: + + $ LL_FS_RO=/ LL_FS_RW=/tmp LL_FORCE_LOG=1 ./sandboxer sh -c "echo > /etc/passwd" + +The related audit logs contains 8 records from 3 different events (serials 33, +34 and 35) created by the same domain `1a6fdc679`:: + + type=LANDLOCK_ACCESS msg=audit(1729738800.221:33): domain=1a6fdc679 blockers=fs.write_file path="/dev/tty" dev="devtmpfs" ino=9 + type=LANDLOCK_DOMAIN msg=audit(1729738800.221:33): domain=1a6fdc679 status=allocated mode=enforcing pid=289 uid=0 exe="/root/sandboxer" comm="sandboxer" + type=SYSCALL msg=audit(1729738800.221:33): arch=c000003e syscall=257 success=no exit=-13 [...] ppid=272 pid=289 auid=0 uid=0 gid=0 [...] comm="sh" [...] + type=PROCTITLE msg=audit(1729738800.221:33): proctitle=7368002D63006563686F203E202F6574632F706173737764 + type=LANDLOCK_ACCESS msg=audit(1729738800.221:34): domain=1a6fdc679 blockers=fs.write_file path="/etc/passwd" dev="vda2" ino=143821 + type=SYSCALL msg=audit(1729738800.221:34): arch=c000003e syscall=257 success=no exit=-13 [...] ppid=272 pid=289 auid=0 uid=0 gid=0 [...] comm="sh" [...] + type=PROCTITLE msg=audit(1729738800.221:34): proctitle=7368002D63006563686F203E202F6574632F706173737764 + type=LANDLOCK_DOMAIN msg=audit(1729738800.261:35): domain=1a6fdc679 status=deallocated denials=2 + + +Event filtering +--------------- + +If you get spammed with audit logs related to Landlock, this is either an +attack attempt or a bug in the security policy. We can put in place some +filters to limit noise with two complementary ways: + +- with sys_landlock_restrict_self()'s flags if we can fix the sandboxed + programs, +- or with audit rules (see :manpage:`auditctl(8)`). + +Additional documentation +======================== + +* `Linux Audit Documentation`_ +* Documentation/userspace-api/landlock.rst +* Documentation/security/landlock.rst +* https://landlock.io + +.. Links +.. _Linux Audit Documentation: + https://github.com/linux-audit/audit-documentation/wiki diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst index b557cf1c820d281add0f12ae896db9b59f598f4e..70b02f30013a8891c62e99f504be709b43a3d8be 100644 --- a/Documentation/admin-guide/README.rst +++ b/Documentation/admin-guide/README.rst @@ -165,7 +165,7 @@ Configuring the kernel "make xconfig" Qt based configuration tool. - "make gconfig" GTK+ based configuration tool. + "make gconfig" GTK based configuration tool. "make oldconfig" Default all questions based on the contents of your existing ./.config file and asking about diff --git a/Documentation/admin-guide/abi-obsolete-files.rst b/Documentation/admin-guide/abi-obsolete-files.rst new file mode 100644 index 0000000000000000000000000000000000000000..3061a916b4b56a911a66017a024aec0c17201cbe --- /dev/null +++ b/Documentation/admin-guide/abi-obsolete-files.rst @@ -0,0 +1,7 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Obsolete ABI Files +================== + +.. kernel-abi:: obsolete + :no-symbols: diff --git a/Documentation/admin-guide/abi-obsolete.rst b/Documentation/admin-guide/abi-obsolete.rst index 594e697aa1b2f478bd3ebdff3c1804aa3dfc51fd..640f3903e84703c62153a0053e4b4ecbac48e48c 100644 --- a/Documentation/admin-guide/abi-obsolete.rst +++ b/Documentation/admin-guide/abi-obsolete.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + ABI obsolete symbols ==================== @@ -7,5 +9,5 @@ marked to be removed at some later point in time. The description of the interface will document the reason why it is obsolete and when it can be expected to be removed. -.. kernel-abi:: ABI/obsolete - :rst: +.. kernel-abi:: obsolete + :no-files: diff --git a/Documentation/admin-guide/abi-removed-files.rst b/Documentation/admin-guide/abi-removed-files.rst new file mode 100644 index 0000000000000000000000000000000000000000..f1bdfadd2ec45273ebbc4e0b2d795dbd8d587de3 --- /dev/null +++ b/Documentation/admin-guide/abi-removed-files.rst @@ -0,0 +1,7 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Removed ABI Files +================= + +.. kernel-abi:: removed + :no-symbols: diff --git a/Documentation/admin-guide/abi-removed.rst b/Documentation/admin-guide/abi-removed.rst index f9e000c81828e5b4f83847b1efb662571131f9f7..88832d3eacd6edcfb0293e381ee442c6fe6f03c4 100644 --- a/Documentation/admin-guide/abi-removed.rst +++ b/Documentation/admin-guide/abi-removed.rst @@ -1,5 +1,7 @@ +.. SPDX-License-Identifier: GPL-2.0 + ABI removed symbols =================== -.. kernel-abi:: ABI/removed - :rst: +.. kernel-abi:: removed + :no-files: diff --git a/Documentation/admin-guide/abi-stable-files.rst b/Documentation/admin-guide/abi-stable-files.rst new file mode 100644 index 0000000000000000000000000000000000000000..f867738fc178a2886653362d0dfecdcccddd989b --- /dev/null +++ b/Documentation/admin-guide/abi-stable-files.rst @@ -0,0 +1,7 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Stable ABI Files +================ + +.. kernel-abi:: stable + :no-symbols: diff --git a/Documentation/admin-guide/abi-stable.rst b/Documentation/admin-guide/abi-stable.rst index fc3361d847b123a6b56a2ec2dbced51e3425a093..528c68401f4b974c79fc1d71e4bca16c12722c0a 100644 --- a/Documentation/admin-guide/abi-stable.rst +++ b/Documentation/admin-guide/abi-stable.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + ABI stable symbols ================== @@ -10,5 +12,5 @@ for at least 2 years. Most interfaces (like syscalls) are expected to never change and always be available. -.. kernel-abi:: ABI/stable - :rst: +.. kernel-abi:: stable + :no-files: diff --git a/Documentation/admin-guide/abi-testing-files.rst b/Documentation/admin-guide/abi-testing-files.rst new file mode 100644 index 0000000000000000000000000000000000000000..1da868e42fdb8c68139afe4694c11152d4cec5ec --- /dev/null +++ b/Documentation/admin-guide/abi-testing-files.rst @@ -0,0 +1,7 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Testing ABI Files +================= + +.. kernel-abi:: testing + :no-symbols: diff --git a/Documentation/admin-guide/abi-testing.rst b/Documentation/admin-guide/abi-testing.rst index 19767926b344078f382ee01a9725fab0e91b3e8d..6153ebd38e2d0b34c4ca977f795f49633d2796cf 100644 --- a/Documentation/admin-guide/abi-testing.rst +++ b/Documentation/admin-guide/abi-testing.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + ABI testing symbols =================== @@ -16,5 +18,5 @@ Programs that use these interfaces are strongly encouraged to add their name to the description of these interfaces, so that the kernel developers can easily notify them if any changes occur. -.. kernel-abi:: ABI/testing - :rst: +.. kernel-abi:: testing + :no-files: diff --git a/Documentation/admin-guide/abi.rst b/Documentation/admin-guide/abi.rst index bcab3ef2597ca09e02de3afb075792efc718fb52..c6039359e5853eeedcba82f01f2c9a1f5617a8ab 100644 --- a/Documentation/admin-guide/abi.rst +++ b/Documentation/admin-guide/abi.rst @@ -1,7 +1,14 @@ +.. SPDX-License-Identifier: GPL-2.0 + ===================== Linux ABI description ===================== +.. kernel-abi:: README + +ABI symbols +----------- + .. toctree:: :maxdepth: 2 @@ -9,3 +16,14 @@ Linux ABI description abi-testing abi-obsolete abi-removed + +ABI files +--------- + +.. toctree:: + :maxdepth: 2 + + abi-stable-files + abi-testing-files + abi-obsolete-files + abi-removed-files diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst index 1576fb93f06c4a21dba0230b18a3a51736909798..9bdb30901a93014f4110b50e6c090a996952db39 100644 --- a/Documentation/admin-guide/blockdev/zram.rst +++ b/Documentation/admin-guide/blockdev/zram.rst @@ -54,7 +54,7 @@ The list of possible return codes: If you use 'echo', the returned value is set by the 'echo' utility, and, in general case, something like:: - echo 3 > /sys/block/zram0/max_comp_streams + echo foo > /sys/block/zram0/comp_algorithm if [ $? -ne 0 ]; then handle_error fi @@ -73,21 +73,7 @@ This creates 4 devices: /dev/zram{0,1,2,3} num_devices parameter is optional and tells zram how many devices should be pre-created. Default: 1. -2) Set max number of compression streams -======================================== - -Regardless of the value passed to this attribute, ZRAM will always -allocate multiple compression streams - one per online CPU - thus -allowing several concurrent compression operations. The number of -allocated compression streams goes down when some of the CPUs -become offline. There is no single-compression-stream mode anymore, -unless you are running a UP system or have only 1 CPU online. - -To find out how many streams are currently available:: - - cat /sys/block/zram0/max_comp_streams - -3) Select compression algorithm +2) Select compression algorithm =============================== Using comp_algorithm device attribute one can see available and @@ -107,7 +93,7 @@ Examples:: For the time being, the `comp_algorithm` content shows only compression algorithms that are supported by zram. -4) Set compression algorithm parameters: Optional +3) Set compression algorithm parameters: Optional ================================================= Compression algorithms may support specific parameters which can be @@ -138,7 +124,7 @@ better the compression ratio, it even can take negatives values for some algorithms), for other algorithms `level` is acceleration level (the higher the value the lower the compression ratio). -5) Set Disksize +4) Set Disksize =============== Set disk size by writing the value to sysfs node 'disksize'. @@ -158,7 +144,7 @@ There is little point creating a zram of greater than twice the size of memory since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the size of the disk when not in use so a huge zram is wasteful. -6) Set memory limit: Optional +5) Set memory limit: Optional ============================= Set memory limit by writing the value to sysfs node 'mem_limit'. @@ -177,7 +163,7 @@ Examples:: # To disable memory limit echo 0 > /sys/block/zram0/mem_limit -7) Activate +6) Activate =========== :: @@ -188,7 +174,7 @@ Examples:: mkfs.ext4 /dev/zram1 mount /dev/zram1 /tmp -8) Add/remove zram devices +7) Add/remove zram devices ========================== zram provides a control interface, which enables dynamic (on-demand) device @@ -208,7 +194,7 @@ execute:: echo X > /sys/class/zram-control/hot_remove -9) Stats +8) Stats ======== Per-device statistics are exported as various nodes under /sys/block/zram/ @@ -228,8 +214,6 @@ mem_limit WO specifies the maximum amount of memory ZRAM can writeback_limit WO specifies the maximum amount of write IO zram can write out to backing device as 4KB unit writeback_limit_enable RW show and set writeback_limit feature -max_comp_streams RW the number of possible concurrent compress - operations comp_algorithm RW show and change the compression algorithm algorithm_params WO setup compression algorithm parameters compact WO trigger memory compaction @@ -310,7 +294,7 @@ a single line of text and contains the following stats separated by whitespace: Unit: 4K bytes ============== ============================================================= -10) Deactivate +9) Deactivate ============== :: @@ -318,7 +302,7 @@ a single line of text and contains the following stats separated by whitespace: swapoff /dev/zram0 umount /dev/zram1 -11) Reset +10) Reset ========= Write any positive value to 'reset' sysfs node:: diff --git a/Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst b/Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst index 582d3427de3f819558164cca07438c879a4aa555..a964aff373b19d2df2dc2ab78690a82331afb7ce 100644 --- a/Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst +++ b/Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst @@ -125,3 +125,7 @@ to unfreeze all tasks in the container:: This is the basic mechanism which should do the right thing for user space task in a simple scenario. + +This freezer implementation is affected by shortcomings (see commit +76f969e8948d8 ("cgroup: cgroup v2 freezer")) and cgroup v2 freezer is +recommended. diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst index 286d16fc22ebb70becddb11ed692951b7ccda3cf..d6b1db8cc7ebb384e20551f499f54d15b47f7fab 100644 --- a/Documentation/admin-guide/cgroup-v1/memory.rst +++ b/Documentation/admin-guide/cgroup-v1/memory.rst @@ -90,6 +90,7 @@ Brief summary of control files. used. memory.swappiness set/show swappiness parameter of vmscan (See sysctl's vm.swappiness) + Per memcg knob does not exist in cgroup v2. memory.move_charge_at_immigrate This knob is deprecated. memory.oom_control set/show oom controls. This knob is deprecated and shouldn't be @@ -609,6 +610,10 @@ memory.stat file includes following statistics: 'rss + mapped_file" will give you resident set size of cgroup. + Note that some kernel configurations might account complete larger + allocations (e.g., THP) towards 'rss' and 'mapped_file', even if + only some, but not all that memory is mapped. + (Note: file and shmem may be shared among other cgroups. In that case, mapped_file is accounted only when the memory cgroup is owner of page cache.) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index cb1b4e759b7e267c24d7f4f69564c16fb29c4d89..1a16ce68a4d7f6f8c9070be89c4975dbfa79077e 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1076,15 +1076,20 @@ cpufreq governor about the minimum desired frequency which should always be provided by a CPU, as well as the maximum desired frequency, which should not be exceeded by a CPU. -WARNING: cgroup2 doesn't yet support control of realtime processes. For -a kernel built with the CONFIG_RT_GROUP_SCHED option enabled for group -scheduling of realtime processes, the cpu controller can only be enabled -when all RT processes are in the root cgroup. This limitation does -not apply if CONFIG_RT_GROUP_SCHED is disabled. Be aware that system -management software may already have placed RT processes into nonroot -cgroups during the system boot process, and these processes may need -to be moved to the root cgroup before the cpu controller can be enabled -with a CONFIG_RT_GROUP_SCHED enabled kernel. +WARNING: cgroup2 cpu controller doesn't yet fully support the control of +realtime processes. For a kernel built with the CONFIG_RT_GROUP_SCHED option +enabled for group scheduling of realtime processes, the cpu controller can only +be enabled when all RT processes are in the root cgroup. Be aware that system +management software may already have placed RT processes into non-root cgroups +during the system boot process, and these processes may need to be moved to the +root cgroup before the cpu controller can be enabled with a +CONFIG_RT_GROUP_SCHED enabled kernel. + +With CONFIG_RT_GROUP_SCHED disabled, this limitation does not apply and some of +the interface files either affect realtime processes or account for them. See +the following section for details. Only the cpu controller is affected by +CONFIG_RT_GROUP_SCHED. Other controllers can be used for the resource control of +realtime processes irrespective of CONFIG_RT_GROUP_SCHED. CPU Interface Files @@ -1440,7 +1445,10 @@ The following nested keys are defined. anon Amount of memory used in anonymous mappings such as - brk(), sbrk(), and mmap(MAP_ANONYMOUS) + brk(), sbrk(), and mmap(MAP_ANONYMOUS). Note that + some kernel configurations might account complete larger + allocations (e.g., THP) if only some, but not all the + memory of such an allocation is mapped anymore. file Amount of memory used to cache filesystem data, @@ -1483,7 +1491,10 @@ The following nested keys are defined. Amount of application memory swapped out to zswap. file_mapped - Amount of cached filesystem data mapped with mmap() + Amount of cached filesystem data mapped with mmap(). Note + that some kernel configurations might account complete + larger allocations (e.g., THP) if only some, but not + not all the memory of such an allocation is mapped. file_dirty Amount of cached filesystem data that was modified but @@ -1555,6 +1566,12 @@ The following nested keys are defined. workingset_nodereclaim Number of times a shadow node has been reclaimed + pswpin (npn) + Number of pages swapped into memory + + pswpout (npn) + Number of pages swapped out of memory + pgscan (npn) Amount of scanned pages (in an inactive LRU list) @@ -1570,6 +1587,9 @@ The following nested keys are defined. pgscan_khugepaged (npn) Amount of scanned pages by khugepaged (in an inactive LRU list) + pgscan_proactive (npn) + Amount of scanned pages proactively (in an inactive LRU list) + pgsteal_kswapd (npn) Amount of reclaimed pages by kswapd @@ -1579,6 +1599,9 @@ The following nested keys are defined. pgsteal_khugepaged (npn) Amount of reclaimed pages by khugepaged + pgsteal_proactive (npn) + Amount of reclaimed pages proactively + pgfault (npn) Total number of page faults incurred @@ -1656,6 +1679,9 @@ The following nested keys are defined. pgdemote_khugepaged Number of pages demoted by khugepaged. + pgdemote_proactive + Number of pages demoted by proactively. + hugetlb Amount of memory used by hugetlb pages. This metric only shows up if hugetlb usage is accounted for in memory.current (i.e. diff --git a/Documentation/admin-guide/device-mapper/dm-crypt.rst b/Documentation/admin-guide/device-mapper/dm-crypt.rst index 9f8139ff97d656367700598202b668494a582d55..4467f6d4b6329e5f1c8a3bffcc6e6bcb8cd2a6e9 100644 --- a/Documentation/admin-guide/device-mapper/dm-crypt.rst +++ b/Documentation/admin-guide/device-mapper/dm-crypt.rst @@ -146,6 +146,11 @@ integrity:: integrity for the encrypted device. The additional space is then used for storing authentication tag (and persistent IV if needed). +integrity_key_size: + Optionally set the integrity key size if it differs from the digest size. + It allows the use of wrapped key algorithms where the key size is + independent of the cryptographic key size. + sector_size: Use as the encryption unit instead of 512 bytes sectors. This option can be in range 512 - 4096 bytes and must be power of two. diff --git a/Documentation/admin-guide/device-mapper/dm-integrity.rst b/Documentation/admin-guide/device-mapper/dm-integrity.rst index d8a5f14d0e3c41a78d77678519bd2513b04dc009..c2e18ecc065c921104ec9625ab7f35e63c59d706 100644 --- a/Documentation/admin-guide/device-mapper/dm-integrity.rst +++ b/Documentation/admin-guide/device-mapper/dm-integrity.rst @@ -92,6 +92,11 @@ Target arguments: allowed. This mode is useful for data recovery if the device cannot be activated in any of the other standard modes. + I - inline mode - in this mode, dm-integrity will store integrity + data directly in the underlying device sectors. + The underlying device must have an integrity profile that + allows storing user integrity data and provides enough + space for the selected integrity tag. 5. the number of additional arguments diff --git a/Documentation/admin-guide/device-mapper/verity.rst b/Documentation/admin-guide/device-mapper/verity.rst index a65c1602cb2391277310fcf636c105b0616fccd3..8c3f1f967a3cdf0883ef95cf5f60ccab2977e7c9 100644 --- a/Documentation/admin-guide/device-mapper/verity.rst +++ b/Documentation/admin-guide/device-mapper/verity.rst @@ -87,6 +87,15 @@ panic_on_corruption Panic the device when a corrupted block is discovered. This option is not compatible with ignore_corruption and restart_on_corruption. +restart_on_error + Restart the system when an I/O error is detected. + This option can be combined with the restart_on_corruption option. + +panic_on_error + Panic the device when an I/O error is detected. This option is + not compatible with the restart_on_error option but can be combined + with the panic_on_corruption option. + ignore_zero_blocks Do not verify blocks that are expected to contain zeroes and always return zeroes instead. This may be useful if the partition contains unused blocks @@ -142,8 +151,15 @@ root_hash_sig_key_desc already in the secondary trusted keyring. try_verify_in_tasklet - If verity hashes are in cache, verify data blocks in kernel tasklet instead - of workqueue. This option can reduce IO latency. + If verity hashes are in cache and the IO size does not exceed the limit, + verify data blocks in bottom half instead of workqueue. This option can + reduce IO latency. The size limits can be configured via + /sys/module/dm_verity/parameters/use_bh_bytes. The four parameters + correspond to limits for IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT, + IOPRIO_CLASS_BE and IOPRIO_CLASS_IDLE in turn. + For example: + ,,, + 4096,4096,4096,4096 Theory of operation =================== diff --git a/Documentation/admin-guide/ext4.rst b/Documentation/admin-guide/ext4.rst index 2418b0c2d3dfccabdb91592106e6f978b6f77058..b857eb6ca1b6206943f1cd1cf580752751b1e70e 100644 --- a/Documentation/admin-guide/ext4.rst +++ b/Documentation/admin-guide/ext4.rst @@ -238,11 +238,10 @@ When mounting an ext4 filesystem, the following option are accepted: configured using tune2fs) data_err=ignore(*) - Just print an error message if an error occurs in a file data buffer in - ordered mode. + Just print an error message if an error occurs in a file data buffer. + data_err=abort - Abort the journal if an error occurs in a file data buffer in ordered - mode. + Abort the journal if an error occurs in a file data buffer. grpid | bsdgroups New objects have the group ID of their parent. diff --git a/Documentation/admin-guide/gpio/gpio-sim.rst b/Documentation/admin-guide/gpio/gpio-sim.rst index 1cc5567a4bbe7cb290ebc74aecfb56058734831f..35d49ccd49e0a3c3c071b91f9acf1408f650a308 100644 --- a/Documentation/admin-guide/gpio/gpio-sim.rst +++ b/Documentation/admin-guide/gpio/gpio-sim.rst @@ -71,7 +71,7 @@ specific lines. The name of those subdirectories must take the form of: ``'line'`` (e.g. ``'line0'``, ``'line20'``, etc.) as the name will be used by the module to assign the config to the specific line at given offset. -Once the confiuration is complete, the ``'live'`` attribute must be set to 1 in +Once the configuration is complete, the ``'live'`` attribute must be set to 1 in order to instantiate the chip. It can be set back to 0 to destroy the simulated chip. The module will synchronously wait for the new simulated device to be successfully probed and if this doesn't happen, writing to ``'live'`` will diff --git a/Documentation/admin-guide/gpio/gpio-virtuser.rst b/Documentation/admin-guide/gpio/gpio-virtuser.rst index 2aca70db9f3b575ac27fca6aefa034fc6916f8e2..7e7c0df51640282145ba1d54746cba0696178dd5 100644 --- a/Documentation/admin-guide/gpio/gpio-virtuser.rst +++ b/Documentation/admin-guide/gpio/gpio-virtuser.rst @@ -92,7 +92,7 @@ struct. The first two take string values as arguments: Activating GPIO consumers ------------------------- -Once the confiuration is complete, the ``'live'`` attribute must be set to 1 in +Once the configuration is complete, the ``'live'`` attribute must be set to 1 in order to instantiate the consumer. It can be set back to 0 to destroy the virtual device. The module will synchronously wait for the new simulated device to be successfully probed and if this doesn't happen, writing to ``'live'`` will diff --git a/Documentation/admin-guide/highuid.rst b/Documentation/admin-guide/highuid.rst deleted file mode 100644 index 6ee70465c0eadbe9fcfe26384ed002febf91acd4..0000000000000000000000000000000000000000 --- a/Documentation/admin-guide/highuid.rst +++ /dev/null @@ -1,80 +0,0 @@ -=================================================== -Notes on the change from 16-bit UIDs to 32-bit UIDs -=================================================== - -:Author: Chris Wing -:Last updated: January 11, 2000 - -- kernel code MUST take into account __kernel_uid_t and __kernel_uid32_t - when communicating between user and kernel space in an ioctl or data - structure. - -- kernel code should use uid_t and gid_t in kernel-private structures and - code. - -What's left to be done for 32-bit UIDs on all Linux architectures: - -- Disk quotas have an interesting limitation that is not related to the - maximum UID/GID. They are limited by the maximum file size on the - underlying filesystem, because quota records are written at offsets - corresponding to the UID in question. - Further investigation is needed to see if the quota system can cope - properly with huge UIDs. If it can deal with 64-bit file offsets on all - architectures, this should not be a problem. - -- Decide whether or not to keep backwards compatibility with the system - accounting file, or if we should break it as the comments suggest - (currently, the old 16-bit UID and GID are still written to disk, and - part of the former pad space is used to store separate 32-bit UID and - GID) - -- Need to validate that OS emulation calls the 16-bit UID - compatibility syscalls, if the OS being emulated used 16-bit UIDs, or - uses the 32-bit UID system calls properly otherwise. - - This affects at least: - - - iBCS on Intel - - - sparc32 emulation on sparc64 - (need to support whatever new 32-bit UID system calls are added to - sparc32) - -- Validate that all filesystems behave properly. - - At present, 32-bit UIDs _should_ work for: - - - ext2 - - ufs - - isofs - - nfs - - coda - - udf - - Ioctl() fixups have been made for: - - - ncpfs - - smbfs - - Filesystems with simple fixups to prevent 16-bit UID wraparound: - - - minix - - sysv - - qnx4 - - Other filesystems have not been checked yet. - -- The ncpfs and smpfs filesystems cannot presently use 32-bit UIDs in - all ioctl()s. Some new ioctl()s have been added with 32-bit UIDs, but - more are needed. (as well as new user<->kernel data structures) - -- The ELF core dump format only supports 16-bit UIDs on arm, i386, m68k, - sh, and sparc32. Fixing this is probably not that important, but would - require adding a new ELF section. - -- The ioctl()s used to control the in-kernel NFS server only support - 16-bit UIDs on arm, i386, m68k, sh, and sparc32. - -- make sure that the UID mapping feature of AX25 networking works properly - (it should be safe because it's always used a 32-bit integer to - communicate between user and kernel) diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index ff0b440ef2dc90be293208c597f8729039ce8540..451874b8135d8ef048c7c8c38898f71ed81f05e3 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -22,3 +22,4 @@ are configurable at compile, boot or run time. srso gather_data_sampling reg-file-data-sampling + rsb diff --git a/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst index 0585d02b9a6cbc46a1c61d6d113e2df56f889527..ad15417d39f9917daf4f494cd347b46b3b19b966 100644 --- a/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst +++ b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst @@ -29,14 +29,6 @@ Below is the list of affected Intel processors [#f1]_: RAPTORLAKE_S 06_BFH =================== ============ -As an exception to this table, Intel Xeon E family parts ALDERLAKE(06_97H) and -RAPTORLAKE(06_B7H) codenamed Catlow are not affected. They are reported as -vulnerable in Linux because they share the same family/model with an affected -part. Unlike their affected counterparts, they do not enumerate RFDS_CLEAR or -CPUID.HYBRID. This information could be used to distinguish between the -affected and unaffected parts, but it is deemed not worth adding complexity as -the reporting is fixed automatically when these parts enumerate RFDS_NO. - Mitigation ========== Intel released a microcode update that enables software to clear sensitive diff --git a/Documentation/admin-guide/hw-vuln/rsb.rst b/Documentation/admin-guide/hw-vuln/rsb.rst new file mode 100644 index 0000000000000000000000000000000000000000..21dbf9cf25f8bd9d16d4f1c6157c9212520d78fe --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/rsb.rst @@ -0,0 +1,268 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================= +RSB-related mitigations +======================= + +.. warning:: + Please keep this document up-to-date, otherwise you will be + volunteered to update it and convert it to a very long comment in + bugs.c! + +Since 2018 there have been many Spectre CVEs related to the Return Stack +Buffer (RSB) (sometimes referred to as the Return Address Stack (RAS) or +Return Address Predictor (RAP) on AMD). + +Information about these CVEs and how to mitigate them is scattered +amongst a myriad of microarchitecture-specific documents. + +This document attempts to consolidate all the relevant information in +once place and clarify the reasoning behind the current RSB-related +mitigations. It's meant to be as concise as possible, focused only on +the current kernel mitigations: what are the RSB-related attack vectors +and how are they currently being mitigated? + +It's *not* meant to describe how the RSB mechanism operates or how the +exploits work. More details about those can be found in the references +below. + +Rather, this is basically a glorified comment, but too long to actually +be one. So when the next CVE comes along, a kernel developer can +quickly refer to this as a refresher to see what we're actually doing +and why. + +At a high level, there are two classes of RSB attacks: RSB poisoning +(Intel and AMD) and RSB underflow (Intel only). They must each be +considered individually for each attack vector (and microarchitecture +where applicable). + +---- + +RSB poisoning (Intel and AMD) +============================= + +SpectreRSB +~~~~~~~~~~ + +RSB poisoning is a technique used by SpectreRSB [#spectre-rsb]_ where +an attacker poisons an RSB entry to cause a victim's return instruction +to speculate to an attacker-controlled address. This can happen when +there are unbalanced CALLs/RETs after a context switch or VMEXIT. + +* All attack vectors can potentially be mitigated by flushing out any + poisoned RSB entries using an RSB filling sequence + [#intel-rsb-filling]_ [#amd-rsb-filling]_ when transitioning between + untrusted and trusted domains. But this has a performance impact and + should be avoided whenever possible. + + .. DANGER:: + **FIXME**: Currently we're flushing 32 entries. However, some CPU + models have more than 32 entries. The loop count needs to be + increased for those. More detailed information is needed about RSB + sizes. + +* On context switch, the user->user mitigation requires ensuring the + RSB gets filled or cleared whenever IBPB gets written [#cond-ibpb]_ + during a context switch: + + * AMD: + On Zen 4+, IBPB (or SBPB [#amd-sbpb]_ if used) clears the RSB. + This is indicated by IBPB_RET in CPUID [#amd-ibpb-rsb]_. + + On Zen < 4, the RSB filling sequence [#amd-rsb-filling]_ must be + always be done in addition to IBPB [#amd-ibpb-no-rsb]_. This is + indicated by X86_BUG_IBPB_NO_RET. + + * Intel: + IBPB always clears the RSB: + + "Software that executed before the IBPB command cannot control + the predicted targets of indirect branches executed after the + command on the same logical processor. The term indirect branch + in this context includes near return instructions, so these + predicted targets may come from the RSB." [#intel-ibpb-rsb]_ + +* On context switch, user->kernel attacks are prevented by SMEP. User + space can only insert user space addresses into the RSB. Even + non-canonical addresses can't be inserted due to the page gap at the + end of the user canonical address space reserved by TASK_SIZE_MAX. + A SMEP #PF at instruction fetch prevents the kernel from speculatively + executing user space. + + * AMD: + "Finally, branches that are predicted as 'ret' instructions get + their predicted targets from the Return Address Predictor (RAP). + AMD recommends software use a RAP stuffing sequence (mitigation + V2-3 in [2]) and/or Supervisor Mode Execution Protection (SMEP) + to ensure that the addresses in the RAP are safe for + speculation. Collectively, we refer to these mitigations as "RAP + Protection"." [#amd-smep-rsb]_ + + * Intel: + "On processors with enhanced IBRS, an RSB overwrite sequence may + not suffice to prevent the predicted target of a near return + from using an RSB entry created in a less privileged predictor + mode. Software can prevent this by enabling SMEP (for + transitions from user mode to supervisor mode) and by having + IA32_SPEC_CTRL.IBRS set during VM exits." [#intel-smep-rsb]_ + +* On VMEXIT, guest->host attacks are mitigated by eIBRS (and PBRSB + mitigation if needed): + + * AMD: + "When Automatic IBRS is enabled, the internal return address + stack used for return address predictions is cleared on VMEXIT." + [#amd-eibrs-vmexit]_ + + * Intel: + "On processors with enhanced IBRS, an RSB overwrite sequence may + not suffice to prevent the predicted target of a near return + from using an RSB entry created in a less privileged predictor + mode. Software can prevent this by enabling SMEP (for + transitions from user mode to supervisor mode) and by having + IA32_SPEC_CTRL.IBRS set during VM exits. Processors with + enhanced IBRS still support the usage model where IBRS is set + only in the OS/VMM for OSes that enable SMEP. To do this, such + processors will ensure that guest behavior cannot control the + RSB after a VM exit once IBRS is set, even if IBRS was not set + at the time of the VM exit." [#intel-eibrs-vmexit]_ + + Note that some Intel CPUs are susceptible to Post-barrier Return + Stack Buffer Predictions (PBRSB) [#intel-pbrsb]_, where the last + CALL from the guest can be used to predict the first unbalanced RET. + In this case the PBRSB mitigation is needed in addition to eIBRS. + +AMD RETBleed / SRSO / Branch Type Confusion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +On AMD, poisoned RSB entries can also be created by the AMD RETBleed +variant [#retbleed-paper]_ [#amd-btc]_ or by Speculative Return Stack +Overflow [#amd-srso]_ (Inception [#inception-paper]_). The kernel +protects itself by replacing every RET in the kernel with a branch to a +single safe RET. + +---- + +RSB underflow (Intel only) +========================== + +RSB Alternate (RSBA) ("Intel Retbleed") +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some Intel Skylake-generation CPUs are susceptible to the Intel variant +of RETBleed [#retbleed-paper]_ (Return Stack Buffer Underflow +[#intel-rsbu]_). If a RET is executed when the RSB buffer is empty due +to mismatched CALLs/RETs or returning from a deep call stack, the branch +predictor can fall back to using the Branch Target Buffer (BTB). If a +user forces a BTB collision then the RET can speculatively branch to a +user-controlled address. + +* Note that RSB filling doesn't fully mitigate this issue. If there + are enough unbalanced RETs, the RSB may still underflow and fall back + to using a poisoned BTB entry. + +* On context switch, user->user underflow attacks are mitigated by the + conditional IBPB [#cond-ibpb]_ on context switch which effectively + clears the BTB: + + * "The indirect branch predictor barrier (IBPB) is an indirect branch + control mechanism that establishes a barrier, preventing software + that executed before the barrier from controlling the predicted + targets of indirect branches executed after the barrier on the same + logical processor." [#intel-ibpb-btb]_ + +* On context switch and VMEXIT, user->kernel and guest->host RSB + underflows are mitigated by IBRS or eIBRS: + + * "Enabling IBRS (including enhanced IBRS) will mitigate the "RSBU" + attack demonstrated by the researchers. As previously documented, + Intel recommends the use of enhanced IBRS, where supported. This + includes any processor that enumerates RRSBA but not RRSBA_DIS_S." + [#intel-rsbu]_ + + However, note that eIBRS and IBRS do not mitigate intra-mode attacks. + Like RRSBA below, this is mitigated by clearing the BHB on kernel + entry. + + As an alternative to classic IBRS, call depth tracking (combined with + retpolines) can be used to track kernel returns and fill the RSB when + it gets close to being empty. + +Restricted RSB Alternate (RRSBA) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some newer Intel CPUs have Restricted RSB Alternate (RRSBA) behavior, +which, similar to RSBA described above, also falls back to using the BTB +on RSB underflow. The only difference is that the predicted targets are +restricted to the current domain when eIBRS is enabled: + +* "Restricted RSB Alternate (RRSBA) behavior allows alternate branch + predictors to be used by near RET instructions when the RSB is + empty. When eIBRS is enabled, the predicted targets of these + alternate predictors are restricted to those belonging to the + indirect branch predictor entries of the current prediction domain. + [#intel-eibrs-rrsba]_ + +When a CPU with RRSBA is vulnerable to Branch History Injection +[#bhi-paper]_ [#intel-bhi]_, an RSB underflow could be used for an +intra-mode BTI attack. This is mitigated by clearing the BHB on +kernel entry. + +However if the kernel uses retpolines instead of eIBRS, it needs to +disable RRSBA: + +* "Where software is using retpoline as a mitigation for BHI or + intra-mode BTI, and the processor both enumerates RRSBA and + enumerates RRSBA_DIS controls, it should disable this behavior." + [#intel-retpoline-rrsba]_ + +---- + +References +========== + +.. [#spectre-rsb] `Spectre Returns! Speculation Attacks using the Return Stack Buffer `_ + +.. [#intel-rsb-filling] "Empty RSB Mitigation on Skylake-generation" in `Retpoline: A Branch Target Injection Mitigation `_ + +.. [#amd-rsb-filling] "Mitigation V2-3" in `Software Techniques for Managing Speculation `_ + +.. [#cond-ibpb] Whether IBPB is written depends on whether the prev and/or next task is protected from Spectre attacks. It typically requires opting in per task or system-wide. For more details see the documentation for the ``spectre_v2_user`` cmdline option in Documentation/admin-guide/kernel-parameters.txt. + +.. [#amd-sbpb] IBPB without flushing of branch type predictions. Only exists for AMD. + +.. [#amd-ibpb-rsb] "Function 8000_0008h -- Processor Capacity Parameters and Extended Feature Identification" in `AMD64 Architecture Programmer's Manual Volume 3: General-Purpose and System Instructions `_. SBPB behaves the same way according to `this email `_. + +.. [#amd-ibpb-no-rsb] `Spectre Attacks: Exploiting Speculative Execution `_ + +.. [#intel-ibpb-rsb] "Introduction" in `Post-barrier Return Stack Buffer Predictions / CVE-2022-26373 / INTEL-SA-00706 `_ + +.. [#amd-smep-rsb] "Existing Mitigations" in `Technical Guidance for Mitigating Branch Type Confusion `_ + +.. [#intel-smep-rsb] "Enhanced IBRS" in `Indirect Branch Restricted Speculation `_ + +.. [#amd-eibrs-vmexit] "Extended Feature Enable Register (EFER)" in `AMD64 Architecture Programmer's Manual Volume 2: System Programming `_ + +.. [#intel-eibrs-vmexit] "Enhanced IBRS" in `Indirect Branch Restricted Speculation `_ + +.. [#intel-pbrsb] `Post-barrier Return Stack Buffer Predictions / CVE-2022-26373 / INTEL-SA-00706 `_ + +.. [#retbleed-paper] `RETBleed: Arbitrary Speculative Code Execution with Return Instruction `_ + +.. [#amd-btc] `Technical Guidance for Mitigating Branch Type Confusion `_ + +.. [#amd-srso] `Technical Update Regarding Speculative Return Stack Overflow `_ + +.. [#inception-paper] `Inception: Exposing New Attack Surfaces with Training in Transient Execution `_ + +.. [#intel-rsbu] `Return Stack Buffer Underflow / Return Stack Buffer Underflow / CVE-2022-29901, CVE-2022-28693 / INTEL-SA-00702 `_ + +.. [#intel-ibpb-btb] `Indirect Branch Predictor Barrier' `_ + +.. [#intel-eibrs-rrsba] "Guidance for RSBU" in `Return Stack Buffer Underflow / Return Stack Buffer Underflow / CVE-2022-29901, CVE-2022-28693 / INTEL-SA-00702 `_ + +.. [#bhi-paper] `Branch History Injection: On the Effectiveness of Hardware Mitigations Against Cross-Privilege Spectre-v2 Attacks `_ + +.. [#intel-bhi] `Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 `_ + +.. [#intel-retpoline-rrsba] "Retpoline" in `Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 `_ diff --git a/Documentation/admin-guide/hw-vuln/srso.rst b/Documentation/admin-guide/hw-vuln/srso.rst index 2ad1c05b8c88396bbf835a8fcc7fcb00291bc30c..66af95251a3d1bbb720f29da379f70098e80ba77 100644 --- a/Documentation/admin-guide/hw-vuln/srso.rst +++ b/Documentation/admin-guide/hw-vuln/srso.rst @@ -104,7 +104,20 @@ The possible values in this file are: (spec_rstack_overflow=ibpb-vmexit) + * 'Mitigation: Reduced Speculation': + This mitigation gets automatically enabled when the above one "IBPB on + VMEXIT" has been selected and the CPU supports the BpSpecReduce bit. + + It gets automatically enabled on machines which have the + SRSO_USER_KERNEL_NO=1 CPUID bit. In that case, the code logic is to switch + to the above =ibpb-vmexit mitigation because the user/kernel boundary is + not affected anymore and thus "safe RET" is not needed. + + After enabling the IBPB on VMEXIT mitigation option, the BpSpecReduce bit + is detected (functionality present on all such machines) and that + practically overrides IBPB on VMEXIT as it has a lot less performance + impact and takes care of the guest->host attack vector too. In order to exploit vulnerability, an attacker needs to: diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst index c8af32a8f800051b7b854553bacd92a16a604867..259d79fbeb94460c946e015957571957f2dfc4c0 100644 --- a/Documentation/admin-guide/index.rst +++ b/Documentation/admin-guide/index.rst @@ -187,7 +187,6 @@ A few hard-to-categorize and generally obsolete documents. .. toctree:: :maxdepth: 1 - highuid ldm unicode diff --git a/Documentation/admin-guide/iostats.rst b/Documentation/admin-guide/iostats.rst index 609a3201fd4e1ec233aa45260e94c335f01edbf7..9453196ade51b8065563d8e76989f274098f0fe8 100644 --- a/Documentation/admin-guide/iostats.rst +++ b/Documentation/admin-guide/iostats.rst @@ -2,62 +2,39 @@ I/O statistics fields ===================== -Since 2.4.20 (and some versions before, with patches), and 2.5.45, -more extensive disk statistics have been introduced to help measure disk -activity. Tools such as ``sar`` and ``iostat`` typically interpret these and do -the work for you, but in case you are interested in creating your own -tools, the fields are explained here. - -In 2.4 now, the information is found as additional fields in -``/proc/partitions``. In 2.6 and upper, the same information is found in two -places: one is in the file ``/proc/diskstats``, and the other is within -the sysfs file system, which must be mounted in order to obtain -the information. Throughout this document we'll assume that sysfs -is mounted on ``/sys``, although of course it may be mounted anywhere. -Both ``/proc/diskstats`` and sysfs use the same source for the information -and so should not differ. - -Here are examples of these different formats:: - - 2.4: - 3 0 39082680 hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160 - 3 1 9221278 hda1 35486 0 35496 38030 0 0 0 0 0 38030 38030 - - 2.6+ sysfs: - 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160 - 35486 38030 38030 38030 - - 2.6+ diskstats: - 3 0 hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160 - 3 1 hda1 35486 38030 38030 38030 - - 4.18+ diskstats: - 3 0 hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160 0 0 0 0 - -On 2.4 you might execute ``grep 'hda ' /proc/partitions``. On 2.6+, you have -a choice of ``cat /sys/block/hda/stat`` or ``grep 'hda ' /proc/diskstats``. - -The advantage of one over the other is that the sysfs choice works well -if you are watching a known, small set of disks. ``/proc/diskstats`` may -be a better choice if you are watching a large number of disks because -you'll avoid the overhead of 50, 100, or 500 or more opens/closes with -each snapshot of your disk statistics. - -In 2.4, the statistics fields are those after the device name. In -the above example, the first field of statistics would be 446216. -By contrast, in 2.6+ if you look at ``/sys/block/hda/stat``, you'll -find just the 15 fields, beginning with 446216. If you look at -``/proc/diskstats``, the 15 fields will be preceded by the major and -minor device numbers, and device name. Each of these formats provides -15 fields of statistics, each meaning exactly the same things. -All fields except field 9 are cumulative since boot. Field 9 should -go to zero as I/Os complete; all others only increase (unless they -overflow and wrap). Wrapping might eventually occur on a very busy -or long-lived system; so applications should be prepared to deal with -it. Regarding wrapping, the types of the fields are either unsigned -int (32 bit) or unsigned long (32-bit or 64-bit, depending on your -machine) as noted per-field below. Unless your observations are very -spread in time, these fields should not wrap twice before you notice it. +The kernel exposes disk statistics via ``/proc/diskstats`` and +``/sys/block//stat``. These stats are usually accessed via tools +such as ``sar`` and ``iostat``. + +Here are examples using a disk with two partitions:: + + /proc/diskstats: + 259 0 nvme0n1 255999 814 12369153 47919 996852 81 36123024 425995 0 301795 580470 0 0 0 0 60602 106555 + 259 1 nvme0n1p1 492 813 17572 96 848 81 108288 210 0 76 307 0 0 0 0 0 0 + 259 2 nvme0n1p2 255401 1 12343477 47799 996004 0 36014736 425784 0 344336 473584 0 0 0 0 0 0 + + /sys/block/nvme0n1/stat: + 255999 814 12369153 47919 996858 81 36123056 426009 0 301809 580491 0 0 0 0 60605 106562 + + /sys/block/nvme0n1/nvme0n1p1/stat: + 492 813 17572 96 848 81 108288 210 0 76 307 0 0 0 0 0 0 + +Both files contain the same 17 statistics. ``/sys/block//stat`` +contains the fields for ````. In ``/proc/diskstats`` the fields +are prefixed with the major and minor device numbers and the device +name. In the example above, the first stat value for ``nvme0n1`` is +255999 in both files. + +The sysfs ``stat`` file is efficient for monitoring a small, known set +of disks. If you're tracking a large number of devices, +``/proc/diskstats`` is often the better choice since it avoids the +overhead of opening and closing multiple files for each snapshot. + +All fields are cumulative, monotonic counters, except for field 9, which +resets to zero as I/Os complete. The remaining fields reset at boot, on +device reattachment or reinitialization, or when the underlying counter +overflows. Applications reading these counters should detect and handle +resets when comparing stat snapshots. Each set of stats only applies to the indicated device; if you want system-wide stats you'll have to find all the devices and sum them all up. diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index 5376890adbeb079574d812a1a775c1db1f587853..1f7f14c6e184cc1a3429683a65865e186db1a72e 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -180,10 +180,6 @@ Dump-capture kernel config options (Arch Dependent, i386 and x86_64) 1) On i386, enable high memory support under "Processor type and features":: - CONFIG_HIGHMEM64G=y - - or:: - CONFIG_HIGHMEM4G 2) With CONFIG_SMP=y, usually nr_cpus=1 need specified on the kernel diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index fb8752b42ec8582b8750d7e014c4d76166fa2fc1..d9fd26b95b340909101b80ab4b22a6bbd3d20ab6 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -416,10 +416,6 @@ Format: { quiet (default) | verbose | debug } Change the amount of debugging information output when initialising the APIC and IO-APIC components. - For X86-32, this can also be used to specify an APIC - driver name. - Format: apic=driver_name - Examples: apic=bigsmp apic_extnmi= [APIC,X86,EARLY] External NMI delivery setting Format: { bsp (default) | all | none } @@ -1411,7 +1407,8 @@ earlyprintk=serial[,0x...[,baudrate]] earlyprintk=ttySn[,baudrate] earlyprintk=dbgp[debugController#] - earlyprintk=pciserial[,force],bus:device.function[,baudrate] + earlyprintk=mmio32,membase[,{nocfg|baudrate}] + earlyprintk=pciserial[,force],bus:device.function[,{nocfg|baudrate}] earlyprintk=xdbc[xhciController#] earlyprintk=bios @@ -1419,6 +1416,9 @@ the normal console is initialized. It is not enabled by default because it has some cosmetic problems. + Use "nocfg" to skip UART configuration, assume + BIOS/firmware has configured UART correctly. + Append ",keep" to not disable it when the real console takes over. @@ -1785,7 +1785,9 @@ allocation boundaries as a proactive defense against bounds-checking flaws in the kernel's copy_to_user()/copy_from_user() interface. - on Perform hardened usercopy checks (default). + The default is determined by + CONFIG_HARDENED_USERCOPY_DEFAULT_ON. + on Perform hardened usercopy checks. off Disable hardened usercopy checks. hardlockup_all_cpu_backtrace= @@ -1861,7 +1863,7 @@ hpet_mmap= [X86, HPET_MMAP] Allow userspace to mmap HPET registers. Default set by CONFIG_HPET_MMAP_DEFAULT. - hugepages= [HW] Number of HugeTLB pages to allocate at boot. + hugepages= [HW,EARLY] Number of HugeTLB pages to allocate at boot. If this follows hugepagesz (below), it specifies the number of pages of hugepagesz to be allocated. If this is the first HugeTLB parameter on the command @@ -1873,15 +1875,24 @@ :[,:] hugepagesz= - [HW] The size of the HugeTLB pages. This is used in - conjunction with hugepages (above) to allocate huge - pages of a specific size at boot. The pair - hugepagesz=X hugepages=Y can be specified once for - each supported huge page size. Huge page sizes are - architecture dependent. See also + [HW,EARLY] The size of the HugeTLB pages. This is + used in conjunction with hugepages (above) to + allocate huge pages of a specific size at boot. The + pair hugepagesz=X hugepages=Y can be specified once + for each supported huge page size. Huge page sizes + are architecture dependent. See also Documentation/admin-guide/mm/hugetlbpage.rst. Format: size[KMG] + hugepage_alloc_threads= + [HW] The number of threads that should be used to + allocate hugepages during boot. This option can be + used to improve system bootup time when allocating + a large amount of huge pages. + The default value is 25% of the available hardware threads. + + Note that this parameter only applies to non-gigantic huge pages. + hugetlb_cma= [HW,CMA,EARLY] The size of a CMA area used for allocation of gigantic hugepages. Or using node format, the size of a CMA area per node can be specified. @@ -1892,6 +1903,13 @@ hugepages using the CMA allocator. If enabled, the boot-time allocation of gigantic hugepages is skipped. + hugetlb_cma_only= + [HW,CMA,EARLY] When allocating new HugeTLB pages, only + try to allocate from the CMA areas. + + This option does nothing if hugetlb_cma= is not also + specified. + hugetlb_free_vmemmap= [KNL] Requires CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP enabled. @@ -1933,6 +1951,12 @@ which allow the hypervisor to 'idle' the guest on lock contention. + hw_protection= [HW] + Format: reboot | shutdown + + Hardware protection action taken on critical events like + overtemperature or imminent voltage loss. + i2c_bus= [HW] Override the default board specific I2C bus speed or register an additional I2C bus that is not registered from board initialization code. @@ -2316,6 +2340,9 @@ per_cpu_perf_limits Allow per-logical-CPU P-State performance control limits using cpufreq sysfs interface + no_cas + Do not enable capacity-aware scheduling (CAS) on + hybrid systems intremap= [X86-64,Intel-IOMMU,EARLY] on enable Interrupt Remapping (default) @@ -3116,6 +3143,8 @@ * max_sec_lba48: Set or clear transfer size limit to 65535 sectors. + * external: Mark port as external (hotplug-capable). + * [no]lpm: Enable or disable link power management. * [no]setxfer: Indicate if transfer speed mode setting @@ -4233,10 +4262,10 @@ nosmp [SMP,EARLY] Tells an SMP kernel to act as a UP kernel, and disable the IO APIC. legacy for "maxcpus=0". - nosmt [KNL,MIPS,PPC,S390,EARLY] Disable symmetric multithreading (SMT). + nosmt [KNL,MIPS,PPC,EARLY] Disable symmetric multithreading (SMT). Equivalent to smt=1. - [KNL,X86,PPC] Disable symmetric multithreading (SMT). + [KNL,X86,PPC,S390] Disable symmetric multithreading (SMT). nosmt=force: Force disable SMT, cannot be undone via the sysfs control file. @@ -5017,6 +5046,14 @@ Format: default: 0 (auto_verbose is enabled) + printk.debug_non_panic_cpus= + Allows storing messages from non-panic CPUs into + the printk log buffer during panic(). They are + flushed to consoles by the panic-CPU on + a best-effort basis. + Format: (1/Y/y=enable, 0/N/n=disable) + Default: disabled + printk.devkmsg={on,off,ratelimit} Control writing to /dev/kmsg. on - unlimited logging to /dev/kmsg from userspace @@ -5758,6 +5795,11 @@ rcutorture.test_boost_duration= [KNL] Duration (s) of each individual boost test. + rcutorture.test_boost_holdoff= [KNL] + Holdoff time (s) from start of test to the start + of RCU priority-boost testing. Defaults to zero, + that is, no holdoff. + rcutorture.test_boost_interval= [KNL] Interval (s) between each boost test. @@ -6082,7 +6124,7 @@ is assumed to be I/O ports; otherwise it is memory. reserve_mem= [RAM] - Format: nn[KNG]::