From 3c47c2ccd5a29c78780ccfd0227a805f3873ab1c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 14 Jan 2025 07:35:08 -0800 Subject: [PATCH 0001/1585] nvmet: fix rw control endian access Fixes: 3ec5c62cfcf060e ("nvmet: handle rw's limited retry flag") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501142128.WexgyMTv-lkp@intel.com/ Cc: Guixin Liu Signed-off-by: Keith Busch --- drivers/nvme/target/io-cmd-bdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 6380b60fd4900..2b09b2c69857a 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -272,7 +272,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) iter_flags = SG_MITER_FROM_SG; } - if (req->cmd->rw.control & NVME_RW_LR) + if (req->cmd->rw.control & cpu_to_le16(NVME_RW_LR)) opf |= REQ_FAILFAST_DEV; if (is_pci_p2pdma_page(sg_page(req->sg))) -- GitLab From d68fc95a771e0a7edd876ede7913d61276be77fd Mon Sep 17 00:00:00 2001 From: Francis Pravin Date: Fri, 17 Jan 2025 05:12:09 +0530 Subject: [PATCH 0002/1585] nvme-pci: remove redundant dma frees in hmb The value of size is 0 when there is no dma buffer allocated. The value of i also remains 0. So, no need to free the dma buffer in out_free_bufs. Hence, remove the redundant dma frees. Signed-off-by: Francis Pravin Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index fe0795e16e250..a14f3c74b7171 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2153,14 +2153,6 @@ static int nvme_alloc_host_mem_multi(struct nvme_dev *dev, u64 preferred, return 0; out_free_bufs: - while (--i >= 0) { - size_t size = le32_to_cpu(descs[i].size) * NVME_CTRL_PAGE_SIZE; - - dma_free_attrs(dev->dev, size, bufs[i], - le64_to_cpu(descs[i].addr), - DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN); - } - kfree(bufs); out_free_descs: dma_free_coherent(dev->dev, descs_size, descs, descs_dma); -- GitLab From dbf2bb1a1319b7c7d8828905378a6696cca6b0f2 Mon Sep 17 00:00:00 2001 From: Georg Gottleuber Date: Mon, 16 Dec 2024 23:28:03 +0100 Subject: [PATCH 0003/1585] nvme-pci: Add TUXEDO InfinityFlex to Samsung sleep quirk On the TUXEDO InfinityFlex, a Samsung 990 Evo NVMe leads to a high power consumption in s2idle sleep (4 watts). This patch applies 'Force No Simple Suspend' quirk to achieve a sleep with a lower power consumption, typically around 1.4 watts. Signed-off-by: Georg Gottleuber Cc: stable@vger.kernel.org Signed-off-by: Werner Sembach Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a14f3c74b7171..60afffc917b77 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3134,7 +3134,8 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) * because of high power consumption (> 2 Watt) in s2idle * sleep. Only some boards with Intel CPU are affected. */ - if (dmi_match(DMI_BOARD_NAME, "GMxPXxx") || + if (dmi_match(DMI_BOARD_NAME, "DN50Z-140HC-YD") || + dmi_match(DMI_BOARD_NAME, "GMxPXxx") || dmi_match(DMI_BOARD_NAME, "PH4PG31") || dmi_match(DMI_BOARD_NAME, "PH4PRX1_PH6PRX1") || dmi_match(DMI_BOARD_NAME, "PH6PG01_PH6PG71")) -- GitLab From 11cb3529d18514f7d28ad2190533192aedefd761 Mon Sep 17 00:00:00 2001 From: Georg Gottleuber Date: Mon, 16 Dec 2024 23:28:04 +0100 Subject: [PATCH 0004/1585] nvme-pci: Add TUXEDO IBP Gen9 to Samsung sleep quirk On the TUXEDO InfinityBook Pro Gen9 Intel, a Samsung 990 Evo NVMe leads to a high power consumption in s2idle sleep (4 watts). This patch applies 'Force No Simple Suspend' quirk to achieve a sleep with a lower power consumption, typically around 1.2 watts. Signed-off-by: Georg Gottleuber Cc: stable@vger.kernel.org Signed-off-by: Werner Sembach Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 60afffc917b77..ac708169efed9 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3136,6 +3136,7 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) */ if (dmi_match(DMI_BOARD_NAME, "DN50Z-140HC-YD") || dmi_match(DMI_BOARD_NAME, "GMxPXxx") || + dmi_match(DMI_BOARD_NAME, "GXxMRXx") || dmi_match(DMI_BOARD_NAME, "PH4PG31") || dmi_match(DMI_BOARD_NAME, "PH4PRX1_PH6PRX1") || dmi_match(DMI_BOARD_NAME, "PH6PG01_PH6PG71")) -- GitLab From ccb7276a6d26d6f8416e315b43b45e15ee7f29e2 Mon Sep 17 00:00:00 2001 From: Andy Strohman Date: Thu, 9 Jan 2025 02:27:56 +0000 Subject: [PATCH 0005/1585] batman-adv: fix panic during interface removal Reference counting is used to ensure that batadv_hardif_neigh_node and batadv_hard_iface are not freed before/during batadv_v_elp_throughput_metric_update work is finished. But there isn't a guarantee that the hard if will remain associated with a soft interface up until the work is finished. This fixes a crash triggered by reboot that looks like this: Call trace: batadv_v_mesh_free+0xd0/0x4dc [batman_adv] batadv_v_elp_throughput_metric_update+0x1c/0xa4 process_one_work+0x178/0x398 worker_thread+0x2e8/0x4d0 kthread+0xd8/0xdc ret_from_fork+0x10/0x20 (the batadv_v_mesh_free call is misleading, and does not actually happen) I was able to make the issue happen more reliably by changing hardif_neigh->bat_v.metric_work work to be delayed work. This allowed me to track down and confirm the fix. Cc: stable@vger.kernel.org Fixes: c833484e5f38 ("batman-adv: ELP - compute the metric based on the estimated throughput") Signed-off-by: Andy Strohman [sven@narfation.org: prevent entering batadv_v_elp_get_throughput without soft_iface] Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v_elp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 1d704574e6bf5..fbf499bcc6718 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -66,12 +66,19 @@ static void batadv_v_elp_start_timer(struct batadv_hard_iface *hard_iface) static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) { struct batadv_hard_iface *hard_iface = neigh->if_incoming; + struct net_device *soft_iface = hard_iface->soft_iface; struct ethtool_link_ksettings link_settings; struct net_device *real_netdev; struct station_info sinfo; u32 throughput; int ret; + /* don't query throughput when no longer associated with any + * batman-adv interface + */ + if (!soft_iface) + return BATADV_THROUGHPUT_DEFAULT_VALUE; + /* if the user specified a customised value for this interface, then * return it directly */ @@ -141,7 +148,7 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) default_throughput: if (!(hard_iface->bat_v.flags & BATADV_WARNING_DEFAULT)) { - batadv_info(hard_iface->soft_iface, + batadv_info(soft_iface, "WiFi driver or ethtool info does not provide information about link speeds on interface %s, therefore defaulting to hardcoded throughput values of %u.%1u Mbps. Consider overriding the throughput manually or checking your driver.\n", hard_iface->net_dev->name, BATADV_THROUGHPUT_DEFAULT_VALUE / 10, -- GitLab From 9bcbb6104a344d3526e185ee1e7b985509914e90 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Tue, 21 Jan 2025 04:40:16 +0000 Subject: [PATCH 0006/1585] KVM: arm64: Flush hyp bss section after initialization of variables in bss To determine CPU features during initialization, the nVHE hypervisor utilizes sanitized values of the host's CPU features registers. These values, stored in u64 idaa64*_el1_sys_val variables are updated by the kvm_hyp_init_symbols() function at EL1. To ensure EL2 visibility with the MMU off, the data cache needs to be flushed after these updates. However, individually flushing each variable using kvm_flush_dcache_to_poc() is inefficient. These cpu feature variables would be part of the bss section of the hypervisor. Hence, flush the entire bss section of hypervisor once the initialization is complete. Fixes: 6c30bfb18d0b ("KVM: arm64: Add handlers for protected VM System Registers") Suggested-by: Fuad Tabba Signed-off-by: Lokesh Vutla Link: https://lore.kernel.org/r/20250121044016.2219256-1-lokeshvutla@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index bcc4f7e926349..0725a0b50a3e9 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2400,6 +2400,13 @@ static void kvm_hyp_init_symbols(void) kvm_nvhe_sym(id_aa64smfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64SMFR0_EL1); kvm_nvhe_sym(__icache_flags) = __icache_flags; kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits; + + /* + * Flush entire BSS since part of its data containing init symbols is read + * while the MMU is off. + */ + kvm_flush_dcache_to_poc(kvm_ksym_ref(__hyp_bss_start), + kvm_ksym_ref(__hyp_bss_end) - kvm_ksym_ref(__hyp_bss_start)); } static int __init kvm_hyp_init_protection(u32 hyp_va_bits) -- GitLab From 3429dd57f0deb1a602c2624a1dd7c4c11b6c4734 Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Fri, 17 Jan 2025 10:58:52 +0000 Subject: [PATCH 0007/1585] sched/fair: Fix inaccurate h_nr_runnable accounting with delayed dequeue set_delayed() adjusts cfs_rq->h_nr_runnable for the hierarchy when an entity is delayed irrespective of whether the entity corresponds to a task or a cfs_rq. Consider the following scenario: root / \ A B (*) delayed since B is no longer eligible on root | | Task0 Task1 <--- dequeue_task_fair() - task blocks When Task1 blocks (dequeue_entity() for task's se returns true), dequeue_entities() will continue adjusting cfs_rq->h_nr_* for the hierarchy of Task1. However, when the sched_entity corresponding to cfs_rq B is delayed, set_delayed() will adjust the h_nr_runnable for the hierarchy too leading to both dequeue_entity() and set_delayed() decrementing h_nr_runnable for the dequeue of the same task. A SCHED_WARN_ON() to inspect h_nr_runnable post its update in dequeue_entities() like below: cfs_rq->h_nr_runnable -= h_nr_runnable; SCHED_WARN_ON(((int) cfs_rq->h_nr_runnable) < 0); is consistently tripped when running wakeup intensive workloads like hackbench in a cgroup. This error is self correcting since cfs_rq are per-cpu and cannot migrate. The entitiy is either picked for full dequeue or is requeued when a task wakes up below it. Both those paths call clear_delayed() which again increments h_nr_runnable of the hierarchy without considering if the entity corresponds to a task or not. h_nr_runnable will eventually reflect the correct value however in the interim, the incorrect values can still influence PELT calculation which uses se->runnable_weight or cfs_rq->h_nr_runnable. Since only delayed tasks take the early return path in dequeue_entities() and enqueue_task_fair(), adjust the h_nr_runnable in {set,clear}_delayed() only when a task is delayed as this path skips the h_nr_* update loops and returns early. For entities corresponding to cfs_rq, the h_nr_* update loop in the caller will do the right thing. Fixes: 76f2f783294d ("sched/eevdf: More PELT vs DELAYED_DEQUEUE") Signed-off-by: K Prateek Nayak Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Gautham R. Shenoy Tested-by: Swapnil Sapkal Link: https://lkml.kernel.org/r/20250117105852.23908-1-kprateek.nayak@amd.com --- kernel/sched/fair.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 26958431deb7a..f4e4d3ed943c7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5372,6 +5372,15 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); static void set_delayed(struct sched_entity *se) { se->sched_delayed = 1; + + /* + * Delayed se of cfs_rq have no tasks queued on them. + * Do not adjust h_nr_runnable since dequeue_entities() + * will account it for blocked tasks. + */ + if (!entity_is_task(se)) + return; + for_each_sched_entity(se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); @@ -5384,6 +5393,16 @@ static void set_delayed(struct sched_entity *se) static void clear_delayed(struct sched_entity *se) { se->sched_delayed = 0; + + /* + * Delayed se of cfs_rq have no tasks queued on them. + * Do not adjust h_nr_runnable since a dequeue has + * already accounted for it or an enqueue of a task + * below it will account for it in enqueue_task_fair(). + */ + if (!entity_is_task(se)) + return; + for_each_sched_entity(se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); -- GitLab From b893d7ff853e27aa6000fc4ca12e0ffda3318bfc Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 13 Jan 2025 12:07:57 -0600 Subject: [PATCH 0008/1585] scsi: core: Add passthrough tests for success and no failure definitions This patch adds scsi_check_passthrough() tests for the cases where a command completes successfully and when the command failed but the caller did not pass in a list of failures. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20250113180757.16691-1-michael.christie@oracle.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib_test.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/scsi_lib_test.c b/drivers/scsi/scsi_lib_test.c index 99834426a100a..ae8af0e0047a8 100644 --- a/drivers/scsi/scsi_lib_test.c +++ b/drivers/scsi/scsi_lib_test.c @@ -67,6 +67,13 @@ static void scsi_lib_test_multiple_sense(struct kunit *test) }; int i; + /* Success */ + sc.result = 0; + KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures)); + KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, NULL)); + /* Command failed but caller did not pass in a failures array */ + scsi_build_sense(&sc, 0, ILLEGAL_REQUEST, 0x91, 0x36); + KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, NULL)); /* Match end of array */ scsi_build_sense(&sc, 0, ILLEGAL_REQUEST, 0x91, 0x36); KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures)); -- GitLab From 1b3e2d4ec0c5848776cc56d2624998aa5b2f0d27 Mon Sep 17 00:00:00 2001 From: "Bao D. Nguyen" Date: Mon, 13 Jan 2025 10:32:07 -0800 Subject: [PATCH 0009/1585] scsi: ufs: core: Fix the HIGH/LOW_TEMP Bit Definitions According to the UFS Device Specification, the dExtendedUFSFeaturesSupport defines the support for TOO_HIGH_TEMPERATURE as bit[4] and the TOO_LOW_TEMPERATURE as bit[5]. Correct the code to match with the UFS device specification definition. Cc: stable@vger.kernel.org Fixes: e88e2d32200a ("scsi: ufs: core: Probe for temperature notification support") Signed-off-by: Bao D. Nguyen Link: https://lore.kernel.org/r/69992b3e3e3434a5c7643be5a64de48be892ca46.1736793068.git.quic_nguyenb@quicinc.com Reviewed-by: Avri Altman Reviewed-by: Peter Wang Signed-off-by: Martin K. Petersen --- include/ufs/ufs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ufs/ufs.h b/include/ufs/ufs.h index 89672ad8c3bb0..f151feb0ca8c7 100644 --- a/include/ufs/ufs.h +++ b/include/ufs/ufs.h @@ -385,8 +385,8 @@ enum { /* Possible values for dExtendedUFSFeaturesSupport */ enum { - UFS_DEV_LOW_TEMP_NOTIF = BIT(4), - UFS_DEV_HIGH_TEMP_NOTIF = BIT(5), + UFS_DEV_HIGH_TEMP_NOTIF = BIT(4), + UFS_DEV_LOW_TEMP_NOTIF = BIT(5), UFS_DEV_EXT_TEMP_NOTIF = BIT(6), UFS_DEV_HPB_SUPPORT = BIT(7), UFS_DEV_WRITE_BOOSTER_SUP = BIT(8), -- GitLab From c9d2782988df354b5a2db00be93920b4ecdde7a2 Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Tue, 14 Jan 2025 10:50:41 +0800 Subject: [PATCH 0010/1585] scsi: target: core: Add line break to status show To ensure the output is not tangled with the shell prompt, add a line break to clearly display the status. Signed-off-by: Guixin Liu Link: https://lore.kernel.org/r/20250114025041.97301-1-kanie@linux.alibaba.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/target_core_stat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index c42cbde8a31b3..210648a0092e2 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -117,9 +117,9 @@ static ssize_t target_stat_tgt_status_show(struct config_item *item, char *page) { if (to_stat_tgt_dev(item)->export_count) - return snprintf(page, PAGE_SIZE, "activated"); + return snprintf(page, PAGE_SIZE, "activated\n"); else - return snprintf(page, PAGE_SIZE, "deactivated"); + return snprintf(page, PAGE_SIZE, "deactivated\n"); } static ssize_t target_stat_tgt_non_access_lus_show(struct config_item *item, -- GitLab From 8c09f612b2937da109ed0df583ace3a29fc95a93 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Tue, 14 Jan 2025 20:12:05 +0200 Subject: [PATCH 0011/1585] scsi: ufs: core: Simplify temperature exception event handling This commit simplifies the temperature exception event handling by removing the ufshcd_temp_exception_event_handler() function and directly calling ufs_hwmon_notify_event() in ufshcd_exception_event_handler(). The ufshcd_temp_exception_event_handler() function contained a placeholder comment for platform vendors to add additional steps if required. However, since its introduction a few years ago, no vendor has added any additional steps. Therefore, the placeholder function is removed to streamline the code. Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20250114181205.153760-1-avri.altman@wdc.com Reviewed-by: Bean Huo Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 0920a443588c2..f6c38cf103820 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5976,24 +5976,6 @@ static void ufshcd_bkops_exception_event_handler(struct ufs_hba *hba) __func__, err); } -static void ufshcd_temp_exception_event_handler(struct ufs_hba *hba, u16 status) -{ - u32 value; - - if (ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR, - QUERY_ATTR_IDN_CASE_ROUGH_TEMP, 0, 0, &value)) - return; - - dev_info(hba->dev, "exception Tcase %d\n", value - 80); - - ufs_hwmon_notify_event(hba, status & MASK_EE_URGENT_TEMP); - - /* - * A placeholder for the platform vendors to add whatever additional - * steps required - */ -} - static int __ufshcd_wb_toggle(struct ufs_hba *hba, bool set, enum flag_idn idn) { u8 index; @@ -6214,7 +6196,7 @@ static void ufshcd_exception_event_handler(struct work_struct *work) ufshcd_bkops_exception_event_handler(hba); if (status & hba->ee_drv_mask & MASK_EE_URGENT_TEMP) - ufshcd_temp_exception_event_handler(hba, status); + ufs_hwmon_notify_event(hba, status & MASK_EE_URGENT_TEMP); ufs_debugfs_exception_event(hba, status); } -- GitLab From c8084a89bd91b05f51a36bff61f63a94c800b0d6 Mon Sep 17 00:00:00 2001 From: Yu-Chun Lin Date: Sun, 19 Jan 2025 16:43:12 +0800 Subject: [PATCH 0012/1585] Input: wdt87xx_i2c - fix compiler warning As reported by the kernel test robot, the following warning occur: >> drivers/input/touchscreen/wdt87xx_i2c.c:1166:36: warning: 'wdt87xx_acpi_id' defined but not used [-Wunused-const-variable=] 1166 | static const struct acpi_device_id wdt87xx_acpi_id[] = { | ^~~~~~~~~~~~~~~ The 'wdt87xx_acpi_id' array is only used when CONFIG_ACPI is enabled. Wrapping its definition and 'MODULE_DEVICE_TABLE' in '#ifdef CONFIG_ACPI' prevents a compiler warning when ACPI is disabled. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501181549.uzdlBwuN-lkp@intel.com/ Signed-off-by: Yu-Chun Lin Link: https://lore.kernel.org/r/20250119084312.1851486-1-eleanor15x@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/wdt87xx_i2c.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/touchscreen/wdt87xx_i2c.c b/drivers/input/touchscreen/wdt87xx_i2c.c index 27941245e962f..88d376090e6e6 100644 --- a/drivers/input/touchscreen/wdt87xx_i2c.c +++ b/drivers/input/touchscreen/wdt87xx_i2c.c @@ -1153,11 +1153,13 @@ static const struct i2c_device_id wdt87xx_dev_id[] = { }; MODULE_DEVICE_TABLE(i2c, wdt87xx_dev_id); +#ifdef CONFIG_ACPI static const struct acpi_device_id wdt87xx_acpi_id[] = { { "WDHT0001", 0 }, { } }; MODULE_DEVICE_TABLE(acpi, wdt87xx_acpi_id); +#endif static struct i2c_driver wdt87xx_driver = { .probe = wdt87xx_ts_probe, -- GitLab From e7e34ffc976aaae4f465b7898303241b81ceefc3 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 20 Jan 2025 20:35:28 +0100 Subject: [PATCH 0013/1585] batman-adv: Ignore neighbor throughput metrics in error case If a temporary error happened in the evaluation of the neighbor throughput information, then the invalid throughput result should not be stored in the throughtput EWMA. Cc: stable@vger.kernel.org Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v_elp.c | 50 ++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index fbf499bcc6718..65e52de52bcd2 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -59,11 +59,13 @@ static void batadv_v_elp_start_timer(struct batadv_hard_iface *hard_iface) /** * batadv_v_elp_get_throughput() - get the throughput towards a neighbour * @neigh: the neighbour for which the throughput has to be obtained + * @pthroughput: calculated throughput towards the given neighbour in multiples + * of 100kpbs (a value of '1' equals 0.1Mbps, '10' equals 1Mbps, etc). * - * Return: The throughput towards the given neighbour in multiples of 100kpbs - * (a value of '1' equals 0.1Mbps, '10' equals 1Mbps, etc). + * Return: true when value behind @pthroughput was set */ -static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) +static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh, + u32 *pthroughput) { struct batadv_hard_iface *hard_iface = neigh->if_incoming; struct net_device *soft_iface = hard_iface->soft_iface; @@ -77,14 +79,16 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) * batman-adv interface */ if (!soft_iface) - return BATADV_THROUGHPUT_DEFAULT_VALUE; + return false; /* if the user specified a customised value for this interface, then * return it directly */ throughput = atomic_read(&hard_iface->bat_v.throughput_override); - if (throughput != 0) - return throughput; + if (throughput != 0) { + *pthroughput = throughput; + return true; + } /* if this is a wireless device, then ask its throughput through * cfg80211 API @@ -111,19 +115,24 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) * possible to delete this neighbor. For now set * the throughput metric to 0. */ - return 0; + *pthroughput = 0; + return true; } if (ret) goto default_throughput; - if (sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT)) - return sinfo.expected_throughput / 100; + if (sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT)) { + *pthroughput = sinfo.expected_throughput / 100; + return true; + } /* try to estimate the expected throughput based on reported tx * rates */ - if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE)) - return cfg80211_calculate_bitrate(&sinfo.txrate) / 3; + if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE)) { + *pthroughput = cfg80211_calculate_bitrate(&sinfo.txrate) / 3; + return true; + } goto default_throughput; } @@ -142,8 +151,10 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX; throughput = link_settings.base.speed; - if (throughput && throughput != SPEED_UNKNOWN) - return throughput * 10; + if (throughput && throughput != SPEED_UNKNOWN) { + *pthroughput = throughput * 10; + return true; + } } default_throughput: @@ -157,7 +168,8 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) } /* if none of the above cases apply, return the base_throughput */ - return BATADV_THROUGHPUT_DEFAULT_VALUE; + *pthroughput = BATADV_THROUGHPUT_DEFAULT_VALUE; + return true; } /** @@ -169,15 +181,21 @@ void batadv_v_elp_throughput_metric_update(struct work_struct *work) { struct batadv_hardif_neigh_node_bat_v *neigh_bat_v; struct batadv_hardif_neigh_node *neigh; + u32 throughput; + bool valid; neigh_bat_v = container_of(work, struct batadv_hardif_neigh_node_bat_v, metric_work); neigh = container_of(neigh_bat_v, struct batadv_hardif_neigh_node, bat_v); - ewma_throughput_add(&neigh->bat_v.throughput, - batadv_v_elp_get_throughput(neigh)); + valid = batadv_v_elp_get_throughput(neigh, &throughput); + if (!valid) + goto put_neigh; + + ewma_throughput_add(&neigh->bat_v.throughput, throughput); +put_neigh: /* decrement refcounter to balance increment performed before scheduling * this task */ -- GitLab From 8c8ecc98f5c65947b0070a24bac11e12e47cc65d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 20 Jan 2025 00:06:11 +0100 Subject: [PATCH 0014/1585] batman-adv: Drop unmanaged ELP metric worker The ELP worker needs to calculate new metric values for all neighbors "reachable" over an interface. Some of the used metric sources require locks which might need to sleep. This sleep is incompatible with the RCU list iterator used for the recorded neighbors. The initial approach to work around of this problem was to queue another work item per neighbor and then run this in a new context. Even when this solved the RCU vs might_sleep() conflict, it has a major problems: Nothing was stopping the work item in case it is not needed anymore - for example because one of the related interfaces was removed or the batman-adv module was unloaded - resulting in potential invalid memory accesses. Directly canceling the metric worker also has various problems: * cancel_work_sync for a to-be-deactivated interface is called with rtnl_lock held. But the code in the ELP metric worker also tries to use rtnl_lock() - which will never return in this case. This also means that cancel_work_sync would never return because it is waiting for the worker to finish. * iterating over the neighbor list for the to-be-deactivated interface is currently done using the RCU specific methods. Which means that it is possible to miss items when iterating over it without the associated spinlock - a behaviour which is acceptable for a periodic metric check but not for a cleanup routine (which must "stop" all still running workers) The better approch is to get rid of the per interface neighbor metric worker and handle everything in the interface worker. The original problems are solved by: * creating a list of neighbors which require new metric information inside the RCU protected context, gathering the metric according to the new list outside the RCU protected context * only use rcu_trylock inside metric gathering code to avoid a deadlock when the cancel_delayed_work_sync is called in the interface removal code (which is called with the rtnl_lock held) Cc: stable@vger.kernel.org Fixes: c833484e5f38 ("batman-adv: ELP - compute the metric based on the estimated throughput") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v.c | 2 -- net/batman-adv/bat_v_elp.c | 71 ++++++++++++++++++++++++++------------ net/batman-adv/bat_v_elp.h | 2 -- net/batman-adv/types.h | 3 -- 4 files changed, 48 insertions(+), 30 deletions(-) diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index ac11f1f08db0f..d35479c465e2c 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -113,8 +113,6 @@ static void batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh) { ewma_throughput_init(&hardif_neigh->bat_v.throughput); - INIT_WORK(&hardif_neigh->bat_v.metric_work, - batadv_v_elp_throughput_metric_update); } /** diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 65e52de52bcd2..b065578b4436e 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +43,18 @@ #include "routing.h" #include "send.h" +/** + * struct batadv_v_metric_queue_entry - list of hardif neighbors which require + * and metric update + */ +struct batadv_v_metric_queue_entry { + /** @hardif_neigh: hardif neighbor scheduled for metric update */ + struct batadv_hardif_neigh_node *hardif_neigh; + + /** @list: list node for metric_queue */ + struct list_head list; +}; + /** * batadv_v_elp_start_timer() - restart timer for ELP periodic work * @hard_iface: the interface for which the timer has to be reset @@ -137,10 +151,17 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh, goto default_throughput; } + /* only use rtnl_trylock because the elp worker will be cancelled while + * the rntl_lock is held. the cancel_delayed_work_sync() would otherwise + * wait forever when the elp work_item was started and it is then also + * trying to rtnl_lock + */ + if (!rtnl_trylock()) + return false; + /* if not a wifi interface, check if this device provides data via * ethtool (e.g. an Ethernet adapter) */ - rtnl_lock(); ret = __ethtool_get_link_ksettings(hard_iface->net_dev, &link_settings); rtnl_unlock(); if (ret == 0) { @@ -175,31 +196,19 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh, /** * batadv_v_elp_throughput_metric_update() - worker updating the throughput * metric of a single hop neighbour - * @work: the work queue item + * @neigh: the neighbour to probe */ -void batadv_v_elp_throughput_metric_update(struct work_struct *work) +static void +batadv_v_elp_throughput_metric_update(struct batadv_hardif_neigh_node *neigh) { - struct batadv_hardif_neigh_node_bat_v *neigh_bat_v; - struct batadv_hardif_neigh_node *neigh; u32 throughput; bool valid; - neigh_bat_v = container_of(work, struct batadv_hardif_neigh_node_bat_v, - metric_work); - neigh = container_of(neigh_bat_v, struct batadv_hardif_neigh_node, - bat_v); - valid = batadv_v_elp_get_throughput(neigh, &throughput); if (!valid) - goto put_neigh; + return; ewma_throughput_add(&neigh->bat_v.throughput, throughput); - -put_neigh: - /* decrement refcounter to balance increment performed before scheduling - * this task - */ - batadv_hardif_neigh_put(neigh); } /** @@ -273,14 +282,16 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh) */ static void batadv_v_elp_periodic_work(struct work_struct *work) { + struct batadv_v_metric_queue_entry *metric_entry; + struct batadv_v_metric_queue_entry *metric_safe; struct batadv_hardif_neigh_node *hardif_neigh; struct batadv_hard_iface *hard_iface; struct batadv_hard_iface_bat_v *bat_v; struct batadv_elp_packet *elp_packet; + struct list_head metric_queue; struct batadv_priv *bat_priv; struct sk_buff *skb; u32 elp_interval; - bool ret; bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work); hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v); @@ -316,6 +327,8 @@ static void batadv_v_elp_periodic_work(struct work_struct *work) atomic_inc(&hard_iface->bat_v.elp_seqno); + INIT_LIST_HEAD(&metric_queue); + /* The throughput metric is updated on each sent packet. This way, if a * node is dead and no longer sends packets, batman-adv is still able to * react timely to its death. @@ -340,16 +353,28 @@ static void batadv_v_elp_periodic_work(struct work_struct *work) /* Reading the estimated throughput from cfg80211 is a task that * may sleep and that is not allowed in an rcu protected - * context. Therefore schedule a task for that. + * context. Therefore add it to metric_queue and process it + * outside rcu protected context. */ - ret = queue_work(batadv_event_workqueue, - &hardif_neigh->bat_v.metric_work); - - if (!ret) + metric_entry = kzalloc(sizeof(*metric_entry), GFP_ATOMIC); + if (!metric_entry) { batadv_hardif_neigh_put(hardif_neigh); + continue; + } + + metric_entry->hardif_neigh = hardif_neigh; + list_add(&metric_entry->list, &metric_queue); } rcu_read_unlock(); + list_for_each_entry_safe(metric_entry, metric_safe, &metric_queue, list) { + batadv_v_elp_throughput_metric_update(metric_entry->hardif_neigh); + + batadv_hardif_neigh_put(metric_entry->hardif_neigh); + list_del(&metric_entry->list); + kfree(metric_entry); + } + restart_timer: batadv_v_elp_start_timer(hard_iface); out: diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h index 9e2740195fa2d..c9cb0a3071004 100644 --- a/net/batman-adv/bat_v_elp.h +++ b/net/batman-adv/bat_v_elp.h @@ -10,7 +10,6 @@ #include "main.h" #include -#include int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface); void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface); @@ -19,6 +18,5 @@ void batadv_v_elp_iface_activate(struct batadv_hard_iface *primary_iface, void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface); int batadv_v_elp_packet_recv(struct sk_buff *skb, struct batadv_hard_iface *if_incoming); -void batadv_v_elp_throughput_metric_update(struct work_struct *work); #endif /* _NET_BATMAN_ADV_BAT_V_ELP_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 04f6398b3a40e..85a50096f5b24 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -596,9 +596,6 @@ struct batadv_hardif_neigh_node_bat_v { * neighbor */ unsigned long last_unicast_tx; - - /** @metric_work: work queue callback item for metric update */ - struct work_struct metric_work; }; /** -- GitLab From 3fafa6a02be219ddd05d6201911534a34135cb82 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 20 Jan 2025 15:35:01 +0100 Subject: [PATCH 0015/1585] dt-bindings: interrupt-controller: microchip,lan966x-oic: Clarify endpoint use Reword the description, to make it clear that the LAN966x Outbound Interrupt Controller is used only in PCI endpoint mode. Signed-off-by: Geert Uytterhoeven Signed-off-by: Thomas Gleixner Acked-by: Krzysztof Kozlowski Acked-by: Herve Codina Link: https://lore.kernel.org/all/247b1185c93610100f3f8c9e0ab2c1506e53e1f4.1737383314.git.geert+renesas@glider.be --- .../bindings/interrupt-controller/microchip,lan966x-oic.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml b/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml index b2adc71741770..dca16e202da99 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml @@ -14,9 +14,8 @@ allOf: description: | The Microchip LAN966x outband interrupt controller (OIC) maps the internal - interrupt sources of the LAN966x device to an external interrupt. - When the LAN966x device is used as a PCI device, the external interrupt is - routed to the PCI interrupt. + interrupt sources of the LAN966x device to a PCI interrupt when the LAN966x + device is used as a PCI device. properties: compatible: -- GitLab From e06c9e3682f58fbeb632b7b866bb4fe66a4a4b42 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 20 Jan 2025 15:35:02 +0100 Subject: [PATCH 0016/1585] irqchip/lan966x-oic: Make CONFIG_LAN966X_OIC depend on CONFIG_MCHP_LAN966X_PCI The Microchip LAN966x outband interrupt controller is only present on Microchip LAN966x SoCs, and only used in PCI endpoint mode. Hence add a dependency on MCHP_LAN966X_PCI, to prevent asking the user about this driver when configuring a kernel without Microchip LAN966x PCIe support. Fixes: 3e3a7b35332924c8 ("irqchip: Add support for LAN966x OIC") Signed-off-by: Geert Uytterhoeven Signed-off-by: Thomas Gleixner Acked-by: Herve Codina Link: https://lore.kernel.org/all/28e8a605e72ee45e27f0d06b2b71366159a9c782.1737383314.git.geert+renesas@glider.be --- drivers/irqchip/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index be063bfb50c4b..c11b9965c4ad9 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -169,6 +169,7 @@ config IXP4XX_IRQ config LAN966X_OIC tristate "Microchip LAN966x OIC Support" + depends on MCHP_LAN966X_PCI || COMPILE_TEST select GENERIC_IRQ_CHIP select IRQ_DOMAIN help -- GitLab From d3d380eded7ee5fc2fc53b3b0e72365ded025c4a Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 9 Jan 2025 14:30:47 +0100 Subject: [PATCH 0017/1585] nvme-fc: go straight to connecting state when initializing The initial controller initialization mimiks the reconnect loop behavior by switching from NEW to RESETTING and then to CONNECTING. The transition from NEW to CONNECTING is a valid transition, so there is no point entering the RESETTING state. TCP and RDMA also transition directly to CONNECTING state. Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 094be164ffdc0..7409da42b9ee5 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3578,8 +3578,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); spin_unlock_irqrestore(&rport->lock, flags); - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) || - !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { dev_err(ctrl->ctrl.device, "NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum); goto fail_ctrl; -- GitLab From 294b2b7516fd06a8dd82e4a6118f318ec521e706 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 9 Jan 2025 14:30:48 +0100 Subject: [PATCH 0018/1585] nvme: handle connectivity loss in nvme_set_queue_count When the set feature attempts fails with any NVME status code set in nvme_set_queue_count, the function still report success. Though the numbers of queues set to 0. This is done to support controllers in degraded state (the admin queue is still up and running but no IO queues). Though there is an exception. When nvme_set_features reports an host path error, nvme_set_queue_count should propagate this error as the connectivity is lost, which means also the admin queue is not working anymore. Fixes: 9a0be7abb62f ("nvme: refactor set_queue_count") Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0d21258e22833..2bcd9f710cb65 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1695,7 +1695,13 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count) status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, NULL, 0, &result); - if (status < 0) + + /* + * It's either a kernel error or the host observed a connection + * lost. In either case it's not possible communicate with the + * controller and thus enter the error code path. + */ + if (status < 0 || status == NVME_SC_HOST_PATH_ERROR) return status; /* -- GitLab From ee59e3820ca92a9f4307ae23dfc7229dc8b8d400 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 9 Jan 2025 14:30:49 +0100 Subject: [PATCH 0019/1585] nvme-fc: do not ignore connectivity loss during connecting When a connectivity loss occurs while nvme_fc_create_assocation is being executed, it's possible that the ctrl ends up stuck in the LIVE state: 1) nvme nvme10: NVME-FC{10}: create association : ... 2) nvme nvme10: NVME-FC{10}: controller connectivity lost. Awaiting Reconnect nvme nvme10: queue_size 128 > ctrl maxcmd 32, reducing to maxcmd 3) nvme nvme10: Could not set queue count (880) nvme nvme10: Failed to configure AEN (cfg 900) 4) nvme nvme10: NVME-FC{10}: controller connect complete 5) nvme nvme10: failed nvme_keep_alive_end_io error=4 A connection attempt starts 1) and the ctrl is in state CONNECTING. Shortly after the LLDD driver detects a connection lost event and calls nvme_fc_ctrl_connectivity_loss 2). Because we are still in CONNECTING state, this event is ignored. nvme_fc_create_association continues to run in parallel and tries to communicate with the controller and these commands will fail. Though these errors are filtered out, e.g in 3) setting the I/O queues numbers fails which leads to an early exit in nvme_fc_create_io_queues. Because the number of IO queues is 0 at this point, there is nothing left in nvme_fc_create_association which could detected the connection drop. Thus the ctrl enters LIVE state 4). Eventually the keep alive handler times out 5) but because nothing is being done, the ctrl stays in LIVE state. There is already the ASSOC_FAILED flag to track connectivity loss event but this bit is set too late in the recovery code path. Move this into the connectivity loss event handler and synchronize it with the state change. This ensures that the ASSOC_FAILED flag is seen by nvme_fc_create_io_queues and it does not enter the LIVE state after a connectivity loss event. If the connectivity loss event happens after we entered the LIVE state the normal error recovery path is executed. Signed-off-by: Daniel Wagner Reviewed-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 7409da42b9ee5..55884d3df6f29 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -781,11 +781,19 @@ nvme_fc_abort_lsops(struct nvme_fc_rport *rport) static void nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl) { + enum nvme_ctrl_state state; + unsigned long flags; + dev_info(ctrl->ctrl.device, "NVME-FC{%d}: controller connectivity lost. Awaiting " "Reconnect", ctrl->cnum); - switch (nvme_ctrl_state(&ctrl->ctrl)) { + spin_lock_irqsave(&ctrl->lock, flags); + set_bit(ASSOC_FAILED, &ctrl->flags); + state = nvme_ctrl_state(&ctrl->ctrl); + spin_unlock_irqrestore(&ctrl->lock, flags); + + switch (state) { case NVME_CTRL_NEW: case NVME_CTRL_LIVE: /* @@ -2542,7 +2550,6 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) */ if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { __nvme_fc_abort_outstanding_ios(ctrl, true); - set_bit(ASSOC_FAILED, &ctrl->flags); dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: transport error during (re)connect\n", ctrl->cnum); @@ -3167,12 +3174,18 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) else ret = nvme_fc_recreate_io_queues(ctrl); } - if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) - ret = -EIO; if (ret) goto out_term_aen_ops; - changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); + spin_lock_irqsave(&ctrl->lock, flags); + if (!test_bit(ASSOC_FAILED, &ctrl->flags)) + changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); + else + ret = -EIO; + spin_unlock_irqrestore(&ctrl->lock, flags); + + if (ret) + goto out_term_aen_ops; ctrl->ctrl.nr_reconnects = 0; -- GitLab From 27af31e44949fa85550176520ef7086a0d00fd7b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 16 Jan 2025 18:07:45 +0200 Subject: [PATCH 0020/1585] hrtimers: Mark is_migration_base() with __always_inline When is_migration_base() is unused, it prevents kernel builds with clang, `make W=1` and CONFIG_WERROR=y: kernel/time/hrtimer.c:156:20: error: unused function 'is_migration_base' [-Werror,-Wunused-function] 156 | static inline bool is_migration_base(struct hrtimer_clock_base *base) | ^~~~~~~~~~~~~~~~~ Fix this by marking it with __always_inline. [ tglx: Use __always_inline instead of __maybe_unused and move it into the usage sites conditional ] Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250116160745.243358-1-andriy.shevchenko@linux.intel.com --- kernel/time/hrtimer.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index f6d8df94045c9..4fb81f8c6f1c7 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -145,11 +145,6 @@ static struct hrtimer_cpu_base migration_cpu_base = { #define migration_base migration_cpu_base.clock_base[0] -static inline bool is_migration_base(struct hrtimer_clock_base *base) -{ - return base == &migration_base; -} - /* * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock * means that all timers which are tied to this base via timer->base are @@ -275,11 +270,6 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, #else /* CONFIG_SMP */ -static inline bool is_migration_base(struct hrtimer_clock_base *base) -{ - return false; -} - static inline struct hrtimer_clock_base * lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) __acquires(&timer->base->cpu_base->lock) @@ -1370,6 +1360,18 @@ static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base, } } +#ifdef CONFIG_SMP +static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) +{ + return base == &migration_base; +} +#else +static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) +{ + return false; +} +#endif + /* * This function is called on PREEMPT_RT kernels when the fast path * deletion of a timer failed because the timer callback function was -- GitLab From 53dac345395c0d2493cbc2f4c85fe38aef5b63f5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 18 Jan 2025 00:24:33 +0100 Subject: [PATCH 0021/1585] hrtimers: Force migrate away hrtimers queued after CPUHP_AP_HRTIMERS_DYING hrtimers are migrated away from the dying CPU to any online target at the CPUHP_AP_HRTIMERS_DYING stage in order not to delay bandwidth timers handling tasks involved in the CPU hotplug forward progress. However wakeups can still be performed by the outgoing CPU after CPUHP_AP_HRTIMERS_DYING. Those can result again in bandwidth timers being armed. Depending on several considerations (crystal ball power management based election, earliest timer already enqueued, timer migration enabled or not), the target may eventually be the current CPU even if offline. If that happens, the timer is eventually ignored. The most notable example is RCU which had to deal with each and every of those wake-ups by deferring them to an online CPU, along with related workarounds: _ e787644caf76 (rcu: Defer RCU kthreads wakeup when CPU is dying) _ 9139f93209d1 (rcu/nocb: Fix RT throttling hrtimer armed from offline CPU) _ f7345ccc62a4 (rcu/nocb: Fix rcuog wake-up from offline softirq) The problem isn't confined to RCU though as the stop machine kthread (which runs CPUHP_AP_HRTIMERS_DYING) reports its completion at the end of its work through cpu_stop_signal_done() and performs a wake up that eventually arms the deadline server timer: WARNING: CPU: 94 PID: 588 at kernel/time/hrtimer.c:1086 hrtimer_start_range_ns+0x289/0x2d0 CPU: 94 UID: 0 PID: 588 Comm: migration/94 Not tainted Stopper: multi_cpu_stop+0x0/0x120 <- stop_machine_cpuslocked+0x66/0xc0 RIP: 0010:hrtimer_start_range_ns+0x289/0x2d0 Call Trace: start_dl_timer enqueue_dl_entity dl_server_start enqueue_task_fair enqueue_task ttwu_do_activate try_to_wake_up complete cpu_stopper_thread Instead of providing yet another bandaid to work around the situation, fix it in the hrtimers infrastructure instead: always migrate away a timer to an online target whenever it is enqueued from an offline CPU. This will also allow to revert all the above RCU disgraceful hacks. Fixes: 5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU earlier") Reported-by: Vlad Poenaru Reported-by: Usama Arif Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Tested-by: Paul E. McKenney Link: https://lore.kernel.org/all/20250117232433.24027-1-frederic@kernel.org Closes: 20241213203739.1519801-1-usamaarif642@gmail.com --- include/linux/hrtimer_defs.h | 1 + kernel/time/hrtimer.c | 103 ++++++++++++++++++++++++++++------- 2 files changed, 83 insertions(+), 21 deletions(-) diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h index c3b4b7ed7c163..84a5045f80f36 100644 --- a/include/linux/hrtimer_defs.h +++ b/include/linux/hrtimer_defs.h @@ -125,6 +125,7 @@ struct hrtimer_cpu_base { ktime_t softirq_expires_next; struct hrtimer *softirq_next_timer; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; + call_single_data_t csd; } ____cacheline_aligned; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 4fb81f8c6f1c7..deb1aa32814e3 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -58,6 +58,8 @@ #define HRTIMER_ACTIVE_SOFT (HRTIMER_ACTIVE_HARD << MASK_SHIFT) #define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD) +static void retrigger_next_event(void *arg); + /* * The timer bases: * @@ -111,7 +113,8 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clockid = CLOCK_TAI, .get_time = &ktime_get_clocktai, }, - } + }, + .csd = CSD_INIT(retrigger_next_event, NULL) }; static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { @@ -124,6 +127,14 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { [CLOCK_TAI] = HRTIMER_BASE_TAI, }; +static inline bool hrtimer_base_is_online(struct hrtimer_cpu_base *base) +{ + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) + return true; + else + return likely(base->online); +} + /* * Functions and macros which are different for UP/SMP systems are kept in a * single place @@ -178,27 +189,54 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, } /* - * We do not migrate the timer when it is expiring before the next - * event on the target cpu. When high resolution is enabled, we cannot - * reprogram the target cpu hardware and we would cause it to fire - * late. To keep it simple, we handle the high resolution enabled and - * disabled case similar. + * Check if the elected target is suitable considering its next + * event and the hotplug state of the current CPU. + * + * If the elected target is remote and its next event is after the timer + * to queue, then a remote reprogram is necessary. However there is no + * guarantee the IPI handling the operation would arrive in time to meet + * the high resolution deadline. In this case the local CPU becomes a + * preferred target, unless it is offline. + * + * High and low resolution modes are handled the same way for simplicity. * * Called with cpu_base->lock of target cpu held. */ -static int -hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) +static bool hrtimer_suitable_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base, + struct hrtimer_cpu_base *new_cpu_base, + struct hrtimer_cpu_base *this_cpu_base) { ktime_t expires; + /* + * The local CPU clockevent can be reprogrammed. Also get_target_base() + * guarantees it is online. + */ + if (new_cpu_base == this_cpu_base) + return true; + + /* + * The offline local CPU can't be the default target if the + * next remote target event is after this timer. Keep the + * elected new base. An IPI will we issued to reprogram + * it as a last resort. + */ + if (!hrtimer_base_is_online(this_cpu_base)) + return true; + expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); - return expires < new_base->cpu_base->expires_next; + + return expires >= new_base->cpu_base->expires_next; } -static inline -struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, - int pinned) +static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, int pinned) { + if (!hrtimer_base_is_online(base)) { + int cpu = cpumask_any_and(cpu_online_mask, housekeeping_cpumask(HK_TYPE_TIMER)); + + return &per_cpu(hrtimer_bases, cpu); + } + #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) if (static_branch_likely(&timers_migration_enabled) && !pinned) return &per_cpu(hrtimer_bases, get_nohz_timer_target()); @@ -249,8 +287,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, raw_spin_unlock(&base->cpu_base->lock); raw_spin_lock(&new_base->cpu_base->lock); - if (new_cpu_base != this_cpu_base && - hrtimer_check_target(timer, new_base)) { + if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, + this_cpu_base)) { raw_spin_unlock(&new_base->cpu_base->lock); raw_spin_lock(&base->cpu_base->lock); new_cpu_base = this_cpu_base; @@ -259,8 +297,7 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, } WRITE_ONCE(timer->base, new_base); } else { - if (new_cpu_base != this_cpu_base && - hrtimer_check_target(timer, new_base)) { + if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, this_cpu_base)) { new_cpu_base = this_cpu_base; goto again; } @@ -706,8 +743,6 @@ static inline int hrtimer_is_hres_enabled(void) return hrtimer_hres_enabled; } -static void retrigger_next_event(void *arg); - /* * Switch to high resolution mode */ @@ -1195,6 +1230,7 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode, struct hrtimer_clock_base *base) { + struct hrtimer_cpu_base *this_cpu_base = this_cpu_ptr(&hrtimer_bases); struct hrtimer_clock_base *new_base; bool force_local, first; @@ -1206,9 +1242,15 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * and enforce reprogramming after it is queued no matter whether * it is the new first expiring timer again or not. */ - force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases); + force_local = base->cpu_base == this_cpu_base; force_local &= base->cpu_base->next_timer == timer; + /* + * Don't force local queuing if this enqueue happens on a unplugged + * CPU after hrtimer_cpu_dying() has been invoked. + */ + force_local &= this_cpu_base->online; + /* * Remove an active timer from the queue. In case it is not queued * on the current CPU, make sure that remove_hrtimer() updates the @@ -1238,8 +1280,27 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, } first = enqueue_hrtimer(timer, new_base, mode); - if (!force_local) - return first; + if (!force_local) { + /* + * If the current CPU base is online, then the timer is + * never queued on a remote CPU if it would be the first + * expiring timer there. + */ + if (hrtimer_base_is_online(this_cpu_base)) + return first; + + /* + * Timer was enqueued remote because the current base is + * already offline. If the timer is the first to expire, + * kick the remote CPU to reprogram the clock event. + */ + if (first) { + struct hrtimer_cpu_base *new_cpu_base = new_base->cpu_base; + + smp_call_function_single_async(new_cpu_base->cpu, &new_cpu_base->csd); + } + return 0; + } /* * Timer was forced to stay on the current CPU to avoid -- GitLab From 93c66fbc280747ea700bd6199633d661e3c819b3 Mon Sep 17 00:00:00 2001 From: Joe Hattori Date: Fri, 10 Jan 2025 10:05:54 +0900 Subject: [PATCH 0022/1585] powercap: call put_device() on an error path in powercap_register_control_type() powercap_register_control_type() calls device_register(), but does not release the refcount of the device when it fails. Call put_device() before returning an error to balance the refcount. Since the kfree(control_type) will be done by powercap_release(), remove the lines in powercap_register_control_type() before returning the error. This bug was found by an experimental verifier that I am developing. Signed-off-by: Joe Hattori Link: https://patch.msgid.link/20250110010554.1583411-1-joe@pf.is.s.u-tokyo.ac.jp [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/powercap/powercap_sys.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c index 52c32dcbf7d84..4112a00973382 100644 --- a/drivers/powercap/powercap_sys.c +++ b/drivers/powercap/powercap_sys.c @@ -627,8 +627,7 @@ struct powercap_control_type *powercap_register_control_type( dev_set_name(&control_type->dev, "%s", name); result = device_register(&control_type->dev); if (result) { - if (control_type->allocated) - kfree(control_type); + put_device(&control_type->dev); return ERR_PTR(result); } idr_init(&control_type->idr); -- GitLab From a216542027b892e6651c1b4e076012140d04afaf Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 10 Jan 2025 15:22:24 +0000 Subject: [PATCH 0023/1585] btrfs: fix lockdep splat while merging a relocation root When COWing a relocation tree path, at relocation.c:replace_path(), we can trigger a lockdep splat while we are in the btrfs_search_slot() call against the relocation root. This happens in that callchain at ctree.c:read_block_for_search() when we happen to find a child extent buffer already loaded through the fs tree with a lockdep class set to the fs tree. So when we attempt to lock that extent buffer through a relocation tree we have to reset the lockdep class to the class for a relocation tree, since a relocation tree has extent buffers that used to belong to a fs tree and may currently be already loaded (we swap extent buffers between the two trees at the end of replace_path()). However we are missing calls to btrfs_maybe_reset_lockdep_class() to reset the lockdep class at ctree.c:read_block_for_search() before we read lock an extent buffer, just like we did for btrfs_search_slot() in commit b40130b23ca4 ("btrfs: fix lockdep splat with reloc root extent buffers"). So add the missing btrfs_maybe_reset_lockdep_class() calls before the attempts to read lock an extent buffer at ctree.c:read_block_for_search(). The lockdep splat was reported by syzbot and it looks like this: ====================================================== WARNING: possible circular locking dependency detected 6.13.0-rc5-syzkaller-00163-gab75170520d4 #0 Not tainted ------------------------------------------------------ syz.0.0/5335 is trying to acquire lock: ffff8880545dbc38 (btrfs-tree-01){++++}-{4:4}, at: btrfs_tree_read_lock_nested+0x2f/0x250 fs/btrfs/locking.c:146 but task is already holding lock: ffff8880545dba58 (btrfs-treloc-02/1){+.+.}-{4:4}, at: btrfs_tree_lock_nested+0x2f/0x250 fs/btrfs/locking.c:189 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (btrfs-treloc-02/1){+.+.}-{4:4}: reacquire_held_locks+0x3eb/0x690 kernel/locking/lockdep.c:5374 __lock_release kernel/locking/lockdep.c:5563 [inline] lock_release+0x396/0xa30 kernel/locking/lockdep.c:5870 up_write+0x79/0x590 kernel/locking/rwsem.c:1629 btrfs_force_cow_block+0x14b3/0x1fd0 fs/btrfs/ctree.c:660 btrfs_cow_block+0x371/0x830 fs/btrfs/ctree.c:755 btrfs_search_slot+0xc01/0x3180 fs/btrfs/ctree.c:2153 replace_path+0x1243/0x2740 fs/btrfs/relocation.c:1224 merge_reloc_root+0xc46/0x1ad0 fs/btrfs/relocation.c:1692 merge_reloc_roots+0x3b3/0x980 fs/btrfs/relocation.c:1942 relocate_block_group+0xb0a/0xd40 fs/btrfs/relocation.c:3754 btrfs_relocate_block_group+0x77d/0xd90 fs/btrfs/relocation.c:4087 btrfs_relocate_chunk+0x12c/0x3b0 fs/btrfs/volumes.c:3494 __btrfs_balance+0x1b0f/0x26b0 fs/btrfs/volumes.c:4278 btrfs_balance+0xbdc/0x10c0 fs/btrfs/volumes.c:4655 btrfs_ioctl_balance+0x493/0x7c0 fs/btrfs/ioctl.c:3670 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:906 [inline] __se_sys_ioctl+0xf5/0x170 fs/ioctl.c:892 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #1 (btrfs-tree-01/1){+.+.}-{4:4}: lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5849 down_write_nested+0xa2/0x220 kernel/locking/rwsem.c:1693 btrfs_tree_lock_nested+0x2f/0x250 fs/btrfs/locking.c:189 btrfs_init_new_buffer fs/btrfs/extent-tree.c:5052 [inline] btrfs_alloc_tree_block+0x41c/0x1440 fs/btrfs/extent-tree.c:5132 btrfs_force_cow_block+0x526/0x1fd0 fs/btrfs/ctree.c:573 btrfs_cow_block+0x371/0x830 fs/btrfs/ctree.c:755 btrfs_search_slot+0xc01/0x3180 fs/btrfs/ctree.c:2153 btrfs_insert_empty_items+0x9c/0x1a0 fs/btrfs/ctree.c:4351 btrfs_insert_empty_item fs/btrfs/ctree.h:688 [inline] btrfs_insert_inode_ref+0x2bb/0xf80 fs/btrfs/inode-item.c:330 btrfs_rename_exchange fs/btrfs/inode.c:7990 [inline] btrfs_rename2+0xcb7/0x2b90 fs/btrfs/inode.c:8374 vfs_rename+0xbdb/0xf00 fs/namei.c:5067 do_renameat2+0xd94/0x13f0 fs/namei.c:5224 __do_sys_renameat2 fs/namei.c:5258 [inline] __se_sys_renameat2 fs/namei.c:5255 [inline] __x64_sys_renameat2+0xce/0xe0 fs/namei.c:5255 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #0 (btrfs-tree-01){++++}-{4:4}: check_prev_add kernel/locking/lockdep.c:3161 [inline] check_prevs_add kernel/locking/lockdep.c:3280 [inline] validate_chain+0x18ef/0x5920 kernel/locking/lockdep.c:3904 __lock_acquire+0x1397/0x2100 kernel/locking/lockdep.c:5226 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5849 down_read_nested+0xb5/0xa50 kernel/locking/rwsem.c:1649 btrfs_tree_read_lock_nested+0x2f/0x250 fs/btrfs/locking.c:146 btrfs_tree_read_lock fs/btrfs/locking.h:188 [inline] read_block_for_search+0x718/0xbb0 fs/btrfs/ctree.c:1610 btrfs_search_slot+0x1274/0x3180 fs/btrfs/ctree.c:2237 replace_path+0x1243/0x2740 fs/btrfs/relocation.c:1224 merge_reloc_root+0xc46/0x1ad0 fs/btrfs/relocation.c:1692 merge_reloc_roots+0x3b3/0x980 fs/btrfs/relocation.c:1942 relocate_block_group+0xb0a/0xd40 fs/btrfs/relocation.c:3754 btrfs_relocate_block_group+0x77d/0xd90 fs/btrfs/relocation.c:4087 btrfs_relocate_chunk+0x12c/0x3b0 fs/btrfs/volumes.c:3494 __btrfs_balance+0x1b0f/0x26b0 fs/btrfs/volumes.c:4278 btrfs_balance+0xbdc/0x10c0 fs/btrfs/volumes.c:4655 btrfs_ioctl_balance+0x493/0x7c0 fs/btrfs/ioctl.c:3670 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:906 [inline] __se_sys_ioctl+0xf5/0x170 fs/ioctl.c:892 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f other info that might help us debug this: Chain exists of: btrfs-tree-01 --> btrfs-tree-01/1 --> btrfs-treloc-02/1 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(btrfs-treloc-02/1); lock(btrfs-tree-01/1); lock(btrfs-treloc-02/1); rlock(btrfs-tree-01); *** DEADLOCK *** 8 locks held by syz.0.0/5335: #0: ffff88801e3ae420 (sb_writers#13){.+.+}-{0:0}, at: mnt_want_write_file+0x5e/0x200 fs/namespace.c:559 #1: ffff888052c760d0 (&fs_info->reclaim_bgs_lock){+.+.}-{4:4}, at: __btrfs_balance+0x4c2/0x26b0 fs/btrfs/volumes.c:4183 #2: ffff888052c74850 (&fs_info->cleaner_mutex){+.+.}-{4:4}, at: btrfs_relocate_block_group+0x775/0xd90 fs/btrfs/relocation.c:4086 #3: ffff88801e3ae610 (sb_internal#2){.+.+}-{0:0}, at: merge_reloc_root+0xf11/0x1ad0 fs/btrfs/relocation.c:1659 #4: ffff888052c76470 (btrfs_trans_num_writers){++++}-{0:0}, at: join_transaction+0x405/0xda0 fs/btrfs/transaction.c:288 #5: ffff888052c76498 (btrfs_trans_num_extwriters){++++}-{0:0}, at: join_transaction+0x405/0xda0 fs/btrfs/transaction.c:288 #6: ffff8880545db878 (btrfs-tree-01/1){+.+.}-{4:4}, at: btrfs_tree_lock_nested+0x2f/0x250 fs/btrfs/locking.c:189 #7: ffff8880545dba58 (btrfs-treloc-02/1){+.+.}-{4:4}, at: btrfs_tree_lock_nested+0x2f/0x250 fs/btrfs/locking.c:189 stack backtrace: CPU: 0 UID: 0 PID: 5335 Comm: syz.0.0 Not tainted 6.13.0-rc5-syzkaller-00163-gab75170520d4 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120 print_circular_bug+0x13a/0x1b0 kernel/locking/lockdep.c:2074 check_noncircular+0x36a/0x4a0 kernel/locking/lockdep.c:2206 check_prev_add kernel/locking/lockdep.c:3161 [inline] check_prevs_add kernel/locking/lockdep.c:3280 [inline] validate_chain+0x18ef/0x5920 kernel/locking/lockdep.c:3904 __lock_acquire+0x1397/0x2100 kernel/locking/lockdep.c:5226 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5849 down_read_nested+0xb5/0xa50 kernel/locking/rwsem.c:1649 btrfs_tree_read_lock_nested+0x2f/0x250 fs/btrfs/locking.c:146 btrfs_tree_read_lock fs/btrfs/locking.h:188 [inline] read_block_for_search+0x718/0xbb0 fs/btrfs/ctree.c:1610 btrfs_search_slot+0x1274/0x3180 fs/btrfs/ctree.c:2237 replace_path+0x1243/0x2740 fs/btrfs/relocation.c:1224 merge_reloc_root+0xc46/0x1ad0 fs/btrfs/relocation.c:1692 merge_reloc_roots+0x3b3/0x980 fs/btrfs/relocation.c:1942 relocate_block_group+0xb0a/0xd40 fs/btrfs/relocation.c:3754 btrfs_relocate_block_group+0x77d/0xd90 fs/btrfs/relocation.c:4087 btrfs_relocate_chunk+0x12c/0x3b0 fs/btrfs/volumes.c:3494 __btrfs_balance+0x1b0f/0x26b0 fs/btrfs/volumes.c:4278 btrfs_balance+0xbdc/0x10c0 fs/btrfs/volumes.c:4655 btrfs_ioctl_balance+0x493/0x7c0 fs/btrfs/ioctl.c:3670 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:906 [inline] __se_sys_ioctl+0xf5/0x170 fs/ioctl.c:892 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f1ac6985d29 Code: ff ff c3 (...) RSP: 002b:00007f1ac63fe038 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007f1ac6b76160 RCX: 00007f1ac6985d29 RDX: 0000000020000180 RSI: 00000000c4009420 RDI: 0000000000000007 RBP: 00007f1ac6a01b08 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000001 R14: 00007f1ac6b76160 R15: 00007fffda145a88 Reported-by: syzbot+63913e558c084f7f8fdc@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/677b3014.050a0220.3b53b0.0064.GAE@google.com/ Fixes: 99785998ed1c ("btrfs: reduce lock contention when eb cache miss for btree search") Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 92071ca0655f0..3dc5a35dd19b3 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1496,6 +1496,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, if (!p->skip_locking) { btrfs_unlock_up_safe(p, parent_level + 1); + btrfs_maybe_reset_lockdep_class(root, tmp); tmp_locked = true; btrfs_tree_read_lock(tmp); btrfs_release_path(p); @@ -1539,6 +1540,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, if (!p->skip_locking) { ASSERT(ret == -EAGAIN); + btrfs_maybe_reset_lockdep_class(root, tmp); tmp_locked = true; btrfs_tree_read_lock(tmp); btrfs_release_path(p); -- GitLab From 0d85f5c2dd91df6b5da454406756f463ba923b69 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 13 Jan 2025 15:01:08 +0000 Subject: [PATCH 0024/1585] btrfs: fix assertion failure when splitting ordered extent after transaction abort If while we are doing a direct IO write a transaction abort happens, we mark all existing ordered extents with the BTRFS_ORDERED_IOERR flag (done at btrfs_destroy_ordered_extents()), and then after that if we enter btrfs_split_ordered_extent() and the ordered extent has bytes left (meaning we have a bio that doesn't cover the whole ordered extent, see details at btrfs_extract_ordered_extent()), we will fail on the following assertion at btrfs_split_ordered_extent(): ASSERT(!(flags & ~BTRFS_ORDERED_TYPE_FLAGS)); because the BTRFS_ORDERED_IOERR flag is set and the definition of BTRFS_ORDERED_TYPE_FLAGS is just the union of all flags that identify the type of write (regular, nocow, prealloc, compressed, direct IO, encoded). Fix this by returning an error from btrfs_extract_ordered_extent() if we find the BTRFS_ORDERED_IOERR flag in the ordered extent. The error will be the error that resulted in the transaction abort or -EIO if no transaction abort happened. This was recently reported by syzbot with the following trace: FAULT_INJECTION: forcing a failure. name failslab, interval 1, probability 0, space 0, times 1 CPU: 0 UID: 0 PID: 5321 Comm: syz.0.0 Not tainted 6.13.0-rc5-syzkaller #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120 fail_dump lib/fault-inject.c:53 [inline] should_fail_ex+0x3b0/0x4e0 lib/fault-inject.c:154 should_failslab+0xac/0x100 mm/failslab.c:46 slab_pre_alloc_hook mm/slub.c:4072 [inline] slab_alloc_node mm/slub.c:4148 [inline] __do_kmalloc_node mm/slub.c:4297 [inline] __kmalloc_noprof+0xdd/0x4c0 mm/slub.c:4310 kmalloc_noprof include/linux/slab.h:905 [inline] kzalloc_noprof include/linux/slab.h:1037 [inline] btrfs_chunk_alloc_add_chunk_item+0x244/0x1100 fs/btrfs/volumes.c:5742 reserve_chunk_space+0x1ca/0x2c0 fs/btrfs/block-group.c:4292 check_system_chunk fs/btrfs/block-group.c:4319 [inline] do_chunk_alloc fs/btrfs/block-group.c:3891 [inline] btrfs_chunk_alloc+0x77b/0xf80 fs/btrfs/block-group.c:4187 find_free_extent_update_loop fs/btrfs/extent-tree.c:4166 [inline] find_free_extent+0x42d1/0x5810 fs/btrfs/extent-tree.c:4579 btrfs_reserve_extent+0x422/0x810 fs/btrfs/extent-tree.c:4672 btrfs_new_extent_direct fs/btrfs/direct-io.c:186 [inline] btrfs_get_blocks_direct_write+0x706/0xfa0 fs/btrfs/direct-io.c:321 btrfs_dio_iomap_begin+0xbb7/0x1180 fs/btrfs/direct-io.c:525 iomap_iter+0x697/0xf60 fs/iomap/iter.c:90 __iomap_dio_rw+0xeb9/0x25b0 fs/iomap/direct-io.c:702 btrfs_dio_write fs/btrfs/direct-io.c:775 [inline] btrfs_direct_write+0x610/0xa30 fs/btrfs/direct-io.c:880 btrfs_do_write_iter+0x2a0/0x760 fs/btrfs/file.c:1397 do_iter_readv_writev+0x600/0x880 vfs_writev+0x376/0xba0 fs/read_write.c:1050 do_pwritev fs/read_write.c:1146 [inline] __do_sys_pwritev2 fs/read_write.c:1204 [inline] __se_sys_pwritev2+0x196/0x2b0 fs/read_write.c:1195 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f1281f85d29 RSP: 002b:00007f12819fe038 EFLAGS: 00000246 ORIG_RAX: 0000000000000148 RAX: ffffffffffffffda RBX: 00007f1282176080 RCX: 00007f1281f85d29 RDX: 0000000000000001 RSI: 0000000020000240 RDI: 0000000000000005 RBP: 00007f12819fe090 R08: 0000000000000000 R09: 0000000000000003 R10: 0000000000007000 R11: 0000000000000246 R12: 0000000000000002 R13: 0000000000000000 R14: 00007f1282176080 R15: 00007ffcb9e23328 BTRFS error (device loop0 state A): Transaction aborted (error -12) BTRFS: error (device loop0 state A) in btrfs_chunk_alloc_add_chunk_item:5745: errno=-12 Out of memory BTRFS info (device loop0 state EA): forced readonly assertion failed: !(flags & ~BTRFS_ORDERED_TYPE_FLAGS), in fs/btrfs/ordered-data.c:1234 ------------[ cut here ]------------ kernel BUG at fs/btrfs/ordered-data.c:1234! Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN NOPTI CPU: 0 UID: 0 PID: 5321 Comm: syz.0.0 Not tainted 6.13.0-rc5-syzkaller #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:btrfs_split_ordered_extent+0xd8d/0xe20 fs/btrfs/ordered-data.c:1234 RSP: 0018:ffffc9000d1df2b8 EFLAGS: 00010246 RAX: 0000000000000057 RBX: 000000000006a000 RCX: 9ce21886c4195300 RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000 RBP: 0000000000000091 R08: ffffffff817f0a3c R09: 1ffff92001a3bdf4 R10: dffffc0000000000 R11: fffff52001a3bdf5 R12: 1ffff1100a45f401 R13: ffff8880522fa018 R14: dffffc0000000000 R15: 000000000006a000 FS: 00007f12819fe6c0(0000) GS:ffff88801fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000557750bd7da8 CR3: 00000000400ea000 CR4: 0000000000352ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_extract_ordered_extent fs/btrfs/direct-io.c:702 [inline] btrfs_dio_submit_io+0x4be/0x6d0 fs/btrfs/direct-io.c:737 iomap_dio_submit_bio fs/iomap/direct-io.c:85 [inline] iomap_dio_bio_iter+0x1022/0x1740 fs/iomap/direct-io.c:447 __iomap_dio_rw+0x13b7/0x25b0 fs/iomap/direct-io.c:703 btrfs_dio_write fs/btrfs/direct-io.c:775 [inline] btrfs_direct_write+0x610/0xa30 fs/btrfs/direct-io.c:880 btrfs_do_write_iter+0x2a0/0x760 fs/btrfs/file.c:1397 do_iter_readv_writev+0x600/0x880 vfs_writev+0x376/0xba0 fs/read_write.c:1050 do_pwritev fs/read_write.c:1146 [inline] __do_sys_pwritev2 fs/read_write.c:1204 [inline] __se_sys_pwritev2+0x196/0x2b0 fs/read_write.c:1195 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f1281f85d29 RSP: 002b:00007f12819fe038 EFLAGS: 00000246 ORIG_RAX: 0000000000000148 RAX: ffffffffffffffda RBX: 00007f1282176080 RCX: 00007f1281f85d29 RDX: 0000000000000001 RSI: 0000000020000240 RDI: 0000000000000005 RBP: 00007f12819fe090 R08: 0000000000000000 R09: 0000000000000003 R10: 0000000000007000 R11: 0000000000000246 R12: 0000000000000002 R13: 0000000000000000 R14: 00007f1282176080 R15: 00007ffcb9e23328 Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:btrfs_split_ordered_extent+0xd8d/0xe20 fs/btrfs/ordered-data.c:1234 RSP: 0018:ffffc9000d1df2b8 EFLAGS: 00010246 RAX: 0000000000000057 RBX: 000000000006a000 RCX: 9ce21886c4195300 RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000 RBP: 0000000000000091 R08: ffffffff817f0a3c R09: 1ffff92001a3bdf4 R10: dffffc0000000000 R11: fffff52001a3bdf5 R12: 1ffff1100a45f401 R13: ffff8880522fa018 R14: dffffc0000000000 R15: 000000000006a000 FS: 00007f12819fe6c0(0000) GS:ffff88801fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000557750bd7da8 CR3: 00000000400ea000 CR4: 0000000000352ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 In this case the transaction abort was due to (an injected) memory allocation failure when attempting to allocate a new chunk. Reported-by: syzbot+f60d8337a5c8e8d92a77@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/6777f2dd.050a0220.178762.0045.GAE@google.com/ Fixes: 52b1fdca23ac ("btrfs: handle completed ordered extents in btrfs_split_ordered_extent") Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ordered-data.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 30eceaf829a7e..4aca7475fd82c 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -1229,6 +1229,18 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent( */ if (WARN_ON_ONCE(len >= ordered->num_bytes)) return ERR_PTR(-EINVAL); + /* + * If our ordered extent had an error there's no point in continuing. + * The error may have come from a transaction abort done either by this + * task or some other concurrent task, and the transaction abort path + * iterates over all existing ordered extents and sets the flag + * BTRFS_ORDERED_IOERR on them. + */ + if (unlikely(flags & (1U << BTRFS_ORDERED_IOERR))) { + const int fs_error = BTRFS_FS_ERROR(fs_info); + + return fs_error ? ERR_PTR(fs_error) : ERR_PTR(-EIO); + } /* We cannot split partially completed ordered extents. */ if (ordered->bytes_left) { ASSERT(!(flags & ~BTRFS_ORDERED_TYPE_FLAGS)); -- GitLab From c9c863793395cf0a66c2778a29d72c48c02fbb66 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 20 Jan 2025 09:40:43 +1030 Subject: [PATCH 0025/1585] btrfs: do not output error message if a qgroup has been already cleaned up [BUG] There is a bug report that btrfs outputs the following error message: BTRFS info (device nvme0n1p2): qgroup scan completed (inconsistency flag cleared) BTRFS warning (device nvme0n1p2): failed to cleanup qgroup 0/1179: -2 [CAUSE] The error itself is pretty harmless, and the end user should ignore it. When a subvolume is fully dropped, btrfs will call btrfs_qgroup_cleanup_dropped_subvolume() to delete the qgroup. However if a qgroup rescan happened before a subvolume fully dropped, qgroup for that subvolume will not be re-created, as rescan will only create new qgroup if there is a BTRFS_ROOT_REF_KEY found. But before we drop a subvolume, the subvolume is unlinked thus there is no BTRFS_ROOT_REF_KEY. In that case, btrfs_remove_qgroup() will fail with -ENOENT and trigger the above error message. [FIX] Just ignore -ENOENT error from btrfs_remove_qgroup() inside btrfs_qgroup_cleanup_dropped_subvolume(). Reported-by: John Shand Link: https://bugzilla.suse.com/show_bug.cgi?id=1236056 Fixes: 839d6ea4f86d ("btrfs: automatically remove the subvolume qgroup") Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index b90fabe302e61..aaf16019d829a 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1897,8 +1897,11 @@ int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 su /* * It's squota and the subvolume still has numbers needed for future * accounting, in this case we can not delete it. Just skip it. + * + * Or the qgroup is already removed by a qgroup rescan. For both cases we're + * safe to ignore them. */ - if (ret == -EBUSY) + if (ret == -EBUSY || ret == -ENOENT) ret = 0; return ret; } -- GitLab From e2f0943cf37305dbdeaf9846e3c941451bcdef63 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 20 Jan 2025 17:26:10 +0000 Subject: [PATCH 0026/1585] btrfs: fix use-after-free when attempting to join an aborted transaction When we are trying to join the current transaction and if it's aborted, we read its 'aborted' field after unlocking fs_info->trans_lock and without holding any extra reference count on it. This means that a concurrent task that is aborting the transaction may free the transaction before we read its 'aborted' field, leading to a use-after-free. Fix this by reading the 'aborted' field while holding fs_info->trans_lock since any freeing task must first acquire that lock and set fs_info->running_transaction to NULL before freeing the transaction. This was reported by syzbot and Dmitry with the following stack traces from KASAN: ================================================================== BUG: KASAN: slab-use-after-free in join_transaction+0xd9b/0xda0 fs/btrfs/transaction.c:278 Read of size 4 at addr ffff888011839024 by task kworker/u4:9/1128 CPU: 0 UID: 0 PID: 1128 Comm: kworker/u4:9 Not tainted 6.13.0-rc7-syzkaller-00019-gc45323b7560e #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Workqueue: events_unbound btrfs_async_reclaim_data_space Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0x169/0x550 mm/kasan/report.c:489 kasan_report+0x143/0x180 mm/kasan/report.c:602 join_transaction+0xd9b/0xda0 fs/btrfs/transaction.c:278 start_transaction+0xaf8/0x1670 fs/btrfs/transaction.c:697 flush_space+0x448/0xcf0 fs/btrfs/space-info.c:803 btrfs_async_reclaim_data_space+0x159/0x510 fs/btrfs/space-info.c:1321 process_one_work kernel/workqueue.c:3236 [inline] process_scheduled_works+0xa66/0x1840 kernel/workqueue.c:3317 worker_thread+0x870/0xd30 kernel/workqueue.c:3398 kthread+0x2f0/0x390 kernel/kthread.c:389 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Allocated by task 5315: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:377 [inline] __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:394 kasan_kmalloc include/linux/kasan.h:260 [inline] __kmalloc_cache_noprof+0x243/0x390 mm/slub.c:4329 kmalloc_noprof include/linux/slab.h:901 [inline] join_transaction+0x144/0xda0 fs/btrfs/transaction.c:308 start_transaction+0xaf8/0x1670 fs/btrfs/transaction.c:697 btrfs_create_common+0x1b2/0x2e0 fs/btrfs/inode.c:6572 lookup_open fs/namei.c:3649 [inline] open_last_lookups fs/namei.c:3748 [inline] path_openat+0x1c03/0x3590 fs/namei.c:3984 do_filp_open+0x27f/0x4e0 fs/namei.c:4014 do_sys_openat2+0x13e/0x1d0 fs/open.c:1402 do_sys_open fs/open.c:1417 [inline] __do_sys_creat fs/open.c:1495 [inline] __se_sys_creat fs/open.c:1489 [inline] __x64_sys_creat+0x123/0x170 fs/open.c:1489 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 5336: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 kasan_save_free_info+0x40/0x50 mm/kasan/generic.c:582 poison_slab_object mm/kasan/common.c:247 [inline] __kasan_slab_free+0x59/0x70 mm/kasan/common.c:264 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2353 [inline] slab_free mm/slub.c:4613 [inline] kfree+0x196/0x430 mm/slub.c:4761 cleanup_transaction fs/btrfs/transaction.c:2063 [inline] btrfs_commit_transaction+0x2c97/0x3720 fs/btrfs/transaction.c:2598 insert_balance_item+0x1284/0x20b0 fs/btrfs/volumes.c:3757 btrfs_balance+0x992/0x10c0 fs/btrfs/volumes.c:4633 btrfs_ioctl_balance+0x493/0x7c0 fs/btrfs/ioctl.c:3670 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:906 [inline] __se_sys_ioctl+0xf5/0x170 fs/ioctl.c:892 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f The buggy address belongs to the object at ffff888011839000 which belongs to the cache kmalloc-2k of size 2048 The buggy address is located 36 bytes inside of freed 2048-byte region [ffff888011839000, ffff888011839800) The buggy address belongs to the physical page: page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x11838 head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 flags: 0xfff00000000040(head|node=0|zone=1|lastcpupid=0x7ff) page_type: f5(slab) raw: 00fff00000000040 ffff88801ac42000 ffffea0000493400 dead000000000002 raw: 0000000000000000 0000000000080008 00000001f5000000 0000000000000000 head: 00fff00000000040 ffff88801ac42000 ffffea0000493400 dead000000000002 head: 0000000000000000 0000000000080008 00000001f5000000 0000000000000000 head: 00fff00000000003 ffffea0000460e01 ffffffffffffffff 0000000000000000 head: 0000000000000008 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 57, tgid 57 (kworker/0:2), ts 67248182943, free_ts 67229742023 set_page_owner include/linux/page_owner.h:32 [inline] post_alloc_hook+0x1f3/0x230 mm/page_alloc.c:1558 prep_new_page mm/page_alloc.c:1566 [inline] get_page_from_freelist+0x365c/0x37a0 mm/page_alloc.c:3476 __alloc_pages_noprof+0x292/0x710 mm/page_alloc.c:4753 alloc_pages_mpol_noprof+0x3e1/0x780 mm/mempolicy.c:2269 alloc_slab_page+0x6a/0x110 mm/slub.c:2423 allocate_slab+0x5a/0x2b0 mm/slub.c:2589 new_slab mm/slub.c:2642 [inline] ___slab_alloc+0xc27/0x14a0 mm/slub.c:3830 __slab_alloc+0x58/0xa0 mm/slub.c:3920 __slab_alloc_node mm/slub.c:3995 [inline] slab_alloc_node mm/slub.c:4156 [inline] __do_kmalloc_node mm/slub.c:4297 [inline] __kmalloc_node_track_caller_noprof+0x2e9/0x4c0 mm/slub.c:4317 kmalloc_reserve+0x111/0x2a0 net/core/skbuff.c:609 __alloc_skb+0x1f3/0x440 net/core/skbuff.c:678 alloc_skb include/linux/skbuff.h:1323 [inline] alloc_skb_with_frags+0xc3/0x820 net/core/skbuff.c:6612 sock_alloc_send_pskb+0x91a/0xa60 net/core/sock.c:2884 sock_alloc_send_skb include/net/sock.h:1803 [inline] mld_newpack+0x1c3/0xaf0 net/ipv6/mcast.c:1747 add_grhead net/ipv6/mcast.c:1850 [inline] add_grec+0x1492/0x19a0 net/ipv6/mcast.c:1988 mld_send_cr net/ipv6/mcast.c:2114 [inline] mld_ifc_work+0x691/0xd90 net/ipv6/mcast.c:2651 page last free pid 5300 tgid 5300 stack trace: reset_page_owner include/linux/page_owner.h:25 [inline] free_pages_prepare mm/page_alloc.c:1127 [inline] free_unref_page+0xd3f/0x1010 mm/page_alloc.c:2659 __slab_free+0x2c2/0x380 mm/slub.c:4524 qlink_free mm/kasan/quarantine.c:163 [inline] qlist_free_all+0x9a/0x140 mm/kasan/quarantine.c:179 kasan_quarantine_reduce+0x14f/0x170 mm/kasan/quarantine.c:286 __kasan_slab_alloc+0x23/0x80 mm/kasan/common.c:329 kasan_slab_alloc include/linux/kasan.h:250 [inline] slab_post_alloc_hook mm/slub.c:4119 [inline] slab_alloc_node mm/slub.c:4168 [inline] __do_kmalloc_node mm/slub.c:4297 [inline] __kmalloc_noprof+0x236/0x4c0 mm/slub.c:4310 kmalloc_noprof include/linux/slab.h:905 [inline] kzalloc_noprof include/linux/slab.h:1037 [inline] fib_create_info+0xc14/0x25b0 net/ipv4/fib_semantics.c:1435 fib_table_insert+0x1f6/0x1f20 net/ipv4/fib_trie.c:1231 fib_magic+0x3d8/0x620 net/ipv4/fib_frontend.c:1112 fib_add_ifaddr+0x40c/0x5e0 net/ipv4/fib_frontend.c:1156 fib_netdev_event+0x375/0x490 net/ipv4/fib_frontend.c:1494 notifier_call_chain+0x1a5/0x3f0 kernel/notifier.c:85 __dev_notify_flags+0x207/0x400 dev_change_flags+0xf0/0x1a0 net/core/dev.c:9045 do_setlink+0xc90/0x4210 net/core/rtnetlink.c:3109 rtnl_changelink net/core/rtnetlink.c:3723 [inline] __rtnl_newlink net/core/rtnetlink.c:3875 [inline] rtnl_newlink+0x1bb6/0x2210 net/core/rtnetlink.c:4012 Memory state around the buggy address: ffff888011838f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff888011838f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff888011839000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888011839080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888011839100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Reported-by: syzbot+45212e9d87a98c3f5b42@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/678e7da5.050a0220.303755.007c.GAE@google.com/ Reported-by: Dmitry Vyukov Link: https://lore.kernel.org/linux-btrfs/CACT4Y+ZFBdo7pT8L2AzM=vegZwjp-wNkVJZQf0Ta3vZqtExaSw@mail.gmail.com/ Fixes: 871383be592b ("btrfs: add missing unlocks to transaction abort paths") Reviewed-by: Johannes Thumshirn Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 15312013f2a34..aca83a98b75a2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -274,8 +274,10 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info, cur_trans = fs_info->running_transaction; if (cur_trans) { if (TRANS_ABORTED(cur_trans)) { + const int abort_error = cur_trans->aborted; + spin_unlock(&fs_info->trans_lock); - return cur_trans->aborted; + return abort_error; } if (btrfs_blocked_trans_types[cur_trans->state] & type) { spin_unlock(&fs_info->trans_lock); -- GitLab From fdef89ce6fada462aef9cb90a140c93c8c209f0f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 21 Jan 2025 12:24:39 +0000 Subject: [PATCH 0027/1585] btrfs: avoid starting new transaction when cleaning qgroup during subvolume drop At btrfs_qgroup_cleanup_dropped_subvolume() all we want to commit the current transaction in order to have all the qgroup rfer/excl numbers up to date. However we are using btrfs_start_transaction(), which joins the current transaction if there is one that is not yet committing, but also starts a new one if there is none or if the current one is already committing (its state is >= TRANS_STATE_COMMIT_START). This later case results in unnecessary IO, wasting time and a pointless rotation of the backup roots in the super block. So instead of using btrfs_start_transaction() followed by a btrfs_commit_transaction(), use btrfs_commit_current_transaction() which achieves our purpose and avoids starting and committing new transactions. Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index aaf16019d829a..f9d3766c809b4 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1880,11 +1880,7 @@ int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 su * Commit current transaction to make sure all the rfer/excl numbers * get updated. */ - trans = btrfs_start_transaction(fs_info->quota_root, 0); - if (IS_ERR(trans)) - return PTR_ERR(trans); - - ret = btrfs_commit_transaction(trans); + ret = btrfs_commit_current_transaction(fs_info->quota_root); if (ret < 0) return ret; -- GitLab From 5e0e02f0d7e52cfc8b1adfc778dd02181d8b47b4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 15 Jan 2025 09:05:15 -0700 Subject: [PATCH 0028/1585] futex: Pass in task to futex_queue() futex_queue() -> __futex_queue() uses 'current' as the task to store in the struct futex_q->task field. This is fine for synchronous usage of the futex infrastructure, but it's not always correct when used by io_uring where the task doing the initial futex_queue() might not be available later on. This doesn't lead to any issues currently, as the io_uring side doesn't support PI futexes, but it does leave a potentially dangling pointer which is never a good idea. Have futex_queue() take a task_struct argument, and have the regular callers pass in 'current' for that. Meanwhile io_uring can just pass in NULL, as the task should never be used off that path. In theory req->tctx->task could be used here, but there's no point populating it with a task field that will never be used anyway. Reported-by: Jann Horn Signed-off-by: Jens Axboe Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/22484a23-542c-4003-b721-400688a0d055@kernel.dk --- io_uring/futex.c | 2 +- kernel/futex/core.c | 5 +++-- kernel/futex/futex.h | 11 ++++++++--- kernel/futex/pi.c | 2 +- kernel/futex/waitwake.c | 4 ++-- 5 files changed, 15 insertions(+), 9 deletions(-) diff --git a/io_uring/futex.c b/io_uring/futex.c index 30139cc150f22..e5cc208810ad5 100644 --- a/io_uring/futex.c +++ b/io_uring/futex.c @@ -338,7 +338,7 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags) hlist_add_head(&req->hash_node, &ctx->futex_list); io_ring_submit_unlock(ctx, issue_flags); - futex_queue(&ifd->q, hb); + futex_queue(&ifd->q, hb, NULL); return IOU_ISSUE_SKIP_COMPLETE; } diff --git a/kernel/futex/core.c b/kernel/futex/core.c index ebdd76b4ecbba..3db8567f5a44e 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -532,7 +532,8 @@ void futex_q_unlock(struct futex_hash_bucket *hb) futex_hb_waiters_dec(hb); } -void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb) +void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb, + struct task_struct *task) { int prio; @@ -548,7 +549,7 @@ void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb) plist_node_init(&q->list, prio); plist_add(&q->list, &hb->chain); - q->task = current; + q->task = task; } /** diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index 99b32e728c4ad..6b2f4c7eb720f 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -285,13 +285,15 @@ static inline int futex_get_value_locked(u32 *dest, u32 __user *from) } extern void __futex_unqueue(struct futex_q *q); -extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb); +extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb, + struct task_struct *task); extern int futex_unqueue(struct futex_q *q); /** * futex_queue() - Enqueue the futex_q on the futex_hash_bucket * @q: The futex_q to enqueue * @hb: The destination hash bucket + * @task: Task queueing this futex * * The hb->lock must be held by the caller, and is released here. A call to * futex_queue() is typically paired with exactly one call to futex_unqueue(). The @@ -299,11 +301,14 @@ extern int futex_unqueue(struct futex_q *q); * or nothing if the unqueue is done as part of the wake process and the unqueue * state is implicit in the state of woken task (see futex_wait_requeue_pi() for * an example). + * + * Note that @task may be NULL, for async usage of futexes. */ -static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb) +static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb, + struct task_struct *task) __releases(&hb->lock) { - __futex_queue(q, hb); + __futex_queue(q, hb, task); spin_unlock(&hb->lock); } diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c index daea650b16f51..7a941845f7eee 100644 --- a/kernel/futex/pi.c +++ b/kernel/futex/pi.c @@ -982,7 +982,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl /* * Only actually queue now that the atomic ops are done: */ - __futex_queue(&q, hb); + __futex_queue(&q, hb, current); if (trylock) { ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex); diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c index 3a10375d95218..a9056acb75eef 100644 --- a/kernel/futex/waitwake.c +++ b/kernel/futex/waitwake.c @@ -350,7 +350,7 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q, * access to the hash list and forcing another memory barrier. */ set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); - futex_queue(q, hb); + futex_queue(q, hb, current); /* Arm the timer */ if (timeout) @@ -461,7 +461,7 @@ int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken) * next futex. Queue each futex at this moment so hb can * be unlocked. */ - futex_queue(q, hb); + futex_queue(q, hb, current); continue; } -- GitLab From 915175b49f65d9edeb81659e82cbb27b621dbc17 Mon Sep 17 00:00:00 2001 From: Jinliang Zheng Date: Wed, 15 Jan 2025 20:35:25 +0800 Subject: [PATCH 0029/1585] xfs: fix the entry condition of exact EOF block allocation optimization When we call create(), lseek() and write() sequentially, offset != 0 cannot be used as a judgment condition for whether the file already has extents. Furthermore, when xfs_bmap_adjacent() has not given a better blkno, it is not necessary to use exact EOF block allocation. Suggested-by: Dave Chinner Signed-off-by: Jinliang Zheng Reviewed-by: Dave Chinner Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_bmap.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 40ad22fb808b9..0ef19f1469ec9 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3563,12 +3563,12 @@ xfs_bmap_btalloc_at_eof( int error; /* - * If there are already extents in the file, try an exact EOF block - * allocation to extend the file as a contiguous extent. If that fails, - * or it's the first allocation in a file, just try for a stripe aligned - * allocation. + * If there are already extents in the file, and xfs_bmap_adjacent() has + * given a better blkno, try an exact EOF block allocation to extend the + * file as a contiguous extent. If that fails, or it's the first + * allocation in a file, just try for a stripe aligned allocation. */ - if (ap->offset) { + if (ap->eof) { xfs_extlen_t nextminlen = 0; /* @@ -3736,7 +3736,8 @@ xfs_bmap_btalloc_best_length( int error; ap->blkno = XFS_INO_TO_FSB(args->mp, ap->ip->i_ino); - xfs_bmap_adjacent(ap); + if (!xfs_bmap_adjacent(ap)) + ap->eof = false; /* * Search for an allocation group with a single extent large enough for -- GitLab From 89841b23809f5fb12cbead142204064739fef25a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 16 Jan 2025 07:03:35 +0100 Subject: [PATCH 0030/1585] xfs: remove an out of data comment in _xfs_buf_alloc There hasn't been anything like an io_length for a long time. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_buf.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 7fbdd4b30676c..f1252ed8bd0a7 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -232,11 +232,6 @@ _xfs_buf_alloc( bp->b_mount = target->bt_mount; bp->b_flags = flags; - /* - * Set length and io_length to the same value initially. - * I/O routines should use io_length, which will be the same in - * most cases but may be reset (e.g. XFS recovery). - */ error = xfs_buf_get_maps(bp, nmaps); if (error) { kmem_cache_free(xfs_buf_cache, bp); -- GitLab From f5f0ed89f13e3e5246404a322ee85169a226bfb5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Jan 2025 06:43:21 +0100 Subject: [PATCH 0031/1585] xfs: don't call remap_verify_area with sb write protection held The XFS_IOC_EXCHANGE_RANGE ioctl with the XFS_EXCHANGE_RANGE_TO_EOF flag operates on a range bounded by the end of the file. This means the actual amount of blocks exchanged is derived from the inode size, which is only stable with the IOLOCK (i_rwsem) held. Do that, it currently calls remap_verify_area from inside the sb write protection which nests outside the IOLOCK. But this makes fsnotify_file_area_perm which is called from remap_verify_area unhappy when the kernel is built with lockdep and the recently added CONFIG_FANOTIFY_ACCESS_PERMISSIONS option. Fix this by always calling remap_verify_area before taking the write protection, and passing a 0 size to remap_verify_area similar to the FICLONE/FICLONERANGE ioctls when they are asked to clone until the file end. (Note: the size argument gets passed to fsnotify_file_area_perm, but then isn't actually used there). Fixes: 9a64d9b3109d ("xfs: introduce new file range exchange ioctl") Cc: # v6.10 Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_exchrange.c | 71 ++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 44 deletions(-) diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c index f340a2015c4c7..0b41bdfecdfbc 100644 --- a/fs/xfs/xfs_exchrange.c +++ b/fs/xfs/xfs_exchrange.c @@ -329,22 +329,6 @@ xfs_exchrange_mappings( * successfully but before locks are dropped. */ -/* Verify that we have security clearance to perform this operation. */ -static int -xfs_exchange_range_verify_area( - struct xfs_exchrange *fxr) -{ - int ret; - - ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length, - true); - if (ret) - return ret; - - return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length, - true); -} - /* * Performs necessary checks before doing a range exchange, having stabilized * mutable inode attributes via i_rwsem. @@ -355,11 +339,13 @@ xfs_exchange_range_checks( unsigned int alloc_unit) { struct inode *inode1 = file_inode(fxr->file1); + loff_t size1 = i_size_read(inode1); struct inode *inode2 = file_inode(fxr->file2); + loff_t size2 = i_size_read(inode2); uint64_t allocmask = alloc_unit - 1; int64_t test_len; uint64_t blen; - loff_t size1, size2, tmp; + loff_t tmp; int error; /* Don't touch certain kinds of inodes */ @@ -368,24 +354,25 @@ xfs_exchange_range_checks( if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2)) return -ETXTBSY; - size1 = i_size_read(inode1); - size2 = i_size_read(inode2); - /* Ranges cannot start after EOF. */ if (fxr->file1_offset > size1 || fxr->file2_offset > size2) return -EINVAL; - /* - * If the caller said to exchange to EOF, we set the length of the - * request large enough to cover everything to the end of both files. - */ if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) { + /* + * If the caller said to exchange to EOF, we set the length of + * the request large enough to cover everything to the end of + * both files. + */ fxr->length = max_t(int64_t, size1 - fxr->file1_offset, size2 - fxr->file2_offset); - - error = xfs_exchange_range_verify_area(fxr); - if (error) - return error; + } else { + /* + * Otherwise we require both ranges to end within EOF. + */ + if (fxr->file1_offset + fxr->length > size1 || + fxr->file2_offset + fxr->length > size2) + return -EINVAL; } /* @@ -401,15 +388,6 @@ xfs_exchange_range_checks( check_add_overflow(fxr->file2_offset, fxr->length, &tmp)) return -EINVAL; - /* - * We require both ranges to end within EOF, unless we're exchanging - * to EOF. - */ - if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) && - (fxr->file1_offset + fxr->length > size1 || - fxr->file2_offset + fxr->length > size2)) - return -EINVAL; - /* * Make sure we don't hit any file size limits. If we hit any size * limits such that test_length was adjusted, we abort the whole @@ -747,6 +725,7 @@ xfs_exchange_range( { struct inode *inode1 = file_inode(fxr->file1); struct inode *inode2 = file_inode(fxr->file2); + loff_t check_len = fxr->length; int ret; BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS & @@ -779,14 +758,18 @@ xfs_exchange_range( return -EBADF; /* - * If we're not exchanging to EOF, we can check the areas before - * stabilizing both files' i_size. + * If we're exchanging to EOF we can't calculate the length until taking + * the iolock. Pass a 0 length to remap_verify_area similar to the + * FICLONE and FICLONERANGE ioctls that support cloning to EOF as well. */ - if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) { - ret = xfs_exchange_range_verify_area(fxr); - if (ret) - return ret; - } + if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) + check_len = 0; + ret = remap_verify_area(fxr->file1, fxr->file1_offset, check_len, true); + if (ret) + return ret; + ret = remap_verify_area(fxr->file2, fxr->file2_offset, check_len, true); + if (ret) + return ret; /* Update cmtime if the fd/inode don't forbid it. */ if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1)) -- GitLab From 58f5c8d5ca07a2f9fa93fb073f5b1646ec482ff2 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 24 Jan 2025 13:00:33 +0200 Subject: [PATCH 0032/1585] nvmet: fix a memory leak in controller identify Simply free an allocated buffer once we copied its content to the request sgl. kmemleak complaint: unreferenced object 0xffff8cd40c388000 (size 4096): comm "kworker/2:2H", pid 14739, jiffies 4401313113 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 0): [] kmemleak_alloc+0x4a/0x90 [] __kmalloc_cache_noprof+0x35a/0x420 [] nvmet_execute_identify+0x912/0x9f0 [nvmet] [] nvmet_tcp_try_recv_pdu+0x84c/0xc90 [nvmet_tcp] [] nvmet_tcp_io_work+0x82/0x8b0 [nvmet_tcp] [] process_one_work+0x178/0x3e0 [] worker_thread+0x2ec/0x420 [] kthread+0xf0/0x120 [] ret_from_fork+0x44/0x70 [] ret_from_fork_asm+0x1a/0x30 Fixes: 84909f7decbd ("nvmet: use kzalloc instead of ZERO_PAGE in nvme_execute_identify_ns_nvm()") Signed-off-by: Sagi Grimberg Reviewed-by: Nilay Shroff Signed-off-by: Keith Busch --- drivers/nvme/target/admin-cmd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 3ddd8e44e148c..ec7f70be6daa7 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -1067,6 +1067,7 @@ static void nvme_execute_identify_ns_nvm(struct nvmet_req *req) goto out; } status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); + kfree(id); out: nvmet_req_complete(req, status); } -- GitLab From be8ee18152b0523752f3a44900363838bd1573bb Mon Sep 17 00:00:00 2001 From: Atul Kumar Pant Date: Sat, 18 Jan 2025 14:09:27 +0530 Subject: [PATCH 0033/1585] sched_ext: Fixes typos in comments Fixes some spelling errors in the comments. Signed-off-by: Atul Kumar Pant Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 8857c0709bdde..283d7f1addc50 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -416,7 +416,7 @@ struct sched_ext_ops { /** * @update_idle: Update the idle state of a CPU - * @cpu: CPU to udpate the idle state for + * @cpu: CPU to update the idle state for * @idle: whether entering or exiting the idle state * * This operation is called when @rq's CPU goes or leaves the idle @@ -1214,7 +1214,7 @@ static bool scx_kf_allowed_if_unlocked(void) /** * nldsq_next_task - Iterate to the next task in a non-local DSQ - * @dsq: user dsq being interated + * @dsq: user dsq being iterated * @cur: current position, %NULL to start iteration * @rev: walk backwards * @@ -2078,7 +2078,7 @@ static void set_task_runnable(struct rq *rq, struct task_struct *p) /* * list_add_tail() must be used. scx_ops_bypass() depends on tasks being - * appened to the runnable_list. + * appended to the runnable_list. */ list_add_tail(&p->scx.runnable_node, &rq->scx.runnable_list); } @@ -2480,7 +2480,7 @@ static struct rq *move_task_between_dsqs(struct task_struct *p, u64 enq_flags, /* * A poorly behaving BPF scheduler can live-lock the system by e.g. incessantly * banging on the same DSQ on a large NUMA system to the point where switching - * to the bypass mode can take a long time. Inject artifical delays while the + * to the bypass mode can take a long time. Inject artificial delays while the * bypass mode is switching to guarantee timely completion. */ static void scx_ops_breather(struct rq *rq) @@ -3144,7 +3144,7 @@ static struct task_struct *pick_task_scx(struct rq *rq) * * Unless overridden by ops.core_sched_before(), @p->scx.core_sched_at is used * to implement the default task ordering. The older the timestamp, the higher - * prority the task - the global FIFO ordering matching the default scheduling + * priority the task - the global FIFO ordering matching the default scheduling * behavior. * * When ops.core_sched_before() is enabled, @p->scx.core_sched_at is used to @@ -4590,7 +4590,7 @@ static int scx_cgroup_init(void) cgroup_warned_missing_idle = false; /* - * scx_tg_on/offline() are excluded thorugh scx_cgroup_rwsem. If we walk + * scx_tg_on/offline() are excluded through scx_cgroup_rwsem. If we walk * cgroups and init, all online cgroups are initialized. */ rcu_read_lock(); -- GitLab From 2279563e3a8cac367b267b09c15cf1e39c06c5cc Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Wed, 22 Jan 2025 10:05:25 +0100 Subject: [PATCH 0034/1585] sched_ext: Include task weight in the error state dump Report the task weight when dumping the task state during an error exit. Moreover, adjust the output format to display dsq_vtime, slice, and weight on the same line. This can help identify whether certain tasks were excessively prioritized or de-prioritized due to large niceness gaps. Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 283d7f1addc50..7081c7be5f622 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5277,9 +5277,10 @@ static void scx_dump_task(struct seq_buf *s, struct scx_dump_ctx *dctx, scx_get_task_state(p), p->scx.flags & ~SCX_TASK_STATE_MASK, p->scx.dsq_flags, ops_state & SCX_OPSS_STATE_MASK, ops_state >> SCX_OPSS_QSEQ_SHIFT); - dump_line(s, " sticky/holding_cpu=%d/%d dsq_id=%s dsq_vtime=%llu slice=%llu", - p->scx.sticky_cpu, p->scx.holding_cpu, dsq_id_buf, - p->scx.dsq_vtime, p->scx.slice); + dump_line(s, " sticky/holding_cpu=%d/%d dsq_id=%s", + p->scx.sticky_cpu, p->scx.holding_cpu, dsq_id_buf); + dump_line(s, " dsq_vtime=%llu slice=%llu weight=%u", + p->scx.dsq_vtime, p->scx.slice, p->scx.weight); dump_line(s, " cpus=%*pb", cpumask_pr_args(p->cpus_ptr)); if (SCX_HAS_OP(dump_task)) { -- GitLab From 74ca334338a4489173d9e50775b13fa20cbd5958 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 23 Jan 2025 13:46:06 +0100 Subject: [PATCH 0035/1585] selftests/sched_ext: Fix enum resolution All scx enums are now automatically generated from vmlinux.h and they must be initialized using the SCX_ENUM_INIT() macro. Fix the scx selftests to use this macro to properly initialize these values. Fixes: 8da7bf2cee27 ("tools/sched_ext: Receive updates from SCX repo") Reported-by: Ihor Solodrai Closes: https://lore.kernel.org/all/Z2tNK2oFDX1OPp8C@slm.duckdns.org/ Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- .../testing/selftests/sched_ext/create_dsq.c | 10 ++++---- .../selftests/sched_ext/ddsp_bogus_dsq_fail.c | 7 ++++-- .../sched_ext/ddsp_vtimelocal_fail.c | 7 ++++-- .../selftests/sched_ext/dsp_local_on.c | 1 + .../sched_ext/enq_last_no_enq_fails.c | 10 ++++---- .../sched_ext/enq_select_cpu_fails.c | 10 ++++---- tools/testing/selftests/sched_ext/exit.c | 1 + tools/testing/selftests/sched_ext/hotplug.c | 6 +++-- .../selftests/sched_ext/init_enable_count.c | 25 ++++++------------- tools/testing/selftests/sched_ext/maximal.c | 7 ++++-- tools/testing/selftests/sched_ext/minimal.c | 10 ++++---- tools/testing/selftests/sched_ext/prog_run.c | 10 ++++---- .../testing/selftests/sched_ext/reload_loop.c | 9 +++---- .../selftests/sched_ext/select_cpu_dfl.c | 7 ++++-- .../sched_ext/select_cpu_dfl_nodispatch.c | 7 ++++-- .../selftests/sched_ext/select_cpu_dispatch.c | 7 ++++-- .../sched_ext/select_cpu_dispatch_bad_dsq.c | 7 ++++-- .../sched_ext/select_cpu_dispatch_dbl_dsp.c | 7 ++++-- .../selftests/sched_ext/select_cpu_vtime.c | 7 ++++-- 19 files changed, 88 insertions(+), 67 deletions(-) diff --git a/tools/testing/selftests/sched_ext/create_dsq.c b/tools/testing/selftests/sched_ext/create_dsq.c index fa946d9146d4d..d67431f57ac65 100644 --- a/tools/testing/selftests/sched_ext/create_dsq.c +++ b/tools/testing/selftests/sched_ext/create_dsq.c @@ -14,11 +14,11 @@ static enum scx_test_status setup(void **ctx) { struct create_dsq *skel; - skel = create_dsq__open_and_load(); - if (!skel) { - SCX_ERR("Failed to open and load skel"); - return SCX_TEST_FAIL; - } + skel = create_dsq__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(create_dsq__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c index e65d22f23f3bc..b6d13496b24e8 100644 --- a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c +++ b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c @@ -15,8 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct ddsp_bogus_dsq_fail *skel; - skel = ddsp_bogus_dsq_fail__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = ddsp_bogus_dsq_fail__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(ddsp_bogus_dsq_fail__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c index abafee587cd60..af9ce4ee8baac 100644 --- a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c +++ b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c @@ -14,8 +14,11 @@ static enum scx_test_status setup(void **ctx) { struct ddsp_vtimelocal_fail *skel; - skel = ddsp_vtimelocal_fail__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = ddsp_vtimelocal_fail__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(ddsp_vtimelocal_fail__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.c b/tools/testing/selftests/sched_ext/dsp_local_on.c index 0ff27e57fe430..e1f2ce4abfe64 100644 --- a/tools/testing/selftests/sched_ext/dsp_local_on.c +++ b/tools/testing/selftests/sched_ext/dsp_local_on.c @@ -15,6 +15,7 @@ static enum scx_test_status setup(void **ctx) skel = dsp_local_on__open(); SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); skel->rodata->nr_cpus = libbpf_num_possible_cpus(); SCX_FAIL_IF(dsp_local_on__load(skel), "Failed to load skel"); diff --git a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c index 73e679953e27a..d3387ae036794 100644 --- a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c +++ b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c @@ -15,11 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct enq_last_no_enq_fails *skel; - skel = enq_last_no_enq_fails__open_and_load(); - if (!skel) { - SCX_ERR("Failed to open and load skel"); - return SCX_TEST_FAIL; - } + skel = enq_last_no_enq_fails__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(enq_last_no_enq_fails__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c index dd1350e5f002d..a80e3a3b3698c 100644 --- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c +++ b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c @@ -15,11 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct enq_select_cpu_fails *skel; - skel = enq_select_cpu_fails__open_and_load(); - if (!skel) { - SCX_ERR("Failed to open and load skel"); - return SCX_TEST_FAIL; - } + skel = enq_select_cpu_fails__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(enq_select_cpu_fails__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/exit.c b/tools/testing/selftests/sched_ext/exit.c index 31bcd06e21cd3..9451782689de1 100644 --- a/tools/testing/selftests/sched_ext/exit.c +++ b/tools/testing/selftests/sched_ext/exit.c @@ -23,6 +23,7 @@ static enum scx_test_status run(void *ctx) char buf[16]; skel = exit__open(); + SCX_ENUM_INIT(skel); skel->rodata->exit_point = tc; exit__load(skel); link = bpf_map__attach_struct_ops(skel->maps.exit_ops); diff --git a/tools/testing/selftests/sched_ext/hotplug.c b/tools/testing/selftests/sched_ext/hotplug.c index 87bf220b1bcee..1c9ceb661c43e 100644 --- a/tools/testing/selftests/sched_ext/hotplug.c +++ b/tools/testing/selftests/sched_ext/hotplug.c @@ -49,8 +49,10 @@ static enum scx_test_status test_hotplug(bool onlining, bool cbs_defined) SCX_ASSERT(is_cpu_online()); - skel = hotplug__open_and_load(); - SCX_ASSERT(skel); + skel = hotplug__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(hotplug__load(skel), "Failed to load skel"); /* Testing the offline -> online path, so go offline before starting */ if (onlining) diff --git a/tools/testing/selftests/sched_ext/init_enable_count.c b/tools/testing/selftests/sched_ext/init_enable_count.c index 97d45f1e5597e..0f3eddc7a17a0 100644 --- a/tools/testing/selftests/sched_ext/init_enable_count.c +++ b/tools/testing/selftests/sched_ext/init_enable_count.c @@ -15,22 +15,6 @@ #define SCHED_EXT 7 -static struct init_enable_count * -open_load_prog(bool global) -{ - struct init_enable_count *skel; - - skel = init_enable_count__open(); - SCX_BUG_ON(!skel, "Failed to open skel"); - - if (!global) - skel->struct_ops.init_enable_count_ops->flags |= SCX_OPS_SWITCH_PARTIAL; - - SCX_BUG_ON(init_enable_count__load(skel), "Failed to load skel"); - - return skel; -} - static enum scx_test_status run_test(bool global) { struct init_enable_count *skel; @@ -40,7 +24,14 @@ static enum scx_test_status run_test(bool global) struct sched_param param = {}; pid_t pids[num_pre_forks]; - skel = open_load_prog(global); + skel = init_enable_count__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + + if (!global) + skel->struct_ops.init_enable_count_ops->flags |= SCX_OPS_SWITCH_PARTIAL; + + SCX_FAIL_IF(init_enable_count__load(skel), "Failed to load skel"); /* * Fork a bunch of children before we attach the scheduler so that we diff --git a/tools/testing/selftests/sched_ext/maximal.c b/tools/testing/selftests/sched_ext/maximal.c index f38fc973c3800..c6be50a9941d5 100644 --- a/tools/testing/selftests/sched_ext/maximal.c +++ b/tools/testing/selftests/sched_ext/maximal.c @@ -14,8 +14,11 @@ static enum scx_test_status setup(void **ctx) { struct maximal *skel; - skel = maximal__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = maximal__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(maximal__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/minimal.c b/tools/testing/selftests/sched_ext/minimal.c index 6c5db8ebbf8ac..89f7261757ffb 100644 --- a/tools/testing/selftests/sched_ext/minimal.c +++ b/tools/testing/selftests/sched_ext/minimal.c @@ -15,11 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct minimal *skel; - skel = minimal__open_and_load(); - if (!skel) { - SCX_ERR("Failed to open and load skel"); - return SCX_TEST_FAIL; - } + skel = minimal__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(minimal__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/prog_run.c b/tools/testing/selftests/sched_ext/prog_run.c index 3cd57ef8daaa5..05974820ca69d 100644 --- a/tools/testing/selftests/sched_ext/prog_run.c +++ b/tools/testing/selftests/sched_ext/prog_run.c @@ -15,11 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct prog_run *skel; - skel = prog_run__open_and_load(); - if (!skel) { - SCX_ERR("Failed to open and load skel"); - return SCX_TEST_FAIL; - } + skel = prog_run__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(prog_run__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/reload_loop.c b/tools/testing/selftests/sched_ext/reload_loop.c index 5cfba2d6e0568..308211d804364 100644 --- a/tools/testing/selftests/sched_ext/reload_loop.c +++ b/tools/testing/selftests/sched_ext/reload_loop.c @@ -18,11 +18,10 @@ bool force_exit = false; static enum scx_test_status setup(void **ctx) { - skel = maximal__open_and_load(); - if (!skel) { - SCX_ERR("Failed to open and load skel"); - return SCX_TEST_FAIL; - } + skel = maximal__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(maximal__load(skel), "Failed to load skel"); return SCX_TEST_PASS; } diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl.c b/tools/testing/selftests/sched_ext/select_cpu_dfl.c index a53a40c2d2f0f..5b6e045e1109b 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dfl.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dfl.c @@ -17,8 +17,11 @@ static enum scx_test_status setup(void **ctx) { struct select_cpu_dfl *skel; - skel = select_cpu_dfl__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = select_cpu_dfl__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(select_cpu_dfl__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c index 1d85bf4bf3a39..9b5d232efb7f6 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c @@ -17,8 +17,11 @@ static enum scx_test_status setup(void **ctx) { struct select_cpu_dfl_nodispatch *skel; - skel = select_cpu_dfl_nodispatch__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = select_cpu_dfl_nodispatch__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(select_cpu_dfl_nodispatch__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch.c index 0309ca8785b36..80283dbc41b7e 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch.c @@ -17,8 +17,11 @@ static enum scx_test_status setup(void **ctx) { struct select_cpu_dispatch *skel; - skel = select_cpu_dispatch__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = select_cpu_dispatch__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(select_cpu_dispatch__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c index 47eb6ed7627d9..5e72ebbc90a5a 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c @@ -15,8 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct select_cpu_dispatch_bad_dsq *skel; - skel = select_cpu_dispatch_bad_dsq__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = select_cpu_dispatch_bad_dsq__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(select_cpu_dispatch_bad_dsq__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c index 48ff028a3c46d..aa85949478bcf 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c @@ -15,8 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct select_cpu_dispatch_dbl_dsp *skel; - skel = select_cpu_dispatch_dbl_dsp__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = select_cpu_dispatch_dbl_dsp__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(select_cpu_dispatch_dbl_dsp__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/select_cpu_vtime.c b/tools/testing/selftests/sched_ext/select_cpu_vtime.c index b4629c2364f5d..1e9b5c9bfff1d 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_vtime.c +++ b/tools/testing/selftests/sched_ext/select_cpu_vtime.c @@ -15,8 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct select_cpu_vtime *skel; - skel = select_cpu_vtime__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = select_cpu_vtime__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(select_cpu_vtime__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; -- GitLab From 64a1ba4072b34af1b76bf15fca5c2075b8cc4d64 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Thu, 23 Jan 2025 21:51:38 +0530 Subject: [PATCH 0036/1585] wifi: ath12k: fix handling of 6 GHz rules In the US country code, to avoid including 6 GHz rules in the 5 GHz rules list, the number of 5 GHz rules is set to a default constant value of 4 (REG_US_5G_NUM_REG_RULES). However, if there are more than 4 valid 5 GHz rules, the current logic will bypass the legitimate 6 GHz rules. For example, if there are 5 valid 5 GHz rules and 1 valid 6 GHz rule, the current logic will only consider 4 of the 5 GHz rules, treating the last valid rule as a 6 GHz rule. Consequently, the actual 6 GHz rule is never processed, leading to the eventual disabling of 6 GHz channels. To fix this issue, instead of hardcoding the value to 4, use a helper function to determine the number of 6 GHz rules present in the 5 GHz rules list and ignore only those rules. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Cc: stable@vger.kernel.org Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: Aditya Kumar Singh Link: https://patch.msgid.link/20250123-fix_6ghz_rules_handling-v1-1-d734bfa58ff4@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wmi.c | 61 ++++++++++++++++++++------- drivers/net/wireless/ath/ath12k/wmi.h | 1 - 2 files changed, 45 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index dced2aa9ba1a3..d953742b67e14 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -4681,6 +4681,22 @@ static struct ath12k_reg_rule return reg_rule_ptr; } +static u8 ath12k_wmi_ignore_num_extra_rules(struct ath12k_wmi_reg_rule_ext_params *rule, + u32 num_reg_rules) +{ + u8 num_invalid_5ghz_rules = 0; + u32 count, start_freq; + + for (count = 0; count < num_reg_rules; count++) { + start_freq = le32_get_bits(rule[count].freq_info, REG_RULE_START_FREQ); + + if (start_freq >= ATH12K_MIN_6G_FREQ) + num_invalid_5ghz_rules++; + } + + return num_invalid_5ghz_rules; +} + static int ath12k_pull_reg_chan_list_ext_update_ev(struct ath12k_base *ab, struct sk_buff *skb, struct ath12k_reg_info *reg_info) @@ -4691,6 +4707,7 @@ static int ath12k_pull_reg_chan_list_ext_update_ev(struct ath12k_base *ab, u32 num_2g_reg_rules, num_5g_reg_rules; u32 num_6g_reg_rules_ap[WMI_REG_CURRENT_MAX_AP_TYPE]; u32 num_6g_reg_rules_cl[WMI_REG_CURRENT_MAX_AP_TYPE][WMI_REG_MAX_CLIENT_TYPE]; + u8 num_invalid_5ghz_ext_rules; u32 total_reg_rules = 0; int ret, i, j; @@ -4784,20 +4801,6 @@ static int ath12k_pull_reg_chan_list_ext_update_ev(struct ath12k_base *ab, memcpy(reg_info->alpha2, &ev->alpha2, REG_ALPHA2_LEN); - /* FIXME: Currently FW includes 6G reg rule also in 5G rule - * list for country US. - * Having same 6G reg rule in 5G and 6G rules list causes - * intersect check to be true, and same rules will be shown - * multiple times in iw cmd. So added hack below to avoid - * parsing 6G rule from 5G reg rule list, and this can be - * removed later, after FW updates to remove 6G reg rule - * from 5G rules list. - */ - if (memcmp(reg_info->alpha2, "US", 2) == 0) { - reg_info->num_5g_reg_rules = REG_US_5G_NUM_REG_RULES; - num_5g_reg_rules = reg_info->num_5g_reg_rules; - } - reg_info->dfs_region = le32_to_cpu(ev->dfs_region); reg_info->phybitmap = le32_to_cpu(ev->phybitmap); reg_info->num_phy = le32_to_cpu(ev->num_phy); @@ -4900,8 +4903,29 @@ static int ath12k_pull_reg_chan_list_ext_update_ev(struct ath12k_base *ab, } } + ext_wmi_reg_rule += num_2g_reg_rules; + + /* Firmware might include 6 GHz reg rule in 5 GHz rule list + * for few countries along with separate 6 GHz rule. + * Having same 6 GHz reg rule in 5 GHz and 6 GHz rules list + * causes intersect check to be true, and same rules will be + * shown multiple times in iw cmd. + * Hence, avoid parsing 6 GHz rule from 5 GHz reg rule list + */ + num_invalid_5ghz_ext_rules = ath12k_wmi_ignore_num_extra_rules(ext_wmi_reg_rule, + num_5g_reg_rules); + + if (num_invalid_5ghz_ext_rules) { + ath12k_dbg(ab, ATH12K_DBG_WMI, + "CC: %s 5 GHz reg rules number %d from fw, %d number of invalid 5 GHz rules", + reg_info->alpha2, reg_info->num_5g_reg_rules, + num_invalid_5ghz_ext_rules); + + num_5g_reg_rules = num_5g_reg_rules - num_invalid_5ghz_ext_rules; + reg_info->num_5g_reg_rules = num_5g_reg_rules; + } + if (num_5g_reg_rules) { - ext_wmi_reg_rule += num_2g_reg_rules; reg_info->reg_rules_5g_ptr = create_ext_reg_rules_from_wmi(num_5g_reg_rules, ext_wmi_reg_rule); @@ -4913,7 +4937,12 @@ static int ath12k_pull_reg_chan_list_ext_update_ev(struct ath12k_base *ab, } } - ext_wmi_reg_rule += num_5g_reg_rules; + /* We have adjusted the number of 5 GHz reg rules above. But still those + * many rules needs to be adjusted in ext_wmi_reg_rule. + * + * NOTE: num_invalid_5ghz_ext_rules will be 0 for rest other cases. + */ + ext_wmi_reg_rule += (num_5g_reg_rules + num_invalid_5ghz_ext_rules); for (i = 0; i < WMI_REG_CURRENT_MAX_AP_TYPE; i++) { reg_info->reg_rules_6g_ap_ptr[i] = diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index 6f55dbdf629db..b16615b116ae7 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -3943,7 +3943,6 @@ struct ath12k_wmi_eht_rate_set_params { #define MAX_REG_RULES 10 #define REG_ALPHA2_LEN 2 #define MAX_6G_REG_RULES 5 -#define REG_US_5G_NUM_REG_RULES 4 enum wmi_start_event_param { WMI_VDEV_START_RESP_EVENT = 0, -- GitLab From e76946110137703c16423baf6ee177b751a34b7e Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 23 Jan 2025 16:25:35 +0800 Subject: [PATCH 0037/1585] workqueue: Put the pwq after detaching the rescuer from the pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 68f83057b913("workqueue: Reap workers via kthread_stop() and remove detach_completion") adds code to reap the normal workers but mistakenly does not handle the rescuer and also removes the code waiting for the rescuer in put_unbound_pool(), which caused a use-after-free bug reported by Cheung Wall. To avoid the use-after-free bug, the pool’s reference must be held until the detachment is complete. Therefore, move the code that puts the pwq after detaching the rescuer from the pool. Reported-by: cheung wall Cc: cheung wall Link: https://lore.kernel.org/lkml/CAKHoSAvP3iQW+GwmKzWjEAOoPvzeWeoMO0Gz7Pp3_4kxt-RMoA@mail.gmail.com/ Fixes: 68f83057b913("workqueue: Reap workers via kthread_stop() and remove detach_completion") Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 33a23c7b22747..ccad33001c58c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3516,12 +3516,6 @@ static int rescuer_thread(void *__rescuer) } } - /* - * Put the reference grabbed by send_mayday(). @pool won't - * go away while we're still attached to it. - */ - put_pwq(pwq); - /* * Leave this pool. Notify regular workers; otherwise, we end up * with 0 concurrency and stalling the execution. @@ -3532,6 +3526,12 @@ static int rescuer_thread(void *__rescuer) worker_detach_from_pool(rescuer); + /* + * Put the reference grabbed by send_mayday(). @pool might + * go away any time after it. + */ + put_pwq_unlocked(pwq); + raw_spin_lock_irq(&wq_mayday_lock); } -- GitLab From e9fe182772dcb2630964724fd93e9c90b68ea0fd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Jan 2025 10:48:25 -1000 Subject: [PATCH 0038/1585] sched_ext: selftests/dsp_local_on: Fix sporadic failures dsp_local_on has several incorrect assumptions, one of which is that p->nr_cpus_allowed always tracks p->cpus_ptr. This is not true when a task is scheduled out while migration is disabled - p->cpus_ptr is temporarily overridden to the previous CPU while p->nr_cpus_allowed remains unchanged. This led to sporadic test faliures when dsp_local_on_dispatch() tries to put a migration disabled task to a different CPU. Fix it by keeping the previous CPU when migration is disabled. There are SCX schedulers that make use of p->nr_cpus_allowed. They should also implement explicit handling for p->migration_disabled. Signed-off-by: Tejun Heo Reported-by: Ihor Solodrai Cc: Andrea Righi Cc: Changwoo Min --- tools/testing/selftests/sched_ext/dsp_local_on.bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c index fbda6bf546712..758b479bd1ee1 100644 --- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c +++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c @@ -43,7 +43,7 @@ void BPF_STRUCT_OPS(dsp_local_on_dispatch, s32 cpu, struct task_struct *prev) if (!p) return; - if (p->nr_cpus_allowed == nr_cpus) + if (p->nr_cpus_allowed == nr_cpus && !p->migration_disabled) target = bpf_get_prandom_u32() % nr_cpus; else target = scx_bpf_task_cpu(p); -- GitLab From e0f63bc68f59d281e2d06e596f6c1bd9382a15cd Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Tue, 21 Jan 2025 18:09:25 -0300 Subject: [PATCH 0039/1585] drm/print: Include drm_device.h The header drm_print.h uses members of struct drm_device pointers, as such, it should include drm_device.h to let the compiler know the full type definition. Without such include, users of drm_print.h that don't explicitly need drm_device.h would bump into build errors and be forced to include the latter. Signed-off-by: Gustavo Sousa Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20250121210935.84357-1-gustavo.sousa@intel.com Signed-off-by: Lucas De Marchi --- include/drm/drm_print.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index b3906dc043886..8d3e17d7554a8 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -32,6 +32,7 @@ #include #include +#include struct debugfs_regset32; struct drm_device; -- GitLab From 41f198d58b6f2b36f9f8a4481d517369b324e773 Mon Sep 17 00:00:00 2001 From: Tanya Agarwal Date: Fri, 24 Jan 2025 01:18:00 +0530 Subject: [PATCH 0040/1585] tomoyo: fix spelling error Fix spelling error in security/tomoyo module comments that were identified using the codespell tool. No functional changes - documentation only. Signed-off-by: Tanya Agarwal Reviewed-by: Mimi Zohar Signed-off-by: Tetsuo Handa --- security/tomoyo/domain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c index 3a7b0874cf44d..5f9ccab26e9ab 100644 --- a/security/tomoyo/domain.c +++ b/security/tomoyo/domain.c @@ -920,7 +920,7 @@ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos, #ifdef CONFIG_MMU /* * This is called at execve() time in order to dig around - * in the argv/environment of the new proceess + * in the argv/environment of the new process * (represented by bprm). */ mmap_read_lock(bprm->mm); -- GitLab From 691a1f3f180133965d01e0ab0f332248d0345554 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 26 Jan 2025 19:13:58 +0900 Subject: [PATCH 0041/1585] tomoyo: fix spelling errors No functional changes. Signed-off-by: Tetsuo Handa --- security/tomoyo/securityfs_if.c | 6 +++--- security/tomoyo/tomoyo.c | 5 +---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/security/tomoyo/securityfs_if.c b/security/tomoyo/securityfs_if.c index a2705798476f9..7e69747b2f771 100644 --- a/security/tomoyo/securityfs_if.c +++ b/security/tomoyo/securityfs_if.c @@ -229,11 +229,11 @@ static void __init tomoyo_create_entry(const char *name, const umode_t mode, } /** - * tomoyo_initerface_init - Initialize /sys/kernel/security/tomoyo/ interface. + * tomoyo_interface_init - Initialize /sys/kernel/security/tomoyo/ interface. * * Returns 0. */ -static int __init tomoyo_initerface_init(void) +static int __init tomoyo_interface_init(void) { struct tomoyo_domain_info *domain; struct dentry *tomoyo_dir; @@ -270,4 +270,4 @@ static int __init tomoyo_initerface_init(void) return 0; } -fs_initcall(tomoyo_initerface_init); +fs_initcall(tomoyo_interface_init); diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 04a92c3d65d44..d6ebcd9db80a3 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -549,10 +549,7 @@ static const struct lsm_id tomoyo_lsmid = { .id = LSM_ID_TOMOYO, }; -/* - * tomoyo_security_ops is a "struct security_operations" which is used for - * registering TOMOYO. - */ +/* tomoyo_hooks is used for registering TOMOYO. */ static struct security_hook_list tomoyo_hooks[] __ro_after_init = { LSM_HOOK_INIT(cred_prepare, tomoyo_cred_prepare), LSM_HOOK_INIT(bprm_committed_creds, tomoyo_bprm_committed_creds), -- GitLab From 1f566840a82982141f94086061927a90e79440e5 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 24 Jan 2025 20:54:41 -0500 Subject: [PATCH 0042/1585] clocksource: Use pr_info() for "Checking clocksource synchronization" message The "Checking clocksource synchronization" message is normally printed when clocksource_verify_percpu() is called for a given clocksource if both the CLOCK_SOURCE_UNSTABLE and CLOCK_SOURCE_VERIFY_PERCPU flags are set. It is an informational message and so pr_info() is the correct choice. Signed-off-by: Waiman Long Signed-off-by: Thomas Gleixner Reviewed-by: Paul E. McKenney Acked-by: John Stultz Link: https://lore.kernel.org/all/20250125015442.3740588-1-longman@redhat.com --- kernel/time/clocksource.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 7304d7cf47f2d..77d9566d3aa68 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -382,7 +382,8 @@ void clocksource_verify_percpu(struct clocksource *cs) return; } testcpu = smp_processor_id(); - pr_warn("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n", cs->name, testcpu, cpumask_pr_args(&cpus_chosen)); + pr_info("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n", + cs->name, testcpu, cpumask_pr_args(&cpus_chosen)); for_each_cpu(cpu, &cpus_chosen) { if (cpu == testcpu) continue; -- GitLab From 825c78e6a60c309a59d18d5ac5968aa79cef0bd6 Mon Sep 17 00:00:00 2001 From: Xu Lu Date: Mon, 27 Jan 2025 17:38:46 +0800 Subject: [PATCH 0043/1585] irqchip/riscv: Ensure ordering of memory writes and IPI writes RISC-V distinguishes between memory accesses and device I/O and uses FENCE instruction to order them as viewed by other RISC-V harts and external devices or coprocessors. The FENCE instruction can order any combination of device input(I), device output(O), memory reads(R) and memory writes(W). For example, 'fence w, o' is used to ensure all memory writes from instructions preceding the FENCE instruction appear earlier in the global memory order than device output writes from instructions after the FENCE instruction. RISC-V issues IPIs by writing to the IMSIC/ACLINT MMIO registers, which is regarded as device output operation. However, the existing implementation of the IMSIC/ACLINT drivers issue the IPI via writel_relaxed(), which does not guarantee the order of device output operation and preceding memory writes. As a consequence the hart receiving the IPI might not observe the IPI related data. Fix this by replacing writel_relaxed() with writel() when issuing IPIs, which uses 'fence w, o' to ensure all previous writes made by the current hart are visible to other harts before they receive the IPI. Signed-off-by: Xu Lu Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250127093846.98625-1-luxu.kernel@bytedance.com --- drivers/irqchip/irq-riscv-imsic-early.c | 2 +- drivers/irqchip/irq-thead-c900-aclint-sswi.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-riscv-imsic-early.c b/drivers/irqchip/irq-riscv-imsic-early.c index c5c2e6929a2f5..275df50057057 100644 --- a/drivers/irqchip/irq-riscv-imsic-early.c +++ b/drivers/irqchip/irq-riscv-imsic-early.c @@ -27,7 +27,7 @@ static void imsic_ipi_send(unsigned int cpu) { struct imsic_local_config *local = per_cpu_ptr(imsic->global.local, cpu); - writel_relaxed(IMSIC_IPI_ID, local->msi_va); + writel(IMSIC_IPI_ID, local->msi_va); } static void imsic_ipi_starting_cpu(void) diff --git a/drivers/irqchip/irq-thead-c900-aclint-sswi.c b/drivers/irqchip/irq-thead-c900-aclint-sswi.c index b0e366ade4271..8ff6e7a1363bd 100644 --- a/drivers/irqchip/irq-thead-c900-aclint-sswi.c +++ b/drivers/irqchip/irq-thead-c900-aclint-sswi.c @@ -31,7 +31,7 @@ static DEFINE_PER_CPU(void __iomem *, sswi_cpu_regs); static void thead_aclint_sswi_ipi_send(unsigned int cpu) { - writel_relaxed(0x1, per_cpu(sswi_cpu_regs, cpu)); + writel(0x1, per_cpu(sswi_cpu_regs, cpu)); } static void thead_aclint_sswi_ipi_clear(void) -- GitLab From 987f379b54091cc1b1db986bde71cee1081350b3 Mon Sep 17 00:00:00 2001 From: Stefan Eichenberger Date: Fri, 24 Jan 2025 09:50:39 +0100 Subject: [PATCH 0044/1585] irqchip/irq-mvebu-icu: Fix access to msi_data from irq_domain::host_data mvebu_icu_translate() incorrectly casts irq_domain::host_data directly to mvebu_icu_msi_data. However, host_data actually points to a structure of type msi_domain_info. This incorrect cast causes issues such as the thermal sensors of the CP110 platform malfunctioning. Specifically, the translation of the SEI interrupt to IRQ_TYPE_EDGE_RISING fails, preventing proper interrupt handling. The following error was observed: genirq: Setting trigger mode 4 for irq 85 failed (irq_chip_set_type_parent+0x0/0x34) armada_thermal f2400000.system-controller:thermal-sensor@70: Cannot request threaded IRQ 85 Resolve the issue by first casting host_data to msi_domain_info and then accessing mvebu_icu_msi_data through msi_domain_info::chip_data. Fixes: d929e4db22b6 ("irqchip/irq-mvebu-icu: Prepare for real per device MSI") Signed-off-by: Stefan Eichenberger Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20250124085140.44792-1-eichest@gmail.com --- drivers/irqchip/irq-mvebu-icu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-mvebu-icu.c b/drivers/irqchip/irq-mvebu-icu.c index b337f6c05f184..4eebed39880a5 100644 --- a/drivers/irqchip/irq-mvebu-icu.c +++ b/drivers/irqchip/irq-mvebu-icu.c @@ -68,7 +68,8 @@ static int mvebu_icu_translate(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *hwirq, unsigned int *type) { unsigned int param_count = static_branch_unlikely(&legacy_bindings) ? 3 : 2; - struct mvebu_icu_msi_data *msi_data = d->host_data; + struct msi_domain_info *info = d->host_data; + struct mvebu_icu_msi_data *msi_data = info->chip_data; struct mvebu_icu *icu = msi_data->icu; /* Check the count of the parameters in dt */ -- GitLab From fb95897b8c60653805aa09daec575ca30983f768 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Fri, 24 Jan 2025 11:22:28 +0800 Subject: [PATCH 0045/1585] xfs: Propagate errors from xfs_reflink_cancel_cow_range in xfs_dax_write_iomap_end In xfs_dax_write_iomap_end(), directly return the result of xfs_reflink_cancel_cow_range() when !written, ensuring proper error propagation and improving code robustness. Fixes: ea6c49b784f0 ("xfs: support CoW in fsdax mode") Cc: stable@vger.kernel.org # v6.0 Reviewed-by: Darrick J. Wong Signed-off-by: Wentao Liang Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_iomap.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 50fa3ef89f6c9..d61460309a783 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -976,10 +976,8 @@ xfs_dax_write_iomap_end( if (!xfs_is_cow_inode(ip)) return 0; - if (!written) { - xfs_reflink_cancel_cow_range(ip, pos, length, true); - return 0; - } + if (!written) + return xfs_reflink_cancel_cow_range(ip, pos, length, true); return xfs_reflink_end_cow(ip, pos, written); } -- GitLab From 28aecef5b1015bf6023ddc12b1a67f6678271fcb Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 19 Jan 2025 22:02:38 +0530 Subject: [PATCH 0046/1585] selftests: livepatch: handle PRINTK_CALLER in check_result() Some arch configs (like ppc64) enable CONFIG_PRINTK_CALLER, which adds the caller id as part of the dmesg. With recent util-linux's update 467a5b3192f16 ('dmesg: add caller_id support') the standard "dmesg" has been enhanced to print PRINTK_CALLER fields. Due to this, even though the expected vs observed are same, end testcase results are failed. -% insmod test_modules/test_klp_livepatch.ko -livepatch: enabling patch 'test_klp_livepatch' -livepatch: 'test_klp_livepatch': initializing patching transition -livepatch: 'test_klp_livepatch': starting patching transition -livepatch: 'test_klp_livepatch': completing patching transition -livepatch: 'test_klp_livepatch': patching complete -% echo 0 > /sys/kernel/livepatch/test_klp_livepatch/enabled -livepatch: 'test_klp_livepatch': initializing unpatching transition -livepatch: 'test_klp_livepatch': starting unpatching transition -livepatch: 'test_klp_livepatch': completing unpatching transition -livepatch: 'test_klp_livepatch': unpatching complete -% rmmod test_klp_livepatch +[ T3659] % insmod test_modules/test_klp_livepatch.ko +[ T3682] livepatch: enabling patch 'test_klp_livepatch' +[ T3682] livepatch: 'test_klp_livepatch': initializing patching transition +[ T3682] livepatch: 'test_klp_livepatch': starting patching transition +[ T826] livepatch: 'test_klp_livepatch': completing patching transition +[ T826] livepatch: 'test_klp_livepatch': patching complete +[ T3659] % echo 0 > /sys/kernel/livepatch/test_klp_livepatch/enabled +[ T3659] livepatch: 'test_klp_livepatch': initializing unpatching transition +[ T3659] livepatch: 'test_klp_livepatch': starting unpatching transition +[ T789] livepatch: 'test_klp_livepatch': completing unpatching transition +[ T789] livepatch: 'test_klp_livepatch': unpatching complete +[ T3659] % rmmod test_klp_livepatch ERROR: livepatch kselftest(s) failed not ok 1 selftests: livepatch: test-livepatch.sh # exit=1 Currently the check_result() handles the "[time]" removal from the dmesg. Enhance the check to also handle removal of "[Thread Id]" or "[CPU Id]". Signed-off-by: Madhavan Srinivasan Acked-by: Miroslav Benes Reviewed-by: Petr Mladek Tested-by: Petr Mladek Link: https://lore.kernel.org/r/20250119163238.749847-1-maddy@linux.ibm.com Signed-off-by: Petr Mladek --- tools/testing/selftests/livepatch/functions.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh index e5d06fb402335..15601402dee65 100644 --- a/tools/testing/selftests/livepatch/functions.sh +++ b/tools/testing/selftests/livepatch/functions.sh @@ -306,7 +306,8 @@ function check_result { result=$(dmesg | awk -v last_dmesg="$LAST_DMESG" 'p; $0 == last_dmesg { p=1 }' | \ grep -e 'livepatch:' -e 'test_klp' | \ grep -v '\(tainting\|taints\) kernel' | \ - sed 's/^\[[ 0-9.]*\] //') + sed 's/^\[[ 0-9.]*\] //' | \ + sed 's/^\[[ ]*[CT][0-9]*\] //') if [[ "$expect" == "$result" ]] ; then echo "ok" -- GitLab From 26b63bee2f6e711c5a169997fd126fddcfb90848 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Fri, 24 Jan 2025 11:45:09 +0800 Subject: [PATCH 0047/1585] xfs: Add error handling for xfs_reflink_cancel_cow_range In xfs_inactive(), xfs_reflink_cancel_cow_range() is called without error handling, risking unnoticed failures and inconsistent behavior compared to other parts of the code. Fix this issue by adding an error handling for the xfs_reflink_cancel_cow_range(), improving code robustness. Fixes: 6231848c3aa5 ("xfs: check for cow blocks before trying to clear them") Cc: stable@vger.kernel.org # v4.17 Reviewed-by: Darrick J. Wong Signed-off-by: Wentao Liang Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_inode.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c95fe1b1de4e6..b1f9f156ec888 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1404,8 +1404,11 @@ xfs_inactive( goto out; /* Try to clean out the cow blocks if there are any. */ - if (xfs_inode_has_cow_data(ip)) - xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); + if (xfs_inode_has_cow_data(ip)) { + error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); + if (error) + goto out; + } if (VFS_I(ip)->i_nlink != 0) { /* -- GitLab From 698244bbb3bfd32ddf9a0b70a12b1c7d69056497 Mon Sep 17 00:00:00 2001 From: Nick Chan Date: Sun, 19 Jan 2025 00:31:42 +0800 Subject: [PATCH 0048/1585] irqchip/apple-aic: Only handle PMC interrupt as FIQ when configured so The CPU PMU in Apple SoCs can be configured to fire its interrupt in one of several ways, and since Apple A11 one of the methods is FIQ, but the check of the configuration register fails to test explicitely for FIQ mode. It tests whether the IMODE bitfield is zero or not and the PMCRO_IACT bit is set. That results in false positives when the IMODE bitfield is not zero, but does not have the mode PMCR0_IMODE_FIQ. Only handle the PMC interrupt as a FIQ when the CPU PMU has been configured to fire FIQs, i.e. the IMODE bitfield value is PMCR0_IMODE_FIQ and PMCR0_IACT is set. Fixes: c7708816c944 ("irqchip/apple-aic: Wire PMU interrupts") Signed-off-by: Nick Chan Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20250118163554.16733-1-towinchenmi@gmail.com --- drivers/irqchip/irq-apple-aic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index da5250f0155cf..2b1684c60e3ca 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -577,7 +577,8 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs) AIC_FIQ_HWIRQ(AIC_TMR_EL02_VIRT)); } - if (read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & PMCR0_IACT) { + if ((read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & (PMCR0_IMODE | PMCR0_IACT)) == + (FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) { int irq; if (cpumask_test_cpu(smp_processor_id(), &aic_irqc->fiq_aff[AIC_CPU_PMU_P]->aff)) -- GitLab From d6f3e7d564b2309e1f17e709a70eca78d7ca2bb8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Jan 2025 12:22:12 -1000 Subject: [PATCH 0049/1585] sched_ext: Fix incorrect autogroup migration detection scx_move_task() is called from sched_move_task() and tells the BPF scheduler that cgroup migration is being committed. sched_move_task() is used by both cgroup and autogroup migrations and scx_move_task() tried to filter out autogroup migrations by testing the destination cgroup and PF_EXITING but this is not enough. In fact, without explicitly tagging the thread which is doing the cgroup migration, there is no good way to tell apart scx_move_task() invocations for racing migration to the root cgroup and an autogroup migration. This led to scx_move_task() incorrectly ignoring a migration from non-root cgroup to an autogroup of the root cgroup triggering the following warning: WARNING: CPU: 7 PID: 1 at kernel/sched/ext.c:3725 scx_cgroup_can_attach+0x196/0x340 ... Call Trace: cgroup_migrate_execute+0x5b1/0x700 cgroup_attach_task+0x296/0x400 __cgroup_procs_write+0x128/0x140 cgroup_procs_write+0x17/0x30 kernfs_fop_write_iter+0x141/0x1f0 vfs_write+0x31d/0x4a0 __x64_sys_write+0x72/0xf0 do_syscall_64+0x82/0x160 entry_SYSCALL_64_after_hwframe+0x76/0x7e Fix it by adding an argument to sched_move_task() that indicates whether the moving is for a cgroup or autogroup migration. After the change, scx_move_task() is called only for cgroup migrations and renamed to scx_cgroup_move_task(). Link: https://github.com/sched-ext/scx/issues/370 Fixes: 819513666966 ("sched_ext: Add cgroup support") Cc: stable@vger.kernel.org # v6.12+ Acked-by: Peter Zijlstra (Intel) Signed-off-by: Tejun Heo --- kernel/sched/autogroup.c | 4 ++-- kernel/sched/core.c | 7 ++++--- kernel/sched/ext.c | 15 +-------------- kernel/sched/ext.h | 4 ++-- kernel/sched/sched.h | 2 +- 5 files changed, 10 insertions(+), 22 deletions(-) diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c index db68a964e34e2..c4a3ccf6a8ace 100644 --- a/kernel/sched/autogroup.c +++ b/kernel/sched/autogroup.c @@ -150,7 +150,7 @@ void sched_autogroup_exit_task(struct task_struct *p) * see this thread after that: we can no longer use signal->autogroup. * See the PF_EXITING check in task_wants_autogroup(). */ - sched_move_task(p); + sched_move_task(p, true); } static void @@ -182,7 +182,7 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag) * sched_autogroup_exit_task(). */ for_each_thread(p, t) - sched_move_task(t); + sched_move_task(t, true); unlock_task_sighand(p, &flags); autogroup_kref_put(prev); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 901170708e2a2..e77897a62442e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9042,7 +9042,7 @@ static void sched_change_group(struct task_struct *tsk, struct task_group *group * now. This function just updates tsk->se.cfs_rq and tsk->se.parent to reflect * its new group. */ -void sched_move_task(struct task_struct *tsk) +void sched_move_task(struct task_struct *tsk, bool for_autogroup) { int queued, running, queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; @@ -9071,7 +9071,8 @@ void sched_move_task(struct task_struct *tsk) put_prev_task(rq, tsk); sched_change_group(tsk, group); - scx_move_task(tsk); + if (!for_autogroup) + scx_cgroup_move_task(tsk); if (queued) enqueue_task(rq, tsk, queue_flags); @@ -9172,7 +9173,7 @@ static void cpu_cgroup_attach(struct cgroup_taskset *tset) struct cgroup_subsys_state *css; cgroup_taskset_for_each(task, css, tset) - sched_move_task(task); + sched_move_task(task, false); scx_cgroup_finish_attach(); } diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 7081c7be5f622..c7b159f488343 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4323,24 +4323,11 @@ int scx_cgroup_can_attach(struct cgroup_taskset *tset) return ops_sanitize_err("cgroup_prep_move", ret); } -void scx_move_task(struct task_struct *p) +void scx_cgroup_move_task(struct task_struct *p) { if (!scx_cgroup_enabled) return; - /* - * We're called from sched_move_task() which handles both cgroup and - * autogroup moves. Ignore the latter. - * - * Also ignore exiting tasks, because in the exit path tasks transition - * from the autogroup to the root group, so task_group_is_autogroup() - * alone isn't able to catch exiting autogroup tasks. This is safe for - * cgroup_move(), because cgroup migrations never happen for PF_EXITING - * tasks. - */ - if (task_group_is_autogroup(task_group(p)) || (p->flags & PF_EXITING)) - return; - /* * @p must have ops.cgroup_prep_move() called on it and thus * cgrp_moving_from set. diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h index 4d022d17ac7dd..1079b56b0f7ae 100644 --- a/kernel/sched/ext.h +++ b/kernel/sched/ext.h @@ -73,7 +73,7 @@ static inline void scx_update_idle(struct rq *rq, bool idle, bool do_notify) {} int scx_tg_online(struct task_group *tg); void scx_tg_offline(struct task_group *tg); int scx_cgroup_can_attach(struct cgroup_taskset *tset); -void scx_move_task(struct task_struct *p); +void scx_cgroup_move_task(struct task_struct *p); void scx_cgroup_finish_attach(void); void scx_cgroup_cancel_attach(struct cgroup_taskset *tset); void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight); @@ -82,7 +82,7 @@ void scx_group_set_idle(struct task_group *tg, bool idle); static inline int scx_tg_online(struct task_group *tg) { return 0; } static inline void scx_tg_offline(struct task_group *tg) {} static inline int scx_cgroup_can_attach(struct cgroup_taskset *tset) { return 0; } -static inline void scx_move_task(struct task_struct *p) {} +static inline void scx_cgroup_move_task(struct task_struct *p) {} static inline void scx_cgroup_finish_attach(void) {} static inline void scx_cgroup_cancel_attach(struct cgroup_taskset *tset) {} static inline void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight) {} diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 38e0e323dda26..b93c8c3dc05a5 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -572,7 +572,7 @@ extern void sched_online_group(struct task_group *tg, extern void sched_destroy_group(struct task_group *tg); extern void sched_release_group(struct task_group *tg); -extern void sched_move_task(struct task_struct *tsk); +extern void sched_move_task(struct task_struct *tsk, bool for_autogroup); #ifdef CONFIG_FAIR_GROUP_SCHED extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); -- GitLab From 5f52bbf2f6e0997394cf9c449d44e1c80ff4282c Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sat, 25 Jan 2025 18:14:12 +0100 Subject: [PATCH 0050/1585] tools/sched_ext: Add helper to check task migration state Introduce a new helper for BPF schedulers to determine whether a task can migrate or not (supporting both SMP and UP systems). Fixes: e9fe182772dc ("sched_ext: selftests/dsp_local_on: Fix sporadic failures") Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- tools/sched_ext/include/scx/common.bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index f3e15e9efa76b..f254a39b86a58 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -404,6 +404,17 @@ static __always_inline const struct cpumask *cast_mask(struct bpf_cpumask *mask) return (const struct cpumask *)mask; } +/* + * Return true if task @p cannot migrate to a different CPU, false + * otherwise. + */ +static inline bool is_migration_disabled(const struct task_struct *p) +{ + if (bpf_core_field_exists(p->migration_disabled)) + return p->migration_disabled; + return false; +} + /* rcu */ void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; -- GitLab From 3c7d51b0d29954c40ea3a097e0ec7884b4344331 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sat, 25 Jan 2025 10:36:07 +0100 Subject: [PATCH 0051/1585] sched_ext: selftests/dsp_local_on: Fix selftest on UP systems In UP systems p->migration_disabled is not available. Fix this by using the portable helper is_migration_disabled(p). Fixes: e9fe182772dc ("sched_ext: selftests/dsp_local_on: Fix sporadic failures") Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- tools/testing/selftests/sched_ext/dsp_local_on.bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c index 758b479bd1ee1..c02b2aa6fc641 100644 --- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c +++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c @@ -43,7 +43,7 @@ void BPF_STRUCT_OPS(dsp_local_on_dispatch, s32 cpu, struct task_struct *prev) if (!p) return; - if (p->nr_cpus_allowed == nr_cpus && !p->migration_disabled) + if (p->nr_cpus_allowed == nr_cpus && !is_migration_disabled(p)) target = bpf_get_prandom_u32() % nr_cpus; else target = scx_bpf_task_cpu(p); -- GitLab From 1626e5ef0b00386a4fd083fa7c46c8edbd75f9b4 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Mon, 27 Jan 2025 23:06:16 +0100 Subject: [PATCH 0052/1585] sched_ext: Fix lock imbalance in dispatch_to_local_dsq() While performing the rq locking dance in dispatch_to_local_dsq(), we may trigger the following lock imbalance condition, in particular when multiple tasks are rapidly changing CPU affinity (i.e., running a `stress-ng --race-sched 0`): [ 13.413579] ===================================== [ 13.413660] WARNING: bad unlock balance detected! [ 13.413729] 6.13.0-virtme #15 Not tainted [ 13.413792] ------------------------------------- [ 13.413859] kworker/1:1/80 is trying to release lock (&rq->__lock) at: [ 13.413954] [] dispatch_to_local_dsq+0x108/0x1a0 [ 13.414111] but there are no more locks to release! [ 13.414176] [ 13.414176] other info that might help us debug this: [ 13.414258] 1 lock held by kworker/1:1/80: [ 13.414318] #0: ffff8b66feb41698 (&rq->__lock){-.-.}-{2:2}, at: raw_spin_rq_lock_nested+0x20/0x90 [ 13.414612] [ 13.414612] stack backtrace: [ 13.415255] CPU: 1 UID: 0 PID: 80 Comm: kworker/1:1 Not tainted 6.13.0-virtme #15 [ 13.415505] Workqueue: 0x0 (events) [ 13.415567] Sched_ext: dsp_local_on (enabled+all), task: runnable_at=-2ms [ 13.415570] Call Trace: [ 13.415700] [ 13.415744] dump_stack_lvl+0x78/0xe0 [ 13.415806] ? dispatch_to_local_dsq+0x108/0x1a0 [ 13.415884] print_unlock_imbalance_bug+0x11b/0x130 [ 13.415965] ? dispatch_to_local_dsq+0x108/0x1a0 [ 13.416226] lock_release+0x231/0x2c0 [ 13.416326] _raw_spin_unlock+0x1b/0x40 [ 13.416422] dispatch_to_local_dsq+0x108/0x1a0 [ 13.416554] flush_dispatch_buf+0x199/0x1d0 [ 13.416652] balance_one+0x194/0x370 [ 13.416751] balance_scx+0x61/0x1e0 [ 13.416848] prev_balance+0x43/0xb0 [ 13.416947] __pick_next_task+0x6b/0x1b0 [ 13.417052] __schedule+0x20d/0x1740 This happens because dispatch_to_local_dsq() is racing with dispatch_dequeue() and, when the latter wins, we incorrectly assume that the task has been moved to dst_rq. Fix by properly tracking the currently locked rq. Fixes: 4d3ca89bdd31 ("sched_ext: Refactor consume_remote_task()") Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index c7b159f488343..a6d6d6dadde51 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -2575,6 +2575,9 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq, { struct rq *src_rq = task_rq(p); struct rq *dst_rq = container_of(dst_dsq, struct rq, scx.local_dsq); +#ifdef CONFIG_SMP + struct rq *locked_rq = rq; +#endif /* * We're synchronized against dequeue through DISPATCHING. As @p can't @@ -2611,8 +2614,9 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq, atomic_long_set_release(&p->scx.ops_state, SCX_OPSS_NONE); /* switch to @src_rq lock */ - if (rq != src_rq) { - raw_spin_rq_unlock(rq); + if (locked_rq != src_rq) { + raw_spin_rq_unlock(locked_rq); + locked_rq = src_rq; raw_spin_rq_lock(src_rq); } @@ -2630,6 +2634,8 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq, } else { move_remote_task_to_local_dsq(p, enq_flags, src_rq, dst_rq); + /* task has been moved to dst_rq, which is now locked */ + locked_rq = dst_rq; } /* if the destination CPU is idle, wake it up */ @@ -2638,8 +2644,8 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq, } /* switch back to @rq lock */ - if (rq != dst_rq) { - raw_spin_rq_unlock(dst_rq); + if (locked_rq != rq) { + raw_spin_rq_unlock(locked_rq); raw_spin_rq_lock(rq); } #else /* CONFIG_SMP */ -- GitLab From a9ab28b3d21aec6d0f56fe722953e20ce470237b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jan 2025 06:22:58 +0100 Subject: [PATCH 0053/1585] xfs: remove xfs_buf_cache.bc_lock xfs_buf_cache.bc_lock serializes adding buffers to and removing them from the hashtable. But as the rhashtable code already uses fine grained internal locking for inserts and removals the extra protection isn't actually required. It also happens to fix a lock order inversion vs b_lock added by the recent lookup race fix. Fixes: ee10f6fcdb96 ("xfs: fix buffer lookup vs release race") Reported-by: Lai, Yi Signed-off-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Reviewed-by: Dave Chinner Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_buf.c | 31 +++++++++++++++++-------------- fs/xfs/xfs_buf.h | 1 - 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index f1252ed8bd0a7..ef207784876c8 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -41,8 +41,7 @@ struct kmem_cache *xfs_buf_cache; * * xfs_buf_rele: * b_lock - * pag_buf_lock - * lru_lock + * lru_lock * * xfs_buftarg_drain_rele * lru_lock @@ -220,14 +219,21 @@ _xfs_buf_alloc( */ flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD); - spin_lock_init(&bp->b_lock); + /* + * A new buffer is held and locked by the owner. This ensures that the + * buffer is owned by the caller and racing RCU lookups right after + * inserting into the hash table are safe (and will have to wait for + * the unlock to do anything non-trivial). + */ bp->b_hold = 1; + sema_init(&bp->b_sema, 0); /* held, no waiters */ + + spin_lock_init(&bp->b_lock); atomic_set(&bp->b_lru_ref, 1); init_completion(&bp->b_iowait); INIT_LIST_HEAD(&bp->b_lru); INIT_LIST_HEAD(&bp->b_list); INIT_LIST_HEAD(&bp->b_li_list); - sema_init(&bp->b_sema, 0); /* held, no waiters */ bp->b_target = target; bp->b_mount = target->bt_mount; bp->b_flags = flags; @@ -497,7 +503,6 @@ int xfs_buf_cache_init( struct xfs_buf_cache *bch) { - spin_lock_init(&bch->bc_lock); return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params); } @@ -647,17 +652,20 @@ xfs_buf_find_insert( if (error) goto out_free_buf; - spin_lock(&bch->bc_lock); + /* The new buffer keeps the perag reference until it is freed. */ + new_bp->b_pag = pag; + + rcu_read_lock(); bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash, &new_bp->b_rhash_head, xfs_buf_hash_params); if (IS_ERR(bp)) { + rcu_read_unlock(); error = PTR_ERR(bp); - spin_unlock(&bch->bc_lock); goto out_free_buf; } if (bp && xfs_buf_try_hold(bp)) { /* found an existing buffer */ - spin_unlock(&bch->bc_lock); + rcu_read_unlock(); error = xfs_buf_find_lock(bp, flags); if (error) xfs_buf_rele(bp); @@ -665,10 +673,8 @@ xfs_buf_find_insert( *bpp = bp; goto out_free_buf; } + rcu_read_unlock(); - /* The new buffer keeps the perag reference until it is freed. */ - new_bp->b_pag = pag; - spin_unlock(&bch->bc_lock); *bpp = new_bp; return 0; @@ -1085,7 +1091,6 @@ xfs_buf_rele_cached( } /* we are asked to drop the last reference */ - spin_lock(&bch->bc_lock); __xfs_buf_ioacct_dec(bp); if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { /* @@ -1097,7 +1102,6 @@ xfs_buf_rele_cached( bp->b_state &= ~XFS_BSTATE_DISPOSE; else bp->b_hold--; - spin_unlock(&bch->bc_lock); } else { bp->b_hold--; /* @@ -1115,7 +1119,6 @@ xfs_buf_rele_cached( ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head, xfs_buf_hash_params); - spin_unlock(&bch->bc_lock); if (pag) xfs_perag_put(pag); freebuf = true; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 7e73663c5d4a5..3b4ed42e11c01 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -80,7 +80,6 @@ typedef unsigned int xfs_buf_flags_t; #define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */ struct xfs_buf_cache { - spinlock_t bc_lock; struct rhashtable bc_hash; }; -- GitLab From fd39c41bcd82d5ebaaebadb944eab5598c668a90 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 27 Jan 2025 14:44:14 +0100 Subject: [PATCH 0054/1585] drm/ast: astdp: Fix timeout for enabling video signal The ASTDP transmitter sometimes takes up to 1 second for enabling the video signal, while the timeout is only 200 msec. This results in a kernel error message. Increase the timeout to 1 second. An example of the error message is shown below. [ 697.084433] ------------[ cut here ]------------ [ 697.091115] ast 0000:02:00.0: [drm] drm_WARN_ON(!__ast_dp_wait_enable(ast, enabled)) [ 697.091233] WARNING: CPU: 1 PID: 160 at drivers/gpu/drm/ast/ast_dp.c:232 ast_dp_set_enable+0x123/0x140 [ast] [...] [ 697.272469] RIP: 0010:ast_dp_set_enable+0x123/0x140 [ast] [...] [ 697.415283] Call Trace: [ 697.420727] [ 697.425908] ? show_trace_log_lvl+0x196/0x2c0 [ 697.433304] ? show_trace_log_lvl+0x196/0x2c0 [ 697.440693] ? drm_atomic_helper_commit_modeset_enables+0x30a/0x470 [ 697.450115] ? ast_dp_set_enable+0x123/0x140 [ast] [ 697.458059] ? __warn.cold+0xaf/0xca [ 697.464713] ? ast_dp_set_enable+0x123/0x140 [ast] [ 697.472633] ? report_bug+0x134/0x1d0 [ 697.479544] ? handle_bug+0x58/0x90 [ 697.486127] ? exc_invalid_op+0x13/0x40 [ 697.492975] ? asm_exc_invalid_op+0x16/0x20 [ 697.500224] ? preempt_count_sub+0x14/0xc0 [ 697.507473] ? ast_dp_set_enable+0x123/0x140 [ast] [ 697.515377] ? ast_dp_set_enable+0x123/0x140 [ast] [ 697.523227] drm_atomic_helper_commit_modeset_enables+0x30a/0x470 [ 697.532388] drm_atomic_helper_commit_tail+0x58/0x90 [ 697.540400] ast_mode_config_helper_atomic_commit_tail+0x30/0x40 [ast] [ 697.550009] commit_tail+0xfe/0x1d0 [ 697.556547] drm_atomic_helper_commit+0x198/0x1c0 This is a cosmetical problem. Enabling the video signal still works even with the error message. The problem has always been present, but only recent versions of the ast driver warn about missing the timeout. Signed-off-by: Thomas Zimmermann Fixes: 4e29cc7c5c67 ("drm/ast: astdp: Replace ast_dp_set_on_off()") Cc: Thomas Zimmermann Cc: Jocelyn Falempe Cc: Dave Airlie Cc: dri-devel@lists.freedesktop.org Cc: # v6.13+ Reviewed-by: Jocelyn Falempe Link: https://patchwork.freedesktop.org/patch/msgid/20250127134423.84266-1-tzimmermann@suse.de --- drivers/gpu/drm/ast/ast_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c index 0e282b7b167c6..b9eb67e3fa90e 100644 --- a/drivers/gpu/drm/ast/ast_dp.c +++ b/drivers/gpu/drm/ast/ast_dp.c @@ -195,7 +195,7 @@ static bool __ast_dp_wait_enable(struct ast_device *ast, bool enabled) if (enabled) vgacrdf_test |= AST_IO_VGACRDF_DP_VIDEO_ENABLE; - for (i = 0; i < 200; ++i) { + for (i = 0; i < 1000; ++i) { if (i) mdelay(1); vgacrdf = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xdf, -- GitLab From 7bf6b497a747b0e28a411beacdd62f1488d0781c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jan 2025 08:55:33 +0100 Subject: [PATCH 0055/1585] nvmet: the result field in nvmet_alloc_ctrl_args is little endian So use the __le32 type for it. Fixes: 6202783184bf ("nvmet: Improve nvmet_alloc_ctrl() interface and implementation") Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/nvmet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index f4df458df9dbb..6a9af4e4d7325 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -582,7 +582,7 @@ struct nvmet_alloc_ctrl_args { const struct nvmet_fabrics_ops *ops; struct device *p2p_client; u32 kato; - u32 result; + __le32 result; u16 error_loc; u16 status; }; -- GitLab From cc3d4671a0db9499b201c43faba6c46e1a21274c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jan 2025 08:55:34 +0100 Subject: [PATCH 0056/1585] nvmet: add a missing endianess conversion in nvmet_execute_admin_connect The kato field is little endian on the wire, but native endian in the in-core structure, add the missing byte swap. Fixes: 6202783184bf ("nvmet: Improve nvmet_alloc_ctrl() interface and implementation") Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/fabrics-cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index a7ff05b3be29f..eb406c90c1679 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -287,7 +287,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) args.subsysnqn = d->subsysnqn; args.hostnqn = d->hostnqn; args.hostid = &d->hostid; - args.kato = c->kato; + args.kato = le32_to_cpu(c->kato); ctrl = nvmet_alloc_ctrl(&args); if (!ctrl) -- GitLab From 0e9724d0f89e8d77fa683e3129cadaed7c6e609d Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Mon, 20 Jan 2025 16:50:47 +0000 Subject: [PATCH 0057/1585] wifi: brcmfmac: use random seed flag for BCM4355 and BCM4364 firmware Before 6.13, random seed to the firmware was given based on the logic whether the device had valid OTP or not, and such devices were found mainly on the T2 and Apple Silicon Macs. In 6.13, the logic was changed, and the device table was used for this purpose, so as to cover the special case of BCM43752 chip. During the transition, the device table for BCM4364 and BCM4355 Wi-Fi chips which had valid OTP was not modified, thus breaking Wi-Fi on these devices. This patch adds does the necessary changes, similar to the ones done for other chips. Fixes: ea11a89c3ac6 ("wifi: brcmfmac: add flag for random seed during firmware download") Cc: stable@vger.kernel.org Signed-off-by: Aditya Garg Acked-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://patch.msgid.link/47E43F07-E11D-478C-86D4-23627154AC7C@live.com --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c index e4395b1f8c11e..d2caa80e94123 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -2712,7 +2712,7 @@ static const struct pci_device_id brcmf_pcie_devid_table[] = { BRCMF_PCIE_DEVICE(BRCM_PCIE_4350_DEVICE_ID, WCC), BRCMF_PCIE_DEVICE_SUB(0x4355, BRCM_PCIE_VENDOR_ID_BROADCOM, 0x4355, WCC), BRCMF_PCIE_DEVICE(BRCM_PCIE_4354_RAW_DEVICE_ID, WCC), - BRCMF_PCIE_DEVICE(BRCM_PCIE_4355_DEVICE_ID, WCC), + BRCMF_PCIE_DEVICE(BRCM_PCIE_4355_DEVICE_ID, WCC_SEED), BRCMF_PCIE_DEVICE(BRCM_PCIE_4356_DEVICE_ID, WCC), BRCMF_PCIE_DEVICE(BRCM_PCIE_43567_DEVICE_ID, WCC), BRCMF_PCIE_DEVICE(BRCM_PCIE_43570_DEVICE_ID, WCC), @@ -2723,7 +2723,7 @@ static const struct pci_device_id brcmf_pcie_devid_table[] = { BRCMF_PCIE_DEVICE(BRCM_PCIE_43602_2G_DEVICE_ID, WCC), BRCMF_PCIE_DEVICE(BRCM_PCIE_43602_5G_DEVICE_ID, WCC), BRCMF_PCIE_DEVICE(BRCM_PCIE_43602_RAW_DEVICE_ID, WCC), - BRCMF_PCIE_DEVICE(BRCM_PCIE_4364_DEVICE_ID, WCC), + BRCMF_PCIE_DEVICE(BRCM_PCIE_4364_DEVICE_ID, WCC_SEED), BRCMF_PCIE_DEVICE(BRCM_PCIE_4365_DEVICE_ID, BCA), BRCMF_PCIE_DEVICE(BRCM_PCIE_4365_2G_DEVICE_ID, BCA), BRCMF_PCIE_DEVICE(BRCM_PCIE_4365_5G_DEVICE_ID, BCA), -- GitLab From f4c9c2cc827d803159730b1da813a0c595969831 Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Tue, 28 Jan 2025 16:11:06 +0100 Subject: [PATCH 0058/1585] batman-adv: Fix incorrect offset in batadv_tt_tvlv_ogm_handler_v1() Since commit 4436df478860 ("batman-adv: Add flex array to struct batadv_tvlv_tt_data"), the introduction of batadv_tvlv_tt_data's flex array member in batadv_tt_tvlv_ogm_handler_v1() put tt_changes at invalid offset. Those TT changes are supposed to be filled from the end of batadv_tvlv_tt_data structure (including vlan_data flexible array), but only the flex array size is taken into account missing completely the size of the fixed part of the structure itself. Fix the tt_change offset computation by using struct_size() instead of flex_array_size() so both flex array member and its container structure sizes are taken into account. Cc: stable@vger.kernel.org Fixes: 4436df478860 ("batman-adv: Add flex array to struct batadv_tvlv_tt_data") Signed-off-by: Remi Pommarel Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/translation-table.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 760d51fdbdf60..7d5de4cbb814f 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -3959,23 +3959,21 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_change *tt_change; struct batadv_tvlv_tt_data *tt_data; u16 num_entries, num_vlan; - size_t flex_size; + size_t tt_data_sz; if (tvlv_value_len < sizeof(*tt_data)) return; tt_data = tvlv_value; - tvlv_value_len -= sizeof(*tt_data); - num_vlan = ntohs(tt_data->num_vlan); - flex_size = flex_array_size(tt_data, vlan_data, num_vlan); - if (tvlv_value_len < flex_size) + tt_data_sz = struct_size(tt_data, vlan_data, num_vlan); + if (tvlv_value_len < tt_data_sz) return; tt_change = (struct batadv_tvlv_tt_change *)((void *)tt_data - + flex_size); - tvlv_value_len -= flex_size; + + tt_data_sz); + tvlv_value_len -= tt_data_sz; num_entries = batadv_tt_entries(tvlv_value_len); -- GitLab From b9a49520679e98700d3d89689cc91c08a1c88c1d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 19 Jan 2025 00:55:32 +0100 Subject: [PATCH 0059/1585] rcuref: Plug slowpath race in rcuref_put() Kernel test robot reported an "imbalanced put" in the rcuref_put() slow path, which turned out to be a false positive. Consider the following race: ref = 0 (via rcuref_init(ref, 1)) T1 T2 rcuref_put(ref) -> atomic_add_negative_release(-1, ref) # ref -> 0xffffffff -> rcuref_put_slowpath(ref) rcuref_get(ref) -> atomic_add_negative_relaxed(1, &ref->refcnt) -> return true; # ref -> 0 rcuref_put(ref) -> atomic_add_negative_release(-1, ref) # ref -> 0xffffffff -> rcuref_put_slowpath() -> cnt = atomic_read(&ref->refcnt); # cnt -> 0xffffffff / RCUREF_NOREF -> atomic_try_cmpxchg_release(&ref->refcnt, &cnt, RCUREF_DEAD)) # ref -> 0xe0000000 / RCUREF_DEAD -> return true -> cnt = atomic_read(&ref->refcnt); # cnt -> 0xe0000000 / RCUREF_DEAD -> if (cnt > RCUREF_RELEASED) # 0xe0000000 > 0xc0000000 -> WARN_ONCE(cnt >= RCUREF_RELEASED, "rcuref - imbalanced put()") The problem is the additional read in the slow path (after it decremented to RCUREF_NOREF) which can happen after the counter has been marked RCUREF_DEAD. Prevent this by reusing the return value of the decrement. Now every "final" put uses RCUREF_NOREF in the slow path and attempts the final cmpxchg() to RCUREF_DEAD. [ bigeasy: Add changelog ] Fixes: ee1ee6db07795 ("atomics: Provide rcuref - scalable reference counting") Reported-by: kernel test robot Debugged-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Reviewed-by: Sebastian Andrzej Siewior Cc: stable@vger.kernel.org Closes: https://lore.kernel.org/oe-lkp/202412311453.9d7636a2-lkp@intel.com --- include/linux/rcuref.h | 9 ++++++--- lib/rcuref.c | 5 ++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h index 2c8bfd0f1b6b3..6322d8c1c6b42 100644 --- a/include/linux/rcuref.h +++ b/include/linux/rcuref.h @@ -71,27 +71,30 @@ static inline __must_check bool rcuref_get(rcuref_t *ref) return rcuref_get_slowpath(ref); } -extern __must_check bool rcuref_put_slowpath(rcuref_t *ref); +extern __must_check bool rcuref_put_slowpath(rcuref_t *ref, unsigned int cnt); /* * Internal helper. Do not invoke directly. */ static __always_inline __must_check bool __rcuref_put(rcuref_t *ref) { + int cnt; + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && preemptible(), "suspicious rcuref_put_rcusafe() usage"); /* * Unconditionally decrease the reference count. The saturation and * dead zones provide enough tolerance for this. */ - if (likely(!atomic_add_negative_release(-1, &ref->refcnt))) + cnt = atomic_sub_return_release(1, &ref->refcnt); + if (likely(cnt >= 0)) return false; /* * Handle the last reference drop and cases inside the saturation * and dead zones. */ - return rcuref_put_slowpath(ref); + return rcuref_put_slowpath(ref, cnt); } /** diff --git a/lib/rcuref.c b/lib/rcuref.c index 97f300eca927c..5bd726b71e393 100644 --- a/lib/rcuref.c +++ b/lib/rcuref.c @@ -220,6 +220,7 @@ EXPORT_SYMBOL_GPL(rcuref_get_slowpath); /** * rcuref_put_slowpath - Slowpath of __rcuref_put() * @ref: Pointer to the reference count + * @cnt: The resulting value of the fastpath decrement * * Invoked when the reference count is outside of the valid zone. * @@ -233,10 +234,8 @@ EXPORT_SYMBOL_GPL(rcuref_get_slowpath); * with a concurrent get()/put() pair. Caller is not allowed to * deconstruct the protected object. */ -bool rcuref_put_slowpath(rcuref_t *ref) +bool rcuref_put_slowpath(rcuref_t *ref, unsigned int cnt) { - unsigned int cnt = atomic_read(&ref->refcnt); - /* Did this drop the last reference? */ if (likely(cnt == RCUREF_NOREF)) { /* -- GitLab From 7332537962956fab2c055b37e5e2e6a0d2a8d6bf Mon Sep 17 00:00:00 2001 From: Jared Kangas Date: Tue, 21 Jan 2025 06:25:04 -0800 Subject: [PATCH 0060/1585] bpf: Remove unnecessary BTF lookups in bpf_sk_storage_tracing_allowed When loading BPF programs, bpf_sk_storage_tracing_allowed() does a series of lookups to get a type name from the program's attach_btf_id, making the assumption that the type is present in the vmlinux BTF along the way. However, this results in btf_type_by_id() returning a null pointer if a non-vmlinux kernel BTF is attached to. Proof-of-concept on a kernel with CONFIG_IPV6=m: $ cat bpfcrash.c #include #include #include static int bpf(enum bpf_cmd cmd, union bpf_attr *attr) { return syscall(__NR_bpf, cmd, attr, sizeof(*attr)); } int main(void) { const int btf_fd = bpf(BPF_BTF_GET_FD_BY_ID, &(union bpf_attr) { .btf_id = BTF_ID, }); if (btf_fd < 0) return 1; const int bpf_sk_storage_get = 107; const struct bpf_insn insns[] = { { .code = BPF_JMP | BPF_CALL, .imm = bpf_sk_storage_get}, { .code = BPF_JMP | BPF_EXIT }, }; return bpf(BPF_PROG_LOAD, &(union bpf_attr) { .prog_type = BPF_PROG_TYPE_TRACING, .expected_attach_type = BPF_TRACE_FENTRY, .license = (unsigned long)"GPL", .insns = (unsigned long)&insns, .insn_cnt = sizeof(insns) / sizeof(insns[0]), .attach_btf_obj_fd = btf_fd, .attach_btf_id = TYPE_ID, }); } $ sudo bpftool btf list | grep ipv6 2: name [ipv6] size 928200B $ sudo bpftool btf dump id 2 | awk '$3 ~ /inet6_sock_destruct/' [130689] FUNC 'inet6_sock_destruct' type_id=130677 linkage=static $ gcc -D_DEFAULT_SOURCE -DBTF_ID=2 -DTYPE_ID=130689 \ bpfcrash.c -o bpfcrash $ sudo ./bpfcrash This causes a null pointer dereference: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Call trace: bpf_sk_storage_tracing_allowed+0x8c/0xb0 P check_helper_call.isra.0+0xa8/0x1730 do_check+0xa18/0xb40 do_check_common+0x140/0x640 bpf_check+0xb74/0xcb8 bpf_prog_load+0x598/0x9a8 __sys_bpf+0x580/0x980 __arm64_sys_bpf+0x28/0x40 invoke_syscall.constprop.0+0x54/0xe8 do_el0_svc+0xb4/0xd0 el0_svc+0x44/0x1f8 el0t_64_sync_handler+0x13c/0x160 el0t_64_sync+0x184/0x188 Resolve this by using prog->aux->attach_func_name and removing the lookups. Fixes: 8e4597c627fb ("bpf: Allow using bpf_sk_storage in FENTRY/FEXIT/RAW_TP") Suggested-by: Martin KaFai Lau Signed-off-by: Jared Kangas Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20250121142504.1369436-1-jkangas@redhat.com Signed-off-by: Alexei Starovoitov --- net/core/bpf_sk_storage.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 7d41cde1bcca6..2e538399757fe 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -355,11 +355,6 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto = { static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog) { - const struct btf *btf_vmlinux; - const struct btf_type *t; - const char *tname; - u32 btf_id; - if (prog->aux->dst_prog) return false; @@ -374,13 +369,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog) return true; case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: - btf_vmlinux = bpf_get_btf_vmlinux(); - if (IS_ERR_OR_NULL(btf_vmlinux)) - return false; - btf_id = prog->aux->attach_btf_id; - t = btf_type_by_id(btf_vmlinux, btf_id); - tname = btf_name_by_offset(btf_vmlinux, t->name_off); - return !!strncmp(tname, "bpf_sk_storage", + return !!strncmp(prog->aux->attach_func_name, "bpf_sk_storage", strlen("bpf_sk_storage")); default: return false; -- GitLab From 6b3d638ca897e099fa99bd6d02189d3176f80a47 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Wed, 22 Jan 2025 00:06:42 +0900 Subject: [PATCH 0061/1585] bpf, test_run: Fix use-after-free issue in eth_skb_pkt_type() KMSAN reported a use-after-free issue in eth_skb_pkt_type()[1]. The cause of the issue was that eth_skb_pkt_type() accessed skb's data that didn't contain an Ethernet header. This occurs when bpf_prog_test_run_xdp() passes an invalid value as the user_data argument to bpf_test_init(). Fix this by returning an error when user_data is less than ETH_HLEN in bpf_test_init(). Additionally, remove the check for "if (user_size > size)" as it is unnecessary. [1] BUG: KMSAN: use-after-free in eth_skb_pkt_type include/linux/etherdevice.h:627 [inline] BUG: KMSAN: use-after-free in eth_type_trans+0x4ee/0x980 net/ethernet/eth.c:165 eth_skb_pkt_type include/linux/etherdevice.h:627 [inline] eth_type_trans+0x4ee/0x980 net/ethernet/eth.c:165 __xdp_build_skb_from_frame+0x5a8/0xa50 net/core/xdp.c:635 xdp_recv_frames net/bpf/test_run.c:272 [inline] xdp_test_run_batch net/bpf/test_run.c:361 [inline] bpf_test_run_xdp_live+0x2954/0x3330 net/bpf/test_run.c:390 bpf_prog_test_run_xdp+0x148e/0x1b10 net/bpf/test_run.c:1318 bpf_prog_test_run+0x5b7/0xa30 kernel/bpf/syscall.c:4371 __sys_bpf+0x6a6/0xe20 kernel/bpf/syscall.c:5777 __do_sys_bpf kernel/bpf/syscall.c:5866 [inline] __se_sys_bpf kernel/bpf/syscall.c:5864 [inline] __x64_sys_bpf+0xa4/0xf0 kernel/bpf/syscall.c:5864 x64_sys_call+0x2ea0/0x3d90 arch/x86/include/generated/asm/syscalls_64.h:322 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xd9/0x1d0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Uninit was created at: free_pages_prepare mm/page_alloc.c:1056 [inline] free_unref_page+0x156/0x1320 mm/page_alloc.c:2657 __free_pages+0xa3/0x1b0 mm/page_alloc.c:4838 bpf_ringbuf_free kernel/bpf/ringbuf.c:226 [inline] ringbuf_map_free+0xff/0x1e0 kernel/bpf/ringbuf.c:235 bpf_map_free kernel/bpf/syscall.c:838 [inline] bpf_map_free_deferred+0x17c/0x310 kernel/bpf/syscall.c:862 process_one_work kernel/workqueue.c:3229 [inline] process_scheduled_works+0xa2b/0x1b60 kernel/workqueue.c:3310 worker_thread+0xedf/0x1550 kernel/workqueue.c:3391 kthread+0x535/0x6b0 kernel/kthread.c:389 ret_from_fork+0x6e/0x90 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 CPU: 1 UID: 0 PID: 17276 Comm: syz.1.16450 Not tainted 6.12.0-05490-g9bb88c659673 #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-3.fc41 04/01/2014 Fixes: be3d72a2896c ("bpf: move user_size out of bpf_test_init") Reported-by: syzkaller Suggested-by: Martin KaFai Lau Signed-off-by: Shigeru Yoshida Signed-off-by: Martin KaFai Lau Acked-by: Stanislav Fomichev Acked-by: Daniel Borkmann Link: https://patch.msgid.link/20250121150643.671650-1-syoshida@redhat.com Signed-off-by: Alexei Starovoitov --- net/bpf/test_run.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 8f6f7db48d4e4..7cb192cbd65f3 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -660,12 +660,9 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, void __user *data_in = u64_to_user_ptr(kattr->test.data_in); void *data; - if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom) + if (user_size < ETH_HLEN || user_size > PAGE_SIZE - headroom - tailroom) return ERR_PTR(-EINVAL); - if (user_size > size) - return ERR_PTR(-EMSGSIZE); - size = SKB_DATA_ALIGN(size); data = kzalloc(size + headroom + tailroom, GFP_USER); if (!data) -- GitLab From c7f2188d68c114095660a950b7e880a1e5a71c8f Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Wed, 22 Jan 2025 00:06:43 +0900 Subject: [PATCH 0062/1585] selftests/bpf: Adjust data size to have ETH_HLEN The function bpf_test_init() now returns an error if user_size (.data_size_in) is less than ETH_HLEN, causing the tests to fail. Adjust the data size to ensure it meets the requirement of ETH_HLEN. Signed-off-by: Shigeru Yoshida Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20250121150643.671650-2-syoshida@redhat.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c | 4 ++-- .../testing/selftests/bpf/prog_tests/xdp_devmap_attach.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c index c7f74f068e788..df27535995af8 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c @@ -52,10 +52,10 @@ static void test_xdp_with_cpumap_helpers(void) ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id"); /* send a packet to trigger any potential bugs in there */ - char data[10] = {}; + char data[ETH_HLEN] = {}; DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, - .data_size_in = 10, + .data_size_in = sizeof(data), .flags = BPF_F_TEST_XDP_LIVE_FRAMES, .repeat = 1, ); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index 27ffed17d4be3..461ab18705d5c 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -23,7 +23,7 @@ static void test_xdp_with_devmap_helpers(void) __u32 len = sizeof(info); int err, dm_fd, dm_fd_redir, map_fd; struct nstoken *nstoken = NULL; - char data[10] = {}; + char data[ETH_HLEN] = {}; __u32 idx = 0; SYS(out_close, "ip netns add %s", TEST_NS); @@ -58,7 +58,7 @@ static void test_xdp_with_devmap_helpers(void) /* send a packet to trigger any potential bugs in there */ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, - .data_size_in = 10, + .data_size_in = sizeof(data), .flags = BPF_F_TEST_XDP_LIVE_FRAMES, .repeat = 1, ); @@ -158,7 +158,7 @@ static void test_xdp_with_devmap_helpers_veth(void) struct nstoken *nstoken = NULL; __u32 len = sizeof(info); int err, dm_fd, dm_fd_redir, map_fd, ifindex_dst; - char data[10] = {}; + char data[ETH_HLEN] = {}; __u32 idx = 0; SYS(out_close, "ip netns add %s", TEST_NS); @@ -208,7 +208,7 @@ static void test_xdp_with_devmap_helpers_veth(void) /* send a packet to trigger any potential bugs in there */ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, - .data_size_in = 10, + .data_size_in = sizeof(data), .flags = BPF_F_TEST_XDP_LIVE_FRAMES, .repeat = 1, ); -- GitLab From 98671a0fd1f14e4a518ee06b19037c20014900eb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Jan 2025 17:22:45 -0800 Subject: [PATCH 0063/1585] bpf: unify VM_WRITE vs VM_MAYWRITE use in BPF map mmaping logic For all BPF maps we ensure that VM_MAYWRITE is cleared when memory-mapping BPF map contents as initially read-only VMA. This is because in some cases BPF verifier relies on the underlying data to not be modified afterwards by user space, so once something is mapped read-only, it shouldn't be re-mmap'ed as read-write. As such, it's not necessary to check VM_MAYWRITE in bpf_map_mmap() and map->ops->map_mmap() callbacks: VM_WRITE should be consistently set for read-write mappings, and if VM_WRITE is not set, there is no way for user space to upgrade read-only mapping to read-write one. This patch cleans up this VM_WRITE vs VM_MAYWRITE handling within bpf_map_mmap(), which is an entry point for any BPF map mmap()-ing logic. We also drop unnecessary sanitization of VM_MAYWRITE in BPF ringbuf's map_mmap() callback implementation, as it is already performed by common code in bpf_map_mmap(). Note, though, that in bpf_map_mmap_{open,close}() callbacks we can't drop VM_MAYWRITE use, because it's possible (and is outside of subsystem's control) to have initially read-write memory mapping, which is subsequently dropped to read-only by user space through mprotect(). In such case, from BPF verifier POV it's read-write data throughout the lifetime of BPF map, and is counted as "active writer". But its VMAs will start out as VM_WRITE|VM_MAYWRITE, then mprotect() can change it to just VM_MAYWRITE (and no VM_WRITE), so when its finally munmap()'ed and bpf_map_mmap_close() is called, vm_flags will be just VM_MAYWRITE, but we still need to decrement active writer count with bpf_map_write_active_dec() as it's still considered to be a read-write mapping by the rest of BPF subsystem. Similar reasoning applies to bpf_map_mmap_open(), which is called whenever mmap(), munmap(), and/or mprotect() forces mm subsystem to split original VMA into multiple discontiguous VMAs. Memory-mapping handling is a bit tricky, yes. Cc: Jann Horn Cc: Suren Baghdasaryan Cc: Shakeel Butt Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20250129012246.1515826-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/ringbuf.c | 4 ---- kernel/bpf/syscall.c | 10 ++++++++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index e1cfe890e0be6..1499d8caa9a35 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -268,8 +268,6 @@ static int ringbuf_map_mmap_kern(struct bpf_map *map, struct vm_area_struct *vma /* allow writable mapping for the consumer_pos only */ if (vma->vm_pgoff != 0 || vma->vm_end - vma->vm_start != PAGE_SIZE) return -EPERM; - } else { - vm_flags_clear(vma, VM_MAYWRITE); } /* remap_vmalloc_range() checks size and offset constraints */ return remap_vmalloc_range(vma, rb_map->rb, @@ -289,8 +287,6 @@ static int ringbuf_map_mmap_user(struct bpf_map *map, struct vm_area_struct *vma * position, and the ring buffer data itself. */ return -EPERM; - } else { - vm_flags_clear(vma, VM_MAYWRITE); } /* remap_vmalloc_range() checks size and offset constraints */ return remap_vmalloc_range(vma, rb_map->rb, vma->vm_pgoff + RINGBUF_PGOFF); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0daf098e32074..9bec3dce421f0 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1065,15 +1065,21 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_ops = &bpf_map_default_vmops; vma->vm_private_data = map; vm_flags_clear(vma, VM_MAYEXEC); + /* If mapping is read-only, then disallow potentially re-mapping with + * PROT_WRITE by dropping VM_MAYWRITE flag. This VM_MAYWRITE clearing + * means that as far as BPF map's memory-mapped VMAs are concerned, + * VM_WRITE and VM_MAYWRITE and equivalent, if one of them is set, + * both should be set, so we can forget about VM_MAYWRITE and always + * check just VM_WRITE + */ if (!(vma->vm_flags & VM_WRITE)) - /* disallow re-mapping with PROT_WRITE */ vm_flags_clear(vma, VM_MAYWRITE); err = map->ops->map_mmap(map, vma); if (err) goto out; - if (vma->vm_flags & VM_MAYWRITE) + if (vma->vm_flags & VM_WRITE) bpf_map_write_active_inc(map); out: mutex_unlock(&map->freeze_mutex); -- GitLab From bc27c52eea189e8f7492d40739b7746d67b65beb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Jan 2025 17:22:46 -0800 Subject: [PATCH 0064/1585] bpf: avoid holding freeze_mutex during mmap operation We use map->freeze_mutex to prevent races between map_freeze() and memory mapping BPF map contents with writable permissions. The way we naively do this means we'll hold freeze_mutex for entire duration of all the mm and VMA manipulations, which is completely unnecessary. This can potentially also lead to deadlocks, as reported by syzbot in [0]. So, instead, hold freeze_mutex only during writeability checks, bump (proactively) "write active" count for the map, unlock the mutex and proceed with mmap logic. And only if something went wrong during mmap logic, then undo that "write active" counter increment. [0] https://lore.kernel.org/bpf/678dcbc9.050a0220.303755.0066.GAE@google.com/ Fixes: fc9702273e2e ("bpf: Add mmap() support for BPF_MAP_TYPE_ARRAY") Reported-by: syzbot+4dc041c686b7c816a71e@syzkaller.appspotmail.com Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20250129012246.1515826-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/syscall.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 9bec3dce421f0..14d6e99459d32 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1035,7 +1035,7 @@ static const struct vm_operations_struct bpf_map_default_vmops = { static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) { struct bpf_map *map = filp->private_data; - int err; + int err = 0; if (!map->ops->map_mmap || !IS_ERR_OR_NULL(map->record)) return -ENOTSUPP; @@ -1059,7 +1059,12 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) err = -EACCES; goto out; } + bpf_map_write_active_inc(map); } +out: + mutex_unlock(&map->freeze_mutex); + if (err) + return err; /* set default open/close callbacks */ vma->vm_ops = &bpf_map_default_vmops; @@ -1076,13 +1081,11 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) vm_flags_clear(vma, VM_MAYWRITE); err = map->ops->map_mmap(map, vma); - if (err) - goto out; + if (err) { + if (vma->vm_flags & VM_WRITE) + bpf_map_write_active_dec(map); + } - if (vma->vm_flags & VM_WRITE) - bpf_map_write_active_inc(map); -out: - mutex_unlock(&map->freeze_mutex); return err; } -- GitLab From e1e17a1715982201034024863efbf238bee2bdf9 Mon Sep 17 00:00:00 2001 From: Prasad Pandit Date: Mon, 11 Mar 2024 16:21:22 +0530 Subject: [PATCH 0065/1585] firmware: iscsi_ibft: fix ISCSI_IBFT Kconfig entry Fix ISCSI_IBFT Kconfig entry, replace tab with a space character. Fixes: 138fe4e0697 ("Firmware: add iSCSI iBFT Support") Signed-off-by: Prasad Pandit Signed-off-by: Konrad Rzeszutek Wilk --- drivers/firmware/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 71d8b26c4103b..9f35f69e0f9e2 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -106,7 +106,7 @@ config ISCSI_IBFT select ISCSI_BOOT_SYSFS select ISCSI_IBFT_FIND if X86 depends on ACPI && SCSI && SCSI_LOWLEVEL - default n + default n help This option enables support for detection and exposing of iSCSI Boot Firmware Table (iBFT) via sysfs to userspace. If you wish to -- GitLab From 07e0d99a2f701123ad3104c0f1a1e66bce74d6e5 Mon Sep 17 00:00:00 2001 From: Chengen Du Date: Tue, 14 Jan 2025 12:12:34 +0800 Subject: [PATCH 0066/1585] iscsi_ibft: Fix UBSAN shift-out-of-bounds warning in ibft_attr_show_nic() When performing an iSCSI boot using IPv6, iscsistart still reads the /sys/firmware/ibft/ethernetX/subnet-mask entry. Since the IPv6 prefix length is 64, this causes the shift exponent to become negative, triggering a UBSAN warning. As the concept of a subnet mask does not apply to IPv6, the value is set to ~0 to suppress the warning message. Signed-off-by: Chengen Du Signed-off-by: Konrad Rzeszutek Wilk --- drivers/firmware/iscsi_ibft.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c index 6e9788324fea5..371f24569b3b2 100644 --- a/drivers/firmware/iscsi_ibft.c +++ b/drivers/firmware/iscsi_ibft.c @@ -310,7 +310,10 @@ static ssize_t ibft_attr_show_nic(void *data, int type, char *buf) str += sprintf_ipaddr(str, nic->ip_addr); break; case ISCSI_BOOT_ETH_SUBNET_MASK: - val = cpu_to_be32(~((1 << (32-nic->subnet_mask_prefix))-1)); + if (nic->subnet_mask_prefix > 32) + val = cpu_to_be32(~0); + else + val = cpu_to_be32(~((1 << (32-nic->subnet_mask_prefix))-1)); str += sprintf(str, "%pI4", &val); break; case ISCSI_BOOT_ETH_PREFIX_LEN: -- GitLab From 79fc672a092d93a7eac24fe20a571d4efd8fa5a4 Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Thu, 19 Dec 2024 17:02:56 +0800 Subject: [PATCH 0067/1585] drm/komeda: Add check for komeda_get_layer_fourcc_list() Add check for the return value of komeda_get_layer_fourcc_list() to catch the potential exception. Fixes: 5d51f6c0da1b ("drm/komeda: Add writeback support") Cc: stable@vger.kernel.org Signed-off-by: Haoxiang Li Acked-by: Liviu Dudau Link: https://lore.kernel.org/r/20241219090256.146424-1-haoxiang_li2024@163.com Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c index ebccb74306a76..f30b3d5eeca5c 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c @@ -160,6 +160,10 @@ static int komeda_wb_connector_add(struct komeda_kms_dev *kms, formats = komeda_get_layer_fourcc_list(&mdev->fmt_tbl, kwb_conn->wb_layer->layer_type, &n_formats); + if (!formats) { + kfree(kwb_conn); + return -ENOMEM; + } err = drm_writeback_connector_init(&kms->base, wb_conn, &komeda_wb_connector_funcs, -- GitLab From 0532a79efd68a4d9686b0385e4993af4b130ff82 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Wed, 22 Jan 2025 18:09:13 +0800 Subject: [PATCH 0068/1585] strparser: Add read_sock callback Added a new read_sock handler, allowing users to customize read operations instead of relying on the native socket's read_sock. Signed-off-by: Jiayuan Chen Signed-off-by: Martin KaFai Lau Reviewed-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://patch.msgid.link/20250122100917.49845-2-mrpre@163.com --- Documentation/networking/strparser.rst | 9 ++++++++- include/net/strparser.h | 2 ++ net/strparser/strparser.c | 11 +++++++++-- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/strparser.rst b/Documentation/networking/strparser.rst index 6cab1f74ae05a..7f623d1db72aa 100644 --- a/Documentation/networking/strparser.rst +++ b/Documentation/networking/strparser.rst @@ -112,7 +112,7 @@ Functions Callbacks ========= -There are six callbacks: +There are seven callbacks: :: @@ -182,6 +182,13 @@ There are six callbacks: the length of the message. skb->len - offset may be greater then full_len since strparser does not trim the skb. + :: + + int (*read_sock)(struct strparser *strp, read_descriptor_t *desc, + sk_read_actor_t recv_actor); + + The read_sock callback is used by strparser instead of + sock->ops->read_sock, if provided. :: int (*read_sock_done)(struct strparser *strp, int err); diff --git a/include/net/strparser.h b/include/net/strparser.h index 41e2ce9e9e10f..0a83010b3a64a 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -43,6 +43,8 @@ struct strparser; struct strp_callbacks { int (*parse_msg)(struct strparser *strp, struct sk_buff *skb); void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb); + int (*read_sock)(struct strparser *strp, read_descriptor_t *desc, + sk_read_actor_t recv_actor); int (*read_sock_done)(struct strparser *strp, int err); void (*abort_parser)(struct strparser *strp, int err); void (*lock)(struct strparser *strp); diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 8299ceb3e3739..95696f42647ec 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -347,7 +347,10 @@ static int strp_read_sock(struct strparser *strp) struct socket *sock = strp->sk->sk_socket; read_descriptor_t desc; - if (unlikely(!sock || !sock->ops || !sock->ops->read_sock)) + if (unlikely(!sock || !sock->ops)) + return -EBUSY; + + if (unlikely(!strp->cb.read_sock && !sock->ops->read_sock)) return -EBUSY; desc.arg.data = strp; @@ -355,7 +358,10 @@ static int strp_read_sock(struct strparser *strp) desc.count = 1; /* give more than one skb per call */ /* sk should be locked here, so okay to do read_sock */ - sock->ops->read_sock(strp->sk, &desc, strp_recv); + if (strp->cb.read_sock) + strp->cb.read_sock(strp, &desc, strp_recv); + else + sock->ops->read_sock(strp->sk, &desc, strp_recv); desc.error = strp->cb.read_sock_done(strp, desc.error); @@ -468,6 +474,7 @@ int strp_init(struct strparser *strp, struct sock *sk, strp->cb.unlock = cb->unlock ? : strp_sock_unlock; strp->cb.rcv_msg = cb->rcv_msg; strp->cb.parse_msg = cb->parse_msg; + strp->cb.read_sock = cb->read_sock; strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done; strp->cb.abort_parser = cb->abort_parser ? : strp_abort_strp; -- GitLab From 36b62df5683c315ba58c950f1a9c771c796c30ec Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Wed, 22 Jan 2025 18:09:14 +0800 Subject: [PATCH 0069/1585] bpf: Fix wrong copied_seq calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'sk->copied_seq' was updated in the tcp_eat_skb() function when the action of a BPF program was SK_REDIRECT. For other actions, like SK_PASS, the update logic for 'sk->copied_seq' was moved to tcp_bpf_recvmsg_parser() to ensure the accuracy of the 'fionread' feature. It works for a single stream_verdict scenario, as it also modified sk_data_ready->sk_psock_verdict_data_ready->tcp_read_skb to remove updating 'sk->copied_seq'. However, for programs where both stream_parser and stream_verdict are active (strparser purpose), tcp_read_sock() was used instead of tcp_read_skb() (sk_data_ready->strp_data_ready->tcp_read_sock). tcp_read_sock() now still updates 'sk->copied_seq', leading to duplicate updates. In summary, for strparser + SK_PASS, copied_seq is redundantly calculated in both tcp_read_sock() and tcp_bpf_recvmsg_parser(). The issue causes incorrect copied_seq calculations, which prevent correct data reads from the recv() interface in user-land. We do not want to add new proto_ops to implement a new version of tcp_read_sock, as this would introduce code complexity [1]. We could have added noack and copied_seq to desc, and then called ops->read_sock. However, unfortunately, other modules didn’t fully initialize desc to zero. So, for now, we are directly calling tcp_read_sock_noack() in tcp_bpf.c. [1]: https://lore.kernel.org/bpf/20241218053408.437295-1-mrpre@163.com Fixes: e5c6de5fa025 ("bpf, sockmap: Incorrectly handling copied_seq") Suggested-by: Jakub Sitnicki Signed-off-by: Jiayuan Chen Signed-off-by: Martin KaFai Lau Reviewed-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://patch.msgid.link/20250122100917.49845-3-mrpre@163.com --- include/linux/skmsg.h | 2 ++ include/net/tcp.h | 8 ++++++++ net/core/skmsg.c | 7 +++++++ net/ipv4/tcp.c | 29 ++++++++++++++++++++++++----- net/ipv4/tcp_bpf.c | 36 ++++++++++++++++++++++++++++++++++++ 5 files changed, 77 insertions(+), 5 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 2cbe0c22a32f3..0b9095a281b89 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -91,6 +91,8 @@ struct sk_psock { struct sk_psock_progs progs; #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) struct strparser strp; + u32 copied_seq; + u32 ingress_bytes; #endif struct sk_buff_head ingress_skb; struct list_head ingress_msg; diff --git a/include/net/tcp.h b/include/net/tcp.h index 5b2b04835688f..9c044fb9ab26e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -729,6 +729,9 @@ void tcp_get_info(struct sock *, struct tcp_info *); /* Read 'sendfile()'-style from a TCP socket */ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); +int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor, bool noack, + u32 *copied_seq); int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor); struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off); void tcp_read_done(struct sock *sk, size_t len); @@ -2599,6 +2602,11 @@ struct sk_psock; #ifdef CONFIG_BPF_SYSCALL int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); void tcp_bpf_clone(const struct sock *sk, struct sock *newsk); +#ifdef CONFIG_BPF_STREAM_PARSER +struct strparser; +int tcp_bpf_strp_read_sock(struct strparser *strp, read_descriptor_t *desc, + sk_read_actor_t recv_actor); +#endif /* CONFIG_BPF_STREAM_PARSER */ #endif /* CONFIG_BPF_SYSCALL */ #ifdef CONFIG_INET diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 61f3f3d4e5285..0ddc4c7188332 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -549,6 +549,9 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, return num_sge; } +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) + psock->ingress_bytes += len; +#endif copied = len; msg->sg.start = 0; msg->sg.size = copied; @@ -1144,6 +1147,10 @@ int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock) if (!ret) sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED); + if (sk_is_tcp(sk)) { + psock->strp.cb.read_sock = tcp_bpf_strp_read_sock; + psock->copied_seq = tcp_sk(sk)->copied_seq; + } return ret; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0d704bda6c416..285678d8ce077 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1565,12 +1565,13 @@ EXPORT_SYMBOL(tcp_recv_skb); * or for 'peeking' the socket using this routine * (although both would be easy to implement). */ -int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor) +static int __tcp_read_sock(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor, bool noack, + u32 *copied_seq) { struct sk_buff *skb; struct tcp_sock *tp = tcp_sk(sk); - u32 seq = tp->copied_seq; + u32 seq = *copied_seq; u32 offset; int copied = 0; @@ -1624,9 +1625,12 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, tcp_eat_recv_skb(sk, skb); if (!desc->count) break; - WRITE_ONCE(tp->copied_seq, seq); + WRITE_ONCE(*copied_seq, seq); } - WRITE_ONCE(tp->copied_seq, seq); + WRITE_ONCE(*copied_seq, seq); + + if (noack) + goto out; tcp_rcv_space_adjust(sk); @@ -1635,10 +1639,25 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, tcp_recv_skb(sk, seq, &offset); tcp_cleanup_rbuf(sk, copied); } +out: return copied; } + +int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor) +{ + return __tcp_read_sock(sk, desc, recv_actor, false, + &tcp_sk(sk)->copied_seq); +} EXPORT_SYMBOL(tcp_read_sock); +int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor, bool noack, + u32 *copied_seq) +{ + return __tcp_read_sock(sk, desc, recv_actor, noack, copied_seq); +} + int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { struct sk_buff *skb; diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 47f65b1b70ca2..ba581785adb4b 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -646,6 +646,42 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops) ops->sendmsg == tcp_sendmsg ? 0 : -ENOTSUPP; } +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) +int tcp_bpf_strp_read_sock(struct strparser *strp, read_descriptor_t *desc, + sk_read_actor_t recv_actor) +{ + struct sock *sk = strp->sk; + struct sk_psock *psock; + struct tcp_sock *tp; + int copied = 0; + + tp = tcp_sk(sk); + rcu_read_lock(); + psock = sk_psock(sk); + if (WARN_ON_ONCE(!psock)) { + desc->error = -EINVAL; + goto out; + } + + psock->ingress_bytes = 0; + copied = tcp_read_sock_noack(sk, desc, recv_actor, true, + &psock->copied_seq); + if (copied < 0) + goto out; + /* recv_actor may redirect skb to another socket (SK_REDIRECT) or + * just put skb into ingress queue of current socket (SK_PASS). + * For SK_REDIRECT, we need to ack the frame immediately but for + * SK_PASS, we want to delay the ack until tcp_bpf_recvmsg_parser(). + */ + tp->copied_seq = psock->copied_seq - psock->ingress_bytes; + tcp_rcv_space_adjust(sk); + __tcp_cleanup_rbuf(sk, copied - psock->ingress_bytes); +out: + rcu_read_unlock(); + return copied; +} +#endif /* CONFIG_BPF_STREAM_PARSER */ + int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) { int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; -- GitLab From 5459cce6bf49e72ee29be21865869c2ac42419f5 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Wed, 22 Jan 2025 18:09:15 +0800 Subject: [PATCH 0070/1585] bpf: Disable non stream socket for strparser Currently, only TCP supports strparser, but sockmap doesn't intercept non-TCP connections to attach strparser. For example, with UDP, although the read/write handlers are replaced, strparser is not executed due to the lack of a read_sock operation. Furthermore, in udp_bpf_recvmsg(), it checks whether the psock has data, and if not, it falls back to the native UDP read interface, making UDP + strparser appear to read correctly. According to its commit history, this behavior is unexpected. Moreover, since UDP lacks the concept of streams, we intercept it directly. Fixes: 1fa1fe8ff161 ("bpf, sockmap: Test shutdown() correctly exits epoll and recv()=0") Signed-off-by: Jiayuan Chen Signed-off-by: Martin KaFai Lau Acked-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://patch.msgid.link/20250122100917.49845-4-mrpre@163.com --- net/core/sock_map.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index f1b9b3958792c..3b0f59d9b4db8 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -303,7 +303,10 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk) write_lock_bh(&sk->sk_callback_lock); if (stream_parser && stream_verdict && !psock->saved_data_ready) { - ret = sk_psock_init_strp(sk, psock); + if (sk_is_tcp(sk)) + ret = sk_psock_init_strp(sk, psock); + else + ret = -EOPNOTSUPP; if (ret) { write_unlock_bh(&sk->sk_callback_lock); sk_psock_put(sk, psock); -- GitLab From a0c11149509aa905aeec10cf9998091443472b0b Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Wed, 22 Jan 2025 18:09:16 +0800 Subject: [PATCH 0071/1585] selftests/bpf: Fix invalid flag of recv() SOCK_NONBLOCK flag is only effective during socket creation, not during recv. Use MSG_DONTWAIT instead. Signed-off-by: Jiayuan Chen Signed-off-by: Martin KaFai Lau Acked-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://patch.msgid.link/20250122100917.49845-5-mrpre@163.com --- tools/testing/selftests/bpf/prog_tests/sockmap_basic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 884ad87783d59..0c51b7288978e 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -522,8 +522,8 @@ static void test_sockmap_skb_verdict_shutdown(void) if (!ASSERT_EQ(err, 1, "epoll_wait(fd)")) goto out_close; - n = recv(c1, &b, 1, SOCK_NONBLOCK); - ASSERT_EQ(n, 0, "recv_timeout(fin)"); + n = recv(c1, &b, 1, MSG_DONTWAIT); + ASSERT_EQ(n, 0, "recv(fin)"); out_close: close(c1); close(p1); @@ -628,7 +628,7 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog) ASSERT_EQ(avail, expected, "ioctl(FIONREAD)"); /* On DROP test there will be no data to read */ if (pass_prog) { - recvd = recv_timeout(c1, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC); + recvd = recv_timeout(c1, &buf, sizeof(buf), MSG_DONTWAIT, IO_TIMEOUT_SEC); ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)"); } -- GitLab From 6fcfe96e0f6e9bebe1b185f1548a9a8cb1b68dea Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Wed, 22 Jan 2025 18:09:17 +0800 Subject: [PATCH 0072/1585] selftests/bpf: Add strparser test for bpf Add test cases for bpf + strparser and separated them from sockmap_basic, as strparser has more encapsulation and parsing capabilities compared to standard sockmap. Signed-off-by: Jiayuan Chen Signed-off-by: Martin KaFai Lau Acked-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://patch.msgid.link/20250122100917.49845-6-mrpre@163.com --- .../selftests/bpf/prog_tests/sockmap_basic.c | 53 -- .../selftests/bpf/prog_tests/sockmap_strp.c | 454 ++++++++++++++++++ .../selftests/bpf/progs/test_sockmap_strp.c | 53 ++ 3 files changed, 507 insertions(+), 53 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/sockmap_strp.c create mode 100644 tools/testing/selftests/bpf/progs/test_sockmap_strp.c diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 0c51b7288978e..f8953455db29f 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -531,57 +531,6 @@ static void test_sockmap_skb_verdict_shutdown(void) test_sockmap_pass_prog__destroy(skel); } -static void test_sockmap_stream_pass(void) -{ - int zero = 0, sent, recvd; - int verdict, parser; - int err, map; - int c = -1, p = -1; - struct test_sockmap_pass_prog *pass = NULL; - char snd[256] = "0123456789"; - char rcv[256] = "0"; - - pass = test_sockmap_pass_prog__open_and_load(); - verdict = bpf_program__fd(pass->progs.prog_skb_verdict); - parser = bpf_program__fd(pass->progs.prog_skb_parser); - map = bpf_map__fd(pass->maps.sock_map_rx); - - err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0); - if (!ASSERT_OK(err, "bpf_prog_attach stream parser")) - goto out; - - err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0); - if (!ASSERT_OK(err, "bpf_prog_attach stream verdict")) - goto out; - - err = create_pair(AF_INET, SOCK_STREAM, &c, &p); - if (err) - goto out; - - /* sk_data_ready of 'p' will be replaced by strparser handler */ - err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST); - if (!ASSERT_OK(err, "bpf_map_update_elem(p)")) - goto out_close; - - /* - * as 'prog_skb_parser' return the original skb len and - * 'prog_skb_verdict' return SK_PASS, the kernel will just - * pass it through to original socket 'p' - */ - sent = xsend(c, snd, sizeof(snd), 0); - ASSERT_EQ(sent, sizeof(snd), "xsend(c)"); - - recvd = recv_timeout(p, rcv, sizeof(rcv), SOCK_NONBLOCK, - IO_TIMEOUT_SEC); - ASSERT_EQ(recvd, sizeof(rcv), "recv_timeout(p)"); - -out_close: - close(c); - close(p); - -out: - test_sockmap_pass_prog__destroy(pass); -} static void test_sockmap_skb_verdict_fionread(bool pass_prog) { @@ -1101,8 +1050,6 @@ void test_sockmap_basic(void) test_sockmap_progs_query(BPF_SK_SKB_VERDICT); if (test__start_subtest("sockmap skb_verdict shutdown")) test_sockmap_skb_verdict_shutdown(); - if (test__start_subtest("sockmap stream parser and verdict pass")) - test_sockmap_stream_pass(); if (test__start_subtest("sockmap skb_verdict fionread")) test_sockmap_skb_verdict_fionread(true); if (test__start_subtest("sockmap skb_verdict fionread on drop")) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c new file mode 100644 index 0000000000000..621b3b71888ef --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c @@ -0,0 +1,454 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include "sockmap_helpers.h" +#include "test_skmsg_load_helpers.skel.h" +#include "test_sockmap_strp.skel.h" + +#define STRP_PKT_HEAD_LEN 4 +#define STRP_PKT_BODY_LEN 6 +#define STRP_PKT_FULL_LEN (STRP_PKT_HEAD_LEN + STRP_PKT_BODY_LEN) + +static const char packet[STRP_PKT_FULL_LEN] = "head+body\0"; +static const int test_packet_num = 100; + +/* Current implementation of tcp_bpf_recvmsg_parser() invokes data_ready + * with sk held if an skb exists in sk_receive_queue. Then for the + * data_ready implementation of strparser, it will delay the read + * operation if sk is held and EAGAIN is returned. + */ +static int sockmap_strp_consume_pre_data(int p) +{ + int recvd; + bool retried = false; + char rcv[10]; + +retry: + errno = 0; + recvd = recv_timeout(p, rcv, sizeof(rcv), 0, 1); + if (recvd < 0 && errno == EAGAIN && retried == false) { + /* On the first call, EAGAIN will certainly be returned. + * A 1-second wait is enough for the workqueue to finish. + */ + sleep(1); + retried = true; + goto retry; + } + + if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv error or truncated data") || + !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN), + "data mismatch")) + return -1; + return 0; +} + +static struct test_sockmap_strp *sockmap_strp_init(int *out_map, bool pass, + bool need_parser) +{ + struct test_sockmap_strp *strp = NULL; + int verdict, parser; + int err; + + strp = test_sockmap_strp__open_and_load(); + *out_map = bpf_map__fd(strp->maps.sock_map); + + if (need_parser) + parser = bpf_program__fd(strp->progs.prog_skb_parser_partial); + else + parser = bpf_program__fd(strp->progs.prog_skb_parser); + + if (pass) + verdict = bpf_program__fd(strp->progs.prog_skb_verdict_pass); + else + verdict = bpf_program__fd(strp->progs.prog_skb_verdict); + + err = bpf_prog_attach(parser, *out_map, BPF_SK_SKB_STREAM_PARSER, 0); + if (!ASSERT_OK(err, "bpf_prog_attach stream parser")) + goto err; + + err = bpf_prog_attach(verdict, *out_map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach stream verdict")) + goto err; + + return strp; +err: + test_sockmap_strp__destroy(strp); + return NULL; +} + +/* Dispatch packets to different socket by packet size: + * + * ------ ------ + * | pkt4 || pkt1 |... > remote socket + * ------ ------ / ------ ------ + * | pkt8 | pkt7 |... + * ------ ------ \ ------ ------ + * | pkt3 || pkt2 |... > local socket + * ------ ------ + */ +static void test_sockmap_strp_dispatch_pkt(int family, int sotype) +{ + int i, j, zero = 0, one = 1, recvd; + int err, map; + int c0 = -1, p0 = -1, c1 = -1, p1 = -1; + struct test_sockmap_strp *strp = NULL; + int test_cnt = 6; + char rcv[10]; + struct { + char data[7]; + int data_len; + int send_cnt; + int *receiver; + } send_dir[2] = { + /* data expected to deliver to local */ + {"llllll", 6, 0, &p0}, + /* data expected to deliver to remote */ + {"rrrrr", 5, 0, &c1} + }; + + strp = sockmap_strp_init(&map, false, false); + if (!ASSERT_TRUE(strp, "sockmap_strp_init")) + return; + + err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1); + if (!ASSERT_OK(err, "create_socket_pairs()")) + goto out; + + err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(p0)")) + goto out_close; + + err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(p1)")) + goto out_close; + + err = setsockopt(c1, IPPROTO_TCP, TCP_NODELAY, &zero, sizeof(zero)); + if (!ASSERT_OK(err, "setsockopt(TCP_NODELAY)")) + goto out_close; + + /* deliver data with data size greater than 5 to local */ + strp->data->verdict_max_size = 5; + + for (i = 0; i < test_cnt; i++) { + int d = i % 2; + + xsend(c0, send_dir[d].data, send_dir[d].data_len, 0); + send_dir[d].send_cnt++; + } + + for (i = 0; i < 2; i++) { + for (j = 0; j < send_dir[i].send_cnt; j++) { + int expected = send_dir[i].data_len; + + recvd = recv_timeout(*send_dir[i].receiver, rcv, + expected, MSG_DONTWAIT, + IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, expected, "recv_timeout()")) + goto out_close; + if (!ASSERT_OK(memcmp(send_dir[i].data, rcv, recvd), + "data mismatch")) + goto out_close; + } + } +out_close: + close(c0); + close(c1); + close(p0); + close(p1); +out: + test_sockmap_strp__destroy(strp); +} + +/* We have multiple packets in one skb + * ------------ ------------ ------------ + * | packet1 | packet2 | ... + * ------------ ------------ ------------ + */ +static void test_sockmap_strp_multiple_pkt(int family, int sotype) +{ + int i, zero = 0; + int sent, recvd, total; + int err, map; + int c = -1, p = -1; + struct test_sockmap_strp *strp = NULL; + char *snd = NULL, *rcv = NULL; + + strp = sockmap_strp_init(&map, true, true); + if (!ASSERT_TRUE(strp, "sockmap_strp_init")) + return; + + err = create_pair(family, sotype, &c, &p); + if (err) + goto out; + + err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)")) + goto out_close; + + /* construct multiple packets in one buffer */ + total = test_packet_num * STRP_PKT_FULL_LEN; + snd = malloc(total); + rcv = malloc(total + 1); + if (!ASSERT_TRUE(snd, "malloc(snd)") || + !ASSERT_TRUE(rcv, "malloc(rcv)")) + goto out_close; + + for (i = 0; i < test_packet_num; i++) { + memcpy(snd + i * STRP_PKT_FULL_LEN, + packet, STRP_PKT_FULL_LEN); + } + + sent = xsend(c, snd, total, 0); + if (!ASSERT_EQ(sent, total, "xsend(c)")) + goto out_close; + + /* try to recv one more byte to avoid truncation check */ + recvd = recv_timeout(p, rcv, total + 1, MSG_DONTWAIT, IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, total, "recv(rcv)")) + goto out_close; + + /* we sent TCP segment with multiple encapsulation + * then check whether packets are handled correctly + */ + if (!ASSERT_OK(memcmp(snd, rcv, total), "data mismatch")) + goto out_close; + +out_close: + close(c); + close(p); + if (snd) + free(snd); + if (rcv) + free(rcv); +out: + test_sockmap_strp__destroy(strp); +} + +/* Test strparser with partial read */ +static void test_sockmap_strp_partial_read(int family, int sotype) +{ + int zero = 0, recvd, off; + int err, map; + int c = -1, p = -1; + struct test_sockmap_strp *strp = NULL; + char rcv[STRP_PKT_FULL_LEN + 1] = "0"; + + strp = sockmap_strp_init(&map, true, true); + if (!ASSERT_TRUE(strp, "sockmap_strp_init")) + return; + + err = create_pair(family, sotype, &c, &p); + if (err) + goto out; + + /* sk_data_ready of 'p' will be replaced by strparser handler */ + err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)")) + goto out_close; + + /* 1.1 send partial head, 1 byte header left */ + off = STRP_PKT_HEAD_LEN - 1; + xsend(c, packet, off, 0); + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1); + if (!ASSERT_EQ(-1, recvd, "partial head sent, expected no data")) + goto out_close; + + /* 1.2 send remaining head and body */ + xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0); + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data")) + goto out_close; + + /* 2.1 send partial head, 1 byte header left */ + off = STRP_PKT_HEAD_LEN - 1; + xsend(c, packet, off, 0); + + /* 2.2 send remaining head and partial body, 1 byte body left */ + xsend(c, packet + off, STRP_PKT_FULL_LEN - off - 1, 0); + off = STRP_PKT_FULL_LEN - 1; + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1); + if (!ASSERT_EQ(-1, recvd, "partial body sent, expected no data")) + goto out_close; + + /* 2.3 send remaining body */ + xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0); + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data")) + goto out_close; + +out_close: + close(c); + close(p); + +out: + test_sockmap_strp__destroy(strp); +} + +/* Test simple socket read/write with strparser + FIONREAD */ +static void test_sockmap_strp_pass(int family, int sotype, bool fionread) +{ + int zero = 0, pkt_size = STRP_PKT_FULL_LEN, sent, recvd, avail; + int err, map; + int c = -1, p = -1; + int test_cnt = 10, i; + struct test_sockmap_strp *strp = NULL; + char rcv[STRP_PKT_FULL_LEN + 1] = "0"; + + strp = sockmap_strp_init(&map, true, true); + if (!ASSERT_TRUE(strp, "sockmap_strp_init")) + return; + + err = create_pair(family, sotype, &c, &p); + if (err) + goto out; + + /* inject some data before bpf process, it should be read + * correctly because we check sk_receive_queue in + * tcp_bpf_recvmsg_parser(). + */ + sent = xsend(c, packet, pkt_size, 0); + if (!ASSERT_EQ(sent, pkt_size, "xsend(pre-data)")) + goto out_close; + + /* sk_data_ready of 'p' will be replaced by strparser handler */ + err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(p)")) + goto out_close; + + /* consume previous data we injected */ + if (sockmap_strp_consume_pre_data(p)) + goto out_close; + + /* Previously, we encountered issues such as deadlocks and + * sequence errors that resulted in the inability to read + * continuously. Therefore, we perform multiple iterations + * of testing here. + */ + for (i = 0; i < test_cnt; i++) { + sent = xsend(c, packet, pkt_size, 0); + if (!ASSERT_EQ(sent, pkt_size, "xsend(c)")) + goto out_close; + + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, + IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, pkt_size, "recv_timeout(p)") || + !ASSERT_OK(memcmp(packet, rcv, pkt_size), + "memcmp, data mismatch")) + goto out_close; + } + + if (fionread) { + sent = xsend(c, packet, pkt_size, 0); + if (!ASSERT_EQ(sent, pkt_size, "second xsend(c)")) + goto out_close; + + err = ioctl(p, FIONREAD, &avail); + if (!ASSERT_OK(err, "ioctl(FIONREAD) error") || + !ASSERT_EQ(avail, pkt_size, "ioctl(FIONREAD)")) + goto out_close; + + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, + IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, pkt_size, "second recv_timeout(p)") || + !ASSERT_OK(memcmp(packet, rcv, pkt_size), + "second memcmp, data mismatch")) + goto out_close; + } + +out_close: + close(c); + close(p); + +out: + test_sockmap_strp__destroy(strp); +} + +/* Test strparser with verdict mode */ +static void test_sockmap_strp_verdict(int family, int sotype) +{ + int zero = 0, one = 1, sent, recvd, off; + int err, map; + int c0 = -1, p0 = -1, c1 = -1, p1 = -1; + struct test_sockmap_strp *strp = NULL; + char rcv[STRP_PKT_FULL_LEN + 1] = "0"; + + strp = sockmap_strp_init(&map, false, true); + if (!ASSERT_TRUE(strp, "sockmap_strp_init")) + return; + + /* We simulate a reverse proxy server. + * When p0 receives data from c0, we forward it to c1. + * From c1's perspective, it will consider this data + * as being sent by p1. + */ + err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1); + if (!ASSERT_OK(err, "create_socket_pairs()")) + goto out; + + err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(p0)")) + goto out_close; + + err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(p1)")) + goto out_close; + + sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0); + if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "xsend(c0)")) + goto out_close; + + recvd = recv_timeout(c1, rcv, sizeof(rcv), MSG_DONTWAIT, + IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv_timeout(c1)") || + !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN), + "received data does not match the sent data")) + goto out_close; + + /* send again to ensure the stream is functioning correctly. */ + sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0); + if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "second xsend(c0)")) + goto out_close; + + /* partial read */ + off = STRP_PKT_FULL_LEN / 2; + recvd = recv_timeout(c1, rcv, off, MSG_DONTWAIT, + IO_TIMEOUT_SEC); + recvd += recv_timeout(c1, rcv + off, sizeof(rcv) - off, MSG_DONTWAIT, + IO_TIMEOUT_SEC); + + if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "partial recv_timeout(c1)") || + !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN), + "partial received data does not match the sent data")) + goto out_close; + +out_close: + close(c0); + close(c1); + close(p0); + close(p1); +out: + test_sockmap_strp__destroy(strp); +} + +void test_sockmap_strp(void) +{ + if (test__start_subtest("sockmap strp tcp pass")) + test_sockmap_strp_pass(AF_INET, SOCK_STREAM, false); + if (test__start_subtest("sockmap strp tcp v6 pass")) + test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, false); + if (test__start_subtest("sockmap strp tcp pass fionread")) + test_sockmap_strp_pass(AF_INET, SOCK_STREAM, true); + if (test__start_subtest("sockmap strp tcp v6 pass fionread")) + test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, true); + if (test__start_subtest("sockmap strp tcp verdict")) + test_sockmap_strp_verdict(AF_INET, SOCK_STREAM); + if (test__start_subtest("sockmap strp tcp v6 verdict")) + test_sockmap_strp_verdict(AF_INET6, SOCK_STREAM); + if (test__start_subtest("sockmap strp tcp partial read")) + test_sockmap_strp_partial_read(AF_INET, SOCK_STREAM); + if (test__start_subtest("sockmap strp tcp multiple packets")) + test_sockmap_strp_multiple_pkt(AF_INET, SOCK_STREAM); + if (test__start_subtest("sockmap strp tcp dispatch")) + test_sockmap_strp_dispatch_pkt(AF_INET, SOCK_STREAM); +} diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_strp.c b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c new file mode 100644 index 0000000000000..dde3d5bec5154 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +int verdict_max_size = 10000; +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 20); + __type(key, int); + __type(value, int); +} sock_map SEC(".maps"); + +SEC("sk_skb/stream_verdict") +int prog_skb_verdict(struct __sk_buff *skb) +{ + __u32 one = 1; + + if (skb->len > verdict_max_size) + return SK_PASS; + + return bpf_sk_redirect_map(skb, &sock_map, one, 0); +} + +SEC("sk_skb/stream_verdict") +int prog_skb_verdict_pass(struct __sk_buff *skb) +{ + return SK_PASS; +} + +SEC("sk_skb/stream_parser") +int prog_skb_parser(struct __sk_buff *skb) +{ + return skb->len; +} + +SEC("sk_skb/stream_parser") +int prog_skb_parser_partial(struct __sk_buff *skb) +{ + /* agreement with the test program on a 4-byte size header + * and 6-byte body. + */ + if (skb->len < 4) { + /* need more header to determine full length */ + return 0; + } + /* return full length decoded from header. + * the return value may be larger than skb->len which + * means framework must wait body coming. + */ + return 10; +} + +char _license[] SEC("license") = "GPL"; -- GitLab From c78f4afbd962f43a3989f45f3ca04300252b19b5 Mon Sep 17 00:00:00 2001 From: Abel Wu Date: Sat, 21 Dec 2024 14:10:16 +0800 Subject: [PATCH 0073/1585] bpf: Fix deadlock when freeing cgroup storage The following commit bc235cdb423a ("bpf: Prevent deadlock from recursive bpf_task_storage_[get|delete]") first introduced deadlock prevention for fentry/fexit programs attaching on bpf_task_storage helpers. That commit also employed the logic in map free path in its v6 version. Later bpf_cgrp_storage was first introduced in c4bcfb38a95e ("bpf: Implement cgroup storage available to non-cgroup-attached bpf progs") which faces the same issue as bpf_task_storage, instead of its busy counter, NULL was passed to bpf_local_storage_map_free() which opened a window to cause deadlock: (acquiring local_storage->lock) _raw_spin_lock_irqsave+0x3d/0x50 bpf_local_storage_update+0xd1/0x460 bpf_cgrp_storage_get+0x109/0x130 bpf_prog_a4d4a370ba857314_cgrp_ptr+0x139/0x170 ? __bpf_prog_enter_recur+0x16/0x80 bpf_trampoline_6442485186+0x43/0xa4 cgroup_storage_ptr+0x9/0x20 (holding local_storage->lock) bpf_selem_unlink_storage_nolock.constprop.0+0x135/0x160 bpf_selem_unlink_storage+0x6f/0x110 bpf_local_storage_map_free+0xa2/0x110 bpf_map_free_deferred+0x5b/0x90 process_one_work+0x17c/0x390 worker_thread+0x251/0x360 kthread+0xd2/0x100 ret_from_fork+0x34/0x50 ret_from_fork_asm+0x1a/0x30 Progs: - A: SEC("fentry/cgroup_storage_ptr") - cgid (BPF_MAP_TYPE_HASH) Record the id of the cgroup the current task belonging to in this hash map, using the address of the cgroup as the map key. - cgrpa (BPF_MAP_TYPE_CGRP_STORAGE) If current task is a kworker, lookup the above hash map using function parameter @owner as the key to get its corresponding cgroup id which is then used to get a trusted pointer to the cgroup through bpf_cgroup_from_id(). This trusted pointer can then be passed to bpf_cgrp_storage_get() to finally trigger the deadlock issue. - B: SEC("tp_btf/sys_enter") - cgrpb (BPF_MAP_TYPE_CGRP_STORAGE) The only purpose of this prog is to fill Prog A's hash map by calling bpf_cgrp_storage_get() for as many userspace tasks as possible. Steps to reproduce: - Run A; - while (true) { Run B; Destroy B; } Fix this issue by passing its busy counter to the free procedure so it can be properly incremented before storage/smap locking. Fixes: c4bcfb38a95e ("bpf: Implement cgroup storage available to non-cgroup-attached bpf progs") Signed-off-by: Abel Wu Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20241221061018.37717-1-wuyun.abel@bytedance.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_cgrp_storage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c index d5dc65bb17550..54ff2a85d4c02 100644 --- a/kernel/bpf/bpf_cgrp_storage.c +++ b/kernel/bpf/bpf_cgrp_storage.c @@ -153,7 +153,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) static void cgroup_storage_map_free(struct bpf_map *map) { - bpf_local_storage_map_free(map, &cgroup_cache, NULL); + bpf_local_storage_map_free(map, &cgroup_cache, &bpf_cgrp_storage_busy); } /* *gfp_flags* is a hidden argument provided by the verifier */ -- GitLab From bdc35f164b0f60480b2f5e098bb8f3c0cea05cd2 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 31 Jan 2025 00:27:44 +0900 Subject: [PATCH 0074/1585] tomoyo: use better patterns for procfs in learning mode Commit 08ae2487b202 ("tomoyo: automatically use patterns for several situations in learning mode") replaced only $PID part of procfs pathname with \$ pattern. But it turned out that we need to also replace $TID part and $FD part to make this functionality useful for e.g. /bin/lsof . Signed-off-by: Tetsuo Handa --- security/tomoyo/common.c | 145 ++++++++++++++++++++++++++++++--------- 1 file changed, 112 insertions(+), 33 deletions(-) diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index d9fa696321475..0f78898bce09b 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -1980,6 +1980,114 @@ static int tomoyo_truncate(char *str) return strlen(start) + 1; } +/** + * tomoyo_numscan - sscanf() which stores the length of a decimal integer value. + * + * @str: String to scan. + * @head: Leading string that must start with. + * @width: Pointer to "int" for storing length of a decimal integer value after @head. + * @tail: Optional character that must match after a decimal integer value. + * + * Returns whether @str starts with @head and a decimal value follows @head. + */ +static bool tomoyo_numscan(const char *str, const char *head, int *width, const char tail) +{ + const char *cp; + const int n = strlen(head); + + if (!strncmp(str, head, n)) { + cp = str + n; + while (*cp && *cp >= '0' && *cp <= '9') + cp++; + if (*cp == tail || !tail) { + *width = cp - (str + n); + return *width != 0; + } + } + *width = 0; + return 0; +} + +/** + * tomoyo_patternize_path - Make patterns for file path. Used by learning mode. + * + * @buffer: Destination buffer. + * @len: Size of @buffer. + * @entry: Original line. + * + * Returns nothing. + */ +static void tomoyo_patternize_path(char *buffer, const int len, char *entry) +{ + int width; + char *cp = entry; + + /* Nothing to do if this line is not for "file" related entry. */ + if (strncmp(entry, "file ", 5)) + goto flush; + /* + * Nothing to do if there is no colon in this line, for this rewriting + * applies to only filesystems where numeric values in the path are volatile. + */ + cp = strchr(entry + 5, ':'); + if (!cp) { + cp = entry; + goto flush; + } + /* Flush e.g. "file ioctl" part. */ + while (*cp != ' ') + cp--; + *cp++ = '\0'; + tomoyo_addprintf(buffer, len, "%s ", entry); + /* e.g. file ioctl pipe:[$INO] $CMD */ + if (tomoyo_numscan(cp, "pipe:[", &width, ']')) { + cp += width + 7; + tomoyo_addprintf(buffer, len, "pipe:[\\$]"); + goto flush; + } + /* e.g. file ioctl socket:[$INO] $CMD */ + if (tomoyo_numscan(cp, "socket:[", &width, ']')) { + cp += width + 9; + tomoyo_addprintf(buffer, len, "socket:[\\$]"); + goto flush; + } + if (!strncmp(cp, "proc:/self", 10)) { + /* e.g. file read proc:/self/task/$TID/fdinfo/$FD */ + cp += 10; + tomoyo_addprintf(buffer, len, "proc:/self"); + } else if (tomoyo_numscan(cp, "proc:/", &width, 0)) { + /* e.g. file read proc:/$PID/task/$TID/fdinfo/$FD */ + /* + * Don't patternize $PID part if $PID == 1, for several + * programs access only files in /proc/1/ directory. + */ + cp += width + 6; + if (width == 1 && *(cp - 1) == '1') + tomoyo_addprintf(buffer, len, "proc:/1"); + else + tomoyo_addprintf(buffer, len, "proc:/\\$"); + } else { + goto flush; + } + /* Patternize $TID part if "/task/" follows. */ + if (tomoyo_numscan(cp, "/task/", &width, 0)) { + cp += width + 6; + tomoyo_addprintf(buffer, len, "/task/\\$"); + } + /* Patternize $FD part if "/fd/" or "/fdinfo/" follows. */ + if (tomoyo_numscan(cp, "/fd/", &width, 0)) { + cp += width + 4; + tomoyo_addprintf(buffer, len, "/fd/\\$"); + } else if (tomoyo_numscan(cp, "/fdinfo/", &width, 0)) { + cp += width + 8; + tomoyo_addprintf(buffer, len, "/fdinfo/\\$"); + } +flush: + /* Flush remaining part if any. */ + if (*cp) + tomoyo_addprintf(buffer, len, "%s", cp); +} + /** * tomoyo_add_entry - Add an ACL to current thread's domain. Used by learning mode. * @@ -2003,7 +2111,8 @@ static void tomoyo_add_entry(struct tomoyo_domain_info *domain, char *header) if (!cp) return; *cp++ = '\0'; - len = strlen(cp) + 1; + /* Reserve some space for potentially using patterns. */ + len = strlen(cp) + 16; /* strstr() will return NULL if ordering is wrong. */ if (*cp == 'f') { argv0 = strstr(header, " argv[]={ \""); @@ -2020,40 +2129,10 @@ static void tomoyo_add_entry(struct tomoyo_domain_info *domain, char *header) if (symlink) len += tomoyo_truncate(symlink + 1) + 1; } - buffer = kmalloc(len, GFP_NOFS); + buffer = kmalloc(len, GFP_NOFS | __GFP_ZERO); if (!buffer) return; - snprintf(buffer, len - 1, "%s", cp); - if (*cp == 'f' && strchr(buffer, ':')) { - /* Automatically replace 2 or more digits with \$ pattern. */ - char *cp2; - - /* e.g. file read proc:/$PID/stat */ - cp = strstr(buffer, " proc:/"); - if (cp && simple_strtoul(cp + 7, &cp2, 10) >= 10 && *cp2 == '/') { - *(cp + 7) = '\\'; - *(cp + 8) = '$'; - memmove(cp + 9, cp2, strlen(cp2) + 1); - goto ok; - } - /* e.g. file ioctl pipe:[$INO] $CMD */ - cp = strstr(buffer, " pipe:["); - if (cp && simple_strtoul(cp + 7, &cp2, 10) >= 10 && *cp2 == ']') { - *(cp + 7) = '\\'; - *(cp + 8) = '$'; - memmove(cp + 9, cp2, strlen(cp2) + 1); - goto ok; - } - /* e.g. file ioctl socket:[$INO] $CMD */ - cp = strstr(buffer, " socket:["); - if (cp && simple_strtoul(cp + 9, &cp2, 10) >= 10 && *cp2 == ']') { - *(cp + 9) = '\\'; - *(cp + 10) = '$'; - memmove(cp + 11, cp2, strlen(cp2) + 1); - goto ok; - } - } -ok: + tomoyo_patternize_path(buffer, len, cp); if (realpath) tomoyo_addprintf(buffer, len, " exec.%s", realpath); if (argv0) -- GitLab From ee2ab467bddfb2d7f68d996dbab94d7b88f8eaf7 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 21 Jan 2025 18:11:33 -0700 Subject: [PATCH 0075/1585] x86/boot: Use '-std=gnu11' to fix build with GCC 15 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC 15 changed the default C standard version to C23, which should not have impacted the kernel because it requests the gnu11 standard via '-std=' in the main Makefile. However, the x86 compressed boot Makefile uses its own set of KBUILD_CFLAGS without a '-std=' value (i.e., using the default), resulting in errors from the kernel's definitions of bool, true, and false in stddef.h, which are reserved keywords under C23. ./include/linux/stddef.h:11:9: error: expected identifier before ‘false’ 11 | false = 0, ./include/linux/types.h:35:33: error: two or more data types in declaration specifiers 35 | typedef _Bool bool; Set '-std=gnu11' in the x86 compressed boot Makefile to resolve the error and consistently use the same C standard version for the entire kernel. Closes: https://lore.kernel.org/4OAhbllK7x4QJGpZjkYjtBYNLd_2whHx9oFiuZcGwtVR4hIzvduultkgfAIRZI3vQpZylu7Gl929HaYFRGeMEalWCpeMzCIIhLxxRhq4U-Y=@protonmail.com/ Closes: https://lore.kernel.org/Z4467umXR2PZ0M1H@tucnak/ Reported-by: Kostadin Shishmanov Reported-by: Jakub Jelinek Signed-off-by: Nathan Chancellor Signed-off-by: Dave Hansen Reviewed-by: Ard Biesheuvel Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20250121-x86-use-std-consistently-gcc-15-v1-1-8ab0acf645cb%40kernel.org --- arch/x86/boot/compressed/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index f2051644de943..606c74f274593 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -25,6 +25,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ # avoid errors with '-march=i386', and future flags may depend on the target to # be valid. KBUILD_CFLAGS := -m$(BITS) -O2 $(CLANG_FLAGS) +KBUILD_CFLAGS += -std=gnu11 KBUILD_CFLAGS += -fno-strict-aliasing -fPIE KBUILD_CFLAGS += -Wundef KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -- GitLab From bb2784d9ab49587ba4fbff37a319fff2924db289 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Thu, 30 Jan 2025 19:26:58 +0000 Subject: [PATCH 0076/1585] jiffies: Cast to unsigned long in secs_to_jiffies() conversion While converting users of msecs_to_jiffies(), lkp reported that some range checks would always be true because of the mismatch between the implied int value of secs_to_jiffies() vs the unsigned long return value of the msecs_to_jiffies() calls it was replacing. Fix this by casting the secs_to_jiffies() input value to unsigned long. Fixes: b35108a51cf7ba ("jiffies: Define secs_to_jiffies()") Reported-by: kernel test robot Signed-off-by: Easwar Hariharan Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20250130192701.99626-1-eahariha@linux.microsoft.com Closes: https://lore.kernel.org/oe-kbuild-all/202501301334.NB6NszQR-lkp@intel.com/ --- include/linux/jiffies.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index ed945f42e064a..0ea8c9887429f 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -537,7 +537,7 @@ static __always_inline unsigned long msecs_to_jiffies(const unsigned int m) * * Return: jiffies value */ -#define secs_to_jiffies(_secs) ((_secs) * HZ) +#define secs_to_jiffies(_secs) (unsigned long)((_secs) * HZ) extern unsigned long __usecs_to_jiffies(const unsigned int u); #if !(USEC_PER_SEC % HZ) -- GitLab From 9065ce69754dece78606c8bbb3821449272e56bf Mon Sep 17 00:00:00 2001 From: Christian Loehle Date: Wed, 29 Jan 2025 17:59:44 +0000 Subject: [PATCH 0077/1585] sched/debug: Provide slice length for fair tasks Since commit: 857b158dc5e8 ("sched/eevdf: Use sched_attr::sched_runtime to set request/slice suggestion") ... we have the userspace per-task tunable slice length, which is a key parameter that is otherwise difficult to obtain, so provide it in /proc/$PID/sched. [ mingo: Clarified the changelog. ] Signed-off-by: Christian Loehle Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Link: https://lore.kernel.org/r/453349b1-1637-42f5-a7b2-2385392b5956@arm.com --- kernel/sched/debug.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index a1be00a988bf6..5b32d3cc393bf 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -1265,6 +1265,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, if (task_has_dl_policy(p)) { P(dl.runtime); P(dl.deadline); + } else if (fair_policy(p->policy)) { + P(se.slice); } #ifdef CONFIG_SCHED_CLASS_EXT __PS("ext.enabled", task_on_scx(p)); -- GitLab From 5f230f41fdd9e799f43a699348dc572bca7159aa Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Jan 2025 16:43:41 +0100 Subject: [PATCH 0078/1585] KVM: s390: vsie: fix some corner-cases when grabbing vsie pages We try to reuse the same vsie page when re-executing the vsie with a given SCB address. The result is that we use the same shadow SCB -- residing in the vsie page -- and can avoid flushing the TLB when re-running the vsie on a CPU. So, when we allocate a fresh vsie page, or when we reuse a vsie page for a different SCB address -- reusing the shadow SCB in different context -- we set ihcpu=0xffff to trigger the flush. However, after we looked up the SCB address in the radix tree, but before we grabbed the vsie page by raising the refcount to 2, someone could reuse the vsie page for a different SCB address, adjusting page->index and the radix tree. In that case, we would be reusing the vsie page with a wrong page->index. Another corner case is that we might set the SCB address for a vsie page, but fail the insertion into the radix tree. Whoever would reuse that page would remove the corresponding radix tree entry -- which might now be a valid entry pointing at another page, resulting in the wrong vsie page getting removed from the radix tree. Let's handle such races better, by validating that the SCB address of a vsie page didn't change after we grabbed it (not reuse for a different SCB; the alternative would be performing another tree lookup), and by setting the SCB address to invalid until the insertion in the tree succeeded (SCB addresses are aligned to 512, so ULONG_MAX is invalid). These scenarios are rare, the effects a bit unclear, and these issues were only found by code inspection. Let's CC stable to be safe. Fixes: a3508fbe9dc6 ("KVM: s390: vsie: initial support for nested virtualization") Cc: stable@vger.kernel.org Signed-off-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Reviewed-by: Christoph Schlameuss Tested-by: Christoph Schlameuss Message-ID: <20250107154344.1003072-2-david@redhat.com> Signed-off-by: Claudio Imbrenda --- arch/s390/kvm/vsie.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index a687695d8f68e..513e608567ccc 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1362,8 +1362,14 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9); rcu_read_unlock(); if (page) { - if (page_ref_inc_return(page) == 2) - return page_to_virt(page); + if (page_ref_inc_return(page) == 2) { + if (page->index == addr) + return page_to_virt(page); + /* + * We raced with someone reusing + putting this vsie + * page before we grabbed it. + */ + } page_ref_dec(page); } @@ -1393,15 +1399,20 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) kvm->arch.vsie.next++; kvm->arch.vsie.next %= nr_vcpus; } - radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9); + if (page->index != ULONG_MAX) + radix_tree_delete(&kvm->arch.vsie.addr_to_page, + page->index >> 9); } - page->index = addr; - /* double use of the same address */ + /* Mark it as invalid until it resides in the tree. */ + page->index = ULONG_MAX; + + /* Double use of the same address or allocation failure. */ if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) { page_ref_dec(page); mutex_unlock(&kvm->arch.vsie.mutex); return NULL; } + page->index = addr; mutex_unlock(&kvm->arch.vsie.mutex); vsie_page = page_to_virt(page); @@ -1496,7 +1507,9 @@ void kvm_s390_vsie_destroy(struct kvm *kvm) vsie_page = page_to_virt(page); release_gmap_shadow(vsie_page); /* free the radix tree entry */ - radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9); + if (page->index != ULONG_MAX) + radix_tree_delete(&kvm->arch.vsie.addr_to_page, + page->index >> 9); __free_page(page); } kvm->arch.vsie.page_count = 0; -- GitLab From c5f64c98a1f7e4ca0e55b441620473389b8c7a72 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Jan 2025 16:43:42 +0100 Subject: [PATCH 0079/1585] KVM: s390: vsie: stop using page->index Let's stop using page->index, and instead use a field inside "struct vsie_page" to hold that value. We have plenty of space left in there. This is one part of stopping using "struct page" when working with vsie pages. We place the "page_to_virt(page)" strategically, so the next cleanups requires less churn. Signed-off-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Reviewed-by: Christoph Schlameuss Tested-by: Christoph Schlameuss Message-ID: <20250107154344.1003072-3-david@redhat.com> Signed-off-by: Claudio Imbrenda --- arch/s390/kvm/vsie.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 513e608567ccc..3874a1b49dd54 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -46,7 +46,13 @@ struct vsie_page { gpa_t gvrd_gpa; /* 0x0240 */ gpa_t riccbd_gpa; /* 0x0248 */ gpa_t sdnx_gpa; /* 0x0250 */ - __u8 reserved[0x0700 - 0x0258]; /* 0x0258 */ + /* + * guest address of the original SCB. Remains set for free vsie + * pages, so we can properly look them up in our addr_to_page + * radix tree. + */ + gpa_t scb_gpa; /* 0x0258 */ + __u8 reserved[0x0700 - 0x0260]; /* 0x0260 */ struct kvm_s390_crypto_cb crycb; /* 0x0700 */ __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */ }; @@ -1362,9 +1368,10 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9); rcu_read_unlock(); if (page) { + vsie_page = page_to_virt(page); if (page_ref_inc_return(page) == 2) { - if (page->index == addr) - return page_to_virt(page); + if (vsie_page->scb_gpa == addr) + return vsie_page; /* * We raced with someone reusing + putting this vsie * page before we grabbed it. @@ -1386,6 +1393,7 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) mutex_unlock(&kvm->arch.vsie.mutex); return ERR_PTR(-ENOMEM); } + vsie_page = page_to_virt(page); page_ref_inc(page); kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page; kvm->arch.vsie.page_count++; @@ -1393,18 +1401,19 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) /* reuse an existing entry that belongs to nobody */ while (true) { page = kvm->arch.vsie.pages[kvm->arch.vsie.next]; + vsie_page = page_to_virt(page); if (page_ref_inc_return(page) == 2) break; page_ref_dec(page); kvm->arch.vsie.next++; kvm->arch.vsie.next %= nr_vcpus; } - if (page->index != ULONG_MAX) + if (vsie_page->scb_gpa != ULONG_MAX) radix_tree_delete(&kvm->arch.vsie.addr_to_page, - page->index >> 9); + vsie_page->scb_gpa >> 9); } /* Mark it as invalid until it resides in the tree. */ - page->index = ULONG_MAX; + vsie_page->scb_gpa = ULONG_MAX; /* Double use of the same address or allocation failure. */ if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) { @@ -1412,10 +1421,9 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) mutex_unlock(&kvm->arch.vsie.mutex); return NULL; } - page->index = addr; + vsie_page->scb_gpa = addr; mutex_unlock(&kvm->arch.vsie.mutex); - vsie_page = page_to_virt(page); memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block)); release_gmap_shadow(vsie_page); vsie_page->fault_addr = 0; @@ -1507,9 +1515,9 @@ void kvm_s390_vsie_destroy(struct kvm *kvm) vsie_page = page_to_virt(page); release_gmap_shadow(vsie_page); /* free the radix tree entry */ - if (page->index != ULONG_MAX) + if (vsie_page->scb_gpa != ULONG_MAX) radix_tree_delete(&kvm->arch.vsie.addr_to_page, - page->index >> 9); + vsie_page->scb_gpa >> 9); __free_page(page); } kvm->arch.vsie.page_count = 0; -- GitLab From 905f5ce0835c938c501237d9371cdbc91d8f7e02 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Jan 2025 16:43:43 +0100 Subject: [PATCH 0080/1585] KVM: s390: vsie: stop messing with page refcount Let's stop messing with the page refcount, and use a flag that is set / cleared atomically to remember whether a vsie page is currently in use. Note that we could use a page flag, or a lower bit of the scb_gpa. Let's keep it simple for now, we have sufficient space. While at it, stop passing "struct kvm *" to put_vsie_page(), it's unused. Signed-off-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Reviewed-by: Christoph Schlameuss Tested-by: Christoph Schlameuss Message-ID: <20250107154344.1003072-4-david@redhat.com> Signed-off-by: Claudio Imbrenda --- arch/s390/kvm/vsie.c | 46 +++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 3874a1b49dd54..424f80f5f6b2d 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -23,6 +23,10 @@ #include "kvm-s390.h" #include "gaccess.h" +enum vsie_page_flags { + VSIE_PAGE_IN_USE = 0, +}; + struct vsie_page { struct kvm_s390_sie_block scb_s; /* 0x0000 */ /* @@ -52,7 +56,12 @@ struct vsie_page { * radix tree. */ gpa_t scb_gpa; /* 0x0258 */ - __u8 reserved[0x0700 - 0x0260]; /* 0x0260 */ + /* + * Flags: must be set/cleared atomically after the vsie page can be + * looked up by other CPUs. + */ + unsigned long flags; /* 0x0260 */ + __u8 reserved[0x0700 - 0x0268]; /* 0x0268 */ struct kvm_s390_crypto_cb crycb; /* 0x0700 */ __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */ }; @@ -1351,6 +1360,20 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) return rc; } +/* Try getting a given vsie page, returning "true" on success. */ +static inline bool try_get_vsie_page(struct vsie_page *vsie_page) +{ + if (test_bit(VSIE_PAGE_IN_USE, &vsie_page->flags)) + return false; + return !test_and_set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags); +} + +/* Put a vsie page acquired through get_vsie_page / try_get_vsie_page. */ +static void put_vsie_page(struct vsie_page *vsie_page) +{ + clear_bit(VSIE_PAGE_IN_USE, &vsie_page->flags); +} + /* * Get or create a vsie page for a scb address. * @@ -1369,15 +1392,15 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) rcu_read_unlock(); if (page) { vsie_page = page_to_virt(page); - if (page_ref_inc_return(page) == 2) { + if (try_get_vsie_page(vsie_page)) { if (vsie_page->scb_gpa == addr) return vsie_page; /* * We raced with someone reusing + putting this vsie * page before we grabbed it. */ + put_vsie_page(vsie_page); } - page_ref_dec(page); } /* @@ -1394,7 +1417,7 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) return ERR_PTR(-ENOMEM); } vsie_page = page_to_virt(page); - page_ref_inc(page); + __set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags); kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page; kvm->arch.vsie.page_count++; } else { @@ -1402,9 +1425,8 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) while (true) { page = kvm->arch.vsie.pages[kvm->arch.vsie.next]; vsie_page = page_to_virt(page); - if (page_ref_inc_return(page) == 2) + if (try_get_vsie_page(vsie_page)) break; - page_ref_dec(page); kvm->arch.vsie.next++; kvm->arch.vsie.next %= nr_vcpus; } @@ -1417,7 +1439,7 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) /* Double use of the same address or allocation failure. */ if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) { - page_ref_dec(page); + put_vsie_page(vsie_page); mutex_unlock(&kvm->arch.vsie.mutex); return NULL; } @@ -1431,14 +1453,6 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) return vsie_page; } -/* put a vsie page acquired via get_vsie_page */ -static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page) -{ - struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT); - - page_ref_dec(page); -} - int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu) { struct vsie_page *vsie_page; @@ -1489,7 +1503,7 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu) out_unpin_scb: unpin_scb(vcpu, vsie_page, scb_addr); out_put: - put_vsie_page(vcpu->kvm, vsie_page); + put_vsie_page(vsie_page); return rc < 0 ? rc : 0; } -- GitLab From 4514eda4c1dbd0b7062e06c769d2ceafd25c9284 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Jan 2025 16:43:44 +0100 Subject: [PATCH 0081/1585] KVM: s390: vsie: stop using "struct page" for vsie page Now that we no longer use page->index and the page refcount explicitly, let's avoid messing with "struct page" completely. Signed-off-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Reviewed-by: Christoph Schlameuss Tested-by: Christoph Schlameuss Message-ID: <20250107154344.1003072-5-david@redhat.com> Signed-off-by: Claudio Imbrenda --- arch/s390/include/asm/kvm_host.h | 4 +++- arch/s390/kvm/vsie.c | 31 ++++++++++++------------------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 97c7c81275434..4581388411b71 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -931,12 +931,14 @@ struct sie_page2 { u8 reserved928[0x1000 - 0x928]; /* 0x0928 */ }; +struct vsie_page; + struct kvm_s390_vsie { struct mutex mutex; struct radix_tree_root addr_to_page; int page_count; int next; - struct page *pages[KVM_MAX_VCPUS]; + struct vsie_page *pages[KVM_MAX_VCPUS]; }; struct kvm_s390_gisa_iam { diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 424f80f5f6b2d..a0398ff85d00b 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -599,7 +599,6 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start, struct kvm *kvm = gmap->private; struct vsie_page *cur; unsigned long prefix; - struct page *page; int i; if (!gmap_is_shadow(gmap)) @@ -609,10 +608,9 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start, * therefore we can safely reference them all the time. */ for (i = 0; i < kvm->arch.vsie.page_count; i++) { - page = READ_ONCE(kvm->arch.vsie.pages[i]); - if (!page) + cur = READ_ONCE(kvm->arch.vsie.pages[i]); + if (!cur) continue; - cur = page_to_virt(page); if (READ_ONCE(cur->gmap) != gmap) continue; prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT; @@ -1384,14 +1382,12 @@ static void put_vsie_page(struct vsie_page *vsie_page) static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) { struct vsie_page *vsie_page; - struct page *page; int nr_vcpus; rcu_read_lock(); - page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9); + vsie_page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9); rcu_read_unlock(); - if (page) { - vsie_page = page_to_virt(page); + if (vsie_page) { if (try_get_vsie_page(vsie_page)) { if (vsie_page->scb_gpa == addr) return vsie_page; @@ -1411,20 +1407,18 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) mutex_lock(&kvm->arch.vsie.mutex); if (kvm->arch.vsie.page_count < nr_vcpus) { - page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA); - if (!page) { + vsie_page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA); + if (!vsie_page) { mutex_unlock(&kvm->arch.vsie.mutex); return ERR_PTR(-ENOMEM); } - vsie_page = page_to_virt(page); __set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags); - kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page; + kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = vsie_page; kvm->arch.vsie.page_count++; } else { /* reuse an existing entry that belongs to nobody */ while (true) { - page = kvm->arch.vsie.pages[kvm->arch.vsie.next]; - vsie_page = page_to_virt(page); + vsie_page = kvm->arch.vsie.pages[kvm->arch.vsie.next]; if (try_get_vsie_page(vsie_page)) break; kvm->arch.vsie.next++; @@ -1438,7 +1432,8 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) vsie_page->scb_gpa = ULONG_MAX; /* Double use of the same address or allocation failure. */ - if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) { + if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, + vsie_page)) { put_vsie_page(vsie_page); mutex_unlock(&kvm->arch.vsie.mutex); return NULL; @@ -1519,20 +1514,18 @@ void kvm_s390_vsie_init(struct kvm *kvm) void kvm_s390_vsie_destroy(struct kvm *kvm) { struct vsie_page *vsie_page; - struct page *page; int i; mutex_lock(&kvm->arch.vsie.mutex); for (i = 0; i < kvm->arch.vsie.page_count; i++) { - page = kvm->arch.vsie.pages[i]; + vsie_page = kvm->arch.vsie.pages[i]; kvm->arch.vsie.pages[i] = NULL; - vsie_page = page_to_virt(page); release_gmap_shadow(vsie_page); /* free the radix tree entry */ if (vsie_page->scb_gpa != ULONG_MAX) radix_tree_delete(&kvm->arch.vsie.addr_to_page, vsie_page->scb_gpa >> 9); - __free_page(page); + free_page((unsigned long)vsie_page); } kvm->arch.vsie.page_count = 0; mutex_unlock(&kvm->arch.vsie.mutex); -- GitLab From 66119f8ce135de664cb2fb88d9aaa322d7451a1f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 23 Jan 2025 15:46:13 +0100 Subject: [PATCH 0082/1585] KVM: Do not restrict the size of KVM-internal memory regions Exempt KVM-internal memslots from the KVM_MEM_MAX_NR_PAGES restriction, as the limit on the number of pages exists purely to play nice with dirty bitmap operations, which use 32-bit values to index the bitmaps, and dirty logging isn't supported for KVM-internal memslots. Link: https://lore.kernel.org/all/20240802205003.353672-6-seanjc@google.com Signed-off-by: Sean Christopherson Reviewed-by: Christoph Schlameuss Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20250123144627.312456-2-imbrenda@linux.ibm.com Signed-off-by: Claudio Imbrenda Message-ID: <20250123144627.312456-2-imbrenda@linux.ibm.com> --- virt/kvm/kvm_main.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index faf10671eed2a..3f04cd5e3a8cf 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1971,7 +1971,15 @@ static int kvm_set_memory_region(struct kvm *kvm, return -EINVAL; if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) return -EINVAL; - if ((mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES) + + /* + * The size of userspace-defined memory regions is restricted in order + * to play nice with dirty bitmap operations, which are indexed with an + * "unsigned int". KVM's internal memory regions don't support dirty + * logging, and so are exempt. + */ + if (id < KVM_USER_MEM_SLOTS && + (mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES) return -EINVAL; slots = __kvm_memslots(kvm, as_id); -- GitLab From decff09adbeba4b75a1982b1dc3991761914e2df Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 23 Jan 2025 15:46:14 +0100 Subject: [PATCH 0083/1585] KVM: s390: wrapper for KVM_BUG Wrap the call to KVM_BUG; this reduces code duplication and improves readability. Reviewed-by: Christian Borntraeger Reviewed-by: Christoph Schlameuss Reviewed-by: Steffen Eiden Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20250123144627.312456-3-imbrenda@linux.ibm.com Signed-off-by: Claudio Imbrenda Message-ID: <20250123144627.312456-3-imbrenda@linux.ibm.com> --- arch/s390/kvm/kvm-s390.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d8080c27d45bd..ecbdd7d41230a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4766,6 +4766,13 @@ static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu) return kvm_s390_inject_prog_irq(vcpu, &pgm_info); } +static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu) +{ + KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, + "Unexpected program interrupt 0x%x, TEID 0x%016lx", + current->thread.gmap_int_code, current->thread.gmap_teid.val); +} + static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) { unsigned int flags = 0; @@ -4781,9 +4788,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) vcpu->stat.exit_null++; break; case PGM_NON_SECURE_STORAGE_ACCESS: - KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, - "Unexpected program interrupt 0x%x, TEID 0x%016lx", - current->thread.gmap_int_code, current->thread.gmap_teid.val); + kvm_s390_assert_primary_as(vcpu); /* * This is normal operation; a page belonging to a protected * guest has not been imported yet. Try to import the page into @@ -4794,9 +4799,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) break; case PGM_SECURE_STORAGE_ACCESS: case PGM_SECURE_STORAGE_VIOLATION: - KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, - "Unexpected program interrupt 0x%x, TEID 0x%016lx", - current->thread.gmap_int_code, current->thread.gmap_teid.val); + kvm_s390_assert_primary_as(vcpu); /* * This can happen after a reboot with asynchronous teardown; * the new guest (normal or protected) will run on top of the @@ -4825,9 +4828,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) case PGM_REGION_FIRST_TRANS: case PGM_REGION_SECOND_TRANS: case PGM_REGION_THIRD_TRANS: - KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, - "Unexpected program interrupt 0x%x, TEID 0x%016lx", - current->thread.gmap_int_code, current->thread.gmap_teid.val); + kvm_s390_assert_primary_as(vcpu); if (vcpu->arch.gmap->pfault_enabled) { rc = gmap_fault(vcpu->arch.gmap, gaddr, flags | FAULT_FLAG_RETRY_NOWAIT); if (rc == -EFAULT) -- GitLab From 413c98f24c63b3b8aff202fce6f01e8950730511 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 23 Jan 2025 15:46:15 +0100 Subject: [PATCH 0084/1585] KVM: s390: fake memslot for ucontrol VMs Create a fake memslot for ucontrol VMs. The fake memslot identity-maps userspace. Now memslots will always be present, and ucontrol is not a special case anymore. Suggested-by: Sean Christopherson Reviewed-by: David Hildenbrand Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20250123144627.312456-4-imbrenda@linux.ibm.com Signed-off-by: Claudio Imbrenda Message-ID: <20250123144627.312456-4-imbrenda@linux.ibm.com> --- Documentation/virt/kvm/api.rst | 2 +- arch/s390/include/asm/kvm_host.h | 2 ++ arch/s390/kvm/kvm-s390.c | 17 ++++++++++++++++- arch/s390/kvm/kvm-s390.h | 2 ++ 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 0d1c3a820ce6e..2b52eb77e29cb 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1419,7 +1419,7 @@ fetch) is injected in the guest. S390: ^^^^^ -Returns -EINVAL if the VM has the KVM_VM_S390_UCONTROL flag set. +Returns -EINVAL or -EEXIST if the VM has the KVM_VM_S390_UCONTROL flag set. Returns -EINVAL if called on a protected VM. 4.36 KVM_SET_TSS_ADDR diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 4581388411b71..9a367866cab0e 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -30,6 +30,8 @@ #define KVM_S390_ESCA_CPU_SLOTS 248 #define KVM_MAX_VCPUS 255 +#define KVM_INTERNAL_MEM_SLOTS 1 + /* * These seem to be used for allocating ->chip in the routing table, which we * don't use. 1 is as small as we can get to reduce the needed memory. If we diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ecbdd7d41230a..fc44002a7b04c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3428,8 +3428,20 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) VM_EVENT(kvm, 3, "vm created with type %lu", type); if (type & KVM_VM_S390_UCONTROL) { + struct kvm_userspace_memory_region2 fake_memslot = { + .slot = KVM_S390_UCONTROL_MEMSLOT, + .guest_phys_addr = 0, + .userspace_addr = 0, + .memory_size = ALIGN_DOWN(TASK_SIZE, _SEGMENT_SIZE), + .flags = 0, + }; + kvm->arch.gmap = NULL; kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; + /* one flat fake memslot covering the whole address-space */ + mutex_lock(&kvm->slots_lock); + KVM_BUG_ON(kvm_set_internal_memslot(kvm, &fake_memslot), kvm); + mutex_unlock(&kvm->slots_lock); } else { if (sclp.hamax == U64_MAX) kvm->arch.mem_limit = TASK_SIZE_MAX; @@ -5854,7 +5866,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, { gpa_t size; - if (kvm_is_ucontrol(kvm)) + if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS) return -EINVAL; /* When we are protected, we should not change the memory slots */ @@ -5906,6 +5918,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, { int rc = 0; + if (kvm_is_ucontrol(kvm)) + return; + switch (change) { case KVM_MR_DELETE: rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 597d7a71deebe..30736ac16f848 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -20,6 +20,8 @@ #include #include +#define KVM_S390_UCONTROL_MEMSLOT (KVM_USER_MEM_SLOTS + 0) + static inline void kvm_s390_fpu_store(struct kvm_run *run) { fpu_stfpc(&run->s.regs.fpc); -- GitLab From 63e71519891024b622d00c486c4d0348c44ca911 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 23 Jan 2025 15:46:16 +0100 Subject: [PATCH 0085/1585] KVM: s390: selftests: fix ucontrol memory region test With the latest patch, attempting to create a memslot from userspace will result in an EEXIST error for UCONTROL VMs, instead of EINVAL, since the new memslot will collide with the internal memslot. There is no simple way to bring back the previous behaviour. This is not a problem, but the test needs to be fixed accordingly. Reviewed-by: Christoph Schlameuss Link: https://lore.kernel.org/r/20250123144627.312456-5-imbrenda@linux.ibm.com Signed-off-by: Claudio Imbrenda Message-ID: <20250123144627.312456-5-imbrenda@linux.ibm.com> --- tools/testing/selftests/kvm/s390/ucontrol_test.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c index 135ee22856cf1..22ce9219620ce 100644 --- a/tools/testing/selftests/kvm/s390/ucontrol_test.c +++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c @@ -459,10 +459,14 @@ TEST_F(uc_kvm, uc_no_user_region) }; ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, ®ion)); - ASSERT_EQ(EINVAL, errno); + ASSERT_TRUE(errno == EEXIST || errno == EINVAL) + TH_LOG("errno %s (%i) not expected for ioctl KVM_SET_USER_MEMORY_REGION", + strerror(errno), errno); ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION2, ®ion2)); - ASSERT_EQ(EINVAL, errno); + ASSERT_TRUE(errno == EEXIST || errno == EINVAL) + TH_LOG("errno %s (%i) not expected for ioctl KVM_SET_USER_MEMORY_REGION2", + strerror(errno), errno); } TEST_F(uc_kvm, uc_map_unmap) -- GitLab From 5cbe24350b7d8ef6d466a37d56b07ae643c622ca Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 23 Jan 2025 15:46:17 +0100 Subject: [PATCH 0086/1585] KVM: s390: move pv gmap functions into kvm Move gmap related functions from kernel/uv into kvm. Create a new file to collect gmap-related functions. Reviewed-by: Janosch Frank Reviewed-by: Christoph Schlameuss [fixed unpack_one(), thanks mhartmay@linux.ibm.com] Link: https://lore.kernel.org/r/20250123144627.312456-6-imbrenda@linux.ibm.com Signed-off-by: Claudio Imbrenda Message-ID: <20250123144627.312456-6-imbrenda@linux.ibm.com> --- arch/s390/include/asm/gmap.h | 1 + arch/s390/include/asm/uv.h | 6 +- arch/s390/kernel/uv.c | 292 ++++------------------------------- arch/s390/kvm/Makefile | 2 +- arch/s390/kvm/gmap.c | 212 +++++++++++++++++++++++++ arch/s390/kvm/gmap.h | 17 ++ arch/s390/kvm/intercept.c | 3 +- arch/s390/kvm/kvm-s390.c | 1 + arch/s390/kvm/pv.c | 21 +++ arch/s390/mm/gmap.c | 28 ++++ 10 files changed, 315 insertions(+), 268 deletions(-) create mode 100644 arch/s390/kvm/gmap.c create mode 100644 arch/s390/kvm/gmap.h diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 13f51a6a5bb1b..3e66f53fe3ccc 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -149,6 +149,7 @@ int s390_replace_asce(struct gmap *gmap); void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns); int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start, unsigned long end, bool interruptible); +int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split); /** * s390_uv_destroy_range - Destroy a range of pages in the given mm. diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index dc332609f2c3f..b11f5b6d0bd14 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -628,12 +628,12 @@ static inline int is_prot_virt_host(void) } int uv_pin_shared(unsigned long paddr); -int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); -int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr); int uv_destroy_folio(struct folio *folio); int uv_destroy_pte(pte_t pte); int uv_convert_from_secure_pte(pte_t pte); -int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); +int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb); +int uv_convert_from_secure(unsigned long paddr); +int uv_convert_from_secure_folio(struct folio *folio); void setup_uv(void); diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 6f9654a191ad9..9f05df2da2f73 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -19,19 +19,6 @@ #include #include -#if !IS_ENABLED(CONFIG_KVM) -unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) -{ - return 0; -} - -int gmap_fault(struct gmap *gmap, unsigned long gaddr, - unsigned int fault_flags) -{ - return 0; -} -#endif - /* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */ int __bootdata_preserved(prot_virt_guest); EXPORT_SYMBOL(prot_virt_guest); @@ -159,6 +146,7 @@ int uv_destroy_folio(struct folio *folio) folio_put(folio); return rc; } +EXPORT_SYMBOL(uv_destroy_folio); /* * The present PTE still indirectly holds a folio reference through the mapping. @@ -175,7 +163,7 @@ int uv_destroy_pte(pte_t pte) * * @paddr: Absolute host address of page to be exported */ -static int uv_convert_from_secure(unsigned long paddr) +int uv_convert_from_secure(unsigned long paddr) { struct uv_cb_cfs uvcb = { .header.cmd = UVC_CMD_CONV_FROM_SEC_STOR, @@ -187,11 +175,12 @@ static int uv_convert_from_secure(unsigned long paddr) return -EINVAL; return 0; } +EXPORT_SYMBOL_GPL(uv_convert_from_secure); /* * The caller must already hold a reference to the folio. */ -static int uv_convert_from_secure_folio(struct folio *folio) +int uv_convert_from_secure_folio(struct folio *folio) { int rc; @@ -206,6 +195,7 @@ static int uv_convert_from_secure_folio(struct folio *folio) folio_put(folio); return rc; } +EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio); /* * The present PTE still indirectly holds a folio reference through the mapping. @@ -237,13 +227,33 @@ static int expected_folio_refs(struct folio *folio) return res; } -static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) +/** + * make_folio_secure() - make a folio secure + * @folio: the folio to make secure + * @uvcb: the uvcb that describes the UVC to be used + * + * The folio @folio will be made secure if possible, @uvcb will be passed + * as-is to the UVC. + * + * Return: 0 on success; + * -EBUSY if the folio is in writeback or has too many references; + * -E2BIG if the folio is large; + * -EAGAIN if the UVC needs to be attempted again; + * -ENXIO if the address is not mapped; + * -EINVAL if the UVC failed for other reasons. + * + * Context: The caller must hold exactly one extra reference on the folio + * (it's the same logic as split_folio()) + */ +int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) { int expected, cc = 0; + if (folio_test_large(folio)) + return -E2BIG; if (folio_test_writeback(folio)) - return -EAGAIN; - expected = expected_folio_refs(folio); + return -EBUSY; + expected = expected_folio_refs(folio) + 1; if (!folio_ref_freeze(folio, expected)) return -EBUSY; set_bit(PG_arch_1, &folio->flags); @@ -267,251 +277,7 @@ static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) return -EAGAIN; return uvcb->rc == 0x10a ? -ENXIO : -EINVAL; } - -/** - * should_export_before_import - Determine whether an export is needed - * before an import-like operation - * @uvcb: the Ultravisor control block of the UVC to be performed - * @mm: the mm of the process - * - * Returns whether an export is needed before every import-like operation. - * This is needed for shared pages, which don't trigger a secure storage - * exception when accessed from a different guest. - * - * Although considered as one, the Unpin Page UVC is not an actual import, - * so it is not affected. - * - * No export is needed also when there is only one protected VM, because the - * page cannot belong to the wrong VM in that case (there is no "other VM" - * it can belong to). - * - * Return: true if an export is needed before every import, otherwise false. - */ -static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) -{ - /* - * The misc feature indicates, among other things, that importing a - * shared page from a different protected VM will automatically also - * transfer its ownership. - */ - if (uv_has_feature(BIT_UV_FEAT_MISC)) - return false; - if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) - return false; - return atomic_read(&mm->context.protected_count) > 1; -} - -/* - * Drain LRU caches: the local one on first invocation and the ones of all - * CPUs on successive invocations. Returns "true" on the first invocation. - */ -static bool drain_lru(bool *drain_lru_called) -{ - /* - * If we have tried a local drain and the folio refcount - * still does not match our expected safe value, try with a - * system wide drain. This is needed if the pagevecs holding - * the page are on a different CPU. - */ - if (*drain_lru_called) { - lru_add_drain_all(); - /* We give up here, don't retry immediately. */ - return false; - } - /* - * We are here if the folio refcount does not match the - * expected safe value. The main culprits are usually - * pagevecs. With lru_add_drain() we drain the pagevecs - * on the local CPU so that hopefully the refcount will - * reach the expected safe value. - */ - lru_add_drain(); - *drain_lru_called = true; - /* The caller should try again immediately */ - return true; -} - -/* - * Requests the Ultravisor to make a page accessible to a guest. - * If it's brought in the first time, it will be cleared. If - * it has been exported before, it will be decrypted and integrity - * checked. - */ -int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) -{ - struct vm_area_struct *vma; - bool drain_lru_called = false; - spinlock_t *ptelock; - unsigned long uaddr; - struct folio *folio; - pte_t *ptep; - int rc; - -again: - rc = -EFAULT; - mmap_read_lock(gmap->mm); - - uaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(uaddr)) - goto out; - vma = vma_lookup(gmap->mm, uaddr); - if (!vma) - goto out; - /* - * Secure pages cannot be huge and userspace should not combine both. - * In case userspace does it anyway this will result in an -EFAULT for - * the unpack. The guest is thus never reaching secure mode. If - * userspace is playing dirty tricky with mapping huge pages later - * on this will result in a segmentation fault. - */ - if (is_vm_hugetlb_page(vma)) - goto out; - - rc = -ENXIO; - ptep = get_locked_pte(gmap->mm, uaddr, &ptelock); - if (!ptep) - goto out; - if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) { - folio = page_folio(pte_page(*ptep)); - rc = -EAGAIN; - if (folio_test_large(folio)) { - rc = -E2BIG; - } else if (folio_trylock(folio)) { - if (should_export_before_import(uvcb, gmap->mm)) - uv_convert_from_secure(PFN_PHYS(folio_pfn(folio))); - rc = make_folio_secure(folio, uvcb); - folio_unlock(folio); - } - - /* - * Once we drop the PTL, the folio may get unmapped and - * freed immediately. We need a temporary reference. - */ - if (rc == -EAGAIN || rc == -E2BIG) - folio_get(folio); - } - pte_unmap_unlock(ptep, ptelock); -out: - mmap_read_unlock(gmap->mm); - - switch (rc) { - case -E2BIG: - folio_lock(folio); - rc = split_folio(folio); - folio_unlock(folio); - folio_put(folio); - - switch (rc) { - case 0: - /* Splitting succeeded, try again immediately. */ - goto again; - case -EAGAIN: - /* Additional folio references. */ - if (drain_lru(&drain_lru_called)) - goto again; - return -EAGAIN; - case -EBUSY: - /* Unexpected race. */ - return -EAGAIN; - } - WARN_ON_ONCE(1); - return -ENXIO; - case -EAGAIN: - /* - * If we are here because the UVC returned busy or partial - * completion, this is just a useless check, but it is safe. - */ - folio_wait_writeback(folio); - folio_put(folio); - return -EAGAIN; - case -EBUSY: - /* Additional folio references. */ - if (drain_lru(&drain_lru_called)) - goto again; - return -EAGAIN; - case -ENXIO: - if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE)) - return -EFAULT; - return -EAGAIN; - } - return rc; -} -EXPORT_SYMBOL_GPL(gmap_make_secure); - -int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr) -{ - struct uv_cb_cts uvcb = { - .header.cmd = UVC_CMD_CONV_TO_SEC_STOR, - .header.len = sizeof(uvcb), - .guest_handle = gmap->guest_handle, - .gaddr = gaddr, - }; - - return gmap_make_secure(gmap, gaddr, &uvcb); -} -EXPORT_SYMBOL_GPL(gmap_convert_to_secure); - -/** - * gmap_destroy_page - Destroy a guest page. - * @gmap: the gmap of the guest - * @gaddr: the guest address to destroy - * - * An attempt will be made to destroy the given guest page. If the attempt - * fails, an attempt is made to export the page. If both attempts fail, an - * appropriate error is returned. - */ -int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr) -{ - struct vm_area_struct *vma; - struct folio_walk fw; - unsigned long uaddr; - struct folio *folio; - int rc; - - rc = -EFAULT; - mmap_read_lock(gmap->mm); - - uaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(uaddr)) - goto out; - vma = vma_lookup(gmap->mm, uaddr); - if (!vma) - goto out; - /* - * Huge pages should not be able to become secure - */ - if (is_vm_hugetlb_page(vma)) - goto out; - - rc = 0; - folio = folio_walk_start(&fw, vma, uaddr, 0); - if (!folio) - goto out; - /* - * See gmap_make_secure(): large folios cannot be secure. Small - * folio implies FW_LEVEL_PTE. - */ - if (folio_test_large(folio) || !pte_write(fw.pte)) - goto out_walk_end; - rc = uv_destroy_folio(folio); - /* - * Fault handlers can race; it is possible that two CPUs will fault - * on the same secure page. One CPU can destroy the page, reboot, - * re-enter secure mode and import it, while the second CPU was - * stuck at the beginning of the handler. At some point the second - * CPU will be able to progress, and it will not be able to destroy - * the page. In that case we do not want to terminate the process, - * we instead try to export the page. - */ - if (rc) - rc = uv_convert_from_secure_folio(folio); -out_walk_end: - folio_walk_end(&fw, vma); -out: - mmap_read_unlock(gmap->mm); - return rc; -} -EXPORT_SYMBOL_GPL(gmap_destroy_page); +EXPORT_SYMBOL_GPL(make_folio_secure); /* * To be called with the folio locked or with an extra reference! This will diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 02217fb4ae10f..d972dea657fd1 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm ccflags-y := -Ivirt/kvm -Iarch/s390/kvm kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o -kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o +kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap.o kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c new file mode 100644 index 0000000000000..02adf151d4de4 --- /dev/null +++ b/arch/s390/kvm/gmap.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Guest memory management for KVM/s390 + * + * Copyright IBM Corp. 2008, 2020, 2024 + * + * Author(s): Claudio Imbrenda + * Martin Schwidefsky + * David Hildenbrand + * Janosch Frank + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "gmap.h" + +/** + * should_export_before_import - Determine whether an export is needed + * before an import-like operation + * @uvcb: the Ultravisor control block of the UVC to be performed + * @mm: the mm of the process + * + * Returns whether an export is needed before every import-like operation. + * This is needed for shared pages, which don't trigger a secure storage + * exception when accessed from a different guest. + * + * Although considered as one, the Unpin Page UVC is not an actual import, + * so it is not affected. + * + * No export is needed also when there is only one protected VM, because the + * page cannot belong to the wrong VM in that case (there is no "other VM" + * it can belong to). + * + * Return: true if an export is needed before every import, otherwise false. + */ +static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) +{ + /* + * The misc feature indicates, among other things, that importing a + * shared page from a different protected VM will automatically also + * transfer its ownership. + */ + if (uv_has_feature(BIT_UV_FEAT_MISC)) + return false; + if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) + return false; + return atomic_read(&mm->context.protected_count) > 1; +} + +static int __gmap_make_secure(struct gmap *gmap, struct page *page, void *uvcb) +{ + struct folio *folio = page_folio(page); + int rc; + + /* + * Secure pages cannot be huge and userspace should not combine both. + * In case userspace does it anyway this will result in an -EFAULT for + * the unpack. The guest is thus never reaching secure mode. + * If userspace plays dirty tricks and decides to map huge pages at a + * later point in time, it will receive a segmentation fault or + * KVM_RUN will return -EFAULT. + */ + if (folio_test_hugetlb(folio)) + return -EFAULT; + if (folio_test_large(folio)) { + mmap_read_unlock(gmap->mm); + rc = kvm_s390_wiggle_split_folio(gmap->mm, folio, true); + mmap_read_lock(gmap->mm); + if (rc) + return rc; + folio = page_folio(page); + } + + if (!folio_trylock(folio)) + return -EAGAIN; + if (should_export_before_import(uvcb, gmap->mm)) + uv_convert_from_secure(folio_to_phys(folio)); + rc = make_folio_secure(folio, uvcb); + folio_unlock(folio); + + /* + * In theory a race is possible and the folio might have become + * large again before the folio_trylock() above. In that case, no + * action is performed and -EAGAIN is returned; the callers will + * have to try again later. + * In most cases this implies running the VM again, getting the same + * exception again, and make another attempt in this function. + * This is expected to happen extremely rarely. + */ + if (rc == -E2BIG) + return -EAGAIN; + /* The folio has too many references, try to shake some off */ + if (rc == -EBUSY) { + mmap_read_unlock(gmap->mm); + kvm_s390_wiggle_split_folio(gmap->mm, folio, false); + mmap_read_lock(gmap->mm); + return -EAGAIN; + } + + return rc; +} + +/** + * gmap_make_secure() - make one guest page secure + * @gmap: the guest gmap + * @gaddr: the guest address that needs to be made secure + * @uvcb: the UVCB specifying which operation needs to be performed + * + * Context: needs to be called with kvm->srcu held. + * Return: 0 on success, < 0 in case of error (see __gmap_make_secure()). + */ +int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) +{ + struct kvm *kvm = gmap->private; + struct page *page; + int rc = 0; + + lockdep_assert_held(&kvm->srcu); + + page = gfn_to_page(kvm, gpa_to_gfn(gaddr)); + mmap_read_lock(gmap->mm); + if (page) + rc = __gmap_make_secure(gmap, page, uvcb); + kvm_release_page_clean(page); + mmap_read_unlock(gmap->mm); + + return rc; +} + +int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr) +{ + struct uv_cb_cts uvcb = { + .header.cmd = UVC_CMD_CONV_TO_SEC_STOR, + .header.len = sizeof(uvcb), + .guest_handle = gmap->guest_handle, + .gaddr = gaddr, + }; + + return gmap_make_secure(gmap, gaddr, &uvcb); +} + +/** + * __gmap_destroy_page() - Destroy a guest page. + * @gmap: the gmap of the guest + * @page: the page to destroy + * + * An attempt will be made to destroy the given guest page. If the attempt + * fails, an attempt is made to export the page. If both attempts fail, an + * appropriate error is returned. + * + * Context: must be called holding the mm lock for gmap->mm + */ +static int __gmap_destroy_page(struct gmap *gmap, struct page *page) +{ + struct folio *folio = page_folio(page); + int rc; + + /* + * See gmap_make_secure(): large folios cannot be secure. Small + * folio implies FW_LEVEL_PTE. + */ + if (folio_test_large(folio)) + return -EFAULT; + + rc = uv_destroy_folio(folio); + /* + * Fault handlers can race; it is possible that two CPUs will fault + * on the same secure page. One CPU can destroy the page, reboot, + * re-enter secure mode and import it, while the second CPU was + * stuck at the beginning of the handler. At some point the second + * CPU will be able to progress, and it will not be able to destroy + * the page. In that case we do not want to terminate the process, + * we instead try to export the page. + */ + if (rc) + rc = uv_convert_from_secure_folio(folio); + + return rc; +} + +/** + * gmap_destroy_page() - Destroy a guest page. + * @gmap: the gmap of the guest + * @gaddr: the guest address to destroy + * + * An attempt will be made to destroy the given guest page. If the attempt + * fails, an attempt is made to export the page. If both attempts fail, an + * appropriate error is returned. + * + * Context: may sleep. + */ +int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr) +{ + struct page *page; + int rc = 0; + + mmap_read_lock(gmap->mm); + page = gfn_to_page(gmap->private, gpa_to_gfn(gaddr)); + if (page) + rc = __gmap_destroy_page(gmap, page); + kvm_release_page_clean(page); + mmap_read_unlock(gmap->mm); + return rc; +} diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h new file mode 100644 index 0000000000000..f2b52ce29be3d --- /dev/null +++ b/arch/s390/kvm/gmap.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * KVM guest address space mapping code + * + * Copyright IBM Corp. 2007, 2016, 2025 + * Author(s): Martin Schwidefsky + * Claudio Imbrenda + */ + +#ifndef ARCH_KVM_S390_GMAP_H +#define ARCH_KVM_S390_GMAP_H + +int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); +int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); +int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr); + +#endif diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 5bbaadf75dc64..acf10aefd08f0 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -21,6 +21,7 @@ #include "gaccess.h" #include "trace.h" #include "trace-s390.h" +#include "gmap.h" u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu) { @@ -549,7 +550,7 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu) * If the unpin did not succeed, the guest will exit again for the UVC * and we will retry the unpin. */ - if (rc == -EINVAL) + if (rc == -EINVAL || rc == -ENXIO) return 0; /* * If we got -EAGAIN here, we simply return it. It will eventually diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index fc44002a7b04c..a25ca440760f1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -50,6 +50,7 @@ #include "kvm-s390.h" #include "gaccess.h" #include "pci.h" +#include "gmap.h" #define CREATE_TRACE_POINTS #include "trace.h" diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 75e81ba26d047..22c012aa5206b 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -17,6 +17,7 @@ #include #include #include "kvm-s390.h" +#include "gmap.h" bool kvm_s390_pv_is_protected(struct kvm *kvm) { @@ -638,10 +639,28 @@ static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak, .tweak[1] = offset, }; int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb); + unsigned long vmaddr; + bool unlocked; *rc = uvcb.header.rc; *rrc = uvcb.header.rrc; + if (ret == -ENXIO) { + mmap_read_lock(kvm->mm); + vmaddr = gfn_to_hva(kvm, gpa_to_gfn(addr)); + if (kvm_is_error_hva(vmaddr)) { + ret = -EFAULT; + } else { + ret = fixup_user_fault(kvm->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); + if (!ret) + ret = __gmap_link(kvm->arch.gmap, addr, vmaddr); + } + mmap_read_unlock(kvm->mm); + if (!ret) + return -EAGAIN; + return ret; + } + if (ret && ret != -EAGAIN) KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x", uvcb.gaddr, *rc, *rrc); @@ -660,6 +679,8 @@ int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size, KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx", addr, size); + guard(srcu)(&kvm->srcu); + while (offset < size) { ret = unpack_one(kvm, addr, tweak, offset, rc, rrc); if (ret == -EAGAIN) { diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 16b8a36c56de1..3e6e25119a964 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -3035,3 +3035,31 @@ int s390_replace_asce(struct gmap *gmap) return 0; } EXPORT_SYMBOL_GPL(s390_replace_asce); + +/** + * kvm_s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split + * @mm: the mm containing the folio to work on + * @folio: the folio + * @split: whether to split a large folio + * + * Context: Must be called while holding an extra reference to the folio; + * the mm lock should not be held. + */ +int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split) +{ + int rc; + + lockdep_assert_not_held(&mm->mmap_lock); + folio_wait_writeback(folio); + lru_add_drain_all(); + if (split) { + folio_lock(folio); + rc = split_folio(folio); + folio_unlock(folio); + + if (rc != -EBUSY) + return rc; + } + return -EAGAIN; +} +EXPORT_SYMBOL_GPL(kvm_s390_wiggle_split_folio); -- GitLab From 3762e905ec2e498c96464e094b7d46be98151d3b Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 23 Jan 2025 15:46:18 +0100 Subject: [PATCH 0087/1585] KVM: s390: use __kvm_faultin_pfn() Refactor the existing page fault handling code to use __kvm_faultin_pfn(). This possible now that memslots are always present. Acked-by: Janosch Frank Reviewed-by: Christoph Schlameuss Link: https://lore.kernel.org/r/20250123144627.312456-7-imbrenda@linux.ibm.com Signed-off-by: Claudio Imbrenda Message-ID: <20250123144627.312456-7-imbrenda@linux.ibm.com> --- arch/s390/kvm/kvm-s390.c | 126 ++++++++++++++++++++++++++++++--------- arch/s390/kvm/kvm-s390.h | 6 ++ arch/s390/mm/gmap.c | 1 + 3 files changed, 106 insertions(+), 27 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index a25ca440760f1..70c98bf127541 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4786,11 +4786,104 @@ static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu) current->thread.gmap_int_code, current->thread.gmap_teid.val); } +/* + * __kvm_s390_handle_dat_fault() - handle a dat fault for the gmap of a vcpu + * @vcpu: the vCPU whose gmap is to be fixed up + * @gfn: the guest frame number used for memslots (including fake memslots) + * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps + * @flags: FOLL_* flags + * + * Return: 0 on success, < 0 in case of error. + * Context: The mm lock must not be held before calling. May sleep. + */ +int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags) +{ + struct kvm_memory_slot *slot; + unsigned int fault_flags; + bool writable, unlocked; + unsigned long vmaddr; + struct page *page; + kvm_pfn_t pfn; + int rc; + + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + if (!slot || slot->flags & KVM_MEMSLOT_INVALID) + return vcpu_post_run_addressing_exception(vcpu); + + fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0; + if (vcpu->arch.gmap->pfault_enabled) + flags |= FOLL_NOWAIT; + vmaddr = __gfn_to_hva_memslot(slot, gfn); + +try_again: + pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page); + + /* Access outside memory, inject addressing exception */ + if (is_noslot_pfn(pfn)) + return vcpu_post_run_addressing_exception(vcpu); + /* Signal pending: try again */ + if (pfn == KVM_PFN_ERR_SIGPENDING) + return -EAGAIN; + + /* Needs I/O, try to setup async pfault (only possible with FOLL_NOWAIT) */ + if (pfn == KVM_PFN_ERR_NEEDS_IO) { + trace_kvm_s390_major_guest_pfault(vcpu); + if (kvm_arch_setup_async_pf(vcpu)) + return 0; + vcpu->stat.pfault_sync++; + /* Could not setup async pfault, try again synchronously */ + flags &= ~FOLL_NOWAIT; + goto try_again; + } + /* Any other error */ + if (is_error_pfn(pfn)) + return -EFAULT; + + /* Success */ + mmap_read_lock(vcpu->arch.gmap->mm); + /* Mark the userspace PTEs as young and/or dirty, to avoid page fault loops */ + rc = fixup_user_fault(vcpu->arch.gmap->mm, vmaddr, fault_flags, &unlocked); + if (!rc) + rc = __gmap_link(vcpu->arch.gmap, gaddr, vmaddr); + scoped_guard(spinlock, &vcpu->kvm->mmu_lock) { + kvm_release_faultin_page(vcpu->kvm, page, false, writable); + } + mmap_read_unlock(vcpu->arch.gmap->mm); + return rc; +} + +static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags) +{ + unsigned long gaddr_tmp; + gfn_t gfn; + + gfn = gpa_to_gfn(gaddr); + if (kvm_is_ucontrol(vcpu->kvm)) { + /* + * This translates the per-vCPU guest address into a + * fake guest address, which can then be used with the + * fake memslots that are identity mapping userspace. + * This allows ucontrol VMs to use the normal fault + * resolution path, like normal VMs. + */ + mmap_read_lock(vcpu->arch.gmap->mm); + gaddr_tmp = __gmap_translate(vcpu->arch.gmap, gaddr); + mmap_read_unlock(vcpu->arch.gmap->mm); + if (gaddr_tmp == -EFAULT) { + vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; + vcpu->run->s390_ucontrol.trans_exc_code = gaddr; + vcpu->run->s390_ucontrol.pgm_code = PGM_SEGMENT_TRANSLATION; + return -EREMOTE; + } + gfn = gpa_to_gfn(gaddr_tmp); + } + return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags); +} + static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) { unsigned int flags = 0; unsigned long gaddr; - int rc = 0; gaddr = current->thread.gmap_teid.addr * PAGE_SIZE; if (kvm_s390_cur_gmap_fault_is_write()) @@ -4842,37 +4935,14 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) case PGM_REGION_SECOND_TRANS: case PGM_REGION_THIRD_TRANS: kvm_s390_assert_primary_as(vcpu); - if (vcpu->arch.gmap->pfault_enabled) { - rc = gmap_fault(vcpu->arch.gmap, gaddr, flags | FAULT_FLAG_RETRY_NOWAIT); - if (rc == -EFAULT) - return vcpu_post_run_addressing_exception(vcpu); - if (rc == -EAGAIN) { - trace_kvm_s390_major_guest_pfault(vcpu); - if (kvm_arch_setup_async_pf(vcpu)) - return 0; - vcpu->stat.pfault_sync++; - } else { - return rc; - } - } - rc = gmap_fault(vcpu->arch.gmap, gaddr, flags); - if (rc == -EFAULT) { - if (kvm_is_ucontrol(vcpu->kvm)) { - vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; - vcpu->run->s390_ucontrol.trans_exc_code = gaddr; - vcpu->run->s390_ucontrol.pgm_code = 0x10; - return -EREMOTE; - } - return vcpu_post_run_addressing_exception(vcpu); - } - break; + return vcpu_dat_fault_handler(vcpu, gaddr, flags); default: KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx", current->thread.gmap_int_code, current->thread.gmap_teid.val); send_sig(SIGSEGV, current, 0); break; } - return rc; + return 0; } static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) @@ -5751,7 +5821,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } #endif case KVM_S390_VCPU_FAULT: { - r = gmap_fault(vcpu->arch.gmap, arg, 0); + idx = srcu_read_lock(&vcpu->kvm->srcu); + r = vcpu_dat_fault_handler(vcpu, arg, 0); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } case KVM_ENABLE_CAP: diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 30736ac16f848..3be5291723c8e 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -410,6 +410,12 @@ void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm); __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu); int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc); +int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags); + +static inline int kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gpa_t gaddr, unsigned int flags) +{ + return __kvm_s390_handle_dat_fault(vcpu, gpa_to_gfn(gaddr), gaddr, flags); +} /* implemented in diag.c */ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 3e6e25119a964..bfaba77333067 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -605,6 +605,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) radix_tree_preload_end(); return rc; } +EXPORT_SYMBOL(__gmap_link); /** * fixup_user_fault_nowait - manually resolve a user page fault without waiting -- GitLab From 6eb84e130075b9ea35a946dcf9a2476ac2c749a0 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 23 Jan 2025 15:46:19 +0100 Subject: [PATCH 0088/1585] KVM: s390: get rid of gmap_fault() All gmap page faults are already handled in kvm by the function kvm_s390_handle_dat_fault(); only few users of gmap_fault remained, all within kvm. Convert those calls to use kvm_s390_handle_dat_fault() instead. Remove gmap_fault() entirely since it has no more users. Acked-by: Janosch Frank Reviewed-by: Christoph Schlameuss Link: https://lore.kernel.org/r/20250123144627.312456-8-imbrenda@linux.ibm.com Signed-off-by: Claudio Imbrenda Message-ID: <20250123144627.312456-8-imbrenda@linux.ibm.com> --- arch/s390/include/asm/gmap.h | 1 - arch/s390/kvm/intercept.c | 4 +- arch/s390/mm/gmap.c | 124 ----------------------------------- 3 files changed, 2 insertions(+), 127 deletions(-) diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 3e66f53fe3ccc..d4572729269f4 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -113,7 +113,6 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); unsigned long __gmap_translate(struct gmap *, unsigned long gaddr); unsigned long gmap_translate(struct gmap *, unsigned long gaddr); int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr); -int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags); void gmap_discard(struct gmap *, unsigned long from, unsigned long to); void __gmap_zap(struct gmap *, unsigned long gaddr); void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr); diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index acf10aefd08f0..610dd44a948b2 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -368,7 +368,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) reg2, &srcaddr, GACC_FETCH, 0); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); - rc = gmap_fault(vcpu->arch.gmap, srcaddr, 0); + rc = kvm_s390_handle_dat_fault(vcpu, srcaddr, 0); if (rc != 0) return rc; @@ -377,7 +377,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) reg1, &dstaddr, GACC_STORE, 0); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); - rc = gmap_fault(vcpu->arch.gmap, dstaddr, FAULT_FLAG_WRITE); + rc = kvm_s390_handle_dat_fault(vcpu, dstaddr, FOLL_WRITE); if (rc != 0) return rc; diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index bfaba77333067..e124fca147377 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -607,130 +607,6 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) } EXPORT_SYMBOL(__gmap_link); -/** - * fixup_user_fault_nowait - manually resolve a user page fault without waiting - * @mm: mm_struct of target mm - * @address: user address - * @fault_flags:flags to pass down to handle_mm_fault() - * @unlocked: did we unlock the mmap_lock while retrying - * - * This function behaves similarly to fixup_user_fault(), but it guarantees - * that the fault will be resolved without waiting. The function might drop - * and re-acquire the mm lock, in which case @unlocked will be set to true. - * - * The guarantee is that the fault is handled without waiting, but the - * function itself might sleep, due to the lock. - * - * Context: Needs to be called with mm->mmap_lock held in read mode, and will - * return with the lock held in read mode; @unlocked will indicate whether - * the lock has been dropped and re-acquired. This is the same behaviour as - * fixup_user_fault(). - * - * Return: 0 on success, -EAGAIN if the fault cannot be resolved without - * waiting, -EFAULT if the fault cannot be resolved, -ENOMEM if out of - * memory. - */ -static int fixup_user_fault_nowait(struct mm_struct *mm, unsigned long address, - unsigned int fault_flags, bool *unlocked) -{ - struct vm_area_struct *vma; - unsigned int test_flags; - vm_fault_t fault; - int rc; - - fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT; - test_flags = fault_flags & FAULT_FLAG_WRITE ? VM_WRITE : VM_READ; - - vma = find_vma(mm, address); - if (unlikely(!vma || address < vma->vm_start)) - return -EFAULT; - if (unlikely(!(vma->vm_flags & test_flags))) - return -EFAULT; - - fault = handle_mm_fault(vma, address, fault_flags, NULL); - /* the mm lock has been dropped, take it again */ - if (fault & VM_FAULT_COMPLETED) { - *unlocked = true; - mmap_read_lock(mm); - return 0; - } - /* the mm lock has not been dropped */ - if (fault & VM_FAULT_ERROR) { - rc = vm_fault_to_errno(fault, 0); - BUG_ON(!rc); - return rc; - } - /* the mm lock has not been dropped because of FAULT_FLAG_RETRY_NOWAIT */ - if (fault & VM_FAULT_RETRY) - return -EAGAIN; - /* nothing needed to be done and the mm lock has not been dropped */ - return 0; -} - -/** - * __gmap_fault - resolve a fault on a guest address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * @fault_flags: flags to pass down to handle_mm_fault() - * - * Context: Needs to be called with mm->mmap_lock held in read mode. Might - * drop and re-acquire the lock. Will always return with the lock held. - */ -static int __gmap_fault(struct gmap *gmap, unsigned long gaddr, unsigned int fault_flags) -{ - unsigned long vmaddr; - bool unlocked; - int rc = 0; - -retry: - unlocked = false; - - vmaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(vmaddr)) - return vmaddr; - - if (fault_flags & FAULT_FLAG_RETRY_NOWAIT) - rc = fixup_user_fault_nowait(gmap->mm, vmaddr, fault_flags, &unlocked); - else - rc = fixup_user_fault(gmap->mm, vmaddr, fault_flags, &unlocked); - if (rc) - return rc; - /* - * In the case that fixup_user_fault unlocked the mmap_lock during - * fault-in, redo __gmap_translate() to avoid racing with a - * map/unmap_segment. - * In particular, __gmap_translate(), fixup_user_fault{,_nowait}(), - * and __gmap_link() must all be called atomically in one go; if the - * lock had been dropped in between, a retry is needed. - */ - if (unlocked) - goto retry; - - return __gmap_link(gmap, gaddr, vmaddr); -} - -/** - * gmap_fault - resolve a fault on a guest address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * @fault_flags: flags to pass down to handle_mm_fault() - * - * Returns 0 on success, -ENOMEM for out of memory conditions, -EFAULT if the - * vm address is already mapped to a different guest segment, and -EAGAIN if - * FAULT_FLAG_RETRY_NOWAIT was specified and the fault could not be processed - * immediately. - */ -int gmap_fault(struct gmap *gmap, unsigned long gaddr, unsigned int fault_flags) -{ - int rc; - - mmap_read_lock(gmap->mm); - rc = __gmap_fault(gmap, gaddr, fault_flags); - mmap_read_unlock(gmap->mm); - return rc; -} -EXPORT_SYMBOL_GPL(gmap_fault); - /* * this function is assumed to be called with mmap_lock held */ -- GitLab From d41993f71385ce7e9661c203e02a588a93a59b24 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 23 Jan 2025 15:46:20 +0100 Subject: [PATCH 0089/1585] KVM: s390: get rid of gmap_translate() Add gpa_to_hva(), which uses memslots, and use it to replace all uses of gmap_translate(). Reviewed-by: Janosch Frank Reviewed-by: Christoph Schlameuss Link: https://lore.kernel.org/r/20250123144627.312456-9-imbrenda@linux.ibm.com Signed-off-by: Claudio Imbrenda Message-ID: <20250123144627.312456-9-imbrenda@linux.ibm.com> --- arch/s390/include/asm/gmap.h | 1 - arch/s390/kvm/interrupt.c | 19 +++++++++++-------- arch/s390/kvm/kvm-s390.h | 9 +++++++++ arch/s390/mm/gmap.c | 20 -------------------- 4 files changed, 20 insertions(+), 29 deletions(-) diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index d4572729269f4..74b48f2e608a8 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -111,7 +111,6 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long len); int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); unsigned long __gmap_translate(struct gmap *, unsigned long gaddr); -unsigned long gmap_translate(struct gmap *, unsigned long gaddr); int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr); void gmap_discard(struct gmap *, unsigned long from, unsigned long to); void __gmap_zap(struct gmap *, unsigned long gaddr); diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index d4f031e086fc3..07ff0e10cb7f5 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2893,7 +2893,8 @@ int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { - u64 uaddr; + u64 uaddr_s, uaddr_i; + int idx; switch (ue->type) { /* we store the userspace addresses instead of the guest addresses */ @@ -2901,14 +2902,16 @@ int kvm_set_routing_entry(struct kvm *kvm, if (kvm_is_ucontrol(kvm)) return -EINVAL; e->set = set_adapter_int; - uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr); - if (uaddr == -EFAULT) - return -EFAULT; - e->adapter.summary_addr = uaddr; - uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr); - if (uaddr == -EFAULT) + + idx = srcu_read_lock(&kvm->srcu); + uaddr_s = gpa_to_hva(kvm, ue->u.adapter.summary_addr); + uaddr_i = gpa_to_hva(kvm, ue->u.adapter.ind_addr); + srcu_read_unlock(&kvm->srcu, idx); + + if (kvm_is_error_hva(uaddr_s) || kvm_is_error_hva(uaddr_i)) return -EFAULT; - e->adapter.ind_addr = uaddr; + e->adapter.summary_addr = uaddr_s; + e->adapter.ind_addr = uaddr_i; e->adapter.summary_offset = ue->u.adapter.summary_offset; e->adapter.ind_offset = ue->u.adapter.ind_offset; e->adapter.adapter_id = ue->u.adapter.adapter_id; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 3be5291723c8e..61e8544924b34 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -281,6 +281,15 @@ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm) return gd; } +static inline hva_t gpa_to_hva(struct kvm *kvm, gpa_t gpa) +{ + hva_t hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); + + if (!kvm_is_error_hva(hva)) + hva |= offset_in_page(gpa); + return hva; +} + /* implemented in pv.c */ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index e124fca147377..7fd298732d1e7 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -463,26 +463,6 @@ unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) } EXPORT_SYMBOL_GPL(__gmap_translate); -/** - * gmap_translate - translate a guest address to a user space address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * - * Returns user space address which corresponds to the guest address or - * -EFAULT if no such mapping exists. - * This function does not establish potentially missing page table entries. - */ -unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) -{ - unsigned long rc; - - mmap_read_lock(gmap->mm); - rc = __gmap_translate(gmap, gaddr); - mmap_read_unlock(gmap->mm); - return rc; -} -EXPORT_SYMBOL_GPL(gmap_translate); - /** * gmap_unlink - disconnect a page table from the gmap shadow tables * @mm: pointer to the parent mm_struct -- GitLab From c9f721ed8ec6942dad951d2d8c4fca291170165e Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 23 Jan 2025 15:46:21 +0100 Subject: [PATCH 0090/1585] KVM: s390: move some gmap shadowing functions away from mm/gmap.c Move some gmap shadowing functions from mm/gmap.c to kvm/kvm-s390.c and the newly created kvm/gmap-vsie.c This is a step toward removing gmap from mm. Reviewed-by: Janosch Frank Reviewed-by: Christoph Schlameuss Link: https://lore.kernel.org/r/20250123144627.312456-10-imbrenda@linux.ibm.com Signed-off-by: Claudio Imbrenda Message-ID: <20250123144627.312456-10-imbrenda@linux.ibm.com> --- arch/s390/include/asm/gmap.h | 9 +- arch/s390/kvm/Makefile | 2 +- arch/s390/kvm/gmap-vsie.c | 142 +++++++++++++++++++++ arch/s390/kvm/gmap.h | 20 +++ arch/s390/kvm/kvm-s390.c | 74 ++++++++++- arch/s390/kvm/kvm-s390.h | 2 + arch/s390/kvm/vsie.c | 2 + arch/s390/mm/gmap.c | 238 +++++------------------------------ 8 files changed, 271 insertions(+), 218 deletions(-) create mode 100644 arch/s390/kvm/gmap-vsie.c diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 74b48f2e608a8..dbf2329281d20 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -106,6 +106,8 @@ struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit); void gmap_remove(struct gmap *gmap); struct gmap *gmap_get(struct gmap *gmap); void gmap_put(struct gmap *gmap); +void gmap_free(struct gmap *gmap); +struct gmap *gmap_alloc(unsigned long limit); int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long len); @@ -118,9 +120,7 @@ void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr) int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val); -struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, - int edat_level); -int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level); +void gmap_unshadow(struct gmap *sg); int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, int fake); int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, @@ -136,8 +136,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte); void gmap_register_pte_notifier(struct gmap_notifier *); void gmap_unregister_pte_notifier(struct gmap_notifier *); -int gmap_mprotect_notify(struct gmap *, unsigned long start, - unsigned long len, int prot); +int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits); void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4], unsigned long gaddr, unsigned long vmaddr); diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index d972dea657fd1..f0ffe874adc21 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm ccflags-y := -Ivirt/kvm -Iarch/s390/kvm kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o -kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap.o +kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap.o gmap-vsie.o kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/s390/kvm/gmap-vsie.c b/arch/s390/kvm/gmap-vsie.c new file mode 100644 index 0000000000000..a6d1dbb04c970 --- /dev/null +++ b/arch/s390/kvm/gmap-vsie.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Guest memory management for KVM/s390 nested VMs. + * + * Copyright IBM Corp. 2008, 2020, 2024 + * + * Author(s): Claudio Imbrenda + * Martin Schwidefsky + * David Hildenbrand + * Janosch Frank + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "kvm-s390.h" +#include "gmap.h" + +/** + * gmap_find_shadow - find a specific asce in the list of shadow tables + * @parent: pointer to the parent gmap + * @asce: ASCE for which the shadow table is created + * @edat_level: edat level to be used for the shadow translation + * + * Returns the pointer to a gmap if a shadow table with the given asce is + * already available, ERR_PTR(-EAGAIN) if another one is just being created, + * otherwise NULL + * + * Context: Called with parent->shadow_lock held + */ +static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, int edat_level) +{ + struct gmap *sg; + + lockdep_assert_held(&parent->shadow_lock); + list_for_each_entry(sg, &parent->children, list) { + if (!gmap_shadow_valid(sg, asce, edat_level)) + continue; + if (!sg->initialized) + return ERR_PTR(-EAGAIN); + refcount_inc(&sg->ref_count); + return sg; + } + return NULL; +} + +/** + * gmap_shadow - create/find a shadow guest address space + * @parent: pointer to the parent gmap + * @asce: ASCE for which the shadow table is created + * @edat_level: edat level to be used for the shadow translation + * + * The pages of the top level page table referred by the asce parameter + * will be set to read-only and marked in the PGSTEs of the kvm process. + * The shadow table will be removed automatically on any change to the + * PTE mapping for the source table. + * + * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory, + * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the + * parent gmap table could not be protected. + */ +struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level) +{ + struct gmap *sg, *new; + unsigned long limit; + int rc; + + if (KVM_BUG_ON(parent->mm->context.allow_gmap_hpage_1m, (struct kvm *)parent->private) || + KVM_BUG_ON(gmap_is_shadow(parent), (struct kvm *)parent->private)) + return ERR_PTR(-EFAULT); + spin_lock(&parent->shadow_lock); + sg = gmap_find_shadow(parent, asce, edat_level); + spin_unlock(&parent->shadow_lock); + if (sg) + return sg; + /* Create a new shadow gmap */ + limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11)); + if (asce & _ASCE_REAL_SPACE) + limit = -1UL; + new = gmap_alloc(limit); + if (!new) + return ERR_PTR(-ENOMEM); + new->mm = parent->mm; + new->parent = gmap_get(parent); + new->private = parent->private; + new->orig_asce = asce; + new->edat_level = edat_level; + new->initialized = false; + spin_lock(&parent->shadow_lock); + /* Recheck if another CPU created the same shadow */ + sg = gmap_find_shadow(parent, asce, edat_level); + if (sg) { + spin_unlock(&parent->shadow_lock); + gmap_free(new); + return sg; + } + if (asce & _ASCE_REAL_SPACE) { + /* only allow one real-space gmap shadow */ + list_for_each_entry(sg, &parent->children, list) { + if (sg->orig_asce & _ASCE_REAL_SPACE) { + spin_lock(&sg->guest_table_lock); + gmap_unshadow(sg); + spin_unlock(&sg->guest_table_lock); + list_del(&sg->list); + gmap_put(sg); + break; + } + } + } + refcount_set(&new->ref_count, 2); + list_add(&new->list, &parent->children); + if (asce & _ASCE_REAL_SPACE) { + /* nothing to protect, return right away */ + new->initialized = true; + spin_unlock(&parent->shadow_lock); + return new; + } + spin_unlock(&parent->shadow_lock); + /* protect after insertion, so it will get properly invalidated */ + mmap_read_lock(parent->mm); + rc = __kvm_s390_mprotect_many(parent, asce & _ASCE_ORIGIN, + ((asce & _ASCE_TABLE_LENGTH) + 1), + PROT_READ, GMAP_NOTIFY_SHADOW); + mmap_read_unlock(parent->mm); + spin_lock(&parent->shadow_lock); + new->initialized = true; + if (rc) { + list_del(&new->list); + gmap_free(new); + new = ERR_PTR(rc); + } + spin_unlock(&parent->shadow_lock); + return new; +} diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h index f2b52ce29be3d..978f541059f02 100644 --- a/arch/s390/kvm/gmap.h +++ b/arch/s390/kvm/gmap.h @@ -13,5 +13,25 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr); +struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level); + +/** + * gmap_shadow_valid - check if a shadow guest address space matches the + * given properties and is still valid + * @sg: pointer to the shadow guest address space structure + * @asce: ASCE for which the shadow table is requested + * @edat_level: edat level to be used for the shadow translation + * + * Returns 1 if the gmap shadow is still valid and matches the given + * properties, the caller can continue using it. Returns 0 otherwise, the + * caller has to request a new shadow gmap in this case. + * + */ +static inline int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level) +{ + if (sg->removed) + return 0; + return sg->orig_asce == asce && sg->edat_level == edat_level; +} #endif diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 70c98bf127541..ebecb96bacce7 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4511,6 +4511,75 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu) return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); } +static int __kvm_s390_fixup_fault_sync(struct gmap *gmap, gpa_t gaddr, unsigned int flags) +{ + struct kvm *kvm = gmap->private; + gfn_t gfn = gpa_to_gfn(gaddr); + bool unlocked; + hva_t vmaddr; + gpa_t tmp; + int rc; + + if (kvm_is_ucontrol(kvm)) { + tmp = __gmap_translate(gmap, gaddr); + gfn = gpa_to_gfn(tmp); + } + + vmaddr = gfn_to_hva(kvm, gfn); + rc = fixup_user_fault(gmap->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); + if (!rc) + rc = __gmap_link(gmap, gaddr, vmaddr); + return rc; +} + +/** + * __kvm_s390_mprotect_many() - Apply specified protection to guest pages + * @gmap: the gmap of the guest + * @gpa: the starting guest address + * @npages: how many pages to protect + * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE + * @bits: pgste notification bits to set + * + * Returns: 0 in case of success, < 0 in case of error - see gmap_protect_one() + * + * Context: kvm->srcu and gmap->mm need to be held in read mode + */ +int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot, + unsigned long bits) +{ + unsigned int fault_flag = (prot & PROT_WRITE) ? FAULT_FLAG_WRITE : 0; + gpa_t end = gpa + npages * PAGE_SIZE; + int rc; + + for (; gpa < end; gpa = ALIGN(gpa + 1, rc)) { + rc = gmap_protect_one(gmap, gpa, prot, bits); + if (rc == -EAGAIN) { + __kvm_s390_fixup_fault_sync(gmap, gpa, fault_flag); + rc = gmap_protect_one(gmap, gpa, prot, bits); + } + if (rc < 0) + return rc; + } + + return 0; +} + +static int kvm_s390_mprotect_notify_prefix(struct kvm_vcpu *vcpu) +{ + gpa_t gaddr = kvm_s390_get_prefix(vcpu); + int idx, rc; + + idx = srcu_read_lock(&vcpu->kvm->srcu); + mmap_read_lock(vcpu->arch.gmap->mm); + + rc = __kvm_s390_mprotect_many(vcpu->arch.gmap, gaddr, 2, PROT_WRITE, GMAP_NOTIFY_MPROT); + + mmap_read_unlock(vcpu->arch.gmap->mm); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + + return rc; +} + static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) { retry: @@ -4526,9 +4595,8 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) */ if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) { int rc; - rc = gmap_mprotect_notify(vcpu->arch.gmap, - kvm_s390_get_prefix(vcpu), - PAGE_SIZE * 2, PROT_WRITE); + + rc = kvm_s390_mprotect_notify_prefix(vcpu); if (rc) { kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); return rc; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 61e8544924b34..8d3bbb2dd8d27 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -420,6 +420,8 @@ void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm); __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu); int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc); int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags); +int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot, + unsigned long bits); static inline int kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gpa_t gaddr, unsigned int flags) { diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index a0398ff85d00b..a78df3a4f3530 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -22,6 +23,7 @@ #include #include "kvm-s390.h" #include "gaccess.h" +#include "gmap.h" enum vsie_page_flags { VSIE_PAGE_IN_USE = 0, diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 7fd298732d1e7..ae71b401312bf 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -43,7 +43,7 @@ static struct page *gmap_alloc_crst(void) * * Returns a guest address space structure. */ -static struct gmap *gmap_alloc(unsigned long limit) +struct gmap *gmap_alloc(unsigned long limit) { struct gmap *gmap; struct page *page; @@ -97,6 +97,7 @@ static struct gmap *gmap_alloc(unsigned long limit) out: return NULL; } +EXPORT_SYMBOL_GPL(gmap_alloc); /** * gmap_create - create a guest address space @@ -191,7 +192,7 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root) * * No locks required. There are no references to this gmap anymore. */ -static void gmap_free(struct gmap *gmap) +void gmap_free(struct gmap *gmap) { struct page *page, *next; @@ -218,6 +219,7 @@ static void gmap_free(struct gmap *gmap) kfree(gmap); } +EXPORT_SYMBOL_GPL(gmap_free); /** * gmap_get - increase reference counter for guest address space @@ -958,86 +960,40 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr, * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE * @bits: pgste notification bits to set * - * Returns 0 if successfully protected, -ENOMEM if out of memory and - * -EFAULT if gaddr is invalid (or mapping for shadows is missing). + * Returns: + * PAGE_SIZE if a small page was successfully protected; + * HPAGE_SIZE if a large page was successfully protected; + * -ENOMEM if out of memory; + * -EFAULT if gaddr is invalid (or mapping for shadows is missing); + * -EAGAIN if the guest mapping is missing and should be fixed by the caller. * - * Called with sg->mm->mmap_lock in read. + * Context: Called with sg->mm->mmap_lock in read. */ -static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, - unsigned long len, int prot, unsigned long bits) +int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits) { - unsigned long vmaddr, dist; pmd_t *pmdp; - int rc; + int rc = 0; BUG_ON(gmap_is_shadow(gmap)); - while (len) { - rc = -EAGAIN; - pmdp = gmap_pmd_op_walk(gmap, gaddr); - if (pmdp) { - if (!pmd_leaf(*pmdp)) { - rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, - bits); - if (!rc) { - len -= PAGE_SIZE; - gaddr += PAGE_SIZE; - } - } else { - rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, - bits); - if (!rc) { - dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK); - len = len < dist ? 0 : len - dist; - gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE; - } - } - gmap_pmd_op_end(gmap, pmdp); - } - if (rc) { - if (rc == -EINVAL) - return rc; - /* -EAGAIN, fixup of userspace mm and gmap */ - vmaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(vmaddr)) - return vmaddr; - rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot); - if (rc) - return rc; - } - } - return 0; -} + pmdp = gmap_pmd_op_walk(gmap, gaddr); + if (!pmdp) + return -EAGAIN; -/** - * gmap_mprotect_notify - change access rights for a range of ptes and - * call the notifier if any pte changes again - * @gmap: pointer to guest mapping meta data structure - * @gaddr: virtual address in the guest address space - * @len: size of area - * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE - * - * Returns 0 if for each page in the given range a gmap mapping exists, - * the new access rights could be set and the notifier could be armed. - * If the gmap mapping is missing for one or more pages -EFAULT is - * returned. If no memory could be allocated -ENOMEM is returned. - * This function establishes missing page table entries. - */ -int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr, - unsigned long len, int prot) -{ - int rc; + if (!pmd_leaf(*pmdp)) { + rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, bits); + if (!rc) + rc = PAGE_SIZE; + } else { + rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, bits); + if (!rc) + rc = HPAGE_SIZE; + } + gmap_pmd_op_end(gmap, pmdp); - if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap)) - return -EINVAL; - if (!MACHINE_HAS_ESOP && prot == PROT_READ) - return -EINVAL; - mmap_read_lock(gmap->mm); - rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT); - mmap_read_unlock(gmap->mm); return rc; } -EXPORT_SYMBOL_GPL(gmap_mprotect_notify); +EXPORT_SYMBOL_GPL(gmap_protect_one); /** * gmap_read_table - get an unsigned long value from a guest page table using @@ -1488,7 +1444,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, * * Called with sg->guest_table_lock */ -static void gmap_unshadow(struct gmap *sg) +void gmap_unshadow(struct gmap *sg) { unsigned long *table; @@ -1514,143 +1470,7 @@ static void gmap_unshadow(struct gmap *sg) break; } } - -/** - * gmap_find_shadow - find a specific asce in the list of shadow tables - * @parent: pointer to the parent gmap - * @asce: ASCE for which the shadow table is created - * @edat_level: edat level to be used for the shadow translation - * - * Returns the pointer to a gmap if a shadow table with the given asce is - * already available, ERR_PTR(-EAGAIN) if another one is just being created, - * otherwise NULL - */ -static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, - int edat_level) -{ - struct gmap *sg; - - list_for_each_entry(sg, &parent->children, list) { - if (sg->orig_asce != asce || sg->edat_level != edat_level || - sg->removed) - continue; - if (!sg->initialized) - return ERR_PTR(-EAGAIN); - refcount_inc(&sg->ref_count); - return sg; - } - return NULL; -} - -/** - * gmap_shadow_valid - check if a shadow guest address space matches the - * given properties and is still valid - * @sg: pointer to the shadow guest address space structure - * @asce: ASCE for which the shadow table is requested - * @edat_level: edat level to be used for the shadow translation - * - * Returns 1 if the gmap shadow is still valid and matches the given - * properties, the caller can continue using it. Returns 0 otherwise, the - * caller has to request a new shadow gmap in this case. - * - */ -int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level) -{ - if (sg->removed) - return 0; - return sg->orig_asce == asce && sg->edat_level == edat_level; -} -EXPORT_SYMBOL_GPL(gmap_shadow_valid); - -/** - * gmap_shadow - create/find a shadow guest address space - * @parent: pointer to the parent gmap - * @asce: ASCE for which the shadow table is created - * @edat_level: edat level to be used for the shadow translation - * - * The pages of the top level page table referred by the asce parameter - * will be set to read-only and marked in the PGSTEs of the kvm process. - * The shadow table will be removed automatically on any change to the - * PTE mapping for the source table. - * - * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory, - * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the - * parent gmap table could not be protected. - */ -struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, - int edat_level) -{ - struct gmap *sg, *new; - unsigned long limit; - int rc; - - BUG_ON(parent->mm->context.allow_gmap_hpage_1m); - BUG_ON(gmap_is_shadow(parent)); - spin_lock(&parent->shadow_lock); - sg = gmap_find_shadow(parent, asce, edat_level); - spin_unlock(&parent->shadow_lock); - if (sg) - return sg; - /* Create a new shadow gmap */ - limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11)); - if (asce & _ASCE_REAL_SPACE) - limit = -1UL; - new = gmap_alloc(limit); - if (!new) - return ERR_PTR(-ENOMEM); - new->mm = parent->mm; - new->parent = gmap_get(parent); - new->private = parent->private; - new->orig_asce = asce; - new->edat_level = edat_level; - new->initialized = false; - spin_lock(&parent->shadow_lock); - /* Recheck if another CPU created the same shadow */ - sg = gmap_find_shadow(parent, asce, edat_level); - if (sg) { - spin_unlock(&parent->shadow_lock); - gmap_free(new); - return sg; - } - if (asce & _ASCE_REAL_SPACE) { - /* only allow one real-space gmap shadow */ - list_for_each_entry(sg, &parent->children, list) { - if (sg->orig_asce & _ASCE_REAL_SPACE) { - spin_lock(&sg->guest_table_lock); - gmap_unshadow(sg); - spin_unlock(&sg->guest_table_lock); - list_del(&sg->list); - gmap_put(sg); - break; - } - } - } - refcount_set(&new->ref_count, 2); - list_add(&new->list, &parent->children); - if (asce & _ASCE_REAL_SPACE) { - /* nothing to protect, return right away */ - new->initialized = true; - spin_unlock(&parent->shadow_lock); - return new; - } - spin_unlock(&parent->shadow_lock); - /* protect after insertion, so it will get properly invalidated */ - mmap_read_lock(parent->mm); - rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN, - ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE, - PROT_READ, GMAP_NOTIFY_SHADOW); - mmap_read_unlock(parent->mm); - spin_lock(&parent->shadow_lock); - new->initialized = true; - if (rc) { - list_del(&new->list); - gmap_free(new); - new = ERR_PTR(rc); - } - spin_unlock(&parent->shadow_lock); - return new; -} -EXPORT_SYMBOL_GPL(gmap_shadow); +EXPORT_SYMBOL(gmap_unshadow); /** * gmap_shadow_r2t - create an empty shadow region 2 table -- GitLab From 37d1b5d8d588a9761e47d9941005e2da7def8310 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 23 Jan 2025 15:46:22 +0100 Subject: [PATCH 0091/1585] KVM: s390: stop using page->index for non-shadow gmaps The host_to_guest radix tree will now map userspace addresses to guest addresses, instead of userspace addresses to segment tables. When segment tables and page tables are needed, they are found using an additional gmap_table_walk(). This gets rid of all usage of page->index for non-shadow gmaps. Reviewed-by: Janosch Frank Reviewed-by: Christoph Schlameuss Link: https://lore.kernel.org/r/20250123144627.312456-11-imbrenda@linux.ibm.com Signed-off-by: Claudio Imbrenda Message-ID: <20250123144627.312456-11-imbrenda@linux.ibm.com> --- arch/s390/mm/gmap.c | 105 +++++++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 51 deletions(-) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index ae71b401312bf..1f83262a5a552 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -24,8 +24,20 @@ #include #include +/* + * The address is saved in a radix tree directly; NULL would be ambiguous, + * since 0 is a valid address, and NULL is returned when nothing was found. + * The lower bits are ignored by all users of the macro, so it can be used + * to distinguish a valid address 0 from a NULL. + */ +#define VALID_GADDR_FLAG 1 +#define IS_GADDR_VALID(gaddr) ((gaddr) & VALID_GADDR_FLAG) +#define MAKE_VALID_GADDR(gaddr) (((gaddr) & HPAGE_MASK) | VALID_GADDR_FLAG) + #define GMAP_SHADOW_FAKE_TABLE 1ULL +static inline unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level); + static struct page *gmap_alloc_crst(void) { struct page *page; @@ -82,7 +94,6 @@ struct gmap *gmap_alloc(unsigned long limit) page = gmap_alloc_crst(); if (!page) goto out_free; - page->index = 0; list_add(&page->lru, &gmap->crst_list); table = page_to_virt(page); crst_table_init(table, etype); @@ -303,7 +314,6 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, list_add(&page->lru, &gmap->crst_list); *table = __pa(new) | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); - page->index = gaddr; page = NULL; } spin_unlock(&gmap->guest_table_lock); @@ -312,21 +322,23 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, return 0; } -/** - * __gmap_segment_gaddr - find virtual address from segment pointer - * @entry: pointer to a segment table entry in the guest address space - * - * Returns the virtual address in the guest address space for the segment - */ -static unsigned long __gmap_segment_gaddr(unsigned long *entry) +static unsigned long host_to_guest_lookup(struct gmap *gmap, unsigned long vmaddr) { - struct page *page; - unsigned long offset; + return (unsigned long)radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); +} - offset = (unsigned long) entry / sizeof(unsigned long); - offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; - page = pmd_pgtable_page((pmd_t *) entry); - return page->index + offset; +static unsigned long host_to_guest_delete(struct gmap *gmap, unsigned long vmaddr) +{ + return (unsigned long)radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); +} + +static pmd_t *host_to_guest_pmd_delete(struct gmap *gmap, unsigned long vmaddr, + unsigned long *gaddr) +{ + *gaddr = host_to_guest_delete(gmap, vmaddr); + if (IS_GADDR_VALID(*gaddr)) + return (pmd_t *)gmap_table_walk(gmap, *gaddr, 1); + return NULL; } /** @@ -338,16 +350,19 @@ static unsigned long __gmap_segment_gaddr(unsigned long *entry) */ static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) { - unsigned long *entry; + unsigned long gaddr; int flush = 0; + pmd_t *pmdp; BUG_ON(gmap_is_shadow(gmap)); spin_lock(&gmap->guest_table_lock); - entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); - if (entry) { - flush = (*entry != _SEGMENT_ENTRY_EMPTY); - *entry = _SEGMENT_ENTRY_EMPTY; + + pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); + if (pmdp) { + flush = (pmd_val(*pmdp) != _SEGMENT_ENTRY_EMPTY); + *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); } + spin_unlock(&gmap->guest_table_lock); return flush; } @@ -564,7 +579,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) spin_lock(&gmap->guest_table_lock); if (*table == _SEGMENT_ENTRY_EMPTY) { rc = radix_tree_insert(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT, table); + vmaddr >> PMD_SHIFT, + (void *)MAKE_VALID_GADDR(gaddr)); if (!rc) { if (pmd_leaf(*pmd)) { *table = (pmd_val(*pmd) & @@ -1995,7 +2011,6 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte, unsigned long bits) { unsigned long offset, gaddr = 0; - unsigned long *table; struct gmap *gmap, *sg, *next; offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); @@ -2003,12 +2018,9 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); - table = radix_tree_lookup(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); - if (table) - gaddr = __gmap_segment_gaddr(table) + offset; + gaddr = host_to_guest_lookup(gmap, vmaddr) + offset; spin_unlock(&gmap->guest_table_lock); - if (!table) + if (!IS_GADDR_VALID(gaddr)) continue; if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) { @@ -2068,10 +2080,8 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); - pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); + pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); if (pmdp) { - gaddr = __gmap_segment_gaddr((unsigned long *)pmdp); pmdp_notify_gmap(gmap, pmdp, gaddr); WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | _SEGMENT_ENTRY_GMAP_UC | @@ -2115,28 +2125,25 @@ EXPORT_SYMBOL_GPL(gmap_pmdp_csp); */ void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr) { - unsigned long *entry, gaddr; + unsigned long gaddr; struct gmap *gmap; pmd_t *pmdp; rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); - entry = radix_tree_delete(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); - if (entry) { - pmdp = (pmd_t *)entry; - gaddr = __gmap_segment_gaddr(entry); + pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); + if (pmdp) { pmdp_notify_gmap(gmap, pmdp, gaddr); - WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC | - _SEGMENT_ENTRY)); + WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); if (MACHINE_HAS_TLB_GUEST) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_LOCAL); else if (MACHINE_HAS_IDTE) __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL); - *entry = _SEGMENT_ENTRY_EMPTY; + *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); } spin_unlock(&gmap->guest_table_lock); } @@ -2151,22 +2158,19 @@ EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local); */ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) { - unsigned long *entry, gaddr; + unsigned long gaddr; struct gmap *gmap; pmd_t *pmdp; rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); - entry = radix_tree_delete(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); - if (entry) { - pmdp = (pmd_t *)entry; - gaddr = __gmap_segment_gaddr(entry); + pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); + if (pmdp) { pmdp_notify_gmap(gmap, pmdp, gaddr); - WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC | - _SEGMENT_ENTRY)); + WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); if (MACHINE_HAS_TLB_GUEST) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); @@ -2174,7 +2178,7 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL); else __pmdp_csp(pmdp); - *entry = _SEGMENT_ENTRY_EMPTY; + *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); } spin_unlock(&gmap->guest_table_lock); } @@ -2690,7 +2694,6 @@ int s390_replace_asce(struct gmap *gmap) page = gmap_alloc_crst(); if (!page) return -ENOMEM; - page->index = 0; table = page_to_virt(page); memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT)); -- GitLab From ef0c8ef8485d9629c6d042cea8f2082f159b467e Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 23 Jan 2025 15:46:23 +0100 Subject: [PATCH 0092/1585] KVM: s390: stop using lists to keep track of used dat tables Until now, every dat table allocated to map a guest was put in a linked list. The page->lru field of struct page was used to keep track of which pages were being used, and when the gmap is torn down, the list was walked and all pages freed. This patch gets rid of the usage of page->lru. Page tables are now freed by recursively walking the dat table tree. Since s390_unlist_old_asce() becomes useless now, remove it. Acked-by: Steffen Eiden Reviewed-by: Janosch Frank Reviewed-by: Christoph Schlameuss Link: https://lore.kernel.org/r/20250123144627.312456-12-imbrenda@linux.ibm.com Signed-off-by: Claudio Imbrenda Message-ID: <20250123144627.312456-12-imbrenda@linux.ibm.com> --- arch/s390/include/asm/gmap.h | 5 -- arch/s390/mm/gmap.c | 102 ++++++++--------------------------- arch/s390/mm/pgalloc.c | 2 - 3 files changed, 23 insertions(+), 86 deletions(-) diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index dbf2329281d20..b489c45896187 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -23,7 +23,6 @@ /** * struct gmap_struct - guest address space * @list: list head for the mm->context gmap list - * @crst_list: list of all crst tables used in the guest address space * @mm: pointer to the parent mm_struct * @guest_to_host: radix tree with guest to host address translation * @host_to_guest: radix tree with pointer to segment table entries @@ -35,7 +34,6 @@ * @guest_handle: protected virtual machine handle for the ultravisor * @host_to_rmap: radix tree with gmap_rmap lists * @children: list of shadow gmap structures - * @pt_list: list of all page tables used in the shadow guest address space * @shadow_lock: spinlock to protect the shadow gmap list * @parent: pointer to the parent gmap for shadow guest address spaces * @orig_asce: ASCE for which the shadow page table has been created @@ -45,7 +43,6 @@ */ struct gmap { struct list_head list; - struct list_head crst_list; struct mm_struct *mm; struct radix_tree_root guest_to_host; struct radix_tree_root host_to_guest; @@ -61,7 +58,6 @@ struct gmap { /* Additional data for shadow guest address spaces */ struct radix_tree_root host_to_rmap; struct list_head children; - struct list_head pt_list; spinlock_t shadow_lock; struct gmap *parent; unsigned long orig_asce; @@ -141,7 +137,6 @@ int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4], unsigned long gaddr, unsigned long vmaddr); int s390_disable_cow_sharing(void); -void s390_unlist_old_asce(struct gmap *gmap); int s390_replace_asce(struct gmap *gmap); void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns); int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start, diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 1f83262a5a552..07df1a7b5ebe5 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -82,9 +82,7 @@ struct gmap *gmap_alloc(unsigned long limit) gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT); if (!gmap) goto out; - INIT_LIST_HEAD(&gmap->crst_list); INIT_LIST_HEAD(&gmap->children); - INIT_LIST_HEAD(&gmap->pt_list); INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT); INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT); INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT); @@ -94,7 +92,6 @@ struct gmap *gmap_alloc(unsigned long limit) page = gmap_alloc_crst(); if (!page) goto out_free; - list_add(&page->lru, &gmap->crst_list); table = page_to_virt(page); crst_table_init(table, etype); gmap->table = table; @@ -197,6 +194,27 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root) } while (nr > 0); } +static void gmap_free_crst(unsigned long *table, bool free_ptes) +{ + bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0; + int i; + + if (is_segment) { + if (!free_ptes) + goto out; + for (i = 0; i < _CRST_ENTRIES; i++) + if (!(table[i] & _SEGMENT_ENTRY_INVALID)) + page_table_free_pgste(page_ptdesc(phys_to_page(table[i]))); + } else { + for (i = 0; i < _CRST_ENTRIES; i++) + if (!(table[i] & _REGION_ENTRY_INVALID)) + gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes); + } + +out: + free_pages((unsigned long)table, CRST_ALLOC_ORDER); +} + /** * gmap_free - free a guest address space * @gmap: pointer to the guest address space structure @@ -205,24 +223,17 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root) */ void gmap_free(struct gmap *gmap) { - struct page *page, *next; - /* Flush tlb of all gmaps (if not already done for shadows) */ if (!(gmap_is_shadow(gmap) && gmap->removed)) gmap_flush_tlb(gmap); /* Free all segment & region tables. */ - list_for_each_entry_safe(page, next, &gmap->crst_list, lru) - __free_pages(page, CRST_ALLOC_ORDER); + gmap_free_crst(gmap->table, gmap_is_shadow(gmap)); + gmap_radix_tree_free(&gmap->guest_to_host); gmap_radix_tree_free(&gmap->host_to_guest); /* Free additional data for a shadow gmap */ if (gmap_is_shadow(gmap)) { - struct ptdesc *ptdesc, *n; - - /* Free all page tables. */ - list_for_each_entry_safe(ptdesc, n, &gmap->pt_list, pt_list) - page_table_free_pgste(ptdesc); gmap_rmap_radix_tree_free(&gmap->host_to_rmap); /* Release reference to the parent */ gmap_put(gmap->parent); @@ -311,7 +322,6 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, crst_table_init(new, init); spin_lock(&gmap->guest_table_lock); if (*table & _REGION_ENTRY_INVALID) { - list_add(&page->lru, &gmap->crst_list); *table = __pa(new) | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); page = NULL; @@ -1243,7 +1253,6 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) __gmap_unshadow_pgt(sg, raddr, __va(pgt)); /* Free page table */ ptdesc = page_ptdesc(phys_to_page(pgt)); - list_del(&ptdesc->pt_list); page_table_free_pgste(ptdesc); } @@ -1271,7 +1280,6 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr, __gmap_unshadow_pgt(sg, raddr, __va(pgt)); /* Free page table */ ptdesc = page_ptdesc(phys_to_page(pgt)); - list_del(&ptdesc->pt_list); page_table_free_pgste(ptdesc); } } @@ -1301,7 +1309,6 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) __gmap_unshadow_sgt(sg, raddr, __va(sgt)); /* Free segment table */ page = phys_to_page(sgt); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1329,7 +1336,6 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, __gmap_unshadow_sgt(sg, raddr, __va(sgt)); /* Free segment table */ page = phys_to_page(sgt); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1359,7 +1365,6 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) __gmap_unshadow_r3t(sg, raddr, __va(r3t)); /* Free region 3 table */ page = phys_to_page(r3t); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1387,7 +1392,6 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, __gmap_unshadow_r3t(sg, raddr, __va(r3t)); /* Free region 3 table */ page = phys_to_page(r3t); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1417,7 +1421,6 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) __gmap_unshadow_r2t(sg, raddr, __va(r2t)); /* Free region 2 table */ page = phys_to_page(r2t); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1449,7 +1452,6 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, r1t[i] = _REGION1_ENTRY_EMPTY; /* Free region 2 table */ page = phys_to_page(r2t); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1544,7 +1546,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= (r2t & _REGION_ENTRY_PROTECT); - list_add(&page->lru, &sg->crst_list); if (fake) { /* nothing to protect for fake tables */ *table &= ~_REGION_ENTRY_INVALID; @@ -1628,7 +1629,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= (r3t & _REGION_ENTRY_PROTECT); - list_add(&page->lru, &sg->crst_list); if (fake) { /* nothing to protect for fake tables */ *table &= ~_REGION_ENTRY_INVALID; @@ -1712,7 +1712,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= sgt & _REGION_ENTRY_PROTECT; - list_add(&page->lru, &sg->crst_list); if (fake) { /* nothing to protect for fake tables */ *table &= ~_REGION_ENTRY_INVALID; @@ -1833,7 +1832,6 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, /* mark as invalid as long as the parent table is not protected */ *table = (unsigned long) s_pgt | _SEGMENT_ENTRY | (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID; - list_add(&ptdesc->pt_list, &sg->pt_list); if (fake) { /* nothing to protect for fake tables */ *table &= ~_SEGMENT_ENTRY_INVALID; @@ -2623,49 +2621,6 @@ int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start, } EXPORT_SYMBOL_GPL(__s390_uv_destroy_range); -/** - * s390_unlist_old_asce - Remove the topmost level of page tables from the - * list of page tables of the gmap. - * @gmap: the gmap whose table is to be removed - * - * On s390x, KVM keeps a list of all pages containing the page tables of the - * gmap (the CRST list). This list is used at tear down time to free all - * pages that are now not needed anymore. - * - * This function removes the topmost page of the tree (the one pointed to by - * the ASCE) from the CRST list. - * - * This means that it will not be freed when the VM is torn down, and needs - * to be handled separately by the caller, unless a leak is actually - * intended. Notice that this function will only remove the page from the - * list, the page will still be used as a top level page table (and ASCE). - */ -void s390_unlist_old_asce(struct gmap *gmap) -{ - struct page *old; - - old = virt_to_page(gmap->table); - spin_lock(&gmap->guest_table_lock); - list_del(&old->lru); - /* - * Sometimes the topmost page might need to be "removed" multiple - * times, for example if the VM is rebooted into secure mode several - * times concurrently, or if s390_replace_asce fails after calling - * s390_remove_old_asce and is attempted again later. In that case - * the old asce has been removed from the list, and therefore it - * will not be freed when the VM terminates, but the ASCE is still - * in use and still pointed to. - * A subsequent call to replace_asce will follow the pointer and try - * to remove the same page from the list again. - * Therefore it's necessary that the page of the ASCE has valid - * pointers, so list_del can work (and do nothing) without - * dereferencing stale or invalid pointers. - */ - INIT_LIST_HEAD(&old->lru); - spin_unlock(&gmap->guest_table_lock); -} -EXPORT_SYMBOL_GPL(s390_unlist_old_asce); - /** * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy * @gmap: the gmap whose ASCE needs to be replaced @@ -2685,8 +2640,6 @@ int s390_replace_asce(struct gmap *gmap) struct page *page; void *table; - s390_unlist_old_asce(gmap); - /* Replacing segment type ASCEs would cause serious issues */ if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT) return -EINVAL; @@ -2697,15 +2650,6 @@ int s390_replace_asce(struct gmap *gmap) table = page_to_virt(page); memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT)); - /* - * The caller has to deal with the old ASCE, but here we make sure - * the new one is properly added to the CRST list, so that - * it will be freed when the VM is torn down. - */ - spin_lock(&gmap->guest_table_lock); - list_add(&page->lru, &gmap->crst_list); - spin_unlock(&gmap->guest_table_lock); - /* Set new table origin while preserving existing ASCE control bits */ asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table); WRITE_ONCE(gmap->asce, asce); diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index cd2fef79ad2c7..30387a6e98ffd 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -176,8 +176,6 @@ unsigned long *page_table_alloc(struct mm_struct *mm) } table = ptdesc_to_virt(ptdesc); __arch_set_page_dat(table, 1); - /* pt_list is used by gmap only */ - INIT_LIST_HEAD(&ptdesc->pt_list); memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE); memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE); return table; -- GitLab From 43656f774a4b4a2841035947e89dcde8ee136caa Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 23 Jan 2025 15:46:24 +0100 Subject: [PATCH 0093/1585] KVM: s390: move gmap_shadow_pgt_lookup() into kvm Move gmap_shadow_pgt_lookup() from mm/gmap.c into kvm/gaccess.c . Reviewed-by: Steffen Eiden Reviewed-by: Janosch Frank Reviewed-by: Christoph Schlameuss Link: https://lore.kernel.org/r/20250123144627.312456-13-imbrenda@linux.ibm.com Signed-off-by: Claudio Imbrenda Message-ID: <20250123144627.312456-13-imbrenda@linux.ibm.com> --- arch/s390/include/asm/gmap.h | 3 +-- arch/s390/kvm/gaccess.c | 42 +++++++++++++++++++++++++++++++- arch/s390/kvm/gmap.h | 2 ++ arch/s390/mm/gmap.c | 46 ++---------------------------------- 4 files changed, 46 insertions(+), 47 deletions(-) diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index b489c45896187..4e73ef46d4b2a 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -125,8 +125,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, int fake); int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, int fake); -int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, - unsigned long *pgt, int *dat_protection, int *fake); int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte); void gmap_register_pte_notifier(struct gmap_notifier *); @@ -142,6 +140,7 @@ void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns); int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start, unsigned long end, bool interruptible); int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split); +unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level); /** * s390_uv_destroy_range - Destroy a range of pages in the given mm. diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 9816b0060fbe5..bb1340389369c 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -16,6 +16,7 @@ #include #include #include "kvm-s390.h" +#include "gmap.h" #include "gaccess.h" /* @@ -1392,6 +1393,42 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, return 0; } +/** + * shadow_pgt_lookup() - find a shadow page table + * @sg: pointer to the shadow guest address space structure + * @saddr: the address in the shadow aguest address space + * @pgt: parent gmap address of the page table to get shadowed + * @dat_protection: if the pgtable is marked as protected by dat + * @fake: pgt references contiguous guest memory block, not a pgtable + * + * Returns 0 if the shadow page table was found and -EAGAIN if the page + * table was not found. + * + * Called with sg->mm->mmap_lock in read. + */ +static int shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt, + int *dat_protection, int *fake) +{ + unsigned long *table; + struct page *page; + int rc; + + spin_lock(&sg->guest_table_lock); + table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */ + if (table && !(*table & _SEGMENT_ENTRY_INVALID)) { + /* Shadow page tables are full pages (pte+pgste) */ + page = pfn_to_page(*table >> PAGE_SHIFT); + *pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE; + *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT); + *fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE); + rc = 0; + } else { + rc = -EAGAIN; + } + spin_unlock(&sg->guest_table_lock); + return rc; +} + /** * kvm_s390_shadow_fault - handle fault on a shadow page table * @vcpu: virtual cpu @@ -1415,6 +1452,9 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, int dat_protection, fake; int rc; + if (KVM_BUG_ON(!gmap_is_shadow(sg), vcpu->kvm)) + return -EFAULT; + mmap_read_lock(sg->mm); /* * We don't want any guest-2 tables to change - so the parent @@ -1423,7 +1463,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, */ ipte_lock(vcpu->kvm); - rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake); + rc = shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake); if (rc) rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection, &fake); diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h index 978f541059f02..c8f031c9ea5f2 100644 --- a/arch/s390/kvm/gmap.h +++ b/arch/s390/kvm/gmap.h @@ -10,6 +10,8 @@ #ifndef ARCH_KVM_S390_GMAP_H #define ARCH_KVM_S390_GMAP_H +#define GMAP_SHADOW_FAKE_TABLE 1ULL + int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 07df1a7b5ebe5..918ea14515a1e 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -36,8 +36,6 @@ #define GMAP_SHADOW_FAKE_TABLE 1ULL -static inline unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level); - static struct page *gmap_alloc_crst(void) { struct page *page; @@ -738,8 +736,7 @@ static void gmap_call_notifier(struct gmap *gmap, unsigned long start, * * Note: Can also be called for shadow gmaps. */ -static inline unsigned long *gmap_table_walk(struct gmap *gmap, - unsigned long gaddr, int level) +unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level) { const int asce_type = gmap->asce & _ASCE_TYPE_MASK; unsigned long *table = gmap->table; @@ -790,6 +787,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, } return table; } +EXPORT_SYMBOL(gmap_table_walk); /** * gmap_pte_op_walk - walk the gmap page table, get the page table lock @@ -1744,46 +1742,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, } EXPORT_SYMBOL_GPL(gmap_shadow_sgt); -/** - * gmap_shadow_pgt_lookup - find a shadow page table - * @sg: pointer to the shadow guest address space structure - * @saddr: the address in the shadow aguest address space - * @pgt: parent gmap address of the page table to get shadowed - * @dat_protection: if the pgtable is marked as protected by dat - * @fake: pgt references contiguous guest memory block, not a pgtable - * - * Returns 0 if the shadow page table was found and -EAGAIN if the page - * table was not found. - * - * Called with sg->mm->mmap_lock in read. - */ -int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, - unsigned long *pgt, int *dat_protection, - int *fake) -{ - unsigned long *table; - struct page *page; - int rc; - - BUG_ON(!gmap_is_shadow(sg)); - spin_lock(&sg->guest_table_lock); - table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */ - if (table && !(*table & _SEGMENT_ENTRY_INVALID)) { - /* Shadow page tables are full pages (pte+pgste) */ - page = pfn_to_page(*table >> PAGE_SHIFT); - *pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE; - *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT); - *fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE); - rc = 0; - } else { - rc = -EAGAIN; - } - spin_unlock(&sg->guest_table_lock); - return rc; - -} -EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup); - /** * gmap_shadow_pgt - instantiate a shadow page table * @sg: pointer to the shadow guest address space structure -- GitLab From c27e002626b9fbd2729fa00ddda789319648e7ba Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 23 Jan 2025 15:46:25 +0100 Subject: [PATCH 0094/1585] KVM: s390: remove useless page->index usage The page->index field for VSIE dat tables is only used for segment tables. Stop setting the field for all region tables. Reviewed-by: Janosch Frank Reviewed-by: Christoph Schlameuss Link: https://lore.kernel.org/r/20250123144627.312456-14-imbrenda@linux.ibm.com Signed-off-by: Claudio Imbrenda Message-ID: <20250123144627.312456-14-imbrenda@linux.ibm.com> --- arch/s390/mm/gmap.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 918ea14515a1e..38f0443217040 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -1520,9 +1520,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, page = gmap_alloc_crst(); if (!page) return -ENOMEM; - page->index = r2t & _REGION_ENTRY_ORIGIN; - if (fake) - page->index |= GMAP_SHADOW_FAKE_TABLE; s_r2t = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); @@ -1603,9 +1600,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, page = gmap_alloc_crst(); if (!page) return -ENOMEM; - page->index = r3t & _REGION_ENTRY_ORIGIN; - if (fake) - page->index |= GMAP_SHADOW_FAKE_TABLE; s_r3t = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); @@ -1686,9 +1680,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, page = gmap_alloc_crst(); if (!page) return -ENOMEM; - page->index = sgt & _REGION_ENTRY_ORIGIN; - if (fake) - page->index |= GMAP_SHADOW_FAKE_TABLE; s_sgt = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); -- GitLab From 1f4389931e9fea7e8b3c1f189d505b040b25be8a Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 23 Jan 2025 15:46:26 +0100 Subject: [PATCH 0095/1585] KVM: s390: move PGSTE softbits Move the softbits in the PGSTEs to the other usable area. This leaves the 16-bit block of usable bits free, which will be used in the next patch for something else. Reviewed-by: Steffen Eiden Reviewed-by: Christoph Schlameuss Link: https://lore.kernel.org/r/20250123144627.312456-15-imbrenda@linux.ibm.com Signed-off-by: Claudio Imbrenda Message-ID: <20250123144627.312456-15-imbrenda@linux.ibm.com> --- arch/s390/include/asm/pgtable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index a3b51056a1778..a96bde2e5f18d 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -420,9 +420,9 @@ void setup_protection_map(void); #define PGSTE_HC_BIT 0x0020000000000000UL #define PGSTE_GR_BIT 0x0004000000000000UL #define PGSTE_GC_BIT 0x0002000000000000UL -#define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */ -#define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */ -#define PGSTE_VSIE_BIT 0x0000200000000000UL /* ref'd in a shadow table */ +#define PGSTE_UC_BIT 0x0000000000008000UL /* user dirty (migration) */ +#define PGSTE_IN_BIT 0x0000000000004000UL /* IPTE notify bit */ +#define PGSTE_VSIE_BIT 0x0000000000002000UL /* ref'd in a shadow table */ /* Guest Page State used for virtualization */ #define _PGSTE_GPS_ZERO 0x0000000080000000UL -- GitLab From 84b7387692a8c849bd8bddd0f5c5474d4923aa6e Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 23 Jan 2025 15:46:27 +0100 Subject: [PATCH 0096/1585] KVM: s390: remove the last user of page->index Shadow page tables use page->index to keep the g2 address of the guest page table being shadowed. Instead of keeping the information in page->index, split the address and smear it over the 16-bit softbits areas of 4 PGSTEs. This removes the last s390 user of page->index. Reviewed-by: Steffen Eiden Reviewed-by: Christoph Schlameuss Link: https://lore.kernel.org/r/20250123144627.312456-16-imbrenda@linux.ibm.com Signed-off-by: Claudio Imbrenda Message-ID: <20250123144627.312456-16-imbrenda@linux.ibm.com> --- arch/s390/include/asm/pgtable.h | 15 +++++++++++++++ arch/s390/kvm/gaccess.c | 6 ++++-- arch/s390/mm/gmap.c | 22 ++++++++++++++++++++-- 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index a96bde2e5f18d..3ca5af4cfe432 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -420,6 +420,7 @@ void setup_protection_map(void); #define PGSTE_HC_BIT 0x0020000000000000UL #define PGSTE_GR_BIT 0x0004000000000000UL #define PGSTE_GC_BIT 0x0002000000000000UL +#define PGSTE_ST2_MASK 0x0000ffff00000000UL #define PGSTE_UC_BIT 0x0000000000008000UL /* user dirty (migration) */ #define PGSTE_IN_BIT 0x0000000000004000UL /* IPTE notify bit */ #define PGSTE_VSIE_BIT 0x0000000000002000UL /* ref'd in a shadow table */ @@ -2007,4 +2008,18 @@ extern void s390_reset_cmma(struct mm_struct *mm); #define pmd_pgtable(pmd) \ ((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)) +static inline unsigned long gmap_pgste_get_pgt_addr(unsigned long *pgt) +{ + unsigned long *pgstes, res; + + pgstes = pgt + _PAGE_ENTRIES; + + res = (pgstes[0] & PGSTE_ST2_MASK) << 16; + res |= pgstes[1] & PGSTE_ST2_MASK; + res |= (pgstes[2] & PGSTE_ST2_MASK) >> 16; + res |= (pgstes[3] & PGSTE_ST2_MASK) >> 32; + + return res; +} + #endif /* _S390_PAGE_H */ diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index bb1340389369c..f6fded15633ad 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1409,6 +1409,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, static int shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt, int *dat_protection, int *fake) { + unsigned long pt_index; unsigned long *table; struct page *page; int rc; @@ -1418,9 +1419,10 @@ static int shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long if (table && !(*table & _SEGMENT_ENTRY_INVALID)) { /* Shadow page tables are full pages (pte+pgste) */ page = pfn_to_page(*table >> PAGE_SHIFT); - *pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE; + pt_index = gmap_pgste_get_pgt_addr(page_to_virt(page)); + *pgt = pt_index & ~GMAP_SHADOW_FAKE_TABLE; *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT); - *fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE); + *fake = !!(pt_index & GMAP_SHADOW_FAKE_TABLE); rc = 0; } else { rc = -EAGAIN; diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 38f0443217040..94d9277858009 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -1733,6 +1733,23 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, } EXPORT_SYMBOL_GPL(gmap_shadow_sgt); +static void gmap_pgste_set_pgt_addr(struct ptdesc *ptdesc, unsigned long pgt_addr) +{ + unsigned long *pgstes = page_to_virt(ptdesc_page(ptdesc)); + + pgstes += _PAGE_ENTRIES; + + pgstes[0] &= ~PGSTE_ST2_MASK; + pgstes[1] &= ~PGSTE_ST2_MASK; + pgstes[2] &= ~PGSTE_ST2_MASK; + pgstes[3] &= ~PGSTE_ST2_MASK; + + pgstes[0] |= (pgt_addr >> 16) & PGSTE_ST2_MASK; + pgstes[1] |= pgt_addr & PGSTE_ST2_MASK; + pgstes[2] |= (pgt_addr << 16) & PGSTE_ST2_MASK; + pgstes[3] |= (pgt_addr << 32) & PGSTE_ST2_MASK; +} + /** * gmap_shadow_pgt - instantiate a shadow page table * @sg: pointer to the shadow guest address space structure @@ -1760,9 +1777,10 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, ptdesc = page_table_alloc_pgste(sg->mm); if (!ptdesc) return -ENOMEM; - ptdesc->pt_index = pgt & _SEGMENT_ENTRY_ORIGIN; + origin = pgt & _SEGMENT_ENTRY_ORIGIN; if (fake) - ptdesc->pt_index |= GMAP_SHADOW_FAKE_TABLE; + origin |= GMAP_SHADOW_FAKE_TABLE; + gmap_pgste_set_pgt_addr(ptdesc, origin); s_pgt = page_to_phys(ptdesc_page(ptdesc)); /* Install shadow page table */ spin_lock(&sg->guest_table_lock); -- GitLab From 32239066776a27287837a193b37c6e55259e5c10 Mon Sep 17 00:00:00 2001 From: Christoph Schlameuss Date: Tue, 28 Jan 2025 14:18:03 +0100 Subject: [PATCH 0097/1585] KVM: s390: selftests: Streamline uc_skey test to issue iske after sske In some rare situations a non default storage key is already set on the memory used by the test. Within normal VMs the key is reset / zapped when the memory is added to the VM. This is not the case for ucontrol VMs. With the initial iske check removed this test case can work in all situations. The function of the iske instruction is still validated by the remaining code. Fixes: 0185fbc6a2d3 ("KVM: s390: selftests: Add uc_skey VM test case") Signed-off-by: Christoph Schlameuss Link: https://lore.kernel.org/r/20250128131803.1047388-1-schlameuss@linux.ibm.com Message-ID: <20250128131803.1047388-1-schlameuss@linux.ibm.com> Signed-off-by: Claudio Imbrenda --- .../selftests/kvm/s390/ucontrol_test.c | 24 +++++-------------- 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c index 22ce9219620ce..d265b34c54be8 100644 --- a/tools/testing/selftests/kvm/s390/ucontrol_test.c +++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c @@ -88,10 +88,6 @@ asm("test_skey_asm:\n" " ahi %r0,1\n" " st %r1,0(%r5,%r6)\n" - " iske %r1,%r6\n" - " ahi %r0,1\n" - " diag 0,0,0x44\n" - " sske %r1,%r6\n" " xgr %r1,%r1\n" " iske %r1,%r6\n" @@ -600,7 +596,9 @@ TEST_F(uc_kvm, uc_skey) ASSERT_EQ(true, uc_handle_exit(self)); ASSERT_EQ(1, sync_regs->gprs[0]); - /* ISKE */ + /* SSKE + ISKE */ + sync_regs->gprs[1] = skeyvalue; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; ASSERT_EQ(0, uc_run_once(self)); /* @@ -612,21 +610,11 @@ TEST_F(uc_kvm, uc_skey) TEST_ASSERT_EQ(0, sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)); TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason); TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode); - TEST_REQUIRE(sie_block->ipa != 0xb229); + TEST_REQUIRE(sie_block->ipa != 0xb22b); - /* ISKE contd. */ + /* SSKE + ISKE contd. */ ASSERT_EQ(false, uc_handle_exit(self)); ASSERT_EQ(2, sync_regs->gprs[0]); - /* assert initial skey (ACC = 0, R & C = 1) */ - ASSERT_EQ(0x06, sync_regs->gprs[1]); - uc_assert_diag44(self); - - /* SSKE + ISKE */ - sync_regs->gprs[1] = skeyvalue; - run->kvm_dirty_regs |= KVM_SYNC_GPRS; - ASSERT_EQ(0, uc_run_once(self)); - ASSERT_EQ(false, uc_handle_exit(self)); - ASSERT_EQ(3, sync_regs->gprs[0]); ASSERT_EQ(skeyvalue, sync_regs->gprs[1]); uc_assert_diag44(self); @@ -635,7 +623,7 @@ TEST_F(uc_kvm, uc_skey) run->kvm_dirty_regs |= KVM_SYNC_GPRS; ASSERT_EQ(0, uc_run_once(self)); ASSERT_EQ(false, uc_handle_exit(self)); - ASSERT_EQ(4, sync_regs->gprs[0]); + ASSERT_EQ(3, sync_regs->gprs[0]); /* assert R reset but rest of skey unchanged */ ASSERT_EQ(skeyvalue & 0xfa, sync_regs->gprs[1]); ASSERT_EQ(0, sync_regs->gprs[1] & 0x04); -- GitLab From 6daaae5ff7f3b23a2dacc9c387ff3d4f95b67cad Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 29 Jan 2025 10:51:48 +0100 Subject: [PATCH 0098/1585] gpu: drm_dp_cec: fix broken CEC adapter properties check If the hotplug detect of a display is low for longer than one second (configurable through drm_dp_cec_unregister_delay), then the CEC adapter is unregistered since we assume the display was disconnected. If the HPD went low for less than one second, then we check if the properties of the CEC adapter have changed, since that indicates that we actually switch to new hardware and we have to unregister the old CEC device and register a new one. Unfortunately, the test for changed properties was written poorly, and after a new CEC capability was added to the CEC core code the test always returned true (i.e. the properties had changed). As a result the CEC device was unregistered and re-registered for every HPD toggle. If the CEC remote controller integration was also enabled (CONFIG_MEDIA_CEC_RC was set), then the corresponding input device was also unregistered and re-registered. As a result the input device in /sys would keep incrementing its number, e.g.: /sys/devices/pci0000:00/0000:00:08.1/0000:e7:00.0/rc/rc0/input20 Since short HPD toggles are common, the number could over time get into the thousands. While not a serious issue (i.e. nothing crashes), it is not intended to work that way. This patch changes the test so that it only checks for the single CEC capability that can actually change, and it ignores any other capabilities, so this is now safe as well if new caps are added in the future. With the changed test the bit under #ifndef CONFIG_MEDIA_CEC_RC can be dropped as well, so that's a nice cleanup. Signed-off-by: Hans Verkuil Reported-by: Farblos Reviewed-by: Dmitry Baryshkov Fixes: 2c6d1fffa1d9 ("drm: add support for DisplayPort CEC-Tunneling-over-AUX") Tested-by: Farblos Link: https://patchwork.freedesktop.org/patch/msgid/361bb03d-1691-4e23-84da-0861ead5dbdc@xs4all.nl Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/display/drm_dp_cec.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/display/drm_dp_cec.c b/drivers/gpu/drm/display/drm_dp_cec.c index 007ceb281d00d..56a4965e518cc 100644 --- a/drivers/gpu/drm/display/drm_dp_cec.c +++ b/drivers/gpu/drm/display/drm_dp_cec.c @@ -311,16 +311,6 @@ void drm_dp_cec_attach(struct drm_dp_aux *aux, u16 source_physical_address) if (!aux->transfer) return; -#ifndef CONFIG_MEDIA_CEC_RC - /* - * CEC_CAP_RC is part of CEC_CAP_DEFAULTS, but it is stripped by - * cec_allocate_adapter() if CONFIG_MEDIA_CEC_RC is undefined. - * - * Do this here as well to ensure the tests against cec_caps are - * correct. - */ - cec_caps &= ~CEC_CAP_RC; -#endif cancel_delayed_work_sync(&aux->cec.unregister_work); mutex_lock(&aux->cec.lock); @@ -337,7 +327,9 @@ void drm_dp_cec_attach(struct drm_dp_aux *aux, u16 source_physical_address) num_las = CEC_MAX_LOG_ADDRS; if (aux->cec.adap) { - if (aux->cec.adap->capabilities == cec_caps && + /* Check if the adapter properties have changed */ + if ((aux->cec.adap->capabilities & CEC_CAP_MONITOR_ALL) == + (cec_caps & CEC_CAP_MONITOR_ALL) && aux->cec.adap->available_log_addrs == num_las) { /* Unchanged, so just set the phys addr */ cec_s_phys_addr(aux->cec.adap, source_physical_address, false); -- GitLab From 743bbd93cf29f653fae0e1416a31f03231689911 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 23 Jan 2025 16:01:16 +0100 Subject: [PATCH 0099/1585] ice: put Rx buffers after being done with current frame Introduce a new helper ice_put_rx_mbuf() that will go through gathered frags from current frame and will call ice_put_rx_buf() on them. Current logic that was supposed to simplify and optimize the driver where we go through a batch of all buffers processed in current NAPI instance turned out to be broken for jumbo frames and very heavy load that was coming from both multi-thread iperf and nginx/wrk pair between server and client. The delay introduced by approach that we are dropping is simply too big and we need to take the decision regarding page recycling/releasing as quick as we can. While at it, address an error path of ice_add_xdp_frag() - we were missing buffer putting from day 1 there. As a nice side effect we get rid of annoying and repetitive three-liner: xdp->data = NULL; rx_ring->first_desc = ntc; rx_ring->nr_frags = 0; by embedding it within introduced routine. Fixes: 1dc1a7e7f410 ("ice: Centrallize Rx buffer recycling") Reported-and-tested-by: Xu Du Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Co-developed-by: Jacob Keller Signed-off-by: Jacob Keller Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_txrx.c | 79 ++++++++++++++--------- 1 file changed, 50 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 5d2d7736fd5f1..e173d9c989883 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1103,6 +1103,49 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf) rx_buf->page = NULL; } +/** + * ice_put_rx_mbuf - ice_put_rx_buf() caller, for all frame frags + * @rx_ring: Rx ring with all the auxiliary data + * @xdp: XDP buffer carrying linear + frags part + * @xdp_xmit: XDP_TX/XDP_REDIRECT verdict storage + * @ntc: a current next_to_clean value to be stored at rx_ring + * + * Walk through gathered fragments and satisfy internal page + * recycle mechanism; we take here an action related to verdict + * returned by XDP program; + */ +static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, + u32 *xdp_xmit, u32 ntc) +{ + u32 nr_frags = rx_ring->nr_frags + 1; + u32 idx = rx_ring->first_desc; + u32 cnt = rx_ring->count; + struct ice_rx_buf *buf; + int i; + + for (i = 0; i < nr_frags; i++) { + buf = &rx_ring->rx_buf[idx]; + + if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) { + ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); + *xdp_xmit |= buf->act; + } else if (buf->act & ICE_XDP_CONSUMED) { + buf->pagecnt_bias++; + } else if (buf->act == ICE_XDP_PASS) { + ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); + } + + ice_put_rx_buf(rx_ring, buf); + + if (++idx == cnt) + idx = 0; + } + + xdp->data = NULL; + rx_ring->first_desc = ntc; + rx_ring->nr_frags = 0; +} + /** * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @rx_ring: Rx descriptor ring to transact packets on @@ -1120,7 +1163,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) unsigned int total_rx_bytes = 0, total_rx_pkts = 0; unsigned int offset = rx_ring->rx_offset; struct xdp_buff *xdp = &rx_ring->xdp; - u32 cached_ntc = rx_ring->first_desc; struct ice_tx_ring *xdp_ring = NULL; struct bpf_prog *xdp_prog = NULL; u32 ntc = rx_ring->next_to_clean; @@ -1128,7 +1170,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) u32 xdp_xmit = 0; u32 cached_ntu; bool failure; - u32 first; xdp_prog = READ_ONCE(rx_ring->xdp_prog); if (xdp_prog) { @@ -1190,6 +1231,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); xdp_buff_clear_frags_flag(xdp); } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { + ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc); break; } if (++ntc == cnt) @@ -1205,9 +1247,8 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) total_rx_bytes += xdp_get_buff_len(xdp); total_rx_pkts++; - xdp->data = NULL; - rx_ring->first_desc = ntc; - rx_ring->nr_frags = 0; + ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc); + continue; construct_skb: if (likely(ice_ring_uses_build_skb(rx_ring))) @@ -1221,14 +1262,11 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) if (unlikely(xdp_buff_has_frags(xdp))) ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED); - xdp->data = NULL; - rx_ring->first_desc = ntc; - rx_ring->nr_frags = 0; - break; } - xdp->data = NULL; - rx_ring->first_desc = ntc; - rx_ring->nr_frags = 0; + ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc); + + if (!skb) + break; stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S); if (unlikely(ice_test_staterr(rx_desc->wb.status_error0, @@ -1257,23 +1295,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) total_rx_pkts++; } - first = rx_ring->first_desc; - while (cached_ntc != first) { - struct ice_rx_buf *buf = &rx_ring->rx_buf[cached_ntc]; - - if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) { - ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); - xdp_xmit |= buf->act; - } else if (buf->act & ICE_XDP_CONSUMED) { - buf->pagecnt_bias++; - } else if (buf->act == ICE_XDP_PASS) { - ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); - } - - ice_put_rx_buf(rx_ring, buf); - if (++cached_ntc >= cnt) - cached_ntc = 0; - } rx_ring->next_to_clean = ntc; /* return up to cleaned_count buffers to hardware */ failure = ice_alloc_rx_bufs(rx_ring, ICE_RX_DESC_UNUSED(rx_ring)); -- GitLab From 11c4aa074d547d825b19cd8d9f288254d89d805c Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 23 Jan 2025 16:01:17 +0100 Subject: [PATCH 0100/1585] ice: gather page_count()'s of each frag right before XDP prog call If we store the pgcnt on few fragments while being in the middle of gathering the whole frame and we stumbled upon DD bit not being set, we terminate the NAPI Rx processing loop and come back later on. Then on next NAPI execution we work on previously stored pgcnt. Imagine that second half of page was used actively by networking stack and by the time we came back, stack is not busy with this page anymore and decremented the refcnt. The page reuse algorithm in this case should be good to reuse the page but given the old refcnt it will not do so and attempt to release the page via page_frag_cache_drain() with pagecnt_bias used as an arg. This in turn will result in negative refcnt on struct page, which was initially observed by Xu Du. Therefore, move the page count storage from ice_get_rx_buf() to a place where we are sure that whole frame has been collected, but before calling XDP program as it internally can also change the page count of fragments belonging to xdp_buff. Fixes: ac0753391195 ("ice: Store page count inside ice_rx_buf") Reported-and-tested-by: Xu Du Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Co-developed-by: Jacob Keller Signed-off-by: Jacob Keller Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_txrx.c | 27 ++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index e173d9c989883..cf46bcf143b4b 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -924,7 +924,6 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, struct ice_rx_buf *rx_buf; rx_buf = &rx_ring->rx_buf[ntc]; - rx_buf->pgcnt = page_count(rx_buf->page); prefetchw(rx_buf->page); if (!size) @@ -940,6 +939,31 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, return rx_buf; } +/** + * ice_get_pgcnts - grab page_count() for gathered fragments + * @rx_ring: Rx descriptor ring to store the page counts on + * + * This function is intended to be called right before running XDP + * program so that the page recycling mechanism will be able to take + * a correct decision regarding underlying pages; this is done in such + * way as XDP program can change the refcount of page + */ +static void ice_get_pgcnts(struct ice_rx_ring *rx_ring) +{ + u32 nr_frags = rx_ring->nr_frags + 1; + u32 idx = rx_ring->first_desc; + struct ice_rx_buf *rx_buf; + u32 cnt = rx_ring->count; + + for (int i = 0; i < nr_frags; i++) { + rx_buf = &rx_ring->rx_buf[idx]; + rx_buf->pgcnt = page_count(rx_buf->page); + + if (++idx == cnt) + idx = 0; + } +} + /** * ice_build_skb - Build skb around an existing buffer * @rx_ring: Rx descriptor ring to transact packets on @@ -1241,6 +1265,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) if (ice_is_non_eop(rx_ring, rx_desc)) continue; + ice_get_pgcnts(rx_ring); ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc); if (rx_buf->act == ICE_XDP_PASS) goto construct_skb; -- GitLab From 468a1952df78f65c5991b7ac885c8b5b7dd87bab Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 23 Jan 2025 16:01:18 +0100 Subject: [PATCH 0101/1585] ice: stop storing XDP verdict within ice_rx_buf Idea behind having ice_rx_buf::act was to simplify and speed up the Rx data path by walking through buffers that were representing cleaned HW Rx descriptors. Since it caused us a major headache recently and we rolled back to old approach that 'puts' Rx buffers right after running XDP prog/creating skb, this is useless now and should be removed. Get rid of ice_rx_buf::act and related logic. We still need to take care of a corner case where XDP program releases a particular fragment. Make ice_run_xdp() to return its result and use it within ice_put_rx_mbuf(). Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side") Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_txrx.c | 62 +++++++++++-------- drivers/net/ethernet/intel/ice/ice_txrx.h | 1 - drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 43 ------------- 3 files changed, 36 insertions(+), 70 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index cf46bcf143b4b..9c9ea4c1b93b7 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -527,15 +527,14 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) * @xdp: xdp_buff used as input to the XDP program * @xdp_prog: XDP program to run * @xdp_ring: ring to be used for XDP_TX action - * @rx_buf: Rx buffer to store the XDP action * @eop_desc: Last descriptor in packet to read metadata from * * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ -static void +static u32 ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring, - struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc) + union ice_32b_rx_flex_desc *eop_desc) { unsigned int ret = ICE_XDP_PASS; u32 act; @@ -574,7 +573,7 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, ret = ICE_XDP_CONSUMED; } exit: - ice_set_rx_bufs_act(xdp, rx_ring, ret); + return ret; } /** @@ -860,10 +859,8 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, xdp_buff_set_frags_flag(xdp); } - if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) { - ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED); + if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) return -ENOMEM; - } __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page, rx_buf->page_offset, size); @@ -1075,12 +1072,12 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) rx_buf->page_offset + headlen, size, xdp->frame_sz); } else { - /* buffer is unused, change the act that should be taken later - * on; data was copied onto skb's linear part so there's no + /* buffer is unused, restore biased page count in Rx buffer; + * data was copied onto skb's linear part so there's no * need for adjusting page offset and we can reuse this buffer * as-is */ - rx_buf->act = ICE_SKB_CONSUMED; + rx_buf->pagecnt_bias++; } if (unlikely(xdp_buff_has_frags(xdp))) { @@ -1133,29 +1130,34 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf) * @xdp: XDP buffer carrying linear + frags part * @xdp_xmit: XDP_TX/XDP_REDIRECT verdict storage * @ntc: a current next_to_clean value to be stored at rx_ring + * @verdict: return code from XDP program execution * * Walk through gathered fragments and satisfy internal page * recycle mechanism; we take here an action related to verdict * returned by XDP program; */ static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, - u32 *xdp_xmit, u32 ntc) + u32 *xdp_xmit, u32 ntc, u32 verdict) { u32 nr_frags = rx_ring->nr_frags + 1; u32 idx = rx_ring->first_desc; u32 cnt = rx_ring->count; + u32 post_xdp_frags = 1; struct ice_rx_buf *buf; int i; - for (i = 0; i < nr_frags; i++) { + if (unlikely(xdp_buff_has_frags(xdp))) + post_xdp_frags += xdp_get_shared_info_from_buff(xdp)->nr_frags; + + for (i = 0; i < post_xdp_frags; i++) { buf = &rx_ring->rx_buf[idx]; - if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) { + if (verdict & (ICE_XDP_TX | ICE_XDP_REDIR)) { ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); - *xdp_xmit |= buf->act; - } else if (buf->act & ICE_XDP_CONSUMED) { + *xdp_xmit |= verdict; + } else if (verdict & ICE_XDP_CONSUMED) { buf->pagecnt_bias++; - } else if (buf->act == ICE_XDP_PASS) { + } else if (verdict == ICE_XDP_PASS) { ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); } @@ -1164,6 +1166,17 @@ static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, if (++idx == cnt) idx = 0; } + /* handle buffers that represented frags released by XDP prog; + * for these we keep pagecnt_bias as-is; refcount from struct page + * has been decremented within XDP prog and we do not have to increase + * the biased refcnt + */ + for (; i < nr_frags; i++) { + buf = &rx_ring->rx_buf[idx]; + ice_put_rx_buf(rx_ring, buf); + if (++idx == cnt) + idx = 0; + } xdp->data = NULL; rx_ring->first_desc = ntc; @@ -1190,9 +1203,9 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) struct ice_tx_ring *xdp_ring = NULL; struct bpf_prog *xdp_prog = NULL; u32 ntc = rx_ring->next_to_clean; + u32 cached_ntu, xdp_verdict; u32 cnt = rx_ring->count; u32 xdp_xmit = 0; - u32 cached_ntu; bool failure; xdp_prog = READ_ONCE(rx_ring->xdp_prog); @@ -1255,7 +1268,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); xdp_buff_clear_frags_flag(xdp); } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { - ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc); + ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc, ICE_XDP_CONSUMED); break; } if (++ntc == cnt) @@ -1266,13 +1279,13 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) continue; ice_get_pgcnts(rx_ring); - ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc); - if (rx_buf->act == ICE_XDP_PASS) + xdp_verdict = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_desc); + if (xdp_verdict == ICE_XDP_PASS) goto construct_skb; total_rx_bytes += xdp_get_buff_len(xdp); total_rx_pkts++; - ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc); + ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); continue; construct_skb: @@ -1283,12 +1296,9 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) /* exit if we failed to retrieve a buffer */ if (!skb) { rx_ring->ring_stats->rx_stats.alloc_page_failed++; - rx_buf->act = ICE_XDP_CONSUMED; - if (unlikely(xdp_buff_has_frags(xdp))) - ice_set_rx_bufs_act(xdp, rx_ring, - ICE_XDP_CONSUMED); + xdp_verdict = ICE_XDP_CONSUMED; } - ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc); + ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); if (!skb) break; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index cb347c852ba9e..806bce701df34 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -201,7 +201,6 @@ struct ice_rx_buf { struct page *page; unsigned int page_offset; unsigned int pgcnt; - unsigned int act; unsigned int pagecnt_bias; }; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 79f960c6680d1..6cf32b4041275 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -5,49 +5,6 @@ #define _ICE_TXRX_LIB_H_ #include "ice.h" -/** - * ice_set_rx_bufs_act - propagate Rx buffer action to frags - * @xdp: XDP buffer representing frame (linear and frags part) - * @rx_ring: Rx ring struct - * act: action to store onto Rx buffers related to XDP buffer parts - * - * Set action that should be taken before putting Rx buffer from first frag - * to the last. - */ -static inline void -ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring, - const unsigned int act) -{ - u32 sinfo_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags; - u32 nr_frags = rx_ring->nr_frags + 1; - u32 idx = rx_ring->first_desc; - u32 cnt = rx_ring->count; - struct ice_rx_buf *buf; - - for (int i = 0; i < nr_frags; i++) { - buf = &rx_ring->rx_buf[idx]; - buf->act = act; - - if (++idx == cnt) - idx = 0; - } - - /* adjust pagecnt_bias on frags freed by XDP prog */ - if (sinfo_frags < rx_ring->nr_frags && act == ICE_XDP_CONSUMED) { - u32 delta = rx_ring->nr_frags - sinfo_frags; - - while (delta) { - if (idx == 0) - idx = cnt - 1; - else - idx--; - buf = &rx_ring->rx_buf[idx]; - buf->pagecnt_bias--; - delta--; - } - } -} - /** * ice_test_staterr - tests bits in Rx descriptor status and error fields * @status_err_n: Rx descriptor status_error0 or status_error1 bits -- GitLab From 2d1a2dab95cdc6f2e0c6af3c0514b0bea94af482 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 28 Jan 2025 07:22:31 -0800 Subject: [PATCH 0102/1585] nvme: make nvme_tls_attrs_group static To suppress the compiler "warning: symbol 'nvme_tls_attrs_group' was not declared. Should it be static?" Fixes: 1e48b34c9bc79a ("nvme: split off TLS sysfs attributes into a separate group") Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index b68a9e5f1ea39..3a41b9ab0f13c 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -792,7 +792,7 @@ static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj, return a->mode; } -const struct attribute_group nvme_tls_attrs_group = { +static const struct attribute_group nvme_tls_attrs_group = { .attrs = nvme_tls_attrs, .is_visible = nvme_tls_attrs_are_visible, }; -- GitLab From c8ed6cb5d37bc09c7e25e49a670e9fd1a3bd1dfa Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Tue, 28 Jan 2025 17:34:47 +0100 Subject: [PATCH 0103/1585] nvme-fc: use ctrl state getter Do not access the state variable directly, instead use proper synchronization so not stale data is read. Fixes: e6e7f7ac03e4 ("nvme: ensure reset state check ordering") Signed-off-by: Daniel Wagner Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 55884d3df6f29..f4f1866fbd5b8 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2087,7 +2087,8 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) nvme_fc_complete_rq(rq); check_error: - if (terminate_assoc && ctrl->ctrl.state != NVME_CTRL_RESETTING) + if (terminate_assoc && + nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_RESETTING) queue_work(nvme_reset_wq, &ctrl->ioerr_work); } @@ -2541,6 +2542,8 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) { + enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl); + /* * if an error (io timeout, etc) while (re)connecting, the remote * port requested terminating of the association (disconnect_ls) @@ -2548,7 +2551,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) * the controller. Abort any ios on the association and let the * create_association error path resolve things. */ - if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { + if (state == NVME_CTRL_CONNECTING) { __nvme_fc_abort_outstanding_ios(ctrl, true); dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: transport error during (re)connect\n", @@ -2557,7 +2560,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) } /* Otherwise, only proceed if in LIVE state - e.g. on first error */ - if (ctrl->ctrl.state != NVME_CTRL_LIVE) + if (state != NVME_CTRL_LIVE) return; dev_warn(ctrl->ctrl.device, -- GitLab From a572593ac80e51eb69ecede7e614289fcccdbf8d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 29 Jan 2025 14:56:35 -0800 Subject: [PATCH 0104/1585] md: Fix linear_set_limits() queue_limits_cancel_update() must only be called if queue_limits_start_update() is called first. Remove the queue_limits_cancel_update() call from linear_set_limits() because there is no corresponding queue_limits_start_update() call. This bug was discovered by annotating all mutex operations with clang thread-safety attributes and by building the kernel with clang and -Wthread-safety. Cc: Yu Kuai Cc: Coly Li Cc: Mike Snitzer Cc: Christoph Hellwig Fixes: 127186cfb184 ("md: reintroduce md-linear") Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250129225636.2667932-1-bvanassche@acm.org Signed-off-by: Song Liu --- drivers/md/md-linear.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c index a382929ce7bab..369aed044b409 100644 --- a/drivers/md/md-linear.c +++ b/drivers/md/md-linear.c @@ -76,10 +76,8 @@ static int linear_set_limits(struct mddev *mddev) lim.max_write_zeroes_sectors = mddev->chunk_sectors; lim.io_min = mddev->chunk_sectors << 9; err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); - if (err) { - queue_limits_cancel_update(mddev->gendisk->queue); + if (err) return err; - } return queue_limits_set(mddev->gendisk->queue, &lim); } -- GitLab From 0f1a6c5c9784eff7e31e4915e17285fb89ad3644 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 31 Jan 2025 22:29:24 +0000 Subject: [PATCH 0105/1585] KVM: arm64: Flush/sync debug state in protected mode The recent changes to debug state management broke self-hosted debug for guests when running in protected mode, since both the debug owner and the debug state itself aren't shared with the hyp's view of the vcpu. Fix it by flushing/syncing the relevant bits with the hyp vcpu. Fixes: beb470d96cec ("KVM: arm64: Use debug_owner to track if debug regs need save/restore") Reported-by: Mark Brown Closes: https://lore.kernel.org/kvmarm/5f62740f-a065-42d9-9f56-8fb648b9c63f@sirena.org.uk/ Signed-off-by: Oliver Upton Link: https://lore.kernel.org/r/20250131222922.1548780-3-oliver.upton@linux.dev Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 5c134520e1805..6e12c070832f7 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -91,11 +91,34 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED; } +static void flush_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; + + hyp_vcpu->vcpu.arch.debug_owner = host_vcpu->arch.debug_owner; + + if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu)) + hyp_vcpu->vcpu.arch.vcpu_debug_state = host_vcpu->arch.vcpu_debug_state; + else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu)) + hyp_vcpu->vcpu.arch.external_debug_state = host_vcpu->arch.external_debug_state; +} + +static void sync_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; + + if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu)) + host_vcpu->arch.vcpu_debug_state = hyp_vcpu->vcpu.arch.vcpu_debug_state; + else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu)) + host_vcpu->arch.external_debug_state = hyp_vcpu->vcpu.arch.external_debug_state; +} + static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) { struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; fpsimd_sve_flush(); + flush_debug_state(hyp_vcpu); hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt; @@ -123,6 +146,7 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) unsigned int i; fpsimd_sve_sync(&hyp_vcpu->vcpu); + sync_debug_state(hyp_vcpu); host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt; -- GitLab From 46ded709232344b5750a852747a8881763c721ab Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 29 Jan 2025 15:13:42 -0800 Subject: [PATCH 0106/1585] net: bcmgenet: Correct overlaying of PHY and MAC Wake-on-LAN Some Wake-on-LAN modes such as WAKE_FILTER may only be supported by the MAC, while others might be only supported by the PHY. Make sure that the .get_wol() returns the union of both rather than only that of the PHY if the PHY supports Wake-on-LAN. When disabling Wake-on-LAN, make sure that this is done at both the PHY and MAC level, rather than doing an early return from the PHY driver. Fixes: 7e400ff35cbe ("net: bcmgenet: Add support for PHY-based Wake-on-LAN") Fixes: 9ee09edc05f2 ("net: bcmgenet: Properly overlay PHY and MAC Wake-on-LAN capabilities") Signed-off-by: Florian Fainelli Link: https://patch.msgid.link/20250129231342.35013-1-florian.fainelli@broadcom.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/broadcom/genet/bcmgenet_wol.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 0715ea5bf13ed..3b082114f2e53 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -41,9 +41,12 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct bcmgenet_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; + u32 phy_wolopts = 0; - if (dev->phydev) + if (dev->phydev) { phy_ethtool_get_wol(dev->phydev, wol); + phy_wolopts = wol->wolopts; + } /* MAC is not wake-up capable, return what the PHY does */ if (!device_can_wakeup(kdev)) @@ -51,9 +54,14 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) /* Overlay MAC capabilities with that of the PHY queried before */ wol->supported |= WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; - wol->wolopts = priv->wolopts; - memset(wol->sopass, 0, sizeof(wol->sopass)); + wol->wolopts |= priv->wolopts; + /* Return the PHY configured magic password */ + if (phy_wolopts & WAKE_MAGICSECURE) + return; + + /* Otherwise the MAC one */ + memset(wol->sopass, 0, sizeof(wol->sopass)); if (wol->wolopts & WAKE_MAGICSECURE) memcpy(wol->sopass, priv->sopass, sizeof(priv->sopass)); } @@ -70,7 +78,7 @@ int bcmgenet_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) /* Try Wake-on-LAN from the PHY first */ if (dev->phydev) { ret = phy_ethtool_set_wol(dev->phydev, wol); - if (ret != -EOPNOTSUPP) + if (ret != -EOPNOTSUPP && wol->wolopts) return ret; } -- GitLab From c71a192976ded2f2f416d03c4f595cdd4478b825 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 Jan 2025 19:15:18 -0800 Subject: [PATCH 0107/1585] net: ipv6: fix dst refleaks in rpl, seg6 and ioam6 lwtunnels dst_cache_get() gives us a reference, we need to release it. Discovered by the ioam6.sh test, kmemleak was recently fixed to catch per-cpu memory leaks. Fixes: 985ec6f5e623 ("net: ipv6: rpl_iptunnel: mitigate 2-realloc issue") Fixes: 40475b63761a ("net: ipv6: seg6_iptunnel: mitigate 2-realloc issue") Fixes: dce525185bc9 ("net: ipv6: ioam6_iptunnel: mitigate 2-realloc issue") Reviewed-by: Justin Iurman Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250130031519.2716843-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv6/ioam6_iptunnel.c | 5 +++-- net/ipv6/rpl_iptunnel.c | 6 ++++-- net/ipv6/seg6_iptunnel.c | 6 ++++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index 28e5a89dc2557..3936c137a5727 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -336,7 +336,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb, static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb), *cache_dst; + struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL; struct in6_addr orig_daddr; struct ioam6_lwt *ilwt; int err = -EINVAL; @@ -407,7 +407,6 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) cache_dst = ip6_route_output(net, NULL, &fl6); if (cache_dst->error) { err = cache_dst->error; - dst_release(cache_dst); goto drop; } @@ -426,8 +425,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) return dst_output(net, sk, skb); } out: + dst_release(cache_dst); return dst->lwtstate->orig_output(net, sk, skb); drop: + dst_release(cache_dst); kfree_skb(skb); return err; } diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index 7ba22d2f2bfef..9b7d035631154 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -232,7 +232,6 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { err = dst->error; - dst_release(dst); goto drop; } @@ -251,6 +250,7 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) return dst_output(net, sk, skb); drop: + dst_release(dst); kfree_skb(skb); return err; } @@ -269,8 +269,10 @@ static int rpl_input(struct sk_buff *skb) local_bh_enable(); err = rpl_do_srh(skb, rlwt, dst); - if (unlikely(err)) + if (unlikely(err)) { + dst_release(dst); goto drop; + } if (!dst) { ip6_route_input(skb); diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 4bf937bfc2633..eacc4e91b48ef 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -482,8 +482,10 @@ static int seg6_input_core(struct net *net, struct sock *sk, local_bh_enable(); err = seg6_do_srh(skb, dst); - if (unlikely(err)) + if (unlikely(err)) { + dst_release(dst); goto drop; + } if (!dst) { ip6_route_input(skb); @@ -571,7 +573,6 @@ static int seg6_output_core(struct net *net, struct sock *sk, dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { err = dst->error; - dst_release(dst); goto drop; } @@ -593,6 +594,7 @@ static int seg6_output_core(struct net *net, struct sock *sk, return dst_output(net, sk, skb); drop: + dst_release(dst); kfree_skb(skb); return err; } -- GitLab From 92191dd1073088753821b862b791dcc83e558e07 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 Jan 2025 19:15:19 -0800 Subject: [PATCH 0108/1585] net: ipv6: fix dst ref loops in rpl, seg6 and ioam6 lwtunnels Some lwtunnels have a dst cache for post-transformation dst. If the packet destination did not change we may end up recording a reference to the lwtunnel in its own cache, and the lwtunnel state will never be freed. Discovered by the ioam6.sh test, kmemleak was recently fixed to catch per-cpu memory leaks. I'm not sure if rpl and seg6 can actually hit this, but in principle I don't see why not. Fixes: 8cb3bf8bff3c ("ipv6: ioam: Add support for the ip6ip6 encapsulation") Fixes: 6c8702c60b88 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels") Fixes: a7a29f9c361f ("net: ipv6: add rpl sr tunnel") Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250130031519.2716843-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv6/ioam6_iptunnel.c | 9 ++++++--- net/ipv6/rpl_iptunnel.c | 9 ++++++--- net/ipv6/seg6_iptunnel.c | 9 ++++++--- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index 3936c137a5727..2c383c12a4315 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -410,9 +410,12 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (dst->lwtstate != cache_dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(cache_dst->dev)); if (unlikely(err)) diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index 9b7d035631154..0ac4283acdf20 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -235,9 +235,12 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (orig_dst->lwtstate != dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index eacc4e91b48ef..33833b2064c07 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -576,9 +576,12 @@ static int seg6_output_core(struct net *net, struct sock *sk, goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (orig_dst->lwtstate != dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) -- GitLab From a8aa6a6ddce9b5585f2b74f27f3feea1427fb4e7 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Fri, 31 Jan 2025 01:38:32 +0000 Subject: [PATCH 0109/1585] ice: Add check for devm_kzalloc() Add check for the return value of devm_kzalloc() to guarantee the success of allocation. Fixes: 42c2eb6b1f43 ("ice: Implement devlink-rate API") Signed-off-by: Jiasheng Jiang Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250131013832.24805-1-jiashengjiangcool@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/devlink/devlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index d116e2b10bcea..dbdb83567364c 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -981,6 +981,9 @@ static int ice_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv /* preallocate memory for ice_sched_node */ node = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + *priv = node; return 0; -- GitLab From 3f1baa91a1fdf3de9dbad4bd615b35fab347874b Mon Sep 17 00:00:00 2001 From: Sankararaman Jayaraman Date: Fri, 31 Jan 2025 09:53:41 +0530 Subject: [PATCH 0110/1585] vmxnet3: Fix tx queue race condition with XDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If XDP traffic runs on a CPU which is greater than or equal to the number of the Tx queues of the NIC, then vmxnet3_xdp_get_tq() always picks up queue 0 for transmission as it uses reciprocal scale instead of simple modulo operation. vmxnet3_xdp_xmit() and vmxnet3_xdp_xmit_frame() use the above returned queue without any locking which can lead to race conditions when multiple XDP xmits run in parallel on different CPU's. This patch uses a simple module scheme when the current CPU equals or exceeds the number of Tx queues on the NIC. It also adds locking in vmxnet3_xdp_xmit() and vmxnet3_xdp_xmit_frame() functions. Fixes: 54f00cce1178 ("vmxnet3: Add XDP support.") Signed-off-by: Sankararaman Jayaraman Signed-off-by: Ronak Doshi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250131042340.156547-1-sankararaman.jayaraman@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/vmxnet3/vmxnet3_xdp.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_xdp.c b/drivers/net/vmxnet3/vmxnet3_xdp.c index 1341374a4588a..616ecc38d1726 100644 --- a/drivers/net/vmxnet3/vmxnet3_xdp.c +++ b/drivers/net/vmxnet3/vmxnet3_xdp.c @@ -28,7 +28,7 @@ vmxnet3_xdp_get_tq(struct vmxnet3_adapter *adapter) if (likely(cpu < tq_number)) tq = &adapter->tx_queue[cpu]; else - tq = &adapter->tx_queue[reciprocal_scale(cpu, tq_number)]; + tq = &adapter->tx_queue[cpu % tq_number]; return tq; } @@ -124,6 +124,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, u32 buf_size; u32 dw2; + spin_lock_irq(&tq->tx_lock); dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT; dw2 |= xdpf->len; ctx.sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill; @@ -134,6 +135,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, if (vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) == 0) { tq->stats.tx_ring_full++; + spin_unlock_irq(&tq->tx_lock); return -ENOSPC; } @@ -142,8 +144,10 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, tbi->dma_addr = dma_map_single(&adapter->pdev->dev, xdpf->data, buf_size, DMA_TO_DEVICE); - if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) + if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) { + spin_unlock_irq(&tq->tx_lock); return -EFAULT; + } tbi->map_type |= VMXNET3_MAP_SINGLE; } else { /* XDP buffer from page pool */ page = virt_to_page(xdpf->data); @@ -182,6 +186,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, dma_wmb(); gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^ VMXNET3_TXD_GEN); + spin_unlock_irq(&tq->tx_lock); /* No need to handle the case when tx_num_deferred doesn't reach * threshold. Backend driver at hypervisor side will poll and reset @@ -225,6 +230,7 @@ vmxnet3_xdp_xmit(struct net_device *dev, { struct vmxnet3_adapter *adapter = netdev_priv(dev); struct vmxnet3_tx_queue *tq; + struct netdev_queue *nq; int i; if (unlikely(test_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))) @@ -236,6 +242,9 @@ vmxnet3_xdp_xmit(struct net_device *dev, if (tq->stopped) return -ENETDOWN; + nq = netdev_get_tx_queue(adapter->netdev, tq->qid); + + __netif_tx_lock(nq, smp_processor_id()); for (i = 0; i < n; i++) { if (vmxnet3_xdp_xmit_frame(adapter, frames[i], tq, true)) { tq->stats.xdp_xmit_err++; @@ -243,6 +252,7 @@ vmxnet3_xdp_xmit(struct net_device *dev, } } tq->stats.xdp_xmit += i; + __netif_tx_unlock(nq); return i; } -- GitLab From 7faf14a7b0366f153284db0ad3347c457ea70136 Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Sun, 26 Jan 2025 17:47:22 +0800 Subject: [PATCH 0111/1585] nfsd: clear acl_access/acl_default after releasing them If getting acl_default fails, acl_access and acl_default will be released simultaneously. However, acl_access will still retain a pointer pointing to the released posix_acl, which will trigger a WARNING in nfs3svc_release_getacl like this: ------------[ cut here ]------------ refcount_t: underflow; use-after-free. WARNING: CPU: 26 PID: 3199 at lib/refcount.c:28 refcount_warn_saturate+0xb5/0x170 Modules linked in: CPU: 26 UID: 0 PID: 3199 Comm: nfsd Not tainted 6.12.0-rc6-00079-g04ae226af01f-dirty #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.1-2.fc37 04/01/2014 RIP: 0010:refcount_warn_saturate+0xb5/0x170 Code: cc cc 0f b6 1d b3 20 a5 03 80 fb 01 0f 87 65 48 d8 00 83 e3 01 75 e4 48 c7 c7 c0 3b 9b 85 c6 05 97 20 a5 03 01 e8 fb 3e 30 ff <0f> 0b eb cd 0f b6 1d 8a3 RSP: 0018:ffffc90008637cd8 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff83904fde RDX: dffffc0000000000 RSI: 0000000000000008 RDI: ffff88871ed36380 RBP: ffff888158beeb40 R08: 0000000000000001 R09: fffff520010c6f56 R10: ffffc90008637ab7 R11: 0000000000000001 R12: 0000000000000001 R13: ffff888140e77400 R14: ffff888140e77408 R15: ffffffff858b42c0 FS: 0000000000000000(0000) GS:ffff88871ed00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000562384d32158 CR3: 000000055cc6a000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? refcount_warn_saturate+0xb5/0x170 ? __warn+0xa5/0x140 ? refcount_warn_saturate+0xb5/0x170 ? report_bug+0x1b1/0x1e0 ? handle_bug+0x53/0xa0 ? exc_invalid_op+0x17/0x40 ? asm_exc_invalid_op+0x1a/0x20 ? tick_nohz_tick_stopped+0x1e/0x40 ? refcount_warn_saturate+0xb5/0x170 ? refcount_warn_saturate+0xb5/0x170 nfs3svc_release_getacl+0xc9/0xe0 svc_process_common+0x5db/0xb60 ? __pfx_svc_process_common+0x10/0x10 ? __rcu_read_unlock+0x69/0xa0 ? __pfx_nfsd_dispatch+0x10/0x10 ? svc_xprt_received+0xa1/0x120 ? xdr_init_decode+0x11d/0x190 svc_process+0x2a7/0x330 svc_handle_xprt+0x69d/0x940 svc_recv+0x180/0x2d0 nfsd+0x168/0x200 ? __pfx_nfsd+0x10/0x10 kthread+0x1a2/0x1e0 ? kthread+0xf4/0x1e0 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x60 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Kernel panic - not syncing: kernel: panic_on_warn set ... Clear acl_access/acl_default after posix_acl_release is called to prevent UAF from being triggered. Fixes: a257cdd0e217 ("[PATCH] NFSD: Add server support for NFSv3 ACLs.") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20241107014705.2509463-1-lilingfeng@huaweicloud.com/ Signed-off-by: Li Lingfeng Reviewed-by: Rick Macklem Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs2acl.c | 2 ++ fs/nfsd/nfs3acl.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 4e3be7201b1c4..5fb202acb0fd0 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -84,6 +84,8 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst *rqstp) fail: posix_acl_release(resp->acl_access); posix_acl_release(resp->acl_default); + resp->acl_access = NULL; + resp->acl_default = NULL; goto out; } diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 5e34e98db969d..7b5433bd30197 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -76,6 +76,8 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst *rqstp) fail: posix_acl_release(resp->acl_access); posix_acl_release(resp->acl_default); + resp->acl_access = NULL; + resp->acl_default = NULL; goto out; } -- GitLab From b9382e29ca538b879645899ce45d652a304e2ed2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 25 Jan 2025 20:13:18 -0500 Subject: [PATCH 0112/1585] nfsd: validate the nfsd_serv pointer before calling svc_wake_up nfsd_file_dispose_list_delayed can be called from the filecache laundrette, which is shut down after the nfsd threads are shut down and the nfsd_serv pointer is cleared. If nn->nfsd_serv is NULL then there are no threads to wake. Ensure that the nn->nfsd_serv pointer is non-NULL before calling svc_wake_up in nfsd_file_dispose_list_delayed. This is safe since the svc_serv is not freed until after the filecache laundrette is cancelled. Reported-by: Salvatore Bonaccorso Closes: https://bugs.debian.org/1093734 Fixes: ffb402596147 ("nfsd: Don't leave work of closing files to a work queue") Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index a1cdba42c4fad..78f4b5573b909 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -445,11 +445,20 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose) struct nfsd_file, nf_gc); struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id); struct nfsd_fcache_disposal *l = nn->fcache_disposal; + struct svc_serv *serv; spin_lock(&l->lock); list_move_tail(&nf->nf_gc, &l->freeme); spin_unlock(&l->lock); - svc_wake_up(nn->nfsd_serv); + + /* + * The filecache laundrette is shut down after the + * nn->nfsd_serv pointer is cleared, but before the + * svc_serv is freed. + */ + serv = nn->nfsd_serv; + if (serv) + svc_wake_up(serv); } } -- GitLab From b69bb476dee99d564d65d418e9a20acca6f32c3f Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 30 Jan 2025 16:05:42 -0800 Subject: [PATCH 0113/1585] cgroup: fix race between fork and cgroup.kill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tejun reported the following race between fork() and cgroup.kill at [1]. Tejun: I was looking at cgroup.kill implementation and wondering whether there could be a race window. So, __cgroup_kill() does the following: k1. Set CGRP_KILL. k2. Iterate tasks and deliver SIGKILL. k3. Clear CGRP_KILL. The copy_process() does the following: c1. Copy a bunch of stuff. c2. Grab siglock. c3. Check fatal_signal_pending(). c4. Commit to forking. c5. Release siglock. c6. Call cgroup_post_fork() which puts the task on the css_set and tests CGRP_KILL. The intention seems to be that either a forking task gets SIGKILL and terminates on c3 or it sees CGRP_KILL on c6 and kills the child. However, I don't see what guarantees that k3 can't happen before c6. ie. After a forking task passes c5, k2 can take place and then before the forking task reaches c6, k3 can happen. Then, nobody would send SIGKILL to the child. What am I missing? This is indeed a race. One way to fix this race is by taking cgroup_threadgroup_rwsem in write mode in __cgroup_kill() as the fork() side takes cgroup_threadgroup_rwsem in read mode from cgroup_can_fork() to cgroup_post_fork(). However that would be heavy handed as this adds one more potential stall scenario for cgroup.kill which is usually called under extreme situation like memory pressure. To fix this race, let's maintain a sequence number per cgroup which gets incremented on __cgroup_kill() call. On the fork() side, the cgroup_can_fork() will cache the sequence number locally and recheck it against the cgroup's sequence number at cgroup_post_fork() site. If the sequence numbers mismatch, it means __cgroup_kill() can been called and we should send SIGKILL to the newly created task. Reported-by: Tejun Heo Closes: https://lore.kernel.org/all/Z5QHE2Qn-QZ6M-KW@slm.duckdns.org/ [1] Fixes: 661ee6280931 ("cgroup: introduce cgroup.kill") Cc: stable@vger.kernel.org # v5.14+ Signed-off-by: Shakeel Butt Reviewed-by: Michal Koutný Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 6 +++--- include/linux/sched/task.h | 1 + kernel/cgroup/cgroup.c | 20 ++++++++++++-------- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 1b20d2d8ef7cc..17960a1e858db 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -71,9 +71,6 @@ enum { /* Cgroup is frozen. */ CGRP_FROZEN, - - /* Control group has to be killed. */ - CGRP_KILL, }; /* cgroup_root->flags */ @@ -461,6 +458,9 @@ struct cgroup { int nr_threaded_children; /* # of live threaded child cgroups */ + /* sequence number for cgroup.kill, serialized by css_set_lock. */ + unsigned int kill_seq; + struct kernfs_node *kn; /* cgroup kernfs entry */ struct cgroup_file procs_file; /* handle for "cgroup.procs" */ struct cgroup_file events_file; /* handle for "cgroup.events" */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 0f2aeb37bbb04..ca1db4b92c324 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -43,6 +43,7 @@ struct kernel_clone_args { void *fn_arg; struct cgroup *cgrp; struct css_set *cset; + unsigned int kill_seq; }; /* diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index d9061bd55436b..afc665b7b1fe5 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4013,7 +4013,7 @@ static void __cgroup_kill(struct cgroup *cgrp) lockdep_assert_held(&cgroup_mutex); spin_lock_irq(&css_set_lock); - set_bit(CGRP_KILL, &cgrp->flags); + cgrp->kill_seq++; spin_unlock_irq(&css_set_lock); css_task_iter_start(&cgrp->self, CSS_TASK_ITER_PROCS | CSS_TASK_ITER_THREADED, &it); @@ -4029,10 +4029,6 @@ static void __cgroup_kill(struct cgroup *cgrp) send_sig(SIGKILL, task, 0); } css_task_iter_end(&it); - - spin_lock_irq(&css_set_lock); - clear_bit(CGRP_KILL, &cgrp->flags); - spin_unlock_irq(&css_set_lock); } static void cgroup_kill(struct cgroup *cgrp) @@ -6488,6 +6484,10 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs) spin_lock_irq(&css_set_lock); cset = task_css_set(current); get_css_set(cset); + if (kargs->cgrp) + kargs->kill_seq = kargs->cgrp->kill_seq; + else + kargs->kill_seq = cset->dfl_cgrp->kill_seq; spin_unlock_irq(&css_set_lock); if (!(kargs->flags & CLONE_INTO_CGROUP)) { @@ -6668,6 +6668,7 @@ void cgroup_post_fork(struct task_struct *child, struct kernel_clone_args *kargs) __releases(&cgroup_threadgroup_rwsem) __releases(&cgroup_mutex) { + unsigned int cgrp_kill_seq = 0; unsigned long cgrp_flags = 0; bool kill = false; struct cgroup_subsys *ss; @@ -6681,10 +6682,13 @@ void cgroup_post_fork(struct task_struct *child, /* init tasks are special, only link regular threads */ if (likely(child->pid)) { - if (kargs->cgrp) + if (kargs->cgrp) { cgrp_flags = kargs->cgrp->flags; - else + cgrp_kill_seq = kargs->cgrp->kill_seq; + } else { cgrp_flags = cset->dfl_cgrp->flags; + cgrp_kill_seq = cset->dfl_cgrp->kill_seq; + } WARN_ON_ONCE(!list_empty(&child->cg_list)); cset->nr_tasks++; @@ -6719,7 +6723,7 @@ void cgroup_post_fork(struct task_struct *child, * child down right after we finished preparing it for * userspace. */ - kill = test_bit(CGRP_KILL, &cgrp_flags); + kill = kargs->kill_seq != cgrp_kill_seq; } spin_unlock_irq(&css_set_lock); -- GitLab From 029b6ce733712a41421955194b113f283dcb1026 Mon Sep 17 00:00:00 2001 From: Changwoo Min Date: Sun, 2 Feb 2025 12:37:48 +0900 Subject: [PATCH 0114/1585] sched_ext: Fix incorrect time delta calculation in time_delta() When (s64)(after - before) > 0, the code returns the result of (s64)(after - before) > 0 while the intended result should be (s64)(after - before). That happens because the middle operand of the ternary operator was omitted incorrectly, returning the result of (s64)(after - before) > 0. Thus, add the middle operand -- (s64)(after - before) -- to return the correct time calculation. Fixes: d07be814fc71 ("sched_ext: Add time helpers for BPF schedulers") Signed-off-by: Changwoo Min Acked-by: Andrea Righi Signed-off-by: Tejun Heo --- tools/sched_ext/include/scx/common.bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index f254a39b86a58..d72b60a0c582c 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -432,7 +432,7 @@ void bpf_rcu_read_unlock(void) __ksym; */ static inline s64 time_delta(u64 after, u64 before) { - return (s64)(after - before) > 0 ? : 0; + return (s64)(after - before) > 0 ? (s64)(after - before) : 0; } /** -- GitLab From a8c9a453387640dbe45761970f41301a6985e7fa Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Thu, 16 Jan 2025 06:24:36 -0800 Subject: [PATCH 0115/1585] ASoC: fsl_micfil: Enable default case in micfil_set_quality() If 'micfil->quality' received from micfil_quality_set() somehow ends up with an unpredictable value, switch() operator will fail to initialize local variable qsel before regmap_update_bits() tries to utilize it. While it is unlikely, play it safe and enable a default case that returns -EINVAL error. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: bea1d61d5892 ("ASoC: fsl_micfil: rework quality setting") Cc: stable@vger.kernel.org Signed-off-by: Nikita Zhandarovich Link: https://patch.msgid.link/20250116142436.22389-1-n.zhandarovich@fintech.ru Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_micfil.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index 1075598a6647b..fa41366833920 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -183,6 +183,8 @@ static int micfil_set_quality(struct fsl_micfil *micfil) case QUALITY_VLOW2: qsel = MICFIL_QSEL_VLOW2_QUALITY; break; + default: + return -EINVAL; } return regmap_update_bits(micfil->regmap, REG_MICFIL_CTRL2, -- GitLab From 64b48ec36dbed561ab1cd99708c33d96f4b7b729 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 3 Feb 2025 12:47:17 +1100 Subject: [PATCH 0116/1585] drivers/block/sunvdc.c: update the correct AIP call My sparc64 defconfig build failed like this: drivers/block/sunvdc.c: In function 'vdc_queue_drain': drivers/block/sunvdc.c:1130:9: error: too many arguments to function 'blk_mq_unquiesce_queue' 1130 | blk_mq_unquiesce_queue(q, memflags); | ^~~~~~~~~~~~~~~~~~~~~~ In file included from drivers/block/sunvdc.c:10: include/linux/blk-mq.h:895:6: note: declared here 895 | void blk_mq_unquiesce_queue(struct request_queue *q); | ^~~~~~~~~~~~~~~~~~~~~~ drivers/block/sunvdc.c:1131:9: error: too few arguments to function 'blk_mq_unfreeze_queue' 1131 | blk_mq_unfreeze_queue(q); | ^~~~~~~~~~~~~~~~~~~~~ In file included from drivers/block/sunvdc.c:10: include/linux/blk-mq.h:914:1: note: declared here 914 | blk_mq_unfreeze_queue(struct request_queue *q, unsigned int memflags) | ^~~~~~~~~~~~~~~~~~~~~ Fixes: 1e1a9cecfab3 ("block: force noio scope in blk_mq_freeze_queue") Cc: Christoph Hellwig Cc: Jens Axboe Signed-off-by: Stephen Rothwell Signed-off-by: Jens Axboe --- drivers/block/sunvdc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 05c4aee7f262a..654ed962a772f 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -1127,8 +1127,8 @@ static void vdc_queue_drain(struct vdc_port *port) spin_lock_irq(&port->vio.lock); port->drain = 0; - blk_mq_unquiesce_queue(q, memflags); - blk_mq_unfreeze_queue(q); + blk_mq_unquiesce_queue(q); + blk_mq_unfreeze_queue(q, memflags); } static void vdc_ldc_reset_timer_work(struct work_struct *work) -- GitLab From f0ada00a9b3801b71d203b0033b7612b687b7d72 Mon Sep 17 00:00:00 2001 From: Imran Shaik Date: Thu, 9 Jan 2025 14:27:44 +0530 Subject: [PATCH 0117/1585] dt-bindings: clock: qcom: Add GPU clocks for QCS8300 The QCS8300 GPU clock controller is a derivative of SA8775P, but has few additional clocks and minor differences. Hence, reuse gpucc bindings of SA8775P and add additional clocks required for QCS8300. Acked-by: Krzysztof Kozlowski Signed-off-by: Imran Shaik Link: https://lore.kernel.org/r/20250109-qcs8300-mm-patches-new-v4-1-63e8ac268b02@quicinc.com Signed-off-by: Rob Herring (Arm) --- .../devicetree/bindings/clock/qcom,gpucc.yaml | 3 +++ include/dt-bindings/clock/qcom,qcs8300-gpucc.h | 17 +++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,qcs8300-gpucc.h diff --git a/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml index 0858fd6352822..4cdff6161bf0b 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml @@ -8,6 +8,7 @@ title: Qualcomm Graphics Clock & Reset Controller maintainers: - Taniya Das + - Imran Shaik description: | Qualcomm graphics clock control module provides the clocks, resets and power @@ -23,10 +24,12 @@ description: | include/dt-bindings/clock/qcom,gpucc-sm8150.h include/dt-bindings/clock/qcom,gpucc-sm8250.h include/dt-bindings/clock/qcom,gpucc-sm8350.h + include/dt-bindings/clock/qcom,qcs8300-gpucc.h properties: compatible: enum: + - qcom,qcs8300-gpucc - qcom,sdm845-gpucc - qcom,sa8775p-gpucc - qcom,sc7180-gpucc diff --git a/include/dt-bindings/clock/qcom,qcs8300-gpucc.h b/include/dt-bindings/clock/qcom,qcs8300-gpucc.h new file mode 100644 index 0000000000000..afa187467b4c1 --- /dev/null +++ b/include/dt-bindings/clock/qcom,qcs8300-gpucc.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GPUCC_QCS8300_H +#define _DT_BINDINGS_CLK_QCOM_GPUCC_QCS8300_H + +#include "qcom,sa8775p-gpucc.h" + +/* QCS8300 introduces below new clocks compared to SA8775P */ + +/* GPU_CC clocks */ +#define GPU_CC_CX_ACCU_SHIFT_CLK 23 +#define GPU_CC_GX_ACCU_SHIFT_CLK 24 + +#endif -- GitLab From 0e193cc558e32a879c717bb2d53a1cf8628b5d20 Mon Sep 17 00:00:00 2001 From: Imran Shaik Date: Thu, 9 Jan 2025 14:27:46 +0530 Subject: [PATCH 0118/1585] dt-bindings: clock: qcom: Add CAMCC clocks for QCS8300 The QCS8300 camera clock controller is a derivative of SA8775P, but has an additional clock and minor differences. Hence, reuse the SA8775P camera bindings and add additional clock required for QCS8300. Reviewed-by: Vladimir Zapolskiy Acked-by: Krzysztof Kozlowski Signed-off-by: Imran Shaik Link: https://lore.kernel.org/r/20250109-qcs8300-mm-patches-new-v4-3-63e8ac268b02@quicinc.com Signed-off-by: Rob Herring (Arm) --- .../bindings/clock/qcom,sa8775p-camcc.yaml | 6 +++++- include/dt-bindings/clock/qcom,qcs8300-camcc.h | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 include/dt-bindings/clock/qcom,qcs8300-camcc.h diff --git a/Documentation/devicetree/bindings/clock/qcom,sa8775p-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sa8775p-camcc.yaml index 36a60d8f5ae3a..81623f59d11d7 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sa8775p-camcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sa8775p-camcc.yaml @@ -8,16 +8,20 @@ title: Qualcomm Camera Clock & Reset Controller on SA8775P maintainers: - Taniya Das + - Imran Shaik description: | Qualcomm camera clock control module provides the clocks, resets and power domains on SA8775p. - See also: include/dt-bindings/clock/qcom,sa8775p-camcc.h + See also: + include/dt-bindings/clock/qcom,qcs8300-camcc.h + include/dt-bindings/clock/qcom,sa8775p-camcc.h properties: compatible: enum: + - qcom,qcs8300-camcc - qcom,sa8775p-camcc clocks: diff --git a/include/dt-bindings/clock/qcom,qcs8300-camcc.h b/include/dt-bindings/clock/qcom,qcs8300-camcc.h new file mode 100644 index 0000000000000..fc535c8478591 --- /dev/null +++ b/include/dt-bindings/clock/qcom,qcs8300-camcc.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_QCS8300_CAM_CC_H +#define _DT_BINDINGS_CLK_QCOM_QCS8300_CAM_CC_H + +#include "qcom,sa8775p-camcc.h" + +/* QCS8300 introduces below new clocks compared to SA8775P */ + +/* CAM_CC clocks */ +#define CAM_CC_TITAN_TOP_ACCU_SHIFT_CLK 86 + +#endif -- GitLab From 3e86e57356f0e2284454d82c7200807c6fa9e65b Mon Sep 17 00:00:00 2001 From: Imran Shaik Date: Thu, 9 Jan 2025 14:27:48 +0530 Subject: [PATCH 0119/1585] dt-bindings: clock: qcom: Add QCS8300 video clock controller The QCS8300 video clock controller is a derivative of SA8775P, but QCS8300 has minor difference. Hence, reuse the SA8775P videocc bindings for QCS8300 platform. Acked-by: Krzysztof Kozlowski Reviewed-by: Dmitry Baryshkov Signed-off-by: Imran Shaik Link: https://lore.kernel.org/r/20250109-qcs8300-mm-patches-new-v4-5-63e8ac268b02@quicinc.com Signed-off-by: Rob Herring (Arm) --- .../devicetree/bindings/clock/qcom,sa8775p-videocc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/clock/qcom,sa8775p-videocc.yaml b/Documentation/devicetree/bindings/clock/qcom,sa8775p-videocc.yaml index 928131bff4c19..07e5d811d8161 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sa8775p-videocc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sa8775p-videocc.yaml @@ -18,6 +18,7 @@ description: | properties: compatible: enum: + - qcom,qcs8300-videocc - qcom,sa8775p-videocc clocks: -- GitLab From e6649328dc07bff6227367eda6f1b2263d6c10f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 29 Jan 2025 14:35:27 +0100 Subject: [PATCH 0120/1585] of: address: Add kunit test for __of_address_resource_bounds() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The overflow checking has to deal with different datatypes and edgecases. Add a new kunit testcase to make sure it works correctly. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250129-of-address-overflow-v3-1-95d1760ed791@linutronix.de Signed-off-by: Rob Herring (Arm) --- drivers/of/address.c | 5 +- drivers/of/of_private.h | 4 ++ drivers/of/of_test.c | 119 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 126 insertions(+), 2 deletions(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index 125833e5ce52e..d177a2b9edaf8 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -16,6 +16,8 @@ #include #include /* for bus_dma_region */ +#include + /* Uncomment me to enable of_dump_addr() debugging output */ // #define DEBUG @@ -183,7 +185,7 @@ static u64 of_bus_pci_map(__be32 *addr, const __be32 *range, int na, int ns, #endif /* CONFIG_PCI */ -static int __of_address_resource_bounds(struct resource *r, u64 start, u64 size) +VISIBLE_IF_KUNIT int __of_address_resource_bounds(struct resource *r, u64 start, u64 size) { if (overflows_type(start, r->start)) return -EOVERFLOW; @@ -197,6 +199,7 @@ static int __of_address_resource_bounds(struct resource *r, u64 start, u64 size) return 0; } +EXPORT_SYMBOL_IF_KUNIT(__of_address_resource_bounds); /* * of_pci_range_to_resource - Create a resource from an of_pci_range diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index f3e1193c8ded4..1bdc7ceef3c5f 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -208,4 +208,8 @@ static void __maybe_unused of_dump_addr(const char *s, const __be32 *addr, int n static void __maybe_unused of_dump_addr(const char *s, const __be32 *addr, int na) { } #endif +#if IS_ENABLED(CONFIG_KUNIT) +int __of_address_resource_bounds(struct resource *r, u64 start, u64 size); +#endif + #endif /* _LINUX_OF_PRIVATE_H */ diff --git a/drivers/of/of_test.c b/drivers/of/of_test.c index b0557ded838fd..8bba5a72c9c7c 100644 --- a/drivers/of/of_test.c +++ b/drivers/of/of_test.c @@ -2,6 +2,7 @@ /* * KUnit tests for OF APIs */ +#include #include #include @@ -54,8 +55,124 @@ static struct kunit_suite of_dtb_suite = { .init = of_dtb_test_init, }; +struct of_address_resource_bounds_case { + u64 start; + u64 size; + int ret; + + u64 res_start; + u64 res_end; +}; + +static void of_address_resource_bounds_case_desc(const struct of_address_resource_bounds_case *p, + char *name) +{ + snprintf(name, KUNIT_PARAM_DESC_SIZE, "start=0x%016llx,size=0x%016llx", p->start, p->size); +} + +static const struct of_address_resource_bounds_case of_address_resource_bounds_cases[] = { + { + .start = 0, + .size = 0, + .ret = 0, + .res_start = 0, + .res_end = -1, + }, + { + .start = 0, + .size = 0x1000, + .ret = 0, + .res_start = 0, + .res_end = 0xfff, + }, + { + .start = 0x1000, + .size = 0, + .ret = 0, + .res_start = 0x1000, + .res_end = 0xfff, + }, + { + .start = 0x1000, + .size = 0x1000, + .ret = 0, + .res_start = 0x1000, + .res_end = 0x1fff, + }, + { + .start = 1, + .size = RESOURCE_SIZE_MAX, + .ret = 0, + .res_start = 1, + .res_end = RESOURCE_SIZE_MAX, + }, + { + .start = RESOURCE_SIZE_MAX, + .size = 1, + .ret = 0, + .res_start = RESOURCE_SIZE_MAX, + .res_end = RESOURCE_SIZE_MAX, + }, + { + .start = 2, + .size = RESOURCE_SIZE_MAX, + .ret = -EOVERFLOW, + }, + { + .start = RESOURCE_SIZE_MAX, + .size = 2, + .ret = -EOVERFLOW, + }, + { + .start = ULL(0x100000000), + .size = 1, + .ret = sizeof(resource_size_t) > sizeof(u32) ? 0 : -EOVERFLOW, + .res_start = ULL(0x100000000), + .res_end = ULL(0x100000000), + }, + { + .start = 0x1000, + .size = 0xffffffff, + .ret = sizeof(resource_size_t) > sizeof(u32) ? 0 : -EOVERFLOW, + .res_start = 0x1000, + .res_end = ULL(0x100000ffe), + }, +}; + +KUNIT_ARRAY_PARAM(of_address_resource_bounds, + of_address_resource_bounds_cases, of_address_resource_bounds_case_desc); + +static void of_address_resource_bounds(struct kunit *test) +{ + const struct of_address_resource_bounds_case *param = test->param_value; + struct resource r; /* Intentionally uninitialized */ + int ret; + + if (!IS_ENABLED(CONFIG_OF_ADDRESS)) + kunit_skip(test, "CONFIG_OF_ADDRESS not enabled\n"); + + ret = __of_address_resource_bounds(&r, param->start, param->size); + KUNIT_EXPECT_EQ(test, param->ret, ret); + if (ret == 0) { + KUNIT_EXPECT_EQ(test, (resource_size_t)param->res_start, r.start); + KUNIT_EXPECT_EQ(test, (resource_size_t)param->res_end, r.end); + KUNIT_EXPECT_EQ(test, param->size, resource_size(&r)); + } +} + +static struct kunit_case of_address_test_cases[] = { + KUNIT_CASE_PARAM(of_address_resource_bounds, of_address_resource_bounds_gen_params), + {} +}; + +static struct kunit_suite of_address_suite = { + .name = "of_address", + .test_cases = of_address_test_cases, +}; + kunit_test_suites( - &of_dtb_suite, + &of_dtb_suite, &of_address_suite, ); MODULE_DESCRIPTION("KUnit tests for OF APIs"); +MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING"); MODULE_LICENSE("GPL"); -- GitLab From fa803513ab68ba07369643393f1754b845160030 Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Fri, 10 Jan 2025 17:19:49 +0800 Subject: [PATCH 0121/1585] cpufreq/amd-pstate: Fix per-policy boost flag incorrect when fail Commit c8c68c38b56f ("cpufreq: amd-pstate: initialize core precision boost state") sets per-policy boost flag to false when boost fail. However, this boost flag will be set to reverse value in store_local_boost() and cpufreq_boost_trigger_state() in cpufreq.c. This will cause the per-policy boost flag set to true when fail to set boost. Remove the extra assignment in amd_pstate_set_boost() and keep all operations on per-policy boost flag outside of set_boost() to fix this problem. Fixes: c8c68c38b56f ("cpufreq: amd-pstate: initialize core precision boost state") Signed-off-by: Lifeng Zheng Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20250110091949.3610770-1-zhenglifeng1@huawei.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index dd9b8d6993d69..7120f035c0be4 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -747,7 +747,6 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) guard(mutex)(&amd_pstate_driver_lock); ret = amd_pstate_cpu_boost_update(policy, state); - policy->boost_enabled = !ret ? state : false; refresh_frequency_limits(policy); return ret; -- GitLab From e4d4648eac8b4ef39f412d07715eb26f1ccd7342 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Tue, 28 Jan 2025 00:02:01 +0300 Subject: [PATCH 0122/1585] platform/x86: ideapad-laptop: pass a correct pointer to the driver data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit devm_platform_profile_register() expects a pointer to the private driver data but instead an address of the pointer variable is passed due to a typo. This leads to the crashes later: BUG: unable to handle page fault for address: 00000000fe0d0044 PGD 0 P4D 0 Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 6 UID: 0 PID: 1284 Comm: tuned Tainted: G W 6.13.0+ #7 Tainted: [W]=WARN Hardware name: LENOVO 21D0/LNVNB161216, BIOS J6CN45WW 03/17/2023 RIP: 0010:__mutex_lock.constprop.0+0x6bf/0x7f0 Call Trace: dytc_profile_set+0x4a/0x140 [ideapad_laptop] _store_and_notify+0x13/0x40 [platform_profile] class_for_each_device+0x145/0x180 platform_profile_store+0xc0/0x130 [platform_profile] kernfs_fop_write_iter+0x13e/0x1f0 vfs_write+0x290/0x450 ksys_write+0x6c/0xe0 do_syscall_64+0x82/0x160 entry_SYSCALL_64_after_hwframe+0x76/0x7e Found by Linux Verification Center (linuxtesting.org). Fixes: 249c576f0f9d ("ACPI: platform_profile: Let drivers set drvdata to the class device") Signed-off-by: Fedor Pchelkin Reviewed-by: Kurt Borja Link: https://lore.kernel.org/r/20250127210202.568691-1-pchelkin@ispras.ru Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/ideapad-laptop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index dfb5d4b8c0465..30bd366d7b58a 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1121,7 +1121,7 @@ static int ideapad_dytc_profile_init(struct ideapad_private *priv) /* Create platform_profile structure and register */ priv->dytc->ppdev = devm_platform_profile_register(&priv->platform_device->dev, - "ideapad-laptop", &priv->dytc, + "ideapad-laptop", priv->dytc, &dytc_profile_ops); if (IS_ERR(priv->dytc->ppdev)) { err = PTR_ERR(priv->dytc->ppdev); -- GitLab From 583ef25bb2a094813351a727ddec38b35a15b9f8 Mon Sep 17 00:00:00 2001 From: Dmitry Kandybka Date: Fri, 24 Jan 2025 01:07:39 +0300 Subject: [PATCH 0123/1585] platform/x86/intel: pmc: fix ltr decode in pmc_core_ltr_show() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In pmc_core_ltr_show(), promote 'val' to 'u64' to avoid possible integer overflow. Values (10 bit) are multiplied by the scale, the result of expression is in a range from 1 to 34,326,183,936 which is bigger then UINT32_MAX. Compile tested only. Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Dmitry Kandybka Reviewed-by: Rajneesh Bhardwaj Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20250123220739.68087-1-d.kandybka@gmail.com Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/pmc/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index 10f04b9441174..1ee0fb5f8250b 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -626,8 +626,8 @@ static u32 convert_ltr_scale(u32 val) static int pmc_core_ltr_show(struct seq_file *s, void *unused) { struct pmc_dev *pmcdev = s->private; - u64 decoded_snoop_ltr, decoded_non_snoop_ltr; - u32 ltr_raw_data, scale, val; + u64 decoded_snoop_ltr, decoded_non_snoop_ltr, val; + u32 ltr_raw_data, scale; u16 snoop_ltr, nonsnoop_ltr; unsigned int i, index, ltr_index = 0; -- GitLab From 5c8f9a05336cf5cadbd57ad461621b386aadb762 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Thu, 30 Jan 2025 08:38:49 +0300 Subject: [PATCH 0124/1585] arm64: dts: rockchip: Fix broken tsadc pinctrl names for rk3588 The tsadc driver does not handle pinctrl "gpio" and "otpout". Let's use the correct pinctrl names "default" and "sleep". Additionally, Alexey Charkov's testing [1] has established that it is necessary for pinctrl state to reference the &tsadc_shut_org configuration rather than &tsadc_shut for the driver to function correctly. [1] https://lkml.org/lkml/2025/1/24/966 Fixes: 32641b8ab1a5 ("arm64: dts: rockchip: add rk3588 thermal sensor") Cc: stable@vger.kernel.org Reviewed-by: Dragan Simic Signed-off-by: Alexander Shiyan Link: https://lore.kernel.org/r/20250130053849.4902-1-eagle.alexander923@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-base.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi index 8cfa30837ce72..978de506d4348 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi @@ -2668,9 +2668,9 @@ rockchip,hw-tshut-temp = <120000>; rockchip,hw-tshut-mode = <0>; /* tshut mode 0:CRU 1:GPIO */ rockchip,hw-tshut-polarity = <0>; /* tshut polarity 0:LOW 1:HIGH */ - pinctrl-0 = <&tsadc_gpio_func>; - pinctrl-1 = <&tsadc_shut>; - pinctrl-names = "gpio", "otpout"; + pinctrl-0 = <&tsadc_shut_org>; + pinctrl-1 = <&tsadc_gpio_func>; + pinctrl-names = "default", "sleep"; #thermal-sensor-cells = <1>; status = "disabled"; }; -- GitLab From a6a7cba17c544fb95d5a29ab9d9ed4503029cb29 Mon Sep 17 00:00:00 2001 From: Tianling Shen Date: Sun, 19 Jan 2025 17:11:54 +0800 Subject: [PATCH 0125/1585] arm64: dts: rockchip: change eth phy mode to rgmii-id for orangepi r1 plus lts In general the delay should be added by the PHY instead of the MAC, and this improves network stability on some boards which seem to need different delay. Fixes: 387b3bbac5ea ("arm64: dts: rockchip: Add Xunlong OrangePi R1 Plus LTS") Cc: stable@vger.kernel.org # 6.6+ Signed-off-by: Tianling Shen Link: https://lore.kernel.org/r/20250119091154.1110762-1-cnsztl@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus-lts.dts | 3 +-- arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus.dts | 1 + arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus.dtsi | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus-lts.dts b/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus-lts.dts index 67c246ad8b8c0..ec2ce894da1fc 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus-lts.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus-lts.dts @@ -17,8 +17,7 @@ &gmac2io { phy-handle = <&yt8531c>; - tx_delay = <0x19>; - rx_delay = <0x05>; + phy-mode = "rgmii-id"; status = "okay"; mdio { diff --git a/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus.dts b/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus.dts index 324a8e951f7e4..846b931e16d21 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus.dts @@ -15,6 +15,7 @@ &gmac2io { phy-handle = <&rtl8211e>; + phy-mode = "rgmii"; tx_delay = <0x24>; rx_delay = <0x18>; status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus.dtsi b/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus.dtsi index 4f193704e5dc2..09508e324a280 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus.dtsi @@ -109,7 +109,6 @@ assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>; assigned-clock-parents = <&gmac_clk>, <&gmac_clk>; clock_in_out = "input"; - phy-mode = "rgmii"; phy-supply = <&vcc_io>; pinctrl-0 = <&rgmiim1_pins>; pinctrl-names = "default"; -- GitLab From 4eee627ea59304cdd66c5d4194ef13486a6c44fc Mon Sep 17 00:00:00 2001 From: Lukasz Czechowski Date: Tue, 21 Jan 2025 13:56:03 +0100 Subject: [PATCH 0126/1585] arm64: dts: rockchip: Move uart5 pin configuration to px30 ringneck SoM In the PX30-uQ7 (Ringneck) SoM, the hardware CTS and RTS pins for uart5 cannot be used for the UART CTS/RTS, because they are already allocated for different purposes. CTS pin is routed to SUS_S3# signal, while RTS pin is used internally and is not available on Q7 connector. Move definition of the pinctrl-0 property from px30-ringneck-haikou.dts to px30-ringneck.dtsi. This commit is a dependency to next commit in the patch series, that disables DMA for uart5. Cc: stable@vger.kernel.org Reviewed-by: Quentin Schulz Signed-off-by: Lukasz Czechowski Link: https://lore.kernel.org/r/20250121125604.3115235-2-lukasz.czechowski@thaumatec.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts | 1 - arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts index e4517f47d519c..eb9470a00e549 100644 --- a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts @@ -226,7 +226,6 @@ }; &uart5 { - pinctrl-0 = <&uart5_xfer>; rts-gpios = <&gpio0 RK_PB5 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi index ae050cc6cd050..2c87005c89bd3 100644 --- a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi @@ -396,6 +396,10 @@ status = "okay"; }; +&uart5 { + pinctrl-0 = <&uart5_xfer>; +}; + /* Mule UCAN */ &usb_host0_ehci { status = "okay"; -- GitLab From f3be8a9b1afffbcc70f8e41063b151b1038d7813 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Wed, 29 Jan 2025 13:40:07 +0100 Subject: [PATCH 0127/1585] accel/ivpu: Fix error handling in ivpu_boot() Ensure IRQs and IPC are properly disabled if HW sched or DCT initialization fails. Fixes: cc3c72c7e610 ("accel/ivpu: Refactor failure diagnostics during boot") Cc: stable@vger.kernel.org # v6.13+ Reviewed-by: Karol Wachowski Reviewed-by: Jeffrey Hugo Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20250129124009.1039982-2-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index ca2bf47ce2484..0c4a82271c26d 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -397,15 +397,19 @@ int ivpu_boot(struct ivpu_device *vdev) if (ivpu_fw_is_cold_boot(vdev)) { ret = ivpu_pm_dct_init(vdev); if (ret) - goto err_diagnose_failure; + goto err_disable_ipc; ret = ivpu_hw_sched_init(vdev); if (ret) - goto err_diagnose_failure; + goto err_disable_ipc; } return 0; +err_disable_ipc: + ivpu_ipc_disable(vdev); + ivpu_hw_irq_disable(vdev); + disable_irq(vdev->irq); err_diagnose_failure: ivpu_hw_diagnose_failure(vdev); ivpu_mmu_evtq_dump(vdev); -- GitLab From f2bc2afe34c107a02ce829a4039e85514feafe55 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Wed, 29 Jan 2025 13:40:08 +0100 Subject: [PATCH 0128/1585] accel/ivpu: Clear runtime_error after pm_runtime_resume_and_get() fails pm_runtime_resume_and_get() sets dev->power.runtime_error that causes all subsequent pm_runtime_get_sync() calls to fail. Clear the runtime_error using pm_runtime_set_suspended(), so the driver doesn't have to be reloaded to recover when the NPU fails to boot during runtime resume. Fixes: 7d4b4c74432d ("accel/ivpu: Remove suspend_reschedule_counter") Cc: stable@vger.kernel.org # v6.11+ Reviewed-by: Maciej Falkowski Reviewed-by: Jeffrey Hugo Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20250129124009.1039982-3-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_pm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 949f4233946c6..c3774d2221326 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -309,7 +309,10 @@ int ivpu_rpm_get(struct ivpu_device *vdev) int ret; ret = pm_runtime_resume_and_get(vdev->drm.dev); - drm_WARN_ON(&vdev->drm, ret < 0); + if (ret < 0) { + ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); + pm_runtime_set_suspended(vdev->drm.dev); + } return ret; } -- GitLab From 41a2d8286c905614f29007f1bc8e652d54654b82 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Wed, 29 Jan 2025 13:40:09 +0100 Subject: [PATCH 0129/1585] accel/ivpu: Fix error handling in recovery/reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Disable runtime PM for the duration of reset/recovery so it is possible to set the correct runtime PM state depending on the outcome of the `ivpu_resume()`. Don’t suspend or reset the HW if the NPU is suspended when the reset/recovery is requested. Also, move common reset/recovery code to separate functions for better code readability. Fixes: 27d19268cf39 ("accel/ivpu: Improve recovery and reset support") Cc: stable@vger.kernel.org # v6.8+ Reviewed-by: Maciej Falkowski Reviewed-by: Jeffrey Hugo Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20250129124009.1039982-4-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_pm.c | 79 ++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 36 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index c3774d2221326..8b2b050cc41a9 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -115,41 +115,57 @@ static int ivpu_resume(struct ivpu_device *vdev) return ret; } -static void ivpu_pm_recovery_work(struct work_struct *work) +static void ivpu_pm_reset_begin(struct ivpu_device *vdev) { - struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work); - struct ivpu_device *vdev = pm->vdev; - char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; - int ret; - - ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter)); - - ret = pm_runtime_resume_and_get(vdev->drm.dev); - if (ret) - ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); - - ivpu_jsm_state_dump(vdev); - ivpu_dev_coredump(vdev); + pm_runtime_disable(vdev->drm.dev); atomic_inc(&vdev->pm->reset_counter); atomic_set(&vdev->pm->reset_pending, 1); down_write(&vdev->pm->reset_lock); +} + +static void ivpu_pm_reset_complete(struct ivpu_device *vdev) +{ + int ret; - ivpu_suspend(vdev); ivpu_pm_prepare_cold_boot(vdev); ivpu_jobs_abort_all(vdev); ivpu_ms_cleanup_all(vdev); ret = ivpu_resume(vdev); - if (ret) + if (ret) { ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); + pm_runtime_set_suspended(vdev->drm.dev); + } else { + pm_runtime_set_active(vdev->drm.dev); + } up_write(&vdev->pm->reset_lock); atomic_set(&vdev->pm->reset_pending, 0); - kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt); pm_runtime_mark_last_busy(vdev->drm.dev); - pm_runtime_put_autosuspend(vdev->drm.dev); + pm_runtime_enable(vdev->drm.dev); +} + +static void ivpu_pm_recovery_work(struct work_struct *work) +{ + struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work); + struct ivpu_device *vdev = pm->vdev; + char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; + + ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter)); + + ivpu_pm_reset_begin(vdev); + + if (!pm_runtime_status_suspended(vdev->drm.dev)) { + ivpu_jsm_state_dump(vdev); + ivpu_dev_coredump(vdev); + ivpu_suspend(vdev); + } + + ivpu_pm_reset_complete(vdev); + + kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt); } void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason) @@ -328,16 +344,13 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev) struct ivpu_device *vdev = pci_get_drvdata(pdev); ivpu_dbg(vdev, PM, "Pre-reset..\n"); - atomic_inc(&vdev->pm->reset_counter); - atomic_set(&vdev->pm->reset_pending, 1); - pm_runtime_get_sync(vdev->drm.dev); - down_write(&vdev->pm->reset_lock); - ivpu_prepare_for_reset(vdev); - ivpu_hw_reset(vdev); - ivpu_pm_prepare_cold_boot(vdev); - ivpu_jobs_abort_all(vdev); - ivpu_ms_cleanup_all(vdev); + ivpu_pm_reset_begin(vdev); + + if (!pm_runtime_status_suspended(vdev->drm.dev)) { + ivpu_prepare_for_reset(vdev); + ivpu_hw_reset(vdev); + } ivpu_dbg(vdev, PM, "Pre-reset done.\n"); } @@ -345,18 +358,12 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev) void ivpu_pm_reset_done_cb(struct pci_dev *pdev) { struct ivpu_device *vdev = pci_get_drvdata(pdev); - int ret; ivpu_dbg(vdev, PM, "Post-reset..\n"); - ret = ivpu_resume(vdev); - if (ret) - ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret); - up_write(&vdev->pm->reset_lock); - atomic_set(&vdev->pm->reset_pending, 0); - ivpu_dbg(vdev, PM, "Post-reset done.\n"); - pm_runtime_mark_last_busy(vdev->drm.dev); - pm_runtime_put_autosuspend(vdev->drm.dev); + ivpu_pm_reset_complete(vdev); + + ivpu_dbg(vdev, PM, "Post-reset done.\n"); } void ivpu_pm_init(struct ivpu_device *vdev) -- GitLab From 5ae4dca718eacd0a56173a687a3736eb7e627c77 Mon Sep 17 00:00:00 2001 From: Lukasz Czechowski Date: Tue, 21 Jan 2025 13:56:04 +0100 Subject: [PATCH 0130/1585] arm64: dts: rockchip: Disable DMA for uart5 on px30-ringneck MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit UART controllers without flow control seem to behave unstable in case DMA is enabled. The issues were indicated in the message: https://lore.kernel.org/linux-arm-kernel/CAMdYzYpXtMocCtCpZLU_xuWmOp2Ja_v0Aj0e6YFNRA-yV7u14g@mail.gmail.com/ In case of PX30-uQ7 Ringneck SoM, it was noticed that after couple of hours of UART communication, the CPU stall was occurring, leading to the system becoming unresponsive. After disabling the DMA, extensive UART communication tests for up to two weeks were performed, and no issues were further observed. The flow control pins for uart5 are not available on PX30-uQ7 Ringneck, as configured by pinctrl-0, so the DMA nodes were removed on SoM dtsi. Cc: stable@vger.kernel.org Fixes: c484cf93f61b ("arm64: dts: rockchip: add PX30-µQ7 (Ringneck) SoM with Haikou baseboard") Reviewed-by: Quentin Schulz Signed-off-by: Lukasz Czechowski Link: https://lore.kernel.org/r/20250121125604.3115235-3-lukasz.czechowski@thaumatec.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi index 2c87005c89bd3..e80412abec081 100644 --- a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi @@ -397,6 +397,8 @@ }; &uart5 { + /delete-property/ dmas; + /delete-property/ dma-names; pinctrl-0 = <&uart5_xfer>; }; -- GitLab From 2f9eb5262e63396a315c7da34a6c80c5d335df9f Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 16 Jan 2025 15:36:31 +0100 Subject: [PATCH 0131/1585] arm64: dts: rockchip: fix fixed-regulator renames on rk3399-gru devices rk3399-gru chromebooks have a regulator chains where one named regulator supplies multiple regulators pp900-usb pp900_pcie that supply the named peripherals. The dtsi used somewhat creative structure to describe that in creating the base node 3 times with different phandles and describing the EC dependency in a comment. This didn't register in the recent regulator-node renaming, as the additional nodes were empty, so adapt the missing node names for now. Fixes: 5c96e6330197 ("arm64: dts: rockchip: adapt regulator nodenames to preferred form") Tested-by: Vicente Bergas Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20250116143631.3650469-1-heiko@sntech.de --- .../dts/rockchip/rk3399-gru-chromebook.dtsi | 8 +++---- .../boot/dts/rockchip/rk3399-gru-scarlet.dtsi | 6 ++--- arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi | 22 +++++++++---------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi index 988e6ca32fac9..a9ea4b0daa04c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi @@ -22,11 +22,11 @@ }; /* EC turns on w/ pp900_usb_en */ - pp900_usb: pp900-ap { + pp900_usb: regulator-pp900-ap { }; /* EC turns on w/ pp900_pcie_en */ - pp900_pcie: pp900-ap { + pp900_pcie: regulator-pp900-ap { }; pp3000: regulator-pp3000 { @@ -126,7 +126,7 @@ }; /* Always on; plain and simple */ - pp3000_ap: pp3000_emmc: pp3000 { + pp3000_ap: pp3000_emmc: regulator-pp3000 { }; pp1500_ap_io: regulator-pp1500-ap-io { @@ -160,7 +160,7 @@ }; /* EC turns on w/ pp3300_usb_en_l */ - pp3300_usb: pp3300 { + pp3300_usb: regulator-pp3300 { }; /* gpio is shared with pp1800_pcie and pinctrl is set there */ diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi index 19b23b4389658..5e068377a0a28 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi @@ -92,7 +92,7 @@ }; /* EC turns on pp1800_s3_en */ - pp1800_s3: pp1800 { + pp1800_s3: regulator-pp1800 { }; /* pp3300 children, sorted by name */ @@ -109,11 +109,11 @@ }; /* EC turns on pp3300_s0_en */ - pp3300_s0: pp3300 { + pp3300_s0: regulator-pp3300 { }; /* EC turns on pp3300_s3_en */ - pp3300_s3: pp3300 { + pp3300_s3: regulator-pp3300 { }; /* diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi index 6d9e60b01225e..7eca1da78cffa 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi @@ -189,39 +189,39 @@ }; /* EC turns on w/ pp900_ddrpll_en */ - pp900_ddrpll: pp900-ap { + pp900_ddrpll: regulator-pp900-ap { }; /* EC turns on w/ pp900_pll_en */ - pp900_pll: pp900-ap { + pp900_pll: regulator-pp900-ap { }; /* EC turns on w/ pp900_pmu_en */ - pp900_pmu: pp900-ap { + pp900_pmu: regulator-pp900-ap { }; /* EC turns on w/ pp1800_s0_en_l */ - pp1800_ap_io: pp1800_emmc: pp1800_nfc: pp1800_s0: pp1800 { + pp1800_ap_io: pp1800_emmc: pp1800_nfc: pp1800_s0: regulator-pp1800 { }; /* EC turns on w/ pp1800_avdd_en_l */ - pp1800_avdd: pp1800 { + pp1800_avdd: regulator-pp1800 { }; /* EC turns on w/ pp1800_lid_en_l */ - pp1800_lid: pp1800_mic: pp1800 { + pp1800_lid: pp1800_mic: regulator-pp1800 { }; /* EC turns on w/ lpddr_pwr_en */ - pp1800_lpddr: pp1800 { + pp1800_lpddr: regulator-pp1800 { }; /* EC turns on w/ pp1800_pmu_en_l */ - pp1800_pmu: pp1800 { + pp1800_pmu: regulator-pp1800 { }; /* EC turns on w/ pp1800_usb_en_l */ - pp1800_usb: pp1800 { + pp1800_usb: regulator-pp1800 { }; pp3000_sd_slot: regulator-pp3000-sd-slot { @@ -259,11 +259,11 @@ }; /* EC turns on w/ pp3300_trackpad_en_l */ - pp3300_trackpad: pp3300-trackpad { + pp3300_trackpad: regulator-pp3300-trackpad { }; /* EC turns on w/ usb_a_en */ - pp5000_usb_a_vbus: pp5000 { + pp5000_usb_a_vbus: regulator-pp5000 { }; ap_rtc_clk: ap-rtc-clk { -- GitLab From a1d939055a22be06d8c12bf53afb258b9d38575f Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Mon, 13 Jan 2025 18:47:34 +0800 Subject: [PATCH 0132/1585] arm64: dts: rockchip: Fix lcdpwr_en pin for Cool Pi GenBook According to the schematic, the lcdpwr_en pin is GPIO0_C4, not GPIO1_C4. Fixes: 4a8c1161b843 ("arm64: dts: rockchip: Add support for rk3588 based Cool Pi CM5 GenBook") Signed-off-by: Andy Yan Link: https://lore.kernel.org/r/20250113104825.2390427-1-andyshrk@163.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-genbook.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-genbook.dts b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-genbook.dts index 92f0ed83c9902..bc6b43a771537 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-genbook.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-genbook.dts @@ -113,7 +113,7 @@ compatible = "regulator-fixed"; regulator-name = "vcc3v3_lcd"; enable-active-high; - gpio = <&gpio1 RK_PC4 GPIO_ACTIVE_HIGH>; + gpio = <&gpio0 RK_PC4 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&lcdpwr_en>; vin-supply = <&vcc3v3_sys>; @@ -241,7 +241,7 @@ &pinctrl { lcd { lcdpwr_en: lcdpwr-en { - rockchip,pins = <1 RK_PC4 RK_FUNC_GPIO &pcfg_pull_down>; + rockchip,pins = <0 RK_PC4 RK_FUNC_GPIO &pcfg_pull_down>; }; bl_en: bl-en { -- GitLab From 48e487b002891eb0aeaec704c9bed51f028deff1 Mon Sep 17 00:00:00 2001 From: Stuart Hayhurst Date: Tue, 21 Jan 2025 20:00:07 +0000 Subject: [PATCH 0133/1585] HID: corsair-void: Add missing delayed work cancel for headset status The cancel_delayed_work_sync() call was missed, causing a use-after-free in corsair_void_remove(). Reported-by: yan kang Reported-by: yue sun Closes: https://lore.kernel.org/all/SY8P300MB042106286A2536707D2FB736A1E42@SY8P300MB0421.AUSP300.PROD.OUTLOOK.COM/ Closes: https://lore.kernel.org/all/SY8P300MB0421872E0AE934C9616FA61EA1E42@SY8P300MB0421.AUSP300.PROD.OUTLOOK.COM/ Fixes: 6ea2a6fd3872 ("HID: corsair-void: Add Corsair Void headset family driver") Cc: stable@vger.kernel.org Signed-off-by: Stuart Hayhurst Signed-off-by: Jiri Kosina --- drivers/hid/hid-corsair-void.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-corsair-void.c b/drivers/hid/hid-corsair-void.c index 6ece56b850fc0..bd8f3d849b58d 100644 --- a/drivers/hid/hid-corsair-void.c +++ b/drivers/hid/hid-corsair-void.c @@ -726,6 +726,7 @@ static void corsair_void_remove(struct hid_device *hid_dev) if (drvdata->battery) power_supply_unregister(drvdata->battery); + cancel_delayed_work_sync(&drvdata->delayed_status_work); cancel_delayed_work_sync(&drvdata->delayed_firmware_work); sysfs_remove_group(&hid_dev->dev.kobj, &corsair_void_attr_group); } -- GitLab From c098363828f7006ef5c5121b673bc5e26571e6c8 Mon Sep 17 00:00:00 2001 From: Stuart Hayhurst Date: Tue, 21 Jan 2025 20:00:08 +0000 Subject: [PATCH 0134/1585] HID: corsair-void: Initialise memory for psy_cfg power_supply_config psy_cfg was missing its initialiser, add it in. Fixes: 6ea2a6fd3872 ("HID: corsair-void: Add Corsair Void headset family driver") Cc: stable@vger.kernel.org Signed-off-by: Stuart Hayhurst Signed-off-by: Jiri Kosina --- drivers/hid/hid-corsair-void.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-corsair-void.c b/drivers/hid/hid-corsair-void.c index bd8f3d849b58d..56e858066c3c3 100644 --- a/drivers/hid/hid-corsair-void.c +++ b/drivers/hid/hid-corsair-void.c @@ -553,7 +553,7 @@ static void corsair_void_battery_remove_work_handler(struct work_struct *work) static void corsair_void_battery_add_work_handler(struct work_struct *work) { struct corsair_void_drvdata *drvdata; - struct power_supply_config psy_cfg; + struct power_supply_config psy_cfg = {}; struct power_supply *new_supply; drvdata = container_of(work, struct corsair_void_drvdata, -- GitLab From 4b54ae69197b9f416baa0fceadff7e89075f8454 Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Wed, 22 Jan 2025 09:29:00 +0800 Subject: [PATCH 0135/1585] HID: intel-ish-hid: fix the length of MNG_SYNC_FW_CLOCK in doorbell The timestamps in the Firmware log and HID sensor samples are incorrect. They show 1970-01-01 because the current IPC driver only uses the first 8 bytes of bootup time when synchronizing time with the firmware. The firmware converts the bootup time to UTC time, which results in the display of 1970-01-01. In write_ipc_from_queue(), when sending the MNG_SYNC_FW_CLOCK message, the clock is updated according to the definition of ipc_time_update_msg. However, in _ish_sync_fw_clock(), the message length is specified as the size of uint64_t when building the doorbell. As a result, the firmware only receives the first 8 bytes of struct ipc_time_update_msg. This patch corrects the length in the doorbell to ensure the entire ipc_time_update_msg is sent, fixing the timestamp issue. Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ipc/ipc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c index 3cd53fc80634a..cb956a8c386cb 100644 --- a/drivers/hid/intel-ish-hid/ipc/ipc.c +++ b/drivers/hid/intel-ish-hid/ipc/ipc.c @@ -578,14 +578,14 @@ static void fw_reset_work_fn(struct work_struct *work) static void _ish_sync_fw_clock(struct ishtp_device *dev) { static unsigned long prev_sync; - uint64_t usec; + struct ipc_time_update_msg time = {}; if (prev_sync && time_before(jiffies, prev_sync + 20 * HZ)) return; prev_sync = jiffies; - usec = ktime_to_us(ktime_get_boottime()); - ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &usec, sizeof(uint64_t)); + /* The fields of time would be updated while sending message */ + ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &time, sizeof(time)); } /** -- GitLab From 7e0d1cff12b895f44f4ddc8cf50311bc1f775201 Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Wed, 22 Jan 2025 09:29:01 +0800 Subject: [PATCH 0136/1585] HID: intel-ish-hid: Send clock sync message immediately after reset The ISH driver performs a clock sync with the firmware once at system startup and then every 20 seconds. If a firmware reset occurs right after a clock sync, the driver would wait 20 seconds before performing another clock sync with the firmware. This is particularly problematic with the introduction of the "load firmware from host" feature, where the driver performs a clock sync with the bootloader and then has to wait 20 seconds before syncing with the main firmware. This patch clears prev_sync immediately upon receiving an IPC reset, so that the main firmware and driver will perform a clock sync immediately after completing the IPC handshake. Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ipc/ipc.c | 9 ++++++--- drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h | 2 ++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c index cb956a8c386cb..4c861119e97aa 100644 --- a/drivers/hid/intel-ish-hid/ipc/ipc.c +++ b/drivers/hid/intel-ish-hid/ipc/ipc.c @@ -517,6 +517,10 @@ static int ish_fw_reset_handler(struct ishtp_device *dev) /* ISH FW is dead */ if (!ish_is_input_ready(dev)) return -EPIPE; + + /* Send clock sync at once after reset */ + ishtp_dev->prev_sync = 0; + /* * Set HOST2ISH.ILUP. Apparently we need this BEFORE sending * RESET_NOTIFY_ACK - FW will be checking for it @@ -577,13 +581,12 @@ static void fw_reset_work_fn(struct work_struct *work) */ static void _ish_sync_fw_clock(struct ishtp_device *dev) { - static unsigned long prev_sync; struct ipc_time_update_msg time = {}; - if (prev_sync && time_before(jiffies, prev_sync + 20 * HZ)) + if (dev->prev_sync && time_before(jiffies, dev->prev_sync + 20 * HZ)) return; - prev_sync = jiffies; + dev->prev_sync = jiffies; /* The fields of time would be updated while sending message */ ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &time, sizeof(time)); } diff --git a/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h b/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h index 44eddc411e97c..ec9f6e87aaf23 100644 --- a/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h +++ b/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h @@ -253,6 +253,8 @@ struct ishtp_device { unsigned int ipc_tx_cnt; unsigned long long ipc_tx_bytes_cnt; + /* Time of the last clock sync */ + unsigned long prev_sync; const struct ishtp_hw_ops *ops; size_t mtu; uint32_t ishtp_msg_hdr; -- GitLab From 52572cde8b4a44676557ccb67b035291833112c5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Jan 2025 07:50:57 +0100 Subject: [PATCH 0137/1585] HID: lenovo: select CONFIG_ACPI_PLATFORM_PROFILE A previous patch tried to fix this link failure: x86_64-linux-ld: drivers/hid/hid-lenovo.o: in function `lenovo_raw_event': hid-lenovo.c:(.text+0x22c): undefined reference to `platform_profile_cycle' but got it wrong in three ways: - the link failure still exists with CONFIG_ACPI_PLATFORM_PROFILE=m when hid-lenovo is built-in - There is no way to manually enable CONFIG_ACPI_PLATFORM_PROFILE, as it is intended to be selected by its users. Remove the broken #if check again and instead select the symbol like the other users do. This requires adding a dependency on CONFIG_ACPI. Fixes: 52e7d1f7c2fd ("HID: lenovo: Fix undefined platform_profile_cycle in ThinkPad X12 keyboard patch") Signed-off-by: Arnd Bergmann Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 2 ++ drivers/hid/hid-lenovo.c | 7 +------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index b53eb569bd495..8adb745c5b28c 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -570,6 +570,8 @@ config HID_LED config HID_LENOVO tristate "Lenovo / Thinkpad devices" + depends on ACPI + select ACPI_PLATFORM_PROFILE select NEW_LEDS select LEDS_CLASS help diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index 4d00bc4d656e6..a7d9ca02779ea 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -32,9 +32,7 @@ #include #include -#if IS_ENABLED(CONFIG_ACPI_PLATFORM_PROFILE) #include -#endif /* CONFIG_ACPI_PLATFORM_PROFILE */ #include "hid-ids.h" @@ -730,13 +728,10 @@ static int lenovo_raw_event_TP_X12_tab(struct hid_device *hdev, u32 raw_data) if (hdev->product == USB_DEVICE_ID_LENOVO_X12_TAB) { report_key_event(input, KEY_RFKILL); return 1; - } -#if IS_ENABLED(CONFIG_ACPI_PLATFORM_PROFILE) - else { + } else { platform_profile_cycle(); return 1; } -#endif /* CONFIG_ACPI_PLATFORM_PROFILE */ return 0; case TP_X12_RAW_HOTKEY_FN_F10: /* TAB1 has PICKUP Phone and TAB2 use Snipping tool*/ -- GitLab From a5a056c8d2ba60017dffb914bdf92c5562defc48 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 23 Jan 2025 14:48:12 +0100 Subject: [PATCH 0138/1585] HID: intel-thc: fix CONFIG_HID dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In drivers/hid/, most drivers depend on CONFIG_HID, while a couple of the drivers in subdirectories instead depend on CONFIG_HID_SUPPORT and use 'select HID'. With the newly added INTEL_THC_HID, this causes a build warning for a circular dependency: WARNING: unmet direct dependencies detected for HID Depends on [m]: HID_SUPPORT [=y] && INPUT [=m] Selected by [y]: - INTEL_THC_HID [=y] && HID_SUPPORT [=y] && X86_64 [=y] && PCI [=y] && ACPI [=y] WARNING: unmet direct dependencies detected for INPUT_FF_MEMLESS Depends on [m]: INPUT [=m] Selected by [y]: - HID_MICROSOFT [=y] && HID_SUPPORT [=y] && HID [=y] - GREENASIA_FF [=y] && HID_SUPPORT [=y] && HID [=y] && HID_GREENASIA [=y] - HID_WIIMOTE [=y] && HID_SUPPORT [=y] && HID [=y] && LEDS_CLASS [=y] - ZEROPLUS_FF [=y] && HID_SUPPORT [=y] && HID [=y] && HID_ZEROPLUS [=y] Selected by [m]: - HID_ACRUX_FF [=y] && HID_SUPPORT [=y] && HID [=y] && HID_ACRUX [=m] - HID_EMS_FF [=m] && HID_SUPPORT [=y] && HID [=y] - HID_GOOGLE_STADIA_FF [=m] && HID_SUPPORT [=y] && HID [=y] - PANTHERLORD_FF [=y] && HID_SUPPORT [=y] && HID [=y] && HID_PANTHERLORD [=m] It's better to be consistent and always use 'depends on HID' for HID drivers. The notable exception here is USB_KBD/USB_MOUSE, which are alternative implementations that do not depend on the HID subsystem. Do this by extending the "if HID" section below, which means that a few of the duplicate "depends on HID" and "depends on INPUT" statements can be removed in the process. Fixes: 1b2d05384c29 ("HID: intel-thc-hid: Add basic THC driver skeleton") Signed-off-by: Arnd Bergmann Reviewed-by: Ilpo Järvinen Reviewed-by: Maximilian Luz Reviewed-by: Even Xu Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 10 ++++++---- drivers/hid/amd-sfh-hid/Kconfig | 1 - drivers/hid/i2c-hid/Kconfig | 2 +- drivers/hid/intel-ish-hid/Kconfig | 1 - drivers/hid/intel-thc-hid/Kconfig | 1 - drivers/hid/surface-hid/Kconfig | 2 -- drivers/hid/usbhid/Kconfig | 3 +-- net/bluetooth/hidp/Kconfig | 3 +-- 8 files changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 8adb745c5b28c..ed657ef7281c8 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -1376,10 +1376,6 @@ endmenu source "drivers/hid/bpf/Kconfig" -endif # HID - -source "drivers/hid/usbhid/Kconfig" - source "drivers/hid/i2c-hid/Kconfig" source "drivers/hid/intel-ish-hid/Kconfig" @@ -1390,4 +1386,10 @@ source "drivers/hid/surface-hid/Kconfig" source "drivers/hid/intel-thc-hid/Kconfig" +endif # HID + +# USB support may be used with HID disabled + +source "drivers/hid/usbhid/Kconfig" + endif # HID_SUPPORT diff --git a/drivers/hid/amd-sfh-hid/Kconfig b/drivers/hid/amd-sfh-hid/Kconfig index 329de5e12c1a0..3291786a5ee6a 100644 --- a/drivers/hid/amd-sfh-hid/Kconfig +++ b/drivers/hid/amd-sfh-hid/Kconfig @@ -5,7 +5,6 @@ menu "AMD SFH HID Support" config AMD_SFH_HID tristate "AMD Sensor Fusion Hub" - depends on HID depends on X86 help If you say yes to this option, support will be included for the diff --git a/drivers/hid/i2c-hid/Kconfig b/drivers/hid/i2c-hid/Kconfig index ef7c595c9403c..e8d51f410cc12 100644 --- a/drivers/hid/i2c-hid/Kconfig +++ b/drivers/hid/i2c-hid/Kconfig @@ -2,7 +2,7 @@ menuconfig I2C_HID tristate "I2C HID support" default y - depends on I2C && INPUT && HID + depends on I2C if I2C_HID diff --git a/drivers/hid/intel-ish-hid/Kconfig b/drivers/hid/intel-ish-hid/Kconfig index 253dc10d35ef2..568c8688784e7 100644 --- a/drivers/hid/intel-ish-hid/Kconfig +++ b/drivers/hid/intel-ish-hid/Kconfig @@ -6,7 +6,6 @@ config INTEL_ISH_HID tristate "Intel Integrated Sensor Hub" default n depends on X86 - depends on HID help The Integrated Sensor Hub (ISH) enables the ability to offload sensor polling and algorithm processing to a dedicated low power diff --git a/drivers/hid/intel-thc-hid/Kconfig b/drivers/hid/intel-thc-hid/Kconfig index 91ec84902db8f..0351d11376072 100644 --- a/drivers/hid/intel-thc-hid/Kconfig +++ b/drivers/hid/intel-thc-hid/Kconfig @@ -7,7 +7,6 @@ menu "Intel THC HID Support" config INTEL_THC_HID tristate "Intel Touch Host Controller" depends on ACPI - select HID help THC (Touch Host Controller) is the name of the IP block in PCH that interfaces with Touch Devices (ex: touchscreen, touchpad etc.). It diff --git a/drivers/hid/surface-hid/Kconfig b/drivers/hid/surface-hid/Kconfig index 7ce9b5d641eb7..d0cfd0d299263 100644 --- a/drivers/hid/surface-hid/Kconfig +++ b/drivers/hid/surface-hid/Kconfig @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0+ menu "Surface System Aggregator Module HID support" depends on SURFACE_AGGREGATOR - depends on INPUT config SURFACE_HID tristate "HID transport driver for Surface System Aggregator Module" @@ -39,4 +38,3 @@ endmenu config SURFACE_HID_CORE tristate - select HID diff --git a/drivers/hid/usbhid/Kconfig b/drivers/hid/usbhid/Kconfig index 7c2032f7f44de..f3194767a45eb 100644 --- a/drivers/hid/usbhid/Kconfig +++ b/drivers/hid/usbhid/Kconfig @@ -5,8 +5,7 @@ menu "USB HID support" config USB_HID tristate "USB HID transport layer" default y - depends on USB && INPUT - select HID + depends on HID help Say Y here if you want to connect USB keyboards, mice, joysticks, graphic tablets, or any other HID based devices diff --git a/net/bluetooth/hidp/Kconfig b/net/bluetooth/hidp/Kconfig index 6746be07e2220..e08aae35351a7 100644 --- a/net/bluetooth/hidp/Kconfig +++ b/net/bluetooth/hidp/Kconfig @@ -1,8 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config BT_HIDP tristate "HIDP protocol support" - depends on BT_BREDR && INPUT && HID_SUPPORT - select HID + depends on BT_BREDR && HID help HIDP (Human Interface Device Protocol) is a transport layer for HID reports. HIDP is required for the Bluetooth Human -- GitLab From e0efe83ed325277bb70f9435d4d9fc70bebdcca8 Mon Sep 17 00:00:00 2001 From: Lenny Szubowicz Date: Thu, 30 Jan 2025 16:57:54 -0500 Subject: [PATCH 0139/1585] tg3: Disable tg3 PCIe AER on system reboot Disable PCIe AER on the tg3 device on system reboot on a limited list of Dell PowerEdge systems. This prevents a fatal PCIe AER event on the tg3 device during the ACPI _PTS (prepare to sleep) method for S5 on those systems. The _PTS is invoked by acpi_enter_sleep_state_prep() as part of the kernel's reboot sequence as a result of commit 38f34dba806a ("PM: ACPI: reboot: Reinstate S5 for reboot"). There was an earlier fix for this problem by commit 2ca1c94ce0b6 ("tg3: Disable tg3 device on system reboot to avoid triggering AER"). But it was discovered that this earlier fix caused a reboot hang when some Dell PowerEdge servers were booted via ipxe. To address this reboot hang, the earlier fix was essentially reverted by commit 9fc3bc764334 ("tg3: power down device only on SYSTEM_POWER_OFF"). This re-exposed the tg3 PCIe AER on reboot problem. This fix is not an ideal solution because the root cause of the AER is in system firmware. Instead, it's a targeted work-around in the tg3 driver. Note also that the PCIe AER must be disabled on the tg3 device even if the system is configured to use "firmware first" error handling. V3: - Fix sparse warning on improper comparison of pdev->current_state - Adhere to netdev comment style Fixes: 9fc3bc764334 ("tg3: power down device only on SYSTEM_POWER_OFF") Signed-off-by: Lenny Szubowicz Reviewed-by: Pavan Chebbi Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 58 +++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 1c94bf1db7186..d9d675f1ebfe9 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include @@ -18212,6 +18213,50 @@ static int tg3_resume(struct device *device) static SIMPLE_DEV_PM_OPS(tg3_pm_ops, tg3_suspend, tg3_resume); +/* Systems where ACPI _PTS (Prepare To Sleep) S5 will result in a fatal + * PCIe AER event on the tg3 device if the tg3 device is not, or cannot + * be, powered down. + */ +static const struct dmi_system_id tg3_restart_aer_quirk_table[] = { + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R440"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R540"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R640"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R650"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R740"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R750"), + }, + }, + {} +}; + static void tg3_shutdown(struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata(pdev); @@ -18228,6 +18273,19 @@ static void tg3_shutdown(struct pci_dev *pdev) if (system_state == SYSTEM_POWER_OFF) tg3_power_down(tp); + else if (system_state == SYSTEM_RESTART && + dmi_first_match(tg3_restart_aer_quirk_table) && + pdev->current_state != PCI_D3cold && + pdev->current_state != PCI_UNKNOWN) { + /* Disable PCIe AER on the tg3 to avoid a fatal + * error during this system restart. + */ + pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_CERE | + PCI_EXP_DEVCTL_NFERE | + PCI_EXP_DEVCTL_FERE | + PCI_EXP_DEVCTL_URRE); + } rtnl_unlock(); -- GitLab From 235174b2bed88501fda689c113c55737f99332d8 Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Fri, 31 Jan 2025 00:31:39 -0800 Subject: [PATCH 0140/1585] udp: gso: do not drop small packets when PMTU reduces Commit 4094871db1d6 ("udp: only do GSO if # of segs > 1") avoided GSO for small packets. But the kernel currently dismisses GSO requests only after checking MTU/PMTU on gso_size. This means any packets, regardless of their payload sizes, could be dropped when PMTU becomes smaller than requested gso_size. We encountered this issue in production and it caused a reliability problem that new QUIC connection cannot be established before PMTU cache expired, while non GSO sockets still worked fine at the same time. Ideally, do not check any GSO related constraints when payload size is smaller than requested gso_size, and return EMSGSIZE instead of EINVAL on MTU/PMTU check failure to be more specific on the error cause. Fixes: 4094871db1d6 ("udp: only do GSO if # of segs > 1") Signed-off-by: Yan Zhai Suggested-by: Willem de Bruijn Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/udp.c | 4 ++-- net/ipv6/udp.c | 4 ++-- tools/testing/selftests/net/udpgso.c | 26 ++++++++++++++++++++++++++ 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c472c9a57cf68..a9bb9ce5438ea 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1141,9 +1141,9 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) { + if (hlen + min(datalen, cork->gso_size) > cork->fragsize) { kfree_skb(skb); - return -EINVAL; + return -EMSGSIZE; } if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6671daa67f4fa..c6ea438b5c758 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1389,9 +1389,9 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) { + if (hlen + min(datalen, cork->gso_size) > cork->fragsize) { kfree_skb(skb); - return -EINVAL; + return -EMSGSIZE; } if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 3f2fca02fec53..36ff28af4b190 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -102,6 +102,19 @@ struct testcase testcases_v4[] = { .gso_len = CONST_MSS_V4, .r_num_mss = 1, }, + { + /* datalen <= MSS < gso_len: will fall back to no GSO */ + .tlen = CONST_MSS_V4, + .gso_len = CONST_MSS_V4 + 1, + .r_num_mss = 0, + .r_len_last = CONST_MSS_V4, + }, + { + /* MSS < datalen < gso_len: fail */ + .tlen = CONST_MSS_V4 + 1, + .gso_len = CONST_MSS_V4 + 2, + .tfail = true, + }, { /* send a single MSS + 1B */ .tlen = CONST_MSS_V4 + 1, @@ -205,6 +218,19 @@ struct testcase testcases_v6[] = { .gso_len = CONST_MSS_V6, .r_num_mss = 1, }, + { + /* datalen <= MSS < gso_len: will fall back to no GSO */ + .tlen = CONST_MSS_V6, + .gso_len = CONST_MSS_V6 + 1, + .r_num_mss = 0, + .r_len_last = CONST_MSS_V6, + }, + { + /* MSS < datalen < gso_len: fail */ + .tlen = CONST_MSS_V6 + 1, + .gso_len = CONST_MSS_V6 + 2, + .tfail = true + }, { /* send a single MSS + 1B */ .tlen = CONST_MSS_V6 + 1, -- GitLab From 363236d709e75610b628c2a4337ccbe42e454b6d Mon Sep 17 00:00:00 2001 From: "Chia-Lin Kao (AceLan)" Date: Wed, 15 Jan 2025 15:00:20 +0800 Subject: [PATCH 0141/1585] HID: ignore non-functional sensor in HP 5MP Camera The HP 5MP Camera (USB ID 0408:5473) reports a HID sensor interface that is not actually implemented. Attempting to access this non-functional sensor via iio_info causes system hangs as runtime PM tries to wake up an unresponsive sensor. [453] hid-sensor-hub 0003:0408:5473.0003: Report latency attributes: ffffffff:ffffffff [453] hid-sensor-hub 0003:0408:5473.0003: common attributes: 5:1, 2:1, 3:1 ffffffff:ffffffff Add this device to the HID ignore list since the sensor interface is non-functional by design and should not be exposed to userspace. Signed-off-by: Chia-Lin Kao (AceLan) Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index c448de53bf91e..7debfe0c5cb98 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1095,6 +1095,7 @@ #define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3001 0x3001 #define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3003 0x3003 #define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3008 0x3008 +#define USB_DEVICE_ID_QUANTA_HP_5MP_CAMERA_5473 0x5473 #define I2C_VENDOR_ID_RAYDIUM 0x2386 #define I2C_PRODUCT_ID_RAYDIUM_4B33 0x4b33 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index e0bbf0c6345d6..5d7a418ccdbec 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -891,6 +891,7 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_DPAD) }, #endif { HID_USB_DEVICE(USB_VENDOR_ID_YEALINK, USB_DEVICE_ID_YEALINK_P1K_P4K_B2K) }, + { HID_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_HP_5MP_CAMERA_5473) }, { } }; -- GitLab From 05c4ede6951b5d8e083b6bb237950cac59bdeb92 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Wed, 15 Jan 2025 17:28:16 -0800 Subject: [PATCH 0142/1585] HID: hid-steam: Fix issues with disabling both gamepad mode and lizard mode When lizard mode is disabled, there were two issues: 1. Switching between gamepad mode and desktop mode still functioned, even though desktop mode did not. This lead to the ability to "break" gamepad mode by holding down the Options key even while lizard mode is disabled 2. If you were in desktop mode when lizard mode is disabled, you would immediately enter this faulty mode. This patch properly disables the ability to switch between gamepad mode and the faulty desktop mode by holding the Options key, as well as effectively removing the faulty mode by bypassing the early returns if lizard mode is disabled. Reported-by: Eugeny Shcheglov Signed-off-by: Vicki Pfau Signed-off-by: Jiri Kosina --- drivers/hid/hid-steam.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index af38fc8eb34fd..b008fd0834b94 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -1050,10 +1050,10 @@ static void steam_mode_switch_cb(struct work_struct *work) struct steam_device, mode_switch); unsigned long flags; bool client_opened; - steam->gamepad_mode = !steam->gamepad_mode; if (!lizard_mode) return; + steam->gamepad_mode = !steam->gamepad_mode; if (steam->gamepad_mode) steam_set_lizard_mode(steam, false); else { @@ -1599,7 +1599,7 @@ static void steam_do_deck_input_event(struct steam_device *steam, schedule_delayed_work(&steam->mode_switch, 45 * HZ / 100); } - if (!steam->gamepad_mode) + if (!steam->gamepad_mode && lizard_mode) return; lpad_touched = b10 & BIT(3); @@ -1669,7 +1669,7 @@ static void steam_do_deck_sensors_event(struct steam_device *steam, */ steam->sensor_timestamp_us += 4000; - if (!steam->gamepad_mode) + if (!steam->gamepad_mode && lizard_mode) return; input_event(sensors, EV_MSC, MSC_TIMESTAMP, steam->sensor_timestamp_us); -- GitLab From 3fb3cb4350befc4f901c54e0cb4a2a47b1302e08 Mon Sep 17 00:00:00 2001 From: Andrey Vatoropin Date: Thu, 30 Jan 2025 09:00:34 +0000 Subject: [PATCH 0143/1585] power: supply: da9150-fg: fix potential overflow Size of variable sd_gain equals four bytes - DA9150_QIF_SD_GAIN_SIZE. Size of variable shunt_val equals two bytes - DA9150_QIF_SHUNT_VAL_SIZE. The expression sd_gain * shunt_val is currently being evaluated using 32-bit arithmetic. So during the multiplication an overflow may occur. As the value of type 'u64' is used as storage for the eventual result, put ULL variable at the first position of each expression in order to give the compiler complete information about the proper arithmetic to use. According to C99 the guaranteed width for a variable of type 'unsigned long long' >= 64 bits. Remove the explicit cast to u64 as it is meaningless. Just for the sake of consistency, perform the similar trick with another expression concerning 'iavg'. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: a419b4fd9138 ("power: Add support for DA9150 Fuel-Gauge") Signed-off-by: Andrey Vatoropin Link: https://lore.kernel.org/r/20250130090030.53422-1-a.vatoropin@crpt.ru Signed-off-by: Sebastian Reichel --- drivers/power/supply/da9150-fg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/power/supply/da9150-fg.c b/drivers/power/supply/da9150-fg.c index 652c1f213af1c..4f28ef1bba1a3 100644 --- a/drivers/power/supply/da9150-fg.c +++ b/drivers/power/supply/da9150-fg.c @@ -247,9 +247,9 @@ static int da9150_fg_current_avg(struct da9150_fg *fg, DA9150_QIF_SD_GAIN_SIZE); da9150_fg_read_sync_end(fg); - div = (u64) (sd_gain * shunt_val * 65536ULL); + div = 65536ULL * sd_gain * shunt_val; do_div(div, 1000000); - res = (u64) (iavg * 1000000ULL); + res = 1000000ULL * iavg; do_div(res, div); val->intval = (int) res; -- GitLab From 64dd6edfc421479e416301c48b79cece8d0351fc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 30 Jan 2025 15:00:35 +0100 Subject: [PATCH 0144/1585] power: supply: core: Fix extension related lockdep warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 6037802bbae8 ("power: supply: core: implement extension API") there is the following ABBA deadlock (simplified) between the LED trigger code and the power-supply code: 1) When registering a power-supply class device, power_supply_register() calls led_trigger_register() from power_supply_create_triggers() in a scoped_guard(rwsem_read, &psy->extensions_sem) context. led_trigger_register() then in turn takes a LED subsystem lock. So here we have the following locking order: * Read-lock extensions_sem * Lock LED subsystem lock(s) 2) When registering a LED class device, with its default trigger set to a power-supply LED trigger (which has already been registered) The LED class code calls power_supply_led_trigger_activate() when setting up the default trigger. power_supply_led_trigger_activate() calls power_supply_get_property() to determine the initial value of to assign to the LED and that read-locks extensions_sem. So now we have the following locking order: * Lock LED subsystem lock(s) * Read-lock extensions_sem Fixing this is easy, there is no need to hold the extensions_sem when calling power_supply_create_triggers() since all triggers are always created rather then checking for the presence of certain attributes as power_supply_add_hwmon_sysfs() does. Move power_supply_create_triggers() out of the guard block to fix this. Here is the lockdep report fixed by this change: [ 31.249343] ====================================================== [ 31.249378] WARNING: possible circular locking dependency detected [ 31.249413] 6.13.0-rc6+ #251 Tainted: G C E [ 31.249440] ------------------------------------------------------ [ 31.249471] (udev-worker)/553 is trying to acquire lock: [ 31.249501] ffff892adbcaf660 (&psy->extensions_sem){.+.+}-{4:4}, at: power_supply_get_property.part.0+0x22/0x150 [ 31.249574] but task is already holding lock: [ 31.249603] ffff892adbc0bad0 (&led_cdev->trigger_lock){+.+.}-{4:4}, at: led_trigger_set_default+0x34/0xe0 [ 31.249657] which lock already depends on the new lock. [ 31.249696] the existing dependency chain (in reverse order) is: [ 31.249735] -> #2 (&led_cdev->trigger_lock){+.+.}-{4:4}: [ 31.249778] down_write+0x3b/0xd0 [ 31.249803] led_trigger_set_default+0x34/0xe0 [ 31.249833] led_classdev_register_ext+0x311/0x3a0 [ 31.249863] input_leds_connect+0x1dc/0x2a0 [ 31.249889] input_attach_handler.isra.0+0x75/0x90 [ 31.249921] input_register_device.cold+0xa1/0x150 [ 31.249955] hidinput_connect+0x8a2/0xb80 [ 31.249982] hid_connect+0x582/0x5c0 [ 31.250007] hid_hw_start+0x3f/0x60 [ 31.250030] hid_device_probe+0x122/0x1f0 [ 31.250053] really_probe+0xde/0x340 [ 31.250080] __driver_probe_device+0x78/0x110 [ 31.250105] driver_probe_device+0x1f/0xa0 [ 31.250132] __device_attach_driver+0x85/0x110 [ 31.250160] bus_for_each_drv+0x78/0xc0 [ 31.250184] __device_attach+0xb0/0x1b0 [ 31.250207] bus_probe_device+0x94/0xb0 [ 31.250230] device_add+0x64a/0x860 [ 31.250252] hid_add_device+0xe5/0x240 [ 31.250279] usbhid_probe+0x4dc/0x620 [ 31.250303] usb_probe_interface+0xe4/0x2a0 [ 31.250329] really_probe+0xde/0x340 [ 31.250353] __driver_probe_device+0x78/0x110 [ 31.250377] driver_probe_device+0x1f/0xa0 [ 31.250404] __device_attach_driver+0x85/0x110 [ 31.250431] bus_for_each_drv+0x78/0xc0 [ 31.250455] __device_attach+0xb0/0x1b0 [ 31.250478] bus_probe_device+0x94/0xb0 [ 31.250501] device_add+0x64a/0x860 [ 31.250523] usb_set_configuration+0x606/0x8a0 [ 31.250552] usb_generic_driver_probe+0x3e/0x60 [ 31.250579] usb_probe_device+0x3d/0x120 [ 31.250605] really_probe+0xde/0x340 [ 31.250629] __driver_probe_device+0x78/0x110 [ 31.250653] driver_probe_device+0x1f/0xa0 [ 31.250680] __device_attach_driver+0x85/0x110 [ 31.250707] bus_for_each_drv+0x78/0xc0 [ 31.250731] __device_attach+0xb0/0x1b0 [ 31.250753] bus_probe_device+0x94/0xb0 [ 31.250776] device_add+0x64a/0x860 [ 31.250798] usb_new_device.cold+0x141/0x38f [ 31.250828] hub_event+0x1166/0x1980 [ 31.250854] process_one_work+0x20f/0x580 [ 31.250879] worker_thread+0x1d1/0x3b0 [ 31.250904] kthread+0xee/0x120 [ 31.250926] ret_from_fork+0x30/0x50 [ 31.250954] ret_from_fork_asm+0x1a/0x30 [ 31.250982] -> #1 (triggers_list_lock){++++}-{4:4}: [ 31.251022] down_write+0x3b/0xd0 [ 31.251045] led_trigger_register+0x40/0x1b0 [ 31.251074] power_supply_register_led_trigger+0x88/0x150 [ 31.251107] power_supply_create_triggers+0x55/0xe0 [ 31.251135] __power_supply_register.part.0+0x34e/0x4a0 [ 31.251164] devm_power_supply_register+0x70/0xc0 [ 31.251190] bq27xxx_battery_setup+0x1a1/0x6d0 [bq27xxx_battery] [ 31.251235] bq27xxx_battery_i2c_probe+0xe5/0x17f [bq27xxx_battery_i2c] [ 31.251272] i2c_device_probe+0x125/0x2b0 [ 31.251299] really_probe+0xde/0x340 [ 31.251324] __driver_probe_device+0x78/0x110 [ 31.251348] driver_probe_device+0x1f/0xa0 [ 31.251375] __driver_attach+0xba/0x1c0 [ 31.251398] bus_for_each_dev+0x6b/0xb0 [ 31.251421] bus_add_driver+0x111/0x1f0 [ 31.251445] driver_register+0x6e/0xc0 [ 31.251470] i2c_register_driver+0x41/0xb0 [ 31.251498] do_one_initcall+0x5e/0x3a0 [ 31.251522] do_init_module+0x60/0x220 [ 31.251550] __do_sys_init_module+0x15f/0x190 [ 31.251575] do_syscall_64+0x93/0x180 [ 31.251598] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 31.251629] -> #0 (&psy->extensions_sem){.+.+}-{4:4}: [ 31.251668] __lock_acquire+0x13ce/0x21c0 [ 31.251694] lock_acquire+0xcf/0x2e0 [ 31.251719] down_read+0x3e/0x170 [ 31.251741] power_supply_get_property.part.0+0x22/0x150 [ 31.251774] power_supply_update_leds+0x8d/0x230 [ 31.251804] power_supply_led_trigger_activate+0x18/0x20 [ 31.251837] led_trigger_set+0x1fc/0x300 [ 31.251863] led_trigger_set_default+0x90/0xe0 [ 31.251892] led_classdev_register_ext+0x311/0x3a0 [ 31.251921] devm_led_classdev_multicolor_register_ext+0x6e/0xb80 [led_class_multicolor] [ 31.251969] ktd202x_probe+0x464/0x5c0 [leds_ktd202x] [ 31.252002] i2c_device_probe+0x125/0x2b0 [ 31.252027] really_probe+0xde/0x340 [ 31.252052] __driver_probe_device+0x78/0x110 [ 31.252076] driver_probe_device+0x1f/0xa0 [ 31.252103] __driver_attach+0xba/0x1c0 [ 31.252125] bus_for_each_dev+0x6b/0xb0 [ 31.252148] bus_add_driver+0x111/0x1f0 [ 31.252172] driver_register+0x6e/0xc0 [ 31.252197] i2c_register_driver+0x41/0xb0 [ 31.252225] do_one_initcall+0x5e/0x3a0 [ 31.252248] do_init_module+0x60/0x220 [ 31.252274] __do_sys_init_module+0x15f/0x190 [ 31.253986] do_syscall_64+0x93/0x180 [ 31.255826] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 31.257614] other info that might help us debug this: [ 31.257619] Chain exists of: &psy->extensions_sem --> triggers_list_lock --> &led_cdev->trigger_lock [ 31.257630] Possible unsafe locking scenario: [ 31.257632] CPU0 CPU1 [ 31.257633] ---- ---- [ 31.257634] lock(&led_cdev->trigger_lock); [ 31.257637] lock(triggers_list_lock); [ 31.257640] lock(&led_cdev->trigger_lock); [ 31.257643] rlock(&psy->extensions_sem); [ 31.257646] *** DEADLOCK *** [ 31.289433] 4 locks held by (udev-worker)/553: [ 31.289443] #0: ffff892ad9658108 (&dev->mutex){....}-{4:4}, at: __driver_attach+0xaf/0x1c0 [ 31.289463] #1: ffff892adbc0bbc8 (&led_cdev->led_access){+.+.}-{4:4}, at: led_classdev_register_ext+0x1c7/0x3a0 [ 31.289476] #2: ffffffffad0e30b0 (triggers_list_lock){++++}-{4:4}, at: led_trigger_set_default+0x2c/0xe0 [ 31.289487] #3: ffff892adbc0bad0 (&led_cdev->trigger_lock){+.+.}-{4:4}, at: led_trigger_set_default+0x34/0xe0 Fixes: 6037802bbae8 ("power: supply: core: implement extension API") Cc: Thomas Weißschuh Cc: Armin Wolf Signed-off-by: Hans de Goede Reviewed-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250130140035.20636-1-hdegoede@redhat.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index d0bb52a7a0367..76c340b38015a 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -1592,11 +1592,11 @@ __power_supply_register(struct device *parent, if (rc) goto register_thermal_failed; - scoped_guard(rwsem_read, &psy->extensions_sem) { - rc = power_supply_create_triggers(psy); - if (rc) - goto create_triggers_failed; + rc = power_supply_create_triggers(psy); + if (rc) + goto create_triggers_failed; + scoped_guard(rwsem_read, &psy->extensions_sem) { rc = power_supply_add_hwmon_sysfs(psy); if (rc) goto add_hwmon_sysfs_failed; -- GitLab From d97505baea64d93538b16baf14ce7b8c1fbad746 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 19 Jan 2025 14:36:13 +0200 Subject: [PATCH 0145/1585] RDMA/mlx5: Fix the recovery flow of the UMR QP This patch addresses an issue in the recovery flow of the UMR QP, ensuring tasks do not get stuck, as highlighted by the call trace [1]. During recovery, before transitioning the QP to the RESET state, the software must wait for all outstanding WRs to complete. Failing to do so can cause the firmware to skip sending some flushed CQEs with errors and simply discard them upon the RESET, as per the IB specification. This race condition can result in lost CQEs and tasks becoming stuck. To resolve this, the patch sends a final WR which serves only as a barrier before moving the QP state to RESET. Once a CQE is received for that final WR, it guarantees that no outstanding WRs remain, making it safe to transition the QP to RESET and subsequently back to RTS, restoring proper functionality. Note: For the barrier WR, we simply reuse the failed and ready WR. Since the QP is in an error state, it will only receive IB_WC_WR_FLUSH_ERR. However, as it serves only as a barrier we don't care about its status. [1] INFO: task rdma_resource_l:1922 blocked for more than 120 seconds. Tainted: G W 6.12.0-rc7+ #1626 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:rdma_resource_l state:D stack:0 pid:1922 tgid:1922 ppid:1369 flags:0x00004004 Call Trace: __schedule+0x420/0xd30 schedule+0x47/0x130 schedule_timeout+0x280/0x300 ? mark_held_locks+0x48/0x80 ? lockdep_hardirqs_on_prepare+0xe5/0x1a0 wait_for_completion+0x75/0x130 mlx5r_umr_post_send_wait+0x3c2/0x5b0 [mlx5_ib] ? __pfx_mlx5r_umr_done+0x10/0x10 [mlx5_ib] mlx5r_umr_revoke_mr+0x93/0xc0 [mlx5_ib] __mlx5_ib_dereg_mr+0x299/0x520 [mlx5_ib] ? _raw_spin_unlock_irq+0x24/0x40 ? wait_for_completion+0xfe/0x130 ? rdma_restrack_put+0x63/0xe0 [ib_core] ib_dereg_mr_user+0x5f/0x120 [ib_core] ? lock_release+0xc6/0x280 destroy_hw_idr_uobject+0x1d/0x60 [ib_uverbs] uverbs_destroy_uobject+0x58/0x1d0 [ib_uverbs] uobj_destroy+0x3f/0x70 [ib_uverbs] ib_uverbs_cmd_verbs+0x3e4/0xbb0 [ib_uverbs] ? __pfx_uverbs_destroy_def_handler+0x10/0x10 [ib_uverbs] ? __lock_acquire+0x64e/0x2080 ? mark_held_locks+0x48/0x80 ? find_held_lock+0x2d/0xa0 ? lock_acquire+0xc1/0x2f0 ? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs] ? __fget_files+0xc3/0x1b0 ib_uverbs_ioctl+0xe7/0x170 [ib_uverbs] ? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs] __x64_sys_ioctl+0x1b0/0xa70 do_syscall_64+0x6b/0x140 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f99c918b17b RSP: 002b:00007ffc766d0468 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007ffc766d0578 RCX: 00007f99c918b17b RDX: 00007ffc766d0560 RSI: 00000000c0181b01 RDI: 0000000000000003 RBP: 00007ffc766d0540 R08: 00007f99c8f99010 R09: 000000000000bd7e R10: 00007f99c94c1c70 R11: 0000000000000246 R12: 00007ffc766d0530 R13: 000000000000001c R14: 0000000040246a80 R15: 0000000000000000 Fixes: 158e71bb69e3 ("RDMA/mlx5: Add a umr recovery flow") Signed-off-by: Yishai Hadas Reviewed-by: Michael Guralnik Link: https://patch.msgid.link/27b51b92ec42dfb09d8096fcbd51878f397ce6ec.1737290141.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/umr.c | 83 +++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c index 887fd6fa3ba93..793f3c5c4d012 100644 --- a/drivers/infiniband/hw/mlx5/umr.c +++ b/drivers/infiniband/hw/mlx5/umr.c @@ -231,30 +231,6 @@ void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev) ib_dealloc_pd(dev->umrc.pd); } -static int mlx5r_umr_recover(struct mlx5_ib_dev *dev) -{ - struct umr_common *umrc = &dev->umrc; - struct ib_qp_attr attr; - int err; - - attr.qp_state = IB_QPS_RESET; - err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE); - if (err) { - mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); - goto err; - } - - err = mlx5r_umr_qp_rst2rts(dev, umrc->qp); - if (err) - goto err; - - umrc->state = MLX5_UMR_STATE_ACTIVE; - return 0; - -err: - umrc->state = MLX5_UMR_STATE_ERR; - return err; -} static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe, struct mlx5r_umr_wqe *wqe, bool with_data) @@ -302,6 +278,61 @@ static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe, return err; } +static int mlx5r_umr_recover(struct mlx5_ib_dev *dev, u32 mkey, + struct mlx5r_umr_context *umr_context, + struct mlx5r_umr_wqe *wqe, bool with_data) +{ + struct umr_common *umrc = &dev->umrc; + struct ib_qp_attr attr; + int err; + + mutex_lock(&umrc->lock); + /* Preventing any further WRs to be sent now */ + if (umrc->state != MLX5_UMR_STATE_RECOVER) { + mlx5_ib_warn(dev, "UMR recovery encountered an unexpected state=%d\n", + umrc->state); + umrc->state = MLX5_UMR_STATE_RECOVER; + } + mutex_unlock(&umrc->lock); + + /* Sending a final/barrier WR (the failed one) and wait for its completion. + * This will ensure that all the previous WRs got a completion before + * we set the QP state to RESET. + */ + err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context->cqe, wqe, + with_data); + if (err) { + mlx5_ib_warn(dev, "UMR recovery post send failed, err %d\n", err); + goto err; + } + + /* Since the QP is in an error state, it will only receive + * IB_WC_WR_FLUSH_ERR. However, as it serves only as a barrier + * we don't care about its status. + */ + wait_for_completion(&umr_context->done); + + attr.qp_state = IB_QPS_RESET; + err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE); + if (err) { + mlx5_ib_warn(dev, "Couldn't modify UMR QP to RESET, err=%d\n", err); + goto err; + } + + err = mlx5r_umr_qp_rst2rts(dev, umrc->qp); + if (err) { + mlx5_ib_warn(dev, "Couldn't modify UMR QP to RTS, err=%d\n", err); + goto err; + } + + umrc->state = MLX5_UMR_STATE_ACTIVE; + return 0; + +err: + umrc->state = MLX5_UMR_STATE_ERR; + return err; +} + static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc) { struct mlx5_ib_umr_context *context = @@ -366,9 +397,7 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey, mlx5_ib_warn(dev, "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs, mkey = %u\n", umr_context.status, mkey); - mutex_lock(&umrc->lock); - err = mlx5r_umr_recover(dev); - mutex_unlock(&umrc->lock); + err = mlx5r_umr_recover(dev, mkey, &umr_context, wqe, with_data); if (err) mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n", err); -- GitLab From 12d044770e12c4205fa69535b4fa8a9981fea98f Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Sun, 19 Jan 2025 14:39:46 +0200 Subject: [PATCH 0146/1585] IB/mlx5: Set and get correct qp_num for a DCT QP When a DCT QP is created on an active lag, it's dctc.port is assigned in a round-robin way, which is from 1 to dev->lag_port. In this case when querying this QP, we may get qp_attr.port_num > 2. Fix this by setting qp->port when modifying a DCT QP, and read port_num from qp->port instead of dctc.port when querying it. Fixes: 7c4b1ab9f167 ("IB/mlx5: Add DCT RoCE LAG support") Signed-off-by: Mark Zhang Reviewed-by: Maher Sanalla Link: https://patch.msgid.link/94c76bf0adbea997f87ffa27674e0a7118ad92a9.1737290358.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/qp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index a43eba9d3572c..08d22db8dca91 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4579,6 +4579,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1); MLX5_SET(dctc, dctc, counter_set_id, set_id); + + qp->port = attr->port_num; } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { struct mlx5_ib_modify_qp_resp resp = {}; u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {}; @@ -5074,7 +5076,7 @@ static int mlx5_ib_dct_query_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *mqp, } if (qp_attr_mask & IB_QP_PORT) - qp_attr->port_num = MLX5_GET(dctc, dctc, port); + qp_attr->port_num = mqp->port; if (qp_attr_mask & IB_QP_MIN_RNR_TIMER) qp_attr->min_rnr_timer = MLX5_GET(dctc, dctc, min_rnr_nak); if (qp_attr_mask & IB_QP_AV) { -- GitLab From 98380110bd48fbfd6a798ee11fffff893d36062c Mon Sep 17 00:00:00 2001 From: Chris Morgan Date: Fri, 31 Jan 2025 17:14:51 -0600 Subject: [PATCH 0147/1585] power: supply: axp20x_battery: Fix fault handling for AXP717 Correct the fault handling for the AXP717 by changing the i2c write from regmap_update_bits() to regmap_write_bits(). The update bits function does not work properly on a RW1C register where we must write a 1 back to an existing register to clear it. Additionally, as part of this testing I confirmed the behavior of errors reappearing, so remove comment about assumptions. Fixes: 6625767049c2 ("power: supply: axp20x_battery: add support for AXP717") Signed-off-by: Chris Morgan Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20250131231455.153447-2-macroalpha82@gmail.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/axp20x_battery.c | 31 +++++++++++++-------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/power/supply/axp20x_battery.c b/drivers/power/supply/axp20x_battery.c index fa27195f074e7..3c3158f31a484 100644 --- a/drivers/power/supply/axp20x_battery.c +++ b/drivers/power/supply/axp20x_battery.c @@ -466,10 +466,9 @@ static int axp717_battery_get_prop(struct power_supply *psy, /* * If a fault is detected it must also be cleared; if the - * condition persists it should reappear (This is an - * assumption, it's actually not documented). A restart was - * not sufficient to clear the bit in testing despite the - * register listed as POR. + * condition persists it should reappear. A restart was not + * sufficient to clear the bit in testing despite the register + * listed as POR. */ case POWER_SUPPLY_PROP_HEALTH: ret = regmap_read(axp20x_batt->regmap, AXP717_PMU_FAULT, @@ -480,26 +479,26 @@ static int axp717_battery_get_prop(struct power_supply *psy, switch (reg & AXP717_BATT_PMU_FAULT_MASK) { case AXP717_BATT_UVLO_2_5V: val->intval = POWER_SUPPLY_HEALTH_DEAD; - regmap_update_bits(axp20x_batt->regmap, - AXP717_PMU_FAULT, - AXP717_BATT_UVLO_2_5V, - AXP717_BATT_UVLO_2_5V); + regmap_write_bits(axp20x_batt->regmap, + AXP717_PMU_FAULT, + AXP717_BATT_UVLO_2_5V, + AXP717_BATT_UVLO_2_5V); return 0; case AXP717_BATT_OVER_TEMP: val->intval = POWER_SUPPLY_HEALTH_HOT; - regmap_update_bits(axp20x_batt->regmap, - AXP717_PMU_FAULT, - AXP717_BATT_OVER_TEMP, - AXP717_BATT_OVER_TEMP); + regmap_write_bits(axp20x_batt->regmap, + AXP717_PMU_FAULT, + AXP717_BATT_OVER_TEMP, + AXP717_BATT_OVER_TEMP); return 0; case AXP717_BATT_UNDER_TEMP: val->intval = POWER_SUPPLY_HEALTH_COLD; - regmap_update_bits(axp20x_batt->regmap, - AXP717_PMU_FAULT, - AXP717_BATT_UNDER_TEMP, - AXP717_BATT_UNDER_TEMP); + regmap_write_bits(axp20x_batt->regmap, + AXP717_PMU_FAULT, + AXP717_BATT_UNDER_TEMP, + AXP717_BATT_UNDER_TEMP); return 0; default: -- GitLab From ac5a41b472b4ef8bb37d7550796d059b377b4646 Mon Sep 17 00:00:00 2001 From: Josua Mayer Date: Mon, 27 Jan 2025 21:12:02 +0100 Subject: [PATCH 0148/1585] Revert "mmc: sdhci_am654: Add sdhci_am654_start_signal_voltage_switch" This reverts commit 941a7abd4666912b84ab209396fdb54b0dae685d. This commit uses presence of device-tree properties vmmc-supply and vqmmc-supply for deciding whether to enable a quirk affecting timing of clock and data. The intention was to address issues observed with eMMC and SD on AM62 platforms. This new quirk is however also enabled for AM64 breaking microSD access on the SolidRun HimmingBoard-T which is supported in-tree since v6.11, causing a regression. During boot microSD initialization now fails with the error below: [ 2.008520] mmc1: SDHCI controller on fa00000.mmc [fa00000.mmc] using ADMA 64-bit [ 2.115348] mmc1: error -110 whilst initialising SD card The heuristics for enabling the quirk are clearly not correct as they break at least one but potentially many existing boards. Revert the change and restore original behaviour until a more appropriate method of selecting the quirk is derived. Fixes: 941a7abd4666 ("mmc: sdhci_am654: Add sdhci_am654_start_signal_voltage_switch") Closes: https://lore.kernel.org/linux-mmc/a70fc9fc-186f-4165-a652-3de50733763a@solid-run.com/ Cc: stable@vger.kernel.org Signed-off-by: Josua Mayer Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20250127-am654-mmc-regression-v2-1-9bb39fb12810@solid-run.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci_am654.c | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index b73f673db92bb..f75c31815ab00 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -155,7 +155,6 @@ struct sdhci_am654_data { u32 tuning_loop; #define SDHCI_AM654_QUIRK_FORCE_CDTEST BIT(0) -#define SDHCI_AM654_QUIRK_SUPPRESS_V1P8_ENA BIT(1) }; struct window { @@ -357,29 +356,6 @@ static void sdhci_j721e_4bit_set_clock(struct sdhci_host *host, sdhci_set_clock(host, clock); } -static int sdhci_am654_start_signal_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) -{ - struct sdhci_host *host = mmc_priv(mmc); - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host); - int ret; - - if ((sdhci_am654->quirks & SDHCI_AM654_QUIRK_SUPPRESS_V1P8_ENA) && - ios->signal_voltage == MMC_SIGNAL_VOLTAGE_180) { - if (!IS_ERR(mmc->supply.vqmmc)) { - ret = mmc_regulator_set_vqmmc(mmc, ios); - if (ret < 0) { - pr_err("%s: Switching to 1.8V signalling voltage failed,\n", - mmc_hostname(mmc)); - return -EIO; - } - } - return 0; - } - - return sdhci_start_signal_voltage_switch(mmc, ios); -} - static u8 sdhci_am654_write_power_on(struct sdhci_host *host, u8 val, int reg) { writeb(val, host->ioaddr + reg); @@ -868,11 +844,6 @@ static int sdhci_am654_get_of_property(struct platform_device *pdev, if (device_property_read_bool(dev, "ti,fails-without-test-cd")) sdhci_am654->quirks |= SDHCI_AM654_QUIRK_FORCE_CDTEST; - /* Suppress v1p8 ena for eMMC and SD with vqmmc supply */ - if (!!of_parse_phandle(dev->of_node, "vmmc-supply", 0) == - !!of_parse_phandle(dev->of_node, "vqmmc-supply", 0)) - sdhci_am654->quirks |= SDHCI_AM654_QUIRK_SUPPRESS_V1P8_ENA; - sdhci_get_of_property(pdev); return 0; @@ -969,7 +940,6 @@ static int sdhci_am654_probe(struct platform_device *pdev) goto err_pltfm_free; } - host->mmc_host_ops.start_signal_voltage_switch = sdhci_am654_start_signal_voltage_switch; host->mmc_host_ops.execute_tuning = sdhci_am654_execute_tuning; pm_runtime_get_noresume(dev); -- GitLab From 3e68abf2b9cebe76c6cd4b1aca8e95cd671035a3 Mon Sep 17 00:00:00 2001 From: Andy-ld Lu Date: Thu, 23 Jan 2025 17:26:01 +0800 Subject: [PATCH 0149/1585] mmc: mtk-sd: Fix register settings for hs400(es) mode For hs400(es) mode, the 'hs400-ds-delay' is typically configured in the dts. However, some projects may only define 'mediatek,hs400-ds-dly3', which can lead to initialization failures in hs400es mode. CMD13 reported response crc error in the mmc_switch_status() just after switching to hs400es mode. [ 1.914038][ T82] mmc0: mmc_select_hs400es failed, error -84 [ 1.914954][ T82] mmc0: error -84 whilst initialising MMC card Currently, the hs400_ds_dly3 value is set within the tuning function. This means that the PAD_DS_DLY3 field is not configured before tuning process, which is the reason for the above-mentioned CMD13 response crc error. Move the PAD_DS_DLY3 field configuration into msdc_prepare_hs400_tuning(), and add a value check of hs400_ds_delay to prevent overwriting by zero when the 'hs400-ds-delay' is not set in the dts. In addition, since hs400(es) only tune the PAD_DS_DLY1, the PAD_DS_DLY2_SEL bit should be cleared to bypass it. Fixes: c4ac38c6539b ("mmc: mtk-sd: Add HS400 online tuning support") Signed-off-by: Andy-ld Lu Reviewed-by: AngeloGioacchino Del Regno Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250123092644.7359-1-andy-ld.lu@mediatek.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/mtk-sd.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 4b6e913725260..345ea91629e0f 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -273,6 +273,7 @@ #define MSDC_PAD_TUNE_CMD2_SEL BIT(21) /* RW */ #define PAD_DS_TUNE_DLY_SEL BIT(0) /* RW */ +#define PAD_DS_TUNE_DLY2_SEL BIT(1) /* RW */ #define PAD_DS_TUNE_DLY1 GENMASK(6, 2) /* RW */ #define PAD_DS_TUNE_DLY2 GENMASK(11, 7) /* RW */ #define PAD_DS_TUNE_DLY3 GENMASK(16, 12) /* RW */ @@ -318,6 +319,7 @@ /* EMMC50_PAD_DS_TUNE mask */ #define PAD_DS_DLY_SEL BIT(16) /* RW */ +#define PAD_DS_DLY2_SEL BIT(15) /* RW */ #define PAD_DS_DLY1 GENMASK(14, 10) /* RW */ #define PAD_DS_DLY3 GENMASK(4, 0) /* RW */ @@ -2504,13 +2506,23 @@ static int msdc_execute_tuning(struct mmc_host *mmc, u32 opcode) static int msdc_prepare_hs400_tuning(struct mmc_host *mmc, struct mmc_ios *ios) { struct msdc_host *host = mmc_priv(mmc); + host->hs400_mode = true; - if (host->top_base) - writel(host->hs400_ds_delay, - host->top_base + EMMC50_PAD_DS_TUNE); - else - writel(host->hs400_ds_delay, host->base + PAD_DS_TUNE); + if (host->top_base) { + if (host->hs400_ds_dly3) + sdr_set_field(host->top_base + EMMC50_PAD_DS_TUNE, + PAD_DS_DLY3, host->hs400_ds_dly3); + if (host->hs400_ds_delay) + writel(host->hs400_ds_delay, + host->top_base + EMMC50_PAD_DS_TUNE); + } else { + if (host->hs400_ds_dly3) + sdr_set_field(host->base + PAD_DS_TUNE, + PAD_DS_TUNE_DLY3, host->hs400_ds_dly3); + if (host->hs400_ds_delay) + writel(host->hs400_ds_delay, host->base + PAD_DS_TUNE); + } /* hs400 mode must set it to 0 */ sdr_clr_bits(host->base + MSDC_PATCH_BIT2, MSDC_PATCH_BIT2_CFGCRCSTS); /* to improve read performance, set outstanding to 2 */ @@ -2530,14 +2542,11 @@ static int msdc_execute_hs400_tuning(struct mmc_host *mmc, struct mmc_card *card if (host->top_base) { sdr_set_bits(host->top_base + EMMC50_PAD_DS_TUNE, PAD_DS_DLY_SEL); - if (host->hs400_ds_dly3) - sdr_set_field(host->top_base + EMMC50_PAD_DS_TUNE, - PAD_DS_DLY3, host->hs400_ds_dly3); + sdr_clr_bits(host->top_base + EMMC50_PAD_DS_TUNE, + PAD_DS_DLY2_SEL); } else { sdr_set_bits(host->base + PAD_DS_TUNE, PAD_DS_TUNE_DLY_SEL); - if (host->hs400_ds_dly3) - sdr_set_field(host->base + PAD_DS_TUNE, - PAD_DS_TUNE_DLY3, host->hs400_ds_dly3); + sdr_clr_bits(host->base + PAD_DS_TUNE, PAD_DS_TUNE_DLY2_SEL); } host->hs400_tuning = true; -- GitLab From c9ccb88f534ca760d06590b67571c353a2f0cbcd Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Sat, 1 Feb 2025 12:43:24 +0100 Subject: [PATCH 0150/1585] Input: ads7846 - fix gpiod allocation commit 767d83361aaa ("Input: ads7846 - Convert to use software nodes") has simplified the code but accidentially converted a devm_gpiod_get() to gpiod_get(). This leaves the gpio reserved on module remove and the driver can no longer be loaded again. Fixes: 767d83361aaa ("Input: ads7846 - Convert to use software nodes") Cc: stable@vger.kernel.org Signed-off-by: H. Nikolaus Schaller Link: https://lore.kernel.org/r/6e9b143f19cdfda835711a8a7a3966e5a2494cff.1738410204.git.hns@goldelico.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ads7846.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index 066dc04003fa8..67264c5b49cb4 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -1021,7 +1021,7 @@ static int ads7846_setup_pendown(struct spi_device *spi, if (pdata->get_pendown_state) { ts->get_pendown_state = pdata->get_pendown_state; } else { - ts->gpio_pendown = gpiod_get(&spi->dev, "pendown", GPIOD_IN); + ts->gpio_pendown = devm_gpiod_get(&spi->dev, "pendown", GPIOD_IN); if (IS_ERR(ts->gpio_pendown)) { dev_err(&spi->dev, "failed to request pendown GPIO\n"); return PTR_ERR(ts->gpio_pendown); -- GitLab From 6f36f103cff1737094f2187b1f9a7b312820d377 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 3 Feb 2025 15:10:27 +0200 Subject: [PATCH 0151/1585] pinctrl: cy8c95x0: Fix off-by-one in the regmap range settings The range_max is inclusive, so we need to use the number of the last accessible register address. Fixes: 8670de9fae49 ("pinctrl: cy8c95x0: Use regmap ranges") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/20250203131506.3318201-2-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-cy8c95x0.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/pinctrl-cy8c95x0.c b/drivers/pinctrl/pinctrl-cy8c95x0.c index 0d6c2027d4c18..5c6bcbf6c3377 100644 --- a/drivers/pinctrl/pinctrl-cy8c95x0.c +++ b/drivers/pinctrl/pinctrl-cy8c95x0.c @@ -1438,15 +1438,15 @@ static int cy8c95x0_probe(struct i2c_client *client) switch (chip->tpin) { case 20: strscpy(chip->name, cy8c95x0_id[0].name); - regmap_range_conf.range_max = CY8C95X0_VIRTUAL + 3 * MUXED_STRIDE; + regmap_range_conf.range_max = CY8C95X0_VIRTUAL + 3 * MUXED_STRIDE - 1; break; case 40: strscpy(chip->name, cy8c95x0_id[1].name); - regmap_range_conf.range_max = CY8C95X0_VIRTUAL + 6 * MUXED_STRIDE; + regmap_range_conf.range_max = CY8C95X0_VIRTUAL + 6 * MUXED_STRIDE - 1; break; case 60: strscpy(chip->name, cy8c95x0_id[2].name); - regmap_range_conf.range_max = CY8C95X0_VIRTUAL + 8 * MUXED_STRIDE; + regmap_range_conf.range_max = CY8C95X0_VIRTUAL + 8 * MUXED_STRIDE - 1; break; default: return -ENODEV; -- GitLab From 3fbe3fe28764455e4fc3578afb9765f46f9ce93d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 3 Feb 2025 15:10:28 +0200 Subject: [PATCH 0152/1585] pinctrl: cy8c95x0: Avoid accessing reserved registers The checks for vrtual registers in the cy8c95x0_readable_register() and cy8c95x0_writeable_register() are not aligned and broken. Fix that by explicitly avoiding reserved registers to be accessed. Fixes: 71e4001a0455 ("pinctrl: pinctrl-cy8c95x0: Fix regcache") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/20250203131506.3318201-3-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-cy8c95x0.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/pinctrl-cy8c95x0.c b/drivers/pinctrl/pinctrl-cy8c95x0.c index 5c6bcbf6c3377..c787a9aadfdfb 100644 --- a/drivers/pinctrl/pinctrl-cy8c95x0.c +++ b/drivers/pinctrl/pinctrl-cy8c95x0.c @@ -328,14 +328,14 @@ static int cypress_get_pin_mask(struct cy8c95x0_pinctrl *chip, unsigned int pin) static bool cy8c95x0_readable_register(struct device *dev, unsigned int reg) { /* - * Only 12 registers are present per port (see Table 6 in the - * datasheet). + * Only 12 registers are present per port (see Table 6 in the datasheet). */ - if (reg >= CY8C95X0_VIRTUAL && (reg % MUXED_STRIDE) < 12) - return true; + if (reg >= CY8C95X0_VIRTUAL && (reg % MUXED_STRIDE) >= 12) + return false; switch (reg) { case 0x24 ... 0x27: + case 0x31 ... 0x3f: return false; default: return true; @@ -344,8 +344,11 @@ static bool cy8c95x0_readable_register(struct device *dev, unsigned int reg) static bool cy8c95x0_writeable_register(struct device *dev, unsigned int reg) { - if (reg >= CY8C95X0_VIRTUAL) - return true; + /* + * Only 12 registers are present per port (see Table 6 in the datasheet). + */ + if (reg >= CY8C95X0_VIRTUAL && (reg % MUXED_STRIDE) >= 12) + return false; switch (reg) { case CY8C95X0_INPUT_(0) ... CY8C95X0_INPUT_(7): @@ -353,6 +356,7 @@ static bool cy8c95x0_writeable_register(struct device *dev, unsigned int reg) case CY8C95X0_DEVID: return false; case 0x24 ... 0x27: + case 0x31 ... 0x3f: return false; default: return true; -- GitLab From aac4470fa6e695e4d6ac94cc77d4690b57f1d2bc Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 3 Feb 2025 15:10:29 +0200 Subject: [PATCH 0153/1585] pinctrl: cy8c95x0: Enable regmap locking for debug When regmap locking is disabled, debugfs is also disabled. Enable locking for debug when CONFIG_DEBUG_PINCTRL is set. Fixes: f71aba339a66 ("pinctrl: cy8c95x0: Use single I2C lock") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/20250203131506.3318201-4-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-cy8c95x0.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pinctrl/pinctrl-cy8c95x0.c b/drivers/pinctrl/pinctrl-cy8c95x0.c index c787a9aadfdfb..bfa16f70e29ce 100644 --- a/drivers/pinctrl/pinctrl-cy8c95x0.c +++ b/drivers/pinctrl/pinctrl-cy8c95x0.c @@ -470,7 +470,11 @@ static const struct regmap_config cy8c9520_i2c_regmap = { .max_register = 0, /* Updated at runtime */ .num_reg_defaults_raw = 0, /* Updated at runtime */ .use_single_read = true, /* Workaround for regcache bug */ +#if IS_ENABLED(CONFIG_DEBUG_PINCTRL) + .disable_locking = false, +#else .disable_locking = true, +#endif }; static inline int cy8c95x0_regmap_update_bits_base(struct cy8c95x0_pinctrl *chip, -- GitLab From 0a7404fc5399e1100b14e7e2a4af2e4fd5e3b602 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 3 Feb 2025 15:10:30 +0200 Subject: [PATCH 0154/1585] pinctrl: cy8c95x0: Rename PWMSEL to SELPWM There are two registers in the hardware, one, "Select PWM", is per-port configuration enabling PWM function instead of GPIO. The other one is "PWM Select" is per-PWM selector to configure PWM itself. Original code uses abbreviation of the latter to describe the former. Rename it to follow the datasheet. Fixes: e6cbbe42944d ("pinctrl: Add Cypress cy8c95x0 support") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/20250203131506.3318201-5-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-cy8c95x0.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/pinctrl/pinctrl-cy8c95x0.c b/drivers/pinctrl/pinctrl-cy8c95x0.c index bfa16f70e29ce..75100a9fb8e4c 100644 --- a/drivers/pinctrl/pinctrl-cy8c95x0.c +++ b/drivers/pinctrl/pinctrl-cy8c95x0.c @@ -42,7 +42,7 @@ #define CY8C95X0_PORTSEL 0x18 /* Port settings, write PORTSEL first */ #define CY8C95X0_INTMASK 0x19 -#define CY8C95X0_PWMSEL 0x1A +#define CY8C95X0_SELPWM 0x1A #define CY8C95X0_INVERT 0x1B #define CY8C95X0_DIRECTION 0x1C /* Drive mode register change state on writing '1' */ @@ -369,8 +369,8 @@ static bool cy8c95x0_volatile_register(struct device *dev, unsigned int reg) case CY8C95X0_INPUT_(0) ... CY8C95X0_INPUT_(7): case CY8C95X0_INTSTATUS_(0) ... CY8C95X0_INTSTATUS_(7): case CY8C95X0_INTMASK: + case CY8C95X0_SELPWM: case CY8C95X0_INVERT: - case CY8C95X0_PWMSEL: case CY8C95X0_DIRECTION: case CY8C95X0_DRV_PU: case CY8C95X0_DRV_PD: @@ -399,7 +399,7 @@ static bool cy8c95x0_muxed_register(unsigned int reg) { switch (reg) { case CY8C95X0_INTMASK: - case CY8C95X0_PWMSEL: + case CY8C95X0_SELPWM: case CY8C95X0_INVERT: case CY8C95X0_DIRECTION: case CY8C95X0_DRV_PU: @@ -797,7 +797,7 @@ static int cy8c95x0_gpio_get_pincfg(struct cy8c95x0_pinctrl *chip, reg = CY8C95X0_DIRECTION; break; case PIN_CONFIG_MODE_PWM: - reg = CY8C95X0_PWMSEL; + reg = CY8C95X0_SELPWM; break; case PIN_CONFIG_OUTPUT: reg = CY8C95X0_OUTPUT; @@ -876,7 +876,7 @@ static int cy8c95x0_gpio_set_pincfg(struct cy8c95x0_pinctrl *chip, reg = CY8C95X0_DRV_PP_FAST; break; case PIN_CONFIG_MODE_PWM: - reg = CY8C95X0_PWMSEL; + reg = CY8C95X0_SELPWM; break; case PIN_CONFIG_OUTPUT_ENABLE: return cy8c95x0_pinmux_direction(chip, off, !arg); @@ -1161,7 +1161,7 @@ static void cy8c95x0_pin_dbg_show(struct pinctrl_dev *pctldev, struct seq_file * bitmap_zero(mask, MAX_LINE); __set_bit(pin, mask); - if (cy8c95x0_read_regs_mask(chip, CY8C95X0_PWMSEL, pwm, mask)) { + if (cy8c95x0_read_regs_mask(chip, CY8C95X0_SELPWM, pwm, mask)) { seq_puts(s, "not available"); return; } @@ -1206,7 +1206,7 @@ static int cy8c95x0_set_mode(struct cy8c95x0_pinctrl *chip, unsigned int off, bo u8 port = cypress_get_port(chip, off); u8 bit = cypress_get_pin_mask(chip, off); - return cy8c95x0_regmap_write_bits(chip, CY8C95X0_PWMSEL, port, bit, mode ? bit : 0); + return cy8c95x0_regmap_write_bits(chip, CY8C95X0_SELPWM, port, bit, mode ? bit : 0); } static int cy8c95x0_pinmux_mode(struct cy8c95x0_pinctrl *chip, -- GitLab From 448060463198924c0a485e7e1622fa8a9c03cf3e Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Tue, 17 Dec 2024 14:07:23 +0530 Subject: [PATCH 0155/1585] drm/i915/hdcp: Fix Repeater authentication during topology change When topology changes, before beginning a new HDCP authentication by sending AKE_init message we need to first authenticate only the repeater. Only after repeater authentication failure, it makes sense to start a new HDCP authentication. Even though it made sense to not enable HDCP directly from check_link and schedule it for later, repeater authentication needs to be done immediately. --v2 -Fix comment grammatical errors [Ankit] Fixes: 47ef55a8b784 ("drm/i915/hdcp: Don't enable HDCP2.2 directly from check_link") Signed-off-by: Suraj Kandpal Reviewed-by: Ankit Nautiyal Link: https://patchwork.freedesktop.org/patch/msgid/20241217083723.2883317-1-suraj.kandpal@intel.com (cherry picked from commit 605a33e765890e4f1345315afc25268d4ae0fb7c) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_hdcp.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index 7464b44c8bb36..c60b22aaa819e 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -2188,6 +2188,19 @@ static int intel_hdcp2_check_link(struct intel_connector *connector) drm_dbg_kms(display->drm, "HDCP2.2 Downstream topology change\n"); + + ret = hdcp2_authenticate_repeater_topology(connector); + if (!ret) { + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_ENABLED, + true); + goto out; + } + + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s] Repeater topology auth failed.(%d)\n", + connector->base.base.id, connector->base.name, + ret); } else { drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s] HDCP2.2 link failed, retrying auth\n", -- GitLab From 8dd5a5eb6a209e3bdb4e536e36698400445c6c2e Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Fri, 17 Jan 2025 09:42:48 +0530 Subject: [PATCH 0156/1585] drm/i915/hdcp: Use correct function to check if encoder is HDMI Use intel_encoder_is_hdmi function which was recently introduced to see if encoder is HDMI or not. --v2 -Add Fixes tag [Jani] Fixes: 6a3691ca4799 ("drm/i915/hdcp: Disable HDCP Line Rekeying for HDCP2.2 on HDMI") Signed-off-by: Suraj Kandpal Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20250117041247.1084381-1-suraj.kandpal@intel.com (cherry picked from commit 2499212e21601740ed7d5563563f39cf7e7d833a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_hdcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index c60b22aaa819e..1bab7c34a7942 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -41,7 +41,7 @@ intel_hdcp_adjust_hdcp_line_rekeying(struct intel_encoder *encoder, u32 rekey_bit = 0; /* Here we assume HDMI is in TMDS mode of operation */ - if (encoder->type != INTEL_OUTPUT_HDMI) + if (!intel_encoder_is_hdmi(encoder)) return; if (DISPLAY_VER(display) >= 30) { -- GitLab From cb5fab2afd906307876d79537ef0329033c40dd3 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 23 Jan 2025 11:38:39 -0800 Subject: [PATCH 0157/1585] drm/i915/pmu: Fix zero delta busyness issue When running igt@gem_exec_balancer@individual for multiple iterations, it is seen that the delta busyness returned by PMU is 0. The issue stems from a combination of 2 implementation specific details: 1) gt_park is throttling __update_guc_busyness_stats() so that it does not hog PCI bandwidth for some use cases. (Ref: 59bcdb564b3ba) 2) busyness implementation always returns monotonically increasing counters. (Ref: cf907f6d29421) If an application queried an engine while it was active, engine->stats.guc.running is set to true. Following that, if all PM wakeref's are released, then gt is parked. At this time the throttling of __update_guc_busyness_stats() may result in a missed update to the running state of the engine (due to (1) above). This means subsequent calls to guc_engine_busyness() will think that the engine is still running and they will keep updating the cached counter (stats->total). This results in an inflated cached counter. Later when the application runs a workload and queries for busyness, we return the cached value since it is larger than the actual value (due to (2) above) All subsequent queries will return the same large (inflated) value, so the application sees a delta busyness of zero. Fix the issue by resetting the running state of engines each time intel_guc_busyness_park() is called. v2: (Rodrigo) - Use the correct tag in commit message - Drop the redundant wakeref check in guc_engine_busyness() and update commit message Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13366 Fixes: cf907f6d2942 ("i915/guc: Ensure busyness counter increases motonically") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20250123193839.2394694-1-umesh.nerlige.ramappa@intel.com (cherry picked from commit 431b742e2bfc9f6dd713f261629741980996d001) Signed-off-by: Rodrigo Vivi --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 12f1ba7ca9c19..bd4b3d2470e40 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1469,6 +1469,19 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc) spin_unlock_irqrestore(&guc->timestamp.lock, flags); } +static void __update_guc_busyness_running_state(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned long flags; + + spin_lock_irqsave(&guc->timestamp.lock, flags); + for_each_engine(engine, gt, id) + engine->stats.guc.running = false; + spin_unlock_irqrestore(&guc->timestamp.lock, flags); +} + static void __update_guc_busyness_stats(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -1619,6 +1632,9 @@ void intel_guc_busyness_park(struct intel_gt *gt) if (!guc_submission_initialized(guc)) return; + /* Assume no engines are running and set running state to false */ + __update_guc_busyness_running_state(guc); + /* * There is a race with suspend flow where the worker runs after suspend * and causes an unclaimed register access warning. Cancel the worker -- GitLab From fa6182c8b13ebfdc70ebdc09161a70dd8131f3b1 Mon Sep 17 00:00:00 2001 From: Brian Geffon Date: Mon, 27 Jan 2025 15:43:32 -0500 Subject: [PATCH 0158/1585] drm/i915: Fix page cleanup on DMA remap failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When converting to folios the cleanup path of shmem_get_pages() was missed. When a DMA remap fails and the max segment size is greater than PAGE_SIZE it will attempt to retry the remap with a PAGE_SIZEd segment size. The cleanup code isn't properly using the folio apis and as a result isn't handling compound pages correctly. v2 -> v3: (Ville) Just use shmem_sg_free_table() as-is in the failure path of shmem_get_pages(). shmem_sg_free_table() will clear mapping unevictable but it will be reset when it retries in shmem_sg_alloc_table(). v1 -> v2: (Ville) Fixed locations where we were not clearing mapping unevictable. Cc: stable@vger.kernel.org Cc: Ville Syrjala Cc: Vidya Srinivas Link: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13487 Link: https://lore.kernel.org/lkml/20250116135636.410164-1-bgeffon@google.com/ Fixes: 0b62af28f249 ("i915: convert shmem_sg_free_table() to use a folio_batch") Signed-off-by: Brian Geffon Suggested-by: Tomasz Figa Link: https://patchwork.freedesktop.org/patch/msgid/20250127204332.336665-1-bgeffon@google.com Reviewed-by: Jonathan Cavitt Tested-by: Vidya Srinivas Signed-off-by: Ville Syrjälä (cherry picked from commit 9e304a18630875352636ad52a3d2af47c3bde824) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index fe69f2c8527d7..ae3343c81a645 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -209,8 +209,6 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) struct address_space *mapping = obj->base.filp->f_mapping; unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); struct sg_table *st; - struct sgt_iter sgt_iter; - struct page *page; int ret; /* @@ -239,9 +237,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) * for PAGE_SIZE chunks instead may be helpful. */ if (max_segment > PAGE_SIZE) { - for_each_sgt_page(page, sgt_iter, st) - put_page(page); - sg_free_table(st); + shmem_sg_free_table(st, mapping, false, false); kfree(st); max_segment = PAGE_SIZE; -- GitLab From c7b49506b3ba7a62335e6f666a43f67d5cd9fd1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 18 Dec 2024 19:36:47 +0200 Subject: [PATCH 0159/1585] drm/i915: Drop 64bpp YUV formats from ICL+ SDR planes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'm seeing underruns with these 64bpp YUV formats on TGL. The weird details: - only happens on pipe B/C/D SDR planes, pipe A SDR planes seem fine, as do all HDR planes - somehow CDCLK related, higher CDCLK allows for bigger plane with these formats without underruns. With 300MHz CDCLK I can only go up to 1200 pixels wide or so, with 650MHz even a 3840 pixel wide plane was OK - ICL and ADL so far appear unaffected So not really sure what's the deal with this, but bspec does state "64-bit formats supported only on the HDR planes" so let's just drop these formats from the SDR planes. We already disallow 64bpp RGB formats. Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20241218173650.19782-2-ville.syrjala@linux.intel.com Reviewed-by: Juha-Pekka Heikkila (cherry picked from commit 35e1aacfe536d6e8d8d440cd7155366da2541ad4) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/skl_universal_plane.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index ff9764cac1e71..80e558042d97d 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -106,8 +106,6 @@ static const u32 icl_sdr_y_plane_formats[] = { DRM_FORMAT_Y216, DRM_FORMAT_XYUV8888, DRM_FORMAT_XVYU2101010, - DRM_FORMAT_XVYU12_16161616, - DRM_FORMAT_XVYU16161616, }; static const u32 icl_sdr_uv_plane_formats[] = { @@ -134,8 +132,6 @@ static const u32 icl_sdr_uv_plane_formats[] = { DRM_FORMAT_Y216, DRM_FORMAT_XYUV8888, DRM_FORMAT_XVYU2101010, - DRM_FORMAT_XVYU12_16161616, - DRM_FORMAT_XVYU16161616, }; static const u32 icl_hdr_plane_formats[] = { -- GitLab From 57965269896313e1629a518d3971ad55f599b792 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 14 Jan 2025 16:13:34 -0800 Subject: [PATCH 0160/1585] drm/i915/guc: Debug print LRC state entries only if the context is pinned After the context is unpinned the backing memory can also be unpinned, so any accesses via the lrc_reg_state pointer can end up in unmapped memory. To avoid that, make sure to only access that memory if the context is pinned when printing its info. v2: fix newline alignment Fixes: 28ff6520a34d ("drm/i915/guc: Update GuC debugfs to support new GuC") Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: Matthew Brost Cc: # v5.15+ Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20250115001334.3875347-1-daniele.ceraolospurio@intel.com (cherry picked from commit 5bea40687c5cf2a33bf04e9110eb2e2b80222ef5) Signed-off-by: Rodrigo Vivi --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index bd4b3d2470e40..cc05bd9e43b49 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -5535,12 +5535,20 @@ static inline void guc_log_context(struct drm_printer *p, { drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); - drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", - ce->ring->head, - ce->lrc_reg_state[CTX_RING_HEAD]); - drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", - ce->ring->tail, - ce->lrc_reg_state[CTX_RING_TAIL]); + if (intel_context_pin_if_active(ce)) { + drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", + ce->ring->head, + ce->lrc_reg_state[CTX_RING_HEAD]); + drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", + ce->ring->tail, + ce->lrc_reg_state[CTX_RING_TAIL]); + intel_context_unpin(ce); + } else { + drm_printf(p, "\t\tLRC Head: Internal %u, Memory not pinned\n", + ce->ring->head); + drm_printf(p, "\t\tLRC Tail: Internal %u, Memory not pinned\n", + ce->ring->tail); + } drm_printf(p, "\t\tContext Pin Count: %u\n", atomic_read(&ce->pin_count)); drm_printf(p, "\t\tGuC ID Ref Count: %u\n", -- GitLab From 4466302262b38f5e6c65325035b4036a42efc934 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Thu, 30 Jan 2025 10:46:06 +0530 Subject: [PATCH 0161/1585] drm/i915/dp: fix the Adaptive sync Operation mode for SDP Currently we support Adaptive sync operation mode with dynamic frame rate, but instead the operation mode with fixed rate is set. This was initially set correctly in the earlier version of changes but later got changed, while defining a macro for the same. Fixes: a5bd5991cb8a ("drm/i915/display: Compute AS SDP parameters") Cc: Mitul Golani Cc: Ankit Nautiyal Cc: Jani Nikula Reviewed-by: Mitul Golani Signed-off-by: Ankit Nautiyal Link: https://patchwork.freedesktop.org/patch/msgid/20250130051609.1796524-4-mitulkumar.ajitkumar.golani@intel.com (cherry picked from commit c5806862543ff6c2ad242409fcdf0667eac26dae) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index f1f3b1bb1e89b..bfc16fd25d22f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2829,7 +2829,6 @@ static void intel_dp_compute_as_sdp(struct intel_dp *intel_dp, crtc_state->infoframes.enable |= intel_hdmi_infoframe_enable(DP_SDP_ADAPTIVE_SYNC); - /* Currently only DP_AS_SDP_AVT_FIXED_VTOTAL mode supported */ as_sdp->sdp_type = DP_SDP_ADAPTIVE_SYNC; as_sdp->length = 0x9; as_sdp->duration_incr_ms = 0; @@ -2840,7 +2839,7 @@ static void intel_dp_compute_as_sdp(struct intel_dp *intel_dp, as_sdp->target_rr = drm_mode_vrefresh(adjusted_mode); as_sdp->target_rr_divider = true; } else { - as_sdp->mode = DP_AS_SDP_AVT_FIXED_VTOTAL; + as_sdp->mode = DP_AS_SDP_AVT_DYNAMIC_VTOTAL; as_sdp->vtotal = adjusted_mode->vtotal; as_sdp->target_rr = 0; } -- GitLab From 985a44b02484a47f2c6ecbe971a5f0c47830120b Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Fri, 31 Jan 2025 09:43:42 +0530 Subject: [PATCH 0162/1585] drm/i915/dp: Return min bpc supported by source instead of 0 Currently, intel_dp_dsc_max_src_input_bpc can return 0 for platforms not supporting DSC, which could theoretically cause issues in clamp() due to a low limit being greater than the high limit. Instead, return the minimum bpc supported by the source to prevent such issues. Reported-by: Linux Kernel Functional Testing Closes: https://lore.kernel.org/all/CA+G9fYtNfM399_=_ff81zeRJv=0+z7oFJfPGmJgTp6yrJmU+1w@mail.gmail.com/ Fixes: 160672b86b0d ("drm/i915/dp: Use clamp for pipe_bpp limits with DSC") Cc: Suraj Kandpal Cc: Jani Nikula Cc: Rodrigo Vivi Signed-off-by: Ankit Nautiyal Reviewed-by: Suraj Kandpal Tested-by: Chaitanya Kumar Borah Link: https://patchwork.freedesktop.org/patch/msgid/20250131041342.3086716-1-ankit.k.nautiyal@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit a67221b5eb8d59fb7e1f0df3ef9945b6a0f32cca) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index bfc16fd25d22f..be07034bfcc69 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1791,7 +1791,7 @@ int intel_dp_dsc_max_src_input_bpc(struct intel_display *display) if (DISPLAY_VER(display) == 11) return 10; - return 0; + return intel_dp_dsc_min_src_input_bpc(); } int intel_dp_dsc_compute_max_bpp(const struct intel_connector *connector, -- GitLab From 3cf3ec911d70ee7774978f639fd3364c98d42b2c Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 21 Jan 2025 06:52:03 -0800 Subject: [PATCH 0163/1585] drm/i915/backlight: Return immediately when scale() finds invalid parameters The scale() functions detects invalid parameters, but continues its calculations anyway. This causes bad results if negative values are used for unsigned operations. Worst case, a division by 0 error will be seen if source_min == source_max. On top of that, after v6.13, the sequence of WARN_ON() followed by clamp() may result in a build error with gcc 13.x. drivers/gpu/drm/i915/display/intel_backlight.c: In function 'scale': include/linux/compiler_types.h:542:45: error: call to '__compiletime_assert_415' declared with attribute error: clamp() low limit source_min greater than high limit source_max This happens if the compiler decides to rearrange the code as follows. if (source_min > source_max) { WARN(..); /* Do the clamp() knowing that source_min > source_max */ source_val = clamp(source_val, source_min, source_max); } else { /* Do the clamp knowing that source_min <= source_max */ source_val = clamp(source_val, source_min, source_max); } Fix the problem by evaluating the return values from WARN_ON and returning immediately after a warning. While at it, fix divide by zero error seen if source_min == source_max. Analyzed-by: Linus Torvalds Suggested-by: Linus Torvalds Suggested-by: David Laight Cc: David Laight Cc: Jani Nikula Cc: Andy Shevchenko Signed-off-by: Guenter Roeck Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20250121145203.2851237-1-linux@roeck-us.net Signed-off-by: Rodrigo Vivi (cherry picked from commit 6f71507415841d1a6d38118e5fa0eaf0caab9c17) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_backlight.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index fc1e517e074a3..7e6ce905bdafa 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -41,8 +41,9 @@ static u32 scale(u32 source_val, { u64 target_val; - WARN_ON(source_min > source_max); - WARN_ON(target_min > target_max); + if (WARN_ON(source_min >= source_max) || + WARN_ON(target_min > target_max)) + return target_min; /* defensive */ source_val = clamp(source_val, source_min, source_max); -- GitLab From 6bb05a33337b2c842373857b63de5c9bf1ae2a09 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 31 Jan 2025 12:33:23 -0500 Subject: [PATCH 0164/1585] clocksource: Use migrate_disable() to avoid calling get_random_u32() in atomic context The following bug report happened with a PREEMPT_RT kernel: BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2012, name: kwatchdog preempt_count: 1, expected: 0 RCU nest depth: 0, expected: 0 get_random_u32+0x4f/0x110 clocksource_verify_choose_cpus+0xab/0x1a0 clocksource_verify_percpu.part.0+0x6b/0x330 clocksource_watchdog_kthread+0x193/0x1a0 It is due to the fact that clocksource_verify_choose_cpus() is invoked with preemption disabled. This function invokes get_random_u32() to obtain random numbers for choosing CPUs. The batched_entropy_32 local lock and/or the base_crng.lock spinlock in driver/char/random.c will be acquired during the call. In PREEMPT_RT kernel, they are both sleeping locks and so cannot be acquired in atomic context. Fix this problem by using migrate_disable() to allow smp_processor_id() to be reliably used without introducing atomic context. preempt_disable() is then called after clocksource_verify_choose_cpus() but before the clocksource measurement is being run to avoid introducing unexpected latency. Fixes: 7560c02bdffb ("clocksource: Check per-CPU clock synchronization when marked unstable") Suggested-by: Sebastian Andrzej Siewior Signed-off-by: Waiman Long Signed-off-by: Thomas Gleixner Reviewed-by: Paul E. McKenney Reviewed-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/all/20250131173323.891943-2-longman@redhat.com --- kernel/time/clocksource.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 77d9566d3aa68..2a7802ec480cc 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -373,10 +373,10 @@ void clocksource_verify_percpu(struct clocksource *cs) cpumask_clear(&cpus_ahead); cpumask_clear(&cpus_behind); cpus_read_lock(); - preempt_disable(); + migrate_disable(); clocksource_verify_choose_cpus(); if (cpumask_empty(&cpus_chosen)) { - preempt_enable(); + migrate_enable(); cpus_read_unlock(); pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name); return; @@ -384,6 +384,7 @@ void clocksource_verify_percpu(struct clocksource *cs) testcpu = smp_processor_id(); pr_info("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n", cs->name, testcpu, cpumask_pr_args(&cpus_chosen)); + preempt_disable(); for_each_cpu(cpu, &cpus_chosen) { if (cpu == testcpu) continue; @@ -403,6 +404,7 @@ void clocksource_verify_percpu(struct clocksource *cs) cs_nsec_min = cs_nsec; } preempt_enable(); + migrate_enable(); cpus_read_unlock(); if (!cpumask_empty(&cpus_ahead)) pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n", -- GitLab From 4fd2707e3e71bfd5d4df4f4c9656a009f09dfc7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bence=20Cs=C3=B3k=C3=A1s?= Date: Mon, 3 Feb 2025 16:12:49 +0100 Subject: [PATCH 0165/1585] spi: atmel-quadspi: Fix warning in doc-comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The doc-comment for `struct atmel_qspi_pcal` had a typo in one of the struct members' name, causing a warning with the `W=1` option. Fixes: 5af42209a4d2 ("spi: atmel-quadspi: Add support for sama7g5 QSPI") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501311707.Ltj0qXse-lkp@intel.com/ Signed-off-by: Bence Csókás Link: https://patch.msgid.link/20250203151249.79876-2-csokas.bence@prolan.hu Signed-off-by: Mark Brown --- drivers/spi/atmel-quadspi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c index abdc49d9d9400..d8c9be64d006a 100644 --- a/drivers/spi/atmel-quadspi.c +++ b/drivers/spi/atmel-quadspi.c @@ -235,8 +235,8 @@ /** * struct atmel_qspi_pcal - Pad Calibration Clock Division * @pclk_rate: peripheral clock rate. - * @pclkdiv: calibration clock division. The clock applied to the calibration - * cell is divided by pclkdiv + 1. + * @pclk_div: calibration clock division. The clock applied to the calibration + * cell is divided by pclk_div + 1. */ struct atmel_qspi_pcal { u32 pclk_rate; -- GitLab From 36e093c8dcc585d0a9e79a005f721f01f3365eba Mon Sep 17 00:00:00 2001 From: Nilton Perim Neto Date: Mon, 3 Feb 2025 07:13:09 -0800 Subject: [PATCH 0166/1585] Input: xpad - add 8BitDo SN30 Pro, Hyperkin X91 and Gamesir G7 SE controllers Add 8BitDo SN30 Pro, Hyperkin X91 and Gamesir G7 SE to the list of recognized controllers, and update vendor comments to match. Signed-off-by: Nilton Perim Neto Link: https://lore.kernel.org/r/20250122214814.102311-2-niltonperimneto@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 8fe2a51df649e..652afb37bf770 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -376,8 +376,10 @@ static const struct xpad_device { { 0x294b, 0x3404, "Snakebyte GAMEPAD RGB X", 0, XTYPE_XBOXONE }, { 0x2dc8, 0x2000, "8BitDo Pro 2 Wired Controller fox Xbox", 0, XTYPE_XBOXONE }, { 0x2dc8, 0x3106, "8BitDo Ultimate Wireless / Pro 2 Wired Controller", 0, XTYPE_XBOX360 }, + { 0x2dc8, 0x6001, "8BitDo SN30 Pro", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x310a, "8BitDo Ultimate 2C Wireless Controller", 0, XTYPE_XBOX360 }, { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE }, + { 0x2e24, 0x1688, "Hyperkin X91 X-Box One pad", 0, XTYPE_XBOXONE }, { 0x31e3, 0x1100, "Wooting One", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1200, "Wooting Two", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1210, "Wooting Lekker", 0, XTYPE_XBOX360 }, @@ -389,6 +391,7 @@ static const struct xpad_device { { 0x3285, 0x0646, "Nacon Pro Compact", 0, XTYPE_XBOXONE }, { 0x3285, 0x0663, "Nacon Evol-X", 0, XTYPE_XBOXONE }, { 0x3537, 0x1004, "GameSir T4 Kaleid", 0, XTYPE_XBOX360 }, + { 0x3537, 0x1010, "GameSir G7 SE", 0, XTYPE_XBOXONE }, { 0x3767, 0x0101, "Fanatec Speedster 3 Forceshock Wheel", 0, XTYPE_XBOX }, { 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX }, { 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN } @@ -528,12 +531,12 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOXONE_VENDOR(0x24c6), /* PowerA controllers */ XPAD_XBOX360_VENDOR(0x2563), /* OneXPlayer Gamepad */ XPAD_XBOX360_VENDOR(0x260d), /* Dareu H101 */ - XPAD_XBOXONE_VENDOR(0x294b), /* Snakebyte */ + XPAD_XBOXONE_VENDOR(0x294b), /* Snakebyte */ XPAD_XBOX360_VENDOR(0x2c22), /* Qanba Controllers */ - XPAD_XBOX360_VENDOR(0x2dc8), /* 8BitDo Pro 2 Wired Controller */ - XPAD_XBOXONE_VENDOR(0x2dc8), /* 8BitDo Pro 2 Wired Controller for Xbox */ - XPAD_XBOXONE_VENDOR(0x2e24), /* Hyperkin Duke Xbox One pad */ - XPAD_XBOX360_VENDOR(0x2f24), /* GameSir controllers */ + XPAD_XBOX360_VENDOR(0x2dc8), /* 8BitDo Controllers */ + XPAD_XBOXONE_VENDOR(0x2dc8), /* 8BitDo Controllers */ + XPAD_XBOXONE_VENDOR(0x2e24), /* Hyperkin Controllers */ + XPAD_XBOX360_VENDOR(0x2f24), /* GameSir Controllers */ XPAD_XBOX360_VENDOR(0x31e3), /* Wooting Keyboards */ XPAD_XBOX360_VENDOR(0x3285), /* Nacon GC-100 */ XPAD_XBOXONE_VENDOR(0x3285), /* Nacon Evol-X */ -- GitLab From 3492321e2e60ddfe91aa438bb9ac209016f48f7a Mon Sep 17 00:00:00 2001 From: Pavel Rojtberg Date: Mon, 3 Feb 2025 07:22:27 -0800 Subject: [PATCH 0167/1585] Input: xpad - add multiple supported devices This is based on multiple commits at https://github.com/paroj/xpad that had bouncing email addresses and were not signed off. Signed-off-by: Pavel Rojtberg Link: https://lore.kernel.org/r/20250123175404.23254-1-rojtberg@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 652afb37bf770..16493235bf9ee 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -140,6 +140,7 @@ static const struct xpad_device { { 0x044f, 0x0f00, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f03, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX }, + { 0x044f, 0xd01e, "ThrustMaster, Inc. ESWAP X 2 ELDEN RING EDITION", 0, XTYPE_XBOXONE }, { 0x044f, 0x0f10, "Thrustmaster Modena GT Wheel", 0, XTYPE_XBOX }, { 0x044f, 0xb326, "Thrustmaster Gamepad GP XID", 0, XTYPE_XBOX360 }, { 0x045e, 0x0202, "Microsoft X-Box pad v1 (US)", 0, XTYPE_XBOX }, @@ -177,6 +178,7 @@ static const struct xpad_device { { 0x06a3, 0x0200, "Saitek Racing Wheel", 0, XTYPE_XBOX }, { 0x06a3, 0x0201, "Saitek Adrenalin", 0, XTYPE_XBOX }, { 0x06a3, 0xf51a, "Saitek P3600", 0, XTYPE_XBOX360 }, + { 0x0738, 0x4503, "Mad Catz Racing Wheel", 0, XTYPE_XBOXONE }, { 0x0738, 0x4506, "Mad Catz 4506 Wireless Controller", 0, XTYPE_XBOX }, { 0x0738, 0x4516, "Mad Catz Control Pad", 0, XTYPE_XBOX }, { 0x0738, 0x4520, "Mad Catz Control Pad Pro", 0, XTYPE_XBOX }, @@ -238,6 +240,7 @@ static const struct xpad_device { { 0x0e6f, 0x0146, "Rock Candy Wired Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0147, "PDP Marvel Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x015c, "PDP Xbox One Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, + { 0x0e6f, 0x015d, "PDP Mirror's Edge Official Wired Controller for Xbox One", XTYPE_XBOXONE }, { 0x0e6f, 0x0161, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0162, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0163, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, @@ -276,12 +279,15 @@ static const struct xpad_device { { 0x0f0d, 0x0078, "Hori Real Arcade Pro V Kai Xbox One", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0f0d, 0x00c5, "Hori Fighting Commander ONE", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0f0d, 0x00dc, "HORIPAD FPS for Nintendo Switch", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, + { 0x0f0d, 0x0151, "Hori Racing Wheel Overdrive for Xbox Series X", 0, XTYPE_XBOXONE }, + { 0x0f0d, 0x0152, "Hori Racing Wheel Overdrive for Xbox Series X", 0, XTYPE_XBOXONE }, { 0x0f30, 0x010b, "Philips Recoil", 0, XTYPE_XBOX }, { 0x0f30, 0x0202, "Joytech Advanced Controller", 0, XTYPE_XBOX }, { 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX }, { 0x102c, 0xff0c, "Joytech Wireless Advanced Controller", 0, XTYPE_XBOX }, { 0x1038, 0x1430, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x1038, 0x1431, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, + { 0x10f5, 0x7005, "Turtle Beach Recon Controller", 0, XTYPE_XBOXONE }, { 0x11c9, 0x55f0, "Nacon GC-100XF", 0, XTYPE_XBOX360 }, { 0x11ff, 0x0511, "PXN V900", 0, XTYPE_XBOX360 }, { 0x1209, 0x2882, "Ardwiino Controller", 0, XTYPE_XBOX360 }, @@ -366,6 +372,7 @@ static const struct xpad_device { { 0x24c6, 0x5510, "Hori Fighting Commander ONE (Xbox 360/PC Mode)", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x551a, "PowerA FUSION Pro Controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x561a, "PowerA FUSION Controller", 0, XTYPE_XBOXONE }, + { 0x24c6, 0x581a, "ThrustMaster XB1 Classic Controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x5b00, "ThrustMaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5b02, "Thrustmaster, Inc. GPX Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5b03, "Thrustmaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 }, @@ -376,10 +383,12 @@ static const struct xpad_device { { 0x294b, 0x3404, "Snakebyte GAMEPAD RGB X", 0, XTYPE_XBOXONE }, { 0x2dc8, 0x2000, "8BitDo Pro 2 Wired Controller fox Xbox", 0, XTYPE_XBOXONE }, { 0x2dc8, 0x3106, "8BitDo Ultimate Wireless / Pro 2 Wired Controller", 0, XTYPE_XBOX360 }, - { 0x2dc8, 0x6001, "8BitDo SN30 Pro", 0, XTYPE_XBOX360 }, + { 0x2dc8, 0x3109, "8BitDo Ultimate Wireless Bluetooth", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x310a, "8BitDo Ultimate 2C Wireless Controller", 0, XTYPE_XBOX360 }, + { 0x2dc8, 0x6001, "8BitDo SN30 Pro", 0, XTYPE_XBOX360 }, { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE }, { 0x2e24, 0x1688, "Hyperkin X91 X-Box One pad", 0, XTYPE_XBOXONE }, + { 0x2e95, 0x0504, "SCUF Gaming Controller", MAP_SELECT_BUTTON, XTYPE_XBOXONE }, { 0x31e3, 0x1100, "Wooting One", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1200, "Wooting Two", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1210, "Wooting Lekker", 0, XTYPE_XBOX360 }, @@ -387,12 +396,16 @@ static const struct xpad_device { { 0x31e3, 0x1230, "Wooting Two HE (ARM)", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1300, "Wooting 60HE (AVR)", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1310, "Wooting 60HE (ARM)", 0, XTYPE_XBOX360 }, + { 0x3285, 0x0603, "Nacon Pro Compact controller for Xbox", 0, XTYPE_XBOXONE }, { 0x3285, 0x0607, "Nacon GC-100", 0, XTYPE_XBOX360 }, + { 0x3285, 0x0614, "Nacon Pro Compact", 0, XTYPE_XBOXONE }, { 0x3285, 0x0646, "Nacon Pro Compact", 0, XTYPE_XBOXONE }, + { 0x3285, 0x0662, "Nacon Revolution5 Pro", 0, XTYPE_XBOX360 }, { 0x3285, 0x0663, "Nacon Evol-X", 0, XTYPE_XBOXONE }, { 0x3537, 0x1004, "GameSir T4 Kaleid", 0, XTYPE_XBOX360 }, { 0x3537, 0x1010, "GameSir G7 SE", 0, XTYPE_XBOXONE }, { 0x3767, 0x0101, "Fanatec Speedster 3 Forceshock Wheel", 0, XTYPE_XBOX }, + { 0x413d, 0x2104, "Black Shark Green Ghost Gamepad", 0, XTYPE_XBOX360 }, { 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX }, { 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN } }; @@ -491,6 +504,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x03f0), /* HP HyperX Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x03f0), /* HP HyperX Xbox One controllers */ XPAD_XBOX360_VENDOR(0x044f), /* Thrustmaster Xbox 360 controllers */ + XPAD_XBOXONE_VENDOR(0x044f), /* Thrustmaster Xbox One controllers */ XPAD_XBOX360_VENDOR(0x045e), /* Microsoft Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x045e), /* Microsoft Xbox One controllers */ XPAD_XBOX360_VENDOR(0x046d), /* Logitech Xbox 360-style controllers */ @@ -537,11 +551,13 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOXONE_VENDOR(0x2dc8), /* 8BitDo Controllers */ XPAD_XBOXONE_VENDOR(0x2e24), /* Hyperkin Controllers */ XPAD_XBOX360_VENDOR(0x2f24), /* GameSir Controllers */ + XPAD_XBOXONE_VENDOR(0x2e95), /* SCUF Gaming Controller */ XPAD_XBOX360_VENDOR(0x31e3), /* Wooting Keyboards */ XPAD_XBOX360_VENDOR(0x3285), /* Nacon GC-100 */ XPAD_XBOXONE_VENDOR(0x3285), /* Nacon Evol-X */ XPAD_XBOX360_VENDOR(0x3537), /* GameSir Controllers */ XPAD_XBOXONE_VENDOR(0x3537), /* GameSir Controllers */ + XPAD_XBOX360_VENDOR(0x413d), /* Black Shark Green Ghost Controller */ { } }; @@ -694,7 +710,9 @@ static const struct xboxone_init_packet xboxone_init_packets[] = { XBOXONE_INIT_PKT(0x045e, 0x0b00, xboxone_s_init), XBOXONE_INIT_PKT(0x045e, 0x0b00, extra_input_packet_init), XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_led_on), + XBOXONE_INIT_PKT(0x20d6, 0xa01a, xboxone_pdp_led_on), XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_auth), + XBOXONE_INIT_PKT(0x20d6, 0xa01a, xboxone_pdp_auth), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumblebegin_init), -- GitLab From 9e8b21410f310c50733f6e1730bae5a8e30d3570 Mon Sep 17 00:00:00 2001 From: Selvarasu Ganesan Date: Sat, 18 Jan 2025 11:31:33 +0530 Subject: [PATCH 0168/1585] usb: gadget: f_midi: Fixing wMaxPacketSize exceeded issue during MIDI bind retries The current implementation sets the wMaxPacketSize of bulk in/out endpoints to 1024 bytes at the end of the f_midi_bind function. However, in cases where there is a failure in the first midi bind attempt, consider rebinding. This scenario may encounter an f_midi_bind issue due to the previous bind setting the bulk endpoint's wMaxPacketSize to 1024 bytes, which exceeds the ep->maxpacket_limit where configured dwc3 TX/RX FIFO's maxpacket size of 512 bytes for IN/OUT endpoints in support HS speed only. Here the term "rebind" in this context refers to attempting to bind the MIDI function a second time in certain scenarios. The situations where rebinding is considered include: * When there is a failure in the first UDC write attempt, which may be caused by other functions bind along with MIDI. * Runtime composition change : Example : MIDI,ADB to MIDI. Or MIDI to MIDI,ADB. This commit addresses this issue by resetting the wMaxPacketSize before endpoint claim. And here there is no need to reset all values in the usb endpoint descriptor structure, as all members except wMaxPacketSize and bEndpointAddress have predefined values. This ensures that restores the endpoint to its expected configuration, and preventing conflicts with value of ep->maxpacket_limit. It also aligns with the approach used in other function drivers, which treat endpoint descriptors as if they were full speed before endpoint claim. Fixes: 46decc82ffd5 ("usb: gadget: unconditionally allocate hs/ss descriptor in bind operation") Cc: stable@vger.kernel.org Signed-off-by: Selvarasu Ganesan Link: https://lore.kernel.org/r/20250118060134.927-1-selvarasu.g@samsung.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_midi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c index 837fcdfa3840f..9b991cf5b0f8b 100644 --- a/drivers/usb/gadget/function/f_midi.c +++ b/drivers/usb/gadget/function/f_midi.c @@ -907,6 +907,15 @@ static int f_midi_bind(struct usb_configuration *c, struct usb_function *f) status = -ENODEV; + /* + * Reset wMaxPacketSize with maximum packet size of FS bulk transfer before + * endpoint claim. This ensures that the wMaxPacketSize does not exceed the + * limit during bind retries where configured dwc3 TX/RX FIFO's maxpacket + * size of 512 bytes for IN/OUT endpoints in support HS speed only. + */ + bulk_in_desc.wMaxPacketSize = cpu_to_le16(64); + bulk_out_desc.wMaxPacketSize = cpu_to_le16(64); + /* allocate instance-specific endpoints */ midi->in_ep = usb_ep_autoconfig(cdev->gadget, &bulk_in_desc); if (!midi->in_ep) -- GitLab From 309005e448c1f3e4b81e4416406991b7c3339c1d Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 20 Jan 2025 15:42:51 +0100 Subject: [PATCH 0169/1585] usb: phy: generic: Use proper helper for property detection Since commit c141ecc3cecd7 ("of: Warn when of_property_read_bool() is used on non-boolean properties") a warning is raised if this function is used for property detection. of_property_present() is the correct helper for this. Signed-off-by: Alexander Stein Link: https://lore.kernel.org/r/20250120144251.580981-1-alexander.stein@ew.tq-group.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c index 6c3ececf91375..8423be59ec0ff 100644 --- a/drivers/usb/phy/phy-generic.c +++ b/drivers/usb/phy/phy-generic.c @@ -212,7 +212,7 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop) if (of_property_read_u32(node, "clock-frequency", &clk_rate)) clk_rate = 0; - needs_clk = of_property_read_bool(node, "clocks"); + needs_clk = of_property_present(node, "clocks"); } nop->gpiod_reset = devm_gpiod_get_optional(dev, "reset", GPIOD_ASIS); -- GitLab From 1ed3af5a2aaefd0ecd887ecabdc8da07220e31fe Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Tue, 21 Jan 2025 23:11:23 +0000 Subject: [PATCH 0170/1585] usb: dwc3: Document nostream_work Add missing description to the nostream_work of dwc3_ep. The work is used by bulk multi-stream endpoints for a NoStream event to reinitiate the stream if needed. Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/linux-next/20250120182219.30dcb3c6@canb.auug.org.au/ Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/7cdeaa346d24907712aac533c1c5f90a03151189.1737500936.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index ac7c730f81acf..c955039bb4f62 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -717,6 +717,7 @@ struct dwc3_event_buffer { /** * struct dwc3_ep - device side endpoint representation * @endpoint: usb endpoint + * @nostream_work: work for handling bulk NoStream * @cancelled_list: list of cancelled requests for this endpoint * @pending_list: list of pending requests for this endpoint * @started_list: list of started requests on this endpoint -- GitLab From 335a1fc1193481f8027f176649c72868172f6f8b Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Wed, 22 Jan 2025 03:12:31 -0500 Subject: [PATCH 0171/1585] usb: gadget: udc: renesas_usb3: Fix compiler warning drivers/usb/gadget/udc/renesas_usb3.c: In function 'renesas_usb3_probe': drivers/usb/gadget/udc/renesas_usb3.c:2638:73: warning: '%d' directive output may be truncated writing between 1 and 11 bytes into a region of size 6 [-Wformat-truncation=] 2638 | snprintf(usb3_ep->ep_name, sizeof(usb3_ep->ep_name), "ep%d", i); ^~~~~~~~~~~~~~~~~~~~~~~~ ^~ ^ Fixes: 746bfe63bba3 ("usb: gadget: renesas_usb3: add support for Renesas USB3.0 peripheral controller") Cc: stable@vger.kernel.org Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501201409.BIQPtkeB-lkp@intel.com/ Signed-off-by: Guo Ren Link: https://lore.kernel.org/r/20250122081231.47594-1-guoren@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/renesas_usb3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index fce5c41d9f298..89b304cf6d032 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -310,7 +310,7 @@ struct renesas_usb3_request { struct list_head queue; }; -#define USB3_EP_NAME_SIZE 8 +#define USB3_EP_NAME_SIZE 16 struct renesas_usb3_ep { struct usb_ep ep; struct renesas_usb3 *usb3; -- GitLab From 2240fed37afbcdb5e8b627bc7ad986891100e05d Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 22 Jan 2025 14:26:17 -0500 Subject: [PATCH 0172/1585] USB: hub: Ignore non-compliant devices with too many configs or interfaces Robert Morris created a test program which can cause usb_hub_to_struct_hub() to dereference a NULL or inappropriate pointer: Oops: general protection fault, probably for non-canonical address 0xcccccccccccccccc: 0000 [#1] SMP DEBUG_PAGEALLOC PTI CPU: 7 UID: 0 PID: 117 Comm: kworker/7:1 Not tainted 6.13.0-rc3-00017-gf44d154d6e3d #14 Hardware name: FreeBSD BHYVE/BHYVE, BIOS 14.0 10/17/2021 Workqueue: usb_hub_wq hub_event RIP: 0010:usb_hub_adjust_deviceremovable+0x78/0x110 ... Call Trace: ? die_addr+0x31/0x80 ? exc_general_protection+0x1b4/0x3c0 ? asm_exc_general_protection+0x26/0x30 ? usb_hub_adjust_deviceremovable+0x78/0x110 hub_probe+0x7c7/0xab0 usb_probe_interface+0x14b/0x350 really_probe+0xd0/0x2d0 ? __pfx___device_attach_driver+0x10/0x10 __driver_probe_device+0x6e/0x110 driver_probe_device+0x1a/0x90 __device_attach_driver+0x7e/0xc0 bus_for_each_drv+0x7f/0xd0 __device_attach+0xaa/0x1a0 bus_probe_device+0x8b/0xa0 device_add+0x62e/0x810 usb_set_configuration+0x65d/0x990 usb_generic_driver_probe+0x4b/0x70 usb_probe_device+0x36/0xd0 The cause of this error is that the device has two interfaces, and the hub driver binds to interface 1 instead of interface 0, which is where usb_hub_to_struct_hub() looks. We can prevent the problem from occurring by refusing to accept hub devices that violate the USB spec by having more than one configuration or interface. Reported-and-tested-by: Robert Morris Cc: stable Closes: https://lore.kernel.org/linux-usb/95564.1737394039@localhost/ Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/c27f3bf4-63d8-4fb5-ac82-09e3cd19f61c@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index c3f839637cb5a..0cd44f1fd56d2 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1848,6 +1848,17 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) desc = intf->cur_altsetting; hdev = interface_to_usbdev(intf); + /* + * The USB 2.0 spec prohibits hubs from having more than one + * configuration or interface, and we rely on this prohibition. + * Refuse to accept a device that violates it. + */ + if (hdev->descriptor.bNumConfigurations > 1 || + hdev->actconfig->desc.bNumInterfaces > 1) { + dev_err(&intf->dev, "Invalid hub with more than one config or interface\n"); + return -EINVAL; + } + /* * Set default autosuspend delay as 0 to speedup bus suspend, * based on the below considerations: -- GitLab From 58cd423820d5b5610977e55e4acdd06628829ede Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Fri, 24 Jan 2025 18:33:25 +0100 Subject: [PATCH 0173/1585] usb: dwc2: gadget: remove of_node reference upon udc_stop In dwc2_hsotg_udc_start(), e.g. when binding composite driver, "of_node" is set to hsotg->dev->of_node. It causes errors when binding the gadget driver several times, on stm32mp157c-ev1 board. Below error is seen: "pin PA10 already requested by 49000000.usb-otg; cannot claim for gadget.0" The first time, no issue is seen as when registering the driver, of_node isn't NULL: -> gadget_dev_desc_UDC_store -> usb_gadget_register_driver_owner -> driver_register ... -> really_probe -> pinctrl_bind_pins (no effect) Then dwc2_hsotg_udc_start() sets of_node. The second time (stop the gadget, reconfigure it, then start it again), of_node has been set, so the probing code tries to acquire pins for the gadget. These pins are hold by the controller, hence the error. So clear gadget.dev.of_node in udc_stop() routine to avoid the issue. Fixes: 7d7b22928b90 ("usb: gadget: s3c-hsotg: Propagate devicetree to gadget drivers") Cc: stable Signed-off-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20250124173325.2747710-1-fabrice.gasnier@foss.st.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/gadget.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index e7bf9cc635be6..bd4c788f03bc1 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -4615,6 +4615,7 @@ static int dwc2_hsotg_udc_stop(struct usb_gadget *gadget) spin_lock_irqsave(&hsotg->lock, flags); hsotg->driver = NULL; + hsotg->gadget.dev.of_node = NULL; hsotg->gadget.speed = USB_SPEED_UNKNOWN; hsotg->enabled = 0; -- GitLab From da1668997052ed1cb00322e1f3b63702615c9429 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Thu, 30 Jan 2025 19:50:34 +0000 Subject: [PATCH 0174/1585] usb: gadget: f_midi: fix MIDI Streaming descriptor lengths While the MIDI jacks are configured correctly, and the MIDIStreaming endpoint descriptors are filled with the correct information, bNumEmbMIDIJack and bLength are set incorrectly in these descriptors. This does not matter when the numbers of in and out ports are equal, but when they differ the host will receive broken descriptors with uninitialized stack memory leaking into the descriptor for whichever value is smaller. The precise meaning of "in" and "out" in the port counts is not clearly defined and can be confusing. But elsewhere the driver consistently uses this to match the USB meaning of IN and OUT viewed from the host, so that "in" ports send data to the host and "out" ports receive data from it. Cc: stable Fixes: c8933c3f79568 ("USB: gadget: f_midi: allow a dynamic number of input and output ports") Signed-off-by: John Keeping Reviewed-by: Takashi Iwai Link: https://lore.kernel.org/r/20250130195035.3883857-1-jkeeping@inmusicbrands.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_midi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c index 9b991cf5b0f8b..47260d65066a8 100644 --- a/drivers/usb/gadget/function/f_midi.c +++ b/drivers/usb/gadget/function/f_midi.c @@ -1009,11 +1009,11 @@ static int f_midi_bind(struct usb_configuration *c, struct usb_function *f) } /* configure the endpoint descriptors ... */ - ms_out_desc.bLength = USB_DT_MS_ENDPOINT_SIZE(midi->in_ports); - ms_out_desc.bNumEmbMIDIJack = midi->in_ports; + ms_out_desc.bLength = USB_DT_MS_ENDPOINT_SIZE(midi->out_ports); + ms_out_desc.bNumEmbMIDIJack = midi->out_ports; - ms_in_desc.bLength = USB_DT_MS_ENDPOINT_SIZE(midi->out_ports); - ms_in_desc.bNumEmbMIDIJack = midi->out_ports; + ms_in_desc.bLength = USB_DT_MS_ENDPOINT_SIZE(midi->in_ports); + ms_in_desc.bNumEmbMIDIJack = midi->in_ports; /* ... and add them to the list */ endpoint_descriptor_index = i; -- GitLab From 2255b40cacc2e5ef1b127770fc1808c60de4a2fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 24 Jan 2025 09:43:45 -0500 Subject: [PATCH 0175/1585] drm/amdgpu: add a BO metadata flag to disable write compression for Vulkan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Vulkan can't support DCC and Z/S compression on GFX12 without WRITE_COMPRESS_DISABLE in this commit or a completely different DCC interface. AMDGPU_TILING_GFX12_SCANOUT is added because it's already used by userspace. Cc: stable@vger.kernel.org # 6.12.x Signed-off-by: Marek Olšák Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 ++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 2 ++ drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 5 +++-- include/uapi/drm/amdgpu_drm.h | 9 ++++++++- 5 files changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 817116e53d440..dce9323fb410c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -119,9 +119,10 @@ * - 3.57.0 - Compute tunneling on GFX10+ * - 3.58.0 - Add GFX12 DCC support * - 3.59.0 - Cleared VRAM + * - 3.60.0 - Add AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE (Vulkan requirement) */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 59 +#define KMS_DRIVER_MINOR 60 #define KMS_DRIVER_PATCHLEVEL 0 /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index ff286940ab430..01ae2f88dec8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -309,7 +309,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, mutex_lock(&adev->mman.gtt_window_lock); while (src_mm.remaining) { uint64_t from, to, cur_size, tiling_flags; - uint32_t num_type, data_format, max_com; + uint32_t num_type, data_format, max_com, write_compress_disable; struct dma_fence *next; /* Never copy more than 256MiB at once to avoid a timeout */ @@ -340,9 +340,13 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, max_com = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK); num_type = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE); data_format = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT); + write_compress_disable = + AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_WRITE_COMPRESS_DISABLE); copy_flags |= (AMDGPU_COPY_FLAGS_SET(MAX_COMPRESSED, max_com) | AMDGPU_COPY_FLAGS_SET(NUMBER_TYPE, num_type) | - AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format)); + AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format) | + AMDGPU_COPY_FLAGS_SET(WRITE_COMPRESS_DISABLE, + write_compress_disable)); } r = amdgpu_copy_buffer(ring, from, to, cur_size, resv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 461fb8090ae04..208b7d1d8a277 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -119,6 +119,8 @@ struct amdgpu_copy_mem { #define AMDGPU_COPY_FLAGS_NUMBER_TYPE_MASK 0x07 #define AMDGPU_COPY_FLAGS_DATA_FORMAT_SHIFT 8 #define AMDGPU_COPY_FLAGS_DATA_FORMAT_MASK 0x3f +#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_SHIFT 14 +#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_MASK 0x1 #define AMDGPU_COPY_FLAGS_SET(field, value) \ (((__u32)(value) & AMDGPU_COPY_FLAGS_##field##_MASK) << AMDGPU_COPY_FLAGS_##field##_SHIFT) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 9c17df2cf37b8..7e10e94624e34 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1741,11 +1741,12 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, uint32_t byte_count, uint32_t copy_flags) { - uint32_t num_type, data_format, max_com; + uint32_t num_type, data_format, max_com, write_cm; max_com = AMDGPU_COPY_FLAGS_GET(copy_flags, MAX_COMPRESSED); data_format = AMDGPU_COPY_FLAGS_GET(copy_flags, DATA_FORMAT); num_type = AMDGPU_COPY_FLAGS_GET(copy_flags, NUMBER_TYPE); + write_cm = AMDGPU_COPY_FLAGS_GET(copy_flags, WRITE_COMPRESS_DISABLE) ? 2 : 1; ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | @@ -1762,7 +1763,7 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, if ((copy_flags & (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED))) ib->ptr[ib->length_dw++] = SDMA_DCC_DATA_FORMAT(data_format) | SDMA_DCC_NUM_TYPE(num_type) | ((copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED) ? SDMA_DCC_READ_CM(2) : 0) | - ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(1) : 0) | + ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(write_cm) : 0) | SDMA_DCC_MAX_COM(max_com) | SDMA_DCC_MAX_UCOM(1); else ib->ptr[ib->length_dw++] = 0; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index efe5de6ce208a..aaa4f3bc688b5 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -411,13 +411,20 @@ struct drm_amdgpu_gem_userptr { /* GFX12 and later: */ #define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT 0 #define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK 0x7 -/* These are DCC recompression setting for memory management: */ +/* These are DCC recompression settings for memory management: */ #define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3 #define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 /* 0:64B, 1:128B, 2:256B */ #define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT 5 #define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK 0x7 /* CB_COLOR0_INFO.NUMBER_TYPE */ #define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT 8 #define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK 0x3f /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */ +/* When clearing the buffer or moving it from VRAM to GTT, don't compress and set DCC metadata + * to uncompressed. Set when parts of an allocation bypass DCC and read raw data. */ +#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_SHIFT 14 +#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_MASK 0x1 +/* bit gap */ +#define AMDGPU_TILING_GFX12_SCANOUT_SHIFT 63 +#define AMDGPU_TILING_GFX12_SCANOUT_MASK 0x1 /* Set/Get helpers for tiling flags. */ #define AMDGPU_TILING_SET(field, value) \ -- GitLab From 8adbb2a98b00926315fd513b5fe2596b5716b82d Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 17 Jan 2025 12:37:11 -0700 Subject: [PATCH 0176/1585] drm/amd/display: Fix out-of-bound accesses [WHAT & HOW] hpo_stream_to_link_encoder_mapping has size MAX_HPO_DP2_ENCODERS(=4), but location can have size up to 6. As a result, it is necessary to check location against MAX_HPO_DP2_ENCODERS. Similiarly, disp_cfg_stream_location can be used as an array index which should be 0..5, so the ASSERT's conditions should be less without equal. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3904 Reviewed-by: Austin Zheng Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/dml2/dml21/dml21_translation_helper.c | 4 ++-- .../gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index b9c6b45f6872d..0c8ec30ea6726 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -1017,7 +1017,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s if (disp_cfg_stream_location < 0) disp_cfg_stream_location = dml_dispcfg->num_streams++; - ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); + ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml21_timing_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, context->streams[stream_index], dml_ctx); adjust_dml21_hblank_timing_config_from_pipe_ctx(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, &context->res_ctx.pipe_ctx[stream_index]); populate_dml21_output_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].output, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index]); @@ -1042,7 +1042,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s if (disp_cfg_plane_location < 0) disp_cfg_plane_location = dml_dispcfg->num_planes++; - ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); + ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml21_surface_config_from_plane_state(in_dc, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location].surface, context->stream_status[stream_index].plane_states[plane_index]); populate_dml21_plane_config_from_plane_state(dml_ctx, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->stream_status[stream_index].plane_states[plane_index], context, stream_index); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index b416320873e11..b8a34abaf519a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -786,7 +786,7 @@ static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st * case SIGNAL_TYPE_DISPLAY_PORT_MST: case SIGNAL_TYPE_DISPLAY_PORT: out->OutputEncoder[location] = dml_dp; - if (dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location] != -1) + if (location < MAX_HPO_DP2_ENCODERS && dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location] != -1) out->OutputEncoder[dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location]] = dml_dp2p0; break; case SIGNAL_TYPE_EDP: @@ -1343,7 +1343,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (disp_cfg_stream_location < 0) disp_cfg_stream_location = dml_dispcfg->num_timings++; - ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); + ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_stream_location, context->streams[i]); populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context, dml2); @@ -1383,7 +1383,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (disp_cfg_plane_location < 0) disp_cfg_plane_location = dml_dispcfg->num_surfaces++; - ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); + ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml_surface_cfg_from_plane_state(dml2->v20.dml_core_ctx.project, &dml_dispcfg->surface, disp_cfg_plane_location, context->stream_status[i].plane_states[j]); populate_dml_plane_cfg_from_plane_state( -- GitLab From e01f07cb92513ca4b9b219ab9caa34d607bc1e2d Mon Sep 17 00:00:00 2001 From: Lo-an Chen Date: Fri, 17 Jan 2025 17:56:25 +0800 Subject: [PATCH 0177/1585] drm/amd/display: Fix seamless boot sequence [WHY] When the system powers up eDP with external monitors in seamless boot sequence, stutter get enabled before TTU and HUBP registers being programmed, which resulting in underflow. [HOW] Enable TTU in hubp_init. Change the sequence that do not perpare_bandwidth and optimize_bandwidth while having seamless boot streams. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Nicholas Kazlauskas Signed-off-by: Lo-an Chen Signed-off-by: Paul Hsieh Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +- drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c | 3 ++- drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c | 3 ++- drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c | 3 ++- drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c | 3 ++- drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c | 2 ++ drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c | 2 ++ drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 3 ++- 8 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index cecaadf741ad0..f84e795e35f58 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2133,7 +2133,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c dc_enable_stereo(dc, context, dc_streams, context->stream_count); - if (context->stream_count > get_seamless_boot_stream_count(context) || + if (get_seamless_boot_stream_count(context) == 0 || context->stream_count == 0) { /* Must wait for no flips to be pending before doing optimize bw */ hwss_wait_for_no_pipes_pending(dc, context); diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c index fe741100c0f88..d347bb06577ac 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c @@ -129,7 +129,8 @@ bool hubbub3_program_watermarks( REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF); - hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter) + hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); return wm_pending; } diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c index 7fb5523f97224..b98505b240a79 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c @@ -750,7 +750,8 @@ static bool hubbub31_program_watermarks( REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);*/ - hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter) + hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); return wm_pending; } diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c index 5264dc26cce1f..32a6be543105c 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c @@ -786,7 +786,8 @@ static bool hubbub32_program_watermarks( REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);*/ - hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter) + hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); hubbub32_force_usr_retraining_allow(hubbub, hubbub->ctx->dc->debug.force_usr_allow); diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c index 5eb3da8d5206e..dce7269959ce7 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c @@ -326,7 +326,8 @@ static bool hubbub35_program_watermarks( DCHUBBUB_ARB_MIN_REQ_OUTSTAND_COMMIT_THRESHOLD, 0xA);/*hw delta*/ REG_UPDATE(DCHUBBUB_ARB_HOSTVM_CNTL, DCHUBBUB_ARB_MAX_QOS_COMMIT_THRESHOLD, 0xF); - hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter) + hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); hubbub32_force_usr_retraining_allow(hubbub, hubbub->ctx->dc->debug.force_usr_allow); diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c index be0ac613675a2..0da70b50e86d4 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c @@ -500,6 +500,8 @@ void hubp3_init(struct hubp *hubp) //hubp[i].HUBPREQ_DEBUG.HUBPREQ_DEBUG[26] = 1; REG_WRITE(HUBPREQ_DEBUG, 1 << 26); + REG_UPDATE(DCHUBP_CNTL, HUBP_TTU_DISABLE, 0); + hubp_reset(hubp); } diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c index edd37898d5500..f3a21c623f441 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c @@ -168,6 +168,8 @@ void hubp32_init(struct hubp *hubp) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); REG_WRITE(HUBPREQ_DEBUG_DB, 1 << 8); + + REG_UPDATE(DCHUBP_CNTL, HUBP_TTU_DISABLE, 0); } static struct hubp_funcs dcn32_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 623cde76debfb..b907ad1acedd9 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -236,7 +236,8 @@ void dcn35_init_hw(struct dc *dc) } hws->funcs.init_pipes(dc, dc->current_state); - if (dc->res_pool->hubbub->funcs->allow_self_refresh_control) + if (dc->res_pool->hubbub->funcs->allow_self_refresh_control && + !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter) dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter); } -- GitLab From 588c20079e17dae9e1f49ba42981a05de1c9136e Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Thu, 16 Jan 2025 19:21:55 -0800 Subject: [PATCH 0178/1585] drm/xe/oa: Preserve oa_ctrl unused bits UMD's have interest in setting unused bits of the oa_ctrl register "out of band" for certain experiments. To facilitate this, don't clobber previous oa_ctrl unused bits, i.e. rmw the values rather than simply write them. Fixes: e936f885f1e9 ("drm/xe/oa/uapi: Expose OA stream fd") Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20250117032155.3048063-1-ashutosh.dixit@intel.com (cherry picked from commit cfa9d40db8c30d894171010fe765d96e9bc6a47e) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_oa_regs.h | 6 ++++++ drivers/gpu/drm/xe/xe_oa.c | 12 ++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h index a49561e9f3c31..a79ad2da070c2 100644 --- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -51,6 +51,10 @@ /* Common to all OA units */ #define OA_OACONTROL_REPORT_BC_MASK REG_GENMASK(9, 9) #define OA_OACONTROL_COUNTER_SIZE_MASK REG_GENMASK(8, 8) +#define OAG_OACONTROL_USED_BITS \ + (OAG_OACONTROL_OA_PES_DISAG_EN | OAG_OACONTROL_OA_CCS_SELECT_MASK | \ + OAG_OACONTROL_OA_COUNTER_SEL_MASK | OAG_OACONTROL_OA_COUNTER_ENABLE | \ + OA_OACONTROL_REPORT_BC_MASK | OA_OACONTROL_COUNTER_SIZE_MASK) #define OAG_OA_DEBUG XE_REG(0xdaf8, XE_REG_OPTION_MASKED) #define OAG_OA_DEBUG_DISABLE_MMIO_TRG REG_BIT(14) @@ -78,6 +82,8 @@ #define OAM_CONTEXT_CONTROL_OFFSET (0x1bc) #define OAM_CONTROL_OFFSET (0x194) #define OAM_CONTROL_COUNTER_SEL_MASK REG_GENMASK(3, 1) +#define OAM_OACONTROL_USED_BITS \ + (OAM_CONTROL_COUNTER_SEL_MASK | OAG_OACONTROL_OA_COUNTER_ENABLE) #define OAM_DEBUG_OFFSET (0x198) #define OAM_STATUS_OFFSET (0x19c) #define OAM_MMIO_TRG_OFFSET (0x1d0) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index eeb96b5f49e2a..6a08e6c92835f 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -452,6 +452,12 @@ static u32 __oa_ccs_select(struct xe_oa_stream *stream) return val; } +static u32 __oactrl_used_bits(struct xe_oa_stream *stream) +{ + return stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG ? + OAG_OACONTROL_USED_BITS : OAM_OACONTROL_USED_BITS; +} + static void xe_oa_enable(struct xe_oa_stream *stream) { const struct xe_oa_format *format = stream->oa_buffer.format; @@ -472,14 +478,14 @@ static void xe_oa_enable(struct xe_oa_stream *stream) stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) val |= OAG_OACONTROL_OA_PES_DISAG_EN; - xe_mmio_write32(&stream->gt->mmio, regs->oa_ctrl, val); + xe_mmio_rmw32(&stream->gt->mmio, regs->oa_ctrl, __oactrl_used_bits(stream), val); } static void xe_oa_disable(struct xe_oa_stream *stream) { struct xe_mmio *mmio = &stream->gt->mmio; - xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctrl, 0); + xe_mmio_rmw32(mmio, __oa_regs(stream)->oa_ctrl, __oactrl_used_bits(stream), 0); if (xe_mmio_wait32(mmio, __oa_regs(stream)->oa_ctrl, OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false)) drm_err(&stream->oa->xe->drm, @@ -2534,6 +2540,8 @@ static void __xe_oa_init_oa_units(struct xe_gt *gt) u->type = DRM_XE_OA_UNIT_TYPE_OAM; } + xe_mmio_write32(>->mmio, u->regs.oa_ctrl, 0); + /* Ensure MMIO trigger remains disabled till there is a stream */ xe_mmio_write32(>->mmio, u->regs.oa_debug, oag_configure_mmio_trigger(NULL, false)); -- GitLab From 9f706fd8024208b0686bb8ec68589d758f765672 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 21 Jan 2025 00:24:43 +0100 Subject: [PATCH 0179/1585] drm/xe/pf: Fix migration initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The migration support only needs to be initialized once, but it was incorrectly called from the xe_gt_sriov_pf_init_hw(), which is part of the reset flow and may be called multiple times. Fixes: d86e3737c7ab ("drm/xe/pf: Add functions to save and restore VF GuC state") Signed-off-by: Michal Wajdeczko Cc: Michał Winiarski Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20250120232443.544-1-michal.wajdeczko@intel.com (cherry picked from commit 9ebb5846e1a3b1705f8a7cbc528888a1aa0b163e) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 4 +++- drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 14 +++++++++++++- drivers/gpu/drm/xe/xe_gt_sriov_pf.h | 6 ++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 26e64530ada27..5d6fb79957b63 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -532,8 +532,10 @@ static int all_fw_domain_init(struct xe_gt *gt) if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); - if (IS_SRIOV_PF(gt_to_xe(gt))) + if (IS_SRIOV_PF(gt_to_xe(gt))) { + xe_gt_sriov_pf_init(gt); xe_gt_sriov_pf_init_hw(gt); + } xe_force_wake_put(gt_to_fw(gt), fw_ref); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index e71fc3d2bda22..6f906c8e8108b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -68,6 +68,19 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt) return 0; } +/** + * xe_gt_sriov_pf_init - Prepare SR-IOV PF data structures on PF. + * @gt: the &xe_gt to initialize + * + * Late one-time initialization of the PF data. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_init(struct xe_gt *gt) +{ + return xe_gt_sriov_pf_migration_init(gt); +} + static bool pf_needs_enable_ggtt_guest_update(struct xe_device *xe) { return GRAPHICS_VERx100(xe) == 1200; @@ -90,7 +103,6 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) pf_enable_ggtt_guest_update(gt); xe_gt_sriov_pf_service_update(gt); - xe_gt_sriov_pf_migration_init(gt); } static u32 pf_get_vf_regs_stride(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h index 96fab779a906f..f474509411c0c 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -10,6 +10,7 @@ struct xe_gt; #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_init_early(struct xe_gt *gt); +int xe_gt_sriov_pf_init(struct xe_gt *gt); void xe_gt_sriov_pf_init_hw(struct xe_gt *gt); void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid); void xe_gt_sriov_pf_restart(struct xe_gt *gt); @@ -19,6 +20,11 @@ static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) return 0; } +static inline int xe_gt_sriov_pf_init(struct xe_gt *gt) +{ + return 0; +} + static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) { } -- GitLab From 990d35edc5d333ca6cd3acfdfc13683dc5bb105f Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 15 Jan 2025 14:20:29 -0800 Subject: [PATCH 0180/1585] drm/xe/oa: Set stream->pollin in xe_oa_buffer_check_unlocked We rely on stream->pollin to decide whether or not to block during poll/read calls. However, currently there are blocking read code paths which don't even set stream->pollin. The best place to consistently set stream->pollin for all code paths is therefore to set it in xe_oa_buffer_check_unlocked. Fixes: e936f885f1e9 ("drm/xe/oa/uapi: Expose OA stream fd") Signed-off-by: Ashutosh Dixit Acked-by: Rodrigo Vivi Reviewed-by: Jonathan Cavitt Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20250115222029.3002103-1-ashutosh.dixit@intel.com (cherry picked from commit d3fedff828bb7e4a422c42caeafd5d974e24ee43) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_oa.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 6a08e6c92835f..fa873f3d0a9d1 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -237,7 +237,6 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) u32 tail, hw_tail, partial_report_size, available; int report_size = stream->oa_buffer.format->size; unsigned long flags; - bool pollin; spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); @@ -282,11 +281,11 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) stream->oa_buffer.tail = tail; available = xe_oa_circ_diff(stream, stream->oa_buffer.tail, stream->oa_buffer.head); - pollin = available >= stream->wait_num_reports * report_size; + stream->pollin = available >= stream->wait_num_reports * report_size; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - return pollin; + return stream->pollin; } static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) @@ -294,10 +293,8 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) struct xe_oa_stream *stream = container_of(hrtimer, typeof(*stream), poll_check_timer); - if (xe_oa_buffer_check_unlocked(stream)) { - stream->pollin = true; + if (xe_oa_buffer_check_unlocked(stream)) wake_up(&stream->poll_wq); - } hrtimer_forward_now(hrtimer, ns_to_ktime(stream->poll_period_ns)); -- GitLab From 042c48b73699c47d84b6ace73036e5a31a0d4cfc Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 22 Jan 2025 21:11:11 -0800 Subject: [PATCH 0181/1585] drm/xe/devcoredump: Move exec queue snapshot to Contexts section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Having the exec queue snapshot inside a "GuC CT" section was always wrong. Commit c28fd6c358db ("drm/xe/devcoredump: Improve section headings and add tile info") tried to fix that bug, but with that also broke the mesa tool that parses the devcoredump, hence it was reverted in commit a53da2fb25a3 ("drm/xe: Revert some changes that break a mesa debug tool"). With the mesa tool also fixed, this can propagate as a fix on both kernel and userspace side to avoid unnecessary headache for a debug feature. Cc: John Harrison Cc: Julia Filipchuk Cc: José Roberto de Souza Cc: stable@vger.kernel.org Fixes: a53da2fb25a3 ("drm/xe: Revert some changes that break a mesa debug tool") Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20250123051112.1938193-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit a37934ea75d331fafa7fe80b6180642ba5193422) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_devcoredump.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 81dc7795c0651..a7946a76777e7 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -119,11 +119,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count, drm_puts(&p, "\n**** GuC CT ****\n"); xe_guc_ct_snapshot_print(ss->guc.ct, &p); - /* - * Don't add a new section header here because the mesa debug decoder - * tool expects the context information to be in the 'GuC CT' section. - */ - /* drm_puts(&p, "\n**** Contexts ****\n"); */ + drm_puts(&p, "\n**** Contexts ****\n"); xe_guc_exec_queue_snapshot_print(ss->ge, &p); drm_puts(&p, "\n**** Job ****\n"); -- GitLab From a9ab6591b45258b79af1cb66112fd9f83c8855da Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 23 Jan 2025 12:22:03 -0800 Subject: [PATCH 0182/1585] drm/xe: Fix and re-enable xe_print_blob_ascii85() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 70fb86a85dc9 ("drm/xe: Revert some changes that break a mesa debug tool") partially reverted some changes to workaround breakage caused to mesa tools. However, in doing so it also broke fetching the GuC log via debugfs since xe_print_blob_ascii85() simply bails out. The fix is to avoid the extra newlines: the devcoredump interface is line-oriented and adding random newlines in the middle breaks it. If a tool is able to parse it by looking at the data and checking for chars that are out of the ascii85 space, it can still do so. A format change that breaks the line-oriented output on devcoredump however needs better coordination with existing tools. v2: Add suffix description comment v3: Reword explanation of xe_print_blob_ascii85() calling drm_puts() in a loop Reviewed-by: José Roberto de Souza Cc: John Harrison Cc: Julia Filipchuk Cc: José Roberto de Souza Cc: stable@vger.kernel.org Fixes: 70fb86a85dc9 ("drm/xe: Revert some changes that break a mesa debug tool") Fixes: ec1455ce7e35 ("drm/xe/devcoredump: Add ASCII85 dump helper function") Link: https://patchwork.freedesktop.org/patch/msgid/20250123202307.95103-2-jose.souza@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 2c95bbf5002776117a69caed3b31c10bf7341bec) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_devcoredump.c | 34 +++++++++++------------------ drivers/gpu/drm/xe/xe_devcoredump.h | 2 +- drivers/gpu/drm/xe/xe_guc_ct.c | 3 ++- drivers/gpu/drm/xe/xe_guc_log.c | 4 +++- 4 files changed, 19 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index a7946a76777e7..39fe485d20858 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -391,42 +391,34 @@ int xe_devcoredump_init(struct xe_device *xe) /** * xe_print_blob_ascii85 - print a BLOB to some useful location in ASCII85 * - * The output is split to multiple lines because some print targets, e.g. dmesg - * cannot handle arbitrarily long lines. Note also that printing to dmesg in - * piece-meal fashion is not possible, each separate call to drm_puts() has a - * line-feed automatically added! Therefore, the entire output line must be - * constructed in a local buffer first, then printed in one atomic output call. + * The output is split into multiple calls to drm_puts() because some print + * targets, e.g. dmesg, cannot handle arbitrarily long lines. These targets may + * add newlines, as is the case with dmesg: each drm_puts() call creates a + * separate line. * * There is also a scheduler yield call to prevent the 'task has been stuck for * 120s' kernel hang check feature from firing when printing to a slow target * such as dmesg over a serial port. * - * TODO: Add compression prior to the ASCII85 encoding to shrink huge buffers down. - * * @p: the printer object to output to * @prefix: optional prefix to add to output string + * @suffix: optional suffix to add at the end. 0 disables it and is + * not added to the output, which is useful when using multiple calls + * to dump data to @p * @blob: the Binary Large OBject to dump out * @offset: offset in bytes to skip from the front of the BLOB, must be a multiple of sizeof(u32) * @size: the size in bytes of the BLOB, must be a multiple of sizeof(u32) */ -void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix, const void *blob, size_t offset, size_t size) { const u32 *blob32 = (const u32 *)blob; char buff[ASCII85_BUFSZ], *line_buff; size_t line_pos = 0; - /* - * Splitting blobs across multiple lines is not compatible with the mesa - * debug decoder tool. Note that even dropping the explicit '\n' below - * doesn't help because the GuC log is so big some underlying implementation - * still splits the lines at 512K characters. So just bail completely for - * the moment. - */ - return; - #define DMESG_MAX_LINE_LEN 800 -#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\n\0" */ + /* Always leave space for the suffix char and the \0 */ +#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\0" */ if (size & 3) drm_printf(p, "Size not word aligned: %zu", size); @@ -458,7 +450,6 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, line_pos += strlen(line_buff + line_pos); if ((line_pos + MIN_SPACE) >= DMESG_MAX_LINE_LEN) { - line_buff[line_pos++] = '\n'; line_buff[line_pos++] = 0; drm_puts(p, line_buff); @@ -470,10 +461,11 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, } } + if (suffix) + line_buff[line_pos++] = suffix; + if (line_pos) { - line_buff[line_pos++] = '\n'; line_buff[line_pos++] = 0; - drm_puts(p, line_buff); } diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h index 6a17e6d601022..5391a80a4d1ba 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.h +++ b/drivers/gpu/drm/xe/xe_devcoredump.h @@ -29,7 +29,7 @@ static inline int xe_devcoredump_init(struct xe_device *xe) } #endif -void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix, const void *blob, size_t offset, size_t size); #endif diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 8b65c5e959cc2..50c8076b51585 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1724,7 +1724,8 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, snapshot->g2h_outstanding); if (snapshot->ctb) - xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size); + xe_print_blob_ascii85(p, "CTB data", '\n', + snapshot->ctb, 0, snapshot->ctb_size); } else { drm_puts(p, "CT disabled\n"); } diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index df4cfb698cdbc..2baa4d95571fb 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -211,8 +211,10 @@ void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_ remain = snapshot->size; for (i = 0; i < snapshot->num_chunks; i++) { size_t size = min(GUC_LOG_CHUNK_SIZE, remain); + const char *prefix = i ? NULL : "Log data"; + char suffix = i == snapshot->num_chunks - 1 ? '\n' : 0; - xe_print_blob_ascii85(p, i ? NULL : "Log data", snapshot->copy[i], 0, size); + xe_print_blob_ascii85(p, prefix, suffix, snapshot->copy[i], 0, size); remain -= size; } } -- GitLab From d275a5e0c5f528b4b877ec683b8cd8bfced96af5 Mon Sep 17 00:00:00 2001 From: Devarsh Thakkar Date: Mon, 3 Feb 2025 21:24:31 +0530 Subject: [PATCH 0183/1585] dt-bindings: display: ti: Fix compatible for am62a7 dss Fix incorrect format of compatible string (comma instead of hyphen) for TI's AM62A7 SoC. s/ti,am62a7,dss/ti,am62a7-dss Fixes: 7959ceb767e4 ("dt-bindings: display: ti: Add support for am62a7 dss") Reviewed-by: Krzysztof Kozlowski Signed-off-by: Devarsh Thakkar Link: https://lore.kernel.org/r/20250203155431.2174170-1-devarsht@ti.com Signed-off-by: Rob Herring (Arm) --- Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml b/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml index 55e3e490d0e61..31c4ffcb599cd 100644 --- a/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml +++ b/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml @@ -23,7 +23,7 @@ properties: compatible: enum: - ti,am625-dss - - ti,am62a7,dss + - ti,am62a7-dss - ti,am65x-dss reg: -- GitLab From a96d3e2beca0e51c8444d0a3b6b3ec484c4c5a8f Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Sun, 12 Jan 2025 01:08:11 +0100 Subject: [PATCH 0184/1585] iio: light: apds9306: fix max_scale_nano values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The two provided max_scale_nano values must be multiplied by 100 and 10 respectively to achieve nano units. According to the comments: Max scale for apds0306 is 16.326432 → the fractional part is 0.326432, which is 326432000 in NANO. The current value is 3264320. Max scale for apds0306-065 is 14.09721 → the fractional part is 0.09712, which is 97120000 in NANO. The current value is 9712000. Update max_scale_nano initialization to use the right NANO fractional parts. Cc: stable@vger.kernel.org Fixes: 620d1e6c7a3f ("iio: light: Add support for APDS9306 Light Sensor") Signed-off-by: Javier Carrasco Tested-by: subhajit.ghosh@tweaklogic.com Link: https://patch.msgid.link/20250112-apds9306_nano_vals-v1-1-82fb145d0b16@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/apds9306.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/light/apds9306.c b/drivers/iio/light/apds9306.c index 69a0d609cffc9..5ed7e17f49e76 100644 --- a/drivers/iio/light/apds9306.c +++ b/drivers/iio/light/apds9306.c @@ -108,11 +108,11 @@ static const struct part_id_gts_multiplier apds9306_gts_mul[] = { { .part_id = 0xB1, .max_scale_int = 16, - .max_scale_nano = 3264320, + .max_scale_nano = 326432000, }, { .part_id = 0xB3, .max_scale_int = 14, - .max_scale_nano = 9712000, + .max_scale_nano = 97120000, }, }; -- GitLab From 34d93804199fea23da2645dde35d0feb38a5d445 Mon Sep 17 00:00:00 2001 From: Victor Duicu Date: Fri, 17 Jan 2025 08:53:14 +0200 Subject: [PATCH 0185/1585] iio: adc: pac1921: Move ACPI_FREE() to cover all branches This patch moves ACPI_FREE() in pac1921_match_acpi_device() in order to cover all branches. Reported-by: Andy Shevchenko Fixes: 9fdf1d033316 ("iio: adc: pac1921: Add ACPI support to Microchip pac1921") Acked-by: Matteo Martelli Signed-off-by: Victor Duicu Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20250117065314.4431-1-victor.duicu@microchip.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/pac1921.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/pac1921.c b/drivers/iio/adc/pac1921.c index 90f61c47b1c46..63f5182151565 100644 --- a/drivers/iio/adc/pac1921.c +++ b/drivers/iio/adc/pac1921.c @@ -1198,11 +1198,11 @@ static int pac1921_match_acpi_device(struct iio_dev *indio_dev) label = devm_kstrdup(dev, status->package.elements[0].string.pointer, GFP_KERNEL); + ACPI_FREE(status); if (!label) return -ENOMEM; indio_dev->label = label; - ACPI_FREE(status); return 0; } -- GitLab From aa5119c36d19639397d29ef305aa53a5ecd72b27 Mon Sep 17 00:00:00 2001 From: Nayab Sayed Date: Wed, 15 Jan 2025 11:37:04 +0530 Subject: [PATCH 0186/1585] iio: adc: at91-sama5d2_adc: fix sama7g5 realbits value The number of valid bits in SAMA7G5 ADC channel data register are 16. Hence changing the realbits value to 16 Fixes: 840bf6cb983f ("iio: adc: at91-sama5d2_adc: add support for sama7g5 device") Signed-off-by: Nayab Sayed Link: https://patch.msgid.link/20250115-fix-sama7g5-adc-realbits-v2-1-58a6e4087584@microchip.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/at91-sama5d2_adc.c | 68 ++++++++++++++++++------------ 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index 8e5aaf15a9215..c3a1dea2aa82e 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -329,7 +329,7 @@ static const struct at91_adc_reg_layout sama7g5_layout = { #define AT91_HWFIFO_MAX_SIZE_STR "128" #define AT91_HWFIFO_MAX_SIZE 128 -#define AT91_SAMA5D2_CHAN_SINGLE(index, num, addr) \ +#define AT91_SAMA_CHAN_SINGLE(index, num, addr, rbits) \ { \ .type = IIO_VOLTAGE, \ .channel = num, \ @@ -337,7 +337,7 @@ static const struct at91_adc_reg_layout sama7g5_layout = { .scan_index = index, \ .scan_type = { \ .sign = 'u', \ - .realbits = 14, \ + .realbits = rbits, \ .storagebits = 16, \ }, \ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ @@ -350,7 +350,13 @@ static const struct at91_adc_reg_layout sama7g5_layout = { .indexed = 1, \ } -#define AT91_SAMA5D2_CHAN_DIFF(index, num, num2, addr) \ +#define AT91_SAMA5D2_CHAN_SINGLE(index, num, addr) \ + AT91_SAMA_CHAN_SINGLE(index, num, addr, 14) + +#define AT91_SAMA7G5_CHAN_SINGLE(index, num, addr) \ + AT91_SAMA_CHAN_SINGLE(index, num, addr, 16) + +#define AT91_SAMA_CHAN_DIFF(index, num, num2, addr, rbits) \ { \ .type = IIO_VOLTAGE, \ .differential = 1, \ @@ -360,7 +366,7 @@ static const struct at91_adc_reg_layout sama7g5_layout = { .scan_index = index, \ .scan_type = { \ .sign = 's', \ - .realbits = 14, \ + .realbits = rbits, \ .storagebits = 16, \ }, \ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ @@ -373,6 +379,12 @@ static const struct at91_adc_reg_layout sama7g5_layout = { .indexed = 1, \ } +#define AT91_SAMA5D2_CHAN_DIFF(index, num, num2, addr) \ + AT91_SAMA_CHAN_DIFF(index, num, num2, addr, 14) + +#define AT91_SAMA7G5_CHAN_DIFF(index, num, num2, addr) \ + AT91_SAMA_CHAN_DIFF(index, num, num2, addr, 16) + #define AT91_SAMA5D2_CHAN_TOUCH(num, name, mod) \ { \ .type = IIO_POSITIONRELATIVE, \ @@ -666,30 +678,30 @@ static const struct iio_chan_spec at91_sama5d2_adc_channels[] = { }; static const struct iio_chan_spec at91_sama7g5_adc_channels[] = { - AT91_SAMA5D2_CHAN_SINGLE(0, 0, 0x60), - AT91_SAMA5D2_CHAN_SINGLE(1, 1, 0x64), - AT91_SAMA5D2_CHAN_SINGLE(2, 2, 0x68), - AT91_SAMA5D2_CHAN_SINGLE(3, 3, 0x6c), - AT91_SAMA5D2_CHAN_SINGLE(4, 4, 0x70), - AT91_SAMA5D2_CHAN_SINGLE(5, 5, 0x74), - AT91_SAMA5D2_CHAN_SINGLE(6, 6, 0x78), - AT91_SAMA5D2_CHAN_SINGLE(7, 7, 0x7c), - AT91_SAMA5D2_CHAN_SINGLE(8, 8, 0x80), - AT91_SAMA5D2_CHAN_SINGLE(9, 9, 0x84), - AT91_SAMA5D2_CHAN_SINGLE(10, 10, 0x88), - AT91_SAMA5D2_CHAN_SINGLE(11, 11, 0x8c), - AT91_SAMA5D2_CHAN_SINGLE(12, 12, 0x90), - AT91_SAMA5D2_CHAN_SINGLE(13, 13, 0x94), - AT91_SAMA5D2_CHAN_SINGLE(14, 14, 0x98), - AT91_SAMA5D2_CHAN_SINGLE(15, 15, 0x9c), - AT91_SAMA5D2_CHAN_DIFF(16, 0, 1, 0x60), - AT91_SAMA5D2_CHAN_DIFF(17, 2, 3, 0x68), - AT91_SAMA5D2_CHAN_DIFF(18, 4, 5, 0x70), - AT91_SAMA5D2_CHAN_DIFF(19, 6, 7, 0x78), - AT91_SAMA5D2_CHAN_DIFF(20, 8, 9, 0x80), - AT91_SAMA5D2_CHAN_DIFF(21, 10, 11, 0x88), - AT91_SAMA5D2_CHAN_DIFF(22, 12, 13, 0x90), - AT91_SAMA5D2_CHAN_DIFF(23, 14, 15, 0x98), + AT91_SAMA7G5_CHAN_SINGLE(0, 0, 0x60), + AT91_SAMA7G5_CHAN_SINGLE(1, 1, 0x64), + AT91_SAMA7G5_CHAN_SINGLE(2, 2, 0x68), + AT91_SAMA7G5_CHAN_SINGLE(3, 3, 0x6c), + AT91_SAMA7G5_CHAN_SINGLE(4, 4, 0x70), + AT91_SAMA7G5_CHAN_SINGLE(5, 5, 0x74), + AT91_SAMA7G5_CHAN_SINGLE(6, 6, 0x78), + AT91_SAMA7G5_CHAN_SINGLE(7, 7, 0x7c), + AT91_SAMA7G5_CHAN_SINGLE(8, 8, 0x80), + AT91_SAMA7G5_CHAN_SINGLE(9, 9, 0x84), + AT91_SAMA7G5_CHAN_SINGLE(10, 10, 0x88), + AT91_SAMA7G5_CHAN_SINGLE(11, 11, 0x8c), + AT91_SAMA7G5_CHAN_SINGLE(12, 12, 0x90), + AT91_SAMA7G5_CHAN_SINGLE(13, 13, 0x94), + AT91_SAMA7G5_CHAN_SINGLE(14, 14, 0x98), + AT91_SAMA7G5_CHAN_SINGLE(15, 15, 0x9c), + AT91_SAMA7G5_CHAN_DIFF(16, 0, 1, 0x60), + AT91_SAMA7G5_CHAN_DIFF(17, 2, 3, 0x68), + AT91_SAMA7G5_CHAN_DIFF(18, 4, 5, 0x70), + AT91_SAMA7G5_CHAN_DIFF(19, 6, 7, 0x78), + AT91_SAMA7G5_CHAN_DIFF(20, 8, 9, 0x80), + AT91_SAMA7G5_CHAN_DIFF(21, 10, 11, 0x88), + AT91_SAMA7G5_CHAN_DIFF(22, 12, 13, 0x90), + AT91_SAMA7G5_CHAN_DIFF(23, 14, 15, 0x98), IIO_CHAN_SOFT_TIMESTAMP(24), AT91_SAMA5D2_CHAN_TEMP(AT91_SAMA7G5_ADC_TEMP_CHANNEL, "temp", 0xdc), }; -- GitLab From 5d702aa2a47bbab6231382f9ead5be40a287a53b Mon Sep 17 00:00:00 2001 From: Dheeraj Reddy Jonnalagadda Date: Tue, 7 Jan 2025 18:05:10 +0530 Subject: [PATCH 0187/1585] iio: proximity: Fix use-after-free in hx9023s_send_cfg() Reorder the assignment of fw_size to happen before release_firmware() to avoid accessing the firmware structure after it's been freed. Fixes: e9ed97be4fcc ("iio: proximity: hx9023s: Added firmware file parsing functionality") Closes: https://scan7.scan.coverity.com/#/project-view/52337/11354?selectedIssue=1602791 Signed-off-by: Dheeraj Reddy Jonnalagadda Reviewed-by: David Lechner Link: https://patch.msgid.link/20250107123510.44978-1-dheeraj.linuxdev@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/hx9023s.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/proximity/hx9023s.c b/drivers/iio/proximity/hx9023s.c index e092a935dbac7..5aa8e5a22f326 100644 --- a/drivers/iio/proximity/hx9023s.c +++ b/drivers/iio/proximity/hx9023s.c @@ -1036,12 +1036,13 @@ static int hx9023s_send_cfg(const struct firmware *fw, struct hx9023s_data *data return -ENOMEM; memcpy(bin->data, fw->data, fw->size); - release_firmware(fw); bin->fw_size = fw->size; bin->fw_ver = bin->data[FW_VER_OFFSET]; bin->reg_count = get_unaligned_le16(bin->data + FW_REG_CNT_OFFSET); + release_firmware(fw); + return hx9023s_bin_load(data, bin); } -- GitLab From 4eba4d92906c3814ca3ec65c16af27c46c12342e Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Mon, 16 Dec 2024 10:05:53 +0000 Subject: [PATCH 0188/1585] iio: hid-sensor-prox: Split difference from multiple channels When the driver was originally created, it was decided that sampling_frequency and hysteresis would be shared_per_type instead of shared_by_all (even though it is internally shared by all). Eg: in_proximity_raw in_proximity_sampling_frequency When we introduced support for more channels, we continued with shared_by_type which. Eg: in_proximity0_raw in_proximity1_raw in_proximity_sampling_frequency in_attention_raw in_attention_sampling_frequency Ideally we should change to shared_by_all, but it is not an option, because the current naming has been a stablished ABI by now. Luckily we can use separate instead. That will be more consistent: in_proximity0_raw in_proximity0_sampling_frequency in_proximity1_raw in_proximity1_sampling_frequency in_attention_raw in_attention_sampling_frequency Fixes: 596ef5cf654b ("iio: hid-sensor-prox: Add support for more channels") Signed-off-by: Ricardo Ribalda Link: https://patch.msgid.link/20241216-fix-hid-sensor-v2-1-ff8c1959ec4a@chromium.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/hid-sensor-prox.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c index 7ab64f5c623c1..76b76d12b3882 100644 --- a/drivers/iio/light/hid-sensor-prox.c +++ b/drivers/iio/light/hid-sensor-prox.c @@ -49,9 +49,10 @@ static const u32 prox_sensitivity_addresses[] = { #define PROX_CHANNEL(_is_proximity, _channel) \ {\ .type = _is_proximity ? IIO_PROXIMITY : IIO_ATTENTION,\ - .info_mask_separate = _is_proximity ? BIT(IIO_CHAN_INFO_RAW) :\ - BIT(IIO_CHAN_INFO_PROCESSED),\ - .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_OFFSET) |\ + .info_mask_separate = \ + (_is_proximity ? BIT(IIO_CHAN_INFO_RAW) :\ + BIT(IIO_CHAN_INFO_PROCESSED)) |\ + BIT(IIO_CHAN_INFO_OFFSET) |\ BIT(IIO_CHAN_INFO_SCALE) |\ BIT(IIO_CHAN_INFO_SAMP_FREQ) |\ BIT(IIO_CHAN_INFO_HYSTERESIS),\ -- GitLab From 21d7241faf406e8aee3ce348451cc362d5db6a02 Mon Sep 17 00:00:00 2001 From: Markus Burri Date: Fri, 24 Jan 2025 16:07:03 +0100 Subject: [PATCH 0189/1585] iio: adc: ad7192: fix channel select MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Channel configuration doesn't work as expected. For FIELD_PREP the bit mask is needed and not the bit number. Fixes: 874bbd1219c7 ("iio: adc: ad7192: Use bitfield access macros") Signed-off-by: Markus Burri Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250124150703.97848-1-markus.burri@mt.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7192.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c index e96a5ae92375d..cfaf8f7e0a07d 100644 --- a/drivers/iio/adc/ad7192.c +++ b/drivers/iio/adc/ad7192.c @@ -1084,7 +1084,7 @@ static int ad7192_update_scan_mode(struct iio_dev *indio_dev, const unsigned lon conf &= ~AD7192_CONF_CHAN_MASK; for_each_set_bit(i, scan_mask, 8) - conf |= FIELD_PREP(AD7192_CONF_CHAN_MASK, i); + conf |= FIELD_PREP(AD7192_CONF_CHAN_MASK, BIT(i)); ret = ad_sd_write_reg(&st->sd, AD7192_REG_CONF, 3, conf); if (ret < 0) -- GitLab From e17b9f20da7d2bc1f48878ab2230523b2512d965 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Sat, 25 Jan 2025 17:24:32 +0100 Subject: [PATCH 0190/1585] iio: dac: ad3552r: clear reset status flag Clear reset status flag, to keep error status register clean after reset (ad3552r manual, rev B table 38). Reset error flag was left to 1, so debugging registers, the "Error Status Register" was dirty (0x01). It is important to clear this bit, so if there is any reset event over normal working mode, it is possible to detect it. Fixes: 8f2b54824b28 ("drivers:iio:dac: Add AD3552R driver support") Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250125-wip-bl-ad3552r-clear-reset-v2-1-aa3a27f3ff8c@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad3552r.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iio/dac/ad3552r.c b/drivers/iio/dac/ad3552r.c index e7206af53af61..7944f5c1d264d 100644 --- a/drivers/iio/dac/ad3552r.c +++ b/drivers/iio/dac/ad3552r.c @@ -410,6 +410,12 @@ static int ad3552r_reset(struct ad3552r_desc *dac) return ret; } + /* Clear reset error flag, see ad3552r manual, rev B table 38. */ + ret = ad3552r_write_reg(dac, AD3552R_REG_ADDR_ERR_STATUS, + AD3552R_MASK_RESET_STATUS); + if (ret) + return ret; + return ad3552r_update_reg_field(dac, AD3552R_REG_ADDR_INTERFACE_CONFIG_A, AD3552R_MASK_ADDR_ASCENSION, -- GitLab From 02ccd7e5d81af4ae20852fc1ad67e7d943fa5778 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Wed, 29 Jan 2025 12:03:02 +0100 Subject: [PATCH 0191/1585] dt-bindings: iio: dac: adi-axi-adc: fix ad7606 pwm-names Fix make dt_binding_check warning: DTC [C] Documentation/devicetree/bindings/iio/adc/adi,axi-adc.example.dtb .../adc/adi,axi-adc.example.dtb: adc@0: pwm-names: ['convst1'] is too short from schema $id: http://devicetree.org/schemas/iio/adc/adi,ad7606.yaml# Add "minItems" to pwm-names, it allows to use one single pwm when connected to both adc conversion inputs. Fixes: 7c2357b10490 ("dt-bindings: iio: adc: ad7606: Add iio backend bindings") Signed-off-by: Angelo Dureghello Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20250129-wip-bl-ad7606_add_backend_sw_mode-v3-1-c3aec77c0ab7@baylibre.com Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml index ab5881d0d017f..52d3f1ce33678 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml @@ -146,6 +146,7 @@ properties: maxItems: 2 pwm-names: + minItems: 1 items: - const: convst1 - const: convst2 -- GitLab From bead181694df16de464ca2392d0cec2cf15fb978 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Wed, 29 Jan 2025 12:03:04 +0100 Subject: [PATCH 0192/1585] iio: adc: ad7606: fix wrong scale available Fix wrong scale available list since only one value is returned: ... iio:device1: ad7606b (buffer capable) 8 channels found: voltage0: (input, index: 0, format: le:S16/16>>0) 2 channel-specific attributes found: attr 0: scale value: 0.305176 attr 1: scale_available value: 0.076293 Fix as: voltage0: (input, index: 0, format: le:S16/16>>0) 2 channel-specific attributes found: attr 0: scale value: 0.305176 attr 1: scale_available value: 0.076293 0.152588 0.305176 Fixes: 97c6d857041d ("iio: adc: ad7606: rework scale-available to be static") Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250129-wip-bl-ad7606_add_backend_sw_mode-v3-3-c3aec77c0ab7@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7606.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7606.c b/drivers/iio/adc/ad7606.c index d8e3c7a43678c..d39354afd5394 100644 --- a/drivers/iio/adc/ad7606.c +++ b/drivers/iio/adc/ad7606.c @@ -1047,7 +1047,7 @@ static int ad7606_read_avail(struct iio_dev *indio_dev, cs = &st->chan_scales[ch]; *vals = (int *)cs->scale_avail; - *length = cs->num_scales; + *length = cs->num_scales * 2; *type = IIO_VAL_INT_PLUS_MICRO; return IIO_AVAIL_LIST; -- GitLab From 1ddee69108d305bbc059cbf31c0b47626796be77 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 17 Jan 2025 16:21:45 +0200 Subject: [PATCH 0193/1585] pinctrl: cy8c95x0: Respect IRQ trigger settings from firmware Some of the platforms may connect the INT pin via inversion logic effectively make the triggering to be active-low. Remove explicit trigger flag to respect the settings from firmware. Without this change even idling chip produces spurious interrupts and kernel disables the line in the result: irq 33: nobody cared (try booting with the "irqpoll" option) CPU: 0 UID: 0 PID: 125 Comm: irq/33-i2c-INT3 Not tainted 6.12.0-00236-g8b874ed11dae #64 Hardware name: Intel Corp. QUARK/Galileo, BIOS 0x01000900 01/01/2014 ... handlers: [<86e86bea>] irq_default_primary_handler threaded [] cy8c95x0_irq_handler [pinctrl_cy8c95x0] Disabling IRQ #33 Fixes: e6cbbe42944d ("pinctrl: Add Cypress cy8c95x0 support") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/20250117142304.596106-2-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-cy8c95x0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-cy8c95x0.c b/drivers/pinctrl/pinctrl-cy8c95x0.c index 75100a9fb8e4c..d73004b4a45e7 100644 --- a/drivers/pinctrl/pinctrl-cy8c95x0.c +++ b/drivers/pinctrl/pinctrl-cy8c95x0.c @@ -1355,7 +1355,7 @@ static int cy8c95x0_irq_setup(struct cy8c95x0_pinctrl *chip, int irq) ret = devm_request_threaded_irq(chip->dev, irq, NULL, cy8c95x0_irq_handler, - IRQF_ONESHOT | IRQF_SHARED | IRQF_TRIGGER_HIGH, + IRQF_ONESHOT | IRQF_SHARED, dev_name(chip->dev), chip); if (ret) { dev_err(chip->dev, "failed to request irq %d\n", irq); -- GitLab From 902e09c8acde117b00369521f54df817a983d4ab Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 3 Feb 2025 16:16:09 -0500 Subject: [PATCH 0194/1585] fix braino in "9p: fix ->rename_sem exclusion" ->d_op can bloody well be NULL Fucked-up-by: Al Viro Fixes: 30d61efe118c "9p: fix ->rename_sem exclusion" Signed-off-by: Al Viro --- fs/dcache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 903142b324e98..8a605681b26ff 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2967,11 +2967,11 @@ static int __d_unalias(struct dentry *dentry, struct dentry *alias) goto out_err; m2 = &alias->d_parent->d_inode->i_rwsem; out_unalias: - if (alias->d_op->d_unalias_trylock && + if (alias->d_op && alias->d_op->d_unalias_trylock && !alias->d_op->d_unalias_trylock(alias)) goto out_err; __d_move(alias, dentry, false); - if (alias->d_op->d_unalias_unlock) + if (alias->d_op && alias->d_op->d_unalias_unlock) alias->d_op->d_unalias_unlock(alias); ret = 0; out_err: -- GitLab From 3a4e7193ec37ee2476ce726589de4495a066b565 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 1 Feb 2025 16:50:24 -0800 Subject: [PATCH 0195/1585] MAINTAINERS: list openvswitch docs under its entry Submissions to the docs seem to not get properly CCed. Acked-by: Ilya Maximets Link: https://patch.msgid.link/20250202005024.964262-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index d1086e53a3176..c7b8c6535a1e2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17706,6 +17706,7 @@ L: netdev@vger.kernel.org L: dev@openvswitch.org S: Maintained W: http://openvswitch.org +F: Documentation/networking/openvswitch.rst F: include/uapi/linux/openvswitch.h F: net/openvswitch/ F: tools/testing/selftests/net/openvswitch/ -- GitLab From 4d896b35394144c246daaeb5280a015a630958e7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 1 Feb 2025 17:47:26 -0800 Subject: [PATCH 0196/1585] MAINTAINERS: add Kuniyuki Iwashima to TCP reviewers List Kuniyuki as an official TCP reviewer. Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250202014728.1005003-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index c7b8c6535a1e2..48677d61c97bd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16614,6 +16614,7 @@ F: tools/testing/selftests/net/mptcp/ NETWORKING [TCP] M: Eric Dumazet M: Neal Cardwell +R: Kuniyuki Iwashima L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/net_cachelines/tcp_sock.rst -- GitLab From ae0585b04ab741b536b0db20c12baf24bf7118d2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 1 Feb 2025 17:47:27 -0800 Subject: [PATCH 0197/1585] MAINTAINERS: add a general entry for BSD sockets Create a MAINTAINERS entry for BSD sockets. List the top 3 reviewers as maintainers. The entry is meant to cover core socket code (of which there isn't much) but also reviews of any new socket families. Reviewed-by: Simon Horman Acked-by: Willem de Bruijn Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250202014728.1005003-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 48677d61c97bd..438d85bb97a22 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16642,6 +16642,22 @@ F: include/net/tls.h F: include/uapi/linux/tls.h F: net/tls/* +NETWORKING [SOCKETS] +M: Eric Dumazet +M: Kuniyuki Iwashima +M: Paolo Abeni +M: Willem de Bruijn +S: Maintained +F: include/linux/sock_diag.h +F: include/linux/socket.h +F: include/linux/sockptr.h +F: include/net/sock.h +F: include/net/sock_reuseport.h +F: include/uapi/linux/socket.h +F: net/core/*sock* +F: net/core/scm.c +F: net/socket.c + NETXEN (1/10) GbE SUPPORT M: Manish Chopra M: Rahul Verma -- GitLab From 8a2e22f665a0b5c212057031e94b75cfdc11a4a6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 1 Feb 2025 17:47:28 -0800 Subject: [PATCH 0198/1585] MAINTAINERS: add entry for UNIX sockets Add a MAINTAINERS entry for UNIX socket, Kuniyuki has been the de-facto maintainer of this code for a while. Reviewed-by: Simon Horman Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250202014728.1005003-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 438d85bb97a22..74b09dad46626 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16658,6 +16658,15 @@ F: net/core/*sock* F: net/core/scm.c F: net/socket.c +NETWORKING [UNIX SOCKETS] +M: Kuniyuki Iwashima +S: Maintained +F: include/net/af_unix.h +F: include/net/netns/unix.h +F: include/uapi/linux/unix_diag.h +F: net/unix/ +F: tools/testing/selftests/net/af_unix/ + NETXEN (1/10) GbE SUPPORT M: Manish Chopra M: Rahul Verma -- GitLab From 45ab5166a82d038c898985b0ad43ead69c1f9573 Mon Sep 17 00:00:00 2001 From: Charles Han Date: Thu, 14 Nov 2024 17:19:47 +0800 Subject: [PATCH 0199/1585] HID: winwing: Add NULL check in winwing_init_led() devm_kasprintf() can return a NULL pointer on failure,but this returned value in winwing_init_led() is not checked. Add NULL check in winwing_init_led(), to handle kernel NULL pointer dereference error. Fixes: 266c990debad ("HID: Add WinWing Orion2 throttle support") Signed-off-by: Charles Han Signed-off-by: Jiri Kosina --- drivers/hid/hid-winwing.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-winwing.c b/drivers/hid/hid-winwing.c index 831b760c66ea7..d4afbbd278079 100644 --- a/drivers/hid/hid-winwing.c +++ b/drivers/hid/hid-winwing.c @@ -106,6 +106,8 @@ static int winwing_init_led(struct hid_device *hdev, "%s::%s", dev_name(&input->dev), info->led_name); + if (!led->cdev.name) + return -ENOMEM; ret = devm_led_classdev_register(&hdev->dev, &led->cdev); if (ret) -- GitLab From 9b8e2220d3a052a690b1d1b23019673e612494c5 Mon Sep 17 00:00:00 2001 From: Charles Han Date: Fri, 15 Nov 2024 14:26:21 +0800 Subject: [PATCH 0200/1585] HID: multitouch: Add NULL check in mt_input_configured devm_kasprintf() can return a NULL pointer on failure,but this returned value in mt_input_configured() is not checked. Add NULL check in mt_input_configured(), to handle kernel NULL pointer dereference error. Fixes: 479439463529 ("HID: multitouch: Correct devm device reference for hidinput input_dev name") Signed-off-by: Charles Han Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 82900857bfd87..e50887a6d22c2 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1679,9 +1679,12 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) break; } - if (suffix) + if (suffix) { hi->input->name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s %s", hdev->name, suffix); + if (!hi->input->name) + return -ENOMEM; + } return 0; } -- GitLab From 3d4114a1d34413dfffa0094c2eb7b95e61087abd Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Tue, 28 Jan 2025 09:12:06 +0200 Subject: [PATCH 0201/1585] scsi: ufs: core: Ensure clk_gating.lock is used only after initialization Address a lockdep warning triggered by the use of the clk_gating.lock before it is properly initialized. The warning is as follows: [ 4.388838] INFO: trying to register non-static key. [ 4.395673] The code is fine but needs lockdep annotation, or maybe [ 4.402118] you didn't initialize this object before use? [ 4.407673] turning off the locking correctness validator. [ 4.413334] CPU: 5 UID: 0 PID: 58 Comm: kworker/u32:1 Not tainted 6.12-rc1 #185 [ 4.413343] Hardware name: Qualcomm Technologies, Inc. Robotics RB5 (DT) [ 4.413362] Call trace: [ 4.413364] show_stack+0x18/0x24 (C) [ 4.413374] dump_stack_lvl+0x90/0xd0 [ 4.413384] dump_stack+0x18/0x24 [ 4.413392] register_lock_class+0x498/0x4a8 [ 4.413400] __lock_acquire+0xb4/0x1b90 [ 4.413406] lock_acquire+0x114/0x310 [ 4.413413] _raw_spin_lock_irqsave+0x60/0x88 [ 4.413423] ufshcd_setup_clocks+0x2c0/0x490 [ 4.413433] ufshcd_init+0x198/0x10ec [ 4.413437] ufshcd_pltfrm_init+0x600/0x7c0 [ 4.413444] ufs_qcom_probe+0x20/0x58 [ 4.413449] platform_probe+0x68/0xd8 [ 4.413459] really_probe+0xbc/0x268 [ 4.413466] __driver_probe_device+0x78/0x12c [ 4.413473] driver_probe_device+0x40/0x11c [ 4.413481] __device_attach_driver+0xb8/0xf8 [ 4.413489] bus_for_each_drv+0x84/0xe4 [ 4.413495] __device_attach+0xfc/0x18c [ 4.413502] device_initial_probe+0x14/0x20 [ 4.413510] bus_probe_device+0xb0/0xb4 [ 4.413517] deferred_probe_work_func+0x8c/0xc8 [ 4.413524] process_scheduled_works+0x250/0x658 [ 4.413534] worker_thread+0x15c/0x2c8 [ 4.413542] kthread+0x134/0x200 [ 4.413550] ret_from_fork+0x10/0x20 To fix this issue, ensure that the spinlock is only used after it has been properly initialized before using it in ufshcd_setup_clocks(). Do that unconditionally as initializing a spinlock is a fast operation. Fixes: 209f4e43b806 ("scsi: ufs: core: Introduce a new clock_gating lock") Reported-by: Dmitry Baryshkov Tested-by: Geert Uytterhoeven Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20250128071207.75494-2-avri.altman@wdc.com Reviewed-by: Bean Huo Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 356e1be33f534..db20b1d505b79 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -2120,8 +2120,6 @@ static void ufshcd_init_clk_gating(struct ufs_hba *hba) INIT_DELAYED_WORK(&hba->clk_gating.gate_work, ufshcd_gate_work); INIT_WORK(&hba->clk_gating.ungate_work, ufshcd_ungate_work); - spin_lock_init(&hba->clk_gating.lock); - hba->clk_gating.clk_gating_workq = alloc_ordered_workqueue( "ufs_clk_gating_%d", WQ_MEM_RECLAIM | WQ_HIGHPRI, hba->host->host_no); @@ -10411,6 +10409,12 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) hba->irq = irq; hba->vps = &ufs_hba_vps; + /* + * Initialize clk_gating.lock early since it is being used in + * ufshcd_setup_clocks() + */ + spin_lock_init(&hba->clk_gating.lock); + err = ufshcd_hba_init(hba); if (err) goto out_error; -- GitLab From 839a74b5649c9f41d939a05059b5ca6b17156d03 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Tue, 28 Jan 2025 09:12:07 +0200 Subject: [PATCH 0202/1585] scsi: ufs: Fix toggling of clk_gating.state when clock gating is not allowed This commit addresses an issue where clk_gating.state is being toggled in ufshcd_setup_clocks() even if clock gating is not allowed. The fix is to add a check for hba->clk_gating.is_initialized before toggling clk_gating.state in ufshcd_setup_clocks(). Since clk_gating.lock is now initialized unconditionally, it can no longer lead to the spinlock being used before it is properly initialized, but instead it is mostly for documentation purposes. Fixes: 1ab27c9cf8b6 ("ufs: Add support for clock gating") Reported-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20250128071207.75494-3-avri.altman@wdc.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index db20b1d505b79..d3741b1f43821 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -9140,7 +9140,7 @@ static int ufshcd_setup_clocks(struct ufs_hba *hba, bool on) if (!IS_ERR_OR_NULL(clki->clk) && clki->enabled) clk_disable_unprepare(clki->clk); } - } else if (!ret && on) { + } else if (!ret && on && hba->clk_gating.is_initialized) { scoped_guard(spinlock_irqsave, &hba->clk_gating.lock) hba->clk_gating.state = CLKS_ON; trace_ufshcd_clk_gating(dev_name(hba->dev), -- GitLab From 18c966b62819b9d3b99eac8fb8cdc8950826e0c2 Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Thu, 23 Jan 2025 09:30:44 +0800 Subject: [PATCH 0203/1585] HID: intel-ish-hid: ipc: Add Panther Lake PCI device IDs Add device IDs of Panther Lake-H and Panther Lake-P into ishtp support list. Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ipc/hw-ish.h | 2 ++ drivers/hid/intel-ish-hid/ipc/pci-ish.c | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/hid/intel-ish-hid/ipc/hw-ish.h b/drivers/hid/intel-ish-hid/ipc/hw-ish.h index cdd80c653918b..07e90d51f073c 100644 --- a/drivers/hid/intel-ish-hid/ipc/hw-ish.h +++ b/drivers/hid/intel-ish-hid/ipc/hw-ish.h @@ -36,6 +36,8 @@ #define PCI_DEVICE_ID_INTEL_ISH_ARL_H 0x7745 #define PCI_DEVICE_ID_INTEL_ISH_ARL_S 0x7F78 #define PCI_DEVICE_ID_INTEL_ISH_LNL_M 0xA845 +#define PCI_DEVICE_ID_INTEL_ISH_PTL_H 0xE345 +#define PCI_DEVICE_ID_INTEL_ISH_PTL_P 0xE445 #define REVISION_ID_CHT_A0 0x6 #define REVISION_ID_CHT_Ax_SI 0x0 diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index 9e2401291a2f6..ff0fc80100728 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c @@ -26,9 +26,11 @@ enum ishtp_driver_data_index { ISHTP_DRIVER_DATA_NONE, ISHTP_DRIVER_DATA_LNL_M, + ISHTP_DRIVER_DATA_PTL, }; #define ISH_FW_GEN_LNL_M "lnlm" +#define ISH_FW_GEN_PTL "ptl" #define ISH_FIRMWARE_PATH(gen) "intel/ish/ish_" gen ".bin" #define ISH_FIRMWARE_PATH_ALL "intel/ish/ish_*.bin" @@ -37,6 +39,9 @@ static struct ishtp_driver_data ishtp_driver_data[] = { [ISHTP_DRIVER_DATA_LNL_M] = { .fw_generation = ISH_FW_GEN_LNL_M, }, + [ISHTP_DRIVER_DATA_PTL] = { + .fw_generation = ISH_FW_GEN_PTL, + }, }; static const struct pci_device_id ish_pci_tbl[] = { @@ -63,6 +68,8 @@ static const struct pci_device_id ish_pci_tbl[] = { {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_ARL_H)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_ARL_S)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_LNL_M), .driver_data = ISHTP_DRIVER_DATA_LNL_M}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_PTL_H), .driver_data = ISHTP_DRIVER_DATA_PTL}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_PTL_P), .driver_data = ISHTP_DRIVER_DATA_PTL}, {} }; MODULE_DEVICE_TABLE(pci, ish_pci_tbl); -- GitLab From 9271af9d846c7e49c8709b58d5853cb73c00b193 Mon Sep 17 00:00:00 2001 From: Daniel Brackenbury Date: Tue, 28 Jan 2025 20:08:49 -0500 Subject: [PATCH 0204/1585] HID: topre: Fix n-key rollover on Realforce R3S TKL boards Newer model R3* Topre Realforce keyboards share an issue with their older R2 cousins where a report descriptor fixup is needed in order for n-key rollover to work correctly, otherwise only 6-key rollover is available. This patch adds some new hardware IDs for the R3S 87-key keyboard and makes amendments to the existing hid-topre driver in order to change the correct byte in the new model. Signed-off-by: Daniel Brackenbury Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 3 ++- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-topre.c | 7 +++++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index ed657ef7281c8..dfc245867a46a 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -1169,7 +1169,8 @@ config HID_TOPRE tristate "Topre REALFORCE keyboards" depends on HID help - Say Y for N-key rollover support on Topre REALFORCE R2 108/87 key keyboards. + Say Y for N-key rollover support on Topre REALFORCE R2 108/87 key and + Topre REALFORCE R3S 87 key keyboards. config HID_THINGM tristate "ThingM blink(1) USB RGB LED" diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 7debfe0c5cb98..ed1d7f9e8caf4 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1302,6 +1302,7 @@ #define USB_VENDOR_ID_TOPRE 0x0853 #define USB_DEVICE_ID_TOPRE_REALFORCE_R2_108 0x0148 #define USB_DEVICE_ID_TOPRE_REALFORCE_R2_87 0x0146 +#define USB_DEVICE_ID_TOPRE_REALFORCE_R3S_87 0x0313 #define USB_VENDOR_ID_TOPSEED 0x0766 #define USB_DEVICE_ID_TOPSEED_CYBERLINK 0x0204 diff --git a/drivers/hid/hid-topre.c b/drivers/hid/hid-topre.c index 848361f6225df..ccedf8721722e 100644 --- a/drivers/hid/hid-topre.c +++ b/drivers/hid/hid-topre.c @@ -29,6 +29,11 @@ static const __u8 *topre_report_fixup(struct hid_device *hdev, __u8 *rdesc, hid_info(hdev, "fixing up Topre REALFORCE keyboard report descriptor\n"); rdesc[72] = 0x02; + } else if (*rsize >= 106 && rdesc[28] == 0x29 && rdesc[29] == 0xe7 && + rdesc[30] == 0x81 && rdesc[31] == 0x00) { + hid_info(hdev, + "fixing up Topre REALFORCE keyboard report descriptor\n"); + rdesc[31] = 0x02; } return rdesc; } @@ -38,6 +43,8 @@ static const struct hid_device_id topre_id_table[] = { USB_DEVICE_ID_TOPRE_REALFORCE_R2_108) }, { HID_USB_DEVICE(USB_VENDOR_ID_TOPRE, USB_DEVICE_ID_TOPRE_REALFORCE_R2_87) }, + { HID_USB_DEVICE(USB_VENDOR_ID_TOPRE, + USB_DEVICE_ID_TOPRE_REALFORCE_R3S_87) }, { } }; MODULE_DEVICE_TABLE(hid, topre_id_table); -- GitLab From 5363ee9d110e139584c2d92a0b640bc210588506 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 28 Jan 2025 16:35:39 -0500 Subject: [PATCH 0205/1585] scsi: core: Use GFP_NOIO to avoid circular locking dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Filesystems can write to disk from page reclaim with __GFP_FS set. Marc found a case where scsi_realloc_sdev_budget_map() ends up in page reclaim with GFP_KERNEL, where it could try to take filesystem locks again, leading to a deadlock. WARNING: possible circular locking dependency detected 6.13.0 #1 Not tainted ------------------------------------------------------ kswapd0/70 is trying to acquire lock: ffff8881025d5d78 (&q->q_usage_counter(io)){++++}-{0:0}, at: blk_mq_submit_bio+0x461/0x6e0 but task is already holding lock: ffffffff81ef5f40 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0x9f/0x760 The full lockdep splat can be found in Marc's report: https://lkml.org/lkml/2025/1/24/1101 Avoid the potential deadlock by doing the allocation with GFP_NOIO, which prevents both filesystem and block layer recursion. Reported-by: Marc Aurèle La France Signed-off-by: Rik van Riel Link: https://lore.kernel.org/r/20250129104525.0ae8421e@fangorn Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 087fcbfc9aaa3..96d7e1a9a7c7a 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -246,7 +246,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, } ret = sbitmap_init_node(&sdev->budget_map, scsi_device_max_queue_depth(sdev), - new_shift, GFP_KERNEL, + new_shift, GFP_NOIO, sdev->request_queue->node, false, true); if (!ret) sbitmap_resize(&sdev->budget_map, depth); -- GitLab From 9ff7c383b8ac0c482a1da7989f703406d78445c6 Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Fri, 31 Jan 2025 10:44:07 -0800 Subject: [PATCH 0206/1585] scsi: core: Do not retry I/Os during depopulation Fail I/Os instead of retry to prevent user space processes from being blocked on the I/O completion for several minutes. Retrying I/Os during "depopulation in progress" or "depopulation restore in progress" results in a continuous retry loop until the depopulation completes or until the I/O retry loop is aborted due to a timeout by the scsi_cmd_runtime_exceeced(). Depopulation is slow and can take 24+ hours to complete on 20+ TB HDDs. Most I/Os in the depopulation retry loop end up taking several minutes before returning the failure to user space. Cc: stable@vger.kernel.org # 4.18.x: 2bbeb8d scsi: core: Handle depopulation and restoration in progress Cc: stable@vger.kernel.org # 4.18.x Fixes: e37c7d9a0341 ("scsi: core: sanitize++ in progress") Signed-off-by: Igor Pylypiv Link: https://lore.kernel.org/r/20250131184408.859579-1-ipylypiv@google.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index d776f13cd160b..be0890e4e7062 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -872,13 +872,18 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result) case 0x1a: /* start stop unit in progress */ case 0x1b: /* sanitize in progress */ case 0x1d: /* configuration in progress */ - case 0x24: /* depopulation in progress */ - case 0x25: /* depopulation restore in progress */ action = ACTION_DELAYED_RETRY; break; case 0x0a: /* ALUA state transition */ action = ACTION_DELAYED_REPREP; break; + /* + * Depopulation might take many hours, + * thus it is not worthwhile to retry. + */ + case 0x24: /* depopulation in progress */ + case 0x25: /* depopulation restore in progress */ + fallthrough; default: action = ACTION_FAIL; break; -- GitLab From f8fb2403ddebb5eea0033d90d9daae4c88749ada Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Fri, 24 Jan 2025 15:09:00 +0000 Subject: [PATCH 0207/1585] scsi: ufs: core: Fix use-after free in init error and remove paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit devm_blk_crypto_profile_init() registers a cleanup handler to run when the associated (platform-) device is being released. For UFS, the crypto private data and pointers are stored as part of the ufs_hba's data structure 'struct ufs_hba::crypto_profile'. This structure is allocated as part of the underlying ufshcd and therefore Scsi_host allocation. During driver release or during error handling in ufshcd_pltfrm_init(), this structure is released as part of ufshcd_dealloc_host() before the (platform-) device associated with the crypto call above is released. Once this device is released, the crypto cleanup code will run, using the just-released 'struct ufs_hba::crypto_profile'. This causes a use-after-free situation: Call trace: kfree+0x60/0x2d8 (P) kvfree+0x44/0x60 blk_crypto_profile_destroy_callback+0x28/0x70 devm_action_release+0x1c/0x30 release_nodes+0x6c/0x108 devres_release_all+0x98/0x100 device_unbind_cleanup+0x20/0x70 really_probe+0x218/0x2d0 In other words, the initialisation code flow is: platform-device probe ufshcd_pltfrm_init() ufshcd_alloc_host() scsi_host_alloc() allocation of struct ufs_hba creation of scsi-host devices devm_blk_crypto_profile_init() devm registration of cleanup handler using platform-device and during error handling of ufshcd_pltfrm_init() or during driver removal: ufshcd_dealloc_host() scsi_host_put() put_device(scsi-host) release of struct ufs_hba put_device(platform-device) crypto cleanup handler To fix this use-after free, change ufshcd_alloc_host() to register a devres action to automatically cleanup the underlying SCSI device on ufshcd destruction, without requiring explicit calls to ufshcd_dealloc_host(). This way: * the crypto profile and all other ufs_hba-owned resources are destroyed before SCSI (as they've been registered after) * a memleak is plugged in tc-dwc-g210-pci.c remove() as a side-effect * EXPORT_SYMBOL_GPL(ufshcd_dealloc_host) can be removed fully as it's not needed anymore * no future drivers using ufshcd_alloc_host() could ever forget adding the cleanup Fixes: cb77cb5abe1f ("blk-crypto: rename blk_keyslot_manager to blk_crypto_profile") Fixes: d76d9d7d1009 ("scsi: ufs: use devm_blk_ksm_init()") Cc: stable@vger.kernel.org Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250124-ufshcd-fix-v4-1-c5d0144aae59@linaro.org Reviewed-by: Bean Huo Reviewed-by: Manivannan Sadhasivam Acked-by: Eric Biggers Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 31 +++++++++++++++++++++---------- drivers/ufs/host/ufshcd-pci.c | 2 -- drivers/ufs/host/ufshcd-pltfrm.c | 28 +++++++++------------------- include/ufs/ufshcd.h | 1 - 4 files changed, 30 insertions(+), 32 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index d3741b1f43821..d2de80b2bba46 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -10226,16 +10226,6 @@ int ufshcd_system_thaw(struct device *dev) EXPORT_SYMBOL_GPL(ufshcd_system_thaw); #endif /* CONFIG_PM_SLEEP */ -/** - * ufshcd_dealloc_host - deallocate Host Bus Adapter (HBA) - * @hba: pointer to Host Bus Adapter (HBA) - */ -void ufshcd_dealloc_host(struct ufs_hba *hba) -{ - scsi_host_put(hba->host); -} -EXPORT_SYMBOL_GPL(ufshcd_dealloc_host); - /** * ufshcd_set_dma_mask - Set dma mask based on the controller * addressing capability @@ -10254,12 +10244,26 @@ static int ufshcd_set_dma_mask(struct ufs_hba *hba) return dma_set_mask_and_coherent(hba->dev, DMA_BIT_MASK(32)); } +/** + * ufshcd_devres_release - devres cleanup handler, invoked during release of + * hba->dev + * @host: pointer to SCSI host + */ +static void ufshcd_devres_release(void *host) +{ + scsi_host_put(host); +} + /** * ufshcd_alloc_host - allocate Host Bus Adapter (HBA) * @dev: pointer to device handle * @hba_handle: driver private handle * * Return: 0 on success, non-zero value on failure. + * + * NOTE: There is no corresponding ufshcd_dealloc_host() because this function + * keeps track of its allocations using devres and deallocates everything on + * device removal automatically. */ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle) { @@ -10281,6 +10285,13 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle) err = -ENOMEM; goto out_error; } + + err = devm_add_action_or_reset(dev, ufshcd_devres_release, + host); + if (err) + return dev_err_probe(dev, err, + "failed to add ufshcd dealloc action\n"); + host->nr_maps = HCTX_TYPE_POLL + 1; hba = shost_priv(host); hba->host = host; diff --git a/drivers/ufs/host/ufshcd-pci.c b/drivers/ufs/host/ufshcd-pci.c index ea39c5d5b8cf1..9cfcaad23cf92 100644 --- a/drivers/ufs/host/ufshcd-pci.c +++ b/drivers/ufs/host/ufshcd-pci.c @@ -562,7 +562,6 @@ static void ufshcd_pci_remove(struct pci_dev *pdev) pm_runtime_forbid(&pdev->dev); pm_runtime_get_noresume(&pdev->dev); ufshcd_remove(hba); - ufshcd_dealloc_host(hba); } /** @@ -605,7 +604,6 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) err = ufshcd_init(hba, mmio_base, pdev->irq); if (err) { dev_err(&pdev->dev, "Initialization failed\n"); - ufshcd_dealloc_host(hba); return err; } diff --git a/drivers/ufs/host/ufshcd-pltfrm.c b/drivers/ufs/host/ufshcd-pltfrm.c index 505572d4fa878..ffe5d1d2b2158 100644 --- a/drivers/ufs/host/ufshcd-pltfrm.c +++ b/drivers/ufs/host/ufshcd-pltfrm.c @@ -465,21 +465,17 @@ int ufshcd_pltfrm_init(struct platform_device *pdev, struct device *dev = &pdev->dev; mmio_base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(mmio_base)) { - err = PTR_ERR(mmio_base); - goto out; - } + if (IS_ERR(mmio_base)) + return PTR_ERR(mmio_base); irq = platform_get_irq(pdev, 0); - if (irq < 0) { - err = irq; - goto out; - } + if (irq < 0) + return irq; err = ufshcd_alloc_host(dev, &hba); if (err) { dev_err(dev, "Allocation failed\n"); - goto out; + return err; } hba->vops = vops; @@ -488,13 +484,13 @@ int ufshcd_pltfrm_init(struct platform_device *pdev, if (err) { dev_err(dev, "%s: clock parse failed %d\n", __func__, err); - goto dealloc_host; + return err; } err = ufshcd_parse_regulator_info(hba); if (err) { dev_err(dev, "%s: regulator init failed %d\n", __func__, err); - goto dealloc_host; + return err; } ufshcd_init_lanes_per_dir(hba); @@ -502,25 +498,20 @@ int ufshcd_pltfrm_init(struct platform_device *pdev, err = ufshcd_parse_operating_points(hba); if (err) { dev_err(dev, "%s: OPP parse failed %d\n", __func__, err); - goto dealloc_host; + return err; } err = ufshcd_init(hba, mmio_base, irq); if (err) { dev_err_probe(dev, err, "Initialization failed with error %d\n", err); - goto dealloc_host; + return err; } pm_runtime_set_active(dev); pm_runtime_enable(dev); return 0; - -dealloc_host: - ufshcd_dealloc_host(hba); -out: - return err; } EXPORT_SYMBOL_GPL(ufshcd_pltfrm_init); @@ -534,7 +525,6 @@ void ufshcd_pltfrm_remove(struct platform_device *pdev) pm_runtime_get_sync(&pdev->dev); ufshcd_remove(hba); - ufshcd_dealloc_host(hba); pm_runtime_disable(&pdev->dev); pm_runtime_put_noidle(&pdev->dev); } diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 650ff238cd74e..8bf31e6ca4e51 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1309,7 +1309,6 @@ static inline void ufshcd_rmwl(struct ufs_hba *hba, u32 mask, u32 val, u32 reg) void ufshcd_enable_irq(struct ufs_hba *hba); void ufshcd_disable_irq(struct ufs_hba *hba); int ufshcd_alloc_host(struct device *, struct ufs_hba **); -void ufshcd_dealloc_host(struct ufs_hba *); int ufshcd_hba_enable(struct ufs_hba *hba); int ufshcd_init(struct ufs_hba *, void __iomem *, unsigned int); int ufshcd_link_recovery(struct ufs_hba *hba); -- GitLab From 87c4b5e8a6b65189abd9ea5010ab308941f964a4 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 22 Jan 2025 19:07:22 -0800 Subject: [PATCH 0208/1585] scsi: storvsc: Set correct data length for sending SCSI command without payload In StorVSC, payload->range.len is used to indicate if this SCSI command carries payload. This data is allocated as part of the private driver data by the upper layer and may get passed to lower driver uninitialized. For example, the SCSI error handling mid layer may send TEST_UNIT_READY or REQUEST_SENSE while reusing the buffer from a failed command. The private data section may have stale data from the previous command. If the SCSI command doesn't carry payload, the driver may use this value as is for communicating with host, resulting in possible corruption. Fix this by always initializing this value. Fixes: be0cf6ca301c ("scsi: storvsc: Set the tablesize based on the information given by the host") Cc: stable@kernel.org Tested-by: Roman Kisel Reviewed-by: Roman Kisel Reviewed-by: Michael Kelley Signed-off-by: Long Li Link: https://lore.kernel.org/r/1737601642-7759-1-git-send-email-longli@linuxonhyperv.com Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 5a101ac06c478..a8614e54544e5 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1800,6 +1800,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) length = scsi_bufflen(scmnd); payload = (struct vmbus_packet_mpb_array *)&cmd_request->mpb; + payload->range.len = 0; payload_sz = 0; if (scsi_sg_count(scmnd)) { -- GitLab From 1a78a56ea65252bb089e0daace989167227f2d31 Mon Sep 17 00:00:00 2001 From: Seunghui Lee Date: Sat, 18 Jan 2025 11:38:08 +0900 Subject: [PATCH 0209/1585] scsi: ufs: core: Fix error return with query response There is currently no mechanism to return error from query responses. Return the error and print the corresponding error message with it. Signed-off-by: Seunghui Lee Link: https://lore.kernel.org/r/20250118023808.24726-1-sh043.lee@samsung.com Reviewed-by: Bean Huo Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index d2de80b2bba46..1893a7ad95316 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -3104,8 +3104,13 @@ ufshcd_dev_cmd_completion(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) case UPIU_TRANSACTION_QUERY_RSP: { u8 response = lrbp->ucd_rsp_ptr->header.response; - if (response == 0) + if (response == 0) { err = ufshcd_copy_query_response(hba, lrbp); + } else { + err = -EINVAL; + dev_err(hba->dev, "%s: unexpected response in Query RSP: %x\n", + __func__, response); + } break; } case UPIU_TRANSACTION_REJECT_UPIU: -- GitLab From 5233e3235dec3065ccc632729675575dbe3c6b8a Mon Sep 17 00:00:00 2001 From: Magnus Lindholm Date: Sat, 25 Jan 2025 10:49:22 +0100 Subject: [PATCH 0210/1585] scsi: qla1280: Fix kernel oops when debug level > 2 A null dereference or oops exception will eventually occur when qla1280.c driver is compiled with DEBUG_QLA1280 enabled and ql_debug_level > 2. I think its clear from the code that the intention here is sg_dma_len(s) not length of sg_next(s) when printing the debug info. Signed-off-by: Magnus Lindholm Link: https://lore.kernel.org/r/20250125095033.26188-1-linmag7@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qla1280.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index 1fd2da0264e38..47d74f881948f 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -2867,7 +2867,7 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp) dprintk(3, "S/G Segment phys_addr=%x %x, len=0x%x\n", cpu_to_le32(upper_32_bits(dma_handle)), cpu_to_le32(lower_32_bits(dma_handle)), - cpu_to_le32(sg_dma_len(sg_next(s)))); + cpu_to_le32(sg_dma_len(s))); remseg--; } dprintk(5, "qla1280_64bit_start_scsi: Scatter/gather " -- GitLab From 1b0332a42656b798bea867631d739de023633ec6 Mon Sep 17 00:00:00 2001 From: Yu-Chun Lin Date: Thu, 30 Jan 2025 22:48:49 +0800 Subject: [PATCH 0211/1585] kthread: Fix return value on kzalloc() failure in kthread_affine_preferred() kthread_affine_preferred() incorrectly returns 0 instead of -ENOMEM when kzalloc() fails. Return 'ret' to ensure the correct error code is propagated. Fixes: 4d13f4304fa4 ("kthread: Implement preferred affinity") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501301528.t0cZVbnq-lkp@intel.com/ Signed-off-by: Yu-Chun Lin Signed-off-by: Frederic Weisbecker --- kernel/kthread.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kthread.c b/kernel/kthread.c index 4005b13ebd7ff..5dc5b0d7238e8 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -859,7 +859,7 @@ int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask) struct kthread *kthread = to_kthread(p); cpumask_var_t affinity; unsigned long flags; - int ret; + int ret = 0; if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE) || kthread->started) { WARN_ON(1); @@ -892,7 +892,7 @@ int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask) out: free_cpumask_var(affinity); - return 0; + return ret; } /* -- GitLab From 244f8aa46fa9e2f4ea5fe0e04988b395d5e30fc7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 31 Jan 2025 17:30:37 -0800 Subject: [PATCH 0212/1585] ethtool: rss: fix hiding unsupported fields in dumps Commit ec6e57beaf8b ("ethtool: rss: don't report key if device doesn't support it") intended to stop reporting key fields for additional rss contexts if device has a global hashing key. Later we added dump support and the filtering wasn't properly added there. So we end up reporting the key fields in dumps but not in dos: # ./pyynl/cli.py --spec netlink/specs/ethtool.yaml --do rss-get \ --json '{"header": {"dev-index":2}, "context": 1 }' { "header": { ... }, "context": 1, "indir": [0, 1, 2, 3, ...]] } # ./pyynl/cli.py --spec netlink/specs/ethtool.yaml --dump rss-get [ ... snip context 0 ... { "header": { ... }, "context": 1, "indir": [0, 1, 2, 3, ...], -> "input_xfrm": 255, -> "hfunc": 1, -> "hkey": "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" } ] Hide these fields correctly. The drivers/net/hw/rss_ctx.py selftest catches this when run on a device with single key, already: # Check| At /root/./ksft-net-drv/drivers/net/hw/rss_ctx.py, line 381, in test_rss_context_dump: # Check| ksft_ne(set(data.get('hkey', [1])), {0}, "key is all zero") # Check failed {0} == {0} key is all zero not ok 8 rss_ctx.test_rss_context_dump Fixes: f6122900f4e2 ("ethtool: rss: support dumping RSS contexts") Reviewed-by: Gal Pressman Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250201013040.725123-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/rss.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 7cb106b590aba..58df9ad02ce8a 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -107,6 +107,8 @@ rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev, u32 total_size, indir_bytes; u8 *rss_config; + data->no_key_fields = !dev->ethtool_ops->rxfh_per_ctx_key; + ctx = xa_load(&dev->ethtool->rss_ctx, request->rss_context); if (!ctx) return -ENOENT; @@ -153,7 +155,6 @@ rss_prepare_data(const struct ethnl_req_info *req_base, if (!ops->cap_rss_ctx_supported && !ops->create_rxfh_context) return -EOPNOTSUPP; - data->no_key_fields = !ops->rxfh_per_ctx_key; return rss_prepare_ctx(request, dev, data, info); } -- GitLab From 2b91cc1214b165c25ac9b0885db89a0d3224028a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 31 Jan 2025 17:30:38 -0800 Subject: [PATCH 0213/1585] ethtool: ntuple: fix rss + ring_cookie check The info.flow_type is for RXFH commands, ntuple flow_type is inside the flow spec. The check currently does nothing, as info.flow_type is 0 (or even uninitialized by user space) for ETHTOOL_SRXCLSRLINS. Fixes: 9e43ad7a1ede ("net: ethtool: only allow set_rxnfc with rss + ring_cookie if driver opts in") Reviewed-by: Gal Pressman Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250201013040.725123-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 34bee42e12470..7609ce2b2c5e2 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -993,7 +993,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, return rc; /* Nonzero ring with RSS only makes sense if NIC adds them together */ - if (cmd == ETHTOOL_SRXCLSRLINS && info.flow_type & FLOW_RSS && + if (cmd == ETHTOOL_SRXCLSRLINS && info.fs.flow_type & FLOW_RSS && !ops->cap_rss_rxnfc_adds && ethtool_get_flow_spec_ring(info.fs.ring_cookie)) return -EINVAL; -- GitLab From de379dfd9ada2995699052f4a1ecebe5d8f8d70f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 31 Jan 2025 17:30:39 -0800 Subject: [PATCH 0214/1585] selftests: drv-net: rss_ctx: add missing cleanup in queue reconfigure Commit under Fixes adds ntuple rules but never deletes them. Fixes: 29a4bc1fe961 ("selftest: extend test_rss_context_queue_reconfigure for action addition") Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250201013040.725123-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/rss_ctx.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index ca8a7edff3dda..27e24e20749ff 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -252,6 +252,7 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True): try: # this targets queue 4, which doesn't exist ntuple2 = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}") except CmdExitFailure: pass else: @@ -260,6 +261,7 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True): ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 1 0") # ntuple rule therefore targets queues 1 and 3 ntuple2 = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}") # should replace existing filter ksft_eq(ntuple, ntuple2) _send_traffic_check(cfg, port, ctx_ref, { 'target': (1, 3), -- GitLab From c3da585509aeb8476886adf75a266c81a9b0df6c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 31 Jan 2025 17:30:40 -0800 Subject: [PATCH 0215/1585] selftests: drv-net: rss_ctx: don't fail reconfigure test if queue offset not supported Vast majority of drivers does not support queue offset. Simply return if the rss context + queue ntuple fails. Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250201013040.725123-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/rss_ctx.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 27e24e20749ff..319aaa004c407 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -260,7 +260,12 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True): # change the table to target queues 0 and 2 ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 1 0") # ntuple rule therefore targets queues 1 and 3 - ntuple2 = ethtool_create(cfg, "-N", flow) + try: + ntuple2 = ethtool_create(cfg, "-N", flow) + except CmdExitFailure: + ksft_pr("Driver does not support rss + queue offset") + return + defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}") # should replace existing filter ksft_eq(ntuple, ntuple2) -- GitLab From d3ed6dee73c560fad0a8e152c8e233b3fb3a2e44 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 1 Feb 2025 19:02:51 +0100 Subject: [PATCH 0216/1585] net: harmonize tstats and dstats After the blamed commits below, some UDP tunnel use dstats for accounting. On the xmit path, all the UDP-base tunnels ends up using iptunnel_xmit_stats() for stats accounting, and the latter assumes the relevant (tunnel) network device uses tstats. The end result is some 'funny' stat report for the mentioned UDP tunnel, e.g. when no packet is actually dropped and a bunch of packets are transmitted: gnv2: mtu 1450 qdisc noqueue \ state UNKNOWN mode DEFAULT group default qlen 1000 link/ether ee:7d:09:87:90:ea brd ff:ff:ff:ff:ff:ff RX: bytes packets errors dropped missed mcast 14916 23 0 15 0 0 TX: bytes packets errors dropped carrier collsns 0 1566 0 0 0 0 Address the issue ensuring the same binary layout for the overlapping fields of dstats and tstats. While this solution is a bit hackish, is smaller and with no performance pitfall compared to other alternatives i.e. supporting both dstat and tstat in iptunnel_xmit_stats() or reverting the blamed commit. With time we should possibly move all the IP-based tunnel (and virtual devices) to dstats. Fixes: c77200c07491 ("bareudp: Handle stats using NETDEV_PCPU_STAT_DSTATS.") Fixes: 6fa6de302246 ("geneve: Handle stats using NETDEV_PCPU_STAT_DSTATS.") Fixes: be226352e8dc ("vxlan: Handle stats using NETDEV_PCPU_STAT_DSTATS.") Signed-off-by: Paolo Abeni Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/2e1c444cf0f63ae472baff29862c4c869be17031.1738432804.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- net/core/dev.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2a59034a5fa2f..03bb584c62cf8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2904,9 +2904,9 @@ struct pcpu_sw_netstats { struct pcpu_dstats { u64_stats_t rx_packets; u64_stats_t rx_bytes; - u64_stats_t rx_drops; u64_stats_t tx_packets; u64_stats_t tx_bytes; + u64_stats_t rx_drops; u64_stats_t tx_drops; struct u64_stats_sync syncp; } __aligned(8 * sizeof(u64)); diff --git a/net/core/dev.c b/net/core/dev.c index c0021cbd28fc1..b91658e8aedb4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11286,6 +11286,20 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, const struct net_device_ops *ops = dev->netdev_ops; const struct net_device_core_stats __percpu *p; + /* + * IPv{4,6} and udp tunnels share common stat helpers and use + * different stat type (NETDEV_PCPU_STAT_TSTATS vs + * NETDEV_PCPU_STAT_DSTATS). Ensure the accounting is consistent. + */ + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, rx_bytes) != + offsetof(struct pcpu_dstats, rx_bytes)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, rx_packets) != + offsetof(struct pcpu_dstats, rx_packets)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, tx_bytes) != + offsetof(struct pcpu_dstats, tx_bytes)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, tx_packets) != + offsetof(struct pcpu_dstats, tx_packets)); + if (ops->ndo_get_stats64) { memset(storage, 0, sizeof(*storage)); ops->ndo_get_stats64(dev, storage); -- GitLab From fcf5d353b09b3fc212ab24b89ef23a7a8f7b308e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Jan 2025 07:52:44 +0100 Subject: [PATCH 0217/1585] phy: rockchip: fix Kconfig dependency more A previous patch ensured that USB Type C connector support is enabled, but it is still possible to build the phy driver without enabling CONFIG_USB (host support) or CONFIG_USB_GADGET (device support), and in that case the common helper functions are unavailable: aarch64-linux-ld: drivers/phy/rockchip/phy-rockchip-usbdp.o: in function `rk_udphy_probe': phy-rockchip-usbdp.c:(.text+0xe74): undefined reference to `usb_get_maximum_speed' Select CONFIG_USB_COMMON directly here, like we do in some other phy drivers, to make sure this is available even when actual USB support is disabled or in a loadable module that cannot be reached from a built-in phy driver. Fixes: 9c79b779643e ("phy: rockchip: fix CONFIG_TYPEC dependency") Signed-off-by: Arnd Bergmann Reviewed-by: Sebastian Reichel Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20250122065249.1390081-1-arnd@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/rockchip/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/rockchip/Kconfig b/drivers/phy/rockchip/Kconfig index 2f7a05f21dc59..dcb8e1628632e 100644 --- a/drivers/phy/rockchip/Kconfig +++ b/drivers/phy/rockchip/Kconfig @@ -125,6 +125,7 @@ config PHY_ROCKCHIP_USBDP depends on ARCH_ROCKCHIP && OF depends on TYPEC select GENERIC_PHY + select USB_COMMON help Enable this to support the Rockchip USB3.0/DP combo PHY with Samsung IP block. This is required for USB3 support on RK3588. -- GitLab From 3126ea9be66b53e607f87f067641ba724be24181 Mon Sep 17 00:00:00 2001 From: Chukun Pan Date: Mon, 6 Jan 2025 18:00:01 +0800 Subject: [PATCH 0218/1585] phy: rockchip: naneng-combphy: compatible reset with old DT The device tree of RK3568 did not specify reset-names before. So add fallback to old behaviour to be compatible with old DT. Fixes: fbcbffbac994 ("phy: rockchip: naneng-combphy: fix phy reset") Cc: Jianfeng Liu Signed-off-by: Chukun Pan Reviewed-by: Jonas Karlman Link: https://lore.kernel.org/r/20250106100001.1344418-2-amadeus@jmu.edu.cn Signed-off-by: Vinod Koul --- drivers/phy/rockchip/phy-rockchip-naneng-combphy.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c b/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c index a1532ef8bbe9d..8c3ce57f89151 100644 --- a/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c +++ b/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c @@ -324,7 +324,10 @@ static int rockchip_combphy_parse_dt(struct device *dev, struct rockchip_combphy priv->ext_refclk = device_property_present(dev, "rockchip,ext-refclk"); - priv->phy_rst = devm_reset_control_get(dev, "phy"); + priv->phy_rst = devm_reset_control_get_exclusive(dev, "phy"); + /* fallback to old behaviour */ + if (PTR_ERR(priv->phy_rst) == -ENOENT) + priv->phy_rst = devm_reset_control_array_get_exclusive(dev); if (IS_ERR(priv->phy_rst)) return dev_err_probe(dev, PTR_ERR(priv->phy_rst), "failed to get phy reset\n"); -- GitLab From a787ab73e2e43c0a3df10bc8d9b9b7a679129d49 Mon Sep 17 00:00:00 2001 From: Jithu Joseph Date: Fri, 31 Jan 2025 12:53:15 -0800 Subject: [PATCH 0219/1585] platform/x86/intel/ifs: Update documentation with image download path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The documentation previously listed the path to download In Field Scan (IFS) test images as "TBD". Update the documentation to include the correct image download location. Also move the download link to the appropriate section within the documentation. Reported-by: Anisse Astier Signed-off-by: Jithu Joseph Link: https://lore.kernel.org/r/20250131205315.1585663-1-jithu.joseph@intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/ifs/ifs.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/intel/ifs/ifs.h b/drivers/platform/x86/intel/ifs/ifs.h index 5c3c0dfa1bf83..f369fb0d3d82f 100644 --- a/drivers/platform/x86/intel/ifs/ifs.h +++ b/drivers/platform/x86/intel/ifs/ifs.h @@ -23,12 +23,14 @@ * IFS Image * --------- * - * Intel provides a firmware file containing the scan tests via - * github [#f1]_. Similar to microcode there is a separate file for each + * Intel provides firmware files containing the scan tests via the webpage [#f1]_. + * Look under "In-Field Scan Test Images Download" section towards the + * end of the page. Similar to microcode, there are separate files for each * family-model-stepping. IFS Images are not applicable for some test types. * Wherever applicable the sysfs directory would provide a "current_batch" file * (see below) for loading the image. * + * .. [#f1] https://intel.com/InFieldScan * * IFS Image Loading * ----------------- @@ -125,9 +127,6 @@ * 2) Hardware allows for some number of cores to be tested in parallel. * The driver does not make use of this, it only tests one core at a time. * - * .. [#f1] https://github.com/intel/TBD - * - * * Structural Based Functional Test at Field (SBAF): * ------------------------------------------------- * -- GitLab From 1739cafdb8decad538410b05a4640055408826de Mon Sep 17 00:00:00 2001 From: Jinghao Jia Date: Mon, 3 Feb 2025 02:55:04 -0600 Subject: [PATCH 0220/1585] samples/hid: remove unnecessary -I flags from libbpf EXTRA_CFLAGS Commit 5a6ea7022ff4 ("samples/bpf: Remove unnecessary -I flags from libbpf EXTRA_CFLAGS") fixed the build error caused by redundant include path for samples/bpf, but not samples/hid. Apply the same fix on samples/hid as well. Fixes: 13b25489b6f8 ("kbuild: change working directory to external module directory with M=") Tested-by: Ruowen Qin Signed-off-by: Jinghao Jia Link: https://patch.msgid.link/20250203085506.220297-2-jinghao7@illinois.edu Signed-off-by: Benjamin Tissoires --- samples/hid/Makefile | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/samples/hid/Makefile b/samples/hid/Makefile index 8ea59e9631a33..69159c81d0457 100644 --- a/samples/hid/Makefile +++ b/samples/hid/Makefile @@ -40,16 +40,17 @@ BPF_EXTRA_CFLAGS += -I$(srctree)/arch/mips/include/asm/mach-generic endif endif -TPROGS_CFLAGS += -Wall -O2 -TPROGS_CFLAGS += -Wmissing-prototypes -TPROGS_CFLAGS += -Wstrict-prototypes +COMMON_CFLAGS += -Wall -O2 +COMMON_CFLAGS += -Wmissing-prototypes +COMMON_CFLAGS += -Wstrict-prototypes +TPROGS_CFLAGS += $(COMMON_CFLAGS) TPROGS_CFLAGS += -I$(objtree)/usr/include TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE) TPROGS_CFLAGS += -I$(srctree)/tools/include ifdef SYSROOT -TPROGS_CFLAGS += --sysroot=$(SYSROOT) +COMMON_CFLAGS += --sysroot=$(SYSROOT) TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib endif @@ -112,7 +113,7 @@ clean: $(LIBBPF): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT) # Fix up variables inherited from Kbuild that tools/ build system won't like - $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \ + $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(COMMON_CFLAGS)" \ LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(HID_SAMPLES_PATH)/../../ \ O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \ $@ install_headers -- GitLab From 8b125949df58a00e8797c6e6d3f3d3dc08f4d939 Mon Sep 17 00:00:00 2001 From: Jinghao Jia Date: Mon, 3 Feb 2025 02:55:06 -0600 Subject: [PATCH 0221/1585] samples/hid: fix broken vmlinux path for VMLINUX_BTF Commit 13b25489b6f8 ("kbuild: change working directory to external module directory with M=") changed kbuild working directory of hid-bpf sample programs to samples/hid, which broke the vmlinux path for VMLINUX_BTF, as the Makefiles assume the current work directory to be the kernel output directory and use a relative path (i.e., ./vmlinux): Makefile:173: *** Cannot find a vmlinux for VMLINUX_BTF at any of " /path/to/linux/samples/hid/vmlinux", build the kernel or set VMLINUX_BTF or VMLINUX_H variable. Stop. Correctly refer to the kernel output directory using $(objtree). Fixes: 13b25489b6f8 ("kbuild: change working directory to external module directory with M=") Tested-by: Ruowen Qin Suggested-by: Daniel Borkmann Suggested-by: Andrii Nakryiko Signed-off-by: Jinghao Jia Link: https://patch.msgid.link/20250203085506.220297-4-jinghao7@illinois.edu Signed-off-by: Benjamin Tissoires --- samples/hid/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/hid/Makefile b/samples/hid/Makefile index 69159c81d0457..db5a077c77fc8 100644 --- a/samples/hid/Makefile +++ b/samples/hid/Makefile @@ -164,7 +164,7 @@ $(obj)/hid_surface_dial.o: $(obj)/hid_surface_dial.skel.h VMLINUX_BTF_PATHS ?= $(abspath $(if $(O),$(O)/vmlinux)) \ $(abspath $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)) \ - $(abspath ./vmlinux) + $(abspath $(objtree)/vmlinux) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) $(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) -- GitLab From 32392e04cb50d87bb7a6a7d9213f44a1a0961820 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 3 Feb 2025 15:15:43 -0800 Subject: [PATCH 0222/1585] KVM: arm64: Fail protected mode init if no vgic hardware is present Protected mode assumes that at minimum vgic-v3 is present, however KVM fails to actually enforce this at the time of initialization. As such, when running protected mode in a half-baked state on GICv2 hardware we see the hyp go belly up at vcpu_load() when it tries to restore the vgic-v3 cpuif: $ ./arch_timer_edge_cases [ 130.599140] kvm [4518]: nVHE hyp panic at: [] __kvm_nvhe___vgic_v3_restore_vmcr_aprs+0x8/0x84! [ 130.603685] kvm [4518]: Cannot dump pKVM nVHE stacktrace: !CONFIG_PROTECTED_NVHE_STACKTRACE [ 130.611962] kvm [4518]: Hyp Offset: 0xfffeca95ed000000 [ 130.617053] Kernel panic - not syncing: HYP panic: [ 130.617053] PS:800003c9 PC:0000b56a94102b58 ESR:0000000002000000 [ 130.617053] FAR:ffff00007b98d4d0 HPFAR:00000000007b98d0 PAR:0000000000000000 [ 130.617053] VCPU:0000000000000000 [ 130.638013] CPU: 0 UID: 0 PID: 4518 Comm: arch_timer_edge Tainted: G C 6.13.0-rc3-00009-gf7d03fcbf1f4 #1 [ 130.648790] Tainted: [C]=CRAP [ 130.651721] Hardware name: Libre Computer AML-S905X-CC (DT) [ 130.657242] Call trace: [ 130.659656] show_stack+0x18/0x24 (C) [ 130.663279] dump_stack_lvl+0x38/0x90 [ 130.666900] dump_stack+0x18/0x24 [ 130.670178] panic+0x388/0x3e8 [ 130.673196] nvhe_hyp_panic_handler+0x104/0x208 [ 130.677681] kvm_arch_vcpu_load+0x290/0x548 [ 130.681821] vcpu_load+0x50/0x80 [ 130.685013] kvm_arch_vcpu_ioctl_run+0x30/0x868 [ 130.689498] kvm_vcpu_ioctl+0x2e0/0x974 [ 130.693293] __arm64_sys_ioctl+0xb4/0xec [ 130.697174] invoke_syscall+0x48/0x110 [ 130.700883] el0_svc_common.constprop.0+0x40/0xe0 [ 130.705540] do_el0_svc+0x1c/0x28 [ 130.708818] el0_svc+0x30/0xd0 [ 130.711837] el0t_64_sync_handler+0x10c/0x138 [ 130.716149] el0t_64_sync+0x198/0x19c [ 130.719774] SMP: stopping secondary CPUs [ 130.723660] Kernel Offset: disabled [ 130.727103] CPU features: 0x000,00000800,02800000,0200421b [ 130.732537] Memory Limit: none [ 130.735561] ---[ end Kernel panic - not syncing: HYP panic: [ 130.735561] PS:800003c9 PC:0000b56a94102b58 ESR:0000000002000000 [ 130.735561] FAR:ffff00007b98d4d0 HPFAR:00000000007b98d0 PAR:0000000000000000 [ 130.735561] VCPU:0000000000000000 ]--- Fix it by failing KVM initialization if the system doesn't implement vgic-v3, as protected mode will never do anything useful on such hardware. Reported-by: Mark Brown Closes: https://lore.kernel.org/kvmarm/5ca7588c-7bf2-4352-8661-e4a56a9cd9aa@sirena.org.uk/ Signed-off-by: Oliver Upton Link: https://lore.kernel.org/r/20250203231543.233511-1-oliver.upton@linux.dev Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arm.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 0725a0b50a3e9..62c650c2f7b67 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2290,6 +2290,19 @@ static int __init init_subsystems(void) break; case -ENODEV: case -ENXIO: + /* + * No VGIC? No pKVM for you. + * + * Protected mode assumes that VGICv3 is present, so no point + * in trying to hobble along if vgic initialization fails. + */ + if (is_protected_kvm_enabled()) + goto out; + + /* + * Otherwise, userspace could choose to implement a GIC for its + * guest on non-cooperative hardware. + */ vgic_present = false; err = 0; break; -- GitLab From 3648027de1fa91a0c80cffd3ecff263d06e62605 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 10 Jan 2025 18:51:46 +0100 Subject: [PATCH 0223/1585] arm64: Fix 5-level paging support in kexec/hibernate trampoline Add the missing code to allocate P4D level page tables when cloning the the kernel page tables. This fixes a crash that may be observed when attempting to resume from hibernation on an LPA2 capable system with 4k pages, which therefore uses 5 levels of paging. Presumably, kexec is equally affected. Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250110175145.785702-2-ardb+git@google.com Signed-off-by: Will Deacon --- arch/arm64/mm/trans_pgd.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c index 19c67ed1a21fe..18543b603c77b 100644 --- a/arch/arm64/mm/trans_pgd.c +++ b/arch/arm64/mm/trans_pgd.c @@ -162,6 +162,13 @@ static int copy_p4d(struct trans_pgd_info *info, pgd_t *dst_pgdp, unsigned long next; unsigned long addr = start; + if (pgd_none(READ_ONCE(*dst_pgdp))) { + dst_p4dp = trans_alloc(info); + if (!dst_p4dp) + return -ENOMEM; + pgd_populate(NULL, dst_pgdp, dst_p4dp); + } + dst_p4dp = p4d_offset(dst_pgdp, start); src_p4dp = p4d_offset(src_pgdp, start); do { -- GitLab From f458b2165d7ac0f2401fff48f19c8f864e7e1e38 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 17 Jan 2025 07:55:22 -0500 Subject: [PATCH 0224/1585] arm64: Kconfig: Remove selecting replaced HAVE_FUNCTION_GRAPH_RETVAL Commit a3ed4157b7d8 ("fgraph: Replace fgraph_ret_regs with ftrace_regs") replaces the config HAVE_FUNCTION_GRAPH_RETVAL with the config HAVE_FUNCTION_GRAPH_FREGS, and it replaces all the select commands in the various architecture Kconfig files. In the arm64 architecture, the commit adds the 'select HAVE_FUNCTION_GRAPH_FREGS', but misses to remove the 'select HAVE_FUNCTION_GRAPH_RETVAL', i.e., the select on the replaced config. Remove selecting the replaced config. No functional change, just cleanup. Fixes: a3ed4157b7d8 ("fgraph: Replace fgraph_ret_regs with ftrace_regs") Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20250117125522.99071-1-lukas.bulwahn@redhat.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fcdd0ed3eca89..940343beb3d4c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -225,7 +225,6 @@ config ARM64 select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_FREGS select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_GRAPH_RETVAL select HAVE_GCC_PLUGINS select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && \ HW_PERF_EVENTS && HAVE_PERF_EVENTS_NMI -- GitLab From f64f9dddd1f58c41c140034f7d2b0beeef1bc548 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 24 Jan 2025 17:33:22 +0000 Subject: [PATCH 0225/1585] arm64/gcs: Fix documentation for HWCAP In one of the renumberings of the GCS hwcap a stray reference to HWCAP2 was left, fix it. Reported-by: David Spickett Fixes: 7058bf87cd59 ("arm64/gcs: Document the ABI for Guarded Control Stacks") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250124-arm64-gcs-hwcap-doc-v1-1-fa9368b01ca6@kernel.org Signed-off-by: Will Deacon --- Documentation/arch/arm64/gcs.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/arch/arm64/gcs.rst b/Documentation/arch/arm64/gcs.rst index 1f65a3193e776..226c0b008456f 100644 --- a/Documentation/arch/arm64/gcs.rst +++ b/Documentation/arch/arm64/gcs.rst @@ -37,7 +37,7 @@ intended to be exhaustive. shadow stacks rather than GCS. * Support for GCS is reported to userspace via HWCAP_GCS in the aux vector - AT_HWCAP2 entry. + AT_HWCAP entry. * GCS is enabled per thread. While there is support for disabling GCS at runtime this should be done with great care. -- GitLab From 21fed7c223e20e694b91dbf25936d922a50c8b19 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 3 Feb 2025 20:11:04 +0000 Subject: [PATCH 0226/1585] arm64/hwcap: Remove stray references to SF8MMx Due to SME currently being disabled when removing the SF8MMx support it wasn't noticed that there were some stray references in the hwcap table, delete them. Fixes: 819935464cb2 ("arm64/hwcap: Describe 2024 dpISA extensions to userspace") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250203-arm64-remove-sf8mmx-v1-1-6f1da3dbff82@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 4eb7c6698ae43..f0910f20fbf8c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -3180,8 +3180,6 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA), HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4), HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8MM8, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8MM8), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8MM4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8MM4), HWCAP_CAP(ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM), HWCAP_CAP(ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES), HWCAP_CAP(ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA), -- GitLab From ba69e0750b0362870294adab09339a0c39c3beaf Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 1 Feb 2025 18:21:35 +0100 Subject: [PATCH 0227/1585] efi: Avoid cold plugged memory for placing the kernel UEFI 2.11 introduced EFI_MEMORY_HOT_PLUGGABLE to annotate system memory regions that are 'cold plugged' at boot, i.e., hot pluggable memory that is available from early boot, and described as system RAM by the firmware. Existing loaders and EFI applications running in the boot context will happily use this memory for allocating data structures that cannot be freed or moved at runtime, and this prevents the memory from being unplugged. Going forward, the new EFI_MEMORY_HOT_PLUGGABLE attribute should be tested, and memory annotated as such should be avoided for such allocations. In the EFI stub, there are a couple of occurrences where, instead of the high-level AllocatePages() UEFI boot service, a low-level code sequence is used that traverses the EFI memory map and carves out the requested number of pages from a free region. This is needed, e.g., for allocating as low as possible, or for allocating pages at random. While AllocatePages() should presumably avoid special purpose memory and cold plugged regions, this manual approach needs to incorporate this logic itself, in order to prevent the kernel itself from ending up in a hot unpluggable region, preventing it from being unplugged. So add the EFI_MEMORY_HOTPLUGGABLE macro definition, and check for it where appropriate. Cc: stable@vger.kernel.org Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 6 ++++-- drivers/firmware/efi/libstub/randomalloc.c | 3 +++ drivers/firmware/efi/libstub/relocate.c | 3 +++ include/linux/efi.h | 1 + 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 8296bf985d1d1..7309394b8fc98 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -934,13 +934,15 @@ char * __init efi_md_typeattr_format(char *buf, size_t size, EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO | EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP | EFI_MEMORY_NV | EFI_MEMORY_SP | EFI_MEMORY_CPU_CRYPTO | - EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE)) + EFI_MEMORY_MORE_RELIABLE | EFI_MEMORY_HOT_PLUGGABLE | + EFI_MEMORY_RUNTIME)) snprintf(pos, size, "|attr=0x%016llx]", (unsigned long long)attr); else snprintf(pos, size, - "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", + "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", attr & EFI_MEMORY_RUNTIME ? "RUN" : "", + attr & EFI_MEMORY_HOT_PLUGGABLE ? "HP" : "", attr & EFI_MEMORY_MORE_RELIABLE ? "MR" : "", attr & EFI_MEMORY_CPU_CRYPTO ? "CC" : "", attr & EFI_MEMORY_SP ? "SP" : "", diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index e5872e38d9a46..5a732018be36d 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -25,6 +25,9 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, if (md->type != EFI_CONVENTIONAL_MEMORY) return 0; + if (md->attribute & EFI_MEMORY_HOT_PLUGGABLE) + return 0; + if (efi_soft_reserve_enabled() && (md->attribute & EFI_MEMORY_SP)) return 0; diff --git a/drivers/firmware/efi/libstub/relocate.c b/drivers/firmware/efi/libstub/relocate.c index 99b45d1cd6246..d4264bfb6dc17 100644 --- a/drivers/firmware/efi/libstub/relocate.c +++ b/drivers/firmware/efi/libstub/relocate.c @@ -53,6 +53,9 @@ efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, if (desc->type != EFI_CONVENTIONAL_MEMORY) continue; + if (desc->attribute & EFI_MEMORY_HOT_PLUGGABLE) + continue; + if (efi_soft_reserve_enabled() && (desc->attribute & EFI_MEMORY_SP)) continue; diff --git a/include/linux/efi.h b/include/linux/efi.h index 053c57e618698..db293d7de6864 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -128,6 +128,7 @@ typedef struct { #define EFI_MEMORY_RO ((u64)0x0000000000020000ULL) /* read-only */ #define EFI_MEMORY_SP ((u64)0x0000000000040000ULL) /* soft reserved */ #define EFI_MEMORY_CPU_CRYPTO ((u64)0x0000000000080000ULL) /* supports encryption */ +#define EFI_MEMORY_HOT_PLUGGABLE BIT_ULL(20) /* supports unplugging at runtime */ #define EFI_MEMORY_RUNTIME ((u64)0x8000000000000000ULL) /* range requires runtime mapping */ #define EFI_MEMORY_DESCRIPTOR_VERSION 1 -- GitLab From bbc4578537e350d5bf8a7a2c7d054d6b163b3c41 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 1 Feb 2025 18:21:36 +0100 Subject: [PATCH 0228/1585] efi: Use BIT_ULL() constants for memory attributes For legibility, use the existing BIT_ULL() to generate the u64 type EFI memory attribute macros. Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/include/linux/efi.h b/include/linux/efi.h index db293d7de6864..7d63d1d75f22f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -114,22 +114,22 @@ typedef struct { #define EFI_MAX_MEMORY_TYPE 16 /* Attribute values: */ -#define EFI_MEMORY_UC ((u64)0x0000000000000001ULL) /* uncached */ -#define EFI_MEMORY_WC ((u64)0x0000000000000002ULL) /* write-coalescing */ -#define EFI_MEMORY_WT ((u64)0x0000000000000004ULL) /* write-through */ -#define EFI_MEMORY_WB ((u64)0x0000000000000008ULL) /* write-back */ -#define EFI_MEMORY_UCE ((u64)0x0000000000000010ULL) /* uncached, exported */ -#define EFI_MEMORY_WP ((u64)0x0000000000001000ULL) /* write-protect */ -#define EFI_MEMORY_RP ((u64)0x0000000000002000ULL) /* read-protect */ -#define EFI_MEMORY_XP ((u64)0x0000000000004000ULL) /* execute-protect */ -#define EFI_MEMORY_NV ((u64)0x0000000000008000ULL) /* non-volatile */ -#define EFI_MEMORY_MORE_RELIABLE \ - ((u64)0x0000000000010000ULL) /* higher reliability */ -#define EFI_MEMORY_RO ((u64)0x0000000000020000ULL) /* read-only */ -#define EFI_MEMORY_SP ((u64)0x0000000000040000ULL) /* soft reserved */ -#define EFI_MEMORY_CPU_CRYPTO ((u64)0x0000000000080000ULL) /* supports encryption */ +#define EFI_MEMORY_UC BIT_ULL(0) /* uncached */ +#define EFI_MEMORY_WC BIT_ULL(1) /* write-coalescing */ +#define EFI_MEMORY_WT BIT_ULL(2) /* write-through */ +#define EFI_MEMORY_WB BIT_ULL(3) /* write-back */ +#define EFI_MEMORY_UCE BIT_ULL(4) /* uncached, exported */ +#define EFI_MEMORY_WP BIT_ULL(12) /* write-protect */ +#define EFI_MEMORY_RP BIT_ULL(13) /* read-protect */ +#define EFI_MEMORY_XP BIT_ULL(14) /* execute-protect */ +#define EFI_MEMORY_NV BIT_ULL(15) /* non-volatile */ +#define EFI_MEMORY_MORE_RELIABLE BIT_ULL(16) /* higher reliability */ +#define EFI_MEMORY_RO BIT_ULL(17) /* read-only */ +#define EFI_MEMORY_SP BIT_ULL(18) /* soft reserved */ +#define EFI_MEMORY_CPU_CRYPTO BIT_ULL(19) /* supports encryption */ #define EFI_MEMORY_HOT_PLUGGABLE BIT_ULL(20) /* supports unplugging at runtime */ -#define EFI_MEMORY_RUNTIME ((u64)0x8000000000000000ULL) /* range requires runtime mapping */ +#define EFI_MEMORY_RUNTIME BIT_ULL(63) /* range requires runtime mapping */ + #define EFI_MEMORY_DESCRIPTOR_VERSION 1 #define EFI_PAGE_SHIFT 12 -- GitLab From e8ed246ded863eb862806c5591afdcf70012ab5e Mon Sep 17 00:00:00 2001 From: Andre Werner Date: Tue, 21 Jan 2025 08:18:19 +0100 Subject: [PATCH 0229/1585] serial: sc16is7xx: Fix IRQ number check behavior The logical meaning of the previous version is wrong due to a typo. If the IRQ equals 0, no interrupt pin is available and polling mode shall be used. Additionally, this fix adds a check for IRQ < 0 to increase robustness, because documentation still says that negative IRQ values cannot be absolutely ruled-out. Fixes: 104c1b9dde9d ("serial: sc16is7xx: Add polling mode if no IRQ pin is available") Signed-off-by: Andre Werner Reviewed-by: Jiri Slaby Reviewed-by: Andy Shevchenko Reviewed-by: Maarten Brock Reviewed-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20250121071819.1346672-1-andre.werner@systec-electronic.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 7b51cdc274fd8..560f45ed19aeb 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1561,7 +1561,7 @@ int sc16is7xx_probe(struct device *dev, const struct sc16is7xx_devtype *devtype, /* Always ask for fixed clock rate from a property. */ device_property_read_u32(dev, "clock-frequency", &uartclk); - s->polling = !!irq; + s->polling = (irq <= 0); if (s->polling) dev_dbg(dev, "No interrupt pin definition, falling back to polling mode\n"); -- GitLab From 166ac2bba167d575e7146beaa66093bc7c072f43 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 24 Jan 2025 18:10:46 +0200 Subject: [PATCH 0230/1585] serial: port: Assign ->iotype correctly when ->iobase is set Currently the ->iotype is always assigned to the UPIO_MEM when the respective property is not found. However, this will not support the cases when user wants to have UPIO_PORT to be set or preserved. Support this scenario by checking ->iobase value and default the ->iotype respectively. Fixes: 1117a6fdc7c1 ("serial: 8250_of: Switch to use uart_read_port_properties()") Fixes: e894b6005dce ("serial: port: Introduce a common helper to read properties") Cc: stable Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250124161530.398361-2-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_port.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/serial_port.c b/drivers/tty/serial/serial_port.c index d35f1d24156c2..f28d0633fe6bd 100644 --- a/drivers/tty/serial/serial_port.c +++ b/drivers/tty/serial/serial_port.c @@ -173,6 +173,7 @@ EXPORT_SYMBOL(uart_remove_one_port); * The caller is responsible to initialize the following fields of the @port * ->dev (must be valid) * ->flags + * ->iobase * ->mapbase * ->mapsize * ->regshift (if @use_defaults is false) @@ -214,7 +215,7 @@ static int __uart_read_properties(struct uart_port *port, bool use_defaults) /* Read the registers I/O access type (default: MMIO 8-bit) */ ret = device_property_read_u32(dev, "reg-io-width", &value); if (ret) { - port->iotype = UPIO_MEM; + port->iotype = port->iobase ? UPIO_PORT : UPIO_MEM; } else { switch (value) { case 1: -- GitLab From e8486bd50ecf63c9a1e25271f258a8d959f2672f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 24 Jan 2025 18:10:47 +0200 Subject: [PATCH 0231/1585] serial: port: Always update ->iotype in __uart_read_properties() The documentation of the __uart_read_properties() states that ->iotype member is always altered after the function call, but the code doesn't do that in the case when use_defaults == false and the value of reg-io-width is unsupported. Make sure the code follows the documentation. Note, the current users of the uart_read_and_validate_port_properties() will fail and the change doesn't affect their behaviour, neither users of uart_read_port_properties() will be affected since the alteration happens there even in the current code flow. Fixes: e894b6005dce ("serial: port: Introduce a common helper to read properties") Cc: stable Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250124161530.398361-3-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/serial_port.c b/drivers/tty/serial/serial_port.c index f28d0633fe6bd..85285c56fabff 100644 --- a/drivers/tty/serial/serial_port.c +++ b/drivers/tty/serial/serial_port.c @@ -228,11 +228,11 @@ static int __uart_read_properties(struct uart_port *port, bool use_defaults) port->iotype = device_is_big_endian(dev) ? UPIO_MEM32BE : UPIO_MEM32; break; default: + port->iotype = UPIO_UNKNOWN; if (!use_defaults) { dev_err(dev, "Unsupported reg-io-width (%u)\n", value); return -EINVAL; } - port->iotype = UPIO_UNKNOWN; break; } } -- GitLab From 12397549b5014071e1d2b315509f68eb93ef9144 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 24 Jan 2025 18:10:48 +0200 Subject: [PATCH 0232/1585] serial: port: Make ->iotype validation global in __uart_read_properties() In order to make code robust against potential changes in the future move ->iotype validation outside of switch in __uart_read_properties(). If any code will be added in between that might leave the ->iotype value unknown the validation catches this up. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250124161530.398361-4-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_port.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/serial_port.c b/drivers/tty/serial/serial_port.c index 85285c56fabff..2fc48cd63f6cf 100644 --- a/drivers/tty/serial/serial_port.c +++ b/drivers/tty/serial/serial_port.c @@ -229,14 +229,15 @@ static int __uart_read_properties(struct uart_port *port, bool use_defaults) break; default: port->iotype = UPIO_UNKNOWN; - if (!use_defaults) { - dev_err(dev, "Unsupported reg-io-width (%u)\n", value); - return -EINVAL; - } break; } } + if (!use_defaults && port->iotype == UPIO_UNKNOWN) { + dev_err(dev, "Unsupported reg-io-width (%u)\n", value); + return -EINVAL; + } + /* Read the address mapping base offset (default: no offset) */ ret = device_property_read_u32(dev, "reg-offset", &value); if (ret) -- GitLab From fe310f75327edbc042c7cc0df32c6b9ec29eb93a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 24 Jan 2025 18:10:49 +0200 Subject: [PATCH 0233/1585] serial: 8250_of: Remove unneeded ->iotype assignment If ->iobase is set the default will be UPIO_PORT for ->iotype after the uart_read_and_validate_port_properties() call. Hence no need to assign that explicitly. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250124161530.398361-5-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_of.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c index 64aed7efc5697..11c860ea80f60 100644 --- a/drivers/tty/serial/8250/8250_of.c +++ b/drivers/tty/serial/8250/8250_of.c @@ -110,7 +110,6 @@ static int of_platform_serial_setup(struct platform_device *ofdev, spin_lock_init(&port->lock); if (resource_type(&resource) == IORESOURCE_IO) { - port->iotype = UPIO_PORT; port->iobase = resource.start; } else { port->mapbase = resource.start; -- GitLab From 34bbb5d5137f32be3186a995a5ad4c60aaad11a7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 24 Jan 2025 18:10:50 +0200 Subject: [PATCH 0234/1585] serial: 8250_platform: Remove unneeded ->iotype assignment If ->iobase is set the default will be UPIO_PORT for ->iotype after the uart_read_and_validate_port_properties() call. Hence no need to assign that explicitly. Otherwise it will be UPIO_MEM. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250124161530.398361-6-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_platform.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/tty/serial/8250/8250_platform.c b/drivers/tty/serial/8250/8250_platform.c index 8bdc1879d952b..c0343bfb80647 100644 --- a/drivers/tty/serial/8250/8250_platform.c +++ b/drivers/tty/serial/8250/8250_platform.c @@ -112,7 +112,6 @@ static int serial8250_probe_acpi(struct platform_device *pdev) struct device *dev = &pdev->dev; struct uart_8250_port uart = { }; struct resource *regs; - unsigned char iotype; int ret, line; regs = platform_get_mem_or_io(pdev, 0); @@ -122,13 +121,11 @@ static int serial8250_probe_acpi(struct platform_device *pdev) switch (resource_type(regs)) { case IORESOURCE_IO: uart.port.iobase = regs->start; - iotype = UPIO_PORT; break; case IORESOURCE_MEM: uart.port.mapbase = regs->start; uart.port.mapsize = resource_size(regs); uart.port.flags = UPF_IOREMAP; - iotype = UPIO_MEM; break; default: return -EINVAL; @@ -147,12 +144,6 @@ static int serial8250_probe_acpi(struct platform_device *pdev) if (ret) return ret; - /* - * The previous call may not set iotype correctly when reg-io-width - * property is absent and it doesn't support IO port resource. - */ - uart.port.iotype = iotype; - line = serial8250_register_8250_port(&uart); if (line < 0) return line; -- GitLab From 0f3fd9cf6491f5beecbb65abb41556c56135340c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 24 Jan 2025 18:10:51 +0200 Subject: [PATCH 0235/1585] serial: 8250_pnp: Remove unneeded ->iotype assignment If ->iobase is set the default will be UPIO_PORT for ->iotype after the uart_read_and_validate_port_properties() call. Hence no need to assign that explicitly. Otherwise it will be UPIO_MEM. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250124161530.398361-7-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pnp.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pnp.c b/drivers/tty/serial/8250/8250_pnp.c index 7c06ae79d8e23..7a837fdf9df13 100644 --- a/drivers/tty/serial/8250/8250_pnp.c +++ b/drivers/tty/serial/8250/8250_pnp.c @@ -436,7 +436,6 @@ serial_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) { struct uart_8250_port uart, *port; int ret, flags = dev_id->driver_data; - unsigned char iotype; long line; if (flags & UNKNOWN_DEV) { @@ -448,14 +447,11 @@ serial_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) memset(&uart, 0, sizeof(uart)); if ((flags & CIR_PORT) && pnp_port_valid(dev, 2)) { uart.port.iobase = pnp_port_start(dev, 2); - iotype = UPIO_PORT; } else if (pnp_port_valid(dev, 0)) { uart.port.iobase = pnp_port_start(dev, 0); - iotype = UPIO_PORT; } else if (pnp_mem_valid(dev, 0)) { uart.port.mapbase = pnp_mem_start(dev, 0); uart.port.mapsize = pnp_mem_len(dev, 0); - iotype = UPIO_MEM; uart.port.flags = UPF_IOREMAP; } else return -ENODEV; @@ -471,12 +467,6 @@ serial_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) if (ret) return ret; - /* - * The previous call may not set iotype correctly when reg-io-width - * property is absent and it doesn't support IO port resource. - */ - uart.port.iotype = iotype; - if (flags & CIR_PORT) { uart.port.flags |= UPF_FIXED_PORT | UPF_FIXED_TYPE; uart.port.type = PORT_8250_CIR; -- GitLab From 4241a702e0d0c2ca9364cfac08dbf134264962de Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 3 Feb 2025 11:03:04 +0000 Subject: [PATCH 0236/1585] rxrpc: Fix the rxrpc_connection attend queue handling The rxrpc_connection attend queue is never used because conn::attend_link is never initialised and so is always NULL'd out and thus always appears to be busy. This requires the following fix: (1) Fix this the attend queue problem by initialising conn::attend_link. And, consequently, two further fixes for things masked by the above bug: (2) Fix rxrpc_input_conn_event() to handle being invoked with a NULL sk_buff pointer - something that can now happen with the above change. (3) Fix the RXRPC_SKB_MARK_SERVICE_CONN_SECURED message to carry a pointer to the connection and a ref on it. Signed-off-by: David Howells cc: Marc Dionne cc: Jakub Kicinski cc: "David S. Miller" cc: Eric Dumazet cc: Paolo Abeni cc: Simon Horman cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org Fixes: f2cce89a074e ("rxrpc: Implement a mechanism to send an event notification to a connection") Link: https://patch.msgid.link/20250203110307.7265-3-dhowells@redhat.com Signed-off-by: Paolo Abeni --- include/trace/events/rxrpc.h | 1 + net/rxrpc/conn_event.c | 17 ++++++++++------- net/rxrpc/conn_object.c | 1 + 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 2f119d18a061f..cad50d91077ef 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -219,6 +219,7 @@ EM(rxrpc_conn_get_conn_input, "GET inp-conn") \ EM(rxrpc_conn_get_idle, "GET idle ") \ EM(rxrpc_conn_get_poke_abort, "GET pk-abort") \ + EM(rxrpc_conn_get_poke_secured, "GET secured ") \ EM(rxrpc_conn_get_poke_timer, "GET poke ") \ EM(rxrpc_conn_get_service_conn, "GET svc-conn") \ EM(rxrpc_conn_new_client, "NEW client ") \ diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 713e04394ceb7..74bb49b936cd4 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -272,6 +272,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, * we've already received the packet, put it on the * front of the queue. */ + sp->conn = rxrpc_get_connection(conn, rxrpc_conn_get_poke_secured); skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED; rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured); skb_queue_head(&conn->local->rx_queue, skb); @@ -437,14 +438,16 @@ void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb) if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events)) rxrpc_abort_calls(conn); - switch (skb->mark) { - case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: - if (conn->state != RXRPC_CONN_SERVICE) - break; + if (skb) { + switch (skb->mark) { + case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: + if (conn->state != RXRPC_CONN_SERVICE) + break; - for (loop = 0; loop < RXRPC_MAXCALLS; loop++) - rxrpc_call_is_secure(conn->channels[loop].call); - break; + for (loop = 0; loop < RXRPC_MAXCALLS; loop++) + rxrpc_call_is_secure(conn->channels[loop].call); + break; + } } /* Process delayed ACKs whose time has come. */ diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 7eba4d7d9a380..2f1fd1e2e7e48 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -67,6 +67,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet, INIT_WORK(&conn->destructor, rxrpc_clean_up_connection); INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); + INIT_LIST_HEAD(&conn->attend_link); mutex_init(&conn->security_lock); mutex_init(&conn->tx_data_alloc_lock); skb_queue_head_init(&conn->rx_queue); -- GitLab From 5417a2e9b130a78bf48cb4cf92630efcee5ccf38 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Feb 2025 14:55:54 +0000 Subject: [PATCH 0237/1585] KVM: arm64: Fix nested S2 MMU structures reallocation For each vcpu that userspace creates, we allocate a number of s2_mmu structures that will eventually contain our shadow S2 page tables. Since this is a dynamically allocated array, we reallocate the array and initialise the newly allocated elements. Once everything is correctly initialised, we adjust pointer and size in the kvm structure, and move on. But should that initialisation fail *and* the reallocation triggered a copy to another location, we end-up returning early, with the kvm structure still containing the (now stale) old pointer. Weeee! Cure it by assigning the pointer early, and use this to perform the initialisation. If everything succeeds, we adjust the size. Otherwise, we just leave the size as it was, no harm done, and the new memory is as good as the ol' one (we hope...). Fixes: 4f128f8e1aaac ("KVM: arm64: nv: Support multiple nested Stage-2 mmu structures") Reported-by: Alexander Potapenko Tested-by: Alexander Potapenko Link: https://lore.kernel.org/r/20250204145554.774427-1-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/nested.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 33d2ace686658..0c9387d2f5070 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -67,26 +67,27 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu) if (!tmp) return -ENOMEM; + swap(kvm->arch.nested_mmus, tmp); + /* * If we went through a realocation, adjust the MMU back-pointers in * the previously initialised kvm_pgtable structures. */ if (kvm->arch.nested_mmus != tmp) for (int i = 0; i < kvm->arch.nested_mmus_size; i++) - tmp[i].pgt->mmu = &tmp[i]; + kvm->arch.nested_mmus[i].pgt->mmu = &kvm->arch.nested_mmus[i]; for (int i = kvm->arch.nested_mmus_size; !ret && i < num_mmus; i++) - ret = init_nested_s2_mmu(kvm, &tmp[i]); + ret = init_nested_s2_mmu(kvm, &kvm->arch.nested_mmus[i]); if (ret) { for (int i = kvm->arch.nested_mmus_size; i < num_mmus; i++) - kvm_free_stage2_pgd(&tmp[i]); + kvm_free_stage2_pgd(&kvm->arch.nested_mmus[i]); return ret; } kvm->arch.nested_mmus_size = num_mmus; - kvm->arch.nested_mmus = tmp; return 0; } -- GitLab From b450dcce93bc2cf6d2bfaf5a0de88a94ebad8f89 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Feb 2025 11:00:48 +0000 Subject: [PATCH 0238/1585] KVM: arm64: timer: Always evaluate the need for a soft timer When updating the interrupt state for an emulated timer, we return early and skip the setup of a soft timer that runs in parallel with the guest. While this is OK if we have set the interrupt pending, it is pretty wrong if the guest moved CVAL into the future. In that case, no timer is armed and the guest can wait for a very long time (it will take a full put/load cycle for the situation to resolve). This is specially visible with EDK2 running at EL2, but still using the EL1 virtual timer, which in that case is fully emulated. Any key-press takes ages to be captured, as there is no UART interrupt and EDK2 relies on polling from a timer... The fix is simply to drop the early return. If the timer interrupt is pending, we will still return early, and otherwise arm the soft timer. Fixes: 4d74ecfa6458b ("KVM: arm64: Don't arm a hrtimer for an already pending timer") Cc: stable@vger.kernel.org Tested-by: Dmytro Terletskyi Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250204110050.150560-2-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index d3d243366536c..035e43f5d4f9a 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -471,10 +471,8 @@ static void timer_emulate(struct arch_timer_context *ctx) trace_kvm_timer_emulate(ctx, should_fire); - if (should_fire != ctx->irq.level) { + if (should_fire != ctx->irq.level) kvm_timer_update_irq(ctx->vcpu, should_fire, ctx); - return; - } kvm_timer_update_status(ctx, should_fire); -- GitLab From 1b8705ad5365b5333240b46d5cd24e88ef2ddb14 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Feb 2025 11:00:49 +0000 Subject: [PATCH 0239/1585] KVM: arm64: timer: Correctly handle EL1 timer emulation when !FEAT_ECV Both Wei-Lin Chang and Volodymyr Babchuk report that the way we handle the emulation of EL1 timers with NV is completely wrong, specially in the case of HCR_EL2.E2H==0. There are three problems in about as many lines of code: - With E2H==0, the EL1 timers are overwritten with the EL1 state, while they should actually contain the EL2 state (as per the timer map) - With E2H==1, we run the full EL1 timer emulation even when ECV is present, hiding a bug in timer_emulate() (see previous patch) - The comments are actively misleading, and say all the wrong things. This is only attributable to the code having been initially written for FEAT_NV, hacked up to handle FEAT_NV2 *in parallel*, and vaguely hacked again to be FEAT_NV2 only. Oh, and yours truly being a gold plated idiot. The fix is obvious: just delete most of the E2H==0 code, have a unified handling of the timers (because they really are E2H agnostic), and make sure we don't execute any of that when FEAT_ECV is present. Fixes: 4bad3068cfa9f ("KVM: arm64: nv: Sync nested timer state with FEAT_NV2") Reported-by: Wei-Lin Chang Reported-by: Volodymyr Babchuk Link: https://lore.kernel.org/r/fqiqfjzwpgbzdtouu2pwqlu7llhnf5lmy4hzv5vo6ph4v3vyls@jdcfy3fjjc5k Link: https://lore.kernel.org/r/87frl51tse.fsf@epam.com Tested-by: Dmytro Terletskyi Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250204110050.150560-3-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 035e43f5d4f9a..e59836e0260cf 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -974,31 +974,21 @@ void kvm_timer_sync_nested(struct kvm_vcpu *vcpu) * which allows trapping of the timer registers even with NV2. * Still, this is still worse than FEAT_NV on its own. Meh. */ - if (!vcpu_el2_e2h_is_set(vcpu)) { - if (cpus_have_final_cap(ARM64_HAS_ECV)) - return; - - /* - * A non-VHE guest hypervisor doesn't have any direct access - * to its timers: the EL2 registers trap (and the HW is - * fully emulated), while the EL0 registers access memory - * despite the access being notionally direct. Boo. - * - * We update the hardware timer registers with the - * latest value written by the guest to the VNCR page - * and let the hardware take care of the rest. - */ - write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CTL_EL0), SYS_CNTV_CTL); - write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0), SYS_CNTV_CVAL); - write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CTL_EL0), SYS_CNTP_CTL); - write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0), SYS_CNTP_CVAL); - } else { + if (!cpus_have_final_cap(ARM64_HAS_ECV)) { /* * For a VHE guest hypervisor, the EL2 state is directly - * stored in the host EL1 timers, while the emulated EL0 + * stored in the host EL1 timers, while the emulated EL1 * state is stored in the VNCR page. The latter could have * been updated behind our back, and we must reset the * emulation of the timers. + * + * A non-VHE guest hypervisor doesn't have any direct access + * to its timers: the EL2 registers trap despite being + * notionally direct (we use the EL1 HW, as for VHE), while + * the EL1 registers access memory. + * + * In both cases, process the emulated timers on each guest + * exit. Boo. */ struct timer_map map; get_timer_map(vcpu, &map); -- GitLab From 0e459810285503fb354537e84049e212c5917c33 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Feb 2025 11:00:50 +0000 Subject: [PATCH 0240/1585] KVM: arm64: timer: Don't adjust the EL2 virtual timer offset The way we deal with the EL2 virtual timer is a bit odd. We try to cope with E2H being flipped, and adjust which offset applies to that timer depending on the current E2H value. But that's a complexity we shouldn't have to worry about. What we have to deal with is either E2H being RES1, in which case there is no offset, or E2H being RES0, and the virtual timer simply does not exist. Drop the adjusting of the timer offset, which makes things a bit simpler. At the same time, make sure that accessing the HV timer when E2H is RES0 results in an UNDEF in the guest. Suggested-by: Oliver Upton Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250204110050.150560-4-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 15 --------------- arch/arm64/kvm/sys_regs.c | 16 +++++++++++++--- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index e59836e0260cf..231c0cd9c7b4b 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -759,21 +759,6 @@ static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu, timer_irq(map->direct_ptimer), &arch_timer_irq_ops); WARN_ON_ONCE(ret); - - /* - * The virtual offset behaviour is "interesting", as it - * always applies when HCR_EL2.E2H==0, but only when - * accessed from EL1 when HCR_EL2.E2H==1. So make sure we - * track E2H when putting the HV timer in "direct" mode. - */ - if (map->direct_vtimer == vcpu_hvtimer(vcpu)) { - struct arch_timer_offset *offs = &map->direct_vtimer->offset; - - if (vcpu_el2_e2h_is_set(vcpu)) - offs->vcpu_offset = NULL; - else - offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2); - } } } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 526d66f24e34a..7968bee0d27ea 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1452,6 +1452,16 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, return true; } +static bool access_hv_timer(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (!vcpu_el2_e2h_is_set(vcpu)) + return undef_access(vcpu, p, r); + + return access_arch_timer(vcpu, p, r); +} + static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp, s64 new, s64 cur) { @@ -3099,9 +3109,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(CNTHP_CTL_EL2, access_arch_timer, reset_val, 0), EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0), - { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer }, - EL2_REG(CNTHV_CTL_EL2, access_arch_timer, reset_val, 0), - EL2_REG(CNTHV_CVAL_EL2, access_arch_timer, reset_val, 0), + { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_hv_timer }, + EL2_REG(CNTHV_CTL_EL2, access_hv_timer, reset_val, 0), + EL2_REG(CNTHV_CVAL_EL2, access_hv_timer, reset_val, 0), { SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 }, -- GitLab From ee3a66f431d689b796b9cb48aefd3d223540381c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 4 Feb 2025 11:13:24 -0500 Subject: [PATCH 0241/1585] kvm: x86: SRSO_USER_KERNEL_NO is not synthesized SYNTHESIZED_F() generally is used together with setup_force_cpu_cap(), i.e. when it makes sense to present the feature even if cpuid does not have it *and* the VM is not able to see the difference. For example, it can be used when mitigations on the host automatically protect the guest as well. The "SYNTHESIZED_F(SRSO_USER_KERNEL_NO)" line came in as a conflict resolution between the CPUID overhaul from the KVM tree and support for the feature in the x86 tree. Using it right now does not hurt, or make a difference for that matter, because there is no setup_force_cpu_cap(X86_FEATURE_SRSO_USER_KERNEL_NO). However, it is a little less future proof in case such a setup_force_cpu_cap() appears later, for a case where the kernel somehow is not vulnerable but the guest would have to apply the mitigation. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 2cbb3874ad398..8eb3a88707f21 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1180,7 +1180,7 @@ void kvm_set_cpu_caps(void) SYNTHESIZED_F(SBPB), SYNTHESIZED_F(IBPB_BRTYPE), SYNTHESIZED_F(SRSO_NO), - SYNTHESIZED_F(SRSO_USER_KERNEL_NO), + F(SRSO_USER_KERNEL_NO), ); kvm_cpu_cap_init(CPUID_8000_0022_EAX, -- GitLab From 203a53029a9c6935ad38e0343d048e51488797cf Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 4 Feb 2025 10:56:47 +0000 Subject: [PATCH 0242/1585] KVM: selftests: Fix spelling mistake "initally" -> "initially" There is a spelling mistake in a literal string and in the function test_get_inital_dirty. Fix them. Signed-off-by: Colin Ian King Message-ID: <20250204105647.367743-1-colin.i.king@gmail.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/s390/cmma_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/s390/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c index e32dd59703a08..85cc8c18d6e70 100644 --- a/tools/testing/selftests/kvm/s390/cmma_test.c +++ b/tools/testing/selftests/kvm/s390/cmma_test.c @@ -444,7 +444,7 @@ static void assert_no_pages_cmma_dirty(struct kvm_vm *vm) ); } -static void test_get_inital_dirty(void) +static void test_get_initial_dirty(void) { struct kvm_vm *vm = create_vm_two_memslots(); struct kvm_vcpu *vcpu; @@ -651,7 +651,7 @@ struct testdef { } testlist[] = { { "migration mode and dirty tracking", test_migration_mode }, { "GET_CMMA_BITS: basic calls", test_get_cmma_basic }, - { "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty }, + { "GET_CMMA_BITS: all pages are dirty initially", test_get_initial_dirty }, { "GET_CMMA_BITS: holes are skipped", test_get_skip_holes }, }; -- GitLab From fc016ef7da64fd473d73ee6c261ba1b0b47afe2b Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 4 Feb 2025 13:39:41 +0800 Subject: [PATCH 0243/1585] ASoC: Intel: sof_sdw: Add lookup of quirk using PCI subsystem ID Add lookup of PCI subsystem vendor:device ID to find a quirk. The subsystem ID (SSID) is part of the PCI specification to uniquely identify a particular system-specific implementation of a hardware device. Unlike DMI information, it identifies the sound hardware itself, rather than a specific model of PC. SSID can be more reliable and stable than DMI strings, and is preferred by some vendors as the way to identify the actual sound hardware. Signed-off-by: Richard Fitzgerald Reviewed-by: Liam Girdwood Signed-off-by: Bard Liao Link: https://patch.msgid.link/20250204053943.93596-2-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_sdw.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index b0d35fda7b179..381fae5943fe5 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "sof_sdw_common.h" #include "../../codecs/rt711.h" @@ -751,6 +752,22 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { {} }; +static const struct snd_pci_quirk sof_sdw_ssid_quirk_table[] = { + {} +}; + +static void sof_sdw_check_ssid_quirk(const struct snd_soc_acpi_mach *mach) +{ + const struct snd_pci_quirk *quirk_entry; + + quirk_entry = snd_pci_quirk_lookup_id(mach->mach_params.subsystem_vendor, + mach->mach_params.subsystem_device, + sof_sdw_ssid_quirk_table); + + if (quirk_entry) + sof_sdw_quirk = quirk_entry->value; +} + static struct snd_soc_dai_link_component platform_component[] = { { /* name might be overridden during probe */ @@ -1278,6 +1295,13 @@ static int mc_probe(struct platform_device *pdev) snd_soc_card_set_drvdata(card, ctx); + if (mach->mach_params.subsystem_id_set) { + snd_soc_card_set_pci_ssid(card, + mach->mach_params.subsystem_vendor, + mach->mach_params.subsystem_device); + sof_sdw_check_ssid_quirk(mach); + } + dmi_check_system(sof_sdw_quirk_table); if (quirk_override != -1) { @@ -1293,12 +1317,6 @@ static int mc_probe(struct platform_device *pdev) for (i = 0; i < ctx->codec_info_list_count; i++) codec_info_list[i].amp_num = 0; - if (mach->mach_params.subsystem_id_set) { - snd_soc_card_set_pci_ssid(card, - mach->mach_params.subsystem_vendor, - mach->mach_params.subsystem_device); - } - ret = sof_card_dai_links_create(card); if (ret < 0) return ret; -- GitLab From 0843449708085c4fb45a3c325c2fbced556f6abf Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 4 Feb 2025 13:39:42 +0800 Subject: [PATCH 0244/1585] ASoC: Intel: sof_sdw: Add quirk for Asus Zenbook S14 Asus laptops with sound PCI subsystem ID 1043:1e13 have the DMICs connected to the host instead of the CS42L43 so need the SOC_SDW_CODEC_MIC quirk. Signed-off-by: Richard Fitzgerald Reviewed-by: Liam Girdwood Signed-off-by: Bard Liao Link: https://patch.msgid.link/20250204053943.93596-3-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_sdw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 381fae5943fe5..683e15b459a16 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -753,6 +753,7 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { }; static const struct snd_pci_quirk sof_sdw_ssid_quirk_table[] = { + SND_PCI_QUIRK(0x1043, 0x1e13, "ASUS Zenbook S14", SOC_SDW_CODEC_MIC), {} }; -- GitLab From d8989106287d3735c7e7fc6acb3811d62ebb666c Mon Sep 17 00:00:00 2001 From: Uday M Bhat Date: Tue, 4 Feb 2025 13:39:43 +0800 Subject: [PATCH 0245/1585] ASoC: Intel: sof_sdw: Add support for Fatcat board with BT offload enabled in PTL platform This change adds an entry for fatcat boards in soundwire quirk table and also, enables BT offload for PTL RVP. Signed-off-by: Uday M Bhat Signed-off-by: Jairaj Arava Reviewed-by: Ranjani Sridharan Signed-off-by: Bard Liao Link: https://patch.msgid.link/20250204053943.93596-4-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_sdw.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 683e15b459a16..203b07d4d833c 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -749,6 +749,16 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { }, .driver_data = (void *)(SOC_SDW_PCH_DMIC), }, + { + .callback = sof_sdw_quirk_cb, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Google"), + DMI_MATCH(DMI_PRODUCT_NAME, "Fatcat"), + }, + .driver_data = (void *)(SOC_SDW_PCH_DMIC | + SOF_BT_OFFLOAD_SSP(2) | + SOF_SSP_BT_OFFLOAD_PRESENT), + }, {} }; -- GitLab From 3588b76db7ba798f54dee39a55708b16e1c61de4 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Tue, 4 Feb 2025 11:31:33 +0800 Subject: [PATCH 0246/1585] ASoC: Intel: soc-acpi-intel-tgl-match: declare adr as ull MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The adr is u64. Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Ranjani Sridharan Link: https://patch.msgid.link/20250204033134.92332-2-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- .../soc/intel/common/soc-acpi-intel-tgl-match.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sound/soc/intel/common/soc-acpi-intel-tgl-match.c b/sound/soc/intel/common/soc-acpi-intel-tgl-match.c index 6f8c064136657..b77aafb0bfb6a 100644 --- a/sound/soc/intel/common/soc-acpi-intel-tgl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-tgl-match.c @@ -658,25 +658,25 @@ static const struct snd_soc_acpi_endpoint cs35l56_7_fb_endpoints[] = { static const struct snd_soc_acpi_adr_device cs35l56_sdw_eight_1_4_fb_adr[] = { { - .adr = 0x00003301fa355601, + .adr = 0x00003301fa355601ull, .num_endpoints = ARRAY_SIZE(cs35l56_l_fb_endpoints), .endpoints = cs35l56_l_fb_endpoints, .name_prefix = "AMP1" }, { - .adr = 0x00003201fa355601, + .adr = 0x00003201fa355601ull, .num_endpoints = ARRAY_SIZE(cs35l56_2_fb_endpoints), .endpoints = cs35l56_2_fb_endpoints, .name_prefix = "AMP2" }, { - .adr = 0x00003101fa355601, + .adr = 0x00003101fa355601ull, .num_endpoints = ARRAY_SIZE(cs35l56_4_fb_endpoints), .endpoints = cs35l56_4_fb_endpoints, .name_prefix = "AMP3" }, { - .adr = 0x00003001fa355601, + .adr = 0x00003001fa355601ull, .num_endpoints = ARRAY_SIZE(cs35l56_6_fb_endpoints), .endpoints = cs35l56_6_fb_endpoints, .name_prefix = "AMP4" @@ -685,25 +685,25 @@ static const struct snd_soc_acpi_adr_device cs35l56_sdw_eight_1_4_fb_adr[] = { static const struct snd_soc_acpi_adr_device cs35l56_sdw_eight_5_8_fb_adr[] = { { - .adr = 0x00013701fa355601, + .adr = 0x00013701fa355601ull, .num_endpoints = ARRAY_SIZE(cs35l56_r_fb_endpoints), .endpoints = cs35l56_r_fb_endpoints, .name_prefix = "AMP8" }, { - .adr = 0x00013601fa355601, + .adr = 0x00013601fa355601ull, .num_endpoints = ARRAY_SIZE(cs35l56_3_fb_endpoints), .endpoints = cs35l56_3_fb_endpoints, .name_prefix = "AMP7" }, { - .adr = 0x00013501fa355601, + .adr = 0x00013501fa355601ull, .num_endpoints = ARRAY_SIZE(cs35l56_5_fb_endpoints), .endpoints = cs35l56_5_fb_endpoints, .name_prefix = "AMP6" }, { - .adr = 0x00013401fa355601, + .adr = 0x00013401fa355601ull, .num_endpoints = ARRAY_SIZE(cs35l56_7_fb_endpoints), .endpoints = cs35l56_7_fb_endpoints, .name_prefix = "AMP5" -- GitLab From 20efccc53abf99fa52ea30a43dec758f6b6b9940 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Tue, 4 Feb 2025 11:31:34 +0800 Subject: [PATCH 0247/1585] ASoC: Intel: soc-acpi-intel-mtl-match: declare adr as ull MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The adr is u64. Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Ranjani Sridharan Link: https://patch.msgid.link/20250204033134.92332-3-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/common/soc-acpi-intel-mtl-match.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/common/soc-acpi-intel-mtl-match.c b/sound/soc/intel/common/soc-acpi-intel-mtl-match.c index 770e2194a283e..9e611e3667adb 100644 --- a/sound/soc/intel/common/soc-acpi-intel-mtl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-mtl-match.c @@ -330,7 +330,7 @@ static const struct snd_soc_acpi_adr_device rt1316_3_single_adr[] = { static const struct snd_soc_acpi_adr_device rt1318_1_single_adr[] = { { - .adr = 0x000130025D131801, + .adr = 0x000130025D131801ull, .num_endpoints = 1, .endpoints = &single_endpoint, .name_prefix = "rt1318-1" -- GitLab From 6b24e67b4056ba83b1e95e005b7e50fdb1cc6cf4 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 4 Feb 2025 16:13:10 +0000 Subject: [PATCH 0248/1585] ASoC: rockchip: i2s-tdm: fix shift config for SND_SOC_DAIFMT_DSP_[AB] Commit 2f45a4e289779 ("ASoC: rockchip: i2s_tdm: Fixup config for SND_SOC_DAIFMT_DSP_A/B") applied a partial change to fix the configuration for DSP A and DSP B formats. The shift control also needs updating to set the correct offset for frame data compared to LRCK. Set the correct values. Fixes: 081068fd64140 ("ASoC: rockchip: add support for i2s-tdm controller") Signed-off-by: John Keeping Link: https://patch.msgid.link/20250204161311.2117240-1-jkeeping@inmusicbrands.com Signed-off-by: Mark Brown --- sound/soc/rockchip/rockchip_i2s_tdm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c index 7f5fcaecee4b6..78ab88843f861 100644 --- a/sound/soc/rockchip/rockchip_i2s_tdm.c +++ b/sound/soc/rockchip/rockchip_i2s_tdm.c @@ -451,11 +451,11 @@ static int rockchip_i2s_tdm_set_fmt(struct snd_soc_dai *cpu_dai, break; case SND_SOC_DAIFMT_DSP_A: val = I2S_TXCR_TFS_TDM_PCM; - tdm_val = TDM_SHIFT_CTRL(0); + tdm_val = TDM_SHIFT_CTRL(2); break; case SND_SOC_DAIFMT_DSP_B: val = I2S_TXCR_TFS_TDM_PCM; - tdm_val = TDM_SHIFT_CTRL(2); + tdm_val = TDM_SHIFT_CTRL(4); break; default: ret = -EINVAL; -- GitLab From 6f61269495260531e15d84d090ee63618110c470 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 24 Jan 2025 10:26:22 -0500 Subject: [PATCH 0249/1585] KVM: remove kvm_arch_post_init_vm The only statement in a kvm_arch_post_init_vm implementation can be moved into the x86 kvm_arch_init_vm. Do so and remove all traces from architecture-independent code. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 7 +------ include/linux/kvm_host.h | 1 - virt/kvm/kvm_main.c | 15 --------------- 3 files changed, 1 insertion(+), 22 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6d4a6734b2d69..8e77e61d4fbd4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12741,6 +12741,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) "does not run without ignore_msrs=1, please report it to kvm@vger.kernel.org.\n"); } + once_init(&kvm->arch.nx_once); return 0; out_uninit_mmu: @@ -12750,12 +12751,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return ret; } -int kvm_arch_post_init_vm(struct kvm *kvm) -{ - once_init(&kvm->arch.nx_once); - return 0; -} - static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { vcpu_load(vcpu); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3cb9a32a6330e..f34f4cfaa5134 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1615,7 +1615,6 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu); -int kvm_arch_post_init_vm(struct kvm *kvm); void kvm_arch_pre_destroy_vm(struct kvm *kvm); void kvm_arch_create_vm_debugfs(struct kvm *kvm); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3f04cd5e3a8cf..ba0327e2d0d33 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1070,15 +1070,6 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname) return ret; } -/* - * Called after the VM is otherwise initialized, but just before adding it to - * the vm_list. - */ -int __weak kvm_arch_post_init_vm(struct kvm *kvm) -{ - return 0; -} - /* * Called just after removing the VM from the vm_list, but before doing any * other destruction. @@ -1199,10 +1190,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) if (r) goto out_err_no_debugfs; - r = kvm_arch_post_init_vm(kvm); - if (r) - goto out_err; - mutex_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); mutex_unlock(&kvm_lock); @@ -1212,8 +1199,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) return kvm; -out_err: - kvm_destroy_vm_debugfs(kvm); out_err_no_debugfs: kvm_coalesced_mmio_free(kvm); out_no_coalesced_mmio: -- GitLab From 43fb96ae78551d7bfa4ecca956b258f085d67c40 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 24 Jan 2025 15:46:23 -0800 Subject: [PATCH 0250/1585] KVM: x86/mmu: Ensure NX huge page recovery thread is alive before waking When waking a VM's NX huge page recovery thread, ensure the thread is actually alive before trying to wake it. Now that the thread is spawned on-demand during KVM_RUN, a VM without a recovery thread is reachable via the related module params. BUG: kernel NULL pointer dereference, address: 0000000000000040 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:vhost_task_wake+0x5/0x10 Call Trace: set_nx_huge_pages+0xcc/0x1e0 [kvm] param_attr_store+0x8a/0xd0 module_attr_store+0x1a/0x30 kernfs_fop_write_iter+0x12f/0x1e0 vfs_write+0x233/0x3e0 ksys_write+0x60/0xd0 do_syscall_64+0x5b/0x160 entry_SYSCALL_64_after_hwframe+0x4b/0x53 RIP: 0033:0x7f3b52710104 Modules linked in: kvm_intel kvm CR2: 0000000000000040 Fixes: 931656b9e2ff ("kvm: defer huge page recovery vhost task to later") Cc: stable@vger.kernel.org Cc: Keith Busch Signed-off-by: Sean Christopherson Message-ID: <20250124234623.3609069-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index a45ae60e84ab4..74c20dbb92dae 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -7120,6 +7120,19 @@ static void mmu_destroy_caches(void) kmem_cache_destroy(mmu_page_header_cache); } +static void kvm_wake_nx_recovery_thread(struct kvm *kvm) +{ + /* + * The NX recovery thread is spawned on-demand at the first KVM_RUN and + * may not be valid even though the VM is globally visible. Do nothing, + * as such a VM can't have any possible NX huge pages. + */ + struct vhost_task *nx_thread = READ_ONCE(kvm->arch.nx_huge_page_recovery_thread); + + if (nx_thread) + vhost_task_wake(nx_thread); +} + static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp) { if (nx_hugepage_mitigation_hard_disabled) @@ -7180,7 +7193,7 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) kvm_mmu_zap_all_fast(kvm); mutex_unlock(&kvm->slots_lock); - vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread); + kvm_wake_nx_recovery_thread(kvm); } mutex_unlock(&kvm_lock); } @@ -7315,7 +7328,7 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) - vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread); + kvm_wake_nx_recovery_thread(kvm); mutex_unlock(&kvm_lock); } @@ -7451,14 +7464,20 @@ static void kvm_mmu_start_lpage_recovery(struct once *once) { struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once); struct kvm *kvm = container_of(ka, struct kvm, arch); + struct vhost_task *nx_thread; kvm->arch.nx_huge_page_last = get_jiffies_64(); - kvm->arch.nx_huge_page_recovery_thread = vhost_task_create( - kvm_nx_huge_page_recovery_worker, kvm_nx_huge_page_recovery_worker_kill, - kvm, "kvm-nx-lpage-recovery"); + nx_thread = vhost_task_create(kvm_nx_huge_page_recovery_worker, + kvm_nx_huge_page_recovery_worker_kill, + kvm, "kvm-nx-lpage-recovery"); - if (kvm->arch.nx_huge_page_recovery_thread) - vhost_task_start(kvm->arch.nx_huge_page_recovery_thread); + if (!nx_thread) + return; + + vhost_task_start(nx_thread); + + /* Make the task visible only once it is fully started. */ + WRITE_ONCE(kvm->arch.nx_huge_page_recovery_thread, nx_thread); } int kvm_mmu_post_init_vm(struct kvm *kvm) -- GitLab From 2a03d2da55b4cd5c86b360db0e917ee93b3f0cb9 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 9 Jan 2025 06:35:49 +0200 Subject: [PATCH 0251/1585] dt-bindings: nvmem: qcom,qfprom: Add SAR2130P compatible Document compatible for the QFPROM on SAR2130P platform. Signed-off-by: Dmitry Baryshkov Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250109-sar2130p-nvmem-v4-5-633739fe5f11@linaro.org Signed-off-by: Rob Herring (Arm) --- Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml index d37f544ab8aa3..39c209249c9c0 100644 --- a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml +++ b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml @@ -36,6 +36,7 @@ properties: - qcom,qcs404-qfprom - qcom,qcs615-qfprom - qcom,qcs8300-qfprom + - qcom,sar2130p-qfprom - qcom,sc7180-qfprom - qcom,sc7280-qfprom - qcom,sc8280xp-qfprom -- GitLab From 230b19bc2bcc5897d0e20b4ce7e9790a469a2db0 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 31 Jan 2025 14:49:54 +0200 Subject: [PATCH 0252/1585] drm/i915/dp: Iterate DSC BPP from high to low on all platforms Commit 1c56e9a39833 ("drm/i915/dp: Get optimal link config to have best compressed bpp") tries to find the best compressed bpp for the link. However, it iterates from max to min bpp on display 13+, and from min to max on other platforms. This presumably leads to minimum compressed bpp always being chosen on display 11-12. Iterate from high to low on all platforms to actually use the best possible compressed bpp. Fixes: 1c56e9a39833 ("drm/i915/dp: Get optimal link config to have best compressed bpp") Cc: Ankit Nautiyal Cc: Imre Deak Cc: # v6.7+ Reviewed-by: Imre Deak Reviewed-by: Ankit Nautiyal Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/3bba67923cbcd13a59d26ef5fa4bb042b13c8a9b.1738327620.git.jani.nikula@intel.com (cherry picked from commit 56b0337d429356c3b9ecc36a03023c8cc856b196) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index be07034bfcc69..aa77ddcee42c8 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2072,11 +2072,10 @@ icl_dsc_compute_link_config(struct intel_dp *intel_dp, /* Compressed BPP should be less than the Input DSC bpp */ dsc_max_bpp = min(dsc_max_bpp, pipe_bpp - 1); - for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp); i++) { - if (valid_dsc_bpp[i] < dsc_min_bpp) + for (i = ARRAY_SIZE(valid_dsc_bpp) - 1; i >= 0; i--) { + if (valid_dsc_bpp[i] < dsc_min_bpp || + valid_dsc_bpp[i] > dsc_max_bpp) continue; - if (valid_dsc_bpp[i] > dsc_max_bpp) - break; ret = dsc_compute_link_config(intel_dp, pipe_config, -- GitLab From ecee4d0695067ae04959b121028b42a588e75370 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 4 Feb 2025 11:40:25 -0600 Subject: [PATCH 0253/1585] accel/amdxdna: Add MODULE_FIRMWARE() declarations Initramfs building tools such as dracut will look for a MODULE_FIRMWARE() declaration to determine which firmware to include in the initramfs when a driver is included in the initramfs. As amdxdna doesn't declare any firmware this causes the driver to fail to load with -ENOENT when in the initramfs. Add the missing declaration for possible firmware. Reported-by: Renjith Pananchikkal Suggested-by: Alexander Deucher Fixes: 8c9ff1b181ba ("accel/amdxdna: Add a new driver for AMD AI Engine") Reviewed-by: Lizhi Hou Link: https://lore.kernel.org/r/20250204174031.3425762-1-superm1@kernel.org Signed-off-by: Mario Limonciello Link: https://patchwork.freedesktop.org/patch/msgid/20250204174031.3425762-1-superm1@kernel.org --- drivers/accel/amdxdna/amdxdna_pci_drv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index 97d4a032171f1..f5b8497cf5ad6 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c @@ -21,6 +21,11 @@ #define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */ +MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin"); +MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin"); +MODULE_FIRMWARE("amdnpu/17f0_11/npu.sbin"); +MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin"); + /* * Bind the driver base on (vendor_id, device_id) pair and later use the * (device_id, rev_id) pair as a key to select the devices. The devices with -- GitLab From aff2355d260e47e780cd96af127beaab18a664b1 Mon Sep 17 00:00:00 2001 From: Mark Lord Date: Tue, 4 Feb 2025 19:45:06 +0200 Subject: [PATCH 0254/1585] spi: pxa2xx: Fix regression when toggling chip select on LPSS devices The commit 78b435c9044a ("spi: pxa2xx: Introduce __lpss_ssp_update_priv() helper") broke speaker output on my ASUS UX5304MA laptop. The problem is in inverted value that got written in the private register. Simple bug, simple fix. Fixes: 78b435c9044a ("spi: pxa2xx: Introduce __lpss_ssp_update_priv() helper") Signed-off-by: Mark Lord Tested-by: Mark Lord Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20250204174506.149978-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 5f9cac41baff6..06711a62fa3dc 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -399,7 +399,7 @@ static void lpss_ssp_cs_control(struct spi_device *spi, bool enable) lpss_ssp_select_cs(spi, config); mask = LPSS_CS_CONTROL_CS_HIGH; - __lpss_ssp_update_priv(drv_data, config->reg_cs_ctrl, mask, enable ? mask : 0); + __lpss_ssp_update_priv(drv_data, config->reg_cs_ctrl, mask, enable ? 0 : mask); if (config->cs_clk_stays_gated) { /* * Changing CS alone when dynamic clock gating is on won't -- GitLab From fd079124112c6e11c1bca2e7c71470a2d60bc363 Mon Sep 17 00:00:00 2001 From: Bharadwaj Raju Date: Wed, 5 Feb 2025 00:59:53 +0530 Subject: [PATCH 0255/1585] selftests/cgroup: use bash in test_cpuset_v1_hp.sh The script uses non-POSIX features like `[[` for conditionals and hence does not work when run with a POSIX /bin/sh. Change the shebang to /bin/bash instead, like the other tests in cgroup. Signed-off-by: Bharadwaj Raju Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh index 3f45512fb512e..7406c24be1ac9 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # Test the special cpuset v1 hotplug case where a cpuset become empty of -- GitLab From d6179f6c6204f9932aed3a7a2100b4a295dfed9d Mon Sep 17 00:00:00 2001 From: Mark Tomlinson Date: Thu, 6 Jun 2024 15:31:02 +1200 Subject: [PATCH 0256/1585] gpio: pca953x: Improve interrupt support The GPIO drivers with latch interrupt support (typically types starting with PCAL) have interrupt status registers to determine which particular inputs have caused an interrupt. Unfortunately there is no atomic operation to read these registers and clear the interrupt. Clearing the interrupt is done by reading the input registers. The code was reading the interrupt status registers, and then reading the input registers. If an input changed between these two events it was lost. The solution in this patch is to revert to the non-latch version of code, i.e. remembering the previous input status, and looking for the changes. This system results in no more I2C transfers, so is no slower. The latch property of the device still means interrupts will still be noticed if the input changes back to its initial state. Fixes: 44896beae605 ("gpio: pca953x: add PCAL9535 interrupt support for Galileo Gen2") Signed-off-by: Mark Tomlinson Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240606033102.2271916-1-mark.tomlinson@alliedtelesis.co.nz Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index be4c9981ebc40..d63c1030e6ac0 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -841,25 +841,6 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, unsigned long *pendin DECLARE_BITMAP(trigger, MAX_LINE); int ret; - if (chip->driver_data & PCA_PCAL) { - /* Read the current interrupt status from the device */ - ret = pca953x_read_regs(chip, PCAL953X_INT_STAT, trigger); - if (ret) - return false; - - /* Check latched inputs and clear interrupt status */ - ret = pca953x_read_regs(chip, chip->regs->input, cur_stat); - if (ret) - return false; - - /* Apply filter for rising/falling edge selection */ - bitmap_replace(new_stat, chip->irq_trig_fall, chip->irq_trig_raise, cur_stat, gc->ngpio); - - bitmap_and(pending, new_stat, trigger, gc->ngpio); - - return !bitmap_empty(pending, gc->ngpio); - } - ret = pca953x_read_regs(chip, chip->regs->input, cur_stat); if (ret) return false; -- GitLab From 028676bb189ed6d1b550a0fc570a9d695b6acfd3 Mon Sep 17 00:00:00 2001 From: Jacob Moroni Date: Mon, 3 Feb 2025 09:36:05 -0500 Subject: [PATCH 0257/1585] net: atlantic: fix warning during hot unplug Firmware deinitialization performs MMIO accesses which are not necessary if the device has already been removed. In some cases, these accesses happen via readx_poll_timeout_atomic which ends up timing out, resulting in a warning at hw_atl2_utils_fw.c:112: [ 104.595913] Call Trace: [ 104.595915] [ 104.595918] ? show_regs+0x6c/0x80 [ 104.595923] ? __warn+0x8d/0x150 [ 104.595925] ? aq_a2_fw_deinit+0xcf/0xe0 [atlantic] [ 104.595934] ? report_bug+0x182/0x1b0 [ 104.595938] ? handle_bug+0x6e/0xb0 [ 104.595940] ? exc_invalid_op+0x18/0x80 [ 104.595942] ? asm_exc_invalid_op+0x1b/0x20 [ 104.595944] ? aq_a2_fw_deinit+0xcf/0xe0 [atlantic] [ 104.595952] ? aq_a2_fw_deinit+0xcf/0xe0 [atlantic] [ 104.595959] aq_nic_deinit.part.0+0xbd/0xf0 [atlantic] [ 104.595964] aq_nic_deinit+0x17/0x30 [atlantic] [ 104.595970] aq_ndev_close+0x2b/0x40 [atlantic] [ 104.595975] __dev_close_many+0xad/0x160 [ 104.595978] dev_close_many+0x99/0x170 [ 104.595979] unregister_netdevice_many_notify+0x18b/0xb20 [ 104.595981] ? __call_rcu_common+0xcd/0x700 [ 104.595984] unregister_netdevice_queue+0xc6/0x110 [ 104.595986] unregister_netdev+0x1c/0x30 [ 104.595988] aq_pci_remove+0xb1/0xc0 [atlantic] Fix this by skipping firmware deinitialization altogether if the PCI device is no longer present. Tested with an AQC113 attached via Thunderbolt by performing repeated unplug cycles while traffic was running via iperf. Fixes: 97bde5c4f909 ("net: ethernet: aquantia: Support for NIC-specific code") Signed-off-by: Jacob Moroni Reviewed-by: Igor Russkikh Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250203143604.24930-3-mail@jakemoroni.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index fe0e3e2a81171..71e50fc65c147 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -1441,7 +1441,9 @@ void aq_nic_deinit(struct aq_nic_s *self, bool link_down) aq_ptp_ring_free(self); aq_ptp_free(self); - if (likely(self->aq_fw_ops->deinit) && link_down) { + /* May be invoked during hot unplug. */ + if (pci_device_is_present(self->pdev) && + likely(self->aq_fw_ops->deinit) && link_down) { mutex_lock(&self->fwreq_mutex); self->aq_fw_ops->deinit(self->aq_hw); mutex_unlock(&self->fwreq_mutex); -- GitLab From a1300691aed9ee852b0a9192e29e2bdc2411a7e6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 Feb 2025 17:08:38 +0000 Subject: [PATCH 0258/1585] net: rose: lock the socket in rose_bind() syzbot reported a soft lockup in rose_loopback_timer(), with a repro calling bind() from multiple threads. rose_bind() must lock the socket to avoid this issue. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+7ff41b5215f0c534534e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/67a0f78d.050a0220.d7c5a.00a0.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Acked-by: Paolo Abeni Link: https://patch.msgid.link/20250203170838.3521361-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/rose/af_rose.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 72c65d938a150..a4a668b88a8f2 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -701,11 +701,9 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct net_device *dev; ax25_address *source; ax25_uid_assoc *user; + int err = -EINVAL; int n; - if (!sock_flag(sk, SOCK_ZAPPED)) - return -EINVAL; - if (addr_len != sizeof(struct sockaddr_rose) && addr_len != sizeof(struct full_sockaddr_rose)) return -EINVAL; @@ -718,8 +716,15 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS) return -EINVAL; - if ((dev = rose_dev_get(&addr->srose_addr)) == NULL) - return -EADDRNOTAVAIL; + lock_sock(sk); + + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out_release; + + err = -EADDRNOTAVAIL; + dev = rose_dev_get(&addr->srose_addr); + if (!dev) + goto out_release; source = &addr->srose_call; @@ -730,7 +735,8 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } else { if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) { dev_put(dev); - return -EACCES; + err = -EACCES; + goto out_release; } rose->source_call = *source; } @@ -753,8 +759,10 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) rose_insert_socket(sk); sock_reset_flag(sk, SOCK_ZAPPED); - - return 0; + err = 0; +out_release: + release_sock(sk); + return err; } static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) -- GitLab From 820ccf8cb2b145ab9fc12651f7f80339614fa46c Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 31 Jan 2025 15:31:19 -0700 Subject: [PATCH 0259/1585] drm/amd/display: Respect user's CONFIG_FRAME_WARN more for dml files Currently, there are several files in drm/amd/display that aim to have a higher -Wframe-larger-than value to avoid instances of that warning with a lower value from the user's configuration. However, with the way that it is currently implemented, it does not respect the user's request via CONFIG_FRAME_WARN for a higher stack frame limit, which can cause pain when new instances of the warning appear and break the build due to CONFIG_WERROR. Adjust the logic to switch from a hard coded -Wframe-larger-than value to only using the value as a minimum clamp and deferring to the requested value from CONFIG_FRAME_WARN if it is higher. Suggested-by: Harry Wentland Reported-by: Greg Kroah-Hartman Closes: https://lore.kernel.org/2025013003-audience-opposing-7f95@gregkh/ Signed-off-by: Nathan Chancellor Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/Makefile | 14 ++++++++----- drivers/gpu/drm/amd/display/dc/dml2/Makefile | 22 ++++++++++++-------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 46f9c05de16e8..e1d500633dfad 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -29,11 +29,15 @@ dml_ccflags := $(CC_FLAGS_FPU) dml_rcflags := $(CC_FLAGS_NO_FPU) ifneq ($(CONFIG_FRAME_WARN),0) -ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) -frame_warn_flag := -Wframe-larger-than=3072 -else -frame_warn_flag := -Wframe-larger-than=2048 -endif + ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) + frame_warn_limit := 3072 + else + frame_warn_limit := 2048 + endif + + ifeq ($(call test-lt, $(CONFIG_FRAME_WARN), $(frame_warn_limit)),y) + frame_warn_flag := -Wframe-larger-than=$(frame_warn_limit) + endif endif CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index 91c4f3b4bd5f4..21fd466dba26e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -28,15 +28,19 @@ dml2_ccflags := $(CC_FLAGS_FPU) dml2_rcflags := $(CC_FLAGS_NO_FPU) ifneq ($(CONFIG_FRAME_WARN),0) -ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) -ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_COMPILE_TEST),yy) -frame_warn_flag := -Wframe-larger-than=4096 -else -frame_warn_flag := -Wframe-larger-than=3072 -endif -else -frame_warn_flag := -Wframe-larger-than=2048 -endif + ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) + ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_COMPILE_TEST),yy) + frame_warn_limit := 4096 + else + frame_warn_limit := 3072 + endif + else + frame_warn_limit := 2048 + endif + + ifeq ($(call test-lt, $(CONFIG_FRAME_WARN), $(frame_warn_limit)),y) + frame_warn_flag := -Wframe-larger-than=$(frame_warn_limit) + endif endif subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2 -- GitLab From f245b400a223a71d6d5f4c72a2cb9b573a7fc2b6 Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Tue, 4 Feb 2025 15:07:44 +0800 Subject: [PATCH 0260/1585] Revert "drm/amd/display: Use HW lock mgr for PSR1" This reverts commit a2b5a9956269 ("drm/amd/display: Use HW lock mgr for PSR1") Because it may cause system hang while connect with two edp panel. Acked-by: Wayne Lin Signed-off-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c index 5bb8b78bf250a..bf636b28e3e16 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c @@ -63,8 +63,7 @@ void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv, bool should_use_dmub_lock(struct dc_link *link) { - if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1 || - link->psr_settings.psr_version == DC_PSR_VERSION_1) + if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) return true; if (link->replay_settings.replay_feature_enabled) -- GitLab From ceb5faef848b2fbb5d1e99617093cc9d4deb2b30 Mon Sep 17 00:00:00 2001 From: Tanya Agarwal Date: Fri, 24 Jan 2025 01:07:44 +0530 Subject: [PATCH 0261/1585] integrity: fix typos and spelling errors Fix typos and spelling errors in integrity module comments that were identified using the codespell tool. No functional changes - documentation only. Signed-off-by: Tanya Agarwal Reviewed-by: Mimi Zohar Signed-off-by: Mimi Zohar --- security/integrity/evm/evm_crypto.c | 2 +- security/integrity/evm/evm_main.c | 2 +- security/integrity/ima/ima_main.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index 7c06ffd633d24..a5e730ffda57f 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -180,7 +180,7 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode, } /* - * Dump large security xattr values as a continuous ascii hexademical string. + * Dump large security xattr values as a continuous ascii hexadecimal string. * (pr_debug is limited to 64 bytes.) */ static void dump_security_xattr_l(const char *prefix, const void *src, diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index 377e57e9084f0..0add782e73ba2 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -169,7 +169,7 @@ static int is_unsupported_hmac_fs(struct dentry *dentry) * and compare it against the stored security.evm xattr. * * For performance: - * - use the previoulsy retrieved xattr value and length to calculate the + * - use the previously retrieved xattr value and length to calculate the * HMAC.) * - cache the verification result in the iint, when available. * diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 9f9897a7c217e..f2c9affa0c2ac 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -1011,9 +1011,9 @@ int process_buffer_measurement(struct mnt_idmap *idmap, } /* - * Both LSM hooks and auxilary based buffer measurements are - * based on policy. To avoid code duplication, differentiate - * between the LSM hooks and auxilary buffer measurements, + * Both LSM hooks and auxiliary based buffer measurements are + * based on policy. To avoid code duplication, differentiate + * between the LSM hooks and auxiliary buffer measurements, * retrieving the policy rule information only for the LSM hook * buffer measurements. */ -- GitLab From 57a0ef02fefafc4b9603e33a18b669ba5ce59ba3 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Tue, 4 Feb 2025 13:57:20 +0100 Subject: [PATCH 0262/1585] ima: Reset IMA_NONACTION_RULE_FLAGS after post_setattr Commit 0d73a55208e9 ("ima: re-introduce own integrity cache lock") mistakenly reverted the performance improvement introduced in commit 42a4c603198f0 ("ima: fix ima_inode_post_setattr"). The unused bit mask was subsequently removed by commit 11c60f23ed13 ("integrity: Remove unused macro IMA_ACTION_RULE_FLAGS"). Restore the performance improvement by introducing the new mask IMA_NONACTION_RULE_FLAGS, equal to IMA_NONACTION_FLAGS without IMA_NEW_FILE, which is not a rule-specific flag. Finally, reset IMA_NONACTION_RULE_FLAGS instead of IMA_NONACTION_FLAGS in process_measurement(), if the IMA_CHANGE_ATTR atomic flag is set (after file metadata modification). With this patch, new files for which metadata were modified while they are still open, can be reopened before the last file close (when security.ima is written), since the IMA_NEW_FILE flag is not cleared anymore. Otherwise, appraisal fails because security.ima is missing (files with IMA_NEW_FILE set are an exception). Cc: stable@vger.kernel.org # v4.16.x Fixes: 0d73a55208e9 ("ima: re-introduce own integrity cache lock") Signed-off-by: Roberto Sassu Signed-off-by: Mimi Zohar --- security/integrity/ima/ima.h | 3 +++ security/integrity/ima/ima_main.c | 7 +++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 24d09ea91b877..a4f284bd846c1 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -149,6 +149,9 @@ struct ima_kexec_hdr { #define IMA_CHECK_BLACKLIST 0x40000000 #define IMA_VERITY_REQUIRED 0x80000000 +/* Exclude non-action flags which are not rule-specific. */ +#define IMA_NONACTION_RULE_FLAGS (IMA_NONACTION_FLAGS & ~IMA_NEW_FILE) + #define IMA_DO_MASK (IMA_MEASURE | IMA_APPRAISE | IMA_AUDIT | \ IMA_HASH | IMA_APPRAISE_SUBMASK) #define IMA_DONE_MASK (IMA_MEASURED | IMA_APPRAISED | IMA_AUDITED | \ diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index f2c9affa0c2ac..28b8b0db6f9bb 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -269,10 +269,13 @@ static int process_measurement(struct file *file, const struct cred *cred, mutex_lock(&iint->mutex); if (test_and_clear_bit(IMA_CHANGE_ATTR, &iint->atomic_flags)) - /* reset appraisal flags if ima_inode_post_setattr was called */ + /* + * Reset appraisal flags (action and non-action rule-specific) + * if ima_inode_post_setattr was called. + */ iint->flags &= ~(IMA_APPRAISE | IMA_APPRAISED | IMA_APPRAISE_SUBMASK | IMA_APPRAISED_SUBMASK | - IMA_NONACTION_FLAGS); + IMA_NONACTION_RULE_FLAGS); /* * Re-evaulate the file if either the xattr has changed or the -- GitLab From dabbd325b25edb5cdd99c94391817202dd54b651 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 4 Feb 2025 23:50:08 +0000 Subject: [PATCH 0263/1585] ASoC: simple-card-utils.c: add missing dlc->of_node commit 90de551c1bf ("ASoC: simple-card-utils.c: enable multi Component support") added muiti Component support, but was missing to add dlc->of_node. Because of it, Sound device list will indicates strange name if it was DPCM connection and driver supports dai->driver->dai_args, like below > aplay -l card X: sndulcbmix [xxxx], device 0: fe.(null).rsnd-dai.0 (*) [] ... ^^^^^^ It will be fixed by this patch > aplay -l card X: sndulcbmix [xxxx], device 0: fe.sound@ec500000.rsnd-dai.0 (*) [] ... ^^^^^^^^^^^^^^ Signed-off-by: Kuninori Morimoto Reviewed-by: Daniel Baluta Link: https://patch.msgid.link/87ikpp2rtb.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/generic/simple-card-utils.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index dd414634b4ac7..c2445c5ccd84c 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -1092,6 +1092,7 @@ int graph_util_parse_dai(struct device *dev, struct device_node *ep, args.np = ep; dai = snd_soc_get_dai_via_args(&args); if (dai) { + dlc->of_node = node; dlc->dai_name = snd_soc_dai_name_get(dai); dlc->dai_args = snd_soc_copy_dai_args(dev, &args); if (!dlc->dai_args) -- GitLab From 32ffed055dcee17f6705f545b069e44a66067808 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 5 Feb 2025 00:43:43 +0000 Subject: [PATCH 0264/1585] regmap-irq: Add missing kfree() Add kfree() for "d->main_status_buf" to the error-handling path to prevent a memory leak. Fixes: a2d21848d921 ("regmap: regmap-irq: Add main status register support") Cc: stable@vger.kernel.org # v5.1+ Signed-off-by: Jiasheng Jiang Link: https://patch.msgid.link/20250205004343.14413-1-jiashengjiangcool@gmail.com Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-irq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 0bcd81389a29f..978613407ea3c 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -906,6 +906,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, kfree(d->wake_buf); kfree(d->mask_buf_def); kfree(d->mask_buf); + kfree(d->main_status_buf); kfree(d->status_buf); kfree(d->status_reg_buf); if (d->config_buf) { @@ -981,6 +982,7 @@ void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *d) kfree(d->wake_buf); kfree(d->mask_buf_def); kfree(d->mask_buf); + kfree(d->main_status_buf); kfree(d->status_reg_buf); kfree(d->status_buf); if (d->config_buf) { -- GitLab From 76b0a22d4cf7dc9091129560fdc04e73eb9db4cb Mon Sep 17 00:00:00 2001 From: Edson Juliano Drosdeck Date: Sat, 1 Feb 2025 11:39:30 -0300 Subject: [PATCH 0265/1585] ALSA: hda/realtek: Limit mic boost on Positivo ARN50 The internal mic boost on the Positivo ARN50 is too high. Fix this by applying the ALC269_FIXUP_LIMIT_INT_MIC_BOOST fixup to the machine to limit the gain. Signed-off-by: Edson Juliano Drosdeck Link: https://patch.msgid.link/20250201143930.25089-1-edson.drosdeck@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8192be394d0d0..ae0beb52e7b0c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -11045,6 +11045,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1f66, 0x0105, "Ayaneo Portable Game Player", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x2014, 0x800a, "Positivo ARN50", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x2782, 0x0214, "VAIO VJFE-CL", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x2782, 0x0228, "Infinix ZERO BOOK 13", ALC269VB_FIXUP_INFINIX_ZERO_BOOK_13), SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO), -- GitLab From b0eddc21900fb44f8c5db95710479865e3700fbd Mon Sep 17 00:00:00 2001 From: Varadarajan Narayanan Date: Wed, 5 Feb 2025 13:16:56 +0530 Subject: [PATCH 0266/1585] regulator: qcom_smd: Add l2, l5 sub-node to mp5496 regulator Adding l2, l5 sub-node entry to mp5496 regulator node. Cc: stable@vger.kernel.org Acked-by: Rob Herring Signed-off-by: Varadarajan Narayanan Link: https://patch.msgid.link/20250205074657.4142365-2-quic_varada@quicinc.com Signed-off-by: Mark Brown --- .../devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml b/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml index f2fd2df68a9ed..b7241ce975b96 100644 --- a/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml @@ -22,7 +22,7 @@ description: Each sub-node is identified using the node's name, with valid values listed for each of the pmics below. - For mp5496, s1, s2 + For mp5496, s1, s2, l2, l5 For pm2250, s1, s2, s3, s4, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15, l16, l17, l18, l19, l20, l21, l22 -- GitLab From 796106e29e5df6cd4b4e2b51262a8a19e9fa0625 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 5 Feb 2025 00:20:36 +0000 Subject: [PATCH 0267/1585] ASoC: rsnd: indicate unsupported clock rate It will indicate "unsupported clock rate" when setup clock failed. But it is unclear what kind of rate was failed. Indicate it. Signed-off-by: Kuninori Morimoto Reviewed-by: Yoshihiro Shimoda Link: https://patch.msgid.link/874j192qej.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/renesas/rcar/ssi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/renesas/rcar/ssi.c b/sound/soc/renesas/rcar/ssi.c index b3d4e8ae07eff..0c6424a1fcac0 100644 --- a/sound/soc/renesas/rcar/ssi.c +++ b/sound/soc/renesas/rcar/ssi.c @@ -336,7 +336,8 @@ static int rsnd_ssi_master_clk_start(struct rsnd_mod *mod, return 0; rate_err: - dev_err(dev, "unsupported clock rate\n"); + dev_err(dev, "unsupported clock rate (%d)\n", rate); + return ret; } -- GitLab From c3fc002b206c6c83d1e3702b979733002ba6fb2c Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 5 Feb 2025 00:20:42 +0000 Subject: [PATCH 0268/1585] ASoC: rsnd: don't indicate warning on rsnd_kctrl_accept_runtime() rsnd_kctrl_accept_runtime() (1) is used for runtime convert rate (= Synchronous SRC Mode). Now, rsnd driver has 2 kctrls for it (A): "SRC Out Rate Switch" (B): "SRC Out Rate" // it calls (1) (A): can be called anytime (B): can be called only runtime, and will indicate warning if it was used at non-runtime. To use runtime convert rate (= Synchronous SRC Mode), user might uses command in below order. (X): > amixer set "SRC Out Rate" on > aplay xxx.wav & (Y): > amixer set "SRC Out Rate" 48010 // convert rate to 48010Hz (Y): calls B (X): calls both A and B. In this case, when user calls (X), it calls both (A) and (B), but it is not yet start running. So, (B) will indicate warning. This warning was added by commit b5c088689847 ("ASoC: rsnd: add warning message to rsnd_kctrl_accept_runtime()"), but the message sounds like the operation was not correct. Let's update warning message. The message is very SRC specific, implement it in src.c Signed-off-by: Kuninori Morimoto Reviewed-by: Yoshihiro Shimoda Link: https://patch.msgid.link/8734gt2qed.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/renesas/rcar/core.c | 14 -------------- sound/soc/renesas/rcar/rsnd.h | 1 - sound/soc/renesas/rcar/src.c | 18 +++++++++++++++++- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/sound/soc/renesas/rcar/core.c b/sound/soc/renesas/rcar/core.c index d3709fd0409e4..f3f0c3f0bb9f5 100644 --- a/sound/soc/renesas/rcar/core.c +++ b/sound/soc/renesas/rcar/core.c @@ -1770,20 +1770,6 @@ int rsnd_kctrl_accept_anytime(struct rsnd_dai_stream *io) return 1; } -int rsnd_kctrl_accept_runtime(struct rsnd_dai_stream *io) -{ - struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); - struct rsnd_priv *priv = rsnd_io_to_priv(io); - struct device *dev = rsnd_priv_to_dev(priv); - - if (!runtime) { - dev_warn(dev, "Can't update kctrl when idle\n"); - return 0; - } - - return 1; -} - struct rsnd_kctrl_cfg *rsnd_kctrl_init_m(struct rsnd_kctrl_cfg_m *cfg) { cfg->cfg.val = cfg->val; diff --git a/sound/soc/renesas/rcar/rsnd.h b/sound/soc/renesas/rcar/rsnd.h index a5f54b65313c4..04c70690f7a25 100644 --- a/sound/soc/renesas/rcar/rsnd.h +++ b/sound/soc/renesas/rcar/rsnd.h @@ -742,7 +742,6 @@ struct rsnd_kctrl_cfg_s { #define rsnd_kctrl_vals(x) ((x).val) /* = (x).cfg.val[0] */ int rsnd_kctrl_accept_anytime(struct rsnd_dai_stream *io); -int rsnd_kctrl_accept_runtime(struct rsnd_dai_stream *io); struct rsnd_kctrl_cfg *rsnd_kctrl_init_m(struct rsnd_kctrl_cfg_m *cfg); struct rsnd_kctrl_cfg *rsnd_kctrl_init_s(struct rsnd_kctrl_cfg_s *cfg); int rsnd_kctrl_new(struct rsnd_mod *mod, diff --git a/sound/soc/renesas/rcar/src.c b/sound/soc/renesas/rcar/src.c index e7f86db0d94c3..3099180297722 100644 --- a/sound/soc/renesas/rcar/src.c +++ b/sound/soc/renesas/rcar/src.c @@ -531,6 +531,22 @@ static irqreturn_t rsnd_src_interrupt(int irq, void *data) return IRQ_HANDLED; } +static int rsnd_src_kctrl_accept_runtime(struct rsnd_dai_stream *io) +{ + struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); + + if (!runtime) { + struct rsnd_priv *priv = rsnd_io_to_priv(io); + struct device *dev = rsnd_priv_to_dev(priv); + + dev_warn(dev, "\"SRC Out Rate\" can use during running\n"); + + return 0; + } + + return 1; +} + static int rsnd_src_probe_(struct rsnd_mod *mod, struct rsnd_dai_stream *io, struct rsnd_priv *priv) @@ -594,7 +610,7 @@ static int rsnd_src_pcm_new(struct rsnd_mod *mod, rsnd_io_is_play(io) ? "SRC Out Rate" : "SRC In Rate", - rsnd_kctrl_accept_runtime, + rsnd_src_kctrl_accept_runtime, rsnd_src_set_convert_rate, &src->sync, 192000); -- GitLab From 89f9cf185885d4358aa92b48e51d0f09b71775aa Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 5 Feb 2025 00:20:48 +0000 Subject: [PATCH 0269/1585] ASoC: rsnd: adjust convert rate limitation Current rsnd driver supports Synchronous SRC Mode, but HW allow to update rate only within 1% from current rate. Adjust to it. Becially, this feature is used to fine-tune subtle difference that occur during sampling rate conversion in SRC. So, it should be called within 1% margin of rate difference. If there was difference over 1%, it will apply with 1% increments by using loop without indicating error message. Cc: Yoshihiro Shimoda Signed-off-by: Kuninori Morimoto Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Link: https://patch.msgid.link/871pwd2qe8.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/renesas/rcar/src.c | 98 ++++++++++++++++++++++++++++-------- 1 file changed, 76 insertions(+), 22 deletions(-) diff --git a/sound/soc/renesas/rcar/src.c b/sound/soc/renesas/rcar/src.c index 3099180297722..7d73b183bda68 100644 --- a/sound/soc/renesas/rcar/src.c +++ b/sound/soc/renesas/rcar/src.c @@ -35,6 +35,7 @@ struct rsnd_src { struct rsnd_mod *dma; struct rsnd_kctrl_cfg_s sen; /* sync convert enable */ struct rsnd_kctrl_cfg_s sync; /* sync convert */ + u32 current_sync_rate; int irq; }; @@ -100,7 +101,7 @@ static u32 rsnd_src_convert_rate(struct rsnd_dai_stream *io, if (!rsnd_src_sync_is_enabled(mod)) return rsnd_io_converted_rate(io); - convert_rate = src->sync.val; + convert_rate = src->current_sync_rate; if (!convert_rate) convert_rate = rsnd_io_converted_rate(io); @@ -201,13 +202,73 @@ static const u32 chan222222[] = { static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, struct rsnd_mod *mod) { + struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); struct rsnd_priv *priv = rsnd_mod_to_priv(mod); - struct device *dev = rsnd_priv_to_dev(priv); + struct rsnd_src *src = rsnd_mod_to_src(mod); + u32 fin, fout, new_rate; + int inc, cnt, rate; + u64 base, val; + + if (!runtime) + return; + + if (!rsnd_src_sync_is_enabled(mod)) + return; + + fin = rsnd_src_get_in_rate(priv, io); + fout = rsnd_src_get_out_rate(priv, io); + + new_rate = src->sync.val; + + if (!new_rate) + new_rate = fout; + + /* Do nothing if no diff */ + if (new_rate == src->current_sync_rate) + return; + + /* + * SRCm_IFSVR::INTIFS can change within 1% + * see + * SRCm_IFSVR::INTIFS Note + */ + inc = fout / 100; + cnt = abs(new_rate - fout) / inc; + if (fout > new_rate) + inc *= -1; + + /* + * After start running SRC, we can update only SRC_IFSVR + * for Synchronous Mode + */ + base = (u64)0x0400000 * fin; + rate = fout; + for (int i = 0; i < cnt; i++) { + val = base; + rate += inc; + do_div(val, rate); + + rsnd_mod_write(mod, SRC_IFSVR, val); + } + val = base; + do_div(val, new_rate); + + rsnd_mod_write(mod, SRC_IFSVR, val); + + /* update current_sync_rate */ + src->current_sync_rate = new_rate; +} + +static void rsnd_src_init_convert_rate(struct rsnd_dai_stream *io, + struct rsnd_mod *mod) +{ struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); + struct rsnd_priv *priv = rsnd_mod_to_priv(mod); + struct device *dev = rsnd_priv_to_dev(priv); int is_play = rsnd_io_is_play(io); int use_src = 0; u32 fin, fout; - u32 ifscr, fsrate, adinr; + u32 ifscr, adinr; u32 cr, route; u32 i_busif, o_busif, tmp; const u32 *bsdsr_table; @@ -245,26 +306,15 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, adinr = rsnd_get_adinr_bit(mod, io) | chan; /* - * SRC_IFSCR / SRC_IFSVR - */ - ifscr = 0; - fsrate = 0; - if (use_src) { - u64 n; - - ifscr = 1; - n = (u64)0x0400000 * fin; - do_div(n, fout); - fsrate = n; - } - - /* + * SRC_IFSCR * SRC_SRCCR / SRC_ROUTE_MODE0 */ + ifscr = 0; cr = 0x00011110; route = 0x0; if (use_src) { route = 0x1; + ifscr = 0x1; if (rsnd_src_sync_is_enabled(mod)) { cr |= 0x1; @@ -335,7 +385,6 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, rsnd_mod_write(mod, SRC_SRCIR, 1); /* initialize */ rsnd_mod_write(mod, SRC_ADINR, adinr); rsnd_mod_write(mod, SRC_IFSCR, ifscr); - rsnd_mod_write(mod, SRC_IFSVR, fsrate); rsnd_mod_write(mod, SRC_SRCCR, cr); rsnd_mod_write(mod, SRC_BSDSR, bsdsr_table[idx]); rsnd_mod_write(mod, SRC_BSISR, bsisr_table[idx]); @@ -348,6 +397,9 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, rsnd_adg_set_src_timesel_gen2(mod, io, fin, fout); + /* update SRC_IFSVR */ + rsnd_src_set_convert_rate(io, mod); + return; convert_rate_err: @@ -467,7 +519,8 @@ static int rsnd_src_init(struct rsnd_mod *mod, int ret; /* reset sync convert_rate */ - src->sync.val = 0; + src->sync.val = + src->current_sync_rate = 0; ret = rsnd_mod_power_on(mod); if (ret < 0) @@ -475,7 +528,7 @@ static int rsnd_src_init(struct rsnd_mod *mod, rsnd_src_activation(mod); - rsnd_src_set_convert_rate(io, mod); + rsnd_src_init_convert_rate(io, mod); rsnd_src_status_clear(mod); @@ -493,7 +546,8 @@ static int rsnd_src_quit(struct rsnd_mod *mod, rsnd_mod_power_off(mod); /* reset sync convert_rate */ - src->sync.val = 0; + src->sync.val = + src->current_sync_rate = 0; return 0; } @@ -601,7 +655,7 @@ static int rsnd_src_pcm_new(struct rsnd_mod *mod, "SRC Out Rate Switch" : "SRC In Rate Switch", rsnd_kctrl_accept_anytime, - rsnd_src_set_convert_rate, + rsnd_src_init_convert_rate, &src->sen, 1); if (ret < 0) return ret; -- GitLab From c4d3dfd8ccaef2cbd374860e307f1e056854a472 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 5 Feb 2025 14:21:36 +0100 Subject: [PATCH 0270/1585] Revert "i2c: Replace list-based mechanism for handling userspace-created clients" This reverts commit 3cfe39b3a845593a485ab1c716615979004ef9f6. Mux handling is not sufficiently implemented. It needs more time. Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 61 ++++++++++++++++++++++++------------- include/linux/i2c.h | 7 ++++- 2 files changed, 45 insertions(+), 23 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 5546184df05f9..ddac2f1557180 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -1300,12 +1300,14 @@ new_device_store(struct device *dev, struct device_attribute *attr, info.flags |= I2C_CLIENT_SLAVE; } - info.flags |= I2C_CLIENT_USER; - client = i2c_new_client_device(adap, &info); if (IS_ERR(client)) return PTR_ERR(client); + /* Keep track of the added device */ + mutex_lock(&adap->userspace_clients_lock); + list_add_tail(&client->detected, &adap->userspace_clients); + mutex_unlock(&adap->userspace_clients_lock); dev_info(dev, "%s: Instantiated device %s at 0x%02hx\n", "new_device", info.type, info.addr); @@ -1313,15 +1315,6 @@ new_device_store(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_WO(new_device); -static int __i2c_find_user_addr(struct device *dev, const void *addrp) -{ - struct i2c_client *client = i2c_verify_client(dev); - unsigned short addr = *(unsigned short *)addrp; - - return client && client->flags & I2C_CLIENT_USER && - i2c_encode_flags_to_addr(client) == addr; -} - /* * And of course let the users delete the devices they instantiated, if * they got it wrong. This interface can only be used to delete devices @@ -1336,7 +1329,7 @@ delete_device_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct i2c_adapter *adap = to_i2c_adapter(dev); - struct device *child_dev; + struct i2c_client *client, *next; unsigned short addr; char end; int res; @@ -1352,19 +1345,28 @@ delete_device_store(struct device *dev, struct device_attribute *attr, return -EINVAL; } - mutex_lock(&core_lock); /* Make sure the device was added through sysfs */ - child_dev = device_find_child(&adap->dev, &addr, __i2c_find_user_addr); - if (child_dev) { - i2c_unregister_device(i2c_verify_client(child_dev)); - put_device(child_dev); - } else { - dev_err(dev, "Can't find userspace-created device at %#x\n", addr); - count = -ENOENT; + res = -ENOENT; + mutex_lock_nested(&adap->userspace_clients_lock, + i2c_adapter_depth(adap)); + list_for_each_entry_safe(client, next, &adap->userspace_clients, + detected) { + if (i2c_encode_flags_to_addr(client) == addr) { + dev_info(dev, "%s: Deleting device %s at 0x%02hx\n", + "delete_device", client->name, client->addr); + + list_del(&client->detected); + i2c_unregister_device(client); + res = count; + break; + } } - mutex_unlock(&core_lock); + mutex_unlock(&adap->userspace_clients_lock); - return count; + if (res < 0) + dev_err(dev, "%s: Can't find device in list\n", + "delete_device"); + return res; } static DEVICE_ATTR_IGNORE_LOCKDEP(delete_device, S_IWUSR, NULL, delete_device_store); @@ -1535,6 +1537,8 @@ static int i2c_register_adapter(struct i2c_adapter *adap) adap->locked_flags = 0; rt_mutex_init(&adap->bus_lock); rt_mutex_init(&adap->mux_lock); + mutex_init(&adap->userspace_clients_lock); + INIT_LIST_HEAD(&adap->userspace_clients); /* Set default timeout to 1 second if not already set */ if (adap->timeout == 0) @@ -1726,6 +1730,7 @@ static int __unregister_dummy(struct device *dev, void *dummy) void i2c_del_adapter(struct i2c_adapter *adap) { struct i2c_adapter *found; + struct i2c_client *client, *next; /* First make sure that this adapter was ever added */ mutex_lock(&core_lock); @@ -1738,6 +1743,18 @@ void i2c_del_adapter(struct i2c_adapter *adap) i2c_acpi_remove_space_handler(adap); + /* Remove devices instantiated from sysfs */ + mutex_lock_nested(&adap->userspace_clients_lock, + i2c_adapter_depth(adap)); + list_for_each_entry_safe(client, next, &adap->userspace_clients, + detected) { + dev_dbg(&adap->dev, "Removing %s at 0x%x\n", client->name, + client->addr); + list_del(&client->detected); + i2c_unregister_device(client); + } + mutex_unlock(&adap->userspace_clients_lock); + /* Detach any active clients. This can't fail, thus we do not * check the returned value. This is a two-pass process, because * we can't remove the dummy devices during the first pass: they diff --git a/include/linux/i2c.h b/include/linux/i2c.h index c31fd1dba3bd2..4955d9e76c5fb 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -313,6 +313,8 @@ struct i2c_driver { * @dev: Driver model device node for the slave. * @init_irq: IRQ that was set at initialization * @irq: indicates the IRQ generated by this device (if any) + * @detected: member of an i2c_driver.clients list or i2c-core's + * userspace_devices list * @slave_cb: Callback when I2C slave mode of an adapter is used. The adapter * calls it to pass on slave events to the slave driver. * @devres_group_id: id of the devres group that will be created for resources @@ -333,7 +335,6 @@ struct i2c_client { #define I2C_CLIENT_HOST_NOTIFY 0x40 /* We want to use I2C host notify */ #define I2C_CLIENT_WAKE 0x80 /* for board_info; true iff can wake */ #define I2C_CLIENT_AUTO 0x100 /* client was auto-detected */ -#define I2C_CLIENT_USER 0x200 /* client was userspace-created */ #define I2C_CLIENT_SCCB 0x9000 /* Use Omnivision SCCB protocol */ /* Must match I2C_M_STOP|IGNORE_NAK */ @@ -345,6 +346,7 @@ struct i2c_client { struct device dev; /* the device structure */ int init_irq; /* irq set at initialization */ int irq; /* irq issued by device */ + struct list_head detected; #if IS_ENABLED(CONFIG_I2C_SLAVE) i2c_slave_cb_t slave_cb; /* callback for slave mode */ #endif @@ -751,6 +753,9 @@ struct i2c_adapter { char name[48]; struct completion dev_released; + struct mutex userspace_clients_lock; + struct list_head userspace_clients; + struct i2c_bus_recovery_info *bus_recovery_info; const struct i2c_adapter_quirks *quirks; -- GitLab From 3bfa08fe9ec8dd79e183c88e1275be74191e7bc8 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 5 Feb 2025 14:22:12 +0100 Subject: [PATCH 0271/1585] Revert "i2c: Replace list-based mechanism for handling auto-detected clients" This reverts commit 56a50667cbcfaf95eea9128d5676af94e54b51a8. Mux handling is not sufficiently implemented. It needs more time. Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 52 +++++++++++++++++++++++++------------ include/linux/i2c.h | 3 ++- 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index ddac2f1557180..35a221e2c11c1 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -1704,6 +1704,23 @@ int i2c_add_numbered_adapter(struct i2c_adapter *adap) } EXPORT_SYMBOL_GPL(i2c_add_numbered_adapter); +static void i2c_do_del_adapter(struct i2c_driver *driver, + struct i2c_adapter *adapter) +{ + struct i2c_client *client, *_n; + + /* Remove the devices we created ourselves as the result of hardware + * probing (using a driver's detect method) */ + list_for_each_entry_safe(client, _n, &driver->clients, detected) { + if (client->adapter == adapter) { + dev_dbg(&adapter->dev, "Removing %s at 0x%x\n", + client->name, client->addr); + list_del(&client->detected); + i2c_unregister_device(client); + } + } +} + static int __unregister_client(struct device *dev, void *dummy) { struct i2c_client *client = i2c_verify_client(dev); @@ -1719,6 +1736,12 @@ static int __unregister_dummy(struct device *dev, void *dummy) return 0; } +static int __process_removed_adapter(struct device_driver *d, void *data) +{ + i2c_do_del_adapter(to_i2c_driver(d), data); + return 0; +} + /** * i2c_del_adapter - unregister I2C adapter * @adap: the adapter being unregistered @@ -1742,6 +1765,11 @@ void i2c_del_adapter(struct i2c_adapter *adap) } i2c_acpi_remove_space_handler(adap); + /* Tell drivers about this removal */ + mutex_lock(&core_lock); + bus_for_each_drv(&i2c_bus_type, NULL, adap, + __process_removed_adapter); + mutex_unlock(&core_lock); /* Remove devices instantiated from sysfs */ mutex_lock_nested(&adap->userspace_clients_lock, @@ -1760,10 +1788,8 @@ void i2c_del_adapter(struct i2c_adapter *adap) * we can't remove the dummy devices during the first pass: they * could have been instantiated by real devices wishing to clean * them up properly, so we give them a chance to do that first. */ - mutex_lock(&core_lock); device_for_each_child(&adap->dev, NULL, __unregister_client); device_for_each_child(&adap->dev, NULL, __unregister_dummy); - mutex_unlock(&core_lock); /* device name is gone after device_unregister */ dev_dbg(&adap->dev, "adapter [%s] unregistered\n", adap->name); @@ -1983,6 +2009,7 @@ int i2c_register_driver(struct module *owner, struct i2c_driver *driver) /* add the driver to the list of i2c drivers in the driver core */ driver->driver.owner = owner; driver->driver.bus = &i2c_bus_type; + INIT_LIST_HEAD(&driver->clients); /* When registration returns, the driver core * will have called probe() for all matching-but-unbound devices. @@ -2000,13 +2027,10 @@ int i2c_register_driver(struct module *owner, struct i2c_driver *driver) } EXPORT_SYMBOL(i2c_register_driver); -static int __i2c_unregister_detected_client(struct device *dev, void *argp) +static int __process_removed_driver(struct device *dev, void *data) { - struct i2c_client *client = i2c_verify_client(dev); - - if (client && client->flags & I2C_CLIENT_AUTO) - i2c_unregister_device(client); - + if (dev->type == &i2c_adapter_type) + i2c_do_del_adapter(data, to_i2c_adapter(dev)); return 0; } @@ -2017,12 +2041,7 @@ static int __i2c_unregister_detected_client(struct device *dev, void *argp) */ void i2c_del_driver(struct i2c_driver *driver) { - mutex_lock(&core_lock); - /* Satisfy __must_check, function can't fail */ - if (driver_for_each_device(&driver->driver, NULL, NULL, - __i2c_unregister_detected_client)) { - } - mutex_unlock(&core_lock); + i2c_for_each_dev(driver, __process_removed_driver); driver_unregister(&driver->driver); pr_debug("driver [%s] unregistered\n", driver->driver.name); @@ -2449,7 +2468,6 @@ static int i2c_detect_address(struct i2c_client *temp_client, /* Finally call the custom detection function */ memset(&info, 0, sizeof(struct i2c_board_info)); info.addr = addr; - info.flags = I2C_CLIENT_AUTO; err = driver->detect(temp_client, &info); if (err) { /* -ENODEV is returned if the detection fails. We catch it @@ -2476,7 +2494,9 @@ static int i2c_detect_address(struct i2c_client *temp_client, dev_dbg(&adapter->dev, "Creating %s at 0x%02x\n", info.type, info.addr); client = i2c_new_client_device(adapter, &info); - if (IS_ERR(client)) + if (!IS_ERR(client)) + list_add_tail(&client->detected, &driver->clients); + else dev_err(&adapter->dev, "Failed creating %s at 0x%02x\n", info.type, info.addr); } diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 4955d9e76c5fb..2b2af24d2a436 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -244,6 +244,7 @@ enum i2c_driver_flags { * @id_table: List of I2C devices supported by this driver * @detect: Callback for device detection * @address_list: The I2C addresses to probe (for detect) + * @clients: List of detected clients we created (for i2c-core use only) * @flags: A bitmask of flags defined in &enum i2c_driver_flags * * The driver.owner field should be set to the module owner of this driver. @@ -298,6 +299,7 @@ struct i2c_driver { /* Device detection callback for automatic device creation */ int (*detect)(struct i2c_client *client, struct i2c_board_info *info); const unsigned short *address_list; + struct list_head clients; u32 flags; }; @@ -334,7 +336,6 @@ struct i2c_client { #define I2C_CLIENT_SLAVE 0x20 /* we are the slave */ #define I2C_CLIENT_HOST_NOTIFY 0x40 /* We want to use I2C host notify */ #define I2C_CLIENT_WAKE 0x80 /* for board_info; true iff can wake */ -#define I2C_CLIENT_AUTO 0x100 /* client was auto-detected */ #define I2C_CLIENT_SCCB 0x9000 /* Use Omnivision SCCB protocol */ /* Must match I2C_M_STOP|IGNORE_NAK */ -- GitLab From 015b7dae084fa95465ff89f6cbf15fe49906a370 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 3 Feb 2025 12:01:23 +0100 Subject: [PATCH 0272/1585] gpio: sim: lock hog configfs items if present Depending on the user config, the leaf entry may be the hog directory, not line. Check it and lock the correct item. Fixes: 8bd76b3d3f3a ("gpio: sim: lock up configfs that an instantiated device depends on") Tested-by: Koichiro Den Link: https://lore.kernel.org/r/20250203110123.87701-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-sim.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index a086087ada177..b6c230fab8404 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -1028,20 +1028,23 @@ gpio_sim_device_lockup_configfs(struct gpio_sim_device *dev, bool lock) struct configfs_subsystem *subsys = dev->group.cg_subsys; struct gpio_sim_bank *bank; struct gpio_sim_line *line; + struct config_item *item; /* - * The device only needs to depend on leaf line entries. This is + * The device only needs to depend on leaf entries. This is * sufficient to lock up all the configfs entries that the * instantiated, alive device depends on. */ list_for_each_entry(bank, &dev->bank_list, siblings) { list_for_each_entry(line, &bank->line_list, siblings) { + item = line->hog ? &line->hog->item + : &line->group.cg_item; + if (lock) - WARN_ON(configfs_depend_item_unlocked( - subsys, &line->group.cg_item)); + WARN_ON(configfs_depend_item_unlocked(subsys, + item)); else - configfs_undepend_item_unlocked( - &line->group.cg_item); + configfs_undepend_item_unlocked(item); } } } -- GitLab From 5393f40a640b8c4f716bf87e7b0d4328bf1f22b2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Feb 2025 14:05:03 +0100 Subject: [PATCH 0273/1585] gpio: GPIO_GRGPIO should depend on OF While the Aeroflex Gaisler GRGPIO driver has no build-time dependency on gpiolib-of, it supports only DT-based configuration, and is used only on DT systems. Hence add a dependency on OF, to prevent asking the user about this driver when configuring a kernel without DT support. Fixes: bc40668def384256 ("gpio: grgpio: drop Kconfig dependency on OF_GPIO") Signed-off-by: Geert Uytterhoeven Reviewed-by: Andreas Larsson Link: https://lore.kernel.org/r/db6da3d11bf850d89f199e5c740d8f133e38078d.1738760539.git.geert+renesas@glider.be Signed-off-by: Bartosz Golaszewski --- drivers/gpio/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index add5ad29a673c..98b4d1633b258 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -338,6 +338,7 @@ config GPIO_GRANITERAPIDS config GPIO_GRGPIO tristate "Aeroflex Gaisler GRGPIO support" + depends on OF || COMPILE_TEST select GPIO_GENERIC select IRQ_DOMAIN help -- GitLab From 59ff2040f0a58923c787fdba5999100667338230 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 4 Feb 2025 13:45:15 +0200 Subject: [PATCH 0274/1585] MAINTAINERS: Use my kernel.org address for ACPI GPIO work Switch to use my kernel.org address for ACPI GPIO work. Signed-off-by: Mika Westerberg Link: https://lore.kernel.org/r/20250204114515.3971923-1-mika.westerberg@linux.intel.com Signed-off-by: Bartosz Golaszewski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 896a307fa0654..d1389ca6699df 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9878,7 +9878,7 @@ S: Maintained F: drivers/staging/gpib/ GPIO ACPI SUPPORT -M: Mika Westerberg +M: Mika Westerberg M: Andy Shevchenko L: linux-gpio@vger.kernel.org L: linux-acpi@vger.kernel.org -- GitLab From d8d99c3b5c485f339864aeaa29f76269cc0ea975 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 5 Feb 2025 15:52:31 +0200 Subject: [PATCH 0275/1585] ASoC: SOF: stream-ipc: Check for cstream nullity in sof_ipc_msg_data() The nullity of sps->cstream should be checked similarly as it is done in sof_set_stream_data_offset() function. Assuming that it is not NULL if sps->stream is NULL is incorrect and can lead to NULL pointer dereference. Fixes: 090349a9feba ("ASoC: SOF: Add support for compress API for stream data/offset") Cc: stable@vger.kernel.org Reported-by: Curtis Malainey Closes: https://github.com/thesofproject/linux/pull/5214 Signed-off-by: Peter Ujfalusi Reviewed-by: Daniel Baluta Reviewed-by: Ranjani Sridharan Reviewed-by: Bard Liao Reviewed-by: Curtis Malainey Link: https://patch.msgid.link/20250205135232.19762-2-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/stream-ipc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/stream-ipc.c b/sound/soc/sof/stream-ipc.c index 794c7bbccbaf9..8262443ac89ad 100644 --- a/sound/soc/sof/stream-ipc.c +++ b/sound/soc/sof/stream-ipc.c @@ -43,7 +43,7 @@ int sof_ipc_msg_data(struct snd_sof_dev *sdev, return -ESTRPIPE; posn_offset = stream->posn_offset; - } else { + } else if (sps->cstream) { struct sof_compr_stream *sstream = sps->cstream->runtime->private_data; @@ -51,6 +51,10 @@ int sof_ipc_msg_data(struct snd_sof_dev *sdev, return -ESTRPIPE; posn_offset = sstream->posn_offset; + + } else { + dev_err(sdev->dev, "%s: No stream opened\n", __func__); + return -EINVAL; } snd_sof_dsp_mailbox_read(sdev, posn_offset, p, sz); -- GitLab From 46c7b901e2a03536df5a3cb40b3b26e2be505df6 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 5 Feb 2025 15:52:32 +0200 Subject: [PATCH 0276/1585] ASoC: SOF: pcm: Clear the susbstream pointer to NULL on close The spcm->stream[substream->stream].substream is set during open and was left untouched. After the first PCM stream it will never be NULL and we have code which checks for substream NULLity as indication if the stream is active or not. For the compressed cstream pointer the same has been done, this change will correct the handling of PCM streams. Fixes: 090349a9feba ("ASoC: SOF: Add support for compress API for stream data/offset") Cc: stable@vger.kernel.org Reported-by: Curtis Malainey Closes: https://github.com/thesofproject/linux/pull/5214 Signed-off-by: Peter Ujfalusi Reviewed-by: Daniel Baluta Reviewed-by: Ranjani Sridharan Reviewed-by: Bard Liao Reviewed-by: Curtis Malainey Link: https://patch.msgid.link/20250205135232.19762-3-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/pcm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/sof/pcm.c b/sound/soc/sof/pcm.c index 35a7462d8b693..c5c6353f18cee 100644 --- a/sound/soc/sof/pcm.c +++ b/sound/soc/sof/pcm.c @@ -511,6 +511,8 @@ static int sof_pcm_close(struct snd_soc_component *component, */ } + spcm->stream[substream->stream].substream = NULL; + return 0; } -- GitLab From 738fc998b639407346a9e026514f0562301462cd Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 31 Jan 2025 15:55:28 -0700 Subject: [PATCH 0277/1585] scripts/Makefile.extrawarn: Do not show clang's non-kprintf warnings at W=1 Clang's -Wformat-overflow and -Wformat-truncation have chosen to check '%p' unlike GCC but it does not know about the kernel's pointer extensions in lib/vsprintf.c, so the developers split that part of the warning out for the kernel to disable because there will always be false positives. Commit 908dd508276d ("kbuild: enable -Wformat-truncation on clang") did disabled these warnings but only in a block that would be called when W=1 was not passed, so they would appear with W=1. Move the disabling of the non-kprintf warnings to a block that always runs so that they are never seen, regardless of warning level. Fixes: 908dd508276d ("kbuild: enable -Wformat-truncation on clang") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501291646.VtwF98qd-lkp@intel.com/ Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- scripts/Makefile.extrawarn | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index eb719f6d8d536..e976790f84dc6 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -31,6 +31,11 @@ KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds ifdef CONFIG_CC_IS_CLANG # The kernel builds with '-std=gnu11' so use of GNU extensions is acceptable. KBUILD_CFLAGS += -Wno-gnu + +# Clang checks for overflow/truncation with '%p', while GCC does not: +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111219 +KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow-non-kprintf) +KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation-non-kprintf) else # gcc inanely warns about local variables called 'main' @@ -105,11 +110,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, packed-not-aligned) KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow) ifdef CONFIG_CC_IS_GCC KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation) -else -# Clang checks for overflow/truncation with '%p', while GCC does not: -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111219 -KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow-non-kprintf) -KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation-non-kprintf) endif KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation) -- GitLab From 4c56eb33e603c3b9eb4bd24efbfdd0283c1c37e4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 2 Feb 2025 03:51:41 +0900 Subject: [PATCH 0278/1585] kbuild: keep symbols for symbol_get() even with CONFIG_TRIM_UNUSED_KSYMS Linus observed that the symbol_request(utf8_data_table) call fails when CONFIG_UNICODE=y and CONFIG_TRIM_UNUSED_KSYMS=y. symbol_get() relies on the symbol data being present in the ksymtab for symbol lookups. However, EXPORT_SYMBOL_GPL(utf8_data_table) is dropped due to CONFIG_TRIM_UNUSED_KSYMS, as no module references it in this case. Probably, this has been broken since commit dbacb0ef670d ("kconfig option for TRIM_UNUSED_KSYMS"). This commit addresses the issue by leveraging modpost. Symbol names passed to symbol_get() are recorded in the special .no_trim_symbol section, which is then parsed by modpost to forcibly keep such symbols. The .no_trim_symbol section is discarded by the linker scripts, so there is no impact on the size of the final vmlinux or modules. This commit cannot resolve the issue for direct calls to __symbol_get() because the symbol name is not known at compile-time. Although symbol_get() may eventually be deprecated, this workaround should be good enough meanwhile. Reported-by: Linus Torvalds Suggested-by: Linus Torvalds Signed-off-by: Masahiro Yamada --- include/asm-generic/vmlinux.lds.h | 1 + include/linux/module.h | 5 ++++- scripts/mod/modpost.c | 35 +++++++++++++++++++++++++++++++ scripts/mod/modpost.h | 6 ++++++ scripts/module.lds.S | 1 + 5 files changed, 47 insertions(+), 1 deletion(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 54504013c7491..02a4adb4a9999 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -1038,6 +1038,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) *(.discard) \ *(.discard.*) \ *(.export_symbol) \ + *(.no_trim_symbol) \ *(.modinfo) \ /* ld.bfd warns about .gnu.version* even when not emitted */ \ *(.gnu.version*) \ diff --git a/include/linux/module.h b/include/linux/module.h index 23792d5d7b74b..30e5b19bafa98 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -306,7 +306,10 @@ extern int modules_disabled; /* for sysctl */ /* Get/put a kernel symbol (calls must be symmetric) */ void *__symbol_get(const char *symbol); void *__symbol_get_gpl(const char *symbol); -#define symbol_get(x) ((typeof(&x))(__symbol_get(__stringify(x)))) +#define symbol_get(x) ({ \ + static const char __notrim[] \ + __used __section(".no_trim_symbol") = __stringify(x); \ + (typeof(&x))(__symbol_get(__stringify(x))); }) /* modules using other modules: kdb wants to see this. */ struct module_use { diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index e18ae7dc8140a..36b28987a2f07 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -507,6 +507,9 @@ static int parse_elf(struct elf_info *info, const char *filename) info->modinfo_len = sechdrs[i].sh_size; } else if (!strcmp(secname, ".export_symbol")) { info->export_symbol_secndx = i; + } else if (!strcmp(secname, ".no_trim_symbol")) { + info->no_trim_symbol = (void *)hdr + sechdrs[i].sh_offset; + info->no_trim_symbol_len = sechdrs[i].sh_size; } if (sechdrs[i].sh_type == SHT_SYMTAB) { @@ -1566,6 +1569,14 @@ static void read_symbols(const char *modname) /* strip trailing .o */ mod = new_module(modname, strlen(modname) - strlen(".o")); + /* save .no_trim_symbol section for later use */ + if (info.no_trim_symbol_len) { + mod->no_trim_symbol = xmalloc(info.no_trim_symbol_len); + memcpy(mod->no_trim_symbol, info.no_trim_symbol, + info.no_trim_symbol_len); + mod->no_trim_symbol_len = info.no_trim_symbol_len; + } + if (!mod->is_vmlinux) { license = get_modinfo(&info, "license"); if (!license) @@ -1728,6 +1739,28 @@ static void handle_white_list_exports(const char *white_list) free(buf); } +/* + * Keep symbols recorded in the .no_trim_symbol section. This is necessary to + * prevent CONFIG_TRIM_UNUSED_KSYMS from dropping EXPORT_SYMBOL because + * symbol_get() relies on the symbol being present in the ksymtab for lookups. + */ +static void keep_no_trim_symbols(struct module *mod) +{ + unsigned long size = mod->no_trim_symbol_len; + + for (char *s = mod->no_trim_symbol; s; s = next_string(s , &size)) { + struct symbol *sym; + + /* + * If find_symbol() returns NULL, this symbol is not provided + * by any module, and symbol_get() will fail. + */ + sym = find_symbol(s); + if (sym) + sym->used = true; + } +} + static void check_modname_len(struct module *mod) { const char *mod_name; @@ -2254,6 +2287,8 @@ int main(int argc, char **argv) read_symbols_from_files(files_source); list_for_each_entry(mod, &modules, list) { + keep_no_trim_symbols(mod); + if (mod->dump_file || mod->is_vmlinux) continue; diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h index ffd0a52a606ef..59366f456b765 100644 --- a/scripts/mod/modpost.h +++ b/scripts/mod/modpost.h @@ -111,6 +111,8 @@ struct module_alias { * * @dump_file: path to the .symvers file if loaded from a file * @aliases: list head for module_aliases + * @no_trim_symbol: .no_trim_symbol section data + * @no_trim_symbol_len: length of the .no_trim_symbol section */ struct module { struct list_head list; @@ -128,6 +130,8 @@ struct module { // Actual imported namespaces struct list_head imported_namespaces; struct list_head aliases; + char *no_trim_symbol; + unsigned int no_trim_symbol_len; char name[]; }; @@ -141,6 +145,8 @@ struct elf_info { char *strtab; char *modinfo; unsigned int modinfo_len; + char *no_trim_symbol; + unsigned int no_trim_symbol_len; /* support for 32bit section numbers */ diff --git a/scripts/module.lds.S b/scripts/module.lds.S index c2f80f9141d40..450f1088d5fd3 100644 --- a/scripts/module.lds.S +++ b/scripts/module.lds.S @@ -16,6 +16,7 @@ SECTIONS { *(.discard) *(.discard.*) *(.export_symbol) + *(.no_trim_symbol) } __ksymtab 0 : ALIGN(8) { *(SORT(___ksymtab+*)) } -- GitLab From 98a5cfd2320966f40fe049a9855f8787f0126825 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 5 Feb 2025 09:43:31 +0100 Subject: [PATCH 0279/1585] x86/xen: fix xen_hypercall_hvm() to not clobber %rbx xen_hypercall_hvm(), which is used when running as a Xen PVH guest at most only once during early boot, is clobbering %rbx. Depending on whether the caller relies on %rbx to be preserved across the call or not, this clobbering might result in an early crash of the system. This can be avoided by using an already saved register instead of %rbx. Fixes: b4845bb63838 ("x86/xen: add central hypercall functions") Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Andrew Cooper Signed-off-by: Juergen Gross --- arch/x86/xen/xen-head.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 9252652afe596..72f28d66e0e52 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -117,8 +117,8 @@ SYM_FUNC_START(xen_hypercall_hvm) pop %ebx pop %eax #else - lea xen_hypercall_amd(%rip), %rbx - cmp %rax, %rbx + lea xen_hypercall_amd(%rip), %rcx + cmp %rax, %rcx #ifdef CONFIG_FRAME_POINTER pop %rax /* Dummy pop. */ #endif -- GitLab From 0bd797b801bd8ee06c822844e20d73aaea0878dd Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 5 Feb 2025 10:07:56 +0100 Subject: [PATCH 0280/1585] x86/xen: add FRAME_END to xen_hypercall_hvm() xen_hypercall_hvm() is missing a FRAME_END at the end, add it. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202502030848.HTNTTuo9-lkp@intel.com/ Fixes: b4845bb63838 ("x86/xen: add central hypercall functions") Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Andrew Cooper Signed-off-by: Juergen Gross --- arch/x86/xen/xen-head.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 72f28d66e0e52..4e481b0eefc96 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -132,6 +132,7 @@ SYM_FUNC_START(xen_hypercall_hvm) pop %rcx pop %rax #endif + FRAME_END /* Use correct hypercall function. */ jz xen_hypercall_amd jmp xen_hypercall_intel -- GitLab From aaf5eefd374b6e006e1c224a2b37bac9d3737aa2 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 5 Feb 2025 11:24:47 +0100 Subject: [PATCH 0281/1585] x86/xen: remove unneeded dummy push from xen_hypercall_hvm() Stack alignment of the kernel in 64-bit mode is 8, not 16, so the dummy push in xen_hypercall_hvm() for aligning the stack to 16 bytes can be removed. Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Andrew Cooper Signed-off-by: Juergen Gross --- arch/x86/xen/xen-head.S | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 4e481b0eefc96..894edf8d6d62f 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -100,9 +100,6 @@ SYM_FUNC_START(xen_hypercall_hvm) push %r10 push %r9 push %r8 -#ifdef CONFIG_FRAME_POINTER - pushq $0 /* Dummy push for stack alignment. */ -#endif #endif /* Set the vendor specific function. */ call __xen_hypercall_setfunc @@ -119,9 +116,6 @@ SYM_FUNC_START(xen_hypercall_hvm) #else lea xen_hypercall_amd(%rip), %rcx cmp %rax, %rcx -#ifdef CONFIG_FRAME_POINTER - pop %rax /* Dummy pop. */ -#endif pop %r8 pop %r9 pop %r10 -- GitLab From d364eee14c682b141f4667efc3c65191339d88bd Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 5 Feb 2025 11:25:12 +0000 Subject: [PATCH 0282/1585] cpufreq/amd-pstate: Remove the goto label in amd_pstate_update_limits Scope based guard/cleanup macros should not be used together with goto labels. Hence, remove the goto label. Fixes: 6c093d5a5b73 ("cpufreq/amd-pstate: convert mutex use to guard()") Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20250205112523.201101-2-dhananjay.ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 7120f035c0be4..b163c16998218 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -838,8 +838,10 @@ static void amd_pstate_update_limits(unsigned int cpu) guard(mutex)(&amd_pstate_driver_lock); ret = amd_get_highest_perf(cpu, &cur_high); - if (ret) - goto free_cpufreq_put; + if (ret) { + cpufreq_cpu_put(policy); + return; + } prev_high = READ_ONCE(cpudata->prefcore_ranking); highest_perf_changed = (prev_high != cur_high); @@ -849,8 +851,6 @@ static void amd_pstate_update_limits(unsigned int cpu) if (cur_high < CPPC_MAX_PERF) sched_set_itmt_core_prio((int)cur_high, cpu); } - -free_cpufreq_put: cpufreq_cpu_put(policy); if (!highest_perf_changed) -- GitLab From 55db9b73c3a77544efc671d5e796d9674772c330 Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 5 Feb 2025 11:25:13 +0000 Subject: [PATCH 0283/1585] cpufreq/amd-pstate: Fix max_perf updation with schedutil In adjust_perf() callback, we are setting the max_perf to highest_perf, as opposed to the correct limit value i.e. max_limit_perf. Fix that. Fixes: 3f7b835fa4d0 ("cpufreq/amd-pstate: Move limit updating code") Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20250205112523.201101-3-dhananjay.ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index b163c16998218..9dc3933bc3261 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -699,7 +699,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu, if (min_perf < lowest_nonlinear_perf) min_perf = lowest_nonlinear_perf; - max_perf = cap_perf; + max_perf = cpudata->max_limit_perf; if (max_perf < min_perf) max_perf = min_perf; -- GitLab From 038e33fcd40e59b60cdca561c2a39998e6759e08 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Thu, 9 Jan 2025 16:41:49 +0100 Subject: [PATCH 0284/1585] dt-bindings: display: Add powertip,{st7272|hx8238a} as DT Schema description This patch provides the DT Schema description of: - powertip,st7272 320 x 240 LCD display - powertip,hx8238a 320 x 240 LCD display Used with the different HW revisions of btt3 devices. Signed-off-by: Lukasz Majewski Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20250109154149.1212631-1-lukma@denx.de Signed-off-by: Rob Herring (Arm) --- .../display/panel/powertip,hx8238a.yaml | 29 +++++++++++++++++++ .../display/panel/powertip,st7272.yaml | 29 +++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/panel/powertip,hx8238a.yaml create mode 100644 Documentation/devicetree/bindings/display/panel/powertip,st7272.yaml diff --git a/Documentation/devicetree/bindings/display/panel/powertip,hx8238a.yaml b/Documentation/devicetree/bindings/display/panel/powertip,hx8238a.yaml new file mode 100644 index 0000000000000..b7d74faeb5d50 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/powertip,hx8238a.yaml @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/powertip,hx8238a.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Powertip Electronic Technology Co. 320 x 240 LCD panel + +maintainers: + - Lukasz Majewski + +allOf: + - $ref: panel-dpi.yaml# + +properties: + compatible: + items: + - const: powertip,hx8238a + - {} # panel-dpi, but not listed here to avoid false select + + height-mm: true + panel-timing: true + port: true + power-supply: true + width-mm: true + +additionalProperties: false + +... diff --git a/Documentation/devicetree/bindings/display/panel/powertip,st7272.yaml b/Documentation/devicetree/bindings/display/panel/powertip,st7272.yaml new file mode 100644 index 0000000000000..f3622800f13f6 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/powertip,st7272.yaml @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/powertip,st7272.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Powertip Electronic Technology Co. 320 x 240 LCD panel + +maintainers: + - Lukasz Majewski + +allOf: + - $ref: panel-dpi.yaml# + +properties: + compatible: + items: + - const: powertip,st7272 + - {} # panel-dpi, but not listed here to avoid false select + + height-mm: true + panel-timing: true + port: true + power-supply: true + width-mm: true + +additionalProperties: false + +... -- GitLab From 069504f1fcfa1532e4e221290df428b15bd9d284 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 4 Feb 2025 17:49:25 +0200 Subject: [PATCH 0285/1585] drm/i915/dp: Fix potential infinite loop in 128b/132b SST Passing 0 as the step only works when there are other reasons to break out of the BPP loop in intel_dp_mtp_tu_compute_config(). Otherwise, an infinite loop might occur. Fix it by explicitly checking for 0 step. Fixes: ef0a0757bbea ("drm/i915/dp: compute config for 128b/132b SST w/o DSC") Reported-by: Imre Deak Closes: https://lore.kernel.org/r/Z6I0knh2Kt5T0JrT@ideak-desk.fi.intel.com Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20250204154925.3001781-1-jani.nikula@intel.com Signed-off-by: Jani Nikula (cherry picked from commit a40e718d34d3d02c781c295466b013415f68c4f1) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 0c44fc7dd86ce..a65cf97ad12df 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -341,6 +341,10 @@ int intel_dp_mtp_tu_compute_config(struct intel_dp *intel_dp, break; } + + /* Allow using zero step to indicate one try */ + if (!step) + break; } if (slots < 0) { -- GitLab From 90508a1bb8f00618fa12cb2ad2276bc783656fc5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 3 Feb 2025 16:24:15 +0530 Subject: [PATCH 0286/1585] cpufreq: airoha: modify CONFIG_OF dependency Compile-testing without CONFIG_OF leads to a harmless build warning: drivers/cpufreq/airoha-cpufreq.c:109:34: error: 'airoha_cpufreq_match_list' defined but not used [-Werror=unused-const-variable=] 109 | static const struct of_device_id airoha_cpufreq_match_list[] __initconst = { | ^~~~~~~~~~~~~~~~~~~~~~~~~ It would be possible to mark the variable as __maybe_unused to shut up that warning, but a Kconfig dependency seems more appropriate as this still allows build testing in allmodconfig and randconfig builds on all architectures. An earlier commit, b865a8404642 ("cpufreq: airoha: Depends on OF"), tried to fix it incorrectly. ARCH_AIROHA already requires CONFIG_OF, so this change does nothing, and the dependency is still missing for the COMPILE_TEST case. Fix it properly. Fixes: 84cf9e541ccc ("cpufreq: airoha: Add EN7581 CPUFreq SMCCC driver") Fixes: b865a8404642 ("cpufreq: airoha: Depends on OF") Signed-off-by: Arnd Bergmann [ Viresh: updated commit log and fixed rebase conflict ] Signed-off-by: Viresh Kumar Link: https://patch.msgid.link/9d51d2710061dfa7f2568287c6ed125b858b7318.1738580005.git.viresh.kumar@linaro.org Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.arm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 0ee5c691fb36b..9e46960f6a862 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -17,7 +17,8 @@ config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM config ARM_AIROHA_SOC_CPUFREQ tristate "Airoha EN7581 SoC CPUFreq support" - depends on (ARCH_AIROHA && OF) || COMPILE_TEST + depends on ARCH_AIROHA || COMPILE_TEST + depends on OF select PM_OPP default ARCH_AIROHA help -- GitLab From 0813fd2e14ca6ecd4e6ba005a9766f08e26020d7 Mon Sep 17 00:00:00 2001 From: Aboorva Devarajan Date: Wed, 5 Feb 2025 23:43:47 +0530 Subject: [PATCH 0287/1585] cpufreq: prevent NULL dereference in cpufreq_online() Ensure cpufreq_driver->set_boost is non-NULL before using it in cpufreq_online() to prevent a potential NULL pointer dereference. Reported-by: Gautam Menghani Closes: https://lore.kernel.org/all/c9e56c5f54cc33338762c94e9bed7b5a0d5de812.camel@linux.ibm.com/ Fixes: dd016f379ebc ("cpufreq: Introduce a more generic way to set default per-policy boost flag") Suggested-by: Viresh Kumar Signed-off-by: Aboorva Devarajan Link: https://patch.msgid.link/20250205181347.2079272-1-aboorvad@linux.ibm.com [ rjw: Minor edits in the subject and changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e0048856eceee..30ffbddc7ecec 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1571,7 +1571,8 @@ static int cpufreq_online(unsigned int cpu) policy->cdev = of_cpufreq_cooling_register(policy); /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ - if (policy->boost_enabled != cpufreq_boost_enabled()) { + if (cpufreq_driver->set_boost && + policy->boost_enabled != cpufreq_boost_enabled()) { policy->boost_enabled = cpufreq_boost_enabled(); ret = cpufreq_driver->set_boost(policy, policy->boost_enabled); if (ret) { -- GitLab From ab930483eca9f3e816c35824b5868599af0c61d7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 3 Feb 2025 21:46:29 +0200 Subject: [PATCH 0288/1585] ACPI: property: Fix return value for nval == 0 in acpi_data_prop_read() While analysing code for software and OF node for the corner case when caller asks to read zero items in the supposed to be an array of values I found that ACPI behaves differently to what OF does, i.e. 1. It returns -EINVAL when caller asks to read zero items from integer array, while OF returns 0, if no other errors happened. 2. It returns -EINVAL when caller asks to read zero items from string array, while OF returns -ENODATA, if no other errors happened. Amend ACPI implementation to follow what OF does. Fixes: b31384fa5de3 ("Driver core: Unified device properties interface for platform firmware") Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20250203194629.3731895-1-andriy.shevchenko@linux.intel.com [ rjw: Added empty line after a conditional ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/property.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 98d93ed583150..436019d96027b 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -1187,8 +1187,6 @@ static int acpi_data_prop_read(const struct acpi_device_data *data, } break; } - if (nval == 0) - return -EINVAL; if (obj->type == ACPI_TYPE_BUFFER) { if (proptype != DEV_PROP_U8) @@ -1212,9 +1210,11 @@ static int acpi_data_prop_read(const struct acpi_device_data *data, ret = acpi_copy_property_array_uint(items, (u64 *)val, nval); break; case DEV_PROP_STRING: - ret = acpi_copy_property_array_string( - items, (char **)val, - min_t(u32, nval, obj->package.count)); + nval = min_t(u32, nval, obj->package.count); + if (nval == 0) + return -ENODATA; + + ret = acpi_copy_property_array_string(items, (char **)val, nval); break; default: ret = -EINVAL; -- GitLab From 607ab6f85f4194b644ea95ac5fe660ef575db3b4 Mon Sep 17 00:00:00 2001 From: Gannon Kolding Date: Mon, 27 Jan 2025 02:39:02 -0700 Subject: [PATCH 0289/1585] ACPI: resource: IRQ override for Eluktronics MECH-17 The Eluktronics MECH-17 (GM7RG7N) needs IRQ overriding for the keyboard to work. Adding a DMI_MATCH entry for this laptop model makes the internal keyboard function normally. Signed-off-by: Gannon Kolding Link: https://patch.msgid.link/20250127093902.328361-1-gannon.kolding@gmail.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 90aaec923889c..b4cd14e7fa76c 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -563,6 +563,12 @@ static const struct dmi_system_id irq1_edge_low_force_override[] = { DMI_MATCH(DMI_BOARD_NAME, "RP-15"), }, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Eluktronics Inc."), + DMI_MATCH(DMI_BOARD_NAME, "MECH-17"), + }, + }, { /* TongFang GM6XGxX/TUXEDO Stellaris 16 Gen5 AMD */ .matches = { -- GitLab From 7f5704b6a143b8eca640cba820968e798d065e91 Mon Sep 17 00:00:00 2001 From: Aubrey Li Date: Sun, 26 Jan 2025 10:22:50 +0800 Subject: [PATCH 0290/1585] ACPI: PRM: Remove unnecessary strict handler address checks Commit 088984c8d54c ("ACPI: PRM: Find EFI_MEMORY_RUNTIME block for PRM handler and context") added unnecessary strict handler address checks, causing the PRM module to fail in translating memory error addresses. Both static data buffer address and ACPI parameter buffer address may be NULL if they are not needed, as described in section 4.1.2 PRM Handler Information Structure of Platform Runtime Mechanism specification [1]. Here are two examples from real hardware: ----PRMT.dsl---- - staic data address is not used [10Ch 0268 2] Revision : 0000 [10Eh 0270 2] Length : 002C [110h 0272 16] Handler GUID : F6A58D47-E04F-4F5A-86B8-2A50D4AA109B [120h 0288 8] Handler address : 0000000065CE51F4 [128h 0296 8] Satic Data Address : 0000000000000000 [130h 0304 8] ACPI Parameter Address : 000000006522A718 - ACPI parameter address is not used [1B0h 0432 2] Revision : 0000 [1B2h 0434 2] Length : 002C [1B4h 0436 16] Handler GUID : 657E8AE6-A8FC-4877-BB28-42E7DE1899A5 [1C4h 0452 8] Handler address : 0000000065C567C8 [1CCh 0460 8] Satic Data Address : 000000006113FB98 [1D4h 0468 8] ACPI Parameter Address : 0000000000000000 Fixes: 088984c8d54c ("ACPI: PRM: Find EFI_MEMORY_RUNTIME block for PRM handler and context") Reported-and-tested-by: Shi Liu Cc: All applicable Signed-off-by: Aubrey Li Link: https://uefi.org/sites/default/files/resources/Platform%20Runtime%20Mechanism%20-%20with%20legal%20notice.pdf # [1] Reviewed-by: Koba Ko Acked-by: Ard Biesheuvel Link: https://patch.msgid.link/20250126022250.3014210-1-aubrey.li@linux.intel.com [ rjw: Minor changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/prmt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/acpi/prmt.c b/drivers/acpi/prmt.c index 747f83f7114d2..e549914a636c6 100644 --- a/drivers/acpi/prmt.c +++ b/drivers/acpi/prmt.c @@ -287,9 +287,7 @@ static acpi_status acpi_platformrt_space_handler(u32 function, if (!handler || !module) goto invalid_guid; - if (!handler->handler_addr || - !handler->static_data_buffer_addr || - !handler->acpi_param_buffer_addr) { + if (!handler->handler_addr) { buffer->prm_status = PRM_HANDLER_ERROR; return AE_OK; } -- GitLab From b1749432a52d3605151634b000fec0361ad45067 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Sat, 1 Feb 2025 12:40:38 -0500 Subject: [PATCH 0291/1585] rust: kbuild: use host dylib naming in rusttestlib-kernel There seems to have been merge skew between commit b2c261fa8629 ("rust: kbuild: expand rusttest target for macros") and commit 0730422bced5 ("rust: use host dylib naming convention to support macOS") ; the latter replaced `libmacros.so` with `$(libmacros_name)` and the former added an instance of `libmacros.so`. The former was not yet applied when the latter was sent, resulting in a stray `libmacros.so`. Replace the stray with `$(libmacros_name)` to allow `rusttest` to build on macOS. Fixes: 0730422bced5 ("rust: use host dylib naming convention to support macOS") Signed-off-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250201-fix-mac-build-again-v1-1-ca665f5d7de7@gmail.com [ Slightly reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/Makefile b/rust/Makefile index 8fcfd60447bc8..ff4343ca3f7c4 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -144,7 +144,7 @@ rusttestlib-kernel: private rustc_target_flags = --extern ffi \ --extern bindings --extern uapi rusttestlib-kernel: $(src)/kernel/lib.rs \ rusttestlib-bindings rusttestlib-uapi rusttestlib-build_error \ - $(obj)/libmacros.so $(obj)/bindings.o FORCE + $(obj)/$(libmacros_name) $(obj)/bindings.o FORCE +$(call if_changed,rustc_test_library) rusttestlib-bindings: private rustc_target_flags = --extern ffi -- GitLab From c21bdb3d8a850afdfa4afe77eea39ae9533629b0 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Tue, 21 Jan 2025 21:09:34 +0100 Subject: [PATCH 0292/1585] rust: init: use explicit ABI to clean warning in future compilers Starting with Rust 1.86.0 (currently in nightly, to be released on 2025-04-03), the `missing_abi` lint is warn-by-default [1]: error: extern declarations without an explicit ABI are deprecated --> rust/doctests_kernel_generated.rs:3158:1 | 3158 | extern { | ^^^^^^ help: explicitly specify the C ABI: `extern "C"` | = note: `-D missing-abi` implied by `-D warnings` = help: to override `-D warnings` add `#[allow(missing_abi)]` Thus clean it up. Cc: # Needed in 6.12.y and 6.13.y only (Rust is pinned in older LTSs). Fixes: 7f8977a7fe6d ("rust: init: add `{pin_}chain` functions to `{Pin}Init`") Link: https://github.com/rust-lang/rust/pull/132397 [1] Reviewed-by: Gary Guo Reviewed-by: Alice Ryhl Reviewed-by: Fiona Behrens Link: https://lore.kernel.org/r/20250121200934.222075-1-ojeda@kernel.org [ Added 6.13.y to Cc: stable tag. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/init.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 3f9236c1c9d56..7fd1ea8265a55 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -870,7 +870,7 @@ pub unsafe trait PinInit: Sized { /// use kernel::{types::Opaque, init::pin_init_from_closure}; /// #[repr(C)] /// struct RawFoo([u8; 16]); - /// extern { + /// extern "C" { /// fn init_foo(_: *mut RawFoo); /// } /// -- GitLab From 5368a67307b3b2c347dc8965ac55b888be665934 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 4 Feb 2025 23:19:53 +0100 Subject: [PATCH 0293/1585] selftests: mptcp: connect: -f: no reconnect The '-f' parameter is there to force the kernel to emit MPTCP FASTCLOSE by closing the connection with unread bytes in the receive queue. The xdisconnect() helper was used to stop the connection, but it does more than that: it will shut it down, then wait before reconnecting to the same address. This causes the mptcp_join's "fastclose test" to fail all the time. This failure is due to a recent change, with commit 218cc166321f ("selftests: mptcp: avoid spurious errors on disconnect"), but that went unnoticed because the test is currently ignored. The recent modification only shown an existing issue: xdisconnect() doesn't need to be used here, only the shutdown() part is needed. Fixes: 6bf41020b72b ("selftests: mptcp: update and extend fastclose test-cases") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250204-net-mptcp-sft-conn-f-v1-1-6b470c72fffa@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 414addef9a451..d240d02fa443a 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -1302,7 +1302,7 @@ int main_loop(void) return ret; if (cfg_truncate > 0) { - xdisconnect(fd); + shutdown(fd, SHUT_WR); } else if (--cfg_repeat > 0) { xdisconnect(fd); -- GitLab From 647cef20e649c576dff271e018d5d15d998b629d Mon Sep 17 00:00:00 2001 From: Quang Le Date: Mon, 3 Feb 2025 16:58:38 -0800 Subject: [PATCH 0294/1585] pfifo_tail_enqueue: Drop new packet when sch->limit == 0 Expected behaviour: In case we reach scheduler's limit, pfifo_tail_enqueue() will drop a packet in scheduler's queue and decrease scheduler's qlen by one. Then, pfifo_tail_enqueue() enqueue new packet and increase scheduler's qlen by one. Finally, pfifo_tail_enqueue() return `NET_XMIT_CN` status code. Weird behaviour: In case we set `sch->limit == 0` and trigger pfifo_tail_enqueue() on a scheduler that has no packet, the 'drop a packet' step will do nothing. This means the scheduler's qlen still has value equal 0. Then, we continue to enqueue new packet and increase scheduler's qlen by one. In summary, we can leverage pfifo_tail_enqueue() to increase qlen by one and return `NET_XMIT_CN` status code. The problem is: Let's say we have two qdiscs: Qdisc_A and Qdisc_B. - Qdisc_A's type must have '->graft()' function to create parent/child relationship. Let's say Qdisc_A's type is `hfsc`. Enqueue packet to this qdisc will trigger `hfsc_enqueue`. - Qdisc_B's type is pfifo_head_drop. Enqueue packet to this qdisc will trigger `pfifo_tail_enqueue`. - Qdisc_B is configured to have `sch->limit == 0`. - Qdisc_A is configured to route the enqueued's packet to Qdisc_B. Enqueue packet through Qdisc_A will lead to: - hfsc_enqueue(Qdisc_A) -> pfifo_tail_enqueue(Qdisc_B) - Qdisc_B->q.qlen += 1 - pfifo_tail_enqueue() return `NET_XMIT_CN` - hfsc_enqueue() check for `NET_XMIT_SUCCESS` and see `NET_XMIT_CN` => hfsc_enqueue() don't increase qlen of Qdisc_A. The whole process lead to a situation where Qdisc_A->q.qlen == 0 and Qdisc_B->q.qlen == 1. Replace 'hfsc' with other type (for example: 'drr') still lead to the same problem. This violate the design where parent's qlen should equal to the sum of its childrens'qlen. Bug impact: This issue can be used for user->kernel privilege escalation when it is reachable. Fixes: 57dbb2d83d10 ("sched: add head drop fifo queue") Reported-by: Quang Le Signed-off-by: Quang Le Signed-off-by: Cong Wang Link: https://patch.msgid.link/20250204005841.223511-2-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_fifo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index b50b2c2cc09bc..e6bfd39ff3396 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -40,6 +40,9 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, { unsigned int prev_backlog; + if (unlikely(READ_ONCE(sch->limit) == 0)) + return qdisc_drop(skb, sch, to_free); + if (likely(sch->q.qlen < READ_ONCE(sch->limit))) return qdisc_enqueue_tail(skb, sch); -- GitLab From 3fe5648d1df1798ce14b5464b2ea49f10cd9db31 Mon Sep 17 00:00:00 2001 From: Quang Le Date: Mon, 3 Feb 2025 16:58:39 -0800 Subject: [PATCH 0295/1585] selftests/tc-testing: Add a test case for pfifo_head_drop qdisc when limit==0 When limit == 0, pfifo_tail_enqueue() must drop new packet and increase dropped packets count of the qdisc. All test results: 1..16 ok 1 a519 - Add bfifo qdisc with system default parameters on egress ok 2 585c - Add pfifo qdisc with system default parameters on egress ok 3 a86e - Add bfifo qdisc with system default parameters on egress with handle of maximum value ok 4 9ac8 - Add bfifo qdisc on egress with queue size of 3000 bytes ok 5 f4e6 - Add pfifo qdisc on egress with queue size of 3000 packets ok 6 b1b1 - Add bfifo qdisc with system default parameters on egress with invalid handle exceeding maximum value ok 7 8d5e - Add bfifo qdisc on egress with unsupported argument ok 8 7787 - Add pfifo qdisc on egress with unsupported argument ok 9 c4b6 - Replace bfifo qdisc on egress with new queue size ok 10 3df6 - Replace pfifo qdisc on egress with new queue size ok 11 7a67 - Add bfifo qdisc on egress with queue size in invalid format ok 12 1298 - Add duplicate bfifo qdisc on egress ok 13 45a0 - Delete nonexistent bfifo qdisc ok 14 972b - Add prio qdisc on egress with invalid format for handles ok 15 4d39 - Delete bfifo qdisc twice ok 16 d774 - Check pfifo_head_drop qdisc enqueue behaviour when limit == 0 Signed-off-by: Quang Le Signed-off-by: Cong Wang Link: https://patch.msgid.link/20250204005841.223511-3-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/qdiscs/fifo.json | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json index ae3d286a32b2e..6f20d033670d4 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json @@ -313,6 +313,29 @@ "matchPattern": "qdisc bfifo 1: root", "matchCount": "0", "teardown": [ + ] + }, + { + "id": "d774", + "name": "Check pfifo_head_drop qdisc enqueue behaviour when limit == 0", + "category": [ + "qdisc", + "pfifo_head_drop" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: pfifo_head_drop limit 0", + "$IP link set dev $DUMMY up || true" + ], + "cmdUnderTest": "ping -c2 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "dropped 2", + "matchCount": "1", + "teardown": [ ] } ] -- GitLab From 638ba5089324796c2ee49af10427459c2de35f71 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 3 Feb 2025 16:58:40 -0800 Subject: [PATCH 0296/1585] netem: Update sch->q.qlen before qdisc_tree_reduce_backlog() qdisc_tree_reduce_backlog() notifies parent qdisc only if child qdisc becomes empty, therefore we need to reduce the backlog of the child qdisc before calling it. Otherwise it would miss the opportunity to call cops->qlen_notify(), in the case of DRR, it resulted in UAF since DRR uses ->qlen_notify() to maintain its active list. Fixes: f8d4bc455047 ("net/sched: netem: account for backlog updates from child qdisc") Cc: Martin Ottens Reported-by: Mingi Cho Signed-off-by: Cong Wang Link: https://patch.msgid.link/20250204005841.223511-4-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_netem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 71ec9986ed37f..fdd79d3ccd8ce 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -749,9 +749,9 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) if (err != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(err)) qdisc_qstats_drop(sch); - qdisc_tree_reduce_backlog(sch, 1, pkt_len); sch->qstats.backlog -= pkt_len; sch->q.qlen--; + qdisc_tree_reduce_backlog(sch, 1, pkt_len); } goto tfifo_dequeue; } -- GitLab From 91aadc16ee73cf958be6b0896da3caea49b7f414 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 3 Feb 2025 16:58:41 -0800 Subject: [PATCH 0297/1585] selftests/tc-testing: Add a test case for qdisc_tree_reduce_backlog() Integrate the test case provided by Mingi Cho into TDC. All test results: 1..4 ok 1 ca5e - Check class delete notification for ffff: ok 2 e4b7 - Check class delete notification for root ffff: ok 3 33a9 - Check ingress is not searchable on backlog update ok 4 a4b9 - Test class qlen notification Cc: Mingi Cho Signed-off-by: Cong Wang Link: https://patch.msgid.link/20250204005841.223511-5-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 34 ++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index d3dd65b05b5f1..9044ac0541672 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -94,5 +94,37 @@ "$TC qdisc del dev $DUMMY ingress", "$IP addr del 10.10.10.10/24 dev $DUMMY" ] - } + }, + { + "id": "a4b9", + "name": "Test class qlen notification", + "category": [ + "qdisc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: drr", + "$TC filter add dev $DUMMY parent 1: basic classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2: netem", + "$TC qdisc add dev $DUMMY parent 2: handle 3: drr", + "$TC filter add dev $DUMMY parent 3: basic action drop", + "$TC class add dev $DUMMY parent 3: classid 3:1 drr", + "$TC class del dev $DUMMY classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC qdisc ls dev $DUMMY", + "matchPattern": "drr 1: root", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: drr", + "$IP addr del 10.10.10.10/24 dev $DUMMY" + ] + } ] -- GitLab From a70c7b3cbc0688016810bb2e0b9b8a0d6a530045 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 4 Feb 2025 11:10:06 -0500 Subject: [PATCH 0298/1585] tun: revert fix group permission check This reverts commit 3ca459eaba1bf96a8c7878de84fa8872259a01e3. The blamed commit caused a regression when neither tun->owner nor tun->group is set. This is intended to be allowed, but now requires CAP_NET_ADMIN. Discussion in the referenced thread pointed out that the original issue that prompted this patch can be resolved in userspace. The relaxed access control may also make a device accessible when it previously wasn't, while existing users may depend on it to not be. This is a clean pure git revert, except for fixing the indentation on the gid_valid line that checkpatch correctly flagged. Fixes: 3ca459eaba1b ("tun: fix group permission check") Link: https://lore.kernel.org/netdev/CAFqZXNtkCBT4f+PwyVRmQGoT3p1eVa01fCG_aNtpt6dakXncUg@mail.gmail.com/ Signed-off-by: Willem de Bruijn Cc: Ondrej Mosnacek Cc: Stas Sergeev Link: https://patch.msgid.link/20250204161015.739430-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 28624cca91f8d..acf96f2624887 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -574,18 +574,14 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, return ret; } -static inline bool tun_capable(struct tun_struct *tun) +static inline bool tun_not_capable(struct tun_struct *tun) { const struct cred *cred = current_cred(); struct net *net = dev_net(tun->dev); - if (ns_capable(net->user_ns, CAP_NET_ADMIN)) - return 1; - if (uid_valid(tun->owner) && uid_eq(cred->euid, tun->owner)) - return 1; - if (gid_valid(tun->group) && in_egroup_p(tun->group)) - return 1; - return 0; + return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) || + (gid_valid(tun->group) && !in_egroup_p(tun->group))) && + !ns_capable(net->user_ns, CAP_NET_ADMIN); } static void tun_set_real_num_queues(struct tun_struct *tun) @@ -2782,7 +2778,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) !!(tun->flags & IFF_MULTI_QUEUE)) return -EINVAL; - if (!tun_capable(tun)) + if (tun_not_capable(tun)) return -EPERM; err = security_tun_dev_open(tun->security); if (err < 0) -- GitLab From 811b8f534fd85e17077bd2ac0413bcd16cc8fb9b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 4 Feb 2025 14:38:39 +0200 Subject: [PATCH 0299/1585] net: sched: Fix truncation of offloaded action statistics In case of tc offload, when user space queries the kernel for tc action statistics, tc will query the offloaded statistics from device drivers. Among other statistics, drivers are expected to pass the number of packets that hit the action since the last query as a 64-bit number. Unfortunately, tc treats the number of packets as a 32-bit number, leading to truncation and incorrect statistics when the number of packets since the last query exceeds 0xffffffff: $ tc -s filter show dev swp2 ingress filter protocol all pref 1 flower chain 0 filter protocol all pref 1 flower chain 0 handle 0x1 skip_sw in_hw in_hw_count 1 action order 1: mirred (Egress Redirect to device swp1) stolen index 1 ref 1 bind 1 installed 58 sec used 0 sec Action statistics: Sent 1133877034176 bytes 536959475 pkt (dropped 0, overlimits 0 requeues 0) [...] According to the above, 2111-byte packets were redirected which is impossible as only 64-byte packets were transmitted and the MTU was 1500. Fix by treating packets as a 64-bit number: $ tc -s filter show dev swp2 ingress filter protocol all pref 1 flower chain 0 filter protocol all pref 1 flower chain 0 handle 0x1 skip_sw in_hw in_hw_count 1 action order 1: mirred (Egress Redirect to device swp1) stolen index 1 ref 1 bind 1 installed 61 sec used 0 sec Action statistics: Sent 1370624380864 bytes 21416005951 pkt (dropped 0, overlimits 0 requeues 0) [...] Which shows that only 64-byte packets were redirected (1370624380864 / 21416005951 = 64). Fixes: 380407023526 ("net/sched: Enable netdev drivers to update statistics of offloaded actions") Reported-by: Joe Botha Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250204123839.1151804-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d635c5b47ebaf..d48c657191cd0 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -851,7 +851,7 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, } static inline void _bstats_update(struct gnet_stats_basic_sync *bstats, - __u64 bytes, __u32 packets) + __u64 bytes, __u64 packets) { u64_stats_update_begin(&bstats->syncp); u64_stats_add(&bstats->bytes, bytes); -- GitLab From 41b996ce83bf944de5569d6263c8dbd5513e7ed0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 4 Feb 2025 23:05:53 +0000 Subject: [PATCH 0300/1585] rxrpc: Fix call state set to not include the SERVER_SECURING state The RXRPC_CALL_SERVER_SECURING state doesn't really belong with the other states in the call's state set as the other states govern the call's Rx/Tx phase transition and govern when packets can and can't be received or transmitted. The "Securing" state doesn't actually govern the reception of packets and would need to be split depending on whether or not we've received the last packet yet (to mirror RECV_REQUEST/ACK_REQUEST). The "Securing" state is more about whether or not we can start forwarding packets to the application as recvmsg will need to decode them and the decoding can't take place until the challenge/response exchange has completed. Fix this by removing the RXRPC_CALL_SERVER_SECURING state from the state set and, instead, using a flag, RXRPC_CALL_CONN_CHALLENGING, to track whether or not we can queue the call for reception by recvmsg() or notify the kernel app that data is ready. In the event that we've already received all the packets, the connection event handler will poke the app layer in the appropriate manner. Also there's a race whereby the app layer sees the last packet before rxrpc has managed to end the rx phase and change the state to one amenable to allowing a reply. Fix this by queuing the packet after calling rxrpc_end_rx_phase(). Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Signed-off-by: David Howells cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20250204230558.712536-2-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/ar-internal.h | 2 +- net/rxrpc/call_object.c | 6 ++---- net/rxrpc/conn_event.c | 4 +--- net/rxrpc/input.c | 2 +- net/rxrpc/sendmsg.c | 2 +- 5 files changed, 6 insertions(+), 10 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 718193df9d2e2..f251845fe532c 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -582,6 +582,7 @@ enum rxrpc_call_flag { RXRPC_CALL_EXCLUSIVE, /* The call uses a once-only connection */ RXRPC_CALL_RX_IS_IDLE, /* recvmsg() is idle - send an ACK */ RXRPC_CALL_RECVMSG_READ_ALL, /* recvmsg() read all of the received data */ + RXRPC_CALL_CONN_CHALLENGING, /* The connection is being challenged */ }; /* @@ -602,7 +603,6 @@ enum rxrpc_call_state { RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */ RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */ RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */ - RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */ RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */ RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */ RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 5a543c3f6fb08..c4c8b46a68c67 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -22,7 +22,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc", - [RXRPC_CALL_SERVER_SECURING] = "SvSecure", [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq", [RXRPC_CALL_SERVER_ACK_REQUEST] = "SvAckReq", [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl", @@ -453,17 +452,16 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, call->cong_tstamp = skb->tstamp; __set_bit(RXRPC_CALL_EXPOSED, &call->flags); - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING); + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); spin_lock(&conn->state_lock); switch (conn->state) { case RXRPC_CONN_SERVICE_UNSECURED: case RXRPC_CONN_SERVICE_CHALLENGING: - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING); + __set_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags); break; case RXRPC_CONN_SERVICE: - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); break; case RXRPC_CONN_ABORTED: diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 74bb49b936cd4..4d9c5e21ba785 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -228,10 +228,8 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn) */ static void rxrpc_call_is_secure(struct rxrpc_call *call) { - if (call && __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SECURING) { - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); + if (call && __test_and_clear_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags)) rxrpc_notify_socket(call); - } } /* diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 4974b5accafa3..4a152f3c831fd 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -657,7 +657,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb rxrpc_propose_delay_ACK(call, sp->hdr.serial, rxrpc_propose_ack_input_data); } - if (notify) { + if (notify && !test_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags)) { trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial); rxrpc_notify_socket(call); } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 0e8da909d4f2f..584397aba4a07 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -707,7 +707,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) } else { switch (rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_AWAIT_CONN: - case RXRPC_CALL_SERVER_SECURING: + case RXRPC_CALL_SERVER_RECV_REQUEST: if (p.command == RXRPC_CMD_SEND_ABORT) break; fallthrough; -- GitLab From 2d7b30aef34dae942e9ab7812b288ce14658ae66 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 4 Feb 2025 23:05:54 +0000 Subject: [PATCH 0301/1585] rxrpc: Fix race in call state changing vs recvmsg() There's a race in between the rxrpc I/O thread recording the end of the receive phase of a call and recvmsg() examining the state of the call to determine whether it has completed. The problem is that call->_state records the I/O thread's view of the call, not the application's view (which may lag), so that alone is not sufficient. To this end, the application also checks whether there is anything left in call->recvmsg_queue for it to pick up. The call must be in state RXRPC_CALL_COMPLETE and the recvmsg_queue empty for the call to be considered fully complete. In rxrpc_input_queue_data(), the latest skbuff is added to the queue and then, if it was marked as LAST_PACKET, the state is advanced... But this is two separate operations with no locking around them. As a consequence, the lack of locking means that sendmsg() can jump into the gap on a service call and attempt to send the reply - but then get rejected because the I/O thread hasn't advanced the state yet. Simply flipping the order in which things are done isn't an option as that impacts the client side, causing the checks in rxrpc_kernel_check_life() as to whether the call is still alive to race instead. Fix this by moving the update of call->_state inside the skb queue spinlocked section where the packet is queued on the I/O thread side. rxrpc's recvmsg() will then automatically sync against this because it has to take the call->recvmsg_queue spinlock in order to dequeue the last packet. rxrpc's sendmsg() doesn't need amending as the app shouldn't be calling it to send a reply until recvmsg() indicates it has returned all of the request. Fixes: 93368b6bd58a ("rxrpc: Move call state changes from recvmsg to I/O thread") Signed-off-by: David Howells cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20250204230558.712536-3-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/input.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 4a152f3c831fd..9047ba13bd31e 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -448,11 +448,19 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); bool last = sp->hdr.flags & RXRPC_LAST_PACKET; - skb_queue_tail(&call->recvmsg_queue, skb); + spin_lock_irq(&call->recvmsg_queue.lock); + + __skb_queue_tail(&call->recvmsg_queue, skb); rxrpc_input_update_ack_window(call, window, wtop); trace_rxrpc_receive(call, last ? why + 1 : why, sp->hdr.serial, sp->hdr.seq); if (last) + /* Change the state inside the lock so that recvmsg syncs + * correctly with it and using sendmsg() to send a reply + * doesn't race. + */ rxrpc_end_rx_phase(call, sp->hdr.serial); + + spin_unlock_irq(&call->recvmsg_queue.lock); } /* -- GitLab From 77c2e45dbf9d2ced21d2cf6cc3b2a048d57ab7ad Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 5 Feb 2025 13:03:33 -0300 Subject: [PATCH 0302/1585] smb: client: don't trust DFSREF_STORAGE_SERVER bit Some servers don't respect the DFSREF_STORAGE_SERVER bit, so unconditionally tree connect to DFS link target and then decide whether or not continue chasing DFS referrals for DFS interlinks. Otherwise the client would fail to mount such shares. Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/dfs.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c index dad521336b5ee..f65a8a90ba279 100644 --- a/fs/smb/client/dfs.c +++ b/fs/smb/client/dfs.c @@ -150,25 +150,27 @@ static int __dfs_referral_walk(struct dfs_ref_walk *rw) if (rc) continue; - if (tgt.flags & DFSREF_STORAGE_SERVER) { - rc = cifs_mount_get_tcon(mnt_ctx); - if (!rc) - rc = cifs_is_path_remote(mnt_ctx); + rc = cifs_mount_get_tcon(mnt_ctx); + if (rc) { + if (tgt.server_type == DFS_TYPE_LINK && + DFS_INTERLINK(tgt.flags)) + rc = -EREMOTE; + } else { + rc = cifs_is_path_remote(mnt_ctx); if (!rc) { ref_walk_set_tgt_hint(rw); break; } - if (rc != -EREMOTE) - continue; } - - rc = ref_walk_advance(rw); - if (!rc) { - rc = setup_dfs_ref(&tgt, rw); - if (rc) - break; - ref_walk_mark_end(rw); - goto again; + if (rc == -EREMOTE) { + rc = ref_walk_advance(rw); + if (!rc) { + rc = setup_dfs_ref(&tgt, rw); + if (rc) + break; + ref_walk_mark_end(rw); + goto again; + } } } } while (rc && ref_walk_descend(rw)); -- GitLab From 773dc23ff81838b6f74d7fabba5a441cc6a93982 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 5 Feb 2025 13:22:11 -0300 Subject: [PATCH 0303/1585] smb: client: fix noisy when tree connecting to DFS interlink targets When the client attempts to tree connect to a domain-based DFS namespace from a DFS interlink target, the server will return STATUS_BAD_NETWORK_NAME and the following will appear on dmesg: CIFS: VFS: BAD_NETWORK_NAME: \\dom\dfs Since a DFS share might contain several DFS interlinks and they expire after 10 minutes, the above message might end up being flooded on dmesg when mounting or accessing them. Print this only once per share. Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 40ad9e79437a4..78395195e0165 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2169,7 +2169,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, tcon_error_exit: if (rsp && rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) - cifs_tcon_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree); + cifs_dbg(VFS | ONCE, "BAD_NETWORK_NAME: %s\n", tree); goto tcon_exit; } -- GitLab From be1963dd4ce4e467f062b023d1e696f40c926a04 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 5 Feb 2025 13:41:32 -0300 Subject: [PATCH 0304/1585] smb: client: get rid of kstrdup() in get_ses_refpath() After commit 36008fe6e3dc ("smb: client: don't try following DFS links in cifs_tree_connect()"), TCP_Server_Info::leaf_fullpath will no longer be changed, so there is no need to kstrdup() it. Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/dfs.h | 7 +++++++ fs/smb/client/dfs_cache.c | 27 +++++---------------------- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/fs/smb/client/dfs.h b/fs/smb/client/dfs.h index ed4cd7cf1ec64..e60f0a24a8a1d 100644 --- a/fs/smb/client/dfs.h +++ b/fs/smb/client/dfs.h @@ -188,4 +188,11 @@ static inline void dfs_put_root_smb_sessions(struct list_head *head) } } +static inline const char *dfs_ses_refpath(struct cifs_ses *ses) +{ + const char *path = ses->server->leaf_fullpath; + + return path ? path + 1 : ERR_PTR(-ENOENT); +} + #endif /* _CIFS_DFS_H */ diff --git a/fs/smb/client/dfs_cache.c b/fs/smb/client/dfs_cache.c index 5022bb1f122a1..4dada26d56b5f 100644 --- a/fs/smb/client/dfs_cache.c +++ b/fs/smb/client/dfs_cache.c @@ -1136,33 +1136,19 @@ static bool is_ses_good(struct cifs_ses *ses) return ret; } -static char *get_ses_refpath(struct cifs_ses *ses) -{ - struct TCP_Server_Info *server = ses->server; - char *path = ERR_PTR(-ENOENT); - - if (server->leaf_fullpath) { - path = kstrdup(server->leaf_fullpath + 1, GFP_KERNEL); - if (!path) - path = ERR_PTR(-ENOMEM); - } - return path; -} - /* Refresh dfs referral of @ses */ static void refresh_ses_referral(struct cifs_ses *ses) { struct cache_entry *ce; unsigned int xid; - char *path; + const char *path; int rc = 0; xid = get_xid(); - path = get_ses_refpath(ses); + path = dfs_ses_refpath(ses); if (IS_ERR(path)) { rc = PTR_ERR(path); - path = NULL; goto out; } @@ -1181,7 +1167,6 @@ static void refresh_ses_referral(struct cifs_ses *ses) out: free_xid(xid); - kfree(path); } static int __refresh_tcon_referral(struct cifs_tcon *tcon, @@ -1231,19 +1216,18 @@ static void refresh_tcon_referral(struct cifs_tcon *tcon, bool force_refresh) struct dfs_info3_param *refs = NULL; struct cache_entry *ce; struct cifs_ses *ses; - unsigned int xid; bool needs_refresh; - char *path; + const char *path; + unsigned int xid; int numrefs = 0; int rc = 0; xid = get_xid(); ses = tcon->ses; - path = get_ses_refpath(ses); + path = dfs_ses_refpath(ses); if (IS_ERR(path)) { rc = PTR_ERR(path); - path = NULL; goto out; } @@ -1271,7 +1255,6 @@ static void refresh_tcon_referral(struct cifs_tcon *tcon, bool force_refresh) out: free_xid(xid); - kfree(path); free_dfs_info_array(refs, numrefs); } -- GitLab From c7691aec5e991cec9c5c5fdab08c24856a1fc56f Mon Sep 17 00:00:00 2001 From: Vaishnav Achath Date: Wed, 5 Feb 2025 11:52:29 +0530 Subject: [PATCH 0305/1585] arm64: defconfig: Enable TISCI Interrupt Router and Aggregator Enable TISCI Interrupt Router and Interrupt Aggregator drivers. These IPs are found in all TI K3 SoCs like J721E, AM62X and is required for core functionality like DMA, GPIO Interrupts which is necessary during boot, thus make them built-in. bloat-o-meter summary on vmlinux: add/remove: 460/1 grow/shrink: 4/0 up/down: 162483/-8 (162475) ... Total: Before=31615984, After=31778459, chg +0.51% These configs were previously selected for ARCH_K3 in respective Kconfigs till commit b8b26ae398c4 ("irqchip/ti-sci-inta : Add module build support") and commit 2d95ffaecbc2 ("irqchip/ti-sci-intr: Add module build support") dropped them and few driver configs (TI_K3_UDMA, TI_K3_RINGACC) dependent on these also got disabled due to this. While re-enabling the TI_SCI_INT_*_IRQCHIP configs, these configs with missing dependencies (which are already part of arm64 defconfig) also get re-enabled which explains the slightly larger size increase from the bloat-o-meter summary. Fixes: 2d95ffaecbc2 ("irqchip/ti-sci-intr: Add module build support") Fixes: b8b26ae398c4 ("irqchip/ti-sci-inta : Add module build support") Signed-off-by: Vaishnav Achath Tested-by: Dhruva Gole Reviewed-by: Dhruva Gole Link: https://lore.kernel.org/r/20250205062229.3869081-1-vaishnav.a@ti.com Signed-off-by: Nishanth Menon --- arch/arm64/configs/defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index cb7da44155999..1f25423de3833 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1551,6 +1551,8 @@ CONFIG_PWM_VISCONTI=m CONFIG_SL28CPLD_INTC=y CONFIG_QCOM_PDC=y CONFIG_QCOM_MPM=y +CONFIG_TI_SCI_INTR_IRQCHIP=y +CONFIG_TI_SCI_INTA_IRQCHIP=y CONFIG_RESET_GPIO=m CONFIG_RESET_IMX7=y CONFIG_RESET_QCOM_AOSS=y -- GitLab From cc668a11e6ac8adb0e016711080d3f314722cc91 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 3 Feb 2025 14:50:59 +0200 Subject: [PATCH 0306/1585] RDMA/mlx5: Fix a race for DMABUF MR which can lead to CQE with error This patch addresses a potential race condition for a DMABUF MR that can result in a CQE with an error on the UMR QP. During the __mlx5_ib_dereg_mr() flow, the following sequence of calls occurs: mlx5_revoke_mr() mlx5r_umr_revoke_mr() mlx5r_umr_post_send_wait() At this point, the lkey is freed from the hardware's perspective. However, concurrently, mlx5_ib_dmabuf_invalidate_cb() might be triggered by another task attempting to invalidate the MR having that freed lkey. Since the lkey has already been freed, this can lead to a CQE error, causing the UMR QP to enter an error state. To resolve this race condition, the dma_resv_lock() which was hold as part of the mlx5_ib_dmabuf_invalidate_cb() is now also acquired as part of the mlx5_revoke_mr() scope. Upon a successful revoke, we set umem_dmabuf->private which points to that MR to NULL, preventing any further invalidation attempts on its lkey. Fixes: e6fb246ccafb ("RDMA/mlx5: Consolidate MR destruction to mlx5_ib_dereg_mr()") Signed-off-by: Yishai Hadas Reviewed-by: Artemy Kovalyov Link: https://patch.msgid.link/70617067abbfaa0c816a2544c922e7f4346def58.1738587016.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index bb02b6adbf2c2..0a3cbb14e1839 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1550,7 +1550,7 @@ static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach) dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); - if (!umem_dmabuf->sgt) + if (!umem_dmabuf->sgt || !mr) return; mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); @@ -2022,11 +2022,16 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; bool is_odp = is_odp_mr(mr); + bool is_odp_dma_buf = is_dmabuf_mr(mr) && + !to_ib_umem_dmabuf(mr->umem)->pinned; int ret = 0; if (is_odp) mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); + if (is_odp_dma_buf) + dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, NULL); + if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) { ent = mr->mmkey.cache_ent; /* upon storing to a clean temp entry - schedule its cleanup */ @@ -2054,6 +2059,12 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex); } + if (is_odp_dma_buf) { + if (!ret) + to_ib_umem_dmabuf(mr->umem)->private = NULL; + dma_resv_unlock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); + } + return ret; } -- GitLab From abc7b3f1f056d69a8f11d6dceecc0c9549ace770 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 3 Feb 2025 14:51:43 +0200 Subject: [PATCH 0307/1585] RDMA/mlx5: Fix a WARN during dereg_mr for DM type Memory regions (MR) of type DM (device memory) do not have an associated umem. In the __mlx5_ib_dereg_mr() -> mlx5_free_priv_descs() flow, the code incorrectly takes the wrong branch, attempting to call dma_unmap_single() on a DMA address that is not mapped. This results in a WARN [1], as shown below. The issue is resolved by properly accounting for the DM type and ensuring the correct branch is selected in mlx5_free_priv_descs(). [1] WARNING: CPU: 12 PID: 1346 at drivers/iommu/dma-iommu.c:1230 iommu_dma_unmap_page+0x79/0x90 Modules linked in: ip6table_mangle ip6table_nat ip6table_filter ip6_tables iptable_mangle xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry ovelay rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core fuse mlx5_core CPU: 12 UID: 0 PID: 1346 Comm: ibv_rc_pingpong Not tainted 6.12.0-rc7+ #1631 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:iommu_dma_unmap_page+0x79/0x90 Code: 2b 49 3b 29 72 26 49 3b 69 08 73 20 4d 89 f0 44 89 e9 4c 89 e2 48 89 ee 48 89 df 5b 5d 41 5c 41 5d 41 5e 41 5f e9 07 b8 88 ff <0f> 0b 5b 5d 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc 66 0f 1f 44 00 RSP: 0018:ffffc90001913a10 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff88810194b0a8 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000001 RBP: ffff88810194b0a8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f537abdd740(0000) GS:ffff88885fb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f537aeb8000 CR3: 000000010c248001 CR4: 0000000000372eb0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? __warn+0x84/0x190 ? iommu_dma_unmap_page+0x79/0x90 ? report_bug+0xf8/0x1c0 ? handle_bug+0x55/0x90 ? exc_invalid_op+0x13/0x60 ? asm_exc_invalid_op+0x16/0x20 ? iommu_dma_unmap_page+0x79/0x90 dma_unmap_page_attrs+0xe6/0x290 mlx5_free_priv_descs+0xb0/0xe0 [mlx5_ib] __mlx5_ib_dereg_mr+0x37e/0x520 [mlx5_ib] ? _raw_spin_unlock_irq+0x24/0x40 ? wait_for_completion+0xfe/0x130 ? rdma_restrack_put+0x63/0xe0 [ib_core] ib_dereg_mr_user+0x5f/0x120 [ib_core] ? lock_release+0xc6/0x280 destroy_hw_idr_uobject+0x1d/0x60 [ib_uverbs] uverbs_destroy_uobject+0x58/0x1d0 [ib_uverbs] uobj_destroy+0x3f/0x70 [ib_uverbs] ib_uverbs_cmd_verbs+0x3e4/0xbb0 [ib_uverbs] ? __pfx_uverbs_destroy_def_handler+0x10/0x10 [ib_uverbs] ? lock_acquire+0xc1/0x2f0 ? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs] ? ib_uverbs_ioctl+0x116/0x170 [ib_uverbs] ? lock_release+0xc6/0x280 ib_uverbs_ioctl+0xe7/0x170 [ib_uverbs] ? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs] __x64_sys_ioctl+0x1b0/0xa70 do_syscall_64+0x6b/0x140 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f537adaf17b Code: 0f 1e fa 48 8b 05 1d ad 0c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ed ac 0c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffff218f0b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007ffff218f1d8 RCX: 00007f537adaf17b RDX: 00007ffff218f1c0 RSI: 00000000c0181b01 RDI: 0000000000000003 RBP: 00007ffff218f1a0 R08: 00007f537aa8d010 R09: 0000561ee2e4f270 R10: 00007f537aace3a8 R11: 0000000000000246 R12: 00007ffff218f190 R13: 000000000000001c R14: 0000561ee2e4d7c0 R15: 00007ffff218f450 Fixes: f18ec4223117 ("RDMA/mlx5: Use a union inside mlx5_ib_mr") Signed-off-by: Yishai Hadas Link: https://patch.msgid.link/2039c22cfc3df02378747ba4d623a558b53fc263.1738587076.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 0a3cbb14e1839..753faa9ad06a8 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1935,7 +1935,8 @@ mlx5_alloc_priv_descs(struct ib_device *device, static void mlx5_free_priv_descs(struct mlx5_ib_mr *mr) { - if (!mr->umem && !mr->data_direct && mr->descs) { + if (!mr->umem && !mr->data_direct && + mr->ibmr.type != IB_MR_TYPE_DM && mr->descs) { struct ib_device *device = mr->ibmr.device; int size = mr->max_descs * mr->desc_size; struct mlx5_ib_dev *dev = to_mdev(device); -- GitLab From 29b7bb98234cc287cebef9bccf638c2e3f39be71 Mon Sep 17 00:00:00 2001 From: Konstantin Taranov Date: Wed, 5 Feb 2025 02:30:05 -0800 Subject: [PATCH 0308/1585] RDMA/mana_ib: Allocate PAGE aligned doorbell index Allocate a PAGE aligned doorbell index to ensure each process gets a separate PAGE sized doorbell area space remapped to it in mana_ib_mmap Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter") Signed-off-by: Shiraz Saleem Signed-off-by: Konstantin Taranov Link: https://patch.msgid.link/1738751405-15041-1-git-send-email-kotaranov@linux.microsoft.com Reviewed-by: Long Li Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mana/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index 67c2d43135a8a..457cea6d99095 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -174,7 +174,7 @@ static int mana_gd_allocate_doorbell_page(struct gdma_context *gc, req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE; req.num_resources = 1; - req.alignment = 1; + req.alignment = PAGE_SIZE / MANA_PAGE_SIZE; /* Have GDMA start searching from 0 */ req.allocated_resources = 0; -- GitLab From 0af4c120f5e7a1ea70aff7da2dfb65b6148a3e84 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 5 Feb 2025 12:10:58 +0200 Subject: [PATCH 0309/1585] pinctrl: pinconf-generic: Print unsigned value if a format is registered Commit 3ba11e684d16 ("pinctrl: pinconf-generic: print hex value") unconditionally switched to printing hex values in pinconf_generic_dump_one(). However, if a dump format is registered for the dumped pin, the hex value is printed as well. This hex value does not necessarily correspond 1:1 with the hardware register value (as noted by commit 3ba11e684d16 ("pinctrl: pinconf-generic: print hex value")). As a result, user-facing output may include information like: output drive strength (0x100 uA). To address this, check if a dump format is registered for the dumped property, and print the unsigned value instead when applicable. Fixes: 3ba11e684d16 ("pinctrl: pinconf-generic: print hex value") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/20250205101058.2034860-1-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinconf-generic.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c index 0b13d7f17b325..42547f64453e8 100644 --- a/drivers/pinctrl/pinconf-generic.c +++ b/drivers/pinctrl/pinconf-generic.c @@ -89,12 +89,12 @@ static void pinconf_generic_dump_one(struct pinctrl_dev *pctldev, seq_puts(s, items[i].display); /* Print unit if available */ if (items[i].has_arg) { - seq_printf(s, " (0x%x", - pinconf_to_config_argument(config)); + u32 val = pinconf_to_config_argument(config); + if (items[i].format) - seq_printf(s, " %s)", items[i].format); + seq_printf(s, " (%u %s)", val, items[i].format); else - seq_puts(s, ")"); + seq_printf(s, " (0x%x)", val); } } } -- GitLab From 1e3835a8aea5118d58ff9daa656395e69c8806b2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 4 Feb 2025 13:57:29 -0800 Subject: [PATCH 0310/1585] MAINTAINERS: add entry for ethtool Michal did an amazing job converting ethtool to Netlink, but never added an entry to MAINTAINERS for himself. Create a formal entry so that we can delegate (portions) of this code to folks. Over the last 3 years majority of the reviews have been done by Andrew and I. I suppose Michal didn't want to be on the receiving end of the flood of patches. Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20250204215729.168992-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- MAINTAINERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 74b09dad46626..20c8daf3ce620 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16455,6 +16455,16 @@ F: include/net/dsa.h F: net/dsa/ F: tools/testing/selftests/drivers/net/dsa/ +NETWORKING [ETHTOOL] +M: Andrew Lunn +M: Jakub Kicinski +F: Documentation/netlink/specs/ethtool.yaml +F: Documentation/networking/ethtool-netlink.rst +F: include/linux/ethtool* +F: include/uapi/linux/ethtool* +F: net/ethtool/ +F: tools/testing/selftests/drivers/net/*/ethtool* + NETWORKING [GENERAL] M: "David S. Miller" M: Eric Dumazet -- GitLab From 82b02a7c459922bbf80e45d5f7e2c4cfef617943 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 4 Feb 2025 13:57:50 -0800 Subject: [PATCH 0311/1585] MAINTAINERS: add a sample ethtool section entry I feel like we don't do a good enough keeping authors of driver APIs around. The ethtool code base was very nicely compartmentalized by Michal. Establish a precedent of creating MAINTAINERS entries for "sections" of the ethtool API. Use Andrew and cable test as a sample entry. The entry should ideally cover 3 elements: a core file, test(s), and keywords. The last one is important because we intend the entries to cover core code *and* reviews of drivers implementing given API! Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250204215750.169249-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 20c8daf3ce620..bd705e9123a3a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16465,6 +16465,12 @@ F: include/uapi/linux/ethtool* F: net/ethtool/ F: tools/testing/selftests/drivers/net/*/ethtool* +NETWORKING [ETHTOOL CABLE TEST] +M: Andrew Lunn +F: net/ethtool/cabletest.c +F: tools/testing/selftests/drivers/net/*/ethtool* +K: cable_test + NETWORKING [GENERAL] M: "David S. Miller" M: Eric Dumazet -- GitLab From ba958ac74800573f7f54dbe2a7a7b9a9a523ed52 Mon Sep 17 00:00:00 2001 From: Oleh Zadorozhnyi Date: Tue, 4 Feb 2025 07:17:30 +0200 Subject: [PATCH 0312/1585] kbuild: fix misspelling in scripts/Makefile.lib Signed-off-by: Oleh Zadorozhnyi Signed-off-by: Masahiro Yamada --- scripts/Makefile.lib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index ad55ef201aacb..cad20f0e66ee9 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -305,7 +305,7 @@ endef # These are shared by some Makefile.* files. ifdef CONFIG_LTO_CLANG -# Run $(LD) here to covert LLVM IR to ELF in the following cases: +# Run $(LD) here to convert LLVM IR to ELF in the following cases: # - when this object needs objtool processing, as objtool cannot process LLVM IR # - when this is a single-object module, as modpost cannot process LLVM IR cmd_ld_single = $(if $(objtool-enabled)$(is-single-obj-m), ; $(LD) $(ld_flags) -r -o $(tmp-target) $@; mv $(tmp-target) $@) -- GitLab From 5da7e15fb5a12e78de974d8908f348e279922ce9 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 31 Jan 2025 19:01:42 -0800 Subject: [PATCH 0313/1585] net: Add rx_skb of kfree_skb to raw_tp_null_args[]. Yan Zhai reported a BPF prog could trigger a null-ptr-deref [0] in trace_kfree_skb if the prog does not check if rx_sk is NULL. Commit c53795d48ee8 ("net: add rx_sk to trace_kfree_skb") added rx_sk to trace_kfree_skb, but rx_sk is optional and could be NULL. Let's add kfree_skb to raw_tp_null_args[] to let the BPF verifier validate such a prog and prevent the issue. Now we fail to load such a prog: libbpf: prog 'drop': -- BEGIN PROG LOAD LOG -- 0: R1=ctx() R10=fp0 ; int BPF_PROG(drop, struct sk_buff *skb, void *location, @ kfree_skb_sk_null.bpf.c:21 0: (79) r3 = *(u64 *)(r1 +24) func 'kfree_skb' arg3 has btf_id 5253 type STRUCT 'sock' 1: R1=ctx() R3_w=trusted_ptr_or_null_sock(id=1) ; bpf_printk("sk: %d, %d\n", sk, sk->__sk_common.skc_family); @ kfree_skb_sk_null.bpf.c:24 1: (69) r4 = *(u16 *)(r3 +16) R3 invalid mem access 'trusted_ptr_or_null_' processed 2 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0 -- END PROG LOAD LOG -- Note this fix requires commit 838a10bd2ebf ("bpf: Augment raw_tp arguments with PTR_MAYBE_NULL"). [0]: BUG: kernel NULL pointer dereference, address: 0000000000000010 PF: supervisor read access in kernel mode PF: error_code(0x0000) - not-present page PGD 0 P4D 0 PREEMPT SMP RIP: 0010:bpf_prog_5e21a6db8fcff1aa_drop+0x10/0x2d Call Trace: ? __die+0x1f/0x60 ? page_fault_oops+0x148/0x420 ? search_bpf_extables+0x5b/0x70 ? fixup_exception+0x27/0x2c0 ? exc_page_fault+0x75/0x170 ? asm_exc_page_fault+0x22/0x30 ? bpf_prog_5e21a6db8fcff1aa_drop+0x10/0x2d bpf_trace_run4+0x68/0xd0 ? unix_stream_connect+0x1f4/0x6f0 sk_skb_reason_drop+0x90/0x120 unix_stream_connect+0x1f4/0x6f0 __sys_connect+0x7f/0xb0 __x64_sys_connect+0x14/0x20 do_syscall_64+0x47/0xc30 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Fixes: c53795d48ee8 ("net: add rx_sk to trace_kfree_skb") Reported-by: Yan Zhai Closes: https://lore.kernel.org/netdev/Z50zebTRzI962e6X@debian.debian/ Signed-off-by: Kuniyuki Iwashima Tested-by: Yan Zhai Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20250201030142.62703-1-kuniyu@amazon.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/btf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 9de6acddd479b..c3223e0db2f51 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6507,6 +6507,8 @@ static const struct bpf_raw_tp_null_args raw_tp_null_args[] = { /* rxrpc */ { "rxrpc_recvdata", 0x1 }, { "rxrpc_resend", 0x10 }, + /* skb */ + {"kfree_skb", 0x1000}, /* sunrpc */ { "xs_stream_read_data", 0x1 }, /* ... from xprt_cong_event event class */ -- GitLab From 2a64c96356c87aa8af826605943e5524bf45e24d Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 5 Feb 2025 12:57:47 +0000 Subject: [PATCH 0314/1585] Revert "net: stmmac: Specify hardware capability value when FIFO size isn't specified" This reverts commit 8865d22656b4, which caused breakage for platforms which are not using xgmac2 or gmac4. Only these two cores have the capability of providing the FIFO sizes from hardware capability fields (which are provided in priv->dma_cap.[tr]x_fifo_size.) All other cores can not, which results in these two fields containing zero. We also have platforms that do not provide a value in priv->plat->[tr]x_fifo_size, resulting in these also being zero. This causes the new tests introduced by the reverted commit to fail, and produce e.g.: stmmaceth f0804000.eth: Can't specify Rx FIFO size An example of such a platform which fails is QEMU's npcm750-evb. This uses dwmac1000 which, as noted above, does not have the capability to provide the FIFO sizes from hardware. Therefore, revert the commit to maintain compatibility with the way the driver used to work. Reported-by: Guenter Roeck Link: https://lore.kernel.org/r/4e98f967-f636-46fb-9eca-d383b9495b86@roeck-us.net Signed-off-by: Russell King (Oracle) Tested-by: Steven Price Fixes: 8865d22656b4 ("net: stmmac: Specify hardware capability value when FIFO size isn't specified") Link: https://patch.msgid.link/E1tfeyR-003YGJ-Gb@rmk-PC.armlinux.org.uk Signed-off-by: Paolo Abeni --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 35 +++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d04543e5697b0..b34ebb916b898 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2424,6 +2424,11 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) u32 chan = 0; u8 qmode = 0; + if (rxfifosz == 0) + rxfifosz = priv->dma_cap.rx_fifo_size; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + /* Split up the shared Tx/Rx FIFO memory on DW QoS Eth and DW XGMAC */ if (priv->plat->has_gmac4 || priv->plat->has_xgmac) { rxfifosz /= rx_channels_count; @@ -2892,6 +2897,11 @@ static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode, int rxfifosz = priv->plat->rx_fifo_size; int txfifosz = priv->plat->tx_fifo_size; + if (rxfifosz == 0) + rxfifosz = priv->dma_cap.rx_fifo_size; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + /* Adjust for real per queue fifo size */ rxfifosz /= rx_channels_count; txfifosz /= tx_channels_count; @@ -5868,6 +5878,9 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) const int mtu = new_mtu; int ret; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + txfifosz /= priv->plat->tx_queues_to_use; if (stmmac_xdp_is_enabled(priv) && new_mtu > ETH_DATA_LEN) { @@ -7219,29 +7232,15 @@ static int stmmac_hw_init(struct stmmac_priv *priv) priv->plat->tx_queues_to_use = priv->dma_cap.number_tx_queues; } - if (!priv->plat->rx_fifo_size) { - if (priv->dma_cap.rx_fifo_size) { - priv->plat->rx_fifo_size = priv->dma_cap.rx_fifo_size; - } else { - dev_err(priv->device, "Can't specify Rx FIFO size\n"); - return -ENODEV; - } - } else if (priv->dma_cap.rx_fifo_size && - priv->plat->rx_fifo_size > priv->dma_cap.rx_fifo_size) { + if (priv->dma_cap.rx_fifo_size && + priv->plat->rx_fifo_size > priv->dma_cap.rx_fifo_size) { dev_warn(priv->device, "Rx FIFO size (%u) exceeds dma capability\n", priv->plat->rx_fifo_size); priv->plat->rx_fifo_size = priv->dma_cap.rx_fifo_size; } - if (!priv->plat->tx_fifo_size) { - if (priv->dma_cap.tx_fifo_size) { - priv->plat->tx_fifo_size = priv->dma_cap.tx_fifo_size; - } else { - dev_err(priv->device, "Can't specify Tx FIFO size\n"); - return -ENODEV; - } - } else if (priv->dma_cap.tx_fifo_size && - priv->plat->tx_fifo_size > priv->dma_cap.tx_fifo_size) { + if (priv->dma_cap.tx_fifo_size && + priv->plat->tx_fifo_size > priv->dma_cap.tx_fifo_size) { dev_warn(priv->device, "Tx FIFO size (%u) exceeds dma capability\n", priv->plat->tx_fifo_size); -- GitLab From 679074942c2502a95842a80471d8fb718165ac77 Mon Sep 17 00:00:00 2001 From: Vitaly Rodionov Date: Wed, 5 Feb 2025 16:08:46 +0000 Subject: [PATCH 0315/1585] ASoC: arizona/madera: use fsleep() in up/down DAPM event delays. Using `fsleep` instead of `msleep` resolves some customer complaints regarding the precision of up/down DAPM event timing. `fsleep()` automatically selects the appropriate sleep function, making the delay time more predictable. Signed-off-by: Vitaly Rodionov Link: https://patch.msgid.link/20250205160849.500306-1-vitalyr@opensource.cirrus.com Reviewed-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/arizona.c | 14 +++++++------- sound/soc/codecs/madera.c | 10 +++++----- sound/soc/codecs/wm5110.c | 8 ++++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c index 402b9a2ff0240..68cdb1027d0c0 100644 --- a/sound/soc/codecs/arizona.c +++ b/sound/soc/codecs/arizona.c @@ -967,7 +967,7 @@ int arizona_out_ev(struct snd_soc_dapm_widget *w, case ARIZONA_OUT3L_ENA_SHIFT: case ARIZONA_OUT3R_ENA_SHIFT: priv->out_up_pending++; - priv->out_up_delay += 17; + priv->out_up_delay += 17000; break; case ARIZONA_OUT4L_ENA_SHIFT: case ARIZONA_OUT4R_ENA_SHIFT: @@ -977,7 +977,7 @@ int arizona_out_ev(struct snd_soc_dapm_widget *w, case WM8997: break; default: - priv->out_up_delay += 10; + priv->out_up_delay += 10000; break; } break; @@ -999,7 +999,7 @@ int arizona_out_ev(struct snd_soc_dapm_widget *w, if (!priv->out_up_pending && priv->out_up_delay) { dev_dbg(component->dev, "Power up delay: %d\n", priv->out_up_delay); - msleep(priv->out_up_delay); + fsleep(priv->out_up_delay); priv->out_up_delay = 0; } break; @@ -1017,7 +1017,7 @@ int arizona_out_ev(struct snd_soc_dapm_widget *w, case ARIZONA_OUT3L_ENA_SHIFT: case ARIZONA_OUT3R_ENA_SHIFT: priv->out_down_pending++; - priv->out_down_delay++; + priv->out_down_delay += 1000; break; case ARIZONA_OUT4L_ENA_SHIFT: case ARIZONA_OUT4R_ENA_SHIFT: @@ -1028,10 +1028,10 @@ int arizona_out_ev(struct snd_soc_dapm_widget *w, break; case WM8998: case WM1814: - priv->out_down_delay += 5; + priv->out_down_delay += 5000; break; default: - priv->out_down_delay++; + priv->out_down_delay += 1000; break; } break; @@ -1053,7 +1053,7 @@ int arizona_out_ev(struct snd_soc_dapm_widget *w, if (!priv->out_down_pending && priv->out_down_delay) { dev_dbg(component->dev, "Power down delay: %d\n", priv->out_down_delay); - msleep(priv->out_down_delay); + fsleep(priv->out_down_delay); priv->out_down_delay = 0; } break; diff --git a/sound/soc/codecs/madera.c b/sound/soc/codecs/madera.c index a840a2eb92b93..af109761f3599 100644 --- a/sound/soc/codecs/madera.c +++ b/sound/soc/codecs/madera.c @@ -2323,10 +2323,10 @@ int madera_out_ev(struct snd_soc_dapm_widget *w, case CS42L92: case CS47L92: case CS47L93: - out_up_delay = 6; + out_up_delay = 6000; break; default: - out_up_delay = 17; + out_up_delay = 17000; break; } @@ -2357,7 +2357,7 @@ int madera_out_ev(struct snd_soc_dapm_widget *w, case MADERA_OUT3R_ENA_SHIFT: priv->out_up_pending--; if (!priv->out_up_pending) { - msleep(priv->out_up_delay); + fsleep(priv->out_up_delay); priv->out_up_delay = 0; } break; @@ -2376,7 +2376,7 @@ int madera_out_ev(struct snd_soc_dapm_widget *w, case MADERA_OUT3L_ENA_SHIFT: case MADERA_OUT3R_ENA_SHIFT: priv->out_down_pending++; - priv->out_down_delay++; + priv->out_down_delay += 1000; break; default: break; @@ -2393,7 +2393,7 @@ int madera_out_ev(struct snd_soc_dapm_widget *w, case MADERA_OUT3R_ENA_SHIFT: priv->out_down_pending--; if (!priv->out_down_pending) { - msleep(priv->out_down_delay); + fsleep(priv->out_down_delay); priv->out_down_delay = 0; } break; diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index 502196253d42a..64eee0d2347da 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -302,7 +302,7 @@ static int wm5110_hp_pre_enable(struct snd_soc_dapm_widget *w) } else { wseq = wm5110_no_dre_left_enable; nregs = ARRAY_SIZE(wm5110_no_dre_left_enable); - priv->out_up_delay += 10; + priv->out_up_delay += 10000; } break; case ARIZONA_OUT1R_ENA_SHIFT: @@ -312,7 +312,7 @@ static int wm5110_hp_pre_enable(struct snd_soc_dapm_widget *w) } else { wseq = wm5110_no_dre_right_enable; nregs = ARRAY_SIZE(wm5110_no_dre_right_enable); - priv->out_up_delay += 10; + priv->out_up_delay += 10000; } break; default: @@ -338,7 +338,7 @@ static int wm5110_hp_pre_disable(struct snd_soc_dapm_widget *w) snd_soc_component_update_bits(component, ARIZONA_SPARE_TRIGGERS, ARIZONA_WS_TRG1, 0); - priv->out_down_delay += 27; + priv->out_down_delay += 27000; } break; case ARIZONA_OUT1R_ENA_SHIFT: @@ -350,7 +350,7 @@ static int wm5110_hp_pre_disable(struct snd_soc_dapm_widget *w) snd_soc_component_update_bits(component, ARIZONA_SPARE_TRIGGERS, ARIZONA_WS_TRG2, 0); - priv->out_down_delay += 27; + priv->out_down_delay += 27000; } break; default: -- GitLab From 1d44a30ae3f9195cb4eb7d81bb9ced2776232094 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 5 Feb 2025 16:48:04 +0000 Subject: [PATCH 0316/1585] ASoC: cs35l41: Fallback to using HID for system_name if no SUB is available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For systems which load firmware on the cs35l41 which use ACPI, the _SUB value is used to differentiate firmware and tuning files for the individual systems. In the case where a system does not have a _SUB defined in ACPI node for cs35l41, there needs to be a fallback to allow the files for that system to be differentiated. Since all ACPI nodes for cs35l41 should have a HID defined, the HID should be a safe option. Signed-off-by: Stefan Binding Reviewed-by: André Almeida Tested-by: André Almeida Link: https://patch.msgid.link/20250205164806.414020-1-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l41.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/cs35l41.c b/sound/soc/codecs/cs35l41.c index 07a5cab35fe10..30b89018b1139 100644 --- a/sound/soc/codecs/cs35l41.c +++ b/sound/soc/codecs/cs35l41.c @@ -1150,19 +1150,28 @@ static int cs35l41_dsp_init(struct cs35l41_private *cs35l41) static int cs35l41_acpi_get_name(struct cs35l41_private *cs35l41) { - acpi_handle handle = ACPI_HANDLE(cs35l41->dev); + struct acpi_device *adev = ACPI_COMPANION(cs35l41->dev); + acpi_handle handle = acpi_device_handle(adev); + const char *hid; const char *sub; - /* If there is no ACPI_HANDLE, there is no ACPI for this system, return 0 */ - if (!handle) + /* If there is no acpi_device, there is no ACPI for this system, return 0 */ + if (!adev) return 0; sub = acpi_get_subsystem_id(handle); if (IS_ERR(sub)) { - /* If bad ACPI, return 0 and fallback to legacy firmware path, otherwise fail */ - if (PTR_ERR(sub) == -ENODATA) - return 0; - else + /* If no _SUB, fallback to _HID, otherwise fail */ + if (PTR_ERR(sub) == -ENODATA) { + hid = acpi_device_hid(adev); + /* If dummy hid, return 0 and fallback to legacy firmware path */ + if (!strcmp(hid, "device")) + return 0; + sub = kstrdup(hid, GFP_KERNEL); + if (!sub) + sub = ERR_PTR(-ENOMEM); + + } else return PTR_ERR(sub); } -- GitLab From 6fd60136d256b3b948333ebdb3835f41a95ab7ef Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 6 Feb 2025 10:46:42 +0200 Subject: [PATCH 0317/1585] ASoC: SOF: ipc4-topology: Harden loops for looking up ALH copiers Other, non DAI copier widgets could have the same stream name (sname) as the ALH copier and in that case the copier->data is NULL, no alh_data is attached, which could lead to NULL pointer dereference. We could check for this NULL pointer in sof_ipc4_prepare_copier_module() and avoid the crash, but a similar loop in sof_ipc4_widget_setup_comp_dai() will miscalculate the ALH device count, causing broken audio. The correct fix is to harden the matching logic by making sure that the 1. widget is a DAI widget - so dai = w->private is valid 2. the dai (and thus the copier) is ALH copier Fixes: a150345aa758 ("ASoC: SOF: ipc4-topology: add SoundWire/ALH aggregation support") Reported-by: Seppo Ingalsuo Link: https://github.com/thesofproject/sof/pull/9652 Signed-off-by: Peter Ujfalusi Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Reviewed-by: Bard Liao Link: https://patch.msgid.link/20250206084642.14988-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc4-topology.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c index c04c62478827a..6d5cda813e487 100644 --- a/sound/soc/sof/ipc4-topology.c +++ b/sound/soc/sof/ipc4-topology.c @@ -765,10 +765,16 @@ static int sof_ipc4_widget_setup_comp_dai(struct snd_sof_widget *swidget) } list_for_each_entry(w, &sdev->widget_list, list) { - if (w->widget->sname && + struct snd_sof_dai *alh_dai; + + if (!WIDGET_IS_DAI(w->id) || !w->widget->sname || strcmp(w->widget->sname, swidget->widget->sname)) continue; + alh_dai = w->private; + if (alh_dai->type != SOF_DAI_INTEL_ALH) + continue; + blob->alh_cfg.device_count++; } @@ -2061,11 +2067,13 @@ sof_ipc4_prepare_copier_module(struct snd_sof_widget *swidget, list_for_each_entry(w, &sdev->widget_list, list) { u32 node_type; - if (w->widget->sname && + if (!WIDGET_IS_DAI(w->id) || !w->widget->sname || strcmp(w->widget->sname, swidget->widget->sname)) continue; dai = w->private; + if (dai->type != SOF_DAI_INTEL_ALH) + continue; alh_copier = (struct sof_ipc4_copier *)dai->private; alh_data = &alh_copier->data; node_type = SOF_IPC4_GET_NODE_TYPE(alh_data->gtw_cfg.node_id); -- GitLab From 33b7dc7843dbdc9b90c91d11ba30b107f9138ffd Mon Sep 17 00:00:00 2001 From: Terry Cheong Date: Thu, 6 Feb 2025 11:47:23 +0200 Subject: [PATCH 0318/1585] ASoC: SOF: Intel: hda: add softdep pre to snd-hda-codec-hdmi module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In enviornment without KMOD requesting module may fail to load snd-hda-codec-hdmi, resulting in HDMI audio not usable. Add softdep to loading HDMI codec module first to ensure we can load it correctly. Signed-off-by: Terry Cheong Reviewed-by: Bard Liao Reviewed-by: Johny Lin Reviewed-by: Péter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://patch.msgid.link/20250206094723.18013-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda-codec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sof/intel/hda-codec.c b/sound/soc/sof/intel/hda-codec.c index 568f3dfe822f5..2f9925830d1d5 100644 --- a/sound/soc/sof/intel/hda-codec.c +++ b/sound/soc/sof/intel/hda-codec.c @@ -454,6 +454,7 @@ int hda_codec_i915_exit(struct snd_sof_dev *sdev) } EXPORT_SYMBOL_NS_GPL(hda_codec_i915_exit, "SND_SOC_SOF_HDA_AUDIO_CODEC_I915"); +MODULE_SOFTDEP("pre: snd-hda-codec-hdmi"); #endif MODULE_LICENSE("Dual BSD/GPL"); -- GitLab From 3588b1c0fde2f58d166e3f94a5a58d64b893526c Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Thu, 6 Feb 2025 17:57:47 +0900 Subject: [PATCH 0319/1585] spi: sn-f-ospi: Fix division by zero When there is no dummy cycle in the spi-nor commands, both dummy bus cycle bytes and width are zero. Because of the cpu's warning when divided by zero, the warning should be avoided. Return just zero to avoid such calculations. Fixes: 1b74dd64c861 ("spi: Add Socionext F_OSPI SPI flash controller driver") Co-developed-by: Kohei Ito Signed-off-by: Kohei Ito Signed-off-by: Kunihiko Hayashi Link: https://patch.msgid.link/20250206085747.3834148-1-hayashi.kunihiko@socionext.com Signed-off-by: Mark Brown --- drivers/spi/spi-sn-f-ospi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/spi/spi-sn-f-ospi.c b/drivers/spi/spi-sn-f-ospi.c index 6ad4b729897e3..c4969f66a0ba9 100644 --- a/drivers/spi/spi-sn-f-ospi.c +++ b/drivers/spi/spi-sn-f-ospi.c @@ -116,6 +116,9 @@ struct f_ospi { static u32 f_ospi_get_dummy_cycle(const struct spi_mem_op *op) { + if (!op->dummy.nbytes) + return 0; + return (op->dummy.nbytes * 8) / op->dummy.buswidth; } -- GitLab From 517e8a7835e8cfb398a0aeb0133de50e31cae32b Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 5 Feb 2025 17:00:59 +0000 Subject: [PATCH 0320/1585] bpf: Fix softlockup in arena_map_free on 64k page kernel On an aarch64 kernel with CONFIG_PAGE_SIZE_64KB=y, arena_htab tests cause a segmentation fault and soft lockup. The same failure is not observed with 4k pages on aarch64. It turns out arena_map_free() is calling apply_to_existing_page_range() with the address returned by bpf_arena_get_kern_vm_start(). If this address is not page-aligned the code ends up calling apply_to_pte_range() with that unaligned address causing soft lockup. Fix it by round up GUARD_SZ to PAGE_SIZE << 1 so that the division by 2 in bpf_arena_get_kern_vm_start() returns a page-aligned value. Fixes: 317460317a02 ("bpf: Introduce bpf_arena.") Reported-by: Colm Harrington Suggested-by: Alexei Starovoitov Signed-off-by: Alan Maguire Link: https://lore.kernel.org/r/20250205170059.427458-1-alan.maguire@oracle.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c index 870aeb51d70ad..095a9554e1def 100644 --- a/kernel/bpf/arena.c +++ b/kernel/bpf/arena.c @@ -39,7 +39,7 @@ */ /* number of bytes addressable by LDX/STX insn with 16-bit 'off' field */ -#define GUARD_SZ (1ull << sizeof_field(struct bpf_insn, off) * 8) +#define GUARD_SZ round_up(1ull << sizeof_field(struct bpf_insn, off) * 8, PAGE_SIZE << 1) #define KERN_VM_SZ (SZ_4G + GUARD_SZ) struct bpf_arena { -- GitLab From fb97bc2e47f694f79d6358d981ae0428db8e8088 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 29 Jan 2025 15:21:53 +0100 Subject: [PATCH 0321/1585] drm/tests: hdmi: Fix WW_MUTEX_SLOWPATH failures The light_up_connector helper function in the HDMI infrastructure unit tests uses drm_atomic_set_crtc_for_connector(), but fails when it returns an error. This function can return EDEADLK though if the sequence needs to be restarted, and WW_MUTEX_SLOWPATH is meant to test that we handle it properly. Let's handle EDEADLK and restart the sequence in our tests as well. Fixes: eb66d34d793e ("drm/tests: Add output bpc tests") Reported-by: Dave Airlie Closes: https://lore.kernel.org/r/CAPM=9tzJ4-ERDxvuwrCyUPY0=+P44orhp1kLWVGL7MCfpQjMEQ@mail.gmail.com/ Link: https://lore.kernel.org/r/20241031091558.2435850-1-mripard@kernel.org Reviewed-by: Simona Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20250129-test-kunit-v2-1-fe59c43805d5@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c index b976a5e9aef58..8e6eb94075a5e 100644 --- a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c +++ b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c @@ -70,10 +70,17 @@ static int light_up_connector(struct kunit *test, state = drm_kunit_helper_atomic_state_alloc(test, drm, ctx); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); +retry: conn_state = drm_atomic_get_connector_state(state, connector); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, conn_state); ret = drm_atomic_set_crtc_for_connector(conn_state, crtc); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(ctx); + if (!ret) + goto retry; + } KUNIT_EXPECT_EQ(test, ret, 0); crtc_state = drm_atomic_get_crtc_state(state, crtc); -- GitLab From bb4f929a8875b4801db95b8cf3b2c527c1e475e0 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 29 Jan 2025 15:21:54 +0100 Subject: [PATCH 0322/1585] drm/tests: hdmi: Remove redundant assignments Some tests have the drm pointer assigned multiple times to the same value. Drop the redundant assignments. Reviewed-by: Simona Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20250129-test-kunit-v2-2-fe59c43805d5@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c index 8e6eb94075a5e..a36422aa9e274 100644 --- a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c +++ b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c @@ -481,7 +481,6 @@ static void drm_test_check_broadcast_rgb_auto_cea_mode_vic_1(struct kunit *test) mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1); KUNIT_ASSERT_NOT_NULL(test, mode); - drm = &priv->drm; crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, mode, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -595,7 +594,6 @@ static void drm_test_check_broadcast_rgb_full_cea_mode_vic_1(struct kunit *test) mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1); KUNIT_ASSERT_NOT_NULL(test, mode); - drm = &priv->drm; crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, mode, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -711,7 +709,6 @@ static void drm_test_check_broadcast_rgb_limited_cea_mode_vic_1(struct kunit *te mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1); KUNIT_ASSERT_NOT_NULL(test, mode); - drm = &priv->drm; crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, mode, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -1313,7 +1310,6 @@ static void drm_test_check_output_bpc_format_vic_1(struct kunit *test) rate = mode->clock * 1500; KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, mode, ctx); KUNIT_EXPECT_EQ(test, ret, 0); -- GitLab From 6b6bfd63e1626ceedc738b2a06505aa5b46c1481 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 29 Jan 2025 15:21:55 +0100 Subject: [PATCH 0323/1585] drm/tests: hdmi: Reorder DRM entities variables assignment The tests all deviate slightly in how they assign their local pointers to DRM entities. This makes refactoring pretty difficult, so let's just move the assignment as soon as the entities are allocated. Reviewed-by: Simona Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20250129-test-kunit-v2-3-fe59c43805d5@kernel.org Signed-off-by: Maxime Ripard --- .../drm/tests/drm_hdmi_state_helper_test.c | 81 ++++++++++--------- 1 file changed, 42 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c index a36422aa9e274..925724b578789 100644 --- a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c +++ b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c @@ -289,15 +289,16 @@ static void drm_test_check_broadcast_rgb_crtc_mode_changed(struct kunit *test) 8); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; + conn = &priv->connector; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - conn = &priv->connector; preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); - drm = &priv->drm; - crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -352,15 +353,16 @@ static void drm_test_check_broadcast_rgb_crtc_mode_not_changed(struct kunit *tes 8); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; + conn = &priv->connector; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - conn = &priv->connector; preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); - drm = &priv->drm; - crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -415,6 +417,8 @@ static void drm_test_check_broadcast_rgb_auto_cea_mode(struct kunit *test) 8); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; KUNIT_ASSERT_TRUE(test, conn->display_info.is_hdmi); @@ -425,8 +429,6 @@ static void drm_test_check_broadcast_rgb_auto_cea_mode(struct kunit *test) KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_NE(test, drm_match_cea_mode(preferred), 1); - drm = &priv->drm; - crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -526,6 +528,8 @@ static void drm_test_check_broadcast_rgb_full_cea_mode(struct kunit *test) 8); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; KUNIT_ASSERT_TRUE(test, conn->display_info.is_hdmi); @@ -536,8 +540,6 @@ static void drm_test_check_broadcast_rgb_full_cea_mode(struct kunit *test) KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_NE(test, drm_match_cea_mode(preferred), 1); - drm = &priv->drm; - crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -641,6 +643,8 @@ static void drm_test_check_broadcast_rgb_limited_cea_mode(struct kunit *test) 8); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; KUNIT_ASSERT_TRUE(test, conn->display_info.is_hdmi); @@ -651,8 +655,6 @@ static void drm_test_check_broadcast_rgb_limited_cea_mode(struct kunit *test) KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_NE(test, drm_match_cea_mode(preferred), 1); - drm = &priv->drm; - crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -758,6 +760,8 @@ static void drm_test_check_output_bpc_crtc_mode_changed(struct kunit *test) 10); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, @@ -770,8 +774,6 @@ static void drm_test_check_output_bpc_crtc_mode_changed(struct kunit *test) preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); - drm = &priv->drm; - crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -832,6 +834,8 @@ static void drm_test_check_output_bpc_crtc_mode_not_changed(struct kunit *test) 10); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, @@ -844,8 +848,6 @@ static void drm_test_check_output_bpc_crtc_mode_not_changed(struct kunit *test) preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); - drm = &priv->drm; - crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -903,6 +905,8 @@ static void drm_test_check_output_bpc_dvi(struct kunit *test) 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_dvi_1080p, @@ -918,8 +922,6 @@ static void drm_test_check_output_bpc_dvi(struct kunit *test) preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); - drm = &priv->drm; - crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -950,6 +952,8 @@ static void drm_test_check_tmds_char_rate_rgb_8bpc(struct kunit *test) 8); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_max_200mhz, @@ -963,8 +967,6 @@ static void drm_test_check_tmds_char_rate_rgb_8bpc(struct kunit *test) KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_FALSE(test, preferred->flags & DRM_MODE_FLAG_DBLCLK); - drm = &priv->drm; - crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -997,6 +999,8 @@ static void drm_test_check_tmds_char_rate_rgb_10bpc(struct kunit *test) 10); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz, @@ -1010,8 +1014,6 @@ static void drm_test_check_tmds_char_rate_rgb_10bpc(struct kunit *test) KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_FALSE(test, preferred->flags & DRM_MODE_FLAG_DBLCLK); - drm = &priv->drm; - crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -1044,6 +1046,8 @@ static void drm_test_check_tmds_char_rate_rgb_12bpc(struct kunit *test) 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz, @@ -1057,8 +1061,6 @@ static void drm_test_check_tmds_char_rate_rgb_12bpc(struct kunit *test) KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_FALSE(test, preferred->flags & DRM_MODE_FLAG_DBLCLK); - drm = &priv->drm; - crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -1095,15 +1097,16 @@ static void drm_test_check_hdmi_funcs_reject_rate(struct kunit *test) 8); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; + conn = &priv->connector; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - conn = &priv->connector; preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); - drm = &priv->drm; - crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -1151,6 +1154,8 @@ static void drm_test_check_max_tmds_rate_bpc_fallback(struct kunit *test) 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, @@ -1174,8 +1179,6 @@ static void drm_test_check_max_tmds_rate_bpc_fallback(struct kunit *test) rate = drm_hdmi_compute_mode_clock(preferred, 10, HDMI_COLORSPACE_RGB); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; - crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1220,6 +1223,8 @@ static void drm_test_check_max_tmds_rate_format_fallback(struct kunit *test) 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, @@ -1246,8 +1251,6 @@ static void drm_test_check_max_tmds_rate_format_fallback(struct kunit *test) rate = drm_hdmi_compute_mode_clock(preferred, 12, HDMI_COLORSPACE_YUV422); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; - crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1343,6 +1346,8 @@ static void drm_test_check_output_bpc_format_driver_rgb_only(struct kunit *test) 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, @@ -1374,8 +1379,6 @@ static void drm_test_check_output_bpc_format_driver_rgb_only(struct kunit *test) rate = drm_hdmi_compute_mode_clock(preferred, 12, HDMI_COLORSPACE_YUV422); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; - crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1410,6 +1413,8 @@ static void drm_test_check_output_bpc_format_display_rgb_only(struct kunit *test 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_max_200mhz, @@ -1441,8 +1446,6 @@ static void drm_test_check_output_bpc_format_display_rgb_only(struct kunit *test rate = drm_hdmi_compute_mode_clock(preferred, 12, HDMI_COLORSPACE_YUV422); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; - crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1476,6 +1479,8 @@ static void drm_test_check_output_bpc_format_driver_8bpc_only(struct kunit *test 8); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz, @@ -1499,8 +1504,6 @@ static void drm_test_check_output_bpc_format_driver_8bpc_only(struct kunit *test rate = drm_hdmi_compute_mode_clock(preferred, 12, HDMI_COLORSPACE_RGB); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; - crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1536,6 +1539,8 @@ static void drm_test_check_output_bpc_format_display_8bpc_only(struct kunit *tes 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_max_340mhz, @@ -1559,8 +1564,6 @@ static void drm_test_check_output_bpc_format_display_8bpc_only(struct kunit *tes rate = drm_hdmi_compute_mode_clock(preferred, 12, HDMI_COLORSPACE_RGB); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; - crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); -- GitLab From 5d14c08a47460e8eedf0185a28b116420ea7f29d Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 29 Jan 2025 15:21:56 +0100 Subject: [PATCH 0324/1585] drm/tests: hdmi: Fix recursive locking The find_preferred_mode() functions takes the mode_config mutex, but due to the order most tests have, is called with the crtc_ww_class_mutex taken. This raises a warning for a circular dependency when running the tests with lockdep. Reorder the tests to call find_preferred_mode before the acquire context has been created to avoid the issue. Reviewed-by: Simona Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20250129-test-kunit-v2-4-fe59c43805d5@kernel.org Signed-off-by: Maxime Ripard --- .../drm/tests/drm_hdmi_state_helper_test.c | 114 +++++++++--------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c index 925724b578789..23ecc00accb21 100644 --- a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c +++ b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c @@ -293,12 +293,12 @@ static void drm_test_check_broadcast_rgb_crtc_mode_changed(struct kunit *test) crtc = priv->crtc; conn = &priv->connector; - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -357,12 +357,12 @@ static void drm_test_check_broadcast_rgb_crtc_mode_not_changed(struct kunit *tes crtc = priv->crtc; conn = &priv->connector; - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -422,13 +422,13 @@ static void drm_test_check_broadcast_rgb_auto_cea_mode(struct kunit *test) conn = &priv->connector; KUNIT_ASSERT_TRUE(test, conn->display_info.is_hdmi); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_NE(test, drm_match_cea_mode(preferred), 1); + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -533,13 +533,13 @@ static void drm_test_check_broadcast_rgb_full_cea_mode(struct kunit *test) conn = &priv->connector; KUNIT_ASSERT_TRUE(test, conn->display_info.is_hdmi); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_NE(test, drm_match_cea_mode(preferred), 1); + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -648,13 +648,13 @@ static void drm_test_check_broadcast_rgb_limited_cea_mode(struct kunit *test) conn = &priv->connector; KUNIT_ASSERT_TRUE(test, conn->display_info.is_hdmi); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_NE(test, drm_match_cea_mode(preferred), 1); + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -768,12 +768,12 @@ static void drm_test_check_output_bpc_crtc_mode_changed(struct kunit *test) ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz)); KUNIT_ASSERT_GT(test, ret, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -842,12 +842,12 @@ static void drm_test_check_output_bpc_crtc_mode_not_changed(struct kunit *test) ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz)); KUNIT_ASSERT_GT(test, ret, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -916,12 +916,12 @@ static void drm_test_check_output_bpc_dvi(struct kunit *test) info = &conn->display_info; KUNIT_ASSERT_FALSE(test, info->is_hdmi); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -960,13 +960,13 @@ static void drm_test_check_tmds_char_rate_rgb_8bpc(struct kunit *test) ARRAY_SIZE(test_edid_hdmi_1080p_rgb_max_200mhz)); KUNIT_ASSERT_GT(test, ret, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_FALSE(test, preferred->flags & DRM_MODE_FLAG_DBLCLK); + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -1007,13 +1007,13 @@ static void drm_test_check_tmds_char_rate_rgb_10bpc(struct kunit *test) ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz)); KUNIT_ASSERT_GT(test, ret, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_FALSE(test, preferred->flags & DRM_MODE_FLAG_DBLCLK); + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -1054,13 +1054,13 @@ static void drm_test_check_tmds_char_rate_rgb_12bpc(struct kunit *test) ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz)); KUNIT_ASSERT_GT(test, ret, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_FALSE(test, preferred->flags & DRM_MODE_FLAG_DBLCLK); + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -1101,12 +1101,12 @@ static void drm_test_check_hdmi_funcs_reject_rate(struct kunit *test) crtc = priv->crtc; conn = &priv->connector; - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -1166,9 +1166,6 @@ static void drm_test_check_max_tmds_rate_bpc_fallback(struct kunit *test) KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_FALSE(test, preferred->flags & DRM_MODE_FLAG_DBLCLK); @@ -1179,6 +1176,9 @@ static void drm_test_check_max_tmds_rate_bpc_fallback(struct kunit *test) rate = drm_hdmi_compute_mode_clock(preferred, 10, HDMI_COLORSPACE_RGB); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1235,9 +1235,6 @@ static void drm_test_check_max_tmds_rate_format_fallback(struct kunit *test) KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_FALSE(test, preferred->flags & DRM_MODE_FLAG_DBLCLK); @@ -1251,6 +1248,9 @@ static void drm_test_check_max_tmds_rate_format_fallback(struct kunit *test) rate = drm_hdmi_compute_mode_clock(preferred, 12, HDMI_COLORSPACE_YUV422); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1297,9 +1297,6 @@ static void drm_test_check_output_bpc_format_vic_1(struct kunit *test) KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1); KUNIT_ASSERT_NOT_NULL(test, mode); @@ -1313,6 +1310,9 @@ static void drm_test_check_output_bpc_format_vic_1(struct kunit *test) rate = mode->clock * 1500; KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, mode, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1358,9 +1358,6 @@ static void drm_test_check_output_bpc_format_driver_rgb_only(struct kunit *test) KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); @@ -1379,6 +1376,9 @@ static void drm_test_check_output_bpc_format_driver_rgb_only(struct kunit *test) rate = drm_hdmi_compute_mode_clock(preferred, 12, HDMI_COLORSPACE_YUV422); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1425,9 +1425,6 @@ static void drm_test_check_output_bpc_format_display_rgb_only(struct kunit *test KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); @@ -1446,6 +1443,9 @@ static void drm_test_check_output_bpc_format_display_rgb_only(struct kunit *test rate = drm_hdmi_compute_mode_clock(preferred, 12, HDMI_COLORSPACE_YUV422); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1491,9 +1491,6 @@ static void drm_test_check_output_bpc_format_driver_8bpc_only(struct kunit *test KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); @@ -1504,6 +1501,9 @@ static void drm_test_check_output_bpc_format_driver_8bpc_only(struct kunit *test rate = drm_hdmi_compute_mode_clock(preferred, 12, HDMI_COLORSPACE_RGB); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1551,9 +1551,6 @@ static void drm_test_check_output_bpc_format_display_8bpc_only(struct kunit *tes KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); @@ -1564,6 +1561,9 @@ static void drm_test_check_output_bpc_format_display_8bpc_only(struct kunit *tes rate = drm_hdmi_compute_mode_clock(preferred, 12, HDMI_COLORSPACE_RGB); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); -- GitLab From 0b06000704f8ae72056ad777a67742b7799d6660 Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Thu, 6 Feb 2025 20:38:08 +0800 Subject: [PATCH 0325/1585] ASoC: tas2781: drop a redundant code Report from internal ticket, priv->cali_data.data devm_kzalloc twice, drop the first one, it is the unnecessary one. Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20250206123808.1590-1-shenghao-ding@ti.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas2781-i2c.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index a730ab6ad4e31..90c5b2e74d128 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -2,7 +2,7 @@ // // ALSA SoC Texas Instruments TAS2563/TAS2781 Audio Smart Amplifier // -// Copyright (C) 2022 - 2024 Texas Instruments Incorporated +// Copyright (C) 2022 - 2025 Texas Instruments Incorporated // https://www.ti.com // // The TAS2563/TAS2781 driver implements a flexible and configurable @@ -1260,8 +1260,6 @@ static int tasdevice_create_cali_ctrls(struct tasdevice_priv *priv) (cali_data->cali_dat_sz_per_dev + 1) + 1 + 15 + 1; priv->cali_data.total_sz = priv->ndev * (cali_data->cali_dat_sz_per_dev + 1); - priv->cali_data.data = devm_kzalloc(priv->dev, - ext_cali_data->max, GFP_KERNEL); cali_ctrls[i].name = cali_name; cali_ctrls[i].iface = SNDRV_CTL_ELEM_IFACE_MIXER; cali_ctrls[i].info = snd_soc_bytes_info_ext; -- GitLab From b029628be267cba3c7684ec684749fe3e4372398 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 12 Jan 2025 23:39:01 -0600 Subject: [PATCH 0326/1585] alpha/elf: Fix misc/setarch test of util-linux by removing 32bit support Richard Henderson writes[1]: > There was a Spec benchmark (I forget which) which was memory bound and ran > twice as fast with 32-bit pointers. > > I copied the idea from DEC to the ELF abi, but never did all the other work > to allow the toolchain to take advantage. > > Amusingly, a later Spec changed the benchmark data sets to not fit into a > 32-bit address space, specifically because of this. > > I expect one could delete the ELF bit and personality and no one would > notice. Not even the 10 remaining Alpha users. In [2] it was pointed out that parts of setarch weren't working properly on alpha because it has it's own SET_PERSONALITY implementation. In the discussion that followed Richard Henderson pointed out that the 32bit pointer support for alpha was never completed. Fix this by removing alpha's 32bit pointer support. As a bit of paranoia refuse to execute any alpha binaries that have the EF_ALPHA_32BIT flag set. Just in case someone somewhere has binaries that try to use alpha's 32bit pointer support. Link: https://lkml.kernel.org/r/CAFXwXrkgu=4Qn-v1PjnOR4SG0oUb9LSa0g6QXpBq4ttm52pJOQ@mail.gmail.com [1] Link: https://lkml.kernel.org/r/20250103140148.370368-1-glaubitz@physik.fu-berlin.de [2] Signed-off-by: Eric W. Biederman Reviewed-by: Richard Henderson Reviewed-by: Arnd Bergmann Reviewed-by: John Paul Adrian Glaubitz Tested-by: John Paul Adrian Glaubitz Link: https://lore.kernel.org/r/87y0zfs26i.fsf_-_@email.froward.int.ebiederm.org Signed-off-by: Kees Cook --- arch/alpha/include/asm/elf.h | 6 +----- arch/alpha/include/asm/pgtable.h | 2 +- arch/alpha/include/asm/processor.h | 8 ++------ arch/alpha/kernel/osf_sys.c | 11 ++--------- 4 files changed, 6 insertions(+), 21 deletions(-) diff --git a/arch/alpha/include/asm/elf.h b/arch/alpha/include/asm/elf.h index 4d7c46f50382e..50c82187e60ec 100644 --- a/arch/alpha/include/asm/elf.h +++ b/arch/alpha/include/asm/elf.h @@ -74,7 +74,7 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; /* * This is used to ensure we don't load something for the wrong architecture. */ -#define elf_check_arch(x) ((x)->e_machine == EM_ALPHA) +#define elf_check_arch(x) (((x)->e_machine == EM_ALPHA) && !((x)->e_flags & EF_ALPHA_32BIT)) /* * These are used to set parameters in the core dumps. @@ -137,10 +137,6 @@ extern int dump_elf_task(elf_greg_t *dest, struct task_struct *task); : amask (AMASK_CIX) ? "ev6" : "ev67"); \ }) -#define SET_PERSONALITY(EX) \ - set_personality(((EX).e_flags & EF_ALPHA_32BIT) \ - ? PER_LINUX_32BIT : PER_LINUX) - extern int alpha_l1i_cacheshape; extern int alpha_l1d_cacheshape; extern int alpha_l2_cacheshape; diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 635f0a5f5bbde..02e8817a89212 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -360,7 +360,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) extern void paging_init(void); -/* We have our own get_unmapped_area to cope with ADDR_LIMIT_32BIT. */ +/* We have our own get_unmapped_area */ #define HAVE_ARCH_UNMAPPED_AREA #endif /* _ALPHA_PGTABLE_H */ diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h index 55bb1c09fd39d..5dce5518a2111 100644 --- a/arch/alpha/include/asm/processor.h +++ b/arch/alpha/include/asm/processor.h @@ -8,23 +8,19 @@ #ifndef __ASM_ALPHA_PROCESSOR_H #define __ASM_ALPHA_PROCESSOR_H -#include /* for ADDR_LIMIT_32BIT */ - /* * We have a 42-bit user address space: 4TB user VM... */ #define TASK_SIZE (0x40000000000UL) -#define STACK_TOP \ - (current->personality & ADDR_LIMIT_32BIT ? 0x80000000 : 0x00120000000UL) +#define STACK_TOP (0x00120000000UL) #define STACK_TOP_MAX 0x00120000000UL /* This decides where the kernel will search for a free chunk of vm * space during mmap's. */ -#define TASK_UNMAPPED_BASE \ - ((current->personality & ADDR_LIMIT_32BIT) ? 0x40000000 : TASK_SIZE / 2) +#define TASK_UNMAPPED_BASE (TASK_SIZE / 2) /* This is dead. Everything has been moved to thread_info. */ struct thread_struct { }; diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index c0424de9e7cda..077a1407be6d7 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1211,8 +1211,7 @@ SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p) return ret; } -/* Get an address range which is currently unmapped. Similar to the - generic version except that we know how to honor ADDR_LIMIT_32BIT. */ +/* Get an address range which is currently unmapped. */ static unsigned long arch_get_unmapped_area_1(unsigned long addr, unsigned long len, @@ -1231,13 +1230,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags) { - unsigned long limit; - - /* "32 bit" actually means 31 bit, since pointers sign extend. */ - if (current->personality & ADDR_LIMIT_32BIT) - limit = 0x80000000; - else - limit = TASK_SIZE; + unsigned long limit = TASK_SIZE; if (len > limit) return -ENOMEM; -- GitLab From 7507eb3e7bfac7c3baef8dd377fdf5871eefd42b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 31 Jan 2025 17:29:13 +0200 Subject: [PATCH 0327/1585] PCI/ASPM: Fix L1SS saving MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 1db806ec06b7 ("PCI/ASPM: Save parent L1SS config in pci_save_aspm_l1ss_state()") aimed to perform L1SS config save for both the Upstream Port and its upstream bridge when handling an Upstream Port, which matches what the L1SS restore side does. However, parent->state_saved can be set true at an earlier time when the upstream bridge saved other parts of its state. Then later when attempting to save the L1SS config while handling the Upstream Port, parent->state_saved is true in pci_save_aspm_l1ss_state() resulting in early return and skipping saving bridge's L1SS config because it is assumed to be already saved. Later on restore, junk is written into L1SS config which causes issues with some devices. Remove parent->state_saved check and unconditionally save L1SS config also for the upstream bridge from an Upstream Port which ought to be harmless from correctness point of view. With the Upstream Port check now present, saving the L1SS config more than once for the bridge is no longer a problem (unlike when the parent->state_saved check got introduced into the fix during its development). Link: https://lore.kernel.org/r/20250131152913.2507-1-ilpo.jarvinen@linux.intel.com Fixes: 1db806ec06b7 ("PCI/ASPM: Save parent L1SS config in pci_save_aspm_l1ss_state()") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219731 Reported-by: Niklāvs Koļesņikovs Reported by: Rafael J. Wysocki Closes: https://lore.kernel.org/r/CAJZ5v0iKmynOQ5vKSQbg1J_FmavwZE-nRONovOZ0mpMVauheWg@mail.gmail.com Reported-by: Paul Menzel Closes: https://lore.kernel.org/r/d7246feb-4f3f-4d0c-bb64-89566b170671@molgen.mpg.de Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Tested-by: Niklāvs Koļesņikovs Tested-by: Paul Menzel # Dell XPS 13 9360 --- drivers/pci/pcie/aspm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index e0bc90597dcad..da3e7edcf49d9 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -108,9 +108,6 @@ void pci_save_aspm_l1ss_state(struct pci_dev *pdev) pci_read_config_dword(pdev, pdev->l1ss + PCI_L1SS_CTL2, cap++); pci_read_config_dword(pdev, pdev->l1ss + PCI_L1SS_CTL1, cap++); - if (parent->state_saved) - return; - /* * Save parent's L1 substate configuration so we have it for * pci_restore_aspm_l1ss_state(pdev) to restore. -- GitLab From 57e4a9bd61c308f607bc3e55e8fa02257b06b552 Mon Sep 17 00:00:00 2001 From: Meetakshi Setiya Date: Thu, 6 Feb 2025 01:50:41 -0500 Subject: [PATCH 0328/1585] smb: client: change lease epoch type from unsigned int to __u16 MS-SMB2 section 2.2.13.2.10 specifies that 'epoch' should be a 16-bit unsigned integer used to track lease state changes. Change the data type of all instances of 'epoch' from unsigned int to __u16. This simplifies the epoch change comparisons and makes the code more compliant with the protocol spec. Cc: stable@vger.kernel.org Signed-off-by: Meetakshi Setiya Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 14 +++++++------- fs/smb/client/smb1ops.c | 2 +- fs/smb/client/smb2ops.c | 18 +++++++++--------- fs/smb/client/smb2pdu.c | 2 +- fs/smb/client/smb2proto.h | 2 +- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index a68434ad744ae..ac1f890a0d543 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -357,7 +357,7 @@ struct smb_version_operations { int (*handle_cancelled_mid)(struct mid_q_entry *, struct TCP_Server_Info *); void (*downgrade_oplock)(struct TCP_Server_Info *server, struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache); + __u16 epoch, bool *purge_cache); /* process transaction2 response */ bool (*check_trans2)(struct mid_q_entry *, struct TCP_Server_Info *, char *, int); @@ -552,12 +552,12 @@ struct smb_version_operations { /* if we can do cache read operations */ bool (*is_read_op)(__u32); /* set oplock level for the inode */ - void (*set_oplock_level)(struct cifsInodeInfo *, __u32, unsigned int, - bool *); + void (*set_oplock_level)(struct cifsInodeInfo *cinode, __u32 oplock, __u16 epoch, + bool *purge_cache); /* create lease context buffer for CREATE request */ char * (*create_lease_buf)(u8 *lease_key, u8 oplock); /* parse lease context buffer and return oplock/epoch info */ - __u8 (*parse_lease_buf)(void *buf, unsigned int *epoch, char *lkey); + __u8 (*parse_lease_buf)(void *buf, __u16 *epoch, char *lkey); ssize_t (*copychunk_range)(const unsigned int, struct cifsFileInfo *src_file, struct cifsFileInfo *target_file, @@ -1447,7 +1447,7 @@ struct cifs_fid { __u8 create_guid[16]; __u32 access; struct cifs_pending_open *pending_open; - unsigned int epoch; + __u16 epoch; #ifdef CONFIG_CIFS_DEBUG2 __u64 mid; #endif /* CIFS_DEBUG2 */ @@ -1480,7 +1480,7 @@ struct cifsFileInfo { bool oplock_break_cancelled:1; bool status_file_deleted:1; /* file has been deleted */ bool offload:1; /* offload final part of _put to a wq */ - unsigned int oplock_epoch; /* epoch from the lease break */ + __u16 oplock_epoch; /* epoch from the lease break */ __u32 oplock_level; /* oplock/lease level from the lease break */ int count; spinlock_t file_info_lock; /* protects four flag/count fields above */ @@ -1577,7 +1577,7 @@ struct cifsInodeInfo { spinlock_t open_file_lock; /* protects openFileList */ __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ unsigned int oplock; /* oplock/lease level we have */ - unsigned int epoch; /* used to track lease state changes */ + __u16 epoch; /* used to track lease state changes */ #define CIFS_INODE_PENDING_OPLOCK_BREAK (0) /* oplock break in progress */ #define CIFS_INODE_PENDING_WRITERS (1) /* Writes in progress */ #define CIFS_INODE_FLAG_UNUSED (2) /* Unused flag */ diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 9756b876a75e1..d6e2fb669c401 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -377,7 +377,7 @@ coalesce_t2(char *second_buf, struct smb_hdr *target_hdr) static void cifs_downgrade_oplock(struct TCP_Server_Info *server, struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache) + __u16 epoch, bool *purge_cache) { cifs_set_oplock_level(cinode, oplock); } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 77309217dab45..ec36bed54b0b9 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -3904,22 +3904,22 @@ static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode, static void smb2_downgrade_oplock(struct TCP_Server_Info *server, struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache) + __u16 epoch, bool *purge_cache) { server->ops->set_oplock_level(cinode, oplock, 0, NULL); } static void smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache); + __u16 epoch, bool *purge_cache); static void smb3_downgrade_oplock(struct TCP_Server_Info *server, struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache) + __u16 epoch, bool *purge_cache) { unsigned int old_state = cinode->oplock; - unsigned int old_epoch = cinode->epoch; + __u16 old_epoch = cinode->epoch; unsigned int new_state; if (epoch > old_epoch) { @@ -3939,7 +3939,7 @@ smb3_downgrade_oplock(struct TCP_Server_Info *server, static void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache) + __u16 epoch, bool *purge_cache) { oplock &= 0xFF; cinode->lease_granted = false; @@ -3963,7 +3963,7 @@ smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, static void smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache) + __u16 epoch, bool *purge_cache) { char message[5] = {0}; unsigned int new_oplock = 0; @@ -4000,7 +4000,7 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, static void smb3_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache) + __u16 epoch, bool *purge_cache) { unsigned int old_oplock = cinode->oplock; @@ -4114,7 +4114,7 @@ smb3_create_lease_buf(u8 *lease_key, u8 oplock) } static __u8 -smb2_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key) +smb2_parse_lease_buf(void *buf, __u16 *epoch, char *lease_key) { struct create_lease *lc = (struct create_lease *)buf; @@ -4125,7 +4125,7 @@ smb2_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key) } static __u8 -smb3_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key) +smb3_parse_lease_buf(void *buf, __u16 *epoch, char *lease_key) { struct create_lease_v2 *lc = (struct create_lease_v2 *)buf; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 78395195e0165..ed7812247ebc0 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2329,7 +2329,7 @@ parse_posix_ctxt(struct create_context *cc, struct smb2_file_all_info *info, int smb2_parse_contexts(struct TCP_Server_Info *server, struct kvec *rsp_iov, - unsigned int *epoch, + __u16 *epoch, char *lease_key, __u8 *oplock, struct smb2_file_all_info *buf, struct create_posix_rsp *posix) diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 2336dfb23f363..4662c7e2d259c 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -283,7 +283,7 @@ extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *, enum securityEnum); int smb2_parse_contexts(struct TCP_Server_Info *server, struct kvec *rsp_iov, - unsigned int *epoch, + __u16 *epoch, char *lease_key, __u8 *oplock, struct smb2_file_all_info *buf, struct create_posix_rsp *posix); -- GitLab From a9c621a217128eb3fb7522cf763992d9437fd5ba Mon Sep 17 00:00:00 2001 From: "Justin M. Forbes" Date: Wed, 29 Jan 2025 14:50:02 -0700 Subject: [PATCH 0329/1585] rust: kbuild: add -fzero-init-padding-bits to bindgen_skip_cflags This seems to break the build when building with gcc15: Unable to generate bindings: ClangDiagnostic("error: unknown argument: '-fzero-init-padding-bits=all'\n") Thus skip that flag. Signed-off-by: Justin M. Forbes Fixes: dce4aab8441d ("kbuild: Use -fzero-init-padding-bits=all") Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20250129215003.1736127-1-jforbes@fedoraproject.org [ Slightly reworded commit. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/Makefile b/rust/Makefile index ff4343ca3f7c4..ff8a5e810d65e 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -240,6 +240,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ -fzero-call-used-regs=% -fno-stack-clash-protection \ -fno-inline-functions-called-once -fsanitize=bounds-strict \ -fstrict-flex-arrays=% -fmin-function-alignment=% \ + -fzero-init-padding-bits=% \ --param=% --param asan-% # Derived from `scripts/Makefile.clang`. -- GitLab From 6f64b83d9fe9729000a0616830cb1606945465d8 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 5 Feb 2025 12:52:13 +0000 Subject: [PATCH 0330/1585] PCI/TPH: Restore TPH Requester Enable correctly When we reenable TPH after changing a Steering Tag value, we need the actual TPH Requester Enable value, not the ST Mode (which only happens to work out by chance for non-extended TPH in interrupt vector mode). Link: https://lore.kernel.org/r/13118098116d7bce07aa20b8c52e28c7d1847246.1738759933.git.robin.murphy@arm.com Fixes: d2e8a34876ce ("PCI/TPH: Add Steering Tag support") Signed-off-by: Robin Murphy Signed-off-by: Bjorn Helgaas Reviewed-by: Wei Huang --- drivers/pci/tph.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c index 1e604fbbda657..07de59ca2ebfa 100644 --- a/drivers/pci/tph.c +++ b/drivers/pci/tph.c @@ -360,7 +360,7 @@ int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index, u16 tag) return err; } - set_ctrl_reg_req_en(pdev, pdev->tph_mode); + set_ctrl_reg_req_en(pdev, pdev->tph_req_type); pci_dbg(pdev, "set steering tag: %s table, index=%d, tag=%#04x\n", (loc == PCI_TPH_LOC_MSIX) ? "MSI-X" : "ST", index, tag); -- GitLab From 0e446e3145011b8fe39759b59bd69d39fb47cfeb Mon Sep 17 00:00:00 2001 From: Matthew Maurer Date: Wed, 22 Jan 2025 00:14:43 +0000 Subject: [PATCH 0331/1585] rust: kbuild: do not export generated KASAN ODR symbols ASAN generates special synthetic symbols to help check for ODR violations. These synthetic symbols lack debug information, so gendwarfksyms emits warnings when processing them. No code should ever have a dependency on these symbols, so we should not be exporting them, just like the __cfi symbols. Signed-off-by: Matthew Maurer Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250122-gendwarfksyms-kasan-rust-v1-1-5ee5658f4fb6@google.com [ Fixed typo in commit message. Slightly reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/Makefile b/rust/Makefile index ff8a5e810d65e..ea3849eb78f65 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -332,7 +332,7 @@ $(obj)/bindings/bindings_helpers_generated.rs: private bindgen_target_extra = ; $(obj)/bindings/bindings_helpers_generated.rs: $(src)/helpers/helpers.c FORCE $(call if_changed_dep,bindgen) -rust_exports = $(NM) -p --defined-only $(1) | awk '$$2~/(T|R|D|B)/ && $$3!~/__cfi/ { printf $(2),$$3 }' +rust_exports = $(NM) -p --defined-only $(1) | awk '$$2~/(T|R|D|B)/ && $$3!~/__cfi/ && $$3!~/__odr_asan/ { printf $(2),$$3 }' quiet_cmd_exports = EXPORTS $@ cmd_exports = \ -- GitLab From 3ace20038e19f23fe73259513f1f08d4bf1a3c83 Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 5 Feb 2025 11:25:20 +0000 Subject: [PATCH 0332/1585] cpufreq/amd-pstate: Fix cpufreq_policy ref counting amd_pstate_update_limits() takes a cpufreq_policy reference but doesn't decrement the refcount in one of the exit paths, fix that. Fixes: 45722e777fd9 ("cpufreq: amd-pstate: Optimize amd_pstate_update_limits()") Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20250205112523.201101-10-dhananjay.ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 9dc3933bc3261..313550fa62d41 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -821,20 +821,21 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) static void amd_pstate_update_limits(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct cpufreq_policy *policy = NULL; struct amd_cpudata *cpudata; u32 prev_high = 0, cur_high = 0; int ret; bool highest_perf_changed = false; + if (!amd_pstate_prefcore) + return; + + policy = cpufreq_cpu_get(cpu); if (!policy) return; cpudata = policy->driver_data; - if (!amd_pstate_prefcore) - return; - guard(mutex)(&amd_pstate_driver_lock); ret = amd_get_highest_perf(cpu, &cur_high); -- GitLab From bb5408801a5f2ecd76b61dcd539a5c466ebaac4c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 4 Feb 2025 09:45:13 -0800 Subject: [PATCH 0333/1585] stackinit: Keep selftest union size small on m68k The stack frame on m68k is very sensitive to the size of what needs to be stored. Like done for long string testing, reduce the size of the large trailing struct in the union initialization testing. Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/all/CAMuHMdXW8VbtOAixO7w+aDOG70aZtZ50j1Ybcr8B3eYnRUcrcA@mail.gmail.com Fixes: e71a29db79da ("stackinit: Add union initialization to selftests") Link: https://lore.kernel.org/r/20250204174509.work.711-kees@kernel.org Signed-off-by: Kees Cook Tested-by: Geert Uytterhoeven --- lib/stackinit_kunit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/stackinit_kunit.c b/lib/stackinit_kunit.c index fbe910c9c8253..967b345a98fd8 100644 --- a/lib/stackinit_kunit.c +++ b/lib/stackinit_kunit.c @@ -75,8 +75,10 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, */ #ifdef CONFIG_M68K #define FILL_SIZE_STRING 8 +#define FILL_SIZE_ARRAY 2 #else #define FILL_SIZE_STRING 16 +#define FILL_SIZE_ARRAY 8 #endif #define INIT_CLONE_SCALAR /**/ @@ -345,7 +347,7 @@ union test_small_start { short three; unsigned long four; struct big_struct { - unsigned long array[8]; + unsigned long array[FILL_SIZE_ARRAY]; } big; }; -- GitLab From 78bba6097b9318f4aa645afeade14024af86af4e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 3 Feb 2025 15:34:07 +0100 Subject: [PATCH 0334/1585] stackinit: Fix comment for test_small_end In union test_small_end, the small members are three and four. Fixes: e71a29db79da1946 ("stackinit: Add union initialization to selftests") Closes: https://lore.kernel.org/CAMuHMdWvcKOc6v5o3-9-SqP_4oh5-GZQjZZb=-krhY=mVRED_Q@mail.gmail.com Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/3f8faa2d7d0d6b36571093ab0fb1fd5157abd7bb.1738593178.git.geert+renesas@glider.be Signed-off-by: Kees Cook --- lib/stackinit_kunit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/stackinit_kunit.c b/lib/stackinit_kunit.c index 967b345a98fd8..135322592faf8 100644 --- a/lib/stackinit_kunit.c +++ b/lib/stackinit_kunit.c @@ -351,7 +351,7 @@ union test_small_start { } big; }; -/* Mismatched sizes, with one and two being small */ +/* Mismatched sizes, with three and four being small */ union test_small_end { short one; unsigned long two; -- GitLab From 0db6b7d49b50c037b5eac19a8d8d1da986db80c6 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Mon, 3 Feb 2025 20:04:44 +0200 Subject: [PATCH 0335/1585] MAINTAINERS: wifi: ath: remove Kalle I'm stepping down as ath10k, ath11k and ath12k maintainer so remove me from MAINTAINERS file and Device Tree bindings. Jeff continues as the maintainer. As my quicinc.com email will not work anymore so add an entry to .mailmap file to direct the mail to my kernel.org address. Signed-off-by: Kalle Valo Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20250203180445.1429640-1-kvalo@kernel.org --- .mailmap | 1 + .../devicetree/bindings/net/wireless/qcom,ath10k.yaml | 1 - .../devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml | 1 - .../devicetree/bindings/net/wireless/qcom,ath11k.yaml | 1 - .../devicetree/bindings/net/wireless/qcom,ath12k-wsi.yaml | 1 - .../devicetree/bindings/net/wireless/qcom,ath12k.yaml | 1 - MAINTAINERS | 4 ---- 7 files changed, 1 insertion(+), 9 deletions(-) diff --git a/.mailmap b/.mailmap index 42e42cabb36d7..68d36bf46dcb4 100644 --- a/.mailmap +++ b/.mailmap @@ -370,6 +370,7 @@ Juha Yrjola Julien Thierry Iskren Chernev Kalle Valo +Kalle Valo Kalyan Thota Karthikeyan Periyasamy Kathiravan T diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml index 070c4c9b86437..aace072e2d52a 100644 --- a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml @@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies ath10k wireless devices maintainers: - - Kalle Valo - Jeff Johnson description: diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml index a71fdf05bc1ea..a4425cf196aba 100644 --- a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml @@ -8,7 +8,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies ath11k wireless devices (PCIe) maintainers: - - Kalle Valo - Jeff Johnson description: | diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml index ff5763dc66a88..a69ffb7b3cb88 100644 --- a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml @@ -8,7 +8,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies ath11k wireless devices maintainers: - - Kalle Valo - Jeff Johnson description: | diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath12k-wsi.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath12k-wsi.yaml index cbfb559f6b69b..318f305405e3b 100644 --- a/Documentation/devicetree/bindings/net/wireless/qcom,ath12k-wsi.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath12k-wsi.yaml @@ -9,7 +9,6 @@ title: Qualcomm Technologies ath12k wireless devices (PCIe) with WSI interface maintainers: - Jeff Johnson - - Kalle Valo description: | Qualcomm Technologies IEEE 802.11be PCIe devices with WSI interface. diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath12k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath12k.yaml index 1b5884015b15b..9e557cb838c7a 100644 --- a/Documentation/devicetree/bindings/net/wireless/qcom,ath12k.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath12k.yaml @@ -9,7 +9,6 @@ title: Qualcomm Technologies ath12k wireless devices (PCIe) maintainers: - Jeff Johnson - - Kalle Valo description: Qualcomm Technologies IEEE 802.11be PCIe devices. diff --git a/MAINTAINERS b/MAINTAINERS index 5bcc78c0be70b..2048c75c3c428 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3631,7 +3631,6 @@ F: Documentation/devicetree/bindings/phy/phy-ath79-usb.txt F: drivers/phy/qualcomm/phy-ath79-usb.c ATHEROS ATH GENERIC UTILITIES -M: Kalle Valo M: Jeff Johnson L: linux-wireless@vger.kernel.org S: Supported @@ -19192,7 +19191,6 @@ Q: http://patchwork.linuxtv.org/project/linux-media/list/ F: drivers/media/tuners/qt1010* QUALCOMM ATH12K WIRELESS DRIVER -M: Kalle Valo M: Jeff Johnson L: ath12k@lists.infradead.org S: Supported @@ -19202,7 +19200,6 @@ F: drivers/net/wireless/ath/ath12k/ N: ath12k QUALCOMM ATHEROS ATH10K WIRELESS DRIVER -M: Kalle Valo M: Jeff Johnson L: ath10k@lists.infradead.org S: Supported @@ -19212,7 +19209,6 @@ F: drivers/net/wireless/ath/ath10k/ N: ath10k QUALCOMM ATHEROS ATH11K WIRELESS DRIVER -M: Kalle Valo M: Jeff Johnson L: ath11k@lists.infradead.org S: Supported -- GitLab From b76adb9758f8eaaf22b824d0bcdd694551ce0557 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Mon, 3 Feb 2025 20:04:45 +0200 Subject: [PATCH 0336/1585] MAINTAINERS: wifi: remove Kalle I'm stepping down as wireless driver maintainer. Johannes kindly voluntereed to be the "custodian"[1] for the drivers until a better solution is found. Link: https://lore.kernel.org/all/21896d2788b8bc6c7fcb534cd43e75671a57f494.camel@sipsolutions.net/ [1] Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20250203180445.1429640-2-kvalo@kernel.org --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2048c75c3c428..6401e59ec2cc0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16341,7 +16341,7 @@ X: drivers/net/can/ X: drivers/net/wireless/ NETWORKING DRIVERS (WIRELESS) -M: Kalle Valo +M: Johannes Berg L: linux-wireless@vger.kernel.org S: Maintained W: https://wireless.wiki.kernel.org/ -- GitLab From cf6cb56ef24410fb5308f9655087f1eddf4452e6 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Sun, 2 Feb 2025 08:29:20 -0800 Subject: [PATCH 0337/1585] seccomp: passthrough uretprobe systemcall without filtering When attaching uretprobes to processes running inside docker, the attached process is segfaulted when encountering the retprobe. The reason is that now that uretprobe is a system call the default seccomp filters in docker block it as they only allow a specific set of known syscalls. This is true for other userspace applications which use seccomp to control their syscall surface. Since uretprobe is a "kernel implementation detail" system call which is not used by userspace application code directly, it is impractical and there's very little point in forcing all userspace applications to explicitly allow it in order to avoid crashing tracked processes. Pass this systemcall through seccomp without depending on configuration. Note: uretprobe is currently only x86_64 and isn't expected to ever be supported in i386. Fixes: ff474a78cef5 ("uprobe: Add uretprobe syscall to speed up return probe") Reported-by: Rafael Buchbinder Closes: https://lore.kernel.org/lkml/CAHsH6Gs3Eh8DFU0wq58c_LF8A4_+o6z456J7BidmcVY2AqOnHQ@mail.gmail.com/ Link: https://lore.kernel.org/lkml/20250121182939.33d05470@gandalf.local.home/T/#me2676c378eff2d6a33f3054fed4a5f3afa64e65b Link: https://lore.kernel.org/lkml/20250128145806.1849977-1-eyal.birger@gmail.com/ Cc: stable@vger.kernel.org Signed-off-by: Eyal Birger Link: https://lore.kernel.org/r/20250202162921.335813-2-eyal.birger@gmail.com [kees: minimized changes for easier backporting, tweaked commit log] Signed-off-by: Kees Cook --- kernel/seccomp.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index f59381c4a2ffb..7bbb408431ebc 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -749,6 +749,15 @@ static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog, if (WARN_ON_ONCE(!fprog)) return false; + /* Our single exception to filtering. */ +#ifdef __NR_uretprobe +#ifdef SECCOMP_ARCH_COMPAT + if (sd->arch == SECCOMP_ARCH_NATIVE) +#endif + if (sd->nr == __NR_uretprobe) + return true; +#endif + for (pc = 0; pc < fprog->len; pc++) { struct sock_filter *insn = &fprog->filter[pc]; u16 code = insn->code; @@ -1023,6 +1032,9 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action, */ static const int mode1_syscalls[] = { __NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn, +#ifdef __NR_uretprobe + __NR_uretprobe, +#endif -1, /* negative terminated */ }; -- GitLab From c2debdb8544f415eaf9292a866d4073912eeb561 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Sun, 2 Feb 2025 08:29:21 -0800 Subject: [PATCH 0338/1585] selftests/seccomp: validate uretprobe syscall passes through seccomp The uretprobe syscall is implemented as a performance enhancement on x86_64 by having the kernel inject a call to it on function exit; User programs cannot call this system call explicitly. As such, this syscall is considered a kernel implementation detail and should not be filtered by seccomp. Enhance the seccomp bpf test suite to check that uretprobes can be attached to processes without the killing the process regardless of seccomp policy. Signed-off-by: Eyal Birger Link: https://lore.kernel.org/r/20250202162921.335813-3-eyal.birger@gmail.com [kees: Skip archs without __NR_uretprobe] Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 199 ++++++++++++++++++ 1 file changed, 199 insertions(+) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 8c3a73461475b..14ba51b52095a 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -68,6 +69,10 @@ # define PR_SET_PTRACER 0x59616d61 #endif +#ifndef noinline +#define noinline __attribute__((noinline)) +#endif + #ifndef PR_SET_NO_NEW_PRIVS #define PR_SET_NO_NEW_PRIVS 38 #define PR_GET_NO_NEW_PRIVS 39 @@ -4888,6 +4893,200 @@ TEST(tsync_vs_dead_thread_leader) EXPECT_EQ(0, status); } +noinline int probed(void) +{ + return 1; +} + +static int parse_uint_from_file(const char *file, const char *fmt) +{ + int err = -1, ret; + FILE *f; + + f = fopen(file, "re"); + if (f) { + err = fscanf(f, fmt, &ret); + fclose(f); + } + return err == 1 ? ret : err; +} + +static int determine_uprobe_perf_type(void) +{ + const char *file = "/sys/bus/event_source/devices/uprobe/type"; + + return parse_uint_from_file(file, "%d\n"); +} + +static int determine_uprobe_retprobe_bit(void) +{ + const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe"; + + return parse_uint_from_file(file, "config:%d\n"); +} + +static ssize_t get_uprobe_offset(const void *addr) +{ + size_t start, base, end; + bool found = false; + char buf[256]; + FILE *f; + + f = fopen("/proc/self/maps", "r"); + if (!f) + return -1; + + while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) { + if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) { + found = true; + break; + } + } + fclose(f); + return found ? (uintptr_t)addr - start + base : -1; +} + +FIXTURE(URETPROBE) { + int fd; +}; + +FIXTURE_VARIANT(URETPROBE) { + /* + * All of the URETPROBE behaviors can be tested with either + * uretprobe attached or not + */ + bool attach; +}; + +FIXTURE_VARIANT_ADD(URETPROBE, attached) { + .attach = true, +}; + +FIXTURE_VARIANT_ADD(URETPROBE, not_attached) { + .attach = false, +}; + +FIXTURE_SETUP(URETPROBE) +{ + const size_t attr_sz = sizeof(struct perf_event_attr); + struct perf_event_attr attr; + ssize_t offset; + int type, bit; + +#ifndef __NR_uretprobe + SKIP(return, "__NR_uretprobe syscall not defined"); +#endif + + if (!variant->attach) + return; + + memset(&attr, 0, attr_sz); + + type = determine_uprobe_perf_type(); + ASSERT_GE(type, 0); + bit = determine_uprobe_retprobe_bit(); + ASSERT_GE(bit, 0); + offset = get_uprobe_offset(probed); + ASSERT_GE(offset, 0); + + attr.config |= 1 << bit; + attr.size = attr_sz; + attr.type = type; + attr.config1 = ptr_to_u64("/proc/self/exe"); + attr.config2 = offset; + + self->fd = syscall(__NR_perf_event_open, &attr, + getpid() /* pid */, -1 /* cpu */, -1 /* group_fd */, + PERF_FLAG_FD_CLOEXEC); +} + +FIXTURE_TEARDOWN(URETPROBE) +{ + /* we could call close(self->fd), but we'd need extra filter for + * that and since we are calling _exit right away.. + */ +} + +static int run_probed_with_filter(struct sock_fprog *prog) +{ + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || + seccomp(SECCOMP_SET_MODE_FILTER, 0, prog)) { + return -1; + } + + probed(); + return 0; +} + +TEST_F(URETPROBE, uretprobe_default_allow) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + ASSERT_EQ(0, run_probed_with_filter(&prog)); +} + +TEST_F(URETPROBE, uretprobe_default_block) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + ASSERT_EQ(0, run_probed_with_filter(&prog)); +} + +TEST_F(URETPROBE, uretprobe_block_uretprobe_syscall) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), +#ifdef __NR_uretprobe + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 0, 1), +#endif + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + ASSERT_EQ(0, run_probed_with_filter(&prog)); +} + +TEST_F(URETPROBE, uretprobe_default_block_with_uretprobe_syscall) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), +#ifdef __NR_uretprobe + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 2, 0), +#endif + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + ASSERT_EQ(0, run_probed_with_filter(&prog)); +} + /* * TODO: * - expand NNP testing -- GitLab From 6273a058383e05465083b535ed9469f2c8a48321 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 3 Feb 2025 08:40:57 +0000 Subject: [PATCH 0339/1585] x86: rust: set rustc-abi=x86-softfloat on rustc>=1.86.0 When using Rust on the x86 architecture, we are currently using the unstable target.json feature to specify the compilation target. Rustc is going to change how softfloat is specified in the target.json file on x86, thus update generate_rust_target.rs to specify softfloat using the new option. Note that if you enable this parameter with a compiler that does not recognize it, then that triggers a warning but it does not break the build. [ For future reference, this solves the following error: RUSTC L rust/core.o error: Error loading target specification: target feature `soft-float` is incompatible with the ABI but gets enabled in target spec. Run `rustc --print target-list` for a list of built-in targets - Miguel ] Cc: # Needed in 6.12.y and 6.13.y only (Rust is pinned in older LTSs). Link: https://github.com/rust-lang/rust/pull/136146 Signed-off-by: Alice Ryhl Acked-by: Dave Hansen # for x86 Link: https://lore.kernel.org/r/20250203-rustc-1-86-x86-softfloat-v1-1-220a72a5003e@google.com [ Added 6.13.y too to Cc: stable tag and added reasoning to avoid over-backporting. - Miguel ] Signed-off-by: Miguel Ojeda --- scripts/generate_rust_target.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs index 0d00ac3723b5e..4fd6b6ab3e329 100644 --- a/scripts/generate_rust_target.rs +++ b/scripts/generate_rust_target.rs @@ -165,6 +165,18 @@ impl KernelConfig { let option = "CONFIG_".to_owned() + option; self.0.contains_key(&option) } + + /// Is the rustc version at least `major.minor.patch`? + fn rustc_version_atleast(&self, major: u32, minor: u32, patch: u32) -> bool { + let check_version = 100000 * major + 100 * minor + patch; + let actual_version = self + .0 + .get("CONFIG_RUSTC_VERSION") + .unwrap() + .parse::() + .unwrap(); + check_version <= actual_version + } } fn main() { @@ -182,6 +194,9 @@ fn main() { } } else if cfg.has("X86_64") { ts.push("arch", "x86_64"); + if cfg.rustc_version_atleast(1, 86, 0) { + ts.push("rustc-abi", "x86-softfloat"); + } ts.push( "data-layout", "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", @@ -215,6 +230,9 @@ fn main() { panic!("32-bit x86 only works under UML"); } ts.push("arch", "x86"); + if cfg.rustc_version_atleast(1, 86, 0) { + ts.push("rustc-abi", "x86-softfloat"); + } ts.push( "data-layout", "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128", -- GitLab From 482ad2a4ace2740ca0ff1cbc8f3c7f862f3ab507 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Feb 2025 15:51:09 +0000 Subject: [PATCH 0340/1585] net: add dev_net_rcu() helper dev->nd_net can change, readers should either use rcu_read_lock() or RTNL. We currently use a generic helper, dev_net() with no debugging support. We probably have many hidden bugs. Add dev_net_rcu() helper for callers using rcu_read_lock() protection. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250205155120.1676781-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 6 ++++++ include/net/net_namespace.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 03bb584c62cf8..c0a86afb85daa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2663,6 +2663,12 @@ struct net *dev_net(const struct net_device *dev) return read_pnet(&dev->nd_net); } +static inline +struct net *dev_net_rcu(const struct net_device *dev) +{ + return read_pnet_rcu(&dev->nd_net); +} + static inline void dev_net_set(struct net_device *dev, struct net *net) { diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 0f5eb9db0c626..7ba1402ca7796 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -398,7 +398,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #endif } -static inline struct net *read_pnet_rcu(possible_net_t *pnet) +static inline struct net *read_pnet_rcu(const possible_net_t *pnet) { #ifdef CONFIG_NET_NS return rcu_dereference(pnet->net); -- GitLab From 469308552ca4560176cfc100e7ca84add1bebd7c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Feb 2025 15:51:10 +0000 Subject: [PATCH 0341/1585] ipv4: add RCU protection to ip4_dst_hoplimit() ip4_dst_hoplimit() must use RCU protection to make sure the net structure it reads does not disappear. Fixes: fa50d974d104 ("ipv4: Namespaceify ip_default_ttl sysctl knob") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250205155120.1676781-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/route.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index f86775be3e293..c605fd5ec0c08 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -382,10 +382,15 @@ static inline int inet_iif(const struct sk_buff *skb) static inline int ip4_dst_hoplimit(const struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); - struct net *net = dev_net(dst->dev); - if (hoplimit == 0) + if (hoplimit == 0) { + const struct net *net; + + rcu_read_lock(); + net = dev_net_rcu(dst->dev); hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); + rcu_read_unlock(); + } return hoplimit; } -- GitLab From 071d8012869b6af352acca346ade13e7be90a49f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Feb 2025 15:51:11 +0000 Subject: [PATCH 0342/1585] ipv4: use RCU protection in ip_dst_mtu_maybe_forward() ip_dst_mtu_maybe_forward() must use RCU protection to make sure the net structure it reads does not disappear. Fixes: f87c10a8aa1e8 ("ipv4: introduce ip_dst_mtu_maybe_forward and protect forwarding path against pmtu spoofing") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250205155120.1676781-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 9f5e33e371fcd..ba7b43447775e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -471,9 +471,12 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { const struct rtable *rt = dst_rtable(dst); - struct net *net = dev_net(dst->dev); - unsigned int mtu; + unsigned int mtu, res; + struct net *net; + + rcu_read_lock(); + net = dev_net_rcu(dst->dev); if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || ip_mtu_locked(dst) || !forwarding) { @@ -497,7 +500,11 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, out: mtu = min_t(unsigned int, mtu, IP_MAX_MTU); - return mtu - lwtunnel_headroom(dst->lwtstate, mtu); + res = mtu - lwtunnel_headroom(dst->lwtstate, mtu); + + rcu_read_unlock(); + + return res; } static inline unsigned int ip_skb_dst_mtu(struct sock *sk, -- GitLab From 71b8471c93fa0bcab911fcb65da1eb6c4f5f735f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Feb 2025 15:51:12 +0000 Subject: [PATCH 0343/1585] ipv4: use RCU protection in ipv4_default_advmss() ipv4_default_advmss() must use RCU protection to make sure the net structure it reads does not disappear. Fixes: 2e9589ff809e ("ipv4: Namespaceify min_adv_mss sysctl knob") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250205155120.1676781-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 577b88a43293a..74c074f45758b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1307,10 +1307,15 @@ static void set_class_tag(struct rtable *rt, u32 tag) static unsigned int ipv4_default_advmss(const struct dst_entry *dst) { - struct net *net = dev_net(dst->dev); unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr); - unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, - net->ipv4.ip_rt_min_advmss); + unsigned int advmss; + struct net *net; + + rcu_read_lock(); + net = dev_net_rcu(dst->dev); + advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, + net->ipv4.ip_rt_min_advmss); + rcu_read_unlock(); return min(advmss, IPV4_MAX_PMTU - header_size); } -- GitLab From dd205fcc33d92d54eee4d7f21bb073af9bd5ce2b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Feb 2025 15:51:13 +0000 Subject: [PATCH 0344/1585] ipv4: use RCU protection in rt_is_expired() rt_is_expired() must use RCU protection to make sure the net structure it reads does not disappear. Fixes: e84f84f27647 ("netns: place rt_genid into struct net") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250205155120.1676781-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 74c074f45758b..e959327c0ba89 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -390,7 +390,13 @@ static inline int ip_rt_proc_init(void) static inline bool rt_is_expired(const struct rtable *rth) { - return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev)); + bool res; + + rcu_read_lock(); + res = rth->rt_genid != rt_genid_ipv4(dev_net_rcu(rth->dst.dev)); + rcu_read_unlock(); + + return res; } void rt_cache_flush(struct net *net) -- GitLab From 719817cd293e4fa389e1f69c396f3f816ed5aa41 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Feb 2025 15:51:14 +0000 Subject: [PATCH 0345/1585] ipv4: use RCU protection in inet_select_addr() inet_select_addr() must use RCU protection to make sure the net structure it reads does not disappear. Fixes: c4544c724322 ("[NETNS]: Process inet_select_addr inside a namespace.") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250205155120.1676781-7-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/devinet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index c8b3cf5fba4c0..55b8151759bc9 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1371,10 +1371,11 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) __be32 addr = 0; unsigned char localnet_scope = RT_SCOPE_HOST; struct in_device *in_dev; - struct net *net = dev_net(dev); + struct net *net; int master_idx; rcu_read_lock(); + net = dev_net_rcu(dev); in_dev = __in_dev_get_rcu(dev); if (!in_dev) goto no_in_dev; -- GitLab From 139512191bd06f1b496117c76372b2ce372c9a41 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Feb 2025 15:51:15 +0000 Subject: [PATCH 0346/1585] ipv4: use RCU protection in __ip_rt_update_pmtu() __ip_rt_update_pmtu() must use RCU protection to make sure the net structure it reads does not disappear. Fixes: 2fbc6e89b2f1 ("ipv4: Update exception handling for multipath routes via same device") Fixes: 1de6b15a434c ("Namespaceify min_pmtu sysctl") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250205155120.1676781-8-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e959327c0ba89..753704f75b2c6 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1008,9 +1008,9 @@ out: kfree_skb_reason(skb, reason); static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; - struct net *net = dev_net(dst->dev); struct fib_result res; bool lock = false; + struct net *net; u32 old_mtu; if (ip_mtu_locked(dst)) @@ -1020,6 +1020,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (old_mtu < mtu) return; + rcu_read_lock(); + net = dev_net_rcu(dst->dev); if (mtu < net->ipv4.ip_rt_min_pmtu) { lock = true; mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu); @@ -1027,9 +1029,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (rt->rt_pmtu == mtu && !lock && time_before(jiffies, dst->expires - net->ipv4.ip_rt_mtu_expires / 2)) - return; + goto out; - rcu_read_lock(); if (fib_lookup(net, fl4, &res, 0) == 0) { struct fib_nh_common *nhc; @@ -1043,14 +1044,14 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, jiffies + net->ipv4.ip_rt_mtu_expires); } - rcu_read_unlock(); - return; + goto out; } #endif /* CONFIG_IP_ROUTE_MULTIPATH */ nhc = FIB_RES_NHC(res); update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, jiffies + net->ipv4.ip_rt_mtu_expires); } +out: rcu_read_unlock(); } -- GitLab From 4b8474a0951e605d2a27a2c483da4eb4b8c63760 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Feb 2025 15:51:16 +0000 Subject: [PATCH 0347/1585] ipv4: icmp: convert to dev_net_rcu() __icmp_send() must ensure rcu_read_lock() is held, as spotted by Jakub. Other ICMP uses of dev_net() seem safe, change them to dev_net_rcu() to get LOCKDEP support. Fixes: dde1bc0e6f86 ("[NETNS]: Add namespace for ICMP replying code.") Closes: https://lore.kernel.org/netdev/20250203153633.46ce0337@kernel.org/ Reported-by: Jakub Kicinski Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250205155120.1676781-9-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/icmp.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 094084b61bff8..5482edb5aade2 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -399,10 +399,10 @@ static void icmp_push_reply(struct sock *sk, static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) { - struct ipcm_cookie ipc; struct rtable *rt = skb_rtable(skb); - struct net *net = dev_net(rt->dst.dev); + struct net *net = dev_net_rcu(rt->dst.dev); bool apply_ratelimit = false; + struct ipcm_cookie ipc; struct flowi4 fl4; struct sock *sk; struct inet_sock *inet; @@ -608,12 +608,14 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, struct sock *sk; if (!rt) - goto out; + return; + + rcu_read_lock(); if (rt->dst.dev) - net = dev_net(rt->dst.dev); + net = dev_net_rcu(rt->dst.dev); else if (skb_in->dev) - net = dev_net(skb_in->dev); + net = dev_net_rcu(skb_in->dev); else goto out; @@ -785,7 +787,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, icmp_xmit_unlock(sk); out_bh_enable: local_bh_enable(); -out:; +out: + rcu_read_unlock(); } EXPORT_SYMBOL(__icmp_send); @@ -834,7 +837,7 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info) * avoid additional coding at protocol handlers. */ if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) { - __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS); + __ICMP_INC_STATS(dev_net_rcu(skb->dev), ICMP_MIB_INERRORS); return; } @@ -868,7 +871,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb) struct net *net; u32 info = 0; - net = dev_net(skb_dst(skb)->dev); + net = dev_net_rcu(skb_dst(skb)->dev); /* * Incomplete header ? @@ -979,7 +982,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb) static enum skb_drop_reason icmp_redirect(struct sk_buff *skb) { if (skb->len < sizeof(struct iphdr)) { - __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS); + __ICMP_INC_STATS(dev_net_rcu(skb->dev), ICMP_MIB_INERRORS); return SKB_DROP_REASON_PKT_TOO_SMALL; } @@ -1011,7 +1014,7 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb) struct icmp_bxm icmp_param; struct net *net; - net = dev_net(skb_dst(skb)->dev); + net = dev_net_rcu(skb_dst(skb)->dev); /* should there be an ICMP stat for ignored echos? */ if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all)) return SKB_NOT_DROPPED_YET; @@ -1040,9 +1043,9 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb) bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) { + struct net *net = dev_net_rcu(skb->dev); struct icmp_ext_hdr *ext_hdr, _ext_hdr; struct icmp_ext_echo_iio *iio, _iio; - struct net *net = dev_net(skb->dev); struct inet6_dev *in6_dev; struct in_device *in_dev; struct net_device *dev; @@ -1181,7 +1184,7 @@ static enum skb_drop_reason icmp_timestamp(struct sk_buff *skb) return SKB_NOT_DROPPED_YET; out_err: - __ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS); + __ICMP_INC_STATS(dev_net_rcu(skb_dst(skb)->dev), ICMP_MIB_INERRORS); return SKB_DROP_REASON_PKT_TOO_SMALL; } @@ -1198,7 +1201,7 @@ int icmp_rcv(struct sk_buff *skb) { enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct rtable *rt = skb_rtable(skb); - struct net *net = dev_net(rt->dst.dev); + struct net *net = dev_net_rcu(rt->dst.dev); struct icmphdr *icmph; if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { @@ -1371,9 +1374,9 @@ int icmp_err(struct sk_buff *skb, u32 info) struct iphdr *iph = (struct iphdr *)skb->data; int offset = iph->ihl<<2; struct icmphdr *icmph = (struct icmphdr *)(skb->data + offset); + struct net *net = dev_net_rcu(skb->dev); int type = icmp_hdr(skb)->type; int code = icmp_hdr(skb)->code; - struct net *net = dev_net(skb->dev); /* * Use ping_err to handle all icmp errors except those -- GitLab From afec62cd0a4191cde6dd3a75382be4d51a38ce9b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Feb 2025 15:51:17 +0000 Subject: [PATCH 0348/1585] flow_dissector: use RCU protection to fetch dev_net() __skb_flow_dissect() can be called from arbitrary contexts. It must extend its RCU protection section to include the call to dev_net(), which can become dev_net_rcu(). This makes sure the net structure can not disappear under us. Fixes: 9b52e3f267a6 ("flow_dissector: handle no-skb use case") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250205155120.1676781-10-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/flow_dissector.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 0e638a37aa096..5db41bf2ed93e 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1108,10 +1108,12 @@ bool __skb_flow_dissect(const struct net *net, FLOW_DISSECTOR_KEY_BASIC, target_container); + rcu_read_lock(); + if (skb) { if (!net) { if (skb->dev) - net = dev_net(skb->dev); + net = dev_net_rcu(skb->dev); else if (skb->sk) net = sock_net(skb->sk); } @@ -1122,7 +1124,6 @@ bool __skb_flow_dissect(const struct net *net, enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; struct bpf_prog_array *run_array; - rcu_read_lock(); run_array = rcu_dereference(init_net.bpf.run_array[type]); if (!run_array) run_array = rcu_dereference(net->bpf.run_array[type]); @@ -1150,17 +1151,17 @@ bool __skb_flow_dissect(const struct net *net, prog = READ_ONCE(run_array->items[0].prog); result = bpf_flow_dissect(prog, &ctx, n_proto, nhoff, hlen, flags); - if (result == BPF_FLOW_DISSECTOR_CONTINUE) - goto dissect_continue; - __skb_flow_bpf_to_target(&flow_keys, flow_dissector, - target_container); - rcu_read_unlock(); - return result == BPF_OK; + if (result != BPF_FLOW_DISSECTOR_CONTINUE) { + __skb_flow_bpf_to_target(&flow_keys, flow_dissector, + target_container); + rcu_read_unlock(); + return result == BPF_OK; + } } -dissect_continue: - rcu_read_unlock(); } + rcu_read_unlock(); + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { struct ethhdr *eth = eth_hdr(skb); -- GitLab From 3c8ffcd248da34fc41e52a46e51505900115fc2a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Feb 2025 15:51:18 +0000 Subject: [PATCH 0349/1585] ipv6: use RCU protection in ip6_default_advmss() ip6_default_advmss() needs rcu protection to make sure the net structure it reads does not disappear. Fixes: 5578689a4e3c ("[NETNS][IPV6] route6 - make route6 per namespace") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250205155120.1676781-11-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 78362822b9070..ef2d23a1e3d53 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3196,13 +3196,18 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) { struct net_device *dev = dst->dev; unsigned int mtu = dst_mtu(dst); - struct net *net = dev_net(dev); + struct net *net; mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); + rcu_read_lock(); + + net = dev_net_rcu(dev); if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) mtu = net->ipv6.sysctl.ip6_rt_min_advmss; + rcu_read_unlock(); + /* * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. -- GitLab From 34aef2b0ce3aa4eb4ef2e1f5cad3738d527032f5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Feb 2025 15:51:19 +0000 Subject: [PATCH 0350/1585] ipv6: icmp: convert to dev_net_rcu() icmp6_send() must acquire rcu_read_lock() sooner to ensure the dev_net() call done from a safe context. Other ICMPv6 uses of dev_net() seem safe, change them to dev_net_rcu() to get LOCKDEP support to catch bugs. Fixes: 9a43b709a230 ("[NETNS][IPV6] icmp6 - make icmpv6_socket per namespace") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250205155120.1676781-12-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/icmp.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index a6984a29fdb9d..4d14ab7f7e99f 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -76,7 +76,7 @@ static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, { /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */ struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset); - struct net *net = dev_net(skb->dev); + struct net *net = dev_net_rcu(skb->dev); if (type == ICMPV6_PKT_TOOBIG) ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); @@ -473,7 +473,10 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, if (!skb->dev) return; - net = dev_net(skb->dev); + + rcu_read_lock(); + + net = dev_net_rcu(skb->dev); mark = IP6_REPLY_MARK(net, skb->mark); /* * Make sure we respect the rules @@ -496,7 +499,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, !(type == ICMPV6_PARAMPROB && code == ICMPV6_UNK_OPTION && (opt_unrec(skb, info)))) - return; + goto out; saddr = NULL; } @@ -526,7 +529,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n", &hdr->saddr, &hdr->daddr); - return; + goto out; } /* @@ -535,7 +538,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, if (is_ineligible(skb)) { net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n", &hdr->saddr, &hdr->daddr); - return; + goto out; } /* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */ @@ -582,7 +585,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, np = inet6_sk(sk); if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit)) - goto out; + goto out_unlock; tmp_hdr.icmp6_type = type; tmp_hdr.icmp6_code = code; @@ -600,7 +603,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, dst = icmpv6_route_lookup(net, skb, sk, &fl6); if (IS_ERR(dst)) - goto out; + goto out_unlock; ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); @@ -616,7 +619,6 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, goto out_dst_release; } - rcu_read_lock(); idev = __in6_dev_get(skb->dev); if (ip6_append_data(sk, icmpv6_getfrag, &msg, @@ -630,13 +632,15 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, len + sizeof(struct icmp6hdr)); } - rcu_read_unlock(); + out_dst_release: dst_release(dst); -out: +out_unlock: icmpv6_xmit_unlock(sk); out_bh_enable: local_bh_enable(); +out: + rcu_read_unlock(); } EXPORT_SYMBOL(icmp6_send); @@ -679,8 +683,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, skb_pull(skb2, nhs); skb_reset_network_header(skb2); - rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, - skb, 0); + rt = rt6_lookup(dev_net_rcu(skb->dev), &ipv6_hdr(skb2)->saddr, + NULL, 0, skb, 0); if (rt && rt->dst.dev) skb2->dev = rt->dst.dev; @@ -717,7 +721,7 @@ EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach); static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) { - struct net *net = dev_net(skb->dev); + struct net *net = dev_net_rcu(skb->dev); struct sock *sk; struct inet6_dev *idev; struct ipv6_pinfo *np; @@ -832,7 +836,7 @@ enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) { struct inet6_skb_parm *opt = IP6CB(skb); - struct net *net = dev_net(skb->dev); + struct net *net = dev_net_rcu(skb->dev); const struct inet6_protocol *ipprot; enum skb_drop_reason reason; int inner_offset; @@ -889,7 +893,7 @@ enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type, static int icmpv6_rcv(struct sk_buff *skb) { enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; - struct net *net = dev_net(skb->dev); + struct net *net = dev_net_rcu(skb->dev); struct net_device *dev = icmp6_dev(skb); struct inet6_dev *idev = __in6_dev_get(dev); const struct in6_addr *saddr, *daddr; @@ -921,7 +925,7 @@ static int icmpv6_rcv(struct sk_buff *skb) skb_set_network_header(skb, nh); } - __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS); + __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INMSGS); saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; @@ -939,7 +943,7 @@ static int icmpv6_rcv(struct sk_buff *skb) type = hdr->icmp6_type; - ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type); + ICMP6MSGIN_INC_STATS(dev_net_rcu(dev), idev, type); switch (type) { case ICMPV6_ECHO_REQUEST: @@ -1034,9 +1038,9 @@ static int icmpv6_rcv(struct sk_buff *skb) csum_error: reason = SKB_DROP_REASON_ICMP_CSUM; - __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS); + __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_CSUMERRORS); discard_it: - __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INERRORS); drop_no_count: kfree_skb_reason(skb, reason); return 0; -- GitLab From b768294d449da6d7dc0667c1ec92dc4af6ef766b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Feb 2025 15:51:20 +0000 Subject: [PATCH 0351/1585] ipv6: Use RCU in ip6_input() Instead of grabbing rcu_read_lock() from ip6_input_finish(), do it earlier in is caller, so that ip6_input() access to dev_net() can be validated by LOCKDEP. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250205155120.1676781-13-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_input.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 70c0e16c0ae68..39da6a7ce5f12 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -477,9 +477,7 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { skb_clear_delivery_time(skb); - rcu_read_lock(); ip6_protocol_deliver_rcu(net, skb, 0, false); - rcu_read_unlock(); return 0; } @@ -487,9 +485,15 @@ static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *sk int ip6_input(struct sk_buff *skb) { - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, - dev_net(skb->dev), NULL, skb, skb->dev, NULL, - ip6_input_finish); + int res; + + rcu_read_lock(); + res = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, + dev_net_rcu(skb->dev), NULL, skb, skb->dev, NULL, + ip6_input_finish); + rcu_read_unlock(); + + return res; } EXPORT_SYMBOL_GPL(ip6_input); -- GitLab From 6a774228e890ee04a0ee13f4e6e731ec8554b9c2 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Wed, 5 Feb 2025 12:03:01 +0100 Subject: [PATCH 0352/1585] net: ethtool: tsconfig: Fix netlink type of hwtstamp flags Fix the netlink type for hardware timestamp flags, which are represented as a bitset of flags. Although only one flag is supported currently, the correct netlink bitset type should be used instead of u32 to keep consistency with other fields. Address this by adding a new named string set description for the hwtstamp flag structure. The code has been introduced in the current release so the uAPI change is still okay. Signed-off-by: Kory Maincent Fixes: 6e9e2eed4f39 ("net: ethtool: Add support for tsconfig command to get/set hwtstamp config") Link: https://patch.msgid.link/20250205110304.375086-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 3 ++- include/uapi/linux/ethtool.h | 2 ++ net/ethtool/common.c | 5 ++++ net/ethtool/common.h | 2 ++ net/ethtool/strset.c | 5 ++++ net/ethtool/tsconfig.c | 33 +++++++++++++++++------- 6 files changed, 39 insertions(+), 11 deletions(-) diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 259cb211a3382..655d8d10fe248 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -1524,7 +1524,8 @@ attribute-sets: nested-attributes: bitset - name: hwtstamp-flags - type: u32 + type: nest + nested-attributes: bitset operations: enum-model: directional diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index d1089b88efc7d..9b18c4cfe56f8 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -682,6 +682,7 @@ enum ethtool_link_ext_substate_module { * @ETH_SS_STATS_ETH_CTRL: names of IEEE 802.3 MAC Control statistics * @ETH_SS_STATS_RMON: names of RMON statistics * @ETH_SS_STATS_PHY: names of PHY(dev) statistics + * @ETH_SS_TS_FLAGS: hardware timestamping flags * * @ETH_SS_COUNT: number of defined string sets */ @@ -708,6 +709,7 @@ enum ethtool_stringset { ETH_SS_STATS_ETH_CTRL, ETH_SS_STATS_RMON, ETH_SS_STATS_PHY, + ETH_SS_TS_FLAGS, /* add new constants above here */ ETH_SS_COUNT diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 2bd77c94f9f1a..d88e9080643b8 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -462,6 +462,11 @@ const char ts_rx_filter_names[][ETH_GSTRING_LEN] = { }; static_assert(ARRAY_SIZE(ts_rx_filter_names) == __HWTSTAMP_FILTER_CNT); +const char ts_flags_names[][ETH_GSTRING_LEN] = { + [const_ilog2(HWTSTAMP_FLAG_BONDED_PHC_INDEX)] = "bonded-phc-index", +}; +static_assert(ARRAY_SIZE(ts_flags_names) == __HWTSTAMP_FLAG_CNT); + const char udp_tunnel_type_names[][ETH_GSTRING_LEN] = { [ETHTOOL_UDP_TUNNEL_TYPE_VXLAN] = "vxlan", [ETHTOOL_UDP_TUNNEL_TYPE_GENEVE] = "geneve", diff --git a/net/ethtool/common.h b/net/ethtool/common.h index 850eadde4bfcc..58e9e7db06f90 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -13,6 +13,7 @@ ETHTOOL_LINK_MODE_ ## speed ## base ## type ## _ ## duplex ## _BIT #define __SOF_TIMESTAMPING_CNT (const_ilog2(SOF_TIMESTAMPING_LAST) + 1) +#define __HWTSTAMP_FLAG_CNT (const_ilog2(HWTSTAMP_FLAG_LAST) + 1) struct link_mode_info { int speed; @@ -38,6 +39,7 @@ extern const char wol_mode_names[][ETH_GSTRING_LEN]; extern const char sof_timestamping_names[][ETH_GSTRING_LEN]; extern const char ts_tx_type_names[][ETH_GSTRING_LEN]; extern const char ts_rx_filter_names[][ETH_GSTRING_LEN]; +extern const char ts_flags_names[][ETH_GSTRING_LEN]; extern const char udp_tunnel_type_names[][ETH_GSTRING_LEN]; int __ethtool_get_link(struct net_device *dev); diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index 818cf01f09110..6b76c05caba4d 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -75,6 +75,11 @@ static const struct strset_info info_template[] = { .count = __HWTSTAMP_FILTER_CNT, .strings = ts_rx_filter_names, }, + [ETH_SS_TS_FLAGS] = { + .per_dev = false, + .count = __HWTSTAMP_FLAG_CNT, + .strings = ts_flags_names, + }, [ETH_SS_UDP_TUNNEL_TYPES] = { .per_dev = false, .count = __ETHTOOL_UDP_TUNNEL_TYPE_CNT, diff --git a/net/ethtool/tsconfig.c b/net/ethtool/tsconfig.c index 9188e088fb2f9..2be356bdfe873 100644 --- a/net/ethtool/tsconfig.c +++ b/net/ethtool/tsconfig.c @@ -54,7 +54,7 @@ static int tsconfig_prepare_data(const struct ethnl_req_info *req_base, data->hwtst_config.tx_type = BIT(cfg.tx_type); data->hwtst_config.rx_filter = BIT(cfg.rx_filter); - data->hwtst_config.flags = BIT(cfg.flags); + data->hwtst_config.flags = cfg.flags; data->hwprov_desc.index = -1; hwprov = rtnl_dereference(dev->hwprov); @@ -91,10 +91,16 @@ static int tsconfig_reply_size(const struct ethnl_req_info *req_base, BUILD_BUG_ON(__HWTSTAMP_TX_CNT > 32); BUILD_BUG_ON(__HWTSTAMP_FILTER_CNT > 32); + BUILD_BUG_ON(__HWTSTAMP_FLAG_CNT > 32); - if (data->hwtst_config.flags) - /* _TSCONFIG_HWTSTAMP_FLAGS */ - len += nla_total_size(sizeof(u32)); + if (data->hwtst_config.flags) { + ret = ethnl_bitset32_size(&data->hwtst_config.flags, + NULL, __HWTSTAMP_FLAG_CNT, + ts_flags_names, compact); + if (ret < 0) + return ret; + len += ret; /* _TSCONFIG_HWTSTAMP_FLAGS */ + } if (data->hwtst_config.tx_type) { ret = ethnl_bitset32_size(&data->hwtst_config.tx_type, @@ -130,8 +136,10 @@ static int tsconfig_fill_reply(struct sk_buff *skb, int ret; if (data->hwtst_config.flags) { - ret = nla_put_u32(skb, ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS, - data->hwtst_config.flags); + ret = ethnl_put_bitset32(skb, ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS, + &data->hwtst_config.flags, NULL, + __HWTSTAMP_FLAG_CNT, + ts_flags_names, compact); if (ret < 0) return ret; } @@ -180,7 +188,7 @@ const struct nla_policy ethnl_tsconfig_set_policy[ETHTOOL_A_TSCONFIG_MAX + 1] = [ETHTOOL_A_TSCONFIG_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), [ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER] = NLA_POLICY_NESTED(ethnl_ts_hwtst_prov_policy), - [ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS] = { .type = NLA_U32 }, + [ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS] = { .type = NLA_NESTED }, [ETHTOOL_A_TSCONFIG_RX_FILTERS] = { .type = NLA_NESTED }, [ETHTOOL_A_TSCONFIG_TX_TYPES] = { .type = NLA_NESTED }, }; @@ -296,6 +304,7 @@ static int ethnl_set_tsconfig(struct ethnl_req_info *req_base, BUILD_BUG_ON(__HWTSTAMP_TX_CNT >= 32); BUILD_BUG_ON(__HWTSTAMP_FILTER_CNT >= 32); + BUILD_BUG_ON(__HWTSTAMP_FLAG_CNT > 32); if (!netif_device_present(dev)) return -ENODEV; @@ -377,9 +386,13 @@ static int ethnl_set_tsconfig(struct ethnl_req_info *req_base, } if (tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS]) { - ethnl_update_u32(&hwtst_config.flags, - tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS], - &config_mod); + ret = ethnl_update_bitset32(&hwtst_config.flags, + __HWTSTAMP_FLAG_CNT, + tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS], + ts_flags_names, info->extack, + &config_mod); + if (ret < 0) + goto err_free_hwprov; } ret = net_hwtstamp_validate(&hwtst_config); -- GitLab From bca0902e61731a75fc4860c8720168d9f1bae3b6 Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Mon, 3 Feb 2025 12:12:03 +0300 Subject: [PATCH 0353/1585] ax25: Fix refcount leak caused by setting SO_BINDTODEVICE sockopt If an AX25 device is bound to a socket by setting the SO_BINDTODEVICE socket option, a refcount leak will occur in ax25_release(). Commit 9fd75b66b8f6 ("ax25: Fix refcount leaks caused by ax25_cb_del()") added decrement of device refcounts in ax25_release(). In order for that to work correctly the refcounts must already be incremented when the device is bound to the socket. An AX25 device can be bound to a socket by either calling ax25_bind() or setting SO_BINDTODEVICE socket option. In both cases the refcounts should be incremented, but in fact it is done only in ax25_bind(). This bug leads to the following issue reported by Syzkaller: ================================================================ refcount_t: decrement hit 0; leaking memory. WARNING: CPU: 1 PID: 5932 at lib/refcount.c:31 refcount_warn_saturate+0x1ed/0x210 lib/refcount.c:31 Modules linked in: CPU: 1 UID: 0 PID: 5932 Comm: syz-executor424 Not tainted 6.13.0-rc4-syzkaller-00110-g4099a71718b0 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:refcount_warn_saturate+0x1ed/0x210 lib/refcount.c:31 Call Trace: __refcount_dec include/linux/refcount.h:336 [inline] refcount_dec include/linux/refcount.h:351 [inline] ref_tracker_free+0x710/0x820 lib/ref_tracker.c:236 netdev_tracker_free include/linux/netdevice.h:4156 [inline] netdev_put include/linux/netdevice.h:4173 [inline] netdev_put include/linux/netdevice.h:4169 [inline] ax25_release+0x33f/0xa10 net/ax25/af_ax25.c:1069 __sock_release+0xb0/0x270 net/socket.c:640 sock_close+0x1c/0x30 net/socket.c:1408 ... do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f ... ================================================================ Fix the implementation of ax25_setsockopt() by adding increment of refcounts for the new device bound, and decrement of refcounts for the old unbound device. Fixes: 9fd75b66b8f6 ("ax25: Fix refcount leaks caused by ax25_cb_del()") Reported-by: syzbot+33841dc6aa3e1d86b78a@syzkaller.appspotmail.com Signed-off-by: Murad Masimov Link: https://patch.msgid.link/20250203091203.1744-1-m.masimov@mt-integration.ru Signed-off-by: Jakub Kicinski --- net/ax25/af_ax25.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index aa6c714892ec9..9f3b8b682adb2 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -685,6 +685,15 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; } + if (ax25->ax25_dev) { + if (dev == ax25->ax25_dev->dev) { + rcu_read_unlock(); + break; + } + netdev_put(ax25->ax25_dev->dev, &ax25->dev_tracker); + ax25_dev_put(ax25->ax25_dev); + } + ax25->ax25_dev = ax25_dev_ax25dev(dev); if (!ax25->ax25_dev) { rcu_read_unlock(); @@ -692,6 +701,8 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; } ax25_fillin_cb(ax25, ax25->ax25_dev); + netdev_hold(dev, &ax25->dev_tracker, GFP_ATOMIC); + ax25_dev_hold(ax25->ax25_dev); rcu_read_unlock(); break; -- GitLab From 1438f5d07b9a7afb15e1d0e26df04a6fd4e56a3c Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 5 Feb 2025 23:10:37 +0100 Subject: [PATCH 0354/1585] rtnetlink: fix netns leak with rtnl_setlink() A call to rtnl_nets_destroy() is needed to release references taken on netns put in rtnl_nets. CC: stable@vger.kernel.org Fixes: 636af13f213b ("rtnetlink: Register rtnl_dellink() and rtnl_setlink() with RTNL_FLAG_DOIT_PERNET_WIP.") Signed-off-by: Nicolas Dichtel Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250205221037.2474426-1-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1f4d4b5570ab8..d1e559fce918d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3432,6 +3432,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, err = -ENODEV; rtnl_nets_unlock(&rtnl_nets); + rtnl_nets_destroy(&rtnl_nets); errout: return err; } -- GitLab From cb7380de9e4cbc9a24216b722ec50e092ae83036 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 5 Feb 2025 12:32:49 -0800 Subject: [PATCH 0355/1585] compiler.h: Move C string helpers into C-only kernel section The C kernel helpers for evaluating C Strings were positioned where they were visible to assembly inclusion, which was not intended. Move them into the kernel and C-only area of the header so future changes won't confuse the assembler. Fixes: d7a516c6eeae ("compiler.h: Fix undefined BUILD_BUG_ON_ZERO()") Fixes: 559048d156ff ("string: Check for "nonstring" attribute on strscpy() arguments") Reviewed-by: Miguel Ojeda Signed-off-by: Kees Cook --- include/linux/compiler.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 240c632c5b957..7af999a131cb2 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -214,6 +214,19 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, __v; \ }) +#ifdef __CHECKER__ +#define __BUILD_BUG_ON_ZERO_MSG(e, msg) (0) +#else /* __CHECKER__ */ +#define __BUILD_BUG_ON_ZERO_MSG(e, msg) ((int)sizeof(struct {_Static_assert(!(e), msg);})) +#endif /* __CHECKER__ */ + +/* &a[0] degrades to a pointer: a different type from an array */ +#define __must_be_array(a) __BUILD_BUG_ON_ZERO_MSG(__same_type((a), &(a)[0]), "must be array") + +/* Require C Strings (i.e. NUL-terminated) lack the "nonstring" attribute. */ +#define __must_be_cstr(p) \ + __BUILD_BUG_ON_ZERO_MSG(__annotated(p, nonstring), "must be cstr (NUL-terminated)") + #endif /* __KERNEL__ */ /** @@ -254,19 +267,6 @@ static inline void *offset_to_ptr(const int *off) #define __ADDRESSABLE_ASM_STR(sym) __stringify(__ADDRESSABLE_ASM(sym)) -#ifdef __CHECKER__ -#define __BUILD_BUG_ON_ZERO_MSG(e, msg) (0) -#else /* __CHECKER__ */ -#define __BUILD_BUG_ON_ZERO_MSG(e, msg) ((int)sizeof(struct {_Static_assert(!(e), msg);})) -#endif /* __CHECKER__ */ - -/* &a[0] degrades to a pointer: a different type from an array */ -#define __must_be_array(a) __BUILD_BUG_ON_ZERO_MSG(__same_type((a), &(a)[0]), "must be array") - -/* Require C Strings (i.e. NUL-terminated) lack the "nonstring" attribute. */ -#define __must_be_cstr(p) \ - __BUILD_BUG_ON_ZERO_MSG(__annotated(p, nonstring), "must be cstr (NUL-terminated)") - /* * This returns a constant expression while determining if an argument is * a constant expression, most importantly without evaluating the argument. -- GitLab From 20e5cc26e56db09cc612721f90b4994cce5e5b7b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 5 Feb 2025 12:48:07 -0800 Subject: [PATCH 0356/1585] compiler.h: Introduce __must_be_byte_array() In preparation for adding stricter type checking to the str/mem*() helpers, provide a way to check that a variable is a byte array via __must_be_byte_array(). Suggested-by: Kent Overstreet Signed-off-by: Kees Cook --- include/linux/compiler.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 7af999a131cb2..1c0688319435d 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -221,7 +221,13 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #endif /* __CHECKER__ */ /* &a[0] degrades to a pointer: a different type from an array */ -#define __must_be_array(a) __BUILD_BUG_ON_ZERO_MSG(__same_type((a), &(a)[0]), "must be array") +#define __is_array(a) (!__same_type((a), &(a)[0])) +#define __must_be_array(a) __BUILD_BUG_ON_ZERO_MSG(!__is_array(a), \ + "must be array") + +#define __is_byte_array(a) (__is_array(a) && sizeof((a)[0]) == 1) +#define __must_be_byte_array(a) __BUILD_BUG_ON_ZERO_MSG(!__is_byte_array(a), \ + "must be byte array") /* Require C Strings (i.e. NUL-terminated) lack the "nonstring" attribute. */ #define __must_be_cstr(p) \ -- GitLab From 6270f4deba3fbd77d1717fb8634f1fc612ff69e2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 5 Feb 2025 13:45:26 -0800 Subject: [PATCH 0357/1585] string.h: Use ARRAY_SIZE() for memtostr*()/strtomem*() The destination argument of memtostr*() and strtomem*() must be a fixed-size char array at compile time, so there is no need to use __builtin_object_size() (which is useful for when an argument is either a pointer or unknown). Instead use ARRAY_SIZE(), which has the benefit of working around a bug in Clang (fixed[1] in 15+) that got __builtin_object_size() wrong sometimes. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501310832.kiAeOt2z-lkp@intel.com/ Suggested-by: Kent Overstreet Link: https://github.com/llvm/llvm-project/commit/d8e0a6d5e9dd2311641f9a8a5d2bf90829951ddc [1] Tested-by: Suren Baghdasaryan Signed-off-by: Kees Cook --- include/linux/string.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/include/linux/string.h b/include/linux/string.h index 493ac4862c777..fc5ae145bd78f 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -411,7 +411,8 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, * must be discoverable by the compiler. */ #define strtomem_pad(dest, src, pad) do { \ - const size_t _dest_len = __builtin_object_size(dest, 1); \ + const size_t _dest_len = __must_be_byte_array(dest) + \ + ARRAY_SIZE(dest); \ const size_t _src_len = __builtin_object_size(src, 1); \ \ BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ @@ -434,7 +435,8 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, * must be discoverable by the compiler. */ #define strtomem(dest, src) do { \ - const size_t _dest_len = __builtin_object_size(dest, 1); \ + const size_t _dest_len = __must_be_byte_array(dest) + \ + ARRAY_SIZE(dest); \ const size_t _src_len = __builtin_object_size(src, 1); \ \ BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ @@ -453,7 +455,8 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, * Note that sizes of @dest and @src must be known at compile-time. */ #define memtostr(dest, src) do { \ - const size_t _dest_len = __builtin_object_size(dest, 1); \ + const size_t _dest_len = __must_be_byte_array(dest) + \ + ARRAY_SIZE(dest); \ const size_t _src_len = __builtin_object_size(src, 1); \ const size_t _src_chars = strnlen(src, _src_len); \ const size_t _copy_len = min(_dest_len - 1, _src_chars); \ @@ -478,7 +481,8 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, * Note that sizes of @dest and @src must be known at compile-time. */ #define memtostr_pad(dest, src) do { \ - const size_t _dest_len = __builtin_object_size(dest, 1); \ + const size_t _dest_len = __must_be_byte_array(dest) + \ + ARRAY_SIZE(dest); \ const size_t _src_len = __builtin_object_size(src, 1); \ const size_t _src_chars = strnlen(src, _src_len); \ const size_t _copy_len = min(_dest_len - 1, _src_chars); \ -- GitLab From fdfd0ad82890f678398ee670c4e59747738540e7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 1 Feb 2025 12:56:51 -0500 Subject: [PATCH 0358/1585] bcachefs docs: SubmittingPatches.rst Add an (initial?) patch submission checklist, focusing mainly on testing. Yes, all patches must be tested, and that starts (but does not end) with the patch author. Signed-off-by: Kent Overstreet --- .../bcachefs/SubmittingPatches.rst | 98 +++++++++++++++++++ Documentation/filesystems/bcachefs/index.rst | 1 + MAINTAINERS | 1 + 3 files changed, 100 insertions(+) create mode 100644 Documentation/filesystems/bcachefs/SubmittingPatches.rst diff --git a/Documentation/filesystems/bcachefs/SubmittingPatches.rst b/Documentation/filesystems/bcachefs/SubmittingPatches.rst new file mode 100644 index 0000000000000..026b12ae0d6a2 --- /dev/null +++ b/Documentation/filesystems/bcachefs/SubmittingPatches.rst @@ -0,0 +1,98 @@ +Submitting patches to bcachefs: +=============================== + +Patches must be tested before being submitted, either with the xfstests suite +[0], or the full bcachefs test suite in ktest [1], depending on what's being +touched. Note that ktest wraps xfstests and will be an easier method to running +it for most users; it includes single-command wrappers for all the mainstream +in-kernel local filesystems. + +Patches will undergo more testing after being merged (including +lockdep/kasan/preempt/etc. variants), these are not generally required to be +run by the submitter - but do put some thought into what you're changing and +which tests might be relevant, e.g. are you dealing with tricky memory layout +work? kasan, are you doing locking work? then lockdep; and ktest includes +single-command variants for the debug build types you'll most likely need. + +The exception to this rule is incomplete WIP/RFC patches: if you're working on +something nontrivial, it's encouraged to send out a WIP patch to let people +know what you're doing and make sure you're on the right track. Just make sure +it includes a brief note as to what's done and what's incomplete, to avoid +confusion. + +Rigorous checkpatch.pl adherence is not required (many of its warnings are +considered out of date), but try not to deviate too much without reason. + +Focus on writing code that reads well and is organized well; code should be +aesthetically pleasing. + +CI: +=== + +Instead of running your tests locally, when running the full test suite it's +prefereable to let a server farm do it in parallel, and then have the results +in a nice test dashboard (which can tell you which failures are new, and +presents results in a git log view, avoiding the need for most bisecting). + +That exists [2], and community members may request an account. If you work for +a big tech company, you'll need to help out with server costs to get access - +but the CI is not restricted to running bcachefs tests: it runs any ktest test +(which generally makes it easy to wrap other tests that can run in qemu). + +Other things to think about: +============================ + +- How will we debug this code? Is there sufficient introspection to diagnose + when something starts acting wonky on a user machine? + + We don't necessarily need every single field of every data structure visible + with introspection, but having the important fields of all the core data + types wired up makes debugging drastically easier - a bit of thoughtful + foresight greatly reduces the need to have people build custom kernels with + debug patches. + + More broadly, think about all the debug tooling that might be needed. + +- Does it make the codebase more or less of a mess? Can we also try to do some + organizing, too? + +- Do new tests need to be written? New assertions? How do we know and verify + that the code is correct, and what happens if something goes wrong? + + We don't yet have automated code coverage analysis or easy fault injection - + but for now, pretend we did and ask what they might tell us. + + Assertions are hugely important, given that we don't yet have a systems + language that can do ergonomic embedded correctness proofs. Hitting an assert + in testing is much better than wandering off into undefined behaviour la-la + land - use them. Use them judiciously, and not as a replacement for proper + error handling, but use them. + +- Does it need to be performance tested? Should we add new peformance counters? + + bcachefs has a set of persistent runtime counters which can be viewed with + the 'bcachefs fs top' command; this should give users a basic idea of what + their filesystem is currently doing. If you're doing a new feature or looking + at old code, think if anything should be added. + +- If it's a new on disk format feature - have upgrades and downgrades been + tested? (Automated tests exists but aren't in the CI, due to the hassle of + disk image management; coordinate to have them run.) + +Mailing list, IRC: +================== + +Patches should hit the list [3], but much discussion and code review happens on +IRC as well [4]; many people appreciate the more conversational approach and +quicker feedback. + +Additionally, we have a lively user community doing excellent QA work, which +exists primarily on IRC. Please make use of that resource; user feedback is +important for any nontrivial feature, and documenting it in commit messages +would be a good idea. + +[0]: git://git.kernel.org/pub/scm/fs/xfs/xfstests-dev.git +[1]: https://evilpiepirate.org/git/ktest.git/ +[2]: https://evilpiepirate.org/~testdashboard/ci/ +[3]: linux-bcachefs@vger.kernel.org +[4]: irc.oftc.net#bcache, #bcachefs-dev diff --git a/Documentation/filesystems/bcachefs/index.rst b/Documentation/filesystems/bcachefs/index.rst index 95fc4b90739ed..7db4d7ceab582 100644 --- a/Documentation/filesystems/bcachefs/index.rst +++ b/Documentation/filesystems/bcachefs/index.rst @@ -9,4 +9,5 @@ bcachefs Documentation :numbered: CodingStyle + SubmittingPatches errorcodes diff --git a/MAINTAINERS b/MAINTAINERS index 896a307fa0654..c40d3d0c68c70 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3955,6 +3955,7 @@ M: Kent Overstreet L: linux-bcachefs@vger.kernel.org S: Supported C: irc://irc.oftc.net/bcache +P: Documentation/filesystems/bcachefs/SubmittingPatches.rst T: git https://evilpiepirate.org/git/bcachefs.git F: fs/bcachefs/ F: Documentation/filesystems/bcachefs/ -- GitLab From 6b37037d6d1b42083642340efcf80f7a30203039 Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Sat, 1 Feb 2025 01:20:31 +0900 Subject: [PATCH 0359/1585] bcachefs: fix incorrect pointer check in __bch2_subvolume_delete() For some unknown reason, checks on struct bkey_s_c_snapshot and struct bkey_s_c_snapshot_tree pointers are missing. Therefore, I think it would be appropriate to fix the incorrect pointer checking through this patch. Fixes: 4bd06f07bcb5 ("bcachefs: Fixes for snapshot_tree.master_subvol") Signed-off-by: Jeongjun Park Signed-off-by: Kent Overstreet --- fs/bcachefs/subvolume.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index e3d0475232e53..b7b96283c3161 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -428,7 +428,7 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) bch2_bkey_get_iter_typed(trans, &snapshot_iter, BTREE_ID_snapshots, POS(0, snapid), 0, snapshot); - ret = bkey_err(subvol); + ret = bkey_err(snapshot); bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, "missing snapshot %u", snapid); if (ret) @@ -440,6 +440,11 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) bch2_bkey_get_iter_typed(trans, &snapshot_tree_iter, BTREE_ID_snapshot_trees, POS(0, treeid), 0, snapshot_tree); + ret = bkey_err(snapshot_tree); + bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, + "missing snapshot tree %u", treeid); + if (ret) + goto err; if (le32_to_cpu(snapshot_tree.v->master_subvol) == subvolid) { struct bkey_i_snapshot_tree *snapshot_tree_mut = -- GitLab From 2ef995df0ce592f665d312008dbe1ad1c4bcf87f Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Sun, 2 Feb 2025 15:13:51 +0900 Subject: [PATCH 0360/1585] bcachefs: fix deadlock in journal_entry_open() In the previous commit b3d82c2f2761, code was added to prevent journal sequence overflow. Among them, the code added to journal_entry_open() uses the bch2_fs_fatal_err_on() function to handle errors. However, __journal_res_get() , which calls journal_entry_open() , calls journal_entry_open() while holding journal->lock , but bch2_fs_fatal_err_on() internally tries to acquire journal->lock , which results in a deadlock. So we need to add a locked helper to handle fatal errors even when the journal->lock is held. Fixes: b3d82c2f2761 ("bcachefs: Guard against journal seq overflow") Signed-off-by: Jeongjun Park Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 17 +++++++++++++++-- fs/bcachefs/journal.h | 1 + fs/bcachefs/super.c | 11 +++++++++++ fs/bcachefs/super.h | 1 + 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index cb2c3722f6741..0a943a27ef449 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -319,6 +319,16 @@ void bch2_journal_halt(struct journal *j) spin_unlock(&j->lock); } +void bch2_journal_halt_locked(struct journal *j) +{ + lockdep_assert_held(&j->lock); + + __journal_entry_close(j, JOURNAL_ENTRY_ERROR_VAL, true); + if (!j->err_seq) + j->err_seq = journal_cur_seq(j); + journal_wake(j); +} + static bool journal_entry_want_write(struct journal *j) { bool ret = !journal_entry_is_open(j) || @@ -381,9 +391,12 @@ static int journal_entry_open(struct journal *j) if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf)) return JOURNAL_ERR_max_in_flight; - if (bch2_fs_fatal_err_on(journal_cur_seq(j) >= JOURNAL_SEQ_MAX, - c, "cannot start: journal seq overflow")) + if (journal_cur_seq(j) >= JOURNAL_SEQ_MAX) { + bch_err(c, "cannot start: journal seq overflow"); + if (bch2_fs_emergency_read_only_locked(c)) + bch_err(c, "fatal error - emergency read only"); return JOURNAL_ERR_insufficient_devices; /* -EROFS */ + } BUG_ON(!j->cur_entry_sectors); diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index dccddd5420adf..107f7f901cd96 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -409,6 +409,7 @@ bool bch2_journal_noflush_seq(struct journal *, u64, u64); int bch2_journal_meta(struct journal *); void bch2_journal_halt(struct journal *); +void bch2_journal_halt_locked(struct journal *); static inline int bch2_journal_error(struct journal *j) { diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index d97ea7bd1171b..6d97d412fed98 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -411,6 +411,17 @@ bool bch2_fs_emergency_read_only(struct bch_fs *c) return ret; } +bool bch2_fs_emergency_read_only_locked(struct bch_fs *c) +{ + bool ret = !test_and_set_bit(BCH_FS_emergency_ro, &c->flags); + + bch2_journal_halt_locked(&c->journal); + bch2_fs_read_only_async(c); + + wake_up(&bch2_read_only_wait); + return ret; +} + static int bch2_fs_read_write_late(struct bch_fs *c) { int ret; diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index fa6d522165108..04f8287eff5c3 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -29,6 +29,7 @@ int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64); struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *); bool bch2_fs_emergency_read_only(struct bch_fs *); +bool bch2_fs_emergency_read_only_locked(struct bch_fs *); void bch2_fs_read_only(struct bch_fs *); int bch2_fs_read_write(struct bch_fs *); -- GitLab From 9e9033522ad1e4bb697c9493aa449630fa2c98d2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 27 Jan 2025 01:21:44 -0500 Subject: [PATCH 0361/1585] bcachefs: Fix discard path journal flushing The discard path is supposed to issue journal flushes when there's too many buckets empty buckets that need a journal commit before they can be written to again, but at some point this code seems to have been lost. Bring it back with a new optimization to make sure we don't issue too many journal flushes: the journal now tracks the sequence number of the most recent flush in progress, which the discard path uses when deciding which buckets need a journal flush. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 47 ++++++++++++----------- fs/bcachefs/alloc_foreground.c | 10 +++-- fs/bcachefs/alloc_types.h | 1 + fs/bcachefs/buckets_waiting_for_journal.c | 12 +++--- fs/bcachefs/buckets_waiting_for_journal.h | 4 +- fs/bcachefs/journal.c | 1 + fs/bcachefs/journal_types.h | 1 + fs/bcachefs/trace.h | 14 ++++++- 8 files changed, 55 insertions(+), 35 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index fc2ef33b67b38..3ea809990ef1a 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1803,7 +1803,6 @@ struct discard_buckets_state { u64 open; u64 need_journal_commit; u64 discarded; - u64 need_journal_commit_this_dev; }; static int bch2_discard_one_bucket(struct btree_trans *trans, @@ -1827,11 +1826,11 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, goto out; } - if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, - c->journal.flushed_seq_ondisk, - pos.inode, pos.offset)) { - s->need_journal_commit++; - s->need_journal_commit_this_dev++; + u64 seq_ready = bch2_bucket_journal_seq_ready(&c->buckets_waiting_for_journal, + pos.inode, pos.offset); + if (seq_ready > c->journal.flushed_seq_ondisk) { + if (seq_ready > c->journal.flushing_seq) + s->need_journal_commit++; goto out; } @@ -1865,23 +1864,24 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, discard_locked = true; } - if (!bkey_eq(*discard_pos_done, iter.pos) && - ca->mi.discard && !c->opts.nochanges) { - /* - * This works without any other locks because this is the only - * thread that removes items from the need_discard tree - */ - bch2_trans_unlock_long(trans); - blkdev_issue_discard(ca->disk_sb.bdev, - k.k->p.offset * ca->mi.bucket_size, - ca->mi.bucket_size, - GFP_KERNEL); - *discard_pos_done = iter.pos; + if (!bkey_eq(*discard_pos_done, iter.pos)) { s->discarded++; + *discard_pos_done = iter.pos; - ret = bch2_trans_relock_notrace(trans); - if (ret) - goto out; + if (ca->mi.discard && !c->opts.nochanges) { + /* + * This works without any other locks because this is the only + * thread that removes items from the need_discard tree + */ + bch2_trans_unlock_long(trans); + blkdev_issue_discard(ca->disk_sb.bdev, + k.k->p.offset * ca->mi.bucket_size, + ca->mi.bucket_size, + GFP_KERNEL); + ret = bch2_trans_relock_notrace(trans); + if (ret) + goto out; + } } SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); @@ -1929,6 +1929,9 @@ static void bch2_do_discards_work(struct work_struct *work) POS(ca->dev_idx, U64_MAX), 0, k, bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s, false))); + if (s.need_journal_commit > dev_buckets_available(ca, BCH_WATERMARK_normal)) + bch2_journal_flush_async(&c->journal, NULL); + trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); @@ -2024,7 +2027,7 @@ static void bch2_do_discards_fast_work(struct work_struct *work) break; } - trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); + trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); bch2_trans_put(trans); percpu_ref_put(&ca->io_ref); diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 6df41c331a52e..5a781fb4c794b 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -205,8 +205,12 @@ static inline bool may_alloc_bucket(struct bch_fs *c, return false; } - if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, - c->journal.flushed_seq_ondisk, bucket.inode, bucket.offset)) { + u64 journal_seq_ready = + bch2_bucket_journal_seq_ready(&c->buckets_waiting_for_journal, + bucket.inode, bucket.offset); + if (journal_seq_ready > c->journal.flushed_seq_ondisk) { + if (journal_seq_ready > c->journal.flushing_seq) + s->need_journal_commit++; s->skipped_need_journal_commit++; return false; } @@ -570,7 +574,7 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, ? bch2_bucket_alloc_freelist(trans, ca, watermark, &s, cl) : bch2_bucket_alloc_early(trans, ca, watermark, &s, cl); - if (s.skipped_need_journal_commit * 2 > avail) + if (s.need_journal_commit * 2 > avail) bch2_journal_flush_async(&c->journal, NULL); if (!ob && s.btree_bitmap != BTREE_BITMAP_ANY) { diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h index 9bbb28e90b934..4aa8ee026cb84 100644 --- a/fs/bcachefs/alloc_types.h +++ b/fs/bcachefs/alloc_types.h @@ -18,6 +18,7 @@ struct bucket_alloc_state { u64 buckets_seen; u64 skipped_open; u64 skipped_need_journal_commit; + u64 need_journal_commit; u64 skipped_nocow; u64 skipped_nouse; u64 skipped_mi_btree_bitmap; diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c index f9fb150eda706..c8a488e6b7b86 100644 --- a/fs/bcachefs/buckets_waiting_for_journal.c +++ b/fs/bcachefs/buckets_waiting_for_journal.c @@ -22,23 +22,21 @@ static void bucket_table_init(struct buckets_waiting_for_journal_table *t, size_ memset(t->d, 0, sizeof(t->d[0]) << t->bits); } -bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, - u64 flushed_seq, - unsigned dev, u64 bucket) +u64 bch2_bucket_journal_seq_ready(struct buckets_waiting_for_journal *b, + unsigned dev, u64 bucket) { struct buckets_waiting_for_journal_table *t; u64 dev_bucket = (u64) dev << 56 | bucket; - bool ret = false; - unsigned i; + u64 ret = 0; mutex_lock(&b->lock); t = b->t; - for (i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) { + for (unsigned i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) { struct bucket_hashed *h = bucket_hash(t, i, dev_bucket); if (h->dev_bucket == dev_bucket) { - ret = h->journal_seq > flushed_seq; + ret = h->journal_seq; break; } } diff --git a/fs/bcachefs/buckets_waiting_for_journal.h b/fs/bcachefs/buckets_waiting_for_journal.h index d2ae19cbe18c4..365619ca44c87 100644 --- a/fs/bcachefs/buckets_waiting_for_journal.h +++ b/fs/bcachefs/buckets_waiting_for_journal.h @@ -4,8 +4,8 @@ #include "buckets_waiting_for_journal_types.h" -bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *, - u64, unsigned, u64); +u64 bch2_bucket_journal_seq_ready(struct buckets_waiting_for_journal *, + unsigned, u64); int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *, u64, unsigned, u64, u64); diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 0a943a27ef449..24c294d4634e0 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -796,6 +796,7 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq, } buf->must_flush = true; + j->flushing_seq = max(j->flushing_seq, seq); if (parent && !closure_wait(&buf->wait, parent)) BUG(); diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index 3ba433a48eb8a..a198a81d74784 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -237,6 +237,7 @@ struct journal { /* seq, last_seq from the most recent journal entry successfully written */ u64 seq_ondisk; u64 flushed_seq_ondisk; + u64 flushing_seq; u64 last_seq_ondisk; u64 err_seq; u64 last_empty_seq; diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 56a5a7fbc0fd1..c1b51009edf6b 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -727,7 +727,7 @@ DEFINE_EVENT(fs_str, bucket_alloc_fail, TP_ARGS(c, str) ); -TRACE_EVENT(discard_buckets, +DECLARE_EVENT_CLASS(discard_buckets_class, TP_PROTO(struct bch_fs *c, u64 seen, u64 open, u64 need_journal_commit, u64 discarded, const char *err), TP_ARGS(c, seen, open, need_journal_commit, discarded, err), @@ -759,6 +759,18 @@ TRACE_EVENT(discard_buckets, __entry->err) ); +DEFINE_EVENT(discard_buckets_class, discard_buckets, + TP_PROTO(struct bch_fs *c, u64 seen, u64 open, + u64 need_journal_commit, u64 discarded, const char *err), + TP_ARGS(c, seen, open, need_journal_commit, discarded, err) +); + +DEFINE_EVENT(discard_buckets_class, discard_buckets_fast, + TP_PROTO(struct bch_fs *c, u64 seen, u64 open, + u64 need_journal_commit, u64 discarded, const char *err), + TP_ARGS(c, seen, open, need_journal_commit, discarded, err) +); + TRACE_EVENT(bucket_invalidate, TP_PROTO(struct bch_fs *c, unsigned dev, u64 bucket, u32 sectors), TP_ARGS(c, dev, bucket, sectors), -- GitLab From 3539880ef1a5f8c970d0f69a6fdcfeffc000e63d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 3 Feb 2025 11:35:11 -0500 Subject: [PATCH 0362/1585] bcachefs: Fix rcu imbalance in bch2_fs_btree_key_cache_exit() Spotted by sparse. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index c378b97ebeca7..1821f40c161a1 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -748,7 +748,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) rcu_read_unlock(); mutex_lock(&bc->table.mutex); mutex_unlock(&bc->table.mutex); - rcu_read_lock(); continue; } for (i = 0; i < tbl->size; i++) -- GitLab From 4be214c26936813b636eed2fac906f585ddbf0f9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 25 Jan 2025 21:29:45 -0500 Subject: [PATCH 0363/1585] bcachefs: bch2_bkey_sectors_need_rebalance() now only depends on bch_extent_rebalance Previously, bch2_bkey_sectors_need_rebalance() called bch2_target_accepts_data(), checking whether the target is writable. However, this means that adding or removing devices from a target would change the value of bch2_bkey_sectors_need_rebalance() for an existing extent; this needs to be invariant so that the extent trigger can correctly maintain rebalance_work accounting. Instead, check target_accepts_data() in io_opts_to_rebalance_opts(), before creating the bch_extent_rebalance entry. This fixes (one?) cause of rebalance_work accounting being off. Signed-off-by: Kent Overstreet --- fs/bcachefs/inode.h | 4 +++- fs/bcachefs/opts.h | 14 -------------- fs/bcachefs/rebalance.c | 8 +++----- fs/bcachefs/rebalance.h | 20 ++++++++++++++++++++ 4 files changed, 26 insertions(+), 20 deletions(-) diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index d2e134528f0e6..428b9be6af34b 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -285,12 +285,14 @@ void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *, struct bch_inode_unpacked *); int bch2_inum_opts_get(struct btree_trans*, subvol_inum, struct bch_io_opts *); +#include "rebalance.h" + static inline struct bch_extent_rebalance bch2_inode_rebalance_opts_get(struct bch_fs *c, struct bch_inode_unpacked *inode) { struct bch_io_opts io_opts; bch2_inode_opts_get(&io_opts, c, inode); - return io_opts_to_rebalance_opts(&io_opts); + return io_opts_to_rebalance_opts(c, &io_opts); } int bch2_inode_rm_snapshot(struct btree_trans *, u64, u32); diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index a182b5d454ba6..9d397fc2a1f05 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -659,18 +659,4 @@ static inline void bch2_io_opts_fixups(struct bch_io_opts *opts) struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts); bool bch2_opt_is_inode_opt(enum bch_opt_id); -/* rebalance opts: */ - -static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_io_opts *opts) -{ - return (struct bch_extent_rebalance) { - .type = BIT(BCH_EXTENT_ENTRY_rebalance), -#define x(_name) \ - ._name = opts->_name, \ - ._name##_from_inode = opts->_name##_from_inode, - BCH_REBALANCE_OPTS() -#undef x - }; -}; - #endif /* _BCACHEFS_OPTS_H */ diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 4adc74cd3f70b..d0a1f5cd5c2b3 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -121,12 +121,10 @@ u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) } } incompressible: - if (opts->background_target && - bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target)) { + if (opts->background_target) bkey_for_each_ptr_decode(k.k, ptrs, p, entry) if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, opts->background_target)) sectors += p.crc.compressed_size; - } return sectors; } @@ -140,7 +138,7 @@ static bool bch2_bkey_rebalance_needs_update(struct bch_fs *c, struct bch_io_opt const struct bch_extent_rebalance *old = bch2_bkey_rebalance_opts(k); if (k.k->type == KEY_TYPE_reflink_v || bch2_bkey_ptrs_need_rebalance(c, opts, k)) { - struct bch_extent_rebalance new = io_opts_to_rebalance_opts(opts); + struct bch_extent_rebalance new = io_opts_to_rebalance_opts(c, opts); return old == NULL || memcmp(old, &new, sizeof(new)); } else { return old != NULL; @@ -163,7 +161,7 @@ int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_io_opts *opts, k.k->u64s += sizeof(*old) / sizeof(u64); } - *old = io_opts_to_rebalance_opts(opts); + *old = io_opts_to_rebalance_opts(c, opts); } else { if (old) extent_entry_drop(k, (union bch_extent_entry *) old); diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h index 0a0821ab895d8..62a3859d3823f 100644 --- a/fs/bcachefs/rebalance.h +++ b/fs/bcachefs/rebalance.h @@ -4,8 +4,28 @@ #include "compress.h" #include "disk_groups.h" +#include "opts.h" #include "rebalance_types.h" +static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_fs *c, + struct bch_io_opts *opts) +{ + struct bch_extent_rebalance r = { + .type = BIT(BCH_EXTENT_ENTRY_rebalance), +#define x(_name) \ + ._name = opts->_name, \ + ._name##_from_inode = opts->_name##_from_inode, + BCH_REBALANCE_OPTS() +#undef x + }; + + if (r.background_target && + !bch2_target_accepts_data(c, BCH_DATA_user, r.background_target)) + r.background_target = 0; + + return r; +}; + u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c); int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_io_opts *, struct bkey_i *); int bch2_get_update_rebalance_opts(struct btree_trans *, -- GitLab From 29a61a1f40637ae010b828745fb41f60301c3a3d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Feb 2025 15:22:56 +0100 Subject: [PATCH 0364/1585] genirq: Remove leading space from irq_chip::irq_print_chip() callbacks The space separator was factored out from the multiple chip name prints, but several irq_chip::irq_print_chip() callbacks still print a leading space. Remove the superfluous double spaces. Fixes: 9d9f204bdf7243bf ("genirq/proc: Add missing space separator back") Signed-off-by: Geert Uytterhoeven Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/893f7e9646d8933cd6786d5a1ef3eb076d263768.1738764803.git.geert+renesas@glider.be --- arch/powerpc/sysdev/fsl_msi.c | 2 +- drivers/bus/moxtet.c | 2 +- drivers/irqchip/irq-partition-percpu.c | 2 +- drivers/soc/qcom/smp2p.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 1aa0cb097c9c9..7b9a5ea9cad9d 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -75,7 +75,7 @@ static void fsl_msi_print_chip(struct irq_data *irqd, struct seq_file *p) srs = (hwirq >> msi_data->srs_shift) & MSI_SRS_MASK; cascade_virq = msi_data->cascade_array[srs]->virq; - seq_printf(p, " fsl-msi-%d", cascade_virq); + seq_printf(p, "fsl-msi-%d", cascade_virq); } diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c index 6276551d79680..1e57ebfb76229 100644 --- a/drivers/bus/moxtet.c +++ b/drivers/bus/moxtet.c @@ -657,7 +657,7 @@ static void moxtet_irq_print_chip(struct irq_data *d, struct seq_file *p) id = moxtet->modules[pos->idx]; - seq_printf(p, " moxtet-%s.%i#%i", mox_module_name(id), pos->idx, + seq_printf(p, "moxtet-%s.%i#%i", mox_module_name(id), pos->idx, pos->bit); } diff --git a/drivers/irqchip/irq-partition-percpu.c b/drivers/irqchip/irq-partition-percpu.c index 8e76d2913e6be..4441ffe149ea0 100644 --- a/drivers/irqchip/irq-partition-percpu.c +++ b/drivers/irqchip/irq-partition-percpu.c @@ -98,7 +98,7 @@ static void partition_irq_print_chip(struct irq_data *d, struct seq_file *p) struct irq_chip *chip = irq_desc_get_chip(part->chained_desc); struct irq_data *data = irq_desc_get_irq_data(part->chained_desc); - seq_printf(p, " %5s-%lu", chip->name, data->hwirq); + seq_printf(p, "%5s-%lu", chip->name, data->hwirq); } static struct irq_chip partition_irq_chip = { diff --git a/drivers/soc/qcom/smp2p.c b/drivers/soc/qcom/smp2p.c index 4783ab1adb8d9..a3e88ced328a9 100644 --- a/drivers/soc/qcom/smp2p.c +++ b/drivers/soc/qcom/smp2p.c @@ -365,7 +365,7 @@ static void smp2p_irq_print_chip(struct irq_data *irqd, struct seq_file *p) { struct smp2p_entry *entry = irq_data_get_irq_chip_data(irqd); - seq_printf(p, " %8s", dev_name(entry->smp2p->dev)); + seq_printf(p, "%8s", dev_name(entry->smp2p->dev)); } static struct irq_chip smp2p_irq_chip = { -- GitLab From 868c9037df626b3c245ee26a290a03ae1f9f58d3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Feb 2025 17:02:20 +0100 Subject: [PATCH 0365/1585] timers/migration: Fix off-by-one root mis-connection Before attaching a new root to the old root, the children counter of the new root is checked to verify that only the upcoming CPU's top group have been connected to it. However since the recently added commit b729cc1ec21a ("timers/migration: Fix another race between hotplug and idle entry/exit") this check is not valid anymore because the old root is pre-accounted as a child to the new root. Therefore after connecting the upcoming CPU's top group to the new root, the children count to be expected must be 2 and not 1 anymore. This omission results in the old root to not be connected to the new root. Then eventually the system may run with more than one top level, which defeats the purpose of a single idle migrator. Also the old root is pre-accounted but not connected upon the new root creation. But it can be connected to the new root later on. Therefore the old root may be accounted twice to the new root. The propagation of such overcommit can end up creating a double final top-level root with a groupmask incorrectly initialized. Although harmless given that the final top level roots will never have a parent to walk up to, this oddity opportunistically reported the core issue: WARNING: CPU: 8 PID: 0 at kernel/time/timer_migration.c:543 tmigr_requires_handle_remote CPU: 8 UID: 0 PID: 0 Comm: swapper/8 RIP: 0010:tmigr_requires_handle_remote Call Trace: ? tmigr_requires_handle_remote ? hrtimer_run_queues update_process_times tick_periodic tick_handle_periodic __sysvec_apic_timer_interrupt sysvec_apic_timer_interrupt Fix the problem by taking the old root into account in the children count of the new root so the connection is not omitted. Also warn when more than one top level group exists to better detect similar issues in the future. Fixes: b729cc1ec21a ("timers/migration: Fix another race between hotplug and idle entry/exit") Reported-by: Matt Fleming Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20250205160220.39467-1-frederic@kernel.org --- kernel/time/timer_migration.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c index 9cb9b6584ea18..2f6330831f084 100644 --- a/kernel/time/timer_migration.c +++ b/kernel/time/timer_migration.c @@ -1675,6 +1675,9 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node) } while (i < tmigr_hierarchy_levels); + /* Assert single root */ + WARN_ON_ONCE(!err && !group->parent && !list_is_singular(&tmigr_level_list[top])); + while (i > 0) { group = stack[--i]; @@ -1716,7 +1719,12 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node) WARN_ON_ONCE(top == 0); lvllist = &tmigr_level_list[top]; - if (group->num_children == 1 && list_is_singular(lvllist)) { + + /* + * Newly created root level should have accounted the upcoming + * CPU's child group and pre-accounted the old root. + */ + if (group->num_children == 2 && list_is_singular(lvllist)) { /* * The target CPU must never do the prepare work, except * on early boot when the boot CPU is the target. Otherwise -- GitLab From 0fac3ed473dd2955053be6671cdd747807f5e488 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Sun, 19 Jan 2025 10:59:47 +0800 Subject: [PATCH 0366/1585] fs/stat.c: avoid harmless garbage value problem in vfs_statx_path() Clang static checker(scan-build) warning: fs/stat.c:287:21: warning: The left expression of the compound assignment is an uninitialized value. The computed value will also be garbage. 287 | stat->result_mask |= STATX_MNT_ID_UNIQUE; | ~~~~~~~~~~~~~~~~~ ^ fs/stat.c:290:21: warning: The left expression of the compound assignment is an uninitialized value. The computed value will also be garbage. 290 | stat->result_mask |= STATX_MNT_ID; When vfs_getattr() failed because of security_inode_getattr(), 'stat' is uninitialized. In this case, there is a harmless garbage problem in vfs_statx_path(). It's better to return error directly when vfs_getattr() failed, avoiding garbage value and more clearly. Signed-off-by: Su Hui Link: https://lore.kernel.org/r/20250119025946.1168957-1-suhui@nfschina.com Signed-off-by: Christian Brauner --- fs/stat.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/stat.c b/fs/stat.c index 2c0e111a098a1..f13308bfdc983 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -281,6 +281,8 @@ static int vfs_statx_path(struct path *path, int flags, struct kstat *stat, u32 request_mask) { int error = vfs_getattr(path, stat, request_mask, flags); + if (error) + return error; if (request_mask & STATX_MNT_ID_UNIQUE) { stat->mnt_id = real_mount(path->mnt)->mnt_id_unique; @@ -302,7 +304,7 @@ static int vfs_statx_path(struct path *path, int flags, struct kstat *stat, if (S_ISBLK(stat->mode)) bdev_statx(path, stat, request_mask); - return error; + return 0; } static int vfs_statx_fd(int fd, int flags, struct kstat *stat, -- GitLab From 4e7487245abcbc5a1a1aea54e4d3b33c53804bda Mon Sep 17 00:00:00 2001 From: Brahmajit Das Date: Tue, 21 Jan 2025 21:56:48 +0530 Subject: [PATCH 0367/1585] vboxsf: fix building with GCC 15 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building with GCC 15 results in build error fs/vboxsf/super.c:24:54: error: initializer-string for array of ‘unsigned char’ is too long [-Werror=unterminated-string-initialization] 24 | static const unsigned char VBSF_MOUNT_SIGNATURE[4] = "\000\377\376\375"; | ^~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors Due to GCC having enabled -Werror=unterminated-string-initialization[0] by default. Separately initializing each array element of VBSF_MOUNT_SIGNATURE to ensure NUL termination, thus satisfying GCC 15 and fixing the build error. [0]: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wno-unterminated-string-initialization Signed-off-by: Brahmajit Das Link: https://lore.kernel.org/r/20250121162648.1408743-1-brahmajit.xyz@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Christian Brauner --- fs/vboxsf/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index e95b8a48d8a02..1d94bb7841081 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -21,7 +21,8 @@ #define VBOXSF_SUPER_MAGIC 0x786f4256 /* 'VBox' little endian */ -static const unsigned char VBSF_MOUNT_SIGNATURE[4] = "\000\377\376\375"; +static const unsigned char VBSF_MOUNT_SIGNATURE[4] = { '\000', '\377', '\376', + '\375' }; static int follow_symlinks; module_param(follow_symlinks, int, 0444); -- GitLab From e52e97f09fb66fd868260d05bd6b74a9a3db39ee Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 30 Jan 2025 13:15:00 +0100 Subject: [PATCH 0368/1585] statmount: let unset strings be empty Just like it's normal for unset values to be zero, unset strings should be empty instead of containing random values. It seems to be a typical mistake that the mask returned by statmount is not checked, which can result in various bugs. With this fix, these bugs are prevented, since it is highly likely that userspace would just want to turn the missing mask case into an empty string anyway (most of the recently found cases are of this type). Link: https://lore.kernel.org/all/CAJfpegsVCPfCn2DpM8iiYSS5DpMsLB8QBUCHecoj6s0Vxf4jzg@mail.gmail.com/ Fixes: 68385d77c05b ("statmount: simplify string option retrieval") Fixes: 46eae99ef733 ("add statmount(2) syscall") Cc: stable@vger.kernel.org # v6.8 Signed-off-by: Miklos Szeredi Link: https://lore.kernel.org/r/20250130121500.113446-1-mszeredi@redhat.com Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/namespace.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index a3ed3f2980cba..9c4d307a82cdf 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -5191,39 +5191,45 @@ static int statmount_string(struct kstatmount *s, u64 flag) size_t kbufsize; struct seq_file *seq = &s->seq; struct statmount *sm = &s->sm; - u32 start = seq->count; + u32 start, *offp; + + /* Reserve an empty string at the beginning for any unset offsets */ + if (!seq->count) + seq_putc(seq, 0); + + start = seq->count; switch (flag) { case STATMOUNT_FS_TYPE: - sm->fs_type = start; + offp = &sm->fs_type; ret = statmount_fs_type(s, seq); break; case STATMOUNT_MNT_ROOT: - sm->mnt_root = start; + offp = &sm->mnt_root; ret = statmount_mnt_root(s, seq); break; case STATMOUNT_MNT_POINT: - sm->mnt_point = start; + offp = &sm->mnt_point; ret = statmount_mnt_point(s, seq); break; case STATMOUNT_MNT_OPTS: - sm->mnt_opts = start; + offp = &sm->mnt_opts; ret = statmount_mnt_opts(s, seq); break; case STATMOUNT_OPT_ARRAY: - sm->opt_array = start; + offp = &sm->opt_array; ret = statmount_opt_array(s, seq); break; case STATMOUNT_OPT_SEC_ARRAY: - sm->opt_sec_array = start; + offp = &sm->opt_sec_array; ret = statmount_opt_sec_array(s, seq); break; case STATMOUNT_FS_SUBTYPE: - sm->fs_subtype = start; + offp = &sm->fs_subtype; statmount_fs_subtype(s, seq); break; case STATMOUNT_SB_SOURCE: - sm->sb_source = start; + offp = &sm->sb_source; ret = statmount_sb_source(s, seq); break; default: @@ -5251,6 +5257,7 @@ static int statmount_string(struct kstatmount *s, u64 flag) seq->buf[seq->count++] = '\0'; sm->mask |= flag; + *offp = start; return 0; } -- GitLab From d9b3a3c70df2c2b87c83ca3f6e8ab49bd092fdbd Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 30 Jan 2025 14:56:21 +0100 Subject: [PATCH 0369/1585] gfs2: use lockref_init for gl_lockref Move the initialization of gl_lockref from gfs2_init_glock_once() to gfs2_glock_get(). This allows to use lockref_init() there. Reviewed-by: Christoph Hellwig Signed-off-by: Andreas Gruenbacher Link: https://lore.kernel.org/r/20250130135624.1899988-2-agruenba@redhat.com Signed-off-by: Christian Brauner --- fs/gfs2/glock.c | 2 +- fs/gfs2/main.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 8c4c1f871a889..b29eb71e3e29e 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1201,8 +1201,8 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, if (glops->go_instantiate) gl->gl_flags |= BIT(GLF_INSTANTIATE_NEEDED); gl->gl_name = name; + lockref_init(&gl->gl_lockref, 1); lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass); - gl->gl_lockref.count = 1; gl->gl_state = LM_ST_UNLOCKED; gl->gl_target = LM_ST_UNLOCKED; gl->gl_demote_state = LM_ST_EXCLUSIVE; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 04cadc02e5a6e..0727f60ad0288 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -51,7 +51,6 @@ static void gfs2_init_glock_once(void *foo) { struct gfs2_glock *gl = foo; - spin_lock_init(&gl->gl_lockref.lock); INIT_LIST_HEAD(&gl->gl_holders); INIT_LIST_HEAD(&gl->gl_lru); INIT_LIST_HEAD(&gl->gl_ail_list); -- GitLab From 34ad6fa2add2b38f2a89d28518de0142bff8fb43 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 30 Jan 2025 14:56:22 +0100 Subject: [PATCH 0370/1585] gfs2: switch to lockref_init(..., 1) In qd_alloc(), initialize the lockref count to 1 to cover the common case. Compensate for that in gfs2_quota_init() by adjusting the count back down to 0; this only occurs when mounting the filesystem rw. Reviewed-by: Christoph Hellwig Signed-off-by: Andreas Gruenbacher Link: https://lore.kernel.org/r/20250130135624.1899988-3-agruenba@redhat.com Signed-off-by: Christian Brauner --- fs/gfs2/quota.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 58bc5013ca49c..6ae529a5388bc 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -236,7 +236,7 @@ static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, str return NULL; qd->qd_sbd = sdp; - lockref_init(&qd->qd_lockref, 0); + lockref_init(&qd->qd_lockref, 1); qd->qd_id = qid; qd->qd_slot = -1; INIT_LIST_HEAD(&qd->qd_lru); @@ -297,7 +297,6 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, spin_lock_bucket(hash); *qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid); if (qd == NULL) { - new_qd->qd_lockref.count++; *qdp = new_qd; list_add(&new_qd->qd_list, &sdp->sd_quota_list); hlist_bl_add_head_rcu(&new_qd->qd_hlist, &qd_hash_table[hash]); @@ -1450,6 +1449,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) if (qd == NULL) goto fail_brelse; + qd->qd_lockref.count = 0; set_bit(QDF_CHANGE, &qd->qd_flags); qd->qd_change = qc_change; qd->qd_slot = slot; -- GitLab From bb504b4d64266fa0d7460c218c85afed371db03a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 30 Jan 2025 14:56:23 +0100 Subject: [PATCH 0371/1585] lockref: remove count argument of lockref_init All users of lockref_init() now initialize the count to 1, so hardcode that and remove the count argument. Reviewed-by: Christoph Hellwig Signed-off-by: Andreas Gruenbacher Link: https://lore.kernel.org/r/20250130135624.1899988-4-agruenba@redhat.com Signed-off-by: Christian Brauner --- fs/dcache.c | 2 +- fs/erofs/zdata.c | 2 +- fs/gfs2/glock.c | 2 +- fs/gfs2/quota.c | 2 +- include/linux/lockref.h | 7 ++++--- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 9cc0d47da321c..7dee242b4195a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1700,7 +1700,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) smp_store_release(&dentry->d_name.name, dname); /* ^^^ */ dentry->d_flags = 0; - lockref_init(&dentry->d_lockref, 1); + lockref_init(&dentry->d_lockref); seqcount_spinlock_init(&dentry->d_seq, &dentry->d_lock); dentry->d_inode = NULL; dentry->d_parent = dentry; diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 29f8963bb5232..d771e06db7386 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -726,7 +726,7 @@ static int z_erofs_register_pcluster(struct z_erofs_frontend *fe) if (IS_ERR(pcl)) return PTR_ERR(pcl); - lockref_init(&pcl->lockref, 1); /* one ref for this request */ + lockref_init(&pcl->lockref); /* one ref for this request */ pcl->algorithmformat = map->m_algorithmformat; pcl->length = 0; pcl->partial = true; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index b29eb71e3e29e..65c07aa957184 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1201,7 +1201,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, if (glops->go_instantiate) gl->gl_flags |= BIT(GLF_INSTANTIATE_NEEDED); gl->gl_name = name; - lockref_init(&gl->gl_lockref, 1); + lockref_init(&gl->gl_lockref); lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass); gl->gl_state = LM_ST_UNLOCKED; gl->gl_target = LM_ST_UNLOCKED; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 6ae529a5388bc..2298e06797ac3 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -236,7 +236,7 @@ static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, str return NULL; qd->qd_sbd = sdp; - lockref_init(&qd->qd_lockref, 1); + lockref_init(&qd->qd_lockref); qd->qd_id = qid; qd->qd_slot = -1; INIT_LIST_HEAD(&qd->qd_lru); diff --git a/include/linux/lockref.h b/include/linux/lockref.h index c39f119659ba4..676721ee878d7 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -37,12 +37,13 @@ struct lockref { /** * lockref_init - Initialize a lockref * @lockref: pointer to lockref structure - * @count: initial count + * + * Initializes @lockref->count to 1. */ -static inline void lockref_init(struct lockref *lockref, unsigned int count) +static inline void lockref_init(struct lockref *lockref) { spin_lock_init(&lockref->lock); - lockref->count = count; + lockref->count = 1; } void lockref_get(struct lockref *lockref); -- GitLab From 95101401bb50ae2cf9deee1bbf4d2b28d0dfdc26 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 3 Feb 2025 23:32:03 +0100 Subject: [PATCH 0372/1585] fsnotify: use accessor to set FMODE_NONOTIFY_* The FMODE_NONOTIFY_* bits are a 2-bits mode. Open coding manipulation of those bits is risky. Use an accessor file_set_fsnotify_mode() to set the mode. Rename file_set_fsnotify_mode() => file_set_fsnotify_mode_from_watchers() to make way for the simple accessor name. Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20250203223205.861346-2-amir73il@gmail.com Signed-off-by: Christian Brauner --- drivers/tty/pty.c | 2 +- fs/notify/fsnotify.c | 18 ++++++++++++------ fs/open.c | 7 ++++--- include/linux/fs.h | 7 ++++++- include/linux/fsnotify.h | 4 ++-- 5 files changed, 25 insertions(+), 13 deletions(-) diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index df08f13052ff4..8bb1a01fef2a1 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -798,7 +798,7 @@ static int ptmx_open(struct inode *inode, struct file *filp) nonseekable_open(inode, filp); /* We refuse fsnotify events on ptmx, since it's a shared resource */ - filp->f_mode |= FMODE_NONOTIFY; + file_set_fsnotify_mode(filp, FMODE_NONOTIFY); retval = tty_alloc_file(filp); if (retval) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 8ee495a58d0ad..fae1b6d397ea0 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -648,7 +648,7 @@ EXPORT_SYMBOL_GPL(fsnotify); * Later, fsnotify permission hooks do not check if there are permission event * watches, but that there were permission event watches at open time. */ -void file_set_fsnotify_mode(struct file *file) +void file_set_fsnotify_mode_from_watchers(struct file *file) { struct dentry *dentry = file->f_path.dentry, *parent; struct super_block *sb = dentry->d_sb; @@ -665,7 +665,7 @@ void file_set_fsnotify_mode(struct file *file) */ if (likely(!fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_CONTENT))) { - file->f_mode |= FMODE_NONOTIFY_PERM; + file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM); return; } @@ -676,7 +676,7 @@ void file_set_fsnotify_mode(struct file *file) if ((!d_is_dir(dentry) && !d_is_reg(dentry)) || likely(!fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_PRE_CONTENT))) { - file->f_mode |= FMODE_NONOTIFY | FMODE_NONOTIFY_PERM; + file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM); return; } @@ -686,19 +686,25 @@ void file_set_fsnotify_mode(struct file *file) */ mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask); if (unlikely(fsnotify_object_watched(d_inode(dentry), mnt_mask, - FSNOTIFY_PRE_CONTENT_EVENTS))) + FSNOTIFY_PRE_CONTENT_EVENTS))) { + /* Enable pre-content events */ + file_set_fsnotify_mode(file, 0); return; + } /* Is parent watching for pre-content events on this file? */ if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) { parent = dget_parent(dentry); p_mask = fsnotify_inode_watches_children(d_inode(parent)); dput(parent); - if (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS) + if (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS) { + /* Enable pre-content events */ + file_set_fsnotify_mode(file, 0); return; + } } /* Nobody watching for pre-content events from this file */ - file->f_mode |= FMODE_NONOTIFY | FMODE_NONOTIFY_PERM; + file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM); } #endif diff --git a/fs/open.c b/fs/open.c index 932e5a6de63bb..3fcbfff8aede8 100644 --- a/fs/open.c +++ b/fs/open.c @@ -905,7 +905,8 @@ static int do_dentry_open(struct file *f, f->f_sb_err = file_sample_sb_err(f); if (unlikely(f->f_flags & O_PATH)) { - f->f_mode = FMODE_PATH | FMODE_OPENED | FMODE_NONOTIFY; + f->f_mode = FMODE_PATH | FMODE_OPENED; + file_set_fsnotify_mode(f, FMODE_NONOTIFY); f->f_op = &empty_fops; return 0; } @@ -938,7 +939,7 @@ static int do_dentry_open(struct file *f, * If FMODE_NONOTIFY was already set for an fanotify fd, this doesn't * change anything. */ - file_set_fsnotify_mode(f); + file_set_fsnotify_mode_from_watchers(f); error = fsnotify_open_perm(f); if (error) goto cleanup_all; @@ -1122,7 +1123,7 @@ struct file *dentry_open_nonotify(const struct path *path, int flags, if (!IS_ERR(f)) { int error; - f->f_mode |= FMODE_NONOTIFY; + file_set_fsnotify_mode(f, FMODE_NONOTIFY); error = vfs_open(path, f); if (error) { fput(f); diff --git a/include/linux/fs.h b/include/linux/fs.h index be3ad155ec9f7..7620547432a84 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -222,7 +222,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_FSNOTIFY_HSM(mode) 0 #endif - /* * Attribute flags. These should be or-ed together to figure out what * has been changed! @@ -3140,6 +3139,12 @@ static inline void exe_file_allow_write_access(struct file *exe_file) allow_write_access(exe_file); } +static inline void file_set_fsnotify_mode(struct file *file, fmode_t mode) +{ + file->f_mode &= ~FMODE_FSNOTIFY_MASK; + file->f_mode |= mode; +} + static inline bool inode_is_open_for_write(const struct inode *inode) { return atomic_read(&inode->i_writecount) > 0; diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 1a9ef8f6784dd..6a33288bd6a1f 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -129,7 +129,7 @@ static inline int fsnotify_file(struct file *file, __u32 mask) #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS -void file_set_fsnotify_mode(struct file *file); +void file_set_fsnotify_mode_from_watchers(struct file *file); /* * fsnotify_file_area_perm - permission hook before access to file range @@ -213,7 +213,7 @@ static inline int fsnotify_open_perm(struct file *file) } #else -static inline void file_set_fsnotify_mode(struct file *file) +static inline void file_set_fsnotify_mode_from_watchers(struct file *file) { } -- GitLab From 5eb987105357cb7cfa7cf3b1e2f66d5c0977e412 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 29 Jan 2025 16:12:53 +0100 Subject: [PATCH 0373/1585] fs: fix adding security options to statmount.mnt_opt Prepending security options was made conditional on sb->s_op->show_options, but security options are independent of sb options. Fixes: 056d33137bf9 ("fs: prepend statmount.mnt_opts string with security_sb_mnt_opts()") Fixes: f9af549d1fd3 ("fs: export mount options via statmount()") Cc: stable@vger.kernel.org # v6.11 Signed-off-by: Miklos Szeredi Link: https://lore.kernel.org/r/20250129151253.33241-1-mszeredi@redhat.com Signed-off-by: Christian Brauner --- fs/namespace.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 9c4d307a82cdf..8f1000f9f3df1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -5087,30 +5087,29 @@ static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq) { struct vfsmount *mnt = s->mnt; struct super_block *sb = mnt->mnt_sb; + size_t start = seq->count; int err; - if (sb->s_op->show_options) { - size_t start = seq->count; - - err = security_sb_show_options(seq, sb); - if (err) - return err; + err = security_sb_show_options(seq, sb); + if (err) + return err; + if (sb->s_op->show_options) { err = sb->s_op->show_options(seq, mnt->mnt_root); if (err) return err; + } - if (unlikely(seq_has_overflowed(seq))) - return -EAGAIN; + if (unlikely(seq_has_overflowed(seq))) + return -EAGAIN; - if (seq->count == start) - return 0; + if (seq->count == start) + return 0; - /* skip leading comma */ - memmove(seq->buf + start, seq->buf + start + 1, - seq->count - start - 1); - seq->count--; - } + /* skip leading comma */ + memmove(seq->buf + start, seq->buf + start + 1, + seq->count - start - 1); + seq->count--; return 0; } -- GitLab From 2a42754b3104d78a2bc7a2ad8844427411c76ca6 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 3 Feb 2025 23:32:04 +0100 Subject: [PATCH 0374/1585] fsnotify: disable notification by default for all pseudo files Most pseudo files are not applicable for fsnotify events at all, let alone to the new pre-content events. Disable notifications to all files allocated with alloc_file_pseudo() and enable legacy inotify events for the specific cases of pipe and socket, which have known users of inotify events. Pre-content events are also kept disabled for sockets and pipes. Fixes: 20bf82a898b6 ("mm: don't allow huge faults for files with pre content watches") Reported-by: Alex Williamson Closes: https://lore.kernel.org/linux-fsdevel/20250131121703.1e4d00a7.alex.williamson@redhat.com/ Suggested-by: Linus Torvalds Link: https://lore.kernel.org/linux-fsdevel/CAHk-=wi2pThSVY=zhO=ZKxViBj5QCRX-=AS2+rVknQgJnHXDFg@mail.gmail.com/ Tested-by: Alex Williamson Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20250203223205.861346-3-amir73il@gmail.com Signed-off-by: Christian Brauner --- fs/file_table.c | 11 +++++++++++ fs/open.c | 4 ++-- fs/pipe.c | 6 ++++++ net/socket.c | 5 +++++ 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/fs/file_table.c b/fs/file_table.c index f0291a66f9db4..35b93da6c5cb1 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -375,7 +375,13 @@ struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt, if (IS_ERR(file)) { ihold(inode); path_put(&path); + return file; } + /* + * Disable all fsnotify events for pseudo files by default. + * They may be enabled by caller with file_set_fsnotify_mode(). + */ + file_set_fsnotify_mode(file, FMODE_NONOTIFY); return file; } EXPORT_SYMBOL(alloc_file_pseudo); @@ -400,6 +406,11 @@ struct file *alloc_file_pseudo_noaccount(struct inode *inode, return file; } file_init_path(file, &path, fops); + /* + * Disable all fsnotify events for pseudo files by default. + * They may be enabled by caller with file_set_fsnotify_mode(). + */ + file_set_fsnotify_mode(file, FMODE_NONOTIFY); return file; } EXPORT_SYMBOL_GPL(alloc_file_pseudo_noaccount); diff --git a/fs/open.c b/fs/open.c index 3fcbfff8aede8..1be20de9f283a 100644 --- a/fs/open.c +++ b/fs/open.c @@ -936,8 +936,8 @@ static int do_dentry_open(struct file *f, /* * Set FMODE_NONOTIFY_* bits according to existing permission watches. - * If FMODE_NONOTIFY was already set for an fanotify fd, this doesn't - * change anything. + * If FMODE_NONOTIFY mode was already set for an fanotify fd or for a + * pseudo file, this call will not change the mode. */ file_set_fsnotify_mode_from_watchers(f); error = fsnotify_open_perm(f); diff --git a/fs/pipe.c b/fs/pipe.c index 94b59045ab44b..ce1af7592780d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -960,6 +960,12 @@ int create_pipe_files(struct file **res, int flags) res[1] = f; stream_open(inode, res[0]); stream_open(inode, res[1]); + /* + * Disable permission and pre-content events, but enable legacy + * inotify events for legacy users. + */ + file_set_fsnotify_mode(res[0], FMODE_NONOTIFY_PERM); + file_set_fsnotify_mode(res[1], FMODE_NONOTIFY_PERM); return 0; } diff --git a/net/socket.c b/net/socket.c index 262a28b59c7f0..28bae5a942341 100644 --- a/net/socket.c +++ b/net/socket.c @@ -479,6 +479,11 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) sock->file = file; file->private_data = sock; stream_open(SOCK_INODE(sock), file); + /* + * Disable permission and pre-content events, but enable legacy + * inotify events for legacy users. + */ + file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM); return file; } EXPORT_SYMBOL(sock_alloc_file); -- GitLab From 2cc02059fbc79306b53a44b1f1a4444aa3c76598 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 29 Jan 2025 17:06:41 +0100 Subject: [PATCH 0375/1585] selftests: always check mask returned by statmount(2) STATMOUNT_MNT_OPTS can actually be missing if there are no options. This is a change of behavior since 75ead69a7173 ("fs: don't let statmount return empty strings"). The other checks shouldn't actually trigger, but add them for correctness and for easier debugging if the test fails. Signed-off-by: Miklos Szeredi Link: https://lore.kernel.org/r/20250129160641.35485-1-mszeredi@redhat.com Signed-off-by: Christian Brauner --- .../filesystems/statmount/statmount_test.c | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c index 8eb6aa606a0d5..46d289611ce86 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c @@ -383,6 +383,10 @@ static void test_statmount_mnt_point(void) return; } + if (!(sm->mask & STATMOUNT_MNT_POINT)) { + ksft_test_result_fail("missing STATMOUNT_MNT_POINT in mask\n"); + return; + } if (strcmp(sm->str + sm->mnt_point, "/") != 0) { ksft_test_result_fail("unexpected mount point: '%s' != '/'\n", sm->str + sm->mnt_point); @@ -408,6 +412,10 @@ static void test_statmount_mnt_root(void) strerror(errno)); return; } + if (!(sm->mask & STATMOUNT_MNT_ROOT)) { + ksft_test_result_fail("missing STATMOUNT_MNT_ROOT in mask\n"); + return; + } mnt_root = sm->str + sm->mnt_root; last_root = strrchr(mnt_root, '/'); if (last_root) @@ -437,6 +445,10 @@ static void test_statmount_fs_type(void) strerror(errno)); return; } + if (!(sm->mask & STATMOUNT_FS_TYPE)) { + ksft_test_result_fail("missing STATMOUNT_FS_TYPE in mask\n"); + return; + } fs_type = sm->str + sm->fs_type; for (s = known_fs; s != NULL; s++) { if (strcmp(fs_type, *s) == 0) @@ -464,6 +476,11 @@ static void test_statmount_mnt_opts(void) return; } + if (!(sm->mask & STATMOUNT_MNT_BASIC)) { + ksft_test_result_fail("missing STATMOUNT_MNT_BASIC in mask\n"); + return; + } + while (getline(&line, &len, f_mountinfo) != -1) { int i; char *p, *p2; @@ -514,7 +531,10 @@ static void test_statmount_mnt_opts(void) if (p2) *p2 = '\0'; - statmount_opts = sm->str + sm->mnt_opts; + if (sm->mask & STATMOUNT_MNT_OPTS) + statmount_opts = sm->str + sm->mnt_opts; + else + statmount_opts = ""; if (strcmp(statmount_opts, p) != 0) ksft_test_result_fail( "unexpected mount options: '%s' != '%s'\n", -- GitLab From 711f9b8fbe4f4936302804e246e206f0829f628f Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 3 Feb 2025 23:32:05 +0100 Subject: [PATCH 0376/1585] fsnotify: disable pre-content and permission events by default After introducing pre-content events, we had a regression related to disabling huge faults on files that should never have pre-content events enabled. This happened because the default f_mode of allocated files (0) does not disable pre-content events. Pre-content events are disabled in file_set_fsnotify_mode_by_watchers() but internal files may not get to call this helper. Initialize f_mode to disable permission and pre-content events for all files and if needed they will be enabled for the callers of file_set_fsnotify_mode_by_watchers(). Fixes: 20bf82a898b6 ("mm: don't allow huge faults for files with pre content watches") Reported-by: Alex Williamson Closes: https://lore.kernel.org/linux-fsdevel/20250131121703.1e4d00a7.alex.williamson@redhat.com/ Tested-by: Alex Williamson Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20250203223205.861346-4-amir73il@gmail.com Signed-off-by: Christian Brauner --- fs/file_table.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/file_table.c b/fs/file_table.c index 35b93da6c5cb1..5c00dc38558da 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -194,6 +194,11 @@ static int init_file(struct file *f, int flags, const struct cred *cred) * refcount bumps we should reinitialize the reused file first. */ file_ref_init(&f->f_ref, 1); + /* + * Disable permission and pre-content events for all files by default. + * They may be enabled later by file_set_fsnotify_mode_from_watchers(). + */ + file_set_fsnotify_mode(f, FMODE_NONOTIFY_PERM); return 0; } -- GitLab From 091ee63e36e8289f9067f659a48d497911e49d6f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 4 Feb 2025 14:51:20 +0100 Subject: [PATCH 0377/1585] pidfs: improve ioctl handling Pidfs supports extensible and non-extensible ioctls. The extensible ioctls need to check for the ioctl number itself not just the ioctl command otherwise both backward- and forward compatibility are broken. The pidfs ioctl handler also needs to look at the type of the ioctl command to guard against cases where "[...] a daemon receives some random file descriptor from a (potentially less privileged) client and expects the FD to be of some specific type, it might call ioctl() on this FD with some type-specific command and expect the call to fail if the FD is of the wrong type; but due to the missing type check, the kernel instead performs some action that userspace didn't expect." (cf. [1]] Link: https://lore.kernel.org/r/20250204-work-pidfs-ioctl-v1-1-04987d239575@kernel.org Link: https://lore.kernel.org/r/CAG48ez2K9A5GwtgqO31u9ZL292we8ZwAA=TJwwEv7wRuJ3j4Lw@mail.gmail.com [1] Fixes: 8ce352818820 ("pidfs: check for valid ioctl commands") Acked-by: Luca Boccassi Reported-by: Jann Horn Cc: stable@vger.kernel.org # v6.13; please backport with 8ce352818820 ("pidfs: check for valid ioctl commands") Signed-off-by: Christian Brauner --- fs/pidfs.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/pidfs.c b/fs/pidfs.c index 049352f973de3..63f9699ebac36 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -287,7 +287,6 @@ static bool pidfs_ioctl_valid(unsigned int cmd) switch (cmd) { case FS_IOC_GETVERSION: case PIDFD_GET_CGROUP_NAMESPACE: - case PIDFD_GET_INFO: case PIDFD_GET_IPC_NAMESPACE: case PIDFD_GET_MNT_NAMESPACE: case PIDFD_GET_NET_NAMESPACE: @@ -300,6 +299,17 @@ static bool pidfs_ioctl_valid(unsigned int cmd) return true; } + /* Extensible ioctls require some more careful checks. */ + switch (_IOC_NR(cmd)) { + case _IOC_NR(PIDFD_GET_INFO): + /* + * Try to prevent performing a pidfd ioctl when someone + * erronously mistook the file descriptor for a pidfd. + * This is not perfect but will catch most cases. + */ + return (_IOC_TYPE(cmd) == _IOC_TYPE(PIDFD_GET_INFO)); + } + return false; } -- GitLab From 37d11cfc63604b3886308e2111d845d148ced8bc Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 4 Feb 2025 22:32:07 +0100 Subject: [PATCH 0378/1585] vfs: sanity check the length passed to inode_set_cached_link() This costs a strlen() call when instatianating a symlink. Preferably it would be hidden behind VFS_WARN_ON (or compatible), but there is no such facility at the moment. With the facility in place the call can be patched out in production kernels. In the meantime, since the cost is being paid unconditionally, use the result to a fixup the bad caller. This is not expected to persist in the long run (tm). Sample splat: bad length passed for symlink [/tmp/syz-imagegen43743633/file0/file0] (got 131109, expected 37) [rest of WARN blurp goes here] Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20250204213207.337980-1-mjguzik@gmail.com Signed-off-by: Christian Brauner --- include/linux/fs.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/linux/fs.h b/include/linux/fs.h index 7620547432a84..2c3b2f8a621f7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -790,6 +790,19 @@ struct inode { static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen) { + int testlen; + + /* + * TODO: patch it into a debug-only check if relevant macros show up. + * In the meantime, since we are suffering strlen even on production kernels + * to find the right length, do a fixup if the wrong value got passed. + */ + testlen = strlen(link); + if (testlen != linklen) { + WARN_ONCE(1, "bad length passed for symlink [%s] (got %d, expected %d)", + link, linklen, testlen); + linklen = testlen; + } inode->i_link = link; inode->i_linklen = linklen; inode->i_opflags |= IOP_CACHED_LINK; -- GitLab From ca0f4fe7cf7183bfbdc67ca2de56ae1fc3a8db2b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 6 Feb 2025 10:21:38 -0700 Subject: [PATCH 0379/1585] arm64: Handle .ARM.attributes section in linker scripts A recent LLVM commit [1] started generating an .ARM.attributes section similar to the one that exists for 32-bit, which results in orphan section warnings (or errors if CONFIG_WERROR is enabled) from the linker because it is not handled in the arm64 linker scripts. ld.lld: error: arch/arm64/kernel/vdso/vgettimeofday.o:(.ARM.attributes) is being placed in '.ARM.attributes' ld.lld: error: arch/arm64/kernel/vdso/vgetrandom.o:(.ARM.attributes) is being placed in '.ARM.attributes' ld.lld: error: vmlinux.a(lib/vsprintf.o):(.ARM.attributes) is being placed in '.ARM.attributes' ld.lld: error: vmlinux.a(lib/win_minmax.o):(.ARM.attributes) is being placed in '.ARM.attributes' ld.lld: error: vmlinux.a(lib/xarray.o):(.ARM.attributes) is being placed in '.ARM.attributes' Discard the new sections in the necessary linker scripts to resolve the warnings, as the kernel and vDSO do not need to retain it, similar to the .note.gnu.property section. Cc: stable@vger.kernel.org Fixes: b3e5d80d0c48 ("arm64/build: Warn on orphan section placement") Link: https://github.com/llvm/llvm-project/commit/ee99c4d4845db66c4daa2373352133f4b237c942 [1] Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20250206-arm64-handle-arm-attributes-in-linker-script-v3-1-d53d169913eb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/vdso.lds.S | 1 + arch/arm64/kernel/vmlinux.lds.S | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index 4ec32e86a8da2..47ad6944f9f08 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -41,6 +41,7 @@ SECTIONS */ /DISCARD/ : { *(.note.GNU-stack .note.gnu.property) + *(.ARM.attributes) } .note : { *(.note.*) } :text :note diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index f84c71f04d9ea..e73326bd3ff7e 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -162,6 +162,7 @@ SECTIONS /DISCARD/ : { *(.interp .dynamic) *(.dynsym .dynstr .hash .gnu.hash) + *(.ARM.attributes) } . = KIMAGE_VADDR; -- GitLab From 875d742cf5327c93cba1f11e12b08d3cce7a88d2 Mon Sep 17 00:00:00 2001 From: Radu Rendec Date: Thu, 6 Feb 2025 12:44:20 -0500 Subject: [PATCH 0380/1585] arm64: cacheinfo: Avoid out-of-bounds write to cacheinfo array The loop that detects/populates cache information already has a bounds check on the array size but does not account for cache levels with separate data/instructions cache. Fix this by incrementing the index for any populated leaf (instead of any populated level). Fixes: 5d425c186537 ("arm64: kernel: add support for cpu cache information") Signed-off-by: Radu Rendec Link: https://lore.kernel.org/r/20250206174420.2178724-1-rrendec@redhat.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cacheinfo.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index d9c9218fa1fdd..309942b06c5bc 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -101,16 +101,18 @@ int populate_cache_leaves(unsigned int cpu) unsigned int level, idx; enum cache_type type; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - struct cacheinfo *this_leaf = this_cpu_ci->info_list; + struct cacheinfo *infos = this_cpu_ci->info_list; for (idx = 0, level = 1; level <= this_cpu_ci->num_levels && - idx < this_cpu_ci->num_leaves; idx++, level++) { + idx < this_cpu_ci->num_leaves; level++) { type = get_cache_type(level); if (type == CACHE_TYPE_SEPARATE) { - ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); - ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + if (idx + 1 >= this_cpu_ci->num_leaves) + break; + ci_leaf_init(&infos[idx++], CACHE_TYPE_DATA, level); + ci_leaf_init(&infos[idx++], CACHE_TYPE_INST, level); } else { - ci_leaf_init(this_leaf++, type, level); + ci_leaf_init(&infos[idx++], type, level); } } return 0; -- GitLab From 2813e00dcd748cef47d2bffaa04071de93fddf00 Mon Sep 17 00:00:00 2001 From: Ievgen Vovk Date: Sun, 12 Jan 2025 13:13:14 +0900 Subject: [PATCH 0381/1585] HID: hid-apple: Apple Magic Keyboard a3203 USB-C support Add Apple Magic Keyboard 2024 model (with USB-C port) device ID (0320) to those recognized by the hid-apple driver. Keyboard is otherwise compatible with the existing implementation for its earlier 2021 model. Signed-off-by: Ievgen Vovk Signed-off-by: Jiri Kosina --- drivers/hid/hid-apple.c | 5 +++++ drivers/hid/hid-ids.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 7e1ae2a2bcc24..3c3f67d0bfcfe 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -474,6 +474,7 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input, hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2015) table = magic_keyboard_2015_fn_keys; else if (hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021 || + hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2024 || hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021 || hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2021) table = apple2021_fn_keys; @@ -1150,6 +1151,10 @@ static const struct hid_device_id apple_devices[] = { .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2024), + .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, + { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2024), + .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021), diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index ed1d7f9e8caf4..7e400624908e3 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -184,6 +184,7 @@ #define USB_DEVICE_ID_APPLE_IRCONTROL4 0x8242 #define USB_DEVICE_ID_APPLE_IRCONTROL5 0x8243 #define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021 0x029c +#define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2024 0x0320 #define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021 0x029a #define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2021 0x029f #define USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT 0x8102 -- GitLab From 819083cb6eedcc8495cbf84845877bcc741b93b3 Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Thu, 16 Jan 2025 23:12:17 -0700 Subject: [PATCH 0382/1585] HID: apple: fix up the F6 key on the Omoton KB066 keyboard The Omoton KB066 is an Apple A1255 keyboard clone (HID product code 05ac:022c). On both keyboards, the F6 key becomes Num Lock when the Fn key is held. But unlike its Apple exemplar, when the Omoton's F6 key is pressed without Fn, it sends the usage code 0xC0301 from the reserved section of the consumer page instead of the standard F6 usage code 0x7003F from the keyboard page. The nonstandard code is translated to KEY_UNKNOWN and becomes useless on Linux. The Omoton KB066 is a pretty popular keyboard, judging from its 29,058 reviews on Amazon at time of writing, so let's account for its quirk to make it more usable. By the way, it would be nice if we could automatically set fnmode to 0 for Omoton keyboards because they handle the Fn key internally and the kernel's Fn key handling creates undesirable side effects such as making F1 and F2 always Brightness Up and Brightness Down in fnmode=1 (the default) or always F1 and F2 in fnmode=2. Unfortunately I don't think there's a way to identify Bluetooth keyboards more specifically than the HID product code which is obviously inaccurate. Users of Omoton keyboards will just have to set fnmode to 0 manually to get full Fn key functionality. Signed-off-by: Alex Henrie Signed-off-by: Jiri Kosina --- drivers/hid/hid-apple.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 3c3f67d0bfcfe..49812a76b7edd 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -546,6 +546,9 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input, } } + if (usage->hid == 0xc0301) /* Omoton KB066 quirk */ + code = KEY_F6; + if (usage->code != code) { input_event_with_scancode(input, usage->type, code, usage->hid, value); -- GitLab From 0b43d98ff29be3144e86294486b1373b5df74c0e Mon Sep 17 00:00:00 2001 From: Tulio Fernandes Date: Wed, 5 Feb 2025 18:50:34 -0300 Subject: [PATCH 0383/1585] HID: hid-thrustmaster: fix stack-out-of-bounds read in usb_check_int_endpoints() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Syzbot[1] has detected a stack-out-of-bounds read of the ep_addr array from hid-thrustmaster driver. This array is passed to usb_check_int_endpoints function from usb.c core driver, which executes a for loop that iterates over the elements of the passed array. Not finding a null element at the end of the array, it tries to read the next, non-existent element, crashing the kernel. To fix this, a 0 element was added at the end of the array to break the for loop. [1] https://syzkaller.appspot.com/bug?extid=9c9179ac46169c56c1ad Reported-by: syzbot+9c9179ac46169c56c1ad@syzkaller.appspotmail.com Fixes: 50420d7c79c3 ("HID: hid-thrustmaster: Fix warning in thrustmaster_probe by adding endpoint check") Signed-off-by: Túlio Fernandes Signed-off-by: Jiri Kosina --- drivers/hid/hid-thrustmaster.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c index 6c3e758bbb09e..3b81468a1df29 100644 --- a/drivers/hid/hid-thrustmaster.c +++ b/drivers/hid/hid-thrustmaster.c @@ -171,7 +171,7 @@ static void thrustmaster_interrupts(struct hid_device *hdev) b_ep = ep->desc.bEndpointAddress; /* Are the expected endpoints present? */ - u8 ep_addr[1] = {b_ep}; + u8 ep_addr[2] = {b_ep, 0}; if (!usb_check_int_endpoints(usbif, ep_addr)) { hid_err(hdev, "Unexpected non-int endpoint\n"); -- GitLab From 79504249d7e27cad4a3eeb9afc6386e418728ce0 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Tue, 4 Feb 2025 19:55:27 -0800 Subject: [PATCH 0384/1585] HID: hid-steam: Move hidraw input (un)registering to work Due to an interplay between locking in the input and hid transport subsystems, attempting to register or deregister the relevant input devices during the hidraw open/close events can lead to a lock ordering issue. Though this shouldn't cause a deadlock, this commit moves the input device manipulation to deferred work to sidestep the issue. Fixes: 385a4886778f6 ("HID: steam: remove input device when a hid client is running.") Signed-off-by: Vicki Pfau Signed-off-by: Jiri Kosina --- drivers/hid/hid-steam.c | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index b008fd0834b94..5a17714fedea0 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -313,6 +313,7 @@ struct steam_device { u16 rumble_left; u16 rumble_right; unsigned int sensor_timestamp_us; + struct work_struct unregister_work; }; static int steam_recv_report(struct steam_device *steam, @@ -1072,6 +1073,31 @@ static void steam_mode_switch_cb(struct work_struct *work) } } +static void steam_work_unregister_cb(struct work_struct *work) +{ + struct steam_device *steam = container_of(work, struct steam_device, + unregister_work); + unsigned long flags; + bool connected; + bool opened; + + spin_lock_irqsave(&steam->lock, flags); + opened = steam->client_opened; + connected = steam->connected; + spin_unlock_irqrestore(&steam->lock, flags); + + if (connected) { + if (opened) { + steam_sensors_unregister(steam); + steam_input_unregister(steam); + } else { + steam_set_lizard_mode(steam, lizard_mode); + steam_input_register(steam); + steam_sensors_register(steam); + } + } +} + static bool steam_is_valve_interface(struct hid_device *hdev) { struct hid_report_enum *rep_enum; @@ -1117,8 +1143,7 @@ static int steam_client_ll_open(struct hid_device *hdev) steam->client_opened++; spin_unlock_irqrestore(&steam->lock, flags); - steam_sensors_unregister(steam); - steam_input_unregister(steam); + schedule_work(&steam->unregister_work); return 0; } @@ -1135,11 +1160,7 @@ static void steam_client_ll_close(struct hid_device *hdev) connected = steam->connected && !steam->client_opened; spin_unlock_irqrestore(&steam->lock, flags); - if (connected) { - steam_set_lizard_mode(steam, lizard_mode); - steam_input_register(steam); - steam_sensors_register(steam); - } + schedule_work(&steam->unregister_work); } static int steam_client_ll_raw_request(struct hid_device *hdev, @@ -1231,6 +1252,7 @@ static int steam_probe(struct hid_device *hdev, INIT_LIST_HEAD(&steam->list); INIT_WORK(&steam->rumble_work, steam_haptic_rumble_cb); steam->sensor_timestamp_us = 0; + INIT_WORK(&steam->unregister_work, steam_work_unregister_cb); /* * With the real steam controller interface, do not connect hidraw. @@ -1291,6 +1313,7 @@ static int steam_probe(struct hid_device *hdev, cancel_work_sync(&steam->work_connect); cancel_delayed_work_sync(&steam->mode_switch); cancel_work_sync(&steam->rumble_work); + cancel_work_sync(&steam->unregister_work); return ret; } @@ -1307,6 +1330,7 @@ static void steam_remove(struct hid_device *hdev) cancel_delayed_work_sync(&steam->mode_switch); cancel_work_sync(&steam->work_connect); cancel_work_sync(&steam->rumble_work); + cancel_work_sync(&steam->unregister_work); hid_destroy_device(steam->client_hdev); steam->client_hdev = NULL; steam->client_opened = 0; -- GitLab From b051ffa2aeb2a60e092387b6fb2af1ad42f51a3c Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Tue, 4 Feb 2025 19:55:29 -0800 Subject: [PATCH 0385/1585] HID: hid-steam: Don't use cancel_delayed_work_sync in IRQ context Lockdep reported that, as steam_do_deck_input_event is called from steam_raw_event inside of an IRQ context, it can lead to issues if that IRQ occurs while the work to be cancelled is running. By using cancel_delayed_work, this issue can be avoided. The exact ordering of the work and the event processing is not super important, so this is safe. Fixes: cd438e57dd05 ("HID: hid-steam: Add gamepad-only mode switched to by holding options") Signed-off-by: Vicki Pfau Signed-off-by: Jiri Kosina --- drivers/hid/hid-steam.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index 5a17714fedea0..c9e65e9088b31 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -1617,7 +1617,7 @@ static void steam_do_deck_input_event(struct steam_device *steam, if (!(b9 & BIT(6)) && steam->did_mode_switch) { steam->did_mode_switch = false; - cancel_delayed_work_sync(&steam->mode_switch); + cancel_delayed_work(&steam->mode_switch); } else if (!steam->client_opened && (b9 & BIT(6)) && !steam->did_mode_switch) { steam->did_mode_switch = true; schedule_delayed_work(&steam->mode_switch, 45 * HZ / 100); -- GitLab From 02458fbfaa0170aabf8506f7d4ed054f02414251 Mon Sep 17 00:00:00 2001 From: Rupinderjit Singh Date: Thu, 6 Feb 2025 15:58:03 +0000 Subject: [PATCH 0386/1585] gpu: host1x: Fix a use of uninitialized mutex commit c8347f915e67 ("gpu: host1x: Fix boot regression for Tegra") caused a use of uninitialized mutex leading to below warning when CONFIG_DEBUG_MUTEXES and CONFIG_DEBUG_LOCK_ALLOC are enabled. [ 41.662843] ------------[ cut here ]------------ [ 41.663012] DEBUG_LOCKS_WARN_ON(lock->magic != lock) [ 41.663035] WARNING: CPU: 4 PID: 794 at kernel/locking/mutex.c:587 __mutex_lock+0x670/0x878 [ 41.663458] Modules linked in: rtw88_8822c(+) bluetooth(+) rtw88_pci rtw88_core mac80211 aquantia libarc4 crc_itu_t cfg80211 tegra194_cpufreq dwmac_tegra(+) arm_dsu_pmu stmmac_platform stmmac pcs_xpcs rfkill at24 host1x(+) tegra_bpmp_thermal ramoops reed_solomon fuse loop nfnetlink xfs mmc_block rpmb_core ucsi_ccg ina3221 crct10dif_ce xhci_tegra ghash_ce lm90 sha2_ce sha256_arm64 sha1_ce sdhci_tegra pwm_fan sdhci_pltfm sdhci gpio_keys rtc_tegra cqhci mmc_core phy_tegra_xusb i2c_tegra tegra186_gpc_dma i2c_tegra_bpmp spi_tegra114 dm_mirror dm_region_hash dm_log dm_mod [ 41.665078] CPU: 4 UID: 0 PID: 794 Comm: (udev-worker) Not tainted 6.11.0-29.31_1538613708.el10.aarch64+debug #1 [ 41.665838] Hardware name: NVIDIA NVIDIA Jetson AGX Orin Developer Kit/Jetson, BIOS 36.3.0-gcid-35594366 02/26/2024 [ 41.672555] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 41.679636] pc : __mutex_lock+0x670/0x878 [ 41.683834] lr : __mutex_lock+0x670/0x878 [ 41.688035] sp : ffff800084b77090 [ 41.691446] x29: ffff800084b77160 x28: ffffdd4bebf7b000 x27: ffffdd4be96b1000 [ 41.698799] x26: 1fffe0002308361c x25: 1ffff0001096ee18 x24: 0000000000000000 [ 41.706149] x23: 0000000000000000 x22: 0000000000000002 x21: ffffdd4be6e3c7a0 [ 41.713500] x20: ffff800084b770f0 x19: ffff00011841b1e8 x18: 0000000000000000 [ 41.720675] x17: 0000000000000000 x16: 0000000000000000 x15: 0720072007200720 [ 41.728023] x14: 0000000000000000 x13: 0000000000000001 x12: ffff6001a96eaab3 [ 41.735375] x11: 1fffe001a96eaab2 x10: ffff6001a96eaab2 x9 : ffffdd4be4838bbc [ 41.742723] x8 : 00009ffe5691554e x7 : ffff000d4b755593 x6 : 0000000000000001 [ 41.749985] x5 : ffff000d4b755590 x4 : 1fffe0001d88f001 x3 : dfff800000000000 [ 41.756988] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0000ec478000 [ 41.764251] Call trace: [ 41.766695] __mutex_lock+0x670/0x878 [ 41.770373] mutex_lock_nested+0x2c/0x40 [ 41.774134] host1x_intr_start+0x54/0xf8 [host1x] [ 41.778863] host1x_runtime_resume+0x150/0x228 [host1x] [ 41.783935] pm_generic_runtime_resume+0x84/0xc8 [ 41.788485] __rpm_callback+0xa0/0x478 [ 41.792422] rpm_callback+0x15c/0x1a8 [ 41.795922] rpm_resume+0x698/0xc08 [ 41.799597] __pm_runtime_resume+0xa8/0x140 [ 41.803621] host1x_probe+0x810/0xbc0 [host1x] [ 41.807909] platform_probe+0xcc/0x1a8 [ 41.811845] really_probe+0x188/0x800 [ 41.815347] __driver_probe_device+0x164/0x360 [ 41.819810] driver_probe_device+0x64/0x1a8 [ 41.823834] __driver_attach+0x180/0x490 [ 41.827773] bus_for_each_dev+0x104/0x1a0 [ 41.831797] driver_attach+0x44/0x68 [ 41.835296] bus_add_driver+0x23c/0x4e8 [ 41.839235] driver_register+0x15c/0x3a8 [ 41.843170] __platform_register_drivers+0xa4/0x208 [ 41.848159] tegra_host1x_init+0x4c/0xff8 [host1x] [ 41.853147] do_one_initcall+0xd4/0x380 [ 41.856997] do_init_module+0x1dc/0x698 [ 41.860758] load_module+0xc70/0x1300 [ 41.864435] __do_sys_init_module+0x1a8/0x1d0 [ 41.868721] __arm64_sys_init_module+0x74/0xb0 [ 41.873183] invoke_syscall.constprop.0+0xdc/0x1e8 [ 41.877997] do_el0_svc+0x154/0x1d0 [ 41.881671] el0_svc+0x54/0x140 [ 41.884820] el0t_64_sync_handler+0x120/0x130 [ 41.889285] el0t_64_sync+0x1a4/0x1a8 [ 41.892960] irq event stamp: 69737 [ 41.896370] hardirqs last enabled at (69737): [] _raw_spin_unlock_irqrestore+0x44/0xe8 [ 41.905739] hardirqs last disabled at (69736): [] clk_enable_lock+0x98/0x198 [ 41.914314] softirqs last enabled at (68082): [] handle_softirqs+0x4c8/0x890 [ 41.922977] softirqs last disabled at (67945): [] __do_softirq+0x1c/0x28 [ 41.931289] ---[ end trace 0000000000000000 ]--- Inside the probe function when pm_runtime_enable() is called, the PM core invokes a resume callback if the device Host1x is in a suspended state. As it can be seen in the logs above, this leads to host1x_intr_start() function call which is trying to acquire a mutex lock. But, the function host_intr_init() only gets called after the pm_runtime_enable() where mutex is initialised leading to the use of mutex prior to its initialisation. Fix this by moving the mutex initialisation prior to the runtime PM enablement function pm_runtime_enable() in probe. Fixes: c8347f915e67 ("gpu: host1x: Fix boot regression for Tegra") Signed-off-by: Rupinderjit Singh Reviewed-by: Jon Hunter Tested-by: Jon Hunter Signed-off-by: Thierry Reding Link: https://patchwork.ozlabs.org/project/linux-tegra/patch/20250206155803.201942-1-rusingh@redhat.com/ --- drivers/gpu/host1x/dev.c | 2 ++ drivers/gpu/host1x/intr.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 7b1d091f3c090..46cae925b0959 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -619,6 +619,8 @@ static int host1x_probe(struct platform_device *pdev) goto free_contexts; } + mutex_init(&host->intr_mutex); + pm_runtime_enable(&pdev->dev); err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c index b3285dd101804..f77a678949e96 100644 --- a/drivers/gpu/host1x/intr.c +++ b/drivers/gpu/host1x/intr.c @@ -104,8 +104,6 @@ int host1x_intr_init(struct host1x *host) unsigned int id; int i, err; - mutex_init(&host->intr_mutex); - for (id = 0; id < host1x_syncpt_nb_pts(host); ++id) { struct host1x_syncpt *syncpt = &host->syncpt[id]; -- GitLab From 3b32b7f638fe61e9d29290960172f4e360e38233 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Sun, 19 Jan 2025 10:58:29 +0800 Subject: [PATCH 0387/1585] drm/panthor: avoid garbage value in panthor_ioctl_dev_query() 'priorities_info' is uninitialized, and the uninitialized value is copied to user object when calling PANTHOR_UOBJ_SET(). Using memset to initialize 'priorities_info' to avoid this garbage value problem. Fixes: f70000ef2352 ("drm/panthor: Add DEV_QUERY_GROUP_PRIORITIES_INFO dev query") Signed-off-by: Su Hui Reviewed-by: Dan Carpenter Reviewed-by: Boris Brezillon Reviewed-by: Steven Price Signed-off-by: Boris Brezillon Link: https://patchwork.freedesktop.org/patch/msgid/20250119025828.1168419-1-suhui@nfschina.com --- drivers/gpu/drm/panthor/panthor_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index d5dcd3d1b33a0..08136e790ca0a 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -802,6 +802,7 @@ static void panthor_query_group_priorities_info(struct drm_file *file, { int prio; + memset(arg, 0, sizeof(*arg)); for (prio = PANTHOR_GROUP_PRIORITY_REALTIME; prio >= 0; prio--) { if (!group_priority_permit(file, prio)) arg->allowed_mask |= BIT(prio); -- GitLab From 511121a48bbd12df4ae50a099a8936e833df8c46 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 5 Feb 2025 19:42:01 +0100 Subject: [PATCH 0388/1585] MAINTAINERS: Move Pavel to kernel.org address I need to filter my emails better, switch to pavel@kernel.org address to help with that. Signed-off-by: Pavel Machek Signed-off-by: Linus Torvalds --- CREDITS | 6 ++---- MAINTAINERS | 10 +++++----- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/CREDITS b/CREDITS index 1f9f0f078b4ae..53d11a46fd698 100644 --- a/CREDITS +++ b/CREDITS @@ -2515,11 +2515,9 @@ D: SLS distribution D: Initial implementation of VC's, pty's and select() N: Pavel Machek -E: pavel@ucw.cz +E: pavel@kernel.org P: 4096R/92DFCE96 4FA7 9EEF FCD4 C44F C585 B8C7 C060 2241 92DF CE96 -D: Softcursor for vga, hypertech cdrom support, vcsa bugfix, nbd, -D: sun4/330 port, capabilities for elf, speedup for rm on ext2, USB, -D: work on suspend-to-ram/disk, killing duplicates from ioctl32, +D: NBD, Sun4/330 port, USB, work on suspend-to-ram/disk, D: Altera SoCFPGA and Nokia N900 support. S: Czech Republic diff --git a/MAINTAINERS b/MAINTAINERS index 873aa2cce4d7f..157818de0b554 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9418,7 +9418,7 @@ F: fs/freevxfs/ FREEZER M: "Rafael J. Wysocki" -M: Pavel Machek +M: Pavel Machek L: linux-pm@vger.kernel.org S: Supported F: Documentation/power/freezing-of-tasks.rst @@ -10253,7 +10253,7 @@ F: drivers/video/fbdev/hgafb.c HIBERNATION (aka Software Suspend, aka swsusp) M: "Rafael J. Wysocki" -M: Pavel Machek +M: Pavel Machek L: linux-pm@vger.kernel.org S: Supported B: https://bugzilla.kernel.org @@ -13124,8 +13124,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/har F: scripts/leaking_addresses.pl LED SUBSYSTEM -M: Pavel Machek M: Lee Jones +M: Pavel Machek L: linux-leds@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/lee/leds.git @@ -16823,7 +16823,7 @@ F: include/linux/tick.h F: kernel/time/tick*.* NOKIA N900 CAMERA SUPPORT (ET8EK8 SENSOR, AD5820 FOCUS) -M: Pavel Machek +M: Pavel Machek M: Sakari Ailus L: linux-media@vger.kernel.org S: Maintained @@ -22849,7 +22849,7 @@ F: drivers/sh/ SUSPEND TO RAM M: "Rafael J. Wysocki" M: Len Brown -M: Pavel Machek +M: Pavel Machek L: linux-pm@vger.kernel.org S: Supported B: https://bugzilla.kernel.org -- GitLab From 1b3291f00013c86a9bb349d6158a9a7a4f0334fe Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Fri, 7 Feb 2025 03:21:46 +0900 Subject: [PATCH 0389/1585] MAINTAINERS: Remove myself I no longer have any faith left in the kernel development process or community management approach. Apple/ARM platform development will continue downstream. If I feel like sending some patches upstream in the future myself for whatever subtree I may, or I may not. Anyone who feels like fighting the upstreaming fight themselves is welcome to do so. Signed-off-by: Hector Martin Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 157818de0b554..20c9e08712150 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2209,7 +2209,6 @@ F: sound/soc/codecs/cs42l84.* F: sound/soc/codecs/ssm3515.c ARM/APPLE MACHINE SUPPORT -M: Hector Martin M: Sven Peter R: Alyssa Rosenzweig L: asahi@lists.linux.dev -- GitLab From f354fc88a72ae83dacd68370f6fa040e5733bcfe Mon Sep 17 00:00:00 2001 From: WangYuli Date: Fri, 7 Feb 2025 15:08:55 +0800 Subject: [PATCH 0390/1585] kbuild: install-extmod-build: add missing quotation marks for CC variable While attempting to build a Debian packages with CC="ccache gcc", I saw the following error as builddeb builds linux-headers-$KERNELVERSION: make HOSTCC=ccache gcc VPATH= srcroot=. -f ./scripts/Makefile.build obj=debian/linux-headers-6.14.0-rc1/usr/src/linux-headers-6.14.0-rc1/scripts make[6]: *** No rule to make target 'gcc'. Stop. Upon investigation, it seems that one instance of $(CC) variable reference in ./scripts/package/install-extmod-build was missing quotation marks, causing the above error. Add the missing quotation marks around $(CC) to fix build. Fixes: 5f73e7d0386d ("kbuild: refactor cross-compiling linux-headers package") Co-developed-by: Mingcong Bai Signed-off-by: Mingcong Bai Tested-by: WangYuli Signed-off-by: WangYuli Signed-off-by: Masahiro Yamada --- scripts/package/install-extmod-build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index bb6e23c1174ec..b724626ea0ca0 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -63,7 +63,7 @@ if [ "${CC}" != "${HOSTCC}" ]; then # Clear VPATH and srcroot because the source files reside in the output # directory. # shellcheck disable=SC2016 # $(MAKE), $(CC), and $(build) will be expanded by Make - "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC=$(CC) VPATH= srcroot=. $(build)='"${destdir}"/scripts + "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC="$(CC)" VPATH= srcroot=. $(build)='"${destdir}"/scripts rm -f "${destdir}/scripts/Kbuild" fi -- GitLab From 595170d4b660640289003d1881a9a6ef8ded6865 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 6 Feb 2025 22:28:41 -0500 Subject: [PATCH 0391/1585] bcachefs: Fix marking reflink pointers to missing indirect extents reflink pointers to missing indirect extents aren't deleted, they just have an error bit set - in case the indirect extent somehow reappears. fsck/mark and sweep thus needs to ignore these errors. Also, they can be marked AUTOFIX now. Reported-by: Roland Vet Signed-off-by: Kent Overstreet --- fs/bcachefs/reflink.c | 2 ++ fs/bcachefs/sb-errors_format.h | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 93ba4f4e47cae..376fd0a6e868c 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -381,6 +381,8 @@ static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans, not_found: if (flags & BTREE_TRIGGER_check_repair) { ret = bch2_indirect_extent_missing_error(trans, p, *idx, next_idx, false); + if (ret == -BCH_ERR_missing_indirect_extent) + ret = 0; if (ret) goto err; } diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index ea0a18364751d..b86ec013d7d70 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -180,9 +180,9 @@ enum bch_fsck_flags { x(ptr_crc_nonce_mismatch, 162, 0) \ x(ptr_stripe_redundant, 163, 0) \ x(reservation_key_nr_replicas_invalid, 164, 0) \ - x(reflink_v_refcount_wrong, 165, 0) \ + x(reflink_v_refcount_wrong, 165, FSCK_AUTOFIX) \ x(reflink_v_pos_bad, 292, 0) \ - x(reflink_p_to_missing_reflink_v, 166, 0) \ + x(reflink_p_to_missing_reflink_v, 166, FSCK_AUTOFIX) \ x(reflink_refcount_underflow, 293, 0) \ x(stripe_pos_bad, 167, 0) \ x(stripe_val_size_bad, 168, 0) \ -- GitLab From 01af106a076352182b2916b143fc50272600bd81 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 21 Jan 2025 05:40:51 +0000 Subject: [PATCH 0392/1585] btrfs: fix two misuses of folio_shift() It is meaningless to shift a byte count by folio_shift(). The folio index is in units of PAGE_SIZE, not folio_size(). We can use folio_contains() to make this work for arbitrary-order folios, so remove the assertion that the folios are of order 0. Reviewed-by: Qu Wenruo Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d9f856358704f..6f64ee16744d6 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -523,8 +523,6 @@ static void end_bbio_data_read(struct btrfs_bio *bbio) u64 end; u32 len; - /* For now only order 0 folios are supported for data. */ - ASSERT(folio_order(folio) == 0); btrfs_debug(fs_info, "%s: bi_sector=%llu, err=%d, mirror=%u", __func__, bio->bi_iter.bi_sector, bio->bi_status, @@ -552,7 +550,6 @@ static void end_bbio_data_read(struct btrfs_bio *bbio) if (likely(uptodate)) { loff_t i_size = i_size_read(inode); - pgoff_t end_index = i_size >> folio_shift(folio); /* * Zero out the remaining part if this range straddles @@ -561,9 +558,11 @@ static void end_bbio_data_read(struct btrfs_bio *bbio) * Here we should only zero the range inside the folio, * not touch anything else. * - * NOTE: i_size is exclusive while end is inclusive. + * NOTE: i_size is exclusive while end is inclusive and + * folio_contains() takes PAGE_SIZE units. */ - if (folio_index(folio) == end_index && i_size <= end) { + if (folio_contains(folio, i_size >> PAGE_SHIFT) && + i_size <= end) { u32 zero_start = max(offset_in_folio(folio, i_size), offset_in_folio(folio, start)); u32 zero_len = offset_in_folio(folio, end) + 1 - @@ -956,7 +955,7 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached, return ret; } - if (folio->index == last_byte >> folio_shift(folio)) { + if (folio_contains(folio, last_byte >> PAGE_SHIFT)) { size_t zero_offset = offset_in_folio(folio, last_byte); if (zero_offset) { -- GitLab From cb827db50a88aebec516151681adb6db10b688ee Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Feb 2025 08:30:51 +0000 Subject: [PATCH 0393/1585] net: fib_rules: annotate data-races around rule->[io]ifindex rule->iifindex and rule->oifindex can be read without holding RTNL. Add READ_ONCE()/WRITE_ONCE() annotations where needed. Fixes: 32affa5578f0 ("fib: rules: no longer hold RTNL in fib_nl_dumprule()") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20250206083051.2494877-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/fib_rules.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index e684ba3ebb385..94a7872ab2318 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -37,8 +37,8 @@ static const struct fib_kuid_range fib_kuid_range_unset = { bool fib_rule_matchall(const struct fib_rule *rule) { - if (rule->iifindex || rule->oifindex || rule->mark || rule->tun_id || - rule->flags) + if (READ_ONCE(rule->iifindex) || READ_ONCE(rule->oifindex) || + rule->mark || rule->tun_id || rule->flags) return false; if (rule->suppress_ifgroup != -1 || rule->suppress_prefixlen != -1) return false; @@ -261,12 +261,14 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) { - int ret = 0; + int iifindex, oifindex, ret = 0; - if (rule->iifindex && (rule->iifindex != fl->flowi_iif)) + iifindex = READ_ONCE(rule->iifindex); + if (iifindex && (iifindex != fl->flowi_iif)) goto out; - if (rule->oifindex && (rule->oifindex != fl->flowi_oif)) + oifindex = READ_ONCE(rule->oifindex); + if (oifindex && (oifindex != fl->flowi_oif)) goto out; if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) @@ -1041,14 +1043,14 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (rule->iifname[0]) { if (nla_put_string(skb, FRA_IIFNAME, rule->iifname)) goto nla_put_failure; - if (rule->iifindex == -1) + if (READ_ONCE(rule->iifindex) == -1) frh->flags |= FIB_RULE_IIF_DETACHED; } if (rule->oifname[0]) { if (nla_put_string(skb, FRA_OIFNAME, rule->oifname)) goto nla_put_failure; - if (rule->oifindex == -1) + if (READ_ONCE(rule->oifindex) == -1) frh->flags |= FIB_RULE_OIF_DETACHED; } @@ -1220,10 +1222,10 @@ static void attach_rules(struct list_head *rules, struct net_device *dev) list_for_each_entry(rule, rules, list) { if (rule->iifindex == -1 && strcmp(dev->name, rule->iifname) == 0) - rule->iifindex = dev->ifindex; + WRITE_ONCE(rule->iifindex, dev->ifindex); if (rule->oifindex == -1 && strcmp(dev->name, rule->oifname) == 0) - rule->oifindex = dev->ifindex; + WRITE_ONCE(rule->oifindex, dev->ifindex); } } @@ -1233,9 +1235,9 @@ static void detach_rules(struct list_head *rules, struct net_device *dev) list_for_each_entry(rule, rules, list) { if (rule->iifindex == dev->ifindex) - rule->iifindex = -1; + WRITE_ONCE(rule->iifindex, -1); if (rule->oifindex == dev->ifindex) - rule->oifindex = -1; + WRITE_ONCE(rule->oifindex, -1); } } -- GitLab From db5fd3cf8bf41b84b577b8ad5234ea95f327c9be Mon Sep 17 00:00:00 2001 From: Muhammad Adeel Date: Fri, 7 Feb 2025 14:24:32 +0000 Subject: [PATCH 0394/1585] cgroup: Remove steal time from usage_usec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CPU usage time is the time when user, system or both are using the CPU. Steal time is the time when CPU is waiting to be run by the Hypervisor. It should not be added to the CPU usage time, hence removing it from the usage_usec entry. Fixes: 936f2a70f2077 ("cgroup: add cpu.stat file to root cgroup") Acked-by: Axel Busch Acked-by: Michal Koutný Signed-off-by: Muhammad Adeel Signed-off-by: Tejun Heo --- kernel/cgroup/rstat.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 5877974ece92c..aac91466279f1 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -590,7 +590,6 @@ static void root_cgroup_cputime(struct cgroup_base_stat *bstat) cputime->sum_exec_runtime += user; cputime->sum_exec_runtime += sys; - cputime->sum_exec_runtime += cpustat[CPUTIME_STEAL]; #ifdef CONFIG_SCHED_CORE bstat->forceidle_sum += cpustat[CPUTIME_FORCEIDLE]; -- GitLab From 884c3a18dadfda326dffa364477cc027728219de Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Tue, 4 Feb 2025 10:25:16 -0700 Subject: [PATCH 0395/1585] bpf: verifier: Do not extract constant map keys for irrelevant maps Previously, we were trying to extract constant map keys for all bpf_map_lookup_elem(), regardless of map type. This is an issue if the map has a u64 key and the value is very high, as it can be interpreted as a negative signed value. This in turn is treated as an error value by check_func_arg() which causes a valid program to be incorrectly rejected. Fix by only extracting constant map keys for relevant maps. This fix works because nullness elision is only allowed for {PERCPU_}ARRAY maps, and keys for these are within u32 range. See next commit for an example via selftest. Acked-by: Eduard Zingerman Reported-by: Marc Hartmayer Reported-by: Ilya Leoshkevich Tested-by: Marc Hartmayer Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/aa868b642b026ff87ba6105ea151bc8693b35932.1738689872.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9971c03adfd5d..e9176a5ce215e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9206,6 +9206,8 @@ static s64 get_constant_map_key(struct bpf_verifier_env *env, return reg->var_off.value; } +static bool can_elide_value_nullness(enum bpf_map_type type); + static int check_func_arg(struct bpf_verifier_env *env, u32 arg, struct bpf_call_arg_meta *meta, const struct bpf_func_proto *fn, @@ -9354,9 +9356,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, err = check_helper_mem_access(env, regno, key_size, BPF_READ, false, NULL); if (err) return err; - meta->const_map_key = get_constant_map_key(env, reg, key_size); - if (meta->const_map_key < 0 && meta->const_map_key != -EOPNOTSUPP) - return meta->const_map_key; + if (can_elide_value_nullness(meta->map_ptr->map_type)) { + meta->const_map_key = get_constant_map_key(env, reg, key_size); + if (meta->const_map_key < 0 && meta->const_map_key != -EOPNOTSUPP) + return meta->const_map_key; + } break; case ARG_PTR_TO_MAP_VALUE: if (type_may_be_null(arg_type) && register_is_null(reg)) -- GitLab From 973cb1382ead401c476c82f20525e593ae84788f Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Tue, 4 Feb 2025 10:25:17 -0700 Subject: [PATCH 0396/1585] bpf: selftests: Test constant key extraction on irrelevant maps Test that very high constant map keys are not interpreted as an error value by the verifier. This would previously fail. Acked-by: Eduard Zingerman Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/c0590b62eb9303f389b2f52c0c7e9cf22a358a30.1738689872.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_array_access.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_array_access.c b/tools/testing/selftests/bpf/progs/verifier_array_access.c index 29eb9568633ff..0a187ff725cc4 100644 --- a/tools/testing/selftests/bpf/progs/verifier_array_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_array_access.c @@ -713,4 +713,19 @@ unsigned int non_stack_key_lookup(void) return val->index; } +SEC("socket") +__description("doesn't reject UINT64_MAX as s64 for irrelevant maps") +__success __retval(42) +unsigned int doesnt_reject_irrelevant_maps(void) +{ + __u64 key = 0xFFFFFFFFFFFFFFFF; + struct test_val *val; + + val = bpf_map_lookup_elem(&map_hash_48b, &key); + if (val) + return val->index; + + return 42; +} + char _license[] SEC("license") = "GPL"; -- GitLab From 7968c6581507052c1c6484ee6c5cbe07381e2dbc Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Tue, 4 Feb 2025 10:25:18 -0700 Subject: [PATCH 0397/1585] bpf: verifier: Disambiguate get_constant_map_key() errors Refactor get_constant_map_key() to disambiguate the constant key value from potential error values. In the case that the key is negative, it could be confused for an error. It's not currently an issue, as the verifier seems to track s32 spills as u32. So even if the program wrongly uses a negative value for an arraymap key, the verifier just thinks it's an impossibly high value which gets correctly discarded. Refactor anyways to make things cleaner and prevent potential future issues. Acked-by: Eduard Zingerman Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/dfe144259ae7cfc98aa63e1b388a14869a10632a.1738689872.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e9176a5ce215e..98354d7816789 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9149,10 +9149,11 @@ static int check_reg_const_str(struct bpf_verifier_env *env, return 0; } -/* Returns constant key value if possible, else negative error */ -static s64 get_constant_map_key(struct bpf_verifier_env *env, +/* Returns constant key value in `value` if possible, else negative error */ +static int get_constant_map_key(struct bpf_verifier_env *env, struct bpf_reg_state *key, - u32 key_size) + u32 key_size, + s64 *value) { struct bpf_func_state *state = func(env, key); struct bpf_reg_state *reg; @@ -9179,8 +9180,10 @@ static s64 get_constant_map_key(struct bpf_verifier_env *env, /* First handle precisely tracked STACK_ZERO */ for (i = off; i >= 0 && stype[i] == STACK_ZERO; i--) zero_size++; - if (zero_size >= key_size) + if (zero_size >= key_size) { + *value = 0; return 0; + } /* Check that stack contains a scalar spill of expected size */ if (!is_spilled_scalar_reg(&state->stack[spi])) @@ -9203,7 +9206,8 @@ static s64 get_constant_map_key(struct bpf_verifier_env *env, if (err < 0) return err; - return reg->var_off.value; + *value = reg->var_off.value; + return 0; } static bool can_elide_value_nullness(enum bpf_map_type type); @@ -9357,9 +9361,14 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, if (err) return err; if (can_elide_value_nullness(meta->map_ptr->map_type)) { - meta->const_map_key = get_constant_map_key(env, reg, key_size); - if (meta->const_map_key < 0 && meta->const_map_key != -EOPNOTSUPP) - return meta->const_map_key; + err = get_constant_map_key(env, reg, key_size, &meta->const_map_key); + if (err < 0) { + meta->const_map_key = -1; + if (err == -EOPNOTSUPP) + err = 0; + else + return err; + } } break; case ARG_PTR_TO_MAP_VALUE: -- GitLab From 011b0335903832facca86cd8ed05d7d8d94c9c76 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 6 Feb 2025 22:28:48 +0100 Subject: [PATCH 0398/1585] Revert "net: skb: introduce and use a single page frag cache" This reverts commit dbae2b062824 ("net: skb: introduce and use a single page frag cache"). The intended goal of such change was to counter a performance regression introduced by commit 3226b158e67c ("net: avoid 32 x truesize under-estimation for tiny skbs"). Unfortunately, the blamed commit introduces another regression for the virtio_net driver. Such a driver calls napi_alloc_skb() with a tiny size, so that the whole head frag could fit a 512-byte block. The single page frag cache uses a 1K fragment for such allocation, and the additional overhead, under small UDP packets flood, makes the page allocator a bottleneck. Thanks to commit bf9f1baa279f ("net: add dedicated kmem_cache for typical/small skb->head"), this revert does not re-introduce the original regression. Actually, in the relevant test on top of this revert, I measure a small but noticeable positive delta, just above noise level. The revert itself required some additional mangling due to the introduction of the SKB_HEAD_ALIGN() helper and local lock infra in the affected code. Suggested-by: Eric Dumazet Fixes: dbae2b062824 ("net: skb: introduce and use a single page frag cache") Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/e649212fde9f0fdee23909ca0d14158d32bb7425.1738877290.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 - net/core/dev.c | 17 +++++++ net/core/skbuff.c | 103 ++------------------------------------ 3 files changed, 22 insertions(+), 99 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c0a86afb85daa..365f0e2098d13 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4115,7 +4115,6 @@ void netif_receive_skb_list(struct list_head *head); gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); -void napi_get_frags_check(struct napi_struct *napi); gro_result_t napi_gro_frags(struct napi_struct *napi); static inline void napi_free_frags(struct napi_struct *napi) diff --git a/net/core/dev.c b/net/core/dev.c index b91658e8aedb4..55e356a68db66 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6920,6 +6920,23 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi) list_add_rcu(&napi->dev_list, higher); /* adds after higher */ } +/* Double check that napi_get_frags() allocates skbs with + * skb->head being backed by slab, not a page fragment. + * This is to make sure bug fixed in 3226b158e67c + * ("net: avoid 32 x truesize under-estimation for tiny skbs") + * does not accidentally come back. + */ +static void napi_get_frags_check(struct napi_struct *napi) +{ + struct sk_buff *skb; + + local_bh_disable(); + skb = napi_get_frags(napi); + WARN_ON_ONCE(skb && skb->head_frag); + napi_free_frags(napi); + local_bh_enable(); +} + void netif_napi_add_weight_locked(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a441613a1e6c1..6a99c453397fc 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -220,67 +220,9 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr) #define NAPI_SKB_CACHE_BULK 16 #define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2) -#if PAGE_SIZE == SZ_4K - -#define NAPI_HAS_SMALL_PAGE_FRAG 1 -#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc) - -/* specialized page frag allocator using a single order 0 page - * and slicing it into 1K sized fragment. Constrained to systems - * with a very limited amount of 1K fragments fitting a single - * page - to avoid excessive truesize underestimation - */ - -struct page_frag_1k { - void *va; - u16 offset; - bool pfmemalloc; -}; - -static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp) -{ - struct page *page; - int offset; - - offset = nc->offset - SZ_1K; - if (likely(offset >= 0)) - goto use_frag; - - page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); - if (!page) - return NULL; - - nc->va = page_address(page); - nc->pfmemalloc = page_is_pfmemalloc(page); - offset = PAGE_SIZE - SZ_1K; - page_ref_add(page, offset / SZ_1K); - -use_frag: - nc->offset = offset; - return nc->va + offset; -} -#else - -/* the small page is actually unused in this build; add dummy helpers - * to please the compiler and avoid later preprocessor's conditionals - */ -#define NAPI_HAS_SMALL_PAGE_FRAG 0 -#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false - -struct page_frag_1k { -}; - -static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask) -{ - return NULL; -} - -#endif - struct napi_alloc_cache { local_lock_t bh_lock; struct page_frag_cache page; - struct page_frag_1k page_small; unsigned int skb_count; void *skb_cache[NAPI_SKB_CACHE_SIZE]; }; @@ -290,23 +232,6 @@ static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = { .bh_lock = INIT_LOCAL_LOCK(bh_lock), }; -/* Double check that napi_get_frags() allocates skbs with - * skb->head being backed by slab, not a page fragment. - * This is to make sure bug fixed in 3226b158e67c - * ("net: avoid 32 x truesize under-estimation for tiny skbs") - * does not accidentally come back. - */ -void napi_get_frags_check(struct napi_struct *napi) -{ - struct sk_buff *skb; - - local_bh_disable(); - skb = napi_get_frags(napi); - WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag); - napi_free_frags(napi); - local_bh_enable(); -} - void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); @@ -813,10 +738,8 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) /* If requested length is either too small or too big, * we use kmalloc() for skb->head allocation. - * When the small frag allocator is available, prefer it over kmalloc - * for small fragments */ - if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) || + if (len <= SKB_WITH_OVERHEAD(1024) || len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI, @@ -826,32 +749,16 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) goto skb_success; } + len = SKB_HEAD_ALIGN(len); + if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; local_lock_nested_bh(&napi_alloc_cache.bh_lock); nc = this_cpu_ptr(&napi_alloc_cache); - if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) { - /* we are artificially inflating the allocation size, but - * that is not as bad as it may look like, as: - * - 'len' less than GRO_MAX_HEAD makes little sense - * - On most systems, larger 'len' values lead to fragment - * size above 512 bytes - * - kmalloc would use the kmalloc-1k slab for such values - * - Builds with smaller GRO_MAX_HEAD will very likely do - * little networking, as that implies no WiFi and no - * tunnels support, and 32 bits arches. - */ - len = SZ_1K; - data = page_frag_alloc_1k(&nc->page_small, gfp_mask); - pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small); - } else { - len = SKB_HEAD_ALIGN(len); - - data = page_frag_alloc(&nc->page, len, gfp_mask); - pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page); - } + data = page_frag_alloc(&nc->page, len, gfp_mask); + pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page); local_unlock_nested_bh(&napi_alloc_cache.bh_lock); if (unlikely(!data)) -- GitLab From 8784714d7f27045c7cb72456cf66705b73fbc804 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 6 Feb 2025 02:54:31 -0800 Subject: [PATCH 0399/1585] bpf: Handle allocation failure in acquire_lock_state The acquire_lock_state function needs to handle possible NULL values returned by acquire_reference_state, and return -ENOMEM. Fixes: 769b0f1c8214 ("bpf: Refactor {acquire,release}_reference_state") Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20250206105435.2159977-24-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 98354d7816789..60611df77957a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1501,6 +1501,8 @@ static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum r struct bpf_reference_state *s; s = acquire_reference_state(env, insn_idx); + if (!s) + return -ENOMEM; s->type = type; s->id = id; s->ptr = ptr; -- GitLab From 8f6629c004b193d23612641c3607e785819e97ab Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 17 Oct 2024 10:09:22 -0700 Subject: [PATCH 0400/1585] kbuild: Move -Wenum-enum-conversion to W=2 -Wenum-enum-conversion was strengthened in clang-19 to warn for C, which caused the kernel to move it to W=1 in commit 75b5ab134bb5 ("kbuild: Move -Wenum-{compare-conditional,enum-conversion} into W=1") because there were numerous instances that would break builds with -Werror. Unfortunately, this is not a full solution, as more and more developers, subsystems, and distributors are building with W=1 as well, so they continue to see the numerous instances of this warning. Since the move to W=1, there have not been many new instances that have appeared through various build reports and the ones that have appeared seem to be following similar existing patterns, suggesting that most instances of this warning will not be real issues. The only alternatives for silencing this warning are adding casts (which is generally seen as an ugly practice) or refactoring the enums to macro defines or a unified enum (which may be undesirable because of type safety in other parts of the code). Move the warning to W=2, where warnings that occur frequently but may be relevant should reside. Cc: stable@vger.kernel.org Fixes: 75b5ab134bb5 ("kbuild: Move -Wenum-{compare-conditional,enum-conversion} into W=1") Link: https://lore.kernel.org/ZwRA9SOcOjjLJcpi@google.com/ Signed-off-by: Nathan Chancellor Acked-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- scripts/Makefile.extrawarn | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index eb719f6d8d536..a7003c1e66c7d 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -133,7 +133,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access) KBUILD_CFLAGS += -Wno-enum-compare-conditional -KBUILD_CFLAGS += -Wno-enum-enum-conversion endif endif @@ -157,6 +156,10 @@ KBUILD_CFLAGS += -Wno-missing-field-initializers KBUILD_CFLAGS += -Wno-type-limits KBUILD_CFLAGS += -Wno-shift-negative-value +ifdef CONFIG_CC_IS_CLANG +KBUILD_CFLAGS += -Wno-enum-enum-conversion +endif + ifdef CONFIG_CC_IS_GCC KBUILD_CFLAGS += -Wno-maybe-uninitialized endif -- GitLab From d0b197b6505fe3788860fc2a81b3ce53cbecc69c Mon Sep 17 00:00:00 2001 From: Reyders Morales Date: Mon, 3 Feb 2025 23:47:20 +0100 Subject: [PATCH 0401/1585] Documentation/networking: fix basic node example document ISO 15765-2 In the current struct sockaddr_can tp is member of can_addr. tp is not member of struct sockaddr_can. Signed-off-by: Reyders Morales Reviewed-by: Simon Horman Acked-by: Oliver Hartkopp Link: https://patch.msgid.link/20250203224720.42530-1-reyders1@gmail.com Fixes: 67711e04254c ("Documentation: networking: document ISO 15765-2") Signed-off-by: Marc Kleine-Budde --- Documentation/networking/iso15765-2.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/iso15765-2.rst b/Documentation/networking/iso15765-2.rst index 0e9d960741783..37ebb2c417cb4 100644 --- a/Documentation/networking/iso15765-2.rst +++ b/Documentation/networking/iso15765-2.rst @@ -369,8 +369,8 @@ to their default. addr.can_family = AF_CAN; addr.can_ifindex = if_nametoindex("can0"); - addr.tp.tx_id = 0x18DA42F1 | CAN_EFF_FLAG; - addr.tp.rx_id = 0x18DAF142 | CAN_EFF_FLAG; + addr.can_addr.tp.tx_id = 0x18DA42F1 | CAN_EFF_FLAG; + addr.can_addr.tp.rx_id = 0x18DAF142 | CAN_EFF_FLAG; ret = bind(s, (struct sockaddr *)&addr, sizeof(addr)); if (ret < 0) -- GitLab From 44de577e61ed239db09f0da9d436866bef9b77dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=B6lzl?= Date: Wed, 5 Feb 2025 18:46:51 +0100 Subject: [PATCH 0402/1585] can: j1939: j1939_sk_send_loop(): fix unable to send messages with data length zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The J1939 standard requires the transmission of messages of length 0. For example proprietary messages are specified with a data length of 0 to 1785. The transmission of such messages is not possible. Sending results in no error being returned but no corresponding can frame being generated. Enable the transmission of zero length J1939 messages. In order to facilitate this two changes are necessary: 1) If the transmission of a new message is requested from user space the message is segmented in j1939_sk_send_loop(). Let the segmentation take into account zero length messages, do not terminate immediately, queue the corresponding skb. 2) j1939_session_skb_get_by_offset() selects the next skb to transmit for a session. Take into account that there might be zero length skbs in the queue. Signed-off-by: Alexander Hölzl Acked-by: Oleksij Rempel Link: https://patch.msgid.link/20250205174651.103238-1-alexander.hoelzl@gmx.net Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Cc: stable@vger.kernel.org [mkl: commit message rephrased] Signed-off-by: Marc Kleine-Budde --- net/can/j1939/socket.c | 4 ++-- net/can/j1939/transport.c | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 305dd72c844c7..17226b2341d03 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -1132,7 +1132,7 @@ static int j1939_sk_send_loop(struct j1939_priv *priv, struct sock *sk, todo_size = size; - while (todo_size) { + do { struct j1939_sk_buff_cb *skcb; segment_size = min_t(size_t, J1939_MAX_TP_PACKET_SIZE, @@ -1177,7 +1177,7 @@ static int j1939_sk_send_loop(struct j1939_priv *priv, struct sock *sk, todo_size -= segment_size; session->total_queued_size += segment_size; - } + } while (todo_size); switch (ret) { case 0: /* OK */ diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 95f7a7e65a73f..9b72d118d756d 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -382,8 +382,9 @@ sk_buff *j1939_session_skb_get_by_offset(struct j1939_session *session, skb_queue_walk(&session->skb_queue, do_skb) { do_skcb = j1939_skb_to_cb(do_skb); - if (offset_start >= do_skcb->offset && - offset_start < (do_skcb->offset + do_skb->len)) { + if ((offset_start >= do_skcb->offset && + offset_start < (do_skcb->offset + do_skb->len)) || + (offset_start == 0 && do_skcb->offset == 0 && do_skb->len == 0)) { skb = do_skb; } } -- GitLab From 9bd24927e3eeb85642c7baa3b28be8bea6c2a078 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Tue, 14 Jan 2025 18:21:38 +0300 Subject: [PATCH 0403/1585] can: ctucanfd: handle skb allocation failure If skb allocation fails, the pointer to struct can_frame is NULL. This is actually handled everywhere inside ctucan_err_interrupt() except for the only place. Add the missed NULL check. Found by Linux Verification Center (linuxtesting.org) with SVACE static analysis tool. Fixes: 2dcb8e8782d8 ("can: ctucanfd: add support for CTU CAN FD open-source IP core - bus independent part.") Cc: stable@vger.kernel.org Signed-off-by: Fedor Pchelkin Acked-by: Pavel Pisa Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20250114152138.139580-1-pchelkin@ispras.ru Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ctucanfd/ctucanfd_base.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/ctucanfd/ctucanfd_base.c b/drivers/net/can/ctucanfd/ctucanfd_base.c index 64c349fd46007..f65c1a1e05ccd 100644 --- a/drivers/net/can/ctucanfd/ctucanfd_base.c +++ b/drivers/net/can/ctucanfd/ctucanfd_base.c @@ -867,10 +867,12 @@ static void ctucan_err_interrupt(struct net_device *ndev, u32 isr) } break; case CAN_STATE_ERROR_ACTIVE: - cf->can_id |= CAN_ERR_CNT; - cf->data[1] = CAN_ERR_CRTL_ACTIVE; - cf->data[6] = bec.txerr; - cf->data[7] = bec.rxerr; + if (skb) { + cf->can_id |= CAN_ERR_CNT; + cf->data[1] = CAN_ERR_CRTL_ACTIVE; + cf->data[6] = bec.txerr; + cf->data[7] = bec.rxerr; + } break; default: netdev_warn(ndev, "unhandled error state (%d:%s)!\n", -- GitLab From 257a2cd3eb578ee63d6bf90475dc4f4b16984139 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 12 Jan 2025 13:41:52 +0100 Subject: [PATCH 0404/1585] can: c_can: fix unbalanced runtime PM disable in error path Runtime PM is enabled as one of the last steps of probe(), so all earlier gotos to "exit_free_device" label were not correct and were leading to unbalanced runtime PM disable depth. Fixes: 6e2fe01dd6f9 ("can: c_can: move runtime PM enable/disable to c_can_platform") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20250112-syscon-phandle-args-can-v1-1-314d9549906f@linaro.org Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can_platform.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c index 6cba9717a6d87..399844809bbea 100644 --- a/drivers/net/can/c_can/c_can_platform.c +++ b/drivers/net/can/c_can/c_can_platform.c @@ -385,15 +385,16 @@ static int c_can_plat_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "registering %s failed (err=%d)\n", KBUILD_MODNAME, ret); - goto exit_free_device; + goto exit_pm_runtime; } dev_info(&pdev->dev, "%s device registered (regs=%p, irq=%d)\n", KBUILD_MODNAME, priv->base, dev->irq); return 0; -exit_free_device: +exit_pm_runtime: pm_runtime_disable(priv->device); +exit_free_device: free_c_can_dev(dev); exit: dev_err(&pdev->dev, "probe failed\n"); -- GitLab From a1ad2109ce41c9e3912dadd07ad8a9c640064ffb Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 5 Feb 2025 00:48:15 +0900 Subject: [PATCH 0405/1585] can: etas_es58x: fix potential NULL pointer dereference on udev->serial The driver assumed that es58x_dev->udev->serial could never be NULL. While this is true on commercially available devices, an attacker could spoof the device identity providing a NULL USB serial number. That would trigger a NULL pointer dereference. Add a check on es58x_dev->udev->serial before accessing it. Reported-by: yan kang Reported-by: yue sun Closes: https://lore.kernel.org/linux-can/SY8P300MB0421E0013C0EBD2AA46BA709A1F42@SY8P300MB0421.AUSP300.PROD.OUTLOOK.COM/ Fixes: 9f06631c3f1f ("can: etas_es58x: export product information through devlink_ops::info_get()") Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20250204154859.9797-2-mailhol.vincent@wanadoo.fr Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/etas_es58x/es58x_devlink.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/etas_es58x/es58x_devlink.c b/drivers/net/can/usb/etas_es58x/es58x_devlink.c index eee20839d96fd..0d155eb1b9e99 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_devlink.c +++ b/drivers/net/can/usb/etas_es58x/es58x_devlink.c @@ -248,7 +248,11 @@ static int es58x_devlink_info_get(struct devlink *devlink, return ret; } - return devlink_info_serial_number_put(req, es58x_dev->udev->serial); + if (es58x_dev->udev->serial) + ret = devlink_info_serial_number_put(req, + es58x_dev->udev->serial); + + return ret; } const struct devlink_ops es58x_dl_ops = { -- GitLab From f7f0adfe64de08803990dc4cbecd2849c04e314a Mon Sep 17 00:00:00 2001 From: Robin van der Gracht Date: Mon, 27 Jan 2025 13:16:44 +0100 Subject: [PATCH 0406/1585] can: rockchip: rkcanfd_handle_rx_fifo_overflow_int(): bail out if skb cannot be allocated Fix NULL pointer check in rkcanfd_handle_rx_fifo_overflow_int() to bail out if skb cannot be allocated. Fixes: ff60bfbaf67f ("can: rockchip_canfd: add driver for Rockchip CAN-FD controller") Cc: stable@vger.kernel.org Signed-off-by: Robin van der Gracht Link: https://patch.msgid.link/20250208-fix-rockchip-canfd-v1-1-ec533c8a9895@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rockchip/rockchip_canfd-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/rockchip/rockchip_canfd-core.c b/drivers/net/can/rockchip/rockchip_canfd-core.c index df18c85fc0784..d9a937ba126c3 100644 --- a/drivers/net/can/rockchip/rockchip_canfd-core.c +++ b/drivers/net/can/rockchip/rockchip_canfd-core.c @@ -622,7 +622,7 @@ rkcanfd_handle_rx_fifo_overflow_int(struct rkcanfd_priv *priv) netdev_dbg(priv->ndev, "RX-FIFO overflow\n"); skb = rkcanfd_alloc_can_err_skb(priv, &cf, ×tamp); - if (skb) + if (!skb) return 0; rkcanfd_get_berr_counter_corrected(priv, &bec); -- GitLab From cc2c3540d9477a9931fb0fd851fcaeba524a5b35 Mon Sep 17 00:00:00 2001 From: Sam Winchenbach Date: Mon, 3 Feb 2025 13:34:34 +0000 Subject: [PATCH 0407/1585] iio: filter: admv8818: Force initialization of SDO When a weak pull-up is present on the SDO line, regmap_update_bits fails to write both the SOFTRESET and SDOACTIVE bits because it incorrectly reads them as already set. Since the soft reset disables the SDO line, performing a read-modify-write operation on ADI_SPI_CONFIG_A to enable the SDO line doesn't make sense. This change directly writes to the register instead of using regmap_update_bits. Fixes: f34fe888ad05 ("iio:filter:admv8818: add support for ADMV8818") Signed-off-by: Sam Winchenbach Link: https://patch.msgid.link/SA1P110MB106904C961B0F3FAFFED74C0BCF5A@SA1P110MB1069.NAMP110.PROD.OUTLOOK.COM Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/filter/admv8818.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/iio/filter/admv8818.c b/drivers/iio/filter/admv8818.c index 848baa6e3bbf5..d85b7d3de8660 100644 --- a/drivers/iio/filter/admv8818.c +++ b/drivers/iio/filter/admv8818.c @@ -574,21 +574,15 @@ static int admv8818_init(struct admv8818_state *st) struct spi_device *spi = st->spi; unsigned int chip_id; - ret = regmap_update_bits(st->regmap, ADMV8818_REG_SPI_CONFIG_A, - ADMV8818_SOFTRESET_N_MSK | - ADMV8818_SOFTRESET_MSK, - FIELD_PREP(ADMV8818_SOFTRESET_N_MSK, 1) | - FIELD_PREP(ADMV8818_SOFTRESET_MSK, 1)); + ret = regmap_write(st->regmap, ADMV8818_REG_SPI_CONFIG_A, + ADMV8818_SOFTRESET_N_MSK | ADMV8818_SOFTRESET_MSK); if (ret) { dev_err(&spi->dev, "ADMV8818 Soft Reset failed.\n"); return ret; } - ret = regmap_update_bits(st->regmap, ADMV8818_REG_SPI_CONFIG_A, - ADMV8818_SDOACTIVE_N_MSK | - ADMV8818_SDOACTIVE_MSK, - FIELD_PREP(ADMV8818_SDOACTIVE_N_MSK, 1) | - FIELD_PREP(ADMV8818_SDOACTIVE_MSK, 1)); + ret = regmap_write(st->regmap, ADMV8818_REG_SPI_CONFIG_A, + ADMV8818_SDOACTIVE_N_MSK | ADMV8818_SDOACTIVE_MSK); if (ret) { dev_err(&spi->dev, "ADMV8818 SDO Enable failed.\n"); return ret; -- GitLab From c8c9b1d2d5b4377c72a979f5a26e842a869aefc9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 8 Feb 2025 00:15:11 -0500 Subject: [PATCH 0408/1585] fgraph: Fix set_graph_notrace with setting TRACE_GRAPH_NOTRACE_BIT The code was restructured where the function graph notrace code, that would not trace a function and all its children is done by setting a NOTRACE flag when the function that is not to be traced is hit. There's a TRACE_GRAPH_NOTRACE_BIT which defines the bit in the flags and a TRACE_GRAPH_NOTRACE which is the mask with that bit set. But the restructuring used TRACE_GRAPH_NOTRACE_BIT when it should have used TRACE_GRAPH_NOTRACE. For example: # cd /sys/kernel/tracing # echo set_track_prepare stack_trace_save > set_graph_notrace # echo function_graph > current_tracer # cat trace [..] 0) | __slab_free() { 0) | free_to_partial_list() { 0) | arch_stack_walk() { 0) | __unwind_start() { 0) 0.501 us | get_stack_info(); Where a non filter trace looks like: # echo > set_graph_notrace # cat trace 0) | free_to_partial_list() { 0) | set_track_prepare() { 0) | stack_trace_save() { 0) | arch_stack_walk() { 0) | __unwind_start() { Where the filter should look like: # cat trace 0) | free_to_partial_list() { 0) | _raw_spin_lock_irqsave() { 0) 0.350 us | preempt_count_add(); 0) 0.351 us | do_raw_spin_lock(); 0) 2.440 us | } Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250208001511.535be150@batman.local.home Fixes: b84214890a9bc ("function_graph: Move graph notrace bit to shadow stack global var") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_functions_graph.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 54d850997c0a1..136c750b0b4da 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -198,7 +198,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, * returning from the function. */ if (ftrace_graph_notrace_addr(trace->func)) { - *task_var |= TRACE_GRAPH_NOTRACE_BIT; + *task_var |= TRACE_GRAPH_NOTRACE; /* * Need to return 1 to have the return called * that will clear the NOTRACE bit. -- GitLab From 3724062ca2b1364f02cf44dbea1a552227844ad1 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jan 2025 13:57:58 -0800 Subject: [PATCH 0409/1585] objtool: Ignore dangling jump table entries Clang sometimes leaves dangling unused jump table entries which point to the end of the function. Ignore them. Closes: https://lore.kernel.org/20250113235835.vqgvb7cdspksy5dn@jpoimboe Reported-by: Klaus Kusche Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/ee25c0b7e80113e950bd1d4c208b671d35774ff4.1736891751.git.jpoimboe@kernel.org --- tools/objtool/check.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 753dbc4f81985..3520a45ebde8b 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1975,6 +1975,14 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, reloc_addend(reloc) == pfunc->offset) break; + /* + * Clang sometimes leaves dangling unused jump table entries + * which point to the end of the function. Ignore them. + */ + if (reloc->sym->sec == pfunc->sec && + reloc_addend(reloc) == pfunc->offset + pfunc->len) + goto next; + dest_insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); if (!dest_insn) break; @@ -1992,6 +2000,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, alt->insn = dest_insn; alt->next = insn->alts; insn->alts = alt; +next: prev_offset = reloc_offset(reloc); } -- GitLab From 7e501637bd5b702a2fa627e903a0025654110e1e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 Feb 2025 11:12:08 +0100 Subject: [PATCH 0410/1585] objtool: Move dodgy linker warn to verbose The lld.ld borkage is fixed in the latest llvm release (?) but will not be backported, meaning we're stuck with broken linker for a fair while. Lets not spam all clang build logs and move warning to verbose. Signed-off-by: Peter Zijlstra (Intel) --- tools/objtool/check.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 3520a45ebde8b..497cb8dfb3eb3 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2273,7 +2273,7 @@ static int read_annotate(struct objtool_file *file, if (sec->sh.sh_entsize != 8) { static bool warned = false; - if (!warned) { + if (!warned && opts.verbose) { WARN("%s: dodgy linker, sh_entsize != 8", sec->name); warned = true; } -- GitLab From bcc6244e13b4d4903511a1ea84368abf925031c0 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 29 Jan 2025 20:53:03 +0100 Subject: [PATCH 0411/1585] sched: Clarify wake_up_q()'s write to task->wake_q.next Clarify that wake_up_q() does an atomic write to task->wake_q.next, after which a concurrent __wake_q_add() can immediately overwrite task->wake_q.next again. Signed-off-by: Jann Horn Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250129-sched-wakeup-prettier-v1-1-2f51f5f663fa@google.com --- kernel/sched/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3e5a6bf587f91..8931d9b1e895e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1055,9 +1055,10 @@ void wake_up_q(struct wake_q_head *head) struct task_struct *task; task = container_of(node, struct task_struct, wake_q); - /* Task can safely be re-inserted now: */ node = node->next; - task->wake_q.next = NULL; + /* pairs with cmpxchg_relaxed() in __wake_q_add() */ + WRITE_ONCE(task->wake_q.next, NULL); + /* Task can safely be re-inserted now. */ /* * wake_up_process() executes a full barrier, which pairs with -- GitLab From 469c76a83bb9f6b2c7b2989c46617c4fe01fee79 Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 29 Jan 2025 08:05:14 +0000 Subject: [PATCH 0412/1585] perf/x86/rapl: Fix the error checking order After the commit b4943b8bfc41 ("perf/x86/rapl: Add core energy counter support for AMD CPUs"), the default "perf record"/"perf top" command is broken in systems where there isn't a PMU registered for type PERF_TYPE_RAW. This is due to the change in order of error checks in rapl_pmu_event_init() Due to which we return -EINVAL instead of -ENOENT, when we reach here from the fallback loop in perf_init_event(). Move the "PMU and event type match" back to the beginning of the function so that we return -ENOENT early on. Closes: https://lore.kernel.org/all/uv7mz6vew2bzgre5jdpmwldxljp5djzmuiksqdcdwipfm4zm7w@ribobcretidk/ Fixes: b4943b8bfc41 ("perf/x86/rapl: Add core energy counter support for AMD CPUs") Reported-by: Koichiro Den Signed-off-by: Dhananjay Ugwekar Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250129080513.30353-1-dhananjay.ugwekar@amd.com --- arch/x86/events/rapl.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index d3bb3865c1b1f..4952faf03e82d 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -370,6 +370,10 @@ static int rapl_pmu_event_init(struct perf_event *event) unsigned int rapl_pmu_idx; struct rapl_pmus *rapl_pmus; + /* only look at RAPL events */ + if (event->attr.type != event->pmu->type) + return -ENOENT; + /* unsupported modes and filters */ if (event->attr.sample_period) /* no sampling */ return -EINVAL; @@ -387,10 +391,6 @@ static int rapl_pmu_event_init(struct perf_event *event) rapl_pmus_scope = rapl_pmus->pmu.scope; if (rapl_pmus_scope == PERF_PMU_SCOPE_PKG || rapl_pmus_scope == PERF_PMU_SCOPE_DIE) { - /* only look at RAPL package events */ - if (event->attr.type != rapl_pmus_pkg->pmu.type) - return -ENOENT; - cfg = array_index_nospec((long)cfg, NR_RAPL_PKG_DOMAINS + 1); if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1) return -EINVAL; @@ -398,10 +398,6 @@ static int rapl_pmu_event_init(struct perf_event *event) bit = cfg - 1; event->hw.event_base = rapl_model->rapl_pkg_msrs[bit].msr; } else if (rapl_pmus_scope == PERF_PMU_SCOPE_CORE) { - /* only look at RAPL core events */ - if (event->attr.type != rapl_pmus_core->pmu.type) - return -ENOENT; - cfg = array_index_nospec((long)cfg, NR_RAPL_CORE_DOMAINS + 1); if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1) return -EINVAL; -- GitLab From 0a5561501397e2bbd0fb0e300eb489f72a90597a Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 29 Jan 2025 07:48:18 -0800 Subject: [PATCH 0413/1585] perf/x86/intel: Clean up PEBS-via-PT on hybrid The PEBS-via-PT feature is exposed for the e-core of some hybrid platforms, e.g., ADL and MTL. But it never works. $ dmesg | grep PEBS [ 1.793888] core: cpu_atom PMU driver: PEBS-via-PT $ perf record -c 1000 -e '{intel_pt/branch=0/, cpu_atom/cpu-cycles,aux-output/pp}' -C8 Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (cpu_atom/cpu-cycles,aux-output/pp). "dmesg | grep -i perf" may provide additional information. The "PEBS-via-PT" is printed if the corresponding bit of per-PMU capabilities is set. Since the feature is supported by the e-core HW, perf sets the bit for e-core. However, for Intel PT, if a feature is not supported on all CPUs, it is not supported at all. The PEBS-via-PT event cannot be created successfully. The PEBS-via-PT is no longer enumerated on the latest hybrid platform. It will be deprecated on future platforms with Arch PEBS. Let's remove it from the existing hybrid platforms. Fixes: d9977c43bff8 ("perf/x86: Register hybrid PMUs") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250129154820.3755948-2-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 10 ---------- arch/x86/events/intel/ds.c | 10 +++++++++- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 7601196d1d18e..966f7832497de 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4941,11 +4941,6 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) else pmu->intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS); - if (pmu->intel_cap.pebs_output_pt_available) - pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; - else - pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT; - intel_pmu_check_event_constraints(pmu->event_constraints, pmu->cntr_mask64, pmu->fixed_cntr_mask64, @@ -5023,9 +5018,6 @@ static bool init_hybrid_pmu(int cpu) pr_info("%s PMU driver: ", pmu->name); - if (pmu->intel_cap.pebs_output_pt_available) - pr_cont("PEBS-via-PT "); - pr_cont("\n"); x86_pmu_show_pmu_cap(&pmu->pmu); @@ -6370,11 +6362,9 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus) pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; if (pmu->pmu_type & hybrid_small_tiny) { pmu->intel_cap.perf_metrics = 0; - pmu->intel_cap.pebs_output_pt_available = 1; pmu->mid_ack = true; } else if (pmu->pmu_type & hybrid_big) { pmu->intel_cap.perf_metrics = 1; - pmu->intel_cap.pebs_output_pt_available = 0; pmu->late_ack = true; } } diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index ba74e11983280..c2e2eae7309c3 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2578,7 +2578,15 @@ void __init intel_ds_init(void) } pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual); - if (!is_hybrid() && x86_pmu.intel_cap.pebs_output_pt_available) { + /* + * The PEBS-via-PT is not supported on hybrid platforms, + * because not all CPUs of a hybrid machine support it. + * The global x86_pmu.intel_cap, which only contains the + * common capabilities, is used to check the availability + * of the feature. The per-PMU pebs_output_pt_available + * in a hybrid machine should be ignored. + */ + if (x86_pmu.intel_cap.pebs_output_pt_available) { pr_cont("PEBS-via-PT, "); x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT; } -- GitLab From 47a973fd75639fe80d59f9e1860113bb2a0b112b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 29 Jan 2025 07:48:19 -0800 Subject: [PATCH 0414/1585] perf/x86/intel: Fix ARCH_PERFMON_NUM_COUNTER_LEAF The EAX of the CPUID Leaf 023H enumerates the mask of valid sub-leaves. To tell the availability of the sub-leaf 1 (enumerate the counter mask), perf should check the bit 1 (0x2) of EAS, rather than bit 0 (0x1). The error is not user-visible on bare metal. Because the sub-leaf 0 and the sub-leaf 1 are always available. However, it may bring issues in a virtualization environment when a VMM only enumerates the sub-leaf 0. Introduce the cpuid35_e?x to replace the macros, which makes the implementation style consistent. Fixes: eb467aaac21e ("perf/x86/intel: Support Architectural PerfMon Extension leaf") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20250129154820.3755948-3-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 18 ++++++++++-------- arch/x86/include/asm/perf_event.h | 28 +++++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 966f7832497de..f3d5b718f93f3 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4905,20 +4905,22 @@ static inline bool intel_pmu_broken_perf_cap(void) static void update_pmu_cap(struct x86_hybrid_pmu *pmu) { - unsigned int sub_bitmaps, eax, ebx, ecx, edx; + unsigned int cntr, fixed_cntr, ecx, edx; + union cpuid35_eax eax; + union cpuid35_ebx ebx; - cpuid(ARCH_PERFMON_EXT_LEAF, &sub_bitmaps, &ebx, &ecx, &edx); + cpuid(ARCH_PERFMON_EXT_LEAF, &eax.full, &ebx.full, &ecx, &edx); - if (ebx & ARCH_PERFMON_EXT_UMASK2) + if (ebx.split.umask2) pmu->config_mask |= ARCH_PERFMON_EVENTSEL_UMASK2; - if (ebx & ARCH_PERFMON_EXT_EQ) + if (ebx.split.eq) pmu->config_mask |= ARCH_PERFMON_EVENTSEL_EQ; - if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) { + if (eax.split.cntr_subleaf) { cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF, - &eax, &ebx, &ecx, &edx); - pmu->cntr_mask64 = eax; - pmu->fixed_cntr_mask64 = ebx; + &cntr, &fixed_cntr, &ecx, &edx); + pmu->cntr_mask64 = cntr; + pmu->fixed_cntr_mask64 = fixed_cntr; } if (!intel_pmu_broken_perf_cap()) { diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 1ac79f3616456..0ba8d20f2d1d5 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -188,11 +188,33 @@ union cpuid10_edx { * detection/enumeration details: */ #define ARCH_PERFMON_EXT_LEAF 0x00000023 -#define ARCH_PERFMON_EXT_UMASK2 0x1 -#define ARCH_PERFMON_EXT_EQ 0x2 -#define ARCH_PERFMON_NUM_COUNTER_LEAF_BIT 0x1 #define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1 +union cpuid35_eax { + struct { + unsigned int leaf0:1; + /* Counters Sub-Leaf */ + unsigned int cntr_subleaf:1; + /* Auto Counter Reload Sub-Leaf */ + unsigned int acr_subleaf:1; + /* Events Sub-Leaf */ + unsigned int events_subleaf:1; + unsigned int reserved:28; + } split; + unsigned int full; +}; + +union cpuid35_ebx { + struct { + /* UnitMask2 Supported */ + unsigned int umask2:1; + /* EQ-bit Supported */ + unsigned int eq:1; + unsigned int reserved:30; + } split; + unsigned int full; +}; + /* * Intel Architectural LBR CPUID detection/enumeration details: */ -- GitLab From c631a2de7ae48d50434bdc205d901423f8577c65 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 30 Jan 2025 17:07:21 -0800 Subject: [PATCH 0415/1585] perf/x86/intel: Ensure LBRs are disabled when a CPU is starting Explicitly clear DEBUGCTL.LBR when a CPU is starting, prior to purging the LBR MSRs themselves, as at least one system has been found to transfer control to the kernel with LBRs enabled (it's unclear whether it's a BIOS flaw or a CPU goof). Because the kernel preserves the original DEBUGCTL, even when toggling LBRs, leaving DEBUGCTL.LBR as is results in running with LBRs enabled at all times. Closes: https://lore.kernel.org/all/c9d8269bff69f6359731d758e3b1135dedd7cc61.camel@redhat.com Reported-by: Maxim Levitsky Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Maxim Levitsky Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20250131010721.470503-1-seanjc@google.com --- arch/x86/events/intel/core.c | 5 ++++- arch/x86/include/asm/msr-index.h | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index f3d5b718f93f3..e86333eee2668 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -5042,8 +5042,11 @@ static void intel_pmu_cpu_starting(int cpu) init_debug_store_on_cpu(cpu); /* - * Deal with CPUs that don't clear their LBRs on power-up. + * Deal with CPUs that don't clear their LBRs on power-up, and that may + * even boot with LBRs enabled. */ + if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && x86_pmu.lbr_nr) + msr_clear_bit(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR_BIT); intel_pmu_lbr_reset(); cpuc->lbr_sel = NULL; diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 9a71880eec070..72765b2fe0d87 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -395,7 +395,8 @@ #define MSR_IA32_PASID_VALID BIT_ULL(31) /* DEBUGCTLMSR bits (others vary by model): */ -#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_LBR_BIT 0 /* last branch recording */ +#define DEBUGCTLMSR_LBR (1UL << DEBUGCTLMSR_LBR_BIT) #define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ #define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2) -- GitLab From 9ab127a18018fb06bd42a54ed38bb7b8c449d686 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Jan 2025 08:10:02 +0100 Subject: [PATCH 0416/1585] drm/hisilicon/hibmc: select CONFIG_DRM_DISPLAY_DP_HELPER Without the DP helper code, the newly added displayport support causes a link failure: x86_64-linux-ld: drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.o: in function `hibmc_dp_aux_init': dp_aux.c:(.text+0x37e): undefined reference to `drm_dp_aux_init' x86_64-linux-ld: drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.o: in function `hibmc_dp_link_set_pattern': dp_link.c:(.text+0xae): undefined reference to `drm_dp_dpcd_write' x86_64-linux-ld: drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.o: in function `hibmc_dp_link_get_adjust_train': dp_link.c:(.text+0x121): undefined reference to `drm_dp_get_adjust_request_voltage' x86_64-linux-ld: dp_link.c:(.text+0x12e): undefined reference to `drm_dp_get_adjust_request_pre_emphasis' x86_64-linux-ld: drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.o: in function `hibmc_dp_link_training': dp_link.c:(.text+0x2b0): undefined reference to `drm_dp_dpcd_write' x86_64-linux-ld: dp_link.c:(.text+0x2e3): undefined reference to `drm_dp_dpcd_write' Add both DRM_DISPLAY_DP_HELPER and DRM_DISPLAY_HELPER, which is in turn required by the former. Fixes: 0ab6ea261c1f ("drm/hisilicon/hibmc: add dp module in hibmc") Signed-off-by: Arnd Bergmann Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20250127071059.617567-1-arnd@kernel.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/hisilicon/hibmc/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/hisilicon/hibmc/Kconfig b/drivers/gpu/drm/hisilicon/hibmc/Kconfig index 93b8d32e3be16..98d77d74999d5 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/Kconfig +++ b/drivers/gpu/drm/hisilicon/hibmc/Kconfig @@ -4,6 +4,8 @@ config DRM_HISI_HIBMC depends on DRM && PCI depends on MMU select DRM_CLIENT_SELECTION + select DRM_DISPLAY_HELPER + select DRM_DISPLAY_DP_HELPER select DRM_KMS_HELPER select DRM_VRAM_HELPER select DRM_TTM -- GitLab From 2fa0fbeb69edd367b7c44f484e8dc5a5a1a311ef Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Feb 2025 10:58:23 -1000 Subject: [PATCH 0417/1585] sched_ext: Implement auto local dispatching of migration disabled tasks Migration disabled tasks are special and pinned to their previous CPUs. They tripped up some unsuspecting BPF schedulers as their ->nr_cpus_allowed may not agree with the bits set in ->cpus_ptr. Make it easier for BPF schedulers by automatically dispatching them to the pinned local DSQs by default. If a BPF scheduler wants to handle migration disabled tasks explicitly, it can set SCX_OPS_ENQ_MIGRATION_DISABLED. Signed-off-by: Tejun Heo Acked-by: Andrea Righi --- kernel/sched/ext.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index a6d6d6dadde51..efdbf4d85a215 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -122,6 +122,19 @@ enum scx_ops_flags { */ SCX_OPS_SWITCH_PARTIAL = 1LLU << 3, + /* + * A migration disabled task can only execute on its current CPU. By + * default, such tasks are automatically put on the CPU's local DSQ with + * the default slice on enqueue. If this ops flag is set, they also go + * through ops.enqueue(). + * + * A migration disabled task never invokes ops.select_cpu() as it can + * only select the current CPU. Also, p->cpus_ptr will only contain its + * current CPU while p->nr_cpus_allowed keeps tracking p->user_cpus_ptr + * and thus may disagree with cpumask_weight(p->cpus_ptr). + */ + SCX_OPS_ENQ_MIGRATION_DISABLED = 1LLU << 4, + /* * CPU cgroup support flags */ @@ -130,6 +143,7 @@ enum scx_ops_flags { SCX_OPS_ALL_FLAGS = SCX_OPS_KEEP_BUILTIN_IDLE | SCX_OPS_ENQ_LAST | SCX_OPS_ENQ_EXITING | + SCX_OPS_ENQ_MIGRATION_DISABLED | SCX_OPS_SWITCH_PARTIAL | SCX_OPS_HAS_CGROUP_WEIGHT, }; @@ -882,6 +896,7 @@ static bool scx_warned_zero_slice; static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_last); static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_exiting); +static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_migration_disabled); static DEFINE_STATIC_KEY_FALSE(scx_ops_cpu_preempt); static DEFINE_STATIC_KEY_FALSE(scx_builtin_idle_enabled); @@ -2014,6 +2029,11 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags, unlikely(p->flags & PF_EXITING)) goto local; + /* see %SCX_OPS_ENQ_MIGRATION_DISABLED */ + if (!static_branch_unlikely(&scx_ops_enq_migration_disabled) && + is_migration_disabled(p)) + goto local; + if (!SCX_HAS_OP(enqueue)) goto global; @@ -5052,6 +5072,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work) static_branch_disable(&scx_has_op[i]); static_branch_disable(&scx_ops_enq_last); static_branch_disable(&scx_ops_enq_exiting); + static_branch_disable(&scx_ops_enq_migration_disabled); static_branch_disable(&scx_ops_cpu_preempt); static_branch_disable(&scx_builtin_idle_enabled); synchronize_rcu(); @@ -5661,6 +5682,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) if (ops->flags & SCX_OPS_ENQ_EXITING) static_branch_enable(&scx_ops_enq_exiting); + if (ops->flags & SCX_OPS_ENQ_MIGRATION_DISABLED) + static_branch_enable(&scx_ops_enq_migration_disabled); if (scx_ops.cpu_acquire || scx_ops.cpu_release) static_branch_enable(&scx_ops_cpu_preempt); -- GitLab From 32966821574cd2917bd60f2554f435fe527f4702 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Feb 2025 10:59:06 -1000 Subject: [PATCH 0418/1585] sched_ext: Fix migration disabled handling in targeted dispatches A dispatch operation that can target a specific local DSQ - scx_bpf_dsq_move_to_local() or scx_bpf_dsq_move() - checks whether the task can be migrated to the target CPU using task_can_run_on_remote_rq(). If the task can't be migrated to the targeted CPU, it is bounced through a global DSQ. task_can_run_on_remote_rq() assumes that the task is on a CPU that's different from the targeted CPU but the callers doesn't uphold the assumption and may call the function when the task is already on the target CPU. When such task has migration disabled, task_can_run_on_remote_rq() ends up returning %false incorrectly unnecessarily bouncing the task to a global DSQ. Fix it by updating the callers to only call task_can_run_on_remote_rq() when the task is on a different CPU than the target CPU. As this is a bit subtle, for clarity and documentation: - Make task_can_run_on_remote_rq() trigger SCHED_WARN_ON() if the task is on the same CPU as the target CPU. - is_migration_disabled() test in task_can_run_on_remote_rq() cannot trigger if the task is on a different CPU than the target CPU as the preceding task_allowed_on_cpu() test should fail beforehand. Convert the test into SCHED_WARN_ON(). Signed-off-by: Tejun Heo Fixes: 4c30f5ce4f7a ("sched_ext: Implement scx_bpf_dispatch[_vtime]_from_dsq()") Fixes: 0366017e0973 ("sched_ext: Use task_can_run_on_remote_rq() test in dispatch_to_local_dsq()") Cc: stable@vger.kernel.org # v6.12+ --- kernel/sched/ext.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index efdbf4d85a215..e01144340d679 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -2333,12 +2333,16 @@ static void move_remote_task_to_local_dsq(struct task_struct *p, u64 enq_flags, * * - The BPF scheduler is bypassed while the rq is offline and we can always say * no to the BPF scheduler initiated migrations while offline. + * + * The caller must ensure that @p and @rq are on different CPUs. */ static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq, bool trigger_error) { int cpu = cpu_of(rq); + SCHED_WARN_ON(task_cpu(p) == cpu); + /* * We don't require the BPF scheduler to avoid dispatching to offline * CPUs mostly for convenience but also because CPUs can go offline @@ -2352,8 +2356,11 @@ static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq, return false; } - if (unlikely(is_migration_disabled(p))) - return false; + /* + * If @p has migration disabled, @p->cpus_ptr only contains its current + * CPU and the above task_allowed_on_cpu() test should have failed. + */ + SCHED_WARN_ON(is_migration_disabled(p)); if (!scx_rq_online(rq)) return false; @@ -2457,7 +2464,8 @@ static struct rq *move_task_between_dsqs(struct task_struct *p, u64 enq_flags, if (dst_dsq->id == SCX_DSQ_LOCAL) { dst_rq = container_of(dst_dsq, struct rq, scx.local_dsq); - if (!task_can_run_on_remote_rq(p, dst_rq, true)) { + if (src_rq != dst_rq && + unlikely(!task_can_run_on_remote_rq(p, dst_rq, true))) { dst_dsq = find_global_dsq(p); dst_rq = src_rq; } @@ -2611,7 +2619,8 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq, } #ifdef CONFIG_SMP - if (unlikely(!task_can_run_on_remote_rq(p, dst_rq, true))) { + if (src_rq != dst_rq && + unlikely(!task_can_run_on_remote_rq(p, dst_rq, true))) { dispatch_enqueue(find_global_dsq(p), p, enq_flags | SCX_ENQ_CLEAR_OPSS); return; -- GitLab From ee9d1619ef6e4a3412a13788256cb8c3e5efbe3d Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 7 Feb 2025 13:46:28 -0800 Subject: [PATCH 0419/1585] MAINTAINERS: update maintainer for Microsoft MANA RDMA driver Ajay is no longer working on the MANA RDMA driver. Konstantin Taranov has made significant contributions to implementing RC QP in both kernel and user-mode. He will take the responsibility of fixing bugs, reviewing patches and developing new features for MANA RDMA driver. Signed-off-by: Long Li Link: https://patch.msgid.link/1738964792-21140-1-git-send-email-longli@linuxonhyperv.com Signed-off-by: Leon Romanovsky --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 896a307fa0654..c51503268a026 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15689,7 +15689,7 @@ F: include/uapi/linux/cciss*.h MICROSOFT MANA RDMA DRIVER M: Long Li -M: Ajay Sharma +M: Konstantin Taranov L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/hw/mana/ -- GitLab From 9747c0c7791d4a5a62018a0c9c563dd2e6f6c1c0 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Sat, 8 Feb 2025 18:59:30 +0800 Subject: [PATCH 0420/1585] RDMA/hns: Fix mbox timing out by adding retry mechanism If a QP is modified to error state and a flush CQE process is triggered, the subsequent QP destruction mbox can still be successfully posted but will be blocked in HW until the flush CQE process finishes. This causes further mbox posting timeouts in driver. The blocking time is related to QP depth. Considering an extreme case where SQ depth and RQ depth are both 32K, the blocking time can reach about 135ms. This patch adds a retry mechanism for mbox posting. For each try, FW waits 15ms for HW to complete the previous mbox, otherwise return a timeout error code to driver. Counting other time consumption in FW, set 8 tries for mbox posting and a 5ms time gap before each retry to increase to a sufficient timeout limit. Fixes: 0425e3e6e0c7 ("RDMA/hns: Support flush cqe for hip08 in kernel space") Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20250208105930.522796-1-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 64 ++++++++++++++++------ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 2 + 2 files changed, 50 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index dded339802b33..160e8927d364e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1286,10 +1286,8 @@ static u32 hns_roce_cmdq_tx_timeout(u16 opcode, u32 tx_timeout) return tx_timeout; } -static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode) +static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u32 tx_timeout) { - struct hns_roce_v2_priv *priv = hr_dev->priv; - u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout); u32 timeout = 0; do { @@ -1299,8 +1297,9 @@ static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode) } while (++timeout < tx_timeout); } -static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, - struct hns_roce_cmq_desc *desc, int num) +static int __hns_roce_cmq_send_one(struct hns_roce_dev *hr_dev, + struct hns_roce_cmq_desc *desc, + int num, u32 tx_timeout) { struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; @@ -1309,8 +1308,6 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, int ret; int i; - spin_lock_bh(&csq->lock); - tail = csq->head; for (i = 0; i < num; i++) { @@ -1324,22 +1321,17 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]); - hns_roce_wait_csq_done(hr_dev, le16_to_cpu(desc->opcode)); + hns_roce_wait_csq_done(hr_dev, tx_timeout); if (hns_roce_cmq_csq_done(hr_dev)) { ret = 0; for (i = 0; i < num; i++) { /* check the result of hardware write back */ - desc[i] = csq->desc[tail++]; + desc_ret = le16_to_cpu(csq->desc[tail++].retval); if (tail == csq->desc_num) tail = 0; - - desc_ret = le16_to_cpu(desc[i].retval); if (likely(desc_ret == CMD_EXEC_SUCCESS)) continue; - dev_err_ratelimited(hr_dev->dev, - "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n", - desc->opcode, desc_ret); ret = hns_roce_cmd_err_convert_errno(desc_ret); } } else { @@ -1354,14 +1346,54 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, ret = -EAGAIN; } - spin_unlock_bh(&csq->lock); - if (ret) atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]); return ret; } +static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, + struct hns_roce_cmq_desc *desc, int num) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; + u16 opcode = le16_to_cpu(desc->opcode); + u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout); + u8 try_cnt = HNS_ROCE_OPC_POST_MB_TRY_CNT; + u32 rsv_tail; + int ret; + int i; + + while (try_cnt) { + try_cnt--; + + spin_lock_bh(&csq->lock); + rsv_tail = csq->head; + ret = __hns_roce_cmq_send_one(hr_dev, desc, num, tx_timeout); + if (opcode == HNS_ROCE_OPC_POST_MB && ret == -ETIME && + try_cnt) { + spin_unlock_bh(&csq->lock); + mdelay(HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC); + continue; + } + + for (i = 0; i < num; i++) { + desc[i] = csq->desc[rsv_tail++]; + if (rsv_tail == csq->desc_num) + rsv_tail = 0; + } + spin_unlock_bh(&csq->lock); + break; + } + + if (ret) + dev_err_ratelimited(hr_dev->dev, + "Cmdq IO error, opcode = 0x%x, return = %d.\n", + opcode, ret); + + return ret; +} + static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, struct hns_roce_cmq_desc *desc, int num) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index cbdbc9edbce6e..91a5665465ffb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -230,6 +230,8 @@ enum hns_roce_opcode_type { }; #define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000 +#define HNS_ROCE_OPC_POST_MB_TRY_CNT 8 +#define HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC 5 struct hns_roce_cmdq_tx_timeout_map { u16 opcode; u32 tx_timeout; -- GitLab From c53fbdb60fb61fd6bda2bc0dc89837966625c5dc Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 7 Feb 2025 14:54:37 +0000 Subject: [PATCH 0421/1585] KVM: arm64: Improve error handling from check_host_shared_guest() The check_host_shared_guest() path expects to find a last-level valid PTE in the guest's stage-2 page-table. However, it checks the PTE's level before its validity, which makes it hard for callers to figure out what went wrong. To make error handling simpler, check the PTE's validity first. Signed-off-by: Quentin Perret Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250207145438.1333475-2-qperret@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 7ad7b133b81a8..41847c04b270f 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -943,10 +943,10 @@ static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ip ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level); if (ret) return ret; - if (level != KVM_PGTABLE_LAST_LEVEL) - return -E2BIG; if (!kvm_pte_valid(pte)) return -ENOENT; + if (level != KVM_PGTABLE_LAST_LEVEL) + return -E2BIG; state = guest_get_page_state(pte, ipa); if (state != PKVM_PAGE_SHARED_BORROWED) -- GitLab From eabc7aaef7a553b64bf6e631ce04526af6c8d104 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 7 Feb 2025 14:54:38 +0000 Subject: [PATCH 0422/1585] KVM: arm64: Simplify np-guest hypercalls When the handling of a guest stage-2 permission fault races with an MMU notifier, the faulting page might be gone from the guest's stage-2 by the point we attempt to call (p)kvm_pgtable_stage2_relax_perms(). In the normal KVM case, this leads to returning -EAGAIN which user_mem_abort() handles correctly by simply re-entering the guest. However, the pKVM hypercall implementation has additional logic to check the page state using __check_host_shared_guest() which gets confused with absence of a page mapped at the requested IPA and returns -ENOENT, hence breaking user_mem_abort() and hilarity ensues. Luckily, several of the hypercalls for managing the stage-2 page-table of NP guests have no effect on the pKVM ownership tracking (wrprotect, test_clear_young, mkyoung, and crucially relax_perms), so the extra state checking logic is in fact not strictly necessary. So, to fix the discrepancy between standard KVM and pKVM, let's just drop the superfluous __check_host_shared_guest() logic from those hypercalls and make the extra state checking a debug assertion dependent on CONFIG_NVHE_EL2_DEBUG as we already do for other transitions. Signed-off-by: Quentin Perret Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250207145438.1333475-3-qperret@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 69 +++++++++++++++------------ 1 file changed, 38 insertions(+), 31 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 41847c04b270f..4c2f6a6a2efe1 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -998,63 +998,73 @@ int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *vm) return ret; } -int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot) +static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa) { - struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); - u64 ipa = hyp_pfn_to_phys(gfn); u64 phys; int ret; - if (prot & ~KVM_PGTABLE_PROT_RWX) - return -EINVAL; + if (!IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) + return; host_lock_component(); guest_lock_component(vm); ret = __check_host_shared_guest(vm, &phys, ipa); - if (!ret) - ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0); guest_unlock_component(vm); host_unlock_component(); - return ret; + WARN_ON(ret && ret != -ENOENT); } -int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm) +int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot) { + struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); u64 ipa = hyp_pfn_to_phys(gfn); - u64 phys; int ret; - host_lock_component(); - guest_lock_component(vm); + if (pkvm_hyp_vm_is_protected(vm)) + return -EPERM; - ret = __check_host_shared_guest(vm, &phys, ipa); - if (!ret) - ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE); + if (prot & ~KVM_PGTABLE_PROT_RWX) + return -EINVAL; + assert_host_shared_guest(vm, ipa); + guest_lock_component(vm); + ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0); guest_unlock_component(vm); - host_unlock_component(); return ret; } -int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm) +int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm) { u64 ipa = hyp_pfn_to_phys(gfn); - u64 phys; int ret; - host_lock_component(); + if (pkvm_hyp_vm_is_protected(vm)) + return -EPERM; + + assert_host_shared_guest(vm, ipa); guest_lock_component(vm); + ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE); + guest_unlock_component(vm); - ret = __check_host_shared_guest(vm, &phys, ipa); - if (!ret) - ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold); + return ret; +} +int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm) +{ + u64 ipa = hyp_pfn_to_phys(gfn); + int ret; + + if (pkvm_hyp_vm_is_protected(vm)) + return -EPERM; + + assert_host_shared_guest(vm, ipa); + guest_lock_component(vm); + ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold); guest_unlock_component(vm); - host_unlock_component(); return ret; } @@ -1063,18 +1073,15 @@ int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu) { struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); u64 ipa = hyp_pfn_to_phys(gfn); - u64 phys; int ret; - host_lock_component(); - guest_lock_component(vm); - - ret = __check_host_shared_guest(vm, &phys, ipa); - if (!ret) - kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0); + if (pkvm_hyp_vm_is_protected(vm)) + return -EPERM; + assert_host_shared_guest(vm, ipa); + guest_lock_component(vm); + kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0); guest_unlock_component(vm); - host_unlock_component(); return ret; } -- GitLab From 7585946243d614bd2cd4e13377be2c711c9539e0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 8 Feb 2025 18:54:28 +0100 Subject: [PATCH 0423/1585] PM: sleep: core: Restrict power.set_active propagation Commit 3775fc538f53 ("PM: sleep: core: Synchronize runtime PM status of parents and children") exposed an issue related to simple_pm_bus_pm_ops that uses pm_runtime_force_suspend() and pm_runtime_force_resume() as bus type PM callbacks for the noirq phases of system-wide suspend and resume. The problem is that pm_runtime_force_suspend() does not distinguish runtime-suspended devices from devices for which runtime PM has never been enabled, so if it sees a device with runtime PM status set to RPM_ACTIVE, it will assume that runtime PM is enabled for that device and so it will attempt to suspend it with the help of its runtime PM callbacks which may not be ready for that. As it turns out, this causes simple_pm_bus_runtime_suspend() to crash due to a NULL pointer dereference. Another problem related to the above commit and simple_pm_bus_pm_ops is that setting runtime PM status of a device handled by the latter to RPM_ACTIVE will actually prevent it from being resumed because pm_runtime_force_resume() only resumes devices with runtime PM status set to RPM_SUSPENDED. To mitigate these issues, do not allow power.set_active to propagate beyond the parent of the device with DPM_FLAG_SMART_SUSPEND set that will need to be resumed, which should be a sufficient stop-gap for the time being, but they will need to be properly addressed in the future because in general during system-wide resume it is necessary to resume all devices in a dependency chain in which at least one device is going to be resumed. Fixes: 3775fc538f53 ("PM: sleep: core: Synchronize runtime PM status of parents and children") Closes: https://lore.kernel.org/linux-pm/1c2433d4-7e0f-4395-b841-b8eac7c25651@nvidia.com/ Reported-by: Jon Hunter Tested-by: Johan Hovold Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/6137505.lOV4Wx5bFT@rjwysocki.net --- drivers/base/power/main.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index d497d448e4b2a..40e1d8d8a5893 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1191,24 +1191,18 @@ static pm_message_t resume_event(pm_message_t sleep_state) return PMSG_ON; } -static void dpm_superior_set_must_resume(struct device *dev, bool set_active) +static void dpm_superior_set_must_resume(struct device *dev) { struct device_link *link; int idx; - if (dev->parent) { + if (dev->parent) dev->parent->power.must_resume = true; - if (set_active) - dev->parent->power.set_active = true; - } idx = device_links_read_lock(); - list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) { + list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) link->supplier->power.must_resume = true; - if (set_active) - link->supplier->power.set_active = true; - } device_links_read_unlock(idx); } @@ -1287,9 +1281,12 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state, bool asy dev->power.must_resume = true; if (dev->power.must_resume) { - dev->power.set_active = dev->power.set_active || - dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND); - dpm_superior_set_must_resume(dev, dev->power.set_active); + if (dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND)) { + dev->power.set_active = true; + if (dev->parent && !dev->parent->power.ignore_children) + dev->parent->power.set_active = true; + } + dpm_superior_set_must_resume(dev); } Complete: -- GitLab From a64dcfb451e254085a7daee5fe51bf22959d52d3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 9 Feb 2025 12:45:03 -0800 Subject: [PATCH 0424/1585] Linux 6.14-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9e0d63d9d94b9..89628e354ca7b 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Baby Opossum Posse # *DOCUMENTATION* -- GitLab From dc9c5166c3cb044f8a001e397195242fd6796eee Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 3 Feb 2025 11:14:57 +0100 Subject: [PATCH 0425/1585] powerpc/code-patching: Disable KASAN report during patching via temporary mm Erhard reports the following KASAN hit on Talos II (power9) with kernel 6.13: [ 12.028126] ================================================================== [ 12.028198] BUG: KASAN: user-memory-access in copy_to_kernel_nofault+0x8c/0x1a0 [ 12.028260] Write of size 8 at addr 0000187e458f2000 by task systemd/1 [ 12.028346] CPU: 87 UID: 0 PID: 1 Comm: systemd Tainted: G T 6.13.0-P9-dirty #3 [ 12.028408] Tainted: [T]=RANDSTRUCT [ 12.028446] Hardware name: T2P9D01 REV 1.01 POWER9 0x4e1202 opal:skiboot-bc106a0 PowerNV [ 12.028500] Call Trace: [ 12.028536] [c000000008dbf3b0] [c000000001656a48] dump_stack_lvl+0xbc/0x110 (unreliable) [ 12.028609] [c000000008dbf3f0] [c0000000006e2fc8] print_report+0x6b0/0x708 [ 12.028666] [c000000008dbf4e0] [c0000000006e2454] kasan_report+0x164/0x300 [ 12.028725] [c000000008dbf600] [c0000000006e54d4] kasan_check_range+0x314/0x370 [ 12.028784] [c000000008dbf640] [c0000000006e6310] __kasan_check_write+0x20/0x40 [ 12.028842] [c000000008dbf660] [c000000000578e8c] copy_to_kernel_nofault+0x8c/0x1a0 [ 12.028902] [c000000008dbf6a0] [c0000000000acfe4] __patch_instructions+0x194/0x210 [ 12.028965] [c000000008dbf6e0] [c0000000000ade80] patch_instructions+0x150/0x590 [ 12.029026] [c000000008dbf7c0] [c0000000001159bc] bpf_arch_text_copy+0x6c/0xe0 [ 12.029085] [c000000008dbf800] [c000000000424250] bpf_jit_binary_pack_finalize+0x40/0xc0 [ 12.029147] [c000000008dbf830] [c000000000115dec] bpf_int_jit_compile+0x3bc/0x930 [ 12.029206] [c000000008dbf990] [c000000000423720] bpf_prog_select_runtime+0x1f0/0x280 [ 12.029266] [c000000008dbfa00] [c000000000434b18] bpf_prog_load+0xbb8/0x1370 [ 12.029324] [c000000008dbfb70] [c000000000436ebc] __sys_bpf+0x5ac/0x2e00 [ 12.029379] [c000000008dbfd00] [c00000000043a228] sys_bpf+0x28/0x40 [ 12.029435] [c000000008dbfd20] [c000000000038eb4] system_call_exception+0x334/0x610 [ 12.029497] [c000000008dbfe50] [c00000000000c270] system_call_vectored_common+0xf0/0x280 [ 12.029561] --- interrupt: 3000 at 0x3fff82f5cfa8 [ 12.029608] NIP: 00003fff82f5cfa8 LR: 00003fff82f5cfa8 CTR: 0000000000000000 [ 12.029660] REGS: c000000008dbfe80 TRAP: 3000 Tainted: G T (6.13.0-P9-dirty) [ 12.029735] MSR: 900000000280f032 CR: 42004848 XER: 00000000 [ 12.029855] IRQMASK: 0 GPR00: 0000000000000169 00003fffdcf789a0 00003fff83067100 0000000000000005 GPR04: 00003fffdcf78a98 0000000000000090 0000000000000000 0000000000000008 GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR12: 0000000000000000 00003fff836ff7e0 c000000000010678 0000000000000000 GPR16: 0000000000000000 0000000000000000 00003fffdcf78f28 00003fffdcf78f90 GPR20: 0000000000000000 0000000000000000 0000000000000000 00003fffdcf78f80 GPR24: 00003fffdcf78f70 00003fffdcf78d10 00003fff835c7239 00003fffdcf78bd8 GPR28: 00003fffdcf78a98 0000000000000000 0000000000000000 000000011f547580 [ 12.030316] NIP [00003fff82f5cfa8] 0x3fff82f5cfa8 [ 12.030361] LR [00003fff82f5cfa8] 0x3fff82f5cfa8 [ 12.030405] --- interrupt: 3000 [ 12.030444] ================================================================== Commit c28c15b6d28a ("powerpc/code-patching: Use temporary mm for Radix MMU") is inspired from x86 but unlike x86 is doesn't disable KASAN reports during patching. This wasn't a problem at the begining because __patch_mem() is not instrumented. Commit 465cabc97b42 ("powerpc/code-patching: introduce patch_instructions()") use copy_to_kernel_nofault() to copy several instructions at once. But when using temporary mm the destination is not regular kernel memory but a kind of kernel-like memory located in user address space. Because it is not in kernel address space it is not covered by KASAN shadow memory. Since commit e4137f08816b ("mm, kasan, kmsan: instrument copy_from/to_kernel_nofault") KASAN reports bad accesses from copy_to_kernel_nofault(). Here a bad access to user memory is reported because KASAN detects the lack of shadow memory and the address is below TASK_SIZE. Do like x86 in commit b3fd8e83ada0 ("x86/alternatives: Use temporary mm for text poking") and disable KASAN reports during patching when using temporary mm. Reported-by: Erhard Furtner Close: https://lore.kernel.org/all/20250201151435.48400261@yea/ Fixes: 465cabc97b42 ("powerpc/code-patching: introduce patch_instructions()") Signed-off-by: Christophe Leroy Acked-by: Michael Ellerman Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/1c05b2a1b02ad75b981cfc45927e0b4a90441046.1738577687.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/code-patching.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index af97fbb3c257e..81c0f673eb252 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -493,7 +493,9 @@ static int __do_patch_instructions_mm(u32 *addr, u32 *code, size_t len, bool rep orig_mm = start_using_temp_mm(patching_mm); + kasan_disable_current(); err = __patch_instructions(patch_addr, code, len, repeat_instr); + kasan_enable_current(); /* context synchronisation performed by __patch_instructions */ stop_using_temp_mm(patching_mm, orig_mm); -- GitLab From 61bcc752d1b81fde3cae454ff20c1d3c359df500 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 12 Jan 2025 19:24:46 +0100 Subject: [PATCH 0426/1585] powerpc/64s: Rewrite __real_pte() and __rpte_to_hidx() as static inline Rewrite __real_pte() and __rpte_to_hidx() as static inline in order to avoid following warnings/errors when building with 4k page size: CC arch/powerpc/mm/book3s64/hash_tlb.o arch/powerpc/mm/book3s64/hash_tlb.c: In function 'hpte_need_flush': arch/powerpc/mm/book3s64/hash_tlb.c:49:16: error: variable 'offset' set but not used [-Werror=unused-but-set-variable] 49 | int i, offset; | ^~~~~~ CC arch/powerpc/mm/book3s64/hash_native.o arch/powerpc/mm/book3s64/hash_native.c: In function 'native_flush_hash_range': arch/powerpc/mm/book3s64/hash_native.c:782:29: error: variable 'index' set but not used [-Werror=unused-but-set-variable] 782 | unsigned long hash, index, hidx, shift, slot; | ^~~~~ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501081741.AYFwybsq-lkp@intel.com/ Fixes: ff31e105464d ("powerpc/mm/hash64: Store the slot information at the right offset for hugetlb") Signed-off-by: Christophe Leroy Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/e0d340a5b7bd478ecbf245d826e6ab2778b74e06.1736706263.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index c3efacab4b941..aa90a048f319a 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -77,9 +77,17 @@ /* * With 4K page size the real_pte machinery is all nops. */ -#define __real_pte(e, p, o) ((real_pte_t){(e)}) +static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset) +{ + return (real_pte_t){pte}; +} + #define __rpte_to_pte(r) ((r).pte) -#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT) + +static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) +{ + return pte_val(__rpte_to_pte(rpte)) >> H_PAGE_F_GIX_SHIFT; +} #define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \ do { \ -- GitLab From a1f7b7ff0e10ae574d388131596390157222f986 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 10 Feb 2025 10:17:27 +0200 Subject: [PATCH 0427/1585] PCI: pci_ids: add INTEL_HDA_PTL_H Add Intel PTL-H audio Device ID. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Peter Ujfalusi Reviewed-by: Kai Vehmanen Reviewed-by: Bard Liao Acked-by: Bjorn Helgaas Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250210081730.22916-2-peter.ujfalusi@linux.intel.com --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index de5deb1a0118f..1a2594a38199f 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -3134,6 +3134,7 @@ #define PCI_DEVICE_ID_INTEL_HDA_LNL_P 0xa828 #define PCI_DEVICE_ID_INTEL_S21152BB 0xb152 #define PCI_DEVICE_ID_INTEL_HDA_BMG 0xe2f7 +#define PCI_DEVICE_ID_INTEL_HDA_PTL_H 0xe328 #define PCI_DEVICE_ID_INTEL_HDA_PTL 0xe428 #define PCI_DEVICE_ID_INTEL_HDA_CML_R 0xf0c8 #define PCI_DEVICE_ID_INTEL_HDA_RKL_S 0xf1c8 -- GitLab From 214e6be2d91d5d58f28d3a37630480077a1aafbd Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 10 Feb 2025 10:17:28 +0200 Subject: [PATCH 0428/1585] ALSA: hda: intel-dsp-config: Add PTL-H support Use same recipes as PTL for PTL-H. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Peter Ujfalusi Reviewed-by: Kai Vehmanen Reviewed-by: Bard Liao Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250210081730.22916-3-peter.ujfalusi@linux.intel.com --- sound/hda/intel-dsp-config.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index f564ec7af1940..ce3ae2cba6607 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -539,6 +539,11 @@ static const struct config_entry config_table[] = { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = PCI_DEVICE_ID_INTEL_HDA_PTL, }, + { + .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, + .device = PCI_DEVICE_ID_INTEL_HDA_PTL_H, + }, + #endif }; -- GitLab From 4e9c87cfcd0584f2a2e2f352a43ff003d688f3a4 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 10 Feb 2025 10:17:29 +0200 Subject: [PATCH 0429/1585] ASoC: SOF: Intel: pci-ptl: Add support for PTL-H PTL-H uses the same configuration as PTL. Signed-off-by: Peter Ujfalusi Reviewed-by: Kai Vehmanen Reviewed-by: Bard Liao Acked-by: Mark Brown Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250210081730.22916-4-peter.ujfalusi@linux.intel.com --- sound/soc/sof/intel/pci-ptl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sof/intel/pci-ptl.c b/sound/soc/sof/intel/pci-ptl.c index 0aacdfac9fb43..c4fb6a2441b76 100644 --- a/sound/soc/sof/intel/pci-ptl.c +++ b/sound/soc/sof/intel/pci-ptl.c @@ -50,6 +50,7 @@ static const struct sof_dev_desc ptl_desc = { /* PCI IDs */ static const struct pci_device_id sof_pci_ids[] = { { PCI_DEVICE_DATA(INTEL, HDA_PTL, &ptl_desc) }, /* PTL */ + { PCI_DEVICE_DATA(INTEL, HDA_PTL_H, &ptl_desc) }, /* PTL-H */ { 0, } }; MODULE_DEVICE_TABLE(pci, sof_pci_ids); -- GitLab From d7e2447a4d51de5c3c03e3b7892898e98ddd9769 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 10 Feb 2025 10:17:30 +0200 Subject: [PATCH 0430/1585] ALSA: hda: hda-intel: add Panther Lake-H support Add Intel PTL-H audio Device ID. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Peter Ujfalusi Reviewed-by: Kai Vehmanen Reviewed-by: Bard Liao Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250210081730.22916-5-peter.ujfalusi@linux.intel.com --- sound/pci/hda/hda_intel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 7d7f9aac50a91..67540e0373099 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2496,6 +2496,8 @@ static const struct pci_device_id azx_ids[] = { { PCI_DEVICE_DATA(INTEL, HDA_ARL, AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE) }, /* Panther Lake */ { PCI_DEVICE_DATA(INTEL, HDA_PTL, AZX_DRIVER_SKL | AZX_DCAPS_INTEL_LNL) }, + /* Panther Lake-H */ + { PCI_DEVICE_DATA(INTEL, HDA_PTL_H, AZX_DRIVER_SKL | AZX_DCAPS_INTEL_LNL) }, /* Apollolake (Broxton-P) */ { PCI_DEVICE_DATA(INTEL, HDA_APL, AZX_DRIVER_SKL | AZX_DCAPS_INTEL_BROXTON) }, /* Gemini-Lake */ -- GitLab From 70e90680c2592c38c62e5716f1296a2d74bae7af Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 5 Feb 2025 11:46:33 +0100 Subject: [PATCH 0431/1585] ALSA: Switch to use hrtimer_setup() hrtimer_setup() takes the callback function pointer as argument and initializes the timer completely. Replace hrtimer_init() and the open coded initialization of hrtimer::function with the new setup mechanism. Patch was created by using Coccinelle. Acked-by: Zack Rusin Signed-off-by: Nam Cao Cc: Takashi Iwai Link: https://patch.msgid.link/598031332ce738c82286a158cb66eb7e735b2e79.1738746904.git.namcao@linutronix.de Signed-off-by: Takashi Iwai --- sound/core/hrtimer.c | 3 +-- sound/drivers/dummy.c | 3 +-- sound/drivers/pcsp/pcsp.c | 3 +-- sound/sh/sh_dac_audio.c | 3 +-- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/sound/core/hrtimer.c b/sound/core/hrtimer.c index 147c1fea4708e..e9c60dce59fbf 100644 --- a/sound/core/hrtimer.c +++ b/sound/core/hrtimer.c @@ -66,9 +66,8 @@ static int snd_hrtimer_open(struct snd_timer *t) stime = kzalloc(sizeof(*stime), GFP_KERNEL); if (!stime) return -ENOMEM; - hrtimer_init(&stime->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL); stime->timer = t; - stime->hrt.function = snd_hrtimer_callback; + hrtimer_setup(&stime->hrt, snd_hrtimer_callback, CLOCK_MONOTONIC, HRTIMER_MODE_REL); t->private_data = stime; return 0; } diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c index 8f5df9b3aaaaa..c1a3efb633c53 100644 --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -457,8 +457,7 @@ static int dummy_hrtimer_create(struct snd_pcm_substream *substream) if (!dpcm) return -ENOMEM; substream->runtime->private_data = dpcm; - hrtimer_init(&dpcm->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); - dpcm->timer.function = dummy_hrtimer_callback; + hrtimer_setup(&dpcm->timer, dummy_hrtimer_callback, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); dpcm->substream = substream; atomic_set(&dpcm->running, 0); return 0; diff --git a/sound/drivers/pcsp/pcsp.c b/sound/drivers/pcsp/pcsp.c index 78c9b1c7590f5..e8482c2290c3d 100644 --- a/sound/drivers/pcsp/pcsp.c +++ b/sound/drivers/pcsp/pcsp.c @@ -103,8 +103,7 @@ static int snd_card_pcsp_probe(int devnum, struct device *dev) if (devnum != 0) return -EINVAL; - hrtimer_init(&pcsp_chip.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - pcsp_chip.timer.function = pcsp_do_timer; + hrtimer_setup(&pcsp_chip.timer, pcsp_do_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); err = snd_devm_card_new(dev, index, id, THIS_MODULE, 0, &card); if (err < 0) diff --git a/sound/sh/sh_dac_audio.c b/sound/sh/sh_dac_audio.c index 3f5422145c5e2..84a4b17a0cc23 100644 --- a/sound/sh/sh_dac_audio.c +++ b/sound/sh/sh_dac_audio.c @@ -312,8 +312,7 @@ static int snd_sh_dac_create(struct snd_card *card, chip->card = card; - hrtimer_init(&chip->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - chip->hrtimer.function = sh_dac_audio_timer; + hrtimer_setup(&chip->hrtimer, sh_dac_audio_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); dac_audio_reset(chip); chip->rate = 8000; -- GitLab From a27c6f46dcec8f697cbf15c8a10f8534c7b8a2c3 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 4 Feb 2025 00:21:22 -0800 Subject: [PATCH 0432/1585] RDMA/bnxt_re: Fix an issue in bnxt_re_async_notifier In the bnxt_re_async_notifier() callback, the way driver retrieves rdev pointer is wrong. The rdev pointer should be parsed from adev pointer as while registering with the L2 for ULP, driver uses the aux device pointer for the handle. Fixes: 7fea32784068 ("RDMA/bnxt_re: Add Async event handling support") Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/1738657285-23968-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index e9e4da4dd576b..c4c3d67c42cc6 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -396,11 +396,16 @@ static void bnxt_re_dcb_wq_task(struct work_struct *work) static void bnxt_re_async_notifier(void *handle, struct hwrm_async_event_cmpl *cmpl) { - struct bnxt_re_dev *rdev = (struct bnxt_re_dev *)handle; + struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle); struct bnxt_re_dcb_work *dcb_work; + struct bnxt_re_dev *rdev; u32 data1, data2; u16 event_id; + rdev = en_info->rdev; + if (!rdev) + return; + event_id = le16_to_cpu(cmpl->event_id); data1 = le32_to_cpu(cmpl->event_data1); data2 = le32_to_cpu(cmpl->event_data2); -- GitLab From f0df225d12fcb049429fb5bf5122afe143c2dd15 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 4 Feb 2025 00:21:23 -0800 Subject: [PATCH 0433/1585] RDMA/bnxt_re: Add sanity checks on rdev validity There is a possibility that ulp_irq_stop and ulp_irq_start callbacks will be called when the device is in detached state. This can cause a crash due to NULL pointer dereference as the rdev is already freed. Fixes: cc5b9b48d447 ("RDMA/bnxt_re: Recover the device when FW error is detected") Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/1738657285-23968-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index c4c3d67c42cc6..89ac5c21ca7ad 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -438,6 +438,8 @@ static void bnxt_re_stop_irq(void *handle, bool reset) int indx; rdev = en_info->rdev; + if (!rdev) + return; rcfw = &rdev->rcfw; if (reset) { @@ -466,6 +468,8 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) int indx, rc; rdev = en_info->rdev; + if (!rdev) + return; msix_ent = rdev->nqr->msix_entries; rcfw = &rdev->rcfw; if (!ent) { @@ -2438,6 +2442,7 @@ static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state) ibdev_info(&rdev->ibdev, "%s: L2 driver notified to stop en_state 0x%lx", __func__, en_dev->en_state); bnxt_re_remove_device(rdev, BNXT_RE_PRE_RECOVERY_REMOVE, adev); + bnxt_re_update_en_info_rdev(NULL, en_info, adev); mutex_unlock(&bnxt_re_mutex); return 0; -- GitLab From e2f105277411c4ebacd00d4ae1a57f693ba7d22d Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 4 Feb 2025 00:21:24 -0800 Subject: [PATCH 0434/1585] RDMA/bnxt_re: Fix issue in the unload path The cited comment removed the netdev notifier register call from the driver. But, it did not remove the cleanup code from the unload path. As a result, driver unload is not clean and resulted in undesired behaviour. Fixes: d3b15fcc4201 ("RDMA/bnxt_re: Remove deliver net device event") Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/1738657285-23968-4-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 1 - drivers/infiniband/hw/bnxt_re/main.c | 10 ---------- 2 files changed, 11 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index b91a85a491d05..3721446c6ba4b 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -187,7 +187,6 @@ struct bnxt_re_dev { #define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29 struct net_device *netdev; struct auxiliary_device *adev; - struct notifier_block nb; unsigned int version, major, minor; struct bnxt_qplib_chip_ctx *chip_ctx; struct bnxt_en_dev *en_dev; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 89ac5c21ca7ad..a94c8c5387d9e 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1359,7 +1359,6 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct auxiliary_device *adev, return NULL; } /* Default values */ - rdev->nb.notifier_call = NULL; rdev->netdev = en_dev->net; rdev->en_dev = en_dev; rdev->adev = adev; @@ -2354,15 +2353,6 @@ static int bnxt_re_add_device(struct auxiliary_device *adev, u8 op_type) static void bnxt_re_remove_device(struct bnxt_re_dev *rdev, u8 op_type, struct auxiliary_device *aux_dev) { - if (rdev->nb.notifier_call) { - unregister_netdevice_notifier(&rdev->nb); - rdev->nb.notifier_call = NULL; - } else { - /* If notifier is null, we should have already done a - * clean up before coming here. - */ - return; - } bnxt_re_setup_cc(rdev, false); ib_unregister_device(&rdev->ibdev); bnxt_re_dev_uninit(rdev, op_type); -- GitLab From 8238c7bd84209c8216b1381ab0dbe6db9e203769 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 4 Feb 2025 00:21:25 -0800 Subject: [PATCH 0435/1585] RDMA/bnxt_re: Fix the statistics for Gen P7 VF Gen P7 VF support the extended stats and is prevented by a VF check. Fix the check to issue the FW command for GenP7 VFs also. Fixes: 1801d87b3598 ("RDMA/bnxt_re: Support new 5760X P7 devices") Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/1738657285-23968-5-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/hw_counters.c | 4 ++-- drivers/infiniband/hw/bnxt_re/qplib_res.h | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 3ac47f4e61229..f039aefcaf675 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -348,8 +348,8 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, goto done; } bnxt_re_copy_err_stats(rdev, stats, err_s); - if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags) && - !rdev->is_virtfn) { + if (bnxt_ext_stats_supported(rdev->chip_ctx, rdev->dev_attr->dev_cap_flags, + rdev->is_virtfn)) { rc = bnxt_re_get_ext_stat(rdev, stats); if (rc) { clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index be5d907a036b6..711990232de1c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -547,6 +547,14 @@ static inline bool _is_ext_stats_supported(u16 dev_cap_flags) CREQ_QUERY_FUNC_RESP_SB_EXT_STATS; } +static inline int bnxt_ext_stats_supported(struct bnxt_qplib_chip_ctx *ctx, + u16 flags, bool virtfn) +{ + /* ext stats supported if cap flag is set AND is a PF OR a Thor2 VF */ + return (_is_ext_stats_supported(flags) && + ((virtfn && bnxt_qplib_is_chip_gen_p7(ctx)) || (!virtfn))); +} + static inline bool _is_hw_retx_supported(u16 dev_cap_flags) { return dev_cap_flags & -- GitLab From 8dbccafce3c8ae026606f5c7bc6637667d9d5595 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Feb 2025 09:17:58 +0000 Subject: [PATCH 0436/1585] KVM: arm64: Fix __pkvm_host_mkyoung_guest() return value Don't use an uninitialised stack variable, and just return 0 on the non-error path. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202502100911.8c9DbtKD-lkp@intel.com/ Reviewed-by: Quentin Perret Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 4c2f6a6a2efe1..19c3c631708ce 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -1073,7 +1073,6 @@ int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu) { struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); u64 ipa = hyp_pfn_to_phys(gfn); - int ret; if (pkvm_hyp_vm_is_protected(vm)) return -EPERM; @@ -1083,5 +1082,5 @@ int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu) kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0); guest_unlock_component(vm); - return ret; + return 0; } -- GitLab From b921f66ccf5e8cf1b8a5052b35ceda454f19f5dd Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Mon, 3 Feb 2025 15:30:56 -0600 Subject: [PATCH 0437/1585] dt-bindings: rockchip: pmu: Ensure all properties are defined Device specific schemas should not allow undefined properties which is what 'additionalProperties: true' allows. Add the missing child nodes and fix this constraint. Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250203213056.13827-1-robh@kernel.org Signed-off-by: Heiko Stuebner --- Documentation/devicetree/bindings/arm/rockchip/pmu.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/arm/rockchip/pmu.yaml b/Documentation/devicetree/bindings/arm/rockchip/pmu.yaml index 932f981265ccb..52016a141227b 100644 --- a/Documentation/devicetree/bindings/arm/rockchip/pmu.yaml +++ b/Documentation/devicetree/bindings/arm/rockchip/pmu.yaml @@ -53,11 +53,17 @@ properties: reg: maxItems: 1 + power-controller: + type: object + + reboot-mode: + type: object + required: - compatible - reg -additionalProperties: true +additionalProperties: false examples: - | -- GitLab From 2c202e6c4f4dd19d2e8c1dfac9df05170aa3934f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 8 Feb 2025 08:29:15 +0900 Subject: [PATCH 0438/1585] ata: libahci_platform: Do not set mask_port_map when not needed Commit 8c87215dd3a2 ("ata: libahci_platform: support non-consecutive port numbers") modified ahci_platform_get_resources() to allow identifying the ports of a controller that are defined as child nodes of the controller node in order to support non-consecutive port numbers (as defined by the platform device tree). However, this commit also erroneously sets bit 0 of hpriv->mask_port_map when the platform devices tree does not define port child nodes, to match the fact that the temporary default number of ports used in that case is 1 (which is also consistent with the fact that only index 0 of hpriv->phys[] is initialized with the call to ahci_platform_get_phy(). But doing so causes ahci_platform_init_host() to initialize and probe only the first port, even if this function determines that the controller has in fact multiple ports using the capability register of the controller (through a call to ahci_nr_ports()). This can be seen with the ahci_mvebu driver (Armada 385 SoC) with the second port declared as "dummy": ahci-mvebu f10a8000.sata: masking port_map 0x3 -> 0x1 ahci-mvebu f10a8000.sata: AHCI vers 0001.0000, 32 command slots, 6 Gbps, platform mode ahci-mvebu f10a8000.sata: 1/2 ports implemented (port mask 0x1) ahci-mvebu f10a8000.sata: flags: 64bit ncq sntf led only pmp fbs pio slum part sxs scsi host0: ahci-mvebu scsi host1: ahci-mvebu ata1: SATA max UDMA/133 mmio [mem 0xf10a8000-0xf10a9fff] port 0x100 irq 40 lpm-pol 0 ata2: DUMMY Fix this issue by removing setting bit 0 of hpriv->mask_port_map when the platform device tree does not define port child nodes. Reported-by: Klaus Kudielka Fixes: 8c87215dd3a2 ("ata: libahci_platform: support non-consecutive port numbers") Tested-by: Klaus Kudielka Signed-off-by: Damien Le Moal Acked-by: Josua Mayer Link: https://lore.kernel.org/r/20250207232915.1439174-1-dlemoal@kernel.org Signed-off-by: Niklas Cassel --- drivers/ata/libahci_platform.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 53b2c7719dc51..91d44302eac92 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -651,8 +651,6 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev, * If no sub-node was found, keep this for device tree * compatibility */ - hpriv->mask_port_map |= BIT(0); - rc = ahci_platform_get_phy(hpriv, 0, dev, dev->of_node); if (rc) goto err_out; -- GitLab From 78ccf6a6bae11e451e20a52dd2bc2ab98f66326b Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 10 Feb 2025 11:19:53 +0800 Subject: [PATCH 0439/1585] ASoC: Intel: soc-acpi-intel-ptl-match: revise typo of rt712_vb + rt1320 support s/lnl/ptl Fixes: bd40d912728f ("ASoC: Intel: soc-acpi-intel-ptl-match: add rt712_vb + rt1320 support") Signed-off-by: Peter Ujfalusi Reviewed-by: Kai Vehmanen Reviewed-by: Ranjani Sridharan Signed-off-by: Bard Liao Link: https://patch.msgid.link/20250210031954.6287-2-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/common/soc-acpi-intel-ptl-match.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c index 9eb4a43e3e7a8..e487c4e1c034f 100644 --- a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c @@ -270,7 +270,7 @@ static const struct snd_soc_acpi_link_adr lnl_sdw_rt713_vb_l2_rt1320_l13[] = { {} }; -static const struct snd_soc_acpi_link_adr lnl_sdw_rt712_vb_l2_rt1320_l1[] = { +static const struct snd_soc_acpi_link_adr ptl_sdw_rt712_vb_l2_rt1320_l1[] = { { .mask = BIT(2), .num_adr = ARRAY_SIZE(rt712_vb_2_group1_adr), @@ -337,10 +337,10 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_ptl_sdw_machines[] = { }, { .link_mask = BIT(1) | BIT(2), - .links = lnl_sdw_rt712_vb_l2_rt1320_l1, + .links = ptl_sdw_rt712_vb_l2_rt1320_l1, .drv_name = "sof_sdw", .machine_check = snd_soc_acpi_intel_sdca_is_device_rt712_vb, - .sof_tplg_filename = "sof-lnl-rt712-l2-rt1320-l1.tplg" + .sof_tplg_filename = "sof-ptl-rt712-l2-rt1320-l1.tplg" }, { .link_mask = BIT(1) | BIT(2) | BIT(3), -- GitLab From cb78b8dc7834066539253c039f276b3625fecd9f Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 10 Feb 2025 11:19:54 +0800 Subject: [PATCH 0440/1585] ASoC: Intel: soc-acpi-intel-ptl-match: revise typo of rt713_vb_l2_rt1320_l13 s/lnl/ptl Fixes: a7ebb0255188 ("ASoC: Intel: soc-acpi-intel-ptl-match: add rt713_vb_l2_rt1320_l13 support") Signed-off-by: Peter Ujfalusi Reviewed-by: Kai Vehmanen Reviewed-by: Ranjani Sridharan Signed-off-by: Bard Liao Link: https://patch.msgid.link/20250210031954.6287-3-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/common/soc-acpi-intel-ptl-match.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c index e487c4e1c034f..dd7993b76dee3 100644 --- a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c @@ -251,7 +251,7 @@ static const struct snd_soc_acpi_link_adr ptl_rvp[] = { {} }; -static const struct snd_soc_acpi_link_adr lnl_sdw_rt713_vb_l2_rt1320_l13[] = { +static const struct snd_soc_acpi_link_adr ptl_sdw_rt713_vb_l2_rt1320_l13[] = { { .mask = BIT(2), .num_adr = ARRAY_SIZE(rt713_vb_2_adr), @@ -344,10 +344,10 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_ptl_sdw_machines[] = { }, { .link_mask = BIT(1) | BIT(2) | BIT(3), - .links = lnl_sdw_rt713_vb_l2_rt1320_l13, + .links = ptl_sdw_rt713_vb_l2_rt1320_l13, .drv_name = "sof_sdw", .machine_check = snd_soc_acpi_intel_sdca_is_device_rt712_vb, - .sof_tplg_filename = "sof-lnl-rt713-l2-rt1320-l13.tplg" + .sof_tplg_filename = "sof-ptl-rt713-l2-rt1320-l13.tplg" }, {}, }; -- GitLab From 91b98d5a6e8067c5226207487681a48f0d651e46 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 7 Feb 2025 13:46:02 +0200 Subject: [PATCH 0441/1585] ASoC: SOF: amd: Add post_fw_run_delay ACP quirk Stress testing resume from suspend on Valve Steam Deck OLED (Galileo) revealed that the DSP firmware could enter an unrecoverable faulty state, where the kernel ring buffer is flooded with IPC related error messages: [ +0.017002] snd_sof_amd_vangogh 0000:04:00.5: acp_sof_ipc_send_msg: Failed to acquire HW lock [ +0.000054] snd_sof_amd_vangogh 0000:04:00.5: ipc3_tx_msg_unlocked: ipc message send for 0x30100000 failed: -22 [ +0.000005] snd_sof_amd_vangogh 0000:04:00.5: Failed to setup widget PIPELINE.6.ACPHS1.IN [ +0.000004] snd_sof_amd_vangogh 0000:04:00.5: Failed to restore pipeline after resume -22 [ +0.000003] snd_sof_amd_vangogh 0000:04:00.5: PM: dpm_run_callback(): pci_pm_resume returns -22 [ +0.000009] snd_sof_amd_vangogh 0000:04:00.5: PM: failed to resume async: error -22 [...] [ +0.002582] PM: suspend exit [ +0.065085] snd_sof_amd_vangogh 0000:04:00.5: ipc tx error for 0x30130000 (msg/reply size: 12/0): -22 [ +0.000499] snd_sof_amd_vangogh 0000:04:00.5: error: failed widget list set up for pcm 1 dir 0 [ +0.000011] snd_sof_amd_vangogh 0000:04:00.5: error: set pcm hw_params after resume [ +0.000006] snd_sof_amd_vangogh 0000:04:00.5: ASoC: error at snd_soc_pcm_component_prepare on 0000:04:00.5: -22 [...] A system reboot would be necessary to restore the speakers functionality. However, by delaying a bit any host to DSP transmission right after the firmware boot completed, the issue could not be reproduced anymore and sound continued to work flawlessly even after performing thousands of suspend/resume cycles. Introduce the post_fw_run_delay ACP quirk to allow providing the aforementioned delay via the snd_sof_dsp_ops->post_fw_run() callback for the affected devices. Signed-off-by: Cristian Ciocaltea Link: https://patch.msgid.link/20250207-sof-vangogh-fixes-v1-1-67824c1e4c9a@collabora.com Signed-off-by: Mark Brown --- sound/soc/sof/amd/acp.c | 1 + sound/soc/sof/amd/acp.h | 1 + sound/soc/sof/amd/vangogh.c | 18 ++++++++++++++++++ 3 files changed, 20 insertions(+) diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c index 33648ff8b8336..9e13c96528be3 100644 --- a/sound/soc/sof/amd/acp.c +++ b/sound/soc/sof/amd/acp.c @@ -27,6 +27,7 @@ MODULE_PARM_DESC(enable_fw_debug, "Enable Firmware debug"); static struct acp_quirk_entry quirk_valve_galileo = { .signed_fw_image = true, .skip_iram_dram_size_mod = true, + .post_fw_run_delay = true, }; const struct dmi_system_id acp_sof_quirk_table[] = { diff --git a/sound/soc/sof/amd/acp.h b/sound/soc/sof/amd/acp.h index 800594440f739..2a19d82d62002 100644 --- a/sound/soc/sof/amd/acp.h +++ b/sound/soc/sof/amd/acp.h @@ -220,6 +220,7 @@ struct sof_amd_acp_desc { struct acp_quirk_entry { bool signed_fw_image; bool skip_iram_dram_size_mod; + bool post_fw_run_delay; }; /* Common device data struct for ACP devices */ diff --git a/sound/soc/sof/amd/vangogh.c b/sound/soc/sof/amd/vangogh.c index 8e2672106ac60..d5f1dddd43e72 100644 --- a/sound/soc/sof/amd/vangogh.c +++ b/sound/soc/sof/amd/vangogh.c @@ -11,6 +11,7 @@ * Hardware interface for Audio DSP on Vangogh platform */ +#include #include #include @@ -136,6 +137,20 @@ static struct snd_soc_dai_driver vangogh_sof_dai[] = { }, }; +static int sof_vangogh_post_fw_run_delay(struct snd_sof_dev *sdev) +{ + /* + * Resuming from suspend in some cases my cause the DSP firmware + * to enter an unrecoverable faulty state. Delaying a bit any host + * to DSP transmission right after firmware boot completion seems + * to resolve the issue. + */ + if (!sdev->first_boot) + usleep_range(100, 150); + + return 0; +} + /* Vangogh ops */ struct snd_sof_dsp_ops sof_vangogh_ops; EXPORT_SYMBOL_NS(sof_vangogh_ops, "SND_SOC_SOF_AMD_COMMON"); @@ -157,6 +172,9 @@ int sof_vangogh_ops_init(struct snd_sof_dev *sdev) if (quirks->signed_fw_image) sof_vangogh_ops.load_firmware = acp_sof_load_signed_firmware; + + if (quirks->post_fw_run_delay) + sof_vangogh_ops.post_fw_run = sof_vangogh_post_fw_run_delay; } return 0; -- GitLab From 2ecbc2e9f3b19e2199e8bc3ba603d299f1985f09 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 7 Feb 2025 13:46:03 +0200 Subject: [PATCH 0442/1585] ASoC: SOF: amd: Drop unused includes from Vangogh driver Remove all the includes for headers which are not (directly) used from the Vangogh SOF driver sources. Signed-off-by: Cristian Ciocaltea Reviewed-by: Venkata Prasad Potturu Link: https://patch.msgid.link/20250207-sof-vangogh-fixes-v1-2-67824c1e4c9a@collabora.com Signed-off-by: Mark Brown --- sound/soc/sof/amd/pci-vangogh.c | 2 -- sound/soc/sof/amd/vangogh.c | 4 ---- 2 files changed, 6 deletions(-) diff --git a/sound/soc/sof/amd/pci-vangogh.c b/sound/soc/sof/amd/pci-vangogh.c index 53f64d6bc91ba..28f2d4050a676 100644 --- a/sound/soc/sof/amd/pci-vangogh.c +++ b/sound/soc/sof/amd/pci-vangogh.c @@ -13,11 +13,9 @@ #include #include -#include #include #include -#include "../ops.h" #include "../sof-pci-dev.h" #include "../../amd/mach-config.h" #include "acp.h" diff --git a/sound/soc/sof/amd/vangogh.c b/sound/soc/sof/amd/vangogh.c index d5f1dddd43e72..6ed5f9aaa414e 100644 --- a/sound/soc/sof/amd/vangogh.c +++ b/sound/soc/sof/amd/vangogh.c @@ -12,13 +12,9 @@ */ #include -#include #include -#include "../ops.h" -#include "../sof-audio.h" #include "acp.h" -#include "acp-dsp-offset.h" #define I2S_HS_INSTANCE 0 #define I2S_BT_INSTANCE 1 -- GitLab From ac84ca815adb4171a4276b1d44096b75f6a150b7 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 7 Feb 2025 13:46:04 +0200 Subject: [PATCH 0443/1585] ASoC: SOF: amd: Handle IPC replies before FW_BOOT_COMPLETE In some cases, e.g. during resuming from suspend, there is a possibility that some IPC reply messages get received by the host while the DSP firmware has not yet reached the complete boot state. Detect when this happens and do not attempt to process the unexpected replies from DSP. Instead, provide proper debugging support. Signed-off-by: Cristian Ciocaltea Link: https://patch.msgid.link/20250207-sof-vangogh-fixes-v1-3-67824c1e4c9a@collabora.com Signed-off-by: Mark Brown --- sound/soc/sof/amd/acp-ipc.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/sound/soc/sof/amd/acp-ipc.c b/sound/soc/sof/amd/acp-ipc.c index 5f371d9263f3b..12caefd087885 100644 --- a/sound/soc/sof/amd/acp-ipc.c +++ b/sound/soc/sof/amd/acp-ipc.c @@ -167,6 +167,7 @@ irqreturn_t acp_sof_ipc_irq_thread(int irq, void *context) if (sdev->first_boot && sdev->fw_state != SOF_FW_BOOT_COMPLETE) { acp_mailbox_read(sdev, sdev->dsp_box.offset, &status, sizeof(status)); + if ((status & SOF_IPC_PANIC_MAGIC_MASK) == SOF_IPC_PANIC_MAGIC) { snd_sof_dsp_panic(sdev, sdev->dsp_box.offset + sizeof(status), true); @@ -188,13 +189,21 @@ irqreturn_t acp_sof_ipc_irq_thread(int irq, void *context) dsp_ack = snd_sof_dsp_read(sdev, ACP_DSP_BAR, ACP_SCRATCH_REG_0 + dsp_ack_write); if (dsp_ack) { - spin_lock_irq(&sdev->ipc_lock); - /* handle immediate reply from DSP core */ - acp_dsp_ipc_get_reply(sdev); - snd_sof_ipc_reply(sdev, 0); - /* set the done bit */ - acp_dsp_ipc_dsp_done(sdev); - spin_unlock_irq(&sdev->ipc_lock); + if (likely(sdev->fw_state == SOF_FW_BOOT_COMPLETE)) { + spin_lock_irq(&sdev->ipc_lock); + + /* handle immediate reply from DSP core */ + acp_dsp_ipc_get_reply(sdev); + snd_sof_ipc_reply(sdev, 0); + /* set the done bit */ + acp_dsp_ipc_dsp_done(sdev); + + spin_unlock_irq(&sdev->ipc_lock); + } else { + dev_dbg_ratelimited(sdev->dev, "IPC reply before FW_BOOT_COMPLETE: %#x\n", + dsp_ack); + } + ipc_irq = true; } -- GitLab From ccc8480d90e8cb60f06bd90e227f34784927e19f Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 7 Feb 2025 13:46:05 +0200 Subject: [PATCH 0444/1585] ASoC: SOF: amd: Add branch prediction hint in ACP IRQ handler The conditional involving sdev->first_boot in acp_sof_ipc_irq_thread() will succeed only once, i.e. during the very first run of the DSP firmware. Use the unlikely() annotation to help improve branch prediction accuracy. Signed-off-by: Cristian Ciocaltea Reviewed-by: Venkata Prasad Potturu Link: https://patch.msgid.link/20250207-sof-vangogh-fixes-v1-4-67824c1e4c9a@collabora.com Signed-off-by: Mark Brown --- sound/soc/sof/amd/acp-ipc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/amd/acp-ipc.c b/sound/soc/sof/amd/acp-ipc.c index 12caefd087885..22d4b807e1bb7 100644 --- a/sound/soc/sof/amd/acp-ipc.c +++ b/sound/soc/sof/amd/acp-ipc.c @@ -165,7 +165,7 @@ irqreturn_t acp_sof_ipc_irq_thread(int irq, void *context) int dsp_msg, dsp_ack; unsigned int status; - if (sdev->first_boot && sdev->fw_state != SOF_FW_BOOT_COMPLETE) { + if (unlikely(sdev->first_boot && sdev->fw_state != SOF_FW_BOOT_COMPLETE)) { acp_mailbox_read(sdev, sdev->dsp_box.offset, &status, sizeof(status)); if ((status & SOF_IPC_PANIC_MAGIC_MASK) == SOF_IPC_PANIC_MAGIC) { -- GitLab From 9759ae2cee7cd42b95f1c48aa3749bd02b5ddb08 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 17 Jan 2025 13:58:00 +0800 Subject: [PATCH 0445/1585] iommu: Fix potential memory leak in iopf_queue_remove_device() The iopf_queue_remove_device() helper removes a device from the per-iommu iopf queue when PRI is disabled on the device. It responds to all outstanding iopf's with an IOMMU_PAGE_RESP_INVALID code and detaches the device from the queue. However, it fails to release the group structure that represents a group of iopf's awaiting for a response after responding to the hardware. This can cause a memory leak if iopf_queue_remove_device() is called with pending iopf's. Fix it by calling iopf_free_group() after the iopf group is responded. Fixes: 199112327135 ("iommu: Track iopf group instead of last fault") Cc: stable@vger.kernel.org Suggested-by: Kevin Tian Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20250117055800.782462-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgfault.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 4674e618797c1..8b5926c1452ed 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -478,6 +478,7 @@ void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev) ops->page_response(dev, iopf, &resp); list_del_init(&group->pending_node); + iopf_free_group(group); } mutex_unlock(&fault_param->lock); -- GitLab From 17987453a9d997c4d0749abc52f047bfa275427a Mon Sep 17 00:00:00 2001 From: Mohan Kumar D Date: Mon, 10 Feb 2025 19:24:12 +0530 Subject: [PATCH 0446/1585] dmaengine: tegra210-adma: Use div_u64 for 64 bit division The ADMA base and page address are represented using a 64-bit variable. To accurately derive the exact ADMA page number provided from the DT properties, use the div_u64() to divide the address difference between adma page and base address by the page offset. This change fixes the below error "ERROR: modpost: "__udivdi3" [drivers/dma/tegra210-adma.ko] undefined! ld: drivers/dma/tegra210-adma.o: in function `tegra_adma_probe': tegra210-adma.c:(.text+0x12cf): undefined reference to `__udivdi3'" Fixes: 68811c928f88 ("dmaengine: tegra210-adma: Support channel page") Cc: stable@vger.kernel.org Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202412250204.GCQhdKe3-lkp@intel.com/ Signed-off-by: Mohan Kumar D Reviewed-by: Jon Hunter Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20250210135413.2504272-2-mkumard@nvidia.com Signed-off-by: Vinod Koul --- drivers/dma/tegra210-adma.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c index 6896da8ac7ef6..a0bd4822ed808 100644 --- a/drivers/dma/tegra210-adma.c +++ b/drivers/dma/tegra210-adma.c @@ -887,7 +887,8 @@ static int tegra_adma_probe(struct platform_device *pdev) const struct tegra_adma_chip_data *cdata; struct tegra_adma *tdma; struct resource *res_page, *res_base; - int ret, i, page_no; + u64 page_no, page_offset; + int ret, i; cdata = of_device_get_match_data(&pdev->dev); if (!cdata) { @@ -914,10 +915,16 @@ static int tegra_adma_probe(struct platform_device *pdev) res_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "global"); if (res_base) { - page_no = (res_page->start - res_base->start) / cdata->ch_base_offset; - if (page_no <= 0) + if (WARN_ON(res_page->start <= res_base->start)) return -EINVAL; - tdma->ch_page_no = page_no - 1; + + page_offset = res_page->start - res_base->start; + page_no = div_u64(page_offset, cdata->ch_base_offset); + + if (WARN_ON(page_no == 0)) + return -EINVAL; + + tdma->ch_page_no = lower_32_bits(page_no) - 1; tdma->base_addr = devm_ioremap_resource(&pdev->dev, res_base); if (IS_ERR(tdma->base_addr)) return PTR_ERR(tdma->base_addr); -- GitLab From 76ed9b7d177ed5aa161a824ea857619b88542de1 Mon Sep 17 00:00:00 2001 From: Mohan Kumar D Date: Mon, 10 Feb 2025 19:24:13 +0530 Subject: [PATCH 0447/1585] dmaengine: tegra210-adma: check for adma max page Have additional check for max channel page during the probe to cover if any offset overshoot happens due to wrong DT configuration. Fixes: 68811c928f88 ("dmaengine: tegra210-adma: Support channel page") Cc: stable@vger.kernel.org Signed-off-by: Mohan Kumar D Reviewed-by: Jon Hunter Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20250210135413.2504272-3-mkumard@nvidia.com Signed-off-by: Vinod Koul --- drivers/dma/tegra210-adma.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c index a0bd4822ed808..801740ad8e0d9 100644 --- a/drivers/dma/tegra210-adma.c +++ b/drivers/dma/tegra210-adma.c @@ -83,7 +83,9 @@ struct tegra_adma; * @nr_channels: Number of DMA channels available. * @ch_fifo_size_mask: Mask for FIFO size field. * @sreq_index_offset: Slave channel index offset. + * @max_page: Maximum ADMA Channel Page. * @has_outstanding_reqs: If DMA channel can have outstanding requests. + * @set_global_pg_config: Global page programming. */ struct tegra_adma_chip_data { unsigned int (*adma_get_burst_config)(unsigned int burst_size); @@ -99,6 +101,7 @@ struct tegra_adma_chip_data { unsigned int nr_channels; unsigned int ch_fifo_size_mask; unsigned int sreq_index_offset; + unsigned int max_page; bool has_outstanding_reqs; void (*set_global_pg_config)(struct tegra_adma *tdma); }; @@ -854,6 +857,7 @@ static const struct tegra_adma_chip_data tegra210_chip_data = { .nr_channels = 22, .ch_fifo_size_mask = 0xf, .sreq_index_offset = 2, + .max_page = 0, .has_outstanding_reqs = false, .set_global_pg_config = NULL, }; @@ -871,6 +875,7 @@ static const struct tegra_adma_chip_data tegra186_chip_data = { .nr_channels = 32, .ch_fifo_size_mask = 0x1f, .sreq_index_offset = 4, + .max_page = 4, .has_outstanding_reqs = true, .set_global_pg_config = tegra186_adma_global_page_config, }; @@ -921,7 +926,7 @@ static int tegra_adma_probe(struct platform_device *pdev) page_offset = res_page->start - res_base->start; page_no = div_u64(page_offset, cdata->ch_base_offset); - if (WARN_ON(page_no == 0)) + if (WARN_ON(page_no == 0 || page_no > cdata->max_page)) return -EINVAL; tdma->ch_page_no = lower_32_bits(page_no) - 1; -- GitLab From 1046cac109225eda0973b898e053aeb3d6c10e1d Mon Sep 17 00:00:00 2001 From: Sybil Isabel Dorsett Date: Mon, 3 Feb 2025 16:33:15 +0000 Subject: [PATCH 0448/1585] platform/x86: thinkpad_acpi: Fix invalid fan speed on ThinkPad X120e MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ThinkPad X120e, fan speed is reported in ticks per revolution rather than RPM. Recalculate the fan speed value reported for ThinkPad X120e to RPM based on a 22.5 kHz clock. Based on the information on https://www.thinkwiki.org/wiki/How_to_control_fan_speed, the same problem is highly likely to be relevant to at least Edge11, but Edge11 is not addressed in this patch. Signed-off-by: Sybil Isabel Dorsett Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20250203163255.5525-1-sybdorsett@proton.me Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/thinkpad_acpi.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 1fcb0f99695a7..e7778ea41478e 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -7885,6 +7885,7 @@ static struct ibm_struct volume_driver_data = { #define FAN_NS_CTRL_STATUS BIT(2) /* Bit which determines control is enabled or not */ #define FAN_NS_CTRL BIT(4) /* Bit which determines control is by host or EC */ +#define FAN_CLOCK_TPM (22500*60) /* Ticks per minute for a 22.5 kHz clock */ enum { /* Fan control constants */ fan_status_offset = 0x2f, /* EC register 0x2f */ @@ -7940,6 +7941,7 @@ static int fan_watchdog_maxinterval; static bool fan_with_ns_addr; static bool ecfw_with_fan_dec_rpm; +static bool fan_speed_in_tpr; static struct mutex fan_mutex; @@ -8142,8 +8144,11 @@ static int fan_get_speed(unsigned int *speed) !acpi_ec_read(fan_rpm_offset + 1, &hi))) return -EIO; - if (likely(speed)) + if (likely(speed)) { *speed = (hi << 8) | lo; + if (fan_speed_in_tpr && *speed != 0) + *speed = FAN_CLOCK_TPM / *speed; + } break; case TPACPI_FAN_RD_TPEC_NS: if (!acpi_ec_read(fan_rpm_status_ns, &lo)) @@ -8176,8 +8181,11 @@ static int fan2_get_speed(unsigned int *speed) if (rc) return -EIO; - if (likely(speed)) + if (likely(speed)) { *speed = (hi << 8) | lo; + if (fan_speed_in_tpr && *speed != 0) + *speed = FAN_CLOCK_TPM / *speed; + } break; case TPACPI_FAN_RD_TPEC_NS: @@ -8788,6 +8796,7 @@ static const struct attribute_group fan_driver_attr_group = { #define TPACPI_FAN_NOFAN 0x0008 /* no fan available */ #define TPACPI_FAN_NS 0x0010 /* For EC with non-Standard register addresses */ #define TPACPI_FAN_DECRPM 0x0020 /* For ECFW's with RPM in register as decimal */ +#define TPACPI_FAN_TPR 0x0040 /* Fan speed is in Ticks Per Revolution */ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_QEC_IBM('1', 'Y', TPACPI_FAN_Q1), @@ -8817,6 +8826,7 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_Q_LNV3('R', '0', 'V', TPACPI_FAN_NS), /* 11e Gen5 KL-Y */ TPACPI_Q_LNV3('N', '1', 'O', TPACPI_FAN_NOFAN), /* X1 Tablet (2nd gen) */ TPACPI_Q_LNV3('R', '0', 'Q', TPACPI_FAN_DECRPM),/* L480 */ + TPACPI_Q_LNV('8', 'F', TPACPI_FAN_TPR), /* ThinkPad x120e */ }; static int __init fan_init(struct ibm_init_struct *iibm) @@ -8887,6 +8897,8 @@ static int __init fan_init(struct ibm_init_struct *iibm) if (quirks & TPACPI_FAN_Q1) fan_quirk1_setup(); + if (quirks & TPACPI_FAN_TPR) + fan_speed_in_tpr = true; /* Try and probe the 2nd fan */ tp_features.second_fan = 1; /* needed for get_speed to work */ res = fan2_get_speed(&speed); -- GitLab From 9cff907cbf8c7fb5345918dbcc7b74a01656f34f Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Thu, 6 Feb 2025 14:39:41 -0500 Subject: [PATCH 0449/1585] platform/x86: thinkpad_acpi: Support for V9 DYTC platform profiles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newer Thinkpad AMD platforms are using V9 DYTC and this changes the profiles used for PSC mode. Add support for this update. Tested on P14s G5 AMD Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20250206193953.58365-1-mpearson-lenovo@squebb.ca Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/thinkpad_acpi.c | 34 ++++++++++++++++++---------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index e7778ea41478e..99cdea723d32c 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10331,6 +10331,10 @@ static struct ibm_struct proxsensor_driver_data = { #define DYTC_MODE_PSC_BALANCE 5 /* Default mode aka balanced */ #define DYTC_MODE_PSC_PERFORM 7 /* High power mode aka performance */ +#define DYTC_MODE_PSCV9_LOWPOWER 1 /* Low power mode */ +#define DYTC_MODE_PSCV9_BALANCE 3 /* Default mode aka balanced */ +#define DYTC_MODE_PSCV9_PERFORM 4 /* High power mode aka performance */ + #define DYTC_ERR_MASK 0xF /* Bits 0-3 in cmd result are the error result */ #define DYTC_ERR_SUCCESS 1 /* CMD completed successful */ @@ -10351,6 +10355,10 @@ static int dytc_capabilities; static bool dytc_mmc_get_available; static int profile_force; +static int platform_psc_profile_lowpower = DYTC_MODE_PSC_LOWPOWER; +static int platform_psc_profile_balanced = DYTC_MODE_PSC_BALANCE; +static int platform_psc_profile_performance = DYTC_MODE_PSC_PERFORM; + static int convert_dytc_to_profile(int funcmode, int dytcmode, enum platform_profile_option *profile) { @@ -10372,19 +10380,15 @@ static int convert_dytc_to_profile(int funcmode, int dytcmode, } return 0; case DYTC_FUNCTION_PSC: - switch (dytcmode) { - case DYTC_MODE_PSC_LOWPOWER: + if (dytcmode == platform_psc_profile_lowpower) *profile = PLATFORM_PROFILE_LOW_POWER; - break; - case DYTC_MODE_PSC_BALANCE: + else if (dytcmode == platform_psc_profile_balanced) *profile = PLATFORM_PROFILE_BALANCED; - break; - case DYTC_MODE_PSC_PERFORM: + else if (dytcmode == platform_psc_profile_performance) *profile = PLATFORM_PROFILE_PERFORMANCE; - break; - default: /* Unknown mode */ + else return -EINVAL; - } + return 0; case DYTC_FUNCTION_AMT: /* For now return balanced. It's the closest we have to 'auto' */ @@ -10405,19 +10409,19 @@ static int convert_profile_to_dytc(enum platform_profile_option profile, int *pe if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_LOWPOWER; else if (dytc_capabilities & BIT(DYTC_FC_PSC)) - *perfmode = DYTC_MODE_PSC_LOWPOWER; + *perfmode = platform_psc_profile_lowpower; break; case PLATFORM_PROFILE_BALANCED: if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_BALANCE; else if (dytc_capabilities & BIT(DYTC_FC_PSC)) - *perfmode = DYTC_MODE_PSC_BALANCE; + *perfmode = platform_psc_profile_balanced; break; case PLATFORM_PROFILE_PERFORMANCE: if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_PERFORM; else if (dytc_capabilities & BIT(DYTC_FC_PSC)) - *perfmode = DYTC_MODE_PSC_PERFORM; + *perfmode = platform_psc_profile_performance; break; default: /* Unknown profile */ return -EOPNOTSUPP; @@ -10611,6 +10615,7 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) if (output & BIT(DYTC_QUERY_ENABLE_BIT)) dytc_version = (output >> DYTC_QUERY_REV_BIT) & 0xF; + dbg_printk(TPACPI_DBG_INIT, "DYTC version %d\n", dytc_version); /* Check DYTC is enabled and supports mode setting */ if (dytc_version < 5) return -ENODEV; @@ -10649,6 +10654,11 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) } } else if (dytc_capabilities & BIT(DYTC_FC_PSC)) { /* PSC MODE */ pr_debug("PSC is supported\n"); + if (dytc_version >= 9) { /* update profiles for DYTC 9 and up */ + platform_psc_profile_lowpower = DYTC_MODE_PSCV9_LOWPOWER; + platform_psc_profile_balanced = DYTC_MODE_PSCV9_BALANCE; + platform_psc_profile_performance = DYTC_MODE_PSCV9_PERFORM; + } } else { dbg_printk(TPACPI_DBG_INIT, "No DYTC support available\n"); return -ENODEV; -- GitLab From 2b9df00cded911e2ca2cfae5c45082166b24f8aa Mon Sep 17 00:00:00 2001 From: Niravkumar L Rabara Date: Mon, 10 Feb 2025 13:35:49 +0800 Subject: [PATCH 0450/1585] mtd: rawnand: cadence: fix error code in cadence_nand_init() Replace dma_request_channel() with dma_request_chan_by_mask() and use helper functions to return proper error code instead of fixed -EBUSY. Fixes: ec4ba01e894d ("mtd: rawnand: Add new Cadence NAND driver to MTD subsystem") Cc: stable@vger.kernel.org Signed-off-by: Niravkumar L Rabara Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/cadence-nand-controller.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/mtd/nand/raw/cadence-nand-controller.c b/drivers/mtd/nand/raw/cadence-nand-controller.c index 8d1d710e439dd..fb5f671bdb7bb 100644 --- a/drivers/mtd/nand/raw/cadence-nand-controller.c +++ b/drivers/mtd/nand/raw/cadence-nand-controller.c @@ -2904,11 +2904,10 @@ static int cadence_nand_init(struct cdns_nand_ctrl *cdns_ctrl) dma_cap_set(DMA_MEMCPY, mask); if (cdns_ctrl->caps1->has_dma) { - cdns_ctrl->dmac = dma_request_channel(mask, NULL, NULL); - if (!cdns_ctrl->dmac) { - dev_err(cdns_ctrl->dev, - "Unable to get a DMA channel\n"); - ret = -EBUSY; + cdns_ctrl->dmac = dma_request_chan_by_mask(&mask); + if (IS_ERR(cdns_ctrl->dmac)) { + ret = dev_err_probe(cdns_ctrl->dev, PTR_ERR(cdns_ctrl->dmac), + "%d: Failed to get a DMA channel\n", ret); goto disable_irq; } } -- GitLab From d76d22b5096c5b05208fd982b153b3f182350b19 Mon Sep 17 00:00:00 2001 From: Niravkumar L Rabara Date: Mon, 10 Feb 2025 13:35:50 +0800 Subject: [PATCH 0451/1585] mtd: rawnand: cadence: use dma_map_resource for sdma address Remap the slave DMA I/O resources to enhance driver portability. Using a physical address causes DMA translation failure when the ARM SMMU is enabled. Fixes: ec4ba01e894d ("mtd: rawnand: Add new Cadence NAND driver to MTD subsystem") Cc: stable@vger.kernel.org Signed-off-by: Niravkumar L Rabara Signed-off-by: Miquel Raynal --- .../mtd/nand/raw/cadence-nand-controller.c | 29 ++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/nand/raw/cadence-nand-controller.c b/drivers/mtd/nand/raw/cadence-nand-controller.c index fb5f671bdb7bb..47950a0ac6d28 100644 --- a/drivers/mtd/nand/raw/cadence-nand-controller.c +++ b/drivers/mtd/nand/raw/cadence-nand-controller.c @@ -471,6 +471,8 @@ struct cdns_nand_ctrl { struct { void __iomem *virt; dma_addr_t dma; + dma_addr_t iova_dma; + u32 size; } io; int irq; @@ -1835,11 +1837,11 @@ static int cadence_nand_slave_dma_transfer(struct cdns_nand_ctrl *cdns_ctrl, } if (dir == DMA_FROM_DEVICE) { - src_dma = cdns_ctrl->io.dma; + src_dma = cdns_ctrl->io.iova_dma; dst_dma = buf_dma; } else { src_dma = buf_dma; - dst_dma = cdns_ctrl->io.dma; + dst_dma = cdns_ctrl->io.iova_dma; } tx = dmaengine_prep_dma_memcpy(cdns_ctrl->dmac, dst_dma, src_dma, len, @@ -2869,6 +2871,7 @@ cadence_nand_irq_cleanup(int irqnum, struct cdns_nand_ctrl *cdns_ctrl) static int cadence_nand_init(struct cdns_nand_ctrl *cdns_ctrl) { dma_cap_mask_t mask; + struct dma_device *dma_dev = cdns_ctrl->dmac->device; int ret; cdns_ctrl->cdma_desc = dma_alloc_coherent(cdns_ctrl->dev, @@ -2912,6 +2915,16 @@ static int cadence_nand_init(struct cdns_nand_ctrl *cdns_ctrl) } } + cdns_ctrl->io.iova_dma = dma_map_resource(dma_dev->dev, cdns_ctrl->io.dma, + cdns_ctrl->io.size, + DMA_BIDIRECTIONAL, 0); + + ret = dma_mapping_error(dma_dev->dev, cdns_ctrl->io.iova_dma); + if (ret) { + dev_err(cdns_ctrl->dev, "Failed to map I/O resource to DMA\n"); + goto dma_release_chnl; + } + nand_controller_init(&cdns_ctrl->controller); INIT_LIST_HEAD(&cdns_ctrl->chips); @@ -2922,18 +2935,22 @@ static int cadence_nand_init(struct cdns_nand_ctrl *cdns_ctrl) if (ret) { dev_err(cdns_ctrl->dev, "Failed to register MTD: %d\n", ret); - goto dma_release_chnl; + goto unmap_dma_resource; } kfree(cdns_ctrl->buf); cdns_ctrl->buf = kzalloc(cdns_ctrl->buf_size, GFP_KERNEL); if (!cdns_ctrl->buf) { ret = -ENOMEM; - goto dma_release_chnl; + goto unmap_dma_resource; } return 0; +unmap_dma_resource: + dma_unmap_resource(dma_dev->dev, cdns_ctrl->io.iova_dma, + cdns_ctrl->io.size, DMA_BIDIRECTIONAL, 0); + dma_release_chnl: if (cdns_ctrl->dmac) dma_release_channel(cdns_ctrl->dmac); @@ -2955,6 +2972,8 @@ static int cadence_nand_init(struct cdns_nand_ctrl *cdns_ctrl) static void cadence_nand_remove(struct cdns_nand_ctrl *cdns_ctrl) { cadence_nand_chips_cleanup(cdns_ctrl); + dma_unmap_resource(cdns_ctrl->dmac->device->dev, cdns_ctrl->io.iova_dma, + cdns_ctrl->io.size, DMA_BIDIRECTIONAL, 0); cadence_nand_irq_cleanup(cdns_ctrl->irq, cdns_ctrl); kfree(cdns_ctrl->buf); dma_free_coherent(cdns_ctrl->dev, sizeof(struct cadence_nand_cdma_desc), @@ -3019,7 +3038,9 @@ static int cadence_nand_dt_probe(struct platform_device *ofdev) cdns_ctrl->io.virt = devm_platform_get_and_ioremap_resource(ofdev, 1, &res); if (IS_ERR(cdns_ctrl->io.virt)) return PTR_ERR(cdns_ctrl->io.virt); + cdns_ctrl->io.dma = res->start; + cdns_ctrl->io.size = resource_size(res); dt->clk = devm_clk_get(cdns_ctrl->dev, "nf_clk"); if (IS_ERR(dt->clk)) -- GitLab From f37d135b42cb484bdecee93f56b9f483214ede78 Mon Sep 17 00:00:00 2001 From: Niravkumar L Rabara Date: Mon, 10 Feb 2025 13:35:51 +0800 Subject: [PATCH 0452/1585] mtd: rawnand: cadence: fix incorrect device in dma_unmap_single dma_map_single is using physical/bus device (DMA) but dma_unmap_single is using framework device(NAND controller), which is incorrect. Fixed dma_unmap_single to use correct physical/bus device. Fixes: ec4ba01e894d ("mtd: rawnand: Add new Cadence NAND driver to MTD subsystem") Cc: stable@vger.kernel.org Signed-off-by: Niravkumar L Rabara Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/cadence-nand-controller.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/cadence-nand-controller.c b/drivers/mtd/nand/raw/cadence-nand-controller.c index 47950a0ac6d28..0b2db4173e723 100644 --- a/drivers/mtd/nand/raw/cadence-nand-controller.c +++ b/drivers/mtd/nand/raw/cadence-nand-controller.c @@ -1863,12 +1863,12 @@ static int cadence_nand_slave_dma_transfer(struct cdns_nand_ctrl *cdns_ctrl, dma_async_issue_pending(cdns_ctrl->dmac); wait_for_completion(&finished); - dma_unmap_single(cdns_ctrl->dev, buf_dma, len, dir); + dma_unmap_single(dma_dev->dev, buf_dma, len, dir); return 0; err_unmap: - dma_unmap_single(cdns_ctrl->dev, buf_dma, len, dir); + dma_unmap_single(dma_dev->dev, buf_dma, len, dir); err: dev_dbg(cdns_ctrl->dev, "Fall back to CPU I/O\n"); -- GitLab From fc876c9524e2a9f816f51d533ed31df789cff65a Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 5 Feb 2025 10:40:42 +0530 Subject: [PATCH 0453/1585] drm/xe/client: bo->client does not need bos_lock bos_lock is to protect list of bos used by client, it is not required to protect bo->client so bring it outside of bos_lock. Fixes: b27970f3e11c ("drm/xe: Add tracking support for bos per client") Signed-off-by: Tejas Upadhyay Reviewed-by: Himal Prasad Ghimiray Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20250205051042.1991192-1-tejas.upadhyay@intel.com Signed-off-by: Nirmoy Das (cherry picked from commit f74fd53ba34551b7626193fb70c17226f06e9bf1) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_drm_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 63f30b6df70b9..2d4874d2b9225 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -135,8 +135,8 @@ void xe_drm_client_add_bo(struct xe_drm_client *client, XE_WARN_ON(bo->client); XE_WARN_ON(!list_empty(&bo->client_link)); - spin_lock(&client->bos_lock); bo->client = xe_drm_client_get(client); + spin_lock(&client->bos_lock); list_add_tail(&bo->client_link, &client->bos_list); spin_unlock(&client->bos_lock); } -- GitLab From 53139b3f9998ea07289e7b70b909fea2264a0de9 Mon Sep 17 00:00:00 2001 From: Krzysztof Karas Date: Thu, 30 Jan 2025 09:19:31 +0000 Subject: [PATCH 0454/1585] drm/i915/selftests: avoid using uninitialized context There is an error path in igt_ppgtt_alloc(), which leads to ww object being passed down to i915_gem_ww_ctx_fini() without initialization. Correct that by only putting ppgtt->vm and returning early. Fixes: 480ae79537b2 ("drm/i915/selftests: Prepare gtt tests for obj->mm.lock removal") Signed-off-by: Krzysztof Karas Reviewed-by: Mikolaj Wasiak Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/iuaonpjc3rywmvhna6umjlvzilocn2uqsrxfxfob24e2taocbi@lkaivvfp4777 (cherry picked from commit 8d8334632ea62424233ac6529712868241d0f8df) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 5c397a2df70e2..5d27e1c733c52 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -168,7 +168,7 @@ static int igt_ppgtt_alloc(void *arg) return PTR_ERR(ppgtt); if (!ppgtt->vm.allocate_va_range) - goto err_ppgtt_cleanup; + goto ppgtt_vm_put; /* * While we only allocate the page tables here and so we could @@ -236,7 +236,7 @@ static int igt_ppgtt_alloc(void *arg) goto retry; } i915_gem_ww_ctx_fini(&ww); - +ppgtt_vm_put: i915_vm_put(&ppgtt->vm); return err; } -- GitLab From fd75f371f3a1b04a33d2e750363d6ad76abf734e Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 10 Feb 2025 11:35:15 +0100 Subject: [PATCH 0455/1585] phy: stm32: Fix constant-value overflow assertion Rework the workaround as the lookup tables always fits into the bitfield, and the default values are defined by the hardware and cannot be 0: Guard against false positive with a WARN_ON check to make the compiler happy: The offset range is pre-checked against the sorted imp_lookup_table values and overflow should not happen and would be caught by a warning and return in error. Also guard against a true positive found during the max_vswing lookup, as a max vswing value can be 802000 or 803000 microvolt depending on the current impedance. Therefore set the default impedence index. Fixes: 2de679ecd724 ("phy: stm32: work around constant-value overflow assertion") Signed-off-by: Christian Bruel Link: https://lore.kernel.org/r/20250210103515.2598377-1-christian.bruel@foss.st.com Signed-off-by: Vinod Koul --- drivers/phy/st/phy-stm32-combophy.c | 38 ++++++++++++++--------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/phy/st/phy-stm32-combophy.c b/drivers/phy/st/phy-stm32-combophy.c index 49e9fa90a6819..607b4d607eb5e 100644 --- a/drivers/phy/st/phy-stm32-combophy.c +++ b/drivers/phy/st/phy-stm32-combophy.c @@ -111,6 +111,7 @@ static const struct clk_impedance imp_lookup[] = { { 4204000, { 511000, 609000, 706000, 802000 } }, { 3999000, { 571000, 648000, 726000, 803000 } } }; +#define DEFAULT_IMP_INDEX 3 /* Default impedance is 50 Ohm */ static int stm32_impedance_tune(struct stm32_combophy *combophy) { @@ -119,10 +120,9 @@ static int stm32_impedance_tune(struct stm32_combophy *combophy) u8 imp_of, vswing_of; u32 max_imp = imp_lookup[0].microohm; u32 min_imp = imp_lookup[imp_size - 1].microohm; - u32 max_vswing = imp_lookup[imp_size - 1].vswing[vswing_size - 1]; + u32 max_vswing; u32 min_vswing = imp_lookup[0].vswing[0]; u32 val; - u32 regval; if (!of_property_read_u32(combophy->dev->of_node, "st,output-micro-ohms", &val)) { if (val < min_imp || val > max_imp) { @@ -130,45 +130,43 @@ static int stm32_impedance_tune(struct stm32_combophy *combophy) return -EINVAL; } - regval = 0; - for (imp_of = 0; imp_of < ARRAY_SIZE(imp_lookup); imp_of++) { - if (imp_lookup[imp_of].microohm <= val) { - regval = FIELD_PREP(STM32MP25_PCIEPRG_IMPCTRL_OHM, imp_of); + for (imp_of = 0; imp_of < ARRAY_SIZE(imp_lookup); imp_of++) + if (imp_lookup[imp_of].microohm <= val) break; - } - } + + if (WARN_ON(imp_of == ARRAY_SIZE(imp_lookup))) + return -EINVAL; dev_dbg(combophy->dev, "Set %u micro-ohms output impedance\n", imp_lookup[imp_of].microohm); regmap_update_bits(combophy->regmap, SYSCFG_PCIEPRGCR, STM32MP25_PCIEPRG_IMPCTRL_OHM, - regval); - } else { - regmap_read(combophy->regmap, SYSCFG_PCIEPRGCR, &val); - imp_of = FIELD_GET(STM32MP25_PCIEPRG_IMPCTRL_OHM, val); - } + FIELD_PREP(STM32MP25_PCIEPRG_IMPCTRL_OHM, imp_of)); + } else + imp_of = DEFAULT_IMP_INDEX; if (!of_property_read_u32(combophy->dev->of_node, "st,output-vswing-microvolt", &val)) { + max_vswing = imp_lookup[imp_of].vswing[vswing_size - 1]; + if (val < min_vswing || val > max_vswing) { dev_err(combophy->dev, "Invalid value %u for output vswing\n", val); return -EINVAL; } - regval = 0; - for (vswing_of = 0; vswing_of < ARRAY_SIZE(imp_lookup[imp_of].vswing); vswing_of++) { - if (imp_lookup[imp_of].vswing[vswing_of] >= val) { - regval = FIELD_PREP(STM32MP25_PCIEPRG_IMPCTRL_VSWING, vswing_of); + for (vswing_of = 0; vswing_of < ARRAY_SIZE(imp_lookup[imp_of].vswing); vswing_of++) + if (imp_lookup[imp_of].vswing[vswing_of] >= val) break; - } - } + + if (WARN_ON(vswing_of == ARRAY_SIZE(imp_lookup[imp_of].vswing))) + return -EINVAL; dev_dbg(combophy->dev, "Set %u microvolt swing\n", imp_lookup[imp_of].vswing[vswing_of]); regmap_update_bits(combophy->regmap, SYSCFG_PCIEPRGCR, STM32MP25_PCIEPRG_IMPCTRL_VSWING, - regval); + FIELD_PREP(STM32MP25_PCIEPRG_IMPCTRL_VSWING, vswing_of)); } return 0; -- GitLab From e2158c953c973adb49383ddea2504faf08d375b7 Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Sun, 9 Feb 2025 00:29:30 +0530 Subject: [PATCH 0456/1585] phy: exynos5-usbdrd: fix MPLL_MULTIPLIER and SSC_REFCLKSEL masks in refclk In exynos5_usbdrd_{pipe3,utmi}_set_refclk(), the masks PHYCLKRST_MPLL_MULTIPLIER_MASK and PHYCLKRST_SSC_REFCLKSEL_MASK are not inverted when applied to the register values. Fix it. Cc: stable@vger.kernel.org Fixes: 59025887fb08 ("phy: Add new Exynos5 USB 3.0 PHY driver") Signed-off-by: Kaustabh Chakraborty Reviewed-by: Krzysztof Kozlowski Reviewed-by: Anand Moon Link: https://lore.kernel.org/r/20250209-exynos5-usbdrd-masks-v1-1-4f7f83f323d7@disroot.org Signed-off-by: Vinod Koul --- drivers/phy/samsung/phy-exynos5-usbdrd.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/phy/samsung/phy-exynos5-usbdrd.c b/drivers/phy/samsung/phy-exynos5-usbdrd.c index c421b495eb0fe..4a108fdab118c 100644 --- a/drivers/phy/samsung/phy-exynos5-usbdrd.c +++ b/drivers/phy/samsung/phy-exynos5-usbdrd.c @@ -488,9 +488,9 @@ exynos5_usbdrd_pipe3_set_refclk(struct phy_usb_instance *inst) reg |= PHYCLKRST_REFCLKSEL_EXT_REFCLK; /* FSEL settings corresponding to reference clock */ - reg &= ~PHYCLKRST_FSEL_PIPE_MASK | - PHYCLKRST_MPLL_MULTIPLIER_MASK | - PHYCLKRST_SSC_REFCLKSEL_MASK; + reg &= ~(PHYCLKRST_FSEL_PIPE_MASK | + PHYCLKRST_MPLL_MULTIPLIER_MASK | + PHYCLKRST_SSC_REFCLKSEL_MASK); switch (phy_drd->extrefclk) { case EXYNOS5_FSEL_50MHZ: reg |= (PHYCLKRST_MPLL_MULTIPLIER_50M_REF | @@ -532,9 +532,9 @@ exynos5_usbdrd_utmi_set_refclk(struct phy_usb_instance *inst) reg &= ~PHYCLKRST_REFCLKSEL_MASK; reg |= PHYCLKRST_REFCLKSEL_EXT_REFCLK; - reg &= ~PHYCLKRST_FSEL_UTMI_MASK | - PHYCLKRST_MPLL_MULTIPLIER_MASK | - PHYCLKRST_SSC_REFCLKSEL_MASK; + reg &= ~(PHYCLKRST_FSEL_UTMI_MASK | + PHYCLKRST_MPLL_MULTIPLIER_MASK | + PHYCLKRST_SSC_REFCLKSEL_MASK); reg |= PHYCLKRST_FSEL(phy_drd->extrefclk); return reg; -- GitLab From b19181638182d1f5c43757b471c056b6196c8ca3 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Mon, 10 Feb 2025 16:32:50 +0000 Subject: [PATCH 0457/1585] ASoC: cs35l41: Fix acpi_device_hid() not found MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function acpi_device_hid() is only defined if CONFIG_ACPI is set. Use #ifdef CONFIG_ACPI to ensure that cs35l41 driver only calls this function is CONFIG_ACPI is define. Fixes: 1d44a30ae3f9 ("ASoC: cs35l41: Fallback to using HID for system_name if no SUB is available") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202502090100.SbXmGFqs-lkp@intel.com/ Signed-off-by: Stefan Binding Reviewed-by: André Almeida Link: https://patch.msgid.link/20250210163256.1722350-1-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l41.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/codecs/cs35l41.c b/sound/soc/codecs/cs35l41.c index 30b89018b1139..ff4134bee858e 100644 --- a/sound/soc/codecs/cs35l41.c +++ b/sound/soc/codecs/cs35l41.c @@ -1148,6 +1148,7 @@ static int cs35l41_dsp_init(struct cs35l41_private *cs35l41) return ret; } +#ifdef CONFIG_ACPI static int cs35l41_acpi_get_name(struct cs35l41_private *cs35l41) { struct acpi_device *adev = ACPI_COMPANION(cs35l41->dev); @@ -1180,6 +1181,12 @@ static int cs35l41_acpi_get_name(struct cs35l41_private *cs35l41) return 0; } +#else +static int cs35l41_acpi_get_name(struct cs35l41_private *cs35l41) +{ + return 0; +} +#endif /* CONFIG_ACPI */ int cs35l41_probe(struct cs35l41_private *cs35l41, const struct cs35l41_hw_cfg *hw_cfg) { -- GitLab From 5fb25161217370eeee86b63e47060870b67ed2b4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 28 Jan 2025 10:05:03 +1100 Subject: [PATCH 0458/1585] nfsd: fix uninitialised slot info when a request is retried A recent patch moved the assignment of seq->maxslots from before the test for a resent request (which ends with a goto) to after, resulting in it not being run in that case. This results in the server returning bogus "high slot id" and "target high slot id" values. The assignments to ->maxslots and ->target_maxslots need to be *after* the out: label so that the correct values are returned in replies to requests that are served from cache. Fixes: 60aa6564317d ("nfsd: allocate new session-based DRC slots on demand.") Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b7a0cfd05401d..153eeea2c7c99 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4459,10 +4459,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } } while (slot && --cnt > 0); } + +out: seq->maxslots = max(session->se_target_maxslots, seq->maxslots); seq->target_maxslots = session->se_target_maxslots; -out: switch (clp->cl_cb_state) { case NFSD4_CB_DOWN: seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; -- GitLab From d9d6b74e4be989f919498798fa40df37a74b5bb0 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 28 Jan 2025 11:58:06 -0500 Subject: [PATCH 0459/1585] nfsd: fix __fh_verify for localio __fh_verify() added a call to svc_xprt_set_valid() to help do connection management but during LOCALIO path rqstp argument is NULL, leading to NULL pointer dereferencing and a crash. Fixes: eccbbc7c00a5 ("nfsd: don't use sv_nrthreads in connection limiting calculations.") Signed-off-by: Olga Kornievskaia Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfsfh.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index bf59f83c6224e..91bf0e6d58950 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -381,8 +381,9 @@ __fh_verify(struct svc_rqst *rqstp, error = check_nfsd_access(exp, rqstp, may_bypass_gss); if (error) goto out; - - svc_xprt_set_valid(rqstp->rq_xprt); + /* During LOCALIO call to fh_verify will be called with a NULL rqstp */ + if (rqstp) + svc_xprt_set_valid(rqstp->rq_xprt); /* Finally, check access permissions. */ error = nfsd_permission(cred, exp, dentry, access); -- GitLab From 036ac2778f7b28885814c6fbc07e156ad1624d03 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Thu, 30 Jan 2025 11:01:27 -0800 Subject: [PATCH 0460/1585] NFSD: fix hang in nfsd4_shutdown_callback If nfs4_client is in courtesy state then there is no point to send the callback. This causes nfsd4_shutdown_callback to hang since cl_cb_inflight is not 0. This hang lasts about 15 minutes until TCP notifies NFSD that the connection was dropped. This patch modifies nfsd4_run_cb_work to skip the RPC call if nfs4_client is in courtesy state. Signed-off-by: Dai Ngo Fixes: 66af25799940 ("NFSD: add courteous server support for thread with only delegation") Cc: stable@vger.kernel.org Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4callback.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 50e468bdb8d48..cf6d29828f4e5 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1583,8 +1583,11 @@ nfsd4_run_cb_work(struct work_struct *work) nfsd4_process_cb_update(cb); clnt = clp->cl_cb_client; - if (!clnt) { - /* Callback channel broken, or client killed; give up: */ + if (!clnt || clp->cl_state == NFSD4_COURTESY) { + /* + * Callback channel broken, client killed or + * nfs4_client in courtesy state; give up. + */ nfsd41_destroy_cb(cb); return; } -- GitLab From 4990d098433db18c854e75fb0f90d941eb7d479e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 10 Feb 2025 11:43:31 -0500 Subject: [PATCH 0461/1585] NFSD: Fix CB_GETATTR status fix Jeff says: Now that I look, 1b3e26a5ccbf is wrong. The patch on the ml was correct, but the one that got committed is different. It should be: status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status); if (unlikely(status || cb->cb_status)) If "status" is non-zero, decoding failed (usu. BADXDR), but we also want to bail out and not decode the rest of the call if the decoded cb_status is non-zero. That's not happening here, cb_seq_status has already been checked and is non-zero, so this ends up trying to decode the rest of the CB_GETATTR reply when it doesn't exist. Reported-by: Jeff Layton Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219737 Fixes: 1b3e26a5ccbf ("NFSD: fix decoding in nfs4_xdr_dec_cb_getattr") Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4callback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index cf6d29828f4e5..484077200c5d7 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -679,7 +679,7 @@ static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp, return status; status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status); - if (unlikely(status || cb->cb_seq_status)) + if (unlikely(status || cb->cb_status)) return status; if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0) return -NFSERR_BAD_XDR; -- GitLab From f3f08c3acfb8860e07a22814a344e83c99ad7398 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 10 Feb 2025 09:27:09 -1000 Subject: [PATCH 0462/1585] sched_ext: Fix incorrect assumption about migration disabled tasks in task_can_run_on_remote_rq() While fixing migration disabled task handling, 32966821574c ("sched_ext: Fix migration disabled handling in targeted dispatches") assumed that a migration disabled task's ->cpus_ptr would only have the pinned CPU. While this is eventually true for migration disabled tasks that are switched out, ->cpus_ptr update is performed by migrate_disable_switch() which is called right before context_switch() in __scheduler(). However, the task is enqueued earlier during pick_next_task() via put_prev_task_scx(), so there is a race window where another CPU can see the task on a DSQ. If the CPU tries to dispatch the migration disabled task while in that window, task_allowed_on_cpu() will succeed and task_can_run_on_remote_rq() will subsequently trigger SCHED_WARN(is_migration_disabled()). WARNING: CPU: 8 PID: 1837 at kernel/sched/ext.c:2466 task_can_run_on_remote_rq+0x12e/0x140 Sched_ext: layered (enabled+all), task: runnable_at=-10ms RIP: 0010:task_can_run_on_remote_rq+0x12e/0x140 ... consume_dispatch_q+0xab/0x220 scx_bpf_dsq_move_to_local+0x58/0xd0 bpf_prog_84dd17b0654b6cf0_layered_dispatch+0x290/0x1cfa bpf__sched_ext_ops_dispatch+0x4b/0xab balance_one+0x1fe/0x3b0 balance_scx+0x61/0x1d0 prev_balance+0x46/0xc0 __pick_next_task+0x73/0x1c0 __schedule+0x206/0x1730 schedule+0x3a/0x160 __do_sys_sched_yield+0xe/0x20 do_syscall_64+0xbb/0x1e0 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fix it by converting the SCHED_WARN() back to a regular failure path. Also, perform the migration disabled test before task_allowed_on_cpu() test so that BPF schedulers which fail to handle migration disabled tasks can be noticed easily. While at it, adjust scx_ops_error() message for !task_allowed_on_cpu() case for brevity and consistency. Signed-off-by: Tejun Heo Fixes: 32966821574c ("sched_ext: Fix migration disabled handling in targeted dispatches") Acked-by: Andrea Righi Reported-by: Jake Hillion --- kernel/sched/ext.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index e01144340d679..54edd0e2132a6 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -2343,6 +2343,25 @@ static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq, SCHED_WARN_ON(task_cpu(p) == cpu); + /* + * If @p has migration disabled, @p->cpus_ptr is updated to contain only + * the pinned CPU in migrate_disable_switch() while @p is being switched + * out. However, put_prev_task_scx() is called before @p->cpus_ptr is + * updated and thus another CPU may see @p on a DSQ inbetween leading to + * @p passing the below task_allowed_on_cpu() check while migration is + * disabled. + * + * Test the migration disabled state first as the race window is narrow + * and the BPF scheduler failing to check migration disabled state can + * easily be masked if task_allowed_on_cpu() is done first. + */ + if (unlikely(is_migration_disabled(p))) { + if (trigger_error) + scx_ops_error("SCX_DSQ_LOCAL[_ON] cannot move migration disabled %s[%d] from CPU %d to %d", + p->comm, p->pid, task_cpu(p), cpu); + return false; + } + /* * We don't require the BPF scheduler to avoid dispatching to offline * CPUs mostly for convenience but also because CPUs can go offline @@ -2351,17 +2370,11 @@ static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq, */ if (!task_allowed_on_cpu(p, cpu)) { if (trigger_error) - scx_ops_error("SCX_DSQ_LOCAL[_ON] verdict target cpu %d not allowed for %s[%d]", - cpu_of(rq), p->comm, p->pid); + scx_ops_error("SCX_DSQ_LOCAL[_ON] target CPU %d not allowed for %s[%d]", + cpu, p->comm, p->pid); return false; } - /* - * If @p has migration disabled, @p->cpus_ptr only contains its current - * CPU and the above task_allowed_on_cpu() test should have failed. - */ - SCHED_WARN_ON(is_migration_disabled(p)); - if (!scx_rq_online(rq)) return false; -- GitLab From cb6cc8ed77177c7553c2f8ac8605d32de58f43ac Mon Sep 17 00:00:00 2001 From: Furong Xu <0x1207@gmail.com> Date: Fri, 7 Feb 2025 16:56:39 +0800 Subject: [PATCH 0463/1585] net: stmmac: Apply new page pool parameters when SPH is enabled Commit df542f669307 ("net: stmmac: Switch to zero-copy in non-XDP RX path") makes DMA write received frame into buffer at offset of NET_SKB_PAD and sets page pool parameters to sync from offset of NET_SKB_PAD. But when Header Payload Split is enabled, the header is written at offset of NET_SKB_PAD, while the payload is written at offset of zero. Uncorrect offset parameter for the payload breaks dma coherence [1] since both CPU and DMA touch the page buffer from offset of zero which is not handled by the page pool sync parameter. And in case the DMA cannot split the received frame, for example, a large L2 frame, pp_params.max_len should grow to match the tail of entire frame. [1] https://lore.kernel.org/netdev/d465f277-bac7-439f-be1d-9a47dfe2d951@nvidia.com/ Reported-by: Jon Hunter Reported-by: Brad Griffis Suggested-by: Ido Schimmel Fixes: df542f669307 ("net: stmmac: Switch to zero-copy in non-XDP RX path") Signed-off-by: Furong Xu <0x1207@gmail.com> Tested-by: Jon Hunter Tested-by: Thierry Reding Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20250207085639.13580-1-0x1207@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b34ebb916b898..c0ae7db96f46f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2094,6 +2094,11 @@ static int __alloc_dma_rx_desc_resources(struct stmmac_priv *priv, pp_params.offset = stmmac_rx_offset(priv); pp_params.max_len = dma_conf->dma_buf_sz; + if (priv->sph) { + pp_params.offset = 0; + pp_params.max_len += stmmac_rx_offset(priv); + } + rx_q->page_pool = page_pool_create(&pp_params); if (IS_ERR(rx_q->page_pool)) { ret = PTR_ERR(rx_q->page_pool); -- GitLab From 48145a57d4bbe3496e8e4880b23ea6b511e6e519 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Feb 2025 13:58:33 +0000 Subject: [PATCH 0464/1585] ndisc: ndisc_send_redirect() must use dev_get_by_index_rcu() ndisc_send_redirect() is called under RCU protection, not RTNL. It must use dev_get_by_index_rcu() instead of __dev_get_by_index() Fixes: 2f17becfbea5 ("vrf: check the original netdevice for generating redirect") Signed-off-by: Eric Dumazet Cc: Stephen Suryaputra Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250207135841.1948589-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ndisc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index d044c67019de6..264b10a947577 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1694,7 +1694,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) bool ret; if (netif_is_l3_master(skb->dev)) { - dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif); + dev = dev_get_by_index_rcu(dev_net(skb->dev), IPCB(skb)->iif); if (!dev) return; } -- GitLab From 628e6d18930bbd21f2d4562228afe27694f66da9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Feb 2025 13:58:34 +0000 Subject: [PATCH 0465/1585] ndisc: use RCU protection in ndisc_alloc_skb() ndisc_alloc_skb() can be called without RTNL or RCU being held. Add RCU protection to avoid possible UAF. Fixes: de09334b9326 ("ndisc: Introduce ndisc_alloc_skb() helper.") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250207135841.1948589-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ndisc.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 264b10a947577..90f8aa2d7af2e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -418,15 +418,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, { int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; struct sk_buff *skb; skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC); - if (!skb) { - ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n", - __func__); + if (!skb) return NULL; - } skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -437,7 +433,9 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, /* Manually assign socket ownership as we avoid calling * sock_alloc_send_pskb() to bypass wmem buffer limits */ - skb_set_owner_w(skb, sk); + rcu_read_lock(); + skb_set_owner_w(skb, dev_net_rcu(dev)->ipv6.ndisc_sk); + rcu_read_unlock(); return skb; } -- GitLab From becbd5850c03ed33b232083dd66c6e38c0c0e569 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Feb 2025 13:58:35 +0000 Subject: [PATCH 0466/1585] neighbour: use RCU protection in __neigh_notify() __neigh_notify() can be called without RTNL or RCU protection. Use RCU protection to avoid potential UAF. Fixes: 426b5303eb43 ("[NETNS]: Modify the neighbour table code so it handles multiple network namespaces") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250207135841.1948589-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 89656d180bc60..bd0251bd74a1f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3447,10 +3447,12 @@ static const struct seq_operations neigh_stat_seq_ops = { static void __neigh_notify(struct neighbour *n, int type, int flags, u32 pid) { - struct net *net = dev_net(n->dev); struct sk_buff *skb; int err = -ENOBUFS; + struct net *net; + rcu_read_lock(); + net = dev_net_rcu(n->dev); skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC); if (skb == NULL) goto errout; @@ -3463,9 +3465,11 @@ static void __neigh_notify(struct neighbour *n, int type, int flags, goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); - return; + goto out; errout: rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); +out: + rcu_read_unlock(); } void neigh_app_ns(struct neighbour *n) -- GitLab From a42b69f692165ec39db42d595f4f65a4c8f42e44 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Feb 2025 13:58:36 +0000 Subject: [PATCH 0467/1585] arp: use RCU protection in arp_xmit() arp_xmit() can be called without RTNL or RCU protection. Use RCU protection to avoid potential UAF. Fixes: 29a26a568038 ("netfilter: Pass struct net into the netfilter hooks") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250207135841.1948589-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/arp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index cb9a7ed8abd3a..f23a1ec6694cb 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -659,10 +659,12 @@ static int arp_xmit_finish(struct net *net, struct sock *sk, struct sk_buff *skb */ void arp_xmit(struct sk_buff *skb) { + rcu_read_lock(); /* Send it off, maybe filter it using firewalling first. */ NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, - dev_net(skb->dev), NULL, skb, NULL, skb->dev, + dev_net_rcu(skb->dev), NULL, skb, NULL, skb->dev, arp_xmit_finish); + rcu_read_unlock(); } EXPORT_SYMBOL(arp_xmit); -- GitLab From 90b2f49a502fa71090d9f4fe29a2f51fe5dff76d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Feb 2025 13:58:37 +0000 Subject: [PATCH 0468/1585] openvswitch: use RCU protection in ovs_vport_cmd_fill_info() ovs_vport_cmd_fill_info() can be called without RTNL or RCU. Use RCU protection and dev_net_rcu() to avoid potential UAF. Fixes: 9354d4520342 ("openvswitch: reliable interface indentification in port dumps") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250207135841.1948589-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/openvswitch/datapath.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 225f6048867f4..5d548eda742df 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2101,6 +2101,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, { struct ovs_header *ovs_header; struct ovs_vport_stats vport_stats; + struct net *net_vport; int err; ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family, @@ -2117,12 +2118,15 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex)) goto nla_put_failure; - if (!net_eq(net, dev_net(vport->dev))) { - int id = peernet2id_alloc(net, dev_net(vport->dev), gfp); + rcu_read_lock(); + net_vport = dev_net_rcu(vport->dev); + if (!net_eq(net, net_vport)) { + int id = peernet2id_alloc(net, net_vport, GFP_ATOMIC); if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id)) - goto nla_put_failure; + goto nla_put_failure_unlock; } + rcu_read_unlock(); ovs_vport_get_stats(vport, &vport_stats); if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS, @@ -2143,6 +2147,8 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, genlmsg_end(skb, ovs_header); return 0; +nla_put_failure_unlock: + rcu_read_unlock(); nla_put_failure: err = -EMSGSIZE; error: -- GitLab From 6d0ce46a93135d96b7fa075a94a88fe0da8e8773 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Feb 2025 13:58:38 +0000 Subject: [PATCH 0469/1585] vrf: use RCU protection in l3mdev_l3_out() l3mdev_l3_out() can be called without RCU being held: raw_sendmsg() ip_push_pending_frames() ip_send_skb() ip_local_out() __ip_local_out() l3mdev_ip_out() Add rcu_read_lock() / rcu_read_unlock() pair to avoid a potential UAF. Fixes: a8e3e1a9f020 ("net: l3mdev: Add hook to output path") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250207135841.1948589-7-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/l3mdev.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 2d6141f28b530..f7fe796e8429a 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -198,10 +198,12 @@ struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto) if (netif_is_l3_slave(dev)) { struct net_device *master; + rcu_read_lock(); master = netdev_master_upper_dev_get_rcu(dev); if (master && master->l3mdev_ops->l3mdev_l3_out) skb = master->l3mdev_ops->l3mdev_l3_out(master, sk, skb, proto); + rcu_read_unlock(); } return skb; -- GitLab From ed6ae1f325d3c43966ec1b62ac1459e2b8e45640 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Feb 2025 13:58:39 +0000 Subject: [PATCH 0470/1585] ndisc: extend RCU protection in ndisc_send_skb() ndisc_send_skb() can be called without RTNL or RCU held. Acquire rcu_read_lock() earlier, so that we can use dev_net_rcu() and avoid a potential UAF. Fixes: 1762f7e88eb3 ("[NETNS][IPV6] ndisc - make socket control per namespace") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250207135841.1948589-8-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ndisc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 90f8aa2d7af2e..8699d1a188dc4 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -471,16 +471,20 @@ static void ip6_nd_hdr(struct sk_buff *skb, void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, const struct in6_addr *saddr) { + struct icmp6hdr *icmp6h = icmp6_hdr(skb); struct dst_entry *dst = skb_dst(skb); - struct net *net = dev_net(skb->dev); - struct sock *sk = net->ipv6.ndisc_sk; struct inet6_dev *idev; + struct net *net; + struct sock *sk; int err; - struct icmp6hdr *icmp6h = icmp6_hdr(skb); u8 type; type = icmp6h->icmp6_type; + rcu_read_lock(); + + net = dev_net_rcu(skb->dev); + sk = net->ipv6.ndisc_sk; if (!dst) { struct flowi6 fl6; int oif = skb->dev->ifindex; @@ -488,6 +492,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif); dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { + rcu_read_unlock(); kfree_skb(skb); return; } @@ -502,7 +507,6 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len); - rcu_read_lock(); idev = __in6_dev_get(dst->dev); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); -- GitLab From 087c1faa594fa07a66933d750c0b2610aa1a2946 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Feb 2025 13:58:40 +0000 Subject: [PATCH 0471/1585] ipv6: mcast: extend RCU protection in igmp6_send() igmp6_send() can be called without RTNL or RCU being held. Extend RCU protection so that we can safely fetch the net pointer and avoid a potential UAF. Note that we no longer can use sock_alloc_send_skb() because ipv6.igmp_sk uses GFP_KERNEL allocations which can sleep. Instead use alloc_skb() and charge the net->ipv6.igmp_sk socket under RCU protection. Fixes: b8ad0cbc58f7 ("[NETNS][IPV6] mcast - handle several network namespace") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250207135841.1948589-9-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/mcast.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 9dfdb40988b0f..81a739ebf7094 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2165,21 +2165,21 @@ static void mld_send_cr(struct inet6_dev *idev) static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) { - struct net *net = dev_net(dev); - struct sock *sk = net->ipv6.igmp_sk; + const struct in6_addr *snd_addr, *saddr; + int err, len, payload_len, full_len; + struct in6_addr addr_buf; struct inet6_dev *idev; struct sk_buff *skb; struct mld_msg *hdr; - const struct in6_addr *snd_addr, *saddr; - struct in6_addr addr_buf; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - int err, len, payload_len, full_len; u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; - struct flowi6 fl6; struct dst_entry *dst; + struct flowi6 fl6; + struct net *net; + struct sock *sk; if (type == ICMPV6_MGM_REDUCTION) snd_addr = &in6addr_linklocal_allrouters; @@ -2190,19 +2190,21 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) payload_len = len + sizeof(ra); full_len = sizeof(struct ipv6hdr) + payload_len; - rcu_read_lock(); - IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTREQUESTS); - rcu_read_unlock(); + skb = alloc_skb(hlen + tlen + full_len, GFP_KERNEL); - skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err); + rcu_read_lock(); + net = dev_net_rcu(dev); + idev = __in6_dev_get(dev); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); if (!skb) { - rcu_read_lock(); - IP6_INC_STATS(net, __in6_dev_get(dev), - IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); rcu_read_unlock(); return; } + sk = net->ipv6.igmp_sk; + skb_set_owner_w(skb, sk); + skb->priority = TC_PRIO_CONTROL; skb_reserve(skb, hlen); @@ -2227,9 +2229,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) IPPROTO_ICMPV6, csum_partial(hdr, len, 0)); - rcu_read_lock(); - idev = __in6_dev_get(skb->dev); - icmpv6_flow_init(sk, &fl6, type, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); -- GitLab From de1d0d160f64ee76df1d364d521b2faf465a091c Mon Sep 17 00:00:00 2001 From: Artur Weber Date: Thu, 6 Feb 2025 18:46:00 +0100 Subject: [PATCH 0472/1585] gpio: bcm-kona: Fix GPIO lock/unlock for banks above bank 0 The GPIO lock/unlock functions clear/write a bit to the relevant register for each bank. However, due to an oversight the bit that was being written was based on the total GPIO number, not the index of the GPIO within the relevant bank, causing it to fail for any GPIO above 32 (thus any GPIO for banks above bank 0). Fix lock/unlock for these banks by using the correct bit. Fixes: bdb93c03c550 ("gpio: bcm281xx: Centralize register locking") Reviewed-by: Florian Fainelli Reviewed-by: Markus Mayer Signed-off-by: Artur Weber Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250206-kona-gpio-fixes-v2-1-409135eab780@gmail.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-bcm-kona.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c index 5321ef98f4427..77bd4ec93a231 100644 --- a/drivers/gpio/gpio-bcm-kona.c +++ b/drivers/gpio/gpio-bcm-kona.c @@ -86,11 +86,12 @@ static void bcm_kona_gpio_lock_gpio(struct bcm_kona_gpio *kona_gpio, u32 val; unsigned long flags; int bank_id = GPIO_BANK(gpio); + int bit = GPIO_BIT(gpio); raw_spin_lock_irqsave(&kona_gpio->lock, flags); val = readl(kona_gpio->reg_base + GPIO_PWD_STATUS(bank_id)); - val |= BIT(gpio); + val |= BIT(bit); bcm_kona_gpio_write_lock_regs(kona_gpio->reg_base, bank_id, val); raw_spin_unlock_irqrestore(&kona_gpio->lock, flags); @@ -102,11 +103,12 @@ static void bcm_kona_gpio_unlock_gpio(struct bcm_kona_gpio *kona_gpio, u32 val; unsigned long flags; int bank_id = GPIO_BANK(gpio); + int bit = GPIO_BIT(gpio); raw_spin_lock_irqsave(&kona_gpio->lock, flags); val = readl(kona_gpio->reg_base + GPIO_PWD_STATUS(bank_id)); - val &= ~BIT(gpio); + val &= ~BIT(bit); bcm_kona_gpio_write_lock_regs(kona_gpio->reg_base, bank_id, val); raw_spin_unlock_irqrestore(&kona_gpio->lock, flags); -- GitLab From 57f5db77a915cc29461a679a6bcae7097967be1a Mon Sep 17 00:00:00 2001 From: Artur Weber Date: Thu, 6 Feb 2025 18:46:01 +0100 Subject: [PATCH 0473/1585] gpio: bcm-kona: Make sure GPIO bits are unlocked when requesting IRQ The settings for all GPIOs are locked by default in bcm_kona_gpio_reset. The settings for a GPIO are unlocked when requesting it as a GPIO, but not when requesting it as an interrupt, causing the IRQ settings to not get applied. Fix this by making sure to unlock the right bits when an IRQ is requested. To avoid a situation where an IRQ being released causes a lock despite the same GPIO being used by a GPIO request or vice versa, add an unlock counter and only lock if it reaches 0. Fixes: 757651e3d60e ("gpio: bcm281xx: Add GPIO driver") Reviewed-by: Florian Fainelli Reviewed-by: Markus Mayer Signed-off-by: Artur Weber Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250206-kona-gpio-fixes-v2-2-409135eab780@gmail.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-bcm-kona.c | 67 +++++++++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 12 deletions(-) diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c index 77bd4ec93a231..17f3f210fee9d 100644 --- a/drivers/gpio/gpio-bcm-kona.c +++ b/drivers/gpio/gpio-bcm-kona.c @@ -69,6 +69,22 @@ struct bcm_kona_gpio { struct bcm_kona_gpio_bank { int id; int irq; + /* + * Used to keep track of lock/unlock operations for each GPIO in the + * bank. + * + * All GPIOs are locked by default (see bcm_kona_gpio_reset), and the + * unlock count for all GPIOs is 0 by default. Each unlock increments + * the counter, and each lock decrements the counter. + * + * The lock function only locks the GPIO once its unlock counter is + * down to 0. This is necessary because the GPIO is unlocked in two + * places in this driver: once for requested GPIOs, and once for + * requested IRQs. Since it is possible for a GPIO to be requested + * as both a GPIO and an IRQ, we need to ensure that we don't lock it + * too early. + */ + u8 gpio_unlock_count[GPIO_PER_BANK]; /* Used in the interrupt handler */ struct bcm_kona_gpio *kona_gpio; }; @@ -87,14 +103,23 @@ static void bcm_kona_gpio_lock_gpio(struct bcm_kona_gpio *kona_gpio, unsigned long flags; int bank_id = GPIO_BANK(gpio); int bit = GPIO_BIT(gpio); + struct bcm_kona_gpio_bank *bank = &kona_gpio->banks[bank_id]; - raw_spin_lock_irqsave(&kona_gpio->lock, flags); + if (bank->gpio_unlock_count[bit] == 0) { + dev_err(kona_gpio->gpio_chip.parent, + "Unbalanced locks for GPIO %u\n", gpio); + return; + } - val = readl(kona_gpio->reg_base + GPIO_PWD_STATUS(bank_id)); - val |= BIT(bit); - bcm_kona_gpio_write_lock_regs(kona_gpio->reg_base, bank_id, val); + if (--bank->gpio_unlock_count[bit] == 0) { + raw_spin_lock_irqsave(&kona_gpio->lock, flags); - raw_spin_unlock_irqrestore(&kona_gpio->lock, flags); + val = readl(kona_gpio->reg_base + GPIO_PWD_STATUS(bank_id)); + val |= BIT(bit); + bcm_kona_gpio_write_lock_regs(kona_gpio->reg_base, bank_id, val); + + raw_spin_unlock_irqrestore(&kona_gpio->lock, flags); + } } static void bcm_kona_gpio_unlock_gpio(struct bcm_kona_gpio *kona_gpio, @@ -104,14 +129,19 @@ static void bcm_kona_gpio_unlock_gpio(struct bcm_kona_gpio *kona_gpio, unsigned long flags; int bank_id = GPIO_BANK(gpio); int bit = GPIO_BIT(gpio); + struct bcm_kona_gpio_bank *bank = &kona_gpio->banks[bank_id]; - raw_spin_lock_irqsave(&kona_gpio->lock, flags); + if (bank->gpio_unlock_count[bit] == 0) { + raw_spin_lock_irqsave(&kona_gpio->lock, flags); - val = readl(kona_gpio->reg_base + GPIO_PWD_STATUS(bank_id)); - val &= ~BIT(bit); - bcm_kona_gpio_write_lock_regs(kona_gpio->reg_base, bank_id, val); + val = readl(kona_gpio->reg_base + GPIO_PWD_STATUS(bank_id)); + val &= ~BIT(bit); + bcm_kona_gpio_write_lock_regs(kona_gpio->reg_base, bank_id, val); - raw_spin_unlock_irqrestore(&kona_gpio->lock, flags); + raw_spin_unlock_irqrestore(&kona_gpio->lock, flags); + } + + ++bank->gpio_unlock_count[bit]; } static int bcm_kona_gpio_get_dir(struct gpio_chip *chip, unsigned gpio) @@ -362,6 +392,7 @@ static void bcm_kona_gpio_irq_mask(struct irq_data *d) kona_gpio = irq_data_get_irq_chip_data(d); reg_base = kona_gpio->reg_base; + raw_spin_lock_irqsave(&kona_gpio->lock, flags); val = readl(reg_base + GPIO_INT_MASK(bank_id)); @@ -384,6 +415,7 @@ static void bcm_kona_gpio_irq_unmask(struct irq_data *d) kona_gpio = irq_data_get_irq_chip_data(d); reg_base = kona_gpio->reg_base; + raw_spin_lock_irqsave(&kona_gpio->lock, flags); val = readl(reg_base + GPIO_INT_MSKCLR(bank_id)); @@ -479,15 +511,26 @@ static void bcm_kona_gpio_irq_handler(struct irq_desc *desc) static int bcm_kona_gpio_irq_reqres(struct irq_data *d) { struct bcm_kona_gpio *kona_gpio = irq_data_get_irq_chip_data(d); + unsigned int gpio = d->hwirq; + + /* + * We need to unlock the GPIO before any other operations are performed + * on the relevant GPIO configuration registers + */ + bcm_kona_gpio_unlock_gpio(kona_gpio, gpio); - return gpiochip_reqres_irq(&kona_gpio->gpio_chip, d->hwirq); + return gpiochip_reqres_irq(&kona_gpio->gpio_chip, gpio); } static void bcm_kona_gpio_irq_relres(struct irq_data *d) { struct bcm_kona_gpio *kona_gpio = irq_data_get_irq_chip_data(d); + unsigned int gpio = d->hwirq; + + /* Once we no longer use it, lock the GPIO again */ + bcm_kona_gpio_lock_gpio(kona_gpio, gpio); - gpiochip_relres_irq(&kona_gpio->gpio_chip, d->hwirq); + gpiochip_relres_irq(&kona_gpio->gpio_chip, gpio); } static struct irq_chip bcm_gpio_irq_chip = { -- GitLab From 615279db222c3ac56d5c93716efd72b843295c1f Mon Sep 17 00:00:00 2001 From: Artur Weber Date: Thu, 6 Feb 2025 18:46:02 +0100 Subject: [PATCH 0474/1585] gpio: bcm-kona: Add missing newline to dev_err format string Add a missing newline to the format string of the "Couldn't get IRQ for bank..." error message. Fixes: 757651e3d60e ("gpio: bcm281xx: Add GPIO driver") Reviewed-by: Florian Fainelli Reviewed-by: Markus Mayer Signed-off-by: Artur Weber Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250206-kona-gpio-fixes-v2-3-409135eab780@gmail.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-bcm-kona.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c index 17f3f210fee9d..64908f1a5e7f9 100644 --- a/drivers/gpio/gpio-bcm-kona.c +++ b/drivers/gpio/gpio-bcm-kona.c @@ -659,7 +659,7 @@ static int bcm_kona_gpio_probe(struct platform_device *pdev) bank->irq = platform_get_irq(pdev, i); bank->kona_gpio = kona_gpio; if (bank->irq < 0) { - dev_err(dev, "Couldn't get IRQ for bank %d", i); + dev_err(dev, "Couldn't get IRQ for bank %d\n", i); ret = -ENOENT; goto err_irq_domain; } -- GitLab From 7b07b040257c1b658ef3eca86e4b6ae02d65069c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 7 Feb 2025 10:39:02 +0100 Subject: [PATCH 0475/1585] ptp: vmclock: Add .owner to vmclock_miscdev_fops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without the .owner field, the module can be unloaded while /dev/vmclock0 is open, leading to an oops. Fixes: 205032724226 ("ptp: Add support for the AMZNC10C 'vmclock' device") Cc: stable@vger.kernel.org Signed-off-by: David Woodhouse Signed-off-by: Thomas Weißschuh Signed-off-by: Paolo Abeni --- drivers/ptp/ptp_vmclock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c index 0a2cfc8ad3c54..dbc73e5382935 100644 --- a/drivers/ptp/ptp_vmclock.c +++ b/drivers/ptp/ptp_vmclock.c @@ -414,6 +414,7 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf, } static const struct file_operations vmclock_miscdev_fops = { + .owner = THIS_MODULE, .mmap = vmclock_miscdev_mmap, .read = vmclock_miscdev_read, }; -- GitLab From f7d07cd4f77d77f366c8ffbb8ba8b61f614e5fce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 7 Feb 2025 10:39:03 +0100 Subject: [PATCH 0476/1585] ptp: vmclock: Set driver data before its usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If vmclock_ptp_register() fails during probing, vmclock_remove() is called to clean up the ptp clock and misc device. It uses dev_get_drvdata() to access the vmclock state. However the driver data is not yet set at this point. Assign the driver data earlier. Fixes: 205032724226 ("ptp: Add support for the AMZNC10C 'vmclock' device") Cc: stable@vger.kernel.org Signed-off-by: Thomas Weißschuh Reviewed-by: Mateusz Polchlopek Acked-by: Richard Cochran Reviewed-by: David Woodhouse Signed-off-by: Paolo Abeni --- drivers/ptp/ptp_vmclock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c index dbc73e5382935..1ba30a2da570f 100644 --- a/drivers/ptp/ptp_vmclock.c +++ b/drivers/ptp/ptp_vmclock.c @@ -525,6 +525,8 @@ static int vmclock_probe(struct platform_device *pdev) goto out; } + dev_set_drvdata(dev, st); + if (le32_to_cpu(st->clk->magic) != VMCLOCK_MAGIC || le32_to_cpu(st->clk->size) > resource_size(&st->res) || le16_to_cpu(st->clk->version) != 1) { @@ -588,8 +590,6 @@ static int vmclock_probe(struct platform_device *pdev) (st->miscdev.minor && st->ptp_clock) ? ", " : "", st->ptp_clock ? "PTP" : ""); - dev_set_drvdata(dev, st); - out: return ret; } -- GitLab From 39e926c3a21b25af6cae479fbb752f193240ce03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 7 Feb 2025 10:39:04 +0100 Subject: [PATCH 0477/1585] ptp: vmclock: Don't unregister misc device if it was not registered MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vmclock_remove() tries to detect the successful registration of the misc device based on the value of its minor value. However that check is incorrect if the misc device registration was not attempted in the first place. Always initialize the minor number, so the check works properly. Fixes: 205032724226 ("ptp: Add support for the AMZNC10C 'vmclock' device") Cc: stable@vger.kernel.org Signed-off-by: Thomas Weißschuh Acked-by: Richard Cochran Reviewed-by: David Woodhouse Signed-off-by: Paolo Abeni --- drivers/ptp/ptp_vmclock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c index 1ba30a2da570f..9b8bd626a3973 100644 --- a/drivers/ptp/ptp_vmclock.c +++ b/drivers/ptp/ptp_vmclock.c @@ -550,6 +550,8 @@ static int vmclock_probe(struct platform_device *pdev) goto out; } + st->miscdev.minor = MISC_DYNAMIC_MINOR; + /* * If the structure is big enough, it can be mapped to userspace. * Theoretically a guest OS even using larger pages could still @@ -557,7 +559,6 @@ static int vmclock_probe(struct platform_device *pdev) * cross that bridge if/when we come to it. */ if (le32_to_cpu(st->clk->size) >= PAGE_SIZE) { - st->miscdev.minor = MISC_DYNAMIC_MINOR; st->miscdev.fops = &vmclock_miscdev_fops; st->miscdev.name = st->name; -- GitLab From 9a884c3800b207bac36e27be4ec7277c78a84568 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 7 Feb 2025 10:39:05 +0100 Subject: [PATCH 0478/1585] ptp: vmclock: Clean up miscdev and ptp clock through devres MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most resources owned by the vmclock device are managed through devres. Only the miscdev and ptp clock are managed manually. This makes the code slightly harder to understand than necessary. Switch them over to devres and remove the now unnecessary drvdata. Signed-off-by: Thomas Weißschuh Acked-by: Richard Cochran Reviewed-by: David Woodhouse Signed-off-by: Paolo Abeni --- drivers/ptp/ptp_vmclock.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c index 9b8bd626a3973..09c023fb94b7e 100644 --- a/drivers/ptp/ptp_vmclock.c +++ b/drivers/ptp/ptp_vmclock.c @@ -421,10 +421,9 @@ static const struct file_operations vmclock_miscdev_fops = { /* module operations */ -static void vmclock_remove(struct platform_device *pdev) +static void vmclock_remove(void *data) { - struct device *dev = &pdev->dev; - struct vmclock_state *st = dev_get_drvdata(dev); + struct vmclock_state *st = data; if (st->ptp_clock) ptp_clock_unregister(st->ptp_clock); @@ -525,8 +524,6 @@ static int vmclock_probe(struct platform_device *pdev) goto out; } - dev_set_drvdata(dev, st); - if (le32_to_cpu(st->clk->magic) != VMCLOCK_MAGIC || le32_to_cpu(st->clk->size) > resource_size(&st->res) || le16_to_cpu(st->clk->version) != 1) { @@ -552,6 +549,10 @@ static int vmclock_probe(struct platform_device *pdev) st->miscdev.minor = MISC_DYNAMIC_MINOR; + ret = devm_add_action_or_reset(&pdev->dev, vmclock_remove, st); + if (ret) + goto out; + /* * If the structure is big enough, it can be mapped to userspace. * Theoretically a guest OS even using larger pages could still @@ -574,7 +575,6 @@ static int vmclock_probe(struct platform_device *pdev) if (IS_ERR(st->ptp_clock)) { ret = PTR_ERR(st->ptp_clock); st->ptp_clock = NULL; - vmclock_remove(pdev); goto out; } } @@ -603,7 +603,6 @@ MODULE_DEVICE_TABLE(acpi, vmclock_acpi_ids); static struct platform_driver vmclock_platform_driver = { .probe = vmclock_probe, - .remove = vmclock_remove, .driver = { .name = "vmclock", .acpi_match_table = vmclock_acpi_ids, -- GitLab From b4c1fde5ced93d9f4ad89e2c940d3fd56ad82288 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 7 Feb 2025 10:39:06 +0100 Subject: [PATCH 0479/1585] ptp: vmclock: Remove goto-based cleanup logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vmclock_probe() uses an "out:" label to return from the function on error. This indicates that some cleanup operation is necessary. However the label does not do anything as all resources are managed through devres, making the code slightly harder to read. Remove the label and just return directly. Signed-off-by: Thomas Weißschuh Acked-by: Richard Cochran Reviewed-by: David Woodhouse Signed-off-by: Paolo Abeni --- drivers/ptp/ptp_vmclock.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c index 09c023fb94b7e..b3a83b03d9c14 100644 --- a/drivers/ptp/ptp_vmclock.c +++ b/drivers/ptp/ptp_vmclock.c @@ -506,14 +506,13 @@ static int vmclock_probe(struct platform_device *pdev) if (ret) { dev_info(dev, "Failed to obtain physical address: %d\n", ret); - goto out; + return ret; } if (resource_size(&st->res) < VMCLOCK_MIN_SIZE) { dev_info(dev, "Region too small (0x%llx)\n", resource_size(&st->res)); - ret = -EINVAL; - goto out; + return -EINVAL; } st->clk = devm_memremap(dev, st->res.start, resource_size(&st->res), MEMREMAP_WB | MEMREMAP_DEC); @@ -521,37 +520,34 @@ static int vmclock_probe(struct platform_device *pdev) ret = PTR_ERR(st->clk); dev_info(dev, "failed to map shared memory\n"); st->clk = NULL; - goto out; + return ret; } if (le32_to_cpu(st->clk->magic) != VMCLOCK_MAGIC || le32_to_cpu(st->clk->size) > resource_size(&st->res) || le16_to_cpu(st->clk->version) != 1) { dev_info(dev, "vmclock magic fields invalid\n"); - ret = -EINVAL; - goto out; + return -EINVAL; } ret = ida_alloc(&vmclock_ida, GFP_KERNEL); if (ret < 0) - goto out; + return ret; st->index = ret; ret = devm_add_action_or_reset(&pdev->dev, vmclock_put_idx, st); if (ret) - goto out; + return ret; st->name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "vmclock%d", st->index); - if (!st->name) { - ret = -ENOMEM; - goto out; - } + if (!st->name) + return -ENOMEM; st->miscdev.minor = MISC_DYNAMIC_MINOR; ret = devm_add_action_or_reset(&pdev->dev, vmclock_remove, st); if (ret) - goto out; + return ret; /* * If the structure is big enough, it can be mapped to userspace. @@ -565,7 +561,7 @@ static int vmclock_probe(struct platform_device *pdev) ret = misc_register(&st->miscdev); if (ret) - goto out; + return ret; } /* If there is valid clock information, register a PTP clock */ @@ -575,15 +571,14 @@ static int vmclock_probe(struct platform_device *pdev) if (IS_ERR(st->ptp_clock)) { ret = PTR_ERR(st->ptp_clock); st->ptp_clock = NULL; - goto out; + return ret; } } if (!st->miscdev.minor && !st->ptp_clock) { /* Neither miscdev nor PTP registered */ dev_info(dev, "vmclock: Neither miscdev nor PTP available; not registering\n"); - ret = -ENODEV; - goto out; + return -ENODEV; } dev_info(dev, "%s: registered %s%s%s\n", st->name, @@ -591,8 +586,7 @@ static int vmclock_probe(struct platform_device *pdev) (st->miscdev.minor && st->ptp_clock) ? ", " : "", st->ptp_clock ? "PTP" : ""); - out: - return ret; + return 0; } static const struct acpi_device_id vmclock_acpi_ids[] = { -- GitLab From 49f27f29446a5bfe633dd2cc0cfebd48a1a5e77f Mon Sep 17 00:00:00 2001 From: Vitaliy Shevtsov Date: Fri, 31 Jan 2025 20:26:55 +0500 Subject: [PATCH 0480/1585] wifi: nl80211: reject cooked mode if it is set along with other flags It is possible to set both MONITOR_FLAG_COOK_FRAMES and MONITOR_FLAG_ACTIVE flags simultaneously on the same monitor interface from the userspace. This causes a sub-interface to be created with no IEEE80211_SDATA_IN_DRIVER bit set because the monitor interface is in the cooked state and it takes precedence over all other states. When the interface is then being deleted the kernel calls WARN_ONCE() from check_sdata_in_driver() because of missing that bit. Fix this by rejecting MONITOR_FLAG_COOK_FRAMES if it is set along with other flags. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 66f7ac50ed7c ("nl80211: Add monitor interface configuration flags") Cc: stable@vger.kernel.org Reported-by: syzbot+2e5c1e55b9e5c28a3da7@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=2e5c1e55b9e5c28a3da7 Signed-off-by: Vitaliy Shevtsov Link: https://patch.msgid.link/20250131152657.5606-1-v.shevtsov@mt-integration.ru Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d7d3da0f6833d..fdb2aac951d18 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4220,6 +4220,11 @@ static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) if (flags[flag]) *mntrflags |= (1< Date: Tue, 4 Feb 2025 13:31:29 +0100 Subject: [PATCH 0481/1585] wifi: mac80211: Cleanup sta TXQs on flush Drop the sta TXQs on flush when the drivers is not supporting flush. ieee80211_set_disassoc() tries to clean up everything for the sta. But it ignored queued frames in the sta TX queues when the driver isn't supporting the flush driver ops. Signed-off-by: Alexander Wetzel Link: https://patch.msgid.link/20250204123129.9162-1-Alexander@wetzel-home.de Signed-off-by: Johannes Berg --- net/mac80211/util.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index f6b631faf4f7f..7f02bd5891eb9 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -687,7 +687,7 @@ void __ieee80211_flush_queues(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, unsigned int queues, bool drop) { - if (!local->ops->flush) + if (!local->ops->flush && !drop) return; /* @@ -714,7 +714,8 @@ void __ieee80211_flush_queues(struct ieee80211_local *local, } } - drv_flush(local, sdata, queues, drop); + if (local->ops->flush) + drv_flush(local, sdata, queues, drop); ieee80211_wake_queues_by_reason(&local->hw, queues, IEEE80211_QUEUE_STOP_REASON_FLUSH, -- GitLab From 646262c71aca87bb66945933abe4e620796d6c5a Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Tue, 4 Feb 2025 17:42:40 +0100 Subject: [PATCH 0482/1585] wifi: mac80211: remove debugfs dir for virtual monitor Don't call ieee80211_debugfs_recreate_netdev() for virtual monitor interface when deleting it. The virtual monitor interface shouldn't have debugfs entries and trying to update them will *create* them on deletion. And when the virtual monitor interface is created/destroyed multiple times we'll get warnings about debugfs name conflicts. Signed-off-by: Alexander Wetzel Link: https://patch.msgid.link/20250204164240.370153-1-Alexander@wetzel-home.de Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.c | 10 ++++++++-- net/mac80211/iface.c | 11 ++++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 299d38e9e8630..2fc60e1e77a55 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -116,8 +116,14 @@ void drv_remove_interface(struct ieee80211_local *local, sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER; - /* Remove driver debugfs entries */ - ieee80211_debugfs_recreate_netdev(sdata, sdata->vif.valid_links); + /* + * Remove driver debugfs entries. + * The virtual monitor interface doesn't get a debugfs + * entry, so it's exempt here. + */ + if (sdata != local->monitor_sdata) + ieee80211_debugfs_recreate_netdev(sdata, + sdata->vif.valid_links); trace_drv_remove_interface(local, sdata); local->ops->remove_interface(&local->hw, &sdata->vif); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 0ea7e77860b73..738de269e13f0 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1206,16 +1206,17 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) return; } - RCU_INIT_POINTER(local->monitor_sdata, NULL); - mutex_unlock(&local->iflist_mtx); - - synchronize_net(); - + clear_bit(SDATA_STATE_RUNNING, &sdata->state); ieee80211_link_release_channel(&sdata->deflink); if (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) drv_remove_interface(local, sdata); + RCU_INIT_POINTER(local->monitor_sdata, NULL); + mutex_unlock(&local->iflist_mtx); + + synchronize_net(); + kfree(sdata); } -- GitLab From 7774e3920029398ad49dc848b23840593f14d515 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 9 Feb 2025 14:34:45 +0200 Subject: [PATCH 0483/1585] wifi: iwlwifi: fw: allocate chained SG tables for dump The firmware dumps can be pretty big, and since we use single pages for each SG table entry, even the table itself may end up being an order-5 allocation. Build chained tables so that we need not allocate a higher-order table here. This could be improved and cleaned up, e.g. by using the SG pool code or simply kvmalloc(), but all of that would require also updating the devcoredump first since that frees it all, so we need to be more careful. SG pool might also run against the CONFIG_ARCH_NO_SG_CHAIN limitation, which is irrelevant here. Also use _devcd_free_sgtable() for the error paths now, much simpler especially since it's in two places now. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250209143303.697c7a465ac9.Iea982df46b5c075bfb77ade36f187d99a70c63db@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 86 ++++++++++++++------- 1 file changed, 58 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index fb2ea38e89aca..6594216f873c4 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -558,41 +558,71 @@ static void iwl_dump_prph(struct iwl_fw_runtime *fwrt, } /* - * alloc_sgtable - allocates scallerlist table in the given size, - * fills it with pages and returns it + * alloc_sgtable - allocates (chained) scatterlist in the given size, + * fills it with pages and returns it * @size: the size (in bytes) of the table -*/ -static struct scatterlist *alloc_sgtable(int size) + */ +static struct scatterlist *alloc_sgtable(ssize_t size) { - int alloc_size, nents, i; - struct page *new_page; - struct scatterlist *iter; - struct scatterlist *table; + struct scatterlist *result = NULL, *prev; + int nents, i, n_prev; nents = DIV_ROUND_UP(size, PAGE_SIZE); - table = kcalloc(nents, sizeof(*table), GFP_KERNEL); - if (!table) - return NULL; - sg_init_table(table, nents); - iter = table; - for_each_sg(table, iter, sg_nents(table), i) { - new_page = alloc_page(GFP_KERNEL); - if (!new_page) { - /* release all previous allocated pages in the table */ - iter = table; - for_each_sg(table, iter, sg_nents(table), i) { - new_page = sg_page(iter); - if (new_page) - __free_page(new_page); - } - kfree(table); + +#define N_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(*result)) + /* + * We need an additional entry for table chaining, + * this ensures the loop can finish i.e. we can + * fit at least two entries per page (obviously, + * many more really fit.) + */ + BUILD_BUG_ON(N_ENTRIES_PER_PAGE < 2); + + while (nents > 0) { + struct scatterlist *new, *iter; + int n_fill, n_alloc; + + if (nents <= N_ENTRIES_PER_PAGE) { + /* last needed table */ + n_fill = nents; + n_alloc = nents; + nents = 0; + } else { + /* fill a page with entries */ + n_alloc = N_ENTRIES_PER_PAGE; + /* reserve one for chaining */ + n_fill = n_alloc - 1; + nents -= n_fill; + } + + new = kcalloc(n_alloc, sizeof(*new), GFP_KERNEL); + if (!new) { + if (result) + _devcd_free_sgtable(result); return NULL; } - alloc_size = min_t(int, size, PAGE_SIZE); - size -= PAGE_SIZE; - sg_set_page(iter, new_page, alloc_size, 0); + sg_init_table(new, n_alloc); + + if (!result) + result = new; + else + sg_chain(prev, n_prev, new); + prev = new; + n_prev = n_alloc; + + for_each_sg(new, iter, n_fill, i) { + struct page *new_page = alloc_page(GFP_KERNEL); + + if (!new_page) { + _devcd_free_sgtable(result); + return NULL; + } + + sg_set_page(iter, new_page, PAGE_SIZE, 0); + } } - return table; + + return result; } static void iwl_fw_get_prph_len(struct iwl_fw_runtime *fwrt, -- GitLab From 3f8aa0b8a53df2247a84eaf3b3aa38b6ef86cb1c Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Sun, 9 Feb 2025 14:34:46 +0200 Subject: [PATCH 0484/1585] wifi: iwlwifi: fw: avoid using an uninitialized variable iwl_fwrt_read_err_table can return true also when it failed to read the memory. In this case, err_id argument is not initialized, but the callers are still using it. Simply initialize it to 0. If the error table was read successfully it'll be overridden. Fixes: 43e0b2ada519 ("wifi: iwlwifi: fw: add an error table status getter") Signed-off-by: Miri Korenblit Reviewed-by: Emmanuel Grumbach Link: https://patch.msgid.link/20250209143303.37cdbba4eb56.I95fe9bd95303b8179f946766558a9f15f4fe254c@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/fw/dump.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dump.c b/drivers/net/wireless/intel/iwlwifi/fw/dump.c index 8e0c85a1240d7..c7b261c8ec969 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dump.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dump.c @@ -540,6 +540,9 @@ bool iwl_fwrt_read_err_table(struct iwl_trans *trans, u32 base, u32 *err_id) } err_info = {}; int ret; + if (err_id) + *err_id = 0; + if (!base) return false; -- GitLab From f9751163bffd3fe60794929829f810968c6de73d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 9 Feb 2025 14:34:47 +0200 Subject: [PATCH 0485/1585] wifi: iwlwifi: mvm: clean up ROC on failure If the firmware fails to start the session protection, then we do call iwl_mvm_roc_finished() here, but that won't do anything at all because IWL_MVM_STATUS_ROC_P2P_RUNNING was never set. Set IWL_MVM_STATUS_ROC_P2P_RUNNING in the failure/stop path. If it started successfully before, it's already set, so that doesn't matter, and if it didn't start it needs to be set to clean up. Not doing so will lead to a WARN_ON() later on a fresh remain- on-channel, since the link is already active when activated as it was never deactivated. Fixes: 35c1bbd93c4e ("wifi: iwlwifi: mvm: remove IWL_MVM_STATUS_NEED_FLUSH_P2P") Signed-off-by: Johannes Berg Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250209143303.0fe36c291068.I67f5dac742170dd937f11e4d4f937f45f71b7cb4@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/time-event.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 9216c43a35c4d..ebfa88b38b71b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -1030,6 +1030,8 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm, /* End TE, notify mac80211 */ mvmvif->time_event_data.id = SESSION_PROTECT_CONF_MAX_ID; mvmvif->time_event_data.link_id = -1; + /* set the bit so the ROC cleanup will actually clean up */ + set_bit(IWL_MVM_STATUS_ROC_P2P_RUNNING, &mvm->status); iwl_mvm_roc_finished(mvm); ieee80211_remain_on_channel_expired(mvm->hw); } else if (le32_to_cpu(notif->start)) { -- GitLab From d48ff3ce92259bae7e77732c7cfd7cbc7992c021 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 9 Feb 2025 14:34:48 +0200 Subject: [PATCH 0486/1585] wifi: iwlwifi: mvm: don't dump the firmware state upon RFKILL while suspend This is not really a firmware error. We need to reload the firmware, but this doesn't mean that we should consider this as a firmware error. When the firmware was restarted upon resume, this wasn't felt by the driver. Now that we keep the firmware running during suspend even if we don't have wowlan, this started to pop-up. Fixes: e8bb19c1d590 ("wifi: iwlwifi: support fast resume") Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250209143303.a10463a40318.I14131781c3124b58e60e1f5e9d793a2bc88b464c@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 77 ++++++++++++++------- 1 file changed, 51 insertions(+), 26 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 129b6bdf9ef90..82ca7f8b1bb27 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -3092,8 +3092,14 @@ static void iwl_mvm_d3_disconnect_iter(void *data, u8 *mac, ieee80211_resume_disconnect(vif); } -static bool iwl_mvm_check_rt_status(struct iwl_mvm *mvm, - struct ieee80211_vif *vif) +enum rt_status { + FW_ALIVE, + FW_NEEDS_RESET, + FW_ERROR, +}; + +static enum rt_status iwl_mvm_check_rt_status(struct iwl_mvm *mvm, + struct ieee80211_vif *vif) { u32 err_id; @@ -3101,29 +3107,35 @@ static bool iwl_mvm_check_rt_status(struct iwl_mvm *mvm, if (iwl_fwrt_read_err_table(mvm->trans, mvm->trans->dbg.lmac_error_event_table[0], &err_id)) { - if (err_id == RF_KILL_INDICATOR_FOR_WOWLAN && vif) { - struct cfg80211_wowlan_wakeup wakeup = { - .rfkill_release = true, - }; - ieee80211_report_wowlan_wakeup(vif, &wakeup, - GFP_KERNEL); + if (err_id == RF_KILL_INDICATOR_FOR_WOWLAN) { + IWL_WARN(mvm, "Rfkill was toggled during suspend\n"); + if (vif) { + struct cfg80211_wowlan_wakeup wakeup = { + .rfkill_release = true, + }; + + ieee80211_report_wowlan_wakeup(vif, &wakeup, + GFP_KERNEL); + } + + return FW_NEEDS_RESET; } - return true; + return FW_ERROR; } /* check if we have lmac2 set and check for error */ if (iwl_fwrt_read_err_table(mvm->trans, mvm->trans->dbg.lmac_error_event_table[1], NULL)) - return true; + return FW_ERROR; /* check for umac error */ if (iwl_fwrt_read_err_table(mvm->trans, mvm->trans->dbg.umac_error_event_table, NULL)) - return true; + return FW_ERROR; - return false; + return FW_ALIVE; } /* @@ -3492,6 +3504,7 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test) bool d0i3_first = fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_D0I3_END_FIRST); bool resume_notif_based = iwl_mvm_d3_resume_notif_based(mvm); + enum rt_status rt_status; bool keep = false; mutex_lock(&mvm->mutex); @@ -3515,14 +3528,19 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test) iwl_fw_dbg_read_d3_debug_data(&mvm->fwrt); - if (iwl_mvm_check_rt_status(mvm, vif)) { - IWL_ERR(mvm, "FW Error occurred during suspend. Restarting.\n"); + rt_status = iwl_mvm_check_rt_status(mvm, vif); + if (rt_status != FW_ALIVE) { set_bit(STATUS_FW_ERROR, &mvm->trans->status); - iwl_mvm_dump_nic_error_log(mvm); - iwl_dbg_tlv_time_point(&mvm->fwrt, - IWL_FW_INI_TIME_POINT_FW_ASSERT, NULL); - iwl_fw_dbg_collect_desc(&mvm->fwrt, &iwl_dump_desc_assert, - false, 0); + if (rt_status == FW_ERROR) { + IWL_ERR(mvm, "FW Error occurred during suspend. Restarting.\n"); + iwl_mvm_dump_nic_error_log(mvm); + iwl_dbg_tlv_time_point(&mvm->fwrt, + IWL_FW_INI_TIME_POINT_FW_ASSERT, + NULL); + iwl_fw_dbg_collect_desc(&mvm->fwrt, + &iwl_dump_desc_assert, + false, 0); + } ret = 1; goto err; } @@ -3679,6 +3697,7 @@ int iwl_mvm_fast_resume(struct iwl_mvm *mvm) .notif_expected = IWL_D3_NOTIF_D3_END_NOTIF, }; + enum rt_status rt_status; int ret; lockdep_assert_held(&mvm->mutex); @@ -3688,14 +3707,20 @@ int iwl_mvm_fast_resume(struct iwl_mvm *mvm) mvm->last_reset_or_resume_time_jiffies = jiffies; iwl_fw_dbg_read_d3_debug_data(&mvm->fwrt); - if (iwl_mvm_check_rt_status(mvm, NULL)) { - IWL_ERR(mvm, "FW Error occurred during suspend. Restarting.\n"); + rt_status = iwl_mvm_check_rt_status(mvm, NULL); + if (rt_status != FW_ALIVE) { set_bit(STATUS_FW_ERROR, &mvm->trans->status); - iwl_mvm_dump_nic_error_log(mvm); - iwl_dbg_tlv_time_point(&mvm->fwrt, - IWL_FW_INI_TIME_POINT_FW_ASSERT, NULL); - iwl_fw_dbg_collect_desc(&mvm->fwrt, &iwl_dump_desc_assert, - false, 0); + if (rt_status == FW_ERROR) { + IWL_ERR(mvm, + "iwl_mvm_check_rt_status failed, device is gone during suspend\n"); + iwl_mvm_dump_nic_error_log(mvm); + iwl_dbg_tlv_time_point(&mvm->fwrt, + IWL_FW_INI_TIME_POINT_FW_ASSERT, + NULL); + iwl_fw_dbg_collect_desc(&mvm->fwrt, + &iwl_dump_desc_assert, + false, 0); + } mvm->trans->state = IWL_TRANS_NO_FW; ret = -ENODEV; -- GitLab From d73d2c6e3313f0ba60711ab4f4b9044eddca9ca5 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 9 Feb 2025 14:34:49 +0200 Subject: [PATCH 0487/1585] wifi: iwlwifi: mvm: don't try to talk to a dead firmware This fixes: bad state = 0 WARNING: CPU: 10 PID: 702 at drivers/net/wireless/inel/iwlwifi/iwl-trans.c:178 iwl_trans_send_cmd+0xba/0xe0 [iwlwifi] Call Trace: ? __warn+0xca/0x1c0 ? iwl_trans_send_cmd+0xba/0xe0 [iwlwifi 64fa9ad799a0e0d2ba53d4af93a53ad9a531f8d4] iwl_fw_dbg_clear_monitor_buf+0xd7/0x110 [iwlwifi 64fa9ad799a0e0d2ba53d4af93a53ad9a531f8d4] _iwl_dbgfs_fw_dbg_clear_write+0xe2/0x120 [iwlmvm 0e8adb18cea92d2c341766bcc10b18699290068a] Ask whether the firmware is alive before sending a command. Fixes: 268712dc3b34 ("wifi: iwlwifi: mvm: add a debugfs hook to clear the monitor data") Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250209143303.8e1597b62c70.I12ea71dd9b805b095c9fc12a10c9f34a4e801b61@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index 83e3c11603622..55d035b896e91 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -1479,6 +1479,13 @@ static ssize_t iwl_dbgfs_fw_dbg_clear_write(struct iwl_mvm *mvm, if (mvm->trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_9000) return -EOPNOTSUPP; + /* + * If the firmware is not running, silently succeed since there is + * no data to clear. + */ + if (!iwl_mvm_firmware_running(mvm)) + return count; + mutex_lock(&mvm->mutex); iwl_fw_dbg_clear_monitor_buf(&mvm->fwrt); mutex_unlock(&mvm->mutex); -- GitLab From a03e2082e678ea10d0d8bdf3ed933eb05a8ddbb0 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 9 Feb 2025 14:34:50 +0200 Subject: [PATCH 0488/1585] wifi: iwlwifi: mvm: use the right version of the rate API The firmware uses the newer version of the API in recent devices. For older devices, we translate the rate to the new format. Don't parse the rate with old parsing macros. Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250209143303.13d70cdcbb4e.Ic92193bce4013b70a823cfef250ee79c16cf7c17@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 09fd8752046ee..14ea89f931bbf 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -995,7 +995,7 @@ iwl_mvm_decode_he_phy_ru_alloc(struct iwl_mvm_rx_phy_data *phy_data, */ u8 ru = le32_get_bits(phy_data->d1, IWL_RX_PHY_DATA1_HE_RU_ALLOC_MASK); u32 rate_n_flags = phy_data->rate_n_flags; - u32 he_type = rate_n_flags & RATE_MCS_HE_TYPE_MSK_V1; + u32 he_type = rate_n_flags & RATE_MCS_HE_TYPE_MSK; u8 offs = 0; rx_status->bw = RATE_INFO_BW_HE_RU; @@ -1050,13 +1050,13 @@ iwl_mvm_decode_he_phy_ru_alloc(struct iwl_mvm_rx_phy_data *phy_data, if (he_mu) he_mu->flags2 |= - le16_encode_bits(FIELD_GET(RATE_MCS_CHAN_WIDTH_MSK_V1, + le16_encode_bits(FIELD_GET(RATE_MCS_CHAN_WIDTH_MSK, rate_n_flags), IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW); - else if (he_type == RATE_MCS_HE_TYPE_TRIG_V1) + else if (he_type == RATE_MCS_HE_TYPE_TRIG) he->data6 |= cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW_KNOWN) | - le16_encode_bits(FIELD_GET(RATE_MCS_CHAN_WIDTH_MSK_V1, + le16_encode_bits(FIELD_GET(RATE_MCS_CHAN_WIDTH_MSK, rate_n_flags), IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW); } -- GitLab From e0dc2c1bef722cbf16ae557690861e5f91208129 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 9 Feb 2025 14:34:51 +0200 Subject: [PATCH 0489/1585] wifi: iwlwifi: limit printed string from FW file There's no guarantee here that the file is always with a NUL-termination, so reading the string may read beyond the end of the TLV. If that's the last TLV in the file, it can perhaps even read beyond the end of the file buffer. Fix that by limiting the print format to the size of the buffer we have. Fixes: aee1b6385e29 ("iwlwifi: support fseq tlv and print fseq version") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250209143303.cb5f9d0c2f5d.Idec695d53c6c2234aade306f7647b576c7e3d928@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index d3a65f33097cb..352b6e73e08f3 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -1181,7 +1181,7 @@ static int iwl_parse_tlv_firmware(struct iwl_drv *drv, if (tlv_len != sizeof(*fseq_ver)) goto invalid_tlv_len; - IWL_INFO(drv, "TLV_FW_FSEQ_VERSION: %s\n", + IWL_INFO(drv, "TLV_FW_FSEQ_VERSION: %.32s\n", fseq_ver->version); } break; -- GitLab From 3b08e608d50c44ca1135beed179f266aa0461da7 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Sun, 9 Feb 2025 14:34:52 +0200 Subject: [PATCH 0490/1585] wifi: iwlwifi: Free pages allocated when failing to build A-MSDU When failing to prepare the data needed for A-MSDU transmission, the memory allocated for the TSO management was not freed. Fix it. Fixes: 7f5e3038f029 ("wifi: iwlwifi: map entire SKB when sending AMSDUs") Signed-off-by: Ilan Peer Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250209143303.bc27fad9b3d5.Ibf43dd18fb652b1a59061204e081f11c9fa34a3f@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 1f483f15c2383..dce5096db82b6 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -347,6 +347,7 @@ iwl_tfh_tfd *iwl_txq_gen2_build_tx_amsdu(struct iwl_trans *trans, return tfd; out_err: + iwl_pcie_free_tso_pages(trans, skb, out_meta); iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd); return NULL; } -- GitLab From 3640dbc1f75ce15d128ea4af44226960d894f3fd Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Sun, 9 Feb 2025 14:34:53 +0200 Subject: [PATCH 0491/1585] wifi: iwlwifi: Fix A-MSDU TSO preparation The TSO preparation assumed that the skb head contained the headers while the rest of the data was in the fragments. Since this is not always true, e.g., it is possible that the data was linearised, modify the TSO preparation to start the data processing after the network headers. Fixes: 7f5e3038f029 ("wifi: iwlwifi: map entire SKB when sending AMSDUs") Signed-off-by: Ilan Peer Reviewed-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250209143303.75769a4769bf.Iaf79e8538093cdf8c446c292cc96164ad6498f61@changeid Signed-off-by: Johannes Berg --- .../wireless/intel/iwlwifi/pcie/internal.h | 5 +++-- .../net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 5 +++-- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 20 +++++++++++-------- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 856b7e9f717d5..45460f93d24ad 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2003-2015, 2018-2024 Intel Corporation + * Copyright (C) 2003-2015, 2018-2025 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -646,7 +646,8 @@ dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, unsigned int offset, unsigned int len); struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_cmd_meta *cmd_meta, - u8 **hdr, unsigned int hdr_room); + u8 **hdr, unsigned int hdr_room, + unsigned int offset); void iwl_pcie_free_tso_pages(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_cmd_meta *cmd_meta); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index dce5096db82b6..401919f9fe88e 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* * Copyright (C) 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020, 2023-2024 Intel Corporation + * Copyright (C) 2018-2020, 2023-2025 Intel Corporation */ #include #include @@ -188,7 +188,8 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans, (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)); /* Our device supports 9 segments at most, it will fit in 1 page */ - sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room); + sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room, + snap_ip_tcp_hdrlen + hdr_len); if (!sgt) return -ENOMEM; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 334ebd4c12fa7..7b6071a59b694 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2003-2014, 2018-2021, 2023-2024 Intel Corporation + * Copyright (C) 2003-2014, 2018-2021, 2023-2025 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -1855,6 +1855,7 @@ dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, unsigned int offset, * @cmd_meta: command meta to store the scatter list information for unmapping * @hdr: output argument for TSO headers * @hdr_room: requested length for TSO headers + * @offset: offset into the data from which mapping should start * * Allocate space for a scatter gather list and TSO headers and map the SKB * using the scatter gather list. The SKB is unmapped again when the page is @@ -1864,18 +1865,20 @@ dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, unsigned int offset, */ struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_cmd_meta *cmd_meta, - u8 **hdr, unsigned int hdr_room) + u8 **hdr, unsigned int hdr_room, + unsigned int offset) { struct sg_table *sgt; + unsigned int n_segments; if (WARN_ON_ONCE(skb_has_frag_list(skb))) return NULL; + n_segments = DIV_ROUND_UP(skb->len - offset, skb_shinfo(skb)->gso_size); *hdr = iwl_pcie_get_page_hdr(trans, hdr_room + __alignof__(struct sg_table) + sizeof(struct sg_table) + - (skb_shinfo(skb)->nr_frags + 1) * - sizeof(struct scatterlist), + n_segments * sizeof(struct scatterlist), skb); if (!*hdr) return NULL; @@ -1883,11 +1886,11 @@ struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, sgt = (void *)PTR_ALIGN(*hdr + hdr_room, __alignof__(struct sg_table)); sgt->sgl = (void *)(sgt + 1); - sg_init_table(sgt->sgl, skb_shinfo(skb)->nr_frags + 1); + sg_init_table(sgt->sgl, n_segments); /* Only map the data, not the header (it is copied to the TSO page) */ - sgt->orig_nents = skb_to_sgvec(skb, sgt->sgl, skb_headlen(skb), - skb->data_len); + sgt->orig_nents = skb_to_sgvec(skb, sgt->sgl, offset, + skb->len - offset); if (WARN_ON_ONCE(sgt->orig_nents <= 0)) return NULL; @@ -1939,7 +1942,8 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)) + iv_len; /* Our device supports 9 segments at most, it will fit in 1 page */ - sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room); + sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room, + snap_ip_tcp_hdrlen + hdr_len + iv_len); if (!sgt) return -ENOMEM; -- GitLab From 86ede0a61f8576a84bb0a93c5d9861d2ec1cdf9a Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Sun, 9 Feb 2025 15:09:38 +0100 Subject: [PATCH 0492/1585] mtd: rawnand: qcom: fix broken config in qcom_param_page_type_exec Fix broken config in qcom_param_page_type_exec caused by copy-paste error from commit 0c08080fd71c ("mtd: rawnand: qcom: use FIELD_PREP and GENMASK") In qcom_param_page_type_exec the value needs to be set to nandc->regs->cfg0 instead of host->cfg0. This wrong configuration caused the Qcom NANDC driver to malfunction on any device that makes use of it (IPQ806x, IPQ40xx, IPQ807x, IPQ60xx) with the following error: [ 0.885369] nand: device found, Manufacturer ID: 0x2c, Chip ID: 0xaa [ 0.885909] nand: Micron NAND 256MiB 1,8V 8-bit [ 0.892499] nand: 256 MiB, SLC, erase size: 128 KiB, page size: 2048, OOB size: 64 [ 0.896823] nand: ECC (step, strength) = (512, 8) does not fit in OOB [ 0.896836] qcom-nandc 79b0000.nand-controller: No valid ECC settings possible [ 0.910996] bam-dma-engine 7984000.dma-controller: Cannot free busy channel [ 0.918070] qcom-nandc: probe of 79b0000.nand-controller failed with error -28 Restore original configuration fix the problem and makes the driver work again. Also restore the wrongly dropped cpu_to_le32 to correctly support BE systems. Cc: stable@vger.kernel.org Fixes: 0c08080fd71c ("mtd: rawnand: qcom: use FIELD_PREP and GENMASK") Tested-by: Robert Marko # IPQ8074 and IPQ6018 Signed-off-by: Christian Marangi Reviewed-by: Manivannan Sadhasivam Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/qcom_nandc.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index d2d2aeee42a7e..6720b547892ba 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -1881,18 +1881,18 @@ static int qcom_param_page_type_exec(struct nand_chip *chip, const struct nand_ nandc->regs->addr0 = 0; nandc->regs->addr1 = 0; - host->cfg0 = FIELD_PREP(CW_PER_PAGE_MASK, 0) | - FIELD_PREP(UD_SIZE_BYTES_MASK, 512) | - FIELD_PREP(NUM_ADDR_CYCLES_MASK, 5) | - FIELD_PREP(SPARE_SIZE_BYTES_MASK, 0); - - host->cfg1 = FIELD_PREP(NAND_RECOVERY_CYCLES_MASK, 7) | - FIELD_PREP(BAD_BLOCK_BYTE_NUM_MASK, 17) | - FIELD_PREP(CS_ACTIVE_BSY, 0) | - FIELD_PREP(BAD_BLOCK_IN_SPARE_AREA, 1) | - FIELD_PREP(WR_RD_BSY_GAP_MASK, 2) | - FIELD_PREP(WIDE_FLASH, 0) | - FIELD_PREP(DEV0_CFG1_ECC_DISABLE, 1); + nandc->regs->cfg0 = cpu_to_le32(FIELD_PREP(CW_PER_PAGE_MASK, 0) | + FIELD_PREP(UD_SIZE_BYTES_MASK, 512) | + FIELD_PREP(NUM_ADDR_CYCLES_MASK, 5) | + FIELD_PREP(SPARE_SIZE_BYTES_MASK, 0)); + + nandc->regs->cfg1 = cpu_to_le32(FIELD_PREP(NAND_RECOVERY_CYCLES_MASK, 7) | + FIELD_PREP(BAD_BLOCK_BYTE_NUM_MASK, 17) | + FIELD_PREP(CS_ACTIVE_BSY, 0) | + FIELD_PREP(BAD_BLOCK_IN_SPARE_AREA, 1) | + FIELD_PREP(WR_RD_BSY_GAP_MASK, 2) | + FIELD_PREP(WIDE_FLASH, 0) | + FIELD_PREP(DEV0_CFG1_ECC_DISABLE, 1)); if (!nandc->props->qpic_version2) nandc->regs->ecc_buf_cfg = cpu_to_le32(ECC_CFG_ECC_DISABLE); -- GitLab From 2afd96a4a0b1d62c7a44227e535b073926d73368 Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Tue, 11 Feb 2025 16:39:41 +0800 Subject: [PATCH 0493/1585] ALSA: hda/tas2781: Update tas2781 hda SPI driver Because firmware issue of platform, found spi device is not stable, so add status check before firmware download, and remove some operations which is not must in current stage. Signed-off-by: Baojun Xu Fixes: bb5f86ea50ff ("ALSA: hda/tas2781: Add tas2781 hda SPI driver") Link: https://patch.msgid.link/20250211083941.5574-1-baojun.xu@ti.com Signed-off-by: Takashi Iwai --- sound/pci/hda/tas2781_hda_spi.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/sound/pci/hda/tas2781_hda_spi.c b/sound/pci/hda/tas2781_hda_spi.c index a42fa990e7b9e..04db80af53c08 100644 --- a/sound/pci/hda/tas2781_hda_spi.c +++ b/sound/pci/hda/tas2781_hda_spi.c @@ -912,7 +912,7 @@ static void tasdev_fw_ready(const struct firmware *fmw, void *context) struct tasdevice_priv *tas_priv = context; struct tas2781_hda *tas_hda = dev_get_drvdata(tas_priv->dev); struct hda_codec *codec = tas_priv->codec; - int i, j, ret; + int i, j, ret, val; pm_runtime_get_sync(tas_priv->dev); guard(mutex)(&tas_priv->codec_lock); @@ -981,13 +981,16 @@ static void tasdev_fw_ready(const struct firmware *fmw, void *context) /* Perform AMP reset before firmware download. */ tas_priv->rcabin.profile_cfg_id = TAS2781_PRE_POST_RESET_CFG; - tasdevice_spi_tuning_switch(tas_priv, 0); tas2781_spi_reset(tas_priv); tas_priv->rcabin.profile_cfg_id = 0; - tasdevice_spi_tuning_switch(tas_priv, 1); tas_priv->fw_state = TASDEVICE_DSP_FW_ALL_OK; - ret = tasdevice_spi_prmg_load(tas_priv, 0); + ret = tasdevice_spi_dev_read(tas_priv, TAS2781_REG_CLK_CONFIG, &val); + if (ret < 0) + goto out; + + if (val == TAS2781_REG_CLK_CONFIG_RESET) + ret = tasdevice_spi_prmg_load(tas_priv, 0); if (ret < 0) { dev_err(tas_priv->dev, "FW download failed = %d\n", ret); goto out; @@ -1001,7 +1004,6 @@ static void tasdev_fw_ready(const struct firmware *fmw, void *context) * If calibrated data occurs error, dsp will still works with default * calibrated data inside algo. */ - tas_priv->save_calibration(tas_priv); out: if (fmw) @@ -1160,7 +1162,8 @@ static int tas2781_runtime_suspend(struct device *dev) guard(mutex)(&tas_hda->priv->codec_lock); - tasdevice_spi_tuning_switch(tas_hda->priv, 1); + if (tas_hda->priv->playback_started) + tasdevice_spi_tuning_switch(tas_hda->priv, 1); tas_hda->priv->cur_book = -1; tas_hda->priv->cur_conf = -1; @@ -1174,7 +1177,8 @@ static int tas2781_runtime_resume(struct device *dev) guard(mutex)(&tas_hda->priv->codec_lock); - tasdevice_spi_tuning_switch(tas_hda->priv, 0); + if (tas_hda->priv->playback_started) + tasdevice_spi_tuning_switch(tas_hda->priv, 0); return 0; } @@ -1189,12 +1193,9 @@ static int tas2781_system_suspend(struct device *dev) return ret; /* Shutdown chip before system suspend */ - tasdevice_spi_tuning_switch(tas_hda->priv, 1); - tas2781_spi_reset(tas_hda->priv); - /* - * Reset GPIO may be shared, so cannot reset here. - * However beyond this point, amps may be powered down. - */ + if (tas_hda->priv->playback_started) + tasdevice_spi_tuning_switch(tas_hda->priv, 1); + return 0; } -- GitLab From db79e75460fc59b19f9c89d4b068e61cee59f37d Mon Sep 17 00:00:00 2001 From: "Chester A. Unal" Date: Fri, 24 Jan 2025 10:28:00 +0000 Subject: [PATCH 0494/1585] USB: serial: option: add MeiG Smart SLM828 MeiG Smart SLM828 is an LTE-A CAT6 modem with the mPCIe form factor. The "Cls=ff(vend.) Sub=10 Prot=02" and "Cls=ff(vend.) Sub=10 Prot=03" interfaces respond to AT commands. Add these interfaces. The product ID the modem uses is shared across multiple modems. Therefore, add comments to describe which interface is used for which modem. T: Bus=01 Lev=01 Prnt=05 Port=01 Cnt=01 Dev#= 6 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2dee ProdID=4d22 Rev=05.04 S: Manufacturer=MEIG S: Product=LTE-A Module S: SerialNumber=4da7ec42 C: #Ifs= 6 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=10 Prot=01 Driver=(none) E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=10 Prot=02 Driver=(none) E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=10 Prot=03 Driver=(none) E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=10 Prot=04 Driver=(none) E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=88(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=10 Prot=05 Driver=qmi_wwan E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Chester A. Unal Link: https://lore.kernel.org/20250124-for-johan-meig-slm828-v2-1-6b4cd3f6344f@arinc9.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 1e2ae0c6c41c7..887a1c687b52c 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -621,7 +621,10 @@ static void option_instat_callback(struct urb *urb); /* MeiG Smart Technology products */ #define MEIGSMART_VENDOR_ID 0x2dee -/* MeiG Smart SRM815/SRM825L based on Qualcomm 315 */ +/* + * MeiG Smart SLM828, SRM815, and SRM825L use the same product ID. SLM828 is + * based on Qualcomm SDX12. SRM815 and SRM825L are based on Qualcomm 315. + */ #define MEIGSMART_PRODUCT_SRM825L 0x4d22 /* MeiG Smart SLM320 based on UNISOC UIS8910 */ #define MEIGSMART_PRODUCT_SLM320 0x4d41 @@ -2405,10 +2408,12 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SLM320, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SLM770A, 0xff, 0, 0) }, - { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0, 0) }, - { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0xff, 0x30) }, - { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0xff, 0x40) }, - { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0xff, 0x60) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0, 0) }, /* MeiG Smart SRM815 */ + { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0x10, 0x02) }, /* MeiG Smart SLM828 */ + { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0x10, 0x03) }, /* MeiG Smart SLM828 */ + { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0xff, 0x30) }, /* MeiG Smart SRM815 and SRM825L */ + { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0xff, 0x40) }, /* MeiG Smart SRM825L */ + { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0xff, 0x60) }, /* MeiG Smart SRM825L */ { USB_DEVICE_INTERFACE_CLASS(0x1bbb, 0x0530, 0xff), /* TCL IK512 MBIM */ .driver_info = NCTRL(1) }, { USB_DEVICE_INTERFACE_CLASS(0x1bbb, 0x0640, 0xff), /* TCL IK512 ECM */ -- GitLab From 5728c92ae112301936006c5e305677beb1a7f578 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Fri, 24 Jan 2025 13:16:44 -0600 Subject: [PATCH 0495/1585] mfd: syscon: Restore device_node_to_regmap() for non-syscon nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit ba5095ebbc7a ("mfd: syscon: Allow syscon nodes without a "syscon" compatible") broke drivers which call device_node_to_regmap() on nodes without a "syscon" compatible. Restore the prior behavior for device_node_to_regmap(). This also makes using device_node_to_regmap() incompatible with of_syscon_register_regmap() again, so add kerneldoc for device_node_to_regmap() and syscon_node_to_regmap() to make it clear how and when each one should be used. Fixes: ba5095ebbc7a ("mfd: syscon: Allow syscon nodes without a "syscon" compatible") Reported-by: Vaishnav Achath Signed-off-by: Rob Herring (Arm) Reviewed-by: Daniel Golle Reviewed-by: AngeloGioacchino Del Regno Tested-by: Chen-Yu Tsai Tested-by: Nishanth Menon Tested-by: Daniel Golle Tested-by: Frank Wunderlich Tested-by: Dhruva Gole Tested-by: Nícolas F. R. A. Prado Tested-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20250124191644.2309790-1-robh@kernel.org Signed-off-by: Lee Jones --- drivers/mfd/syscon.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/mfd/syscon.c b/drivers/mfd/syscon.c index 226915ca3c93d..aa4a9940b569a 100644 --- a/drivers/mfd/syscon.c +++ b/drivers/mfd/syscon.c @@ -159,6 +159,7 @@ static struct syscon *of_syscon_register(struct device_node *np, bool check_res) } static struct regmap *device_node_get_regmap(struct device_node *np, + bool create_regmap, bool check_res) { struct syscon *entry, *syscon = NULL; @@ -172,7 +173,7 @@ static struct regmap *device_node_get_regmap(struct device_node *np, } if (!syscon) { - if (of_device_is_compatible(np, "syscon")) + if (create_regmap) syscon = of_syscon_register(np, check_res); else syscon = ERR_PTR(-EINVAL); @@ -233,15 +234,37 @@ int of_syscon_register_regmap(struct device_node *np, struct regmap *regmap) } EXPORT_SYMBOL_GPL(of_syscon_register_regmap); +/** + * device_node_to_regmap() - Get or create a regmap for specified device node + * @np: Device tree node + * + * Get a regmap for the specified device node. If there's not an existing + * regmap, then one is instantiated. This function should not be used if the + * device node has a custom regmap driver or has resources (clocks, resets) to + * be managed. Use syscon_node_to_regmap() instead for those cases. + * + * Return: regmap ptr on success, negative error code on failure. + */ struct regmap *device_node_to_regmap(struct device_node *np) { - return device_node_get_regmap(np, false); + return device_node_get_regmap(np, true, false); } EXPORT_SYMBOL_GPL(device_node_to_regmap); +/** + * syscon_node_to_regmap() - Get or create a regmap for specified syscon device node + * @np: Device tree node + * + * Get a regmap for the specified device node. If there's not an existing + * regmap, then one is instantiated if the node is a generic "syscon". This + * function is safe to use for a syscon registered with + * of_syscon_register_regmap(). + * + * Return: regmap ptr on success, negative error code on failure. + */ struct regmap *syscon_node_to_regmap(struct device_node *np) { - return device_node_get_regmap(np, true); + return device_node_get_regmap(np, of_device_is_compatible(np, "syscon"), true); } EXPORT_SYMBOL_GPL(syscon_node_to_regmap); -- GitLab From c979fb5ece2dc11cc9cc3d5c66f750e210bfdee2 Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Wed, 5 Feb 2025 18:16:45 +0100 Subject: [PATCH 0496/1585] USB: serial: option: add Telit Cinterion FN990B compositions Add the following Telit Cinterion FN990B40 compositions: 0x10d0: rmnet + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (AT) + tty (diag) + DPL + QDSS (Qualcomm Debug SubSystem) + adb T: Bus=01 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 17 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10d0 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FN990 S: SerialNumber=43b38f19 C: #Ifs= 9 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 6 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=8c(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 7 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=70 Driver=(none) E: Ad=8d(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 8 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10d1: MBIM + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (AT) + tty (diag) + DPL + QDSS (Qualcomm Debug SubSystem) + adb T: Bus=01 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10d1 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FN990 S: SerialNumber=43b38f19 C: #Ifs=10 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 7 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=8c(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 8 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=70 Driver=(none) E: Ad=8d(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 9 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10d2: RNDIS + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (AT) + tty (diag) + DPL + QDSS (Qualcomm Debug SubSystem) + adb T: Bus=01 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 18 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10d2 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FN990 S: SerialNumber=43b38f19 C: #Ifs=10 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=ef(misc ) Sub=04 Prot=01 Driver=rndis_host E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 7 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=8c(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 8 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=70 Driver=(none) E: Ad=8d(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 9 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10d3: ECM + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (AT) + tty (diag) + DPL + QDSS (Qualcomm Debug SubSystem) + adb T: Bus=01 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 20 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10d3 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FN990 S: SerialNumber=43b38f19 C: #Ifs=10 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=06 Prot=00 Driver=cdc_ether E: Ad=82(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 7 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=8c(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 8 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=70 Driver=(none) E: Ad=8d(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 9 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Cc: stable@vger.kernel.org Signed-off-by: Fabio Porcedda Reviewed-by: Daniele Palmas Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 887a1c687b52c..7f6eff505085d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1406,6 +1406,22 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0) | NCTRL(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10c8, 0xff), /* Telit FE910C04 (rmnet) */ .driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) }, + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d0, 0x60) }, /* Telit FN990B (rmnet) */ + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d0, 0x40) }, + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d0, 0x30), + .driver_info = NCTRL(5) }, + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d1, 0x60) }, /* Telit FN990B (MBIM) */ + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d1, 0x40) }, + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d1, 0x30), + .driver_info = NCTRL(6) }, + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d2, 0x60) }, /* Telit FN990B (RNDIS) */ + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d2, 0x40) }, + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d2, 0x30), + .driver_info = NCTRL(6) }, + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d3, 0x60) }, /* Telit FN990B (ECM) */ + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d3, 0x40) }, + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d3, 0x30), + .driver_info = NCTRL(6) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), -- GitLab From 12606fe73f33647c5e79bf666833bf0b225e649d Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Wed, 5 Feb 2025 18:16:47 +0100 Subject: [PATCH 0497/1585] USB: serial: option: fix Telit Cinterion FN990A name The correct name for FN990 is FN990A so use it in order to avoid confusion with FN990B. Signed-off-by: Fabio Porcedda Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 7f6eff505085d..4a59a40f750a6 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1370,15 +1370,15 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1063, 0xff), /* Telit LN920 (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, - { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1070, 0xff), /* Telit FN990 (rmnet) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1070, 0xff), /* Telit FN990A (rmnet) */ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, - { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1071, 0xff), /* Telit FN990 (MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1071, 0xff), /* Telit FN990A (MBIM) */ .driver_info = NCTRL(0) | RSVD(1) }, - { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1072, 0xff), /* Telit FN990 (RNDIS) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1072, 0xff), /* Telit FN990A (RNDIS) */ .driver_info = NCTRL(2) | RSVD(3) }, - { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1073, 0xff), /* Telit FN990 (ECM) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1073, 0xff), /* Telit FN990A (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, - { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1075, 0xff), /* Telit FN990 (PCIe) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1075, 0xff), /* Telit FN990A (PCIe) */ .driver_info = RSVD(0) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1080, 0xff), /* Telit FE990 (rmnet) */ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, -- GitLab From 1c316eb57c11fb3dc447b04ef765459cd61c8647 Mon Sep 17 00:00:00 2001 From: Alan Huang Date: Mon, 10 Feb 2025 11:04:22 +0800 Subject: [PATCH 0498/1585] bcachefs: Fix use after free acc->k.data should be used with the lock hold: 00221 ========= TEST generic/187 00221 run fstests generic/187 at 2025-02-09 21:08:10 00221 spectre-v4 mitigation disabled by command-line option 00222 bcachefs (vdc): starting version 1.20: directory_size opts=errors=ro 00222 bcachefs (vdc): initializing new filesystem 00222 bcachefs (vdc): going read-write 00222 bcachefs (vdc): marking superblocks 00222 bcachefs (vdc): initializing freespace 00222 bcachefs (vdc): done initializing freespace 00222 bcachefs (vdc): reading snapshots table 00222 bcachefs (vdc): reading snapshots done 00222 bcachefs (vdc): done starting filesystem 00222 bcachefs (vdc): shutting down 00222 bcachefs (vdc): going read-only 00222 bcachefs (vdc): finished waiting for writes to stop 00223 bcachefs (vdc): flushing journal and stopping allocators, journal seq 6 00223 bcachefs (vdc): flushing journal and stopping allocators complete, journal seq 8 00223 bcachefs (vdc): clean shutdown complete, journal seq 9 00223 bcachefs (vdc): marking filesystem clean 00223 bcachefs (vdc): shutdown complete 00223 bcachefs (vdc): starting version 1.20: directory_size opts=errors=ro 00223 bcachefs (vdc): initializing new filesystem 00223 bcachefs (vdc): going read-write 00223 bcachefs (vdc): marking superblocks 00223 bcachefs (vdc): initializing freespace 00223 bcachefs (vdc): done initializing freespace 00223 bcachefs (vdc): reading snapshots table 00223 bcachefs (vdc): reading snapshots done 00223 bcachefs (vdc): done starting filesystem 00244 hrtimer: interrupt took 123350440 ns 00264 bcachefs (vdc): shutting down 00264 bcachefs (vdc): going read-only 00264 bcachefs (vdc): finished waiting for writes to stop 00264 bcachefs (vdc): flushing journal and stopping allocators, journal seq 97 00265 bcachefs (vdc): flushing journal and stopping allocators complete, journal seq 101 00265 bcachefs (vdc): clean shutdown complete, journal seq 102 00265 bcachefs (vdc): marking filesystem clean 00265 bcachefs (vdc): shutdown complete 00265 bcachefs (vdc): starting version 1.20: directory_size opts=errors=ro 00265 bcachefs (vdc): recovering from clean shutdown, journal seq 102 00265 bcachefs (vdc): accounting_read... 00265 ================================================================== 00265 done 00265 BUG: KASAN: slab-use-after-free in bch2_fs_to_text+0x12b4/0x1728 00265 bcachefs (vdc): alloc_read... done 00265 bcachefs (vdc): stripes_read... done 00265 Read of size 4 at addr ffffff80c57eac00 by task cat/7531 00265 bcachefs (vdc): snapshots_read... done 00265 00265 CPU: 6 UID: 0 PID: 7531 Comm: cat Not tainted 6.13.0-rc3-ktest-g16fc6fa3819d #14103 00265 Hardware name: linux,dummy-virt (DT) 00265 Call trace: 00265 show_stack+0x1c/0x30 (C) 00265 dump_stack_lvl+0x6c/0x80 00265 print_report+0xf8/0x5d8 00265 kasan_report+0x90/0xd0 00265 __asan_report_load4_noabort+0x1c/0x28 00265 bch2_fs_to_text+0x12b4/0x1728 00265 bch2_fs_show+0x94/0x188 00265 sysfs_kf_seq_show+0x1a4/0x348 00265 kernfs_seq_show+0x12c/0x198 00265 seq_read_iter+0x27c/0xfd0 00265 kernfs_fop_read_iter+0x390/0x4f8 00265 vfs_read+0x480/0x7f0 00265 ksys_read+0xe0/0x1e8 00265 __arm64_sys_read+0x70/0xa8 00265 invoke_syscall.constprop.0+0x74/0x1e8 00265 do_el0_svc+0xc8/0x1c8 00265 el0_svc+0x20/0x60 00265 el0t_64_sync_handler+0x104/0x130 00265 el0t_64_sync+0x154/0x158 00265 00265 Allocated by task 7510: 00265 kasan_save_stack+0x28/0x50 00265 kasan_save_track+0x1c/0x38 00265 kasan_save_alloc_info+0x3c/0x50 00265 __kasan_kmalloc+0xac/0xb0 00265 __kmalloc_node_noprof+0x168/0x348 00265 __kvmalloc_node_noprof+0x20/0x140 00265 __bch2_darray_resize_noprof+0x90/0x1b0 00265 __bch2_accounting_mem_insert+0x76c/0xb08 00265 bch2_accounting_mem_insert+0x224/0x3b8 00265 bch2_accounting_mem_mod_locked+0x480/0xc58 00265 bch2_accounting_read+0xa94/0x3eb8 00265 bch2_run_recovery_pass+0x80/0x178 00265 bch2_run_recovery_passes+0x340/0x698 00265 bch2_fs_recovery+0x1c98/0x2bd8 00265 bch2_fs_start+0x240/0x490 00265 bch2_fs_get_tree+0xe1c/0x1458 00265 vfs_get_tree+0x7c/0x250 00265 path_mount+0xe24/0x1648 00265 __arm64_sys_mount+0x240/0x438 00265 invoke_syscall.constprop.0+0x74/0x1e8 00265 do_el0_svc+0xc8/0x1c8 00265 el0_svc+0x20/0x60 00265 el0t_64_sync_handler+0x104/0x130 00265 el0t_64_sync+0x154/0x158 00265 00265 Freed by task 7510: 00265 kasan_save_stack+0x28/0x50 00265 kasan_save_track+0x1c/0x38 00265 kasan_save_free_info+0x48/0x88 00265 __kasan_slab_free+0x48/0x60 00265 kfree+0x188/0x408 00265 kvfree+0x3c/0x50 00265 __bch2_darray_resize_noprof+0xe0/0x1b0 00265 __bch2_accounting_mem_insert+0x76c/0xb08 00265 bch2_accounting_mem_insert+0x224/0x3b8 00265 bch2_accounting_mem_mod_locked+0x480/0xc58 00265 bch2_accounting_read+0xa94/0x3eb8 00265 bch2_run_recovery_pass+0x80/0x178 00265 bch2_run_recovery_passes+0x340/0x698 00265 bch2_fs_recovery+0x1c98/0x2bd8 00265 bch2_fs_start+0x240/0x490 00265 bch2_fs_get_tree+0xe1c/0x1458 00265 vfs_get_tree+0x7c/0x250 00265 path_mount+0xe24/0x1648 00265 bcachefs (vdc): going read-write 00265 __arm64_sys_mount+0x240/0x438 00265 invoke_syscall.constprop.0+0x74/0x1e8 00265 do_el0_svc+0xc8/0x1c8 00265 el0_svc+0x20/0x60 00265 el0t_64_sync_handler+0x104/0x130 00265 el0t_64_sync+0x154/0x158 00265 00265 The buggy address belongs to the object at ffffff80c57eac00 00265 which belongs to the cache kmalloc-128 of size 128 00265 The buggy address is located 0 bytes inside of 00265 freed 128-byte region [ffffff80c57eac00, ffffff80c57eac80) 00265 00265 The buggy address belongs to the physical page: 00265 page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1057ea 00265 head: order:1 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 00265 flags: 0x8000000000000040(head|zone=2) 00265 page_type: f5(slab) 00265 raw: 8000000000000040 ffffff80c0002800 dead000000000100 dead000000000122 00265 raw: 0000000000000000 0000000000200020 00000001f5000000 ffffff80c57a6400 00265 head: 8000000000000040 ffffff80c0002800 dead000000000100 dead000000000122 00265 head: 0000000000000000 0000000000200020 00000001f5000000 ffffff80c57a6400 00265 head: 8000000000000001 fffffffec315fa81 ffffffffffffffff 0000000000000000 00265 head: 0000000000000002 0000000000000000 00000000ffffffff 0000000000000000 00265 page dumped because: kasan: bad access detected 00265 00265 Memory state around the buggy address: 00265 ffffff80c57eab00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00265 ffffff80c57eab80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc 00265 >ffffff80c57eac00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb 00265 ^ 00265 ffffff80c57eac80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc 00265 ffffff80c57ead00: 00 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc 00265 ================================================================== 00265 Kernel panic - not syncing: kasan.fault=panic set ... 00265 CPU: 6 UID: 0 PID: 7531 Comm: cat Not tainted 6.13.0-rc3-ktest-g16fc6fa3819d #14103 00265 Hardware name: linux,dummy-virt (DT) 00265 Call trace: 00265 show_stack+0x1c/0x30 (C) 00265 dump_stack_lvl+0x30/0x80 00265 dump_stack+0x18/0x20 00265 panic+0x4d4/0x518 00265 start_report.constprop.0+0x0/0x90 00265 kasan_report+0xa0/0xd0 00265 __asan_report_load4_noabort+0x1c/0x28 00265 bch2_fs_to_text+0x12b4/0x1728 00265 bch2_fs_show+0x94/0x188 00265 sysfs_kf_seq_show+0x1a4/0x348 00265 kernfs_seq_show+0x12c/0x198 00265 seq_read_iter+0x27c/0xfd0 00265 kernfs_fop_read_iter+0x390/0x4f8 00265 vfs_read+0x480/0x7f0 00265 ksys_read+0xe0/0x1e8 00265 __arm64_sys_read+0x70/0xa8 00265 invoke_syscall.constprop.0+0x74/0x1e8 00265 do_el0_svc+0xc8/0x1c8 00265 el0_svc+0x20/0x60 00265 el0t_64_sync_handler+0x104/0x130 00265 el0t_64_sync+0x154/0x158 00265 SMP: stopping secondary CPUs 00265 Kernel Offset: disabled 00265 CPU features: 0x000,00000070,00000010,8240500b 00265 Memory Limit: none 00265 ---[ end Kernel panic - not syncing: kasan.fault=panic set ... ]--- 00270 ========= FAILED TIMEOUT generic.187 in 1200s Signed-off-by: Alan Huang Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_accounting.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index 5360cbb3ec298..f4372cafea2e9 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -210,11 +210,13 @@ static inline void bch2_accounting_mem_read_counters(struct bch_accounting_mem * static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p, u64 *v, unsigned nr) { + percpu_down_read(&c->mark_lock); struct bch_accounting_mem *acc = &c->accounting; unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), accounting_pos_cmp, &p); bch2_accounting_mem_read_counters(acc, idx, v, nr, false); + percpu_up_read(&c->mark_lock); } static inline struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset) -- GitLab From 1e690efa72596a1163dc56709707f459221889d2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Feb 2025 11:34:59 -0500 Subject: [PATCH 0499/1585] bcachefs: Split out journal pins by btree level This lets us flush the journal to go read-only more effectively. Flushing the journal and going read-only requires halting mutually recursive processes, which strictly speaking are not guaranteed to terminate. Flushing btree node journal pins will kick off a btree node write, and btree node writes on completion must do another btree update to the parent node to update the 'sectors_written' field for that node's key. If the parent node is full and requires a split or compaction, that's going to generate a whole bunch of additional btree updates - alloc info, LRU btree, and more - which then have to be flushed, and the cycle repeats. This process will terminate much more effectively if we tweak journal reclaim to flush btree updates leaf to root: i.e., don't flush updates for a given btree node (kicking off a write, and consuming space within that node up to the next block boundary) if there might still be unflushed updates in child nodes. Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_reclaim.c | 37 +++++++++++++++++------------------ fs/bcachefs/journal_types.h | 5 ++++- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 6a9cefb635d63..d373cd181a7f5 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -384,12 +384,16 @@ void bch2_journal_pin_drop(struct journal *j, spin_unlock(&j->lock); } -static enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn) +static enum journal_pin_type journal_pin_type(struct journal_entry_pin *pin, + journal_pin_flush_fn fn) { if (fn == bch2_btree_node_flush0 || - fn == bch2_btree_node_flush1) - return JOURNAL_PIN_TYPE_btree; - else if (fn == bch2_btree_key_cache_journal_flush) + fn == bch2_btree_node_flush1) { + unsigned idx = fn == bch2_btree_node_flush1; + struct btree *b = container_of(pin, struct btree, writes[idx].journal); + + return JOURNAL_PIN_TYPE_btree0 - b->c.level; + } else if (fn == bch2_btree_key_cache_journal_flush) return JOURNAL_PIN_TYPE_key_cache; else return JOURNAL_PIN_TYPE_other; @@ -441,7 +445,7 @@ void bch2_journal_pin_copy(struct journal *j, bool reclaim = __journal_pin_drop(j, dst); - bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(flush_fn)); + bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(dst, flush_fn)); if (reclaim) bch2_journal_reclaim_fast(j); @@ -465,7 +469,7 @@ void bch2_journal_pin_set(struct journal *j, u64 seq, bool reclaim = __journal_pin_drop(j, pin); - bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(flush_fn)); + bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn)); if (reclaim) bch2_journal_reclaim_fast(j); @@ -587,7 +591,7 @@ static size_t journal_flush_pins(struct journal *j, spin_lock(&j->lock); /* Pin might have been dropped or rearmed: */ if (likely(!err && !j->flush_in_progress_dropped)) - list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(flush_fn)]); + list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]); j->flush_in_progress = NULL; j->flush_in_progress_dropped = false; spin_unlock(&j->lock); @@ -869,18 +873,13 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush, mutex_lock(&j->reclaim_lock); - if (journal_flush_pins_or_still_flushing(j, seq_to_flush, - BIT(JOURNAL_PIN_TYPE_key_cache)| - BIT(JOURNAL_PIN_TYPE_other))) { - *did_work = true; - goto unlock; - } - - if (journal_flush_pins_or_still_flushing(j, seq_to_flush, - BIT(JOURNAL_PIN_TYPE_btree))) { - *did_work = true; - goto unlock; - } + for (int type = JOURNAL_PIN_TYPE_NR - 1; + type >= 0; + --type) + if (journal_flush_pins_or_still_flushing(j, seq_to_flush, BIT(type))) { + *did_work = true; + goto unlock; + } if (seq_to_flush > journal_cur_seq(j)) bch2_journal_entry_close(j); diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index a198a81d74784..1ef3a28ed6ab3 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -53,7 +53,10 @@ struct journal_buf { */ enum journal_pin_type { - JOURNAL_PIN_TYPE_btree, + JOURNAL_PIN_TYPE_btree3, + JOURNAL_PIN_TYPE_btree2, + JOURNAL_PIN_TYPE_btree1, + JOURNAL_PIN_TYPE_btree0, JOURNAL_PIN_TYPE_key_cache, JOURNAL_PIN_TYPE_other, JOURNAL_PIN_TYPE_NR, -- GitLab From 9f734cd076931fa4d7feb5728e5cd95cde0af114 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Feb 2025 17:46:36 -0500 Subject: [PATCH 0500/1585] bcachefs: Fix want_new_bset() so we write until the end of the btree node want_new_bset() returns the address of a new bset to initialize if we wish to do so in a btree node - either because the previous one is too big, or because it's been written. The case for 'previous bset was written' was wrong: it's only supposed to check for if we have space in the node for one more block, but because it subtracted the header from the space available it would never initialize a new bset if we were down to the last block in a node. Fixing this results in fewer btree node splits/compactions, which fixes a bug with flushing the journal to go read-only sometimes not terminating or taking excessively long. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index 7930ffea3075d..26d646e1275c0 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -278,12 +278,12 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, struct bt { struct bset_tree *t = bset_tree_last(b); struct btree_node_entry *bne = max(write_block(b), - (void *) btree_bkey_last(b, bset_tree_last(b))); + (void *) btree_bkey_last(b, t)); ssize_t remaining_space = __bch2_btree_u64s_remaining(b, bne->keys.start); if (unlikely(bset_written(b, bset(b, t)))) { - if (remaining_space > (ssize_t) (block_bytes(c) >> 3)) + if (b->written + block_sectors(c) <= btree_sectors(c)) return bne; } else { if (unlikely(bset_u64s(t) * sizeof(u64) > btree_write_set_buffer(b)) && -- GitLab From 6aa8a63c471eb6756aabd03f880feffe6a7af6c9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 11 Feb 2025 15:45:16 +0100 Subject: [PATCH 0501/1585] USB: serial: option: drop MeiG Smart defines Several MeiG Smart modems apparently use the same product id, making the defines even less useful. Drop them in favour of using comments consistently to make the id table slightly less unwieldy. Cc: stable@vger.kernel.org Acked-by: Chester A. Unal Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 4a59a40f750a6..58bd54e8c483a 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -619,18 +619,6 @@ static void option_instat_callback(struct urb *urb); /* Luat Air72*U series based on UNISOC UIS8910 uses UNISOC's vendor ID */ #define LUAT_PRODUCT_AIR720U 0x4e00 -/* MeiG Smart Technology products */ -#define MEIGSMART_VENDOR_ID 0x2dee -/* - * MeiG Smart SLM828, SRM815, and SRM825L use the same product ID. SLM828 is - * based on Qualcomm SDX12. SRM815 and SRM825L are based on Qualcomm 315. - */ -#define MEIGSMART_PRODUCT_SRM825L 0x4d22 -/* MeiG Smart SLM320 based on UNISOC UIS8910 */ -#define MEIGSMART_PRODUCT_SLM320 0x4d41 -/* MeiG Smart SLM770A based on ASR1803 */ -#define MEIGSMART_PRODUCT_SLM770A 0x4d57 - /* Device flags */ /* Highest interface number which can be used with NCTRL() and RSVD() */ @@ -2366,6 +2354,14 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a05, 0xff) }, /* Fibocom FM650-CN (NCM mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a06, 0xff) }, /* Fibocom FM650-CN (RNDIS mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a07, 0xff) }, /* Fibocom FM650-CN (MBIM mode) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d41, 0xff, 0, 0) }, /* MeiG Smart SLM320 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d57, 0xff, 0, 0) }, /* MeiG Smart SLM770A */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d22, 0xff, 0, 0) }, /* MeiG Smart SRM815 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d22, 0xff, 0x10, 0x02) }, /* MeiG Smart SLM828 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d22, 0xff, 0x10, 0x03) }, /* MeiG Smart SLM828 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d22, 0xff, 0xff, 0x30) }, /* MeiG Smart SRM815 and SRM825L */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d22, 0xff, 0xff, 0x40) }, /* MeiG Smart SRM825L */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d22, 0xff, 0xff, 0x60) }, /* MeiG Smart SRM825L */ { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ @@ -2422,14 +2418,6 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) }, - { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SLM320, 0xff, 0, 0) }, - { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SLM770A, 0xff, 0, 0) }, - { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0, 0) }, /* MeiG Smart SRM815 */ - { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0x10, 0x02) }, /* MeiG Smart SLM828 */ - { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0x10, 0x03) }, /* MeiG Smart SLM828 */ - { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0xff, 0x30) }, /* MeiG Smart SRM815 and SRM825L */ - { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0xff, 0x40) }, /* MeiG Smart SRM825L */ - { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0xff, 0x60) }, /* MeiG Smart SRM825L */ { USB_DEVICE_INTERFACE_CLASS(0x1bbb, 0x0530, 0xff), /* TCL IK512 MBIM */ .driver_info = NCTRL(1) }, { USB_DEVICE_INTERFACE_CLASS(0x1bbb, 0x0640, 0xff), /* TCL IK512 ECM */ -- GitLab From 35e21de48e693af1dcfdbf2dc3d73dcfa3c8f2d9 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 11 Feb 2025 16:48:06 +0100 Subject: [PATCH 0502/1585] regulator: core: let dt properties override driver init_data This reverts commit cd7a38c40b231350a3cd0fd774f4e6bb68c4b411. When submitting the change above, it was thought that the origin of the init_data should be a clear choice, from the driver or from DT but not both. It turns out some devices, such as qcom-msm8974-lge-nexus5-hammerhead, relied on the old behaviour to override the init_data provided by the driver, making it some kind of default if none is provided by the platform. Using the init_data provided by the driver when it is present broke these devices so revert the change to fixup the situation and add a comment to make things a bit more clear Reported-by: Luca Weiss Closes: https://lore.kernel.org/lkml/5857103.DvuYhMxLoT@lucaweiss.eu Fixes: cd7a38c40b23 ("regulator: core: do not silently ignore provided init_data") Signed-off-by: Jerome Brunet Link: https://patch.msgid.link/20250211-regulator-init-data-fixup-v1-1-5ce1c6cff990@baylibre.com Signed-off-by: Mark Brown --- drivers/regulator/core.c | 61 ++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 34 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 89578b91c4680..4ddf0efead682 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5774,43 +5774,36 @@ regulator_register(struct device *dev, goto clean; } - if (config->init_data) { - /* - * Providing of_match means the framework is expected to parse - * DT to get the init_data. This would conflict with provided - * init_data, if set. Warn if it happens. - */ - if (regulator_desc->of_match) - dev_warn(dev, "Using provided init data - OF match ignored\n"); + /* + * DT may override the config->init_data provided if the platform + * needs to do so. If so, config->init_data is completely ignored. + */ + init_data = regulator_of_get_init_data(dev, regulator_desc, config, + &rdev->dev.of_node); + /* + * Sometimes not all resources are probed already so we need to take + * that into account. This happens most the time if the ena_gpiod comes + * from a gpio extender or something else. + */ + if (PTR_ERR(init_data) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto clean; + } + + /* + * We need to keep track of any GPIO descriptor coming from the + * device tree until we have handled it over to the core. If the + * config that was passed in to this function DOES NOT contain + * a descriptor, and the config after this call DOES contain + * a descriptor, we definitely got one from parsing the device + * tree. + */ + if (!cfg->ena_gpiod && config->ena_gpiod) + dangling_of_gpiod = true; + if (!init_data) { init_data = config->init_data; rdev->dev.of_node = of_node_get(config->of_node); - - } else { - init_data = regulator_of_get_init_data(dev, regulator_desc, - config, - &rdev->dev.of_node); - - /* - * Sometimes not all resources are probed already so we need to - * take that into account. This happens most the time if the - * ena_gpiod comes from a gpio extender or something else. - */ - if (PTR_ERR(init_data) == -EPROBE_DEFER) { - ret = -EPROBE_DEFER; - goto clean; - } - - /* - * We need to keep track of any GPIO descriptor coming from the - * device tree until we have handled it over to the core. If the - * config that was passed in to this function DOES NOT contain a - * descriptor, and the config after this call DOES contain a - * descriptor, we definitely got one from parsing the device - * tree. - */ - if (!cfg->ena_gpiod && config->ena_gpiod) - dangling_of_gpiod = true; } ww_mutex_init(&rdev->mutex, ®ulator_ww_class); -- GitLab From 69ab25a74e2df53edc2de4acfce0a484bdb88155 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Fri, 10 Jan 2025 16:29:22 -0800 Subject: [PATCH 0503/1585] idpf: fix handling rsc packet with a single segment Handle rsc packet with a single segment same as a multi segment rsc packet so that CHECKSUM_PARTIAL is set in the skb->ip_summed field. The current code is passing CHECKSUM_NONE resulting in TCP GRO layer doing checksum in SW and hiding the issue. This will fail when using dmabufs as payload buffers as skb frag would be unreadable. Fixes: 3a8845af66ed ("idpf: add RX splitq napi poll support") Signed-off-by: Sridhar Samudrala Reviewed-by: Przemek Kitszel Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 2fa9c36e33c9c..c9fcf8f4d7363 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -3008,8 +3008,6 @@ static int idpf_rx_rsc(struct idpf_rx_queue *rxq, struct sk_buff *skb, return -EINVAL; rsc_segments = DIV_ROUND_UP(skb->data_len, rsc_seg_len); - if (unlikely(rsc_segments == 1)) - return 0; NAPI_GRO_CB(skb)->count = rsc_segments; skb_shinfo(skb)->gso_size = rsc_seg_len; -- GitLab From 2ff66c2f9ea4e9311e9a00004348b6c465bd5d3b Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Fri, 10 Jan 2025 16:29:58 -0800 Subject: [PATCH 0504/1585] idpf: record rx queue in skb for RSC packets Move the call to skb_record_rx_queue in idpf_rx_process_skb_fields() so that RX queue is recorded for RSC packets too. Fixes: 90912f9f4f2d ("idpf: convert header split mode to libeth + napi_build_skb()") Signed-off-by: Sridhar Samudrala Reviewed-by: Madhu Chittim Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index c9fcf8f4d7363..9be6a6b59c4e1 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -3070,6 +3070,7 @@ idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb, idpf_rx_hash(rxq, skb, rx_desc, decoded); skb->protocol = eth_type_trans(skb, rxq->netdev); + skb_record_rx_queue(skb, rxq->idx); if (le16_get_bits(rx_desc->hdrlen_flags, VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M)) @@ -3078,8 +3079,6 @@ idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb, csum_bits = idpf_rx_splitq_extract_csum_bits(rx_desc); idpf_rx_csum(rxq, skb, csum_bits, decoded); - skb_record_rx_queue(skb, rxq->idx); - return 0; } -- GitLab From 52c11d31b5a1d1c747bb5f36cc4808e93e2348f4 Mon Sep 17 00:00:00 2001 From: Joshua Hay Date: Tue, 4 Feb 2025 18:08:11 -0800 Subject: [PATCH 0505/1585] idpf: call set_real_num_queues in idpf_open On initial driver load, alloc_etherdev_mqs is called with whatever max queue values are provided by the control plane. However, if the driver is loaded on a system where num_online_cpus() returns less than the max queues, the netdev will think there are more queues than are actually available. Only num_online_cpus() will be allocated, but skb_get_queue_mapping(skb) could possibly return an index beyond the range of allocated queues. Consequently, the packet is silently dropped and it appears as if TX is broken. Set the real number of queues during open so the netdev knows how many queues will be allocated. Fixes: 1c325aac10a8 ("idpf: configure resources for TX queues") Signed-off-by: Joshua Hay Reviewed-by: Madhu Chittim Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index b4fbb99bfad20..a3d6b8f198a86 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -2159,8 +2159,13 @@ static int idpf_open(struct net_device *netdev) idpf_vport_ctrl_lock(netdev); vport = idpf_netdev_to_vport(netdev); + err = idpf_set_real_num_queues(vport); + if (err) + goto unlock; + err = idpf_vport_open(vport); +unlock: idpf_vport_ctrl_unlock(netdev); return err; -- GitLab From 61fb097f9a644407b9342a8169d0edef868612d7 Mon Sep 17 00:00:00 2001 From: Piotr Kwapulinski Date: Fri, 31 Jan 2025 13:14:50 +0100 Subject: [PATCH 0506/1585] ixgbe: Fix possible skb NULL pointer dereference The commit c824125cbb18 ("ixgbe: Fix passing 0 to ERR_PTR in ixgbe_run_xdp()") stopped utilizing the ERR-like macros for xdp status encoding. Propagate this logic to the ixgbe_put_rx_buffer(). The commit also relaxed the skb NULL pointer check - caught by Smatch. Restore this check. Fixes: c824125cbb18 ("ixgbe: Fix passing 0 to ERR_PTR in ixgbe_run_xdp()") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/intel-wired-lan/2c7d6c31-192a-4047-bd90-9566d0e14cc0@stanley.mountain/ Acked-by: Maciej Fijalkowski Signed-off-by: Piotr Kwapulinski Reviewed-by: Simon Horman Tested-by: Saritha Sanigani (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 7236f20c9a309..467f81239e12f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2105,7 +2105,7 @@ static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring, /* hand second half of page back to the ring */ ixgbe_reuse_rx_page(rx_ring, rx_buffer); } else { - if (!IS_ERR(skb) && IXGBE_CB(skb)->dma == rx_buffer->dma) { + if (skb && IXGBE_CB(skb)->dma == rx_buffer->dma) { /* the page has been released from the ring */ IXGBE_CB(skb)->page_released = true; } else { -- GitLab From 7822dd4d6d4bebca5045a395e1784ef09cae2d43 Mon Sep 17 00:00:00 2001 From: Zdenek Bouska Date: Tue, 28 Jan 2025 13:26:48 +0100 Subject: [PATCH 0507/1585] igc: Fix HW RX timestamp when passed by ZC XDP Fixes HW RX timestamp in the following scenario: - AF_PACKET socket with enabled HW RX timestamps is created - AF_XDP socket with enabled zero copy is created - frame is forwarded to the BPF program, where the timestamp should still be readable (extracted by igc_xdp_rx_timestamp(), kfunc behind bpf_xdp_metadata_rx_timestamp()) - the frame got XDP_PASS from BPF program, redirecting to the stack - AF_PACKET socket receives the frame with HW RX timestamp Moves the skb timestamp setting from igc_dispatch_skb_zc() to igc_construct_skb_zc() so that igc_construct_skb_zc() is similar to igc_construct_skb(). This issue can also be reproduced by running: # tools/testing/selftests/bpf/xdp_hw_metadata enp1s0 When a frame with the wrong port 9092 (instead of 9091) is used: # echo -n xdp | nc -u -q1 192.168.10.9 9092 then the RX timestamp is missing and xdp_hw_metadata prints: skb hwtstamp is not found! With this fix or when copy mode is used: # tools/testing/selftests/bpf/xdp_hw_metadata -c enp1s0 then RX timestamp is found and xdp_hw_metadata prints: found skb hwtstamp = 1736509937.852786132 Fixes: 069b142f5819 ("igc: Add support for PTP .getcyclesx64()") Signed-off-by: Zdenek Bouska Acked-by: Vinicius Costa Gomes Reviewed-by: Simon Horman Reviewed-by: Florian Bezdeka Reviewed-by: Song Yoong Siang Tested-by: Mor Bar-Gabay Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 56a35d58e7a62..21f318f12a8d6 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2701,8 +2701,9 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) } static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, - struct xdp_buff *xdp) + struct igc_xdp_buff *ctx) { + struct xdp_buff *xdp = &ctx->xdp; unsigned int totalsize = xdp->data_end - xdp->data_meta; unsigned int metasize = xdp->data - xdp->data_meta; struct sk_buff *skb; @@ -2721,27 +2722,28 @@ static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, __skb_pull(skb, metasize); } + if (ctx->rx_ts) { + skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV; + skb_hwtstamps(skb)->netdev_data = ctx->rx_ts; + } + return skb; } static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector, union igc_adv_rx_desc *desc, - struct xdp_buff *xdp, - ktime_t timestamp) + struct igc_xdp_buff *ctx) { struct igc_ring *ring = q_vector->rx.ring; struct sk_buff *skb; - skb = igc_construct_skb_zc(ring, xdp); + skb = igc_construct_skb_zc(ring, ctx); if (!skb) { ring->rx_stats.alloc_failed++; set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &ring->flags); return; } - if (timestamp) - skb_hwtstamps(skb)->hwtstamp = timestamp; - if (igc_cleanup_headers(ring, desc, skb)) return; @@ -2777,7 +2779,6 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) union igc_adv_rx_desc *desc; struct igc_rx_buffer *bi; struct igc_xdp_buff *ctx; - ktime_t timestamp = 0; unsigned int size; int res; @@ -2807,6 +2808,8 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) */ bi->xdp->data_meta += IGC_TS_HDR_LEN; size -= IGC_TS_HDR_LEN; + } else { + ctx->rx_ts = NULL; } bi->xdp->data_end = bi->xdp->data + size; @@ -2815,7 +2818,7 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) res = __igc_xdp_run_prog(adapter, prog, bi->xdp); switch (res) { case IGC_XDP_PASS: - igc_dispatch_skb_zc(q_vector, desc, bi->xdp, timestamp); + igc_dispatch_skb_zc(q_vector, desc, ctx); fallthrough; case IGC_XDP_CONSUMED: xsk_buff_free(bi->xdp); -- GitLab From 63f20f00d23d569e4e67859b4e8dcc9de79221cb Mon Sep 17 00:00:00 2001 From: Song Yoong Siang Date: Wed, 5 Feb 2025 10:36:03 +0800 Subject: [PATCH 0508/1585] igc: Set buffer type for empty frames in igc_init_empty_frame Set the buffer type to IGC_TX_BUFFER_TYPE_SKB for empty frame in the igc_init_empty_frame function. This ensures that the buffer type is correctly identified and handled during Tx ring cleanup. Fixes: db0b124f02ba ("igc: Enhance Qbv scheduling by using first flag bit") Cc: stable@vger.kernel.org # 6.2+ Signed-off-by: Song Yoong Siang Acked-by: Maciej Fijalkowski Reviewed-by: Simon Horman Tested-by: Mor Bar-Gabay Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 21f318f12a8d6..84307bb7313e0 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1096,6 +1096,7 @@ static int igc_init_empty_frame(struct igc_ring *ring, return -ENOMEM; } + buffer->type = IGC_TX_BUFFER_TYPE_SKB; buffer->skb = skb; buffer->protocol = 0; buffer->bytecount = skb->len; -- GitLab From fc22b06fbd2afefa1eddff69a6fd30c539cef577 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 11 Feb 2025 09:28:39 +0200 Subject: [PATCH 0509/1585] platform/x86: int3472: Use correct type for "polarity", call it gpio_flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Struct gpiod_lookup flags field's type is unsigned long. Thus use unsigned long for values to be assigned to that field. Similarly, also call the field gpio_flags which it really is. Signed-off-by: Sakari Ailus Reviewed-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250211072841.7713-2-sakari.ailus@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/int3472/discrete.c | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c index 31015ebe20d89..b891b064fbf7e 100644 --- a/drivers/platform/x86/intel/int3472/discrete.c +++ b/drivers/platform/x86/intel/int3472/discrete.c @@ -55,7 +55,7 @@ static void skl_int3472_log_sensor_module_name(struct int3472_discrete_device *i static int skl_int3472_fill_gpiod_lookup(struct gpiod_lookup *table_entry, struct acpi_resource_gpio *agpio, - const char *func, u32 polarity) + const char *func, unsigned long gpio_flags) { char *path = agpio->resource_source.string_ptr; struct acpi_device *adev; @@ -70,14 +70,14 @@ static int skl_int3472_fill_gpiod_lookup(struct gpiod_lookup *table_entry, if (!adev) return -ENODEV; - *table_entry = GPIO_LOOKUP(acpi_dev_name(adev), agpio->pin_table[0], func, polarity); + *table_entry = GPIO_LOOKUP(acpi_dev_name(adev), agpio->pin_table[0], func, gpio_flags); return 0; } static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int3472, struct acpi_resource_gpio *agpio, - const char *func, u32 polarity) + const char *func, unsigned long gpio_flags) { int ret; @@ -87,7 +87,7 @@ static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int347 } ret = skl_int3472_fill_gpiod_lookup(&int3472->gpios.table[int3472->n_sensor_gpios], - agpio, func, polarity); + agpio, func, gpio_flags); if (ret) return ret; @@ -100,7 +100,7 @@ static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int347 static struct gpio_desc * skl_int3472_gpiod_get_from_temp_lookup(struct int3472_discrete_device *int3472, struct acpi_resource_gpio *agpio, - const char *func, u32 polarity) + const char *func, unsigned long gpio_flags) { struct gpio_desc *desc; int ret; @@ -111,7 +111,7 @@ skl_int3472_gpiod_get_from_temp_lookup(struct int3472_discrete_device *int3472, return ERR_PTR(-ENOMEM); lookup->dev_id = dev_name(int3472->dev); - ret = skl_int3472_fill_gpiod_lookup(&lookup->table[0], agpio, func, polarity); + ret = skl_int3472_fill_gpiod_lookup(&lookup->table[0], agpio, func, gpio_flags); if (ret) return ERR_PTR(ret); @@ -122,32 +122,33 @@ skl_int3472_gpiod_get_from_temp_lookup(struct int3472_discrete_device *int3472, return desc; } -static void int3472_get_func_and_polarity(u8 type, const char **func, u32 *polarity) +static void int3472_get_func_and_polarity(u8 type, const char **func, + unsigned long *gpio_flags) { switch (type) { case INT3472_GPIO_TYPE_RESET: *func = "reset"; - *polarity = GPIO_ACTIVE_LOW; + *gpio_flags = GPIO_ACTIVE_LOW; break; case INT3472_GPIO_TYPE_POWERDOWN: *func = "powerdown"; - *polarity = GPIO_ACTIVE_LOW; + *gpio_flags = GPIO_ACTIVE_LOW; break; case INT3472_GPIO_TYPE_CLK_ENABLE: *func = "clk-enable"; - *polarity = GPIO_ACTIVE_HIGH; + *gpio_flags = GPIO_ACTIVE_HIGH; break; case INT3472_GPIO_TYPE_PRIVACY_LED: *func = "privacy-led"; - *polarity = GPIO_ACTIVE_HIGH; + *gpio_flags = GPIO_ACTIVE_HIGH; break; case INT3472_GPIO_TYPE_POWER_ENABLE: *func = "power-enable"; - *polarity = GPIO_ACTIVE_HIGH; + *gpio_flags = GPIO_ACTIVE_HIGH; break; default: *func = "unknown"; - *polarity = GPIO_ACTIVE_HIGH; + *gpio_flags = GPIO_ACTIVE_HIGH; break; } } @@ -194,7 +195,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares, struct gpio_desc *gpio; const char *err_msg; const char *func; - u32 polarity; + unsigned long gpio_flags; int ret; if (!acpi_gpio_get_io_resource(ares, &agpio)) @@ -217,7 +218,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares, type = FIELD_GET(INT3472_GPIO_DSM_TYPE, obj->integer.value); - int3472_get_func_and_polarity(type, &func, &polarity); + int3472_get_func_and_polarity(type, &func, &gpio_flags); pin = FIELD_GET(INT3472_GPIO_DSM_PIN, obj->integer.value); /* Pin field is not really used under Windows and wraps around at 8 bits */ @@ -227,16 +228,16 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares, active_value = FIELD_GET(INT3472_GPIO_DSM_SENSOR_ON_VAL, obj->integer.value); if (!active_value) - polarity ^= GPIO_ACTIVE_LOW; + gpio_flags ^= GPIO_ACTIVE_LOW; dev_dbg(int3472->dev, "%s %s pin %d active-%s\n", func, agpio->resource_source.string_ptr, agpio->pin_table[0], - str_high_low(polarity == GPIO_ACTIVE_HIGH)); + str_high_low(gpio_flags == GPIO_ACTIVE_HIGH)); switch (type) { case INT3472_GPIO_TYPE_RESET: case INT3472_GPIO_TYPE_POWERDOWN: - ret = skl_int3472_map_gpio_to_sensor(int3472, agpio, func, polarity); + ret = skl_int3472_map_gpio_to_sensor(int3472, agpio, func, gpio_flags); if (ret) err_msg = "Failed to map GPIO pin to sensor\n"; @@ -244,7 +245,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares, case INT3472_GPIO_TYPE_CLK_ENABLE: case INT3472_GPIO_TYPE_PRIVACY_LED: case INT3472_GPIO_TYPE_POWER_ENABLE: - gpio = skl_int3472_gpiod_get_from_temp_lookup(int3472, agpio, func, polarity); + gpio = skl_int3472_gpiod_get_from_temp_lookup(int3472, agpio, func, gpio_flags); if (IS_ERR(gpio)) { ret = PTR_ERR(gpio); err_msg = "Failed to get GPIO\n"; -- GitLab From 569617dbbd06286fb73f3f1c2ac91e51d863c7de Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 11 Feb 2025 09:28:40 +0200 Subject: [PATCH 0510/1585] platform/x86: int3472: Call "reset" GPIO "enable" for INT347E MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DT bindings for ov7251 specify "enable" GPIO (xshutdown in documentation) but the int3472 indiscriminately provides this as a "reset" GPIO to sensor drivers. Take this into account by assigning it as "enable" with active high polarity for INT347E devices, i.e. ov7251. "reset" with active low polarity remains the default GPIO name for other devices. Signed-off-by: Sakari Ailus Reviewed-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250211072841.7713-3-sakari.ailus@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/int3472/discrete.c | 52 +++++++++++++++++-- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c index b891b064fbf7e..092252eb95a87 100644 --- a/drivers/platform/x86/intel/int3472/discrete.c +++ b/drivers/platform/x86/intel/int3472/discrete.c @@ -2,6 +2,7 @@ /* Author: Dan Scally */ #include +#include #include #include #include @@ -122,10 +123,53 @@ skl_int3472_gpiod_get_from_temp_lookup(struct int3472_discrete_device *int3472, return desc; } -static void int3472_get_func_and_polarity(u8 type, const char **func, - unsigned long *gpio_flags) +/** + * struct int3472_gpio_map - Map GPIOs to whatever is expected by the + * sensor driver (as in DT bindings) + * @hid: The ACPI HID of the device without the instance number e.g. INT347E + * @type_from: The GPIO type from ACPI ?SDT + * @type_to: The assigned GPIO type, typically same as @type_from + * @func: The function, e.g. "enable" + * @polarity_low: GPIO_ACTIVE_LOW true if the @polarity_low is true, + * GPIO_ACTIVE_HIGH otherwise + */ +struct int3472_gpio_map { + const char *hid; + u8 type_from; + u8 type_to; + bool polarity_low; + const char *func; +}; + +static const struct int3472_gpio_map int3472_gpio_map[] = { + { "INT347E", INT3472_GPIO_TYPE_RESET, INT3472_GPIO_TYPE_RESET, false, "enable" }, +}; + +static void int3472_get_func_and_polarity(struct acpi_device *adev, u8 *type, + const char **func, unsigned long *gpio_flags) { - switch (type) { + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(int3472_gpio_map); i++) { + /* + * Map the firmware-provided GPIO to whatever a driver expects + * (as in DT bindings). First check if the type matches with the + * GPIO map, then further check that the device _HID matches. + */ + if (*type != int3472_gpio_map[i].type_from) + continue; + + if (!acpi_dev_hid_uid_match(adev, int3472_gpio_map[i].hid, NULL)) + continue; + + *type = int3472_gpio_map[i].type_to; + *gpio_flags = int3472_gpio_map[i].polarity_low ? + GPIO_ACTIVE_LOW : GPIO_ACTIVE_HIGH; + *func = int3472_gpio_map[i].func; + return; + } + + switch (*type) { case INT3472_GPIO_TYPE_RESET: *func = "reset"; *gpio_flags = GPIO_ACTIVE_LOW; @@ -218,7 +262,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares, type = FIELD_GET(INT3472_GPIO_DSM_TYPE, obj->integer.value); - int3472_get_func_and_polarity(type, &func, &gpio_flags); + int3472_get_func_and_polarity(int3472->sensor, &type, &func, &gpio_flags); pin = FIELD_GET(INT3472_GPIO_DSM_PIN, obj->integer.value); /* Pin field is not really used under Windows and wraps around at 8 bits */ -- GitLab From 318e8c339c9a0891c389298bb328ed0762a9935e Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Wed, 5 Feb 2025 14:04:41 +0000 Subject: [PATCH 0511/1585] x86/cpu/kvm: SRSO: Fix possible missing IBPB on VM-Exit In [1] the meaning of the synthetic IBPB flags has been redefined for a better separation of concerns: - ENTRY_IBPB -- issue IBPB on entry only - IBPB_ON_VMEXIT -- issue IBPB on VM-Exit only and the Retbleed mitigations have been updated to match this new semantics. Commit [2] was merged shortly before [1], and their interaction was not handled properly. This resulted in IBPB not being triggered on VM-Exit in all SRSO mitigation configs requesting an IBPB there. Specifically, an IBPB on VM-Exit is triggered only when X86_FEATURE_IBPB_ON_VMEXIT is set. However: - X86_FEATURE_IBPB_ON_VMEXIT is not set for "spec_rstack_overflow=ibpb", because before [1] having X86_FEATURE_ENTRY_IBPB was enough. Hence, an IBPB is triggered on entry but the expected IBPB on VM-exit is not. - X86_FEATURE_IBPB_ON_VMEXIT is not set also when "spec_rstack_overflow=ibpb-vmexit" if X86_FEATURE_ENTRY_IBPB is already set. That's because before [1] this was effectively redundant. Hence, e.g. a "retbleed=ibpb spec_rstack_overflow=bpb-vmexit" config mistakenly reports the machine still vulnerable to SRSO, despite an IBPB being triggered both on entry and VM-Exit, because of the Retbleed selected mitigation config. - UNTRAIN_RET_VM won't still actually do anything unless CONFIG_MITIGATION_IBPB_ENTRY is set. For "spec_rstack_overflow=ibpb", enable IBPB on both entry and VM-Exit and clear X86_FEATURE_RSB_VMEXIT which is made superfluous by X86_FEATURE_IBPB_ON_VMEXIT. This effectively makes this mitigation option similar to the one for 'retbleed=ibpb', thus re-order the code for the RETBLEED_MITIGATION_IBPB option to be less confusing by having all features enabling before the disabling of the not needed ones. For "spec_rstack_overflow=ibpb-vmexit", guard this mitigation setting with CONFIG_MITIGATION_IBPB_ENTRY to ensure UNTRAIN_RET_VM sequence is effectively compiled in. Drop instead the CONFIG_MITIGATION_SRSO guard, since none of the SRSO compile cruft is required in this configuration. Also, check only that the required microcode is present to effectively enabled the IBPB on VM-Exit. Finally, update the KConfig description for CONFIG_MITIGATION_IBPB_ENTRY to list also all SRSO config settings enabled by this guard. Fixes: 864bcaa38ee4 ("x86/cpu/kvm: Provide UNTRAIN_RET_VM") [1] Fixes: d893832d0e1e ("x86/srso: Add IBPB on VMEXIT") [2] Reported-by: Yosry Ahmed Signed-off-by: Patrick Bellasi Reviewed-by: Borislav Petkov (AMD) Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 3 ++- arch/x86/kernel/cpu/bugs.c | 21 ++++++++++++++------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 87198d957e2f1..be2c311f5118d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2599,7 +2599,8 @@ config MITIGATION_IBPB_ENTRY depends on CPU_SUP_AMD && X86_64 default y help - Compile the kernel with support for the retbleed=ibpb mitigation. + Compile the kernel with support for the retbleed=ibpb and + spec_rstack_overflow={ibpb,ibpb-vmexit} mitigations. config MITIGATION_IBRS_ENTRY bool "Enable IBRS on kernel entry" diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 5a505aa654899..a5d0998d76049 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1115,6 +1115,8 @@ static void __init retbleed_select_mitigation(void) case RETBLEED_MITIGATION_IBPB: setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); + mitigate_smt = true; /* * IBPB on entry already obviates the need for @@ -1124,9 +1126,6 @@ static void __init retbleed_select_mitigation(void) setup_clear_cpu_cap(X86_FEATURE_UNRET); setup_clear_cpu_cap(X86_FEATURE_RETHUNK); - setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); - mitigate_smt = true; - /* * There is no need for RSB filling: entry_ibpb() ensures * all predictions, including the RSB, are invalidated, @@ -2646,6 +2645,7 @@ static void __init srso_select_mitigation(void) if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) { if (has_microcode) { setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); srso_mitigation = SRSO_MITIGATION_IBPB; /* @@ -2655,6 +2655,13 @@ static void __init srso_select_mitigation(void) */ setup_clear_cpu_cap(X86_FEATURE_UNRET); setup_clear_cpu_cap(X86_FEATURE_RETHUNK); + + /* + * There is no need for RSB filling: entry_ibpb() ensures + * all predictions, including the RSB, are invalidated, + * regardless of IBPB implementation. + */ + setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT); } } else { pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n"); @@ -2663,8 +2670,8 @@ static void __init srso_select_mitigation(void) ibpb_on_vmexit: case SRSO_CMD_IBPB_ON_VMEXIT: - if (IS_ENABLED(CONFIG_MITIGATION_SRSO)) { - if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) { + if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) { + if (has_microcode) { setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT; @@ -2676,8 +2683,8 @@ static void __init srso_select_mitigation(void) setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT); } } else { - pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n"); - } + pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n"); + } break; default: break; -- GitLab From 8d1d1e8d3345b56d3d8a64f845962c71468cd776 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 30 Jan 2025 14:56:07 +0100 Subject: [PATCH 0512/1585] s390/configs: Remove CONFIG_LSM s390 defconfig does not have BPF LSM, resulting in systemd[1]: bpf-restrict-fs: BPF LSM hook not enabled in the kernel, BPF LSM not supported. with the respective kernels. The other architectures do not explicitly set it, and the default values have BPF in them, so just drop it. Reported-by: Marc Hartmayer Acked-by: Vasily Gorbik Signed-off-by: Ilya Leoshkevich Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 1 - arch/s390/configs/defconfig | 1 - arch/s390/configs/zfcpdump_defconfig | 1 - 3 files changed, 3 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index d6beec5292a00..44f01a4bc810f 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -740,7 +740,6 @@ CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y -CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 8cfbfb10bba8c..8bcd37edd3c97 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -725,7 +725,6 @@ CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y -CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_USER=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index bcbaa069de96e..853b2326a171b 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -62,7 +62,6 @@ CONFIG_ZFCP=y # CONFIG_INOTIFY_USER is not set # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set -CONFIG_LSM="yama,loadpin,safesetid,integrity" # CONFIG_ZLIB_DFLTCC is not set CONFIG_XZ_DEC_MICROLZMA=y CONFIG_PRINTK_TIME=y -- GitLab From 32ae4a2992529e2c7934e422035fad1d9b0f1fb5 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Fri, 31 Jan 2025 12:02:55 +0100 Subject: [PATCH 0513/1585] s390/cio: Fix CHPID "configure" attribute caching In some environments, the SCLP firmware interface used to query a CHPID's configured state is not supported. On these environments, rapidly reading the corresponding sysfs attribute produces inconsistent results: $ cat /sys/devices/css0/chp0.00/configure cat: /sys/devices/css0/chp0.00/configure: Operation not supported $ cat /sys/devices/css0/chp0.00/configure 3 This occurs for example when Linux is run as a KVM guest. The inconsistency is a result of CIO using cached results for generating the value of the "configure" attribute while failing to handle the situation where no data was returned by SCLP. Fix this by not updating the cache-expiration timestamp when SCLP returns no data. With the fix applied, the system response is consistent: $ cat /sys/devices/css0/chp0.00/configure cat: /sys/devices/css0/chp0.00/configure: Operation not supported $ cat /sys/devices/css0/chp0.00/configure cat: /sys/devices/css0/chp0.00/configure: Operation not supported Reviewed-by: Vineeth Vijayan Reviewed-by: Eric Farman Tested-by: Eric Farman Signed-off-by: Peter Oberparleiter Signed-off-by: Vasily Gorbik --- drivers/s390/cio/chp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index 4a0b3f19bd8ef..4f01b1929240e 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -695,7 +695,8 @@ static int info_update(void) if (time_after(jiffies, chp_info_expires)) { /* Data is too old, update. */ rc = sclp_chp_read_info(&chp_info); - chp_info_expires = jiffies + CHP_INFO_UPDATE_INTERVAL ; + if (!rc) + chp_info_expires = jiffies + CHP_INFO_UPDATE_INTERVAL; } mutex_unlock(&info_lock); -- GitLab From 6166caf3bbe2429e4fac71b77e1c8254f2690383 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 10 Feb 2025 09:56:53 +0100 Subject: [PATCH 0514/1585] s390/bitops: Disable arch_test_bit() optimization for PROFILE_ALL_BRANCHES With PROFILE_ALL_BRANCHES enabled gcc sometimes fails to handle __builtin_constant_p() correctly: In function 'arch_test_bit', inlined from 'node_state' at include/linux/nodemask.h:423:9, inlined from 'warn_if_node_offline' at include/linux/gfp.h:252:2, inlined from '__alloc_pages_node_noprof' at include/linux/gfp.h:267:2, inlined from 'alloc_pages_node_noprof' at include/linux/gfp.h:296:9, inlined from 'vm_area_alloc_pages.constprop' at mm/vmalloc.c:3591:11: >> arch/s390/include/asm/bitops.h:60:17: warning: 'asm' operand 2 probably does not match constraints 60 | asm volatile( | ^~~ >> arch/s390/include/asm/bitops.h:60:17: error: impossible constraint in 'asm' Therefore disable the optimization for this case. This is similar to commit 63678eecec57 ("s390/preempt: disable __preempt_count_add() optimization for PROFILE_ALL_BRANCHES") Fixes: b2bc1b1a77c0 ("s390/bitops: Provide optimized arch_test_bit()") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202502091912.xL2xTCGw-lkp@intel.com/ Acked-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/bitops.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index d5125296ade25..a5ca0a9476916 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -53,7 +53,11 @@ static __always_inline bool arch_test_bit(unsigned long nr, const volatile unsig unsigned long mask; int cc; - if (__builtin_constant_p(nr)) { + /* + * With CONFIG_PROFILE_ALL_BRANCHES enabled gcc fails to + * handle __builtin_constant_p() in some cases. + */ + if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && __builtin_constant_p(nr)) { addr = (const volatile unsigned char *)ptr; addr += (nr ^ (BITS_PER_LONG - BITS_PER_BYTE)) / BITS_PER_BYTE; mask = 1UL << (nr & (BITS_PER_BYTE - 1)); -- GitLab From 05793884a1f30509e477de9da233ab73584b1c8c Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 7 Feb 2025 13:30:16 +0100 Subject: [PATCH 0515/1585] s390/pci: Pull search for parent PF out of zpci_iov_setup_virtfn() This creates a new zpci_iov_find_parent_pf() function which a future commit can use to find if a VF has a configured parent PF. Use zdev->rid instead of zdev->devfn such that the new function can be used before it has been decided if the RID will be exposed and zdev->devfn is set. Also handle the hypotheical case that the RID is not available but there is an otherwise matching zbus. Fixes: 25f39d3dcb48 ("s390/pci: Ignore RID for isolated VFs") Cc: stable@vger.kernel.org Reviewed-by: Halil Pasic Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/pci/pci_iov.c | 56 ++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c index ead062bf2b41c..c7fdf5e79b3cc 100644 --- a/arch/s390/pci/pci_iov.c +++ b/arch/s390/pci/pci_iov.c @@ -60,18 +60,35 @@ static int zpci_iov_link_virtfn(struct pci_dev *pdev, struct pci_dev *virtfn, in return 0; } -int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn) +/** + * zpci_iov_find_parent_pf - Find the parent PF, if any, of the given function + * @zbus: The bus that the PCI function is on, or would be added on + * @zdev: The PCI function + * + * Finds the parent PF, if it exists and is configured, of the given PCI function + * and increments its refcount. Th PF is searched for on the provided bus so the + * caller has to ensure that this is the correct bus to search. This function may + * be used before adding the PCI function to a zbus. + * + * Return: Pointer to the struct pci_dev of the parent PF or NULL if it not + * found. If the function is not a VF or has no RequesterID information, + * NULL is returned as well. + */ +static struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev) { - int i, cand_devfn; - struct zpci_dev *zdev; + int i, vfid, devfn, cand_devfn; struct pci_dev *pdev; - int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/ - int rc = 0; if (!zbus->multifunction) - return 0; - - /* If the parent PF for the given VF is also configured in the + return NULL; + /* Non-VFs and VFs without RID available don't have a parent */ + if (!zdev->vfn || !zdev->rid_available) + return NULL; + /* Linux vfid starts at 0 vfn at 1 */ + vfid = zdev->vfn - 1; + devfn = zdev->rid & ZPCI_RID_MASK_DEVFN; + /* + * If the parent PF for the given VF is also configured in the * instance, it must be on the same zbus. * We can then identify the parent PF by checking what * devfn the VF would have if it belonged to that PF using the PF's @@ -85,15 +102,26 @@ int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn if (!pdev) continue; cand_devfn = pci_iov_virtfn_devfn(pdev, vfid); - if (cand_devfn == virtfn->devfn) { - rc = zpci_iov_link_virtfn(pdev, virtfn, vfid); - /* balance pci_get_slot() */ - pci_dev_put(pdev); - break; - } + if (cand_devfn == devfn) + return pdev; /* balance pci_get_slot() */ pci_dev_put(pdev); } } + return NULL; +} + +int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn) +{ + struct zpci_dev *zdev = to_zpci(virtfn); + struct pci_dev *pdev_pf; + int rc = 0; + + pdev_pf = zpci_iov_find_parent_pf(zbus, zdev); + if (pdev_pf) { + /* Linux' vfids start at 0 while zdev->vfn starts at 1 */ + rc = zpci_iov_link_virtfn(pdev_pf, virtfn, zdev->vfn - 1); + pci_dev_put(pdev_pf); + } return rc; } -- GitLab From 2844ddbd540fc84d7571cca65d6c43088e4d6952 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 7 Feb 2025 13:30:17 +0100 Subject: [PATCH 0516/1585] s390/pci: Fix handling of isolated VFs In contrast to the commit message of the fixed commit VFs whose parent PF is not configured are not always isolated, that is put on their own PCI domain. This is because for VFs to be added to an existing PCI domain it is enough for that PCI domain to share the same topology ID or PCHID. Such a matching PCI domain without a parent PF may exist when a PF from the same PCI card created the domain with the VF being a child of a different, non accessible, PF. While not causing technical issues it makes the rules which VFs are isolated inconsistent. Fix this by explicitly checking that the parent PF exists on the PCI domain determined by the topology ID or PCHID before registering the VF. This works because a parent PF which is under control of this Linux instance must be enabled and configured at the point where its child VFs appear because otherwise SR-IOV could not have been enabled on the parent. Fixes: 25f39d3dcb48 ("s390/pci: Ignore RID for isolated VFs") Cc: stable@vger.kernel.org Reviewed-by: Halil Pasic Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/pci/pci_bus.c | 20 ++++++++++++++++++++ arch/s390/pci/pci_iov.c | 2 +- arch/s390/pci/pci_iov.h | 7 +++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 857afbc4828f0..39a481ec4a402 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -331,6 +331,17 @@ static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev) return rc; } +static bool zpci_bus_is_isolated_vf(struct zpci_bus *zbus, struct zpci_dev *zdev) +{ + struct pci_dev *pdev; + + pdev = zpci_iov_find_parent_pf(zbus, zdev); + if (!pdev) + return true; + pci_dev_put(pdev); + return false; +} + int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops) { bool topo_is_tid = zdev->tid_avail; @@ -345,6 +356,15 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops) topo = topo_is_tid ? zdev->tid : zdev->pchid; zbus = zpci_bus_get(topo, topo_is_tid); + /* + * An isolated VF gets its own domain/bus even if there exists + * a matching domain/bus already + */ + if (zbus && zpci_bus_is_isolated_vf(zbus, zdev)) { + zpci_bus_put(zbus); + zbus = NULL; + } + if (!zbus) { zbus = zpci_bus_alloc(topo, topo_is_tid); if (!zbus) diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c index c7fdf5e79b3cc..191e56a623f62 100644 --- a/arch/s390/pci/pci_iov.c +++ b/arch/s390/pci/pci_iov.c @@ -74,7 +74,7 @@ static int zpci_iov_link_virtfn(struct pci_dev *pdev, struct pci_dev *virtfn, in * found. If the function is not a VF or has no RequesterID information, * NULL is returned as well. */ -static struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev) +struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev) { int i, vfid, devfn, cand_devfn; struct pci_dev *pdev; diff --git a/arch/s390/pci/pci_iov.h b/arch/s390/pci/pci_iov.h index e3fa4e77fc867..d2c2793eb0f34 100644 --- a/arch/s390/pci/pci_iov.h +++ b/arch/s390/pci/pci_iov.h @@ -19,6 +19,8 @@ void zpci_iov_map_resources(struct pci_dev *pdev); int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn); +struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev); + #else /* CONFIG_PCI_IOV */ static inline void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) {} @@ -28,5 +30,10 @@ static inline int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *v { return 0; } + +static inline struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev) +{ + return NULL; +} #endif /* CONFIG_PCI_IOV */ #endif /* __S390_PCI_IOV_h */ -- GitLab From c195b9c6ab9c383d7aa3f4a65879b3ca90cb378b Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Sat, 8 Feb 2025 15:49:07 +0800 Subject: [PATCH 0517/1585] thermal/netlink: Prevent userspace segmentation fault by adjusting UAPI header The intel-lpmd tool [1], which uses the THERMAL_GENL_ATTR_CPU_CAPABILITY attribute to receive HFI events from kernel space, encounters a segmentation fault after commit 1773572863c4 ("thermal: netlink: Add the commands and the events for the thresholds"). The issue arises because the THERMAL_GENL_ATTR_CPU_CAPABILITY raw value was changed while intel_lpmd still uses the old value. Although intel_lpmd can be updated to check the THERMAL_GENL_VERSION and use the appropriate THERMAL_GENL_ATTR_CPU_CAPABILITY value, the commit itself is questionable. The commit introduced a new element in the middle of enum thermal_genl_attr, which affects many existing attributes and introduces potential risks and unnecessary maintenance burdens for userspace thermal netlink event users. Solve the issue by moving the newly introduced THERMAL_GENL_ATTR_TZ_PREV_TEMP attribute to the end of the enum thermal_genl_attr. This ensures that all existing thermal generic netlink attributes remain unaffected. Link: https://github.com/intel/intel-lpmd [1] Fixes: 1773572863c4 ("thermal: netlink: Add the commands and the events for the thresholds") Signed-off-by: Zhang Rui Reviewed-by: Daniel Lezcano Link: https://patch.msgid.link/20250208074907.5679-1-rui.zhang@intel.com [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki --- include/uapi/linux/thermal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h index 349718c271ebf..46a2633d33aaa 100644 --- a/include/uapi/linux/thermal.h +++ b/include/uapi/linux/thermal.h @@ -30,7 +30,6 @@ enum thermal_genl_attr { THERMAL_GENL_ATTR_TZ, THERMAL_GENL_ATTR_TZ_ID, THERMAL_GENL_ATTR_TZ_TEMP, - THERMAL_GENL_ATTR_TZ_PREV_TEMP, THERMAL_GENL_ATTR_TZ_TRIP, THERMAL_GENL_ATTR_TZ_TRIP_ID, THERMAL_GENL_ATTR_TZ_TRIP_TYPE, @@ -54,6 +53,7 @@ enum thermal_genl_attr { THERMAL_GENL_ATTR_THRESHOLD, THERMAL_GENL_ATTR_THRESHOLD_TEMP, THERMAL_GENL_ATTR_THRESHOLD_DIRECTION, + THERMAL_GENL_ATTR_TZ_PREV_TEMP, __THERMAL_GENL_ATTR_MAX, }; #define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1) -- GitLab From a6768c4f92e152265590371975d44c071a5279c7 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 11 Feb 2025 09:47:11 +0100 Subject: [PATCH 0518/1585] thermal/cpufreq_cooling: Remove structure member documentation The structure member documentation refers to a member which does not exist any more. Remove it. Link: https://lore.kernel.org/all/202501220046.h3PMBCti-lkp@intel.com/ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501220046.h3PMBCti-lkp@intel.com/ Signed-off-by: Daniel Lezcano Acked-by: Viresh Kumar Link: https://patch.msgid.link/20250211084712.2746705-1-daniel.lezcano@linaro.org [ rjw: Minor changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/thermal/cpufreq_cooling.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c index 280071be30b15..6b7ab1814c12d 100644 --- a/drivers/thermal/cpufreq_cooling.c +++ b/drivers/thermal/cpufreq_cooling.c @@ -57,8 +57,6 @@ struct time_in_idle { * @max_level: maximum cooling level. One less than total number of valid * cpufreq frequencies. * @em: Reference on the Energy Model of the device - * @cdev: thermal_cooling_device pointer to keep track of the - * registered cooling device. * @policy: cpufreq policy. * @cooling_ops: cpufreq callbacks to thermal cooling device ops * @idle_time: idle time stats -- GitLab From 7d1163fc08936fcb5cf5d9daf366c322c3b4e882 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Fri, 7 Feb 2025 15:39:01 +0100 Subject: [PATCH 0519/1585] arm64: dts: rockchip: disable IOMMU when running rk3588 in PCIe endpoint mode Commit da92d3dfc871 ("arm64: dts: rockchip: enable the mmu600_pcie IOMMU on the rk3588 SoC") enabled the mmu600_pcie IOMMU, both in the normal case (when all PCIe controllers are running in Root Complex mode) and in the case when running the pcie3x4 PCIe controller in Endpoint mode. There have been no issues detected when running the PCIe controllers in Root Complex mode. During PCI probe time, we will add a SID to the IOMMU for each PCI device enumerated on the bus, including the root port itself. However, when running the pcie3x4 PCIe controller in Endpoint mode, we will only add a single SID to the IOMMU (the SID specified in the iommus DT property). The enablement of IOMMU in endpoint mode was verified on setup with two Rock 5b:s, where the BDF of the Root Complex has BDF (00:00.0). A Root Complex sending a TLP to the Endpoint will have Requester ID set to the BDF of the initiator. On the EP side, the Requester ID will then be used as the SID. This works fine if the Root Complex has a BDF that matches the iommus DT property, however, if the Root Complex has any other BDF, we will see something like: arm-smmu-v3 fc900000.iommu: event: C_BAD_STREAMID client: (unassigned sid) sid: 0x1600 ssid: 0x0 on the endpoint side. For PCIe controllers running in endpoint mode that always uses the incoming Requester ID as the SID, the iommus DT property simply isn't a viable solution. (Neither is iommu-map a viable solution, as there is no enumeration done on the endpoint side.) Thus, partly revert commit da92d3dfc871 ("arm64: dts: rockchip: enable the mmu600_pcie IOMMU on the rk3588 SoC") by disabling the PCI IOMMU when running the pcie3x4 PCIe controller in Endpoint mode. Since the PCI IOMMU is working as expected in the normal case, keep it enabled when running all PCIe controllers in Root Complex mode. Fixes: da92d3dfc871 ("arm64: dts: rockchip: enable the mmu600_pcie IOMMU on the rk3588 SoC") Signed-off-by: Niklas Cassel Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20250207143900.2047949-2-cassel@kernel.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-extra.dtsi | 1 - arch/arm64/boot/dts/rockchip/rk3588-rock-5b-pcie-ep.dtso | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-extra.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-extra.dtsi index 4a950907ea6f5..840b638af1c24 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-extra.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-extra.dtsi @@ -213,7 +213,6 @@ interrupt-names = "sys", "pmc", "msg", "legacy", "err", "dma0", "dma1", "dma2", "dma3"; max-link-speed = <3>; - iommus = <&mmu600_pcie 0x0000>; num-lanes = <4>; phys = <&pcie30phy>; phy-names = "pcie-phy"; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b-pcie-ep.dtso b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b-pcie-ep.dtso index 672d748fcc67c..f229cb49da680 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b-pcie-ep.dtso +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b-pcie-ep.dtso @@ -23,3 +23,7 @@ vpcie3v3-supply = <&vcc3v3_pcie30>; status = "okay"; }; + +&mmu600_pcie { + status = "disabled"; +}; -- GitLab From 8546cfd08aa4b982acd2357403a1f15495d622ec Mon Sep 17 00:00:00 2001 From: Patrick Wildt Date: Mon, 10 Feb 2025 22:37:29 +0100 Subject: [PATCH 0520/1585] arm64: dts: rockchip: adjust SMMU interrupt type on rk3588 The SMMU architecture requires wired interrupts to be edge triggered, which does not align with the DT description for the RK3588. This leads to interrupt storms, as the SMMU continues to hold the pin high and only pulls it down for a short amount when issuing an IRQ. Update the DT description to be in line with the spec and perceived reality. Signed-off-by: Patrick Wildt Fixes: cd81d3a0695c ("arm64: dts: rockchip: add rk3588 pcie and php IOMMUs") Reviewed-by: Niklas Cassel Link: https://lore.kernel.org/r/Z6pxme2Chmf3d3uK@windev.fritz.box Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-base.dtsi | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi index 978de506d4348..c3abdfb04f8f4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi @@ -549,10 +549,10 @@ mmu600_pcie: iommu@fc900000 { compatible = "arm,smmu-v3"; reg = <0x0 0xfc900000 0x0 0x200000>; - interrupts = , - , - , - ; + interrupts = , + , + , + ; interrupt-names = "eventq", "gerror", "priq", "cmdq-sync"; #iommu-cells = <1>; }; @@ -560,10 +560,10 @@ mmu600_php: iommu@fcb00000 { compatible = "arm,smmu-v3"; reg = <0x0 0xfcb00000 0x0 0x200000>; - interrupts = , - , - , - ; + interrupts = , + , + , + ; interrupt-names = "eventq", "gerror", "priq", "cmdq-sync"; #iommu-cells = <1>; status = "disabled"; -- GitLab From acc18e1c1d8c0d59d793cf87790ccfcafb1bf5f0 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 4 Feb 2025 11:02:32 +0000 Subject: [PATCH 0521/1585] btrfs: fix stale page cache after race between readahead and direct IO write After commit ac325fc2aad5 ("btrfs: do not hold the extent lock for entire read") we can now trigger a race between a task doing a direct IO write and readahead. When this race is triggered it results in tasks getting stale data when they attempt do a buffered read (including the task that did the direct IO write). This race can be sporadically triggered with test case generic/418, failing like this: $ ./check generic/418 FSTYP -- btrfs PLATFORM -- Linux/x86_64 debian0 6.13.0-rc7-btrfs-next-185+ #17 SMP PREEMPT_DYNAMIC Mon Feb 3 12:28:46 WET 2025 MKFS_OPTIONS -- /dev/sdc MOUNT_OPTIONS -- /dev/sdc /home/fdmanana/btrfs-tests/scratch_1 generic/418 14s ... - output mismatch (see /home/fdmanana/git/hub/xfstests/results//generic/418.out.bad) --- tests/generic/418.out 2020-06-10 19:29:03.850519863 +0100 +++ /home/fdmanana/git/hub/xfstests/results//generic/418.out.bad 2025-02-03 15:42:36.974609476 +0000 @@ -1,2 +1,5 @@ QA output created by 418 +cmpbuf: offset 0: Expected: 0x1, got 0x0 +[6:0] FAIL - comparison failed, offset 24576 +diotest -wp -b 4096 -n 8 -i 4 failed at loop 3 Silence is golden ... (Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/generic/418.out /home/fdmanana/git/hub/xfstests/results//generic/418.out.bad' to see the entire diff) Ran: generic/418 Failures: generic/418 Failed 1 of 1 tests The race happens like this: 1) A file has a prealloc extent for the range [16K, 28K); 2) Task A starts a direct IO write against file range [24K, 28K). At the start of the direct IO write it invalidates the page cache at __iomap_dio_rw() with kiocb_invalidate_pages() for the 4K page at file offset 24K; 3) Task A enters btrfs_dio_iomap_begin() and locks the extent range [24K, 28K); 4) Task B starts a readahead for file range [16K, 28K), entering btrfs_readahead(). First it attempts to read the page at offset 16K by entering btrfs_do_readpage(), where it calls get_extent_map(), locks the range [16K, 20K) and gets the extent map for the range [16K, 28K), caching it into the 'em_cached' variable declared in the local stack of btrfs_readahead(), and then unlocks the range [16K, 20K). Since the extent map has the prealloc flag, at btrfs_do_readpage() we zero out the page's content and don't submit any bio to read the page from the extent. Then it attempts to read the page at offset 20K entering btrfs_do_readpage() where we reuse the previously cached extent map (decided by get_extent_map()) since it spans the page's range and it's still in the inode's extent map tree. Just like for the previous page, we zero out the page's content since the extent map has the prealloc flag set. Then it attempts to read the page at offset 24K entering btrfs_do_readpage() where we reuse the previously cached extent map (decided by get_extent_map()) since it spans the page's range and it's still in the inode's extent map tree. Just like for the previous pages, we zero out the page's content since the extent map has the prealloc flag set. Note that we didn't lock the extent range [24K, 28K), so we didn't synchronize with the ongoing direct IO write being performed by task A; 5) Task A enters btrfs_create_dio_extent() and creates an ordered extent for the range [24K, 28K), with the flags BTRFS_ORDERED_DIRECT and BTRFS_ORDERED_PREALLOC set; 6) Task A unlocks the range [24K, 28K) at btrfs_dio_iomap_begin(); 7) The ordered extent enters btrfs_finish_one_ordered() and locks the range [24K, 28K); 8) Task A enters fs/iomap/direct-io.c:iomap_dio_complete() and it tries to invalidate the page at offset 24K by calling kiocb_invalidate_post_direct_write(), resulting in a call chain that ends up at btrfs_release_folio(). The btrfs_release_folio() call ends up returning false because the range for the page at file offset 24K is currently locked by the task doing the ordered extent completion in the previous step (7), so we have: btrfs_release_folio() -> __btrfs_release_folio() -> try_release_extent_mapping() -> try_release_extent_state() This last function checking that the range is locked and returning false and propagating it up to btrfs_release_folio(). So this results in a failure to invalidate the page and kiocb_invalidate_post_direct_write() triggers this message logged in dmesg: Page cache invalidation failure on direct I/O. Possible data corruption due to collision with buffered I/O! After this we leave the page cache with stale data for the file range [24K, 28K), filled with zeroes instead of the data written by direct IO write (all bytes with a 0x01 value), so any task attempting to read with buffered IO, including the task that did the direct IO write, will get all bytes in the range with a 0x00 value instead of the written data. Fix this by locking the range, with btrfs_lock_and_flush_ordered_range(), at the two callers of btrfs_do_readpage() instead of doing it at get_extent_map(), just like we did before commit ac325fc2aad5 ("btrfs: do not hold the extent lock for entire read"), and unlocking the range after all the calls to btrfs_do_readpage(). This way we never reuse a cached extent map without flushing any pending ordered extents from a concurrent direct IO write. Fixes: ac325fc2aad5 ("btrfs: do not hold the extent lock for entire read") Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 6f64ee16744d6..d8ded597edada 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -898,7 +898,6 @@ static struct extent_map *get_extent_map(struct btrfs_inode *inode, u64 len, struct extent_map **em_cached) { struct extent_map *em; - struct extent_state *cached_state = NULL; ASSERT(em_cached); @@ -914,14 +913,12 @@ static struct extent_map *get_extent_map(struct btrfs_inode *inode, *em_cached = NULL; } - btrfs_lock_and_flush_ordered_range(inode, start, start + len - 1, &cached_state); em = btrfs_get_extent(inode, folio, start, len); if (!IS_ERR(em)) { BUG_ON(*em_cached); refcount_inc(&em->refs); *em_cached = em; } - unlock_extent(&inode->io_tree, start, start + len - 1, &cached_state); return em; } @@ -1078,11 +1075,18 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached, int btrfs_read_folio(struct file *file, struct folio *folio) { + struct btrfs_inode *inode = folio_to_inode(folio); + const u64 start = folio_pos(folio); + const u64 end = start + folio_size(folio) - 1; + struct extent_state *cached_state = NULL; struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ }; struct extent_map *em_cached = NULL; int ret; + btrfs_lock_and_flush_ordered_range(inode, start, end, &cached_state); ret = btrfs_do_readpage(folio, &em_cached, &bio_ctrl, NULL); + unlock_extent(&inode->io_tree, start, end, &cached_state); + free_extent_map(em_cached); /* @@ -2379,12 +2383,20 @@ void btrfs_readahead(struct readahead_control *rac) { struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ | REQ_RAHEAD }; struct folio *folio; + struct btrfs_inode *inode = BTRFS_I(rac->mapping->host); + const u64 start = readahead_pos(rac); + const u64 end = start + readahead_length(rac) - 1; + struct extent_state *cached_state = NULL; struct extent_map *em_cached = NULL; u64 prev_em_start = (u64)-1; + btrfs_lock_and_flush_ordered_range(inode, start, end, &cached_state); + while ((folio = readahead_folio(rac)) != NULL) btrfs_do_readpage(folio, &em_cached, &bio_ctrl, &prev_em_start); + unlock_extent(&inode->io_tree, start, end, &cached_state); + if (em_cached) free_extent_map(em_cached); submit_one_bio(&bio_ctrl); -- GitLab From da2dccd7451de62b175fb8f0808d644959e964c7 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 5 Feb 2025 17:36:48 +0000 Subject: [PATCH 0522/1585] btrfs: fix hole expansion when writing at an offset beyond EOF At btrfs_write_check() if our file's i_size is not sector size aligned and we have a write that starts at an offset larger than the i_size that falls within the same page of the i_size, then we end up not zeroing the file range [i_size, write_offset). The code is this: start_pos = round_down(pos, fs_info->sectorsize); oldsize = i_size_read(inode); if (start_pos > oldsize) { /* Expand hole size to cover write data, preventing empty gap */ loff_t end_pos = round_up(pos + count, fs_info->sectorsize); ret = btrfs_cont_expand(BTRFS_I(inode), oldsize, end_pos); if (ret) return ret; } So if our file's i_size is 90269 bytes and a write at offset 90365 bytes comes in, we get 'start_pos' set to 90112 bytes, which is less than the i_size and therefore we don't zero out the range [90269, 90365) by calling btrfs_cont_expand(). This is an old bug introduced in commit 9036c10208e1 ("Btrfs: update hole handling v2"), from 2008, and the buggy code got moved around over the years. Fix this by discarding 'start_pos' and comparing against the write offset ('pos') without any alignment. This bug was recently exposed by test case generic/363 which tests this scenario by polluting ranges beyond EOF with an mmap write and than verify that after a file increases we get zeroes for the range which is supposed to be a hole and not what we wrote with the previous mmaped write. We're only seeing this exposed now because generic/363 used to run only on xfs until last Sunday's fstests update. The test was failing like this: $ ./check generic/363 FSTYP -- btrfs PLATFORM -- Linux/x86_64 debian0 6.13.0-rc7-btrfs-next-185+ #17 SMP PREEMPT_DYNAMIC Mon Feb 3 12:28:46 WET 2025 MKFS_OPTIONS -- /dev/sdc MOUNT_OPTIONS -- /dev/sdc /home/fdmanana/btrfs-tests/scratch_1 generic/363 0s ... [failed, exit status 1]- output mismatch (see /home/fdmanana/git/hub/xfstests/results//generic/363.out.bad) --- tests/generic/363.out 2025-02-05 15:31:14.013646509 +0000 +++ /home/fdmanana/git/hub/xfstests/results//generic/363.out.bad 2025-02-05 17:25:33.112630781 +0000 @@ -1 +1,46 @@ QA output created by 363 +READ BAD DATA: offset = 0xdcad, size = 0xd921, fname = /home/fdmanana/btrfs-tests/dev/junk +OFFSET GOOD BAD RANGE +0x1609d 0x0000 0x3104 0x0 +operation# (mod 256) for the bad data may be 4 +0x1609e 0x0000 0x0472 0x1 +operation# (mod 256) for the bad data may be 4 ... (Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/generic/363.out /home/fdmanana/git/hub/xfstests/results//generic/363.out.bad' to see the entire diff) Ran: generic/363 Failures: generic/363 Failed 1 of 1 tests Fixes: 9036c10208e1 ("Btrfs: update hole handling v2") CC: stable@vger.kernel.org Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 36f51c311bb12..ed3c0d6546c5d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1039,7 +1039,6 @@ int btrfs_write_check(struct kiocb *iocb, size_t count) loff_t pos = iocb->ki_pos; int ret; loff_t oldsize; - loff_t start_pos; /* * Quickly bail out on NOWAIT writes if we don't have the nodatacow or @@ -1066,9 +1065,8 @@ int btrfs_write_check(struct kiocb *iocb, size_t count) inode_inc_iversion(inode); } - start_pos = round_down(pos, fs_info->sectorsize); oldsize = i_size_read(inode); - if (start_pos > oldsize) { + if (pos > oldsize) { /* Expand hole size to cover write data, preventing empty gap */ loff_t end_pos = round_up(pos + count, fs_info->sectorsize); -- GitLab From 5805402dcc56241987bca674a1b4da79a249bab7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Feb 2025 10:52:42 +0000 Subject: [PATCH 0523/1585] vxlan: check vxlan_vnigroup_init() return value vxlan_init() must check vxlan_vnigroup_init() success otherwise a crash happens later, spotted by syzbot. Oops: general protection fault, probably for non-canonical address 0xdffffc000000002c: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000160-0x0000000000000167] CPU: 0 UID: 0 PID: 7313 Comm: syz-executor147 Not tainted 6.14.0-rc1-syzkaller-00276-g69b54314c975 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:vxlan_vnigroup_uninit+0x89/0x500 drivers/net/vxlan/vxlan_vnifilter.c:912 Code: 00 48 8b 44 24 08 4c 8b b0 98 41 00 00 49 8d 86 60 01 00 00 48 89 c2 48 89 44 24 10 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 4d 04 00 00 49 8b 86 60 01 00 00 48 ba 00 00 00 RSP: 0018:ffffc9000cc1eea8 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000001 RCX: ffffffff8672effb RDX: 000000000000002c RSI: ffffffff8672ecb9 RDI: ffff8880461b4f18 RBP: ffff8880461b4ef4 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000020000 R13: ffff8880461b0d80 R14: 0000000000000000 R15: dffffc0000000000 FS: 00007fecfa95d6c0(0000) GS:ffff88806a600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fecfa95cfb8 CR3: 000000004472c000 CR4: 0000000000352ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: vxlan_uninit+0x1ab/0x200 drivers/net/vxlan/vxlan_core.c:2942 unregister_netdevice_many_notify+0x12d6/0x1f30 net/core/dev.c:11824 unregister_netdevice_many net/core/dev.c:11866 [inline] unregister_netdevice_queue+0x307/0x3f0 net/core/dev.c:11736 register_netdevice+0x1829/0x1eb0 net/core/dev.c:10901 __vxlan_dev_create+0x7c6/0xa30 drivers/net/vxlan/vxlan_core.c:3981 vxlan_newlink+0xd1/0x130 drivers/net/vxlan/vxlan_core.c:4407 rtnl_newlink_create net/core/rtnetlink.c:3795 [inline] __rtnl_newlink net/core/rtnetlink.c:3906 [inline] Fixes: f9c4bb0b245c ("vxlan: vni filtering support on collect metadata device") Reported-by: syzbot+6a9624592218c2c5e7aa@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/67a9d9b4.050a0220.110943.002d.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Roopa Prabhu Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20250210105242.883482-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 05c10acb2a57e..92516189e792f 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2898,8 +2898,11 @@ static int vxlan_init(struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); int err; - if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) - vxlan_vnigroup_init(vxlan); + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) { + err = vxlan_vnigroup_init(vxlan); + if (err) + return err; + } err = gro_cells_init(&vxlan->gro_cells, dev); if (err) -- GitLab From 1942b1c6f687b9d1efc93f35239f185a84900e93 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 8 Feb 2025 11:52:23 +0000 Subject: [PATCH 0524/1585] net: phylink: make configuring clock-stop dependent on MAC support We should not be configuring the PHYs clock-stop settings unless the MAC supports phylink managed EEE. Make this dependent on MAC support. This was noticed in a suspicious RCU usage report from the kernel test robot (the suspicious RCU usage due to calling phy_detach() remains unaddressed, but is triggered by the error this was generating.) Fixes: 03abf2a7c654 ("net: phylink: add EEE management") Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1tgjNn-003q0w-Pw@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 214b62fba991e..b00a315de0601 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -2265,12 +2265,15 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, /* Allow the MAC to stop its clock if the PHY has the capability */ pl->mac_tx_clk_stop = phy_eee_tx_clock_stop_capable(phy) > 0; - /* Explicitly configure whether the PHY is allowed to stop it's - * receive clock. - */ - ret = phy_eee_rx_clock_stop(phy, pl->config->eee_rx_clk_stop_enable); - if (ret == -EOPNOTSUPP) - ret = 0; + if (pl->mac_supports_eee_ops) { + /* Explicitly configure whether the PHY is allowed to stop it's + * receive clock. + */ + ret = phy_eee_rx_clock_stop(phy, + pl->config->eee_rx_clk_stop_enable); + if (ret == -EOPNOTSUPP) + ret = 0; + } return ret; } -- GitLab From f1bf10d7e909fe898a112f5cae1e97ce34d6484d Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Tue, 11 Feb 2025 10:00:25 +0000 Subject: [PATCH 0525/1585] cifs: pick channels for individual subrequests The netfs library could break down a read request into multiple subrequests. When multichannel is used, there is potential to improve performance when each of these subrequests pick a different channel. Today we call cifs_pick_channel when the main read request is initialized in cifs_init_request. This change moves this to cifs_prepare_read, which is the right place to pick channel since it gets called for each subrequest. Interestingly cifs_prepare_write already does channel selection for individual subreq, but looks like it was missed for read. This is especially important when multichannel is used with increased rasize. In my test setup, with rasize set to 8MB, a sequential read of large file was taking 11.5s without this change. With the change, it completed in 9s. The difference is even more signigicant with bigger rasize. Cc: Cc: David Howells Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 1 - fs/smb/client/file.c | 7 ++++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index ac1f890a0d543..4bdd6a43e5215 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1508,7 +1508,6 @@ struct cifs_io_parms { struct cifs_io_request { struct netfs_io_request rreq; struct cifsFileInfo *cfile; - struct TCP_Server_Info *server; pid_t pid; }; diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 79de2f2f9c41a..8582cf61242c6 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -147,7 +147,7 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq) struct netfs_io_request *rreq = subreq->rreq; struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq); struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq); - struct TCP_Server_Info *server = req->server; + struct TCP_Server_Info *server; struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb); size_t size; int rc = 0; @@ -156,6 +156,8 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq) rdata->xid = get_xid(); rdata->have_xid = true; } + + server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); rdata->server = server; if (cifs_sb->ctx->rsize == 0) @@ -198,7 +200,7 @@ static void cifs_issue_read(struct netfs_io_subrequest *subreq) struct netfs_io_request *rreq = subreq->rreq; struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq); struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq); - struct TCP_Server_Info *server = req->server; + struct TCP_Server_Info *server = rdata->server; int rc = 0; cifs_dbg(FYI, "%s: op=%08x[%x] mapping=%p len=%zu/%zu\n", @@ -266,7 +268,6 @@ static int cifs_init_request(struct netfs_io_request *rreq, struct file *file) open_file = file->private_data; rreq->netfs_priv = file->private_data; req->cfile = cifsFileInfo_get(open_file); - req->server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) req->pid = req->cfile->pid; } else if (rreq->origin != NETFS_WRITEBACK) { -- GitLab From 06ea2c9c4163b8a8fde890a9e21d1059f22bb76d Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 9 Feb 2025 20:07:55 +0000 Subject: [PATCH 0526/1585] rxrpc: Fix alteration of headers whilst zerocopy pending rxrpc: Fix alteration of headers whilst zerocopy pending AF_RXRPC now uses MSG_SPLICE_PAGES to do zerocopy of the DATA packets when it transmits them, but to reduce the number of descriptors required in the DMA ring, it allocates a space for the protocol header in the memory immediately before the data content so that it can include both in a single descriptor. This is used for either the main RX header or the smaller jumbo subpacket header as appropriate: +----+------+ | RX | | +-+--+DATA | |JH| | +--+------+ Now, when it stitches a large jumbo packet together from a number of individual DATA packets (each of which is 1412 bytes of data), it uses the full RX header from the first and then the jumbo subpacket header for the rest of the components: +---+--+------+--+------+--+------+--+------+--+------+--+------+ |UDP|RX|DATA |JH|DATA |JH|DATA |JH|DATA |JH|DATA |JH|DATA | +---+--+------+--+------+--+------+--+------+--+------+--+------+ As mentioned, the main RX header and the jumbo header overlay one another in memory and the formats don't match, so switching from one to the other means rearranging the fields and adjusting the flags. However, now that TLP has been included, it wants to retransmit the last subpacket as a new data packet on its own, which means switching between the header formats... and if the transmission is still pending, because of the MSG_SPLICE_PAGES, we end up corrupting the jumbo subheader. This has a variety of effects, with the RX service number overwriting the jumbo checksum/key number field and the RX checksum overwriting the jumbo flags - resulting in, at the very least, a confused connection-level abort from the peer. Fix this by leaving the jumbo header in the allocation with the data, but allocating the RX header from the page frag allocator and concocting it on the fly at the point of transmission as it does for ACK packets. Fixes: 7c482665931b ("rxrpc: Implement RACK/TLP to deal with transmission stalls [RFC8985]") Signed-off-by: David Howells cc: Marc Dionne cc: Chuck Lever cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/2181712.1739131675@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- net/rxrpc/ar-internal.h | 7 +++--- net/rxrpc/output.c | 50 ++++++++++++++++++++++++++++------------- net/rxrpc/rxkad.c | 13 +++++------ net/rxrpc/sendmsg.c | 4 +--- net/rxrpc/txbuf.c | 37 +++++++++--------------------- 5 files changed, 54 insertions(+), 57 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index f251845fe532c..5e740c4862034 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -327,8 +327,8 @@ struct rxrpc_local { * packet with a maximum set of jumbo subpackets or a PING ACK padded * out to 64K with zeropages for PMTUD. */ - struct kvec kvec[RXRPC_MAX_NR_JUMBO > 3 + 16 ? - RXRPC_MAX_NR_JUMBO : 3 + 16]; + struct kvec kvec[1 + RXRPC_MAX_NR_JUMBO > 3 + 16 ? + 1 + RXRPC_MAX_NR_JUMBO : 3 + 16]; }; /* @@ -874,8 +874,7 @@ struct rxrpc_txbuf { #define RXRPC_TXBUF_RESENT 0x100 /* Set if has been resent */ __be16 cksum; /* Checksum to go in header */ bool jumboable; /* Can be non-terminal jumbo subpacket */ - u8 nr_kvec; /* Amount of kvec[] used */ - struct kvec kvec[1]; + void *data; /* Data with preceding jumbo header */ }; static inline bool rxrpc_sending_to_server(const struct rxrpc_txbuf *txb) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 6f7a125d6e908..95905b85a8d71 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -428,13 +428,13 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_send_data_req *req, struct rxrpc_txbuf *txb, + struct rxrpc_wire_header *whdr, rxrpc_serial_t serial, int subpkt) { - struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; - struct rxrpc_jumbo_header *jumbo = (void *)(whdr + 1) - sizeof(*jumbo); + struct rxrpc_jumbo_header *jumbo = txb->data - sizeof(*jumbo); enum rxrpc_req_ack_trace why; struct rxrpc_connection *conn = call->conn; - struct kvec *kv = &call->local->kvec[subpkt]; + struct kvec *kv = &call->local->kvec[1 + subpkt]; size_t len = txb->pkt_len; bool last; u8 flags; @@ -491,18 +491,15 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, } dont_set_request_ack: - /* The jumbo header overlays the wire header in the txbuf. */ + /* There's a jumbo header prepended to the data if we need it. */ if (subpkt < req->n - 1) flags |= RXRPC_JUMBO_PACKET; else flags &= ~RXRPC_JUMBO_PACKET; if (subpkt == 0) { whdr->flags = flags; - whdr->serial = htonl(txb->serial); whdr->cksum = txb->cksum; - whdr->serviceId = htons(conn->service_id); - kv->iov_base = whdr; - len += sizeof(*whdr); + kv->iov_base = txb->data; } else { jumbo->flags = flags; jumbo->pad = 0; @@ -535,7 +532,9 @@ static unsigned int rxrpc_prepare_txqueue(struct rxrpc_txqueue *tq, /* * Prepare a (jumbo) packet for transmission. */ -static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req) +static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, + struct rxrpc_send_data_req *req, + struct rxrpc_wire_header *whdr) { struct rxrpc_txqueue *tq = req->tq; rxrpc_serial_t serial; @@ -549,6 +548,18 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se /* Each transmission of a Tx packet needs a new serial number */ serial = rxrpc_get_next_serials(call->conn, req->n); + whdr->epoch = htonl(call->conn->proto.epoch); + whdr->cid = htonl(call->cid); + whdr->callNumber = htonl(call->call_id); + whdr->seq = htonl(seq); + whdr->serial = htonl(serial); + whdr->type = RXRPC_PACKET_TYPE_DATA; + whdr->flags = 0; + whdr->userStatus = 0; + whdr->securityIndex = call->security_ix; + whdr->_rsvd = 0; + whdr->serviceId = htons(call->conn->service_id); + call->tx_last_serial = serial + req->n - 1; call->tx_last_sent = req->now; xmit_ts = rxrpc_prepare_txqueue(tq, req); @@ -576,7 +587,7 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se if (i + 1 == req->n) /* Only sample the last subpacket in a jumbo. */ __set_bit(ix, &tq->rtt_samples); - len += rxrpc_prepare_data_subpacket(call, req, txb, serial, i); + len += rxrpc_prepare_data_subpacket(call, req, txb, whdr, serial, i); serial++; seq++; i++; @@ -618,6 +629,7 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se } rxrpc_set_keepalive(call, req->now); + page_frag_free(whdr); return len; } @@ -626,25 +638,33 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se */ void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req) { + struct rxrpc_wire_header *whdr; struct rxrpc_connection *conn = call->conn; enum rxrpc_tx_point frag; struct rxrpc_txqueue *tq = req->tq; struct rxrpc_txbuf *txb; struct msghdr msg; rxrpc_seq_t seq = req->seq; - size_t len; + size_t len = sizeof(*whdr); bool new_call = test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags); int ret, stat_ix; _enter("%x,%x-%x", tq->qbase, seq, seq + req->n - 1); + whdr = page_frag_alloc(&call->local->tx_alloc, sizeof(*whdr), GFP_NOFS); + if (!whdr) + return; /* Drop the packet if no memory. */ + + call->local->kvec[0].iov_base = whdr; + call->local->kvec[0].iov_len = sizeof(*whdr); + stat_ix = umin(req->n, ARRAY_SIZE(call->rxnet->stat_tx_jumbo)) - 1; atomic_inc(&call->rxnet->stat_tx_jumbo[stat_ix]); - len = rxrpc_prepare_data_packet(call, req); + len += rxrpc_prepare_data_packet(call, req, whdr); txb = tq->bufs[seq & RXRPC_TXQ_MASK]; - iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, req->n, len); + iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, 1 + req->n, len); msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; @@ -695,13 +715,13 @@ void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req if (ret == -EMSGSIZE) { rxrpc_inc_stat(call->rxnet, stat_tx_data_send_msgsize); - trace_rxrpc_tx_packet(call->debug_id, call->local->kvec[0].iov_base, frag); + trace_rxrpc_tx_packet(call->debug_id, whdr, frag); ret = 0; } else if (ret < 0) { rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail); trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, frag); } else { - trace_rxrpc_tx_packet(call->debug_id, call->local->kvec[0].iov_base, frag); + trace_rxrpc_tx_packet(call->debug_id, whdr, frag); } rxrpc_tx_backoff(call, ret); diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 62b09d23ec08c..6cb37b0eb77f4 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -257,8 +257,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, struct rxrpc_txbuf *txb, struct skcipher_request *req) { - struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; - struct rxkad_level1_hdr *hdr = (void *)(whdr + 1); + struct rxkad_level1_hdr *hdr = txb->data; struct rxrpc_crypt iv; struct scatterlist sg; size_t pad; @@ -274,7 +273,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, pad = RXKAD_ALIGN - pad; pad &= RXKAD_ALIGN - 1; if (pad) { - memset(txb->kvec[0].iov_base + txb->offset, 0, pad); + memset(txb->data + txb->offset, 0, pad); txb->pkt_len += pad; } @@ -300,8 +299,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, struct skcipher_request *req) { const struct rxrpc_key_token *token; - struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; - struct rxkad_level2_hdr *rxkhdr = (void *)(whdr + 1); + struct rxkad_level2_hdr *rxkhdr = txb->data; struct rxrpc_crypt iv; struct scatterlist sg; size_t content, pad; @@ -319,7 +317,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, txb->pkt_len = round_up(content, RXKAD_ALIGN); pad = txb->pkt_len - content; if (pad) - memset(txb->kvec[0].iov_base + txb->offset, 0, pad); + memset(txb->data + txb->offset, 0, pad); /* encrypt from the session key */ token = call->conn->key->payload.data[0]; @@ -407,9 +405,8 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) /* Clear excess space in the packet */ if (txb->pkt_len < txb->alloc_size) { - struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; size_t gap = txb->alloc_size - txb->pkt_len; - void *p = whdr + 1; + void *p = txb->data; memset(p + txb->pkt_len, 0, gap); } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 584397aba4a07..84dc6c94f23b1 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -419,7 +419,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, size_t copy = umin(txb->space, msg_data_left(msg)); _debug("add %zu", copy); - if (!copy_from_iter_full(txb->kvec[0].iov_base + txb->offset, + if (!copy_from_iter_full(txb->data + txb->offset, copy, &msg->msg_iter)) goto efault; _debug("added"); @@ -445,8 +445,6 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, ret = call->security->secure_packet(call, txb); if (ret < 0) goto out; - - txb->kvec[0].iov_len += txb->len; rxrpc_queue_packet(rx, call, txb, notify_end_tx); txb = NULL; } diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index 131d9e55c8e97..c550991d48faa 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -19,17 +19,19 @@ atomic_t rxrpc_nr_txbuf; struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_size, size_t data_align, gfp_t gfp) { - struct rxrpc_wire_header *whdr; struct rxrpc_txbuf *txb; - size_t total, hoff; + size_t total, doff, jsize = sizeof(struct rxrpc_jumbo_header); void *buf; txb = kzalloc(sizeof(*txb), gfp); if (!txb) return NULL; - hoff = round_up(sizeof(*whdr), data_align) - sizeof(*whdr); - total = hoff + sizeof(*whdr) + data_size; + /* We put a jumbo header in the buffer, but not a full wire header to + * avoid delayed-corruption problems with zerocopy. + */ + doff = round_up(jsize, data_align); + total = doff + data_size; data_align = umax(data_align, L1_CACHE_BYTES); mutex_lock(&call->conn->tx_data_alloc_lock); @@ -41,30 +43,15 @@ struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_ return NULL; } - whdr = buf + hoff; - refcount_set(&txb->ref, 1); txb->call_debug_id = call->debug_id; txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids); txb->alloc_size = data_size; txb->space = data_size; - txb->offset = sizeof(*whdr); + txb->offset = 0; txb->flags = call->conn->out_clientflag; txb->seq = call->send_top + 1; - txb->nr_kvec = 1; - txb->kvec[0].iov_base = whdr; - txb->kvec[0].iov_len = sizeof(*whdr); - - whdr->epoch = htonl(call->conn->proto.epoch); - whdr->cid = htonl(call->cid); - whdr->callNumber = htonl(call->call_id); - whdr->seq = htonl(txb->seq); - whdr->type = RXRPC_PACKET_TYPE_DATA; - whdr->flags = 0; - whdr->userStatus = 0; - whdr->securityIndex = call->security_ix; - whdr->_rsvd = 0; - whdr->serviceId = htons(call->dest_srx.srx_service); + txb->data = buf + doff; trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 1, rxrpc_txbuf_alloc_data); @@ -90,14 +77,10 @@ void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what) static void rxrpc_free_txbuf(struct rxrpc_txbuf *txb) { - int i; - trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 0, rxrpc_txbuf_free); - for (i = 0; i < txb->nr_kvec; i++) - if (txb->kvec[i].iov_base && - !is_zero_pfn(page_to_pfn(virt_to_page(txb->kvec[i].iov_base)))) - page_frag_free(txb->kvec[i].iov_base); + if (txb->data) + page_frag_free(txb->data); kfree(txb); atomic_dec(&rxrpc_nr_txbuf); } -- GitLab From e589adf5b70c07b1ab974d077046fdbf583b2f36 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 6 Feb 2025 09:51:08 -0800 Subject: [PATCH 0527/1585] iavf: Fix a locking bug in an error path If the netdev lock has been obtained, unlock it before returning. This bug has been detected by the Clang thread-safety analyzer. Fixes: afc664987ab3 ("eth: iavf: extend the netdev_lock usage") Signed-off-by: Bart Van Assche Link: https://patch.msgid.link/20250206175114.1974171-28-bvanassche@acm.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/iavf/iavf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 2d7a18fcc3be4..852e5b62f0a5d 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2903,8 +2903,8 @@ static void iavf_watchdog_task(struct work_struct *work) } mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); restart_watchdog: + netdev_unlock(netdev); if (adapter->state >= __IAVF_DOWN) queue_work(adapter->wq, &adapter->adminq_task); if (adapter->aq_required) -- GitLab From 59115e2e25f42924181055ed7cc1d123af7598b7 Mon Sep 17 00:00:00 2001 From: Naman Jain Date: Thu, 16 Jan 2025 06:12:24 +0000 Subject: [PATCH 0528/1585] x86/hyperv/vtl: Stop kernel from probing VTL0 low memory For Linux, running in Hyper-V VTL (Virtual Trust Level), kernel in VTL2 tries to access VTL0 low memory in probe_roms. This memory is not described in the e820 map. Initialize probe_roms call to no-ops during boot for VTL2 kernel to avoid this. The issue got identified in OpenVMM which detects invalid accesses initiated from kernel running in VTL2. Co-developed-by: Saurabh Sengar Signed-off-by: Saurabh Sengar Signed-off-by: Naman Jain Tested-by: Roman Kisel Reviewed-by: Roman Kisel Link: https://lore.kernel.org/r/20250116061224.1701-1-namjain@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20250116061224.1701-1-namjain@linux.microsoft.com> --- arch/x86/hyperv/hv_vtl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 4e1b1e3b56584..3f4e20d7b724b 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -30,6 +30,7 @@ void __init hv_vtl_init_platform(void) x86_platform.realmode_init = x86_init_noop; x86_init.irqs.pre_vector_init = x86_init_noop; x86_init.timers.timer_init = x86_init_noop; + x86_init.resources.probe_roms = x86_init_noop; /* Avoid searching for BIOS MP tables */ x86_init.mpparse.find_mptable = x86_init_noop; -- GitLab From 174448badb4409491bfba2e6b46f7aa078741c5e Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 12 Feb 2025 14:40:46 +0800 Subject: [PATCH 0529/1585] ALSA: hda/realtek: Fixup ALC225 depop procedure Headset MIC will no function when power_save=0. Fixes: 1fd50509fe14 ("ALSA: hda/realtek: Update ALC225 depop procedure") Link: https://bugzilla.kernel.org/show_bug.cgi?id=219743 Signed-off-by: Kailang Yang Link: https://lore.kernel.org/0474a095ab0044d0939ec4bf4362423d@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ae0beb52e7b0c..224616fbec4fa 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3788,6 +3788,7 @@ static void alc225_init(struct hda_codec *codec) AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE); msleep(75); + alc_update_coef_idx(codec, 0x4a, 3 << 10, 0); alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x4); /* Hight power */ } } -- GitLab From 8743d66979e494c5378563e6b5a32e913380abd8 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 11 Feb 2025 14:32:01 -0600 Subject: [PATCH 0530/1585] gpiolib: acpi: Add a quirk for Acer Nitro ANV14 Spurious immediate wake up events are reported on Acer Nitro ANV14. GPIO 11 is specified as an edge triggered input and also a wake source but this pin is supposed to be an output pin for an LED, so it's effectively floating. Block the interrupt from getting set up for this GPIO on this device. Cc: stable@vger.kernel.org Reported-by: Delgan Tested-by: Delgan Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3954 Signed-off-by: Mario Limonciello Acked-by: Mika Westerberg Link: https://lore.kernel.org/r/20250211203222.761206-1-superm1@kernel.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-acpi.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 1f9fe50bba005..f7746c57ba76a 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1689,6 +1689,20 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = { .ignore_wake = "PNP0C50:00@8", }, }, + { + /* + * Spurious wakeups from GPIO 11 + * Found in BIOS 1.04 + * https://gitlab.freedesktop.org/drm/amd/-/issues/3954 + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_FAMILY, "Acer Nitro V 14"), + }, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_interrupt = "AMDI0030:00@11", + }, + }, {} /* Terminating entry */ }; -- GitLab From d262a192d38e527faa5984629aabda2e0d1c4f54 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 12 Feb 2025 07:46:28 +0100 Subject: [PATCH 0531/1585] powerpc/code-patching: Fix KASAN hit by not flagging text patching area as VM_ALLOC Erhard reported the following KASAN hit while booting his PowerMac G4 with a KASAN-enabled kernel 6.13-rc6: BUG: KASAN: vmalloc-out-of-bounds in copy_to_kernel_nofault+0xd8/0x1c8 Write of size 8 at addr f1000000 by task chronyd/1293 CPU: 0 UID: 123 PID: 1293 Comm: chronyd Tainted: G W 6.13.0-rc6-PMacG4 #2 Tainted: [W]=WARN Hardware name: PowerMac3,6 7455 0x80010303 PowerMac Call Trace: [c2437590] [c1631a84] dump_stack_lvl+0x70/0x8c (unreliable) [c24375b0] [c0504998] print_report+0xdc/0x504 [c2437610] [c050475c] kasan_report+0xf8/0x108 [c2437690] [c0505a3c] kasan_check_range+0x24/0x18c [c24376a0] [c03fb5e4] copy_to_kernel_nofault+0xd8/0x1c8 [c24376c0] [c004c014] patch_instructions+0x15c/0x16c [c2437710] [c00731a8] bpf_arch_text_copy+0x60/0x7c [c2437730] [c0281168] bpf_jit_binary_pack_finalize+0x50/0xac [c2437750] [c0073cf4] bpf_int_jit_compile+0xb30/0xdec [c2437880] [c0280394] bpf_prog_select_runtime+0x15c/0x478 [c24378d0] [c1263428] bpf_prepare_filter+0xbf8/0xc14 [c2437990] [c12677ec] bpf_prog_create_from_user+0x258/0x2b4 [c24379d0] [c027111c] do_seccomp+0x3dc/0x1890 [c2437ac0] [c001d8e0] system_call_exception+0x2dc/0x420 [c2437f30] [c00281ac] ret_from_syscall+0x0/0x2c --- interrupt: c00 at 0x5a1274 NIP: 005a1274 LR: 006a3b3c CTR: 005296c8 REGS: c2437f40 TRAP: 0c00 Tainted: G W (6.13.0-rc6-PMacG4) MSR: 0200f932 CR: 24004422 XER: 00000000 GPR00: 00000166 af8f3fa0 a7ee3540 00000001 00000000 013b6500 005a5858 0200f932 GPR08: 00000000 00001fe9 013d5fc8 005296c8 2822244c 00b2fcd8 00000000 af8f4b57 GPR16: 00000000 00000001 00000000 00000000 00000000 00000001 00000000 00000002 GPR24: 00afdbb0 00000000 00000000 00000000 006e0004 013ce060 006e7c1c 00000001 NIP [005a1274] 0x5a1274 LR [006a3b3c] 0x6a3b3c --- interrupt: c00 The buggy address belongs to the virtual mapping at [f1000000, f1002000) created by: text_area_cpu_up+0x20/0x190 The buggy address belongs to the physical page: page: refcount:1 mapcount:0 mapping:00000000 index:0x0 pfn:0x76e30 flags: 0x80000000(zone=2) raw: 80000000 00000000 00000122 00000000 00000000 00000000 ffffffff 00000001 raw: 00000000 page dumped because: kasan: bad access detected Memory state around the buggy address: f0ffff00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f0ffff80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >f1000000: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ^ f1000080: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f1000100: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ================================================================== f8 corresponds to KASAN_VMALLOC_INVALID which means the area is not initialised hence not supposed to be used yet. Powerpc text patching infrastructure allocates a virtual memory area using get_vm_area() and flags it as VM_ALLOC. But that flag is meant to be used for vmalloc() and vmalloc() allocated memory is not supposed to be used before a call to __vmalloc_node_range() which is never called for that area. That went undetected until commit e4137f08816b ("mm, kasan, kmsan: instrument copy_from/to_kernel_nofault") The area allocated by text_area_cpu_up() is not vmalloc memory, it is mapped directly on demand when needed by map_kernel_page(). There is no VM flag corresponding to such usage, so just pass no flag. That way the area will be unpoisonned and usable immediately. Reported-by: Erhard Furtner Closes: https://lore.kernel.org/all/20250112135832.57c92322@yea/ Fixes: 37bc3e5fd764 ("powerpc/lib/code-patching: Use alternate map for patch_instruction()") Signed-off-by: Christophe Leroy Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/06621423da339b374f48c0886e3a5db18e896be8.1739342693.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/code-patching.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 81c0f673eb252..f84e0337cc029 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -108,7 +108,7 @@ static int text_area_cpu_up(unsigned int cpu) unsigned long addr; int err; - area = get_vm_area(PAGE_SIZE, VM_ALLOC); + area = get_vm_area(PAGE_SIZE, 0); if (!area) { WARN_ONCE(1, "Failed to create text area for cpu %d\n", cpu); -- GitLab From cf56aa8dd26328a9af4ffe7fb0bd8fcfa9407112 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 7 Feb 2025 13:25:57 +0100 Subject: [PATCH 0532/1585] Revert "netfilter: flowtable: teardown flow if cached mtu is stale" This reverts commit b8baac3b9c5cc4b261454ff87d75ae8306016ffd. IPv4 packets with no DF flag set on result in frequent flow entry teardown cycles, this is visible in the network topology that is used in the nft_flowtable.sh test. nft_flowtable.sh test ocassionally fails reporting that the dscp_fwd test sees no packets going through the flowtable path. Fixes: b8baac3b9c5c ("netfilter: flowtable: teardown flow if cached mtu is stale") Reported-by: Jakub Kicinski Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_ip.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 97c6eb8847a02..8cd4cf7ae2112 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -381,10 +381,8 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx, flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset; - if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) { - flow_offload_teardown(flow); + if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) return 0; - } iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset); thoff = (iph->ihl * 4) + ctx->offset; @@ -662,10 +660,8 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx, flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset; - if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) { - flow_offload_teardown(flow); + if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) return 0; - } ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset); thoff = sizeof(*ip6h) + ctx->offset; -- GitLab From b9644fbfbcab13da7f8b37bef7c51e5b8407d031 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Wed, 12 Feb 2025 10:18:49 +0800 Subject: [PATCH 0533/1585] gpio: stmpe: Check return value of stmpe_reg_read in stmpe_gpio_irq_sync_unlock The stmpe_reg_read function can fail, but its return value is not checked in stmpe_gpio_irq_sync_unlock. This can lead to silent failures and incorrect behavior if the hardware access fails. This patch adds checks for the return value of stmpe_reg_read. If the function fails, an error message is logged and the function returns early to avoid further issues. Fixes: b888fb6f2a27 ("gpio: stmpe: i2c transfer are forbiden in atomic context") Cc: stable@vger.kernel.org # 4.16+ Signed-off-by: Wentao Liang Link: https://lore.kernel.org/r/20250212021849.275-1-vulab@iscas.ac.cn Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-stmpe.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c index 75a3633ceddbb..222279a9d82b2 100644 --- a/drivers/gpio/gpio-stmpe.c +++ b/drivers/gpio/gpio-stmpe.c @@ -191,7 +191,7 @@ static void stmpe_gpio_irq_sync_unlock(struct irq_data *d) [REG_IE][CSB] = STMPE_IDX_IEGPIOR_CSB, [REG_IE][MSB] = STMPE_IDX_IEGPIOR_MSB, }; - int i, j; + int ret, i, j; /* * STMPE1600: to be able to get IRQ from pins, @@ -199,8 +199,16 @@ static void stmpe_gpio_irq_sync_unlock(struct irq_data *d) * GPSR or GPCR registers */ if (stmpe->partnum == STMPE1600) { - stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_GPMR_LSB]); - stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_GPMR_CSB]); + ret = stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_GPMR_LSB]); + if (ret < 0) { + dev_err(stmpe->dev, "Failed to read GPMR_LSB: %d\n", ret); + goto err; + } + ret = stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_GPMR_CSB]); + if (ret < 0) { + dev_err(stmpe->dev, "Failed to read GPMR_CSB: %d\n", ret); + goto err; + } } for (i = 0; i < CACHE_NR_REGS; i++) { @@ -222,6 +230,7 @@ static void stmpe_gpio_irq_sync_unlock(struct irq_data *d) } } +err: mutex_unlock(&stmpe_gpio->irq_lock); } -- GitLab From 56d5f3eba3f5de0efdd556de4ef381e109b973a9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 11 Feb 2025 18:15:59 +0100 Subject: [PATCH 0534/1585] acct: perform last write from workqueue In [1] it was reported that the acct(2) system call can be used to trigger NULL deref in cases where it is set to write to a file that triggers an internal lookup. This can e.g., happen when pointing acc(2) to /sys/power/resume. At the point the where the write to this file happens the calling task has already exited and called exit_fs(). A lookup will thus trigger a NULL-deref when accessing current->fs. Reorganize the code so that the the final write happens from the workqueue but with the caller's credentials. This preserves the (strange) permission model and has almost no regression risk. This api should stop to exist though. Link: https://lore.kernel.org/r/20250127091811.3183623-1-quzicheng@huawei.com [1] Link: https://lore.kernel.org/r/20250211-work-acct-v1-1-1c16aecab8b3@kernel.org Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Zicheng Qu Cc: stable@vger.kernel.org Signed-off-by: Christian Brauner --- kernel/acct.c | 120 +++++++++++++++++++++++++++++--------------------- 1 file changed, 70 insertions(+), 50 deletions(-) diff --git a/kernel/acct.c b/kernel/acct.c index 31222e8cd534f..48283efe8a123 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -103,48 +103,50 @@ struct bsd_acct_struct { atomic_long_t count; struct rcu_head rcu; struct mutex lock; - int active; + bool active; + bool check_space; unsigned long needcheck; struct file *file; struct pid_namespace *ns; struct work_struct work; struct completion done; + acct_t ac; }; -static void do_acct_process(struct bsd_acct_struct *acct); +static void fill_ac(struct bsd_acct_struct *acct); +static void acct_write_process(struct bsd_acct_struct *acct); /* * Check the amount of free space and suspend/resume accordingly. */ -static int check_free_space(struct bsd_acct_struct *acct) +static bool check_free_space(struct bsd_acct_struct *acct) { struct kstatfs sbuf; - if (time_is_after_jiffies(acct->needcheck)) - goto out; + if (!acct->check_space) + return acct->active; /* May block */ if (vfs_statfs(&acct->file->f_path, &sbuf)) - goto out; + return acct->active; if (acct->active) { u64 suspend = sbuf.f_blocks * SUSPEND; do_div(suspend, 100); if (sbuf.f_bavail <= suspend) { - acct->active = 0; + acct->active = false; pr_info("Process accounting paused\n"); } } else { u64 resume = sbuf.f_blocks * RESUME; do_div(resume, 100); if (sbuf.f_bavail >= resume) { - acct->active = 1; + acct->active = true; pr_info("Process accounting resumed\n"); } } acct->needcheck = jiffies + ACCT_TIMEOUT*HZ; -out: return acct->active; } @@ -189,7 +191,11 @@ static void acct_pin_kill(struct fs_pin *pin) { struct bsd_acct_struct *acct = to_acct(pin); mutex_lock(&acct->lock); - do_acct_process(acct); + /* + * Fill the accounting struct with the exiting task's info + * before punting to the workqueue. + */ + fill_ac(acct); schedule_work(&acct->work); wait_for_completion(&acct->done); cmpxchg(&acct->ns->bacct, pin, NULL); @@ -202,6 +208,9 @@ static void close_work(struct work_struct *work) { struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work); struct file *file = acct->file; + + /* We were fired by acct_pin_kill() which holds acct->lock. */ + acct_write_process(acct); if (file->f_op->flush) file->f_op->flush(file, NULL); __fput_sync(file); @@ -430,13 +439,27 @@ static u32 encode_float(u64 value) * do_exit() or when switching to a different output file. */ -static void fill_ac(acct_t *ac) +static void fill_ac(struct bsd_acct_struct *acct) { struct pacct_struct *pacct = ¤t->signal->pacct; + struct file *file = acct->file; + acct_t *ac = &acct->ac; u64 elapsed, run_time; time64_t btime; struct tty_struct *tty; + lockdep_assert_held(&acct->lock); + + if (time_is_after_jiffies(acct->needcheck)) { + acct->check_space = false; + + /* Don't fill in @ac if nothing will be written. */ + if (!acct->active) + return; + } else { + acct->check_space = true; + } + /* * Fill the accounting struct with the needed info as recorded * by the different kernel functions. @@ -484,64 +507,61 @@ static void fill_ac(acct_t *ac) ac->ac_majflt = encode_comp_t(pacct->ac_majflt); ac->ac_exitcode = pacct->ac_exitcode; spin_unlock_irq(¤t->sighand->siglock); -} -/* - * do_acct_process does all actual work. Caller holds the reference to file. - */ -static void do_acct_process(struct bsd_acct_struct *acct) -{ - acct_t ac; - unsigned long flim; - const struct cred *orig_cred; - struct file *file = acct->file; - - /* - * Accounting records are not subject to resource limits. - */ - flim = rlimit(RLIMIT_FSIZE); - current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; - /* Perform file operations on behalf of whoever enabled accounting */ - orig_cred = override_creds(file->f_cred); - /* - * First check to see if there is enough free_space to continue - * the process accounting system. - */ - if (!check_free_space(acct)) - goto out; - - fill_ac(&ac); /* we really need to bite the bullet and change layout */ - ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid); - ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid); + ac->ac_uid = from_kuid_munged(file->f_cred->user_ns, current_uid()); + ac->ac_gid = from_kgid_munged(file->f_cred->user_ns, current_gid()); #if ACCT_VERSION == 1 || ACCT_VERSION == 2 /* backward-compatible 16 bit fields */ - ac.ac_uid16 = ac.ac_uid; - ac.ac_gid16 = ac.ac_gid; + ac->ac_uid16 = ac->ac_uid; + ac->ac_gid16 = ac->ac_gid; #elif ACCT_VERSION == 3 { struct pid_namespace *ns = acct->ns; - ac.ac_pid = task_tgid_nr_ns(current, ns); + ac->ac_pid = task_tgid_nr_ns(current, ns); rcu_read_lock(); - ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), - ns); + ac->ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns); rcu_read_unlock(); } #endif +} + +static void acct_write_process(struct bsd_acct_struct *acct) +{ + struct file *file = acct->file; + const struct cred *cred; + acct_t *ac = &acct->ac; + + /* Perform file operations on behalf of whoever enabled accounting */ + cred = override_creds(file->f_cred); + /* - * Get freeze protection. If the fs is frozen, just skip the write - * as we could deadlock the system otherwise. + * First check to see if there is enough free_space to continue + * the process accounting system. Then get freeze protection. If + * the fs is frozen, just skip the write as we could deadlock + * the system otherwise. */ - if (file_start_write_trylock(file)) { + if (check_free_space(acct) && file_start_write_trylock(file)) { /* it's been opened O_APPEND, so position is irrelevant */ loff_t pos = 0; - __kernel_write(file, &ac, sizeof(acct_t), &pos); + __kernel_write(file, ac, sizeof(acct_t), &pos); file_end_write(file); } -out: + + revert_creds(cred); +} + +static void do_acct_process(struct bsd_acct_struct *acct) +{ + unsigned long flim; + + /* Accounting records are not subject to resource limits. */ + flim = rlimit(RLIMIT_FSIZE); + current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; + fill_ac(acct); + acct_write_process(acct); current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim; - revert_creds(orig_cred); } /** -- GitLab From 890ed45bde808c422c3c27d3285fc45affa0f930 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 11 Feb 2025 18:16:00 +0100 Subject: [PATCH 0535/1585] acct: block access to kernel internal filesystems There's no point in allowing anything kernel internal nor procfs or sysfs. Link: https://lore.kernel.org/r/20250127091811.3183623-1-quzicheng@huawei.com Link: https://lore.kernel.org/r/20250211-work-acct-v1-2-1c16aecab8b3@kernel.org Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reviewed-by: Amir Goldstein Reported-by: Zicheng Qu Cc: stable@vger.kernel.org Signed-off-by: Christian Brauner --- kernel/acct.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/kernel/acct.c b/kernel/acct.c index 48283efe8a123..6520baa136693 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -243,6 +243,20 @@ static int acct_on(struct filename *pathname) return -EACCES; } + /* Exclude kernel kernel internal filesystems. */ + if (file_inode(file)->i_sb->s_flags & (SB_NOUSER | SB_KERNMOUNT)) { + kfree(acct); + filp_close(file, NULL); + return -EINVAL; + } + + /* Exclude procfs and sysfs. */ + if (file_inode(file)->i_sb->s_iflags & SB_I_USERNS_VISIBLE) { + kfree(acct); + filp_close(file, NULL); + return -EINVAL; + } + if (!(file->f_mode & FMODE_CAN_WRITE)) { kfree(acct); filp_close(file, NULL); -- GitLab From b3e127dacad60a384c92baafdc74f1508bf7dd47 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Tue, 11 Feb 2025 12:36:11 -0500 Subject: [PATCH 0536/1585] platform/x86: thinkpad_acpi: Fix registration of tpacpi platform driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The recent platform profile changes prevent the tpacpi platform driver from registering. This error is seen in the kernel logs, and the various tpacpi entries are not created: [ 7550.642171] platform thinkpad_acpi: Resources present before probing This happens because devm_platform_profile_register() is called before tpacpi_pdev probes (thanks to Kurt Borja for identifying the root cause). For now revert back to the old platform_profile_register to fix the issue. This is quick fix and will be re-implemented later as more testing is needed for full solution. Tested on X1 Carbon G12. Fixes: 31658c916fa6 ("platform/x86: thinkpad_acpi: Use devm_platform_profile_register()") Signed-off-by: Mark Pearson Reviewed-by: Kurt Borja Link: https://lore.kernel.org/r/20250211173620.16522-1-mpearson-lenovo@squebb.ca Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/thinkpad_acpi.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 99cdea723d32c..72a10ed2017ce 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10668,8 +10668,8 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) "DYTC version %d: thermal mode available\n", dytc_version); /* Create platform_profile structure and register */ - tpacpi_pprof = devm_platform_profile_register(&tpacpi_pdev->dev, "thinkpad-acpi", - NULL, &dytc_profile_ops); + tpacpi_pprof = platform_profile_register(&tpacpi_pdev->dev, "thinkpad-acpi-profile", + NULL, &dytc_profile_ops); /* * If for some reason platform_profiles aren't enabled * don't quit terminally. @@ -10687,8 +10687,15 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) return 0; } +static void dytc_profile_exit(void) +{ + if (!IS_ERR_OR_NULL(tpacpi_pprof)) + platform_profile_remove(tpacpi_pprof); +} + static struct ibm_struct dytc_profile_driver_data = { .name = "dytc-profile", + .exit = dytc_profile_exit, }; /************************************************************************* -- GitLab From e977499820782ab1c69f354d9f41b6d9ad1f43d9 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Mon, 10 Feb 2025 15:36:54 +0100 Subject: [PATCH 0537/1585] drm/xe: Carve out wopcm portion from the stolen memory The top of stolen memory is WOPCM, which shouldn't be accessed. Remove this portion from the stolen memory region for discrete platforms. This was already done for integrated, but was missing for discrete platforms. This also moves get_wopcm_size() so detect_bar2_dgfx() and detect_bar2_integrated can use the same function. v2: Improve commit message and suitable stable version tag(Lucas) Fixes: d8b52a02cb40 ("drm/xe: Implement stolen memory.") Cc: Maarten Lankhorst Cc: Matthew Auld Cc: Lucas De Marchi Cc: stable@vger.kernel.org # v6.11+ Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20250210143654.2076747-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das (cherry picked from commit 2c7f45cc7e197a792ce5c693e56ea48f60b312da) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 54 ++++++++++++++------------ 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 423856cc18d40..d414421f8c131 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -57,12 +57,35 @@ bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe) return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe); } +static u32 get_wopcm_size(struct xe_device *xe) +{ + u32 wopcm_size; + u64 val; + + val = xe_mmio_read64_2x32(xe_root_tile_mmio(xe), STOLEN_RESERVED); + val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val); + + switch (val) { + case 0x5 ... 0x6: + val--; + fallthrough; + case 0x0 ... 0x3: + wopcm_size = (1U << val) * SZ_1M; + break; + default: + WARN(1, "Missing case wopcm_size=%llx\n", val); + wopcm_size = 0; + } + + return wopcm_size; +} + static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { struct xe_tile *tile = xe_device_get_root_tile(xe); struct xe_mmio *mmio = xe_root_tile_mmio(xe); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - u64 stolen_size; + u64 stolen_size, wopcm_size; u64 tile_offset; u64 tile_size; @@ -74,7 +97,13 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base)) return 0; + /* Carve out the top of DSM as it contains the reserved WOPCM region */ + wopcm_size = get_wopcm_size(xe); + if (drm_WARN_ON(&xe->drm, !wopcm_size)) + return 0; + stolen_size = tile_size - mgr->stolen_base; + stolen_size -= wopcm_size; /* Verify usage fits in the actual resource available */ if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR)) @@ -89,29 +118,6 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) return ALIGN_DOWN(stolen_size, SZ_1M); } -static u32 get_wopcm_size(struct xe_device *xe) -{ - u32 wopcm_size; - u64 val; - - val = xe_mmio_read64_2x32(xe_root_tile_mmio(xe), STOLEN_RESERVED); - val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val); - - switch (val) { - case 0x5 ... 0x6: - val--; - fallthrough; - case 0x0 ... 0x3: - wopcm_size = (1U << val) * SZ_1M; - break; - default: - WARN(1, "Missing case wopcm_size=%llx\n", val); - wopcm_size = 0; - } - - return wopcm_size; -} - static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); -- GitLab From 06521ac0485effdcc9c792cb0b40ed8e6f2f5fb8 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 12 Feb 2025 13:33:24 +0000 Subject: [PATCH 0538/1585] io_uring/waitid: don't abuse io_tw_state struct io_tw_state is managed by core io_uring, and opcode handling code must never try to cheat and create their own instances, it's plain incorrect. io_waitid_complete() attempts exactly that outside of the task work context, and even though the ring is locked, there would be no one to reap the requests from the defer completion list. It only works now because luckily it's called before io_uring_try_cancel_uring_cmd(), which flushes completions. Fixes: f31ecf671ddc4 ("io_uring: add IORING_OP_WAITID support") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- io_uring/waitid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/waitid.c b/io_uring/waitid.c index 853e97a7b0ecb..c4096d93a2870 100644 --- a/io_uring/waitid.c +++ b/io_uring/waitid.c @@ -118,7 +118,6 @@ static int io_waitid_finish(struct io_kiocb *req, int ret) static void io_waitid_complete(struct io_kiocb *req, int ret) { struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); - struct io_tw_state ts = {}; /* anyone completing better be holding a reference */ WARN_ON_ONCE(!(atomic_read(&iw->refs) & IO_WAITID_REF_MASK)); @@ -131,7 +130,6 @@ static void io_waitid_complete(struct io_kiocb *req, int ret) if (ret < 0) req_set_fail(req); io_req_set_res(req, ret, 0); - io_req_task_complete(req, &ts); } static bool __io_waitid_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req) @@ -153,6 +151,7 @@ static bool __io_waitid_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req) list_del_init(&iwa->wo.child_wait.entry); spin_unlock_irq(&iw->head->lock); io_waitid_complete(req, -ECANCELED); + io_req_queue_tw_complete(req, -ECANCELED); return true; } @@ -258,6 +257,7 @@ static void io_waitid_cb(struct io_kiocb *req, struct io_tw_state *ts) } io_waitid_complete(req, ret); + io_req_task_complete(req, ts); } static int io_waitid_wait(struct wait_queue_entry *wait, unsigned mode, -- GitLab From 8802766324e1f5d414a81ac43365c20142e85603 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 12 Feb 2025 13:46:46 +0000 Subject: [PATCH 0539/1585] io_uring/kbuf: reallocate buf lists on upgrade IORING_REGISTER_PBUF_RING can reuse an old struct io_buffer_list if it was created for legacy selected buffer and has been emptied. It violates the requirement that most of the field should stay stable after publish. Always reallocate it instead. Cc: stable@vger.kernel.org Reported-by: Pumpkin Chang Fixes: 2fcabce2d7d34 ("io_uring: disallow mixed provided buffer group registrations") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 04bf493eecae0..8e72de7712ac9 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -415,6 +415,13 @@ void io_destroy_buffers(struct io_ring_ctx *ctx) } } +static void io_destroy_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl) +{ + scoped_guard(mutex, &ctx->mmap_lock) + WARN_ON_ONCE(xa_erase(&ctx->io_bl_xa, bl->bgid) != bl); + io_put_bl(ctx, bl); +} + int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf); @@ -636,12 +643,13 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) /* if mapped buffer ring OR classic exists, don't allow */ if (bl->flags & IOBL_BUF_RING || !list_empty(&bl->buf_list)) return -EEXIST; - } else { - free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL); - if (!bl) - return -ENOMEM; + io_destroy_bl(ctx, bl); } + free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL); + if (!bl) + return -ENOMEM; + mmap_offset = (unsigned long)reg.bgid << IORING_OFF_PBUF_SHIFT; ring_size = flex_array_size(br, bufs, reg.ring_entries); -- GitLab From a8de7f100bb5989d9c3627d3a223ee1c863f3b69 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 17 Jan 2025 16:34:51 -0800 Subject: [PATCH 0540/1585] KVM: x86: Reject Hyper-V's SEND_IPI hypercalls if local APIC isn't in-kernel Advertise support for Hyper-V's SEND_IPI and SEND_IPI_EX hypercalls if and only if the local API is emulated/virtualized by KVM, and explicitly reject said hypercalls if the local APIC is emulated in userspace, i.e. don't rely on userspace to opt-in to KVM_CAP_HYPERV_ENFORCE_CPUID. Rejecting SEND_IPI and SEND_IPI_EX fixes a NULL-pointer dereference if Hyper-V enlightenments are exposed to the guest without an in-kernel local APIC: dump_stack+0xbe/0xfd __kasan_report.cold+0x34/0x84 kasan_report+0x3a/0x50 __apic_accept_irq+0x3a/0x5c0 kvm_hv_send_ipi.isra.0+0x34e/0x820 kvm_hv_hypercall+0x8d9/0x9d0 kvm_emulate_hypercall+0x506/0x7e0 __vmx_handle_exit+0x283/0xb60 vmx_handle_exit+0x1d/0xd0 vcpu_enter_guest+0x16b0/0x24c0 vcpu_run+0xc0/0x550 kvm_arch_vcpu_ioctl_run+0x170/0x6d0 kvm_vcpu_ioctl+0x413/0xb20 __se_sys_ioctl+0x111/0x160 do_syscal1_64+0x30/0x40 entry_SYSCALL_64_after_hwframe+0x67/0xd1 Note, checking the sending vCPU is sufficient, as the per-VM irqchip_mode can't be modified after vCPUs are created, i.e. if one vCPU has an in-kernel local APIC, then all vCPUs have an in-kernel local APIC. Reported-by: Dongjie Zou Fixes: 214ff83d4473 ("KVM: x86: hyperv: implement PV IPI send hypercalls") Fixes: 2bc39970e932 ("x86/kvm/hyper-v: Introduce KVM_GET_SUPPORTED_HV_CPUID") Cc: stable@vger.kernel.org Reviewed-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20250118003454.2619573-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/hyperv.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 6a6dd5a84f228..6ebeb6cea6c0d 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -2226,6 +2226,9 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) u32 vector; bool all_cpus; + if (!lapic_in_kernel(vcpu)) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + if (hc->code == HVCALL_SEND_IPI) { if (!hc->fast) { if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi, @@ -2852,7 +2855,8 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED; ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED; - ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED; + if (!vcpu || lapic_in_kernel(vcpu)) + ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED; ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED; if (evmcs_ver) ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; -- GitLab From 0b6db0dc43eefb4f89181546785c3609fd276524 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 17 Jan 2025 16:34:52 -0800 Subject: [PATCH 0541/1585] KVM: selftests: Mark test_hv_cpuid_e2big() static in Hyper-V CPUID test Make the Hyper-V CPUID test's local helper test_hv_cpuid_e2big() static, it's not used outside of the test (and isn't intended to be). Reviewed-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20250118003454.2619573-3-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86/hyperv_cpuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c index 4f5881d4ef66d..9a0fcc7133503 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c @@ -111,7 +111,7 @@ static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries, } } -void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +static void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { static struct kvm_cpuid2 cpuid = {.nent = 0}; int ret; -- GitLab From cd5a0c2f0faeb4a3fab3b78f6693a2d55ee51efa Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 17 Jan 2025 16:34:53 -0800 Subject: [PATCH 0542/1585] KVM: selftests: Manage CPUID array in Hyper-V CPUID test's core helper Allocate, get, and free the CPUID array in the Hyper-V CPUID test in the test's core helper, instead of copy+pasting code at each call site. In addition to deduplicating a small amount of code, restricting visibility of the array to a single invocation of the core test prevents "leaking" an array across test cases. Passing in @vcpu to the helper will also allow pivoting on VM-scoped information without needing to pass more booleans, e.g. to conditionally assert on features that require an in-kernel APIC. To avoid use-after-free bugs due to overzealous and careless developers, opportunstically add a comment to explain that the system-scoped helper caches the Hyper-V CPUID entries, i.e. that the caller is not responsible for freeing the memory. Cc: Vitaly Kuznetsov Reviewed-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20250118003454.2619573-4-seanjc@google.com Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/x86/hyperv_cpuid.c | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c index 9a0fcc7133503..3188749ec6e12 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c @@ -41,13 +41,18 @@ static bool smt_possible(void) return res; } -static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries, - bool evmcs_expected) +static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected) { + const struct kvm_cpuid2 *hv_cpuid_entries; int i; int nent_expected = 10; u32 test_val; + if (vcpu) + hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); + else + hv_cpuid_entries = kvm_get_supported_hv_cpuid(); + TEST_ASSERT(hv_cpuid_entries->nent == nent_expected, "KVM_GET_SUPPORTED_HV_CPUID should return %d entries" " (returned %d)", @@ -109,6 +114,13 @@ static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries, * entry->edx); */ } + + /* + * Note, the CPUID array returned by the system-scoped helper is a one- + * time allocation, i.e. must not be freed. + */ + if (vcpu) + free((void *)hv_cpuid_entries); } static void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu) @@ -129,7 +141,6 @@ static void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu) int main(int argc, char *argv[]) { struct kvm_vm *vm; - const struct kvm_cpuid2 *hv_cpuid_entries; struct kvm_vcpu *vcpu; TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID)); @@ -138,10 +149,7 @@ int main(int argc, char *argv[]) /* Test vCPU ioctl version */ test_hv_cpuid_e2big(vm, vcpu); - - hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); - test_hv_cpuid(hv_cpuid_entries, false); - free((void *)hv_cpuid_entries); + test_hv_cpuid(vcpu, false); if (!kvm_cpu_has(X86_FEATURE_VMX) || !kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { @@ -149,9 +157,7 @@ int main(int argc, char *argv[]) goto do_sys; } vcpu_enable_evmcs(vcpu); - hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); - test_hv_cpuid(hv_cpuid_entries, true); - free((void *)hv_cpuid_entries); + test_hv_cpuid(vcpu, true); do_sys: /* Test system ioctl version */ @@ -161,9 +167,7 @@ int main(int argc, char *argv[]) } test_hv_cpuid_e2big(vm, NULL); - - hv_cpuid_entries = kvm_get_supported_hv_cpuid(); - test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX)); + test_hv_cpuid(NULL, kvm_cpu_has(X86_FEATURE_VMX)); out: kvm_vm_free(vm); -- GitLab From e36454461c5ebe6372952560b2abad5dc9ac579d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 17 Jan 2025 16:34:54 -0800 Subject: [PATCH 0543/1585] KVM: selftests: Add CPUID tests for Hyper-V features that need in-kernel APIC Add testcases to x86's Hyper-V CPUID test to verify that KVM advertises support for features that require an in-kernel local APIC appropriately, i.e. that KVM hides support from the vCPU-scoped ioctl if the VM doesn't have an in-kernel local APIC. Cc: Vitaly Kuznetsov Reviewed-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20250118003454.2619573-5-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86/hyperv_cpuid.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c index 3188749ec6e12..4e920705681ae 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c @@ -43,6 +43,7 @@ static bool smt_possible(void) static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected) { + const bool has_irqchip = !vcpu || vcpu->vm->has_irqchip; const struct kvm_cpuid2 *hv_cpuid_entries; int i; int nent_expected = 10; @@ -85,12 +86,19 @@ static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected) entry->eax, evmcs_expected ); break; + case 0x40000003: + TEST_ASSERT(has_irqchip || !(entry->edx & BIT(19)), + "\"Direct\" Synthetic Timers should require in-kernel APIC"); + break; case 0x40000004: test_val = entry->eax & (1UL << 18); TEST_ASSERT(!!test_val == !smt_possible(), "NoNonArchitecturalCoreSharing bit" " doesn't reflect SMT setting"); + + TEST_ASSERT(has_irqchip || !(entry->eax & BIT(10)), + "Cluster IPI (i.e. SEND_IPI) should require in-kernel APIC"); break; case 0x4000000A: TEST_ASSERT(entry->eax & (1UL << 19), @@ -145,9 +153,14 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID)); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); + /* Test the vCPU ioctl without an in-kernel local APIC. */ + vm = vm_create_barebones(); + vcpu = __vm_vcpu_add(vm, 0); + test_hv_cpuid(vcpu, false); + kvm_vm_free(vm); /* Test vCPU ioctl version */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); test_hv_cpuid_e2big(vm, vcpu); test_hv_cpuid(vcpu, false); -- GitLab From 46d6c6f3ef0eaff71c2db6d77d4e2ebb7adac34f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 29 Jan 2025 17:08:25 -0800 Subject: [PATCH 0544/1585] KVM: nSVM: Enter guest mode before initializing nested NPT MMU When preparing vmcb02 for nested VMRUN (or state restore), "enter" guest mode prior to initializing the MMU for nested NPT so that guest_mode is set in the MMU's role. KVM's model is that all L2 MMUs are tagged with guest_mode, as the behavior of hypervisor MMUs tends to be significantly different than kernel MMUs. Practically speaking, the bug is relatively benign, as KVM only directly queries role.guest_mode in kvm_mmu_free_guest_mode_roots() and kvm_mmu_page_ad_need_write_protect(), which SVM doesn't use, and in paths that are optimizations (mmu_page_zap_pte() and shadow_mmu_try_split_huge_pages()). And while the role is incorprated into shadow page usage, because nested NPT requires KVM to be using NPT for L1, reusing shadow pages across L1 and L2 is impossible as L1 MMUs will always have direct=1, while L2 MMUs will have direct=0. Hoist the TLB processing and setting of HF_GUEST_MASK to the beginning of the flow instead of forcing guest_mode in the MMU, as nothing in nested_vmcb02_prepare_control() between the old and new locations touches TLB flush requests or HF_GUEST_MASK, i.e. there's no reason to present inconsistent vCPU state to the MMU. Fixes: 69cb877487de ("KVM: nSVM: move MMU setup to nested_prepare_vmcb_control") Cc: stable@vger.kernel.org Reported-by: Yosry Ahmed Reviewed-by: Yosry Ahmed Link: https://lore.kernel.org/r/20250130010825.220346-1-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 2 +- arch/x86/kvm/svm/nested.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 74c20dbb92dae..d4ac4a1f8b81b 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5540,7 +5540,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0, union kvm_mmu_page_role root_role; /* NPT requires CR0.PG=1. */ - WARN_ON_ONCE(cpu_role.base.direct); + WARN_ON_ONCE(cpu_role.base.direct || !cpu_role.base.guest_mode); root_role = cpu_role.base; root_role.level = kvm_mmu_get_tdp_level(vcpu); diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index d77b094d9a4d6..04c375bf1ac2a 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -646,6 +646,11 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, u32 pause_count12; u32 pause_thresh12; + nested_svm_transition_tlb_flush(vcpu); + + /* Enter Guest-Mode */ + enter_guest_mode(vcpu); + /* * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2, * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes. @@ -762,11 +767,6 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, } } - nested_svm_transition_tlb_flush(vcpu); - - /* Enter Guest-Mode */ - enter_guest_mode(vcpu); - /* * Merge guest and host intercepts - must be called with vcpu in * guest-mode to take effect. -- GitLab From c2fee09fc167c74a64adb08656cb993ea475197e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 24 Jan 2025 17:18:33 -0800 Subject: [PATCH 0545/1585] KVM: x86: Load DR6 with guest value only before entering .vcpu_run() loop Move the conditional loading of hardware DR6 with the guest's DR6 value out of the core .vcpu_run() loop to fix a bug where KVM can load hardware with a stale vcpu->arch.dr6. When the guest accesses a DR and host userspace isn't debugging the guest, KVM disables DR interception and loads the guest's values into hardware on VM-Enter and saves them on VM-Exit. This allows the guest to access DRs at will, e.g. so that a sequence of DR accesses to configure a breakpoint only generates one VM-Exit. For DR0-DR3, the logic/behavior is identical between VMX and SVM, and also identical between KVM_DEBUGREG_BP_ENABLED (userspace debugging the guest) and KVM_DEBUGREG_WONT_EXIT (guest using DRs), and so KVM handles loading DR0-DR3 in common code, _outside_ of the core kvm_x86_ops.vcpu_run() loop. But for DR6, the guest's value doesn't need to be loaded into hardware for KVM_DEBUGREG_BP_ENABLED, and SVM provides a dedicated VMCB field whereas VMX requires software to manually load the guest value, and so loading the guest's value into DR6 is handled by {svm,vmx}_vcpu_run(), i.e. is done _inside_ the core run loop. Unfortunately, saving the guest values on VM-Exit is initiated by common x86, again outside of the core run loop. If the guest modifies DR6 (in hardware, when DR interception is disabled), and then the next VM-Exit is a fastpath VM-Exit, KVM will reload hardware DR6 with vcpu->arch.dr6 and clobber the guest's actual value. The bug shows up primarily with nested VMX because KVM handles the VMX preemption timer in the fastpath, and the window between hardware DR6 being modified (in guest context) and DR6 being read by guest software is orders of magnitude larger in a nested setup. E.g. in non-nested, the VMX preemption timer would need to fire precisely between #DB injection and the #DB handler's read of DR6, whereas with a KVM-on-KVM setup, the window where hardware DR6 is "dirty" extends all the way from L1 writing DR6 to VMRESUME (in L1). L1's view: ========== CPU 0/KVM-7289 [023] d.... 2925.640961: kvm_entry: vcpu 0 A: L1 Writes DR6 CPU 0/KVM-7289 [023] d.... 2925.640963: : Set DRs, DR6 = 0xffff0ff1 B: CPU 0/KVM-7289 [023] d.... 2925.640967: kvm_exit: vcpu 0 reason EXTERNAL_INTERRUPT intr_info 0x800000ec D: L1 reads DR6, arch.dr6 = 0 CPU 0/KVM-7289 [023] d.... 2925.640969: : Sync DRs, DR6 = 0xffff0ff0 CPU 0/KVM-7289 [023] d.... 2925.640976: kvm_entry: vcpu 0 L2 reads DR6, L1 disables DR interception CPU 0/KVM-7289 [023] d.... 2925.640980: kvm_exit: vcpu 0 reason DR_ACCESS info1 0x0000000000000216 CPU 0/KVM-7289 [023] d.... 2925.640983: kvm_entry: vcpu 0 CPU 0/KVM-7289 [023] d.... 2925.640983: : Set DRs, DR6 = 0xffff0ff0 L2 detects failure CPU 0/KVM-7289 [023] d.... 2925.640987: kvm_exit: vcpu 0 reason HLT L1 reads DR6 (confirms failure) CPU 0/KVM-7289 [023] d.... 2925.640990: : Sync DRs, DR6 = 0xffff0ff0 L0's view: ========== L2 reads DR6, arch.dr6 = 0 CPU 23/KVM-5046 [001] d.... 3410.005610: kvm_exit: vcpu 23 reason DR_ACCESS info1 0x0000000000000216 CPU 23/KVM-5046 [001] ..... 3410.005610: kvm_nested_vmexit: vcpu 23 reason DR_ACCESS info1 0x0000000000000216 L2 => L1 nested VM-Exit CPU 23/KVM-5046 [001] ..... 3410.005610: kvm_nested_vmexit_inject: reason: DR_ACCESS ext_inf1: 0x0000000000000216 CPU 23/KVM-5046 [001] d.... 3410.005610: kvm_entry: vcpu 23 CPU 23/KVM-5046 [001] d.... 3410.005611: kvm_exit: vcpu 23 reason VMREAD CPU 23/KVM-5046 [001] d.... 3410.005611: kvm_entry: vcpu 23 CPU 23/KVM-5046 [001] d.... 3410.005612: kvm_exit: vcpu 23 reason VMREAD CPU 23/KVM-5046 [001] d.... 3410.005612: kvm_entry: vcpu 23 L1 writes DR7, L0 disables DR interception CPU 23/KVM-5046 [001] d.... 3410.005612: kvm_exit: vcpu 23 reason DR_ACCESS info1 0x0000000000000007 CPU 23/KVM-5046 [001] d.... 3410.005613: kvm_entry: vcpu 23 L0 writes DR6 = 0 (arch.dr6) CPU 23/KVM-5046 [001] d.... 3410.005613: : Set DRs, DR6 = 0xffff0ff0 A: B: CPU 23/KVM-5046 [001] d.... 3410.005614: kvm_exit: vcpu 23 reason PREEMPTION_TIMER CPU 23/KVM-5046 [001] d.... 3410.005614: kvm_entry: vcpu 23 C: L0 writes DR6 = 0 (arch.dr6) CPU 23/KVM-5046 [001] d.... 3410.005614: : Set DRs, DR6 = 0xffff0ff0 L1 => L2 nested VM-Enter CPU 23/KVM-5046 [001] d.... 3410.005616: kvm_exit: vcpu 23 reason VMRESUME L0 reads DR6, arch.dr6 = 0 Reported-by: John Stultz Closes: https://lkml.kernel.org/r/CANDhNCq5_F3HfFYABqFGCA1bPd_%2BxgNj-iDQhH4tDk%2Bwi8iZZg%40mail.gmail.com Fixes: 375e28ffc0cf ("KVM: X86: Set host DR6 only on VMX and for KVM_DEBUGREG_WONT_EXIT") Fixes: d67668e9dd76 ("KVM: x86, SVM: isolate vcpu->arch.dr6 from vmcb->save.dr6") Cc: stable@vger.kernel.org Cc: Jim Mattson Tested-by: John Stultz Link: https://lore.kernel.org/r/20250125011833.3644371-1-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/svm.c | 13 ++++++------- arch/x86/kvm/vmx/main.c | 1 + arch/x86/kvm/vmx/vmx.c | 10 ++++++---- arch/x86/kvm/vmx/x86_ops.h | 1 + arch/x86/kvm/x86.c | 3 +++ 7 files changed, 19 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index c35550581da0c..823c0434bbad1 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -48,6 +48,7 @@ KVM_X86_OP(set_idt) KVM_X86_OP(get_gdt) KVM_X86_OP(set_gdt) KVM_X86_OP(sync_dirty_debug_regs) +KVM_X86_OP(set_dr6) KVM_X86_OP(set_dr7) KVM_X86_OP(cache_reg) KVM_X86_OP(get_rflags) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b15cde0a9b5ca..0b7af5902ff75 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1696,6 +1696,7 @@ struct kvm_x86_ops { void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu); + void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value); void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7640a84e554a6..a713c803a3a37 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1991,11 +1991,11 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) svm->asid = sd->next_asid++; } -static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value) +static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value) { - struct vmcb *vmcb = svm->vmcb; + struct vmcb *vmcb = to_svm(vcpu)->vmcb; - if (svm->vcpu.arch.guest_state_protected) + if (vcpu->arch.guest_state_protected) return; if (unlikely(value != vmcb->save.dr6)) { @@ -4247,10 +4247,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, * Run with all-zero DR6 unless needed, so that we can get the exact cause * of a #DB. */ - if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) - svm_set_dr6(svm, vcpu->arch.dr6); - else - svm_set_dr6(svm, DR6_ACTIVE_LOW); + if (likely(!(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))) + svm_set_dr6(vcpu, DR6_ACTIVE_LOW); clgi(); kvm_load_guest_xsave_state(vcpu); @@ -5043,6 +5041,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .set_idt = svm_set_idt, .get_gdt = svm_get_gdt, .set_gdt = svm_set_gdt, + .set_dr6 = svm_set_dr6, .set_dr7 = svm_set_dr7, .sync_dirty_debug_regs = svm_sync_dirty_debug_regs, .cache_reg = svm_cache_reg, diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index 2427f918e7638..43ee9ed11291b 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -61,6 +61,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .set_idt = vmx_set_idt, .get_gdt = vmx_get_gdt, .set_gdt = vmx_set_gdt, + .set_dr6 = vmx_set_dr6, .set_dr7 = vmx_set_dr7, .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs, .cache_reg = vmx_cache_reg, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f72835e85b6d5..6c56d5235f0f3 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5648,6 +5648,12 @@ void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) set_debugreg(DR6_RESERVED, 6); } +void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) +{ + lockdep_assert_irqs_disabled(); + set_debugreg(vcpu->arch.dr6, 6); +} + void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) { vmcs_writel(GUEST_DR7, val); @@ -7417,10 +7423,6 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) vmx->loaded_vmcs->host_state.cr4 = cr4; } - /* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */ - if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) - set_debugreg(vcpu->arch.dr6, 6); - /* When single-stepping over STI and MOV SS, we must clear the * corresponding interruptibility bits in the guest state. Otherwise * vmentry fails as it then expects bit 14 (BS) in pending debug diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h index ce3295a67c048..430773a5ef8e3 100644 --- a/arch/x86/kvm/vmx/x86_ops.h +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -73,6 +73,7 @@ void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt); +void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val); void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val); void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu); void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8e77e61d4fbd4..02159c967d29e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10961,6 +10961,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(vcpu->arch.eff_db[1], 1); set_debugreg(vcpu->arch.eff_db[2], 2); set_debugreg(vcpu->arch.eff_db[3], 3); + /* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */ + if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) + kvm_x86_call(set_dr6)(vcpu, vcpu->arch.dr6); } else if (unlikely(hw_breakpoint_active())) { set_debugreg(0, 7); } -- GitLab From 34cae91215c6f65bed2a124fb9283da6ec0b8dd9 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Wed, 12 Feb 2025 13:45:45 -0700 Subject: [PATCH 0546/1585] io_uring/uring_cmd: don't assume io_uring_cmd_data layout eaf72f7b414f ("io_uring/uring_cmd: cleanup struct io_uring_cmd_data layout") removed most of the places assuming struct io_uring_cmd_data has sqes as its first field. However, the EAGAIN case in io_uring_cmd() still compares ioucmd->sqe to the struct io_uring_cmd_data pointer using a void * cast. Since fa3595523d72 ("io_uring: get rid of alloc cache init_once handling"), sqes is no longer io_uring_cmd_data's first field. As a result, the pointers will always compare unequal and memcpy() may be called with the same source and destination. Replace the incorrect void * cast with the address of the sqes field. Signed-off-by: Caleb Sander Mateos Fixes: eaf72f7b414f ("io_uring/uring_cmd: cleanup struct io_uring_cmd_data layout") Link: https://lore.kernel.org/r/20250212204546.3751645-2-csander@purestorage.com Signed-off-by: Jens Axboe --- io_uring/uring_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 1f6a82128b475..cfb22e1de0e75 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -252,7 +252,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) if (ret == -EAGAIN) { struct io_uring_cmd_data *cache = req->async_data; - if (ioucmd->sqe != (void *) cache) + if (ioucmd->sqe != cache->sqes) memcpy(cache->sqes, ioucmd->sqe, uring_sqe_size(req->ctx)); return -EAGAIN; } else if (ret == -EIOCBQUEUED) { -- GitLab From e663da62ba8672aaa66843f1af8b20e3bb1a0515 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Wed, 12 Feb 2025 13:45:46 -0700 Subject: [PATCH 0547/1585] io_uring/uring_cmd: switch sqe to async_data on EAGAIN 5eff57fa9f3a ("io_uring/uring_cmd: defer SQE copying until it's needed") moved the unconditional memcpy() of the uring_cmd SQE to async_data to 2 cases when the request goes async: - If REQ_F_FORCE_ASYNC is set to force the initial issue to go async - If ->uring_cmd() returns -EAGAIN in the initial non-blocking issue Unlike the REQ_F_FORCE_ASYNC case, in the EAGAIN case, io_uring_cmd() copies the SQE to async_data but neglects to update the io_uring_cmd's sqe field to point to async_data. As a result, sqe still points to the slot in the userspace-mapped SQ. At the end of io_submit_sqes(), the kernel advances the SQ head index, allowing userspace to reuse the slot for a new SQE. If userspace reuses the slot before the io_uring worker reissues the original SQE, the io_uring_cmd's SQE will be corrupted. Introduce a helper io_uring_cmd_cache_sqes() to copy the original SQE to the io_uring_cmd's async_data and point sqe there. Use it for both the REQ_F_FORCE_ASYNC and EAGAIN cases. This ensures the uring_cmd doesn't read from the SQ slot after it has been returned to userspace. Signed-off-by: Caleb Sander Mateos Fixes: 5eff57fa9f3a ("io_uring/uring_cmd: defer SQE copying until it's needed") Link: https://lore.kernel.org/r/20250212204546.3751645-3-csander@purestorage.com Signed-off-by: Jens Axboe --- io_uring/uring_cmd.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index cfb22e1de0e75..bcfca18395c4c 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -168,6 +168,15 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, u64 res2, } EXPORT_SYMBOL_GPL(io_uring_cmd_done); +static void io_uring_cmd_cache_sqes(struct io_kiocb *req) +{ + struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); + struct io_uring_cmd_data *cache = req->async_data; + + memcpy(cache->sqes, ioucmd->sqe, uring_sqe_size(req->ctx)); + ioucmd->sqe = cache->sqes; +} + static int io_uring_cmd_prep_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) { @@ -179,14 +188,10 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req, return -ENOMEM; cache->op_data = NULL; - if (!(req->flags & REQ_F_FORCE_ASYNC)) { - /* defer memcpy until we need it */ - ioucmd->sqe = sqe; - return 0; - } - - memcpy(cache->sqes, sqe, uring_sqe_size(req->ctx)); - ioucmd->sqe = cache->sqes; + ioucmd->sqe = sqe; + /* defer memcpy until we need it */ + if (unlikely(req->flags & REQ_F_FORCE_ASYNC)) + io_uring_cmd_cache_sqes(req); return 0; } @@ -253,7 +258,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) struct io_uring_cmd_data *cache = req->async_data; if (ioucmd->sqe != cache->sqes) - memcpy(cache->sqes, ioucmd->sqe, uring_sqe_size(req->ctx)); + io_uring_cmd_cache_sqes(req); return -EAGAIN; } else if (ret == -EIOCBQUEUED) { return -EIOCBQUEUED; -- GitLab From 472ff48e2c09e49f2f90eeb6922f747306559506 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 12 Feb 2025 11:53:32 -0700 Subject: [PATCH 0548/1585] PCI: Fix BUILD_BUG_ON usage for old gcc As reported in the below link, it seems older versions of gcc cannot determine that the howmany variable is known for all callers. Include a test so that newer compilers can enforce this sanity check and older compilers can still work. Add __always_inline attribute to give the compiler an even better chance to know the inputs. Link: https://lore.kernel.org/r/20250212185337.293023-1-alex.williamson@redhat.com Fixes: 4453f360862e ("PCI: Batch BAR sizing operations") Reported-by: Oleg Nesterov Link: https://lore.kernel.org/all/20250209154512.GA18688@redhat.com Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas Tested-by: Oleg Nesterov Tested-by: Mitchell Augustin --- drivers/pci/probe.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index b6536ed599c37..246744d8d268a 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -339,13 +339,14 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, return (res->flags & IORESOURCE_MEM_64) ? 1 : 0; } -static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) +static __always_inline void pci_read_bases(struct pci_dev *dev, + unsigned int howmany, int rom) { u32 rombar, stdbars[PCI_STD_NUM_BARS]; unsigned int pos, reg; u16 orig_cmd; - BUILD_BUG_ON(howmany > PCI_STD_NUM_BARS); + BUILD_BUG_ON(statically_true(howmany > PCI_STD_NUM_BARS)); if (dev->non_compliant_bars) return; -- GitLab From cee6f9a9c87b6ecfb51845950c28216b231c3610 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 12 Jan 2025 15:39:51 +0100 Subject: [PATCH 0549/1585] objtool/rust: add one more `noreturn` Rust function Starting with Rust 1.85.0 (currently in beta, to be released 2025-02-20), under some kernel configurations with `CONFIG_RUST_DEBUG_ASSERTIONS=y`, one may trigger a new `objtool` warning: rust/kernel.o: warning: objtool: _R...securityNtB2_11SecurityCtx8as_bytes() falls through to next function _R...core3ops4drop4Drop4drop() due to a call to the `noreturn` symbol: core::panicking::assert_failed:: Thus add it to the list so that `objtool` knows it is actually `noreturn`. Do so matching with `strstr` since it is a generic. See commit 56d680dd23c3 ("objtool/rust: list `noreturn` Rust functions") for more details. Cc: stable@vger.kernel.org # Needed in 6.12.y and 6.13.y only (Rust is pinned in older LTSs). Fixes: 56d680dd23c3 ("objtool/rust: list `noreturn` Rust functions") Reviewed-by: Gary Guo Link: https://lore.kernel.org/r/20250112143951.751139-1-ojeda@kernel.org [ Updated Cc: stable@ to include 6.13.y. - Miguel ] Signed-off-by: Miguel Ojeda --- tools/objtool/check.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 753dbc4f81985..a027d1c0bb2b0 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -227,6 +227,7 @@ static bool is_rust_noreturn(const struct symbol *func) str_ends_with(func->name, "_4core9panicking18panic_bounds_check") || str_ends_with(func->name, "_4core9panicking19assert_failed_inner") || str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") || + strstr(func->name, "_4core9panicking13assert_failed") || strstr(func->name, "_4core9panicking11panic_const24panic_const_") || (strstr(func->name, "_4core5slice5index24slice_") && str_ends_with(func->name, "_fail")); -- GitLab From 2e4f982cf392af2f1282b5537a72144e064799e3 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 7 Feb 2025 00:20:22 +0100 Subject: [PATCH 0550/1585] rust: rbtree: fix overindented list item Starting with Rust 1.86.0 (to be released 2025-04-03), Clippy will have a new lint, `doc_overindented_list_items` [1], which catches cases of overindented list items. The lint has been added by Yutaro Ohno, based on feedback from the kernel [2] on a patch that fixed a similar case -- commit 0c5928deada1 ("rust: block: fix formatting in GenDisk doc"). Clippy reports a few cases in the kernel, apart from the one already fixed in the commit above. One is this one: error: doc list item overindented --> rust/kernel/rbtree.rs:1152:5 | 1152 | /// null, it is a pointer to the root of the [`RBTree`]. | ^^^^ help: try using ` ` (2 spaces) | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#doc_overindented_list_items = note: `-D clippy::doc-overindented-list-items` implied by `-D warnings` = help: to override `-D warnings` add `#[allow(clippy::doc_overindented_list_items)]` Thus clean it up. Cc: Yutaro Ohno Cc: stable@vger.kernel.org # Needed in 6.12.y and 6.13.y only (Rust is pinned in older LTSs). Fixes: a335e9591404 ("rust: rbtree: add `RBTree::entry`") Link: https://github.com/rust-lang/rust-clippy/pull/13711 [1] Link: https://github.com/rust-lang/rust-clippy/issues/13601 [2] Reviewed-by: Alice Ryhl Reviewed-by: Yutaro Ohno Link: https://lore.kernel.org/r/20250206232022.599998-1-ojeda@kernel.org [ There are a few other cases, so updated message. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/rbtree.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/rbtree.rs b/rust/kernel/rbtree.rs index ee2731dad72d9..0d1e75810664e 100644 --- a/rust/kernel/rbtree.rs +++ b/rust/kernel/rbtree.rs @@ -1149,7 +1149,7 @@ pub struct VacantEntry<'a, K, V> { /// # Invariants /// - `parent` may be null if the new node becomes the root. /// - `child_field_of_parent` is a valid pointer to the left-child or right-child of `parent`. If `parent` is -/// null, it is a pointer to the root of the [`RBTree`]. +/// null, it is a pointer to the root of the [`RBTree`]. struct RawVacantEntry<'a, K, V> { rbtree: *mut RBTree, /// The node that will become the parent of the new node if we insert one. -- GitLab From 0edf1283a9d1419a2095b4fcdd95c11ac00a191c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 12 Feb 2025 14:05:11 -0700 Subject: [PATCH 0551/1585] io_uring/uring_cmd: remove dead req_has_async_data() check Any uring_cmd always has async data allocated now, there's no reason to check and clear a cached copy of the SQE. Fixes: d10f19dff56e ("io_uring/uring_cmd: switch to always allocating async data") Signed-off-by: Jens Axboe --- io_uring/uring_cmd.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index bcfca18395c4c..8af7780407b7e 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -54,9 +54,6 @@ bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx, continue; if (cmd->flags & IORING_URING_CMD_CANCELABLE) { - /* ->sqe isn't available if no async data */ - if (!req_has_async_data(req)) - cmd->sqe = NULL; file->f_op->uring_cmd(cmd, IO_URING_F_CANCEL | IO_URING_F_COMPLETE_DEFER); ret = true; -- GitLab From 5298b7cffa8461009a4410f4e23f1c50ade39182 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 4 Dec 2024 08:48:27 +0100 Subject: [PATCH 0552/1585] um: add back support for FXSAVE registers It was reported that qemu may not enable the XSTATE CPU extension, which is a requirement after commit 3f17fed21491 ("um: switch to regset API and depend on XSTATE"). Add a fallback to use FXSAVE (FP registers on x86_64 and XFP on i386) which is just a shorter version of the same data. The only difference is that the XSTATE magic should not be set in the signal frame. Note that this still drops support for the older i386 FP register layout as supporting this would require more backward compatibility to build a correct signal frame. Fixes: 3f17fed21491 ("um: switch to regset API and depend on XSTATE") Reported-by: SeongJae Park Closes: https://lore.kernel.org/r/20241203070218.240797-1-sj@kernel.org Tested-by: SeongJae Park Signed-off-by: Benjamin Berg Link: https://patch.msgid.link/20241204074827.1582917-1-benjamin@sipsolutions.net Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/x86/um/os-Linux/registers.c | 21 ++++++++++++++++++--- arch/x86/um/signal.c | 5 +++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c index 76eaeb93928cc..eb1cdadc8a61d 100644 --- a/arch/x86/um/os-Linux/registers.c +++ b/arch/x86/um/os-Linux/registers.c @@ -18,6 +18,7 @@ #include #include +static unsigned long ptrace_regset; unsigned long host_fp_size; int get_fp_registers(int pid, unsigned long *regs) @@ -27,7 +28,7 @@ int get_fp_registers(int pid, unsigned long *regs) .iov_len = host_fp_size, }; - if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) < 0) + if (ptrace(PTRACE_GETREGSET, pid, ptrace_regset, &iov) < 0) return -errno; return 0; } @@ -39,7 +40,7 @@ int put_fp_registers(int pid, unsigned long *regs) .iov_len = host_fp_size, }; - if (ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov) < 0) + if (ptrace(PTRACE_SETREGSET, pid, ptrace_regset, &iov) < 0) return -errno; return 0; } @@ -58,9 +59,23 @@ int arch_init_registers(int pid) return -ENOMEM; /* GDB has x86_xsave_length, which uses x86_cpuid_count */ - ret = ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov); + ptrace_regset = NT_X86_XSTATE; + ret = ptrace(PTRACE_GETREGSET, pid, ptrace_regset, &iov); if (ret) ret = -errno; + + if (ret == -ENODEV) { +#ifdef CONFIG_X86_32 + ptrace_regset = NT_PRXFPREG; +#else + ptrace_regset = NT_PRFPREG; +#endif + iov.iov_len = 2 * 1024 * 1024; + ret = ptrace(PTRACE_GETREGSET, pid, ptrace_regset, &iov); + if (ret) + ret = -errno; + } + munmap(iov.iov_base, 2 * 1024 * 1024); host_fp_size = iov.iov_len; diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index 75087e85b6fdb..ea5b3bcc42456 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -187,7 +187,12 @@ static int copy_sc_to_user(struct sigcontext __user *to, * Put magic/size values for userspace. We do not bother to verify them * later on, however, userspace needs them should it try to read the * XSTATE data. And ptrace does not fill in these parts. + * + * Skip this if we do not have an XSTATE frame. */ + if (host_fp_size <= sizeof(to_fp64->fpstate)) + return 0; + BUILD_BUG_ON(sizeof(int) != FP_XSTATE_MAGIC2_SIZE); #ifdef CONFIG_X86_32 __put_user(offsetof(struct _fpstate_32, _fxsr_env) + -- GitLab From 8891b176d350ec5ea9a39c6ef4c99bd63d68e64c Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 17 Dec 2024 21:27:44 +0100 Subject: [PATCH 0553/1585] um: avoid copying FP state from init_task The init_task instance of struct task_struct is statically allocated and does not contain the dynamic area for the userspace FP registers. As such, limit the copy to the valid area of init_task and fill the rest with zero. Note that the FP state is only needed for userspace, and as such it is entirely reasonable for init_task to not contain it. Reported-by: Brian Norris Closes: https://lore.kernel.org/Z1ySXmjZm-xOqk90@google.com Fixes: 3f17fed21491 ("um: switch to regset API and depend on XSTATE") Signed-off-by: Benjamin Berg Link: https://patch.msgid.link/20241217202745.1402932-3-benjamin@sipsolutions.net Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/kernel/process.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index e5a2d4d897e0c..0cd6fad3d908d 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -191,7 +191,15 @@ void initial_thread_cb(void (*proc)(void *), void *arg) int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - memcpy(dst, src, arch_task_struct_size); + /* init_task is not dynamically sized (missing FPU state) */ + if (unlikely(src == &init_task)) { + memcpy(dst, src, sizeof(init_task)); + memset((void *)dst + sizeof(init_task), 0, + arch_task_struct_size - sizeof(init_task)); + } else { + memcpy(dst, src, arch_task_struct_size); + } + return 0; } -- GitLab From 3c2fc7434d90338cf4c1b37bc95994208d23bfc6 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 7 Jan 2025 14:35:09 +0100 Subject: [PATCH 0554/1585] um: properly align signal stack on x86_64 The stack needs to be properly aligned so 16 byte memory accesses on the stack are correct. This was broken when introducing the dynamic math register sizing as the rounding was not moved appropriately. Fixes: 3f17fed21491 ("um: switch to regset API and depend on XSTATE") Signed-off-by: Benjamin Berg Link: https://patch.msgid.link/20250107133509.265576-1-benjamin@sipsolutions.net Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/x86/um/signal.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index ea5b3bcc42456..2934e170b0fe0 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -372,11 +372,13 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, int err = 0, sig = ksig->sig; unsigned long fp_to; - frame = (struct rt_sigframe __user *) - round_down(stack_top - sizeof(struct rt_sigframe), 16); + frame = (void __user *)stack_top - sizeof(struct rt_sigframe); /* Add required space for math frame */ - frame = (struct rt_sigframe __user *)((unsigned long)frame - math_size); + frame = (void __user *)((unsigned long)frame - math_size); + + /* ABI requires 16 byte boundary alignment */ + frame = (void __user *)round_down((unsigned long)frame, 16); /* Subtract 128 for a red zone and 8 for proper alignment */ frame = (struct rt_sigframe __user *) ((unsigned long) frame - 128 - 8); -- GitLab From f82a9e7b9fa922bb9cccb00aae684a27b79e6df7 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Mon, 13 Jan 2025 10:41:07 +0100 Subject: [PATCH 0555/1585] um: fix execve stub execution on old host OSs The stub execution uses the somewhat new close_range and execveat syscalls. Of these two, the execveat call is essential, but the close_range call is more about stub process hygiene rather than safety (and its result is ignored). Replace both calls with a raw syscall as older machines might not have a recent enough kernel for close_range (with CLOSE_RANGE_CLOEXEC) or a libc that does not yet expose both of the syscalls. Fixes: 32e8eaf263d9 ("um: use execveat to create userspace MMs") Reported-by: Glenn Washburn Closes: https://lore.kernel.org/20250108022404.05e0de1e@crass-HP-ZBook-15-G2 Signed-off-by: Benjamin Berg Link: https://patch.msgid.link/20250113094107.674738-1-benjamin@sipsolutions.net Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/os-Linux/skas/process.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index f683cfc9e51a5..e2f8f156402f5 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -181,6 +181,10 @@ extern char __syscall_stub_start[]; static int stub_exe_fd; +#ifndef CLOSE_RANGE_CLOEXEC +#define CLOSE_RANGE_CLOEXEC (1U << 2) +#endif + static int userspace_tramp(void *stack) { char *const argv[] = { "uml-userspace", NULL }; @@ -202,8 +206,12 @@ static int userspace_tramp(void *stack) init_data.stub_data_fd = phys_mapping(uml_to_phys(stack), &offset); init_data.stub_data_offset = MMAP_OFFSET(offset); - /* Set CLOEXEC on all FDs and then unset on all memory related FDs */ - close_range(0, ~0U, CLOSE_RANGE_CLOEXEC); + /* + * Avoid leaking unneeded FDs to the stub by setting CLOEXEC on all FDs + * and then unsetting it on all memory related FDs. + * This is not strictly necessary from a safety perspective. + */ + syscall(__NR_close_range, 0, ~0U, CLOSE_RANGE_CLOEXEC); fcntl(init_data.stub_data_fd, F_SETFD, 0); for (iomem = iomem_regions; iomem; iomem = iomem->next) @@ -224,7 +232,9 @@ static int userspace_tramp(void *stack) if (ret != sizeof(init_data)) exit(4); - execveat(stub_exe_fd, "", argv, NULL, AT_EMPTY_PATH); + /* Raw execveat for compatibility with older libc versions */ + syscall(__NR_execveat, stub_exe_fd, (unsigned long)"", + (unsigned long)argv, NULL, AT_EMPTY_PATH); exit(5); } -- GitLab From 5b166b782d327f4b66190cc43afd3be36f2b3b7a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 10 Jan 2025 13:54:04 +0100 Subject: [PATCH 0556/1585] um: virt-pci: don't use kmalloc() This code can be called deep in the IRQ handling, for example, and then cannot normally use kmalloc(). Have its own pre-allocated memory and use from there instead so this doesn't occur. Only in the (very rare) case of memcpy_toio() we'd still need to allocate memory. Link: https://patch.msgid.link/20250110125550.32479-6-johannes@sipsolutions.net Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/drivers/virt-pci.c | 198 +++++++++++++++++++------------------ 1 file changed, 102 insertions(+), 96 deletions(-) diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c index 744e7f31e8ef1..dd5580f975cc0 100644 --- a/arch/um/drivers/virt-pci.c +++ b/arch/um/drivers/virt-pci.c @@ -25,8 +25,10 @@ #define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32)) #define NUM_IRQ_MSGS 10 -#define HANDLE_NO_FREE(ptr) ((void *)((unsigned long)(ptr) | 1)) -#define HANDLE_IS_NO_FREE(ptr) ((unsigned long)(ptr) & 1) +struct um_pci_message_buffer { + struct virtio_pcidev_msg hdr; + u8 data[8]; +}; struct um_pci_device { struct virtio_device *vdev; @@ -36,6 +38,11 @@ struct um_pci_device { struct virtqueue *cmd_vq, *irq_vq; +#define UM_PCI_WRITE_BUFS 20 + struct um_pci_message_buffer bufs[UM_PCI_WRITE_BUFS + 1]; + void *extra_ptrs[UM_PCI_WRITE_BUFS + 1]; + DECLARE_BITMAP(used_bufs, UM_PCI_WRITE_BUFS); + #define UM_PCI_STAT_WAITING 0 unsigned long status; @@ -61,12 +68,40 @@ static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)]; static unsigned int um_pci_max_delay_us = 40000; module_param_named(max_delay_us, um_pci_max_delay_us, uint, 0644); -struct um_pci_message_buffer { - struct virtio_pcidev_msg hdr; - u8 data[8]; -}; +static int um_pci_get_buf(struct um_pci_device *dev, bool *posted) +{ + int i; + + for (i = 0; i < UM_PCI_WRITE_BUFS; i++) { + if (!test_and_set_bit(i, dev->used_bufs)) + return i; + } -static struct um_pci_message_buffer __percpu *um_pci_msg_bufs; + *posted = false; + return UM_PCI_WRITE_BUFS; +} + +static void um_pci_free_buf(struct um_pci_device *dev, void *buf) +{ + int i; + + if (buf == &dev->bufs[UM_PCI_WRITE_BUFS]) { + kfree(dev->extra_ptrs[UM_PCI_WRITE_BUFS]); + dev->extra_ptrs[UM_PCI_WRITE_BUFS] = NULL; + return; + } + + for (i = 0; i < UM_PCI_WRITE_BUFS; i++) { + if (buf == &dev->bufs[i]) { + kfree(dev->extra_ptrs[i]); + dev->extra_ptrs[i] = NULL; + WARN_ON(!test_and_clear_bit(i, dev->used_bufs)); + return; + } + } + + WARN_ON(1); +} static int um_pci_send_cmd(struct um_pci_device *dev, struct virtio_pcidev_msg *cmd, @@ -82,7 +117,9 @@ static int um_pci_send_cmd(struct um_pci_device *dev, }; struct um_pci_message_buffer *buf; int delay_count = 0; + bool bounce_out; int ret, len; + int buf_idx; bool posted; if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf))) @@ -101,26 +138,28 @@ static int um_pci_send_cmd(struct um_pci_device *dev, break; } - buf = get_cpu_var(um_pci_msg_bufs); - if (buf) - memcpy(buf, cmd, cmd_size); + bounce_out = !posted && cmd_size <= sizeof(*cmd) && + out && out_size <= sizeof(buf->data); - if (posted) { - u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC); - - if (ncmd) { - memcpy(ncmd, cmd, cmd_size); - if (extra) - memcpy(ncmd + cmd_size, extra, extra_size); - cmd = (void *)ncmd; - cmd_size += extra_size; - extra = NULL; - extra_size = 0; - } else { - /* try without allocating memory */ - posted = false; - cmd = (void *)buf; + buf_idx = um_pci_get_buf(dev, &posted); + buf = &dev->bufs[buf_idx]; + memcpy(buf, cmd, cmd_size); + + if (posted && extra && extra_size > sizeof(buf) - cmd_size) { + dev->extra_ptrs[buf_idx] = kmemdup(extra, extra_size, + GFP_ATOMIC); + + if (!dev->extra_ptrs[buf_idx]) { + um_pci_free_buf(dev, buf); + return -ENOMEM; } + extra = dev->extra_ptrs[buf_idx]; + } else if (extra && extra_size <= sizeof(buf) - cmd_size) { + memcpy((u8 *)buf + cmd_size, extra, extra_size); + cmd_size += extra_size; + extra_size = 0; + extra = NULL; + cmd = (void *)buf; } else { cmd = (void *)buf; } @@ -128,39 +167,40 @@ static int um_pci_send_cmd(struct um_pci_device *dev, sg_init_one(&out_sg, cmd, cmd_size); if (extra) sg_init_one(&extra_sg, extra, extra_size); - if (out) + /* allow stack for small buffers */ + if (bounce_out) + sg_init_one(&in_sg, buf->data, out_size); + else if (out) sg_init_one(&in_sg, out, out_size); /* add to internal virtio queue */ ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list, extra ? 2 : 1, out ? 1 : 0, - posted ? cmd : HANDLE_NO_FREE(cmd), - GFP_ATOMIC); + cmd, GFP_ATOMIC); if (ret) { - if (posted) - kfree(cmd); - goto out; + um_pci_free_buf(dev, buf); + return ret; } if (posted) { virtqueue_kick(dev->cmd_vq); - ret = 0; - goto out; + return 0; } /* kick and poll for getting a response on the queue */ set_bit(UM_PCI_STAT_WAITING, &dev->status); virtqueue_kick(dev->cmd_vq); + ret = 0; while (1) { void *completed = virtqueue_get_buf(dev->cmd_vq, &len); - if (completed == HANDLE_NO_FREE(cmd)) + if (completed == buf) break; - if (completed && !HANDLE_IS_NO_FREE(completed)) - kfree(completed); + if (completed) + um_pci_free_buf(dev, completed); if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) || ++delay_count > um_pci_max_delay_us, @@ -172,8 +212,11 @@ static int um_pci_send_cmd(struct um_pci_device *dev, } clear_bit(UM_PCI_STAT_WAITING, &dev->status); -out: - put_cpu_var(um_pci_msg_bufs); + if (bounce_out) + memcpy(out, buf->data, out_size); + + um_pci_free_buf(dev, buf); + return ret; } @@ -187,20 +230,13 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, .size = size, .addr = offset, }; - /* buf->data is maximum size - we may only use parts of it */ - struct um_pci_message_buffer *buf; - u8 *data; - unsigned long ret = ULONG_MAX; - size_t bytes = sizeof(buf->data); + /* max 8, we might not use it all */ + u8 data[8]; if (!dev) return ULONG_MAX; - buf = get_cpu_var(um_pci_msg_bufs); - data = buf->data; - - if (buf) - memset(data, 0xff, bytes); + memset(data, 0xff, sizeof(data)); switch (size) { case 1: @@ -212,34 +248,26 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, break; default: WARN(1, "invalid config space read size %d\n", size); - goto out; + return ULONG_MAX; } - if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, bytes)) - goto out; + if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, size)) + return ULONG_MAX; switch (size) { case 1: - ret = data[0]; - break; + return data[0]; case 2: - ret = le16_to_cpup((void *)data); - break; + return le16_to_cpup((void *)data); case 4: - ret = le32_to_cpup((void *)data); - break; + return le32_to_cpup((void *)data); #ifdef CONFIG_64BIT case 8: - ret = le64_to_cpup((void *)data); - break; + return le64_to_cpup((void *)data); #endif default: - break; + return ULONG_MAX; } - -out: - put_cpu_var(um_pci_msg_bufs); - return ret; } static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size, @@ -312,13 +340,8 @@ static void um_pci_bar_copy_from(void *priv, void *buffer, static unsigned long um_pci_bar_read(void *priv, unsigned int offset, int size) { - /* buf->data is maximum size - we may only use parts of it */ - struct um_pci_message_buffer *buf; - u8 *data; - unsigned long ret = ULONG_MAX; - - buf = get_cpu_var(um_pci_msg_bufs); - data = buf->data; + /* 8 is maximum size - we may only use parts of it */ + u8 data[8]; switch (size) { case 1: @@ -330,33 +353,25 @@ static unsigned long um_pci_bar_read(void *priv, unsigned int offset, break; default: WARN(1, "invalid config space read size %d\n", size); - goto out; + return ULONG_MAX; } um_pci_bar_copy_from(priv, data, offset, size); switch (size) { case 1: - ret = data[0]; - break; + return data[0]; case 2: - ret = le16_to_cpup((void *)data); - break; + return le16_to_cpup((void *)data); case 4: - ret = le32_to_cpup((void *)data); - break; + return le32_to_cpup((void *)data); #ifdef CONFIG_64BIT case 8: - ret = le64_to_cpup((void *)data); - break; + return le64_to_cpup((void *)data); #endif default: - break; + return ULONG_MAX; } - -out: - put_cpu_var(um_pci_msg_bufs); - return ret; } static void um_pci_bar_copy_to(void *priv, unsigned int offset, @@ -523,11 +538,8 @@ static void um_pci_cmd_vq_cb(struct virtqueue *vq) if (test_bit(UM_PCI_STAT_WAITING, &dev->status)) return; - while ((cmd = virtqueue_get_buf(vq, &len))) { - if (WARN_ON(HANDLE_IS_NO_FREE(cmd))) - continue; - kfree(cmd); - } + while ((cmd = virtqueue_get_buf(vq, &len))) + um_pci_free_buf(dev, cmd); } static void um_pci_irq_vq_cb(struct virtqueue *vq) @@ -1006,10 +1018,6 @@ static int __init um_pci_init(void) "No virtio device ID configured for PCI - no PCI support\n")) return 0; - um_pci_msg_bufs = alloc_percpu(struct um_pci_message_buffer); - if (!um_pci_msg_bufs) - return -ENOMEM; - bridge = pci_alloc_host_bridge(0); if (!bridge) { err = -ENOMEM; @@ -1070,7 +1078,6 @@ static int __init um_pci_init(void) pci_free_resource_list(&bridge->windows); pci_free_host_bridge(bridge); } - free_percpu(um_pci_msg_bufs); return err; } module_init(um_pci_init); @@ -1082,6 +1089,5 @@ static void __exit um_pci_exit(void) irq_domain_remove(um_pci_inner_domain); pci_free_resource_list(&bridge->windows); pci_free_host_bridge(bridge); - free_percpu(um_pci_msg_bufs); } module_exit(um_pci_exit); -- GitLab From daa1a05ba431540097ec925d4e01d53ef29a98f1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 10 Jan 2025 13:54:05 +0100 Subject: [PATCH 0557/1585] um: virtio_uml: use raw spinlock This is needed because at least in time-travel the code can be called directly from the deep architecture and IRQ handling code. Link: https://patch.msgid.link/20250110125550.32479-7-johannes@sipsolutions.net Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/drivers/virtio_uml.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 65df43fa9be58..ad8d78fb1d9aa 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -52,7 +52,7 @@ struct virtio_uml_device { struct platform_device *pdev; struct virtio_uml_platform_data *pdata; - spinlock_t sock_lock; + raw_spinlock_t sock_lock; int sock, req_fd, irq; u64 features; u64 protocol_features; @@ -246,7 +246,7 @@ static int vhost_user_send(struct virtio_uml_device *vu_dev, if (request_ack) msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY; - spin_lock_irqsave(&vu_dev->sock_lock, flags); + raw_spin_lock_irqsave(&vu_dev->sock_lock, flags); rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds); if (rc < 0) goto out; @@ -266,7 +266,7 @@ static int vhost_user_send(struct virtio_uml_device *vu_dev, } out: - spin_unlock_irqrestore(&vu_dev->sock_lock, flags); + raw_spin_unlock_irqrestore(&vu_dev->sock_lock, flags); return rc; } @@ -1239,7 +1239,7 @@ static int virtio_uml_probe(struct platform_device *pdev) goto error_free; vu_dev->sock = rc; - spin_lock_init(&vu_dev->sock_lock); + raw_spin_lock_init(&vu_dev->sock_lock); rc = vhost_user_init(vu_dev); if (rc) -- GitLab From 96178631c3f53398044ed437010f7632ad764bf8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 10 Jan 2025 13:54:06 +0100 Subject: [PATCH 0558/1585] um: convert irq_lock to raw spinlock Since this is deep in the architecture, and the code is called nested into other deep management code, this really needs to be a raw spinlock. Convert it. Link: https://patch.msgid.link/20250110125550.32479-8-johannes@sipsolutions.net Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/kernel/irq.c | 79 ++++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 32 deletions(-) diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 338450741aac5..a4991746f5eac 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -52,7 +52,7 @@ struct irq_entry { bool sigio_workaround; }; -static DEFINE_SPINLOCK(irq_lock); +static DEFINE_RAW_SPINLOCK(irq_lock); static LIST_HEAD(active_fds); static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ); static bool irqs_suspended; @@ -257,7 +257,7 @@ static struct irq_entry *get_irq_entry_by_fd(int fd) return NULL; } -static void free_irq_entry(struct irq_entry *to_free, bool remove) +static void remove_irq_entry(struct irq_entry *to_free, bool remove) { if (!to_free) return; @@ -265,7 +265,6 @@ static void free_irq_entry(struct irq_entry *to_free, bool remove) if (remove) os_del_epoll_fd(to_free->fd); list_del(&to_free->list); - kfree(to_free); } static bool update_irq_entry(struct irq_entry *entry) @@ -286,17 +285,19 @@ static bool update_irq_entry(struct irq_entry *entry) return false; } -static void update_or_free_irq_entry(struct irq_entry *entry) +static struct irq_entry *update_or_remove_irq_entry(struct irq_entry *entry) { - if (!update_irq_entry(entry)) - free_irq_entry(entry, false); + if (update_irq_entry(entry)) + return NULL; + remove_irq_entry(entry, false); + return entry; } static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id, void (*timetravel_handler)(int, int, void *, struct time_travel_event *)) { - struct irq_entry *irq_entry; + struct irq_entry *irq_entry, *to_free = NULL; int err, events = os_event_mask(type); unsigned long flags; @@ -304,9 +305,10 @@ static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id, if (err < 0) goto out; - spin_lock_irqsave(&irq_lock, flags); + raw_spin_lock_irqsave(&irq_lock, flags); irq_entry = get_irq_entry_by_fd(fd); if (irq_entry) { +already: /* cannot register the same FD twice with the same type */ if (WARN_ON(irq_entry->reg[type].events)) { err = -EALREADY; @@ -316,11 +318,22 @@ static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id, /* temporarily disable to avoid IRQ-side locking */ os_del_epoll_fd(fd); } else { - irq_entry = kzalloc(sizeof(*irq_entry), GFP_ATOMIC); - if (!irq_entry) { - err = -ENOMEM; - goto out_unlock; + struct irq_entry *new; + + /* don't restore interrupts */ + raw_spin_unlock(&irq_lock); + new = kzalloc(sizeof(*irq_entry), GFP_ATOMIC); + if (!new) { + local_irq_restore(flags); + return -ENOMEM; } + raw_spin_lock(&irq_lock); + irq_entry = get_irq_entry_by_fd(fd); + if (irq_entry) { + to_free = new; + goto already; + } + irq_entry = new; irq_entry->fd = fd; list_add_tail(&irq_entry->list, &active_fds); maybe_sigio_broken(fd); @@ -339,12 +352,11 @@ static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id, #endif WARN_ON(!update_irq_entry(irq_entry)); - spin_unlock_irqrestore(&irq_lock, flags); - - return 0; + err = 0; out_unlock: - spin_unlock_irqrestore(&irq_lock, flags); + raw_spin_unlock_irqrestore(&irq_lock, flags); out: + kfree(to_free); return err; } @@ -358,19 +370,20 @@ void free_irq_by_fd(int fd) struct irq_entry *to_free; unsigned long flags; - spin_lock_irqsave(&irq_lock, flags); + raw_spin_lock_irqsave(&irq_lock, flags); to_free = get_irq_entry_by_fd(fd); - free_irq_entry(to_free, true); - spin_unlock_irqrestore(&irq_lock, flags); + remove_irq_entry(to_free, true); + raw_spin_unlock_irqrestore(&irq_lock, flags); + kfree(to_free); } EXPORT_SYMBOL(free_irq_by_fd); static void free_irq_by_irq_and_dev(unsigned int irq, void *dev) { - struct irq_entry *entry; + struct irq_entry *entry, *to_free = NULL; unsigned long flags; - spin_lock_irqsave(&irq_lock, flags); + raw_spin_lock_irqsave(&irq_lock, flags); list_for_each_entry(entry, &active_fds, list) { enum um_irq_type i; @@ -386,12 +399,13 @@ static void free_irq_by_irq_and_dev(unsigned int irq, void *dev) os_del_epoll_fd(entry->fd); reg->events = 0; - update_or_free_irq_entry(entry); + to_free = update_or_remove_irq_entry(entry); goto out; } } out: - spin_unlock_irqrestore(&irq_lock, flags); + raw_spin_unlock_irqrestore(&irq_lock, flags); + kfree(to_free); } void deactivate_fd(int fd, int irqnum) @@ -402,7 +416,7 @@ void deactivate_fd(int fd, int irqnum) os_del_epoll_fd(fd); - spin_lock_irqsave(&irq_lock, flags); + raw_spin_lock_irqsave(&irq_lock, flags); entry = get_irq_entry_by_fd(fd); if (!entry) goto out; @@ -414,9 +428,10 @@ void deactivate_fd(int fd, int irqnum) entry->reg[i].events = 0; } - update_or_free_irq_entry(entry); + entry = update_or_remove_irq_entry(entry); out: - spin_unlock_irqrestore(&irq_lock, flags); + raw_spin_unlock_irqrestore(&irq_lock, flags); + kfree(entry); ignore_sigio_fd(fd); } @@ -546,7 +561,7 @@ void um_irqs_suspend(void) irqs_suspended = true; - spin_lock_irqsave(&irq_lock, flags); + raw_spin_lock_irqsave(&irq_lock, flags); list_for_each_entry(entry, &active_fds, list) { enum um_irq_type t; bool clear = true; @@ -579,7 +594,7 @@ void um_irqs_suspend(void) !__ignore_sigio_fd(entry->fd); } } - spin_unlock_irqrestore(&irq_lock, flags); + raw_spin_unlock_irqrestore(&irq_lock, flags); } void um_irqs_resume(void) @@ -588,7 +603,7 @@ void um_irqs_resume(void) unsigned long flags; - spin_lock_irqsave(&irq_lock, flags); + raw_spin_lock_irqsave(&irq_lock, flags); list_for_each_entry(entry, &active_fds, list) { if (entry->suspended) { int err = os_set_fd_async(entry->fd); @@ -602,7 +617,7 @@ void um_irqs_resume(void) } } } - spin_unlock_irqrestore(&irq_lock, flags); + raw_spin_unlock_irqrestore(&irq_lock, flags); irqs_suspended = false; send_sigio_to_self(); @@ -613,7 +628,7 @@ static int normal_irq_set_wake(struct irq_data *d, unsigned int on) struct irq_entry *entry; unsigned long flags; - spin_lock_irqsave(&irq_lock, flags); + raw_spin_lock_irqsave(&irq_lock, flags); list_for_each_entry(entry, &active_fds, list) { enum um_irq_type t; @@ -628,7 +643,7 @@ static int normal_irq_set_wake(struct irq_data *d, unsigned int on) } } unlock: - spin_unlock_irqrestore(&irq_lock, flags); + raw_spin_unlock_irqrestore(&irq_lock, flags); return 0; } #else -- GitLab From 2b4fc4cd43f28e9e39179c8702e6ee821258584d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 12 Feb 2025 15:52:54 -0700 Subject: [PATCH 0559/1585] io_uring/waitid: setup async data in the prep handler This is the idiomatic way that opcodes should setup their async data, so that it's always valid inside ->issue() without issue needing to do that. Fixes: f31ecf671ddc4 ("io_uring: add IORING_OP_WAITID support") Signed-off-by: Jens Axboe --- io_uring/waitid.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/io_uring/waitid.c b/io_uring/waitid.c index c4096d93a2870..15a7daf3ff4f3 100644 --- a/io_uring/waitid.c +++ b/io_uring/waitid.c @@ -285,10 +285,16 @@ static int io_waitid_wait(struct wait_queue_entry *wait, unsigned mode, int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); + struct io_waitid_async *iwa; if (sqe->addr || sqe->buf_index || sqe->addr3 || sqe->waitid_flags) return -EINVAL; + iwa = io_uring_alloc_async_data(NULL, req); + if (!unlikely(iwa)) + return -ENOMEM; + iwa->req = req; + iw->which = READ_ONCE(sqe->len); iw->upid = READ_ONCE(sqe->fd); iw->options = READ_ONCE(sqe->file_index); @@ -299,16 +305,10 @@ int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) int io_waitid(struct io_kiocb *req, unsigned int issue_flags) { struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); + struct io_waitid_async *iwa = req->async_data; struct io_ring_ctx *ctx = req->ctx; - struct io_waitid_async *iwa; int ret; - iwa = io_uring_alloc_async_data(NULL, req); - if (!iwa) - return -ENOMEM; - - iwa->req = req; - ret = kernel_waitid_prepare(&iwa->wo, iw->which, iw->upid, &iw->info, iw->options, NULL); if (ret) -- GitLab From 960a62877466067adc89bd37fe36d3b6edddb965 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 10 Feb 2025 18:18:29 -0500 Subject: [PATCH 0560/1585] drm/amdgpu/pm: fix UVD handing in amdgpu_dpm_set_powergating_by_smu() UVD and VCN were split into separate dpm helpers in commit ff69bba05f08 ("drm/amd/pm: add inst to dpm_set_powergating_by_smu") as such, there is no need to include UVD in the is_vcn variable since UVD and VCN are handled by separate dpm helpers now. Fix the check. Fixes: ff69bba05f08 ("drm/amd/pm: add inst to dpm_set_powergating_by_smu") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3959 Link: https://lists.freedesktop.org/archives/amd-gfx/2025-February/119827.html Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: Boyuan Zhang --- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 6a9e26905edfc..7a22aef6e59c3 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -78,7 +78,7 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, int ret = 0; const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; enum ip_power_state pwr_state = gate ? POWER_STATE_OFF : POWER_STATE_ON; - bool is_vcn = (block_type == AMD_IP_BLOCK_TYPE_UVD || block_type == AMD_IP_BLOCK_TYPE_VCN); + bool is_vcn = block_type == AMD_IP_BLOCK_TYPE_VCN; if (atomic_read(&adev->pm.pwr_state[block_type]) == pwr_state && (!is_vcn || adev->vcn.num_vcn_inst == 1)) { -- GitLab From 9cf6b84b71adb97f3c19476ebb5a42228fad89b5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Sep 2024 22:12:31 -0400 Subject: [PATCH 0561/1585] bcachefs: CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS Incorrectly handled transaction restarts can be a source of heisenbugs; add a mode where we randomly inject them to shake them out. Signed-off-by: Kent Overstreet --- fs/bcachefs/Kconfig | 7 +++++++ fs/bcachefs/btree_iter.c | 33 +++++++++++++++++++++++++++++++- fs/bcachefs/btree_iter.h | 12 ++++++++++++ fs/bcachefs/btree_trans_commit.c | 4 ++++ fs/bcachefs/btree_types.h | 3 +++ 5 files changed, 58 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig index 85eea7a4dea30..fc7efd0a7525e 100644 --- a/fs/bcachefs/Kconfig +++ b/fs/bcachefs/Kconfig @@ -61,6 +61,13 @@ config BCACHEFS_DEBUG The resulting code will be significantly slower than normal; you probably shouldn't select this option unless you're a developer. +config BCACHEFS_INJECT_TRANSACTION_RESTARTS + bool "Randomly inject transaction restarts" + depends on BCACHEFS_DEBUG + help + Randomly inject transaction restarts in a few core paths - may have a + significant performance penalty + config BCACHEFS_TESTS bool "bcachefs unit and performance tests" depends on BCACHEFS_FS diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 5988219c69084..e32fce4fd2583 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2357,6 +2357,12 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en bch2_btree_iter_verify_entry_exit(iter); EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bkey_eq(end, POS_MAX)); + ret = trans_maybe_inject_restart(trans, _RET_IP_); + if (unlikely(ret)) { + k = bkey_s_c_err(ret); + goto out_no_locked; + } + if (iter->update_path) { bch2_path_put_nokeep(trans, iter->update_path, iter->flags & BTREE_ITER_intent); @@ -2622,6 +2628,12 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp bch2_btree_iter_verify_entry_exit(iter); EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN)); + int ret = trans_maybe_inject_restart(trans, _RET_IP_); + if (unlikely(ret)) { + k = bkey_s_c_err(ret); + goto out_no_locked; + } + while (1) { k = __bch2_btree_iter_peek_prev(iter, search_key); if (unlikely(!k.k)) @@ -2749,6 +2761,12 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) bch2_btree_iter_verify_entry_exit(iter); EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_with_key_cache)); + ret = trans_maybe_inject_restart(trans, _RET_IP_); + if (unlikely(ret)) { + k = bkey_s_c_err(ret); + goto out_no_locked; + } + /* extents can't span inode numbers: */ if ((iter->flags & BTREE_ITER_is_extents) && unlikely(iter->pos.offset == KEY_OFFSET_MAX)) { @@ -3106,6 +3124,10 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX); + ret = trans_maybe_inject_restart(trans, _RET_IP_); + if (ret) + return ERR_PTR(ret); + struct btree_transaction_stats *s = btree_trans_stats(trans); s->max_mem = max(s->max_mem, new_bytes); @@ -3163,7 +3185,8 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) if (old_bytes) { trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes); - return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced)); + return ERR_PTR(btree_trans_restart_ip(trans, + BCH_ERR_transaction_restart_mem_realloced, _RET_IP_)); } out_change_top: p = trans->mem + trans->mem_top; @@ -3271,6 +3294,14 @@ u32 bch2_trans_begin(struct btree_trans *trans) trans->last_begin_ip = _RET_IP_; +#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS + if (trans->restarted) { + trans->restart_count_this_trans++; + } else { + trans->restart_count_this_trans = 0; + } +#endif + trans_set_locked(trans, false); if (trans->restarted) { diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index b9538e6e6d65e..8c16d9a3ec1d8 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -355,6 +355,18 @@ static int btree_trans_restart(struct btree_trans *trans, int err) return btree_trans_restart_ip(trans, err, _THIS_IP_); } +static inline int trans_maybe_inject_restart(struct btree_trans *trans, unsigned long ip) +{ +#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS + if (!(ktime_get_ns() & ~(~0ULL << min(63, (10 + trans->restart_count_this_trans))))) { + trace_and_count(trans->c, trans_restart_injected, trans, ip); + return btree_trans_restart_ip(trans, + BCH_ERR_transaction_restart_fault_inject, ip); + } +#endif + return 0; +} + bool bch2_btree_node_upgrade(struct btree_trans *, struct btree_path *, unsigned); diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 2760dd9569ed9..c4f524b2ca9a0 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -999,6 +999,10 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) bch2_trans_verify_not_unlocked_or_in_restart(trans); + ret = trans_maybe_inject_restart(trans, _RET_IP_); + if (unlikely(ret)) + goto out_reset; + if (!trans->nr_updates && !trans->journal_entries_u64s) goto out_reset; diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index a6f251eb41646..a09cbe9cd94f1 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -509,6 +509,9 @@ struct btree_trans { bool notrace_relock_fail:1; enum bch_errcode restarted:16; u32 restart_count; +#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS + u32 restart_count_this_trans; +#endif u64 last_begin_time; unsigned long last_begin_ip; -- GitLab From 531323a2efc3fbe20b540e3f41ecc94d68e74b76 Mon Sep 17 00:00:00 2001 From: Alan Huang Date: Thu, 13 Feb 2025 02:11:01 +0800 Subject: [PATCH 0562/1585] bcachefs: Pass _orig_restart_count to trans_was_restarted _orig_restart_count is unused now, according to the logic, trans_was_restarted should be using _orig_restart_count. Signed-off-by: Alan Huang Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 8c16d9a3ec1d8..b96157f3dc9c7 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -751,7 +751,7 @@ transaction_restart: \ if (!_ret2) \ bch2_trans_verify_not_restarted(_trans, _restart_count);\ \ - _ret2 ?: trans_was_restarted(_trans, _restart_count); \ + _ret2 ?: trans_was_restarted(_trans, _orig_restart_count); \ }) #define for_each_btree_key_max_continue(_trans, _iter, \ -- GitLab From 406e445b3c6be65ab0ee961145e74bfd7ef6c9e1 Mon Sep 17 00:00:00 2001 From: Alan Huang Date: Wed, 12 Feb 2025 17:27:51 +0800 Subject: [PATCH 0563/1585] bcachefs: Reuse transaction bch2_nocow_write_convert_unwritten is already in transaction context: 00191 ========= TEST generic/648 00242 kernel BUG at fs/bcachefs/btree_iter.c:3332! 00242 Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP 00242 Modules linked in: 00242 CPU: 4 UID: 0 PID: 2593 Comm: fsstress Not tainted 6.13.0-rc3-ktest-g345af8f855b7 #14403 00242 Hardware name: linux,dummy-virt (DT) 00242 pstate: 60001005 (nZCv daif -PAN -UAO -TCO -DIT +SSBS BTYPE=--) 00242 pc : __bch2_trans_get+0x120/0x410 00242 lr : __bch2_trans_get+0xcc/0x410 00242 sp : ffffff80d89af600 00242 x29: ffffff80d89af600 x28: ffffff80ddb23000 x27: 00000000fffff705 00242 x26: ffffff80ddb23028 x25: ffffff80d8903fe0 x24: ffffff80ebb30168 00242 x23: ffffff80c8aeb500 x22: 000000000000005d x21: ffffff80d8904078 00242 x20: ffffff80d8900000 x19: ffffff80da9e8000 x18: 0000000000000000 00242 x17: 64747568735f6c61 x16: 6e72756f6a20726f x15: 0000000000000028 00242 x14: 0000000000000004 x13: 000000000000f787 x12: ffffffc081bbcdc8 00242 x11: 0000000000000000 x10: 0000000000000003 x9 : ffffffc08094efbc 00242 x8 : 000000001092c111 x7 : 000000000000000c x6 : ffffffc083c31fc4 00242 x5 : ffffffc083c31f28 x4 : ffffff80c8aeb500 x3 : ffffff80ebb30000 00242 x2 : 0000000000000001 x1 : 0000000000000a21 x0 : 000000000000028e 00242 Call trace: 00242 __bch2_trans_get+0x120/0x410 (P) 00242 bch2_inum_offset_err_msg+0x48/0xb0 00242 bch2_nocow_write_convert_unwritten+0x3d0/0x530 00242 bch2_nocow_write+0xeb0/0x1000 00242 __bch2_write+0x330/0x4e8 00242 bch2_write+0x1f0/0x530 00242 bch2_direct_write+0x530/0xc00 00242 bch2_write_iter+0x160/0xbe0 00242 vfs_write+0x1cc/0x360 00242 ksys_write+0x5c/0xf0 00242 __arm64_sys_write+0x20/0x30 00242 invoke_syscall.constprop.0+0x54/0xe8 00242 do_el0_svc+0x44/0xc0 00242 el0_svc+0x34/0xa0 00242 el0t_64_sync_handler+0x104/0x130 00242 el0t_64_sync+0x154/0x158 00242 Code: 6b01001f 54ffff01 79408460 3617fec0 (d4210000) 00242 ---[ end trace 0000000000000000 ]--- 00242 Kernel panic - not syncing: Oops - BUG: Fatal exception Signed-off-by: Alan Huang Signed-off-by: Kent Overstreet --- fs/bcachefs/io_write.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index dd508d93e9fc8..03892388832b6 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -411,6 +411,16 @@ void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op) __bch2_write_op_error(out, op, op->pos.offset); } +static void bch2_write_op_error_trans(struct btree_trans *trans, struct printbuf *out, + struct bch_write_op *op, u64 offset) +{ + bch2_inum_offset_err_msg_trans(trans, out, + (subvol_inum) { op->subvol, op->pos.inode, }, + offset << 9); + prt_printf(out, "write error%s: ", + op->flags & BCH_WRITE_MOVE ? "(internal move)" : ""); +} + void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, enum bch_data_type type, const struct bkey_i *k, @@ -1193,7 +1203,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) struct bkey_i *insert = bch2_keylist_front(&op->insert_keys); struct printbuf buf = PRINTBUF; - __bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k)); + bch2_write_op_error_trans(trans, &buf, op, bkey_start_offset(&insert->k)); prt_printf(&buf, "btree update error: %s", bch2_err_str(ret)); bch_err_ratelimited(c, "%s", buf.buf); printbuf_exit(&buf); -- GitLab From b35eb9128ebeec534eed1cefd6b9b1b7282cf5ba Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 28 Jan 2025 11:55:22 -0500 Subject: [PATCH 0564/1585] drm/amdgpu/gfx9: manually control gfxoff for CS on RV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When mesa started using compute queues more often we started seeing additional hangs with compute queues. Disabling gfxoff seems to mitigate that. Manually control gfxoff and gfx pg with command submissions to avoid any issues related to gfxoff. KFD already does the same thing for these chips. v2: limit to compute v3: limit to APUs v4: limit to Raven/PCO v5: only update the compute ring_funcs v6: Disable GFX PG v7: adjust order Reviewed-by: Lijo Lazar Suggested-by: Błażej Szczygieł Suggested-by: Sergey Kovalenko Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3861 Link: https://lists.freedesktop.org/archives/amd-gfx/2025-January/119116.html Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.12.x --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 36 +++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index fa572b40989e3..0dce4421418c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7437,6 +7437,38 @@ static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ } +static void gfx_v9_0_ring_begin_use_compute(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ip_block *gfx_block = + amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); + + amdgpu_gfx_enforce_isolation_ring_begin_use(ring); + + /* Raven and PCO APUs seem to have stability issues + * with compute and gfxoff and gfx pg. Disable gfx pg during + * submission and allow again afterwards. + */ + if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0)) + gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_UNGATE); +} + +static void gfx_v9_0_ring_end_use_compute(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ip_block *gfx_block = + amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); + + /* Raven and PCO APUs seem to have stability issues + * with compute and gfxoff and gfx pg. Disable gfx pg during + * submission and allow again afterwards. + */ + if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0)) + gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_GATE); + + amdgpu_gfx_enforce_isolation_ring_end_use(ring); +} + static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .name = "gfx_v9_0", .early_init = gfx_v9_0_early_init, @@ -7613,8 +7645,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .emit_wave_limit = gfx_v9_0_emit_wave_limit, .reset = gfx_v9_0_reset_kcq, .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, - .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, - .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, + .begin_use = gfx_v9_0_ring_begin_use_compute, + .end_use = gfx_v9_0_ring_end_use_compute, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { -- GitLab From 55ed2b1b50d029dd7e49a35f6628ca64db6d75d8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 31 Jan 2025 13:53:40 -0500 Subject: [PATCH 0565/1585] drm/amdgpu: bump version for RV/PCO compute fix Bump the driver version for RV/PCO compute stability fix so mesa can use this check to enable compute queues on RV/PCO. Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.12.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index dce9323fb410c..95a05b03f799d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -120,9 +120,10 @@ * - 3.58.0 - Add GFX12 DCC support * - 3.59.0 - Cleared VRAM * - 3.60.0 - Add AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE (Vulkan requirement) + * - 3.61.0 - Contains fix for RV/PCO compute queues */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 60 +#define KMS_DRIVER_MINOR 61 #define KMS_DRIVER_PATCHLEVEL 0 /* -- GitLab From a33f7f9660705fb2ecf3467b2c48965564f392ce Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Sun, 26 Jan 2025 17:21:10 +0800 Subject: [PATCH 0566/1585] amdkfd: properly free gang_ctx_bo when failed to init user queue The destructor of a gtt bo is declared as void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj); Which takes void** as the second parameter. GCC allows passing void* to the function because void* can be implicitly casted to any other types, so it can pass compiling. However, passing this void* parameter into the function's execution process(which expects void** and dereferencing void**) will result in errors. Signed-off-by: Zhu Lingshan Reviewed-by: Felix Kuehling Fixes: fb91065851cd ("drm/amdkfd: Refactor queue wptr_bo GART mapping") Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index bcddd989c7f39..bd36a75309e12 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -300,7 +300,7 @@ static int init_user_queue(struct process_queue_manager *pqm, return 0; free_gang_ctx_bo: - amdgpu_amdkfd_free_gtt_mem(dev->adev, (*q)->gang_ctx_bo); + amdgpu_amdkfd_free_gtt_mem(dev->adev, &(*q)->gang_ctx_bo); cleanup: uninit_queue(*q); *q = NULL; -- GitLab From a0a455b4bc7483ad60e8b8a50330c1e05bb7bfcf Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 7 Feb 2025 14:28:51 +0800 Subject: [PATCH 0567/1585] drm/amdgpu: bail out when failed to load fw in psp_init_cap_microcode() In function psp_init_cap_microcode(), it should bail out when failed to load firmware, otherwise it may cause invalid memory access. Fixes: 07dbfc6b102e ("drm/amd: Use `amdgpu_ucode_*` helpers for PSP") Reviewed-by: Lijo Lazar Signed-off-by: Jiang Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index babe94ade2478..e5fc80ed06eae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -3815,9 +3815,10 @@ int psp_init_cap_microcode(struct psp_context *psp, const char *chip_name) if (err == -ENODEV) { dev_warn(adev->dev, "cap microcode does not exist, skip\n"); err = 0; - goto out; + } else { + dev_err(adev->dev, "fail to initialize cap microcode\n"); } - dev_err(adev->dev, "fail to initialize cap microcode\n"); + goto out; } info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CAP]; -- GitLab From d584198a6fe4c51f4aa88ad72f258f8961a0f11c Mon Sep 17 00:00:00 2001 From: Lancelot SIX Date: Tue, 28 Jan 2025 19:16:49 +0000 Subject: [PATCH 0568/1585] drm/amdkfd: Ensure consistent barrier state saved in gfx12 trap handler It is possible for some waves in a workgroup to finish their save sequence before the group leader has had time to capture the workgroup barrier state. When this happens, having those waves exit do impact the barrier state. As a consequence, the state captured by the group leader is invalid, and is eventually incorrectly restored. This patch proposes to have all waves in a workgroup wait for each other at the end of their save sequence (just before calling s_endpgm_saved). Signed-off-by: Lancelot SIX Reviewed-by: Jay Cornwall Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.12.x --- drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 3 ++- drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 984f0e7050788..651660958e5b1 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -4121,7 +4121,8 @@ static const uint32_t cwsr_trap_gfx12_hex[] = { 0x0000ffff, 0x8bfe7e7e, 0x8bea6a6a, 0xb97af804, 0xbe804ec2, 0xbf94fffe, - 0xbe804a6c, 0xbfb10000, + 0xbe804a6c, 0xbe804ec2, + 0xbf94fffe, 0xbfb10000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0x00000000, diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm index 1740e98c6719d..7b9d36e5fa437 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm @@ -1049,6 +1049,10 @@ L_SKIP_BARRIER_RESTORE: s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution L_END_PGM: + // Make sure that no wave of the workgroup can exit the trap handler + // before the workgroup barrier state is saved. + s_barrier_signal -2 + s_barrier_wait -2 s_endpgm_saved end -- GitLab From 1abb2648698bf10783d2236a6b4a7ca5e8021699 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 7 Feb 2025 14:44:14 +0800 Subject: [PATCH 0569/1585] drm/amdgpu: avoid buffer overflow attach in smu_sys_set_pp_table() It malicious user provides a small pptable through sysfs and then a bigger pptable, it may cause buffer overflow attack in function smu_sys_set_pp_table(). Reviewed-by: Lijo Lazar Signed-off-by: Jiang Liu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 8ca793c222ff2..ed9dac00ebfb1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -612,7 +612,8 @@ static int smu_sys_set_pp_table(void *handle, return -EIO; } - if (!smu_table->hardcode_pptable) { + if (!smu_table->hardcode_pptable || smu_table->power_play_table_size < size) { + kfree(smu_table->hardcode_pptable); smu_table->hardcode_pptable = kzalloc(size, GFP_KERNEL); if (!smu_table->hardcode_pptable) return -ENOMEM; -- GitLab From 15d6f74f03f84c5b8d032bb1be6b90af82e5b679 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 10 Feb 2025 10:24:55 -0300 Subject: [PATCH 0570/1585] MAINTAINERS: Add sctp headers to the general netdev entry All SCTP patches are picked up by netdev maintainers. Two headers were missing to be listed there. Reported-by: Thorsten Blum Signed-off-by: Marcelo Ricardo Leitner Reviewed-by: Simon Horman Link: https://patch.msgid.link/b3c2dc3a102eb89bd155abca2503ebd015f50ee0.1739193671.git.marcelo.leitner@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 5f9535f4f1964..b182021f4906e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16508,6 +16508,7 @@ F: include/linux/netdev* F: include/linux/netlink.h F: include/linux/netpoll.h F: include/linux/rtnetlink.h +F: include/linux/sctp.h F: include/linux/seq_file_net.h F: include/linux/skbuff* F: include/net/ @@ -16524,6 +16525,7 @@ F: include/uapi/linux/netdev* F: include/uapi/linux/netlink.h F: include/uapi/linux/netlink_diag.h F: include/uapi/linux/rtnetlink.h +F: include/uapi/linux/sctp.h F: lib/net_utils.c F: lib/random32.c F: net/ -- GitLab From 78dafe1cf3afa02ed71084b350713b07e72a18fb Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 10 Feb 2025 13:15:00 +0100 Subject: [PATCH 0571/1585] vsock: Orphan socket after transport release During socket release, sock_orphan() is called without considering that it sets sk->sk_wq to NULL. Later, if SO_LINGER is enabled, this leads to a null pointer dereferenced in virtio_transport_wait_close(). Orphan the socket only after transport release. Partially reverts the 'Fixes:' commit. KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f] lock_acquire+0x19e/0x500 _raw_spin_lock_irqsave+0x47/0x70 add_wait_queue+0x46/0x230 virtio_transport_release+0x4e7/0x7f0 __vsock_release+0xfd/0x490 vsock_release+0x90/0x120 __sock_release+0xa3/0x250 sock_close+0x14/0x20 __fput+0x35e/0xa90 __x64_sys_close+0x78/0xd0 do_syscall_64+0x93/0x1b0 entry_SYSCALL_64_after_hwframe+0x76/0x7e Reported-by: syzbot+9d55b199192a4be7d02c@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=9d55b199192a4be7d02c Fixes: fcdd2242c023 ("vsock: Keep the binding until socket destruction") Tested-by: Luigi Leonardi Reviewed-by: Luigi Leonardi Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20250210-vsock-linger-nullderef-v3-1-ef6244d02b54@rbox.co Signed-off-by: Jakub Kicinski --- net/vmw_vsock/af_vsock.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 075695173648d..53a081d49d28a 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -824,13 +824,19 @@ static void __vsock_release(struct sock *sk, int level) */ lock_sock_nested(sk, level); - sock_orphan(sk); + /* Indicate to vsock_remove_sock() that the socket is being released and + * can be removed from the bound_table. Unlike transport reassignment + * case, where the socket must remain bound despite vsock_remove_sock() + * being called from the transport release() callback. + */ + sock_set_flag(sk, SOCK_DEAD); if (vsk->transport) vsk->transport->release(vsk); else if (sock_type_connectible(sk->sk_type)) vsock_remove_sock(vsk); + sock_orphan(sk); sk->sk_shutdown = SHUTDOWN_MASK; skb_queue_purge(&sk->sk_receive_queue); -- GitLab From 440c9d488705366b00372ea7213af69827a6c7af Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 10 Feb 2025 13:15:01 +0100 Subject: [PATCH 0572/1585] vsock/test: Add test for SO_LINGER null ptr deref Explicitly close() a TCP_ESTABLISHED (connectible) socket with SO_LINGER enabled. As for now, test does not verify if close() actually lingers. On an unpatched machine, may trigger a null pointer dereference. Tested-by: Luigi Leonardi Reviewed-by: Luigi Leonardi Reviewed-by: Stefano Garzarella Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20250210-vsock-linger-nullderef-v3-2-ef6244d02b54@rbox.co Signed-off-by: Jakub Kicinski --- tools/testing/vsock/vsock_test.c | 41 ++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index dfff8b288265f..d0f6d253ac72d 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -1788,6 +1788,42 @@ static void test_stream_connect_retry_server(const struct test_opts *opts) close(fd); } +static void test_stream_linger_client(const struct test_opts *opts) +{ + struct linger optval = { + .l_onoff = 1, + .l_linger = 1 + }; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) { + perror("setsockopt(SO_LINGER)"); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_linger_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + vsock_wait_remote_close(fd); + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1943,6 +1979,11 @@ static struct test_case test_cases[] = { .run_client = test_stream_connect_retry_client, .run_server = test_stream_connect_retry_server, }, + { + .name = "SOCK_STREAM SO_LINGER null-ptr-deref", + .run_client = test_stream_linger_client, + .run_server = test_stream_linger_server, + }, {}, }; -- GitLab From edb1942542bc538707cea221e9c7923a6270465f Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Thu, 13 Feb 2025 12:02:35 +0800 Subject: [PATCH 0573/1585] LoongArch: Fix idle VS timer enqueue LoongArch re-enables interrupts on its idle routine and performs a TIF_NEED_RESCHED check afterwards before putting the CPU to sleep. The IRQs firing between the check and the idle instruction may set the TIF_NEED_RESCHED flag. In order to deal with such a race, IRQs interrupting __arch_cpu_idle() rollback their return address to the beginning of __arch_cpu_idle() so that TIF_NEED_RESCHED is checked again before going back to sleep. However idle IRQs can also queue timers that may require a tick reprogramming through a new generic idle loop iteration but those timers would go unnoticed here because __arch_cpu_idle() only checks TIF_NEED_RESCHED. It doesn't check for pending timers. Fix this with fast-forwarding idle IRQs return address to the end of the idle routine instead of the beginning, so that the generic idle loop can handle both TIF_NEED_RESCHED and pending timers. Fixes: 0603839b18f4 ("LoongArch: Add exception/interrupt handling") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Frederic Weisbecker Signed-off-by: Marco Crivellari Signed-off-by: Huacai Chen --- arch/loongarch/kernel/genex.S | 28 +++++++++++++++------------- arch/loongarch/kernel/idle.c | 3 +-- arch/loongarch/kernel/reset.c | 6 +++--- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S index 86d5d90ebefe5..4f09121417818 100644 --- a/arch/loongarch/kernel/genex.S +++ b/arch/loongarch/kernel/genex.S @@ -18,16 +18,19 @@ .align 5 SYM_FUNC_START(__arch_cpu_idle) - /* start of rollback region */ - LONG_L t0, tp, TI_FLAGS - nop - andi t0, t0, _TIF_NEED_RESCHED - bnez t0, 1f - nop - nop - nop + /* start of idle interrupt region */ + ori t0, zero, CSR_CRMD_IE + /* idle instruction needs irq enabled */ + csrxchg t0, t0, LOONGARCH_CSR_CRMD + /* + * If an interrupt lands here; between enabling interrupts above and + * going idle on the next instruction, we must *NOT* go idle since the + * interrupt could have set TIF_NEED_RESCHED or caused an timer to need + * reprogramming. Fall through -- see handle_vint() below -- and have + * the idle loop take care of things. + */ idle 0 - /* end of rollback region */ + /* end of idle interrupt region */ 1: jr ra SYM_FUNC_END(__arch_cpu_idle) @@ -35,11 +38,10 @@ SYM_CODE_START(handle_vint) UNWIND_HINT_UNDEFINED BACKUP_T0T1 SAVE_ALL - la_abs t1, __arch_cpu_idle + la_abs t1, 1b LONG_L t0, sp, PT_ERA - /* 32 byte rollback region */ - ori t0, t0, 0x1f - xori t0, t0, 0x1f + /* 3 instructions idle interrupt region */ + ori t0, t0, 0b1100 bne t0, t1, 1f LONG_S t0, sp, PT_ERA 1: move a0, sp diff --git a/arch/loongarch/kernel/idle.c b/arch/loongarch/kernel/idle.c index 0b5dd2faeb90b..54b247d8cdb69 100644 --- a/arch/loongarch/kernel/idle.c +++ b/arch/loongarch/kernel/idle.c @@ -11,7 +11,6 @@ void __cpuidle arch_cpu_idle(void) { - raw_local_irq_enable(); - __arch_cpu_idle(); /* idle instruction needs irq enabled */ + __arch_cpu_idle(); raw_local_irq_disable(); } diff --git a/arch/loongarch/kernel/reset.c b/arch/loongarch/kernel/reset.c index 1ef8c63835351..de8fa5a8a825c 100644 --- a/arch/loongarch/kernel/reset.c +++ b/arch/loongarch/kernel/reset.c @@ -33,7 +33,7 @@ void machine_halt(void) console_flush_on_panic(CONSOLE_FLUSH_PENDING); while (true) { - __arch_cpu_idle(); + __asm__ __volatile__("idle 0" : : : "memory"); } } @@ -53,7 +53,7 @@ void machine_power_off(void) #endif while (true) { - __arch_cpu_idle(); + __asm__ __volatile__("idle 0" : : : "memory"); } } @@ -74,6 +74,6 @@ void machine_restart(char *command) acpi_reboot(); while (true) { - __arch_cpu_idle(); + __asm__ __volatile__("idle 0" : : : "memory"); } } -- GitLab From 619b52777a4972bdb6ddf86ac54c6f68a47b51c4 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 13 Feb 2025 12:02:35 +0800 Subject: [PATCH 0574/1585] LoongArch: Fix kernel_page_present() for KPRANGE/XKPRANGE Now kernel_page_present() always return true for KPRANGE/XKPRANGE addresses, this isn't correct because hibernation (ACPI S4) use it to distinguish whether a page is saveable. If all KPRANGE/XKPRANGE addresses are considered as saveable, then reserved memory such as EFI_RUNTIME_SERVICES_CODE / EFI_RUNTIME_SERVICES_DATA will also be saved and restored. Fix this by returning true only if the KPRANGE/XKPRANGE address is in memblock.memory. Signed-off-by: Huacai Chen --- arch/loongarch/mm/pageattr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/mm/pageattr.c b/arch/loongarch/mm/pageattr.c index bf86782484440..99165903908a4 100644 --- a/arch/loongarch/mm/pageattr.c +++ b/arch/loongarch/mm/pageattr.c @@ -3,6 +3,7 @@ * Copyright (C) 2024 Loongson Technology Corporation Limited */ +#include #include #include #include @@ -167,7 +168,7 @@ bool kernel_page_present(struct page *page) unsigned long addr = (unsigned long)page_address(page); if (addr < vm_map_base) - return true; + return memblock_is_memory(__pa(addr)); pgd = pgd_offset_k(addr); if (pgd_none(pgdp_get(pgd))) -- GitLab From 03a99d16e64fad41c8d39700bef9b0ac9c4e148b Mon Sep 17 00:00:00 2001 From: Yuli Wang Date: Thu, 13 Feb 2025 12:02:35 +0800 Subject: [PATCH 0575/1585] LoongArch: Use str_yes_no() helper function for /proc/cpuinfo Remove hard-coded strings by using the str_yes_no() helper function. Similar to commit c4a0a4a45a45 ("MIPS: kernel: proc: Use str_yes_no() helper function"). Co-developed-by: Wentao Guan Signed-off-by: Wentao Guan Signed-off-by: Yuli Wang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/proc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index 6ce46d92f1f19..56a230238c9cf 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -91,10 +91,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (cpu_has_lbt_mips) seq_printf(m, " lbt_mips"); seq_printf(m, "\n"); - seq_printf(m, "Hardware Watchpoint\t: %s", - cpu_has_watch ? "yes, " : "no\n"); + seq_printf(m, "Hardware Watchpoint\t: %s", str_yes_no(cpu_has_watch)); if (cpu_has_watch) { - seq_printf(m, "iwatch count: %d, dwatch count: %d\n", + seq_printf(m, ", iwatch count: %d, dwatch count: %d", cpu_data[n].watch_ireg_count, cpu_data[n].watch_dreg_count); } @@ -104,7 +103,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) raw_notifier_call_chain(&proc_cpuinfo_chain, 0, &proc_cpuinfo_notifier_args); - seq_printf(m, "\n"); + seq_printf(m, "\n\n"); return 0; } -- GitLab From 6b72cd9ef062702390fc96c469beea1729a5dffe Mon Sep 17 00:00:00 2001 From: Yuli Wang Date: Thu, 13 Feb 2025 12:02:40 +0800 Subject: [PATCH 0576/1585] LoongArch: Remove the deprecated notifier hook mechanism The notifier hook mechanism in proc and cpuinfo is actually unnecessary for LoongArch because it's not used anywhere. It was originally added to the MIPS code in commit d6d3c9afaab4 ("MIPS: MT: proc: Add support for printing VPE and TC ids"), and LoongArch then inherited it. But as the kernel code stands now, this notifier hook mechanism doesn't really make sense for either LoongArch or MIPS. In addition, the seq_file forward declaration needs to be moved to its proper place, as only the show_ipi_list() function in smp.c requires it. Co-developed-by: Wentao Guan Signed-off-by: Wentao Guan Signed-off-by: Yuli Wang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/cpu-info.h | 21 --------------------- arch/loongarch/include/asm/smp.h | 2 ++ arch/loongarch/kernel/proc.c | 22 ---------------------- 3 files changed, 2 insertions(+), 43 deletions(-) diff --git a/arch/loongarch/include/asm/cpu-info.h b/arch/loongarch/include/asm/cpu-info.h index 35e0a230a4849..7f5bc0ad9d505 100644 --- a/arch/loongarch/include/asm/cpu-info.h +++ b/arch/loongarch/include/asm/cpu-info.h @@ -76,27 +76,6 @@ extern const char *__cpu_full_name[]; #define cpu_family_string() __cpu_family[raw_smp_processor_id()] #define cpu_full_name_string() __cpu_full_name[raw_smp_processor_id()] -struct seq_file; -struct notifier_block; - -extern int register_proc_cpuinfo_notifier(struct notifier_block *nb); -extern int proc_cpuinfo_notifier_call_chain(unsigned long val, void *v); - -#define proc_cpuinfo_notifier(fn, pri) \ -({ \ - static struct notifier_block fn##_nb = { \ - .notifier_call = fn, \ - .priority = pri \ - }; \ - \ - register_proc_cpuinfo_notifier(&fn##_nb); \ -}) - -struct proc_cpuinfo_notifier_args { - struct seq_file *m; - unsigned long n; -}; - static inline bool cpus_are_siblings(int cpua, int cpub) { struct cpuinfo_loongarch *infoa = &cpu_data[cpua]; diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index 3383c9d24e942..b87d1d5e58905 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -77,6 +77,8 @@ extern int __cpu_logical_map[NR_CPUS]; #define SMP_IRQ_WORK BIT(ACTION_IRQ_WORK) #define SMP_CLEAR_VECTOR BIT(ACTION_CLEAR_VECTOR) +struct seq_file; + struct secondary_data { unsigned long stack; unsigned long thread_info; diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index 56a230238c9cf..cea30768ae92a 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -13,28 +13,12 @@ #include #include -/* - * No lock; only written during early bootup by CPU 0. - */ -static RAW_NOTIFIER_HEAD(proc_cpuinfo_chain); - -int __ref register_proc_cpuinfo_notifier(struct notifier_block *nb) -{ - return raw_notifier_chain_register(&proc_cpuinfo_chain, nb); -} - -int proc_cpuinfo_notifier_call_chain(unsigned long val, void *v) -{ - return raw_notifier_call_chain(&proc_cpuinfo_chain, val, v); -} - static int show_cpuinfo(struct seq_file *m, void *v) { unsigned long n = (unsigned long) v - 1; unsigned int isa = cpu_data[n].isa_level; unsigned int version = cpu_data[n].processor_id & 0xff; unsigned int fp_version = cpu_data[n].fpu_vers; - struct proc_cpuinfo_notifier_args proc_cpuinfo_notifier_args; #ifdef CONFIG_SMP if (!cpu_online(n)) @@ -97,12 +81,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) cpu_data[n].watch_ireg_count, cpu_data[n].watch_dreg_count); } - proc_cpuinfo_notifier_args.m = m; - proc_cpuinfo_notifier_args.n = n; - - raw_notifier_call_chain(&proc_cpuinfo_chain, 0, - &proc_cpuinfo_notifier_args); - seq_printf(m, "\n\n"); return 0; -- GitLab From 6287f1a8c16138c2ec750953e35039634018c84a Mon Sep 17 00:00:00 2001 From: Yuli Wang Date: Thu, 13 Feb 2025 12:02:40 +0800 Subject: [PATCH 0577/1585] LoongArch: csum: Fix OoB access in IP checksum code for negative lengths Commit 69e3a6aa6be2 ("LoongArch: Add checksum optimization for 64-bit system") would cause an undefined shift and an out-of-bounds read. Commit 8bd795fedb84 ("arm64: csum: Fix OoB access in IP checksum code for negative lengths") fixes the same issue on ARM64. Fixes: 69e3a6aa6be2 ("LoongArch: Add checksum optimization for 64-bit system") Co-developed-by: Wentao Guan Signed-off-by: Wentao Guan Signed-off-by: Yuli Wang Signed-off-by: Huacai Chen --- arch/loongarch/lib/csum.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/lib/csum.c b/arch/loongarch/lib/csum.c index a5e84b403c3b3..df309ae4045de 100644 --- a/arch/loongarch/lib/csum.c +++ b/arch/loongarch/lib/csum.c @@ -25,7 +25,7 @@ unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len) const u64 *ptr; u64 data, sum64 = 0; - if (unlikely(len == 0)) + if (unlikely(len <= 0)) return 0; offset = (unsigned long)buff & 7; -- GitLab From bdb13252e5d1518823b81f458d9975c85d5240c2 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Thu, 13 Feb 2025 12:02:56 +0800 Subject: [PATCH 0578/1585] LoongArch: KVM: Fix typo issue about GCFG feature detection This is typo issue and misusage about GCFG feature macro. The code is wrong, only that it does not cause obvious problem since GCFG is set again on vCPU context switch. Fixes: 0d0df3c99d4f ("LoongArch: KVM: Implement kvm hardware enable, disable interface") Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index bf9268bf26d5b..f6d3242b9234a 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -303,9 +303,9 @@ int kvm_arch_enable_virtualization_cpu(void) * TOE=0: Trap on Exception. * TIT=0: Trap on Timer. */ - if (env & CSR_GCFG_GCIP_ALL) + if (env & CSR_GCFG_GCIP_SECURE) gcfg |= CSR_GCFG_GCI_SECURE; - if (env & CSR_GCFG_MATC_ROOT) + if (env & CSR_GCFG_MATP_ROOT) gcfg |= CSR_GCFG_MATC_ROOT; write_csr_gcfg(gcfg); -- GitLab From d8cc4fee3f8ad21f83326ec8a6d200e04c8f0a00 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Thu, 13 Feb 2025 12:02:56 +0800 Subject: [PATCH 0579/1585] LoongArch: KVM: Remove duplicated cache attribute setting Cache attribute comes from GPA->HPA secondary mmu page table and is configured when kvm is enabled. It is the same for all VMs, so remove duplicated cache attribute setting on vCPU context switch. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/vcpu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index fb72095c8077e..20f941af3e9ea 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -1548,9 +1548,6 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* Restore timer state regardless */ kvm_restore_timer(vcpu); - - /* Control guest page CCA attribute */ - change_csr_gcfg(CSR_GCFG_MATC_MASK, CSR_GCFG_MATC_ROOT); kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); /* Restore hardware PMU CSRs */ -- GitLab From 3011b29ec5a33ec16502e687c4264d57416a8b1f Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Thu, 13 Feb 2025 12:02:56 +0800 Subject: [PATCH 0580/1585] LoongArch: KVM: Set host with kernel mode when switch to VM mode PRMD register is only meaningful on the beginning stage of exception entry, and it is overwritten with nested irq or exception. When CPU runs in VM mode, interrupt need be enabled on host. And the mode for host had better be kernel mode rather than random or user mode. When VM is running, the running mode with top command comes from CRMD register, and running mode should be kernel mode since kernel function is executing with perf command. It needs be consistent with both top and perf command. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/switch.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S index 0c292f8184927..1be185e948072 100644 --- a/arch/loongarch/kvm/switch.S +++ b/arch/loongarch/kvm/switch.S @@ -85,7 +85,7 @@ * Guest CRMD comes from separate GCSR_CRMD register */ ori t0, zero, CSR_PRMD_PIE - csrxchg t0, t0, LOONGARCH_CSR_PRMD + csrwr t0, LOONGARCH_CSR_PRMD /* Set PVM bit to setup ertn to guest context */ ori t0, zero, CSR_GSTAT_PVM -- GitLab From 5db843258de1e4e6b1ef1cbd1797923c9e3de548 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 10 Feb 2025 16:52:15 +0200 Subject: [PATCH 0581/1585] net: ethernet: ti: am65-cpsw: fix memleak in certain XDP cases If the XDP program doesn't result in XDP_PASS then we leak the memory allocated by am65_cpsw_build_skb(). It is pointless to allocate SKB memory before running the XDP program as we would be wasting CPU cycles for cases other than XDP_PASS. Move the SKB allocation after evaluating the XDP program result. This fixes the memleak. A performance boost is seen for XDP_DROP test. XDP_DROP test: Before: 460256 rx/s 0 err/s After: 784130 rx/s 0 err/s Fixes: 8acacc40f733 ("net: ethernet: ti: am65-cpsw: Add minimal XDP support") Signed-off-by: Roger Quadros Link: https://patch.msgid.link/20250210-am65-cpsw-xdp-fixes-v1-1-ec6b1f7f1aca@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 26 ++++++++++++------------ 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index b663271e79f72..021c5a4df8fd4 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -842,7 +842,8 @@ static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma) static struct sk_buff *am65_cpsw_build_skb(void *page_addr, struct net_device *ndev, - unsigned int len) + unsigned int len, + unsigned int headroom) { struct sk_buff *skb; @@ -852,7 +853,7 @@ static struct sk_buff *am65_cpsw_build_skb(void *page_addr, if (unlikely(!skb)) return NULL; - skb_reserve(skb, AM65_CPSW_HEADROOM); + skb_reserve(skb, headroom); skb->dev = ndev; return skb; @@ -1315,16 +1316,8 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, dev_dbg(dev, "%s rx csum_info:%#x\n", __func__, csum_info); dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE); - k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); - skb = am65_cpsw_build_skb(page_addr, ndev, - AM65_CPSW_MAX_PACKET_SIZE); - if (unlikely(!skb)) { - new_page = page; - goto requeue; - } - if (port->xdp_prog) { xdp_init_buff(&xdp, PAGE_SIZE, &port->xdp_rxq[flow->id]); xdp_prepare_buff(&xdp, page_addr, AM65_CPSW_HEADROOM, @@ -1334,9 +1327,16 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, if (*xdp_state != AM65_CPSW_XDP_PASS) goto allocate; - /* Compute additional headroom to be reserved */ - headroom = (xdp.data - xdp.data_hard_start) - skb_headroom(skb); - skb_reserve(skb, headroom); + headroom = xdp.data - xdp.data_hard_start; + } else { + headroom = AM65_CPSW_HEADROOM; + } + + skb = am65_cpsw_build_skb(page_addr, ndev, + AM65_CPSW_MAX_PACKET_SIZE, headroom); + if (unlikely(!skb)) { + new_page = page; + goto requeue; } ndev_priv = netdev_priv(ndev); -- GitLab From 8a9f82ff15da03a6804cdd6557fb36ff71c0924f Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 10 Feb 2025 16:52:16 +0200 Subject: [PATCH 0582/1585] net: ethernet: ti: am65-cpsw: fix RX & TX statistics for XDP_TX case For successful XDP_TX and XDP_REDIRECT cases, the packet was received successfully so update RX statistics. Use original received packet length for that. TX packets statistics are incremented on TX completion so don't update it while TX queueing. If xdp_convert_buff_to_frame() fails, increment tx_dropped. Signed-off-by: Roger Quadros Fixes: 8acacc40f733 ("net: ethernet: ti: am65-cpsw: Add minimal XDP support") Link: https://patch.msgid.link/20250210-am65-cpsw-xdp-fixes-v1-2-ec6b1f7f1aca@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 021c5a4df8fd4..38f3be310928d 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1170,9 +1170,11 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow, struct xdp_frame *xdpf; struct bpf_prog *prog; struct page *page; + int pkt_len; u32 act; int err; + pkt_len = *len; prog = READ_ONCE(port->xdp_prog); if (!prog) return AM65_CPSW_XDP_PASS; @@ -1190,8 +1192,10 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow, netif_txq = netdev_get_tx_queue(ndev, tx_chn->id); xdpf = xdp_convert_buff_to_frame(xdp); - if (unlikely(!xdpf)) + if (unlikely(!xdpf)) { + ndev->stats.tx_dropped++; goto drop; + } __netif_tx_lock(netif_txq, cpu); err = am65_cpsw_xdp_tx_frame(ndev, tx_chn, xdpf, @@ -1200,14 +1204,14 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow, if (err) goto drop; - dev_sw_netstats_tx_add(ndev, 1, *len); + dev_sw_netstats_rx_add(ndev, pkt_len); ret = AM65_CPSW_XDP_CONSUMED; goto out; case XDP_REDIRECT: if (unlikely(xdp_do_redirect(ndev, xdp, prog))) goto drop; - dev_sw_netstats_rx_add(ndev, *len); + dev_sw_netstats_rx_add(ndev, pkt_len); ret = AM65_CPSW_XDP_REDIRECT; goto out; default: -- GitLab From 4542536f664f752db5feba2c5998b165933c34f2 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 10 Feb 2025 16:52:17 +0200 Subject: [PATCH 0583/1585] net: ethernet: ti: am65_cpsw: fix tx_cleanup for XDP case For XDP transmit case, swdata doesn't contain SKB but the XDP Frame. Infer the correct swdata based on buffer type and return the XDP Frame for XDP transmit case. Signed-off-by: Roger Quadros Fixes: 8acacc40f733 ("net: ethernet: ti: am65-cpsw: Add minimal XDP support") Link: https://patch.msgid.link/20250210-am65-cpsw-xdp-fixes-v1-3-ec6b1f7f1aca@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 38f3be310928d..2806238629f82 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -828,16 +828,24 @@ static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn, static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma) { struct am65_cpsw_tx_chn *tx_chn = data; + enum am65_cpsw_tx_buf_type buf_type; struct cppi5_host_desc_t *desc_tx; + struct xdp_frame *xdpf; struct sk_buff *skb; void **swdata; desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma); swdata = cppi5_hdesc_get_swdata(desc_tx); - skb = *(swdata); - am65_cpsw_nuss_xmit_free(tx_chn, desc_tx); + buf_type = am65_cpsw_nuss_buf_type(tx_chn, desc_dma); + if (buf_type == AM65_CPSW_TX_BUF_TYPE_SKB) { + skb = *(swdata); + dev_kfree_skb_any(skb); + } else { + xdpf = *(swdata); + xdp_return_frame(xdpf); + } - dev_kfree_skb_any(skb); + am65_cpsw_nuss_xmit_free(tx_chn, desc_tx); } static struct sk_buff *am65_cpsw_build_skb(void *page_addr, -- GitLab From e00a2e5d485faf53c7a24b9d1b575a642227947f Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 12 Feb 2025 18:18:51 +0200 Subject: [PATCH 0584/1585] drm: Fix DSC BPP increment decoding Starting with DPCD version 2.0 bits 6:3 of the DP_DSC_BITS_PER_PIXEL_INC DPCD register contains the NativeYCbCr422_MAX_bpp_DELTA field, which can be non-zero as opposed to earlier DPCD versions, hence decoding the bit_per_pixel increment value at bits 2:0 in the same register requires applying a mask, do so. Cc: Ankit Nautiyal Fixes: 0c2287c96521 ("drm/display/dp: Add helper function to get DSC bpp precision") Reviewed-by: Jani Nikula Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20250212161851.4007005-1-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_helper.c | 2 +- include/drm/display/drm_dp.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index da3c8521a7fa7..61c7c2c588c6e 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -2544,7 +2544,7 @@ u8 drm_dp_dsc_sink_bpp_incr(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE]) { u8 bpp_increment_dpcd = dsc_dpcd[DP_DSC_BITS_PER_PIXEL_INC - DP_DSC_SUPPORT]; - switch (bpp_increment_dpcd) { + switch (bpp_increment_dpcd & DP_DSC_BITS_PER_PIXEL_MASK) { case DP_DSC_BITS_PER_PIXEL_1_16: return 16; case DP_DSC_BITS_PER_PIXEL_1_8: diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h index a6f8b098c56f1..3bd9f482f0c3e 100644 --- a/include/drm/display/drm_dp.h +++ b/include/drm/display/drm_dp.h @@ -359,6 +359,7 @@ # define DP_DSC_BITS_PER_PIXEL_1_4 0x2 # define DP_DSC_BITS_PER_PIXEL_1_2 0x3 # define DP_DSC_BITS_PER_PIXEL_1_1 0x4 +# define DP_DSC_BITS_PER_PIXEL_MASK 0x7 #define DP_PSR_SUPPORT 0x070 /* XXX 1.2? */ # define DP_PSR_IS_SUPPORTED 1 -- GitLab From d923782b041218ef3804b2fed87619b5b1a497f3 Mon Sep 17 00:00:00 2001 From: Beata Michalska Date: Fri, 31 Jan 2025 15:58:42 +0000 Subject: [PATCH 0585/1585] arm64: amu: Delay allocating cpumask for AMU FIE support For the time being, the amu_fie_cpus cpumask is being exclusively used by the AMU-related internals of FIE support and is guaranteed to be valid on every access currently made. Still the mask is not being invalidated on one of the error handling code paths, which leaves a soft spot with theoretical risk of UAF for CPUMASK_OFFSTACK cases. To make things sound, delay allocating said cpumask (for CPUMASK_OFFSTACK) avoiding otherwise nasty sanitising case failing to register the cpufreq policy notifications. Signed-off-by: Beata Michalska Reviewed-by: Prasanna Kumar T S M Reviewed-by: Sumit Gupta Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/20250131155842.3839098-1-beata.michalska@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/topology.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 1a2c72f3e7f80..cb180684d10d5 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -194,12 +194,19 @@ static void amu_fie_setup(const struct cpumask *cpus) int cpu; /* We are already set since the last insmod of cpufreq driver */ - if (unlikely(cpumask_subset(cpus, amu_fie_cpus))) + if (cpumask_available(amu_fie_cpus) && + unlikely(cpumask_subset(cpus, amu_fie_cpus))) return; - for_each_cpu(cpu, cpus) { + for_each_cpu(cpu, cpus) if (!freq_counters_valid(cpu)) return; + + if (!cpumask_available(amu_fie_cpus) && + !zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) { + WARN_ONCE(1, "Failed to allocate FIE cpumask for CPUs[%*pbl]\n", + cpumask_pr_args(cpus)); + return; } cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus); @@ -237,17 +244,8 @@ static struct notifier_block init_amu_fie_notifier = { static int __init init_amu_fie(void) { - int ret; - - if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) - return -ENOMEM; - - ret = cpufreq_register_notifier(&init_amu_fie_notifier, + return cpufreq_register_notifier(&init_amu_fie_notifier, CPUFREQ_POLICY_NOTIFIER); - if (ret) - free_cpumask_var(amu_fie_cpus); - - return ret; } core_initcall(init_amu_fie); -- GitLab From f818227a2f3d1d4f26469347e428323d61cc83f0 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 28 Jan 2025 00:17:49 +0000 Subject: [PATCH 0586/1585] ACPI: GTDT: Relax sanity checking on Platform Timers array count Perhaps unsurprisingly there are some platforms where the GTDT isn't quite right and the Platforms Timer array overflows the length of the overall table. While the recently-added sanity checking isn't wrong, it makes it impossible to boot the kernel on offending platforms. Try to hobble along and limit the Platform Timer count to the bounds of the table. Cc: Marc Zyngier Cc: Lorenzo Pieralisi Cc: Zheng Zengkai Cc: stable@vger.kernel.org Fixes: 263e22d6bd1f ("ACPI: GTDT: Tighten the check for the array of platform timer structures") Signed-off-by: Oliver Upton Acked-by: Marc Zyngier Reviewed-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20250128001749.3132656-1-oliver.upton@linux.dev Signed-off-by: Will Deacon --- drivers/acpi/arm64/gtdt.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/arm64/gtdt.c b/drivers/acpi/arm64/gtdt.c index 3561553eff8b5..70f8290b659de 100644 --- a/drivers/acpi/arm64/gtdt.c +++ b/drivers/acpi/arm64/gtdt.c @@ -163,7 +163,7 @@ int __init acpi_gtdt_init(struct acpi_table_header *table, { void *platform_timer; struct acpi_table_gtdt *gtdt; - int cnt = 0; + u32 cnt = 0; gtdt = container_of(table, struct acpi_table_gtdt, header); acpi_gtdt_desc.gtdt = gtdt; @@ -188,13 +188,17 @@ int __init acpi_gtdt_init(struct acpi_table_header *table, cnt++; if (cnt != gtdt->platform_timer_count) { + cnt = min(cnt, gtdt->platform_timer_count); + pr_err(FW_BUG "limiting Platform Timer count to %d\n", cnt); + } + + if (!cnt) { acpi_gtdt_desc.platform_timer = NULL; - pr_err(FW_BUG "invalid timer data.\n"); - return -EINVAL; + return 0; } if (platform_timer_count) - *platform_timer_count = gtdt->platform_timer_count; + *platform_timer_count = cnt; return 0; } -- GitLab From a4cc8494f1d853a0945d2a655b4891935d717355 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 12 Feb 2025 00:30:42 +0000 Subject: [PATCH 0587/1585] arm64: Add missing registrations of hwcaps Commit 819935464cb2 ("arm64/hwcap: Describe 2024 dpISA extensions to userspace") added definitions for HWCAP_FPRCVT, HWCAP_F8MM8 and HWCAP_F8MM4 but did not include the crucial registration in arm64_elf_hwcaps. Add it. Fixes: 819935464cb2 ("arm64/hwcap: Describe 2024 dpISA extensions to userspace") Reported-by: Mark Rutland Signed-off-by: Mark Brown Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250212-arm64-fix-2024-dpisa-v2-1-67a1c11d6001@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f0910f20fbf8c..d561cf3b8ac7b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -3091,6 +3091,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR0_EL1, TS, FLAGM, CAP_HWCAP, KERNEL_HWCAP_FLAGM), HWCAP_CAP(ID_AA64ISAR0_EL1, TS, FLAGM2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), HWCAP_CAP(ID_AA64ISAR0_EL1, RNDR, IMP, CAP_HWCAP, KERNEL_HWCAP_RNG), + HWCAP_CAP(ID_AA64ISAR3_EL1, FPRCVT, IMP, CAP_HWCAP, KERNEL_HWCAP_FPRCVT), HWCAP_CAP(ID_AA64PFR0_EL1, FP, IMP, CAP_HWCAP, KERNEL_HWCAP_FP), HWCAP_CAP(ID_AA64PFR0_EL1, FP, FP16, CAP_HWCAP, KERNEL_HWCAP_FPHP), HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMD), @@ -3190,6 +3191,8 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA), HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP4), HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP2), + HWCAP_CAP(ID_AA64FPFR0_EL1, F8MM8, IMP, CAP_HWCAP, KERNEL_HWCAP_F8MM8), + HWCAP_CAP(ID_AA64FPFR0_EL1, F8MM4, IMP, CAP_HWCAP, KERNEL_HWCAP_F8MM4), HWCAP_CAP(ID_AA64FPFR0_EL1, F8E4M3, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E4M3), HWCAP_CAP(ID_AA64FPFR0_EL1, F8E5M2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E5M2), #ifdef CONFIG_ARM64_POE -- GitLab From 571b69f2f9b1ec7cf7d0e9b79e52115a87a869c4 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Thu, 13 Feb 2025 15:05:18 +0800 Subject: [PATCH 0588/1585] ASoC: imx-audmix: remove cpu_mclk which is from cpu dai device When defer probe happens, there may be below error: platform 59820000.sai: Resources present before probing The cpu_mclk clock is from the cpu dai device, if it is not released, then the cpu dai device probe will fail for the second time. The cpu_mclk is used to get rate for rate constraint, rate constraint may be specific for each platform, which is not necessary for machine driver, so remove it. Fixes: b86ef5367761 ("ASoC: fsl: Add Audio Mixer machine driver") Signed-off-by: Shengjiu Wang Link: https://patch.msgid.link/20250213070518.547375-1-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/imx-audmix.c | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/sound/soc/fsl/imx-audmix.c b/sound/soc/fsl/imx-audmix.c index 231400661c906..50ecc5f51100e 100644 --- a/sound/soc/fsl/imx-audmix.c +++ b/sound/soc/fsl/imx-audmix.c @@ -23,7 +23,6 @@ struct imx_audmix { struct snd_soc_card card; struct platform_device *audmix_pdev; struct platform_device *out_pdev; - struct clk *cpu_mclk; int num_dai; struct snd_soc_dai_link *dai; int num_dai_conf; @@ -32,34 +31,11 @@ struct imx_audmix { struct snd_soc_dapm_route *dapm_routes; }; -static const u32 imx_audmix_rates[] = { - 8000, 12000, 16000, 24000, 32000, 48000, 64000, 96000, -}; - -static const struct snd_pcm_hw_constraint_list imx_audmix_rate_constraints = { - .count = ARRAY_SIZE(imx_audmix_rates), - .list = imx_audmix_rates, -}; - static int imx_audmix_fe_startup(struct snd_pcm_substream *substream) { - struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); - struct imx_audmix *priv = snd_soc_card_get_drvdata(rtd->card); struct snd_pcm_runtime *runtime = substream->runtime; - struct device *dev = rtd->card->dev; - unsigned long clk_rate = clk_get_rate(priv->cpu_mclk); int ret; - if (clk_rate % 24576000 == 0) { - ret = snd_pcm_hw_constraint_list(runtime, 0, - SNDRV_PCM_HW_PARAM_RATE, - &imx_audmix_rate_constraints); - if (ret < 0) - return ret; - } else { - dev_warn(dev, "mclk may be not supported %lu\n", clk_rate); - } - ret = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_CHANNELS, 1, 8); if (ret < 0) @@ -323,13 +299,6 @@ static int imx_audmix_probe(struct platform_device *pdev) } put_device(&cpu_pdev->dev); - priv->cpu_mclk = devm_clk_get(&cpu_pdev->dev, "mclk1"); - if (IS_ERR(priv->cpu_mclk)) { - ret = PTR_ERR(priv->cpu_mclk); - dev_err(&cpu_pdev->dev, "failed to get DAI mclk1: %d\n", ret); - return ret; - } - priv->audmix_pdev = audmix_pdev; priv->out_pdev = cpu_pdev; -- GitLab From ed975485a13d1f6080218aa71c29425ba2dfb332 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 11 Feb 2025 18:22:30 +0000 Subject: [PATCH 0589/1585] MIPS: Export syscall stack arguments properly for remote use We have several places across the kernel where we want to access another task's syscall arguments, such as ptrace(2), seccomp(2), etc., by making a call to syscall_get_arguments(). This works for register arguments right away by accessing the task's `regs' member of `struct pt_regs', however for stack arguments seen with 32-bit/o32 kernels things are more complicated. Technically they ought to be obtained from the user stack with calls to an access_remote_vm(), but we have an easier way available already. So as to be able to access syscall stack arguments as regular function arguments following the MIPS calling convention we copy them over from the user stack to the kernel stack in arch/mips/kernel/scall32-o32.S, in handle_sys(), to the current stack frame's outgoing argument space at the top of the stack, which is where the handler called expects to see its incoming arguments. This area is also pointed at by the `pt_regs' pointer obtained by task_pt_regs(). Make the o32 stack argument space a proper member of `struct pt_regs' then, by renaming the existing member from `pad0' to `args' and using generated offsets to access the space. No functional change though. With the change in place the o32 kernel stack frame layout at the entry to a syscall handler invoked by handle_sys() is therefore as follows: $sp + 68 -> | ... | <- pt_regs.regs[9] +---------------------+ $sp + 64 -> | $t0 | <- pt_regs.regs[8] +---------------------+ $sp + 60 -> | $a3/argument #4 | <- pt_regs.regs[7] +---------------------+ $sp + 56 -> | $a2/argument #3 | <- pt_regs.regs[6] +---------------------+ $sp + 52 -> | $a1/argument #2 | <- pt_regs.regs[5] +---------------------+ $sp + 48 -> | $a0/argument #1 | <- pt_regs.regs[4] +---------------------+ $sp + 44 -> | $v1 | <- pt_regs.regs[3] +---------------------+ $sp + 40 -> | $v0 | <- pt_regs.regs[2] +---------------------+ $sp + 36 -> | $at | <- pt_regs.regs[1] +---------------------+ $sp + 32 -> | $zero | <- pt_regs.regs[0] +---------------------+ $sp + 28 -> | stack argument #8 | <- pt_regs.args[7] +---------------------+ $sp + 24 -> | stack argument #7 | <- pt_regs.args[6] +---------------------+ $sp + 20 -> | stack argument #6 | <- pt_regs.args[5] +---------------------+ $sp + 16 -> | stack argument #5 | <- pt_regs.args[4] +---------------------+ $sp + 12 -> | psABI space for $a3 | <- pt_regs.args[3] +---------------------+ $sp + 8 -> | psABI space for $a2 | <- pt_regs.args[2] +---------------------+ $sp + 4 -> | psABI space for $a1 | <- pt_regs.args[1] +---------------------+ $sp + 0 -> | psABI space for $a0 | <- pt_regs.args[0] +---------------------+ holding user data received and with the first 4 frame slots reserved by the psABI for the compiler to spill the incoming arguments from $a0-$a3 registers (which it sometimes does according to its needs) and the next 4 frame slots designated by the psABI for any stack function arguments that follow. This data is also available for other tasks to peek/poke at as reqired and where permitted. Signed-off-by: Maciej W. Rozycki Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/ptrace.h | 4 ++-- arch/mips/kernel/asm-offsets.c | 6 ++++++ arch/mips/kernel/scall32-o32.S | 8 ++++---- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index 4a2b40ce39e09..85fa9962266a2 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -27,8 +27,8 @@ */ struct pt_regs { #ifdef CONFIG_32BIT - /* Pad bytes for argument save space on the stack. */ - unsigned long pad0[8]; + /* Saved syscall stack arguments; entries 0-3 unused. */ + unsigned long args[8]; #endif /* Saved main processor registers. */ diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index cb1045ebab062..b910ec54a3a17 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -27,6 +27,12 @@ void output_ptreg_defines(void); void output_ptreg_defines(void) { COMMENT("MIPS pt_regs offsets."); +#ifdef CONFIG_32BIT + OFFSET(PT_ARG4, pt_regs, args[4]); + OFFSET(PT_ARG5, pt_regs, args[5]); + OFFSET(PT_ARG6, pt_regs, args[6]); + OFFSET(PT_ARG7, pt_regs, args[7]); +#endif OFFSET(PT_R0, pt_regs, regs[0]); OFFSET(PT_R1, pt_regs, regs[1]); OFFSET(PT_R2, pt_regs, regs[2]); diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 2c604717e6308..4947a4f39e371 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -64,10 +64,10 @@ load_a6: user_lw(t7, 24(t0)) # argument #7 from usp load_a7: user_lw(t8, 28(t0)) # argument #8 from usp loads_done: - sw t5, 16(sp) # argument #5 to ksp - sw t6, 20(sp) # argument #6 to ksp - sw t7, 24(sp) # argument #7 to ksp - sw t8, 28(sp) # argument #8 to ksp + sw t5, PT_ARG4(sp) # argument #5 to ksp + sw t6, PT_ARG5(sp) # argument #6 to ksp + sw t7, PT_ARG6(sp) # argument #7 to ksp + sw t8, PT_ARG7(sp) # argument #8 to ksp .set pop .section __ex_table,"a" -- GitLab From 733a90561ad0a4a74035d2d627098da85d43b592 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Wed, 12 Feb 2025 01:02:09 +0200 Subject: [PATCH 0590/1585] MIPS: fix mips_get_syscall_arg() for o32 This makes ptrace/get_syscall_info selftest pass on mips o32 and mips64 o32 by fixing the following two test assertions: 1. get_syscall_info test assertion on mips o32: # get_syscall_info.c:218:get_syscall_info:Expected exp_args[5] (3134521044) == info.entry.args[4] (4911432) # get_syscall_info.c:219:get_syscall_info:wait #1: entry stop mismatch 2. get_syscall_info test assertion on mips64 o32: # get_syscall_info.c:209:get_syscall_info:Expected exp_args[2] (3134324433) == info.entry.args[1] (18446744072548908753) # get_syscall_info.c:210:get_syscall_info:wait #1: entry stop mismatch The first assertion happens due to mips_get_syscall_arg() trying to access another task's context but failing to do it properly because get_user() it calls just peeks at the current task's context. It usually does not crash because the default user stack always gets assigned the same VMA, but it is pure luck which mips_get_syscall_arg() wouldn't have if e.g. the stack was switched (via setcontext(3) or however) or a non-default process's thread peeked at, and in any case irrelevant data is obtained just as observed with the test case. mips_get_syscall_arg() ought to be using access_remote_vm() instead to retrieve the other task's stack contents, but given that the data has been already obtained and saved in `struct pt_regs' it would be an overkill. The first assertion is fixed for mips o32 by using struct pt_regs.args instead of get_user() to obtain syscall arguments. This approach works due to this piece in arch/mips/kernel/scall32-o32.S: /* * Ok, copy the args from the luser stack to the kernel stack. */ .set push .set noreorder .set nomacro load_a4: user_lw(t5, 16(t0)) # argument #5 from usp load_a5: user_lw(t6, 20(t0)) # argument #6 from usp load_a6: user_lw(t7, 24(t0)) # argument #7 from usp load_a7: user_lw(t8, 28(t0)) # argument #8 from usp loads_done: sw t5, PT_ARG4(sp) # argument #5 to ksp sw t6, PT_ARG5(sp) # argument #6 to ksp sw t7, PT_ARG6(sp) # argument #7 to ksp sw t8, PT_ARG7(sp) # argument #8 to ksp .set pop .section __ex_table,"a" PTR_WD load_a4, bad_stack_a4 PTR_WD load_a5, bad_stack_a5 PTR_WD load_a6, bad_stack_a6 PTR_WD load_a7, bad_stack_a7 .previous arch/mips/kernel/scall64-o32.S has analogous code for mips64 o32 that allows fixing the issue by obtaining syscall arguments from struct pt_regs.regs[4..11] instead of the erroneous use of get_user(). The second assertion is fixed by truncating 64-bit values to 32-bit syscall arguments. Fixes: c0ff3c53d4f9 ("MIPS: Enable HAVE_ARCH_TRACEHOOK.") Signed-off-by: Dmitry V. Levin Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/syscall.h | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index ebdf4d910af2f..056aa1b713e23 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -57,37 +57,21 @@ static inline void mips_syscall_update_nr(struct task_struct *task, static inline void mips_get_syscall_arg(unsigned long *arg, struct task_struct *task, struct pt_regs *regs, unsigned int n) { - unsigned long usp __maybe_unused = regs->regs[29]; - +#ifdef CONFIG_32BIT switch (n) { case 0: case 1: case 2: case 3: *arg = regs->regs[4 + n]; - - return; - -#ifdef CONFIG_32BIT - case 4: case 5: case 6: case 7: - get_user(*arg, (int *)usp + n); return; -#endif - -#ifdef CONFIG_64BIT case 4: case 5: case 6: case 7: -#ifdef CONFIG_MIPS32_O32 - if (test_tsk_thread_flag(task, TIF_32BIT_REGS)) - get_user(*arg, (int *)usp + n); - else -#endif - *arg = regs->regs[4 + n]; - + *arg = regs->args[n]; return; -#endif - - default: - BUG(); } - - unreachable(); +#else + *arg = regs->regs[4 + n]; + if ((IS_ENABLED(CONFIG_MIPS32_O32) && + test_tsk_thread_flag(task, TIF_32BIT_REGS))) + *arg = (unsigned int)*arg; +#endif } static inline long syscall_get_error(struct task_struct *task, -- GitLab From 446a8351f160d65a1c5df7097f31c74102ed2bb1 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 10 Feb 2025 17:37:32 +0100 Subject: [PATCH 0591/1585] arm64: rust: clean Rust 1.85.0 warning using softfloat target Starting with Rust 1.85.0 (to be released 2025-02-20), `rustc` warns [1] about disabling neon in the aarch64 hardfloat target: warning: target feature `neon` cannot be toggled with `-Ctarget-feature`: unsound on hard-float targets because it changes float ABI | = note: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! = note: for more information, see issue #116344 Thus, instead, use the softfloat target instead. While trying it out, I found that the kernel sanitizers were not enabled for that built-in target [2]. Upstream Rust agreed to backport the enablement for the current beta so that it is ready for the 1.85.0 release [3] -- thanks! However, that still means that before Rust 1.85.0, we cannot switch since sanitizers could be in use. Thus conditionally do so. Cc: stable@vger.kernel.org # Needed in 6.12.y and 6.13.y only (Rust is pinned in older LTSs). Cc: Catalin Marinas Cc: Will Deacon Cc: Matthew Maurer Cc: Alice Ryhl Cc: Ralf Jung Cc: Jubilee Young Link: https://github.com/rust-lang/rust/pull/133417 [1] Link: https://rust-lang.zulipchat.com/#narrow/channel/131828-t-compiler/topic/arm64.20neon.20.60-Ctarget-feature.60.20warning/near/495358442 [2] Link: https://github.com/rust-lang/rust/pull/135905 [3] Link: https://github.com/rust-lang/rust/issues/116344 Signed-off-by: Miguel Ojeda Reviewed-by: Trevor Gross Tested-by: Matthew Maurer Reviewed-by: Ralf Jung Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250210163732.281786-1-ojeda@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 358c68565bfd0..2b25d671365f2 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -48,7 +48,11 @@ KBUILD_CFLAGS += $(CC_FLAGS_NO_FPU) \ KBUILD_CFLAGS += $(call cc-disable-warning, psabi) KBUILD_AFLAGS += $(compat_vdso) +ifeq ($(call test-ge, $(CONFIG_RUSTC_VERSION), 108500),y) +KBUILD_RUSTFLAGS += --target=aarch64-unknown-none-softfloat +else KBUILD_RUSTFLAGS += --target=aarch64-unknown-none -Ctarget-feature="-neon" +endif KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) KBUILD_AFLAGS += $(call cc-option,-mabi=lp64) -- GitLab From 85fcb57c983f423180ba6ec5d0034242da05cc54 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 10 Feb 2025 08:43:39 +0100 Subject: [PATCH 0592/1585] xen/swiotlb: relax alignment requirements When mapping a buffer for DMA via .map_page or .map_sg DMA operations, there is no need to check the machine frames to be aligned according to the mapped areas size. All what is needed in these cases is that the buffer is contiguous at machine level. So carve out the alignment check from range_straddles_page_boundary() and move it to a helper called by xen_swiotlb_alloc_coherent() and xen_swiotlb_free_coherent() directly. Fixes: 9f40ec84a797 ("xen/swiotlb: add alignment check for dma buffers") Reported-by: Jan Vejvalka Tested-by: Jan Vejvalka Signed-off-by: Juergen Gross Reviewed-by: Stefano Stabellini Signed-off-by: Juergen Gross --- drivers/xen/swiotlb-xen.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index a337edcf8faf7..26c62e0d34e98 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -74,19 +74,21 @@ static inline phys_addr_t xen_dma_to_phys(struct device *dev, return xen_bus_to_phys(dev, dma_to_phys(dev, dma_addr)); } +static inline bool range_requires_alignment(phys_addr_t p, size_t size) +{ + phys_addr_t algn = 1ULL << (get_order(size) + PAGE_SHIFT); + phys_addr_t bus_addr = pfn_to_bfn(XEN_PFN_DOWN(p)) << XEN_PAGE_SHIFT; + + return IS_ALIGNED(p, algn) && !IS_ALIGNED(bus_addr, algn); +} + static inline int range_straddles_page_boundary(phys_addr_t p, size_t size) { unsigned long next_bfn, xen_pfn = XEN_PFN_DOWN(p); unsigned int i, nr_pages = XEN_PFN_UP(xen_offset_in_page(p) + size); - phys_addr_t algn = 1ULL << (get_order(size) + PAGE_SHIFT); next_bfn = pfn_to_bfn(xen_pfn); - /* If buffer is physically aligned, ensure DMA alignment. */ - if (IS_ALIGNED(p, algn) && - !IS_ALIGNED((phys_addr_t)next_bfn << XEN_PAGE_SHIFT, algn)) - return 1; - for (i = 1; i < nr_pages; i++) if (pfn_to_bfn(++xen_pfn) != ++next_bfn) return 1; @@ -156,7 +158,8 @@ xen_swiotlb_alloc_coherent(struct device *dev, size_t size, *dma_handle = xen_phys_to_dma(dev, phys); if (*dma_handle + size - 1 > dma_mask || - range_straddles_page_boundary(phys, size)) { + range_straddles_page_boundary(phys, size) || + range_requires_alignment(phys, size)) { if (xen_create_contiguous_region(phys, order, fls64(dma_mask), dma_handle) != 0) goto out_free_pages; @@ -182,7 +185,8 @@ xen_swiotlb_free_coherent(struct device *dev, size_t size, void *vaddr, size = ALIGN(size, XEN_PAGE_SIZE); if (WARN_ON_ONCE(dma_handle + size - 1 > dev->coherent_dma_mask) || - WARN_ON_ONCE(range_straddles_page_boundary(phys, size))) + WARN_ON_ONCE(range_straddles_page_boundary(phys, size) || + range_requires_alignment(phys, size))) return; if (TestClearPageXenRemapped(virt_to_page(vaddr))) -- GitLab From e93ec87286bd1fd30b7389e7a387cfb259f297e3 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 11 Feb 2025 11:16:28 +0100 Subject: [PATCH 0593/1585] x86/xen: allow larger contiguous memory regions in PV guests Today a PV guest (including dom0) can create 2MB contiguous memory regions for DMA buffers at max. This has led to problems at least with the megaraid_sas driver, which wants to allocate a 2.3MB DMA buffer. The limiting factor is the frame array used to do the hypercall for making the memory contiguous, which has 512 entries and is just a static array in mmu_pv.c. In order to not waste memory for non-PV guests, put the initial frame array into .init.data section and dynamically allocate an array from the .init_after_bootmem hook of PV guests. In case a contiguous memory area larger than the initially supported 2MB is requested, allocate a larger buffer for the frame list. Note that such an allocation is tried only after memory management has been initialized properly, which is tested via a flag being set in the .init_after_bootmem hook. Fixes: 9f40ec84a797 ("xen/swiotlb: add alignment check for dma buffers") Signed-off-by: Juergen Gross Tested-by: Alan Robinson Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross --- arch/x86/xen/mmu_pv.c | 71 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 9 deletions(-) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 2c70cd35e72c5..d078de2c952b3 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -111,6 +111,51 @@ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; */ static DEFINE_SPINLOCK(xen_reservation_lock); +/* Protected by xen_reservation_lock. */ +#define MIN_CONTIG_ORDER 9 /* 2MB */ +static unsigned int discontig_frames_order = MIN_CONTIG_ORDER; +static unsigned long discontig_frames_early[1UL << MIN_CONTIG_ORDER] __initdata; +static unsigned long *discontig_frames __refdata = discontig_frames_early; +static bool discontig_frames_dyn; + +static int alloc_discontig_frames(unsigned int order) +{ + unsigned long *new_array, *old_array; + unsigned int old_order; + unsigned long flags; + + BUG_ON(order < MIN_CONTIG_ORDER); + BUILD_BUG_ON(sizeof(discontig_frames_early) != PAGE_SIZE); + + new_array = (unsigned long *)__get_free_pages(GFP_KERNEL, + order - MIN_CONTIG_ORDER); + if (!new_array) + return -ENOMEM; + + spin_lock_irqsave(&xen_reservation_lock, flags); + + old_order = discontig_frames_order; + + if (order > discontig_frames_order || !discontig_frames_dyn) { + if (!discontig_frames_dyn) + old_array = NULL; + else + old_array = discontig_frames; + + discontig_frames = new_array; + discontig_frames_order = order; + discontig_frames_dyn = true; + } else { + old_array = new_array; + } + + spin_unlock_irqrestore(&xen_reservation_lock, flags); + + free_pages((unsigned long)old_array, old_order - MIN_CONTIG_ORDER); + + return 0; +} + /* * Note about cr3 (pagetable base) values: * @@ -814,6 +859,9 @@ static void __init xen_after_bootmem(void) SetPagePinned(virt_to_page(level3_user_vsyscall)); #endif xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); + + if (alloc_discontig_frames(MIN_CONTIG_ORDER)) + BUG(); } static void xen_unpin_page(struct mm_struct *mm, struct page *page, @@ -2203,10 +2251,6 @@ void __init xen_init_mmu_ops(void) memset(dummy_mapping, 0xff, PAGE_SIZE); } -/* Protected by xen_reservation_lock. */ -#define MAX_CONTIG_ORDER 9 /* 2MB */ -static unsigned long discontig_frames[1< MAX_CONTIG_ORDER)) - return -ENOMEM; + if (unlikely(order > discontig_frames_order)) { + if (!discontig_frames_dyn) + return -ENOMEM; + + if (alloc_discontig_frames(order)) + return -ENOMEM; + } memset((void *) vstart, 0, PAGE_SIZE << order); spin_lock_irqsave(&xen_reservation_lock, flags); + in_frames = discontig_frames; + /* 1. Zap current PTEs, remembering MFNs. */ xen_zap_pfn_range(vstart, order, in_frames, NULL); @@ -2358,12 +2409,12 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) { - unsigned long *out_frames = discontig_frames, in_frame; + unsigned long *out_frames, in_frame; unsigned long flags; int success; unsigned long vstart; - if (unlikely(order > MAX_CONTIG_ORDER)) + if (unlikely(order > discontig_frames_order)) return; vstart = (unsigned long)phys_to_virt(pstart); @@ -2371,6 +2422,8 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) spin_lock_irqsave(&xen_reservation_lock, flags); + out_frames = discontig_frames; + /* 1. Find start MFN of contiguous extent. */ in_frame = virt_to_mfn((void *)vstart); -- GitLab From 75ad02318af2e4ae669e26a79f001bd5e1f97472 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 12 Feb 2025 16:14:38 +0100 Subject: [PATCH 0594/1585] Xen/swiotlb: mark xen_swiotlb_fixup() __init It's sole user (pci_xen_swiotlb_init()) is __init, too. Signed-off-by: Jan Beulich Reviewed-by: Stefano Stabellini Message-ID: Signed-off-by: Juergen Gross --- drivers/xen/swiotlb-xen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 26c62e0d34e98..1f65795cf5d7a 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -113,7 +113,7 @@ static struct io_tlb_pool *xen_swiotlb_find_pool(struct device *dev, } #ifdef CONFIG_X86 -int xen_swiotlb_fixup(void *buf, unsigned long nslabs) +int __init xen_swiotlb_fixup(void *buf, unsigned long nslabs) { int rc; unsigned int order = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT); -- GitLab From 4cf7d58620bfc2ebe934e3dfa97208f13f14ab8b Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Sun, 9 Feb 2025 09:46:50 +0530 Subject: [PATCH 0595/1585] genirq: Remove unused CONFIG_GENERIC_PENDING_IRQ_CHIPFLAGS CONFIG_GENERIC_PENDING_IRQ_CHIPFLAGS is not used anymore, hence remove it. Fixes: f94a18249b7f ("genirq: Remove IRQ_MOVE_PCNTXT and related code") Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250209041655.331470-7-apatel@ventanamicro.com --- kernel/irq/Kconfig | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 5432418c0feaf..875f25ed6f710 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -31,10 +31,6 @@ config GENERIC_IRQ_EFFECTIVE_AFF_MASK config GENERIC_PENDING_IRQ bool -# Deduce delayed migration from top-level interrupt chip flags -config GENERIC_PENDING_IRQ_CHIPFLAGS - bool - # Support for generic irq migrating off cpu before the cpu is offline. config GENERIC_IRQ_MIGRATION bool -- GitLab From ab027c488fc4a1fff0a5b712d4bdb2d2d324e8f8 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Thu, 23 Jan 2025 14:34:41 +0800 Subject: [PATCH 0596/1585] firmware: arm_scmi: imx: Correct tx size of scmi_imx_misc_ctrl_set 'struct scmi_imx_misc_ctrl_set_in' has a zero length array in the end, The sizeof will not count 'value[]', and hence Tx size will be smaller than actual size for Tx,and SCMI firmware will flag this as protocol error. Fix this by enlarge the Tx size with 'num * sizeof(__le32)' to count in the size of data. Fixes: 61c9f03e22fc ("firmware: arm_scmi: Add initial support for i.MX MISC protocol") Reviewed-by: Jacky Bai Tested-by: Shengjiu Wang Acked-by: Jason Liu Signed-off-by: Peng Fan Message-Id: <20250123063441.392555-1-peng.fan@oss.nxp.com> (sudeep.holla: Commit rewording and replace hardcoded sizeof(__le32) value) Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/vendors/imx/imx-sm-misc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/vendors/imx/imx-sm-misc.c b/drivers/firmware/arm_scmi/vendors/imx/imx-sm-misc.c index 83b69fc4fba5b..a8915d3b4df51 100644 --- a/drivers/firmware/arm_scmi/vendors/imx/imx-sm-misc.c +++ b/drivers/firmware/arm_scmi/vendors/imx/imx-sm-misc.c @@ -254,8 +254,8 @@ static int scmi_imx_misc_ctrl_set(const struct scmi_protocol_handle *ph, if (num > max_num) return -EINVAL; - ret = ph->xops->xfer_get_init(ph, SCMI_IMX_MISC_CTRL_SET, sizeof(*in), - 0, &t); + ret = ph->xops->xfer_get_init(ph, SCMI_IMX_MISC_CTRL_SET, + sizeof(*in) + num * sizeof(__le32), 0, &t); if (ret) return ret; -- GitLab From 81f64e925c29fe6e99f04b131fac1935ac931e81 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 12 Feb 2025 13:35:16 -0600 Subject: [PATCH 0597/1585] PCI: Avoid FLR for Mediatek MT7922 WiFi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Mediatek MT7922 WiFi device advertises FLR support, but it apparently does not work, and all subsequent config reads return ~0: pci 0000:01:00.0: [14c3:0616] type 00 class 0x028000 PCIe Endpoint pciback 0000:01:00.0: not ready 65535ms after FLR; giving up After an FLR, pci_dev_wait() waits for the device to become ready. Prior to d591f6804e7e ("PCI: Wait for device readiness with Configuration RRS"), it polls PCI_COMMAND until it is something other that PCI_POSSIBLE_ERROR (~0). If it times out, pci_dev_wait() returns -ENOTTY and __pci_reset_function_locked() tries the next available reset method. Typically this is Secondary Bus Reset, which does work, so the MT7922 is eventually usable. After d591f6804e7e, if Configuration Request Retry Status Software Visibility (RRS SV) is enabled, pci_dev_wait() polls PCI_VENDOR_ID until it is something other than the special 0x0001 Vendor ID that indicates a completion with RRS status. When RRS SV is enabled, reads of PCI_VENDOR_ID should return either 0x0001, i.e., the config read was completed with RRS, or a valid Vendor ID. On the MT7922, it seems that all config reads after FLR return ~0 indefinitely. When pci_dev_wait() reads PCI_VENDOR_ID and gets 0xffff, it assumes that's a valid Vendor ID and the device is now ready, so it returns with success. After pci_dev_wait() returns success, we restore config space and continue. Since the MT7922 is not actually ready after the FLR, the restore fails and the device is unusable. We considered changing pci_dev_wait() to continue polling if a PCI_VENDOR_ID read returns either 0x0001 or 0xffff. This "works" as it did before d591f6804e7e, although we have to wait for the timeout and then fall back to SBR. But it doesn't work for SR-IOV VFs, which *always* return 0xffff as the Vendor ID. Mark Mediatek MT7922 WiFi devices to avoid the use of FLR completely. This will cause fallback to another reset method, such as SBR. Link: https://lore.kernel.org/r/20250212193516.88741-1-helgaas@kernel.org Fixes: d591f6804e7e ("PCI: Wait for device readiness with Configuration RRS") Link: https://github.com/QubesOS/qubes-issues/issues/9689#issuecomment-2582927149 Link: https://lore.kernel.org/r/Z4pHll_6GX7OUBzQ@mail-itl Signed-off-by: Bjorn Helgaas Tested-by: Marek Marczykowski-Górecki Cc: stable@vger.kernel.org --- drivers/pci/quirks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index b84ff7bade822..82b21e34c545e 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5522,7 +5522,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x443, quirk_intel_qat_vf_cap); * AMD Matisse USB 3.0 Host Controller 0x149c * Intel 82579LM Gigabit Ethernet Controller 0x1502 * Intel 82579V Gigabit Ethernet Controller 0x1503 - * + * Mediatek MT7922 802.11ax PCI Express Wireless Network Adapter */ static void quirk_no_flr(struct pci_dev *dev) { @@ -5534,6 +5534,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x149c, quirk_no_flr); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x7901, quirk_no_flr); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1502, quirk_no_flr); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1503, quirk_no_flr); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_MEDIATEK, 0x0616, quirk_no_flr); /* FLR may cause the SolidRun SNET DPU (rev 0x1) to hang */ static void quirk_no_flr_snet(struct pci_dev *dev) -- GitLab From 1d0013962d220b166d9f7c9fe2746f1542e459a3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 12 Feb 2025 22:23:59 +0000 Subject: [PATCH 0598/1585] netfs: Fix a number of read-retry hangs Fix a number of hangs in the netfslib read-retry code, including: (1) netfs_reissue_read() doubles up the getting of references on subrequests, thereby leaking the subrequest and causing inode eviction to wait indefinitely. This can lead to the kernel reporting a hang in the filesystem's evict_inode(). Fix this by removing the get from netfs_reissue_read() and adding one to netfs_retry_read_subrequests() to deal with the one place that didn't double up. (2) The loop in netfs_retry_read_subrequests() that retries a sequence of failed subrequests doesn't record whether or not it retried the one that the "subreq" pointer points to when it leaves the loop. It may not if renegotiation/repreparation of the subrequests means that fewer subrequests are needed to span the cumulative range of the sequence. Because it doesn't record this, the piece of code that discards now-superfluous subrequests doesn't know whether it should discard the one "subreq" points to - and so it doesn't. Fix this by noting whether the last subreq it examines is superfluous and if it is, then getting rid of it and all subsequent subrequests. If that one one wasn't superfluous, then we would have tried to go round the previous loop again and so there can be no further unretried subrequests in the sequence. (3) netfs_retry_read_subrequests() gets yet an extra ref on any additional subrequests it has to get because it ran out of ones it could reuse to to renegotiation/repreparation shrinking the subrequests. Fix this by removing that extra ref. (4) In netfs_retry_reads(), it was using wait_on_bit() to wait for NETFS_SREQ_IN_PROGRESS to be cleared on all subrequests in the sequence - but netfs_read_subreq_terminated() is now using a wait queue on the request instead and so this wait will never finish. Fix this by waiting on the wait queue instead. To make this work, a new flag, NETFS_RREQ_RETRYING, is now set around the wait loop to tell the wake-up code to wake up the wait queue rather than requeuing the request's work item. Note that this flag replaces the NETFS_RREQ_NEED_RETRY flag which is no longer used. (5) Whilst not strictly anything to do with the hang, netfs_retry_read_subrequests() was also doubly incrementing the subreq_counter and re-setting the debug index, leaving a gap in the trace. This is also fixed. One of these hangs was observed with 9p and with cifs. Others were forced by manual code injection into fs/afs/file.c. Firstly, afs_prepare_read() was created to provide an changing pattern of maximum subrequest sizes: static int afs_prepare_read(struct netfs_io_subrequest *subreq) { struct netfs_io_request *rreq = subreq->rreq; if (!S_ISREG(subreq->rreq->inode->i_mode)) return 0; if (subreq->retry_count < 20) rreq->io_streams[0].sreq_max_len = umax(200, 2222 - subreq->retry_count * 40); else rreq->io_streams[0].sreq_max_len = 3333; return 0; } and pointed to by afs_req_ops. Then the following: struct netfs_io_subrequest *subreq = op->fetch.subreq; if (subreq->error == 0 && S_ISREG(subreq->rreq->inode->i_mode) && subreq->retry_count < 20) { subreq->transferred = subreq->already_done; __clear_bit(NETFS_SREQ_HIT_EOF, &subreq->flags); __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); afs_fetch_data_notify(op); return; } was inserted into afs_fetch_data_success() at the beginning and struct netfs_io_subrequest given an extra field, "already_done" that was set to the value in "subreq->transferred" by netfs_reissue_read(). When reading a 4K file, the subrequests would get gradually smaller, a new subrequest would be allocated around the 3rd retry and then eventually be rendered superfluous when the 20th retry was hit and the limit on the first subrequest was eased. Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item") Signed-off-by: David Howells Link: https://lore.kernel.org/r/20250212222402.3618494-2-dhowells@redhat.com Tested-by: Marc Dionne Tested-by: Steve French cc: Ihor Solodrai cc: Eric Van Hensbergen cc: Latchesar Ionkov cc: Dominique Martinet cc: Christian Schoenebeck cc: Paulo Alcantara cc: Jeff Layton cc: v9fs@lists.linux.dev cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/read_collect.c | 6 ++++-- fs/netfs/read_retry.c | 40 +++++++++++++++++++++++++++--------- include/linux/netfs.h | 2 +- include/trace/events/netfs.h | 4 +++- 4 files changed, 38 insertions(+), 14 deletions(-) diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index f65affa5a9e4a..636cc5a98ef57 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -470,7 +470,8 @@ void netfs_read_collection_worker(struct work_struct *work) */ void netfs_wake_read_collector(struct netfs_io_request *rreq) { - if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { + if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) && + !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) { if (!work_pending(&rreq->work)) { netfs_get_request(rreq, netfs_rreq_trace_get_work); if (!queue_work(system_unbound_wq, &rreq->work)) @@ -586,7 +587,8 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq) smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */ /* If we are at the head of the queue, wake up the collector. */ - if (list_is_first(&subreq->rreq_link, &stream->subrequests)) + if (list_is_first(&subreq->rreq_link, &stream->subrequests) || + test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) netfs_wake_read_collector(rreq); netfs_put_subrequest(subreq, true, netfs_sreq_trace_put_terminated); diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c index 2290af0d51acc..8316c4533a51d 100644 --- a/fs/netfs/read_retry.c +++ b/fs/netfs/read_retry.c @@ -14,7 +14,6 @@ static void netfs_reissue_read(struct netfs_io_request *rreq, { __clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); - netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); subreq->rreq->netfs_ops->issue_read(subreq); } @@ -48,6 +47,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) __clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); subreq->retry_count++; netfs_reset_iter(subreq); + netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); netfs_reissue_read(rreq, subreq); } } @@ -75,7 +75,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) struct iov_iter source; unsigned long long start, len; size_t part; - bool boundary = false; + bool boundary = false, subreq_superfluous = false; /* Go through the subreqs and find the next span of contiguous * buffer that we then rejig (cifs, for example, needs the @@ -116,8 +116,10 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) /* Work through the sublist. */ subreq = from; list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) { - if (!len) + if (!len) { + subreq_superfluous = true; break; + } subreq->source = NETFS_DOWNLOAD_FROM_SERVER; subreq->start = start - subreq->transferred; subreq->len = len + subreq->transferred; @@ -154,19 +156,21 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); netfs_reissue_read(rreq, subreq); - if (subreq == to) + if (subreq == to) { + subreq_superfluous = false; break; + } } /* If we managed to use fewer subreqs, we can discard the * excess; if we used the same number, then we're done. */ if (!len) { - if (subreq == to) + if (!subreq_superfluous) continue; list_for_each_entry_safe_from(subreq, tmp, &stream->subrequests, rreq_link) { - trace_netfs_sreq(subreq, netfs_sreq_trace_discard); + trace_netfs_sreq(subreq, netfs_sreq_trace_superfluous); list_del(&subreq->rreq_link); netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done); if (subreq == to) @@ -187,14 +191,12 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) subreq->source = NETFS_DOWNLOAD_FROM_SERVER; subreq->start = start; subreq->len = len; - subreq->debug_index = atomic_inc_return(&rreq->subreq_counter); subreq->stream_nr = stream->stream_nr; subreq->retry_count = 1; trace_netfs_sreq_ref(rreq->debug_id, subreq->debug_index, refcount_read(&subreq->ref), netfs_sreq_trace_new); - netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); list_add(&subreq->rreq_link, &to->rreq_link); to = list_next_entry(to, rreq_link); @@ -256,14 +258,32 @@ void netfs_retry_reads(struct netfs_io_request *rreq) { struct netfs_io_subrequest *subreq; struct netfs_io_stream *stream = &rreq->io_streams[0]; + DEFINE_WAIT(myself); + + set_bit(NETFS_RREQ_RETRYING, &rreq->flags); /* Wait for all outstanding I/O to quiesce before performing retries as * we may need to renegotiate the I/O sizes. */ list_for_each_entry(subreq, &stream->subrequests, rreq_link) { - wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS, - TASK_UNINTERRUPTIBLE); + if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) + continue; + + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + for (;;) { + prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); + + if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) + break; + + trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for); + schedule(); + trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); + } + + finish_wait(&rreq->waitq, &myself); } + clear_bit(NETFS_RREQ_RETRYING, &rreq->flags); trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit); netfs_retry_read_subrequests(rreq); diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 071d05d81d388..c86a11cfc4a36 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -278,7 +278,7 @@ struct netfs_io_request { #define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ #define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ -#define NETFS_RREQ_NEED_RETRY 14 /* Need to try retrying */ +#define NETFS_RREQ_RETRYING 14 /* Set if we're in the retry path */ #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark * write to cache on read */ const struct netfs_request_ops *netfs_ops; diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 6e699cadcb294..f880835f7695e 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -99,7 +99,7 @@ EM(netfs_sreq_trace_limited, "LIMIT") \ EM(netfs_sreq_trace_need_clear, "N-CLR") \ EM(netfs_sreq_trace_partial_read, "PARTR") \ - EM(netfs_sreq_trace_need_retry, "NRTRY") \ + EM(netfs_sreq_trace_need_retry, "ND-RT") \ EM(netfs_sreq_trace_prepare, "PREP ") \ EM(netfs_sreq_trace_prep_failed, "PRPFL") \ EM(netfs_sreq_trace_progress, "PRGRS") \ @@ -108,7 +108,9 @@ EM(netfs_sreq_trace_short, "SHORT") \ EM(netfs_sreq_trace_split, "SPLIT") \ EM(netfs_sreq_trace_submit, "SUBMT") \ + EM(netfs_sreq_trace_superfluous, "SPRFL") \ EM(netfs_sreq_trace_terminated, "TERM ") \ + EM(netfs_sreq_trace_wait_for, "_WAIT") \ EM(netfs_sreq_trace_write, "WRITE") \ EM(netfs_sreq_trace_write_skip, "SKIP ") \ E_(netfs_sreq_trace_write_term, "WTERM") -- GitLab From d01c495f432ce34df8bfd092e71720a2cf169a90 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 12 Feb 2025 22:24:00 +0000 Subject: [PATCH 0599/1585] netfs: Add retry stat counters Add stat counters to count the number of request and subrequest retries and display them in /proc/fs/netfs/stats. Signed-off-by: David Howells Link: https://lore.kernel.org/r/20250212222402.3618494-3-dhowells@redhat.com cc: Jeff Layton cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/internal.h | 4 ++++ fs/netfs/read_retry.c | 3 +++ fs/netfs/stats.c | 9 +++++++++ fs/netfs/write_issue.c | 1 + fs/netfs/write_retry.c | 2 ++ 5 files changed, 19 insertions(+) diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index eb76f98c894bb..1c4f953c3d683 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -135,6 +135,8 @@ extern atomic_t netfs_n_rh_write_begin; extern atomic_t netfs_n_rh_write_done; extern atomic_t netfs_n_rh_write_failed; extern atomic_t netfs_n_rh_write_zskip; +extern atomic_t netfs_n_rh_retry_read_req; +extern atomic_t netfs_n_rh_retry_read_subreq; extern atomic_t netfs_n_wh_buffered_write; extern atomic_t netfs_n_wh_writethrough; extern atomic_t netfs_n_wh_dio_write; @@ -147,6 +149,8 @@ extern atomic_t netfs_n_wh_upload_failed; extern atomic_t netfs_n_wh_write; extern atomic_t netfs_n_wh_write_done; extern atomic_t netfs_n_wh_write_failed; +extern atomic_t netfs_n_wh_retry_write_req; +extern atomic_t netfs_n_wh_retry_write_subreq; extern atomic_t netfs_n_wb_lock_skip; extern atomic_t netfs_n_wb_lock_wait; extern atomic_t netfs_n_folioq; diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c index 8316c4533a51d..0f294b26e08c9 100644 --- a/fs/netfs/read_retry.c +++ b/fs/netfs/read_retry.c @@ -14,6 +14,7 @@ static void netfs_reissue_read(struct netfs_io_request *rreq, { __clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); + netfs_stat(&netfs_n_rh_retry_read_subreq); subreq->rreq->netfs_ops->issue_read(subreq); } @@ -260,6 +261,8 @@ void netfs_retry_reads(struct netfs_io_request *rreq) struct netfs_io_stream *stream = &rreq->io_streams[0]; DEFINE_WAIT(myself); + netfs_stat(&netfs_n_rh_retry_read_req); + set_bit(NETFS_RREQ_RETRYING, &rreq->flags); /* Wait for all outstanding I/O to quiesce before performing retries as diff --git a/fs/netfs/stats.c b/fs/netfs/stats.c index f1af344266cc6..ab6b916addc44 100644 --- a/fs/netfs/stats.c +++ b/fs/netfs/stats.c @@ -29,6 +29,8 @@ atomic_t netfs_n_rh_write_begin; atomic_t netfs_n_rh_write_done; atomic_t netfs_n_rh_write_failed; atomic_t netfs_n_rh_write_zskip; +atomic_t netfs_n_rh_retry_read_req; +atomic_t netfs_n_rh_retry_read_subreq; atomic_t netfs_n_wh_buffered_write; atomic_t netfs_n_wh_writethrough; atomic_t netfs_n_wh_dio_write; @@ -41,6 +43,8 @@ atomic_t netfs_n_wh_upload_failed; atomic_t netfs_n_wh_write; atomic_t netfs_n_wh_write_done; atomic_t netfs_n_wh_write_failed; +atomic_t netfs_n_wh_retry_write_req; +atomic_t netfs_n_wh_retry_write_subreq; atomic_t netfs_n_wb_lock_skip; atomic_t netfs_n_wb_lock_wait; atomic_t netfs_n_folioq; @@ -81,6 +85,11 @@ int netfs_stats_show(struct seq_file *m, void *v) atomic_read(&netfs_n_wh_write), atomic_read(&netfs_n_wh_write_done), atomic_read(&netfs_n_wh_write_failed)); + seq_printf(m, "Retries: rq=%u rs=%u wq=%u ws=%u\n", + atomic_read(&netfs_n_rh_retry_read_req), + atomic_read(&netfs_n_rh_retry_read_subreq), + atomic_read(&netfs_n_wh_retry_write_req), + atomic_read(&netfs_n_wh_retry_write_subreq)); seq_printf(m, "Objs : rr=%u sr=%u foq=%u wsc=%u\n", atomic_read(&netfs_n_rh_rreq), atomic_read(&netfs_n_rh_sreq), diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 69727411683ef..77279fc5b5a7c 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -253,6 +253,7 @@ void netfs_reissue_write(struct netfs_io_stream *stream, subreq->retry_count++; __clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); + netfs_stat(&netfs_n_wh_retry_write_subreq); netfs_do_issue_write(stream, subreq); } diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c index c841a851dd73b..545d33079a77d 100644 --- a/fs/netfs/write_retry.c +++ b/fs/netfs/write_retry.c @@ -203,6 +203,8 @@ void netfs_retry_writes(struct netfs_io_request *wreq) struct netfs_io_stream *stream; int s; + netfs_stat(&netfs_n_wh_retry_write_req); + /* Wait for all outstanding I/O to quiesce before performing retries as * we may need to renegotiate the I/O sizes. */ -- GitLab From 5de0219a9bb9dacc4ce6e8f2745540dcce786983 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 12 Feb 2025 22:24:01 +0000 Subject: [PATCH 0600/1585] netfs: Fix setting NETFS_RREQ_ALL_QUEUED to be after all subreqs queued Due to the code that queues a subreq on the active subrequest list getting moved to netfs_issue_read(), the NETFS_RREQ_ALL_QUEUED flag may now get set before the list-add actually happens. This is not a problem if the collection worker happens after the list-add, but it's a race - and, for 9P, where the read from the server is synchronous and done in the submitting thread, this is a lot more likely. The result is that, if the timing is wrong, a ref gets leaked because the collector thinks that all the subreqs have completed (because it can't see the last one yet) and clears NETFS_RREQ_IN_PROGRESS - at which point, the collection worker no longer goes into the collector. This can be provoked with AFS by injecting an msleep() right before the final subreq is queued. Fix this by splitting the queuing part out of netfs_issue_read() into a new function, netfs_queue_read(), and calling it separately. The setting of NETFS_RREQ_ALL_QUEUED is then done by netfs_queue_read() whilst it is holding the spinlock (that's probably unnecessary, but shouldn't hurt). It might be better to set a flag on the final subreq, but this could be a problem if an error occurs and we can't queue it. Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item") Reported-by: Ihor Solodrai Closes: https://lore.kernel.org/r/a7x33d4dnMdGTtRivptq6S1i8btK70SNBP2XyX_xwDAhLvgQoPox6FVBOkifq4eBinfFfbZlIkMZBe3QarlWTxoEtHZwJCZbNKtaqrR7PvI=@pm.me/ Signed-off-by: David Howells Link: https://lore.kernel.org/r/20250212222402.3618494-4-dhowells@redhat.com Tested-by: Ihor Solodrai cc: Eric Van Hensbergen cc: Latchesar Ionkov cc: Dominique Martinet cc: Christian Schoenebeck cc: Marc Dionne cc: Steve French cc: Paulo Alcantara cc: Jeff Layton cc: v9fs@lists.linux.dev cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_read.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index f761d44b34362..0d1b6d35ff3b8 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -155,8 +155,9 @@ static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq, netfs_cache_read_terminated, subreq); } -static void netfs_issue_read(struct netfs_io_request *rreq, - struct netfs_io_subrequest *subreq) +static void netfs_queue_read(struct netfs_io_request *rreq, + struct netfs_io_subrequest *subreq, + bool last_subreq) { struct netfs_io_stream *stream = &rreq->io_streams[0]; @@ -177,8 +178,17 @@ static void netfs_issue_read(struct netfs_io_request *rreq, } } + if (last_subreq) { + smp_wmb(); /* Write lists before ALL_QUEUED. */ + set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); + } + spin_unlock(&rreq->lock); +} +static void netfs_issue_read(struct netfs_io_request *rreq, + struct netfs_io_subrequest *subreq) +{ switch (subreq->source) { case NETFS_DOWNLOAD_FROM_SERVER: rreq->netfs_ops->issue_read(subreq); @@ -293,11 +303,8 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq) } size -= slice; start += slice; - if (size <= 0) { - smp_wmb(); /* Write lists before ALL_QUEUED. */ - set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); - } + netfs_queue_read(rreq, subreq, size <= 0); netfs_issue_read(rreq, subreq); cond_resched(); } while (size > 0); -- GitLab From 1f47ed294a2bd577d5ae43e6e28e1c9a3be4a833 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 13 Feb 2025 08:18:46 -0700 Subject: [PATCH 0601/1585] block: cleanup and fix batch completion adding conditions The conditions for whether or not a request is allowed adding to a completion batch are a bit hard to read, and they also have a few issues. One is that ioerror may indeed be a random value on passthrough, and it's being checked unconditionally of whether or not the given request is a passthrough request or not. Rewrite the conditions to be separate for easier reading, and only check ioerror for non-passthrough requests. This fixes an issue with bio unmapping on passthrough, where it fails getting added to a batch. This both leads to suboptimal performance, and may trigger a potential schedule-under-atomic condition for polled passthrough IO. Fixes: f794f3351f26 ("block: add support for blk_mq_end_request_batch()") Link: https://lore.kernel.org/r/20575f0a-656e-4bb3-9d82-dec6c7e3a35c@kernel.dk Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 9ebb53f031cdb..fa2a76cc2f73d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -861,12 +861,22 @@ static inline bool blk_mq_add_to_batch(struct request *req, void (*complete)(struct io_comp_batch *)) { /* - * blk_mq_end_request_batch() can't end request allocated from - * sched tags + * Check various conditions that exclude batch processing: + * 1) No batch container + * 2) Has scheduler data attached + * 3) Not a passthrough request and end_io set + * 4) Not a passthrough request and an ioerror */ - if (!iob || (req->rq_flags & RQF_SCHED_TAGS) || ioerror || - (req->end_io && !blk_rq_is_passthrough(req))) + if (!iob) return false; + if (req->rq_flags & RQF_SCHED_TAGS) + return false; + if (!blk_rq_is_passthrough(req)) { + if (req->end_io) + return false; + if (ioerror < 0) + return false; + } if (!iob->complete) iob->complete = complete; -- GitLab From 6fe9116dd6bebee570406ec3f00a50388a62ccb3 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 4 Feb 2025 13:52:36 +0200 Subject: [PATCH 0602/1585] MAINTAINERS: Use my kernel.org address for I2C ACPI work Switch to use my kernel.org address for I2C ACPI work. Signed-off-by: Mika Westerberg Signed-off-by: Wolfram Sang --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 25c86f47353de..1d200adbdcea4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10822,7 +10822,7 @@ S: Odd Fixes F: drivers/tty/hvc/ I2C ACPI SUPPORT -M: Mika Westerberg +M: Mika Westerberg L: linux-i2c@vger.kernel.org L: linux-acpi@vger.kernel.org S: Maintained -- GitLab From 35fa2d88ca9481e5caf533d58b99ca259c63b2fe Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 10 Feb 2025 13:30:25 +0100 Subject: [PATCH 0603/1585] driver core: add a faux bus for use when a simple device/bus is needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many drivers abuse the platform driver/bus system as it provides a simple way to create and bind a device to a driver-specific set of probe/release functions. Instead of doing that, and wasting all of the memory associated with a platform device, here is a "faux" bus that can be used instead. Reviewed-by: Jonathan Cameron Reviewed-by: Danilo Krummrich Reviewed-by: Lyude Paul Reviewed-by: Thomas Weißschuh Reviewed-by: Zijun Hu Link: https://lore.kernel.org/r/2025021026-atlantic-gibberish-3f0c@gregkh Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/infrastructure.rst | 6 + drivers/base/Makefile | 2 +- drivers/base/base.h | 1 + drivers/base/faux.c | 232 ++++++++++++++++++++ drivers/base/init.c | 1 + include/linux/device/faux.h | 69 ++++++ 6 files changed, 310 insertions(+), 1 deletion(-) create mode 100644 drivers/base/faux.c create mode 100644 include/linux/device/faux.h diff --git a/Documentation/driver-api/infrastructure.rst b/Documentation/driver-api/infrastructure.rst index 3d52dfdfa9fdf..35e36fee4238a 100644 --- a/Documentation/driver-api/infrastructure.rst +++ b/Documentation/driver-api/infrastructure.rst @@ -41,6 +41,12 @@ Device Drivers Base .. kernel-doc:: drivers/base/class.c :export: +.. kernel-doc:: include/linux/device/faux.h + :internal: + +.. kernel-doc:: drivers/base/faux.c + :export: + .. kernel-doc:: drivers/base/node.c :internal: diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 7fb21768ca36d..8074a10183dcb 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -6,7 +6,7 @@ obj-y := component.o core.o bus.o dd.o syscore.o \ cpu.o firmware.o init.o map.o devres.o \ attribute_container.o transport_class.o \ topology.o container.o property.o cacheinfo.o \ - swnode.o + swnode.o faux.o obj-$(CONFIG_AUXILIARY_BUS) += auxiliary.o obj-$(CONFIG_DEVTMPFS) += devtmpfs.o obj-y += power/ diff --git a/drivers/base/base.h b/drivers/base/base.h index 8cf04a557bdb0..0042e4774b0ce 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -137,6 +137,7 @@ int hypervisor_init(void); static inline int hypervisor_init(void) { return 0; } #endif int platform_bus_init(void); +int faux_bus_init(void); void cpu_dev_init(void); void container_dev_init(void); #ifdef CONFIG_AUXILIARY_BUS diff --git a/drivers/base/faux.c b/drivers/base/faux.c new file mode 100644 index 0000000000000..531e9d789ee04 --- /dev/null +++ b/drivers/base/faux.c @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2025 Greg Kroah-Hartman + * Copyright (c) 2025 The Linux Foundation + * + * A "simple" faux bus that allows devices to be created and added + * automatically to it. This is to be used whenever you need to create a + * device that is not associated with any "real" system resources, and do + * not want to have to deal with a bus/driver binding logic. It is + * intended to be very simple, with only a create and a destroy function + * available. + */ +#include +#include +#include +#include +#include +#include +#include "base.h" + +/* + * Internal wrapper structure so we can hold a pointer to the + * faux_device_ops for this device. + */ +struct faux_object { + struct faux_device faux_dev; + const struct faux_device_ops *faux_ops; +}; +#define to_faux_object(dev) container_of_const(dev, struct faux_object, faux_dev.dev) + +static struct device faux_bus_root = { + .init_name = "faux", +}; + +static int faux_match(struct device *dev, const struct device_driver *drv) +{ + /* Match always succeeds, we only have one driver */ + return 1; +} + +static int faux_probe(struct device *dev) +{ + struct faux_object *faux_obj = to_faux_object(dev); + struct faux_device *faux_dev = &faux_obj->faux_dev; + const struct faux_device_ops *faux_ops = faux_obj->faux_ops; + int ret = 0; + + if (faux_ops && faux_ops->probe) + ret = faux_ops->probe(faux_dev); + + return ret; +} + +static void faux_remove(struct device *dev) +{ + struct faux_object *faux_obj = to_faux_object(dev); + struct faux_device *faux_dev = &faux_obj->faux_dev; + const struct faux_device_ops *faux_ops = faux_obj->faux_ops; + + if (faux_ops && faux_ops->remove) + faux_ops->remove(faux_dev); +} + +static const struct bus_type faux_bus_type = { + .name = "faux", + .match = faux_match, + .probe = faux_probe, + .remove = faux_remove, +}; + +static struct device_driver faux_driver = { + .name = "faux_driver", + .bus = &faux_bus_type, + .probe_type = PROBE_FORCE_SYNCHRONOUS, +}; + +static void faux_device_release(struct device *dev) +{ + struct faux_object *faux_obj = to_faux_object(dev); + + kfree(faux_obj); +} + +/** + * faux_device_create_with_groups - Create and register with the driver + * core a faux device and populate the device with an initial + * set of sysfs attributes. + * @name: The name of the device we are adding, must be unique for + * all faux devices. + * @parent: Pointer to a potential parent struct device. If set to + * NULL, the device will be created in the "root" of the faux + * device tree in sysfs. + * @faux_ops: struct faux_device_ops that the new device will call back + * into, can be NULL. + * @groups: The set of sysfs attributes that will be created for this + * device when it is registered with the driver core. + * + * Create a new faux device and register it in the driver core properly. + * If present, callbacks in @faux_ops will be called with the device that + * for the caller to do something with at the proper time given the + * device's lifecycle. + * + * Note, when this function is called, the functions specified in struct + * faux_ops can be called before the function returns, so be prepared for + * everything to be properly initialized before that point in time. + * + * Return: + * * NULL if an error happened with creating the device + * * pointer to a valid struct faux_device that is registered with sysfs + */ +struct faux_device *faux_device_create_with_groups(const char *name, + struct device *parent, + const struct faux_device_ops *faux_ops, + const struct attribute_group **groups) +{ + struct faux_object *faux_obj; + struct faux_device *faux_dev; + struct device *dev; + int ret; + + faux_obj = kzalloc(sizeof(*faux_obj), GFP_KERNEL); + if (!faux_obj) + return NULL; + + /* Save off the callbacks so we can use them in the future */ + faux_obj->faux_ops = faux_ops; + + /* Initialize the device portion and register it with the driver core */ + faux_dev = &faux_obj->faux_dev; + dev = &faux_dev->dev; + + device_initialize(dev); + dev->release = faux_device_release; + if (parent) + dev->parent = parent; + else + dev->parent = &faux_bus_root; + dev->bus = &faux_bus_type; + dev->groups = groups; + dev_set_name(dev, "%s", name); + + ret = device_add(dev); + if (ret) { + pr_err("%s: device_add for faux device '%s' failed with %d\n", + __func__, name, ret); + put_device(dev); + return NULL; + } + + return faux_dev; +} +EXPORT_SYMBOL_GPL(faux_device_create_with_groups); + +/** + * faux_device_create - create and register with the driver core a faux device + * @name: The name of the device we are adding, must be unique for all + * faux devices. + * @parent: Pointer to a potential parent struct device. If set to + * NULL, the device will be created in the "root" of the faux + * device tree in sysfs. + * @faux_ops: struct faux_device_ops that the new device will call back + * into, can be NULL. + * + * Create a new faux device and register it in the driver core properly. + * If present, callbacks in @faux_ops will be called with the device that + * for the caller to do something with at the proper time given the + * device's lifecycle. + * + * Note, when this function is called, the functions specified in struct + * faux_ops can be called before the function returns, so be prepared for + * everything to be properly initialized before that point in time. + * + * Return: + * * NULL if an error happened with creating the device + * * pointer to a valid struct faux_device that is registered with sysfs + */ +struct faux_device *faux_device_create(const char *name, + struct device *parent, + const struct faux_device_ops *faux_ops) +{ + return faux_device_create_with_groups(name, parent, faux_ops, NULL); +} +EXPORT_SYMBOL_GPL(faux_device_create); + +/** + * faux_device_destroy - destroy a faux device + * @faux_dev: faux device to destroy + * + * Unregisters and cleans up a device that was created with a call to + * faux_device_create() + */ +void faux_device_destroy(struct faux_device *faux_dev) +{ + struct device *dev = &faux_dev->dev; + + if (!faux_dev) + return; + + device_del(dev); + + /* The final put_device() will clean up the memory we allocated for this device. */ + put_device(dev); +} +EXPORT_SYMBOL_GPL(faux_device_destroy); + +int __init faux_bus_init(void) +{ + int ret; + + ret = device_register(&faux_bus_root); + if (ret) { + put_device(&faux_bus_root); + return ret; + } + + ret = bus_register(&faux_bus_type); + if (ret) + goto error_bus; + + ret = driver_register(&faux_driver); + if (ret) + goto error_driver; + + return ret; + +error_driver: + bus_unregister(&faux_bus_type); + +error_bus: + device_unregister(&faux_bus_root); + return ret; +} diff --git a/drivers/base/init.c b/drivers/base/init.c index c4954835128cf..9d2b06d65dfc6 100644 --- a/drivers/base/init.c +++ b/drivers/base/init.c @@ -32,6 +32,7 @@ void __init driver_init(void) /* These are also core pieces, but must come after the * core core pieces. */ + faux_bus_init(); of_core_init(); platform_bus_init(); auxiliary_bus_init(); diff --git a/include/linux/device/faux.h b/include/linux/device/faux.h new file mode 100644 index 0000000000000..9f43c0e46aa45 --- /dev/null +++ b/include/linux/device/faux.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2025 Greg Kroah-Hartman + * Copyright (c) 2025 The Linux Foundation + * + * A "simple" faux bus that allows devices to be created and added + * automatically to it. This is to be used whenever you need to create a + * device that is not associated with any "real" system resources, and do + * not want to have to deal with a bus/driver binding logic. It is + * intended to be very simple, with only a create and a destroy function + * available. + */ +#ifndef _FAUX_DEVICE_H_ +#define _FAUX_DEVICE_H_ + +#include +#include + +/** + * struct faux_device - a "faux" device + * @dev: internal struct device of the object + * + * A simple faux device that can be created/destroyed. To be used when a + * driver only needs to have a device to "hang" something off. This can be + * used for downloading firmware or other basic tasks. Use this instead of + * a struct platform_device if the device has no resources assigned to + * it at all. + */ +struct faux_device { + struct device dev; +}; +#define to_faux_device(x) container_of_const((x), struct faux_device, dev) + +/** + * struct faux_device_ops - a set of callbacks for a struct faux_device + * @probe: called when a faux device is probed by the driver core + * before the device is fully bound to the internal faux bus + * code. If probe succeeds, return 0, otherwise return a + * negative error number to stop the probe sequence from + * succeeding. + * @remove: called when a faux device is removed from the system + * + * Both @probe and @remove are optional, if not needed, set to NULL. + */ +struct faux_device_ops { + int (*probe)(struct faux_device *faux_dev); + void (*remove)(struct faux_device *faux_dev); +}; + +struct faux_device *faux_device_create(const char *name, + struct device *parent, + const struct faux_device_ops *faux_ops); +struct faux_device *faux_device_create_with_groups(const char *name, + struct device *parent, + const struct faux_device_ops *faux_ops, + const struct attribute_group **groups); +void faux_device_destroy(struct faux_device *faux_dev); + +static inline void *faux_device_get_drvdata(const struct faux_device *faux_dev) +{ + return dev_get_drvdata(&faux_dev->dev); +} + +static inline void faux_device_set_drvdata(struct faux_device *faux_dev, void *data) +{ + dev_set_drvdata(&faux_dev->dev, data); +} + +#endif /* _FAUX_DEVICE_H_ */ -- GitLab From 78418f300d3999f1cf8a9ac71065bf2eca61f4dd Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 10 Feb 2025 13:30:26 +0100 Subject: [PATCH 0604/1585] rust/kernel: Add faux device bindings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This introduces a module for working with faux devices in rust, along with adding sample code to show how the API is used. Unlike other types of devices, we don't provide any hooks for device probe/removal - since these are optional for the faux API and are unnecessary in rust. Signed-off-by: Lyude Paul Cc: Maíra Canal Cc: Danilo Krummrich Cc: Miguel Ojeda Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/2025021026-exert-accent-b4c6@gregkh Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 + rust/bindings/bindings_helper.h | 1 + rust/kernel/faux.rs | 67 ++++++++++++++++++++++++++++++++ rust/kernel/lib.rs | 1 + samples/rust/Kconfig | 10 +++++ samples/rust/Makefile | 1 + samples/rust/rust_driver_faux.rs | 29 ++++++++++++++ 7 files changed, 111 insertions(+) create mode 100644 rust/kernel/faux.rs create mode 100644 samples/rust/rust_driver_faux.rs diff --git a/MAINTAINERS b/MAINTAINERS index 25c86f47353de..19ea159b23091 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7116,8 +7116,10 @@ F: rust/kernel/device.rs F: rust/kernel/device_id.rs F: rust/kernel/devres.rs F: rust/kernel/driver.rs +F: rust/kernel/faux.rs F: rust/kernel/platform.rs F: samples/rust/rust_driver_platform.rs +F: samples/rust/rust_driver_faux.rs DRIVERS FOR OMAP ADAPTIVE VOLTAGE SCALING (AVS) M: Nishanth Menon diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 55354e4dec14e..f46cf3bb70695 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/rust/kernel/faux.rs b/rust/kernel/faux.rs new file mode 100644 index 0000000000000..5acc0c02d451f --- /dev/null +++ b/rust/kernel/faux.rs @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0-only + +//! Abstractions for the faux bus. +//! +//! This module provides bindings for working with faux devices in kernel modules. +//! +//! C header: [`include/linux/device/faux.h`] + +use crate::{bindings, device, error::code::*, prelude::*}; +use core::ptr::{addr_of_mut, null, null_mut, NonNull}; + +/// The registration of a faux device. +/// +/// This type represents the registration of a [`struct faux_device`]. When an instance of this type +/// is dropped, its respective faux device will be unregistered from the system. +/// +/// # Invariants +/// +/// `self.0` always holds a valid pointer to an initialized and registered [`struct faux_device`]. +/// +/// [`struct faux_device`]: srctree/include/linux/device/faux.h +#[repr(transparent)] +pub struct Registration(NonNull); + +impl Registration { + /// Create and register a new faux device with the given name. + pub fn new(name: &CStr) -> Result { + // SAFETY: + // - `name` is copied by this function into its own storage + // - `faux_ops` is safe to leave NULL according to the C API + let dev = unsafe { bindings::faux_device_create(name.as_char_ptr(), null_mut(), null()) }; + + // The above function will return either a valid device, or NULL on failure + // INVARIANT: The device will remain registered until faux_device_destroy() is called, which + // happens in our Drop implementation. + Ok(Self(NonNull::new(dev).ok_or(ENODEV)?)) + } + + fn as_raw(&self) -> *mut bindings::faux_device { + self.0.as_ptr() + } +} + +impl AsRef for Registration { + fn as_ref(&self) -> &device::Device { + // SAFETY: The underlying `device` in `faux_device` is guaranteed by the C API to be + // a valid initialized `device`. + unsafe { device::Device::as_ref(addr_of_mut!((*self.as_raw()).dev)) } + } +} + +impl Drop for Registration { + fn drop(&mut self) { + // SAFETY: `self.0` is a valid registered faux_device via our type invariants. + unsafe { bindings::faux_device_destroy(self.as_raw()) } + } +} + +// SAFETY: The faux device API is thread-safe as guaranteed by the device core, as long as +// faux_device_destroy() is guaranteed to only be called once - which is guaranteed by our type not +// having Copy/Clone. +unsafe impl Send for Registration {} + +// SAFETY: The faux device API is thread-safe as guaranteed by the device core, as long as +// faux_device_destroy() is guaranteed to only be called once - which is guaranteed by our type not +// having Copy/Clone. +unsafe impl Sync for Registration {} diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 496ed32b0911a..398242f92a961 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -46,6 +46,7 @@ pub mod device_id; pub mod devres; pub mod driver; pub mod error; +pub mod faux; #[cfg(CONFIG_RUST_FW_LOADER_ABSTRACTIONS)] pub mod firmware; pub mod fs; diff --git a/samples/rust/Kconfig b/samples/rust/Kconfig index 918dbead2c0b4..3b6eae84b2977 100644 --- a/samples/rust/Kconfig +++ b/samples/rust/Kconfig @@ -61,6 +61,16 @@ config SAMPLE_RUST_DRIVER_PLATFORM If unsure, say N. +config SAMPLE_RUST_DRIVER_FAUX + tristate "Faux Driver" + help + This option builds the Rust Faux driver sample. + + To compile this as a module, choose M here: + the module will be called rust_driver_faux. + + If unsure, say N. + config SAMPLE_RUST_HOSTPROGS bool "Host programs" help diff --git a/samples/rust/Makefile b/samples/rust/Makefile index 5a8ab0df0567c..0dbc6d90f1ef9 100644 --- a/samples/rust/Makefile +++ b/samples/rust/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_SAMPLE_RUST_MISC_DEVICE) += rust_misc_device.o obj-$(CONFIG_SAMPLE_RUST_PRINT) += rust_print.o obj-$(CONFIG_SAMPLE_RUST_DRIVER_PCI) += rust_driver_pci.o obj-$(CONFIG_SAMPLE_RUST_DRIVER_PLATFORM) += rust_driver_platform.o +obj-$(CONFIG_SAMPLE_RUST_DRIVER_FAUX) += rust_driver_faux.o rust_print-y := rust_print_main.o rust_print_events.o diff --git a/samples/rust/rust_driver_faux.rs b/samples/rust/rust_driver_faux.rs new file mode 100644 index 0000000000000..048c6cb98b29a --- /dev/null +++ b/samples/rust/rust_driver_faux.rs @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0-only + +//! Rust faux device sample. + +use kernel::{c_str, faux, prelude::*, Module}; + +module! { + type: SampleModule, + name: "rust_faux_driver", + author: "Lyude Paul", + description: "Rust faux device sample", + license: "GPL", +} + +struct SampleModule { + _reg: faux::Registration, +} + +impl Module for SampleModule { + fn init(_module: &'static ThisModule) -> Result { + pr_info!("Initialising Rust Faux Device Sample\n"); + + let reg = faux::Registration::new(c_str!("rust-faux-sample-device"))?; + + dev_info!(reg.as_ref(), "Hello from faux device!\n"); + + Ok(Self { _reg: reg }) + } +} -- GitLab From b4f82f9ed43aefa79bec2504ae8c29be0c0f5d1d Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 16 Jan 2025 10:35:03 -0500 Subject: [PATCH 0605/1585] Bluetooth: L2CAP: Fix slab-use-after-free Read in l2cap_send_cmd After the hci sync command releases l2cap_conn, the hci receive data work queue references the released l2cap_conn when sending to the upper layer. Add hci dev lock to the hci receive data work queue to synchronize the two. [1] BUG: KASAN: slab-use-after-free in l2cap_send_cmd+0x187/0x8d0 net/bluetooth/l2cap_core.c:954 Read of size 8 at addr ffff8880271a4000 by task kworker/u9:2/5837 CPU: 0 UID: 0 PID: 5837 Comm: kworker/u9:2 Not tainted 6.13.0-rc5-syzkaller-00163-gab75170520d4 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Workqueue: hci1 hci_rx_work Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0x169/0x550 mm/kasan/report.c:489 kasan_report+0x143/0x180 mm/kasan/report.c:602 l2cap_build_cmd net/bluetooth/l2cap_core.c:2964 [inline] l2cap_send_cmd+0x187/0x8d0 net/bluetooth/l2cap_core.c:954 l2cap_sig_send_rej net/bluetooth/l2cap_core.c:5502 [inline] l2cap_sig_channel net/bluetooth/l2cap_core.c:5538 [inline] l2cap_recv_frame+0x221f/0x10db0 net/bluetooth/l2cap_core.c:6817 hci_acldata_packet net/bluetooth/hci_core.c:3797 [inline] hci_rx_work+0x508/0xdb0 net/bluetooth/hci_core.c:4040 process_one_work kernel/workqueue.c:3229 [inline] process_scheduled_works+0xa66/0x1840 kernel/workqueue.c:3310 worker_thread+0x870/0xd30 kernel/workqueue.c:3391 kthread+0x2f0/0x390 kernel/kthread.c:389 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Allocated by task 5837: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:377 [inline] __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:394 kasan_kmalloc include/linux/kasan.h:260 [inline] __kmalloc_cache_noprof+0x243/0x390 mm/slub.c:4329 kmalloc_noprof include/linux/slab.h:901 [inline] kzalloc_noprof include/linux/slab.h:1037 [inline] l2cap_conn_add+0xa9/0x8e0 net/bluetooth/l2cap_core.c:6860 l2cap_connect_cfm+0x115/0x1090 net/bluetooth/l2cap_core.c:7239 hci_connect_cfm include/net/bluetooth/hci_core.h:2057 [inline] hci_remote_features_evt+0x68e/0xac0 net/bluetooth/hci_event.c:3726 hci_event_func net/bluetooth/hci_event.c:7473 [inline] hci_event_packet+0xac2/0x1540 net/bluetooth/hci_event.c:7525 hci_rx_work+0x3f3/0xdb0 net/bluetooth/hci_core.c:4035 process_one_work kernel/workqueue.c:3229 [inline] process_scheduled_works+0xa66/0x1840 kernel/workqueue.c:3310 worker_thread+0x870/0xd30 kernel/workqueue.c:3391 kthread+0x2f0/0x390 kernel/kthread.c:389 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Freed by task 54: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 kasan_save_free_info+0x40/0x50 mm/kasan/generic.c:582 poison_slab_object mm/kasan/common.c:247 [inline] __kasan_slab_free+0x59/0x70 mm/kasan/common.c:264 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2353 [inline] slab_free mm/slub.c:4613 [inline] kfree+0x196/0x430 mm/slub.c:4761 l2cap_connect_cfm+0xcc/0x1090 net/bluetooth/l2cap_core.c:7235 hci_connect_cfm include/net/bluetooth/hci_core.h:2057 [inline] hci_conn_failed+0x287/0x400 net/bluetooth/hci_conn.c:1266 hci_abort_conn_sync+0x56c/0x11f0 net/bluetooth/hci_sync.c:5603 hci_cmd_sync_work+0x22b/0x400 net/bluetooth/hci_sync.c:332 process_one_work kernel/workqueue.c:3229 [inline] process_scheduled_works+0xa66/0x1840 kernel/workqueue.c:3310 worker_thread+0x870/0xd30 kernel/workqueue.c:3391 kthread+0x2f0/0x390 kernel/kthread.c:389 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Reported-by: syzbot+31c2f641b850a348a734@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=31c2f641b850a348a734 Tested-by: syzbot+31c2f641b850a348a734@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 39 +++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 27b4c4a2ba1fd..adb8c33ac5953 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -948,6 +948,16 @@ static u8 l2cap_get_ident(struct l2cap_conn *conn) return id; } +static void l2cap_send_acl(struct l2cap_conn *conn, struct sk_buff *skb, + u8 flags) +{ + /* Check if the hcon still valid before attempting to send */ + if (hci_conn_valid(conn->hcon->hdev, conn->hcon)) + hci_send_acl(conn->hchan, skb, flags); + else + kfree_skb(skb); +} + static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data) { @@ -970,7 +980,7 @@ static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, bt_cb(skb)->force_active = BT_POWER_FORCE_ACTIVE_ON; skb->priority = HCI_PRIO_MAX; - hci_send_acl(conn->hchan, skb, flags); + l2cap_send_acl(conn, skb, flags); } static void l2cap_do_send(struct l2cap_chan *chan, struct sk_buff *skb) @@ -1792,13 +1802,10 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) mutex_unlock(&conn->chan_lock); - hci_chan_del(conn->hchan); - if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) cancel_delayed_work_sync(&conn->info_timer); hcon->l2cap_data = NULL; - conn->hchan = NULL; l2cap_conn_put(conn); } @@ -1806,6 +1813,7 @@ static void l2cap_conn_free(struct kref *ref) { struct l2cap_conn *conn = container_of(ref, struct l2cap_conn, ref); + hci_chan_del(conn->hchan); hci_conn_put(conn->hcon); kfree(conn); } @@ -7466,14 +7474,33 @@ static void l2cap_recv_reset(struct l2cap_conn *conn) conn->rx_len = 0; } +static struct l2cap_conn *l2cap_conn_hold_unless_zero(struct l2cap_conn *c) +{ + BT_DBG("conn %p orig refcnt %u", c, kref_read(&c->ref)); + + if (!kref_get_unless_zero(&c->ref)) + return NULL; + + return c; +} + void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) { - struct l2cap_conn *conn = hcon->l2cap_data; + struct l2cap_conn *conn; int len; + /* Lock hdev to access l2cap_data to avoid race with l2cap_conn_del */ + hci_dev_lock(hcon->hdev); + + conn = hcon->l2cap_data; + if (!conn) conn = l2cap_conn_add(hcon); + conn = l2cap_conn_hold_unless_zero(conn); + + hci_dev_unlock(hcon->hdev); + if (!conn) goto drop; @@ -7565,6 +7592,8 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) break; } + l2cap_conn_put(conn); + drop: kfree_skb(skb); } -- GitLab From 872274b992839ff64fe560767fe7ee5f942ccdb1 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Fri, 31 Jan 2025 18:30:19 +0530 Subject: [PATCH 0606/1585] Bluetooth: btintel_pcie: Fix a potential race condition On HCI_OP_RESET command, firmware raises alive interrupt. Driver needs to wait for this before sending other command. This patch fixes the potential miss of alive interrupt due to which HCI_OP_RESET can timeout. Expected flow: If tx command is HCI_OP_RESET, 1. set data->gp0_received = false 2. send HCI_OP_RESET 3. wait for alive interrupt Actual flow having potential race: If tx command is HCI_OP_RESET, 1. send HCI_OP_RESET 1a. Firmware raises alive interrupt here and in ISR data->gp0_received is set to true 2. set data->gp0_received = false 3. wait for alive interrupt Signed-off-by: Kiran K Fixes: 05c200c8f029 ("Bluetooth: btintel_pcie: Add handshake between driver and firmware") Reported-by: Bjorn Helgaas Closes: https://patchwork.kernel.org/project/bluetooth/patch/20241001104451.626964-1-kiran.k@intel.com/ Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 2b79952f3628d..091ffe3e14954 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -1320,6 +1320,10 @@ static int btintel_pcie_send_frame(struct hci_dev *hdev, if (opcode == 0xfc01) btintel_pcie_inject_cmd_complete(hdev, opcode); } + /* Firmware raises alive interrupt on HCI_OP_RESET */ + if (opcode == HCI_OP_RESET) + data->gp0_received = false; + hdev->stat.cmd_tx++; break; case HCI_ACLDATA_PKT: @@ -1357,7 +1361,6 @@ static int btintel_pcie_send_frame(struct hci_dev *hdev, opcode, btintel_pcie_alivectxt_state2str(old_ctxt), btintel_pcie_alivectxt_state2str(data->alive_intr_ctxt)); if (opcode == HCI_OP_RESET) { - data->gp0_received = false; ret = wait_event_timeout(data->gp0_wait_q, data->gp0_received, msecs_to_jiffies(BTINTEL_DEFAULT_INTR_TIMEOUT_MS)); -- GitLab From ab4eedb790cae44313759b50fe47da285e2519d5 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 6 Feb 2025 15:54:45 -0500 Subject: [PATCH 0607/1585] Bluetooth: L2CAP: Fix corrupted list in hci_chan_del This fixes the following trace by reworking the locking of l2cap_conn so instead of only locking when changing the chan_l list this promotes chan_lock to a general lock of l2cap_conn so whenever it is being held it would prevents the likes of l2cap_conn_del to run: list_del corruption, ffff888021297e00->prev is LIST_POISON2 (dead000000000122) ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:61! Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI CPU: 1 UID: 0 PID: 5896 Comm: syz-executor213 Not tainted 6.14.0-rc1-next-20250204-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 12/27/2024 RIP: 0010:__list_del_entry_valid_or_report+0x12c/0x190 lib/list_debug.c:59 Code: 8c 4c 89 fe 48 89 da e8 32 8c 37 fc 90 0f 0b 48 89 df e8 27 9f 14 fd 48 c7 c7 a0 c0 60 8c 4c 89 fe 48 89 da e8 15 8c 37 fc 90 <0f> 0b 4c 89 e7 e8 0a 9f 14 fd 42 80 3c 2b 00 74 08 4c 89 e7 e8 cb RSP: 0018:ffffc90003f6f998 EFLAGS: 00010246 RAX: 000000000000004e RBX: dead000000000122 RCX: 01454d423f7fbf00 RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000 RBP: dffffc0000000000 R08: ffffffff819f077c R09: 1ffff920007eded0 R10: dffffc0000000000 R11: fffff520007eded1 R12: dead000000000122 R13: dffffc0000000000 R14: ffff8880352248d8 R15: ffff888021297e00 FS: 00007f7ace6686c0(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7aceeeb1d0 CR3: 000000003527c000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __list_del_entry_valid include/linux/list.h:124 [inline] __list_del_entry include/linux/list.h:215 [inline] list_del_rcu include/linux/rculist.h:168 [inline] hci_chan_del+0x70/0x1b0 net/bluetooth/hci_conn.c:2858 l2cap_conn_free net/bluetooth/l2cap_core.c:1816 [inline] kref_put include/linux/kref.h:65 [inline] l2cap_conn_put+0x70/0xe0 net/bluetooth/l2cap_core.c:1830 l2cap_sock_shutdown+0xa8a/0x1020 net/bluetooth/l2cap_sock.c:1377 l2cap_sock_release+0x79/0x1d0 net/bluetooth/l2cap_sock.c:1416 __sock_release net/socket.c:642 [inline] sock_close+0xbc/0x240 net/socket.c:1393 __fput+0x3e9/0x9f0 fs/file_table.c:448 task_work_run+0x24f/0x310 kernel/task_work.c:227 ptrace_notify+0x2d2/0x380 kernel/signal.c:2522 ptrace_report_syscall include/linux/ptrace.h:415 [inline] ptrace_report_syscall_exit include/linux/ptrace.h:477 [inline] syscall_exit_work+0xc7/0x1d0 kernel/entry/common.c:173 syscall_exit_to_user_mode_prepare kernel/entry/common.c:200 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:205 [inline] syscall_exit_to_user_mode+0x24a/0x340 kernel/entry/common.c:218 do_syscall_64+0x100/0x230 arch/x86/entry/common.c:89 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f7aceeaf449 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 41 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f7ace668218 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: fffffffffffffffc RBX: 00007f7acef39328 RCX: 00007f7aceeaf449 RDX: 000000000000000e RSI: 0000000020000100 RDI: 0000000000000004 RBP: 00007f7acef39320 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000003 R13: 0000000000000004 R14: 00007f7ace668670 R15: 000000000000000b Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:__list_del_entry_valid_or_report+0x12c/0x190 lib/list_debug.c:59 Code: 8c 4c 89 fe 48 89 da e8 32 8c 37 fc 90 0f 0b 48 89 df e8 27 9f 14 fd 48 c7 c7 a0 c0 60 8c 4c 89 fe 48 89 da e8 15 8c 37 fc 90 <0f> 0b 4c 89 e7 e8 0a 9f 14 fd 42 80 3c 2b 00 74 08 4c 89 e7 e8 cb RSP: 0018:ffffc90003f6f998 EFLAGS: 00010246 RAX: 000000000000004e RBX: dead000000000122 RCX: 01454d423f7fbf00 RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000 RBP: dffffc0000000000 R08: ffffffff819f077c R09: 1ffff920007eded0 R10: dffffc0000000000 R11: fffff520007eded1 R12: dead000000000122 R13: dffffc0000000000 R14: ffff8880352248d8 R15: ffff888021297e00 FS: 00007f7ace6686c0(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7acef05b08 CR3: 000000003527c000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Reported-by: syzbot+10bd8fe6741eedd2be2e@syzkaller.appspotmail.com Tested-by: syzbot+10bd8fe6741eedd2be2e@syzkaller.appspotmail.com Fixes: b4f82f9ed43a ("Bluetooth: L2CAP: Fix slab-use-after-free Read in l2cap_send_cmd") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Dan Carpenter --- include/net/bluetooth/l2cap.h | 3 +- net/bluetooth/l2cap_core.c | 138 ++++++++++++---------------------- net/bluetooth/l2cap_sock.c | 15 ++-- 3 files changed, 58 insertions(+), 98 deletions(-) diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index d9c767cf773de..9189354c568f4 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -668,7 +668,7 @@ struct l2cap_conn { struct l2cap_chan *smp; struct list_head chan_l; - struct mutex chan_lock; + struct mutex lock; struct kref ref; struct list_head users; }; @@ -970,6 +970,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err); void l2cap_send_conn_req(struct l2cap_chan *chan); struct l2cap_conn *l2cap_conn_get(struct l2cap_conn *conn); +struct l2cap_conn *l2cap_conn_hold_unless_zero(struct l2cap_conn *conn); void l2cap_conn_put(struct l2cap_conn *conn); int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index adb8c33ac5953..fec11e576f310 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -119,7 +119,6 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, { struct l2cap_chan *c; - mutex_lock(&conn->chan_lock); c = __l2cap_get_chan_by_scid(conn, cid); if (c) { /* Only lock if chan reference is not 0 */ @@ -127,7 +126,6 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, if (c) l2cap_chan_lock(c); } - mutex_unlock(&conn->chan_lock); return c; } @@ -140,7 +138,6 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn, { struct l2cap_chan *c; - mutex_lock(&conn->chan_lock); c = __l2cap_get_chan_by_dcid(conn, cid); if (c) { /* Only lock if chan reference is not 0 */ @@ -148,7 +145,6 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn, if (c) l2cap_chan_lock(c); } - mutex_unlock(&conn->chan_lock); return c; } @@ -418,7 +414,7 @@ static void l2cap_chan_timeout(struct work_struct *work) if (!conn) return; - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); /* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling * this work. No need to call l2cap_chan_hold(chan) here again. */ @@ -439,7 +435,7 @@ static void l2cap_chan_timeout(struct work_struct *work) l2cap_chan_unlock(chan); l2cap_chan_put(chan); - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); } struct l2cap_chan *l2cap_chan_create(void) @@ -641,9 +637,9 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) { - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); __l2cap_chan_add(conn, chan); - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); } void l2cap_chan_del(struct l2cap_chan *chan, int err) @@ -731,9 +727,9 @@ void l2cap_chan_list(struct l2cap_conn *conn, l2cap_chan_func_t func, if (!conn) return; - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); __l2cap_chan_list(conn, func, data); - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); } EXPORT_SYMBOL_GPL(l2cap_chan_list); @@ -745,7 +741,7 @@ static void l2cap_conn_update_id_addr(struct work_struct *work) struct hci_conn *hcon = conn->hcon; struct l2cap_chan *chan; - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); list_for_each_entry(chan, &conn->chan_l, list) { l2cap_chan_lock(chan); @@ -754,7 +750,7 @@ static void l2cap_conn_update_id_addr(struct work_struct *work) l2cap_chan_unlock(chan); } - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); } static void l2cap_chan_le_connect_reject(struct l2cap_chan *chan) @@ -1507,8 +1503,6 @@ static void l2cap_conn_start(struct l2cap_conn *conn) BT_DBG("conn %p", conn); - mutex_lock(&conn->chan_lock); - list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) { l2cap_chan_lock(chan); @@ -1577,8 +1571,6 @@ static void l2cap_conn_start(struct l2cap_conn *conn) l2cap_chan_unlock(chan); } - - mutex_unlock(&conn->chan_lock); } static void l2cap_le_conn_ready(struct l2cap_conn *conn) @@ -1624,7 +1616,7 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) if (hcon->type == ACL_LINK) l2cap_request_info(conn); - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); list_for_each_entry(chan, &conn->chan_l, list) { @@ -1642,7 +1634,7 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) l2cap_chan_unlock(chan); } - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); if (hcon->type == LE_LINK) l2cap_le_conn_ready(conn); @@ -1657,14 +1649,10 @@ static void l2cap_conn_unreliable(struct l2cap_conn *conn, int err) BT_DBG("conn %p", conn); - mutex_lock(&conn->chan_lock); - list_for_each_entry(chan, &conn->chan_l, list) { if (test_bit(FLAG_FORCE_RELIABLE, &chan->flags)) l2cap_chan_set_err(chan, err); } - - mutex_unlock(&conn->chan_lock); } static void l2cap_info_timeout(struct work_struct *work) @@ -1675,7 +1663,9 @@ static void l2cap_info_timeout(struct work_struct *work) conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; conn->info_ident = 0; + mutex_lock(&conn->lock); l2cap_conn_start(conn); + mutex_unlock(&conn->lock); } /* @@ -1767,6 +1757,8 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); + mutex_lock(&conn->lock); + kfree_skb(conn->rx_skb); skb_queue_purge(&conn->pending_rx); @@ -1785,8 +1777,6 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) /* Force the connection to be immediately dropped */ hcon->disc_timeout = 0; - mutex_lock(&conn->chan_lock); - /* Kill channels */ list_for_each_entry_safe(chan, l, &conn->chan_l, list) { l2cap_chan_hold(chan); @@ -1800,12 +1790,14 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) l2cap_chan_put(chan); } - mutex_unlock(&conn->chan_lock); - if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) cancel_delayed_work_sync(&conn->info_timer); + hci_chan_del(conn->hchan); + conn->hchan = NULL; + hcon->l2cap_data = NULL; + mutex_unlock(&conn->lock); l2cap_conn_put(conn); } @@ -1813,7 +1805,6 @@ static void l2cap_conn_free(struct kref *ref) { struct l2cap_conn *conn = container_of(ref, struct l2cap_conn, ref); - hci_chan_del(conn->hchan); hci_conn_put(conn->hcon); kfree(conn); } @@ -2924,8 +2915,6 @@ static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG("conn %p", conn); - mutex_lock(&conn->chan_lock); - list_for_each_entry(chan, &conn->chan_l, list) { if (chan->chan_type != L2CAP_CHAN_RAW) continue; @@ -2940,8 +2929,6 @@ static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb) if (chan->ops->recv(chan, nskb)) kfree_skb(nskb); } - - mutex_unlock(&conn->chan_lock); } /* ---- L2CAP signalling commands ---- */ @@ -3960,7 +3947,6 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, goto response; } - mutex_lock(&conn->chan_lock); l2cap_chan_lock(pchan); /* Check if the ACL is secure enough (if not SDP) */ @@ -4067,7 +4053,6 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, } l2cap_chan_unlock(pchan); - mutex_unlock(&conn->chan_lock); l2cap_chan_put(pchan); } @@ -4106,27 +4091,19 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, BT_DBG("dcid 0x%4.4x scid 0x%4.4x result 0x%2.2x status 0x%2.2x", dcid, scid, result, status); - mutex_lock(&conn->chan_lock); - if (scid) { chan = __l2cap_get_chan_by_scid(conn, scid); - if (!chan) { - err = -EBADSLT; - goto unlock; - } + if (!chan) + return -EBADSLT; } else { chan = __l2cap_get_chan_by_ident(conn, cmd->ident); - if (!chan) { - err = -EBADSLT; - goto unlock; - } + if (!chan) + return -EBADSLT; } chan = l2cap_chan_hold_unless_zero(chan); - if (!chan) { - err = -EBADSLT; - goto unlock; - } + if (!chan) + return -EBADSLT; err = 0; @@ -4164,9 +4141,6 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, l2cap_chan_unlock(chan); l2cap_chan_put(chan); -unlock: - mutex_unlock(&conn->chan_lock); - return err; } @@ -4454,11 +4428,7 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, chan->ops->set_shutdown(chan); - l2cap_chan_unlock(chan); - mutex_lock(&conn->chan_lock); - l2cap_chan_lock(chan); l2cap_chan_del(chan, ECONNRESET); - mutex_unlock(&conn->chan_lock); chan->ops->close(chan); @@ -4495,11 +4465,7 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, return 0; } - l2cap_chan_unlock(chan); - mutex_lock(&conn->chan_lock); - l2cap_chan_lock(chan); l2cap_chan_del(chan, 0); - mutex_unlock(&conn->chan_lock); chan->ops->close(chan); @@ -4697,13 +4663,9 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn, BT_DBG("dcid 0x%4.4x mtu %u mps %u credits %u result 0x%2.2x", dcid, mtu, mps, credits, result); - mutex_lock(&conn->chan_lock); - chan = __l2cap_get_chan_by_ident(conn, cmd->ident); - if (!chan) { - err = -EBADSLT; - goto unlock; - } + if (!chan) + return -EBADSLT; err = 0; @@ -4751,9 +4713,6 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn, l2cap_chan_unlock(chan); -unlock: - mutex_unlock(&conn->chan_lock); - return err; } @@ -4865,7 +4824,6 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, goto response; } - mutex_lock(&conn->chan_lock); l2cap_chan_lock(pchan); if (!smp_sufficient_security(conn->hcon, pchan->sec_level, @@ -4931,7 +4889,6 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, response_unlock: l2cap_chan_unlock(pchan); - mutex_unlock(&conn->chan_lock); l2cap_chan_put(pchan); if (result == L2CAP_CR_PEND) @@ -5065,7 +5022,6 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, goto response; } - mutex_lock(&conn->chan_lock); l2cap_chan_lock(pchan); if (!smp_sufficient_security(conn->hcon, pchan->sec_level, @@ -5140,7 +5096,6 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, unlock: l2cap_chan_unlock(pchan); - mutex_unlock(&conn->chan_lock); l2cap_chan_put(pchan); response: @@ -5177,8 +5132,6 @@ static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn, BT_DBG("mtu %u mps %u credits %u result 0x%4.4x", mtu, mps, credits, result); - mutex_lock(&conn->chan_lock); - cmd_len -= sizeof(*rsp); list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) { @@ -5264,8 +5217,6 @@ static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn, l2cap_chan_unlock(chan); } - mutex_unlock(&conn->chan_lock); - return err; } @@ -5378,8 +5329,6 @@ static inline int l2cap_le_command_rej(struct l2cap_conn *conn, if (cmd_len < sizeof(*rej)) return -EPROTO; - mutex_lock(&conn->chan_lock); - chan = __l2cap_get_chan_by_ident(conn, cmd->ident); if (!chan) goto done; @@ -5394,7 +5343,6 @@ static inline int l2cap_le_command_rej(struct l2cap_conn *conn, l2cap_chan_put(chan); done: - mutex_unlock(&conn->chan_lock); return 0; } @@ -6849,8 +6797,12 @@ static void process_pending_rx(struct work_struct *work) BT_DBG(""); + mutex_lock(&conn->lock); + while ((skb = skb_dequeue(&conn->pending_rx))) l2cap_recv_frame(conn, skb); + + mutex_unlock(&conn->lock); } static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) @@ -6889,7 +6841,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) conn->local_fixed_chan |= L2CAP_FC_SMP_BREDR; mutex_init(&conn->ident_lock); - mutex_init(&conn->chan_lock); + mutex_init(&conn->lock); INIT_LIST_HEAD(&conn->chan_l); INIT_LIST_HEAD(&conn->users); @@ -7080,7 +7032,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, } } - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); l2cap_chan_lock(chan); if (cid && __l2cap_get_chan_by_dcid(conn, cid)) { @@ -7121,7 +7073,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, chan_unlock: l2cap_chan_unlock(chan); - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); done: hci_dev_unlock(hdev); hci_dev_put(hdev); @@ -7333,7 +7285,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) BT_DBG("conn %p status 0x%2.2x encrypt %u", conn, status, encrypt); - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); list_for_each_entry(chan, &conn->chan_l, list) { l2cap_chan_lock(chan); @@ -7407,7 +7359,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) l2cap_chan_unlock(chan); } - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); } /* Append fragment into frame respecting the maximum len of rx_skb */ @@ -7474,8 +7426,11 @@ static void l2cap_recv_reset(struct l2cap_conn *conn) conn->rx_len = 0; } -static struct l2cap_conn *l2cap_conn_hold_unless_zero(struct l2cap_conn *c) +struct l2cap_conn *l2cap_conn_hold_unless_zero(struct l2cap_conn *c) { + if (!c) + return NULL; + BT_DBG("conn %p orig refcnt %u", c, kref_read(&c->ref)); if (!kref_get_unless_zero(&c->ref)) @@ -7501,11 +7456,15 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) hci_dev_unlock(hcon->hdev); - if (!conn) - goto drop; + if (!conn) { + kfree_skb(skb); + return; + } BT_DBG("conn %p len %u flags 0x%x", conn, skb->len, flags); + mutex_lock(&conn->lock); + switch (flags) { case ACL_START: case ACL_START_NO_FLUSH: @@ -7530,7 +7489,7 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) if (len == skb->len) { /* Complete frame received */ l2cap_recv_frame(conn, skb); - return; + goto unlock; } BT_DBG("Start: total len %d, frag len %u", len, skb->len); @@ -7592,10 +7551,11 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) break; } - l2cap_conn_put(conn); - drop: kfree_skb(skb); +unlock: + mutex_unlock(&conn->lock); + l2cap_conn_put(conn); } static struct hci_cb l2cap_cb = { diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 46ea0bee2259f..acd11b268b98a 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1326,9 +1326,10 @@ static int l2cap_sock_shutdown(struct socket *sock, int how) /* prevent sk structure from being freed whilst unlocked */ sock_hold(sk); - chan = l2cap_pi(sk)->chan; /* prevent chan structure from being freed whilst unlocked */ - l2cap_chan_hold(chan); + chan = l2cap_chan_hold_unless_zero(l2cap_pi(sk)->chan); + if (!chan) + goto shutdown_already; BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); @@ -1358,22 +1359,20 @@ static int l2cap_sock_shutdown(struct socket *sock, int how) release_sock(sk); l2cap_chan_lock(chan); - conn = chan->conn; - if (conn) - /* prevent conn structure from being freed */ - l2cap_conn_get(conn); + /* prevent conn structure from being freed */ + conn = l2cap_conn_hold_unless_zero(chan->conn); l2cap_chan_unlock(chan); if (conn) /* mutex lock must be taken before l2cap_chan_lock() */ - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); l2cap_chan_lock(chan); l2cap_chan_close(chan, 0); l2cap_chan_unlock(chan); if (conn) { - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); l2cap_conn_put(conn); } -- GitLab From 5bef3ac184b5626ea62385d6b82a1992b89d7940 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Feb 2025 13:49:28 +0000 Subject: [PATCH 0608/1585] team: better TEAM_OPTION_TYPE_STRING validation syzbot reported following splat [1] Make sure user-provided data contains one nul byte. [1] BUG: KMSAN: uninit-value in string_nocheck lib/vsprintf.c:633 [inline] BUG: KMSAN: uninit-value in string+0x3ec/0x5f0 lib/vsprintf.c:714 string_nocheck lib/vsprintf.c:633 [inline] string+0x3ec/0x5f0 lib/vsprintf.c:714 vsnprintf+0xa5d/0x1960 lib/vsprintf.c:2843 __request_module+0x252/0x9f0 kernel/module/kmod.c:149 team_mode_get drivers/net/team/team_core.c:480 [inline] team_change_mode drivers/net/team/team_core.c:607 [inline] team_mode_option_set+0x437/0x970 drivers/net/team/team_core.c:1401 team_option_set drivers/net/team/team_core.c:375 [inline] team_nl_options_set_doit+0x1339/0x1f90 drivers/net/team/team_core.c:2662 genl_family_rcv_msg_doit net/netlink/genetlink.c:1115 [inline] genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline] genl_rcv_msg+0x1214/0x12c0 net/netlink/genetlink.c:1210 netlink_rcv_skb+0x375/0x650 net/netlink/af_netlink.c:2543 genl_rcv+0x40/0x60 net/netlink/genetlink.c:1219 netlink_unicast_kernel net/netlink/af_netlink.c:1322 [inline] netlink_unicast+0xf52/0x1260 net/netlink/af_netlink.c:1348 netlink_sendmsg+0x10da/0x11e0 net/netlink/af_netlink.c:1892 sock_sendmsg_nosec net/socket.c:718 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:733 ____sys_sendmsg+0x877/0xb60 net/socket.c:2573 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2627 __sys_sendmsg net/socket.c:2659 [inline] __do_sys_sendmsg net/socket.c:2664 [inline] __se_sys_sendmsg net/socket.c:2662 [inline] __x64_sys_sendmsg+0x212/0x3c0 net/socket.c:2662 x64_sys_call+0x2ed6/0x3c30 arch/x86/include/generated/asm/syscalls_64.h:47 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") Reported-by: syzbot+1fcd957a82e3a1baa94d@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=1fcd957a82e3a1baa94d Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20250212134928.1541609-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/team/team_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index dc7cbd6a9798a..f4019815f4736 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -2639,7 +2639,9 @@ int team_nl_options_set_doit(struct sk_buff *skb, struct genl_info *info) ctx.data.u32_val = nla_get_u32(attr_data); break; case TEAM_OPTION_TYPE_STRING: - if (nla_len(attr_data) > TEAM_STRING_MAX_LEN) { + if (nla_len(attr_data) > TEAM_STRING_MAX_LEN || + !memchr(nla_data(attr_data), '\0', + nla_len(attr_data))) { err = -EINVAL; goto team_put; } -- GitLab From a527750d877fd334de87eef81f1cb5f0f0ca3373 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Feb 2025 14:10:21 +0000 Subject: [PATCH 0609/1585] ipv6: mcast: add RCU protection to mld_newpack() mld_newpack() can be called without RTNL or RCU being held. Note that we no longer can use sock_alloc_send_skb() because ipv6.igmp_sk uses GFP_KERNEL allocations which can sleep. Instead use alloc_skb() and charge the net->ipv6.igmp_sk socket under RCU protection. Fixes: b8ad0cbc58f7 ("[NETNS][IPV6] mcast - handle several network namespace") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://patch.msgid.link/20250212141021.1663666-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/mcast.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 81a739ebf7094..65831b4fee1fd 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1773,21 +1773,19 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) struct net_device *dev = idev->dev; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - struct net *net = dev_net(dev); const struct in6_addr *saddr; struct in6_addr addr_buf; struct mld2_report *pmr; struct sk_buff *skb; unsigned int size; struct sock *sk; - int err; + struct net *net; - sk = net->ipv6.igmp_sk; /* we assume size > sizeof(ra) here * Also try to not allocate high-order pages for big MTU */ size = min_t(int, mtu, PAGE_SIZE / 2) + hlen + tlen; - skb = sock_alloc_send_skb(sk, size, 1, &err); + skb = alloc_skb(size, GFP_KERNEL); if (!skb) return NULL; @@ -1795,6 +1793,12 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) skb_reserve(skb, hlen); skb_tailroom_reserve(skb, mtu, tlen); + rcu_read_lock(); + + net = dev_net_rcu(dev); + sk = net->ipv6.igmp_sk; + skb_set_owner_w(skb, sk); + if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) { /* : * use unspecified address as the source address @@ -1806,6 +1810,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) ip6_mc_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0); + rcu_read_unlock(); + skb_put_data(skb, ra, sizeof(ra)); skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data); -- GitLab From fee5d688940690cc845937459e340e4e02598e90 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Wed, 12 Feb 2025 23:23:11 +0800 Subject: [PATCH 0610/1585] mlxsw: Add return value check for mlxsw_sp_port_get_stats_raw() Add a check for the return value of mlxsw_sp_port_get_stats_raw() in __mlxsw_sp_port_get_stats(). If mlxsw_sp_port_get_stats_raw() returns an error, exit the function to prevent further processing with potentially invalid data. Fixes: 614d509aa1e7 ("mlxsw: Move ethtool_ops to spectrum_ethtool.c") Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Wentao Liang Reviewed-by: Petr Machata Link: https://patch.msgid.link/20250212152311.1332-1-vulab@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index 2bed8c86b7cfc..3f64cdbabfa3c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -768,7 +768,9 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, err = mlxsw_sp_get_hw_stats_by_group(&hw_stats, &len, grp); if (err) return; - mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl); + err = mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl); + if (err) + return; for (i = 0; i < len; i++) { data[data_index + i] = hw_stats[i].getter(ppcnt_pl); if (!hw_stats[i].cells_bytes) -- GitLab From 0d0b752f2497471ddd2b32143d167d42e18a8f3c Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Wed, 12 Feb 2025 17:36:59 +0100 Subject: [PATCH 0611/1585] s390/qeth: move netif_napi_add_tx() and napi_enable() from under BH Like other drivers qeth is calling local_bh_enable() after napi_schedule() to kick-start softirqs [0]. Since netif_napi_add_tx() and napi_enable() now take the netdev_lock() mutex [1], move them out from under the BH protection. Same solution as in commit a60558644e20 ("wifi: mt76: move napi_enable() from under BH") Fixes: 1b23cdbd2bbc ("net: protect netdev->napi_list with netdev_lock()") Link: https://lore.kernel.org/netdev/20240612181900.4d9d18d0@kernel.org/ [0] Link: https://lore.kernel.org/netdev/20250115035319.559603-1-kuba@kernel.org/ [1] Signed-off-by: Alexandra Winter Acked-by: Joe Damato Link: https://patch.msgid.link/20250212163659.2287292-1-wintera@linux.ibm.com Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_core_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index a3adaec5504e4..20328d695ef92 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -7050,14 +7050,16 @@ int qeth_open(struct net_device *dev) card->data.state = CH_STATE_UP; netif_tx_start_all_queues(dev); - local_bh_disable(); qeth_for_each_output_queue(card, queue, i) { netif_napi_add_tx(dev, &queue->napi, qeth_tx_poll); napi_enable(&queue->napi); - napi_schedule(&queue->napi); } - napi_enable(&card->napi); + + local_bh_disable(); + qeth_for_each_output_queue(card, queue, i) { + napi_schedule(&queue->napi); + } napi_schedule(&card->napi); /* kick-start the NAPI softirq: */ local_bh_enable(); -- GitLab From 0760d62dad5d3e76c2aa175d9bc42b5f664967c2 Mon Sep 17 00:00:00 2001 From: Devaansh Kumar Date: Tue, 11 Feb 2025 22:48:48 +0530 Subject: [PATCH 0612/1585] sched_ext: selftests: Fix grammar in tests description Fixed grammar for a few tests of sched_ext. Signed-off-by: Devaansh Kumar Signed-off-by: Tejun Heo --- tools/testing/selftests/sched_ext/init_enable_count.c | 2 +- tools/testing/selftests/sched_ext/maybe_null.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/sched_ext/init_enable_count.c b/tools/testing/selftests/sched_ext/init_enable_count.c index 0f3eddc7a17a0..eddf9e0e26e7f 100644 --- a/tools/testing/selftests/sched_ext/init_enable_count.c +++ b/tools/testing/selftests/sched_ext/init_enable_count.c @@ -150,7 +150,7 @@ static enum scx_test_status run(void *ctx) struct scx_test init_enable_count = { .name = "init_enable_count", - .description = "Verify we do the correct amount of counting of init, " + .description = "Verify we correctly count the occurrences of init, " "enable, etc callbacks.", .run = run, }; diff --git a/tools/testing/selftests/sched_ext/maybe_null.c b/tools/testing/selftests/sched_ext/maybe_null.c index 31cfafb0cf65a..aacf0c58ca4fa 100644 --- a/tools/testing/selftests/sched_ext/maybe_null.c +++ b/tools/testing/selftests/sched_ext/maybe_null.c @@ -43,7 +43,7 @@ static enum scx_test_status run(void *ctx) struct scx_test maybe_null = { .name = "maybe_null", - .description = "Verify if PTR_MAYBE_NULL work for .dispatch", + .description = "Verify if PTR_MAYBE_NULL works for .dispatch", .run = run, }; REGISTER_SCX_TEST(&maybe_null) -- GitLab From a8972d5a49b408248294b5ecbdd0a085e4726349 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Fri, 27 Sep 2024 09:53:05 -0400 Subject: [PATCH 0613/1585] drm: panel: jd9365da-h3: fix reset signal polarity In jadard_prepare() a reset pulse is generated with the following statements (delays ommited for clarity): gpiod_set_value(jadard->reset, 1); --> Deassert reset gpiod_set_value(jadard->reset, 0); --> Assert reset for 10ms gpiod_set_value(jadard->reset, 1); --> Deassert reset However, specifying second argument of "0" to gpiod_set_value() means to deassert the GPIO, and "1" means to assert it. If the reset signal is defined as GPIO_ACTIVE_LOW in the DTS, the above statements will incorrectly generate the reset pulse (inverted) and leave it asserted (LOW) at the end of jadard_prepare(). Fix reset behavior by inverting gpiod_set_value() second argument in jadard_prepare(). Also modify second argument to devm_gpiod_get() in jadard_dsi_probe() to assert the reset when probing. Do not modify it in jadard_unprepare() as it is already properly asserted with "1", which seems to be the intended behavior. Fixes: 6b818c533dd8 ("drm: panel: Add Jadard JD9365DA-H3 DSI panel") Cc: stable@vger.kernel.org Signed-off-by: Hugo Villeneuve Reviewed-by: Neil Armstrong Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20240927135306.857617-1-hugo@hugovil.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240927135306.857617-1-hugo@hugovil.com --- drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c b/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c index 45d09e6fa667f..7d68a8acfe2ea 100644 --- a/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c +++ b/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c @@ -109,13 +109,13 @@ static int jadard_prepare(struct drm_panel *panel) if (jadard->desc->lp11_to_reset_delay_ms) msleep(jadard->desc->lp11_to_reset_delay_ms); - gpiod_set_value(jadard->reset, 1); + gpiod_set_value(jadard->reset, 0); msleep(5); - gpiod_set_value(jadard->reset, 0); + gpiod_set_value(jadard->reset, 1); msleep(10); - gpiod_set_value(jadard->reset, 1); + gpiod_set_value(jadard->reset, 0); msleep(130); ret = jadard->desc->init(jadard); @@ -1130,7 +1130,7 @@ static int jadard_dsi_probe(struct mipi_dsi_device *dsi) dsi->format = desc->format; dsi->lanes = desc->lanes; - jadard->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW); + jadard->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(jadard->reset)) { DRM_DEV_ERROR(&dsi->dev, "failed to get our reset GPIO\n"); return PTR_ERR(jadard->reset); -- GitLab From 2e2006c91c842c551521434466f9b4324719c9a7 Mon Sep 17 00:00:00 2001 From: Chuyi Zhou Date: Wed, 12 Feb 2025 15:19:36 +0800 Subject: [PATCH 0614/1585] sched_ext: Fix the incorrect bpf_list kfunc API in common.bpf.h. Now BPF only supports bpf_list_push_{front,back}_impl kfunc, not bpf_list_ push_{front,back}. This patch fix this issue. Without this patch, if we use bpf_list kfunc in scx, the BPF verifier would complain: libbpf: extern (func ksym) 'bpf_list_push_back': not found in kernel or module BTFs libbpf: failed to load object 'scx_foo' libbpf: failed to load BPF skeleton 'scx_foo': -EINVAL With this patch, the bpf list kfunc will work as expected. Signed-off-by: Chuyi Zhou Fixes: 2a52ca7c98960 ("sched_ext: Add scx_simple and scx_example_qmap example schedulers") Signed-off-by: Tejun Heo --- tools/sched_ext/include/scx/common.bpf.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index d72b60a0c582c..7849405614b15 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -270,8 +270,16 @@ void bpf_obj_drop_impl(void *kptr, void *meta) __ksym; #define bpf_obj_new(type) ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL)) #define bpf_obj_drop(kptr) bpf_obj_drop_impl(kptr, NULL) -void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) __ksym; -void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) __ksym; +int bpf_list_push_front_impl(struct bpf_list_head *head, + struct bpf_list_node *node, + void *meta, __u64 off) __ksym; +#define bpf_list_push_front(head, node) bpf_list_push_front_impl(head, node, NULL, 0) + +int bpf_list_push_back_impl(struct bpf_list_head *head, + struct bpf_list_node *node, + void *meta, __u64 off) __ksym; +#define bpf_list_push_back(head, node) bpf_list_push_back_impl(head, node, NULL, 0) + struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ksym; struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym; struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root, -- GitLab From 0892b840318daa6ae739b7cdec5ecdfca4006689 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 13 Feb 2025 08:49:44 -0800 Subject: [PATCH 0615/1585] Reapply "net: skb: introduce and use a single page frag cache" This reverts commit 011b0335903832facca86cd8ed05d7d8d94c9c76. Sabrina reports that the revert may trigger warnings due to intervening changes, especially the ability to rise MAX_SKB_FRAGS. Let's drop it and revisit once that part is also ironed out. Fixes: 011b03359038 ("Revert "net: skb: introduce and use a single page frag cache"") Reported-by: Sabrina Dubroca Link: https://lore.kernel.org/6bf54579233038bc0e76056c5ea459872ce362ab.1739375933.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + net/core/dev.c | 17 ------- net/core/skbuff.c | 103 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 99 insertions(+), 22 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 365f0e2098d13..c0a86afb85daa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4115,6 +4115,7 @@ void netif_receive_skb_list(struct list_head *head); gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); +void napi_get_frags_check(struct napi_struct *napi); gro_result_t napi_gro_frags(struct napi_struct *napi); static inline void napi_free_frags(struct napi_struct *napi) diff --git a/net/core/dev.c b/net/core/dev.c index 55e356a68db66..b91658e8aedb4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6920,23 +6920,6 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi) list_add_rcu(&napi->dev_list, higher); /* adds after higher */ } -/* Double check that napi_get_frags() allocates skbs with - * skb->head being backed by slab, not a page fragment. - * This is to make sure bug fixed in 3226b158e67c - * ("net: avoid 32 x truesize under-estimation for tiny skbs") - * does not accidentally come back. - */ -static void napi_get_frags_check(struct napi_struct *napi) -{ - struct sk_buff *skb; - - local_bh_disable(); - skb = napi_get_frags(napi); - WARN_ON_ONCE(skb && skb->head_frag); - napi_free_frags(napi); - local_bh_enable(); -} - void netif_napi_add_weight_locked(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6a99c453397fc..a441613a1e6c1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -220,9 +220,67 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr) #define NAPI_SKB_CACHE_BULK 16 #define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2) +#if PAGE_SIZE == SZ_4K + +#define NAPI_HAS_SMALL_PAGE_FRAG 1 +#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc) + +/* specialized page frag allocator using a single order 0 page + * and slicing it into 1K sized fragment. Constrained to systems + * with a very limited amount of 1K fragments fitting a single + * page - to avoid excessive truesize underestimation + */ + +struct page_frag_1k { + void *va; + u16 offset; + bool pfmemalloc; +}; + +static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp) +{ + struct page *page; + int offset; + + offset = nc->offset - SZ_1K; + if (likely(offset >= 0)) + goto use_frag; + + page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); + if (!page) + return NULL; + + nc->va = page_address(page); + nc->pfmemalloc = page_is_pfmemalloc(page); + offset = PAGE_SIZE - SZ_1K; + page_ref_add(page, offset / SZ_1K); + +use_frag: + nc->offset = offset; + return nc->va + offset; +} +#else + +/* the small page is actually unused in this build; add dummy helpers + * to please the compiler and avoid later preprocessor's conditionals + */ +#define NAPI_HAS_SMALL_PAGE_FRAG 0 +#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false + +struct page_frag_1k { +}; + +static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask) +{ + return NULL; +} + +#endif + struct napi_alloc_cache { local_lock_t bh_lock; struct page_frag_cache page; + struct page_frag_1k page_small; unsigned int skb_count; void *skb_cache[NAPI_SKB_CACHE_SIZE]; }; @@ -232,6 +290,23 @@ static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = { .bh_lock = INIT_LOCAL_LOCK(bh_lock), }; +/* Double check that napi_get_frags() allocates skbs with + * skb->head being backed by slab, not a page fragment. + * This is to make sure bug fixed in 3226b158e67c + * ("net: avoid 32 x truesize under-estimation for tiny skbs") + * does not accidentally come back. + */ +void napi_get_frags_check(struct napi_struct *napi) +{ + struct sk_buff *skb; + + local_bh_disable(); + skb = napi_get_frags(napi); + WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag); + napi_free_frags(napi); + local_bh_enable(); +} + void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); @@ -738,8 +813,10 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) /* If requested length is either too small or too big, * we use kmalloc() for skb->head allocation. + * When the small frag allocator is available, prefer it over kmalloc + * for small fragments */ - if (len <= SKB_WITH_OVERHEAD(1024) || + if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) || len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI, @@ -749,16 +826,32 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) goto skb_success; } - len = SKB_HEAD_ALIGN(len); - if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; local_lock_nested_bh(&napi_alloc_cache.bh_lock); nc = this_cpu_ptr(&napi_alloc_cache); + if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) { + /* we are artificially inflating the allocation size, but + * that is not as bad as it may look like, as: + * - 'len' less than GRO_MAX_HEAD makes little sense + * - On most systems, larger 'len' values lead to fragment + * size above 512 bytes + * - kmalloc would use the kmalloc-1k slab for such values + * - Builds with smaller GRO_MAX_HEAD will very likely do + * little networking, as that implies no WiFi and no + * tunnels support, and 32 bits arches. + */ + len = SZ_1K; - data = page_frag_alloc(&nc->page, len, gfp_mask); - pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page); + data = page_frag_alloc_1k(&nc->page_small, gfp_mask); + pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small); + } else { + len = SKB_HEAD_ALIGN(len); + + data = page_frag_alloc(&nc->page, len, gfp_mask); + pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page); + } local_unlock_nested_bh(&napi_alloc_cache.bh_lock); if (unlikely(!data)) -- GitLab From f5717c93a1b999970f3a64d771a1a9ee68cc37d0 Mon Sep 17 00:00:00 2001 From: Chuyi Zhou Date: Wed, 12 Feb 2025 21:09:35 +0800 Subject: [PATCH 0616/1585] sched_ext: Use SCX_CALL_OP_TASK in task_tick_scx Now when we use scx_bpf_task_cgroup() in ops.tick() to get the cgroup of the current task, the following error will occur: scx_foo[3795244] triggered exit kind 1024: runtime error (called on a task not being operated on) The reason is that we are using SCX_CALL_OP() instead of SCX_CALL_OP_TASK() when calling ops.tick(), which triggers the error during the subsequent scx_kf_allowed_on_arg_tasks() check. SCX_CALL_OP_TASK() was first introduced in commit 36454023f50b ("sched_ext: Track tasks that are subjects of the in-flight SCX operation") to ensure task's rq lock is held when accessing task's sched_group. Since ops.tick() is marked as SCX_KF_TERMINAL and task_tick_scx() is protected by the rq lock, we can use SCX_CALL_OP_TASK() to avoid the above issue. Similarly, the same changes should be made for ops.disable() and ops.exit_task(), as they are also protected by task_rq_lock() and it's safe to access the task's task_group. Fixes: 36454023f50b ("sched_ext: Track tasks that are subjects of the in-flight SCX operation") Signed-off-by: Chuyi Zhou Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 54edd0e2132a6..5a81d9a1e31f2 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3899,7 +3899,7 @@ static void task_tick_scx(struct rq *rq, struct task_struct *curr, int queued) curr->scx.slice = 0; touch_core_sched(rq, curr); } else if (SCX_HAS_OP(tick)) { - SCX_CALL_OP(SCX_KF_REST, tick, curr); + SCX_CALL_OP_TASK(SCX_KF_REST, tick, curr); } if (!curr->scx.slice) @@ -4046,7 +4046,7 @@ static void scx_ops_disable_task(struct task_struct *p) WARN_ON_ONCE(scx_get_task_state(p) != SCX_TASK_ENABLED); if (SCX_HAS_OP(disable)) - SCX_CALL_OP(SCX_KF_REST, disable, p); + SCX_CALL_OP_TASK(SCX_KF_REST, disable, p); scx_set_task_state(p, SCX_TASK_READY); } @@ -4075,7 +4075,7 @@ static void scx_ops_exit_task(struct task_struct *p) } if (SCX_HAS_OP(exit_task)) - SCX_CALL_OP(SCX_KF_REST, exit_task, p, &args); + SCX_CALL_OP_TASK(SCX_KF_REST, exit_task, p, &args); scx_set_task_state(p, SCX_TASK_NONE); } -- GitLab From d6211ebbdaa541af197b50b8dd8f22642ce0b87f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 13 Feb 2025 08:24:23 -0700 Subject: [PATCH 0617/1585] io_uring/uring_cmd: unconditionally copy SQEs at prep time This isn't generally necessary, but conditions have been observed where SQE data is accessed from the original SQE after prep has been done and outside of the initial issue. Opcode prep handlers must ensure that any SQE related data is stable beyond the prep phase, but uring_cmd is a bit special in how it handles the SQE which makes it susceptible to reading stale data. If the application has reused the SQE before the original completes, then that can lead to data corruption. Down the line we can relax this again once uring_cmd has been sanitized a bit, and avoid unnecessarily copying the SQE. Fixes: 5eff57fa9f3a ("io_uring/uring_cmd: defer SQE copying until it's needed") Reported-by: Caleb Sander Mateos Reviewed-by: Caleb Sander Mateos Reviewed-by: Li Zetao Signed-off-by: Jens Axboe --- io_uring/uring_cmd.c | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 8af7780407b7e..e6701b7aa1474 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -165,15 +165,6 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, u64 res2, } EXPORT_SYMBOL_GPL(io_uring_cmd_done); -static void io_uring_cmd_cache_sqes(struct io_kiocb *req) -{ - struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); - struct io_uring_cmd_data *cache = req->async_data; - - memcpy(cache->sqes, ioucmd->sqe, uring_sqe_size(req->ctx)); - ioucmd->sqe = cache->sqes; -} - static int io_uring_cmd_prep_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) { @@ -185,10 +176,15 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req, return -ENOMEM; cache->op_data = NULL; - ioucmd->sqe = sqe; - /* defer memcpy until we need it */ - if (unlikely(req->flags & REQ_F_FORCE_ASYNC)) - io_uring_cmd_cache_sqes(req); + /* + * Unconditionally cache the SQE for now - this is only needed for + * requests that go async, but prep handlers must ensure that any + * sqe data is stable beyond prep. Since uring_cmd is special in + * that it doesn't read in per-op data, play it safe and ensure that + * any SQE data is stable beyond prep. This can later get relaxed. + */ + memcpy(cache->sqes, sqe, uring_sqe_size(req->ctx)); + ioucmd->sqe = cache->sqes; return 0; } @@ -251,16 +247,8 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) } ret = file->f_op->uring_cmd(ioucmd, issue_flags); - if (ret == -EAGAIN) { - struct io_uring_cmd_data *cache = req->async_data; - - if (ioucmd->sqe != cache->sqes) - io_uring_cmd_cache_sqes(req); - return -EAGAIN; - } else if (ret == -EIOCBQUEUED) { - return -EIOCBQUEUED; - } - + if (ret == -EAGAIN || ret == -EIOCBQUEUED) + return ret; if (ret < 0) req_set_fail(req); io_req_uring_cleanup(req, issue_flags); -- GitLab From fbe8f2fa971c537571994a0df532c511c4fb5537 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 12 Feb 2025 09:11:07 -0800 Subject: [PATCH 0618/1585] md/raid*: Fix the set_queue_limits implementations queue_limits_cancel_update() must only be called if queue_limits_start_update() is called first. Remove the queue_limits_cancel_update() calls from the raid*_set_limits() functions because there is no corresponding queue_limits_start_update() call. Cc: Christoph Hellwig Fixes: c6e56cf6b2e7 ("block: move integrity information into queue_limits") Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/linux-raid/20250212171108.3483150-1-bvanassche@acm.org/ Signed-off-by: Yu Kuai --- drivers/md/raid0.c | 4 +--- drivers/md/raid1.c | 4 +--- drivers/md/raid10.c | 4 +--- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 8fc9339b00c72..70bcc3cdf2cd1 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -386,10 +386,8 @@ static int raid0_set_limits(struct mddev *mddev) lim.io_opt = lim.io_min * mddev->raid_disks; lim.features |= BLK_FEAT_ATOMIC_WRITES; err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); - if (err) { - queue_limits_cancel_update(mddev->gendisk->queue); + if (err) return err; - } return queue_limits_set(mddev->gendisk->queue, &lim); } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 9d57a88dbd261..10ea3af40991d 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -3219,10 +3219,8 @@ static int raid1_set_limits(struct mddev *mddev) lim.max_write_zeroes_sectors = 0; lim.features |= BLK_FEAT_ATOMIC_WRITES; err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); - if (err) { - queue_limits_cancel_update(mddev->gendisk->queue); + if (err) return err; - } return queue_limits_set(mddev->gendisk->queue, &lim); } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index efe93b9791677..15b9ae5bf84d8 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4020,10 +4020,8 @@ static int raid10_set_queue_limits(struct mddev *mddev) lim.io_opt = lim.io_min * raid10_nr_stripes(conf); lim.features |= BLK_FEAT_ATOMIC_WRITES; err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); - if (err) { - queue_limits_cancel_update(mddev->gendisk->queue); + if (err) return err; - } return queue_limits_set(mddev->gendisk->queue, &lim); } -- GitLab From cd57e4327707126dca3f9517b84274c001d4c184 Mon Sep 17 00:00:00 2001 From: Pei Xiao Date: Tue, 11 Feb 2025 10:29:48 +0800 Subject: [PATCH 0619/1585] phy: freescale: fsl-samsung-hdmi: Limit PLL lock detection clock divider to valid range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FIELD_PREP() checks that a value fits into the available bitfield, but the index div equals to 4,is out of range. which gcc complains about: In function ‘fsl_samsung_hdmi_phy_configure_pll_lock_det’, inlined from ‘fsl_samsung_hdmi_phy_configure’ at drivers/phy/freescale/phy-fsl-samsung-hdmi.c :470:2: ././include/linux/compiler_types.h:542:38: error: call to ‘__compiletime_assert_538’ declared with attribute error: FIELD_PREP: value too large for the field 542 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | ^ ././include/linux/compiler_types.h:523:4: note: in definition of macro ‘__compiletime_assert’ 523 | prefix ## suffix(); | ^~~~~~ ././include/linux/compiler_types.h:542:2: note: in expansion of macro ‘_compiletime_assert’ 542 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) REG12_CK_DIV_MASK only two bit, limit div to range 0~3, so build error will fix. Fixes: d567679f2b6a ("phy: freescale: fsl-samsung-hdmi: Clean up fld_tg_code calculation") Signed-off-by: Pei Xiao Changlog: Reviewed-by: Adam Ford Link: https://lore.kernel.org/r/tencent_6F503D43467AA99DD8CC59B8F645F0725B0A@qq.com Signed-off-by: Vinod Koul --- drivers/phy/freescale/phy-fsl-samsung-hdmi.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/phy/freescale/phy-fsl-samsung-hdmi.c b/drivers/phy/freescale/phy-fsl-samsung-hdmi.c index 45004f598e4dc..e4c0a82d16d9e 100644 --- a/drivers/phy/freescale/phy-fsl-samsung-hdmi.c +++ b/drivers/phy/freescale/phy-fsl-samsung-hdmi.c @@ -325,7 +325,7 @@ to_fsl_samsung_hdmi_phy(struct clk_hw *hw) return container_of(hw, struct fsl_samsung_hdmi_phy, hw); } -static void +static int fsl_samsung_hdmi_phy_configure_pll_lock_det(struct fsl_samsung_hdmi_phy *phy, const struct phy_config *cfg) { @@ -341,6 +341,9 @@ fsl_samsung_hdmi_phy_configure_pll_lock_det(struct fsl_samsung_hdmi_phy *phy, break; } + if (unlikely(div == 4)) + return -EINVAL; + writeb(FIELD_PREP(REG12_CK_DIV_MASK, div), phy->regs + PHY_REG(12)); /* @@ -364,6 +367,8 @@ fsl_samsung_hdmi_phy_configure_pll_lock_det(struct fsl_samsung_hdmi_phy *phy, FIELD_PREP(REG14_RP_CODE_MASK, 2) | FIELD_PREP(REG14_TG_CODE_HIGH_MASK, fld_tg_code >> 8), phy->regs + PHY_REG(14)); + + return 0; } static unsigned long fsl_samsung_hdmi_phy_find_pms(unsigned long fout, u8 *p, u16 *m, u8 *s) @@ -466,7 +471,11 @@ static int fsl_samsung_hdmi_phy_configure(struct fsl_samsung_hdmi_phy *phy, writeb(REG21_SEL_TX_CK_INV | FIELD_PREP(REG21_PMS_S_MASK, cfg->pll_div_regs[2] >> 4), phy->regs + PHY_REG(21)); - fsl_samsung_hdmi_phy_configure_pll_lock_det(phy, cfg); + ret = fsl_samsung_hdmi_phy_configure_pll_lock_det(phy, cfg); + if (ret) { + dev_err(phy->dev, "pixclock too large\n"); + return ret; + } writeb(REG33_FIX_DA | REG33_MODE_SET_DONE, phy->regs + PHY_REG(33)); -- GitLab From 7b4aebeecbbd5b5fe73e35fad3f62ed21aa7ef44 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 13 Feb 2025 17:56:46 +0200 Subject: [PATCH 0620/1585] gpiolib: Fix crash on error in gpiochip_get_ngpios() The gpiochip_get_ngpios() uses chip_*() macros to print messages. However these macros rely on gpiodev to be initialised and set, which is not the case when called via bgpio_init(). In such a case the printing messages will crash on NULL pointer dereference. Replace chip_*() macros by the respective dev_*() ones to avoid such crash. Fixes: 55b2395e4e92 ("gpio: mmio: handle "ngpios" properly in bgpio_init()") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250213155646.2882324-1-andriy.shevchenko@linux.intel.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 679ed764cb143..ca2f58a2cd45e 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -904,13 +904,13 @@ int gpiochip_get_ngpios(struct gpio_chip *gc, struct device *dev) } if (gc->ngpio == 0) { - chip_err(gc, "tried to insert a GPIO chip with zero lines\n"); + dev_err(dev, "tried to insert a GPIO chip with zero lines\n"); return -EINVAL; } if (gc->ngpio > FASTPATH_NGPIO) - chip_warn(gc, "line cnt %u is greater than fast path cnt %u\n", - gc->ngpio, FASTPATH_NGPIO); + dev_warn(dev, "line cnt %u is greater than fast path cnt %u\n", + gc->ngpio, FASTPATH_NGPIO); return 0; } -- GitLab From fbc7e61195e23f744814e78524b73b59faa54ab4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 10 Feb 2025 19:52:19 +0000 Subject: [PATCH 0621/1585] KVM: arm64: Unconditionally save+flush host FPSIMD/SVE/SME state There are several problems with the way hyp code lazily saves the host's FPSIMD/SVE state, including: * Host SVE being discarded unexpectedly due to inconsistent configuration of TIF_SVE and CPACR_ELx.ZEN. This has been seen to result in QEMU crashes where SVE is used by memmove(), as reported by Eric Auger: https://issues.redhat.com/browse/RHEL-68997 * Host SVE state is discarded *after* modification by ptrace, which was an unintentional ptrace ABI change introduced with lazy discarding of SVE state. * The host FPMR value can be discarded when running a non-protected VM, where FPMR support is not exposed to a VM, and that VM uses FPSIMD/SVE. In these cases the hyp code does not save the host's FPMR before unbinding the host's FPSIMD/SVE/SME state, leaving a stale value in memory. Avoid these by eagerly saving and "flushing" the host's FPSIMD/SVE/SME state when loading a vCPU such that KVM does not need to save any of the host's FPSIMD/SVE/SME state. For clarity, fpsimd_kvm_prepare() is removed and the necessary call to fpsimd_save_and_flush_cpu_state() is placed in kvm_arch_vcpu_load_fp(). As 'fpsimd_state' and 'fpmr_ptr' should not be used, they are set to NULL; all uses of these will be removed in subsequent patches. Historical problems go back at least as far as v5.17, e.g. erroneous assumptions about TIF_SVE being clear in commit: 8383741ab2e773a9 ("KVM: arm64: Get rid of host SVE tracking/saving") ... and so this eager save+flush probably needs to be backported to ALL stable trees. Fixes: 93ae6b01bafee8fa ("KVM: arm64: Discard any SVE state when entering KVM guests") Fixes: 8c845e2731041f0f ("arm64/sve: Leave SVE enabled on syscall if we don't context switch") Fixes: ef3be86021c3bdf3 ("KVM: arm64: Add save/restore support for FPMR") Reported-by: Eric Auger Reported-by: Wilco Dijkstra Reviewed-by: Mark Brown Tested-by: Mark Brown Tested-by: Eric Auger Acked-by: Will Deacon Cc: Catalin Marinas Cc: Florian Weimer Cc: Fuad Tabba Cc: Jeremy Linton Cc: Marc Zyngier Cc: Oliver Upton Cc: Paolo Bonzini Signed-off-by: Mark Rutland Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250210195226.1215254-2-mark.rutland@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kernel/fpsimd.c | 25 ------------------------- arch/arm64/kvm/fpsimd.c | 35 ++++++++++------------------------- 2 files changed, 10 insertions(+), 50 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 8c4c1a2186cc5..ec68d520b7ca7 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1694,31 +1694,6 @@ void fpsimd_signal_preserve_current_state(void) sve_to_fpsimd(current); } -/* - * Called by KVM when entering the guest. - */ -void fpsimd_kvm_prepare(void) -{ - if (!system_supports_sve()) - return; - - /* - * KVM does not save host SVE state since we can only enter - * the guest from a syscall so the ABI means that only the - * non-saved SVE state needs to be saved. If we have left - * SVE enabled for performance reasons then update the task - * state to be FPSIMD only. - */ - get_cpu_fpsimd_context(); - - if (test_and_clear_thread_flag(TIF_SVE)) { - sve_to_fpsimd(current); - current->thread.fp_type = FP_STATE_FPSIMD; - } - - put_cpu_fpsimd_context(); -} - /* * Associate current's FPSIMD context with this cpu * The caller must have ownership of the cpu FPSIMD context before calling diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 4d3d1a2eb1570..ceeb0a4893aa7 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -54,16 +54,18 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) if (!system_supports_fpsimd()) return; - fpsimd_kvm_prepare(); - /* - * We will check TIF_FOREIGN_FPSTATE just before entering the - * guest in kvm_arch_vcpu_ctxflush_fp() and override this to - * FP_STATE_FREE if the flag set. + * Ensure that any host FPSIMD/SVE/SME state is saved and unbound such + * that the host kernel is responsible for restoring this state upon + * return to userspace, and the hyp code doesn't need to save anything. + * + * When the host may use SME, fpsimd_save_and_flush_cpu_state() ensures + * that PSTATE.{SM,ZA} == {0,0}. */ - *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED; - *host_data_ptr(fpsimd_state) = kern_hyp_va(¤t->thread.uw.fpsimd_state); - *host_data_ptr(fpmr_ptr) = kern_hyp_va(¤t->thread.uw.fpmr); + fpsimd_save_and_flush_cpu_state(); + *host_data_ptr(fp_owner) = FP_STATE_FREE; + *host_data_ptr(fpsimd_state) = NULL; + *host_data_ptr(fpmr_ptr) = NULL; host_data_clear_flag(HOST_SVE_ENABLED); if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) @@ -73,23 +75,6 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) host_data_clear_flag(HOST_SME_ENABLED); if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN) host_data_set_flag(HOST_SME_ENABLED); - - /* - * If PSTATE.SM is enabled then save any pending FP - * state and disable PSTATE.SM. If we leave PSTATE.SM - * enabled and the guest does not enable SME via - * CPACR_EL1.SMEN then operations that should be valid - * may generate SME traps from EL1 to EL1 which we - * can't intercept and which would confuse the guest. - * - * Do the same for PSTATE.ZA in the case where there - * is state in the registers which has not already - * been saved, this is very unlikely to happen. - */ - if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) { - *host_data_ptr(fp_owner) = FP_STATE_FREE; - fpsimd_save_and_flush_cpu_state(); - } } /* -- GitLab From 8eca7f6d5100b6997df4f532090bc3f7e0203bef Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 10 Feb 2025 19:52:20 +0000 Subject: [PATCH 0622/1585] KVM: arm64: Remove host FPSIMD saving for non-protected KVM Now that the host eagerly saves its own FPSIMD/SVE/SME state, non-protected KVM never needs to save the host FPSIMD/SVE/SME state, and the code to do this is never used. Protected KVM still needs to save/restore the host FPSIMD/SVE state to avoid leaking guest state to the host (and to avoid revealing to the host whether the guest used FPSIMD/SVE/SME), and that code needs to be retained. Remove the unused code and data structures. To avoid the need for a stub copy of kvm_hyp_save_fpsimd_host() in the VHE hyp code, the nVHE/hVHE version is moved into the shared switch header, where it is only invoked when KVM is in protected mode. Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Tested-by: Mark Brown Acked-by: Will Deacon Cc: Catalin Marinas Cc: Fuad Tabba Cc: Marc Zyngier Cc: Oliver Upton Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250210195226.1215254-3-mark.rutland@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 20 +++++------------- arch/arm64/kvm/arm.c | 8 ------- arch/arm64/kvm/fpsimd.c | 2 -- arch/arm64/kvm/hyp/include/hyp/switch.h | 25 ++++++++++++++++++++-- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 2 +- arch/arm64/kvm/hyp/nvhe/switch.c | 28 ------------------------- arch/arm64/kvm/hyp/vhe/switch.c | 8 ------- 7 files changed, 29 insertions(+), 64 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7cfa024de4e34..f56c07568591f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -624,23 +624,13 @@ struct kvm_host_data { struct kvm_cpu_context host_ctxt; /* - * All pointers in this union are hyp VA. + * Hyp VA. * sve_state is only used in pKVM and if system_supports_sve(). */ - union { - struct user_fpsimd_state *fpsimd_state; - struct cpu_sve_state *sve_state; - }; - - union { - /* HYP VA pointer to the host storage for FPMR */ - u64 *fpmr_ptr; - /* - * Used by pKVM only, as it needs to provide storage - * for the host - */ - u64 fpmr; - }; + struct cpu_sve_state *sve_state; + + /* Used by pKVM only. */ + u64 fpmr; /* Ownership of the FP regs */ enum { diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 62c650c2f7b67..4b7389ad94f55 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2481,14 +2481,6 @@ static void finalize_init_hyp_mode(void) per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state = kern_hyp_va(sve_state); } - } else { - for_each_possible_cpu(cpu) { - struct user_fpsimd_state *fpsimd_state; - - fpsimd_state = &per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->host_ctxt.fp_regs; - per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->fpsimd_state = - kern_hyp_va(fpsimd_state); - } } } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index ceeb0a4893aa7..332cb3904e68b 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -64,8 +64,6 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) */ fpsimd_save_and_flush_cpu_state(); *host_data_ptr(fp_owner) = FP_STATE_FREE; - *host_data_ptr(fpsimd_state) = NULL; - *host_data_ptr(fpmr_ptr) = NULL; host_data_clear_flag(HOST_SVE_ENABLED); if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index f838a45665f26..c5b8a11ac4f50 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -375,7 +375,28 @@ static inline void __hyp_sve_save_host(void) true); } -static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu); +static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) +{ + /* + * Non-protected kvm relies on the host restoring its sve state. + * Protected kvm restores the host's sve state as not to reveal that + * fpsimd was used by a guest nor leak upper sve bits. + */ + if (system_supports_sve()) { + __hyp_sve_save_host(); + + /* Re-enable SVE traps if not supported for the guest vcpu. */ + if (!vcpu_has_sve(vcpu)) + cpacr_clear_set(CPACR_EL1_ZEN, 0); + + } else { + __fpsimd_save_state(host_data_ptr(host_ctxt.fp_regs)); + } + + if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm))) + *host_data_ptr(fpmr) = read_sysreg_s(SYS_FPMR); +} + /* * We trap the first access to the FP/SIMD to save the host context and @@ -425,7 +446,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) isb(); /* Write out the host state if it's in the registers */ - if (host_owns_fp_regs()) + if (is_protected_kvm_enabled() && host_owns_fp_regs()) kvm_hyp_save_fpsimd_host(vcpu); /* Restore the guest state */ diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 6e12c070832f7..1a334a38d8fd2 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -83,7 +83,7 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) if (system_supports_sve()) __hyp_sve_restore_host(); else - __fpsimd_restore_state(*host_data_ptr(fpsimd_state)); + __fpsimd_restore_state(host_data_ptr(host_ctxt.fp_regs)); if (has_fpmr) write_sysreg_s(*host_data_ptr(fpmr), SYS_FPMR); diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 6c846d033d24a..7a2d189176249 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -192,34 +192,6 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code) kvm_handle_pvm_sysreg(vcpu, exit_code)); } -static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) -{ - /* - * Non-protected kvm relies on the host restoring its sve state. - * Protected kvm restores the host's sve state as not to reveal that - * fpsimd was used by a guest nor leak upper sve bits. - */ - if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) { - __hyp_sve_save_host(); - - /* Re-enable SVE traps if not supported for the guest vcpu. */ - if (!vcpu_has_sve(vcpu)) - cpacr_clear_set(CPACR_EL1_ZEN, 0); - - } else { - __fpsimd_save_state(*host_data_ptr(fpsimd_state)); - } - - if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm))) { - u64 val = read_sysreg_s(SYS_FPMR); - - if (unlikely(is_protected_kvm_enabled())) - *host_data_ptr(fpmr) = val; - else - **host_data_ptr(fpmr_ptr) = val; - } -} - static const exit_handler_fn hyp_exit_handlers[] = { [0 ... ESR_ELx_EC_MAX] = NULL, [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index b5b9dbaf1fdd6..e8a07d4bb546b 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -413,14 +413,6 @@ static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code) return true; } -static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) -{ - __fpsimd_save_state(*host_data_ptr(fpsimd_state)); - - if (kvm_has_fpmr(vcpu->kvm)) - **host_data_ptr(fpmr_ptr) = read_sysreg_s(SYS_FPMR); -} - static bool kvm_hyp_handle_tlbi_el2(struct kvm_vcpu *vcpu, u64 *exit_code) { int ret = -EINVAL; -- GitLab From 459f059be702056d91537b99a129994aa6ccdd35 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 10 Feb 2025 19:52:21 +0000 Subject: [PATCH 0623/1585] KVM: arm64: Remove VHE host restore of CPACR_EL1.ZEN When KVM is in VHE mode, the host kernel tries to save and restore the configuration of CPACR_EL1.ZEN (i.e. CPTR_EL2.ZEN when HCR_EL2.E2H=1) across kvm_arch_vcpu_load_fp() and kvm_arch_vcpu_put_fp(), since the configuration may be clobbered by hyp when running a vCPU. This logic is currently redundant. The VHE hyp code unconditionally configures CPTR_EL2.ZEN to 0b01 when returning to the host, permitting host kernel usage of SVE. Now that the host eagerly saves and unbinds its own FPSIMD/SVE/SME state, there's no need to save/restore the state of the EL0 SVE trap. The kernel can safely save/restore state without trapping, as described above, and will restore userspace state (including trap controls) before returning to userspace. Remove the redundant logic. Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Tested-by: Mark Brown Acked-by: Will Deacon Cc: Catalin Marinas Cc: Fuad Tabba Cc: Marc Zyngier Cc: Oliver Upton Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250210195226.1215254-4-mark.rutland@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 1 - arch/arm64/kvm/fpsimd.c | 16 ---------------- 2 files changed, 17 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f56c07568591f..ed6841bf21b22 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -615,7 +615,6 @@ struct cpu_sve_state { struct kvm_host_data { #define KVM_HOST_DATA_FLAG_HAS_SPE 0 #define KVM_HOST_DATA_FLAG_HAS_TRBE 1 -#define KVM_HOST_DATA_FLAG_HOST_SVE_ENABLED 2 #define KVM_HOST_DATA_FLAG_HOST_SME_ENABLED 3 #define KVM_HOST_DATA_FLAG_TRBE_ENABLED 4 #define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED 5 diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 332cb3904e68b..4ff0dee1a403f 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -65,10 +65,6 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) fpsimd_save_and_flush_cpu_state(); *host_data_ptr(fp_owner) = FP_STATE_FREE; - host_data_clear_flag(HOST_SVE_ENABLED); - if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) - host_data_set_flag(HOST_SVE_ENABLED); - if (system_supports_sme()) { host_data_clear_flag(HOST_SME_ENABLED); if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN) @@ -202,18 +198,6 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) * when needed. */ fpsimd_save_and_flush_cpu_state(); - } else if (has_vhe() && system_supports_sve()) { - /* - * The FPSIMD/SVE state in the CPU has not been touched, and we - * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been - * reset by kvm_reset_cptr_el2() in the Hyp code, disabling SVE - * for EL0. To avoid spurious traps, restore the trap state - * seen by kvm_arch_vcpu_load_fp(): - */ - if (host_data_test_flag(HOST_SVE_ENABLED)) - sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN); - else - sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0); } local_irq_restore(flags); -- GitLab From 407a99c4654e8ea65393f412c421a55cac539f5b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 10 Feb 2025 19:52:22 +0000 Subject: [PATCH 0624/1585] KVM: arm64: Remove VHE host restore of CPACR_EL1.SMEN When KVM is in VHE mode, the host kernel tries to save and restore the configuration of CPACR_EL1.SMEN (i.e. CPTR_EL2.SMEN when HCR_EL2.E2H=1) across kvm_arch_vcpu_load_fp() and kvm_arch_vcpu_put_fp(), since the configuration may be clobbered by hyp when running a vCPU. This logic has historically been broken, and is currently redundant. This logic was originally introduced in commit: 861262ab86270206 ("KVM: arm64: Handle SME host state when running guests") At the time, the VHE hyp code would reset CPTR_EL2.SMEN to 0b00 when returning to the host, trapping host access to SME state. Unfortunately, this was unsafe as the host could take a softirq before calling kvm_arch_vcpu_put_fp(), and if a softirq handler were to use kernel mode NEON the resulting attempt to save the live FPSIMD/SVE/SME state would result in a fatal trap. That issue was limited to VHE mode. For nVHE/hVHE modes, KVM always saved/restored the host kernel's CPACR_EL1 value, and configured CPTR_EL2.TSM to 0b0, ensuring that host usage of SME would not be trapped. The issue above was incidentally fixed by commit: 375110ab51dec5dc ("KVM: arm64: Fix resetting SME trap values on reset for (h)VHE") That commit changed the VHE hyp code to configure CPTR_EL2.SMEN to 0b01 when returning to the host, permitting host kernel usage of SME, avoiding the issue described above. At the time, this was not identified as a fix for commit 861262ab86270206. Now that the host eagerly saves and unbinds its own FPSIMD/SVE/SME state, there's no need to save/restore the state of the EL0 SME trap. The kernel can safely save/restore state without trapping, as described above, and will restore userspace state (including trap controls) before returning to userspace. Remove the redundant logic. Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Tested-by: Mark Brown Acked-by: Will Deacon Cc: Catalin Marinas Cc: Fuad Tabba Cc: Marc Zyngier Cc: Oliver Upton Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250210195226.1215254-5-mark.rutland@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 1 - arch/arm64/kvm/fpsimd.c | 21 --------------------- 2 files changed, 22 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index ed6841bf21b22..c77acc9904576 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -615,7 +615,6 @@ struct cpu_sve_state { struct kvm_host_data { #define KVM_HOST_DATA_FLAG_HAS_SPE 0 #define KVM_HOST_DATA_FLAG_HAS_TRBE 1 -#define KVM_HOST_DATA_FLAG_HOST_SME_ENABLED 3 #define KVM_HOST_DATA_FLAG_TRBE_ENABLED 4 #define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED 5 unsigned long flags; diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 4ff0dee1a403f..f64724197958e 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -65,12 +65,6 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) fpsimd_save_and_flush_cpu_state(); *host_data_ptr(fp_owner) = FP_STATE_FREE; - if (system_supports_sme()) { - host_data_clear_flag(HOST_SME_ENABLED); - if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN) - host_data_set_flag(HOST_SME_ENABLED); - } - /* * If normal guests gain SME support, maintain this behavior for pKVM * guests, which don't support SME. @@ -141,21 +135,6 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) local_irq_save(flags); - /* - * If we have VHE then the Hyp code will reset CPACR_EL1 to - * the default value and we need to reenable SME. - */ - if (has_vhe() && system_supports_sme()) { - /* Also restore EL0 state seen on entry */ - if (host_data_test_flag(HOST_SME_ENABLED)) - sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_SMEN); - else - sysreg_clear_set(CPACR_EL1, - CPACR_EL1_SMEN_EL0EN, - CPACR_EL1_SMEN_EL1EN); - isb(); - } - if (guest_owns_fp_regs()) { if (vcpu_has_sve(vcpu)) { u64 zcr = read_sysreg_el1(SYS_ZCR); -- GitLab From ee14db31a9c84e65f5adfd45598760d851f1d817 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 10 Feb 2025 19:52:23 +0000 Subject: [PATCH 0625/1585] KVM: arm64: Refactor CPTR trap deactivation For historical reasons, the VHE and nVHE/hVHE implementations of __activate_cptr_traps() pair with a common implementation of __kvm_reset_cptr_el2(), which ideally would be named __deactivate_cptr_traps(). Rename __kvm_reset_cptr_el2() to __deactivate_cptr_traps(), and split it into separate VHE and nVHE/hVHE variants so that each can be paired with its corresponding implementation of __activate_cptr_traps(). At the same time, fold kvm_write_cptr_el2() into its callers. This makes it clear in-context whether a write is made to the CPACR_EL1 encoding or the CPTR_EL2 encoding, and removes the possibility of confusion as to whether kvm_write_cptr_el2() reformats the sysreg fields as cpacr_clear_set() does. In the nVHE/hVHE implementation of __activate_cptr_traps(), placing the sysreg writes within the if-else blocks requires that the call to __activate_traps_fpsimd32() is moved earlier, but as this was always called before writing to CPTR_EL2/CPACR_EL1, this should not result in a functional change. Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Tested-by: Mark Brown Acked-by: Will Deacon Cc: Catalin Marinas Cc: Fuad Tabba Cc: Marc Zyngier Cc: Oliver Upton Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250210195226.1215254-6-mark.rutland@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 42 ---------------------------- arch/arm64/kvm/hyp/nvhe/switch.c | 35 ++++++++++++++++++++--- arch/arm64/kvm/hyp/vhe/switch.c | 12 +++++++- 3 files changed, 42 insertions(+), 47 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 47f2cf408eeda..78ec1ef2cfe82 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -605,48 +605,6 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu) __cpacr_to_cptr_set(clr, set));\ } while (0) -static __always_inline void kvm_write_cptr_el2(u64 val) -{ - if (has_vhe() || has_hvhe()) - write_sysreg(val, cpacr_el1); - else - write_sysreg(val, cptr_el2); -} - -/* Resets the value of cptr_el2 when returning to the host. */ -static __always_inline void __kvm_reset_cptr_el2(struct kvm *kvm) -{ - u64 val; - - if (has_vhe()) { - val = (CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN); - if (cpus_have_final_cap(ARM64_SME)) - val |= CPACR_EL1_SMEN_EL1EN; - } else if (has_hvhe()) { - val = CPACR_EL1_FPEN; - - if (!kvm_has_sve(kvm) || !guest_owns_fp_regs()) - val |= CPACR_EL1_ZEN; - if (cpus_have_final_cap(ARM64_SME)) - val |= CPACR_EL1_SMEN; - } else { - val = CPTR_NVHE_EL2_RES1; - - if (kvm_has_sve(kvm) && guest_owns_fp_regs()) - val |= CPTR_EL2_TZ; - if (!cpus_have_final_cap(ARM64_SME)) - val |= CPTR_EL2_TSM; - } - - kvm_write_cptr_el2(val); -} - -#ifdef __KVM_NVHE_HYPERVISOR__ -#define kvm_reset_cptr_el2(v) __kvm_reset_cptr_el2(kern_hyp_va((v)->kvm)) -#else -#define kvm_reset_cptr_el2(v) __kvm_reset_cptr_el2((v)->kvm) -#endif - /* * Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE * format if E2H isn't set. diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 7a2d189176249..5d79f63a4f861 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -39,6 +39,9 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu) { u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */ + if (!guest_owns_fp_regs()) + __activate_traps_fpsimd32(vcpu); + if (has_hvhe()) { val |= CPACR_EL1_TTA; @@ -47,6 +50,8 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu) if (vcpu_has_sve(vcpu)) val |= CPACR_EL1_ZEN; } + + write_sysreg(val, cpacr_el1); } else { val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1; @@ -61,12 +66,34 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu) if (!guest_owns_fp_regs()) val |= CPTR_EL2_TFP; + + write_sysreg(val, cptr_el2); } +} - if (!guest_owns_fp_regs()) - __activate_traps_fpsimd32(vcpu); +static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); - kvm_write_cptr_el2(val); + if (has_hvhe()) { + u64 val = CPACR_EL1_FPEN; + + if (!kvm_has_sve(kvm) || !guest_owns_fp_regs()) + val |= CPACR_EL1_ZEN; + if (cpus_have_final_cap(ARM64_SME)) + val |= CPACR_EL1_SMEN; + + write_sysreg(val, cpacr_el1); + } else { + u64 val = CPTR_NVHE_EL2_RES1; + + if (kvm_has_sve(kvm) && guest_owns_fp_regs()) + val |= CPTR_EL2_TZ; + if (!cpus_have_final_cap(ARM64_SME)) + val |= CPTR_EL2_TSM; + + write_sysreg(val, cptr_el2); + } } static void __activate_traps(struct kvm_vcpu *vcpu) @@ -119,7 +146,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2); - kvm_reset_cptr_el2(vcpu); + __deactivate_cptr_traps(vcpu); write_sysreg(__kvm_hyp_host_vector, vbar_el2); } diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index e8a07d4bb546b..4748b1947ffa0 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -136,6 +136,16 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu) write_sysreg(val, cpacr_el1); } +static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) +{ + u64 val = CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN; + + if (cpus_have_final_cap(ARM64_SME)) + val |= CPACR_EL1_SMEN_EL1EN; + + write_sysreg(val, cpacr_el1); +} + static void __activate_traps(struct kvm_vcpu *vcpu) { u64 val; @@ -207,7 +217,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) */ asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); - kvm_reset_cptr_el2(vcpu); + __deactivate_cptr_traps(vcpu); if (!arm64_kernel_unmapped_at_el0()) host_vectors = __this_cpu_read(this_cpu_vector); -- GitLab From 9b66195063c5a145843547b1d692bd189be85287 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 10 Feb 2025 19:52:24 +0000 Subject: [PATCH 0626/1585] KVM: arm64: Refactor exit handlers The hyp exit handling logic is largely shared between VHE and nVHE/hVHE, with common logic in arch/arm64/kvm/hyp/include/hyp/switch.h. The code in the header depends on function definitions provided by arch/arm64/kvm/hyp/vhe/switch.c and arch/arm64/kvm/hyp/nvhe/switch.c when they include the header. This is an unusual header dependency, and prevents the use of arch/arm64/kvm/hyp/include/hyp/switch.h in other files as this would result in compiler warnings regarding missing definitions, e.g. | In file included from arch/arm64/kvm/hyp/nvhe/hyp-main.c:8: | ./arch/arm64/kvm/hyp/include/hyp/switch.h:733:31: warning: 'kvm_get_exit_handler_array' used but never defined | 733 | static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu); | | ^~~~~~~~~~~~~~~~~~~~~~~~~~ | ./arch/arm64/kvm/hyp/include/hyp/switch.h:735:13: warning: 'early_exit_filter' used but never defined | 735 | static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code); | | ^~~~~~~~~~~~~~~~~ Refactor the logic such that the header doesn't depend on anything from the C files. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Tested-by: Mark Brown Acked-by: Will Deacon Cc: Catalin Marinas Cc: Fuad Tabba Cc: Marc Zyngier Cc: Oliver Upton Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250210195226.1215254-7-mark.rutland@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/include/hyp/switch.h | 30 +++++-------------------- arch/arm64/kvm/hyp/nvhe/switch.c | 28 +++++++++++++---------- arch/arm64/kvm/hyp/vhe/switch.c | 9 ++++---- 3 files changed, 26 insertions(+), 41 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index c5b8a11ac4f50..46df5c2eeaf57 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -679,23 +679,16 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *); -static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu); - -static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code); - /* * Allow the hypervisor to handle the exit with an exit handler if it has one. * * Returns true if the hypervisor handled the exit, and control should go back * to the guest, or false if it hasn't. */ -static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code, + const exit_handler_fn *handlers) { - const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu); - exit_handler_fn fn; - - fn = handlers[kvm_vcpu_trap_get_class(vcpu)]; - + exit_handler_fn fn = handlers[kvm_vcpu_trap_get_class(vcpu)]; if (fn) return fn(vcpu, exit_code); @@ -725,20 +718,9 @@ static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code * the guest, false when we should restore the host state and return to the * main run loop. */ -static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool __fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code, + const exit_handler_fn *handlers) { - /* - * Save PSTATE early so that we can evaluate the vcpu mode - * early on. - */ - synchronize_vcpu_pstate(vcpu, exit_code); - - /* - * Check whether we want to repaint the state one way or - * another. - */ - early_exit_filter(vcpu, exit_code); - if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); @@ -768,7 +750,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) goto exit; /* Check if there's an exit handler and allow it to handle the exit. */ - if (kvm_hyp_handle_exit(vcpu, exit_code)) + if (kvm_hyp_handle_exit(vcpu, exit_code, handlers)) goto guest; exit: /* Return to the host kernel and handle the exit */ diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 5d79f63a4f861..69d7d3b4294a7 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -250,19 +250,21 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) return hyp_exit_handlers; } -/* - * Some guests (e.g., protected VMs) are not be allowed to run in AArch32. - * The ARMv8 architecture does not give the hypervisor a mechanism to prevent a - * guest from dropping to AArch32 EL0 if implemented by the CPU. If the - * hypervisor spots a guest in such a state ensure it is handled, and don't - * trust the host to spot or fix it. The check below is based on the one in - * kvm_arch_vcpu_ioctl_run(). - * - * Returns false if the guest ran in AArch32 when it shouldn't have, and - * thus should exit to the host, or true if a the guest run loop can continue. - */ -static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) { + const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu); + + synchronize_vcpu_pstate(vcpu, exit_code); + + /* + * Some guests (e.g., protected VMs) are not be allowed to run in + * AArch32. The ARMv8 architecture does not give the hypervisor a + * mechanism to prevent a guest from dropping to AArch32 EL0 if + * implemented by the CPU. If the hypervisor spots a guest in such a + * state ensure it is handled, and don't trust the host to spot or fix + * it. The check below is based on the one in + * kvm_arch_vcpu_ioctl_run(). + */ if (unlikely(vcpu_is_protected(vcpu) && vcpu_mode_is_32bit(vcpu))) { /* * As we have caught the guest red-handed, decide that it isn't @@ -275,6 +277,8 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) *exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT); *exit_code |= ARM_EXCEPTION_IL; } + + return __fixup_guest_exit(vcpu, exit_code, handlers); } /* Switch to the guest for legacy non-VHE systems */ diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 4748b1947ffa0..c854d84458892 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -540,13 +540,10 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops, }; -static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) +static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) { - return hyp_exit_handlers; -} + synchronize_vcpu_pstate(vcpu, exit_code); -static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) -{ /* * If we were in HYP context on entry, adjust the PSTATE view * so that the usual helpers work correctly. @@ -566,6 +563,8 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) *vcpu_cpsr(vcpu) &= ~(PSR_MODE_MASK | PSR_MODE32_BIT); *vcpu_cpsr(vcpu) |= mode; } + + return __fixup_guest_exit(vcpu, exit_code, hyp_exit_handlers); } /* Switch to the guest for VHE systems running in EL2 */ -- GitLab From f9dd00de1e53a47763dfad601635d18542c3836d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 10 Feb 2025 19:52:25 +0000 Subject: [PATCH 0627/1585] KVM: arm64: Mark some header functions as inline The shared hyp switch header has a number of static functions which might not be used by all files that include the header, and when unused they will provoke compiler warnings, e.g. | In file included from arch/arm64/kvm/hyp/nvhe/hyp-main.c:8: | ./arch/arm64/kvm/hyp/include/hyp/switch.h:703:13: warning: 'kvm_hyp_handle_dabt_low' defined but not used [-Wunused-function] | 703 | static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) | | ^~~~~~~~~~~~~~~~~~~~~~~ | ./arch/arm64/kvm/hyp/include/hyp/switch.h:682:13: warning: 'kvm_hyp_handle_cp15_32' defined but not used [-Wunused-function] | 682 | static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code) | | ^~~~~~~~~~~~~~~~~~~~~~ | ./arch/arm64/kvm/hyp/include/hyp/switch.h:662:13: warning: 'kvm_hyp_handle_sysreg' defined but not used [-Wunused-function] | 662 | static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) | | ^~~~~~~~~~~~~~~~~~~~~ | ./arch/arm64/kvm/hyp/include/hyp/switch.h:458:13: warning: 'kvm_hyp_handle_fpsimd' defined but not used [-Wunused-function] | 458 | static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) | | ^~~~~~~~~~~~~~~~~~~~~ | ./arch/arm64/kvm/hyp/include/hyp/switch.h:329:13: warning: 'kvm_hyp_handle_mops' defined but not used [-Wunused-function] | 329 | static bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code) | | ^~~~~~~~~~~~~~~~~~~ Mark these functions as 'inline' to suppress this warning. This shouldn't result in any functional change. At the same time, avoid the use of __alias() in the header and alias kvm_hyp_handle_iabt_low() and kvm_hyp_handle_watchpt_low() to kvm_hyp_handle_memory_fault() using CPP, matching the style in the rest of the kernel. For consistency, kvm_hyp_handle_memory_fault() is also marked as 'inline'. Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Tested-by: Mark Brown Acked-by: Will Deacon Cc: Catalin Marinas Cc: Fuad Tabba Cc: Marc Zyngier Cc: Oliver Upton Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250210195226.1215254-8-mark.rutland@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/include/hyp/switch.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 46df5c2eeaf57..163867f7f7c52 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -326,7 +326,7 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault); } -static bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code) { *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); arm64_mops_reset_regs(vcpu_gp_regs(vcpu), vcpu->arch.fault.esr_el2); @@ -404,7 +404,7 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) * If FP/SIMD is not implemented, handle the trap and inject an undefined * instruction exception to the guest. Similarly for trapped SVE accesses. */ -static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) { bool sve_guest; u8 esr_ec; @@ -608,7 +608,7 @@ static bool handle_ampere1_tcr(struct kvm_vcpu *vcpu) return true; } -static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) { if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && handle_tx2_tvm(vcpu)) @@ -628,7 +628,7 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) return false; } -static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code) { if (static_branch_unlikely(&vgic_v3_cpuif_trap) && __vgic_v3_perform_cpuif_access(vcpu) == 1) @@ -637,19 +637,18 @@ static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code) return false; } -static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, + u64 *exit_code) { if (!__populate_fault_info(vcpu)) return true; return false; } -static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) - __alias(kvm_hyp_handle_memory_fault); -static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code) - __alias(kvm_hyp_handle_memory_fault); +#define kvm_hyp_handle_iabt_low kvm_hyp_handle_memory_fault +#define kvm_hyp_handle_watchpt_low kvm_hyp_handle_memory_fault -static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) { if (kvm_hyp_handle_memory_fault(vcpu, exit_code)) return true; -- GitLab From 59419f10045bc955d2229819c7cf7a8b0b9c5b59 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 10 Feb 2025 19:52:26 +0000 Subject: [PATCH 0628/1585] KVM: arm64: Eagerly switch ZCR_EL{1,2} In non-protected KVM modes, while the guest FPSIMD/SVE/SME state is live on the CPU, the host's active SVE VL may differ from the guest's maximum SVE VL: * For VHE hosts, when a VM uses NV, ZCR_EL2 contains a value constrained by the guest hypervisor, which may be less than or equal to that guest's maximum VL. Note: in this case the value of ZCR_EL1 is immaterial due to E2H. * For nVHE/hVHE hosts, ZCR_EL1 contains a value written by the guest, which may be less than or greater than the guest's maximum VL. Note: in this case hyp code traps host SVE usage and lazily restores ZCR_EL2 to the host's maximum VL, which may be greater than the guest's maximum VL. This can be the case between exiting a guest and kvm_arch_vcpu_put_fp(). If a softirq is taken during this period and the softirq handler tries to use kernel-mode NEON, then the kernel will fail to save the guest's FPSIMD/SVE state, and will pend a SIGKILL for the current thread. This happens because kvm_arch_vcpu_ctxsync_fp() binds the guest's live FPSIMD/SVE state with the guest's maximum SVE VL, and fpsimd_save_user_state() verifies that the live SVE VL is as expected before attempting to save the register state: | if (WARN_ON(sve_get_vl() != vl)) { | force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); | return; | } Fix this and make this a bit easier to reason about by always eagerly switching ZCR_EL{1,2} at hyp during guest<->host transitions. With this happening, there's no need to trap host SVE usage, and the nVHE/nVHE __deactivate_cptr_traps() logic can be simplified to enable host access to all present FPSIMD/SVE/SME features. In protected nVHE/hVHE modes, the host's state is always saved/restored by hyp, and the guest's state is saved prior to exit to the host, so from the host's PoV the guest never has live FPSIMD/SVE/SME state, and the host's ZCR_EL1 is never clobbered by hyp. Fixes: 8c8010d69c132273 ("KVM: arm64: Save/restore SVE state for nVHE") Fixes: 2e3cf82063a00ea0 ("KVM: arm64: nv: Ensure correct VL is loaded before saving SVE state") Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Tested-by: Mark Brown Cc: Catalin Marinas Cc: Fuad Tabba Cc: Marc Zyngier Cc: Oliver Upton Cc: Will Deacon Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250210195226.1215254-9-mark.rutland@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/fpsimd.c | 30 ------------- arch/arm64/kvm/hyp/entry.S | 5 +++ arch/arm64/kvm/hyp/include/hyp/switch.h | 59 +++++++++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/hyp-main.c | 13 +++--- arch/arm64/kvm/hyp/nvhe/switch.c | 6 +-- arch/arm64/kvm/hyp/vhe/switch.c | 4 ++ 6 files changed, 76 insertions(+), 41 deletions(-) diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index f64724197958e..3cbb999419af7 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -136,36 +136,6 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) local_irq_save(flags); if (guest_owns_fp_regs()) { - if (vcpu_has_sve(vcpu)) { - u64 zcr = read_sysreg_el1(SYS_ZCR); - - /* - * If the vCPU is in the hyp context then ZCR_EL1 is - * loaded with its vEL2 counterpart. - */ - __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)) = zcr; - - /* - * Restore the VL that was saved when bound to the CPU, - * which is the maximum VL for the guest. Because the - * layout of the data when saving the sve state depends - * on the VL, we need to use a consistent (i.e., the - * maximum) VL. - * Note that this means that at guest exit ZCR_EL1 is - * not necessarily the same as on guest entry. - * - * ZCR_EL2 holds the guest hypervisor's VL when running - * a nested guest, which could be smaller than the - * max for the vCPU. Similar to above, we first need to - * switch to a VL consistent with the layout of the - * vCPU's SVE state. KVM support for NV implies VHE, so - * using the ZCR_EL1 alias is safe. - */ - if (!has_vhe() || (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))) - sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, - SYS_ZCR_EL1); - } - /* * Flush (save and invalidate) the fpsimd/sve state so that if * the host tries to use fpsimd/sve, it's not using stale data diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 4433a234aa9ba..9f4e8d68ab505 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -44,6 +44,11 @@ alternative_if ARM64_HAS_RAS_EXTN alternative_else_nop_endif mrs x1, isr_el1 cbz x1, 1f + + // Ensure that __guest_enter() always provides a context + // synchronization event so that callers don't need ISBs for anything + // that would usually be synchonized by the ERET. + isb mov x0, #ARM_EXCEPTION_IRQ ret diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 163867f7f7c52..f5e882a358e2d 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -375,6 +375,65 @@ static inline void __hyp_sve_save_host(void) true); } +static inline void fpsimd_lazy_switch_to_guest(struct kvm_vcpu *vcpu) +{ + u64 zcr_el1, zcr_el2; + + if (!guest_owns_fp_regs()) + return; + + if (vcpu_has_sve(vcpu)) { + /* A guest hypervisor may restrict the effective max VL. */ + if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) + zcr_el2 = __vcpu_sys_reg(vcpu, ZCR_EL2); + else + zcr_el2 = vcpu_sve_max_vq(vcpu) - 1; + + write_sysreg_el2(zcr_el2, SYS_ZCR); + + zcr_el1 = __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)); + write_sysreg_el1(zcr_el1, SYS_ZCR); + } +} + +static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu) +{ + u64 zcr_el1, zcr_el2; + + if (!guest_owns_fp_regs()) + return; + + /* + * When the guest owns the FP regs, we know that guest+hyp traps for + * any FPSIMD/SVE/SME features exposed to the guest have been disabled + * by either fpsimd_lazy_switch_to_guest() or kvm_hyp_handle_fpsimd() + * prior to __guest_entry(). As __guest_entry() guarantees a context + * synchronization event, we don't need an ISB here to avoid taking + * traps for anything that was exposed to the guest. + */ + if (vcpu_has_sve(vcpu)) { + zcr_el1 = read_sysreg_el1(SYS_ZCR); + __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)) = zcr_el1; + + /* + * The guest's state is always saved using the guest's max VL. + * Ensure that the host has the guest's max VL active such that + * the host can save the guest's state lazily, but don't + * artificially restrict the host to the guest's max VL. + */ + if (has_vhe()) { + zcr_el2 = vcpu_sve_max_vq(vcpu) - 1; + write_sysreg_el2(zcr_el2, SYS_ZCR); + } else { + zcr_el2 = sve_vq_from_vl(kvm_host_sve_max_vl) - 1; + write_sysreg_el2(zcr_el2, SYS_ZCR); + + zcr_el1 = vcpu_sve_max_vq(vcpu) - 1; + write_sysreg_el1(zcr_el1, SYS_ZCR); + } + } +} + static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) { /* diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 1a334a38d8fd2..2c37680d954cf 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -5,6 +5,7 @@ */ #include +#include #include #include @@ -224,8 +225,12 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) sync_hyp_vcpu(hyp_vcpu); } else { + struct kvm_vcpu *vcpu = kern_hyp_va(host_vcpu); + /* The host is fully trusted, run its vCPU directly. */ - ret = __kvm_vcpu_run(kern_hyp_va(host_vcpu)); + fpsimd_lazy_switch_to_guest(vcpu); + ret = __kvm_vcpu_run(vcpu); + fpsimd_lazy_switch_to_host(vcpu); } out: cpu_reg(host_ctxt, 1) = ret; @@ -675,12 +680,6 @@ void handle_trap(struct kvm_cpu_context *host_ctxt) case ESR_ELx_EC_SMC64: handle_host_smc(host_ctxt); break; - case ESR_ELx_EC_SVE: - cpacr_clear_set(0, CPACR_EL1_ZEN); - isb(); - sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, - SYS_ZCR_EL2); - break; case ESR_ELx_EC_IABT_LOW: case ESR_ELx_EC_DABT_LOW: handle_host_mem_abort(host_ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 69d7d3b4294a7..7d2ba6ef02618 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -73,12 +73,10 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu) static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) { - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - if (has_hvhe()) { u64 val = CPACR_EL1_FPEN; - if (!kvm_has_sve(kvm) || !guest_owns_fp_regs()) + if (cpus_have_final_cap(ARM64_SVE)) val |= CPACR_EL1_ZEN; if (cpus_have_final_cap(ARM64_SME)) val |= CPACR_EL1_SMEN; @@ -87,7 +85,7 @@ static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) } else { u64 val = CPTR_NVHE_EL2_RES1; - if (kvm_has_sve(kvm) && guest_owns_fp_regs()) + if (!cpus_have_final_cap(ARM64_SVE)) val |= CPTR_EL2_TZ; if (!cpus_have_final_cap(ARM64_SME)) val |= CPTR_EL2_TSM; diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index c854d84458892..647737d6e8d0b 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -579,6 +579,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_save_host_state_vhe(host_ctxt); + fpsimd_lazy_switch_to_guest(vcpu); + /* * Note that ARM erratum 1165522 requires us to configure both stage 1 * and stage 2 translation for the guest context before we clear @@ -603,6 +605,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) __deactivate_traps(vcpu); + fpsimd_lazy_switch_to_host(vcpu); + sysreg_restore_host_state_vhe(host_ctxt); if (guest_owns_fp_regs()) -- GitLab From 332b7e6d62b7a3a988017f5184e547aa20e3a19a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 13 Feb 2025 09:15:31 +0000 Subject: [PATCH 0629/1585] KVM: arm64: Simplify warning in kvm_arch_vcpu_load_fp() At the end of kvm_arch_vcpu_load_fp() we check that no bits are set in SVCR. We only check this for protected mode despite this mattering equally for non-protected mode, and the comment above this is confusing. Remove the comment and simplify the check, moving from WARN_ON() to WARN_ON_ONCE() to avoid spamming the log. Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/kvm/fpsimd.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 3cbb999419af7..7f6e43d256915 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -65,12 +65,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) fpsimd_save_and_flush_cpu_state(); *host_data_ptr(fp_owner) = FP_STATE_FREE; - /* - * If normal guests gain SME support, maintain this behavior for pKVM - * guests, which don't support SME. - */ - WARN_ON(is_protected_kvm_enabled() && system_supports_sme() && - read_sysreg_s(SYS_SVCR)); + WARN_ON_ONCE(system_supports_sme() && read_sysreg_s(SYS_SVCR)); } /* -- GitLab From 65729da9ce37f5a2c62e2542ef03bc9ac6775a7d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 12 Feb 2025 17:34:54 +0000 Subject: [PATCH 0630/1585] KVM: arm64: Convert timer offset VA when accessed in HYP code Now that EL2 has gained some early timer emulation, it accesses the offsets pointed to by the timer structure, both of which live in the KVM structure. Of course, these are *kernel* pointers, so the dereferencing of these pointers in non-kernel code must be itself be offset. Given switch.h its own version of timer_get_offset() and use that instead. Fixes: b86fc215dc26d ("KVM: arm64: Handle counter access early in non-HYP context") Reported-by: Linux Kernel Functional Testing Reviewed-by: Oliver Upton Tested-by: Anders Roxell Link: https://lore.kernel.org/r/20250212173454.2864462-1-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/include/hyp/switch.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index f5e882a358e2d..23bbe28eaaf95 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -581,9 +581,22 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu) return true; } +/* Open-coded version of timer_get_offset() to allow for kern_hyp_va() */ +static inline u64 hyp_timer_get_offset(struct arch_timer_context *ctxt) +{ + u64 offset = 0; + + if (ctxt->offset.vm_offset) + offset += *kern_hyp_va(ctxt->offset.vm_offset); + if (ctxt->offset.vcpu_offset) + offset += *kern_hyp_va(ctxt->offset.vcpu_offset); + + return offset; +} + static inline u64 compute_counter_value(struct arch_timer_context *ctxt) { - return arch_timer_read_cntpct_el0() - timer_get_offset(ctxt); + return arch_timer_read_cntpct_el0() - hyp_timer_get_offset(ctxt); } static bool kvm_handle_cntxct(struct kvm_vcpu *vcpu) -- GitLab From b938731ed2d4eea8e268a27bfc600581fedae2a9 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 13 Feb 2025 15:36:14 +0000 Subject: [PATCH 0631/1585] KVM: arm64: Fix alignment of kvm_hyp_memcache allocations When allocating guest stage-2 page-table pages at EL2, pKVM can consume pages from the host-provided kvm_hyp_memcache. As pgtable.c expects zeroed pages, guest_s2_zalloc_page() actively implements this zeroing with a PAGE_SIZE memset. Unfortunately, we don't check the page alignment of the host-provided address before doing so, which could lead to the memset overrunning the page if the host was malicious. Fix this by simply force-aligning all kvm_hyp_memcache allocations to page boundaries. Fixes: 60dfe093ec13 ("KVM: arm64: Instantiate guest stage-2 page-tables at EL2") Reported-by: Ben Simner Signed-off-by: Quentin Perret Link: https://lore.kernel.org/r/20250213153615.3642515-1-qperret@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index c77acc9904576..3a7ec98ef1238 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -100,7 +100,7 @@ static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc, static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc, void *(*to_va)(phys_addr_t phys)) { - phys_addr_t *p = to_va(mc->head); + phys_addr_t *p = to_va(mc->head & PAGE_MASK); if (!mc->nr_pages) return NULL; -- GitLab From 540cda75884a6ba4c289980c84392261b1f61a9c Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 12 Feb 2025 11:21:24 +0000 Subject: [PATCH 0632/1585] rxrpc: Fix ipv6 path MTU discovery rxrpc path MTU discovery currently only makes use of ICMPv4, but not ICMPv6, which means that pmtud for IPv6 doesn't work correctly. Fix it to check for ICMPv6 messages also. Fixes: eeaedc5449d9 ("rxrpc: Implement path-MTU probing using padded PING ACKs (RFC8899)") Signed-off-by: David Howells cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/3517283.1739359284@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- net/rxrpc/peer_event.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index e874c31fa9012..bc283da9ee402 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -169,6 +169,13 @@ void rxrpc_input_error(struct rxrpc_local *local, struct sk_buff *skb) goto out; } + if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6 && + serr->ee.ee_type == ICMPV6_PKT_TOOBIG && + serr->ee.ee_code == 0)) { + rxrpc_adjust_mtu(peer, serr->ee.ee_info); + goto out; + } + rxrpc_store_error(peer, skb); out: rxrpc_put_peer(peer, rxrpc_peer_put_input_error); -- GitLab From 488fb6effe03e20f38d34da7425de77bbd3e2665 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Wed, 12 Feb 2025 16:17:51 +0100 Subject: [PATCH 0633/1585] net: pse-pd: Fix deadlock in current limit functions Fix a deadlock in pse_pi_get_current_limit and pse_pi_set_current_limit caused by consecutive mutex_lock calls. One in the function itself and another in pse_pi_get_voltage. Resolve the issue by using the unlocked version of pse_pi_get_voltage instead. Fixes: e0a5e2bba38a ("net: pse-pd: Use power limit at driver side instead of current limit") Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20250212151751.1515008-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/pse-pd/pse_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/pse-pd/pse_core.c b/drivers/net/pse-pd/pse_core.c index 4f2a54afc4d09..4602e26eb8c86 100644 --- a/drivers/net/pse-pd/pse_core.c +++ b/drivers/net/pse-pd/pse_core.c @@ -319,7 +319,7 @@ static int pse_pi_get_current_limit(struct regulator_dev *rdev) goto out; mW = ret; - ret = pse_pi_get_voltage(rdev); + ret = _pse_pi_get_voltage(rdev); if (!ret) { dev_err(pcdev->dev, "Voltage null\n"); ret = -ERANGE; @@ -356,7 +356,7 @@ static int pse_pi_set_current_limit(struct regulator_dev *rdev, int min_uA, id = rdev_get_id(rdev); mutex_lock(&pcdev->lock); - ret = pse_pi_get_voltage(rdev); + ret = _pse_pi_get_voltage(rdev); if (!ret) { dev_err(pcdev->dev, "Voltage null\n"); ret = -ERANGE; -- GitLab From e6e3e0022ef8f1d584ee4d5b89dca02472c5eb1f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 12 Feb 2025 18:25:57 +0000 Subject: [PATCH 0634/1585] KVM: arm64: timer: Drop warning on failed interrupt signalling We currently spit out a warning if making a timer interrupt pending fails. But not only this is loud and easy to trigger from userspace, we also fail to do anything useful with that information. Dropping the warning is the easiest thing to do for now. We can always add error reporting if we really want in the future. Reported-by: Alexander Potapenko Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250212182558.2865232-2-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 231c0cd9c7b4b..70802e4c91cf5 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -447,21 +447,19 @@ static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level) static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, struct arch_timer_context *timer_ctx) { - int ret; - kvm_timer_update_status(timer_ctx, new_level); timer_ctx->irq.level = new_level; trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx), timer_ctx->irq.level); - if (!userspace_irqchip(vcpu->kvm)) { - ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu, - timer_irq(timer_ctx), - timer_ctx->irq.level, - timer_ctx); - WARN_ON(ret); - } + if (userspace_irqchip(vcpu->kvm)) + return; + + kvm_vgic_inject_irq(vcpu->kvm, vcpu, + timer_irq(timer_ctx), + timer_ctx->irq.level, + timer_ctx); } /* Only called for a fully emulated timer */ -- GitLab From b3aa9283c0c505b5cfd25f7d6cfd720de2adc807 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 12 Feb 2025 18:25:58 +0000 Subject: [PATCH 0635/1585] KVM: arm64: vgic: Hoist SGI/PPI alloc from vgic_init() to kvm_create_vgic() If userspace creates vcpus, then a vgic, we end-up in a situation where irqchip_in_kernel() will return true, but no private interrupt has been allocated for these vcpus. This situation will continue until userspace initialises the vgic, at which point we fix the early vcpus. Should a vcpu run or be initialised in the interval, bad things may happen. An obvious solution is to move this fix-up phase to the point where the vgic is created. This ensures that from that point onwards, all vcpus have their private interrupts, as new vcpus will directly allocate them. With that, we have the invariant that when irqchip_in_kernel() is true, all vcpus have their private interrupts. Reported-by: Alexander Potapenko Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250212182558.2865232-3-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-init.c | 74 ++++++++++++++++----------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index bc7e22ab5d812..775461cf2d2db 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -34,9 +34,9 @@ * * CPU Interface: * - * - kvm_vgic_vcpu_init(): initialization of static data that - * doesn't depend on any sizing information or emulation type. No - * allocation is allowed there. + * - kvm_vgic_vcpu_init(): initialization of static data that doesn't depend + * on any sizing information. Private interrupts are allocated if not + * already allocated at vgic-creation time. */ /* EARLY INIT */ @@ -58,6 +58,8 @@ void kvm_vgic_early_init(struct kvm *kvm) /* CREATION */ +static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type); + /** * kvm_vgic_create: triggered by the instantiation of the VGIC device by * user space, either through the legacy KVM_CREATE_IRQCHIP ioctl (v2 only) @@ -112,6 +114,22 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) goto out_unlock; } + kvm_for_each_vcpu(i, vcpu, kvm) { + ret = vgic_allocate_private_irqs_locked(vcpu, type); + if (ret) + break; + } + + if (ret) { + kvm_for_each_vcpu(i, vcpu, kvm) { + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + kfree(vgic_cpu->private_irqs); + vgic_cpu->private_irqs = NULL; + } + + goto out_unlock; + } + kvm->arch.vgic.in_kernel = true; kvm->arch.vgic.vgic_model = type; @@ -180,7 +198,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) return 0; } -static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu) +static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; int i; @@ -218,17 +236,28 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu) /* PPIs */ irq->config = VGIC_CONFIG_LEVEL; } + + switch (type) { + case KVM_DEV_TYPE_ARM_VGIC_V3: + irq->group = 1; + irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu); + break; + case KVM_DEV_TYPE_ARM_VGIC_V2: + irq->group = 0; + irq->targets = BIT(vcpu->vcpu_id); + break; + } } return 0; } -static int vgic_allocate_private_irqs(struct kvm_vcpu *vcpu) +static int vgic_allocate_private_irqs(struct kvm_vcpu *vcpu, u32 type) { int ret; mutex_lock(&vcpu->kvm->arch.config_lock); - ret = vgic_allocate_private_irqs_locked(vcpu); + ret = vgic_allocate_private_irqs_locked(vcpu, type); mutex_unlock(&vcpu->kvm->arch.config_lock); return ret; @@ -258,7 +287,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) if (!irqchip_in_kernel(vcpu->kvm)) return 0; - ret = vgic_allocate_private_irqs(vcpu); + ret = vgic_allocate_private_irqs(vcpu, dist->vgic_model); if (ret) return ret; @@ -295,7 +324,7 @@ int vgic_init(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; struct kvm_vcpu *vcpu; - int ret = 0, i; + int ret = 0; unsigned long idx; lockdep_assert_held(&kvm->arch.config_lock); @@ -315,35 +344,6 @@ int vgic_init(struct kvm *kvm) if (ret) goto out; - /* Initialize groups on CPUs created before the VGIC type was known */ - kvm_for_each_vcpu(idx, vcpu, kvm) { - ret = vgic_allocate_private_irqs_locked(vcpu); - if (ret) - goto out; - - for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { - struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i); - - switch (dist->vgic_model) { - case KVM_DEV_TYPE_ARM_VGIC_V3: - irq->group = 1; - irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu); - break; - case KVM_DEV_TYPE_ARM_VGIC_V2: - irq->group = 0; - irq->targets = 1U << idx; - break; - default: - ret = -EINVAL; - } - - vgic_put_irq(kvm, irq); - - if (ret) - goto out; - } - } - /* * If we have GICv4.1 enabled, unconditionally request enable the * v4 support so that we get HW-accelerated vSGIs. Otherwise, only -- GitLab From 320702a76186222426e5dc8efb9d68ba9d4ed0ab Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 13 Feb 2025 17:29:51 +0100 Subject: [PATCH 0636/1585] MAINTAINERS: delete entry for AXXIA I2C The maintainer's email address bounced and he wasn't active for 4 years. Delete this entry and fall back to the generic I2C host drivers entry. Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20250213162950.45596-2-wsa+renesas@sang-engineering.com Signed-off-by: Andi Shyti --- MAINTAINERS | 7 ------- 1 file changed, 7 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 25c86f47353de..fc332fbf3958c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3859,13 +3859,6 @@ W: https://ez.analog.com/linux-software-drivers F: Documentation/devicetree/bindings/pwm/adi,axi-pwmgen.yaml F: drivers/pwm/pwm-axi-pwmgen.c -AXXIA I2C CONTROLLER -M: Krzysztof Adamski -L: linux-i2c@vger.kernel.org -S: Maintained -F: Documentation/devicetree/bindings/i2c/i2c-axxia.txt -F: drivers/i2c/busses/i2c-axxia.c - AZ6007 DVB DRIVER M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org -- GitLab From 7422c319fd805b956aab5ba93e0274517a8e3650 Mon Sep 17 00:00:00 2001 From: Mukesh Kumar Savaliya Date: Thu, 23 Jan 2025 14:11:47 +0530 Subject: [PATCH 0637/1585] MAINTAINERS: Add maintainer for Qualcomm's I2C GENI driver Add a new entry for the I2C QCOM GENI driver to the MAINTAINERS file. This entry includes the maintainer's name and contact information, ensuring proper maintainership and communication for the i2c-qcom-geni driver file. Signed-off-by: Mukesh Kumar Savaliya Link: https://lore.kernel.org/r/20250123084147.3632023-1-quic_msavaliy@quicinc.com Signed-off-by: Andi Shyti --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index fc332fbf3958c..db7e533c466c0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19503,6 +19503,15 @@ L: dmaengine@vger.kernel.org S: Supported F: drivers/dma/qcom/hidma* +QUALCOMM I2C QCOM GENI DRIVER +M: Mukesh Kumar Savaliya +M: Viken Dadhaniya +L: linux-i2c@vger.kernel.org +L: linux-arm-msm@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/i2c/qcom,i2c-geni-qcom.yaml +F: drivers/i2c/busses/i2c-qcom-geni.c + QUALCOMM I2C CCI DRIVER M: Loic Poulain M: Robert Foss -- GitLab From 325735e83d7d0016e7b61069df2570e910898466 Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Fri, 14 Feb 2025 09:30:21 +0800 Subject: [PATCH 0638/1585] ALSA: hda/tas2781: Fix index issue in tas2781 hda SPI driver Correct wrong mask for device index. Signed-off-by: Baojun Xu Fixes: bb5f86ea50ff ("ALSA: hda/tas2781: Add tas2781 hda SPI driver") Link: https://patch.msgid.link/20250214013021.6072-1-baojun.xu@ti.com Signed-off-by: Takashi Iwai --- sound/pci/hda/tas2781_spi_fwlib.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/pci/hda/tas2781_spi_fwlib.c b/sound/pci/hda/tas2781_spi_fwlib.c index 0e2acbc3c9001..131d9a77d1406 100644 --- a/sound/pci/hda/tas2781_spi_fwlib.c +++ b/sound/pci/hda/tas2781_spi_fwlib.c @@ -2,7 +2,7 @@ // // TAS2781 HDA SPI driver // -// Copyright 2024 Texas Instruments, Inc. +// Copyright 2024-2025 Texas Instruments, Inc. // // Author: Baojun Xu @@ -771,19 +771,19 @@ static int tasdevice_process_block(void *context, unsigned char *data, switch (subblk_typ) { case TASDEVICE_CMD_SING_W: subblk_offset = tasdevice_single_byte_wr(tas_priv, - dev_idx & 0x4f, data, sublocksize); + dev_idx & 0x3f, data, sublocksize); break; case TASDEVICE_CMD_BURST: subblk_offset = tasdevice_burst_wr(tas_priv, - dev_idx & 0x4f, data, sublocksize); + dev_idx & 0x3f, data, sublocksize); break; case TASDEVICE_CMD_DELAY: subblk_offset = tasdevice_delay(tas_priv, - dev_idx & 0x4f, data, sublocksize); + dev_idx & 0x3f, data, sublocksize); break; case TASDEVICE_CMD_FIELD_W: subblk_offset = tasdevice_field_wr(tas_priv, - dev_idx & 0x4f, data, sublocksize); + dev_idx & 0x3f, data, sublocksize); break; default: subblk_offset = 2; -- GitLab From 822b7ec657e99b44b874e052d8540d8b54fe8569 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Thu, 13 Feb 2025 15:45:43 +0800 Subject: [PATCH 0639/1585] ALSA: hda: Add error check for snd_ctl_rename_id() in snd_hda_create_dig_out_ctls() Check the return value of snd_ctl_rename_id() in snd_hda_create_dig_out_ctls(). Ensure that failures are properly handled. [ Note: the error cannot happen practically because the only error condition in snd_ctl_rename_id() is the missing ID, but this is a rename, hence it must be present. But for the code consistency, it's safer to have always the proper return check -- tiwai ] Fixes: 5c219a340850 ("ALSA: hda: Fix kctl->id initialization") Cc: stable@vger.kernel.org # 6.4+ Signed-off-by: Wentao Liang Link: https://patch.msgid.link/20250213074543.1620-1-vulab@iscas.ac.cn Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 14763c0f31ad9..46a2204049993 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -2470,7 +2470,9 @@ int snd_hda_create_dig_out_ctls(struct hda_codec *codec, break; id = kctl->id; id.index = spdif_index; - snd_ctl_rename_id(codec->card, &kctl->id, &id); + err = snd_ctl_rename_id(codec->card, &kctl->id, &id); + if (err < 0) + return err; } bus->primary_dig_out_type = HDA_PCM_TYPE_HDMI; } -- GitLab From ef75966abf950c0539534effa4960caa29fb7167 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Mon, 27 Jan 2025 09:44:11 +0000 Subject: [PATCH 0640/1585] iommu/amd: Expicitly enable CNTRL.EPHEn bit in resume path With recent kernel, AMDGPU failed to resume after suspend on certain laptop. Sample log: ----------- Nov 14 11:52:19 Thinkbook kernel: iommu ivhd0: AMD-Vi: Event logged [ILLEGAL_DEV_TABLE_ENTRY device=0000:06:00.0 pasid=0x00000 address=0x135300000 flags=0x0080] Nov 14 11:52:19 Thinkbook kernel: AMD-Vi: DTE[0]: 7d90000000000003 Nov 14 11:52:19 Thinkbook kernel: AMD-Vi: DTE[1]: 0000100103fc0009 Nov 14 11:52:19 Thinkbook kernel: AMD-Vi: DTE[2]: 2000000117840013 Nov 14 11:52:19 Thinkbook kernel: AMD-Vi: DTE[3]: 0000000000000000 This is because in resume path, CNTRL[EPHEn] is not set. Fix this by setting CNTRL[EPHEn] to 1 in resume path if EFR[EPHSUP] is set. Note May be better approach is to save the control register in suspend path and restore it in resume path instead of trying to set indivisual bits. We will have separate patch for that. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219499 Fixes: c4cb23111103 ("iommu/amd: Add support for enable/disable IOPF") Tested-by: Hamish McIntyre-Bhatty Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20250127094411.5931-1-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 1 + drivers/iommu/amd/init.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 0bbda60d3cdc7..23caea22f8dcd 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -175,6 +175,7 @@ #define CONTROL_GAM_EN 25 #define CONTROL_GALOG_EN 28 #define CONTROL_GAINT_EN 29 +#define CONTROL_EPH_EN 45 #define CONTROL_XT_EN 50 #define CONTROL_INTCAPXT_EN 51 #define CONTROL_IRTCACHEDIS 59 diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index c5cd92edada06..438848b0682fe 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2653,6 +2653,10 @@ static void iommu_init_flags(struct amd_iommu *iommu) /* Set IOTLB invalidation timeout to 1s */ iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S); + + /* Enable Enhanced Peripheral Page Request Handling */ + if (check_feature(FEATURE_EPHSUP)) + iommu_feature_enable(iommu, CONTROL_EPH_EN); } static void iommu_apply_resume_quirks(struct amd_iommu *iommu) -- GitLab From 78be7f04537fa35f6cc694879e9a475ca1984936 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 28 Jan 2025 19:05:21 +0000 Subject: [PATCH 0641/1585] iommu: Fix a spelling error Fix spelling error IDENITY -> IDENTITY in drivers/iommu/iommu.c. Signed-off-by: Easwar Hariharan Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20250128190522.70800-1-eahariha@linux.microsoft.com [ joro: Add commit message ] Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 870c3cdbd0f62..60aed01e54f27 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1756,7 +1756,7 @@ static int iommu_get_def_domain_type(struct iommu_group *group, group->id); /* - * Try to recover, drivers are allowed to force IDENITY or DMA, IDENTITY + * Try to recover, drivers are allowed to force IDENTITY or DMA, IDENTITY * takes precedence. */ if (type == IOMMU_DOMAIN_IDENTITY) -- GitLab From 4a8991fe9cd0b6a509bab3d056700d3520601d86 Mon Sep 17 00:00:00 2001 From: Andrew Kreimer Date: Mon, 10 Feb 2025 13:20:04 +0200 Subject: [PATCH 0642/1585] iommu/exynos: Fix typos There are some typos in comments/messages: - modyfying -> modifying - Unabled -> Unable Fix them via codespell. Signed-off-by: Andrew Kreimer Link: https://lore.kernel.org/r/20250210112027.29791-1-algonell@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index c666ecab955d2..69e23e017d9e5 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -249,7 +249,7 @@ struct exynos_iommu_domain { struct list_head clients; /* list of sysmmu_drvdata.domain_node */ sysmmu_pte_t *pgtable; /* lv1 page table, 16KB */ short *lv2entcnt; /* free lv2 entry counter for each section */ - spinlock_t lock; /* lock for modyfying list of clients */ + spinlock_t lock; /* lock for modifying list of clients */ spinlock_t pgtablelock; /* lock for modifying page table @ pgtable */ struct iommu_domain domain; /* generic domain data structure */ }; @@ -292,7 +292,7 @@ struct sysmmu_drvdata { struct clk *aclk; /* SYSMMU's aclk clock */ struct clk *pclk; /* SYSMMU's pclk clock */ struct clk *clk_master; /* master's device clock */ - spinlock_t lock; /* lock for modyfying state */ + spinlock_t lock; /* lock for modifying state */ bool active; /* current status */ struct exynos_iommu_domain *domain; /* domain we belong to */ struct list_head domain_node; /* node for domain clients list */ @@ -746,7 +746,7 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) ret = devm_request_irq(dev, irq, exynos_sysmmu_irq, 0, dev_name(dev), data); if (ret) { - dev_err(dev, "Unabled to register handler of irq %d\n", irq); + dev_err(dev, "Unable to register handler of irq %d\n", irq); return ret; } -- GitLab From add43c4fbc92f8b48c1acd64e953af3b1be4cd9c Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 11 Feb 2025 08:55:12 +0800 Subject: [PATCH 0643/1585] iommu/vt-d: Make intel_iommu_drain_pasid_prq() cover faults for RID This driver supports page faults on PCI RID since commit <9f831c16c69e> ("iommu/vt-d: Remove the pasid present check in prq_event_thread") by allowing the reporting of page faults with the pasid_present field cleared to the upper layer for further handling. The fundamental assumption here is that the detach or replace operations act as a fence for page faults. This implies that all pending page faults associated with a specific RID or PASID are flushed when a domain is detached or replaced from a device RID or PASID. However, the intel_iommu_drain_pasid_prq() helper does not correctly handle faults for RID. This leads to faults potentially remaining pending in the iommu hardware queue even after the domain is detached, thereby violating the aforementioned assumption. Fix this issue by extending intel_iommu_drain_pasid_prq() to cover faults for RID. Fixes: 9f831c16c69e ("iommu/vt-d: Remove the pasid present check in prq_event_thread") Cc: stable@vger.kernel.org Suggested-by: Kevin Tian Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20250121023150.815972-1-baolu.lu@linux.intel.com Reviewed-by: Yi Liu Link: https://lore.kernel.org/r/20250211005512.985563-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/prq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/prq.c b/drivers/iommu/intel/prq.c index c2d792db52c3e..064194399b38b 100644 --- a/drivers/iommu/intel/prq.c +++ b/drivers/iommu/intel/prq.c @@ -87,7 +87,9 @@ void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid) struct page_req_dsc *req; req = &iommu->prq[head / sizeof(*req)]; - if (!req->pasid_present || req->pasid != pasid) { + if (req->rid != sid || + (req->pasid_present && pasid != req->pasid) || + (!req->pasid_present && pasid != IOMMU_NO_PASID)) { head = (head + sizeof(*req)) & PRQ_RING_MASK; continue; } -- GitLab From e71f7f42e3c874ac3314b8f250e8416a706165af Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 2 Feb 2025 20:49:35 +0800 Subject: [PATCH 0644/1585] USB: pci-quirks: Fix HCCPARAMS register error for LS7A EHCI LS7A EHCI controller doesn't have extended capabilities, so the EECP (EHCI Extended Capabilities Pointer) field of HCCPARAMS register should be 0x0, but it reads as 0xa0 now. This is a hardware flaw and will be fixed in future, now just clear the EECP field to avoid error messages on boot: ...... [ 0.581675] pci 0000:00:04.1: EHCI: unrecognized capability ff [ 0.581699] pci 0000:00:04.1: EHCI: unrecognized capability ff [ 0.581716] pci 0000:00:04.1: EHCI: unrecognized capability ff [ 0.581851] pci 0000:00:04.1: EHCI: unrecognized capability ff ...... [ 0.581916] pci 0000:00:05.1: EHCI: unrecognized capability ff [ 0.581951] pci 0000:00:05.1: EHCI: unrecognized capability ff [ 0.582704] pci 0000:00:05.1: EHCI: unrecognized capability ff [ 0.582799] pci 0000:00:05.1: EHCI: unrecognized capability ff ...... Cc: stable Signed-off-by: Baoqi Zhang Signed-off-by: Huacai Chen Link: https://lore.kernel.org/r/20250202124935.480500-1-chenhuacai@loongson.cn Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/pci-quirks.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index 1f9c1b1435d86..0404489c2f6a9 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -958,6 +958,15 @@ static void quirk_usb_disable_ehci(struct pci_dev *pdev) * booting from USB disk or using a usb keyboard */ hcc_params = readl(base + EHCI_HCC_PARAMS); + + /* LS7A EHCI controller doesn't have extended capabilities, the + * EECP (EHCI Extended Capabilities Pointer) field of HCCPARAMS + * register should be 0x0 but it reads as 0xa0. So clear it to + * avoid error messages on boot. + */ + if (pdev->vendor == PCI_VENDOR_ID_LOONGSON && pdev->device == 0x7a14) + hcc_params &= ~(0xffL << 8); + offset = (hcc_params >> 8) & 0xff; while (offset && --count) { pci_read_config_dword(pdev, offset, &cap); -- GitLab From c81d9fcd5b9402166048f377d4e5e0ee6f9ef26d Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Tue, 28 Jan 2025 10:45:29 +0100 Subject: [PATCH 0645/1585] usb: xhci: Restore xhci_pci support for Renesas HCs Some Renesas HCs require firmware upload to work, this is handled by the xhci_pci_renesas driver. Other variants of those chips load firmware from a SPI flash and are ready to work with xhci_pci alone. A refactor merged in v6.12 broke the latter configuration so that users are finding their hardware ignored by the normal driver and are forced to enable the firmware loader which isn't really necessary on their systems. Let xhci_pci work with those chips as before when the firmware loader is disabled by kernel configuration. Fixes: 25f51b76f90f ("xhci-pci: Make xhci-pci-renesas a proper modular driver") Cc: stable Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219616 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219726 Signed-off-by: Michal Pecio Tested-by: Nicolai Buchwitz Link: https://lore.kernel.org/r/20250128104529.58a79bfc@foxbook Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 2d1e205c14c60..ad0ff356f6fa0 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -653,8 +653,8 @@ int xhci_pci_common_probe(struct pci_dev *dev, const struct pci_device_id *id) } EXPORT_SYMBOL_NS_GPL(xhci_pci_common_probe, "xhci"); -static const struct pci_device_id pci_ids_reject[] = { - /* handled by xhci-pci-renesas */ +/* handled by xhci-pci-renesas if enabled */ +static const struct pci_device_id pci_ids_renesas[] = { { PCI_DEVICE(PCI_VENDOR_ID_RENESAS, 0x0014) }, { PCI_DEVICE(PCI_VENDOR_ID_RENESAS, 0x0015) }, { /* end: all zeroes */ } @@ -662,7 +662,8 @@ static const struct pci_device_id pci_ids_reject[] = { static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) { - if (pci_match_id(pci_ids_reject, dev)) + if (IS_ENABLED(CONFIG_USB_XHCI_PCI_RENESAS) && + pci_match_id(pci_ids_renesas, dev)) return -ENODEV; return xhci_pci_common_probe(dev, id); -- GitLab From e563b01208f4d1f609bcab13333b6c0e24ce6a01 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 12 Feb 2025 19:15:15 +0100 Subject: [PATCH 0646/1585] usb: cdc-acm: Check control transfer buffer size before access If the first fragment is shorter than struct usb_cdc_notification, we can't calculate an expected_size. Log an error and discard the notification instead of reading lengths from memory outside the received data, which can lead to memory corruption when the expected_size decreases between fragments, causing `expected_size - acm->nb_index` to wrap. This issue has been present since the beginning of git history; however, it only leads to memory corruption since commit ea2583529cd1 ("cdc-acm: reassemble fragmented notifications"). A mitigating factor is that acm_ctrl_irq() can only execute after userspace has opened /dev/ttyACM*; but if ModemManager is running, ModemManager will do that automatically depending on the USB device's vendor/product IDs and its other interfaces. Cc: stable Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jann Horn Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 6b37d1c47fce1..39c7db7bcd216 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -371,7 +371,7 @@ static void acm_process_notification(struct acm *acm, unsigned char *buf) static void acm_ctrl_irq(struct urb *urb) { struct acm *acm = urb->context; - struct usb_cdc_notification *dr = urb->transfer_buffer; + struct usb_cdc_notification *dr; unsigned int current_size = urb->actual_length; unsigned int expected_size, copy_size, alloc_size; int retval; @@ -398,9 +398,20 @@ static void acm_ctrl_irq(struct urb *urb) usb_mark_last_busy(acm->dev); - if (acm->nb_index) + if (acm->nb_index == 0) { + /* + * The first chunk of a message must contain at least the + * notification header with the length field, otherwise we + * can't get an expected_size. + */ + if (current_size < sizeof(struct usb_cdc_notification)) { + dev_dbg(&acm->control->dev, "urb too short\n"); + goto exit; + } + dr = urb->transfer_buffer; + } else { dr = (struct usb_cdc_notification *)acm->notification_buffer; - + } /* size = notification-header + (optional) data */ expected_size = sizeof(struct usb_cdc_notification) + le16_to_cpu(dr->wLength); -- GitLab From 12e712964f41d05ae034989892de445781c46730 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 12 Feb 2025 19:15:16 +0100 Subject: [PATCH 0647/1585] usb: cdc-acm: Fix handling of oversized fragments If we receive an initial fragment of size 8 bytes which specifies a wLength of 1 byte (so the reassembled message is supposed to be 9 bytes long), and we then receive a second fragment of size 9 bytes (which is not supposed to happen), we currently wrongly bypass the fragment reassembly code but still pass the pointer to the acm->notification_buffer to acm_process_notification(). Make this less wrong by always going through fragment reassembly when we expect more fragments. Before this patch, receiving an overlong fragment could lead to `newctrl` in acm_process_notification() being uninitialized data (instead of data coming from the device). Cc: stable Fixes: ea2583529cd1 ("cdc-acm: reassemble fragmented notifications") Signed-off-by: Jann Horn Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 39c7db7bcd216..c70f349936238 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -416,7 +416,7 @@ static void acm_ctrl_irq(struct urb *urb) expected_size = sizeof(struct usb_cdc_notification) + le16_to_cpu(dr->wLength); - if (current_size < expected_size) { + if (acm->nb_index != 0 || current_size < expected_size) { /* notification is transmitted fragmented, reassemble */ if (acm->nb_size < expected_size) { u8 *new_buffer; -- GitLab From 7284922f3e4fa285dff1b8bb593aa9a0b8458f30 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 9 Feb 2025 15:56:11 +0100 Subject: [PATCH 0648/1585] USB: cdc-acm: Fill in Renesas R-Car D3 USB Download mode quirk Add Renesas R-Car D3 USB Download mode quirk and update comments on all the other Renesas R-Car USB Download mode quirks to discern them from each other. This follows R-Car Series, 3rd Generation reference manual Rev.2.00 chapter 19.2.8 USB download mode . Fixes: 6d853c9e4104 ("usb: cdc-acm: Add DISABLE_ECHO for Renesas USB Download mode") Cc: stable Signed-off-by: Marek Vasut Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20250209145708.106914-1-marek.vasut+renesas@mailbox.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index c70f349936238..c2ecfa3c83496 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1738,13 +1738,16 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x0870, 0x0001), /* Metricom GS Modem */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, - { USB_DEVICE(0x045b, 0x023c), /* Renesas USB Download mode */ + { USB_DEVICE(0x045b, 0x023c), /* Renesas R-Car H3 USB Download mode */ .driver_info = DISABLE_ECHO, /* Don't echo banner */ }, - { USB_DEVICE(0x045b, 0x0248), /* Renesas USB Download mode */ + { USB_DEVICE(0x045b, 0x0247), /* Renesas R-Car D3 USB Download mode */ .driver_info = DISABLE_ECHO, /* Don't echo banner */ }, - { USB_DEVICE(0x045b, 0x024D), /* Renesas USB Download mode */ + { USB_DEVICE(0x045b, 0x0248), /* Renesas R-Car M3-N USB Download mode */ + .driver_info = DISABLE_ECHO, /* Don't echo banner */ + }, + { USB_DEVICE(0x045b, 0x024D), /* Renesas R-Car E3 USB Download mode */ .driver_info = DISABLE_ECHO, /* Don't echo banner */ }, { USB_DEVICE(0x0e8d, 0x0003), /* FIREFLY, MediaTek Inc; andrey.arapov@gmail.com */ -- GitLab From 159daf1258227f44b26b5d38f4aa8f37b8cca663 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 6 Feb 2025 17:18:36 +0200 Subject: [PATCH 0649/1585] USB: Add USB_QUIRK_NO_LPM quirk for sony xperia xz1 smartphone The fastboot tool for communicating with Android bootloaders does not work reliably with this device if USB 2 Link Power Management (LPM) is enabled. Various fastboot commands are affected, including the following, which usually reproduces the problem within two tries: fastboot getvar kernel getvar:kernel FAILED (remote: 'GetVar Variable Not found') This issue was hidden on many systems up until commit 63a1f8454962 ("xhci: stored cached port capability values in one place") as the xhci driver failed to detect USB 2 LPM support if USB 3 ports were listed before USB 2 ports in the "supported protocol capabilities". Adding the quirk resolves the issue. No drawbacks are expected since the device uses different USB product IDs outside of fastboot mode, and since fastboot commands worked before, until LPM was enabled on the tested system by the aforementioned commit. Based on a patch from Forest from which most of the code and commit message is taken. Cc: stable Reported-by: Forest Closes: https://lore.kernel.org/hk8umj9lv4l4qguftdq1luqtdrpa1gks5l@sonic.net Tested-by: Forest Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250206151836.51742-1-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 67732c791c933..59ed9768dae1f 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -435,6 +435,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0c45, 0x7056), .driver_info = USB_QUIRK_IGNORE_REMOTE_WAKEUP }, + /* Sony Xperia XZ1 Compact (lilac) smartphone in fastboot mode */ + { USB_DEVICE(0x0fce, 0x0dde), .driver_info = USB_QUIRK_NO_LPM }, + /* Action Semiconductor flash disk */ { USB_DEVICE(0x10d6, 0x2200), .driver_info = USB_QUIRK_STRING_FETCH_255 }, -- GitLab From 89cb121e94612cd3bb3c74b0e772ead5b40b7a5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 11 Feb 2025 14:25:28 +0100 Subject: [PATCH 0650/1585] selftests/landlock: Enable the new CONFIG_AF_UNIX_OOB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 5155cbcdbf03 ("af_unix: Add a prompt to CONFIG_AF_UNIX_OOB"), the Landlock selftests's configuration is not enough to build a minimal kernel. Because scoped_signal_test checks with the MSG_OOB flag, we need to enable CONFIG_AF_UNIX_OOB for tests: # RUN fown.no_sandbox.sigurg_socket ... # scoped_signal_test.c:420:sigurg_socket:Expected 1 (1) == send(client_socket, ".", 1, MSG_OOB) (-1) # sigurg_socket: Test terminated by assertion # FAIL fown.no_sandbox.sigurg_socket ... Cc: Günther Noack Acked-by: Florent Revest Link: https://lore.kernel.org/r/20250211132531.1625566-1-mic@digikod.net Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config index 29af19c4e9f98..361f94f8cb0d4 100644 --- a/tools/testing/selftests/landlock/config +++ b/tools/testing/selftests/landlock/config @@ -1,3 +1,4 @@ +CONFIG_AF_UNIX_OOB=y CONFIG_CGROUPS=y CONFIG_CGROUP_SCHED=y CONFIG_INET=y -- GitLab From 143c9aae043a1dc174a75be52521192a0caa224b Mon Sep 17 00:00:00 2001 From: Tanya Agarwal Date: Fri, 24 Jan 2025 01:12:10 +0530 Subject: [PATCH 0651/1585] landlock: Fix grammar error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix grammar error in comments that were identified using the codespell tool. Signed-off-by: Tanya Agarwal Reviewed-by: Günther Noack Link: https://lore.kernel.org/r/20250123194208.2660-1-tanyaagarwal25699@gmail.com [mic: Simplify commit message] Signed-off-by: Mickaël Salaün --- security/landlock/ruleset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c index 241ce44375b6a..bff4e40a3093c 100644 --- a/security/landlock/ruleset.c +++ b/security/landlock/ruleset.c @@ -124,7 +124,7 @@ create_rule(const struct landlock_id id, return ERR_PTR(-ENOMEM); RB_CLEAR_NODE(&new_rule->node); if (is_object_pointer(id.type)) { - /* This should be catched by insert_rule(). */ + /* This should have been caught by insert_rule(). */ WARN_ON_ONCE(!id.key.object); landlock_get_object(id.key.object); } -- GitLab From 192b7ff29b1fb0335a9b9107991e6286f462f361 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Fri, 24 Jan 2025 15:44:44 +0000 Subject: [PATCH 0652/1585] landlock: Minor typo and grammar fixes in IPC scoping documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix some whitespace, punctuation and minor grammar. * Add a missing sentence about the minimum ABI version, to stay in line with the section next to it. Cc: Tahera Fahimi Cc: Tanya Agarwal Signed-off-by: Günther Noack Link: https://lore.kernel.org/r/20250124154445.162841-1-gnoack@google.com [mic: Add newlines, update doc date] Signed-off-by: Mickaël Salaün --- Documentation/userspace-api/landlock.rst | 6 +++--- include/uapi/linux/landlock.h | 8 ++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst index d639c61cb472a..ad587f53fe417 100644 --- a/Documentation/userspace-api/landlock.rst +++ b/Documentation/userspace-api/landlock.rst @@ -8,7 +8,7 @@ Landlock: unprivileged access control ===================================== :Author: Mickaël Salaün -:Date: October 2024 +:Date: January 2025 The goal of Landlock is to enable restriction of ambient rights (e.g. global filesystem or network access) for a set of processes. Because Landlock @@ -329,11 +329,11 @@ non-sandboxed process, we can specify this restriction with A sandboxed process can connect to a non-sandboxed process when its domain is not scoped. If a process's domain is scoped, it can only connect to sockets created by processes in the same scope. -Moreover, If a process is scoped to send signal to a non-scoped process, it can +Moreover, if a process is scoped to send signal to a non-scoped process, it can only send signals to processes in the same scope. A connected datagram socket behaves like a stream socket when its domain is -scoped, meaning if the domain is scoped after the socket is connected , it can +scoped, meaning if the domain is scoped after the socket is connected, it can still :manpage:`send(2)` data just like a stream socket. However, in the same scenario, a non-connected datagram socket cannot send data (with :manpage:`sendto(2)`) outside its scope. diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index 33745642f7875..e1d2c27533b49 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -268,7 +268,9 @@ struct landlock_net_port_attr { * ~~~~~~~~~~~~~~~~ * * These flags enable to restrict a sandboxed process to a set of network - * actions. This is supported since the Landlock ABI version 4. + * actions. + * + * This is supported since Landlock ABI version 4. * * The following access rights apply to TCP port numbers: * @@ -291,11 +293,13 @@ struct landlock_net_port_attr { * Setting a flag for a ruleset will isolate the Landlock domain to forbid * connections to resources outside the domain. * + * This is supported since Landlock ABI version 6. + * * Scopes: * * - %LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET: Restrict a sandboxed process from * connecting to an abstract UNIX socket created by a process outside the - * related Landlock domain (e.g. a parent domain or a non-sandboxed process). + * related Landlock domain (e.g., a parent domain or a non-sandboxed process). * - %LANDLOCK_SCOPE_SIGNAL: Restrict a sandboxed process from sending a signal * to another process outside the domain. */ -- GitLab From 854277e2cc8c75dc3c216c82e72523258fcf65b9 Mon Sep 17 00:00:00 2001 From: Mikhail Ivanov Date: Wed, 5 Feb 2025 17:36:49 +0800 Subject: [PATCH 0653/1585] landlock: Fix non-TCP sockets restriction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use sk_is_tcp() to check if socket is TCP in bind(2) and connect(2) hooks. SMC, MPTCP, SCTP protocols are currently restricted by TCP access rights. The purpose of TCP access rights is to provide control over ports that can be used by userland to establish a TCP connection. Therefore, it is incorrect to deny bind(2) and connect(2) requests for a socket of another protocol. However, SMC, MPTCP and RDS implementations use TCP internal sockets to establish communication or even to exchange packets over a TCP connection [1]. Landlock rules that configure bind(2) and connect(2) usage for TCP sockets should not cover requests for sockets of such protocols. These protocols have different set of security issues and security properties, therefore, it is necessary to provide the userland with the ability to distinguish between them (eg. [2]). Control over TCP connection used by other protocols can be achieved with upcoming support of socket creation control [3]. [1] https://lore.kernel.org/all/62336067-18c2-3493-d0ec-6dd6a6d3a1b5@huawei-partners.com/ [2] https://lore.kernel.org/all/20241204.fahVio7eicim@digikod.net/ [3] https://lore.kernel.org/all/20240904104824.1844082-1-ivanov.mikhail1@huawei-partners.com/ Closes: https://github.com/landlock-lsm/linux/issues/40 Fixes: fff69fb03dde ("landlock: Support network rules with TCP bind and connect") Signed-off-by: Mikhail Ivanov Link: https://lore.kernel.org/r/20250205093651.1424339-2-ivanov.mikhail1@huawei-partners.com [mic: Format commit message to 72 columns] Signed-off-by: Mickaël Salaün --- security/landlock/net.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/security/landlock/net.c b/security/landlock/net.c index d5dcc4407a197..104b6c01fe503 100644 --- a/security/landlock/net.c +++ b/security/landlock/net.c @@ -63,8 +63,7 @@ static int current_check_access_socket(struct socket *const sock, if (WARN_ON_ONCE(dom->num_layers < 1)) return -EACCES; - /* Checks if it's a (potential) TCP socket. */ - if (sock->type != SOCK_STREAM) + if (!sk_is_tcp(sock->sk)) return 0; /* Checks for minimal header length to safely read sa_family. */ -- GitLab From f5534d511bcd273720f168386de74af76e148a9b Mon Sep 17 00:00:00 2001 From: Mikhail Ivanov Date: Wed, 5 Feb 2025 17:36:50 +0800 Subject: [PATCH 0654/1585] selftests/landlock: Test TCP accesses with protocol=IPPROTO_TCP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend protocol_variant structure with protocol field (Cf. socket(2)). Extend protocol fixture with TCP test suits with protocol=IPPROTO_TCP which can be used as an alias for IPPROTO_IP (=0) in socket(2). Signed-off-by: Mikhail Ivanov Link: https://lore.kernel.org/r/20250205093651.1424339-3-ivanov.mikhail1@huawei-partners.com Cc: # 6.7.x Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/common.h | 1 + tools/testing/selftests/landlock/net_test.c | 80 +++++++++++++++++---- 2 files changed, 67 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index a604ea5d8297c..6064c9ac05329 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -207,6 +207,7 @@ enforce_ruleset(struct __test_metadata *const _metadata, const int ruleset_fd) struct protocol_variant { int domain; int type; + int protocol; }; struct service_fixture { diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c index 4e0aeb53b225a..333263780fae4 100644 --- a/tools/testing/selftests/landlock/net_test.c +++ b/tools/testing/selftests/landlock/net_test.c @@ -85,18 +85,18 @@ static void setup_loopback(struct __test_metadata *const _metadata) clear_ambient_cap(_metadata, CAP_NET_ADMIN); } +static bool prot_is_tcp(const struct protocol_variant *const prot) +{ + return (prot->domain == AF_INET || prot->domain == AF_INET6) && + prot->type == SOCK_STREAM && + (prot->protocol == IPPROTO_TCP || prot->protocol == IPPROTO_IP); +} + static bool is_restricted(const struct protocol_variant *const prot, const enum sandbox_type sandbox) { - switch (prot->domain) { - case AF_INET: - case AF_INET6: - switch (prot->type) { - case SOCK_STREAM: - return sandbox == TCP_SANDBOX; - } - break; - } + if (sandbox == TCP_SANDBOX) + return prot_is_tcp(prot); return false; } @@ -105,7 +105,7 @@ static int socket_variant(const struct service_fixture *const srv) int ret; ret = socket(srv->protocol.domain, srv->protocol.type | SOCK_CLOEXEC, - 0); + srv->protocol.protocol); if (ret < 0) return -errno; return ret; @@ -290,22 +290,48 @@ FIXTURE_TEARDOWN(protocol) } /* clang-format off */ -FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp) { +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp1) { /* clang-format on */ .sandbox = NO_SANDBOX, .prot = { .domain = AF_INET, .type = SOCK_STREAM, + /* IPPROTO_IP == 0 */ + .protocol = IPPROTO_IP, }, }; /* clang-format off */ -FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp) { +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp2) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp1) { /* clang-format on */ .sandbox = NO_SANDBOX, .prot = { .domain = AF_INET6, .type = SOCK_STREAM, + /* IPPROTO_IP == 0 */ + .protocol = IPPROTO_IP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp2) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, }, }; @@ -350,22 +376,48 @@ FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_unix_datagram) { }; /* clang-format off */ -FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp) { +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp1) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + /* IPPROTO_IP == 0 */ + .protocol = IPPROTO_IP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp2) { /* clang-format on */ .sandbox = TCP_SANDBOX, .prot = { .domain = AF_INET, .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp1) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + /* IPPROTO_IP == 0 */ + .protocol = IPPROTO_IP, }, }; /* clang-format off */ -FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp) { +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp2) { /* clang-format on */ .sandbox = TCP_SANDBOX, .prot = { .domain = AF_INET6, .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, }, }; -- GitLab From 3d4033985ff508ef587ca11f1c8361ba57c7e09f Mon Sep 17 00:00:00 2001 From: Mikhail Ivanov Date: Wed, 5 Feb 2025 17:36:51 +0800 Subject: [PATCH 0655/1585] selftests/landlock: Test that MPTCP actions are not restricted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend protocol fixture with test suits for MPTCP protocol. Add CONFIG_MPTCP and CONFIG_MPTCP_IPV6 options in config. Signed-off-by: Mikhail Ivanov Link: https://lore.kernel.org/r/20250205093651.1424339-4-ivanov.mikhail1@huawei-partners.com Cc: # 6.7.x Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/config | 2 + tools/testing/selftests/landlock/net_test.c | 44 +++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config index 361f94f8cb0d4..425de4c20271c 100644 --- a/tools/testing/selftests/landlock/config +++ b/tools/testing/selftests/landlock/config @@ -4,6 +4,8 @@ CONFIG_CGROUP_SCHED=y CONFIG_INET=y CONFIG_IPV6=y CONFIG_KEYS=y +CONFIG_MPTCP=y +CONFIG_MPTCP_IPV6=y CONFIG_NET=y CONFIG_NET_NS=y CONFIG_OVERLAY_FS=y diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c index 333263780fae4..d9de0ee49ebc2 100644 --- a/tools/testing/selftests/landlock/net_test.c +++ b/tools/testing/selftests/landlock/net_test.c @@ -312,6 +312,17 @@ FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp2) { }, }; +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_mptcp) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, + }, +}; + /* clang-format off */ FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp1) { /* clang-format on */ @@ -335,6 +346,17 @@ FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp2) { }, }; +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_mptcp) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, + }, +}; + /* clang-format off */ FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_udp) { /* clang-format on */ @@ -398,6 +420,17 @@ FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp2) { }, }; +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_mptcp) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, + }, +}; + /* clang-format off */ FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp1) { /* clang-format on */ @@ -421,6 +454,17 @@ FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp2) { }, }; +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_mptcp) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, + }, +}; + /* clang-format off */ FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_udp) { /* clang-format on */ -- GitLab From 78332fdb956f18accfbca5993b10c5ed69f00a2c Mon Sep 17 00:00:00 2001 From: Bharadwaj Raju Date: Mon, 10 Feb 2025 21:40:57 +0530 Subject: [PATCH 0656/1585] selftests/landlock: Add binaries to .gitignore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building the test creates binaries 'wait-pipe' and 'sandbox-and-launch' which need to be gitignore'd. Signed-off-by: Bharadwaj Raju Link: https://lore.kernel.org/r/20250210161101.6024-1-bharadwaj.raju777@gmail.com [mic: Sort entries] Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/.gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/landlock/.gitignore b/tools/testing/selftests/landlock/.gitignore index 470203a7cd737..335b2b1a3463a 100644 --- a/tools/testing/selftests/landlock/.gitignore +++ b/tools/testing/selftests/landlock/.gitignore @@ -1,2 +1,4 @@ /*_test +/sandbox-and-launch /true +/wait-pipe -- GitLab From d3a8c28426fc1fb3252753a9f1db0d691ffc21b0 Mon Sep 17 00:00:00 2001 From: Selvarasu Ganesan Date: Sat, 1 Feb 2025 22:09:02 +0530 Subject: [PATCH 0657/1585] usb: dwc3: Fix timeout issue during controller enter/exit from halt state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a frequent timeout during controller enter/exit from halt state after toggling the run_stop bit by SW. This timeout occurs when performing frequent role switches between host and device, causing device enumeration issues due to the timeout. This issue was not present when USB2 suspend PHY was disabled by passing the SNPS quirks (snps,dis_u2_susphy_quirk and snps,dis_enblslpm_quirk) from the DTS. However, there is a requirement to enable USB2 suspend PHY by setting of GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY bits when controller starts in gadget or host mode results in the timeout issue. This commit addresses this timeout issue by ensuring that the bits GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY are cleared before starting the dwc3_gadget_run_stop sequence and restoring them after the dwc3_gadget_run_stop sequence is completed. Fixes: 72246da40f37 ("usb: Introduce DesignWare USB3 DRD Driver") Cc: stable Signed-off-by: Selvarasu Ganesan Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250201163903.459-1-selvarasu.g@samsung.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index d27af65eb08ae..ddd6b2ce57107 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2629,10 +2629,38 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on) { u32 reg; u32 timeout = 2000; + u32 saved_config = 0; if (pm_runtime_suspended(dwc->dev)) return 0; + /* + * When operating in USB 2.0 speeds (HS/FS), ensure that + * GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY are cleared before starting + * or stopping the controller. This resolves timeout issues that occur + * during frequent role switches between host and device modes. + * + * Save and clear these settings, then restore them after completing the + * controller start or stop sequence. + * + * This solution was discovered through experimentation as it is not + * mentioned in the dwc3 programming guide. It has been tested on an + * Exynos platforms. + */ + reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); + if (reg & DWC3_GUSB2PHYCFG_SUSPHY) { + saved_config |= DWC3_GUSB2PHYCFG_SUSPHY; + reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; + } + + if (reg & DWC3_GUSB2PHYCFG_ENBLSLPM) { + saved_config |= DWC3_GUSB2PHYCFG_ENBLSLPM; + reg &= ~DWC3_GUSB2PHYCFG_ENBLSLPM; + } + + if (saved_config) + dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); + reg = dwc3_readl(dwc->regs, DWC3_DCTL); if (is_on) { if (DWC3_VER_IS_WITHIN(DWC3, ANY, 187A)) { @@ -2660,6 +2688,12 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on) reg &= DWC3_DSTS_DEVCTRLHLT; } while (--timeout && !(!is_on ^ !reg)); + if (saved_config) { + reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); + reg |= saved_config; + dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); + } + if (!timeout) return -ETIMEDOUT; -- GitLab From 4aac0db5a0ebc599d4ad9bf5ebab78afa1f33e10 Mon Sep 17 00:00:00 2001 From: Stefan Eichenberger Date: Mon, 3 Feb 2025 11:58:24 +0100 Subject: [PATCH 0658/1585] usb: core: fix pipe creation for get_bMaxPacketSize0 When usb_control_msg is used in the get_bMaxPacketSize0 function, the USB pipe does not include the endpoint device number. This can cause failures when a usb hub port is reinitialized after encountering a bad cable connection. As a result, the system logs the following error messages: usb usb2-port1: cannot reset (err = -32) usb usb2-port1: Cannot enable. Maybe the USB cable is bad? usb usb2-port1: attempt power cycle usb 2-1: new high-speed USB device number 5 using ci_hdrc usb 2-1: device descriptor read/8, error -71 The problem began after commit 85d07c556216 ("USB: core: Unite old scheme and new scheme descriptor reads"). There usb_get_device_descriptor was replaced with get_bMaxPacketSize0. Unlike usb_get_device_descriptor, the get_bMaxPacketSize0 function uses the macro usb_rcvaddr0pipe, which does not include the endpoint device number. usb_get_device_descriptor, on the other hand, used the macro usb_rcvctrlpipe, which includes the endpoint device number. By modifying the get_bMaxPacketSize0 function to use usb_rcvctrlpipe instead of usb_rcvaddr0pipe, the issue can be resolved. This change will ensure that the endpoint device number is included in the USB pipe, preventing reinitialization failures. If the endpoint has not set the device number yet, it will still work because the device number is 0 in udev. Cc: stable Fixes: 85d07c556216 ("USB: core: Unite old scheme and new scheme descriptor reads") Signed-off-by: Stefan Eichenberger Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20250203105840.17539-1-eichest@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 0cd44f1fd56d2..a76bb50b62026 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4709,7 +4709,6 @@ void usb_ep0_reinit(struct usb_device *udev) EXPORT_SYMBOL_GPL(usb_ep0_reinit); #define usb_sndaddr0pipe() (PIPE_CONTROL << 30) -#define usb_rcvaddr0pipe() ((PIPE_CONTROL << 30) | USB_DIR_IN) static int hub_set_address(struct usb_device *udev, int devnum) { @@ -4815,7 +4814,7 @@ static int get_bMaxPacketSize0(struct usb_device *udev, for (i = 0; i < GET_MAXPACKET0_TRIES; ++i) { /* Start with invalid values in case the transfer fails */ buf->bDescriptorType = buf->bMaxPacketSize0 = 0; - rc = usb_control_msg(udev, usb_rcvaddr0pipe(), + rc = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), USB_REQ_GET_DESCRIPTOR, USB_DIR_IN, USB_DT_DEVICE << 8, 0, buf, size, -- GitLab From 4ab37fcb42832cdd3e9d5e50653285ca84d6686f Mon Sep 17 00:00:00 2001 From: Jill Donahue Date: Tue, 11 Feb 2025 10:48:05 -0700 Subject: [PATCH 0659/1585] USB: gadget: f_midi: f_midi_complete to call queue_work When using USB MIDI, a lock is attempted to be acquired twice through a re-entrant call to f_midi_transmit, causing a deadlock. Fix it by using queue_work() to schedule the inner f_midi_transmit() via a high priority work queue from the completion handler. Link: https://lore.kernel.org/all/CAArt=LjxU0fUZOj06X+5tkeGT+6RbXzpWg1h4t4Fwa_KGVAX6g@mail.gmail.com/ Fixes: d5daf49b58661 ("USB: gadget: midi: add midi function driver") Cc: stable Signed-off-by: Jill Donahue Link: https://lore.kernel.org/r/20250211174805.1369265-1-jdonahue@fender.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_midi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c index 47260d65066a8..da82598fcef8a 100644 --- a/drivers/usb/gadget/function/f_midi.c +++ b/drivers/usb/gadget/function/f_midi.c @@ -283,7 +283,7 @@ f_midi_complete(struct usb_ep *ep, struct usb_request *req) /* Our transmit completed. See if there's more to go. * f_midi_transmit eats req, don't queue it again. */ req->length = 0; - f_midi_transmit(midi); + queue_work(system_highpri_wq, &midi->work); return; } break; -- GitLab From 399a45e5237ca14037120b1b895bd38a3b4492ea Mon Sep 17 00:00:00 2001 From: Roy Luo Date: Tue, 4 Feb 2025 23:36:42 +0000 Subject: [PATCH 0660/1585] usb: gadget: core: flush gadget workqueue after device removal device_del() can lead to new work being scheduled in gadget->work workqueue. This is observed, for example, with the dwc3 driver with the following call stack: device_del() gadget_unbind_driver() usb_gadget_disconnect_locked() dwc3_gadget_pullup() dwc3_gadget_soft_disconnect() usb_gadget_set_state() schedule_work(&gadget->work) Move flush_work() after device_del() to ensure the workqueue is cleaned up. Fixes: 5702f75375aa9 ("usb: gadget: udc-core: move sysfs_notify() to a workqueue") Cc: stable Signed-off-by: Roy Luo Reviewed-by: Alan Stern Reviewed-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250204233642.666991-1-royluo@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index a6f46364be65f..4b3d5075621aa 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1543,8 +1543,8 @@ void usb_del_gadget(struct usb_gadget *gadget) kobject_uevent(&udc->dev.kobj, KOBJ_REMOVE); sysfs_remove_link(&udc->dev.kobj, "gadget"); - flush_work(&gadget->work); device_del(&gadget->dev); + flush_work(&gadget->work); ida_free(&gadget_id_numbers, gadget->id_number); cancel_work_sync(&udc->vbus_work); device_unregister(&udc->dev); -- GitLab From e169d96eecd447ff7fd7542ca5fa0911f5622054 Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Wed, 12 Feb 2025 17:38:29 +0800 Subject: [PATCH 0661/1585] USB: quirks: add USB_QUIRK_NO_LPM quirk for Teclast dist Teclast disk used on Huawei hisi platforms doesn't work well, losing connectivity intermittently if LPM is enabled. Add quirk disable LPM to resolve the issue. Signed-off-by: Lei Huang Cc: stable Link: https://lore.kernel.org/r/20250212093829.7379-1-huanglei814@163.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 59ed9768dae1f..dfcfc142bd5e1 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -528,6 +528,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Blackmagic Design UltraStudio SDI */ { USB_DEVICE(0x1edb, 0xbd4f), .driver_info = USB_QUIRK_NO_LPM }, + /* Teclast disk */ + { USB_DEVICE(0x1f75, 0x0917), .driver_info = USB_QUIRK_NO_LPM }, + /* Hauppauge HVR-950q */ { USB_DEVICE(0x2040, 0x7200), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, -- GitLab From e5644be4079750a0a0a5a7068fd90b97bf6fac55 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 12 Feb 2025 14:55:14 +0100 Subject: [PATCH 0662/1585] usb: gadget: uvc: Fix unstarted kthread worker The behaviour of kthread_create_worker() was recently changed to align with the one of kthread_create(). The kthread worker is created but not awaken by default. This is to allow the use of kthread_affine_preferred() and kthread_bind[_mask]() with kthread workers. In order to keep the old behaviour and wake the kthread up, kthread_run_worker() must be used. All the pre-existing users have been converted, except for UVC that was introduced in the same merge window as the API change. This results in hangs: INFO: task UVCG:82 blocked for more than 491 seconds. Tainted: G T 6.13.0-rc2-00014-gb04e317b5226 #1 task:UVCG state:D stack:0 pid:82 Call Trace: __schedule schedule schedule_preempt_disabled kthread ? kthread_flush_work ret_from_fork ret_from_fork_asm entry_INT80_32 Fix this with converting UVCG kworker to the new API. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202502121025.55bfa801-lkp@intel.com Fixes: f0bbfbd16b3b ("usb: gadget: uvc: rework to enqueue in pump worker from encoded queue") Cc: stable Cc: Michael Grzeschik Signed-off-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20250212135514.30539-1-frederic@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/uvc_video.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index 79e223713d8b9..fb77b0b217901 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -818,7 +818,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc) return -EINVAL; /* Allocate a kthread for asynchronous hw submit handler. */ - video->kworker = kthread_create_worker(0, "UVCG"); + video->kworker = kthread_run_worker(0, "UVCG"); if (IS_ERR(video->kworker)) { uvcg_err(&video->uvc->func, "failed to create UVCG kworker\n"); return PTR_ERR(video->kworker); -- GitLab From 634775a752a86784511018a108f3b530cc3399a7 Mon Sep 17 00:00:00 2001 From: Elson Roy Serrao Date: Thu, 6 Feb 2025 11:39:50 -0800 Subject: [PATCH 0663/1585] usb: roles: set switch registered flag early on The role switch registration and set_role() can happen in parallel as they are invoked independent of each other. There is a possibility that a driver might spend significant amount of time in usb_role_switch_register() API due to the presence of time intensive operations like component_add() which operate under common mutex. This leads to a time window after allocating the switch and before setting the registered flag where the set role notifications are dropped. Below timeline summarizes this behavior Thread1 | Thread2 usb_role_switch_register() | | | ---> allocate switch | | | ---> component_add() | usb_role_switch_set_role() | | | | | --> Drop role notifications | | since sw->registered | | flag is not set. | | --->Set registered flag.| To avoid this, set the registered flag early on in the switch register API. Fixes: b787a3e78175 ("usb: roles: don't get/set_role() when usb_role_switch is unregistered") Cc: stable Signed-off-by: Elson Roy Serrao Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250206193950.22421-1-quic_eserrao@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/roles/class.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c index c58a12c147f45..30482d4cf8267 100644 --- a/drivers/usb/roles/class.c +++ b/drivers/usb/roles/class.c @@ -387,8 +387,11 @@ usb_role_switch_register(struct device *parent, dev_set_name(&sw->dev, "%s-role-switch", desc->name ? desc->name : dev_name(parent)); + sw->registered = true; + ret = device_register(&sw->dev); if (ret) { + sw->registered = false; put_device(&sw->dev); return ERR_PTR(ret); } @@ -399,8 +402,6 @@ usb_role_switch_register(struct device *parent, dev_warn(&sw->dev, "failed to add component\n"); } - sw->registered = true; - /* TODO: Symlinks for the host port and the device controller. */ return sw; -- GitLab From 659f5d55feb75782bd46cf130da3c1f240afe9ba Mon Sep 17 00:00:00 2001 From: Jos Wang Date: Thu, 13 Feb 2025 21:49:21 +0800 Subject: [PATCH 0664/1585] usb: typec: tcpm: PSSourceOffTimer timeout in PR_Swap enters ERROR_RECOVERY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As PD2.0 spec ("6.5.6.2 PSSourceOffTimer"),the PSSourceOffTimer is used by the Policy Engine in Dual-Role Power device that is currently acting as a Sink to timeout on a PS_RDY Message during a Power Role Swap sequence. This condition leads to a Hard Reset for USB Type-A and Type-B Plugs and Error Recovery for Type-C plugs and return to USB Default Operation. Therefore, after PSSourceOffTimer timeout, the tcpm state machine should switch from PR_SWAP_SNK_SRC_SINK_OFF to ERROR_RECOVERY. This can also solve the test items in the USB power delivery compliance test: TEST.PD.PROT.SNK.12 PR_Swap – PSSourceOffTimer Timeout [1] https://usb.org/document-library/usb-power-delivery-compliance-test-specification-0/USB_PD3_CTS_Q4_2025_OR.zip Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)") Cc: stable Signed-off-by: Jos Wang Reviewed-by: Heikki Krogerus Tested-by: Amit Sunil Dhamne Link: https://lore.kernel.org/r/20250213134921.3798-1-joswang1221@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 47be450d2be35..6bf1a22c785af 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5591,8 +5591,7 @@ static void run_state_machine(struct tcpm_port *port) tcpm_set_auto_vbus_discharge_threshold(port, TYPEC_PWR_MODE_USB, port->pps_data.active, 0); tcpm_set_charge(port, false); - tcpm_set_state(port, hard_reset_state(port), - port->timings.ps_src_off_time); + tcpm_set_state(port, ERROR_RECOVERY, port->timings.ps_src_off_time); break; case PR_SWAP_SNK_SRC_SOURCE_ON: tcpm_enable_auto_vbus_discharge(port, true); -- GitLab From 66314e9a57a050f95cb0ebac904f5ab047a8926e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 2 Feb 2025 16:50:14 -0800 Subject: [PATCH 0665/1585] xfs: fix online repair probing when CONFIG_XFS_ONLINE_REPAIR=n I received a report from the release engineering side of the house that xfs_scrub without the -n flag (aka fix it mode) would try to fix a broken filesystem even on a kernel that doesn't have online repair built into it: # xfs_scrub -dTvn /mnt/test EXPERIMENTAL xfs_scrub program in use! Use at your own risk! Phase 1: Find filesystem geometry. /mnt/test: using 1 threads to scrub. Phase 1: Memory used: 132k/0k (108k/25k), time: 0.00/ 0.00/ 0.00s Phase 4: Repair filesystem. Info: /mnt/test/some/victimdir directory entries: Attempting repair. (repair.c line 351) Corruption: /mnt/test/some/victimdir directory entries: Repair unsuccessful; offline repair required. (repair.c line 204) Source: https://blogs.oracle.com/linux/post/xfs-online-filesystem-repair It is strange that xfs_scrub doesn't refuse to run, because the kernel is supposed to return EOPNOTSUPP if we actually needed to run a repair, and xfs_io's repair subcommand will perror that. And yet: # xfs_io -x -c 'repair probe' /mnt/test # The first problem is commit dcb660f9222fd9 (4.15) which should have had xchk_probe set the CORRUPT OFLAG so that any of the repair machinery will get called at all. It turns out that some refactoring that happened in the 6.6-6.8 era broke the operation of this corner case. What we *really* want to happen is that all the predicates that would steer xfs_scrub_metadata() towards calling xrep_attempt() should function the same way that they do when repair is compiled in; and then xrep_attempt gets to return the fatal EOPNOTSUPP error code that causes the probe to fail. Instead, commit 8336a64eb75cba (6.6) started the failwhale swimming by hoisting OFLAG checking logic into a helper whose non-repair stub always returns false, causing scrub to return "repair not needed" when in fact the repair is not supported. Prior to that commit, the oflag checking that was open-coded in scrub.c worked correctly. Similarly, in commit 4bdfd7d15747b1 (6.8) we hoisted the IFLAG_REPAIR and ALREADY_FIXED logic into a helper whose non-repair stub always returns false, so we never enter the if test body that would have called xrep_attempt, let alone fail to decode the OFLAGs correctly. The final insult (yes, we're doing The Naked Gun now) is commit 48a72f60861f79 (6.8) in which we hoisted the "are we going to try a repair?" predicate into yet another function with a non-repair stub always returns false. Fix xchk_probe to trigger xrep_probe if repair is enabled, or return EOPNOTSUPP directly if it is not. For all the other scrub types, we need to fix the header predicates so that the ->repair functions (which are all xrep_notsupported) get called to return EOPNOTSUPP. Commit 48a72 is tagged here because the scrub code prior to LTS 6.12 are incomplete and not worth patching. Reported-by: David Flynn Cc: # v6.8 Fixes: 8336a64eb75c ("xfs: don't complain about unfixed metadata when repairs were injected") Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/scrub/common.h | 5 ----- fs/xfs/scrub/repair.h | 11 ++++++++++- fs/xfs/scrub/scrub.c | 12 ++++++++++++ 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index bdcd40f0ec742..19877d99f255b 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -224,7 +224,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm) bool xchk_dir_looks_zapped(struct xfs_inode *dp); bool xchk_pptr_looks_zapped(struct xfs_inode *ip); -#ifdef CONFIG_XFS_ONLINE_REPAIR /* Decide if a repair is required. */ static inline bool xchk_needs_repair(const struct xfs_scrub_metadata *sm) { @@ -244,10 +243,6 @@ static inline bool xchk_could_repair(const struct xfs_scrub *sc) return (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && !(sc->flags & XREP_ALREADY_FIXED); } -#else -# define xchk_needs_repair(sc) (false) -# define xchk_could_repair(sc) (false) -#endif /* CONFIG_XFS_ONLINE_REPAIR */ int xchk_metadata_inode_forks(struct xfs_scrub *sc); diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index 823c00d1a5026..af0a3a9e5ed97 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -191,7 +191,16 @@ int xrep_reset_metafile_resv(struct xfs_scrub *sc); #else #define xrep_ino_dqattach(sc) (0) -#define xrep_will_attempt(sc) (false) + +/* + * When online repair is not built into the kernel, we still want to attempt + * the repair so that the stub xrep_attempt below will return EOPNOTSUPP. + */ +static inline bool xrep_will_attempt(const struct xfs_scrub *sc) +{ + return (sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) || + xchk_needs_repair(sc->sm); +} static inline int xrep_attempt( diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 7567dd5cad14f..6fa9e3e5bab7b 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -149,6 +149,18 @@ xchk_probe( if (xchk_should_terminate(sc, &error)) return error; + /* + * If the caller is probing to see if repair works but repair isn't + * built into the kernel, return EOPNOTSUPP because that's the signal + * that userspace expects. If online repair is built in, set the + * CORRUPT flag (without any of the usual tracing/logging) to force us + * into xrep_probe. + */ + if (xchk_could_repair(sc)) { + if (!IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)) + return -EOPNOTSUPP; + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + } return 0; } -- GitLab From 6e33017c3276e3af7f79f61f3b3648e4a4c03d34 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 2 Feb 2025 16:50:14 -0800 Subject: [PATCH 0666/1585] xfs: fix data fork format filtering during inode repair Coverity noticed that xrep_dinode_bad_metabt_fork never runs because XFS_DINODE_FMT_META_BTREE is always filtered out in the mode selection switch of xrep_dinode_check_dfork. Metadata btrees are allowed only in the data forks of regular files, so add this case explicitly. I guess this got fubard during a refactoring prior to 6.13 and I didn't notice until now. :/ Coverity-id: 1617714 Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/scrub/inode_repair.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c index 2f641b6d663eb..13ff1c933cb8f 100644 --- a/fs/xfs/scrub/inode_repair.c +++ b/fs/xfs/scrub/inode_repair.c @@ -1055,9 +1055,17 @@ xrep_dinode_check_dfork( return true; break; case S_IFREG: - if (fmt == XFS_DINODE_FMT_LOCAL) + switch (fmt) { + case XFS_DINODE_FMT_LOCAL: return true; - fallthrough; + case XFS_DINODE_FMT_EXTENTS: + case XFS_DINODE_FMT_BTREE: + case XFS_DINODE_FMT_META_BTREE: + break; + default: + return true; + } + break; case S_IFLNK: case S_IFDIR: switch (fmt) { -- GitLab From 9e00163c31676c6b43d2334fdf5b406232f42dee Mon Sep 17 00:00:00 2001 From: Lukas Herbolt Date: Mon, 3 Feb 2025 09:55:13 +0100 Subject: [PATCH 0667/1585] xfs: do not check NEEDSREPAIR if ro,norecovery mount. If there is corrutpion on the filesystem andxfs_repair fails to repair it. The last resort of getting the data is to use norecovery,ro mount. But if the NEEDSREPAIR is set the filesystem cannot be mounted. The flag must be cleared out manually using xfs_db, to get access to what left over of the corrupted fs. Signed-off-by: Lukas Herbolt Reviewed-by: Dave Chinner Reviewed-by: Eric Sandeen Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_super.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index d92d7a07ea89b..0055066fb1d98 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1661,8 +1661,12 @@ xfs_fs_fill_super( #endif } - /* Filesystem claims it needs repair, so refuse the mount. */ - if (xfs_has_needsrepair(mp)) { + /* + * Filesystem claims it needs repair, so refuse the mount unless + * norecovery is also specified, in which case the filesystem can + * be mounted with no risk of further damage. + */ + if (xfs_has_needsrepair(mp) && !xfs_has_norecovery(mp)) { xfs_warn(mp, "Filesystem needs repair. Please run xfs_repair."); error = -EFSCORRUPTED; goto out_free_sb; -- GitLab From 9f0902091c332b2665951cfb970f60ae7cbdc0f3 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Mon, 3 Feb 2025 14:04:57 +0100 Subject: [PATCH 0668/1585] xfs: Do not allow norecovery mount with quotacheck Mounting a filesystem that requires quota state changing will generate a transaction. We already check for a read-only device; we should do that for norecovery too. A quotacheck on a norecovery mount, and with the right log size, will cause the mount process to hang on: [<0>] xlog_grant_head_wait+0x5d/0x2a0 [xfs] [<0>] xlog_grant_head_check+0x112/0x180 [xfs] [<0>] xfs_log_reserve+0xe3/0x260 [xfs] [<0>] xfs_trans_reserve+0x179/0x250 [xfs] [<0>] xfs_trans_alloc+0x101/0x260 [xfs] [<0>] xfs_sync_sb+0x3f/0x80 [xfs] [<0>] xfs_qm_mount_quotas+0xe3/0x2f0 [xfs] [<0>] xfs_mountfs+0x7ad/0xc20 [xfs] [<0>] xfs_fs_fill_super+0x762/0xa50 [xfs] [<0>] get_tree_bdev_flags+0x131/0x1d0 [<0>] vfs_get_tree+0x26/0xd0 [<0>] vfs_cmd_create+0x59/0xe0 [<0>] __do_sys_fsconfig+0x4e3/0x6b0 [<0>] do_syscall_64+0x82/0x160 [<0>] entry_SYSCALL_64_after_hwframe+0x76/0x7e This is caused by a transaction running with bogus initialized head/tail I initially hit this while running generic/050, with random log sizes, but I managed to reproduce it reliably here with the steps below: mkfs.xfs -f -lsize=1025M -f -b size=4096 -m crc=1,reflink=1,rmapbt=1, -i sparse=1 /dev/vdb2 > /dev/null mount -o usrquota,grpquota,prjquota /dev/vdb2 /mnt xfs_io -x -c 'shutdown -f' /mnt umount /mnt mount -o ro,norecovery,usrquota,grpquota,prjquota /dev/vdb2 /mnt Last mount hangs up As we add yet another validation if quota state is changing, this also add a new helper named xfs_qm_validate_state_change(), factoring the quota state changes out of xfs_qm_newmount() to reduce cluttering within it. Signed-off-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_qm_bhv.c | 55 ++++++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index 37f1230e75846..245d754f382a7 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c @@ -78,6 +78,28 @@ xfs_qm_statvfs( } } +STATIC int +xfs_qm_validate_state_change( + struct xfs_mount *mp, + uint uqd, + uint gqd, + uint pqd) +{ + int state; + + /* Is quota state changing? */ + state = ((uqd && !XFS_IS_UQUOTA_ON(mp)) || + (!uqd && XFS_IS_UQUOTA_ON(mp)) || + (gqd && !XFS_IS_GQUOTA_ON(mp)) || + (!gqd && XFS_IS_GQUOTA_ON(mp)) || + (pqd && !XFS_IS_PQUOTA_ON(mp)) || + (!pqd && XFS_IS_PQUOTA_ON(mp))); + + return state && + (xfs_dev_is_read_only(mp, "changing quota state") || + xfs_has_norecovery(mp)); +} + int xfs_qm_newmount( xfs_mount_t *mp, @@ -97,24 +119,25 @@ xfs_qm_newmount( } /* - * If the device itself is read-only, we can't allow - * the user to change the state of quota on the mount - - * this would generate a transaction on the ro device, - * which would lead to an I/O error and shutdown + * If the device itself is read-only and/or in norecovery + * mode, we can't allow the user to change the state of + * quota on the mount - this would generate a transaction + * on the ro device, which would lead to an I/O error and + * shutdown. */ - if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) || - (!uquotaondisk && XFS_IS_UQUOTA_ON(mp)) || - (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || - (!gquotaondisk && XFS_IS_GQUOTA_ON(mp)) || - (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) || - (!pquotaondisk && XFS_IS_PQUOTA_ON(mp))) && - xfs_dev_is_read_only(mp, "changing quota state")) { - xfs_warn(mp, "please mount with%s%s%s%s.", - (!quotaondisk ? "out quota" : ""), - (uquotaondisk ? " usrquota" : ""), - (gquotaondisk ? " grpquota" : ""), - (pquotaondisk ? " prjquota" : "")); + if (xfs_qm_validate_state_change(mp, uquotaondisk, + gquotaondisk, pquotaondisk)) { + + if (xfs_has_metadir(mp)) + xfs_warn(mp, + "metadir enabled, please mount without any quota mount options"); + else + xfs_warn(mp, "please mount with%s%s%s%s.", + (!quotaondisk ? "out quota" : ""), + (uquotaondisk ? " usrquota" : ""), + (gquotaondisk ? " grpquota" : ""), + (pquotaondisk ? " prjquota" : "")); return -EPERM; } -- GitLab From 3cd6a8056f5a2e794c42fc2114ee2611e358b357 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Feb 2025 07:15:01 +0100 Subject: [PATCH 0669/1585] xfs: rename xfs_iomap_swapfile_activate to xfs_vm_swap_activate Match the method name and the naming convention or address_space operations. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_aops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 67877c36ed11a..a80608e82c9b9 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -528,7 +528,7 @@ xfs_vm_readahead( } static int -xfs_iomap_swapfile_activate( +xfs_vm_swap_activate( struct swap_info_struct *sis, struct file *swap_file, sector_t *span) @@ -549,11 +549,11 @@ const struct address_space_operations xfs_address_space_operations = { .migrate_folio = filemap_migrate_folio, .is_partially_uptodate = iomap_is_partially_uptodate, .error_remove_folio = generic_error_remove_folio, - .swap_activate = xfs_iomap_swapfile_activate, + .swap_activate = xfs_vm_swap_activate, }; const struct address_space_operations xfs_dax_aops = { .writepages = xfs_dax_writepages, .dirty_folio = noop_dirty_folio, - .swap_activate = xfs_iomap_swapfile_activate, + .swap_activate = xfs_vm_swap_activate, }; -- GitLab From 2d873efd174bae9005776937d5ac6a96050266db Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Feb 2025 07:15:00 +0100 Subject: [PATCH 0670/1585] xfs: flush inodegc before swapon Fix the brand new xfstest that tries to swapon on a recently unshared file and use the chance to document the other bit of magic in this function. The big comment is taken from a mailinglist post by Dave Chinner. Fixes: 5e672cd69f0a53 ("xfs: introduce xfs_inodegc_push()") Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_aops.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index a80608e82c9b9..6d9965b546cbb 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -19,6 +19,7 @@ #include "xfs_reflink.h" #include "xfs_errortag.h" #include "xfs_error.h" +#include "xfs_icache.h" struct xfs_writepage_ctx { struct iomap_writepage_ctx ctx; @@ -533,7 +534,39 @@ xfs_vm_swap_activate( struct file *swap_file, sector_t *span) { - sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev; + struct xfs_inode *ip = XFS_I(file_inode(swap_file)); + + /* + * Swap file activation can race against concurrent shared extent + * removal in files that have been cloned. If this happens, + * iomap_swapfile_iter() can fail because it encountered a shared + * extent even though an operation is in progress to remove those + * shared extents. + * + * This race becomes problematic when we defer extent removal + * operations beyond the end of a syscall (i.e. use async background + * processing algorithms). Users think the extents are no longer + * shared, but iomap_swapfile_iter() still sees them as shared + * because the refcountbt entries for the extents being removed have + * not yet been updated. Hence the swapon call fails unexpectedly. + * + * The race condition is currently most obvious from the unlink() + * operation as extent removal is deferred until after the last + * reference to the inode goes away. We then process the extent + * removal asynchronously, hence triggers the "syscall completed but + * work not done" condition mentioned above. To close this race + * window, we need to flush any pending inodegc operations to ensure + * they have updated the refcountbt records before we try to map the + * swapfile. + */ + xfs_inodegc_flush(ip->i_mount); + + /* + * Direct the swap code to the correct block device when this file + * sits on the RT device. + */ + sis->bdev = xfs_inode_buftarg(ip)->bt_bdev; + return iomap_swapfile_activate(sis, swap_file, span, &xfs_read_iomap_ops); } -- GitLab From 9e512eaaf8f4008c44ede3dfc0fbc9d9c5118583 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Sat, 8 Feb 2025 12:41:44 +0000 Subject: [PATCH 0671/1585] serial: 8250: Fix fifo underflow on flush When flushing the serial port's buffer, uart_flush_buffer() calls kfifo_reset() but if there is an outstanding DMA transfer then the completion function will consume data from the kfifo via uart_xmit_advance(), underflowing and leading to ongoing DMA as the driver tries to transmit another 2^32 bytes. This is readily reproduced with serial-generic and amidi sending even short messages as closing the device on exit will wait for the fifo to drain and in the underflow case amidi hangs for 30 seconds on exit in tty_wait_until_sent(). A trace of that gives: kworker/1:1-84 [001] 51.769423: bprint: serial8250_tx_dma: tx_size=3 fifo_len=3 amidi-763 [001] 51.769460: bprint: uart_flush_buffer: resetting fifo irq/21-fe530000-76 [000] 51.769474: bprint: __dma_tx_complete: tx_size=3 irq/21-fe530000-76 [000] 51.769479: bprint: serial8250_tx_dma: tx_size=4096 fifo_len=4294967293 irq/21-fe530000-76 [000] 51.781295: bprint: __dma_tx_complete: tx_size=4096 irq/21-fe530000-76 [000] 51.781301: bprint: serial8250_tx_dma: tx_size=4096 fifo_len=4294963197 irq/21-fe530000-76 [000] 51.793131: bprint: __dma_tx_complete: tx_size=4096 irq/21-fe530000-76 [000] 51.793135: bprint: serial8250_tx_dma: tx_size=4096 fifo_len=4294959101 irq/21-fe530000-76 [000] 51.804949: bprint: __dma_tx_complete: tx_size=4096 Since the port lock is held in when the kfifo is reset in uart_flush_buffer() and in __dma_tx_complete(), adding a flush_buffer hook to adjust the outstanding DMA byte count is sufficient to avoid the kfifo underflow. Fixes: 9ee4b83e51f74 ("serial: 8250: Add support for dmaengine") Cc: stable Signed-off-by: John Keeping Link: https://lore.kernel.org/r/20250208124148.1189191-1-jkeeping@inmusicbrands.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250.h | 2 ++ drivers/tty/serial/8250/8250_dma.c | 16 ++++++++++++++++ drivers/tty/serial/8250/8250_port.c | 9 +++++++++ 3 files changed, 27 insertions(+) diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index 11e05aa014e54..b861585ca02ac 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -374,6 +374,7 @@ static inline int is_omap1510_8250(struct uart_8250_port *pt) #ifdef CONFIG_SERIAL_8250_DMA extern int serial8250_tx_dma(struct uart_8250_port *); +extern void serial8250_tx_dma_flush(struct uart_8250_port *); extern int serial8250_rx_dma(struct uart_8250_port *); extern void serial8250_rx_dma_flush(struct uart_8250_port *); extern int serial8250_request_dma(struct uart_8250_port *); @@ -406,6 +407,7 @@ static inline int serial8250_tx_dma(struct uart_8250_port *p) { return -1; } +static inline void serial8250_tx_dma_flush(struct uart_8250_port *p) { } static inline int serial8250_rx_dma(struct uart_8250_port *p) { return -1; diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c index d215c494ee24c..f245a84f4a508 100644 --- a/drivers/tty/serial/8250/8250_dma.c +++ b/drivers/tty/serial/8250/8250_dma.c @@ -149,6 +149,22 @@ int serial8250_tx_dma(struct uart_8250_port *p) return ret; } +void serial8250_tx_dma_flush(struct uart_8250_port *p) +{ + struct uart_8250_dma *dma = p->dma; + + if (!dma->tx_running) + return; + + /* + * kfifo_reset() has been called by the serial core, avoid + * advancing and underflowing in __dma_tx_complete(). + */ + dma->tx_size = 0; + + dmaengine_terminate_async(dma->rxchan); +} + int serial8250_rx_dma(struct uart_8250_port *p) { struct uart_8250_dma *dma = p->dma; diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index d7976a21cca9c..442967a6cd52d 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2555,6 +2555,14 @@ static void serial8250_shutdown(struct uart_port *port) serial8250_do_shutdown(port); } +static void serial8250_flush_buffer(struct uart_port *port) +{ + struct uart_8250_port *up = up_to_u8250p(port); + + if (up->dma) + serial8250_tx_dma_flush(up); +} + static unsigned int serial8250_do_get_divisor(struct uart_port *port, unsigned int baud, unsigned int *frac) @@ -3244,6 +3252,7 @@ static const struct uart_ops serial8250_pops = { .break_ctl = serial8250_break_ctl, .startup = serial8250_startup, .shutdown = serial8250_shutdown, + .flush_buffer = serial8250_flush_buffer, .set_termios = serial8250_set_termios, .set_ldisc = serial8250_set_ldisc, .pm = serial8250_pm, -- GitLab From 0c67c37e1710b2a8f61c8a02db95a51fe577e2c1 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 11 Feb 2025 13:47:50 -0800 Subject: [PATCH 0672/1585] fuse: revert back to __readahead_folio() for readahead MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 3eab9d7bc2f4 ("fuse: convert readahead to use folios"), the logic was converted to using the new folio readahead code, which drops the reference on the folio once it is locked, using an inferred reference on the folio. Previously we held a reference on the folio for the entire duration of the readpages call. This is fine, however for the case for splice pipe responses where we will remove the old folio and splice in the new folio (see fuse_try_move_page()), we assume that there is a reference held on the folio for ap->folios, which is no longer the case. To fix this, revert back to __readahead_folio() which allows us to hold the reference on the folio for the duration of readpages until either we drop the reference ourselves in fuse_readpages_end() or the reference is dropped after it's replaced in the page cache in the splice case. This will fix the UAF bug that was reported. Link: https://lore.kernel.org/linux-fsdevel/2f681f48-00f5-4e09-8431-2b3dbfaa881e@heusel.eu/ Fixes: 3eab9d7bc2f4 ("fuse: convert readahead to use folios") Reported-by: Christian Heusel Closes: https://lore.kernel.org/all/2f681f48-00f5-4e09-8431-2b3dbfaa881e@heusel.eu/ Closes: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/110 Reported-by: Mantas Mikulėnas Closes: https://lore.kernel.org/all/34feb867-09e2-46e4-aa31-d9660a806d1a@gmail.com/ Closes: https://bugzilla.opensuse.org/show_bug.cgi?id=1236660 Cc: # v6.13 Signed-off-by: Joanne Koong Reviewed-by: Jeff Layton Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 6 ++++++ fs/fuse/file.c | 13 +++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 5b5f789b37eb6..2b2d1b7555444 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -838,6 +838,12 @@ static int fuse_check_folio(struct folio *folio) return 0; } +/* + * Attempt to steal a page from the splice() pipe and move it into the + * pagecache. If successful, the pointer in @pagep will be updated. The + * folio that was originally in @pagep will lose a reference and the new + * folio returned in @pagep will carry a reference. + */ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) { int err; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 7d92a54799985..d63e56fd3dd20 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -955,8 +955,10 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args, fuse_invalidate_atime(inode); } - for (i = 0; i < ap->num_folios; i++) + for (i = 0; i < ap->num_folios; i++) { folio_end_read(ap->folios[i], !err); + folio_put(ap->folios[i]); + } if (ia->ff) fuse_file_put(ia->ff, false); @@ -1048,7 +1050,14 @@ static void fuse_readahead(struct readahead_control *rac) ap = &ia->ap; while (ap->num_folios < cur_pages) { - folio = readahead_folio(rac); + /* + * This returns a folio with a ref held on it. + * The ref needs to be held until the request is + * completed, since the splice case (see + * fuse_try_move_page()) drops the ref after it's + * replaced in the page cache. + */ + folio = __readahead_folio(rac); ap->folios[ap->num_folios] = folio; ap->descs[ap->num_folios].length = folio_size(folio); ap->num_folios++; -- GitLab From 362ff1e7c6c20f8d6ebe20682870d471373c608b Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 13 Feb 2025 17:18:25 +0100 Subject: [PATCH 0673/1585] virtio_snd.h: clarify that `controls` depends on VIRTIO_SND_F_CTLS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As defined in the specification, the `controls` field in the configuration space is only valid/present if VIRTIO_SND_F_CTLS is negotiated. From https://docs.oasis-open.org/virtio/virtio/v1.3/virtio-v1.3.html: 5.14.4 Device Configuration Layout ... controls (driver-read-only) indicates a total number of all available control elements if VIRTIO_SND_F_CTLS has been negotiated. Let's use the same style used in virtio_blk.h to clarify this and to avoid confusion as happened in QEMU (see link). Link: https://gitlab.com/qemu-project/qemu/-/issues/2805 Signed-off-by: Stefano Garzarella Acked-by: Eugenio Pérez Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250213161825.139952-1-sgarzare@redhat.com --- include/uapi/linux/virtio_snd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/virtio_snd.h b/include/uapi/linux/virtio_snd.h index 5f4100c2cf04c..a4cfb9f6561ab 100644 --- a/include/uapi/linux/virtio_snd.h +++ b/include/uapi/linux/virtio_snd.h @@ -25,7 +25,7 @@ struct virtio_snd_config { __le32 streams; /* # of available channel maps */ __le32 chmaps; - /* # of available control elements */ + /* # of available control elements (if VIRTIO_SND_F_CTLS) */ __le32 controls; }; -- GitLab From 8789b4296aa796f658a19cac7d27365012893de1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Thu, 5 Dec 2024 10:22:00 +0000 Subject: [PATCH 0674/1585] phy: exynos5-usbdrd: gs101: ensure power is gated to SS phy in phy_exit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We currently don't gate the power to the SS phy in phy_exit(). Shuffle the code slightly to ensure the power is gated to the SS phy as well. Fixes: 32267c29bc7d ("phy: exynos5-usbdrd: support Exynos USBDRD 3.1 combo phy (HS & SS)") CC: stable@vger.kernel.org # 6.11+ Reviewed-by: Krzysztof Kozlowski Reviewed-by: Peter Griffin Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20241205-gs101-usb-phy-fix-v4-1-0278809fb810@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/samsung/phy-exynos5-usbdrd.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/phy/samsung/phy-exynos5-usbdrd.c b/drivers/phy/samsung/phy-exynos5-usbdrd.c index 4a108fdab118c..46b8f6987c62c 100644 --- a/drivers/phy/samsung/phy-exynos5-usbdrd.c +++ b/drivers/phy/samsung/phy-exynos5-usbdrd.c @@ -1296,14 +1296,17 @@ static int exynos5_usbdrd_gs101_phy_exit(struct phy *phy) struct exynos5_usbdrd_phy *phy_drd = to_usbdrd_phy(inst); int ret; + if (inst->phy_cfg->id == EXYNOS5_DRDPHY_UTMI) { + ret = exynos850_usbdrd_phy_exit(phy); + if (ret) + return ret; + } + + exynos5_usbdrd_phy_isol(inst, true); + if (inst->phy_cfg->id != EXYNOS5_DRDPHY_UTMI) return 0; - ret = exynos850_usbdrd_phy_exit(phy); - if (ret) - return ret; - - exynos5_usbdrd_phy_isol(inst, true); return regulator_bulk_disable(phy_drd->drv_data->n_regulators, phy_drd->regulators); } -- GitLab From 5ab90f40121a9f6a9b368274cd92d0f435dc7cfa Mon Sep 17 00:00:00 2001 From: Andrew Davis Date: Thu, 23 Jan 2025 12:22:34 -0600 Subject: [PATCH 0675/1585] phy: ti: gmii-sel: Do not use syscon helper to build regmap The syscon helper device_node_to_regmap() is used to fetch a regmap registered to a device node. It also currently creates this regmap if the node did not already have a regmap associated with it. This should only be used on "syscon" nodes. This driver is not such a device and instead uses device_node_to_regmap() on its own node as a hacky way to create a regmap for itself. This will not work going forward and so we should create our regmap the normal way by defining our regmap_config, fetching our memory resource, then using the normal regmap_init_mmio() function. Signed-off-by: Andrew Davis Tested-by: Nishanth Menon Link: https://lore.kernel.org/r/20250123182234.597665-1-afd@ti.com Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-gmii-sel.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/phy/ti/phy-gmii-sel.c b/drivers/phy/ti/phy-gmii-sel.c index e0ca59ae31531..ff5d5e29629fa 100644 --- a/drivers/phy/ti/phy-gmii-sel.c +++ b/drivers/phy/ti/phy-gmii-sel.c @@ -424,6 +424,12 @@ static int phy_gmii_sel_init_ports(struct phy_gmii_sel_priv *priv) return 0; } +static const struct regmap_config phy_gmii_sel_regmap_cfg = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, +}; + static int phy_gmii_sel_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -468,7 +474,14 @@ static int phy_gmii_sel_probe(struct platform_device *pdev) priv->regmap = syscon_node_to_regmap(node->parent); if (IS_ERR(priv->regmap)) { - priv->regmap = device_node_to_regmap(node); + void __iomem *base; + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) + return dev_err_probe(dev, PTR_ERR(base), + "failed to get base memory resource\n"); + + priv->regmap = regmap_init_mmio(dev, base, &phy_gmii_sel_regmap_cfg); if (IS_ERR(priv->regmap)) return dev_err_probe(dev, PTR_ERR(priv->regmap), "Failed to get syscon\n"); -- GitLab From 55f1a5f7c97c3c92ba469e16991a09274410ceb7 Mon Sep 17 00:00:00 2001 From: BH Hsieh Date: Wed, 22 Jan 2025 18:59:43 +0800 Subject: [PATCH 0676/1585] phy: tegra: xusb: reset VBUS & ID OVERRIDE Observed VBUS_OVERRIDE & ID_OVERRIDE might be programmed with unexpected value prior to XUSB PADCTL driver, this could also occur in virtualization scenario. For example, UEFI firmware programs ID_OVERRIDE=GROUNDED to set a type-c port to host mode and keeps the value to kernel. If the type-c port is connected a usb host, below errors can be observed right after usb host mode driver gets probed. The errors would keep until usb role class driver detects the type-c port as device mode and notifies usb device mode driver to set both ID_OVERRIDE and VBUS_OVERRIDE to correct value by XUSB PADCTL driver. [ 173.765814] usb usb3-port2: Cannot enable. Maybe the USB cable is bad? [ 173.765837] usb usb3-port2: config error Taking virtualization into account, asserting XUSB PADCTL reset would break XUSB functions used by other guest OS, hence only reset VBUS & ID OVERRIDE of the port in utmi_phy_init. Fixes: bbf711682cd5 ("phy: tegra: xusb: Add Tegra186 support") Cc: stable@vger.kernel.org Change-Id: Ic63058d4d49b4a1f8f9ab313196e20ad131cc591 Signed-off-by: BH Hsieh Signed-off-by: Henry Lin Link: https://lore.kernel.org/r/20250122105943.8057-1-henryl@nvidia.com Signed-off-by: Vinod Koul --- drivers/phy/tegra/xusb-tegra186.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/phy/tegra/xusb-tegra186.c b/drivers/phy/tegra/xusb-tegra186.c index 0f60d5d1c1678..fae6242aa730e 100644 --- a/drivers/phy/tegra/xusb-tegra186.c +++ b/drivers/phy/tegra/xusb-tegra186.c @@ -928,6 +928,7 @@ static int tegra186_utmi_phy_init(struct phy *phy) unsigned int index = lane->index; struct device *dev = padctl->dev; int err; + u32 reg; port = tegra_xusb_find_usb2_port(padctl, index); if (!port) { @@ -935,6 +936,16 @@ static int tegra186_utmi_phy_init(struct phy *phy) return -ENODEV; } + if (port->mode == USB_DR_MODE_OTG || + port->mode == USB_DR_MODE_PERIPHERAL) { + /* reset VBUS&ID OVERRIDE */ + reg = padctl_readl(padctl, USB2_VBUS_ID); + reg &= ~VBUS_OVERRIDE; + reg &= ~ID_OVERRIDE(~0); + reg |= ID_OVERRIDE_FLOATING; + padctl_writel(padctl, reg, USB2_VBUS_ID); + } + if (port->supply && port->mode == USB_DR_MODE_HOST) { err = regulator_enable(port->supply); if (err) { -- GitLab From 5755eb0a8168493fddf63d72e0133de54b3a17dd Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Mon, 3 Feb 2025 13:26:31 +1030 Subject: [PATCH 0677/1585] MAINTAINERS: Mark Andrew as M: for ASPEED MACHINE SUPPORT From discussion in [1] and in-person with Joel, flip my entry from R: to M:. Link: https://lore.kernel.org/all/CACPK8Xe8yZLXzEQPp=1D2f0TsKA7hBZG=pHHW6U51FMpp_BiRQ@mail.gmail.com/ [1] Cc: Joel Stanley Cc: Arnd Bergmann Cc: soc@lists.linux.dev Cc: linux-aspeed@lists.ozlabs.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Andrew Jeffery Acked-by: Joel Stanley Signed-off-by: Arnd Bergmann --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 25c86f47353de..e3920abb7794f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2284,7 +2284,7 @@ F: drivers/irqchip/irq-aspeed-i2c-ic.c ARM/ASPEED MACHINE SUPPORT M: Joel Stanley -R: Andrew Jeffery +M: Andrew Jeffery L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-aspeed@lists.ozlabs.org (moderated for non-subscribers) S: Supported -- GitLab From f0570fdc8f72fb1a22bfd2bbf030b34a06da21ff Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Sat, 8 Feb 2025 10:22:01 +0100 Subject: [PATCH 0678/1585] MAINTAINERS: arm: apple: Add Janne as maintainer Sven and I have agreed to share the maintainership for the ARM/APPLE platform after Marcan's step down. I'm handling the downstream Asahi Linux tree since April 2024 and worked on or wrote several drivers for the platform. Signed-off-by: Janne Grunau Acked-by: Sven Peter Acked-by: Hector Martin Acked-by: Neal Gompa Link: https://lore.kernel.org/r/20250208-maint-soc-apple-v1-1-a7f7337baec0@jannau.net Signed-off-by: Arnd Bergmann --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e3920abb7794f..04c90ecf616a2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2210,6 +2210,7 @@ F: sound/soc/codecs/ssm3515.c ARM/APPLE MACHINE SUPPORT M: Sven Peter +M: Janne Grunau R: Alyssa Rosenzweig L: asahi@lists.linux.dev L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -- GitLab From be6686b823b30a69b1f71bde228ce042c78a1941 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Feb 2025 15:41:43 +0100 Subject: [PATCH 0679/1585] firmware: imx: IMX_SCMI_MISC_DRV should depend on ARCH_MXC The i.MX System Controller Management Interface firmware is only present on Freescale i.MX SoCs. Hence add a dependency on ARCH_MXC, to prevent asking the user about this driver when configuring a kernel without Freescale i.MX platform support. Fixes: 514b2262ade48a05 ("firmware: arm_scmi: Fix i.MX build dependency") Signed-off-by: Geert Uytterhoeven Reviewed-by: Fabio Estevam Signed-off-by: Arnd Bergmann --- drivers/firmware/imx/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/imx/Kconfig b/drivers/firmware/imx/Kconfig index 907cd149c40a8..c964f4924359f 100644 --- a/drivers/firmware/imx/Kconfig +++ b/drivers/firmware/imx/Kconfig @@ -25,6 +25,7 @@ config IMX_SCU config IMX_SCMI_MISC_DRV tristate "IMX SCMI MISC Protocol driver" + depends on ARCH_MXC || COMPILE_TEST default y if ARCH_MXC help The System Controller Management Interface firmware (SCMI FW) is -- GitLab From dd0f05b98925111f4530d7dab774398cdb32e9e3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 14 Feb 2025 09:31:29 +0100 Subject: [PATCH 0680/1585] platform: cznic: CZNIC_PLATFORMS should depend on ARCH_MVEBU CZ.NIC's Turris devices are based on Marvell EBU SoCs. Hence add a dependency on ARCH_MVEBU, to prevent asking the user about these drivers when configuring a kernel that cannot run on an affected CZ.NIC Turris system. Fixes: 992f1a3d4e88498d ("platform: cznic: Add preliminary support for Turris Omnia MCU") Signed-off-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann --- drivers/platform/cznic/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/cznic/Kconfig b/drivers/platform/cznic/Kconfig index 49c383eb67854..13e37b49d9d01 100644 --- a/drivers/platform/cznic/Kconfig +++ b/drivers/platform/cznic/Kconfig @@ -6,6 +6,7 @@ menuconfig CZNIC_PLATFORMS bool "Platform support for CZ.NIC's Turris hardware" + depends on ARCH_MVEBU || COMPILE_TEST help Say Y here to be able to choose driver support for CZ.NIC's Turris devices. This option alone does not add any kernel code. -- GitLab From 70b0d6b0a199c5a3ee6c72f5e61681ed6f759612 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Tue, 4 Feb 2025 13:04:18 +0530 Subject: [PATCH 0681/1585] tee: optee: Fix supplicant wait loop OP-TEE supplicant is a user-space daemon and it's possible for it be hung or crashed or killed in the middle of processing an OP-TEE RPC call. It becomes more complicated when there is incorrect shutdown ordering of the supplicant process vs the OP-TEE client application which can eventually lead to system hang-up waiting for the closure of the client application. Allow the client process waiting in kernel for supplicant response to be killed rather than indefinitely waiting in an unkillable state. Also, a normal uninterruptible wait should not have resulted in the hung-task watchdog getting triggered, but the endless loop would. This fixes issues observed during system reboot/shutdown when supplicant got hung for some reason or gets crashed/killed which lead to client getting hung in an unkillable state. It in turn lead to system being in hung up state requiring hard power off/on to recover. Fixes: 4fb0a5eb364d ("tee: add OP-TEE driver") Suggested-by: Arnd Bergmann Cc: stable@vger.kernel.org Signed-off-by: Sumit Garg Reviewed-by: Arnd Bergmann Reviewed-by: Jens Wiklander Signed-off-by: Arnd Bergmann --- drivers/tee/optee/supp.c | 35 ++++++++--------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/drivers/tee/optee/supp.c b/drivers/tee/optee/supp.c index 322a543b8c278..d0f397c902420 100644 --- a/drivers/tee/optee/supp.c +++ b/drivers/tee/optee/supp.c @@ -80,7 +80,6 @@ u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params, struct optee *optee = tee_get_drvdata(ctx->teedev); struct optee_supp *supp = &optee->supp; struct optee_supp_req *req; - bool interruptable; u32 ret; /* @@ -111,36 +110,18 @@ u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params, /* * Wait for supplicant to process and return result, once we've * returned from wait_for_completion(&req->c) successfully we have - * exclusive access again. + * exclusive access again. Allow the wait to be killable such that + * the wait doesn't turn into an indefinite state if the supplicant + * gets hung for some reason. */ - while (wait_for_completion_interruptible(&req->c)) { + if (wait_for_completion_killable(&req->c)) { mutex_lock(&supp->mutex); - interruptable = !supp->ctx; - if (interruptable) { - /* - * There's no supplicant available and since the - * supp->mutex currently is held none can - * become available until the mutex released - * again. - * - * Interrupting an RPC to supplicant is only - * allowed as a way of slightly improving the user - * experience in case the supplicant hasn't been - * started yet. During normal operation the supplicant - * will serve all requests in a timely manner and - * interrupting then wouldn't make sense. - */ - if (req->in_queue) { - list_del(&req->link); - req->in_queue = false; - } + if (req->in_queue) { + list_del(&req->link); + req->in_queue = false; } mutex_unlock(&supp->mutex); - - if (interruptable) { - req->ret = TEEC_ERROR_COMMUNICATION; - break; - } + req->ret = TEEC_ERROR_COMMUNICATION; } ret = req->ret; -- GitLab From b3fefbb30a1691533cb905006b69b2a474660744 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 24 Jan 2025 19:15:23 +0100 Subject: [PATCH 0682/1585] nouveau/svm: fix missing folio unlock + put after make_device_exclusive_range() In case we have to retry the loop, we are missing to unlock+put the folio. In that case, we will keep failing make_device_exclusive_range() because we cannot grab the folio lock, and even return from the function with the folio locked and referenced, effectively never succeeding the make_device_exclusive_range(). While at it, convert the other unlock+put to use a folio as well. This was found by code inspection. Fixes: 8f187163eb89 ("nouveau/svm: implement atomic SVM access") Signed-off-by: David Hildenbrand Reviewed-by: Alistair Popple Tested-by: Alistair Popple Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20250124181524.3584236-2-david@redhat.com --- drivers/gpu/drm/nouveau/nouveau_svm.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index b4da82ddbb6b2..8ea98f06d39af 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -590,6 +590,7 @@ static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm, unsigned long timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); struct mm_struct *mm = svmm->notifier.mm; + struct folio *folio; struct page *page; unsigned long start = args->p.addr; unsigned long notifier_seq; @@ -616,12 +617,16 @@ static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm, ret = -EINVAL; goto out; } + folio = page_folio(page); mutex_lock(&svmm->mutex); if (!mmu_interval_read_retry(¬ifier->notifier, notifier_seq)) break; mutex_unlock(&svmm->mutex); + + folio_unlock(folio); + folio_put(folio); } /* Map the page on the GPU. */ @@ -637,8 +642,8 @@ static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm, ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL); mutex_unlock(&svmm->mutex); - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); out: mmu_interval_notifier_remove(¬ifier->notifier); -- GitLab From 80e648042e512d5a767da251d44132553fe04ae0 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 14 Feb 2025 02:39:50 +0100 Subject: [PATCH 0683/1585] partitions: mac: fix handling of bogus partition table Fix several issues in partition probing: - The bailout for a bad partoffset must use put_dev_sector(), since the preceding read_part_sector() succeeded. - If the partition table claims a silly sector size like 0xfff bytes (which results in partition table entries straddling sector boundaries), bail out instead of accessing out-of-bounds memory. - We must not assume that the partition table contains proper NUL termination - use strnlen() and strncmp() instead of strlen() and strcmp(). Cc: stable@vger.kernel.org Signed-off-by: Jann Horn Link: https://lore.kernel.org/r/20250214-partition-mac-v1-1-c1c626dffbd5@google.com Signed-off-by: Jens Axboe --- block/partitions/mac.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/block/partitions/mac.c b/block/partitions/mac.c index c80183156d680..b02530d986297 100644 --- a/block/partitions/mac.c +++ b/block/partitions/mac.c @@ -53,13 +53,25 @@ int mac_partition(struct parsed_partitions *state) } secsize = be16_to_cpu(md->block_size); put_dev_sector(sect); + + /* + * If the "block size" is not a power of 2, things get weird - we might + * end up with a partition straddling a sector boundary, so we wouldn't + * be able to read a partition entry with read_part_sector(). + * Real block sizes are probably (?) powers of two, so just require + * that. + */ + if (!is_power_of_2(secsize)) + return -1; datasize = round_down(secsize, 512); data = read_part_sector(state, datasize / 512, §); if (!data) return -1; partoffset = secsize % 512; - if (partoffset + sizeof(*part) > datasize) + if (partoffset + sizeof(*part) > datasize) { + put_dev_sector(sect); return -1; + } part = (struct mac_partition *) (data + partoffset); if (be16_to_cpu(part->signature) != MAC_PARTITION_MAGIC) { put_dev_sector(sect); @@ -112,8 +124,8 @@ int mac_partition(struct parsed_partitions *state) int i, l; goodness++; - l = strlen(part->name); - if (strcmp(part->name, "/") == 0) + l = strnlen(part->name, sizeof(part->name)); + if (strncmp(part->name, "/", sizeof(part->name)) == 0) goodness++; for (i = 0; i <= l - 4; ++i) { if (strncasecmp(part->name + i, "root", -- GitLab From 8221fd1a73044adef712a5c9346a23c2447f629c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 14 Feb 2025 16:43:49 +0000 Subject: [PATCH 0684/1585] workqueue: Log additional details when rejecting work Syzbot regularly runs into the following warning on arm64: | WARNING: CPU: 1 PID: 6023 at kernel/workqueue.c:2257 current_wq_worker kernel/workqueue_internal.h:69 [inline] | WARNING: CPU: 1 PID: 6023 at kernel/workqueue.c:2257 is_chained_work kernel/workqueue.c:2199 [inline] | WARNING: CPU: 1 PID: 6023 at kernel/workqueue.c:2257 __queue_work+0xe50/0x1308 kernel/workqueue.c:2256 | Modules linked in: | CPU: 1 UID: 0 PID: 6023 Comm: klogd Not tainted 6.13.0-rc2-syzkaller-g2e7aff49b5da #0 | Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 | pstate: 404000c5 (nZcv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : __queue_work+0xe50/0x1308 kernel/workqueue_internal.h:69 | lr : current_wq_worker kernel/workqueue_internal.h:69 [inline] | lr : is_chained_work kernel/workqueue.c:2199 [inline] | lr : __queue_work+0xe50/0x1308 kernel/workqueue.c:2256 [...] | __queue_work+0xe50/0x1308 kernel/workqueue.c:2256 (L) | delayed_work_timer_fn+0x74/0x90 kernel/workqueue.c:2485 | call_timer_fn+0x1b4/0x8b8 kernel/time/timer.c:1793 | expire_timers kernel/time/timer.c:1839 [inline] | __run_timers kernel/time/timer.c:2418 [inline] | __run_timer_base+0x59c/0x7b4 kernel/time/timer.c:2430 | run_timer_base kernel/time/timer.c:2439 [inline] | run_timer_softirq+0xcc/0x194 kernel/time/timer.c:2449 The warning is probably because we are trying to queue work into a destroyed workqueue, but the softirq context makes it hard to pinpoint the problematic caller. Extend the warning diagnostics to print both the function we are trying to queue as well as the name of the workqueue. Cc: Tejun Heo Cc: Catalin Marinas Cc: Marc Zyngier Cc: Lai Jiangshan Link: https://syzkaller.appspot.com/bug?extid=e13e654d315d4da1277c Signed-off-by: Will Deacon Signed-off-by: Tejun Heo --- kernel/workqueue.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ccad33001c58c..902df3253598c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2254,8 +2254,10 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, * queues a new work item to a wq after destroy_workqueue(wq). */ if (unlikely(wq->flags & (__WQ_DESTROYING | __WQ_DRAINING) && - WARN_ON_ONCE(!is_chained_work(wq)))) + WARN_ONCE(!is_chained_work(wq), "workqueue: cannot queue %ps on wq %s\n", + work->func, wq->name))) { return; + } rcu_read_lock(); retry: /* pwq which will be used unless @work is executing elsewhere */ -- GitLab From 9ba0e1755a40f9920ad0f4168031291b3eb58d7b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 13 Feb 2025 13:19:57 -0500 Subject: [PATCH 0685/1585] ring-buffer: Unlock resize on mmap error Memory mapping the tracing ring buffer will disable resizing the buffer. But if there's an error in the memory mapping like an invalid parameter, the function exits out without re-enabling the resizing of the ring buffer, preventing the ring buffer from being resized after that. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Vincent Donnefort Link: https://lore.kernel.org/20250213131957.530ec3c5@gandalf.local.home Fixes: 117c39200d9d7 ("ring-buffer: Introducing ring-buffer mapping functions") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b8e0ae15ca5b6..07b421115692c 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -7126,6 +7126,7 @@ int ring_buffer_map(struct trace_buffer *buffer, int cpu, kfree(cpu_buffer->subbuf_ids); cpu_buffer->subbuf_ids = NULL; rb_free_meta_page(cpu_buffer); + atomic_dec(&cpu_buffer->resize_disabled); } unlock: -- GitLab From 60b8f711143de7cd9c0f55be0fe7eb94b19eb5c7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 13 Feb 2025 13:41:32 -0500 Subject: [PATCH 0686/1585] tracing: Have the error of __tracing_resize_ring_buffer() passed to user Currently if __tracing_resize_ring_buffer() returns an error, the tracing_resize_ringbuffer() returns -ENOMEM. But it may not be a memory issue that caused the function to fail. If the ring buffer is memory mapped, then the resizing of the ring buffer will be disabled. But if the user tries to resize the buffer, it will get an -ENOMEM returned, which is confusing because there is plenty of memory. The actual error returned was -EBUSY, which would make much more sense to the user. Cc: stable@vger.kernel.org Cc: Mathieu Desnoyers Cc: Vincent Donnefort Link: https://lore.kernel.org/20250213134132.7e4505d7@gandalf.local.home Fixes: 117c39200d9d7 ("ring-buffer: Introducing ring-buffer mapping functions") Signed-off-by: Steven Rostedt (Google) Reviewed-by: Masami Hiramatsu (Google) --- kernel/trace/trace.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1496a5ac33ae1..25ff37aab00f1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5977,8 +5977,6 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, ssize_t tracing_resize_ring_buffer(struct trace_array *tr, unsigned long size, int cpu_id) { - int ret; - guard(mutex)(&trace_types_lock); if (cpu_id != RING_BUFFER_ALL_CPUS) { @@ -5987,11 +5985,7 @@ ssize_t tracing_resize_ring_buffer(struct trace_array *tr, return -EINVAL; } - ret = __tracing_resize_ring_buffer(tr, size, cpu_id); - if (ret < 0) - ret = -ENOMEM; - - return ret; + return __tracing_resize_ring_buffer(tr, size, cpu_id); } static void update_last_data(struct trace_array *tr) -- GitLab From f5b95f1fa2ef3a03f49eeec658ba97e721412b32 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 14 Feb 2025 10:28:20 -0500 Subject: [PATCH 0687/1585] ring-buffer: Validate the persistent meta data subbuf array The meta data for a mapped ring buffer contains an array of indexes of all the subbuffers. The first entry is the reader page, and the rest of the entries lay out the order of the subbuffers in how the ring buffer link list is to be created. The validator currently makes sure that all the entries are within the range of 0 and nr_subbufs. But it does not check if there are any duplicates. While working on the ring buffer, I corrupted this array, where I added duplicates. The validator did not catch it and created the ring buffer link list on top of it. Luckily, the corruption was only that the reader page was also in the writer path and only presented corrupted data but did not crash the kernel. But if there were duplicates in the writer side, then it could corrupt the ring buffer link list and cause a crash. Create a bitmask array with the size of the number of subbuffers. Then clear it. When walking through the subbuf array checking to see if the entries are within the range, test if its bit is already set in the subbuf_mask. If it is, then there is duplicates and fail the validation. If not, set the corresponding bit and continue. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Vincent Donnefort Link: https://lore.kernel.org/20250214102820.7509ddea@gandalf.local.home Fixes: c76883f18e59b ("ring-buffer: Add test if range of boot buffer is valid") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 07b421115692c..0419d41a20604 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1672,7 +1672,8 @@ static void *rb_range_buffer(struct ring_buffer_per_cpu *cpu_buffer, int idx) * must be the same. */ static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu, - struct trace_buffer *buffer, int nr_pages) + struct trace_buffer *buffer, int nr_pages, + unsigned long *subbuf_mask) { int subbuf_size = PAGE_SIZE; struct buffer_data_page *subbuf; @@ -1680,6 +1681,9 @@ static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu, unsigned long buffers_end; int i; + if (!subbuf_mask) + return false; + /* Check the meta magic and meta struct size */ if (meta->magic != RING_BUFFER_META_MAGIC || meta->struct_size != sizeof(*meta)) { @@ -1712,6 +1716,8 @@ static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu, subbuf = rb_subbufs_from_meta(meta); + bitmap_clear(subbuf_mask, 0, meta->nr_subbufs); + /* Is the meta buffers and the subbufs themselves have correct data? */ for (i = 0; i < meta->nr_subbufs; i++) { if (meta->buffers[i] < 0 || @@ -1725,6 +1731,12 @@ static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu, return false; } + if (test_bit(meta->buffers[i], subbuf_mask)) { + pr_info("Ring buffer boot meta [%d] array has duplicates\n", cpu); + return false; + } + + set_bit(meta->buffers[i], subbuf_mask); subbuf = (void *)subbuf + subbuf_size; } @@ -1889,17 +1901,22 @@ static void rb_meta_init_text_addr(struct ring_buffer_meta *meta) static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages) { struct ring_buffer_meta *meta; + unsigned long *subbuf_mask; unsigned long delta; void *subbuf; int cpu; int i; + /* Create a mask to test the subbuf array */ + subbuf_mask = bitmap_alloc(nr_pages + 1, GFP_KERNEL); + /* If subbuf_mask fails to allocate, then rb_meta_valid() will return false */ + for (cpu = 0; cpu < nr_cpu_ids; cpu++) { void *next_meta; meta = rb_range_meta(buffer, nr_pages, cpu); - if (rb_meta_valid(meta, cpu, buffer, nr_pages)) { + if (rb_meta_valid(meta, cpu, buffer, nr_pages, subbuf_mask)) { /* Make the mappings match the current address */ subbuf = rb_subbufs_from_meta(meta); delta = (unsigned long)subbuf - meta->first_buffer; @@ -1943,6 +1960,7 @@ static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages) subbuf += meta->subbuf_size; } } + bitmap_free(subbuf_mask); } static void *rbm_start(struct seq_file *m, loff_t *pos) -- GitLab From 41758630dd7ea9dce3eb152168c979534b415ab0 Mon Sep 17 00:00:00 2001 From: Niravkumar L Rabara Date: Wed, 12 Feb 2025 19:25:34 +0800 Subject: [PATCH 0688/1585] dt-bindings: mtd: cadence: document required clock-names The clock-names property is required because the driver requests the clock by name and not the index. Update the example to use &clk instead of &nf_clk for the clocks property to avoid confusion with the clock-names property "nf_clk". Fixes: 1f05f823a16c (dt-bindings: mtd: cadence: convert cadence-nand-controller.txt to yaml) Signed-off-by: Niravkumar L Rabara Reviewed-by: Krzysztof Kozlowski Signed-off-by: Miquel Raynal --- Documentation/devicetree/bindings/mtd/cdns,hp-nfc.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mtd/cdns,hp-nfc.yaml b/Documentation/devicetree/bindings/mtd/cdns,hp-nfc.yaml index 0bed37a994c38..e1f4d7c35a885 100644 --- a/Documentation/devicetree/bindings/mtd/cdns,hp-nfc.yaml +++ b/Documentation/devicetree/bindings/mtd/cdns,hp-nfc.yaml @@ -33,6 +33,10 @@ properties: clocks: maxItems: 1 + clock-names: + items: + - const: nf_clk + dmas: maxItems: 1 @@ -51,6 +55,7 @@ required: - reg-names - interrupts - clocks + - clock-names unevaluatedProperties: false @@ -66,7 +71,8 @@ examples: #address-cells = <1>; #size-cells = <0>; interrupts = ; - clocks = <&nf_clk>; + clocks = <&clk>; + clock-names = "nf_clk"; cdns,board-delay-ps = <4830>; nand@0 { -- GitLab From 77b823fa619f97d16409ca37ad4f7936e28c5f83 Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Tue, 4 Feb 2025 23:35:22 +0100 Subject: [PATCH 0689/1585] alpha: replace hardcoded stack offsets with autogenerated ones This allows the assembly in entry.S to automatically keep in sync with changes in the stack layout (struct pt_regs and struct switch_stack). Cc: stable@vger.kernel.org Tested-by: Maciej W. Rozycki Tested-by: Matt Turner Reviewed-by: Maciej W. Rozycki Signed-off-by: Ivan Kokshaysky Signed-off-by: Matt Turner --- arch/alpha/kernel/asm-offsets.c | 4 ++++ arch/alpha/kernel/entry.S | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/alpha/kernel/asm-offsets.c b/arch/alpha/kernel/asm-offsets.c index 4cfeae42c79ac..e9dad60b147f3 100644 --- a/arch/alpha/kernel/asm-offsets.c +++ b/arch/alpha/kernel/asm-offsets.c @@ -19,9 +19,13 @@ static void __used foo(void) DEFINE(TI_STATUS, offsetof(struct thread_info, status)); BLANK(); + DEFINE(SP_OFF, offsetof(struct pt_regs, ps)); DEFINE(SIZEOF_PT_REGS, sizeof(struct pt_regs)); BLANK(); + DEFINE(SWITCH_STACK_SIZE, sizeof(struct switch_stack)); + BLANK(); + DEFINE(HAE_CACHE, offsetof(struct alpha_machine_vector, hae_cache)); DEFINE(HAE_REG, offsetof(struct alpha_machine_vector, hae_register)); } diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index dd26062d75b3c..6fb38365539d4 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -15,10 +15,6 @@ .set noat .cfi_sections .debug_frame -/* Stack offsets. */ -#define SP_OFF 184 -#define SWITCH_STACK_SIZE 64 - .macro CFI_START_OSF_FRAME func .align 4 .globl \func -- GitLab From 0a0f7362b0367634a2d5cb7c96226afc116f19c9 Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Tue, 4 Feb 2025 23:35:23 +0100 Subject: [PATCH 0690/1585] alpha: make stack 16-byte aligned (most cases) The problem is that GCC expects 16-byte alignment of the incoming stack since early 2004, as Maciej found out [1]: Having actually dug speculatively I can see that the psABI was changed in GCC 3.5 with commit e5e10fb4a350 ("re PR target/14539 (128-bit long double improperly aligned)") back in Mar 2004, when the stack pointer alignment was increased from 8 bytes to 16 bytes, and arch/alpha/kernel/entry.S has various suspicious stack pointer adjustments, starting with SP_OFF which is not a whole multiple of 16. Also, as Magnus noted, "ALPHA Calling Standard" [2] required the same: D.3.1 Stack Alignment This standard requires that stacks be octaword aligned at the time a new procedure is invoked. However: - the "normal" kernel stack is always misaligned by 8 bytes, thanks to the odd number of 64-bit words in 'struct pt_regs', which is the very first thing pushed onto the kernel thread stack; - syscall, fault, interrupt etc. handlers may, or may not, receive aligned stack depending on numerous factors. Somehow we got away with it until recently, when we ended up with a stack corruption in kernel/smp.c:smp_call_function_single() due to its use of 32-byte aligned local data and the compiler doing clever things allocating it on the stack. This adds padding between the PAL-saved and kernel-saved registers so that 'struct pt_regs' have an even number of 64-bit words. This makes the stack properly aligned for most of the kernel code, except two handlers which need special threatment. Note: struct pt_regs doesn't belong in uapi/asm; this should be fixed, but let's put this off until later. Link: https://lore.kernel.org/rcu/alpine.DEB.2.21.2501130248010.18889@angie.orcam.me.uk/ [1] Link: https://bitsavers.org/pdf/dec/alpha/Alpha_Calling_Standard_Rev_2.0_19900427.pdf [2] Cc: stable@vger.kernel.org Tested-by: Maciej W. Rozycki Tested-by: Magnus Lindholm Tested-by: Matt Turner Reviewed-by: Maciej W. Rozycki Signed-off-by: Ivan Kokshaysky Signed-off-by: Matt Turner --- arch/alpha/include/uapi/asm/ptrace.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/alpha/include/uapi/asm/ptrace.h b/arch/alpha/include/uapi/asm/ptrace.h index 5ca45934fcbb8..72ed913a910f2 100644 --- a/arch/alpha/include/uapi/asm/ptrace.h +++ b/arch/alpha/include/uapi/asm/ptrace.h @@ -42,6 +42,8 @@ struct pt_regs { unsigned long trap_a0; unsigned long trap_a1; unsigned long trap_a2; +/* This makes the stack 16-byte aligned as GCC expects */ + unsigned long __pad0; /* These are saved by PAL-code: */ unsigned long ps; unsigned long pc; -- GitLab From 3b35a171060f846b08b48646b38c30b5d57d17ff Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Tue, 4 Feb 2025 23:35:24 +0100 Subject: [PATCH 0691/1585] alpha: align stack for page fault and user unaligned trap handlers do_page_fault() and do_entUna() are special because they use non-standard stack frame layout. Fix them manually. Cc: stable@vger.kernel.org Tested-by: Maciej W. Rozycki Tested-by: Magnus Lindholm Tested-by: Matt Turner Reviewed-by: Maciej W. Rozycki Suggested-by: Maciej W. Rozycki Signed-off-by: Ivan Kokshaysky Signed-off-by: Matt Turner --- arch/alpha/kernel/entry.S | 20 ++++++++++---------- arch/alpha/kernel/traps.c | 2 +- arch/alpha/mm/fault.c | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index 6fb38365539d4..f4d41b4538c2e 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -194,8 +194,8 @@ CFI_END_OSF_FRAME entArith CFI_START_OSF_FRAME entMM SAVE_ALL /* save $9 - $15 so the inline exception code can manipulate them. */ - subq $sp, 56, $sp - .cfi_adjust_cfa_offset 56 + subq $sp, 64, $sp + .cfi_adjust_cfa_offset 64 stq $9, 0($sp) stq $10, 8($sp) stq $11, 16($sp) @@ -210,7 +210,7 @@ CFI_START_OSF_FRAME entMM .cfi_rel_offset $13, 32 .cfi_rel_offset $14, 40 .cfi_rel_offset $15, 48 - addq $sp, 56, $19 + addq $sp, 64, $19 /* handle the fault */ lda $8, 0x3fff bic $sp, $8, $8 @@ -223,7 +223,7 @@ CFI_START_OSF_FRAME entMM ldq $13, 32($sp) ldq $14, 40($sp) ldq $15, 48($sp) - addq $sp, 56, $sp + addq $sp, 64, $sp .cfi_restore $9 .cfi_restore $10 .cfi_restore $11 @@ -231,7 +231,7 @@ CFI_START_OSF_FRAME entMM .cfi_restore $13 .cfi_restore $14 .cfi_restore $15 - .cfi_adjust_cfa_offset -56 + .cfi_adjust_cfa_offset -64 /* finish up the syscall as normal. */ br ret_from_sys_call CFI_END_OSF_FRAME entMM @@ -378,8 +378,8 @@ entUnaUser: .cfi_restore $0 .cfi_adjust_cfa_offset -256 SAVE_ALL /* setup normal kernel stack */ - lda $sp, -56($sp) - .cfi_adjust_cfa_offset 56 + lda $sp, -64($sp) + .cfi_adjust_cfa_offset 64 stq $9, 0($sp) stq $10, 8($sp) stq $11, 16($sp) @@ -395,7 +395,7 @@ entUnaUser: .cfi_rel_offset $14, 40 .cfi_rel_offset $15, 48 lda $8, 0x3fff - addq $sp, 56, $19 + addq $sp, 64, $19 bic $sp, $8, $8 jsr $26, do_entUnaUser ldq $9, 0($sp) @@ -405,7 +405,7 @@ entUnaUser: ldq $13, 32($sp) ldq $14, 40($sp) ldq $15, 48($sp) - lda $sp, 56($sp) + lda $sp, 64($sp) .cfi_restore $9 .cfi_restore $10 .cfi_restore $11 @@ -413,7 +413,7 @@ entUnaUser: .cfi_restore $13 .cfi_restore $14 .cfi_restore $15 - .cfi_adjust_cfa_offset -56 + .cfi_adjust_cfa_offset -64 br ret_from_sys_call CFI_END_OSF_FRAME entUna diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index a9a38c80c4a7a..7004397937cfd 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -649,7 +649,7 @@ s_reg_to_mem (unsigned long s_reg) static int unauser_reg_offsets[32] = { R(r0), R(r1), R(r2), R(r3), R(r4), R(r5), R(r6), R(r7), R(r8), /* r9 ... r15 are stored in front of regs. */ - -56, -48, -40, -32, -24, -16, -8, + -64, -56, -48, -40, -32, -24, -16, /* padding at -8 */ R(r16), R(r17), R(r18), R(r19), R(r20), R(r21), R(r22), R(r23), R(r24), R(r25), R(r26), R(r27), R(r28), R(gp), diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 8c9850437e674..a9816bbc9f34d 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -78,8 +78,8 @@ __load_new_mm_context(struct mm_struct *next_mm) /* Macro for exception fixup code to access integer registers. */ #define dpf_reg(r) \ - (((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-16 : \ - (r) <= 18 ? (r)+10 : (r)-10]) + (((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-17 : \ + (r) <= 18 ? (r)+11 : (r)-10]) asmlinkage void do_page_fault(unsigned long address, unsigned long mmcsr, -- GitLab From 757f051a506198186d796dff4ba696adb7bda54c Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 7 Jan 2025 11:43:42 +0100 Subject: [PATCH 0692/1585] alpha: Replace one-element array with flexible array member Replace the deprecated one-element array with a modern flexible array member in the struct crb_struct. Reviewed-by: Kees Cook Signed-off-by: Thorsten Blum Signed-off-by: Matt Turner --- arch/alpha/include/asm/hwrpb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/include/asm/hwrpb.h b/arch/alpha/include/asm/hwrpb.h index fc76f36265ad1..db831cf8de108 100644 --- a/arch/alpha/include/asm/hwrpb.h +++ b/arch/alpha/include/asm/hwrpb.h @@ -135,7 +135,7 @@ struct crb_struct { /* virtual->physical map */ unsigned long map_entries; unsigned long map_pages; - struct vf_map_struct map[1]; + struct vf_map_struct map[]; }; struct memclust_struct { -- GitLab From 1523226edda566057bdd3264ceb56631ddf5f6f7 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 12 Feb 2025 12:14:47 +0100 Subject: [PATCH 0693/1585] alpha: Use str_yes_no() helper in pci_dac_dma_supported() Remove hard-coded strings by using the str_yes_no() helper function. Reviewed-by: Geert Uytterhoeven Signed-off-by: Thorsten Blum Signed-off-by: Matt Turner --- arch/alpha/kernel/pci_iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 681f56089d9ce..dc91de50f906d 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -212,7 +213,7 @@ static int pci_dac_dma_supported(struct pci_dev *dev, u64 mask) /* If both conditions above are met, we are fine. */ DBGA("pci_dac_dma_supported %s from %ps\n", - ok ? "yes" : "no", __builtin_return_address(0)); + str_yes_no(ok), __builtin_return_address(0)); return ok; } -- GitLab From c158647c107358bf1be579f98e4bb705c1953292 Mon Sep 17 00:00:00 2001 From: Komal Bajaj Date: Tue, 19 Nov 2024 12:16:08 +0530 Subject: [PATCH 0694/1585] EDAC/qcom: Correct interrupt enable register configuration The previous implementation incorrectly configured the cmn_interrupt_2_enable register for interrupt handling. Using cmn_interrupt_2_enable to configure Tag, Data RAM ECC interrupts would lead to issues like double handling of the interrupts (EL1 and EL3) as cmn_interrupt_2_enable is meant to be configured for interrupts which needs to be handled by EL3. EL1 LLCC EDAC driver needs to use cmn_interrupt_0_enable register to configure Tag, Data RAM ECC interrupts instead of cmn_interrupt_2_enable. Fixes: 27450653f1db ("drivers: edac: Add EDAC driver support for QCOM SoCs") Signed-off-by: Komal Bajaj Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Manivannan Sadhasivam Cc: Link: https://lore.kernel.org/r/20241119064608.12326-1-quic_kbajaj@quicinc.com --- drivers/edac/qcom_edac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/qcom_edac.c b/drivers/edac/qcom_edac.c index 04c42c83a2bad..f3da9385ca0d8 100644 --- a/drivers/edac/qcom_edac.c +++ b/drivers/edac/qcom_edac.c @@ -95,7 +95,7 @@ static int qcom_llcc_core_setup(struct llcc_drv_data *drv, struct regmap *llcc_b * Configure interrupt enable registers such that Tag, Data RAM related * interrupts are propagated to interrupt controller for servicing */ - ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->cmn_interrupt_2_enable, + ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->cmn_interrupt_0_enable, TRP0_INTERRUPT_ENABLE, TRP0_INTERRUPT_ENABLE); if (ret) @@ -113,7 +113,7 @@ static int qcom_llcc_core_setup(struct llcc_drv_data *drv, struct regmap *llcc_b if (ret) return ret; - ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->cmn_interrupt_2_enable, + ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->cmn_interrupt_0_enable, DRP0_INTERRUPT_ENABLE, DRP0_INTERRUPT_ENABLE); if (ret) -- GitLab From fb8179ce2996bffaa36a04e2b6262843b01b7565 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 4 Nov 2024 13:03:13 -0600 Subject: [PATCH 0695/1585] riscv: cacheinfo: Use of_property_present() for non-boolean properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The use of of_property_read_bool() for non-boolean properties is deprecated in favor of of_property_present() when testing for property presence. Signed-off-by: Rob Herring (Arm) Reviewed-by: Clément Léger Cc: stable@vger.kernel.org Fixes: 76d2a0493a17 ("RISC-V: Init and Halt Code") Link: https://lore.kernel.org/r/20241104190314.270095-1-robh@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cacheinfo.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index 2d40736fc37ce..26b085dbdd073 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -108,11 +108,11 @@ int populate_cache_leaves(unsigned int cpu) if (!np) return -ENOENT; - if (of_property_read_bool(np, "cache-size")) + if (of_property_present(np, "cache-size")) ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); - if (of_property_read_bool(np, "i-cache-size")) + if (of_property_present(np, "i-cache-size")) ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); - if (of_property_read_bool(np, "d-cache-size")) + if (of_property_present(np, "d-cache-size")) ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); prev = np; @@ -125,11 +125,11 @@ int populate_cache_leaves(unsigned int cpu) break; if (level <= levels) break; - if (of_property_read_bool(np, "cache-size")) + if (of_property_present(np, "cache-size")) ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); - if (of_property_read_bool(np, "i-cache-size")) + if (of_property_present(np, "i-cache-size")) ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); - if (of_property_read_bool(np, "d-cache-size")) + if (of_property_present(np, "d-cache-size")) ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); levels = level; } -- GitLab From c6ec1e1b078d8e2ecd075e46db6197a14930a3fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Mon, 10 Feb 2025 16:56:14 +0100 Subject: [PATCH 0696/1585] riscv: cpufeature: use bitmap_equal() instead of memcmp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Comparison of bitmaps should be done using bitmap_equal(), not memcmp(), use the former one to compare isa bitmaps. Signed-off-by: Clément Léger Fixes: 625034abd52a8c ("riscv: add ISA extensions validation callback") Reviewed-by: Alexandre Ghiti Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250210155615.1545738-1-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index c6ba750536c32..40ac72e407b68 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -479,7 +479,7 @@ static void __init riscv_resolve_isa(unsigned long *source_isa, if (bit < RISCV_ISA_EXT_BASE) *this_hwcap |= isa2hwcap[bit]; } - } while (loop && memcmp(prev_resolved_isa, resolved_isa, sizeof(prev_resolved_isa))); + } while (loop && !bitmap_equal(prev_resolved_isa, resolved_isa, RISCV_ISA_EXT_MAX)); } static void __init match_isa_ext(const char *name, const char *name_end, unsigned long *bitmap) -- GitLab From 1898300abf3508bca152e65b36cce5bf93d7e63e Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Thu, 30 Jan 2025 10:25:38 +0100 Subject: [PATCH 0697/1585] riscv/atomic: Do proper sign extension also for unsigned in arch_cmpxchg Sign extend also an unsigned compare value to match what lr.w is doing. Otherwise try_cmpxchg may spuriously return true when used on a u32 value that has the sign bit set, as it happens often in inode_set_ctime_current. Do this in three conversion steps. The first conversion to long is needed to avoid a -Wpointer-to-int-cast warning when arch_cmpxchg is used with a pointer type. Then convert to int and back to long to always sign extend the 32-bit value to 64-bit. Fixes: 6c58f25e6938 ("riscv/atomic: Fix sign extension for RV64I") Signed-off-by: Andreas Schwab Reviewed-by: Alexandre Ghiti Reviewed-by: Andrew Jones Tested-by: Xi Ruoyao Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/mvmed0k4prh.fsf@suse.de Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cmpxchg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 4cadc56220fea..427c41dde6431 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -231,7 +231,7 @@ __arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx, \ sc_prepend, sc_append, \ cas_prepend, cas_append, \ - __ret, __ptr, (long), __old, __new); \ + __ret, __ptr, (long)(int)(long), __old, __new); \ break; \ case 8: \ __arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx, \ -- GitLab From 599c44cd21f4967774e0acf58f734009be4aea9a Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Mon, 3 Feb 2025 11:06:00 +0100 Subject: [PATCH 0698/1585] riscv/futex: sign extend compare value in atomic cmpxchg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure the compare value in the lr/sc loop is sign extended to match what lr.w does. Fortunately, due to the compiler keeping the register contents sign extended anyway the lack of the explicit extension didn't result in wrong code so far, but this cannot be relied upon. Fixes: b90edb33010b ("RISC-V: Add futex support.") Signed-off-by: Andreas Schwab Reviewed-by: Alexandre Ghiti Reviewed-by: Björn Töpel Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/mvmfrkv2vhz.fsf@suse.de Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/futex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h index 72be100afa236..90c86b115e008 100644 --- a/arch/riscv/include/asm/futex.h +++ b/arch/riscv/include/asm/futex.h @@ -93,7 +93,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %[r]) \ _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %[r]) \ : [r] "+r" (ret), [v] "=&r" (val), [u] "+m" (*uaddr), [t] "=&r" (tmp) - : [ov] "Jr" (oldval), [nv] "Jr" (newval) + : [ov] "Jr" ((long)(int)oldval), [nv] "Jr" (newval) : "memory"); __disable_user_access(); -- GitLab From 713e788c0e07e185fd44dd581f74855ef149722f Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Tue, 14 Jan 2025 17:07:21 +0000 Subject: [PATCH 0699/1585] rseq/selftests: Fix riscv rseq_offset_deref_addv inline asm When working on OpenRISC support for restartable sequences I noticed and fixed these two issues with the riscv support bits. 1 The 'inc' argument to RSEQ_ASM_OP_R_DEREF_ADDV was being implicitly passed to the macro. Fix this by adding 'inc' to the list of macro arguments. 2 The inline asm input constraints for 'inc' and 'off' use "er", The riscv gcc port does not have an "e" constraint, this looks to be copied from the x86 port. Fix this by just using an "r" constraint. I have compile tested this only for riscv. However, the same fixes I use in the OpenRISC rseq selftests and everything passes with no issues. Fixes: 171586a6ab66 ("selftests/rseq: riscv: Template memory ordering and percpu access mode") Signed-off-by: Stafford Horne Tested-by: Charlie Jenkins Reviewed-by: Charlie Jenkins Reviewed-by: Mathieu Desnoyers Acked-by: Shuah Khan Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250114170721.3613280-1-shorne@gmail.com Signed-off-by: Palmer Dabbelt --- tools/testing/selftests/rseq/rseq-riscv-bits.h | 6 +++--- tools/testing/selftests/rseq/rseq-riscv.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/rseq/rseq-riscv-bits.h b/tools/testing/selftests/rseq/rseq-riscv-bits.h index de31a0143139b..f02f411d550d1 100644 --- a/tools/testing/selftests/rseq/rseq-riscv-bits.h +++ b/tools/testing/selftests/rseq/rseq-riscv-bits.h @@ -243,7 +243,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, off_t off, i #ifdef RSEQ_COMPARE_TWICE RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") #endif - RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, 3) + RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, inc, 3) RSEQ_INJECT_ASM(4) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ @@ -251,8 +251,8 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, off_t off, i [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [ptr] "r" (ptr), - [off] "er" (off), - [inc] "er" (inc) + [off] "r" (off), + [inc] "r" (inc) RSEQ_INJECT_INPUT : "memory", RSEQ_ASM_TMP_REG_1 RSEQ_INJECT_CLOBBER diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h index 37e598d0a365e..67d544aaa9a3b 100644 --- a/tools/testing/selftests/rseq/rseq-riscv.h +++ b/tools/testing/selftests/rseq/rseq-riscv.h @@ -158,7 +158,7 @@ do { \ "bnez " RSEQ_ASM_TMP_REG_1 ", 222b\n" \ "333:\n" -#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, post_commit_label) \ +#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, inc, post_commit_label) \ "mv " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(ptr) "]\n" \ RSEQ_ASM_OP_R_ADD(off) \ REG_L RSEQ_ASM_TMP_REG_1 ", 0(" RSEQ_ASM_TMP_REG_1 ")\n" \ -- GitLab From aa49bc2ca8524186ceb0811c23a7f00c3dea6987 Mon Sep 17 00:00:00 2001 From: Yong-Xuan Wang Date: Fri, 20 Dec 2024 16:39:23 +0800 Subject: [PATCH 0700/1585] riscv: signal: fix signal frame size The signal context of certain RISC-V extensions will be appended after struct __riscv_extra_ext_header, which already includes an empty context header. Therefore, there is no need to preserve a separate hdr for the END of signal context. Fixes: 8ee0b41898fa ("riscv: signal: Add sigcontext save/restore for vector") Signed-off-by: Yong-Xuan Wang Reviewed-by: Zong Li Reviewed-by: Andy Chiu Reviewed-by: Alexandre Ghiti Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20241220083926.19453-2-yongxuan.wang@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/signal.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 94e905eea1dee..08378fea3a111 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -215,12 +215,6 @@ static size_t get_rt_frame_size(bool cal_all) if (cal_all || riscv_v_vstate_query(task_pt_regs(current))) total_context_size += riscv_v_sc_size; } - /* - * Preserved a __riscv_ctx_hdr for END signal context header if an - * extension uses __riscv_extra_ext_header - */ - if (total_context_size) - total_context_size += sizeof(struct __riscv_ctx_hdr); frame_size += total_context_size; -- GitLab From 564fc8eb6f78e01292ff10801f318feae6153fdd Mon Sep 17 00:00:00 2001 From: Yong-Xuan Wang Date: Fri, 20 Dec 2024 16:39:24 +0800 Subject: [PATCH 0701/1585] riscv: signal: fix signal_minsigstksz The init_rt_signal_env() funciton is called before the alternative patch is applied, so using the alternative-related API to check the availability of an extension within this function doesn't have the intended effect. This patch reorders the init_rt_signal_env() and apply_boot_alternatives() to get the correct signal_minsigstksz. Fixes: e92f469b0771 ("riscv: signal: Report signal frame size to userspace via auxv") Signed-off-by: Yong-Xuan Wang Reviewed-by: Zong Li Reviewed-by: Andy Chiu Reviewed-by: Alexandre Ghiti Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20241220083926.19453-3-yongxuan.wang@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index f1793630fc518..4fe45daa6281e 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -322,8 +322,8 @@ void __init setup_arch(char **cmdline_p) riscv_init_cbo_blocksizes(); riscv_fill_hwcap(); - init_rt_signal_env(); apply_boot_alternatives(); + init_rt_signal_env(); if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) && riscv_isa_extension_available(NULL, ZICBOM)) -- GitLab From 245aece3750d3692ae7a44516c1096936bded7ab Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Thu, 12 Dec 2024 14:11:34 +0100 Subject: [PATCH 0702/1585] MAINTAINERS: Add myself as a riscv reviewer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The goal is for me to get a kernel.org account and then send pull requests in order to relieve some pressure from Palmer and make our workflow smoother. Signed-off-by: Alexandre Ghiti Enthusiastically-Supported-by: Björn Töpel Acked-by: Conor Dooley Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20241212131134.288819-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 896a307fa0654..ebc3e39dbcaab 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20280,6 +20280,7 @@ RISC-V ARCHITECTURE M: Paul Walmsley M: Palmer Dabbelt M: Albert Ou +R: Alexandre Ghiti L: linux-riscv@lists.infradead.org S: Supported Q: https://patchwork.kernel.org/project/linux-riscv/list/ -- GitLab From 054e61bb1de4fa02d148344152007facbcb28583 Mon Sep 17 00:00:00 2001 From: Jeroen de Borst Date: Thu, 13 Feb 2025 10:45:23 -0800 Subject: [PATCH 0703/1585] gve: Update MAINTAINERS Updating MAINTAINERS to include active contributers. Signed-off-by: Jeroen de Borst Link: https://patch.msgid.link/20250213184523.2002582-1-jeroendb@google.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 10893c91b1c10..988b0ff94fda9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9834,8 +9834,8 @@ F: drivers/input/touchscreen/goodix* GOOGLE ETHERNET DRIVERS M: Jeroen de Borst -M: Praveen Kaligineedi -R: Shailend Chand +M: Joshua Washington +M: Harshitha Ramamurthy L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/device_drivers/ethernet/google/gve.rst -- GitLab From 0d1fac6d26aff5df21bb4ec980d9b7a11c410b96 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 12 Feb 2025 12:15:35 +0100 Subject: [PATCH 0704/1585] net: wwan: mhi_wwan_mbim: Silence sequence number glitch errors When using the Qualcomm X55 modem on the ThinkPad X13s, the kernel log is constantly being filled with errors related to a "sequence number glitch", e.g.: [ 1903.284538] sequence number glitch prev=16 curr=0 [ 1913.812205] sequence number glitch prev=50 curr=0 [ 1923.698219] sequence number glitch prev=142 curr=0 [ 2029.248276] sequence number glitch prev=1555 curr=0 [ 2046.333059] sequence number glitch prev=70 curr=0 [ 2076.520067] sequence number glitch prev=272 curr=0 [ 2158.704202] sequence number glitch prev=2655 curr=0 [ 2218.530776] sequence number glitch prev=2349 curr=0 [ 2225.579092] sequence number glitch prev=6 curr=0 Internet connectivity is working fine, so this error seems harmless. It looks like modem does not preserve the sequence number when entering low power state; the amount of errors depends on how actively the modem is being used. A similar issue has also been seen on USB-based MBIM modems [1]. However, in cdc_ncm.c the "sequence number glitch" message is a debug message instead of an error. Apply the same to the mhi_wwan_mbim.c driver to silence these errors when using the modem. [1]: https://lists.freedesktop.org/archives/libmbim-devel/2016-November/000781.html Signed-off-by: Stephan Gerhold Reviewed-by: Loic Poulain Acked-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250212-mhi-wwan-mbim-sequence-glitch-v1-1-503735977cbd@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/wwan/mhi_wwan_mbim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c index d5a9360323d29..8755c5e6a65b3 100644 --- a/drivers/net/wwan/mhi_wwan_mbim.c +++ b/drivers/net/wwan/mhi_wwan_mbim.c @@ -220,7 +220,7 @@ static int mbim_rx_verify_nth16(struct mhi_mbim_context *mbim, struct sk_buff *s if (mbim->rx_seq + 1 != le16_to_cpu(nth16->wSequence) && (mbim->rx_seq || le16_to_cpu(nth16->wSequence)) && !(mbim->rx_seq == 0xffff && !le16_to_cpu(nth16->wSequence))) { - net_err_ratelimited("sequence number glitch prev=%d curr=%d\n", + net_dbg_ratelimited("sequence number glitch prev=%d curr=%d\n", mbim->rx_seq, le16_to_cpu(nth16->wSequence)); } mbim->rx_seq = le16_to_cpu(nth16->wSequence); -- GitLab From 13918315c5dc5a515926c8799042ea6885c2b734 Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Sat, 8 Feb 2025 13:42:13 -0700 Subject: [PATCH 0705/1585] io-wq: backoff when retrying worker creation When io_uring submission goes async for the first time on a given task, we'll try to create a worker thread to handle the submission. Creating this worker thread can fail due to various transient conditions, such as an outstanding signal in the forking thread, so we have retry logic with a limit of 3 retries. However, this retry logic appears to be too aggressive/fast - we've observed a thread blowing through the retry limit while having the same outstanding signal the whole time. Here's an excerpt of some tracing that demonstrates the issue: First, signal 26 is generated for the process. It ends up getting routed to thread 92942. 0) cbd-92284 /* signal_generate: sig=26 errno=0 code=-2 comm=psblkdASD pid=92934 grp=1 res=0 */ This causes create_io_thread in the signalled thread to fail with ERESTARTNOINTR, and thus a retry is queued. 13) task_th-92942 /* io_uring_queue_async_work: ring 000000007325c9ae, request 0000000080c96d8e, user_data 0x0, opcode URING_CMD, flags 0x8240001, normal queue, work 000000006e96dd3f */ 13) task_th-92942 io_wq_enqueue() { 13) task_th-92942 _raw_spin_lock(); 13) task_th-92942 io_wq_activate_free_worker(); 13) task_th-92942 _raw_spin_lock(); 13) task_th-92942 create_io_worker() { 13) task_th-92942 __kmalloc_cache_noprof(); 13) task_th-92942 __init_swait_queue_head(); 13) task_th-92942 kprobe_ftrace_handler() { 13) task_th-92942 get_kprobe(); 13) task_th-92942 aggr_pre_handler() { 13) task_th-92942 pre_handler_kretprobe(); 13) task_th-92942 /* create_enter: (create_io_thread+0x0/0x50) fn=0xffffffff8172c0e0 arg=0xffff888996bb69c0 node=-1 */ 13) task_th-92942 } /* aggr_pre_handler */ ... 13) task_th-92942 } /* copy_process */ 13) task_th-92942 } /* create_io_thread */ 13) task_th-92942 kretprobe_rethook_handler() { 13) task_th-92942 /* create_exit: (create_io_worker+0x8a/0x1a0 <- create_io_thread) arg1=0xfffffffffffffdff */ 13) task_th-92942 } /* kretprobe_rethook_handler */ 13) task_th-92942 queue_work_on() { ... The CPU is then handed to a kworker to process the queued retry: ------------------------------------------ 13) task_th-92942 => kworker-54154 ------------------------------------------ 13) kworker-54154 io_workqueue_create() { 13) kworker-54154 io_queue_worker_create() { 13) kworker-54154 task_work_add() { 13) kworker-54154 wake_up_state() { 13) kworker-54154 try_to_wake_up() { 13) kworker-54154 _raw_spin_lock_irqsave(); 13) kworker-54154 _raw_spin_unlock_irqrestore(); 13) kworker-54154 } /* try_to_wake_up */ 13) kworker-54154 } /* wake_up_state */ 13) kworker-54154 kick_process(); 13) kworker-54154 } /* task_work_add */ 13) kworker-54154 } /* io_queue_worker_create */ 13) kworker-54154 } /* io_workqueue_create */ And then we immediately switch back to the original task to try creating a worker again. This fails, because the original task still hasn't handled its signal. ----------------------------------------- 13) kworker-54154 => task_th-92942 ------------------------------------------ 13) task_th-92942 create_worker_cont() { 13) task_th-92942 kprobe_ftrace_handler() { 13) task_th-92942 get_kprobe(); 13) task_th-92942 aggr_pre_handler() { 13) task_th-92942 pre_handler_kretprobe(); 13) task_th-92942 /* create_enter: (create_io_thread+0x0/0x50) fn=0xffffffff8172c0e0 arg=0xffff888996bb69c0 node=-1 */ 13) task_th-92942 } /* aggr_pre_handler */ 13) task_th-92942 } /* kprobe_ftrace_handler */ 13) task_th-92942 create_io_thread() { 13) task_th-92942 copy_process() { 13) task_th-92942 task_active_pid_ns(); 13) task_th-92942 _raw_spin_lock_irq(); 13) task_th-92942 recalc_sigpending(); 13) task_th-92942 _raw_spin_lock_irq(); 13) task_th-92942 } /* copy_process */ 13) task_th-92942 } /* create_io_thread */ 13) task_th-92942 kretprobe_rethook_handler() { 13) task_th-92942 /* create_exit: (create_worker_cont+0x35/0x1b0 <- create_io_thread) arg1=0xfffffffffffffdff */ 13) task_th-92942 } /* kretprobe_rethook_handler */ 13) task_th-92942 io_worker_release(); 13) task_th-92942 queue_work_on() { 13) task_th-92942 clear_pending_if_disabled(); 13) task_th-92942 __queue_work() { 13) task_th-92942 } /* __queue_work */ 13) task_th-92942 } /* queue_work_on */ 13) task_th-92942 } /* create_worker_cont */ The pattern repeats another couple times until we blow through the retry counter, at which point we give up. All outstanding work is canceled, and the io_uring command which triggered all this is failed with ECANCELED: 13) task_th-92942 io_acct_cancel_pending_work() { ... 13) task_th-92942 /* io_uring_complete: ring 000000007325c9ae, req 0000000080c96d8e, user_data 0x0, result -125, cflags 0x0 extra1 0 extra2 0 */ Finally, the task gets around to processing its outstanding signal 26, but it's too late. 13) task_th-92942 /* signal_deliver: sig=26 errno=0 code=-2 sa_handler=59566a0 sa_flags=14000000 */ Try to address this issue by adding a small scaling delay when retrying worker creation. This should give the forking thread time to handle its signal in the above case. This isn't a particularly satisfying solution, as sufficiently paradoxical scheduling would still have us hitting the same issue, and I'm open to suggestions for something better. But this is likely to prevent this (already rare) issue from hitting in practice. Signed-off-by: Uday Shankar Link: https://lore.kernel.org/r/20250208-wq_retry-v2-1-4f6f5041d303@purestorage.com Signed-off-by: Jens Axboe --- io_uring/io-wq.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 5d0928f37471e..91019b4d03088 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -64,7 +64,7 @@ struct io_worker { union { struct rcu_head rcu; - struct work_struct work; + struct delayed_work work; }; }; @@ -770,6 +770,18 @@ static inline bool io_should_retry_thread(struct io_worker *worker, long err) } } +static void queue_create_worker_retry(struct io_worker *worker) +{ + /* + * We only bother retrying because there's a chance that the + * failure to create a worker is due to some temporary condition + * in the forking task (e.g. outstanding signal); give the task + * some time to clear that condition. + */ + schedule_delayed_work(&worker->work, + msecs_to_jiffies(worker->init_retries * 5)); +} + static void create_worker_cont(struct callback_head *cb) { struct io_worker *worker; @@ -809,12 +821,13 @@ static void create_worker_cont(struct callback_head *cb) /* re-create attempts grab a new worker ref, drop the existing one */ io_worker_release(worker); - schedule_work(&worker->work); + queue_create_worker_retry(worker); } static void io_workqueue_create(struct work_struct *work) { - struct io_worker *worker = container_of(work, struct io_worker, work); + struct io_worker *worker = container_of(work, struct io_worker, + work.work); struct io_wq_acct *acct = io_wq_get_acct(worker); if (!io_queue_worker_create(worker, acct, create_worker_cont)) @@ -855,8 +868,8 @@ static bool create_io_worker(struct io_wq *wq, int index) kfree(worker); goto fail; } else { - INIT_WORK(&worker->work, io_workqueue_create); - schedule_work(&worker->work); + INIT_DELAYED_WORK(&worker->work, io_workqueue_create); + queue_create_worker_retry(worker); } return true; -- GitLab From 43c70b104093c324b1a000762ce943d16ce788f9 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 14 Feb 2025 12:36:36 -0700 Subject: [PATCH 0706/1585] block/merge: remove unnecessary min() with UINT_MAX In bvec_split_segs(), max_bytes is an unsigned, so it must be less than or equal to UINT_MAX. Remove the unnecessary min(). Prior to commit 67927d220150 ("block/merge: count bytes instead of sectors"), the min() was with UINT_MAX >> 9, so it did have an effect. Signed-off-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250214193637.234702-1-csander@purestorage.com Signed-off-by: Jens Axboe --- block/blk-merge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index 15cd231d560cb..39b738c0e4c9a 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -270,7 +270,7 @@ static bool bvec_split_segs(const struct queue_limits *lim, const struct bio_vec *bv, unsigned *nsegs, unsigned *bytes, unsigned max_segs, unsigned max_bytes) { - unsigned max_len = min(max_bytes, UINT_MAX) - *bytes; + unsigned max_len = max_bytes - *bytes; unsigned len = min(bv->bv_len, max_len); unsigned total_len = 0; unsigned seg_size = 0; -- GitLab From 435b344a7042e91fb4719d589f18310e8919e39f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 10 Feb 2025 22:53:47 +0000 Subject: [PATCH 0707/1585] crypto: ccp: Add external API interface for PSP module initialization KVM is dependent on the PSP SEV driver and PSP SEV driver needs to be loaded before KVM module. In case of module loading any dependent modules are automatically loaded but in case of built-in modules there is no inherent mechanism available to specify dependencies between modules and ensure that any dependent modules are loaded implicitly. Add a new external API interface for PSP module initialization which allows PSP SEV driver to be loaded explicitly if KVM is built-in. Signed-off-by: Sean Christopherson Co-developed-by: Ashish Kalra Signed-off-by: Ashish Kalra Reviewed-by: Tom Lendacky Message-ID: <15279ca0cad56a07cf12834ec544310f85ff5edc.1739226950.git.ashish.kalra@amd.com> Signed-off-by: Paolo Bonzini --- drivers/crypto/ccp/sp-dev.c | 14 ++++++++++++++ include/linux/psp-sev.h | 9 +++++++++ 2 files changed, 23 insertions(+) diff --git a/drivers/crypto/ccp/sp-dev.c b/drivers/crypto/ccp/sp-dev.c index 7eb3e46682860..3467f6db4f505 100644 --- a/drivers/crypto/ccp/sp-dev.c +++ b/drivers/crypto/ccp/sp-dev.c @@ -19,6 +19,7 @@ #include #include +#include "sev-dev.h" #include "ccp-dev.h" #include "sp-dev.h" @@ -253,8 +254,12 @@ struct sp_device *sp_get_psp_master_device(void) static int __init sp_mod_init(void) { #ifdef CONFIG_X86 + static bool initialized; int ret; + if (initialized) + return 0; + ret = sp_pci_init(); if (ret) return ret; @@ -263,6 +268,8 @@ static int __init sp_mod_init(void) psp_pci_init(); #endif + initialized = true; + return 0; #endif @@ -279,6 +286,13 @@ static int __init sp_mod_init(void) return -ENODEV; } +#if IS_BUILTIN(CONFIG_KVM_AMD) && IS_ENABLED(CONFIG_KVM_AMD_SEV) +int __init sev_module_init(void) +{ + return sp_mod_init(); +} +#endif + static void __exit sp_mod_exit(void) { #ifdef CONFIG_X86 diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 903ddfea85850..f3cad182d4ef6 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -814,6 +814,15 @@ struct sev_data_snp_commit { #ifdef CONFIG_CRYPTO_DEV_SP_PSP +/** + * sev_module_init - perform PSP SEV module initialization + * + * Returns: + * 0 if the PSP module is successfully initialized + * negative value if the PSP module initialization fails + */ +int sev_module_init(void); + /** * sev_platform_init - perform SEV INIT command * -- GitLab From 44e70718df4fc2fadf1665eb9374df71aeda1f03 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 10 Feb 2025 22:54:02 +0000 Subject: [PATCH 0708/1585] KVM: SVM: Ensure PSP module is initialized if KVM module is built-in The kernel's initcall infrastructure lacks the ability to express dependencies between initcalls, whereas the modules infrastructure automatically handles dependencies via symbol loading. Ensure the PSP SEV driver is initialized before proceeding in sev_hardware_setup() if KVM is built-in as the dependency isn't handled by the initcall infrastructure. Signed-off-by: Sean Christopherson Reviewed-by: Tom Lendacky Signed-off-by: Ashish Kalra Message-ID: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index a2a794c320503..0dbb25442ec14 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2972,6 +2972,16 @@ void __init sev_hardware_setup(void) WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_FLUSHBYASID))) goto out; + /* + * The kernel's initcall infrastructure lacks the ability to express + * dependencies between initcalls, whereas the modules infrastructure + * automatically handles dependencies via symbol loading. Ensure the + * PSP SEV driver is initialized before proceeding if KVM is built-in, + * as the dependency isn't handled by the initcall infrastructure. + */ + if (IS_BUILTIN(CONFIG_KVM_AMD) && sev_module_init()) + goto out; + /* Retrieve SEV CPUID information */ cpuid(0x8000001f, &eax, &ebx, &ecx, &edx); -- GitLab From 409f45387c937145adeeeebc6d6032c2ec232b35 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Mon, 10 Feb 2025 22:54:18 +0000 Subject: [PATCH 0709/1585] x86/sev: Fix broken SNP support with KVM module built-in Fix issues with enabling SNP host support and effectively SNP support which is broken with respect to the KVM module being built-in. SNP host support is enabled in snp_rmptable_init() which is invoked as device_initcall(). SNP check on IOMMU is done during IOMMU PCI init (IOMMU_PCI_INIT stage). And for that reason snp_rmptable_init() is currently invoked via device_initcall() and cannot be invoked via subsys_initcall() as core IOMMU subsystem gets initialized via subsys_initcall(). Now, if kvm_amd module is built-in, it gets initialized before SNP host support is enabled in snp_rmptable_init() : [ 10.131811] kvm_amd: TSC scaling supported [ 10.136384] kvm_amd: Nested Virtualization enabled [ 10.141734] kvm_amd: Nested Paging enabled [ 10.146304] kvm_amd: LBR virtualization supported [ 10.151557] kvm_amd: SEV enabled (ASIDs 100 - 509) [ 10.156905] kvm_amd: SEV-ES enabled (ASIDs 1 - 99) [ 10.162256] kvm_amd: SEV-SNP enabled (ASIDs 1 - 99) [ 10.171508] kvm_amd: Virtual VMLOAD VMSAVE supported [ 10.177052] kvm_amd: Virtual GIF supported ... ... [ 10.201648] kvm_amd: in svm_enable_virtualization_cpu And then svm_x86_ops->enable_virtualization_cpu() (svm_enable_virtualization_cpu) programs MSR_VM_HSAVE_PA as following: wrmsrl(MSR_VM_HSAVE_PA, sd->save_area_pa); So VM_HSAVE_PA is non-zero before SNP support is enabled on all CPUs. snp_rmptable_init() gets invoked after svm_enable_virtualization_cpu() as following : ... [ 11.256138] kvm_amd: in svm_enable_virtualization_cpu ... [ 11.264918] SEV-SNP: in snp_rmptable_init This triggers a #GP exception in snp_rmptable_init() when snp_enable() is invoked to set SNP_EN in SYSCFG MSR: [ 11.294289] unchecked MSR access error: WRMSR to 0xc0010010 (tried to write 0x0000000003fc0000) at rIP: 0xffffffffaf5d5c28 (native_write_msr+0x8/0x30) ... [ 11.294404] Call Trace: [ 11.294482] [ 11.294513] ? show_stack_regs+0x26/0x30 [ 11.294522] ? ex_handler_msr+0x10f/0x180 [ 11.294529] ? search_extable+0x2b/0x40 [ 11.294538] ? fixup_exception+0x2dd/0x340 [ 11.294542] ? exc_general_protection+0x14f/0x440 [ 11.294550] ? asm_exc_general_protection+0x2b/0x30 [ 11.294557] ? __pfx_snp_enable+0x10/0x10 [ 11.294567] ? native_write_msr+0x8/0x30 [ 11.294570] ? __snp_enable+0x5d/0x70 [ 11.294575] snp_enable+0x19/0x20 [ 11.294578] __flush_smp_call_function_queue+0x9c/0x3a0 [ 11.294586] generic_smp_call_function_single_interrupt+0x17/0x20 [ 11.294589] __sysvec_call_function+0x20/0x90 [ 11.294596] sysvec_call_function+0x80/0xb0 [ 11.294601] [ 11.294603] [ 11.294605] asm_sysvec_call_function+0x1f/0x30 ... [ 11.294631] arch_cpu_idle+0xd/0x20 [ 11.294633] default_idle_call+0x34/0xd0 [ 11.294636] do_idle+0x1f1/0x230 [ 11.294643] ? complete+0x71/0x80 [ 11.294649] cpu_startup_entry+0x30/0x40 [ 11.294652] start_secondary+0x12d/0x160 [ 11.294655] common_startup_64+0x13e/0x141 [ 11.294662] This #GP exception is getting triggered due to the following errata for AMD family 19h Models 10h-1Fh Processors: Processor may generate spurious #GP(0) Exception on WRMSR instruction: Description: The Processor will generate a spurious #GP(0) Exception on a WRMSR instruction if the following conditions are all met: - the target of the WRMSR is a SYSCFG register. - the write changes the value of SYSCFG.SNPEn from 0 to 1. - One of the threads that share the physical core has a non-zero value in the VM_HSAVE_PA MSR. The document being referred to above: https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/revision-guides/57095-PUB_1_01.pdf To summarize, with kvm_amd module being built-in, KVM/SVM initialization happens before host SNP is enabled and this SVM initialization sets VM_HSAVE_PA to non-zero, which then triggers a #GP when SYSCFG.SNPEn is being set and this will subsequently cause SNP_INIT(_EX) to fail with INVALID_CONFIG error as SYSCFG[SnpEn] is not set on all CPUs. Essentially SNP host enabling code should be invoked before KVM initialization, which is currently not the case when KVM is built-in. Add fix to call snp_rmptable_init() early from iommu_snp_enable() directly and not invoked via device_initcall() which enables SNP host support before KVM initialization with kvm_amd module built-in. Add additional handling for `iommu=off` or `amd_iommu=off` options. Note that IOMMUs need to be enabled for SNP initialization, therefore, if host SNP support is enabled but late IOMMU initialization fails then that will cause PSP driver's SNP_INIT to fail as IOMMU SNP sanity checks in SNP firmware will fail with invalid configuration error as below: [ 9.723114] ccp 0000:23:00.1: sev enabled [ 9.727602] ccp 0000:23:00.1: psp enabled [ 9.732527] ccp 0000:a2:00.1: enabling device (0000 -> 0002) [ 9.739098] ccp 0000:a2:00.1: no command queues available [ 9.745167] ccp 0000:a2:00.1: psp enabled [ 9.805337] ccp 0000:23:00.1: SEV-SNP: failed to INIT rc -5, error 0x3 [ 9.866426] ccp 0000:23:00.1: SEV API:1.53 build:5 Fixes: c3b86e61b756 ("x86/cpufeatures: Enable/unmask SEV-SNP CPU feature") Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Cc: Signed-off-by: Ashish Kalra Acked-by: Joerg Roedel Message-ID: <138b520fb83964782303b43ade4369cd181fdd9c.1739226950.git.ashish.kalra@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/sev.h | 2 ++ arch/x86/virt/svm/sev.c | 23 +++++++---------------- drivers/iommu/amd/init.c | 34 ++++++++++++++++++++++++++++++---- 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 5d9685f92e5c3..1581246491b54 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -531,6 +531,7 @@ static inline void __init snp_secure_tsc_init(void) { } #ifdef CONFIG_KVM_AMD_SEV bool snp_probe_rmptable_info(void); +int snp_rmptable_init(void); int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level); void snp_dump_hva_rmpentry(unsigned long address); int psmash(u64 pfn); @@ -541,6 +542,7 @@ void kdump_sev_callback(void); void snp_fixup_e820_tables(void); #else static inline bool snp_probe_rmptable_info(void) { return false; } +static inline int snp_rmptable_init(void) { return -ENOSYS; } static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; } static inline void snp_dump_hva_rmpentry(unsigned long address) {} static inline int psmash(u64 pfn) { return -ENODEV; } diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index 1dcc027ec77e7..42e74a5a7d786 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -505,19 +505,19 @@ static bool __init setup_rmptable(void) * described in the SNP_INIT_EX firmware command description in the SNP * firmware ABI spec. */ -static int __init snp_rmptable_init(void) +int __init snp_rmptable_init(void) { unsigned int i; u64 val; - if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) - return 0; + if (WARN_ON_ONCE(!cc_platform_has(CC_ATTR_HOST_SEV_SNP))) + return -ENOSYS; - if (!amd_iommu_snp_en) - goto nosnp; + if (WARN_ON_ONCE(!amd_iommu_snp_en)) + return -ENOSYS; if (!setup_rmptable()) - goto nosnp; + return -ENOSYS; /* * Check if SEV-SNP is already enabled, this can happen in case of @@ -530,7 +530,7 @@ static int __init snp_rmptable_init(void) /* Zero out the RMP bookkeeping area */ if (!clear_rmptable_bookkeeping()) { free_rmp_segment_table(); - goto nosnp; + return -ENOSYS; } /* Zero out the RMP entries */ @@ -562,17 +562,8 @@ static int __init snp_rmptable_init(void) crash_kexec_post_notifiers = true; return 0; - -nosnp: - cc_platform_clear(CC_ATTR_HOST_SEV_SNP); - return -ENOSYS; } -/* - * This must be called after the IOMMU has been initialized. - */ -device_initcall(snp_rmptable_init); - static void set_rmp_segment_info(unsigned int segment_shift) { rmp_segment_shift = segment_shift; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index c5cd92edada06..2fecfed75e543 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3194,7 +3194,7 @@ static bool __init detect_ivrs(void) return true; } -static void iommu_snp_enable(void) +static __init void iommu_snp_enable(void) { #ifdef CONFIG_KVM_AMD_SEV if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) @@ -3219,6 +3219,14 @@ static void iommu_snp_enable(void) goto disable_snp; } + /* + * Enable host SNP support once SNP support is checked on IOMMU. + */ + if (snp_rmptable_init()) { + pr_warn("SNP: RMP initialization failed, SNP cannot be supported.\n"); + goto disable_snp; + } + pr_info("IOMMU SNP support enabled.\n"); return; @@ -3318,6 +3326,19 @@ static int __init iommu_go_to_state(enum iommu_init_state state) ret = state_next(); } + /* + * SNP platform initilazation requires IOMMUs to be fully configured. + * If the SNP support on IOMMUs has NOT been checked, simply mark SNP + * as unsupported. If the SNP support on IOMMUs has been checked and + * host SNP support enabled but RMP enforcement has not been enabled + * in IOMMUs, then the system is in a half-baked state, but can limp + * along as all memory should be Hypervisor-Owned in the RMP. WARN, + * but leave SNP as "supported" to avoid confusing the kernel. + */ + if (ret && cc_platform_has(CC_ATTR_HOST_SEV_SNP) && + !WARN_ON_ONCE(amd_iommu_snp_en)) + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); + return ret; } @@ -3426,18 +3447,23 @@ void __init amd_iommu_detect(void) int ret; if (no_iommu || (iommu_detected && !gart_iommu_aperture)) - return; + goto disable_snp; if (!amd_iommu_sme_check()) - return; + goto disable_snp; ret = iommu_go_to_state(IOMMU_IVRS_DETECTED); if (ret) - return; + goto disable_snp; amd_iommu_detected = true; iommu_detected = 1; x86_init.iommu.iommu_init = amd_iommu_init; + return; + +disable_snp: + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); } /**************************************************************************** -- GitLab From 55eff109e76a14e5ed10c8c3c3978d20a35e2a4d Mon Sep 17 00:00:00 2001 From: Junnan Wu Date: Fri, 14 Feb 2025 09:22:00 +0800 Subject: [PATCH 0710/1585] vsock/virtio: fix variables initialization during resuming When executing suspend to ram twice in a row, the `rx_buf_nr` and `rx_buf_max_nr` increase to three times vq->num_free. Then after virtqueue_get_buf and `rx_buf_nr` decreased in function virtio_transport_rx_work, the condition to fill rx buffer (rx_buf_nr < rx_buf_max_nr / 2) will never be met. It is because that `rx_buf_nr` and `rx_buf_max_nr` are initialized only in virtio_vsock_probe(), but they should be reset whenever virtqueues are recreated, like after a suspend/resume. Move the `rx_buf_nr` and `rx_buf_max_nr` initialization in virtio_vsock_vqs_init(), so we are sure that they are properly initialized, every time we initialize the virtqueues, either when we load the driver or after a suspend/resume. To prevent erroneous atomic load operations on the `queued_replies` in the virtio_transport_send_pkt_work() function which may disrupt the scheduling of vsock->rx_work when transmitting reply-required socket packets, this atomic variable must undergo synchronized initialization alongside the preceding two variables after a suspend/resume. Fixes: bd50c5dc182b ("vsock/virtio: add support for device suspend/resume") Link: https://lore.kernel.org/virtualization/20250207052033.2222629-1-junnan01.wu@samsung.com/ Co-developed-by: Ying Gao Signed-off-by: Ying Gao Signed-off-by: Junnan Wu Reviewed-by: Luigi Leonardi Acked-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20250214012200.1883896-1-junnan01.wu@samsung.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/virtio_transport.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index b58c3818f284f..f0e48e6911fc4 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -670,6 +670,13 @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock) }; int ret; + mutex_lock(&vsock->rx_lock); + vsock->rx_buf_nr = 0; + vsock->rx_buf_max_nr = 0; + mutex_unlock(&vsock->rx_lock); + + atomic_set(&vsock->queued_replies, 0); + ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, vqs_info, NULL); if (ret < 0) return ret; @@ -779,9 +786,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) vsock->vdev = vdev; - vsock->rx_buf_nr = 0; - vsock->rx_buf_max_nr = 0; - atomic_set(&vsock->queued_replies, 0); mutex_init(&vsock->tx_lock); mutex_init(&vsock->rx_lock); -- GitLab From 9593172d93b9f91c362baec4643003dc29802929 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 13 Feb 2025 13:33:54 +0900 Subject: [PATCH 0711/1585] geneve: Fix use-after-free in geneve_find_dev(). syzkaller reported a use-after-free in geneve_find_dev() [0] without repro. geneve_configure() links struct geneve_dev.next to net_generic(net, geneve_net_id)->geneve_list. The net here could differ from dev_net(dev) if IFLA_NET_NS_PID, IFLA_NET_NS_FD, or IFLA_TARGET_NETNSID is set. When dev_net(dev) is dismantled, geneve_exit_batch_rtnl() finally calls unregister_netdevice_queue() for each dev in the netns, and later the dev is freed. However, its geneve_dev.next is still linked to the backend UDP socket netns. Then, use-after-free will occur when another geneve dev is created in the netns. Let's call geneve_dellink() instead in geneve_destroy_tunnels(). [0]: BUG: KASAN: slab-use-after-free in geneve_find_dev drivers/net/geneve.c:1295 [inline] BUG: KASAN: slab-use-after-free in geneve_configure+0x234/0x858 drivers/net/geneve.c:1343 Read of size 2 at addr ffff000054d6ee24 by task syz.1.4029/13441 CPU: 1 UID: 0 PID: 13441 Comm: syz.1.4029 Not tainted 6.13.0-g0ad9617c78ac #24 dc35ca22c79fb82e8e7bc5c9c9adafea898b1e3d Hardware name: linux,dummy-virt (DT) Call trace: show_stack+0x38/0x50 arch/arm64/kernel/stacktrace.c:466 (C) __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0xbc/0x108 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0x16c/0x6f0 mm/kasan/report.c:489 kasan_report+0xc0/0x120 mm/kasan/report.c:602 __asan_report_load2_noabort+0x20/0x30 mm/kasan/report_generic.c:379 geneve_find_dev drivers/net/geneve.c:1295 [inline] geneve_configure+0x234/0x858 drivers/net/geneve.c:1343 geneve_newlink+0xb8/0x128 drivers/net/geneve.c:1634 rtnl_newlink_create+0x23c/0x868 net/core/rtnetlink.c:3795 __rtnl_newlink net/core/rtnetlink.c:3906 [inline] rtnl_newlink+0x1054/0x1630 net/core/rtnetlink.c:4021 rtnetlink_rcv_msg+0x61c/0x918 net/core/rtnetlink.c:6911 netlink_rcv_skb+0x1dc/0x398 net/netlink/af_netlink.c:2543 rtnetlink_rcv+0x34/0x50 net/core/rtnetlink.c:6938 netlink_unicast_kernel net/netlink/af_netlink.c:1322 [inline] netlink_unicast+0x618/0x838 net/netlink/af_netlink.c:1348 netlink_sendmsg+0x5fc/0x8b0 net/netlink/af_netlink.c:1892 sock_sendmsg_nosec net/socket.c:713 [inline] __sock_sendmsg net/socket.c:728 [inline] ____sys_sendmsg+0x410/0x6f8 net/socket.c:2568 ___sys_sendmsg+0x178/0x1d8 net/socket.c:2622 __sys_sendmsg net/socket.c:2654 [inline] __do_sys_sendmsg net/socket.c:2659 [inline] __se_sys_sendmsg net/socket.c:2657 [inline] __arm64_sys_sendmsg+0x12c/0x1c8 net/socket.c:2657 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x90/0x278 arch/arm64/kernel/syscall.c:49 el0_svc_common+0x13c/0x250 arch/arm64/kernel/syscall.c:132 do_el0_svc+0x54/0x70 arch/arm64/kernel/syscall.c:151 el0_svc+0x4c/0xa8 arch/arm64/kernel/entry-common.c:744 el0t_64_sync_handler+0x78/0x108 arch/arm64/kernel/entry-common.c:762 el0t_64_sync+0x198/0x1a0 arch/arm64/kernel/entry.S:600 Allocated by task 13247: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x30/0x68 mm/kasan/common.c:68 kasan_save_alloc_info+0x44/0x58 mm/kasan/generic.c:568 poison_kmalloc_redzone mm/kasan/common.c:377 [inline] __kasan_kmalloc+0x84/0xa0 mm/kasan/common.c:394 kasan_kmalloc include/linux/kasan.h:260 [inline] __do_kmalloc_node mm/slub.c:4298 [inline] __kmalloc_node_noprof+0x2a0/0x560 mm/slub.c:4304 __kvmalloc_node_noprof+0x9c/0x230 mm/util.c:645 alloc_netdev_mqs+0xb8/0x11a0 net/core/dev.c:11470 rtnl_create_link+0x2b8/0xb50 net/core/rtnetlink.c:3604 rtnl_newlink_create+0x19c/0x868 net/core/rtnetlink.c:3780 __rtnl_newlink net/core/rtnetlink.c:3906 [inline] rtnl_newlink+0x1054/0x1630 net/core/rtnetlink.c:4021 rtnetlink_rcv_msg+0x61c/0x918 net/core/rtnetlink.c:6911 netlink_rcv_skb+0x1dc/0x398 net/netlink/af_netlink.c:2543 rtnetlink_rcv+0x34/0x50 net/core/rtnetlink.c:6938 netlink_unicast_kernel net/netlink/af_netlink.c:1322 [inline] netlink_unicast+0x618/0x838 net/netlink/af_netlink.c:1348 netlink_sendmsg+0x5fc/0x8b0 net/netlink/af_netlink.c:1892 sock_sendmsg_nosec net/socket.c:713 [inline] __sock_sendmsg net/socket.c:728 [inline] ____sys_sendmsg+0x410/0x6f8 net/socket.c:2568 ___sys_sendmsg+0x178/0x1d8 net/socket.c:2622 __sys_sendmsg net/socket.c:2654 [inline] __do_sys_sendmsg net/socket.c:2659 [inline] __se_sys_sendmsg net/socket.c:2657 [inline] __arm64_sys_sendmsg+0x12c/0x1c8 net/socket.c:2657 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x90/0x278 arch/arm64/kernel/syscall.c:49 el0_svc_common+0x13c/0x250 arch/arm64/kernel/syscall.c:132 do_el0_svc+0x54/0x70 arch/arm64/kernel/syscall.c:151 el0_svc+0x4c/0xa8 arch/arm64/kernel/entry-common.c:744 el0t_64_sync_handler+0x78/0x108 arch/arm64/kernel/entry-common.c:762 el0t_64_sync+0x198/0x1a0 arch/arm64/kernel/entry.S:600 Freed by task 45: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x30/0x68 mm/kasan/common.c:68 kasan_save_free_info+0x58/0x70 mm/kasan/generic.c:582 poison_slab_object mm/kasan/common.c:247 [inline] __kasan_slab_free+0x48/0x68 mm/kasan/common.c:264 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2353 [inline] slab_free mm/slub.c:4613 [inline] kfree+0x140/0x420 mm/slub.c:4761 kvfree+0x4c/0x68 mm/util.c:688 netdev_release+0x94/0xc8 net/core/net-sysfs.c:2065 device_release+0x98/0x1c0 kobject_cleanup lib/kobject.c:689 [inline] kobject_release lib/kobject.c:720 [inline] kref_put include/linux/kref.h:65 [inline] kobject_put+0x2b0/0x438 lib/kobject.c:737 netdev_run_todo+0xe5c/0xfc8 net/core/dev.c:11185 rtnl_unlock+0x20/0x38 net/core/rtnetlink.c:151 cleanup_net+0x4fc/0x8c0 net/core/net_namespace.c:648 process_one_work+0x700/0x1398 kernel/workqueue.c:3236 process_scheduled_works kernel/workqueue.c:3317 [inline] worker_thread+0x8c4/0xe10 kernel/workqueue.c:3398 kthread+0x4bc/0x608 kernel/kthread.c:464 ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:862 The buggy address belongs to the object at ffff000054d6e000 which belongs to the cache kmalloc-cg-4k of size 4096 The buggy address is located 3620 bytes inside of freed 4096-byte region [ffff000054d6e000, ffff000054d6f000) The buggy address belongs to the physical page: page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x94d68 head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 memcg:ffff000016276181 flags: 0x3fffe0000000040(head|node=0|zone=0|lastcpupid=0x1ffff) page_type: f5(slab) raw: 03fffe0000000040 ffff0000c000f500 dead000000000122 0000000000000000 raw: 0000000000000000 0000000000040004 00000001f5000000 ffff000016276181 head: 03fffe0000000040 ffff0000c000f500 dead000000000122 0000000000000000 head: 0000000000000000 0000000000040004 00000001f5000000 ffff000016276181 head: 03fffe0000000003 fffffdffc1535a01 ffffffffffffffff 0000000000000000 head: 0000000000000008 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff000054d6ed00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff000054d6ed80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff000054d6ee00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff000054d6ee80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff000054d6ef00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 2d07dc79fe04 ("geneve: add initial netdev driver for GENEVE tunnels") Reported-by: syzkaller Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250213043354.91368-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/geneve.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 642155cb83157..a1f674539965d 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1907,16 +1907,11 @@ static void geneve_destroy_tunnels(struct net *net, struct list_head *head) /* gather any geneve devices that were moved into this ns */ for_each_netdev_safe(net, dev, aux) if (dev->rtnl_link_ops == &geneve_link_ops) - unregister_netdevice_queue(dev, head); + geneve_dellink(dev, head); /* now gather any other geneve devices that were created in this ns */ - list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) { - /* If geneve->dev is in the same netns, it was already added - * to the list by the previous loop. - */ - if (!net_eq(dev_net(geneve->dev), net)) - unregister_netdevice_queue(geneve->dev, head); - } + list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) + geneve_dellink(geneve->dev, head); } static void __net_exit geneve_exit_batch_rtnl(struct list_head *net_list, -- GitLab From 08b613b9e2ba431db3bd15cb68ca72472a50ef5c Mon Sep 17 00:00:00 2001 From: Vitaly Rodionov Date: Fri, 14 Feb 2025 21:07:28 +0000 Subject: [PATCH 0712/1585] ALSA: hda/cirrus: Correct the full scale volume set logic This patch corrects the full-scale volume setting logic. On certain platforms, the full-scale volume bit is required. The current logic mistakenly sets this bit and incorrectly clears reserved bit 0, causing the headphone output to be muted. Fixes: 342b6b610ae2 ("ALSA: hda/cs8409: Fix Full Scale Volume setting for all variants") Signed-off-by: Vitaly Rodionov Link: https://patch.msgid.link/20250214210736.30814-1-vitalyr@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_cs8409-tables.c | 6 +++--- sound/pci/hda/patch_cs8409.c | 20 +++++++++++--------- sound/pci/hda/patch_cs8409.h | 5 +++-- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/sound/pci/hda/patch_cs8409-tables.c b/sound/pci/hda/patch_cs8409-tables.c index 759f48038273d..621f947e38174 100644 --- a/sound/pci/hda/patch_cs8409-tables.c +++ b/sound/pci/hda/patch_cs8409-tables.c @@ -121,7 +121,7 @@ static const struct cs8409_i2c_param cs42l42_init_reg_seq[] = { { CS42L42_MIXER_CHA_VOL, 0x3F }, { CS42L42_MIXER_CHB_VOL, 0x3F }, { CS42L42_MIXER_ADC_VOL, 0x3f }, - { CS42L42_HP_CTL, 0x03 }, + { CS42L42_HP_CTL, 0x0D }, { CS42L42_MIC_DET_CTL1, 0xB6 }, { CS42L42_TIPSENSE_CTL, 0xC2 }, { CS42L42_HS_CLAMP_DISABLE, 0x01 }, @@ -315,7 +315,7 @@ static const struct cs8409_i2c_param dolphin_c0_init_reg_seq[] = { { CS42L42_ASP_TX_SZ_EN, 0x01 }, { CS42L42_PWR_CTL1, 0x0A }, { CS42L42_PWR_CTL2, 0x84 }, - { CS42L42_HP_CTL, 0x03 }, + { CS42L42_HP_CTL, 0x0D }, { CS42L42_MIXER_CHA_VOL, 0x3F }, { CS42L42_MIXER_CHB_VOL, 0x3F }, { CS42L42_MIXER_ADC_VOL, 0x3f }, @@ -371,7 +371,7 @@ static const struct cs8409_i2c_param dolphin_c1_init_reg_seq[] = { { CS42L42_ASP_TX_SZ_EN, 0x00 }, { CS42L42_PWR_CTL1, 0x0E }, { CS42L42_PWR_CTL2, 0x84 }, - { CS42L42_HP_CTL, 0x01 }, + { CS42L42_HP_CTL, 0x0D }, { CS42L42_MIXER_CHA_VOL, 0x3F }, { CS42L42_MIXER_CHB_VOL, 0x3F }, { CS42L42_MIXER_ADC_VOL, 0x3f }, diff --git a/sound/pci/hda/patch_cs8409.c b/sound/pci/hda/patch_cs8409.c index 614327218634c..b760332a4e357 100644 --- a/sound/pci/hda/patch_cs8409.c +++ b/sound/pci/hda/patch_cs8409.c @@ -876,7 +876,7 @@ static void cs42l42_resume(struct sub_codec *cs42l42) { CS42L42_DET_INT_STATUS2, 0x00 }, { CS42L42_TSRS_PLUG_STATUS, 0x00 }, }; - int fsv_old, fsv_new; + unsigned int fsv; /* Bring CS42L42 out of Reset */ spec->gpio_data = snd_hda_codec_read(codec, CS8409_PIN_AFG, 0, AC_VERB_GET_GPIO_DATA, 0); @@ -893,13 +893,15 @@ static void cs42l42_resume(struct sub_codec *cs42l42) /* Clear interrupts, by reading interrupt status registers */ cs8409_i2c_bulk_read(cs42l42, irq_regs, ARRAY_SIZE(irq_regs)); - fsv_old = cs8409_i2c_read(cs42l42, CS42L42_HP_CTL); - if (cs42l42->full_scale_vol == CS42L42_FULL_SCALE_VOL_0DB) - fsv_new = fsv_old & ~CS42L42_FULL_SCALE_VOL_MASK; - else - fsv_new = fsv_old & CS42L42_FULL_SCALE_VOL_MASK; - if (fsv_new != fsv_old) - cs8409_i2c_write(cs42l42, CS42L42_HP_CTL, fsv_new); + fsv = cs8409_i2c_read(cs42l42, CS42L42_HP_CTL); + if (cs42l42->full_scale_vol) { + // Set the full scale volume bit + fsv |= CS42L42_FULL_SCALE_VOL_MASK; + cs8409_i2c_write(cs42l42, CS42L42_HP_CTL, fsv); + } + // Unmute analog channels A and B + fsv = (fsv & ~CS42L42_ANA_MUTE_AB); + cs8409_i2c_write(cs42l42, CS42L42_HP_CTL, fsv); /* we have to explicitly allow unsol event handling even during the * resume phase so that the jack event is processed properly @@ -920,7 +922,7 @@ static void cs42l42_suspend(struct sub_codec *cs42l42) { CS42L42_MIXER_CHA_VOL, 0x3F }, { CS42L42_MIXER_ADC_VOL, 0x3F }, { CS42L42_MIXER_CHB_VOL, 0x3F }, - { CS42L42_HP_CTL, 0x0F }, + { CS42L42_HP_CTL, 0x0D }, { CS42L42_ASP_RX_DAI0_EN, 0x00 }, { CS42L42_ASP_CLK_CFG, 0x00 }, { CS42L42_PWR_CTL1, 0xFE }, diff --git a/sound/pci/hda/patch_cs8409.h b/sound/pci/hda/patch_cs8409.h index 5e48115caf096..14645d25e70fd 100644 --- a/sound/pci/hda/patch_cs8409.h +++ b/sound/pci/hda/patch_cs8409.h @@ -230,9 +230,10 @@ enum cs8409_coefficient_index_registers { #define CS42L42_PDN_TIMEOUT_US (250000) #define CS42L42_PDN_SLEEP_US (2000) #define CS42L42_INIT_TIMEOUT_MS (45) +#define CS42L42_ANA_MUTE_AB (0x0C) #define CS42L42_FULL_SCALE_VOL_MASK (2) -#define CS42L42_FULL_SCALE_VOL_0DB (1) -#define CS42L42_FULL_SCALE_VOL_MINUS6DB (0) +#define CS42L42_FULL_SCALE_VOL_0DB (0) +#define CS42L42_FULL_SCALE_VOL_MINUS6DB (1) /* Dell BULLSEYE / WARLOCK / CYBORG Specific Definitions */ -- GitLab From 6a7ed7ee16a963f0ca028861eca8f8b365861dd1 Mon Sep 17 00:00:00 2001 From: Vitaly Rodionov Date: Fri, 14 Feb 2025 16:23:26 +0000 Subject: [PATCH 0713/1585] ALSA: hda/cirrus: Reduce codec resume time This patch reduces the resume time by half and introduces an option to include a delay after a single write operation before continuing. Signed-off-by: Vitaly Rodionov Link: https://patch.msgid.link/20250214162354.2675652-2-vitalyr@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_cs8409-tables.c | 6 +++--- sound/pci/hda/patch_cs8409.c | 6 +++++- sound/pci/hda/patch_cs8409.h | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/sound/pci/hda/patch_cs8409-tables.c b/sound/pci/hda/patch_cs8409-tables.c index 621f947e38174..09240138e0871 100644 --- a/sound/pci/hda/patch_cs8409-tables.c +++ b/sound/pci/hda/patch_cs8409-tables.c @@ -131,7 +131,7 @@ static const struct cs8409_i2c_param cs42l42_init_reg_seq[] = { { CS42L42_RSENSE_CTL3, 0x00 }, { CS42L42_TSENSE_CTL, 0x80 }, { CS42L42_HS_BIAS_CTL, 0xC0 }, - { CS42L42_PWR_CTL1, 0x02 }, + { CS42L42_PWR_CTL1, 0x02, 10000 }, { CS42L42_ADC_OVFL_INT_MASK, 0xff }, { CS42L42_MIXER_INT_MASK, 0xff }, { CS42L42_SRC_INT_MASK, 0xff }, @@ -328,7 +328,7 @@ static const struct cs8409_i2c_param dolphin_c0_init_reg_seq[] = { { CS42L42_RSENSE_CTL3, 0x00 }, { CS42L42_TSENSE_CTL, 0x80 }, { CS42L42_HS_BIAS_CTL, 0xC0 }, - { CS42L42_PWR_CTL1, 0x02 }, + { CS42L42_PWR_CTL1, 0x02, 10000 }, { CS42L42_ADC_OVFL_INT_MASK, 0xff }, { CS42L42_MIXER_INT_MASK, 0xff }, { CS42L42_SRC_INT_MASK, 0xff }, @@ -384,7 +384,7 @@ static const struct cs8409_i2c_param dolphin_c1_init_reg_seq[] = { { CS42L42_RSENSE_CTL3, 0x00 }, { CS42L42_TSENSE_CTL, 0x80 }, { CS42L42_HS_BIAS_CTL, 0xC0 }, - { CS42L42_PWR_CTL1, 0x06 }, + { CS42L42_PWR_CTL1, 0x06, 10000 }, { CS42L42_ADC_OVFL_INT_MASK, 0xff }, { CS42L42_MIXER_INT_MASK, 0xff }, { CS42L42_SRC_INT_MASK, 0xff }, diff --git a/sound/pci/hda/patch_cs8409.c b/sound/pci/hda/patch_cs8409.c index b760332a4e357..e50006757a2c8 100644 --- a/sound/pci/hda/patch_cs8409.c +++ b/sound/pci/hda/patch_cs8409.c @@ -346,6 +346,11 @@ static int cs8409_i2c_bulk_write(struct sub_codec *scodec, const struct cs8409_i if (cs8409_i2c_wait_complete(codec) < 0) goto error; + /* Certain use cases may require a delay + * after a write operation before proceeding. + */ + if (seq[i].delay) + fsleep(seq[i].delay); } mutex_unlock(&spec->i2c_mux); @@ -888,7 +893,6 @@ static void cs42l42_resume(struct sub_codec *cs42l42) /* Initialize CS42L42 companion codec */ cs8409_i2c_bulk_write(cs42l42, cs42l42->init_seq, cs42l42->init_seq_num); - msleep(CS42L42_INIT_TIMEOUT_MS); /* Clear interrupts, by reading interrupt status registers */ cs8409_i2c_bulk_read(cs42l42, irq_regs, ARRAY_SIZE(irq_regs)); diff --git a/sound/pci/hda/patch_cs8409.h b/sound/pci/hda/patch_cs8409.h index 14645d25e70fd..e4bd2e12110bc 100644 --- a/sound/pci/hda/patch_cs8409.h +++ b/sound/pci/hda/patch_cs8409.h @@ -229,7 +229,6 @@ enum cs8409_coefficient_index_registers { #define CS42L42_I2C_SLEEP_US (2000) #define CS42L42_PDN_TIMEOUT_US (250000) #define CS42L42_PDN_SLEEP_US (2000) -#define CS42L42_INIT_TIMEOUT_MS (45) #define CS42L42_ANA_MUTE_AB (0x0C) #define CS42L42_FULL_SCALE_VOL_MASK (2) #define CS42L42_FULL_SCALE_VOL_0DB (0) @@ -291,6 +290,7 @@ enum { struct cs8409_i2c_param { unsigned int addr; unsigned int value; + unsigned int delay; }; struct cs8409_cir_param { -- GitLab From d1d0963121769d8d16150b913fe886e48efefa51 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 11 Feb 2025 09:29:06 +0900 Subject: [PATCH 0714/1585] tools: fix annoying "mkdir -p ..." logs when building tools in parallel When CONFIG_OBJTOOL=y or CONFIG_DEBUG_INFO_BTF=y, parallel builds show awkward "mkdir -p ..." logs. $ make -j16 [ snip ] mkdir -p /home/masahiro/ref/linux/tools/objtool && make O=/home/masahiro/ref/linux subdir=tools/objtool --no-print-directory -C objtool mkdir -p /home/masahiro/ref/linux/tools/bpf/resolve_btfids && make O=/home/masahiro/ref/linux subdir=tools/bpf/resolve_btfids --no-print-directory -C bpf/resolve_btfids Defining MAKEFLAGS= on the command line wipes out command line switches from the resultant MAKEFLAGS definition, even though the command line switches are active. [1] MAKEFLAGS puts all single-letter options into the first word, and that word will be empty if no single-letter options were given. [2] However, this breaks if MAKEFLAGS= is given on the command line. The tools/ and tools/% targets set MAKEFLAGS= on the command line, which breaks the following code in tools/scripts/Makefile.include: short-opts := $(firstword -$(MAKEFLAGS)) If MAKEFLAGS really needs modification, it should be done through the environment variable, as follows: MAKEFLAGS= $(MAKE) ... That said, I question whether modifying MAKEFLAGS is necessary here. The only flag we might want to exclude is --no-print-directory, as the tools build system changes the working directory. However, people might find the "Entering/Leaving directory" logs annoying. I simply removed the offending MAKEFLAGS=. [1]: https://savannah.gnu.org/bugs/?62469 [2]: https://www.gnu.org/software/make/manual/make.html#Testing-Flags Fixes: ea01fa9f63ae ("tools: Connect to the kernel build system") Fixes: a50e43332756 ("perf tools: Honor parallel jobs") Signed-off-by: Masahiro Yamada Tested-by: Daniel Xu --- Makefile | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 89628e354ca7b..52207bcb1a9df 100644 --- a/Makefile +++ b/Makefile @@ -1421,18 +1421,13 @@ ifneq ($(wildcard $(resolve_btfids_O)),) $(Q)$(MAKE) -sC $(srctree)/tools/bpf/resolve_btfids O=$(resolve_btfids_O) clean endif -# Clear a bunch of variables before executing the submake -ifeq ($(quiet),silent_) -tools_silent=s -endif - tools/: FORCE $(Q)mkdir -p $(objtree)/tools - $(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(tools_silent) $(filter --j% -j,$(MAKEFLAGS))" O=$(abspath $(objtree)) subdir=tools -C $(srctree)/tools/ + $(Q)$(MAKE) LDFLAGS= O=$(abspath $(objtree)) subdir=tools -C $(srctree)/tools/ tools/%: FORCE $(Q)mkdir -p $(objtree)/tools - $(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(tools_silent) $(filter --j% -j,$(MAKEFLAGS))" O=$(abspath $(objtree)) subdir=tools -C $(srctree)/tools/ $* + $(Q)$(MAKE) LDFLAGS= O=$(abspath $(objtree)) subdir=tools -C $(srctree)/tools/ $* # --------------------------------------------------------------------------- # Kernel selftest -- GitLab From 140332b6ed727a4ec2e5722a1ccda28b52d45771 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 13 Feb 2025 15:26:44 +0900 Subject: [PATCH 0715/1585] kbuild: fix linux-headers package build when $(CC) cannot link userspace Since commit 5f73e7d0386d ("kbuild: refactor cross-compiling linux-headers package"), the linux-headers Debian package fails to build when $(CC) cannot build userspace applications, for example, when using toolchains installed by the 0day bot. The host programs in the linux-headers package should be rebuilt using the disto's cross-compiler, ${DEB_HOST_GNU_TYPE}-gcc instead of $(CC). Hence, the variable 'CC' must be expanded in this shell script instead of in the top-level Makefile. Commit f354fc88a72a ("kbuild: install-extmod-build: add missing quotation marks for CC variable") was not a correct fix because CC="ccache gcc" should be unrelated when rebuilding userspace tools. Fixes: 5f73e7d0386d ("kbuild: refactor cross-compiling linux-headers package") Reported-by: Jeff Johnson Closes: https://lore.kernel.org/linux-kbuild/CAK7LNARb3xO3ptBWOMpwKcyf3=zkfhMey5H2KnB1dOmUwM79dA@mail.gmail.com/T/#t Signed-off-by: Masahiro Yamada Tested-by: Jeff Johnson --- scripts/package/install-extmod-build | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index b724626ea0ca0..2966473b46609 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -62,8 +62,8 @@ if [ "${CC}" != "${HOSTCC}" ]; then # # Clear VPATH and srcroot because the source files reside in the output # directory. - # shellcheck disable=SC2016 # $(MAKE), $(CC), and $(build) will be expanded by Make - "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC="$(CC)" VPATH= srcroot=. $(build)='"${destdir}"/scripts + # shellcheck disable=SC2016 # $(MAKE) and $(build) will be expanded by Make + "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC='"${CC}"' VPATH= srcroot=. $(build)='"${destdir}"/scripts rm -f "${destdir}/scripts/Kbuild" fi -- GitLab From 1e988c3fe1264708f4f92109203ac5b1d65de50b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 14 Feb 2025 22:48:15 +0000 Subject: [PATCH 0716/1585] io_uring: prevent opcode speculation sqe->opcode is used for different tables, make sure we santitise it against speculations. Cc: stable@vger.kernel.org Fixes: d3656344fea03 ("io_uring: add lookup table for various opcode needs") Signed-off-by: Pavel Begunkov Reviewed-by: Li Zetao Link: https://lore.kernel.org/r/7eddbf31c8ca0a3947f8ed98271acc2b4349c016.1739568408.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 263e504be4a8b..29a42365a4816 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2045,6 +2045,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, req->opcode = 0; return io_init_fail_req(req, -EINVAL); } + opcode = array_index_nospec(opcode, IORING_OP_LAST); + def = &io_issue_defs[opcode]; if (unlikely(sqe_flags & ~SQE_COMMON_FLAGS)) { /* enforce forwards compatibility on users */ -- GitLab From 071ed42cff4fcdd89025d966d48eabef59913bf2 Mon Sep 17 00:00:00 2001 From: Pierre Riteau Date: Thu, 13 Feb 2025 23:36:10 +0100 Subject: [PATCH 0717/1585] net/sched: cls_api: fix error handling causing NULL dereference tcf_exts_miss_cookie_base_alloc() calls xa_alloc_cyclic() which can return 1 if the allocation succeeded after wrapping. This was treated as an error, with value 1 returned to caller tcf_exts_init_ex() which sets exts->actions to NULL and returns 1 to caller fl_change(). fl_change() treats err == 1 as success, calling tcf_exts_validate_ex() which calls tcf_action_init() with exts->actions as argument, where it is dereferenced. Example trace: BUG: kernel NULL pointer dereference, address: 0000000000000000 CPU: 114 PID: 16151 Comm: handler114 Kdump: loaded Not tainted 5.14.0-503.16.1.el9_5.x86_64 #1 RIP: 0010:tcf_action_init+0x1f8/0x2c0 Call Trace: tcf_action_init+0x1f8/0x2c0 tcf_exts_validate_ex+0x175/0x190 fl_change+0x537/0x1120 [cls_flower] Fixes: 80cd22c35c90 ("net/sched: cls_api: Support hardware miss to tc action") Signed-off-by: Pierre Riteau Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250213223610.320278-1-pierre@stackhpc.com Signed-off-by: Jakub Kicinski --- net/sched/cls_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8e47e5355be61..4f648af8cfaaf 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -97,7 +97,7 @@ tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp, err = xa_alloc_cyclic(&tcf_exts_miss_cookies_xa, &n->miss_cookie_base, n, xa_limit_32b, &next, GFP_KERNEL); - if (err) + if (err < 0) goto err_xa_alloc; exts->miss_cookie_node = n; -- GitLab From ef24989a62eefa6293a6c1c59dbb8b7e646e76dd Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 13 Feb 2025 17:44:43 +0100 Subject: [PATCH 0718/1585] drm/msm/a6xx: Only print the GMU firmware version once We only fetch it once from userland, so let's also only notify the user once and not on every runtime resume. As you can notice by the tags chain, more than one user found this annoying. Reported-by: Jens Glathe Suggested-by: Abel Vesa Suggested-by: Rob Clark Signed-off-by: Konrad Dybcio Reviewed-by: Neil Armstrong Patchwork: https://patchwork.freedesktop.org/patch/637062/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 65d38b25c0707..699b0dd34b18f 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -813,10 +813,10 @@ static int a6xx_gmu_fw_load(struct a6xx_gmu *gmu) } ver = gmu_read(gmu, REG_A6XX_GMU_CORE_FW_VERSION); - DRM_INFO("Loaded GMU firmware v%u.%u.%u\n", - FIELD_GET(A6XX_GMU_CORE_FW_VERSION_MAJOR__MASK, ver), - FIELD_GET(A6XX_GMU_CORE_FW_VERSION_MINOR__MASK, ver), - FIELD_GET(A6XX_GMU_CORE_FW_VERSION_STEP__MASK, ver)); + DRM_INFO_ONCE("Loaded GMU firmware v%u.%u.%u\n", + FIELD_GET(A6XX_GMU_CORE_FW_VERSION_MAJOR__MASK, ver), + FIELD_GET(A6XX_GMU_CORE_FW_VERSION_MINOR__MASK, ver), + FIELD_GET(A6XX_GMU_CORE_FW_VERSION_STEP__MASK, ver)); return 0; } -- GitLab From 669c285620231786fffe9d87ab432e08a6ed922b Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 13 Jan 2025 07:48:41 -0800 Subject: [PATCH 0719/1585] drm/msm: Avoid rounding up to one jiffy If userspace is trying to achieve a timeout of zero, let 'em have it. Only round up if the timeout is greater than zero. Fixes: 4969bccd5f4e ("drm/msm: Avoid rounding down to zero jiffies") Signed-off-by: Rob Clark Reviewed-by: Akhil P Oommen Patchwork: https://patchwork.freedesktop.org/patch/632264/ --- drivers/gpu/drm/msm/msm_drv.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index fee31680a6d54..a650778552017 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -537,15 +537,12 @@ static inline int align_pitch(int width, int bpp) static inline unsigned long timeout_to_jiffies(const ktime_t *timeout) { ktime_t now = ktime_get(); - s64 remaining_jiffies; - if (ktime_compare(*timeout, now) < 0) { - remaining_jiffies = 0; - } else { - ktime_t rem = ktime_sub(*timeout, now); - remaining_jiffies = ktime_divns(rem, NSEC_PER_SEC / HZ); - } + if (ktime_compare(*timeout, now) <= 0) + return 0; + ktime_t rem = ktime_sub(*timeout, now); + s64 remaining_jiffies = ktime_divns(rem, NSEC_PER_SEC / HZ); return clamp(remaining_jiffies, 1LL, (s64)INT_MAX); } -- GitLab From d440148418f4816b4973ec6723bf63821793a0a7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 15 Feb 2025 09:28:55 -0800 Subject: [PATCH 0720/1585] tegra210-adma: fix 32-bit x86 build The Tegra210 Audio DMA controller driver did a plain divide: page_no = (res_page->start - res_base->start) / cdata->ch_base_offset; which causes problems on 32-bit x86 configurations that have 64-bit resource sizes: x86_64-linux-ld: drivers/dma/tegra210-adma.o: in function `tegra_adma_probe': tegra210-adma.c:(.text+0x1322): undefined reference to `__udivdi3' because gcc doesn't generate the trivial code for a 64-by-32 divide, turning it into a function call to do a full 64-by-64 divide. And the kernel intentionally doesn't provide that helper function, because 99% of the time all you want is the narrower version. Of course, tegra210 is a 64-bit architecture and the 32-bit x86 build is purely for build testing, so this really is just about build coverage failure. But build coverage is good. Side note: div_u64() would be suboptimal if you actually have a 32-bit resource_t, so our "helper" for divides are admittedly making it harder than it should be to generate good code for all the possible cases. At some point, I'll consider 32-bit x86 so entirely legacy that I can't find it in myself to care any more, and we'll just add the __udivdi3 library function. But for now, the right thing to do is to use "div_u64()" to show that you know that you are doing the simpler divide with a 32-bit number. And the build error enforces that. While fixing the build issue, also check for division-by-zero, and for overflow. Which hopefully cannot happen on real production hardware, but the value of 'ch_base_offset' can definitely be zero in other places. Reported-by: Guenter Roeck Signed-off-by: Linus Torvalds --- drivers/dma/tegra210-adma.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c index 6896da8ac7ef6..5c6a5b3589873 100644 --- a/drivers/dma/tegra210-adma.c +++ b/drivers/dma/tegra210-adma.c @@ -887,7 +887,7 @@ static int tegra_adma_probe(struct platform_device *pdev) const struct tegra_adma_chip_data *cdata; struct tegra_adma *tdma; struct resource *res_page, *res_base; - int ret, i, page_no; + int ret, i; cdata = of_device_get_match_data(&pdev->dev); if (!cdata) { @@ -914,9 +914,20 @@ static int tegra_adma_probe(struct platform_device *pdev) res_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "global"); if (res_base) { - page_no = (res_page->start - res_base->start) / cdata->ch_base_offset; - if (page_no <= 0) + resource_size_t page_offset, page_no; + unsigned int ch_base_offset; + + if (res_page->start < res_base->start) + return -EINVAL; + page_offset = res_page->start - res_base->start; + ch_base_offset = cdata->ch_base_offset; + if (!ch_base_offset) return -EINVAL; + + page_no = div_u64(page_offset, ch_base_offset); + if (!page_no || page_no > INT_MAX) + return -EINVAL; + tdma->ch_page_no = page_no - 1; tdma->base_addr = devm_ioremap_resource(&pdev->dev, res_base); if (IS_ERR(tdma->base_addr)) -- GitLab From 1b71c2fb04e7a713abc6edde4a412416ff3158f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 13 Feb 2025 15:55:17 +0100 Subject: [PATCH 0721/1585] kbuild: userprogs: fix bitsize and target detection on clang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit scripts/Makefile.clang was changed in the linked commit to move --target from KBUILD_CFLAGS to KBUILD_CPPFLAGS, as that generally has a broader scope. However that variable is not inspected by the userprogs logic, breaking cross compilation on clang. Use both variables to detect bitsize and target arguments for userprogs. Fixes: feb843a469fb ("kbuild: add $(CLANG_FLAGS) to KBUILD_CPPFLAGS") Cc: stable@vger.kernel.org Signed-off-by: Thomas Weißschuh Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 52207bcb1a9df..272db408be5ca 100644 --- a/Makefile +++ b/Makefile @@ -1120,8 +1120,8 @@ LDFLAGS_vmlinux += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) endif # Align the bit size of userspace programs with the kernel -KBUILD_USERCFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS)) -KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS)) +KBUILD_USERCFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) +KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) # make the checker run with the right architecture CHECKFLAGS += --arch=$(ARCH) -- GitLab From b28fb1f2ef45eeef1cd2c23149b50d184d545a3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 13 Feb 2025 17:04:29 +0100 Subject: [PATCH 0722/1585] modpost: Fix a few typos in a comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Namely: s/becasue/because/ and s/wiht/with/ plus an added article. Signed-off-by: Uwe Kleine-König Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 36b28987a2f07..c35d22607978b 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -190,8 +190,8 @@ static struct module *new_module(const char *name, size_t namelen) /* * Set mod->is_gpl_compatible to true by default. If MODULE_LICENSE() - * is missing, do not check the use for EXPORT_SYMBOL_GPL() becasue - * modpost will exit wiht error anyway. + * is missing, do not check the use for EXPORT_SYMBOL_GPL() because + * modpost will exit with an error anyway. */ mod->is_gpl_compatible = true; -- GitLab From 129fe718819cc5e24ea2f489db9ccd4371f0c6f6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 14 Feb 2025 11:55:47 -0500 Subject: [PATCH 0723/1585] tracing: Do not allow mmap() of persistent ring buffer When trying to mmap a trace instance buffer that is attached to reserve_mem, it would crash: BUG: unable to handle page fault for address: ffffe97bd00025c8 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 2862f3067 P4D 2862f3067 PUD 0 Oops: Oops: 0000 [#1] PREEMPT_RT SMP PTI CPU: 4 UID: 0 PID: 981 Comm: mmap-rb Not tainted 6.14.0-rc2-test-00003-g7f1a5e3fbf9e-dirty #233 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 RIP: 0010:validate_page_before_insert+0x5/0xb0 Code: e2 01 89 d0 c3 cc cc cc cc 66 66 2e 0f 1f 84 00 00 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 <48> 8b 46 08 a8 01 75 67 66 90 48 89 f0 8b 50 34 85 d2 74 76 48 89 RSP: 0018:ffffb148c2f3f968 EFLAGS: 00010246 RAX: ffff9fa5d3322000 RBX: ffff9fa5ccff9c08 RCX: 00000000b879ed29 RDX: ffffe97bd00025c0 RSI: ffffe97bd00025c0 RDI: ffff9fa5ccff9c08 RBP: ffffb148c2f3f9f0 R08: 0000000000000004 R09: 0000000000000004 R10: 0000000000000000 R11: 0000000000000200 R12: 0000000000000000 R13: 00007f16a18d5000 R14: ffff9fa5c48db6a8 R15: 0000000000000000 FS: 00007f16a1b54740(0000) GS:ffff9fa73df00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffe97bd00025c8 CR3: 00000001048c6006 CR4: 0000000000172ef0 Call Trace: ? __die_body.cold+0x19/0x1f ? __die+0x2e/0x40 ? page_fault_oops+0x157/0x2b0 ? search_module_extables+0x53/0x80 ? validate_page_before_insert+0x5/0xb0 ? kernelmode_fixup_or_oops.isra.0+0x5f/0x70 ? __bad_area_nosemaphore+0x16e/0x1b0 ? bad_area_nosemaphore+0x16/0x20 ? do_kern_addr_fault+0x77/0x90 ? exc_page_fault+0x22b/0x230 ? asm_exc_page_fault+0x2b/0x30 ? validate_page_before_insert+0x5/0xb0 ? vm_insert_pages+0x151/0x400 __rb_map_vma+0x21f/0x3f0 ring_buffer_map+0x21b/0x2f0 tracing_buffers_mmap+0x70/0xd0 __mmap_region+0x6f0/0xbd0 mmap_region+0x7f/0x130 do_mmap+0x475/0x610 vm_mmap_pgoff+0xf2/0x1d0 ksys_mmap_pgoff+0x166/0x200 __x64_sys_mmap+0x37/0x50 x64_sys_call+0x1670/0x1d70 do_syscall_64+0xbb/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f The reason was that the code that maps the ring buffer pages to user space has: page = virt_to_page((void *)cpu_buffer->subbuf_ids[s]); And uses that in: vm_insert_pages(vma, vma->vm_start, pages, &nr_pages); But virt_to_page() does not work with vmap()'d memory which is what the persistent ring buffer has. It is rather trivial to allow this, but for now just disable mmap() of instances that have their ring buffer from the reserve_mem option. If an mmap() is performed on a persistent buffer it will return -ENODEV just like it would if the .mmap field wasn't defined in the file_operations structure. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Vincent Donnefort Link: https://lore.kernel.org/20250214115547.0d7287d3@gandalf.local.home Fixes: 9b7bdf6f6ece6 ("tracing: Have trace_printk not use binary prints if boot buffer") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 25ff37aab00f1..0e6d517e74e0f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8279,6 +8279,10 @@ static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma) struct trace_iterator *iter = &info->iter; int ret = 0; + /* Currently the boot mapped buffer is not supported for mmap */ + if (iter->tr->flags & TRACE_ARRAY_FL_BOOT) + return -ENODEV; + ret = get_snapshot_map(iter->tr); if (ret) return ret; -- GitLab From 97937834ae876f29565415ab15f1284666dc6be3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 14 Feb 2025 12:35:12 -0500 Subject: [PATCH 0724/1585] ring-buffer: Update pages_touched to reflect persistent buffer content The pages_touched field represents the number of subbuffers in the ring buffer that have content that can be read. This is used in accounting of "dirty_pages" and "buffer_percent" to allow the user to wait for the buffer to be filled to a certain amount before it reads the buffer in blocking mode. The persistent buffer never updated this value so it was set to zero, and this accounting would take it as it had no content. This would cause user space to wait for content even though there's enough content in the ring buffer that satisfies the buffer_percent. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Vincent Donnefort Link: https://lore.kernel.org/20250214123512.0631436e@gandalf.local.home Fixes: 5f3b6e839f3ce ("ring-buffer: Validate boot range memory events") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 0419d41a20604..bb6089c2951e5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1850,6 +1850,11 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->cpu); goto invalid; } + + /* If the buffer has content, update pages_touched */ + if (ret) + local_inc(&cpu_buffer->pages_touched); + entries += ret; entry_bytes += local_read(&head_page->page->commit); local_set(&cpu_buffer->head_page->entries, ret); -- GitLab From 2f69e54584475ac85ea0e3407c9198ac7c6ea8ad Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 14 Dec 2024 00:14:17 +0200 Subject: [PATCH 0725/1585] drm/msm/dpu: skip watchdog timer programming through TOP on >= SM8450 The SM8450 and later chips have DPU_MDP_PERIPH_0_REMOVED feature bit set, which means that those platforms have dropped some of the registers, including the WD TIMER-related ones. Stop providing the callback to program WD timer on those platforms. Fixes: 100d7ef6995d ("drm/msm/dpu: add support for SM8450") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/628874/ Link: https://lore.kernel.org/r/20241214-dpu-drop-features-v1-1-988f0662cb7e@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c index ad19330de61ab..562a3f4c5238a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c @@ -272,7 +272,7 @@ static void _setup_mdp_ops(struct dpu_hw_mdp_ops *ops, if (cap & BIT(DPU_MDP_VSYNC_SEL)) ops->setup_vsync_source = dpu_hw_setup_vsync_sel; - else + else if (!(cap & BIT(DPU_MDP_PERIPH_0_REMOVED))) ops->setup_vsync_source = dpu_hw_setup_wd_timer; ops->get_safe_status = dpu_hw_get_safe_status; -- GitLab From af0a4a2090cce732c70ad6c5f4145b43f39e3fe9 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 14 Dec 2024 00:14:18 +0200 Subject: [PATCH 0726/1585] drm/msm/dpu: enable DPU_WB_INPUT_CTRL for DPU 5.x Several DPU 5.x platforms are supposed to be using DPU_WB_INPUT_CTRL, to bind WB and PINGPONG blocks, but they do not. Change those platforms to use WB_SM8250_MASK, which includes that bit. Fixes: 1f5bcc4316b3 ("drm/msm/dpu: enable writeback on SC8108X") Fixes: ab2b03d73a66 ("drm/msm/dpu: enable writeback on SM6125") Fixes: 47cebb740a83 ("drm/msm/dpu: enable writeback on SM8150") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/628876/ Link: https://lore.kernel.org/r/20241214-dpu-drop-features-v1-2-988f0662cb7e@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h | 2 +- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h | 2 +- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h index 421afacb72480..36cc9dbc00b5c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h @@ -297,7 +297,7 @@ static const struct dpu_wb_cfg sm8150_wb[] = { { .name = "wb_2", .id = WB_2, .base = 0x65000, .len = 0x2c8, - .features = WB_SDM845_MASK, + .features = WB_SM8250_MASK, .format_list = wb2_formats_rgb, .num_formats = ARRAY_SIZE(wb2_formats_rgb), .clk_ctrl = DPU_CLK_CTRL_WB2, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h index 641023b102bf5..e8eacdb47967a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h @@ -304,7 +304,7 @@ static const struct dpu_wb_cfg sc8180x_wb[] = { { .name = "wb_2", .id = WB_2, .base = 0x65000, .len = 0x2c8, - .features = WB_SDM845_MASK, + .features = WB_SM8250_MASK, .format_list = wb2_formats_rgb, .num_formats = ARRAY_SIZE(wb2_formats_rgb), .clk_ctrl = DPU_CLK_CTRL_WB2, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h index d039b96beb97c..76f60a2df7a89 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h @@ -144,7 +144,7 @@ static const struct dpu_wb_cfg sm6125_wb[] = { { .name = "wb_2", .id = WB_2, .base = 0x65000, .len = 0x2c8, - .features = WB_SDM845_MASK, + .features = WB_SM8250_MASK, .format_list = wb2_formats_rgb, .num_formats = ARRAY_SIZE(wb2_formats_rgb), .clk_ctrl = DPU_CLK_CTRL_WB2, -- GitLab From 24b50b7340ab7e7b004ee6db43d625caa68498b0 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 17 Dec 2024 14:35:40 +0200 Subject: [PATCH 0727/1585] drm/msm/dpu: correct LM pairing for SM6150 The SM6150 platform doesn't have 3DMux (MERGE_3D) block, so it can not split the screen between two LMs. Drop lm_pair fields as they don't make sense for this platform. Suggested-by: Abhinav Kumar Fixes: cb2f9144693b ("drm/msm/dpu: Add SM6150 support") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/629377/ Link: https://lore.kernel.org/r/20241217-dpu-fix-sm6150-v2-1-9acc8f5addf3@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_3_sm6150.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_3_sm6150.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_3_sm6150.h index 621a2140f675f..d761ed705bac3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_3_sm6150.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_3_sm6150.h @@ -116,14 +116,12 @@ static const struct dpu_lm_cfg sm6150_lm[] = { .sblk = &sdm845_lm_sblk, .pingpong = PINGPONG_0, .dspp = DSPP_0, - .lm_pair = LM_1, }, { .name = "lm_1", .id = LM_1, .base = 0x45000, .len = 0x320, .features = MIXER_QCM2290_MASK, .sblk = &sdm845_lm_sblk, .pingpong = PINGPONG_1, - .lm_pair = LM_0, }, { .name = "lm_2", .id = LM_2, .base = 0x46000, .len = 0x320, -- GitLab From df9cf852ca3099feb8fed781bdd5d3863af001c8 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Thu, 6 Feb 2025 11:46:36 -0800 Subject: [PATCH 0728/1585] drm/msm/dp: account for widebus and yuv420 during mode validation Widebus allows the DP controller to operate in 2 pixel per clock mode. The mode validation logic validates the mode->clock against the max DP pixel clock. However the max DP pixel clock limit assumes widebus is already enabled. Adjust the mode validation logic to only compare the adjusted pixel clock which accounts for widebus against the max DP pixel clock. Also fix the mode validation logic for YUV420 modes as in that case as well, only half the pixel clock is needed. Cc: stable@vger.kernel.org Fixes: 757a2f36ab09 ("drm/msm/dp: enable widebus feature for display port") Fixes: 6db6e5606576 ("drm/msm/dp: change clock related programming for YUV420 over DP") Reviewed-by: Dmitry Baryshkov Tested-by: Dale Whinham Patchwork: https://patchwork.freedesktop.org/patch/635789/ Link: https://lore.kernel.org/r/20250206-dp-widebus-fix-v2-1-cb89a0313286@quicinc.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dp/dp_display.c | 11 ++++++----- drivers/gpu/drm/msm/dp/dp_drm.c | 5 ++++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index d852e7a853348..a129e26c3ddb3 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -930,16 +930,17 @@ enum drm_mode_status msm_dp_bridge_mode_valid(struct drm_bridge *bridge, return -EINVAL; } - if (mode->clock > DP_MAX_PIXEL_CLK_KHZ) - return MODE_CLOCK_HIGH; - msm_dp_display = container_of(dp, struct msm_dp_display_private, msm_dp_display); link_info = &msm_dp_display->panel->link_info; - if (drm_mode_is_420_only(&dp->connector->display_info, mode) && - msm_dp_display->panel->vsc_sdp_supported) + if ((drm_mode_is_420_only(&dp->connector->display_info, mode) && + msm_dp_display->panel->vsc_sdp_supported) || + msm_dp_wide_bus_available(dp)) mode_pclk_khz /= 2; + if (mode_pclk_khz > DP_MAX_PIXEL_CLK_KHZ) + return MODE_CLOCK_HIGH; + mode_bpp = dp->connector->display_info.bpc * num_components; if (!mode_bpp) mode_bpp = default_bpp; diff --git a/drivers/gpu/drm/msm/dp/dp_drm.c b/drivers/gpu/drm/msm/dp/dp_drm.c index d3e241ea69416..16b7913d1eefa 100644 --- a/drivers/gpu/drm/msm/dp/dp_drm.c +++ b/drivers/gpu/drm/msm/dp/dp_drm.c @@ -257,7 +257,10 @@ static enum drm_mode_status msm_edp_bridge_mode_valid(struct drm_bridge *bridge, return -EINVAL; } - if (mode->clock > DP_MAX_PIXEL_CLK_KHZ) + if (msm_dp_wide_bus_available(dp)) + mode_pclk_khz /= 2; + + if (mode_pclk_khz > DP_MAX_PIXEL_CLK_KHZ) return MODE_CLOCK_HIGH; /* -- GitLab From 978ca99d6bd87b84ff7788eea4d2c328a70530f6 Mon Sep 17 00:00:00 2001 From: Ethan Carter Edwards Date: Sun, 9 Feb 2025 22:51:54 -0500 Subject: [PATCH 0729/1585] drm/msm/dpu: Fix uninitialized variable There is a possibility for an uninitialized *ret* variable to be returned in some code paths. Fix this by initializing *ret* to 0. Addresses-Coverity-ID: 1642546 ("Uninitialized scalar variable") Fixes: 774bcfb73176 ("drm/msm/dpu: add support for virtual planes") Signed-off-by: Ethan Carter Edwards Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/636201/ Link: https://lore.kernel.org/r/20250209-dpu-v2-1-114dfd4ebefd@ethancedwards.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 098abc2c0003c..af3e541f60c30 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -1164,7 +1164,6 @@ int dpu_assign_plane_resources(struct dpu_global_state *global_state, unsigned int num_planes) { unsigned int i; - int ret; for (i = 0; i < num_planes; i++) { struct drm_plane_state *plane_state = states[i]; @@ -1173,13 +1172,13 @@ int dpu_assign_plane_resources(struct dpu_global_state *global_state, !plane_state->visible) continue; - ret = dpu_plane_virtual_assign_resources(crtc, global_state, + int ret = dpu_plane_virtual_assign_resources(crtc, global_state, state, plane_state); if (ret) - break; + return ret; } - return ret; + return 0; } static void dpu_plane_flush_csc(struct dpu_plane *pdpu, struct dpu_sw_pipe *pipe) -- GitLab From f063ac6b55df03ed25996bdc84d9e1c50147cfa1 Mon Sep 17 00:00:00 2001 From: Jessica Zhang Date: Tue, 11 Feb 2025 19:59:19 -0800 Subject: [PATCH 0730/1585] drm/msm/dpu: Disable dither in phys encoder cleanup Disable pingpong dither in dpu_encoder_helper_phys_cleanup(). This avoids the issue where an encoder unknowingly uses dither after reserving a pingpong block that was previously bound to an encoder that had enabled dither. Cc: stable@vger.kernel.org Reported-by: Dmitry Baryshkov Closes: https://lore.kernel.org/all/jr7zbj5w7iq4apg3gofuvcwf4r2swzqjk7sshwcdjll4mn6ctt@l2n3qfpujg3q/ Signed-off-by: Jessica Zhang Reviewed-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Fixes: 3c128638a07d ("drm/msm/dpu: add support for dither block in display") Patchwork: https://patchwork.freedesktop.org/patch/636517/ Link: https://lore.kernel.org/r/20250211-dither-disable-v1-1-ac2cb455f6b9@quicinc.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 5172ab4dea995..48e6e8d74c855 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -2281,6 +2281,9 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc) } } + if (phys_enc->hw_pp && phys_enc->hw_pp->ops.setup_dither) + phys_enc->hw_pp->ops.setup_dither(phys_enc->hw_pp, NULL); + /* reset the merge 3D HW block */ if (phys_enc->hw_pp && phys_enc->hw_pp->merge_3d) { phys_enc->hw_pp->merge_3d->ops.setup_3d_mode(phys_enc->hw_pp->merge_3d, -- GitLab From 144429831f447223253a0e4376489f84ff37d1a7 Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Tue, 11 Feb 2025 00:19:32 +0100 Subject: [PATCH 0731/1585] drm/msm/dpu: Don't leak bits_per_component into random DSC_ENC fields What used to be the input_10_bits boolean - feeding into the lowest bit of DSC_ENC - on MSM downstream turned into an accidental OR with the full bits_per_component number when it was ported to the upstream kernel. On typical bpc=8 setups we don't notice this because line_buf_depth is always an odd value (it contains bpc+1) and will also set the 4th bit after left-shifting by 3 (hence this |= bits_per_component is a no-op). Now that guards are being removed to allow more bits_per_component values besides 8 (possible since commit 49fd30a7153b ("drm/msm/dsi: use DRM DSC helpers for DSC setup")), a bpc of 10 will instead clash with the 5th bit which is convert_rgb. This is "fortunately" also always set to true by MSM's dsi_populate_dsc_params() already, but once a bpc of 12 starts being used it'll write into simple_422 which is normally false. To solve all these overlaps, simply replicate downstream code and only set this lowest bit if bits_per_component is equal to 10. It is unclear why DSC requires this only for bpc=10 but not bpc=12, and also notice that this lowest bit wasn't set previously despite having a panel and patch on the list using it without any mentioned issues. Fixes: c110cfd1753e ("drm/msm/disp/dpu1: Add support for DSC") Signed-off-by: Marijn Suijten Reviewed-by: Abhinav Kumar Reviewed-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/636311/ Link: https://lore.kernel.org/r/20250211-dsc-10-bit-v1-1-1c85a9430d9a@somainline.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c index 657200401f576..cec6d4e8baec4 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c @@ -52,6 +52,7 @@ static void dpu_hw_dsc_config(struct dpu_hw_dsc *hw_dsc, u32 slice_last_group_size; u32 det_thresh_flatness; bool is_cmd_mode = !(mode & DSC_MODE_VIDEO); + bool input_10_bits = dsc->bits_per_component == 10; DPU_REG_WRITE(c, DSC_COMMON_MODE, mode); @@ -68,7 +69,7 @@ static void dpu_hw_dsc_config(struct dpu_hw_dsc *hw_dsc, data |= (dsc->line_buf_depth << 3); data |= (dsc->simple_422 << 2); data |= (dsc->convert_rgb << 1); - data |= dsc->bits_per_component; + data |= input_10_bits; DPU_REG_WRITE(c, DSC_ENC, data); -- GitLab From 5e192eefebaab5bdcf716add8910d7f8a2e30e3c Mon Sep 17 00:00:00 2001 From: Jessica Zhang Date: Wed, 8 Jan 2025 14:40:48 -0800 Subject: [PATCH 0732/1585] drm/msm/dpu: Drop extraneous return in dpu_crtc_reassign_planes() Drop extra return at the end of dpu_crtc_reassign_planes() Fixes: 774bcfb73176 ("drm/msm/dpu: add support for virtual planes") Signed-off-by: Jessica Zhang Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/631565/ Link: https://lore.kernel.org/r/20250108-virtual-planes-fixes-v1-2-420cb36df94a@quicinc.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index 7191b1a6d41b3..e5dcd41a361f4 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -1228,8 +1228,6 @@ static int dpu_crtc_reassign_planes(struct drm_crtc *crtc, struct drm_crtc_state done: kfree(states); return ret; - - return 0; } static int dpu_crtc_atomic_check(struct drm_crtc *crtc, -- GitLab From 588257897058a0b1aa47912db4fe93c6ff5e3887 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 14 Feb 2025 16:08:41 +0100 Subject: [PATCH 0733/1585] drm/msm/dsi/phy: Protect PHY_CMN_CLK_CFG0 updated from driver side PHY_CMN_CLK_CFG0 register is updated by the PHY driver and by two divider clocks from Common Clock Framework: devm_clk_hw_register_divider_parent_hw(). Concurrent access by the clocks side is protected with spinlock, however driver's side in restoring state is not. Restoring state is called from msm_dsi_phy_enable(), so there could be a path leading to concurrent and conflicting updates with clock framework. Add missing lock usage on the PHY driver side, encapsulated in its own function so the code will be still readable. While shuffling the code, define and use PHY_CMN_CLK_CFG0 bitfields to make the code more readable and obvious. Fixes: 1ef7c99d145c ("drm/msm/dsi: add support for 7nm DSI PHY/PLL") Reviewed-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Signed-off-by: Krzysztof Kozlowski Patchwork: https://patchwork.freedesktop.org/patch/637376/ Link: https://lore.kernel.org/r/20250214-drm-msm-phy-pll-cfg-reg-v3-1-0943b850722c@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c | 14 ++++++++++++-- .../gpu/drm/msm/registers/display/dsi_phy_7nm.xml | 5 ++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 031446c87daec..25ca649de717e 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -372,6 +372,15 @@ static void dsi_pll_enable_pll_bias(struct dsi_pll_7nm *pll) ndelay(250); } +static void dsi_pll_cmn_clk_cfg0_write(struct dsi_pll_7nm *pll, u32 val) +{ + unsigned long flags; + + spin_lock_irqsave(&pll->postdiv_lock, flags); + writel(val, pll->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG0); + spin_unlock_irqrestore(&pll->postdiv_lock, flags); +} + static void dsi_pll_disable_global_clk(struct dsi_pll_7nm *pll) { u32 data; @@ -574,8 +583,9 @@ static int dsi_7nm_pll_restore_state(struct msm_dsi_phy *phy) val |= cached->pll_out_div; writel(val, pll_7nm->phy->pll_base + REG_DSI_7nm_PHY_PLL_PLL_OUTDIV_RATE); - writel(cached->bit_clk_div | (cached->pix_clk_div << 4), - phy_base + REG_DSI_7nm_PHY_CMN_CLK_CFG0); + dsi_pll_cmn_clk_cfg0_write(pll_7nm, + DSI_7nm_PHY_CMN_CLK_CFG0_DIV_CTRL_3_0(cached->bit_clk_div) | + DSI_7nm_PHY_CMN_CLK_CFG0_DIV_CTRL_7_4(cached->pix_clk_div)); val = readl(phy_base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); val &= ~0x3; diff --git a/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml b/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml index d54b72f924493..e0bf6e016b4ce 100644 --- a/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml +++ b/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml @@ -9,7 +9,10 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> - + + + + -- GitLab From 5a97bc924ae0804b8dbf627e357acaa5ef761483 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 14 Feb 2025 16:08:42 +0100 Subject: [PATCH 0734/1585] drm/msm/dsi/phy: Protect PHY_CMN_CLK_CFG1 against clock driver PHY_CMN_CLK_CFG1 register is updated by the PHY driver and by a mux clock from Common Clock Framework: devm_clk_hw_register_mux_parent_hws(). There could be a path leading to concurrent and conflicting updates between PHY driver and clock framework, e.g. changing the mux and enabling PLL clocks. Add dedicated spinlock to be sure all PHY_CMN_CLK_CFG1 updates are synchronized. While shuffling the code, define and use PHY_CMN_CLK_CFG1 bitfields to make the code more readable and obvious. Fixes: 1ef7c99d145c ("drm/msm/dsi: add support for 7nm DSI PHY/PLL") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/637378/ Link: https://lore.kernel.org/r/20250214-drm-msm-phy-pll-cfg-reg-v3-2-0943b850722c@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c | 35 ++++++++++++------- .../drm/msm/registers/display/dsi_phy_7nm.xml | 5 ++- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 25ca649de717e..388017db45d80 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -83,6 +83,9 @@ struct dsi_pll_7nm { /* protects REG_DSI_7nm_PHY_CMN_CLK_CFG0 register */ spinlock_t postdiv_lock; + /* protects REG_DSI_7nm_PHY_CMN_CLK_CFG1 register */ + spinlock_t pclk_mux_lock; + struct pll_7nm_cached_state cached_state; struct dsi_pll_7nm *slave; @@ -381,22 +384,32 @@ static void dsi_pll_cmn_clk_cfg0_write(struct dsi_pll_7nm *pll, u32 val) spin_unlock_irqrestore(&pll->postdiv_lock, flags); } -static void dsi_pll_disable_global_clk(struct dsi_pll_7nm *pll) +static void dsi_pll_cmn_clk_cfg1_update(struct dsi_pll_7nm *pll, u32 mask, + u32 val) { + unsigned long flags; u32 data; + spin_lock_irqsave(&pll->pclk_mux_lock, flags); data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); - writel(data & ~BIT(5), pll->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); + data &= ~mask; + data |= val & mask; + + writel(data, pll->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); + spin_unlock_irqrestore(&pll->pclk_mux_lock, flags); +} + +static void dsi_pll_disable_global_clk(struct dsi_pll_7nm *pll) +{ + dsi_pll_cmn_clk_cfg1_update(pll, DSI_7nm_PHY_CMN_CLK_CFG1_CLK_EN, 0); } static void dsi_pll_enable_global_clk(struct dsi_pll_7nm *pll) { - u32 data; + u32 cfg_1 = DSI_7nm_PHY_CMN_CLK_CFG1_CLK_EN | DSI_7nm_PHY_CMN_CLK_CFG1_CLK_EN_SEL; writel(0x04, pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_3); - - data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); - writel(data | BIT(5) | BIT(4), pll->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); + dsi_pll_cmn_clk_cfg1_update(pll, cfg_1, cfg_1); } static void dsi_pll_phy_dig_reset(struct dsi_pll_7nm *pll) @@ -574,7 +587,6 @@ static int dsi_7nm_pll_restore_state(struct msm_dsi_phy *phy) { struct dsi_pll_7nm *pll_7nm = to_pll_7nm(phy->vco_hw); struct pll_7nm_cached_state *cached = &pll_7nm->cached_state; - void __iomem *phy_base = pll_7nm->phy->base; u32 val; int ret; @@ -586,11 +598,7 @@ static int dsi_7nm_pll_restore_state(struct msm_dsi_phy *phy) dsi_pll_cmn_clk_cfg0_write(pll_7nm, DSI_7nm_PHY_CMN_CLK_CFG0_DIV_CTRL_3_0(cached->bit_clk_div) | DSI_7nm_PHY_CMN_CLK_CFG0_DIV_CTRL_7_4(cached->pix_clk_div)); - - val = readl(phy_base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); - val &= ~0x3; - val |= cached->pll_mux; - writel(val, phy_base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); + dsi_pll_cmn_clk_cfg1_update(pll_7nm, 0x3, cached->pll_mux); ret = dsi_pll_7nm_vco_set_rate(phy->vco_hw, pll_7nm->vco_current_rate, @@ -743,7 +751,7 @@ static int pll_7nm_register(struct dsi_pll_7nm *pll_7nm, struct clk_hw **provide pll_by_2_bit, }), 2, 0, pll_7nm->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG1, - 0, 1, 0, NULL); + 0, 1, 0, &pll_7nm->pclk_mux_lock); if (IS_ERR(hw)) { ret = PTR_ERR(hw); goto fail; @@ -788,6 +796,7 @@ static int dsi_pll_7nm_init(struct msm_dsi_phy *phy) pll_7nm_list[phy->id] = pll_7nm; spin_lock_init(&pll_7nm->postdiv_lock); + spin_lock_init(&pll_7nm->pclk_mux_lock); pll_7nm->phy = phy; diff --git a/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml b/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml index e0bf6e016b4ce..cfaf78c028b13 100644 --- a/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml +++ b/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml @@ -13,7 +13,10 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> - + + + + -- GitLab From 73f69c6be2a9f22c31c775ec03c6c286bfe12cfa Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 14 Feb 2025 16:08:43 +0100 Subject: [PATCH 0735/1585] drm/msm/dsi/phy: Do not overwite PHY_CMN_CLK_CFG1 when choosing bitclk source PHY_CMN_CLK_CFG1 register has four fields being used in the driver: DSI clock divider, source of bitclk and two for enabling the DSI PHY PLL clocks. dsi_7nm_set_usecase() sets only the source of bitclk, so should leave all other bits untouched. Use newly introduced dsi_pll_cmn_clk_cfg1_update() to update respective bits without overwriting the rest. While shuffling the code, define and use PHY_CMN_CLK_CFG1 bitfields to make the code more readable and obvious. Fixes: 1ef7c99d145c ("drm/msm/dsi: add support for 7nm DSI PHY/PLL") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/637380/ Link: https://lore.kernel.org/r/20250214-drm-msm-phy-pll-cfg-reg-v3-3-0943b850722c@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c | 4 ++-- drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 388017db45d80..798168180c1ab 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -617,7 +617,6 @@ static int dsi_7nm_pll_restore_state(struct msm_dsi_phy *phy) static int dsi_7nm_set_usecase(struct msm_dsi_phy *phy) { struct dsi_pll_7nm *pll_7nm = to_pll_7nm(phy->vco_hw); - void __iomem *base = phy->base; u32 data = 0x0; /* internal PLL */ DBG("DSI PLL%d", pll_7nm->phy->id); @@ -636,7 +635,8 @@ static int dsi_7nm_set_usecase(struct msm_dsi_phy *phy) } /* set PLL src */ - writel(data << 2, base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); + dsi_pll_cmn_clk_cfg1_update(pll_7nm, DSI_7nm_PHY_CMN_CLK_CFG1_BITCLK_SEL__MASK, + DSI_7nm_PHY_CMN_CLK_CFG1_BITCLK_SEL(data)); return 0; } diff --git a/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml b/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml index cfaf78c028b13..35f7f40e405b7 100644 --- a/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml +++ b/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml @@ -16,6 +16,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> + -- GitLab From 0ad2507d5d93f39619fc42372c347d6006b64319 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 16 Feb 2025 14:02:44 -0800 Subject: [PATCH 0736/1585] Linux 6.14-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 272db408be5ca..96407c1d6be16 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Baby Opossum Posse # *DOCUMENTATION* -- GitLab From 654292a0b264e9b8c51b98394146218a21612aa1 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Sun, 16 Feb 2025 18:02:47 -0300 Subject: [PATCH 0737/1585] smb: client: fix chmod(2) regression with ATTR_READONLY When the user sets a file or directory as read-only (e.g. ~S_IWUGO), the client will set the ATTR_READONLY attribute by sending an SMB2_SET_INFO request to the server in cifs_setattr_{,nounix}(), but cifsInodeInfo::cifsAttrs will be left unchanged as the client will only update the new file attributes in the next call to {smb311_posix,cifs}_get_inode_info() with the new metadata filled in @data parameter. Commit a18280e7fdea ("smb: cilent: set reparse mount points as automounts") mistakenly removed the @data NULL check when calling is_inode_cache_good(), which broke the above case as the new ATTR_READONLY attribute would end up not being updated on files with a read lease. Fix this by updating the inode whenever we have cached metadata in @data parameter. Reported-by: Horst Reiterer Closes: https://lore.kernel.org/r/85a16504e09147a195ac0aac1c801280@fabasoft.com Fixes: a18280e7fdea ("smb: cilent: set reparse mount points as automounts") Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 9cc31cf6ebd07..3261190e6f903 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1408,7 +1408,7 @@ int cifs_get_inode_info(struct inode **inode, struct cifs_fattr fattr = {}; int rc; - if (is_inode_cache_good(*inode)) { + if (!data && is_inode_cache_good(*inode)) { cifs_dbg(FYI, "No need to revalidate cached inode sizes\n"); return 0; } @@ -1507,7 +1507,7 @@ int smb311_posix_get_inode_info(struct inode **inode, struct cifs_fattr fattr = {}; int rc; - if (is_inode_cache_good(*inode)) { + if (!data && is_inode_cache_good(*inode)) { cifs_dbg(FYI, "No need to revalidate cached inode sizes\n"); return 0; } -- GitLab From 579cd64b9df8a60284ec3422be919c362de40e41 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Sat, 8 Feb 2025 00:54:35 +0000 Subject: [PATCH 0738/1585] ASoC: tas2770: Fix volume scale The scale starts at -100dB, not -128dB. Signed-off-by: Hector Martin Signed-off-by: Mark Brown Link: https://patch.msgid.link/20250208-asoc-tas2770-v1-1-cf50ff1d59a3@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/tas2770.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/tas2770.c b/sound/soc/codecs/tas2770.c index 9f93b230652a5..863c3f672ba98 100644 --- a/sound/soc/codecs/tas2770.c +++ b/sound/soc/codecs/tas2770.c @@ -506,7 +506,7 @@ static int tas2770_codec_probe(struct snd_soc_component *component) } static DECLARE_TLV_DB_SCALE(tas2770_digital_tlv, 1100, 50, 0); -static DECLARE_TLV_DB_SCALE(tas2770_playback_volume, -12750, 50, 0); +static DECLARE_TLV_DB_SCALE(tas2770_playback_volume, -10050, 50, 0); static const struct snd_kcontrol_new tas2770_snd_controls[] = { SOC_SINGLE_TLV("Speaker Playback Volume", TAS2770_PLAY_CFG_REG2, -- GitLab From 99e6ea912340d6a262a60d5dd0c87c5e7b2d6ff2 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Fri, 7 Feb 2025 13:21:45 +0100 Subject: [PATCH 0739/1585] spi: atmel-quadspi: remove references to runtime PM on error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to call runtime PM put APIs on error path of `atmel_qspi_sama7g5_transfer()` as the caller (`atmel_qspi_exec_op()`) of it will take care of this if needed. Fixes: 5af42209a4d2 ("spi: atmel-quadspi: Add support for sama7g5 QSPI") Signed-off-by: Claudiu Beznea Signed-off-by: Durai Manickam KR Reported-by: Alexander Dahl Closes: https://lore.kernel.org/linux-spi/20250109-carat-festivity-5f088e1add3c@thorsis.com/ [ csokas.bence: Rebase and clarify msg, fix/add tags ] Signed-off-by: Bence Csókás Link: https://patch.msgid.link/20250207122145.162183-2-csokas.bence@prolan.hu Signed-off-by: Mark Brown --- drivers/spi/atmel-quadspi.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c index d8c9be64d006a..244ac01068629 100644 --- a/drivers/spi/atmel-quadspi.c +++ b/drivers/spi/atmel-quadspi.c @@ -930,11 +930,8 @@ static int atmel_qspi_sama7g5_transfer(struct spi_mem *mem, /* Release the chip-select. */ ret = atmel_qspi_reg_sync(aq); - if (ret) { - pm_runtime_mark_last_busy(&aq->pdev->dev); - pm_runtime_put_autosuspend(&aq->pdev->dev); + if (ret) return ret; - } atmel_qspi_write(QSPI_CR_LASTXFER, aq, QSPI_CR); return atmel_qspi_wait_for_completion(aq, QSPI_SR_CSRA); -- GitLab From 3d7a20f9ba7b09a35df4bdb5f0ddb2a0c8a4f39e Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 13 Feb 2025 08:03:41 +0000 Subject: [PATCH 0740/1585] MAINTAINERS: add tambarus as R for Samsung SPI I'm currently working on a Samsung SoC which includes SPI. I'd like to be Cc'ed to further contributions and help on reviewing them. Add me as reviewer. Signed-off-by: Tudor Ambarus Acked-by: Andi Shyti Reviewed-by: Sam Protsenko Link: https://patch.msgid.link/20250213-gs101-spi-r-v1-1-1e3ab8096873@linaro.org Signed-off-by: Mark Brown --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index efee40ea589f7..3a5d1d5891e74 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21075,6 +21075,7 @@ F: include/linux/clk/samsung.h SAMSUNG SPI DRIVERS M: Andi Shyti +R: Tudor Ambarus L: linux-spi@vger.kernel.org L: linux-samsung-soc@vger.kernel.org S: Maintained -- GitLab From 7103f0589ac220eac3d2b1e8411494b31b883d06 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 23 Jan 2025 13:14:34 +0100 Subject: [PATCH 0741/1585] x86/microcode/AMD: Remove ugly linebreak in __verify_patch_section() signature No functional changes. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20250211163648.30531-2-bp@kernel.org --- arch/x86/kernel/cpu/microcode/amd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index a5dac7f3c0a07..4a62625c311a6 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -246,8 +246,7 @@ static bool verify_equivalence_table(const u8 *buf, size_t buf_size) * On success, @sh_psize returns the patch size according to the section header, * to the caller. */ -static bool -__verify_patch_section(const u8 *buf, size_t buf_size, u32 *sh_psize) +static bool __verify_patch_section(const u8 *buf, size_t buf_size, u32 *sh_psize) { u32 p_type, p_size; const u32 *hdr; -- GitLab From 3ef0740d10b005a45e8ae5b4b7b5d37bfddf63c0 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 23 Jan 2025 12:23:47 +0100 Subject: [PATCH 0742/1585] x86/microcode/AMD: Remove unused save_microcode_in_initrd_amd() declarations Commit a7939f016720 ("x86/microcode/amd: Cache builtin/initrd microcode early") renamed it to save_microcode_in_initrd() and made it static. Zap the forgotten declarations. No functional changes. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20250211163648.30531-3-bp@kernel.org --- arch/x86/kernel/cpu/microcode/amd.c | 2 +- arch/x86/kernel/cpu/microcode/internal.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 4a62625c311a6..f831c06029948 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -517,7 +517,7 @@ static bool __apply_microcode_amd(struct microcode_amd *mc, unsigned int psize) * patch container file in initrd, traverse equivalent cpu table, look for a * matching microcode patch, and update, all in initrd memory in place. * When vmalloc() is available for use later -- on 64-bit during first AP load, - * and on 32-bit during save_microcode_in_initrd_amd() -- we can call + * and on 32-bit during save_microcode_in_initrd() -- we can call * load_microcode_amd() to save equivalent cpu table and microcode patches in * kernel heap memory. * diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h index 21776c529fa97..5df621752fefa 100644 --- a/arch/x86/kernel/cpu/microcode/internal.h +++ b/arch/x86/kernel/cpu/microcode/internal.h @@ -100,14 +100,12 @@ extern bool force_minrev; #ifdef CONFIG_CPU_SUP_AMD void load_ucode_amd_bsp(struct early_load_data *ed, unsigned int family); void load_ucode_amd_ap(unsigned int family); -int save_microcode_in_initrd_amd(unsigned int family); void reload_ucode_amd(unsigned int cpu); struct microcode_ops *init_amd_microcode(void); void exit_amd_microcode(void); #else /* CONFIG_CPU_SUP_AMD */ static inline void load_ucode_amd_bsp(struct early_load_data *ed, unsigned int family) { } static inline void load_ucode_amd_ap(unsigned int family) { } -static inline int save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } static inline void reload_ucode_amd(unsigned int cpu) { } static inline struct microcode_ops *init_amd_microcode(void) { return NULL; } static inline void exit_amd_microcode(void) { } -- GitLab From dc15675074dcfd79a2f10a6e39f96b0244961a01 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 23 Jan 2025 12:46:45 +0100 Subject: [PATCH 0743/1585] x86/microcode/AMD: Merge early_apply_microcode() into its single callsite No functional changes. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20250211163648.30531-4-bp@kernel.org --- arch/x86/kernel/cpu/microcode/amd.c | 60 +++++++++++++---------------- 1 file changed, 26 insertions(+), 34 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index f831c06029948..90f93b3ca9dbb 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -512,39 +512,6 @@ static bool __apply_microcode_amd(struct microcode_amd *mc, unsigned int psize) return true; } -/* - * Early load occurs before we can vmalloc(). So we look for the microcode - * patch container file in initrd, traverse equivalent cpu table, look for a - * matching microcode patch, and update, all in initrd memory in place. - * When vmalloc() is available for use later -- on 64-bit during first AP load, - * and on 32-bit during save_microcode_in_initrd() -- we can call - * load_microcode_amd() to save equivalent cpu table and microcode patches in - * kernel heap memory. - * - * Returns true if container found (sets @desc), false otherwise. - */ -static bool early_apply_microcode(u32 old_rev, void *ucode, size_t size) -{ - struct cont_desc desc = { 0 }; - struct microcode_amd *mc; - - scan_containers(ucode, size, &desc); - - mc = desc.mc; - if (!mc) - return false; - - /* - * Allow application of the same revision to pick up SMT-specific - * changes even if the revision of the other SMT thread is already - * up-to-date. - */ - if (old_rev > mc->hdr.patch_id) - return false; - - return __apply_microcode_amd(mc, desc.psize); -} - static bool get_builtin_microcode(struct cpio_data *cp) { char fw_name[36] = "amd-ucode/microcode_amd.bin"; @@ -582,8 +549,19 @@ static bool __init find_blobs_in_containers(struct cpio_data *ret) return found; } +/* + * Early load occurs before we can vmalloc(). So we look for the microcode + * patch container file in initrd, traverse equivalent cpu table, look for a + * matching microcode patch, and update, all in initrd memory in place. + * When vmalloc() is available for use later -- on 64-bit during first AP load, + * and on 32-bit during save_microcode_in_initrd() -- we can call + * load_microcode_amd() to save equivalent cpu table and microcode patches in + * kernel heap memory. + */ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_eax) { + struct cont_desc desc = { }; + struct microcode_amd *mc; struct cpio_data cp = { }; u32 dummy; @@ -597,7 +575,21 @@ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_ if (!find_blobs_in_containers(&cp)) return; - if (early_apply_microcode(ed->old_rev, cp.data, cp.size)) + scan_containers(cp.data, cp.size, &desc); + + mc = desc.mc; + if (!mc) + return; + + /* + * Allow application of the same revision to pick up SMT-specific + * changes even if the revision of the other SMT thread is already + * up-to-date. + */ + if (ed->old_rev > mc->hdr.patch_id) + return; + + if (__apply_microcode_amd(mc, desc.psize)) native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy); } -- GitLab From b39c387164879eef71886fc93cee5ca7dd7bf500 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 23 Jan 2025 12:51:37 +0100 Subject: [PATCH 0744/1585] x86/microcode/AMD: Get rid of the _load_microcode_amd() forward declaration Simply move save_microcode_in_initrd() down. No functional changes. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20250211163648.30531-5-bp@kernel.org --- arch/x86/kernel/cpu/microcode/amd.c | 54 ++++++++++++++--------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 90f93b3ca9dbb..adfea4d0d1297 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -593,34 +593,6 @@ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_ native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy); } -static enum ucode_state _load_microcode_amd(u8 family, const u8 *data, size_t size); - -static int __init save_microcode_in_initrd(void) -{ - unsigned int cpuid_1_eax = native_cpuid_eax(1); - struct cpuinfo_x86 *c = &boot_cpu_data; - struct cont_desc desc = { 0 }; - enum ucode_state ret; - struct cpio_data cp; - - if (dis_ucode_ldr || c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) - return 0; - - if (!find_blobs_in_containers(&cp)) - return -EINVAL; - - scan_containers(cp.data, cp.size, &desc); - if (!desc.mc) - return -EINVAL; - - ret = _load_microcode_amd(x86_family(cpuid_1_eax), desc.data, desc.size); - if (ret > UCODE_UPDATED) - return -EINVAL; - - return 0; -} -early_initcall(save_microcode_in_initrd); - static inline bool patch_cpus_equivalent(struct ucode_patch *p, struct ucode_patch *n, bool ignore_stepping) @@ -1004,6 +976,32 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz return ret; } +static int __init save_microcode_in_initrd(void) +{ + unsigned int cpuid_1_eax = native_cpuid_eax(1); + struct cpuinfo_x86 *c = &boot_cpu_data; + struct cont_desc desc = { 0 }; + enum ucode_state ret; + struct cpio_data cp; + + if (dis_ucode_ldr || c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) + return 0; + + if (!find_blobs_in_containers(&cp)) + return -EINVAL; + + scan_containers(cp.data, cp.size, &desc); + if (!desc.mc) + return -EINVAL; + + ret = _load_microcode_amd(x86_family(cpuid_1_eax), desc.data, desc.size); + if (ret > UCODE_UPDATED) + return -EINVAL; + + return 0; +} +early_initcall(save_microcode_in_initrd); + /* * AMD microcode firmware naming convention, up to family 15h they are in * the legacy file: -- GitLab From 037e81fb9d2dfe7b31fd97e5f578854e38f09887 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 23 Jan 2025 13:02:32 +0100 Subject: [PATCH 0745/1585] x86/microcode/AMD: Add get_patch_level() Put the MSR_AMD64_PATCH_LEVEL reading of the current microcode revision the hw has, into a separate function. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20250211163648.30531-6-bp@kernel.org --- arch/x86/kernel/cpu/microcode/amd.c | 46 +++++++++++++++-------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index adfea4d0d1297..31f90e129b083 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -145,6 +145,15 @@ ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin"; */ static u32 bsp_cpuid_1_eax __ro_after_init; +static u32 get_patch_level(void) +{ + u32 rev, dummy __always_unused; + + native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); + + return rev; +} + static union cpuid_1_eax ucode_rev_to_cpuid(unsigned int val) { union zen_patch_rev p; @@ -483,10 +492,10 @@ static void scan_containers(u8 *ucode, size_t size, struct cont_desc *desc) } } -static bool __apply_microcode_amd(struct microcode_amd *mc, unsigned int psize) +static bool __apply_microcode_amd(struct microcode_amd *mc, u32 *cur_rev, + unsigned int psize) { unsigned long p_addr = (unsigned long)&mc->hdr.data_code; - u32 rev, dummy; native_wrmsrl(MSR_AMD64_PATCH_LOADER, p_addr); @@ -504,9 +513,8 @@ static bool __apply_microcode_amd(struct microcode_amd *mc, unsigned int psize) } /* verify patch application was successful */ - native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - - if (rev != mc->hdr.patch_id) + *cur_rev = get_patch_level(); + if (*cur_rev != mc->hdr.patch_id) return false; return true; @@ -563,11 +571,12 @@ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_ struct cont_desc desc = { }; struct microcode_amd *mc; struct cpio_data cp = { }; - u32 dummy; + u32 rev; bsp_cpuid_1_eax = cpuid_1_eax; - native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->old_rev, dummy); + rev = get_patch_level(); + ed->old_rev = rev; /* Needed in load_microcode_amd() */ ucode_cpu_info[0].cpu_sig.sig = cpuid_1_eax; @@ -589,8 +598,8 @@ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_ if (ed->old_rev > mc->hdr.patch_id) return; - if (__apply_microcode_amd(mc, desc.psize)) - native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy); + if (__apply_microcode_amd(mc, &rev, desc.psize)) + ed->new_rev = rev; } static inline bool patch_cpus_equivalent(struct ucode_patch *p, @@ -692,14 +701,9 @@ static void free_cache(void) static struct ucode_patch *find_patch(unsigned int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - u32 rev, dummy __always_unused; u16 equiv_id = 0; - /* fetch rev if not populated yet: */ - if (!uci->cpu_sig.rev) { - rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - uci->cpu_sig.rev = rev; - } + uci->cpu_sig.rev = get_patch_level(); if (x86_family(bsp_cpuid_1_eax) < 0x17) { equiv_id = find_equiv_id(&equiv_table, uci->cpu_sig.sig); @@ -722,22 +726,20 @@ void reload_ucode_amd(unsigned int cpu) mc = p->data; - rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - + rev = get_patch_level(); if (rev < mc->hdr.patch_id) { - if (__apply_microcode_amd(mc, p->size)) - pr_info_once("reload revision: 0x%08x\n", mc->hdr.patch_id); + if (__apply_microcode_amd(mc, &rev, p->size)) + pr_info_once("reload revision: 0x%08x\n", rev); } } static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { - struct cpuinfo_x86 *c = &cpu_data(cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct ucode_patch *p; csig->sig = cpuid_eax(0x00000001); - csig->rev = c->microcode; + csig->rev = get_patch_level(); /* * a patch could have been loaded early, set uci->mc so that @@ -778,7 +780,7 @@ static enum ucode_state apply_microcode_amd(int cpu) goto out; } - if (!__apply_microcode_amd(mc_amd, p->size)) { + if (!__apply_microcode_amd(mc_amd, &rev, p->size)) { pr_err("CPU%d: update failed for patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); return UCODE_ERROR; -- GitLab From 7fb39882b20c98a9a393c244c86b56ef6933cff8 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Sun, 16 Feb 2025 11:42:09 +0100 Subject: [PATCH 0746/1585] dm-integrity: Avoid divide by zero in table status in Inline mode In Inline mode, the journal is unused, and journal_sectors is zero. Calculating the journal watermark requires dividing by journal_sectors, which should be done only if the journal is configured. Otherwise, a simple table query (dmsetup table) can cause OOPS. This bug did not show on some systems, perhaps only due to compiler optimization. On my 32-bit testing machine, this reliably crashes with the following: : Oops: divide error: 0000 [#1] PREEMPT SMP : CPU: 0 UID: 0 PID: 2450 Comm: dmsetup Not tainted 6.14.0-rc2+ #959 : EIP: dm_integrity_status+0x2f8/0xab0 [dm_integrity] ... Signed-off-by: Milan Broz Signed-off-by: Mikulas Patocka Fixes: fb0987682c62 ("dm-integrity: introduce the Inline mode") Cc: stable@vger.kernel.org # 6.11+ --- drivers/md/dm-integrity.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index ee9f7cecd78e0..555dc06b94228 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3790,10 +3790,6 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, break; case STATUSTYPE_TABLE: { - __u64 watermark_percentage = (__u64)(ic->journal_entries - ic->free_sectors_threshold) * 100; - - watermark_percentage += ic->journal_entries / 2; - do_div(watermark_percentage, ic->journal_entries); arg_count = 3; arg_count += !!ic->meta_dev; arg_count += ic->sectors_per_block != 1; @@ -3826,6 +3822,10 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors); DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors); if (ic->mode == 'J') { + __u64 watermark_percentage = (__u64)(ic->journal_entries - ic->free_sectors_threshold) * 100; + + watermark_percentage += ic->journal_entries / 2; + do_div(watermark_percentage, ic->journal_entries); DMEMIT(" journal_watermark:%u", (unsigned int)watermark_percentage); DMEMIT(" commit_time:%u", ic->autocommit_msec); } -- GitLab From c19525b5fb71a355079063bb14adcddae60cf922 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Sun, 16 Feb 2025 11:42:10 +0100 Subject: [PATCH 0747/1585] dm-integrity: Do not emit journal configuration in DM table for Inline mode The Inline mode does not use a journal; it makes no sense to print journal information in DM table. Print it only if the journal is used. The same applies to interleave_sectors (unused for Inline mode). Also, add comments for arg_count, as the current calculation is quite obscure. Signed-off-by: Milan Broz Signed-off-by: Mikulas Patocka --- drivers/md/dm-integrity.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 555dc06b94228..c45464b6576aa 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3790,16 +3790,18 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, break; case STATUSTYPE_TABLE: { - arg_count = 3; + arg_count = 1; /* buffer_sectors */ arg_count += !!ic->meta_dev; arg_count += ic->sectors_per_block != 1; arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)); arg_count += ic->reset_recalculate_flag; arg_count += ic->discard; - arg_count += ic->mode == 'J'; - arg_count += ic->mode == 'J'; - arg_count += ic->mode == 'B'; - arg_count += ic->mode == 'B'; + arg_count += ic->mode != 'I'; /* interleave_sectors */ + arg_count += ic->mode == 'J'; /* journal_sectors */ + arg_count += ic->mode == 'J'; /* journal_watermark */ + arg_count += ic->mode == 'J'; /* commit_time */ + arg_count += ic->mode == 'B'; /* sectors_per_bit */ + arg_count += ic->mode == 'B'; /* bitmap_flush_interval */ arg_count += !!ic->internal_hash_alg.alg_string; arg_count += !!ic->journal_crypt_alg.alg_string; arg_count += !!ic->journal_mac_alg.alg_string; @@ -3818,14 +3820,15 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, DMEMIT(" reset_recalculate"); if (ic->discard) DMEMIT(" allow_discards"); - DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS); - DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors); + if (ic->mode != 'I') + DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors); DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors); if (ic->mode == 'J') { __u64 watermark_percentage = (__u64)(ic->journal_entries - ic->free_sectors_threshold) * 100; watermark_percentage += ic->journal_entries / 2; do_div(watermark_percentage, ic->journal_entries); + DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS); DMEMIT(" journal_watermark:%u", (unsigned int)watermark_percentage); DMEMIT(" commit_time:%u", ic->autocommit_msec); } -- GitLab From 9d846b1aebbe488f245f1aa463802ff9c34cc078 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 10 Feb 2025 11:51:55 +0100 Subject: [PATCH 0748/1585] gpiolib: check the return value of gpio_chip::get_direction() As per the API contract - gpio_chip::get_direction() may fail and return a negative error number. However, we treat it as if it always returned 0 or 1. Check the return value of the callback and propagate the error number up the stack. Cc: stable@vger.kernel.org Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250210-gpio-sanitize-retvals-v1-1-12ea88506cb2@linaro.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 44 ++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index ca2f58a2cd45e..29110dc436f15 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1057,8 +1057,11 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, desc->gdev = gdev; if (gc->get_direction && gpiochip_line_is_valid(gc, desc_index)) { - assign_bit(FLAG_IS_OUT, - &desc->flags, !gc->get_direction(gc, desc_index)); + ret = gc->get_direction(gc, desc_index); + if (ret < 0) + goto err_cleanup_desc_srcu; + + assign_bit(FLAG_IS_OUT, &desc->flags, !ret); } else { assign_bit(FLAG_IS_OUT, &desc->flags, !gc->direction_input); @@ -2728,13 +2731,18 @@ int gpiod_direction_input_nonotify(struct gpio_desc *desc) if (guard.gc->direction_input) { ret = guard.gc->direction_input(guard.gc, gpio_chip_hwgpio(desc)); - } else if (guard.gc->get_direction && - (guard.gc->get_direction(guard.gc, - gpio_chip_hwgpio(desc)) != 1)) { - gpiod_warn(desc, - "%s: missing direction_input() operation and line is output\n", - __func__); - return -EIO; + } else if (guard.gc->get_direction) { + ret = guard.gc->get_direction(guard.gc, + gpio_chip_hwgpio(desc)); + if (ret < 0) + return ret; + + if (ret != GPIO_LINE_DIRECTION_IN) { + gpiod_warn(desc, + "%s: missing direction_input() operation and line is output\n", + __func__); + return -EIO; + } } if (ret == 0) { clear_bit(FLAG_IS_OUT, &desc->flags); @@ -2771,12 +2779,18 @@ static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value) gpio_chip_hwgpio(desc), val); } else { /* Check that we are in output mode if we can */ - if (guard.gc->get_direction && - guard.gc->get_direction(guard.gc, gpio_chip_hwgpio(desc))) { - gpiod_warn(desc, - "%s: missing direction_output() operation\n", - __func__); - return -EIO; + if (guard.gc->get_direction) { + ret = guard.gc->get_direction(guard.gc, + gpio_chip_hwgpio(desc)); + if (ret < 0) + return ret; + + if (ret != GPIO_LINE_DIRECTION_OUT) { + gpiod_warn(desc, + "%s: missing direction_output() operation\n", + __func__); + return -EIO; + } } /* * If we can't actively set the direction, we are some -- GitLab From 4e667a1968099c6deadee2313ecd648f8f0a8956 Mon Sep 17 00:00:00 2001 From: Johan Korsnes Date: Mon, 17 Feb 2025 10:16:43 +0100 Subject: [PATCH 0749/1585] gpio: vf610: add locking to gpio direction functions Add locking to `vf610_gpio_direction_input|output()` functions. Without this locking, a race condition exists between concurrent calls to these functions, potentially leading to incorrect GPIO direction settings. To verify the correctness of this fix, a `trylock` patch was applied, where after a couple of reboots the race was confirmed. I.e., one user had to wait before acquiring the lock. With this patch the race has not been encountered. It's worth mentioning that any type of debugging (printing, tracing, etc.) would "resolve"/hide the issue. Fixes: 659d8a62311f ("gpio: vf610: add imx7ulp support") Signed-off-by: Johan Korsnes Reviewed-by: Linus Walleij Reviewed-by: Haibo Chen Cc: Bartosz Golaszewski Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250217091643.679644-1-johan.korsnes@remarkable.no Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-vf610.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index c4f34a347cb6e..c36a9dbccd4dd 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -36,6 +36,7 @@ struct vf610_gpio_port { struct clk *clk_port; struct clk *clk_gpio; int irq; + spinlock_t lock; /* protect gpio direction registers */ }; #define GPIO_PDOR 0x00 @@ -124,6 +125,7 @@ static int vf610_gpio_direction_input(struct gpio_chip *chip, unsigned int gpio) u32 val; if (port->sdata->have_paddr) { + guard(spinlock_irqsave)(&port->lock); val = vf610_gpio_readl(port->gpio_base + GPIO_PDDR); val &= ~mask; vf610_gpio_writel(val, port->gpio_base + GPIO_PDDR); @@ -142,6 +144,7 @@ static int vf610_gpio_direction_output(struct gpio_chip *chip, unsigned int gpio vf610_gpio_set(chip, gpio, value); if (port->sdata->have_paddr) { + guard(spinlock_irqsave)(&port->lock); val = vf610_gpio_readl(port->gpio_base + GPIO_PDDR); val |= mask; vf610_gpio_writel(val, port->gpio_base + GPIO_PDDR); @@ -297,6 +300,7 @@ static int vf610_gpio_probe(struct platform_device *pdev) return -ENOMEM; port->sdata = device_get_match_data(dev); + spin_lock_init(&port->lock); dual_base = port->sdata->have_dual_base; -- GitLab From c7db342e3b4744688be1e27e31254c1d31a35274 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 17 Feb 2025 09:45:08 +0100 Subject: [PATCH 0750/1585] riscv: KVM: Fix hart suspend status check "Not stopped" means started or suspended so we need to check for a single state in order to have a chance to check for each state. Also, we need to use target_vcpu when checking for the suspend state. Fixes: 763c8bed8c05 ("RISC-V: KVM: Implement SBI HSM suspend call") Signed-off-by: Andrew Jones Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20250217084506.18763-8-ajones@ventanamicro.com Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_sbi_hsm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c index dce667f4b6ab0..13a35eb77e8e3 100644 --- a/arch/riscv/kvm/vcpu_sbi_hsm.c +++ b/arch/riscv/kvm/vcpu_sbi_hsm.c @@ -79,12 +79,12 @@ static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu) target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid); if (!target_vcpu) return SBI_ERR_INVALID_PARAM; - if (!kvm_riscv_vcpu_stopped(target_vcpu)) - return SBI_HSM_STATE_STARTED; - else if (vcpu->stat.generic.blocking) + if (kvm_riscv_vcpu_stopped(target_vcpu)) + return SBI_HSM_STATE_STOPPED; + else if (target_vcpu->stat.generic.blocking) return SBI_HSM_STATE_SUSPENDED; else - return SBI_HSM_STATE_STOPPED; + return SBI_HSM_STATE_STARTED; } static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, -- GitLab From e3219b0c491f2aa0e0b200a39d3352ab05cdda96 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 17 Feb 2025 09:45:09 +0100 Subject: [PATCH 0751/1585] riscv: KVM: Fix hart suspend_type use The spec says suspend_type is 32 bits wide and "In case the data is defined as 32bit wide, higher privilege software must ensure that it only uses 32 bit data." Mask off upper bits of suspend_type before using it. Fixes: 763c8bed8c05 ("RISC-V: KVM: Implement SBI HSM suspend call") Signed-off-by: Andrew Jones Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20250217084506.18763-9-ajones@ventanamicro.com Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_sbi_hsm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c index 13a35eb77e8e3..3070bb31745de 100644 --- a/arch/riscv/kvm/vcpu_sbi_hsm.c +++ b/arch/riscv/kvm/vcpu_sbi_hsm.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -109,7 +110,7 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, } return 0; case SBI_EXT_HSM_HART_SUSPEND: - switch (cp->a0) { + switch (lower_32_bits(cp->a0)) { case SBI_HSM_SUSPEND_RET_DEFAULT: kvm_riscv_vcpu_wfi(vcpu); break; -- GitLab From 0611f78f83c93c000029ab01daa28166d03590ed Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 17 Feb 2025 09:45:10 +0100 Subject: [PATCH 0752/1585] riscv: KVM: Fix SBI IPI error generation When an invalid function ID of an SBI extension is used we should return not-supported, not invalid-param. Also, when we see that at least one hartid constructed from the base and mask parameters is invalid, then we should return invalid-param. Finally, rather than relying on overflowing a left shift to result in zero and then using that zero in a condition which [correctly] skips sending an IPI (but loops unnecessarily), explicitly check for overflow and exit the loop immediately. Fixes: 5f862df5585c ("RISC-V: KVM: Add v0.1 replacement SBI extensions defined in v0.2") Signed-off-by: Andrew Jones Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20250217084506.18763-10-ajones@ventanamicro.com Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_sbi_replace.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c index 9c2ab3dfa93aa..74e3a38c6a29e 100644 --- a/arch/riscv/kvm/vcpu_sbi_replace.c +++ b/arch/riscv/kvm/vcpu_sbi_replace.c @@ -51,9 +51,10 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_cpu_context *cp = &vcpu->arch.guest_context; unsigned long hmask = cp->a0; unsigned long hbase = cp->a1; + unsigned long hart_bit = 0, sentmask = 0; if (cp->a6 != SBI_EXT_IPI_SEND_IPI) { - retdata->err_val = SBI_ERR_INVALID_PARAM; + retdata->err_val = SBI_ERR_NOT_SUPPORTED; return 0; } @@ -62,15 +63,23 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, if (hbase != -1UL) { if (tmp->vcpu_id < hbase) continue; - if (!(hmask & (1UL << (tmp->vcpu_id - hbase)))) + hart_bit = tmp->vcpu_id - hbase; + if (hart_bit >= __riscv_xlen) + goto done; + if (!(hmask & (1UL << hart_bit))) continue; } ret = kvm_riscv_vcpu_set_interrupt(tmp, IRQ_VS_SOFT); if (ret < 0) break; + sentmask |= 1UL << hart_bit; kvm_riscv_vcpu_pmu_incr_fw(tmp, SBI_PMU_FW_IPI_RCVD); } +done: + if (hbase != -1UL && (hmask ^ sentmask)) + retdata->err_val = SBI_ERR_INVALID_PARAM; + return ret; } -- GitLab From b901484852992cf3d162a5eab72251cc813ca624 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 17 Feb 2025 09:45:11 +0100 Subject: [PATCH 0753/1585] riscv: KVM: Fix SBI TIME error generation When an invalid function ID of an SBI extension is used we should return not-supported, not invalid-param. Fixes: 5f862df5585c ("RISC-V: KVM: Add v0.1 replacement SBI extensions defined in v0.2") Signed-off-by: Andrew Jones Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20250217084506.18763-11-ajones@ventanamicro.com Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_sbi_replace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c index 74e3a38c6a29e..5fbf3f94f1e85 100644 --- a/arch/riscv/kvm/vcpu_sbi_replace.c +++ b/arch/riscv/kvm/vcpu_sbi_replace.c @@ -21,7 +21,7 @@ static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, u64 next_cycle; if (cp->a6 != SBI_EXT_TIME_SET_TIMER) { - retdata->err_val = SBI_ERR_INVALID_PARAM; + retdata->err_val = SBI_ERR_NOT_SUPPORTED; return 0; } -- GitLab From 351e02b1733b057e33fe13fc03ca93ec799e4f78 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 17 Feb 2025 09:45:12 +0100 Subject: [PATCH 0754/1585] riscv: KVM: Fix SBI sleep_type use The spec says sleep_type is 32 bits wide and "In case the data is defined as 32bit wide, higher privilege software must ensure that it only uses 32 bit data." Mask off upper bits of sleep_type before using it. Fixes: 023c15151fbb ("RISC-V: KVM: Add SBI system suspend support") Signed-off-by: Andrew Jones Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20250217084506.18763-12-ajones@ventanamicro.com Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_sbi_system.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kvm/vcpu_sbi_system.c b/arch/riscv/kvm/vcpu_sbi_system.c index 5d55e08791fa1..bc0ebba890037 100644 --- a/arch/riscv/kvm/vcpu_sbi_system.c +++ b/arch/riscv/kvm/vcpu_sbi_system.c @@ -4,6 +4,7 @@ */ #include +#include #include #include @@ -19,7 +20,7 @@ static int kvm_sbi_ext_susp_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, switch (funcid) { case SBI_EXT_SUSP_SYSTEM_SUSPEND: - if (cp->a0 != SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM) { + if (lower_32_bits(cp->a0) != SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM) { retdata->err_val = SBI_ERR_INVALID_PARAM; return 0; } -- GitLab From 6d1f86610f23b0bc334d6506a186f21a98f51392 Mon Sep 17 00:00:00 2001 From: John Veness Date: Mon, 17 Feb 2025 12:15:50 +0000 Subject: [PATCH 0755/1585] ALSA: hda/conexant: Add quirk for HP ProBook 450 G4 mute LED Allows the LED on the dedicated mute button on the HP ProBook 450 G4 laptop to change colour correctly. Signed-off-by: John Veness Cc: Link: https://patch.msgid.link/2fb55d48-6991-4a42-b591-4c78f2fad8d7@pelago.org.uk Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 4985e72b90947..34874039ad453 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -1090,6 +1090,7 @@ static const struct hda_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x814f, "HP ZBook 15u G3", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE), SND_PCI_QUIRK(0x103c, 0x822e, "HP ProBook 440 G4", CXT_FIXUP_MUTE_LED_GPIO), + SND_PCI_QUIRK(0x103c, 0x8231, "HP ProBook 450 G4", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x828c, "HP EliteBook 840 G4", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE), -- GitLab From 539bd20352832b9244238a055eb169ccf1c41ff6 Mon Sep 17 00:00:00 2001 From: Amit Kumar Mahapatra Date: Thu, 13 Feb 2025 11:15:46 +0530 Subject: [PATCH 0756/1585] mtd: spi-nor: sst: Fix SST write failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'commit 18bcb4aa54ea ("mtd: spi-nor: sst: Factor out common write operation to `sst_nor_write_data()`")' introduced a bug where only one byte of data is written, regardless of the number of bytes passed to sst_nor_write_data(), causing a kernel crash during the write operation. Ensure the correct number of bytes are written as passed to sst_nor_write_data(). Call trace: [ 57.400180] ------------[ cut here ]------------ [ 57.404842] While writing 2 byte written 1 bytes [ 57.409493] WARNING: CPU: 0 PID: 737 at drivers/mtd/spi-nor/sst.c:187 sst_nor_write_data+0x6c/0x74 [ 57.418464] Modules linked in: [ 57.421517] CPU: 0 UID: 0 PID: 737 Comm: mtd_debug Not tainted 6.12.0-g5ad04afd91f9 #30 [ 57.429517] Hardware name: Xilinx Versal A2197 Processor board revA - x-prc-02 revA (DT) [ 57.437600] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 57.444557] pc : sst_nor_write_data+0x6c/0x74 [ 57.448911] lr : sst_nor_write_data+0x6c/0x74 [ 57.453264] sp : ffff80008232bb40 [ 57.456570] x29: ffff80008232bb40 x28: 0000000000010000 x27: 0000000000000001 [ 57.463708] x26: 000000000000ffff x25: 0000000000000000 x24: 0000000000000000 [ 57.470843] x23: 0000000000010000 x22: ffff80008232bbf0 x21: ffff000816230000 [ 57.477978] x20: ffff0008056c0080 x19: 0000000000000002 x18: 0000000000000006 [ 57.485112] x17: 0000000000000000 x16: 0000000000000000 x15: ffff80008232b580 [ 57.492246] x14: 0000000000000000 x13: ffff8000816d1530 x12: 00000000000004a4 [ 57.499380] x11: 000000000000018c x10: ffff8000816fd530 x9 : ffff8000816d1530 [ 57.506515] x8 : 00000000fffff7ff x7 : ffff8000816fd530 x6 : 0000000000000001 [ 57.513649] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000 [ 57.520782] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0008049b0000 [ 57.527916] Call trace: [ 57.530354] sst_nor_write_data+0x6c/0x74 [ 57.534361] sst_nor_write+0xb4/0x18c [ 57.538019] mtd_write_oob_std+0x7c/0x88 [ 57.541941] mtd_write_oob+0x70/0xbc [ 57.545511] mtd_write+0x68/0xa8 [ 57.548733] mtdchar_write+0x10c/0x290 [ 57.552477] vfs_write+0xb4/0x3a8 [ 57.555791] ksys_write+0x74/0x10c [ 57.559189] __arm64_sys_write+0x1c/0x28 [ 57.563109] invoke_syscall+0x54/0x11c [ 57.566856] el0_svc_common.constprop.0+0xc0/0xe0 [ 57.571557] do_el0_svc+0x1c/0x28 [ 57.574868] el0_svc+0x30/0xcc [ 57.577921] el0t_64_sync_handler+0x120/0x12c [ 57.582276] el0t_64_sync+0x190/0x194 [ 57.585933] ---[ end trace 0000000000000000 ]--- Cc: stable@vger.kernel.org Fixes: 18bcb4aa54ea ("mtd: spi-nor: sst: Factor out common write operation to `sst_nor_write_data()`") Signed-off-by: Amit Kumar Mahapatra Reviewed-by: Pratyush Yadav Reviewed-by: Tudor Ambarus Reviewed-by: Bence Csókás [pratyush@kernel.org: add Cc stable tag] Signed-off-by: Pratyush Yadav Link: https://lore.kernel.org/r/20250213054546.2078121-1-amit.kumar-mahapatra@amd.com --- drivers/mtd/spi-nor/sst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/sst.c b/drivers/mtd/spi-nor/sst.c index b5ad7118c49a2..175211fe6a5ed 100644 --- a/drivers/mtd/spi-nor/sst.c +++ b/drivers/mtd/spi-nor/sst.c @@ -174,7 +174,7 @@ static int sst_nor_write_data(struct spi_nor *nor, loff_t to, size_t len, int ret; nor->program_opcode = op; - ret = spi_nor_write_data(nor, to, 1, buf); + ret = spi_nor_write_data(nor, to, len, buf); if (ret < 0) return ret; WARN(ret != len, "While writing %zu byte written %i bytes\n", len, ret); -- GitLab From e49477f7f78598295551d486ecc7f020d796432e Mon Sep 17 00:00:00 2001 From: Krzysztof Karas Date: Thu, 16 Jan 2025 10:40:46 +0000 Subject: [PATCH 0757/1585] drm/i915/gt: Use spin_lock_irqsave() in interruptible context spin_lock/unlock() functions used in interrupt contexts could result in a deadlock, as seen in GitLab issue #13399, which occurs when interrupt comes in while holding a lock. Try to remedy the problem by saving irq state before spin lock acquisition. v2: add irqs' state save/restore calls to all locks/unlocks in signal_irq_work() execution (Maciej) v3: use with spin_lock_irqsave() in guc_lrc_desc_unpin() instead of other lock/unlock calls and add Fixes and Cc tags (Tvrtko); change title and commit message Fixes: 2f2cc53b5fe7 ("drm/i915/guc: Close deregister-context race against CT-loss") Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13399 Signed-off-by: Krzysztof Karas Cc: # v6.9+ Reviewed-by: Maciej Patelczyk Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/pusppq5ybyszau2oocboj3mtj5x574gwij323jlclm5zxvimmu@mnfg6odxbpsv (cherry picked from commit c088387ddd6482b40f21ccf23db1125e8fa4af7e) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index cc05bd9e43b49..3fce5c0001444 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -3449,10 +3449,10 @@ static inline int guc_lrc_desc_unpin(struct intel_context *ce) */ ret = deregister_context(ce, ce->guc_id.id); if (ret) { - spin_lock(&ce->guc_state.lock); + spin_lock_irqsave(&ce->guc_state.lock, flags); set_context_registered(ce); clr_context_destroyed(ce); - spin_unlock(&ce->guc_state.lock); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); /* * As gt-pm is awake at function entry, intel_wakeref_put_async merely decrements * the wakeref immediately but per function spec usage call this after unlock. -- GitLab From 0c455f3a12298e9c89a78d2f3327e15e52c0adc5 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 13 Feb 2025 11:28:59 -0800 Subject: [PATCH 0758/1585] drm/xe: Fix error handling in xe_irq_install() When devm_add_action_or_reset() fails, it already calls the function passed as parameter and that function is already free'ing the irqs. Drop the goto and just return. The caller, xe_device_probe(), should also do the same thing instead of wrongly doing `goto err` and calling the unrelated xe_display_fini() function. Fixes: 14d25d8d684d ("drm/xe: change old msi irq api to a new one") Reviewed-by: Rodrigo Vivi Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20250213192909.996148-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 121b214cdf10d4129b64f2b1f31807154c74ae55) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_irq.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 32f5a67a917b5..08552ee3fb94b 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -757,19 +757,7 @@ int xe_irq_install(struct xe_device *xe) xe_irq_postinstall(xe); - err = devm_add_action_or_reset(xe->drm.dev, irq_uninstall, xe); - if (err) - goto free_irq_handler; - - return 0; - -free_irq_handler: - if (xe_device_has_msix(xe)) - xe_irq_msix_free(xe); - else - xe_irq_msi_free(xe); - - return err; + return devm_add_action_or_reset(xe->drm.dev, irq_uninstall, xe); } static void xe_irq_msi_synchronize_irq(struct xe_device *xe) -- GitLab From 879f70382ff3e92fc854589ada3453e3f5f5b601 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 14 Feb 2025 16:19:51 +0200 Subject: [PATCH 0759/1585] drm/i915/dsi: Use TRANS_DDI_FUNC_CTL's own port width macro The format of the port width field in the DDI_BUF_CTL and the TRANS_DDI_FUNC_CTL registers are different starting with MTL, where the x3 lane mode for HDMI FRL has a different encoding in the two registers. To account for this use the TRANS_DDI_FUNC_CTL's own port width macro. Cc: # v6.5+ Fixes: b66a8abaa48a ("drm/i915/display/mtl: Fill port width in DDI_BUF_/TRANS_DDI_FUNC_/PORT_BUF_CTL for HDMI") Reviewed-by: Jani Nikula Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20250214142001.552916-2-imre.deak@intel.com (cherry picked from commit 76120b3a304aec28fef4910204b81a12db8974da) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/icl_dsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index c977b74f82f0b..82bf6c654de25 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -809,8 +809,8 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, /* select data lane width */ tmp = intel_de_read(display, TRANS_DDI_FUNC_CTL(display, dsi_trans)); - tmp &= ~DDI_PORT_WIDTH_MASK; - tmp |= DDI_PORT_WIDTH(intel_dsi->lane_count); + tmp &= ~TRANS_DDI_PORT_WIDTH_MASK; + tmp |= TRANS_DDI_PORT_WIDTH(intel_dsi->lane_count); /* select input pipe */ tmp &= ~TRANS_DDI_EDP_INPUT_MASK; -- GitLab From 166ce267ae3f96e439d8ccc838e8ec4d8b4dab73 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 14 Feb 2025 16:19:52 +0200 Subject: [PATCH 0760/1585] drm/i915/ddi: Fix HDMI port width programming in DDI_BUF_CTL Fix the port width programming in the DDI_BUF_CTL register on MTLP+, where this had an off-by-one error. Cc: # v6.5+ Fixes: b66a8abaa48a ("drm/i915/display/mtl: Fill port width in DDI_BUF_/TRANS_DDI_FUNC_/PORT_BUF_CTL for HDMI") Reviewed-by: Jani Nikula Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20250214142001.552916-3-imre.deak@intel.com (cherry picked from commit b2ecdabe46d23db275f94cd7c46ca414a144818b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_ddi.c | 2 +- drivers/gpu/drm/i915/i915_reg.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index acb986bc1f33a..2b9240ab547d8 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3487,7 +3487,7 @@ static void intel_ddi_enable_hdmi(struct intel_atomic_state *state, intel_de_rmw(dev_priv, XELPDP_PORT_BUF_CTL1(dev_priv, port), XELPDP_PORT_WIDTH_MASK | XELPDP_PORT_REVERSAL, port_buf); - buf_ctl |= DDI_PORT_WIDTH(lane_count); + buf_ctl |= DDI_PORT_WIDTH(crtc_state->lane_count); if (DISPLAY_VER(dev_priv) >= 20) buf_ctl |= XE2LPD_DDI_BUF_D2D_LINK_ENABLE; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 765e6c0528fb0..786c727aea454 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3633,7 +3633,7 @@ enum skl_power_gate { #define DDI_BUF_IS_IDLE (1 << 7) #define DDI_BUF_CTL_TC_PHY_OWNERSHIP REG_BIT(6) #define DDI_A_4_LANES (1 << 4) -#define DDI_PORT_WIDTH(width) (((width) - 1) << 1) +#define DDI_PORT_WIDTH(width) (((width) == 3 ? 4 : ((width) - 1)) << 1) #define DDI_PORT_WIDTH_MASK (7 << 1) #define DDI_PORT_WIDTH_SHIFT 1 #define DDI_INIT_DISPLAY_DETECTED (1 << 0) -- GitLab From 07fb70d82e0df085980246bf17bc12537588795f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 12 Feb 2025 18:43:21 +0200 Subject: [PATCH 0761/1585] drm/i915: Make sure all planes in use by the joiner have their crtc included MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Any active plane needs to have its crtc included in the atomic state. For planes enabled via uapi that is all handler in the core. But when we use a plane for joiner the uapi code things the plane is disabled and therefore doesn't have a crtc. So we need to pull those in by hand. We do it first thing in intel_joiner_add_affected_crtcs() so that any newly added crtc will subsequently pull in all of its joined crtcs as well. The symptoms from failing to do this are: - duct tape in the form of commit 1d5b09f8daf8 ("drm/i915: Fix NULL ptr deref by checking new_crtc_state") - the plane's hw state will get overwritten by the disabled uapi state if it can't find the uapi counterpart plane in the atomic state from where it should copy the correct state Cc: stable@vger.kernel.org Reviewed-by: Maarten Lankhorst Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250212164330.16891-2-ville.syrjala@linux.intel.com (cherry picked from commit 91077d1deb5374eb8be00fb391710f00e751dc4b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_display.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 4271da219b410..41128469f12a2 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -6628,12 +6628,30 @@ static int intel_async_flip_check_hw(struct intel_atomic_state *state, struct in static int intel_joiner_add_affected_crtcs(struct intel_atomic_state *state) { struct drm_i915_private *i915 = to_i915(state->base.dev); + const struct intel_plane_state *plane_state; struct intel_crtc_state *crtc_state; + struct intel_plane *plane; struct intel_crtc *crtc; u8 affected_pipes = 0; u8 modeset_pipes = 0; int i; + /* + * Any plane which is in use by the joiner needs its crtc. + * Pull those in first as this will not have happened yet + * if the plane remains disabled according to uapi. + */ + for_each_new_intel_plane_in_state(state, plane, plane_state, i) { + crtc = to_intel_crtc(plane_state->hw.crtc); + if (!crtc) + continue; + + crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + } + + /* Now pull in all joined crtcs */ for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { affected_pipes |= crtc_state->joiner_pipes; if (intel_crtc_needs_modeset(crtc_state)) -- GitLab From dd8b0582e25e36bba483c60338741c0ba5bc426c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 17 Feb 2025 11:16:26 +0800 Subject: [PATCH 0762/1585] block: fix NULL pointer dereferenced within __blk_rq_map_sg The block layer internal flush request may not have bio attached, so the request iterator has to be initialized from valid req->bio, otherwise NULL pointer dereferenced is triggered. Cc: Christoph Hellwig Reported-and-tested-by: Cheyenne Wills Fixes: b7175e24d6ac ("block: add a dma mapping iterator") Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250217031626.461977-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-merge.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index 39b738c0e4c9a..c7c85e10cf9cb 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -556,11 +556,14 @@ int __blk_rq_map_sg(struct request_queue *q, struct request *rq, { struct req_iterator iter = { .bio = rq->bio, - .iter = rq->bio->bi_iter, }; struct phys_vec vec; int nsegs = 0; + /* the internal flush request may not have bio attached */ + if (iter.bio) + iter.iter = iter.bio->bi_iter; + while (blk_map_iter_next(rq, &iter, &vec)) { *last_sg = blk_next_sg(last_sg, sglist); sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len, -- GitLab From 290237fde9491ca26cf4020bbf5a2b330452e7db Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Mon, 10 Feb 2025 22:17:29 +1100 Subject: [PATCH 0763/1585] btrfs: selftests: fix btrfs_test_delayed_refs() leak of transaction The btrfs_transaction struct leaks, which can cause sporadic fstests failures when kmemleak checking is enabled: kmemleak: 5 new suspected memory leaks (see /sys/kernel/debug/kmemleak) > cat /sys/kernel/debug/kmemleak unreferenced object 0xffff88810fdc6c00 (size 512): comm "modprobe", pid 203, jiffies 4294892552 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 6736050f): __kmalloc_cache_noprof+0x133/0x2c0 btrfs_test_delayed_refs+0x6f/0xbb0 [btrfs] btrfs_run_sanity_tests.cold+0x91/0xf9 [btrfs] 0xffffffffa02fd055 do_one_initcall+0x49/0x1c0 do_init_module+0x5b/0x1f0 init_module_from_file+0x70/0x90 idempotent_init_module+0xe8/0x2c0 __x64_sys_finit_module+0x6b/0xd0 do_syscall_64+0x54/0x110 entry_SYSCALL_64_after_hwframe+0x76/0x7e The transaction struct was initially stack-allocated but switched to heap following frame size compiler warnings. Fixes: 2b34879d97e27 ("btrfs: selftests: add delayed ref self test cases") Signed-off-by: David Disseldorp Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tests/delayed-refs-tests.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/tests/delayed-refs-tests.c b/fs/btrfs/tests/delayed-refs-tests.c index 6558508c2ddf5..265370e79a546 100644 --- a/fs/btrfs/tests/delayed-refs-tests.c +++ b/fs/btrfs/tests/delayed-refs-tests.c @@ -1009,6 +1009,7 @@ int btrfs_test_delayed_refs(u32 sectorsize, u32 nodesize) if (!ret) ret = select_delayed_refs_test(&trans); + kfree(transaction); out_free_fs_info: btrfs_free_dummy_fs_info(fs_info); return ret; -- GitLab From e77aa4b2eaa7fb31b2a7a50214ecb946b2a8b0f6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 17 Feb 2025 18:00:30 +0100 Subject: [PATCH 0764/1585] ALSA: seq: Drop UMP events when no UMP-conversion is set When a destination client is a user client in the legacy MIDI mode and it sets the no-UMP-conversion flag, currently the all UMP events are still passed as-is. But this may confuse the user-space, because the event packet size is different from the legacy mode. Since we cannot handle UMP events in user clients unless it's running in the UMP client mode, we should filter out those events instead of accepting blindly. This patch addresses it by slightly adjusting the conditions for UMP event handling at the event delivery time. Fixes: 329ffe11a014 ("ALSA: seq: Allow suppressing UMP conversions") Link: https://lore.kernel.org/b77a2cd6-7b59-4eb0-a8db-22d507d3af5f@gmail.com Link: https://patch.msgid.link/20250217170034.21930-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_clientmgr.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 073b56dc22257..cb66ec42a3f8a 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -678,12 +678,18 @@ static int snd_seq_deliver_single_event(struct snd_seq_client *client, dest_port->time_real); #if IS_ENABLED(CONFIG_SND_SEQ_UMP) - if (!(dest->filter & SNDRV_SEQ_FILTER_NO_CONVERT)) { - if (snd_seq_ev_is_ump(event)) { + if (snd_seq_ev_is_ump(event)) { + if (!(dest->filter & SNDRV_SEQ_FILTER_NO_CONVERT)) { result = snd_seq_deliver_from_ump(client, dest, dest_port, event, atomic, hop); goto __skip; - } else if (snd_seq_client_is_ump(dest)) { + } else if (dest->type == USER_CLIENT && + !snd_seq_client_is_ump(dest)) { + result = 0; // drop the event + goto __skip; + } + } else if (snd_seq_client_is_ump(dest)) { + if (!(dest->filter & SNDRV_SEQ_FILTER_NO_CONVERT)) { result = snd_seq_deliver_to_ump(client, dest, dest_port, event, atomic, hop); goto __skip; -- GitLab From 4cb77793842a351b39a030f77caebace3524840e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 15 Feb 2025 18:52:41 +0000 Subject: [PATCH 0765/1585] irqchip/gic-v3: Fix rk3399 workaround when secure interrupts are enabled Christoph reports that their rk3399 system dies since commit 773c05f417fa1 ("irqchip/gic-v3: Work around insecure GIC integrations"). It appears that some rk3399 have secure payloads, and that the firmware sets SCR_EL3.FIQ==1. Obivously, disabling security in that configuration leads to even more problems. Revisit the workaround by: - making it rk3399 specific - checking whether Group-0 is available, which is a good proxy for SCR_EL3.FIQ being 0 - either apply the workaround if Group-0 is available, or disable pseudo-NMIs if not Note that this doesn't mean that the secure side is able to receive interrupts, as all interrupts are made non-secure anyway. Clearly, nobody ever tested secure interrupts on this platform. Fixes: 773c05f417fa1 ("irqchip/gic-v3: Work around insecure GIC integrations") Reported-by: Christoph Fritz Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Christoph Fritz Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20250215185241.3768218-1-maz@kernel.org Closes: https://lore.kernel.org/r/b1266652fb64857246e8babdf268d0df8f0c36d9.camel@googlemail.com --- drivers/irqchip/irq-gic-v3.c | 53 +++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 76dce0aac2465..270d7a4d85a6d 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -44,6 +44,7 @@ static u8 dist_prio_nmi __ro_after_init = GICV3_PRIO_NMI; #define FLAGS_WORKAROUND_GICR_WAKER_MSM8996 (1ULL << 0) #define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539 (1ULL << 1) #define FLAGS_WORKAROUND_ASR_ERRATUM_8601001 (1ULL << 2) +#define FLAGS_WORKAROUND_INSECURE (1ULL << 3) #define GIC_IRQ_TYPE_PARTITION (GIC_IRQ_TYPE_LPI + 1) @@ -83,6 +84,8 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); #define GIC_LINE_NR min(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U) #define GIC_ESPI_NR GICD_TYPER_ESPIS(gic_data.rdists.gicd_typer) +static bool nmi_support_forbidden; + /* * There are 16 SGIs, though we only actually use 8 in Linux. The other 8 SGIs * are potentially stolen by the secure side. Some code, especially code dealing @@ -163,21 +166,27 @@ static void __init gic_prio_init(void) { bool ds; - ds = gic_dist_security_disabled(); - if (!ds) { - u32 val; - - val = readl_relaxed(gic_data.dist_base + GICD_CTLR); - val |= GICD_CTLR_DS; - writel_relaxed(val, gic_data.dist_base + GICD_CTLR); + cpus_have_group0 = gic_has_group0(); - ds = gic_dist_security_disabled(); - if (ds) - pr_warn("Broken GIC integration, security disabled"); + ds = gic_dist_security_disabled(); + if ((gic_data.flags & FLAGS_WORKAROUND_INSECURE) && !ds) { + if (cpus_have_group0) { + u32 val; + + val = readl_relaxed(gic_data.dist_base + GICD_CTLR); + val |= GICD_CTLR_DS; + writel_relaxed(val, gic_data.dist_base + GICD_CTLR); + + ds = gic_dist_security_disabled(); + if (ds) + pr_warn("Broken GIC integration, security disabled\n"); + } else { + pr_warn("Broken GIC integration, pNMI forbidden\n"); + nmi_support_forbidden = true; + } } cpus_have_security_disabled = ds; - cpus_have_group0 = gic_has_group0(); /* * How priority values are used by the GIC depends on two things: @@ -209,7 +218,7 @@ static void __init gic_prio_init(void) * be in the non-secure range, we program the non-secure values into * the distributor to match the PMR values we want. */ - if (cpus_have_group0 & !cpus_have_security_disabled) { + if (cpus_have_group0 && !cpus_have_security_disabled) { dist_prio_irq = __gicv3_prio_to_ns(dist_prio_irq); dist_prio_nmi = __gicv3_prio_to_ns(dist_prio_nmi); } @@ -1922,6 +1931,18 @@ static bool gic_enable_quirk_arm64_2941627(void *data) return true; } +static bool gic_enable_quirk_rk3399(void *data) +{ + struct gic_chip_data *d = data; + + if (of_machine_is_compatible("rockchip,rk3399")) { + d->flags |= FLAGS_WORKAROUND_INSECURE; + return true; + } + + return false; +} + static bool rd_set_non_coherent(void *data) { struct gic_chip_data *d = data; @@ -1996,6 +2017,12 @@ static const struct gic_quirk gic_quirks[] = { .property = "dma-noncoherent", .init = rd_set_non_coherent, }, + { + .desc = "GICv3: Insecure RK3399 integration", + .iidr = 0x0000043b, + .mask = 0xff000fff, + .init = gic_enable_quirk_rk3399, + }, { } }; @@ -2004,7 +2031,7 @@ static void gic_enable_nmi_support(void) { int i; - if (!gic_prio_masking_enabled()) + if (!gic_prio_masking_enabled() || nmi_support_forbidden) return; rdist_nmi_refs = kcalloc(gic_data.ppi_nr + SGI_NR, -- GitLab From d7e3fd658248f257006227285095d190e70ee73a Mon Sep 17 00:00:00 2001 From: Artur Rojek Date: Sun, 16 Feb 2025 18:55:45 +0100 Subject: [PATCH 0766/1585] irqchip/jcore-aic, clocksource/drivers/jcore: Fix jcore-pit interrupt request The jcore-aic irqchip does not have separate interrupt numbers reserved for cpu-local vs global interrupts. Therefore the device drivers need to request the given interrupt as per CPU interrupt. 69a9dcbd2d65 ("clocksource/drivers/jcore: Use request_percpu_irq()") converted the clocksource driver over to request_percpu_irq(), but failed to do add all the required changes, resulting in a failure to register PIT interrupts. Fix this by: 1) Explicitly mark the interrupt via irq_set_percpu_devid() in jcore_pit_init(). 2) Enable and disable the per CPU interrupt in the CPU hotplug callbacks. 3) Pass the correct per-cpu cookie to the irq handler by using handle_percpu_devid_irq() instead of handle_percpu_irq() in handle_jcore_irq(). [ tglx: Massage change log ] Fixes: 69a9dcbd2d65 ("clocksource/drivers/jcore: Use request_percpu_irq()") Signed-off-by: Artur Rojek Signed-off-by: Thomas Gleixner Acked-by: Uros Bizjak Link: https://lore.kernel.org/all/20250216175545.35079-3-contact@artur-rojek.eu --- drivers/clocksource/jcore-pit.c | 15 ++++++++++++++- drivers/irqchip/irq-jcore-aic.c | 2 +- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/jcore-pit.c b/drivers/clocksource/jcore-pit.c index a3fe98cd38382..82815428f8f92 100644 --- a/drivers/clocksource/jcore-pit.c +++ b/drivers/clocksource/jcore-pit.c @@ -114,6 +114,18 @@ static int jcore_pit_local_init(unsigned cpu) pit->periodic_delta = DIV_ROUND_CLOSEST(NSEC_PER_SEC, HZ * buspd); clockevents_config_and_register(&pit->ced, freq, 1, ULONG_MAX); + enable_percpu_irq(pit->ced.irq, IRQ_TYPE_NONE); + + return 0; +} + +static int jcore_pit_local_teardown(unsigned cpu) +{ + struct jcore_pit *pit = this_cpu_ptr(jcore_pit_percpu); + + pr_info("Local J-Core PIT teardown on cpu %u\n", cpu); + + disable_percpu_irq(pit->ced.irq); return 0; } @@ -168,6 +180,7 @@ static int __init jcore_pit_init(struct device_node *node) return -ENOMEM; } + irq_set_percpu_devid(pit_irq); err = request_percpu_irq(pit_irq, jcore_timer_interrupt, "jcore_pit", jcore_pit_percpu); if (err) { @@ -237,7 +250,7 @@ static int __init jcore_pit_init(struct device_node *node) cpuhp_setup_state(CPUHP_AP_JCORE_TIMER_STARTING, "clockevents/jcore:starting", - jcore_pit_local_init, NULL); + jcore_pit_local_init, jcore_pit_local_teardown); return 0; } diff --git a/drivers/irqchip/irq-jcore-aic.c b/drivers/irqchip/irq-jcore-aic.c index b9dcc8e78c750..1f613eb7b7f03 100644 --- a/drivers/irqchip/irq-jcore-aic.c +++ b/drivers/irqchip/irq-jcore-aic.c @@ -38,7 +38,7 @@ static struct irq_chip jcore_aic; static void handle_jcore_irq(struct irq_desc *desc) { if (irqd_is_per_cpu(irq_desc_get_irq_data(desc))) - handle_percpu_irq(desc); + handle_percpu_devid_irq(desc); else handle_simple_irq(desc); } -- GitLab From 07b598c0e6f06a0f254c88dafb4ad50f8a8c6eea Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 13 Feb 2025 15:20:55 +0000 Subject: [PATCH 0767/1585] drop_monitor: fix incorrect initialization order Syzkaller reports the following bug: BUG: spinlock bad magic on CPU#1, syz-executor.0/7995 lock: 0xffff88805303f3e0, .magic: 00000000, .owner: /-1, .owner_cpu: 0 CPU: 1 PID: 7995 Comm: syz-executor.0 Tainted: G E 5.10.209+ #1 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x119/0x179 lib/dump_stack.c:118 debug_spin_lock_before kernel/locking/spinlock_debug.c:83 [inline] do_raw_spin_lock+0x1f6/0x270 kernel/locking/spinlock_debug.c:112 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:117 [inline] _raw_spin_lock_irqsave+0x50/0x70 kernel/locking/spinlock.c:159 reset_per_cpu_data+0xe6/0x240 [drop_monitor] net_dm_cmd_trace+0x43d/0x17a0 [drop_monitor] genl_family_rcv_msg_doit+0x22f/0x330 net/netlink/genetlink.c:739 genl_family_rcv_msg net/netlink/genetlink.c:783 [inline] genl_rcv_msg+0x341/0x5a0 net/netlink/genetlink.c:800 netlink_rcv_skb+0x14d/0x440 net/netlink/af_netlink.c:2497 genl_rcv+0x29/0x40 net/netlink/genetlink.c:811 netlink_unicast_kernel net/netlink/af_netlink.c:1322 [inline] netlink_unicast+0x54b/0x800 net/netlink/af_netlink.c:1348 netlink_sendmsg+0x914/0xe00 net/netlink/af_netlink.c:1916 sock_sendmsg_nosec net/socket.c:651 [inline] __sock_sendmsg+0x157/0x190 net/socket.c:663 ____sys_sendmsg+0x712/0x870 net/socket.c:2378 ___sys_sendmsg+0xf8/0x170 net/socket.c:2432 __sys_sendmsg+0xea/0x1b0 net/socket.c:2461 do_syscall_64+0x30/0x40 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x62/0xc7 RIP: 0033:0x7f3f9815aee9 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f3f972bf0c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f3f9826d050 RCX: 00007f3f9815aee9 RDX: 0000000020000000 RSI: 0000000020001300 RDI: 0000000000000007 RBP: 00007f3f981b63bd R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000006e R14: 00007f3f9826d050 R15: 00007ffe01ee6768 If drop_monitor is built as a kernel module, syzkaller may have time to send a netlink NET_DM_CMD_START message during the module loading. This will call the net_dm_monitor_start() function that uses a spinlock that has not yet been initialized. To fix this, let's place resource initialization above the registration of a generic netlink family. Found by InfoTeCS on behalf of Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 9a8afc8d3962 ("Network Drop Monitor: Adding drop monitor implementation & Netlink protocol") Cc: stable@vger.kernel.org Signed-off-by: Ilia Gavrilov Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20250213152054.2785669-1-Ilia.Gavrilov@infotecs.ru Signed-off-by: Jakub Kicinski --- net/core/drop_monitor.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 6efd4cccc9ddd..212f0a048cab6 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -1734,30 +1734,30 @@ static int __init init_net_drop_monitor(void) return -ENOSPC; } - rc = genl_register_family(&net_drop_monitor_family); - if (rc) { - pr_err("Could not create drop monitor netlink family\n"); - return rc; + for_each_possible_cpu(cpu) { + net_dm_cpu_data_init(cpu); + net_dm_hw_cpu_data_init(cpu); } - WARN_ON(net_drop_monitor_family.mcgrp_offset != NET_DM_GRP_ALERT); rc = register_netdevice_notifier(&dropmon_net_notifier); if (rc < 0) { pr_crit("Failed to register netdevice notifier\n"); + return rc; + } + + rc = genl_register_family(&net_drop_monitor_family); + if (rc) { + pr_err("Could not create drop monitor netlink family\n"); goto out_unreg; } + WARN_ON(net_drop_monitor_family.mcgrp_offset != NET_DM_GRP_ALERT); rc = 0; - for_each_possible_cpu(cpu) { - net_dm_cpu_data_init(cpu); - net_dm_hw_cpu_data_init(cpu); - } - goto out; out_unreg: - genl_unregister_family(&net_drop_monitor_family); + WARN_ON(unregister_netdevice_notifier(&dropmon_net_notifier)); out: return rc; } @@ -1766,19 +1766,18 @@ static void exit_net_drop_monitor(void) { int cpu; - BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier)); - /* * Because of the module_get/put we do in the trace state change path * we are guaranteed not to have any current users when we get here */ + BUG_ON(genl_unregister_family(&net_drop_monitor_family)); + + BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier)); for_each_possible_cpu(cpu) { net_dm_hw_cpu_data_fini(cpu); net_dm_cpu_data_fini(cpu); } - - BUG_ON(genl_unregister_family(&net_drop_monitor_family)); } module_init(init_net_drop_monitor); -- GitLab From 915e34d5ad35a6a9e56113f852ade4a730fb88f0 Mon Sep 17 00:00:00 2001 From: Julian Ruess Date: Fri, 14 Feb 2025 13:01:37 +0100 Subject: [PATCH 0768/1585] s390/ism: add release function for struct device According to device_release() in /drivers/base/core.c, a device without a release function is a broken device and must be fixed. The current code directly frees the device after calling device_add() without waiting for other kernel parts to release their references. Thus, a reference could still be held to a struct device, e.g., by sysfs, leading to potential use-after-free issues if a proper release function is not set. Fixes: 8c81ba20349d ("net/smc: De-tangle ism and smc device initialization") Reviewed-by: Alexandra Winter Reviewed-by: Wenjia Zhang Signed-off-by: Julian Ruess Signed-off-by: Alexandra Winter Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250214120137.563409-1-wintera@linux.ibm.com Signed-off-by: Jakub Kicinski --- drivers/s390/net/ism_drv.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index e36e3ea165d3b..2f34761e64135 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -588,6 +588,15 @@ static int ism_dev_init(struct ism_dev *ism) return ret; } +static void ism_dev_release(struct device *dev) +{ + struct ism_dev *ism; + + ism = container_of(dev, struct ism_dev, dev); + + kfree(ism); +} + static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct ism_dev *ism; @@ -601,6 +610,7 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_set_drvdata(&pdev->dev, ism); ism->pdev = pdev; ism->dev.parent = &pdev->dev; + ism->dev.release = ism_dev_release; device_initialize(&ism->dev); dev_set_name(&ism->dev, dev_name(&pdev->dev)); ret = device_add(&ism->dev); @@ -637,7 +647,7 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id) device_del(&ism->dev); err_dev: dev_set_drvdata(&pdev->dev, NULL); - kfree(ism); + put_device(&ism->dev); return ret; } @@ -682,7 +692,7 @@ static void ism_remove(struct pci_dev *pdev) pci_disable_device(pdev); device_del(&ism->dev); dev_set_drvdata(&pdev->dev, NULL); - kfree(ism); + put_device(&ism->dev); } static struct pci_driver ism_driver = { -- GitLab From bdf5d13aa05ec314d4385b31ac974d6c7e0997c9 Mon Sep 17 00:00:00 2001 From: Nick Child Date: Fri, 14 Feb 2025 09:52:33 -0600 Subject: [PATCH 0769/1585] ibmvnic: Don't reference skb after sending to VIOS Previously, after successfully flushing the xmit buffer to VIOS, the tx_bytes stat was incremented by the length of the skb. It is invalid to access the skb memory after sending the buffer to the VIOS because, at any point after sending, the VIOS can trigger an interrupt to free this memory. A race between reading skb->len and freeing the skb is possible (especially during LPM) and will result in use-after-free: ================================================================== BUG: KASAN: slab-use-after-free in ibmvnic_xmit+0x75c/0x1808 [ibmvnic] Read of size 4 at addr c00000024eb48a70 by task hxecom/14495 <...> Call Trace: [c000000118f66cf0] [c0000000018cba6c] dump_stack_lvl+0x84/0xe8 (unreliable) [c000000118f66d20] [c0000000006f0080] print_report+0x1a8/0x7f0 [c000000118f66df0] [c0000000006f08f0] kasan_report+0x128/0x1f8 [c000000118f66f00] [c0000000006f2868] __asan_load4+0xac/0xe0 [c000000118f66f20] [c0080000046eac84] ibmvnic_xmit+0x75c/0x1808 [ibmvnic] [c000000118f67340] [c0000000014be168] dev_hard_start_xmit+0x150/0x358 <...> Freed by task 0: kasan_save_stack+0x34/0x68 kasan_save_track+0x2c/0x50 kasan_save_free_info+0x64/0x108 __kasan_mempool_poison_object+0x148/0x2d4 napi_skb_cache_put+0x5c/0x194 net_tx_action+0x154/0x5b8 handle_softirqs+0x20c/0x60c do_softirq_own_stack+0x6c/0x88 <...> The buggy address belongs to the object at c00000024eb48a00 which belongs to the cache skbuff_head_cache of size 224 ================================================================== Fixes: 032c5e82847a ("Driver for IBM System i/p VNIC protocol") Signed-off-by: Nick Child Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250214155233.235559-1-nnac123@linux.ibm.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index e95ae0d39948c..0676fc547b6f4 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2408,6 +2408,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) dma_addr_t data_dma_addr; struct netdev_queue *txq; unsigned long lpar_rc; + unsigned int skblen; union sub_crq tx_crq; unsigned int offset; bool use_scrq_send_direct = false; @@ -2522,6 +2523,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_buff->skb = skb; tx_buff->index = bufidx; tx_buff->pool_index = queue_num; + skblen = skb->len; memset(&tx_crq, 0, sizeof(tx_crq)); tx_crq.v1.first = IBMVNIC_CRQ_CMD; @@ -2614,7 +2616,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) netif_stop_subqueue(netdev, queue_num); } - tx_bytes += skb->len; + tx_bytes += skblen; txq_trans_cond_update(txq); ret = NETDEV_TX_OK; goto out; -- GitLab From 0a4f598c84fc0eeb143ba03cdd3fc3d857061c3c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 15 Feb 2025 14:52:00 -0800 Subject: [PATCH 0770/1585] MAINTAINERS: create entry for ethtool MAC merge Vladimir implemented the MAC merge support and reviews all the new driver implementations. Acked-by: Vladimir Oltean Link: https://patch.msgid.link/20250215225200.2652212-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 988b0ff94fda9..1405ebe703a8f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16477,6 +16477,12 @@ F: net/ethtool/cabletest.c F: tools/testing/selftests/drivers/net/*/ethtool* K: cable_test +NETWORKING [ETHTOOL MAC MERGE] +M: Vladimir Oltean +F: net/ethtool/mm.c +F: tools/testing/selftests/drivers/net/hw/ethtool_mm.sh +K: ethtool_mm + NETWORKING [GENERAL] M: "David S. Miller" M: Eric Dumazet -- GitLab From c8a3e63ff9d75b9f3f031c90d218876051dea0ba Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 29 Jan 2025 14:20:03 -0800 Subject: [PATCH 0771/1585] procfs: fix a locking bug in a vmcore_add_device_dump() error path Unlock vmcore_mutex when returning -EBUSY. Link: https://lkml.kernel.org/r/20250129222003.1495713-1-bvanassche@acm.org Fixes: 0f3b1c40c652 ("fs/proc/vmcore: disallow vmcore modifications while the vmcore is open") Signed-off-by: Bart Van Assche Acked-by: Michael S. Tsirkin Acked-by: David Hildenbrand Cc: Baoquan he Signed-off-by: Andrew Morton --- fs/proc/vmcore.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index a00120a3c0994..10d01eb09c43d 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -1524,7 +1524,7 @@ int vmcore_add_device_dump(struct vmcoredd_data *data) pr_warn_once("Unexpected adding of device dump\n"); if (vmcore_open) { ret = -EBUSY; - goto out_err; + goto unlock; } list_add_tail(&dump->list, &vmcoredd_list); @@ -1532,6 +1532,9 @@ int vmcore_add_device_dump(struct vmcoredd_data *data) mutex_unlock(&vmcore_mutex); return 0; +unlock: + mutex_unlock(&vmcore_mutex); + out_err: vfree(buf); vfree(dump); -- GitLab From f4b78260fc678ccd7169f32dc9f3bfa3b93931c7 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 31 Jan 2025 14:13:15 +0000 Subject: [PATCH 0772/1585] lib/iov_iter: fix import_iovec_ubuf iovec management import_iovec() says that it should always be fine to kfree the iovec returned in @iovp regardless of the error code. __import_iovec_ubuf() never reallocates it and thus should clear the pointer even in cases when copy_iovec_*() fail. Link: https://lkml.kernel.org/r/378ae26923ffc20fd5e41b4360d673bf47b1775b.1738332461.git.asml.silence@gmail.com Fixes: 3b2deb0e46da ("iov_iter: import single vector iovecs as ITER_UBUF") Signed-off-by: Pavel Begunkov Reviewed-by: Jens Axboe Cc: Al Viro Cc: Christian Brauner Cc: Signed-off-by: Andrew Morton --- lib/iov_iter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 9ec806f989f25..65f550cb5081b 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1428,6 +1428,8 @@ static ssize_t __import_iovec_ubuf(int type, const struct iovec __user *uvec, struct iovec *iov = *iovp; ssize_t ret; + *iovp = NULL; + if (compat) ret = copy_compat_iovec_from_user(iov, uvec, 1); else @@ -1438,7 +1440,6 @@ static ssize_t __import_iovec_ubuf(int type, const struct iovec __user *uvec, ret = import_ubuf(type, iov->iov_base, iov->iov_len, i); if (unlikely(ret)) return ret; - *iovp = NULL; return i->count; } -- GitLab From 63895d20d63b446f5049a963983489319c2ea3e2 Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 29 Jan 2025 19:08:44 +0900 Subject: [PATCH 0773/1585] mm/zswap: fix inconsistency when zswap_store_page() fails Commit b7c0ccdfbafd ("mm: zswap: support large folios in zswap_store()") skips charging any zswap entries when it failed to zswap the entire folio. However, when some base pages are zswapped but it failed to zswap the entire folio, the zswap operation is rolled back. When freeing zswap entries for those pages, zswap_entry_free() uncharges the zswap entries that were not previously charged, causing zswap charging to become inconsistent. This inconsistency triggers two warnings with following steps: # On a machine with 64GiB of RAM and 36GiB of zswap $ stress-ng --bigheap 2 # wait until the OOM-killer kills stress-ng $ sudo reboot The two warnings are: in mm/memcontrol.c:163, function obj_cgroup_release(): WARN_ON_ONCE(nr_bytes & (PAGE_SIZE - 1)); in mm/page_counter.c:60, function page_counter_cancel(): if (WARN_ONCE(new < 0, "page_counter underflow: %ld nr_pages=%lu\n", new, nr_pages)) zswap_stored_pages also becomes inconsistent in the same way. As suggested by Kanchana, increment zswap_stored_pages and charge zswap entries within zswap_store_page() when it succeeds. This way, zswap_entry_free() will decrement the counter and uncharge the entries when it failed to zswap the entire folio. While this could potentially be optimized by batching objcg charging and incrementing the counter, let's focus on fixing the bug this time and leave the optimization for later after some evaluation. After resolving the inconsistency, the warnings disappear. [42.hyeyoo@gmail.com: refactor zswap_store_page()] Link: https://lkml.kernel.org/r/20250131082037.2426-1-42.hyeyoo@gmail.com Link: https://lkml.kernel.org/r/20250129100844.2935-1-42.hyeyoo@gmail.com Fixes: b7c0ccdfbafd ("mm: zswap: support large folios in zswap_store()") Co-developed-by: Kanchana P Sridhar Signed-off-by: Kanchana P Sridhar Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Acked-by: Yosry Ahmed Acked-by: Nhat Pham Cc: Chengming Zhou Cc: Johannes Weiner Cc: Signed-off-by: Andrew Morton --- mm/zswap.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index 6504174fbc6ad..ac9d299e7d0c1 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1445,9 +1445,9 @@ static void shrink_worker(struct work_struct *w) * main API **********************************/ -static ssize_t zswap_store_page(struct page *page, - struct obj_cgroup *objcg, - struct zswap_pool *pool) +static bool zswap_store_page(struct page *page, + struct obj_cgroup *objcg, + struct zswap_pool *pool) { swp_entry_t page_swpentry = page_swap_entry(page); struct zswap_entry *entry, *old; @@ -1456,7 +1456,7 @@ static ssize_t zswap_store_page(struct page *page, entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page)); if (!entry) { zswap_reject_kmemcache_fail++; - return -EINVAL; + return false; } if (!zswap_compress(page, entry, pool)) @@ -1483,13 +1483,17 @@ static ssize_t zswap_store_page(struct page *page, /* * The entry is successfully compressed and stored in the tree, there is - * no further possibility of failure. Grab refs to the pool and objcg. - * These refs will be dropped by zswap_entry_free() when the entry is - * removed from the tree. + * no further possibility of failure. Grab refs to the pool and objcg, + * charge zswap memory, and increment zswap_stored_pages. + * The opposite actions will be performed by zswap_entry_free() + * when the entry is removed from the tree. */ zswap_pool_get(pool); - if (objcg) + if (objcg) { obj_cgroup_get(objcg); + obj_cgroup_charge_zswap(objcg, entry->length); + } + atomic_long_inc(&zswap_stored_pages); /* * We finish initializing the entry while it's already in xarray. @@ -1510,13 +1514,13 @@ static ssize_t zswap_store_page(struct page *page, zswap_lru_add(&zswap_list_lru, entry); } - return entry->length; + return true; store_failed: zpool_free(pool->zpool, entry->handle); compress_failed: zswap_entry_cache_free(entry); - return -EINVAL; + return false; } bool zswap_store(struct folio *folio) @@ -1526,7 +1530,6 @@ bool zswap_store(struct folio *folio) struct obj_cgroup *objcg = NULL; struct mem_cgroup *memcg = NULL; struct zswap_pool *pool; - size_t compressed_bytes = 0; bool ret = false; long index; @@ -1564,20 +1567,14 @@ bool zswap_store(struct folio *folio) for (index = 0; index < nr_pages; ++index) { struct page *page = folio_page(folio, index); - ssize_t bytes; - bytes = zswap_store_page(page, objcg, pool); - if (bytes < 0) + if (!zswap_store_page(page, objcg, pool)) goto put_pool; - compressed_bytes += bytes; } - if (objcg) { - obj_cgroup_charge_zswap(objcg, compressed_bytes); + if (objcg) count_objcg_events(objcg, ZSWPOUT, nr_pages); - } - atomic_long_add(nr_pages, &zswap_stored_pages); count_vm_events(ZSWPOUT, nr_pages); ret = true; -- GitLab From 2ede647a6fde3e54a6bfda7cf01c716649655900 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo=20Navarro?= Date: Mon, 3 Feb 2025 08:52:06 +0100 Subject: [PATCH 0774/1585] mm,madvise,hugetlb: check for 0-length range after end address adjustment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a sanity check to madvise_dontneed_free() to address a corner case in madvise where a race condition causes the current vma being processed to be backed by a different page size. During a madvise(MADV_DONTNEED) call on a memory region registered with a userfaultfd, there's a period of time where the process mm lock is temporarily released in order to send a UFFD_EVENT_REMOVE and let userspace handle the event. During this time, the vma covering the current address range may change due to an explicit mmap done concurrently by another thread. If, after that change, the memory region, which was originally backed by 4KB pages, is now backed by hugepages, the end address is rounded down to a hugepage boundary to avoid data loss (see "Fixes" below). This rounding may cause the end address to be truncated to the same address as the start. Make this corner case follow the same semantics as in other similar cases where the requested region has zero length (ie. return 0). This will make madvise_walk_vmas() continue to the next vma in the range (this time holding the process mm lock) which, due to the prev pointer becoming stale because of the vma change, will be the same hugepage-backed vma that was just checked before. The next time madvise_dontneed_free() runs for this vma, if the start address isn't aligned to a hugepage boundary, it'll return -EINVAL, which is also in line with the madvise api. From userspace perspective, madvise() will return EINVAL because the start address isn't aligned according to the new vma alignment requirements (hugepage), even though it was correctly page-aligned when the call was issued. Link: https://lkml.kernel.org/r/20250203075206.1452208-1-rcn@igalia.com Fixes: 8ebe0a5eaaeb ("mm,madvise,hugetlb: fix unexpected data loss with MADV_DONTNEED on hugetlbfs") Signed-off-by: Ricardo Cañuelo Navarro Reviewed-by: Oscar Salvador Cc: Florent Revest Cc: Rik van Riel Cc: Signed-off-by: Andrew Morton --- mm/madvise.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/mm/madvise.c b/mm/madvise.c index 49f3a75046f63..08b207f8e61ef 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -933,7 +933,16 @@ static long madvise_dontneed_free(struct vm_area_struct *vma, */ end = vma->vm_end; } - VM_WARN_ON(start >= end); + /* + * If the memory region between start and end was + * originally backed by 4kB pages and then remapped to + * be backed by hugepages while mmap_lock was dropped, + * the adjustment for hugetlb vma above may have rounded + * end down to the start address. + */ + if (start == end) + return 0; + VM_WARN_ON(start > end); } if (behavior == MADV_DONTNEED || behavior == MADV_DONTNEED_LOCKED) -- GitLab From 639375b0aa4323fe59b5fe2a6ebc68b022c36f50 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Wed, 5 Feb 2025 12:01:00 -0800 Subject: [PATCH 0775/1585] .mailmap: add entries for Jeff Johnson Map past iterations of my e-mail addresses to the current one. Link: https://lkml.kernel.org/r/20250205-jjohnson-mailmap-v1-1-269cb7b1710d@oss.qualcomm.com Signed-off-by: Jeff Johnson Signed-off-by: Andrew Morton --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index fedebf86640ad..c96dbe259c228 100644 --- a/.mailmap +++ b/.mailmap @@ -317,6 +317,8 @@ Jayachandran C Jean Tourrilhes Jeevan Shriram Jeff Garzik +Jeff Johnson +Jeff Johnson Jeff Layton Jeff Layton Jeff Layton -- GitLab From 3219585e894c12cbffd4ac93d3e6783d236f146e Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 5 Feb 2025 14:04:57 +0800 Subject: [PATCH 0776/1585] mailmap: add entry for Feng Tang Map my old business email to personal email. Link: https://lkml.kernel.org/r/20250205060457.53667-1-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index c96dbe259c228..f34af946180f8 100644 --- a/.mailmap +++ b/.mailmap @@ -226,6 +226,7 @@ Fangrui Song Felipe W Damasio Felix Kuhling Felix Moeller +Feng Tang Fenglin Wu Filipe Lautert Finn Thain -- GitLab From 035d3c778709680288b3954ee896043132bc3f8d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 10 Feb 2025 12:05:18 -0800 Subject: [PATCH 0777/1585] tools/mm: fix build warnings with musl-libc musl-libc warns about the following: /home/florian/dev/buildroot/output/arm64/rpi4-b/host/aarch64-buildroot-linux-musl/sysroot/usr/include/sys/errno.h:1:2: attention: #warning redirecting incorrect #include to [-Wcpp] 1 | #warning redirecting incorrect #include to | ^~~~~~~ /home/florian/dev/buildroot/output/arm64/rpi4-b/host/aarch64-buildroot-linux-musl/sysroot/usr/include/sys/fcntl.h:1:2: attention: #warning redirecting incorrect #include to [-Wcpp] 1 | #warning redirecting incorrect #include to | ^~~~~~~ include errno.h and fcntl.h directly. Link: https://lkml.kernel.org/r/20250210200518.1137295-1-florian.fainelli@broadcom.com Signed-off-by: Florian Fainelli Signed-off-by: Andrew Morton --- tools/mm/page-types.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/mm/page-types.c b/tools/mm/page-types.c index bcac7ebfb51fd..d7e5e8902af86 100644 --- a/tools/mm/page-types.c +++ b/tools/mm/page-types.c @@ -24,8 +24,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include -- GitLab From 41cddf83d8b00f29fd105e7a0777366edc69a5cf Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 10 Feb 2025 17:13:17 +0100 Subject: [PATCH 0778/1585] mm/migrate_device: don't add folio to be freed to LRU in migrate_device_finalize() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If migration succeeded, we called folio_migrate_flags()->mem_cgroup_migrate() to migrate the memcg from the old to the new folio. This will set memcg_data of the old folio to 0. Similarly, if migration failed, memcg_data of the dst folio is left unset. If we call folio_putback_lru() on such folios (memcg_data == 0), we will add the folio to be freed to the LRU, making memcg code unhappy. Running the hmm selftests: # ./hmm-tests ... # RUN hmm.hmm_device_private.migrate ... [ 102.078007][T14893] page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x7ff27d200 pfn:0x13cc00 [ 102.079974][T14893] anon flags: 0x17ff00000020018(uptodate|dirty|swapbacked|node=0|zone=2|lastcpupid=0x7ff) [ 102.082037][T14893] raw: 017ff00000020018 dead000000000100 dead000000000122 ffff8881353896c9 [ 102.083687][T14893] raw: 00000007ff27d200 0000000000000000 00000001ffffffff 0000000000000000 [ 102.085331][T14893] page dumped because: VM_WARN_ON_ONCE_FOLIO(!memcg && !mem_cgroup_disabled()) [ 102.087230][T14893] ------------[ cut here ]------------ [ 102.088279][T14893] WARNING: CPU: 0 PID: 14893 at ./include/linux/memcontrol.h:726 folio_lruvec_lock_irqsave+0x10e/0x170 [ 102.090478][T14893] Modules linked in: [ 102.091244][T14893] CPU: 0 UID: 0 PID: 14893 Comm: hmm-tests Not tainted 6.13.0-09623-g6c216bc522fd #151 [ 102.093089][T14893] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-2.fc40 04/01/2014 [ 102.094848][T14893] RIP: 0010:folio_lruvec_lock_irqsave+0x10e/0x170 [ 102.096104][T14893] Code: ... [ 102.099908][T14893] RSP: 0018:ffffc900236c37b0 EFLAGS: 00010293 [ 102.101152][T14893] RAX: 0000000000000000 RBX: ffffea0004f30000 RCX: ffffffff8183f426 [ 102.102684][T14893] RDX: ffff8881063cb880 RSI: ffffffff81b8117f RDI: ffff8881063cb880 [ 102.104227][T14893] RBP: 0000000000000000 R08: 0000000000000005 R09: 0000000000000000 [ 102.105757][T14893] R10: 0000000000000001 R11: 0000000000000002 R12: ffffc900236c37d8 [ 102.107296][T14893] R13: ffff888277a2bcb0 R14: 000000000000001f R15: 0000000000000000 [ 102.108830][T14893] FS: 00007ff27dbdd740(0000) GS:ffff888277a00000(0000) knlGS:0000000000000000 [ 102.110643][T14893] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 102.111924][T14893] CR2: 00007ff27d400000 CR3: 000000010866e000 CR4: 0000000000750ef0 [ 102.113478][T14893] PKRU: 55555554 [ 102.114172][T14893] Call Trace: [ 102.114805][T14893] [ 102.115397][T14893] ? folio_lruvec_lock_irqsave+0x10e/0x170 [ 102.116547][T14893] ? __warn.cold+0x110/0x210 [ 102.117461][T14893] ? folio_lruvec_lock_irqsave+0x10e/0x170 [ 102.118667][T14893] ? report_bug+0x1b9/0x320 [ 102.119571][T14893] ? handle_bug+0x54/0x90 [ 102.120494][T14893] ? exc_invalid_op+0x17/0x50 [ 102.121433][T14893] ? asm_exc_invalid_op+0x1a/0x20 [ 102.122435][T14893] ? __wake_up_klogd.part.0+0x76/0xd0 [ 102.123506][T14893] ? dump_page+0x4f/0x60 [ 102.124352][T14893] ? folio_lruvec_lock_irqsave+0x10e/0x170 [ 102.125500][T14893] folio_batch_move_lru+0xd4/0x200 [ 102.126577][T14893] ? __pfx_lru_add+0x10/0x10 [ 102.127505][T14893] __folio_batch_add_and_move+0x391/0x720 [ 102.128633][T14893] ? __pfx_lru_add+0x10/0x10 [ 102.129550][T14893] folio_putback_lru+0x16/0x80 [ 102.130564][T14893] migrate_device_finalize+0x9b/0x530 [ 102.131640][T14893] dmirror_migrate_to_device.constprop.0+0x7c5/0xad0 [ 102.133047][T14893] dmirror_fops_unlocked_ioctl+0x89b/0xc80 Likely, nothing else goes wrong: putting the last folio reference will remove the folio from the LRU again. So besides memcg complaining, adding the folio to be freed to the LRU is just an unnecessary step. The new flow resembles what we have in migrate_folio_move(): add the dst to the lru, remove migration ptes, unlock and unref dst. Link: https://lkml.kernel.org/r/20250210161317.717936-1-david@redhat.com Fixes: 8763cb45ab96 ("mm/migrate: new memory migration helper for use with device memory") Signed-off-by: David Hildenbrand Cc: Jérôme Glisse Cc: John Hubbard Cc: Alistair Popple Cc: Signed-off-by: Andrew Morton --- mm/migrate_device.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/mm/migrate_device.c b/mm/migrate_device.c index 9cf26592ac934..5bd888223cc8b 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -840,20 +840,15 @@ void migrate_device_finalize(unsigned long *src_pfns, dst = src; } + if (!folio_is_zone_device(dst)) + folio_add_lru(dst); remove_migration_ptes(src, dst, 0); folio_unlock(src); - - if (folio_is_zone_device(src)) - folio_put(src); - else - folio_putback_lru(src); + folio_put(src); if (dst != src) { folio_unlock(dst); - if (folio_is_zone_device(dst)) - folio_put(dst); - else - folio_putback_lru(dst); + folio_put(dst); } } } -- GitLab From 2272dbc471037b78f308b44351ab1b9f88d32628 Mon Sep 17 00:00:00 2001 From: Wang Yaxin Date: Sat, 8 Feb 2025 14:44:00 +0800 Subject: [PATCH 0779/1585] getdelays: fix error format characters getdelays had a compilation issue because the format string was not updated when the "delay min" was added. For example, after adding the "delay min" in printf, there were 7 strings but only 6 "%s" format specifiers. Similarly, after adding the 't->cpu_delay_total', there were 7 variables but only 6 format characters specifiers, causing compilation issues as follows. This commit fixes these issues to ensure that getdelays compiles correctly. root@xx:~/linux-next/tools/accounting$ make getdelays.c:199:9: warning: format `%llu' expects argument of type `long long unsigned int', but argument 8 has type `char *' [-Wformat=] 199 | printf("\n\nCPU %15s%15s%15s%15s%15s%15s\n" | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ..... 216 | "delay total", "delay average", "delay max", "delay min", | ~~~~~~~~~~~ | | | char * getdelays.c:200:21: note: format string is defined here 200 | " %15llu%15llu%15llu%15llu%15.3fms%13.6fms\n" | ~~~~~^ | | | long long unsigned int | %15s getdelays.c:199:9: warning: format `%f' expects argument of type `double', but argument 12 has type `long long unsigned int' [-Wformat=] 199 | printf("\n\nCPU %15s%15s%15s%15s%15s%15s\n" | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ..... 220 | (unsigned long long)t->cpu_delay_total, | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | | | long long unsigned int ..... Link: https://lkml.kernel.org/r/20250208144400544RduNRhwIpT3m2JyRBqskZ@zte.com.cn Fixes: f65c64f311ee ("delayacct: add delay min to record delay peak") Reviewed-by: xu xin Signed-off-by: Wang Yaxin Signed-off-by: Kun Jiang Cc: Balbir Singh Cc: David Hildenbrand Cc: Fan Yu Cc: Peilin He Cc: Qiang Tu Cc: wangyong Cc: ye xingchen Cc: Yunkai Zhang Signed-off-by: Andrew Morton --- tools/accounting/getdelays.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 100ad3dc091a2..3feac0482fe90 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -196,22 +196,22 @@ static int get_family_id(int sd) static void print_delayacct(struct taskstats *t) { - printf("\n\nCPU %15s%15s%15s%15s%15s%15s\n" - " %15llu%15llu%15llu%15llu%15.3fms%13.6fms\n" - "IO %15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n" - "SWAP %15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n" - "RECLAIM %12s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n" - "THRASHING%12s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n" - "COMPACT %12s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n" - "WPCOPY %12s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n" - "IRQ %15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n", + printf("\n\nCPU %15s%15s%15s%15s%15s%15s%15s\n" + " %15llu%15llu%15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "IO %15s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "SWAP %15s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "RECLAIM %12s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "THRASHING%12s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "COMPACT %12s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "WPCOPY %12s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "IRQ %15s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n", "count", "real total", "virtual total", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->cpu_count, -- GitLab From b016d0873777462e55af4c615104cc684fce086d Mon Sep 17 00:00:00 2001 From: Wang Yaxin Date: Sat, 8 Feb 2025 14:49:01 +0800 Subject: [PATCH 0780/1585] taskstats: modify taskstats version After adding "delay max" and "delay min" to the taskstats structure, the taskstats version needs to be updated. Link: https://lkml.kernel.org/r/20250208144901218Q5ptVpqsQkb2MOEmW4Ujn@zte.com.cn Fixes: f65c64f311ee ("delayacct: add delay min to record delay peak") Signed-off-by: Wang Yaxin Signed-off-by: Kun Jiang Reviewed-by: xu xin Signed-off-by: Andrew Morton --- include/uapi/linux/taskstats.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index 934e20ef7f793..95762232e0186 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -34,7 +34,7 @@ */ -#define TASKSTATS_VERSION 14 +#define TASKSTATS_VERSION 15 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ -- GitLab From f39edcf6349abb2ca2df96acc8645f4d2631d0a7 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Tue, 11 Feb 2025 15:26:25 +0800 Subject: [PATCH 0781/1585] mm: pgtable: fix incorrect reclaim of non-empty PTE pages In zap_pte_range(), if the pte lock was released midway, the pte entries may be refilled with physical pages by another thread, which may cause a non-empty PTE page to be reclaimed and eventually cause the system to crash. To fix it, fall back to the slow path in this case to recheck if all pte entries are still none. Link: https://lkml.kernel.org/r/20250211072625.89188-1-zhengqi.arch@bytedance.com Fixes: 6375e95f381e ("mm: pgtable: reclaim empty PTE page in madvise(MADV_DONTNEED)") Signed-off-by: Qi Zheng Reported-by: Christian Brauner Closes: https://lore.kernel.org/all/20250207-anbot-bankfilialen-acce9d79a2c7@brauner/ Reported-by: Qu Wenruo Closes: https://lore.kernel.org/all/152296f3-5c81-4a94-97f3-004108fba7be@gmx.com/ Tested-by: Zi Yan Cc: Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: David Hildenbrand Cc: Jann Horn Cc: Matthew Wilcox Cc: Muchun Song Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/memory.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 539c0f7c6d545..b4d3d4893267c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1719,7 +1719,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, pmd_t pmdval; unsigned long start = addr; bool can_reclaim_pt = reclaim_pt_is_enabled(start, end, details); - bool direct_reclaim = false; + bool direct_reclaim = true; int nr; retry: @@ -1734,8 +1734,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, do { bool any_skipped = false; - if (need_resched()) + if (need_resched()) { + direct_reclaim = false; break; + } nr = do_zap_pte_range(tlb, vma, pte, addr, end, details, rss, &force_flush, &force_break, &any_skipped); @@ -1743,11 +1745,20 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, can_reclaim_pt = false; if (unlikely(force_break)) { addr += nr * PAGE_SIZE; + direct_reclaim = false; break; } } while (pte += nr, addr += PAGE_SIZE * nr, addr != end); - if (can_reclaim_pt && addr == end) + /* + * Fast path: try to hold the pmd lock and unmap the PTE page. + * + * If the pte lock was released midway (retry case), or if the attempt + * to hold the pmd lock failed, then we need to recheck all pte entries + * to ensure they are still none, thereby preventing the pte entries + * from being repopulated by another thread. + */ + if (can_reclaim_pt && direct_reclaim && addr == end) direct_reclaim = try_get_and_clear_pmd(mm, pmd, &pmdval); add_mm_rss_vec(mm, rss); -- GitLab From 8648ee2622aefa5b567ebea71609822373995f37 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 11 Feb 2025 13:21:17 -0800 Subject: [PATCH 0782/1585] mailmap: update Nick's entry Link: https://lkml.kernel.org/r/20250211212117.3195265-1-ndesaulniers@google.com Signed-off-by: Nick Desaulniers Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index f34af946180f8..a897c16d3baef 100644 --- a/.mailmap +++ b/.mailmap @@ -534,6 +534,7 @@ Nicholas Piggin Nicholas Piggin Nicholas Piggin Nicholas Piggin +Nick Desaulniers Nicolas Ferre Nicolas Pitre Nicolas Pitre -- GitLab From 99333229dee41b992f3b0493f6aa2e3528138384 Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Tue, 11 Feb 2025 08:18:19 +0000 Subject: [PATCH 0783/1585] memcg: avoid dead loop when setting memory.max A softlockup issue was found with stress test: watchdog: BUG: soft lockup - CPU#27 stuck for 26s! [migration/27:181] CPU: 27 UID: 0 PID: 181 Comm: migration/27 6.14.0-rc2-next-20250210 #1 Stopper: multi_cpu_stop <- stop_machine_from_inactive_cpu RIP: 0010:stop_machine_yield+0x2/0x10 RSP: 0000:ff4a0dcecd19be48 EFLAGS: 00000246 RAX: ffffffff89c0108f RBX: ff4a0dcec03afe44 RCX: 0000000000000000 RDX: ff1cdaaf6eba5808 RSI: 0000000000000282 RDI: ff1cda80c1775a40 RBP: 0000000000000001 R08: 00000011620096c6 R09: 7fffffffffffffff R10: 0000000000000001 R11: 0000000000000100 R12: ff1cda80c1775a40 R13: 0000000000000000 R14: 0000000000000001 R15: ff4a0dcec03afe20 FS: 0000000000000000(0000) GS:ff1cdaaf6eb80000(0000) CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000025e2c2a001 CR4: 0000000000773ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: multi_cpu_stop+0x8f/0x100 cpu_stopper_thread+0x90/0x140 smpboot_thread_fn+0xad/0x150 kthread+0xc2/0x100 ret_from_fork+0x2d/0x50 The stress test involves CPU hotplug operations and memory control group (memcg) operations. The scenario can be described as follows: echo xx > memory.max cache_ap_online oom_reaper (CPU23) (CPU50) xx < usage stop_machine_from_inactive_cpu for(;;) // all active cpus trigger OOM queue_stop_cpus_work // waiting oom_reaper multi_cpu_stop(migration/xx) // sync all active cpus ack // waiting cpu23 ack // CPU50 loops in multi_cpu_stop waiting cpu50 Detailed explanation: 1. When the usage is larger than xx, an OOM may be triggered. If the process does not handle with ths kill signal immediately, it will loop in the memory_max_write. 2. When cache_ap_online is triggered, the multi_cpu_stop is queued to the active cpus. Within the multi_cpu_stop function, it attempts to synchronize the CPU states. However, the CPU23 didn't acknowledge because it is stuck in a loop within the for(;;). 3. The oom_reaper process is blocked because CPU50 is in a loop, waiting for CPU23 to acknowledge the synchronization request. 4. Finally, it formed cyclic dependency and lead to softlockup and dead loop. To fix this issue, add cond_resched() in the memory_max_write, so that it will not block migration task. Link: https://lkml.kernel.org/r/20250211081819.33307-1-chenridong@huaweicloud.com Fixes: b6e6edcfa405 ("mm: memcontrol: reclaim and OOM kill when shrinking memory.max below usage") Signed-off-by: Chen Ridong Acked-by: Michal Hocko Cc: Roman Gushchin Cc: Johannes Weiner Cc: Shakeel Butt Cc: Muchun Song Cc: Wang Weiyang Signed-off-by: Andrew Morton --- mm/memcontrol.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 46f8b372d212b..4de6acb9b8ecb 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4166,6 +4166,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of, memcg_memory_event(memcg, MEMCG_OOM); if (!mem_cgroup_out_of_memory(memcg, GFP_KERNEL, 0)) break; + cond_resched(); } memcg_wb_domain_size_changed(memcg); -- GitLab From 6d7bc938adca9024a6b51cf55d9b0542b653b69c Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Mon, 10 Feb 2025 22:48:56 -0500 Subject: [PATCH 0784/1585] mm: hugetlb: avoid fallback for specific node allocation of 1G pages When using the HugeTLB kernel command-line to allocate 1G pages from a specific node, such as: default_hugepagesz=1G hugepages=1:1 If node 1 happens to not have enough memory for the requested number of 1G pages, the allocation falls back to other nodes. A quick way to reproduce this is by creating a KVM guest with a memory-less node and trying to allocate 1 1G page from it. Instead of failing, the allocation will fallback to other nodes. This defeats the purpose of node specific allocation. Also, specific node allocation for 2M pages don't have this behavior: the allocation will just fail for the pages it can't satisfy. This issue happens because HugeTLB calls memblock_alloc_try_nid_raw() for 1G boot-time allocation as this function falls back to other nodes if the allocation can't be satisfied. Use memblock_alloc_exact_nid_raw() instead, which ensures that the allocation will only be satisfied from the specified node. Link: https://lkml.kernel.org/r/20250211034856.629371-1-luizcap@redhat.com Fixes: b5389086ad7b ("hugetlbfs: extend the definition of hugepages parameter to support node allocation") Signed-off-by: Luiz Capitulino Acked-by: Oscar Salvador Acked-by: David Hildenbrand Cc: "Mike Rapoport (IBM)" Cc: Muchun Song Cc: Zhenguo Yao Cc: Frank van der Linden Signed-off-by: Andrew Morton --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 65068671e460a..163190e89ea16 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3145,7 +3145,7 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid) /* do node specific alloc */ if (nid != NUMA_NO_NODE) { - m = memblock_alloc_try_nid_raw(huge_page_size(h), huge_page_size(h), + m = memblock_alloc_exact_nid_raw(huge_page_size(h), huge_page_size(h), 0, MEMBLOCK_ALLOC_ACCESSIBLE, nid); if (!m) return 0; -- GitLab From 5dcf52e2ce0fe3c4516b1e494c1af6d3a69e30e7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 12 Feb 2025 17:44:25 +0000 Subject: [PATCH 0785/1585] selftests/mm: fix check for running THP tests When testing if we should try to compact memory or drop caches before we run the THP or HugeTLB tests we use | as an or operator. This doesn't work since run_vmtests.sh is written in shell where this is used to pipe the output of the first argument into the second. Instead use the shell's -o operator. Link: https://lkml.kernel.org/r/20250212-kselftest-mm-no-hugepages-v1-1-44702f538522@kernel.org Fixes: b433ffa8dbac ("selftests: mm: perform some system cleanup before using hugepages") Signed-off-by: Mark Brown Reviewed-by: Nico Pache Cc: Mariano Pache Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/run_vmtests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 333c468c26991..da7e266681031 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -220,7 +220,7 @@ run_test() { if test_selected ${CATEGORY}; then # On memory constrainted systems some tests can fail to allocate hugepages. # perform some cleanup before the test for a higher success rate. - if [ ${CATEGORY} == "thp" ] | [ ${CATEGORY} == "hugetlb" ]; then + if [ ${CATEGORY} == "thp" -o ${CATEGORY} == "hugetlb" ]; then echo 3 > /proc/sys/vm/drop_caches sleep 2 echo 1 > /proc/sys/vm/compact_memory -- GitLab From 4998a6fa2a31176d0882bdfa27d5d03b665ba19b Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 12 Feb 2025 09:35:20 -0800 Subject: [PATCH 0786/1585] MAINTAINERS: update Nick's contact info Updated .mailmap, but forgot these other places. Link: https://lkml.kernel.org/r/20250212173523.3979840-1-ndesaulniers@google.com Signed-off-by: Nick Desaulniers Signed-off-by: Andrew Morton --- Documentation/process/embargoed-hardware-issues.rst | 2 +- .../translations/sp_SP/process/embargoed-hardware-issues.rst | 2 +- MAINTAINERS | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index daebce49cfdf5..0e19d2f0d6bbe 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -308,7 +308,7 @@ an involved disclosed party. The current ambassadors list: Google Kees Cook - LLVM Nick Desaulniers + LLVM Nick Desaulniers ============= ======================================================== If you want your organization to be added to the ambassadors list, please diff --git a/Documentation/translations/sp_SP/process/embargoed-hardware-issues.rst b/Documentation/translations/sp_SP/process/embargoed-hardware-issues.rst index 7d4d694967c73..9d444b9c46d39 100644 --- a/Documentation/translations/sp_SP/process/embargoed-hardware-issues.rst +++ b/Documentation/translations/sp_SP/process/embargoed-hardware-issues.rst @@ -287,7 +287,7 @@ revelada involucrada. La lista de embajadores actuales: Google Kees Cook - LLVM Nick Desaulniers + LLVM Nick Desaulniers ============= ======================================================== Si quiere que su organización se añada a la lista de embajadores, por diff --git a/MAINTAINERS b/MAINTAINERS index efee40ea589f7..4e17764cb6ed4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5655,7 +5655,7 @@ F: .clang-format CLANG/LLVM BUILD SUPPORT M: Nathan Chancellor -R: Nick Desaulniers +R: Nick Desaulniers R: Bill Wendling R: Justin Stitt L: llvm@lists.linux.dev -- GitLab From ac7af1f57acd1e1d112b36e036584ca4bc4c284a Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 17 Feb 2025 15:44:02 -0500 Subject: [PATCH 0787/1585] kasan: don't call find_vm_area() in a PREEMPT_RT kernel The following bug report was found when running a PREEMPT_RT debug kernel. BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 140605, name: kunit_try_catch preempt_count: 1, expected: 0 Call trace: rt_spin_lock+0x70/0x140 find_vmap_area+0x84/0x168 find_vm_area+0x1c/0x50 print_address_description.constprop.0+0x2a0/0x320 print_report+0x108/0x1f8 kasan_report+0x90/0xc8 Since commit e30a0361b851 ("kasan: make report_lock a raw spinlock"), report_lock was changed to raw_spinlock_t to fix another similar PREEMPT_RT problem. That alone isn't enough to cover other corner cases. print_address_description() is always invoked under the report_lock. The context under this lock is always atomic even on PREEMPT_RT. find_vm_area() acquires vmap_node::busy.lock which is a spinlock_t, becoming a sleeping lock on PREEMPT_RT and must not be acquired in atomic context. Don't invoke find_vm_area() on PREEMPT_RT and just print the address. Non-PREEMPT_RT builds remain unchanged. Add a DEFINE_WAIT_OVERRIDE_MAP() macro to tell lockdep that this lock nesting is allowed because the PREEMPT_RT part (which is invalid) has been taken care of. This macro was first introduced in commit 0cce06ba859a ("debugobjects,locking: Annotate debug_object_fill_pool() wait type violation"). Link: https://lkml.kernel.org/r/20250217204402.60533-1-longman@redhat.com Fixes: e30a0361b851 ("kasan: make report_lock a raw spinlock") Signed-off-by: Waiman Long Suggested-by: Andrey Konovalov Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitriy Vyukov Cc: Steven Rostedt Cc: Mariano Pache Cc: Sebastian Andrzej Siewior Cc: Vincenzo Frascino Signed-off-by: Andrew Morton --- mm/kasan/report.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 3fe77a360f1c5..8357e1a33699b 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -370,6 +370,36 @@ static inline bool init_task_stack_addr(const void *addr) sizeof(init_thread_union.stack)); } +/* + * This function is invoked with report_lock (a raw_spinlock) held. A + * PREEMPT_RT kernel cannot call find_vm_area() as it will acquire a sleeping + * rt_spinlock. + * + * For !RT kernel, the PROVE_RAW_LOCK_NESTING config option will print a + * lockdep warning for this raw_spinlock -> spinlock dependency. This config + * option is enabled by default to ensure better test coverage to expose this + * kind of RT kernel problem. This lockdep splat, however, can be suppressed + * by using DEFINE_WAIT_OVERRIDE_MAP() if it serves a useful purpose and the + * invalid PREEMPT_RT case has been taken care of. + */ +static inline struct vm_struct *kasan_find_vm_area(void *addr) +{ + static DEFINE_WAIT_OVERRIDE_MAP(vmalloc_map, LD_WAIT_SLEEP); + struct vm_struct *va; + + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + return NULL; + + /* + * Suppress lockdep warning and fetch vmalloc area of the + * offending address. + */ + lock_map_acquire_try(&vmalloc_map); + va = find_vm_area(addr); + lock_map_release(&vmalloc_map); + return va; +} + static void print_address_description(void *addr, u8 tag, struct kasan_report_info *info) { @@ -399,7 +429,7 @@ static void print_address_description(void *addr, u8 tag, } if (is_vmalloc_addr(addr)) { - struct vm_struct *va = find_vm_area(addr); + struct vm_struct *va = kasan_find_vm_area(addr); if (va) { pr_err("The buggy address belongs to the virtual mapping at\n" @@ -409,6 +439,8 @@ static void print_address_description(void *addr, u8 tag, pr_err("\n"); page = vmalloc_to_page(addr); + } else { + pr_err("The buggy address %px belongs to a vmalloc virtual mapping\n", addr); } } -- GitLab From 8344017aaf32a7532cff293eb3df7fd2265ebafd Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Fri, 14 Feb 2025 00:36:59 +0800 Subject: [PATCH 0788/1585] test_xarray: fix failure in check_pause when CONFIG_XARRAY_MULTI is not defined In case CONFIG_XARRAY_MULTI is not defined, xa_store_order can store a multi-index entry but xas_for_each can't tell sbiling entry from valid entry. So the check_pause failed when we store a multi-index entry and wish xas_for_each can handle it normally. Avoid to store multi-index entry when CONFIG_XARRAY_MULTI is disabled to fix the failure. Link: https://lkml.kernel.org/r/20250213163659.414309-1-shikemeng@huaweicloud.com Fixes: c9ba5249ef8b ("Xarray: move forward index correctly in xas_pause()") Signed-off-by: Kemeng Shi Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/r/CAMuHMdU_bfadUO=0OZ=AoQ9EAmQPA4wsLCBqohXR+QCeCKRn4A@mail.gmail.com Tested-by: Geert Uytterhoeven Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- lib/test_xarray.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 6932a26f4927c..0e865bab4a10b 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -1418,7 +1418,7 @@ static noinline void check_pause(struct xarray *xa) { XA_STATE(xas, xa, 0); void *entry; - unsigned int order; + int order; unsigned long index = 1; unsigned int count = 0; @@ -1450,7 +1450,7 @@ static noinline void check_pause(struct xarray *xa) xa_destroy(xa); index = 0; - for (order = XA_CHUNK_SHIFT; order > 0; order--) { + for (order = order_limit - 1; order >= 0; order--) { XA_BUG_ON(xa, xa_store_order(xa, index, order, xa_mk_index(index), GFP_KERNEL)); index += 1UL << order; @@ -1462,24 +1462,25 @@ static noinline void check_pause(struct xarray *xa) rcu_read_lock(); xas_for_each(&xas, entry, ULONG_MAX) { XA_BUG_ON(xa, entry != xa_mk_index(index)); - index += 1UL << (XA_CHUNK_SHIFT - count); + index += 1UL << (order_limit - count - 1); count++; } rcu_read_unlock(); - XA_BUG_ON(xa, count != XA_CHUNK_SHIFT); + XA_BUG_ON(xa, count != order_limit); index = 0; count = 0; - xas_set(&xas, XA_CHUNK_SIZE / 2 + 1); + /* test unaligned index */ + xas_set(&xas, 1 % (1UL << (order_limit - 1))); rcu_read_lock(); xas_for_each(&xas, entry, ULONG_MAX) { XA_BUG_ON(xa, entry != xa_mk_index(index)); - index += 1UL << (XA_CHUNK_SHIFT - count); + index += 1UL << (order_limit - count - 1); count++; xas_pause(&xas); } rcu_read_unlock(); - XA_BUG_ON(xa, count != XA_CHUNK_SHIFT); + XA_BUG_ON(xa, count != order_limit); xa_destroy(xa); -- GitLab From 02d954c0fdf91845169cdacc7405b120f90afe01 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 10 Feb 2025 16:32:50 +0100 Subject: [PATCH 0789/1585] sched: Compact RSEQ concurrency IDs with reduced threads and affinity When a process reduces its number of threads or clears bits in its CPU affinity mask, the mm_cid allocation should eventually converge towards smaller values. However, the change introduced by: commit 7e019dcc470f ("sched: Improve cache locality of RSEQ concurrency IDs for intermittent workloads") adds a per-mm/CPU recent_cid which is never unset unless a thread migrates. This is a tradeoff between: A) Preserving cache locality after a transition from many threads to few threads, or after reducing the hamming weight of the allowed CPU mask. B) Making the mm_cid upper bounds wrt nr threads and allowed CPU mask easy to document and understand. C) Allowing applications to eventually react to mm_cid compaction after reduction of the nr threads or allowed CPU mask, making the tracking of mm_cid compaction easier by shrinking it back towards 0 or not. D) Making sure applications that periodically reduce and then increase again the nr threads or allowed CPU mask still benefit from good cache locality with mm_cid. Introduce the following changes: * After shrinking the number of threads or reducing the number of allowed CPUs, reduce the value of max_nr_cid so expansion of CID allocation will preserve cache locality if the number of threads or allowed CPUs increase again. * Only re-use a recent_cid if it is within the max_nr_cid upper bound, else find the first available CID. Fixes: 7e019dcc470f ("sched: Improve cache locality of RSEQ concurrency IDs for intermittent workloads") Signed-off-by: Mathieu Desnoyers Signed-off-by: Gabriele Monaco Signed-off-by: Peter Zijlstra (Intel) Tested-by: Gabriele Monaco Link: https://lkml.kernel.org/r/20250210153253.460471-2-gmonaco@redhat.com --- include/linux/mm_types.h | 7 ++++--- kernel/sched/sched.h | 25 ++++++++++++++++++++++--- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6b27db7f94963..0234f14f2aa6b 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -875,10 +875,11 @@ struct mm_struct { */ unsigned int nr_cpus_allowed; /** - * @max_nr_cid: Maximum number of concurrency IDs allocated. + * @max_nr_cid: Maximum number of allowed concurrency + * IDs allocated. * - * Track the highest number of concurrency IDs allocated for the - * mm. + * Track the highest number of allowed concurrency IDs + * allocated for the mm. */ atomic_t max_nr_cid; /** diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b93c8c3dc05a5..c8512a9fb0229 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -3698,10 +3698,28 @@ static inline int __mm_cid_try_get(struct task_struct *t, struct mm_struct *mm) { struct cpumask *cidmask = mm_cidmask(mm); struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid; - int cid = __this_cpu_read(pcpu_cid->recent_cid); + int cid, max_nr_cid, allowed_max_nr_cid; + /* + * After shrinking the number of threads or reducing the number + * of allowed cpus, reduce the value of max_nr_cid so expansion + * of cid allocation will preserve cache locality if the number + * of threads or allowed cpus increase again. + */ + max_nr_cid = atomic_read(&mm->max_nr_cid); + while ((allowed_max_nr_cid = min_t(int, READ_ONCE(mm->nr_cpus_allowed), + atomic_read(&mm->mm_users))), + max_nr_cid > allowed_max_nr_cid) { + /* atomic_try_cmpxchg loads previous mm->max_nr_cid into max_nr_cid. */ + if (atomic_try_cmpxchg(&mm->max_nr_cid, &max_nr_cid, allowed_max_nr_cid)) { + max_nr_cid = allowed_max_nr_cid; + break; + } + } /* Try to re-use recent cid. This improves cache locality. */ - if (!mm_cid_is_unset(cid) && !cpumask_test_and_set_cpu(cid, cidmask)) + cid = __this_cpu_read(pcpu_cid->recent_cid); + if (!mm_cid_is_unset(cid) && cid < max_nr_cid && + !cpumask_test_and_set_cpu(cid, cidmask)) return cid; /* * Expand cid allocation if the maximum number of concurrency @@ -3709,8 +3727,9 @@ static inline int __mm_cid_try_get(struct task_struct *t, struct mm_struct *mm) * and number of threads. Expanding cid allocation as much as * possible improves cache locality. */ - cid = atomic_read(&mm->max_nr_cid); + cid = max_nr_cid; while (cid < READ_ONCE(mm->nr_cpus_allowed) && cid < atomic_read(&mm->mm_users)) { + /* atomic_try_cmpxchg loads previous mm->max_nr_cid into cid. */ if (!atomic_try_cmpxchg(&mm->max_nr_cid, &cid, cid + 1)) continue; if (!cpumask_test_and_set_cpu(cid, cidmask)) -- GitLab From ec5fd50aeff9c9156304853c6d75eda852d4a2c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 17 Feb 2025 08:43:35 +0100 Subject: [PATCH 0790/1585] uprobes: Don't use %pK through printk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restricted pointers ("%pK") are not meant to be used through printk(). It can unintentionally expose security sensitive, raw pointer values. Use regular pointer formatting instead. For more background, see: https://lore.kernel.org/lkml/20250113171731-dc10e3c1-da64-4af0-b767-7c7070468023@linutronix.de/ Signed-off-by: Thomas Weißschuh Signed-off-by: Ingo Molnar Cc: Masami Hiramatsu Cc: Oleg Nesterov Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250217-restricted-pointers-uprobes-v1-1-e8cbe5bb22a7@linutronix.de --- kernel/events/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 2ca797cbe465f..bf2a87a0a3787 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -417,7 +417,7 @@ static void update_ref_ctr_warn(struct uprobe *uprobe, struct mm_struct *mm, short d) { pr_warn("ref_ctr %s failed for inode: 0x%lx offset: " - "0x%llx ref_ctr_offset: 0x%llx of mm: 0x%pK\n", + "0x%llx ref_ctr_offset: 0x%llx of mm: 0x%p\n", d > 0 ? "increment" : "decrement", uprobe->inode->i_ino, (unsigned long long) uprobe->offset, (unsigned long long) uprobe->ref_ctr_offset, mm); -- GitLab From 81570d6a7ad37033c7895811551a5a9023706eda Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Sat, 15 Feb 2025 10:56:55 +0100 Subject: [PATCH 0791/1585] gpiolib: protect gpio_chip with SRCU in array_info paths in multi get/set During the locking rework in GPIOLIB, we omitted one important use-case, namely: setting and getting values for GPIO descriptor arrays with array_info present. This patch does two things: first it makes struct gpio_array store the address of the underlying GPIO device and not chip. Next: it protects the chip with SRCU from removal in gpiod_get_array_value_complex() and gpiod_set_array_value_complex(). Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250215095655.23152-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 48 +++++++++++++++++++++++++++++------------- drivers/gpio/gpiolib.h | 4 ++-- 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 29110dc436f15..5529d8b65f6fb 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -3143,6 +3143,8 @@ static int gpiod_get_raw_value_commit(const struct gpio_desc *desc) static int gpio_chip_get_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { + lockdep_assert_held(&gc->gpiodev->srcu); + if (gc->get_multiple) return gc->get_multiple(gc, mask, bits); if (gc->get) { @@ -3173,6 +3175,7 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, struct gpio_array *array_info, unsigned long *value_bitmap) { + struct gpio_chip *gc; int ret, i = 0; /* @@ -3184,10 +3187,15 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, array_size <= array_info->size && (void *)array_info == desc_array + array_info->size) { if (!can_sleep) - WARN_ON(array_info->chip->can_sleep); + WARN_ON(array_info->gdev->can_sleep); + + guard(srcu)(&array_info->gdev->srcu); + gc = srcu_dereference(array_info->gdev->chip, + &array_info->gdev->srcu); + if (!gc) + return -ENODEV; - ret = gpio_chip_get_multiple(array_info->chip, - array_info->get_mask, + ret = gpio_chip_get_multiple(gc, array_info->get_mask, value_bitmap); if (ret) return ret; @@ -3468,6 +3476,8 @@ static void gpiod_set_raw_value_commit(struct gpio_desc *desc, bool value) static void gpio_chip_set_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { + lockdep_assert_held(&gc->gpiodev->srcu); + if (gc->set_multiple) { gc->set_multiple(gc, mask, bits); } else { @@ -3485,6 +3495,7 @@ int gpiod_set_array_value_complex(bool raw, bool can_sleep, struct gpio_array *array_info, unsigned long *value_bitmap) { + struct gpio_chip *gc; int i = 0; /* @@ -3496,14 +3507,19 @@ int gpiod_set_array_value_complex(bool raw, bool can_sleep, array_size <= array_info->size && (void *)array_info == desc_array + array_info->size) { if (!can_sleep) - WARN_ON(array_info->chip->can_sleep); + WARN_ON(array_info->gdev->can_sleep); + + guard(srcu)(&array_info->gdev->srcu); + gc = srcu_dereference(array_info->gdev->chip, + &array_info->gdev->srcu); + if (!gc) + return -ENODEV; if (!raw && !bitmap_empty(array_info->invert_mask, array_size)) bitmap_xor(value_bitmap, value_bitmap, array_info->invert_mask, array_size); - gpio_chip_set_multiple(array_info->chip, array_info->set_mask, - value_bitmap); + gpio_chip_set_multiple(gc, array_info->set_mask, value_bitmap); i = find_first_zero_bit(array_info->set_mask, array_size); if (i == array_size) @@ -4765,9 +4781,10 @@ struct gpio_descs *__must_check gpiod_get_array(struct device *dev, { struct gpio_desc *desc; struct gpio_descs *descs; + struct gpio_device *gdev; struct gpio_array *array_info = NULL; - struct gpio_chip *gc; int count, bitmap_size; + unsigned long dflags; size_t descs_size; count = gpiod_count(dev, con_id); @@ -4788,7 +4805,7 @@ struct gpio_descs *__must_check gpiod_get_array(struct device *dev, descs->desc[descs->ndescs] = desc; - gc = gpiod_to_chip(desc); + gdev = gpiod_to_gpio_device(desc); /* * If pin hardware number of array member 0 is also 0, select * its chip as a candidate for fast bitmap processing path. @@ -4796,8 +4813,8 @@ struct gpio_descs *__must_check gpiod_get_array(struct device *dev, if (descs->ndescs == 0 && gpio_chip_hwgpio(desc) == 0) { struct gpio_descs *array; - bitmap_size = BITS_TO_LONGS(gc->ngpio > count ? - gc->ngpio : count); + bitmap_size = BITS_TO_LONGS(gdev->ngpio > count ? + gdev->ngpio : count); array = krealloc(descs, descs_size + struct_size(array_info, invert_mask, 3 * bitmap_size), @@ -4817,7 +4834,7 @@ struct gpio_descs *__must_check gpiod_get_array(struct device *dev, array_info->desc = descs->desc; array_info->size = count; - array_info->chip = gc; + array_info->gdev = gdev; bitmap_set(array_info->get_mask, descs->ndescs, count - descs->ndescs); bitmap_set(array_info->set_mask, descs->ndescs, @@ -4830,7 +4847,7 @@ struct gpio_descs *__must_check gpiod_get_array(struct device *dev, continue; /* Unmark array members which don't belong to the 'fast' chip */ - if (array_info->chip != gc) { + if (array_info->gdev != gdev) { __clear_bit(descs->ndescs, array_info->get_mask); __clear_bit(descs->ndescs, array_info->set_mask); } @@ -4853,9 +4870,10 @@ struct gpio_descs *__must_check gpiod_get_array(struct device *dev, array_info->set_mask); } } else { + dflags = READ_ONCE(desc->flags); /* Exclude open drain or open source from fast output */ - if (gpiochip_line_is_open_drain(gc, descs->ndescs) || - gpiochip_line_is_open_source(gc, descs->ndescs)) + if (test_bit(FLAG_OPEN_DRAIN, &dflags) || + test_bit(FLAG_OPEN_SOURCE, &dflags)) __clear_bit(descs->ndescs, array_info->set_mask); /* Identify 'fast' pins which require invertion */ @@ -4867,7 +4885,7 @@ struct gpio_descs *__must_check gpiod_get_array(struct device *dev, if (array_info) dev_dbg(dev, "GPIO array info: chip=%s, size=%d, get_mask=%lx, set_mask=%lx, invert_mask=%lx\n", - array_info->chip->label, array_info->size, + array_info->gdev->label, array_info->size, *array_info->get_mask, *array_info->set_mask, *array_info->invert_mask); return descs; diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index 83690f72f7e5c..147156ec502b2 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -114,7 +114,7 @@ extern const char *const gpio_suffixes[]; * * @desc: Array of pointers to the GPIO descriptors * @size: Number of elements in desc - * @chip: Parent GPIO chip + * @gdev: Parent GPIO device * @get_mask: Get mask used in fastpath * @set_mask: Set mask used in fastpath * @invert_mask: Invert mask used in fastpath @@ -126,7 +126,7 @@ extern const char *const gpio_suffixes[]; struct gpio_array { struct gpio_desc **desc; unsigned int size; - struct gpio_chip *chip; + struct gpio_device *gdev; unsigned long *get_mask; unsigned long *set_mask; unsigned long invert_mask[]; -- GitLab From 8fb5bb169d17cdd12c2dcc2e96830ed487d77a0f Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 13 Feb 2025 12:58:49 +0100 Subject: [PATCH 0792/1585] sockmap, vsock: For connectible sockets allow only connected sockmap expects all vsocks to have a transport assigned, which is expressed in vsock_proto::psock_update_sk_prot(). However, there is an edge case where an unconnected (connectible) socket may lose its previously assigned transport. This is handled with a NULL check in the vsock/BPF recv path. Another design detail is that listening vsocks are not supposed to have any transport assigned at all. Which implies they are not supported by the sockmap. But this is complicated by the fact that a socket, before switching to TCP_LISTEN, may have had some transport assigned during a failed connect() attempt. Hence, we may end up with a listening vsock in a sockmap, which blows up quickly: KASAN: null-ptr-deref in range [0x0000000000000120-0x0000000000000127] CPU: 7 UID: 0 PID: 56 Comm: kworker/7:0 Not tainted 6.14.0-rc1+ Workqueue: vsock-loopback vsock_loopback_work RIP: 0010:vsock_read_skb+0x4b/0x90 Call Trace: sk_psock_verdict_data_ready+0xa4/0x2e0 virtio_transport_recv_pkt+0x1ca8/0x2acc vsock_loopback_work+0x27d/0x3f0 process_one_work+0x846/0x1420 worker_thread+0x5b3/0xf80 kthread+0x35a/0x700 ret_from_fork+0x2d/0x70 ret_from_fork_asm+0x1a/0x30 For connectible sockets, instead of relying solely on the state of vsk->transport, tell sockmap to only allow those representing established connections. This aligns with the behaviour for AF_INET and AF_UNIX. Fixes: 634f1a7110b4 ("vsock: support sockmap") Signed-off-by: Michal Luczaj Acked-by: Stefano Garzarella Signed-off-by: Paolo Abeni --- net/core/sock_map.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index f1b9b3958792c..2f1be9baad057 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -541,6 +541,9 @@ static bool sock_map_sk_state_allowed(const struct sock *sk) return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN); if (sk_is_stream_unix(sk)) return (1 << sk->sk_state) & TCPF_ESTABLISHED; + if (sk_is_vsock(sk) && + (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) + return (1 << sk->sk_state) & TCPF_ESTABLISHED; return true; } -- GitLab From 857ae05549ee2542317e7084ecaa5f8536634dd9 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 13 Feb 2025 12:58:50 +0100 Subject: [PATCH 0793/1585] vsock/bpf: Warn on socket without transport In the spirit of commit 91751e248256 ("vsock: prevent null-ptr-deref in vsock_*[has_data|has_space]"), armorize the "impossible" cases with a warning. Fixes: 634f1a7110b4 ("vsock: support sockmap") Signed-off-by: Michal Luczaj Reviewed-by: Stefano Garzarella Signed-off-by: Paolo Abeni --- net/vmw_vsock/af_vsock.c | 3 +++ net/vmw_vsock/vsock_bpf.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 53a081d49d28a..7e3db87ae4333 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1189,6 +1189,9 @@ static int vsock_read_skb(struct sock *sk, skb_read_actor_t read_actor) { struct vsock_sock *vsk = vsock_sk(sk); + if (WARN_ON_ONCE(!vsk->transport)) + return -ENODEV; + return vsk->transport->read_skb(vsk, read_actor); } diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c index f201d9eca1df2..07b96d56f3a57 100644 --- a/net/vmw_vsock/vsock_bpf.c +++ b/net/vmw_vsock/vsock_bpf.c @@ -87,7 +87,7 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, lock_sock(sk); vsk = vsock_sk(sk); - if (!vsk->transport) { + if (WARN_ON_ONCE(!vsk->transport)) { copied = -ENODEV; goto out; } -- GitLab From 8350695bfb169b1924626a68f76b369ad01f18f2 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 13 Feb 2025 12:58:51 +0100 Subject: [PATCH 0794/1585] selftest/bpf: Adapt vsock_delete_on_close to sockmap rejecting unconnected Commit 515745445e92 ("selftest/bpf: Add test for vsock removal from sockmap on close()") added test that checked if proto::close() callback was invoked on AF_VSOCK socket release. I.e. it verified that a close()d vsock does indeed get removed from the sockmap. It was done simply by creating a socket pair and attempting to replace a close()d one with its peer. Since, due to a recent change, sockmap does not allow updating index with a non-established connectible vsock, redo it with a freshly established one. Signed-off-by: Michal Luczaj Acked-by: Stefano Garzarella Signed-off-by: Paolo Abeni --- .../selftests/bpf/prog_tests/sockmap_basic.c | 40 ++++++++++--------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 884ad87783d59..21793d8c79e12 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -111,31 +111,35 @@ static void test_sockmap_create_update_free(enum bpf_map_type map_type) static void test_sockmap_vsock_delete_on_close(void) { - int err, c, p, map; - const int zero = 0; - - err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p); - if (!ASSERT_OK(err, "create_pair(AF_VSOCK)")) - return; + int map, c, p, err, zero = 0; map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int), sizeof(int), 1, NULL); - if (!ASSERT_GE(map, 0, "bpf_map_create")) { - close(c); - goto out; - } + if (!ASSERT_OK_FD(map, "bpf_map_create")) + return; - err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST); - close(c); - if (!ASSERT_OK(err, "bpf_map_update")) - goto out; + err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p); + if (!ASSERT_OK(err, "create_pair")) + goto close_map; - err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST); + if (xbpf_map_update_elem(map, &zero, &c, BPF_NOEXIST)) + goto close_socks; + + xclose(c); + xclose(p); + + err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p); + if (!ASSERT_OK(err, "create_pair")) + goto close_map; + + err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST); ASSERT_OK(err, "after close(), bpf_map_update"); -out: - close(p); - close(map); +close_socks: + xclose(c); + xclose(p); +close_map: + xclose(map); } static void test_skmsg_helpers(enum bpf_map_type map_type) -- GitLab From 85928e9c436398abcac32a9afa2f591895dd497d Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 13 Feb 2025 12:58:52 +0100 Subject: [PATCH 0795/1585] selftest/bpf: Add vsock test for sockmap rejecting unconnected Verify that for a connectible AF_VSOCK socket, merely having a transport assigned is insufficient; socket must be connected for the sockmap to accept. This does not test datagram vsocks. Even though it hardly matters. VMCI is the only transport that features VSOCK_TRANSPORT_F_DGRAM, but it has an unimplemented vsock_transport::readskb() callback, making it unsupported by BPF/sockmap. Signed-off-by: Michal Luczaj Acked-by: Stefano Garzarella Signed-off-by: Paolo Abeni --- .../selftests/bpf/prog_tests/sockmap_basic.c | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 21793d8c79e12..05eb37935c3e2 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -1065,6 +1065,34 @@ static void test_sockmap_skb_verdict_vsock_poll(void) test_sockmap_pass_prog__destroy(skel); } +static void test_sockmap_vsock_unconnected(void) +{ + struct sockaddr_storage addr; + int map, s, zero = 0; + socklen_t alen; + + map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int), + sizeof(int), 1, NULL); + if (!ASSERT_OK_FD(map, "bpf_map_create")) + return; + + s = xsocket(AF_VSOCK, SOCK_STREAM, 0); + if (s < 0) + goto close_map; + + /* Fail connect(), but trigger transport assignment. */ + init_addr_loopback(AF_VSOCK, &addr, &alen); + if (!ASSERT_ERR(connect(s, sockaddr(&addr), alen), "connect")) + goto close_sock; + + ASSERT_ERR(bpf_map_update_elem(map, &zero, &s, BPF_ANY), "map_update"); + +close_sock: + xclose(s); +close_map: + xclose(map); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -1131,4 +1159,6 @@ void test_sockmap_basic(void) test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKHASH); if (test__start_subtest("sockmap skb_verdict vsock poll")) test_sockmap_skb_verdict_vsock_poll(); + if (test__start_subtest("sockmap vsock unconnected")) + test_sockmap_vsock_unconnected(); } -- GitLab From a3bdd8f5c2217e1cb35db02c2eed36ea20fb50f5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Feb 2025 12:40:24 +0100 Subject: [PATCH 0796/1585] ALSA: usb-audio: Avoid dropping MIDI events at closing multiple ports We fixed the UAF issue in USB MIDI code by canceling the pending work at closing each MIDI output device in the commit below. However, this assumed that it's the only one that is tied with the endpoint, and it resulted in unexpected data truncations when multiple devices are assigned to a single endpoint and opened simultaneously. For addressing the unexpected MIDI message drops, simply replace cancel_work_sync() with flush_work(). The drain callback should have been already invoked before the close callback, hence the port->active flag must be already cleared. So this just assures that the pending work is finished before freeing the resources. Fixes: 0125de38122f ("ALSA: usb-audio: Cancel pending work at closing a MIDI substream") Reported-and-tested-by: John Keeping Closes: https://lore.kernel.org/20250217111647.3368132-1-jkeeping@inmusicbrands.com Link: https://patch.msgid.link/20250218114024.23125-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/midi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 737dd00e97b14..779d97d31f170 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1145,7 +1145,7 @@ static int snd_usbmidi_output_close(struct snd_rawmidi_substream *substream) { struct usbmidi_out_port *port = substream->runtime->private_data; - cancel_work_sync(&port->ep->work); + flush_work(&port->ep->work); return substream_open(substream, 0, 0); } -- GitLab From fcd875445866a5219cf2be3101e276b21fc843f3 Mon Sep 17 00:00:00 2001 From: Christopher Lentocha Date: Tue, 18 Feb 2025 08:59:29 -0500 Subject: [PATCH 0797/1585] nvme-pci: quirk Acer FA100 for non-uniqueue identifiers In order for two Acer FA100 SSDs to work in one PC (in the case of myself, a Lenovo Legion T5 28IMB05), and not show one drive and not the other, and sometimes mix up what drive shows up (randomly), these two lines of code need to be added, and then both of the SSDs will show up and not conflict when booting off of one of them. If you boot up your computer with both SSDs installed without this patch, you may also randomly get into a kernel panic (if the initrd is not set up) or stuck in the initrd "/init" process, it is set up, however, if you do apply this patch, there should not be problems with booting or seeing both contents of the drive. Tested with the btrfs filesystem with a RAID configuration of having the root drive '/' combined to make two 256GB Acer FA100 SSDs become 512GB in total storage. Kernel Logs with patch applied (`dmesg -t | grep -i nvm`): ``` ... nvme 0000:04:00.0: platform quirk: setting simple suspend nvme nvme0: pci function 0000:04:00.0 nvme 0000:05:00.0: platform quirk: setting simple suspend nvme nvme1: pci function 0000:05:00.0 nvme nvme1: missing or invalid SUBNQN field. nvme nvme1: allocated 64 MiB host memory buffer. nvme nvme0: missing or invalid SUBNQN field. nvme nvme0: allocated 64 MiB host memory buffer. nvme nvme1: 8/0/0 default/read/poll queues nvme nvme1: Ignoring bogus Namespace Identifiers nvme nvme0: 8/0/0 default/read/poll queues nvme nvme0: Ignoring bogus Namespace Identifiers nvme0n1: p1 p2 ... ``` Kernel Logs with patch not applied (`dmesg -t | grep -i nvm`): ``` ... nvme 0000:04:00.0: platform quirk: setting simple suspend nvme nvme0: pci function 0000:04:00.0 nvme 0000:05:00.0: platform quirk: setting simple suspend nvme nvme1: pci function 0000:05:00.0 nvme nvme0: missing or invalid SUBNQN field. nvme nvme1: missing or invalid SUBNQN field. nvme nvme0: allocated 64 MiB host memory buffer. nvme nvme1: allocated 64 MiB host memory buffer. nvme nvme0: 8/0/0 default/read/poll queues nvme nvme1: 8/0/0 default/read/poll queues nvme nvme1: globally duplicate IDs for nsid 1 nvme nvme1: VID:DID 1dbe:5216 model:Acer SSD FA100 256GB firmware:1.Z.J.2X nvme0n1: p1 p2 ... ``` Signed-off-by: Christopher Lentocha Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9197a5b173fdf..950289405ef28 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3706,6 +3706,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1cc1, 0x5350), /* ADATA XPG GAMMIX S50 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1dbe, 0x5216), /* Acer/INNOGRIT FA100/5216 NVMe SSD */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1dbe, 0x5236), /* ADATA XPG GAMMIX S70 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1e49, 0x0021), /* ZHITAI TiPro5000 NVMe SSD */ -- GitLab From 84e009042d0f3dfe91bec60bcd208ee3f866cbcd Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 17 Feb 2025 17:08:27 +0100 Subject: [PATCH 0798/1585] nvme-tcp: add basic support for the C2HTermReq PDU Previously, the NVMe/TCP host driver did not handle the C2HTermReq PDU, instead printing "unsupported pdu type (3)" when received. This patch adds support for processing the C2HTermReq PDU, allowing the driver to print the Fatal Error Status field. Example of output: nvme nvme4: Received C2HTermReq (FES = Invalid PDU Header Field) Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 43 ++++++++++++++++++++++++++++++++++++++++ include/linux/nvme-tcp.h | 2 ++ 2 files changed, 45 insertions(+) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 841238f38fdda..038b35238c26d 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -763,6 +763,40 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue, return 0; } +static void nvme_tcp_handle_c2h_term(struct nvme_tcp_queue *queue, + struct nvme_tcp_term_pdu *pdu) +{ + u16 fes; + const char *msg; + u32 plen = le32_to_cpu(pdu->hdr.plen); + + static const char * const msg_table[] = { + [NVME_TCP_FES_INVALID_PDU_HDR] = "Invalid PDU Header Field", + [NVME_TCP_FES_PDU_SEQ_ERR] = "PDU Sequence Error", + [NVME_TCP_FES_HDR_DIGEST_ERR] = "Header Digest Error", + [NVME_TCP_FES_DATA_OUT_OF_RANGE] = "Data Transfer Out Of Range", + [NVME_TCP_FES_R2T_LIMIT_EXCEEDED] = "R2T Limit Exceeded", + [NVME_TCP_FES_UNSUPPORTED_PARAM] = "Unsupported Parameter", + }; + + if (plen < NVME_TCP_MIN_C2HTERM_PLEN || + plen > NVME_TCP_MAX_C2HTERM_PLEN) { + dev_err(queue->ctrl->ctrl.device, + "Received a malformed C2HTermReq PDU (plen = %u)\n", + plen); + return; + } + + fes = le16_to_cpu(pdu->fes); + if (fes && fes < ARRAY_SIZE(msg_table)) + msg = msg_table[fes]; + else + msg = "Unknown"; + + dev_err(queue->ctrl->ctrl.device, + "Received C2HTermReq (FES = %s)\n", msg); +} + static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, unsigned int *offset, size_t *len) { @@ -784,6 +818,15 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, return 0; hdr = queue->pdu; + if (unlikely(hdr->type == nvme_tcp_c2h_term)) { + /* + * C2HTermReq never includes Header or Data digests. + * Skip the checks. + */ + nvme_tcp_handle_c2h_term(queue, (void *)queue->pdu); + return -EINVAL; + } + if (queue->hdr_digest) { ret = nvme_tcp_verify_hdgst(queue, queue->pdu, hdr->hlen); if (unlikely(ret)) diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h index e07e8978d691b..e435250fcb4d0 100644 --- a/include/linux/nvme-tcp.h +++ b/include/linux/nvme-tcp.h @@ -13,6 +13,8 @@ #define NVME_TCP_ADMIN_CCSZ SZ_8K #define NVME_TCP_DIGEST_LENGTH 4 #define NVME_TCP_MIN_MAXH2CDATA 4096 +#define NVME_TCP_MIN_C2HTERM_PLEN 24 +#define NVME_TCP_MAX_C2HTERM_PLEN 152 enum nvme_tcp_pfv { NVME_TCP_PFV_1_0 = 0x0, -- GitLab From 4082326807072b71496501b6a0c55ffe8d5092a5 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 7 Feb 2025 13:41:34 +0100 Subject: [PATCH 0799/1585] nvmet: Fix crash when a namespace is disabled The namespace percpu counter protects pending I/O, and we can only safely diable the namespace once the counter drop to zero. Otherwise we end up with a crash when running blktests/nvme/058 (eg for loop transport): [ 2352.930426] [ T53909] Oops: general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] PREEMPT SMP KASAN PTI [ 2352.930431] [ T53909] KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f] [ 2352.930434] [ T53909] CPU: 3 UID: 0 PID: 53909 Comm: kworker/u16:5 Tainted: G W 6.13.0-rc6 #232 [ 2352.930438] [ T53909] Tainted: [W]=WARN [ 2352.930440] [ T53909] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-3.fc41 04/01/2014 [ 2352.930443] [ T53909] Workqueue: nvmet-wq nvme_loop_execute_work [nvme_loop] [ 2352.930449] [ T53909] RIP: 0010:blkcg_set_ioprio+0x44/0x180 as the queue is already torn down when calling submit_bio(); So we need to init the percpu counter in nvmet_ns_enable(), and wait for it to drop to zero in nvmet_ns_disable() to avoid having I/O pending after the namespace has been disabled. Fixes: 74d16965d7ac ("nvmet-loop: avoid using mutex in IO hotpath") Signed-off-by: Hannes Reinecke Reviewed-by: Nilay Shroff Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Tested-by: Shin'ichiro Kawasaki Signed-off-by: Keith Busch --- drivers/nvme/target/core.c | 40 ++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index cdc4a09a6e8a4..2e741696f3712 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -606,6 +606,9 @@ int nvmet_ns_enable(struct nvmet_ns *ns) goto out_dev_put; } + if (percpu_ref_init(&ns->ref, nvmet_destroy_namespace, 0, GFP_KERNEL)) + goto out_pr_exit; + nvmet_ns_changed(subsys, ns->nsid); ns->enabled = true; xa_set_mark(&subsys->namespaces, ns->nsid, NVMET_NS_ENABLED); @@ -613,6 +616,9 @@ int nvmet_ns_enable(struct nvmet_ns *ns) out_unlock: mutex_unlock(&subsys->lock); return ret; +out_pr_exit: + if (ns->pr.enable) + nvmet_pr_exit_ns(ns); out_dev_put: list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid)); @@ -638,6 +644,19 @@ void nvmet_ns_disable(struct nvmet_ns *ns) mutex_unlock(&subsys->lock); + /* + * Now that we removed the namespaces from the lookup list, we + * can kill the per_cpu ref and wait for any remaining references + * to be dropped, as well as a RCU grace period for anyone only + * using the namepace under rcu_read_lock(). Note that we can't + * use call_rcu here as we need to ensure the namespaces have + * been fully destroyed before unloading the module. + */ + percpu_ref_kill(&ns->ref); + synchronize_rcu(); + wait_for_completion(&ns->disable_done); + percpu_ref_exit(&ns->ref); + if (ns->pr.enable) nvmet_pr_exit_ns(ns); @@ -660,22 +679,6 @@ void nvmet_ns_free(struct nvmet_ns *ns) if (ns->nsid == subsys->max_nsid) subsys->max_nsid = nvmet_max_nsid(subsys); - mutex_unlock(&subsys->lock); - - /* - * Now that we removed the namespaces from the lookup list, we - * can kill the per_cpu ref and wait for any remaining references - * to be dropped, as well as a RCU grace period for anyone only - * using the namepace under rcu_read_lock(). Note that we can't - * use call_rcu here as we need to ensure the namespaces have - * been fully destroyed before unloading the module. - */ - percpu_ref_kill(&ns->ref); - synchronize_rcu(); - wait_for_completion(&ns->disable_done); - percpu_ref_exit(&ns->ref); - - mutex_lock(&subsys->lock); subsys->nr_namespaces--; mutex_unlock(&subsys->lock); @@ -705,9 +708,6 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid) ns->nsid = nsid; ns->subsys = subsys; - if (percpu_ref_init(&ns->ref, nvmet_destroy_namespace, 0, GFP_KERNEL)) - goto out_free; - if (ns->nsid > subsys->max_nsid) subsys->max_nsid = nsid; @@ -730,8 +730,6 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid) return ns; out_exit: subsys->max_nsid = nvmet_max_nsid(subsys); - percpu_ref_exit(&ns->ref); -out_free: kfree(ns); out_unlock: mutex_unlock(&subsys->lock); -- GitLab From 3988ac1c67e6e84d2feb987d7b36d5791174b3da Mon Sep 17 00:00:00 2001 From: Ruozhu Li Date: Sun, 16 Feb 2025 20:49:56 +0800 Subject: [PATCH 0800/1585] nvmet-rdma: recheck queue state is LIVE in state lock in recv done The queue state checking in nvmet_rdma_recv_done is not in queue state lock.Queue state can transfer to LIVE in cm establish handler between state checking and state lock here, cause a silent drop of nvme connect cmd. Recheck queue state whether in LIVE state in state lock to prevent this issue. Signed-off-by: Ruozhu Li Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/rdma.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 1afd93026f9bf..2a4536ef61848 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -996,6 +996,27 @@ static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue, nvmet_req_complete(&cmd->req, status); } +static bool nvmet_rdma_recv_not_live(struct nvmet_rdma_queue *queue, + struct nvmet_rdma_rsp *rsp) +{ + unsigned long flags; + bool ret = true; + + spin_lock_irqsave(&queue->state_lock, flags); + /* + * recheck queue state is not live to prevent a race condition + * with RDMA_CM_EVENT_ESTABLISHED handler. + */ + if (queue->state == NVMET_RDMA_Q_LIVE) + ret = false; + else if (queue->state == NVMET_RDMA_Q_CONNECTING) + list_add_tail(&rsp->wait_list, &queue->rsp_wait_list); + else + nvmet_rdma_put_rsp(rsp); + spin_unlock_irqrestore(&queue->state_lock, flags); + return ret; +} + static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) { struct nvmet_rdma_cmd *cmd = @@ -1038,17 +1059,9 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) rsp->n_rdma = 0; rsp->invalidate_rkey = 0; - if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) { - unsigned long flags; - - spin_lock_irqsave(&queue->state_lock, flags); - if (queue->state == NVMET_RDMA_Q_CONNECTING) - list_add_tail(&rsp->wait_list, &queue->rsp_wait_list); - else - nvmet_rdma_put_rsp(rsp); - spin_unlock_irqrestore(&queue->state_lock, flags); + if (unlikely(queue->state != NVMET_RDMA_Q_LIVE) && + nvmet_rdma_recv_not_live(queue, rsp)) return; - } nvmet_rdma_handle_command(queue, rsp); } -- GitLab From 68a5c91f01fc9f086567b260cced003ed9fdff3f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 13 Feb 2025 15:52:28 +0900 Subject: [PATCH 0801/1585] nvmet: pci-epf: Correctly initialize CSTS when enabling the controller The function nvmet_pci_epf_poll_cc_work() sets the NVME_CSTS_RDY bit of the controller status register (CSTS) when nvmet_pci_epf_enable_ctrl() returns success. However, since this function can be called several times (e.g. if the host reboots), instead of setting the bit in ctrl->csts, initialize this field to only have NVME_CSTS_RDY set. Conversely, if nvmet_pci_epf_enable_ctrl() fails, make sure to clear all bits from ctrl->csts. To simplify nvmet_pci_epf_poll_cc_work(), initialize ctrl->csts to NVME_CSTS_RDY directly inside nvmet_pci_epf_enable_ctrl() and clear this field in that function as well in case of a failure. To be consistent, move clearing the NVME_CSTS_RDY bit from ctrl->csts when the controller is being disabled from nvmet_pci_epf_poll_cc_work() into nvmet_pci_epf_disable_ctrl(). Fixes: 0faa0fe6f90e ("nvmet: New NVMe PCI endpoint function target driver") Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/pci-epf.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index ac30b42cc6221..efd4623fb0022 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -1822,14 +1822,14 @@ static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) if (ctrl->io_sqes < sizeof(struct nvme_command)) { dev_err(ctrl->dev, "Unsupported I/O SQES %zu (need %zu)\n", ctrl->io_sqes, sizeof(struct nvme_command)); - return -EINVAL; + goto err; } ctrl->io_cqes = 1UL << nvmet_cc_iocqes(ctrl->cc); if (ctrl->io_cqes < sizeof(struct nvme_completion)) { dev_err(ctrl->dev, "Unsupported I/O CQES %zu (need %zu)\n", ctrl->io_sqes, sizeof(struct nvme_completion)); - return -EINVAL; + goto err; } /* Create the admin queue. */ @@ -1844,7 +1844,7 @@ static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) qsize, pci_addr, 0); if (status != NVME_SC_SUCCESS) { dev_err(ctrl->dev, "Failed to create admin completion queue\n"); - return -EINVAL; + goto err; } qsize = aqa & 0x00000fff; @@ -1854,17 +1854,22 @@ static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) if (status != NVME_SC_SUCCESS) { dev_err(ctrl->dev, "Failed to create admin submission queue\n"); nvmet_pci_epf_delete_cq(ctrl->tctrl, 0); - return -EINVAL; + goto err; } ctrl->sq_ab = NVMET_PCI_EPF_SQ_AB; ctrl->irq_vector_threshold = NVMET_PCI_EPF_IV_THRESHOLD; ctrl->enabled = true; + ctrl->csts = NVME_CSTS_RDY; /* Start polling the controller SQs. */ schedule_delayed_work(&ctrl->poll_sqs, 0); return 0; + +err: + ctrl->csts = 0; + return -EINVAL; } static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) @@ -1889,6 +1894,8 @@ static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) /* Delete the admin queue last. */ nvmet_pci_epf_delete_sq(ctrl->tctrl, 0); nvmet_pci_epf_delete_cq(ctrl->tctrl, 0); + + ctrl->csts &= ~NVME_CSTS_RDY; } static void nvmet_pci_epf_poll_cc_work(struct work_struct *work) @@ -1909,13 +1916,10 @@ static void nvmet_pci_epf_poll_cc_work(struct work_struct *work) ret = nvmet_pci_epf_enable_ctrl(ctrl); if (ret) return; - ctrl->csts |= NVME_CSTS_RDY; } - if (!nvmet_cc_en(new_cc) && nvmet_cc_en(old_cc)) { + if (!nvmet_cc_en(new_cc) && nvmet_cc_en(old_cc)) nvmet_pci_epf_disable_ctrl(ctrl); - ctrl->csts &= ~NVME_CSTS_RDY; - } if (nvmet_cc_shn(new_cc) && !nvmet_cc_shn(old_cc)) { nvmet_pci_epf_disable_ctrl(ctrl); -- GitLab From ffa35567632c0059e7f380ed155e26a07ec4153f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 13 Feb 2025 15:52:29 +0900 Subject: [PATCH 0802/1585] nvmet: pci-epf: Do not uselessly write the CSTS register The function nvmet_pci_epf_poll_cc_work() will do nothing if there are no changes to the controller configuration (CC) register. However, even for such case, this function still calls nvmet_update_cc() and uselessly writes the CSTS register. Avoid this by simply rescheduling the poll_cc work if the CC register has not changed. Also reschedule the poll_cc work if the function nvmet_pci_epf_enable_ctrl() fails to allow the host the chance to try again enabling the controller. While at it, since there is no point in trying to handle the CC register as quickly as possible, change the poll_cc work scheduling interval to 10 ms (from 5ms), to avoid excessive read accesses to that register. Fixes: 0faa0fe6f90e ("nvmet: New NVMe PCI endpoint function target driver") Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/pci-epf.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index efd4623fb0022..b646a8f468ea9 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -46,7 +46,7 @@ static DEFINE_MUTEX(nvmet_pci_epf_ports_mutex); /* * BAR CC register and SQ polling intervals. */ -#define NVMET_PCI_EPF_CC_POLL_INTERVAL msecs_to_jiffies(5) +#define NVMET_PCI_EPF_CC_POLL_INTERVAL msecs_to_jiffies(10) #define NVMET_PCI_EPF_SQ_POLL_INTERVAL msecs_to_jiffies(5) #define NVMET_PCI_EPF_SQ_POLL_IDLE msecs_to_jiffies(5000) @@ -1910,12 +1910,15 @@ static void nvmet_pci_epf_poll_cc_work(struct work_struct *work) old_cc = ctrl->cc; new_cc = nvmet_pci_epf_bar_read32(ctrl, NVME_REG_CC); + if (new_cc == old_cc) + goto reschedule_work; + ctrl->cc = new_cc; if (nvmet_cc_en(new_cc) && !nvmet_cc_en(old_cc)) { ret = nvmet_pci_epf_enable_ctrl(ctrl); if (ret) - return; + goto reschedule_work; } if (!nvmet_cc_en(new_cc) && nvmet_cc_en(old_cc)) @@ -1932,6 +1935,7 @@ static void nvmet_pci_epf_poll_cc_work(struct work_struct *work) nvmet_update_cc(ctrl->tctrl, ctrl->cc); nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CSTS, ctrl->csts); +reschedule_work: schedule_delayed_work(&ctrl->poll_cc, NVMET_PCI_EPF_CC_POLL_INTERVAL); } -- GitLab From 01ef7ff7dd3cd8cc71af8d1d1496be853281a948 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 13 Feb 2025 15:52:30 +0900 Subject: [PATCH 0803/1585] nvmet: pci-epf: Avoid RCU stalls under heavy workload The delayed work item function nvmet_pci_epf_poll_sqs_work() polls all submission queues and keeps running in a loop as long as commands are being submitted by the host. Depending on the preemption configuration of the kernel, under heavy command workload, this function can thus run for more than RCU_CPU_STALL_TIMEOUT seconds, leading to a RCU stall: rcu: INFO: rcu_sched self-detected stall on CPU rcu: 5-....: (20998 ticks this GP) idle=4244/1/0x4000000000000000 softirq=301/301 fqs=5132 rcu: (t=21000 jiffies g=-443 q=12 ncpus=8) CPU: 5 UID: 0 PID: 82 Comm: kworker/5:1 Not tainted 6.14.0-rc2 #1 Hardware name: Radxa ROCK 5B (DT) Workqueue: events nvmet_pci_epf_poll_sqs_work [nvmet_pci_epf] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : dw_edma_device_tx_status+0xb8/0x130 lr : dw_edma_device_tx_status+0x9c/0x130 sp : ffff800080b5bbb0 x29: ffff800080b5bbb0 x28: ffff0331c5c78400 x27: ffff0331c1cd1960 x26: ffff0331c0e39010 x25: ffff0331c20e4000 x24: ffff0331c20e4a90 x23: 0000000000000000 x22: 0000000000000001 x21: 00000000005aca33 x20: ffff800080b5bc30 x19: ffff0331c123e370 x18: 000000000ab29e62 x17: ffffb2a878c9c118 x16: ffff0335bde82040 x15: 0000000000000000 x14: 000000000000017b x13: 00000000ee601780 x12: 0000000000000018 x11: 0000000000000000 x10: 0000000000000001 x9 : 0000000000000040 x8 : 00000000ee601780 x7 : 0000000105c785c0 x6 : ffff0331c1027d80 x5 : 0000000001ee7ad6 x4 : ffff0335bdea16c0 x3 : ffff0331c123e438 x2 : 00000000005aca33 x1 : 0000000000000000 x0 : ffff0331c123e410 Call trace: dw_edma_device_tx_status+0xb8/0x130 (P) dma_sync_wait+0x60/0xbc nvmet_pci_epf_dma_transfer+0x128/0x264 [nvmet_pci_epf] nvmet_pci_epf_poll_sqs_work+0x2a0/0x2e0 [nvmet_pci_epf] process_one_work+0x144/0x390 worker_thread+0x27c/0x458 kthread+0xe8/0x19c ret_from_fork+0x10/0x20 The solution for this is simply to explicitly allow rescheduling using cond_resched(). However, since doing so for every loop of nvmet_pci_epf_poll_sqs_work() significantly degrades performance (for 4K random reads using 4 I/O queues, the maximum IOPS goes down from 137 KIOPS to 110 KIOPS), call cond_resched() every second to avoid the RCU stalls. Fixes: 0faa0fe6f90e ("nvmet: New NVMe PCI endpoint function target driver") Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/pci-epf.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index b646a8f468ea9..565d2bd36dcde 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -1694,6 +1694,7 @@ static void nvmet_pci_epf_poll_sqs_work(struct work_struct *work) struct nvmet_pci_epf_ctrl *ctrl = container_of(work, struct nvmet_pci_epf_ctrl, poll_sqs.work); struct nvmet_pci_epf_queue *sq; + unsigned long limit = jiffies; unsigned long last = 0; int i, nr_sqs; @@ -1708,6 +1709,16 @@ static void nvmet_pci_epf_poll_sqs_work(struct work_struct *work) nr_sqs++; } + /* + * If we have been running for a while, reschedule to let other + * tasks run and to avoid RCU stalls. + */ + if (time_is_before_jiffies(limit + secs_to_jiffies(1))) { + cond_resched(); + limit = jiffies; + continue; + } + if (nr_sqs) { last = jiffies; continue; -- GitLab From cd513e0434c3e736c549bc99bf7982658b25114d Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 13 Feb 2025 15:52:31 +0900 Subject: [PATCH 0804/1585] nvme: tcp: Fix compilation warning with W=1 When compiling with W=1, a warning result for the function nvme_tcp_set_queue_io_cpu(): host/tcp.c:1578: warning: Function parameter or struct member 'queue' not described in 'nvme_tcp_set_queue_io_cpu' host/tcp.c:1578: warning: expecting prototype for Track the number of queues assigned to each cpu using a global per(). Prototype was for nvme_tcp_set_queue_io_cpu() instead Avoid this warning by using the regular comment format for the function nvme_tcp_set_queue_io_cpu() instead of the kdoc comment format. Fixes: 32193789878c ("nvme-tcp: Fix I/O queue cpu spreading for multiple controllers") Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 038b35238c26d..a7398e9e5f71c 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1608,7 +1608,7 @@ static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue) ctrl->io_queues[HCTX_TYPE_POLL]; } -/** +/* * Track the number of queues assigned to each cpu using a global per-cpu * counter and select the least used cpu from the mq_map. Our goal is to spread * different controllers I/O threads across different cpu cores. -- GitLab From 578539e0969028f711c34d9a4565931edfe1d730 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 24 Jan 2025 11:43:10 -0700 Subject: [PATCH 0805/1585] nvme-tcp: fix connect failure on receiving partial ICResp PDU nvme_tcp_init_connection() attempts to receive an ICResp PDU but only checks that the return value from recvmsg() is non-negative. If the sender closes the TCP connection or sends fewer than 128 bytes, this check will pass even though the full PDU wasn't received. Ensure the full ICResp PDU is received by checking that recvmsg() returns the expected 128 bytes. Additionally set the MSG_WAITALL flag for recvmsg(), as a sender could split the ICResp over multiple TCP frames. Without MSG_WAITALL, recvmsg() could return prematurely with only part of the PDU. Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver") Signed-off-by: Caleb Sander Mateos Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index a7398e9e5f71c..8a9131c95a3da 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1492,11 +1492,14 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) msg.msg_control = cbuf; msg.msg_controllen = sizeof(cbuf); } + msg.msg_flags = MSG_WAITALL; ret = kernel_recvmsg(queue->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags); - if (ret < 0) { + if (ret < sizeof(*icresp)) { pr_warn("queue %d: failed to receive icresp, error %d\n", nvme_tcp_queue_id(queue), ret); + if (ret >= 0) + ret = -ECONNRESET; goto free_icresp; } ret = -ENOTCONN; -- GitLab From 487a3ea7b1b8ba2ca7d2c2bb3c3594dc360d6261 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Thu, 13 Feb 2025 10:05:14 -0700 Subject: [PATCH 0806/1585] nvme/ioctl: add missing space in err message nvme_validate_passthru_nsid() logs an err message whose format string is split over 2 lines. There is a missing space between the two pieces, resulting in log lines like "... does not match nsid (1)of namespace". Add the missing space between ")" and "of". Also combine the format string pieces onto a single line to make the err message easier to grep. Fixes: e7d4b5493a2d ("nvme: factor out a nvme_validate_passthru_nsid helper") Signed-off-by: Caleb Sander Mateos Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/ioctl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index e8930146847af..b1b46c2713e1c 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -283,8 +283,7 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, { if (ns && nsid != ns->head->ns_id) { dev_err(ctrl->device, - "%s: nsid (%u) in cmd does not match nsid (%u)" - "of namespace\n", + "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n", current->comm, nsid, ns->head->ns_id); return false; } -- GitLab From d422247d14a53fe825b1778edf104167d8fd8f3f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 13 Feb 2025 15:49:59 +0900 Subject: [PATCH 0807/1585] nvme: Cleanup the definition of the controller config register fields Reorganized the enum used to define the fields of the contrller configuration (CC) register in include/linux/nvme.h to: 1) Group together all the values defined for each field. 2) Add the missing field masks definitions. 3) Add comments to describe the enum and each field. Signed-off-by: Damien Le Moal Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme.h | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index fe3b60818fdcf..2dc05b1c3283d 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -199,28 +199,54 @@ enum { #define NVME_NVM_IOSQES 6 #define NVME_NVM_IOCQES 4 +/* + * Controller Configuration (CC) register (Offset 14h) + */ enum { + /* Enable (EN): bit 0 */ NVME_CC_ENABLE = 1 << 0, NVME_CC_EN_SHIFT = 0, + + /* Bits 03:01 are reserved (NVMe Base Specification rev 2.1) */ + + /* I/O Command Set Selected (CSS): bits 06:04 */ NVME_CC_CSS_SHIFT = 4, - NVME_CC_MPS_SHIFT = 7, - NVME_CC_AMS_SHIFT = 11, - NVME_CC_SHN_SHIFT = 14, - NVME_CC_IOSQES_SHIFT = 16, - NVME_CC_IOCQES_SHIFT = 20, + NVME_CC_CSS_MASK = 7 << NVME_CC_CSS_SHIFT, NVME_CC_CSS_NVM = 0 << NVME_CC_CSS_SHIFT, NVME_CC_CSS_CSI = 6 << NVME_CC_CSS_SHIFT, - NVME_CC_CSS_MASK = 7 << NVME_CC_CSS_SHIFT, + + /* Memory Page Size (MPS): bits 10:07 */ + NVME_CC_MPS_SHIFT = 7, + NVME_CC_MPS_MASK = 0xf << NVME_CC_MPS_SHIFT, + + /* Arbitration Mechanism Selected (AMS): bits 13:11 */ + NVME_CC_AMS_SHIFT = 11, + NVME_CC_AMS_MASK = 7 << NVME_CC_AMS_SHIFT, NVME_CC_AMS_RR = 0 << NVME_CC_AMS_SHIFT, NVME_CC_AMS_WRRU = 1 << NVME_CC_AMS_SHIFT, NVME_CC_AMS_VS = 7 << NVME_CC_AMS_SHIFT, + + /* Shutdown Notification (SHN): bits 15:14 */ + NVME_CC_SHN_SHIFT = 14, + NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT, NVME_CC_SHN_NONE = 0 << NVME_CC_SHN_SHIFT, NVME_CC_SHN_NORMAL = 1 << NVME_CC_SHN_SHIFT, NVME_CC_SHN_ABRUPT = 2 << NVME_CC_SHN_SHIFT, - NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT, + + /* I/O Submission Queue Entry Size (IOSQES): bits 19:16 */ + NVME_CC_IOSQES_SHIFT = 16, + NVME_CC_IOSQES_MASK = 0xf << NVME_CC_IOSQES_SHIFT, NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT, + + /* I/O Completion Queue Entry Size (IOCQES): bits 23:20 */ + NVME_CC_IOCQES_SHIFT = 20, + NVME_CC_IOCQES_MASK = 0xf << NVME_CC_IOCQES_SHIFT, NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT, + + /* Controller Ready Independent of Media Enable (CRIME): bit 24 */ NVME_CC_CRIME = 1 << 24, + + /* Bits 25:31 are reserved (NVMe Base Specification rev 2.1) */ }; enum { -- GitLab From 2ba8cf918f0d1873cd5430ae2cc3c41711a144d7 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 13 Feb 2025 15:50:00 +0900 Subject: [PATCH 0808/1585] nvmet: Use enum definitions instead of hardcoded values Change the definition of the inline functions nvmet_cc_en(), nvmet_cc_css(), nvmet_cc_mps(), nvmet_cc_ams(), nvmet_cc_shn(), nvmet_cc_iosqes(), and nvmet_cc_iocqes() to use the enum difinitions in include/linux/nvme.h instead of hardcoded values. Signed-off-by: Damien Le Moal Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/nvmet.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 4be8d22d2d8d4..d2c1233981e1a 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -784,37 +784,37 @@ u16 nvmet_report_invalid_opcode(struct nvmet_req *req); static inline bool nvmet_cc_en(u32 cc) { - return (cc >> NVME_CC_EN_SHIFT) & 0x1; + return (cc & NVME_CC_ENABLE) >> NVME_CC_EN_SHIFT; } static inline u8 nvmet_cc_css(u32 cc) { - return (cc >> NVME_CC_CSS_SHIFT) & 0x7; + return (cc & NVME_CC_CSS_MASK) >> NVME_CC_CSS_SHIFT; } static inline u8 nvmet_cc_mps(u32 cc) { - return (cc >> NVME_CC_MPS_SHIFT) & 0xf; + return (cc & NVME_CC_MPS_MASK) >> NVME_CC_MPS_SHIFT; } static inline u8 nvmet_cc_ams(u32 cc) { - return (cc >> NVME_CC_AMS_SHIFT) & 0x7; + return (cc & NVME_CC_AMS_MASK) >> NVME_CC_AMS_SHIFT; } static inline u8 nvmet_cc_shn(u32 cc) { - return (cc >> NVME_CC_SHN_SHIFT) & 0x3; + return (cc & NVME_CC_SHN_MASK) >> NVME_CC_SHN_SHIFT; } static inline u8 nvmet_cc_iosqes(u32 cc) { - return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf; + return (cc & NVME_CC_IOSQES_MASK) >> NVME_CC_IOSQES_SHIFT; } static inline u8 nvmet_cc_iocqes(u32 cc) { - return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf; + return (cc & NVME_CC_IOCQES_MASK) >> NVME_CC_IOCQES_SHIFT; } /* Convert a 32-bit number to a 16-bit 0's based number */ -- GitLab From eefa72a15ea03fd009333aaa9f0e360b2578e434 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Thu, 13 Feb 2025 11:12:59 -0500 Subject: [PATCH 0809/1585] apple-nvme: Release power domains when probe fails Signed-off-by: Hector Martin Reviewed-by: Neal Gompa Reviewed-by: Sven Peter Signed-off-by: Alyssa Rosenzweig Signed-off-by: Keith Busch --- drivers/nvme/host/apple.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index 1de11b722f049..7995f0776bc06 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -1516,6 +1516,7 @@ static struct apple_nvme *apple_nvme_alloc(struct platform_device *pdev) return anv; put_dev: + apple_nvme_detach_genpd(anv); put_device(anv->dev); return ERR_PTR(ret); } @@ -1549,6 +1550,7 @@ static int apple_nvme_probe(struct platform_device *pdev) nvme_uninit_ctrl(&anv->ctrl); out_put_ctrl: nvme_put_ctrl(&anv->ctrl); + apple_nvme_detach_genpd(anv); return ret; } -- GitLab From 3f22421f6a240b33ab8ffbf662bf0a8f336f405b Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Thu, 13 Feb 2025 11:12:58 -0500 Subject: [PATCH 0810/1585] apple-nvme: Support coprocessors left idle iBoot on at least some firmwares/machines leaves ANS2 running, requiring a wake command instead of a CPU boot (and if we reset ANS2 in that state, everything breaks). Only stop the CPU if RTKit was running, and only do the reset dance if the CPU is stopped. Normal shutdown handoff: - RTKit not yet running - CPU detected not running - Reset - CPU powerup - RTKit boot wait ANS2 left running/idle: - RTKit not yet running - CPU detected running - RTKit wake message Sleep/resume cycle: - RTKit shutdown - CPU stopped - (sleep here) - CPU detected not running - Reset - CPU powerup - RTKit boot wait Shutdown or device removal: - RTKit shutdown - CPU stopped Therefore, the CPU running bit serves as a consistent flag of whether the coprocessor is fully stopped or just idle. Signed-off-by: Hector Martin Reviewed-by: Neal Gompa Reviewed-by: Sven Peter Signed-off-by: Alyssa Rosenzweig Signed-off-by: Keith Busch --- drivers/nvme/host/apple.c | 53 ++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index 7995f0776bc06..a060f69558e76 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -1011,25 +1011,37 @@ static void apple_nvme_reset_work(struct work_struct *work) ret = apple_rtkit_shutdown(anv->rtk); if (ret) goto out; + + writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL); } - writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL); + /* + * Only do the soft-reset if the CPU is not running, which means either we + * or the previous stage shut it down cleanly. + */ + if (!(readl(anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL) & + APPLE_ANS_COPROC_CPU_CONTROL_RUN)) { - ret = reset_control_assert(anv->reset); - if (ret) - goto out; + ret = reset_control_assert(anv->reset); + if (ret) + goto out; - ret = apple_rtkit_reinit(anv->rtk); - if (ret) - goto out; + ret = apple_rtkit_reinit(anv->rtk); + if (ret) + goto out; - ret = reset_control_deassert(anv->reset); - if (ret) - goto out; + ret = reset_control_deassert(anv->reset); + if (ret) + goto out; + + writel(APPLE_ANS_COPROC_CPU_CONTROL_RUN, + anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL); + + ret = apple_rtkit_boot(anv->rtk); + } else { + ret = apple_rtkit_wake(anv->rtk); + } - writel(APPLE_ANS_COPROC_CPU_CONTROL_RUN, - anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL); - ret = apple_rtkit_boot(anv->rtk); if (ret) { dev_err(anv->dev, "ANS did not boot"); goto out; @@ -1565,9 +1577,12 @@ static void apple_nvme_remove(struct platform_device *pdev) apple_nvme_disable(anv, true); nvme_uninit_ctrl(&anv->ctrl); - if (apple_rtkit_is_running(anv->rtk)) + if (apple_rtkit_is_running(anv->rtk)) { apple_rtkit_shutdown(anv->rtk); + writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL); + } + apple_nvme_detach_genpd(anv); } @@ -1576,8 +1591,11 @@ static void apple_nvme_shutdown(struct platform_device *pdev) struct apple_nvme *anv = platform_get_drvdata(pdev); apple_nvme_disable(anv, true); - if (apple_rtkit_is_running(anv->rtk)) + if (apple_rtkit_is_running(anv->rtk)) { apple_rtkit_shutdown(anv->rtk); + + writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL); + } } static int apple_nvme_resume(struct device *dev) @@ -1594,10 +1612,11 @@ static int apple_nvme_suspend(struct device *dev) apple_nvme_disable(anv, true); - if (apple_rtkit_is_running(anv->rtk)) + if (apple_rtkit_is_running(anv->rtk)) { ret = apple_rtkit_shutdown(anv->rtk); - writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL); + writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL); + } return ret; } -- GitLab From ed83aff5a94e1d623c007159a6a7f1c3ef202c6c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 12 Feb 2025 12:01:15 +0100 Subject: [PATCH 0811/1585] s390: Update defconfigs Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 2 ++ arch/s390/configs/defconfig | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 44f01a4bc810f..80bdfbae6e5b4 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -469,6 +469,7 @@ CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y # CONFIG_MD_BITMAP_FILE is not set +CONFIG_MD_LINEAR=m CONFIG_MD_CLUSTER=m CONFIG_BCACHE=m CONFIG_BLK_DEV_DM=y @@ -874,6 +875,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=300 CONFIG_LATENCYTOP=y CONFIG_BOOTTIME_TRACING=y CONFIG_FUNCTION_GRAPH_RETVAL=y +CONFIG_FUNCTION_GRAPH_RETADDR=y CONFIG_FPROBE=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 8bcd37edd3c97..449a0e996b963 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -459,6 +459,7 @@ CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y # CONFIG_MD_BITMAP_FILE is not set +CONFIG_MD_LINEAR=m CONFIG_MD_CLUSTER=m CONFIG_BCACHE=m CONFIG_BLK_DEV_DM=y @@ -825,6 +826,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_LATENCYTOP=y CONFIG_BOOTTIME_TRACING=y CONFIG_FUNCTION_GRAPH_RETVAL=y +CONFIG_FUNCTION_GRAPH_RETADDR=y CONFIG_FPROBE=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y -- GitLab From 173767c218cc1da74704e7863f165ac8a9796f3e Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 13 Feb 2025 21:16:14 +0000 Subject: [PATCH 0812/1585] s390/purgatory: Use -D__DISABLE_EXPORTS The object files in purgatory do not export symbols, so disable exports for all the object files, not only sha256.o, with -D__DISABLE_EXPORTS. This fixes a build failure with CONFIG_GENDWARFKSYMS, where we would otherwise attempt to calculate symbol versions for purgatory objects and fail because they're not built with debugging information: error: gendwarfksyms: process_module: dwarf_get_units failed: no debugging information? make[5]: *** [../scripts/Makefile.build:207: arch/s390/purgatory/string.o] Error 1 make[5]: *** Deleting file 'arch/s390/purgatory/string.o' Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202502120752.U3fOKScQ-lkp@intel.com/ Signed-off-by: Sami Tolvanen Link: https://lore.kernel.org/r/20250213211614.3537605-2-samitolvanen@google.com Acked-by: Vasily Gorbik Signed-off-by: Vasily Gorbik --- arch/s390/purgatory/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index bdcf2a3b6c41b..bd39b36e7bd68 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -8,7 +8,7 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE $(call if_changed_rule,cc_o_c) -CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY +CFLAGS_sha256.o := -D__NO_FORTIFY $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(call if_changed_rule,as_o_S) @@ -19,9 +19,11 @@ KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding KBUILD_CFLAGS += -Os -m64 -msoft-float -fno-common KBUILD_CFLAGS += -fno-stack-protector KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING +KBUILD_CFLAGS += -D__DISABLE_EXPORTS KBUILD_CFLAGS += $(CLANG_FLAGS) KBUILD_CFLAGS += $(call cc-option,-fno-PIE) KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS)) +KBUILD_AFLAGS += -D__DISABLE_EXPORTS # Since we link purgatory with -r unresolved symbols are not checked, so we # also link a purgatory.chk binary without -r to check for unresolved symbols. -- GitLab From c3a589fd9fcbf295a7402a4b188dc9277d505f4f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 18 Feb 2025 12:11:34 +0100 Subject: [PATCH 0813/1585] s390/boot: Fix ESSA detection The cmma_test_essa() inline assembly uses tmp as input and output, however tmp is specified as output only, which allows the compiler to optimize the initialization of tmp away. Therefore the ESSA detection may or may not work depending on previous contents of the register that the compiler selected for tmp. Fix this by using the correct constraint modifier. Fixes: 468a3bc2b7b9 ("s390/cmma: move parsing of cmma kernel parameter to early boot code") Cc: stable@vger.kernel.org Signed-off-by: Heiko Carstens Reviewed-by: Vasily Gorbik Signed-off-by: Vasily Gorbik --- arch/s390/boot/startup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 885bd1dd2c82f..9276e0576d0ab 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -86,7 +86,7 @@ static int cmma_test_essa(void) : [reg1] "=&d" (reg1), [reg2] "=&a" (reg2), [rc] "+&d" (rc), - [tmp] "=&d" (tmp), + [tmp] "+&d" (tmp), "+Q" (get_lowcore()->program_new_psw), "=Q" (old) : [psw_old] "a" (&old), -- GitLab From 293f324ce96d700112c726682b14094d1b54e09c Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Thu, 13 Feb 2025 13:06:21 -0800 Subject: [PATCH 0814/1585] tools: Unify top-level quiet infrastructure Commit f2868b1a66d4f40f ("perf tools: Expose quiet/verbose variables in Makefile.perf") moved the quiet infrastructure out of tools/build/Makefile.build and into the top-level Makefile.perf file so that the quiet infrastructure could be used throughout perf and not just in Makefile.build. Extract out the quiet infrastructure into Makefile.include so that it can be leveraged outside of perf. Fixes: f2868b1a66d4f40f ("perf tools: Expose quiet/verbose variables in Makefile.perf") Reviewed-by: Jiri Olsa Signed-off-by: Charlie Jenkins Acked-by: Andrii Nakryiko Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Benjamin Tissoires Cc: Daniel Borkmann Cc: Daniel Lezcano Cc: Eduard Zingerman Cc: Hao Luo Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Kosina Cc: John Fastabend Cc: Josh Poimboeuf Cc: KP Singh Cc: Lukasz Luba Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Mykola Lysenko Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Quentin Monnet Cc: Rafael J. Wysocki Cc: Shuah Khan Cc: Song Liu Cc: Stanislav Fomichev Cc: Steven Rostedt (VMware) Cc: Yonghong Song Cc: Zhang Rui Link: https://lore.kernel.org/r/20250213-quiet_tools-v3-1-07de4482a581@rivosinc.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile | 8 +------ tools/perf/Makefile.perf | 41 ---------------------------------- tools/scripts/Makefile.include | 30 +++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 48 deletions(-) diff --git a/tools/build/Makefile b/tools/build/Makefile index 18ad131f6ea74..63ef218787616 100644 --- a/tools/build/Makefile +++ b/tools/build/Makefile @@ -17,13 +17,7 @@ $(call allow-override,LD,$(CROSS_COMPILE)ld) export HOSTCC HOSTLD HOSTAR -ifeq ($(V),1) - Q = -else - Q = @ -endif - -export Q srctree CC LD +export srctree CC LD MAKEFLAGS := --no-print-directory build := -f $(srctree)/tools/build/Makefile.build dir=. obj diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 55d6ce9ea52fb..05c083bb11220 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -161,47 +161,6 @@ export VPATH SOURCE := $(shell ln -sf $(srctree)/tools/perf $(OUTPUT)/source) endif -# Beautify output -# --------------------------------------------------------------------------- -# -# Most of build commands in Kbuild start with "cmd_". You can optionally define -# "quiet_cmd_*". If defined, the short log is printed. Otherwise, no log from -# that command is printed by default. -# -# e.g.) -# quiet_cmd_depmod = DEPMOD $(MODLIB) -# cmd_depmod = $(srctree)/scripts/depmod.sh $(DEPMOD) $(KERNELRELEASE) -# -# A simple variant is to prefix commands with $(Q) - that's useful -# for commands that shall be hidden in non-verbose mode. -# -# $(Q)$(MAKE) $(build)=scripts/basic -# -# To put more focus on warnings, be less verbose as default -# Use 'make V=1' to see the full commands - -ifeq ($(V),1) - quiet = - Q = -else - quiet=quiet_ - Q=@ -endif - -# If the user is running make -s (silent mode), suppress echoing of commands -# make-4.0 (and later) keep single letter options in the 1st word of MAKEFLAGS. -ifeq ($(filter 3.%,$(MAKE_VERSION)),) -short-opts := $(firstword -$(MAKEFLAGS)) -else -short-opts := $(filter-out --%,$(MAKEFLAGS)) -endif - -ifneq ($(findstring s,$(short-opts)),) - quiet=silent_ -endif - -export quiet Q - # Do not use make's built-in rules # (this improves performance and avoids hard-to-debug behaviour); MAKEFLAGS += -r diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 0aa4005017c72..45f4abef70640 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -136,6 +136,33 @@ else NO_SUBDIR = : endif +# Beautify output +# --------------------------------------------------------------------------- +# +# Most of build commands in Kbuild start with "cmd_". You can optionally define +# "quiet_cmd_*". If defined, the short log is printed. Otherwise, no log from +# that command is printed by default. +# +# e.g.) +# quiet_cmd_depmod = DEPMOD $(MODLIB) +# cmd_depmod = $(srctree)/scripts/depmod.sh $(DEPMOD) $(KERNELRELEASE) +# +# A simple variant is to prefix commands with $(Q) - that's useful +# for commands that shall be hidden in non-verbose mode. +# +# $(Q)$(MAKE) $(build)=scripts/basic +# +# To put more focus on warnings, be less verbose as default +# Use 'make V=1' to see the full commands + +ifeq ($(V),1) + quiet = + Q = +else + quiet = quiet_ + Q = @ +endif + # If the user is running make -s (silent mode), suppress echoing of commands # make-4.0 (and later) keep single letter options in the 1st word of MAKEFLAGS. ifeq ($(filter 3.%,$(MAKE_VERSION)),) @@ -146,8 +173,11 @@ endif ifneq ($(findstring s,$(short-opts)),) silent=1 + quiet=silent_ endif +export quiet Q + # # Define a callable command for descending to a new directory # -- GitLab From d403120cb9d4787b283ea202b2162f459d18fe9d Mon Sep 17 00:00:00 2001 From: Kurt Borja Date: Wed, 12 Feb 2025 14:30:58 -0500 Subject: [PATCH 0815/1585] ACPI: platform_profile: Fix memory leak in profile_class_is_visible() If class_find_device() finds a device, it's reference count is incremented. Call put_device() to drop this reference before returning. Fixes: 77be5cacb2c2 ("ACPI: platform_profile: Create class for ACPI platform profile") Signed-off-by: Kurt Borja Reviewed-by: Mark Pearson Link: https://patch.msgid.link/20250212193058.32110-1-kuurtb@gmail.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/platform_profile.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/platform_profile.c b/drivers/acpi/platform_profile.c index fc92e43d0fe93..2ad53cc6aae53 100644 --- a/drivers/acpi/platform_profile.c +++ b/drivers/acpi/platform_profile.c @@ -417,8 +417,14 @@ static int profile_class_registered(struct device *dev, const void *data) static umode_t profile_class_is_visible(struct kobject *kobj, struct attribute *attr, int idx) { - if (!class_find_device(&platform_profile_class, NULL, NULL, profile_class_registered)) + struct device *dev; + + dev = class_find_device(&platform_profile_class, NULL, NULL, profile_class_registered); + if (!dev) return 0; + + put_device(dev); + return attr->mode; } -- GitLab From 643f209ba3fdd4099416aaf9efa8266f7366d6fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Thu, 23 Jan 2025 12:22:04 -0800 Subject: [PATCH 0816/1585] drm/xe: Make GUC binaries dump consistent with other binaries in devcoredump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All other(hwsp, hwctx and vmas) binaries follow this format: [name].length: 0x1000 [name].data: xxxxxxx [name].error: errno The error one is just in case by some reason it was not able to capture the binary. So this GuC binaries should follow the same patern. v2: - renamed GUC binary to LOG Cc: John Harrison Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20250123202307.95103-3-jose.souza@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit cb1f868ca13756c0c18ba54d1591332476760d07) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 6 ++++-- drivers/gpu/drm/xe/xe_guc_log.c | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 50c8076b51585..497036675a38c 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1723,9 +1723,11 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, drm_printf(p, "\tg2h outstanding: %d\n", snapshot->g2h_outstanding); - if (snapshot->ctb) - xe_print_blob_ascii85(p, "CTB data", '\n', + if (snapshot->ctb) { + drm_printf(p, "[CTB].length: 0x%lx\n", snapshot->ctb_size); + xe_print_blob_ascii85(p, "[CTB].data", '\n', snapshot->ctb, 0, snapshot->ctb_size); + } } else { drm_puts(p, "CT disabled\n"); } diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 2baa4d95571fb..2457572ed86ad 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -208,10 +208,11 @@ void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_ drm_printf(p, "GuC timestamp: 0x%08llX [%llu]\n", snapshot->stamp, snapshot->stamp); drm_printf(p, "Log level: %u\n", snapshot->level); + drm_printf(p, "[LOG].length: 0x%lx\n", snapshot->size); remain = snapshot->size; for (i = 0; i < snapshot->num_chunks; i++) { size_t size = min(GUC_LOG_CHUNK_SIZE, remain); - const char *prefix = i ? NULL : "Log data"; + const char *prefix = i ? NULL : "[LOG].data"; char suffix = i == snapshot->num_chunks - 1 ? '\n' : 0; xe_print_blob_ascii85(p, prefix, suffix, snapshot->copy[i], 0, size); -- GitLab From 42367eca7604e16e170bd6bd94ef61ffdd335f4a Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Thu, 13 Feb 2025 13:06:22 -0800 Subject: [PATCH 0817/1585] tools: Remove redundant quiet setup Q is exported from Makefile.include so it is not necessary to manually set it. Reviewed-by: Jiri Olsa Signed-off-by: Charlie Jenkins Acked-by: Andrii Nakryiko Acked-by: Quentin Monnet Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Benjamin Tissoires Cc: Daniel Borkmann Cc: Daniel Lezcano Cc: Eduard Zingerman Cc: Hao Luo Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Kosina Cc: John Fastabend Cc: Josh Poimboeuf Cc: KP Singh Cc: Lukasz Luba Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Mykola Lysenko Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Shuah Khan Cc: Song Liu Cc: Stanislav Fomichev Cc: Steven Rostedt (VMware) Cc: Yonghong Song Cc: Zhang Rui Link: https://lore.kernel.org/r/20250213-quiet_tools-v3-2-07de4482a581@rivosinc.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm64/tools/Makefile | 6 ------ tools/bpf/Makefile | 6 ------ tools/bpf/bpftool/Documentation/Makefile | 6 ------ tools/bpf/bpftool/Makefile | 6 ------ tools/bpf/resolve_btfids/Makefile | 2 -- tools/bpf/runqslower/Makefile | 5 +---- tools/lib/bpf/Makefile | 13 ------------- tools/lib/perf/Makefile | 13 ------------- tools/lib/thermal/Makefile | 13 ------------- tools/objtool/Makefile | 6 ------ tools/testing/selftests/bpf/Makefile.docs | 6 ------ tools/testing/selftests/hid/Makefile | 2 -- tools/thermal/lib/Makefile | 13 ------------- tools/tracing/latency/Makefile | 6 ------ tools/tracing/rtla/Makefile | 6 ------ tools/verification/rv/Makefile | 6 ------ 16 files changed, 1 insertion(+), 114 deletions(-) diff --git a/tools/arch/arm64/tools/Makefile b/tools/arch/arm64/tools/Makefile index 7b42feedf6471..de4f1b66ef014 100644 --- a/tools/arch/arm64/tools/Makefile +++ b/tools/arch/arm64/tools/Makefile @@ -13,12 +13,6 @@ AWK ?= awk MKDIR ?= mkdir RM ?= rm -ifeq ($(V),1) -Q = -else -Q = @ -endif - arm64_tools_dir = $(top_srcdir)/arch/arm64/tools arm64_sysreg_tbl = $(arm64_tools_dir)/sysreg arm64_gen_sysreg = $(arm64_tools_dir)/gen-sysreg.awk diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index 243b79f2b451e..062bbd6cd048e 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -27,12 +27,6 @@ srctree := $(patsubst %/,%,$(dir $(CURDIR))) srctree := $(patsubst %/,%,$(dir $(srctree))) endif -ifeq ($(V),1) - Q = -else - Q = @ -endif - FEATURE_USER = .bpf FEATURE_TESTS = libbfd disassembler-four-args disassembler-init-styled FEATURE_DISPLAY = libbfd diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index 4315652678b9f..bf843f328812e 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -5,12 +5,6 @@ INSTALL ?= install RM ?= rm -f RMDIR ?= rmdir --ignore-fail-on-non-empty -ifeq ($(V),1) - Q = -else - Q = @ -endif - prefix ?= /usr/local mandir ?= $(prefix)/man man8dir = $(mandir)/man8 diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index dd9f3ec842017..6ea4823b770cb 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -7,12 +7,6 @@ srctree := $(patsubst %/,%,$(dir $(srctree))) srctree := $(patsubst %/,%,$(dir $(srctree))) endif -ifeq ($(V),1) - Q = -else - Q = @ -endif - BPF_DIR = $(srctree)/tools/lib/bpf ifneq ($(OUTPUT),) diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 4b8079f294f65..afbddea3a39c6 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -5,10 +5,8 @@ include ../../scripts/Makefile.arch srctree := $(abspath $(CURDIR)/../../../) ifeq ($(V),1) - Q = msg = else - Q = @ ifeq ($(silent),1) msg = else diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index c4f1f1735af65..e49203ebd48c1 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -26,10 +26,7 @@ VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \ VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \ $(wildcard $(VMLINUX_BTF_PATHS)))) -ifeq ($(V),1) -Q = -else -Q = @ +ifneq ($(V),1) MAKEFLAGS += --no-print-directory submake_extras := feature_display=0 endif diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 857a5f7b413d6..168140f8e6461 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -53,13 +53,6 @@ include $(srctree)/tools/scripts/Makefile.include # copy a bit from Linux kbuild -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - INCLUDES = -I$(or $(OUTPUT),.) \ -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi \ -I$(srctree)/tools/arch/$(SRCARCH)/include @@ -96,12 +89,6 @@ override CFLAGS += $(CLANG_CROSS_FLAGS) # flags specific for shared library SHLIB_FLAGS := -DSHARED -fPIC -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - # Disable command line variables (CFLAGS) override from top # level Makefile (perf), otherwise build Makefile will get # the same command line setup. diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index 3a9b2140aa048..e9a7ac2c062e2 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -39,19 +39,6 @@ libdir = $(prefix)/$(libdir_relative) libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - TEST_ARGS := $(if $(V),-v) # Set compile option CFLAGS diff --git a/tools/lib/thermal/Makefile b/tools/lib/thermal/Makefile index 8890fd57b110c..a1f5e388644d3 100644 --- a/tools/lib/thermal/Makefile +++ b/tools/lib/thermal/Makefile @@ -39,19 +39,6 @@ libdir = $(prefix)/$(libdir_relative) libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - # Set compile option CFLAGS ifdef EXTRA_CFLAGS CFLAGS := $(EXTRA_CFLAGS) diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index f56e277275341..7a65948892e56 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -46,12 +46,6 @@ HOST_OVERRIDES := CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)" AWK = awk MKDIR = mkdir -ifeq ($(V),1) - Q = -else - Q = @ -endif - BUILD_ORC := n ifeq ($(SRCARCH),x86) diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs index eb6a4fea8c794..f7f9e7088bb38 100644 --- a/tools/testing/selftests/bpf/Makefile.docs +++ b/tools/testing/selftests/bpf/Makefile.docs @@ -7,12 +7,6 @@ INSTALL ?= install RM ?= rm -f RMDIR ?= rmdir --ignore-fail-on-non-empty -ifeq ($(V),1) - Q = -else - Q = @ -endif - prefix ?= /usr/local mandir ?= $(prefix)/man man2dir = $(mandir)/man2 diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile index 0336353bd15f0..2839d2612ce3a 100644 --- a/tools/testing/selftests/hid/Makefile +++ b/tools/testing/selftests/hid/Makefile @@ -43,10 +43,8 @@ TEST_GEN_PROGS = hid_bpf hidraw # $3 - target (assumed to be file); only file name will be emitted; # $4 - optional extra arg, emitted as-is, if provided. ifeq ($(V),1) -Q = msg = else -Q = @ msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))"; MAKEFLAGS += --no-print-directory submake_extras := feature_display=0 diff --git a/tools/thermal/lib/Makefile b/tools/thermal/lib/Makefile index f2552f73a64c7..056d212f25cf5 100644 --- a/tools/thermal/lib/Makefile +++ b/tools/thermal/lib/Makefile @@ -39,19 +39,6 @@ libdir = $(prefix)/$(libdir_relative) libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - # Set compile option CFLAGS ifdef EXTRA_CFLAGS CFLAGS := $(EXTRA_CFLAGS) diff --git a/tools/tracing/latency/Makefile b/tools/tracing/latency/Makefile index 6518b03e05c71..257a56b1899f2 100644 --- a/tools/tracing/latency/Makefile +++ b/tools/tracing/latency/Makefile @@ -37,12 +37,6 @@ FEATURE_TESTS += libtracefs FEATURE_DISPLAY := libtraceevent FEATURE_DISPLAY += libtracefs -ifeq ($(V),1) - Q = -else - Q = @ -endif - all: $(LATENCY-COLLECTOR) include $(srctree)/tools/build/Makefile.include diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile index a6a7dee16622d..a1727c414e44b 100644 --- a/tools/tracing/rtla/Makefile +++ b/tools/tracing/rtla/Makefile @@ -37,12 +37,6 @@ FEATURE_DISPLAY := libtraceevent FEATURE_DISPLAY += libtracefs FEATURE_DISPLAY += libcpupower -ifeq ($(V),1) - Q = -else - Q = @ -endif - all: $(RTLA) include $(srctree)/tools/build/Makefile.include diff --git a/tools/verification/rv/Makefile b/tools/verification/rv/Makefile index 411d62b3d8eb9..5b898360ba481 100644 --- a/tools/verification/rv/Makefile +++ b/tools/verification/rv/Makefile @@ -35,12 +35,6 @@ FEATURE_TESTS += libtracefs FEATURE_DISPLAY := libtraceevent FEATURE_DISPLAY += libtracefs -ifeq ($(V),1) - Q = -else - Q = @ -endif - all: $(RV) include $(srctree)/tools/build/Makefile.include -- GitLab From a3f172359e22b2c11b750d23560481a55bf86af1 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Tue, 18 Feb 2025 18:35:35 +1000 Subject: [PATCH 0818/1585] ASoC: tas2764: Fix power control mask Reviewed-by: Neal Gompa Signed-off-by: Hector Martin Signed-off-by: James Calligeros Link: https://patch.msgid.link/20250218-apple-codec-changes-v2-1-932760fd7e07@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas2764.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/tas2764.h b/sound/soc/codecs/tas2764.h index 168af772a898f..d13ecae9c9c2f 100644 --- a/sound/soc/codecs/tas2764.h +++ b/sound/soc/codecs/tas2764.h @@ -25,7 +25,7 @@ /* Power Control */ #define TAS2764_PWR_CTRL TAS2764_REG(0X0, 0x02) -#define TAS2764_PWR_CTRL_MASK GENMASK(1, 0) +#define TAS2764_PWR_CTRL_MASK GENMASK(2, 0) #define TAS2764_PWR_CTRL_ACTIVE 0x0 #define TAS2764_PWR_CTRL_MUTE BIT(0) #define TAS2764_PWR_CTRL_SHUTDOWN BIT(1) -- GitLab From f5468beeab1b1adfc63c2717b1f29ef3f49a5fab Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Tue, 18 Feb 2025 18:36:02 +1000 Subject: [PATCH 0819/1585] ASoC: tas2764: Set the SDOUT polarity correctly TX launch polarity needs to be the opposite of RX capture polarity, to generate the right bit slot alignment. Reviewed-by: Neal Gompa Signed-off-by: Hector Martin Signed-off-by: James Calligeros Link: https://patch.msgid.link/20250218-apple-codec-changes-v2-28-932760fd7e07@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas2764.c | 10 +++++++++- sound/soc/codecs/tas2764.h | 6 ++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index d482cd194c08c..58315eab492a1 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -365,7 +365,7 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) { struct snd_soc_component *component = dai->component; struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component); - u8 tdm_rx_start_slot = 0, asi_cfg_0 = 0, asi_cfg_1 = 0; + u8 tdm_rx_start_slot = 0, asi_cfg_0 = 0, asi_cfg_1 = 0, asi_cfg_4 = 0; int ret; switch (fmt & SND_SOC_DAIFMT_INV_MASK) { @@ -374,12 +374,14 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) fallthrough; case SND_SOC_DAIFMT_NB_NF: asi_cfg_1 = TAS2764_TDM_CFG1_RX_RISING; + asi_cfg_4 = TAS2764_TDM_CFG4_TX_FALLING; break; case SND_SOC_DAIFMT_IB_IF: asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START; fallthrough; case SND_SOC_DAIFMT_IB_NF: asi_cfg_1 = TAS2764_TDM_CFG1_RX_FALLING; + asi_cfg_4 = TAS2764_TDM_CFG4_TX_RISING; break; } @@ -389,6 +391,12 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) if (ret < 0) return ret; + ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG4, + TAS2764_TDM_CFG4_TX_MASK, + asi_cfg_4); + if (ret < 0) + return ret; + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START; diff --git a/sound/soc/codecs/tas2764.h b/sound/soc/codecs/tas2764.h index d13ecae9c9c2f..9490f2686e389 100644 --- a/sound/soc/codecs/tas2764.h +++ b/sound/soc/codecs/tas2764.h @@ -79,6 +79,12 @@ #define TAS2764_TDM_CFG3_RXS_SHIFT 0x4 #define TAS2764_TDM_CFG3_MASK GENMASK(3, 0) +/* TDM Configuration Reg4 */ +#define TAS2764_TDM_CFG4 TAS2764_REG(0X0, 0x0d) +#define TAS2764_TDM_CFG4_TX_MASK BIT(0) +#define TAS2764_TDM_CFG4_TX_RISING 0x0 +#define TAS2764_TDM_CFG4_TX_FALLING BIT(0) + /* TDM Configuration Reg5 */ #define TAS2764_TDM_CFG5 TAS2764_REG(0X0, 0x0e) #define TAS2764_TDM_CFG5_VSNS_MASK BIT(6) -- GitLab From 0c28e4d1e10d2aae608094620bb386e6fd73d55e Mon Sep 17 00:00:00 2001 From: Stuart Hayhurst Date: Thu, 13 Feb 2025 13:38:49 +0000 Subject: [PATCH 0820/1585] HID: corsair-void: Update power supply values with a unified work handler corsair_void_process_receiver can be called from an interrupt context, locking battery_mutex in it was causing a kernel panic. Fix it by moving the critical section into its own work, sharing this work with battery_add_work and battery_remove_work to remove the need for any locking Closes: https://bugzilla.suse.com/show_bug.cgi?id=1236843 Fixes: 6ea2a6fd3872 ("HID: corsair-void: Add Corsair Void headset family driver") Cc: stable@vger.kernel.org Signed-off-by: Stuart Hayhurst Reviewed-by: Jiri Slaby Signed-off-by: Jiri Kosina --- drivers/hid/hid-corsair-void.c | 83 ++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 40 deletions(-) diff --git a/drivers/hid/hid-corsair-void.c b/drivers/hid/hid-corsair-void.c index 56e858066c3c3..afbd67aa97192 100644 --- a/drivers/hid/hid-corsair-void.c +++ b/drivers/hid/hid-corsair-void.c @@ -71,11 +71,9 @@ #include #include -#include #include #include #include -#include #include #include #include @@ -120,6 +118,12 @@ enum { CORSAIR_VOID_BATTERY_CHARGING = 5, }; +enum { + CORSAIR_VOID_ADD_BATTERY = 0, + CORSAIR_VOID_REMOVE_BATTERY = 1, + CORSAIR_VOID_UPDATE_BATTERY = 2, +}; + static enum power_supply_property corsair_void_battery_props[] = { POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_PRESENT, @@ -155,12 +159,12 @@ struct corsair_void_drvdata { struct power_supply *battery; struct power_supply_desc battery_desc; - struct mutex battery_mutex; struct delayed_work delayed_status_work; struct delayed_work delayed_firmware_work; - struct work_struct battery_remove_work; - struct work_struct battery_add_work; + + unsigned long battery_work_flags; + struct work_struct battery_work; }; /* @@ -260,11 +264,9 @@ static void corsair_void_process_receiver(struct corsair_void_drvdata *drvdata, /* Inform power supply if battery values changed */ if (memcmp(&orig_battery_data, battery_data, sizeof(*battery_data))) { - scoped_guard(mutex, &drvdata->battery_mutex) { - if (drvdata->battery) { - power_supply_changed(drvdata->battery); - } - } + set_bit(CORSAIR_VOID_UPDATE_BATTERY, + &drvdata->battery_work_flags); + schedule_work(&drvdata->battery_work); } } @@ -536,29 +538,11 @@ static void corsair_void_firmware_work_handler(struct work_struct *work) } -static void corsair_void_battery_remove_work_handler(struct work_struct *work) -{ - struct corsair_void_drvdata *drvdata; - - drvdata = container_of(work, struct corsair_void_drvdata, - battery_remove_work); - scoped_guard(mutex, &drvdata->battery_mutex) { - if (drvdata->battery) { - power_supply_unregister(drvdata->battery); - drvdata->battery = NULL; - } - } -} - -static void corsair_void_battery_add_work_handler(struct work_struct *work) +static void corsair_void_add_battery(struct corsair_void_drvdata *drvdata) { - struct corsair_void_drvdata *drvdata; struct power_supply_config psy_cfg = {}; struct power_supply *new_supply; - drvdata = container_of(work, struct corsair_void_drvdata, - battery_add_work); - guard(mutex)(&drvdata->battery_mutex); if (drvdata->battery) return; @@ -583,16 +567,42 @@ static void corsair_void_battery_add_work_handler(struct work_struct *work) drvdata->battery = new_supply; } +static void corsair_void_battery_work_handler(struct work_struct *work) +{ + struct corsair_void_drvdata *drvdata = container_of(work, + struct corsair_void_drvdata, battery_work); + + bool add_battery = test_and_clear_bit(CORSAIR_VOID_ADD_BATTERY, + &drvdata->battery_work_flags); + bool remove_battery = test_and_clear_bit(CORSAIR_VOID_REMOVE_BATTERY, + &drvdata->battery_work_flags); + bool update_battery = test_and_clear_bit(CORSAIR_VOID_UPDATE_BATTERY, + &drvdata->battery_work_flags); + + if (add_battery && !remove_battery) { + corsair_void_add_battery(drvdata); + } else if (remove_battery && !add_battery && drvdata->battery) { + power_supply_unregister(drvdata->battery); + drvdata->battery = NULL; + } + + if (update_battery && drvdata->battery) + power_supply_changed(drvdata->battery); + +} + static void corsair_void_headset_connected(struct corsair_void_drvdata *drvdata) { - schedule_work(&drvdata->battery_add_work); + set_bit(CORSAIR_VOID_ADD_BATTERY, &drvdata->battery_work_flags); + schedule_work(&drvdata->battery_work); schedule_delayed_work(&drvdata->delayed_firmware_work, msecs_to_jiffies(100)); } static void corsair_void_headset_disconnected(struct corsair_void_drvdata *drvdata) { - schedule_work(&drvdata->battery_remove_work); + set_bit(CORSAIR_VOID_REMOVE_BATTERY, &drvdata->battery_work_flags); + schedule_work(&drvdata->battery_work); corsair_void_set_unknown_wireless_data(drvdata); corsair_void_set_unknown_batt(drvdata); @@ -678,13 +688,7 @@ static int corsair_void_probe(struct hid_device *hid_dev, drvdata->battery_desc.get_property = corsair_void_battery_get_property; drvdata->battery = NULL; - INIT_WORK(&drvdata->battery_remove_work, - corsair_void_battery_remove_work_handler); - INIT_WORK(&drvdata->battery_add_work, - corsair_void_battery_add_work_handler); - ret = devm_mutex_init(drvdata->dev, &drvdata->battery_mutex); - if (ret) - return ret; + INIT_WORK(&drvdata->battery_work, corsair_void_battery_work_handler); ret = sysfs_create_group(&hid_dev->dev.kobj, &corsair_void_attr_group); if (ret) @@ -721,8 +725,7 @@ static void corsair_void_remove(struct hid_device *hid_dev) struct corsair_void_drvdata *drvdata = hid_get_drvdata(hid_dev); hid_hw_stop(hid_dev); - cancel_work_sync(&drvdata->battery_remove_work); - cancel_work_sync(&drvdata->battery_add_work); + cancel_work_sync(&drvdata->battery_work); if (drvdata->battery) power_supply_unregister(drvdata->battery); -- GitLab From 213e24250feed3bcf58d7594298df2d7e78a88ab Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 28 Jan 2025 07:42:42 -0800 Subject: [PATCH 0821/1585] drm/xe/guc: Fix size_t print format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use %zx format to print size_t to remove the following warning when building for i386: >> drivers/gpu/drm/xe/xe_guc_ct.c:1727:43: warning: format specifies type 'unsigned long' but the argument has type 'size_t' (aka 'unsigned int') [-Wformat] 1727 | drm_printf(p, "[CTB].length: 0x%lx\n", snapshot->ctb_size); | ~~~ ^~~~~~~~~~~~~~~~~~ | %zx Cc: José Roberto de Souza Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501281627.H6nj184e-lkp@intel.com/ Fixes: 643f209ba3fd ("drm/xe: Make GUC binaries dump consistent with other binaries in devcoredump") Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20250128154242.3371687-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 7748289df510638ba61fed86b59ce7d2fb4a194c) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 2 +- drivers/gpu/drm/xe/xe_guc_log.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 497036675a38c..72ad576fc18eb 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1724,7 +1724,7 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, snapshot->g2h_outstanding); if (snapshot->ctb) { - drm_printf(p, "[CTB].length: 0x%lx\n", snapshot->ctb_size); + drm_printf(p, "[CTB].length: 0x%zx\n", snapshot->ctb_size); xe_print_blob_ascii85(p, "[CTB].data", '\n', snapshot->ctb, 0, snapshot->ctb_size); } diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 2457572ed86ad..0ca3056d8bd3f 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -208,7 +208,7 @@ void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_ drm_printf(p, "GuC timestamp: 0x%08llX [%llu]\n", snapshot->stamp, snapshot->stamp); drm_printf(p, "Log level: %u\n", snapshot->level); - drm_printf(p, "[LOG].length: 0x%lx\n", snapshot->size); + drm_printf(p, "[LOG].length: 0x%zx\n", snapshot->size); remain = snapshot->size; for (i = 0; i < snapshot->num_chunks; i++) { size_t size = min(GUC_LOG_CHUNK_SIZE, remain); -- GitLab From 1fc61eeefe10d9996d2b875214d89f0909d03417 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 18 Feb 2025 16:47:40 -0700 Subject: [PATCH 0822/1585] io_uring: fix spelling error in uapi io_uring.h This is obviously not that important, but when changes are synced back from the kernel to liburing, the codespell CI ends up erroring because of this misspelling. Let's just correct it and avoid this biting us again on an import. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index e11c826385277..050fa8eb2e8f8 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -380,7 +380,7 @@ enum io_uring_op { * result will be the number of buffers send, with * the starting buffer ID in cqe->flags as per * usual for provided buffer usage. The buffers - * will be contigious from the starting buffer ID. + * will be contiguous from the starting buffer ID. */ #define IORING_RECVSEND_POLL_FIRST (1U << 0) #define IORING_RECV_MULTISHOT (1U << 1) -- GitLab From f5da7c45188eea71394bf445655cae2df88a7788 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 17 Feb 2025 15:29:05 -0800 Subject: [PATCH 0823/1585] tcp: adjust rcvq_space after updating scaling ratio Since commit under Fixes we set the window clamp in accordance to newly measured rcvbuf scaling_ratio. If the scaling_ratio decreased significantly we may put ourselves in a situation where windows become smaller than rcvq_space, preventing tcp_rcv_space_adjust() from increasing rcvbuf. The significant decrease of scaling_ratio is far more likely since commit 697a6c8cec03 ("tcp: increase the default TCP scaling ratio"), which increased the "default" scaling ratio from ~30% to 50%. Hitting the bad condition depends a lot on TCP tuning, and drivers at play. One of Meta's workloads hits it reliably under following conditions: - default rcvbuf of 125k - sender MTU 1500, receiver MTU 5000 - driver settles on scaling_ratio of 78 for the config above. Initial rcvq_space gets calculated as TCP_INIT_CWND * tp->advmss (10 * 5k = 50k). Once we find out the true scaling ratio and MSS we clamp the windows to 38k. Triggering the condition also depends on the message sequence of this workload. I can't repro the problem with simple iperf or TCP_RR-style tests. Fixes: a2cbb1603943 ("tcp: Update window clamping condition") Reviewed-by: Eric Dumazet Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20250217232905.3162187-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eb82e01da9110..98b8cc7403920 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -243,9 +243,15 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) do_div(val, skb->truesize); tcp_sk(sk)->scaling_ratio = val ? val : 1; - if (old_ratio != tcp_sk(sk)->scaling_ratio) - WRITE_ONCE(tcp_sk(sk)->window_clamp, - tcp_win_from_space(sk, sk->sk_rcvbuf)); + if (old_ratio != tcp_sk(sk)->scaling_ratio) { + struct tcp_sock *tp = tcp_sk(sk); + + val = tcp_win_from_space(sk, sk->sk_rcvbuf); + tcp_set_window_clamp(sk, val); + + if (tp->window_clamp < tp->rcvq_space.space) + tp->rcvq_space.space = tp->window_clamp; + } } icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, tcp_sk(sk)->advmss); -- GitLab From 5644c6b50ffee0a56c1e01430a8c88e34decb120 Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Sun, 9 Feb 2025 23:22:35 -0800 Subject: [PATCH 0824/1585] bpf: skip non exist keys in generic_map_lookup_batch The generic_map_lookup_batch currently returns EINTR if it fails with ENOENT and retries several times on bpf_map_copy_value. The next batch would start from the same location, presuming it's a transient issue. This is incorrect if a map can actually have "holes", i.e. "get_next_key" can return a key that does not point to a valid value. At least the array of maps type may contain such holes legitly. Right now these holes show up, generic batch lookup cannot proceed any more. It will always fail with EINTR errors. Rather, do not retry in generic_map_lookup_batch. If it finds a non existing element, skip to the next key. This simple solution comes with a price that transient errors may not be recovered, and the iteration might cycle back to the first key under parallel deletion. For example, Hou Tao pointed out a following scenario: For LPM trie map: (1) ->map_get_next_key(map, prev_key, key) returns a valid key (2) bpf_map_copy_value() return -ENOMENT It means the key must be deleted concurrently. (3) goto next_key It swaps the prev_key and key (4) ->map_get_next_key(map, prev_key, key) again prev_key points to a non-existing key, for LPM trie it will treat just like prev_key=NULL case, the returned key will be duplicated. With the retry logic, the iteration can continue to the key next to the deleted one. But if we directly skip to the next key, the iteration loop would restart from the first key for the lpm_trie type. However, not all races may be recovered. For example, if current key is deleted after instead of before bpf_map_copy_value, or if the prev_key also gets deleted, then the loop will still restart from the first key for lpm_tire anyway. For generic lookup it might be better to stay simple, i.e. just skip to the next key. To guarantee that the output keys are not duplicated, it is better to implement map type specific batch operations, which can properly lock the trie and synchronize with concurrent mutators. Fixes: cb4d03ab499d ("bpf: Add generic support for lookup batch op") Closes: https://lore.kernel.org/bpf/Z6JXtA1M5jAZx8xD@debian.debian/ Signed-off-by: Yan Zhai Acked-by: Hou Tao Link: https://lore.kernel.org/r/85618439eea75930630685c467ccefeac0942e2b.1739171594.git.yan@cloudflare.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/syscall.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 14d6e99459d32..548ec1c46b787 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1977,8 +1977,6 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file, return err; } -#define MAP_LOOKUP_RETRIES 3 - int generic_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr) @@ -1988,8 +1986,8 @@ int generic_map_lookup_batch(struct bpf_map *map, void __user *values = u64_to_user_ptr(attr->batch.values); void __user *keys = u64_to_user_ptr(attr->batch.keys); void *buf, *buf_prevkey, *prev_key, *key, *value; - int err, retry = MAP_LOOKUP_RETRIES; u32 value_size, cp, max_count; + int err; if (attr->batch.elem_flags & ~BPF_F_LOCK) return -EINVAL; @@ -2035,14 +2033,8 @@ int generic_map_lookup_batch(struct bpf_map *map, err = bpf_map_copy_value(map, key, value, attr->batch.elem_flags); - if (err == -ENOENT) { - if (retry) { - retry--; - continue; - } - err = -EINTR; - break; - } + if (err == -ENOENT) + goto next_key; if (err) goto free_buf; @@ -2057,12 +2049,12 @@ int generic_map_lookup_batch(struct bpf_map *map, goto free_buf; } + cp++; +next_key: if (!prev_key) prev_key = buf_prevkey; swap(prev_key, key); - retry = MAP_LOOKUP_RETRIES; - cp++; cond_resched(); } -- GitLab From d66b7739176d513b81db8b18e8677e30f1b67574 Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Sun, 9 Feb 2025 23:22:39 -0800 Subject: [PATCH 0825/1585] selftests: bpf: test batch lookup on array of maps with holes Iterating through array of maps may encounter non existing keys. The batch operation should not fail on when this happens. Signed-off-by: Yan Zhai Acked-by: Hou Tao Link: https://lore.kernel.org/r/9007237b9606dc2ee44465a4447fe46e13f3bea6.1739171594.git.yan@cloudflare.com Signed-off-by: Alexei Starovoitov --- .../bpf/map_tests/map_in_map_batch_ops.c | 62 +++++++++++++------ 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c index 66191ae9863c1..79c3ccadb9622 100644 --- a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c +++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c @@ -120,11 +120,12 @@ static void validate_fetch_results(int outer_map_fd, static void fetch_and_validate(int outer_map_fd, struct bpf_map_batch_opts *opts, - __u32 batch_size, bool delete_entries) + __u32 batch_size, bool delete_entries, + bool has_holes) { - __u32 *fetched_keys, *fetched_values, total_fetched = 0; - __u32 batch_key = 0, fetch_count, step_size; - int err, max_entries = OUTER_MAP_ENTRIES; + int err, max_entries = OUTER_MAP_ENTRIES - !!has_holes; + __u32 *fetched_keys, *fetched_values, total_fetched = 0, i; + __u32 batch_key = 0, fetch_count, step_size = batch_size; __u32 value_size = sizeof(__u32); /* Total entries needs to be fetched */ @@ -134,9 +135,8 @@ static void fetch_and_validate(int outer_map_fd, "Memory allocation failed for fetched_keys or fetched_values", "error=%s\n", strerror(errno)); - for (step_size = batch_size; - step_size <= max_entries; - step_size += batch_size) { + /* hash map may not always return full batch */ + for (i = 0; i < OUTER_MAP_ENTRIES; i++) { fetch_count = step_size; err = delete_entries ? bpf_map_lookup_and_delete_batch(outer_map_fd, @@ -155,6 +155,7 @@ static void fetch_and_validate(int outer_map_fd, if (err && errno == ENOSPC) { /* Fetch again with higher batch size */ total_fetched = 0; + step_size += batch_size; continue; } @@ -184,18 +185,19 @@ static void fetch_and_validate(int outer_map_fd, } static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type, - enum bpf_map_type inner_map_type) + enum bpf_map_type inner_map_type, + bool has_holes) { + __u32 max_entries = OUTER_MAP_ENTRIES - !!has_holes; __u32 *outer_map_keys, *inner_map_fds; - __u32 max_entries = OUTER_MAP_ENTRIES; LIBBPF_OPTS(bpf_map_batch_opts, opts); __u32 value_size = sizeof(__u32); int batch_size[2] = {5, 10}; __u32 map_index, op_index; int outer_map_fd, ret; - outer_map_keys = calloc(max_entries, value_size); - inner_map_fds = calloc(max_entries, value_size); + outer_map_keys = calloc(OUTER_MAP_ENTRIES, value_size); + inner_map_fds = calloc(OUTER_MAP_ENTRIES, value_size); CHECK((!outer_map_keys || !inner_map_fds), "Memory allocation failed for outer_map_keys or inner_map_fds", "error=%s\n", strerror(errno)); @@ -209,6 +211,24 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type, ((outer_map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) ? 9 : 1000) - map_index; + /* This condition is only meaningful for array of maps. + * + * max_entries == OUTER_MAP_ENTRIES - 1 if it is true. Say + * max_entries is short for n, then outer_map_keys looks like: + * + * [n, n-1, ... 2, 1] + * + * We change it to + * + * [n, n-1, ... 2, 0] + * + * So it will leave key 1 as a hole. It will serve to test the + * correctness when batch on an array: a "non-exist" key might be + * actually allocated and returned from key iteration. + */ + if (has_holes) + outer_map_keys[max_entries - 1]--; + /* batch operation - map_update */ ret = bpf_map_update_batch(outer_map_fd, outer_map_keys, inner_map_fds, &max_entries, &opts); @@ -219,15 +239,17 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type, /* batch operation - map_lookup */ for (op_index = 0; op_index < 2; ++op_index) fetch_and_validate(outer_map_fd, &opts, - batch_size[op_index], false); + batch_size[op_index], false, + has_holes); /* batch operation - map_lookup_delete */ if (outer_map_type == BPF_MAP_TYPE_HASH_OF_MAPS) fetch_and_validate(outer_map_fd, &opts, - max_entries, true /*delete*/); + max_entries, true /*delete*/, + has_holes); /* close all map fds */ - for (map_index = 0; map_index < max_entries; map_index++) + for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++) close(inner_map_fds[map_index]); close(outer_map_fd); @@ -237,16 +259,20 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type, void test_map_in_map_batch_ops_array(void) { - _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY); + _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, false); printf("%s:PASS with inner ARRAY map\n", __func__); - _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH); + _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, false); printf("%s:PASS with inner HASH map\n", __func__); + _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, true); + printf("%s:PASS with inner ARRAY map with holes\n", __func__); + _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, true); + printf("%s:PASS with inner HASH map with holes\n", __func__); } void test_map_in_map_batch_ops_hash(void) { - _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY); + _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY, false); printf("%s:PASS with inner ARRAY map\n", __func__); - _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH); + _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH, false); printf("%s:PASS with inner HASH map\n", __func__); } -- GitLab From 4fa382be430421e1445f9c95c4dc9b7e0949ae8a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 14 Feb 2025 14:43:44 -0800 Subject: [PATCH 0826/1585] scsi: ufs: core: Fix ufshcd_is_ufs_dev_busy() and ufshcd_eh_timed_out() ufshcd_is_ufs_dev_busy(), ufshcd_print_host_state() and ufshcd_eh_timed_out() are used in both modes (legacy mode and MCQ mode). hba->outstanding_reqs only represents the outstanding requests in legacy mode. Hence, change hba->outstanding_reqs into scsi_host_busy(hba->host) in these functions. Fixes: eacb139b77ff ("scsi: ufs: core: mcq: Enable multi-circular queue") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20250214224352.3025151-1-bvanassche@acm.org Reviewed-by: Peter Wang Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 1893a7ad95316..f9303e66bb798 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -266,7 +266,7 @@ static bool ufshcd_has_pending_tasks(struct ufs_hba *hba) static bool ufshcd_is_ufs_dev_busy(struct ufs_hba *hba) { - return hba->outstanding_reqs || ufshcd_has_pending_tasks(hba); + return scsi_host_busy(hba->host) || ufshcd_has_pending_tasks(hba); } static const struct ufs_dev_quirk ufs_fixups[] = { @@ -628,8 +628,8 @@ static void ufshcd_print_host_state(struct ufs_hba *hba) const struct scsi_device *sdev_ufs = hba->ufs_device_wlun; dev_err(hba->dev, "UFS Host state=%d\n", hba->ufshcd_state); - dev_err(hba->dev, "outstanding reqs=0x%lx tasks=0x%lx\n", - hba->outstanding_reqs, hba->outstanding_tasks); + dev_err(hba->dev, "%d outstanding reqs, tasks=0x%lx\n", + scsi_host_busy(hba->host), hba->outstanding_tasks); dev_err(hba->dev, "saved_err=0x%x, saved_uic_err=0x%x\n", hba->saved_err, hba->saved_uic_err); dev_err(hba->dev, "Device power mode=%d, UIC link state=%d\n", @@ -8882,7 +8882,7 @@ static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd) dev_info(hba->dev, "%s() finished; outstanding_tasks = %#lx.\n", __func__, hba->outstanding_tasks); - return hba->outstanding_reqs ? SCSI_EH_RESET_TIMER : SCSI_EH_DONE; + return scsi_host_busy(hba->host) ? SCSI_EH_RESET_TIMER : SCSI_EH_DONE; } static const struct attribute_group *ufshcd_driver_groups[] = { -- GitLab From 415cadd505464d9a11ff5e0f6e0329c127849da5 Mon Sep 17 00:00:00 2001 From: Joshua Washington Date: Fri, 14 Feb 2025 14:43:59 -0800 Subject: [PATCH 0827/1585] gve: set xdp redirect target only when it is available Before this patch the NETDEV_XDP_ACT_NDO_XMIT XDP feature flag is set by default as part of driver initialization, and is never cleared. However, this flag differs from others in that it is used as an indicator for whether the driver is ready to perform the ndo_xdp_xmit operation as part of an XDP_REDIRECT. Kernel helpers xdp_features_(set|clear)_redirect_target exist to convey this meaning. This patch ensures that the netdev is only reported as a redirect target when XDP queues exist to forward traffic. Fixes: 39a7f4aa3e4a ("gve: Add XDP REDIRECT support for GQI-QPL format") Cc: stable@vger.kernel.org Reviewed-by: Praveen Kaligineedi Reviewed-by: Jeroen de Borst Signed-off-by: Joshua Washington Link: https://patch.msgid.link/20250214224417.1237818-1-joshwash@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve.h | 10 ++++++++++ drivers/net/ethernet/google/gve/gve_main.c | 6 +++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 8167cc5fb0df1..78d2a19593d18 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -1116,6 +1116,16 @@ static inline u32 gve_xdp_tx_start_queue_id(struct gve_priv *priv) return gve_xdp_tx_queue_id(priv, 0); } +static inline bool gve_supports_xdp_xmit(struct gve_priv *priv) +{ + switch (priv->queue_format) { + case GVE_GQI_QPL_FORMAT: + return true; + default: + return false; + } +} + /* gqi napi handler defined in gve_main.c */ int gve_napi_poll(struct napi_struct *napi, int budget); diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 533e659b15b31..92237fb0b60c1 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1903,6 +1903,8 @@ static void gve_turndown(struct gve_priv *priv) /* Stop tx queues */ netif_tx_disable(priv->dev); + xdp_features_clear_redirect_target(priv->dev); + gve_clear_napi_enabled(priv); gve_clear_report_stats(priv); @@ -1972,6 +1974,9 @@ static void gve_turnup(struct gve_priv *priv) napi_schedule(&block->napi); } + if (priv->num_xdp_queues && gve_supports_xdp_xmit(priv)) + xdp_features_set_redirect_target(priv->dev, false); + gve_set_napi_enabled(priv); } @@ -2246,7 +2251,6 @@ static void gve_set_netdev_xdp_features(struct gve_priv *priv) if (priv->queue_format == GVE_GQI_QPL_FORMAT) { xdp_features = NETDEV_XDP_ACT_BASIC; xdp_features |= NETDEV_XDP_ACT_REDIRECT; - xdp_features |= NETDEV_XDP_ACT_NDO_XMIT; xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; } else { xdp_features = 0; -- GitLab From 2f56be7f52ece7fc8c16a58ca9683f0a73e288e1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 15 Feb 2025 08:26:46 -0800 Subject: [PATCH 0828/1585] MAINTAINERS: trim the GVE entry We requested in the past that GVE patches coming out of Google should be submitted only by GVE maintainers. There were too many patches posted which didn't follow the subsystem guidance. Recently Joshua was added to maintainers, but even tho he was asked to follow the netdev "FAQ" in the past [1] he does not follow the local customs. It is not reasonable for a person who hasn't read the maintainer entry for the subsystem to be a driver maintainer. We can re-add once Joshua does some on-list reviews to prove the fluency with the upstream process. Link: https://lore.kernel.org/20240610172720.073d5912@kernel.org # [1] Link: https://patch.msgid.link/20250215162646.2446559-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1405ebe703a8f..0bfcbe6a74ea7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9834,7 +9834,6 @@ F: drivers/input/touchscreen/goodix* GOOGLE ETHERNET DRIVERS M: Jeroen de Borst -M: Joshua Washington M: Harshitha Ramamurthy L: netdev@vger.kernel.org S: Maintained -- GitLab From f6093c5ec74d5cc495f89bd359253d9c738d04d9 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Mon, 17 Feb 2025 14:48:11 +0100 Subject: [PATCH 0829/1585] net: pse-pd: pd692x0: Fix power limit retrieval Fix incorrect data offset read in the pd692x0_pi_get_pw_limit callback. The issue was previously unnoticed as it was only used by the regulator API and not thoroughly tested, since the PSE is mainly controlled via ethtool. The function became actively used by ethtool after commit 3e9dbfec4998 ("net: pse-pd: Split ethtool_get_status into multiple callbacks"), which led to the discovery of this issue. Fix it by using the correct data offset. Fixes: a87e699c9d33 ("net: pse-pd: pd692x0: Enhance with new current limit and voltage read callbacks") Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20250217134812.1925345-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/pse-pd/pd692x0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/pse-pd/pd692x0.c b/drivers/net/pse-pd/pd692x0.c index fc9e23927b3b7..7d60a714ca536 100644 --- a/drivers/net/pse-pd/pd692x0.c +++ b/drivers/net/pse-pd/pd692x0.c @@ -1047,7 +1047,7 @@ static int pd692x0_pi_get_pw_limit(struct pse_controller_dev *pcdev, if (ret < 0) return ret; - return pd692x0_pi_get_pw_from_table(buf.data[2], buf.data[3]); + return pd692x0_pi_get_pw_from_table(buf.data[0], buf.data[1]); } static int pd692x0_pi_set_pw_limit(struct pse_controller_dev *pcdev, -- GitLab From e57a6320215c3967f51ab0edeff87db2095440e4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 17 Feb 2025 11:11:27 -0800 Subject: [PATCH 0830/1585] net: Add net_passive_inc() and net_passive_dec(). net_drop_ns() is NULL when CONFIG_NET_NS is disabled. The next patch introduces a function that increments and decrements net->passive. As a prep, let's rename and export net_free() to net_passive_dec() and add net_passive_inc(). Suggested-by: Eric Dumazet Link: https://lore.kernel.org/netdev/CANn89i+oUCt2VGvrbrweniTendZFEh+nwS=uonc004-aPkWy-Q@mail.gmail.com/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250217191129.19967-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/net_namespace.h | 11 +++++++++++ net/core/net_namespace.c | 8 ++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 7ba1402ca7796..f467a66abc6b1 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -297,6 +297,7 @@ static inline int check_net(const struct net *net) } void net_drop_ns(void *); +void net_passive_dec(struct net *net); #else @@ -326,8 +327,18 @@ static inline int check_net(const struct net *net) } #define net_drop_ns NULL + +static inline void net_passive_dec(struct net *net) +{ + refcount_dec(&net->passive); +} #endif +static inline void net_passive_inc(struct net *net) +{ + refcount_inc(&net->passive); +} + /* Returns true if the netns initialization is completed successfully */ static inline bool net_initialized(const struct net *net) { diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index cb39a12b2f829..4303f2a492624 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -464,7 +464,7 @@ static void net_complete_free(void) } -static void net_free(struct net *net) +void net_passive_dec(struct net *net) { if (refcount_dec_and_test(&net->passive)) { kfree(rcu_access_pointer(net->gen)); @@ -482,7 +482,7 @@ void net_drop_ns(void *p) struct net *net = (struct net *)p; if (net) - net_free(net); + net_passive_dec(net); } struct net *copy_net_ns(unsigned long flags, @@ -523,7 +523,7 @@ struct net *copy_net_ns(unsigned long flags, key_remove_domain(net->key_domain); #endif put_user_ns(user_ns); - net_free(net); + net_passive_dec(net); dec_ucounts: dec_net_namespaces(ucounts); return ERR_PTR(rv); @@ -672,7 +672,7 @@ static void cleanup_net(struct work_struct *work) key_remove_domain(net->key_domain); #endif put_user_ns(net->user_ns); - net_free(net); + net_passive_dec(net); } cleanup_net_task = NULL; } -- GitLab From 65161fb544aada499c912b6010a8f7d8e04f6130 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 17 Feb 2025 11:11:28 -0800 Subject: [PATCH 0831/1585] net: Fix dev_net(dev) race in unregister_netdevice_notifier_dev_net(). After the cited commit, dev_net(dev) is fetched before holding RTNL and passed to __unregister_netdevice_notifier_net(). However, dev_net(dev) might be different after holding RTNL. In the reported case [0], while removing a VF device, its netns was being dismantled and the VF was moved to init_net. So the following sequence is basically illegal when dev was fetched without lookup: net = dev_net(dev); rtnl_net_lock(net); Let's use a new helper rtnl_net_dev_lock() to fix the race. It fetches dev_net_rcu(dev), bumps its net->passive, and checks if dev_net_rcu(dev) is changed after rtnl_net_lock(). [0]: BUG: KASAN: slab-use-after-free in notifier_call_chain (kernel/notifier.c:75 (discriminator 2)) Read of size 8 at addr ffff88810cefb4c8 by task test-bridge-lag/21127 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl (lib/dump_stack.c:123) print_report (mm/kasan/report.c:379 mm/kasan/report.c:489) kasan_report (mm/kasan/report.c:604) notifier_call_chain (kernel/notifier.c:75 (discriminator 2)) call_netdevice_notifiers_info (net/core/dev.c:2011) unregister_netdevice_many_notify (net/core/dev.c:11551) unregister_netdevice_queue (net/core/dev.c:11487) unregister_netdev (net/core/dev.c:11635) mlx5e_remove (drivers/net/ethernet/mellanox/mlx5/core/en_main.c:6552 drivers/net/ethernet/mellanox/mlx5/core/en_main.c:6579) mlx5_core auxiliary_bus_remove (drivers/base/auxiliary.c:230) device_release_driver_internal (drivers/base/dd.c:1275 drivers/base/dd.c:1296) bus_remove_device (./include/linux/kobject.h:193 drivers/base/base.h:73 drivers/base/bus.c:583) device_del (drivers/base/power/power.h:142 drivers/base/core.c:3855) mlx5_rescan_drivers_locked (./include/linux/auxiliary_bus.h:241 drivers/net/ethernet/mellanox/mlx5/core/dev.c:333 drivers/net/ethernet/mellanox/mlx5/core/dev.c:535 drivers/net/ethernet/mellanox/mlx5/core/dev.c:549) mlx5_core mlx5_unregister_device (drivers/net/ethernet/mellanox/mlx5/core/dev.c:468) mlx5_core mlx5_uninit_one (./include/linux/instrumented.h:68 ./include/asm-generic/bitops/instrumented-non-atomic.h:141 drivers/net/ethernet/mellanox/mlx5/core/main.c:1563) mlx5_core remove_one (drivers/net/ethernet/mellanox/mlx5/core/main.c:965 drivers/net/ethernet/mellanox/mlx5/core/main.c:2019) mlx5_core pci_device_remove (./include/linux/pm_runtime.h:129 drivers/pci/pci-driver.c:475) device_release_driver_internal (drivers/base/dd.c:1275 drivers/base/dd.c:1296) unbind_store (drivers/base/bus.c:245) kernfs_fop_write_iter (fs/kernfs/file.c:338) vfs_write (fs/read_write.c:587 (discriminator 1) fs/read_write.c:679 (discriminator 1)) ksys_write (fs/read_write.c:732) do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1)) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) RIP: 0033:0x7f6a4d5018b7 Fixes: 7fb1073300a2 ("net: Hold rtnl_net_lock() in (un)?register_netdevice_notifier_dev_net().") Reported-by: Yael Chemla Closes: https://lore.kernel.org/netdev/146eabfe-123c-4970-901e-e961b4c09bc3@nvidia.com/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250217191129.19967-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 48 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index b91658e8aedb4..19e268568282a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2070,6 +2070,42 @@ static void __move_netdevice_notifier_net(struct net *src_net, __register_netdevice_notifier_net(dst_net, nb, true); } +static void rtnl_net_dev_lock(struct net_device *dev) +{ + bool again; + + do { + struct net *net; + + again = false; + + /* netns might be being dismantled. */ + rcu_read_lock(); + net = dev_net_rcu(dev); + net_passive_inc(net); + rcu_read_unlock(); + + rtnl_net_lock(net); + +#ifdef CONFIG_NET_NS + /* dev might have been moved to another netns. */ + if (!net_eq(net, rcu_access_pointer(dev->nd_net.net))) { + rtnl_net_unlock(net); + net_passive_dec(net); + again = true; + } +#endif + } while (again); +} + +static void rtnl_net_dev_unlock(struct net_device *dev) +{ + struct net *net = dev_net(dev); + + rtnl_net_unlock(net); + net_passive_dec(net); +} + int register_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn) @@ -2077,6 +2113,11 @@ int register_netdevice_notifier_dev_net(struct net_device *dev, struct net *net = dev_net(dev); int err; + /* rtnl_net_lock() assumes dev is not yet published by + * register_netdevice(). + */ + DEBUG_NET_WARN_ON_ONCE(!list_empty(&dev->dev_list)); + rtnl_net_lock(net); err = __register_netdevice_notifier_net(net, nb, false); if (!err) { @@ -2093,13 +2134,12 @@ int unregister_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn) { - struct net *net = dev_net(dev); int err; - rtnl_net_lock(net); + rtnl_net_dev_lock(dev); list_del(&nn->list); - err = __unregister_netdevice_notifier_net(net, nb); - rtnl_net_unlock(net); + err = __unregister_netdevice_notifier_net(dev_net(dev), nb); + rtnl_net_dev_unlock(dev); return err; } -- GitLab From d4c6bfc83936cb61fac99e9891c406fbdd40f964 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 17 Feb 2025 11:11:29 -0800 Subject: [PATCH 0832/1585] dev: Use rtnl_net_dev_lock() in unregister_netdev(). The following sequence is basically illegal when dev was fetched without lookup because dev_net(dev) might be different after holding rtnl_net_lock(): net = dev_net(dev); rtnl_net_lock(net); Let's use rtnl_net_dev_lock() in unregister_netdev(). Note that there is no real bug in unregister_netdev() for now because RTNL protects the scope even if dev_net(dev) is changed before/after RTNL. Fixes: 00fb9823939e ("dev: Hold per-netns RTNL in (un)?register_netdev().") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250217191129.19967-4-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 19e268568282a..fafd2f4b5d5d7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11920,11 +11920,9 @@ EXPORT_SYMBOL(unregister_netdevice_many); */ void unregister_netdev(struct net_device *dev) { - struct net *net = dev_net(dev); - - rtnl_net_lock(net); + rtnl_net_dev_lock(dev); unregister_netdevice(dev); - rtnl_net_unlock(net); + rtnl_net_dev_unlock(dev); } EXPORT_SYMBOL(unregister_netdev); -- GitLab From eff2eb592efd73f00590d578c3d6021f604df62c Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Wed, 19 Feb 2025 17:48:07 +1100 Subject: [PATCH 0833/1585] cxl: Fix cross-reference in documentation and add deprecation warning commit 5731d41af924 ("cxl: Deprecate driver") labelled the cxl driver as deprecated and moved the ABI documentation to the obsolete/ subdirectory, but didn't update cxl.rst, causing a warning once ff7ff6eb4f809 ("docs: media: Allow creating cross-references for RC ABI") was merged. Fix the cross-reference, and also add a deprecation warning. Fixes: 5731d41af924 ("cxl: Deprecate driver") Reported-by: Bagas Sanjaya Signed-off-by: Andrew Donnellan Acked-by: Bagas Sanjaya Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250219064807.175107-1-ajd@linux.ibm.com --- Documentation/arch/powerpc/cxl.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/arch/powerpc/cxl.rst b/Documentation/arch/powerpc/cxl.rst index d2d77057610e4..778adda740d24 100644 --- a/Documentation/arch/powerpc/cxl.rst +++ b/Documentation/arch/powerpc/cxl.rst @@ -18,6 +18,7 @@ Introduction both access system memory directly and with the same effective addresses. + **This driver is deprecated and will be removed in a future release.** Hardware overview ================= @@ -453,7 +454,7 @@ Sysfs Class A cxl sysfs class is added under /sys/class/cxl to facilitate enumeration and tuning of the accelerators. Its layout is - described in Documentation/ABI/testing/sysfs-class-cxl + described in Documentation/ABI/obsolete/sysfs-class-cxl Udev rules -- GitLab From 8821f36333e27c8355d4a730649923f938e1e4b9 Mon Sep 17 00:00:00 2001 From: Friedrich Vock Date: Mon, 27 Jan 2025 16:27:52 +0100 Subject: [PATCH 0834/1585] cgroup/dmem: Don't open-code css_for_each_descendant_pre The current implementation has a bug: If the current css doesn't contain any pool that is a descendant of the "pool" (i.e. when found_descendant == false), then "pool" will point to some unrelated pool. If the current css has a child, we'll overwrite parent_pool with this unrelated pool on the next iteration. Since we can just check whether a pool refers to the same region to determine whether or not it's related, all the additional pool tracking is unnecessary, so just switch to using css_for_each_descendant_pre for traversal. Fixes: b168ed458dde ("kernel/cgroup: Add "dmem" memory accounting cgroup") Signed-off-by: Friedrich Vock Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20250127152754.21325-1-friedrich.vock@gmx.de Signed-off-by: Maarten Lankhorst --- kernel/cgroup/dmem.c | 50 ++++++++++---------------------------------- 1 file changed, 11 insertions(+), 39 deletions(-) diff --git a/kernel/cgroup/dmem.c b/kernel/cgroup/dmem.c index fbe34299673d3..10b63433f0573 100644 --- a/kernel/cgroup/dmem.c +++ b/kernel/cgroup/dmem.c @@ -220,60 +220,32 @@ dmem_cgroup_calculate_protection(struct dmem_cgroup_pool_state *limit_pool, struct dmem_cgroup_pool_state *test_pool) { struct page_counter *climit; - struct cgroup_subsys_state *css, *next_css; + struct cgroup_subsys_state *css; struct dmemcg_state *dmemcg_iter; - struct dmem_cgroup_pool_state *pool, *parent_pool; - bool found_descendant; + struct dmem_cgroup_pool_state *pool, *found_pool; climit = &limit_pool->cnt; rcu_read_lock(); - parent_pool = pool = limit_pool; - css = &limit_pool->cs->css; - /* - * This logic is roughly equivalent to css_foreach_descendant_pre, - * except we also track the parent pool to find out which pool we need - * to calculate protection values for. - * - * We can stop the traversal once we find test_pool among the - * descendants since we don't really care about any others. - */ - while (pool != test_pool) { - next_css = css_next_child(NULL, css); - if (next_css) { - parent_pool = pool; - } else { - while (css != &limit_pool->cs->css) { - next_css = css_next_child(css, css->parent); - if (next_css) - break; - css = css->parent; - parent_pool = pool_parent(parent_pool); - } - /* - * We can only hit this when test_pool is not a - * descendant of limit_pool. - */ - if (WARN_ON_ONCE(css == &limit_pool->cs->css)) - break; - } - css = next_css; - - found_descendant = false; + css_for_each_descendant_pre(css, &limit_pool->cs->css) { dmemcg_iter = container_of(css, struct dmemcg_state, css); + found_pool = NULL; list_for_each_entry_rcu(pool, &dmemcg_iter->pools, css_node) { - if (pool_parent(pool) == parent_pool) { - found_descendant = true; + if (pool->region == limit_pool->region) { + found_pool = pool; break; } } - if (!found_descendant) + if (!found_pool) continue; page_counter_calculate_protection( - climit, &pool->cnt, true); + climit, &found_pool->cnt, true); + + if (found_pool == test_pool) + break; } rcu_read_unlock(); } -- GitLab From 44afc10d4678d5a3a4ab8c25750be00f037298cf Mon Sep 17 00:00:00 2001 From: Ryan McClelland Date: Thu, 16 Jan 2025 22:49:24 -0800 Subject: [PATCH 0835/1585] HID: nintendo: fix gencon button events map This fixes the button event map to match the 3-button recommendation as well as the redundant 'z' in the button map events for the Sega MD/Gen 6 Button. Signed-off-by: Ryan McClelland Reviewed-by: Daniel J. Ogorchock Signed-off-by: Jiri Kosina --- drivers/hid/hid-nintendo.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c index 11ac246176ae1..839d5bcd72b1e 100644 --- a/drivers/hid/hid-nintendo.c +++ b/drivers/hid/hid-nintendo.c @@ -457,13 +457,13 @@ static const struct joycon_ctlr_button_mapping snescon_button_mappings[] = { }; static const struct joycon_ctlr_button_mapping gencon_button_mappings[] = { - { BTN_A, JC_BTN_A, }, - { BTN_B, JC_BTN_B, }, - { BTN_C, JC_BTN_R, }, - { BTN_X, JC_BTN_X, }, /* MD/GEN 6B Only */ - { BTN_Y, JC_BTN_Y, }, /* MD/GEN 6B Only */ - { BTN_Z, JC_BTN_L, }, /* MD/GEN 6B Only */ - { BTN_SELECT, JC_BTN_ZR, }, + { BTN_WEST, JC_BTN_A, }, /* A */ + { BTN_SOUTH, JC_BTN_B, }, /* B */ + { BTN_EAST, JC_BTN_R, }, /* C */ + { BTN_TL, JC_BTN_X, }, /* X MD/GEN 6B Only */ + { BTN_NORTH, JC_BTN_Y, }, /* Y MD/GEN 6B Only */ + { BTN_TR, JC_BTN_L, }, /* Z MD/GEN 6B Only */ + { BTN_SELECT, JC_BTN_ZR, }, /* Mode */ { BTN_START, JC_BTN_PLUS, }, { BTN_MODE, JC_BTN_HOME, }, { BTN_Z, JC_BTN_CAP, }, -- GitLab From 4bd0725c09f377ffaf22b834241f6c050742e4fc Mon Sep 17 00:00:00 2001 From: Yu-Chun Lin Date: Tue, 18 Feb 2025 00:50:13 +0800 Subject: [PATCH 0836/1585] HID: google: fix unused variable warning under !CONFIG_ACPI As reported by the kernel test robot, the following warning occurs: >> drivers/hid/hid-google-hammer.c:261:36: warning: 'cbas_ec_acpi_ids' defined but not used [-Wunused-const-variable=] 261 | static const struct acpi_device_id cbas_ec_acpi_ids[] = { | ^~~~~~~~~~~~~~~~ The 'cbas_ec_acpi_ids' array is only used when CONFIG_ACPI is enabled. Wrapping its definition and 'MODULE_DEVICE_TABLE' in '#ifdef CONFIG_ACPI' prevents a compiler warning when ACPI is disabled. Fixes: eb1aac4c8744f75 ("HID: google: add support tablet mode switch for Whiskers") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501201141.jctFH5eB-lkp@intel.com/ Signed-off-by: Yu-Chun Lin Signed-off-by: Jiri Kosina --- drivers/hid/hid-google-hammer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c index 0f292b5d3e26d..eb6fd2dc75d0a 100644 --- a/drivers/hid/hid-google-hammer.c +++ b/drivers/hid/hid-google-hammer.c @@ -268,11 +268,13 @@ static void cbas_ec_remove(struct platform_device *pdev) mutex_unlock(&cbas_ec_reglock); } +#ifdef CONFIG_ACPI static const struct acpi_device_id cbas_ec_acpi_ids[] = { { "GOOG000B", 0 }, { } }; MODULE_DEVICE_TABLE(acpi, cbas_ec_acpi_ids); +#endif #ifdef CONFIG_OF static const struct of_device_id cbas_ec_of_match[] = { -- GitLab From 823987841424289339fdb4ba90e6d2c3792836db Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Tue, 18 Feb 2025 14:37:29 +0800 Subject: [PATCH 0837/1585] HID: intel-ish-hid: Fix use-after-free issue in hid_ishtp_cl_remove() During the `rmmod` operation for the `intel_ishtp_hid` driver, a use-after-free issue can occur in the hid_ishtp_cl_remove() function. The function hid_ishtp_cl_deinit() is called before ishtp_hid_remove(), which can lead to accessing freed memory or resources during the removal process. Call Trace: ? ishtp_cl_send+0x168/0x220 [intel_ishtp] ? hid_output_report+0xe3/0x150 [hid] hid_ishtp_set_feature+0xb5/0x120 [intel_ishtp_hid] ishtp_hid_request+0x7b/0xb0 [intel_ishtp_hid] hid_hw_request+0x1f/0x40 [hid] sensor_hub_set_feature+0x11f/0x190 [hid_sensor_hub] _hid_sensor_power_state+0x147/0x1e0 [hid_sensor_trigger] hid_sensor_runtime_resume+0x22/0x30 [hid_sensor_trigger] sensor_hub_remove+0xa8/0xe0 [hid_sensor_hub] hid_device_remove+0x49/0xb0 [hid] hid_destroy_device+0x6f/0x90 [hid] ishtp_hid_remove+0x42/0x70 [intel_ishtp_hid] hid_ishtp_cl_remove+0x6b/0xb0 [intel_ishtp_hid] ishtp_cl_device_remove+0x4a/0x60 [intel_ishtp] ... Additionally, ishtp_hid_remove() is a HID level power off, which should occur before the ISHTP level disconnect. This patch resolves the issue by reordering the calls in hid_ishtp_cl_remove(). The function ishtp_hid_remove() is now called before hid_ishtp_cl_deinit(). Fixes: f645a90e8ff7 ("HID: intel-ish-hid: ishtp-hid-client: use helper functions for connection") Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ishtp-hid-client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c index cb04cd1d980bd..6550ad5bfbb53 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c +++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c @@ -832,9 +832,9 @@ static void hid_ishtp_cl_remove(struct ishtp_cl_device *cl_device) hid_ishtp_cl); dev_dbg(ishtp_device(cl_device), "%s\n", __func__); - hid_ishtp_cl_deinit(hid_ishtp_cl); ishtp_put_device(cl_device); ishtp_hid_remove(client_data); + hid_ishtp_cl_deinit(hid_ishtp_cl); hid_ishtp_cl = NULL; -- GitLab From 07583a0010696a17fb0942e0b499a62785c5fc9f Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Tue, 18 Feb 2025 14:37:30 +0800 Subject: [PATCH 0838/1585] HID: intel-ish-hid: Fix use-after-free issue in ishtp_hid_remove() The system can experience a random crash a few minutes after the driver is removed. This issue occurs due to improper handling of memory freeing in the ishtp_hid_remove() function. The function currently frees the `driver_data` directly within the loop that destroys the HID devices, which can lead to accessing freed memory. Specifically, `hid_destroy_device()` uses `driver_data` when it calls `hid_ishtp_set_feature()` to power off the sensor, so freeing `driver_data` beforehand can result in accessing invalid memory. This patch resolves the issue by storing the `driver_data` in a temporary variable before calling `hid_destroy_device()`, and then freeing the `driver_data` after the device is destroyed. Fixes: 0b28cb4bcb17 ("HID: intel-ish-hid: ISH HID client driver") Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ishtp-hid.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hid/intel-ish-hid/ishtp-hid.c b/drivers/hid/intel-ish-hid/ishtp-hid.c index 00c6f0ebf3563..be2c62fc8251d 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid.c +++ b/drivers/hid/intel-ish-hid/ishtp-hid.c @@ -261,12 +261,14 @@ int ishtp_hid_probe(unsigned int cur_hid_dev, */ void ishtp_hid_remove(struct ishtp_cl_data *client_data) { + void *data; int i; for (i = 0; i < client_data->num_hid_devices; ++i) { if (client_data->hid_sensor_hubs[i]) { - kfree(client_data->hid_sensor_hubs[i]->driver_data); + data = client_data->hid_sensor_hubs[i]->driver_data; hid_destroy_device(client_data->hid_sensor_hubs[i]); + kfree(data); client_data->hid_sensor_hubs[i] = NULL; } } -- GitLab From 3dbc0215e3c502a9f3221576da0fdc9847fb9721 Mon Sep 17 00:00:00 2001 From: Aaron Kling Date: Tue, 18 Feb 2025 03:28:03 -0600 Subject: [PATCH 0839/1585] drm/nouveau/pmu: Fix gp10b firmware guard Most kernel configs enable multiple Tegra SoC generations, causing this typo to go unnoticed. But in the case where a kernel config is strictly for Tegra186, this is a problem. Fixes: 989863d7cbe5 ("drm/nouveau/pmu: select implementation based on available firmware") Signed-off-by: Aaron Kling Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20250218-nouveau-gm10b-guard-v2-1-a4de71500d48@gmail.com --- drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c index a6f410ba60bc9..d393bc540f862 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c @@ -75,7 +75,7 @@ gp10b_pmu_acr = { .bootstrap_multiple_falcons = gp10b_pmu_acr_bootstrap_multiple_falcons, }; -#if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC) +#if IS_ENABLED(CONFIG_ARCH_TEGRA_186_SOC) MODULE_FIRMWARE("nvidia/gp10b/pmu/desc.bin"); MODULE_FIRMWARE("nvidia/gp10b/pmu/image.bin"); MODULE_FIRMWARE("nvidia/gp10b/pmu/sig.bin"); -- GitLab From 6586788f0a8d0f3b33b1383885575f5b5f7b9dad Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Sat, 15 Feb 2025 08:37:53 +0100 Subject: [PATCH 0840/1585] MAINTAINERS: Remove myself I was pondering with myself for a while if I should just make it official that I'm not really involved in the kernel community anymore, neither as a reviewer, nor as a maintainer. Most of the time I simply excused myself with "if something urgent comes up, I can chime in and help out". Lyude and Danilo are doing a wonderful job and I've put all my trust into them. However, there is one thing I can't stand and it's hurting me the most. I'm convinced, no, my core believe is, that inclusivity and respect, working with others as equals, no power plays involved, is how we should work together within the Free and Open Source community. I can understand maintainers needing to learn, being concerned on technical points. Everybody deserves the time to understand and learn. It is my true belief that most people are capable of change eventually. I truly believe this community can change from within, however this doesn't mean it's going to be a smooth process. The moment I made up my mind about this was reading the following words written by a maintainer within the kernel community: "we are the thin blue line" This isn't okay. This isn't creating an inclusive environment. This isn't okay with the current political situation especially in the US. A maintainer speaking those words can't be kept. No matter how important or critical or relevant they are. They need to be removed until they learn. Learn what those words mean for a lot of marginalized people. Learn about what horrors it evokes in their minds. I can't in good faith remain to be part of a project and its community where those words are tolerated. Those words are not technical, they are a political statement. Even if unintentionally, such words carry power, they carry meanings one needs to be aware of. They do cause an immense amount of harm. I wish the best of luck for everybody to continue to try to work from within. You got my full support and I won't hold it against anybody trying to improve the community, it's a thankless job, it's a lot of work. People will continue to burn out. I got burned out enough by myself caring about the bits I maintained, but eventually I had to realize my limits. The obligation I felt was eating me from inside. It stopped being fun at some point and I reached a point where I simply couldn't continue the work I was so motivated doing as I've did in the early days. Please respect my wishes and put this statement as is into the tree. Leaving anything out destroys its entire meaning. Respectfully Karol Signed-off-by: Karol Herbst Acked-by: Masami Hiramatsu (Google) Acked-by: Steven Rostedt (Google) Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20250215073753.1217002-2-kherbst@redhat.com --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index fd4f528bc4145..18ade2ea4f3c4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7354,7 +7354,6 @@ F: Documentation/devicetree/bindings/display/panel/novatek,nt36672a.yaml F: drivers/gpu/drm/panel/panel-novatek-nt36672a.c DRM DRIVER FOR NVIDIA GEFORCE/QUADRO GPUS -M: Karol Herbst M: Lyude Paul M: Danilo Krummrich L: dri-devel@lists.freedesktop.org @@ -23819,7 +23818,6 @@ F: tools/testing/selftests/ftrace/ TRACING MMIO ACCESSES (MMIOTRACE) M: Steven Rostedt M: Masami Hiramatsu -R: Karol Herbst R: Pekka Paalanen L: linux-kernel@vger.kernel.org L: nouveau@lists.freedesktop.org -- GitLab From 60255f3704fde70ed3c4d62f919aa4b46f841f70 Mon Sep 17 00:00:00 2001 From: Niravkumar L Rabara Date: Sun, 16 Feb 2025 11:15:36 +0800 Subject: [PATCH 0841/1585] mtd: rawnand: cadence: fix unchecked dereference Add NULL check before variable dereference to fix static checker warning. Fixes: d76d22b5096c ("mtd: rawnand: cadence: use dma_map_resource for sdma address") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/e448a22c-bada-448d-9167-7af71305130d@stanley.mountain/ Signed-off-by: Niravkumar L Rabara Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/cadence-nand-controller.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/cadence-nand-controller.c b/drivers/mtd/nand/raw/cadence-nand-controller.c index 0b2db4173e723..6667eea955977 100644 --- a/drivers/mtd/nand/raw/cadence-nand-controller.c +++ b/drivers/mtd/nand/raw/cadence-nand-controller.c @@ -2972,8 +2972,10 @@ static int cadence_nand_init(struct cdns_nand_ctrl *cdns_ctrl) static void cadence_nand_remove(struct cdns_nand_ctrl *cdns_ctrl) { cadence_nand_chips_cleanup(cdns_ctrl); - dma_unmap_resource(cdns_ctrl->dmac->device->dev, cdns_ctrl->io.iova_dma, - cdns_ctrl->io.size, DMA_BIDIRECTIONAL, 0); + if (cdns_ctrl->dmac) + dma_unmap_resource(cdns_ctrl->dmac->device->dev, + cdns_ctrl->io.iova_dma, cdns_ctrl->io.size, + DMA_BIDIRECTIONAL, 0); cadence_nand_irq_cleanup(cdns_ctrl->irq, cdns_ctrl); kfree(cdns_ctrl->buf); dma_free_coherent(cdns_ctrl->dev, sizeof(struct cadence_nand_cdma_desc), -- GitLab From a8e8ffcc3afce2ee5fb70162aeaef3f03573ee1e Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 9 Feb 2025 13:05:50 +0200 Subject: [PATCH 0842/1585] mei: me: add panther lake P DID Add Panther Lake P device id. Cc: stable Co-developed-by: Tomas Winkler Signed-off-by: Tomas Winkler Signed-off-by: Alexander Usyskin Link: https://lore.kernel.org/r/20250209110550.1582982-1-alexander.usyskin@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 2 ++ drivers/misc/mei/pci-me.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index c3a6657dcd4a2..a5f88ec97df75 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -117,6 +117,8 @@ #define MEI_DEV_ID_LNL_M 0xA870 /* Lunar Lake Point M */ +#define MEI_DEV_ID_PTL_P 0xE470 /* Panther Lake P */ + /* * MEI HW Section */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 6589635f8ba32..d6ff9d82ae94b 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -124,6 +124,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_LNL_M, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PTL_P, MEI_ME_PCH15_CFG)}, + /* required last entry */ {0, } }; -- GitLab From fdb1ada57cf8b8752cdf54f08709d76d74999544 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 14 Feb 2025 22:24:25 +0100 Subject: [PATCH 0843/1585] mei: vsc: Use "wakeuphostint" when getting the host wakeup GPIO The _CRS ACPI resources table has 2 entries for the host wakeup GPIO, the first one being a regular GpioIo () resource while the second one is a GpioInt () resource for the same pin. The acpi_gpio_mapping table used by vsc-tp.c maps the first Gpio () resource to "wakeuphost-gpios" where as the second GpioInt () entry is mapped to "wakeuphostint-gpios". Using "wakeuphost" to request the GPIO as was done until now, means that the gpiolib-acpi code does not know that the GPIO is active-low as that info is only available in the GpioInt () entry. Things were still working before due to the following happening: 1. Since the 2 entries point to the same pin they share a struct gpio_desc 2. The SPI core creates the SPI device vsc-tp.c binds to and calls acpi_dev_gpio_irq_get(). This does use the second entry and sets FLAG_ACTIVE_LOW in gpio_desc.flags . 3. vsc_tp_probe() requests the "wakeuphost" GPIO and inherits the active-low flag set by acpi_dev_gpio_irq_get() But there is a possible scenario where things do not work: 1. - 3. happen as above 4. After requesting the "wakeuphost" GPIO, the "resetfw" GPIO is requested next, but its USB GPIO controller is not available yet, so this call returns -EPROBE_DEFER. 5. The gpio_desc for "wakeuphost" is put() and during this the active-low flag is cleared from gpio_desc.flags . 6. Later on vsc_tp_probe() requests the "wakeuphost" GPIO again, but now it is not marked active-low. The difference can also be seen in /sys/kernel/debug/gpio, which contains the following line for this GPIO: gpio-535 ( |wakeuphost ) in hi IRQ ACTIVE LOW If the second scenario is hit the "ACTIVE LOW" at the end disappears and things do not work. Fix this by requesting the GPIO through the "wakeuphostint" mapping instead which provides active-low info without relying on acpi_dev_gpio_irq_get() pre-populating this info in the gpio_desc. Link: https://bugzilla.redhat.com/show_bug.cgi?id=2316918 Signed-off-by: Hans de Goede Reviewed-by: Stanislaw Gruszka Tested-by: Sakari Ailus Fixes: 566f5ca97680 ("mei: Add transport driver for IVSC device") Cc: stable Link: https://lore.kernel.org/r/20250214212425.84021-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/vsc-tp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mei/vsc-tp.c b/drivers/misc/mei/vsc-tp.c index 35d349fee7698..7be1649b19725 100644 --- a/drivers/misc/mei/vsc-tp.c +++ b/drivers/misc/mei/vsc-tp.c @@ -502,7 +502,7 @@ static int vsc_tp_probe(struct spi_device *spi) if (ret) return ret; - tp->wakeuphost = devm_gpiod_get(dev, "wakeuphost", GPIOD_IN); + tp->wakeuphost = devm_gpiod_get(dev, "wakeuphostint", GPIOD_IN); if (IS_ERR(tp->wakeuphost)) return PTR_ERR(tp->wakeuphost); -- GitLab From c90aad369899a607cfbc002bebeafd51e31900cd Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Thu, 13 Feb 2025 15:22:57 +0300 Subject: [PATCH 0844/1585] usb: atm: cxacru: fix a flaw in existing endpoint checks Syzbot once again identified a flaw in usb endpoint checking, see [1]. This time the issue stems from a commit authored by me (2eabb655a968 ("usb: atm: cxacru: fix endpoint checking in cxacru_bind()")). While using usb_find_common_endpoints() may usually be enough to discard devices with wrong endpoints, in this case one needs more than just finding and identifying the sufficient number of endpoints of correct types - one needs to check the endpoint's address as well. Since cxacru_bind() fills URBs with CXACRU_EP_CMD address in mind, switch the endpoint verification approach to usb_check_XXX_endpoints() instead to fix incomplete ep testing. [1] Syzbot report: usb 5-1: BOGUS urb xfer, pipe 3 != type 1 WARNING: CPU: 0 PID: 1378 at drivers/usb/core/urb.c:504 usb_submit_urb+0xc4e/0x18c0 drivers/usb/core/urb.c:503 ... RIP: 0010:usb_submit_urb+0xc4e/0x18c0 drivers/usb/core/urb.c:503 ... Call Trace: cxacru_cm+0x3c8/0xe50 drivers/usb/atm/cxacru.c:649 cxacru_card_status drivers/usb/atm/cxacru.c:760 [inline] cxacru_bind+0xcf9/0x1150 drivers/usb/atm/cxacru.c:1223 usbatm_usb_probe+0x314/0x1d30 drivers/usb/atm/usbatm.c:1058 cxacru_usb_probe+0x184/0x220 drivers/usb/atm/cxacru.c:1377 usb_probe_interface+0x641/0xbb0 drivers/usb/core/driver.c:396 really_probe+0x2b9/0xad0 drivers/base/dd.c:658 __driver_probe_device+0x1a2/0x390 drivers/base/dd.c:800 driver_probe_device+0x50/0x430 drivers/base/dd.c:830 ... Reported-and-tested-by: syzbot+ccbbc229a024fa3e13b5@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=ccbbc229a024fa3e13b5 Fixes: 2eabb655a968 ("usb: atm: cxacru: fix endpoint checking in cxacru_bind()") Cc: stable@kernel.org Signed-off-by: Nikita Zhandarovich Link: https://lore.kernel.org/r/20250213122259.730772-1-n.zhandarovich@fintech.ru Signed-off-by: Greg Kroah-Hartman --- drivers/usb/atm/cxacru.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c index 0dd85d2635b99..47d06af33747d 100644 --- a/drivers/usb/atm/cxacru.c +++ b/drivers/usb/atm/cxacru.c @@ -1131,7 +1131,10 @@ static int cxacru_bind(struct usbatm_data *usbatm_instance, struct cxacru_data *instance; struct usb_device *usb_dev = interface_to_usbdev(intf); struct usb_host_endpoint *cmd_ep = usb_dev->ep_in[CXACRU_EP_CMD]; - struct usb_endpoint_descriptor *in, *out; + static const u8 ep_addrs[] = { + CXACRU_EP_CMD + USB_DIR_IN, + CXACRU_EP_CMD + USB_DIR_OUT, + 0}; int ret; /* instance init */ @@ -1179,13 +1182,11 @@ static int cxacru_bind(struct usbatm_data *usbatm_instance, } if (usb_endpoint_xfer_int(&cmd_ep->desc)) - ret = usb_find_common_endpoints(intf->cur_altsetting, - NULL, NULL, &in, &out); + ret = usb_check_int_endpoints(intf, ep_addrs); else - ret = usb_find_common_endpoints(intf->cur_altsetting, - &in, &out, NULL, NULL); + ret = usb_check_bulk_endpoints(intf, ep_addrs); - if (ret) { + if (!ret) { usb_err(usbatm_instance, "cxacru_bind: interface has incorrect endpoints\n"); ret = -ENODEV; goto fail; -- GitLab From 17c2c87c37862c3e95b55f660681cc6e8d66660e Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Wed, 12 Feb 2025 15:38:40 +0530 Subject: [PATCH 0845/1585] usb: gadget: u_ether: Set is_suspend flag if remote wakeup fails Currently while UDC suspends, u_ether attempts to remote wakeup the host if there are any pending transfers. However, if remote wakeup fails, the UDC remains suspended but the is_suspend flag is not set. And since is_suspend flag isn't set, the subsequent eth_start_xmit() would queue USB requests to suspended UDC. To fix this, bail out from gether_suspend() only if remote wakeup operation is successful. Cc: stable Fixes: 0a1af6dfa077 ("usb: gadget: f_ecm: Add suspend/resume and remote wakeup support") Signed-off-by: Prashanth K Link: https://lore.kernel.org/r/20250212100840.3812153-1-prashanth.k@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_ether.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index 09e2838917e29..f58590bf5e02f 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -1052,8 +1052,8 @@ void gether_suspend(struct gether *link) * There is a transfer in progress. So we trigger a remote * wakeup to inform the host. */ - ether_wakeup_host(dev->port_usb); - return; + if (!ether_wakeup_host(dev->port_usb)) + return; } spin_lock_irqsave(&dev->lock, flags); link->is_suspend = true; -- GitLab From 40e89ff5750fca2c1d6da93f98a2038716bba86c Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Mon, 17 Feb 2025 17:33:28 +0530 Subject: [PATCH 0846/1585] usb: gadget: Set self-powered based on MaxPower and bmAttributes Currently the USB gadget will be set as bus-powered based solely on whether its bMaxPower is greater than 100mA, but this may miss devices that may legitimately draw less than 100mA but still want to report as bus-powered. Similarly during suspend & resume, USB gadget is incorrectly marked as bus/self powered without checking the bmAttributes field. Fix these by configuring the USB gadget as self or bus powered based on bmAttributes, and explicitly set it as bus-powered if it draws more than 100mA. Cc: stable Fixes: 5e5caf4fa8d3 ("usb: gadget: composite: Inform controller driver of self-powered") Signed-off-by: Prashanth K Link: https://lore.kernel.org/r/20250217120328.2446639-1-prashanth.k@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index bdda8c74602de..1fb28bbf6c458 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1050,10 +1050,11 @@ static int set_config(struct usb_composite_dev *cdev, else usb_gadget_set_remote_wakeup(gadget, 0); done: - if (power <= USB_SELF_POWER_VBUS_MAX_DRAW) - usb_gadget_set_selfpowered(gadget); - else + if (power > USB_SELF_POWER_VBUS_MAX_DRAW || + !(c->bmAttributes & USB_CONFIG_ATT_SELFPOWER)) usb_gadget_clear_selfpowered(gadget); + else + usb_gadget_set_selfpowered(gadget); usb_gadget_vbus_draw(gadget, power); if (result >= 0 && cdev->delayed_status) @@ -2615,7 +2616,9 @@ void composite_suspend(struct usb_gadget *gadget) cdev->suspended = 1; - usb_gadget_set_selfpowered(gadget); + if (cdev->config->bmAttributes & USB_CONFIG_ATT_SELFPOWER) + usb_gadget_set_selfpowered(gadget); + usb_gadget_vbus_draw(gadget, 2); } @@ -2649,8 +2652,11 @@ void composite_resume(struct usb_gadget *gadget) else maxpower = min(maxpower, 900U); - if (maxpower > USB_SELF_POWER_VBUS_MAX_DRAW) + if (maxpower > USB_SELF_POWER_VBUS_MAX_DRAW || + !(cdev->config->bmAttributes & USB_CONFIG_ATT_SELFPOWER)) usb_gadget_clear_selfpowered(gadget); + else + usb_gadget_set_selfpowered(gadget); usb_gadget_vbus_draw(gadget, maxpower); } else { -- GitLab From d6b82dafd17db0658f089b9cdec573982ca82bc5 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 19 Feb 2025 12:47:00 +0100 Subject: [PATCH 0847/1585] usb: typec: tcpci_rt1711h: Unmask alert interrupts to fix functionality During probe, the TCPC alert interrupts are getting masked to avoid unwanted interrupts during chip setup: this is ok to do but there is no unmasking happening at any later time, which means that the chip will not raise any interrupt, essentially making it not functional as, while internally it does perform all of the intended functions, it won't signal anything to the outside. Unmask the alert interrupts to fix functionality. Fixes: ce08eaeb6388 ("staging: typec: rt1711h typec chip driver") Cc: stable Signed-off-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20250219114700.41700-1-angelogioacchino.delregno@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpci_rt1711h.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/typec/tcpm/tcpci_rt1711h.c b/drivers/usb/typec/tcpm/tcpci_rt1711h.c index 64f6dd0dc6609..88c50b984e8a3 100644 --- a/drivers/usb/typec/tcpm/tcpci_rt1711h.c +++ b/drivers/usb/typec/tcpm/tcpci_rt1711h.c @@ -334,6 +334,11 @@ static int rt1711h_probe(struct i2c_client *client) { int ret; struct rt1711h_chip *chip; + const u16 alert_mask = TCPC_ALERT_TX_SUCCESS | TCPC_ALERT_TX_DISCARDED | + TCPC_ALERT_TX_FAILED | TCPC_ALERT_RX_HARD_RST | + TCPC_ALERT_RX_STATUS | TCPC_ALERT_POWER_STATUS | + TCPC_ALERT_CC_STATUS | TCPC_ALERT_RX_BUF_OVF | + TCPC_ALERT_FAULT; chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL); if (!chip) @@ -382,6 +387,12 @@ static int rt1711h_probe(struct i2c_client *client) dev_name(chip->dev), chip); if (ret < 0) return ret; + + /* Enable alert interrupts */ + ret = rt1711h_write16(chip, TCPC_ALERT_MASK, alert_mask); + if (ret < 0) + return ret; + enable_irq_wake(client->irq); return 0; -- GitLab From 976e7e9bdc7719a023a4ecccd2e3daec9ab20a40 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Mon, 17 Feb 2025 13:54:39 +0300 Subject: [PATCH 0848/1585] acpi: typec: ucsi: Introduce a ->poll_cci method For the ACPI backend of UCSI the UCSI "registers" are just a memory copy of the register values in an opregion. The ACPI implementation in the BIOS ensures that the opregion contents are synced to the embedded controller and it ensures that the registers (in particular CCI) are synced back to the opregion on notifications. While there is an ACPI call that syncs the actual registers to the opregion there is rarely a need to do this and on some ACPI implementations it actually breaks in various interesting ways. The only reason to force a sync from the embedded controller is to poll CCI while notifications are disabled. Only the ucsi core knows if this is the case and guessing based on the current command is suboptimal, i.e. leading to the following spurious assertion splat: WARNING: CPU: 3 PID: 76 at drivers/usb/typec/ucsi/ucsi.c:1388 ucsi_reset_ppm+0x1b4/0x1c0 [typec_ucsi] CPU: 3 UID: 0 PID: 76 Comm: kworker/3:0 Not tainted 6.12.11-200.fc41.x86_64 #1 Hardware name: LENOVO 21D0/LNVNB161216, BIOS J6CN45WW 03/17/2023 Workqueue: events_long ucsi_init_work [typec_ucsi] RIP: 0010:ucsi_reset_ppm+0x1b4/0x1c0 [typec_ucsi] Call Trace: ucsi_init_work+0x3c/0xac0 [typec_ucsi] process_one_work+0x179/0x330 worker_thread+0x252/0x390 kthread+0xd2/0x100 ret_from_fork+0x34/0x50 ret_from_fork_asm+0x1a/0x30 Thus introduce a ->poll_cci() method that works like ->read_cci() with an additional forced sync and document that this should be used when polling with notifications disabled. For all other backends that presumably don't have this issue use the same implementation for both methods. Fixes: fa48d7e81624 ("usb: typec: ucsi: Do not call ACPI _DSM method for UCSI read operations") Cc: stable Signed-off-by: Christian A. Ehrhardt Tested-by: Fedor Pchelkin Signed-off-by: Fedor Pchelkin Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250217105442.113486-2-boddah8794@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 10 +++++----- drivers/usb/typec/ucsi/ucsi.h | 2 ++ drivers/usb/typec/ucsi/ucsi_acpi.c | 21 ++++++++++++++------- drivers/usb/typec/ucsi/ucsi_ccg.c | 1 + drivers/usb/typec/ucsi/ucsi_glink.c | 1 + drivers/usb/typec/ucsi/ucsi_stm32g0.c | 1 + drivers/usb/typec/ucsi/ucsi_yoga_c630.c | 1 + 7 files changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index fcf499cc9458c..0fe1476f4c297 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1346,7 +1346,7 @@ static int ucsi_reset_ppm(struct ucsi *ucsi) mutex_lock(&ucsi->ppm_lock); - ret = ucsi->ops->read_cci(ucsi, &cci); + ret = ucsi->ops->poll_cci(ucsi, &cci); if (ret < 0) goto out; @@ -1364,7 +1364,7 @@ static int ucsi_reset_ppm(struct ucsi *ucsi) tmo = jiffies + msecs_to_jiffies(UCSI_TIMEOUT_MS); do { - ret = ucsi->ops->read_cci(ucsi, &cci); + ret = ucsi->ops->poll_cci(ucsi, &cci); if (ret < 0) goto out; if (cci & UCSI_CCI_COMMAND_COMPLETE) @@ -1393,7 +1393,7 @@ static int ucsi_reset_ppm(struct ucsi *ucsi) /* Give the PPM time to process a reset before reading CCI */ msleep(20); - ret = ucsi->ops->read_cci(ucsi, &cci); + ret = ucsi->ops->poll_cci(ucsi, &cci); if (ret) goto out; @@ -1929,8 +1929,8 @@ struct ucsi *ucsi_create(struct device *dev, const struct ucsi_operations *ops) struct ucsi *ucsi; if (!ops || - !ops->read_version || !ops->read_cci || !ops->read_message_in || - !ops->sync_control || !ops->async_control) + !ops->read_version || !ops->read_cci || !ops->poll_cci || + !ops->read_message_in || !ops->sync_control || !ops->async_control) return ERR_PTR(-EINVAL); ucsi = kzalloc(sizeof(*ucsi), GFP_KERNEL); diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 82735eb34f0e3..28780acc4af2e 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -62,6 +62,7 @@ struct dentry; * struct ucsi_operations - UCSI I/O operations * @read_version: Read implemented UCSI version * @read_cci: Read CCI register + * @poll_cci: Read CCI register while polling with notifications disabled * @read_message_in: Read message data from UCSI * @sync_control: Blocking control operation * @async_control: Non-blocking control operation @@ -76,6 +77,7 @@ struct dentry; struct ucsi_operations { int (*read_version)(struct ucsi *ucsi, u16 *version); int (*read_cci)(struct ucsi *ucsi, u32 *cci); + int (*poll_cci)(struct ucsi *ucsi, u32 *cci); int (*read_message_in)(struct ucsi *ucsi, void *val, size_t val_len); int (*sync_control)(struct ucsi *ucsi, u64 command); int (*async_control)(struct ucsi *ucsi, u64 command); diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 5c55155519634..ac1ebb5d95272 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -59,19 +59,24 @@ static int ucsi_acpi_read_version(struct ucsi *ucsi, u16 *version) static int ucsi_acpi_read_cci(struct ucsi *ucsi, u32 *cci) { struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); - int ret; - - if (UCSI_COMMAND(ua->cmd) == UCSI_PPM_RESET) { - ret = ucsi_acpi_dsm(ua, UCSI_DSM_FUNC_READ); - if (ret) - return ret; - } memcpy(cci, ua->base + UCSI_CCI, sizeof(*cci)); return 0; } +static int ucsi_acpi_poll_cci(struct ucsi *ucsi, u32 *cci) +{ + struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); + int ret; + + ret = ucsi_acpi_dsm(ua, UCSI_DSM_FUNC_READ); + if (ret) + return ret; + + return ucsi_acpi_read_cci(ucsi, cci); +} + static int ucsi_acpi_read_message_in(struct ucsi *ucsi, void *val, size_t val_len) { struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); @@ -94,6 +99,7 @@ static int ucsi_acpi_async_control(struct ucsi *ucsi, u64 command) static const struct ucsi_operations ucsi_acpi_ops = { .read_version = ucsi_acpi_read_version, .read_cci = ucsi_acpi_read_cci, + .poll_cci = ucsi_acpi_poll_cci, .read_message_in = ucsi_acpi_read_message_in, .sync_control = ucsi_sync_control_common, .async_control = ucsi_acpi_async_control @@ -142,6 +148,7 @@ static int ucsi_gram_sync_control(struct ucsi *ucsi, u64 command) static const struct ucsi_operations ucsi_gram_ops = { .read_version = ucsi_acpi_read_version, .read_cci = ucsi_acpi_read_cci, + .poll_cci = ucsi_acpi_poll_cci, .read_message_in = ucsi_gram_read_message_in, .sync_control = ucsi_gram_sync_control, .async_control = ucsi_acpi_async_control diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index 740171f24ef9f..4b1668733a4be 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -664,6 +664,7 @@ static int ucsi_ccg_sync_control(struct ucsi *ucsi, u64 command) static const struct ucsi_operations ucsi_ccg_ops = { .read_version = ucsi_ccg_read_version, .read_cci = ucsi_ccg_read_cci, + .poll_cci = ucsi_ccg_read_cci, .read_message_in = ucsi_ccg_read_message_in, .sync_control = ucsi_ccg_sync_control, .async_control = ucsi_ccg_async_control, diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c index fed39d4580905..8af79101a2fc7 100644 --- a/drivers/usb/typec/ucsi/ucsi_glink.c +++ b/drivers/usb/typec/ucsi/ucsi_glink.c @@ -206,6 +206,7 @@ static void pmic_glink_ucsi_connector_status(struct ucsi_connector *con) static const struct ucsi_operations pmic_glink_ucsi_ops = { .read_version = pmic_glink_ucsi_read_version, .read_cci = pmic_glink_ucsi_read_cci, + .poll_cci = pmic_glink_ucsi_read_cci, .read_message_in = pmic_glink_ucsi_read_message_in, .sync_control = ucsi_sync_control_common, .async_control = pmic_glink_ucsi_async_control, diff --git a/drivers/usb/typec/ucsi/ucsi_stm32g0.c b/drivers/usb/typec/ucsi/ucsi_stm32g0.c index 6923fad31d795..57ef7d83a4121 100644 --- a/drivers/usb/typec/ucsi/ucsi_stm32g0.c +++ b/drivers/usb/typec/ucsi/ucsi_stm32g0.c @@ -424,6 +424,7 @@ static irqreturn_t ucsi_stm32g0_irq_handler(int irq, void *data) static const struct ucsi_operations ucsi_stm32g0_ops = { .read_version = ucsi_stm32g0_read_version, .read_cci = ucsi_stm32g0_read_cci, + .poll_cci = ucsi_stm32g0_read_cci, .read_message_in = ucsi_stm32g0_read_message_in, .sync_control = ucsi_sync_control_common, .async_control = ucsi_stm32g0_async_control, diff --git a/drivers/usb/typec/ucsi/ucsi_yoga_c630.c b/drivers/usb/typec/ucsi/ucsi_yoga_c630.c index 4cae85c0dc12a..d33e3f2dd1d80 100644 --- a/drivers/usb/typec/ucsi/ucsi_yoga_c630.c +++ b/drivers/usb/typec/ucsi/ucsi_yoga_c630.c @@ -74,6 +74,7 @@ static int yoga_c630_ucsi_async_control(struct ucsi *ucsi, u64 command) static const struct ucsi_operations yoga_c630_ucsi_ops = { .read_version = yoga_c630_ucsi_read_version, .read_cci = yoga_c630_ucsi_read_cci, + .poll_cci = yoga_c630_ucsi_read_cci, .read_message_in = yoga_c630_ucsi_read_message_in, .sync_control = ucsi_sync_control_common, .async_control = yoga_c630_ucsi_async_control, -- GitLab From bf4f9ae1cb08ccaafbe6874be6c46f59b83ae778 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Mon, 17 Feb 2025 13:54:40 +0300 Subject: [PATCH 0849/1585] usb: typec: ucsi: increase timeout for PPM reset operations It is observed that on some systems an initial PPM reset during the boot phase can trigger a timeout: [ 6.482546] ucsi_acpi USBC000:00: failed to reset PPM! [ 6.482551] ucsi_acpi USBC000:00: error -ETIMEDOUT: PPM init failed Still, increasing the timeout value, albeit being the most straightforward solution, eliminates the problem: the initial PPM reset may take up to ~8000-10000ms on some Lenovo laptops. When it is reset after the above period of time (or even if ucsi_reset_ppm() is not called overall), UCSI works as expected. Moreover, if the ucsi_acpi module is loaded/unloaded manually after the system has booted, reading the CCI values and resetting the PPM works perfectly, without any timeout. Thus it's only a boot-time issue. The reason for this behavior is not clear but it may be the consequence of some tricks that the firmware performs or be an actual firmware bug. As a workaround, increase the timeout to avoid failing the UCSI initialization prematurely. Fixes: b1b59e16075f ("usb: typec: ucsi: Increase command completion timeout value") Cc: stable Signed-off-by: Fedor Pchelkin Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250217105442.113486-3-boddah8794@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 0fe1476f4c297..7a56d3f840d75 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -25,7 +25,7 @@ * difficult to estimate the time it takes for the system to process the command * before it is actually passed to the PPM. */ -#define UCSI_TIMEOUT_MS 5000 +#define UCSI_TIMEOUT_MS 10000 /* * UCSI_SWAP_TIMEOUT_MS - Timeout for role swap requests -- GitLab From a321d163de3d8aa38a6449ab2becf4b1581aed96 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 8 Jan 2025 19:09:27 +0530 Subject: [PATCH 0850/1585] bus: mhi: host: pci_generic: Use pci_try_reset_function() to avoid deadlock There are multiple places from where the recovery work gets scheduled asynchronously. Also, there are multiple places where the caller waits synchronously for the recovery to be completed. One such place is during the PM shutdown() callback. If the device is not alive during recovery_work, it will try to reset the device using pci_reset_function(). This function internally will take the device_lock() first before resetting the device. By this time, if the lock has already been acquired, then recovery_work will get stalled while waiting for the lock. And if the lock was already acquired by the caller which waits for the recovery_work to be completed, it will lead to deadlock. This is what happened on the X1E80100 CRD device when the device died before shutdown() callback. Driver core calls the driver's shutdown() callback while holding the device_lock() leading to deadlock. And this deadlock scenario can occur on other paths as well, like during the PM suspend() callback, where the driver core would hold the device_lock() before calling driver's suspend() callback. And if the recovery_work was already started, it could lead to deadlock. This is also observed on the X1E80100 CRD. So to fix both issues, use pci_try_reset_function() in recovery_work. This function first checks for the availability of the device_lock() before trying to reset the device. If the lock is available, it will acquire it and reset the device. Otherwise, it will return -EAGAIN. If that happens, recovery_work will fail with the error message "Recovery failed" as not much could be done. Cc: stable@vger.kernel.org # 5.12 Reported-by: Johan Hovold Closes: https://lore.kernel.org/mhi/Z1me8iaK7cwgjL92@hovoldconsulting.com Fixes: 7389337f0a78 ("mhi: pci_generic: Add suspend/resume/recovery procedure") Reviewed-by: Johan Hovold Tested-by: Johan Hovold Analyzed-by: Johan Hovold Link: https://lore.kernel.org/mhi/Z2KKjWY2mPen6GPL@hovoldconsulting.com/ Reviewed-by: Loic Poulain Link: https://lore.kernel.org/r/20250108-mhi_recovery_fix-v1-1-a0a00a17da46@linaro.org Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/host/pci_generic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c index c41119b9079f0..7ffea0f981628 100644 --- a/drivers/bus/mhi/host/pci_generic.c +++ b/drivers/bus/mhi/host/pci_generic.c @@ -1095,8 +1095,9 @@ static void mhi_pci_recovery_work(struct work_struct *work) err_unprepare: mhi_unprepare_after_power_down(mhi_cntrl); err_try_reset: - if (pci_reset_function(pdev)) - dev_err(&pdev->dev, "Recovery failed\n"); + err = pci_try_reset_function(pdev); + if (err) + dev_err(&pdev->dev, "Recovery failed: %d\n", err); } static void health_check(struct timer_list *t) -- GitLab From fa2e55811ae25020a5e9b23a8932e67e6d6261a4 Mon Sep 17 00:00:00 2001 From: Mike Lothian Date: Fri, 14 Feb 2025 12:28:00 +0000 Subject: [PATCH 0851/1585] ntsync: Set the permissions to be 0666 This allows ntsync to be usuable by non-root processes out of the box Signed-off-by: Mike Lothian Reviewed-by: Elizabeth Figura Link: https://lore.kernel.org/r/20250214122759.2629-2-mike@fireburn.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ntsync.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index 055395cde42b6..0b4e56d59b3da 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -1208,6 +1208,7 @@ static struct miscdevice ntsync_misc = { .minor = MISC_DYNAMIC_MINOR, .name = NTSYNC_NAME, .fops = &ntsync_fops, + .mode = 0666, }; module_misc_device(ntsync_misc); -- GitLab From fb3331f53e3cb1f1505f918f4f33bb0a3a231e4f Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Tue, 18 Feb 2025 20:34:43 -0700 Subject: [PATCH 0852/1585] io_uring/rsrc: remove unused constants IO_NODE_ALLOC_CACHE_MAX has been unused since commit fbbb8e991d86 ("io_uring/rsrc: get rid of io_rsrc_node allocation cache") removed the rsrc_node_cache. IO_RSRC_TAG_TABLE_SHIFT and IO_RSRC_TAG_TABLE_MASK have been unused since commit 7029acd8a950 ("io_uring/rsrc: get rid of per-ring io_rsrc_node list") removed the separate tag table for registered nodes. Signed-off-by: Caleb Sander Mateos Reviewed-by: Li Zetao Link: https://lore.kernel.org/r/20250219033444.2020136-1-csander@purestorage.com Signed-off-by: Jens Axboe --- io_uring/rsrc.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index 190f7ee45de93..89ea0135a1a0d 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -4,12 +4,6 @@ #include -#define IO_NODE_ALLOC_CACHE_MAX 32 - -#define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3) -#define IO_RSRC_TAG_TABLE_MAX (1U << IO_RSRC_TAG_TABLE_SHIFT) -#define IO_RSRC_TAG_TABLE_MASK (IO_RSRC_TAG_TABLE_MAX - 1) - enum { IORING_RSRC_FILE = 0, IORING_RSRC_BUFFER = 1, -- GitLab From 954b8915ff86353037d4246c7129d807a75f898b Mon Sep 17 00:00:00 2001 From: Eddie James Date: Thu, 13 Feb 2025 16:57:46 -0600 Subject: [PATCH 0853/1585] MAINTAINERS: change maintainer for FSI Due to job transitions, both Joel and Jeremy can no longer maintain the FSI subsystem. I will take over. I also replaced Alistair with Ninad as a reviewer, as Alistair doesn't have access to hardware and hasn't been active. I also removed the link to Joel's FSI tree as he won't be maintaining it. Signed-off-by: Eddie James Acked-by: Jeremy Kerr Link: https://lore.kernel.org/r/20250213225746.2159118-1-eajames@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 25c86f47353de..f6026443526b5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9439,14 +9439,11 @@ F: include/linux/fscrypt.h F: include/uapi/linux/fscrypt.h FSI SUBSYSTEM -M: Jeremy Kerr -M: Joel Stanley -R: Alistar Popple -R: Eddie James +M: Eddie James +R: Ninad Palsule L: linux-fsi@lists.ozlabs.org S: Supported Q: http://patchwork.ozlabs.org/project/linux-fsi/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/joel/fsi.git F: drivers/fsi/ F: include/linux/fsi*.h F: include/trace/events/fsi*.h -- GitLab From 7330195e6018ece3e886177ffbc9349a0b6585e6 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 11 Feb 2025 20:51:25 +1030 Subject: [PATCH 0854/1585] smb: client, common: Avoid multiple -Wflex-array-member-not-at-end warnings -Wflex-array-member-not-at-end was introduced in GCC-14, and we are getting ready to enable it, globally. So, in order to avoid ending up with flexible-array members in the middle of other structs, we use the `__struct_group()` helper to separate the flexible arrays from the rest of the members in the flexible structures. We then use the newly created tagged `struct smb2_file_link_info_hdr` and `struct smb2_file_rename_info_hdr` to replace the type of the objects causing trouble: `rename_info` and `link_info` in `struct smb2_compound_vars`. We also want to ensure that when new members need to be added to the flexible structures, they are always included within the newly created tagged structs. For this, we use `static_assert()`. This ensures that the memory layout for both the flexible structure and the new tagged struct is the same after any changes. So, with these changes, fix 86 of the following warnings: fs/smb/client/cifsglob.h:2335:36: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] fs/smb/client/cifsglob.h:2334:38: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] Signed-off-by: Gustavo A. R. Silva Acked-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 4 ++-- fs/smb/common/smb2pdu.h | 30 ++++++++++++++++++++---------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 4bdd6a43e5215..bc06b8ae2ebd4 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -2324,8 +2324,8 @@ struct smb2_compound_vars { struct kvec io_iov[SMB2_IOCTL_IOV_SIZE]; struct kvec si_iov[SMB2_SET_INFO_IOV_SIZE]; struct kvec close_iov; - struct smb2_file_rename_info rename_info; - struct smb2_file_link_info link_info; + struct smb2_file_rename_info_hdr rename_info; + struct smb2_file_link_info_hdr link_info; struct kvec ea_iov; }; diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index 3336df2ea5d4a..c7a0efda44036 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -1707,23 +1707,33 @@ struct smb2_file_internal_info { } __packed; /* level 6 Query */ struct smb2_file_rename_info { /* encoding of request for level 10 */ - __u8 ReplaceIfExists; /* 1 = replace existing target with new */ - /* 0 = fail if target already exists */ - __u8 Reserved[7]; - __u64 RootDirectory; /* MBZ for network operations (why says spec?) */ - __le32 FileNameLength; + /* New members MUST be added within the struct_group() macro below. */ + __struct_group(smb2_file_rename_info_hdr, __hdr, __packed, + __u8 ReplaceIfExists; /* 1 = replace existing target with new */ + /* 0 = fail if target already exists */ + __u8 Reserved[7]; + __u64 RootDirectory; /* MBZ for network operations (why says spec?) */ + __le32 FileNameLength; + ); char FileName[]; /* New name to be assigned */ /* padding - overall struct size must be >= 24 so filename + pad >= 6 */ } __packed; /* level 10 Set */ +static_assert(offsetof(struct smb2_file_rename_info, FileName) == sizeof(struct smb2_file_rename_info_hdr), + "struct member likely outside of __struct_group()"); struct smb2_file_link_info { /* encoding of request for level 11 */ - __u8 ReplaceIfExists; /* 1 = replace existing link with new */ - /* 0 = fail if link already exists */ - __u8 Reserved[7]; - __u64 RootDirectory; /* MBZ for network operations (why says spec?) */ - __le32 FileNameLength; + /* New members MUST be added within the struct_group() macro below. */ + __struct_group(smb2_file_link_info_hdr, __hdr, __packed, + __u8 ReplaceIfExists; /* 1 = replace existing link with new */ + /* 0 = fail if link already exists */ + __u8 Reserved[7]; + __u64 RootDirectory; /* MBZ for network operations (why says spec?) */ + __le32 FileNameLength; + ); char FileName[]; /* Name to be assigned to new link */ } __packed; /* level 11 Set */ +static_assert(offsetof(struct smb2_file_link_info, FileName) == sizeof(struct smb2_file_link_info_hdr), + "struct member likely outside of __struct_group()"); /* * This level 18, although with struct with same name is different from cifs -- GitLab From 9df23801c83d3e12b4c09be39d37d2be385e52f9 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 16 Feb 2025 22:17:54 -0600 Subject: [PATCH 0855/1585] smb311: failure to open files of length 1040 when mounting with SMB3.1.1 POSIX extensions If a file size has bits 0x410 = ATTR_DIRECTORY | ATTR_REPARSE set then during queryinfo (stat) the file is regarded as a directory and subsequent opens can fail. A simple test example is trying to open any file 1040 bytes long when mounting with "posix" (SMB3.1.1 POSIX/Linux Extensions). The cause of this bug is that Attributes field in smb2_file_all_info struct occupies the same place that EndOfFile field in smb311_posix_qinfo, and sometimes the latter struct is incorrectly processed as if it was the first one. Reported-by: Oleh Nykyforchyn Tested-by: Oleh Nykyforchyn Acked-by: Paulo Alcantara (Red Hat) Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 1 + fs/smb/client/reparse.h | 28 ++++++++++++++++++++++------ fs/smb/client/smb2inode.c | 4 ++++ fs/smb/client/smb2ops.c | 3 ++- 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index bc06b8ae2ebd4..cddeb2adbf4af 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -253,6 +253,7 @@ struct cifs_cred { struct cifs_open_info_data { bool adjust_tz; bool reparse_point; + bool contains_posix_file_info; struct { /* ioctl response buffer */ struct { diff --git a/fs/smb/client/reparse.h b/fs/smb/client/reparse.h index 5a753fec7e2c2..c0be5ab45a78a 100644 --- a/fs/smb/client/reparse.h +++ b/fs/smb/client/reparse.h @@ -99,14 +99,30 @@ static inline bool reparse_inode_match(struct inode *inode, static inline bool cifs_open_data_reparse(struct cifs_open_info_data *data) { - struct smb2_file_all_info *fi = &data->fi; - u32 attrs = le32_to_cpu(fi->Attributes); + u32 attrs; bool ret; - ret = data->reparse_point || (attrs & ATTR_REPARSE); - if (ret) - attrs |= ATTR_REPARSE; - fi->Attributes = cpu_to_le32(attrs); + if (data->contains_posix_file_info) { + struct smb311_posix_qinfo *fi = &data->posix_fi; + + attrs = le32_to_cpu(fi->DosAttributes); + if (data->reparse_point) { + attrs |= ATTR_REPARSE; + fi->DosAttributes = cpu_to_le32(attrs); + } + + } else { + struct smb2_file_all_info *fi = &data->fi; + + attrs = le32_to_cpu(fi->Attributes); + if (data->reparse_point) { + attrs |= ATTR_REPARSE; + fi->Attributes = cpu_to_le32(attrs); + } + } + + ret = attrs & ATTR_REPARSE; + return ret; } diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 5dfb30b0a852c..826b57a5a2a8d 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -650,6 +650,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, switch (cmds[i]) { case SMB2_OP_QUERY_INFO: idata = in_iov[i].iov_base; + idata->contains_posix_file_info = false; if (rc == 0 && cfile && cfile->symlink_target) { idata->symlink_target = kstrdup(cfile->symlink_target, GFP_KERNEL); if (!idata->symlink_target) @@ -673,6 +674,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, break; case SMB2_OP_POSIX_QUERY_INFO: idata = in_iov[i].iov_base; + idata->contains_posix_file_info = true; if (rc == 0 && cfile && cfile->symlink_target) { idata->symlink_target = kstrdup(cfile->symlink_target, GFP_KERNEL); if (!idata->symlink_target) @@ -770,6 +772,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, idata = in_iov[i].iov_base; idata->reparse.io.iov = *iov; idata->reparse.io.buftype = resp_buftype[i + 1]; + idata->contains_posix_file_info = false; /* BB VERIFY */ rbuf = reparse_buf_ptr(iov); if (IS_ERR(rbuf)) { rc = PTR_ERR(rbuf); @@ -791,6 +794,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, case SMB2_OP_QUERY_WSL_EA: if (!rc) { idata = in_iov[i].iov_base; + idata->contains_posix_file_info = false; qi_rsp = rsp_iov[i + 1].iov_base; data[0] = (u8 *)qi_rsp + le16_to_cpu(qi_rsp->OutputBufferOffset); size[0] = le32_to_cpu(qi_rsp->OutputBufferLength); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index ec36bed54b0b9..23e0c8be7fb52 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -1001,6 +1001,7 @@ static int smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, if (!data->symlink_target) return -ENOMEM; } + data->contains_posix_file_info = false; return SMB2_query_info(xid, tcon, fid->persistent_fid, fid->volatile_fid, &data->fi); } @@ -5146,7 +5147,7 @@ int __cifs_sfu_make_node(unsigned int xid, struct inode *inode, FILE_CREATE, CREATE_NOT_DIR | CREATE_OPTION_SPECIAL, ACL_NO_MODE); oparms.fid = &fid; - + idata.contains_posix_file_info = false; rc = server->ops->open(xid, &oparms, &oplock, &idata); if (rc) goto out; -- GitLab From cad3fc0a4c8cef07b07ceddc137f582267577250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Wed, 18 Sep 2024 00:16:05 +0200 Subject: [PATCH 0856/1585] cifs: Throw -EOPNOTSUPP error on unsupported reparse point type from parse_reparse_point() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This would help to track and detect by caller if the reparse point type was processed or not. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/reparse.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index 0a5a52a8a7dd1..2b9e9885dc425 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -1088,13 +1088,12 @@ int parse_reparse_point(struct reparse_data_buffer *buf, le32_to_cpu(buf->ReparseTag)); return -EIO; } - break; + return 0; default: cifs_tcon_dbg(VFS | ONCE, "unhandled reparse tag: 0x%08x\n", le32_to_cpu(buf->ReparseTag)); - break; + return -EOPNOTSUPP; } - return 0; } int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb, -- GitLab From b587fd128660d48cd2122f870f720ff8e2b4abb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Wed, 18 Sep 2024 00:28:25 +0200 Subject: [PATCH 0857/1585] cifs: Treat unhandled directory name surrogate reparse points as mount directory nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the reparse point was not handled (indicated by the -EOPNOTSUPP from ops->parse_reparse_point() call) but reparse tag is of type name surrogate directory type, then treat is as a new mount point. Name surrogate reparse point represents another named entity in the system. From SMB client point of view, this another entity is resolved on the SMB server, and server serves its content automatically. Therefore from Linux client point of view, this name surrogate reparse point of directory type crosses mount point. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/inode.c | 13 +++++++++++++ fs/smb/common/smbfsctl.h | 3 +++ 2 files changed, 16 insertions(+) diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 3261190e6f903..616149c7f0a54 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1215,6 +1215,19 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, rc = server->ops->parse_reparse_point(cifs_sb, full_path, iov, data); + /* + * If the reparse point was not handled but it is the + * name surrogate which points to directory, then treat + * is as a new mount point. Name surrogate reparse point + * represents another named entity in the system. + */ + if (rc == -EOPNOTSUPP && + IS_REPARSE_TAG_NAME_SURROGATE(data->reparse.tag) && + (le32_to_cpu(data->fi.Attributes) & ATTR_DIRECTORY)) { + rc = 0; + cifs_create_junction_fattr(fattr, sb); + goto out; + } } if (data->reparse.tag == IO_REPARSE_TAG_SYMLINK && !rc) { diff --git a/fs/smb/common/smbfsctl.h b/fs/smb/common/smbfsctl.h index 4b379e84c46b9..3253a18ecb5cb 100644 --- a/fs/smb/common/smbfsctl.h +++ b/fs/smb/common/smbfsctl.h @@ -159,6 +159,9 @@ #define IO_REPARSE_TAG_LX_CHR 0x80000025 #define IO_REPARSE_TAG_LX_BLK 0x80000026 +/* If Name Surrogate Bit is set, the file or directory represents another named entity in the system. */ +#define IS_REPARSE_TAG_NAME_SURROGATE(tag) (!!((tag) & 0x20000000)) + /* fsctl flags */ /* If Flags is set to this value, the request is an FSCTL not ioctl request */ #define SMB2_0_IOCTL_IS_FSCTL 0x00000001 -- GitLab From c84e125fff2615b4d9c259e762596134eddd2f27 Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Sat, 15 Feb 2025 00:51:48 +0300 Subject: [PATCH 0858/1585] ovl: fix UAF in ovl_dentry_update_reval by moving dput() in ovl_link_up The issue was caused by dput(upper) being called before ovl_dentry_update_reval(), while upper->d_flags was still accessed in ovl_dentry_remote(). Move dput(upper) after its last use to prevent use-after-free. BUG: KASAN: slab-use-after-free in ovl_dentry_remote fs/overlayfs/util.c:162 [inline] BUG: KASAN: slab-use-after-free in ovl_dentry_update_reval+0xd2/0xf0 fs/overlayfs/util.c:167 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:114 print_address_description mm/kasan/report.c:377 [inline] print_report+0xc3/0x620 mm/kasan/report.c:488 kasan_report+0xd9/0x110 mm/kasan/report.c:601 ovl_dentry_remote fs/overlayfs/util.c:162 [inline] ovl_dentry_update_reval+0xd2/0xf0 fs/overlayfs/util.c:167 ovl_link_up fs/overlayfs/copy_up.c:610 [inline] ovl_copy_up_one+0x2105/0x3490 fs/overlayfs/copy_up.c:1170 ovl_copy_up_flags+0x18d/0x200 fs/overlayfs/copy_up.c:1223 ovl_rename+0x39e/0x18c0 fs/overlayfs/dir.c:1136 vfs_rename+0xf84/0x20a0 fs/namei.c:4893 ... Fixes: b07d5cc93e1b ("ovl: update of dentry revalidate flags after copy up") Reported-by: syzbot+316db8a1191938280eb6@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=316db8a1191938280eb6 Signed-off-by: Vasiliy Kovalev Link: https://lore.kernel.org/r/20250214215148.761147-1-kovalev@altlinux.org Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/overlayfs/copy_up.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 0c28e5fa34077..d7310fcf38881 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -618,7 +618,6 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) err = PTR_ERR(upper); if (!IS_ERR(upper)) { err = ovl_do_link(ofs, ovl_dentry_upper(c->dentry), udir, upper); - dput(upper); if (!err) { /* Restore timestamps on parent (best effort) */ @@ -626,6 +625,7 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) ovl_dentry_set_upper_alias(c->dentry); ovl_dentry_update_reval(c->dentry, upper); } + dput(upper); } inode_unlock(udir); if (err) -- GitLab From 838c17fd077e611b12c78feb0feee1b30ed09b63 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Tue, 11 Feb 2025 09:53:55 +0800 Subject: [PATCH 0859/1585] accel/amdxdna: Add missing include linux/slab.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling without CONFIG_IA32_EMULATION, there can be some errors: drivers/accel/amdxdna/amdxdna_mailbox.c: In function ‘mailbox_release_msg’: drivers/accel/amdxdna/amdxdna_mailbox.c:197:2: error: implicit declaration of function ‘kfree’. 197 | kfree(mb_msg); | ^~~~~ drivers/accel/amdxdna/amdxdna_mailbox.c: In function ‘xdna_mailbox_send_msg’: drivers/accel/amdxdna/amdxdna_mailbox.c:418:11: error:implicit declaration of function ‘kzalloc’. 418 | mb_msg = kzalloc(sizeof(*mb_msg) + pkg_size, GFP_KERNEL); | ^~~~~~~ Add the missing include. Fixes: b87f920b9344 ("accel/amdxdna: Support hardware mailbox") Signed-off-by: Su Hui Reviewed-by: Lizhi Hou Reviewed-by: Jeffrey Hugo Signed-off-by: Lizhi Hou Link: https://patchwork.freedesktop.org/patch/msgid/20250211015354.3388171-1-suhui@nfschina.com --- drivers/accel/amdxdna/amdxdna_mailbox.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c index 814b16bb1953f..e5301fac13971 100644 --- a/drivers/accel/amdxdna/amdxdna_mailbox.c +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS -- GitLab From b9ddb3e1a8aa86c61c4a93e27cf66414f5fa7b6e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 13 Feb 2025 12:43:42 -0500 Subject: [PATCH 0860/1585] bcachefs: Fix fsck directory i_size checking Error handling was wrong, causing unhandled transaction restart errors. check_directory_size() was also inefficient, since keys in multiple snapshots would be iterated over once for every snapshot. Convert it to the same scheme used for i_sectors and subdir count checking. Cc: Hongbo Li Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 78 +++++++++++++++----------------------- fs/bcachefs/sb-downgrade.c | 2 +- 2 files changed, 32 insertions(+), 48 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 53a421ff136d3..9bf316e7b845d 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -823,6 +823,7 @@ struct inode_walker_entry { struct bch_inode_unpacked inode; u32 snapshot; u64 count; + u64 i_size; }; struct inode_walker { @@ -910,8 +911,9 @@ lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_ if (k.k->p.snapshot != i->snapshot && !is_whiteout) { struct inode_walker_entry new = *i; - new.snapshot = k.k->p.snapshot; - new.count = 0; + new.snapshot = k.k->p.snapshot; + new.count = 0; + new.i_size = 0; struct printbuf buf = PRINTBUF; bch2_bkey_val_to_text(&buf, c, k); @@ -1116,37 +1118,6 @@ static int get_snapshot_root_inode(struct btree_trans *trans, return ret; } -static int check_directory_size(struct btree_trans *trans, - struct bch_inode_unpacked *inode_u, - struct bkey_s_c inode_k, bool *write_inode) -{ - struct btree_iter iter; - struct bkey_s_c k; - u64 new_size = 0; - int ret; - - for_each_btree_key_max_norestart(trans, iter, BTREE_ID_dirents, - SPOS(inode_k.k->p.offset, 0, inode_k.k->p.snapshot), - POS(inode_k.k->p.offset, U64_MAX), - 0, k, ret) { - if (k.k->type != KEY_TYPE_dirent) - continue; - - struct bkey_s_c_dirent dirent = bkey_s_c_to_dirent(k); - struct qstr name = bch2_dirent_get_name(dirent); - - new_size += dirent_occupied_size(&name); - } - bch2_trans_iter_exit(trans, &iter); - - if (!ret && inode_u->bi_size != new_size) { - inode_u->bi_size = new_size; - *write_inode = true; - } - - return ret; -} - static int check_inode(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, @@ -1335,16 +1306,6 @@ static int check_inode(struct btree_trans *trans, u.bi_journal_seq = journal_cur_seq(&c->journal); do_update = true; } - - if (S_ISDIR(u.bi_mode)) { - ret = check_directory_size(trans, &u, k, &do_update); - - fsck_err_on(ret, - trans, directory_size_mismatch, - "directory inode %llu:%u with the mismatch directory size", - u.bi_inum, k.k->p.snapshot); - ret = 0; - } do_update: if (do_update) { ret = __bch2_fsck_write_inode(trans, &u); @@ -2017,10 +1978,31 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_ return ret; } -static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) +static int check_dir_i_size_notnested(struct btree_trans *trans, struct inode_walker *w) +{ + struct bch_fs *c = trans->c; + int ret = 0; + + darray_for_each(w->inodes, i) + if (fsck_err_on(i->inode.bi_size != i->i_size, + trans, inode_dir_wrong_nlink, + "directory %llu:%u with wrong i_size: got %llu, should be %llu", + w->last_pos.inode, i->snapshot, i->inode.bi_size, i->i_size)) { + i->inode.bi_size = i->i_size; + ret = bch2_fsck_write_inode(trans, &i->inode); + if (ret) + break; + } +fsck_err: + bch_err_fn(c, ret); + return ret; +} + +static int check_subdir_dirents_count(struct btree_trans *trans, struct inode_walker *w) { u32 restart_count = trans->restart_count; return check_subdir_count_notnested(trans, w) ?: + check_dir_i_size_notnested(trans, w) ?: trans_was_restarted(trans, restart_count); } @@ -2367,7 +2349,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, goto out; if (dir->last_pos.inode != k.k->p.inode && dir->have_inodes) { - ret = check_subdir_count(trans, dir); + ret = check_subdir_dirents_count(trans, dir); if (ret) goto err; } @@ -2457,9 +2439,11 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (ret) goto err; - if (d.v->d_type == DT_DIR) - for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) + for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) { + if (d.v->d_type == DT_DIR) i->count++; + i->i_size += bkey_bytes(d.k); + } out: err: fsck_err: diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 14f6b6a5fb383..35e07bc8fbd34 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -92,7 +92,7 @@ BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ BCH_FSCK_ERR_accounting_key_junk_at_end) \ x(directory_size, \ - BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \ + BIT_ULL(BCH_RECOVERY_PASS_check_dirents), \ BCH_FSCK_ERR_directory_size_mismatch) \ #define DOWNGRADE_TABLE() \ -- GitLab From b9275eabe31e6679ae12c46a4a0a18d622db4570 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 18 Feb 2025 00:38:27 +0200 Subject: [PATCH 0861/1585] drm/i915/dp: Fix error handling during 128b/132b link training At the end of a 128b/132b link training sequence, the HW expects the transcoder training pattern to be set to TPS2 and from that to normal mode (disabling the training pattern). Transitioning from TPS1 directly to normal mode leaves the transcoder in a stuck state, resulting in page-flip timeouts later in the modeset sequence. Atm, in case of a failure during link training, the transcoder may be still set to output the TPS1 pattern. Later the transcoder is then set from TPS1 directly to normal mode in intel_dp_stop_link_train(), leading to modeset failures later as described above. Fix this by setting the training patter to TPS2, if the link training failed at any point. The clue in the specification about the above HW behavior is the explicit mention that TPS2 must be set after the link training sequence (and there isn't a similar requirement specified for the 8b/10b link training), see the Bspec links below. v2: Add bspec aspect/link to the commit log. (Jani) Bspec: 54128, 65448, 68849 Cc: stable@vger.kernel.org # v5.18+ Cc: Jani Nikula Signed-off-by: Imre Deak Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20250217223828.1166093-2-imre.deak@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 8b4bbaf8ddc1f68f3ee96a706f65fdb1bcd9d355) Signed-off-by: Rodrigo Vivi --- .../gpu/drm/i915/display/intel_dp_link_training.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 8b1977cfec503..6696a32cdd3e6 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -1563,7 +1563,7 @@ intel_dp_128b132b_link_train(struct intel_dp *intel_dp, if (wait_for(intel_dp_128b132b_intra_hop(intel_dp, crtc_state) == 0, 500)) { lt_err(intel_dp, DP_PHY_DPRX, "128b/132b intra-hop not clear\n"); - return false; + goto out; } if (intel_dp_128b132b_lane_eq(intel_dp, crtc_state) && @@ -1575,6 +1575,19 @@ intel_dp_128b132b_link_train(struct intel_dp *intel_dp, passed ? "passed" : "failed", crtc_state->port_clock, crtc_state->lane_count); +out: + /* + * Ensure that the training pattern does get set to TPS2 even in case + * of a failure, as is the case at the end of a passing link training + * and what is expected by the transcoder. Leaving TPS1 set (and + * disabling the link train mode in DP_TP_CTL later from TPS1 directly) + * would result in a stuck transcoder HW state and flip-done timeouts + * later in the modeset sequence. + */ + if (!passed) + intel_dp_program_link_training_pattern(intel_dp, crtc_state, + DP_PHY_DPRX, DP_TRAINING_PATTERN_2); + return passed; } -- GitLab From 8058b49bf6fff777bf3f47309c7b15dbef2191af Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 18 Feb 2025 00:38:28 +0200 Subject: [PATCH 0862/1585] drm/i915/dp: Fix disabling the transcoder function in 128b/132b mode During disabling the transcoder in DP 128b/132b mode (both in case of an MST master transcoder and in case of SST) the transcoder function must be first disabled without changing any other field in the register (in particular leaving the DDI port and mode select fields unchanged) and clearing the DDI port and mode select fields separately, later during the disabling sequences. Fix the sequence accordingly. Bspec: 54128, 65448, 68849 Cc: Jani Nikula Fixes: 79a6734cd56e ("drm/i915/ddi: disable trancoder port select for 128b/132b SST") Signed-off-by: Imre Deak Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20250217223828.1166093-3-imre.deak@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 2ed653c7b843db0670136330480842d76cb65cd8) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_ddi.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 2b9240ab547d8..18c66992aa1d8 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -658,7 +658,6 @@ void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); u32 ctl; if (DISPLAY_VER(dev_priv) >= 11) @@ -678,8 +677,7 @@ void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state TRANS_DDI_PORT_SYNC_MASTER_SELECT_MASK); if (DISPLAY_VER(dev_priv) >= 12) { - if (!intel_dp_mst_is_master_trans(crtc_state) || - (!is_mst && intel_dp_is_uhbr(crtc_state))) { + if (!intel_dp_mst_is_master_trans(crtc_state)) { ctl &= ~(TGL_TRANS_DDI_PORT_MASK | TRANS_DDI_MODE_SELECT_MASK); } @@ -3134,7 +3132,7 @@ static void intel_ddi_post_disable_dp(struct intel_atomic_state *state, intel_dp_set_power(intel_dp, DP_SET_POWER_D3); if (DISPLAY_VER(dev_priv) >= 12) { - if (is_mst) { + if (is_mst || intel_dp_is_uhbr(old_crtc_state)) { enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder; intel_de_rmw(dev_priv, -- GitLab From 67b0025d19f99fb9fbb8b62e6975553c183f3a16 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 19 Feb 2025 01:33:37 +0000 Subject: [PATCH 0863/1585] io_uring/rw: forbid multishot async reads At the moment we can't sanely handle queuing an async request from a multishot context, so disable them. It shouldn't matter as pollable files / socekts don't normally do async. Patching it in __io_read() is not the cleanest way, but it's simpler than other options, so let's fix it there and clean up on top. Cc: stable@vger.kernel.org Reported-by: chase xd Fixes: fc68fcda04910 ("io_uring/rw: add support for IORING_OP_READ_MULTISHOT") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/7d51732c125159d17db4fe16f51ec41b936973f8.1739919038.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/rw.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index 7aa1e4c9f64a3..e8efd97fdee5b 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -880,7 +880,15 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags) if (unlikely(ret)) return ret; - ret = io_iter_do_read(rw, &io->iter); + if (unlikely(req->opcode == IORING_OP_READ_MULTISHOT)) { + void *cb_copy = rw->kiocb.ki_complete; + + rw->kiocb.ki_complete = NULL; + ret = io_iter_do_read(rw, &io->iter); + rw->kiocb.ki_complete = cb_copy; + } else { + ret = io_iter_do_read(rw, &io->iter); + } /* * Some file systems like to return -EOPNOTSUPP for an IOCB_NOWAIT @@ -904,7 +912,8 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags) } else if (ret == -EIOCBQUEUED) { return IOU_ISSUE_SKIP_COMPLETE; } else if (ret == req->cqe.res || ret <= 0 || !force_nonblock || - (req->flags & REQ_F_NOWAIT) || !need_complete_io(req)) { + (req->flags & REQ_F_NOWAIT) || !need_complete_io(req) || + (issue_flags & IO_URING_F_MULTISHOT)) { /* read all, failed, already did sync or don't want to retry */ goto done; } -- GitLab From 4e43133c6f2319d3e205ea986c507b25d9b41e64 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 19 Feb 2025 01:33:38 +0000 Subject: [PATCH 0864/1585] io_uring/rw: don't directly use ki_complete We want to avoid checking ->ki_complete directly in the io_uring completion path. Fortunately we have only two callback the selection of which depend on the ring constant flags, i.e. IOPOLL, so use that to infer the function. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/4eb4bdab8cbcf5bc87083f7047edc81e920ab83c.1739919038.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/rw.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index e8efd97fdee5b..27ccc82d78436 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -563,8 +563,10 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res) smp_store_release(&req->iopoll_completed, 1); } -static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) +static inline void io_rw_done(struct io_kiocb *req, ssize_t ret) { + struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); + /* IO was queued async, completion will happen later */ if (ret == -EIOCBQUEUED) return; @@ -586,8 +588,10 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) } } - INDIRECT_CALL_2(kiocb->ki_complete, io_complete_rw_iopoll, - io_complete_rw, kiocb, ret); + if (req->ctx->flags & IORING_SETUP_IOPOLL) + io_complete_rw_iopoll(&rw->kiocb, ret); + else + io_complete_rw(&rw->kiocb, ret); } static int kiocb_done(struct io_kiocb *req, ssize_t ret, @@ -598,7 +602,7 @@ static int kiocb_done(struct io_kiocb *req, ssize_t ret, if (ret >= 0 && req->flags & REQ_F_CUR_POS) req->file->f_pos = rw->kiocb.ki_pos; - if (ret >= 0 && (rw->kiocb.ki_complete == io_complete_rw)) { + if (ret >= 0 && !(req->ctx->flags & IORING_SETUP_IOPOLL)) { __io_complete_rw_common(req, ret); /* * Safe to call io_end from here as we're inline @@ -609,7 +613,7 @@ static int kiocb_done(struct io_kiocb *req, ssize_t ret, io_req_rw_cleanup(req, issue_flags); return IOU_OK; } else { - io_rw_done(&rw->kiocb, ret); + io_rw_done(req, ret); } return IOU_ISSUE_SKIP_COMPLETE; -- GitLab From 74f3e875268f1ce2dd01029c29560263212077df Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 19 Feb 2025 01:33:39 +0000 Subject: [PATCH 0865/1585] io_uring/rw: move ki_complete init into prep Initialise ki_complete during request prep stage, we'll depend on it not being reset during issue in the following patch. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/817624086bd5f0448b08c80623399919fda82f34.1739919038.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/rw.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index 27ccc82d78436..d162565053892 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -23,6 +23,9 @@ #include "poll.h" #include "rw.h" +static void io_complete_rw(struct kiocb *kiocb, long res); +static void io_complete_rw_iopoll(struct kiocb *kiocb, long res); + struct io_rw { /* NOTE: kiocb has the file as the first member, so don't do it here */ struct kiocb kiocb; @@ -289,6 +292,11 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, rw->kiocb.dio_complete = NULL; rw->kiocb.ki_flags = 0; + if (req->ctx->flags & IORING_SETUP_IOPOLL) + rw->kiocb.ki_complete = io_complete_rw_iopoll; + else + rw->kiocb.ki_complete = io_complete_rw; + rw->addr = READ_ONCE(sqe->addr); rw->len = READ_ONCE(sqe->len); rw->flags = READ_ONCE(sqe->rw_flags); @@ -817,10 +825,8 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type) if (ctx->flags & IORING_SETUP_IOPOLL) { if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll) return -EOPNOTSUPP; - kiocb->private = NULL; kiocb->ki_flags |= IOCB_HIPRI; - kiocb->ki_complete = io_complete_rw_iopoll; req->iopoll_completed = 0; if (ctx->flags & IORING_SETUP_HYBRID_IOPOLL) { /* make sure every req only blocks once*/ @@ -830,7 +836,6 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type) } else { if (kiocb->ki_flags & IOCB_HIPRI) return -EINVAL; - kiocb->ki_complete = io_complete_rw; } if (req->flags & REQ_F_HAS_METADATA) { -- GitLab From 4614de748e78a295ee9b1f54ca87280b101fbdf0 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 19 Feb 2025 01:33:40 +0000 Subject: [PATCH 0866/1585] io_uring/rw: clean up mshot forced sync mode Move code forcing synchronous execution of multishot read requests out a more generic __io_read(). Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/4ad7b928c776d1ad59addb9fff64ef2d1fc474d5.1739919038.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/rw.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index d162565053892..9edc6baebd01c 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -889,15 +889,7 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags) if (unlikely(ret)) return ret; - if (unlikely(req->opcode == IORING_OP_READ_MULTISHOT)) { - void *cb_copy = rw->kiocb.ki_complete; - - rw->kiocb.ki_complete = NULL; - ret = io_iter_do_read(rw, &io->iter); - rw->kiocb.ki_complete = cb_copy; - } else { - ret = io_iter_do_read(rw, &io->iter); - } + ret = io_iter_do_read(rw, &io->iter); /* * Some file systems like to return -EOPNOTSUPP for an IOCB_NOWAIT @@ -995,6 +987,8 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) if (!io_file_can_poll(req)) return -EBADFD; + /* make it sync, multishot doesn't support async execution */ + rw->kiocb.ki_complete = NULL; ret = __io_read(req, issue_flags); /* -- GitLab From fcf857ee1958e9247298251f7615d0c76f1e9b38 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 1 Feb 2025 14:59:02 -0500 Subject: [PATCH 0867/1585] NFS: O_DIRECT writes must check and adjust the file length While it is uncommon for delegations to be held while O_DIRECT writes are in progress, it is possible. The xfstests generic/647 and generic/729 both end up triggering that state, and end up failing due to the fact that the file size is not adjusted. Reported-by: Chuck Lever Link: https://bugzilla.kernel.org/show_bug.cgi?id=219738 Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f45beea92d034..40e13c9a2873f 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -130,6 +130,20 @@ static void nfs_direct_truncate_request(struct nfs_direct_req *dreq, dreq->count = req_start; } +static void nfs_direct_file_adjust_size_locked(struct inode *inode, + loff_t offset, size_t count) +{ + loff_t newsize = offset + (loff_t)count; + loff_t oldsize = i_size_read(inode); + + if (newsize > oldsize) { + i_size_write(inode, newsize); + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; + trace_nfs_size_grow(inode, newsize); + nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); + } +} + /** * nfs_swap_rw - NFS address space operation for swap I/O * @iocb: target I/O control block @@ -741,6 +755,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) struct nfs_direct_req *dreq = hdr->dreq; struct nfs_commit_info cinfo; struct nfs_page *req = nfs_list_entry(hdr->pages.next); + struct inode *inode = dreq->inode; int flags = NFS_ODIRECT_DONE; trace_nfs_direct_write_completion(dreq); @@ -762,6 +777,10 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) } spin_unlock(&dreq->lock); + spin_lock(&inode->i_lock); + nfs_direct_file_adjust_size_locked(inode, dreq->io_start, dreq->count); + spin_unlock(&inode->i_lock); + while (!list_empty(&hdr->pages)) { req = nfs_list_entry(hdr->pages.next); -- GitLab From 88025c67fe3c025a0123bc7af50535b97f7af89a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 1 Feb 2025 14:59:03 -0500 Subject: [PATCH 0868/1585] NFS: Adjust delegated timestamps for O_DIRECT reads and writes Adjust the timestamps if O_DIRECT is being combined with attribute delegations. Fixes: e12912d94137 ("NFSv4: Add support for delegated atime and mtime attributes") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 40e13c9a2873f..f32f8d7c9122b 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -56,6 +56,7 @@ #include #include +#include "delegation.h" #include "internal.h" #include "iostat.h" #include "pnfs.h" @@ -286,6 +287,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) nfs_direct_count_bytes(dreq, hdr); spin_unlock(&dreq->lock); + nfs_update_delegated_atime(dreq->inode); + while (!list_empty(&hdr->pages)) { struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct page *page = req->wb_page; @@ -779,6 +782,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) spin_lock(&inode->i_lock); nfs_direct_file_adjust_size_locked(inode, dreq->io_start, dreq->count); + nfs_update_delegated_mtime_locked(dreq->inode); spin_unlock(&inode->i_lock); while (!list_empty(&hdr->pages)) { -- GitLab From 5bbd6e863b15a85221e49b9bdb2d5d8f0bb91f3d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 1 Feb 2025 15:00:02 -0500 Subject: [PATCH 0869/1585] SUNRPC: Prevent looping due to rpc_signal_task() races If rpc_signal_task() is called while a task is in an rpc_call_done() callback function, and the latter calls rpc_restart_call(), the task can end up looping due to the RPC_TASK_SIGNALLED flag being set without the tk_rpc_status being set. Removing the redundant mechanism for signalling the task fixes the looping behaviour. Reported-by: Li Lingfeng Fixes: 39494194f93b ("SUNRPC: Fix races with rpc_killall_tasks()") Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/sunrpc/sched.h | 3 +-- include/trace/events/sunrpc.h | 3 +-- net/sunrpc/sched.c | 2 -- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index fec1e8a1570c3..eac57914dcf32 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -158,7 +158,6 @@ enum { RPC_TASK_NEED_XMIT, RPC_TASK_NEED_RECV, RPC_TASK_MSG_PIN_WAIT, - RPC_TASK_SIGNALLED, }; #define rpc_test_and_set_running(t) \ @@ -171,7 +170,7 @@ enum { #define RPC_IS_ACTIVATED(t) test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate) -#define RPC_SIGNALLED(t) test_bit(RPC_TASK_SIGNALLED, &(t)->tk_runstate) +#define RPC_SIGNALLED(t) (READ_ONCE(task->tk_rpc_status) == -ERESTARTSYS) /* * Task priorities. diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index b13dc275ef4a7..851841336ee65 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -360,8 +360,7 @@ TRACE_EVENT(rpc_request, { (1UL << RPC_TASK_ACTIVE), "ACTIVE" }, \ { (1UL << RPC_TASK_NEED_XMIT), "NEED_XMIT" }, \ { (1UL << RPC_TASK_NEED_RECV), "NEED_RECV" }, \ - { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" }, \ - { (1UL << RPC_TASK_SIGNALLED), "SIGNALLED" }) + { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" }) DECLARE_EVENT_CLASS(rpc_task_running, diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index cef623ea15060..9b45fbdc90cab 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -864,8 +864,6 @@ void rpc_signal_task(struct rpc_task *task) if (!rpc_task_set_rpc_status(task, -ERESTARTSYS)) return; trace_rpc_task_signalled(task, task->tk_action); - set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate); - smp_mb__after_atomic(); queue = READ_ONCE(task->tk_waitqueue); if (queue) rpc_wake_up_queued_task(queue, task); -- GitLab From 8f8df955f078e1a023ee55161935000a67651f38 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 1 Feb 2025 15:00:09 -0500 Subject: [PATCH 0870/1585] NFSv4: Fix a deadlock when recovering state on a sillyrenamed file If the file is sillyrenamed, and slated for delete on close, it is possible for a server reboot to triggeer an open reclaim, with can again race with the application call to close(). When that happens, the call to put_nfs_open_context() can trigger a synchronous delegreturn call which deadlocks because it is not marked as privileged. Instead, ensure that the call to nfs4_inode_return_delegation_on_close() catches the delegreturn, and schedules it asynchronously. Reported-by: Li Lingfeng Fixes: adb4b42d19ae ("Return the delegation when deleting sillyrenamed files") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 37 +++++++++++++++++++++++++++++++++++++ fs/nfs/delegation.h | 1 + fs/nfs/nfs4proc.c | 3 +++ 3 files changed, 41 insertions(+) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 035ba52742a50..4db912f562305 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -780,6 +780,43 @@ int nfs4_inode_return_delegation(struct inode *inode) return 0; } +/** + * nfs4_inode_set_return_delegation_on_close - asynchronously return a delegation + * @inode: inode to process + * + * This routine is called to request that the delegation be returned as soon + * as the file is closed. If the file is already closed, the delegation is + * immediately returned. + */ +void nfs4_inode_set_return_delegation_on_close(struct inode *inode) +{ + struct nfs_delegation *delegation; + struct nfs_delegation *ret = NULL; + + if (!inode) + return; + rcu_read_lock(); + delegation = nfs4_get_valid_delegation(inode); + if (!delegation) + goto out; + spin_lock(&delegation->lock); + if (!delegation->inode) + goto out_unlock; + if (list_empty(&NFS_I(inode)->open_files) && + !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { + /* Refcount matched in nfs_end_delegation_return() */ + ret = nfs_get_delegation(delegation); + } else + set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); +out_unlock: + spin_unlock(&delegation->lock); + if (ret) + nfs_clear_verifier_delegated(inode); +out: + rcu_read_unlock(); + nfs_end_delegation_return(inode, ret, 0); +} + /** * nfs4_inode_return_delegation_on_close - asynchronously return a delegation * @inode: inode to process diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 71524d34ed207..8ff5ab9c5c256 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -49,6 +49,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, unsigned long pagemod_limit, u32 deleg_type); int nfs4_inode_return_delegation(struct inode *inode); void nfs4_inode_return_delegation_on_close(struct inode *inode); +void nfs4_inode_set_return_delegation_on_close(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); void nfs_inode_evict_delegation(struct inode *inode); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index df9669d4ded7f..c25ecdb76d304 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3906,8 +3906,11 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) { + struct dentry *dentry = ctx->dentry; if (ctx->state == NULL) return; + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) + nfs4_inode_set_return_delegation_on_close(d_inode(dentry)); if (is_sync) nfs4_close_sync(ctx->state, _nfs4_ctx_to_openmode(ctx)); else -- GitLab From 7a2f6f7687c5f7083a35317cddec5ad9fa491443 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 11 Feb 2025 12:31:57 -0500 Subject: [PATCH 0871/1585] SUNRPC: Handle -ETIMEDOUT return from tlshd If the TLS handshake attempt returns -ETIMEDOUT, we currently translate that error into -EACCES. This becomes problematic for cases where the RPC layer is attempting to re-connect in paths that don't resonably handle -EACCES, for example: writeback. The RPC layer can handle -ETIMEDOUT quite well, however - so if the handshake returns this error let's just pass it along. Fixes: 75eb6af7acdf ("SUNRPC: Add a TCP-with-TLS RPC transport class") Signed-off-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- net/sunrpc/xprtsock.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c60936d8cef71..6b80b2aaf7639 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2581,7 +2581,15 @@ static void xs_tls_handshake_done(void *data, int status, key_serial_t peerid) struct sock_xprt *lower_transport = container_of(lower_xprt, struct sock_xprt, xprt); - lower_transport->xprt_err = status ? -EACCES : 0; + switch (status) { + case 0: + case -EACCES: + case -ETIMEDOUT: + lower_transport->xprt_err = status; + break; + default: + lower_transport->xprt_err = -EACCES; + } complete(&lower_transport->handshake_done); xprt_put(lower_xprt); } -- GitLab From 102c51c50db88aedd00a318b7708ad60dbec2e95 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 14 Feb 2025 13:37:24 +0000 Subject: [PATCH 0872/1585] KVM: arm64: Fix tcr_el2 initialisation in hVHE mode When not running in VHE mode, cpu_prepare_hyp_mode() computes the value of TCR_EL2 using the host's TCR_EL1 settings as a starting point. For nVHE, this amounts to masking out everything apart from the TG0, SH0, ORGN0, IRGN0 and T0SZ fields before setting the RES1 bits, shifting the IPS field down to the PS field and setting DS if LPA2 is enabled. Unfortunately, for hVHE, things go slightly wonky: EPD1 is correctly set to disable walks via TTBR1_EL2 but then the T1SZ and IPS fields are corrupted when we mistakenly attempt to initialise the PS and DS fields in their E2H=0 positions. Furthermore, many fields are retained from TCR_EL1 which should not be propagated to TCR_EL2. Notably, this means we can end up with A1 set despite not initialising TTBR1_EL2 at all. This has been shown to cause unexpected translation faults at EL2 with pKVM due to TLB invalidation not taking effect when running with a non-zero ASID. Fix the TCR_EL2 initialisation code to set PS and DS only when E2H=0, masking out HD, HA and A1 when E2H=1. Cc: Marc Zyngier Cc: Oliver Upton Fixes: ad744e8cb346 ("arm64: Allow arm64_sw.hvhe on command line") Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20250214133724.13179-1-will@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_arm.h | 2 +- arch/arm64/kvm/arm.c | 15 +++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 8d94a6c0ed5c4..c2417a424b98d 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -119,7 +119,7 @@ #define TCR_EL2_IRGN0_MASK TCR_IRGN0_MASK #define TCR_EL2_T0SZ_MASK 0x3f #define TCR_EL2_MASK (TCR_EL2_TG0_MASK | TCR_EL2_SH0_MASK | \ - TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK) + TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK) /* VTCR_EL2 Registers bits */ #define VTCR_EL2_DS TCR_EL2_DS diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index b8e55a441282f..bc7a37cea2420 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1980,7 +1980,7 @@ static int kvm_init_vector_slots(void) static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) { struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); - unsigned long tcr, ips; + unsigned long tcr; /* * Calculate the raw per-cpu offset without a translation from the @@ -1994,19 +1994,18 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) params->mair_el2 = read_sysreg(mair_el1); tcr = read_sysreg(tcr_el1); - ips = FIELD_GET(TCR_IPS_MASK, tcr); if (cpus_have_final_cap(ARM64_KVM_HVHE)) { + tcr &= ~(TCR_HD | TCR_HA | TCR_A1 | TCR_T0SZ_MASK); tcr |= TCR_EPD1_MASK; } else { + unsigned long ips = FIELD_GET(TCR_IPS_MASK, tcr); + tcr &= TCR_EL2_MASK; - tcr |= TCR_EL2_RES1; + tcr |= TCR_EL2_RES1 | FIELD_PREP(TCR_EL2_PS_MASK, ips); + if (lpa2_is_enabled()) + tcr |= TCR_EL2_DS; } - tcr &= ~TCR_T0SZ_MASK; tcr |= TCR_T0SZ(hyp_va_bits); - tcr &= ~TCR_EL2_PS_MASK; - tcr |= FIELD_PREP(TCR_EL2_PS_MASK, ips); - if (lpa2_is_enabled()) - tcr |= TCR_EL2_DS; params->tcr_el2 = tcr; params->pgd_pa = kvm_mmu_get_httbr(); -- GitLab From 4fd509c10f9687f54752fbcaf83f520c93fc1f18 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 19 Feb 2025 13:45:02 -0500 Subject: [PATCH 0873/1585] bcachefs: Fix bch2_indirect_extent_missing_error() We had some error handling confusion here; -BCH_ERR_missing_indirect_extent is thrown by trans_trigger_reflink_p_segment(); at this point we haven't decide whether we're generating an error. Signed-off-by: Kent Overstreet --- fs/bcachefs/reflink.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 376fd0a6e868c..441e648f28b51 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -172,7 +172,7 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans, bool should_commit) { if (REFLINK_P_ERROR(p.v)) - return -BCH_ERR_missing_indirect_extent; + return 0; struct bch_fs *c = trans->c; u64 live_start = REFLINK_P_IDX(p.v); @@ -259,8 +259,6 @@ struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *trans, return k; if (unlikely(!bkey_extent_is_reflink_data(k.k))) { - bch2_trans_iter_exit(trans, iter); - unsigned size = min((u64) k.k->size, REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad) - reflink_offset); @@ -268,14 +266,16 @@ struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *trans, int ret = bch2_indirect_extent_missing_error(trans, p, reflink_offset, k.k->p.offset, should_commit); - if (ret) + if (ret) { + bch2_trans_iter_exit(trans, iter); return bkey_s_c_err(ret); + } } else if (unlikely(REFLINK_P_ERROR(p.v))) { - bch2_trans_iter_exit(trans, iter); - int ret = bch2_indirect_extent_not_missing(trans, p, should_commit); - if (ret) + if (ret) { + bch2_trans_iter_exit(trans, iter); return bkey_s_c_err(ret); + } } *offset_into_extent = reflink_offset - bkey_start_offset(k.k); @@ -300,7 +300,7 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans, if (ret) return ret; - if (bkey_deleted(k.k)) { + if (!bkey_refcount_c(k)) { if (!(flags & BTREE_TRIGGER_overwrite)) ret = -BCH_ERR_missing_indirect_extent; goto next; @@ -381,8 +381,6 @@ static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans, not_found: if (flags & BTREE_TRIGGER_check_repair) { ret = bch2_indirect_extent_missing_error(trans, p, *idx, next_idx, false); - if (ret == -BCH_ERR_missing_indirect_extent) - ret = 0; if (ret) goto err; } -- GitLab From b04974f759ac7574d8556deb7c602a8d01a0dcc6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 19 Feb 2025 15:40:03 -0500 Subject: [PATCH 0874/1585] bcachefs: Fix srcu lock warning in btree_update_nodes_written() We don't want to be holding the srcu lock while waiting on btree write completions - easily fixed. Reported-by: Janpieter Sollie Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index f4aeadbe53c1a..e4e7c804625e0 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -681,9 +681,11 @@ static void btree_update_nodes_written(struct btree_update *as) b = as->old_nodes[i]; + bch2_trans_begin(trans); btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); seq = b->data ? b->data->keys.seq : 0; six_unlock_read(&b->c.lock); + bch2_trans_unlock_long(trans); if (seq == as->old_nodes_seq[i]) wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight_inner, -- GitLab From 4ccacf86491d33d2486b62d4d44864d7101b299d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 17 Feb 2025 12:37:04 -0800 Subject: [PATCH 0875/1585] gtp: Suppress list corruption splat in gtp_net_exit_batch_rtnl(). Brad Spengler reported the list_del() corruption splat in gtp_net_exit_batch_rtnl(). [0] Commit eb28fd76c0a0 ("gtp: Destroy device along with udp socket's netns dismantle.") added the for_each_netdev() loop in gtp_net_exit_batch_rtnl() to destroy devices in each netns as done in geneve and ip tunnels. However, this could trigger ->dellink() twice for the same device during ->exit_batch_rtnl(). Say we have two netns A & B and gtp device B that resides in netns B but whose UDP socket is in netns A. 1. cleanup_net() processes netns A and then B. 2. gtp_net_exit_batch_rtnl() finds the device B while iterating netns A's gn->gtp_dev_list and calls ->dellink(). [ device B is not yet unlinked from netns B as unregister_netdevice_many() has not been called. ] 3. gtp_net_exit_batch_rtnl() finds the device B while iterating netns B's for_each_netdev() and calls ->dellink(). gtp_dellink() cleans up the device's hash table, unlinks the dev from gn->gtp_dev_list, and calls unregister_netdevice_queue(). Basically, calling gtp_dellink() multiple times is fine unless CONFIG_DEBUG_LIST is enabled. Let's remove for_each_netdev() in gtp_net_exit_batch_rtnl() and delegate the destruction to default_device_exit_batch() as done in bareudp. [0]: list_del corruption, ffff8880aaa62c00->next (autoslab_size_M_dev_P_net_core_dev_11127_8_1328_8_S_4096_A_64_n_139+0xc00/0x1000 [slab object]) is LIST_POISON1 (ffffffffffffff02) (prev is 0xffffffffffffff04) kernel BUG at lib/list_debug.c:58! Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 1 UID: 0 PID: 1804 Comm: kworker/u8:7 Tainted: G T 6.12.13-grsec-full-20250211091339 #1 Tainted: [T]=RANDSTRUCT Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Workqueue: netns cleanup_net RIP: 0010:[] __list_del_entry_valid_or_report+0x141/0x200 lib/list_debug.c:58 Code: c2 76 91 31 c0 e8 9f b1 f7 fc 0f 0b 4d 89 f0 48 c7 c1 02 ff ff ff 48 89 ea 48 89 ee 48 c7 c7 e0 c2 76 91 31 c0 e8 7f b1 f7 fc <0f> 0b 4d 89 e8 48 c7 c1 04 ff ff ff 48 89 ea 48 89 ee 48 c7 c7 60 RSP: 0018:fffffe8040b4fbd0 EFLAGS: 00010283 RAX: 00000000000000cc RBX: dffffc0000000000 RCX: ffffffff818c4054 RDX: ffffffff84947381 RSI: ffffffff818d1512 RDI: 0000000000000000 RBP: ffff8880aaa62c00 R08: 0000000000000001 R09: fffffbd008169f32 R10: fffffe8040b4f997 R11: 0000000000000001 R12: a1988d84f24943e4 R13: ffffffffffffff02 R14: ffffffffffffff04 R15: ffff8880aaa62c08 RBX: kasan shadow of 0x0 RCX: __wake_up_klogd.part.0+0x74/0xe0 kernel/printk/printk.c:4554 RDX: __list_del_entry_valid_or_report+0x141/0x200 lib/list_debug.c:58 RSI: vprintk+0x72/0x100 kernel/printk/printk_safe.c:71 RBP: autoslab_size_M_dev_P_net_core_dev_11127_8_1328_8_S_4096_A_64_n_139+0xc00/0x1000 [slab object] RSP: process kstack fffffe8040b4fbd0+0x7bd0/0x8000 [kworker/u8:7+netns 1804 ] R09: kasan shadow of process kstack fffffe8040b4f990+0x7990/0x8000 [kworker/u8:7+netns 1804 ] R10: process kstack fffffe8040b4f997+0x7997/0x8000 [kworker/u8:7+netns 1804 ] R15: autoslab_size_M_dev_P_net_core_dev_11127_8_1328_8_S_4096_A_64_n_139+0xc08/0x1000 [slab object] FS: 0000000000000000(0000) GS:ffff888116000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000748f5372c000 CR3: 0000000015408000 CR4: 00000000003406f0 shadow CR4: 00000000003406f0 Stack: 0000000000000000 ffffffff8a0c35e7 ffffffff8a0c3603 ffff8880aaa62c00 ffff8880aaa62c00 0000000000000004 ffff88811145311c 0000000000000005 0000000000000001 ffff8880aaa62000 fffffe8040b4fd40 ffffffff8a0c360d Call Trace: [] __list_del_entry_valid include/linux/list.h:131 [inline] fffffe8040b4fc28 [] __list_del_entry include/linux/list.h:248 [inline] fffffe8040b4fc28 [] list_del include/linux/list.h:262 [inline] fffffe8040b4fc28 [] gtp_dellink+0x16d/0x360 drivers/net/gtp.c:1557 fffffe8040b4fc28 [] gtp_net_exit_batch_rtnl+0x124/0x2c0 drivers/net/gtp.c:2495 fffffe8040b4fc88 [] cleanup_net+0x5a4/0xbe0 net/core/net_namespace.c:635 fffffe8040b4fcd0 [] process_one_work+0xbd7/0x2160 kernel/workqueue.c:3326 fffffe8040b4fd88 [] process_scheduled_works kernel/workqueue.c:3407 [inline] fffffe8040b4fec0 [] worker_thread+0x6b5/0xfa0 kernel/workqueue.c:3488 fffffe8040b4fec0 [] kthread+0x360/0x4c0 kernel/kthread.c:397 fffffe8040b4ff78 [] ret_from_fork+0x74/0xe0 arch/x86/kernel/process.c:172 fffffe8040b4ffb8 [] ret_from_fork_asm+0x29/0xc0 arch/x86/entry/entry_64.S:399 fffffe8040b4ffe8 Modules linked in: Fixes: eb28fd76c0a0 ("gtp: Destroy device along with udp socket's netns dismantle.") Reported-by: Brad Spengler Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250217203705.40342-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/gtp.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index d64740bf44ed3..b7b46c5e6399a 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -2481,11 +2481,6 @@ static void __net_exit gtp_net_exit_batch_rtnl(struct list_head *net_list, list_for_each_entry(net, net_list, exit_list) { struct gtp_net *gn = net_generic(net, gtp_net_id); struct gtp_dev *gtp, *gtp_next; - struct net_device *dev; - - for_each_netdev(net, dev) - if (dev->rtnl_link_ops == >p_link_ops) - gtp_dellink(dev, dev_to_kill); list_for_each_entry_safe(gtp, gtp_next, &gn->gtp_dev_list, list) gtp_dellink(gtp->dev, dev_to_kill); -- GitLab From 62fab6eef61f245dc8797e3a6a5b890ef40e8628 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 17 Feb 2025 12:37:05 -0800 Subject: [PATCH 0876/1585] geneve: Suppress list corruption splat in geneve_destroy_tunnels(). As explained in the previous patch, iterating for_each_netdev() and gn->geneve_list during ->exit_batch_rtnl() could trigger ->dellink() twice for the same device. If CONFIG_DEBUG_LIST is enabled, we will see a list_del() corruption splat in the 2nd call of geneve_dellink(). Let's remove for_each_netdev() in geneve_destroy_tunnels() and delegate that part to default_device_exit_batch(). Fixes: 9593172d93b9 ("geneve: Fix use-after-free in geneve_find_dev().") Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250217203705.40342-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/geneve.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index a1f674539965d..dbb3960126ee7 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1902,14 +1902,7 @@ static void geneve_destroy_tunnels(struct net *net, struct list_head *head) { struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_dev *geneve, *next; - struct net_device *dev, *aux; - /* gather any geneve devices that were moved into this ns */ - for_each_netdev_safe(net, dev, aux) - if (dev->rtnl_link_ops == &geneve_link_ops) - geneve_dellink(dev, head); - - /* now gather any other geneve devices that were created in this ns */ list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) geneve_dellink(geneve->dev, head); } -- GitLab From 3e5796862c692ea608d96f0a1437f9290f44953a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 17 Feb 2025 20:32:07 -0800 Subject: [PATCH 0877/1585] flow_dissector: Fix handling of mixed port and port-range keys This patch fixes a bug in TC flower filter where rules combining a specific destination port with a source port range weren't working correctly. The specific case was when users tried to configure rules like: tc filter add dev ens38 ingress protocol ip flower ip_proto udp \ dst_port 5000 src_port 2000-3000 action drop The root cause was in the flow dissector code. While both FLOW_DISSECTOR_KEY_PORTS and FLOW_DISSECTOR_KEY_PORTS_RANGE flags were being set correctly in the classifier, the __skb_flow_dissect_ports() function was only populating one of them: whichever came first in the enum check. This meant that when the code needed both a specific port and a port range, one of them would be left as 0, causing the filter to not match packets as expected. Fix it by removing the either/or logic and instead checking and populating both key types independently when they're in use. Fixes: 8ffb055beae5 ("cls_flower: Fix the behavior using port ranges with hw-offload") Reported-by: Qiang Zhang Closes: https://lore.kernel.org/netdev/CAPx+-5uvFxkhkz4=j_Xuwkezjn9U6kzKTD5jz4tZ9msSJ0fOJA@mail.gmail.com/ Cc: Yoshiki Komachi Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20250218043210.732959-2-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- net/core/flow_dissector.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 5db41bf2ed93e..c33af3ef0b790 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -853,23 +853,30 @@ __skb_flow_dissect_ports(const struct sk_buff *skb, void *target_container, const void *data, int nhoff, u8 ip_proto, int hlen) { - enum flow_dissector_key_id dissector_ports = FLOW_DISSECTOR_KEY_MAX; - struct flow_dissector_key_ports *key_ports; + struct flow_dissector_key_ports_range *key_ports_range = NULL; + struct flow_dissector_key_ports *key_ports = NULL; + __be32 ports; if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) - dissector_ports = FLOW_DISSECTOR_KEY_PORTS; - else if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_PORTS_RANGE)) - dissector_ports = FLOW_DISSECTOR_KEY_PORTS_RANGE; + key_ports = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS, + target_container); - if (dissector_ports == FLOW_DISSECTOR_KEY_MAX) + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS_RANGE)) + key_ports_range = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS_RANGE, + target_container); + + if (!key_ports && !key_ports_range) return; - key_ports = skb_flow_dissector_target(flow_dissector, - dissector_ports, - target_container); - key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, - data, hlen); + ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen); + + if (key_ports) + key_ports->ports = ports; + + if (key_ports_range) + key_ports_range->tp.ports = ports; } static void -- GitLab From dfc1580f960bf70bdaacda8f3d644e3e58160f9d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 17 Feb 2025 20:32:08 -0800 Subject: [PATCH 0878/1585] selftests/net/forwarding: Add a test case for tc-flower of mixed port and port-range After this patch: # ./tc_flower_port_range.sh TEST: Port range matching - IPv4 UDP [ OK ] TEST: Port range matching - IPv4 TCP [ OK ] TEST: Port range matching - IPv6 UDP [ OK ] TEST: Port range matching - IPv6 TCP [ OK ] TEST: Port range matching - IPv4 UDP Drop [ OK ] Cc: Qiang Zhang Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://patch.msgid.link/20250218043210.732959-3-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- .../net/forwarding/tc_flower_port_range.sh | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh index 3885a2a91f7d8..baed5e380dae4 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh @@ -20,6 +20,7 @@ ALL_TESTS=" test_port_range_ipv4_tcp test_port_range_ipv6_udp test_port_range_ipv6_tcp + test_port_range_ipv4_udp_drop " NUM_NETIFS=4 @@ -194,6 +195,51 @@ test_port_range_ipv6_tcp() __test_port_range $proto $ip_proto $sip $dip $mode "$name" } +test_port_range_ipv4_udp_drop() +{ + local proto=ipv4 + local ip_proto=udp + local sip=192.0.2.1 + local dip=192.0.2.2 + local mode="-4" + local name="IPv4 UDP Drop" + local dmac=$(mac_get $h2) + local smac=$(mac_get $h1) + local sport_min=2000 + local sport_max=3000 + local sport_mid=$((sport_min + (sport_max - sport_min) / 2)) + local dport=5000 + + RET=0 + + tc filter add dev $swp1 ingress protocol $proto handle 101 pref 1 \ + flower src_ip $sip dst_ip $dip ip_proto $ip_proto \ + src_port $sport_min-$sport_max \ + dst_port $dport \ + action drop + + # Test ports outside range - should pass + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$((sport_min - 1)),dp=$dport" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$((sport_max + 1)),dp=$dport" + + # Test ports inside range - should be dropped + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_min,dp=$dport" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_mid,dp=$dport" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_max,dp=$dport" + + tc_check_packets "dev $swp1 ingress" 101 3 + check_err $? "Filter did not drop the expected number of packets" + + tc filter del dev $swp1 ingress protocol $proto pref 1 handle 101 flower + + log_test "Port range matching - $name" +} + setup_prepare() { h1=${NETIFS[p1]} -- GitLab From 69ab34f705fbfabcace64b5d53bb7a4450fac875 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 17 Feb 2025 20:32:09 -0800 Subject: [PATCH 0879/1585] flow_dissector: Fix port range key handling in BPF conversion Fix how port range keys are handled in __skb_flow_bpf_to_target() by: - Separating PORTS and PORTS_RANGE key handling - Using correct key_ports_range structure for range keys - Properly initializing both key types independently This ensures port range information is correctly stored in its dedicated structure rather than incorrectly using the regular ports key structure. Fixes: 59fb9b62fb6c ("flow_dissector: Fix to use new variables for port ranges in bpf hook") Reported-by: Qiang Zhang Closes: https://lore.kernel.org/netdev/CAPx+-5uvFxkhkz4=j_Xuwkezjn9U6kzKTD5jz4tZ9msSJ0fOJA@mail.gmail.com/ Cc: Yoshiki Komachi Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Link: https://patch.msgid.link/20250218043210.732959-4-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- net/core/flow_dissector.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index c33af3ef0b790..9cd8de6bebb54 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -931,6 +931,7 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, struct flow_dissector *flow_dissector, void *target_container) { + struct flow_dissector_key_ports_range *key_ports_range = NULL; struct flow_dissector_key_ports *key_ports = NULL; struct flow_dissector_key_control *key_control; struct flow_dissector_key_basic *key_basic; @@ -975,20 +976,21 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } - if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) { key_ports = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_PORTS, target_container); - else if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_PORTS_RANGE)) - key_ports = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_PORTS_RANGE, - target_container); - - if (key_ports) { key_ports->src = flow_keys->sport; key_ports->dst = flow_keys->dport; } + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS_RANGE)) { + key_ports_range = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS_RANGE, + target_container); + key_ports_range->tp.src = flow_keys->sport; + key_ports_range->tp.dst = flow_keys->dport; + } if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_FLOW_LABEL)) { -- GitLab From 15de6ba95dbe98af7eb71e644205a37c2f1a9aea Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 17 Feb 2025 20:32:10 -0800 Subject: [PATCH 0880/1585] selftests/bpf: Add a specific dst port matching After this patch: #102/1 flow_dissector_classification/ipv4:OK #102/2 flow_dissector_classification/ipv4_continue_dissect:OK #102/3 flow_dissector_classification/ipip:OK #102/4 flow_dissector_classification/gre:OK #102/5 flow_dissector_classification/port_range:OK #102/6 flow_dissector_classification/ipv6:OK #102 flow_dissector_classification:OK Summary: 1/6 PASSED, 0 SKIPPED, 0 FAILED Cc: Daniel Borkmann Cc: Andrii Nakryiko Signed-off-by: Cong Wang Link: https://patch.msgid.link/20250218043210.732959-5-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- .../bpf/prog_tests/flow_dissector_classification.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c index 3729fbfd30846..80b153d3ddecf 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c @@ -542,8 +542,12 @@ static void detach_program(struct bpf_flow *skel, int prog_fd) static int set_port_drop(int pf, bool multi_port) { + char dst_port[16]; + + snprintf(dst_port, sizeof(dst_port), "%d", CFG_PORT_INNER); + SYS(fail, "tc qdisc add dev lo ingress"); - SYS(fail_delete_qdisc, "tc filter add %s %s %s %s %s %s %s %s %s %s", + SYS(fail_delete_qdisc, "tc filter add %s %s %s %s %s %s %s %s %s %s %s %s", "dev lo", "parent FFFF:", "protocol", pf == PF_INET6 ? "ipv6" : "ip", @@ -551,6 +555,7 @@ static int set_port_drop(int pf, bool multi_port) "flower", "ip_proto udp", "src_port", multi_port ? "8-10" : "9", + "dst_port", dst_port, "action drop"); return 0; -- GitLab From 606572eb22c1786a3957d24307f5760bb058ca19 Mon Sep 17 00:00:00 2001 From: Yu-Chun Lin Date: Tue, 18 Feb 2025 16:12:16 +0800 Subject: [PATCH 0881/1585] sctp: Fix undefined behavior in left shift operation According to the C11 standard (ISO/IEC 9899:2011, 6.5.7): "If E1 has a signed type and E1 x 2^E2 is not representable in the result type, the behavior is undefined." Shifting 1 << 31 causes signed integer overflow, which leads to undefined behavior. Fix this by explicitly using '1U << 31' to ensure the shift operates on an unsigned type, avoiding undefined behavior. Signed-off-by: Yu-Chun Lin Link: https://patch.msgid.link/20250218081217.3468369-1-eleanor15x@gmail.com Signed-off-by: Jakub Kicinski --- net/sctp/stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index c241cc552e8d5..bfcff6d6a4386 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -735,7 +735,7 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( * value SHOULD be the smallest TSN not acknowledged by the * receiver of the request plus 2^31. */ - init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + (1 << 31); + init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + (1U << 31); sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, init_tsn, GFP_ATOMIC); -- GitLab From 4b5a28b38c4a0106c64416a1b2042405166b26ce Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 18 Feb 2025 05:49:30 -0800 Subject: [PATCH 0882/1585] net: Add non-RCU dev_getbyhwaddr() helper Add dedicated helper for finding devices by hardware address when holding rtnl_lock, similar to existing dev_getbyhwaddr_rcu(). This prevents PROVE_LOCKING warnings when rtnl_lock is held but RCU read lock is not. Extract common address comparison logic into dev_addr_cmp(). The context about this change could be found in the following discussion: Link: https://lore.kernel.org/all/20250206-scarlet-ermine-of-improvement-1fcac5@leitao/ Cc: kuniyu@amazon.com Cc: ushankar@purestorage.com Suggested-by: Eric Dumazet Signed-off-by: Breno Leitao Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250218-arm_fix_selftest-v5-1-d3d6892db9e1@debian.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 ++ net/core/dev.c | 37 ++++++++++++++++++++++++++++++++++--- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c0a86afb85daa..94b7d4eca0030 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3275,6 +3275,8 @@ static inline struct net_device *first_net_device_rcu(struct net *net) } int netdev_boot_setup_check(struct net_device *dev); +struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, + const char *hwaddr); struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, const char *hwaddr); struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type); diff --git a/net/core/dev.c b/net/core/dev.c index fafd2f4b5d5d7..72459dd02f384 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1121,6 +1121,12 @@ int netdev_get_name(struct net *net, char *name, int ifindex) return ret; } +static bool dev_addr_cmp(struct net_device *dev, unsigned short type, + const char *ha) +{ + return dev->type == type && !memcmp(dev->dev_addr, ha, dev->addr_len); +} + /** * dev_getbyhwaddr_rcu - find a device by its hardware address * @net: the applicable net namespace @@ -1129,7 +1135,7 @@ int netdev_get_name(struct net *net, char *name, int ifindex) * * Search for an interface by MAC address. Returns NULL if the device * is not found or a pointer to the device. - * The caller must hold RCU or RTNL. + * The caller must hold RCU. * The returned device has not had its ref count increased * and the caller must therefore be careful about locking * @@ -1141,14 +1147,39 @@ struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, struct net_device *dev; for_each_netdev_rcu(net, dev) - if (dev->type == type && - !memcmp(dev->dev_addr, ha, dev->addr_len)) + if (dev_addr_cmp(dev, type, ha)) return dev; return NULL; } EXPORT_SYMBOL(dev_getbyhwaddr_rcu); +/** + * dev_getbyhwaddr() - find a device by its hardware address + * @net: the applicable net namespace + * @type: media type of device + * @ha: hardware address + * + * Similar to dev_getbyhwaddr_rcu(), but the owner needs to hold + * rtnl_lock. + * + * Context: rtnl_lock() must be held. + * Return: pointer to the net_device, or NULL if not found + */ +struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, + const char *ha) +{ + struct net_device *dev; + + ASSERT_RTNL(); + for_each_netdev(net, dev) + if (dev_addr_cmp(dev, type, ha)) + return dev; + + return NULL; +} +EXPORT_SYMBOL(dev_getbyhwaddr); + struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) { struct net_device *dev, *ret = NULL; -- GitLab From 4eae0ee0f1e6256d0b0b9dd6e72f1d9cf8f72e08 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 18 Feb 2025 05:49:31 -0800 Subject: [PATCH 0883/1585] arp: switch to dev_getbyhwaddr() in arp_req_set_public() The arp_req_set_public() function is called with the rtnl lock held, which provides enough synchronization protection. This makes the RCU variant of dev_getbyhwaddr() unnecessary. Switch to using the simpler dev_getbyhwaddr() function since we already have the required rtnl locking. This change helps maintain consistency in the networking code by using the appropriate helper function for the existing locking context. Since we're not holding the RCU read lock in arp_req_set_public() existing code could trigger false positive locking warnings. Fixes: 941666c2e3e0 ("net: RCU conversion of dev_getbyhwaddr() and arp_ioctl()") Suggested-by: Kuniyuki Iwashima Reviewed-by: Kuniyuki Iwashima Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250218-arm_fix_selftest-v5-2-d3d6892db9e1@debian.org Signed-off-by: Jakub Kicinski --- net/ipv4/arp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index f23a1ec6694cb..814300eee39de 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1077,7 +1077,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r, __be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr; if (!dev && (r->arp_flags & ATF_COM)) { - dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family, + dev = dev_getbyhwaddr(net, r->arp_ha.sa_family, r->arp_ha.sa_data); if (!dev) return -ENODEV; -- GitLab From 3d8c6f26893d55fab218ad086719de1fc9bb86ba Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 10 Feb 2025 13:31:11 +0200 Subject: [PATCH 0884/1585] RDMA/mlx5: Fix implicit ODP hang on parent deregistration Fix the destroy_unused_implicit_child_mr() to prevent hanging during parent deregistration as of below [1]. Upon entering destroy_unused_implicit_child_mr(), the reference count for the implicit MR parent is incremented using: refcount_inc_not_zero(). A corresponding decrement must be performed if free_implicit_child_mr_work() is not called. The code has been updated to properly manage the reference count that was incremented. [1] INFO: task python3:2157 blocked for more than 120 seconds. Not tainted 6.12.0-rc7+ #1633 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:python3 state:D stack:0 pid:2157 tgid:2157 ppid:1685 flags:0x00000000 Call Trace: __schedule+0x420/0xd30 schedule+0x47/0x130 __mlx5_ib_dereg_mr+0x379/0x5d0 [mlx5_ib] ? __pfx_autoremove_wake_function+0x10/0x10 ib_dereg_mr_user+0x5f/0x120 [ib_core] ? lock_release+0xc6/0x280 destroy_hw_idr_uobject+0x1d/0x60 [ib_uverbs] uverbs_destroy_uobject+0x58/0x1d0 [ib_uverbs] uobj_destroy+0x3f/0x70 [ib_uverbs] ib_uverbs_cmd_verbs+0x3e4/0xbb0 [ib_uverbs] ? __pfx_uverbs_destroy_def_handler+0x10/0x10 [ib_uverbs] ? lock_acquire+0xc1/0x2f0 ? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs] ? ib_uverbs_ioctl+0x116/0x170 [ib_uverbs] ? lock_release+0xc6/0x280 ib_uverbs_ioctl+0xe7/0x170 [ib_uverbs] ? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs] __x64_sys_ioctl+0x1b0/0xa70 ? kmem_cache_free+0x221/0x400 do_syscall_64+0x6b/0x140 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f20f21f017b RSP: 002b:00007ffcfc4a77c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007ffcfc4a78d8 RCX: 00007f20f21f017b RDX: 00007ffcfc4a78c0 RSI: 00000000c0181b01 RDI: 0000000000000003 RBP: 00007ffcfc4a78a0 R08: 000056147d125190 R09: 00007f20f1f14c60 R10: 0000000000000001 R11: 0000000000000246 R12: 00007ffcfc4a7890 R13: 000000000000001c R14: 000056147d100fc0 R15: 00007f20e365c9d0 Fixes: d3d930411ce3 ("RDMA/mlx5: Fix implicit ODP use after free") Signed-off-by: Yishai Hadas Reviewed-by: Artemy Kovalyov Link: https://patch.msgid.link/80f2fcd19952dfa7d9981d93fd6359b4471f8278.1739186929.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/odp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index f1e23583e6c08..e77c9280c07e4 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -242,6 +242,7 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_KERNEL) != mr) { xa_unlock(&imr->implicit_children); + mlx5r_deref_odp_mkey(&imr->mmkey); return; } -- GitLab From c534ffda781f44a1c6ac25ef6e0e444da38ca8af Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Mon, 10 Feb 2025 13:32:39 +0200 Subject: [PATCH 0885/1585] RDMA/mlx5: Fix AH static rate parsing Previously static rate wasn't translated according to our PRM but simply used the 4 lower bytes. Correctly translate static rate value passed in AH creation attribute according to our PRM expected values. In addition change 800GB mapping to zero, which is the PRM specified value. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Patrisious Haddad Reviewed-by: Maor Gottlieb Link: https://patch.msgid.link/18ef4cc5396caf80728341eb74738cd777596f60.1739187089.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/ah.c | 3 ++- drivers/infiniband/hw/mlx5/qp.c | 6 +++--- drivers/infiniband/hw/mlx5/qp.h | 1 + 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 505bc47fd575d..99036afb3aef0 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -67,7 +67,8 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, ah->av.tclass = grh->traffic_class; } - ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4); + ah->av.stat_rate_sl = + (mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr)) << 4); if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { if (init_attr->xmit_slave) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 08d22db8dca91..88724d15705d4 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3447,11 +3447,11 @@ static int ib_to_mlx5_rate_map(u8 rate) return 0; } -static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) +int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate) { u32 stat_rate_support; - if (rate == IB_RATE_PORT_CURRENT) + if (rate == IB_RATE_PORT_CURRENT || rate == IB_RATE_800_GBPS) return 0; if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_800_GBPS) @@ -3596,7 +3596,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, sizeof(grh->dgid.raw)); } - err = ib_rate_to_mlx5(dev, rdma_ah_get_static_rate(ah)); + err = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah)); if (err < 0) return err; MLX5_SET(ads, path, stat_rate, err); diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h index b6ee7c3ee1ca1..2530e7730635f 100644 --- a/drivers/infiniband/hw/mlx5/qp.h +++ b/drivers/infiniband/hw/mlx5/qp.h @@ -56,4 +56,5 @@ int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn); int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); int mlx5_ib_qp_event_init(void); void mlx5_ib_qp_event_cleanup(void); +int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate); #endif /* _MLX5_IB_QP_H */ -- GitLab From a370295367b55662a32a4be92565fe72a5aa79bb Mon Sep 17 00:00:00 2001 From: Nick Hu Date: Mon, 17 Feb 2025 13:58:42 +0800 Subject: [PATCH 0886/1585] net: axienet: Set mac_managed_pm The external PHY will undergo a soft reset twice during the resume process when it wake up from suspend. The first reset occurs when the axienet driver calls phylink_of_phy_connect(), and the second occurs when mdio_bus_phy_resume() invokes phy_init_hw(). The second soft reset of the external PHY does not reinitialize the internal PHY, which causes issues with the internal PHY, resulting in the PHY link being down. To prevent this, setting the mac_managed_pm flag skips the mdio_bus_phy_resume() function. Fixes: a129b41fe0a8 ("Revert "net: phy: dp83867: perform soft reset and retain established link"") Signed-off-by: Nick Hu Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250217055843.19799-1-nick.hu@sifive.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 9e7fa012e4fad..f33178f90c42e 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -2897,6 +2897,7 @@ static int axienet_probe(struct platform_device *pdev) lp->phylink_config.dev = &ndev->dev; lp->phylink_config.type = PHYLINK_NETDEV; + lp->phylink_config.mac_managed_pm = true; lp->phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE | MAC_10FD | MAC_100FD | MAC_1000FD; -- GitLab From 9b6412e6979f6f9e0632075f8f008937b5cd4efd Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 17 Feb 2025 11:23:35 +0100 Subject: [PATCH 0887/1585] tcp: drop secpath at the same time as we currently drop dst Xiumei reported hitting the WARN in xfrm6_tunnel_net_exit while running tests that boil down to: - create a pair of netns - run a basic TCP test over ipcomp6 - delete the pair of netns The xfrm_state found on spi_byaddr was not deleted at the time we delete the netns, because we still have a reference on it. This lingering reference comes from a secpath (which holds a ref on the xfrm_state), which is still attached to an skb. This skb is not leaked, it ends up on sk_receive_queue and then gets defer-free'd by skb_attempt_defer_free. The problem happens when we defer freeing an skb (push it on one CPU's defer_list), and don't flush that list before the netns is deleted. In that case, we still have a reference on the xfrm_state that we don't expect at this point. We already drop the skb's dst in the TCP receive path when it's no longer needed, so let's also drop the secpath. At this point, tcp_filter has already called into the LSM hooks that may require the secpath, so it should not be needed anymore. However, in some of those places, the MPTCP extension has just been attached to the skb, so we cannot simply drop all extensions. Fixes: 68822bdf76f1 ("net: generalize skb freeing deferral to per-cpu lists") Reported-by: Xiumei Mu Signed-off-by: Sabrina Dubroca Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/5055ba8f8f72bdcb602faa299faca73c280b7735.1739743613.git.sd@queasysnail.net Signed-off-by: Paolo Abeni --- include/net/tcp.h | 14 ++++++++++++++ net/ipv4/tcp_fastopen.c | 4 ++-- net/ipv4/tcp_input.c | 8 ++++---- net/ipv4/tcp_ipv4.c | 2 +- 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 5b2b04835688f..930cda5b5eb98 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -683,6 +684,19 @@ void tcp_fin(struct sock *sk); void tcp_check_space(struct sock *sk); void tcp_sack_compress_send_ack(struct sock *sk); +static inline void tcp_cleanup_skb(struct sk_buff *skb) +{ + skb_dst_drop(skb); + secpath_reset(skb); +} + +static inline void tcp_add_receive_queue(struct sock *sk, struct sk_buff *skb) +{ + DEBUG_NET_WARN_ON_ONCE(skb_dst(skb)); + DEBUG_NET_WARN_ON_ONCE(secpath_exists(skb)); + __skb_queue_tail(&sk->sk_receive_queue, skb); +} + /* tcp_timer.c */ void tcp_init_xmit_timers(struct sock *); static inline void tcp_clear_xmit_timers(struct sock *sk) diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 0f523cbfe329e..32b28fc21b63c 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -178,7 +178,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) if (!skb) return; - skb_dst_drop(skb); + tcp_cleanup_skb(skb); /* segs_in has been initialized to 1 in tcp_create_openreq_child(). * Hence, reset segs_in to 0 before calling tcp_segs_in() * to avoid double counting. Also, tcp_segs_in() expects @@ -195,7 +195,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - __skb_queue_tail(&sk->sk_receive_queue, skb); + tcp_add_receive_queue(sk, skb); tp->syn_data_acked = 1; /* u64_stats_update_begin(&tp->syncp) not needed here, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 98b8cc7403920..0cbf81bf3d451 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4976,7 +4976,7 @@ static void tcp_ofo_queue(struct sock *sk) tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; if (!eaten) - __skb_queue_tail(&sk->sk_receive_queue, skb); + tcp_add_receive_queue(sk, skb); else kfree_skb_partial(skb, fragstolen); @@ -5168,7 +5168,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, skb, fragstolen)) ? 1 : 0; tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); if (!eaten) { - __skb_queue_tail(&sk->sk_receive_queue, skb); + tcp_add_receive_queue(sk, skb); skb_set_owner_r(skb, sk); } return eaten; @@ -5251,7 +5251,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) __kfree_skb(skb); return; } - skb_dst_drop(skb); + tcp_cleanup_skb(skb); __skb_pull(skb, tcp_hdr(skb)->doff * 4); reason = SKB_DROP_REASON_NOT_SPECIFIED; @@ -6232,7 +6232,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); /* Bulk data transfer: receiver */ - skb_dst_drop(skb); + tcp_cleanup_skb(skb); __skb_pull(skb, tcp_header_len); eaten = tcp_queue_rcv(sk, skb, &fragstolen); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cc2b5194a18d2..2632844d2c356 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2027,7 +2027,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, */ skb_condense(skb); - skb_dst_drop(skb); + tcp_cleanup_skb(skb); if (unlikely(tcp_checksum_complete(skb))) { bh_unlock_sock(sk); -- GitLab From 878e7b11736e062514e58f3b445ff343e6705537 Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Tue, 18 Feb 2025 11:04:09 +0800 Subject: [PATCH 0888/1585] nfp: bpf: Add check for nfp_app_ctrl_msg_alloc() Add check for the return value of nfp_app_ctrl_msg_alloc() in nfp_bpf_cmsg_alloc() to prevent null pointer dereference. Fixes: ff3d43f7568c ("nfp: bpf: implement helpers for FW map ops") Cc: stable@vger.kernel.org Signed-off-by: Haoxiang Li Link: https://patch.msgid.link/20250218030409.2425798-1-haoxiang_li2024@163.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/netronome/nfp/bpf/cmsg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c index 2ec62c8d86e1c..59486fe2ad18c 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c @@ -20,6 +20,8 @@ nfp_bpf_cmsg_alloc(struct nfp_app_bpf *bpf, unsigned int size) struct sk_buff *skb; skb = nfp_app_ctrl_msg_alloc(bpf->app, size, GFP_KERNEL); + if (!skb) + return NULL; skb_put(skb, size); return skb; -- GitLab From 4738d3d3e12d70a5067baba147daf57e57b77548 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 11 Feb 2025 20:50:13 +0200 Subject: [PATCH 0889/1585] intel_th: msu: Fix kernel-doc warnings Correct function comments to prevent kernel-doc warnings found when using "W=1". msu.c:162: warning: Function parameter or struct member 'mbuf_priv' not described in 'msc' msu.c:164: warning: Function parameter or struct member 'orig_addr' not described in 'msc' msu.c:164: warning: Function parameter or struct member 'orig_sz' not described in 'msc' Signed-off-by: Andy Shevchenko Signed-off-by: Alexander Shishkin Link: https://lore.kernel.org/r/20250211185017.1759193-2-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/msu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index 66123d684ac9e..492d8eba37ebc 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -108,7 +108,7 @@ struct msc_iter { * @reg_base: register window base address * @thdev: intel_th_device pointer * @mbuf: MSU buffer, if assigned - * @mbuf_priv MSU buffer's private data, if @mbuf + * @mbuf_priv: MSU buffer's private data, if @mbuf * @win_list: list of windows in multiblock mode * @single_sgt: single mode buffer * @cur_win: current window @@ -117,6 +117,8 @@ struct msc_iter { * @single_wrap: single mode wrap occurred * @base: buffer's base pointer * @base_addr: buffer's base address + * @orig_addr: MSC0 buffer's base address + * @orig_sz: MSC0 buffer's size * @user_count: number of users of the buffer * @mmap_count: number of mappings * @buf_mutex: mutex to serialize access to buffer-related bits -- GitLab From 04190ec6d02aa8fee0f03189bb7762b44739c253 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 11 Feb 2025 20:50:14 +0200 Subject: [PATCH 0890/1585] intel_th: msu: Fix less trivial kernel-doc warnings Correct function comments to prevent kernel-doc warnings found when using "W=1" that the drive-by fixers had trouble documenting and skipped over. msu.c:168: warning: Function parameter or struct member 'msu_base' not described in 'msc' msu.c:168: warning: Function parameter or struct member 'work' not described in 'msc' msu.c:168: warning: Function parameter or struct member 'switch_on_unlock' not described in 'msc' msu.c:168: warning: Function parameter or struct member 'iter_list' not described in 'msc' msu.c:168: warning: Function parameter or struct member 'stop_on_full' not described in 'msc' msu.c:168: warning: Function parameter or struct member 'do_irq' not described in 'msc' msu.c:168: warning: Function parameter or struct member 'multi_is_broken' not described in 'msc' Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250211185017.1759193-3-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/msu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index 492d8eba37ebc..bf99d79a41920 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -105,13 +105,16 @@ struct msc_iter { /** * struct msc - MSC device representation - * @reg_base: register window base address + * @reg_base: register window base address for the entire MSU + * @msu_base: register window base address for this MSC * @thdev: intel_th_device pointer * @mbuf: MSU buffer, if assigned * @mbuf_priv: MSU buffer's private data, if @mbuf + * @work: a work to stop the trace when the buffer is full * @win_list: list of windows in multiblock mode * @single_sgt: single mode buffer * @cur_win: current window + * @switch_on_unlock: window to switch to when it becomes available * @nr_pages: total number of pages allocated for this buffer * @single_sz: amount of data in single mode * @single_wrap: single mode wrap occurred @@ -122,8 +125,12 @@ struct msc_iter { * @user_count: number of users of the buffer * @mmap_count: number of mappings * @buf_mutex: mutex to serialize access to buffer-related bits + * @iter_list: list of open file descriptor iterators + * @stop_on_full: stop the trace if the current window is full * @enabled: MSC is enabled * @wrap: wrapping is enabled + * @do_irq: IRQ resource is available, handle interrupts + * @multi_is_broken: multiblock mode enabled (not disabled by PCI drvdata) * @mode: MSC operating mode * @burst_len: write burst length * @index: number of this MSC in the MSU -- GitLab From b5edccae9f447a92d475267d94c33f4926963eec Mon Sep 17 00:00:00 2001 From: Pawel Chmielewski Date: Tue, 11 Feb 2025 20:50:15 +0200 Subject: [PATCH 0891/1585] intel_th: pci: Add Arrow Lake support Add support for the Trace Hub in Arrow Lake. Signed-off-by: Pawel Chmielewski Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Cc: stable@kernel.org Link: https://lore.kernel.org/r/20250211185017.1759193-4-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index e9d8d28e055f3..3e03ee788bb94 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -334,6 +334,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa824), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Arrow Lake */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7724), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Alder Lake CPU */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f), -- GitLab From a70034d6c0d5f3cdee40bb00a578e17fd2ebe426 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 11 Feb 2025 20:50:16 +0200 Subject: [PATCH 0892/1585] intel_th: pci: Add Panther Lake-H support Add support for the Trace Hub in Panther Lake-H. Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Cc: stable@kernel.org Link: https://lore.kernel.org/r/20250211185017.1759193-5-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 3e03ee788bb94..004e68286fd43 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -339,6 +339,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7724), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Panther Lake-H */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe324), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Alder Lake CPU */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f), -- GitLab From 49114ff05770264ae233f50023fc64a719a9dcf9 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 11 Feb 2025 20:50:17 +0200 Subject: [PATCH 0893/1585] intel_th: pci: Add Panther Lake-P/U support Add support for the Trace Hub in Panther Lake-P/U. Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Cc: stable@kernel.org Link: https://lore.kernel.org/r/20250211185017.1759193-6-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 004e68286fd43..e3def163d5cf7 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -344,6 +344,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe324), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Panther Lake-P/U */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe424), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Alder Lake CPU */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f), -- GitLab From 14ad6ed30a10afbe91b0749d6378285f4225d482 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 18 Feb 2025 19:29:39 +0100 Subject: [PATCH 0894/1585] net: allow small head cache usage with large MAX_SKB_FRAGS values Sabrina reported the following splat: WARNING: CPU: 0 PID: 1 at net/core/dev.c:6935 netif_napi_add_weight_locked+0x8f2/0xba0 Modules linked in: CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.14.0-rc1-net-00092-g011b03359038 #996 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014 RIP: 0010:netif_napi_add_weight_locked+0x8f2/0xba0 Code: e8 c3 e6 6a fe 48 83 c4 28 5b 5d 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc c7 44 24 10 ff ff ff ff e9 8f fb ff ff e8 9e e6 6a fe <0f> 0b e9 d3 fe ff ff e8 92 e6 6a fe 48 8b 04 24 be ff ff ff ff 48 RSP: 0000:ffffc9000001fc60 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff88806ce48128 RCX: 1ffff11001664b9e RDX: ffff888008f00040 RSI: ffffffff8317ca42 RDI: ffff88800b325cb6 RBP: ffff88800b325c40 R08: 0000000000000001 R09: ffffed100167502c R10: ffff88800b3a8163 R11: 0000000000000000 R12: ffff88800ac1c168 R13: ffff88800ac1c168 R14: ffff88800ac1c168 R15: 0000000000000007 FS: 0000000000000000(0000) GS:ffff88806ce00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff888008201000 CR3: 0000000004c94001 CR4: 0000000000370ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: gro_cells_init+0x1ba/0x270 xfrm_input_init+0x4b/0x2a0 xfrm_init+0x38/0x50 ip_rt_init+0x2d7/0x350 ip_init+0xf/0x20 inet_init+0x406/0x590 do_one_initcall+0x9d/0x2e0 do_initcalls+0x23b/0x280 kernel_init_freeable+0x445/0x490 kernel_init+0x20/0x1d0 ret_from_fork+0x46/0x80 ret_from_fork_asm+0x1a/0x30 irq event stamp: 584330 hardirqs last enabled at (584338): [] __up_console_sem+0x77/0xb0 hardirqs last disabled at (584345): [] __up_console_sem+0x5c/0xb0 softirqs last enabled at (583242): [] netlink_insert+0x14d/0x470 softirqs last disabled at (583754): [] netif_napi_add_weight_locked+0x77d/0xba0 on kernel built with MAX_SKB_FRAGS=45, where SKB_WITH_OVERHEAD(1024) is smaller than GRO_MAX_HEAD. Such built additionally contains the revert of the single page frag cache so that napi_get_frags() ends up using the page frag allocator, triggering the splat. Note that the underlying issue is independent from the mentioned revert; address it ensuring that the small head cache will fit either TCP and GRO allocation and updating napi_alloc_skb() and __netdev_alloc_skb() to select kmalloc() usage for any allocation fitting such cache. Reported-by: Sabrina Dubroca Suggested-by: Eric Dumazet Fixes: 3948b05950fd ("net: introduce a config option to tweak MAX_SKB_FRAGS") Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/gro.h | 3 +++ net/core/gro.c | 3 --- net/core/skbuff.c | 10 +++++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/include/net/gro.h b/include/net/gro.h index b9b58c1f8d190..7b548f91754bf 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -11,6 +11,9 @@ #include #include +/* This should be increased if a protocol with a bigger head is added. */ +#define GRO_MAX_HEAD (MAX_HEADER + 128) + struct napi_gro_cb { union { struct { diff --git a/net/core/gro.c b/net/core/gro.c index d1f44084e978f..78b320b631744 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -7,9 +7,6 @@ #define MAX_GRO_SKBS 8 -/* This should be increased if a protocol with a bigger head is added. */ -#define GRO_MAX_HEAD (MAX_HEADER + 128) - static DEFINE_SPINLOCK(offload_lock); /** diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a441613a1e6c1..f5a6d50570c4f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -69,6 +69,7 @@ #include #include #include +#include #include #include #include @@ -95,7 +96,9 @@ static struct kmem_cache *skbuff_ext_cache __ro_after_init; #endif -#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER) +#define GRO_MAX_HEAD_PAD (GRO_MAX_HEAD + NET_SKB_PAD + NET_IP_ALIGN) +#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(max(MAX_TCP_HEADER, \ + GRO_MAX_HEAD_PAD)) /* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two. * This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique @@ -736,7 +739,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, /* If requested length is either too small or too big, * we use kmalloc() for skb->head allocation. */ - if (len <= SKB_WITH_OVERHEAD(1024) || + if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) || len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); @@ -816,7 +819,8 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) * When the small frag allocator is available, prefer it over kmalloc * for small fragments */ - if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) || + if ((!NAPI_HAS_SMALL_PAGE_FRAG && + len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)) || len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI, -- GitLab From 6bc7e4eb0499562ccd291712fd7be0d1a5aad00a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 18 Feb 2025 19:29:40 +0100 Subject: [PATCH 0895/1585] Revert "net: skb: introduce and use a single page frag cache" After the previous commit is finally safe to revert commit dbae2b062824 ("net: skb: introduce and use a single page frag cache"): do it here. The intended goal of such change was to counter a performance regression introduced by commit 3226b158e67c ("net: avoid 32 x truesize under-estimation for tiny skbs"). Unfortunately, the blamed commit introduces another regression for the virtio_net driver. Such a driver calls napi_alloc_skb() with a tiny size, so that the whole head frag could fit a 512-byte block. The single page frag cache uses a 1K fragment for such allocation, and the additional overhead, under small UDP packets flood, makes the page allocator a bottleneck. Thanks to commit bf9f1baa279f ("net: add dedicated kmem_cache for typical/small skb->head"), this revert does not re-introduce the original regression. Actually, in the relevant test on top of this revert, I measure a small but noticeable positive delta, just above noise level. The revert itself required some additional mangling due to recent updates in the affected code. Suggested-by: Eric Dumazet Fixes: dbae2b062824 ("net: skb: introduce and use a single page frag cache") Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 1 - net/core/dev.c | 17 +++++++ net/core/skbuff.c | 104 ++------------------------------------ 3 files changed, 22 insertions(+), 100 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 94b7d4eca0030..ab550a89b9bfa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4117,7 +4117,6 @@ void netif_receive_skb_list(struct list_head *head); gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); -void napi_get_frags_check(struct napi_struct *napi); gro_result_t napi_gro_frags(struct napi_struct *napi); static inline void napi_free_frags(struct napi_struct *napi) diff --git a/net/core/dev.c b/net/core/dev.c index 72459dd02f384..1b252e9459fdb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6991,6 +6991,23 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi) list_add_rcu(&napi->dev_list, higher); /* adds after higher */ } +/* Double check that napi_get_frags() allocates skbs with + * skb->head being backed by slab, not a page fragment. + * This is to make sure bug fixed in 3226b158e67c + * ("net: avoid 32 x truesize under-estimation for tiny skbs") + * does not accidentally come back. + */ +static void napi_get_frags_check(struct napi_struct *napi) +{ + struct sk_buff *skb; + + local_bh_disable(); + skb = napi_get_frags(napi); + WARN_ON_ONCE(skb && skb->head_frag); + napi_free_frags(napi); + local_bh_enable(); +} + void netif_napi_add_weight_locked(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f5a6d50570c4f..7b03b64fdcb27 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -223,67 +223,9 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr) #define NAPI_SKB_CACHE_BULK 16 #define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2) -#if PAGE_SIZE == SZ_4K - -#define NAPI_HAS_SMALL_PAGE_FRAG 1 -#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc) - -/* specialized page frag allocator using a single order 0 page - * and slicing it into 1K sized fragment. Constrained to systems - * with a very limited amount of 1K fragments fitting a single - * page - to avoid excessive truesize underestimation - */ - -struct page_frag_1k { - void *va; - u16 offset; - bool pfmemalloc; -}; - -static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp) -{ - struct page *page; - int offset; - - offset = nc->offset - SZ_1K; - if (likely(offset >= 0)) - goto use_frag; - - page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); - if (!page) - return NULL; - - nc->va = page_address(page); - nc->pfmemalloc = page_is_pfmemalloc(page); - offset = PAGE_SIZE - SZ_1K; - page_ref_add(page, offset / SZ_1K); - -use_frag: - nc->offset = offset; - return nc->va + offset; -} -#else - -/* the small page is actually unused in this build; add dummy helpers - * to please the compiler and avoid later preprocessor's conditionals - */ -#define NAPI_HAS_SMALL_PAGE_FRAG 0 -#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false - -struct page_frag_1k { -}; - -static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask) -{ - return NULL; -} - -#endif - struct napi_alloc_cache { local_lock_t bh_lock; struct page_frag_cache page; - struct page_frag_1k page_small; unsigned int skb_count; void *skb_cache[NAPI_SKB_CACHE_SIZE]; }; @@ -293,23 +235,6 @@ static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = { .bh_lock = INIT_LOCAL_LOCK(bh_lock), }; -/* Double check that napi_get_frags() allocates skbs with - * skb->head being backed by slab, not a page fragment. - * This is to make sure bug fixed in 3226b158e67c - * ("net: avoid 32 x truesize under-estimation for tiny skbs") - * does not accidentally come back. - */ -void napi_get_frags_check(struct napi_struct *napi) -{ - struct sk_buff *skb; - - local_bh_disable(); - skb = napi_get_frags(napi); - WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag); - napi_free_frags(napi); - local_bh_enable(); -} - void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); @@ -816,11 +741,8 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) /* If requested length is either too small or too big, * we use kmalloc() for skb->head allocation. - * When the small frag allocator is available, prefer it over kmalloc - * for small fragments */ - if ((!NAPI_HAS_SMALL_PAGE_FRAG && - len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)) || + if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) || len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI, @@ -830,32 +752,16 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) goto skb_success; } + len = SKB_HEAD_ALIGN(len); + if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; local_lock_nested_bh(&napi_alloc_cache.bh_lock); nc = this_cpu_ptr(&napi_alloc_cache); - if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) { - /* we are artificially inflating the allocation size, but - * that is not as bad as it may look like, as: - * - 'len' less than GRO_MAX_HEAD makes little sense - * - On most systems, larger 'len' values lead to fragment - * size above 512 bytes - * - kmalloc would use the kmalloc-1k slab for such values - * - Builds with smaller GRO_MAX_HEAD will very likely do - * little networking, as that implies no WiFi and no - * tunnels support, and 32 bits arches. - */ - len = SZ_1K; - data = page_frag_alloc_1k(&nc->page_small, gfp_mask); - pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small); - } else { - len = SKB_HEAD_ALIGN(len); - - data = page_frag_alloc(&nc->page, len, gfp_mask); - pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page); - } + data = page_frag_alloc(&nc->page, len, gfp_mask); + pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page); local_unlock_nested_bh(&napi_alloc_cache.bh_lock); if (unlikely(!data)) -- GitLab From 78eb41f518f414378643ab022241df2a9dcd008b Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Thu, 13 Feb 2025 15:05:13 +0100 Subject: [PATCH 0896/1585] drivers: core: fix device leak in __fw_devlink_relax_cycles() Commit bac3b10b78e5 ("driver core: fw_devlink: Stop trying to optimize cycle detection logic") introduced a new struct device *con_dev and a get_dev_from_fwnode() call to get it, but without adding a corresponding put_device(). Closes: https://lore.kernel.org/all/20241204124826.2e055091@booty/ Fixes: bac3b10b78e5 ("driver core: fw_devlink: Stop trying to optimize cycle detection logic") Cc: stable@vger.kernel.org Reviewed-by: Saravana Kannan Signed-off-by: Luca Ceresoli Link: https://lore.kernel.org/r/20250213-fix__fw_devlink_relax_cycles_missing_device_put-v2-1-8cd3b03e6a3f@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/base/core.c b/drivers/base/core.c index 5a1f051981149..2fde698430dff 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2079,6 +2079,7 @@ static bool __fw_devlink_relax_cycles(struct fwnode_handle *con_handle, out: sup_handle->flags &= ~FWNODE_FLAG_VISITED; put_device(sup_dev); + put_device(con_dev); put_device(par_dev); return ret; } -- GitLab From c783e1258f29c5caac9eea0aea6b172870f1baf8 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 20 Feb 2025 13:03:14 +0100 Subject: [PATCH 0897/1585] usb: gadget: Fix setting self-powered state on suspend cdev->config might be NULL, so check it before dereferencing. CC: stable Fixes: 40e89ff5750f ("usb: gadget: Set self-powered based on MaxPower and bmAttributes") Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20250220120314.3614330-1-m.szyprowski@samsung.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 1fb28bbf6c458..4bcf73bae7610 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -2616,7 +2616,8 @@ void composite_suspend(struct usb_gadget *gadget) cdev->suspended = 1; - if (cdev->config->bmAttributes & USB_CONFIG_ATT_SELFPOWER) + if (cdev->config && + cdev->config->bmAttributes & USB_CONFIG_ATT_SELFPOWER) usb_gadget_set_selfpowered(gadget); usb_gadget_vbus_draw(gadget, 2); -- GitLab From 96fa9ec477ff60bed87e1441fd43e003179f3253 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 19 Feb 2025 15:43:56 +0100 Subject: [PATCH 0898/1585] gpiolib: don't bail out if get_direction() fails in gpiochip_add_data() Since commit 9d846b1aebbe ("gpiolib: check the return value of gpio_chip::get_direction()") we check the return value of the get_direction() callback as per its API contract. Some drivers have been observed to fail to register now as they may call get_direction() in gpiochip_add_data() in contexts where it has always silently failed. Until we audit all drivers, replace the bail-out to a kernel log warning. Fixes: 9d846b1aebbe ("gpiolib: check the return value of gpio_chip::get_direction()") Reported-by: Mark Brown Closes: https://lore.kernel.org/all/Z7VFB1nST6lbmBIo@finisterre.sirena.org.uk/ Reported-by: Marek Szyprowski Closes: https://lore.kernel.org/all/dfe03f88-407e-4ef1-ad30-42db53bbd4e4@samsung.com/ Tested-by: Mark Brown Reviewed-by: Mark Brown Tested-by: Marek Szyprowski Link: https://lore.kernel.org/r/20250219144356.258635-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 5529d8b65f6fb..fc19df5a64c2b 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1059,7 +1059,15 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, if (gc->get_direction && gpiochip_line_is_valid(gc, desc_index)) { ret = gc->get_direction(gc, desc_index); if (ret < 0) - goto err_cleanup_desc_srcu; + /* + * FIXME: Bail-out here once all GPIO drivers + * are updated to not return errors in + * situations that can be considered normal + * operation. + */ + dev_warn(&gdev->dev, + "%s: get_direction failed: %d\n", + __func__, ret); assign_bit(FLAG_IS_OUT, &desc->flags, !ret); } else { -- GitLab From 91d44c1afc61a2fec37a9c7a3485368309391e0b Mon Sep 17 00:00:00 2001 From: Qiu-ji Chen Date: Sat, 18 Jan 2025 15:08:33 +0800 Subject: [PATCH 0899/1585] cdx: Fix possible UAF error in driver_override_show() Fixed a possible UAF problem in driver_override_show() in drivers/cdx/cdx.c This function driver_override_show() is part of DEVICE_ATTR_RW, which includes both driver_override_show() and driver_override_store(). These functions can be executed concurrently in sysfs. The driver_override_store() function uses driver_set_override() to update the driver_override value, and driver_set_override() internally locks the device (device_lock(dev)). If driver_override_show() reads cdx_dev->driver_override without locking, it could potentially access a freed pointer if driver_override_store() frees the string concurrently. This could lead to printing a kernel address, which is a security risk since DEVICE_ATTR can be read by all users. Additionally, a similar pattern is used in drivers/amba/bus.c, as well as many other bus drivers, where device_lock() is taken in the show function, and it has been working without issues. This potential bug was detected by our experimental static analysis tool, which analyzes locking APIs and paired functions to identify data races and atomicity violations. Fixes: 1f86a00c1159 ("bus/fsl-mc: add support for 'driver_override' in the mc-bus") Cc: stable Signed-off-by: Qiu-ji Chen Link: https://lore.kernel.org/r/20250118070833.27201-1-chenqiuji666@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/cdx/cdx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/cdx/cdx.c b/drivers/cdx/cdx.c index c573ed2ee71a8..7811aa7340537 100644 --- a/drivers/cdx/cdx.c +++ b/drivers/cdx/cdx.c @@ -473,8 +473,12 @@ static ssize_t driver_override_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cdx_device *cdx_dev = to_cdx_device(dev); + ssize_t len; - return sysfs_emit(buf, "%s\n", cdx_dev->driver_override); + device_lock(dev); + len = sysfs_emit(buf, "%s\n", cdx_dev->driver_override); + device_unlock(dev); + return len; } static DEVICE_ATTR_RW(driver_override); -- GitLab From c99e1e1d0850ff157f1bc16871acd2dff5a9bcc3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Jan 2025 07:54:33 +0100 Subject: [PATCH 0900/1585] vbox: add HAS_IOPORT dependency The vboxguest driver depends on port I/O for debug output: include/asm-generic/io.h:626:15: error: call to '_outl' declared with attribute error: outl() requires CONFIG_HAS_IOPORT 626 | #define _outl _outl include/asm-generic/io.h:663:14: note: in expansion of macro '_outl' 663 | #define outl _outl | ^~~~~ drivers/virt/vboxguest/vboxguest_utils.c:102:9: note: in expansion of macro 'outl' 102 | outl(phys_req, gdev->io_port + VMMDEV_PORT_OFF_REQUEST); | ^~~~ Most arm64 platforms don't actually support port I/O, though it is currently enabled unconditionally. Refine the vbox dependency to allow turning HAS_IOPORT off in the future when building for platforms without port I/O and allow compile-testing on all architectures. Fixes: 5cf8f938bf5c ("vbox: Enable VBOXGUEST and VBOXSF_FS on ARM64") Signed-off-by: Arnd Bergmann Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20250122065445.1469218-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/virt/vboxguest/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/virt/vboxguest/Kconfig b/drivers/virt/vboxguest/Kconfig index 11b153e7454e4..eaba28c95e733 100644 --- a/drivers/virt/vboxguest/Kconfig +++ b/drivers/virt/vboxguest/Kconfig @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only config VBOXGUEST tristate "Virtual Box Guest integration support" - depends on (ARM64 || X86) && PCI && INPUT + depends on (ARM64 || X86 || COMPILE_TEST) && PCI && INPUT + depends on HAS_IOPORT help This is a driver for the Virtual Box Guest PCI device used in Virtual Box virtual machines. Enabling this driver will add -- GitLab From dcb0d43ba8eb9517e70b1a0e4b0ae0ab657a0e5a Mon Sep 17 00:00:00 2001 From: Visweswara Tanuku Date: Fri, 24 Jan 2025 04:57:40 -0800 Subject: [PATCH 0901/1585] slimbus: messaging: Free transaction ID in delayed interrupt scenario In case of interrupt delay for any reason, slim_do_transfer() returns timeout error but the transaction ID (TID) is not freed. This results into invalid memory access inside qcom_slim_ngd_rx_msgq_cb() due to invalid TID. Fix the issue by freeing the TID in slim_do_transfer() before returning timeout error to avoid invalid memory access. Call trace: __memcpy_fromio+0x20/0x190 qcom_slim_ngd_rx_msgq_cb+0x130/0x290 [slim_qcom_ngd_ctrl] vchan_complete+0x2a0/0x4a0 tasklet_action_common+0x274/0x700 tasklet_action+0x28/0x3c _stext+0x188/0x620 run_ksoftirqd+0x34/0x74 smpboot_thread_fn+0x1d8/0x464 kthread+0x178/0x238 ret_from_fork+0x10/0x20 Code: aa0003e8 91000429 f100044a 3940002b (3800150b) ---[ end trace 0fe00bec2b975c99 ]--- Kernel panic - not syncing: Oops: Fatal exception in interrupt. Fixes: afbdcc7c384b ("slimbus: Add messaging APIs to slimbus framework") Cc: stable Signed-off-by: Visweswara Tanuku Link: https://lore.kernel.org/r/20250124125740.16897-1-quic_vtanuku@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/messaging.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/slimbus/messaging.c b/drivers/slimbus/messaging.c index e7aa9bd4b44b8..6f01d944f9c65 100644 --- a/drivers/slimbus/messaging.c +++ b/drivers/slimbus/messaging.c @@ -148,8 +148,9 @@ int slim_do_transfer(struct slim_controller *ctrl, struct slim_msg_txn *txn) } ret = ctrl->xfer_msg(ctrl, txn); - - if (!ret && need_tid && !txn->msg->comp) { + if (ret == -ETIMEDOUT) { + slim_free_txn_tid(ctrl, txn); + } else if (!ret && need_tid && !txn->msg->comp) { unsigned long ms = txn->rl + HZ; time_left = wait_for_completion_timeout(txn->comp, -- GitLab From e77aff5528a183462714f750e45add6cc71e276a Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Thu, 30 Jan 2025 21:58:22 +0000 Subject: [PATCH 0902/1585] binderfs: fix use-after-free in binder_devices Devices created through binderfs are added to the global binder_devices list but are not removed before being destroyed. This leads to dangling pointers in the list and subsequent use-after-free errors: ================================================================== BUG: KASAN: slab-use-after-free in binder_add_device+0x5c/0x9c Write of size 8 at addr ffff0000c258d708 by task mount/653 CPU: 7 UID: 0 PID: 653 Comm: mount Not tainted 6.13.0-09030-g6d61a53dd6f5 #1 Hardware name: linux,dummy-virt (DT) Call trace: binder_add_device+0x5c/0x9c binderfs_binder_device_create+0x690/0x84c [...] __arm64_sys_mount+0x324/0x3bc Allocated by task 632: binderfs_binder_device_create+0x168/0x84c binder_ctl_ioctl+0xfc/0x184 [...] __arm64_sys_ioctl+0x110/0x150 Freed by task 649: kfree+0xe0/0x338 binderfs_evict_inode+0x138/0x1dc [...] ================================================================== Remove devices from binder_devices before destroying them. Cc: Li Li Reported-by: syzbot+7015dcf45953112c8b45@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7015dcf45953112c8b45 Fixes: 12d909cac1e1 ("binderfs: add new binder devices to binder_devices") Signed-off-by: Carlos Llamas Tested-by: syzbot+7015dcf45953112c8b45@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20250130215823.1518990-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binderfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index bc6bae76ccaf1..94c6446604fc9 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -274,6 +274,7 @@ static void binderfs_evict_inode(struct inode *inode) mutex_unlock(&binderfs_minors_mutex); if (refcount_dec_and_test(&device->ref)) { + hlist_del_init(&device->hlist); kfree(device->context.name); kfree(device); } -- GitLab From 819cec1dc47cdeac8f5dd6ba81c1dbee2a68c3bb Mon Sep 17 00:00:00 2001 From: Haoyu Li Date: Thu, 30 Jan 2025 19:58:11 +0800 Subject: [PATCH 0903/1585] drivers: virt: acrn: hsm: Use kzalloc to avoid info leak in pmcmd_ioctl In the "pmcmd_ioctl" function, three memory objects allocated by kmalloc are initialized by "hcall_get_cpu_state", which are then copied to user space. The initializer is indeed implemented in "acrn_hypercall2" (arch/x86/include/asm/acrn.h). There is a risk of information leakage due to uninitialized bytes. Fixes: 3d679d5aec64 ("virt: acrn: Introduce interfaces to query C-states and P-states allowed by hypervisor") Signed-off-by: Haoyu Li Cc: stable Acked-by: Fei Li Link: https://lore.kernel.org/r/20250130115811.92424-1-lihaoyu499@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/virt/acrn/hsm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/virt/acrn/hsm.c b/drivers/virt/acrn/hsm.c index c24036c4e51ec..e4e196abdaac9 100644 --- a/drivers/virt/acrn/hsm.c +++ b/drivers/virt/acrn/hsm.c @@ -49,7 +49,7 @@ static int pmcmd_ioctl(u64 cmd, void __user *uptr) switch (cmd & PMCMD_TYPE_MASK) { case ACRN_PMCMD_GET_PX_CNT: case ACRN_PMCMD_GET_CX_CNT: - pm_info = kmalloc(sizeof(u64), GFP_KERNEL); + pm_info = kzalloc(sizeof(u64), GFP_KERNEL); if (!pm_info) return -ENOMEM; @@ -64,7 +64,7 @@ static int pmcmd_ioctl(u64 cmd, void __user *uptr) kfree(pm_info); break; case ACRN_PMCMD_GET_PX_DATA: - px_data = kmalloc(sizeof(*px_data), GFP_KERNEL); + px_data = kzalloc(sizeof(*px_data), GFP_KERNEL); if (!px_data) return -ENOMEM; @@ -79,7 +79,7 @@ static int pmcmd_ioctl(u64 cmd, void __user *uptr) kfree(px_data); break; case ACRN_PMCMD_GET_CX_DATA: - cx_data = kmalloc(sizeof(*cx_data), GFP_KERNEL); + cx_data = kzalloc(sizeof(*cx_data), GFP_KERNEL); if (!cx_data) return -ENOMEM; -- GitLab From 038ef0754aae76f79b147b8867f9250e6a976872 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 7 Feb 2025 00:03:11 +0200 Subject: [PATCH 0904/1585] eeprom: digsy_mtc: Make GPIO lookup table match the device The dev_id value in the GPIO lookup table must match to the device instance name, which in this case is combined of name and platform device ID, i.e. "spi_gpio.1". But the table assumed that there was no platform device ID defined, which is wrong. Fix the dev_id value accordingly. Fixes: 9b00bc7b901f ("spi: spi-gpio: Rewrite to use GPIO descriptors") Cc: stable Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250206220311.1554075-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/digsy_mtc_eeprom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/eeprom/digsy_mtc_eeprom.c b/drivers/misc/eeprom/digsy_mtc_eeprom.c index 88888485e6f8e..ee58f7ce5bfa9 100644 --- a/drivers/misc/eeprom/digsy_mtc_eeprom.c +++ b/drivers/misc/eeprom/digsy_mtc_eeprom.c @@ -50,7 +50,7 @@ static struct platform_device digsy_mtc_eeprom = { }; static struct gpiod_lookup_table eeprom_spi_gpiod_table = { - .dev_id = "spi_gpio", + .dev_id = "spi_gpio.1", .table = { GPIO_LOOKUP("gpio@b00", GPIO_EEPROM_CLK, "sck", GPIO_ACTIVE_HIGH), -- GitLab From 6d991f569c5ef6eaeadf1238df2c36e3975233ad Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 23 Jan 2025 09:32:49 -0300 Subject: [PATCH 0905/1585] char: misc: deallocate static minor in error path When creating sysfs files fail, the allocated minor must be freed such that it can be later reused. That is specially harmful for static minor numbers, since those would always fail to register later on. Fixes: 6d04d2b554b1 ("misc: misc_minor_alloc to use ida for all dynamic/misc dynamic minors") Cc: stable Signed-off-by: Thadeu Lima de Souza Cascardo Link: https://lore.kernel.org/r/20250123123249.4081674-5-cascardo@igalia.com Signed-off-by: Greg Kroah-Hartman --- drivers/char/misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/misc.c b/drivers/char/misc.c index 2cf595d2e10b8..f7dd455dd0dd3 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -264,8 +264,8 @@ int misc_register(struct miscdevice *misc) device_create_with_groups(&misc_class, misc->parent, dev, misc, misc->groups, "%s", misc->name); if (IS_ERR(misc->this_device)) { + misc_minor_free(misc->minor); if (is_dynamic) { - misc_minor_free(misc->minor); misc->minor = MISC_DYNAMIC_MINOR; } err = PTR_ERR(misc->this_device); -- GitLab From 32ce5d87d52213a50a513750f01a56f4d01f50cb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 14 Feb 2025 11:21:30 +0100 Subject: [PATCH 0906/1585] bus: simple-pm-bus: fix forced runtime PM use The simple-pm-bus driver only enables runtime PM for some buses ('simple-pm-bus') yet has started calling pm_runtime_force_suspend() and pm_runtime_force_resume() during system suspend unconditionally. This currently works, but that is not obvious and depends on implementation details which may change at some point. Add dedicated system sleep ops and only call pm_runtime_force_suspend() and pm_runtime_force_resume() for buses that use runtime PM to avoid any future surprises. Fixes: c45839309c3d ("drivers: bus: simple-pm-bus: Use clocks") Cc: Liu Ying Signed-off-by: Johan Hovold Reviewed-by: Ulf Hansson Acked-by: Liu Ying Acked-by: Rafael J. Wysocki Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20250214102130.3000-1-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/bus/simple-pm-bus.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/bus/simple-pm-bus.c b/drivers/bus/simple-pm-bus.c index 5dea31769f9a8..d8e029e7e53f7 100644 --- a/drivers/bus/simple-pm-bus.c +++ b/drivers/bus/simple-pm-bus.c @@ -109,9 +109,29 @@ static int simple_pm_bus_runtime_resume(struct device *dev) return 0; } +static int simple_pm_bus_suspend(struct device *dev) +{ + struct simple_pm_bus *bus = dev_get_drvdata(dev); + + if (!bus) + return 0; + + return pm_runtime_force_suspend(dev); +} + +static int simple_pm_bus_resume(struct device *dev) +{ + struct simple_pm_bus *bus = dev_get_drvdata(dev); + + if (!bus) + return 0; + + return pm_runtime_force_resume(dev); +} + static const struct dev_pm_ops simple_pm_bus_pm_ops = { RUNTIME_PM_OPS(simple_pm_bus_runtime_suspend, simple_pm_bus_runtime_resume, NULL) - NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) + NOIRQ_SYSTEM_SLEEP_PM_OPS(simple_pm_bus_suspend, simple_pm_bus_resume) }; #define ONLY_BUS ((void *) 1) /* Match if the device is only a bus. */ -- GitLab From b4c173dfbb6c78568578ff18f9e8822d7bd0e31b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Feb 2025 11:02:58 +0100 Subject: [PATCH 0907/1585] fuse: don't truncate cached, mutated symlink Fuse allows the value of a symlink to change and this property is exploited by some filesystems (e.g. CVMFS). It has been observed, that sometimes after changing the symlink contents, the value is truncated to the old size. This is caused by fuse_getattr() racing with fuse_reverse_inval_inode(). fuse_reverse_inval_inode() updates the fuse_inode's attr_version, which results in fuse_change_attributes() exiting before updating the cached attributes This is okay, as the cached attributes remain invalid and the next call to fuse_change_attributes() will likely update the inode with the correct values. The reason this causes problems is that cached symlinks will be returned through page_get_link(), which truncates the symlink to inode->i_size. This is correct for filesystems that don't mutate symlinks, but in this case it causes bad behavior. The solution is to just remove this truncation. This can cause a regression in a filesystem that relies on supplying a symlink larger than the file size, but this is unlikely. If that happens we'd need to make this behavior conditional. Reported-by: Laura Promberger Tested-by: Sam Lewis Signed-off-by: Miklos Szeredi Link: https://lore.kernel.org/r/20250220100258.793363-1-mszeredi@redhat.com Reviewed-by: Bernd Schubert Signed-off-by: Christian Brauner --- fs/fuse/dir.c | 2 +- fs/namei.c | 24 +++++++++++++++++++----- include/linux/fs.h | 2 ++ 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 198862b086ff7..3805f9b06c9d2 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1636,7 +1636,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, goto out_err; if (fc->cache_symlinks) - return page_get_link(dentry, inode, callback); + return page_get_link_raw(dentry, inode, callback); err = -ECHILD; if (!dentry) diff --git a/fs/namei.c b/fs/namei.c index 3ab9440c5b931..ecb7b95c2ca33 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -5356,10 +5356,9 @@ const char *vfs_get_link(struct dentry *dentry, struct delayed_call *done) EXPORT_SYMBOL(vfs_get_link); /* get the link contents into pagecache */ -const char *page_get_link(struct dentry *dentry, struct inode *inode, - struct delayed_call *callback) +static char *__page_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) { - char *kaddr; struct page *page; struct address_space *mapping = inode->i_mapping; @@ -5378,8 +5377,23 @@ const char *page_get_link(struct dentry *dentry, struct inode *inode, } set_delayed_call(callback, page_put_link, page); BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM); - kaddr = page_address(page); - nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1); + return page_address(page); +} + +const char *page_get_link_raw(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) +{ + return __page_get_link(dentry, inode, callback); +} +EXPORT_SYMBOL_GPL(page_get_link_raw); + +const char *page_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) +{ + char *kaddr = __page_get_link(dentry, inode, callback); + + if (!IS_ERR(kaddr)) + nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1); return kaddr; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 2c3b2f8a621f7..9346adf28f7bc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3452,6 +3452,8 @@ extern const struct file_operations generic_ro_fops; extern int readlink_copy(char __user *, int, const char *, int); extern int page_readlink(struct dentry *, char __user *, int); +extern const char *page_get_link_raw(struct dentry *, struct inode *, + struct delayed_call *); extern const char *page_get_link(struct dentry *, struct inode *, struct delayed_call *); extern void page_put_link(void *); -- GitLab From 782cffeec9ad96daa64ffb2d527b2a052fb02552 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 19 Feb 2025 06:10:05 -0800 Subject: [PATCH 0908/1585] perf/x86/intel: Fix event constraints for LNC According to the latest event list, update the event constraint tables for Lion Cove core. The general rule (the event codes < 0x90 are restricted to counters 0-3.) has been removed. There is no restriction for most of the performance monitoring events. Fixes: a932aa0e868f ("perf/x86: Add Lunar Lake and Arrow Lake support") Reported-by: Amiri Khalil Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20250219141005.2446823-1-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 20 +++++++------------- arch/x86/events/intel/ds.c | 2 +- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index e86333eee2668..cdcebf30468a0 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -397,34 +397,28 @@ static struct event_constraint intel_lnc_event_constraints[] = { METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6), METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7), + INTEL_EVENT_CONSTRAINT(0x20, 0xf), + + INTEL_UEVENT_CONSTRAINT(0x012a, 0xf), + INTEL_UEVENT_CONSTRAINT(0x012b, 0xf), INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), INTEL_UEVENT_CONSTRAINT(0x0175, 0x4), INTEL_EVENT_CONSTRAINT(0x2e, 0x3ff), INTEL_EVENT_CONSTRAINT(0x3c, 0x3ff), - /* - * Generally event codes < 0x90 are restricted to counters 0-3. - * The 0x2E and 0x3C are exception, which has no restriction. - */ - INTEL_EVENT_CONSTRAINT_RANGE(0x01, 0x8f, 0xf), - INTEL_UEVENT_CONSTRAINT(0x01a3, 0xf), - INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1), INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1), INTEL_UEVENT_CONSTRAINT(0x10a4, 0x1), INTEL_UEVENT_CONSTRAINT(0x01b1, 0x8), + INTEL_UEVENT_CONSTRAINT(0x01cd, 0x3fc), INTEL_UEVENT_CONSTRAINT(0x02cd, 0x3), - INTEL_EVENT_CONSTRAINT(0xce, 0x1), INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf), - /* - * Generally event codes >= 0x90 are likely to have no restrictions. - * The exception are defined as above. - */ - INTEL_EVENT_CONSTRAINT_RANGE(0x90, 0xfe, 0x3ff), + + INTEL_UEVENT_CONSTRAINT(0x00e0, 0xf), EVENT_CONSTRAINT_END }; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index c2e2eae7309c3..f122882ef278f 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1199,7 +1199,7 @@ struct event_constraint intel_lnc_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), - INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3ff), + INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3fc), INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3), INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ -- GitLab From fa808ed4e199ed17d878eb75b110bda30dd52434 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 19 Feb 2025 14:07:37 -0800 Subject: [PATCH 0909/1585] KVM: arm64: Ensure a VMID is allocated before programming VTTBR_EL2 Vladimir reports that a race condition to attach a VMID to a stage-2 MMU sometimes results in a vCPU entering the guest with a VMID of 0: | CPU1 | CPU2 | | | | kvm_arch_vcpu_ioctl_run | | vcpu_load <= load VTTBR_EL2 | | kvm_vmid->id = 0 | | | kvm_arch_vcpu_ioctl_run | | vcpu_load <= load VTTBR_EL2 | | with kvm_vmid->id = 0| | kvm_arm_vmid_update <= allocates fresh | | kvm_vmid->id and | | reload VTTBR_EL2 | | | | | kvm_arm_vmid_update <= observes that kvm_vmid->id | | already allocated, | | skips reload VTTBR_EL2 Oh yeah, it's as bad as it looks. Remember that VHE loads the stage-2 MMU eagerly but a VMID only gets attached to the MMU later on in the KVM_RUN loop. Even in the "best case" where VTTBR_EL2 correctly gets reprogrammed before entering the EL1&0 regime, there is a period of time where hardware is configured with VMID 0. That's completely insane. So, rather than decorating the 'late' binding with another hack, just allocate the damn thing up front. Attaching a VMID from vcpu_load() is still rollover safe since (surprise!) it'll always get called after a vCPU was preempted. Excuse me while I go find a brown paper bag. Cc: stable@vger.kernel.org Fixes: 934bf871f011 ("KVM: arm64: Load the stage-2 MMU context in kvm_vcpu_load_vhe()") Reported-by: Vladimir Murzin Signed-off-by: Oliver Upton Link: https://lore.kernel.org/r/20250219220737.130842-1-oliver.upton@linux.dev Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/arm.c | 22 ++++++++++------------ arch/arm64/kvm/vmid.c | 11 +++-------- 3 files changed, 14 insertions(+), 21 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 3a7ec98ef1238..d919557af5e50 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1259,7 +1259,7 @@ int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu, extern unsigned int __ro_after_init kvm_arm_vmid_bits; int __init kvm_arm_vmid_alloc_init(void); void __init kvm_arm_vmid_alloc_free(void); -bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid); +void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid); void kvm_arm_vmid_clear_active(void); static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index bc7a37cea2420..0160b49243511 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -559,6 +559,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) mmu = vcpu->arch.hw_mmu; last_ran = this_cpu_ptr(mmu->last_vcpu_ran); + /* + * Ensure a VMID is allocated for the MMU before programming VTTBR_EL2, + * which happens eagerly in VHE. + * + * Also, the VMID allocator only preserves VMIDs that are active at the + * time of rollover, so KVM might need to grab a new VMID for the MMU if + * this is called from kvm_sched_in(). + */ + kvm_arm_vmid_update(&mmu->vmid); + /* * We guarantee that both TLBs and I-cache are private to each * vcpu. If detecting that a vcpu from the same VM has @@ -1138,18 +1148,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ preempt_disable(); - /* - * The VMID allocator only tracks active VMIDs per - * physical CPU, and therefore the VMID allocated may not be - * preserved on VMID roll-over if the task was preempted, - * making a thread's VMID inactive. So we need to call - * kvm_arm_vmid_update() in non-premptible context. - */ - if (kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid) && - has_vhe()) - __load_stage2(vcpu->arch.hw_mmu, - vcpu->arch.hw_mmu->arch); - kvm_pmu_flush_hwstate(vcpu); local_irq_disable(); diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c index 806223b7022af..7fe8ba1a2851c 100644 --- a/arch/arm64/kvm/vmid.c +++ b/arch/arm64/kvm/vmid.c @@ -135,11 +135,10 @@ void kvm_arm_vmid_clear_active(void) atomic64_set(this_cpu_ptr(&active_vmids), VMID_ACTIVE_INVALID); } -bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid) +void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid) { unsigned long flags; u64 vmid, old_active_vmid; - bool updated = false; vmid = atomic64_read(&kvm_vmid->id); @@ -157,21 +156,17 @@ bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid) if (old_active_vmid != 0 && vmid_gen_match(vmid) && 0 != atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_vmids), old_active_vmid, vmid)) - return false; + return; raw_spin_lock_irqsave(&cpu_vmid_lock, flags); /* Check that our VMID belongs to the current generation. */ vmid = atomic64_read(&kvm_vmid->id); - if (!vmid_gen_match(vmid)) { + if (!vmid_gen_match(vmid)) vmid = new_vmid(kvm_vmid); - updated = true; - } atomic64_set(this_cpu_ptr(&active_vmids), vmid); raw_spin_unlock_irqrestore(&cpu_vmid_lock, flags); - - return updated; } /* -- GitLab From 9af3b4f2d879da01192d6168e6c651e7fb5b652d Mon Sep 17 00:00:00 2001 From: Dmitry Panchenko Date: Thu, 20 Feb 2025 18:15:37 +0200 Subject: [PATCH 0910/1585] ALSA: usb-audio: Re-add sample rate quirk for Pioneer DJM-900NXS2 Re-add the sample-rate quirk for the Pioneer DJM-900NXS2. This device does not work without setting sample-rate. Signed-off-by: Dmitry Panchenko Cc: Link: https://patch.msgid.link/20250220161540.3624660-1-dmitry@d-systems.ee Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index a97efb7b131ea..09210fb4ac60c 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1868,6 +1868,7 @@ void snd_usb_set_format_quirk(struct snd_usb_substream *subs, case USB_ID(0x534d, 0x2109): /* MacroSilicon MS2109 */ subs->stream_offset_adj = 2; break; + case USB_ID(0x2b73, 0x000a): /* Pioneer DJM-900NXS2 */ case USB_ID(0x2b73, 0x0013): /* Pioneer DJM-450 */ pioneer_djm_set_format_quirk(subs, 0x0082); break; -- GitLab From f13409bb3f9140dad7256febcb478f0c9600312c Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Fri, 14 Feb 2025 09:02:04 +0100 Subject: [PATCH 0911/1585] nvme-fc: rely on state transitions to handle connectivity loss It's not possible to call nvme_state_ctrl_state with holding a spin lock, because nvme_state_ctrl_state calls cancel_delayed_work_sync when fastfail is enabled. Instead syncing the ASSOC_FLAG and state transitions using a lock, it's possible to only rely on the state machine transitions. That means nvme_fc_ctrl_connectivity_loss should unconditionally call nvme_reset_ctrl which avoids the read race on the ctrl state variable. Actually, it's not necessary to test in which state the ctrl is, the reset work will only scheduled when the state machine is in LIVE state. In nvme_fc_create_association, the LIVE state can only be entered if it was previously CONNECTING. If this is not possible then the reset handler got triggered. Thus just error out here. Fixes: ee59e3820ca9 ("nvme-fc: do not ignore connectivity loss during connecting") Closes: https://lore.kernel.org/all/denqwui6sl5erqmz2gvrwueyxakl5txzbbiu3fgebryzrfxunm@iwxuthct377m/ Reported-by: Shinichiro Kawasaki Tested-by: Shin'ichiro Kawasaki Reviewed-by: Sagi Grimberg Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 67 ++++-------------------------------------- 1 file changed, 6 insertions(+), 61 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index f4f1866fbd5b8..b9929a5a7f4e3 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -781,61 +781,12 @@ nvme_fc_abort_lsops(struct nvme_fc_rport *rport) static void nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl) { - enum nvme_ctrl_state state; - unsigned long flags; - dev_info(ctrl->ctrl.device, "NVME-FC{%d}: controller connectivity lost. Awaiting " "Reconnect", ctrl->cnum); - spin_lock_irqsave(&ctrl->lock, flags); set_bit(ASSOC_FAILED, &ctrl->flags); - state = nvme_ctrl_state(&ctrl->ctrl); - spin_unlock_irqrestore(&ctrl->lock, flags); - - switch (state) { - case NVME_CTRL_NEW: - case NVME_CTRL_LIVE: - /* - * Schedule a controller reset. The reset will terminate the - * association and schedule the reconnect timer. Reconnects - * will be attempted until either the ctlr_loss_tmo - * (max_retries * connect_delay) expires or the remoteport's - * dev_loss_tmo expires. - */ - if (nvme_reset_ctrl(&ctrl->ctrl)) { - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Couldn't schedule reset.\n", - ctrl->cnum); - nvme_delete_ctrl(&ctrl->ctrl); - } - break; - - case NVME_CTRL_CONNECTING: - /* - * The association has already been terminated and the - * controller is attempting reconnects. No need to do anything - * futher. Reconnects will be attempted until either the - * ctlr_loss_tmo (max_retries * connect_delay) expires or the - * remoteport's dev_loss_tmo expires. - */ - break; - - case NVME_CTRL_RESETTING: - /* - * Controller is already in the process of terminating the - * association. No need to do anything further. The reconnect - * step will kick in naturally after the association is - * terminated. - */ - break; - - case NVME_CTRL_DELETING: - case NVME_CTRL_DELETING_NOIO: - default: - /* no action to take - let it delete */ - break; - } + nvme_reset_ctrl(&ctrl->ctrl); } /** @@ -3071,7 +3022,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) struct nvmefc_ls_rcv_op *disls = NULL; unsigned long flags; int ret; - bool changed; ++ctrl->ctrl.nr_reconnects; @@ -3177,23 +3127,18 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) else ret = nvme_fc_recreate_io_queues(ctrl); } + if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) + ret = -EIO; if (ret) goto out_term_aen_ops; - spin_lock_irqsave(&ctrl->lock, flags); - if (!test_bit(ASSOC_FAILED, &ctrl->flags)) - changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); - else + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE)) { ret = -EIO; - spin_unlock_irqrestore(&ctrl->lock, flags); - - if (ret) goto out_term_aen_ops; + } ctrl->ctrl.nr_reconnects = 0; - - if (changed) - nvme_start_ctrl(&ctrl->ctrl); + nvme_start_ctrl(&ctrl->ctrl); return 0; /* Success */ -- GitLab From d2fe192348f93fe3a0cb1e33e4aba58e646397f4 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Fri, 14 Feb 2025 09:02:03 +0100 Subject: [PATCH 0912/1585] nvme: only allow entering LIVE from CONNECTING state The fabric transports and also the PCI transport are not entering the LIVE state from NEW or RESETTING. This makes the state machine more restrictive and allows to catch not supported state transitions, e.g. directly switching from RESETTING to LIVE. Reviewed-by: Sagi Grimberg Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 818d4e49aab51..f028913e2e622 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -564,8 +564,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, switch (new_state) { case NVME_CTRL_LIVE: switch (old_state) { - case NVME_CTRL_NEW: - case NVME_CTRL_RESETTING: case NVME_CTRL_CONNECTING: changed = true; fallthrough; -- GitLab From 860ca5e50f73c2a1cef7eefc9d39d04e275417f7 Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Mon, 17 Feb 2025 15:20:38 +0800 Subject: [PATCH 0913/1585] smb: client: Add check for next_buffer in receive_encrypted_standard() Add check for the return value of cifs_buf_get() and cifs_small_buf_get() in receive_encrypted_standard() to prevent null pointer dereference. Fixes: eec04ea11969 ("smb: client: fix OOB in receive_encrypted_standard()") Cc: stable@vger.kernel.org Signed-off-by: Haoxiang Li Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 23e0c8be7fb52..4dd11eafb69d9 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -4965,6 +4965,10 @@ receive_encrypted_standard(struct TCP_Server_Info *server, next_buffer = (char *)cifs_buf_get(); else next_buffer = (char *)cifs_small_buf_get(); + if (!next_buffer) { + cifs_server_dbg(VFS, "No memory for (large) SMB response\n"); + return -1; + } memcpy(next_buffer, buf + next_cmd, pdu_length - next_cmd); } -- GitLab From bd30e8d7bfa6e528f9e746c940e6f7246c7899d6 Mon Sep 17 00:00:00 2001 From: Hsin-chen Chuang Date: Fri, 14 Feb 2025 19:17:09 +0800 Subject: [PATCH 0914/1585] Bluetooth: Always allow SCO packets for user channel The SCO packets from Bluetooth raw socket are now rejected because hci_conn_num is left 0. This patch allows such the usecase to enable the userspace SCO support. Fixes: b16b327edb4d ("Bluetooth: btusb: add sysfs attribute to control USB alt setting") Signed-off-by: Hsin-chen Chuang Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 90966dfbd2781..8149e53fd0a76 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2102,7 +2102,8 @@ static int btusb_send_frame(struct hci_dev *hdev, struct sk_buff *skb) return submit_or_queue_tx_urb(hdev, urb); case HCI_SCODATA_PKT: - if (hci_conn_num(hdev, SCO_LINK) < 1) + if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + hci_conn_num(hdev, SCO_LINK) < 1) return -ENODEV; urb = alloc_isoc_urb(hdev, skb); @@ -2576,7 +2577,8 @@ static int btusb_send_frame_intel(struct hci_dev *hdev, struct sk_buff *skb) return submit_or_queue_tx_urb(hdev, urb); case HCI_SCODATA_PKT: - if (hci_conn_num(hdev, SCO_LINK) < 1) + if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + hci_conn_num(hdev, SCO_LINK) < 1) return -ENODEV; urb = alloc_isoc_urb(hdev, skb); -- GitLab From b25120e1d5f2ebb3db00af557709041f47f7f3d0 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 14 Feb 2025 10:30:25 -0500 Subject: [PATCH 0915/1585] Bluetooth: L2CAP: Fix L2CAP_ECRED_CONN_RSP response L2CAP_ECRED_CONN_RSP needs to respond DCID in the same order received as SCID but the order is reversed due to use of list_add which actually prepend channels to the list so the response is reversed: > ACL Data RX: Handle 16 flags 0x02 dlen 26 LE L2CAP: Enhanced Credit Connection Request (0x17) ident 2 len 18 PSM: 39 (0x0027) MTU: 256 MPS: 251 Credits: 65535 Source CID: 116 Source CID: 117 Source CID: 118 Source CID: 119 Source CID: 120 < ACL Data TX: Handle 16 flags 0x00 dlen 26 LE L2CAP: Enhanced Credit Connection Response (0x18) ident 2 len 18 MTU: 517 MPS: 247 Credits: 3 Result: Connection successful (0x0000) Destination CID: 68 Destination CID: 67 Destination CID: 66 Destination CID: 65 Destination CID: 64 Also make sure the response don't include channels that are not on BT_CONNECT2 since the chan->ident can be set to the same value as in the following trace: < ACL Data TX: Handle 16 flags 0x00 dlen 12 LE L2CAP: LE Flow Control Credit (0x16) ident 6 len 4 Source CID: 64 Credits: 1 ... > ACL Data RX: Handle 16 flags 0x02 dlen 18 LE L2CAP: Enhanced Credit Connection Request (0x17) ident 6 len 10 PSM: 39 (0x0027) MTU: 517 MPS: 251 Credits: 255 Source CID: 70 < ACL Data TX: Handle 16 flags 0x00 dlen 20 LE L2CAP: Enhanced Credit Connection Response (0x18) ident 6 len 12 MTU: 517 MPS: 247 Credits: 3 Result: Connection successful (0x0000) Destination CID: 64 Destination CID: 68 Closes: https://github.com/bluez/bluez/issues/1094 Fixes: 9aa9d9473f15 ("Bluetooth: L2CAP: Fix responding with wrong PDU type") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index fec11e576f310..b22078b679726 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -632,7 +632,8 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) test_bit(FLAG_HOLD_HCI_CONN, &chan->flags)) hci_conn_hold(conn->hcon); - list_add(&chan->list, &conn->chan_l); + /* Append to the list since the order matters for ECRED */ + list_add_tail(&chan->list, &conn->chan_l); } void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) @@ -3771,7 +3772,11 @@ static void l2cap_ecred_rsp_defer(struct l2cap_chan *chan, void *data) struct l2cap_ecred_conn_rsp *rsp_flex = container_of(&rsp->pdu.rsp, struct l2cap_ecred_conn_rsp, hdr); - if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) + /* Check if channel for outgoing connection or if it wasn't deferred + * since in those cases it must be skipped. + */ + if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags) || + !test_and_clear_bit(FLAG_DEFER_SETUP, &chan->flags)) return; /* Reset ident so only one response is sent */ -- GitLab From 511a3444f72efdc51fa923c4b1f5f0abd545fb20 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 20 Feb 2025 15:07:57 +0100 Subject: [PATCH 0916/1585] MAINTAINERS: Add entry for DMEM cgroup controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cgroups controller is currently maintained through the drm-misc tree, so lets add Maxime Ripard, Natalie Vock and me as specific maintainers for dmem. We keep the cgroup mailing list CC'd on all cgroup specific patches. Acked-by: Maxime Ripard Acked-by: Natalie Vock Acked-by: Tejun Heo Acked-by: Johannes Weiner Acked-by: Michal Koutný Link: https://patchwork.freedesktop.org/patch/msgid/20250220140757.16823-1-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- .mailmap | 1 + MAINTAINERS | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/.mailmap b/.mailmap index 399322897938d..b71ee37f805d3 100644 --- a/.mailmap +++ b/.mailmap @@ -502,6 +502,7 @@ Nadav Amit Nadia Yvette Chambers William Lee Irwin III Naoya Horiguchi Naoya Horiguchi +Natalie Vock Nathan Chancellor Naveen N Rao Naveen N Rao diff --git a/MAINTAINERS b/MAINTAINERS index 18ade2ea4f3c4..473e7814a2925 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5878,6 +5878,17 @@ F: tools/testing/selftests/cgroup/test_cpuset.c F: tools/testing/selftests/cgroup/test_cpuset_prs.sh F: tools/testing/selftests/cgroup/test_cpuset_v1_base.sh +CONTROL GROUP - DEVICE MEMORY CONTROLLER (DMEM) +M: Maarten Lankhorst +M: Maxime Ripard +M: Natalie Vock +L: cgroups@vger.kernel.org +L: dri-devel@lists.freedesktop.org +S: Maintained +T: git https://gitlab.freedesktop.org/drm/misc/kernel.git +F: include/linux/cgroup_dmem.h +F: kernel/cgroup/dmem.c + CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG) M: Johannes Weiner M: Michal Hocko -- GitLab From 992ee3ed6e9fdd0be83a7daa5ff738e3cf86047f Mon Sep 17 00:00:00 2001 From: George Moussalem Date: Wed, 19 Feb 2025 14:09:21 +0100 Subject: [PATCH 0917/1585] net: phy: qcom: qca807x fix condition for DAC_DSP_BIAS_CURRENT While setting the DAC value, the wrong boolean value is evaluated to set the DSP bias current. So let's correct the conditional statement and use the right boolean value read from the DTS set in the priv. Cc: stable@vger.kernel.org Fixes: d1cb613efbd3 ("net: phy: qcom: add support for QCA807x PHY Family") Signed-off-by: George Moussalem Signed-off-by: Christian Marangi Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250219130923.7216-1-ansuelsmth@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/qcom/qca807x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/qcom/qca807x.c b/drivers/net/phy/qcom/qca807x.c index 3279de857b474..2ad8c2586d643 100644 --- a/drivers/net/phy/qcom/qca807x.c +++ b/drivers/net/phy/qcom/qca807x.c @@ -774,7 +774,7 @@ static int qca807x_config_init(struct phy_device *phydev) control_dac &= ~QCA807X_CONTROL_DAC_MASK; if (!priv->dac_full_amplitude) control_dac |= QCA807X_CONTROL_DAC_DSP_AMPLITUDE; - if (!priv->dac_full_amplitude) + if (!priv->dac_full_bias_current) control_dac |= QCA807X_CONTROL_DAC_DSP_BIAS_CURRENT; if (!priv->dac_disable_bias_current_tweak) control_dac |= QCA807X_CONTROL_DAC_BIAS_CURRENT_TWEAK; -- GitLab From e31e3f6c0ce473f7ce1e70d54ac8e3ed190509f8 Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Thu, 20 Feb 2025 16:17:14 +0800 Subject: [PATCH 0918/1585] soc: loongson: loongson2_guts: Add check for devm_kstrdup() Add check for the return value of devm_kstrdup() in loongson2_guts_probe() to catch potential exception. Fixes: b82621ac8450 ("soc: loongson: add GUTS driver for loongson-2 platforms") Cc: stable@vger.kernel.org Signed-off-by: Haoxiang Li Link: https://lore.kernel.org/r/20250220081714.2676828-1-haoxiang_li2024@163.com Signed-off-by: Arnd Bergmann --- drivers/soc/loongson/loongson2_guts.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/soc/loongson/loongson2_guts.c b/drivers/soc/loongson/loongson2_guts.c index ae42e3a9127fc..16913c3ef65ca 100644 --- a/drivers/soc/loongson/loongson2_guts.c +++ b/drivers/soc/loongson/loongson2_guts.c @@ -114,8 +114,11 @@ static int loongson2_guts_probe(struct platform_device *pdev) if (of_property_read_string(root, "model", &machine)) of_property_read_string_index(root, "compatible", 0, &machine); of_node_put(root); - if (machine) + if (machine) { soc_dev_attr.machine = devm_kstrdup(dev, machine, GFP_KERNEL); + if (!soc_dev_attr.machine) + return -ENOMEM; + } svr = loongson2_guts_get_svr(); soc_die = loongson2_soc_die_match(svr, loongson2_soc_die); -- GitLab From 68aaa637162787dc3374080efe03366f70b344f1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 20 Feb 2025 09:17:17 -0500 Subject: [PATCH 0919/1585] bcachefs: print op->nonce on data update inconsistency "nonce inconstancy" is popping up again, causing us to go emergency read-only. This one looks less serious, i.e. specific to the encryption path and not indicative of a data corruption bug. But we'll need more info to track it down. Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 337494facac64..642fbc60ecab1 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -340,6 +340,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, struct printbuf buf = PRINTBUF; prt_str(&buf, "about to insert invalid key in data update path"); + prt_printf(&buf, "\nop.nonce: %u", m->op.nonce); prt_str(&buf, "\nold: "); bch2_bkey_val_to_text(&buf, c, old); prt_str(&buf, "\nk: "); -- GitLab From c522093b02835f2e897b83e9764e7919edac5d08 Mon Sep 17 00:00:00 2001 From: Alan Huang Date: Thu, 20 Feb 2025 18:56:08 +0800 Subject: [PATCH 0920/1585] bcachefs: Fix memmove when move keys down The fix alone doesn't fix [1], but should be applied before debugging that. [1] https://syzkaller.appspot.com/bug?extid=38a0cbd267eff2d286ff Signed-off-by: Alan Huang Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index e371e60e3133e..dece27d9db04e 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -996,7 +996,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, } got_good_key: le16_add_cpu(&i->u64s, -next_good_key); - memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k); + memmove_u64s_down(k, (u64 *) k + next_good_key, (u64 *) vstruct_end(i) - (u64 *) k); set_btree_node_need_rewrite(b); } fsck_err: -- GitLab From b522f180ee2b264b771fcbd0ab67d84cdd9e580d Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 31 Jan 2025 11:07:31 -0800 Subject: [PATCH 0921/1585] MAINTAINERS: Change maintainer for RDT Due to job transition, I am stepping down as RDT maintainer. Add Tony as a co-maintainer. Signed-off-by: Fenghua Yu Signed-off-by: Dave Hansen Acked-by: Reinette Chatre Acked-by: Tony Luck Link: https://lore.kernel.org/all/20250131190731.3981085-1-fenghua.yu%40intel.com --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index feed152470f68..d1cbaeb58143a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19779,7 +19779,7 @@ F: net/rds/ F: tools/testing/selftests/net/rds/ RDT - RESOURCE ALLOCATION -M: Fenghua Yu +M: Tony Luck M: Reinette Chatre L: linux-kernel@vger.kernel.org S: Supported -- GitLab From f06e4bfd010faefa637689d2df2c727dbf6e1d27 Mon Sep 17 00:00:00 2001 From: Qunqin Zhao Date: Wed, 19 Feb 2025 10:07:01 +0800 Subject: [PATCH 0922/1585] net: stmmac: dwmac-loongson: Add fix_soc_reset() callback Loongson's DWMAC device may take nearly two seconds to complete DMA reset, however, the default waiting time for reset is 200 milliseconds. Therefore, the following error message may appear: [14.427169] dwmac-loongson-pci 0000:00:03.2: Failed to reset the dma Fixes: 803fc61df261 ("net: stmmac: dwmac-loongson: Add Loongson Multi-channels GMAC support") Cc: stable@vger.kernel.org Signed-off-by: Qunqin Zhao Reviewed-by: Huacai Chen Reviewed-by: Jacob Keller Acked-by: Yanteng Si Link: https://patch.msgid.link/20250219020701.15139-1-zhaoqunqin@loongson.cn Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-loongson.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index bfe6e2d631bdf..f5acfb7d4ff65 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -516,6 +516,19 @@ static int loongson_dwmac_acpi_config(struct pci_dev *pdev, return 0; } +/* Loongson's DWMAC device may take nearly two seconds to complete DMA reset */ +static int loongson_dwmac_fix_reset(void *priv, void __iomem *ioaddr) +{ + u32 value = readl(ioaddr + DMA_BUS_MODE); + + value |= DMA_BUS_MODE_SFT_RESET; + writel(value, ioaddr + DMA_BUS_MODE); + + return readl_poll_timeout(ioaddr + DMA_BUS_MODE, value, + !(value & DMA_BUS_MODE_SFT_RESET), + 10000, 2000000); +} + static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct plat_stmmacenet_data *plat; @@ -566,6 +579,7 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id plat->bsp_priv = ld; plat->setup = loongson_dwmac_setup; + plat->fix_soc_reset = loongson_dwmac_fix_reset; ld->dev = &pdev->dev; ld->loongson_id = readl(res.addr + GMAC_VERSION) & 0xff; -- GitLab From dce5c4afd035e8090a26e5d776b1682c0e649683 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Mon, 17 Feb 2025 10:16:28 +0800 Subject: [PATCH 0923/1585] scsi: core: Clear driver private data when retrying request After commit 1bad6c4a57ef ("scsi: zero per-cmd private driver data for each MQ I/O"), the xen-scsifront/virtio_scsi/snic drivers all removed code that explicitly zeroed driver-private command data. In combination with commit 464a00c9e0ad ("scsi: core: Kill DRIVER_SENSE"), after virtio_scsi performs a capacity expansion, the first request will return a unit attention to indicate that the capacity has changed. And then the original command is retried. As driver-private command data was not cleared, the request would return UA again and eventually time out and fail. Zero driver-private command data when a request is retried. Fixes: f7de50da1479 ("scsi: xen-scsifront: Remove code that zeroes driver-private command data") Fixes: c2bb87318baa ("scsi: virtio_scsi: Remove code that zeroes driver-private command data") Fixes: c3006a926468 ("scsi: snic: Remove code that zeroes driver-private command data") Signed-off-by: Ye Bin Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20250217021628.2929248-1-yebin@huaweicloud.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index be0890e4e7062..f1cfe0bb89b20 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1669,13 +1669,6 @@ static blk_status_t scsi_prepare_cmd(struct request *req) if (in_flight) __set_bit(SCMD_STATE_INFLIGHT, &cmd->state); - /* - * Only clear the driver-private command data if the LLD does not supply - * a function to initialize that data. - */ - if (!shost->hostt->init_cmd_priv) - memset(cmd + 1, 0, shost->hostt->cmd_size); - cmd->prot_op = SCSI_PROT_NORMAL; if (blk_rq_bytes(req)) cmd->sc_data_direction = rq_dma_dir(req); @@ -1842,6 +1835,13 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, if (!scsi_host_queue_ready(q, shost, sdev, cmd)) goto out_dec_target_busy; + /* + * Only clear the driver-private command data if the LLD does not supply + * a function to initialize that data. + */ + if (shost->hostt->cmd_size && !shost->hostt->init_cmd_priv) + memset(cmd + 1, 0, shost->hostt->cmd_size); + if (!(req->rq_flags & RQF_DONTPREP)) { ret = scsi_prepare_cmd(req); if (ret != BLK_STS_OK) -- GitLab From fe06b7c07f3fbcce2a2ca6f7b0d543b5699ea00f Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 19 Feb 2025 16:20:47 +0530 Subject: [PATCH 0924/1585] scsi: ufs: core: Set default runtime/system PM levels before ufshcd_hba_init() Commit bb9850704c04 ("scsi: ufs: core: Honor runtime/system PM levels if set by host controller drivers") introduced the check for setting default PM levels only if the levels are uninitialized by the host controller drivers. But it missed the fact that the levels could be initialized to 0 (UFS_PM_LVL_0) on purpose by the controller drivers. Even though none of the drivers are doing so now, the logic should be fixed irrespectively. So set the default levels unconditionally before calling ufshcd_hba_init() API which initializes the controller drivers. It ensures that the controller drivers could override the default levels if required. Fixes: bb9850704c04 ("scsi: ufs: core: Honor runtime/system PM levels if set by host controller drivers") Reported-by: Bao D. Nguyen Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20250219105047.49932-1-manivannan.sadhasivam@linaro.org Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index f9303e66bb798..464f13da259aa 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -10431,6 +10431,21 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) */ spin_lock_init(&hba->clk_gating.lock); + /* + * Set the default power management level for runtime and system PM. + * Host controller drivers can override them in their + * 'ufs_hba_variant_ops::init' callback. + * + * Default power saving mode is to keep UFS link in Hibern8 state + * and UFS device in sleep state. + */ + hba->rpm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state( + UFS_SLEEP_PWR_MODE, + UIC_LINK_HIBERN8_STATE); + hba->spm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state( + UFS_SLEEP_PWR_MODE, + UIC_LINK_HIBERN8_STATE); + err = ufshcd_hba_init(hba); if (err) goto out_error; @@ -10544,21 +10559,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) goto out_disable; } - /* - * Set the default power management level for runtime and system PM if - * not set by the host controller drivers. - * Default power saving mode is to keep UFS link in Hibern8 state - * and UFS device in sleep state. - */ - if (!hba->rpm_lvl) - hba->rpm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state( - UFS_SLEEP_PWR_MODE, - UIC_LINK_HIBERN8_STATE); - if (!hba->spm_lvl) - hba->spm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state( - UFS_SLEEP_PWR_MODE, - UIC_LINK_HIBERN8_STATE); - INIT_DELAYED_WORK(&hba->rpm_dev_flush_recheck_work, ufshcd_rpm_dev_flush_recheck_work); INIT_DELAYED_WORK(&hba->ufs_rtc_update_work, ufshcd_rtc_work); -- GitLab From f27a95845b01e86d67c8b014b4f41bd3327daa63 Mon Sep 17 00:00:00 2001 From: Arthur Simchaev Date: Thu, 20 Feb 2025 16:20:39 +0200 Subject: [PATCH 0925/1585] scsi: ufs: core: bsg: Fix crash when arpmb command fails If the device doesn't support arpmb we'll crash due to copying user data in bsg_transport_sg_io_fn(). In the case where ufs_bsg_exec_advanced_rpmb_req() returns an error, do not set the job's reply_len. Memory crash backtrace: 3,1290,531166405,-;ufshcd 0000:00:12.5: ARPMB OP failed: error code -22 4,1308,531166555,-;Call Trace: 4,1309,531166559,-; 4,1310,531166565,-; ? show_regs+0x6d/0x80 4,1311,531166575,-; ? die+0x37/0xa0 4,1312,531166583,-; ? do_trap+0xd4/0xf0 4,1313,531166593,-; ? do_error_trap+0x71/0xb0 4,1314,531166601,-; ? usercopy_abort+0x6c/0x80 4,1315,531166610,-; ? exc_invalid_op+0x52/0x80 4,1316,531166622,-; ? usercopy_abort+0x6c/0x80 4,1317,531166630,-; ? asm_exc_invalid_op+0x1b/0x20 4,1318,531166643,-; ? usercopy_abort+0x6c/0x80 4,1319,531166652,-; __check_heap_object+0xe3/0x120 4,1320,531166661,-; check_heap_object+0x185/0x1d0 4,1321,531166670,-; __check_object_size.part.0+0x72/0x150 4,1322,531166679,-; __check_object_size+0x23/0x30 4,1323,531166688,-; bsg_transport_sg_io_fn+0x314/0x3b0 Fixes: 6ff265fc5ef6 ("scsi: ufs: core: bsg: Add advanced RPMB support in ufs_bsg") Cc: stable@vger.kernel.org Reviewed-by: Bean Huo Signed-off-by: Arthur Simchaev Link: https://lore.kernel.org/r/20250220142039.250992-1-arthur.simchaev@sandisk.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufs_bsg.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/ufs/core/ufs_bsg.c b/drivers/ufs/core/ufs_bsg.c index 8d4ad0a3f2cf0..252186124669a 100644 --- a/drivers/ufs/core/ufs_bsg.c +++ b/drivers/ufs/core/ufs_bsg.c @@ -194,10 +194,12 @@ static int ufs_bsg_request(struct bsg_job *job) ufshcd_rpm_put_sync(hba); kfree(buff); bsg_reply->result = ret; - job->reply_len = !rpmb ? sizeof(struct ufs_bsg_reply) : sizeof(struct ufs_rpmb_reply); /* complete the job here only if no error */ - if (ret == 0) + if (ret == 0) { + job->reply_len = rpmb ? sizeof(struct ufs_rpmb_reply) : + sizeof(struct ufs_bsg_reply); bsg_job_done(job, ret, bsg_reply->reply_payload_rcv_len); + } return ret; } -- GitLab From 59f37036bb7ab3d554c24abc856aabca01126414 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 15 Feb 2025 11:36:15 +0000 Subject: [PATCH 0926/1585] btrfs: fix use-after-free on inode when scanning root during em shrinking At btrfs_scan_root() we are accessing the inode's root (and fs_info) in a call to btrfs_fs_closing() after we have scheduled the inode for a delayed iput, and that can result in a use-after-free on the inode in case the cleaner kthread does the iput before we dereference the inode in the call to btrfs_fs_closing(). Fix this by using the fs_info stored already in a local variable instead of doing inode->root->fs_info. Fixes: 102044384056 ("btrfs: make the extent map shrinker run asynchronously as a work queue job") CC: stable@vger.kernel.org # 6.13+ Tested-by: Ivan Shapovalov Link: https://lore.kernel.org/linux-btrfs/0414d690ac5680d0d77dfc930606cdc36e42e12f.camel@intelfx.name/ Reviewed-by: Johannes Thumshirn Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/extent_map.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 67ce85ff0ae25..bee1b94a10495 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1222,8 +1222,7 @@ static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx fs_info->em_shrinker_last_ino = btrfs_ino(inode); btrfs_add_delayed_iput(inode); - if (ctx->scanned >= ctx->nr_to_scan || - btrfs_fs_closing(inode->root->fs_info)) + if (ctx->scanned >= ctx->nr_to_scan || btrfs_fs_closing(fs_info)) break; cond_resched(); -- GitLab From c6c9c4d56483d941f567eb921434c25fc6086dfa Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 15 Feb 2025 11:04:15 +0000 Subject: [PATCH 0927/1585] btrfs: skip inodes without loaded extent maps when shrinking extent maps If there are inodes that don't have any loaded extent maps, we end up grabbing a reference on them and later adding a delayed iput, which wakes up the cleaner and makes it do unnecessary work. This is common when for example the inodes were open only to run stat(2) or all their extent maps were already released through the folio release callback (btrfs_release_folio()) or released by a previous run of the shrinker, or directories which never have extent maps. Reported-by: Ivan Shapovalov Tested-by: Ivan Shapovalov Link: https://lore.kernel.org/linux-btrfs/0414d690ac5680d0d77dfc930606cdc36e42e12f.camel@intelfx.name/ CC: stable@vger.kernel.org # 6.13+ Reviewed-by: Johannes Thumshirn Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/extent_map.c | 78 +++++++++++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index bee1b94a10495..8c6b85ffd18f6 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1128,6 +1128,8 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_c long nr_dropped = 0; struct rb_node *node; + lockdep_assert_held_write(&tree->lock); + /* * Take the mmap lock so that we serialize with the inode logging phase * of fsync because we may need to set the full sync flag on the inode, @@ -1139,28 +1141,12 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_c * to find new extents, which may not be there yet because ordered * extents haven't completed yet. * - * We also do a try lock because otherwise we could deadlock. This is - * because the shrinker for this filesystem may be invoked while we are - * in a path that is holding the mmap lock in write mode. For example in - * a reflink operation while COWing an extent buffer, when allocating - * pages for a new extent buffer and under memory pressure, the shrinker - * may be invoked, and therefore we would deadlock by attempting to read - * lock the mmap lock while we are holding already a write lock on it. + * We also do a try lock because we don't want to block for too long and + * we are holding the extent map tree's lock in write mode. */ if (!down_read_trylock(&inode->i_mmap_lock)) return 0; - /* - * We want to be fast so if the lock is busy we don't want to spend time - * waiting for it - either some task is about to do IO for the inode or - * we may have another task shrinking extent maps, here in this code, so - * skip this inode. - */ - if (!write_trylock(&tree->lock)) { - up_read(&inode->i_mmap_lock); - return 0; - } - node = rb_first(&tree->root); while (node) { struct rb_node *next = rb_next(node); @@ -1201,12 +1187,61 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_c break; node = next; } - write_unlock(&tree->lock); up_read(&inode->i_mmap_lock); return nr_dropped; } +static struct btrfs_inode *find_first_inode_to_shrink(struct btrfs_root *root, + u64 min_ino) +{ + struct btrfs_inode *inode; + unsigned long from = min_ino; + + xa_lock(&root->inodes); + while (true) { + struct extent_map_tree *tree; + + inode = xa_find(&root->inodes, &from, ULONG_MAX, XA_PRESENT); + if (!inode) + break; + + tree = &inode->extent_tree; + + /* + * We want to be fast so if the lock is busy we don't want to + * spend time waiting for it (some task is about to do IO for + * the inode). + */ + if (!write_trylock(&tree->lock)) + goto next; + + /* + * Skip inode if it doesn't have loaded extent maps, so we avoid + * getting a reference and doing an iput later. This includes + * cases like files that were opened for things like stat(2), or + * files with all extent maps previously released through the + * release folio callback (btrfs_release_folio()) or released in + * a previous run, or directories which never have extent maps. + */ + if (RB_EMPTY_ROOT(&tree->root)) { + write_unlock(&tree->lock); + goto next; + } + + if (igrab(&inode->vfs_inode)) + break; + + write_unlock(&tree->lock); +next: + from = btrfs_ino(inode) + 1; + cond_resched_lock(&root->inodes.xa_lock); + } + xa_unlock(&root->inodes); + + return inode; +} + static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx *ctx) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -1214,9 +1249,10 @@ static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx long nr_dropped = 0; u64 min_ino = fs_info->em_shrinker_last_ino + 1; - inode = btrfs_find_first_inode(root, min_ino); + inode = find_first_inode_to_shrink(root, min_ino); while (inode) { nr_dropped += btrfs_scan_inode(inode, ctx); + write_unlock(&inode->extent_tree.lock); min_ino = btrfs_ino(inode) + 1; fs_info->em_shrinker_last_ino = btrfs_ino(inode); @@ -1227,7 +1263,7 @@ static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx cond_resched(); - inode = btrfs_find_first_inode(root, min_ino); + inode = find_first_inode_to_shrink(root, min_ino); } if (inode) { -- GitLab From 15b3b3254d1453a8db038b7d44b311a2d6c71f98 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 15 Feb 2025 11:11:29 +0000 Subject: [PATCH 0928/1585] btrfs: do regular iput instead of delayed iput during extent map shrinking The extent map shrinker now runs in the system unbound workqueue and no longer in kswapd context so it can directly do an iput() on inodes even if that blocks or needs to acquire any lock (we aren't holding any locks when requesting the delayed iput from the shrinker). So we don't need to add a delayed iput, wake up the cleaner and delegate the iput() to the cleaner, which also adds extra contention on the spinlock that protects the delayed iputs list. Reported-by: Ivan Shapovalov Tested-by: Ivan Shapovalov Link: https://lore.kernel.org/linux-btrfs/0414d690ac5680d0d77dfc930606cdc36e42e12f.camel@intelfx.name/ CC: stable@vger.kernel.org # 6.12+ Reviewed-by: Johannes Thumshirn Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/extent_map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 8c6b85ffd18f6..7f46abbd6311b 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1256,7 +1256,7 @@ static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx min_ino = btrfs_ino(inode) + 1; fs_info->em_shrinker_last_ino = btrfs_ino(inode); - btrfs_add_delayed_iput(inode); + iput(&inode->vfs_inode); if (ctx->scanned >= ctx->nr_to_scan || btrfs_fs_closing(fs_info)) break; -- GitLab From b1bf18223a8340cf5d52162d320badcfe07b905d Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 17 Feb 2025 20:16:39 +1030 Subject: [PATCH 0929/1585] btrfs: output an error message if btrfs failed to find the seed fsid [BUG] If btrfs failed to locate the seed device for whatever reason, mounting the sprouted device will fail without any meaning error message: # mkfs.btrfs -f /dev/test/scratch1 # btrfstune -S1 /dev/test/scratch1 # mount /dev/test/scratch1 /mnt/btrfs # btrfs dev add -f /dev/test/scratch2 /mnt/btrfs # umount /mnt/btrfs # btrfs dev scan -u # btrfs mount /dev/test/scratch2 /mnt/btrfs mount: /mnt/btrfs: fsconfig system call failed: No such file or directory. dmesg(1) may have more information after failed mount system call. # dmesg -t | tail -n6 BTRFS info (device dm-5): first mount of filesystem 64252ded-5953-4868-b962-cea48f7ac4ea BTRFS info (device dm-5): using crc32c (crc32c-generic) checksum algorithm BTRFS info (device dm-5): using free-space-tree BTRFS error (device dm-5): failed to read chunk tree: -2 BTRFS error (device dm-5): open_ctree failed: -2 [CAUSE] The failure to mount is pretty straight forward, just unable to find the seed device and its fsid, caused by `btrfs dev scan -u`. But the lack of any useful info is a problem. [FIX] Just add an extra error message in open_seed_devices() to indicate the error. Now the error message would look like this: BTRFS info (device dm-4): first mount of filesystem 7769223d-4db1-4e4c-ac29-0a96f53576ab BTRFS info (device dm-4): using crc32c (crc32c-generic) checksum algorithm BTRFS info (device dm-4): using free-space-tree BTRFS error (device dm-4): failed to find fsid e87c12e6-584b-4e98-8b88-962c33a619ff when attempting to open seed devices BTRFS error (device dm-4): failed to read chunk tree: -2 BTRFS error (device dm-4): open_ctree failed: -2 Link: https://github.com/kdave/btrfs-progs/issues/959 Reviewed-by: Anand Jain Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a594f66daedf0..f6ae76815e4b5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7196,8 +7196,12 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info, fs_devices = find_fsid(fsid, NULL); if (!fs_devices) { - if (!btrfs_test_opt(fs_info, DEGRADED)) + if (!btrfs_test_opt(fs_info, DEGRADED)) { + btrfs_err(fs_info, + "failed to find fsid %pU when attempting to open seed devices", + fsid); return ERR_PTR(-ENOENT); + } fs_devices = alloc_fs_devices(fsid); if (IS_ERR(fs_devices)) -- GitLab From efa11fd269c139e29b71ec21bc9c9c0063fde40d Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 19 Feb 2025 09:06:33 +1030 Subject: [PATCH 0930/1585] btrfs: fix data overwriting bug during buffered write when block size < page size [BUG] When running generic/418 with a btrfs whose block size < page size (subpage cases), it always fails. And the following minimal reproducer is more than enough to trigger it reliably: workload() { mkfs.btrfs -s 4k -f $dev > /dev/null dmesg -C mount $dev $mnt $fsstree_dir/src/dio-invalidate-cache -r -b 4096 -n 3 -i 1 -f $mnt/diotest ret=$? umount $mnt stop_trace if [ $ret -ne 0 ]; then fail fi } for (( i = 0; i < 1024; i++)); do echo "=== $i/$runtime ===" workload done [CAUSE] With extra trace printk added to the following functions: - btrfs_buffered_write() * Which folio is touched * The file offset (start) where the buffered write is at * How many bytes are copied * The content of the write (the first 2 bytes) - submit_one_sector() * Which folio is touched * The position inside the folio * The content of the page cache (the first 2 bytes) - pagecache_isize_extended() * The parameters of the function itself * The parameters of the folio_zero_range() Which are enough to show the problem: 22.158114: btrfs_buffered_write: folio pos=0 start=0 copied=4096 content=0x0101 22.158161: submit_one_sector: r/i=5/257 folio=0 pos=0 content=0x0101 22.158609: btrfs_buffered_write: folio pos=0 start=4096 copied=4096 content=0x0101 22.158634: btrfs_buffered_write: folio pos=0 start=8192 copied=4096 content=0x0101 22.158650: pagecache_isize_extended: folio=0 from=4096 to=8192 bsize=4096 zero off=4096 len=8192 22.158682: submit_one_sector: r/i=5/257 folio=0 pos=4096 content=0x0000 22.158686: submit_one_sector: r/i=5/257 folio=0 pos=8192 content=0x0101 The tool dio-invalidate-cache will start 3 threads, each doing a buffered write with 0x01 at offset 0, 4096 and 8192, do a fsync, then do a direct read, and compare the read buffer with the write buffer. Note that all 3 btrfs_buffered_write() are writing the correct 0x01 into the page cache. But at submit_one_sector(), at file offset 4096, the content is zeroed out, by pagecache_isize_extended(). The race happens like this: Thread A is writing into range [4K, 8K). Thread B is writing into range [8K, 12k). Thread A | Thread B -------------------------------------+------------------------------------ btrfs_buffered_write() | btrfs_buffered_write() |- old_isize = 4K; | |- old_isize = 4096; |- btrfs_inode_lock() | | |- write into folio range [4K, 8K) | | |- pagecache_isize_extended() | | | extend isize from 4096 to 8192 | | | no folio_zero_range() called | | |- btrfs_inode_lock() | | | |- btrfs_inode_lock() | |- write into folio range [8K, 12K) | |- pagecache_isize_extended() | | calling folio_zero_range(4K, 8K) | | This is caused by the old_isize is | | grabbed too early, without any | | inode lock. | |- btrfs_inode_unlock() The @old_isize is grabbed without inode lock, causing race between two buffered write threads and making pagecache_isize_extended() to zero range which is still containing cached data. And this is only affecting subpage btrfs, because for regular blocksize == page size case, the function pagecache_isize_extended() will do nothing if the block size >= page size. [FIX] Grab the old i_size while holding the inode lock. This means each buffered write thread will have a stable view of the old inode size, thus avoid the above race. CC: stable@vger.kernel.org # 5.15+ Fixes: 5e8b9ef30392 ("btrfs: move pos increment and pagecache extension to btrfs_buffered_write") Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/file.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ed3c0d6546c5d..0b568c8d24cbc 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1090,7 +1090,7 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i) u64 lockend; size_t num_written = 0; ssize_t ret; - loff_t old_isize = i_size_read(inode); + loff_t old_isize; unsigned int ilock_flags = 0; const bool nowait = (iocb->ki_flags & IOCB_NOWAIT); unsigned int bdp_flags = (nowait ? BDP_ASYNC : 0); @@ -1103,6 +1103,13 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i) if (ret < 0) return ret; + /* + * We can only trust the isize with inode lock held, or it can race with + * other buffered writes and cause incorrect call of + * pagecache_isize_extended() to overwrite existing data. + */ + old_isize = i_size_read(inode); + ret = generic_write_checks(iocb, i); if (ret <= 0) goto out; -- GitLab From e9a48ea4d90be251e0d057d41665745caccb0351 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Tue, 18 Feb 2025 16:59:18 +0100 Subject: [PATCH 0931/1585] irqchip/qcom-pdc: Workaround hardware register bug on X1E80100 On X1E80100, there is a hardware bug in the register logic of the IRQ_ENABLE_BANK register: While read accesses work on the normal address, all write accesses must be made to a shifted address. Without a workaround for this, the wrong interrupt gets enabled in the PDC and it is impossible to wakeup from deep suspend (CX collapse). This has not caused problems so far, because the deep suspend state was not enabled. A workaround is required now since work is ongoing to fix this. The PDC has multiple "DRV" regions, each one has a size of 0x10000 and provides the same set of registers for a particular client in the system. Linux is one the clients and uses DRV region 2 on X1E. Each "bank" inside the DRV region consists of 32 interrupt pins that can be enabled using the IRQ_ENABLE_BANK register: IRQ_ENABLE_BANK[bank] = base + IRQ_ENABLE_BANK + bank * sizeof(u32) On X1E, this works as intended for read access. However, write access to most banks is shifted by 2: IRQ_ENABLE_BANK_X1E[0] = IRQ_ENABLE_BANK[-2] IRQ_ENABLE_BANK_X1E[1] = IRQ_ENABLE_BANK[-1] IRQ_ENABLE_BANK_X1E[2] = IRQ_ENABLE_BANK[0] = IRQ_ENABLE_BANK[2 - 2] IRQ_ENABLE_BANK_X1E[3] = IRQ_ENABLE_BANK[1] = IRQ_ENABLE_BANK[3 - 2] IRQ_ENABLE_BANK_X1E[4] = IRQ_ENABLE_BANK[2] = IRQ_ENABLE_BANK[4 - 2] IRQ_ENABLE_BANK_X1E[5] = IRQ_ENABLE_BANK[5] (this one works as intended) The negative indexes underflow to banks of the previous DRV/client region: IRQ_ENABLE_BANK_X1E[drv 2][bank 0] = IRQ_ENABLE_BANK[drv 2][bank -2] = IRQ_ENABLE_BANK[drv 1][bank 5-2] = IRQ_ENABLE_BANK[drv 1][bank 3] = IRQ_ENABLE_BANK[drv 1][bank 0 + 3] IRQ_ENABLE_BANK_X1E[drv 2][bank 1] = IRQ_ENABLE_BANK[drv 2][bank -1] = IRQ_ENABLE_BANK[drv 1][bank 5-1] = IRQ_ENABLE_BANK[drv 1][bank 4] = IRQ_ENABLE_BANK[drv 1][bank 1 + 3] Introduce a workaround for the bug by matching the qcom,x1e80100-pdc compatible and apply the offsets as shown above: - Bank 0...1: previous DRV region, bank += 3 - Bank 1...4: our DRV region, bank -= 2 - Bank 5: our DRV region, no fixup required The PDC node in the device tree only describes the DRV region for the Linux client, but the workaround also requires to map parts of the previous DRV region to issue writes there. To maintain compatibility with old device trees, obtain the base address of the preceeding region by applying the -0x10000 offset. Note that this is also more correct from a conceptual point of view: It does not really make use of the other region; it just issues shifted writes that end up in the registers of the Linux associated DRV region 2. Signed-off-by: Stephan Gerhold Signed-off-by: Thomas Gleixner Tested-by: Johan Hovold Link: https://lore.kernel.org/all/20250218-x1e80100-pdc-hw-wa-v2-1-29be4c98e355@linaro.org --- drivers/irqchip/qcom-pdc.c | 67 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c index 74b2f124116e3..52d77546aacb9 100644 --- a/drivers/irqchip/qcom-pdc.c +++ b/drivers/irqchip/qcom-pdc.c @@ -21,9 +21,11 @@ #include #define PDC_MAX_GPIO_IRQS 256 +#define PDC_DRV_OFFSET 0x10000 /* Valid only on HW version < 3.2 */ #define IRQ_ENABLE_BANK 0x10 +#define IRQ_ENABLE_BANK_MAX (IRQ_ENABLE_BANK + BITS_TO_BYTES(PDC_MAX_GPIO_IRQS)) #define IRQ_i_CFG 0x110 /* Valid only on HW version >= 3.2 */ @@ -46,13 +48,20 @@ struct pdc_pin_region { static DEFINE_RAW_SPINLOCK(pdc_lock); static void __iomem *pdc_base; +static void __iomem *pdc_prev_base; static struct pdc_pin_region *pdc_region; static int pdc_region_cnt; static unsigned int pdc_version; +static bool pdc_x1e_quirk; + +static void pdc_base_reg_write(void __iomem *base, int reg, u32 i, u32 val) +{ + writel_relaxed(val, base + reg + i * sizeof(u32)); +} static void pdc_reg_write(int reg, u32 i, u32 val) { - writel_relaxed(val, pdc_base + reg + i * sizeof(u32)); + pdc_base_reg_write(pdc_base, reg, i, val); } static u32 pdc_reg_read(int reg, u32 i) @@ -60,6 +69,34 @@ static u32 pdc_reg_read(int reg, u32 i) return readl_relaxed(pdc_base + reg + i * sizeof(u32)); } +static void pdc_x1e_irq_enable_write(u32 bank, u32 enable) +{ + void __iomem *base; + + /* Remap the write access to work around a hardware bug on X1E */ + switch (bank) { + case 0 ... 1: + /* Use previous DRV (client) region and shift to bank 3-4 */ + base = pdc_prev_base; + bank += 3; + break; + case 2 ... 4: + /* Use our own region and shift to bank 0-2 */ + base = pdc_base; + bank -= 2; + break; + case 5: + /* No fixup required for bank 5 */ + base = pdc_base; + break; + default: + WARN_ON(1); + return; + } + + pdc_base_reg_write(base, IRQ_ENABLE_BANK, bank, enable); +} + static void __pdc_enable_intr(int pin_out, bool on) { unsigned long enable; @@ -72,7 +109,11 @@ static void __pdc_enable_intr(int pin_out, bool on) enable = pdc_reg_read(IRQ_ENABLE_BANK, index); __assign_bit(mask, &enable, on); - pdc_reg_write(IRQ_ENABLE_BANK, index, enable); + + if (pdc_x1e_quirk) + pdc_x1e_irq_enable_write(index, enable); + else + pdc_reg_write(IRQ_ENABLE_BANK, index, enable); } else { enable = pdc_reg_read(IRQ_i_CFG, pin_out); __assign_bit(IRQ_i_CFG_IRQ_ENABLE, &enable, on); @@ -324,10 +365,29 @@ static int qcom_pdc_init(struct device_node *node, struct device_node *parent) if (res_size > resource_size(&res)) pr_warn("%pOF: invalid reg size, please fix DT\n", node); + /* + * PDC has multiple DRV regions, each one provides the same set of + * registers for a particular client in the system. Due to a hardware + * bug on X1E, some writes to the IRQ_ENABLE_BANK register must be + * issued inside the previous region. This region belongs to + * a different client and is not described in the device tree. Map the + * region with the expected offset to preserve support for old DTs. + */ + if (of_device_is_compatible(node, "qcom,x1e80100-pdc")) { + pdc_prev_base = ioremap(res.start - PDC_DRV_OFFSET, IRQ_ENABLE_BANK_MAX); + if (!pdc_prev_base) { + pr_err("%pOF: unable to map previous PDC DRV region\n", node); + return -ENXIO; + } + + pdc_x1e_quirk = true; + } + pdc_base = ioremap(res.start, res_size); if (!pdc_base) { pr_err("%pOF: unable to map PDC registers\n", node); - return -ENXIO; + ret = -ENXIO; + goto fail; } pdc_version = pdc_reg_read(PDC_VERSION_REG, 0); @@ -363,6 +423,7 @@ static int qcom_pdc_init(struct device_node *node, struct device_node *parent) fail: kfree(pdc_region); iounmap(pdc_base); + iounmap(pdc_prev_base); return ret; } -- GitLab From 92527e473911b835c2c18b0c55c337c33e85ff00 Mon Sep 17 00:00:00 2001 From: Elizabeth Figura Date: Thu, 20 Feb 2025 13:23:34 -0600 Subject: [PATCH 0932/1585] ntsync: Check wait count based on byte size. GCC versions below 13 incorrectly detect the copy size as being static and too small to fit in the "fds" array. Work around this by explicitly calculating the size and returning EINVAL based on that, instead of based on the object count. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202502072019.LYoCR9bF-lkp@intel.com/ Suggested-by: Arnd Bergmann Signed-off-by: Elizabeth Figura -- Suggested-by as per Arnd's request, but the only thing I changed was preserving array_size() [as noted by Geert in the linked thread]. I tested and found no regressions. v2: Add missing sign-off Link: https://lore.kernel.org/r/20250220192334.549167-1-zfigura@codeweavers.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ntsync.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index 0b4e56d59b3da..999026a1ae048 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -873,6 +873,7 @@ static int setup_wait(struct ntsync_device *dev, { int fds[NTSYNC_MAX_WAIT_COUNT + 1]; const __u32 count = args->count; + size_t size = array_size(count, sizeof(fds[0])); struct ntsync_q *q; __u32 total_count; __u32 i, j; @@ -880,15 +881,14 @@ static int setup_wait(struct ntsync_device *dev, if (args->pad || (args->flags & ~NTSYNC_WAIT_REALTIME)) return -EINVAL; - if (args->count > NTSYNC_MAX_WAIT_COUNT) + if (size >= sizeof(fds)) return -EINVAL; total_count = count; if (args->alert) total_count++; - if (copy_from_user(fds, u64_to_user_ptr(args->objs), - array_size(count, sizeof(*fds)))) + if (copy_from_user(fds, u64_to_user_ptr(args->objs), size)) return -EFAULT; if (args->alert) fds[count] = args->alert; -- GitLab From d252435aca44d647d57b84de5108556f9c97614a Mon Sep 17 00:00:00 2001 From: BillXiang Date: Fri, 21 Feb 2025 18:45:38 +0800 Subject: [PATCH 0933/1585] riscv: KVM: Remove unnecessary vcpu kick MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the unnecessary kick to the vCPU after writing to the vs_file of IMSIC in kvm_riscv_vcpu_aia_imsic_inject. For vCPUs that are running, writing to the vs_file directly forwards the interrupt as an MSI to them and does not need an extra kick. For vCPUs that are descheduled after emulating WFI, KVM will enable the guest external interrupt for that vCPU in kvm_riscv_aia_wakeon_hgei. This means that writing to the vs_file will cause a guest external interrupt, which will cause KVM to wake up the vCPU in hgei_interrupt to handle the interrupt properly. Signed-off-by: BillXiang Reviewed-by: Andrew Jones Reviewed-by: Radim Krčmář Link: https://lore.kernel.org/r/20250221104538.2147-1-xiangwencheng@lanxincomputing.com Signed-off-by: Anup Patel --- arch/riscv/kvm/aia_imsic.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index a8085cd8215e3..29ef9c2133a93 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -974,7 +974,6 @@ int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu, if (imsic->vsfile_cpu >= 0) { writel(iid, imsic->vsfile_va + IMSIC_MMIO_SETIPNUM_LE); - kvm_vcpu_kick(vcpu); } else { eix = &imsic->swfile->eix[iid / BITS_PER_TYPE(u64)]; set_bit(iid & (BITS_PER_TYPE(u64) - 1), eix->eip); -- GitLab From 8510edf191d2df0822ea22d6226e4eef87562271 Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Tue, 18 Feb 2025 20:02:08 +0800 Subject: [PATCH 0934/1585] mm/filemap: fix miscalculated file range for filemap_fdatawrite_range_kick() iocb->ki_pos has been updated with the number of written bytes since generic_perform_write(). Besides __filemap_fdatawrite_range() accepts the inclusive end of the data range. Fixes: 1d4457576570 ("mm: call filemap_fdatawrite_range_kick() after IOCB_DONTCACHE issue") Signed-off-by: Jingbo Xu Link: https://lore.kernel.org/r/20250218120209.88093-2-jefflexu@linux.alibaba.com Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- include/linux/fs.h | 4 ++-- mm/filemap.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 9346adf28f7bc..2788df98080f8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2975,8 +2975,8 @@ static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count) } else if (iocb->ki_flags & IOCB_DONTCACHE) { struct address_space *mapping = iocb->ki_filp->f_mapping; - filemap_fdatawrite_range_kick(mapping, iocb->ki_pos, - iocb->ki_pos + count); + filemap_fdatawrite_range_kick(mapping, iocb->ki_pos - count, + iocb->ki_pos - 1); } return count; diff --git a/mm/filemap.c b/mm/filemap.c index 804d7365680c1..d4564a79eb353 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -445,7 +445,7 @@ EXPORT_SYMBOL(filemap_fdatawrite_range); * filemap_fdatawrite_range_kick - start writeback on a range * @mapping: target address_space * @start: index to start writeback on - * @end: last (non-inclusive) index for writeback + * @end: last (inclusive) index for writeback * * This is a non-integrity writeback helper, to start writing back folios * for the indicated range. -- GitLab From 927289988068a65ccc168eda881ce60f8712707b Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Tue, 18 Feb 2025 20:02:09 +0800 Subject: [PATCH 0935/1585] mm/truncate: don't skip dirty page in folio_unmap_invalidate() ... otherwise this is a behavior change for the previous callers of invalidate_complete_folio2(), e.g. the page invalidation routine. Fixes: 4a9e23159fd3 ("mm/truncate: add folio_unmap_invalidate() helper") Signed-off-by: Jingbo Xu Link: https://lore.kernel.org/r/20250218120209.88093-3-jefflexu@linux.alibaba.com Signed-off-by: Christian Brauner --- mm/truncate.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/truncate.c b/mm/truncate.c index e2e115adfbc58..76d8fcd89bd00 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -548,8 +548,6 @@ int folio_unmap_invalidate(struct address_space *mapping, struct folio *folio, VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); - if (folio_test_dirty(folio)) - return 0; if (folio_mapped(folio)) unmap_mapping_folio(folio); BUG_ON(folio_mapped(folio)); -- GitLab From 517120728484df1ab8b71cba8d2cad19f52f18a1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 19 Feb 2025 22:01:24 -0800 Subject: [PATCH 0936/1585] x86/cpufeatures: Make AVX-VNNI depend on AVX The 'noxsave' boot option disables support for AVX, but support for the AVX-VNNI feature was still declared on CPUs that support it. Fix this. Signed-off-by: Eric Biggers Signed-off-by: Ingo Molnar Cc: Dave Hansen Link: https://lore.kernel.org/r/20250220060124.89622-1-ebiggers@kernel.org --- arch/x86/kernel/cpu/cpuid-deps.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index 8bd84114c2d96..df838e3bdbe02 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -45,6 +45,7 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_AES, X86_FEATURE_XMM2 }, { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 }, { X86_FEATURE_GFNI, X86_FEATURE_XMM2 }, + { X86_FEATURE_AVX_VNNI, X86_FEATURE_AVX }, { X86_FEATURE_FMA, X86_FEATURE_AVX }, { X86_FEATURE_VAES, X86_FEATURE_AVX }, { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX }, -- GitLab From dc0a241ceaf3b7df6f1a7658b020c92682b75bfc Mon Sep 17 00:00:00 2001 From: Michael Jeanson Date: Wed, 19 Feb 2025 15:53:26 -0500 Subject: [PATCH 0937/1585] rseq: Fix rseq registration with CONFIG_DEBUG_RSEQ With CONFIG_DEBUG_RSEQ=y, at rseq registration the read-only fields are copied from user-space, if this copy fails the syscall returns -EFAULT and the registration should not be activated - but it erroneously is. Move the activation of the registration after the copy of the fields to fix this bug. Fixes: 7d5265ffcd8b ("rseq: Validate read-only fields under DEBUG_RSEQ config") Signed-off-by: Michael Jeanson Signed-off-by: Ingo Molnar Reviewed-by: Mathieu Desnoyers Link: https://lore.kernel.org/r/20250219205330.324770-1-mjeanson@efficios.com --- kernel/rseq.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/rseq.c b/kernel/rseq.c index 442aba29bc4cf..2cb16091ec0ae 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -507,9 +507,6 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, return -EINVAL; if (!access_ok(rseq, rseq_len)) return -EFAULT; - current->rseq = rseq; - current->rseq_len = rseq_len; - current->rseq_sig = sig; #ifdef CONFIG_DEBUG_RSEQ /* * Initialize the in-kernel rseq fields copy for validation of @@ -521,6 +518,14 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, get_user(rseq_kernel_fields(current)->mm_cid, &rseq->mm_cid)) return -EFAULT; #endif + /* + * Activate the registration by setting the rseq area address, length + * and signature in the task struct. + */ + current->rseq = rseq; + current->rseq_len = rseq_len; + current->rseq_sig = sig; + /* * If rseq was previously inactive, and has just been * registered, ensure the cpu_id_start and cpu_id fields -- GitLab From c9876cdb3ac4dcdf3c710ff02094165982e2a557 Mon Sep 17 00:00:00 2001 From: Brian Ochoa Date: Wed, 19 Feb 2025 10:09:20 -0500 Subject: [PATCH 0938/1585] docs: arch/x86/sva: Fix two grammar errors under Background and FAQ - Correct "in order" to "in order to" - Append missing quantifier Signed-off-by: Brian Ochoa Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250219150920.445802-1-brianeochoa@gmail.com --- Documentation/arch/x86/sva.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/arch/x86/sva.rst b/Documentation/arch/x86/sva.rst index 33cb050059820..6a759984d4712 100644 --- a/Documentation/arch/x86/sva.rst +++ b/Documentation/arch/x86/sva.rst @@ -25,7 +25,7 @@ to cache translations for virtual addresses. The IOMMU driver uses the mmu_notifier() support to keep the device TLB cache and the CPU cache in sync. When an ATS lookup fails for a virtual address, the device should use the PRI in order to request the virtual address to be paged into the -CPU page tables. The device must use ATS again in order the fetch the +CPU page tables. The device must use ATS again in order to fetch the translation before use. Shared Hardware Workqueues @@ -216,7 +216,7 @@ submitting work and processing completions. Single Root I/O Virtualization (SR-IOV) focuses on providing independent hardware interfaces for virtualizing hardware. Hence, it's required to be -almost fully functional interface to software supporting the traditional +an almost fully functional interface to software supporting the traditional BARs, space for interrupts via MSI-X, its own register layout. Virtual Functions (VFs) are assisted by the Physical Function (PF) driver. -- GitLab From 38b14061947fa546491656e3f5e388d4fedf8dba Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 20 Feb 2025 15:20:10 -0500 Subject: [PATCH 0939/1585] ftrace: Fix accounting of adding subops to a manager ops Function graph uses a subops and manager ops mechanism to attach to ftrace. The manager ops connects to ftrace and the functions it connects to is defined by a list of subops that it manages. The function hash that defines what the above ops attaches to limits the functions to attach if the hash has any content. If the hash is empty, it means to trace all functions. The creation of the manager ops hash is done by iterating over all the subops hashes. If any of the subops hashes is empty, it means that the manager ops hash must trace all functions as well. The issue is in the creation of the manager ops. When a second subops is attached, a new hash is created by starting it as NULL and adding the subops one at a time. But the NULL ops is mistaken as an empty hash, and once an empty hash is found, it stops the loop of subops and just enables all functions. # echo "f:myevent1 kernel_clone" >> /sys/kernel/tracing/dynamic_events # cat /sys/kernel/tracing/enabled_functions kernel_clone (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 # echo "f:myevent2 schedule_timeout" >> /sys/kernel/tracing/dynamic_events # cat /sys/kernel/tracing/enabled_functions trace_initcall_start_cb (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 run_init_process (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 try_to_run_init_process (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 x86_pmu_show_pmu_cap (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 cleanup_rapl_pmus (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 uncore_free_pcibus_map (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 uncore_types_exit (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 uncore_pci_exit.part.0 (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 kvm_shutdown (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 vmx_dump_msrs (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 vmx_cleanup_l1d_flush (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 [..] Fix this by initializing the new hash to NULL and if the hash is NULL do not treat it as an empty hash but instead allocate by copying the content of the first sub ops. Then on subsequent iterations, the new hash will not be NULL, but the content of the previous subops. If that first subops attached to all functions, then new hash may assume that the manager ops also needs to attach to all functions. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Heiko Carstens Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Alexander Gordeev Link: https://lore.kernel.org/20250220202055.060300046@goodmis.org Fixes: 5fccc7552ccbc ("ftrace: Add subops logic to allow one ops to manage many") Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 728ecda6e8d4d..bec54dc272049 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3220,15 +3220,22 @@ static struct ftrace_hash *copy_hash(struct ftrace_hash *src) * The filter_hash updates uses just the append_hash() function * and the notrace_hash does not. */ -static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash) +static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash, + int size_bits) { struct ftrace_func_entry *entry; int size; int i; - /* An empty hash does everything */ - if (ftrace_hash_empty(*hash)) - return 0; + if (*hash) { + /* An empty hash does everything */ + if (ftrace_hash_empty(*hash)) + return 0; + } else { + *hash = alloc_ftrace_hash(size_bits); + if (!*hash) + return -ENOMEM; + } /* If new_hash has everything make hash have everything */ if (ftrace_hash_empty(new_hash)) { @@ -3292,16 +3299,18 @@ static int intersect_hash(struct ftrace_hash **hash, struct ftrace_hash *new_has /* Return a new hash that has a union of all @ops->filter_hash entries */ static struct ftrace_hash *append_hashes(struct ftrace_ops *ops) { - struct ftrace_hash *new_hash; + struct ftrace_hash *new_hash = NULL; struct ftrace_ops *subops; + int size_bits; int ret; - new_hash = alloc_ftrace_hash(ops->func_hash->filter_hash->size_bits); - if (!new_hash) - return NULL; + if (ops->func_hash->filter_hash) + size_bits = ops->func_hash->filter_hash->size_bits; + else + size_bits = FTRACE_HASH_DEFAULT_BITS; list_for_each_entry(subops, &ops->subop_list, list) { - ret = append_hash(&new_hash, subops->func_hash->filter_hash); + ret = append_hash(&new_hash, subops->func_hash->filter_hash, size_bits); if (ret < 0) { free_ftrace_hash(new_hash); return NULL; @@ -3310,7 +3319,8 @@ static struct ftrace_hash *append_hashes(struct ftrace_ops *ops) if (ftrace_hash_empty(new_hash)) break; } - return new_hash; + /* Can't return NULL as that means this failed */ + return new_hash ? : EMPTY_HASH; } /* Make @ops trace evenything except what all its subops do not trace */ @@ -3505,7 +3515,8 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int filter_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->filter_hash); if (!filter_hash) return -ENOMEM; - ret = append_hash(&filter_hash, subops->func_hash->filter_hash); + ret = append_hash(&filter_hash, subops->func_hash->filter_hash, + size_bits); if (ret < 0) { free_ftrace_hash(filter_hash); return ret; -- GitLab From 8eb4b09e0bbd30981305643229fe7640ad41b667 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 20 Feb 2025 15:20:11 -0500 Subject: [PATCH 0940/1585] ftrace: Do not add duplicate entries in subops manager ops Check if a function is already in the manager ops of a subops. A manager ops contains multiple subops, and if two or more subops are tracing the same function, the manager ops only needs a single entry in its hash. Cc: stable@vger.kernel.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Alexander Gordeev Link: https://lore.kernel.org/20250220202055.226762894@goodmis.org Fixes: 4f554e955614f ("ftrace: Add ftrace_set_filter_ips function") Tested-by: Heiko Carstens Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index bec54dc272049..6b0c25761ccb1 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5718,6 +5718,9 @@ __ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) return -ENOENT; free_hash_entry(hash, entry); return 0; + } else if (__ftrace_lookup_ip(hash, ip) != NULL) { + /* Already exists */ + return 0; } entry = add_hash_entry(hash, ip); -- GitLab From ded9140622358a154efb3a777025fa7f7ae2c2d9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 20 Feb 2025 15:20:12 -0500 Subject: [PATCH 0941/1585] fprobe: Always unregister fgraph function from ops When the last fprobe is removed, it calls unregister_ftrace_graph() to remove the graph_ops from function graph. The issue is when it does so, it calls return before removing the function from its graph ops via ftrace_set_filter_ips(). This leaves the last function lingering in the fprobe's fgraph ops and if a probe is added it also enables that last function (even though the callback will just drop it, it does add unneeded overhead to make that call). # echo "f:myevent1 kernel_clone" >> /sys/kernel/tracing/dynamic_events # cat /sys/kernel/tracing/enabled_functions kernel_clone (1) tramp: 0xffffffffc02f3000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 # echo "f:myevent2 schedule_timeout" >> /sys/kernel/tracing/dynamic_events # cat /sys/kernel/tracing/enabled_functions kernel_clone (1) tramp: 0xffffffffc02f3000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 schedule_timeout (1) tramp: 0xffffffffc02f3000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 # > /sys/kernel/tracing/dynamic_events # cat /sys/kernel/tracing/enabled_functions # echo "f:myevent3 kmem_cache_free" >> /sys/kernel/tracing/dynamic_events # cat /sys/kernel/tracing/enabled_functions kmem_cache_free (1) tramp: 0xffffffffc0219000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 schedule_timeout (1) tramp: 0xffffffffc0219000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 The above enabled a fprobe on kernel_clone, and then on schedule_timeout. The content of the enabled_functions shows the functions that have a callback attached to them. The fprobe attached to those functions properly. Then the fprobes were cleared, and enabled_functions was empty after that. But after adding a fprobe on kmem_cache_free, the enabled_functions shows that the schedule_timeout was attached again. This is because it was still left in the fprobe ops that is used to tell function graph what functions it wants callbacks from. Cc: stable@vger.kernel.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Alexander Gordeev Link: https://lore.kernel.org/20250220202055.393254452@goodmis.org Fixes: 4346ba1604093 ("fprobe: Rewrite fprobe on function-graph tracer") Tested-by: Heiko Carstens Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/fprobe.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 2560b312ad576..62e8f7d56602f 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -403,11 +403,9 @@ static void fprobe_graph_remove_ips(unsigned long *addrs, int num) lockdep_assert_held(&fprobe_mutex); fprobe_graph_active--; - if (!fprobe_graph_active) { - /* Q: should we unregister it ? */ + /* Q: should we unregister it ? */ + if (!fprobe_graph_active) unregister_ftrace_graph(&fprobe_graph_ops); - return; - } ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0); } -- GitLab From ca26554a1498bc905c4a39fb42d55d93f3ae8df2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 20 Feb 2025 15:20:13 -0500 Subject: [PATCH 0942/1585] fprobe: Fix accounting of when to unregister from function graph When adding a new fprobe, it will update the function hash to the functions the fprobe is attached to and register with function graph to have it call the registered functions. The fprobe_graph_active variable keeps track of the number of fprobes that are using function graph. If two fprobes attach to the same function, it increments the fprobe_graph_active for each of them. But when they are removed, the first fprobe to be removed will see that the function it is attached to is also used by another fprobe and it will not remove that function from function_graph. The logic will skip decrementing the fprobe_graph_active variable. This causes the fprobe_graph_active variable to not go to zero when all fprobes are removed, and in doing so it does not unregister from function graph. As the fgraph ops hash will now be empty, and an empty filter hash means all functions are enabled, this triggers function graph to add a callback to the fprobe infrastructure for every function! # echo "f:myevent1 kernel_clone" >> /sys/kernel/tracing/dynamic_events # echo "f:myevent2 kernel_clone%return" >> /sys/kernel/tracing/dynamic_events # cat /sys/kernel/tracing/enabled_functions kernel_clone (1) tramp: 0xffffffffc0024000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 # > /sys/kernel/tracing/dynamic_events # cat /sys/kernel/tracing/enabled_functions trace_initcall_start_cb (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170 run_init_process (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170 try_to_run_init_process (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170 x86_pmu_show_pmu_cap (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170 cleanup_rapl_pmus (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170 uncore_free_pcibus_map (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170 uncore_types_exit (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170 uncore_pci_exit.part.0 (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170 kvm_shutdown (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170 vmx_dump_msrs (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170 [..] # cat /sys/kernel/tracing/enabled_functions | wc -l 54702 If a fprobe is being removed and all its functions are also traced by other fprobes, still decrement the fprobe_graph_active counter. Cc: stable@vger.kernel.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Alexander Gordeev Link: https://lore.kernel.org/20250220202055.565129766@goodmis.org Fixes: 4346ba1604093 ("fprobe: Rewrite fprobe on function-graph tracer") Closes: https://lore.kernel.org/all/20250217114918.10397-A-hca@linux.ibm.com/ Reported-by: Heiko Carstens Tested-by: Heiko Carstens Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/fprobe.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 62e8f7d56602f..33082c4e8154e 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -407,7 +407,8 @@ static void fprobe_graph_remove_ips(unsigned long *addrs, int num) if (!fprobe_graph_active) unregister_ftrace_graph(&fprobe_graph_ops); - ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0); + if (num) + ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0); } static int symbols_cmp(const void *a, const void *b) @@ -677,8 +678,7 @@ int unregister_fprobe(struct fprobe *fp) } del_fprobe_hash(fp); - if (count) - fprobe_graph_remove_ips(addrs, count); + fprobe_graph_remove_ips(addrs, count); kfree_rcu(hlist_array, rcu); fp->hlist_array = NULL; -- GitLab From e85c5e9792b942381ad92ccd0ff745b6d408a91f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 20 Feb 2025 15:20:14 -0500 Subject: [PATCH 0943/1585] selftests/ftrace: Update fprobe test to check enabled_functions file A few bugs were found in the fprobe accounting logic along with it using the function graph infrastructure. Update the fprobe selftest to catch those bugs in case they or something similar shows up in the future. The test now checks the enabled_functions file which shows all the functions attached to ftrace or fgraph. When enabling a fprobe, make sure that its corresponding function is also added to that file. Also add two more fprobes to enable to make sure that the fprobe logic works properly with multiple probes. Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Alexander Gordeev Link: https://lore.kernel.org/20250220202055.733001756@goodmis.org Acked-by: Masami Hiramatsu (Google) Tested-by: Heiko Carstens Signed-off-by: Steven Rostedt (Google) --- .../test.d/dynevent/add_remove_fprobe.tc | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc index dc25bcf4f9e2c..449f9d8be7462 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc @@ -7,12 +7,38 @@ echo 0 > events/enable echo > dynamic_events PLACE=$FUNCTION_FORK +PLACE2="kmem_cache_free" +PLACE3="schedule_timeout" echo "f:myevent1 $PLACE" >> dynamic_events + +# Make sure the event is attached and is the only one +grep -q $PLACE enabled_functions +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne 1 ]; then + exit_fail +fi + echo "f:myevent2 $PLACE%return" >> dynamic_events +# It should till be the only attached function +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne 1 ]; then + exit_fail +fi + +# add another event +echo "f:myevent3 $PLACE2" >> dynamic_events + +grep -q $PLACE2 enabled_functions +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne 2 ]; then + exit_fail +fi + grep -q myevent1 dynamic_events grep -q myevent2 dynamic_events +grep -q myevent3 dynamic_events test -d events/fprobes/myevent1 test -d events/fprobes/myevent2 @@ -21,6 +47,34 @@ echo "-:myevent2" >> dynamic_events grep -q myevent1 dynamic_events ! grep -q myevent2 dynamic_events +# should still have 2 left +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne 2 ]; then + exit_fail +fi + echo > dynamic_events +# Should have none left +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne 0 ]; then + exit_fail +fi + +echo "f:myevent4 $PLACE" >> dynamic_events + +# Should only have one enabled +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne 1 ]; then + exit_fail +fi + +echo > dynamic_events + +# Should have none left +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne 0 ]; then + exit_fail +fi + clear_trace -- GitLab From 57b76bedc5c52c66968183b5ef57234894c25ce7 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Feb 2025 15:07:49 +0100 Subject: [PATCH 0944/1585] ftrace: Correct preemption accounting for function tracing. The function tracer should record the preemption level at the point when the function is invoked. If the tracing subsystem decrement the preemption counter it needs to correct this before feeding the data into the trace buffer. This was broken in the commit cited below while shifting the preempt-disabled section. Use tracing_gen_ctx_dec() which properly subtracts one from the preemption counter on a preemptible kernel. Cc: stable@vger.kernel.org Cc: Wander Lairson Costa Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Thomas Gleixner Link: https://lore.kernel.org/20250220140749.pfw8qoNZ@linutronix.de Fixes: ce5e48036c9e7 ("ftrace: disable preemption when recursion locked") Signed-off-by: Sebastian Andrzej Siewior Tested-by: Wander Lairson Costa Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_functions.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index d358c9935164d..df56f9b760109 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -216,7 +216,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, parent_ip = function_get_true_parent_ip(parent_ip, fregs); - trace_ctx = tracing_gen_ctx(); + trace_ctx = tracing_gen_ctx_dec(); data = this_cpu_ptr(tr->array_buffer.data); if (!atomic_read(&data->disabled)) @@ -321,7 +321,6 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip, struct trace_array *tr = op->private; struct trace_array_cpu *data; unsigned int trace_ctx; - unsigned long flags; int bit; if (unlikely(!tr->function_enabled)) @@ -347,8 +346,7 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip, if (is_repeat_check(tr, last_info, ip, parent_ip)) goto out; - local_save_flags(flags); - trace_ctx = tracing_gen_ctx_flags(flags); + trace_ctx = tracing_gen_ctx_dec(); process_repeats(tr, ip, parent_ip, last_info, trace_ctx); trace_function(tr, ip, parent_ip, trace_ctx); -- GitLab From 2fa6a01345b538faa7b0fae8f723bb6977312428 Mon Sep 17 00:00:00 2001 From: Adrian Huang Date: Thu, 20 Feb 2025 11:15:28 +0800 Subject: [PATCH 0945/1585] tracing: Fix memory leak when reading set_event file kmemleak reports the following memory leak after reading set_event file: # cat /sys/kernel/tracing/set_event # cat /sys/kernel/debug/kmemleak unreferenced object 0xff110001234449e0 (size 16): comm "cat", pid 13645, jiffies 4294981880 hex dump (first 16 bytes): 01 00 00 00 00 00 00 00 a8 71 e7 84 ff ff ff ff .........q...... backtrace (crc c43abbc): __kmalloc_cache_noprof+0x3ca/0x4b0 s_start+0x72/0x2d0 seq_read_iter+0x265/0x1080 seq_read+0x2c9/0x420 vfs_read+0x166/0xc30 ksys_read+0xf4/0x1d0 do_syscall_64+0x79/0x150 entry_SYSCALL_64_after_hwframe+0x76/0x7e The issue can be reproduced regardless of whether set_event is empty or not. Here is an example about the valid content of set_event. # cat /sys/kernel/tracing/set_event sched:sched_process_fork sched:sched_switch sched:sched_wakeup *:*:mod:trace_events_sample The root cause is that s_next() returns NULL when nothing is found. This results in s_stop() attempting to free a NULL pointer because its parameter is NULL. Fix the issue by freeing the memory appropriately when s_next() fails to find anything. Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250220031528.7373-1-ahuang12@lenovo.com Fixes: b355247df104 ("tracing: Cache ":mod:" events for modules not loaded yet") Signed-off-by: Adrian Huang Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 4cb275316e51e..513de9ceb80ef 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1591,6 +1591,13 @@ s_next(struct seq_file *m, void *v, loff_t *pos) return iter; #endif + /* + * The iter is allocated in s_start() and passed via the 'v' + * parameter. To stop the iterator, NULL must be returned. But + * the return value is what the 'v' parameter in s_stop() receives + * and frees. Free iter here as it will no longer be used. + */ + kfree(iter); return NULL; } @@ -1667,9 +1674,9 @@ static int s_show(struct seq_file *m, void *v) } #endif -static void s_stop(struct seq_file *m, void *p) +static void s_stop(struct seq_file *m, void *v) { - kfree(p); + kfree(v); t_stop(m, NULL); } -- GitLab From 4ecaa75771a75f2b78a431bf67dea165d19d72a6 Mon Sep 17 00:00:00 2001 From: Yu-Che Cheng Date: Wed, 19 Feb 2025 15:07:48 +0800 Subject: [PATCH 0946/1585] thermal: gov_power_allocator: Fix incorrect calculation in divvy_up_power() divvy_up_power() should use weighted_req_power instead of req_power to calculate granted_power. Otherwise, granted_power may be unexpected as the denominator total_req_power is a weighted sum. This is a mistake made during the previous refactor. Replace req_power with weighted_req_power in divvy_up_power() calculation. Fixes: 912e97c67cc3 ("thermal: gov_power_allocator: Move memory allocation out of throttle()") Signed-off-by: Yu-Che Cheng Reviewed-by: Lukasz Luba Link: https://patch.msgid.link/20250219-fix-power-allocator-calc-v1-1-48b860291919@chromium.org [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/thermal/gov_power_allocator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c index 3b644de3292e2..3b626db55b2b9 100644 --- a/drivers/thermal/gov_power_allocator.c +++ b/drivers/thermal/gov_power_allocator.c @@ -370,7 +370,7 @@ static void divvy_up_power(struct power_actor *power, int num_actors, for (i = 0; i < num_actors; i++) { struct power_actor *pa = &power[i]; - u64 req_range = (u64)pa->req_power * power_range; + u64 req_range = (u64)pa->weighted_req_power * power_range; pa->granted_power = DIV_ROUND_CLOSEST_ULL(req_range, total_req_power); -- GitLab From 7241c886a71797cc51efc6fadec7076fcf6435c2 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 9 Feb 2025 15:52:52 -0800 Subject: [PATCH 0947/1585] fbdev: hyperv_fb: iounmap() the correct memory when removing a device When a Hyper-V framebuffer device is removed, or the driver is unbound from a device, any allocated and/or mapped memory must be released. In particular, MMIO address space that was mapped to the framebuffer must be unmapped. Current code unmaps the wrong address, resulting in an error like: [ 4093.980597] iounmap: bad address 00000000c936c05c followed by a stack dump. Commit d21987d709e8 ("video: hyperv: hyperv_fb: Support deferred IO for Hyper-V frame buffer driver") changed the kind of address stored in info->screen_base, and the iounmap() call in hvfb_putmem() was not updated accordingly. Fix this by updating hvfb_putmem() to unmap the correct address. Fixes: d21987d709e8 ("video: hyperv: hyperv_fb: Support deferred IO for Hyper-V frame buffer driver") Signed-off-by: Michael Kelley Reviewed-by: Saurabh Sengar Link: https://lore.kernel.org/r/20250209235252.2987-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20250209235252.2987-1-mhklinux@outlook.com> --- drivers/video/fbdev/hyperv_fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index 7fdb5edd7e2e8..363e4ccfcdb77 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -1080,7 +1080,7 @@ static void hvfb_putmem(struct hv_device *hdev, struct fb_info *info) if (par->need_docopy) { vfree(par->dio_vp); - iounmap(info->screen_base); + iounmap(par->mmio_vp); vmbus_free_mmio(par->mem->start, screen_fb_size); } else { hvfb_release_phymem(hdev, info->fix.smem_start, -- GitLab From c34d999ca3145d9fe858258cc3342ec493f47d2e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 18 Feb 2025 19:22:44 +0000 Subject: [PATCH 0948/1585] rxrpc: rxperf: Fix missing decoding of terminal magic cookie The rxperf RPCs seem to have a magic cookie at the end of the request that was failing to be taken account of by the unmarshalling of the request. Fix the rxperf code to expect this. Fixes: 75bfdbf2fca3 ("rxrpc: Implement an in-kernel rxperf server for testing purposes") Signed-off-by: David Howells cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20250218192250.296870-2-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/rxperf.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c index 7ef93407be830..e848a4777b8c7 100644 --- a/net/rxrpc/rxperf.c +++ b/net/rxrpc/rxperf.c @@ -478,6 +478,18 @@ static int rxperf_deliver_request(struct rxperf_call *call) call->unmarshal++; fallthrough; case 2: + ret = rxperf_extract_data(call, true); + if (ret < 0) + return ret; + + /* Deal with the terminal magic cookie. */ + call->iov_len = 4; + call->kvec[0].iov_len = call->iov_len; + call->kvec[0].iov_base = call->tmp; + iov_iter_kvec(&call->iter, READ, call->kvec, 1, call->iov_len); + call->unmarshal++; + fallthrough; + case 3: ret = rxperf_extract_data(call, false); if (ret < 0) return ret; -- GitLab From 833fefa074444b1e7f7e834cbdce59ce02562ed0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 18 Feb 2025 19:22:45 +0000 Subject: [PATCH 0949/1585] rxrpc: peer->mtu_lock is redundant The peer->mtu_lock is only used to lock around writes to peer->max_data - and nothing else; further, all such writes take place in the I/O thread and the lock is only ever write-locked and never read-locked. In a couple of places, the write_seqcount_begin() is wrapped in preempt_disable/enable(), but not in all places. This can cause lockdep to complain: WARNING: CPU: 0 PID: 1549 at include/linux/seqlock.h:221 rxrpc_input_ack_trailer+0x305/0x430 ... RIP: 0010:rxrpc_input_ack_trailer+0x305/0x430 Fix this by just getting rid of the lock. Fixes: eeaedc5449d9 ("rxrpc: Implement path-MTU probing using padded PING ACKs (RFC8899)") Signed-off-by: David Howells cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20250218192250.296870-3-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/ar-internal.h | 1 - net/rxrpc/input.c | 2 -- net/rxrpc/peer_event.c | 9 +-------- net/rxrpc/peer_object.c | 1 - 4 files changed, 1 insertion(+), 12 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 5e740c4862034..a64a0cab1bf7f 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -360,7 +360,6 @@ struct rxrpc_peer { u8 pmtud_jumbo; /* Max jumbo packets for the MTU */ bool ackr_adv_pmtud; /* T if the peer advertises path-MTU */ unsigned int ackr_max_data; /* Maximum data advertised by peer */ - seqcount_t mtu_lock; /* Lockless MTU access management */ unsigned int if_mtu; /* Local interface MTU (- hdrsize) for this peer */ unsigned int max_data; /* Maximum packet data capacity for this peer */ unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 9047ba13bd31e..24aceb183c2c3 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -810,9 +810,7 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb if (max_mtu < peer->max_data) { trace_rxrpc_pmtud_reduce(peer, sp->hdr.serial, max_mtu, rxrpc_pmtud_reduce_ack); - write_seqcount_begin(&peer->mtu_lock); peer->max_data = max_mtu; - write_seqcount_end(&peer->mtu_lock); } max_data = umin(max_mtu, peer->max_data); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index bc283da9ee402..7f4729234957e 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -130,9 +130,7 @@ static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu) peer->pmtud_bad = max_data + 1; trace_rxrpc_pmtud_reduce(peer, 0, max_data, rxrpc_pmtud_reduce_icmp); - write_seqcount_begin(&peer->mtu_lock); peer->max_data = max_data; - write_seqcount_end(&peer->mtu_lock); } } @@ -408,13 +406,8 @@ void rxrpc_input_probe_for_pmtud(struct rxrpc_connection *conn, rxrpc_serial_t a } max_data = umin(max_data, peer->ackr_max_data); - if (max_data != peer->max_data) { - preempt_disable(); - write_seqcount_begin(&peer->mtu_lock); + if (max_data != peer->max_data) peer->max_data = max_data; - write_seqcount_end(&peer->mtu_lock); - preempt_enable(); - } jumbo = max_data + sizeof(struct rxrpc_jumbo_header); jumbo /= RXRPC_JUMBO_SUBPKTLEN; diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 0fcc87f0409f9..2ddc8ed687429 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -235,7 +235,6 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp, peer->service_conns = RB_ROOT; seqlock_init(&peer->service_conn_lock); spin_lock_init(&peer->lock); - seqcount_init(&peer->mtu_lock); peer->debug_id = atomic_inc_return(&rxrpc_debug_id); peer->recent_srtt_us = UINT_MAX; peer->cong_ssthresh = RXRPC_TX_MAX_WINDOW; -- GitLab From 71f5409176f4ffd460689eb5423a20332d00e342 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 18 Feb 2025 19:22:46 +0000 Subject: [PATCH 0950/1585] rxrpc: Fix locking issues with the peer record hash rxrpc_new_incoming_peer() can't use spin_lock_bh() whilst its caller has interrupts disabled. WARNING: CPU: 0 PID: 1550 at kernel/softirq.c:369 __local_bh_enable_ip+0x46/0xd0 ... Call Trace: rxrpc_alloc_incoming_call+0x1b0/0x400 rxrpc_new_incoming_call+0x1dd/0x5e0 rxrpc_input_packet+0x84a/0x920 rxrpc_io_thread+0x40d/0xb40 kthread+0x2ec/0x300 ret_from_fork+0x24/0x40 ret_from_fork_asm+0x1a/0x30 irq event stamp: 1811 hardirqs last enabled at (1809): _raw_spin_unlock_irq+0x24/0x50 hardirqs last disabled at (1810): _raw_read_lock_irq+0x17/0x70 softirqs last enabled at (1182): handle_softirqs+0x3ee/0x430 softirqs last disabled at (1811): rxrpc_new_incoming_peer+0x56/0x120 Fix this by using a plain spin_lock() instead. IRQs are held, so softirqs can't happen. Fixes: a2ea9a907260 ("rxrpc: Use irq-disabling spinlocks between app and I/O thread") Signed-off-by: David Howells cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20250218192250.296870-4-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/peer_object.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 2ddc8ed687429..56e09d161a97f 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -324,10 +324,10 @@ void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer) hash_key = rxrpc_peer_hash_key(local, &peer->srx); rxrpc_init_peer(local, peer, hash_key); - spin_lock_bh(&rxnet->peer_hash_lock); + spin_lock(&rxnet->peer_hash_lock); hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new); - spin_unlock_bh(&rxnet->peer_hash_lock); + spin_unlock(&rxnet->peer_hash_lock); } /* -- GitLab From add117e48df4788a86a21bd0515833c0a6db1ad1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 18 Feb 2025 19:22:47 +0000 Subject: [PATCH 0951/1585] afs: Fix the server_list to unuse a displaced server rather than putting it When allocating and building an afs_server_list struct object from a VLDB record, we look up each server address to get the server record for it - but a server may have more than one entry in the record and we discard the duplicate pointers. Currently, however, when we discard, we only put a server record, not unuse it - but the lookup got as an active-user count. The active-user count on an afs_server_list object determines its lifetime whereas the refcount keeps the memory backing it around. Failing to reduce the active-user counter prevents the record from being cleaned up and can lead to multiple copied being seen - and pointing to deleted afs_cell objects and other such things. Fix this by switching the incorrect 'put' to an 'unuse' instead. Without this, occasionally, a dead server record can be seen in /proc/net/afs/servers and list corruption may be observed: list_del corruption. prev->next should be ffff888102423e40, but was 0000000000000000. (prev=ffff88810140cd38) Fixes: 977e5f8ed0ab ("afs: Split the usage count on struct afs_server") Signed-off-by: David Howells cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20250218192250.296870-5-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- fs/afs/server_list.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c index 7e7e567a7f8a2..d20cd902ef949 100644 --- a/fs/afs/server_list.c +++ b/fs/afs/server_list.c @@ -97,8 +97,8 @@ struct afs_server_list *afs_alloc_server_list(struct afs_volume *volume, break; if (j < slist->nr_servers) { if (slist->servers[j].server == server) { - afs_put_server(volume->cell->net, server, - afs_server_trace_put_slist_isort); + afs_unuse_server(volume->cell->net, server, + afs_server_trace_put_slist_isort); continue; } -- GitLab From 1f0fc3374f3345ff1d150c5c56ac5016e5d3826a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 18 Feb 2025 19:22:48 +0000 Subject: [PATCH 0952/1585] afs: Give an afs_server object a ref on the afs_cell object it points to Give an afs_server object a ref on the afs_cell object it points to so that the cell doesn't get deleted before the server record. Whilst this is circular (cell -> vol -> server_list -> server -> cell), the ref only pins the memory, not the lifetime as that's controlled by the activity counter. When the volume's activity counter reaches 0, it detaches from the cell and discards its server list; when a cell's activity counter reaches 0, it discards its root volume. At that point, the circularity is cut. Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Signed-off-by: David Howells cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20250218192250.296870-6-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- fs/afs/server.c | 3 +++ include/trace/events/afs.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/fs/afs/server.c b/fs/afs/server.c index 038f9d0ae3af8..4504e16b458cc 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -163,6 +163,8 @@ static struct afs_server *afs_install_server(struct afs_cell *cell, rb_insert_color(&server->uuid_rb, &net->fs_servers); hlist_add_head_rcu(&server->proc_link, &net->fs_proc); + afs_get_cell(cell, afs_cell_trace_get_server); + added_dup: write_seqlock(&net->fs_addr_lock); estate = rcu_dereference_protected(server->endpoint_state, @@ -442,6 +444,7 @@ static void afs_server_rcu(struct rcu_head *rcu) atomic_read(&server->active), afs_server_trace_free); afs_put_endpoint_state(rcu_access_pointer(server->endpoint_state), afs_estate_trace_put_server); + afs_put_cell(server->cell, afs_cell_trace_put_server); kfree(server); } diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index b0db89058c911..958a2460330c0 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -174,6 +174,7 @@ enum yfs_cm_operation { EM(afs_cell_trace_get_queue_dns, "GET q-dns ") \ EM(afs_cell_trace_get_queue_manage, "GET q-mng ") \ EM(afs_cell_trace_get_queue_new, "GET q-new ") \ + EM(afs_cell_trace_get_server, "GET server") \ EM(afs_cell_trace_get_vol, "GET vol ") \ EM(afs_cell_trace_insert, "INSERT ") \ EM(afs_cell_trace_manage, "MANAGE ") \ @@ -182,6 +183,7 @@ enum yfs_cm_operation { EM(afs_cell_trace_put_destroy, "PUT destry") \ EM(afs_cell_trace_put_queue_work, "PUT q-work") \ EM(afs_cell_trace_put_queue_fail, "PUT q-fail") \ + EM(afs_cell_trace_put_server, "PUT server") \ EM(afs_cell_trace_put_vol, "PUT vol ") \ EM(afs_cell_trace_see_source, "SEE source") \ EM(afs_cell_trace_see_ws, "SEE ws ") \ -- GitLab From 5c70eb5c593d64d93b178905da215a9fd288a4b5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Feb 2025 13:18:54 +0000 Subject: [PATCH 0953/1585] net: better track kernel sockets lifetime While kernel sockets are dismantled during pernet_operations->exit(), their freeing can be delayed by any tx packets still held in qdisc or device queues, due to skb_set_owner_w() prior calls. This then trigger the following warning from ref_tracker_dir_exit() [1] To fix this, make sure that kernel sockets own a reference on net->passive. Add sk_net_refcnt_upgrade() helper, used whenever a kernel socket is converted to a refcounted one. [1] [ 136.263918][ T35] ref_tracker: net notrefcnt@ffff8880638f01e0 has 1/2 users at [ 136.263918][ T35] sk_alloc+0x2b3/0x370 [ 136.263918][ T35] inet6_create+0x6ce/0x10f0 [ 136.263918][ T35] __sock_create+0x4c0/0xa30 [ 136.263918][ T35] inet_ctl_sock_create+0xc2/0x250 [ 136.263918][ T35] igmp6_net_init+0x39/0x390 [ 136.263918][ T35] ops_init+0x31e/0x590 [ 136.263918][ T35] setup_net+0x287/0x9e0 [ 136.263918][ T35] copy_net_ns+0x33f/0x570 [ 136.263918][ T35] create_new_namespaces+0x425/0x7b0 [ 136.263918][ T35] unshare_nsproxy_namespaces+0x124/0x180 [ 136.263918][ T35] ksys_unshare+0x57d/0xa70 [ 136.263918][ T35] __x64_sys_unshare+0x38/0x40 [ 136.263918][ T35] do_syscall_64+0xf3/0x230 [ 136.263918][ T35] entry_SYSCALL_64_after_hwframe+0x77/0x7f [ 136.263918][ T35] [ 136.343488][ T35] ref_tracker: net notrefcnt@ffff8880638f01e0 has 1/2 users at [ 136.343488][ T35] sk_alloc+0x2b3/0x370 [ 136.343488][ T35] inet6_create+0x6ce/0x10f0 [ 136.343488][ T35] __sock_create+0x4c0/0xa30 [ 136.343488][ T35] inet_ctl_sock_create+0xc2/0x250 [ 136.343488][ T35] ndisc_net_init+0xa7/0x2b0 [ 136.343488][ T35] ops_init+0x31e/0x590 [ 136.343488][ T35] setup_net+0x287/0x9e0 [ 136.343488][ T35] copy_net_ns+0x33f/0x570 [ 136.343488][ T35] create_new_namespaces+0x425/0x7b0 [ 136.343488][ T35] unshare_nsproxy_namespaces+0x124/0x180 [ 136.343488][ T35] ksys_unshare+0x57d/0xa70 [ 136.343488][ T35] __x64_sys_unshare+0x38/0x40 [ 136.343488][ T35] do_syscall_64+0xf3/0x230 [ 136.343488][ T35] entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 0cafd77dcd03 ("net: add a refcount tracker for kernel sockets") Reported-by: syzbot+30a19e01a97420719891@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/67b72aeb.050a0220.14d86d.0283.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250220131854.4048077-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 1 + net/core/sock.c | 27 ++++++++++++++++++++++----- net/mptcp/subflow.c | 5 +---- net/netlink/af_netlink.c | 10 ---------- net/rds/tcp.c | 8 ++------ net/smc/af_smc.c | 5 +---- net/sunrpc/svcsock.c | 5 +---- net/sunrpc/xprtsock.c | 8 ++------ 8 files changed, 30 insertions(+), 39 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 8036b3b79cd8b..7ef728324e4e7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1751,6 +1751,7 @@ static inline bool sock_allow_reclassification(const struct sock *csk) struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern); void sk_free(struct sock *sk); +void sk_net_refcnt_upgrade(struct sock *sk); void sk_destruct(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); void sk_free_unlock_clone(struct sock *sk); diff --git a/net/core/sock.c b/net/core/sock.c index eae2ae70a2e03..6c0e87f97fa4a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2246,6 +2246,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, get_net_track(net, &sk->ns_tracker, priority); sock_inuse_add(net, 1); } else { + net_passive_inc(net); __netns_tracker_alloc(net, &sk->ns_tracker, false, priority); } @@ -2270,6 +2271,7 @@ EXPORT_SYMBOL(sk_alloc); static void __sk_destruct(struct rcu_head *head) { struct sock *sk = container_of(head, struct sock, sk_rcu); + struct net *net = sock_net(sk); struct sk_filter *filter; if (sk->sk_destruct) @@ -2301,14 +2303,28 @@ static void __sk_destruct(struct rcu_head *head) put_cred(sk->sk_peer_cred); put_pid(sk->sk_peer_pid); - if (likely(sk->sk_net_refcnt)) - put_net_track(sock_net(sk), &sk->ns_tracker); - else - __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); - + if (likely(sk->sk_net_refcnt)) { + put_net_track(net, &sk->ns_tracker); + } else { + __netns_tracker_free(net, &sk->ns_tracker, false); + net_passive_dec(net); + } sk_prot_free(sk->sk_prot_creator, sk); } +void sk_net_refcnt_upgrade(struct sock *sk) +{ + struct net *net = sock_net(sk); + + WARN_ON_ONCE(sk->sk_net_refcnt); + __netns_tracker_free(net, &sk->ns_tracker, false); + net_passive_dec(net); + sk->sk_net_refcnt = 1; + get_net_track(net, &sk->ns_tracker, GFP_KERNEL); + sock_inuse_add(net, 1); +} +EXPORT_SYMBOL_GPL(sk_net_refcnt_upgrade); + void sk_destruct(struct sock *sk) { bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE); @@ -2405,6 +2421,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) * is not properly dismantling its kernel sockets at netns * destroy time. */ + net_passive_inc(sock_net(newsk)); __netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker, false, priority); } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index fd021cf8286ef..dfcbef9c46246 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1772,10 +1772,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, * needs it. * Update ns_tracker to current stack trace and refcounted tracker. */ - __netns_tracker_free(net, &sf->sk->ns_tracker, false); - sf->sk->sk_net_refcnt = 1; - get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); + sk_net_refcnt_upgrade(sf->sk); err = tcp_set_ulp(sf->sk, "mptcp"); if (err) goto err_free; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 85311226183a2..a53ea60d0a78d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -795,16 +795,6 @@ static int netlink_release(struct socket *sock) sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); - /* Because struct net might disappear soon, do not keep a pointer. */ - if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) { - __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); - /* Because of deferred_put_nlk_sk and use of work queue, - * it is possible netns will be freed before this socket. - */ - sock_net_set(sk, &init_net); - __netns_tracker_alloc(&init_net, &sk->ns_tracker, - false, GFP_KERNEL); - } call_rcu(&nlk->rcu, deferred_put_nlk_sk); return 0; } diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 0581c53e65170..3cc2f303bf786 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -504,12 +504,8 @@ bool rds_tcp_tune(struct socket *sock) release_sock(sk); return false; } - /* Update ns_tracker to current stack trace and refcounted tracker */ - __netns_tracker_free(net, &sk->ns_tracker, false); - - sk->sk_net_refcnt = 1; - netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); + sk_net_refcnt_upgrade(sk); + put_net(net); } rtn = net_generic(net, rds_tcp_netid); if (rtn->sndbuf_size > 0) { diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index ca6984541edbd..3e6cb35baf25a 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -3337,10 +3337,7 @@ int smc_create_clcsk(struct net *net, struct sock *sk, int family) * which need net ref. */ sk = smc->clcsock->sk; - __netns_tracker_free(net, &sk->ns_tracker, false); - sk->sk_net_refcnt = 1; - get_net_track(net, &sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); + sk_net_refcnt_upgrade(sk); return 0; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index cb3bd12f5818b..72e5a01df3d35 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1541,10 +1541,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, newlen = error; if (protocol == IPPROTO_TCP) { - __netns_tracker_free(net, &sock->sk->ns_tracker, false); - sock->sk->sk_net_refcnt = 1; - get_net_track(net, &sock->sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); + sk_net_refcnt_upgrade(sock->sk); if ((error = kernel_listen(sock, 64)) < 0) goto bummer; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c60936d8cef71..940fe65b2a351 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1941,12 +1941,8 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, goto out; } - if (protocol == IPPROTO_TCP) { - __netns_tracker_free(xprt->xprt_net, &sock->sk->ns_tracker, false); - sock->sk->sk_net_refcnt = 1; - get_net_track(xprt->xprt_net, &sock->sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(xprt->xprt_net, 1); - } + if (protocol == IPPROTO_TCP) + sk_net_refcnt_upgrade(sock->sk); filp = sock_alloc_file(sock, O_NONBLOCK, NULL); if (IS_ERR(filp)) -- GitLab From 0e4427f8f587c4b603475468bb3aee9418574893 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 20 Feb 2025 09:25:59 +0200 Subject: [PATCH 0954/1585] net: loopback: Avoid sending IP packets without an Ethernet header After commit 22600596b675 ("ipv4: give an IPv4 dev to blackhole_netdev") IPv4 neighbors can be constructed on the blackhole net device, but they are constructed with an output function (neigh_direct_output()) that simply calls dev_queue_xmit(). The latter will transmit packets via 'skb->dev' which might not be the blackhole net device if dst_dev_put() switched 'dst->dev' to the blackhole net device while another CPU was using the dst entry in ip_output(), but after it already initialized 'skb->dev' from 'dst->dev'. Specifically, the following can happen: CPU1 CPU2 udp_sendmsg(sk1) udp_sendmsg(sk2) udp_send_skb() [...] ip_output() skb->dev = skb_dst(skb)->dev dst_dev_put() dst->dev = blackhole_netdev ip_finish_output2() resolves neigh on dst->dev neigh_output() neigh_direct_output() dev_queue_xmit() This will result in IPv4 packets being sent without an Ethernet header via a valid net device: tcpdump: verbose output suppressed, use -v[v]... for full protocol decode listening on enp9s0, link-type EN10MB (Ethernet), snapshot length 262144 bytes 22:07:02.329668 20:00:40:11:18:fb > 45:00:00:44:f4:94, ethertype Unknown (0x58c6), length 68: 0x0000: 8dda 74ca f1ae ca6c ca6c 0098 969c 0400 ..t....l.l...... 0x0010: 0000 4730 3f18 6800 0000 0000 0000 9971 ..G0?.h........q 0x0020: c4c9 9055 a157 0a70 9ead bf83 38ca ab38 ...U.W.p....8..8 0x0030: 8add ab96 e052 .....R Fix by making sure that neighbors are constructed on top of the blackhole net device with an output function that simply consumes the packets, in a similar fashion to dst_discard_out() and blackhole_netdev_xmit(). Fixes: 8d7017fd621d ("blackhole_netdev: use blackhole_netdev to invalidate dst entries") Fixes: 22600596b675 ("ipv4: give an IPv4 dev to blackhole_netdev") Reported-by: Florian Meister Closes: https://lore.kernel.org/netdev/20250210084931.23a5c2e4@hermes.local/ Signed-off-by: Ido Schimmel Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250220072559.782296-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/loopback.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index c8840c3b9a1bc..f1d68153987e1 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -244,8 +244,22 @@ static netdev_tx_t blackhole_netdev_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } +static int blackhole_neigh_output(struct neighbour *n, struct sk_buff *skb) +{ + kfree_skb(skb); + return 0; +} + +static int blackhole_neigh_construct(struct net_device *dev, + struct neighbour *n) +{ + n->output = blackhole_neigh_output; + return 0; +} + static const struct net_device_ops blackhole_netdev_ops = { .ndo_start_xmit = blackhole_netdev_xmit, + .ndo_neigh_construct = blackhole_neigh_construct, }; /* This is a dst-dummy device used specifically for invalidated -- GitLab From c180188ec02281126045414e90d08422a80f75b4 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 20 Feb 2025 12:07:52 +0100 Subject: [PATCH 0955/1585] net: set the minimum for net_hotdata.netdev_budget_usecs Commit 7acf8a1e8a28 ("Replace 2 jiffies with sysctl netdev_budget_usecs to enable softirq tuning") added a possibility to set net_hotdata.netdev_budget_usecs, but added no lower bound checking. Commit a4837980fd9f ("net: revert default NAPI poll timeout to 2 jiffies") made the *initial* value HZ-dependent, so the initial value is at least 2 jiffies even for lower HZ values (2 ms for 1000 Hz, 8ms for 250 Hz, 20 ms for 100 Hz). But a user still can set improper values by a sysctl. Set .extra1 (the lower bound) for net_hotdata.netdev_budget_usecs to the same value as in the latter commit. That is to 2 jiffies. Fixes: a4837980fd9f ("net: revert default NAPI poll timeout to 2 jiffies") Fixes: 7acf8a1e8a28 ("Replace 2 jiffies with sysctl netdev_budget_usecs to enable softirq tuning") Signed-off-by: Jiri Slaby (SUSE) Cc: Dmitry Yakunin Cc: Konstantin Khlebnikov Link: https://patch.msgid.link/20250220110752.137639-1-jirislaby@kernel.org Signed-off-by: Jakub Kicinski --- net/core/sysctl_net_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index ad2741f1346af..c7769ee0d9c55 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -34,6 +34,7 @@ static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE; +static int netdev_budget_usecs_min = 2 * USEC_PER_SEC / HZ; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -587,7 +588,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, + .extra1 = &netdev_budget_usecs_min, }, { .procname = "fb_tunnels_only_for_init_net", -- GitLab From 27843ce6ba3d3122b65066550fe33fb8839f8aef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Feb 2025 15:53:36 +0000 Subject: [PATCH 0956/1585] ipvlan: ensure network headers are in skb linear part syzbot found that ipvlan_process_v6_outbound() was assuming the IPv6 network header isis present in skb->head [1] Add the needed pskb_network_may_pull() calls for both IPv4 and IPv6 handlers. [1] BUG: KMSAN: uninit-value in __ipv6_addr_type+0xa2/0x490 net/ipv6/addrconf_core.c:47 __ipv6_addr_type+0xa2/0x490 net/ipv6/addrconf_core.c:47 ipv6_addr_type include/net/ipv6.h:555 [inline] ip6_route_output_flags_noref net/ipv6/route.c:2616 [inline] ip6_route_output_flags+0x51/0x720 net/ipv6/route.c:2651 ip6_route_output include/net/ip6_route.h:93 [inline] ipvlan_route_v6_outbound+0x24e/0x520 drivers/net/ipvlan/ipvlan_core.c:476 ipvlan_process_v6_outbound drivers/net/ipvlan/ipvlan_core.c:491 [inline] ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:541 [inline] ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:605 [inline] ipvlan_queue_xmit+0xd72/0x1780 drivers/net/ipvlan/ipvlan_core.c:671 ipvlan_start_xmit+0x5b/0x210 drivers/net/ipvlan/ipvlan_main.c:223 __netdev_start_xmit include/linux/netdevice.h:5150 [inline] netdev_start_xmit include/linux/netdevice.h:5159 [inline] xmit_one net/core/dev.c:3735 [inline] dev_hard_start_xmit+0x247/0xa20 net/core/dev.c:3751 sch_direct_xmit+0x399/0xd40 net/sched/sch_generic.c:343 qdisc_restart net/sched/sch_generic.c:408 [inline] __qdisc_run+0x14da/0x35d0 net/sched/sch_generic.c:416 qdisc_run+0x141/0x4d0 include/net/pkt_sched.h:127 net_tx_action+0x78b/0x940 net/core/dev.c:5484 handle_softirqs+0x1a0/0x7c0 kernel/softirq.c:561 __do_softirq+0x14/0x1a kernel/softirq.c:595 do_softirq+0x9a/0x100 kernel/softirq.c:462 __local_bh_enable_ip+0x9f/0xb0 kernel/softirq.c:389 local_bh_enable include/linux/bottom_half.h:33 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:919 [inline] __dev_queue_xmit+0x2758/0x57d0 net/core/dev.c:4611 dev_queue_xmit include/linux/netdevice.h:3311 [inline] packet_xmit+0x9c/0x6c0 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3132 [inline] packet_sendmsg+0x93e0/0xa7e0 net/packet/af_packet.c:3164 sock_sendmsg_nosec net/socket.c:718 [inline] Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.") Reported-by: syzbot+93ab4a777bafb9d9f960@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/67b74f01.050a0220.14d86d.02d8.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Mahesh Bandewar Link: https://patch.msgid.link/20250220155336.61884-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ipvlan/ipvlan_core.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index fd591ddb3884d..ca62188a317ad 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -416,20 +416,25 @@ struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h, static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb) { - const struct iphdr *ip4h = ip_hdr(skb); struct net_device *dev = skb->dev; struct net *net = dev_net(dev); - struct rtable *rt; int err, ret = NET_XMIT_DROP; + const struct iphdr *ip4h; + struct rtable *rt; struct flowi4 fl4 = { .flowi4_oif = dev->ifindex, - .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h)), .flowi4_flags = FLOWI_FLAG_ANYSRC, .flowi4_mark = skb->mark, - .daddr = ip4h->daddr, - .saddr = ip4h->saddr, }; + if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) + goto err; + + ip4h = ip_hdr(skb); + fl4.daddr = ip4h->daddr; + fl4.saddr = ip4h->saddr; + fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h)); + rt = ip_route_output_flow(net, &fl4, NULL); if (IS_ERR(rt)) goto err; @@ -488,6 +493,12 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb) struct net_device *dev = skb->dev; int err, ret = NET_XMIT_DROP; + if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) { + DEV_STATS_INC(dev, tx_errors); + kfree_skb(skb); + return ret; + } + err = ipvlan_route_v6_outbound(dev, skb); if (unlikely(err)) { DEV_STATS_INC(dev, tx_errors); -- GitLab From fa52f15c745ce55261b92873676f64f7348cfe82 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Thu, 20 Feb 2025 11:29:50 -0500 Subject: [PATCH 0957/1585] net: cadence: macb: Synchronize stats calculations Stats calculations involve a RMW to add the stat update to the existing value. This is currently not protected by any synchronization mechanism, so data races are possible. Add a spinlock to protect the update. The reader side could be protected using u64_stats, but we would still need a spinlock for the update side anyway. And we always do an update immediately before reading the stats anyway. Fixes: 89e5785fc8a6 ("[PATCH] Atmel MACB ethernet driver") Signed-off-by: Sean Anderson Link: https://patch.msgid.link/20250220162950.95941-1-sean.anderson@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb.h | 2 ++ drivers/net/ethernet/cadence/macb_main.c | 12 ++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 5740c98d8c9f0..2847278d9cd48 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -1279,6 +1279,8 @@ struct macb { struct clk *rx_clk; struct clk *tsu_clk; struct net_device *dev; + /* Protects hw_stats and ethtool_stats */ + spinlock_t stats_lock; union { struct macb_stats macb; struct gem_stats gem; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 48496209fb164..c1f57d96e63fc 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1978,10 +1978,12 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) if (status & MACB_BIT(ISR_ROVR)) { /* We missed at least one packet */ + spin_lock(&bp->stats_lock); if (macb_is_gem(bp)) bp->hw_stats.gem.rx_overruns++; else bp->hw_stats.macb.rx_overruns++; + spin_unlock(&bp->stats_lock); if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) queue_writel(queue, ISR, MACB_BIT(ISR_ROVR)); @@ -3102,6 +3104,7 @@ static struct net_device_stats *gem_get_stats(struct macb *bp) if (!netif_running(bp->dev)) return nstat; + spin_lock_irq(&bp->stats_lock); gem_update_stats(bp); nstat->rx_errors = (hwstat->rx_frame_check_sequence_errors + @@ -3131,6 +3134,7 @@ static struct net_device_stats *gem_get_stats(struct macb *bp) nstat->tx_aborted_errors = hwstat->tx_excessive_collisions; nstat->tx_carrier_errors = hwstat->tx_carrier_sense_errors; nstat->tx_fifo_errors = hwstat->tx_underrun; + spin_unlock_irq(&bp->stats_lock); return nstat; } @@ -3138,12 +3142,13 @@ static struct net_device_stats *gem_get_stats(struct macb *bp) static void gem_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { - struct macb *bp; + struct macb *bp = netdev_priv(dev); - bp = netdev_priv(dev); + spin_lock_irq(&bp->stats_lock); gem_update_stats(bp); memcpy(data, &bp->ethtool_stats, sizeof(u64) * (GEM_STATS_LEN + QUEUE_STATS_LEN * MACB_MAX_QUEUES)); + spin_unlock_irq(&bp->stats_lock); } static int gem_get_sset_count(struct net_device *dev, int sset) @@ -3193,6 +3198,7 @@ static struct net_device_stats *macb_get_stats(struct net_device *dev) return gem_get_stats(bp); /* read stats from hardware */ + spin_lock_irq(&bp->stats_lock); macb_update_stats(bp); /* Convert HW stats into netdevice stats */ @@ -3226,6 +3232,7 @@ static struct net_device_stats *macb_get_stats(struct net_device *dev) nstat->tx_carrier_errors = hwstat->tx_carrier_errors; nstat->tx_fifo_errors = hwstat->tx_underruns; /* Don't know about heartbeat or window errors... */ + spin_unlock_irq(&bp->stats_lock); return nstat; } @@ -5097,6 +5104,7 @@ static int macb_probe(struct platform_device *pdev) } } spin_lock_init(&bp->lock); + spin_lock_init(&bp->stats_lock); /* setup capabilities */ macb_configure_caps(bp, macb_config); -- GitLab From 28b04731a38c80092f47437af6c2770765e0b99f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 20 Feb 2025 16:50:12 -0800 Subject: [PATCH 0958/1585] MAINTAINERS: fix DWMAC S32 entry Using L: with more than a bare email address causes getmaintainer.pl to be unable to parse the entry. Fix this by doing as other entries that use this email address and convert it to an R: entry. Link: https://patch.msgid.link/20250221005012.1051897-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3864d473f52f2..ac15093537c6b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2877,7 +2877,7 @@ F: drivers/pinctrl/nxp/ ARM/NXP S32G/S32R DWMAC ETHERNET DRIVER M: Jan Petrous -L: NXP S32 Linux Team +R: s32@nxp.com S: Maintained F: Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml F: drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c -- GitLab From 781813db7909d945c33d3b035822225f3598774d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 20 Feb 2025 16:12:12 +0100 Subject: [PATCH 0959/1585] i2c: core: Allocate temporary client dynamically MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/i2c/i2c-core-base.c: In function ‘i2c_detect.isra’: drivers/i2c/i2c-core-base.c:2544:1: warning: the frame size of 1312 bytes is larger than 1024 bytes [-Wframe-larger-than=] 2544 | } | ^ Fix this by allocating the temporary client structure dynamically, as it is a rather large structure (1216 bytes, depending on kernel config). This is basically a revert of the to-be-fixed commit with some checkpatch improvements. Fixes: 735668f8e5c9 ("i2c: core: Allocate temp client on the stack in i2c_detect") Signed-off-by: Geert Uytterhoeven Reviewed-by: Su Hui Reviewed-by: Guenter Roeck [wsa: updated commit message, merged tags from similar patch] Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 35a221e2c11c1..7ad1ad5c8c3f5 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -2506,7 +2506,7 @@ static int i2c_detect_address(struct i2c_client *temp_client, static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver) { const unsigned short *address_list; - struct i2c_client temp_client; + struct i2c_client *temp_client; int i, err = 0; address_list = driver->address_list; @@ -2527,19 +2527,24 @@ static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver) return 0; /* Set up a temporary client to help detect callback */ - memset(&temp_client, 0, sizeof(temp_client)); - temp_client.adapter = adapter; + temp_client = kzalloc(sizeof(*temp_client), GFP_KERNEL); + if (!temp_client) + return -ENOMEM; + + temp_client->adapter = adapter; for (i = 0; address_list[i] != I2C_CLIENT_END; i += 1) { dev_dbg(&adapter->dev, "found normal entry for adapter %d, addr 0x%02x\n", i2c_adapter_id(adapter), address_list[i]); - temp_client.addr = address_list[i]; - err = i2c_detect_address(&temp_client, driver); + temp_client->addr = address_list[i]; + err = i2c_detect_address(temp_client, driver); if (unlikely(err)) break; } + kfree(temp_client); + return err; } -- GitLab From 50cef76d5cb0e199cda19f026842560f6eedc4f7 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 23 Jan 2025 14:44:53 +0100 Subject: [PATCH 0960/1585] x86/microcode/AMD: Load only SHA256-checksummed patches Load patches for which the driver carries a SHA256 checksum of the patch blob. This can be disabled by adding "microcode.amd_sha_check=off" on the kernel cmdline. But it is highly NOT recommended. Signed-off-by: Borislav Petkov (AMD) --- arch/x86/Kconfig | 1 + arch/x86/kernel/cpu/microcode/amd.c | 111 +++++- arch/x86/kernel/cpu/microcode/amd_shas.c | 444 +++++++++++++++++++++++ 3 files changed, 554 insertions(+), 2 deletions(-) create mode 100644 arch/x86/kernel/cpu/microcode/amd_shas.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index be2c311f5118d..0e27ebd7e36a9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1341,6 +1341,7 @@ config X86_REBOOTFIXUPS config MICROCODE def_bool y depends on CPU_SUP_AMD || CPU_SUP_INTEL + select CRYPTO_LIB_SHA256 if CPU_SUP_AMD config MICROCODE_INITRD32 def_bool y diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 31f90e129b083..95ac1c6a84fbe 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -23,14 +23,18 @@ #include #include +#include #include #include #include #include #include +#include + #include #include +#include #include #include #include @@ -145,6 +149,98 @@ ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin"; */ static u32 bsp_cpuid_1_eax __ro_after_init; +static bool sha_check = true; + +struct patch_digest { + u32 patch_id; + u8 sha256[SHA256_DIGEST_SIZE]; +}; + +#include "amd_shas.c" + +static int cmp_id(const void *key, const void *elem) +{ + struct patch_digest *pd = (struct patch_digest *)elem; + u32 patch_id = *(u32 *)key; + + if (patch_id == pd->patch_id) + return 0; + else if (patch_id < pd->patch_id) + return -1; + else + return 1; +} + +static bool need_sha_check(u32 cur_rev) +{ + switch (cur_rev >> 8) { + case 0x80012: return cur_rev <= 0x800126f; break; + case 0x83010: return cur_rev <= 0x830107c; break; + case 0x86001: return cur_rev <= 0x860010e; break; + case 0x86081: return cur_rev <= 0x8608108; break; + case 0x87010: return cur_rev <= 0x8701034; break; + case 0x8a000: return cur_rev <= 0x8a0000a; break; + case 0xa0011: return cur_rev <= 0xa0011da; break; + case 0xa0012: return cur_rev <= 0xa001243; break; + case 0xa1011: return cur_rev <= 0xa101153; break; + case 0xa1012: return cur_rev <= 0xa10124e; break; + case 0xa1081: return cur_rev <= 0xa108109; break; + case 0xa2010: return cur_rev <= 0xa20102f; break; + case 0xa2012: return cur_rev <= 0xa201212; break; + case 0xa6012: return cur_rev <= 0xa60120a; break; + case 0xa7041: return cur_rev <= 0xa704109; break; + case 0xa7052: return cur_rev <= 0xa705208; break; + case 0xa7080: return cur_rev <= 0xa708009; break; + case 0xa70c0: return cur_rev <= 0xa70C009; break; + case 0xaa002: return cur_rev <= 0xaa00218; break; + default: break; + } + + pr_info("You should not be seeing this. Please send the following couple of lines to x86--kernel.org\n"); + pr_info("CPUID(1).EAX: 0x%x, current revision: 0x%x\n", bsp_cpuid_1_eax, cur_rev); + return true; +} + +static bool verify_sha256_digest(u32 patch_id, u32 cur_rev, const u8 *data, unsigned int len) +{ + struct patch_digest *pd = NULL; + u8 digest[SHA256_DIGEST_SIZE]; + struct sha256_state s; + int i; + + if (x86_family(bsp_cpuid_1_eax) < 0x17 || + x86_family(bsp_cpuid_1_eax) > 0x19) + return true; + + if (!need_sha_check(cur_rev)) + return true; + + if (!sha_check) + return true; + + pd = bsearch(&patch_id, phashes, ARRAY_SIZE(phashes), sizeof(struct patch_digest), cmp_id); + if (!pd) { + pr_err("No sha256 digest for patch ID: 0x%x found\n", patch_id); + return false; + } + + sha256_init(&s); + sha256_update(&s, data, len); + sha256_final(&s, digest); + + if (memcmp(digest, pd->sha256, sizeof(digest))) { + pr_err("Patch 0x%x SHA256 digest mismatch!\n", patch_id); + + for (i = 0; i < SHA256_DIGEST_SIZE; i++) + pr_cont("0x%x ", digest[i]); + pr_info("\n"); + + return false; + } + + return true; +} + static u32 get_patch_level(void) { u32 rev, dummy __always_unused; @@ -497,6 +593,9 @@ static bool __apply_microcode_amd(struct microcode_amd *mc, u32 *cur_rev, { unsigned long p_addr = (unsigned long)&mc->hdr.data_code; + if (!verify_sha256_digest(mc->hdr.patch_id, *cur_rev, (const u8 *)p_addr, psize)) + return -1; + native_wrmsrl(MSR_AMD64_PATCH_LOADER, p_addr); if (x86_family(bsp_cpuid_1_eax) == 0x17) { @@ -571,8 +670,17 @@ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_ struct cont_desc desc = { }; struct microcode_amd *mc; struct cpio_data cp = { }; + char buf[4]; u32 rev; + if (cmdline_find_option(boot_command_line, "microcode.amd_sha_check", buf, 4)) { + if (!strncmp(buf, "off", 3)) { + sha_check = false; + pr_warn_once("It is a very very bad idea to disable the blobs SHA check!\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + } + } + bsp_cpuid_1_eax = cpuid_1_eax; rev = get_patch_level(); @@ -902,8 +1010,7 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover, } /* Scan the blob in @data and add microcode patches to the cache. */ -static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, - size_t size) +static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, size_t size) { u8 *fw = (u8 *)data; size_t offset; diff --git a/arch/x86/kernel/cpu/microcode/amd_shas.c b/arch/x86/kernel/cpu/microcode/amd_shas.c new file mode 100644 index 0000000000000..2a1655b1fdd88 --- /dev/null +++ b/arch/x86/kernel/cpu/microcode/amd_shas.c @@ -0,0 +1,444 @@ +/* Keep 'em sorted. */ +static const struct patch_digest phashes[] = { + { 0x8001227, { + 0x99,0xc0,0x9b,0x2b,0xcc,0x9f,0x52,0x1b, + 0x1a,0x5f,0x1d,0x83,0xa1,0x6c,0xc4,0x46, + 0xe2,0x6c,0xda,0x73,0xfb,0x2d,0x23,0xa8, + 0x77,0xdc,0x15,0x31,0x33,0x4a,0x46,0x18, + } + }, + { 0x8001250, { + 0xc0,0x0b,0x6b,0x19,0xfd,0x5c,0x39,0x60, + 0xd5,0xc3,0x57,0x46,0x54,0xe4,0xd1,0xaa, + 0xa8,0xf7,0x1f,0xa8,0x6a,0x60,0x3e,0xe3, + 0x27,0x39,0x8e,0x53,0x30,0xf8,0x49,0x19, + } + }, + { 0x800126e, { + 0xf3,0x8b,0x2b,0xb6,0x34,0xe3,0xc8,0x2c, + 0xef,0xec,0x63,0x6d,0xc8,0x76,0x77,0xb3, + 0x25,0x5a,0xb7,0x52,0x8c,0x83,0x26,0xe6, + 0x4c,0xbe,0xbf,0xe9,0x7d,0x22,0x6a,0x43, + } + }, + { 0x800126f, { + 0x2b,0x5a,0xf2,0x9c,0xdd,0xd2,0x7f,0xec, + 0xec,0x96,0x09,0x57,0xb0,0x96,0x29,0x8b, + 0x2e,0x26,0x91,0xf0,0x49,0x33,0x42,0x18, + 0xdd,0x4b,0x65,0x5a,0xd4,0x15,0x3d,0x33, + } + }, + { 0x800820d, { + 0x68,0x98,0x83,0xcd,0x22,0x0d,0xdd,0x59, + 0x73,0x2c,0x5b,0x37,0x1f,0x84,0x0e,0x67, + 0x96,0x43,0x83,0x0c,0x46,0x44,0xab,0x7c, + 0x7b,0x65,0x9e,0x57,0xb5,0x90,0x4b,0x0e, + } + }, + { 0x8301025, { + 0xe4,0x7d,0xdb,0x1e,0x14,0xb4,0x5e,0x36, + 0x8f,0x3e,0x48,0x88,0x3c,0x6d,0x76,0xa1, + 0x59,0xc6,0xc0,0x72,0x42,0xdf,0x6c,0x30, + 0x6f,0x0b,0x28,0x16,0x61,0xfc,0x79,0x77, + } + }, + { 0x8301055, { + 0x81,0x7b,0x99,0x1b,0xae,0x2d,0x4f,0x9a, + 0xef,0x13,0xce,0xb5,0x10,0xaf,0x6a,0xea, + 0xe5,0xb0,0x64,0x98,0x10,0x68,0x34,0x3b, + 0x9d,0x7a,0xd6,0x22,0x77,0x5f,0xb3,0x5b, + } + }, + { 0x8301072, { + 0xcf,0x76,0xa7,0x1a,0x49,0xdf,0x2a,0x5e, + 0x9e,0x40,0x70,0xe5,0xdd,0x8a,0xa8,0x28, + 0x20,0xdc,0x91,0xd8,0x2c,0xa6,0xa0,0xb1, + 0x2d,0x22,0x26,0x94,0x4b,0x40,0x85,0x30, + } + }, + { 0x830107a, { + 0x2a,0x65,0x8c,0x1a,0x5e,0x07,0x21,0x72, + 0xdf,0x90,0xa6,0x51,0x37,0xd3,0x4b,0x34, + 0xc4,0xda,0x03,0xe1,0x8a,0x6c,0xfb,0x20, + 0x04,0xb2,0x81,0x05,0xd4,0x87,0xf4,0x0a, + } + }, + { 0x830107b, { + 0xb3,0x43,0x13,0x63,0x56,0xc1,0x39,0xad, + 0x10,0xa6,0x2b,0xcc,0x02,0xe6,0x76,0x2a, + 0x1e,0x39,0x58,0x3e,0x23,0x6e,0xa4,0x04, + 0x95,0xea,0xf9,0x6d,0xc2,0x8a,0x13,0x19, + } + }, + { 0x830107c, { + 0x21,0x64,0xde,0xfb,0x9f,0x68,0x96,0x47, + 0x70,0x5c,0xe2,0x8f,0x18,0x52,0x6a,0xac, + 0xa4,0xd2,0x2e,0xe0,0xde,0x68,0x66,0xc3, + 0xeb,0x1e,0xd3,0x3f,0xbc,0x51,0x1d,0x38, + } + }, + { 0x860010d, { + 0x86,0xb6,0x15,0x83,0xbc,0x3b,0x9c,0xe0, + 0xb3,0xef,0x1d,0x99,0x84,0x35,0x15,0xf7, + 0x7c,0x2a,0xc6,0x42,0xdb,0x73,0x07,0x5c, + 0x7d,0xc3,0x02,0xb5,0x43,0x06,0x5e,0xf8, + } + }, + { 0x8608108, { + 0x14,0xfe,0x57,0x86,0x49,0xc8,0x68,0xe2, + 0x11,0xa3,0xcb,0x6e,0xff,0x6e,0xd5,0x38, + 0xfe,0x89,0x1a,0xe0,0x67,0xbf,0xc4,0xcc, + 0x1b,0x9f,0x84,0x77,0x2b,0x9f,0xaa,0xbd, + } + }, + { 0x8701034, { + 0xc3,0x14,0x09,0xa8,0x9c,0x3f,0x8d,0x83, + 0x9b,0x4c,0xa5,0xb7,0x64,0x8b,0x91,0x5d, + 0x85,0x6a,0x39,0x26,0x1e,0x14,0x41,0xa8, + 0x75,0xea,0xa6,0xf9,0xc9,0xd1,0xea,0x2b, + } + }, + { 0x8a00008, { + 0xd7,0x2a,0x93,0xdc,0x05,0x2f,0xa5,0x6e, + 0x0c,0x61,0x2c,0x07,0x9f,0x38,0xe9,0x8e, + 0xef,0x7d,0x2a,0x05,0x4d,0x56,0xaf,0x72, + 0xe7,0x56,0x47,0x6e,0x60,0x27,0xd5,0x8c, + } + }, + { 0x8a0000a, { + 0x73,0x31,0x26,0x22,0xd4,0xf9,0xee,0x3c, + 0x07,0x06,0xe7,0xb9,0xad,0xd8,0x72,0x44, + 0x33,0x31,0xaa,0x7d,0xc3,0x67,0x0e,0xdb, + 0x47,0xb5,0xaa,0xbc,0xf5,0xbb,0xd9,0x20, + } + }, + { 0xa00104c, { + 0x3c,0x8a,0xfe,0x04,0x62,0xd8,0x6d,0xbe, + 0xa7,0x14,0x28,0x64,0x75,0xc0,0xa3,0x76, + 0xb7,0x92,0x0b,0x97,0x0a,0x8e,0x9c,0x5b, + 0x1b,0xc8,0x9d,0x3a,0x1e,0x81,0x3d,0x3b, + } + }, + { 0xa00104e, { + 0xc4,0x35,0x82,0x67,0xd2,0x86,0xe5,0xb2, + 0xfd,0x69,0x12,0x38,0xc8,0x77,0xba,0xe0, + 0x70,0xf9,0x77,0x89,0x10,0xa6,0x74,0x4e, + 0x56,0x58,0x13,0xf5,0x84,0x70,0x28,0x0b, + } + }, + { 0xa001053, { + 0x92,0x0e,0xf4,0x69,0x10,0x3b,0xf9,0x9d, + 0x31,0x1b,0xa6,0x99,0x08,0x7d,0xd7,0x25, + 0x7e,0x1e,0x89,0xba,0x35,0x8d,0xac,0xcb, + 0x3a,0xb4,0xdf,0x58,0x12,0xcf,0xc0,0xc3, + } + }, + { 0xa001058, { + 0x33,0x7d,0xa9,0xb5,0x4e,0x62,0x13,0x36, + 0xef,0x66,0xc9,0xbd,0x0a,0xa6,0x3b,0x19, + 0xcb,0xf5,0xc2,0xc3,0x55,0x47,0x20,0xec, + 0x1f,0x7b,0xa1,0x44,0x0e,0x8e,0xa4,0xb2, + } + }, + { 0xa001075, { + 0x39,0x02,0x82,0xd0,0x7c,0x26,0x43,0xe9, + 0x26,0xa3,0xd9,0x96,0xf7,0x30,0x13,0x0a, + 0x8a,0x0e,0xac,0xe7,0x1d,0xdc,0xe2,0x0f, + 0xcb,0x9e,0x8d,0xbc,0xd2,0xa2,0x44,0xe0, + } + }, + { 0xa001078, { + 0x2d,0x67,0xc7,0x35,0xca,0xef,0x2f,0x25, + 0x4c,0x45,0x93,0x3f,0x36,0x01,0x8c,0xce, + 0xa8,0x5b,0x07,0xd3,0xc1,0x35,0x3c,0x04, + 0x20,0xa2,0xfc,0xdc,0xe6,0xce,0x26,0x3e, + } + }, + { 0xa001079, { + 0x43,0xe2,0x05,0x9c,0xfd,0xb7,0x5b,0xeb, + 0x5b,0xe9,0xeb,0x3b,0x96,0xf4,0xe4,0x93, + 0x73,0x45,0x3e,0xac,0x8d,0x3b,0xe4,0xdb, + 0x10,0x31,0xc1,0xe4,0xa2,0xd0,0x5a,0x8a, + } + }, + { 0xa00107a, { + 0x5f,0x92,0xca,0xff,0xc3,0x59,0x22,0x5f, + 0x02,0xa0,0x91,0x3b,0x4a,0x45,0x10,0xfd, + 0x19,0xe1,0x8a,0x6d,0x9a,0x92,0xc1,0x3f, + 0x75,0x78,0xac,0x78,0x03,0x1d,0xdb,0x18, + } + }, + { 0xa001143, { + 0x56,0xca,0xf7,0x43,0x8a,0x4c,0x46,0x80, + 0xec,0xde,0xe5,0x9c,0x50,0x84,0x9a,0x42, + 0x27,0xe5,0x51,0x84,0x8f,0x19,0xc0,0x8d, + 0x0c,0x25,0xb4,0xb0,0x8f,0x10,0xf3,0xf8, + } + }, + { 0xa001144, { + 0x42,0xd5,0x9b,0xa7,0xd6,0x15,0x29,0x41, + 0x61,0xc4,0x72,0x3f,0xf3,0x06,0x78,0x4b, + 0x65,0xf3,0x0e,0xfa,0x9c,0x87,0xde,0x25, + 0xbd,0xb3,0x9a,0xf4,0x75,0x13,0x53,0xdc, + } + }, + { 0xa00115d, { + 0xd4,0xc4,0x49,0x36,0x89,0x0b,0x47,0xdd, + 0xfb,0x2f,0x88,0x3b,0x5f,0xf2,0x8e,0x75, + 0xc6,0x6c,0x37,0x5a,0x90,0x25,0x94,0x3e, + 0x36,0x9c,0xae,0x02,0x38,0x6c,0xf5,0x05, + } + }, + { 0xa001173, { + 0x28,0xbb,0x9b,0xd1,0xa0,0xa0,0x7e,0x3a, + 0x59,0x20,0xc0,0xa9,0xb2,0x5c,0xc3,0x35, + 0x53,0x89,0xe1,0x4c,0x93,0x2f,0x1d,0xc3, + 0xe5,0xf7,0xf3,0xc8,0x9b,0x61,0xaa,0x9e, + } + }, + { 0xa0011a8, { + 0x97,0xc6,0x16,0x65,0x99,0xa4,0x85,0x3b, + 0xf6,0xce,0xaa,0x49,0x4a,0x3a,0xc5,0xb6, + 0x78,0x25,0xbc,0x53,0xaf,0x5d,0xcf,0xf4, + 0x23,0x12,0xbb,0xb1,0xbc,0x8a,0x02,0x2e, + } + }, + { 0xa0011ce, { + 0xcf,0x1c,0x90,0xa3,0x85,0x0a,0xbf,0x71, + 0x94,0x0e,0x80,0x86,0x85,0x4f,0xd7,0x86, + 0xae,0x38,0x23,0x28,0x2b,0x35,0x9b,0x4e, + 0xfe,0xb8,0xcd,0x3d,0x3d,0x39,0xc9,0x6a, + } + }, + { 0xa0011d1, { + 0xdf,0x0e,0xca,0xde,0xf6,0xce,0x5c,0x1e, + 0x4c,0xec,0xd7,0x71,0x83,0xcc,0xa8,0x09, + 0xc7,0xc5,0xfe,0xb2,0xf7,0x05,0xd2,0xc5, + 0x12,0xdd,0xe4,0xf3,0x92,0x1c,0x3d,0xb8, + } + }, + { 0xa0011d3, { + 0x91,0xe6,0x10,0xd7,0x57,0xb0,0x95,0x0b, + 0x9a,0x24,0xee,0xf7,0xcf,0x56,0xc1,0xa6, + 0x4a,0x52,0x7d,0x5f,0x9f,0xdf,0xf6,0x00, + 0x65,0xf7,0xea,0xe8,0x2a,0x88,0xe2,0x26, + } + }, + { 0xa0011d5, { + 0xed,0x69,0x89,0xf4,0xeb,0x64,0xc2,0x13, + 0xe0,0x51,0x1f,0x03,0x26,0x52,0x7d,0xb7, + 0x93,0x5d,0x65,0xca,0xb8,0x12,0x1d,0x62, + 0x0d,0x5b,0x65,0x34,0x69,0xb2,0x62,0x21, + } + }, + { 0xa001223, { + 0xfb,0x32,0x5f,0xc6,0x83,0x4f,0x8c,0xb8, + 0xa4,0x05,0xf9,0x71,0x53,0x01,0x16,0xc4, + 0x83,0x75,0x94,0xdd,0xeb,0x7e,0xb7,0x15, + 0x8e,0x3b,0x50,0x29,0x8a,0x9c,0xcc,0x45, + } + }, + { 0xa001224, { + 0x0e,0x0c,0xdf,0xb4,0x89,0xee,0x35,0x25, + 0xdd,0x9e,0xdb,0xc0,0x69,0x83,0x0a,0xad, + 0x26,0xa9,0xaa,0x9d,0xfc,0x3c,0xea,0xf9, + 0x6c,0xdc,0xd5,0x6d,0x8b,0x6e,0x85,0x4a, + } + }, + { 0xa001227, { + 0xab,0xc6,0x00,0x69,0x4b,0x50,0x87,0xad, + 0x5f,0x0e,0x8b,0xea,0x57,0x38,0xce,0x1d, + 0x0f,0x75,0x26,0x02,0xf6,0xd6,0x96,0xe9, + 0x87,0xb9,0xd6,0x20,0x27,0x7c,0xd2,0xe0, + } + }, + { 0xa001229, { + 0x7f,0x49,0x49,0x48,0x46,0xa5,0x50,0xa6, + 0x28,0x89,0x98,0xe2,0x9e,0xb4,0x7f,0x75, + 0x33,0xa7,0x04,0x02,0xe4,0x82,0xbf,0xb4, + 0xa5,0x3a,0xba,0x24,0x8d,0x31,0x10,0x1d, + } + }, + { 0xa00122e, { + 0x56,0x94,0xa9,0x5d,0x06,0x68,0xfe,0xaf, + 0xdf,0x7a,0xff,0x2d,0xdf,0x74,0x0f,0x15, + 0x66,0xfb,0x00,0xb5,0x51,0x97,0x9b,0xfa, + 0xcb,0x79,0x85,0x46,0x25,0xb4,0xd2,0x10, + } + }, + { 0xa001231, { + 0x0b,0x46,0xa5,0xfc,0x18,0x15,0xa0,0x9e, + 0xa6,0xdc,0xb7,0xff,0x17,0xf7,0x30,0x64, + 0xd4,0xda,0x9e,0x1b,0xc3,0xfc,0x02,0x3b, + 0xe2,0xc6,0x0e,0x41,0x54,0xb5,0x18,0xdd, + } + }, + { 0xa001234, { + 0x88,0x8d,0xed,0xab,0xb5,0xbd,0x4e,0xf7, + 0x7f,0xd4,0x0e,0x95,0x34,0x91,0xff,0xcc, + 0xfb,0x2a,0xcd,0xf7,0xd5,0xdb,0x4c,0x9b, + 0xd6,0x2e,0x73,0x50,0x8f,0x83,0x79,0x1a, + } + }, + { 0xa001236, { + 0x3d,0x30,0x00,0xb9,0x71,0xba,0x87,0x78, + 0xa8,0x43,0x55,0xc4,0x26,0x59,0xcf,0x9d, + 0x93,0xce,0x64,0x0e,0x8b,0x72,0x11,0x8b, + 0xa3,0x8f,0x51,0xe9,0xca,0x98,0xaa,0x25, + } + }, + { 0xa001238, { + 0x72,0xf7,0x4b,0x0c,0x7d,0x58,0x65,0xcc, + 0x00,0xcc,0x57,0x16,0x68,0x16,0xf8,0x2a, + 0x1b,0xb3,0x8b,0xe1,0xb6,0x83,0x8c,0x7e, + 0xc0,0xcd,0x33,0xf2,0x8d,0xf9,0xef,0x59, + } + }, + { 0xa00820c, { + 0xa8,0x0c,0x81,0xc0,0xa6,0x00,0xe7,0xf3, + 0x5f,0x65,0xd3,0xb9,0x6f,0xea,0x93,0x63, + 0xf1,0x8c,0x88,0x45,0xd7,0x82,0x80,0xd1, + 0xe1,0x3b,0x8d,0xb2,0xf8,0x22,0x03,0xe2, + } + }, + { 0xa10113e, { + 0x05,0x3c,0x66,0xd7,0xa9,0x5a,0x33,0x10, + 0x1b,0xf8,0x9c,0x8f,0xed,0xfc,0xa7,0xa0, + 0x15,0xe3,0x3f,0x4b,0x1d,0x0d,0x0a,0xd5, + 0xfa,0x90,0xc4,0xed,0x9d,0x90,0xaf,0x53, + } + }, + { 0xa101144, { + 0xb3,0x0b,0x26,0x9a,0xf8,0x7c,0x02,0x26, + 0x35,0x84,0x53,0xa4,0xd3,0x2c,0x7c,0x09, + 0x68,0x7b,0x96,0xb6,0x93,0xef,0xde,0xbc, + 0xfd,0x4b,0x15,0xd2,0x81,0xd3,0x51,0x47, + } + }, + { 0xa101148, { + 0x20,0xd5,0x6f,0x40,0x4a,0xf6,0x48,0x90, + 0xc2,0x93,0x9a,0xc2,0xfd,0xac,0xef,0x4f, + 0xfa,0xc0,0x3d,0x92,0x3c,0x6d,0x01,0x08, + 0xf1,0x5e,0xb0,0xde,0xb4,0x98,0xae,0xc4, + } + }, + { 0xa10123e, { + 0x03,0xb9,0x2c,0x76,0x48,0x93,0xc9,0x18, + 0xfb,0x56,0xfd,0xf7,0xe2,0x1d,0xca,0x4d, + 0x1d,0x13,0x53,0x63,0xfe,0x42,0x6f,0xfc, + 0x19,0x0f,0xf1,0xfc,0xa7,0xdd,0x89,0x1b, + } + }, + { 0xa101244, { + 0x71,0x56,0xb5,0x9f,0x21,0xbf,0xb3,0x3c, + 0x8c,0xd7,0x36,0xd0,0x34,0x52,0x1b,0xb1, + 0x46,0x2f,0x04,0xf0,0x37,0xd8,0x1e,0x72, + 0x24,0xa2,0x80,0x84,0x83,0x65,0x84,0xc0, + } + }, + { 0xa101248, { + 0xed,0x3b,0x95,0xa6,0x68,0xa7,0x77,0x3e, + 0xfc,0x17,0x26,0xe2,0x7b,0xd5,0x56,0x22, + 0x2c,0x1d,0xef,0xeb,0x56,0xdd,0xba,0x6e, + 0x1b,0x7d,0x64,0x9d,0x4b,0x53,0x13,0x75, + } + }, + { 0xa108108, { + 0xed,0xc2,0xec,0xa1,0x15,0xc6,0x65,0xe9, + 0xd0,0xef,0x39,0xaa,0x7f,0x55,0x06,0xc6, + 0xf5,0xd4,0x3f,0x7b,0x14,0xd5,0x60,0x2c, + 0x28,0x1e,0x9c,0x59,0x69,0x99,0x4d,0x16, + } + }, + { 0xa20102d, { + 0xf9,0x6e,0xf2,0x32,0xd3,0x0f,0x5f,0x11, + 0x59,0xa1,0xfe,0xcc,0xcd,0x9b,0x42,0x89, + 0x8b,0x89,0x2f,0xb5,0xbb,0x82,0xef,0x23, + 0x8c,0xe9,0x19,0x3e,0xcc,0x3f,0x7b,0xb4, + } + }, + { 0xa201210, { + 0xe8,0x6d,0x51,0x6a,0x8e,0x72,0xf3,0xfe, + 0x6e,0x16,0xbc,0x62,0x59,0x40,0x17,0xe9, + 0x6d,0x3d,0x0e,0x6b,0xa7,0xac,0xe3,0x68, + 0xf7,0x55,0xf0,0x13,0xbb,0x22,0xf6,0x41, + } + }, + { 0xa404107, { + 0xbb,0x04,0x4e,0x47,0xdd,0x5e,0x26,0x45, + 0x1a,0xc9,0x56,0x24,0xa4,0x4c,0x82,0xb0, + 0x8b,0x0d,0x9f,0xf9,0x3a,0xdf,0xc6,0x81, + 0x13,0xbc,0xc5,0x25,0xe4,0xc5,0xc3,0x99, + } + }, + { 0xa500011, { + 0x23,0x3d,0x70,0x7d,0x03,0xc3,0xc4,0xf4, + 0x2b,0x82,0xc6,0x05,0xda,0x80,0x0a,0xf1, + 0xd7,0x5b,0x65,0x3a,0x7d,0xab,0xdf,0xa2, + 0x11,0x5e,0x96,0x7e,0x71,0xe9,0xfc,0x74, + } + }, + { 0xa601209, { + 0x66,0x48,0xd4,0x09,0x05,0xcb,0x29,0x32, + 0x66,0xb7,0x9a,0x76,0xcd,0x11,0xf3,0x30, + 0x15,0x86,0xcc,0x5d,0x97,0x0f,0xc0,0x46, + 0xe8,0x73,0xe2,0xd6,0xdb,0xd2,0x77,0x1d, + } + }, + { 0xa704107, { + 0xf3,0xc6,0x58,0x26,0xee,0xac,0x3f,0xd6, + 0xce,0xa1,0x72,0x47,0x3b,0xba,0x2b,0x93, + 0x2a,0xad,0x8e,0x6b,0xea,0x9b,0xb7,0xc2, + 0x64,0x39,0x71,0x8c,0xce,0xe7,0x41,0x39, + } + }, + { 0xa705206, { + 0x8d,0xc0,0x76,0xbd,0x58,0x9f,0x8f,0xa4, + 0x12,0x9d,0x21,0xfb,0x48,0x21,0xbc,0xe7, + 0x67,0x6f,0x04,0x18,0xae,0x20,0x87,0x4b, + 0x03,0x35,0xe9,0xbe,0xfb,0x06,0xdf,0xfc, + } + }, + { 0xa708007, { + 0x6b,0x76,0xcc,0x78,0xc5,0x8a,0xa3,0xe3, + 0x32,0x2d,0x79,0xe4,0xc3,0x80,0xdb,0xb2, + 0x07,0xaa,0x3a,0xe0,0x57,0x13,0x72,0x80, + 0xdf,0x92,0x73,0x84,0x87,0x3c,0x73,0x93, + } + }, + { 0xa70c005, { + 0x88,0x5d,0xfb,0x79,0x64,0xd8,0x46,0x3b, + 0x4a,0x83,0x8e,0x77,0x7e,0xcf,0xb3,0x0f, + 0x1f,0x1f,0xf1,0x97,0xeb,0xfe,0x56,0x55, + 0xee,0x49,0xac,0xe1,0x8b,0x13,0xc5,0x13, + } + }, + { 0xaa00116, { + 0xe8,0x4c,0x2c,0x88,0xa1,0xac,0x24,0x63, + 0x65,0xe5,0xaa,0x2d,0x16,0xa9,0xc3,0xf5, + 0xfe,0x1d,0x5e,0x65,0xc7,0xaa,0x92,0x4d, + 0x91,0xee,0x76,0xbb,0x4c,0x66,0x78,0xc9, + } + }, + { 0xaa00212, { + 0xbd,0x57,0x5d,0x0a,0x0a,0x30,0xc1,0x75, + 0x95,0x58,0x5e,0x93,0x02,0x28,0x43,0x71, + 0xed,0x42,0x29,0xc8,0xec,0x34,0x2b,0xb2, + 0x1a,0x65,0x4b,0xfe,0x07,0x0f,0x34,0xa1, + } + }, + { 0xaa00213, { + 0xed,0x58,0xb7,0x76,0x81,0x7f,0xd9,0x3a, + 0x1a,0xff,0x8b,0x34,0xb8,0x4a,0x99,0x0f, + 0x28,0x49,0x6c,0x56,0x2b,0xdc,0xb7,0xed, + 0x96,0xd5,0x9d,0xc1,0x7a,0xd4,0x51,0x9b, + } + }, + { 0xaa00215, { + 0x55,0xd3,0x28,0xcb,0x87,0xa9,0x32,0xe9, + 0x4e,0x85,0x4b,0x7c,0x6b,0xd5,0x7c,0xd4, + 0x1b,0x51,0x71,0x3a,0x0e,0x0b,0xdc,0x9b, + 0x68,0x2f,0x46,0xee,0xfe,0xc6,0x6d,0xef, + } + }, +}; -- GitLab From e1a0bdbdfdf08428f0ede5ae49c7f4139ac73ef5 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 20 Feb 2025 08:47:10 +0200 Subject: [PATCH 0961/1585] RDMA/mlx5: Fix bind QP error cleanup flow When there is a failure during bind QP, the cleanup flow destroys the counter regardless if it is the one that created it or not, which is problematic since if it isn't the one that created it, that counter could still be in use. Fix that by destroying the counter only if it was created during this call. Fixes: 45842fc627c7 ("IB/mlx5: Support statistic q counter configuration") Signed-off-by: Patrisious Haddad Reviewed-by: Mark Zhang Link: https://patch.msgid.link/25dfefddb0ebefa668c32e06a94d84e3216257cf.1740033937.git.leon@kernel.org Reviewed-by: Zhu Yanjun Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/counters.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 4f6c1968a2ee3..81cfa74147a18 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -546,6 +546,7 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, struct ib_qp *qp) { struct mlx5_ib_dev *dev = to_mdev(qp->device); + bool new = false; int err; if (!counter->id) { @@ -560,6 +561,7 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, return err; counter->id = MLX5_GET(alloc_q_counter_out, out, counter_set_id); + new = true; } err = mlx5_ib_qp_set_counter(qp, counter); @@ -569,8 +571,10 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, return 0; fail_set_counter: - mlx5_ib_counter_dealloc(counter); - counter->id = 0; + if (new) { + mlx5_ib_counter_dealloc(counter); + counter->id = 0; + } return err; } -- GitLab From b66535356a4834a234f99e16a97eb51f2c6c5a7d Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Sat, 22 Feb 2025 07:20:21 -0800 Subject: [PATCH 0962/1585] RDMA/bnxt_re: Fix the page details for the srq created by kernel consumers While using nvme target with use_srq on, below kernel panic is noticed. [ 549.698111] bnxt_en 0000:41:00.0 enp65s0np0: FEC autoneg off encoding: Clause 91 RS(544,514) [ 566.393619] Oops: divide error: 0000 [#1] PREEMPT SMP NOPTI .. [ 566.393799] [ 566.393807] ? __die_body+0x1a/0x60 [ 566.393823] ? die+0x38/0x60 [ 566.393835] ? do_trap+0xe4/0x110 [ 566.393847] ? bnxt_qplib_alloc_init_hwq+0x1d4/0x580 [bnxt_re] [ 566.393867] ? bnxt_qplib_alloc_init_hwq+0x1d4/0x580 [bnxt_re] [ 566.393881] ? do_error_trap+0x7c/0x120 [ 566.393890] ? bnxt_qplib_alloc_init_hwq+0x1d4/0x580 [bnxt_re] [ 566.393911] ? exc_divide_error+0x34/0x50 [ 566.393923] ? bnxt_qplib_alloc_init_hwq+0x1d4/0x580 [bnxt_re] [ 566.393939] ? asm_exc_divide_error+0x16/0x20 [ 566.393966] ? bnxt_qplib_alloc_init_hwq+0x1d4/0x580 [bnxt_re] [ 566.393997] bnxt_qplib_create_srq+0xc9/0x340 [bnxt_re] [ 566.394040] bnxt_re_create_srq+0x335/0x3b0 [bnxt_re] [ 566.394057] ? srso_return_thunk+0x5/0x5f [ 566.394068] ? __init_swait_queue_head+0x4a/0x60 [ 566.394090] ib_create_srq_user+0xa7/0x150 [ib_core] [ 566.394147] nvmet_rdma_queue_connect+0x7d0/0xbe0 [nvmet_rdma] [ 566.394174] ? lock_release+0x22c/0x3f0 [ 566.394187] ? srso_return_thunk+0x5/0x5f Page size and shift info is set only for the user space SRQs. Set page size and page shift for kernel space SRQs also. Fixes: 0c4dcd602817 ("RDMA/bnxt_re: Refactor hardware queue memory allocation") Signed-off-by: Kashyap Desai Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/1740237621-29291-1-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 2de101d6e8255..6f5db32082dd7 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1870,6 +1870,8 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; srq->srq_limit = srq_init_attr->attr.srq_limit; srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id; + srq->qplib_srq.sg_info.pgsize = PAGE_SIZE; + srq->qplib_srq.sg_info.pgshft = PAGE_SHIFT; nq = &rdev->nqr->nq[0]; if (udata) { -- GitLab From 174e5e9da4f5946de3d09c32ee56cbbc9d70505b Mon Sep 17 00:00:00 2001 From: Patrick Rudolph Date: Fri, 21 Feb 2025 09:12:42 +0100 Subject: [PATCH 0963/1585] efi/cper: Fix cper_ia_proc_ctx alignment According to the UEFI Common Platform Error Record appendix, the IA32/X64 Processor Context Information Structure is a variable length structure, but "is padded with zeros if the size is not a multiple of 16 bytes". Currently this isn't honoured, causing all but the first structure to be garbage when printed. Thus align the size to be a multiple of 16. Signed-off-by: Patrick Rudolph Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/cper-x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/cper-x86.c b/drivers/firmware/efi/cper-x86.c index 438ed9eff6d01..3949d7b5e808f 100644 --- a/drivers/firmware/efi/cper-x86.c +++ b/drivers/firmware/efi/cper-x86.c @@ -325,7 +325,7 @@ void cper_print_proc_ia(const char *pfx, const struct cper_sec_proc_ia *proc) ctx_info = (struct cper_ia_proc_ctx *)err_info; for (i = 0; i < VALID_PROC_CXT_INFO_NUM(proc->validation_bits); i++) { - int size = sizeof(*ctx_info) + ctx_info->reg_arr_size; + int size = ALIGN(sizeof(*ctx_info) + ctx_info->reg_arr_size, 16); int groupsize = 4; printk("%sContext Information Structure %d:\n", pfx, i); -- GitLab From d6a2d02aa060531607f4a8411ec384470faa2761 Mon Sep 17 00:00:00 2001 From: Patrick Rudolph Date: Fri, 21 Feb 2025 12:15:16 +0100 Subject: [PATCH 0964/1585] efi/cper: Fix cper_arm_ctx_info alignment According to the UEFI Common Platform Error Record appendix, the processor context information structure is a variable length structure, but "is padded with zeros if the size is not a multiple of 16 bytes". Currently this isn't honoured, causing all but the first structure to be garbage when printed. Thus align the size to be a multiple of 16. Signed-off-by: Patrick Rudolph Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/cper-arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/cper-arm.c b/drivers/firmware/efi/cper-arm.c index fa9c1c3bf168b..f0a63d09d3c49 100644 --- a/drivers/firmware/efi/cper-arm.c +++ b/drivers/firmware/efi/cper-arm.c @@ -311,7 +311,7 @@ void cper_print_proc_arm(const char *pfx, ctx_info = (struct cper_arm_ctx_info *)err_info; max_ctx_type = ARRAY_SIZE(arm_reg_ctx_strs) - 1; for (i = 0; i < proc->context_info_num; i++) { - int size = sizeof(*ctx_info) + ctx_info->size; + int size = ALIGN(sizeof(*ctx_info) + ctx_info->size, 16); printk("%sContext info structure %d:\n", pfx, i); if (len < size) { -- GitLab From cb6ae457bc6af58c84a7854df5e7e32ba1c6a715 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 23 Feb 2025 16:48:54 +0100 Subject: [PATCH 0965/1585] efivarfs: Defer PM notifier registration until .fill_super syzbot reports an issue that turns out to be caused by the fact that the efivarfs PM notifier may be invoked before the efivarfs_fs_info::sb field is populated, resulting in a NULL deference. So defer the registration until efivarfs_fill_super() is invoked. Reported-by: syzbot+00d13e505ef530a45100@syzkaller.appspotmail.com Tested-by: syzbot+00d13e505ef530a45100@syzkaller.appspotmail.com Signed-off-by: Ard Biesheuvel --- fs/efivarfs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 09fcf731e65d6..6eae8cf655c12 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -367,6 +367,8 @@ static int efivarfs_fill_super(struct super_block *sb, struct fs_context *fc) if (err) return err; + register_pm_notifier(&sfi->pm_nb); + return efivar_init(efivarfs_callback, sb, true); } @@ -552,7 +554,6 @@ static int efivarfs_init_fs_context(struct fs_context *fc) sfi->pm_nb.notifier_call = efivarfs_pm_notify; sfi->pm_nb.priority = 0; - register_pm_notifier(&sfi->pm_nb); return 0; } -- GitLab From d082ecbc71e9e0bf49883ee4afd435a77a5101b6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 23 Feb 2025 12:32:57 -0800 Subject: [PATCH 0966/1585] Linux 6.14-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 96407c1d6be16..30dab4c8b0120 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Baby Opossum Posse # *DOCUMENTATION* -- GitLab From cf3e6960263a2ecdf5528056b321e41557e9b03d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 22 Feb 2025 09:05:36 -0500 Subject: [PATCH 0967/1585] bcachefs: fix bch2_extent_ptr_eq() Reviewed-by: Thorsten Blum Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 620b284aa34f0..204d765dd74c8 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -704,7 +704,7 @@ static inline bool bch2_extent_ptr_eq(struct bch_extent_ptr ptr1, ptr1.unwritten == ptr2.unwritten && ptr1.offset == ptr2.offset && ptr1.dev == ptr2.dev && - ptr1.dev == ptr2.dev); + ptr1.gen == ptr2.gen); } void bch2_ptr_swab(struct bkey_s); -- GitLab From f15176b8b6e72ac30e14fd273282d2b72562d26b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 20 Feb 2025 19:48:15 +0100 Subject: [PATCH 0968/1585] net: dsa: rtl8366rb: Fix compilation problem When the kernel is compiled without LED framework support the rtl8366rb fails to build like this: rtl8366rb.o: in function `rtl8366rb_setup_led': rtl8366rb.c:953:(.text.unlikely.rtl8366rb_setup_led+0xe8): undefined reference to `led_init_default_state_get' rtl8366rb.c:980:(.text.unlikely.rtl8366rb_setup_led+0x240): undefined reference to `devm_led_classdev_register_ext' As this is constantly coming up in different randconfig builds, bite the bullet and create a separate file for the offending code, split out a header with all stuff needed both in the core driver and the leds code. Add a new bool Kconfig option for the LED compile target, such that it depends on LEDS_CLASS=y || LEDS_CLASS=RTL8366RB which make LED support always available when LEDS_CLASS is compiled into the kernel and enforce that if the LEDS_CLASS is a module, then the RTL8366RB driver needs to be a module as well so that modprobe can resolve the dependencies. Fixes: 32d617005475 ("net: dsa: realtek: add LED drivers for rtl8366rb") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202502070525.xMUImayb-lkp@intel.com/ Signed-off-by: Linus Walleij Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/realtek/Kconfig | 6 + drivers/net/dsa/realtek/Makefile | 3 + drivers/net/dsa/realtek/rtl8366rb-leds.c | 177 ++++++++++++++++ drivers/net/dsa/realtek/rtl8366rb.c | 258 +---------------------- drivers/net/dsa/realtek/rtl8366rb.h | 107 ++++++++++ 5 files changed, 299 insertions(+), 252 deletions(-) create mode 100644 drivers/net/dsa/realtek/rtl8366rb-leds.c create mode 100644 drivers/net/dsa/realtek/rtl8366rb.h diff --git a/drivers/net/dsa/realtek/Kconfig b/drivers/net/dsa/realtek/Kconfig index 6989972eebc30..10687722d14c0 100644 --- a/drivers/net/dsa/realtek/Kconfig +++ b/drivers/net/dsa/realtek/Kconfig @@ -43,4 +43,10 @@ config NET_DSA_REALTEK_RTL8366RB help Select to enable support for Realtek RTL8366RB. +config NET_DSA_REALTEK_RTL8366RB_LEDS + bool "Support RTL8366RB LED control" + depends on (LEDS_CLASS=y || LEDS_CLASS=NET_DSA_REALTEK_RTL8366RB) + depends on NET_DSA_REALTEK_RTL8366RB + default NET_DSA_REALTEK_RTL8366RB + endif diff --git a/drivers/net/dsa/realtek/Makefile b/drivers/net/dsa/realtek/Makefile index 35491dc20d6d6..17367bcba496c 100644 --- a/drivers/net/dsa/realtek/Makefile +++ b/drivers/net/dsa/realtek/Makefile @@ -12,4 +12,7 @@ endif obj-$(CONFIG_NET_DSA_REALTEK_RTL8366RB) += rtl8366.o rtl8366-objs := rtl8366-core.o rtl8366rb.o +ifdef CONFIG_NET_DSA_REALTEK_RTL8366RB_LEDS +rtl8366-objs += rtl8366rb-leds.o +endif obj-$(CONFIG_NET_DSA_REALTEK_RTL8365MB) += rtl8365mb.o diff --git a/drivers/net/dsa/realtek/rtl8366rb-leds.c b/drivers/net/dsa/realtek/rtl8366rb-leds.c new file mode 100644 index 0000000000000..99c890681ae60 --- /dev/null +++ b/drivers/net/dsa/realtek/rtl8366rb-leds.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include "rtl83xx.h" +#include "rtl8366rb.h" + +static inline u32 rtl8366rb_led_group_port_mask(u8 led_group, u8 port) +{ + switch (led_group) { + case 0: + return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); + case 1: + return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); + case 2: + return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); + case 3: + return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); + default: + return 0; + } +} + +static int rb8366rb_get_port_led(struct rtl8366rb_led *led) +{ + struct realtek_priv *priv = led->priv; + u8 led_group = led->led_group; + u8 port_num = led->port_num; + int ret; + u32 val; + + ret = regmap_read(priv->map, RTL8366RB_LED_X_X_CTRL_REG(led_group), + &val); + if (ret) { + dev_err(priv->dev, "error reading LED on port %d group %d\n", + led_group, port_num); + return ret; + } + + return !!(val & rtl8366rb_led_group_port_mask(led_group, port_num)); +} + +static int rb8366rb_set_port_led(struct rtl8366rb_led *led, bool enable) +{ + struct realtek_priv *priv = led->priv; + u8 led_group = led->led_group; + u8 port_num = led->port_num; + int ret; + + ret = regmap_update_bits(priv->map, + RTL8366RB_LED_X_X_CTRL_REG(led_group), + rtl8366rb_led_group_port_mask(led_group, + port_num), + enable ? 0xffff : 0); + if (ret) { + dev_err(priv->dev, "error updating LED on port %d group %d\n", + led_group, port_num); + return ret; + } + + /* Change the LED group to manual controlled LEDs if required */ + ret = rb8366rb_set_ledgroup_mode(priv, led_group, + RTL8366RB_LEDGROUP_FORCE); + + if (ret) { + dev_err(priv->dev, "error updating LED GROUP group %d\n", + led_group); + return ret; + } + + return 0; +} + +static int +rtl8366rb_cled_brightness_set_blocking(struct led_classdev *ldev, + enum led_brightness brightness) +{ + struct rtl8366rb_led *led = container_of(ldev, struct rtl8366rb_led, + cdev); + + return rb8366rb_set_port_led(led, brightness == LED_ON); +} + +static int rtl8366rb_setup_led(struct realtek_priv *priv, struct dsa_port *dp, + struct fwnode_handle *led_fwnode) +{ + struct rtl8366rb *rb = priv->chip_data; + struct led_init_data init_data = { }; + enum led_default_state state; + struct rtl8366rb_led *led; + u32 led_group; + int ret; + + ret = fwnode_property_read_u32(led_fwnode, "reg", &led_group); + if (ret) + return ret; + + if (led_group >= RTL8366RB_NUM_LEDGROUPS) { + dev_warn(priv->dev, "Invalid LED reg %d defined for port %d", + led_group, dp->index); + return -EINVAL; + } + + led = &rb->leds[dp->index][led_group]; + led->port_num = dp->index; + led->led_group = led_group; + led->priv = priv; + + state = led_init_default_state_get(led_fwnode); + switch (state) { + case LEDS_DEFSTATE_ON: + led->cdev.brightness = 1; + rb8366rb_set_port_led(led, 1); + break; + case LEDS_DEFSTATE_KEEP: + led->cdev.brightness = + rb8366rb_get_port_led(led); + break; + case LEDS_DEFSTATE_OFF: + default: + led->cdev.brightness = 0; + rb8366rb_set_port_led(led, 0); + } + + led->cdev.max_brightness = 1; + led->cdev.brightness_set_blocking = + rtl8366rb_cled_brightness_set_blocking; + init_data.fwnode = led_fwnode; + init_data.devname_mandatory = true; + + init_data.devicename = kasprintf(GFP_KERNEL, "Realtek-%d:0%d:%d", + dp->ds->index, dp->index, led_group); + if (!init_data.devicename) + return -ENOMEM; + + ret = devm_led_classdev_register_ext(priv->dev, &led->cdev, &init_data); + if (ret) { + dev_warn(priv->dev, "Failed to init LED %d for port %d", + led_group, dp->index); + return ret; + } + + return 0; +} + +int rtl8366rb_setup_leds(struct realtek_priv *priv) +{ + struct dsa_switch *ds = &priv->ds; + struct device_node *leds_np; + struct dsa_port *dp; + int ret = 0; + + dsa_switch_for_each_port(dp, ds) { + if (!dp->dn) + continue; + + leds_np = of_get_child_by_name(dp->dn, "leds"); + if (!leds_np) { + dev_dbg(priv->dev, "No leds defined for port %d", + dp->index); + continue; + } + + for_each_child_of_node_scoped(leds_np, led_np) { + ret = rtl8366rb_setup_led(priv, dp, + of_fwnode_handle(led_np)); + if (ret) + break; + } + + of_node_put(leds_np); + if (ret) + return ret; + } + return 0; +} diff --git a/drivers/net/dsa/realtek/rtl8366rb.c b/drivers/net/dsa/realtek/rtl8366rb.c index 4c4a95d4380ce..f54771cab56d4 100644 --- a/drivers/net/dsa/realtek/rtl8366rb.c +++ b/drivers/net/dsa/realtek/rtl8366rb.c @@ -27,11 +27,7 @@ #include "realtek-smi.h" #include "realtek-mdio.h" #include "rtl83xx.h" - -#define RTL8366RB_PORT_NUM_CPU 5 -#define RTL8366RB_NUM_PORTS 6 -#define RTL8366RB_PHY_NO_MAX 4 -#define RTL8366RB_PHY_ADDR_MAX 31 +#include "rtl8366rb.h" /* Switch Global Configuration register */ #define RTL8366RB_SGCR 0x0000 @@ -176,39 +172,6 @@ */ #define RTL8366RB_VLAN_INGRESS_CTRL2_REG 0x037f -/* LED control registers */ -/* The LED blink rate is global; it is used by all triggers in all groups. */ -#define RTL8366RB_LED_BLINKRATE_REG 0x0430 -#define RTL8366RB_LED_BLINKRATE_MASK 0x0007 -#define RTL8366RB_LED_BLINKRATE_28MS 0x0000 -#define RTL8366RB_LED_BLINKRATE_56MS 0x0001 -#define RTL8366RB_LED_BLINKRATE_84MS 0x0002 -#define RTL8366RB_LED_BLINKRATE_111MS 0x0003 -#define RTL8366RB_LED_BLINKRATE_222MS 0x0004 -#define RTL8366RB_LED_BLINKRATE_446MS 0x0005 - -/* LED trigger event for each group */ -#define RTL8366RB_LED_CTRL_REG 0x0431 -#define RTL8366RB_LED_CTRL_OFFSET(led_group) \ - (4 * (led_group)) -#define RTL8366RB_LED_CTRL_MASK(led_group) \ - (0xf << RTL8366RB_LED_CTRL_OFFSET(led_group)) - -/* The RTL8366RB_LED_X_X registers are used to manually set the LED state only - * when the corresponding LED group in RTL8366RB_LED_CTRL_REG is - * RTL8366RB_LEDGROUP_FORCE. Otherwise, it is ignored. - */ -#define RTL8366RB_LED_0_1_CTRL_REG 0x0432 -#define RTL8366RB_LED_2_3_CTRL_REG 0x0433 -#define RTL8366RB_LED_X_X_CTRL_REG(led_group) \ - ((led_group) <= 1 ? \ - RTL8366RB_LED_0_1_CTRL_REG : \ - RTL8366RB_LED_2_3_CTRL_REG) -#define RTL8366RB_LED_0_X_CTRL_MASK GENMASK(5, 0) -#define RTL8366RB_LED_X_1_CTRL_MASK GENMASK(11, 6) -#define RTL8366RB_LED_2_X_CTRL_MASK GENMASK(5, 0) -#define RTL8366RB_LED_X_3_CTRL_MASK GENMASK(11, 6) - #define RTL8366RB_MIB_COUNT 33 #define RTL8366RB_GLOBAL_MIB_COUNT 1 #define RTL8366RB_MIB_COUNTER_PORT_OFFSET 0x0050 @@ -244,7 +207,6 @@ #define RTL8366RB_PORT_STATUS_AN_MASK 0x0080 #define RTL8366RB_NUM_VLANS 16 -#define RTL8366RB_NUM_LEDGROUPS 4 #define RTL8366RB_NUM_VIDS 4096 #define RTL8366RB_PRIORITYMAX 7 #define RTL8366RB_NUM_FIDS 8 @@ -351,46 +313,6 @@ #define RTL8366RB_GREEN_FEATURE_TX BIT(0) #define RTL8366RB_GREEN_FEATURE_RX BIT(2) -enum rtl8366_ledgroup_mode { - RTL8366RB_LEDGROUP_OFF = 0x0, - RTL8366RB_LEDGROUP_DUP_COL = 0x1, - RTL8366RB_LEDGROUP_LINK_ACT = 0x2, - RTL8366RB_LEDGROUP_SPD1000 = 0x3, - RTL8366RB_LEDGROUP_SPD100 = 0x4, - RTL8366RB_LEDGROUP_SPD10 = 0x5, - RTL8366RB_LEDGROUP_SPD1000_ACT = 0x6, - RTL8366RB_LEDGROUP_SPD100_ACT = 0x7, - RTL8366RB_LEDGROUP_SPD10_ACT = 0x8, - RTL8366RB_LEDGROUP_SPD100_10_ACT = 0x9, - RTL8366RB_LEDGROUP_FIBER = 0xa, - RTL8366RB_LEDGROUP_AN_FAULT = 0xb, - RTL8366RB_LEDGROUP_LINK_RX = 0xc, - RTL8366RB_LEDGROUP_LINK_TX = 0xd, - RTL8366RB_LEDGROUP_MASTER = 0xe, - RTL8366RB_LEDGROUP_FORCE = 0xf, - - __RTL8366RB_LEDGROUP_MODE_MAX -}; - -struct rtl8366rb_led { - u8 port_num; - u8 led_group; - struct realtek_priv *priv; - struct led_classdev cdev; -}; - -/** - * struct rtl8366rb - RTL8366RB-specific data - * @max_mtu: per-port max MTU setting - * @pvid_enabled: if PVID is set for respective port - * @leds: per-port and per-ledgroup led info - */ -struct rtl8366rb { - unsigned int max_mtu[RTL8366RB_NUM_PORTS]; - bool pvid_enabled[RTL8366RB_NUM_PORTS]; - struct rtl8366rb_led leds[RTL8366RB_NUM_PORTS][RTL8366RB_NUM_LEDGROUPS]; -}; - static struct rtl8366_mib_counter rtl8366rb_mib_counters[] = { { 0, 0, 4, "IfInOctets" }, { 0, 4, 4, "EtherStatsOctets" }, @@ -831,9 +753,10 @@ static int rtl8366rb_jam_table(const struct rtl8366rb_jam_tbl_entry *jam_table, return 0; } -static int rb8366rb_set_ledgroup_mode(struct realtek_priv *priv, - u8 led_group, - enum rtl8366_ledgroup_mode mode) +/* This code is used also with LEDs disabled */ +int rb8366rb_set_ledgroup_mode(struct realtek_priv *priv, + u8 led_group, + enum rtl8366_ledgroup_mode mode) { int ret; u32 val; @@ -850,144 +773,7 @@ static int rb8366rb_set_ledgroup_mode(struct realtek_priv *priv, return 0; } -static inline u32 rtl8366rb_led_group_port_mask(u8 led_group, u8 port) -{ - switch (led_group) { - case 0: - return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); - case 1: - return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); - case 2: - return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); - case 3: - return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); - default: - return 0; - } -} - -static int rb8366rb_get_port_led(struct rtl8366rb_led *led) -{ - struct realtek_priv *priv = led->priv; - u8 led_group = led->led_group; - u8 port_num = led->port_num; - int ret; - u32 val; - - ret = regmap_read(priv->map, RTL8366RB_LED_X_X_CTRL_REG(led_group), - &val); - if (ret) { - dev_err(priv->dev, "error reading LED on port %d group %d\n", - led_group, port_num); - return ret; - } - - return !!(val & rtl8366rb_led_group_port_mask(led_group, port_num)); -} - -static int rb8366rb_set_port_led(struct rtl8366rb_led *led, bool enable) -{ - struct realtek_priv *priv = led->priv; - u8 led_group = led->led_group; - u8 port_num = led->port_num; - int ret; - - ret = regmap_update_bits(priv->map, - RTL8366RB_LED_X_X_CTRL_REG(led_group), - rtl8366rb_led_group_port_mask(led_group, - port_num), - enable ? 0xffff : 0); - if (ret) { - dev_err(priv->dev, "error updating LED on port %d group %d\n", - led_group, port_num); - return ret; - } - - /* Change the LED group to manual controlled LEDs if required */ - ret = rb8366rb_set_ledgroup_mode(priv, led_group, - RTL8366RB_LEDGROUP_FORCE); - - if (ret) { - dev_err(priv->dev, "error updating LED GROUP group %d\n", - led_group); - return ret; - } - - return 0; -} - -static int -rtl8366rb_cled_brightness_set_blocking(struct led_classdev *ldev, - enum led_brightness brightness) -{ - struct rtl8366rb_led *led = container_of(ldev, struct rtl8366rb_led, - cdev); - - return rb8366rb_set_port_led(led, brightness == LED_ON); -} - -static int rtl8366rb_setup_led(struct realtek_priv *priv, struct dsa_port *dp, - struct fwnode_handle *led_fwnode) -{ - struct rtl8366rb *rb = priv->chip_data; - struct led_init_data init_data = { }; - enum led_default_state state; - struct rtl8366rb_led *led; - u32 led_group; - int ret; - - ret = fwnode_property_read_u32(led_fwnode, "reg", &led_group); - if (ret) - return ret; - - if (led_group >= RTL8366RB_NUM_LEDGROUPS) { - dev_warn(priv->dev, "Invalid LED reg %d defined for port %d", - led_group, dp->index); - return -EINVAL; - } - - led = &rb->leds[dp->index][led_group]; - led->port_num = dp->index; - led->led_group = led_group; - led->priv = priv; - - state = led_init_default_state_get(led_fwnode); - switch (state) { - case LEDS_DEFSTATE_ON: - led->cdev.brightness = 1; - rb8366rb_set_port_led(led, 1); - break; - case LEDS_DEFSTATE_KEEP: - led->cdev.brightness = - rb8366rb_get_port_led(led); - break; - case LEDS_DEFSTATE_OFF: - default: - led->cdev.brightness = 0; - rb8366rb_set_port_led(led, 0); - } - - led->cdev.max_brightness = 1; - led->cdev.brightness_set_blocking = - rtl8366rb_cled_brightness_set_blocking; - init_data.fwnode = led_fwnode; - init_data.devname_mandatory = true; - - init_data.devicename = kasprintf(GFP_KERNEL, "Realtek-%d:0%d:%d", - dp->ds->index, dp->index, led_group); - if (!init_data.devicename) - return -ENOMEM; - - ret = devm_led_classdev_register_ext(priv->dev, &led->cdev, &init_data); - if (ret) { - dev_warn(priv->dev, "Failed to init LED %d for port %d", - led_group, dp->index); - return ret; - } - - return 0; -} - +/* This code is used also with LEDs disabled */ static int rtl8366rb_setup_all_leds_off(struct realtek_priv *priv) { int ret = 0; @@ -1008,38 +794,6 @@ static int rtl8366rb_setup_all_leds_off(struct realtek_priv *priv) return ret; } -static int rtl8366rb_setup_leds(struct realtek_priv *priv) -{ - struct dsa_switch *ds = &priv->ds; - struct device_node *leds_np; - struct dsa_port *dp; - int ret = 0; - - dsa_switch_for_each_port(dp, ds) { - if (!dp->dn) - continue; - - leds_np = of_get_child_by_name(dp->dn, "leds"); - if (!leds_np) { - dev_dbg(priv->dev, "No leds defined for port %d", - dp->index); - continue; - } - - for_each_child_of_node_scoped(leds_np, led_np) { - ret = rtl8366rb_setup_led(priv, dp, - of_fwnode_handle(led_np)); - if (ret) - break; - } - - of_node_put(leds_np); - if (ret) - return ret; - } - return 0; -} - static int rtl8366rb_setup(struct dsa_switch *ds) { struct realtek_priv *priv = ds->priv; diff --git a/drivers/net/dsa/realtek/rtl8366rb.h b/drivers/net/dsa/realtek/rtl8366rb.h new file mode 100644 index 0000000000000..685ff3275faa1 --- /dev/null +++ b/drivers/net/dsa/realtek/rtl8366rb.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef _RTL8366RB_H +#define _RTL8366RB_H + +#include "realtek.h" + +#define RTL8366RB_PORT_NUM_CPU 5 +#define RTL8366RB_NUM_PORTS 6 +#define RTL8366RB_PHY_NO_MAX 4 +#define RTL8366RB_NUM_LEDGROUPS 4 +#define RTL8366RB_PHY_ADDR_MAX 31 + +/* LED control registers */ +/* The LED blink rate is global; it is used by all triggers in all groups. */ +#define RTL8366RB_LED_BLINKRATE_REG 0x0430 +#define RTL8366RB_LED_BLINKRATE_MASK 0x0007 +#define RTL8366RB_LED_BLINKRATE_28MS 0x0000 +#define RTL8366RB_LED_BLINKRATE_56MS 0x0001 +#define RTL8366RB_LED_BLINKRATE_84MS 0x0002 +#define RTL8366RB_LED_BLINKRATE_111MS 0x0003 +#define RTL8366RB_LED_BLINKRATE_222MS 0x0004 +#define RTL8366RB_LED_BLINKRATE_446MS 0x0005 + +/* LED trigger event for each group */ +#define RTL8366RB_LED_CTRL_REG 0x0431 +#define RTL8366RB_LED_CTRL_OFFSET(led_group) \ + (4 * (led_group)) +#define RTL8366RB_LED_CTRL_MASK(led_group) \ + (0xf << RTL8366RB_LED_CTRL_OFFSET(led_group)) + +/* The RTL8366RB_LED_X_X registers are used to manually set the LED state only + * when the corresponding LED group in RTL8366RB_LED_CTRL_REG is + * RTL8366RB_LEDGROUP_FORCE. Otherwise, it is ignored. + */ +#define RTL8366RB_LED_0_1_CTRL_REG 0x0432 +#define RTL8366RB_LED_2_3_CTRL_REG 0x0433 +#define RTL8366RB_LED_X_X_CTRL_REG(led_group) \ + ((led_group) <= 1 ? \ + RTL8366RB_LED_0_1_CTRL_REG : \ + RTL8366RB_LED_2_3_CTRL_REG) +#define RTL8366RB_LED_0_X_CTRL_MASK GENMASK(5, 0) +#define RTL8366RB_LED_X_1_CTRL_MASK GENMASK(11, 6) +#define RTL8366RB_LED_2_X_CTRL_MASK GENMASK(5, 0) +#define RTL8366RB_LED_X_3_CTRL_MASK GENMASK(11, 6) + +enum rtl8366_ledgroup_mode { + RTL8366RB_LEDGROUP_OFF = 0x0, + RTL8366RB_LEDGROUP_DUP_COL = 0x1, + RTL8366RB_LEDGROUP_LINK_ACT = 0x2, + RTL8366RB_LEDGROUP_SPD1000 = 0x3, + RTL8366RB_LEDGROUP_SPD100 = 0x4, + RTL8366RB_LEDGROUP_SPD10 = 0x5, + RTL8366RB_LEDGROUP_SPD1000_ACT = 0x6, + RTL8366RB_LEDGROUP_SPD100_ACT = 0x7, + RTL8366RB_LEDGROUP_SPD10_ACT = 0x8, + RTL8366RB_LEDGROUP_SPD100_10_ACT = 0x9, + RTL8366RB_LEDGROUP_FIBER = 0xa, + RTL8366RB_LEDGROUP_AN_FAULT = 0xb, + RTL8366RB_LEDGROUP_LINK_RX = 0xc, + RTL8366RB_LEDGROUP_LINK_TX = 0xd, + RTL8366RB_LEDGROUP_MASTER = 0xe, + RTL8366RB_LEDGROUP_FORCE = 0xf, + + __RTL8366RB_LEDGROUP_MODE_MAX +}; + +#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8366RB_LEDS) + +struct rtl8366rb_led { + u8 port_num; + u8 led_group; + struct realtek_priv *priv; + struct led_classdev cdev; +}; + +int rtl8366rb_setup_leds(struct realtek_priv *priv); + +#else + +static inline int rtl8366rb_setup_leds(struct realtek_priv *priv) +{ + return 0; +} + +#endif /* IS_ENABLED(CONFIG_LEDS_CLASS) */ + +/** + * struct rtl8366rb - RTL8366RB-specific data + * @max_mtu: per-port max MTU setting + * @pvid_enabled: if PVID is set for respective port + * @leds: per-port and per-ledgroup led info + */ +struct rtl8366rb { + unsigned int max_mtu[RTL8366RB_NUM_PORTS]; + bool pvid_enabled[RTL8366RB_NUM_PORTS]; +#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8366RB_LEDS) + struct rtl8366rb_led leds[RTL8366RB_NUM_PORTS][RTL8366RB_NUM_LEDGROUPS]; +#endif +}; + +/* This code is used also with LEDs disabled */ +int rb8366rb_set_ledgroup_mode(struct realtek_priv *priv, + u8 led_group, + enum rtl8366_ledgroup_mode mode); + +#endif /* _RTL8366RB_H */ -- GitLab From 02cfe2b6529c6c5fcf39d52a826927f4f93392af Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 24 Feb 2025 11:27:02 +0100 Subject: [PATCH 0969/1585] pidfs: remove d_op->d_delete Pidfs only deals with unhashed dentries and there's currently no way for them to become hashed. So remove d_op->d_delete. Signed-off-by: Christian Brauner --- fs/pidfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/pidfs.c b/fs/pidfs.c index 63f9699ebac36..c0478b3c55d9f 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -521,7 +521,6 @@ static char *pidfs_dname(struct dentry *dentry, char *buffer, int buflen) } const struct dentry_operations pidfs_dentry_operations = { - .d_delete = always_delete_dentry, .d_dname = pidfs_dname, .d_prune = stashed_dentry_prune, }; -- GitLab From 425e3e3bd62c568a4365af0923d6ebad71a7dcfc Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 24 Feb 2025 11:30:35 +0100 Subject: [PATCH 0970/1585] nsfs: remove d_op->d_delete Nsfs only deals with unhashed dentries and there's currently no way for them to become hashed. So remove d_op->d_delete. Signed-off-by: Christian Brauner --- fs/nsfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nsfs.c b/fs/nsfs.c index 663f8656158d5..f7fddf8ecf73f 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -37,7 +37,6 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) } const struct dentry_operations ns_dentry_operations = { - .d_delete = always_delete_dentry, .d_dname = ns_dname, .d_prune = stashed_dentry_prune, }; -- GitLab From 36e1b81f599a093ec7477e4593e110104adcfb96 Mon Sep 17 00:00:00 2001 From: Ken Raeburn Date: Wed, 19 Feb 2025 17:56:00 -0500 Subject: [PATCH 0971/1585] dm vdo: add missing spin_lock_init Signed-off-by: Ken Raeburn Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org --- drivers/md/dm-vdo/dedupe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-vdo/dedupe.c b/drivers/md/dm-vdo/dedupe.c index b6f8e2dc7729f..3f3d29af1be47 100644 --- a/drivers/md/dm-vdo/dedupe.c +++ b/drivers/md/dm-vdo/dedupe.c @@ -2178,6 +2178,7 @@ static int initialize_index(struct vdo *vdo, struct hash_zones *zones) vdo_set_dedupe_index_timeout_interval(vdo_dedupe_index_timeout_interval); vdo_set_dedupe_index_min_timer_interval(vdo_dedupe_index_min_timer_interval); + spin_lock_init(&zones->lock); /* * Since we will save up the timeouts that would have been reported but were ratelimited, -- GitLab From 5b0c02f9b8acf2a791e531bbc09acae2d51f4f9b Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Sat, 22 Feb 2025 20:39:57 +0100 Subject: [PATCH 0972/1585] ASoC: es8328: fix route from DAC to output The ES8328 codec driver, which is also used for the ES8388 chip that appears to have an identical register map, claims that the output can either take the route from DAC->Mixer->Output or through DAC->Output directly. To the best of what I could find, this is not true, and creates problems. Without DACCONTROL17 bit index 7 set for the left channel, as well as DACCONTROL20 bit index 7 set for the right channel, I cannot get any analog audio out on Left Out 2 and Right Out 2 respectively, despite the DAPM routes claiming that this should be possible. Furthermore, the same is the case for Left Out 1 and Right Out 1, showing that those two don't have a direct route from DAC to output bypassing the mixer either. Those control bits toggle whether the DACs are fed (stale bread?) into their respective mixers. If one "unmutes" the mixer controls in alsamixer, then sure, the audio output works, but if it doesn't work without the mixer being fed the DAC input then evidently it's not a direct output from the DAC. ES8328/ES8388 are seemingly not alone in this. ES8323, which uses a separate driver for what appears to be a very similar register map, simply flips those two bits on in its probe function, and then pretends there is no power management whatsoever for the individual controls. Fair enough. My theory as to why nobody has noticed this up to this point is that everyone just assumes it's their fault when they had to unmute an additional control in ALSA. Fix this in the es8328 driver by removing the erroneous direct route, then get rid of the playback switch controls and have those bits tied to the mixer's widget instead, which until now had no register to play with. Fixes: 567e4f98922c ("ASoC: add es8328 codec driver") Signed-off-by: Nicolas Frattaroli Link: https://patch.msgid.link/20250222-es8328-route-bludgeoning-v1-1-99bfb7fb22d9@collabora.com Signed-off-by: Mark Brown --- sound/soc/codecs/es8328.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/sound/soc/codecs/es8328.c b/sound/soc/codecs/es8328.c index f3c97da798dc8..76159c45e6b52 100644 --- a/sound/soc/codecs/es8328.c +++ b/sound/soc/codecs/es8328.c @@ -233,7 +233,6 @@ static const struct snd_kcontrol_new es8328_right_line_controls = /* Left Mixer */ static const struct snd_kcontrol_new es8328_left_mixer_controls[] = { - SOC_DAPM_SINGLE("Playback Switch", ES8328_DACCONTROL17, 7, 1, 0), SOC_DAPM_SINGLE("Left Bypass Switch", ES8328_DACCONTROL17, 6, 1, 0), SOC_DAPM_SINGLE("Right Playback Switch", ES8328_DACCONTROL18, 7, 1, 0), SOC_DAPM_SINGLE("Right Bypass Switch", ES8328_DACCONTROL18, 6, 1, 0), @@ -243,7 +242,6 @@ static const struct snd_kcontrol_new es8328_left_mixer_controls[] = { static const struct snd_kcontrol_new es8328_right_mixer_controls[] = { SOC_DAPM_SINGLE("Left Playback Switch", ES8328_DACCONTROL19, 7, 1, 0), SOC_DAPM_SINGLE("Left Bypass Switch", ES8328_DACCONTROL19, 6, 1, 0), - SOC_DAPM_SINGLE("Playback Switch", ES8328_DACCONTROL20, 7, 1, 0), SOC_DAPM_SINGLE("Right Bypass Switch", ES8328_DACCONTROL20, 6, 1, 0), }; @@ -336,10 +334,10 @@ static const struct snd_soc_dapm_widget es8328_dapm_widgets[] = { SND_SOC_DAPM_DAC("Left DAC", "Left Playback", ES8328_DACPOWER, ES8328_DACPOWER_LDAC_OFF, 1), - SND_SOC_DAPM_MIXER("Left Mixer", SND_SOC_NOPM, 0, 0, + SND_SOC_DAPM_MIXER("Left Mixer", ES8328_DACCONTROL17, 7, 0, &es8328_left_mixer_controls[0], ARRAY_SIZE(es8328_left_mixer_controls)), - SND_SOC_DAPM_MIXER("Right Mixer", SND_SOC_NOPM, 0, 0, + SND_SOC_DAPM_MIXER("Right Mixer", ES8328_DACCONTROL20, 7, 0, &es8328_right_mixer_controls[0], ARRAY_SIZE(es8328_right_mixer_controls)), @@ -418,19 +416,14 @@ static const struct snd_soc_dapm_route es8328_dapm_routes[] = { { "Right Line Mux", "PGA", "Right PGA Mux" }, { "Right Line Mux", "Differential", "Differential Mux" }, - { "Left Out 1", NULL, "Left DAC" }, - { "Right Out 1", NULL, "Right DAC" }, - { "Left Out 2", NULL, "Left DAC" }, - { "Right Out 2", NULL, "Right DAC" }, - - { "Left Mixer", "Playback Switch", "Left DAC" }, + { "Left Mixer", NULL, "Left DAC" }, { "Left Mixer", "Left Bypass Switch", "Left Line Mux" }, { "Left Mixer", "Right Playback Switch", "Right DAC" }, { "Left Mixer", "Right Bypass Switch", "Right Line Mux" }, { "Right Mixer", "Left Playback Switch", "Left DAC" }, { "Right Mixer", "Left Bypass Switch", "Left Line Mux" }, - { "Right Mixer", "Playback Switch", "Right DAC" }, + { "Right Mixer", NULL, "Right DAC" }, { "Right Mixer", "Right Bypass Switch", "Right Line Mux" }, { "DAC DIG", NULL, "DAC STM" }, -- GitLab From 0fe8813baf4b2e865d3b2c735ce1a15b86002c74 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 17 Jan 2025 06:41:07 -0800 Subject: [PATCH 0973/1585] perf/core: Add RCU read lock protection to perf_iterate_ctx() The perf_iterate_ctx() function performs RCU list traversal but currently lacks RCU read lock protection. This causes lockdep warnings when running perf probe with unshare(1) under CONFIG_PROVE_RCU_LIST=y: WARNING: suspicious RCU usage kernel/events/core.c:8168 RCU-list traversed in non-reader section!! Call Trace: lockdep_rcu_suspicious ? perf_event_addr_filters_apply perf_iterate_ctx perf_event_exec begin_new_exec ? load_elf_phdrs load_elf_binary ? lock_acquire ? find_held_lock ? bprm_execve bprm_execve do_execveat_common.isra.0 __x64_sys_execve do_syscall_64 entry_SYSCALL_64_after_hwframe This protection was previously present but was removed in commit bd2756811766 ("perf: Rewrite core context handling"). Add back the necessary rcu_read_lock()/rcu_read_unlock() pair around perf_iterate_ctx() call in perf_event_exec(). [ mingo: Use scoped_guard() as suggested by Peter ] Fixes: bd2756811766 ("perf: Rewrite core context handling") Signed-off-by: Breno Leitao Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250117-fix_perf_rcu-v1-1-13cb9210fc6a@debian.org --- kernel/events/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index bcb09e011e9e1..7dabbcaf825a0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8321,7 +8321,8 @@ void perf_event_exec(void) perf_event_enable_on_exec(ctx); perf_event_remove_on_exec(ctx); - perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL, true); + scoped_guard(rcu) + perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL, true); perf_unpin_context(ctx); put_ctx(ctx); -- GitLab From 2016066c66192a99d9e0ebf433789c490a6785a2 Mon Sep 17 00:00:00 2001 From: Luo Gengkun Date: Wed, 22 Jan 2025 07:33:56 +0000 Subject: [PATCH 0974/1585] perf/core: Order the PMU list to fix warning about unordered pmu_ctx_list Syskaller triggers a warning due to prev_epc->pmu != next_epc->pmu in perf_event_swap_task_ctx_data(). vmcore shows that two lists have the same perf_event_pmu_context, but not in the same order. The problem is that the order of pmu_ctx_list for the parent is impacted by the time when an event/PMU is added. While the order for a child is impacted by the event order in the pinned_groups and flexible_groups. So the order of pmu_ctx_list in the parent and child may be different. To fix this problem, insert the perf_event_pmu_context to its proper place after iteration of the pmu_ctx_list. The follow testcase can trigger above warning: # perf record -e cycles --call-graph lbr -- taskset -c 3 ./a.out & # perf stat -e cpu-clock,cs -p xxx // xxx is the pid of a.out test.c void main() { int count = 0; pid_t pid; printf("%d running\n", getpid()); sleep(30); printf("running\n"); pid = fork(); if (pid == -1) { printf("fork error\n"); return; } if (pid == 0) { while (1) { count++; } } else { while (1) { count++; } } } The testcase first opens an LBR event, so it will allocate task_ctx_data, and then open tracepoint and software events, so the parent context will have 3 different perf_event_pmu_contexts. On inheritance, child ctx will insert the perf_event_pmu_context in another order and the warning will trigger. [ mingo: Tidied up the changelog. ] Fixes: bd2756811766 ("perf: Rewrite core context handling") Signed-off-by: Luo Gengkun Signed-off-by: Ingo Molnar Reviewed-by: Kan Liang Link: https://lore.kernel.org/r/20250122073356.1824736-1-luogengkun@huaweicloud.com --- kernel/events/core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 7dabbcaf825a0..086d46d096963 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4950,7 +4950,7 @@ static struct perf_event_pmu_context * find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx, struct perf_event *event) { - struct perf_event_pmu_context *new = NULL, *epc; + struct perf_event_pmu_context *new = NULL, *pos = NULL, *epc; void *task_ctx_data = NULL; if (!ctx->task) { @@ -5007,12 +5007,19 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx, atomic_inc(&epc->refcount); goto found_epc; } + /* Make sure the pmu_ctx_list is sorted by PMU type: */ + if (!pos && epc->pmu->type > pmu->type) + pos = epc; } epc = new; new = NULL; - list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list); + if (!pos) + list_add_tail(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list); + else + list_add(&epc->pmu_ctx_entry, pos->pmu_ctx_entry.prev); + epc->ctx = ctx; found_epc: -- GitLab From 0da83ab025bc45e9742e87c2cce19bff423377c8 Mon Sep 17 00:00:00 2001 From: Chancel Liu Date: Mon, 17 Feb 2025 10:04:37 +0900 Subject: [PATCH 0975/1585] ASoC: fsl: Rename stream name of SAI DAI driver If stream names of DAI driver are duplicated there'll be warnings when machine driver tries to add widgets on a route: [ 8.831335] fsl-asoc-card sound-wm8960: ASoC: sink widget CPU-Playback overwritten [ 8.839917] fsl-asoc-card sound-wm8960: ASoC: source widget CPU-Capture overwritten Use different stream names to avoid such warnings. DAI names in AUDMIX are also updated accordingly. Fixes: 15c958390460 ("ASoC: fsl_sai: Add separate DAI for transmitter and receiver") Signed-off-by: Chancel Liu Acked-by: Shengjiu Wang Link: https://patch.msgid.link/20250217010437.258621-1-chancel.liu@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_sai.c | 6 +++--- sound/soc/fsl/imx-audmix.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index c4eb87c5d39e4..9f33dd11d47f6 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -994,10 +994,10 @@ static struct snd_soc_dai_driver fsl_sai_dai_template[] = { { .name = "sai-tx", .playback = { - .stream_name = "CPU-Playback", + .stream_name = "SAI-Playback", .channels_min = 1, .channels_max = 32, - .rate_min = 8000, + .rate_min = 8000, .rate_max = 2822400, .rates = SNDRV_PCM_RATE_KNOT, .formats = FSL_SAI_FORMATS, @@ -1007,7 +1007,7 @@ static struct snd_soc_dai_driver fsl_sai_dai_template[] = { { .name = "sai-rx", .capture = { - .stream_name = "CPU-Capture", + .stream_name = "SAI-Capture", .channels_min = 1, .channels_max = 32, .rate_min = 8000, diff --git a/sound/soc/fsl/imx-audmix.c b/sound/soc/fsl/imx-audmix.c index 50ecc5f51100e..dac5d4ddacd6e 100644 --- a/sound/soc/fsl/imx-audmix.c +++ b/sound/soc/fsl/imx-audmix.c @@ -119,8 +119,8 @@ static const struct snd_soc_ops imx_audmix_be_ops = { static const char *name[][3] = { {"HiFi-AUDMIX-FE-0", "HiFi-AUDMIX-FE-1", "HiFi-AUDMIX-FE-2"}, {"sai-tx", "sai-tx", "sai-rx"}, - {"AUDMIX-Playback-0", "AUDMIX-Playback-1", "CPU-Capture"}, - {"CPU-Playback", "CPU-Playback", "AUDMIX-Capture-0"}, + {"AUDMIX-Playback-0", "AUDMIX-Playback-1", "SAI-Capture"}, + {"SAI-Playback", "SAI-Playback", "AUDMIX-Capture-0"}, }; static int imx_audmix_probe(struct platform_device *pdev) -- GitLab From d31babd7e304d3b800d36ff74be6739405b985f2 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Fri, 21 Feb 2025 21:39:32 +0100 Subject: [PATCH 0976/1585] ASoC: dapm-graph: set fill colour of turned on nodes Some tools like KGraphViewer interpret the "ON" nodes not having an explicitly set fill colour as them being entirely black, which obscures the text on them and looks funny. In fact, I thought they were off for the longest time. Comparing to the output of the `dot` tool, I assume they are supposed to be white. Instead of speclawyering over who's in the wrong and must immediately atone for their wickedness at the altar of RFC2119, just be explicit about it, set the fillcolor to white, and nobody gets confused. Signed-off-by: Nicolas Frattaroli Tested-by: Luca Ceresoli Reviewed-by: Luca Ceresoli Link: https://patch.msgid.link/20250221-dapm-graph-node-colour-v1-1-514ed0aa7069@collabora.com Signed-off-by: Mark Brown --- tools/sound/dapm-graph | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/sound/dapm-graph b/tools/sound/dapm-graph index f14bdfedee8f1..b6196ee5065a4 100755 --- a/tools/sound/dapm-graph +++ b/tools/sound/dapm-graph @@ -10,7 +10,7 @@ set -eu STYLE_COMPONENT_ON="color=dodgerblue;style=bold" STYLE_COMPONENT_OFF="color=gray40;style=filled;fillcolor=gray90" -STYLE_NODE_ON="shape=box,style=bold,color=green4" +STYLE_NODE_ON="shape=box,style=bold,color=green4,fillcolor=white" STYLE_NODE_OFF="shape=box,style=filled,color=gray30,fillcolor=gray95" # Print usage and exit -- GitLab From 39ec9eaaa165d297d008d1fa385748430bd18e4d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 19 Feb 2025 11:53:16 -0800 Subject: [PATCH 0977/1585] coredump: Only sort VMAs when core_sort_vma sysctl is set The sorting of VMAs by size in commit 7d442a33bfe8 ("binfmt_elf: Dump smaller VMAs first in ELF cores") breaks elfutils[1]. Instead, sort based on the setting of the new sysctl, core_sort_vma, which defaults to 0, no sorting. Reported-by: Michael Stapelberg Closes: https://lore.kernel.org/all/20250218085407.61126-1-michael@stapelberg.de/ [1] Fixes: 7d442a33bfe8 ("binfmt_elf: Dump smaller VMAs first in ELF cores") Signed-off-by: Kees Cook --- Documentation/admin-guide/sysctl/kernel.rst | 11 +++++++++++ fs/coredump.c | 15 +++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index a43b78b4b6464..dd49a89a62d35 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -212,6 +212,17 @@ pid>/``). This value defaults to 0. +core_sort_vma +============= + +The default coredump writes VMAs in address order. By setting +``core_sort_vma`` to 1, VMAs will be written from smallest size +to largest size. This is known to break at least elfutils, but +can be handy when dealing with very large (and truncated) +coredumps where the more useful debugging details are included +in the smaller VMAs. + + core_uses_pid ============= diff --git a/fs/coredump.c b/fs/coredump.c index 591700e1b2ce6..4375c70144d0a 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -63,6 +63,7 @@ static void free_vma_snapshot(struct coredump_params *cprm); static int core_uses_pid; static unsigned int core_pipe_limit; +static unsigned int core_sort_vma; static char core_pattern[CORENAME_MAX_SIZE] = "core"; static int core_name_size = CORENAME_MAX_SIZE; unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT; @@ -1026,6 +1027,15 @@ static const struct ctl_table coredump_sysctls[] = { .extra1 = (unsigned int *)&core_file_note_size_min, .extra2 = (unsigned int *)&core_file_note_size_max, }, + { + .procname = "core_sort_vma", + .data = &core_sort_vma, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, }; static int __init init_fs_coredump_sysctls(void) @@ -1256,8 +1266,9 @@ static bool dump_vma_snapshot(struct coredump_params *cprm) cprm->vma_data_size += m->dump_size; } - sort(cprm->vma_meta, cprm->vma_count, sizeof(*cprm->vma_meta), - cmp_vma_size, NULL); + if (core_sort_vma) + sort(cprm->vma_meta, cprm->vma_count, sizeof(*cprm->vma_meta), + cmp_vma_size, NULL); return true; } -- GitLab From bddf10d26e6e5114e7415a0e442ec6f51a559468 Mon Sep 17 00:00:00 2001 From: Tong Tiangen Date: Mon, 24 Feb 2025 11:11:49 +0800 Subject: [PATCH 0978/1585] uprobes: Reject the shared zeropage in uprobe_write_opcode() We triggered the following crash in syzkaller tests: BUG: Bad page state in process syz.7.38 pfn:1eff3 page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1eff3 flags: 0x3fffff00004004(referenced|reserved|node=0|zone=1|lastcpupid=0x1fffff) raw: 003fffff00004004 ffffe6c6c07bfcc8 ffffe6c6c07bfcc8 0000000000000000 raw: 0000000000000000 0000000000000000 00000000fffffffe 0000000000000000 page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Call Trace: dump_stack_lvl+0x32/0x50 bad_page+0x69/0xf0 free_unref_page_prepare+0x401/0x500 free_unref_page+0x6d/0x1b0 uprobe_write_opcode+0x460/0x8e0 install_breakpoint.part.0+0x51/0x80 register_for_each_vma+0x1d9/0x2b0 __uprobe_register+0x245/0x300 bpf_uprobe_multi_link_attach+0x29b/0x4f0 link_create+0x1e2/0x280 __sys_bpf+0x75f/0xac0 __x64_sys_bpf+0x1a/0x30 do_syscall_64+0x56/0x100 entry_SYSCALL_64_after_hwframe+0x78/0xe2 BUG: Bad rss-counter state mm:00000000452453e0 type:MM_FILEPAGES val:-1 The following syzkaller test case can be used to reproduce: r2 = creat(&(0x7f0000000000)='./file0\x00', 0x8) write$nbd(r2, &(0x7f0000000580)=ANY=[], 0x10) r4 = openat(0xffffffffffffff9c, &(0x7f0000000040)='./file0\x00', 0x42, 0x0) mmap$IORING_OFF_SQ_RING(&(0x7f0000ffd000/0x3000)=nil, 0x3000, 0x0, 0x12, r4, 0x0) r5 = userfaultfd(0x80801) ioctl$UFFDIO_API(r5, 0xc018aa3f, &(0x7f0000000040)={0xaa, 0x20}) r6 = userfaultfd(0x80801) ioctl$UFFDIO_API(r6, 0xc018aa3f, &(0x7f0000000140)) ioctl$UFFDIO_REGISTER(r6, 0xc020aa00, &(0x7f0000000100)={{&(0x7f0000ffc000/0x4000)=nil, 0x4000}, 0x2}) ioctl$UFFDIO_ZEROPAGE(r5, 0xc020aa04, &(0x7f0000000000)={{&(0x7f0000ffd000/0x1000)=nil, 0x1000}}) r7 = bpf$PROG_LOAD(0x5, &(0x7f0000000140)={0x2, 0x3, &(0x7f0000000200)=ANY=[@ANYBLOB="1800000000120000000000000000000095"], &(0x7f0000000000)='GPL\x00', 0x7, 0x0, 0x0, 0x0, 0x0, '\x00', 0x0, @fallback=0x30, 0xffffffffffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x10, 0x0, @void, @value}, 0x94) bpf$BPF_LINK_CREATE_XDP(0x1c, &(0x7f0000000040)={r7, 0x0, 0x30, 0x1e, @val=@uprobe_multi={&(0x7f0000000080)='./file0\x00', &(0x7f0000000100)=[0x2], 0x0, 0x0, 0x1}}, 0x40) The cause is that zero pfn is set to the PTE without increasing the RSS count in mfill_atomic_pte_zeropage() and the refcount of zero folio does not increase accordingly. Then, the operation on the same pfn is performed in uprobe_write_opcode()->__replace_page() to unconditional decrease the RSS count and old_folio's refcount. Therefore, two bugs are introduced: 1. The RSS count is incorrect, when process exit, the check_mm() report error "Bad rss-count". 2. The reserved folio (zero folio) is freed when folio->refcount is zero, then free_pages_prepare->free_page_is_bad() report error "Bad page state". There is more, the following warning could also theoretically be triggered: __replace_page() -> ... -> folio_remove_rmap_pte() -> VM_WARN_ON_FOLIO(is_zero_folio(folio), folio) Considering that uprobe hit on the zero folio is a very rare case, just reject zero old folio immediately after get_user_page_vma_remote(). [ mingo: Cleaned up the changelog ] Fixes: 7396fa818d62 ("uprobes/core: Make background page replacement logic account for rss_stat counters") Fixes: 2b1444983508 ("uprobes, mm, x86: Add the ability to install and remove uprobes breakpoints") Signed-off-by: Tong Tiangen Signed-off-by: Ingo Molnar Reviewed-by: David Hildenbrand Reviewed-by: Oleg Nesterov Cc: Peter Zijlstra Cc: Masami Hiramatsu Link: https://lore.kernel.org/r/20250224031149.1598949-1-tongtiangen@huawei.com --- kernel/events/uprobes.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index bf2a87a0a3787..af53fbd2d12c4 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -495,6 +495,11 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, if (ret <= 0) goto put_old; + if (is_zero_page(old_page)) { + ret = -EINVAL; + goto put_old; + } + if (WARN(!is_register && PageCompound(old_page), "uprobe unregister should never work on compound page\n")) { ret = -EINVAL; -- GitLab From 815291c11acda54515f1af5ce6fe307490de9127 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Feb 2025 08:28:59 -0800 Subject: [PATCH 0979/1585] configfs: update MAINTAINERS Joel will go back to maintain configfs alone on a time permitting basis. Signed-off-by: Christoph Hellwig Acked-by: Joel Becker Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 4ff26fa94895d..089c1178f25a5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5856,7 +5856,6 @@ F: Documentation/security/snp-tdx-threat-model.rst CONFIGFS M: Joel Becker -M: Christoph Hellwig S: Supported T: git git://git.infradead.org/users/hch/configfs.git F: fs/configfs/ -- GitLab From f7d5db965f3e132887779c6b449452db2b807caa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Feb 2025 08:27:21 -0800 Subject: [PATCH 0980/1585] dma-mapping: update MAINTAINERS Marek has graciously offered to maintain the dma-mapping tree. Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 089c1178f25a5..a78eaaa24a699 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6878,7 +6878,6 @@ F: kernel/dma/map_benchmark.c F: tools/testing/selftests/dma/ DMA MAPPING HELPERS -M: Christoph Hellwig M: Marek Szyprowski R: Robin Murphy L: iommu@lists.linux.dev -- GitLab From e043dc16c28c8446e66c55adfe7c6e862a6a7bb7 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 21 Feb 2025 14:38:41 +0000 Subject: [PATCH 0981/1585] drm/xe/userptr: restore invalidation list on error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On error restore anything still on the pin_list back to the invalidation list on error. For the actual pin, so long as the vma is tracked on either list it should get picked up on the next pin, however it looks possible for the vma to get nuked but still be present on this per vm pin_list leading to corruption. An alternative might be then to instead just remove the link when destroying the vma. v2: - Also add some asserts. - Keep the overzealous locking so that we are consistent with the docs; updating the docs and related bits will be done as a follow up. Fixes: ed2bdf3b264d ("drm/xe/vm: Subclass userptr vmas") Suggested-by: Matthew Brost Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: # v6.8+ Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250221143840.167150-4-matthew.auld@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 4e37e928928b730de9aa9a2f5dc853feeebc1742) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 690330352d4cd..47f7d8f2094b6 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -666,15 +666,16 @@ int xe_vm_userptr_pin(struct xe_vm *vm) /* Collect invalidated userptrs */ spin_lock(&vm->userptr.invalidated_lock); + xe_assert(vm->xe, list_empty(&vm->userptr.repin_list)); list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, userptr.invalidate_link) { list_del_init(&uvma->userptr.invalidate_link); - list_move_tail(&uvma->userptr.repin_link, - &vm->userptr.repin_list); + list_add_tail(&uvma->userptr.repin_link, + &vm->userptr.repin_list); } spin_unlock(&vm->userptr.invalidated_lock); - /* Pin and move to temporary list */ + /* Pin and move to bind list */ list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, userptr.repin_link) { err = xe_vma_userptr_pin_pages(uvma); @@ -690,10 +691,10 @@ int xe_vm_userptr_pin(struct xe_vm *vm) err = xe_vm_invalidate_vma(&uvma->vma); xe_vm_unlock(vm); if (err) - return err; + break; } else { - if (err < 0) - return err; + if (err) + break; list_del_init(&uvma->userptr.repin_link); list_move_tail(&uvma->vma.combined_links.rebind, @@ -701,7 +702,19 @@ int xe_vm_userptr_pin(struct xe_vm *vm) } } - return 0; + if (err) { + down_write(&vm->userptr.notifier_lock); + spin_lock(&vm->userptr.invalidated_lock); + list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, + userptr.repin_link) { + list_del_init(&uvma->userptr.repin_link); + list_move_tail(&uvma->userptr.invalidate_link, + &vm->userptr.invalidated); + } + spin_unlock(&vm->userptr.invalidated_lock); + up_write(&vm->userptr.notifier_lock); + } + return err; } /** @@ -1066,6 +1079,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); spin_lock(&vm->userptr.invalidated_lock); + xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link)); list_del(&to_userptr_vma(vma)->userptr.invalidate_link); spin_unlock(&vm->userptr.invalidated_lock); } else if (!xe_vma_is_null(vma)) { -- GitLab From a9f4fa3a7efa65615ff7db13023ac84516e99e21 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 21 Feb 2025 14:38:42 +0000 Subject: [PATCH 0982/1585] drm/xe/userptr: fix EFAULT handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we treat EFAULT from hmm_range_fault() as a non-fatal error when called from xe_vm_userptr_pin() with the idea that we want to avoid killing the entire vm and chucking an error, under the assumption that the user just did an unmap or something, and has no intention of actually touching that memory from the GPU. At this point we have already zapped the PTEs so any access should generate a page fault, and if the pin fails there also it will then become fatal. However it looks like it's possible for the userptr vma to still be on the rebind list in preempt_rebind_work_func(), if we had to retry the pin again due to something happening in the caller before we did the rebind step, but in the meantime needing to re-validate the userptr and this time hitting the EFAULT. This explains an internal user report of hitting: [ 191.738349] WARNING: CPU: 1 PID: 157 at drivers/gpu/drm/xe/xe_res_cursor.h:158 xe_pt_stage_bind.constprop.0+0x60a/0x6b0 [xe] [ 191.738551] Workqueue: xe-ordered-wq preempt_rebind_work_func [xe] [ 191.738616] RIP: 0010:xe_pt_stage_bind.constprop.0+0x60a/0x6b0 [xe] [ 191.738690] Call Trace: [ 191.738692] [ 191.738694] ? show_regs+0x69/0x80 [ 191.738698] ? __warn+0x93/0x1a0 [ 191.738703] ? xe_pt_stage_bind.constprop.0+0x60a/0x6b0 [xe] [ 191.738759] ? report_bug+0x18f/0x1a0 [ 191.738764] ? handle_bug+0x63/0xa0 [ 191.738767] ? exc_invalid_op+0x19/0x70 [ 191.738770] ? asm_exc_invalid_op+0x1b/0x20 [ 191.738777] ? xe_pt_stage_bind.constprop.0+0x60a/0x6b0 [xe] [ 191.738834] ? ret_from_fork_asm+0x1a/0x30 [ 191.738849] bind_op_prepare+0x105/0x7b0 [xe] [ 191.738906] ? dma_resv_reserve_fences+0x301/0x380 [ 191.738912] xe_pt_update_ops_prepare+0x28c/0x4b0 [xe] [ 191.738966] ? kmemleak_alloc+0x4b/0x80 [ 191.738973] ops_execute+0x188/0x9d0 [xe] [ 191.739036] xe_vm_rebind+0x4ce/0x5a0 [xe] [ 191.739098] ? trace_hardirqs_on+0x4d/0x60 [ 191.739112] preempt_rebind_work_func+0x76f/0xd00 [xe] Followed by NPD, when running some workload, since the sg was never actually populated but the vma is still marked for rebind when it should be skipped for this special EFAULT case. This is confirmed to fix the user report. v2 (MattB): - Move earlier. v3 (MattB): - Update the commit message to make it clear that this indeed fixes the issue. Fixes: 521db22a1d70 ("drm/xe: Invalidate userptr VMA on page pin fault") Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: Thomas Hellström Cc: # v6.10+ Reviewed-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20250221143840.167150-5-matthew.auld@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 6b93cb98910c826c2e2004942f8b060311e43618) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 47f7d8f2094b6..30259eba450b5 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -681,6 +681,18 @@ int xe_vm_userptr_pin(struct xe_vm *vm) err = xe_vma_userptr_pin_pages(uvma); if (err == -EFAULT) { list_del_init(&uvma->userptr.repin_link); + /* + * We might have already done the pin once already, but + * then had to retry before the re-bind happened, due + * some other condition in the caller, but in the + * meantime the userptr got dinged by the notifier such + * that we need to revalidate here, but this time we hit + * the EFAULT. In such a case make sure we remove + * ourselves from the rebind list to avoid going down in + * flames. + */ + if (!list_empty(&uvma->vma.combined_links.rebind)) + list_del_init(&uvma->vma.combined_links.rebind); /* Wait for pending binds */ xe_vm_lock(vm, false); -- GitLab From db10fde5c4f96231e1d2bbfd01feb5f2f59b96d1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 20 Feb 2025 18:51:40 -0800 Subject: [PATCH 0983/1585] net: ethtool: fix ioctl confusing drivers about desired HDS user config The legacy ioctl path does not have support for extended attributes. So we issue a GET to fetch the current settings from the driver, in an attempt to keep them unchanged. HDS is a bit "special" as the GET only returns on/off while the SET takes a "ternary" argument (on/off/default). If the driver was in the "default" setting - executing the ioctl path binds it to on or off, even tho the user did not intend to change HDS config. Factor the relevant logic out of the netlink code and reuse it. Fixes: 87c8f8496a05 ("bnxt_en: add support for tcp-data-split ethtool command") Acked-by: Stanislav Fomichev Tested-by: Daniel Xu Tested-by: Taehee Yoo Link: https://patch.msgid.link/20250221025141.1132944-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/common.c | 16 ++++++++++++++++ net/ethtool/common.h | 6 ++++++ net/ethtool/ioctl.c | 4 ++-- net/ethtool/rings.c | 9 ++++----- 4 files changed, 28 insertions(+), 7 deletions(-) diff --git a/net/ethtool/common.c b/net/ethtool/common.c index d88e9080643b8..b97374b508f67 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "netlink.h" #include "common.h" @@ -771,6 +772,21 @@ int ethtool_check_ops(const struct ethtool_ops *ops) return 0; } +void ethtool_ringparam_get_cfg(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kparam, + struct netlink_ext_ack *extack) +{ + memset(param, 0, sizeof(*param)); + memset(kparam, 0, sizeof(*kparam)); + + param->cmd = ETHTOOL_GRINGPARAM; + dev->ethtool_ops->get_ringparam(dev, param, kparam, extack); + + /* Driver gives us current state, we want to return current config */ + kparam->tcp_data_split = dev->cfg->hds_config; +} + static void ethtool_init_tsinfo(struct kernel_ethtool_ts_info *info) { memset(info, 0, sizeof(*info)); diff --git a/net/ethtool/common.h b/net/ethtool/common.h index 58e9e7db06f90..a1088c2441d0a 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -51,6 +51,12 @@ int ethtool_check_max_channel(struct net_device *dev, struct ethtool_channels channels, struct genl_info *info); int ethtool_check_rss_ctx_busy(struct net_device *dev, u32 rss_context); + +void ethtool_ringparam_get_cfg(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kparam, + struct netlink_ext_ack *extack); + int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info); int ethtool_get_ts_info_by_phc(struct net_device *dev, struct kernel_ethtool_ts_info *info, diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 7609ce2b2c5e2..1c3ba2247776b 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2059,8 +2059,8 @@ static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr) static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) { - struct ethtool_ringparam ringparam, max = { .cmd = ETHTOOL_GRINGPARAM }; struct kernel_ethtool_ringparam kernel_ringparam; + struct ethtool_ringparam ringparam, max; int ret; if (!dev->ethtool_ops->set_ringparam || !dev->ethtool_ops->get_ringparam) @@ -2069,7 +2069,7 @@ static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) if (copy_from_user(&ringparam, useraddr, sizeof(ringparam))) return -EFAULT; - dev->ethtool_ops->get_ringparam(dev, &max, &kernel_ringparam, NULL); + ethtool_ringparam_get_cfg(dev, &max, &kernel_ringparam, NULL); /* ensure new ring parameters are within the maximums */ if (ringparam.rx_pending > max.rx_max_pending || diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index 7839bfd1ac6a0..aeedd5ec6b8cd 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -215,17 +215,16 @@ ethnl_set_rings_validate(struct ethnl_req_info *req_info, static int ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) { - struct kernel_ethtool_ringparam kernel_ringparam = {}; - struct ethtool_ringparam ringparam = {}; + struct kernel_ethtool_ringparam kernel_ringparam; struct net_device *dev = req_info->dev; + struct ethtool_ringparam ringparam; struct nlattr **tb = info->attrs; const struct nlattr *err_attr; bool mod = false; int ret; - dev->ethtool_ops->get_ringparam(dev, &ringparam, - &kernel_ringparam, info->extack); - kernel_ringparam.tcp_data_split = dev->cfg->hds_config; + ethtool_ringparam_get_cfg(dev, &ringparam, &kernel_ringparam, + info->extack); ethnl_update_u32(&ringparam.rx_pending, tb[ETHTOOL_A_RINGS_RX], &mod); ethnl_update_u32(&ringparam.rx_mini_pending, -- GitLab From 29b036be1b0bfcfc958380d5931325997fddf08a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 20 Feb 2025 18:51:41 -0800 Subject: [PATCH 0984/1585] selftests: drv-net: test XDP, HDS auto and the ioctl path Test XDP and HDS interaction. While at it add a test for using the IOCTL, as that turned out to be the real culprit. Testing bnxt: # NETIF=eth0 ./ksft-net-drv/drivers/net/hds.py KTAP version 1 1..12 ok 1 hds.get_hds ok 2 hds.get_hds_thresh ok 3 hds.set_hds_disable # SKIP disabling of HDS not supported by the device ok 4 hds.set_hds_enable ok 5 hds.set_hds_thresh_zero ok 6 hds.set_hds_thresh_max ok 7 hds.set_hds_thresh_gt ok 8 hds.set_xdp ok 9 hds.enabled_set_xdp ok 10 hds.ioctl ok 11 hds.ioctl_set_xdp ok 12 hds.ioctl_enabled_set_xdp # Totals: pass:11 fail:0 xfail:0 xpass:0 skip:1 error:0 and netdevsim: # ./ksft-net-drv/drivers/net/hds.py KTAP version 1 1..12 ok 1 hds.get_hds ok 2 hds.get_hds_thresh ok 3 hds.set_hds_disable ok 4 hds.set_hds_enable ok 5 hds.set_hds_thresh_zero ok 6 hds.set_hds_thresh_max ok 7 hds.set_hds_thresh_gt ok 8 hds.set_xdp ok 9 hds.enabled_set_xdp ok 10 hds.ioctl ok 11 hds.ioctl_set_xdp ok 12 hds.ioctl_enabled_set_xdp # Totals: pass:12 fail:0 xfail:0 xpass:0 skip:0 error:0 Netdevsim needs a sane default for tx/rx ring size. ethtool 6.11 is needed for the --disable-netlink option. Acked-by: Stanislav Fomichev Tested-by: Taehee Yoo Link: https://patch.msgid.link/20250221025141.1132944-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/ethtool.c | 2 + tools/testing/selftests/drivers/net/hds.py | 145 +++++++++++++++++- tools/testing/selftests/net/lib/Makefile | 3 + .../testing/selftests/net/lib/xdp_dummy.bpf.c | 13 ++ 4 files changed, 160 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/net/lib/xdp_dummy.bpf.c diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 5c80fbee79138..7ab358616e035 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -184,9 +184,11 @@ static const struct ethtool_ops nsim_ethtool_ops = { static void nsim_ethtool_ring_init(struct netdevsim *ns) { + ns->ethtool.ring.rx_pending = 512; ns->ethtool.ring.rx_max_pending = 4096; ns->ethtool.ring.rx_jumbo_max_pending = 4096; ns->ethtool.ring.rx_mini_max_pending = 4096; + ns->ethtool.ring.tx_pending = 512; ns->ethtool.ring.tx_max_pending = 4096; } diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py index 394971b25c0b1..873f5219e41d7 100755 --- a/tools/testing/selftests/drivers/net/hds.py +++ b/tools/testing/selftests/drivers/net/hds.py @@ -2,17 +2,54 @@ # SPDX-License-Identifier: GPL-2.0 import errno +import os from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx -from lib.py import EthtoolFamily, NlError +from lib.py import CmdExitFailure, EthtoolFamily, NlError from lib.py import NetDrvEnv +from lib.py import defer, ethtool, ip -def get_hds(cfg, netnl) -> None: + +def _get_hds_mode(cfg, netnl) -> str: try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) except NlError as e: raise KsftSkipEx('ring-get not supported by device') if 'tcp-data-split' not in rings: raise KsftSkipEx('tcp-data-split not supported by device') + return rings['tcp-data-split'] + + +def _xdp_onoff(cfg): + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + ip("link set dev %s xdp obj %s sec xdp" % + (cfg.ifname, prog)) + ip("link set dev %s xdp off" % cfg.ifname) + + +def _ioctl_ringparam_modify(cfg, netnl) -> None: + """ + Helper for performing a hopefully unimportant IOCTL SET. + IOCTL does not support HDS, so it should not affect the HDS config. + """ + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + + if 'tx' not in rings: + raise KsftSkipEx('setting Tx ring size not supported') + + try: + ethtool(f"--disable-netlink -G {cfg.ifname} tx {rings['tx'] // 2}") + except CmdExitFailure as e: + ethtool(f"--disable-netlink -G {cfg.ifname} tx {rings['tx'] * 2}") + defer(ethtool, f"-G {cfg.ifname} tx {rings['tx']}") + + +def get_hds(cfg, netnl) -> None: + _get_hds_mode(cfg, netnl) + def get_hds_thresh(cfg, netnl) -> None: try: @@ -104,6 +141,103 @@ def set_hds_thresh_gt(cfg, netnl) -> None: netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_gt}) ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + +def set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "auto" / UNKNOWN mode, XDP installation should work. + """ + mode = _get_hds_mode(cfg, netnl) + if mode == 'enabled': + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + _xdp_onoff(cfg) + + +def enabled_set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "enabled" mode, XDP installation should not work. + """ + _get_hds_mode(cfg, netnl) + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'enabled'}) + + defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + with ksft_raises(CmdExitFailure) as e: + _xdp_onoff(cfg) + + +def set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "auto" / UNKNOWN mode, XDP installation should work. + """ + mode = _get_hds_mode(cfg, netnl) + if mode == 'enabled': + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + _xdp_onoff(cfg) + + +def enabled_set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "enabled" mode, XDP installation should not work. + """ + _get_hds_mode(cfg, netnl) # Trigger skip if not supported + + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'enabled'}) + defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + with ksft_raises(CmdExitFailure) as e: + _xdp_onoff(cfg) + + +def ioctl(cfg, netnl) -> None: + mode1 = _get_hds_mode(cfg, netnl) + _ioctl_ringparam_modify(cfg, netnl) + mode2 = _get_hds_mode(cfg, netnl) + + ksft_eq(mode1, mode2) + + +def ioctl_set_xdp(cfg, netnl) -> None: + """ + Like set_xdp(), but we perturb the settings via the legacy ioctl. + """ + mode = _get_hds_mode(cfg, netnl) + if mode == 'enabled': + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + _ioctl_ringparam_modify(cfg, netnl) + + _xdp_onoff(cfg) + + +def ioctl_enabled_set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "enabled" mode, XDP installation should not work. + """ + _get_hds_mode(cfg, netnl) # Trigger skip if not supported + + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'enabled'}) + defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + with ksft_raises(CmdExitFailure) as e: + _xdp_onoff(cfg) + + def main() -> None: with NetDrvEnv(__file__, queue_count=3) as cfg: ksft_run([get_hds, @@ -112,7 +246,12 @@ def main() -> None: set_hds_enable, set_hds_thresh_zero, set_hds_thresh_max, - set_hds_thresh_gt], + set_hds_thresh_gt, + set_xdp, + enabled_set_xdp, + ioctl, + ioctl_set_xdp, + ioctl_enabled_set_xdp], args=(cfg, EthtoolFamily())) ksft_exit() diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile index bc6b6762baf3e..c22623b9a2a5f 100644 --- a/tools/testing/selftests/net/lib/Makefile +++ b/tools/testing/selftests/net/lib/Makefile @@ -9,7 +9,10 @@ TEST_FILES := ../../../../../Documentation/netlink/specs TEST_FILES += ../../../../net/ynl TEST_GEN_FILES += csum +TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) TEST_INCLUDES := $(wildcard py/*.py sh/*.sh) include ../../lib.mk + +include ../bpf.mk diff --git a/tools/testing/selftests/net/lib/xdp_dummy.bpf.c b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c new file mode 100644 index 0000000000000..d988b2e0cee84 --- /dev/null +++ b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define KBUILD_MODNAME "xdp_dummy" +#include +#include + +SEC("xdp") +int xdp_dummy_prog(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; -- GitLab From 8c1624b63a7d24142a2bbc3a5ee7e95f004ea36e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 20 Feb 2025 13:18:30 +0200 Subject: [PATCH 0985/1585] nvme-tcp: fix possible UAF in nvme_tcp_poll nvme_tcp_poll() may race with the send path error handler because it may complete the request while it is actively being polled for completion, resulting in a UAF panic [1]: We should make sure to stop polling when we see an error when trying to read from the socket. Hence make sure to propagate the error so that the block layer breaks the polling cycle. [1]: -- [35665.692310] nvme nvme2: failed to send request -13 [35665.702265] nvme nvme2: unsupported pdu type (3) [35665.702272] BUG: kernel NULL pointer dereference, address: 0000000000000000 [35665.702542] nvme nvme2: queue 1 receive failed: -22 [35665.703209] #PF: supervisor write access in kernel mode [35665.703213] #PF: error_code(0x0002) - not-present page [35665.703214] PGD 8000003801cce067 P4D 8000003801cce067 PUD 37e6f79067 PMD 0 [35665.703220] Oops: 0002 [#1] SMP PTI [35665.703658] nvme nvme2: starting error recovery [35665.705809] Hardware name: Inspur aaabbb/YZMB-00882-104, BIOS 4.1.26 09/22/2022 [35665.705812] Workqueue: kblockd blk_mq_requeue_work [35665.709172] RIP: 0010:_raw_spin_lock+0xc/0x30 [35665.715788] Call Trace: [35665.716201] [35665.716613] ? show_trace_log_lvl+0x1c1/0x2d9 [35665.717049] ? show_trace_log_lvl+0x1c1/0x2d9 [35665.717457] ? blk_mq_request_bypass_insert+0x2c/0xb0 [35665.717950] ? __die_body.cold+0x8/0xd [35665.718361] ? page_fault_oops+0xac/0x140 [35665.718749] ? blk_mq_start_request+0x30/0xf0 [35665.719144] ? nvme_tcp_queue_rq+0xc7/0x170 [nvme_tcp] [35665.719547] ? exc_page_fault+0x62/0x130 [35665.719938] ? asm_exc_page_fault+0x22/0x30 [35665.720333] ? _raw_spin_lock+0xc/0x30 [35665.720723] blk_mq_request_bypass_insert+0x2c/0xb0 [35665.721101] blk_mq_requeue_work+0xa5/0x180 [35665.721451] process_one_work+0x1e8/0x390 [35665.721809] worker_thread+0x53/0x3d0 [35665.722159] ? process_one_work+0x390/0x390 [35665.722501] kthread+0x124/0x150 [35665.722849] ? set_kthread_struct+0x50/0x50 [35665.723182] ret_from_fork+0x1f/0x30 Reported-by: Zhang Guanghui Signed-off-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 8a9131c95a3da..8c14018201dbd 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2699,6 +2699,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct nvme_tcp_queue *queue = hctx->driver_data; struct sock *sk = queue->sock->sk; + int ret; if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) return 0; @@ -2706,9 +2707,9 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) set_bit(NVME_TCP_Q_POLLING, &queue->flags); if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue)) sk_busy_loop(sk, true); - nvme_tcp_try_recv(queue); + ret = nvme_tcp_try_recv(queue); clear_bit(NVME_TCP_Q_POLLING, &queue->flags); - return queue->nr_cqe; + return ret < 0 ? ret : queue->nr_cqe; } static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size) -- GitLab From 6a3572e10f740acd48e2713ef37e92186a3ce5e8 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Thu, 13 Feb 2025 01:04:43 +0800 Subject: [PATCH 0986/1585] nvme-pci: clean up CMBMSC when registering CMB fails CMB decoding should get disabled when the CMB block isn't successfully registered to P2P DMA subsystem. Clean up the CMBMSC register in this error handling codepath to disable CMB decoding (and CMBLOC/CMBSZ registers). Signed-off-by: Icenowy Zheng Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 950289405ef28..218506e3dabea 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2003,6 +2003,7 @@ static void nvme_map_cmb(struct nvme_dev *dev) if (pci_p2pdma_add_resource(pdev, bar, size, offset)) { dev_warn(dev->ctrl.device, "failed to register the CMB\n"); + hi_lo_writeq(0, dev->bar + NVME_REG_CMBMSC); return; } -- GitLab From 56cf7ef0d490b28fad8f8629fc135c5ab7c9f54e Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Thu, 13 Feb 2025 01:04:44 +0800 Subject: [PATCH 0987/1585] nvme-pci: skip CMB blocks incompatible with PCI P2P DMA The PCI P2PDMA code will register the CMB block to the memory hot-plugging subsystem, which have an alignment requirement. Memory blocks that do not satisfy this alignment requirement (usually 2MB) will lead to a WARNING from memory hotplugging. Verify the CMB block's address and size against the alignment and only try to send CMB blocks compatible with it to prevent this warning. Tested on Intel DC D4502 SSD, which has a 512K CMB block that is too small for memory hotplugging (thus PCI P2PDMA). Signed-off-by: Icenowy Zheng Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 218506e3dabea..640590b217282 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1982,6 +1982,18 @@ static void nvme_map_cmb(struct nvme_dev *dev) if (offset > bar_size) return; + /* + * Controllers may support a CMB size larger than their BAR, for + * example, due to being behind a bridge. Reduce the CMB to the + * reported size of the BAR + */ + size = min(size, bar_size - offset); + + if (!IS_ALIGNED(size, memremap_compat_align()) || + !IS_ALIGNED(pci_resource_start(pdev, bar), + memremap_compat_align())) + return; + /* * Tell the controller about the host side address mapping the CMB, * and enable CMB decoding for the NVMe 1.4+ scheme: @@ -1992,14 +2004,6 @@ static void nvme_map_cmb(struct nvme_dev *dev) dev->bar + NVME_REG_CMBMSC); } - /* - * Controllers may support a CMB size larger than their BAR, - * for example, due to being behind a bridge. Reduce the CMB to - * the reported size of the BAR - */ - if (size > bar_size - offset) - size = bar_size - offset; - if (pci_p2pdma_add_resource(pdev, bar, size, offset)) { dev_warn(dev->ctrl.device, "failed to register the CMB\n"); -- GitLab From f5be37ca2c99fc764408ceeeaf941bea062cdc9b Mon Sep 17 00:00:00 2001 From: Andras Sebok Date: Mon, 24 Feb 2025 10:03:56 +0100 Subject: [PATCH 0988/1585] dt-bindings: input/touchscreen: imagis: add compatible for ist3038h IST3038H is a touchscreen IC which seems mostly compatible with IST3038C except that it reports a different chip ID value. Signed-off-by: Andras Sebok Link: https://lore.kernel.org/r/20250224090354.102903-4-sebokandris2009@gmail.com Signed-off-by: Dmitry Torokhov --- .../devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml b/Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml index e24cbd9609933..bd8ede3a4ad89 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml @@ -19,6 +19,7 @@ properties: - imagis,ist3038 - imagis,ist3038b - imagis,ist3038c + - imagis,ist3038h reg: maxItems: 1 -- GitLab From 5797c04400ee117bfe459ff1e468d0ea38054ab4 Mon Sep 17 00:00:00 2001 From: Paul Fertser Date: Thu, 23 Jan 2025 15:20:02 +0300 Subject: [PATCH 0989/1585] hwmon: (peci/dimmtemp) Do not provide fake thresholds data When an Icelake or Sapphire Rapids CPU isn't providing the maximum and critical thresholds for particular DIMM the driver should return an error to the userspace instead of giving it stale (best case) or wrong (the structure contains all zeros after kzalloc() call) data. The issue can be reproduced by binding the peci driver while the host is fully booted and idle, this makes PECI interaction unreliable enough. Fixes: 73bc1b885dae ("hwmon: peci: Add dimmtemp driver") Fixes: 621995b6d795 ("hwmon: (peci/dimmtemp) Add Sapphire Rapids support") Cc: stable@vger.kernel.org Signed-off-by: Paul Fertser Reviewed-by: Iwona Winiarska Link: https://lore.kernel.org/r/20250123122003.6010-1-fercerpav@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/peci/dimmtemp.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/hwmon/peci/dimmtemp.c b/drivers/hwmon/peci/dimmtemp.c index d6762259dd69c..fbe82d9852e01 100644 --- a/drivers/hwmon/peci/dimmtemp.c +++ b/drivers/hwmon/peci/dimmtemp.c @@ -127,8 +127,6 @@ static int update_thresholds(struct peci_dimmtemp *priv, int dimm_no) return 0; ret = priv->gen_info->read_thresholds(priv, dimm_order, chan_rank, &data); - if (ret == -ENODATA) /* Use default or previous value */ - return 0; if (ret) return ret; @@ -509,11 +507,11 @@ read_thresholds_icx(struct peci_dimmtemp *priv, int dimm_order, int chan_rank, u ret = peci_ep_pci_local_read(priv->peci_dev, 0, 13, 0, 2, 0xd4, ®_val); if (ret || !(reg_val & BIT(31))) - return -ENODATA; /* Use default or previous value */ + return -ENODATA; ret = peci_ep_pci_local_read(priv->peci_dev, 0, 13, 0, 2, 0xd0, ®_val); if (ret) - return -ENODATA; /* Use default or previous value */ + return -ENODATA; /* * Device 26, Offset 224e0: IMC 0 channel 0 -> rank 0 @@ -546,11 +544,11 @@ read_thresholds_spr(struct peci_dimmtemp *priv, int dimm_order, int chan_rank, u ret = peci_ep_pci_local_read(priv->peci_dev, 0, 30, 0, 2, 0xd4, ®_val); if (ret || !(reg_val & BIT(31))) - return -ENODATA; /* Use default or previous value */ + return -ENODATA; ret = peci_ep_pci_local_read(priv->peci_dev, 0, 30, 0, 2, 0xd0, ®_val); if (ret) - return -ENODATA; /* Use default or previous value */ + return -ENODATA; /* * Device 26, Offset 219a8: IMC 0 channel 0 -> rank 0 -- GitLab From 83a4a5a82a154277dfe61d135a445901cd6d0e6f Mon Sep 17 00:00:00 2001 From: Andras Sebok Date: Mon, 24 Feb 2025 10:03:54 +0100 Subject: [PATCH 0990/1585] Input: imagis - add support for imagis IST3038H Add support for imagis IST3038H, which seems mostly compatible with IST3038C except that it reports a different chip ID value. Tested on samsung,j5y17lte. Signed-off-by: Andras Sebok Link: https://lore.kernel.org/r/20250224090354.102903-2-sebokandris2009@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/imagis.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/input/touchscreen/imagis.c b/drivers/input/touchscreen/imagis.c index abeae9102323c..3c8bbe284b731 100644 --- a/drivers/input/touchscreen/imagis.c +++ b/drivers/input/touchscreen/imagis.c @@ -22,6 +22,7 @@ #define IST3032C_WHOAMI 0x32c #define IST3038C_WHOAMI 0x38c +#define IST3038H_WHOAMI 0x38d #define IST3038B_REG_CHIPID 0x30 #define IST3038B_WHOAMI 0x30380b @@ -428,11 +429,19 @@ static const struct imagis_properties imagis_3038c_data = { .protocol_b = true, }; +static const struct imagis_properties imagis_3038h_data = { + .interrupt_msg_cmd = IST3038C_REG_INTR_MESSAGE, + .touch_coord_cmd = IST3038C_REG_TOUCH_COORD, + .whoami_cmd = IST3038C_REG_CHIPID, + .whoami_val = IST3038H_WHOAMI, +}; + static const struct of_device_id imagis_of_match[] = { { .compatible = "imagis,ist3032c", .data = &imagis_3032c_data }, { .compatible = "imagis,ist3038", .data = &imagis_3038_data }, { .compatible = "imagis,ist3038b", .data = &imagis_3038b_data }, { .compatible = "imagis,ist3038c", .data = &imagis_3038c_data }, + { .compatible = "imagis,ist3038h", .data = &imagis_3038h_data }, { }, }; MODULE_DEVICE_TABLE(of, imagis_of_match); -- GitLab From 5bab1ae5a375ccde25ac9a142ea933a5d3bdf38d Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Fri, 3 Jan 2025 10:21:35 +0100 Subject: [PATCH 0991/1585] Input: goodix-berlin - fix comment referencing wrong regulator In the statement above AVDD gets enabled, and not IOVDD, so fix this copy-paste mistake. Fixes: 44362279bdd4 ("Input: add core support for Goodix Berlin Touchscreen IC") Reported-by: Jens Reidel Signed-off-by: Luca Weiss Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250103-goodix-berlin-fixes-v1-1-b014737b08b2@fairphone.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix_berlin_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/goodix_berlin_core.c b/drivers/input/touchscreen/goodix_berlin_core.c index 3fc03cf0ca23f..e273fb8edc6b9 100644 --- a/drivers/input/touchscreen/goodix_berlin_core.c +++ b/drivers/input/touchscreen/goodix_berlin_core.c @@ -263,7 +263,7 @@ static int goodix_berlin_power_on(struct goodix_berlin_core *cd) goto err_iovdd_disable; } - /* Vendor waits 15ms for IOVDD to settle */ + /* Vendor waits 15ms for AVDD to settle */ usleep_range(15000, 15100); gpiod_set_value_cansleep(cd->reset_gpio, 0); -- GitLab From 3b0011059334a1cf554c2c1f67d7a7b822d8238a Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Fri, 3 Jan 2025 10:21:36 +0100 Subject: [PATCH 0992/1585] Input: goodix-berlin - fix vddio regulator references As per dt-bindings the property is called vddio-supply, so use the correct name in the driver instead of iovdd. The datasheet also calls the supply 'VDDIO'. Fixes: 44362279bdd4 ("Input: add core support for Goodix Berlin Touchscreen IC") Cc: stable@vger.kernel.org Signed-off-by: Luca Weiss Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250103-goodix-berlin-fixes-v1-2-b014737b08b2@fairphone.com Signed-off-by: Dmitry Torokhov --- .../input/touchscreen/goodix_berlin_core.c | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/input/touchscreen/goodix_berlin_core.c b/drivers/input/touchscreen/goodix_berlin_core.c index e273fb8edc6b9..7f8cfdd106fae 100644 --- a/drivers/input/touchscreen/goodix_berlin_core.c +++ b/drivers/input/touchscreen/goodix_berlin_core.c @@ -165,7 +165,7 @@ struct goodix_berlin_core { struct device *dev; struct regmap *regmap; struct regulator *avdd; - struct regulator *iovdd; + struct regulator *vddio; struct gpio_desc *reset_gpio; struct touchscreen_properties props; struct goodix_berlin_fw_version fw_version; @@ -248,19 +248,19 @@ static int goodix_berlin_power_on(struct goodix_berlin_core *cd) { int error; - error = regulator_enable(cd->iovdd); + error = regulator_enable(cd->vddio); if (error) { - dev_err(cd->dev, "Failed to enable iovdd: %d\n", error); + dev_err(cd->dev, "Failed to enable vddio: %d\n", error); return error; } - /* Vendor waits 3ms for IOVDD to settle */ + /* Vendor waits 3ms for VDDIO to settle */ usleep_range(3000, 3100); error = regulator_enable(cd->avdd); if (error) { dev_err(cd->dev, "Failed to enable avdd: %d\n", error); - goto err_iovdd_disable; + goto err_vddio_disable; } /* Vendor waits 15ms for AVDD to settle */ @@ -283,8 +283,8 @@ static int goodix_berlin_power_on(struct goodix_berlin_core *cd) err_dev_reset: gpiod_set_value_cansleep(cd->reset_gpio, 1); regulator_disable(cd->avdd); -err_iovdd_disable: - regulator_disable(cd->iovdd); +err_vddio_disable: + regulator_disable(cd->vddio); return error; } @@ -292,7 +292,7 @@ static void goodix_berlin_power_off(struct goodix_berlin_core *cd) { gpiod_set_value_cansleep(cd->reset_gpio, 1); regulator_disable(cd->avdd); - regulator_disable(cd->iovdd); + regulator_disable(cd->vddio); } static int goodix_berlin_read_version(struct goodix_berlin_core *cd) @@ -744,10 +744,10 @@ int goodix_berlin_probe(struct device *dev, int irq, const struct input_id *id, return dev_err_probe(dev, PTR_ERR(cd->avdd), "Failed to request avdd regulator\n"); - cd->iovdd = devm_regulator_get(dev, "iovdd"); - if (IS_ERR(cd->iovdd)) - return dev_err_probe(dev, PTR_ERR(cd->iovdd), - "Failed to request iovdd regulator\n"); + cd->vddio = devm_regulator_get(dev, "vddio"); + if (IS_ERR(cd->vddio)) + return dev_err_probe(dev, PTR_ERR(cd->vddio), + "Failed to request vddio regulator\n"); error = goodix_berlin_power_on(cd); if (error) { -- GitLab From b5799106b44e1df594f4696500dbbc3b326bba18 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 24 Feb 2025 15:45:38 +0000 Subject: [PATCH 0993/1585] iomap: Minor code simplification in iomap_dio_bio_iter() Combine 'else' and 'if' conditional statements onto a single line and drop unrequired braces, as is standard coding style. The code had been like this since commit c3b0e880bbfa ("iomap: support REQ_OP_ZONE_APPEND"). Signed-off-by: John Garry Link: https://lore.kernel.org/r/20250224154538.548028-1-john.g.garry@oracle.com Reviewed-by: "Darrick J. Wong" Signed-off-by: Christian Brauner --- fs/iomap/direct-io.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index b521eb15759e8..0e47da82b0c24 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -427,12 +427,10 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, bio_put(bio); goto zero_tail; } - if (dio->flags & IOMAP_DIO_WRITE) { + if (dio->flags & IOMAP_DIO_WRITE) task_io_account_write(n); - } else { - if (dio->flags & IOMAP_DIO_DIRTY) - bio_set_pages_dirty(bio); - } + else if (dio->flags & IOMAP_DIO_DIRTY) + bio_set_pages_dirty(bio); dio->size += n; copied += n; -- GitLab From 423de5b5bc5b267586b449abd1c4fde562aa0cf9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 21 Feb 2025 17:57:11 +0100 Subject: [PATCH 0994/1585] thermal/of: Fix cdev lookup in thermal_of_should_bind() Since thermal_of_should_bind() terminates the loop after processing the first child found in cooling-maps, it will never match more than one cdev to a given trip point which is incorrect, as there may be cooling-maps associating one trip point with multiple cooling devices. Address this by letting the loop continue until either all children have been processed or a matching one has been found. To avoid adding conditionals or goto statements, put the loop in question into a separate function and make that function return right away after finding a matching cooling-maps entry. Fixes: 94c6110b0b13 ("thermal/of: Use the .should_bind() thermal zone callback") Link: https://lore.kernel.org/linux-pm/20250219-fix-thermal-of-v1-1-de36e7a590c4@chromium.org/ Reported-by: Yu-Che Cheng Signed-off-by: Rafael J. Wysocki Reviewed-by: Yu-Che Cheng Tested-by: Yu-Che Cheng Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Link: https://patch.msgid.link/2788228.mvXUDI8C0e@rjwysocki.net --- drivers/thermal/thermal_of.c | 50 +++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index 5ab4ce4daaebd..5401f03d6b6c1 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -274,6 +274,34 @@ static bool thermal_of_get_cooling_spec(struct device_node *map_np, int index, return true; } +static bool thermal_of_cm_lookup(struct device_node *cm_np, + const struct thermal_trip *trip, + struct thermal_cooling_device *cdev, + struct cooling_spec *c) +{ + for_each_child_of_node_scoped(cm_np, child) { + struct device_node *tr_np; + int count, i; + + tr_np = of_parse_phandle(child, "trip", 0); + if (tr_np != trip->priv) + continue; + + /* The trip has been found, look up the cdev. */ + count = of_count_phandle_with_args(child, "cooling-device", + "#cooling-cells"); + if (count <= 0) + pr_err("Add a cooling_device property with at least one device\n"); + + for (i = 0; i < count; i++) { + if (thermal_of_get_cooling_spec(child, i, cdev, c)) + return true; + } + } + + return false; +} + static bool thermal_of_should_bind(struct thermal_zone_device *tz, const struct thermal_trip *trip, struct thermal_cooling_device *cdev, @@ -293,27 +321,7 @@ static bool thermal_of_should_bind(struct thermal_zone_device *tz, goto out; /* Look up the trip and the cdev in the cooling maps. */ - for_each_child_of_node_scoped(cm_np, child) { - struct device_node *tr_np; - int count, i; - - tr_np = of_parse_phandle(child, "trip", 0); - if (tr_np != trip->priv) - continue; - - /* The trip has been found, look up the cdev. */ - count = of_count_phandle_with_args(child, "cooling-device", "#cooling-cells"); - if (count <= 0) - pr_err("Add a cooling_device property with at least one device\n"); - - for (i = 0; i < count; i++) { - result = thermal_of_get_cooling_spec(child, i, cdev, c); - if (result) - break; - } - - break; - } + result = thermal_of_cm_lookup(cm_np, trip, cdev, c); of_node_put(cm_np); out: -- GitLab From 0cde378a10c1cbfaa8dd2b89672d42f36c2809c3 Mon Sep 17 00:00:00 2001 From: Yu-Che Cheng Date: Sat, 22 Feb 2025 11:20:34 +0800 Subject: [PATCH 0995/1585] thermal: gov_power_allocator: Update total_weight on bind and cdev updates params->total_weight is not initialized during bind and not updated when the bound cdev changes. The cooling device weight will not be used due to the uninitialized total_weight, until an update via sysfs is triggered. The bound cdevs are updated during thermal zone registration, where each cooling device will be bound to the thermal zone one by one, but power_allocator_bind() can be called without an additional cdev update when manually changing the policy of a thermal zone via sysfs. Add a new function to handle weight update logic, including updating total_weight, and call it when bind, weight changes, and cdev updates to ensure total_weight is always correct. Fixes: a3cd6db4cc2e ("thermal: gov_power_allocator: Support new update callback of weights") Signed-off-by: Yu-Che Cheng Link: https://patch.msgid.link/20250222-fix-power-allocator-weight-v2-1-a94de86b685a@chromium.org [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/thermal/gov_power_allocator.c | 30 ++++++++++++++++++++------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c index 3b626db55b2b9..0d9f636c80f4d 100644 --- a/drivers/thermal/gov_power_allocator.c +++ b/drivers/thermal/gov_power_allocator.c @@ -641,6 +641,22 @@ static int allocate_actors_buffer(struct power_allocator_params *params, return ret; } +static void power_allocator_update_weight(struct power_allocator_params *params) +{ + const struct thermal_trip_desc *td; + struct thermal_instance *instance; + + if (!params->trip_max) + return; + + td = trip_to_trip_desc(params->trip_max); + + params->total_weight = 0; + list_for_each_entry(instance, &td->thermal_instances, trip_node) + if (power_actor_is_valid(instance)) + params->total_weight += instance->weight; +} + static void power_allocator_update_tz(struct thermal_zone_device *tz, enum thermal_notify_event reason) { @@ -656,16 +672,12 @@ static void power_allocator_update_tz(struct thermal_zone_device *tz, if (power_actor_is_valid(instance)) num_actors++; - if (num_actors == params->num_actors) - return; + if (num_actors != params->num_actors) + allocate_actors_buffer(params, num_actors); - allocate_actors_buffer(params, num_actors); - break; + fallthrough; case THERMAL_INSTANCE_WEIGHT_CHANGED: - params->total_weight = 0; - list_for_each_entry(instance, &td->thermal_instances, trip_node) - if (power_actor_is_valid(instance)) - params->total_weight += instance->weight; + power_allocator_update_weight(params); break; default: break; @@ -731,6 +743,8 @@ static int power_allocator_bind(struct thermal_zone_device *tz) tz->governor_data = params; + power_allocator_update_weight(params); + return 0; free_params: -- GitLab From 4b90de5bc0f5a6d1151acd74c838275f9b7be3a5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Feb 2025 15:48:52 -0800 Subject: [PATCH 0996/1585] xfs: reduce context switches for synchronous buffered I/O Currently all metadata I/O completions happen in the m_buf_workqueue workqueue. But for synchronous I/O (i.e. all buffer reads) there is no need for that, as there always is a called in process context that is waiting for the I/O. Factor out the guts of xfs_buf_ioend into a separate helper and call it from xfs_buf_iowait to avoid a double an extra context switch to the workqueue. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_buf.c | 43 +++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 15bb790359f81..dfc1849b3314b 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1345,6 +1345,7 @@ xfs_buf_ioend_handle_error( resubmit: xfs_buf_ioerror(bp, 0); bp->b_flags |= (XBF_DONE | XBF_WRITE_FAIL); + reinit_completion(&bp->b_iowait); xfs_buf_submit(bp); return true; out_stale: @@ -1355,8 +1356,9 @@ xfs_buf_ioend_handle_error( return false; } -static void -xfs_buf_ioend( +/* returns false if the caller needs to resubmit the I/O, else true */ +static bool +__xfs_buf_ioend( struct xfs_buf *bp) { trace_xfs_buf_iodone(bp, _RET_IP_); @@ -1376,7 +1378,7 @@ xfs_buf_ioend( } if (unlikely(bp->b_error) && xfs_buf_ioend_handle_error(bp)) - return; + return false; /* clear the retry state */ bp->b_last_error = 0; @@ -1397,7 +1399,15 @@ xfs_buf_ioend( bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD | _XBF_LOGRECOVERY); + return true; +} +static void +xfs_buf_ioend( + struct xfs_buf *bp) +{ + if (!__xfs_buf_ioend(bp)) + return; if (bp->b_flags & XBF_ASYNC) xfs_buf_relse(bp); else @@ -1411,15 +1421,8 @@ xfs_buf_ioend_work( struct xfs_buf *bp = container_of(work, struct xfs_buf, b_ioend_work); - xfs_buf_ioend(bp); -} - -static void -xfs_buf_ioend_async( - struct xfs_buf *bp) -{ - INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work); - queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work); + if (__xfs_buf_ioend(bp)) + xfs_buf_relse(bp); } void @@ -1491,7 +1494,13 @@ xfs_buf_bio_end_io( XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR)) xfs_buf_ioerror(bp, -EIO); - xfs_buf_ioend_async(bp); + if (bp->b_flags & XBF_ASYNC) { + INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work); + queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work); + } else { + complete(&bp->b_iowait); + } + bio_put(bio); } @@ -1568,9 +1577,11 @@ xfs_buf_iowait( { ASSERT(!(bp->b_flags & XBF_ASYNC)); - trace_xfs_buf_iowait(bp, _RET_IP_); - wait_for_completion(&bp->b_iowait); - trace_xfs_buf_iowait_done(bp, _RET_IP_); + do { + trace_xfs_buf_iowait(bp, _RET_IP_); + wait_for_completion(&bp->b_iowait); + trace_xfs_buf_iowait_done(bp, _RET_IP_); + } while (!__xfs_buf_ioend(bp)); return bp->b_error; } -- GitLab From efc5f7a9f3d887ce44b7610bc39388094b6f97d5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Feb 2025 15:48:53 -0800 Subject: [PATCH 0997/1585] xfs: decouple buffer readahead from the normal buffer read path xfs_buf_readahead_map is the only caller of xfs_buf_read_map and thus _xfs_buf_read that is not synchronous. Split it from xfs_buf_read_map so that the asynchronous path is self-contained and the now purely synchronous xfs_buf_read_map / _xfs_buf_read implementation can be simplified. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_buf.c | 41 ++++++++++++++++++++-------------------- fs/xfs/xfs_buf.h | 2 +- fs/xfs/xfs_log_recover.c | 2 +- fs/xfs/xfs_trace.h | 1 + 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index dfc1849b3314b..4ea20483d5213 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -794,18 +794,13 @@ xfs_buf_get_map( int _xfs_buf_read( - struct xfs_buf *bp, - xfs_buf_flags_t flags) + struct xfs_buf *bp) { - ASSERT(!(flags & XBF_WRITE)); ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL); bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD | XBF_DONE); - bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); - + bp->b_flags |= XBF_READ; xfs_buf_submit(bp); - if (flags & XBF_ASYNC) - return 0; return xfs_buf_iowait(bp); } @@ -857,6 +852,8 @@ xfs_buf_read_map( struct xfs_buf *bp; int error; + ASSERT(!(flags & (XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD))); + flags |= XBF_READ; *bpp = NULL; @@ -870,21 +867,11 @@ xfs_buf_read_map( /* Initiate the buffer read and wait. */ XFS_STATS_INC(target->bt_mount, xb_get_read); bp->b_ops = ops; - error = _xfs_buf_read(bp, flags); - - /* Readahead iodone already dropped the buffer, so exit. */ - if (flags & XBF_ASYNC) - return 0; + error = _xfs_buf_read(bp); } else { /* Buffer already read; all we need to do is check it. */ error = xfs_buf_reverify(bp, ops); - /* Readahead already finished; drop the buffer and exit. */ - if (flags & XBF_ASYNC) { - xfs_buf_relse(bp); - return 0; - } - /* We do not want read in the flags */ bp->b_flags &= ~XBF_READ; ASSERT(bp->b_ops != NULL || ops == NULL); @@ -936,6 +923,7 @@ xfs_buf_readahead_map( int nmaps, const struct xfs_buf_ops *ops) { + const xfs_buf_flags_t flags = XBF_READ | XBF_ASYNC | XBF_READ_AHEAD; struct xfs_buf *bp; /* @@ -945,9 +933,20 @@ xfs_buf_readahead_map( if (xfs_buftarg_is_mem(target)) return; - xfs_buf_read_map(target, map, nmaps, - XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops, - __this_address); + if (xfs_buf_get_map(target, map, nmaps, flags | XBF_TRYLOCK, &bp)) + return; + trace_xfs_buf_readahead(bp, 0, _RET_IP_); + + if (bp->b_flags & XBF_DONE) { + xfs_buf_reverify(bp, ops); + xfs_buf_relse(bp); + return; + } + XFS_STATS_INC(target->bt_mount, xb_get_read); + bp->b_ops = ops; + bp->b_flags &= ~(XBF_WRITE | XBF_DONE); + bp->b_flags |= flags; + xfs_buf_submit(bp); } /* diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 3b4ed42e11c01..2e747555ad3fa 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -291,7 +291,7 @@ int xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr, size_t numblks, xfs_buf_flags_t flags, struct xfs_buf **bpp, const struct xfs_buf_ops *ops); -int _xfs_buf_read(struct xfs_buf *bp, xfs_buf_flags_t flags); +int _xfs_buf_read(struct xfs_buf *bp); void xfs_buf_hold(struct xfs_buf *bp); /* Releasing Buffers */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index b3c27dbccce86..2f76531842f83 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3380,7 +3380,7 @@ xlog_do_recover( */ xfs_buf_lock(bp); xfs_buf_hold(bp); - error = _xfs_buf_read(bp, XBF_READ); + error = _xfs_buf_read(bp); if (error) { if (!xlog_is_shutdown(log)) { xfs_buf_ioerror_alert(bp, __this_address); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index b29462363b815..bfc2f12490224 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -593,6 +593,7 @@ DEFINE_EVENT(xfs_buf_flags_class, name, \ DEFINE_BUF_FLAGS_EVENT(xfs_buf_find); DEFINE_BUF_FLAGS_EVENT(xfs_buf_get); DEFINE_BUF_FLAGS_EVENT(xfs_buf_read); +DEFINE_BUF_FLAGS_EVENT(xfs_buf_readahead); TRACE_EVENT(xfs_buf_ioerror, TP_PROTO(struct xfs_buf *bp, int error, xfs_failaddr_t caller_ip), -- GitLab From 0d1120b9bbe48a2d119afe0dc64f9c0666745bc8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Feb 2025 15:48:54 -0800 Subject: [PATCH 0998/1585] xfs: remove most in-flight buffer accounting The buffer cache keeps a bt_io_count per-CPU counter to track all in-flight I/O, which is used to ensure no I/O is in flight when unmounting the file system. For most I/O we already keep track of inflight I/O at higher levels: - for synchronous I/O (xfs_buf_read/xfs_bwrite/xfs_buf_delwri_submit), the caller has a reference and waits for I/O completions using xfs_buf_iowait - for xfs_buf_delwri_submit_nowait the only caller (AIL writeback) tracks the log items that the buffer attached to This only leaves only xfs_buf_readahead_map as a submitter of asynchronous I/O that is not tracked by anything else. Replace the bt_io_count per-cpu counter with a more specific bt_readahead_count counter only tracking readahead I/O. This allows to simply increment it when submitting readahead I/O and decrementing it when it completed, and thus simplify xfs_buf_rele and remove the needed for the XBF_NO_IOACCT flags and the XFS_BSTATE_IN_FLIGHT buffer state. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_buf.c | 90 ++++++++------------------------------------ fs/xfs/xfs_buf.h | 5 +-- fs/xfs/xfs_buf_mem.c | 2 +- fs/xfs/xfs_mount.c | 7 +--- fs/xfs/xfs_rtalloc.c | 2 +- 5 files changed, 20 insertions(+), 86 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 4ea20483d5213..e161f3ab41087 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -29,11 +29,6 @@ struct kmem_cache *xfs_buf_cache; /* * Locking orders * - * xfs_buf_ioacct_inc: - * xfs_buf_ioacct_dec: - * b_sema (caller holds) - * b_lock - * * xfs_buf_stale: * b_sema (caller holds) * b_lock @@ -81,51 +76,6 @@ xfs_buf_vmap_len( return (bp->b_page_count * PAGE_SIZE); } -/* - * Bump the I/O in flight count on the buftarg if we haven't yet done so for - * this buffer. The count is incremented once per buffer (per hold cycle) - * because the corresponding decrement is deferred to buffer release. Buffers - * can undergo I/O multiple times in a hold-release cycle and per buffer I/O - * tracking adds unnecessary overhead. This is used for sychronization purposes - * with unmount (see xfs_buftarg_drain()), so all we really need is a count of - * in-flight buffers. - * - * Buffers that are never released (e.g., superblock, iclog buffers) must set - * the XBF_NO_IOACCT flag before I/O submission. Otherwise, the buftarg count - * never reaches zero and unmount hangs indefinitely. - */ -static inline void -xfs_buf_ioacct_inc( - struct xfs_buf *bp) -{ - if (bp->b_flags & XBF_NO_IOACCT) - return; - - ASSERT(bp->b_flags & XBF_ASYNC); - spin_lock(&bp->b_lock); - if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) { - bp->b_state |= XFS_BSTATE_IN_FLIGHT; - percpu_counter_inc(&bp->b_target->bt_io_count); - } - spin_unlock(&bp->b_lock); -} - -/* - * Clear the in-flight state on a buffer about to be released to the LRU or - * freed and unaccount from the buftarg. - */ -static inline void -__xfs_buf_ioacct_dec( - struct xfs_buf *bp) -{ - lockdep_assert_held(&bp->b_lock); - - if (bp->b_state & XFS_BSTATE_IN_FLIGHT) { - bp->b_state &= ~XFS_BSTATE_IN_FLIGHT; - percpu_counter_dec(&bp->b_target->bt_io_count); - } -} - /* * When we mark a buffer stale, we remove the buffer from the LRU and clear the * b_lru_ref count so that the buffer is freed immediately when the buffer @@ -156,8 +106,6 @@ xfs_buf_stale( * status now to preserve accounting consistency. */ spin_lock(&bp->b_lock); - __xfs_buf_ioacct_dec(bp); - atomic_set(&bp->b_lru_ref, 0); if (!(bp->b_state & XFS_BSTATE_DISPOSE) && (list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru))) @@ -946,6 +894,7 @@ xfs_buf_readahead_map( bp->b_ops = ops; bp->b_flags &= ~(XBF_WRITE | XBF_DONE); bp->b_flags |= flags; + percpu_counter_inc(&target->bt_readahead_count); xfs_buf_submit(bp); } @@ -1002,10 +951,12 @@ xfs_buf_get_uncached( struct xfs_buf *bp; DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks); + /* there are currently no valid flags for xfs_buf_get_uncached */ + ASSERT(flags == 0); + *bpp = NULL; - /* flags might contain irrelevant bits, pass only what we care about */ - error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp); + error = _xfs_buf_alloc(target, &map, 1, flags, &bp); if (error) return error; @@ -1059,7 +1010,6 @@ xfs_buf_rele_uncached( spin_unlock(&bp->b_lock); return; } - __xfs_buf_ioacct_dec(bp); spin_unlock(&bp->b_lock); xfs_buf_free(bp); } @@ -1078,19 +1028,11 @@ xfs_buf_rele_cached( spin_lock(&bp->b_lock); ASSERT(bp->b_hold >= 1); if (bp->b_hold > 1) { - /* - * Drop the in-flight state if the buffer is already on the LRU - * and it holds the only reference. This is racy because we - * haven't acquired the pag lock, but the use of _XBF_IN_FLIGHT - * ensures the decrement occurs only once per-buf. - */ - if (--bp->b_hold == 1 && !list_empty(&bp->b_lru)) - __xfs_buf_ioacct_dec(bp); + bp->b_hold--; goto out_unlock; } /* we are asked to drop the last reference */ - __xfs_buf_ioacct_dec(bp); if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { /* * If the buffer is added to the LRU, keep the reference to the @@ -1370,6 +1312,8 @@ __xfs_buf_ioend( bp->b_ops->verify_read(bp); if (!bp->b_error) bp->b_flags |= XBF_DONE; + if (bp->b_flags & XBF_READ_AHEAD) + percpu_counter_dec(&bp->b_target->bt_readahead_count); } else { if (!bp->b_error) { bp->b_flags &= ~XBF_WRITE_FAIL; @@ -1658,9 +1602,6 @@ xfs_buf_submit( */ bp->b_error = 0; - if (bp->b_flags & XBF_ASYNC) - xfs_buf_ioacct_inc(bp); - if ((bp->b_flags & XBF_WRITE) && !xfs_buf_verify_write(bp)) { xfs_force_shutdown(bp->b_mount, SHUTDOWN_CORRUPT_INCORE); xfs_buf_ioend(bp); @@ -1786,9 +1727,8 @@ xfs_buftarg_wait( struct xfs_buftarg *btp) { /* - * First wait on the buftarg I/O count for all in-flight buffers to be - * released. This is critical as new buffers do not make the LRU until - * they are released. + * First wait for all in-flight readahead buffers to be released. This is + * critical as new buffers do not make the LRU until they are released. * * Next, flush the buffer workqueue to ensure all completion processing * has finished. Just waiting on buffer locks is not sufficient for @@ -1797,7 +1737,7 @@ xfs_buftarg_wait( * all reference counts have been dropped before we start walking the * LRU list. */ - while (percpu_counter_sum(&btp->bt_io_count)) + while (percpu_counter_sum(&btp->bt_readahead_count)) delay(100); flush_workqueue(btp->bt_mount->m_buf_workqueue); } @@ -1914,8 +1854,8 @@ xfs_destroy_buftarg( struct xfs_buftarg *btp) { shrinker_free(btp->bt_shrinker); - ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0); - percpu_counter_destroy(&btp->bt_io_count); + ASSERT(percpu_counter_sum(&btp->bt_readahead_count) == 0); + percpu_counter_destroy(&btp->bt_readahead_count); list_lru_destroy(&btp->bt_lru); } @@ -1969,7 +1909,7 @@ xfs_init_buftarg( if (list_lru_init(&btp->bt_lru)) return -ENOMEM; - if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL)) + if (percpu_counter_init(&btp->bt_readahead_count, 0, GFP_KERNEL)) goto out_destroy_lru; btp->bt_shrinker = @@ -1983,7 +1923,7 @@ xfs_init_buftarg( return 0; out_destroy_io_count: - percpu_counter_destroy(&btp->bt_io_count); + percpu_counter_destroy(&btp->bt_readahead_count); out_destroy_lru: list_lru_destroy(&btp->bt_lru); return -ENOMEM; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 2e747555ad3fa..80e06eecaf56e 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -27,7 +27,6 @@ struct xfs_buf; #define XBF_READ (1u << 0) /* buffer intended for reading from device */ #define XBF_WRITE (1u << 1) /* buffer intended for writing to device */ #define XBF_READ_AHEAD (1u << 2) /* asynchronous read-ahead */ -#define XBF_NO_IOACCT (1u << 3) /* bypass I/O accounting (non-LRU bufs) */ #define XBF_ASYNC (1u << 4) /* initiator will not wait for completion */ #define XBF_DONE (1u << 5) /* all pages in the buffer uptodate */ #define XBF_STALE (1u << 6) /* buffer has been staled, do not find it */ @@ -58,7 +57,6 @@ typedef unsigned int xfs_buf_flags_t; { XBF_READ, "READ" }, \ { XBF_WRITE, "WRITE" }, \ { XBF_READ_AHEAD, "READ_AHEAD" }, \ - { XBF_NO_IOACCT, "NO_IOACCT" }, \ { XBF_ASYNC, "ASYNC" }, \ { XBF_DONE, "DONE" }, \ { XBF_STALE, "STALE" }, \ @@ -77,7 +75,6 @@ typedef unsigned int xfs_buf_flags_t; * Internal state flags. */ #define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ -#define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */ struct xfs_buf_cache { struct rhashtable bc_hash; @@ -116,7 +113,7 @@ struct xfs_buftarg { struct shrinker *bt_shrinker; struct list_lru bt_lru; - struct percpu_counter bt_io_count; + struct percpu_counter bt_readahead_count; struct ratelimit_state bt_ioerror_rl; /* Atomic write unit values */ diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c index 07bebbfb16ee1..5b64a2b3b113f 100644 --- a/fs/xfs/xfs_buf_mem.c +++ b/fs/xfs/xfs_buf_mem.c @@ -117,7 +117,7 @@ xmbuf_free( struct xfs_buftarg *btp) { ASSERT(xfs_buftarg_is_mem(btp)); - ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0); + ASSERT(percpu_counter_sum(&btp->bt_readahead_count) == 0); trace_xmbuf_free(btp); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 477c5262cf912..b69356582b86f 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -181,14 +181,11 @@ xfs_readsb( /* * Allocate a (locked) buffer to hold the superblock. This will be kept - * around at all times to optimize access to the superblock. Therefore, - * set XBF_NO_IOACCT to make sure it doesn't hold the buftarg count - * elevated. + * around at all times to optimize access to the superblock. */ reread: error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, - BTOBB(sector_size), XBF_NO_IOACCT, &bp, - buf_ops); + BTOBB(sector_size), 0, &bp, buf_ops); if (error) { if (loud) xfs_warn(mp, "SB validate failed with error %d.", error); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index d8e6d073d64dc..57bef567e0116 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1407,7 +1407,7 @@ xfs_rtmount_readsb( /* m_blkbb_log is not set up yet */ error = xfs_buf_read_uncached(mp->m_rtdev_targp, XFS_RTSB_DADDR, - mp->m_sb.sb_blocksize >> BBSHIFT, XBF_NO_IOACCT, &bp, + mp->m_sb.sb_blocksize >> BBSHIFT, 0, &bp, &xfs_rtsb_buf_ops); if (error) { xfs_warn(mp, "rt sb validate failed with error %d.", error); -- GitLab From 9b47d37496e2669078c8616334e5a7200f91681a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Feb 2025 15:48:55 -0800 Subject: [PATCH 0999/1585] xfs: remove the XBF_STALE check from xfs_buf_rele_cached xfs_buf_stale already set b_lru_ref to 0, and thus prevents the buffer from moving to the LRU. Remove the duplicate check. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_buf.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index e161f3ab41087..5d560e9073f42 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -99,12 +99,6 @@ xfs_buf_stale( */ bp->b_flags &= ~_XBF_DELWRI_Q; - /* - * Once the buffer is marked stale and unlocked, a subsequent lookup - * could reset b_flags. There is no guarantee that the buffer is - * unaccounted (released to LRU) before that occurs. Drop in-flight - * status now to preserve accounting consistency. - */ spin_lock(&bp->b_lock); atomic_set(&bp->b_lru_ref, 0); if (!(bp->b_state & XFS_BSTATE_DISPOSE) && @@ -1033,7 +1027,7 @@ xfs_buf_rele_cached( } /* we are asked to drop the last reference */ - if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { + if (atomic_read(&bp->b_lru_ref)) { /* * If the buffer is added to the LRU, keep the reference to the * buffer for the LRU and clear the (now stale) dispose list -- GitLab From de2c211868b9424f9aa9b3432c4430825bafb41b Mon Sep 17 00:00:00 2001 From: Philo Lu Date: Sat, 22 Feb 2025 11:35:18 +0800 Subject: [PATCH 1000/1585] ipvs: Always clear ipvs_property flag in skb_scrub_packet() We found an issue when using bpf_redirect with ipvs NAT mode after commit ff70202b2d1a ("dev_forward_skb: do not scrub skb mark within the same name space"). Particularly, we use bpf_redirect to return the skb directly back to the netif it comes from, i.e., xnet is false in skb_scrub_packet(), and then ipvs_property is preserved and SNAT is skipped in the rx path. ipvs_property has been already cleared when netns is changed in commit 2b5ec1a5f973 ("netfilter/ipvs: clear ipvs_property flag when SKB net namespace changed"). This patch just clears it in spite of netns. Fixes: 2b5ec1a5f973 ("netfilter/ipvs: clear ipvs_property flag when SKB net namespace changed") Signed-off-by: Philo Lu Acked-by: Julian Anastasov Link: https://patch.msgid.link/20250222033518.126087-1-lulie@linux.alibaba.com Signed-off-by: Paolo Abeni --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7b03b64fdcb27..b1c81687e9d82 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -6033,11 +6033,11 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->offload_fwd_mark = 0; skb->offload_l3_fwd_mark = 0; #endif + ipvs_reset(skb); if (!xnet) return; - ipvs_reset(skb); skb->mark = 0; skb_clear_tstamp(skb); } -- GitLab From bc50682128bde778a1ddc457a02d92a637c20c6f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 22 Feb 2025 12:28:04 -0500 Subject: [PATCH 1001/1585] MAINTAINERS: socket timestamping: add Jason Xing as reviewer Jason has been helping as reviewer for this area already, and has contributed various features directly, notably BPF timestamping. Also extend coverage to all timestamping tests, including those new with BPF timestamping. Link: https://lore.kernel.org/netdev/20250220072940.99994-1-kerneljasonxing@gmail.com/ Signed-off-by: Willem de Bruijn Reviewed-by: Jason Xing Link: https://patch.msgid.link/20250222172839.642079-1-willemdebruijn.kernel@gmail.com Signed-off-by: Paolo Abeni --- MAINTAINERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ac15093537c6b..c92bcd02049e6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21923,10 +21923,13 @@ F: sound/soc/uniphier/ SOCKET TIMESTAMPING M: Willem de Bruijn +R: Jason Xing S: Maintained F: Documentation/networking/timestamping.rst F: include/linux/net_tstamp.h F: include/uapi/linux/net_tstamp.h +F: tools/testing/selftests/bpf/*/net_timestamping* +F: tools/testing/selftests/net/*timestamp* F: tools/testing/selftests/net/so_txtime.c SOEKRIS NET48XX LED SUPPORT -- GitLab From 56a677293509b2a0d39ac8d02b583c1ab1fe4d94 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Tue, 25 Feb 2025 17:37:15 +0800 Subject: [PATCH 1002/1585] ASoC: SOF: Intel: don't check number of sdw links when set dmic_fixup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, we assume that the PCH DMIC pins are pin-muxed with SoundWire links. However, we do see a HW design that use PCH DMIC along with 3 SoundWire links. Remove the check now. With this change the PCM DMIC will be presented if it is reported by the BIOS irrespective of whether there are SDW links present or not. Signed-off-by: Bard Liao Reviewed-by: Ranjani Sridharan Reviewed-by: Péter Ujfalusi Link: https://patch.msgid.link/20250225093716.67240-2-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index be689f6e10c81..a1ccd95da8bb7 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -1312,22 +1312,8 @@ struct snd_soc_acpi_mach *hda_machine_select(struct snd_sof_dev *sdev) /* report to machine driver if any DMICs are found */ mach->mach_params.dmic_num = check_dmic_num(sdev); - if (sdw_mach_found) { - /* - * DMICs use up to 4 pins and are typically pin-muxed with SoundWire - * link 2 and 3, or link 1 and 2, thus we only try to enable dmics - * if all conditions are true: - * a) 2 or fewer links are used by SoundWire - * b) the NHLT table reports the presence of microphones - */ - if (hweight_long(mach->link_mask) <= 2) - dmic_fixup = true; - else - mach->mach_params.dmic_num = 0; - } else { - if (mach->tplg_quirk_mask & SND_SOC_ACPI_TPLG_INTEL_DMIC_NUMBER) - dmic_fixup = true; - } + if (sdw_mach_found || mach->tplg_quirk_mask & SND_SOC_ACPI_TPLG_INTEL_DMIC_NUMBER) + dmic_fixup = true; if (tplg_fixup && dmic_fixup && -- GitLab From 4bcef04ca67fa8c46591a211885bb9c37f25dbbd Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Tue, 25 Feb 2025 17:37:16 +0800 Subject: [PATCH 1003/1585] ASoC: Intel: sof_sdw: warn both sdw and pch dmic are used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Typically, SoundWire MIC and PCH DMIC will not coexist. However, we may want to use both of them in some special cases. Add a warning to let users know that SoundWire MIC and PCH DMIC are both present and they could overwrite it with kernel params. Signed-off-by: Bard Liao Reviewed-by: Ranjani Sridharan Reviewed-by: Péter Ujfalusi Link: https://patch.msgid.link/20250225093716.67240-3-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_sdw.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 203b07d4d833c..c13064c777261 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -803,7 +803,9 @@ static int create_sdw_dailink(struct snd_soc_card *card, int *be_id, struct snd_soc_codec_conf **codec_conf) { struct device *dev = card->dev; + struct snd_soc_acpi_mach *mach = dev_get_platdata(card->dev); struct asoc_sdw_mc_private *ctx = snd_soc_card_get_drvdata(card); + struct snd_soc_acpi_mach_params *mach_params = &mach->mach_params; struct intel_mc_ctx *intel_ctx = (struct intel_mc_ctx *)ctx->private; struct asoc_sdw_endpoint *sof_end; int stream; @@ -900,6 +902,11 @@ static int create_sdw_dailink(struct snd_soc_card *card, codecs[j].name = sof_end->codec_name; codecs[j].dai_name = sof_end->dai_info->dai_name; + if (sof_end->dai_info->dai_type == SOC_SDW_DAI_TYPE_MIC && + mach_params->dmic_num > 0) { + dev_warn(dev, + "Both SDW DMIC and PCH DMIC are present, if incorrect, please set kernel params snd_sof_intel_hda_generic dmic_num=0 to disable PCH DMIC\n"); + } j++; } -- GitLab From 88ec7eedbbd21cad38707620ad6c48a4e9a87c18 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 17 Jan 2025 07:19:11 -0800 Subject: [PATCH 1004/1585] perf/x86: Fix low freqency setting issue Perf doesn't work at low frequencies: $ perf record -e cpu_core/instructions/ppp -F 120 Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (cpu_core/instructions/ppp). "dmesg | grep -i perf" may provide additional information. The limit_period() check avoids a low sampling period on a counter. It doesn't intend to limit the frequency. The check in the x86_pmu_hw_config() should be limited to non-freq mode. The attr.sample_period and attr.sample_freq are union. The attr.sample_period should not be used to indicate the frequency mode. Fixes: c46e665f0377 ("perf/x86: Add INST_RETIRED.ALL workarounds") Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Reviewed-by: Ravi Bangoria Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250117151913.3043942-1-kan.liang@linux.intel.com Closes: https://lore.kernel.org/lkml/20250115154949.3147-1-ravi.bangoria@amd.com/ --- arch/x86/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 8f218ac0d445c..2092d615333da 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -628,7 +628,7 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.type == event->pmu->type) event->hw.config |= x86_pmu_get_event_config(event); - if (event->attr.sample_period && x86_pmu.limit_period) { + if (!event->attr.freq && x86_pmu.limit_period) { s64 left = event->attr.sample_period; x86_pmu.limit_period(event, &left); if (left > event->attr.sample_period) -- GitLab From 0d39844150546fa1415127c5fbae26db64070dd3 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 17 Jan 2025 07:19:12 -0800 Subject: [PATCH 1005/1585] perf/core: Fix low freq setting via IOC_PERIOD A low attr::freq value cannot be set via IOC_PERIOD on some platforms. The perf_event_check_period() introduced in: 81ec3f3c4c4d ("perf/x86: Add check_period PMU callback") was intended to check the period, rather than the frequency. A low frequency may be mistakenly rejected by limit_period(). Fix it. Fixes: 81ec3f3c4c4d ("perf/x86: Add check_period PMU callback") Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Reviewed-by: Ravi Bangoria Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250117151913.3043942-2-kan.liang@linux.intel.com Closes: https://lore.kernel.org/lkml/20250115154949.3147-1-ravi.bangoria@amd.com/ --- kernel/events/core.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 086d46d096963..6364319e2f888 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5969,14 +5969,15 @@ static int _perf_event_period(struct perf_event *event, u64 value) if (!value) return -EINVAL; - if (event->attr.freq && value > sysctl_perf_event_sample_rate) - return -EINVAL; - - if (perf_event_check_period(event, value)) - return -EINVAL; - - if (!event->attr.freq && (value & (1ULL << 63))) - return -EINVAL; + if (event->attr.freq) { + if (value > sysctl_perf_event_sample_rate) + return -EINVAL; + } else { + if (perf_event_check_period(event, value)) + return -EINVAL; + if (value & (1ULL << 63)) + return -EINVAL; + } event_function_call(event, __perf_event_period, &value); -- GitLab From 5bd566703e16b17d17f4fb648440d54f8967462c Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Fri, 21 Feb 2025 13:33:52 -0800 Subject: [PATCH 1006/1585] drm/xe/oa: Allow oa_exponent value of 0 OA exponent value of 0 is a valid value for periodic reports. Allow user to pass 0 for the OA sampling interval since it gets converted to 2 gt clock ticks. v2: Update the check in xe_oa_stream_init as well (Ashutosh) v3: Fix mi-rpc failure by setting default exponent to -1 (CI) v4: Add the Fixes tag Fixes: b6fd51c62119 ("drm/xe/oa/uapi: Define and parse OA stream properties") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20250221213352.1712932-1-umesh.nerlige.ramappa@intel.com (cherry picked from commit 30341f0b8ea71725cc4ab2c43e3a3b749892fc92) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_oa.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index fa873f3d0a9d1..eb6cd91e1e226 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1689,7 +1689,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format]; stream->sample = param->sample; - stream->periodic = param->period_exponent > 0; + stream->periodic = param->period_exponent >= 0; stream->period_exponent = param->period_exponent; stream->no_preempt = param->no_preempt; stream->wait_num_reports = param->wait_num_reports; @@ -1970,6 +1970,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f } param.xef = xef; + param.period_exponent = -1; ret = xe_oa_user_extensions(oa, XE_OA_USER_EXTN_FROM_OPEN, data, 0, ¶m); if (ret) return ret; @@ -2024,7 +2025,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f goto err_exec_q; } - if (param.period_exponent > 0) { + if (param.period_exponent >= 0) { u64 oa_period, oa_freq_hz; /* Requesting samples from OAG buffer is a privileged operation */ -- GitLab From fe08b7d5085a9774abc30c26d5aebc5b9cdd6091 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 25 Feb 2025 13:18:42 +0000 Subject: [PATCH 1007/1585] firmware: cs_dsp: Remove async regmap writes Change calls to async regmap write functions to use the normal blocking writes so that the cs35l56 driver can use spi_bus_lock() to gain exclusive access to the SPI bus. As this is part of a fix, it makes only the minimal change to swap the functions to the blocking equivalents. There's no need to risk reworking the buffer allocation logic that is now partially redundant. The async writes are a 12-year-old workaround for inefficiency of synchronous writes in the SPI subsystem. The SPI subsystem has since been changed to avoid the overheads, so this workaround should not be necessary. The cs35l56 driver needs to use spi_bus_lock() prevent bus activity while it is soft-resetting the cs35l56. But spi_bus_lock() is incompatible with spi_async() calls, which will fail with -EBUSY. Fixes: 8a731fd37f8b ("ASoC: cs35l56: Move utility functions to shared file") Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250225131843.113752-2-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/firmware/cirrus/cs_dsp.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/drivers/firmware/cirrus/cs_dsp.c b/drivers/firmware/cirrus/cs_dsp.c index 5365e9a430007..42433c19eb308 100644 --- a/drivers/firmware/cirrus/cs_dsp.c +++ b/drivers/firmware/cirrus/cs_dsp.c @@ -1609,8 +1609,8 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, goto out_fw; } - ret = regmap_raw_write_async(regmap, reg, buf->buf, - le32_to_cpu(region->len)); + ret = regmap_raw_write(regmap, reg, buf->buf, + le32_to_cpu(region->len)); if (ret != 0) { cs_dsp_err(dsp, "%s.%d: Failed to write %d bytes at %d in %s: %d\n", @@ -1625,12 +1625,6 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, regions++; } - ret = regmap_async_complete(regmap); - if (ret != 0) { - cs_dsp_err(dsp, "Failed to complete async write: %d\n", ret); - goto out_fw; - } - if (pos > firmware->size) cs_dsp_warn(dsp, "%s.%d: %zu bytes at end of file\n", file, regions, pos - firmware->size); @@ -1638,7 +1632,6 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, cs_dsp_debugfs_save_wmfwname(dsp, file); out_fw: - regmap_async_complete(regmap); cs_dsp_buf_free(&buf_list); if (ret == -EOVERFLOW) @@ -2326,8 +2319,8 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware cs_dsp_dbg(dsp, "%s.%d: Writing %d bytes at %x\n", file, blocks, le32_to_cpu(blk->len), reg); - ret = regmap_raw_write_async(regmap, reg, buf->buf, - le32_to_cpu(blk->len)); + ret = regmap_raw_write(regmap, reg, buf->buf, + le32_to_cpu(blk->len)); if (ret != 0) { cs_dsp_err(dsp, "%s.%d: Failed to write to %x in %s: %d\n", @@ -2339,10 +2332,6 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware blocks++; } - ret = regmap_async_complete(regmap); - if (ret != 0) - cs_dsp_err(dsp, "Failed to complete async write: %d\n", ret); - if (pos > firmware->size) cs_dsp_warn(dsp, "%s.%d: %zu bytes at end of file\n", file, blocks, pos - firmware->size); @@ -2350,7 +2339,6 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware cs_dsp_debugfs_save_binname(dsp, file); out_fw: - regmap_async_complete(regmap); cs_dsp_buf_free(&buf_list); if (ret == -EOVERFLOW) @@ -2561,8 +2549,8 @@ static int cs_dsp_adsp2_enable_core(struct cs_dsp *dsp) { int ret; - ret = regmap_update_bits_async(dsp->regmap, dsp->base + ADSP2_CONTROL, - ADSP2_SYS_ENA, ADSP2_SYS_ENA); + ret = regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL, + ADSP2_SYS_ENA, ADSP2_SYS_ENA); if (ret != 0) return ret; -- GitLab From 769c1b79295c38d60fde4c0a8f5f31e01360c54f Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 25 Feb 2025 13:18:43 +0000 Subject: [PATCH 1008/1585] ASoC: cs35l56: Prevent races when soft-resetting using SPI control When SPI is used for control, the driver must hold the SPI bus lock while issuing the sequence of writes to perform a soft reset. >From the time the driver writes the SYSTEM_RESET command until the driver does a write to terminate the reset, there must not be any activity on the SPI bus lines. If there is any SPI activity during the soft-reset, another soft-reset will be triggered. The state of the SPI chip select is irrelevant. A repeated soft-reset does not in itself cause any problems, and it is not an infinite loop. The problem is a race between these resets and the driver polling for boot completion. There is a time window between soft resets where the driver could read HALO_STATE as 2 (fully booted) while the chip is actually soft-resetting. Although this window is small, it is long enough that it is possible to hit it in normal operation. To prevent this race and ensure the chip really is fully booted, the driver calls spi_bus_lock() to prevent other activity while resetting. It then issues the SYSTEM_RESET mailbox command. After allowing sufficient time for reset to take effect, the driver issues a PING mailbox command, which will force completion of the full soft-reset sequence. The SPI bus lock can then be released. The mailbox is checked for any boot or wakeup response from the firmware, before the value in HALO_STATE will be trusted. This does not affect SoundWire or I2C control. Fixes: 8a731fd37f8b ("ASoC: cs35l56: Move utility functions to shared file") Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250225131843.113752-3-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 31 ++++++++++++ sound/pci/hda/cs35l56_hda_spi.c | 3 ++ sound/soc/codecs/cs35l56-shared.c | 80 +++++++++++++++++++++++++++++++ sound/soc/codecs/cs35l56-spi.c | 3 ++ 4 files changed, 117 insertions(+) diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index 3dc7a1551ac35..5d653a3491d07 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #define CS35L56_DEVID 0x0000000 @@ -61,6 +62,7 @@ #define CS35L56_IRQ1_MASK_8 0x000E0AC #define CS35L56_IRQ1_MASK_18 0x000E0D4 #define CS35L56_IRQ1_MASK_20 0x000E0DC +#define CS35L56_DSP_MBOX_1_RAW 0x0011000 #define CS35L56_DSP_VIRTUAL1_MBOX_1 0x0011020 #define CS35L56_DSP_VIRTUAL1_MBOX_2 0x0011024 #define CS35L56_DSP_VIRTUAL1_MBOX_3 0x0011028 @@ -224,6 +226,7 @@ #define CS35L56_HALO_STATE_SHUTDOWN 1 #define CS35L56_HALO_STATE_BOOT_DONE 2 +#define CS35L56_MBOX_CMD_PING 0x0A000000 #define CS35L56_MBOX_CMD_AUDIO_PLAY 0x0B000001 #define CS35L56_MBOX_CMD_AUDIO_PAUSE 0x0B000002 #define CS35L56_MBOX_CMD_AUDIO_REINIT 0x0B000003 @@ -254,6 +257,16 @@ #define CS35L56_NUM_BULK_SUPPLIES 3 #define CS35L56_NUM_DSP_REGIONS 5 +/* Additional margin for SYSTEM_RESET to control port ready on SPI */ +#define CS35L56_SPI_RESET_TO_PORT_READY_US (CS35L56_CONTROL_PORT_READY_US + 2500) + +struct cs35l56_spi_payload { + __be32 addr; + __be16 pad; + __be32 value; +} __packed; +static_assert(sizeof(struct cs35l56_spi_payload) == 10); + struct cs35l56_base { struct device *dev; struct regmap *regmap; @@ -269,6 +282,7 @@ struct cs35l56_base { s8 cal_index; struct cirrus_amp_cal_data cal_data; struct gpio_desc *reset_gpio; + struct cs35l56_spi_payload *spi_payload_buf; }; static inline bool cs35l56_is_otp_register(unsigned int reg) @@ -276,6 +290,23 @@ static inline bool cs35l56_is_otp_register(unsigned int reg) return (reg >> 16) == 3; } +static inline int cs35l56_init_config_for_spi(struct cs35l56_base *cs35l56, + struct spi_device *spi) +{ + cs35l56->spi_payload_buf = devm_kzalloc(&spi->dev, + sizeof(*cs35l56->spi_payload_buf), + GFP_KERNEL | GFP_DMA); + if (!cs35l56->spi_payload_buf) + return -ENOMEM; + + return 0; +} + +static inline bool cs35l56_is_spi(struct cs35l56_base *cs35l56) +{ + return IS_ENABLED(CONFIG_SPI_MASTER) && !!cs35l56->spi_payload_buf; +} + extern const struct regmap_config cs35l56_regmap_i2c; extern const struct regmap_config cs35l56_regmap_spi; extern const struct regmap_config cs35l56_regmap_sdw; diff --git a/sound/pci/hda/cs35l56_hda_spi.c b/sound/pci/hda/cs35l56_hda_spi.c index d4ee5bb7c4866..9035784669053 100644 --- a/sound/pci/hda/cs35l56_hda_spi.c +++ b/sound/pci/hda/cs35l56_hda_spi.c @@ -22,6 +22,9 @@ static int cs35l56_hda_spi_probe(struct spi_device *spi) return -ENOMEM; cs35l56->base.dev = &spi->dev; + ret = cs35l56_init_config_for_spi(&cs35l56->base, spi); + if (ret) + return ret; #ifdef CS35L56_WAKE_HOLD_TIME_US cs35l56->base.can_hibernate = true; diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index e0ed4fc11155a..e28bfefa72f33 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -303,6 +304,79 @@ void cs35l56_wait_min_reset_pulse(void) } EXPORT_SYMBOL_NS_GPL(cs35l56_wait_min_reset_pulse, "SND_SOC_CS35L56_SHARED"); +static const struct { + u32 addr; + u32 value; +} cs35l56_spi_system_reset_stages[] = { + { .addr = CS35L56_DSP_VIRTUAL1_MBOX_1, .value = CS35L56_MBOX_CMD_SYSTEM_RESET }, + /* The next write is necessary to delimit the soft reset */ + { .addr = CS35L56_DSP_MBOX_1_RAW, .value = CS35L56_MBOX_CMD_PING }, +}; + +static void cs35l56_spi_issue_bus_locked_reset(struct cs35l56_base *cs35l56_base, + struct spi_device *spi) +{ + struct cs35l56_spi_payload *buf = cs35l56_base->spi_payload_buf; + struct spi_transfer t = { + .tx_buf = buf, + .len = sizeof(*buf), + }; + struct spi_message m; + int i, ret; + + for (i = 0; i < ARRAY_SIZE(cs35l56_spi_system_reset_stages); i++) { + buf->addr = cpu_to_be32(cs35l56_spi_system_reset_stages[i].addr); + buf->value = cpu_to_be32(cs35l56_spi_system_reset_stages[i].value); + spi_message_init_with_transfers(&m, &t, 1); + ret = spi_sync_locked(spi, &m); + if (ret) + dev_warn(cs35l56_base->dev, "spi_sync failed: %d\n", ret); + + usleep_range(CS35L56_SPI_RESET_TO_PORT_READY_US, + 2 * CS35L56_SPI_RESET_TO_PORT_READY_US); + } +} + +static void cs35l56_spi_system_reset(struct cs35l56_base *cs35l56_base) +{ + struct spi_device *spi = to_spi_device(cs35l56_base->dev); + unsigned int val; + int read_ret, ret; + + /* + * There must not be any other SPI bus activity while the amp is + * soft-resetting. + */ + ret = spi_bus_lock(spi->controller); + if (ret) { + dev_warn(cs35l56_base->dev, "spi_bus_lock failed: %d\n", ret); + return; + } + + cs35l56_spi_issue_bus_locked_reset(cs35l56_base, spi); + spi_bus_unlock(spi->controller); + + /* + * Check firmware boot by testing for a response in MBOX_2. + * HALO_STATE cannot be trusted yet because the reset sequence + * can leave it with stale state. But MBOX is reset. + * The regmap must remain in cache-only until the chip has + * booted, so use a bypassed read. + */ + ret = read_poll_timeout(regmap_read_bypassed, read_ret, + (val > 0) && (val < 0xffffffff), + CS35L56_HALO_STATE_POLL_US, + CS35L56_HALO_STATE_TIMEOUT_US, + false, + cs35l56_base->regmap, + CS35L56_DSP_VIRTUAL1_MBOX_2, + &val); + if (ret) { + dev_err(cs35l56_base->dev, "SPI reboot timed out(%d): MBOX2=%#x\n", + read_ret, val); + } +} + static const struct reg_sequence cs35l56_system_reset_seq[] = { REG_SEQ0(CS35L56_DSP1_HALO_STATE, 0), REG_SEQ0(CS35L56_DSP_VIRTUAL1_MBOX_1, CS35L56_MBOX_CMD_SYSTEM_RESET), @@ -315,6 +389,12 @@ void cs35l56_system_reset(struct cs35l56_base *cs35l56_base, bool is_soundwire) * accesses other than the controlled system reset sequence below. */ regcache_cache_only(cs35l56_base->regmap, true); + + if (cs35l56_is_spi(cs35l56_base)) { + cs35l56_spi_system_reset(cs35l56_base); + return; + } + regmap_multi_reg_write_bypassed(cs35l56_base->regmap, cs35l56_system_reset_seq, ARRAY_SIZE(cs35l56_system_reset_seq)); diff --git a/sound/soc/codecs/cs35l56-spi.c b/sound/soc/codecs/cs35l56-spi.c index c101134e85328..ca6c03a8766d3 100644 --- a/sound/soc/codecs/cs35l56-spi.c +++ b/sound/soc/codecs/cs35l56-spi.c @@ -33,6 +33,9 @@ static int cs35l56_spi_probe(struct spi_device *spi) cs35l56->base.dev = &spi->dev; cs35l56->base.can_hibernate = true; + ret = cs35l56_init_config_for_spi(&cs35l56->base, spi); + if (ret) + return ret; ret = cs35l56_common_probe(cs35l56); if (ret != 0) -- GitLab From 8ec43c58d3be615a71548bc09148212013fb7e5f Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Thu, 19 Dec 2024 21:33:08 -0700 Subject: [PATCH 1009/1585] drm/vkms: Round fixp2int conversion in lerp_u16 fixp2int always rounds down, fixp2int_ceil rounds up. We need the new fixp2int_round. Signed-off-by: Alex Hung Signed-off-by: Harry Wentland Reviewed-by: Louis Chauvet Link: https://patchwork.freedesktop.org/patch/msgid/20241220043410.416867-3-alex.hung@amd.com Signed-off-by: Louis Chauvet --- drivers/gpu/drm/vkms/vkms_composer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index b20ac17057262..fa269d279e257 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -67,7 +67,7 @@ static u16 lerp_u16(u16 a, u16 b, s64 t) s64 delta = drm_fixp_mul(b_fp - a_fp, t); - return drm_fixp2int(a_fp + delta); + return drm_fixp2int_round(a_fp + delta); } static s64 get_lut_index(const struct vkms_color_lut *lut, u16 channel_value) -- GitLab From 889c57066ceee5e9172232da0608a8ac053bb6e5 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 25 Feb 2025 10:21:41 +0800 Subject: [PATCH 1010/1585] block: make segment size limit workable for > 4K PAGE_SIZE Using PAGE_SIZE as a minimum expected DMA segment size in consideration of devices which have a max DMA segment size of < 64k when used on 64k PAGE_SIZE systems leads to devices not being able to probe such as eMMC and Exynos UFS controller [0] [1] you can end up with a probe failure as follows: WARNING: CPU: 2 PID: 397 at block/blk-settings.c:339 blk_validate_limits+0x364/0x3c0 Ensure we use min(max_seg_size, seg_boundary_mask + 1) as the new min segment size when max segment size is < PAGE_SIZE for 16k and 64k base page size systems. If anyone need to backport this patch, the following commits are depended: commit 6aeb4f836480 ("block: remove bio_add_pc_page") commit 02ee5d69e3ba ("block: remove blk_rq_bio_prep") commit b7175e24d6ac ("block: add a dma mapping iterator") Link: https://lore.kernel.org/linux-block/20230612203314.17820-1-bvanassche@acm.org/ # [0] Link: https://lore.kernel.org/linux-block/1d55e942-5150-de4c-3a02-c3d066f87028@acm.org/ # [1] Cc: Yi Zhang Cc: John Garry Cc: Keith Busch Tested-by: Paul Bunyan Reviewed-by: Daniel Gomez Reviewed-by: Luis Chamberlain Reviewed-by: Bart Van Assche Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250225022141.2154581-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-merge.c | 2 +- block/blk-settings.c | 14 +++++++++++--- block/blk.h | 9 +++++++-- include/linux/blkdev.h | 1 + 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index c7c85e10cf9cb..1d1589c352976 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -329,7 +329,7 @@ int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim, if (nsegs < lim->max_segments && bytes + bv.bv_len <= max_bytes && - bv.bv_offset + bv.bv_len <= PAGE_SIZE) { + bv.bv_offset + bv.bv_len <= lim->min_segment_size) { nsegs++; bytes += bv.bv_len; } else { diff --git a/block/blk-settings.c b/block/blk-settings.c index c44dadc35e1ec..b9c6f0ec1c499 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -246,6 +246,7 @@ int blk_validate_limits(struct queue_limits *lim) { unsigned int max_hw_sectors; unsigned int logical_block_sectors; + unsigned long seg_size; int err; /* @@ -303,7 +304,7 @@ int blk_validate_limits(struct queue_limits *lim) max_hw_sectors = min_not_zero(lim->max_hw_sectors, lim->max_dev_sectors); if (lim->max_user_sectors) { - if (lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE) + if (lim->max_user_sectors < BLK_MIN_SEGMENT_SIZE / SECTOR_SIZE) return -EINVAL; lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors); } else if (lim->io_opt > (BLK_DEF_MAX_SECTORS_CAP << SECTOR_SHIFT)) { @@ -341,7 +342,7 @@ int blk_validate_limits(struct queue_limits *lim) */ if (!lim->seg_boundary_mask) lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; - if (WARN_ON_ONCE(lim->seg_boundary_mask < PAGE_SIZE - 1)) + if (WARN_ON_ONCE(lim->seg_boundary_mask < BLK_MIN_SEGMENT_SIZE - 1)) return -EINVAL; /* @@ -362,10 +363,17 @@ int blk_validate_limits(struct queue_limits *lim) */ if (!lim->max_segment_size) lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; - if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE)) + if (WARN_ON_ONCE(lim->max_segment_size < BLK_MIN_SEGMENT_SIZE)) return -EINVAL; } + /* setup min segment size for building new segment in fast path */ + if (lim->seg_boundary_mask > lim->max_segment_size - 1) + seg_size = lim->max_segment_size; + else + seg_size = lim->seg_boundary_mask + 1; + lim->min_segment_size = min_t(unsigned int, seg_size, PAGE_SIZE); + /* * We require drivers to at least do logical block aligned I/O, but * historically could not check for that due to the separate calls diff --git a/block/blk.h b/block/blk.h index 90fa5f28ccabf..9cf9a0099416d 100644 --- a/block/blk.h +++ b/block/blk.h @@ -14,6 +14,7 @@ struct elevator_type; #define BLK_DEV_MAX_SECTORS (LLONG_MAX >> 9) +#define BLK_MIN_SEGMENT_SIZE 4096 /* Max future timer expiry for timeouts */ #define BLK_MAX_TIMEOUT (5 * HZ) @@ -358,8 +359,12 @@ struct bio *bio_split_zone_append(struct bio *bio, static inline bool bio_may_need_split(struct bio *bio, const struct queue_limits *lim) { - return lim->chunk_sectors || bio->bi_vcnt != 1 || - bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE; + if (lim->chunk_sectors) + return true; + if (bio->bi_vcnt != 1) + return true; + return bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > + lim->min_segment_size; } /** diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 248416ecd01c9..58ff5aca83b67 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -367,6 +367,7 @@ struct queue_limits { unsigned int max_sectors; unsigned int max_user_sectors; unsigned int max_segment_size; + unsigned int min_segment_size; unsigned int physical_block_size; unsigned int logical_block_size; unsigned int alignment_offset; -- GitLab From 9e7c6779e3530bbdd465214afcd13f19c33e51a2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 25 Feb 2025 16:45:32 +0100 Subject: [PATCH 1011/1585] ALSA: hda/realtek: Fix wrong mic setup for ASUS VivoBook 15 ASUS VivoBook 15 with SSID 1043:1460 took an incorrect quirk via the pin pattern matching for ASUS (ALC256_FIXUP_ASUS_MIC), resulting in the two built-in mic pins (0x13 and 0x1b). This had worked without problems casually in the past because the right pin (0x1b) was picked up as the primary device. But since we fixed the pin enumeration for other bugs, the bogus one (0x13) is picked up as the primary device, hence the bug surfaced now. For addressing the regression, this patch explicitly specifies the quirk entry with ALC256_FIXUP_ASUS_MIC_NO_PRESENCE, which sets up only the headset mic pin. Fixes: 3b4309546b48 ("ALSA: hda: Fix headset detection failure due to unstable sort") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219807 Link: https://patch.msgid.link/20250225154540.13543-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 224616fbec4fa..e5c80d4be535c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10623,6 +10623,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK), SND_PCI_QUIRK(0x1043, 0x1433, "ASUS GX650PY/PZ/PV/PU/PYV/PZV/PIV/PVV", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC), + SND_PCI_QUIRK(0x1043, 0x1460, "Asus VivoBook 15", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1463, "Asus GA402X/GA402N", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1473, "ASUS GU604VI/VC/VE/VG/VJ/VQ/VU/VV/VY/VZ", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1483, "ASUS GU603VQ/VU/VV/VJ/VI", ALC285_FIXUP_ASUS_HEADSET_MIC), -- GitLab From 6ebf05189dfc6d0d597c99a6448a4d1064439a18 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 25 Feb 2025 15:59:02 +0000 Subject: [PATCH 1012/1585] io_uring/net: save msg_control for compat Match the compat part of io_sendmsg_copy_hdr() with its counterpart and save msg_control. Fixes: c55978024d123 ("io_uring/net: move receive multishot out of the generic msghdr path") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/2a8418821fe83d3b64350ad2b3c0303e9b732bbd.1740498502.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/io_uring/net.c b/io_uring/net.c index 17852a6616ffe..5d0b56ff50eed 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -322,7 +322,9 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req, if (unlikely(ret)) return ret; - return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL); + ret = __get_compat_msghdr(&iomsg->msg, &cmsg, NULL); + sr->msg_control = iomsg->msg.msg_control_user; + return ret; } #endif -- GitLab From 709329c48214ad2acf12eed1b5c0eb798e40a64c Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Mon, 24 Feb 2025 22:59:34 -0800 Subject: [PATCH 1013/1585] Input: xpad - add support for ZOTAC Gaming Zone ZOTAC Gaming Zone is ZOTAC's 2024 handheld release. As it is common with these handhelds, it uses a hybrid USB device with an xpad endpoint, a keyboard endpoint, and a vendor-specific endpoint for RGB control et al. Signed-off-by: Antheas Kapenekakis Link: https://lore.kernel.org/r/20250222170010.188761-2-lkml@antheas.dev Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 16493235bf9ee..29d7e6eb51cf0 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -349,6 +349,7 @@ static const struct xpad_device { { 0x1bad, 0xfa01, "MadCatz GamePad", 0, XTYPE_XBOX360 }, { 0x1bad, 0xfd00, "Razer Onza TE", 0, XTYPE_XBOX360 }, { 0x1bad, 0xfd01, "Razer Onza", 0, XTYPE_XBOX360 }, + { 0x1ee9, 0x1590, "ZOTAC Gaming Zone", 0, XTYPE_XBOX360 }, { 0x20d6, 0x2001, "BDA Xbox Series X Wired Controller", 0, XTYPE_XBOXONE }, { 0x20d6, 0x2009, "PowerA Enhanced Wired Controller for Xbox Series X|S", 0, XTYPE_XBOXONE }, { 0x20d6, 0x281f, "PowerA Wired Controller For Xbox 360", 0, XTYPE_XBOX360 }, @@ -538,6 +539,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x1949), /* Amazon controllers */ XPAD_XBOX360_VENDOR(0x1a86), /* QH Electronics */ XPAD_XBOX360_VENDOR(0x1bad), /* Harmonix Rock Band guitar and drums */ + XPAD_XBOX360_VENDOR(0x1ee9), /* ZOTAC Technology Limited */ XPAD_XBOX360_VENDOR(0x20d6), /* PowerA controllers */ XPAD_XBOXONE_VENDOR(0x20d6), /* PowerA controllers */ XPAD_XBOX360_VENDOR(0x2345), /* Machenike Controllers */ -- GitLab From 95a54a96f657fd069d2a9922b6c2d293a72a001f Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Mon, 24 Feb 2025 23:00:29 -0800 Subject: [PATCH 1014/1585] Input: xpad - add support for TECNO Pocket Go TECNO Pocket Go is a kickstarter handheld by manufacturer TECNO Mobile. It poses a unique feature: it does not have a display. Instead, the handheld is essentially a pc in a controller. As customary, it has an xpad endpoint, a keyboard endpoint, and a vendor endpoint for its vendor software. Signed-off-by: Antheas Kapenekakis Link: https://lore.kernel.org/r/20250222170010.188761-3-lkml@antheas.dev Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 29d7e6eb51cf0..79e1c8f1f99c2 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -382,6 +382,7 @@ static const struct xpad_device { { 0x2563, 0x058d, "OneXPlayer Gamepad", 0, XTYPE_XBOX360 }, { 0x294b, 0x3303, "Snakebyte GAMEPAD BASE X", 0, XTYPE_XBOXONE }, { 0x294b, 0x3404, "Snakebyte GAMEPAD RGB X", 0, XTYPE_XBOXONE }, + { 0x2993, 0x2001, "TECNO Pocket Go", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x2000, "8BitDo Pro 2 Wired Controller fox Xbox", 0, XTYPE_XBOXONE }, { 0x2dc8, 0x3106, "8BitDo Ultimate Wireless / Pro 2 Wired Controller", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x3109, "8BitDo Ultimate Wireless Bluetooth", 0, XTYPE_XBOX360 }, @@ -548,6 +549,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x2563), /* OneXPlayer Gamepad */ XPAD_XBOX360_VENDOR(0x260d), /* Dareu H101 */ XPAD_XBOXONE_VENDOR(0x294b), /* Snakebyte */ + XPAD_XBOX360_VENDOR(0x2993), /* TECNO Mobile */ XPAD_XBOX360_VENDOR(0x2c22), /* Qanba Controllers */ XPAD_XBOX360_VENDOR(0x2dc8), /* 8BitDo Controllers */ XPAD_XBOXONE_VENDOR(0x2dc8), /* 8BitDo Controllers */ -- GitLab From 659a7614dd72e2835ac0b220c2fa68fabd8d1df9 Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Mon, 24 Feb 2025 23:01:55 -0800 Subject: [PATCH 1015/1585] Input: xpad - rename QH controller to Legion Go S The QH controller is actually the controller of the Legion Go S, with the manufacturer string wch.cn and product name Legion Go S in its USB descriptor. A cursory lookup of the VID reveals the same. Therefore, rename the xpad entries to match. Signed-off-by: Antheas Kapenekakis Link: https://lore.kernel.org/r/20250222170010.188761-4-lkml@antheas.dev Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 79e1c8f1f99c2..c33e6f33265ba 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -312,7 +312,7 @@ static const struct xpad_device { { 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 }, { 0x17ef, 0x6182, "Lenovo Legion Controller for Windows", 0, XTYPE_XBOX360 }, { 0x1949, 0x041a, "Amazon Game Controller", 0, XTYPE_XBOX360 }, - { 0x1a86, 0xe310, "QH Electronics Controller", 0, XTYPE_XBOX360 }, + { 0x1a86, 0xe310, "Legion Go S", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0x0130, "Ion Drum Rocker", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, @@ -538,7 +538,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */ XPAD_XBOX360_VENDOR(0x17ef), /* Lenovo */ XPAD_XBOX360_VENDOR(0x1949), /* Amazon controllers */ - XPAD_XBOX360_VENDOR(0x1a86), /* QH Electronics */ + XPAD_XBOX360_VENDOR(0x1a86), /* Nanjing Qinheng Microelectronics (WCH) */ XPAD_XBOX360_VENDOR(0x1bad), /* Harmonix Rock Band guitar and drums */ XPAD_XBOX360_VENDOR(0x1ee9), /* ZOTAC Technology Limited */ XPAD_XBOX360_VENDOR(0x20d6), /* PowerA controllers */ -- GitLab From 68283c1cb573143c0b7515e93206f3503616bc10 Mon Sep 17 00:00:00 2001 From: Artur Weber Date: Fri, 7 Feb 2025 21:02:41 +0100 Subject: [PATCH 1016/1585] pinctrl: bcm281xx: Fix incorrect regmap max_registers value The max_registers value does not take into consideration the stride; currently, it's set to the number of the last pin, but this does not accurately represent the final register. Fix this by multiplying the current value by 4. Fixes: 54b1aa5a5b16 ("ARM: pinctrl: Add Broadcom Capri pinctrl driver") Signed-off-by: Artur Weber Link: https://lore.kernel.org/20250207-bcm21664-pinctrl-v1-2-e7cfac9b2d3b@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/bcm/pinctrl-bcm281xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/bcm/pinctrl-bcm281xx.c b/drivers/pinctrl/bcm/pinctrl-bcm281xx.c index 73dbf29c002f3..cf6efa9c0364a 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm281xx.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm281xx.c @@ -974,7 +974,7 @@ static const struct regmap_config bcm281xx_pinctrl_regmap_config = { .reg_bits = 32, .reg_stride = 4, .val_bits = 32, - .max_register = BCM281XX_PIN_VC_CAM3_SDA, + .max_register = BCM281XX_PIN_VC_CAM3_SDA * 4, }; static int bcm281xx_pinctrl_get_groups_count(struct pinctrl_dev *pctldev) -- GitLab From acf40ab42799e4ae1397ee6f5c5941092d66f999 Mon Sep 17 00:00:00 2001 From: Charles Han Date: Wed, 12 Feb 2025 18:05:32 +0800 Subject: [PATCH 1017/1585] pinctrl: nuvoton: npcm8xx: Add NULL check in npcm8xx_gpio_fw devm_kasprintf() calls can return null pointers on failure. But the return values were not checked in npcm8xx_gpio_fw(). Add NULL check in npcm8xx_gpio_fw(), to handle kernel NULL pointer dereference error. Fixes: acf4884a5717 ("pinctrl: nuvoton: add NPCM8XX pinctrl and GPIO driver") Signed-off-by: Charles Han Link: https://lore.kernel.org/20250212100532.4317-1-hanchunchao@inspur.com Signed-off-by: Linus Walleij --- drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c index 471f644c5eef2..d09a5e9b2eca5 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c +++ b/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c @@ -2374,6 +2374,9 @@ static int npcm8xx_gpio_fw(struct npcm8xx_pinctrl *pctrl) pctrl->gpio_bank[id].gc.parent = dev; pctrl->gpio_bank[id].gc.fwnode = child; pctrl->gpio_bank[id].gc.label = devm_kasprintf(dev, GFP_KERNEL, "%pfw", child); + if (pctrl->gpio_bank[id].gc.label == NULL) + return -ENOMEM; + pctrl->gpio_bank[id].gc.dbg_show = npcmgpio_dbg_show; pctrl->gpio_bank[id].direction_input = pctrl->gpio_bank[id].gc.direction_input; pctrl->gpio_bank[id].gc.direction_input = npcmgpio_direction_input; -- GitLab From 7ff4faba63571c51004280f7eb5d6362b15ec61f Mon Sep 17 00:00:00 2001 From: Yixun Lan Date: Tue, 18 Feb 2025 08:31:44 +0800 Subject: [PATCH 1018/1585] pinctrl: spacemit: enable config option Pinctrl is an essential driver for SpacemiT's SoC, The uart driver requires it, same as sd card driver, so let's enable it by default for this SoC. The CONFIG_PINCTRL_SPACEMIT_K1 isn't enabled when using 'make defconfig' to select kernel configuration options. This result in a broken uart driver where fail at probe() stage due to no pins found. Fixes: a83c29e1d145 ("pinctrl: spacemit: add support for SpacemiT K1 SoC") Reported-by: Alex Elder Acked-by: Conor Dooley Tested-by: Alex Elder Signed-off-by: Yixun Lan Reviewed-by: Javier Martinez Canillas Tested-by: Javier Martinez Canillas Link: https://lore.kernel.org/20250218-k1-pinctrl-option-v3-1-36e031e0da1b@gentoo.org Signed-off-by: Linus Walleij --- arch/riscv/Kconfig.socs | 1 + drivers/pinctrl/spacemit/Kconfig | 3 ++- drivers/pinctrl/spacemit/pinctrl-k1.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 1916cf7ba450e..17606940bb523 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -26,6 +26,7 @@ config ARCH_SOPHGO config ARCH_SPACEMIT bool "SpacemiT SoCs" + select PINCTRL help This enables support for SpacemiT SoC platform hardware. diff --git a/drivers/pinctrl/spacemit/Kconfig b/drivers/pinctrl/spacemit/Kconfig index 168f8a5ffbb95..a2f98b3f8a755 100644 --- a/drivers/pinctrl/spacemit/Kconfig +++ b/drivers/pinctrl/spacemit/Kconfig @@ -4,9 +4,10 @@ # config PINCTRL_SPACEMIT_K1 - tristate "SpacemiT K1 SoC Pinctrl driver" + bool "SpacemiT K1 SoC Pinctrl driver" depends on ARCH_SPACEMIT || COMPILE_TEST depends on OF + default y select GENERIC_PINCTRL_GROUPS select GENERIC_PINMUX_FUNCTIONS select GENERIC_PINCONF diff --git a/drivers/pinctrl/spacemit/pinctrl-k1.c b/drivers/pinctrl/spacemit/pinctrl-k1.c index a32579d736130..59fd555ff38d4 100644 --- a/drivers/pinctrl/spacemit/pinctrl-k1.c +++ b/drivers/pinctrl/spacemit/pinctrl-k1.c @@ -1044,7 +1044,7 @@ static struct platform_driver k1_pinctrl_driver = { .of_match_table = k1_pinctrl_ids, }, }; -module_platform_driver(k1_pinctrl_driver); +builtin_platform_driver(k1_pinctrl_driver); MODULE_AUTHOR("Yixun Lan "); MODULE_DESCRIPTION("Pinctrl driver for the SpacemiT K1 SoC"); -- GitLab From 00817f0f1c45b007965f5676b9a2013bb39c7228 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 24 Feb 2025 17:13:30 -0800 Subject: [PATCH 1019/1585] nvme-ioctl: fix leaked requests on mapping error All the callers assume nvme_map_user_request() frees the request on a failure. This wasn't happening on invalid metadata or io_uring command flags, so we've been leaking those requests. Fixes: 23fd22e55b767b ("nvme: wire up fixed buffer support for nvme passthrough") Fixes: 7c2fd76048e95d ("nvme: fix metadata handling in nvme-passthrough") Reviewed-by: Damien Le Moal Reviewed-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/ioctl.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index b1b46c2713e1c..24e2c702da7a2 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -128,8 +128,10 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, if (!nvme_ctrl_sgl_supported(ctrl)) dev_warn_once(ctrl->device, "using unchecked data buffer\n"); if (has_metadata) { - if (!supports_metadata) - return -EINVAL; + if (!supports_metadata) { + ret = -EINVAL; + goto out; + } if (!nvme_ctrl_meta_sgl_supported(ctrl)) dev_warn_once(ctrl->device, "using unchecked metadata buffer\n"); @@ -139,8 +141,10 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, struct iov_iter iter; /* fixedbufs is only for non-vectored io */ - if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) - return -EINVAL; + if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) { + ret = -EINVAL; + goto out; + } ret = io_uring_cmd_import_fixed(ubuffer, bufflen, rq_data_dir(req), &iter, ioucmd); if (ret < 0) -- GitLab From 91dcc66b34beb72dde8412421bdc1b4cd40e4fb8 Mon Sep 17 00:00:00 2001 From: "chr[]" Date: Wed, 12 Feb 2025 16:51:38 +0100 Subject: [PATCH 1020/1585] amdgpu/pm/legacy: fix suspend/resume issues resume and irq handler happily races in set_power_state() * amdgpu_legacy_dpm_compute_clocks() needs lock * protect irq work handler * fix dpm_enabled usage v2: fix clang build, integrate Lijo's comments (Alex) Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2524 Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in amdgpu_dpm.c") Reviewed-by: Lijo Lazar Tested-by: Maciej S. Szmigiero # on Oland PRO Signed-off-by: chr[] Signed-off-by: Alex Deucher (cherry picked from commit ee3dc9e204d271c9c7a8d4d38a0bce4745d33e71) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c | 25 +++++++++++++----- .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c | 8 ++++-- drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 26 ++++++++++++++----- 3 files changed, 45 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c index 67a8e22b1126d..e237ea1185a71 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c @@ -3042,6 +3042,7 @@ static int kv_dpm_hw_init(struct amdgpu_ip_block *ip_block) if (!amdgpu_dpm) return 0; + mutex_lock(&adev->pm.mutex); kv_dpm_setup_asic(adev); ret = kv_dpm_enable(adev); if (ret) @@ -3049,6 +3050,8 @@ static int kv_dpm_hw_init(struct amdgpu_ip_block *ip_block) else adev->pm.dpm_enabled = true; amdgpu_legacy_dpm_compute_clocks(adev); + mutex_unlock(&adev->pm.mutex); + return ret; } @@ -3066,32 +3069,42 @@ static int kv_dpm_suspend(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + cancel_work_sync(&adev->pm.dpm.thermal.work); + if (adev->pm.dpm_enabled) { + mutex_lock(&adev->pm.mutex); + adev->pm.dpm_enabled = false; /* disable dpm */ kv_dpm_disable(adev); /* reset the power state */ adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps; + mutex_unlock(&adev->pm.mutex); } return 0; } static int kv_dpm_resume(struct amdgpu_ip_block *ip_block) { - int ret; + int ret = 0; struct amdgpu_device *adev = ip_block->adev; - if (adev->pm.dpm_enabled) { + if (!amdgpu_dpm) + return 0; + + if (!adev->pm.dpm_enabled) { + mutex_lock(&adev->pm.mutex); /* asic init will reset to the boot state */ kv_dpm_setup_asic(adev); ret = kv_dpm_enable(adev); - if (ret) + if (ret) { adev->pm.dpm_enabled = false; - else + } else { adev->pm.dpm_enabled = true; - if (adev->pm.dpm_enabled) amdgpu_legacy_dpm_compute_clocks(adev); + } + mutex_unlock(&adev->pm.mutex); } - return 0; + return ret; } static bool kv_dpm_is_idle(void *handle) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c index e861355ebd75b..c7518b13e7879 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c @@ -1009,9 +1009,12 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work) enum amd_pm_state_type dpm_state = POWER_STATE_TYPE_INTERNAL_THERMAL; int temp, size = sizeof(temp); - if (!adev->pm.dpm_enabled) - return; + mutex_lock(&adev->pm.mutex); + if (!adev->pm.dpm_enabled) { + mutex_unlock(&adev->pm.mutex); + return; + } if (!pp_funcs->read_sensor(adev->powerplay.pp_handle, AMDGPU_PP_SENSOR_GPU_TEMP, (void *)&temp, @@ -1033,4 +1036,5 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work) adev->pm.dpm.state = dpm_state; amdgpu_legacy_dpm_compute_clocks(adev->powerplay.pp_handle); + mutex_unlock(&adev->pm.mutex); } diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index a87dcf0974bc1..d6dfe2599ebea 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -7786,6 +7786,7 @@ static int si_dpm_hw_init(struct amdgpu_ip_block *ip_block) if (!amdgpu_dpm) return 0; + mutex_lock(&adev->pm.mutex); si_dpm_setup_asic(adev); ret = si_dpm_enable(adev); if (ret) @@ -7793,6 +7794,7 @@ static int si_dpm_hw_init(struct amdgpu_ip_block *ip_block) else adev->pm.dpm_enabled = true; amdgpu_legacy_dpm_compute_clocks(adev); + mutex_unlock(&adev->pm.mutex); return ret; } @@ -7810,32 +7812,44 @@ static int si_dpm_suspend(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + cancel_work_sync(&adev->pm.dpm.thermal.work); + if (adev->pm.dpm_enabled) { + mutex_lock(&adev->pm.mutex); + adev->pm.dpm_enabled = false; /* disable dpm */ si_dpm_disable(adev); /* reset the power state */ adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps; + mutex_unlock(&adev->pm.mutex); } + return 0; } static int si_dpm_resume(struct amdgpu_ip_block *ip_block) { - int ret; + int ret = 0; struct amdgpu_device *adev = ip_block->adev; - if (adev->pm.dpm_enabled) { + if (!amdgpu_dpm) + return 0; + + if (!adev->pm.dpm_enabled) { /* asic init will reset to the boot state */ + mutex_lock(&adev->pm.mutex); si_dpm_setup_asic(adev); ret = si_dpm_enable(adev); - if (ret) + if (ret) { adev->pm.dpm_enabled = false; - else + } else { adev->pm.dpm_enabled = true; - if (adev->pm.dpm_enabled) amdgpu_legacy_dpm_compute_clocks(adev); + } + mutex_unlock(&adev->pm.mutex); } - return 0; + + return ret; } static bool si_dpm_is_idle(void *handle) -- GitLab From 3502ab5022bb5ef1edd063bdb6465a8bf3b46e66 Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Wed, 19 Feb 2025 17:34:38 -0500 Subject: [PATCH 1021/1585] drm/amdkfd: Preserve cp_hqd_pq_control on update_mqd When userspace applications call AMDKFD_IOC_UPDATE_QUEUE. Preserve bitfields that do not need to be modified as they contain flags to track queue states that are used by CP FW. Signed-off-by: David Yat Sin Reviewed-by: Jay Cornwall Signed-off-by: Alex Deucher (cherry picked from commit 8150827990b709ab5a40c46c30d21b7f7b9e9440) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 6 ++++-- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 5 +++-- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c | 5 +++-- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 5 ++++- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 2eff37aaf8273..1695dd78ede8e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -107,6 +107,8 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT; m->cp_mqd_base_addr_lo = lower_32_bits(addr); @@ -167,10 +169,10 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd); - m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; - m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; + pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 68dbc0399c87a..3c0ae28c5923b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -154,6 +154,8 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | 0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT; m->cp_mqd_base_addr_lo = lower_32_bits(addr); @@ -221,10 +223,9 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd); - m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; - m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c index 2b72d5b4949b6..565858b9044d4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c @@ -121,6 +121,8 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | 0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT; m->cp_mqd_base_addr_lo = lower_32_bits(addr); @@ -184,10 +186,9 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd); - m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; - m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index ff417d5361c42..3014925d95ffc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -183,6 +183,9 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; + m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT; m->cp_mqd_base_addr_lo = lower_32_bits(addr); @@ -245,7 +248,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd); - m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK; m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); -- GitLab From 099bffc7cadff40bfab1517c3461c53a7a38a0d7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 17 Feb 2025 10:55:05 -0500 Subject: [PATCH 1022/1585] drm/amdgpu: disable BAR resize on Dell G5 SE There was a quirk added to add a workaround for a Sapphire RX 5600 XT Pulse that didn't allow BAR resizing. However, the quirk caused a regression with runtime pm on Dell laptops using those chips, rather than narrowing the scope of the resizing quirk, add a quirk to prevent amdgpu from resizing the BAR on those Dell platforms unless runtime pm is disabled. v2: update commit message, add runpm check Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/1707 Fixes: 907830b0fc9e ("PCI: Add a REBAR size quirk for Sapphire RX 5600 XT Pulse") Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher (cherry picked from commit 5235053f443cef4210606e5fb71f99b915a9723d) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d100bb7a137cd..018dfccd771ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1638,6 +1638,13 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; + /* resizing on Dell G5 SE platforms causes problems with runtime pm */ + if ((amdgpu_runtime_pm != 0) && + adev->pdev->vendor == PCI_VENDOR_ID_ATI && + adev->pdev->device == 0x731f && + adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL) + return 0; + /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */ if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR)) DRM_WARN("System can't access extended configuration space, please check!!\n"); -- GitLab From 8005351c7d53c31fb7eb5a423da7ab4bc3ad7639 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 11 Feb 2025 15:38:20 -0500 Subject: [PATCH 1023/1585] MAINTAINERS: update amdgpu maintainers list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Xinhui's email is no longer valid. Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit c19390ca9094dfcbc16d96b233a409c01e21d85b) Cc: stable@vger.kernel.org --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 4ff26fa94895d..d6ea828345fdb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19657,7 +19657,6 @@ F: drivers/net/wireless/quantenna RADEON and AMDGPU DRM DRIVERS M: Alex Deucher M: Christian König -M: Xinhui Pan L: amd-gfx@lists.freedesktop.org S: Supported B: https://gitlab.freedesktop.org/drm/amd/-/issues -- GitLab From e7ea88207cef513514e706aacc534527ac88b9b8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 13 Feb 2025 13:37:01 -0500 Subject: [PATCH 1024/1585] drm/amdgpu/gfx: only call mes for enforce isolation if supported This should not be called on chips without MES so check if MES is enabled and if the cleaner shader is supported. Fixes: 8521e3c5f058 ("drm/amd/amdgpu: limit single process inside MES") Reviewed-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher Cc: Shaoyun Liu Cc: Srinivasan Shanmugam (cherry picked from commit 80513e389765c8f9543b26d8fa4bbdf0e59ff8bc) --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 784b03abb3a43..c6aff3ddb42d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1643,11 +1643,13 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, if (adev->enforce_isolation[i] && !partition_values[i]) { /* Going from enabled to disabled */ amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); - amdgpu_mes_set_enforce_isolation(adev, i, false); + if (adev->enable_mes && adev->gfx.enable_cleaner_shader) + amdgpu_mes_set_enforce_isolation(adev, i, false); } else if (!adev->enforce_isolation[i] && partition_values[i]) { /* Going from disabled to enabled */ amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); - amdgpu_mes_set_enforce_isolation(adev, i, true); + if (adev->enable_mes && adev->gfx.enable_cleaner_shader) + amdgpu_mes_set_enforce_isolation(adev, i, true); } adev->enforce_isolation[i] = partition_values[i]; } -- GitLab From 748a1f51bb74453f1fe22d3ca68a717cb31f02e5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 14 Feb 2025 12:32:30 -0500 Subject: [PATCH 1025/1585] drm/amdgpu/mes: keep enforce isolation up to date Re-send the mes message on resume to make sure the mes state is up to date. Fixes: 8521e3c5f058 ("drm/amd/amdgpu: limit single process inside MES") Acked-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher Cc: Shaoyun Liu Cc: Srinivasan Shanmugam Signed-off-by: Alex Deucher (cherry picked from commit 27b791514789844e80da990c456c2465325e0851) --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 13 ++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 20 +++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 2 +- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 4 ++++ 5 files changed, 32 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index c6aff3ddb42d7..c1f35ded684e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1638,24 +1638,19 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, } mutex_lock(&adev->enforce_isolation_mutex); - for (i = 0; i < num_partitions; i++) { - if (adev->enforce_isolation[i] && !partition_values[i]) { + if (adev->enforce_isolation[i] && !partition_values[i]) /* Going from enabled to disabled */ amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); - if (adev->enable_mes && adev->gfx.enable_cleaner_shader) - amdgpu_mes_set_enforce_isolation(adev, i, false); - } else if (!adev->enforce_isolation[i] && partition_values[i]) { + else if (!adev->enforce_isolation[i] && partition_values[i]) /* Going from disabled to enabled */ amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); - if (adev->enable_mes && adev->gfx.enable_cleaner_shader) - amdgpu_mes_set_enforce_isolation(adev, i, true); - } adev->enforce_isolation[i] = partition_values[i]; } - mutex_unlock(&adev->enforce_isolation_mutex); + amdgpu_mes_update_enforce_isolation(adev); + return count; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 32b27a1658e78..709c11cbeabd8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1681,7 +1681,8 @@ bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) } /* Fix me -- node_id is used to identify the correct MES instances in the future */ -int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable) +static int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, + uint32_t node_id, bool enable) { struct mes_misc_op_input op_input = {0}; int r; @@ -1703,6 +1704,23 @@ int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_i return r; } +int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev) +{ + int i, r = 0; + + if (adev->enable_mes && adev->gfx.enable_cleaner_shader) { + mutex_lock(&adev->enforce_isolation_mutex); + for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { + if (adev->enforce_isolation[i]) + r |= amdgpu_mes_set_enforce_isolation(adev, i, true); + else + r |= amdgpu_mes_set_enforce_isolation(adev, i, false); + } + mutex_unlock(&adev->enforce_isolation_mutex); + } + return r; +} + #if defined(CONFIG_DEBUG_FS) static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 2df2444ee892c..e98ea7ede1bab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -534,6 +534,6 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes) bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev); -int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable); +int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev); #endif /* __AMDGPU_MES_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 65f389eb65e5f..f9a4d08eef925 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1633,6 +1633,10 @@ static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block) goto failure; } + r = amdgpu_mes_update_enforce_isolation(adev); + if (r) + goto failure; + out: /* * Disable KIQ ring usage from the driver once MES is enabled. diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 901e924e69ad9..0fd0fa6ed5184 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -1743,6 +1743,10 @@ static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block) goto failure; } + r = amdgpu_mes_update_enforce_isolation(adev); + if (r) + goto failure; + out: /* * Disable KIQ ring usage from the driver once MES is enabled. -- GitLab From 733d675c2a436b416107893db87eb182585c1b39 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Wed, 19 Feb 2025 11:46:19 -0700 Subject: [PATCH 1026/1585] MAINTAINERS: Change my role from Maintainer to Reviewer Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher (cherry picked from commit 9b3ef540397cfc356f10f504841b2e9d16e31286) Cc: stable@vger.kernel.org --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index d6ea828345fdb..f3fdc43bdd497 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1046,7 +1046,7 @@ F: drivers/crypto/ccp/hsti.* AMD DISPLAY CORE M: Harry Wentland M: Leo Li -M: Rodrigo Siqueira +R: Rodrigo Siqueira L: amd-gfx@lists.freedesktop.org S: Supported T: git https://gitlab.freedesktop.org/agd5f/linux.git -- GitLab From 96989f3dca6f51f202b6dbc92c37e17df6ca12f4 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Wed, 19 Feb 2025 11:46:20 -0700 Subject: [PATCH 1027/1585] mailmap: Add entry for Rodrigo Siqueira Map all of my previously used email addresses to my @igalia.com address. Acked-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher (cherry picked from commit 289387d0dbf806bd59063ab93d94f48cd4c75c7c) Cc: stable@vger.kernel.org --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index a897c16d3baef..ec18b01f0be2a 100644 --- a/.mailmap +++ b/.mailmap @@ -613,6 +613,8 @@ Richard Leitner Richard Leitner Robert Foss Rocky Liao +Rodrigo Siqueira +Rodrigo Siqueira Roman Gushchin Roman Gushchin Roman Gushchin -- GitLab From 12f3b92d1cfa5526715fff93a6d6fe29300d5e2a Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Sat, 15 Feb 2025 18:15:47 -0300 Subject: [PATCH 1028/1585] drm/amd/display: restore edid reading from a given i2c adapter When switching to drm_edid, we slightly changed how to get edid by removing the possibility of getting them from dc_link when in aux transaction mode. As MST doesn't initialize the connector with `drm_connector_init_with_ddc()`, restore the original behavior to avoid functional changes. v2: - Fix build warning of unchecked dereference (kernel test bot) CC: Alex Hung CC: Mario Limonciello CC: Roman Li CC: Aurabindo Pillai Fixes: 48edb2a4256e ("drm/amd/display: switch amdgpu_dm_connector to use struct drm_edid") Reviewed-by: Alex Hung Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher (cherry picked from commit 81262b1656feb3813e3d917ab78824df6831e69e) --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ac3fd81fecef2..5ddd21466e22f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7240,8 +7240,14 @@ static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector) struct dc_link *dc_link = aconnector->dc_link; struct dc_sink *dc_em_sink = aconnector->dc_em_sink; const struct drm_edid *drm_edid; + struct i2c_adapter *ddc; - drm_edid = drm_edid_read(connector); + if (dc_link && dc_link->aux_mode) + ddc = &aconnector->dm_dp_aux.aux.ddc; + else + ddc = &aconnector->i2c->base; + + drm_edid = drm_edid_read_ddc(connector, ddc); drm_edid_connector_update(connector, drm_edid); if (!drm_edid) { DRM_ERROR("No EDID found on connector: %s.\n", connector->name); @@ -7286,14 +7292,21 @@ static int get_modes(struct drm_connector *connector) static void create_eml_sink(struct amdgpu_dm_connector *aconnector) { struct drm_connector *connector = &aconnector->base; + struct dc_link *dc_link = aconnector->dc_link; struct dc_sink_init_data init_params = { .link = aconnector->dc_link, .sink_signal = SIGNAL_TYPE_VIRTUAL }; const struct drm_edid *drm_edid; const struct edid *edid; + struct i2c_adapter *ddc; + + if (dc_link && dc_link->aux_mode) + ddc = &aconnector->dm_dp_aux.aux.ddc; + else + ddc = &aconnector->i2c->base; - drm_edid = drm_edid_read(connector); + drm_edid = drm_edid_read_ddc(connector, ddc); drm_edid_connector_update(connector, drm_edid); if (!drm_edid) { DRM_ERROR("No EDID found on connector: %s.\n", connector->name); -- GitLab From a04bf34e0829f2c5d5f1ea7317daae2efa560fd1 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Fri, 21 Feb 2025 14:19:12 -0500 Subject: [PATCH 1029/1585] MAINTAINERS: Update AMDGPU DML maintainers info Chaitanya is no longer with AMD, and the responsibility has been taken over by Austin. Signed-off-by: Aurabindo Pillai Acked-by: Alex Deucher Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher (cherry picked from commit a101fa705d016d46463dd4ce488671369c922bc2) Cc: stable@vger.kernel.org --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index f3fdc43bdd497..95bce73f3ef44 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1053,7 +1053,7 @@ T: git https://gitlab.freedesktop.org/agd5f/linux.git F: drivers/gpu/drm/amd/display/ AMD DISPLAY CORE - DML -M: Chaitanya Dhere +M: Austin Zheng M: Jun Lei S: Supported F: drivers/gpu/drm/amd/display/dc/dml/ -- GitLab From e8863f8b0316d8ee1e7e5291e8f2f72c91ac967d Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Thu, 6 Feb 2025 11:31:23 +0800 Subject: [PATCH 1030/1585] drm/amd/display: Disable PSR-SU on eDP panels [Why] PSR-SU may cause some glitching randomly on several panels. [How] Temporarily disable the PSR-SU and fallback to PSR1 for all eDP panels. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3388 Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Sun peng Li Signed-off-by: Tom Chung Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 6deeefb820d0efb0b36753622fb982d03b37b3ad) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index 45858bf1523d8..e140b7a04d724 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -54,7 +54,8 @@ static bool link_supports_psrsu(struct dc_link *link) if (amdgpu_dc_debug_mask & DC_DISABLE_PSR_SU) return false; - return dc_dmub_check_min_version(dc->ctx->dmub_srv->dmub); + /* Temporarily disable PSR-SU to avoid glitches */ + return false; } /* -- GitLab From b5f7242e49b927cfe488b369fa552f2eff579ef1 Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Fri, 7 Feb 2025 15:26:19 -0500 Subject: [PATCH 1031/1585] drm/amd/display: add a quirk to enable eDP0 on DP1 [why] some board designs have eDP0 connected to DP1, need a way to enable support_edp0_on_dp1 flag, otherwise edp related features cannot work [how] do a dmi check during dm initialization to identify systems that require support_edp0_on_dp1. Optimize quirk table with callback functions to set quirk entries, retrieve_dmi_info can set quirks according to quirk entries Cc: Mario Limonciello Reviewed-by: Mario Limonciello Reviewed-by: Nicholas Kazlauskas Signed-off-by: Yilin Chen Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit f6d17270d18a6a6753fff046330483d43f8405e4) Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 69 +++++++++++++++++-- 1 file changed, 62 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5ddd21466e22f..9d9645a2d18ef 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1618,75 +1618,130 @@ static bool dm_should_disable_stutter(struct pci_dev *pdev) return false; } -static const struct dmi_system_id hpd_disconnect_quirk_table[] = { +struct amdgpu_dm_quirks { + bool aux_hpd_discon; + bool support_edp0_on_dp1; +}; + +static struct amdgpu_dm_quirks quirk_entries = { + .aux_hpd_discon = false, + .support_edp0_on_dp1 = false +}; + +static int edp0_on_dp1_callback(const struct dmi_system_id *id) +{ + quirk_entries.support_edp0_on_dp1 = true; + return 0; +} + +static int aux_hpd_discon_callback(const struct dmi_system_id *id) +{ + quirk_entries.aux_hpd_discon = true; + return 0; +} + +static const struct dmi_system_id dmi_quirk_table[] = { { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3660"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3260"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower Plus 7010"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower 7010"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF Plus 7010"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF 7010"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro Plus 7010"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro 7010"), }, }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Elite mt645 G8 Mobile Thin Client"), + }, + }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook 665 16 inch G11 Notebook PC"), + }, + }, {} /* TODO: refactor this from a fixed table to a dynamic option */ }; -static void retrieve_dmi_info(struct amdgpu_display_manager *dm) +static void retrieve_dmi_info(struct amdgpu_display_manager *dm, struct dc_init_data *init_data) { - const struct dmi_system_id *dmi_id; + int dmi_id; + struct drm_device *dev = dm->ddev; dm->aux_hpd_discon_quirk = false; + init_data->flags.support_edp0_on_dp1 = false; + + dmi_id = dmi_check_system(dmi_quirk_table); - dmi_id = dmi_first_match(hpd_disconnect_quirk_table); - if (dmi_id) { + if (!dmi_id) + return; + + if (quirk_entries.aux_hpd_discon) { dm->aux_hpd_discon_quirk = true; - DRM_INFO("aux_hpd_discon_quirk attached\n"); + drm_info(dev, "aux_hpd_discon_quirk attached\n"); + } + if (quirk_entries.support_edp0_on_dp1) { + init_data->flags.support_edp0_on_dp1 = true; + drm_info(dev, "aux_hpd_discon_quirk attached\n"); } } @@ -1994,7 +2049,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) init_data.num_virtual_links = 1; - retrieve_dmi_info(&adev->dm); + retrieve_dmi_info(&adev->dm, &init_data); if (adev->dm.bb_from_dmub) init_data.bb_from_dmub = adev->dm.bb_from_dmub; -- GitLab From 4de141b8b1b7991b607f77e5f4580e1c67c24717 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Wed, 12 Feb 2025 14:49:36 -0500 Subject: [PATCH 1032/1585] drm/amd/display: Fix HPD after gpu reset [Why] DC is not using amdgpu_irq_get/put to manage the HPD interrupt refcounts. So when amdgpu_irq_gpu_reset_resume_helper() reprograms all of the IRQs, HPD gets disabled. [How] Use amdgpu_irq_get/put() for HPD init/fini in DM in order to sync refcounts Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Mario Limonciello Reviewed-by: Aurabindo Pillai Signed-off-by: Roman Li Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit f3dde2ff7fcaacd77884502e8f572f2328e9c745) Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 3390f0d8420a0..c4a7fd453e5fc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -894,6 +894,7 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) struct drm_device *dev = adev_to_drm(adev); struct drm_connector *connector; struct drm_connector_list_iter iter; + int i; drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { @@ -920,6 +921,12 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) } } drm_connector_list_iter_end(&iter); + + /* Update reference counts for HPDs */ + for (i = DC_IRQ_SOURCE_HPD1; i <= adev->mode_info.num_hpd; i++) { + if (amdgpu_irq_get(adev, &adev->hpd_irq, i - DC_IRQ_SOURCE_HPD1)) + drm_err(dev, "DM_IRQ: Failed get HPD for source=%d)!\n", i); + } } /** @@ -935,6 +942,7 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) struct drm_device *dev = adev_to_drm(adev); struct drm_connector *connector; struct drm_connector_list_iter iter; + int i; drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { @@ -960,4 +968,10 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) } } drm_connector_list_iter_end(&iter); + + /* Update reference counts for HPDs */ + for (i = DC_IRQ_SOURCE_HPD1; i <= adev->mode_info.num_hpd; i++) { + if (amdgpu_irq_put(adev, &adev->hpd_irq, i - DC_IRQ_SOURCE_HPD1)) + drm_err(dev, "DM_IRQ: Failed put HPD for source=%d!\n", i); + } } -- GitLab From d3c7059b6a8600fc62cd863f1ea203b8675e63e1 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Thu, 20 Feb 2025 14:41:59 +0100 Subject: [PATCH 1033/1585] drm/amdgpu: init return value in amdgpu_ttm_clear_buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise an uninitialized value can be returned if amdgpu_res_cleared returns true for all regions. Possibly closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3812 Fixes: a68c7eaa7a8f ("drm/amdgpu: Enable clear page functionality") Signed-off-by: Pierre-Eric Pelloux-Prayer Acked-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 7c62aacc3b452f73a1284198c81551035fac6d71) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 01ae2f88dec8c..262bd010a283d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -2281,7 +2281,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_res_cursor cursor; u64 addr; - int r; + int r = 0; if (!adev->mman.buffer_funcs_enabled) return -EINVAL; -- GitLab From 68f3ea7ee199ef77551e090dfef5a49046ea8443 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 21 Feb 2025 14:57:06 +0100 Subject: [PATCH 1034/1585] vmlinux.lds: Ensure that const vars with relocations are mapped R/O In the kernel, there are architectures (x86, arm64) that perform boot-time relocation (for KASLR) without relying on PIE codegen. In this case, all const global objects are emitted into .rodata, including const objects with fields that will be fixed up by the boot-time relocation code. This implies that .rodata (and .text in some cases) need to be writable at boot, but they will usually be mapped read-only as soon as the boot completes. When using PIE codegen, the compiler will emit const global objects into .data.rel.ro rather than .rodata if the object contains fields that need such fixups at boot-time. This permits the linker to annotate such regions as requiring read-write access only at load time, but not at execution time (in user space), while keeping .rodata truly const (in user space, this is important for reducing the CoW footprint of dynamic executables). This distinction does not matter for the kernel, but it does imply that const data will end up in writable memory if the .data.rel.ro sections are not treated in a special way, as they will end up in the writable .data segment by default. So emit .data.rel.ro into the .rodata segment. Cc: stable@vger.kernel.org Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250221135704.431269-5-ardb+git@google.com Signed-off-by: Josh Poimboeuf --- include/asm-generic/vmlinux.lds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 54504013c7491..337d3336e1756 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -457,7 +457,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) . = ALIGN((align)); \ .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ __start_rodata = .; \ - *(.rodata) *(.rodata.*) \ + *(.rodata) *(.rodata.*) *(.data.rel.ro*) \ SCHED_DATA \ RO_AFTER_INIT_DATA /* Read only after init */ \ . = ALIGN(8); \ -- GitLab From 73cfc53cc3b6380eccf013049574485f64cb83ca Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 21 Feb 2025 14:57:07 +0100 Subject: [PATCH 1035/1585] objtool: Fix C jump table annotations for Clang A C jump table (such as the one used by the BPF interpreter) is a const global array of absolute code addresses, and this means that the actual values in the table may not be known until the kernel is booted (e.g., when using KASLR or when the kernel VA space is sized dynamically). When using PIE codegen, the compiler will default to placing such const global objects in .data.rel.ro (which is annotated as writable), rather than .rodata (which is annotated as read-only). As C jump tables are explicitly emitted into .rodata, this used to result in warnings for LoongArch builds (which uses PIE codegen for the entire kernel) like Warning: setting incorrect section attributes for .rodata..c_jump_table due to the fact that the explicitly specified .rodata section inherited the read-write annotation that the compiler uses for such objects when using PIE codegen. This warning was suppressed by explicitly adding the read-only annotation to the __attribute__((section(""))) string, by commit c5b1184decc8 ("compiler.h: specify correct attribute for .rodata..c_jump_table") Unfortunately, this hack does not work on Clang's integrated assembler, which happily interprets the appended section type and permission specifiers as part of the section name, which therefore no longer matches the hard-coded pattern '.rodata..c_jump_table' that objtool expects, causing it to emit a warning kernel/bpf/core.o: warning: objtool: ___bpf_prog_run+0x20: sibling call from callable instruction with modified stack frame Work around this, by emitting C jump tables into .data.rel.ro instead, which is treated as .rodata by the linker script for all builds, not just PIE based ones. Fixes: c5b1184decc8 ("compiler.h: specify correct attribute for .rodata..c_jump_table") Tested-by: Tiezhu Yang # on LoongArch Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250221135704.431269-6-ardb+git@google.com Signed-off-by: Josh Poimboeuf --- include/linux/compiler.h | 2 +- tools/objtool/check.c | 7 ++++--- tools/objtool/include/objtool/special.h | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index b087de2f3e94b..0c25f3e429bba 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -110,7 +110,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, /* Unreachable code */ #ifdef CONFIG_OBJTOOL /* Annotate a C jump table to allow objtool to follow the code flow */ -#define __annotate_jump_table __section(".rodata..c_jump_table,\"a\",@progbits #") +#define __annotate_jump_table __section(".data.rel.ro.c_jump_table") #else /* !CONFIG_OBJTOOL */ #define __annotate_jump_table #endif /* CONFIG_OBJTOOL */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 497cb8dfb3eb3..1b5a1b3ea7a9f 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2471,13 +2471,14 @@ static void mark_rodata(struct objtool_file *file) * * - .rodata: can contain GCC switch tables * - .rodata.: same, if -fdata-sections is being used - * - .rodata..c_jump_table: contains C annotated jump tables + * - .data.rel.ro.c_jump_table: contains C annotated jump tables * * .rodata.str1.* sections are ignored; they don't contain jump tables. */ for_each_sec(file, sec) { - if (!strncmp(sec->name, ".rodata", 7) && - !strstr(sec->name, ".str1.")) { + if ((!strncmp(sec->name, ".rodata", 7) && + !strstr(sec->name, ".str1.")) || + !strncmp(sec->name, ".data.rel.ro", 12)) { sec->rodata = true; found = true; } diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h index e7ee7ffccefd4..e049679bb17b2 100644 --- a/tools/objtool/include/objtool/special.h +++ b/tools/objtool/include/objtool/special.h @@ -10,7 +10,7 @@ #include #include -#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table" +#define C_JUMP_TABLE_SECTION ".data.rel.ro.c_jump_table" struct special_alt { struct list_head list; -- GitLab From b4ae43b053537ec28f430c0ddb9b916ab296dbe5 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Tue, 18 Feb 2025 14:42:30 +0800 Subject: [PATCH 1036/1585] objtool: Add bch2_trans_unlocked_or_in_restart_error() to bcachefs noreturns Fix the following objtool warning during build time: fs/bcachefs/btree_cache.o: warning: objtool: btree_node_lock.constprop.0() falls through to next function bch2_recalc_btree_reserve() fs/bcachefs/btree_update.o: warning: objtool: bch2_trans_update_get_key_cache() falls through to next function need_whiteout_for_snapshot() bch2_trans_unlocked_or_in_restart_error() is an Obviously Correct (tm) panic() wrapper, add it to the list of known noreturns. Fixes: b318882022a8 ("bcachefs: bch2_trans_verify_not_unlocked_or_in_restart()") Reported-by: k2ci Signed-off-by: Youling Tang Reviewed-by: Kent Overstreet Link: https://lore.kernel.org/r/20250218064230.219997-1-youling.tang@linux.dev Signed-off-by: Josh Poimboeuf --- tools/objtool/noreturns.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index b2174894f9f71..6bb7edda3094d 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -19,7 +19,7 @@ NORETURN(__x64_sys_exit_group) NORETURN(arch_cpu_idle_dead) NORETURN(bch2_trans_in_restart_error) NORETURN(bch2_trans_restart_error) -NORETURN(bch2_trans_unlocked_error) +NORETURN(bch2_trans_unlocked_or_in_restart_error) NORETURN(cpu_bringup_and_idle) NORETURN(cpu_startup_entry) NORETURN(do_exit) -- GitLab From 8fef0a3b17bb258130a4fcbcb5addf94b25e9ec5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 25 Feb 2025 06:02:23 -1000 Subject: [PATCH 1037/1585] sched_ext: Fix pick_task_scx() picking non-queued tasks when it's called without balance() a6250aa251ea ("sched_ext: Handle cases where pick_task_scx() is called without preceding balance_scx()") added a workaround to handle the cases where pick_task_scx() is called without prececing balance_scx() which is due to a fair class bug where pick_taks_fair() may return NULL after a true return from balance_fair(). The workaround detects when pick_task_scx() is called without preceding balance_scx() and emulates SCX_RQ_BAL_KEEP and triggers kicking to avoid stalling. Unfortunately, the workaround code was testing whether @prev was on SCX to decide whether to keep the task running. This is incorrect as the task may be on SCX but no longer runnable. This could lead to a non-runnable task to be returned from pick_task_scx() which cause interesting confusions and failures. e.g. A common failure mode is the task ending up with (!on_rq && on_cpu) state which can cause potential wakers to busy loop, which can easily lead to deadlocks. Fix it by testing whether @prev has SCX_TASK_QUEUED set. This makes @prev_on_scx only used in one place. Open code the usage and improve the comment while at it. Signed-off-by: Tejun Heo Reported-by: Pat Cody Fixes: a6250aa251ea ("sched_ext: Handle cases where pick_task_scx() is called without preceding balance_scx()") Cc: stable@vger.kernel.org # v6.12+ Acked-by: Andrea Righi --- kernel/sched/ext.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 5a81d9a1e31f2..0f1da199cfc7c 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3117,7 +3117,6 @@ static struct task_struct *pick_task_scx(struct rq *rq) { struct task_struct *prev = rq->curr; struct task_struct *p; - bool prev_on_scx = prev->sched_class == &ext_sched_class; bool keep_prev = rq->scx.flags & SCX_RQ_BAL_KEEP; bool kick_idle = false; @@ -3137,14 +3136,18 @@ static struct task_struct *pick_task_scx(struct rq *rq) * if pick_task_scx() is called without preceding balance_scx(). */ if (unlikely(rq->scx.flags & SCX_RQ_BAL_PENDING)) { - if (prev_on_scx) { + if (prev->scx.flags & SCX_TASK_QUEUED) { keep_prev = true; } else { keep_prev = false; kick_idle = true; } - } else if (unlikely(keep_prev && !prev_on_scx)) { - /* only allowed during transitions */ + } else if (unlikely(keep_prev && + prev->sched_class != &ext_sched_class)) { + /* + * Can happen while enabling as SCX_RQ_BAL_PENDING assertion is + * conditional on scx_enabled() and may have been skipped. + */ WARN_ON_ONCE(scx_ops_enable_state() == SCX_OPS_ENABLED); keep_prev = false; } -- GitLab From a26b24b2e21f6222635a95426b9ef9eec63d69b1 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 17 Jan 2025 07:19:13 -0800 Subject: [PATCH 1038/1585] perf/x86/intel: Use better start period for frequency mode Freqency mode is the current default mode of Linux perf. A period of 1 is used as a starting period. The period is auto-adjusted on each tick or an overflow, to meet the frequency target. The start period of 1 is too low and may trigger some issues: - Many HWs do not support period 1 well. https://lore.kernel.org/lkml/875xs2oh69.ffs@tglx/ - For an event that occurs frequently, period 1 is too far away from the real period. Lots of samples are generated at the beginning. The distribution of samples may not be even. - A low starting period for frequently occurring events also challenges virtualization, which has a longer path to handle a PMI. The limit_period value only checks the minimum acceptable value for HW. It cannot be used to set the start period, because some events may need a very low period. The limit_period cannot be set too high. It doesn't help with the events that occur frequently. It's hard to find a universal starting period for all events. The idea implemented by this patch is to only give an estimate for the popular HW and HW cache events. For the rest of the events, start from the lowest possible recommended value. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250117151913.3043942-3-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 85 ++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index cdcebf30468a0..cdb19e3ba3aa3 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3952,6 +3952,85 @@ static inline bool intel_pmu_has_cap(struct perf_event *event, int idx) return test_bit(idx, (unsigned long *)&intel_cap->capabilities); } +static u64 intel_pmu_freq_start_period(struct perf_event *event) +{ + int type = event->attr.type; + u64 config, factor; + s64 start; + + /* + * The 127 is the lowest possible recommended SAV (sample after value) + * for a 4000 freq (default freq), according to the event list JSON file. + * Also, assume the workload is idle 50% time. + */ + factor = 64 * 4000; + if (type != PERF_TYPE_HARDWARE && type != PERF_TYPE_HW_CACHE) + goto end; + + /* + * The estimation of the start period in the freq mode is + * based on the below assumption. + * + * For a cycles or an instructions event, 1GHZ of the + * underlying platform, 1 IPC. The workload is idle 50% time. + * The start period = 1,000,000,000 * 1 / freq / 2. + * = 500,000,000 / freq + * + * Usually, the branch-related events occur less than the + * instructions event. According to the Intel event list JSON + * file, the SAV (sample after value) of a branch-related event + * is usually 1/4 of an instruction event. + * The start period of branch-related events = 125,000,000 / freq. + * + * The cache-related events occurs even less. The SAV is usually + * 1/20 of an instruction event. + * The start period of cache-related events = 25,000,000 / freq. + */ + config = event->attr.config & PERF_HW_EVENT_MASK; + if (type == PERF_TYPE_HARDWARE) { + switch (config) { + case PERF_COUNT_HW_CPU_CYCLES: + case PERF_COUNT_HW_INSTRUCTIONS: + case PERF_COUNT_HW_BUS_CYCLES: + case PERF_COUNT_HW_STALLED_CYCLES_FRONTEND: + case PERF_COUNT_HW_STALLED_CYCLES_BACKEND: + case PERF_COUNT_HW_REF_CPU_CYCLES: + factor = 500000000; + break; + case PERF_COUNT_HW_BRANCH_INSTRUCTIONS: + case PERF_COUNT_HW_BRANCH_MISSES: + factor = 125000000; + break; + case PERF_COUNT_HW_CACHE_REFERENCES: + case PERF_COUNT_HW_CACHE_MISSES: + factor = 25000000; + break; + default: + goto end; + } + } + + if (type == PERF_TYPE_HW_CACHE) + factor = 25000000; +end: + /* + * Usually, a prime or a number with less factors (close to prime) + * is chosen as an SAV, which makes it less likely that the sampling + * period synchronizes with some periodic event in the workload. + * Minus 1 to make it at least avoiding values near power of twos + * for the default freq. + */ + start = DIV_ROUND_UP_ULL(factor, event->attr.sample_freq) - 1; + + if (start > x86_pmu.max_period) + start = x86_pmu.max_period; + + if (x86_pmu.limit_period) + x86_pmu.limit_period(event, &start); + + return start; +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); @@ -3963,6 +4042,12 @@ static int intel_pmu_hw_config(struct perf_event *event) if (ret) return ret; + if (event->attr.freq && event->attr.sample_freq) { + event->hw.sample_period = intel_pmu_freq_start_period(event); + event->hw.last_period = event->hw.sample_period; + local64_set(&event->hw.period_left, event->hw.sample_period); + } + if (event->attr.precise_ip) { if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT) return -EINVAL; -- GitLab From 1f7a4f98c11fbeb18ed21f3b3a497e90a50ad2e0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Feb 2025 15:52:21 +0100 Subject: [PATCH 1039/1585] sunrpc: suppress warnings for unused procfs functions There is a warning about unused variables when building with W=1 and no procfs: net/sunrpc/cache.c:1660:30: error: 'cache_flush_proc_ops' defined but not used [-Werror=unused-const-variable=] 1660 | static const struct proc_ops cache_flush_proc_ops = { | ^~~~~~~~~~~~~~~~~~~~ net/sunrpc/cache.c:1622:30: error: 'content_proc_ops' defined but not used [-Werror=unused-const-variable=] 1622 | static const struct proc_ops content_proc_ops = { | ^~~~~~~~~~~~~~~~ net/sunrpc/cache.c:1598:30: error: 'cache_channel_proc_ops' defined but not used [-Werror=unused-const-variable=] 1598 | static const struct proc_ops cache_channel_proc_ops = { | ^~~~~~~~~~~~~~~~~~~~~~ These are used inside of an #ifdef, so replacing that with an IS_ENABLED() check lets the compiler see how they are used while still dropping them during dead code elimination. Fixes: dbf847ecb631 ("knfsd: allow cache_register to return error on failure") Reviewed-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: Arnd Bergmann Signed-off-by: Anna Schumaker --- net/sunrpc/cache.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index cb279eb9ac4ba..7ce5e28a6c031 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1674,12 +1674,14 @@ static void remove_cache_proc_entries(struct cache_detail *cd) } } -#ifdef CONFIG_PROC_FS static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) { struct proc_dir_entry *p; struct sunrpc_net *sn; + if (!IS_ENABLED(CONFIG_PROC_FS)) + return 0; + sn = net_generic(net, sunrpc_net_id); cd->procfs = proc_mkdir(cd->name, sn->proc_net_rpc); if (cd->procfs == NULL) @@ -1707,12 +1709,6 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) remove_cache_proc_entries(cd); return -ENOMEM; } -#else /* CONFIG_PROC_FS */ -static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) -{ - return 0; -} -#endif void __init cache_initialize(void) { -- GitLab From 9084ed79ddaaaa1ec01cd304af9fb532c26252db Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Thu, 20 Feb 2025 14:29:36 -0500 Subject: [PATCH 1040/1585] lsm,nfs: fix memory leak of lsm_context commit b530104f50e8 ("lsm: lsm_context in security_dentry_init_security") did not preserve the lsm id for subsequent release calls, which results in a memory leak. Fix it by saving the lsm id in the nfs4_label and providing it on the subsequent release call. Fixes: b530104f50e8 ("lsm: lsm_context in security_dentry_init_security") Signed-off-by: Stephen Smalley Acked-by: Paul Moore Acked-by: Casey Schaufler Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 7 ++++--- include/linux/nfs4.h | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c25ecdb76d304..6e95db6c17e92 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -133,6 +133,7 @@ nfs4_label_init_security(struct inode *dir, struct dentry *dentry, if (err) return NULL; + label->lsmid = shim.id; label->label = shim.context; label->len = shim.len; return label; @@ -145,7 +146,7 @@ nfs4_label_release_security(struct nfs4_label *label) if (label) { shim.context = label->label; shim.len = label->len; - shim.id = LSM_ID_UNDEF; + shim.id = label->lsmid; security_release_secctx(&shim); } } @@ -6272,7 +6273,7 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf, size_t buflen) { struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_label label = {0, 0, buflen, buf}; + struct nfs4_label label = {0, 0, 0, buflen, buf}; u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL }; struct nfs_fattr fattr = { @@ -6377,7 +6378,7 @@ static int nfs4_do_set_security_label(struct inode *inode, static int nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen) { - struct nfs4_label ilabel = {0, 0, buflen, (char *)buf }; + struct nfs4_label ilabel = {0, 0, 0, buflen, (char *)buf }; struct nfs_fattr *fattr; int status; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 71fbebfa43c7e..9ac83ca883266 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -47,6 +47,7 @@ struct nfs4_acl { struct nfs4_label { uint32_t lfs; uint32_t pi; + u32 lsmid; u32 len; char *label; }; -- GitLab From 729d163232971672d0f41b93c02092fb91f0e758 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Sat, 22 Feb 2025 00:01:22 +0100 Subject: [PATCH 1041/1585] Input: i8042 - swap old quirk combination with new quirk for NHxxRZQ Some older Clevo barebones have problems like no or laggy keyboard after resume or boot which can be fixed with the SERIO_QUIRK_FORCENORESTORE quirk. With the old i8042 quirks this devices keyboard is sometimes laggy after resume. With the new quirk this issue doesn't happen. Cc: stable@vger.kernel.org Signed-off-by: Werner Sembach Link: https://lore.kernel.org/r/20250221230137.70292-1-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 127cfdc8668a0..a764248ea11d3 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1209,18 +1209,10 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, { - /* - * Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS makes - * the keyboard very laggy for ~5 seconds after boot and - * sometimes also after resume. - * However both are required for the keyboard to not fail - * completely sometimes after boot or resume. - */ .matches = { DMI_MATCH(DMI_BOARD_NAME, "NHxxRZQ"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { -- GitLab From 9ed468e17d5b80e7116fd35842df3648e808ae47 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Sat, 22 Feb 2025 00:01:23 +0100 Subject: [PATCH 1042/1585] Input: i8042 - add required quirks for missing old boardnames Some older Clevo barebones have problems like no or laggy keyboard after resume or boot which can be fixed with the SERIO_QUIRK_FORCENORESTORE quirk. The PB71RD keyboard is sometimes laggy after resume and the PC70DR, PB51RF, P640RE, and PCX0DX_GN20 keyboard is sometimes unresponsive after resume. This quirk fixes that. Cc: stable@vger.kernel.org Signed-off-by: Werner Sembach Link: https://lore.kernel.org/r/20250221230137.70292-2-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 30 +++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index a764248ea11d3..57a6eec4c2a5d 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1260,6 +1260,12 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "P640RE"), + }, + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) + }, { /* * This is only a partial board_name and might be followed by @@ -1335,6 +1341,24 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PB51RF"), + }, + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PB71RD"), + }, + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PC70DR"), + }, + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "PCX0DX"), @@ -1342,6 +1366,12 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PCX0DX_GN20"), + }, + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) + }, /* See comment on TUXEDO InfinityBook S17 Gen6 / Clevo NS70MU above */ { .matches = { -- GitLab From 75ee4ebebbbe8dc4b55ba37f388924fa96bf1564 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Sat, 22 Feb 2025 00:01:24 +0100 Subject: [PATCH 1043/1585] Input: i8042 - swap old quirk combination with new quirk for several devices Some older Clevo barebones have problems like no or laggy keyboard after resume or boot which can be fixed with the SERIO_QUIRK_FORCENORESTORE quirk. While the old quirk combination did not show negative effects on these devices specifically, the new quirk works just as well and seems more stable in general. Cc: stable@vger.kernel.org Signed-off-by: Werner Sembach Link: https://lore.kernel.org/r/20250221230137.70292-3-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 40 ++++++++++----------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 57a6eec4c2a5d..9df1ef6032dd0 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1080,16 +1080,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { DMI_MATCH(DMI_BOARD_VENDOR, "TUXEDO"), DMI_MATCH(DMI_BOARD_NAME, "AURA1501"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "TUXEDO"), DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* Mivvy M310 */ @@ -1171,8 +1169,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "LAPQC71A"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1205,8 +1202,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NH5xAx"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1218,8 +1214,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, /* * At least one modern Clevo barebone has the touchpad connected both @@ -1235,17 +1230,15 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NS50MU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOAUX | SERIO_QUIRK_NOMUX | - SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | - SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_NOAUX | + SERIO_QUIRK_FORCENORESTORE) }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NS50_70MU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOAUX | SERIO_QUIRK_NOMUX | - SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | - SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_NOAUX | + SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1319,8 +1312,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RS"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* @@ -1338,8 +1330,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "PB50_70DFx,DDx"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1363,8 +1354,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "PCX0DX"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1383,15 +1373,13 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "X170SM"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "X170KM-G"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* -- GitLab From d85862ccca452eeb19329e9f4f9a6ce1d1e53561 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Sat, 22 Feb 2025 00:01:25 +0100 Subject: [PATCH 1044/1585] Input: i8042 - swap old quirk combination with new quirk for more devices Some older Clevo barebones have problems like no or laggy keyboard after resume or boot which can be fixed with the SERIO_QUIRK_FORCENORESTORE quirk. We could not activly retest these devices because we no longer have them in our archive, but based on the other old Clevo barebones we tested where the new quirk had the same or a better behaviour I think it would be good to apply it on these too. Cc: stable@vger.kernel.org Signed-off-by: Werner Sembach Link: https://lore.kernel.org/r/20250221230137.70292-4-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 31 +++++++++------------------ 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 9df1ef6032dd0..6ed9fc34948cb 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1157,9 +1157,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { }, /* * A lot of modern Clevo barebones have touchpad and/or keyboard issues - * after suspend fixable with nomux + reset + noloop + nopnp. Luckily, - * none of them have an external PS/2 port so this can safely be set for - * all of them. + * after suspend fixable with the forcenorestore quirk. * Clevo barebones come with board_vendor and/or system_vendor set to * either the very generic string "Notebook" and/or a different value * for each individual reseller. The only somewhat universal way to @@ -1175,22 +1173,19 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "LAPQC71B"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "N140CU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "N141CU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1250,8 +1245,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NJ50_70CU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1268,16 +1262,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "P65xH"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* Clevo P650RS, 650RP6, Sager NP8152-S, and others */ .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* @@ -1288,8 +1280,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "P65_P67H"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* @@ -1300,8 +1291,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RP"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* @@ -1323,8 +1313,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "P67xRP"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { -- GitLab From 96f41f644c4885761b0d117fc36dc5dcf92e15ec Mon Sep 17 00:00:00 2001 From: Dmytro Maluka Date: Sun, 5 Jan 2025 17:27:40 +0000 Subject: [PATCH 1045/1585] x86/of: Don't use DTB for SMP setup if ACPI is enabled There are cases when it is useful to use both ACPI and DTB provided by the bootloader, however in such cases we should make sure to prevent conflicts between the two. Namely, don't try to use DTB for SMP setup if ACPI is enabled. Precisely, this prevents at least: - incorrectly calling register_lapic_address(APIC_DEFAULT_PHYS_BASE) after the LAPIC was already successfully enumerated via ACPI, causing noisy kernel warnings and probably potential real issues as well - failed IOAPIC setup in the case when IOAPIC is enumerated via mptable instead of ACPI (e.g. with acpi=noirq), due to mpparse_parse_smp_config() overridden by x86_dtb_parse_smp_config() Signed-off-by: Dmytro Maluka Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250105172741.3476758-2-dmaluka@chromium.org --- arch/x86/kernel/devicetree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 59d23cdf4ed0f..dd8748c45529a 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -2,6 +2,7 @@ /* * Architecture specific OF callbacks. */ +#include #include #include #include @@ -313,6 +314,6 @@ void __init x86_flattree_get_config(void) if (initial_dtb) early_memunmap(dt, map_len); #endif - if (of_have_populated_dt()) + if (acpi_disabled && of_have_populated_dt()) x86_init.mpparse.parse_smp_cfg = x86_dtb_parse_smp_config; } -- GitLab From bebe35bb738b573c32a5033499cd59f20293f2a3 Mon Sep 17 00:00:00 2001 From: Russell Senior Date: Tue, 25 Feb 2025 22:31:20 +0100 Subject: [PATCH 1046/1585] x86/CPU: Fix warm boot hang regression on AMD SC1100 SoC systems I still have some Soekris net4826 in a Community Wireless Network I volunteer with. These devices use an AMD SC1100 SoC. I am running OpenWrt on them, which uses a patched kernel, that naturally has evolved over time. I haven't updated the ones in the field in a number of years (circa 2017), but have one in a test bed, where I have intermittently tried out test builds. A few years ago, I noticed some trouble, particularly when "warm booting", that is, doing a reboot without removing power, and noticed the device was hanging after the kernel message: [ 0.081615] Working around Cyrix MediaGX virtual DMA bugs. If I removed power and then restarted, it would boot fine, continuing through the message above, thusly: [ 0.081615] Working around Cyrix MediaGX virtual DMA bugs. [ 0.090076] Enable Memory-Write-back mode on Cyrix/NSC processor. [ 0.100000] Enable Memory access reorder on Cyrix/NSC processor. [ 0.100070] Last level iTLB entries: 4KB 0, 2MB 0, 4MB 0 [ 0.110058] Last level dTLB entries: 4KB 0, 2MB 0, 4MB 0, 1GB 0 [ 0.120037] CPU: NSC Geode(TM) Integrated Processor by National Semi (family: 0x5, model: 0x9, stepping: 0x1) [...] In order to continue using modern tools, like ssh, to interact with the software on these old devices, I need modern builds of the OpenWrt firmware on the devices. I confirmed that the warm boot hang was still an issue in modern OpenWrt builds (currently using a patched linux v6.6.65). Last night, I decided it was time to get to the bottom of the warm boot hang, and began bisecting. From preserved builds, I narrowed down the bisection window from late February to late May 2019. During this period, the OpenWrt builds were using 4.14.x. I was able to build using period-correct Ubuntu 18.04.6. After a number of bisection iterations, I identified a kernel bump from 4.14.112 to 4.14.113 as the commit that introduced the warm boot hang. https://github.com/openwrt/openwrt/commit/07aaa7e3d62ad32767d7067107db64b6ade81537 Looking at the upstream changes in the stable kernel between 4.14.112 and 4.14.113 (tig v4.14.112..v4.14.113), I spotted a likely suspect: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=20afb90f730982882e65b01fb8bdfe83914339c5 So, I tried reverting just that kernel change on top of the breaking OpenWrt commit, and my warm boot hang went away. Presumably, the warm boot hang is due to some register not getting cleared in the same way that a loss of power does. That is approximately as much as I understand about the problem. More poking/prodding and coaching from Jonas Gorski, it looks like this test patch fixes the problem on my board: Tested against v6.6.67 and v4.14.113. Fixes: 18fb053f9b82 ("x86/cpu/cyrix: Use correct macros for Cyrix calls on Geode processors") Debugged-by: Jonas Gorski Signed-off-by: Russell Senior Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/CAHP3WfOgs3Ms4Z+L9i0-iBOE21sdMk5erAiJurPjnrL9LSsgRA@mail.gmail.com Cc: Matthew Whitehead Cc: Thomas Gleixner --- arch/x86/kernel/cpu/cyrix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 9651275aecd1b..dfec2c61e3547 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -153,8 +153,8 @@ static void geode_configure(void) u8 ccr3; local_irq_save(flags); - /* Suspend on halt power saving and enable #SUSP pin */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); + /* Suspend on halt power saving */ + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x08); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ -- GitLab From 9de7695925d5d2d2085681ba935857246eb2817d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Feb 2025 22:32:33 +0100 Subject: [PATCH 1047/1585] x86/irq: Define trace events conditionally When both of X86_LOCAL_APIC and X86_THERMAL_VECTOR are disabled, the irq tracing produces a W=1 build warning for the tracing definitions: In file included from include/trace/trace_events.h:27, from include/trace/define_trace.h:113, from arch/x86/include/asm/trace/irq_vectors.h:383, from arch/x86/kernel/irq.c:29: include/trace/stages/init.h:2:23: error: 'str__irq_vectors__trace_system_name' defined but not used [-Werror=unused-const-variable=] Make the tracepoints conditional on the same symbosl that guard their usage. Signed-off-by: Arnd Bergmann Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250225213236.3141752-1-arnd@kernel.org --- arch/x86/kernel/irq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 385e3a5fc3045..feca4f20b06aa 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -25,8 +25,10 @@ #include #include +#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_THERMAL_VECTOR) #define CREATE_TRACE_POINTS #include +#endif DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); -- GitLab From 68a9b0e313302451468c0b0eda53c383fa51a8f4 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Tue, 24 Dec 2024 22:55:16 +0800 Subject: [PATCH 1048/1585] perf/x86/rapl: Add support for Intel Arrow Lake U Add Arrow Lake U model for RAPL: $ ls -1 /sys/devices/power/events/ energy-cores energy-cores.scale energy-cores.unit energy-gpu energy-gpu.scale energy-gpu.unit energy-pkg energy-pkg.scale energy-pkg.unit energy-psys energy-psys.scale energy-psys.unit The same output as ArrowLake: $ perf stat -a -I 1000 --per-socket -e power/energy-pkg/ Signed-off-by: Aaron Ma Signed-off-by: Ingo Molnar Acked-by: Zhang Rui Link: https://lore.kernel.org/r/20241224145516.349028-1-aaron.ma@canonical.com --- arch/x86/events/rapl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 4952faf03e82d..6941f4811bec1 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -879,6 +879,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_VFM(INTEL_METEORLAKE_L, &model_skl), X86_MATCH_VFM(INTEL_ARROWLAKE_H, &model_skl), X86_MATCH_VFM(INTEL_ARROWLAKE, &model_skl), + X86_MATCH_VFM(INTEL_ARROWLAKE_U, &model_skl), X86_MATCH_VFM(INTEL_LUNARLAKE_M, &model_skl), {}, }; -- GitLab From 0f6750b15ffdf274668b12824b09bd49ea854e18 Mon Sep 17 00:00:00 2001 From: Daniel Sneddon Date: Thu, 19 Dec 2024 08:52:27 -0700 Subject: [PATCH 1049/1585] x86/entry: Fix kernel-doc warning The do_int80_emulation() function is missing a kernel-doc formatted description of its argument. This is causing a warning when building with W=1. Add a brief description of the argument to satisfy kernel-doc. Reported-by: kernel test robot Signed-off-by: Daniel Sneddon Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20241219155227.685692-1-daniel.sneddon@linux.intel.com Closes: https://lore.kernel.org/oe-kbuild-all/202412131236.a5HhOqXo-lkp@intel.com/ --- arch/x86/entry/common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 94941c5a10ac1..14db5b85114c1 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -190,6 +190,7 @@ static __always_inline bool int80_is_external(void) /** * do_int80_emulation - 32-bit legacy syscall C entry from asm + * @regs: syscall arguments in struct pt_args on the stack. * * This entry point can be used by 32-bit and 64-bit programs to perform * 32-bit system calls. Instances of INT $0x80 can be found inline in -- GitLab From f8c857238a392f21d5726d07966f6061007c8d4f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 25 Feb 2025 14:32:14 -0800 Subject: [PATCH 1050/1585] uprobes: Remove too strict lockdep_assert() condition in hprobe_expire() hprobe_expire() is used to atomically switch pending uretprobe instance (struct return_instance) from being SRCU protected to be refcounted. This can be done from background timer thread, or synchronously within current thread when task is forked. In the former case, return_instance has to be protected through RCU read lock, and that's what hprobe_expire() used to check with lockdep_assert(rcu_read_lock_held()). But in the latter case (hprobe_expire() called from dup_utask()) there is no RCU lock being held, and it's both unnecessary and incovenient. Inconvenient due to the intervening memory allocations inside dup_return_instance()'s loop. Unnecessary because dup_utask() is called synchronously in current thread, and no uretprobe can run at that point, so return_instance can't be freed either. So drop rcu_read_lock_held() condition, and expand corresponding comment to explain necessary lifetime guarantees. lockdep_assert()-detected issue is a false positive. Fixes: dd1a7567784e ("uprobes: SRCU-protect uretprobe lifetime (with timeout)") Reported-by: Breno Leitao Signed-off-by: Andrii Nakryiko Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250225223214.2970740-1-andrii@kernel.org --- kernel/events/uprobes.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index af53fbd2d12c4..b4ca8898fe178 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -767,10 +767,14 @@ static struct uprobe *hprobe_expire(struct hprobe *hprobe, bool get) enum hprobe_state hstate; /* - * return_instance's hprobe is protected by RCU. - * Underlying uprobe is itself protected from reuse by SRCU. + * Caller should guarantee that return_instance is not going to be + * freed from under us. This can be achieved either through holding + * rcu_read_lock() or by owning return_instance in the first place. + * + * Underlying uprobe is itself protected from reuse by SRCU, so ensure + * SRCU lock is held properly. */ - lockdep_assert(rcu_read_lock_held() && srcu_read_lock_held(&uretprobes_srcu)); + lockdep_assert(srcu_read_lock_held(&uretprobes_srcu)); hstate = READ_ONCE(hprobe->state); switch (hstate) { -- GitLab From 66cb85c441cd9c44b193ff75b4d0358fccdc6b9c Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 25 Feb 2025 22:25:00 +0000 Subject: [PATCH 1051/1585] cifs: Fix the smb1 readv callback to correctly call netfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix cifs_readv_callback() to call netfs_read_subreq_terminated() rather than queuing the subrequest work item (which is unset). Also call the I/O progress tracepoint. cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item") Reported-by: Jean-Christophe Guillain Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219793 Tested-by: Jean-Christophe Guillain Tested-by: Pali Rohár Reviewed-by: Paulo Alcantara (Red Hat) Signed-off-by: David Howells Signed-off-by: Steve French --- fs/smb/client/cifssmb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 3feaa0f681699..d07682020c645 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1338,7 +1338,8 @@ cifs_readv_callback(struct mid_q_entry *mid) rdata->credits.value = 0; rdata->subreq.error = rdata->result; rdata->subreq.transferred += rdata->got_bytes; - queue_work(cifsiod_wq, &rdata->subreq.work); + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_progress); + netfs_read_subreq_terminated(&rdata->subreq); release_mid(mid); add_credits(server, &credits, 0); } -- GitLab From bab3a6e9ffd600f9db0ebaf8f45e1c6111cf314c Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Mon, 24 Feb 2025 06:17:16 +0100 Subject: [PATCH 1052/1585] net: ethernet: ti: am65-cpsw: select PAGE_POOL am65-cpsw uses page_pool_dev_alloc_pages(), thus needs PAGE_POOL selected to avoid linker errors. This is missing since the driver started to use page_pool helpers in 8acacc40f733 ("net: ethernet: ti: am65-cpsw: Add minimal XDP support") Fixes: 8acacc40f733 ("net: ethernet: ti: am65-cpsw: Add minimal XDP support") Signed-off-by: Sascha Hauer Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250224-net-am654-nuss-kconfig-v2-1-c124f4915c92@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index 0d5a862cd78a6..3a13d60a947a8 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -99,6 +99,7 @@ config TI_K3_AM65_CPSW_NUSS select NET_DEVLINK select TI_DAVINCI_MDIO select PHYLINK + select PAGE_POOL select TI_K3_CPPI_DESC_POOL imply PHY_TI_GMII_SEL depends on TI_K3_AM65_CPTS || !TI_K3_AM65_CPTS -- GitLab From 18912c520674ec4d920fe3826e7e4fefeecdf5ae Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 24 Feb 2025 09:44:01 -0800 Subject: [PATCH 1053/1585] tcp: devmem: don't write truncated dmabuf CMSGs to userspace Currently, we report -ETOOSMALL (err) only on the first iteration (!sent). When we get put_cmsg error after a bunch of successful put_cmsg calls, we don't signal the error at all. This might be confusing on the userspace side which will see truncated CMSGs but no MSG_CTRUNC signal. Consider the following case: - sizeof(struct cmsghdr) = 16 - sizeof(struct dmabuf_cmsg) = 24 - total cmsg size (CMSG_LEN) = 40 (16+24) When calling recvmsg with msg_controllen=60, the userspace will receive two(!) dmabuf_cmsg(s), the first one will be a valid one and the second one will be silently truncated. There is no easy way to discover the truncation besides doing something like "cm->cmsg_len != CMSG_LEN(sizeof(dmabuf_cmsg))". Introduce new put_devmem_cmsg wrapper that reports an error instead of doing the truncation. Mina suggests that it's the intended way this API should work. Note that we might now report MSG_CTRUNC when the users (incorrectly) call us with msg_control == NULL. Fixes: 8f0b3cc9a4c1 ("tcp: RX path for devmem TCP") Reviewed-by: Mina Almasry Signed-off-by: Stanislav Fomichev Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250224174401.3582695-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/socket.h | 2 ++ net/core/scm.c | 10 ++++++++++ net/ipv4/tcp.c | 26 ++++++++++---------------- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index d18cc47e89bd0..c3322eb3d6865 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -392,6 +392,8 @@ struct ucred { extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); +extern int put_cmsg_notrunc(struct msghdr *msg, int level, int type, int len, + void *data); struct timespec64; struct __kernel_timespec; diff --git a/net/core/scm.c b/net/core/scm.c index 4f6a14babe5ae..733c0cbd393d2 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -282,6 +282,16 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) } EXPORT_SYMBOL(put_cmsg); +int put_cmsg_notrunc(struct msghdr *msg, int level, int type, int len, + void *data) +{ + /* Don't produce truncated CMSGs */ + if (!msg->msg_control || msg->msg_controllen < CMSG_LEN(len)) + return -ETOOSMALL; + + return put_cmsg(msg, level, type, len, data); +} + void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss_internal) { struct scm_timestamping64 tss; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0d704bda6c416..d74281eca14f0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2438,14 +2438,12 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb, */ memset(&dmabuf_cmsg, 0, sizeof(dmabuf_cmsg)); dmabuf_cmsg.frag_size = copy; - err = put_cmsg(msg, SOL_SOCKET, SO_DEVMEM_LINEAR, - sizeof(dmabuf_cmsg), &dmabuf_cmsg); - if (err || msg->msg_flags & MSG_CTRUNC) { - msg->msg_flags &= ~MSG_CTRUNC; - if (!err) - err = -ETOOSMALL; + err = put_cmsg_notrunc(msg, SOL_SOCKET, + SO_DEVMEM_LINEAR, + sizeof(dmabuf_cmsg), + &dmabuf_cmsg); + if (err) goto out; - } sent += copy; @@ -2499,16 +2497,12 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb, offset += copy; remaining_len -= copy; - err = put_cmsg(msg, SOL_SOCKET, - SO_DEVMEM_DMABUF, - sizeof(dmabuf_cmsg), - &dmabuf_cmsg); - if (err || msg->msg_flags & MSG_CTRUNC) { - msg->msg_flags &= ~MSG_CTRUNC; - if (!err) - err = -ETOOSMALL; + err = put_cmsg_notrunc(msg, SOL_SOCKET, + SO_DEVMEM_DMABUF, + sizeof(dmabuf_cmsg), + &dmabuf_cmsg); + if (err) goto out; - } atomic_long_inc(&niov->pp_ref_count); tcp_xa_pool.netmems[tcp_xa_pool.idx++] = skb_frag_netmem(frag); -- GitLab From f865c24bc55158313d5779fc81116023a6940ca3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 24 Feb 2025 19:11:50 +0100 Subject: [PATCH 1054/1585] mptcp: always handle address removal under msk socket lock Syzkaller reported a lockdep splat in the PM control path: WARNING: CPU: 0 PID: 6693 at ./include/net/sock.h:1711 sock_owned_by_me include/net/sock.h:1711 [inline] WARNING: CPU: 0 PID: 6693 at ./include/net/sock.h:1711 msk_owned_by_me net/mptcp/protocol.h:363 [inline] WARNING: CPU: 0 PID: 6693 at ./include/net/sock.h:1711 mptcp_pm_nl_addr_send_ack+0x57c/0x610 net/mptcp/pm_netlink.c:788 Modules linked in: CPU: 0 UID: 0 PID: 6693 Comm: syz.0.205 Not tainted 6.14.0-rc2-syzkaller-00303-gad1b832bf1cf #0 Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 12/27/2024 RIP: 0010:sock_owned_by_me include/net/sock.h:1711 [inline] RIP: 0010:msk_owned_by_me net/mptcp/protocol.h:363 [inline] RIP: 0010:mptcp_pm_nl_addr_send_ack+0x57c/0x610 net/mptcp/pm_netlink.c:788 Code: 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc e8 ca 7b d3 f5 eb b9 e8 c3 7b d3 f5 90 0f 0b 90 e9 dd fb ff ff e8 b5 7b d3 f5 90 <0f> 0b 90 e9 3e fb ff ff 44 89 f1 80 e1 07 38 c1 0f 8c eb fb ff ff RSP: 0000:ffffc900034f6f60 EFLAGS: 00010283 RAX: ffffffff8bee3c2b RBX: 0000000000000001 RCX: 0000000000080000 RDX: ffffc90004d42000 RSI: 000000000000a407 RDI: 000000000000a408 RBP: ffffc900034f7030 R08: ffffffff8bee37f6 R09: 0100000000000000 R10: dffffc0000000000 R11: ffffed100bcc62e4 R12: ffff88805e6316e0 R13: ffff88805e630c00 R14: dffffc0000000000 R15: ffff88805e630c00 FS: 00007f7e9a7e96c0(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b2fd18ff8 CR3: 0000000032c24000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: mptcp_pm_remove_addr+0x103/0x1d0 net/mptcp/pm.c:59 mptcp_pm_remove_anno_addr+0x1f4/0x2f0 net/mptcp/pm_netlink.c:1486 mptcp_nl_remove_subflow_and_signal_addr net/mptcp/pm_netlink.c:1518 [inline] mptcp_pm_nl_del_addr_doit+0x118d/0x1af0 net/mptcp/pm_netlink.c:1629 genl_family_rcv_msg_doit net/netlink/genetlink.c:1115 [inline] genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline] genl_rcv_msg+0xb1f/0xec0 net/netlink/genetlink.c:1210 netlink_rcv_skb+0x206/0x480 net/netlink/af_netlink.c:2543 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219 netlink_unicast_kernel net/netlink/af_netlink.c:1322 [inline] netlink_unicast+0x7f6/0x990 net/netlink/af_netlink.c:1348 netlink_sendmsg+0x8de/0xcb0 net/netlink/af_netlink.c:1892 sock_sendmsg_nosec net/socket.c:718 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:733 ____sys_sendmsg+0x53a/0x860 net/socket.c:2573 ___sys_sendmsg net/socket.c:2627 [inline] __sys_sendmsg+0x269/0x350 net/socket.c:2659 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f7e9998cde9 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f7e9a7e9038 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f7e99ba5fa0 RCX: 00007f7e9998cde9 RDX: 000000002000c094 RSI: 0000400000000000 RDI: 0000000000000007 RBP: 00007f7e99a0e2a0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 00007f7e99ba5fa0 R15: 00007fff49231088 Indeed the PM can try to send a RM_ADDR over a msk without acquiring first the msk socket lock. The bugged code-path comes from an early optimization: when there are no subflows, the PM should (usually) not send RM_ADDR notifications. The above statement is incorrect, as without locks another process could concurrent create a new subflow and cause the RM_ADDR generation. Additionally the supposed optimization is not very effective even performance-wise, as most mptcp sockets should have at least one subflow: the MPC one. Address the issue removing the buggy code path, the existing "slow-path" will handle correctly even the edge case. Fixes: b6c08380860b ("mptcp: remove addr and subflow in PM netlink") Cc: stable@vger.kernel.org Reported-by: syzbot+cd3ce3d03a3393ae9700@syzkaller.appspotmail.com Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/546 Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250224-net-mptcp-misc-fixes-v1-1-f550f636b435@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 572d160edca33..c0e47f4f7b1aa 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1514,11 +1514,6 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, if (mptcp_pm_is_userspace(msk)) goto next; - if (list_empty(&msk->conn_list)) { - mptcp_pm_remove_anno_addr(msk, addr, false); - goto next; - } - lock_sock(sk); remove_subflow = mptcp_lookup_subflow_by_saddr(&msk->conn_list, addr); mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && -- GitLab From 8668860b0ad32a13fcd6c94a0995b7aa7638c9ef Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 24 Feb 2025 19:11:51 +0100 Subject: [PATCH 1055/1585] mptcp: reset when MPTCP opts are dropped after join Before this patch, if the checksum was not used, the subflow was only reset if map_data_len was != 0. If there were no MPTCP options or an invalid mapping, map_data_len was not set to the data len, and then the subflow was not reset as it should have been, leaving the MPTCP connection in a wrong fallback mode. This map_data_len condition has been introduced to handle the reception of the infinite mapping. Instead, a new dedicated mapping error could have been returned and treated as a special case. However, the commit 31bf11de146c ("mptcp: introduce MAPPING_BAD_CSUM") has been introduced by Paolo Abeni soon after, and backported later on to stable. It better handle the csum case, and it means the exception for valid_csum_seen in subflow_can_fallback(), plus this one for the infinite mapping in subflow_check_data_avail(), are no longer needed. In other words, the code can be simplified there: a fallback should only be done if msk->allow_infinite_fallback is set. This boolean is set to false once MPTCP-specific operations acting on the whole MPTCP connection vs the initial path have been done, e.g. a second path has been created, or an MPTCP re-injection -- yes, possible even with a single subflow. The subflow_can_fallback() helper can then be dropped, and replaced by this single condition. This also makes the code clearer: a fallback should only be done if it is possible to do so. While at it, no need to set map_data_len to 0 in get_mapping_status() for the infinite mapping case: it will be set to skb->len just after, at the end of subflow_check_data_avail(), and not read in between. Fixes: f8d4bcacff3b ("mptcp: infinite mapping receiving") Cc: stable@vger.kernel.org Reported-by: Chester A. Unal Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/544 Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts (NGI0) Tested-by: Chester A. Unal Link: https://patch.msgid.link/20250224-net-mptcp-misc-fixes-v1-2-f550f636b435@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index dfcbef9c46246..9f18217dddc86 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1142,7 +1142,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk, if (data_len == 0) { pr_debug("infinite mapping received\n"); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); - subflow->map_data_len = 0; return MAPPING_INVALID; } @@ -1286,18 +1285,6 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss mptcp_schedule_work(sk); } -static bool subflow_can_fallback(struct mptcp_subflow_context *subflow) -{ - struct mptcp_sock *msk = mptcp_sk(subflow->conn); - - if (subflow->mp_join) - return false; - else if (READ_ONCE(msk->csum_enabled)) - return !subflow->valid_csum_seen; - else - return READ_ONCE(msk->allow_infinite_fallback); -} - static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); @@ -1393,7 +1380,7 @@ static bool subflow_check_data_avail(struct sock *ssk) return true; } - if (!subflow_can_fallback(subflow) && subflow->map_data_len) { + if (!READ_ONCE(msk->allow_infinite_fallback)) { /* fatal protocol error, close the socket. * subflow_error_report() will introduce the appropriate barriers */ -- GitLab From db75a16813aabae3b78c06b1b99f5e314c1f55d3 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 24 Feb 2025 19:11:52 +0100 Subject: [PATCH 1056/1585] mptcp: safety check before fallback Recently, some fallback have been initiated, while the connection was not supposed to fallback. Add a safety check with a warning to detect when an wrong attempt to fallback is being done. This should help detecting any future issues quicker. Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250224-net-mptcp-misc-fixes-v1-3-f550f636b435@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index f6a207958459d..ad21925af0612 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1199,6 +1199,8 @@ static inline void __mptcp_do_fallback(struct mptcp_sock *msk) pr_debug("TCP fallback already done (msk=%p)\n", msk); return; } + if (WARN_ON_ONCE(!READ_ONCE(msk->allow_infinite_fallback))) + return; set_bit(MPTCP_FALLBACK_DONE, &msk->flags); } -- GitLab From a6aa36e957a1bfb5341986dec32d013d23228fe1 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 14 Feb 2025 13:14:34 +0900 Subject: [PATCH 1057/1585] block: Remove zone write plugs when handling native zone append writes For devices that natively support zone append operations, REQ_OP_ZONE_APPEND BIOs are not processed through zone write plugging and are immediately issued to the zoned device. This means that there is no write pointer offset tracking done for these operations and that a zone write plug is not necessary. However, when receiving a zone append BIO, we may already have a zone write plug for the target zone if that zone was previously partially written using regular write operations. In such case, since the write pointer offset of the zone write plug is not incremented by the amount of sectors appended to the zone, 2 issues arise: 1) we risk leaving the plug in the disk hash table if the zone is fully written using zone append or regular write operations, because the write pointer offset will never reach the "zone full" state. 2) Regular write operations that are issued after zone append operations will always be failed by blk_zone_wplug_prepare_bio() as the write pointer alignment check will fail, even if the user correctly accounted for the zone append operations and issued the regular writes with a correct sector. Avoid these issues by immediately removing the zone write plug of zones that are the target of zone append operations when blk_zone_plug_bio() is called. The new function blk_zone_wplug_handle_native_zone_append() implements this for devices that natively support zone append. The removal of the zone write plug using disk_remove_zone_wplug() requires aborting all plugged regular write using disk_zone_wplug_abort() as otherwise the plugged write BIOs would never be executed (with the plug removed, the completion path will never see again the zone write plug as disk_get_zone_wplug() will return NULL). Rate-limited warnings are added to blk_zone_wplug_handle_native_zone_append() and to disk_zone_wplug_abort() to signal this. Since blk_zone_wplug_handle_native_zone_append() is called in the hot path for operations that will not be plugged, disk_get_zone_wplug() is optimized under the assumption that a user issuing zone append operations is not at the same time issuing regular writes and that there are no hashed zone write plugs. The struct gendisk atomic counter nr_zone_wplugs is added to check this, with this counter incremented in disk_insert_zone_wplug() and decremented in disk_remove_zone_wplug(). To be consistent with this fix, we do not need to fill the zone write plug hash table with zone write plugs for zones that are partially written for a device that supports native zone append operations. So modify blk_revalidate_seq_zone() to return early to avoid allocating and inserting a zone write plug for partially written sequential zones if the device natively supports zone append. Reported-by: Jorgen Hansen Fixes: 9b1ce7f0c6f8 ("block: Implement zone append emulation") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Tested-by: Jorgen Hansen Link: https://lore.kernel.org/r/20250214041434.82564-1-dlemoal@kernel.org Signed-off-by: Jens Axboe --- block/blk-zoned.c | 76 ++++++++++++++++++++++++++++++++++++++---- include/linux/blkdev.h | 7 ++-- 2 files changed, 73 insertions(+), 10 deletions(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 761ea662ddc34..0c77244a35c92 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -410,13 +410,14 @@ static bool disk_insert_zone_wplug(struct gendisk *disk, } } hlist_add_head_rcu(&zwplug->node, &disk->zone_wplugs_hash[idx]); + atomic_inc(&disk->nr_zone_wplugs); spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); return true; } -static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk, - sector_t sector) +static struct blk_zone_wplug *disk_get_hashed_zone_wplug(struct gendisk *disk, + sector_t sector) { unsigned int zno = disk_zone_no(disk, sector); unsigned int idx = hash_32(zno, disk->zone_wplugs_hash_bits); @@ -437,6 +438,15 @@ static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk, return NULL; } +static inline struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk, + sector_t sector) +{ + if (!atomic_read(&disk->nr_zone_wplugs)) + return NULL; + + return disk_get_hashed_zone_wplug(disk, sector); +} + static void disk_free_zone_wplug_rcu(struct rcu_head *rcu_head) { struct blk_zone_wplug *zwplug = @@ -503,6 +513,7 @@ static void disk_remove_zone_wplug(struct gendisk *disk, zwplug->flags |= BLK_ZONE_WPLUG_UNHASHED; spin_lock_irqsave(&disk->zone_wplugs_lock, flags); hlist_del_init_rcu(&zwplug->node); + atomic_dec(&disk->nr_zone_wplugs); spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); disk_put_zone_wplug(zwplug); } @@ -593,6 +604,11 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug) { struct bio *bio; + if (bio_list_empty(&zwplug->bio_list)) + return; + + pr_warn_ratelimited("%s: zone %u: Aborting plugged BIOs\n", + zwplug->disk->disk_name, zwplug->zone_no); while ((bio = bio_list_pop(&zwplug->bio_list))) blk_zone_wplug_bio_io_error(zwplug, bio); } @@ -1040,6 +1056,47 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs) return true; } +static void blk_zone_wplug_handle_native_zone_append(struct bio *bio) +{ + struct gendisk *disk = bio->bi_bdev->bd_disk; + struct blk_zone_wplug *zwplug; + unsigned long flags; + + /* + * We have native support for zone append operations, so we are not + * going to handle @bio through plugging. However, we may already have a + * zone write plug for the target zone if that zone was previously + * partially written using regular writes. In such case, we risk leaving + * the plug in the disk hash table if the zone is fully written using + * zone append operations. Avoid this by removing the zone write plug. + */ + zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector); + if (likely(!zwplug)) + return; + + spin_lock_irqsave(&zwplug->lock, flags); + + /* + * We are about to remove the zone write plug. But if the user + * (mistakenly) has issued regular writes together with native zone + * append, we must aborts the writes as otherwise the plugged BIOs would + * not be executed by the plug BIO work as disk_get_zone_wplug() will + * return NULL after the plug is removed. Aborting the plugged write + * BIOs is consistent with the fact that these writes will most likely + * fail anyway as there is no ordering guarantees between zone append + * operations and regular write operations. + */ + if (!bio_list_empty(&zwplug->bio_list)) { + pr_warn_ratelimited("%s: zone %u: Invalid mix of zone append and regular writes\n", + disk->disk_name, zwplug->zone_no); + disk_zone_wplug_abort(zwplug); + } + disk_remove_zone_wplug(disk, zwplug); + spin_unlock_irqrestore(&zwplug->lock, flags); + + disk_put_zone_wplug(zwplug); +} + /** * blk_zone_plug_bio - Handle a zone write BIO with zone write plugging * @bio: The BIO being submitted @@ -1096,8 +1153,10 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) */ switch (bio_op(bio)) { case REQ_OP_ZONE_APPEND: - if (!bdev_emulates_zone_append(bdev)) + if (!bdev_emulates_zone_append(bdev)) { + blk_zone_wplug_handle_native_zone_append(bio); return false; + } fallthrough; case REQ_OP_WRITE: case REQ_OP_WRITE_ZEROES: @@ -1284,6 +1343,7 @@ static int disk_alloc_zone_resources(struct gendisk *disk, { unsigned int i; + atomic_set(&disk->nr_zone_wplugs, 0); disk->zone_wplugs_hash_bits = min(ilog2(pool_size) + 1, BLK_ZONE_WPLUG_MAX_HASH_BITS); @@ -1338,6 +1398,7 @@ static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk) } } + WARN_ON_ONCE(atomic_read(&disk->nr_zone_wplugs)); kfree(disk->zone_wplugs_hash); disk->zone_wplugs_hash = NULL; disk->zone_wplugs_hash_bits = 0; @@ -1550,11 +1611,12 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx, } /* - * We need to track the write pointer of all zones that are not - * empty nor full. So make sure we have a zone write plug for - * such zone if the device has a zone write plug hash table. + * If the device needs zone append emulation, we need to track the + * write pointer of all zones that are not empty nor full. So make sure + * we have a zone write plug for such zone if the device has a zone + * write plug hash table. */ - if (!disk->zone_wplugs_hash) + if (!queue_emulates_zone_append(disk->queue) || !disk->zone_wplugs_hash) return 0; disk_zone_wplug_sync_wp_offset(disk, zone); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 58ff5aca83b67..d37751789bf58 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -196,10 +196,11 @@ struct gendisk { unsigned int zone_capacity; unsigned int last_zone_capacity; unsigned long __rcu *conv_zones_bitmap; - unsigned int zone_wplugs_hash_bits; - spinlock_t zone_wplugs_lock; + unsigned int zone_wplugs_hash_bits; + atomic_t nr_zone_wplugs; + spinlock_t zone_wplugs_lock; struct mempool_s *zone_wplugs_pool; - struct hlist_head *zone_wplugs_hash; + struct hlist_head *zone_wplugs_hash; struct workqueue_struct *zone_wplugs_wq; #endif /* CONFIG_BLK_DEV_ZONED */ -- GitLab From 79990cf5e7aded76d0c092c9f5ed31eb1c75e02c Mon Sep 17 00:00:00 2001 From: Marcin Szycik Date: Mon, 24 Feb 2025 11:06:41 -0800 Subject: [PATCH 1058/1585] ice: Fix deinitializing VF in error path If ice_ena_vfs() fails after calling ice_create_vf_entries(), it frees all VFs without removing them from snapshot PF-VF mailbox list, leading to list corruption. Reproducer: devlink dev eswitch set $PF1_PCI mode switchdev ip l s $PF1 up ip l s $PF1 promisc on sleep 1 echo 1 > /sys/class/net/$PF1/device/sriov_numvfs sleep 1 echo 1 > /sys/class/net/$PF1/device/sriov_numvfs Trace (minimized): list_add corruption. next->prev should be prev (ffff8882e241c6f0), but was 0000000000000000. (next=ffff888455da1330). kernel BUG at lib/list_debug.c:29! RIP: 0010:__list_add_valid_or_report+0xa6/0x100 ice_mbx_init_vf_info+0xa7/0x180 [ice] ice_initialize_vf_entry+0x1fa/0x250 [ice] ice_sriov_configure+0x8d7/0x1520 [ice] ? __percpu_ref_switch_mode+0x1b1/0x5d0 ? __pfx_ice_sriov_configure+0x10/0x10 [ice] Sometimes a KASAN report can be seen instead with a similar stack trace: BUG: KASAN: use-after-free in __list_add_valid_or_report+0xf1/0x100 VFs are added to this list in ice_mbx_init_vf_info(), but only removed in ice_free_vfs(). Move the removing to ice_free_vf_entries(), which is also being called in other places where VFs are being removed (including ice_free_vfs() itself). Fixes: 8cd8a6b17d27 ("ice: move VF overflow message count into struct ice_mbx_vf_info") Reported-by: Sujai Buvaneswaran Closes: https://lore.kernel.org/intel-wired-lan/PH0PR11MB50138B635F2E5CEB7075325D961F2@PH0PR11MB5013.namprd11.prod.outlook.com Reviewed-by: Martyna Szapar-Mudlaw Signed-off-by: Marcin Szycik Reviewed-by: Simon Horman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20250224190647.3601930-2-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_sriov.c | 5 +---- drivers/net/ethernet/intel/ice/ice_vf_lib.c | 8 ++++++++ drivers/net/ethernet/intel/ice/ice_vf_lib_private.h | 1 + 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index b83f99c01d91b..8aabf7749aa5e 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -36,6 +36,7 @@ static void ice_free_vf_entries(struct ice_pf *pf) hash_for_each_safe(vfs->table, bkt, tmp, vf, entry) { hash_del_rcu(&vf->entry); + ice_deinitialize_vf_entry(vf); ice_put_vf(vf); } } @@ -193,10 +194,6 @@ void ice_free_vfs(struct ice_pf *pf) wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx)); } - /* clear malicious info since the VF is getting released */ - if (!ice_is_feature_supported(pf, ICE_F_MBX_LIMIT)) - list_del(&vf->mbx_info.list_entry); - mutex_unlock(&vf->cfg_lock); } diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index c7c0c2f50c265..815ad0bfe8326 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -1036,6 +1036,14 @@ void ice_initialize_vf_entry(struct ice_vf *vf) mutex_init(&vf->cfg_lock); } +void ice_deinitialize_vf_entry(struct ice_vf *vf) +{ + struct ice_pf *pf = vf->pf; + + if (!ice_is_feature_supported(pf, ICE_F_MBX_LIMIT)) + list_del(&vf->mbx_info.list_entry); +} + /** * ice_dis_vf_qs - Disable the VF queues * @vf: pointer to the VF structure diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h b/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h index 0c7e77c0a09fa..5392b04049862 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h @@ -24,6 +24,7 @@ #endif void ice_initialize_vf_entry(struct ice_vf *vf); +void ice_deinitialize_vf_entry(struct ice_vf *vf); void ice_dis_vf_qs(struct ice_vf *vf); int ice_check_vf_init(struct ice_vf *vf); enum virtchnl_status_code ice_err_to_virt_err(int err); -- GitLab From 5c07be96d8b3f8447e980f29b967bf2e1d7ac732 Mon Sep 17 00:00:00 2001 From: Marcin Szycik Date: Mon, 24 Feb 2025 11:06:42 -0800 Subject: [PATCH 1059/1585] ice: Avoid setting default Rx VSI twice in switchdev setup As part of switchdev environment setup, uplink VSI is configured as default for both Tx and Rx. Default Rx VSI is also used by promiscuous mode. If promisc mode is enabled and an attempt to enter switchdev mode is made, the setup will fail because Rx VSI is already configured as default (rule exists). Reproducer: devlink dev eswitch set $PF1_PCI mode switchdev ip l s $PF1 up ip l s $PF1 promisc on echo 1 > /sys/class/net/$PF1/device/sriov_numvfs In switchdev setup, use ice_set_dflt_vsi() instead of plain ice_cfg_dflt_vsi(), which avoids repeating setting default VSI for Rx if it's already configured. Fixes: 50d62022f455 ("ice: default Tx rule instead of to queue") Reported-by: Sujai Buvaneswaran Closes: https://lore.kernel.org/intel-wired-lan/PH0PR11MB50138B635F2E5CEB7075325D961F2@PH0PR11MB5013.namprd11.prod.outlook.com Reviewed-by: Martyna Szapar-Mudlaw Signed-off-by: Marcin Szycik Reviewed-by: Simon Horman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20250224190647.3601930-3-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_eswitch.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index fb527434b58b1..d649c197cf673 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -38,8 +38,7 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) if (ice_vsi_add_vlan_zero(uplink_vsi)) goto err_vlan_zero; - if (ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, true, - ICE_FLTR_RX)) + if (ice_set_dflt_vsi(uplink_vsi)) goto err_def_rx; if (ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, true, -- GitLab From c6124f6fd3ca37d53ec5cbf62f9d9130ef439eca Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 24 Feb 2025 11:06:44 -0800 Subject: [PATCH 1060/1585] iavf: fix circular lock dependency with netdev_lock We have recently seen reports of lockdep circular lock dependency warnings when loading the iAVF driver: [ 1504.790308] ====================================================== [ 1504.790309] WARNING: possible circular locking dependency detected [ 1504.790310] 6.13.0 #net_next_rt.c2933b2befe2.el9 Not tainted [ 1504.790311] ------------------------------------------------------ [ 1504.790312] kworker/u128:0/13566 is trying to acquire lock: [ 1504.790313] ffff97d0e4738f18 (&dev->lock){+.+.}-{4:4}, at: register_netdevice+0x52c/0x710 [ 1504.790320] [ 1504.790320] but task is already holding lock: [ 1504.790321] ffff97d0e47392e8 (&adapter->crit_lock){+.+.}-{4:4}, at: iavf_finish_config+0x37/0x240 [iavf] [ 1504.790330] [ 1504.790330] which lock already depends on the new lock. [ 1504.790330] [ 1504.790330] [ 1504.790330] the existing dependency chain (in reverse order) is: [ 1504.790331] [ 1504.790331] -> #1 (&adapter->crit_lock){+.+.}-{4:4}: [ 1504.790333] __lock_acquire+0x52d/0xbb0 [ 1504.790337] lock_acquire+0xd9/0x330 [ 1504.790338] mutex_lock_nested+0x4b/0xb0 [ 1504.790341] iavf_finish_config+0x37/0x240 [iavf] [ 1504.790347] process_one_work+0x248/0x6d0 [ 1504.790350] worker_thread+0x18d/0x330 [ 1504.790352] kthread+0x10e/0x250 [ 1504.790354] ret_from_fork+0x30/0x50 [ 1504.790357] ret_from_fork_asm+0x1a/0x30 [ 1504.790361] [ 1504.790361] -> #0 (&dev->lock){+.+.}-{4:4}: [ 1504.790364] check_prev_add+0xf1/0xce0 [ 1504.790366] validate_chain+0x46a/0x570 [ 1504.790368] __lock_acquire+0x52d/0xbb0 [ 1504.790370] lock_acquire+0xd9/0x330 [ 1504.790371] mutex_lock_nested+0x4b/0xb0 [ 1504.790372] register_netdevice+0x52c/0x710 [ 1504.790374] iavf_finish_config+0xfa/0x240 [iavf] [ 1504.790379] process_one_work+0x248/0x6d0 [ 1504.790381] worker_thread+0x18d/0x330 [ 1504.790383] kthread+0x10e/0x250 [ 1504.790385] ret_from_fork+0x30/0x50 [ 1504.790387] ret_from_fork_asm+0x1a/0x30 [ 1504.790389] [ 1504.790389] other info that might help us debug this: [ 1504.790389] [ 1504.790389] Possible unsafe locking scenario: [ 1504.790389] [ 1504.790390] CPU0 CPU1 [ 1504.790391] ---- ---- [ 1504.790391] lock(&adapter->crit_lock); [ 1504.790393] lock(&dev->lock); [ 1504.790394] lock(&adapter->crit_lock); [ 1504.790395] lock(&dev->lock); [ 1504.790397] [ 1504.790397] *** DEADLOCK *** This appears to be caused by the change in commit 5fda3f35349b ("net: make netdev_lock() protect netdev->reg_state"), which added a netdev_lock() in register_netdevice. The iAVF driver calls register_netdevice() from iavf_finish_config(), as a final stage of its state machine post-probe. It currently takes the RTNL lock, then the netdev lock, and then the device critical lock. This pattern is used throughout the driver. Thus there is a strong dependency that the crit_lock should not be acquired before the net device lock. The change to register_netdevice creates an ABBA lock order violation because the iAVF driver is holding the crit_lock while calling register_netdevice, which then takes the netdev_lock. It seems likely that future refactors could result in netdev APIs which hold the netdev_lock while calling into the driver. This means that we should not re-order the locks so that netdev_lock is acquired after the device private crit_lock. Instead, notice that we already release the netdev_lock prior to calling the register_netdevice. This flow only happens during the early driver initialization as we transition through the __IAVF_STARTUP, __IAVF_INIT_VERSION_CHECK, __IAVF_INIT_GET_RESOURCES, etc. Analyzing the places where we take crit_lock in the driver there are two sources: a) several of the work queue tasks including adminq_task, watchdog_task, reset_task, and the finish_config task. b) various callbacks which ultimately stem back to .ndo operations or ethtool operations. The latter cannot be triggered until after the netdevice registration is completed successfully. The iAVF driver uses alloc_ordered_workqueue, which is an unbound workqueue that has a max limit of 1, and thus guarantees that only a single work item on the queue is executing at any given time, so none of the other work threads could be executing due to the ordered workqueue guarantees. The iavf_finish_config() function also does not do anything else after register_netdevice, unless it fails. It seems unlikely that the driver private crit_lock is protecting anything that register_netdevice() itself touches. Thus, to fix this ABBA lock violation, lets simply release the adapter->crit_lock as well as netdev_lock prior to calling register_netdevice(). We do still keep holding the RTNL lock as required by the function. If we do fail to register the netdevice, then we re-acquire the adapter critical lock to finish the transition back to __IAVF_INIT_CONFIG_ADAPTER. This ensures every call where both netdev_lock and the adapter->crit_lock are acquired under the same ordering. Fixes: afc664987ab3 ("eth: iavf: extend the netdev_lock usage") Signed-off-by: Jacob Keller Tested-by: Przemek Kitszel Reviewed-by: Przemek Kitszel Reviewed-by: Jakub Kicinski Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20250224190647.3601930-5-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/iavf/iavf_main.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 852e5b62f0a5d..6faa62bced3a2 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1983,7 +1983,7 @@ static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter, bool runni static void iavf_finish_config(struct work_struct *work) { struct iavf_adapter *adapter; - bool netdev_released = false; + bool locks_released = false; int pairs, err; adapter = container_of(work, struct iavf_adapter, finish_config); @@ -2012,19 +2012,22 @@ static void iavf_finish_config(struct work_struct *work) netif_set_real_num_tx_queues(adapter->netdev, pairs); if (adapter->netdev->reg_state != NETREG_REGISTERED) { + mutex_unlock(&adapter->crit_lock); netdev_unlock(adapter->netdev); - netdev_released = true; + locks_released = true; err = register_netdevice(adapter->netdev); if (err) { dev_err(&adapter->pdev->dev, "Unable to register netdev (%d)\n", err); /* go back and try again.*/ + mutex_lock(&adapter->crit_lock); iavf_free_rss(adapter); iavf_free_misc_irq(adapter); iavf_reset_interrupt_capability(adapter); iavf_change_state(adapter, __IAVF_INIT_CONFIG_ADAPTER); + mutex_unlock(&adapter->crit_lock); goto out; } } @@ -2040,9 +2043,10 @@ static void iavf_finish_config(struct work_struct *work) } out: - mutex_unlock(&adapter->crit_lock); - if (!netdev_released) + if (!locks_released) { + mutex_unlock(&adapter->crit_lock); netdev_unlock(adapter->netdev); + } rtnl_unlock(); } -- GitLab From b1e44b4aecb551727a368df5b85c535f2ce932ea Mon Sep 17 00:00:00 2001 From: Piotr Kwapulinski Date: Mon, 24 Feb 2025 11:06:45 -0800 Subject: [PATCH 1061/1585] ixgbe: fix media cage present detection for E610 device The commit 23c0e5a16bcc ("ixgbe: Add link management support for E610 device") introduced incorrect checking of media cage presence for E610 device. Fix it. Fixes: 23c0e5a16bcc ("ixgbe: Add link management support for E610 device") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/e7d73b32-f12a-49d1-8b60-1ef83359ec13@stanley.mountain/ Reviewed-by: Michal Swiatkowski Reviewed-by: Przemek Kitszel Signed-off-by: Piotr Kwapulinski Reviewed-by: Simon Horman Tested-by: Bharath R Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20250224190647.3601930-6-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c index 683c668672d65..cb07ecd8937d3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c @@ -1122,7 +1122,7 @@ static bool ixgbe_is_media_cage_present(struct ixgbe_hw *hw) * returns error (ENOENT), then no cage present. If no cage present then * connection type is backplane or BASE-T. */ - return ixgbe_aci_get_netlist_node(hw, cmd, NULL, NULL); + return !ixgbe_aci_get_netlist_node(hw, cmd, NULL, NULL); } /** -- GitLab From 39ab773e4c120f7f98d759415ccc2aca706bbc10 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Mon, 24 Feb 2025 19:12:44 +0800 Subject: [PATCH 1062/1585] net: enetc: fix the off-by-one issue in enetc_map_tx_buffs() When a DMA mapping error occurs while processing skb frags, it will free one more tx_swbd than expected, so fix this off-by-one issue. Fixes: d4fd0404c1c9 ("enetc: Introduce basic PF and VF ENETC ethernet drivers") Cc: stable@vger.kernel.org Suggested-by: Vladimir Oltean Suggested-by: Michal Swiatkowski Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Reviewed-by: Claudiu Manoil Link: https://patch.msgid.link/20250224111251.1061098-2-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 26 ++++++++++++++------ 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 6a6fc819dfdee..55ad31a5073e7 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -167,6 +167,24 @@ static bool enetc_skb_is_tcp(struct sk_buff *skb) return skb->csum_offset == offsetof(struct tcphdr, check); } +/** + * enetc_unwind_tx_frame() - Unwind the DMA mappings of a multi-buffer Tx frame + * @tx_ring: Pointer to the Tx ring on which the buffer descriptors are located + * @count: Number of Tx buffer descriptors which need to be unmapped + * @i: Index of the last successfully mapped Tx buffer descriptor + */ +static void enetc_unwind_tx_frame(struct enetc_bdr *tx_ring, int count, int i) +{ + while (count--) { + struct enetc_tx_swbd *tx_swbd = &tx_ring->tx_swbd[i]; + + enetc_free_tx_frame(tx_ring, tx_swbd); + if (i == 0) + i = tx_ring->bd_count; + i--; + } +} + static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) { bool do_vlan, do_onestep_tstamp = false, do_twostep_tstamp = false; @@ -372,13 +390,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) dma_err: dev_err(tx_ring->dev, "DMA map error"); - do { - tx_swbd = &tx_ring->tx_swbd[i]; - enetc_free_tx_frame(tx_ring, tx_swbd); - if (i == 0) - i = tx_ring->bd_count; - i--; - } while (count--); + enetc_unwind_tx_frame(tx_ring, count, i); return 0; } -- GitLab From da291996b16ebd10626d4b20288327b743aff110 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Mon, 24 Feb 2025 19:12:45 +0800 Subject: [PATCH 1063/1585] net: enetc: keep track of correct Tx BD count in enetc_map_tx_tso_buffs() When creating a TSO header, if the skb is VLAN tagged, the extended BD will be used and the 'count' should be increased by 2 instead of 1. Otherwise, when an error occurs, less tx_swbd will be freed than the actual number. Fixes: fb8629e2cbfc ("net: enetc: add support for software TSO") Cc: stable@vger.kernel.org Suggested-by: Vladimir Oltean Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Reviewed-by: Claudiu Manoil Link: https://patch.msgid.link/20250224111251.1061098-3-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 55ad31a5073e7..174db9e2ce813 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -395,14 +395,15 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) return 0; } -static void enetc_map_tx_tso_hdr(struct enetc_bdr *tx_ring, struct sk_buff *skb, - struct enetc_tx_swbd *tx_swbd, - union enetc_tx_bd *txbd, int *i, int hdr_len, - int data_len) +static int enetc_map_tx_tso_hdr(struct enetc_bdr *tx_ring, struct sk_buff *skb, + struct enetc_tx_swbd *tx_swbd, + union enetc_tx_bd *txbd, int *i, int hdr_len, + int data_len) { union enetc_tx_bd txbd_tmp; u8 flags = 0, e_flags = 0; dma_addr_t addr; + int count = 1; enetc_clear_tx_bd(&txbd_tmp); addr = tx_ring->tso_headers_dma + *i * TSO_HEADER_SIZE; @@ -445,7 +446,10 @@ static void enetc_map_tx_tso_hdr(struct enetc_bdr *tx_ring, struct sk_buff *skb, /* Write the BD */ txbd_tmp.ext.e_flags = e_flags; *txbd = txbd_tmp; + count++; } + + return count; } static int enetc_map_tx_tso_data(struct enetc_bdr *tx_ring, struct sk_buff *skb, @@ -802,9 +806,9 @@ static int enetc_map_tx_tso_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb /* compute the csum over the L4 header */ csum = enetc_tso_hdr_csum(&tso, skb, hdr, hdr_len, &pos); - enetc_map_tx_tso_hdr(tx_ring, skb, tx_swbd, txbd, &i, hdr_len, data_len); + count += enetc_map_tx_tso_hdr(tx_ring, skb, tx_swbd, txbd, + &i, hdr_len, data_len); bd_data_num = 0; - count++; while (data_len > 0) { int size; -- GitLab From 432a2cb3ee97a7c6ea578888fe81baad035b9307 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Mon, 24 Feb 2025 19:12:46 +0800 Subject: [PATCH 1064/1585] net: enetc: correct the xdp_tx statistics The 'xdp_tx' is used to count the number of XDP_TX frames sent, not the number of Tx BDs. Fixes: 7ed2bc80074e ("net: enetc: add support for XDP_TX") Cc: stable@vger.kernel.org Signed-off-by: Wei Fang Reviewed-by: Ioana Ciornei Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250224111251.1061098-4-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 174db9e2ce813..3cb9ebb13b19b 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1917,7 +1917,7 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, enetc_xdp_drop(rx_ring, orig_i, i); tx_ring->stats.xdp_tx_drops++; } else { - tx_ring->stats.xdp_tx += xdp_tx_bd_cnt; + tx_ring->stats.xdp_tx++; rx_ring->xdp.xdp_tx_in_flight += xdp_tx_bd_cnt; xdp_tx_frm_cnt++; /* The XDP_TX enqueue was successful, so we -- GitLab From a562d0c4a893eae3ea51d512c4d90ab858a6b7ec Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Mon, 24 Feb 2025 19:12:47 +0800 Subject: [PATCH 1065/1585] net: enetc: VFs do not support HWTSTAMP_TX_ONESTEP_SYNC Actually ENETC VFs do not support HWTSTAMP_TX_ONESTEP_SYNC because only ENETC PF can access PMa_SINGLE_STEP registers. And there will be a crash if VFs are used to test one-step timestamp, the crash log as follows. [ 129.110909] Unable to handle kernel paging request at virtual address 00000000000080c0 [ 129.287769] Call trace: [ 129.290219] enetc_port_mac_wr+0x30/0xec (P) [ 129.294504] enetc_start_xmit+0xda4/0xe74 [ 129.298525] enetc_xmit+0x70/0xec [ 129.301848] dev_hard_start_xmit+0x98/0x118 Fixes: 41514737ecaa ("enetc: add get_ts_info interface for ethtool") Cc: stable@vger.kernel.org Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Link: https://patch.msgid.link/20250224111251.1061098-5-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 3 +++ drivers/net/ethernet/freescale/enetc/enetc_ethtool.c | 7 +++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 3cb9ebb13b19b..e946d86527904 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -3244,6 +3244,9 @@ static int enetc_hwtstamp_set(struct net_device *ndev, struct ifreq *ifr) new_offloads |= ENETC_F_TX_TSTAMP; break; case HWTSTAMP_TX_ONESTEP_SYNC: + if (!enetc_si_is_pf(priv->si)) + return -EOPNOTSUPP; + new_offloads &= ~ENETC_F_TX_TSTAMP_MASK; new_offloads |= ENETC_F_TX_ONESTEP_SYNC_TSTAMP; break; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index bf34b5bb1e358..ece3ae28ba827 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -832,6 +832,7 @@ static int enetc_set_coalesce(struct net_device *ndev, static int enetc_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *info) { + struct enetc_ndev_priv *priv = netdev_priv(ndev); int *phc_idx; phc_idx = symbol_get(enetc_phc_index); @@ -852,8 +853,10 @@ static int enetc_get_ts_info(struct net_device *ndev, SOF_TIMESTAMPING_TX_SOFTWARE; info->tx_types = (1 << HWTSTAMP_TX_OFF) | - (1 << HWTSTAMP_TX_ON) | - (1 << HWTSTAMP_TX_ONESTEP_SYNC); + (1 << HWTSTAMP_TX_ON); + + if (enetc_si_is_pf(priv->si)) + info->tx_types |= (1 << HWTSTAMP_TX_ONESTEP_SYNC); info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) | (1 << HWTSTAMP_FILTER_ALL); -- GitLab From bbcbc906ab7b5834c1219cd17a38d78dba904aa0 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Mon, 24 Feb 2025 19:12:48 +0800 Subject: [PATCH 1066/1585] net: enetc: update UDP checksum when updating originTimestamp field There is an issue with one-step timestamp based on UDP/IP. The peer will discard the sync packet because of the wrong UDP checksum. For ENETC v1, the software needs to update the UDP checksum when updating the originTimestamp field, so that the hardware can correctly update the UDP checksum when updating the correction field. Otherwise, the UDP checksum in the sync packet will be wrong. Fixes: 7294380c5211 ("enetc: support PTP Sync packet one-step timestamping") Cc: stable@vger.kernel.org Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Link: https://patch.msgid.link/20250224111251.1061098-6-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 41 ++++++++++++++++---- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index e946d86527904..9801c51b6a590 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -297,9 +297,11 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) } if (do_onestep_tstamp) { - u32 lo, hi, val; - u64 sec, nsec; + __be32 new_sec_l, new_nsec; + u32 lo, hi, nsec, val; + __be16 new_sec_h; u8 *data; + u64 sec; lo = enetc_rd_hot(hw, ENETC_SICTR0); hi = enetc_rd_hot(hw, ENETC_SICTR1); @@ -313,13 +315,38 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) /* Update originTimestamp field of Sync packet * - 48 bits seconds field * - 32 bits nanseconds field + * + * In addition, the UDP checksum needs to be updated + * by software after updating originTimestamp field, + * otherwise the hardware will calculate the wrong + * checksum when updating the correction field and + * update it to the packet. */ data = skb_mac_header(skb); - *(__be16 *)(data + offset2) = - htons((sec >> 32) & 0xffff); - *(__be32 *)(data + offset2 + 2) = - htonl(sec & 0xffffffff); - *(__be32 *)(data + offset2 + 6) = htonl(nsec); + new_sec_h = htons((sec >> 32) & 0xffff); + new_sec_l = htonl(sec & 0xffffffff); + new_nsec = htonl(nsec); + if (udp) { + struct udphdr *uh = udp_hdr(skb); + __be32 old_sec_l, old_nsec; + __be16 old_sec_h; + + old_sec_h = *(__be16 *)(data + offset2); + inet_proto_csum_replace2(&uh->check, skb, old_sec_h, + new_sec_h, false); + + old_sec_l = *(__be32 *)(data + offset2 + 2); + inet_proto_csum_replace4(&uh->check, skb, old_sec_l, + new_sec_l, false); + + old_nsec = *(__be32 *)(data + offset2 + 6); + inet_proto_csum_replace4(&uh->check, skb, old_nsec, + new_nsec, false); + } + + *(__be16 *)(data + offset2) = new_sec_h; + *(__be32 *)(data + offset2 + 2) = new_sec_l; + *(__be32 *)(data + offset2 + 6) = new_nsec; /* Configure single-step register */ val = ENETC_PM0_SINGLE_STEP_EN; -- GitLab From 8e43decdfbb477dd7800e3902d2d2f105d22ef5f Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Mon, 24 Feb 2025 19:12:49 +0800 Subject: [PATCH 1067/1585] net: enetc: add missing enetc4_link_deinit() The enetc4_link_init() is called when the PF driver probes to create phylink and MDIO bus, but we forgot to call enetc4_link_deinit() to free the phylink and MDIO bus when the driver was unbound. so add missing enetc4_link_deinit() to enetc4_pf_netdev_destroy(). Fixes: 99100d0d9922 ("net: enetc: add preliminary support for i.MX95 ENETC PF") Cc: stable@vger.kernel.org Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250224111251.1061098-7-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc4_pf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c index fc41078c4f5da..48861c8b499a0 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c @@ -684,6 +684,7 @@ static void enetc4_pf_netdev_destroy(struct enetc_si *si) struct net_device *ndev = si->ndev; unregister_netdev(ndev); + enetc4_link_deinit(priv); enetc_free_msix(priv); free_netdev(ndev); } -- GitLab From 119049b66b883c7e7e575a0b69dc6e3d211662cc Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Mon, 24 Feb 2025 19:12:50 +0800 Subject: [PATCH 1068/1585] net: enetc: remove the mm_lock from the ENETC v4 driver Currently, the ENETC v4 driver has not added the MAC merge layer support in the upstream, so the mm_lock is not initialized and used, so remove the mm_lock from the driver. Fixes: 99100d0d9922 ("net: enetc: add preliminary support for i.MX95 ENETC PF") Cc: stable@vger.kernel.org Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250224111251.1061098-8-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc4_pf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c index 48861c8b499a0..73ac8c6afb3ad 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c @@ -672,7 +672,6 @@ static int enetc4_pf_netdev_create(struct enetc_si *si) err_alloc_msix: err_config_si: err_clk_get: - mutex_destroy(&priv->mm_lock); free_netdev(ndev); return err; -- GitLab From 249df695c3ffe8c8d36d46c2580ce72410976f96 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Mon, 24 Feb 2025 19:12:51 +0800 Subject: [PATCH 1069/1585] net: enetc: fix the off-by-one issue in enetc_map_tx_tso_buffs() There is an off-by-one issue for the err_chained_bd path, it will free one more tx_swbd than expected. But there is no such issue for the err_map_data path. To fix this off-by-one issue and make the two error handling consistent, the increment of 'i' and 'count' remain in sync and enetc_unwind_tx_frame() is called for error handling. Fixes: fb8629e2cbfc ("net: enetc: add support for software TSO") Cc: stable@vger.kernel.org Suggested-by: Vladimir Oltean Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Reviewed-by: Claudiu Manoil Link: https://patch.msgid.link/20250224111251.1061098-9-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 9801c51b6a590..2106861463e40 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -859,8 +859,13 @@ static int enetc_map_tx_tso_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb err = enetc_map_tx_tso_data(tx_ring, skb, tx_swbd, txbd, tso.data, size, size == data_len); - if (err) + if (err) { + if (i == 0) + i = tx_ring->bd_count; + i--; + goto err_map_data; + } data_len -= size; count++; @@ -889,13 +894,7 @@ static int enetc_map_tx_tso_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb dev_err(tx_ring->dev, "DMA map error"); err_chained_bd: - do { - tx_swbd = &tx_ring->tx_swbd[i]; - enetc_free_tx_frame(tx_ring, tx_swbd); - if (i == 0) - i = tx_ring->bd_count; - i--; - } while (count--); + enetc_unwind_tx_frame(tx_ring, count, i); return 0; } -- GitLab From 5a4041f2c47247575a6c2e53ce14f7b0ac946c33 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 19 Feb 2025 16:02:11 +0900 Subject: [PATCH 1070/1585] btrfs: zoned: fix extent range end unlock in cow_file_range() Running generic/751 on the for-next branch often results in a hang like below. They are both stack by locking an extent. This suggests someone forget to unlock an extent. INFO: task kworker/u128:1:12 blocked for more than 323 seconds. Not tainted 6.13.0-BTRFS-ZNS+ #503 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kworker/u128:1 state:D stack:0 pid:12 tgid:12 ppid:2 flags:0x00004000 Workqueue: btrfs-fixup btrfs_work_helper [btrfs] Call Trace: __schedule+0x534/0xdd0 schedule+0x39/0x140 __lock_extent+0x31b/0x380 [btrfs] ? __pfx_autoremove_wake_function+0x10/0x10 btrfs_writepage_fixup_worker+0xf1/0x3a0 [btrfs] btrfs_work_helper+0xff/0x480 [btrfs] ? lock_release+0x178/0x2c0 process_one_work+0x1ee/0x570 ? srso_return_thunk+0x5/0x5f worker_thread+0x1d1/0x3b0 ? __pfx_worker_thread+0x10/0x10 kthread+0x10b/0x230 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x30/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 INFO: task kworker/u134:0:184 blocked for more than 323 seconds. Not tainted 6.13.0-BTRFS-ZNS+ #503 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kworker/u134:0 state:D stack:0 pid:184 tgid:184 ppid:2 flags:0x00004000 Workqueue: writeback wb_workfn (flush-btrfs-4) Call Trace: __schedule+0x534/0xdd0 schedule+0x39/0x140 __lock_extent+0x31b/0x380 [btrfs] ? __pfx_autoremove_wake_function+0x10/0x10 find_lock_delalloc_range+0xdb/0x260 [btrfs] writepage_delalloc+0x12f/0x500 [btrfs] ? srso_return_thunk+0x5/0x5f extent_write_cache_pages+0x232/0x840 [btrfs] btrfs_writepages+0x72/0x130 [btrfs] do_writepages+0xe7/0x260 ? srso_return_thunk+0x5/0x5f ? lock_acquire+0xd2/0x300 ? srso_return_thunk+0x5/0x5f ? find_held_lock+0x2b/0x80 ? wbc_attach_and_unlock_inode.part.0+0x102/0x250 ? wbc_attach_and_unlock_inode.part.0+0x102/0x250 __writeback_single_inode+0x5c/0x4b0 writeback_sb_inodes+0x22d/0x550 __writeback_inodes_wb+0x4c/0xe0 wb_writeback+0x2f6/0x3f0 wb_workfn+0x32a/0x510 process_one_work+0x1ee/0x570 ? srso_return_thunk+0x5/0x5f worker_thread+0x1d1/0x3b0 ? __pfx_worker_thread+0x10/0x10 kthread+0x10b/0x230 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x30/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 This happens because we have another success path for the zoned mode. When there is no active zone available, btrfs_reserve_extent() returns -EAGAIN. In this case, we have two reactions. (1) If the given range is never allocated, we can only wait for someone to finish a zone, so wait on BTRFS_FS_NEED_ZONE_FINISH bit and retry afterward. (2) Or, if some allocations are already done, we must bail out and let the caller to send IOs for the allocation. This is because these IOs may be necessary to finish a zone. The commit 06f364284794 ("btrfs: do proper folio cleanup when cow_file_range() failed") moved the unlock code from the inside of the loop to the outside. So, previously, the allocated extents are unlocked just after the allocation and so before returning from the function. However, they are no longer unlocked on the case (2) above. That caused the hang issue. Fix the issue by modifying the 'end' to the end of the allocated range. Then, we can exit the loop and the same unlock code can properly handle the case. Reported-by: Shin'ichiro Kawasaki Tested-by: Johannes Thumshirn Fixes: 06f364284794 ("btrfs: do proper folio cleanup when cow_file_range() failed") CC: stable@vger.kernel.org Reviewed-by: Qu Wenruo Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/inode.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fe2c810335ff0..69e90ed33cfe5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1382,8 +1382,13 @@ static noinline int cow_file_range(struct btrfs_inode *inode, continue; } if (done_offset) { - *done_offset = start - 1; - return 0; + /* + * Move @end to the end of the processed range, + * and exit the loop to unlock the processed extents. + */ + end = start - 1; + ret = 0; + break; } ret = -ENOSPC; } -- GitLab From 2df2c6ed89600a02e1c7a581a6a55e53c38ce0f5 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 25 Feb 2025 20:26:14 +0100 Subject: [PATCH 1071/1585] btrfs: replace deprecated strncpy() with strscpy() strncpy() is deprecated for NUL-terminated destination buffers. Use strscpy() instead and don't zero-initialize the param array. Link: https://github.com/KSPP/linux/issues/90 Cc: linux-hardening@vger.kernel.org Signed-off-by: Thorsten Blum Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 53b846d99ecea..14f53f7575553 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1330,13 +1330,13 @@ MODULE_PARM_DESC(read_policy, int btrfs_read_policy_to_enum(const char *str, s64 *value_ret) { - char param[32] = { 0 }; + char param[32]; char __maybe_unused *value_str; if (!str || strlen(str) == 0) return 0; - strncpy(param, str, sizeof(param) - 1); + strscpy(param, str); #ifdef CONFIG_BTRFS_EXPERIMENTAL /* Separate value from input in policy:value format. */ -- GitLab From 8d52da23b6c68a0f6bad83959ebb61a2cf623c4e Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Mon, 24 Feb 2025 17:00:47 +0800 Subject: [PATCH 1072/1585] tcp: Defer ts_recent changes until req is owned Recently a bug was discovered where the server had entered TCP_ESTABLISHED state, but the upper layers were not notified. The same 5-tuple packet may be processed by different CPUSs, so two CPUs may receive different ack packets at the same time when the state is TCP_NEW_SYN_RECV. In that case, req->ts_recent in tcp_check_req may be changed concurrently, which will probably cause the newsk's ts_recent to be incorrectly large. So that tcp_validate_incoming will fail. At this point, newsk will not be able to enter the TCP_ESTABLISHED. cpu1 cpu2 tcp_check_req tcp_check_req req->ts_recent = rcv_tsval = t1 req->ts_recent = rcv_tsval = t2 syn_recv_sock tcp_sk(child)->rx_opt.ts_recent = req->ts_recent = t2 // t1 < t2 tcp_child_process tcp_rcv_state_process tcp_validate_incoming tcp_paws_check if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win) // t2 - t1 > paws_win, failed tcp_v4_do_rcv tcp_rcv_state_process // TCP_ESTABLISHED The cpu2's skb or a newly received skb will call tcp_v4_do_rcv to get the newsk into the TCP_ESTABLISHED state, but at this point it is no longer possible to notify the upper layer application. A notification mechanism could be added here, but the fix is more complex, so the current fix is used. In tcp_check_req, req->ts_recent is used to assign a value to tcp_sk(child)->rx_opt.ts_recent, so removing the change in req->ts_recent and changing tcp_sk(child)->rx_opt.ts_recent directly after owning the req fixes this bug. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Wang Hai Reviewed-by: Jason Xing Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b089b08e96178..dfdb7a4608a85 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -815,12 +815,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, /* In sequence, PAWS is OK. */ - /* TODO: We probably should defer ts_recent change once - * we take ownership of @req. - */ - if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt)) - WRITE_ONCE(req->ts_recent, tmp_opt.rcv_tsval); - if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { /* Truncate SYN, it is out of window starting at tcp_rsk(req)->rcv_isn + 1. */ @@ -869,6 +863,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (!child) goto listen_overflow; + if (own_req && tmp_opt.saw_tstamp && + !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt)) + tcp_sk(child)->rx_opt.ts_recent = tmp_opt.rcv_tsval; + if (own_req && rsk_drop_req(req)) { reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req); inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req); -- GitLab From 17bcd714426386fda741a4bccd96a2870179344b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 24 Feb 2025 15:55:36 -0800 Subject: [PATCH 1073/1585] KVM: x86: Free vCPUs before freeing VM state Free vCPUs before freeing any VM state, as both SVM and VMX may access VM state when "freeing" a vCPU that is currently "in" L2, i.e. that needs to be kicked out of nested guest mode. Commit 6fcee03df6a1 ("KVM: x86: avoid loading a vCPU after .vm_destroy was called") partially fixed the issue, but for unknown reasons only moved the MMU unloading before VM destruction. Complete the change, and free all vCPU state prior to destroying VM state, as nVMX accesses even more state than nSVM. In addition to the AVIC, KVM can hit a use-after-free on MSR filters: kvm_msr_allowed+0x4c/0xd0 __kvm_set_msr+0x12d/0x1e0 kvm_set_msr+0x19/0x40 load_vmcs12_host_state+0x2d8/0x6e0 [kvm_intel] nested_vmx_vmexit+0x715/0xbd0 [kvm_intel] nested_vmx_free_vcpu+0x33/0x50 [kvm_intel] vmx_free_vcpu+0x54/0xc0 [kvm_intel] kvm_arch_vcpu_destroy+0x28/0xf0 kvm_vcpu_destroy+0x12/0x50 kvm_arch_destroy_vm+0x12c/0x1c0 kvm_put_kvm+0x263/0x3c0 kvm_vm_release+0x21/0x30 and an upcoming fix to process injectable interrupts on nested VM-Exit will access the PIC: BUG: kernel NULL pointer dereference, address: 0000000000000090 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page CPU: 23 UID: 1000 PID: 2658 Comm: kvm-nx-lpage-re RIP: 0010:kvm_cpu_has_extint+0x2f/0x60 [kvm] Call Trace: kvm_cpu_has_injectable_intr+0xe/0x60 [kvm] nested_vmx_vmexit+0x2d7/0xdf0 [kvm_intel] nested_vmx_free_vcpu+0x40/0x50 [kvm_intel] vmx_vcpu_free+0x2d/0x80 [kvm_intel] kvm_arch_vcpu_destroy+0x2d/0x130 [kvm] kvm_destroy_vcpus+0x8a/0x100 [kvm] kvm_arch_destroy_vm+0xa7/0x1d0 [kvm] kvm_destroy_vm+0x172/0x300 [kvm] kvm_vcpu_release+0x31/0x50 [kvm] Inarguably, both nSVM and nVMX need to be fixed, but punt on those cleanups for the moment. Conceptually, vCPUs should be freed before VM state. Assets like the I/O APIC and PIC _must_ be allocated before vCPUs are created, so it stands to reason that they must be freed _after_ vCPUs are destroyed. Reported-by: Aaron Lewis Closes: https://lore.kernel.org/all/20240703175618.2304869-2-aaronlewis@google.com Cc: Jim Mattson Cc: Yan Zhao Cc: Rick P Edgecombe Cc: Kai Huang Cc: Isaku Yamahata Signed-off-by: Sean Christopherson Message-ID: <20250224235542.2562848-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 02159c967d29e..6fc4ddc606bd4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12877,11 +12877,11 @@ void kvm_arch_destroy_vm(struct kvm *kvm) mutex_unlock(&kvm->slots_lock); } kvm_unload_vcpu_mmus(kvm); + kvm_destroy_vcpus(kvm); kvm_x86_call(vm_destroy)(kvm); kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1)); kvm_pic_destroy(kvm); kvm_ioapic_destroy(kvm); - kvm_destroy_vcpus(kvm); kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1)); kvm_mmu_uninit_vm(kvm); -- GitLab From 982caaa1150479f022003390cd72a1941663d211 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 24 Feb 2025 15:55:37 -0800 Subject: [PATCH 1074/1585] KVM: nVMX: Process events on nested VM-Exit if injectable IRQ or NMI is pending Process pending events on nested VM-Exit if the vCPU has an injectable IRQ or NMI, as the event may have become pending while L2 was active, i.e. may not be tracked in the context of vmcs01. E.g. if L1 has passed its APIC through to L2 and an IRQ arrives while L2 is active, then KVM needs to request an IRQ window prior to running L1, otherwise delivery of the IRQ will be delayed until KVM happens to process events for some other reason. The missed failure is detected by vmx_apic_passthrough_tpr_threshold_test in KVM-Unit-Tests, but has effectively been masked due to a flaw in KVM's PIC emulation that causes KVM to make spurious KVM_REQ_EVENT requests (and apparently no one ever ran the test with split IRQ chips). Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-ID: <20250224235542.2562848-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 8a7af02d466e9..ed8a3cb539612 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5084,6 +5084,17 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, load_vmcs12_host_state(vcpu, vmcs12); + /* + * Process events if an injectable IRQ or NMI is pending, even + * if the event is blocked (RFLAGS.IF is cleared on VM-Exit). + * If an event became pending while L2 was active, KVM needs to + * either inject the event or request an IRQ/NMI window. SMIs + * don't need to be processed as SMM is mutually exclusive with + * non-root mode. INIT/SIPI don't need to be checked as INIT + * is blocked post-VMXON, and SIPIs are ignored. + */ + if (kvm_cpu_has_injectable_intr(vcpu) || vcpu->arch.nmi_pending) + kvm_make_request(KVM_REQ_EVENT, vcpu); return; } -- GitLab From 2e064e3f3282ec016d80cb7b1fadff0d8e2014ca Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 10 Feb 2025 19:23:50 +0900 Subject: [PATCH 1075/1585] drm/imagination: remove unnecessary header include path drivers/gpu/drm/imagination/ includes local headers with the double-quote form (#include "..."). Hence, the header search path addition is unneeded. Signed-off-by: Masahiro Yamada Reviewed-by: Matt Coster Link: https://patchwork.freedesktop.org/patch/msgid/20250210102352.1517115-1-masahiroy@kernel.org Signed-off-by: Matt Coster --- drivers/gpu/drm/imagination/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/imagination/Makefile b/drivers/gpu/drm/imagination/Makefile index 9bc6a3884c223..3d9d4d40fb806 100644 --- a/drivers/gpu/drm/imagination/Makefile +++ b/drivers/gpu/drm/imagination/Makefile @@ -1,8 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only OR MIT # Copyright (c) 2023 Imagination Technologies Ltd. -subdir-ccflags-y := -I$(src) - powervr-y := \ pvr_ccb.o \ pvr_cccb.o \ -- GitLab From 130ff5c8b78e6fd05270a04985c50bce6a3de6c1 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 25 Feb 2025 15:16:12 +0100 Subject: [PATCH 1076/1585] ata: ahci: Make ahci_ignore_port() handle empty mask_port_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 8c87215dd3a2 ("ata: libahci_platform: support non-consecutive port numbers") added a skip to ahci_platform_enable_phys() for ports that are not in mask_port_map. The code in ahci_platform_get_resources(), will currently set mask_port_map for each child "port" node it finds in the device tree. However, device trees that do not have any child "port" nodes will not have mask_port_map set, and for non-device tree platforms mask_port_map will only exist as a quirk for specific PCI device + vendor IDs, or as a kernel module parameter, but will not be set by default. Therefore, the common thing is that mask_port_map is only set if you do not want to use all ports (as defined by Offset 0Ch: PI – Ports Implemented register), but instead only want to use the ports in mask_port_map. If mask_port_map is not set, all ports are available. Thus, ahci_ignore_port() must be able to handle an empty mask_port_map. Fixes: 8c87215dd3a2 ("ata: libahci_platform: support non-consecutive port numbers") Fixes: 2c202e6c4f4d ("ata: libahci_platform: Do not set mask_port_map when not needed") Fixes: c9b5be909e65 ("ahci: Introduce ahci_ignore_port() helper") Reported-by: Marek Szyprowski Closes: https://lore.kernel.org/linux-ide/10b31dd0-d0bb-4f76-9305-2195c3e17670@samsung.com/ Tested-by: Marek Szyprowski Co-developed-by: Damien Le Moal Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20250225141612.942170-2-cassel@kernel.org Signed-off-by: Niklas Cassel --- drivers/ata/ahci.h | 8 ++++++-- drivers/ata/libahci.c | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index aea30df50c581..b2e0ef4efbdc3 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -386,8 +386,12 @@ struct ahci_host_priv { static inline bool ahci_ignore_port(struct ahci_host_priv *hpriv, unsigned int portid) { - return portid >= hpriv->nports || - !(hpriv->mask_port_map & (1 << portid)); + if (portid >= hpriv->nports) + return true; + /* mask_port_map not set means that all ports are available */ + if (!hpriv->mask_port_map) + return false; + return !(hpriv->mask_port_map & (1 << portid)); } extern int ahci_ignore_sss; diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index fdfa7b2662180..e7ace4b10f15b 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -541,6 +541,7 @@ void ahci_save_initial_config(struct device *dev, struct ahci_host_priv *hpriv) hpriv->saved_port_map = port_map; } + /* mask_port_map not set means that all ports are available */ if (hpriv->mask_port_map) { dev_warn(dev, "masking port_map 0x%lx -> 0x%lx\n", port_map, -- GitLab From f2ba0cf1ca32e075617813de98c826ab55d57f11 Mon Sep 17 00:00:00 2001 From: Mingcong Bai Date: Tue, 25 Feb 2025 15:31:01 +0800 Subject: [PATCH 1077/1585] drm/xe/regs: remove a duplicate definition for RING_CTL_SIZE(size) Commit b79e8fd954c4 ("drm/xe: Remove dependency on intel_engine_regs.h") introduced an internal set of engine registers, however, as part of this change, it has also introduced two duplicate `define' lines for `RING_CTL_SIZE(size)'. This commit was introduced to the tree in v6.8-rc1. While this is harmless as the definitions did not change, so no compiler warning was observed. Drop this line anyway for the sake of correctness. Cc: stable@vger.kernel.org # v6.8-rc1+ Fixes: b79e8fd954c4 ("drm/xe: Remove dependency on intel_engine_regs.h") Signed-off-by: Mingcong Bai Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250225073104.865230-1-jeffbai@aosc.io Signed-off-by: Rodrigo Vivi (cherry picked from commit 6b68c4542ffecc36087a9e14db8fc990c88bb01b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index d86219dedde2a..b732c89816dff 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -53,7 +53,6 @@ #define RING_CTL(base) XE_REG((base) + 0x3c) #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ -#define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ #define RING_START_UDW(base) XE_REG((base) + 0x48) -- GitLab From 12c2f962fe71f390951d9242725bc7e608f55927 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Tue, 25 Feb 2025 10:27:54 +0530 Subject: [PATCH 1078/1585] drm/xe: cancel pending job timer before freeing scheduler The async call to __guc_exec_queue_fini_async frees the scheduler while a submission may time out and restart. To prevent this race condition, the pending job timer should be canceled before freeing the scheduler. V3(MattB): - Adjust position of cancel pending job - Remove gitlab issue# from commit message V2(MattB): - Cancel pending jobs before scheduler finish Fixes: a20c75dba192 ("drm/xe: Call __guc_exec_queue_fini_async direct for KERNEL exec_queues") Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250225045754.600905-1-tejas.upadhyay@intel.com Signed-off-by: Tejas Upadhyay (cherry picked from commit 18fbd567e75f9b97b699b2ab4f1fa76b7cf268f6) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_submit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 913c74d6e2aeb..b6a2dd742ebdc 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1248,6 +1248,8 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) if (xe_exec_queue_is_lr(q)) cancel_work_sync(&ge->lr_tdr); + /* Confirm no work left behind accessing device structures */ + cancel_delayed_work_sync(&ge->sched.base.work_tdr); release_guc_id(guc, q); xe_sched_entity_fini(&ge->entity); xe_sched_fini(&ge->sched); -- GitLab From 16fef33fdb1e2269c20697d9b61ae8022bc92665 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 24 Feb 2025 11:32:42 +0200 Subject: [PATCH 1079/1585] drm/i915/dp_mst: Fix encoder HW state readout for UHBR MST The encoder HW/SW state verification should use a SW state which stays unchanged while the encoder/output is active. The intel_dp::is_mst flag used during state computation to choose between the DP SST/MST modes can change while the output is active, if the sink gets disconnected or the MST topology is removed for another reason. A subsequent state verification using intel_dp::is_mst leads then to a mismatch if the output is disabled/re-enabled without recomputing its state. Use the encoder's active MST link count instead, which will be always non-zero for an active MST output and will be zero for SST. Fixes: 35d2e4b75649 ("drm/i915/ddi: start distinguishing 128b/132b SST and MST at state readout") Fixes: 40d489fac0e8 ("drm/i915/ddi: handle 128b/132b SST in intel_ddi_read_func_ctl()") Cc: Jani Nikula Reviewed-by: Jani Nikula Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20250224093242.1859583-1-imre.deak@intel.com (cherry picked from commit 0159e311772af9d6598aafe072c020687720f1d7) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_ddi.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 18c66992aa1d8..ff2cf3daa7a2b 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -866,7 +866,7 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, encoder->base.base.id, encoder->base.name); if (!mst_pipe_mask && dp128b132b_pipe_mask) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); /* * If we don't have 8b/10b MST, but have more than one @@ -878,7 +878,8 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, * we don't expect MST to have been enabled at that point, and * can assume it's SST. */ - if (hweight8(dp128b132b_pipe_mask) > 1 || intel_dp->is_mst) + if (hweight8(dp128b132b_pipe_mask) > 1 || + intel_dp_mst_encoder_active_links(dig_port)) mst_pipe_mask = dp128b132b_pipe_mask; } @@ -4151,13 +4152,13 @@ static void intel_ddi_read_func_ctl(struct intel_encoder *encoder, } else if (ddi_mode == TRANS_DDI_MODE_SELECT_DP_MST) { intel_ddi_read_func_ctl_dp_mst(encoder, pipe_config, ddi_func_ctl); } else if (ddi_mode == TRANS_DDI_MODE_SELECT_FDI_OR_128B132B && HAS_DP20(display)) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); /* * If this is true, we know we're being called from mst stream * encoder's ->get_config(). */ - if (intel_dp->is_mst) + if (intel_dp_mst_encoder_active_links(dig_port)) intel_ddi_read_func_ctl_dp_mst(encoder, pipe_config, ddi_func_ctl); else intel_ddi_read_func_ctl_dp_sst(encoder, pipe_config, ddi_func_ctl); -- GitLab From c6557ccf8094ce2e1142c6e49cd47f5d5e2933a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrien=20Verg=C3=A9?= Date: Wed, 26 Feb 2025 14:55:15 +0100 Subject: [PATCH 1080/1585] ALSA: hda/realtek: Fix microphone regression on ASUS N705UD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a regression introduced a few weeks ago in stable kernels 6.12.14 and 6.13.3. The internal microphone on ASUS Vivobook N705UD / X705UD laptops is broken: the microphone appears in userspace (e.g. Gnome settings) but no sound is detected. I bisected it to commit 3b4309546b48 ("ALSA: hda: Fix headset detection failure due to unstable sort"). I figured out the cause: 1. The initial pins enabled for the ALC256 driver are: cfg->inputs == { { pin=0x19, type=AUTO_PIN_MIC, is_headset_mic=1, is_headphone_mic=0, has_boost_on_pin=1 }, { pin=0x1a, type=AUTO_PIN_MIC, is_headset_mic=0, is_headphone_mic=0, has_boost_on_pin=1 } } 2. Since 2017 and commits c1732ede5e8 ("ALSA: hda/realtek - Fix headset and mic on several ASUS laptops with ALC256") and 28e8af8a163 ("ALSA: hda/realtek: Fix mic and headset jack sense on ASUS X705UD"), the quirk ALC256_FIXUP_ASUS_MIC is also applied to ASUS X705UD / N705UD laptops. This added another internal microphone on pin 0x13: cfg->inputs == { { pin=0x13, type=AUTO_PIN_MIC, is_headset_mic=0, is_headphone_mic=0, has_boost_on_pin=1 }, { pin=0x19, type=AUTO_PIN_MIC, is_headset_mic=1, is_headphone_mic=0, has_boost_on_pin=1 }, { pin=0x1a, type=AUTO_PIN_MIC, is_headset_mic=0, is_headphone_mic=0, has_boost_on_pin=1 } } I don't know what this pin 0x13 corresponds to. To the best of my knowledge, these laptops have only one internal microphone. 3. Before 2025 and commit 3b4309546b48 ("ALSA: hda: Fix headset detection failure due to unstable sort"), the sort function would let the microphone of pin 0x1a (the working one) *before* the microphone of pin 0x13 (the phantom one). 4. After this commit 3b4309546b48, the fixed sort function puts the working microphone (pin 0x1a) *after* the phantom one (pin 0x13). As a result, no sound is detected anymore. It looks like the quirk ALC256_FIXUP_ASUS_MIC is not needed anymore for ASUS Vivobook X705UD / N705UD laptops. Without it, everything works fine: - the internal microphone is detected and records actual sound, - plugging in a jack headset is detected and can record actual sound with it, - unplugging the jack headset makes the system go back to internal microphone and can record actual sound. Cc: stable@vger.kernel.org Cc: Kuan-Wei Chiu Cc: Chris Chiu Fixes: 3b4309546b48 ("ALSA: hda: Fix headset detection failure due to unstable sort") Tested-by: Adrien Vergé Signed-off-by: Adrien Vergé Link: https://patch.msgid.link/20250226135515.24219-1-adrienverge@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e5c80d4be535c..c735f630ecb5a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10657,7 +10657,6 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x19ce, "ASUS B9450FA", ALC294_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW), - SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1a63, "ASUS UX3405MA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), -- GitLab From 99ca2c28e6b68084a0fb65585df09b9e28c3ec16 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 21 Feb 2025 11:24:50 +0100 Subject: [PATCH 1081/1585] wifi: mac80211: fix MLE non-inheritance parsing The code is erroneously applying the non-inheritance element to the inner elements rather than the outer, which is clearly completely wrong. Fix it by finding the MLE basic element at the beginning, and then applying the non-inheritance for the outer parsing. While at it, do some general cleanups such as not allowing callers to try looking for a specific non-transmitted BSS and link at the same time. Fixes: 45ebac4f059b ("wifi: mac80211: Parse station profile from association response") Reviewed-by: Ilan Peer Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20250221112451.b46d42f45b66.If5b95dc3c80208e0c62d8895fb6152aa54b6620b@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 1 + net/mac80211/parse.c | 127 ++++++++++++++++++++++++++++--------------- 2 files changed, 83 insertions(+), 45 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f8d52b3b0d0e4..36a9be9a66c8e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4959,6 +4959,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, parse_params.start = bss_ies->data; parse_params.len = bss_ies->len; parse_params.bss = cbss; + parse_params.link_id = -1; bss_elems = ieee802_11_parse_elems_full(&parse_params); if (!bss_elems) { ret = false; diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index cd318c1c67bec..3d5d6658fe8d5 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -47,6 +47,8 @@ struct ieee80211_elems_parse { /* The EPCS Multi-Link element in the original elements */ const struct element *ml_epcs_elem; + bool multi_link_inner; + /* * scratch buffer that can be used for various element parsing related * tasks, e.g., element de-fragmentation etc. @@ -152,12 +154,11 @@ ieee80211_parse_extension_element(u32 *crc, switch (le16_get_bits(mle->control, IEEE80211_ML_CONTROL_TYPE)) { case IEEE80211_ML_CONTROL_TYPE_BASIC: - if (elems_parse->ml_basic_elem) { + if (elems_parse->multi_link_inner) { elems->parse_error |= IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC; break; } - elems_parse->ml_basic_elem = elem; break; case IEEE80211_ML_CONTROL_TYPE_RECONF: elems_parse->ml_reconf_elem = elem; @@ -866,21 +867,36 @@ ieee80211_mle_get_sta_prof(struct ieee80211_elems_parse *elems_parse, } } -static void ieee80211_mle_parse_link(struct ieee80211_elems_parse *elems_parse, - struct ieee80211_elems_parse_params *params) +static const struct element * +ieee80211_prep_mle_link_parse(struct ieee80211_elems_parse *elems_parse, + struct ieee80211_elems_parse_params *params, + struct ieee80211_elems_parse_params *sub) { struct ieee802_11_elems *elems = &elems_parse->elems; struct ieee80211_mle_per_sta_profile *prof; - struct ieee80211_elems_parse_params sub = { - .mode = params->mode, - .action = params->action, - .from_ap = params->from_ap, - .link_id = -1, - }; - ssize_t ml_len = elems->ml_basic_len; - const struct element *non_inherit = NULL; + const struct element *tmp; + ssize_t ml_len; const u8 *end; + if (params->mode < IEEE80211_CONN_MODE_EHT) + return NULL; + + for_each_element_extid(tmp, WLAN_EID_EXT_EHT_MULTI_LINK, + elems->ie_start, elems->total_len) { + const struct ieee80211_multi_link_elem *mle = + (void *)tmp->data + 1; + + if (!ieee80211_mle_size_ok(tmp->data + 1, tmp->datalen - 1)) + continue; + + if (le16_get_bits(mle->control, IEEE80211_ML_CONTROL_TYPE) != + IEEE80211_ML_CONTROL_TYPE_BASIC) + continue; + + elems_parse->ml_basic_elem = tmp; + break; + } + ml_len = cfg80211_defragment_element(elems_parse->ml_basic_elem, elems->ie_start, elems->total_len, @@ -891,26 +907,26 @@ static void ieee80211_mle_parse_link(struct ieee80211_elems_parse *elems_parse, WLAN_EID_FRAGMENT); if (ml_len < 0) - return; + return NULL; elems->ml_basic = (const void *)elems_parse->scratch_pos; elems->ml_basic_len = ml_len; elems_parse->scratch_pos += ml_len; if (params->link_id == -1) - return; + return NULL; ieee80211_mle_get_sta_prof(elems_parse, params->link_id); prof = elems->prof; if (!prof) - return; + return NULL; /* check if we have the 4 bytes for the fixed part in assoc response */ if (elems->sta_prof_len < sizeof(*prof) + prof->sta_info_len - 1 + 4) { elems->prof = NULL; elems->sta_prof_len = 0; - return; + return NULL; } /* @@ -919,13 +935,17 @@ static void ieee80211_mle_parse_link(struct ieee80211_elems_parse *elems_parse, * the -1 is because the 'sta_info_len' is accounted to as part of the * per-STA profile, but not part of the 'u8 variable[]' portion. */ - sub.start = prof->variable + prof->sta_info_len - 1 + 4; + sub->start = prof->variable + prof->sta_info_len - 1 + 4; end = (const u8 *)prof + elems->sta_prof_len; - sub.len = end - sub.start; + sub->len = end - sub->start; - non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, - sub.start, sub.len); - _ieee802_11_parse_elems_full(&sub, elems_parse, non_inherit); + sub->mode = params->mode; + sub->action = params->action; + sub->from_ap = params->from_ap; + sub->link_id = -1; + + return cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, + sub->start, sub->len); } static void @@ -973,15 +993,19 @@ ieee80211_mle_defrag_epcs(struct ieee80211_elems_parse *elems_parse) struct ieee802_11_elems * ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) { + struct ieee80211_elems_parse_params sub = {}; struct ieee80211_elems_parse *elems_parse; - struct ieee802_11_elems *elems; const struct element *non_inherit = NULL; - u8 *nontransmitted_profile; - int nontransmitted_profile_len = 0; + struct ieee802_11_elems *elems; size_t scratch_len = 3 * params->len; + bool multi_link_inner = false; BUILD_BUG_ON(offsetof(typeof(*elems_parse), elems) != 0); + /* cannot parse for both a specific link and non-transmitted BSS */ + if (WARN_ON(params->link_id >= 0 && params->bss)) + return NULL; + elems_parse = kzalloc(struct_size(elems_parse, scratch, scratch_len), GFP_ATOMIC); if (!elems_parse) @@ -998,34 +1022,47 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) ieee80211_clear_tpe(&elems->tpe); ieee80211_clear_tpe(&elems->csa_tpe); - nontransmitted_profile = elems_parse->scratch_pos; - nontransmitted_profile_len = - ieee802_11_find_bssid_profile(params->start, params->len, - elems, params->bss, - nontransmitted_profile); - elems_parse->scratch_pos += nontransmitted_profile_len; - non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, - nontransmitted_profile, - nontransmitted_profile_len); + /* + * If we're looking for a non-transmitted BSS then we cannot at + * the same time be looking for a second link as the two can only + * appear in the same frame carrying info for different BSSes. + * + * In any case, we only look for one at a time, as encoded by + * the WARN_ON above. + */ + if (params->bss) { + int nontx_len = + ieee802_11_find_bssid_profile(params->start, + params->len, + elems, params->bss, + elems_parse->scratch_pos); + sub.start = elems_parse->scratch_pos; + sub.mode = params->mode; + sub.len = nontx_len; + sub.action = params->action; + sub.link_id = params->link_id; + + /* consume the space used for non-transmitted profile */ + elems_parse->scratch_pos += nontx_len; + + non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, + sub.start, nontx_len); + } else { + /* must always parse to get elems_parse->ml_basic_elem */ + non_inherit = ieee80211_prep_mle_link_parse(elems_parse, params, + &sub); + multi_link_inner = true; + } elems->crc = _ieee802_11_parse_elems_full(params, elems_parse, non_inherit); - /* Override with nontransmitted profile, if found */ - if (nontransmitted_profile_len) { - struct ieee80211_elems_parse_params sub = { - .mode = params->mode, - .start = nontransmitted_profile, - .len = nontransmitted_profile_len, - .action = params->action, - .link_id = params->link_id, - }; - + /* Override with nontransmitted/per-STA profile if found */ + if (sub.len) { + elems_parse->multi_link_inner = multi_link_inner; _ieee802_11_parse_elems_full(&sub, elems_parse, NULL); } - ieee80211_mle_parse_link(elems_parse, params); - ieee80211_mle_defrag_reconf(elems_parse); ieee80211_mle_defrag_epcs(elems_parse); -- GitLab From 130067e9c13bdc4820748ef16076a6972364745f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 21 Feb 2025 11:24:51 +0100 Subject: [PATCH 1082/1585] wifi: mac80211: fix vendor-specific inheritance If there's any vendor-specific element in the subelements then the outer element parsing must not parse any vendor element at all. This isn't implemented correctly now due to parsing into the pointers and then overriding them, so explicitly skip vendor elements if any exist in the sub- elements (non-transmitted profile or per-STA profile). Fixes: 671042a4fb77 ("mac80211: support non-inheritance element") Reviewed-by: Ilan Peer Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20250221112451.fd71e5268840.I9db3e6a3367e6ff38d052d07dc07005f0dd3bd5c@changeid Signed-off-by: Johannes Berg --- net/mac80211/parse.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 3d5d6658fe8d5..6da39c864f45b 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -48,6 +48,7 @@ struct ieee80211_elems_parse { const struct element *ml_epcs_elem; bool multi_link_inner; + bool skip_vendor; /* * scratch buffer that can be used for various element parsing related @@ -400,6 +401,9 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_VENDOR_SPECIFIC: + if (elems_parse->skip_vendor) + break; + if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 && pos[2] == 0xf2) { /* Microsoft OUI (00:50:F2) */ @@ -1054,12 +1058,16 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) multi_link_inner = true; } + elems_parse->skip_vendor = + cfg80211_find_elem(WLAN_EID_VENDOR_SPECIFIC, + sub.start, sub.len); elems->crc = _ieee802_11_parse_elems_full(params, elems_parse, non_inherit); /* Override with nontransmitted/per-STA profile if found */ if (sub.len) { elems_parse->multi_link_inner = multi_link_inner; + elems_parse->skip_vendor = false; _ieee802_11_parse_elems_full(&sub, elems_parse, NULL); } -- GitLab From 861d0445e72e9e33797f2ceef882c74decb16a87 Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Thu, 13 Feb 2025 22:43:30 +0100 Subject: [PATCH 1083/1585] wifi: mac80211: Fix sparse warning for monitor_sdata Use rcu_access_pointer() to avoid sparse warning in drv_remove_interface(). Signed-off-by: Alexander Wetzel Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202502130534.bVrZZBK0-lkp@intel.com/ Fixes: 646262c71aca ("wifi: mac80211: remove debugfs dir for virtual monitor") Link: https://patch.msgid.link/20250213214330.6113-1-Alexander@wetzel-home.de Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 2fc60e1e77a55..35349a7f16cb4 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -121,7 +121,7 @@ void drv_remove_interface(struct ieee80211_local *local, * The virtual monitor interface doesn't get a debugfs * entry, so it's exempt here. */ - if (sdata != local->monitor_sdata) + if (sdata != rcu_access_pointer(local->monitor_sdata)) ieee80211_debugfs_recreate_netdev(sdata, sdata->vif.valid_links); -- GitLab From 8c3170628a9ce24a59647bd24f897e666af919b8 Mon Sep 17 00:00:00 2001 From: Matthias Proske Date: Wed, 12 Feb 2025 19:59:35 +0100 Subject: [PATCH 1084/1585] wifi: brcmfmac: keep power during suspend if board requires it After commit 92cadedd9d5f ("brcmfmac: Avoid keeping power to SDIO card unless WOWL is used"), the wifi adapter by default is turned off on suspend and then re-probed on resume. This conflicts with some embedded boards that require to remain powered. They will fail on resume with: brcmfmac: brcmf_sdio_bus_rxctl: resumed on timeout ieee80211 phy1: brcmf_bus_started: failed: -110 ieee80211 phy1: brcmf_attach: dongle is not responding: err=-110 brcmfmac: brcmf_sdio_firmware_callback: brcmf_attach failed This commit checks for the Device Tree property 'cap-power-off-cards'. If this property is not set, it means that we do not have the capability to power off and should therefore remain powered. Signed-off-by: Matthias Proske Acked-by: Arend van Spriel Link: https://patch.msgid.link/20250212185941.146958-2-email@matthias-proske.de Signed-off-by: Johannes Berg --- .../broadcom/brcm80211/brcmfmac/bcmsdh.c | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c index 60eb95fc19a5a..6bc107476a2a3 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c @@ -1172,6 +1172,7 @@ static int brcmf_ops_sdio_suspend(struct device *dev) struct brcmf_bus *bus_if; struct brcmf_sdio_dev *sdiodev; mmc_pm_flag_t sdio_flags; + bool cap_power_off; int ret = 0; func = container_of(dev, struct sdio_func, dev); @@ -1179,19 +1180,23 @@ static int brcmf_ops_sdio_suspend(struct device *dev) if (func->num != 1) return 0; + cap_power_off = !!(func->card->host->caps & MMC_CAP_POWER_OFF_CARD); bus_if = dev_get_drvdata(dev); sdiodev = bus_if->bus_priv.sdio; - if (sdiodev->wowl_enabled) { + if (sdiodev->wowl_enabled || !cap_power_off) { brcmf_sdiod_freezer_on(sdiodev); brcmf_sdio_wd_timer(sdiodev->bus, 0); sdio_flags = MMC_PM_KEEP_POWER; - if (sdiodev->settings->bus.sdio.oob_irq_supported) - enable_irq_wake(sdiodev->settings->bus.sdio.oob_irq_nr); - else - sdio_flags |= MMC_PM_WAKE_SDIO_IRQ; + + if (sdiodev->wowl_enabled) { + if (sdiodev->settings->bus.sdio.oob_irq_supported) + enable_irq_wake(sdiodev->settings->bus.sdio.oob_irq_nr); + else + sdio_flags |= MMC_PM_WAKE_SDIO_IRQ; + } if (sdio_set_host_pm_flags(sdiodev->func1, sdio_flags)) brcmf_err("Failed to set pm_flags %x\n", sdio_flags); @@ -1213,18 +1218,19 @@ static int brcmf_ops_sdio_resume(struct device *dev) struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio; struct sdio_func *func = container_of(dev, struct sdio_func, dev); int ret = 0; + bool cap_power_off = !!(func->card->host->caps & MMC_CAP_POWER_OFF_CARD); brcmf_dbg(SDIO, "Enter: F%d\n", func->num); if (func->num != 2) return 0; - if (!sdiodev->wowl_enabled) { + if (!sdiodev->wowl_enabled && cap_power_off) { /* bus was powered off and device removed, probe again */ ret = brcmf_sdiod_probe(sdiodev); if (ret) brcmf_err("Failed to probe device on resume\n"); } else { - if (sdiodev->settings->bus.sdio.oob_irq_supported) + if (sdiodev->wowl_enabled && sdiodev->settings->bus.sdio.oob_irq_supported) disable_irq_wake(sdiodev->settings->bus.sdio.oob_irq_nr); brcmf_sdiod_freezer_off(sdiodev); -- GitLab From e4cf8ec4de4e13f156c1d61977d282d90c221085 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Thu, 20 Feb 2025 08:14:43 +0000 Subject: [PATCH 1085/1585] affs: generate OFS sequence numbers starting at 1 If I write a file to an OFS floppy image, and try to read it back on an emulated Amiga running Workbench 1.3, the Amiga reports a disk error trying to read the file. (That is, it's unable to read it _at all_, even to copy it to the NIL: device. It isn't a matter of getting the wrong data and being unable to parse the file format.) This is because the 'sequence number' field in the OFS data block header is supposed to be based at 1, but affs writes it based at 0. All three locations changed by this patch were setting the sequence number to a variable 'bidx' which was previously obtained by dividing a file position by bsize, so bidx will naturally use 0 for the first block. Therefore all three should add 1 to that value before writing it into the sequence number field. With this change, the Amiga successfully reads the file. For data block reference: https://wiki.osdev.org/FFS_(Amiga) Signed-off-by: Simon Tatham Signed-off-by: David Sterba --- fs/affs/file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/affs/file.c b/fs/affs/file.c index a5a861dd52230..226308f8627e7 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -596,7 +596,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize) BUG_ON(tmp > bsize); AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino); - AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx); + AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx + 1); AFFS_DATA_HEAD(bh)->size = cpu_to_be32(tmp); affs_fix_checksum(sb, bh); bh->b_state &= ~(1UL << BH_New); @@ -746,7 +746,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, if (buffer_new(bh)) { AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino); - AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx); + AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx + 1); AFFS_DATA_HEAD(bh)->size = cpu_to_be32(bsize); AFFS_DATA_HEAD(bh)->next = 0; bh->b_state &= ~(1UL << BH_New); @@ -780,7 +780,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, if (buffer_new(bh)) { AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino); - AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx); + AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx + 1); AFFS_DATA_HEAD(bh)->size = cpu_to_be32(tmp); AFFS_DATA_HEAD(bh)->next = 0; bh->b_state &= ~(1UL << BH_New); -- GitLab From 011ea742a25a77bac3d995f457886a67d178c6f0 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Thu, 20 Feb 2025 08:14:44 +0000 Subject: [PATCH 1086/1585] affs: don't write overlarge OFS data block size fields If a data sector on an OFS floppy contains a value > 0x1e8 (the largest amount of data that fits in the sector after its header), then an Amiga reading the file can return corrupt data, by taking the overlarge size at its word and reading past the end of the buffer it read the disk sector into! The cause: when affs_write_end_ofs() writes data to an OFS filesystem, the new size field for a data block was computed by adding the amount of data currently being written (into the block) to the existing value of the size field. This is correct if you're extending the file at the end, but if you seek backwards in the file and overwrite _existing_ data, it can lead to the size field being larger than the maximum legal value. This commit changes the calculation so that it sets the size field to the max of its previous size and the position within the block that we just wrote up to. Signed-off-by: Simon Tatham Signed-off-by: David Sterba --- fs/affs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/affs/file.c b/fs/affs/file.c index 226308f8627e7..7a71018e3f675 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -724,7 +724,8 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, tmp = min(bsize - boff, to - from); BUG_ON(boff + tmp > bsize || tmp > bsize); memcpy(AFFS_DATA(bh) + boff, data + from, tmp); - be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp); + AFFS_DATA_HEAD(bh)->size = cpu_to_be32( + max(boff + tmp, be32_to_cpu(AFFS_DATA_HEAD(bh)->size))); affs_fix_checksum(sb, bh); mark_buffer_dirty_inode(bh, inode); written += tmp; -- GitLab From 01f1d77a2630e774ce33233c4e6723bca3ae9daa Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 14 Jan 2025 10:57:25 +0100 Subject: [PATCH 1087/1585] drm/nouveau: Do not override forced connector status Keep user-forced connector status even if it cannot be programmed. Same behavior as for the rest of the drivers. Signed-off-by: Thomas Zimmermann Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20250114100214.195386-1-tzimmermann@suse.de --- drivers/gpu/drm/nouveau/nouveau_connector.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 8d5c9c74cbb90..eac0d1d2dbda2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -775,7 +775,6 @@ nouveau_connector_force(struct drm_connector *connector) if (!nv_encoder) { NV_ERROR(drm, "can't find encoder to force %s on!\n", connector->name); - connector->status = connector_status_disconnected; return; } -- GitLab From 75f1f311d883dfaffb98be3c1da208d6ed5d4df9 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 26 Feb 2025 13:38:19 -0600 Subject: [PATCH 1088/1585] Revert "of: reserved-memory: Fix using wrong number of cells to get property 'alignment'" This reverts commit 267b21d0bef8e67dbe6c591c9991444e58237ec9. Turns out some DTs do depend on this behavior. Specifically, a downstream Pixel 6 DT. Revert the change at least until we can decide if the DT spec can be changed instead. Cc: stable@vger.kernel.org Signed-off-by: Rob Herring (Arm) --- drivers/of/of_reserved_mem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 75e819f66a561..ee2e31522d7ef 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -415,12 +415,12 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam prop = of_get_flat_dt_prop(node, "alignment", &len); if (prop) { - if (len != dt_root_size_cells * sizeof(__be32)) { + if (len != dt_root_addr_cells * sizeof(__be32)) { pr_err("invalid alignment property in '%s' node.\n", uname); return -EINVAL; } - align = dt_mem_next_cell(dt_root_size_cells, &prop); + align = dt_mem_next_cell(dt_root_addr_cells, &prop); } nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL; -- GitLab From dd1998e243f5fa25d348a384ba0b6c84d980f2b2 Mon Sep 17 00:00:00 2001 From: Tyrone Ting Date: Thu, 20 Feb 2025 12:00:29 +0800 Subject: [PATCH 1089/1585] i2c: npcm: disable interrupt enable bit before devm_request_irq The customer reports that there is a soft lockup issue related to the i2c driver. After checking, the i2c module was doing a tx transfer and the bmc machine reboots in the middle of the i2c transaction, the i2c module keeps the status without being reset. Due to such an i2c module status, the i2c irq handler keeps getting triggered since the i2c irq handler is registered in the kernel booting process after the bmc machine is doing a warm rebooting. The continuous triggering is stopped by the soft lockup watchdog timer. Disable the interrupt enable bit in the i2c module before calling devm_request_irq to fix this issue since the i2c relative status bit is read-only. Here is the soft lockup log. [ 28.176395] watchdog: BUG: soft lockup - CPU#0 stuck for 26s! [swapper/0:1] [ 28.183351] Modules linked in: [ 28.186407] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.15.120-yocto-s-dirty-bbebc78 #1 [ 28.201174] pstate: 40000005 (nZcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 28.208128] pc : __do_softirq+0xb0/0x368 [ 28.212055] lr : __do_softirq+0x70/0x368 [ 28.215972] sp : ffffff8035ebca00 [ 28.219278] x29: ffffff8035ebca00 x28: 0000000000000002 x27: ffffff80071a3780 [ 28.226412] x26: ffffffc008bdc000 x25: ffffffc008bcc640 x24: ffffffc008be50c0 [ 28.233546] x23: ffffffc00800200c x22: 0000000000000000 x21: 000000000000001b [ 28.240679] x20: 0000000000000000 x19: ffffff80001c3200 x18: ffffffffffffffff [ 28.247812] x17: ffffffc02d2e0000 x16: ffffff8035eb8b40 x15: 00001e8480000000 [ 28.254945] x14: 02c3647e37dbfcb6 x13: 02c364f2ab14200c x12: 0000000002c364f2 [ 28.262078] x11: 00000000fa83b2da x10: 000000000000b67e x9 : ffffffc008010250 [ 28.269211] x8 : 000000009d983d00 x7 : 7fffffffffffffff x6 : 0000036d74732434 [ 28.276344] x5 : 00ffffffffffffff x4 : 0000000000000015 x3 : 0000000000000198 [ 28.283476] x2 : ffffffc02d2e0000 x1 : 00000000000000e0 x0 : ffffffc008bdcb40 [ 28.290611] Call trace: [ 28.293052] __do_softirq+0xb0/0x368 [ 28.296625] __irq_exit_rcu+0xe0/0x100 [ 28.300374] irq_exit+0x14/0x20 [ 28.303513] handle_domain_irq+0x68/0x90 [ 28.307440] gic_handle_irq+0x78/0xb0 [ 28.311098] call_on_irq_stack+0x20/0x38 [ 28.315019] do_interrupt_handler+0x54/0x5c [ 28.319199] el1_interrupt+0x2c/0x4c [ 28.322777] el1h_64_irq_handler+0x14/0x20 [ 28.326872] el1h_64_irq+0x74/0x78 [ 28.330269] __setup_irq+0x454/0x780 [ 28.333841] request_threaded_irq+0xd0/0x1b4 [ 28.338107] devm_request_threaded_irq+0x84/0x100 [ 28.342809] npcm_i2c_probe_bus+0x188/0x3d0 [ 28.346990] platform_probe+0x6c/0xc4 [ 28.350653] really_probe+0xcc/0x45c [ 28.354227] __driver_probe_device+0x8c/0x160 [ 28.358578] driver_probe_device+0x44/0xe0 [ 28.362670] __driver_attach+0x124/0x1d0 [ 28.366589] bus_for_each_dev+0x7c/0xe0 [ 28.370426] driver_attach+0x28/0x30 [ 28.373997] bus_add_driver+0x124/0x240 [ 28.377830] driver_register+0x7c/0x124 [ 28.381662] __platform_driver_register+0x2c/0x34 [ 28.386362] npcm_i2c_init+0x3c/0x5c [ 28.389937] do_one_initcall+0x74/0x230 [ 28.393768] kernel_init_freeable+0x24c/0x2b4 [ 28.398126] kernel_init+0x28/0x130 [ 28.401614] ret_from_fork+0x10/0x20 [ 28.405189] Kernel panic - not syncing: softlockup: hung tasks [ 28.411011] SMP: stopping secondary CPUs [ 28.414933] Kernel Offset: disabled [ 28.418412] CPU features: 0x00000000,00000802 [ 28.427644] Rebooting in 20 seconds.. Fixes: 56a1485b102e ("i2c: npcm7xx: Add Nuvoton NPCM I2C controller driver") Signed-off-by: Tyrone Ting Cc: # v5.8+ Reviewed-by: Tali Perry Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250220040029.27596-2-kfting@nuvoton.com --- drivers/i2c/busses/i2c-npcm7xx.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index 3ca08b8ef8af3..de713b5747fe5 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -2554,6 +2554,13 @@ static int npcm_i2c_probe_bus(struct platform_device *pdev) if (irq < 0) return irq; + /* + * Disable the interrupt to avoid the interrupt handler being triggered + * incorrectly by the asynchronous interrupt status since the machine + * might do a warm reset during the last smbus/i2c transfer session. + */ + npcm_i2c_int_enable(bus, false); + ret = devm_request_irq(bus->dev, irq, npcm_i2c_bus_irq, 0, dev_name(bus->dev), bus); if (ret) -- GitLab From 71c49ee9bb41e1709abac7e2eb05f9193222e580 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Thu, 20 Feb 2025 20:56:12 +0800 Subject: [PATCH 1090/1585] i2c: ls2x: Fix frequency division register access According to the chip manual, the I2C register access type of Loongson-2K2000/LS7A is "B", so we can only access registers in byte form (readb()/writeb()). Although Loongson-2K0500/Loongson-2K1000 do not have similar constraints, register accesses in byte form also behave correctly. Also, in hardware, the frequency division registers are defined as two separate registers (high 8-bit and low 8-bit), so we just access them directly as bytes. Fixes: 015e61f0bffd ("i2c: ls2x: Add driver for Loongson-2K/LS7A I2C controller") Co-developed-by: Hongliang Wang Signed-off-by: Hongliang Wang Signed-off-by: Binbin Zhou Cc: stable@vger.kernel.org # v6.3+ Reviewed-by: Andy Shevchenko Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250220125612.1910990-1-zhoubinbin@loongson.cn --- drivers/i2c/busses/i2c-ls2x.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-ls2x.c b/drivers/i2c/busses/i2c-ls2x.c index 8821cac3897b6..b475dd27b7af9 100644 --- a/drivers/i2c/busses/i2c-ls2x.c +++ b/drivers/i2c/busses/i2c-ls2x.c @@ -10,6 +10,7 @@ * Rewritten for mainline by Binbin Zhou */ +#include #include #include #include @@ -26,7 +27,8 @@ #include /* I2C Registers */ -#define I2C_LS2X_PRER 0x0 /* Freq Division Register(16 bits) */ +#define I2C_LS2X_PRER_LO 0x0 /* Freq Division Low Byte Register */ +#define I2C_LS2X_PRER_HI 0x1 /* Freq Division High Byte Register */ #define I2C_LS2X_CTR 0x2 /* Control Register */ #define I2C_LS2X_TXR 0x3 /* Transport Data Register */ #define I2C_LS2X_RXR 0x3 /* Receive Data Register */ @@ -93,6 +95,7 @@ static irqreturn_t ls2x_i2c_isr(int this_irq, void *dev_id) */ static void ls2x_i2c_adjust_bus_speed(struct ls2x_i2c_priv *priv) { + u16 val; struct i2c_timings *t = &priv->i2c_t; struct device *dev = priv->adapter.dev.parent; u32 acpi_speed = i2c_acpi_find_bus_speed(dev); @@ -104,9 +107,14 @@ static void ls2x_i2c_adjust_bus_speed(struct ls2x_i2c_priv *priv) else t->bus_freq_hz = LS2X_I2C_FREQ_STD; - /* Calculate and set i2c frequency. */ - writew(LS2X_I2C_PCLK_FREQ / (5 * t->bus_freq_hz) - 1, - priv->base + I2C_LS2X_PRER); + /* + * According to the chip manual, we can only access the registers as bytes, + * otherwise the high bits will be truncated. + * So set the I2C frequency with a sequential writeb() instead of writew(). + */ + val = LS2X_I2C_PCLK_FREQ / (5 * t->bus_freq_hz) - 1; + writeb(FIELD_GET(GENMASK(7, 0), val), priv->base + I2C_LS2X_PRER_LO); + writeb(FIELD_GET(GENMASK(15, 8), val), priv->base + I2C_LS2X_PRER_HI); } static void ls2x_i2c_init(struct ls2x_i2c_priv *priv) -- GitLab From 9f3c507cb44498067c980674139bcad56e582ee6 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Wed, 19 Feb 2025 19:27:47 +0530 Subject: [PATCH 1091/1585] i2c: amd-asf: Fix EOI register write to enable successive interrupts The commit b1f8921dfbaa ("i2c: amd-asf: Clear remote IRR bit to get successive interrupt") introduced a method to enable successive interrupts but inadvertently omitted the necessary write to the EOI register, resulting in a failure to receive successive interrupts. Fix this by adding the required write to the EOI register. Fixes: b1f8921dfbaa ("i2c: amd-asf: Clear remote IRR bit to get successive interrupt") Cc: stable@vger.kernel.org # v6.13+ Co-developed-by: Sanket Goswami Signed-off-by: Sanket Goswami Signed-off-by: Shyam Sundar S K Fixes: 9b25419ad397 ("i2c: amd-asf: Add routine to handle the ASF slave process") Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250219135747.3251182-1-Shyam-sundar.S-k@amd.com --- drivers/i2c/busses/i2c-amd-asf-plat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-amd-asf-plat.c b/drivers/i2c/busses/i2c-amd-asf-plat.c index 7512614bf4b73..93ebec162c6dd 100644 --- a/drivers/i2c/busses/i2c-amd-asf-plat.c +++ b/drivers/i2c/busses/i2c-amd-asf-plat.c @@ -293,6 +293,7 @@ static irqreturn_t amd_asf_irq_handler(int irq, void *ptr) amd_asf_update_ioport_target(piix4_smba, ASF_SLV_INTR, SMBHSTSTS, true); } + iowrite32(irq, dev->eoi_base); return IRQ_HANDLED; } -- GitLab From b8501febdc513541afc5663d063bfac7ea575b71 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 29 Jan 2025 16:45:19 +0100 Subject: [PATCH 1092/1585] clk: qcom: dispcc-sm8750: Drop incorrect CLK_SET_RATE_PARENT on byte intf parent The parent of disp_cc_mdss_byte0_intf_clk clock should not propagate up the rates, because this messes up entire clock hierarchy when setting clock rates in MSM DSI driver. The dsi_link_clk_set_rate_6g() first sets entire clock hierarchy rates via dev_pm_opp_set_rate() on byte clock and then sets individual clock rates, like pixel and byte_intf clocks, to proper frequencies. Having CLK_SET_RATE_PARENT caused that entire tree was re-calced and the byte clock received halved frequency. Drop CLK_SET_RATE_PARENT to fix this and align with SM8550 and SM8650. Fixes: f1080d8dab0f ("clk: qcom: dispcc-sm8750: Add SM8750 Display clock controller") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250129154519.209791-1-krzysztof.kozlowski@linaro.org Reviewed-by: Neil Armstrong Reviewed-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Signed-off-by: Stephen Boyd --- drivers/clk/qcom/dispcc-sm8750.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/clk/qcom/dispcc-sm8750.c b/drivers/clk/qcom/dispcc-sm8750.c index 0358dff91da5d..e9bca179998b9 100644 --- a/drivers/clk/qcom/dispcc-sm8750.c +++ b/drivers/clk/qcom/dispcc-sm8750.c @@ -827,7 +827,6 @@ static struct clk_regmap_div disp_cc_mdss_byte0_div_clk_src = { &disp_cc_mdss_byte0_clk_src.clkr.hw, }, .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, .ops = &clk_regmap_div_ops, }, }; @@ -842,7 +841,6 @@ static struct clk_regmap_div disp_cc_mdss_byte1_div_clk_src = { &disp_cc_mdss_byte1_clk_src.clkr.hw, }, .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, .ops = &clk_regmap_div_ops, }, }; -- GitLab From ac965d7d88fc36fb42e3d50225c0a44dd8326da4 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 26 Feb 2025 15:18:46 +0900 Subject: [PATCH 1093/1585] tracing: tprobe-events: Fix a memory leak when tprobe with $retval Fix a memory leak when a tprobe is defined with $retval. This combination is not allowed, but the parse_symbol_and_return() does not free the *symbol which should not be used if it returns the error. Thus, it leaks the *symbol memory in that error path. Link: https://lore.kernel.org/all/174055072650.4079315.3063014346697447838.stgit@mhiramat.tok.corp.google.com/ Fixes: ce51e6153f77 ("tracing: fprobe-event: Fix to check tracepoint event and return") Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) Cc: stable@vger.kernel.org --- kernel/trace/trace_fprobe.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index b8f3c4ba309b6..8826f44f69a44 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -1056,6 +1056,8 @@ static int parse_symbol_and_return(int argc, const char *argv[], if (is_tracepoint) { trace_probe_log_set_index(i); trace_probe_log_err(tmp - argv[i], RETVAL_ON_PROBE); + kfree(*symbol); + *symbol = NULL; return -EINVAL; } *is_return = true; -- GitLab From d0453655b6ddc685a4837f3cc0776ae8eef62d01 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 26 Feb 2025 15:18:54 +0900 Subject: [PATCH 1094/1585] tracing: tprobe-events: Reject invalid tracepoint name Commit 57a7e6de9e30 ("tracing/fprobe: Support raw tracepoints on future loaded modules") allows user to set a tprobe on non-exist tracepoint but it does not check the tracepoint name is acceptable. So it leads tprobe has a wrong character for events (e.g. with subsystem prefix). In this case, the event is not shown in the events directory. Reject such invalid tracepoint name. The tracepoint name must consist of alphabet or digit or '_'. Link: https://lore.kernel.org/all/174055073461.4079315.15875502830565214255.stgit@mhiramat.tok.corp.google.com/ Fixes: 57a7e6de9e30 ("tracing/fprobe: Support raw tracepoints on future loaded modules") Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) Cc: stable@vger.kernel.org --- kernel/trace/trace_fprobe.c | 13 +++++++++++++ kernel/trace/trace_probe.h | 1 + 2 files changed, 14 insertions(+) diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 8826f44f69a44..85f037dc14623 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -1049,6 +1049,19 @@ static int parse_symbol_and_return(int argc, const char *argv[], if (*is_return) return 0; + if (is_tracepoint) { + tmp = *symbol; + while (*tmp && (isalnum(*tmp) || *tmp == '_')) + tmp++; + if (*tmp) { + /* find a wrong character. */ + trace_probe_log_err(tmp - *symbol, BAD_TP_NAME); + kfree(*symbol); + *symbol = NULL; + return -EINVAL; + } + } + /* If there is $retval, this should be a return fprobe. */ for (i = 2; i < argc; i++) { tmp = strstr(argv[i], "$retval"); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 5803e6a415705..fba3ede870541 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -481,6 +481,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(NON_UNIQ_SYMBOL, "The symbol is not unique"), \ C(BAD_RETPROBE, "Retprobe address must be an function entry"), \ C(NO_TRACEPOINT, "Tracepoint is not found"), \ + C(BAD_TP_NAME, "Invalid character in tracepoint name"),\ C(BAD_ADDR_SUFFIX, "Invalid probed address suffix"), \ C(NO_GROUP_NAME, "Group name is not specified"), \ C(GROUP_TOO_LONG, "Group name is too long"), \ -- GitLab From db5e228611b118cf7b1f8084063feda5c037f4a7 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 26 Feb 2025 15:19:02 +0900 Subject: [PATCH 1095/1585] tracing: fprobe-events: Log error for exceeding the number of entry args Add error message when the number of entry argument exceeds the maximum size of entry data. This is currently checked when registering fprobe, but in this case no error message is shown in the error_log file. Link: https://lore.kernel.org/all/174055074269.4079315.17809232650360988538.stgit@mhiramat.tok.corp.google.com/ Fixes: 25f00e40ce79 ("tracing/probes: Support $argN in return probe (kprobe and fprobe)") Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) --- kernel/trace/trace_fprobe.c | 5 +++++ kernel/trace/trace_probe.h | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 85f037dc14623..e27305d31fc57 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -1230,6 +1230,11 @@ static int trace_fprobe_create_internal(int argc, const char *argv[], if (is_return && tf->tp.entry_arg) { tf->fp.entry_handler = trace_fprobe_entry_handler; tf->fp.entry_data_size = traceprobe_get_entry_data_size(&tf->tp); + if (ALIGN(tf->fp.entry_data_size, sizeof(long)) > MAX_FPROBE_DATA_SIZE) { + trace_probe_log_set_index(2); + trace_probe_log_err(0, TOO_MANY_EARGS); + return -E2BIG; + } } ret = traceprobe_set_print_fmt(&tf->tp, diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index fba3ede870541..c47ca002347a7 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -545,7 +545,8 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(NO_BTF_FIELD, "This field is not found."), \ C(BAD_BTF_TID, "Failed to get BTF type info."),\ C(BAD_TYPE4STR, "This type does not fit for string."),\ - C(NEED_STRING_TYPE, "$comm and immediate-string only accepts string type"), + C(NEED_STRING_TYPE, "$comm and immediate-string only accepts string type"),\ + C(TOO_MANY_EARGS, "Too many entry arguments specified"), #undef C #define C(a, b) TP_ERR_##a -- GitLab From 2b1283e1ea9b5e0b06f075f79391a51d9f70749b Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Tue, 25 Feb 2025 11:46:36 +0000 Subject: [PATCH 1096/1585] arm64/mm: Fix Boot panic on Ampere Altra When the range of present physical memory is sufficiently small enough and the reserved address space for the linear map is sufficiently large enough, The linear map base address is randomized in arm64_memblock_init(). Prior to commit 62cffa496aac ("arm64/mm: Override PARange for !LPA2 and use it consistently"), we decided if the sizes were suitable with the help of the raw mmfr0.parange. But the commit changed this to use the sanitized version instead. But the function runs before the register has been sanitized so this returns 0, interpreted as a parange of 32 bits. Some fun wrapping occurs and the logic concludes that there is enough room to randomize the linear map base address, when really there isn't. So the top of the linear map ends up outside the reserved address space. Since the PA range cannot be overridden in the first place, restore the mmfr0 reading logic to its state prior to 62cffa496aac, where the raw register value is used. Reported-by: Luiz Capitulino Suggested-by: Ard Biesheuvel Closes: https://lore.kernel.org/all/a3d9acbe-07c2-43b6-9ba9-a7585f770e83@redhat.com/ Fixes: 62cffa496aac ("arm64/mm: Override PARange for !LPA2 and use it consistently") Signed-off-by: Ryan Roberts Link: https://lore.kernel.org/r/20250225114638.2038006-1-ryan.roberts@arm.com Cc: stable@vger.kernel.org Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 9c0b8d9558fc4..ccdef53872a0b 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -279,12 +279,7 @@ void __init arm64_memblock_init(void) if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { extern u16 memstart_offset_seed; - - /* - * Use the sanitised version of id_aa64mmfr0_el1 so that linear - * map randomization can be enabled by shrinking the IPA space. - */ - u64 mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); + u64 mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); int parange = cpuid_feature_extract_unsigned_field( mmfr0, ID_AA64MMFR0_EL1_PARANGE_SHIFT); s64 range = linear_region_size - -- GitLab From 4804f3ac2649475509b1836a4d252c04de143249 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 26 Feb 2025 19:30:38 -0500 Subject: [PATCH 1097/1585] bcachefs: Revert directory i_size This turned out to have several bugs, which were missed because the fsck code wasn't properly reporting errors - whoops. Kicking it out for now, hopefully it can make 6.15. Cc: Hongbo Li Signed-off-by: Kent Overstreet --- fs/bcachefs/dirent.h | 5 ----- fs/bcachefs/fs-common.c | 11 ----------- fs/bcachefs/fsck.c | 21 --------------------- fs/bcachefs/sb-downgrade.c | 5 +---- 4 files changed, 1 insertion(+), 41 deletions(-) diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index a633f83c1ac78..362b3b2f2f2e3 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -31,11 +31,6 @@ static inline unsigned dirent_val_u64s(unsigned len) sizeof(u64)); } -static inline unsigned int dirent_occupied_size(const struct qstr *name) -{ - return (BKEY_U64s + dirent_val_u64s(name->len)) * sizeof(u64); -} - int bch2_dirent_read_target(struct btree_trans *, subvol_inum, struct bkey_s_c_dirent, subvol_inum *); diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c index d70d9f634cea9..2c3d46ac70c61 100644 --- a/fs/bcachefs/fs-common.c +++ b/fs/bcachefs/fs-common.c @@ -152,7 +152,6 @@ int bch2_create_trans(struct btree_trans *trans, if (is_subdir_for_nlink(new_inode)) dir_u->bi_nlink++; dir_u->bi_mtime = dir_u->bi_ctime = now; - dir_u->bi_size += dirent_occupied_size(name); ret = bch2_inode_write(trans, &dir_iter, dir_u); if (ret) @@ -221,7 +220,6 @@ int bch2_link_trans(struct btree_trans *trans, } dir_u->bi_mtime = dir_u->bi_ctime = now; - dir_u->bi_size += dirent_occupied_size(name); dir_hash = bch2_hash_info_init(c, dir_u); @@ -324,7 +322,6 @@ int bch2_unlink_trans(struct btree_trans *trans, dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now; dir_u->bi_nlink -= is_subdir_for_nlink(inode_u); - dir_u->bi_size -= dirent_occupied_size(name); ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc, &dir_hash, &dirent_iter, @@ -463,14 +460,6 @@ int bch2_rename_trans(struct btree_trans *trans, goto err; } - if (mode == BCH_RENAME) { - src_dir_u->bi_size -= dirent_occupied_size(src_name); - dst_dir_u->bi_size += dirent_occupied_size(dst_name); - } - - if (mode == BCH_RENAME_OVERWRITE) - src_dir_u->bi_size -= dirent_occupied_size(src_name); - if (src_inode_u->bi_parent_subvol) src_inode_u->bi_parent_subvol = dst_dir.subvol; diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 9bf316e7b845d..0e85131d0af88 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1978,31 +1978,10 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_ return ret; } -static int check_dir_i_size_notnested(struct btree_trans *trans, struct inode_walker *w) -{ - struct bch_fs *c = trans->c; - int ret = 0; - - darray_for_each(w->inodes, i) - if (fsck_err_on(i->inode.bi_size != i->i_size, - trans, inode_dir_wrong_nlink, - "directory %llu:%u with wrong i_size: got %llu, should be %llu", - w->last_pos.inode, i->snapshot, i->inode.bi_size, i->i_size)) { - i->inode.bi_size = i->i_size; - ret = bch2_fsck_write_inode(trans, &i->inode); - if (ret) - break; - } -fsck_err: - bch_err_fn(c, ret); - return ret; -} - static int check_subdir_dirents_count(struct btree_trans *trans, struct inode_walker *w) { u32 restart_count = trans->restart_count; return check_subdir_count_notnested(trans, w) ?: - check_dir_i_size_notnested(trans, w) ?: trans_was_restarted(trans, restart_count); } diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 35e07bc8fbd34..051214fdc7352 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -90,10 +90,7 @@ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ BCH_FSCK_ERR_accounting_mismatch, \ BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ - BCH_FSCK_ERR_accounting_key_junk_at_end) \ - x(directory_size, \ - BIT_ULL(BCH_RECOVERY_PASS_check_dirents), \ - BCH_FSCK_ERR_directory_size_mismatch) \ + BCH_FSCK_ERR_accounting_key_junk_at_end) #define DOWNGRADE_TABLE() \ x(bucket_stripe_sectors, \ -- GitLab From 7909d1fb90e290ffd7b8570f4e2f97fe2fb381d0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 25 Feb 2025 22:35:28 -0500 Subject: [PATCH 1098/1585] bcachefs: Check for -BCH_ERR_open_buckets_empty in journal resize This fixes occasional failures from journal resize. Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 24c294d4634e0..5dabbf3c0965c 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1194,7 +1194,9 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, closure_sync(&cl); - if (ret && ret != -BCH_ERR_bucket_alloc_blocked) + if (ret && + ret != -BCH_ERR_bucket_alloc_blocked && + ret != -BCH_ERR_open_buckets_empty) break; } -- GitLab From 677bdb7346b6fd806ea45b11cbfe36de0b0cd644 Mon Sep 17 00:00:00 2001 From: Alan Huang Date: Wed, 26 Feb 2025 17:33:22 +0800 Subject: [PATCH 1099/1585] bcachefs: Fix deadlock This fixes two deadlocks: 1.pcpu_alloc_mutex involved one as pointed by syzbot[1] 2.recursion deadlock. The root cause is that we hold the bc lock during alloc_percpu, fix it by following the pattern used by __btree_node_mem_alloc(). [1] https://lore.kernel.org/all/66f97d9a.050a0220.6bad9.001d.GAE@google.com/T/ Reported-by: syzbot+fe63f377148a6371a9db@syzkaller.appspotmail.com Tested-by: syzbot+fe63f377148a6371a9db@syzkaller.appspotmail.com Signed-off-by: Alan Huang Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_cache.c | 9 +++++---- fs/bcachefs/btree_key_cache.c | 2 +- fs/bcachefs/btree_locking.c | 5 +++-- fs/bcachefs/btree_locking.h | 2 +- fs/bcachefs/six.c | 5 +++-- fs/bcachefs/six.h | 7 ++++--- 6 files changed, 17 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index ca755e8d1a372..1ec1f90e0eb38 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -203,7 +203,7 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c) return NULL; } - bch2_btree_lock_init(&b->c, 0); + bch2_btree_lock_init(&b->c, 0, GFP_KERNEL); __bch2_btree_node_to_freelist(bc, b); return b; @@ -795,17 +795,18 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea } b = __btree_node_mem_alloc(c, GFP_NOWAIT|__GFP_NOWARN); - if (!b) { + if (b) { + bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0, GFP_NOWAIT); + } else { mutex_unlock(&bc->lock); bch2_trans_unlock(trans); b = __btree_node_mem_alloc(c, GFP_KERNEL); if (!b) goto err; + bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0, GFP_KERNEL); mutex_lock(&bc->lock); } - bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0); - BUG_ON(!six_trylock_intent(&b->c.lock)); BUG_ON(!six_trylock_write(&b->c.lock)); diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 1821f40c161a1..edce594333756 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -156,7 +156,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned k } if (ck) { - bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0); + bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0, GFP_KERNEL); ck->c.cached = true; goto lock; } diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 10b805a60f526..caef65adeae49 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -7,9 +7,10 @@ static struct lock_class_key bch2_btree_node_lock_key; void bch2_btree_lock_init(struct btree_bkey_cached_common *b, - enum six_lock_init_flags flags) + enum six_lock_init_flags flags, + gfp_t gfp) { - __six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags); + __six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags, gfp); lockdep_set_notrack_class(&b->lock); } diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index b54ef48eb8cc2..b33ab7af84402 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -13,7 +13,7 @@ #include "btree_iter.h" #include "six.h" -void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags); +void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags, gfp_t gfp); void bch2_trans_unlock_noassert(struct btree_trans *); void bch2_trans_unlock_write(struct btree_trans *); diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c index 7e7c66a1e1a6b..7c403427fbdb8 100644 --- a/fs/bcachefs/six.c +++ b/fs/bcachefs/six.c @@ -850,7 +850,8 @@ void six_lock_exit(struct six_lock *lock) EXPORT_SYMBOL_GPL(six_lock_exit); void __six_lock_init(struct six_lock *lock, const char *name, - struct lock_class_key *key, enum six_lock_init_flags flags) + struct lock_class_key *key, enum six_lock_init_flags flags, + gfp_t gfp) { atomic_set(&lock->state, 0); raw_spin_lock_init(&lock->wait_lock); @@ -873,7 +874,7 @@ void __six_lock_init(struct six_lock *lock, const char *name, * failure if they wish by checking lock->readers, but generally * will not want to treat it as an error. */ - lock->readers = alloc_percpu(unsigned); + lock->readers = alloc_percpu_gfp(unsigned, gfp); } #endif } diff --git a/fs/bcachefs/six.h b/fs/bcachefs/six.h index c142e06b7a3a7..59b851cf8bacc 100644 --- a/fs/bcachefs/six.h +++ b/fs/bcachefs/six.h @@ -164,18 +164,19 @@ enum six_lock_init_flags { }; void __six_lock_init(struct six_lock *lock, const char *name, - struct lock_class_key *key, enum six_lock_init_flags flags); + struct lock_class_key *key, enum six_lock_init_flags flags, + gfp_t gfp); /** * six_lock_init - initialize a six lock * @lock: lock to initialize * @flags: optional flags, i.e. SIX_LOCK_INIT_PCPU */ -#define six_lock_init(lock, flags) \ +#define six_lock_init(lock, flags, gfp) \ do { \ static struct lock_class_key __key; \ \ - __six_lock_init((lock), #lock, &__key, flags); \ + __six_lock_init((lock), #lock, &__key, flags, gfp); \ } while (0) /** -- GitLab From eb54d2695b57426638fed0ec066ae17a18c4426c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 26 Feb 2025 10:57:26 -0500 Subject: [PATCH 1100/1585] bcachefs: Fix truncate sometimes failing and returning 1 __bch_truncate_folio() may return 1 to indicate dirtyness of the folio being truncated, needed for fpunch to get the i_size writes correct. But truncate was forgetting to clear ret, and sometimes returning it as an error. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 94bf34b9b65f0..717e7b94c66f8 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -466,6 +466,7 @@ int bchfs_truncate(struct mnt_idmap *idmap, ret = bch2_truncate_folio(inode, iattr->ia_size); if (unlikely(ret < 0)) goto err; + ret = 0; truncate_setsize(&inode->v, iattr->ia_size); -- GitLab From 01c9c123db76357d4373b2e97b760a856d6fe822 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 25 Feb 2025 13:10:23 -0800 Subject: [PATCH 1101/1585] net: Use rtnl_net_dev_lock() in register_netdevice_notifier_dev_net(). Breno Leitao reported the splat below. [0] Commit 65161fb544aa ("net: Fix dev_net(dev) race in unregister_netdevice_notifier_dev_net().") added the DEBUG_NET_WARN_ON_ONCE(), assuming that the netdev is not registered before register_netdevice_notifier_dev_net(). But the assumption was simply wrong. Let's use rtnl_net_dev_lock() in register_netdevice_notifier_dev_net(). [0]: WARNING: CPU: 25 PID: 849 at net/core/dev.c:2150 register_netdevice_notifier_dev_net (net/core/dev.c:2150) ? __warn (kernel/panic.c:242 kernel/panic.c:748) ? register_netdevice_notifier_dev_net (net/core/dev.c:2150) ? register_netdevice_notifier_dev_net (net/core/dev.c:2150) ? report_bug (lib/bug.c:? lib/bug.c:219) ? handle_bug (arch/x86/kernel/traps.c:285) ? exc_invalid_op (arch/x86/kernel/traps.c:309) ? asm_exc_invalid_op (./arch/x86/include/asm/idtentry.h:621) ? register_netdevice_notifier_dev_net (net/core/dev.c:2150) ? register_netdevice_notifier_dev_net (./include/net/net_namespace.h:406 ./include/linux/netdevice.h:2663 net/core/dev.c:2144) mlx5e_mdev_notifier_event+0x9f/0xf0 mlx5_ib notifier_call_chain.llvm.12241336988804114627 (kernel/notifier.c:85) blocking_notifier_call_chain (kernel/notifier.c:380) mlx5_core_uplink_netdev_event_replay (drivers/net/ethernet/mellanox/mlx5/core/main.c:352) mlx5_ib_roce_init.llvm.12447516292400117075+0x1c6/0x550 mlx5_ib mlx5r_probe+0x375/0x6a0 mlx5_ib ? kernfs_put (./include/linux/instrumented.h:96 ./include/linux/atomic/atomic-arch-fallback.h:2278 ./include/linux/atomic/atomic-instrumented.h:1384 fs/kernfs/dir.c:557) ? auxiliary_match_id (drivers/base/auxiliary.c:174) ? mlx5r_mp_remove+0x160/0x160 mlx5_ib really_probe (drivers/base/dd.c:? drivers/base/dd.c:658) driver_probe_device (drivers/base/dd.c:830) __driver_attach (drivers/base/dd.c:1217) bus_for_each_dev (drivers/base/bus.c:369) ? driver_attach (drivers/base/dd.c:1157) bus_add_driver (drivers/base/bus.c:679) driver_register (drivers/base/driver.c:249) Fixes: 7fb1073300a2 ("net: Hold rtnl_net_lock() in (un)?register_netdevice_notifier_dev_net().") Reported-by: Breno Leitao Closes: https://lore.kernel.org/netdev/20250224-noisy-cordial-roadrunner-fad40c@leitao/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Tested-by: Breno Leitao Link: https://patch.msgid.link/20250225211023.96448-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 1b252e9459fdb..70c01bd1799e5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2141,21 +2141,15 @@ int register_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn) { - struct net *net = dev_net(dev); int err; - /* rtnl_net_lock() assumes dev is not yet published by - * register_netdevice(). - */ - DEBUG_NET_WARN_ON_ONCE(!list_empty(&dev->dev_list)); - - rtnl_net_lock(net); - err = __register_netdevice_notifier_net(net, nb, false); + rtnl_net_dev_lock(dev); + err = __register_netdevice_notifier_net(dev_net(dev), nb, false); if (!err) { nn->nb = nb; list_add(&nn->list, &dev->net_notifier_list); } - rtnl_net_unlock(net); + rtnl_net_dev_unlock(dev); return err; } -- GitLab From de70981f295e7eab86325db3bf349fa676f16c42 Mon Sep 17 00:00:00 2001 From: Harshitha Ramamurthy Date: Wed, 26 Feb 2025 00:35:26 +0000 Subject: [PATCH 1102/1585] gve: unlink old napi when stopping a queue using queue API When a queue is stopped using the ndo queue API, before destroying its page pool, the associated NAPI instance needs to be unlinked to avoid warnings. Handle this by calling page_pool_disable_direct_recycling() when stopping a queue. Cc: stable@vger.kernel.org Fixes: ebdfae0d377b ("gve: adopt page pool for DQ RDA mode") Reviewed-by: Praveen Kaligineedi Signed-off-by: Harshitha Ramamurthy Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250226003526.1546854-1-hramamurthy@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_rx_dqo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 8ac0047f1ada1..f0674a4435670 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -109,10 +109,12 @@ static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx) void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); + struct gve_rx_ring *rx = &priv->rx[idx]; if (!gve_rx_was_added_to_block(priv, idx)) return; + page_pool_disable_direct_recycling(rx->dqo.page_pool); gve_remove_napi(priv, ntfy_idx); gve_rx_remove_from_block(priv, idx); gve_rx_reset_ring_dqo(priv, idx); -- GitLab From 49806fe6e61b045b5be8610e08b5a3083c109aa0 Mon Sep 17 00:00:00 2001 From: Mohammad Heib Date: Tue, 25 Feb 2025 13:28:52 +0200 Subject: [PATCH 1103/1585] net: Clear old fragment checksum value in napi_reuse_skb In certain cases, napi_get_frags() returns an skb that points to an old received fragment, This skb may have its skb->ip_summed, csum, and other fields set from previous fragment handling. Some network drivers set skb->ip_summed to either CHECKSUM_COMPLETE or CHECKSUM_UNNECESSARY when getting skb from napi_get_frags(), while others only set skb->ip_summed when RX checksum offload is enabled on the device, and do not set any value for skb->ip_summed when hardware checksum offload is disabled, assuming that the skb->ip_summed initiated to zero by napi_reuse_skb, ionic driver for example will ignore/unset any value for the ip_summed filed if HW checksum offload is disabled, and if we have a situation where the user disables the checksum offload during a traffic that could lead to the following errors shown in the kernel logs: dump_stack_lvl+0x34/0x48 __skb_gro_checksum_complete+0x7e/0x90 tcp6_gro_receive+0xc6/0x190 ipv6_gro_receive+0x1ec/0x430 dev_gro_receive+0x188/0x360 ? ionic_rx_clean+0x25a/0x460 [ionic] napi_gro_frags+0x13c/0x300 ? __pfx_ionic_rx_service+0x10/0x10 [ionic] ionic_rx_service+0x67/0x80 [ionic] ionic_cq_service+0x58/0x90 [ionic] ionic_txrx_napi+0x64/0x1b0 [ionic] __napi_poll+0x27/0x170 net_rx_action+0x29c/0x370 handle_softirqs+0xce/0x270 __irq_exit_rcu+0xa3/0xc0 common_interrupt+0x80/0xa0 This inconsistency sometimes leads to checksum validation issues in the upper layers of the network stack. To resolve this, this patch clears the skb->ip_summed value for each reused skb in by napi_reuse_skb(), ensuring that the caller is responsible for setting the correct checksum status. This eliminates potential checksum validation issues caused by improper handling of skb->ip_summed. Fixes: 76620aafd66f ("gro: New frags interface to avoid copying shinfo") Signed-off-by: Mohammad Heib Reviewed-by: Shannon Nelson Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250225112852.2507709-1-mheib@redhat.com Signed-off-by: Jakub Kicinski --- net/core/gro.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/gro.c b/net/core/gro.c index 78b320b631744..0ad549b07e039 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -653,6 +653,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->pkt_type = PACKET_HOST; skb->encapsulation = 0; + skb->ip_summed = CHECKSUM_NONE; skb_shinfo(skb)->gso_type = 0; skb_shinfo(skb)->gso_size = 0; if (unlikely(skb->slow_gro)) { -- GitLab From 77e45145e3039a0fb212556ab3f8c87f54771757 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 23 Feb 2025 23:17:08 +0100 Subject: [PATCH 1104/1585] net: Handle napi_schedule() calls from non-interrupt napi_schedule() is expected to be called either: * From an interrupt, where raised softirqs are handled on IRQ exit * From a softirq disabled section, where raised softirqs are handled on the next call to local_bh_enable(). * From a softirq handler, where raised softirqs are handled on the next round in do_softirq(), or further deferred to a dedicated kthread. Other bare tasks context may end up ignoring the raised NET_RX vector until the next random softirq handling opportunity, which may not happen before a while if the CPU goes idle afterwards with the tick stopped. Such "misuses" have been detected on several places thanks to messages of the kind: "NOHZ tick-stop error: local softirq work is pending, handler #08!!!" For example: __raise_softirq_irqoff __napi_schedule rtl8152_runtime_resume.isra.0 rtl8152_resume usb_resume_interface.isra.0 usb_resume_both __rpm_callback rpm_callback rpm_resume __pm_runtime_resume usb_autoresume_device usb_remote_wakeup hub_event process_one_work worker_thread kthread ret_from_fork ret_from_fork_asm And also: * drivers/net/usb/r8152.c::rtl_work_func_t * drivers/net/netdevsim/netdev.c::nsim_start_xmit There is a long history of issues of this kind: 019edd01d174 ("ath10k: sdio: Add missing BH locking around napi_schdule()") 330068589389 ("idpf: disable local BH when scheduling napi for marker packets") e3d5d70cb483 ("net: lan78xx: fix "softirq work is pending" error") e55c27ed9ccf ("mt76: mt7615: add missing bh-disable around rx napi schedule") c0182aa98570 ("mt76: mt7915: add missing bh-disable around tx napi enable/schedule") 970be1dff26d ("mt76: disable BH around napi_schedule() calls") 019edd01d174 ("ath10k: sdio: Add missing BH locking around napi_schdule()") 30bfec4fec59 ("can: rx-offload: can_rx_offload_threaded_irq_finish(): add new function to be called from threaded interrupt") e63052a5dd3c ("mlx5e: add add missing BH locking around napi_schdule()") 83a0c6e58901 ("i40e: Invoke softirqs after napi_reschedule") bd4ce941c8d5 ("mlx4: Invoke softirqs after napi_reschedule") 8cf699ec849f ("mlx4: do not call napi_schedule() without care") ec13ee80145c ("virtio_net: invoke softirqs after __napi_schedule") This shows that relying on the caller to arrange a proper context for the softirqs to be handled while calling napi_schedule() is very fragile and error prone. Also fixing them can also prove challenging if the caller may be called from different kinds of contexts. Therefore fix this from napi_schedule() itself with waking up ksoftirqd when softirqs are raised from task contexts. Reported-by: Paul Menzel Reported-by: Jakub Kicinski Reported-by: Francois Romieu Closes: https://lore.kernel.org/lkml/354a2690-9bbf-4ccb-8769-fa94707a9340@molgen.mpg.de/ Cc: Breno Leitao Signed-off-by: Frederic Weisbecker Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250223221708.27130-1-frederic@kernel.org Signed-off-by: Jakub Kicinski --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 70c01bd1799e5..30da277c5a6f8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4757,7 +4757,7 @@ static inline void ____napi_schedule(struct softnet_data *sd, * we have to raise NET_RX_SOFTIRQ. */ if (!sd->in_net_rx_action) - __raise_softirq_irqoff(NET_RX_SOFTIRQ); + raise_softirq_irqoff(NET_RX_SOFTIRQ); } #ifdef CONFIG_RPS -- GitLab From bc23d4e30866011700787bab8563de45d5bf8431 Mon Sep 17 00:00:00 2001 From: Adrian Huang Date: Tue, 25 Feb 2025 10:14:57 +0800 Subject: [PATCH 1105/1585] af_unix: Fix memory leak in unix_dgram_sendmsg() After running the 'sendmsg02' program of Linux Test Project (LTP), kmemleak reports the following memory leak: # cat /sys/kernel/debug/kmemleak unreferenced object 0xffff888243866800 (size 2048): comm "sendmsg02", pid 67, jiffies 4294903166 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 5e 00 00 00 00 00 00 00 ........^....... 01 00 07 40 00 00 00 00 00 00 00 00 00 00 00 00 ...@............ backtrace (crc 7e96a3f2): kmemleak_alloc+0x56/0x90 kmem_cache_alloc_noprof+0x209/0x450 sk_prot_alloc.constprop.0+0x60/0x160 sk_alloc+0x32/0xc0 unix_create1+0x67/0x2b0 unix_create+0x47/0xa0 __sock_create+0x12e/0x200 __sys_socket+0x6d/0x100 __x64_sys_socket+0x1b/0x30 x64_sys_call+0x7e1/0x2140 do_syscall_64+0x54/0x110 entry_SYSCALL_64_after_hwframe+0x76/0x7e Commit 689c398885cc ("af_unix: Defer sock_put() to clean up path in unix_dgram_sendmsg().") defers sock_put() in the error handling path. However, it fails to account for the condition 'msg->msg_namelen != 0', resulting in a memory leak when the code jumps to the 'lookup' label. Fix issue by calling sock_put() if 'msg->msg_namelen != 0' is met. Fixes: 689c398885cc ("af_unix: Defer sock_put() to clean up path in unix_dgram_sendmsg().") Signed-off-by: Adrian Huang Acked-by: Joe Damato Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250225021457.1824-1-ahuang12@lenovo.com Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 34945de1fb1fa..f0e613d976640 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2102,6 +2102,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out_sock_put; } + sock_put(other); goto lookup; } -- GitLab From 2d253726ff7106b39a44483b6864398bba8a2f74 Mon Sep 17 00:00:00 2001 From: Harshal Chaudhari Date: Mon, 24 Feb 2025 20:20:58 -0800 Subject: [PATCH 1106/1585] net: mvpp2: cls: Fixed Non IP flow, with vlan tag flow defination. Non IP flow, with vlan tag not working as expected while running below command for vlan-priority. fixed that. ethtool -N eth1 flow-type ether vlan 0x8000 vlan-mask 0x1fff action 0 loc 0 Fixes: 1274daede3ef ("net: mvpp2: cls: Add steering based on vlan Id and priority.") Signed-off-by: Harshal Chaudhari Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20250225042058.2643838-1-hchaudhari@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c index 1641791a2d5b4..8ed83fb988624 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c @@ -324,7 +324,7 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = { MVPP2_PRS_RI_VLAN_MASK), /* Non IP flow, with vlan tag */ MVPP2_DEF_FLOW(MVPP22_FLOW_ETHERNET, MVPP2_FL_NON_IP_TAG, - MVPP22_CLS_HEK_OPT_VLAN, + MVPP22_CLS_HEK_TAGGED, 0, 0), }; -- GitLab From 7f3528f7d2f98b70e19a6bb7b130fc82c079ac54 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Tue, 25 Feb 2025 09:26:06 +0200 Subject: [PATCH 1107/1585] net/mlx5: Fix vport QoS cleanup on error When enabling vport QoS fails, the scheduling node was never freed, causing a leak. Add the missing free and reset the vport scheduling node pointer to NULL. Fixes: be034baba83e ("net/mlx5: Make vport QoS enablement more flexible for future extensions") Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250225072608.526866-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 8b7c843446e11..07a28073a49ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -591,8 +591,11 @@ static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_t sched_node->vport = vport; vport->qos.sched_node = sched_node; err = esw_qos_vport_enable(vport, parent, extack); - if (err) + if (err) { + __esw_qos_free_node(sched_node); esw_qos_put(esw); + vport->qos.sched_node = NULL; + } return err; } -- GitLab From 47bcd9bf3d231bfd4698d7d3013597490fd5e2d6 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Tue, 25 Feb 2025 09:26:07 +0200 Subject: [PATCH 1108/1585] net/mlx5: Restore missing trace event when enabling vport QoS Restore the `trace_mlx5_esw_vport_qos_create` event when creating the vport scheduling element. This trace event was lost during refactoring. Fixes: be034baba83e ("net/mlx5: Make vport QoS enablement more flexible for future extensions") Signed-off-by: Carolina Jubran Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250225072608.526866-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 07a28073a49ea..823c1ba456cd1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -564,6 +564,9 @@ static int esw_qos_vport_enable(struct mlx5_vport *vport, struct mlx5_esw_sched_ return err; esw_qos_normalize_min_rate(parent->esw, parent, extack); + trace_mlx5_esw_vport_qos_create(vport->dev, vport, + vport->qos.sched_node->max_rate, + vport->qos.sched_node->bw_share); return 0; } -- GitLab From 2f5a6014eb168a97b24153adccfa663d3b282767 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 25 Feb 2025 09:26:08 +0200 Subject: [PATCH 1109/1585] net/mlx5: IRQ, Fix null string in debug print irq_pool_alloc() debug print can print a null string. Fix it by providing a default string to print. Fixes: 71e084e26414 ("net/mlx5: Allocating a pool of MSI-X vectors for SFs") Signed-off-by: Shay Drory Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501141055.SwfIphN0-lkp@intel.com/ Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Reviewed-by: Kalesh AP Link: https://patch.msgid.link/20250225072608.526866-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 7db9cab9bedf6..d9362eabc6a1c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -572,7 +572,7 @@ irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name, pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ; pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ; mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d", - name, size, start); + name ? name : "mlx5_pcif_pool", size, start); return pool; } -- GitLab From e521f516716de7895acd1b5b7fac788214a390b9 Mon Sep 17 00:00:00 2001 From: Caleb Connolly Date: Sat, 8 Feb 2025 22:30:54 +0000 Subject: [PATCH 1110/1585] dmaengine: Revert "dmaengine: qcom: bam_dma: Avoid writing unavailable register" This commit causes a hard crash on sdm845 and likely other platforms. Revert it until a proper fix is found. This reverts commit 57a7138d0627: ("dmaengine: qcom: bam_dma: Avoid writing unavailable register") Signed-off-by: Caleb Connolly Fixes: 57a7138d0627 ("dmaengine: qcom: bam_dma: Avoid writing unavailable register") Tested-by: Neil Armstrong # on sdm845-DB845c Tested-by: David Heidelberg Link: https://lore.kernel.org/r/20250208223112.142567-1-caleb.connolly@linaro.org Signed-off-by: Vinod Koul --- drivers/dma/qcom/bam_dma.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index c14557efd5770..bbc3276992bb0 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -59,9 +59,6 @@ struct bam_desc_hw { #define DESC_FLAG_NWD BIT(12) #define DESC_FLAG_CMD BIT(11) -#define BAM_NDP_REVISION_START 0x20 -#define BAM_NDP_REVISION_END 0x27 - struct bam_async_desc { struct virt_dma_desc vd; @@ -401,7 +398,6 @@ struct bam_device { /* dma start transaction tasklet */ struct tasklet_struct task; - u32 bam_revision; }; /** @@ -445,10 +441,8 @@ static void bam_reset(struct bam_device *bdev) writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL)); /* set descriptor threshold, start with 4 bytes */ - if (in_range(bdev->bam_revision, BAM_NDP_REVISION_START, - BAM_NDP_REVISION_END)) - writel_relaxed(DEFAULT_CNT_THRSHLD, - bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD)); + writel_relaxed(DEFAULT_CNT_THRSHLD, + bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD)); /* Enable default set of h/w workarounds, ie all except BAM_FULL_PIPE */ writel_relaxed(BAM_CNFG_BITS_DEFAULT, bam_addr(bdev, 0, BAM_CNFG_BITS)); @@ -1006,10 +1000,9 @@ static void bam_apply_new_config(struct bam_chan *bchan, maxburst = bchan->slave.src_maxburst; else maxburst = bchan->slave.dst_maxburst; - if (in_range(bdev->bam_revision, BAM_NDP_REVISION_START, - BAM_NDP_REVISION_END)) - writel_relaxed(maxburst, - bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD)); + + writel_relaxed(maxburst, + bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD)); } bchan->reconfigure = 0; @@ -1199,11 +1192,10 @@ static int bam_init(struct bam_device *bdev) u32 val; /* read revision and configuration information */ - val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION)); - if (!bdev->num_ees) + if (!bdev->num_ees) { + val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION)); bdev->num_ees = (val >> NUM_EES_SHIFT) & NUM_EES_MASK; - - bdev->bam_revision = val & REVISION_MASK; + } /* check that configured EE is within range */ if (bdev->ee >= bdev->num_ees) -- GitLab From 3603996432997f7c88da37a97062a46cda01ac9d Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 11 Dec 2024 10:06:28 +0100 Subject: [PATCH 1111/1585] drm/fbdev-dma: Add shadow buffering for deferred I/O MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DMA areas are not necessarily backed by struct page, so we cannot rely on it for deferred I/O. Allocate a shadow buffer for drivers that require deferred I/O and use it as framebuffer memory. Fixes driver errors about being "Unable to handle kernel NULL pointer dereference at virtual address" or "Unable to handle kernel paging request at virtual address". The patch splits drm_fbdev_dma_driver_fbdev_probe() in an initial allocation, which creates the DMA-backed buffer object, and a tail that sets up the fbdev data structures. There is a tail function for direct memory mappings and a tail function for deferred I/O with the shadow buffer. It is no longer possible to use deferred I/O without shadow buffer. It can be re-added if there exists a reliably test for usable struct page in the allocated DMA-backed buffer object. Signed-off-by: Thomas Zimmermann Reported-by: Nuno Gonçalves CLoses: https://lore.kernel.org/dri-devel/CAEXMXLR55DziAMbv_+2hmLeH-jP96pmit6nhs6siB22cpQFr9w@mail.gmail.com/ Tested-by: Nuno Gonçalves Fixes: 5ab91447aa13 ("drm/tiny/ili9225: Use fbdev-dma") Cc: Thomas Zimmermann Cc: # v6.11+ Reviewed-by: Simona Vetter Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20241211090643.74250-1-tzimmermann@suse.de --- drivers/gpu/drm/drm_fbdev_dma.c | 217 +++++++++++++++++++++++--------- 1 file changed, 155 insertions(+), 62 deletions(-) diff --git a/drivers/gpu/drm/drm_fbdev_dma.c b/drivers/gpu/drm/drm_fbdev_dma.c index b14b581c059d3..02a516e771927 100644 --- a/drivers/gpu/drm/drm_fbdev_dma.c +++ b/drivers/gpu/drm/drm_fbdev_dma.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: MIT #include +#include #include #include @@ -70,37 +71,102 @@ static const struct fb_ops drm_fbdev_dma_fb_ops = { .fb_destroy = drm_fbdev_dma_fb_destroy, }; -FB_GEN_DEFAULT_DEFERRED_DMAMEM_OPS(drm_fbdev_dma, +FB_GEN_DEFAULT_DEFERRED_DMAMEM_OPS(drm_fbdev_dma_shadowed, drm_fb_helper_damage_range, drm_fb_helper_damage_area); -static int drm_fbdev_dma_deferred_fb_mmap(struct fb_info *info, struct vm_area_struct *vma) +static void drm_fbdev_dma_shadowed_fb_destroy(struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; - struct drm_framebuffer *fb = fb_helper->fb; - struct drm_gem_dma_object *dma = drm_fb_dma_get_gem_obj(fb, 0); + void *shadow = info->screen_buffer; + + if (!fb_helper->dev) + return; - if (!dma->map_noncoherent) - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + if (info->fbdefio) + fb_deferred_io_cleanup(info); + drm_fb_helper_fini(fb_helper); + vfree(shadow); - return fb_deferred_io_mmap(info, vma); + drm_client_buffer_vunmap(fb_helper->buffer); + drm_client_framebuffer_delete(fb_helper->buffer); + drm_client_release(&fb_helper->client); + drm_fb_helper_unprepare(fb_helper); + kfree(fb_helper); } -static const struct fb_ops drm_fbdev_dma_deferred_fb_ops = { +static const struct fb_ops drm_fbdev_dma_shadowed_fb_ops = { .owner = THIS_MODULE, .fb_open = drm_fbdev_dma_fb_open, .fb_release = drm_fbdev_dma_fb_release, - __FB_DEFAULT_DEFERRED_OPS_RDWR(drm_fbdev_dma), + FB_DEFAULT_DEFERRED_OPS(drm_fbdev_dma_shadowed), DRM_FB_HELPER_DEFAULT_OPS, - __FB_DEFAULT_DEFERRED_OPS_DRAW(drm_fbdev_dma), - .fb_mmap = drm_fbdev_dma_deferred_fb_mmap, - .fb_destroy = drm_fbdev_dma_fb_destroy, + .fb_destroy = drm_fbdev_dma_shadowed_fb_destroy, }; /* * struct drm_fb_helper */ +static void drm_fbdev_dma_damage_blit_real(struct drm_fb_helper *fb_helper, + struct drm_clip_rect *clip, + struct iosys_map *dst) +{ + struct drm_framebuffer *fb = fb_helper->fb; + size_t offset = clip->y1 * fb->pitches[0]; + size_t len = clip->x2 - clip->x1; + unsigned int y; + void *src; + + switch (drm_format_info_bpp(fb->format, 0)) { + case 1: + offset += clip->x1 / 8; + len = DIV_ROUND_UP(len + clip->x1 % 8, 8); + break; + case 2: + offset += clip->x1 / 4; + len = DIV_ROUND_UP(len + clip->x1 % 4, 4); + break; + case 4: + offset += clip->x1 / 2; + len = DIV_ROUND_UP(len + clip->x1 % 2, 2); + break; + default: + offset += clip->x1 * fb->format->cpp[0]; + len *= fb->format->cpp[0]; + break; + } + + src = fb_helper->info->screen_buffer + offset; + iosys_map_incr(dst, offset); /* go to first pixel within clip rect */ + + for (y = clip->y1; y < clip->y2; y++) { + iosys_map_memcpy_to(dst, 0, src, len); + iosys_map_incr(dst, fb->pitches[0]); + src += fb->pitches[0]; + } +} + +static int drm_fbdev_dma_damage_blit(struct drm_fb_helper *fb_helper, + struct drm_clip_rect *clip) +{ + struct drm_client_buffer *buffer = fb_helper->buffer; + struct iosys_map dst; + + /* + * For fbdev emulation, we only have to protect against fbdev modeset + * operations. Nothing else will involve the client buffer's BO. So it + * is sufficient to acquire struct drm_fb_helper.lock here. + */ + mutex_lock(&fb_helper->lock); + + dst = buffer->map; + drm_fbdev_dma_damage_blit_real(fb_helper, clip, &dst); + + mutex_unlock(&fb_helper->lock); + + return 0; +} static int drm_fbdev_dma_helper_fb_dirty(struct drm_fb_helper *helper, struct drm_clip_rect *clip) { @@ -112,6 +178,10 @@ static int drm_fbdev_dma_helper_fb_dirty(struct drm_fb_helper *helper, return 0; if (helper->fb->funcs->dirty) { + ret = drm_fbdev_dma_damage_blit(helper, clip); + if (drm_WARN_ONCE(dev, ret, "Damage blitter failed: ret=%d\n", ret)) + return ret; + ret = helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, clip, 1); if (drm_WARN_ONCE(dev, ret, "Dirty helper failed: ret=%d\n", ret)) return ret; @@ -128,14 +198,80 @@ static const struct drm_fb_helper_funcs drm_fbdev_dma_helper_funcs = { * struct drm_fb_helper */ +static int drm_fbdev_dma_driver_fbdev_probe_tail(struct drm_fb_helper *fb_helper, + struct drm_fb_helper_surface_size *sizes) +{ + struct drm_device *dev = fb_helper->dev; + struct drm_client_buffer *buffer = fb_helper->buffer; + struct drm_gem_dma_object *dma_obj = to_drm_gem_dma_obj(buffer->gem); + struct drm_framebuffer *fb = fb_helper->fb; + struct fb_info *info = fb_helper->info; + struct iosys_map map = buffer->map; + + info->fbops = &drm_fbdev_dma_fb_ops; + + /* screen */ + info->flags |= FBINFO_VIRTFB; /* system memory */ + if (dma_obj->map_noncoherent) + info->flags |= FBINFO_READS_FAST; /* signal caching */ + info->screen_size = sizes->surface_height * fb->pitches[0]; + info->screen_buffer = map.vaddr; + if (!(info->flags & FBINFO_HIDE_SMEM_START)) { + if (!drm_WARN_ON(dev, is_vmalloc_addr(info->screen_buffer))) + info->fix.smem_start = page_to_phys(virt_to_page(info->screen_buffer)); + } + info->fix.smem_len = info->screen_size; + + return 0; +} + +static int drm_fbdev_dma_driver_fbdev_probe_tail_shadowed(struct drm_fb_helper *fb_helper, + struct drm_fb_helper_surface_size *sizes) +{ + struct drm_client_buffer *buffer = fb_helper->buffer; + struct fb_info *info = fb_helper->info; + size_t screen_size = buffer->gem->size; + void *screen_buffer; + int ret; + + /* + * Deferred I/O requires struct page for framebuffer memory, + * which is not guaranteed for all DMA ranges. We thus create + * a shadow buffer in system memory. + */ + screen_buffer = vzalloc(screen_size); + if (!screen_buffer) + return -ENOMEM; + + info->fbops = &drm_fbdev_dma_shadowed_fb_ops; + + /* screen */ + info->flags |= FBINFO_VIRTFB; /* system memory */ + info->flags |= FBINFO_READS_FAST; /* signal caching */ + info->screen_buffer = screen_buffer; + info->fix.smem_len = screen_size; + + fb_helper->fbdefio.delay = HZ / 20; + fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io; + + info->fbdefio = &fb_helper->fbdefio; + ret = fb_deferred_io_init(info); + if (ret) + goto err_vfree; + + return 0; + +err_vfree: + vfree(screen_buffer); + return ret; +} + int drm_fbdev_dma_driver_fbdev_probe(struct drm_fb_helper *fb_helper, struct drm_fb_helper_surface_size *sizes) { struct drm_client_dev *client = &fb_helper->client; struct drm_device *dev = fb_helper->dev; - bool use_deferred_io = false; struct drm_client_buffer *buffer; - struct drm_gem_dma_object *dma_obj; struct drm_framebuffer *fb; struct fb_info *info; u32 format; @@ -152,19 +288,9 @@ int drm_fbdev_dma_driver_fbdev_probe(struct drm_fb_helper *fb_helper, sizes->surface_height, format); if (IS_ERR(buffer)) return PTR_ERR(buffer); - dma_obj = to_drm_gem_dma_obj(buffer->gem); fb = buffer->fb; - /* - * Deferred I/O requires struct page for framebuffer memory, - * which is not guaranteed for all DMA ranges. We thus only - * install deferred I/O if we have a framebuffer that requires - * it. - */ - if (fb->funcs->dirty) - use_deferred_io = true; - ret = drm_client_buffer_vmap(buffer, &map); if (ret) { goto err_drm_client_buffer_delete; @@ -185,45 +311,12 @@ int drm_fbdev_dma_driver_fbdev_probe(struct drm_fb_helper *fb_helper, drm_fb_helper_fill_info(info, fb_helper, sizes); - if (use_deferred_io) - info->fbops = &drm_fbdev_dma_deferred_fb_ops; + if (fb->funcs->dirty) + ret = drm_fbdev_dma_driver_fbdev_probe_tail_shadowed(fb_helper, sizes); else - info->fbops = &drm_fbdev_dma_fb_ops; - - /* screen */ - info->flags |= FBINFO_VIRTFB; /* system memory */ - if (dma_obj->map_noncoherent) - info->flags |= FBINFO_READS_FAST; /* signal caching */ - info->screen_size = sizes->surface_height * fb->pitches[0]; - info->screen_buffer = map.vaddr; - if (!(info->flags & FBINFO_HIDE_SMEM_START)) { - if (!drm_WARN_ON(dev, is_vmalloc_addr(info->screen_buffer))) - info->fix.smem_start = page_to_phys(virt_to_page(info->screen_buffer)); - } - info->fix.smem_len = info->screen_size; - - /* - * Only set up deferred I/O if the screen buffer supports - * it. If this disagrees with the previous test for ->dirty, - * mmap on the /dev/fb file might not work correctly. - */ - if (!is_vmalloc_addr(info->screen_buffer) && info->fix.smem_start) { - unsigned long pfn = info->fix.smem_start >> PAGE_SHIFT; - - if (drm_WARN_ON(dev, !pfn_to_page(pfn))) - use_deferred_io = false; - } - - /* deferred I/O */ - if (use_deferred_io) { - fb_helper->fbdefio.delay = HZ / 20; - fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io; - - info->fbdefio = &fb_helper->fbdefio; - ret = fb_deferred_io_init(info); - if (ret) - goto err_drm_fb_helper_release_info; - } + ret = drm_fbdev_dma_driver_fbdev_probe_tail(fb_helper, sizes); + if (ret) + goto err_drm_fb_helper_release_info; return 0; -- GitLab From 6d48ad04075729519f6baaa1dc9e5a3a39d05f53 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Wed, 26 Feb 2025 21:28:41 +0800 Subject: [PATCH 1112/1585] MIPS: Ignore relocs against __ex_table for relocatable kernel Since commit 6f2c2f93a190 ("scripts/sorttable: Remove unneeded Elf_Rel"), sorttable no longer clears relocs against __ex_table, claiming "it was never used." But in fact MIPS relocatable kernel had been implicitly depending on this behavior, so after this commit the MIPS relocatable kernel has started to spit oops like: CPU 1 Unable to handle kernel paging request at virtual address 000000fffbbdbff8, epc == ffffffff818f9a6c, ra == ffffffff813ad7d0 ... ... Call Trace: [] __raw_copy_from_user+0x48/0x2fc [] cp_statx+0x1a0/0x1e0 [] do_statx_fd+0xa8/0x118 [] sys_statx+0xd8/0xf8 [] syscall_common+0x34/0x58 So ignore those relocs on our own to fix the issue. Fixes: 6f2c2f93a190 ("scripts/sorttable: Remove unneeded Elf_Rel") Signed-off-by: Xi Ruoyao Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/tools/relocs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/mips/boot/tools/relocs.c b/arch/mips/boot/tools/relocs.c index a88d66c46d7f7..9863e1d5c62e3 100644 --- a/arch/mips/boot/tools/relocs.c +++ b/arch/mips/boot/tools/relocs.c @@ -468,6 +468,8 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, const char *symname)) { int i; + struct section *extab_sec = sec_lookup("__ex_table"); + int extab_index = extab_sec ? extab_sec - secs : -1; /* Walk through the relocations */ for (i = 0; i < ehdr.e_shnum; i++) { @@ -480,6 +482,9 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, if (sec->shdr.sh_type != SHT_REL_TYPE) continue; + if (sec->shdr.sh_info == extab_index) + continue; + sec_symtab = sec->link; sec_applies = &secs[sec->shdr.sh_info]; if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) -- GitLab From fc20737d8b85691ecabab3739ed7d06c9b7bc00f Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 26 Feb 2025 16:48:26 -0500 Subject: [PATCH 1113/1585] efivarfs: allow creation of zero length files Temporarily allow the creation of zero length files in efivarfs so the 'fwupd' user space firmware update tool can continue to operate. This hack should be reverted as soon as the fwupd mechanisms for updating firmware have been fixed. fwupd has been coded to open a firmware file, close it, remove the immutable bit and write to it. Since commit 908af31f4896 ("efivarfs: fix error on write to new variable leaving remnants") this behaviour results in the first close removing the file which causes the second write to fail. To allow fwupd to keep working code up an indicator of size 1 if a write fails and only remove the file on that condition (so create at zero size is allowed). Tested-by: Richard Hughes Signed-off-by: James Bottomley [ardb: replace LVFS with fwupd, as suggested by Richard] Signed-off-by: Ard Biesheuvel --- fs/efivarfs/file.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index cb1b6d0c34545..c294a8fc566da 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c @@ -57,10 +57,11 @@ static ssize_t efivarfs_file_write(struct file *file, if (bytes == -ENOENT) { /* - * zero size signals to release that the write deleted - * the variable + * FIXME: temporary workaround for fwupdate, signal + * failed write with a 1 to keep created but not + * written files */ - i_size_write(inode, 0); + i_size_write(inode, 1); } else { i_size_write(inode, datasize + sizeof(attributes)); inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); @@ -124,7 +125,8 @@ static int efivarfs_file_release(struct inode *inode, struct file *file) struct efivar_entry *var = inode->i_private; inode_lock(inode); - var->removed = (--var->open_count == 0 && i_size_read(inode) == 0); + /* FIXME: temporary work around for fwupdate */ + var->removed = (--var->open_count == 0 && i_size_read(inode) == 1); inode_unlock(inode); if (var->removed) -- GitLab From 1cf9631d836b289bd5490776551961c883ae8a4f Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Mon, 24 Feb 2025 20:29:17 +0300 Subject: [PATCH 1114/1585] usbnet: gl620a: fix endpoint checking in genelink_bind() Syzbot reports [1] a warning in usb_submit_urb() triggered by inconsistencies between expected and actually present endpoints in gl620a driver. Since genelink_bind() does not properly verify whether specified eps are in fact provided by the device, in this case, an artificially manufactured one, one may get a mismatch. Fix the issue by resorting to a usbnet utility function usbnet_get_endpoints(), usually reserved for this very problem. Check for endpoints and return early before proceeding further if any are missing. [1] Syzbot report: usb 5-1: Manufacturer: syz usb 5-1: SerialNumber: syz usb 5-1: config 0 descriptor?? gl620a 5-1:0.23 usb0: register 'gl620a' at usb-dummy_hcd.0-1, ... ------------[ cut here ]------------ usb 5-1: BOGUS urb xfer, pipe 3 != type 1 WARNING: CPU: 2 PID: 1841 at drivers/usb/core/urb.c:503 usb_submit_urb+0xe4b/0x1730 drivers/usb/core/urb.c:503 Modules linked in: CPU: 2 UID: 0 PID: 1841 Comm: kworker/2:2 Not tainted 6.12.0-syzkaller-07834-g06afb0f36106 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Workqueue: mld mld_ifc_work RIP: 0010:usb_submit_urb+0xe4b/0x1730 drivers/usb/core/urb.c:503 ... Call Trace: usbnet_start_xmit+0x6be/0x2780 drivers/net/usb/usbnet.c:1467 __netdev_start_xmit include/linux/netdevice.h:5002 [inline] netdev_start_xmit include/linux/netdevice.h:5011 [inline] xmit_one net/core/dev.c:3590 [inline] dev_hard_start_xmit+0x9a/0x7b0 net/core/dev.c:3606 sch_direct_xmit+0x1ae/0xc30 net/sched/sch_generic.c:343 __dev_xmit_skb net/core/dev.c:3827 [inline] __dev_queue_xmit+0x13d4/0x43e0 net/core/dev.c:4400 dev_queue_xmit include/linux/netdevice.h:3168 [inline] neigh_resolve_output net/core/neighbour.c:1514 [inline] neigh_resolve_output+0x5bc/0x950 net/core/neighbour.c:1494 neigh_output include/net/neighbour.h:539 [inline] ip6_finish_output2+0xb1b/0x2070 net/ipv6/ip6_output.c:141 __ip6_finish_output net/ipv6/ip6_output.c:215 [inline] ip6_finish_output+0x3f9/0x1360 net/ipv6/ip6_output.c:226 NF_HOOK_COND include/linux/netfilter.h:303 [inline] ip6_output+0x1f8/0x540 net/ipv6/ip6_output.c:247 dst_output include/net/dst.h:450 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] NF_HOOK include/linux/netfilter.h:308 [inline] mld_sendpack+0x9f0/0x11d0 net/ipv6/mcast.c:1819 mld_send_cr net/ipv6/mcast.c:2120 [inline] mld_ifc_work+0x740/0xca0 net/ipv6/mcast.c:2651 process_one_work+0x9c5/0x1ba0 kernel/workqueue.c:3229 process_scheduled_works kernel/workqueue.c:3310 [inline] worker_thread+0x6c8/0xf00 kernel/workqueue.c:3391 kthread+0x2c1/0x3a0 kernel/kthread.c:389 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Reported-by: syzbot+d693c07c6f647e0388d3@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d693c07c6f647e0388d3 Fixes: 47ee3051c856 ("[PATCH] USB: usbnet (5/9) module for genesys gl620a cables") Cc: stable@vger.kernel.org Signed-off-by: Nikita Zhandarovich Link: https://patch.msgid.link/20250224172919.1220522-1-n.zhandarovich@fintech.ru Signed-off-by: Paolo Abeni --- drivers/net/usb/gl620a.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/usb/gl620a.c b/drivers/net/usb/gl620a.c index 46af78caf457a..0bfa37c140591 100644 --- a/drivers/net/usb/gl620a.c +++ b/drivers/net/usb/gl620a.c @@ -179,9 +179,7 @@ static int genelink_bind(struct usbnet *dev, struct usb_interface *intf) { dev->hard_mtu = GL_RCV_BUF_SIZE; dev->net->hard_header_len += 4; - dev->in = usb_rcvbulkpipe(dev->udev, dev->driver_info->in); - dev->out = usb_sndbulkpipe(dev->udev, dev->driver_info->out); - return 0; + return usbnet_get_endpoints(dev, intf); } static const struct driver_info genelink_info = { -- GitLab From c64a0727f9b1cbc63a5538c8c0014e9a175ad864 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 25 Feb 2025 18:51:38 +0100 Subject: [PATCH 1115/1585] net: ipv6: fix dst ref loop on input in seg6 lwt Prevent a dst ref loop on input in seg6_iptunnel. Fixes: af4a2209b134 ("ipv6: sr: use dst_cache in seg6_input") Cc: David Lebrun Cc: Ido Schimmel Reviewed-by: Ido Schimmel Signed-off-by: Justin Iurman Signed-off-by: Paolo Abeni --- net/ipv6/seg6_iptunnel.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 33833b2064c07..51583461ae29b 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -472,10 +472,18 @@ static int seg6_input_core(struct net *net, struct sock *sk, { struct dst_entry *orig_dst = skb_dst(skb); struct dst_entry *dst = NULL; + struct lwtunnel_state *lwtst; struct seg6_lwt *slwt; int err; - slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); + /* We cannot dereference "orig_dst" once ip6_route_input() or + * skb_dst_drop() is called. However, in order to detect a dst loop, we + * need the address of its lwtstate. So, save the address of lwtstate + * now and use it later as a comparison. + */ + lwtst = orig_dst->lwtstate; + + slwt = seg6_lwt_lwtunnel(lwtst); local_bh_disable(); dst = dst_cache_get(&slwt->cache); @@ -490,7 +498,9 @@ static int seg6_input_core(struct net *net, struct sock *sk, if (!dst) { ip6_route_input(skb); dst = skb_dst(skb); - if (!dst->error) { + + /* cache only if we don't create a dst reference loop */ + if (!dst->error && lwtst != dst->lwtstate) { local_bh_disable(); dst_cache_set_ip6(&slwt->cache, dst, &ipv6_hdr(skb)->saddr); -- GitLab From 13e55fbaec176119cff68a7e1693b251c8883c5f Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 25 Feb 2025 18:51:39 +0100 Subject: [PATCH 1116/1585] net: ipv6: fix dst ref loop on input in rpl lwt Prevent a dst ref loop on input in rpl_iptunnel. Fixes: a7a29f9c361f ("net: ipv6: add rpl sr tunnel") Cc: Alexander Aring Cc: Ido Schimmel Reviewed-by: Ido Schimmel Signed-off-by: Justin Iurman Signed-off-by: Paolo Abeni --- net/ipv6/rpl_iptunnel.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index 0ac4283acdf20..7c05ac846646f 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -262,10 +262,18 @@ static int rpl_input(struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); struct dst_entry *dst = NULL; + struct lwtunnel_state *lwtst; struct rpl_lwt *rlwt; int err; - rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate); + /* We cannot dereference "orig_dst" once ip6_route_input() or + * skb_dst_drop() is called. However, in order to detect a dst loop, we + * need the address of its lwtstate. So, save the address of lwtstate + * now and use it later as a comparison. + */ + lwtst = orig_dst->lwtstate; + + rlwt = rpl_lwt_lwtunnel(lwtst); local_bh_disable(); dst = dst_cache_get(&rlwt->cache); @@ -280,7 +288,9 @@ static int rpl_input(struct sk_buff *skb) if (!dst) { ip6_route_input(skb); dst = skb_dst(skb); - if (!dst->error) { + + /* cache only if we don't create a dst reference loop */ + if (!dst->error && lwtst != dst->lwtstate) { local_bh_disable(); dst_cache_set_ip6(&rlwt->cache, dst, &ipv6_hdr(skb)->saddr); -- GitLab From 1cbddbddee68d17feb6467fc556c144777af91ef Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Wed, 26 Feb 2025 18:19:57 +0000 Subject: [PATCH 1117/1585] selftests: drv-net: Check if combined-count exists Some drivers, like tg3, do not set combined-count: $ ethtool -l enp4s0f1 Channel parameters for enp4s0f1: Pre-set maximums: RX: 4 TX: 4 Other: n/a Combined: n/a Current hardware settings: RX: 4 TX: 1 Other: n/a Combined: n/a In the case where combined-count is not set, the ethtool netlink code in the kernel elides the value and the code in the test: netnl.channels_get(...) With a tg3 device, the returned dictionary looks like: {'header': {'dev-index': 3, 'dev-name': 'enp4s0f1'}, 'rx-max': 4, 'rx-count': 4, 'tx-max': 4, 'tx-count': 1} Note that the key 'combined-count' is missing. As a result of this missing key the test raises an exception: # Exception| if channels['combined-count'] == 0: # Exception| ~~~~~~~~^^^^^^^^^^^^^^^^^^ # Exception| KeyError: 'combined-count' Change the test to check if 'combined-count' is a key in the dictionary first and if not assume that this means the driver has separate RX and TX queues. With this change, the test now passes successfully on tg3 and mlx5 (which does have a 'combined-count'). Fixes: 1cf270424218 ("net: selftest: add test for netdev netlink queue-get API") Signed-off-by: Joe Damato Reviewed-by: David Wei Link: https://patch.msgid.link/20250226181957.212189-1-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/queues.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py index 38303da957ee5..8a518905a9f9c 100755 --- a/tools/testing/selftests/drivers/net/queues.py +++ b/tools/testing/selftests/drivers/net/queues.py @@ -45,10 +45,9 @@ def addremove_queues(cfg, nl) -> None: netnl = EthtoolFamily() channels = netnl.channels_get({'header': {'dev-index': cfg.ifindex}}) - if channels['combined-count'] == 0: - rx_type = 'rx' - else: - rx_type = 'combined' + rx_type = 'rx' + if channels.get('combined-count', 0) > 0: + rx_type = 'combined' expected = curr_queues - 1 cmd(f"ethtool -L {cfg.dev['ifname']} {rx_type} {expected}", timeout=10) -- GitLab From 674fcb4f4a7e3e277417a01788cc6daae47c3804 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 Feb 2025 22:12:52 +0000 Subject: [PATCH 1118/1585] idpf: fix checksums set in idpf_rx_rsc() idpf_rx_rsc() uses skb_transport_offset(skb) while the transport header is not set yet. This triggers the following warning for CONFIG_DEBUG_NET=y builds. DEBUG_NET_WARN_ON_ONCE(!skb_transport_header_was_set(skb)) [ 69.261620] WARNING: CPU: 7 PID: 0 at ./include/linux/skbuff.h:3020 idpf_vport_splitq_napi_poll (include/linux/skbuff.h:3020) idpf [ 69.261629] Modules linked in: vfat fat dummy bridge intel_uncore_frequency_tpmi intel_uncore_frequency_common intel_vsec_tpmi idpf intel_vsec cdc_ncm cdc_eem cdc_ether usbnet mii xhci_pci xhci_hcd ehci_pci ehci_hcd libeth [ 69.261644] CPU: 7 UID: 0 PID: 0 Comm: swapper/7 Tainted: G S W 6.14.0-smp-DEV #1697 [ 69.261648] Tainted: [S]=CPU_OUT_OF_SPEC, [W]=WARN [ 69.261650] RIP: 0010:idpf_vport_splitq_napi_poll (include/linux/skbuff.h:3020) idpf [ 69.261677] ? __warn (kernel/panic.c:242 kernel/panic.c:748) [ 69.261682] ? idpf_vport_splitq_napi_poll (include/linux/skbuff.h:3020) idpf [ 69.261687] ? report_bug (lib/bug.c:?) [ 69.261690] ? handle_bug (arch/x86/kernel/traps.c:285) [ 69.261694] ? exc_invalid_op (arch/x86/kernel/traps.c:309) [ 69.261697] ? asm_exc_invalid_op (arch/x86/include/asm/idtentry.h:621) [ 69.261700] ? __pfx_idpf_vport_splitq_napi_poll (drivers/net/ethernet/intel/idpf/idpf_txrx.c:4011) idpf [ 69.261704] ? idpf_vport_splitq_napi_poll (include/linux/skbuff.h:3020) idpf [ 69.261708] ? idpf_vport_splitq_napi_poll (drivers/net/ethernet/intel/idpf/idpf_txrx.c:3072) idpf [ 69.261712] __napi_poll (net/core/dev.c:7194) [ 69.261716] net_rx_action (net/core/dev.c:7265) [ 69.261718] ? __qdisc_run (net/sched/sch_generic.c:293) [ 69.261721] ? sched_clock (arch/x86/include/asm/preempt.h:84 arch/x86/kernel/tsc.c:288) [ 69.261726] handle_softirqs (kernel/softirq.c:561) Fixes: 3a8845af66edb ("idpf: add RX splitq napi poll support") Signed-off-by: Eric Dumazet Cc: Alan Brady Cc: Joshua Hay Cc: Willem de Bruijn Acked-by: Przemek Kitszel Link: https://patch.msgid.link/20250226221253.1927782-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 9be6a6b59c4e1..977741c414980 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -3013,7 +3013,6 @@ static int idpf_rx_rsc(struct idpf_rx_queue *rxq, struct sk_buff *skb, skb_shinfo(skb)->gso_size = rsc_seg_len; skb_reset_network_header(skb); - len = skb->len - skb_transport_offset(skb); if (ipv4) { struct iphdr *ipv4h = ip_hdr(skb); @@ -3022,6 +3021,7 @@ static int idpf_rx_rsc(struct idpf_rx_queue *rxq, struct sk_buff *skb, /* Reset and set transport header offset in skb */ skb_set_transport_header(skb, sizeof(struct iphdr)); + len = skb->len - skb_transport_offset(skb); /* Compute the TCP pseudo header checksum*/ tcp_hdr(skb)->check = @@ -3031,6 +3031,7 @@ static int idpf_rx_rsc(struct idpf_rx_queue *rxq, struct sk_buff *skb, skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + len = skb->len - skb_transport_offset(skb); tcp_hdr(skb)->check = ~tcp_v6_check(len, &ipv6h->saddr, &ipv6h->daddr, 0); } -- GitLab From 54e1b4becf5e220be03db4e1be773c1310e8cbbd Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Thu, 27 Feb 2025 14:54:41 +0530 Subject: [PATCH 1119/1585] net: ti: icss-iep: Reject perout generation request IEP driver supports both perout and pps signal generation but perout feature is faulty with half-cooked support due to some missing configuration. Remove perout support from the driver and reject perout requests with "not supported" error code. Fixes: c1e0230eeaab2 ("net: ti: icss-iep: Add IEP driver") Signed-off-by: Meghana Malladi Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250227092441.1848419-1-m-malladi@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icss_iep.c | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.c b/drivers/net/ethernet/ti/icssg/icss_iep.c index 768578c0d9587..d59c1744840af 100644 --- a/drivers/net/ethernet/ti/icssg/icss_iep.c +++ b/drivers/net/ethernet/ti/icssg/icss_iep.c @@ -474,26 +474,7 @@ static int icss_iep_perout_enable_hw(struct icss_iep *iep, static int icss_iep_perout_enable(struct icss_iep *iep, struct ptp_perout_request *req, int on) { - int ret = 0; - - mutex_lock(&iep->ptp_clk_mutex); - - if (iep->pps_enabled) { - ret = -EBUSY; - goto exit; - } - - if (iep->perout_enabled == !!on) - goto exit; - - ret = icss_iep_perout_enable_hw(iep, req, on); - if (!ret) - iep->perout_enabled = !!on; - -exit: - mutex_unlock(&iep->ptp_clk_mutex); - - return ret; + return -EOPNOTSUPP; } static void icss_iep_cap_cmp_work(struct work_struct *work) -- GitLab From 2b90e7ace79774a3540ce569e000388f8d22c9e0 Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Wed, 26 Feb 2025 15:18:39 -0500 Subject: [PATCH 1120/1585] efi: Don't map the entire mokvar table to determine its size Currently, when validating the mokvar table, we (re)map the entire table on each iteration of the loop, adding space as we discover new entries. If the table grows over a certain size, this fails due to limitations of early_memmap(), and we get a failure and traceback: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at mm/early_ioremap.c:139 __early_ioremap+0xef/0x220 ... Call Trace: ? __early_ioremap+0xef/0x220 ? __warn.cold+0x93/0xfa ? __early_ioremap+0xef/0x220 ? report_bug+0xff/0x140 ? early_fixup_exception+0x5d/0xb0 ? early_idt_handler_common+0x2f/0x3a ? __early_ioremap+0xef/0x220 ? efi_mokvar_table_init+0xce/0x1d0 ? setup_arch+0x864/0xc10 ? start_kernel+0x6b/0xa10 ? x86_64_start_reservations+0x24/0x30 ? x86_64_start_kernel+0xed/0xf0 ? common_startup_64+0x13e/0x141 ---[ end trace 0000000000000000 ]--- mokvar: Failed to map EFI MOKvar config table pa=0x7c4c3000, size=265187. Mapping the entire structure isn't actually necessary, as we don't ever need more than one entry header mapped at once. Changes efi_mokvar_table_init() to only map each entry header, not the entire table, when determining the table size. Since we're not mapping any data past the variable name, it also changes the code to enforce that each variable name is NUL terminated, rather than attempting to verify it in place. Cc: Signed-off-by: Peter Jones Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/mokvar-table.c | 41 +++++++++-------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/drivers/firmware/efi/mokvar-table.c b/drivers/firmware/efi/mokvar-table.c index 5ed0602c2f75f..d865cb1dbaad1 100644 --- a/drivers/firmware/efi/mokvar-table.c +++ b/drivers/firmware/efi/mokvar-table.c @@ -103,7 +103,6 @@ void __init efi_mokvar_table_init(void) void *va = NULL; unsigned long cur_offset = 0; unsigned long offset_limit; - unsigned long map_size = 0; unsigned long map_size_needed = 0; unsigned long size; struct efi_mokvar_table_entry *mokvar_entry; @@ -134,48 +133,34 @@ void __init efi_mokvar_table_init(void) */ err = -EINVAL; while (cur_offset + sizeof(*mokvar_entry) <= offset_limit) { - mokvar_entry = va + cur_offset; - map_size_needed = cur_offset + sizeof(*mokvar_entry); - if (map_size_needed > map_size) { - if (va) - early_memunmap(va, map_size); - /* - * Map a little more than the fixed size entry - * header, anticipating some data. It's safe to - * do so as long as we stay within current memory - * descriptor. - */ - map_size = min(map_size_needed + 2*EFI_PAGE_SIZE, - offset_limit); - va = early_memremap(efi.mokvar_table, map_size); - if (!va) { - pr_err("Failed to map EFI MOKvar config table pa=0x%lx, size=%lu.\n", - efi.mokvar_table, map_size); - return; - } - mokvar_entry = va + cur_offset; + if (va) + early_memunmap(va, sizeof(*mokvar_entry)); + va = early_memremap(efi.mokvar_table + cur_offset, sizeof(*mokvar_entry)); + if (!va) { + pr_err("Failed to map EFI MOKvar config table pa=0x%lx, size=%zu.\n", + efi.mokvar_table + cur_offset, sizeof(*mokvar_entry)); + return; } + mokvar_entry = va; /* Check for last sentinel entry */ if (mokvar_entry->name[0] == '\0') { if (mokvar_entry->data_size != 0) break; err = 0; + map_size_needed = cur_offset + sizeof(*mokvar_entry); break; } - /* Sanity check that the name is null terminated */ - size = strnlen(mokvar_entry->name, - sizeof(mokvar_entry->name)); - if (size >= sizeof(mokvar_entry->name)) - break; + /* Enforce that the name is NUL terminated */ + mokvar_entry->name[sizeof(mokvar_entry->name) - 1] = '\0'; /* Advance to the next entry */ - cur_offset = map_size_needed + mokvar_entry->data_size; + cur_offset += sizeof(*mokvar_entry) + mokvar_entry->data_size; } if (va) - early_memunmap(va, map_size); + early_memunmap(va, sizeof(*mokvar_entry)); if (err) { pr_err("EFI MOKvar config table is not valid\n"); return; -- GitLab From e3cf2d91d0583cae70aeb512da87e3ade25ea912 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 27 Feb 2025 14:30:22 +0100 Subject: [PATCH 1121/1585] efi/mokvar-table: Avoid repeated map/unmap of the same page Tweak the logic that traverses the MOKVAR UEFI configuration table to only unmap the entry header and map the next one if they don't live in the same physical page. Link: https://lore.kernel.org/all/8f085931-3e9d-4386-9209-1d6c95616327@uncooperative.org/ Tested-By: Peter Jones Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/mokvar-table.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/mokvar-table.c b/drivers/firmware/efi/mokvar-table.c index d865cb1dbaad1..208db29613c63 100644 --- a/drivers/firmware/efi/mokvar-table.c +++ b/drivers/firmware/efi/mokvar-table.c @@ -99,13 +99,13 @@ static struct kobject *mokvar_kobj; */ void __init efi_mokvar_table_init(void) { + struct efi_mokvar_table_entry __aligned(1) *mokvar_entry, *next_entry; efi_memory_desc_t md; void *va = NULL; unsigned long cur_offset = 0; unsigned long offset_limit; unsigned long map_size_needed = 0; unsigned long size; - struct efi_mokvar_table_entry *mokvar_entry; int err; if (!efi_enabled(EFI_MEMMAP)) @@ -142,7 +142,7 @@ void __init efi_mokvar_table_init(void) return; } mokvar_entry = va; - +next: /* Check for last sentinel entry */ if (mokvar_entry->name[0] == '\0') { if (mokvar_entry->data_size != 0) @@ -156,7 +156,19 @@ void __init efi_mokvar_table_init(void) mokvar_entry->name[sizeof(mokvar_entry->name) - 1] = '\0'; /* Advance to the next entry */ - cur_offset += sizeof(*mokvar_entry) + mokvar_entry->data_size; + size = sizeof(*mokvar_entry) + mokvar_entry->data_size; + cur_offset += size; + + /* + * Don't bother remapping if the current entry header and the + * next one end on the same page. + */ + next_entry = (void *)((unsigned long)mokvar_entry + size); + if (((((unsigned long)(mokvar_entry + 1) - 1) ^ + ((unsigned long)(next_entry + 1) - 1)) & PAGE_MASK) == 0) { + mokvar_entry = next_entry; + goto next; + } } if (va) -- GitLab From 02410ac72ac3707936c07ede66e94360d0d65319 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Wed, 26 Feb 2025 12:06:51 +0000 Subject: [PATCH 1122/1585] mm: hugetlb: Add huge page size param to huge_ptep_get_and_clear() In order to fix a bug, arm64 needs to be told the size of the huge page for which the huge_pte is being cleared in huge_ptep_get_and_clear(). Provide for this by adding an `unsigned long sz` parameter to the function. This follows the same pattern as huge_pte_clear() and set_huge_pte_at(). This commit makes the required interface modifications to the core mm as well as all arches that implement this function (arm64, loongarch, mips, parisc, powerpc, riscv, s390, sparc). The actual arm64 bug will be fixed in a separate commit. Cc: stable@vger.kernel.org Fixes: 66b3923a1a0f ("arm64: hugetlb: add support for PTE contiguous bit") Acked-by: David Hildenbrand Reviewed-by: Alexandre Ghiti # riscv Reviewed-by: Christophe Leroy Reviewed-by: Catalin Marinas Reviewed-by: Anshuman Khandual Signed-off-by: Ryan Roberts Acked-by: Alexander Gordeev # s390 Link: https://lore.kernel.org/r/20250226120656.2400136-2-ryan.roberts@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/hugetlb.h | 4 ++-- arch/arm64/mm/hugetlbpage.c | 8 +++++--- arch/loongarch/include/asm/hugetlb.h | 6 ++++-- arch/mips/include/asm/hugetlb.h | 6 ++++-- arch/parisc/include/asm/hugetlb.h | 2 +- arch/parisc/mm/hugetlbpage.c | 2 +- arch/powerpc/include/asm/hugetlb.h | 6 ++++-- arch/riscv/include/asm/hugetlb.h | 3 ++- arch/riscv/mm/hugetlbpage.c | 2 +- arch/s390/include/asm/hugetlb.h | 16 ++++++++++++---- arch/s390/mm/hugetlbpage.c | 4 ++-- arch/sparc/include/asm/hugetlb.h | 2 +- arch/sparc/mm/hugetlbpage.c | 2 +- include/asm-generic/hugetlb.h | 2 +- include/linux/hugetlb.h | 4 +++- mm/hugetlb.c | 4 ++-- 16 files changed, 46 insertions(+), 27 deletions(-) diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index c6dff3e69539b..03db9cb21ace8 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -42,8 +42,8 @@ extern int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty); #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR -extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep); +extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT extern void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep); diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 98a2a0e64e255..06db4649af916 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -396,8 +396,8 @@ void huge_pte_clear(struct mm_struct *mm, unsigned long addr, __pte_clear(mm, addr, ptep); } -pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz) { int ncontig; size_t pgsize; @@ -549,6 +549,8 @@ bool __init arch_hugetlb_valid_size(unsigned long size) pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { + unsigned long psize = huge_page_size(hstate_vma(vma)); + if (alternative_has_cap_unlikely(ARM64_WORKAROUND_2645198)) { /* * Break-before-make (BBM) is required for all user space mappings @@ -558,7 +560,7 @@ pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr if (pte_user_exec(__ptep_get(ptep))) return huge_ptep_clear_flush(vma, addr, ptep); } - return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize); } void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, diff --git a/arch/loongarch/include/asm/hugetlb.h b/arch/loongarch/include/asm/hugetlb.h index c8e4057734d0d..4dc4b3e04225f 100644 --- a/arch/loongarch/include/asm/hugetlb.h +++ b/arch/loongarch/include/asm/hugetlb.h @@ -36,7 +36,8 @@ static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) + unsigned long addr, pte_t *ptep, + unsigned long sz) { pte_t clear; pte_t pte = ptep_get(ptep); @@ -51,8 +52,9 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pte_t pte; + unsigned long sz = huge_page_size(hstate_vma(vma)); - pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, sz); flush_tlb_page(vma, addr); return pte; } diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h index d0a86ce83de91..fbc71ddcf0f68 100644 --- a/arch/mips/include/asm/hugetlb.h +++ b/arch/mips/include/asm/hugetlb.h @@ -27,7 +27,8 @@ static inline int prepare_hugepage_range(struct file *file, #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) + unsigned long addr, pte_t *ptep, + unsigned long sz) { pte_t clear; pte_t pte = *ptep; @@ -42,13 +43,14 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pte_t pte; + unsigned long sz = huge_page_size(hstate_vma(vma)); /* * clear the huge pte entry firstly, so that the other smp threads will * not get old pte entry after finishing flush_tlb_page and before * setting new huge pte entry */ - pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, sz); flush_tlb_page(vma, addr); return pte; } diff --git a/arch/parisc/include/asm/hugetlb.h b/arch/parisc/include/asm/hugetlb.h index 5b3a5429f71b3..21e9ace177395 100644 --- a/arch/parisc/include/asm/hugetlb.h +++ b/arch/parisc/include/asm/hugetlb.h @@ -10,7 +10,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep); + pte_t *ptep, unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c index e9d18cf25b792..a94fe546d434f 100644 --- a/arch/parisc/mm/hugetlbpage.c +++ b/arch/parisc/mm/hugetlbpage.c @@ -126,7 +126,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) + pte_t *ptep, unsigned long sz) { pte_t entry; diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index dad2e7980f245..86326587e58de 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -45,7 +45,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) + unsigned long addr, pte_t *ptep, + unsigned long sz) { return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 1)); } @@ -55,8 +56,9 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pte_t pte; + unsigned long sz = huge_page_size(hstate_vma(vma)); - pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, sz); flush_hugetlb_page(vma, addr); return pte; } diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index faf3624d80577..4461264977684 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -28,7 +28,8 @@ void set_huge_pte_at(struct mm_struct *mm, #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep); + unsigned long addr, pte_t *ptep, + unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 42314f0939220..b4a78a4b35cff 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -293,7 +293,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) + pte_t *ptep, unsigned long sz) { pte_t orig_pte = ptep_get(ptep); int pte_num; diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 7c52acaf9f828..663e87220e89f 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -25,8 +25,16 @@ void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_HUGE_PTEP_GET pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +pte_t __huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep); + #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR -pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + unsigned long sz) +{ + return __huge_ptep_get_and_clear(mm, addr, ptep); +} static inline void arch_clear_hugetlb_flags(struct folio *folio) { @@ -48,7 +56,7 @@ static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - return huge_ptep_get_and_clear(vma->vm_mm, address, ptep); + return __huge_ptep_get_and_clear(vma->vm_mm, address, ptep); } #define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS @@ -59,7 +67,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, int changed = !pte_same(huge_ptep_get(vma->vm_mm, addr, ptep), pte); if (changed) { - huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + __huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); __set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } return changed; @@ -69,7 +77,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep); + pte_t pte = __huge_ptep_get_and_clear(mm, addr, ptep); __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); } diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index d9ce199953de9..2e568f175cd41 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -188,8 +188,8 @@ pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep) return __rste_to_pte(pte_val(*ptep)); } -pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) +pte_t __huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { pte_t pte = huge_ptep_get(mm, addr, ptep); pmd_t *pmdp = (pmd_t *) ptep; diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h index c714ca6a05aa0..e7a9cdd498dca 100644 --- a/arch/sparc/include/asm/hugetlb.h +++ b/arch/sparc/include/asm/hugetlb.h @@ -20,7 +20,7 @@ void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep); + pte_t *ptep, unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index eee601a0d2cfb..80504148d8a5b 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -260,7 +260,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, } pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) + pte_t *ptep, unsigned long sz) { unsigned int i, nptes, orig_shift, shift; unsigned long size; diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index f42133dae68e5..2afc95bf1655f 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -90,7 +90,7 @@ static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, #ifndef __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) + unsigned long addr, pte_t *ptep, unsigned long sz) { return ptep_get_and_clear(mm, addr, ptep); } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ec8c0ccc8f959..bf5f7256bd281 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -1004,7 +1004,9 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm) static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { - return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + unsigned long psize = huge_page_size(hstate_vma(vma)); + + return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize); } #endif diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 65068671e460a..de9d49e521c13 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5447,7 +5447,7 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr, if (src_ptl != dst_ptl) spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); - pte = huge_ptep_get_and_clear(mm, old_addr, src_pte); + pte = huge_ptep_get_and_clear(mm, old_addr, src_pte, sz); if (need_clear_uffd_wp && pte_marker_uffd_wp(pte)) huge_pte_clear(mm, new_addr, dst_pte, sz); @@ -5622,7 +5622,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, set_vma_resv_flags(vma, HPAGE_RESV_UNMAPPED); } - pte = huge_ptep_get_and_clear(mm, address, ptep); + pte = huge_ptep_get_and_clear(mm, address, ptep, sz); tlb_remove_huge_tlb_entry(h, tlb, ptep, address); if (huge_pte_dirty(pte)) set_page_dirty(page); -- GitLab From 49c87f7677746f3c5bd16c81b23700bb6b88bfd4 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Wed, 26 Feb 2025 12:06:52 +0000 Subject: [PATCH 1123/1585] arm64: hugetlb: Fix huge_ptep_get_and_clear() for non-present ptes arm64 supports multiple huge_pte sizes. Some of the sizes are covered by a single pte entry at a particular level (PMD_SIZE, PUD_SIZE), and some are covered by multiple ptes at a particular level (CONT_PTE_SIZE, CONT_PMD_SIZE). So the function has to figure out the size from the huge_pte pointer. This was previously done by walking the pgtable to determine the level and by using the PTE_CONT bit to determine the number of ptes at the level. But the PTE_CONT bit is only valid when the pte is present. For non-present pte values (e.g. markers, migration entries), the previous implementation was therefore erroneously determining the size. There is at least one known caller in core-mm, move_huge_pte(), which may call huge_ptep_get_and_clear() for a non-present pte. So we must be robust to this case. Additionally the "regular" ptep_get_and_clear() is robust to being called for non-present ptes so it makes sense to follow the behavior. Fix this by using the new sz parameter which is now provided to the function. Additionally when clearing each pte in a contig range, don't gather the access and dirty bits if the pte is not present. An alternative approach that would not require API changes would be to store the PTE_CONT bit in a spare bit in the swap entry pte for the non-present case. But it felt cleaner to follow other APIs' lead and just pass in the size. As an aside, PTE_CONT is bit 52, which corresponds to bit 40 in the swap entry offset field (layout of non-present pte). Since hugetlb is never swapped to disk, this field will only be populated for markers, which always set this bit to 0 and hwpoison swap entries, which set the offset field to a PFN; So it would only ever be 1 for a 52-bit PVA system where memory in that high half was poisoned (I think!). So in practice, this bit would almost always be zero for non-present ptes and we would only clear the first entry if it was actually a contiguous block. That's probably a less severe symptom than if it was always interpreted as 1 and cleared out potentially-present neighboring PTEs. Cc: stable@vger.kernel.org Fixes: 66b3923a1a0f ("arm64: hugetlb: add support for PTE contiguous bit") Reviewed-by: Catalin Marinas Signed-off-by: Ryan Roberts Link: https://lore.kernel.org/r/20250226120656.2400136-3-ryan.roberts@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/hugetlbpage.c | 53 ++++++++++++++----------------------- 1 file changed, 20 insertions(+), 33 deletions(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 06db4649af916..b3a7fafe8892d 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -100,20 +100,11 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr, static inline int num_contig_ptes(unsigned long size, size_t *pgsize) { - int contig_ptes = 0; + int contig_ptes = 1; *pgsize = size; switch (size) { -#ifndef __PAGETABLE_PMD_FOLDED - case PUD_SIZE: - if (pud_sect_supported()) - contig_ptes = 1; - break; -#endif - case PMD_SIZE: - contig_ptes = 1; - break; case CONT_PMD_SIZE: *pgsize = PMD_SIZE; contig_ptes = CONT_PMDS; @@ -122,6 +113,8 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize) *pgsize = PAGE_SIZE; contig_ptes = CONT_PTES; break; + default: + WARN_ON(!__hugetlb_valid_size(size)); } return contig_ptes; @@ -163,24 +156,23 @@ static pte_t get_clear_contig(struct mm_struct *mm, unsigned long pgsize, unsigned long ncontig) { - pte_t orig_pte = __ptep_get(ptep); - unsigned long i; - - for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) { - pte_t pte = __ptep_get_and_clear(mm, addr, ptep); - - /* - * If HW_AFDBM is enabled, then the HW could turn on - * the dirty or accessed bit for any page in the set, - * so check them all. - */ - if (pte_dirty(pte)) - orig_pte = pte_mkdirty(orig_pte); - - if (pte_young(pte)) - orig_pte = pte_mkyoung(orig_pte); + pte_t pte, tmp_pte; + bool present; + + pte = __ptep_get_and_clear(mm, addr, ptep); + present = pte_present(pte); + while (--ncontig) { + ptep++; + addr += pgsize; + tmp_pte = __ptep_get_and_clear(mm, addr, ptep); + if (present) { + if (pte_dirty(tmp_pte)) + pte = pte_mkdirty(pte); + if (pte_young(tmp_pte)) + pte = pte_mkyoung(pte); + } } - return orig_pte; + return pte; } static pte_t get_clear_contig_flush(struct mm_struct *mm, @@ -401,13 +393,8 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, { int ncontig; size_t pgsize; - pte_t orig_pte = __ptep_get(ptep); - - if (!pte_cont(orig_pte)) - return __ptep_get_and_clear(mm, addr, ptep); - - ncontig = find_num_contig(mm, addr, ptep, &pgsize); + ncontig = num_contig_ptes(sz, &pgsize); return get_clear_contig(mm, addr, ptep, pgsize, ncontig); } -- GitLab From eed6bfa8b28230382b797a88569f2c7569a1a419 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Wed, 26 Feb 2025 12:06:53 +0000 Subject: [PATCH 1124/1585] arm64: hugetlb: Fix flush_hugetlb_tlb_range() invalidation level commit c910f2b65518 ("arm64/mm: Update tlb invalidation routines for FEAT_LPA2") changed the "invalidation level unknown" hint from 0 to TLBI_TTL_UNKNOWN (INT_MAX). But the fallback "unknown level" path in flush_hugetlb_tlb_range() was not updated. So as it stands, when trying to invalidate CONT_PMD_SIZE or CONT_PTE_SIZE hugetlb mappings, we will spuriously try to invalidate at level 0 on LPA2-enabled systems. Fix this so that the fallback passes TLBI_TTL_UNKNOWN, and while we are at it, explicitly use the correct stride and level for CONT_PMD_SIZE and CONT_PTE_SIZE, which should provide a minor optimization. Cc: stable@vger.kernel.org Fixes: c910f2b65518 ("arm64/mm: Update tlb invalidation routines for FEAT_LPA2") Reviewed-by: Anshuman Khandual Reviewed-by: Catalin Marinas Signed-off-by: Ryan Roberts Link: https://lore.kernel.org/r/20250226120656.2400136-4-ryan.roberts@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/hugetlb.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index 03db9cb21ace8..07fbf5bf85a7e 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -76,12 +76,22 @@ static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma, { unsigned long stride = huge_page_size(hstate_vma(vma)); - if (stride == PMD_SIZE) - __flush_tlb_range(vma, start, end, stride, false, 2); - else if (stride == PUD_SIZE) - __flush_tlb_range(vma, start, end, stride, false, 1); - else - __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0); + switch (stride) { +#ifndef __PAGETABLE_PMD_FOLDED + case PUD_SIZE: + __flush_tlb_range(vma, start, end, PUD_SIZE, false, 1); + break; +#endif + case CONT_PMD_SIZE: + case PMD_SIZE: + __flush_tlb_range(vma, start, end, PMD_SIZE, false, 2); + break; + case CONT_PTE_SIZE: + __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 3); + break; + default: + __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN); + } } #endif /* __ASM_HUGETLB_H */ -- GitLab From 6b481ab0e6855fb30e2923c51f62f1662d1cda7e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 7 Feb 2025 11:25:31 +1000 Subject: [PATCH 1125/1585] drm/nouveau: select FW caching nouveau tries to load some firmware during suspend that it loaded earlier, but with fw caching disabled it hangs suspend, so just rely on FW cache enabling instead of working around it in the driver. Fixes: 176fdcbddfd2 ("drm/nouveau/gsp/r535: add support for booting GSP-RM") Signed-off-by: Dave Airlie Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20250207012531.621369-1-airlied@gmail.com --- drivers/gpu/drm/nouveau/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index ce840300578d8..1050a4617fc15 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -4,6 +4,7 @@ config DRM_NOUVEAU depends on DRM && PCI && MMU select IOMMU_API select FW_LOADER + select FW_CACHE if PM_SLEEP select DRM_CLIENT_SELECTION select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDMI_HELPER -- GitLab From 82c387ef7568c0d96a918a5a78d9cad6256cfa15 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Dec 2024 14:20:56 +0100 Subject: [PATCH 1126/1585] sched/core: Prevent rescheduling when interrupts are disabled David reported a warning observed while loop testing kexec jump: Interrupts enabled after irqrouter_resume+0x0/0x50 WARNING: CPU: 0 PID: 560 at drivers/base/syscore.c:103 syscore_resume+0x18a/0x220 kernel_kexec+0xf6/0x180 __do_sys_reboot+0x206/0x250 do_syscall_64+0x95/0x180 The corresponding interrupt flag trace: hardirqs last enabled at (15573): [] __up_console_sem+0x7e/0x90 hardirqs last disabled at (15580): [] __up_console_sem+0x63/0x90 That means __up_console_sem() was invoked with interrupts enabled. Further instrumentation revealed that in the interrupt disabled section of kexec jump one of the syscore_suspend() callbacks woke up a task, which set the NEED_RESCHED flag. A later callback in the resume path invoked cond_resched() which in turn led to the invocation of the scheduler: __cond_resched+0x21/0x60 down_timeout+0x18/0x60 acpi_os_wait_semaphore+0x4c/0x80 acpi_ut_acquire_mutex+0x3d/0x100 acpi_ns_get_node+0x27/0x60 acpi_ns_evaluate+0x1cb/0x2d0 acpi_rs_set_srs_method_data+0x156/0x190 acpi_pci_link_set+0x11c/0x290 irqrouter_resume+0x54/0x60 syscore_resume+0x6a/0x200 kernel_kexec+0x145/0x1c0 __do_sys_reboot+0xeb/0x240 do_syscall_64+0x95/0x180 This is a long standing problem, which probably got more visible with the recent printk changes. Something does a task wakeup and the scheduler sets the NEED_RESCHED flag. cond_resched() sees it set and invokes schedule() from a completely bogus context. The scheduler enables interrupts after context switching, which causes the above warning at the end. Quite some of the code paths in syscore_suspend()/resume() can result in triggering a wakeup with the exactly same consequences. They might not have done so yet, but as they share a lot of code with normal operations it's just a question of time. The problem only affects the PREEMPT_NONE and PREEMPT_VOLUNTARY scheduling models. Full preemption is not affected as cond_resched() is disabled and the preemption check preemptible() takes the interrupt disabled flag into account. Cure the problem by adding a corresponding check into cond_resched(). Reported-by: David Woodhouse Suggested-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Tested-by: David Woodhouse Cc: Linus Torvalds Cc: stable@vger.kernel.org Closes: https://lore.kernel.org/all/7717fe2ac0ce5f0a2c43fdab8b11f4483d54a2a4.camel@infradead.org --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9aecd914ac691..67189907214d3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7285,7 +7285,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) #if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) int __sched __cond_resched(void) { - if (should_resched(0)) { + if (should_resched(0) && !irqs_disabled()) { preempt_schedule_common(); return 1; } -- GitLab From c133ec0e5717868c9967fa3df92a55e537b1aead Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Tue, 25 Feb 2025 11:59:27 +0200 Subject: [PATCH 1127/1585] usb: xhci: Enable the TRB overfetch quirk on VIA VL805 Raspberry Pi is a major user of those chips and they discovered a bug - when the end of a transfer ring segment is reached, up to four TRBs can be prefetched from the next page even if the segment ends with link TRB and on page boundary (the chip claims to support standard 4KB pages). It also appears that if the prefetched TRBs belong to a different ring whose doorbell is later rung, they may be used without refreshing from system RAM and the endpoint will stay idle if their cycle bit is stale. Other users complain about IOMMU faults on x86 systems, unsurprisingly. Deal with it by using existing quirk which allocates a dummy page after each transfer ring segment. This was seen to resolve both problems. RPi came up with a more efficient solution, shortening each segment by four TRBs, but it complicated the driver and they ditched it for this quirk. Also rename the quirk and add VL805 device ID macro. Signed-off-by: Michal Pecio Link: https://github.com/raspberrypi/linux/issues/4685 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=215906 CC: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250225095927.2512358-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 3 ++- drivers/usb/host/xhci-pci.c | 10 +++++++--- drivers/usb/host/xhci.h | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 92703efda1f7b..fdf0c1008225a 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2437,7 +2437,8 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) * and our use of dma addresses in the trb_address_map radix tree needs * TRB_SEGMENT_SIZE alignment, so we pick the greater alignment need. */ - if (xhci->quirks & XHCI_ZHAOXIN_TRB_FETCH) + if (xhci->quirks & XHCI_TRB_OVERFETCH) + /* Buggy HC prefetches beyond segment bounds - allocate dummy space at the end */ xhci->segment_pool = dma_pool_create("xHCI ring segments", dev, TRB_SEGMENT_SIZE * 2, TRB_SEGMENT_SIZE * 2, xhci->page_size * 2); else diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index ad0ff356f6fa0..54460d11f7ee8 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -38,6 +38,8 @@ #define PCI_DEVICE_ID_ETRON_EJ168 0x7023 #define PCI_DEVICE_ID_ETRON_EJ188 0x7052 +#define PCI_DEVICE_ID_VIA_VL805 0x3483 + #define PCI_DEVICE_ID_INTEL_LYNXPOINT_XHCI 0x8c31 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI 0x9c31 #define PCI_DEVICE_ID_INTEL_WILDCATPOINT_LP_XHCI 0x9cb1 @@ -418,8 +420,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == 0x3432) xhci->quirks |= XHCI_BROKEN_STREAMS; - if (pdev->vendor == PCI_VENDOR_ID_VIA && pdev->device == 0x3483) + if (pdev->vendor == PCI_VENDOR_ID_VIA && pdev->device == PCI_DEVICE_ID_VIA_VL805) { xhci->quirks |= XHCI_LPM_SUPPORT; + xhci->quirks |= XHCI_TRB_OVERFETCH; + } if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && pdev->device == PCI_DEVICE_ID_ASMEDIA_1042_XHCI) { @@ -467,11 +471,11 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->device == 0x9202) { xhci->quirks |= XHCI_RESET_ON_RESUME; - xhci->quirks |= XHCI_ZHAOXIN_TRB_FETCH; + xhci->quirks |= XHCI_TRB_OVERFETCH; } if (pdev->device == 0x9203) - xhci->quirks |= XHCI_ZHAOXIN_TRB_FETCH; + xhci->quirks |= XHCI_TRB_OVERFETCH; } if (pdev->vendor == PCI_VENDOR_ID_CDNS && diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 8c164340a2c35..779b01dee068f 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1632,7 +1632,7 @@ struct xhci_hcd { #define XHCI_EP_CTX_BROKEN_DCS BIT_ULL(42) #define XHCI_SUSPEND_RESUME_CLKS BIT_ULL(43) #define XHCI_RESET_TO_DEFAULT BIT_ULL(44) -#define XHCI_ZHAOXIN_TRB_FETCH BIT_ULL(45) +#define XHCI_TRB_OVERFETCH BIT_ULL(45) #define XHCI_ZHAOXIN_HOST BIT_ULL(46) #define XHCI_WRITE_64_HI_LO BIT_ULL(47) #define XHCI_CDNS_SCTX_QUIRK BIT_ULL(48) -- GitLab From 2397d61ee45cddb8f3bd3a3a9840ef0f0b5aa843 Mon Sep 17 00:00:00 2001 From: Christian Heusel Date: Mon, 24 Feb 2025 09:32:59 +0100 Subject: [PATCH 1128/1585] Revert "drivers/card_reader/rtsx_usb: Restore interrupt based detection" This reverts commit 235b630eda072d7e7b102ab346d6b8a2c028a772. This commit was found responsible for issues with SD card recognition, as users had to re-insert their cards in the readers and wait for a while. As for some people the SD card was involved in the boot process it also caused boot failures. Cc: stable@vger.kernel.org Link: https://bbs.archlinux.org/viewtopic.php?id=303321 Fixes: 235b630eda07 ("drivers/card_reader/rtsx_usb: Restore interrupt based detection") Reported-by: qf Closes: https://lore.kernel.org/all/1de87dfa-1e81-45b7-8dcb-ad86c21d5352@heusel.eu Signed-off-by: Christian Heusel Link: https://lore.kernel.org/r/20250224-revert-sdcard-patch-v1-1-d1a457fbb796@heusel.eu Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cardreader/rtsx_usb.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c index e0174da5e9fc3..77b0490a1b38d 100644 --- a/drivers/misc/cardreader/rtsx_usb.c +++ b/drivers/misc/cardreader/rtsx_usb.c @@ -286,7 +286,6 @@ static int rtsx_usb_get_status_with_bulk(struct rtsx_ucr *ucr, u16 *status) int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status) { int ret; - u8 interrupt_val = 0; u16 *buf; if (!status) @@ -309,20 +308,6 @@ int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status) ret = rtsx_usb_get_status_with_bulk(ucr, status); } - rtsx_usb_read_register(ucr, CARD_INT_PEND, &interrupt_val); - /* Cross check presence with interrupts */ - if (*status & XD_CD) - if (!(interrupt_val & XD_INT)) - *status &= ~XD_CD; - - if (*status & SD_CD) - if (!(interrupt_val & SD_INT)) - *status &= ~SD_CD; - - if (*status & MS_CD) - if (!(interrupt_val & MS_INT)) - *status &= ~MS_CD; - /* usb_control_msg may return positive when success */ if (ret < 0) return ret; -- GitLab From cbf85b9cb80bec6345ffe0368dfff98386f4714f Mon Sep 17 00:00:00 2001 From: Salah Triki Date: Fri, 21 Feb 2025 22:32:59 +0100 Subject: [PATCH 1129/1585] bluetooth: btusb: Initialize .owner field of force_poll_sync_fops Initialize .owner field of force_poll_sync_fops to THIS_MODULE in order to prevent btusb from being unloaded while its operations are in use. Fixes: 800fe5ec302e ("Bluetooth: btusb: Add support for queuing during polling interval") Signed-off-by: Salah Triki Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 8149e53fd0a76..2a8d91963c63f 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3644,6 +3644,7 @@ static ssize_t force_poll_sync_write(struct file *file, } static const struct file_operations force_poll_sync_fops = { + .owner = THIS_MODULE, .open = simple_open, .read = force_poll_sync_read, .write = force_poll_sync_write, -- GitLab From f2176a07e7b19f73e05c805cf3d130a2999154cb Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Fri, 21 Feb 2025 16:49:47 +0800 Subject: [PATCH 1130/1585] Bluetooth: Add check for mgmt_alloc_skb() in mgmt_remote_name() Add check for the return value of mgmt_alloc_skb() in mgmt_remote_name() to prevent null pointer dereference. Fixes: ba17bb62ce41 ("Bluetooth: Fix skb allocation in mgmt_remote_name() & mgmt_device_connected()") Cc: stable@vger.kernel.org Signed-off-by: Haoxiang Li Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f53304cb09dbe..3e0f88cd975c8 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -10413,6 +10413,8 @@ void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0)); + if (!skb) + return; ev = skb_put(skb, sizeof(*ev)); bacpy(&ev->addr.bdaddr, bdaddr); -- GitLab From d8df010f72b8a32aaea393e36121738bb53ed905 Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Fri, 21 Feb 2025 16:58:01 +0800 Subject: [PATCH 1131/1585] Bluetooth: Add check for mgmt_alloc_skb() in mgmt_device_connected() Add check for the return value of mgmt_alloc_skb() in mgmt_device_connected() to prevent null pointer dereference. Fixes: e96741437ef0 ("Bluetooth: mgmt: Make use of mgmt_send_event_skb in MGMT_EV_DEVICE_CONNECTED") Cc: stable@vger.kernel.org Signed-off-by: Haoxiang Li Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3e0f88cd975c8..621c555f639be 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -9660,6 +9660,9 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0) + eir_precalc_len(sizeof(conn->dev_class))); + if (!skb) + return; + ev = skb_put(skb, sizeof(*ev)); bacpy(&ev->addr.bdaddr, &conn->dst); ev->addr.type = link_to_bdaddr(conn->type, conn->dst_type); -- GitLab From 0979ff3676b1b4e6a20970bc265491d23c2da42b Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Thu, 27 Feb 2025 20:00:05 +0100 Subject: [PATCH 1132/1585] nvmet: remove old function prototype nvmet_subsys_nsid_exists() doesn't exist anymore Fixes: 74d16965d7ac ("nvmet-loop: avoid using mutex in IO hotpath") Signed-off-by: Maurizio Lombardi Signed-off-by: Keith Busch --- drivers/nvme/target/nvmet.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index d2c1233981e1a..fcf4f460dc9a4 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -647,7 +647,6 @@ void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys, struct nvmet_host *host); void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, u8 event_info, u8 log_page); -bool nvmet_subsys_nsid_exists(struct nvmet_subsys *subsys, u32 nsid); #define NVMET_MIN_QUEUE_SIZE 16 #define NVMET_MAX_QUEUE_SIZE 1024 -- GitLab From afb41b08c44e5386f2f52fa859010ac4afd2b66f Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 24 Feb 2025 15:40:58 +0100 Subject: [PATCH 1133/1585] nvme-tcp: Fix a C2HTermReq error message In H2CTermReq, a FES with value 0x05 means "R2T Limit Exceeded"; but in C2HTermReq the same value has a different meaning (Data Transfer Limit Exceeded). Fixes: 84e009042d0f ("nvme-tcp: add basic support for the C2HTermReq PDU") Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 8c14018201dbd..1094cbbec169f 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -775,7 +775,7 @@ static void nvme_tcp_handle_c2h_term(struct nvme_tcp_queue *queue, [NVME_TCP_FES_PDU_SEQ_ERR] = "PDU Sequence Error", [NVME_TCP_FES_HDR_DIGEST_ERR] = "Header Digest Error", [NVME_TCP_FES_DATA_OUT_OF_RANGE] = "Data Transfer Out Of Range", - [NVME_TCP_FES_R2T_LIMIT_EXCEEDED] = "R2T Limit Exceeded", + [NVME_TCP_FES_DATA_LIMIT_EXCEEDED] = "Data Transfer Limit Exceeded", [NVME_TCP_FES_UNSUPPORTED_PARAM] = "Unsupported Parameter", }; -- GitLab From ada9ce437a4da8e27243251bd7a9ecec32ebd72a Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Thu, 27 Feb 2025 02:21:26 +0100 Subject: [PATCH 1134/1585] mailmap: remove unwanted entry for Antonio Quartulli antonio@openvpn.net is still used for sending patches under the OpenVPN Inc. umbrella, therefore this address should not be re-mapped. Signed-off-by: Antonio Quartulli Link: https://patch.msgid.link/20250227-b4-ovpn-v20-1-93f363310834@openvpn.net Signed-off-by: Jakub Kicinski --- .mailmap | 1 - 1 file changed, 1 deletion(-) diff --git a/.mailmap b/.mailmap index a897c16d3baef..598f31c4b498e 100644 --- a/.mailmap +++ b/.mailmap @@ -88,7 +88,6 @@ Antonio Quartulli Antonio Quartulli Antonio Quartulli Antonio Quartulli -Antonio Quartulli Antonio Quartulli Anup Patel Archit Taneja -- GitLab From 6f86bdeab633a56d5c6dccf1a2c5989b6a5e323e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Feb 2025 16:39:44 -0500 Subject: [PATCH 1135/1585] tracing: Fix bad hist from corrupting named_triggers list The following commands causes a crash: ~# cd /sys/kernel/tracing/events/rcu/rcu_callback ~# echo 'hist:name=bad:keys=common_pid:onmax(bogus).save(common_pid)' > trigger bash: echo: write error: Invalid argument ~# echo 'hist:name=bad:keys=common_pid' > trigger Because the following occurs: event_trigger_write() { trigger_process_regex() { event_hist_trigger_parse() { data = event_trigger_alloc(..); event_trigger_register(.., data) { cmd_ops->reg(.., data, ..) [hist_register_trigger()] { data->ops->init() [event_hist_trigger_init()] { save_named_trigger(name, data) { list_add(&data->named_list, &named_triggers); } } } } ret = create_actions(); (return -EINVAL) if (ret) goto out_unreg; [..] ret = hist_trigger_enable(data, ...) { list_add_tail_rcu(&data->list, &file->triggers); <<<---- SKIPPED!!! (this is important!) [..] out_unreg: event_hist_unregister(.., data) { cmd_ops->unreg(.., data, ..) [hist_unregister_trigger()] { list_for_each_entry(iter, &file->triggers, list) { if (!hist_trigger_match(data, iter, named_data, false)) <- never matches continue; [..] test = iter; } if (test && test->ops->free) <<<-- test is NULL test->ops->free(test) [event_hist_trigger_free()] { [..] if (data->name) del_named_trigger(data) { list_del(&data->named_list); <<<<-- NEVER gets removed! } } } } [..] kfree(data); <<<-- frees item but it is still on list The next time a hist with name is registered, it causes an u-a-f bug and the kernel can crash. Move the code around such that if event_trigger_register() succeeds, the next thing called is hist_trigger_enable() which adds it to the list. A bunch of actions is called if get_named_trigger_data() returns false. But that doesn't need to be called after event_trigger_register(), so it can be moved up, allowing event_trigger_register() to be called just before hist_trigger_enable() keeping them together and allowing the file->triggers to be properly populated. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250227163944.1c37f85f@gandalf.local.home Fixes: 067fe038e70f6 ("tracing: Add variable reference handling to hist triggers") Reported-by: Tomas Glozar Tested-by: Tomas Glozar Reviewed-by: Tom Zanussi Closes: https://lore.kernel.org/all/CAP4=nvTsxjckSBTz=Oe_UYh8keD9_sZC4i++4h72mJLic4_W4A@mail.gmail.com/ Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 261163b00137a..ad7419e240556 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -6724,27 +6724,27 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, if (existing_hist_update_only(glob, trigger_data, file)) goto out_free; - ret = event_trigger_register(cmd_ops, file, glob, trigger_data); - if (ret < 0) - goto out_free; + if (!get_named_trigger_data(trigger_data)) { - if (get_named_trigger_data(trigger_data)) - goto enable; + ret = create_actions(hist_data); + if (ret) + goto out_free; - ret = create_actions(hist_data); - if (ret) - goto out_unreg; + if (has_hist_vars(hist_data) || hist_data->n_var_refs) { + ret = save_hist_vars(hist_data); + if (ret) + goto out_free; + } - if (has_hist_vars(hist_data) || hist_data->n_var_refs) { - ret = save_hist_vars(hist_data); + ret = tracing_map_init(hist_data->map); if (ret) - goto out_unreg; + goto out_free; } - ret = tracing_map_init(hist_data->map); - if (ret) - goto out_unreg; -enable: + ret = event_trigger_register(cmd_ops, file, glob, trigger_data); + if (ret < 0) + goto out_free; + ret = hist_trigger_enable(trigger_data, file); if (ret) goto out_unreg; -- GitLab From 3908b6baf2ac20138915b5ca98338b4f063954d8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 26 Feb 2025 15:27:03 +0100 Subject: [PATCH 1136/1585] selftests/ftrace: Let fprobe test consider already enabled functions The fprobe test fails on Fedora 41 since the fprobe test assumption that the number of enabled_functions is zero before the test starts is not necessarily true. Some user space tools, like systemd, add BPF programs that attach to functions. Those will show up in the enabled_functions table and must be taken into account by the fprobe test. Therefore count the number of lines of enabled_functions before tests start, and use that as base when comparing expected results. Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Alexander Gordeev Link: https://lore.kernel.org/20250226142703.910860-1-hca@linux.ibm.com Fixes: e85c5e9792b9 ("selftests/ftrace: Update fprobe test to check enabled_functions file") Signed-off-by: Heiko Carstens Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- .../test.d/dynevent/add_remove_fprobe.tc | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc index 449f9d8be7462..73f6c6fcecabe 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc @@ -10,12 +10,16 @@ PLACE=$FUNCTION_FORK PLACE2="kmem_cache_free" PLACE3="schedule_timeout" +# Some functions may have BPF programs attached, therefore +# count already enabled_functions before tests start +ocnt=`cat enabled_functions | wc -l` + echo "f:myevent1 $PLACE" >> dynamic_events # Make sure the event is attached and is the only one grep -q $PLACE enabled_functions cnt=`cat enabled_functions | wc -l` -if [ $cnt -ne 1 ]; then +if [ $cnt -ne $((ocnt + 1)) ]; then exit_fail fi @@ -23,7 +27,7 @@ echo "f:myevent2 $PLACE%return" >> dynamic_events # It should till be the only attached function cnt=`cat enabled_functions | wc -l` -if [ $cnt -ne 1 ]; then +if [ $cnt -ne $((ocnt + 1)) ]; then exit_fail fi @@ -32,7 +36,7 @@ echo "f:myevent3 $PLACE2" >> dynamic_events grep -q $PLACE2 enabled_functions cnt=`cat enabled_functions | wc -l` -if [ $cnt -ne 2 ]; then +if [ $cnt -ne $((ocnt + 2)) ]; then exit_fail fi @@ -49,7 +53,7 @@ grep -q myevent1 dynamic_events # should still have 2 left cnt=`cat enabled_functions | wc -l` -if [ $cnt -ne 2 ]; then +if [ $cnt -ne $((ocnt + 2)) ]; then exit_fail fi @@ -57,7 +61,7 @@ echo > dynamic_events # Should have none left cnt=`cat enabled_functions | wc -l` -if [ $cnt -ne 0 ]; then +if [ $cnt -ne $ocnt ]; then exit_fail fi @@ -65,7 +69,7 @@ echo "f:myevent4 $PLACE" >> dynamic_events # Should only have one enabled cnt=`cat enabled_functions | wc -l` -if [ $cnt -ne 1 ]; then +if [ $cnt -ne $((ocnt + 1)) ]; then exit_fail fi @@ -73,7 +77,7 @@ echo > dynamic_events # Should have none left cnt=`cat enabled_functions | wc -l` -if [ $cnt -ne 0 ]; then +if [ $cnt -ne $ocnt ]; then exit_fail fi -- GitLab From a1a7eb89ca0b89dc1c326eeee2596f263291aca3 Mon Sep 17 00:00:00 2001 From: Nikolay Kuratov Date: Thu, 6 Feb 2025 12:01:56 +0300 Subject: [PATCH 1137/1585] ftrace: Avoid potential division by zero in function_stat_show() Check whether denominator expression x * (x - 1) * 1000 mod {2^32, 2^64} produce zero and skip stddev computation in that case. For now don't care about rec->counter * rec->counter overflow because rec->time * rec->time overflow will likely happen earlier. Cc: stable@vger.kernel.org Cc: Wen Yang Cc: Mark Rutland Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250206090156.1561783-1-kniv@yandex-team.ru Fixes: e31f7939c1c27 ("ftrace: Avoid potential division by zero in function profiler") Signed-off-by: Nikolay Kuratov Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6b0c25761ccb1..fc88e0688daf0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -540,6 +540,7 @@ static int function_stat_show(struct seq_file *m, void *v) static struct trace_seq s; unsigned long long avg; unsigned long long stddev; + unsigned long long stddev_denom; #endif guard(mutex)(&ftrace_profile_lock); @@ -559,23 +560,19 @@ static int function_stat_show(struct seq_file *m, void *v) #ifdef CONFIG_FUNCTION_GRAPH_TRACER seq_puts(m, " "); - /* Sample standard deviation (s^2) */ - if (rec->counter <= 1) - stddev = 0; - else { - /* - * Apply Welford's method: - * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) - */ + /* + * Variance formula: + * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) + * Maybe Welford's method is better here? + * Divide only by 1000 for ns^2 -> us^2 conversion. + * trace_print_graph_duration will divide by 1000 again. + */ + stddev = 0; + stddev_denom = rec->counter * (rec->counter - 1) * 1000; + if (stddev_denom) { stddev = rec->counter * rec->time_squared - rec->time * rec->time; - - /* - * Divide only 1000 for ns^2 -> us^2 conversion. - * trace_print_graph_duration will divide 1000 again. - */ - stddev = div64_ul(stddev, - rec->counter * (rec->counter - 1) * 1000); + stddev = div64_ul(stddev, stddev_denom); } trace_seq_init(&s); -- GitLab From ad95bab0cd28ed77c2c0d0b6e76e03e031391064 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Wed, 26 Feb 2025 14:42:18 +0100 Subject: [PATCH 1138/1585] nvme-tcp: fix potential memory corruption in nvme_tcp_recv_pdu() nvme_tcp_recv_pdu() doesn't check the validity of the header length. When header digests are enabled, a target might send a packet with an invalid header length (e.g. 255), causing nvme_tcp_verify_hdgst() to access memory outside the allocated area and cause memory corruptions by overwriting it with the calculated digest. Fix this by rejecting packets with an unexpected header length. Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver") Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 1094cbbec169f..23f11527d29d0 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -217,6 +217,19 @@ static inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue) return queue - queue->ctrl->queues; } +static inline bool nvme_tcp_recv_pdu_supported(enum nvme_tcp_pdu_type type) +{ + switch (type) { + case nvme_tcp_c2h_term: + case nvme_tcp_c2h_data: + case nvme_tcp_r2t: + case nvme_tcp_rsp: + return true; + default: + return false; + } +} + /* * Check if the queue is TLS encrypted */ @@ -818,6 +831,16 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, return 0; hdr = queue->pdu; + if (unlikely(hdr->hlen != sizeof(struct nvme_tcp_rsp_pdu))) { + if (!nvme_tcp_recv_pdu_supported(hdr->type)) + goto unsupported_pdu; + + dev_err(queue->ctrl->ctrl.device, + "pdu type %d has unexpected header length (%d)\n", + hdr->type, hdr->hlen); + return -EPROTO; + } + if (unlikely(hdr->type == nvme_tcp_c2h_term)) { /* * C2HTermReq never includes Header or Data digests. @@ -850,10 +873,13 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, nvme_tcp_init_recv_ctx(queue); return nvme_tcp_handle_r2t(queue, (void *)queue->pdu); default: - dev_err(queue->ctrl->ctrl.device, - "unsupported pdu type (%d)\n", hdr->type); - return -EINVAL; + goto unsupported_pdu; } + +unsupported_pdu: + dev_err(queue->ctrl->ctrl.device, + "unsupported pdu type (%d)\n", hdr->type); + return -EINVAL; } static inline void nvme_tcp_end_request(struct request *rq, u16 status) -- GitLab From a16f88964c647103dad7743a484b216d488a6352 Mon Sep 17 00:00:00 2001 From: Meir Elisha Date: Wed, 26 Feb 2025 09:28:12 +0200 Subject: [PATCH 1139/1585] nvmet-tcp: Fix a possible sporadic response drops in weakly ordered arch The order in which queue->cmd and rcv_state are updated is crucial. If these assignments are reordered by the compiler, the worker might not get queued in nvmet_tcp_queue_response(), hanging the IO. to enforce the the correct reordering, set rcv_state using smp_store_release(). Fixes: bdaf13279192 ("nvmet-tcp: fix a segmentation fault during io parsing error") Signed-off-by: Meir Elisha Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 7c51c2a8c109a..4f9cac8a5abe0 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -571,10 +571,16 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req) struct nvmet_tcp_cmd *cmd = container_of(req, struct nvmet_tcp_cmd, req); struct nvmet_tcp_queue *queue = cmd->queue; + enum nvmet_tcp_recv_state queue_state; + struct nvmet_tcp_cmd *queue_cmd; struct nvme_sgl_desc *sgl; u32 len; - if (unlikely(cmd == queue->cmd)) { + /* Pairs with store_release in nvmet_prepare_receive_pdu() */ + queue_state = smp_load_acquire(&queue->rcv_state); + queue_cmd = READ_ONCE(queue->cmd); + + if (unlikely(cmd == queue_cmd)) { sgl = &cmd->req.cmd->common.dptr.sgl; len = le32_to_cpu(sgl->length); @@ -583,7 +589,7 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req) * Avoid using helpers, this might happen before * nvmet_req_init is completed. */ - if (queue->rcv_state == NVMET_TCP_RECV_PDU && + if (queue_state == NVMET_TCP_RECV_PDU && len && len <= cmd->req.port->inline_data_size && nvme_is_write(cmd->req.cmd)) return; @@ -847,8 +853,9 @@ static void nvmet_prepare_receive_pdu(struct nvmet_tcp_queue *queue) { queue->offset = 0; queue->left = sizeof(struct nvme_tcp_hdr); - queue->cmd = NULL; - queue->rcv_state = NVMET_TCP_RECV_PDU; + WRITE_ONCE(queue->cmd, NULL); + /* Ensure rcv_state is visible only after queue->cmd is set */ + smp_store_release(&queue->rcv_state, NVMET_TCP_RECV_PDU); } static void nvmet_tcp_free_crypto(struct nvmet_tcp_queue *queue) -- GitLab From 00371a3f48775967950c2fe3ec97b7c786ca956d Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 26 Feb 2025 09:52:05 +0100 Subject: [PATCH 1140/1585] stmmac: loongson: Pass correct arg to PCI function pcim_iomap_regions() should receive the driver's name as its third parameter, not the PCI device's name. Define the driver name with a macro and use it at the appropriate places, including pcim_iomap_regions(). Cc: stable@vger.kernel.org # v5.14+ Fixes: 30bba69d7db4 ("stmmac: pci: Add dwmac support for Loongson") Signed-off-by: Philipp Stanner Reviewed-by: Andrew Lunn Reviewed-by: Yanteng Si Tested-by: Henry Chen Link: https://patch.msgid.link/20250226085208.97891-2-phasta@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index f5acfb7d4ff65..ab7c2750c1042 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -11,6 +11,8 @@ #include "dwmac_dma.h" #include "dwmac1000.h" +#define DRIVER_NAME "dwmac-loongson-pci" + /* Normal Loongson Tx Summary */ #define DMA_INTR_ENA_NIE_TX_LOONGSON 0x00040000 /* Normal Loongson Rx Summary */ @@ -568,7 +570,7 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id for (i = 0; i < PCI_STD_NUM_BARS; i++) { if (pci_resource_len(pdev, i) == 0) continue; - ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev)); + ret = pcim_iomap_regions(pdev, BIT(0), DRIVER_NAME); if (ret) goto err_disable_device; break; @@ -687,7 +689,7 @@ static const struct pci_device_id loongson_dwmac_id_table[] = { MODULE_DEVICE_TABLE(pci, loongson_dwmac_id_table); static struct pci_driver loongson_dwmac_driver = { - .name = "dwmac-loongson-pci", + .name = DRIVER_NAME, .id_table = loongson_dwmac_id_table, .probe = loongson_dwmac_probe, .remove = loongson_dwmac_remove, -- GitLab From c72e455b89f216b43cd0dbb518036ec4c98f5c46 Mon Sep 17 00:00:00 2001 From: Manuel Fombuena Date: Tue, 25 Feb 2025 22:01:02 +0000 Subject: [PATCH 1141/1585] leds: leds-st1202: Fix NULL pointer access on race condition st1202_dt_init() calls devm_led_classdev_register_ext() before the internal data structures are properly set up, so the LEDs become visible to user space while being partially initialized, leading to a window where trying to access them causes a NULL pointer access. Move devm_led_classdev_register_ext() from DT initialization to the end of the probe function when DT and hardware are fully initialized and ready to interact with user space. Fixes: 259230378c65 ("leds: Add LED1202 I2C driver") Signed-off-by: Manuel Fombuena Link: https://lore.kernel.org/r/CWLP123MB54732771AC0CE5491B3C84DCC5C32@CWLP123MB5473.GBRP123.PROD.OUTLOOK.COM Signed-off-by: Lee Jones --- drivers/leds/leds-st1202.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/leds/leds-st1202.c b/drivers/leds/leds-st1202.c index b691c4886993f..e894b3f9a0f46 100644 --- a/drivers/leds/leds-st1202.c +++ b/drivers/leds/leds-st1202.c @@ -261,8 +261,6 @@ static int st1202_dt_init(struct st1202_chip *chip) int err, reg; for_each_available_child_of_node_scoped(dev_of_node(dev), child) { - struct led_init_data init_data = {}; - err = of_property_read_u32(child, "reg", ®); if (err) return dev_err_probe(dev, err, "Invalid register\n"); @@ -276,15 +274,6 @@ static int st1202_dt_init(struct st1202_chip *chip) led->led_cdev.pattern_set = st1202_led_pattern_set; led->led_cdev.pattern_clear = st1202_led_pattern_clear; led->led_cdev.default_trigger = "pattern"; - - init_data.fwnode = led->fwnode; - init_data.devicename = "st1202"; - init_data.default_label = ":"; - - err = devm_led_classdev_register_ext(dev, &led->led_cdev, &init_data); - if (err < 0) - return dev_err_probe(dev, err, "Failed to register LED class device\n"); - led->led_cdev.brightness_set = st1202_brightness_set; led->led_cdev.brightness_get = st1202_brightness_get; } @@ -368,6 +357,7 @@ static int st1202_probe(struct i2c_client *client) return ret; for (int i = 0; i < ST1202_MAX_LEDS; i++) { + struct led_init_data init_data = {}; led = &chip->leds[i]; led->chip = chip; led->led_num = i; @@ -384,6 +374,15 @@ static int st1202_probe(struct i2c_client *client) if (ret < 0) return dev_err_probe(&client->dev, ret, "Failed to clear LED pattern\n"); + + init_data.fwnode = led->fwnode; + init_data.devicename = "st1202"; + init_data.default_label = ":"; + + ret = devm_led_classdev_register_ext(&client->dev, &led->led_cdev, &init_data); + if (ret < 0) + return dev_err_probe(&client->dev, ret, + "Failed to register LED class device\n"); } return 0; -- GitLab From 3414cda9d41f41703832d0abd01063dd8de82b89 Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Thu, 27 Feb 2025 18:51:06 +0100 Subject: [PATCH 1142/1585] ALSA: hda/realtek: Remove (revert) duplicate Ally X config In commit 1e9c708dc3ae ("ALSA: hda/tas2781: Add new quirk for Lenovo, ASUS, Dell projects") Baojun adds a bunch of projects to the file, including for the Ally X. Turns out the initial Ally X was not sorted properly, so the kernel had 2 quirks for it. The previous quirk overrode the new one due to being earlier and they are different. When AB testing, the normal pin fixup seems to work ok but causes a bit of a minor popping. Given the other config is more complicated and may cause undefined behavior, revert it. Fixes: 1e9c708dc3ae ("ALSA: hda/tas2781: Add new quirk for Lenovo, ASUS, Dell projects") Signed-off-by: Antheas Kapenekakis Link: https://patch.msgid.link/20250227175107.33432-2-lkml@antheas.dev Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c735f630ecb5a..f92de4a95a4c6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7690,7 +7690,6 @@ enum { ALC285_FIXUP_THINKPAD_X1_GEN7, ALC285_FIXUP_THINKPAD_HEADSET_JACK, ALC294_FIXUP_ASUS_ALLY, - ALC294_FIXUP_ASUS_ALLY_X, ALC294_FIXUP_ASUS_ALLY_PINS, ALC294_FIXUP_ASUS_ALLY_VERBS, ALC294_FIXUP_ASUS_ALLY_SPEAKER, @@ -9138,12 +9137,6 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC294_FIXUP_ASUS_ALLY_PINS }, - [ALC294_FIXUP_ASUS_ALLY_X] = { - .type = HDA_FIXUP_FUNC, - .v.func = tas2781_fixup_i2c, - .chained = true, - .chain_id = ALC294_FIXUP_ASUS_ALLY_PINS - }, [ALC294_FIXUP_ASUS_ALLY_PINS] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -10645,7 +10638,6 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1740, "ASUS UX430UA", ALC295_FIXUP_ASUS_DACS), SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_DUAL_SPK), SND_PCI_QUIRK(0x1043, 0x17f3, "ROG Ally NR2301L/X", ALC294_FIXUP_ASUS_ALLY), - SND_PCI_QUIRK(0x1043, 0x1eb3, "ROG Ally X RC72LA", ALC294_FIXUP_ASUS_ALLY_X), SND_PCI_QUIRK(0x1043, 0x1863, "ASUS UX6404VI/VV", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1881, "ASUS Zephyrus S/M", ALC294_FIXUP_ASUS_GX502_PINS), SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC), -- GitLab From 12784ca33b62fd327631749e6a0cd2a10110a56c Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Thu, 27 Feb 2025 18:51:07 +0100 Subject: [PATCH 1143/1585] ALSA: hda/realtek: Fix Asus Z13 2025 audio Use the basic quirk for this type of amplifier. Sound works in speakers, headphones, and microphone. Whereas none worked before. Tested-by: Kyle Gospodnetich Signed-off-by: Antheas Kapenekakis Link: https://patch.msgid.link/20250227175107.33432-3-lkml@antheas.dev Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f92de4a95a4c6..926007b4a9ba7 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10692,6 +10692,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1f1f, "ASUS H7604JI/JV/J3D", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1f62, "ASUS UX7602ZM", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1f92, "ASUS ROG Flow X16", ALC289_FIXUP_ASUS_GA401), + SND_PCI_QUIRK(0x1043, 0x1fb3, "ASUS ROG Flow Z13 GZ302EA", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), SND_PCI_QUIRK(0x1043, 0x31d0, "ASUS Zen AIO 27 Z272SD_A272SD", ALC274_FIXUP_ASUS_ZEN_AIO_27), SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), -- GitLab From f479ecc5ef15ed8d774968c1a8726a49420f11a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20B=C3=A1rta?= Date: Thu, 27 Feb 2025 17:12:55 +0100 Subject: [PATCH 1144/1585] ALSA: hda: Fix speakers on ASUS EXPERTBOOK P5405CSA 1.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After some digging around I have found that this laptop has Cirrus's smart aplifiers connected to SPI bus (spi1-CSC3551:00-cs35l41-hda). To get them correctly detected and working I had to modify patch_realtek.c with ASUS EXPERTBOOK P5405CSA 1.0 SystemID (0x1043, 0x1f63) and add corresponding hda_quirk (ALC245_FIXUP_CS35L41_SPI_2). Signed-off-by: Daniel Bárta Link: https://patch.msgid.link/20250227161256.18061-2-daniel.barta@trustlab.cz Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 926007b4a9ba7..d58743b955f81 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10691,6 +10691,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1f12, "ASUS UM5302", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1f1f, "ASUS H7604JI/JV/J3D", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1f62, "ASUS UX7602ZM", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x1f63, "ASUS P5405CSA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1f92, "ASUS ROG Flow X16", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1fb3, "ASUS ROG Flow Z13 GZ302EA", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), -- GitLab From c5b0320bbf79548fbf058a3925a07c8f281beeab Mon Sep 17 00:00:00 2001 From: Alejandro Jimenez Date: Mon, 6 Jan 2025 19:14:13 +0000 Subject: [PATCH 1145/1585] iommu/amd: Preserve default DTE fields when updating Host Page Table Root When updating the page table root field on the DTE, avoid overwriting any bits that are already set. The earlier call to make_clear_dte() writes default values that all DTEs must have set (currently DTE[V]), and those must be preserved. Currently this doesn't cause problems since the page table root update is the first field that is set after make_clear_dte() is called, and DTE_FLAG_V is set again later along with the permission bits (IR/IW). Remove this redundant assignment too. Fixes: fd5dff9de4be ("iommu/amd: Modify set_dte_entry() to use 256-bit DTE helpers") Signed-off-by: Alejandro Jimenez Reviewed-by: Dheeraj Kumar Srivastava Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20250106191413.3107140-1-alejandro.j.jimenez@oracle.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index b48a72bd7b23d..cd5116d8c3b28 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2043,12 +2043,12 @@ static void set_dte_entry(struct amd_iommu *iommu, make_clear_dte(dev_data, dte, &new); if (domain->iop.mode != PAGE_MODE_NONE) - new.data[0] = iommu_virt_to_phys(domain->iop.root); + new.data[0] |= iommu_virt_to_phys(domain->iop.root); new.data[0] |= (domain->iop.mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; - new.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V; + new.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW; /* * When SNP is enabled, we can only support TV=1 with non-zero domain ID. -- GitLab From 64f792981e35e191eb619f6f2fefab76cc7d6112 Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Fri, 28 Feb 2025 18:27:25 +0800 Subject: [PATCH 1146/1585] iommu/vt-d: Remove device comparison in context_setup_pass_through_cb Remove the device comparison check in context_setup_pass_through_cb. pci_for_each_dma_alias already makes a decision on whether the callback function should be called for a device. With the check in place it will fail to create context entries for aliases as it walks up to the root bus. Fixes: 2031c469f816 ("iommu/vt-d: Add support for static identity domain") Closes: https://lore.kernel.org/linux-iommu/82499eb6-00b7-4f83-879a-e97b4144f576@linux.intel.com/ Cc: stable@vger.kernel.org Signed-off-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20250224180316.140123-1-jsnitsel@redhat.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index cc46098f875b1..4d8d4593c9c81 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4378,9 +4378,6 @@ static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void * { struct device *dev = data; - if (dev != &pdev->dev) - return 0; - return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff); } -- GitLab From b150654f74bf0df8e6a7936d5ec51400d9ec06d8 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 28 Feb 2025 18:27:26 +0800 Subject: [PATCH 1147/1585] iommu/vt-d: Fix suspicious RCU usage Commit ("iommu/vt-d: Allocate DMAR fault interrupts locally") moved the call to enable_drhd_fault_handling() to a code path that does not hold any lock while traversing the drhd list. Fix it by ensuring the dmar_global_lock lock is held when traversing the drhd list. Without this fix, the following warning is triggered: ============================= WARNING: suspicious RCU usage 6.14.0-rc3 #55 Not tainted ----------------------------- drivers/iommu/intel/dmar.c:2046 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 1 2 locks held by cpuhp/1/23: #0: ffffffff84a67c50 (cpu_hotplug_lock){++++}-{0:0}, at: cpuhp_thread_fun+0x87/0x2c0 #1: ffffffff84a6a380 (cpuhp_state-up){+.+.}-{0:0}, at: cpuhp_thread_fun+0x87/0x2c0 stack backtrace: CPU: 1 UID: 0 PID: 23 Comm: cpuhp/1 Not tainted 6.14.0-rc3 #55 Call Trace: dump_stack_lvl+0xb7/0xd0 lockdep_rcu_suspicious+0x159/0x1f0 ? __pfx_enable_drhd_fault_handling+0x10/0x10 enable_drhd_fault_handling+0x151/0x180 cpuhp_invoke_callback+0x1df/0x990 cpuhp_thread_fun+0x1ea/0x2c0 smpboot_thread_fn+0x1f5/0x2e0 ? __pfx_smpboot_thread_fn+0x10/0x10 kthread+0x12a/0x2d0 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x4a/0x60 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Holding the lock in enable_drhd_fault_handling() triggers a lockdep splat about a possible deadlock between dmar_global_lock and cpu_hotplug_lock. This is avoided by not holding dmar_global_lock when calling iommu_device_register(), which initiates the device probe process. Fixes: d74169ceb0d2 ("iommu/vt-d: Allocate DMAR fault interrupts locally") Reported-and-tested-by: Ido Schimmel Closes: https://lore.kernel.org/linux-iommu/Zx9OwdLIc_VoQ0-a@shredder.mtl.com/ Tested-by: Breno Leitao Cc: stable@vger.kernel.org Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20250218022422.2315082-1-baolu.lu@linux.intel.com Tested-by: Ido Schimmel Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 1 + drivers/iommu/intel/iommu.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 9f424acf474e9..e540092d664d2 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -2043,6 +2043,7 @@ int enable_drhd_fault_handling(unsigned int cpu) /* * Enable fault control interrupt. */ + guard(rwsem_read)(&dmar_global_lock); for_each_iommu(iommu, drhd) { u32 fault_status; int ret; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 4d8d4593c9c81..bf1f0c8143483 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3146,7 +3146,14 @@ int __init intel_iommu_init(void) iommu_device_sysfs_add(&iommu->iommu, NULL, intel_iommu_groups, "%s", iommu->name); + /* + * The iommu device probe is protected by the iommu_probe_device_lock. + * Release the dmar_global_lock before entering the device probe path + * to avoid unnecessary lock order splat. + */ + up_read(&dmar_global_lock); iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); + down_read(&dmar_global_lock); iommu_pmu_register(iommu); } -- GitLab From b654f7a51ffb386131de42aa98ed831f8c126546 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 28 Feb 2025 21:26:56 +0800 Subject: [PATCH 1148/1585] block: fix 'kmem_cache of name 'bio-108' already exists' Device mapper bioset often has big bio_slab size, which can be more than 1000, then 8byte can't hold the slab name any more, cause the kmem_cache allocation warning of 'kmem_cache of name 'bio-108' already exists'. Fix the warning by extending bio_slab->name to 12 bytes, but fix output of /proc/slabinfo Reported-by: Guangwu Zhang Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250228132656.2838008-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bio.c b/block/bio.c index f0c416e5931d9..6ac5983ba51e6 100644 --- a/block/bio.c +++ b/block/bio.c @@ -77,7 +77,7 @@ struct bio_slab { struct kmem_cache *slab; unsigned int slab_ref; unsigned int slab_size; - char name[8]; + char name[12]; }; static DEFINE_MUTEX(bio_slab_lock); static DEFINE_XARRAY(bio_slabs); -- GitLab From 64407f4b5807dc9dec8135e1bfd45d2cb11b4ea0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 28 Feb 2025 16:03:47 +0300 Subject: [PATCH 1149/1585] gpiolib: Fix Oops in gpiod_direction_input_nonotify() The gpiod_direction_input_nonotify() function is supposed to return zero if the direction for the pin is input. But instead it accidentally returns GPIO_LINE_DIRECTION_IN (1) which will be cast into an ERR_PTR() in gpiochip_request_own_desc(). The callers dereference it and it leads to a crash. I changed gpiod_direction_output_raw_commit() just for consistency but returning GPIO_LINE_DIRECTION_OUT (0) is fine. Cc: stable@vger.kernel.org Fixes: 9d846b1aebbe ("gpiolib: check the return value of gpio_chip::get_direction()") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/254f3925-3015-4c9d-aac5-bb9b4b2cd2c5@stanley.mountain [Bartosz: moved the variable declarations to the top of the functions] Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index fc19df5a64c2b..8741600af7efb 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2712,7 +2712,7 @@ EXPORT_SYMBOL_GPL(gpiod_direction_input); int gpiod_direction_input_nonotify(struct gpio_desc *desc) { - int ret = 0; + int ret = 0, dir; CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) @@ -2740,12 +2740,12 @@ int gpiod_direction_input_nonotify(struct gpio_desc *desc) ret = guard.gc->direction_input(guard.gc, gpio_chip_hwgpio(desc)); } else if (guard.gc->get_direction) { - ret = guard.gc->get_direction(guard.gc, + dir = guard.gc->get_direction(guard.gc, gpio_chip_hwgpio(desc)); - if (ret < 0) - return ret; + if (dir < 0) + return dir; - if (ret != GPIO_LINE_DIRECTION_IN) { + if (dir != GPIO_LINE_DIRECTION_IN) { gpiod_warn(desc, "%s: missing direction_input() operation and line is output\n", __func__); @@ -2764,7 +2764,7 @@ int gpiod_direction_input_nonotify(struct gpio_desc *desc) static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value) { - int val = !!value, ret = 0; + int val = !!value, ret = 0, dir; CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) @@ -2788,12 +2788,12 @@ static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value) } else { /* Check that we are in output mode if we can */ if (guard.gc->get_direction) { - ret = guard.gc->get_direction(guard.gc, + dir = guard.gc->get_direction(guard.gc, gpio_chip_hwgpio(desc)); - if (ret < 0) - return ret; + if (dir < 0) + return dir; - if (ret != GPIO_LINE_DIRECTION_OUT) { + if (dir != GPIO_LINE_DIRECTION_OUT) { gpiod_warn(desc, "%s: missing direction_output() operation\n", __func__); -- GitLab From be45bc4eff33d9a7dae84a2150f242a91a617402 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 24 Feb 2025 08:54:41 -0800 Subject: [PATCH 1150/1585] KVM: SVM: Set RFLAGS.IF=1 in C code, to get VMRUN out of the STI shadow Enable/disable local IRQs, i.e. set/clear RFLAGS.IF, in the common svm_vcpu_enter_exit() just after/before guest_state_{enter,exit}_irqoff() so that VMRUN is not executed in an STI shadow. AMD CPUs have a quirk (some would say "bug"), where the STI shadow bleeds into the guest's intr_state field if a #VMEXIT occurs during injection of an event, i.e. if the VMRUN doesn't complete before the subsequent #VMEXIT. The spurious "interrupts masked" state is relatively benign, as it only occurs during event injection and is transient. Because KVM is already injecting an event, the guest can't be in HLT, and if KVM is querying IRQ blocking for injection, then KVM would need to force an immediate exit anyways since injecting multiple events is impossible. However, because KVM copies int_state verbatim from vmcb02 to vmcb12, the spurious STI shadow is visible to L1 when running a nested VM, which can trip sanity checks, e.g. in VMware's VMM. Hoist the STI+CLI all the way to C code, as the aforementioned calls to guest_state_{enter,exit}_irqoff() already inform lockdep that IRQs are enabled/disabled, and taking a fault on VMRUN with RFLAGS.IF=1 is already possible. I.e. if there's kernel code that is confused by running with RFLAGS.IF=1, then it's already a problem. In practice, since GIF=0 also blocks NMIs, the only change in exposure to non-KVM code (relative to surrounding VMRUN with STI+CLI) is exception handling code, and except for the kvm_rebooting=1 case, all exception in the core VM-Enter/VM-Exit path are fatal. Use the "raw" variants to enable/disable IRQs to avoid tracing in the "no instrumentation" code; the guest state helpers also take care of tracing IRQ state. Oppurtunstically document why KVM needs to do STI in the first place. Reported-by: Doug Covelli Closes: https://lore.kernel.org/all/CADH9ctBs1YPmE4aCfGPNBwA10cA8RuAk2gO7542DjMZgs4uzJQ@mail.gmail.com Fixes: f14eec0a3203 ("KVM: SVM: move more vmentry code to assembly") Cc: stable@vger.kernel.org Reviewed-by: Jim Mattson Link: https://lore.kernel.org/r/20250224165442.2338294-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 14 ++++++++++++++ arch/x86/kvm/svm/vmenter.S | 10 +--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index a713c803a3a37..0d299f3f921e6 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4189,6 +4189,18 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in guest_state_enter_irqoff(); + /* + * Set RFLAGS.IF prior to VMRUN, as the host's RFLAGS.IF at the time of + * VMRUN controls whether or not physical IRQs are masked (KVM always + * runs with V_INTR_MASKING_MASK). Toggle RFLAGS.IF here to avoid the + * temptation to do STI+VMRUN+CLI, as AMD CPUs bleed the STI shadow + * into guest state if delivery of an event during VMRUN triggers a + * #VMEXIT, and the guest_state transitions already tell lockdep that + * IRQs are being enabled/disabled. Note! GIF=0 for the entirety of + * this path, so IRQs aren't actually unmasked while running host code. + */ + raw_local_irq_enable(); + amd_clear_divider(); if (sev_es_guest(vcpu->kvm)) @@ -4197,6 +4209,8 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in else __svm_vcpu_run(svm, spec_ctrl_intercepted); + raw_local_irq_disable(); + guest_state_exit_irqoff(); } diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 2ed80aea3bb13..0c61153b275f6 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -170,12 +170,8 @@ SYM_FUNC_START(__svm_vcpu_run) mov VCPU_RDI(%_ASM_DI), %_ASM_DI /* Enter guest mode */ - sti - 3: vmrun %_ASM_AX 4: - cli - /* Pop @svm to RAX while it's the only available register. */ pop %_ASM_AX @@ -340,12 +336,8 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) mov KVM_VMCB_pa(%rax), %rax /* Enter guest mode */ - sti - 1: vmrun %rax - -2: cli - +2: /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT -- GitLab From f3513a335e71296a1851167b4e3b0e2bf09fc5f1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 24 Feb 2025 08:54:42 -0800 Subject: [PATCH 1151/1585] KVM: selftests: Assert that STI blocking isn't set after event injection Add an L1 (guest) assert to the nested exceptions test to verify that KVM doesn't put VMRUN in an STI shadow (AMD CPUs bleed the shadow into the guest's int_state if a #VMEXIT occurs before VMRUN fully completes). Add a similar assert to the VMX side as well, because why not. Reviewed-by: Jim Mattson Link: https://lore.kernel.org/r/20250224165442.2338294-3-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86/nested_exceptions_test.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c index 3eb0313ffa397..3641a42934acb 100644 --- a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c +++ b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c @@ -85,6 +85,7 @@ static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector, GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector)); GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code); + GUEST_ASSERT(!ctrl->int_state); } static void l1_svm_code(struct svm_test_data *svm) @@ -122,6 +123,7 @@ static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code) GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI); GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector); GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code); + GUEST_ASSERT(!vmreadz(GUEST_INTERRUPTIBILITY_INFO)); } static void l1_vmx_code(struct vmx_pages *vmx) -- GitLab From ee89e8013383d50a27ea9bf3c8a69eed6799856f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 27 Feb 2025 14:24:06 -0800 Subject: [PATCH 1152/1585] KVM: SVM: Drop DEBUGCTL[5:2] from guest's effective value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop bits 5:2 from the guest's effective DEBUGCTL value, as AMD changed the architectural behavior of the bits and broke backwards compatibility. On CPUs without BusLockTrap (or at least, in APMs from before ~2023), bits 5:2 controlled the behavior of external pins: Performance-Monitoring/Breakpoint Pin-Control (PBi)—Bits 5:2, read/write. Software uses thesebits to control the type of information reported by the four external performance-monitoring/breakpoint pins on the processor. When a PBi bit is cleared to 0, the corresponding external pin (BPi) reports performance-monitor information. When a PBi bit is set to 1, the corresponding external pin (BPi) reports breakpoint information. With the introduction of BusLockTrap, presumably to be compatible with Intel CPUs, AMD redefined bit 2 to be BLCKDB: Bus Lock #DB Trap (BLCKDB)—Bit 2, read/write. Software sets this bit to enable generation of a #DB trap following successful execution of a bus lock when CPL is > 0. and redefined bits 5:3 (and bit 6) as "6:3 Reserved MBZ". Ideally, KVM would treat bits 5:2 as reserved. Defer that change to a feature cleanup to avoid breaking existing guest in LTS kernels. For now, drop the bits to retain backwards compatibility (of a sort). Note, dropping bits 5:2 is still a guest-visible change, e.g. if the guest is enabling LBRs *and* the legacy PBi bits, then the state of the PBi bits is visible to the guest, whereas now the guest will always see '0'. Reported-by: Ravi Bangoria Cc: stable@vger.kernel.org Reviewed-and-tested-by: Ravi Bangoria Link: https://lore.kernel.org/r/20250227222411.3490595-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 12 ++++++++++++ arch/x86/kvm/svm/svm.h | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 0d299f3f921e6..bdafbde1f211a 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3165,6 +3165,18 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) kvm_pr_unimpl_wrmsr(vcpu, ecx, data); break; } + + /* + * AMD changed the architectural behavior of bits 5:2. On CPUs + * without BusLockTrap, bits 5:2 control "external pins", but + * on CPUs that support BusLockDetect, bit 2 enables BusLockTrap + * and bits 5:3 are reserved-to-zero. Sadly, old KVM allowed + * the guest to set bits 5:2 despite not actually virtualizing + * Performance-Monitoring/Breakpoint external pins. Drop bits + * 5:2 for backwards compatibility. + */ + data &= ~GENMASK(5, 2); + if (data & DEBUGCTL_RESERVED_BITS) return 1; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 9d7cdb8fbf872..3a931d3885e71 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -584,7 +584,7 @@ static inline bool is_vnmi_enabled(struct vcpu_svm *svm) /* svm.c */ #define MSR_INVALID 0xffffffffU -#define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) +#define DEBUGCTL_RESERVED_BITS (~(DEBUGCTLMSR_BTF | DEBUGCTLMSR_LBR)) extern bool dump_invalid_vmcb; -- GitLab From d0eac42f5cecce009d315655bee341304fbe075e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 27 Feb 2025 14:24:07 -0800 Subject: [PATCH 1153/1585] KVM: SVM: Suppress DEBUGCTL.BTF on AMD Mark BTF as reserved in DEBUGCTL on AMD, as KVM doesn't actually support BTF, and fully enabling BTF virtualization is non-trivial due to interactions with the emulator, guest_debug, #DB interception, nested SVM, etc. Don't inject #GP if the guest attempts to set BTF, as there's no way to communicate lack of support to the guest, and instead suppress the flag and treat the WRMSR as (partially) unsupported. In short, make KVM behave the same on AMD and Intel (VMX already squashes BTF). Note, due to other bugs in KVM's handling of DEBUGCTL, the only way BTF has "worked" in any capacity is if the guest simultaneously enables LBRs. Reported-by: Ravi Bangoria Cc: stable@vger.kernel.org Reviewed-and-tested-by: Ravi Bangoria Link: https://lore.kernel.org/r/20250227222411.3490595-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 9 +++++++++ arch/x86/kvm/svm/svm.h | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index bdafbde1f211a..ed48465186961 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3177,6 +3177,15 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) */ data &= ~GENMASK(5, 2); + /* + * Suppress BTF as KVM doesn't virtualize BTF, but there's no + * way to communicate lack of support to the guest. + */ + if (data & DEBUGCTLMSR_BTF) { + kvm_pr_unimpl_wrmsr(vcpu, MSR_IA32_DEBUGCTLMSR, data); + data &= ~DEBUGCTLMSR_BTF; + } + if (data & DEBUGCTL_RESERVED_BITS) return 1; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 3a931d3885e71..ea44c1da5a7c9 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -584,7 +584,7 @@ static inline bool is_vnmi_enabled(struct vcpu_svm *svm) /* svm.c */ #define MSR_INVALID 0xffffffffU -#define DEBUGCTL_RESERVED_BITS (~(DEBUGCTLMSR_BTF | DEBUGCTLMSR_LBR)) +#define DEBUGCTL_RESERVED_BITS (~DEBUGCTLMSR_LBR) extern bool dump_invalid_vmcb; -- GitLab From fb71c795935652fa20eaf9517ca9547f5af99a76 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 27 Feb 2025 14:24:08 -0800 Subject: [PATCH 1154/1585] KVM: x86: Snapshot the host's DEBUGCTL in common x86 Move KVM's snapshot of DEBUGCTL to kvm_vcpu_arch and take the snapshot in common x86, so that SVM can also use the snapshot. Opportunistically change the field to a u64. While bits 63:32 are reserved on AMD, not mentioned at all in Intel's SDM, and managed as an "unsigned long" by the kernel, DEBUGCTL is an MSR and therefore a 64-bit value. Reviewed-by: Xiaoyao Li Cc: stable@vger.kernel.org Reviewed-and-tested-by: Ravi Bangoria Link: https://lore.kernel.org/r/20250227222411.3490595-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx/vmx.c | 8 ++------ arch/x86/kvm/vmx/vmx.h | 2 -- arch/x86/kvm/x86.c | 1 + 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0b7af5902ff75..32ae3aa50c7e3 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -780,6 +780,7 @@ struct kvm_vcpu_arch { u32 pkru; u32 hflags; u64 efer; + u64 host_debugctl; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ bool load_eoi_exitmap_pending; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6c56d5235f0f3..3b92f893b2392 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1514,16 +1514,12 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, */ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm)) shrink_ple_window(vcpu); vmx_vcpu_load_vmcs(vcpu, cpu, NULL); vmx_vcpu_pi_load(vcpu, cpu); - - vmx->host_debugctlmsr = get_debugctlmsr(); } void vmx_vcpu_put(struct kvm_vcpu *vcpu) @@ -7458,8 +7454,8 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) } /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ - if (vmx->host_debugctlmsr) - update_debugctlmsr(vmx->host_debugctlmsr); + if (vcpu->arch.host_debugctl) + update_debugctlmsr(vcpu->arch.host_debugctl); #ifndef CONFIG_X86_64 /* diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 8b111ce1087c7..951e44dc9d0ea 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -340,8 +340,6 @@ struct vcpu_vmx { /* apic deadline value in host tsc */ u64 hv_deadline_tsc; - unsigned long host_debugctlmsr; - /* * Only bits masked by msr_ia32_feature_control_valid_bits can be set in * msr_ia32_feature_control. FEAT_CTL_LOCKED is always included diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 02159c967d29e..5c6fd0edc41f4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4968,6 +4968,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* Save host pkru register if supported */ vcpu->arch.host_pkru = read_pkru(); + vcpu->arch.host_debugctl = get_debugctlmsr(); /* Apply any externally detected TSC adjustments (due to suspend) */ if (unlikely(vcpu->arch.tsc_offset_adjustment)) { -- GitLab From 433265870ab3455b418885bff48fa5fd02f7e448 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 27 Feb 2025 14:24:09 -0800 Subject: [PATCH 1155/1585] KVM: SVM: Manually context switch DEBUGCTL if LBR virtualization is disabled Manually load the guest's DEBUGCTL prior to VMRUN (and restore the host's value on #VMEXIT) if it diverges from the host's value and LBR virtualization is disabled, as hardware only context switches DEBUGCTL if LBR virtualization is fully enabled. Running the guest with the host's value has likely been mildly problematic for quite some time, e.g. it will result in undesirable behavior if BTF diverges (with the caveat that KVM now suppresses guest BTF due to lack of support). But the bug became fatal with the introduction of Bus Lock Trap ("Detect" in kernel paralance) support for AMD (commit 408eb7417a92 ("x86/bus_lock: Add support for AMD")), as a bus lock in the guest will trigger an unexpected #DB. Note, suppressing the bus lock #DB, i.e. simply resuming the guest without injecting a #DB, is not an option. It wouldn't address the general issue with DEBUGCTL, e.g. for things like BTF, and there are other guest-visible side effects if BusLockTrap is left enabled. If BusLockTrap is disabled, then DR6.BLD is reserved-to-1; any attempts to clear it by software are ignored. But if BusLockTrap is enabled, software can clear DR6.BLD: Software enables bus lock trap by setting DebugCtl MSR[BLCKDB] (bit 2) to 1. When bus lock trap is enabled, ... The processor indicates that this #DB was caused by a bus lock by clearing DR6[BLD] (bit 11). DR6[11] previously had been defined to be always 1. and clearing DR6.BLD is "sticky" in that it's not set (i.e. lowered) by other #DBs: All other #DB exceptions leave DR6[BLD] unmodified E.g. leaving BusLockTrap enable can confuse a legacy guest that writes '0' to reset DR6. Reported-by: rangemachine@gmail.com Reported-by: whanos@sergal.fun Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219787 Closes: https://lore.kernel.org/all/bug-219787-28872@https.bugzilla.kernel.org%2F Cc: Ravi Bangoria Cc: stable@vger.kernel.org Reviewed-and-tested-by: Ravi Bangoria Link: https://lore.kernel.org/r/20250227222411.3490595-5-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index ed48465186961..e67de787fc714 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4288,6 +4288,16 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, clgi(); kvm_load_guest_xsave_state(vcpu); + /* + * Hardware only context switches DEBUGCTL if LBR virtualization is + * enabled. Manually load DEBUGCTL if necessary (and restore it after + * VM-Exit), as running with the host's DEBUGCTL can negatively affect + * guest state and can even be fatal, e.g. due to Bus Lock Detect. + */ + if (!(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) && + vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl) + update_debugctlmsr(svm->vmcb->save.dbgctl); + kvm_wait_lapic_expire(vcpu); /* @@ -4315,6 +4325,10 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_before_interrupt(vcpu, KVM_HANDLING_NMI); + if (!(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) && + vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl) + update_debugctlmsr(vcpu->arch.host_debugctl); + kvm_load_host_xsave_state(vcpu); stgi(); -- GitLab From 189ecdb3e112da703ac0699f4ec76aa78122f911 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 27 Feb 2025 14:24:10 -0800 Subject: [PATCH 1156/1585] KVM: x86: Snapshot the host's DEBUGCTL after disabling IRQs Snapshot the host's DEBUGCTL after disabling IRQs, as perf can toggle debugctl bits from IRQ context, e.g. when enabling/disabling events via smp_call_function_single(). Taking the snapshot (long) before IRQs are disabled could result in KVM effectively clobbering DEBUGCTL due to using a stale snapshot. Cc: stable@vger.kernel.org Reviewed-and-tested-by: Ravi Bangoria Link: https://lore.kernel.org/r/20250227222411.3490595-6-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5c6fd0edc41f4..12d5f47c1bbe9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4968,7 +4968,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* Save host pkru register if supported */ vcpu->arch.host_pkru = read_pkru(); - vcpu->arch.host_debugctl = get_debugctlmsr(); /* Apply any externally detected TSC adjustments (due to suspend) */ if (unlikely(vcpu->arch.tsc_offset_adjustment)) { @@ -10969,6 +10968,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(0, 7); } + vcpu->arch.host_debugctl = get_debugctlmsr(); + guest_timing_enter_irqoff(); for (;;) { -- GitLab From c157d351460bcf202970e97e611cb6b54a3dd4a4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2025 23:37:08 +0100 Subject: [PATCH 1157/1585] intel_idle: Handle older CPUs, which stop the TSC in deeper C states, correctly The Intel idle driver is preferred over the ACPI processor idle driver, but fails to implement the work around for Core2 generation CPUs, where the TSC stops in C2 and deeper C-states. This causes stalls and boot delays, when the clocksource watchdog does not catch the unstable TSC before the CPU goes deep idle for the first time. The ACPI driver marks the TSC unstable when it detects that the CPU supports C2 or deeper and the CPU does not have a non-stop TSC. Add the equivivalent work around to the Intel idle driver to cure that. Fixes: 18734958e9bf ("intel_idle: Use ACPI _CST for processor models without C-state tables") Reported-by: Fab Stz Signed-off-by: Thomas Gleixner Tested-by: Fab Stz Cc: All applicable Closes: https://lore.kernel.org/all/10cf96aa-1276-4bd4-8966-c890377030c3@yahoo.fr Link: https://patch.msgid.link/87bjupfy7f.ffs@tglx Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 118fe1d37c226..0fdb1d1316c44 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #define INTEL_IDLE_VERSION "0.5.1" @@ -1799,6 +1800,9 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) if (intel_idle_state_needs_timer_stop(state)) state->flags |= CPUIDLE_FLAG_TIMER_STOP; + if (cx->type > ACPI_STATE_C1 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) + mark_tsc_unstable("TSC halts in idle"); + state->enter = intel_idle; state->enter_s2idle = intel_idle_s2idle; } -- GitLab From 4a4f9b5c7c13601c4f1b3d8c607d7439e39f40d2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 28 Feb 2025 15:58:30 -0500 Subject: [PATCH 1158/1585] bcachefs: Don't set BCH_FEATURE_incompat_version_field unless requested We shouldn't be setting incompatible bits or the incompatible version field unless explicitly request or allowed - otherwise we break mounting with old kernels or userspace. Reported-by: Dave Hansen Signed-off-by: Kent Overstreet --- fs/bcachefs/super-io.c | 24 ++++++++++++++++-------- fs/bcachefs/super-io.h | 11 ++++------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 8037ccbacf6af..a81a7b6c09897 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -69,14 +69,20 @@ enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_meta return v; } -void bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version) +bool bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version) { - mutex_lock(&c->sb_lock); - SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb, - max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version)); - c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_FEATURE_incompat_version_field); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + bool ret = (c->sb.features & BIT_ULL(BCH_FEATURE_incompat_version_field)) && + version <= c->sb.version_incompat_allowed; + + if (ret) { + mutex_lock(&c->sb_lock); + SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb, + max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version)); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + } + + return ret; } const char * const bch2_sb_fields[] = { @@ -1219,9 +1225,11 @@ void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat) c->disk_sb.sb->version = cpu_to_le16(new_version); c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); - if (incompat) + if (incompat) { SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), new_version)); + c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_FEATURE_incompat_version_field); + } } static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f, diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index f1ab4f9437203..b4cff9ebdebbf 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -21,17 +21,14 @@ static inline bool bch2_version_compatible(u16 version) void bch2_version_to_text(struct printbuf *, enum bcachefs_metadata_version); enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_metadata_version); -void bch2_set_version_incompat(struct bch_fs *, enum bcachefs_metadata_version); +bool bch2_set_version_incompat(struct bch_fs *, enum bcachefs_metadata_version); static inline bool bch2_request_incompat_feature(struct bch_fs *c, enum bcachefs_metadata_version version) { - if (unlikely(version > c->sb.version_incompat)) { - if (version > c->sb.version_incompat_allowed) - return false; - bch2_set_version_incompat(c, version); - } - return true; + return likely(version <= c->sb.version_incompat) + ? true + : bch2_set_version_incompat(c, version); } static inline size_t bch2_sb_field_bytes(struct bch_sb_field *f) -- GitLab From ee01b2f2d7d0010787c2343463965bbc283a497f Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 26 Feb 2025 18:13:42 +0100 Subject: [PATCH 1159/1585] net: gso: fix ownership in __udp_gso_segment In __udp_gso_segment the skb destructor is removed before segmenting the skb but the socket reference is kept as-is. This is an issue if the original skb is later orphaned as we can hit the following bug: kernel BUG at ./include/linux/skbuff.h:3312! (skb_orphan) RIP: 0010:ip_rcv_core+0x8b2/0xca0 Call Trace: ip_rcv+0xab/0x6e0 __netif_receive_skb_one_core+0x168/0x1b0 process_backlog+0x384/0x1100 __napi_poll.constprop.0+0xa1/0x370 net_rx_action+0x925/0xe50 The above can happen following a sequence of events when using OpenVSwitch, when an OVS_ACTION_ATTR_USERSPACE action precedes an OVS_ACTION_ATTR_OUTPUT action: 1. OVS_ACTION_ATTR_USERSPACE is handled (in do_execute_actions): the skb goes through queue_gso_packets and then __udp_gso_segment, where its destructor is removed. 2. The segments' data are copied and sent to userspace. 3. OVS_ACTION_ATTR_OUTPUT is handled (in do_execute_actions) and the same original skb is sent to its path. 4. If it later hits skb_orphan, we hit the bug. Fix this by also removing the reference to the socket in __udp_gso_segment. Fixes: ad405857b174 ("udp: better wmem accounting on gso") Signed-off-by: Antoine Tenart Link: https://patch.msgid.link/20250226171352.258045-1-atenart@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv4/udp_offload.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index a5be6e4ed326f..ecfca59f31f13 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -321,13 +321,17 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, /* clear destructor to avoid skb_segment assigning it to tail */ copy_dtor = gso_skb->destructor == sock_wfree; - if (copy_dtor) + if (copy_dtor) { gso_skb->destructor = NULL; + gso_skb->sk = NULL; + } segs = skb_segment(gso_skb, features); if (IS_ERR_OR_NULL(segs)) { - if (copy_dtor) + if (copy_dtor) { gso_skb->destructor = sock_wfree; + gso_skb->sk = sk; + } return segs; } -- GitLab From a466fd7e9fafd975949e5945e2f70c33a94b1a70 Mon Sep 17 00:00:00 2001 From: Vitaliy Shevtsov Date: Thu, 27 Feb 2025 23:46:27 +0500 Subject: [PATCH 1160/1585] caif_virtio: fix wrong pointer check in cfv_probe() del_vqs() frees virtqueues, therefore cfv->vq_tx pointer should be checked for NULL before calling it, not cfv->vdev. Also the current implementation is redundant because the pointer cfv->vdev is dereferenced before it is checked for NULL. Fix this by checking cfv->vq_tx for NULL instead of cfv->vdev before calling del_vqs(). Fixes: 0d2e1a2926b1 ("caif_virtio: Introduce caif over virtio") Signed-off-by: Vitaliy Shevtsov Reviewed-by: Gerhard Engleder Link: https://patch.msgid.link/20250227184716.4715-1-v.shevtsov@mt-integration.ru Signed-off-by: Jakub Kicinski --- drivers/net/caif/caif_virtio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c index 7fea00c7ca8a6..c60386bf2d1a4 100644 --- a/drivers/net/caif/caif_virtio.c +++ b/drivers/net/caif/caif_virtio.c @@ -745,7 +745,7 @@ static int cfv_probe(struct virtio_device *vdev) if (cfv->vr_rx) vdev->vringh_config->del_vrhs(cfv->vdev); - if (cfv->vdev) + if (cfv->vq_tx) vdev->config->del_vqs(cfv->vdev); free_netdev(netdev); return err; -- GitLab From cb380909ae3b1ebf14d6a455a4f92d7916d790cb Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 27 Feb 2025 15:06:30 -0800 Subject: [PATCH 1161/1585] vhost: return task creation error instead of NULL Lets callers distinguish why the vhost task creation failed. No one currently cares why it failed, so no real runtime change from this patch, but that will not be the case for long. Signed-off-by: Keith Busch Message-ID: <20250227230631.303431-2-kbusch@meta.com> Reviewed-by: Mike Christie Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 +- drivers/vhost/vhost.c | 2 +- kernel/vhost_task.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index d4ac4a1f8b81b..18ca1ea6dc240 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -7471,7 +7471,7 @@ static void kvm_mmu_start_lpage_recovery(struct once *once) kvm_nx_huge_page_recovery_worker_kill, kvm, "kvm-nx-lpage-recovery"); - if (!nx_thread) + if (IS_ERR(nx_thread)) return; vhost_task_start(nx_thread); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 9ac25d08f473e..63612faeab727 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -666,7 +666,7 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev) vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed, worker, name); - if (!vtsk) + if (IS_ERR(vtsk)) goto free_worker; mutex_init(&worker->mutex); diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c index 8800f5acc0071..2ef2e1b800916 100644 --- a/kernel/vhost_task.c +++ b/kernel/vhost_task.c @@ -133,7 +133,7 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *), vtsk = kzalloc(sizeof(*vtsk), GFP_KERNEL); if (!vtsk) - return NULL; + return ERR_PTR(-ENOMEM); init_completion(&vtsk->exited); mutex_init(&vtsk->exit_mutex); vtsk->data = arg; @@ -145,7 +145,7 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *), tsk = copy_process(NULL, 0, NUMA_NO_NODE, &args); if (IS_ERR(tsk)) { kfree(vtsk); - return NULL; + return ERR_PTR(PTR_ERR(tsk)); } vtsk->task = tsk; -- GitLab From 916b7f42b3b3b539a71c204a9b49fdc4ca92cd82 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 27 Feb 2025 15:06:31 -0800 Subject: [PATCH 1162/1585] kvm: retry nx_huge_page_recovery_thread creation A VMM may send a non-fatal signal to its threads, including vCPU tasks, at any time, and thus may signal vCPU tasks during KVM_RUN. If a vCPU task receives the signal while its trying to spawn the huge page recovery vhost task, then KVM_RUN will fail due to copy_process() returning -ERESTARTNOINTR. Rework call_once() to mark the call complete if and only if the called function succeeds, and plumb the function's true error code back to the call_once() invoker. This provides userspace with the correct, non-fatal error code so that the VMM doesn't terminate the VM on -ENOMEM, and allows subsequent KVM_RUN a succeed by virtue of retrying creation of the NX huge page task. Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson [implemented the kvm user side] Signed-off-by: Keith Busch Message-ID: <20250227230631.303431-3-kbusch@meta.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 10 ++++----- include/linux/call_once.h | 47 ++++++++++++++++++++++++++++----------- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 18ca1ea6dc240..8160870398b90 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -7460,7 +7460,7 @@ static bool kvm_nx_huge_page_recovery_worker(void *data) return true; } -static void kvm_mmu_start_lpage_recovery(struct once *once) +static int kvm_mmu_start_lpage_recovery(struct once *once) { struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once); struct kvm *kvm = container_of(ka, struct kvm, arch); @@ -7472,12 +7472,13 @@ static void kvm_mmu_start_lpage_recovery(struct once *once) kvm, "kvm-nx-lpage-recovery"); if (IS_ERR(nx_thread)) - return; + return PTR_ERR(nx_thread); vhost_task_start(nx_thread); /* Make the task visible only once it is fully started. */ WRITE_ONCE(kvm->arch.nx_huge_page_recovery_thread, nx_thread); + return 0; } int kvm_mmu_post_init_vm(struct kvm *kvm) @@ -7485,10 +7486,7 @@ int kvm_mmu_post_init_vm(struct kvm *kvm) if (nx_hugepage_mitigation_hard_disabled) return 0; - call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery); - if (!kvm->arch.nx_huge_page_recovery_thread) - return -ENOMEM; - return 0; + return call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery); } void kvm_mmu_pre_destroy_vm(struct kvm *kvm) diff --git a/include/linux/call_once.h b/include/linux/call_once.h index 6261aa0b3fb00..13cd6469e7e56 100644 --- a/include/linux/call_once.h +++ b/include/linux/call_once.h @@ -26,20 +26,41 @@ do { \ __once_init((once), #once, &__key); \ } while (0) -static inline void call_once(struct once *once, void (*cb)(struct once *)) +/* + * call_once - Ensure a function has been called exactly once + * + * @once: Tracking struct + * @cb: Function to be called + * + * If @once has never completed successfully before, call @cb and, if + * it returns a zero or positive value, mark @once as completed. Return + * the value returned by @cb + * + * If @once has completed succesfully before, return 0. + * + * The call to @cb is implicitly surrounded by a mutex, though for + * efficiency the * function avoids taking it after the first call. + */ +static inline int call_once(struct once *once, int (*cb)(struct once *)) { - /* Pairs with atomic_set_release() below. */ - if (atomic_read_acquire(&once->state) == ONCE_COMPLETED) - return; - - guard(mutex)(&once->lock); - WARN_ON(atomic_read(&once->state) == ONCE_RUNNING); - if (atomic_read(&once->state) != ONCE_NOT_STARTED) - return; - - atomic_set(&once->state, ONCE_RUNNING); - cb(once); - atomic_set_release(&once->state, ONCE_COMPLETED); + int r, state; + + /* Pairs with atomic_set_release() below. */ + if (atomic_read_acquire(&once->state) == ONCE_COMPLETED) + return 0; + + guard(mutex)(&once->lock); + state = atomic_read(&once->state); + if (unlikely(state != ONCE_NOT_STARTED)) + return WARN_ON_ONCE(state != ONCE_COMPLETED) ? -EINVAL : 0; + + atomic_set(&once->state, ONCE_RUNNING); + r = cb(once); + if (r < 0) + atomic_set(&once->state, ONCE_NOT_STARTED); + else + atomic_set_release(&once->state, ONCE_COMPLETED); + return r; } #endif /* _LINUX_CALL_ONCE_H */ -- GitLab From a2f925a2f62254119cdaa360cfc9c0424bccd531 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Fri, 28 Feb 2025 13:26:04 +0100 Subject: [PATCH 1163/1585] Revert "ata: libata-core: Add ATA_QUIRK_NOLPM for Samsung SSD 870 QVO drives" This reverts commit cc77e2ce187d26cc66af3577bf896d7410eb25ab. It was reported that adding ATA_QUIRK_NOLPM for Samsung SSD 870 QVO drives breaks entering lower package states for certain systems. It turns out that Samsung SSD 870 QVO actually has working LPM when using a recent SSD firmware version. The author of commit cc77e2ce187d ("ata: libata-core: Add ATA_QUIRK_NOLPM for Samsung SSD 870 QVO drives") reported himself that only older SSD firmware versions have broken LPM: https://lore.kernel.org/stable/93c10d38-718c-459d-84a5-4d87680b4da7@debian.org/ Unfortunately, he did not specify which older firmware version he was using which had broken LPM. Let's revert this quirk, which has FW version field specified as NULL (which means that it applies for all Samsung SSD 870 QVO firmware versions) for now. Once the author reports which older firmware version(s) that are broken, we can create a more fine grained quirk, which populates the FW version field accordingly. Fixes: cc77e2ce187d ("ata: libata-core: Add ATA_QUIRK_NOLPM for Samsung SSD 870 QVO drives") Reported-by: Dieter Mummenschanz Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219747 Link: https://lore.kernel.org/r/20250228122603.91814-2-cassel@kernel.org Signed-off-by: Niklas Cassel --- drivers/ata/libata-core.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 63ec2f2184319..c085dd81ebe7f 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4143,10 +4143,6 @@ static const struct ata_dev_quirks_entry __ata_dev_quirks[] = { { "Samsung SSD 860*", NULL, ATA_QUIRK_NO_NCQ_TRIM | ATA_QUIRK_ZERO_AFTER_TRIM | ATA_QUIRK_NO_NCQ_ON_ATI }, - { "Samsung SSD 870 QVO*", NULL, ATA_QUIRK_NO_NCQ_TRIM | - ATA_QUIRK_ZERO_AFTER_TRIM | - ATA_QUIRK_NO_NCQ_ON_ATI | - ATA_QUIRK_NOLPM }, { "Samsung SSD 870*", NULL, ATA_QUIRK_NO_NCQ_TRIM | ATA_QUIRK_ZERO_AFTER_TRIM | ATA_QUIRK_NO_NCQ_ON_ATI }, -- GitLab From 2565e42539b120b81a68a58da961ce5d1e34eac8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 4 Nov 2024 14:39:11 +0100 Subject: [PATCH 1164/1585] perf/core: Fix pmus_lock vs. pmus_srcu ordering Commit a63fbed776c7 ("perf/tracing/cpuhotplug: Fix locking order") placed pmus_lock inside pmus_srcu, this makes perf_pmu_unregister() trip lockdep. Move the locking about such that only pmu_idr and pmus (list) are modified while holding pmus_lock. This avoids doing synchronize_srcu() while holding pmus_lock and all is well again. Fixes: a63fbed776c7 ("perf/tracing/cpuhotplug: Fix locking order") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20241104135517.679556858@infradead.org --- kernel/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 6364319e2f888..11793d690cbb7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11939,6 +11939,8 @@ void perf_pmu_unregister(struct pmu *pmu) { mutex_lock(&pmus_lock); list_del_rcu(&pmu->entry); + idr_remove(&pmu_idr, pmu->type); + mutex_unlock(&pmus_lock); /* * We dereference the pmu list under both SRCU and regular RCU, so @@ -11948,7 +11950,6 @@ void perf_pmu_unregister(struct pmu *pmu) synchronize_rcu(); free_percpu(pmu->pmu_disable_count); - idr_remove(&pmu_idr, pmu->type); if (pmu_bus_running && pmu->dev && pmu->dev != PMU_NULL_DEV) { if (pmu->nr_addr_filters) device_remove_file(pmu->dev, &dev_attr_nr_addr_filters); @@ -11956,7 +11957,6 @@ void perf_pmu_unregister(struct pmu *pmu) put_device(pmu->dev); } free_pmu_context(pmu); - mutex_unlock(&pmus_lock); } EXPORT_SYMBOL_GPL(perf_pmu_unregister); -- GitLab From 003659fec9f6d8c04738cb74b5384398ae8a7e88 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 4 Nov 2024 14:39:12 +0100 Subject: [PATCH 1165/1585] perf/core: Fix perf_pmu_register() vs. perf_init_event() There is a fairly obvious race between perf_init_event() doing idr_find() and perf_pmu_register() doing idr_alloc() with an incompletely initialized PMU pointer. Avoid by doing idr_alloc() on a NULL pointer to register the id, and swizzling the real struct pmu pointer at the end using idr_replace(). Also making sure to not set struct pmu members after publishing the struct pmu, duh. [ introduce idr_cmpxchg() in order to better handle the idr_replace() error case -- if it were to return an unexpected pointer, it will already have replaced the value and there is no going back. ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20241104135517.858805880@infradead.org --- kernel/events/core.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 11793d690cbb7..823aa08249161 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11830,6 +11830,21 @@ static int pmu_dev_alloc(struct pmu *pmu) static struct lock_class_key cpuctx_mutex; static struct lock_class_key cpuctx_lock; +static bool idr_cmpxchg(struct idr *idr, unsigned long id, void *old, void *new) +{ + void *tmp, *val = idr_find(idr, id); + + if (val != old) + return false; + + tmp = idr_replace(idr, new, id); + if (IS_ERR(tmp)) + return false; + + WARN_ON_ONCE(tmp != val); + return true; +} + int perf_pmu_register(struct pmu *pmu, const char *name, int type) { int cpu, ret, max = PERF_TYPE_MAX; @@ -11856,7 +11871,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type) if (type >= 0) max = type; - ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL); + ret = idr_alloc(&pmu_idr, NULL, max, 0, GFP_KERNEL); if (ret < 0) goto free_pdc; @@ -11864,6 +11879,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type) type = ret; pmu->type = type; + atomic_set(&pmu->exclusive_cnt, 0); if (pmu_bus_running && !pmu->dev) { ret = pmu_dev_alloc(pmu); @@ -11912,14 +11928,22 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type) if (!pmu->event_idx) pmu->event_idx = perf_event_idx_default; + /* + * Now that the PMU is complete, make it visible to perf_try_init_event(). + */ + if (!idr_cmpxchg(&pmu_idr, pmu->type, NULL, pmu)) + goto free_context; list_add_rcu(&pmu->entry, &pmus); - atomic_set(&pmu->exclusive_cnt, 0); + ret = 0; unlock: mutex_unlock(&pmus_lock); return ret; +free_context: + free_percpu(pmu->cpu_pmu_context); + free_dev: if (pmu->dev && pmu->dev != PMU_NULL_DEV) { device_del(pmu->dev); -- GitLab From c9ce148ea753bef66686460fa3cec6641cdfbb9f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 1 Mar 2025 12:45:29 +0100 Subject: [PATCH 1166/1585] ALSA: seq: Avoid module auto-load handling at event delivery snd_seq_client_use_ptr() is supposed to return the snd_seq_client object for the given client ID, and it tries to handle the module auto-loading when no matching object is found. Although the module handling is performed only conditionally with "!in_interrupt()", this condition may be fragile, e.g. when the code is called from the ALSA timer callback where the spinlock is temporarily disabled while the irq is disabled. Then his doesn't fit well and spews the error about sleep from invalid context, as complained recently by syzbot. Also, in general, handling the module-loading at each time if no matching object is found is really an overkill. It can be still useful when performed at the top-level ioctl or proc reads, but it shouldn't be done at event delivery at all. For addressing the issues above, this patch disables the module handling in snd_seq_client_use_ptr() in normal cases like event deliveries, but allow only in limited and safe situations. A new function client_load_and_use_ptr() is used for the cases where the module loading can be done safely, instead. Reported-by: syzbot+4cb9fad083898f54c517@syzkaller.appspotmail.com Closes: https://lore.kernel.org/67c272e5.050a0220.dc10f.0159.GAE@google.com Cc: Link: https://patch.msgid.link/20250301114530.8975-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_clientmgr.c | 46 ++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index cb66ec42a3f8a..706f53e39b53c 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -106,7 +106,7 @@ static struct snd_seq_client *clientptr(int clientid) return clienttab[clientid]; } -struct snd_seq_client *snd_seq_client_use_ptr(int clientid) +static struct snd_seq_client *client_use_ptr(int clientid, bool load_module) { unsigned long flags; struct snd_seq_client *client; @@ -126,7 +126,7 @@ struct snd_seq_client *snd_seq_client_use_ptr(int clientid) } spin_unlock_irqrestore(&clients_lock, flags); #ifdef CONFIG_MODULES - if (!in_interrupt()) { + if (load_module) { static DECLARE_BITMAP(client_requested, SNDRV_SEQ_GLOBAL_CLIENTS); static DECLARE_BITMAP(card_requested, SNDRV_CARDS); @@ -168,6 +168,20 @@ struct snd_seq_client *snd_seq_client_use_ptr(int clientid) return client; } +/* get snd_seq_client object for the given id quickly */ +struct snd_seq_client *snd_seq_client_use_ptr(int clientid) +{ + return client_use_ptr(clientid, false); +} + +/* get snd_seq_client object for the given id; + * if not found, retry after loading the modules + */ +static struct snd_seq_client *client_load_and_use_ptr(int clientid) +{ + return client_use_ptr(clientid, IS_ENABLED(CONFIG_MODULES)); +} + /* Take refcount and perform ioctl_mutex lock on the given client; * used only for OSS sequencer * Unlock via snd_seq_client_ioctl_unlock() below @@ -176,7 +190,7 @@ bool snd_seq_client_ioctl_lock(int clientid) { struct snd_seq_client *client; - client = snd_seq_client_use_ptr(clientid); + client = client_load_and_use_ptr(clientid); if (!client) return false; mutex_lock(&client->ioctl_mutex); @@ -1195,7 +1209,7 @@ static int snd_seq_ioctl_running_mode(struct snd_seq_client *client, void *arg) int err = 0; /* requested client number */ - cptr = snd_seq_client_use_ptr(info->client); + cptr = client_load_and_use_ptr(info->client); if (cptr == NULL) return -ENOENT; /* don't change !!! */ @@ -1257,7 +1271,7 @@ static int snd_seq_ioctl_get_client_info(struct snd_seq_client *client, struct snd_seq_client *cptr; /* requested client number */ - cptr = snd_seq_client_use_ptr(client_info->client); + cptr = client_load_and_use_ptr(client_info->client); if (cptr == NULL) return -ENOENT; /* don't change !!! */ @@ -1396,7 +1410,7 @@ static int snd_seq_ioctl_get_port_info(struct snd_seq_client *client, void *arg) struct snd_seq_client *cptr; struct snd_seq_client_port *port; - cptr = snd_seq_client_use_ptr(info->addr.client); + cptr = client_load_and_use_ptr(info->addr.client); if (cptr == NULL) return -ENXIO; @@ -1503,10 +1517,10 @@ static int snd_seq_ioctl_subscribe_port(struct snd_seq_client *client, struct snd_seq_client *receiver = NULL, *sender = NULL; struct snd_seq_client_port *sport = NULL, *dport = NULL; - receiver = snd_seq_client_use_ptr(subs->dest.client); + receiver = client_load_and_use_ptr(subs->dest.client); if (!receiver) goto __end; - sender = snd_seq_client_use_ptr(subs->sender.client); + sender = client_load_and_use_ptr(subs->sender.client); if (!sender) goto __end; sport = snd_seq_port_use_ptr(sender, subs->sender.port); @@ -1871,7 +1885,7 @@ static int snd_seq_ioctl_get_client_pool(struct snd_seq_client *client, struct snd_seq_client_pool *info = arg; struct snd_seq_client *cptr; - cptr = snd_seq_client_use_ptr(info->client); + cptr = client_load_and_use_ptr(info->client); if (cptr == NULL) return -ENOENT; memset(info, 0, sizeof(*info)); @@ -1975,7 +1989,7 @@ static int snd_seq_ioctl_get_subscription(struct snd_seq_client *client, struct snd_seq_client_port *sport = NULL; result = -EINVAL; - sender = snd_seq_client_use_ptr(subs->sender.client); + sender = client_load_and_use_ptr(subs->sender.client); if (!sender) goto __end; sport = snd_seq_port_use_ptr(sender, subs->sender.port); @@ -2006,7 +2020,7 @@ static int snd_seq_ioctl_query_subs(struct snd_seq_client *client, void *arg) struct list_head *p; int i; - cptr = snd_seq_client_use_ptr(subs->root.client); + cptr = client_load_and_use_ptr(subs->root.client); if (!cptr) goto __end; port = snd_seq_port_use_ptr(cptr, subs->root.port); @@ -2073,7 +2087,7 @@ static int snd_seq_ioctl_query_next_client(struct snd_seq_client *client, if (info->client < 0) info->client = 0; for (; info->client < SNDRV_SEQ_MAX_CLIENTS; info->client++) { - cptr = snd_seq_client_use_ptr(info->client); + cptr = client_load_and_use_ptr(info->client); if (cptr) break; /* found */ } @@ -2096,7 +2110,7 @@ static int snd_seq_ioctl_query_next_port(struct snd_seq_client *client, struct snd_seq_client *cptr; struct snd_seq_client_port *port = NULL; - cptr = snd_seq_client_use_ptr(info->addr.client); + cptr = client_load_and_use_ptr(info->addr.client); if (cptr == NULL) return -ENXIO; @@ -2193,7 +2207,7 @@ static int snd_seq_ioctl_client_ump_info(struct snd_seq_client *caller, size = sizeof(struct snd_ump_endpoint_info); else size = sizeof(struct snd_ump_block_info); - cptr = snd_seq_client_use_ptr(client); + cptr = client_load_and_use_ptr(client); if (!cptr) return -ENOENT; @@ -2475,7 +2489,7 @@ int snd_seq_kernel_client_enqueue(int client, struct snd_seq_event *ev, if (check_event_type_and_length(ev)) return -EINVAL; - cptr = snd_seq_client_use_ptr(client); + cptr = client_load_and_use_ptr(client); if (cptr == NULL) return -EINVAL; @@ -2707,7 +2721,7 @@ void snd_seq_info_clients_read(struct snd_info_entry *entry, /* list the client table */ for (c = 0; c < SNDRV_SEQ_MAX_CLIENTS; c++) { - client = snd_seq_client_use_ptr(c); + client = client_load_and_use_ptr(c); if (client == NULL) continue; if (client->type == NO_CLIENT) { -- GitLab From 7a68b55ff39b0a1638acb1694c185d49f6077a0d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 27 Feb 2025 18:05:25 +0000 Subject: [PATCH 1167/1585] KVM: arm64: Initialize HCR_EL2.E2H early On CPUs without FEAT_E2H0, HCR_EL2.E2H is RES1, but may reset to an UNKNOWN value out of reset and consequently may not read as 1 unless it has been explicitly initialized. We handled this for the head.S boot code in commits: 3944382fa6f22b54 ("arm64: Treat HCR_EL2.E2H as RES1 when ID_AA64MMFR4_EL1.E2H0 is negative") b3320142f3db9b3f ("arm64: Fix early handling of FEAT_E2H0 not being implemented") Unfortunately, we forgot to apply a similar fix to the KVM PSCI entry points used when relaying CPU_ON, CPU_SUSPEND, and SYSTEM SUSPEND. When KVM is entered via these entry points, the value of HCR_EL2.E2H may be consumed before it has been initialized (e.g. by the 'init_el2_state' macro). Initialize HCR_EL2.E2H early in these paths such that it can be consumed reliably. The existing code in head.S is factored out into a new 'init_el2_hcr' macro, and this is used in the __kvm_hyp_init_cpu() function common to all the relevant PSCI entry points. For clarity, I've tweaked the assembly used to check whether ID_AA64MMFR4_EL1.E2H0 is negative. The bitfield is extracted as a signed value, and this is checked with a signed-greater-or-equal (GE) comparison. As the hyp code will reconfigure HCR_EL2 later in ___kvm_hyp_init(), all bits other than E2H are initialized to zero in __kvm_hyp_init_cpu(). Fixes: 3944382fa6f22b54 ("arm64: Treat HCR_EL2.E2H as RES1 when ID_AA64MMFR4_EL1.E2H0 is negative") Fixes: b3320142f3db9b3f ("arm64: Fix early handling of FEAT_E2H0 not being implemented") Signed-off-by: Mark Rutland Cc: Ahmed Genidi Cc: Ben Horgan Cc: Catalin Marinas Cc: Leo Yan Cc: Marc Zyngier Cc: Oliver Upton Cc: Will Deacon Link: https://lore.kernel.org/r/20250227180526.1204723-2-mark.rutland@arm.com [maz: fixed LT->GE thinko] Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/el2_setup.h | 26 ++++++++++++++++++++++++++ arch/arm64/kernel/head.S | 19 +------------------ arch/arm64/kvm/hyp/nvhe/hyp-init.S | 8 +++++++- 3 files changed, 34 insertions(+), 19 deletions(-) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 25e1626517500..56034a394b437 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -16,6 +16,32 @@ #include #include +.macro init_el2_hcr val + mov_q x0, \val + + /* + * Compliant CPUs advertise their VHE-onlyness with + * ID_AA64MMFR4_EL1.E2H0 < 0. On such CPUs HCR_EL2.E2H is RES1, but it + * can reset into an UNKNOWN state and might not read as 1 until it has + * been initialized explicitly. + * + * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but + * don't advertise it (they predate this relaxation). + * + * Initalize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H + * indicating whether the CPU is running in E2H mode. + */ + mrs_s x1, SYS_ID_AA64MMFR4_EL1 + sbfx x1, x1, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH + cmp x1, #0 + b.ge .LnVHE_\@ + + orr x0, x0, #HCR_E2H +.LnVHE_\@: + msr hcr_el2, x0 + isb +.endm + .macro __init_el2_sctlr mov_q x0, INIT_SCTLR_EL2_MMU_OFF msr sctlr_el2, x0 diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 5ab1970ee5436..2d56459d6c94c 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -298,25 +298,8 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) msr sctlr_el2, x0 isb 0: - mov_q x0, HCR_HOST_NVHE_FLAGS - - /* - * Compliant CPUs advertise their VHE-onlyness with - * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be - * RES1 in that case. Publish the E2H bit early so that - * it can be picked up by the init_el2_state macro. - * - * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but - * don't advertise it (they predate this relaxation). - */ - mrs_s x1, SYS_ID_AA64MMFR4_EL1 - tbz x1, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f - - orr x0, x0, #HCR_E2H -1: - msr hcr_el2, x0 - isb + init_el2_hcr HCR_HOST_NVHE_FLAGS init_el2_state /* Hypervisor stub */ diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index fc18662260676..3fb5504a7d7fc 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -73,8 +73,12 @@ __do_hyp_init: eret SYM_CODE_END(__kvm_hyp_init) +/* + * Initialize EL2 CPU state to sane values. + * + * HCR_EL2.E2H must have been initialized already. + */ SYM_CODE_START_LOCAL(__kvm_init_el2_state) - /* Initialize EL2 CPU state to sane values. */ init_el2_state // Clobbers x0..x2 finalise_el2_state ret @@ -206,6 +210,8 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu) 2: msr SPsel, #1 // We want to use SP_EL{1,2} + init_el2_hcr 0 + bl __kvm_init_el2_state __init_el2_nvhe_prepare_eret -- GitLab From 3855a7b91d42ebf3513b7ccffc44807274978b3d Mon Sep 17 00:00:00 2001 From: Ahmed Genidi Date: Thu, 27 Feb 2025 18:05:26 +0000 Subject: [PATCH 1168/1585] KVM: arm64: Initialize SCTLR_EL1 in __kvm_hyp_init_cpu() When KVM is in protected mode, host calls to PSCI are proxied via EL2, and cold entries from CPU_ON, CPU_SUSPEND, and SYSTEM_SUSPEND bounce through __kvm_hyp_init_cpu() at EL2 before entering the host kernel's entry point at EL1. While __kvm_hyp_init_cpu() initializes SPSR_EL2 for the exception return to EL1, it does not initialize SCTLR_EL1. Due to this, it's possible to enter EL1 with SCTLR_EL1 in an UNKNOWN state. In practice this has been seen to result in kernel crashes after CPU_ON as a result of SCTLR_EL1.M being 1 in violation of the initial core configuration specified by PSCI. Fix this by initializing SCTLR_EL1 for cold entry to the host kernel. As it's necessary to write to SCTLR_EL12 in VHE mode, this initialization is moved into __kvm_host_psci_cpu_entry() where we can use write_sysreg_el1(). The remnants of the '__init_el2_nvhe_prepare_eret' macro are folded into its only caller, as this is clearer than having the macro. Fixes: cdf367192766ad11 ("KVM: arm64: Intercept host's CPU_ON SMCs") Reported-by: Leo Yan Signed-off-by: Ahmed Genidi [ Mark: clarify commit message, handle E2H, move to C, remove macro ] Signed-off-by: Mark Rutland Cc: Ahmed Genidi Cc: Ben Horgan Cc: Catalin Marinas Cc: Leo Yan Cc: Marc Zyngier Cc: Oliver Upton Cc: Will Deacon Reviewed-by: Leo Yan Link: https://lore.kernel.org/r/20250227180526.1204723-3-mark.rutland@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/el2_setup.h | 5 ----- arch/arm64/kernel/head.S | 3 ++- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 2 -- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 3 +++ 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 56034a394b437..555c613fd2324 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -270,11 +270,6 @@ .Lskip_gcs_\@: .endm -.macro __init_el2_nvhe_prepare_eret - mov x0, #INIT_PSTATE_EL1 - msr spsr_el2, x0 -.endm - .macro __init_el2_mpam /* Memory Partitioning And Monitoring: disable EL2 traps */ mrs x1, id_aa64pfr0_el1 diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 2d56459d6c94c..2ce73525de2c9 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -322,7 +322,8 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) msr sctlr_el1, x1 mov x2, xzr 3: - __init_el2_nvhe_prepare_eret + mov x0, #INIT_PSTATE_EL1 + msr spsr_el2, x0 mov w0, #BOOT_CPU_MODE_EL2 orr x0, x0, x2 diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 3fb5504a7d7fc..f8af11189572f 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -214,8 +214,6 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu) bl __kvm_init_el2_state - __init_el2_nvhe_prepare_eret - /* Enable MMU, set vectors and stack. */ mov x0, x28 bl ___kvm_hyp_init // Clobbers x0..x2 diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 9c2ce1e0e99a5..c3e196fb8b18f 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -218,6 +218,9 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on) if (is_cpu_on) release_boot_args(boot_args); + write_sysreg_el1(INIT_SCTLR_EL1_MMU_OFF, SYS_SCTLR); + write_sysreg(INIT_PSTATE_EL1, SPSR_EL2); + __host_enter(host_ctxt); } -- GitLab From e04918dc594669068f5d59d567d08db531167188 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 2 Mar 2025 15:18:24 +0800 Subject: [PATCH 1169/1585] cred: Fix RCU warnings in override/revert_creds Fix RCU warnings in override_creds and revert_creds by turning the RCU pointer into a normal pointer using rcu_replace_pointer. These warnings were previously private to the cred code, but due to the move into the header file they are now polluting unrelated subsystems. Fixes: 49dffdfde462 ("cred: Add a light version of override/revert_creds()") Signed-off-by: Herbert Xu Link: https://lore.kernel.org/r/Z8QGQGW0IaSklKG7@gondor.apana.org.au Signed-off-by: Christian Brauner --- include/linux/cred.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/include/linux/cred.h b/include/linux/cred.h index 0c3c4b16b469c..5658a3bfe803c 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -172,18 +172,12 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) static inline const struct cred *override_creds(const struct cred *override_cred) { - const struct cred *old = current->cred; - - rcu_assign_pointer(current->cred, override_cred); - return old; + return rcu_replace_pointer(current->cred, override_cred, 1); } static inline const struct cred *revert_creds(const struct cred *revert_cred) { - const struct cred *override_cred = current->cred; - - rcu_assign_pointer(current->cred, revert_cred); - return override_cred; + return rcu_replace_pointer(current->cred, revert_cred, 1); } /** -- GitLab From 6b6e2e8fd0de3fa7c6f4f8fe6841b01770b2e7bc Mon Sep 17 00:00:00 2001 From: Titus Rwantare Date: Thu, 27 Feb 2025 22:24:55 +0000 Subject: [PATCH 1170/1585] hwmon: (pmbus) Initialise page count in pmbus_identify() The `pmbus_identify()` function fails to correctly determine the number of supported pages on PMBus devices. This occurs because `info->pages` is implicitly zero-initialised, and `pmbus_set_page()` does not perform writes to the page register if `info->pages` is not yet initialised. Without this patch, `info->pages` is always set to the maximum after scanning. This patch initialises `info->pages` to `PMBUS_PAGES` before the probing loop, enabling `pmbus_set_page()` writes to make it out onto the bus correctly identifying the number of pages. `PMBUS_PAGES` seemed like a reasonable non-zero number because that's the current result of the identification process. Testing was done with a PMBus device in QEMU. Signed-off-by: Titus Rwantare Fixes: 442aba78728e7 ("hwmon: PMBus device driver") Link: https://lore.kernel.org/r/20250227222455.2583468-1-titusr@google.com Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwmon/pmbus/pmbus.c b/drivers/hwmon/pmbus/pmbus.c index 77cf268e7d2d6..920cd5408141a 100644 --- a/drivers/hwmon/pmbus/pmbus.c +++ b/drivers/hwmon/pmbus/pmbus.c @@ -103,6 +103,8 @@ static int pmbus_identify(struct i2c_client *client, if (pmbus_check_byte_register(client, 0, PMBUS_PAGE)) { int page; + info->pages = PMBUS_PAGES; + for (page = 1; page < PMBUS_PAGES; page++) { if (pmbus_set_page(client, page, 0xff) < 0) break; -- GitLab From 1c7932d5ae0f5c22fa52ac811b4c427bbca5aff5 Mon Sep 17 00:00:00 2001 From: Maud Spierings Date: Thu, 27 Feb 2025 13:57:53 +0100 Subject: [PATCH 1171/1585] hwmon: (ntc_thermistor) Fix the ncpXXxh103 sensor table I could not find a single table that has the values currently present in the table, change it to the actual values that can be found in [1]/[2] and [3] (page 15 column 2) [1]: https://www.murata.com/products/productdetail?partno=NCP15XH103F03RC [2]: https://www.murata.com/products/productdata/8796836626462/NTHCG83.txt?1437969843000 [3]: https://nl.mouser.com/datasheet/2/281/r44e-522712.pdf Fixes: 54ce3a0d8011 ("hwmon: (ntc_thermistor) Add support for ncpXXxh103") Signed-off-by: Maud Spierings Link: https://lore.kernel.org/r/20250227-ntc_thermistor_fixes-v1-3-70fa73200b52@gocontroll.com Reviewed-by: Linus Walleij Signed-off-by: Guenter Roeck --- drivers/hwmon/ntc_thermistor.c | 66 +++++++++++++++++----------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index b5352900463fb..0d29c8f97ba7c 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -181,40 +181,40 @@ static const struct ntc_compensation ncpXXwf104[] = { }; static const struct ntc_compensation ncpXXxh103[] = { - { .temp_c = -40, .ohm = 247565 }, - { .temp_c = -35, .ohm = 181742 }, - { .temp_c = -30, .ohm = 135128 }, - { .temp_c = -25, .ohm = 101678 }, - { .temp_c = -20, .ohm = 77373 }, - { .temp_c = -15, .ohm = 59504 }, - { .temp_c = -10, .ohm = 46222 }, - { .temp_c = -5, .ohm = 36244 }, - { .temp_c = 0, .ohm = 28674 }, - { .temp_c = 5, .ohm = 22878 }, - { .temp_c = 10, .ohm = 18399 }, - { .temp_c = 15, .ohm = 14910 }, - { .temp_c = 20, .ohm = 12169 }, + { .temp_c = -40, .ohm = 195652 }, + { .temp_c = -35, .ohm = 148171 }, + { .temp_c = -30, .ohm = 113347 }, + { .temp_c = -25, .ohm = 87559 }, + { .temp_c = -20, .ohm = 68237 }, + { .temp_c = -15, .ohm = 53650 }, + { .temp_c = -10, .ohm = 42506 }, + { .temp_c = -5, .ohm = 33892 }, + { .temp_c = 0, .ohm = 27219 }, + { .temp_c = 5, .ohm = 22021 }, + { .temp_c = 10, .ohm = 17926 }, + { .temp_c = 15, .ohm = 14674 }, + { .temp_c = 20, .ohm = 12081 }, { .temp_c = 25, .ohm = 10000 }, - { .temp_c = 30, .ohm = 8271 }, - { .temp_c = 35, .ohm = 6883 }, - { .temp_c = 40, .ohm = 5762 }, - { .temp_c = 45, .ohm = 4851 }, - { .temp_c = 50, .ohm = 4105 }, - { .temp_c = 55, .ohm = 3492 }, - { .temp_c = 60, .ohm = 2985 }, - { .temp_c = 65, .ohm = 2563 }, - { .temp_c = 70, .ohm = 2211 }, - { .temp_c = 75, .ohm = 1915 }, - { .temp_c = 80, .ohm = 1666 }, - { .temp_c = 85, .ohm = 1454 }, - { .temp_c = 90, .ohm = 1275 }, - { .temp_c = 95, .ohm = 1121 }, - { .temp_c = 100, .ohm = 990 }, - { .temp_c = 105, .ohm = 876 }, - { .temp_c = 110, .ohm = 779 }, - { .temp_c = 115, .ohm = 694 }, - { .temp_c = 120, .ohm = 620 }, - { .temp_c = 125, .ohm = 556 }, + { .temp_c = 30, .ohm = 8315 }, + { .temp_c = 35, .ohm = 6948 }, + { .temp_c = 40, .ohm = 5834 }, + { .temp_c = 45, .ohm = 4917 }, + { .temp_c = 50, .ohm = 4161 }, + { .temp_c = 55, .ohm = 3535 }, + { .temp_c = 60, .ohm = 3014 }, + { .temp_c = 65, .ohm = 2586 }, + { .temp_c = 70, .ohm = 2228 }, + { .temp_c = 75, .ohm = 1925 }, + { .temp_c = 80, .ohm = 1669 }, + { .temp_c = 85, .ohm = 1452 }, + { .temp_c = 90, .ohm = 1268 }, + { .temp_c = 95, .ohm = 1110 }, + { .temp_c = 100, .ohm = 974 }, + { .temp_c = 105, .ohm = 858 }, + { .temp_c = 110, .ohm = 758 }, + { .temp_c = 115, .ohm = 672 }, + { .temp_c = 120, .ohm = 596 }, + { .temp_c = 125, .ohm = 531 }, }; /* -- GitLab From e278d5e8aef4c0a1d9a9fa8b8910d713a89aa800 Mon Sep 17 00:00:00 2001 From: Erik Schumacher Date: Mon, 24 Feb 2025 09:19:04 +0000 Subject: [PATCH 1172/1585] hwmon: (ad7314) Validate leading zero bits and return error Leading zero bits are sent on the bus before the temperature value is transmitted. If any of these bits are high, the connection might be unstable or there could be no AD7314 / ADT730x (or compatible) at all. Return -EIO in that case. Signed-off-by: Erik Schumacher Fixes: 4f3a659581cab ("hwmon: AD7314 driver (ported from IIO)") Link: https://lore.kernel.org/r/24a50c2981a318580aca8f50d23be7987b69ea00.camel@iris-sensing.com Signed-off-by: Guenter Roeck --- drivers/hwmon/ad7314.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/hwmon/ad7314.c b/drivers/hwmon/ad7314.c index 7802bbf5f9587..59424103f6348 100644 --- a/drivers/hwmon/ad7314.c +++ b/drivers/hwmon/ad7314.c @@ -22,11 +22,13 @@ */ #define AD7314_TEMP_MASK 0x7FE0 #define AD7314_TEMP_SHIFT 5 +#define AD7314_LEADING_ZEROS_MASK BIT(15) /* * ADT7301 and ADT7302 temperature masks */ #define ADT7301_TEMP_MASK 0x3FFF +#define ADT7301_LEADING_ZEROS_MASK (BIT(15) | BIT(14)) enum ad7314_variant { adt7301, @@ -65,12 +67,20 @@ static ssize_t ad7314_temperature_show(struct device *dev, return ret; switch (spi_get_device_id(chip->spi_dev)->driver_data) { case ad7314: + if (ret & AD7314_LEADING_ZEROS_MASK) { + /* Invalid read-out, leading zero part is missing */ + return -EIO; + } data = (ret & AD7314_TEMP_MASK) >> AD7314_TEMP_SHIFT; data = sign_extend32(data, 9); return sprintf(buf, "%d\n", 250 * data); case adt7301: case adt7302: + if (ret & ADT7301_LEADING_ZEROS_MASK) { + /* Invalid read-out, leading zero part is missing */ + return -EIO; + } /* * Documented as a 13 bit twos complement register * with a sign bit - which is a 14 bit 2's complement -- GitLab From 7eb172143d5508b4da468ed59ee857c6e5e01da6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 2 Mar 2025 11:48:20 -0800 Subject: [PATCH 1173/1585] Linux 6.14-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 30dab4c8b0120..70bdbf2218fc5 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Baby Opossum Posse # *DOCUMENTATION* -- GitLab From fd5ba38390c59e1c147480ae49b6133c4ac24001 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 26 Feb 2025 15:19:18 +0900 Subject: [PATCH 1174/1585] tracing: probe-events: Remove unused MAX_ARG_BUF_LEN macro Commit 18b1e870a496 ("tracing/probes: Add $arg* meta argument for all function args") introduced MAX_ARG_BUF_LEN but it is not used. Remove it. Link: https://lore.kernel.org/all/174055075876.4079315.8805416872155957588.stgit@mhiramat.tok.corp.google.com/ Fixes: 18b1e870a496 ("tracing/probes: Add $arg* meta argument for all function args") Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) --- kernel/trace/trace_probe.h | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index c47ca002347a7..96792bc4b0924 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -36,7 +36,6 @@ #define MAX_BTF_ARGS_LEN 128 #define MAX_DENTRY_ARGS_LEN 256 #define MAX_STRING_SIZE PATH_MAX -#define MAX_ARG_BUF_LEN (MAX_TRACE_ARGS * MAX_ARG_NAME_LEN) /* Reserved field names */ #define FIELD_STRING_IP "__probe_ip" -- GitLab From 4dd541f9d9e4d8cdfa9797e68d893b0c27e4c46c Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 15 Feb 2025 11:01:49 +0900 Subject: [PATCH 1175/1585] MAINTAINERS: update email address in cifs and ksmbd entry Steve mainly checks his email through his gmail address. I also check issues through another email address. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- MAINTAINERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8e0736dc2ee0e..ca11a553d4121 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5775,6 +5775,7 @@ X: drivers/clk/clkdev.c COMMON INTERNET FILE SYSTEM CLIENT (CIFS and SMB3) M: Steve French +M: Steve French R: Paulo Alcantara (DFS, global name space) R: Ronnie Sahlberg (directory leases, sparse files) R: Shyam Prasad N (multichannel) @@ -12655,7 +12656,9 @@ F: tools/testing/selftests/ KERNEL SMB3 SERVER (KSMBD) M: Namjae Jeon +M: Namjae Jeon M: Steve French +M: Steve French R: Sergey Senozhatsky R: Tom Talpey L: linux-cifs@vger.kernel.org -- GitLab From d6e13e19063db24f94b690159d0633aaf72a0f03 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 18 Feb 2025 22:49:50 +0900 Subject: [PATCH 1176/1585] ksmbd: fix out-of-bounds in parse_sec_desc() If osidoffset, gsidoffset and dacloffset could be greater than smb_ntsd struct size. If it is smaller, It could cause slab-out-of-bounds. And when validating sid, It need to check it included subauth array size. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smbacl.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index d39d3e553366d..89415b02dd649 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -807,6 +807,13 @@ static int parse_sid(struct smb_sid *psid, char *end_of_acl) return -EINVAL; } + if (!psid->num_subauth) + return 0; + + if (psid->num_subauth > SID_MAX_SUB_AUTHORITIES || + end_of_acl < (char *)psid + 8 + sizeof(__le32) * psid->num_subauth) + return -EINVAL; + return 0; } @@ -848,6 +855,9 @@ int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, pntsd->type = cpu_to_le16(DACL_PRESENT); if (pntsd->osidoffset) { + if (le32_to_cpu(pntsd->osidoffset) < sizeof(struct smb_ntsd)) + return -EINVAL; + rc = parse_sid(owner_sid_ptr, end_of_acl); if (rc) { pr_err("%s: Error %d parsing Owner SID\n", __func__, rc); @@ -863,6 +873,9 @@ int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, } if (pntsd->gsidoffset) { + if (le32_to_cpu(pntsd->gsidoffset) < sizeof(struct smb_ntsd)) + return -EINVAL; + rc = parse_sid(group_sid_ptr, end_of_acl); if (rc) { pr_err("%s: Error %d mapping Owner SID to gid\n", @@ -884,6 +897,9 @@ int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, pntsd->type |= cpu_to_le16(DACL_PROTECTED); if (dacloffset) { + if (dacloffset < sizeof(struct smb_ntsd)) + return -EINVAL; + parse_dacl(idmap, dacl_ptr, end_of_acl, owner_sid_ptr, group_sid_ptr, fattr); } -- GitLab From e2ff19f0b7a30e03516e6eb73b948e27a55bc9d2 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 21 Feb 2025 14:16:23 +0900 Subject: [PATCH 1177/1585] ksmbd: fix type confusion via race condition when using ipc_msg_send_request req->handle is allocated using ksmbd_acquire_id(&ipc_ida), based on ida_alloc. req->handle from ksmbd_ipc_login_request and FSCTL_PIPE_TRANSCEIVE ioctl can be same and it could lead to type confusion between messages, resulting in access to unexpected parts of memory after an incorrect delivery. ksmbd check type of ipc response but missing add continue to check next ipc reponse. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_ipc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c index 0460ebea6ff02..3f185ae60dc51 100644 --- a/fs/smb/server/transport_ipc.c +++ b/fs/smb/server/transport_ipc.c @@ -281,6 +281,7 @@ static int handle_response(int type, void *payload, size_t sz) if (entry->type + 1 != type) { pr_err("Waiting for IPC type %d, got %d. Ignore.\n", entry->type + 1, type); + continue; } entry->response = kvzalloc(sz, KSMBD_DEFAULT_GFP); -- GitLab From 84d2d1641b71dec326e8736a749b7ee76a9599fc Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 26 Feb 2025 15:44:02 +0900 Subject: [PATCH 1178/1585] ksmbd: fix use-after-free in smb2_lock If smb_lock->zero_len has value, ->llist of smb_lock is not delete and flock is old one. It will cause use-after-free on error handling routine. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index f1efcd0274750..35bed8fc1b970 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -7458,13 +7458,13 @@ int smb2_lock(struct ksmbd_work *work) } no_check_cl: + flock = smb_lock->fl; + list_del(&smb_lock->llist); + if (smb_lock->zero_len) { err = 0; goto skip; } - - flock = smb_lock->fl; - list_del(&smb_lock->llist); retry: rc = vfs_lock_file(filp, smb_lock->cmd, flock, NULL); skip: -- GitLab From e26e2d2e15daf1ab33e0135caf2304a0cfa2744b Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 27 Feb 2025 15:49:10 +0900 Subject: [PATCH 1179/1585] ksmbd: fix bug on trap in smb2_lock If lock count is greater than 1, flags could be old value. It should be checked with flags of smb_lock, not flags. It will cause bug-on trap from locks_free_lock in error handling routine. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 35bed8fc1b970..c53121538990e 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -7468,7 +7468,7 @@ int smb2_lock(struct ksmbd_work *work) retry: rc = vfs_lock_file(filp, smb_lock->cmd, flock, NULL); skip: - if (flags & SMB2_LOCKFLAG_UNLOCK) { + if (smb_lock->flags & SMB2_LOCKFLAG_UNLOCK) { if (!rc) { ksmbd_debug(SMB, "File unlocked\n"); } else if (rc == -ENOENT) { -- GitLab From 62e7dd0a39c2d0d7ff03274c36df971f1b3d2d0d Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 12 Feb 2025 23:26:09 +0900 Subject: [PATCH 1180/1585] smb: common: change the data type of num_aces to le16 2.4.5 in [MS-DTYP].pdf describe the data type of num_aces as le16. AceCount (2 bytes): An unsigned 16-bit integer that specifies the count of the number of ACE records in the ACL. Change it to le16 and add reserved field to smb_acl struct. Reported-by: Igor Leite Ladessa Tested-by: Igor Leite Ladessa Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/cifsacl.c | 26 +++++++++++++------------- fs/smb/common/smbacl.h | 3 ++- fs/smb/server/smbacl.c | 31 ++++++++++++++++--------------- fs/smb/server/smbacl.h | 2 +- 4 files changed, 32 insertions(+), 30 deletions(-) diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index 699a3f76d0834..7d953208046af 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -763,7 +763,7 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl, struct cifs_fattr *fattr, bool mode_from_special_sid) { int i; - int num_aces = 0; + u16 num_aces = 0; int acl_size; char *acl_base; struct smb_ace **ppace; @@ -785,7 +785,7 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl, cifs_dbg(NOISY, "DACL revision %d size %d num aces %d\n", le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size), - le32_to_cpu(pdacl->num_aces)); + le16_to_cpu(pdacl->num_aces)); /* reset rwx permissions for user/group/other. Also, if num_aces is 0 i.e. DACL has no ACEs, @@ -795,7 +795,7 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl, acl_base = (char *)pdacl; acl_size = sizeof(struct smb_acl); - num_aces = le32_to_cpu(pdacl->num_aces); + num_aces = le16_to_cpu(pdacl->num_aces); if (num_aces > 0) { umode_t denied_mode = 0; @@ -937,12 +937,12 @@ unsigned int setup_special_user_owner_ACE(struct smb_ace *pntace) static void populate_new_aces(char *nacl_base, struct smb_sid *pownersid, struct smb_sid *pgrpsid, - __u64 *pnmode, u32 *pnum_aces, u16 *pnsize, + __u64 *pnmode, u16 *pnum_aces, u16 *pnsize, bool modefromsid, bool posix) { __u64 nmode; - u32 num_aces = 0; + u16 num_aces = 0; u16 nsize = 0; __u64 user_mode; __u64 group_mode; @@ -1050,7 +1050,7 @@ static __u16 replace_sids_and_copy_aces(struct smb_acl *pdacl, struct smb_acl *p u16 size = 0; struct smb_ace *pntace = NULL; char *acl_base = NULL; - u32 src_num_aces = 0; + u16 src_num_aces = 0; u16 nsize = 0; struct smb_ace *pnntace = NULL; char *nacl_base = NULL; @@ -1058,7 +1058,7 @@ static __u16 replace_sids_and_copy_aces(struct smb_acl *pdacl, struct smb_acl *p acl_base = (char *)pdacl; size = sizeof(struct smb_acl); - src_num_aces = le32_to_cpu(pdacl->num_aces); + src_num_aces = le16_to_cpu(pdacl->num_aces); nacl_base = (char *)pndacl; nsize = sizeof(struct smb_acl); @@ -1090,11 +1090,11 @@ static int set_chmod_dacl(struct smb_acl *pdacl, struct smb_acl *pndacl, u16 size = 0; struct smb_ace *pntace = NULL; char *acl_base = NULL; - u32 src_num_aces = 0; + u16 src_num_aces = 0; u16 nsize = 0; struct smb_ace *pnntace = NULL; char *nacl_base = NULL; - u32 num_aces = 0; + u16 num_aces = 0; bool new_aces_set = false; /* Assuming that pndacl and pnmode are never NULL */ @@ -1112,7 +1112,7 @@ static int set_chmod_dacl(struct smb_acl *pdacl, struct smb_acl *pndacl, acl_base = (char *)pdacl; size = sizeof(struct smb_acl); - src_num_aces = le32_to_cpu(pdacl->num_aces); + src_num_aces = le16_to_cpu(pdacl->num_aces); /* Retain old ACEs which we can retain */ for (i = 0; i < src_num_aces; ++i) { @@ -1158,7 +1158,7 @@ static int set_chmod_dacl(struct smb_acl *pdacl, struct smb_acl *pndacl, } finalize_dacl: - pndacl->num_aces = cpu_to_le32(num_aces); + pndacl->num_aces = cpu_to_le16(num_aces); pndacl->size = cpu_to_le16(nsize); return 0; @@ -1293,7 +1293,7 @@ static int build_sec_desc(struct smb_ntsd *pntsd, struct smb_ntsd *pnntsd, dacloffset ? dacl_ptr->revision : cpu_to_le16(ACL_REVISION); ndacl_ptr->size = cpu_to_le16(0); - ndacl_ptr->num_aces = cpu_to_le32(0); + ndacl_ptr->num_aces = cpu_to_le16(0); rc = set_chmod_dacl(dacl_ptr, ndacl_ptr, owner_sid_ptr, group_sid_ptr, pnmode, mode_from_sid, posix); @@ -1653,7 +1653,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset); if (mode_from_sid) nsecdesclen += - le32_to_cpu(dacl_ptr->num_aces) * sizeof(struct smb_ace); + le16_to_cpu(dacl_ptr->num_aces) * sizeof(struct smb_ace); else /* cifsacl */ nsecdesclen += le16_to_cpu(dacl_ptr->size); } diff --git a/fs/smb/common/smbacl.h b/fs/smb/common/smbacl.h index 6a60698fc6f0f..a624ec9e4a144 100644 --- a/fs/smb/common/smbacl.h +++ b/fs/smb/common/smbacl.h @@ -107,7 +107,8 @@ struct smb_sid { struct smb_acl { __le16 revision; /* revision level */ __le16 size; - __le32 num_aces; + __le16 num_aces; + __le16 reserved; } __attribute__((packed)); struct smb_ace { diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index 89415b02dd649..561f80d3f953e 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -333,7 +333,7 @@ void posix_state_to_acl(struct posix_acl_state *state, pace->e_perm = state->other.allow; } -int init_acl_state(struct posix_acl_state *state, int cnt) +int init_acl_state(struct posix_acl_state *state, u16 cnt) { int alloc; @@ -368,7 +368,7 @@ static void parse_dacl(struct mnt_idmap *idmap, struct smb_fattr *fattr) { int i, ret; - int num_aces = 0; + u16 num_aces = 0; unsigned int acl_size; char *acl_base; struct smb_ace **ppace; @@ -389,12 +389,12 @@ static void parse_dacl(struct mnt_idmap *idmap, ksmbd_debug(SMB, "DACL revision %d size %d num aces %d\n", le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size), - le32_to_cpu(pdacl->num_aces)); + le16_to_cpu(pdacl->num_aces)); acl_base = (char *)pdacl; acl_size = sizeof(struct smb_acl); - num_aces = le32_to_cpu(pdacl->num_aces); + num_aces = le16_to_cpu(pdacl->num_aces); if (num_aces <= 0) return; @@ -580,7 +580,7 @@ static void parse_dacl(struct mnt_idmap *idmap, static void set_posix_acl_entries_dacl(struct mnt_idmap *idmap, struct smb_ace *pndace, - struct smb_fattr *fattr, u32 *num_aces, + struct smb_fattr *fattr, u16 *num_aces, u16 *size, u32 nt_aces_num) { struct posix_acl_entry *pace; @@ -701,7 +701,7 @@ static void set_ntacl_dacl(struct mnt_idmap *idmap, struct smb_fattr *fattr) { struct smb_ace *ntace, *pndace; - int nt_num_aces = le32_to_cpu(nt_dacl->num_aces), num_aces = 0; + u16 nt_num_aces = le16_to_cpu(nt_dacl->num_aces), num_aces = 0; unsigned short size = 0; int i; @@ -728,7 +728,7 @@ static void set_ntacl_dacl(struct mnt_idmap *idmap, set_posix_acl_entries_dacl(idmap, pndace, fattr, &num_aces, &size, nt_num_aces); - pndacl->num_aces = cpu_to_le32(num_aces); + pndacl->num_aces = cpu_to_le16(num_aces); pndacl->size = cpu_to_le16(le16_to_cpu(pndacl->size) + size); } @@ -736,7 +736,7 @@ static void set_mode_dacl(struct mnt_idmap *idmap, struct smb_acl *pndacl, struct smb_fattr *fattr) { struct smb_ace *pace, *pndace; - u32 num_aces = 0; + u16 num_aces = 0; u16 size = 0, ace_size = 0; uid_t uid; const struct smb_sid *sid; @@ -792,7 +792,7 @@ static void set_mode_dacl(struct mnt_idmap *idmap, fattr->cf_mode, 0007); out: - pndacl->num_aces = cpu_to_le32(num_aces); + pndacl->num_aces = cpu_to_le16(num_aces); pndacl->size = cpu_to_le16(le16_to_cpu(pndacl->size) + size); } @@ -1022,8 +1022,9 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, struct smb_sid owner_sid, group_sid; struct dentry *parent = path->dentry->d_parent; struct mnt_idmap *idmap = mnt_idmap(path->mnt); - int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0, pdacl_size; - int rc = 0, num_aces, dacloffset, pntsd_type, pntsd_size, acl_len, aces_size; + int inherited_flags = 0, flags = 0, i, nt_size = 0, pdacl_size; + int rc = 0, dacloffset, pntsd_type, pntsd_size, acl_len, aces_size; + u16 num_aces, ace_cnt = 0; char *aces_base; bool is_dir = S_ISDIR(d_inode(path->dentry)->i_mode); @@ -1039,7 +1040,7 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, parent_pdacl = (struct smb_acl *)((char *)parent_pntsd + dacloffset); acl_len = pntsd_size - dacloffset; - num_aces = le32_to_cpu(parent_pdacl->num_aces); + num_aces = le16_to_cpu(parent_pdacl->num_aces); pntsd_type = le16_to_cpu(parent_pntsd->type); pdacl_size = le16_to_cpu(parent_pdacl->size); @@ -1199,7 +1200,7 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, pdacl = (struct smb_acl *)((char *)pntsd + le32_to_cpu(pntsd->dacloffset)); pdacl->revision = cpu_to_le16(2); pdacl->size = cpu_to_le16(sizeof(struct smb_acl) + nt_size); - pdacl->num_aces = cpu_to_le32(ace_cnt); + pdacl->num_aces = cpu_to_le16(ace_cnt); pace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl)); memcpy(pace, aces_base, nt_size); pntsd_size += sizeof(struct smb_acl) + nt_size; @@ -1280,7 +1281,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl)); aces_size = acl_size - sizeof(struct smb_acl); - for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) { + for (i = 0; i < le16_to_cpu(pdacl->num_aces); i++) { if (offsetof(struct smb_ace, access_req) > aces_size) break; ace_size = le16_to_cpu(ace->size); @@ -1301,7 +1302,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl)); aces_size = acl_size - sizeof(struct smb_acl); - for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) { + for (i = 0; i < le16_to_cpu(pdacl->num_aces); i++) { if (offsetof(struct smb_ace, access_req) > aces_size) break; ace_size = le16_to_cpu(ace->size); diff --git a/fs/smb/server/smbacl.h b/fs/smb/server/smbacl.h index 24ce576fc2924..355adaee39b87 100644 --- a/fs/smb/server/smbacl.h +++ b/fs/smb/server/smbacl.h @@ -86,7 +86,7 @@ int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, int build_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, struct smb_ntsd *ppntsd, int ppntsd_size, int addition_info, __u32 *secdesclen, struct smb_fattr *fattr); -int init_acl_state(struct posix_acl_state *state, int cnt); +int init_acl_state(struct posix_acl_state *state, u16 cnt); void free_acl_state(struct posix_acl_state *state); void posix_state_to_acl(struct posix_acl_state *state, struct posix_acl_entry *pace); -- GitLab From 1b8b67f3c5e5169535e26efedd3e422172e2db64 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 12 Feb 2025 09:32:11 +0900 Subject: [PATCH 1181/1585] ksmbd: fix incorrect validation for num_aces field of smb_acl parse_dcal() validate num_aces to allocate posix_ace_state_array. if (num_aces > ULONG_MAX / sizeof(struct smb_ace *)) It is an incorrect validation that we can create an array of size ULONG_MAX. smb_acl has ->size field to calculate actual number of aces in request buffer size. Use this to check invalid num_aces. Reported-by: Igor Leite Ladessa Tested-by: Igor Leite Ladessa Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smbacl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index 561f80d3f953e..49b128698670a 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -398,7 +398,9 @@ static void parse_dacl(struct mnt_idmap *idmap, if (num_aces <= 0) return; - if (num_aces > ULONG_MAX / sizeof(struct smb_ace *)) + if (num_aces > (le16_to_cpu(pdacl->size) - sizeof(struct smb_acl)) / + (offsetof(struct smb_ace, sid) + + offsetof(struct smb_sid, sub_auth) + sizeof(__le16))) return; ret = init_acl_state(&acl_state, num_aces); @@ -432,6 +434,7 @@ static void parse_dacl(struct mnt_idmap *idmap, offsetof(struct smb_sid, sub_auth); if (end_of_acl - acl_base < acl_size || + ppace[i]->sid.num_subauth == 0 || ppace[i]->sid.num_subauth > SID_MAX_SUB_AUTHORITIES || (end_of_acl - acl_base < acl_size + sizeof(__le32) * ppace[i]->sid.num_subauth) || -- GitLab From aa2a739a75ab6f24ef72fb3fdb9192c081eacf06 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 12 Feb 2025 09:37:57 +0900 Subject: [PATCH 1182/1585] cifs: fix incorrect validation for num_aces field of smb_acl parse_dcal() validate num_aces to allocate ace array. f (num_aces > ULONG_MAX / sizeof(struct smb_ace *)) It is an incorrect validation that we can create an array of size ULONG_MAX. smb_acl has ->size field to calculate actual number of aces in response buffer size. Use this to check invalid num_aces. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/cifsacl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index 7d953208046af..64bd68f750f84 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -778,7 +778,8 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl, } /* validate that we do not go past end of acl */ - if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) { + if (end_of_acl < (char *)pdacl + sizeof(struct smb_acl) || + end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) { cifs_dbg(VFS, "ACL too small to parse DACL\n"); return; } @@ -799,8 +800,11 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl, if (num_aces > 0) { umode_t denied_mode = 0; - if (num_aces > ULONG_MAX / sizeof(struct smb_ace *)) + if (num_aces > (le16_to_cpu(pdacl->size) - sizeof(struct smb_acl)) / + (offsetof(struct smb_ace, sid) + + offsetof(struct smb_sid, sub_auth) + sizeof(__le16))) return; + ppace = kmalloc_array(num_aces, sizeof(struct smb_ace *), GFP_KERNEL); if (!ppace) -- GitLab From f603b159231b0c58f0c27ab39348534063d38223 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Mon, 3 Mar 2025 14:56:10 +0800 Subject: [PATCH 1183/1585] ALSA: hda/realtek - add supported Mic Mute LED for Lenovo platform Support Mic Mute LED for ThinkCentre M series. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/c211a2702f1f411e86bd7420d7eebc03@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d58743b955f81..ebf54ef5877a4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5053,6 +5053,16 @@ static void alc269_fixup_hp_line1_mic1_led(struct hda_codec *codec, } } +static void alc233_fixup_lenovo_low_en_micmute_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) + spec->micmute_led_polarity = 1; + alc233_fixup_lenovo_line2_mic_hotkey(codec, fix, action); +} + static void alc_hp_mute_disable(struct hda_codec *codec, unsigned int delay) { if (delay <= 0) @@ -7621,6 +7631,7 @@ enum { ALC275_FIXUP_DELL_XPS, ALC293_FIXUP_LENOVO_SPK_NOISE, ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY, + ALC233_FIXUP_LENOVO_L2MH_LOW_ENLED, ALC255_FIXUP_DELL_SPK_NOISE, ALC225_FIXUP_DISABLE_MIC_VREF, ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, @@ -8615,6 +8626,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc233_fixup_lenovo_line2_mic_hotkey, }, + [ALC233_FIXUP_LENOVO_L2MH_LOW_ENLED] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc233_fixup_lenovo_low_en_micmute_led, + }, [ALC233_FIXUP_INTEL_NUC8_DMIC] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_inv_dmic, @@ -10906,6 +10921,9 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340), SND_PCI_QUIRK(0x17aa, 0x334b, "Lenovo ThinkCentre M70 Gen5", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x17aa, 0x3384, "ThinkCentre M90a PRO", ALC233_FIXUP_LENOVO_L2MH_LOW_ENLED), + SND_PCI_QUIRK(0x17aa, 0x3386, "ThinkCentre M90a Gen6", ALC233_FIXUP_LENOVO_L2MH_LOW_ENLED), + SND_PCI_QUIRK(0x17aa, 0x3387, "ThinkCentre M70a Gen6", ALC233_FIXUP_LENOVO_L2MH_LOW_ENLED), SND_PCI_QUIRK(0x17aa, 0x3801, "Lenovo Yoga9 14IAP7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), HDA_CODEC_QUIRK(0x17aa, 0x3802, "DuetITL 2021", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga Pro 9 14IRP8", ALC287_FIXUP_TAS2781_I2C), -- GitLab From 59b348be7597c4a9903cb003c69e37df20c04a30 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Fri, 28 Feb 2025 16:46:57 +0300 Subject: [PATCH 1184/1585] wifi: cfg80211: regulatory: improve invalid hints checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Syzbot keeps reporting an issue [1] that occurs when erroneous symbols sent from userspace get through into user_alpha2[] via regulatory_hint_user() call. Such invalid regulatory hints should be rejected. While a sanity check from commit 47caf685a685 ("cfg80211: regulatory: reject invalid hints") looks to be enough to deter these very cases, there is a way to get around it due to 2 reasons. 1) The way isalpha() works, symbols other than latin lower and upper letters may be used to determine a country/domain. For instance, greek letters will also be considered upper/lower letters and for such characters isalpha() will return true as well. However, ISO-3166-1 alpha2 codes should only hold latin characters. 2) While processing a user regulatory request, between reg_process_hint_user() and regulatory_hint_user() there happens to be a call to queue_regulatory_request() which modifies letters in request->alpha2[] with toupper(). This works fine for latin symbols, less so for weird letter characters from the second part of _ctype[]. Syzbot triggers a warning in is_user_regdom_saved() by first sending over an unexpected non-latin letter that gets malformed by toupper() into a character that ends up failing isalpha() check. Prevent this by enhancing is_an_alpha2() to ensure that incoming symbols are latin letters and nothing else. [1] Syzbot report: ------------[ cut here ]------------ Unexpected user alpha2: A� WARNING: CPU: 1 PID: 964 at net/wireless/reg.c:442 is_user_regdom_saved net/wireless/reg.c:440 [inline] WARNING: CPU: 1 PID: 964 at net/wireless/reg.c:442 restore_alpha2 net/wireless/reg.c:3424 [inline] WARNING: CPU: 1 PID: 964 at net/wireless/reg.c:442 restore_regulatory_settings+0x3c0/0x1e50 net/wireless/reg.c:3516 Modules linked in: CPU: 1 UID: 0 PID: 964 Comm: kworker/1:2 Not tainted 6.12.0-rc5-syzkaller-00044-gc1e939a21eb1 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Workqueue: events_power_efficient crda_timeout_work RIP: 0010:is_user_regdom_saved net/wireless/reg.c:440 [inline] RIP: 0010:restore_alpha2 net/wireless/reg.c:3424 [inline] RIP: 0010:restore_regulatory_settings+0x3c0/0x1e50 net/wireless/reg.c:3516 ... Call Trace: crda_timeout_work+0x27/0x50 net/wireless/reg.c:542 process_one_work kernel/workqueue.c:3229 [inline] process_scheduled_works+0xa65/0x1850 kernel/workqueue.c:3310 worker_thread+0x870/0xd30 kernel/workqueue.c:3391 kthread+0x2f2/0x390 kernel/kthread.c:389 ret_from_fork+0x4d/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Reported-by: syzbot+e10709ac3c44f3d4e800@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e10709ac3c44f3d4e800 Fixes: 09d989d179d0 ("cfg80211: add regulatory hint disconnect support") Cc: stable@kernel.org Signed-off-by: Nikita Zhandarovich Link: https://patch.msgid.link/20250228134659.1577656-1-n.zhandarovich@fintech.ru Signed-off-by: Johannes Berg --- net/wireless/reg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2dd0533e76605..212e9561aae77 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -407,7 +407,8 @@ static bool is_an_alpha2(const char *alpha2) { if (!alpha2) return false; - return isalpha(alpha2[0]) && isalpha(alpha2[1]); + return isascii(alpha2[0]) && isalpha(alpha2[0]) && + isascii(alpha2[1]) && isalpha(alpha2[1]); } static bool alpha2_equal(const char *alpha2_x, const char *alpha2_y) -- GitLab From 487cfd4a8e3dc42d34a759017978a4edaf85fce0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Feb 2025 19:45:29 +0000 Subject: [PATCH 1185/1585] xhci: Restrict USB4 tunnel detection for USB3 devices to Intel hosts When adding support for USB3-over-USB4 tunnelling detection, a check for an Intel-specific capability was added. This capability, which goes by ID 206, is used without any check that we are actually dealing with an Intel host. As it turns out, the Cadence XHCI controller *also* exposes an extended capability numbered 206 (for unknown purposes), but of course doesn't have the Intel-specific registers that the tunnelling code is trying to access. Fun follows. The core of the problems is that the tunnelling code blindly uses vendor-specific capabilities without any check (the Intel-provided documentation I have at hand indicates that 192-255 are indeed vendor-specific). Restrict the detection code to Intel HW for real, preventing any further explosion on my (non-Intel) HW. Cc: stable Fixes: 948ce83fbb7df ("xhci: Add USB4 tunnel detection for USB3 devices on Intel hosts") Signed-off-by: Marc Zyngier Acked-by: Mathias Nyman Link: https://lore.kernel.org/r/20250227194529.2288718-1-maz@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 9693464c05204..69c278b64084b 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "xhci.h" #include "xhci-trace.h" @@ -770,9 +771,16 @@ static int xhci_exit_test_mode(struct xhci_hcd *xhci) enum usb_link_tunnel_mode xhci_port_is_tunneled(struct xhci_hcd *xhci, struct xhci_port *port) { + struct usb_hcd *hcd; void __iomem *base; u32 offset; + /* Don't try and probe this capability for non-Intel hosts */ + hcd = xhci_to_hcd(xhci); + if (!dev_is_pci(hcd->self.controller) || + to_pci_dev(hcd->self.controller)->vendor != PCI_VENDOR_ID_INTEL) + return USB_LINK_UNKNOWN; + base = &xhci->cap_regs->hc_capbase; offset = xhci_find_next_ext_cap(base, 0, XHCI_EXT_CAPS_INTEL_SPR_SHADOW); -- GitLab From 8e812e9355a6f14dffd54a33d951ca403b9732f5 Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Mon, 24 Feb 2025 14:26:04 +0530 Subject: [PATCH 1186/1585] usb: gadget: Check bmAttributes only if configuration is valid If the USB configuration is not valid, then avoid checking for bmAttributes to prevent null pointer deference. Cc: stable Fixes: 40e89ff5750f ("usb: gadget: Set self-powered based on MaxPower and bmAttributes") Signed-off-by: Prashanth K Link: https://lore.kernel.org/r/20250224085604.417327-1-prashanth.k@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 4bcf73bae7610..869ad99afb48b 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1051,7 +1051,7 @@ static int set_config(struct usb_composite_dev *cdev, usb_gadget_set_remote_wakeup(gadget, 0); done: if (power > USB_SELF_POWER_VBUS_MAX_DRAW || - !(c->bmAttributes & USB_CONFIG_ATT_SELFPOWER)) + (c && !(c->bmAttributes & USB_CONFIG_ATT_SELFPOWER))) usb_gadget_clear_selfpowered(gadget); else usb_gadget_set_selfpowered(gadget); -- GitLab From 69c58deec19628c8a686030102176484eb94fed4 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Sun, 16 Feb 2025 22:30:02 +0000 Subject: [PATCH 1187/1585] usb: dwc3: gadget: Prevent irq storm when TH re-executes While commit d325a1de49d6 ("usb: dwc3: gadget: Prevent losing events in event cache") makes sure that top half(TH) does not end up overwriting the cached events before processing them when the TH gets invoked more than one time, returning IRQ_HANDLED results in occasional irq storm where the TH hogs the CPU. The irq storm can be prevented by the flag before event handler busy is cleared. Default enable interrupt moderation in all versions which support them. ftrace event stub during dwc3 irq storm: irq/504_dwc3-1111 ( 1111) [000] .... 70.000866: irq_handler_exit: irq=14 ret=handled irq/504_dwc3-1111 ( 1111) [000] .... 70.000872: irq_handler_entry: irq=504 name=dwc3 irq/504_dwc3-1111 ( 1111) [000] .... 70.000874: irq_handler_exit: irq=504 ret=handled irq/504_dwc3-1111 ( 1111) [000] .... 70.000881: irq_handler_entry: irq=504 name=dwc3 irq/504_dwc3-1111 ( 1111) [000] .... 70.000883: irq_handler_exit: irq=504 ret=handled irq/504_dwc3-1111 ( 1111) [000] .... 70.000889: irq_handler_entry: irq=504 name=dwc3 irq/504_dwc3-1111 ( 1111) [000] .... 70.000892: irq_handler_exit: irq=504 ret=handled irq/504_dwc3-1111 ( 1111) [000] .... 70.000898: irq_handler_entry: irq=504 name=dwc3 irq/504_dwc3-1111 ( 1111) [000] .... 70.000901: irq_handler_exit: irq=504 ret=handled irq/504_dwc3-1111 ( 1111) [000] .... 70.000907: irq_handler_entry: irq=504 name=dwc3 irq/504_dwc3-1111 ( 1111) [000] .... 70.000909: irq_handler_exit: irq=504 ret=handled irq/504_dwc3-1111 ( 1111) [000] .... 70.000915: irq_handler_entry: irq=504 name=dwc3 irq/504_dwc3-1111 ( 1111) [000] .... 70.000918: irq_handler_exit: irq=504 ret=handled irq/504_dwc3-1111 ( 1111) [000] .... 70.000924: irq_handler_entry: irq=504 name=dwc3 irq/504_dwc3-1111 ( 1111) [000] .... 70.000927: irq_handler_exit: irq=504 ret=handled irq/504_dwc3-1111 ( 1111) [000] .... 70.000933: irq_handler_entry: irq=504 name=dwc3 irq/504_dwc3-1111 ( 1111) [000] .... 70.000935: irq_handler_exit: irq=504 ret=handled .... Cc: stable Suggested-by: Thinh Nguyen Fixes: d325a1de49d6 ("usb: dwc3: gadget: Prevent losing events in event cache") Signed-off-by: Badhri Jagan Sridharan Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250216223003.3568039-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 16 ++++++---------- drivers/usb/dwc3/gadget.c | 10 +++++++--- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index dfa1b5fe48dc4..2c472cb97f6c7 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1835,8 +1835,6 @@ static void dwc3_get_properties(struct dwc3 *dwc) dwc->tx_thr_num_pkt_prd = tx_thr_num_pkt_prd; dwc->tx_max_burst_prd = tx_max_burst_prd; - dwc->imod_interval = 0; - dwc->tx_fifo_resize_max_num = tx_fifo_resize_max_num; } @@ -1854,21 +1852,19 @@ static void dwc3_check_params(struct dwc3 *dwc) unsigned int hwparam_gen = DWC3_GHWPARAMS3_SSPHY_IFC(dwc->hwparams.hwparams3); - /* Check for proper value of imod_interval */ - if (dwc->imod_interval && !dwc3_has_imod(dwc)) { - dev_warn(dwc->dev, "Interrupt moderation not supported\n"); - dwc->imod_interval = 0; - } - /* + * Enable IMOD for all supporting controllers. + * + * Particularly, DWC_usb3 v3.00a must enable this feature for + * the following reason: + * * Workaround for STAR 9000961433 which affects only version * 3.00a of the DWC_usb3 core. This prevents the controller * interrupt from being masked while handling events. IMOD * allows us to work around this issue. Enable it for the * affected version. */ - if (!dwc->imod_interval && - DWC3_VER_IS(DWC3, 300A)) + if (dwc3_has_imod((dwc))) dwc->imod_interval = 1; /* Check the maximum_speed parameter */ diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index ddd6b2ce57107..89a4dc8ebf948 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4501,14 +4501,18 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) dwc3_writel(dwc->regs, DWC3_GEVNTSIZ(0), DWC3_GEVNTSIZ_SIZE(evt->length)); + evt->flags &= ~DWC3_EVENT_PENDING; + /* + * Add an explicit write memory barrier to make sure that the update of + * clearing DWC3_EVENT_PENDING is observed in dwc3_check_event_buf() + */ + wmb(); + if (dwc->imod_interval) { dwc3_writel(dwc->regs, DWC3_GEVNTCOUNT(0), DWC3_GEVNTCOUNT_EHB); dwc3_writel(dwc->regs, DWC3_DEV_IMOD(0), dwc->imod_interval); } - /* Keep the clearing of DWC3_EVENT_PENDING at the end */ - evt->flags &= ~DWC3_EVENT_PENDING; - return ret; } -- GitLab From b5ea08aa883da05106fcc683d12489a4292d1122 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 25 Feb 2025 13:02:46 +0200 Subject: [PATCH 1188/1585] usb: renesas_usbhs: Call clk_put() Clocks acquired with of_clk_get() need to be freed with clk_put(). Call clk_put() on priv->clks[0] on error path. Fixes: 3df0e240caba ("usb: renesas_usbhs: Add multiple clocks management") Cc: stable Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20250225110248.870417-2-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/common.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 935fc496fe94b..6c7857b66a219 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -312,8 +312,10 @@ static int usbhsc_clk_get(struct device *dev, struct usbhs_priv *priv) priv->clks[1] = of_clk_get(dev_of_node(dev), 1); if (PTR_ERR(priv->clks[1]) == -ENOENT) priv->clks[1] = NULL; - else if (IS_ERR(priv->clks[1])) + else if (IS_ERR(priv->clks[1])) { + clk_put(priv->clks[0]); return PTR_ERR(priv->clks[1]); + } return 0; } -- GitLab From e0c92440938930e7fa7aa6362780d39cdea34449 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 25 Feb 2025 13:02:47 +0200 Subject: [PATCH 1189/1585] usb: renesas_usbhs: Use devm_usb_get_phy() The gpriv->transceiver is retrieved in probe() through usb_get_phy() but never released. Use devm_usb_get_phy() to handle this scenario. This issue was identified through code investigation. No issue was found without this change. Fixes: b5a2875605ca ("usb: renesas_usbhs: Allow an OTG PHY driver to provide VBUS") Cc: stable Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20250225110248.870417-3-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/mod_gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index 105132ae87acb..e8e5723f54122 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -1094,7 +1094,7 @@ int usbhs_mod_gadget_probe(struct usbhs_priv *priv) goto usbhs_mod_gadget_probe_err_gpriv; } - gpriv->transceiver = usb_get_phy(USB_PHY_TYPE_UNDEFINED); + gpriv->transceiver = devm_usb_get_phy(dev, USB_PHY_TYPE_UNDEFINED); dev_info(dev, "%stransceiver found\n", !IS_ERR(gpriv->transceiver) ? "" : "no "); -- GitLab From 552ca6b87e3778f3dd5b87842f95138162e16c82 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 25 Feb 2025 13:02:48 +0200 Subject: [PATCH 1190/1585] usb: renesas_usbhs: Flush the notify_hotplug_work When performing continuous unbind/bind operations on the USB drivers available on the Renesas RZ/G2L SoC, a kernel crash with the message "Unable to handle kernel NULL pointer dereference at virtual address" may occur. This issue points to the usbhsc_notify_hotplug() function. Flush the delayed work to avoid its execution when driver resources are unavailable. Fixes: bc57381e6347 ("usb: renesas_usbhs: use delayed_work instead of work_struct") Cc: stable Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20250225110248.870417-4-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 6c7857b66a219..4b35ef216125c 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -781,6 +781,8 @@ static void usbhs_remove(struct platform_device *pdev) dev_dbg(&pdev->dev, "usb remove\n"); + flush_delayed_work(&priv->notify_hotplug_work); + /* power off */ if (!usbhs_get_dparam(priv, runtime_pwctrl)) usbhsc_power_ctrl(priv, 0); -- GitLab From 2b66ef84d0d2a0ea955b40bd306f5e3abbc5cf9c Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Fri, 28 Feb 2025 07:50:25 +0000 Subject: [PATCH 1191/1585] usb: hub: lack of clearing xHC resources The xHC resources allocated for USB devices are not released in correct order after resuming in case when while suspend device was reconnected. This issue has been detected during the fallowing scenario: - connect hub HS to root port - connect LS/FS device to hub port - wait for enumeration to finish - force host to suspend - reconnect hub attached to root port - wake host For this scenario during enumeration of USB LS/FS device the Cadence xHC reports completion error code for xHC commands because the xHC resources used for devices has not been properly released. XHCI specification doesn't mention that device can be reset in any order so, we should not treat this issue as Cadence xHC controller bug. Similar as during disconnecting in this case the device resources should be cleared starting form the last usb device in tree toward the root hub. To fix this issue usbcore driver should call hcd->driver->reset_device for all USB devices connected to hub which was reconnected while suspending. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Cc: stable Signed-off-by: Pawel Laszczak Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/PH7PR07MB953841E38C088678ACDCF6EEDDCC2@PH7PR07MB9538.namprd07.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index a76bb50b62026..dcba4281ea486 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -6065,6 +6065,36 @@ void usb_hub_cleanup(void) usb_deregister(&hub_driver); } /* usb_hub_cleanup() */ +/** + * hub_hc_release_resources - clear resources used by host controller + * @udev: pointer to device being released + * + * Context: task context, might sleep + * + * Function releases the host controller resources in correct order before + * making any operation on resuming usb device. The host controller resources + * allocated for devices in tree should be released starting from the last + * usb device in tree toward the root hub. This function is used only during + * resuming device when usb device require reinitialization – that is, when + * flag udev->reset_resume is set. + * + * This call is synchronous, and may not be used in an interrupt context. + */ +static void hub_hc_release_resources(struct usb_device *udev) +{ + struct usb_hub *hub = usb_hub_to_struct_hub(udev); + struct usb_hcd *hcd = bus_to_hcd(udev->bus); + int i; + + /* Release up resources for all children before this device */ + for (i = 0; i < udev->maxchild; i++) + if (hub->ports[i]->child) + hub_hc_release_resources(hub->ports[i]->child); + + if (hcd->driver->reset_device) + hcd->driver->reset_device(hcd, udev); +} + /** * usb_reset_and_verify_device - perform a USB port reset to reinitialize a device * @udev: device to reset (not in SUSPENDED or NOTATTACHED state) @@ -6129,6 +6159,9 @@ static int usb_reset_and_verify_device(struct usb_device *udev) bos = udev->bos; udev->bos = NULL; + if (udev->reset_resume) + hub_hc_release_resources(udev); + mutex_lock(hcd->address0_mutex); for (i = 0; i < PORT_INIT_TRIES; ++i) { -- GitLab From 1be4e29e94a6be77de3bc210820b74f40814f17a Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 27 Feb 2025 11:03:06 -0600 Subject: [PATCH 1192/1585] platform/x86/amd/pmf: Initialize and clean up `cb_mutex` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `cb_mutex` was introduced in commit 9e0894d07072e ("platform/x86/amd/pmf: Enable Custom BIOS Inputs for PMF-TA") to prevent conccurrent access for BIOS inputs. It however isn't initialized and so on use it may lead to a NULL pointer dereference. Add code to initialize on probe and clean up on destroy. Reported-by: Yijun Shen Cc: Richard Gong Fixes: 9e0894d07072e ("platform/x86/amd/pmf: Enable Custom BIOS Inputs for PMF-TA") Signed-off-by: Mario Limonciello Tested-By: Yijun Shen Acked-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20250227170308.435862-1-superm1@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmf/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index 764cc1fe90ae4..a2cb2d5544f5b 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -452,6 +452,7 @@ static int amd_pmf_probe(struct platform_device *pdev) mutex_init(&dev->lock); mutex_init(&dev->update_mutex); + mutex_init(&dev->cb_mutex); apmf_acpi_init(dev); platform_set_drvdata(pdev, dev); @@ -477,6 +478,7 @@ static void amd_pmf_remove(struct platform_device *pdev) amd_pmf_dbgfs_unregister(dev); mutex_destroy(&dev->lock); mutex_destroy(&dev->update_mutex); + mutex_destroy(&dev->cb_mutex); kfree(dev->buf); } -- GitLab From 010c4a461c1dbf3fa75ddea8df018a6128b700c6 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 28 Feb 2025 18:35:43 -0800 Subject: [PATCH 1193/1585] x86/speculation: Simplify and make CALL_NOSPEC consistent CALL_NOSPEC macro is used to generate Spectre-v2 mitigation friendly indirect branches. At compile time the macro defaults to indirect branch, and at runtime those can be patched to thunk based mitigations. This approach is opposite of what is done for the rest of the kernel, where the compile time default is to replace indirect calls with retpoline thunk calls. Make CALL_NOSPEC consistent with the rest of the kernel, default to retpoline thunk at compile time when CONFIG_MITIGATION_RETPOLINE is enabled. Signed-off-by: Pawan Gupta Signed-off-by: Ingo Molnar Cc: Andrew Cooper Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250228-call-nospec-v3-1-96599fed0f33@linux.intel.com --- arch/x86/include/asm/nospec-branch.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 7e8bf78c03d5d..1e6b915ce9564 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -424,16 +424,11 @@ static inline void call_depth_return_thunk(void) {} * Inline asm uses the %V modifier which is only in newer GCC * which is ensured when CONFIG_MITIGATION_RETPOLINE is defined. */ -# define CALL_NOSPEC \ - ALTERNATIVE_2( \ - ANNOTATE_RETPOLINE_SAFE \ - "call *%[thunk_target]\n", \ - "call __x86_indirect_thunk_%V[thunk_target]\n", \ - X86_FEATURE_RETPOLINE, \ - "lfence;\n" \ - ANNOTATE_RETPOLINE_SAFE \ - "call *%[thunk_target]\n", \ - X86_FEATURE_RETPOLINE_LFENCE) +#ifdef CONFIG_MITIGATION_RETPOLINE +#define CALL_NOSPEC "call __x86_indirect_thunk_%V[thunk_target]\n" +#else +#define CALL_NOSPEC "call *%[thunk_target]\n" +#endif # define THUNK_TARGET(addr) [thunk_target] "r" (addr) -- GitLab From 9af9ad85ac44cb754e526d468c3006b48db5dfd8 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 28 Feb 2025 18:35:58 -0800 Subject: [PATCH 1194/1585] x86/speculation: Add a conditional CS prefix to CALL_NOSPEC Retpoline mitigation for spectre-v2 uses thunks for indirect branches. To support this mitigation compilers add a CS prefix with -mindirect-branch-cs-prefix. For an indirect branch in asm, this needs to be added manually. CS prefix is already being added to indirect branches in asm files, but not in inline asm. Add CS prefix to CALL_NOSPEC for inline asm as well. There is no JMP_NOSPEC for inline asm. Reported-by: Josh Poimboeuf Signed-off-by: Pawan Gupta Signed-off-by: Ingo Molnar Cc: Andrew Cooper Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250228-call-nospec-v3-2-96599fed0f33@linux.intel.com --- arch/x86/include/asm/nospec-branch.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 1e6b915ce9564..aee26bb8230f8 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -198,9 +198,8 @@ .endm /* - * Equivalent to -mindirect-branch-cs-prefix; emit the 5 byte jmp/call - * to the retpoline thunk with a CS prefix when the register requires - * a RAX prefix byte to encode. Also see apply_retpolines(). + * Emits a conditional CS prefix that is compatible with + * -mindirect-branch-cs-prefix. */ .macro __CS_PREFIX reg:req .irp rs,r8,r9,r10,r11,r12,r13,r14,r15 @@ -420,12 +419,24 @@ static inline void call_depth_return_thunk(void) {} #ifdef CONFIG_X86_64 +/* + * Emits a conditional CS prefix that is compatible with + * -mindirect-branch-cs-prefix. + */ +#define __CS_PREFIX(reg) \ + ".irp rs,r8,r9,r10,r11,r12,r13,r14,r15\n" \ + ".ifc \\rs," reg "\n" \ + ".byte 0x2e\n" \ + ".endif\n" \ + ".endr\n" + /* * Inline asm uses the %V modifier which is only in newer GCC * which is ensured when CONFIG_MITIGATION_RETPOLINE is defined. */ #ifdef CONFIG_MITIGATION_RETPOLINE -#define CALL_NOSPEC "call __x86_indirect_thunk_%V[thunk_target]\n" +#define CALL_NOSPEC __CS_PREFIX("%V[thunk_target]") \ + "call __x86_indirect_thunk_%V[thunk_target]\n" #else #define CALL_NOSPEC "call *%[thunk_target]\n" #endif -- GitLab From df1a1ed5e1bdd9cc13148e0e5549f5ebcf76cf13 Mon Sep 17 00:00:00 2001 From: Brendan King Date: Wed, 26 Feb 2025 15:42:19 +0000 Subject: [PATCH 1195/1585] drm/imagination: avoid deadlock on fence release Do scheduler queue fence release processing on a workqueue, rather than in the release function itself. Fixes deadlock issues such as the following: [ 607.400437] ============================================ [ 607.405755] WARNING: possible recursive locking detected [ 607.415500] -------------------------------------------- [ 607.420817] weston:zfq0/24149 is trying to acquire lock: [ 607.426131] ffff000017d041a0 (reservation_ww_class_mutex){+.+.}-{3:3}, at: pvr_gem_object_vunmap+0x40/0xc0 [powervr] [ 607.436728] but task is already holding lock: [ 607.442554] ffff000017d105a0 (reservation_ww_class_mutex){+.+.}-{3:3}, at: dma_buf_ioctl+0x250/0x554 [ 607.451727] other info that might help us debug this: [ 607.458245] Possible unsafe locking scenario: [ 607.464155] CPU0 [ 607.466601] ---- [ 607.469044] lock(reservation_ww_class_mutex); [ 607.473584] lock(reservation_ww_class_mutex); [ 607.478114] *** DEADLOCK *** Cc: stable@vger.kernel.org Fixes: eaf01ee5ba28 ("drm/imagination: Implement job submission and scheduling") Signed-off-by: Brendan King Reviewed-by: Matt Coster Link: https://patchwork.freedesktop.org/patch/msgid/20250226-fence-release-deadlock-v2-1-6fed2fc1fe88@imgtec.com Signed-off-by: Matt Coster --- drivers/gpu/drm/imagination/pvr_queue.c | 13 +++++++++++-- drivers/gpu/drm/imagination/pvr_queue.h | 4 ++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/imagination/pvr_queue.c b/drivers/gpu/drm/imagination/pvr_queue.c index c4f08432882b1..f3f1c5212df74 100644 --- a/drivers/gpu/drm/imagination/pvr_queue.c +++ b/drivers/gpu/drm/imagination/pvr_queue.c @@ -109,12 +109,20 @@ pvr_queue_fence_get_driver_name(struct dma_fence *f) return PVR_DRIVER_NAME; } +static void pvr_queue_fence_release_work(struct work_struct *w) +{ + struct pvr_queue_fence *fence = container_of(w, struct pvr_queue_fence, release_work); + + pvr_context_put(fence->queue->ctx); + dma_fence_free(&fence->base); +} + static void pvr_queue_fence_release(struct dma_fence *f) { struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base); + struct pvr_device *pvr_dev = fence->queue->ctx->pvr_dev; - pvr_context_put(fence->queue->ctx); - dma_fence_free(f); + queue_work(pvr_dev->sched_wq, &fence->release_work); } static const char * @@ -268,6 +276,7 @@ pvr_queue_fence_init(struct dma_fence *f, pvr_context_get(queue->ctx); fence->queue = queue; + INIT_WORK(&fence->release_work, pvr_queue_fence_release_work); dma_fence_init(&fence->base, fence_ops, &fence_ctx->lock, fence_ctx->id, atomic_inc_return(&fence_ctx->seqno)); diff --git a/drivers/gpu/drm/imagination/pvr_queue.h b/drivers/gpu/drm/imagination/pvr_queue.h index e06ced69302fc..93fe9ac9f58cc 100644 --- a/drivers/gpu/drm/imagination/pvr_queue.h +++ b/drivers/gpu/drm/imagination/pvr_queue.h @@ -5,6 +5,7 @@ #define PVR_QUEUE_H #include +#include #include "pvr_cccb.h" #include "pvr_device.h" @@ -63,6 +64,9 @@ struct pvr_queue_fence { /** @queue: Queue that created this fence. */ struct pvr_queue *queue; + + /** @release_work: Fence release work structure. */ + struct work_struct release_work; }; /** -- GitLab From a5c4c3ba95a52d66315acdfbaba9bd82ed39c250 Mon Sep 17 00:00:00 2001 From: Brendan King Date: Wed, 26 Feb 2025 15:43:06 +0000 Subject: [PATCH 1196/1585] drm/imagination: Hold drm_gem_gpuva lock for unmap Avoid a warning from drm_gem_gpuva_assert_lock_held in drm_gpuva_unlink. The Imagination driver uses the GEM object reservation lock to protect the gpuva list, but the GEM object was not always known in the code paths that ended up calling drm_gpuva_unlink. When the GEM object isn't known, it is found by calling drm_gpuva_find to lookup the object associated with a given virtual address range, or by calling drm_gpuva_find_first when removing all mappings. Cc: stable@vger.kernel.org Fixes: 4bc736f890ce ("drm/imagination: vm: make use of GPUVM's drm_exec helper") Signed-off-by: Brendan King Reviewed-by: Matt Coster Link: https://patchwork.freedesktop.org/patch/msgid/20250226-hold-drm_gem_gpuva-lock-for-unmap-v2-1-3fdacded227f@imgtec.com Signed-off-by: Matt Coster --- drivers/gpu/drm/imagination/pvr_fw_meta.c | 6 +- drivers/gpu/drm/imagination/pvr_vm.c | 134 +++++++++++++++++----- drivers/gpu/drm/imagination/pvr_vm.h | 3 + 3 files changed, 115 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/imagination/pvr_fw_meta.c b/drivers/gpu/drm/imagination/pvr_fw_meta.c index c39beb70c3173..6d13864851fc2 100644 --- a/drivers/gpu/drm/imagination/pvr_fw_meta.c +++ b/drivers/gpu/drm/imagination/pvr_fw_meta.c @@ -527,8 +527,10 @@ pvr_meta_vm_map(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj) static void pvr_meta_vm_unmap(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj) { - pvr_vm_unmap(pvr_dev->kernel_vm_ctx, fw_obj->fw_mm_node.start, - fw_obj->fw_mm_node.size); + struct pvr_gem_object *pvr_obj = fw_obj->gem; + + pvr_vm_unmap_obj(pvr_dev->kernel_vm_ctx, pvr_obj, + fw_obj->fw_mm_node.start, fw_obj->fw_mm_node.size); } static bool diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c index 363f885a70982..2896fa7501b1c 100644 --- a/drivers/gpu/drm/imagination/pvr_vm.c +++ b/drivers/gpu/drm/imagination/pvr_vm.c @@ -293,8 +293,9 @@ pvr_vm_bind_op_map_init(struct pvr_vm_bind_op *bind_op, static int pvr_vm_bind_op_unmap_init(struct pvr_vm_bind_op *bind_op, - struct pvr_vm_context *vm_ctx, u64 device_addr, - u64 size) + struct pvr_vm_context *vm_ctx, + struct pvr_gem_object *pvr_obj, + u64 device_addr, u64 size) { int err; @@ -318,6 +319,7 @@ pvr_vm_bind_op_unmap_init(struct pvr_vm_bind_op *bind_op, goto err_bind_op_fini; } + bind_op->pvr_obj = pvr_obj; bind_op->vm_ctx = vm_ctx; bind_op->device_addr = device_addr; bind_op->size = size; @@ -597,20 +599,6 @@ pvr_vm_create_context(struct pvr_device *pvr_dev, bool is_userspace_context) return ERR_PTR(err); } -/** - * pvr_vm_unmap_all() - Unmap all mappings associated with a VM context. - * @vm_ctx: Target VM context. - * - * This function ensures that no mappings are left dangling by unmapping them - * all in order of ascending device-virtual address. - */ -void -pvr_vm_unmap_all(struct pvr_vm_context *vm_ctx) -{ - WARN_ON(pvr_vm_unmap(vm_ctx, vm_ctx->gpuvm_mgr.mm_start, - vm_ctx->gpuvm_mgr.mm_range)); -} - /** * pvr_vm_context_release() - Teardown a VM context. * @ref_count: Pointer to reference counter of the VM context. @@ -703,11 +691,7 @@ pvr_vm_lock_extra(struct drm_gpuvm_exec *vm_exec) struct pvr_vm_bind_op *bind_op = vm_exec->extra.priv; struct pvr_gem_object *pvr_obj = bind_op->pvr_obj; - /* Unmap operations don't have an object to lock. */ - if (!pvr_obj) - return 0; - - /* Acquire lock on the GEM being mapped. */ + /* Acquire lock on the GEM object being mapped/unmapped. */ return drm_exec_lock_obj(&vm_exec->exec, gem_from_pvr_gem(pvr_obj)); } @@ -772,8 +756,10 @@ pvr_vm_map(struct pvr_vm_context *vm_ctx, struct pvr_gem_object *pvr_obj, } /** - * pvr_vm_unmap() - Unmap an already mapped section of device-virtual memory. + * pvr_vm_unmap_obj_locked() - Unmap an already mapped section of device-virtual + * memory. * @vm_ctx: Target VM context. + * @pvr_obj: Target PowerVR memory object. * @device_addr: Virtual device address at the start of the target mapping. * @size: Size of the target mapping. * @@ -784,9 +770,13 @@ pvr_vm_map(struct pvr_vm_context *vm_ctx, struct pvr_gem_object *pvr_obj, * * Any error encountered while performing internal operations required to * destroy the mapping (returned from pvr_vm_gpuva_unmap or * pvr_vm_gpuva_remap). + * + * The vm_ctx->lock must be held when calling this function. */ -int -pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size) +static int +pvr_vm_unmap_obj_locked(struct pvr_vm_context *vm_ctx, + struct pvr_gem_object *pvr_obj, + u64 device_addr, u64 size) { struct pvr_vm_bind_op bind_op = {0}; struct drm_gpuvm_exec vm_exec = { @@ -799,11 +789,13 @@ pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size) }, }; - int err = pvr_vm_bind_op_unmap_init(&bind_op, vm_ctx, device_addr, - size); + int err = pvr_vm_bind_op_unmap_init(&bind_op, vm_ctx, pvr_obj, + device_addr, size); if (err) return err; + pvr_gem_object_get(pvr_obj); + err = drm_gpuvm_exec_lock(&vm_exec); if (err) goto err_cleanup; @@ -818,6 +810,96 @@ pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size) return err; } +/** + * pvr_vm_unmap_obj() - Unmap an already mapped section of device-virtual + * memory. + * @vm_ctx: Target VM context. + * @pvr_obj: Target PowerVR memory object. + * @device_addr: Virtual device address at the start of the target mapping. + * @size: Size of the target mapping. + * + * Return: + * * 0 on success, + * * Any error encountered by pvr_vm_unmap_obj_locked. + */ +int +pvr_vm_unmap_obj(struct pvr_vm_context *vm_ctx, struct pvr_gem_object *pvr_obj, + u64 device_addr, u64 size) +{ + int err; + + mutex_lock(&vm_ctx->lock); + err = pvr_vm_unmap_obj_locked(vm_ctx, pvr_obj, device_addr, size); + mutex_unlock(&vm_ctx->lock); + + return err; +} + +/** + * pvr_vm_unmap() - Unmap an already mapped section of device-virtual memory. + * @vm_ctx: Target VM context. + * @device_addr: Virtual device address at the start of the target mapping. + * @size: Size of the target mapping. + * + * Return: + * * 0 on success, + * * Any error encountered by drm_gpuva_find, + * * Any error encountered by pvr_vm_unmap_obj_locked. + */ +int +pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size) +{ + struct pvr_gem_object *pvr_obj; + struct drm_gpuva *va; + int err; + + mutex_lock(&vm_ctx->lock); + + va = drm_gpuva_find(&vm_ctx->gpuvm_mgr, device_addr, size); + if (va) { + pvr_obj = gem_to_pvr_gem(va->gem.obj); + err = pvr_vm_unmap_obj_locked(vm_ctx, pvr_obj, + va->va.addr, va->va.range); + } else { + err = -ENOENT; + } + + mutex_unlock(&vm_ctx->lock); + + return err; +} + +/** + * pvr_vm_unmap_all() - Unmap all mappings associated with a VM context. + * @vm_ctx: Target VM context. + * + * This function ensures that no mappings are left dangling by unmapping them + * all in order of ascending device-virtual address. + */ +void +pvr_vm_unmap_all(struct pvr_vm_context *vm_ctx) +{ + mutex_lock(&vm_ctx->lock); + + for (;;) { + struct pvr_gem_object *pvr_obj; + struct drm_gpuva *va; + + va = drm_gpuva_find_first(&vm_ctx->gpuvm_mgr, + vm_ctx->gpuvm_mgr.mm_start, + vm_ctx->gpuvm_mgr.mm_range); + if (!va) + break; + + pvr_obj = gem_to_pvr_gem(va->gem.obj); + + WARN_ON(pvr_vm_unmap_obj_locked(vm_ctx, pvr_obj, + va->va.addr, va->va.range)); + } + + mutex_unlock(&vm_ctx->lock); +} + /* Static data areas are determined by firmware. */ static const struct drm_pvr_static_data_area static_data_areas[] = { { diff --git a/drivers/gpu/drm/imagination/pvr_vm.h b/drivers/gpu/drm/imagination/pvr_vm.h index 79406243617c1..b0528dffa7f1b 100644 --- a/drivers/gpu/drm/imagination/pvr_vm.h +++ b/drivers/gpu/drm/imagination/pvr_vm.h @@ -38,6 +38,9 @@ struct pvr_vm_context *pvr_vm_create_context(struct pvr_device *pvr_dev, int pvr_vm_map(struct pvr_vm_context *vm_ctx, struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset, u64 device_addr, u64 size); +int pvr_vm_unmap_obj(struct pvr_vm_context *vm_ctx, + struct pvr_gem_object *pvr_obj, + u64 device_addr, u64 size); int pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size); void pvr_vm_unmap_all(struct pvr_vm_context *vm_ctx); -- GitLab From 91cf42c63f2d8a9c1bcdfe923218e079b32e1a69 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 3 Mar 2025 10:47:40 +0000 Subject: [PATCH 1197/1585] spi: microchip-core: prevent RX overflows when transmit size > FIFO size When the size of a transfer exceeds the size of the FIFO (32 bytes), RX overflows will be generated and receive data will be corrupted and warnings will be produced. For example, here's an error generated by a transfer of 36 bytes: spi_master spi0: mchp_corespi_interrupt: RX OVERFLOW: rxlen: 4, txlen: 0 The driver is currently split between handling receiving in the interrupt handler, and sending outside of it. Move all handling out of the interrupt handling, and explicitly link the number of bytes read of of the RX FIFO to the number written into the TX one. This both resolves the overflow problems as well as simplifying the flow of the driver. CC: stable@vger.kernel.org Fixes: 9ac8d17694b6 ("spi: add support for microchip fpga spi controllers") Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20250303-veal-snooper-712c1dfad336@wendy Signed-off-by: Mark Brown --- drivers/spi/spi-microchip-core.c | 41 ++++++++++++++------------------ 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/drivers/spi/spi-microchip-core.c b/drivers/spi/spi-microchip-core.c index 5b6af55855efc..62ba0bd9cbb7e 100644 --- a/drivers/spi/spi-microchip-core.c +++ b/drivers/spi/spi-microchip-core.c @@ -70,8 +70,7 @@ #define INT_RX_CHANNEL_OVERFLOW BIT(2) #define INT_TX_CHANNEL_UNDERRUN BIT(3) -#define INT_ENABLE_MASK (CONTROL_RX_DATA_INT | CONTROL_TX_DATA_INT | \ - CONTROL_RX_OVER_INT | CONTROL_TX_UNDER_INT) +#define INT_ENABLE_MASK (CONTROL_RX_OVER_INT | CONTROL_TX_UNDER_INT) #define REG_CONTROL (0x00) #define REG_FRAME_SIZE (0x04) @@ -133,10 +132,15 @@ static inline void mchp_corespi_disable(struct mchp_corespi *spi) mchp_corespi_write(spi, REG_CONTROL, control); } -static inline void mchp_corespi_read_fifo(struct mchp_corespi *spi) +static inline void mchp_corespi_read_fifo(struct mchp_corespi *spi, int fifo_max) { - while (spi->rx_len >= spi->n_bytes && !(mchp_corespi_read(spi, REG_STATUS) & STATUS_RXFIFO_EMPTY)) { - u32 data = mchp_corespi_read(spi, REG_RX_DATA); + for (int i = 0; i < fifo_max; i++) { + u32 data; + + while (mchp_corespi_read(spi, REG_STATUS) & STATUS_RXFIFO_EMPTY) + ; + + data = mchp_corespi_read(spi, REG_RX_DATA); spi->rx_len -= spi->n_bytes; @@ -211,11 +215,10 @@ static inline void mchp_corespi_set_xfer_size(struct mchp_corespi *spi, int len) mchp_corespi_write(spi, REG_FRAMESUP, len); } -static inline void mchp_corespi_write_fifo(struct mchp_corespi *spi) +static inline void mchp_corespi_write_fifo(struct mchp_corespi *spi, int fifo_max) { - int fifo_max, i = 0; + int i = 0; - fifo_max = DIV_ROUND_UP(min(spi->tx_len, FIFO_DEPTH), spi->n_bytes); mchp_corespi_set_xfer_size(spi, fifo_max); while ((i < fifo_max) && !(mchp_corespi_read(spi, REG_STATUS) & STATUS_TXFIFO_FULL)) { @@ -413,19 +416,6 @@ static irqreturn_t mchp_corespi_interrupt(int irq, void *dev_id) if (intfield == 0) return IRQ_NONE; - if (intfield & INT_TXDONE) - mchp_corespi_write(spi, REG_INT_CLEAR, INT_TXDONE); - - if (intfield & INT_RXRDY) { - mchp_corespi_write(spi, REG_INT_CLEAR, INT_RXRDY); - - if (spi->rx_len) - mchp_corespi_read_fifo(spi); - } - - if (!spi->rx_len && !spi->tx_len) - finalise = true; - if (intfield & INT_RX_CHANNEL_OVERFLOW) { mchp_corespi_write(spi, REG_INT_CLEAR, INT_RX_CHANNEL_OVERFLOW); finalise = true; @@ -512,9 +502,14 @@ static int mchp_corespi_transfer_one(struct spi_controller *host, mchp_corespi_write(spi, REG_SLAVE_SELECT, spi->pending_slave_select); - while (spi->tx_len) - mchp_corespi_write_fifo(spi); + while (spi->tx_len) { + int fifo_max = DIV_ROUND_UP(min(spi->tx_len, FIFO_DEPTH), spi->n_bytes); + + mchp_corespi_write_fifo(spi, fifo_max); + mchp_corespi_read_fifo(spi, fifo_max); + } + spi_finalize_current_transfer(host); return 1; } -- GitLab From 68c3de7f707e8a70e0a6d8087cf0fe4a3d5dbfb0 Mon Sep 17 00:00:00 2001 From: Brendan King Date: Wed, 26 Feb 2025 15:43:54 +0000 Subject: [PATCH 1198/1585] drm/imagination: only init job done fences once Ensure job done fences are only initialised once. This fixes a memory manager not clean warning from drm_mm_takedown on module unload. Cc: stable@vger.kernel.org Fixes: eaf01ee5ba28 ("drm/imagination: Implement job submission and scheduling") Signed-off-by: Brendan King Reviewed-by: Matt Coster Link: https://patchwork.freedesktop.org/patch/msgid/20250226-init-done-fences-once-v2-1-c1b2f556b329@imgtec.com Signed-off-by: Matt Coster --- drivers/gpu/drm/imagination/pvr_queue.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/imagination/pvr_queue.c b/drivers/gpu/drm/imagination/pvr_queue.c index f3f1c5212df74..43411be930a21 100644 --- a/drivers/gpu/drm/imagination/pvr_queue.c +++ b/drivers/gpu/drm/imagination/pvr_queue.c @@ -313,8 +313,9 @@ pvr_queue_cccb_fence_init(struct dma_fence *fence, struct pvr_queue *queue) static void pvr_queue_job_fence_init(struct dma_fence *fence, struct pvr_queue *queue) { - pvr_queue_fence_init(fence, queue, &pvr_queue_job_fence_ops, - &queue->job_fence_ctx); + if (!fence->ops) + pvr_queue_fence_init(fence, queue, &pvr_queue_job_fence_ops, + &queue->job_fence_ctx); } /** -- GitLab From 1d2eabb6616433ccaa13927811bdfa205e91ba60 Mon Sep 17 00:00:00 2001 From: Alessio Belle Date: Fri, 21 Feb 2025 10:49:35 +0000 Subject: [PATCH 1199/1585] drm/imagination: Fix timestamps in firmware traces When firmware traces are enabled, the firmware dumps 48-bit timestamps for each trace as two 32-bit values, highest 32 bits (of which only 16 useful) first. The driver was reassembling them the other way round i.e. interpreting the first value in memory as the lowest 32 bits, and the second value as the highest 32 bits (then truncated to 16 bits). Due to this, firmware trace dumps showed very large timestamps even for traces recorded shortly after GPU boot. The timestamps in these dumps would also sometimes jump backwards because of the truncation. Example trace dumped after loading the powervr module and enabling firmware traces, where each line is commented with the timestamp value in hexadecimal to better show both issues: [93540092739584] : Host Sync Partition marker: 1 // 0x551300000000 [28419798597632] : GPU units deinit // 0x19d900000000 [28548647616512] : GPU deinit // 0x19f700000000 Update logic to reassemble the timestamps halves in the correct order. Fixes: cb56cd610866 ("drm/imagination: Add firmware trace to debugfs") Signed-off-by: Alessio Belle Reviewed-by: Matt Coster Link: https://patchwork.freedesktop.org/patch/msgid/20250221-fix-fw-trace-timestamps-v1-1-dba4aeb030ca@imgtec.com Signed-off-by: Matt Coster --- drivers/gpu/drm/imagination/pvr_fw_trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/imagination/pvr_fw_trace.c b/drivers/gpu/drm/imagination/pvr_fw_trace.c index 73707daa4e52d..5dbb636d7d4ff 100644 --- a/drivers/gpu/drm/imagination/pvr_fw_trace.c +++ b/drivers/gpu/drm/imagination/pvr_fw_trace.c @@ -333,8 +333,8 @@ static int fw_trace_seq_show(struct seq_file *s, void *v) if (sf_id == ROGUE_FW_SF_LAST) return -EINVAL; - timestamp = read_fw_trace(trace_seq_data, 1) | - ((u64)read_fw_trace(trace_seq_data, 2) << 32); + timestamp = ((u64)read_fw_trace(trace_seq_data, 1) << 32) | + read_fw_trace(trace_seq_data, 2); timestamp = (timestamp & ~ROGUE_FWT_TIMESTAMP_TIME_CLRMSK) >> ROGUE_FWT_TIMESTAMP_TIME_SHIFT; -- GitLab From 2738d06fb4f01145b24c542fb06de538ffc56430 Mon Sep 17 00:00:00 2001 From: Dmitry Panchenko Date: Thu, 20 Feb 2025 17:39:31 +0200 Subject: [PATCH 1200/1585] platform/x86: intel-hid: fix volume buttons on Microsoft Surface Go 4 tablet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Volume buttons on Microsoft Surface Go 4 tablet didn't send any events. Add Surface Go 4 DMI match to button_array_table to fix this. Signed-off-by: Dmitry Panchenko Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20250220154016.3620917-1-dmitry@d-systems.ee Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/hid.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 927a2993f6160..88a1a9ff2f344 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -139,6 +139,13 @@ static const struct dmi_system_id button_array_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go 3"), }, }, + { + .ident = "Microsoft Surface Go 4", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go 4"), + }, + }, { } }; -- GitLab From d0d10eaedcb53740883d7e5d53c5e15c879b48fb Mon Sep 17 00:00:00 2001 From: Mingcong Bai Date: Sat, 22 Feb 2025 00:48:24 +0800 Subject: [PATCH 1201/1585] platform/x86: thinkpad_acpi: Add battery quirk for ThinkPad X131e MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on the dmesg messages from the original reporter: [ 4.964073] ACPI: \_SB_.PCI0.LPCB.EC__.HKEY: BCTG evaluated but flagged as error [ 4.964083] thinkpad_acpi: Error probing battery 2 Lenovo ThinkPad X131e also needs this battery quirk. Reported-by: Fan Yang <804284660@qq.com> Tested-by: Fan Yang <804284660@qq.com> Co-developed-by: Xi Ruoyao Signed-off-by: Xi Ruoyao Signed-off-by: Mingcong Bai Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250221164825.77315-1-jeffbai@aosc.io Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/thinkpad_acpi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 72a10ed2017ce..1cc91173e0127 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -9972,6 +9972,7 @@ static const struct tpacpi_quirk battery_quirk_table[] __initconst = { * Individual addressing is broken on models that expose the * primary battery as BAT1. */ + TPACPI_Q_LNV('G', '8', true), /* ThinkPad X131e */ TPACPI_Q_LNV('8', 'F', true), /* Thinkpad X120e */ TPACPI_Q_LNV('J', '7', true), /* B5400 */ TPACPI_Q_LNV('J', 'I', true), /* Thinkpad 11e */ -- GitLab From f317f38e7fbb15a0d8329289fef8cf034938fb4f Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Wed, 26 Feb 2025 13:47:27 -0800 Subject: [PATCH 1202/1585] platform/x86/intel/vsec: Add Diamond Rapids support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add PCI ID for the Diamond Rapids Platforms Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20250226214728.1256747-1-david.e.box@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/vsec.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/intel/vsec.c b/drivers/platform/x86/intel/vsec.c index 8272f1dd0fbc0..db3c031d17572 100644 --- a/drivers/platform/x86/intel/vsec.c +++ b/drivers/platform/x86/intel/vsec.c @@ -404,6 +404,11 @@ static const struct intel_vsec_platform_info oobmsm_info = { .caps = VSEC_CAP_TELEMETRY | VSEC_CAP_SDSI | VSEC_CAP_TPMI, }; +/* DMR OOBMSM info */ +static const struct intel_vsec_platform_info dmr_oobmsm_info = { + .caps = VSEC_CAP_TELEMETRY | VSEC_CAP_TPMI, +}; + /* TGL info */ static const struct intel_vsec_platform_info tgl_info = { .caps = VSEC_CAP_TELEMETRY, @@ -420,6 +425,7 @@ static const struct intel_vsec_platform_info lnl_info = { #define PCI_DEVICE_ID_INTEL_VSEC_MTL_M 0x7d0d #define PCI_DEVICE_ID_INTEL_VSEC_MTL_S 0xad0d #define PCI_DEVICE_ID_INTEL_VSEC_OOBMSM 0x09a7 +#define PCI_DEVICE_ID_INTEL_VSEC_OOBMSM_DMR 0x09a1 #define PCI_DEVICE_ID_INTEL_VSEC_RPL 0xa77d #define PCI_DEVICE_ID_INTEL_VSEC_TGL 0x9a0d #define PCI_DEVICE_ID_INTEL_VSEC_LNL_M 0x647d @@ -430,6 +436,7 @@ static const struct pci_device_id intel_vsec_pci_ids[] = { { PCI_DEVICE_DATA(INTEL, VSEC_MTL_M, &mtl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_MTL_S, &mtl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_OOBMSM, &oobmsm_info) }, + { PCI_DEVICE_DATA(INTEL, VSEC_OOBMSM_DMR, &dmr_oobmsm_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_RPL, &tgl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_TGL, &tgl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_LNL_M, &lnl_info) }, -- GitLab From 172a0f509723fe4741d4b8e9190cf434b18320d8 Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Mon, 3 Mar 2025 13:04:13 +0300 Subject: [PATCH 1203/1585] ALSA: usx2y: validate nrpacks module parameter on probe The module parameter defines number of iso packets per one URB. User is allowed to set any value to the parameter of type int, which can lead to various kinds of weird and incorrect behavior like integer overflows, truncations, etc. Number of packets should be a small non-negative number. Since this parameter is read-only, its value can be validated on driver probe. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Murad Masimov Link: https://patch.msgid.link/20250303100413.835-1-m.masimov@mt-integration.ru Signed-off-by: Takashi Iwai --- sound/usb/usx2y/usbusx2y.c | 11 +++++++++++ sound/usb/usx2y/usbusx2y.h | 26 ++++++++++++++++++++++++++ sound/usb/usx2y/usbusx2yaudio.c | 27 --------------------------- 3 files changed, 37 insertions(+), 27 deletions(-) diff --git a/sound/usb/usx2y/usbusx2y.c b/sound/usb/usx2y/usbusx2y.c index 5f81c68fd42b6..5756ff3528a2d 100644 --- a/sound/usb/usx2y/usbusx2y.c +++ b/sound/usb/usx2y/usbusx2y.c @@ -151,6 +151,12 @@ static int snd_usx2y_card_used[SNDRV_CARDS]; static void snd_usx2y_card_private_free(struct snd_card *card); static void usx2y_unlinkseq(struct snd_usx2y_async_seq *s); +#ifdef USX2Y_NRPACKS_VARIABLE +int nrpacks = USX2Y_NRPACKS; /* number of packets per urb */ +module_param(nrpacks, int, 0444); +MODULE_PARM_DESC(nrpacks, "Number of packets per URB."); +#endif + /* * pipe 4 is used for switching the lamps, setting samplerate, volumes .... */ @@ -432,6 +438,11 @@ static int snd_usx2y_probe(struct usb_interface *intf, struct snd_card *card; int err; +#ifdef USX2Y_NRPACKS_VARIABLE + if (nrpacks < 0 || nrpacks > USX2Y_NRPACKS_MAX) + return -EINVAL; +#endif + if (le16_to_cpu(device->descriptor.idVendor) != 0x1604 || (le16_to_cpu(device->descriptor.idProduct) != USB_ID_US122 && le16_to_cpu(device->descriptor.idProduct) != USB_ID_US224 && diff --git a/sound/usb/usx2y/usbusx2y.h b/sound/usb/usx2y/usbusx2y.h index 391fd7b4ed5ef..6a76d04bf1c7d 100644 --- a/sound/usb/usx2y/usbusx2y.h +++ b/sound/usb/usx2y/usbusx2y.h @@ -7,6 +7,32 @@ #define NRURBS 2 +/* Default value used for nr of packs per urb. + * 1 to 4 have been tested ok on uhci. + * To use 3 on ohci, you'd need a patch: + * look for "0000425-linux-2.6.9-rc4-mm1_ohci-hcd.patch.gz" on + * "https://bugtrack.alsa-project.org/alsa-bug/bug_view_page.php?bug_id=0000425" + * + * 1, 2 and 4 work out of the box on ohci, if I recall correctly. + * Bigger is safer operation, smaller gives lower latencies. + */ +#define USX2Y_NRPACKS 4 + +#define USX2Y_NRPACKS_MAX 1024 + +/* If your system works ok with this module's parameter + * nrpacks set to 1, you might as well comment + * this define out, and thereby produce smaller, faster code. + * You'd also set USX2Y_NRPACKS to 1 then. + */ +#define USX2Y_NRPACKS_VARIABLE 1 + +#ifdef USX2Y_NRPACKS_VARIABLE +extern int nrpacks; +#define nr_of_packs() nrpacks +#else +#define nr_of_packs() USX2Y_NRPACKS +#endif #define URBS_ASYNC_SEQ 10 #define URB_DATA_LEN_ASYNC_SEQ 32 diff --git a/sound/usb/usx2y/usbusx2yaudio.c b/sound/usb/usx2y/usbusx2yaudio.c index f540f46a0b143..acca8bead82e5 100644 --- a/sound/usb/usx2y/usbusx2yaudio.c +++ b/sound/usb/usx2y/usbusx2yaudio.c @@ -28,33 +28,6 @@ #include "usx2y.h" #include "usbusx2y.h" -/* Default value used for nr of packs per urb. - * 1 to 4 have been tested ok on uhci. - * To use 3 on ohci, you'd need a patch: - * look for "0000425-linux-2.6.9-rc4-mm1_ohci-hcd.patch.gz" on - * "https://bugtrack.alsa-project.org/alsa-bug/bug_view_page.php?bug_id=0000425" - * - * 1, 2 and 4 work out of the box on ohci, if I recall correctly. - * Bigger is safer operation, smaller gives lower latencies. - */ -#define USX2Y_NRPACKS 4 - -/* If your system works ok with this module's parameter - * nrpacks set to 1, you might as well comment - * this define out, and thereby produce smaller, faster code. - * You'd also set USX2Y_NRPACKS to 1 then. - */ -#define USX2Y_NRPACKS_VARIABLE 1 - -#ifdef USX2Y_NRPACKS_VARIABLE -static int nrpacks = USX2Y_NRPACKS; /* number of packets per urb */ -#define nr_of_packs() nrpacks -module_param(nrpacks, int, 0444); -MODULE_PARM_DESC(nrpacks, "Number of packets per URB."); -#else -#define nr_of_packs() USX2Y_NRPACKS -#endif - static int usx2y_urb_capt_retire(struct snd_usx2y_substream *subs) { struct urb *urb = subs->completed_urb; -- GitLab From 5cfe5612ca9590db69b9be29dc83041dbf001108 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 17 Feb 2025 17:02:42 +0100 Subject: [PATCH 1204/1585] netfilter: nft_ct: Use __refcount_inc() for per-CPU nft_ct_pcpu_template. nft_ct_pcpu_template is a per-CPU variable and relies on disabled BH for its locking. The refcounter is read and if its value is set to one then the refcounter is incremented and variable is used - otherwise it is already in use and left untouched. Without per-CPU locking in local_bh_disable() on PREEMPT_RT the read-then-increment operation is not atomic and therefore racy. This can be avoided by using unconditionally __refcount_inc() which will increment counter and return the old value as an atomic operation. In case the returned counter is not one, the variable is in use and we need to decrement counter. Otherwise we can use it. Use __refcount_inc() instead of read and a conditional increment. Fixes: edee4f1e9245 ("netfilter: nft_ct: add zone id set support") Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 2e59aba681a13..d526e69a2a2b8 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -230,6 +230,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, enum ip_conntrack_info ctinfo; u16 value = nft_reg_load16(®s->data[priv->sreg]); struct nf_conn *ct; + int oldcnt; ct = nf_ct_get(skb, &ctinfo); if (ct) /* already tracked */ @@ -250,10 +251,11 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, ct = this_cpu_read(nft_ct_pcpu_template); - if (likely(refcount_read(&ct->ct_general.use) == 1)) { - refcount_inc(&ct->ct_general.use); + __refcount_inc(&ct->ct_general.use, &oldcnt); + if (likely(oldcnt == 1)) { nf_ct_zone_add(ct, &zone); } else { + refcount_dec(&ct->ct_general.use); /* previous skb got queued to userspace, allocate temporary * one until percpu template can be reused. */ -- GitLab From 4363f02a39e25e80e68039b4323c570b0848ec66 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 3 Mar 2025 14:55:52 +0800 Subject: [PATCH 1205/1585] ASoC: Intel: sof_sdw: Fix unlikely uninitialized variable use in create_sdw_dailinks() Initialize current_be_id to 0 to handle the unlikely case when there are no devices connected to a DAI. In this case create_sdw_dailink() would return without touching the passed pointer to current_be_id. Found by gcc -fanalyzer Fixes: 59bf457d8055 ("ASoC: intel: sof_sdw: Factor out SoundWire DAI creation") Signed-off-by: Peter Ujfalusi Cc: stable@vger.kernel.org Reviewed-by: Ranjani Sridharan Signed-off-by: Bard Liao Link: https://patch.msgid.link/20250303065552.78328-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_sdw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index c13064c777261..90dafa810b2ec 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -954,7 +954,7 @@ static int create_sdw_dailinks(struct snd_soc_card *card, /* generate DAI links by each sdw link */ while (sof_dais->initialised) { - int current_be_id; + int current_be_id = 0; ret = create_sdw_dailink(card, sof_dais, dai_links, ¤t_be_id, codec_conf); -- GitLab From d776f016d24816f15033169dcd081f077b6c10f4 Mon Sep 17 00:00:00 2001 From: Alexey Klimov Date: Fri, 21 Feb 2025 04:40:24 +0000 Subject: [PATCH 1206/1585] ASoC: codecs: wsa884x: report temps to hwmon in millidegree of Celsius MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Temperatures are reported in units of Celsius however hwmon expects values to be in millidegree of Celsius. Userspace tools observe values close to zero and report it as "Not available" or incorrect values like 0C or 1C. Add a simple conversion to fix that. Before the change: wsa884x-virtual-0 Adapter: Virtual device temp1: +0.0°C -- wsa884x-virtual-0 Adapter: Virtual device temp1: +0.0°C Also reported as N/A before first amplifier power on. After this change and initial wsa884x power on: wsa884x-virtual-0 Adapter: Virtual device temp1: +39.0°C -- wsa884x-virtual-0 Adapter: Virtual device temp1: +37.0°C Tested on sm8550 only. Cc: Krzysztof Kozlowski Cc: Srinivas Kandagatla Signed-off-by: Alexey Klimov Link: https://patch.msgid.link/20250221044024.1207921-1-alexey.klimov@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wsa884x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wsa884x.c b/sound/soc/codecs/wsa884x.c index 86df5152c547b..560a2c04b6955 100644 --- a/sound/soc/codecs/wsa884x.c +++ b/sound/soc/codecs/wsa884x.c @@ -1875,7 +1875,7 @@ static int wsa884x_get_temp(struct wsa884x_priv *wsa884x, long *temp) * Reading temperature is possible only when Power Amplifier is * off. Report last cached data. */ - *temp = wsa884x->temperature; + *temp = wsa884x->temperature * 1000; return 0; } @@ -1934,7 +1934,7 @@ static int wsa884x_get_temp(struct wsa884x_priv *wsa884x, long *temp) if ((val > WSA884X_LOW_TEMP_THRESHOLD) && (val < WSA884X_HIGH_TEMP_THRESHOLD)) { wsa884x->temperature = val; - *temp = val; + *temp = val * 1000; ret = 0; } else { ret = -EAGAIN; -- GitLab From 3d6c9dd4cb3013fe83524949b914f1497855e3de Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sat, 22 Feb 2025 23:56:59 +0100 Subject: [PATCH 1207/1585] ASoC: tegra: Fix ADX S24_LE audio format Commit 4204eccc7b2a ("ASoC: tegra: Add support for S24_LE audio format") added support for the S24_LE audio format, but duplicated S16_LE in OUT_DAI() for ADX instead. Fix this by adding support for the S24_LE audio format. Compile-tested only. Cc: stable@vger.kernel.org Fixes: 4204eccc7b2a ("ASoC: tegra: Add support for S24_LE audio format") Signed-off-by: Thorsten Blum Link: https://patch.msgid.link/20250222225700.539673-2-thorsten.blum@linux.dev Signed-off-by: Mark Brown --- sound/soc/tegra/tegra210_adx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/tegra/tegra210_adx.c b/sound/soc/tegra/tegra210_adx.c index 3e6e8f51f380b..0aa93b948378f 100644 --- a/sound/soc/tegra/tegra210_adx.c +++ b/sound/soc/tegra/tegra210_adx.c @@ -264,7 +264,7 @@ static const struct snd_soc_dai_ops tegra210_adx_out_dai_ops = { .rates = SNDRV_PCM_RATE_8000_192000, \ .formats = SNDRV_PCM_FMTBIT_S8 | \ SNDRV_PCM_FMTBIT_S16_LE | \ - SNDRV_PCM_FMTBIT_S16_LE | \ + SNDRV_PCM_FMTBIT_S24_LE | \ SNDRV_PCM_FMTBIT_S32_LE, \ }, \ .capture = { \ @@ -274,7 +274,7 @@ static const struct snd_soc_dai_ops tegra210_adx_out_dai_ops = { .rates = SNDRV_PCM_RATE_8000_192000, \ .formats = SNDRV_PCM_FMTBIT_S8 | \ SNDRV_PCM_FMTBIT_S16_LE | \ - SNDRV_PCM_FMTBIT_S16_LE | \ + SNDRV_PCM_FMTBIT_S24_LE | \ SNDRV_PCM_FMTBIT_S32_LE, \ }, \ .ops = &tegra210_adx_out_dai_ops, \ -- GitLab From 64e6a754d33d31aa844b3ee66fb93ac84ca1565e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Feb 2025 08:26:42 +0000 Subject: [PATCH 1208/1585] llc: do not use skb_get() before dev_queue_xmit() syzbot is able to crash hosts [1], using llc and devices not supporting IFF_TX_SKB_SHARING. In this case, e1000 driver calls eth_skb_pad(), while the skb is shared. Simply replace skb_get() by skb_clone() in net/llc/llc_s_ac.c Note that e1000 driver might have an issue with pktgen, because it does not clear IFF_TX_SKB_SHARING, this is an orthogonal change. We need to audit other skb_get() uses in net/llc. [1] kernel BUG at net/core/skbuff.c:2178 ! Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN NOPTI CPU: 0 UID: 0 PID: 16371 Comm: syz.2.2764 Not tainted 6.14.0-rc4-syzkaller-00052-gac9c34d1e45a #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:pskb_expand_head+0x6ce/0x1240 net/core/skbuff.c:2178 Call Trace: __skb_pad+0x18a/0x610 net/core/skbuff.c:2466 __skb_put_padto include/linux/skbuff.h:3843 [inline] skb_put_padto include/linux/skbuff.h:3862 [inline] eth_skb_pad include/linux/etherdevice.h:656 [inline] e1000_xmit_frame+0x2d99/0x5800 drivers/net/ethernet/intel/e1000/e1000_main.c:3128 __netdev_start_xmit include/linux/netdevice.h:5151 [inline] netdev_start_xmit include/linux/netdevice.h:5160 [inline] xmit_one net/core/dev.c:3806 [inline] dev_hard_start_xmit+0x9a/0x7b0 net/core/dev.c:3822 sch_direct_xmit+0x1ae/0xc30 net/sched/sch_generic.c:343 __dev_xmit_skb net/core/dev.c:4045 [inline] __dev_queue_xmit+0x13d4/0x43e0 net/core/dev.c:4621 dev_queue_xmit include/linux/netdevice.h:3313 [inline] llc_sap_action_send_test_c+0x268/0x320 net/llc/llc_s_ac.c:144 llc_exec_sap_trans_actions net/llc/llc_sap.c:153 [inline] llc_sap_next_state net/llc/llc_sap.c:182 [inline] llc_sap_state_process+0x239/0x510 net/llc/llc_sap.c:209 llc_ui_sendmsg+0xd0d/0x14e0 net/llc/af_llc.c:993 sock_sendmsg_nosec net/socket.c:718 [inline] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+da65c993ae113742a25f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/67c020c0.050a0220.222324.0011.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/llc/llc_s_ac.c | 49 +++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index 06fb8e6944b06..7a0cae9a81114 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -24,7 +24,7 @@ #include #include #include - +#include /** * llc_sap_action_unitdata_ind - forward UI PDU to network layer @@ -40,6 +40,26 @@ int llc_sap_action_unitdata_ind(struct llc_sap *sap, struct sk_buff *skb) return 0; } +static int llc_prepare_and_xmit(struct sk_buff *skb) +{ + struct llc_sap_state_ev *ev = llc_sap_ev(skb); + struct sk_buff *nskb; + int rc; + + rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); + if (rc) + return rc; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return -ENOMEM; + + if (skb->sk) + skb_set_owner_w(nskb, skb->sk); + + return dev_queue_xmit(nskb); +} + /** * llc_sap_action_send_ui - sends UI PDU resp to UNITDATA REQ to MAC layer * @sap: SAP @@ -52,17 +72,12 @@ int llc_sap_action_unitdata_ind(struct llc_sap *sap, struct sk_buff *skb) int llc_sap_action_send_ui(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); - int rc; llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap, ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_ui_cmd(skb); - rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) { - skb_get(skb); - rc = dev_queue_xmit(skb); - } - return rc; + + return llc_prepare_and_xmit(skb); } /** @@ -77,17 +92,12 @@ int llc_sap_action_send_ui(struct llc_sap *sap, struct sk_buff *skb) int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); - int rc; llc_pdu_header_init(skb, LLC_PDU_TYPE_U_XID, ev->saddr.lsap, ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0); - rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) { - skb_get(skb); - rc = dev_queue_xmit(skb); - } - return rc; + + return llc_prepare_and_xmit(skb); } /** @@ -133,17 +143,12 @@ int llc_sap_action_send_xid_r(struct llc_sap *sap, struct sk_buff *skb) int llc_sap_action_send_test_c(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); - int rc; llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap, ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_test_cmd(skb); - rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) { - skb_get(skb); - rc = dev_queue_xmit(skb); - } - return rc; + + return llc_prepare_and_xmit(skb); } int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb) -- GitLab From 10fce7ebe888fa8c97eee7e317a47e7603e5e78d Mon Sep 17 00:00:00 2001 From: Xinghuo Chen Date: Mon, 3 Mar 2025 07:57:33 -0500 Subject: [PATCH 1209/1585] hwmon: fix a NULL vs IS_ERR_OR_NULL() check in xgene_hwmon_probe() The devm_memremap() function returns error pointers on error, it doesn't return NULL. Fixes: c7cefce03e69 ("hwmon: (xgene) access mailbox as RAM") Signed-off-by: Xinghuo Chen Link: https://lore.kernel.org/r/tencent_9AD8E7683EC29CAC97496B44F3F865BA070A@qq.com Signed-off-by: Guenter Roeck --- drivers/hwmon/xgene-hwmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c index 1e3bd129a922d..7087197383c96 100644 --- a/drivers/hwmon/xgene-hwmon.c +++ b/drivers/hwmon/xgene-hwmon.c @@ -706,7 +706,7 @@ static int xgene_hwmon_probe(struct platform_device *pdev) goto out; } - if (!ctx->pcc_comm_addr) { + if (IS_ERR_OR_NULL(ctx->pcc_comm_addr)) { dev_err(&pdev->dev, "Failed to ioremap PCC comm region\n"); rc = -ENOMEM; -- GitLab From 23e0832d6d7be2d3c713f9390c060b6f1c48bf36 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Tue, 18 Feb 2025 13:41:50 +0100 Subject: [PATCH 1210/1585] drm/sched: Fix preprocessor guard When writing the header guard for gpu_scheduler_trace.h, a typo, apparently, occurred. Fix the typo and document the scope of the guard. Fixes: 353da3c520b4 ("drm/amdgpu: add tracepoint for scheduler (v2)") Reviewed-by: Tvrtko Ursulin Signed-off-by: Philipp Stanner Link: https://patchwork.freedesktop.org/patch/msgid/20250218124149.118002-2-phasta@kernel.org --- drivers/gpu/drm/scheduler/gpu_scheduler_trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h index c75302ca3427c..f56e77e7f6d02 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h @@ -21,7 +21,7 @@ * */ -#if !defined(_GPU_SCHED_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#if !defined(_GPU_SCHED_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) #define _GPU_SCHED_TRACE_H_ #include @@ -106,7 +106,7 @@ TRACE_EVENT(drm_sched_job_wait_dep, __entry->seqno) ); -#endif +#endif /* _GPU_SCHED_TRACE_H_ */ /* This part must be outside protection */ #undef TRACE_INCLUDE_PATH -- GitLab From b2653cd3b75f62f29b72df4070e20357acb52bc4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 26 Feb 2025 17:25:32 -0800 Subject: [PATCH 1211/1585] KVM: SVM: Save host DR masks on CPUs with DebugSwap When running SEV-SNP guests on a CPU that supports DebugSwap, always save the host's DR0..DR3 mask MSR values irrespective of whether or not DebugSwap is enabled, to ensure the host values aren't clobbered by the CPU. And for now, also save DR0..DR3, even though doing so isn't necessary (see below). SVM_VMGEXIT_AP_CREATE is deeply flawed in that it allows the *guest* to create a VMSA with guest-controlled SEV_FEATURES. A well behaved guest can inform the hypervisor, i.e. KVM, of its "requested" features, but on CPUs without ALLOWED_SEV_FEATURES support, nothing prevents the guest from lying about which SEV features are being enabled (or not!). If a misbehaving guest enables DebugSwap in a secondary vCPU's VMSA, the CPU will load the DR0..DR3 mask MSRs on #VMEXIT, i.e. will clobber the MSRs with '0' if KVM doesn't save its desired value. Note, DR0..DR3 themselves are "ok", as DR7 is reset on #VMEXIT, and KVM restores all DRs in common x86 code as needed via hw_breakpoint_restore(). I.e. there is no risk of host DR0..DR3 being clobbered (when it matters). However, there is a flaw in the opposite direction; because the guest can lie about enabling DebugSwap, i.e. can *disable* DebugSwap without KVM's knowledge, KVM must not rely on the CPU to restore DRs. Defer fixing that wart, as it's more of a documentation issue than a bug in the code. Note, KVM added support for DebugSwap on commit d1f85fbe836e ("KVM: SEV: Enable data breakpoints in SEV-ES"), but that is not an appropriate Fixes, as the underlying flaw exists in hardware, not in KVM. I.e. all kernels that support SEV-SNP need to be patched, not just kernels with KVM's full support for DebugSwap (ignoring that DebugSwap support landed first). Opportunistically fix an incorrect statement in the comment; on CPUs without DebugSwap, the CPU does NOT save or load debug registers, i.e. Fixes: e366f92ea99e ("KVM: SEV: Support SEV-SNP AP Creation NAE event") Cc: stable@vger.kernel.org Cc: Naveen N Rao Cc: Kim Phillips Cc: Tom Lendacky Cc: Alexey Kardashevskiy Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/20250227012541.3234589-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/sev.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index a2a794c320503..ef057c85a67ce 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -4580,6 +4580,8 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm) void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa) { + struct kvm *kvm = svm->vcpu.kvm; + /* * All host state for SEV-ES guests is categorized into three swap types * based on how it is handled by hardware during a world switch: @@ -4603,10 +4605,15 @@ void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_are /* * If DebugSwap is enabled, debug registers are loaded but NOT saved by - * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both - * saves and loads debug registers (Type-A). + * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU does + * not save or load debug registers. Sadly, on CPUs without + * ALLOWED_SEV_FEATURES, KVM can't prevent SNP guests from enabling + * DebugSwap on secondary vCPUs without KVM's knowledge via "AP Create". + * Save all registers if DebugSwap is supported to prevent host state + * from being clobbered by a misbehaving guest. */ - if (sev_vcpu_has_debug_swap(svm)) { + if (sev_vcpu_has_debug_swap(svm) || + (sev_snp_guest(kvm) && cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP))) { hostsa->dr0 = native_get_debugreg(0); hostsa->dr1 = native_get_debugreg(1); hostsa->dr2 = native_get_debugreg(2); -- GitLab From 807cb9ce2ed9a1b6e79e70fb2cdb7860f1517dcc Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 26 Feb 2025 17:25:33 -0800 Subject: [PATCH 1212/1585] KVM: SVM: Don't rely on DebugSwap to restore host DR0..DR3 Never rely on the CPU to restore/load host DR0..DR3 values, even if the CPU supports DebugSwap, as there are no guarantees that SNP guests will actually enable DebugSwap on APs. E.g. if KVM were to rely on the CPU to load DR0..DR3 and skipped them during hw_breakpoint_restore(), KVM would run with clobbered-to-zero DRs if an SNP guest created APs without DebugSwap enabled. Update the comment to explain the dangers, and hopefully prevent breaking KVM in the future. Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/20250227012541.3234589-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/sev.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index ef057c85a67ce..080f8cecd7ca6 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -4606,18 +4606,21 @@ void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_are /* * If DebugSwap is enabled, debug registers are loaded but NOT saved by * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU does - * not save or load debug registers. Sadly, on CPUs without - * ALLOWED_SEV_FEATURES, KVM can't prevent SNP guests from enabling - * DebugSwap on secondary vCPUs without KVM's knowledge via "AP Create". - * Save all registers if DebugSwap is supported to prevent host state - * from being clobbered by a misbehaving guest. + * not save or load debug registers. Sadly, KVM can't prevent SNP + * guests from lying about DebugSwap on secondary vCPUs, i.e. the + * SEV_FEATURES provided at "AP Create" isn't guaranteed to match what + * the guest has actually enabled (or not!) in the VMSA. + * + * If DebugSwap is *possible*, save the masks so that they're restored + * if the guest enables DebugSwap. But for the DRs themselves, do NOT + * rely on the CPU to restore the host values; KVM will restore them as + * needed in common code, via hw_breakpoint_restore(). Note, KVM does + * NOT support virtualizing Breakpoint Extensions, i.e. the mask MSRs + * don't need to be restored per se, KVM just needs to ensure they are + * loaded with the correct values *if* the CPU writes the MSRs. */ if (sev_vcpu_has_debug_swap(svm) || (sev_snp_guest(kvm) && cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP))) { - hostsa->dr0 = native_get_debugreg(0); - hostsa->dr1 = native_get_debugreg(1); - hostsa->dr2 = native_get_debugreg(2); - hostsa->dr3 = native_get_debugreg(3); hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0); hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1); hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2); -- GitLab From d88ed5fb7c88f404e57fe2b2a6d19fefc35b4dc7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 28 Feb 2025 15:08:04 -0800 Subject: [PATCH 1213/1585] KVM: selftests: Ensure all vCPUs hit -EFAULT during initial RO stage During the initial mprotect(RO) stage of mmu_stress_test, keep vCPUs spinning until all vCPUs have hit -EFAULT, i.e. until all vCPUs have tried to write to a read-only page. If a vCPU manages to complete an entire iteration of the loop without hitting a read-only page, *and* the vCPU observes mprotect_ro_done before starting a second iteration, then the vCPU will prematurely fall through to GUEST_SYNC(3) (on x86 and arm64) and get out of sequence. Replace the "do-while (!r)" loop around the associated _vcpu_run() with a single invocation, as barring a KVM bug, the vCPU is guaranteed to hit -EFAULT, and retrying on success is super confusion, hides KVM bugs, and complicates this fix. The do-while loop was semi-unintentionally added specifically to fudge around a KVM x86 bug, and said bug is unhittable without modifying the test to force x86 down the !(x86||arm64) path. On x86, if forced emulation is enabled, vcpu_arch_put_guest() may trigger emulation of the store to memory. Due a (very, very) longstanding bug in KVM x86's emulator, emulate writes to guest memory that fail during __kvm_write_guest_page() unconditionally return KVM_EXIT_MMIO. While that is desirable in the !memslot case, it's wrong in this case as the failure happens due to __copy_to_user() hitting a read-only page, not an emulated MMIO region. But as above, x86 only uses vcpu_arch_put_guest() if the __x86_64__ guards are clobbered to force x86 down the common path, and of course the unexpected MMIO is a KVM bug, i.e. *should* cause a test failure. Fixes: b6c304aec648 ("KVM: selftests: Verify KVM correctly handles mprotect(PROT_READ)") Reported-by: Yan Zhao Closes: https://lore.kernel.org/all/20250208105318.16861-1-yan.y.zhao@intel.com Debugged-by: Yan Zhao Reviewed-by: Yan Zhao Tested-by: Yan Zhao Link: https://lore.kernel.org/r/20250228230804.3845860-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/mmu_stress_test.c | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index d9c76b4c0d88a..6a437d2be9fa4 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -18,6 +18,7 @@ #include "ucall_common.h" static bool mprotect_ro_done; +static bool all_vcpus_hit_ro_fault; static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) { @@ -36,9 +37,9 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) /* * Write to the region while mprotect(PROT_READ) is underway. Keep - * looping until the memory is guaranteed to be read-only, otherwise - * vCPUs may complete their writes and advance to the next stage - * prematurely. + * looping until the memory is guaranteed to be read-only and a fault + * has occurred, otherwise vCPUs may complete their writes and advance + * to the next stage prematurely. * * For architectures that support skipping the faulting instruction, * generate the store via inline assembly to ensure the exact length @@ -56,7 +57,7 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) #else vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); #endif - } while (!READ_ONCE(mprotect_ro_done)); + } while (!READ_ONCE(mprotect_ro_done) || !READ_ONCE(all_vcpus_hit_ro_fault)); /* * Only architectures that write the entire range can explicitly sync, @@ -81,6 +82,7 @@ struct vcpu_info { static int nr_vcpus; static atomic_t rendezvous; +static atomic_t nr_ro_faults; static void rendezvous_with_boss(void) { @@ -148,12 +150,16 @@ static void *vcpu_worker(void *data) * be stuck on the faulting instruction for other architectures. Go to * stage 3 without a rendezvous */ - do { - r = _vcpu_run(vcpu); - } while (!r); + r = _vcpu_run(vcpu); TEST_ASSERT(r == -1 && errno == EFAULT, "Expected EFAULT on write to RO memory, got r = %d, errno = %d", r, errno); + atomic_inc(&nr_ro_faults); + if (atomic_read(&nr_ro_faults) == nr_vcpus) { + WRITE_ONCE(all_vcpus_hit_ro_fault, true); + sync_global_to_guest(vm, all_vcpus_hit_ro_fault); + } + #if defined(__x86_64__) || defined(__aarch64__) /* * Verify *all* writes from the guest hit EFAULT due to the VMA now @@ -378,7 +384,6 @@ int main(int argc, char *argv[]) rendezvous_with_vcpus(&time_run2, "run 2"); mprotect(mem, slot_size, PROT_READ); - usleep(10); mprotect_ro_done = true; sync_global_to_guest(vm, mprotect_ro_done); -- GitLab From 3b2d3db368013729fd2167a0d91fec821dba807c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 28 Feb 2025 15:38:52 -0800 Subject: [PATCH 1214/1585] KVM: selftests: Fix printf() format goof in SEV smoke test Print out the index of mismatching XSAVE bytes using unsigned decimal format. Some versions of clang complain about trying to print an integer as an unsigned char. x86/sev_smoke_test.c:55:51: error: format specifies type 'unsigned char' but the argument has type 'int' [-Werror,-Wformat] Fixes: 8c53183dbaa2 ("selftests: kvm: add test for transferring FPU state into VMSA") Link: https://lore.kernel.org/r/20250228233852.3855676-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86/sev_smoke_test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c index a1a688e752666..d97816dc476a2 100644 --- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c @@ -52,7 +52,8 @@ static void compare_xsave(u8 *from_host, u8 *from_guest) bool bad = false; for (i = 0; i < 4095; i++) { if (from_host[i] != from_guest[i]) { - printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]); + printf("mismatch at %u | %02hhx %02hhx\n", + i, from_host[i], from_guest[i]); bad = true; } } -- GitLab From 9360dfe4cbd62ff1eb8217b815964931523b75b3 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Mon, 3 Mar 2025 18:51:59 +0100 Subject: [PATCH 1215/1585] sched_ext: Validate prev_cpu in scx_bpf_select_cpu_dfl() If a BPF scheduler provides an invalid CPU (outside the nr_cpu_ids range) as prev_cpu to scx_bpf_select_cpu_dfl() it can cause a kernel crash. To prevent this, validate prev_cpu in scx_bpf_select_cpu_dfl() and trigger an scx error if an invalid CPU is specified. Fixes: f0e1a0643a59b ("sched_ext: Implement BPF extensible scheduler class") Cc: stable@vger.kernel.org # v6.12+ Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 0f1da199cfc7c..7b9dfee858e79 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -6422,6 +6422,9 @@ static bool check_builtin_idle_enabled(void) __bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) { + if (!ops_cpu_valid(prev_cpu, NULL)) + goto prev_cpu; + if (!check_builtin_idle_enabled()) goto prev_cpu; -- GitLab From cc5bfc4e16fc1d1c520cd7bb28646e82b6e69217 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 30 Jan 2025 23:49:31 +0000 Subject: [PATCH 1216/1585] usb: dwc3: Set SUSPENDENABLE soon after phy init After phy initialization, some phy operations can only be executed while in lower P states. Ensure GUSB3PIPECTL.SUSPENDENABLE and GUSB2PHYCFG.SUSPHY are set soon after initialization to avoid blocking phy ops. Previously the SUSPENDENABLE bits are only set after the controller initialization, which may not happen right away if there's no gadget driver or xhci driver bound. Revise this to clear SUSPENDENABLE bits only when there's mode switching (change in GCTL.PRTCAPDIR). Fixes: 6d735722063a ("usb: dwc3: core: Prevent phy suspend during init") Cc: stable Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/633aef0afee7d56d2316f7cc3e1b2a6d518a8cc9.1738280911.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 69 +++++++++++++++++++++++++---------------- drivers/usb/dwc3/core.h | 2 +- drivers/usb/dwc3/drd.c | 4 +-- 3 files changed, 45 insertions(+), 30 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 2c472cb97f6c7..66a08b5271653 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -131,11 +131,24 @@ void dwc3_enable_susphy(struct dwc3 *dwc, bool enable) } } -void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode) +void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode, bool ignore_susphy) { + unsigned int hw_mode; u32 reg; reg = dwc3_readl(dwc->regs, DWC3_GCTL); + + /* + * For DRD controllers, GUSB3PIPECTL.SUSPENDENABLE and + * GUSB2PHYCFG.SUSPHY should be cleared during mode switching, + * and they can be set after core initialization. + */ + hw_mode = DWC3_GHWPARAMS0_MODE(dwc->hwparams.hwparams0); + if (hw_mode == DWC3_GHWPARAMS0_MODE_DRD && !ignore_susphy) { + if (DWC3_GCTL_PRTCAP(reg) != mode) + dwc3_enable_susphy(dwc, false); + } + reg &= ~(DWC3_GCTL_PRTCAPDIR(DWC3_GCTL_PRTCAP_OTG)); reg |= DWC3_GCTL_PRTCAPDIR(mode); dwc3_writel(dwc->regs, DWC3_GCTL, reg); @@ -216,7 +229,7 @@ static void __dwc3_set_mode(struct work_struct *work) spin_lock_irqsave(&dwc->lock, flags); - dwc3_set_prtcap(dwc, desired_dr_role); + dwc3_set_prtcap(dwc, desired_dr_role, false); spin_unlock_irqrestore(&dwc->lock, flags); @@ -658,16 +671,7 @@ static int dwc3_ss_phy_setup(struct dwc3 *dwc, int index) */ reg &= ~DWC3_GUSB3PIPECTL_UX_EXIT_PX; - /* - * Above DWC_usb3.0 1.94a, it is recommended to set - * DWC3_GUSB3PIPECTL_SUSPHY to '0' during coreConsultant configuration. - * So default value will be '0' when the core is reset. Application - * needs to set it to '1' after the core initialization is completed. - * - * Similarly for DRD controllers, GUSB3PIPECTL.SUSPENDENABLE must be - * cleared after power-on reset, and it can be set after core - * initialization. - */ + /* Ensure the GUSB3PIPECTL.SUSPENDENABLE is cleared prior to phy init. */ reg &= ~DWC3_GUSB3PIPECTL_SUSPHY; if (dwc->u2ss_inp3_quirk) @@ -747,15 +751,7 @@ static int dwc3_hs_phy_setup(struct dwc3 *dwc, int index) break; } - /* - * Above DWC_usb3.0 1.94a, it is recommended to set - * DWC3_GUSB2PHYCFG_SUSPHY to '0' during coreConsultant configuration. - * So default value will be '0' when the core is reset. Application - * needs to set it to '1' after the core initialization is completed. - * - * Similarly for DRD controllers, GUSB2PHYCFG.SUSPHY must be cleared - * after power-on reset, and it can be set after core initialization. - */ + /* Ensure the GUSB2PHYCFG.SUSPHY is cleared prior to phy init. */ reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; if (dwc->dis_enblslpm_quirk) @@ -830,6 +826,25 @@ static int dwc3_phy_init(struct dwc3 *dwc) goto err_exit_usb3_phy; } + /* + * Above DWC_usb3.0 1.94a, it is recommended to set + * DWC3_GUSB3PIPECTL_SUSPHY and DWC3_GUSB2PHYCFG_SUSPHY to '0' during + * coreConsultant configuration. So default value will be '0' when the + * core is reset. Application needs to set it to '1' after the core + * initialization is completed. + * + * Certain phy requires to be in P0 power state during initialization. + * Make sure GUSB3PIPECTL.SUSPENDENABLE and GUSB2PHYCFG.SUSPHY are clear + * prior to phy init to maintain in the P0 state. + * + * After phy initialization, some phy operations can only be executed + * while in lower P states. Ensure GUSB3PIPECTL.SUSPENDENABLE and + * GUSB2PHYCFG.SUSPHY are set soon after initialization to avoid + * blocking phy ops. + */ + if (!DWC3_VER_IS_WITHIN(DWC3, ANY, 194A)) + dwc3_enable_susphy(dwc, true); + return 0; err_exit_usb3_phy: @@ -1588,7 +1603,7 @@ static int dwc3_core_init_mode(struct dwc3 *dwc) switch (dwc->dr_mode) { case USB_DR_MODE_PERIPHERAL: - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE, false); if (dwc->usb2_phy) otg_set_vbus(dwc->usb2_phy->otg, false); @@ -1600,7 +1615,7 @@ static int dwc3_core_init_mode(struct dwc3 *dwc) return dev_err_probe(dev, ret, "failed to initialize gadget\n"); break; case USB_DR_MODE_HOST: - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_HOST); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_HOST, false); if (dwc->usb2_phy) otg_set_vbus(dwc->usb2_phy->otg, true); @@ -1645,7 +1660,7 @@ static void dwc3_core_exit_mode(struct dwc3 *dwc) } /* de-assert DRVVBUS for HOST and OTG mode */ - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE, true); } static void dwc3_get_software_properties(struct dwc3 *dwc) @@ -2453,7 +2468,7 @@ static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg) if (ret) return ret; - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE, true); dwc3_gadget_resume(dwc); break; case DWC3_GCTL_PRTCAP_HOST: @@ -2461,7 +2476,7 @@ static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg) ret = dwc3_core_init_for_resume(dwc); if (ret) return ret; - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_HOST); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_HOST, true); break; } /* Restore GUSB2PHYCFG bits that were modified in suspend */ @@ -2490,7 +2505,7 @@ static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg) if (ret) return ret; - dwc3_set_prtcap(dwc, dwc->current_dr_role); + dwc3_set_prtcap(dwc, dwc->current_dr_role, true); dwc3_otg_init(dwc); if (dwc->current_otg_role == DWC3_OTG_ROLE_HOST) { diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index c955039bb4f62..aaa39e663f60a 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -1558,7 +1558,7 @@ struct dwc3_gadget_ep_cmd_params { #define DWC3_HAS_OTG BIT(3) /* prototypes */ -void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode); +void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode, bool ignore_susphy); void dwc3_set_mode(struct dwc3 *dwc, u32 mode); u32 dwc3_core_fifo_space(struct dwc3_ep *dep, u8 type); diff --git a/drivers/usb/dwc3/drd.c b/drivers/usb/dwc3/drd.c index d76ae676783cf..7977860932b14 100644 --- a/drivers/usb/dwc3/drd.c +++ b/drivers/usb/dwc3/drd.c @@ -173,7 +173,7 @@ void dwc3_otg_init(struct dwc3 *dwc) * block "Initialize GCTL for OTG operation". */ /* GCTL.PrtCapDir=2'b11 */ - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_OTG); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_OTG, true); /* GUSB2PHYCFG0.SusPHY=0 */ reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; @@ -556,7 +556,7 @@ int dwc3_drd_init(struct dwc3 *dwc) dwc3_drd_update(dwc); } else { - dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_OTG); + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_OTG, true); /* use OTG block to get ID event */ irq = dwc3_otg_get_irq(dwc); -- GitLab From dfd3df31c9db752234d7d2e09bef2aeabb643ce4 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Fri, 28 Feb 2025 13:13:56 +0100 Subject: [PATCH 1217/1585] mm/slab/kvfree_rcu: Switch to WQ_MEM_RECLAIM wq Currently kvfree_rcu() APIs use a system workqueue which is "system_unbound_wq" to driver RCU machinery to reclaim a memory. Recently, it has been noted that the following kernel warning can be observed: workqueue: WQ_MEM_RECLAIM nvme-wq:nvme_scan_work is flushing !WQ_MEM_RECLAIM events_unbound:kfree_rcu_work WARNING: CPU: 21 PID: 330 at kernel/workqueue.c:3719 check_flush_dependency+0x112/0x120 Modules linked in: intel_uncore_frequency(E) intel_uncore_frequency_common(E) skx_edac(E) ... CPU: 21 UID: 0 PID: 330 Comm: kworker/u144:6 Tainted: G E 6.13.2-0_g925d379822da #1 Hardware name: Wiwynn Twin Lakes MP/Twin Lakes Passive MP, BIOS YMM20 02/01/2023 Workqueue: nvme-wq nvme_scan_work RIP: 0010:check_flush_dependency+0x112/0x120 Code: 05 9a 40 14 02 01 48 81 c6 c0 00 00 00 48 8b 50 18 48 81 c7 c0 00 00 00 48 89 f9 48 ... RSP: 0018:ffffc90000df7bd8 EFLAGS: 00010082 RAX: 000000000000006a RBX: ffffffff81622390 RCX: 0000000000000027 RDX: 00000000fffeffff RSI: 000000000057ffa8 RDI: ffff88907f960c88 RBP: 0000000000000000 R08: ffffffff83068e50 R09: 000000000002fffd R10: 0000000000000004 R11: 0000000000000000 R12: ffff8881001a4400 R13: 0000000000000000 R14: ffff88907f420fb8 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88907f940000(0000) knlGS:0000000000000000 CR2: 00007f60c3001000 CR3: 000000107d010005 CR4: 00000000007726f0 PKRU: 55555554 Call Trace: ? __warn+0xa4/0x140 ? check_flush_dependency+0x112/0x120 ? report_bug+0xe1/0x140 ? check_flush_dependency+0x112/0x120 ? handle_bug+0x5e/0x90 ? exc_invalid_op+0x16/0x40 ? asm_exc_invalid_op+0x16/0x20 ? timer_recalc_next_expiry+0x190/0x190 ? check_flush_dependency+0x112/0x120 ? check_flush_dependency+0x112/0x120 __flush_work.llvm.1643880146586177030+0x174/0x2c0 flush_rcu_work+0x28/0x30 kvfree_rcu_barrier+0x12f/0x160 kmem_cache_destroy+0x18/0x120 bioset_exit+0x10c/0x150 disk_release.llvm.6740012984264378178+0x61/0xd0 device_release+0x4f/0x90 kobject_put+0x95/0x180 nvme_put_ns+0x23/0xc0 nvme_remove_invalid_namespaces+0xb3/0xd0 nvme_scan_work+0x342/0x490 process_scheduled_works+0x1a2/0x370 worker_thread+0x2ff/0x390 ? pwq_release_workfn+0x1e0/0x1e0 kthread+0xb1/0xe0 ? __kthread_parkme+0x70/0x70 ret_from_fork+0x30/0x40 ? __kthread_parkme+0x70/0x70 ret_from_fork_asm+0x11/0x20 ---[ end trace 0000000000000000 ]--- To address this switch to use of independent WQ_MEM_RECLAIM workqueue, so the rules are not violated from workqueue framework point of view. Apart of that, since kvfree_rcu() does reclaim memory it is worth to go with WQ_MEM_RECLAIM type of wq because it is designed for this purpose. Fixes: 6c6c47b063b5 ("mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()"), Reported-by: Keith Busch Closes: https://lore.kernel.org/all/Z7iqJtCjHKfo8Kho@kbusch-mbp/ Cc: stable@vger.kernel.org Signed-off-by: Uladzislau Rezki (Sony) Reviewed-by: Joel Fernandes Signed-off-by: Vlastimil Babka --- mm/slab_common.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 4030907b6b7d8..4c9f0a87f733b 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1304,6 +1304,8 @@ module_param(rcu_min_cached_objs, int, 0444); static int rcu_delay_page_cache_fill_msec = 5000; module_param(rcu_delay_page_cache_fill_msec, int, 0444); +static struct workqueue_struct *rcu_reclaim_wq; + /* Maximum number of jiffies to wait before draining a batch. */ #define KFREE_DRAIN_JIFFIES (5 * HZ) #define KFREE_N_BATCHES 2 @@ -1632,10 +1634,10 @@ __schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp) if (delayed_work_pending(&krcp->monitor_work)) { delay_left = krcp->monitor_work.timer.expires - jiffies; if (delay < delay_left) - mod_delayed_work(system_unbound_wq, &krcp->monitor_work, delay); + mod_delayed_work(rcu_reclaim_wq, &krcp->monitor_work, delay); return; } - queue_delayed_work(system_unbound_wq, &krcp->monitor_work, delay); + queue_delayed_work(rcu_reclaim_wq, &krcp->monitor_work, delay); } static void @@ -1733,7 +1735,7 @@ kvfree_rcu_queue_batch(struct kfree_rcu_cpu *krcp) // "free channels", the batch can handle. Break // the loop since it is done with this CPU thus // queuing an RCU work is _always_ success here. - queued = queue_rcu_work(system_unbound_wq, &krwp->rcu_work); + queued = queue_rcu_work(rcu_reclaim_wq, &krwp->rcu_work); WARN_ON_ONCE(!queued); break; } @@ -1883,7 +1885,7 @@ run_page_cache_worker(struct kfree_rcu_cpu *krcp) if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING && !atomic_xchg(&krcp->work_in_progress, 1)) { if (atomic_read(&krcp->backoff_page_cache_fill)) { - queue_delayed_work(system_unbound_wq, + queue_delayed_work(rcu_reclaim_wq, &krcp->page_cache_work, msecs_to_jiffies(rcu_delay_page_cache_fill_msec)); } else { @@ -2120,6 +2122,10 @@ void __init kvfree_rcu_init(void) int i, j; struct shrinker *kfree_rcu_shrinker; + rcu_reclaim_wq = alloc_workqueue("kvfree_rcu_reclaim", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0); + WARN_ON(!rcu_reclaim_wq); + /* Clamp it to [0:100] seconds interval. */ if (rcu_delay_page_cache_fill_msec < 0 || rcu_delay_page_cache_fill_msec > 100 * MSEC_PER_SEC) { -- GitLab From 8177c6bedb7013cf736137da586cf783922309dd Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Tue, 4 Mar 2025 09:51:12 +0100 Subject: [PATCH 1218/1585] x86/cacheinfo: Validate CPUID leaf 0x2 EDX output CPUID leaf 0x2 emits one-byte descriptors in its four output registers EAX, EBX, ECX, and EDX. For these descriptors to be valid, the most significant bit (MSB) of each register must be clear. The historical Git commit: 019361a20f016 ("- pre6: Intel: start to add Pentium IV specific stuff (128-byte cacheline etc)...") introduced leaf 0x2 output parsing. It only validated the MSBs of EAX, EBX, and ECX, but left EDX unchecked. Validate EDX's most-significant bit. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: stable@vger.kernel.org Cc: "H. Peter Anvin" Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250304085152.51092-2-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index e6fa03ed9172c..a6c6bccfa8b8d 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -808,7 +808,7 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); /* If bit 31 is set, this is an unknown format */ - for (j = 0 ; j < 3 ; j++) + for (j = 0 ; j < 4 ; j++) if (regs[j] & (1 << 31)) regs[j] = 0; -- GitLab From 1881148215c67151b146450fb89ec22fd92337a7 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Tue, 4 Mar 2025 09:51:13 +0100 Subject: [PATCH 1219/1585] x86/cpu: Validate CPUID leaf 0x2 EDX output CPUID leaf 0x2 emits one-byte descriptors in its four output registers EAX, EBX, ECX, and EDX. For these descriptors to be valid, the most significant bit (MSB) of each register must be clear. Leaf 0x2 parsing at intel.c only validated the MSBs of EAX, EBX, and ECX, but left EDX unchecked. Validate EDX's most-significant bit as well. Fixes: e0ba94f14f74 ("x86/tlb_info: get last level TLB entry number of CPU") Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: stable@kernel.org Cc: "H. Peter Anvin" Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250304085152.51092-3-darwi@linutronix.de --- arch/x86/kernel/cpu/intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 3dce22f00dc34..2a3716afee633 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -799,7 +799,7 @@ static void intel_detect_tlb(struct cpuinfo_x86 *c) cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); /* If bit 31 is set, this is an unknown format */ - for (j = 0 ; j < 3 ; j++) + for (j = 0 ; j < 4 ; j++) if (regs[j] & (1 << 31)) regs[j] = 0; -- GitLab From f6bdaab79ee4228a143ee1b4cb80416d6ffc0c63 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Tue, 4 Mar 2025 09:51:14 +0100 Subject: [PATCH 1220/1585] x86/cpu: Properly parse CPUID leaf 0x2 TLB descriptor 0x63 CPUID leaf 0x2's one-byte TLB descriptors report the number of entries for specific TLB types, among other properties. Typically, each emitted descriptor implies the same number of entries for its respective TLB type(s). An emitted 0x63 descriptor is an exception: it implies 4 data TLB entries for 1GB pages and 32 data TLB entries for 2MB or 4MB pages. For the TLB descriptors parsing code, the entry count for 1GB pages is encoded at the intel_tlb_table[] mapping, but the 2MB/4MB entry count is totally ignored. Update leaf 0x2's parsing logic 0x2 to account for 32 data TLB entries for 2MB/4MB pages implied by the 0x63 descriptor. Fixes: e0ba94f14f74 ("x86/tlb_info: get last level TLB entry number of CPU") Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: stable@kernel.org Cc: "H. Peter Anvin" Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250304085152.51092-4-darwi@linutronix.de --- arch/x86/kernel/cpu/intel.c | 50 +++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 2a3716afee633..134368a3f4b1e 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -635,26 +635,37 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) } #endif -#define TLB_INST_4K 0x01 -#define TLB_INST_4M 0x02 -#define TLB_INST_2M_4M 0x03 +#define TLB_INST_4K 0x01 +#define TLB_INST_4M 0x02 +#define TLB_INST_2M_4M 0x03 -#define TLB_INST_ALL 0x05 -#define TLB_INST_1G 0x06 +#define TLB_INST_ALL 0x05 +#define TLB_INST_1G 0x06 -#define TLB_DATA_4K 0x11 -#define TLB_DATA_4M 0x12 -#define TLB_DATA_2M_4M 0x13 -#define TLB_DATA_4K_4M 0x14 +#define TLB_DATA_4K 0x11 +#define TLB_DATA_4M 0x12 +#define TLB_DATA_2M_4M 0x13 +#define TLB_DATA_4K_4M 0x14 -#define TLB_DATA_1G 0x16 +#define TLB_DATA_1G 0x16 +#define TLB_DATA_1G_2M_4M 0x17 -#define TLB_DATA0_4K 0x21 -#define TLB_DATA0_4M 0x22 -#define TLB_DATA0_2M_4M 0x23 +#define TLB_DATA0_4K 0x21 +#define TLB_DATA0_4M 0x22 +#define TLB_DATA0_2M_4M 0x23 -#define STLB_4K 0x41 -#define STLB_4K_2M 0x42 +#define STLB_4K 0x41 +#define STLB_4K_2M 0x42 + +/* + * All of leaf 0x2's one-byte TLB descriptors implies the same number of + * entries for their respective TLB types. The 0x63 descriptor is an + * exception: it implies 4 dTLB entries for 1GB pages 32 dTLB entries + * for 2MB or 4MB pages. Encode descriptor 0x63 dTLB entry count for + * 2MB/4MB pages here, as its count for dTLB 1GB pages is already at the + * intel_tlb_table[] mapping. + */ +#define TLB_0x63_2M_4M_ENTRIES 32 static const struct _tlb_table intel_tlb_table[] = { { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" }, @@ -676,7 +687,8 @@ static const struct _tlb_table intel_tlb_table[] = { { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" }, { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" }, { 0x61, TLB_INST_4K, 48, " TLB_INST 4 KByte pages, full associative" }, - { 0x63, TLB_DATA_1G, 4, " TLB_DATA 1 GByte pages, 4-way set associative" }, + { 0x63, TLB_DATA_1G_2M_4M, 4, " TLB_DATA 1 GByte pages, 4-way set associative" + " (plus 32 entries TLB_DATA 2 MByte or 4 MByte pages, not encoded here)" }, { 0x6b, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 8-way associative" }, { 0x6c, TLB_DATA_2M_4M, 128, " TLB_DATA 2 MByte or 4 MByte pages, 8-way associative" }, { 0x6d, TLB_DATA_1G, 16, " TLB_DATA 1 GByte pages, fully associative" }, @@ -776,6 +788,12 @@ static void intel_tlb_lookup(const unsigned char desc) if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; break; + case TLB_DATA_1G_2M_4M: + if (tlb_lld_2m[ENTRIES] < TLB_0x63_2M_4M_ENTRIES) + tlb_lld_2m[ENTRIES] = TLB_0x63_2M_4M_ENTRIES; + if (tlb_lld_4m[ENTRIES] < TLB_0x63_2M_4M_ENTRIES) + tlb_lld_4m[ENTRIES] = TLB_0x63_2M_4M_ENTRIES; + fallthrough; case TLB_DATA_1G: if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries; -- GitLab From 1a82d19ca2d6835904ee71e2d40fd331098f94a0 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 27 Feb 2025 18:41:29 +0200 Subject: [PATCH 1221/1585] be2net: fix sleeping while atomic bugs in be_ndo_bridge_getlink Partially revert commit b71724147e73 ("be2net: replace polling with sleeping in the FW completion path") w.r.t mcc mutex it introduces and the use of usleep_range. The be2net be_ndo_bridge_getlink() callback is called with rcu_read_lock, so this code has been broken for a long time. Both the mutex_lock and the usleep_range can cause the issue Ian Kumlien reported[1]. The call path is: be_ndo_bridge_getlink -> be_cmd_get_hsw_config -> be_mcc_notify_wait -> be_mcc_wait_compl -> usleep_range() [1] https://lore.kernel.org/netdev/CAA85sZveppNgEVa_FD+qhOMtG_AavK9_mFiU+jWrMtXmwqefGA@mail.gmail.com/ Tested-by: Ian Kumlien Fixes: b71724147e73 ("be2net: replace polling with sleeping in the FW completion path") Signed-off-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250227164129.1201164-1-razor@blackwall.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/emulex/benet/be.h | 2 +- drivers/net/ethernet/emulex/benet/be_cmds.c | 197 ++++++++++---------- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- 3 files changed, 100 insertions(+), 101 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index e48b861e4ce15..270ff9aab3352 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -562,7 +562,7 @@ struct be_adapter { struct be_dma_mem mbox_mem_alloced; struct be_mcc_obj mcc_obj; - struct mutex mcc_lock; /* For serializing mcc cmds to BE card */ + spinlock_t mcc_lock; /* For serializing mcc cmds to BE card */ spinlock_t mcc_cq_lock; u16 cfg_num_rx_irqs; /* configured via set-channels */ diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 61adcebeef010..51b8377edd1d0 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -575,7 +575,7 @@ int be_process_mcc(struct be_adapter *adapter) /* Wait till no more pending mcc requests are present */ static int be_mcc_wait_compl(struct be_adapter *adapter) { -#define mcc_timeout 12000 /* 12s timeout */ +#define mcc_timeout 120000 /* 12s timeout */ int i, status = 0; struct be_mcc_obj *mcc_obj = &adapter->mcc_obj; @@ -589,7 +589,7 @@ static int be_mcc_wait_compl(struct be_adapter *adapter) if (atomic_read(&mcc_obj->q.used) == 0) break; - usleep_range(500, 1000); + udelay(100); } if (i == mcc_timeout) { dev_err(&adapter->pdev->dev, "FW not responding\n"); @@ -866,7 +866,7 @@ static bool use_mcc(struct be_adapter *adapter) static int be_cmd_lock(struct be_adapter *adapter) { if (use_mcc(adapter)) { - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); return 0; } else { return mutex_lock_interruptible(&adapter->mbox_lock); @@ -877,7 +877,7 @@ static int be_cmd_lock(struct be_adapter *adapter) static void be_cmd_unlock(struct be_adapter *adapter) { if (use_mcc(adapter)) - return mutex_unlock(&adapter->mcc_lock); + return spin_unlock_bh(&adapter->mcc_lock); else return mutex_unlock(&adapter->mbox_lock); } @@ -1047,7 +1047,7 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr, struct be_cmd_req_mac_query *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1076,7 +1076,7 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1088,7 +1088,7 @@ int be_cmd_pmac_add(struct be_adapter *adapter, const u8 *mac_addr, struct be_cmd_req_pmac_add *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1113,7 +1113,7 @@ int be_cmd_pmac_add(struct be_adapter *adapter, const u8 *mac_addr, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); if (base_status(status) == MCC_STATUS_UNAUTHORIZED_REQUEST) status = -EPERM; @@ -1131,7 +1131,7 @@ int be_cmd_pmac_del(struct be_adapter *adapter, u32 if_id, int pmac_id, u32 dom) if (pmac_id == -1) return 0; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1151,7 +1151,7 @@ int be_cmd_pmac_del(struct be_adapter *adapter, u32 if_id, int pmac_id, u32 dom) status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1414,7 +1414,7 @@ int be_cmd_rxq_create(struct be_adapter *adapter, struct be_dma_mem *q_mem = &rxq->dma_mem; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1444,7 +1444,7 @@ int be_cmd_rxq_create(struct be_adapter *adapter, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1508,7 +1508,7 @@ int be_cmd_rxq_destroy(struct be_adapter *adapter, struct be_queue_info *q) struct be_cmd_req_q_destroy *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1525,7 +1525,7 @@ int be_cmd_rxq_destroy(struct be_adapter *adapter, struct be_queue_info *q) q->created = false; err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1593,7 +1593,7 @@ int be_cmd_get_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd) struct be_cmd_req_hdr *hdr; int status = 0; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1621,7 +1621,7 @@ int be_cmd_get_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd) adapter->stats_cmd_sent = true; err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1637,7 +1637,7 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter, CMD_SUBSYSTEM_ETH)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1660,7 +1660,7 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter, adapter->stats_cmd_sent = true; err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1697,7 +1697,7 @@ int be_cmd_link_status_query(struct be_adapter *adapter, u16 *link_speed, struct be_cmd_req_link_status *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); if (link_status) *link_status = LINK_DOWN; @@ -1736,7 +1736,7 @@ int be_cmd_link_status_query(struct be_adapter *adapter, u16 *link_speed, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1747,7 +1747,7 @@ int be_cmd_get_die_temperature(struct be_adapter *adapter) struct be_cmd_req_get_cntl_addnl_attribs *req; int status = 0; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1762,7 +1762,7 @@ int be_cmd_get_die_temperature(struct be_adapter *adapter) status = be_mcc_notify(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1811,7 +1811,7 @@ int be_cmd_get_fat_dump(struct be_adapter *adapter, u32 buf_len, void *buf) if (!get_fat_cmd.va) return -ENOMEM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); while (total_size) { buf_size = min(total_size, (u32)60 * 1024); @@ -1849,9 +1849,9 @@ int be_cmd_get_fat_dump(struct be_adapter *adapter, u32 buf_len, void *buf) log_offset += buf_size; } err: + spin_unlock_bh(&adapter->mcc_lock); dma_free_coherent(&adapter->pdev->dev, get_fat_cmd.size, get_fat_cmd.va, get_fat_cmd.dma); - mutex_unlock(&adapter->mcc_lock); return status; } @@ -1862,7 +1862,7 @@ int be_cmd_get_fw_ver(struct be_adapter *adapter) struct be_cmd_req_get_fw_version *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1885,7 +1885,7 @@ int be_cmd_get_fw_ver(struct be_adapter *adapter) sizeof(adapter->fw_on_flash)); } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1899,7 +1899,7 @@ static int __be_cmd_modify_eqd(struct be_adapter *adapter, struct be_cmd_req_modify_eq_delay *req; int status = 0, i; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1922,7 +1922,7 @@ static int __be_cmd_modify_eqd(struct be_adapter *adapter, status = be_mcc_notify(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1949,7 +1949,7 @@ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, struct be_cmd_req_vlan_config *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1971,7 +1971,7 @@ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -1982,7 +1982,7 @@ static int __be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value) struct be_cmd_req_rx_filter *req = mem->va; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2015,7 +2015,7 @@ static int __be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value) status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2046,7 +2046,7 @@ int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc) CMD_SUBSYSTEM_COMMON)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2066,7 +2066,7 @@ int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc) status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); if (base_status(status) == MCC_STATUS_FEATURE_NOT_SUPPORTED) return -EOPNOTSUPP; @@ -2085,7 +2085,7 @@ int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc) CMD_SUBSYSTEM_COMMON)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2108,7 +2108,7 @@ int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc) } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2189,7 +2189,7 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, if (!(be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS)) return 0; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2214,7 +2214,7 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2226,7 +2226,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, struct be_cmd_req_enable_disable_beacon *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2247,7 +2247,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2258,7 +2258,7 @@ int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state) struct be_cmd_req_get_beacon_state *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2282,7 +2282,7 @@ int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state) } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2306,7 +2306,7 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, return -ENOMEM; } - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2328,7 +2328,7 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, memcpy(data, resp->page_data + off, len); } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma); return status; } @@ -2345,7 +2345,7 @@ static int lancer_cmd_write_object(struct be_adapter *adapter, void *ctxt = NULL; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); adapter->flash_status = 0; wrb = wrb_from_mccq(adapter); @@ -2387,7 +2387,7 @@ static int lancer_cmd_write_object(struct be_adapter *adapter, if (status) goto err_unlock; - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); if (!wait_for_completion_timeout(&adapter->et_cmd_compl, msecs_to_jiffies(60000))) @@ -2406,7 +2406,7 @@ static int lancer_cmd_write_object(struct be_adapter *adapter, return status; err_unlock: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2460,7 +2460,7 @@ static int lancer_cmd_delete_object(struct be_adapter *adapter, struct be_mcc_wrb *wrb; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2478,7 +2478,7 @@ static int lancer_cmd_delete_object(struct be_adapter *adapter, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2491,7 +2491,7 @@ int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd, struct lancer_cmd_resp_read_object *resp; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2525,7 +2525,7 @@ int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd, } err_unlock: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2537,7 +2537,7 @@ static int be_cmd_write_flashrom(struct be_adapter *adapter, struct be_cmd_write_flashrom *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); adapter->flash_status = 0; wrb = wrb_from_mccq(adapter); @@ -2562,7 +2562,7 @@ static int be_cmd_write_flashrom(struct be_adapter *adapter, if (status) goto err_unlock; - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); if (!wait_for_completion_timeout(&adapter->et_cmd_compl, msecs_to_jiffies(40000))) @@ -2573,7 +2573,7 @@ static int be_cmd_write_flashrom(struct be_adapter *adapter, return status; err_unlock: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2584,7 +2584,7 @@ static int be_cmd_get_flash_crc(struct be_adapter *adapter, u8 *flashed_crc, struct be_mcc_wrb *wrb; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2611,7 +2611,7 @@ static int be_cmd_get_flash_crc(struct be_adapter *adapter, u8 *flashed_crc, memcpy(flashed_crc, req->crc, 4); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3217,7 +3217,7 @@ int be_cmd_enable_magic_wol(struct be_adapter *adapter, u8 *mac, struct be_cmd_req_acpi_wol_magic_config *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3234,7 +3234,7 @@ int be_cmd_enable_magic_wol(struct be_adapter *adapter, u8 *mac, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3249,7 +3249,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, CMD_SUBSYSTEM_LOWLEVEL)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3272,7 +3272,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, if (status) goto err_unlock; - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); if (!wait_for_completion_timeout(&adapter->et_cmd_compl, msecs_to_jiffies(SET_LB_MODE_TIMEOUT))) @@ -3281,7 +3281,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, return status; err_unlock: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3298,7 +3298,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, CMD_SUBSYSTEM_LOWLEVEL)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3324,7 +3324,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, if (status) goto err; - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); wait_for_completion(&adapter->et_cmd_compl); resp = embedded_payload(wrb); @@ -3332,7 +3332,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, return status; err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3348,7 +3348,7 @@ int be_cmd_ddr_dma_test(struct be_adapter *adapter, u64 pattern, CMD_SUBSYSTEM_LOWLEVEL)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3382,7 +3382,7 @@ int be_cmd_ddr_dma_test(struct be_adapter *adapter, u64 pattern, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3393,7 +3393,7 @@ int be_cmd_get_seeprom_data(struct be_adapter *adapter, struct be_cmd_req_seeprom_read *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3409,7 +3409,7 @@ int be_cmd_get_seeprom_data(struct be_adapter *adapter, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3424,7 +3424,7 @@ int be_cmd_get_phy_info(struct be_adapter *adapter) CMD_SUBSYSTEM_COMMON)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3469,7 +3469,7 @@ int be_cmd_get_phy_info(struct be_adapter *adapter) } dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3479,7 +3479,7 @@ static int be_cmd_set_qos(struct be_adapter *adapter, u32 bps, u32 domain) struct be_cmd_req_set_qos *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3499,7 +3499,7 @@ static int be_cmd_set_qos(struct be_adapter *adapter, u32 bps, u32 domain) status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3611,7 +3611,7 @@ int be_cmd_get_fn_privileges(struct be_adapter *adapter, u32 *privilege, struct be_cmd_req_get_fn_privileges *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3643,7 +3643,7 @@ int be_cmd_get_fn_privileges(struct be_adapter *adapter, u32 *privilege, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3655,7 +3655,7 @@ int be_cmd_set_fn_privileges(struct be_adapter *adapter, u32 privileges, struct be_cmd_req_set_fn_privileges *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3675,7 +3675,7 @@ int be_cmd_set_fn_privileges(struct be_adapter *adapter, u32 privileges, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3707,7 +3707,7 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac, return -ENOMEM; } - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3771,7 +3771,7 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac, } out: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); dma_free_coherent(&adapter->pdev->dev, get_mac_list_cmd.size, get_mac_list_cmd.va, get_mac_list_cmd.dma); return status; @@ -3831,7 +3831,7 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array, if (!cmd.va) return -ENOMEM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3853,7 +3853,7 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array, err: dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma); - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3889,7 +3889,7 @@ int be_cmd_set_hsw_config(struct be_adapter *adapter, u16 pvid, CMD_SUBSYSTEM_COMMON)) return -EPERM; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3930,7 +3930,7 @@ int be_cmd_set_hsw_config(struct be_adapter *adapter, u16 pvid, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -3944,7 +3944,7 @@ int be_cmd_get_hsw_config(struct be_adapter *adapter, u16 *pvid, int status; u16 vid; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3991,7 +3991,7 @@ int be_cmd_get_hsw_config(struct be_adapter *adapter, u16 *pvid, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -4190,7 +4190,7 @@ int be_cmd_set_ext_fat_capabilites(struct be_adapter *adapter, struct be_cmd_req_set_ext_fat_caps *req; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4206,7 +4206,7 @@ int be_cmd_set_ext_fat_capabilites(struct be_adapter *adapter, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -4684,7 +4684,7 @@ int be_cmd_manage_iface(struct be_adapter *adapter, u32 iface, u8 op) if (iface == 0xFFFFFFFF) return -1; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4701,7 +4701,7 @@ int be_cmd_manage_iface(struct be_adapter *adapter, u32 iface, u8 op) status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -4735,7 +4735,7 @@ int be_cmd_get_if_id(struct be_adapter *adapter, struct be_vf_cfg *vf_cfg, struct be_cmd_resp_get_iface_list *resp; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4756,7 +4756,7 @@ int be_cmd_get_if_id(struct be_adapter *adapter, struct be_vf_cfg *vf_cfg, } err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -4850,7 +4850,7 @@ int be_cmd_enable_vf(struct be_adapter *adapter, u8 domain) if (BEx_chip(adapter)) return 0; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4868,7 +4868,7 @@ int be_cmd_enable_vf(struct be_adapter *adapter, u8 domain) req->enable = 1; status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -4941,7 +4941,7 @@ __be_cmd_set_logical_link_config(struct be_adapter *adapter, u32 link_config = 0; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4969,7 +4969,7 @@ __be_cmd_set_logical_link_config(struct be_adapter *adapter, status = be_mcc_notify_wait(adapter); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -5000,8 +5000,7 @@ int be_cmd_set_features(struct be_adapter *adapter) struct be_mcc_wrb *wrb; int status; - if (mutex_lock_interruptible(&adapter->mcc_lock)) - return -1; + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -5039,7 +5038,7 @@ int be_cmd_set_features(struct be_adapter *adapter) dev_info(&adapter->pdev->dev, "Adapter does not support HW error recovery\n"); - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -5053,7 +5052,7 @@ int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload, struct be_cmd_resp_hdr *resp; int status; - mutex_lock(&adapter->mcc_lock); + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -5076,7 +5075,7 @@ int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload, memcpy(wrb_payload, resp, sizeof(*resp) + resp->response_length); be_dws_le_to_cpu(wrb_payload, sizeof(*resp) + resp->response_length); err: - mutex_unlock(&adapter->mcc_lock); + spin_unlock_bh(&adapter->mcc_lock); return status; } EXPORT_SYMBOL(be_roce_mcc_cmd); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 875fe379eea21..3d2e215921191 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5667,8 +5667,8 @@ static int be_drv_init(struct be_adapter *adapter) } mutex_init(&adapter->mbox_lock); - mutex_init(&adapter->mcc_lock); mutex_init(&adapter->rx_filter_lock); + spin_lock_init(&adapter->mcc_lock); spin_lock_init(&adapter->mcc_cq_lock); init_completion(&adapter->et_cmd_compl); -- GitLab From c34424eb3be4c01db831428c0d7d483701ae820f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Feb 2025 16:45:34 -0800 Subject: [PATCH 1222/1585] net: dsa: rtl8366rb: don't prompt users for LED control Make NET_DSA_REALTEK_RTL8366RB_LEDS a hidden symbol. It seems very unlikely user would want to intentionally disable it. Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20250228004534.3428681-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/dsa/realtek/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/realtek/Kconfig b/drivers/net/dsa/realtek/Kconfig index 10687722d14c0..d6eb6713e5f6b 100644 --- a/drivers/net/dsa/realtek/Kconfig +++ b/drivers/net/dsa/realtek/Kconfig @@ -44,7 +44,7 @@ config NET_DSA_REALTEK_RTL8366RB Select to enable support for Realtek RTL8366RB. config NET_DSA_REALTEK_RTL8366RB_LEDS - bool "Support RTL8366RB LED control" + bool depends on (LEDS_CLASS=y || LEDS_CLASS=NET_DSA_REALTEK_RTL8366RB) depends on NET_DSA_REALTEK_RTL8366RB default NET_DSA_REALTEK_RTL8366RB -- GitLab From 1f860eb4cdda634589d75e78ff586d5dff20b8af Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 3 Mar 2025 11:05:36 +0100 Subject: [PATCH 1223/1585] wifi: nl80211: disable multi-link reconfiguration Both the APIs in cfg80211 and the implementation in mac80211 aren't really ready yet, we have a large number of fixes. In addition, it's not possible right now to discover support for this feature from userspace. Disable it for now, there's no rush. Link: https://patch.msgid.link/20250303110538.fbeef42a5687.Iab122c22137e5675ebd99f5c031e30c0e5c7af2e@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fdb2aac951d18..e87267fbb442e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -16534,7 +16534,7 @@ static int nl80211_assoc_ml_reconf(struct sk_buff *skb, struct genl_info *info) goto out; } - err = cfg80211_assoc_ml_reconf(rdev, dev, links, rem_links); + err = -EOPNOTSUPP; out: for (link_id = 0; link_id < ARRAY_SIZE(links); link_id++) -- GitLab From b7365eab39831487a84e63a9638209b68dc54008 Mon Sep 17 00:00:00 2001 From: Peiyang Wang Date: Fri, 28 Feb 2025 18:52:58 +0800 Subject: [PATCH 1224/1585] net: hns3: make sure ptp clock is unregister and freed if hclge_ptp_get_cycle returns an error During the initialization of ptp, hclge_ptp_get_cycle might return an error and returned directly without unregister clock and free it. To avoid that, call hclge_ptp_destroy_clock to unregist and free clock if hclge_ptp_get_cycle failed. Fixes: 8373cd38a888 ("net: hns3: change the method of obtaining default ptp cycle") Signed-off-by: Peiyang Wang Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250228105258.1243461-1-shaojijie@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index bab16c2191b2f..181af419b878d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -483,7 +483,7 @@ int hclge_ptp_init(struct hclge_dev *hdev) ret = hclge_ptp_get_cycle(hdev); if (ret) - return ret; + goto out; } ret = hclge_ptp_int_en(hdev, true); -- GitLab From c7c1f3b05c67173f462d73d301d572b3f9e57e3b Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Tue, 4 Mar 2025 13:31:47 +0200 Subject: [PATCH 1225/1585] usb: xhci: Fix host controllers "dying" after suspend and resume A recent cleanup went a bit too far and dropped clearing the cycle bit of link TRBs, so it stays different from the rest of the ring half of the time. Then a race occurs: if the xHC reaches such link TRB before more commands are queued, the link's cycle bit unintentionally matches the xHC's cycle so it follows the link and waits for further commands. If more commands are queued before the xHC gets there, inc_enq() flips the bit so the xHC later sees a mismatch and stops executing commands. This function is called before suspend and 50% of times after resuming the xHC is doomed to get stuck sooner or later. Then some Stop Endpoint command fails to complete in 5 seconds and this shows up xhci_hcd 0000:00:10.0: xHCI host not responding to stop endpoint command xhci_hcd 0000:00:10.0: xHCI host controller not responding, assume dead xhci_hcd 0000:00:10.0: HC died; cleaning up followed by loss of all USB decives on the affected bus. That's if you are lucky, because if Set Deq gets stuck instead, the failure is silent. Likely responsible for kernel bug 219824. I found this while searching for possible causes of that regression and reproduced it locally before hearing back from the reporter. To repro, simply wait for link cycle to become set (debugfs), then suspend, resume and wait. To accelerate the failure I used a script which repeatedly starts and stops a UVC camera. Some HCs get fully reinitialized on resume and they are not affected. Link: https://bugzilla.kernel.org/show_bug.cgi?id=219824 Fixes: 36b972d4b7ce ("usb: xhci: improve xhci_clear_command_ring()") Cc: stable@vger.kernel.org Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250304113147.3322584-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 45653114ccd7f..1a90ebc8a30ea 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -780,8 +780,12 @@ static void xhci_clear_command_ring(struct xhci_hcd *xhci) struct xhci_segment *seg; ring = xhci->cmd_ring; - xhci_for_each_ring_seg(ring->first_seg, seg) + xhci_for_each_ring_seg(ring->first_seg, seg) { + /* erase all TRBs before the link */ memset(seg->trbs, 0, sizeof(union xhci_trb) * (TRBS_PER_SEGMENT - 1)); + /* clear link cycle bit */ + seg->trbs[TRBS_PER_SEGMENT - 1].link.control &= cpu_to_le32(~TRB_CYCLE); + } xhci_initialize_ring_info(ring); /* -- GitLab From 164b7dd4546b57c08b373e9e3cf315ff98cb032d Mon Sep 17 00:00:00 2001 From: Maciej Strozek Date: Tue, 4 Mar 2025 14:05:04 +0000 Subject: [PATCH 1226/1585] ASoC: cs42l43: Add jack delay debounce after suspend Hardware reports jack absent after reset/suspension regardless of jack state, so introduce an additional delay only in suspension case to allow proper detection to take place after a short delay. Signed-off-by: Maciej Strozek Reviewed-by: Charles Keepax Link: https://patch.msgid.link/20250304140504.139245-1-mstrozek@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43-jack.c | 13 ++++++++++--- sound/soc/codecs/cs42l43.c | 15 ++++++++++++++- sound/soc/codecs/cs42l43.h | 3 +++ 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/cs42l43-jack.c b/sound/soc/codecs/cs42l43-jack.c index d9ab003e166bf..ac19a572fe70c 100644 --- a/sound/soc/codecs/cs42l43-jack.c +++ b/sound/soc/codecs/cs42l43-jack.c @@ -167,7 +167,7 @@ int cs42l43_set_jack(struct snd_soc_component *component, autocontrol |= 0x3 << CS42L43_JACKDET_MODE_SHIFT; ret = cs42l43_find_index(priv, "cirrus,tip-fall-db-ms", 500, - NULL, cs42l43_accdet_db_ms, + &priv->tip_fall_db_ms, cs42l43_accdet_db_ms, ARRAY_SIZE(cs42l43_accdet_db_ms)); if (ret < 0) goto error; @@ -175,7 +175,7 @@ int cs42l43_set_jack(struct snd_soc_component *component, tip_deb |= ret << CS42L43_TIPSENSE_FALLING_DB_TIME_SHIFT; ret = cs42l43_find_index(priv, "cirrus,tip-rise-db-ms", 500, - NULL, cs42l43_accdet_db_ms, + &priv->tip_rise_db_ms, cs42l43_accdet_db_ms, ARRAY_SIZE(cs42l43_accdet_db_ms)); if (ret < 0) goto error; @@ -764,6 +764,8 @@ void cs42l43_tip_sense_work(struct work_struct *work) error: mutex_unlock(&priv->jack_lock); + priv->suspend_jack_debounce = false; + pm_runtime_mark_last_busy(priv->dev); pm_runtime_put_autosuspend(priv->dev); } @@ -771,14 +773,19 @@ void cs42l43_tip_sense_work(struct work_struct *work) irqreturn_t cs42l43_tip_sense(int irq, void *data) { struct cs42l43_codec *priv = data; + unsigned int db_delay = priv->tip_debounce_ms; cancel_delayed_work(&priv->bias_sense_timeout); cancel_delayed_work(&priv->tip_sense_work); cancel_delayed_work(&priv->button_press_work); cancel_work(&priv->button_release_work); + // Ensure delay after suspend is long enough to avoid false detection + if (priv->suspend_jack_debounce) + db_delay += priv->tip_fall_db_ms + priv->tip_rise_db_ms; + queue_delayed_work(system_long_wq, &priv->tip_sense_work, - msecs_to_jiffies(priv->tip_debounce_ms)); + msecs_to_jiffies(db_delay)); return IRQ_HANDLED; } diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c index d2a2daefc2ec6..4257dbefe9dd1 100644 --- a/sound/soc/codecs/cs42l43.c +++ b/sound/soc/codecs/cs42l43.c @@ -2402,9 +2402,22 @@ static int cs42l43_codec_runtime_resume(struct device *dev) return 0; } +static int cs42l43_codec_runtime_force_suspend(struct device *dev) +{ + struct cs42l43_codec *priv = dev_get_drvdata(dev); + + dev_dbg(priv->dev, "Runtime suspend\n"); + + priv->suspend_jack_debounce = true; + + pm_runtime_force_suspend(dev); + + return 0; +} + static const struct dev_pm_ops cs42l43_codec_pm_ops = { RUNTIME_PM_OPS(NULL, cs42l43_codec_runtime_resume, NULL) - SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) + SET_SYSTEM_SLEEP_PM_OPS(cs42l43_codec_runtime_force_suspend, pm_runtime_force_resume) }; static const struct platform_device_id cs42l43_codec_id_table[] = { diff --git a/sound/soc/codecs/cs42l43.h b/sound/soc/codecs/cs42l43.h index 9c144e129535f..1cd9d8a71c439 100644 --- a/sound/soc/codecs/cs42l43.h +++ b/sound/soc/codecs/cs42l43.h @@ -78,6 +78,8 @@ struct cs42l43_codec { bool use_ring_sense; unsigned int tip_debounce_ms; + unsigned int tip_fall_db_ms; + unsigned int tip_rise_db_ms; unsigned int bias_low; unsigned int bias_sense_ua; unsigned int bias_ramp_ms; @@ -95,6 +97,7 @@ struct cs42l43_codec { bool button_detect_running; bool jack_present; int jack_override; + bool suspend_jack_debounce; struct work_struct hp_ilimit_work; struct delayed_work hp_ilimit_clear_work; -- GitLab From d0bbe332669c5db32c8c92bc967f8e7f8d460ddf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 4 Mar 2025 15:25:55 +0100 Subject: [PATCH 1227/1585] ALSA: hda: realtek: fix incorrect IS_REACHABLE() usage The alternative path leads to a build error after a recent change: sound/pci/hda/patch_realtek.c: In function 'alc233_fixup_lenovo_low_en_micmute_led': include/linux/stddef.h:9:14: error: called object is not a function or function pointer 9 | #define NULL ((void *)0) | ^ sound/pci/hda/patch_realtek.c:5041:49: note: in expansion of macro 'NULL' 5041 | #define alc233_fixup_lenovo_line2_mic_hotkey NULL | ^~~~ sound/pci/hda/patch_realtek.c:5063:9: note: in expansion of macro 'alc233_fixup_lenovo_line2_mic_hotkey' 5063 | alc233_fixup_lenovo_line2_mic_hotkey(codec, fix, action); Using IS_REACHABLE() is somewhat questionable here anyway since it leads to the input code not working when the HDA driver is builtin but input is in a loadable module. Replace this with a hard compile-time dependency on CONFIG_INPUT. In practice this won't chance much other than solve the compiler error because it is rare to require sound output but no input support. Fixes: f603b159231b ("ALSA: hda/realtek - add supported Mic Mute LED for Lenovo platform") Signed-off-by: Arnd Bergmann Link: https://patch.msgid.link/20250304142620.582191-1-arnd@kernel.org Signed-off-by: Takashi Iwai --- sound/pci/hda/Kconfig | 1 + sound/pci/hda/patch_realtek.c | 5 ----- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig index e393578cbe684..84ebf19f28836 100644 --- a/sound/pci/hda/Kconfig +++ b/sound/pci/hda/Kconfig @@ -222,6 +222,7 @@ comment "Set to Y if you want auto-loading the side codec driver" config SND_HDA_CODEC_REALTEK tristate "Build Realtek HD-audio codec support" + depends on INPUT select SND_HDA_GENERIC select SND_HDA_GENERIC_LEDS select SND_HDA_SCODEC_COMPONENT diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ebf54ef5877a4..697a38e41e166 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4927,7 +4927,6 @@ static void alc298_fixup_samsung_amp_v2_4_amps(struct hda_codec *codec, alc298_samsung_v2_init_amps(codec, 4); } -#if IS_REACHABLE(CONFIG_INPUT) static void gpio2_mic_hotkey_event(struct hda_codec *codec, struct hda_jack_callback *event) { @@ -5036,10 +5035,6 @@ static void alc233_fixup_lenovo_line2_mic_hotkey(struct hda_codec *codec, spec->kb_dev = NULL; } } -#else /* INPUT */ -#define alc280_fixup_hp_gpio2_mic_hotkey NULL -#define alc233_fixup_lenovo_line2_mic_hotkey NULL -#endif /* INPUT */ static void alc269_fixup_hp_line1_mic1_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) -- GitLab From 5623bc23a1cb9f9a9470fa73b3a20321dc4c4870 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 25 Feb 2025 10:53:10 +0100 Subject: [PATCH 1228/1585] s390/traps: Fix test_monitor_call() inline assembly The test_monitor_call() inline assembly uses the xgr instruction, which also modifies the condition code, to clear a register. However the clobber list of the inline assembly does not specify that the condition code is modified, which may lead to incorrect code generation. Use the lhi instruction instead to clear the register without that the condition code is modified. Furthermore this limits clearing to the lower 32 bits of val, since its type is int. Fixes: 17248ea03674 ("s390: fix __EMIT_BUG() macro") Cc: stable@vger.kernel.org Reviewed-by: Juergen Christ Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/traps.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 24fee11b030d8..b746213d3110c 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -285,10 +285,10 @@ static void __init test_monitor_call(void) return; asm volatile( " mc 0,0\n" - "0: xgr %0,%0\n" + "0: lhi %[val],0\n" "1:\n" - EX_TABLE(0b,1b) - : "+d" (val)); + EX_TABLE(0b, 1b) + : [val] "+d" (val)); if (!val) panic("Monitor call doesn't work!\n"); } -- GitLab From a22ee38d2efe18edc53791fd1036396c23b43ad0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 17 Feb 2025 14:04:18 +0100 Subject: [PATCH 1229/1585] selftests/vDSO: Fix GNU hash table entry size for s390x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 14be4e6f3522 ("selftests: vDSO: fix ELF hash table entry size for s390x") changed the type of the ELF hash table entries to 64bit on s390x. However the *GNU* hash tables entries are always 32bit. The "bucket" pointer is shared between both hash algorithms. On s390, this caused the GNU hash algorithm to access its 32-bit entries as if they were 64-bit, triggering compiler warnings (assignment between "Elf64_Xword *" and "Elf64_Word *") and runtime crashes. Introduce a new dedicated "gnu_bucket" pointer which is used by the GNU hash. Fixes: e0746bde6f82 ("selftests/vDSO: support DT_GNU_HASH") Reviewed-by: Jens Remus Signed-off-by: Thomas Weißschuh Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20250217-selftests-vdso-s390-gnu-hash-v2-1-f6c2532ffe2a@linutronix.de Signed-off-by: Vasily Gorbik --- tools/testing/selftests/vDSO/parse_vdso.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 2fe5e983cb22f..f89d052c730eb 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -53,7 +53,7 @@ static struct vdso_info /* Symbol table */ ELF(Sym) *symtab; const char *symstrings; - ELF(Word) *gnu_hash; + ELF(Word) *gnu_hash, *gnu_bucket; ELF_HASH_ENTRY *bucket, *chain; ELF_HASH_ENTRY nbucket, nchain; @@ -185,8 +185,8 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) /* The bucket array is located after the header (4 uint32) and the bloom * filter (size_t array of gnu_hash[2] elements). */ - vdso_info.bucket = vdso_info.gnu_hash + 4 + - sizeof(size_t) / 4 * vdso_info.gnu_hash[2]; + vdso_info.gnu_bucket = vdso_info.gnu_hash + 4 + + sizeof(size_t) / 4 * vdso_info.gnu_hash[2]; } else { vdso_info.nbucket = hash[0]; vdso_info.nchain = hash[1]; @@ -268,11 +268,11 @@ void *vdso_sym(const char *version, const char *name) if (vdso_info.gnu_hash) { uint32_t h1 = gnu_hash(name), h2, *hashval; - i = vdso_info.bucket[h1 % vdso_info.nbucket]; + i = vdso_info.gnu_bucket[h1 % vdso_info.nbucket]; if (i == 0) return 0; h1 |= 1; - hashval = vdso_info.bucket + vdso_info.nbucket + + hashval = vdso_info.gnu_bucket + vdso_info.nbucket + (i - vdso_info.gnu_hash[1]); for (;; i++) { ELF(Sym) *sym = &vdso_info.symtab[i]; -- GitLab From b4a1dec11793936ffe1a9fb811724532ff3b1174 Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Mon, 24 Feb 2025 12:27:04 +0100 Subject: [PATCH 1230/1585] s390/ftrace: Fix return address recovery of traced function When fgraph is enabled the traced function return address is replaced with trampoline return_to_handler(). The original return address of the traced function is saved in per task return stack along with a stack pointer for reliable stack unwinding via function_graph_enter_regs(). During stack unwinding e.g. for livepatching, ftrace_graph_ret_addr() identifies the original return address of the traced function with the saved stack pointer. With a recent change, the stack pointers passed to ftrace_graph_ret_addr() and function_graph_enter_regs() do not match anymore, and therefore the original return address is not found. Pass the correct stack pointer to function_graph_enter_regs() to fix this. Fixes: 7495e179b478 ("s390/tracing: Enable HAVE_FTRACE_GRAPH_FUNC") Reviewed-by: Heiko Carstens Signed-off-by: Sumanth Korikkar Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ftrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 63ba6306632ef..e540b022ceb23 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -266,12 +266,13 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14]; + unsigned long sp = arch_ftrace_regs(fregs)->regs.gprs[15]; if (unlikely(ftrace_graph_is_dead())) return; if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; - if (!function_graph_enter_regs(*parent, ip, 0, parent, fregs)) + if (!function_graph_enter_regs(*parent, ip, 0, (unsigned long *)sp, fregs)) *parent = (unsigned long)&return_to_handler; } -- GitLab From f9dc8fb3afc968042bdaf4b6e445a9272071c9f3 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Tue, 4 Mar 2025 03:23:14 -0500 Subject: [PATCH 1231/1585] KVM: x86: Explicitly zero EAX and EBX when PERFMON_V2 isn't supported by KVM Fix a goof where KVM sets CPUID.0x80000022.EAX to CPUID.0x80000022.EBX instead of zeroing both when PERFMON_V2 isn't supported by KVM. In practice, barring a buggy CPU (or vCPU model when running nested) only the !enable_pmu case is affected, as KVM always supports PERFMON_V2 if it's available in hardware, i.e. CPUID.0x80000022.EBX will be '0' if PERFMON_V2 is unsupported. For the !enable_pmu case, the bug is relatively benign as KVM will refuse to enable PMU capabilities, but a VMM that reflects KVM's supported CPUID into the guest could inadvertently induce #GPs in the guest due to advertising support for MSRs that KVM refuses to emulate. Fixes: 94cdeebd8211 ("KVM: x86/cpuid: Add AMD CPUID ExtPerfMonAndDbg leaf 0x80000022") Signed-off-by: Xiaoyao Li Link: https://lore.kernel.org/r/20250304082314.472202-3-xiaoyao.li@intel.com [sean: massage shortlog and changelog, tag for stable] Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson --- arch/x86/kvm/cpuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 8eb3a88707f21..121edf1f2a79a 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1763,7 +1763,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->ecx = entry->edx = 0; if (!enable_pmu || !kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) { - entry->eax = entry->ebx; + entry->eax = entry->ebx = 0; break; } -- GitLab From 3d252160b818045f3a152b13756f6f37ca34639d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 4 Mar 2025 13:51:38 +0000 Subject: [PATCH 1232/1585] fs/pipe: Read pipe->{head,tail} atomically outside pipe->mutex pipe_readable(), pipe_writable(), and pipe_poll() can read "pipe->head" and "pipe->tail" outside of "pipe->mutex" critical section. When the head and the tail are read individually in that order, there is a window for interruption between the two reads in which both the head and the tail can be updated by concurrent readers and writers. One of the problematic scenarios observed with hackbench running multiple groups on a large server on a particular pipe inode is as follows: pipe->head = 36 pipe->tail = 36 hackbench-118762 [057] ..... 1029.550548: pipe_write: *wakes up: pipe not full* hackbench-118762 [057] ..... 1029.550548: pipe_write: head: 36 -> 37 [tail: 36] hackbench-118762 [057] ..... 1029.550548: pipe_write: *wake up next reader 118740* hackbench-118762 [057] ..... 1029.550548: pipe_write: *wake up next writer 118768* hackbench-118768 [206] ..... 1029.55055X: pipe_write: *writer wakes up* hackbench-118768 [206] ..... 1029.55055X: pipe_write: head = READ_ONCE(pipe->head) [37] ... CPU 206 interrupted (exact wakeup was not traced but 118768 did read head at 37 in traces) hackbench-118740 [057] ..... 1029.550558: pipe_read: *reader wakes up: pipe is not empty* hackbench-118740 [057] ..... 1029.550558: pipe_read: tail: 36 -> 37 [head = 37] hackbench-118740 [057] ..... 1029.550559: pipe_read: *pipe is empty; wakeup writer 118768* hackbench-118740 [057] ..... 1029.550559: pipe_read: *sleeps* hackbench-118766 [185] ..... 1029.550592: pipe_write: *New writer comes in* hackbench-118766 [185] ..... 1029.550592: pipe_write: head: 37 -> 38 [tail: 37] hackbench-118766 [185] ..... 1029.550592: pipe_write: *wakes up reader 118766* hackbench-118740 [185] ..... 1029.550598: pipe_read: *reader wakes up; pipe not empty* hackbench-118740 [185] ..... 1029.550599: pipe_read: tail: 37 -> 38 [head: 38] hackbench-118740 [185] ..... 1029.550599: pipe_read: *pipe is empty* hackbench-118740 [185] ..... 1029.550599: pipe_read: *reader sleeps; wakeup writer 118768* ... CPU 206 switches back to writer hackbench-118768 [206] ..... 1029.550601: pipe_write: tail = READ_ONCE(pipe->tail) [38] hackbench-118768 [206] ..... 1029.550601: pipe_write: pipe_full()? (u32)(37 - 38) >= 16? Yes hackbench-118768 [206] ..... 1029.550601: pipe_write: *writer goes back to sleep* [ Tasks 118740 and 118768 can then indefinitely wait on each other. ] The unsigned arithmetic in pipe_occupancy() wraps around when "pipe->tail > pipe->head" leading to pipe_full() returning true despite the pipe being empty. The case of genuine wraparound of "pipe->head" is handled since pipe buffer has data allowing readers to make progress until the pipe->tail wraps too after which the reader will wakeup a sleeping writer, however, mistaking the pipe to be full when it is in fact empty can lead to readers and writers waiting on each other indefinitely. This issue became more problematic and surfaced as a hang in hackbench after the optimization in commit aaec5a95d596 ("pipe_read: don't wake up the writer if the pipe is still full") significantly reduced the number of spurious wakeups of writers that had previously helped mask the issue. To avoid missing any updates between the reads of "pipe->head" and "pipe->write", unionize the two with a single unsigned long "pipe->head_tail" member that can be loaded atomically. Using "pipe->head_tail" to read the head and the tail ensures the lockless checks do not miss any updates to the head or the tail and since those two are only updated under "pipe->mutex", it ensures that the head is always ahead of, or equal to the tail resulting in correct calculations. [ prateek: commit log, testing on x86 platforms. ] Reported-and-debugged-by: Swapnil Sapkal Closes: https://lore.kernel.org/lkml/e813814e-7094-4673-bc69-731af065a0eb@amd.com/ Reported-by: Alexey Gladkov Closes: https://lore.kernel.org/all/Z8Wn0nTvevLRG_4m@example.org/ Fixes: 8cefc107ca54 ("pipe: Use head and tail pointers for the ring, not cursor and length") Tested-by: Swapnil Sapkal Reviewed-by: Oleg Nesterov Tested-by: Alexey Gladkov Signed-off-by: K Prateek Nayak Signed-off-by: Linus Torvalds --- fs/pipe.c | 19 ++++++++----------- include/linux/pipe_fs_i.h | 39 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 45 insertions(+), 13 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index ce1af7592780d..e8e6698f36981 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -210,11 +210,10 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = { /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */ static inline bool pipe_readable(const struct pipe_inode_info *pipe) { - unsigned int head = READ_ONCE(pipe->head); - unsigned int tail = READ_ONCE(pipe->tail); + union pipe_index idx = { .head_tail = READ_ONCE(pipe->head_tail) }; unsigned int writers = READ_ONCE(pipe->writers); - return !pipe_empty(head, tail) || !writers; + return !pipe_empty(idx.head, idx.tail) || !writers; } static inline unsigned int pipe_update_tail(struct pipe_inode_info *pipe, @@ -417,11 +416,10 @@ static inline int is_packetized(struct file *file) /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */ static inline bool pipe_writable(const struct pipe_inode_info *pipe) { - unsigned int head = READ_ONCE(pipe->head); - unsigned int tail = READ_ONCE(pipe->tail); + union pipe_index idx = { .head_tail = READ_ONCE(pipe->head_tail) }; unsigned int max_usage = READ_ONCE(pipe->max_usage); - return !pipe_full(head, tail, max_usage) || + return !pipe_full(idx.head, idx.tail, max_usage) || !READ_ONCE(pipe->readers); } @@ -659,7 +657,7 @@ pipe_poll(struct file *filp, poll_table *wait) { __poll_t mask; struct pipe_inode_info *pipe = filp->private_data; - unsigned int head, tail; + union pipe_index idx; /* Epoll has some historical nasty semantics, this enables them */ WRITE_ONCE(pipe->poll_usage, true); @@ -680,19 +678,18 @@ pipe_poll(struct file *filp, poll_table *wait) * if something changes and you got it wrong, the poll * table entry will wake you up and fix it. */ - head = READ_ONCE(pipe->head); - tail = READ_ONCE(pipe->tail); + idx.head_tail = READ_ONCE(pipe->head_tail); mask = 0; if (filp->f_mode & FMODE_READ) { - if (!pipe_empty(head, tail)) + if (!pipe_empty(idx.head, idx.tail)) mask |= EPOLLIN | EPOLLRDNORM; if (!pipe->writers && filp->f_pipe != pipe->w_counter) mask |= EPOLLHUP; } if (filp->f_mode & FMODE_WRITE) { - if (!pipe_full(head, tail, pipe->max_usage)) + if (!pipe_full(idx.head, idx.tail, pipe->max_usage)) mask |= EPOLLOUT | EPOLLWRNORM; /* * Most Unices do not set EPOLLERR for FIFOs but on Linux they diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 8ff23bf5a8197..3cc4f8eab853f 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -31,6 +31,33 @@ struct pipe_buffer { unsigned long private; }; +/* + * Really only alpha needs 32-bit fields, but + * might as well do it for 64-bit architectures + * since that's what we've historically done, + * and it makes 'head_tail' always be a simple + * 'unsigned long'. + */ +#ifdef CONFIG_64BIT +typedef unsigned int pipe_index_t; +#else +typedef unsigned short pipe_index_t; +#endif + +/* + * We have to declare this outside 'struct pipe_inode_info', + * but then we can't use 'union pipe_index' for an anonymous + * union, so we end up having to duplicate this declaration + * below. Annoying. + */ +union pipe_index { + unsigned long head_tail; + struct { + pipe_index_t head; + pipe_index_t tail; + }; +}; + /** * struct pipe_inode_info - a linux kernel pipe * @mutex: mutex protecting the whole thing @@ -58,8 +85,16 @@ struct pipe_buffer { struct pipe_inode_info { struct mutex mutex; wait_queue_head_t rd_wait, wr_wait; - unsigned int head; - unsigned int tail; + + /* This has to match the 'union pipe_index' above */ + union { + unsigned long head_tail; + struct { + pipe_index_t head; + pipe_index_t tail; + }; + }; + unsigned int max_usage; unsigned int ring_size; unsigned int nr_accounted; -- GitLab From dfc1b168a8c4b376fa222b27b97c2c4ad4b786e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 17 Feb 2025 08:27:54 +0100 Subject: [PATCH 1233/1585] kbuild: userprogs: use correct lld when linking through clang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The userprog infrastructure links objects files through $(CC). Either explicitly by manually calling $(CC) on multiple object files or implicitly by directly compiling a source file to an executable. The documentation at Documentation/kbuild/llvm.rst indicates that ld.lld would be used for linking if LLVM=1 is specified. However clang instead will use either a globally installed cross linker from $PATH called ${target}-ld or fall back to the system linker, which probably does not support crosslinking. For the normal kernel build this is not an issue because the linker is always executed directly, without the compiler being involved. Explicitly pass --ld-path to clang so $(LD) is respected. As clang 13.0.1 is required to build the kernel, this option is available. Fixes: 7f3a59db274c ("kbuild: add infrastructure to build userspace programs") Cc: stable@vger.kernel.org # needs wrapping in $(cc-option) for < 6.9 Signed-off-by: Thomas Weißschuh Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile b/Makefile index 70bdbf2218fc5..6a8e5be6b0043 100644 --- a/Makefile +++ b/Makefile @@ -1123,6 +1123,11 @@ endif KBUILD_USERCFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) +# userspace programs are linked via the compiler, use the correct linker +ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_LD_IS_LLD),yy) +KBUILD_USERLDFLAGS += --ld-path=$(LD) +endif + # make the checker run with the right architecture CHECKFLAGS += --arch=$(ARCH) -- GitLab From 02e9a22ceef0227175e391902d8760425fa072c6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Feb 2025 11:00:31 +0100 Subject: [PATCH 1234/1585] kbuild: hdrcheck: fix cross build with clang The headercheck tries to call clang with a mix of compiler arguments that don't include the target architecture. When building e.g. x86 headers on arm64, this produces a warning like clang: warning: unknown platform, assuming -mfloat-abi=soft Add in the KBUILD_CPPFLAGS, which contain the target, in order to make it build properly. See also 1b71c2fb04e7 ("kbuild: userprogs: fix bitsize and target detection on clang"). Reviewed-by: Nathan Chancellor Fixes: feb843a469fb ("kbuild: add $(CLANG_FLAGS) to KBUILD_CPPFLAGS") Signed-off-by: Arnd Bergmann --- usr/include/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usr/include/Makefile b/usr/include/Makefile index 6c6de1b1622b1..e3d6b03527fec 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -10,7 +10,7 @@ UAPI_CFLAGS := -std=c90 -Wall -Werror=implicit-function-declaration # In theory, we do not care -m32 or -m64 for header compile tests. # It is here just because CONFIG_CC_CAN_LINK is tested with -m32 or -m64. -UAPI_CFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS)) +UAPI_CFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) # USERCFLAGS might contain sysroot location for CC. UAPI_CFLAGS += $(USERCFLAGS) -- GitLab From dd0b7d4a56e3349de65bf9752734510fb55baf29 Mon Sep 17 00:00:00 2001 From: Satoru Takeuchi Date: Fri, 28 Feb 2025 20:37:08 +0900 Subject: [PATCH 1235/1585] docs: Kconfig: fix defconfig description Commit 2a86f6612164 ("kbuild: use KBUILD_DEFCONFIG as the fallback for DEFCONFIG_LIST") removed arch/$ARCH/defconfig; however, the document has not been updated to reflect this change yet. Signed-off-by: Satoru Takeuchi Signed-off-by: Masahiro Yamada --- Documentation/admin-guide/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst index eb94526689091..b557cf1c820d2 100644 --- a/Documentation/admin-guide/README.rst +++ b/Documentation/admin-guide/README.rst @@ -176,7 +176,7 @@ Configuring the kernel values without prompting. "make defconfig" Create a ./.config file by using the default - symbol values from either arch/$ARCH/defconfig + symbol values from either arch/$ARCH/configs/defconfig or arch/$ARCH/configs/${PLATFORM}_defconfig, depending on the architecture. -- GitLab From 30bfc151f0c1ec80c27a80a7651b2c15c648ad16 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 10 Dec 2024 09:31:02 +0100 Subject: [PATCH 1236/1585] drm/xe: Remove double pageflip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is already handled below in the code by fixup_initial_plane_config. Fixes: a8153627520a ("drm/i915: Try to relocate the BIOS fb to the start of ggtt") Cc: Ville Syrjälä Reviewed-by: Vinod Govindapillai Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20241210083111.230484-3-dev@lankhorst.se Signed-off-by: Maarten Lankhorst (cherry picked from commit 2218704997979fbf11765281ef752f07c5cf25bb) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_plane_initial.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index 2eb9633f163a7..2a2f250fa495d 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -194,8 +194,6 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc, to_intel_plane(crtc->base.primary); struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); - struct intel_crtc_state *crtc_state = - to_intel_crtc_state(crtc->base.state); struct drm_framebuffer *fb; struct i915_vma *vma; @@ -241,14 +239,6 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc, atomic_or(plane->frontbuffer_bit, &to_intel_frontbuffer(fb)->bits); plane_config->vma = vma; - - /* - * Flip to the newly created mapping ASAP, so we can re-use the - * first part of GGTT for WOPCM, prevent flickering, and prevent - * the lookup of sysmem scratch pages. - */ - plane->check_plane(crtc_state, plane_state); - plane->async_flip(NULL, plane, crtc_state, plane_state, true); return; nofb: -- GitLab From d1039a3c12fffe501c5379c7eb1372eaab318e0a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 26 Feb 2025 15:56:26 +0200 Subject: [PATCH 1237/1585] drm/i915/mst: update max stream count to match number of pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We create the stream encoders and attach connectors for each pipe we have. As the number of pipes has increased, we've failed to update the topology manager maximum number of payloads to match that. Bump up the max stream count to match number of pipes, enabling the fourth stream on platforms that support four pipes. Cc: stable@vger.kernel.org Cc: Imre Deak Cc: Ville Syrjala Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250226135626.1956012-1-jani.nikula@intel.com Signed-off-by: Jani Nikula (cherry picked from commit 15bccbfb78d63a2a621b30caff8b9424160c6c89) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index a65cf97ad12df..86d6185fda50a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1867,7 +1867,8 @@ intel_dp_mst_encoder_init(struct intel_digital_port *dig_port, int conn_base_id) /* create encoders */ mst_stream_encoders_create(dig_port); ret = drm_dp_mst_topology_mgr_init(&intel_dp->mst_mgr, display->drm, - &intel_dp->aux, 16, 3, conn_base_id); + &intel_dp->aux, 16, + INTEL_NUM_PIPES(display), conn_base_id); if (ret) { intel_dp->mst_mgr.cbs = NULL; return ret; -- GitLab From 475d06e00b7496c7915d87f7ae67af26738e4649 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 26 Feb 2025 17:47:49 +0000 Subject: [PATCH 1238/1585] drm/xe/userptr: properly setup pfn_flags_mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we just leave it uninitialised, which at first looks harmless, however we also don't zero out the pfn array, and with pfn_flags_mask the idea is to be able set individual flags for a given range of pfn or completely ignore them, outside of default_flags. So here we end up with pfn[i] & pfn_flags_mask, and if both are uninitialised we might get back an unexpected flags value, like asking for read only with default_flags, but getting back write on top, leading to potentially bogus behaviour. To fix this ensure we zero the pfn_flags_mask, such that hmm only considers the default_flags and not also the initial pfn[i] value. v2 (Thomas): - Prefer proper initializer. Fixes: 81e058a3e7fd ("drm/xe: Introduce helper to populate userptr") Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: Thomas Hellström Cc: # v6.10+ Reviewed-by: Thomas Hellström Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20250226174748.294285-2-matthew.auld@intel.com (cherry picked from commit dd8c01e42f4c5c1eaf02f003d7d588ba6706aa71) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hmm.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c index 0898344678801..2e4ae61567d8d 100644 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -166,13 +166,20 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, { unsigned long timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); - unsigned long *pfns, flags = HMM_PFN_REQ_FAULT; + unsigned long *pfns; struct xe_userptr *userptr; struct xe_vma *vma = &uvma->vma; u64 userptr_start = xe_vma_userptr(vma); u64 userptr_end = userptr_start + xe_vma_size(vma); struct xe_vm *vm = xe_vma_vm(vma); - struct hmm_range hmm_range; + struct hmm_range hmm_range = { + .pfn_flags_mask = 0, /* ignore pfns */ + .default_flags = HMM_PFN_REQ_FAULT, + .start = userptr_start, + .end = userptr_end, + .notifier = &uvma->userptr.notifier, + .dev_private_owner = vm->xe, + }; bool write = !xe_vma_read_only(vma); unsigned long notifier_seq; u64 npages; @@ -199,19 +206,14 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, return -ENOMEM; if (write) - flags |= HMM_PFN_REQ_WRITE; + hmm_range.default_flags |= HMM_PFN_REQ_WRITE; if (!mmget_not_zero(userptr->notifier.mm)) { ret = -EFAULT; goto free_pfns; } - hmm_range.default_flags = flags; hmm_range.hmm_pfns = pfns; - hmm_range.notifier = &userptr->notifier; - hmm_range.start = userptr_start; - hmm_range.end = userptr_end; - hmm_range.dev_private_owner = vm->xe; while (true) { hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier); -- GitLab From 54f94dc7f6b4db45dbc23b4db3d20c7194e2c54f Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 27 Feb 2025 10:13:00 +0000 Subject: [PATCH 1239/1585] drm/xe: Fix GT "for each engine" workarounds Any rules using engine matching are currently broken due RTP processing happening too in early init, before the list of hardware engines has been initialised. Fix this by moving workaround processing to later in the driver probe sequence, to just before the processed list is used for the first time. Looking at the debugfs gt0/workarounds on ADL-P we notice 14011060649 should be present while we see, before: GT Workarounds 14011059788 14015795083 And with the patch: GT Workarounds 14011060649 14011059788 14015795083 Signed-off-by: Tvrtko Ursulin Cc: Lucas De Marchi Cc: Matt Roper Cc: stable@vger.kernel.org # v6.11+ Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20250227101304.46660-2-tvrtko.ursulin@igalia.com Signed-off-by: Lucas De Marchi (cherry picked from commit 25d434cef791e03cf40680f5441b576c639bfa84) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 5d6fb79957b63..9f4f27d1ef4a9 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -380,9 +380,7 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; - xe_wa_process_gt(gt); xe_wa_process_oob(gt); - xe_tuning_process_gt(gt); xe_force_wake_init_gt(gt, gt_to_fw(gt)); spin_lock_init(>->global_invl_lock); @@ -474,6 +472,8 @@ static int all_fw_domain_init(struct xe_gt *gt) } xe_gt_mcr_set_implicit_defaults(gt); + xe_wa_process_gt(gt); + xe_tuning_process_gt(gt); xe_reg_sr_apply_mmio(>->reg_sr, gt); err = xe_gt_clock_init(gt); -- GitLab From 778b94d7ac17b5800aa857222911f09cc986b509 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 28 Feb 2025 11:01:53 -0600 Subject: [PATCH 1240/1585] ACPI: platform_profile: Add support for hidden choices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When two drivers don't support all the same profiles the legacy interface only exports the common profiles. This causes problems for cases where one driver uses low-power but another uses quiet because the result is that neither is exported to sysfs. To allow two drivers to disagree, add support for "hidden choices". Hidden choices are platform profiles that a driver supports to be compatible with the platform profile of another driver. Fixes: 688834743d67 ("ACPI: platform_profile: Allow multiple handlers") Reported-by: Antheas Kapenekakis Closes: https://lore.kernel.org/platform-driver-x86/e64b771e-3255-42ad-9257-5b8fc6c24ac9@gmx.de/T/#mc068042dd29df36c16c8af92664860fc4763974b Signed-off-by: Mario Limonciello Tested-by: Antheas Kapenekakis Tested-by: Derek J. Clark Acked-by: Ilpo Järvinen Link: https://patch.msgid.link/20250228170155.2623386-2-superm1@kernel.org Signed-off-by: Rafael J. Wysocki --- drivers/acpi/platform_profile.c | 94 +++++++++++++++++++++++++------- include/linux/platform_profile.h | 3 + 2 files changed, 76 insertions(+), 21 deletions(-) diff --git a/drivers/acpi/platform_profile.c b/drivers/acpi/platform_profile.c index 2ad53cc6aae53..ef9444482db19 100644 --- a/drivers/acpi/platform_profile.c +++ b/drivers/acpi/platform_profile.c @@ -21,9 +21,15 @@ struct platform_profile_handler { struct device dev; int minor; unsigned long choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; + unsigned long hidden_choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; const struct platform_profile_ops *ops; }; +struct aggregate_choices_data { + unsigned long aggregate[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; + int count; +}; + static const char * const profile_names[] = { [PLATFORM_PROFILE_LOW_POWER] = "low-power", [PLATFORM_PROFILE_COOL] = "cool", @@ -73,7 +79,7 @@ static int _store_class_profile(struct device *dev, void *data) lockdep_assert_held(&profile_lock); handler = to_pprof_handler(dev); - if (!test_bit(*bit, handler->choices)) + if (!test_bit(*bit, handler->choices) && !test_bit(*bit, handler->hidden_choices)) return -EOPNOTSUPP; return handler->ops->profile_set(dev, *bit); @@ -239,21 +245,44 @@ static const struct class platform_profile_class = { /** * _aggregate_choices - Aggregate the available profile choices * @dev: The device - * @data: The available profile choices + * @arg: struct aggregate_choices_data * * Return: 0 on success, -errno on failure */ -static int _aggregate_choices(struct device *dev, void *data) +static int _aggregate_choices(struct device *dev, void *arg) { + unsigned long tmp[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; + struct aggregate_choices_data *data = arg; struct platform_profile_handler *handler; - unsigned long *aggregate = data; lockdep_assert_held(&profile_lock); handler = to_pprof_handler(dev); - if (test_bit(PLATFORM_PROFILE_LAST, aggregate)) - bitmap_copy(aggregate, handler->choices, PLATFORM_PROFILE_LAST); + bitmap_or(tmp, handler->choices, handler->hidden_choices, PLATFORM_PROFILE_LAST); + if (test_bit(PLATFORM_PROFILE_LAST, data->aggregate)) + bitmap_copy(data->aggregate, tmp, PLATFORM_PROFILE_LAST); else - bitmap_and(aggregate, handler->choices, aggregate, PLATFORM_PROFILE_LAST); + bitmap_and(data->aggregate, tmp, data->aggregate, PLATFORM_PROFILE_LAST); + data->count++; + + return 0; +} + +/** + * _remove_hidden_choices - Remove hidden choices from aggregate data + * @dev: The device + * @arg: struct aggregate_choices_data + * + * Return: 0 on success, -errno on failure + */ +static int _remove_hidden_choices(struct device *dev, void *arg) +{ + struct aggregate_choices_data *data = arg; + struct platform_profile_handler *handler; + + lockdep_assert_held(&profile_lock); + handler = to_pprof_handler(dev); + bitmap_andnot(data->aggregate, handler->choices, + handler->hidden_choices, PLATFORM_PROFILE_LAST); return 0; } @@ -270,22 +299,31 @@ static ssize_t platform_profile_choices_show(struct device *dev, struct device_attribute *attr, char *buf) { - unsigned long aggregate[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; + struct aggregate_choices_data data = { + .aggregate = { [0 ... BITS_TO_LONGS(PLATFORM_PROFILE_LAST) - 1] = ~0UL }, + .count = 0, + }; int err; - set_bit(PLATFORM_PROFILE_LAST, aggregate); + set_bit(PLATFORM_PROFILE_LAST, data.aggregate); scoped_cond_guard(mutex_intr, return -ERESTARTSYS, &profile_lock) { err = class_for_each_device(&platform_profile_class, NULL, - aggregate, _aggregate_choices); + &data, _aggregate_choices); if (err) return err; + if (data.count == 1) { + err = class_for_each_device(&platform_profile_class, NULL, + &data, _remove_hidden_choices); + if (err) + return err; + } } /* no profile handler registered any more */ - if (bitmap_empty(aggregate, PLATFORM_PROFILE_LAST)) + if (bitmap_empty(data.aggregate, PLATFORM_PROFILE_LAST)) return -EINVAL; - return _commmon_choices_show(aggregate, buf); + return _commmon_choices_show(data.aggregate, buf); } /** @@ -373,7 +411,10 @@ static ssize_t platform_profile_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - unsigned long choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; + struct aggregate_choices_data data = { + .aggregate = { [0 ... BITS_TO_LONGS(PLATFORM_PROFILE_LAST) - 1] = ~0UL }, + .count = 0, + }; int ret; int i; @@ -381,13 +422,13 @@ static ssize_t platform_profile_store(struct device *dev, i = sysfs_match_string(profile_names, buf); if (i < 0 || i == PLATFORM_PROFILE_CUSTOM) return -EINVAL; - set_bit(PLATFORM_PROFILE_LAST, choices); + set_bit(PLATFORM_PROFILE_LAST, data.aggregate); scoped_cond_guard(mutex_intr, return -ERESTARTSYS, &profile_lock) { ret = class_for_each_device(&platform_profile_class, NULL, - choices, _aggregate_choices); + &data, _aggregate_choices); if (ret) return ret; - if (!test_bit(i, choices)) + if (!test_bit(i, data.aggregate)) return -EOPNOTSUPP; ret = class_for_each_device(&platform_profile_class, NULL, &i, @@ -453,12 +494,15 @@ EXPORT_SYMBOL_GPL(platform_profile_notify); */ int platform_profile_cycle(void) { + struct aggregate_choices_data data = { + .aggregate = { [0 ... BITS_TO_LONGS(PLATFORM_PROFILE_LAST) - 1] = ~0UL }, + .count = 0, + }; enum platform_profile_option next = PLATFORM_PROFILE_LAST; enum platform_profile_option profile = PLATFORM_PROFILE_LAST; - unsigned long choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; int err; - set_bit(PLATFORM_PROFILE_LAST, choices); + set_bit(PLATFORM_PROFILE_LAST, data.aggregate); scoped_cond_guard(mutex_intr, return -ERESTARTSYS, &profile_lock) { err = class_for_each_device(&platform_profile_class, NULL, &profile, _aggregate_profiles); @@ -470,14 +514,14 @@ int platform_profile_cycle(void) return -EINVAL; err = class_for_each_device(&platform_profile_class, NULL, - choices, _aggregate_choices); + &data, _aggregate_choices); if (err) return err; /* never iterate into a custom if all drivers supported it */ - clear_bit(PLATFORM_PROFILE_CUSTOM, choices); + clear_bit(PLATFORM_PROFILE_CUSTOM, data.aggregate); - next = find_next_bit_wrap(choices, + next = find_next_bit_wrap(data.aggregate, PLATFORM_PROFILE_LAST, profile + 1); @@ -532,6 +576,14 @@ struct device *platform_profile_register(struct device *dev, const char *name, return ERR_PTR(-EINVAL); } + if (ops->hidden_choices) { + err = ops->hidden_choices(drvdata, pprof->hidden_choices); + if (err) { + dev_err(dev, "platform_profile hidden_choices failed\n"); + return ERR_PTR(err); + } + } + guard(mutex)(&profile_lock); /* create class interface for individual handler */ diff --git a/include/linux/platform_profile.h b/include/linux/platform_profile.h index 8ab5b0e8eb2c1..8c9df7dadd5d3 100644 --- a/include/linux/platform_profile.h +++ b/include/linux/platform_profile.h @@ -33,6 +33,8 @@ enum platform_profile_option { * @probe: Callback to setup choices available to the new class device. These * choices will only be enforced when setting a new profile, not when * getting the current one. + * @hidden_choices: Callback to setup choices that are not visible to the user + * but can be set by the driver. * @profile_get: Callback that will be called when showing the current platform * profile in sysfs. * @profile_set: Callback that will be called when storing a new platform @@ -40,6 +42,7 @@ enum platform_profile_option { */ struct platform_profile_ops { int (*probe)(void *drvdata, unsigned long *choices); + int (*hidden_choices)(void *drvdata, unsigned long *choices); int (*profile_get)(struct device *dev, enum platform_profile_option *profile); int (*profile_set)(struct device *dev, enum platform_profile_option profile); }; -- GitLab From 44e94fece5170ed9110564efec592d0e88830a28 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 28 Feb 2025 11:01:54 -0600 Subject: [PATCH 1241/1585] platform/x86/amd: pmf: Add 'quiet' to hidden choices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When amd-pmf and asus-wmi are both bound no low power option shows up in sysfs. Add a hidden choice for amd-pmf to support 'quiet' mode to let both bind. Fixes: 688834743d67 ("ACPI: platform_profile: Allow multiple handlers") Suggested-by: Antheas Kapenekakis Signed-off-by: Mario Limonciello Tested-by: Antheas Kapenekakis Tested-by: Derek J. Clark Acked-by: Ilpo Järvinen Link: https://patch.msgid.link/20250228170155.2623386-3-superm1@kernel.org Signed-off-by: Rafael J. Wysocki --- drivers/platform/x86/amd/pmf/sps.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/platform/x86/amd/pmf/sps.c b/drivers/platform/x86/amd/pmf/sps.c index e6cf0b22dac33..3a0079c17cb17 100644 --- a/drivers/platform/x86/amd/pmf/sps.c +++ b/drivers/platform/x86/amd/pmf/sps.c @@ -303,6 +303,7 @@ int amd_pmf_get_pprof_modes(struct amd_pmf_dev *pmf) mode = POWER_MODE_BALANCED_POWER; break; case PLATFORM_PROFILE_LOW_POWER: + case PLATFORM_PROFILE_QUIET: mode = POWER_MODE_POWER_SAVER; break; default: @@ -387,6 +388,13 @@ static int amd_pmf_profile_set(struct device *dev, return 0; } +static int amd_pmf_hidden_choices(void *drvdata, unsigned long *choices) +{ + set_bit(PLATFORM_PROFILE_QUIET, choices); + + return 0; +} + static int amd_pmf_profile_probe(void *drvdata, unsigned long *choices) { set_bit(PLATFORM_PROFILE_LOW_POWER, choices); @@ -398,6 +406,7 @@ static int amd_pmf_profile_probe(void *drvdata, unsigned long *choices) static const struct platform_profile_ops amd_pmf_profile_ops = { .probe = amd_pmf_profile_probe, + .hidden_choices = amd_pmf_hidden_choices, .profile_get = amd_pmf_profile_get, .profile_set = amd_pmf_profile_set, }; -- GitLab From 9a43102daf64dd0d172d8b39836dbc1dba4da1ea Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 28 Feb 2025 11:01:55 -0600 Subject: [PATCH 1242/1585] platform/x86/amd: pmf: Add balanced-performance to hidden choices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acer's WMI driver uses balanced-performance but AMD-PMF doesn't. In case a machine binds with both drivers let amd-pmf use balanced-performance as well. Fixes: 688834743d67 ("ACPI: platform_profile: Allow multiple handlers") Suggested-by: Antheas Kapenekakis Signed-off-by: Mario Limonciello Tested-by: Antheas Kapenekakis Tested-by: Derek J. Clark Acked-by: Ilpo Järvinen Link: https://patch.msgid.link/20250228170155.2623386-4-superm1@kernel.org Signed-off-by: Rafael J. Wysocki --- drivers/platform/x86/amd/pmf/sps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/amd/pmf/sps.c b/drivers/platform/x86/amd/pmf/sps.c index 3a0079c17cb17..d3083383f11fb 100644 --- a/drivers/platform/x86/amd/pmf/sps.c +++ b/drivers/platform/x86/amd/pmf/sps.c @@ -297,6 +297,7 @@ int amd_pmf_get_pprof_modes(struct amd_pmf_dev *pmf) switch (pmf->current_profile) { case PLATFORM_PROFILE_PERFORMANCE: + case PLATFORM_PROFILE_BALANCED_PERFORMANCE: mode = POWER_MODE_PERFORMANCE; break; case PLATFORM_PROFILE_BALANCED: @@ -391,6 +392,7 @@ static int amd_pmf_profile_set(struct device *dev, static int amd_pmf_hidden_choices(void *drvdata, unsigned long *choices) { set_bit(PLATFORM_PROFILE_QUIET, choices); + set_bit(PLATFORM_PROFILE_BALANCED_PERFORMANCE, choices); return 0; } -- GitLab From 723aa55c08c9d1e0734e39a815fd41272eac8269 Mon Sep 17 00:00:00 2001 From: Wentao Guan Date: Fri, 14 Feb 2025 19:04:18 +0800 Subject: [PATCH 1243/1585] HID: i2c-hid: improve i2c_hid_get_report error message We have two places to print "failed to set a report to ...", use "get a report from" instead of "set a report to", it makes people who knows less about the module to know where the error happened. Before: i2c_hid_acpi i2c-FTSC1000:00: failed to set a report to device: -11 After: i2c_hid_acpi i2c-FTSC1000:00: failed to get a report from device: -11 Signed-off-by: Wentao Guan Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/i2c-hid-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 75544448c2393..d3912e3f2f13a 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -290,7 +290,7 @@ static int i2c_hid_get_report(struct i2c_hid *ihid, ihid->rawbuf, recv_len + sizeof(__le16)); if (error) { dev_err(&ihid->client->dev, - "failed to set a report to device: %d\n", error); + "failed to get a report from device: %d\n", error); return error; } -- GitLab From 221cea1003d8a412e5ec64a58df7ab19b654f490 Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Sun, 23 Feb 2025 22:36:30 -0700 Subject: [PATCH 1244/1585] HID: apple: disable Fn key handling on the Omoton KB066 Remove the fixup to make the Omoton KB066's F6 key F6 when not holding Fn. That was really just a hack to allow typing F6 in fnmode>0, and it didn't fix any of the other F keys that were likewise untypable in fnmode>0. Instead, because the Omoton's Fn key is entirely internal to the keyboard, completely disable Fn key translation when an Omoton is detected, which will prevent the hid-apple driver from interfering with the keyboard's built-in Fn key handling. All of the F keys, including F6, are then typable when Fn is held. The Omoton KB066 and the Apple A1255 both have HID product code 05ac:022c. The self-reported name of every original A1255 when they left the factory was "Apple Wireless Keyboard". By default, Mac OS changes the name to "'s keyboard" when pairing with the keyboard, but Mac OS allows the user to set the internal name of Apple keyboards to anything they like. The Omoton KB066's name, on the other hand, is not configurable: It is always "Bluetooth Keyboard". Because that name is so generic that a user might conceivably use the same name for a real Apple keyboard, detect Omoton keyboards based on both having that exact name and having HID product code 022c. Fixes: 819083cb6eed ("HID: apple: fix up the F6 key on the Omoton KB066 keyboard") Signed-off-by: Alex Henrie Reviewed-by: Aditya Garg Signed-off-by: Jiri Kosina --- drivers/hid/hid-apple.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 49812a76b7edd..d900dd05c335c 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -378,6 +378,12 @@ static bool apple_is_non_apple_keyboard(struct hid_device *hdev) return false; } +static bool apple_is_omoton_kb066(struct hid_device *hdev) +{ + return hdev->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI && + strcmp(hdev->name, "Bluetooth Keyboard") == 0; +} + static inline void apple_setup_key_translation(struct input_dev *input, const struct apple_key_translation *table) { @@ -546,9 +552,6 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input, } } - if (usage->hid == 0xc0301) /* Omoton KB066 quirk */ - code = KEY_F6; - if (usage->code != code) { input_event_with_scancode(input, usage->type, code, usage->hid, value); @@ -728,7 +731,7 @@ static int apple_input_configured(struct hid_device *hdev, { struct apple_sc *asc = hid_get_drvdata(hdev); - if ((asc->quirks & APPLE_HAS_FN) && !asc->fn_found) { + if (((asc->quirks & APPLE_HAS_FN) && !asc->fn_found) || apple_is_omoton_kb066(hdev)) { hid_info(hdev, "Fn key not found (Apple Wireless Keyboard clone?), disabling Fn key handling\n"); asc->quirks &= ~APPLE_HAS_FN; } -- GitLab From 2ff5baa9b5275e3acafdf7f2089f74cccb2f38d1 Mon Sep 17 00:00:00 2001 From: Daniil Dulov Date: Mon, 24 Feb 2025 20:30:30 +0300 Subject: [PATCH 1245/1585] HID: appleir: Fix potential NULL dereference at raw event handle Syzkaller reports a NULL pointer dereference issue in input_event(). BUG: KASAN: null-ptr-deref in instrument_atomic_read include/linux/instrumented.h:68 [inline] BUG: KASAN: null-ptr-deref in _test_bit include/asm-generic/bitops/instrumented-non-atomic.h:141 [inline] BUG: KASAN: null-ptr-deref in is_event_supported drivers/input/input.c:67 [inline] BUG: KASAN: null-ptr-deref in input_event+0x42/0xa0 drivers/input/input.c:395 Read of size 8 at addr 0000000000000028 by task syz-executor199/2949 CPU: 0 UID: 0 PID: 2949 Comm: syz-executor199 Not tainted 6.13.0-rc4-syzkaller-00076-gf097a36ef88d #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 kasan_report+0xd9/0x110 mm/kasan/report.c:602 check_region_inline mm/kasan/generic.c:183 [inline] kasan_check_range+0xef/0x1a0 mm/kasan/generic.c:189 instrument_atomic_read include/linux/instrumented.h:68 [inline] _test_bit include/asm-generic/bitops/instrumented-non-atomic.h:141 [inline] is_event_supported drivers/input/input.c:67 [inline] input_event+0x42/0xa0 drivers/input/input.c:395 input_report_key include/linux/input.h:439 [inline] key_down drivers/hid/hid-appleir.c:159 [inline] appleir_raw_event+0x3e5/0x5e0 drivers/hid/hid-appleir.c:232 __hid_input_report.constprop.0+0x312/0x440 drivers/hid/hid-core.c:2111 hid_ctrl+0x49f/0x550 drivers/hid/usbhid/hid-core.c:484 __usb_hcd_giveback_urb+0x389/0x6e0 drivers/usb/core/hcd.c:1650 usb_hcd_giveback_urb+0x396/0x450 drivers/usb/core/hcd.c:1734 dummy_timer+0x17f7/0x3960 drivers/usb/gadget/udc/dummy_hcd.c:1993 __run_hrtimer kernel/time/hrtimer.c:1739 [inline] __hrtimer_run_queues+0x20a/0xae0 kernel/time/hrtimer.c:1803 hrtimer_run_softirq+0x17d/0x350 kernel/time/hrtimer.c:1820 handle_softirqs+0x206/0x8d0 kernel/softirq.c:561 __do_softirq kernel/softirq.c:595 [inline] invoke_softirq kernel/softirq.c:435 [inline] __irq_exit_rcu+0xfa/0x160 kernel/softirq.c:662 irq_exit_rcu+0x9/0x30 kernel/softirq.c:678 instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1049 [inline] sysvec_apic_timer_interrupt+0x90/0xb0 arch/x86/kernel/apic/apic.c:1049 asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:702 __mod_timer+0x8f6/0xdc0 kernel/time/timer.c:1185 add_timer+0x62/0x90 kernel/time/timer.c:1295 schedule_timeout+0x11f/0x280 kernel/time/sleep_timeout.c:98 usbhid_wait_io+0x1c7/0x380 drivers/hid/usbhid/hid-core.c:645 usbhid_init_reports+0x19f/0x390 drivers/hid/usbhid/hid-core.c:784 hiddev_ioctl+0x1133/0x15b0 drivers/hid/usbhid/hiddev.c:794 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:906 [inline] __se_sys_ioctl fs/ioctl.c:892 [inline] __x64_sys_ioctl+0x190/0x200 fs/ioctl.c:892 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f This happens due to the malformed report items sent by the emulated device which results in a report, that has no fields, being added to the report list. Due to this appleir_input_configured() is never called, hidinput_connect() fails which results in the HID_CLAIMED_INPUT flag is not being set. However, it does not make appleir_probe() fail and lets the event callback to be called without the associated input device. Thus, add a check for the HID_CLAIMED_INPUT flag and leave the event hook early if the driver didn't claim any input_dev for some reason. Moreover, some other hid drivers accessing input_dev in their event callbacks do have similar checks, too. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 9a4a5574ce42 ("HID: appleir: add support for Apple ir devices") Cc: stable@vger.kernel.org Signed-off-by: Daniil Dulov Signed-off-by: Jiri Kosina --- drivers/hid/hid-appleir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-appleir.c b/drivers/hid/hid-appleir.c index 8deded1857254..c45e5aa569d25 100644 --- a/drivers/hid/hid-appleir.c +++ b/drivers/hid/hid-appleir.c @@ -188,7 +188,7 @@ static int appleir_raw_event(struct hid_device *hid, struct hid_report *report, static const u8 flatbattery[] = { 0x25, 0x87, 0xe0 }; unsigned long flags; - if (len != 5) + if (len != 5 || !(hid->claimed & HID_CLAIMED_INPUT)) goto out; if (!memcmp(data, keydown, sizeof(keydown))) { -- GitLab From a6a4f4e9b8018806cca30049b59a1c3c8b513701 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 27 Feb 2025 22:33:57 +0000 Subject: [PATCH 1246/1585] HID: debug: Fix spelling mistake "Messanger" -> "Messenger" There is a spelling mistake in a literal string. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Jiri Kosina --- drivers/hid/hid-debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 541d682af15aa..8433306148d57 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -3450,7 +3450,7 @@ static const char *keys[KEY_MAX + 1] = { [KEY_MACRO_RECORD_START] = "MacroRecordStart", [KEY_MACRO_RECORD_STOP] = "MacroRecordStop", [KEY_MARK_WAYPOINT] = "MarkWayPoint", [KEY_MEDIA_REPEAT] = "MediaRepeat", - [KEY_MEDIA_TOP_MENU] = "MediaTopMenu", [KEY_MESSENGER] = "Messanger", + [KEY_MEDIA_TOP_MENU] = "MediaTopMenu", [KEY_MESSENGER] = "Messenger", [KEY_NAV_CHART] = "NavChar", [KEY_NAV_INFO] = "NavInfo", [KEY_NEWS] = "News", [KEY_NEXT_ELEMENT] = "NextElement", [KEY_NEXT_FAVORITE] = "NextFavorite", [KEY_NOTIFICATION_CENTER] = "NotificationCenter", -- GitLab From e53fc232a65f7488ab75d03a5b95f06aaada7262 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Thu, 27 Feb 2025 15:41:33 -0800 Subject: [PATCH 1247/1585] HID: hid-steam: Fix use-after-free when detaching device When a hid-steam device is removed it must clean up the client_hdev used for intercepting hidraw access. This can lead to scheduling deferred work to reattach the input device. Though the cleanup cancels the deferred work, this was done before the client_hdev itself is cleaned up, so it gets rescheduled. This patch fixes the ordering to make sure the deferred work is properly canceled. Reported-by: syzbot+0154da2d403396b2bd59@syzkaller.appspotmail.com Fixes: 79504249d7e2 ("HID: hid-steam: Move hidraw input (un)registering to work") Signed-off-by: Vicki Pfau Signed-off-by: Jiri Kosina --- drivers/hid/hid-steam.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index c9e65e9088b31..10460b7bde1a2 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -1327,11 +1327,11 @@ static void steam_remove(struct hid_device *hdev) return; } + hid_destroy_device(steam->client_hdev); cancel_delayed_work_sync(&steam->mode_switch); cancel_work_sync(&steam->work_connect); cancel_work_sync(&steam->rumble_work); cancel_work_sync(&steam->unregister_work); - hid_destroy_device(steam->client_hdev); steam->client_hdev = NULL; steam->client_opened = 0; if (steam->quirks & STEAM_QUIRK_WIRELESS) { -- GitLab From 0132c406705a466b95854ce1058f3d8354f90a42 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 28 Feb 2025 08:41:39 +0000 Subject: [PATCH 1248/1585] HID: intel-thc-hid: Fix spelling mistake "intput" -> "input" There is a spelling mistake in a dev_err_once message. Fix it. Signed-off-by: Colin Ian King Reviewed-by: Even Xu Signed-off-by: Jiri Kosina --- drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c index 7373238ceb18b..918050af73e55 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c +++ b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c @@ -107,7 +107,7 @@ static int quickspi_get_device_descriptor(struct quickspi_device *qsdev) return 0; } - dev_err_once(qsdev->dev, "Unexpected intput report type: %d\n", input_rep_type); + dev_err_once(qsdev->dev, "Unexpected input report type: %d\n", input_rep_type); return -EINVAL; } -- GitLab From db52926fb0be40e1d588a346df73f5ea3a34a4c6 Mon Sep 17 00:00:00 2001 From: Even Xu Date: Tue, 4 Mar 2025 11:22:55 +0800 Subject: [PATCH 1249/1585] HID: Intel-thc-hid: Intel-quickspi: Correct device state after S4 During S4 retore flow, quickspi device was resetted by driver and state was changed to RESETTED. It is needed to be change to ENABLED state after S4 re-initialization finished, otherwise, device will run in wrong state and HID input data will be dropped. Signed-off-by: Even Xu Fixes: 6912aaf3fd24 ("HID: intel-thc-hid: intel-quickspi: Add PM implementation") Signed-off-by: Jiri Kosina --- drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c b/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c index 4641e818dfa44..6b2c7620be2b1 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c +++ b/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c @@ -909,6 +909,8 @@ static int quickspi_restore(struct device *device) thc_change_ltr_mode(qsdev->thc_hw, THC_LTR_MODE_ACTIVE); + qsdev->state = QUICKSPI_ENABLED; + return 0; } -- GitLab From 5eb3dc1396aa7e315486b24df80df782912334b7 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Thu, 27 Feb 2025 11:33:40 +0100 Subject: [PATCH 1250/1585] net: ipa: Fix v4.7 resource group names In the downstream IPA driver there's only one group defined for source and destination, and the destination group doesn't have a _DPL suffix. Fixes: b310de784bac ("net: ipa: add IPA v4.7 support") Signed-off-by: Luca Weiss Reviewed-by: Alex Elder Link: https://patch.msgid.link/20250227-ipa-v4-7-fixes-v1-1-a88dd8249d8a@fairphone.com Signed-off-by: Jakub Kicinski --- drivers/net/ipa/data/ipa_data-v4.7.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ipa/data/ipa_data-v4.7.c b/drivers/net/ipa/data/ipa_data-v4.7.c index c8c23d9be961b..7e315779e6648 100644 --- a/drivers/net/ipa/data/ipa_data-v4.7.c +++ b/drivers/net/ipa/data/ipa_data-v4.7.c @@ -28,12 +28,10 @@ enum ipa_resource_type { enum ipa_rsrc_group_id { /* Source resource group identifiers */ IPA_RSRC_GROUP_SRC_UL_DL = 0, - IPA_RSRC_GROUP_SRC_UC_RX_Q, IPA_RSRC_GROUP_SRC_COUNT, /* Last in set; not a source group */ /* Destination resource group identifiers */ - IPA_RSRC_GROUP_DST_UL_DL_DPL = 0, - IPA_RSRC_GROUP_DST_UNUSED_1, + IPA_RSRC_GROUP_DST_UL_DL = 0, IPA_RSRC_GROUP_DST_COUNT, /* Last; not a destination group */ }; @@ -81,7 +79,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { }, .endpoint = { .config = { - .resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL, + .resource_group = IPA_RSRC_GROUP_DST_UL_DL, .aggregation = true, .status_enable = true, .rx = { @@ -128,7 +126,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { }, .endpoint = { .config = { - .resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL, + .resource_group = IPA_RSRC_GROUP_DST_UL_DL, .qmap = true, .aggregation = true, .rx = { @@ -197,12 +195,12 @@ static const struct ipa_resource ipa_resource_src[] = { /* Destination resource configuration data for an SoC having IPA v4.7 */ static const struct ipa_resource ipa_resource_dst[] = { [IPA_RESOURCE_TYPE_DST_DATA_SECTORS] = { - .limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = { + .limits[IPA_RSRC_GROUP_DST_UL_DL] = { .min = 7, .max = 7, }, }, [IPA_RESOURCE_TYPE_DST_DPS_DMARS] = { - .limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = { + .limits[IPA_RSRC_GROUP_DST_UL_DL] = { .min = 2, .max = 2, }, }, -- GitLab From 6a2843aaf551d87beb92d774f7d5b8ae007fe774 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Thu, 27 Feb 2025 11:33:41 +0100 Subject: [PATCH 1251/1585] net: ipa: Fix QSB data for v4.7 As per downstream reference, max_writes should be 12 and max_reads should be 13. Fixes: b310de784bac ("net: ipa: add IPA v4.7 support") Signed-off-by: Luca Weiss Reviewed-by: Alex Elder Link: https://patch.msgid.link/20250227-ipa-v4-7-fixes-v1-2-a88dd8249d8a@fairphone.com Signed-off-by: Jakub Kicinski --- drivers/net/ipa/data/ipa_data-v4.7.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ipa/data/ipa_data-v4.7.c b/drivers/net/ipa/data/ipa_data-v4.7.c index 7e315779e6648..e63dcf8d45567 100644 --- a/drivers/net/ipa/data/ipa_data-v4.7.c +++ b/drivers/net/ipa/data/ipa_data-v4.7.c @@ -38,8 +38,8 @@ enum ipa_rsrc_group_id { /* QSB configuration data for an SoC having IPA v4.7 */ static const struct ipa_qsb_data ipa_qsb_data[] = { [IPA_QSB_MASTER_DDR] = { - .max_writes = 8, - .max_reads = 0, /* no limit (hardware max) */ + .max_writes = 12, + .max_reads = 13, .max_reads_beats = 120, }, }; -- GitLab From 934e69669e32eb653234898424ae007bae2f636e Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Thu, 27 Feb 2025 11:33:42 +0100 Subject: [PATCH 1252/1585] net: ipa: Enable checksum for IPA_ENDPOINT_AP_MODEM_{RX,TX} for v4.7 Enable the checksum option for these two endpoints in order to allow mobile data to actually work. Without this, no packets seem to make it through the IPA. Fixes: b310de784bac ("net: ipa: add IPA v4.7 support") Signed-off-by: Luca Weiss Reviewed-by: Alex Elder Link: https://patch.msgid.link/20250227-ipa-v4-7-fixes-v1-3-a88dd8249d8a@fairphone.com Signed-off-by: Jakub Kicinski --- drivers/net/ipa/data/ipa_data-v4.7.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ipa/data/ipa_data-v4.7.c b/drivers/net/ipa/data/ipa_data-v4.7.c index e63dcf8d45567..41f212209993f 100644 --- a/drivers/net/ipa/data/ipa_data-v4.7.c +++ b/drivers/net/ipa/data/ipa_data-v4.7.c @@ -104,6 +104,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .filter_support = true, .config = { .resource_group = IPA_RSRC_GROUP_SRC_UL_DL, + .checksum = true, .qmap = true, .status_enable = true, .tx = { @@ -127,6 +128,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .endpoint = { .config = { .resource_group = IPA_RSRC_GROUP_DST_UL_DL, + .checksum = true, .qmap = true, .aggregation = true, .rx = { -- GitLab From 4c2d14c40a68678d885eab4008a0129646805bae Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Fri, 28 Feb 2025 22:14:08 +0800 Subject: [PATCH 1253/1585] ppp: Fix KMSAN uninit-value warning with bpf Syzbot caught an "KMSAN: uninit-value" warning [1], which is caused by the ppp driver not initializing a 2-byte header when using socket filter. The following code can generate a PPP filter BPF program: ''' struct bpf_program fp; pcap_t *handle; handle = pcap_open_dead(DLT_PPP_PPPD, 65535); pcap_compile(handle, &fp, "ip and outbound", 0, 0); bpf_dump(&fp, 1); ''' Its output is: ''' (000) ldh [2] (001) jeq #0x21 jt 2 jf 5 (002) ldb [0] (003) jeq #0x1 jt 4 jf 5 (004) ret #65535 (005) ret #0 ''' Wen can find similar code at the following link: https://github.com/ppp-project/ppp/blob/master/pppd/options.c#L1680 The maintainer of this code repository is also the original maintainer of the ppp driver. As you can see the BPF program skips 2 bytes of data and then reads the 'Protocol' field to determine if it's an IP packet. Then it read the first byte of the first 2 bytes to determine the direction. The issue is that only the first byte indicating direction is initialized in current ppp driver code while the second byte is not initialized. For normal BPF programs generated by libpcap, uninitialized data won't be used, so it's not a problem. However, for carefully crafted BPF programs, such as those generated by syzkaller [2], which start reading from offset 0, the uninitialized data will be used and caught by KMSAN. [1] https://syzkaller.appspot.com/bug?extid=853242d9c9917165d791 [2] https://syzkaller.appspot.com/text?tag=ReproC&x=11994913980000 Cc: Paul Mackerras Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+853242d9c9917165d791@syzkaller.appspotmail.com Closes: https://lore.kernel.org/bpf/000000000000dea025060d6bc3bc@google.com/ Signed-off-by: Jiayuan Chen Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250228141408.393864-1-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ppp/ppp_generic.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 4583e15ad03a0..1420c4efa48e6 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -72,6 +72,17 @@ #define PPP_PROTO_LEN 2 #define PPP_LCP_HDRLEN 4 +/* The filter instructions generated by libpcap are constructed + * assuming a four-byte PPP header on each packet, where the last + * 2 bytes are the protocol field defined in the RFC and the first + * byte of the first 2 bytes indicates the direction. + * The second byte is currently unused, but we still need to initialize + * it to prevent crafted BPF programs from reading them which would + * cause reading of uninitialized data. + */ +#define PPP_FILTER_OUTBOUND_TAG 0x0100 +#define PPP_FILTER_INBOUND_TAG 0x0000 + /* * An instance of /dev/ppp can be associated with either a ppp * interface unit or a ppp channel. In both cases, file->private_data @@ -1762,10 +1773,10 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) if (proto < 0x8000) { #ifdef CONFIG_PPP_FILTER - /* check if we should pass this packet */ - /* the filter instructions are constructed assuming - a four-byte PPP header on each packet */ - *(u8 *)skb_push(skb, 2) = 1; + /* check if the packet passes the pass and active filters. + * See comment for PPP_FILTER_OUTBOUND_TAG above. + */ + *(__be16 *)skb_push(skb, 2) = htons(PPP_FILTER_OUTBOUND_TAG); if (ppp->pass_filter && bpf_prog_run(ppp->pass_filter, skb) == 0) { if (ppp->debug & 1) @@ -2482,14 +2493,13 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) /* network protocol frame - give it to the kernel */ #ifdef CONFIG_PPP_FILTER - /* check if the packet passes the pass and active filters */ - /* the filter instructions are constructed assuming - a four-byte PPP header on each packet */ if (ppp->pass_filter || ppp->active_filter) { if (skb_unclone(skb, GFP_ATOMIC)) goto err; - - *(u8 *)skb_push(skb, 2) = 0; + /* Check if the packet passes the pass and active filters. + * See comment for PPP_FILTER_INBOUND_TAG above. + */ + *(__be16 *)skb_push(skb, 2) = htons(PPP_FILTER_INBOUND_TAG); if (ppp->pass_filter && bpf_prog_run(ppp->pass_filter, skb) == 0) { if (ppp->debug & 1) -- GitLab From 637399bf7e77797811adf340090b561a8f9d1213 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Sat, 1 Mar 2025 15:11:13 +0100 Subject: [PATCH 1254/1585] net: ethtool: netlink: Allow NULL nlattrs when getting a phy_device ethnl_req_get_phydev() is used to lookup a phy_device, in the case an ethtool netlink command targets a specific phydev within a netdev's topology. It takes as a parameter a const struct nlattr *header that's used for error handling : if (!phydev) { NL_SET_ERR_MSG_ATTR(extack, header, "no phy matching phyindex"); return ERR_PTR(-ENODEV); } In the notify path after a ->set operation however, there's no request attributes available. The typical callsite for the above function looks like: phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_XXX_HEADER], info->extack); So, when tb is NULL (such as in the ethnl notify path), we have a nice crash. It turns out that there's only the PLCA command that is in that case, as the other phydev-specific commands don't have a notification. This commit fixes the crash by passing the cmd index and the nlattr array separately, allowing NULL-checking it directly inside the helper. Fixes: c15e065b46dc ("net: ethtool: Allow passing a phy index for some commands") Signed-off-by: Maxime Chevallier Reviewed-by: Kory Maincent Reported-by: Parthiban Veerasooran Link: https://patch.msgid.link/20250301141114.97204-1-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- net/ethtool/cabletest.c | 8 ++++---- net/ethtool/linkstate.c | 2 +- net/ethtool/netlink.c | 6 +++--- net/ethtool/netlink.h | 5 +++-- net/ethtool/phy.c | 2 +- net/ethtool/plca.c | 6 +++--- net/ethtool/pse-pd.c | 4 ++-- net/ethtool/stats.c | 2 +- net/ethtool/strset.c | 2 +- 9 files changed, 19 insertions(+), 18 deletions(-) diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c index f22051f33868a..84096f6b0236e 100644 --- a/net/ethtool/cabletest.c +++ b/net/ethtool/cabletest.c @@ -72,8 +72,8 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info) dev = req_info.dev; rtnl_lock(); - phydev = ethnl_req_get_phydev(&req_info, - tb[ETHTOOL_A_CABLE_TEST_HEADER], + phydev = ethnl_req_get_phydev(&req_info, tb, + ETHTOOL_A_CABLE_TEST_HEADER, info->extack); if (IS_ERR_OR_NULL(phydev)) { ret = -EOPNOTSUPP; @@ -339,8 +339,8 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info) goto out_dev_put; rtnl_lock(); - phydev = ethnl_req_get_phydev(&req_info, - tb[ETHTOOL_A_CABLE_TEST_TDR_HEADER], + phydev = ethnl_req_get_phydev(&req_info, tb, + ETHTOOL_A_CABLE_TEST_TDR_HEADER, info->extack); if (IS_ERR_OR_NULL(phydev)) { ret = -EOPNOTSUPP; diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c index af19e1bed303f..05a5f72c99fab 100644 --- a/net/ethtool/linkstate.c +++ b/net/ethtool/linkstate.c @@ -103,7 +103,7 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base, struct phy_device *phydev; int ret; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_LINKSTATE_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_LINKSTATE_HEADER, info->extack); if (IS_ERR(phydev)) { ret = PTR_ERR(phydev); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index b4c45207fa32e..734849a573691 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -211,7 +211,7 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, } struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info, - const struct nlattr *header, + struct nlattr **tb, unsigned int header, struct netlink_ext_ack *extack) { struct phy_device *phydev; @@ -225,8 +225,8 @@ struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info, return req_info->dev->phydev; phydev = phy_link_topo_get_phy(req_info->dev, req_info->phy_index); - if (!phydev) { - NL_SET_ERR_MSG_ATTR(extack, header, + if (!phydev && tb) { + NL_SET_ERR_MSG_ATTR(extack, tb[header], "no phy matching phyindex"); return ERR_PTR(-ENODEV); } diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index ff69ca0715dea..ec6ab5443a6f2 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -275,7 +275,8 @@ static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info) * ethnl_req_get_phydev() - Gets the phy_device targeted by this request, * if any. Must be called under rntl_lock(). * @req_info: The ethnl request to get the phy from. - * @header: The netlink header, used for error reporting. + * @tb: The netlink attributes array, for error reporting. + * @header: The netlink header index, used for error reporting. * @extack: The netlink extended ACK, for error reporting. * * The caller must hold RTNL, until it's done interacting with the returned @@ -289,7 +290,7 @@ static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info) * is returned. */ struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info, - const struct nlattr *header, + struct nlattr **tb, unsigned int header, struct netlink_ext_ack *extack); /** diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c index ed8f690f6bac8..e067cc234419d 100644 --- a/net/ethtool/phy.c +++ b/net/ethtool/phy.c @@ -125,7 +125,7 @@ static int ethnl_phy_parse_request(struct ethnl_req_info *req_base, struct phy_req_info *req_info = PHY_REQINFO(req_base); struct phy_device *phydev; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PHY_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PHY_HEADER, extack); if (!phydev) return 0; diff --git a/net/ethtool/plca.c b/net/ethtool/plca.c index d95d92f173a6d..e1f7820a6158f 100644 --- a/net/ethtool/plca.c +++ b/net/ethtool/plca.c @@ -62,7 +62,7 @@ static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base, struct phy_device *phydev; int ret; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PLCA_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PLCA_HEADER, info->extack); // check that the PHY device is available and connected if (IS_ERR_OR_NULL(phydev)) { @@ -152,7 +152,7 @@ ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info) bool mod = false; int ret; - phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PLCA_HEADER], + phydev = ethnl_req_get_phydev(req_info, tb, ETHTOOL_A_PLCA_HEADER, info->extack); // check that the PHY device is available and connected if (IS_ERR_OR_NULL(phydev)) @@ -211,7 +211,7 @@ static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base, struct phy_device *phydev; int ret; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PLCA_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PLCA_HEADER, info->extack); // check that the PHY device is available and connected if (IS_ERR_OR_NULL(phydev)) { diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c index 2819e2ba6be2d..4f6b99eab2a6c 100644 --- a/net/ethtool/pse-pd.c +++ b/net/ethtool/pse-pd.c @@ -64,7 +64,7 @@ static int pse_prepare_data(const struct ethnl_req_info *req_base, if (ret < 0) return ret; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PSE_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PSE_HEADER, info->extack); if (IS_ERR(phydev)) return -ENODEV; @@ -261,7 +261,7 @@ ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info) struct phy_device *phydev; int ret; - phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PSE_HEADER], + phydev = ethnl_req_get_phydev(req_info, tb, ETHTOOL_A_PSE_HEADER, info->extack); ret = ethnl_set_pse_validate(phydev, info); if (ret) diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c index 038a2558f0520..3ca8eb2a3b314 100644 --- a/net/ethtool/stats.c +++ b/net/ethtool/stats.c @@ -138,7 +138,7 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base, struct phy_device *phydev; int ret; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_STATS_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_STATS_HEADER, info->extack); if (IS_ERR(phydev)) return PTR_ERR(phydev); diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index 6b76c05caba4d..f6a67109beda1 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -309,7 +309,7 @@ static int strset_prepare_data(const struct ethnl_req_info *req_base, return 0; } - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_HEADER_FLAGS], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_HEADER_FLAGS, info->extack); /* phydev can be NULL, check for errors only */ -- GitLab From 022bfe24aad8937705704ff2e414b100cf0f2e1a Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Mon, 3 Mar 2025 18:10:13 +0100 Subject: [PATCH 1255/1585] mptcp: fix 'scheduling while atomic' in mptcp_pm_nl_append_new_local_addr If multiple connection requests attempt to create an implicit mptcp endpoint in parallel, more than one caller may end up in mptcp_pm_nl_append_new_local_addr because none found the address in local_addr_list during their call to mptcp_pm_nl_get_local_id. In this case, the concurrent new_local_addr calls may delete the address entry created by the previous caller. These deletes use synchronize_rcu, but this is not permitted in some of the contexts where this function may be called. During packet recv, the caller may be in a rcu read critical section and have preemption disabled. An example stack: BUG: scheduling while atomic: swapper/2/0/0x00000302 Call Trace: dump_stack_lvl (lib/dump_stack.c:117 (discriminator 1)) dump_stack (lib/dump_stack.c:124) __schedule_bug (kernel/sched/core.c:5943) schedule_debug.constprop.0 (arch/x86/include/asm/preempt.h:33 kernel/sched/core.c:5970) __schedule (arch/x86/include/asm/jump_label.h:27 include/linux/jump_label.h:207 kernel/sched/features.h:29 kernel/sched/core.c:6621) schedule (arch/x86/include/asm/preempt.h:84 kernel/sched/core.c:6804 kernel/sched/core.c:6818) schedule_timeout (kernel/time/timer.c:2160) wait_for_completion (kernel/sched/completion.c:96 kernel/sched/completion.c:116 kernel/sched/completion.c:127 kernel/sched/completion.c:148) __wait_rcu_gp (include/linux/rcupdate.h:311 kernel/rcu/update.c:444) synchronize_rcu (kernel/rcu/tree.c:3609) mptcp_pm_nl_append_new_local_addr (net/mptcp/pm_netlink.c:966 net/mptcp/pm_netlink.c:1061) mptcp_pm_nl_get_local_id (net/mptcp/pm_netlink.c:1164) mptcp_pm_get_local_id (net/mptcp/pm.c:420) subflow_check_req (net/mptcp/subflow.c:98 net/mptcp/subflow.c:213) subflow_v4_route_req (net/mptcp/subflow.c:305) tcp_conn_request (net/ipv4/tcp_input.c:7216) subflow_v4_conn_request (net/mptcp/subflow.c:651) tcp_rcv_state_process (net/ipv4/tcp_input.c:6709) tcp_v4_do_rcv (net/ipv4/tcp_ipv4.c:1934) tcp_v4_rcv (net/ipv4/tcp_ipv4.c:2334) ip_protocol_deliver_rcu (net/ipv4/ip_input.c:205 (discriminator 1)) ip_local_deliver_finish (include/linux/rcupdate.h:813 net/ipv4/ip_input.c:234) ip_local_deliver (include/linux/netfilter.h:314 include/linux/netfilter.h:308 net/ipv4/ip_input.c:254) ip_sublist_rcv_finish (include/net/dst.h:461 net/ipv4/ip_input.c:580) ip_sublist_rcv (net/ipv4/ip_input.c:640) ip_list_rcv (net/ipv4/ip_input.c:675) __netif_receive_skb_list_core (net/core/dev.c:5583 net/core/dev.c:5631) netif_receive_skb_list_internal (net/core/dev.c:5685 net/core/dev.c:5774) napi_complete_done (include/linux/list.h:37 include/net/gro.h:449 include/net/gro.h:444 net/core/dev.c:6114) igb_poll (drivers/net/ethernet/intel/igb/igb_main.c:8244) igb __napi_poll (net/core/dev.c:6582) net_rx_action (net/core/dev.c:6653 net/core/dev.c:6787) handle_softirqs (kernel/softirq.c:553) __irq_exit_rcu (kernel/softirq.c:588 kernel/softirq.c:427 kernel/softirq.c:636) irq_exit_rcu (kernel/softirq.c:651) common_interrupt (arch/x86/kernel/irq.c:247 (discriminator 14)) This problem seems particularly prevalent if the user advertises an endpoint that has a different external vs internal address. In the case where the external address is advertised and multiple connections already exist, multiple subflow SYNs arrive in parallel which tends to trigger the race during creation of the first local_addr_list entries which have the internal address instead. Fix by skipping the replacement of an existing implicit local address if called via mptcp_pm_nl_get_local_id. Fixes: d045b9eb95a9 ("mptcp: introduce implicit endpoints") Cc: stable@vger.kernel.org Suggested-by: Paolo Abeni Signed-off-by: Krister Johansen Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250303-net-mptcp-fix-sched-while-atomic-v1-1-f6a216c5a74c@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index c0e47f4f7b1aa..7868207c4e9d9 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -977,7 +977,7 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, struct mptcp_pm_addr_entry *entry, - bool needs_id) + bool needs_id, bool replace) { struct mptcp_pm_addr_entry *cur, *del_entry = NULL; unsigned int addr_max; @@ -1017,6 +1017,17 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, if (entry->addr.id) goto out; + /* allow callers that only need to look up the local + * addr's id to skip replacement. This allows them to + * avoid calling synchronize_rcu in the packet recv + * path. + */ + if (!replace) { + kfree(entry); + ret = cur->addr.id; + goto out; + } + pernet->addrs--; entry->addr.id = cur->addr.id; list_del_rcu(&cur->list); @@ -1165,7 +1176,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc entry->ifindex = 0; entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; entry->lsk = NULL; - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false); if (ret < 0) kfree(entry); @@ -1433,7 +1444,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) } } ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, - !mptcp_pm_has_addr_attr_id(attr, info)); + !mptcp_pm_has_addr_attr_id(attr, info), + true); if (ret < 0) { GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); goto out_free; -- GitLab From b33a534610067ade2bdaf2052900aaad99701353 Mon Sep 17 00:00:00 2001 From: Oscar Maes Date: Mon, 3 Mar 2025 16:56:19 +0100 Subject: [PATCH 1256/1585] vlan: enforce underlying device type Currently, VLAN devices can be created on top of non-ethernet devices. Besides the fact that it doesn't make much sense, this also causes a bug which leaks the address of a kernel function to usermode. When creating a VLAN device, we initialize GARP (garp_init_applicant) and MRP (mrp_init_applicant) for the underlying device. As part of the initialization process, we add the multicast address of each applicant to the underlying device, by calling dev_mc_add. __dev_mc_add uses dev->addr_len to determine the length of the new multicast address. This causes an out-of-bounds read if dev->addr_len is greater than 6, since the multicast addresses provided by GARP and MRP are only 6 bytes long. This behaviour can be reproduced using the following commands: ip tunnel add gretest mode ip6gre local ::1 remote ::2 dev lo ip l set up dev gretest ip link add link gretest name vlantest type vlan id 100 Then, the following command will display the address of garp_pdu_rcv: ip maddr show | grep 01:80:c2:00:00:21 Fix the bug by enforcing the type of the underlying device during VLAN device initialization. Fixes: 22bedad3ce11 ("net: convert multicast list to list_head") Reported-by: syzbot+91161fe81857b396c8a0@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/000000000000ca9a81061a01ec20@google.com/ Signed-off-by: Oscar Maes Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20250303155619.8918-1-oscmaes92@gmail.com Signed-off-by: Jakub Kicinski --- net/8021q/vlan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index e45187b882206..41be38264493d 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -131,7 +131,8 @@ int vlan_check_real_dev(struct net_device *real_dev, { const char *name = real_dev->name; - if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { + if (real_dev->features & NETIF_F_VLAN_CHALLENGED || + real_dev->type != ARPHRD_ETHER) { pr_info("VLANs not supported on %s\n", name); NL_SET_ERR_MSG_MOD(extack, "VLANs not supported on device"); return -EOPNOTSUPP; -- GitLab From 0d3e0dfd68fb9e6b0ec865be9f3377cc3ff55733 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Wed, 5 Mar 2025 07:00:05 +0200 Subject: [PATCH 1257/1585] x86/sgx: Fix size overflows in sgx_encl_create() The total size calculated for EPC can overflow u64 given the added up page for SECS. Further, the total size calculated for shmem can overflow even when the EPC size stays within limits of u64, given that it adds the extra space for 128 byte PCMD structures (one for each page). Address this by pre-evaluating the micro-architectural requirement of SGX: the address space size must be power of two. This is eventually checked up by ECREATE but the pre-check has the additional benefit of making sure that there is some space for additional data. Fixes: 888d24911787 ("x86/sgx: Add SGX_IOC_ENCLAVE_CREATE") Reported-by: Dan Carpenter Signed-off-by: Jarkko Sakkinen Signed-off-by: Ingo Molnar Acked-by: Dave Hansen Cc: Peter Zijlstra Cc: "H. Peter Anvin" Link: https://lore.kernel.org/r/20250305050006.43896-1-jarkko@kernel.org Closes: https://lore.kernel.org/linux-sgx/c87e01a0-e7dd-4749-a348-0980d3444f04@stanley.mountain/ --- arch/x86/kernel/cpu/sgx/ioctl.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c index b65ab214bdf57..776a20172867e 100644 --- a/arch/x86/kernel/cpu/sgx/ioctl.c +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -64,6 +64,13 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs) struct file *backing; long ret; + /* + * ECREATE would detect this too, but checking here also ensures + * that the 'encl_size' calculations below can never overflow. + */ + if (!is_power_of_2(secs->size)) + return -EINVAL; + va_page = sgx_encl_grow(encl, true); if (IS_ERR(va_page)) return PTR_ERR(va_page); -- GitLab From ca0dedaff92307591f66c9206933fbdfe87add10 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 5 Mar 2025 13:54:34 +0800 Subject: [PATCH 1258/1585] ALSA: hda/realtek: update ALC222 depop optimize Add ALC222 its own depop functions for alc_init and alc_shutup. [note: this fixes pop noise issues on the models with two headphone jacks -- tiwai ] Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 76 +++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 697a38e41e166..4ca457e7ca9dd 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3843,6 +3843,79 @@ static void alc225_shutup(struct hda_codec *codec) } } +static void alc222_init(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + hda_nid_t hp_pin = alc_get_hp_pin(spec); + bool hp1_pin_sense, hp2_pin_sense; + + if (!hp_pin) + return; + + msleep(30); + + hp1_pin_sense = snd_hda_jack_detect(codec, hp_pin); + hp2_pin_sense = snd_hda_jack_detect(codec, 0x14); + + if (hp1_pin_sense || hp2_pin_sense) { + msleep(2); + + if (hp1_pin_sense) + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT); + if (hp2_pin_sense) + snd_hda_codec_write(codec, 0x14, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT); + msleep(75); + + if (hp1_pin_sense) + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE); + if (hp2_pin_sense) + snd_hda_codec_write(codec, 0x14, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE); + + msleep(75); + } +} + +static void alc222_shutup(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + hda_nid_t hp_pin = alc_get_hp_pin(spec); + bool hp1_pin_sense, hp2_pin_sense; + + if (!hp_pin) + hp_pin = 0x21; + + hp1_pin_sense = snd_hda_jack_detect(codec, hp_pin); + hp2_pin_sense = snd_hda_jack_detect(codec, 0x14); + + if (hp1_pin_sense || hp2_pin_sense) { + msleep(2); + + if (hp1_pin_sense) + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); + if (hp2_pin_sense) + snd_hda_codec_write(codec, 0x14, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); + + msleep(75); + + if (hp1_pin_sense) + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); + if (hp2_pin_sense) + snd_hda_codec_write(codec, 0x14, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); + + msleep(75); + } + alc_auto_setup_eapd(codec, false); + alc_shutup_pins(codec); +} + static void alc_default_init(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; @@ -11907,8 +11980,11 @@ static int patch_alc269(struct hda_codec *codec) spec->codec_variant = ALC269_TYPE_ALC300; spec->gen.mixer_nid = 0; /* no loopback on ALC300 */ break; + case 0x10ec0222: case 0x10ec0623: spec->codec_variant = ALC269_TYPE_ALC623; + spec->shutup = alc222_shutup; + spec->init_hook = alc222_init; break; case 0x10ec0700: case 0x10ec0701: -- GitLab From 1ee5aa765c22a0577ec552d460bf2035300b4b51 Mon Sep 17 00:00:00 2001 From: Hoku Ishibe Date: Sun, 23 Feb 2025 21:05:17 -0500 Subject: [PATCH 1259/1585] ALSA: hda: intel: Add Dell ALC3271 to power_save denylist Dell XPS 13 7390 with the Realtek ALC3271 codec experiences persistent humming noise when the power_save mode is enabled. This issue occurs when the codec enters power saving mode, leading to unwanted noise from the speakers. This patch adds the affected model (PCI ID 0x1028:0x0962) to the power_save denylist to ensure power_save is disabled by default, preventing power-off related noise issues. Steps to Reproduce 1. Boot the system with `snd_hda_intel` loaded. 2. Verify that `power_save` mode is enabled: ```sh cat /sys/module/snd_hda_intel/parameters/power_save ```` output: 10 (default power save timeout) 3. Wait for the power save timeout 4. Observe a persistent humming noise from the speakers 5. Disable `power_save` manually: ```sh echo 0 | sudo tee /sys/module/snd_hda_intel/parameters/power_save ```` 6. Confirm that the noise disappears immediately. This issue has been observed on my system, and this patch successfully eliminates the unwanted noise. If other users experience similar issues, additional reports would be helpful. Signed-off-by: Hoku Ishibe Cc: Link: https://patch.msgid.link/20250224020517.51035-1-me@hokuishi.be Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 67540e0373099..e67c22c59f02b 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2232,6 +2232,8 @@ static const struct snd_pci_quirk power_save_denylist[] = { SND_PCI_QUIRK(0x1631, 0xe017, "Packard Bell NEC IMEDIA 5204", 0), /* KONTRON SinglePC may cause a stall at runtime resume */ SND_PCI_QUIRK(0x1734, 0x1232, "KONTRON SinglePC", 0), + /* Dell ALC3271 */ + SND_PCI_QUIRK(0x1028, 0x0962, "Dell ALC3271", 0), {} }; -- GitLab From 50dc696c3a482ea35bd0691f728d47e40b668483 Mon Sep 17 00:00:00 2001 From: Aiden Ma Date: Tue, 4 Mar 2025 19:54:01 +0800 Subject: [PATCH 1260/1585] doc: correcting two prefix errors in idmappings.rst Add the 'k' prefix to id 21000. And id `u1000` in the third idmapping should be mapped to `k31000`, not `u31000`. Signed-off-by: Aiden Ma Link: https://lore.kernel.org/r/tencent_4E7B1F143E8051530C21FCADF4E014DCBB06@qq.com Signed-off-by: Christian Brauner --- Documentation/filesystems/idmappings.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/idmappings.rst b/Documentation/filesystems/idmappings.rst index 77930c77fcfe6..2a206129f8284 100644 --- a/Documentation/filesystems/idmappings.rst +++ b/Documentation/filesystems/idmappings.rst @@ -63,8 +63,8 @@ what id ``k11000`` corresponds to in the second or third idmapping. The straightforward algorithm to use is to apply the inverse of the first idmapping, mapping ``k11000`` up to ``u1000``. Afterwards, we can map ``u1000`` down using either the second idmapping mapping or third idmapping mapping. The second -idmapping would map ``u1000`` down to ``21000``. The third idmapping would map -``u1000`` down to ``u31000``. +idmapping would map ``u1000`` down to ``k21000``. The third idmapping would map +``u1000`` down to ``k31000``. If we were given the same task for the following three idmappings:: -- GitLab From 9ba93cb8212d62bccd8b41b8adb6656abf37280a Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Wed, 5 Mar 2025 10:28:41 +0530 Subject: [PATCH 1261/1585] platform/x86/amd/pmf: Propagate PMF-TA return codes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the amd_pmf_invoke_cmd_init() function within the PMF driver ensure that the actual result from the PMF-TA is returned rather than a generic EIO. This change allows for proper handling of errors originating from the PMF-TA. Reviewed-by: Mario Limonciello Co-developed-by: Patil Rajesh Reddy Signed-off-by: Patil Rajesh Reddy Signed-off-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20250305045842.4117767-1-Shyam-sundar.S-k@amd.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmf/tee-if.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index 8c88769ea1d87..b404764550c4c 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -323,7 +323,7 @@ static int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev) } else { dev_err(dev->dev, "ta invoke cmd init failed err: %x\n", res); dev->smart_pc_enabled = false; - return -EIO; + return res; } return 0; -- GitLab From 376a8c2a144397d9cf2a67d403dd64f4a7ff9104 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Wed, 5 Mar 2025 10:28:42 +0530 Subject: [PATCH 1262/1585] platform/x86/amd/pmf: Update PMF Driver for Compatibility with new PMF-TA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PMF driver allocates a shared memory buffer using tee_shm_alloc_kernel_buf() for communication with the PMF-TA. The latest PMF-TA version introduces new structures with OEM debug information and additional policy input conditions for evaluating the policy binary. Consequently, the shared memory size must be increased to ensure compatibility between the PMF driver and the updated PMF-TA. To do so, introduce the new PMF-TA UUID and update the PMF shared memory configuration to ensure compatibility with the latest PMF-TA version. Additionally, export the TA UUID. These updates will result in modifications to the prototypes of amd_pmf_tee_init() and amd_pmf_ta_open_session(). Link: https://lore.kernel.org/all/55ac865f-b1c7-fa81-51c4-d211c7963e7e@linux.intel.com/ Reviewed-by: Mario Limonciello Co-developed-by: Patil Rajesh Reddy Signed-off-by: Patil Rajesh Reddy Signed-off-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20250305045842.4117767-2-Shyam-sundar.S-k@amd.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmf/pmf.h | 5 ++- drivers/platform/x86/amd/pmf/tee-if.c | 50 +++++++++++++++++++-------- 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/drivers/platform/x86/amd/pmf/pmf.h b/drivers/platform/x86/amd/pmf/pmf.h index 41b2b91b8fdc6..e6bdee68ccf34 100644 --- a/drivers/platform/x86/amd/pmf/pmf.h +++ b/drivers/platform/x86/amd/pmf/pmf.h @@ -106,9 +106,12 @@ struct cookie_header { #define PMF_TA_IF_VERSION_MAJOR 1 #define TA_PMF_ACTION_MAX 32 #define TA_PMF_UNDO_MAX 8 -#define TA_OUTPUT_RESERVED_MEM 906 +#define TA_OUTPUT_RESERVED_MEM 922 #define MAX_OPERATION_PARAMS 4 +#define TA_ERROR_CRYPTO_INVALID_PARAM 0x20002 +#define TA_ERROR_CRYPTO_BIN_TOO_LARGE 0x2000d + #define PMF_IF_V1 1 #define PMF_IF_V2 2 diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index b404764550c4c..ceaff1ebb7b93 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -27,8 +27,11 @@ module_param(pb_side_load, bool, 0444); MODULE_PARM_DESC(pb_side_load, "Sideload policy binaries debug policy failures"); #endif -static const uuid_t amd_pmf_ta_uuid = UUID_INIT(0x6fd93b77, 0x3fb8, 0x524d, - 0xb1, 0x2d, 0xc5, 0x29, 0xb1, 0x3d, 0x85, 0x43); +static const uuid_t amd_pmf_ta_uuid[] = { UUID_INIT(0xd9b39bf2, 0x66bd, 0x4154, 0xaf, 0xb8, 0x8a, + 0xcc, 0x2b, 0x2b, 0x60, 0xd6), + UUID_INIT(0x6fd93b77, 0x3fb8, 0x524d, 0xb1, 0x2d, 0xc5, + 0x29, 0xb1, 0x3d, 0x85, 0x43), + }; static const char *amd_pmf_uevent_as_str(unsigned int state) { @@ -321,7 +324,7 @@ static int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev) */ schedule_delayed_work(&dev->pb_work, msecs_to_jiffies(pb_actions_ms * 3)); } else { - dev_err(dev->dev, "ta invoke cmd init failed err: %x\n", res); + dev_dbg(dev->dev, "ta invoke cmd init failed err: %x\n", res); dev->smart_pc_enabled = false; return res; } @@ -390,12 +393,12 @@ static int amd_pmf_amdtee_ta_match(struct tee_ioctl_version_data *ver, const voi return ver->impl_id == TEE_IMPL_ID_AMDTEE; } -static int amd_pmf_ta_open_session(struct tee_context *ctx, u32 *id) +static int amd_pmf_ta_open_session(struct tee_context *ctx, u32 *id, const uuid_t *uuid) { struct tee_ioctl_open_session_arg sess_arg = {}; int rc; - export_uuid(sess_arg.uuid, &amd_pmf_ta_uuid); + export_uuid(sess_arg.uuid, uuid); sess_arg.clnt_login = TEE_IOCTL_LOGIN_PUBLIC; sess_arg.num_params = 0; @@ -434,7 +437,7 @@ static int amd_pmf_register_input_device(struct amd_pmf_dev *dev) return 0; } -static int amd_pmf_tee_init(struct amd_pmf_dev *dev) +static int amd_pmf_tee_init(struct amd_pmf_dev *dev, const uuid_t *uuid) { u32 size; int ret; @@ -445,7 +448,7 @@ static int amd_pmf_tee_init(struct amd_pmf_dev *dev) return PTR_ERR(dev->tee_ctx); } - ret = amd_pmf_ta_open_session(dev->tee_ctx, &dev->session_id); + ret = amd_pmf_ta_open_session(dev->tee_ctx, &dev->session_id, uuid); if (ret) { dev_err(dev->dev, "Failed to open TA session (%d)\n", ret); ret = -EINVAL; @@ -489,7 +492,8 @@ static void amd_pmf_tee_deinit(struct amd_pmf_dev *dev) int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) { - int ret; + bool status; + int ret, i; ret = apmf_check_smart_pc(dev); if (ret) { @@ -502,10 +506,6 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) return -ENODEV; } - ret = amd_pmf_tee_init(dev); - if (ret) - return ret; - INIT_DELAYED_WORK(&dev->pb_work, amd_pmf_invoke_cmd); ret = amd_pmf_set_dram_addr(dev, true); @@ -534,8 +534,30 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) goto error; } - ret = amd_pmf_start_policy_engine(dev); - if (ret) + for (i = 0; i < ARRAY_SIZE(amd_pmf_ta_uuid); i++) { + ret = amd_pmf_tee_init(dev, &amd_pmf_ta_uuid[i]); + if (ret) + return ret; + + ret = amd_pmf_start_policy_engine(dev); + switch (ret) { + case TA_PMF_TYPE_SUCCESS: + status = true; + break; + case TA_ERROR_CRYPTO_INVALID_PARAM: + case TA_ERROR_CRYPTO_BIN_TOO_LARGE: + amd_pmf_tee_deinit(dev); + status = false; + break; + default: + goto error; + } + + if (status) + break; + } + + if (!status && !pb_side_load) goto error; if (pb_side_load) -- GitLab From 12f65d1203507f7db3ba59930fe29a3b8eee9945 Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Mon, 24 Feb 2025 23:31:26 +0900 Subject: [PATCH 1263/1585] gpio: aggregator: protect driver attr handlers against module unload Both new_device_store and delete_device_store touch module global resources (e.g. gpio_aggregator_lock). To prevent race conditions with module unload, a reference needs to be held. Add try_module_get() in these handlers. For new_device_store, this eliminates what appears to be the most dangerous scenario: if an id is allocated from gpio_aggregator_idr but platform_device_register has not yet been called or completed, a concurrent module unload could fail to unregister/delete the device, leaving behind a dangling platform device/GPIO forwarder. This can result in various issues. The following simple reproducer demonstrates these problems: #!/bin/bash while :; do # note: whether 'gpiochip0 0' exists or not does not matter. echo 'gpiochip0 0' > /sys/bus/platform/drivers/gpio-aggregator/new_device done & while :; do modprobe gpio-aggregator modprobe -r gpio-aggregator done & wait Starting with the following warning, several kinds of warnings will appear and the system may become unstable: ------------[ cut here ]------------ list_del corruption, ffff888103e2e980->next is LIST_POISON1 (dead000000000100) WARNING: CPU: 1 PID: 1327 at lib/list_debug.c:56 __list_del_entry_valid_or_report+0xa3/0x120 [...] RIP: 0010:__list_del_entry_valid_or_report+0xa3/0x120 [...] Call Trace: ? __list_del_entry_valid_or_report+0xa3/0x120 ? __warn.cold+0x93/0xf2 ? __list_del_entry_valid_or_report+0xa3/0x120 ? report_bug+0xe6/0x170 ? __irq_work_queue_local+0x39/0xe0 ? handle_bug+0x58/0x90 ? exc_invalid_op+0x13/0x60 ? asm_exc_invalid_op+0x16/0x20 ? __list_del_entry_valid_or_report+0xa3/0x120 gpiod_remove_lookup_table+0x22/0x60 new_device_store+0x315/0x350 [gpio_aggregator] kernfs_fop_write_iter+0x137/0x1f0 vfs_write+0x262/0x430 ksys_write+0x60/0xd0 do_syscall_64+0x6c/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e [...] ---[ end trace 0000000000000000 ]--- Fixes: 828546e24280 ("gpio: Add GPIO Aggregator") Cc: stable@vger.kernel.org Signed-off-by: Koichiro Den Link: https://lore.kernel.org/r/20250224143134.3024598-2-koichiro.den@canonical.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-aggregator.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c index 65f41cc3eafcc..d668ddb2e81d3 100644 --- a/drivers/gpio/gpio-aggregator.c +++ b/drivers/gpio/gpio-aggregator.c @@ -119,10 +119,15 @@ static ssize_t new_device_store(struct device_driver *driver, const char *buf, struct platform_device *pdev; int res, id; + if (!try_module_get(THIS_MODULE)) + return -ENOENT; + /* kernfs guarantees string termination, so count + 1 is safe */ aggr = kzalloc(sizeof(*aggr) + count + 1, GFP_KERNEL); - if (!aggr) - return -ENOMEM; + if (!aggr) { + res = -ENOMEM; + goto put_module; + } memcpy(aggr->args, buf, count + 1); @@ -161,6 +166,7 @@ static ssize_t new_device_store(struct device_driver *driver, const char *buf, } aggr->pdev = pdev; + module_put(THIS_MODULE); return count; remove_table: @@ -175,6 +181,8 @@ static ssize_t new_device_store(struct device_driver *driver, const char *buf, kfree(aggr->lookups); free_ga: kfree(aggr); +put_module: + module_put(THIS_MODULE); return res; } @@ -203,13 +211,19 @@ static ssize_t delete_device_store(struct device_driver *driver, if (error) return error; + if (!try_module_get(THIS_MODULE)) + return -ENOENT; + mutex_lock(&gpio_aggregator_lock); aggr = idr_remove(&gpio_aggregator_idr, id); mutex_unlock(&gpio_aggregator_lock); - if (!aggr) + if (!aggr) { + module_put(THIS_MODULE); return -ENOENT; + } gpio_aggregator_free(aggr); + module_put(THIS_MODULE); return count; } static DRIVER_ATTR_WO(delete_device); -- GitLab From f02c41f87cfe61440c18bf77d1ef0a884b9ee2b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Tue, 21 Jan 2025 14:58:33 +0100 Subject: [PATCH 1264/1585] gpio: rcar: Use raw_spinlock to protect register access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use raw_spinlock in order to fix spurious messages about invalid context when spinlock debugging is enabled. The lock is only used to serialize register access. [ 4.239592] ============================= [ 4.239595] [ BUG: Invalid wait context ] [ 4.239599] 6.13.0-rc7-arm64-renesas-05496-gd088502a519f #35 Not tainted [ 4.239603] ----------------------------- [ 4.239606] kworker/u8:5/76 is trying to lock: [ 4.239609] ffff0000091898a0 (&p->lock){....}-{3:3}, at: gpio_rcar_config_interrupt_input_mode+0x34/0x164 [ 4.239641] other info that might help us debug this: [ 4.239643] context-{5:5} [ 4.239646] 5 locks held by kworker/u8:5/76: [ 4.239651] #0: ffff0000080fb148 ((wq_completion)async){+.+.}-{0:0}, at: process_one_work+0x190/0x62c [ 4.250180] OF: /soc/sound@ec500000/ports/port@0/endpoint: Read of boolean property 'frame-master' with a value. [ 4.254094] #1: ffff80008299bd80 ((work_completion)(&entry->work)){+.+.}-{0:0}, at: process_one_work+0x1b8/0x62c [ 4.254109] #2: ffff00000920c8f8 [ 4.258345] OF: /soc/sound@ec500000/ports/port@1/endpoint: Read of boolean property 'bitclock-master' with a value. [ 4.264803] (&dev->mutex){....}-{4:4}, at: __device_attach_async_helper+0x3c/0xdc [ 4.264820] #3: ffff00000a50ca40 (request_class#2){+.+.}-{4:4}, at: __setup_irq+0xa0/0x690 [ 4.264840] #4: [ 4.268872] OF: /soc/sound@ec500000/ports/port@1/endpoint: Read of boolean property 'frame-master' with a value. [ 4.273275] ffff00000a50c8c8 (lock_class){....}-{2:2}, at: __setup_irq+0xc4/0x690 [ 4.296130] renesas_sdhi_internal_dmac ee100000.mmc: mmc1 base at 0x00000000ee100000, max clock rate 200 MHz [ 4.304082] stack backtrace: [ 4.304086] CPU: 1 UID: 0 PID: 76 Comm: kworker/u8:5 Not tainted 6.13.0-rc7-arm64-renesas-05496-gd088502a519f #35 [ 4.304092] Hardware name: Renesas Salvator-X 2nd version board based on r8a77965 (DT) [ 4.304097] Workqueue: async async_run_entry_fn [ 4.304106] Call trace: [ 4.304110] show_stack+0x14/0x20 (C) [ 4.304122] dump_stack_lvl+0x6c/0x90 [ 4.304131] dump_stack+0x14/0x1c [ 4.304138] __lock_acquire+0xdfc/0x1584 [ 4.426274] lock_acquire+0x1c4/0x33c [ 4.429942] _raw_spin_lock_irqsave+0x5c/0x80 [ 4.434307] gpio_rcar_config_interrupt_input_mode+0x34/0x164 [ 4.440061] gpio_rcar_irq_set_type+0xd4/0xd8 [ 4.444422] __irq_set_trigger+0x5c/0x178 [ 4.448435] __setup_irq+0x2e4/0x690 [ 4.452012] request_threaded_irq+0xc4/0x190 [ 4.456285] devm_request_threaded_irq+0x7c/0xf4 [ 4.459398] ata1: link resume succeeded after 1 retries [ 4.460902] mmc_gpiod_request_cd_irq+0x68/0xe0 [ 4.470660] mmc_start_host+0x50/0xac [ 4.474327] mmc_add_host+0x80/0xe4 [ 4.477817] tmio_mmc_host_probe+0x2b0/0x440 [ 4.482094] renesas_sdhi_probe+0x488/0x6f4 [ 4.486281] renesas_sdhi_internal_dmac_probe+0x60/0x78 [ 4.491509] platform_probe+0x64/0xd8 [ 4.495178] really_probe+0xb8/0x2a8 [ 4.498756] __driver_probe_device+0x74/0x118 [ 4.503116] driver_probe_device+0x3c/0x154 [ 4.507303] __device_attach_driver+0xd4/0x160 [ 4.511750] bus_for_each_drv+0x84/0xe0 [ 4.515588] __device_attach_async_helper+0xb0/0xdc [ 4.520470] async_run_entry_fn+0x30/0xd8 [ 4.524481] process_one_work+0x210/0x62c [ 4.528494] worker_thread+0x1ac/0x340 [ 4.532245] kthread+0x10c/0x110 [ 4.535476] ret_from_fork+0x10/0x20 Signed-off-by: Niklas Söderlund Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250121135833.3769310-1-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-rcar.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index 2ecee3269a0cc..8e0544e924886 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -40,7 +40,7 @@ struct gpio_rcar_info { struct gpio_rcar_priv { void __iomem *base; - spinlock_t lock; + raw_spinlock_t lock; struct device *dev; struct gpio_chip gpio_chip; unsigned int irq_parent; @@ -123,7 +123,7 @@ static void gpio_rcar_config_interrupt_input_mode(struct gpio_rcar_priv *p, * "Setting Level-Sensitive Interrupt Input Mode" */ - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); /* Configure positive or negative logic in POSNEG */ gpio_rcar_modify_bit(p, POSNEG, hwirq, !active_high_rising_edge); @@ -142,7 +142,7 @@ static void gpio_rcar_config_interrupt_input_mode(struct gpio_rcar_priv *p, if (!level_trigger) gpio_rcar_write(p, INTCLR, BIT(hwirq)); - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); } static int gpio_rcar_irq_set_type(struct irq_data *d, unsigned int type) @@ -246,7 +246,7 @@ static void gpio_rcar_config_general_input_output_mode(struct gpio_chip *chip, * "Setting General Input Mode" */ - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); /* Configure positive logic in POSNEG */ gpio_rcar_modify_bit(p, POSNEG, gpio, false); @@ -261,7 +261,7 @@ static void gpio_rcar_config_general_input_output_mode(struct gpio_chip *chip, if (p->info.has_outdtsel && output) gpio_rcar_modify_bit(p, OUTDTSEL, gpio, false); - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); } static int gpio_rcar_request(struct gpio_chip *chip, unsigned offset) @@ -347,7 +347,7 @@ static int gpio_rcar_get_multiple(struct gpio_chip *chip, unsigned long *mask, return 0; } - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); outputs = gpio_rcar_read(p, INOUTSEL); m = outputs & bankmask; if (m) @@ -356,7 +356,7 @@ static int gpio_rcar_get_multiple(struct gpio_chip *chip, unsigned long *mask, m = ~outputs & bankmask; if (m) val |= gpio_rcar_read(p, INDT) & m; - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); bits[0] = val; return 0; @@ -367,9 +367,9 @@ static void gpio_rcar_set(struct gpio_chip *chip, unsigned offset, int value) struct gpio_rcar_priv *p = gpiochip_get_data(chip); unsigned long flags; - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); gpio_rcar_modify_bit(p, OUTDT, offset, value); - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); } static void gpio_rcar_set_multiple(struct gpio_chip *chip, unsigned long *mask, @@ -386,12 +386,12 @@ static void gpio_rcar_set_multiple(struct gpio_chip *chip, unsigned long *mask, if (!bankmask) return; - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); val = gpio_rcar_read(p, OUTDT); val &= ~bankmask; val |= (bankmask & bits[0]); gpio_rcar_write(p, OUTDT, val); - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); } static int gpio_rcar_direction_output(struct gpio_chip *chip, unsigned offset, @@ -505,7 +505,7 @@ static int gpio_rcar_probe(struct platform_device *pdev) return -ENOMEM; p->dev = dev; - spin_lock_init(&p->lock); + raw_spin_lock_init(&p->lock); /* Get device configuration from DT node */ ret = gpio_rcar_parse_dt(p, &npins); -- GitLab From 6697f819a10b238ccf01998c3f203d65d8374696 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Fri, 22 Nov 2024 10:50:55 +0800 Subject: [PATCH 1265/1585] exfat: fix just enough dentries but allocate a new cluster to dir This commit fixes the condition for allocating cluster to parent directory to avoid allocating new cluster to parent directory when there are just enough empty directory entries at the end of the parent directory. Fixes: af02c72d0b62 ("exfat: convert exfat_find_empty_entry() to use dentry cache") Signed-off-by: Yuezhang Mo Signed-off-by: Namjae Jeon --- fs/exfat/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 691dd77b6ab5f..5b16181a4c2e9 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -232,7 +232,7 @@ static int exfat_search_empty_slot(struct super_block *sb, dentry = 0; } - while (dentry + num_entries < total_entries && + while (dentry + num_entries <= total_entries && clu.dir != EXFAT_EOF_CLUSTER) { i = dentry & (dentries_per_clu - 1); -- GitLab From 9da33619e0ca53627641bc97d1b93ec741299111 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 31 Jan 2025 12:55:55 +0900 Subject: [PATCH 1266/1585] exfat: fix soft lockup in exfat_clear_bitmap bitmap clear loop will take long time in __exfat_free_cluster() if data size of file/dir enty is invalid. If cluster bit in bitmap is already clear, stop clearing bitmap go to out of loop. Fixes: 31023864e67a ("exfat: add fat entry operations") Reported-by: Kun Hu , Jiaji Qin Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/balloc.c | 10 ++++++++-- fs/exfat/exfat_fs.h | 2 +- fs/exfat/fatent.c | 11 +++++++---- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c index ce9be95c9172f..9ff825f1502d5 100644 --- a/fs/exfat/balloc.c +++ b/fs/exfat/balloc.c @@ -141,7 +141,7 @@ int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync) return 0; } -void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync) +int exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync) { int i, b; unsigned int ent_idx; @@ -150,13 +150,17 @@ void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync) struct exfat_mount_options *opts = &sbi->options; if (!is_valid_cluster(sbi, clu)) - return; + return -EIO; ent_idx = CLUSTER_TO_BITMAP_ENT(clu); i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); + if (!test_bit_le(b, sbi->vol_amap[i]->b_data)) + return -EIO; + clear_bit_le(b, sbi->vol_amap[i]->b_data); + exfat_update_bh(sbi->vol_amap[i], sync); if (opts->discard) { @@ -171,6 +175,8 @@ void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync) opts->discard = 0; } } + + return 0; } /* diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 78be6964a8a08..d30ce18a88b7a 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -456,7 +456,7 @@ int exfat_count_num_clusters(struct super_block *sb, int exfat_load_bitmap(struct super_block *sb); void exfat_free_bitmap(struct exfat_sb_info *sbi); int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync); -void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync); +int exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync); unsigned int exfat_find_free_bitmap(struct super_block *sb, unsigned int clu); int exfat_count_used_clusters(struct super_block *sb, unsigned int *ret_count); int exfat_trim_fs(struct inode *inode, struct fstrim_range *range); diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c index 9e5492ac409b0..6f3651c6ca91e 100644 --- a/fs/exfat/fatent.c +++ b/fs/exfat/fatent.c @@ -175,6 +175,7 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain BITMAP_OFFSET_SECTOR_INDEX(sb, CLUSTER_TO_BITMAP_ENT(clu)); if (p_chain->flags == ALLOC_NO_FAT_CHAIN) { + int err; unsigned int last_cluster = p_chain->dir + p_chain->size - 1; do { bool sync = false; @@ -189,7 +190,9 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain cur_cmap_i = next_cmap_i; } - exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode))); + err = exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode))); + if (err) + break; clu++; num_clusters++; } while (num_clusters < p_chain->size); @@ -210,12 +213,13 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain cur_cmap_i = next_cmap_i; } - exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode))); + if (exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode)))) + break; clu = n_clu; num_clusters++; if (err) - goto dec_used_clus; + break; if (num_clusters >= sbi->num_clusters - EXFAT_FIRST_CLUSTER) { /* @@ -229,7 +233,6 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain } while (clu != EXFAT_EOF_CLUSTER); } -dec_used_clus: sbi->used_clusters -= num_clusters; return 0; } -- GitLab From fda94a9919fd632033979ad7765a99ae3cab9289 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 11 Feb 2025 14:14:21 -0600 Subject: [PATCH 1267/1585] exfat: short-circuit zero-byte writes in exfat_file_write_iter When generic_write_checks() returns zero, it means that iov_iter_count() is zero, and there is no work to do. Simply return success like all other filesystems do, rather than proceeding down the write path, which today yields an -EFAULT in generic_perform_write() via the (fault_in_iov_iter_readable(i, bytes) == bytes) check when bytes == 0. Fixes: 11a347fb6cef ("exfat: change to get file size from DataLength") Reported-by: Noah Signed-off-by: Eric Sandeen Reviewed-by: Yuezhang Mo Signed-off-by: Namjae Jeon --- fs/exfat/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/exfat/file.c b/fs/exfat/file.c index 05b51e7217838..807349d8ea050 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -587,7 +587,7 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) valid_size = ei->valid_size; ret = generic_write_checks(iocb, iter); - if (ret < 0) + if (ret <= 0) goto unlock; if (iocb->ki_flags & IOCB_DIRECT) { -- GitLab From 13940cef95491472760ca261b6713692ece9b946 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Sat, 8 Feb 2025 17:16:58 +0800 Subject: [PATCH 1268/1585] exfat: add a check for invalid data size Add a check for invalid data size to avoid corrupted filesystem from being further corrupted. Signed-off-by: Yuezhang Mo Signed-off-by: Namjae Jeon --- fs/exfat/namei.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 5b16181a4c2e9..8b30027d82512 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -646,6 +646,11 @@ static int exfat_find(struct inode *dir, struct qstr *qname, info->valid_size = le64_to_cpu(ep2->dentry.stream.valid_size); info->size = le64_to_cpu(ep2->dentry.stream.size); + if (unlikely(EXFAT_B_TO_CLU_ROUND_UP(info->size, sbi) > sbi->used_clusters)) { + exfat_fs_error(sb, "data size is invalid(%lld)", info->size); + return -EIO; + } + info->start_clu = le32_to_cpu(ep2->dentry.stream.start_clu); if (!is_valid_cluster(sbi, info->start_clu) && info->size) { exfat_warn(sb, "start_clu is invalid cluster(0x%x)", -- GitLab From 3c9231ea6497dfc50ac0ef69fff484da27d0df66 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Tue, 4 Mar 2025 08:44:29 +0800 Subject: [PATCH 1269/1585] net-timestamp: support TCP GSO case for a few missing flags When I read through the TSO codes, I found out that we probably miss initializing the tx_flags of last seg when TSO is turned off, which means at the following points no more timestamp (for this last one) will be generated. There are three flags to be handled in this patch: 1. SKBTX_HW_TSTAMP 2. SKBTX_BPF 3. SKBTX_SCHED_TSTAMP Note that SKBTX_BPF[1] was added in 6.14.0-rc2 by commit 6b98ec7e882af ("bpf: Add BPF_SOCK_OPS_TSTAMP_SCHED_CB callback") and only belongs to net-next branch material for now. The common issue of the above three flags can be fixed by this single patch. This patch initializes the tx_flags to SKBTX_ANY_TSTAMP like what the UDP GSO does to make the newly segmented last skb inherit the tx_flags so that requested timestamp will be generated in each certain layer, or else that last one has zero value of tx_flags which leads to no timestamp at all. Fixes: 4ed2d765dfacc ("net-timestamp: TCP timestamping") Signed-off-by: Jason Xing Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/tcp_offload.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 2308665b51c53..2dfac79dc78b8 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -13,12 +13,15 @@ #include #include -static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, +static void tcp_gso_tstamp(struct sk_buff *skb, struct sk_buff *gso_skb, unsigned int seq, unsigned int mss) { + u32 flags = skb_shinfo(gso_skb)->tx_flags & SKBTX_ANY_TSTAMP; + u32 ts_seq = skb_shinfo(gso_skb)->tskey; + while (skb) { if (before(ts_seq, seq + mss)) { - skb_shinfo(skb)->tx_flags |= SKBTX_SW_TSTAMP; + skb_shinfo(skb)->tx_flags |= flags; skb_shinfo(skb)->tskey = ts_seq; return; } @@ -193,8 +196,8 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th = tcp_hdr(skb); seq = ntohl(th->seq); - if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_SW_TSTAMP)) - tcp_gso_tstamp(segs, skb_shinfo(gso_skb)->tskey, seq, mss); + if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_ANY_TSTAMP)) + tcp_gso_tstamp(segs, gso_skb, seq, mss); newcheck = ~csum_fold(csum_add(csum_unfold(th->check), delta)); -- GitLab From 927e6bec5cf3624665b0a2e9f64a1d32f3d22cdd Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 5 Mar 2025 21:41:13 +0800 Subject: [PATCH 1270/1585] ASoC: rt1320: set wake_capable = 0 explicitly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "generic_new_peripheral_assigned: invalid dev_num 1, wake supported 1" is reported by our internal CI test. Rt1320's wake feature is not used in Linux and that's why it is not in the wake_capable_list[] list in intel_auxdevice.c. However, BIOS may set it as wake-capable. Overwrite wake_capable to 0 in the codec driver to align with wake_capable_list[]. Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Ranjani Sridharan Acked-by: Shuming Fan Link: https://patch.msgid.link/20250305134113.201326-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt1320-sdw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/rt1320-sdw.c b/sound/soc/codecs/rt1320-sdw.c index 3510c3819074b..d83b236a04503 100644 --- a/sound/soc/codecs/rt1320-sdw.c +++ b/sound/soc/codecs/rt1320-sdw.c @@ -535,6 +535,9 @@ static int rt1320_read_prop(struct sdw_slave *slave) /* set the timeout values */ prop->clk_stop_timeout = 64; + /* BIOS may set wake_capable. Make sure it is 0 as wake events are disabled. */ + prop->wake_capable = 0; + return 0; } -- GitLab From 5ac60242b0173be83709603ebaf27a473f16c4e4 Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Tue, 4 Mar 2025 14:34:26 -0700 Subject: [PATCH 1271/1585] ublk: set_params: properly check if parameters can be applied The parameters set by the set_params call are only applied to the block device in the start_dev call. So if a device has already been started, a subsequently issued set_params on that device will not have the desired effect, and should return an error. There is an existing check for this - set_params fails on devices in the LIVE state. But this check is not sufficient to cover the recovery case. In this case, the device will be in the QUIESCED or FAIL_IO states, so set_params will succeed. But this success is misleading, because the parameters will not be applied, since the device has already been started (by a previous ublk server). The bit UB_STATE_USED is set on completion of the start_dev; use it to detect and fail set_params commands which arrive too late to be applied (after start_dev). Signed-off-by: Uday Shankar Fixes: 0aa73170eba5 ("ublk_drv: add SET_PARAMS/GET_PARAMS control command") Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250304-set_params-v1-1-17b5e0887606@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 529085181f355..ca9a67b5b537a 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2715,9 +2715,12 @@ static int ublk_ctrl_set_params(struct ublk_device *ub, if (ph.len > sizeof(struct ublk_params)) ph.len = sizeof(struct ublk_params); - /* parameters can only be changed when device isn't live */ mutex_lock(&ub->mutex); - if (ub->dev_info.state == UBLK_S_DEV_LIVE) { + if (test_bit(UB_STATE_USED, &ub->state)) { + /* + * Parameters can only be changed when device hasn't + * been started yet + */ ret = -EACCES; } else if (copy_from_user(&ub->params, argp, ph.len)) { ret = -EFAULT; -- GitLab From e06472bab2a5393430cc2fbc3211cd3602422c1e Mon Sep 17 00:00:00 2001 From: Olivier Gayot Date: Wed, 5 Mar 2025 10:21:54 +0800 Subject: [PATCH 1272/1585] block: fix conversion of GPT partition name to 7-bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The utf16_le_to_7bit function claims to, naively, convert a UTF-16 string to a 7-bit ASCII string. By naively, we mean that it: * drops the first byte of every character in the original UTF-16 string * checks if all characters are printable, and otherwise replaces them by exclamation mark "!". This means that theoretically, all characters outside the 7-bit ASCII range should be replaced by another character. Examples: * lower-case alpha (ɒ) 0x0252 becomes 0x52 (R) * ligature OE (œ) 0x0153 becomes 0x53 (S) * hangul letter pieup (ㅂ) 0x3142 becomes 0x42 (B) * upper-case gamma (Ɣ) 0x0194 becomes 0x94 (not printable) so gets replaced by "!" The result of this conversion for the GPT partition name is passed to user-space as PARTNAME via udev, which is confusing and feels questionable. However, there is a flaw in the conversion function itself. By dropping one byte of each character and using isprint() to check if the remaining byte corresponds to a printable character, we do not actually guarantee that the resulting character is 7-bit ASCII. This happens because we pass 8-bit characters to isprint(), which in the kernel returns 1 for many values > 0x7f - as defined in ctype.c. This results in many values which should be replaced by "!" to be kept as-is, despite not being valid 7-bit ASCII. Examples: * e with acute accent (é) 0x00E9 becomes 0xE9 - kept as-is because isprint(0xE9) returns 1. * euro sign (€) 0x20AC becomes 0xAC - kept as-is because isprint(0xAC) returns 1. This way has broken pyudev utility[1], fixes it by using a mask of 7 bits instead of 8 bits before calling isprint. Link: https://github.com/pyudev/pyudev/issues/490#issuecomment-2685794648 [1] Link: https://lore.kernel.org/linux-block/4cac90c2-e414-4ebb-ae62-2a4589d9dc6e@canonical.com/ Cc: Mulhern Cc: Davidlohr Bueso Cc: stable@vger.kernel.org Signed-off-by: Olivier Gayot Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250305022154.3903128-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/partitions/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/partitions/efi.c b/block/partitions/efi.c index 5e9be13a56a82..7acba66eed481 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -682,7 +682,7 @@ static void utf16_le_to_7bit(const __le16 *in, unsigned int size, u8 *out) out[size] = 0; while (i < size) { - u8 c = le16_to_cpu(in[i]) & 0xff; + u8 c = le16_to_cpu(in[i]) & 0x7f; if (c && !isprint(c)) c = '!'; -- GitLab From 3be83ee9de0298f8321aa0b148d8f9995102e40f Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Mon, 9 Dec 2024 15:08:53 +0100 Subject: [PATCH 1273/1585] ice: do not configure destination override for switchdev After switchdev is enabled and disabled later, LLDP packets sending stops, despite working perfectly fine before and during switchdev state. To reproduce (creating/destroying VF is what triggers the reconfiguration): devlink dev eswitch set pci/
mode switchdev echo '2' > /sys/class/net//device/sriov_numvfs echo '0' > /sys/class/net//device/sriov_numvfs This happens because LLDP relies on the destination override functionality. It needs to 1) set a flag in the descriptor, 2) set the VSI permission to make it valid. The permissions are set when the PF VSI is first configured, but switchdev then enables it for the uplink VSI (which is always the PF) once more when configured and disables when deconfigured, which leads to software-generated LLDP packets being blocked. Do not modify the destination override permissions when configuring switchdev, as the enabled state is the default configuration that is never modified. Fixes: 1a1c40df2e80 ("ice: set and release switchdev environment") Reviewed-by: Michal Swiatkowski Signed-off-by: Larysa Zaremba Reviewed-by: Simon Horman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_eswitch.c | 6 ------ drivers/net/ethernet/intel/ice/ice_lib.c | 18 ------------------ drivers/net/ethernet/intel/ice/ice_lib.h | 4 ---- 3 files changed, 28 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index d649c197cf673..ed21d7f55ac11 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -49,9 +49,6 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) if (vlan_ops->dis_rx_filtering(uplink_vsi)) goto err_vlan_filtering; - if (ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_set_allow_override)) - goto err_override_uplink; - if (ice_vsi_update_local_lb(uplink_vsi, true)) goto err_override_local_lb; @@ -63,8 +60,6 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) err_up: ice_vsi_update_local_lb(uplink_vsi, false); err_override_local_lb: - ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); -err_override_uplink: vlan_ops->ena_rx_filtering(uplink_vsi); err_vlan_filtering: ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false, @@ -275,7 +270,6 @@ static void ice_eswitch_release_env(struct ice_pf *pf) vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi); ice_vsi_update_local_lb(uplink_vsi, false); - ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); vlan_ops->ena_rx_filtering(uplink_vsi); ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false, ICE_FLTR_TX); diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 38a1c8372180b..d0faa087793da 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3936,24 +3936,6 @@ void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx) ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S); } -/** - * ice_vsi_ctx_set_allow_override - allow destination override on VSI - * @ctx: pointer to VSI ctx structure - */ -void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx) -{ - ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD; -} - -/** - * ice_vsi_ctx_clear_allow_override - turn off destination override on VSI - * @ctx: pointer to VSI ctx structure - */ -void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx) -{ - ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD; -} - /** * ice_vsi_update_local_lb - update sw block in VSI with local loopback bit * @vsi: pointer to VSI structure diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index eabb35834a245..b4c9cb28a016e 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -105,10 +105,6 @@ ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *)) void ice_vsi_ctx_set_antispoof(struct ice_vsi_ctx *ctx); void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx); - -void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx); - -void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx); int ice_vsi_update_local_lb(struct ice_vsi *vsi, bool set); int ice_vsi_add_vlan_zero(struct ice_vsi *vsi); int ice_vsi_del_vlan_zero(struct ice_vsi *vsi); -- GitLab From 3b4035ddbfc8e4521f85569998a7569668cccf51 Mon Sep 17 00:00:00 2001 From: Zecheng Li Date: Tue, 4 Mar 2025 21:40:31 +0000 Subject: [PATCH 1274/1585] sched/fair: Fix potential memory corruption in child_cfs_rq_on_list child_cfs_rq_on_list attempts to convert a 'prev' pointer to a cfs_rq. This 'prev' pointer can originate from struct rq's leaf_cfs_rq_list, making the conversion invalid and potentially leading to memory corruption. Depending on the relative positions of leaf_cfs_rq_list and the task group (tg) pointer within the struct, this can cause a memory fault or access garbage data. The issue arises in list_add_leaf_cfs_rq, where both cfs_rq->leaf_cfs_rq_list and rq->leaf_cfs_rq_list are added to the same leaf list. Also, rq->tmp_alone_branch can be set to rq->leaf_cfs_rq_list. This adds a check `if (prev == &rq->leaf_cfs_rq_list)` after the main conditional in child_cfs_rq_on_list. This ensures that the container_of operation will convert a correct cfs_rq struct. This check is sufficient because only cfs_rqs on the same CPU are added to the list, so verifying the 'prev' pointer against the current rq's list head is enough. Fixes a potential memory corruption issue that due to current struct layout might not be manifesting as a crash but could lead to unpredictable behavior when the layout changes. Fixes: fdaba61ef8a2 ("sched/fair: Ensure that the CFS parent is added after unthrottling") Signed-off-by: Zecheng Li Reviewed-and-tested-by: K Prateek Nayak Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vincent Guittot Link: https://lore.kernel.org/r/20250304214031.2882646-1-zecheng@google.com --- kernel/sched/fair.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1c0ef435a7aae..c798d27952431 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4045,15 +4045,17 @@ static inline bool child_cfs_rq_on_list(struct cfs_rq *cfs_rq) { struct cfs_rq *prev_cfs_rq; struct list_head *prev; + struct rq *rq = rq_of(cfs_rq); if (cfs_rq->on_list) { prev = cfs_rq->leaf_cfs_rq_list.prev; } else { - struct rq *rq = rq_of(cfs_rq); - prev = rq->tmp_alone_branch; } + if (prev == &rq->leaf_cfs_rq_list) + return false; + prev_cfs_rq = container_of(prev, struct cfs_rq, leaf_cfs_rq_list); return (prev_cfs_rq->tg->parent == cfs_rq->tg); -- GitLab From 23d97f18901ef5e4e264e3b1777fe65c760186b5 Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Thu, 23 Jan 2025 09:15:39 +0100 Subject: [PATCH 1275/1585] ice: fix memory leak in aRFS after reset Fix aRFS (accelerated Receive Flow Steering) structures memory leak by adding a checker to verify if aRFS memory is already allocated while configuring VSI. aRFS objects are allocated in two cases: - as part of VSI initialization (at probe), and - as part of reset handling However, VSI reconfiguration executed during reset involves memory allocation one more time, without prior releasing already allocated resources. This led to the memory leak with the following signature: [root@os-delivery ~]# cat /sys/kernel/debug/kmemleak unreferenced object 0xff3c1ca7252e6000 (size 8192): comm "kworker/0:0", pid 8, jiffies 4296833052 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 0): [] __kmalloc_cache_noprof+0x275/0x340 [] ice_init_arfs+0x3a/0xe0 [ice] [] ice_vsi_cfg_def+0x607/0x850 [ice] [] ice_vsi_setup+0x5b/0x130 [ice] [] ice_init+0x1c1/0x460 [ice] [] ice_probe+0x2af/0x520 [ice] [] local_pci_probe+0x43/0xa0 [] work_for_cpu_fn+0x13/0x20 [] process_one_work+0x179/0x390 [] worker_thread+0x239/0x340 [] kthread+0xcc/0x100 [] ret_from_fork+0x2d/0x50 [] ret_from_fork_asm+0x1a/0x30 ... Fixes: 28bf26724fdb ("ice: Implement aRFS") Reviewed-by: Michal Swiatkowski Signed-off-by: Grzegorz Nitka Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_arfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index 7cee365cc7d16..405ddd17de1bf 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -511,7 +511,7 @@ void ice_init_arfs(struct ice_vsi *vsi) struct hlist_head *arfs_fltr_list; unsigned int i; - if (!vsi || vsi->type != ICE_VSI_PF) + if (!vsi || vsi->type != ICE_VSI_PF || ice_is_arfs_active(vsi)) return; arfs_fltr_list = kcalloc(ICE_MAX_ARFS_LIST, sizeof(*arfs_fltr_list), -- GitLab From dce97cb0a3e34204c0b99345418a714eac85953f Mon Sep 17 00:00:00 2001 From: Marcin Szycik Date: Thu, 2 Jan 2025 20:07:52 +0100 Subject: [PATCH 1276/1585] ice: Fix switchdev slow-path in LAG Ever since removing switchdev control VSI and using PF for port representor Tx/Rx, switchdev slow-path has been working improperly after failover in SR-IOV LAG. LAG assumes that the first uplink to be added to the aggregate will own VFs and have switchdev configured. After failing-over to the other uplink, representors are still configured to Tx through the uplink they are set up on, which fails because that uplink is now down. On failover, update all PRs on primary uplink to use the currently active uplink for Tx. Call netif_keep_dst(), as the secondary uplink might not be in switchdev mode. Also make sure to call ice_eswitch_set_target_vsi() if uplink is in LAG. On the Rx path, representors are already working properly, because default Tx from VFs is set to PF owning the eswitch. After failover the same PF is receiving traffic from VFs, even though link is down. Fixes: defd52455aee ("ice: do Tx through PF netdev in slow-path") Reviewed-by: Michal Swiatkowski Signed-off-by: Marcin Szycik Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lag.c | 27 +++++++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_txrx.c | 4 +++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 1ccb572ce285d..22371011c2492 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -1000,6 +1000,28 @@ static void ice_lag_link(struct ice_lag *lag) netdev_info(lag->netdev, "Shared SR-IOV resources in bond are active\n"); } +/** + * ice_lag_config_eswitch - configure eswitch to work with LAG + * @lag: lag info struct + * @netdev: active network interface device struct + * + * Updates all port representors in eswitch to use @netdev for Tx. + * + * Configures the netdev to keep dst metadata (also used in representor Tx). + * This is required for an uplink without switchdev mode configured. + */ +static void ice_lag_config_eswitch(struct ice_lag *lag, + struct net_device *netdev) +{ + struct ice_repr *repr; + unsigned long id; + + xa_for_each(&lag->pf->eswitch.reprs, id, repr) + repr->dst->u.port_info.lower_dev = netdev; + + netif_keep_dst(netdev); +} + /** * ice_lag_unlink - handle unlink event * @lag: LAG info struct @@ -1021,6 +1043,9 @@ static void ice_lag_unlink(struct ice_lag *lag) ice_lag_move_vf_nodes(lag, act_port, pri_port); lag->primary = false; lag->active_port = ICE_LAG_INVALID_PORT; + + /* Config primary's eswitch back to normal operation. */ + ice_lag_config_eswitch(lag, lag->netdev); } else { struct ice_lag *primary_lag; @@ -1419,6 +1444,7 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr) ice_lag_move_vf_nodes(lag, prim_port, event_port); lag->active_port = event_port; + ice_lag_config_eswitch(lag, event_netdev); return; } @@ -1428,6 +1454,7 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr) /* new active port */ ice_lag_move_vf_nodes(lag, lag->active_port, event_port); lag->active_port = event_port; + ice_lag_config_eswitch(lag, event_netdev); } else { /* port not set as currently active (e.g. new active port * has already claimed the nodes and filters diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 9c9ea4c1b93b7..380ba1e8b3b2c 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -2424,7 +2424,9 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) ICE_TXD_CTX_QW1_CMD_S); ice_tstamp(tx_ring, skb, first, &offload); - if (ice_is_switchdev_running(vsi->back) && vsi->type != ICE_VSI_SF) + if ((ice_is_switchdev_running(vsi->back) || + ice_lag_is_switchdev_running(vsi->back)) && + vsi->type != ICE_VSI_SF) ice_eswitch_set_target_vsi(skb, &offload); if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) { -- GitLab From 374c9faac5a763a05bc3f68ad9f73dab3c6aec90 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Wed, 26 Feb 2025 16:37:31 +0800 Subject: [PATCH 1277/1585] drm/amd/display: Fix null check for pipe_ctx->plane_state in resource_build_scaling_params Null pointer dereference issue could occur when pipe_ctx->plane_state is null. The fix adds a check to ensure 'pipe_ctx->plane_state' is not null before accessing. This prevents a null pointer dereference. Found by code review. Fixes: 3be5262e353b ("drm/amd/display: Rename more dc_surface stuff to plane_state") Reviewed-by: Alex Hung Signed-off-by: Ma Ke Signed-off-by: Alex Deucher (cherry picked from commit 63e6a77ccf239337baa9b1e7787cde9fa0462092) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 520a34a42827b..a45037cb4cc01 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1455,7 +1455,8 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger); /* Invalid input */ - if (!plane_state->dst_rect.width || + if (!plane_state || + !plane_state->dst_rect.width || !plane_state->dst_rect.height || !plane_state->src_rect.width || !plane_state->src_rect.height) { -- GitLab From 2a3e89a14864090ee4804fcec655ffc15fabf45c Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Wed, 19 Feb 2025 14:30:39 +0100 Subject: [PATCH 1278/1585] ice: register devlink prior to creating health reporters ice_health_init() was introduced in the commit 2a82874a3b7b ("ice: add Tx hang devlink health reporter"). The call to it should have been put after ice_devlink_register(). It went unnoticed until next reporter by Konrad, which receives events from FW. FW is reporting all events, also from prior driver load, and thus it is not unlikely to have something at the very beginning. And that results in a splat: [ 24.455950] ? devlink_recover_notify.constprop.0+0x198/0x1b0 [ 24.455973] devlink_health_report+0x5d/0x2a0 [ 24.455976] ? __pfx_ice_health_status_lookup_compare+0x10/0x10 [ice] [ 24.456044] ice_process_health_status_event+0x1b7/0x200 [ice] Do the analogous thing for deinit patch. Fixes: 85d6164ec56d ("ice: add fw and port health reporters") Reviewed-by: Aleksandr Loktionov Reviewed-by: Michal Swiatkowski Reviewed-by: Konrad Knitter Signed-off-by: Przemek Kitszel Tested-by: Sunitha Mekala (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c3a0fb97c5ee4..e13bd5a6cb6c4 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5065,16 +5065,16 @@ static int ice_init_devlink(struct ice_pf *pf) return err; ice_devlink_init_regions(pf); - ice_health_init(pf); ice_devlink_register(pf); + ice_health_init(pf); return 0; } static void ice_deinit_devlink(struct ice_pf *pf) { - ice_devlink_unregister(pf); ice_health_deinit(pf); + ice_devlink_unregister(pf); ice_devlink_destroy_regions(pf); ice_devlink_unregister_params(pf); } -- GitLab From fd617ea3b79d2116d53f76cdb5a3601c0ba6e42f Mon Sep 17 00:00:00 2001 From: Andrew Martin Date: Fri, 28 Feb 2025 11:26:48 -0500 Subject: [PATCH 1279/1585] drm/amdkfd: Fix NULL Pointer Dereference in KFD queue Through KFD IOCTL Fuzzing we encountered a NULL pointer derefrence when calling kfd_queue_acquire_buffers. Fixes: 629568d25fea ("drm/amdkfd: Validate queue cwsr area and eop buffer size") Signed-off-by: Andrew Martin Reviewed-by: Philip Yang Signed-off-by: Andrew Martin Signed-off-by: Alex Deucher (cherry picked from commit 049e5bf3c8406f87c3d8e1958e0a16804fa1d530) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index ecccd7adbab4d..24396a2c77bd0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -266,8 +266,8 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope /* EOP buffer is not required for all ASICs */ if (properties->eop_ring_buffer_address) { if (properties->eop_ring_buffer_size != topo_dev->node_props.eop_buffer_size) { - pr_debug("queue eop bo size 0x%lx not equal to node eop buf size 0x%x\n", - properties->eop_buf_bo->tbo.base.size, + pr_debug("queue eop bo size 0x%x not equal to node eop buf size 0x%x\n", + properties->eop_ring_buffer_size, topo_dev->node_props.eop_buffer_size); err = -EINVAL; goto out_err_unreserve; -- GitLab From c27c66afc449b80f3b4b84d123358c0248f2cf63 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 5 Mar 2025 07:08:09 -1000 Subject: [PATCH 1280/1585] fs/pipe: Fix pipe_occupancy() with 16-bit indexes The pipe_occupancy() logic implicitly relied on the natural unsigned modulo arithmetic in C, but that doesn't work for the new 'pipe_index_t' case, since any arithmetic will be done in 'int' (and here we had also made it 'unsigned int' due to the function call boundary). So make the modulo arithmetic explicit by casting the result to the proper type. Cc: Oleg Nesterov Cc: Mateusz Guzik Cc: Manfred Spraul Cc: Christian Brauner Cc: Swapnil Sapkal Cc: Alexey Gladkov Cc: K Prateek Nayak Link: https://lore.kernel.org/all/CAHk-=wjyHsGLx=rxg6PKYBNkPYAejgo7=CbyL3=HGLZLsAaJFQ@mail.gmail.com/ Fixes: 3d252160b818 ("fs/pipe: Read pipe->{head,tail} atomically outside pipe->mutex") Signed-off-by: Linus Torvalds --- include/linux/pipe_fs_i.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 3cc4f8eab853f..1f013ed7577ef 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -192,7 +192,7 @@ static inline bool pipe_empty(unsigned int head, unsigned int tail) */ static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail) { - return head - tail; + return (pipe_index_t)(head - tail); } /** -- GitLab From cfced12f5100e50d56bc587299393fd33c1169a9 Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Wed, 5 Mar 2025 11:23:01 +0000 Subject: [PATCH 1281/1585] include/linux/pipe_fs_i: Add htmldoc annotation for "head_tail" member Add htmldoc annotation for the newly introduced "head_tail" member describing it to be a union of the pipe_inode_info's @head and @tail members. Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/lkml/20250305204609.5e64768e@canb.auug.org.au/ Fixes: 3d252160b818 ("fs/pipe: Read pipe->{head,tail} atomically outside pipe->mutex") Signed-off-by: K Prateek Nayak Signed-off-by: Linus Torvalds --- include/linux/pipe_fs_i.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 1f013ed7577ef..05ccbc5d01294 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -65,6 +65,7 @@ union pipe_index { * @wr_wait: writer wait point in case of full pipe * @head: The point of buffer production * @tail: The point of buffer consumption + * @head_tail: unsigned long union of @head and @tail * @note_loss: The next read() should insert a data-lost message * @max_usage: The maximum number of slots that may be used in the ring * @ring_size: total number of buffers (should be a power of 2) -- GitLab From 0eba2a7e858907a746ba69cd002eb9eb4dbd7bf3 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 28 Feb 2025 15:14:56 +0000 Subject: [PATCH 1282/1585] ASoC: ops: Consistently treat platform_max as control value This reverts commit 9bdd10d57a88 ("ASoC: ops: Shift tested values in snd_soc_put_volsw() by +min"), and makes some additional related updates. There are two ways the platform_max could be interpreted; the maximum register value, or the maximum value the control can be set to. The patch moved from treating the value as a control value to a register one. When the patch was applied it was technically correct as snd_soc_limit_volume() also used the register interpretation. However, even then most of the other usages treated platform_max as a control value, and snd_soc_limit_volume() has since been updated to also do so in commit fb9ad24485087 ("ASoC: ops: add correct range check for limiting volume"). That patch however, missed updating snd_soc_put_volsw() back to the control interpretation, and fixing snd_soc_info_volsw_range(). The control interpretation makes more sense as limiting is typically done from the machine driver, so it is appropriate to use the customer facing representation rather than the internal codec representation. Update all the code to consistently use this interpretation of platform_max. Finally, also add some comments to the soc_mixer_control struct to hopefully avoid further patches switching between the two approaches. Fixes: fb9ad24485087 ("ASoC: ops: add correct range check for limiting volume") Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20250228151456.3703342-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/soc.h | 5 ++++- sound/soc/soc-ops.c | 15 +++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/include/sound/soc.h b/include/sound/soc.h index fcdb5adfcd5ec..b3e84bc47c6fd 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1261,7 +1261,10 @@ void snd_soc_close_delayed_work(struct snd_soc_pcm_runtime *rtd); /* mixer control */ struct soc_mixer_control { - int min, max, platform_max; + /* Minimum and maximum specified as written to the hardware */ + int min, max; + /* Limited maximum value specified as presented through the control */ + int platform_max; int reg, rreg; unsigned int shift, rshift; unsigned int sign_bit; diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 19928f098d8dc..b0e4e4168f38d 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -337,7 +337,7 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol, if (ucontrol->value.integer.value[0] < 0) return -EINVAL; val = ucontrol->value.integer.value[0]; - if (mc->platform_max && ((int)val + min) > mc->platform_max) + if (mc->platform_max && val > mc->platform_max) return -EINVAL; if (val > max - min) return -EINVAL; @@ -350,7 +350,7 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol, if (ucontrol->value.integer.value[1] < 0) return -EINVAL; val2 = ucontrol->value.integer.value[1]; - if (mc->platform_max && ((int)val2 + min) > mc->platform_max) + if (mc->platform_max && val2 > mc->platform_max) return -EINVAL; if (val2 > max - min) return -EINVAL; @@ -503,17 +503,16 @@ int snd_soc_info_volsw_range(struct snd_kcontrol *kcontrol, { struct soc_mixer_control *mc = (struct soc_mixer_control *)kcontrol->private_value; - int platform_max; - int min = mc->min; + int max; - if (!mc->platform_max) - mc->platform_max = mc->max; - platform_max = mc->platform_max; + max = mc->max - mc->min; + if (mc->platform_max && mc->platform_max < max) + max = mc->platform_max; uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; uinfo->count = snd_soc_volsw_is_stereo(mc) ? 2 : 1; uinfo->value.integer.min = 0; - uinfo->value.integer.max = platform_max - min; + uinfo->value.integer.max = max; return 0; } -- GitLab From 29ffeb73b216ce3eff10229eb077cf9b7812119d Mon Sep 17 00:00:00 2001 From: Richard Thier Date: Mon, 17 Jun 2019 23:46:27 +0200 Subject: [PATCH 1283/1585] drm/radeon: Fix rs400_gpu_init for ATI mobility radeon Xpress 200M MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit num_gb_pipes was set to a wrong value using r420_pipe_config This have lead to HyperZ glitches on fast Z clearing. Closes: https://bugs.freedesktop.org/show_bug.cgi?id=110897 Reviewed-by: Marek Olšák Signed-off-by: Richard Thier Signed-off-by: Alex Deucher (cherry picked from commit 044e59a85c4d84e3c8d004c486e5c479640563a6) Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/r300.c | 3 ++- drivers/gpu/drm/radeon/radeon_asic.h | 1 + drivers/gpu/drm/radeon/rs400.c | 18 ++++++++++++++++-- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 05c13102a8cb8..d22889fbfa9c8 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -359,7 +359,8 @@ int r300_mc_wait_for_idle(struct radeon_device *rdev) return -1; } -static void r300_gpu_init(struct radeon_device *rdev) +/* rs400_gpu_init also calls this! */ +void r300_gpu_init(struct radeon_device *rdev) { uint32_t gb_tile_config, tmp; diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 1e00f6b99f94b..8f5e07834fcc6 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -165,6 +165,7 @@ void r200_set_safe_registers(struct radeon_device *rdev); */ extern int r300_init(struct radeon_device *rdev); extern void r300_fini(struct radeon_device *rdev); +extern void r300_gpu_init(struct radeon_device *rdev); extern int r300_suspend(struct radeon_device *rdev); extern int r300_resume(struct radeon_device *rdev); extern int r300_asic_reset(struct radeon_device *rdev, bool hard); diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index d6c18fd740ec6..13cd0a688a65c 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -256,8 +256,22 @@ int rs400_mc_wait_for_idle(struct radeon_device *rdev) static void rs400_gpu_init(struct radeon_device *rdev) { - /* FIXME: is this correct ? */ - r420_pipes_init(rdev); + /* Earlier code was calling r420_pipes_init and then + * rs400_mc_wait_for_idle(rdev). The problem is that + * at least on my Mobility Radeon Xpress 200M RC410 card + * that ends up in this code path ends up num_gb_pipes == 3 + * while the card seems to have only one pipe. With the + * r420 pipe initialization method. + * + * Problems shown up as HyperZ glitches, see: + * https://bugs.freedesktop.org/show_bug.cgi?id=110897 + * + * Delegating initialization to r300 code seems to work + * and results in proper pipe numbers. The rs400 cards + * are said to be not r400, but r300 kind of cards. + */ + r300_gpu_init(rdev); + if (rs400_mc_wait_for_idle(rdev)) { pr_warn("rs400: Failed to wait MC idle while programming pipes. Bad things might happen. %08x\n", RREG32(RADEON_MC_STATUS)); -- GitLab From da552bda987420e877500fdd90bd0172e3bf412b Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 28 Feb 2025 17:02:11 +0800 Subject: [PATCH 1284/1585] drm/amd/pm: always allow ih interrupt from fw always allow ih interrupt from fw on smu v14 based on the interface requirement Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher (cherry picked from commit a3199eba46c54324193607d9114a1e321292d7a1) Cc: stable@vger.kernel.org # 6.12.x --- drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index 9b2f4fe1578b8..ddb6444406d28 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -1895,16 +1895,6 @@ static int smu_v14_0_allow_ih_interrupt(struct smu_context *smu) NULL); } -static int smu_v14_0_process_pending_interrupt(struct smu_context *smu) -{ - int ret = 0; - - if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_ACDC_BIT)) - ret = smu_v14_0_allow_ih_interrupt(smu); - - return ret; -} - int smu_v14_0_enable_thermal_alert(struct smu_context *smu) { int ret = 0; @@ -1916,7 +1906,7 @@ int smu_v14_0_enable_thermal_alert(struct smu_context *smu) if (ret) return ret; - return smu_v14_0_process_pending_interrupt(smu); + return smu_v14_0_allow_ih_interrupt(smu); } int smu_v14_0_disable_thermal_alert(struct smu_context *smu) -- GitLab From 0d2d0f3d93ddd6556f23c917d910becd9925ddeb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 5 Mar 2025 07:35:40 -1000 Subject: [PATCH 1285/1585] fs/pipe: remove buggy and unused 'helper' function While looking for incorrect users of the pipe head/tail fields (see commit c27c66afc449: "fs/pipe: Fix pipe_occupancy() with 16-bit indexes"), I found a bug in pipe_discard_from() that looked entirely broken. However, the fix is trivial: this buggy function isn't actually called by anything, so let's just remove it ASAP. Signed-off-by: Linus Torvalds --- include/linux/pipe_fs_i.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 05ccbc5d01294..e572e6fc4f81f 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -281,15 +281,6 @@ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe, return buf->ops->try_steal(pipe, buf); } -static inline void pipe_discard_from(struct pipe_inode_info *pipe, - unsigned int old_head) -{ - unsigned int mask = pipe->ring_size - 1; - - while (pipe->head > old_head) - pipe_buf_release(pipe, &pipe->bufs[--pipe->head & mask]); -} - /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ #define PIPE_SIZE PAGE_SIZE -- GitLab From 528361c49962708a60f51a1afafeb00987cebedf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 5 Mar 2025 18:52:59 +0300 Subject: [PATCH 1286/1585] nvme-tcp: fix signedness bug in nvme_tcp_init_connection() The kernel_recvmsg() function returns an int which could be either negative error codes or the number of bytes received. The problem is that the condition: if (ret < sizeof(*icresp)) { is type promoted to type unsigned long and negative values are treated as high positive values which is success, when they should be treated as failure. Handle invalid positive returns separately from negative error codes to avoid this problem. Fixes: 578539e09690 ("nvme-tcp: fix connect failure on receiving partial ICResp PDU") Signed-off-by: Dan Carpenter Reviewed-by: Caleb Sander Mateos Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 23f11527d29d0..327f3f2f5399c 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1521,11 +1521,11 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) msg.msg_flags = MSG_WAITALL; ret = kernel_recvmsg(queue->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags); - if (ret < sizeof(*icresp)) { + if (ret >= 0 && ret < sizeof(*icresp)) + ret = -ECONNRESET; + if (ret < 0) { pr_warn("queue %d: failed to receive icresp, error %d\n", nvme_tcp_queue_id(queue), ret); - if (ret >= 0) - ret = -ECONNRESET; goto free_icresp; } ret = -ENOTCONN; -- GitLab From f2c11231b57b5163bf16cdfd65271d53d61dd996 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 5 Mar 2025 17:06:45 +0000 Subject: [PATCH 1287/1585] ALSA: hda/realtek: Add support for ASUS ROG Strix G814 Laptop using CS35L41 HDA Add support for ASUS G814PH/PM/PP and G814FH/FM/FP. Laptops use 2 CS35L41 Amps with HDA, using Internal boost, with I2C. Signed-off-by: Stefan Binding Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250305170714.755794-2-sbinding@opensource.cirrus.com --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4ca457e7ca9dd..e7612ab492172 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10784,6 +10784,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a50, "ASUS G834JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), + SND_PCI_QUIRK(0x1043, 0x3e00, "ASUS G814FH/FM/FP", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x3e20, "ASUS G814PH/PM/PP", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3e30, "ASUS TP3607SA", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x3ee0, "ASUS Strix G815_JHR_JMR_JPR", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x3ef0, "ASUS Strix G635LR_LW_LX", ALC287_FIXUP_TAS2781_I2C), -- GitLab From 16dc157346dd4404b02b42e73b88604be3652039 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 5 Mar 2025 17:06:46 +0000 Subject: [PATCH 1288/1585] ALSA: hda/realtek: Add support for ASUS ROG Strix GA603 Laptops using CS35L41 HDA Add support for ASUS GA603KP, GA603KM and GA603KH. Laptops use 2 CS35L41 Amps with HDA, using Internal boost, with I2C Signed-off-by: Stefan Binding Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250305170714.755794-3-sbinding@opensource.cirrus.com --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e7612ab492172..aa9b6e474f69b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10784,6 +10784,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a50, "ASUS G834JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), + SND_PCI_QUIRK(0x1043, 0x3d78, "ASUS GA603KH", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x3d88, "ASUS GA603KM", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3e00, "ASUS G814FH/FM/FP", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3e20, "ASUS G814PH/PM/PP", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3e30, "ASUS TP3607SA", ALC287_FIXUP_TAS2781_I2C), -- GitLab From 9120b2b4ad0dad2f6bbb6bcacd0456f806fda62d Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 5 Mar 2025 17:06:47 +0000 Subject: [PATCH 1289/1585] ALSA: hda/realtek: Add support for ASUS ROG Strix G614 Laptops using CS35L41 HDA Add support for ASUS G614PH/PM/PP and G614FH/FM/FP. Laptops use 2 CS35L41 Amps with HDA, using Internal boost, with I2C Signed-off-by: Stefan Binding Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250305170714.755794-4-sbinding@opensource.cirrus.com --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index aa9b6e474f69b..c2ebcd2958c3f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10676,7 +10676,9 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8e1a, "HP ZBook Firefly 14 G12A", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), + SND_PCI_QUIRK(0x1043, 0x1054, "ASUS G614FH/FM/FP", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x1043, 0x1074, "ASUS G614PH/PM/PP", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x10a1, "ASUS UX391UA", ALC294_FIXUP_ASUS_SPK), SND_PCI_QUIRK(0x1043, 0x10a4, "ASUS TP3407SA", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x10c0, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), -- GitLab From 859a11917001424776e1cca02b762efcabb4044e Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 5 Mar 2025 17:06:48 +0000 Subject: [PATCH 1290/1585] ALSA: hda/realtek: Add support for various ASUS Laptops using CS35L41 HDA Add support for ASUS B3405CVA, B5405CVA, B5605CVA, B3605CVA. Laptops use 2 CS35L41 Amps with HDA, using Internal boost, with SPI Signed-off-by: Stefan Binding Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250305170714.755794-5-sbinding@opensource.cirrus.com --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c2ebcd2958c3f..bf89900ec5f50 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10692,6 +10692,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x125e, "ASUS Q524UQK", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1271, "ASUS X430UN", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1290, "ASUS X441SA", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x1294, "ASUS B3405CVA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x12a0, "ASUS X441UV", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x12a3, "Asus N7691ZM", ALC269_FIXUP_ASUS_N7601ZM), SND_PCI_QUIRK(0x1043, 0x12af, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), @@ -10779,6 +10780,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1f63, "ASUS P5405CSA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1f92, "ASUS ROG Flow X16", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1fb3, "ASUS ROG Flow Z13 GZ302EA", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x3011, "ASUS B5605CVA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), SND_PCI_QUIRK(0x1043, 0x31d0, "ASUS Zen AIO 27 Z272SD_A272SD", ALC274_FIXUP_ASUS_ZEN_AIO_27), SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), @@ -10797,6 +10799,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x3f10, "ASUS Strix G835LR_LW_LX", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x3f20, "ASUS Strix G615LR_LW", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x3f30, "ASUS Strix G815LR_LW", ALC287_FIXUP_TAS2781_I2C), + SND_PCI_QUIRK(0x1043, 0x3fd0, "ASUS B3605CVA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3ff0, "ASUS B5405CVA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x8398, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC), -- GitLab From 7ab61d0a9a35e32497bcf2233310fec79ee3338f Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 5 Mar 2025 17:06:49 +0000 Subject: [PATCH 1291/1585] ALSA: hda/realtek: Add support for ASUS B3405 and B3605 Laptops using CS35L41 HDA Add support for ASUS B3405CCA / P3405CCA, B3605CCA / P3605CCA, B3405CCA, B3605CCA. Laptops use 2 CS35L41 Amps with HDA, using Internal boost, with SPI Signed-off-by: Stefan Binding Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250305170714.755794-6-sbinding@opensource.cirrus.com --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bf89900ec5f50..53d2a267d7989 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10696,6 +10696,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x12a0, "ASUS X441UV", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x12a3, "Asus N7691ZM", ALC269_FIXUP_ASUS_N7601ZM), SND_PCI_QUIRK(0x1043, 0x12af, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x12b4, "ASUS B3405CCA / P3405CCA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1313, "Asus K42JZ", ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE), @@ -10782,7 +10783,10 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1fb3, "ASUS ROG Flow Z13 GZ302EA", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3011, "ASUS B5605CVA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), + SND_PCI_QUIRK(0x1043, 0x3061, "ASUS B3405CCA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x30c1, "ASUS B3605CCA / P3605CCA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x31d0, "ASUS Zen AIO 27 Z272SD_A272SD", ALC274_FIXUP_ASUS_ZEN_AIO_27), + SND_PCI_QUIRK(0x1043, 0x31f1, "ASUS B3605CCA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), -- GitLab From c86dd79a7c338fff9bebb9503857e07db9845eca Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 5 Mar 2025 17:06:50 +0000 Subject: [PATCH 1292/1585] ALSA: hda/realtek: Add support for ASUS B5405 and B5605 Laptops using CS35L41 HDA Add support for ASUS B5605CCA and B5405CCA. Laptops use 2 CS35L41 Amps with HDA, using Internal boost, with SPI Signed-off-by: Stefan Binding Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250305170714.755794-7-sbinding@opensource.cirrus.com --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 53d2a267d7989..f67ab69c9997c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10784,8 +10784,12 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x3011, "ASUS B5605CVA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), SND_PCI_QUIRK(0x1043, 0x3061, "ASUS B3405CCA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3071, "ASUS B5405CCA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x30c1, "ASUS B3605CCA / P3605CCA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x30d1, "ASUS B5405CCA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x30e1, "ASUS B5605CCA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x31d0, "ASUS Zen AIO 27 Z272SD_A272SD", ALC274_FIXUP_ASUS_ZEN_AIO_27), + SND_PCI_QUIRK(0x1043, 0x31e1, "ASUS B5605CCA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x31f1, "ASUS B3605CCA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), -- GitLab From 8463d2adbe1901247937fcdfe4b525130f6db10b Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 5 Mar 2025 17:06:51 +0000 Subject: [PATCH 1293/1585] ALSA: hda/realtek: Add support for ASUS Zenbook UM3406KA Laptops using CS35L41 HDA Laptop uses 2 CS35L41 Amps with HDA, using External boost with I2C Signed-off-by: Stefan Binding Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250305170714.755794-8-sbinding@opensource.cirrus.com --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f67ab69c9997c..d2a1f836dbbf7 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10686,6 +10686,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x10d3, "ASUS K6500ZC", ALC294_FIXUP_ASUS_SPK), SND_PCI_QUIRK(0x1043, 0x1154, "ASUS TP3607SH", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x1043, 0x1194, "ASUS UM3406KA", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1204, "ASUS Strix G615JHR_JMR_JPR", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x1214, "ASUS Strix G615LH_LM_LP", ALC287_FIXUP_TAS2781_I2C), -- GitLab From f2052a4a62465c0037aef7ea7426bffdb3531e41 Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Mon, 3 Mar 2025 13:11:21 +0000 Subject: [PATCH 1294/1585] clk: samsung: gs101: fix synchronous external abort in samsung_clk_save() EARLY_WAKEUP_SW_TRIG_*_SET and EARLY_WAKEUP_SW_TRIG_*_CLEAR registers are only writeable. Attempting to read these registers during samsung_clk_save() causes a synchronous external abort. Remove these 8 registers from cmu_top_clk_regs[] array so that system suspend gets further. Note: the code path can be exercised using the following command: echo mem > /sys/power/state Fixes: 2c597bb7d66a ("clk: samsung: clk-gs101: Add cmu_top, cmu_misc and cmu_apm support") Signed-off-by: Peter Griffin Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250303-clk-suspend-fix-v1-1-c2edaf66260f@linaro.org Signed-off-by: Krzysztof Kozlowski --- drivers/clk/samsung/clk-gs101.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/clk/samsung/clk-gs101.c b/drivers/clk/samsung/clk-gs101.c index 86b39edba1227..08b867ae3ed9d 100644 --- a/drivers/clk/samsung/clk-gs101.c +++ b/drivers/clk/samsung/clk-gs101.c @@ -382,17 +382,9 @@ static const unsigned long cmu_top_clk_regs[] __initconst = { EARLY_WAKEUP_DPU_DEST, EARLY_WAKEUP_CSIS_DEST, EARLY_WAKEUP_SW_TRIG_APM, - EARLY_WAKEUP_SW_TRIG_APM_SET, - EARLY_WAKEUP_SW_TRIG_APM_CLEAR, EARLY_WAKEUP_SW_TRIG_CLUSTER0, - EARLY_WAKEUP_SW_TRIG_CLUSTER0_SET, - EARLY_WAKEUP_SW_TRIG_CLUSTER0_CLEAR, EARLY_WAKEUP_SW_TRIG_DPU, - EARLY_WAKEUP_SW_TRIG_DPU_SET, - EARLY_WAKEUP_SW_TRIG_DPU_CLEAR, EARLY_WAKEUP_SW_TRIG_CSIS, - EARLY_WAKEUP_SW_TRIG_CSIS_SET, - EARLY_WAKEUP_SW_TRIG_CSIS_CLEAR, CLK_CON_MUX_MUX_CLKCMU_BO_BUS, CLK_CON_MUX_MUX_CLKCMU_BUS0_BUS, CLK_CON_MUX_MUX_CLKCMU_BUS1_BUS, -- GitLab From 53517a70873c7a91675f7244768aad5006cc45de Mon Sep 17 00:00:00 2001 From: Varada Pavani Date: Tue, 25 Feb 2025 18:49:18 +0530 Subject: [PATCH 1295/1585] clk: samsung: update PLL locktime for PLL142XX used on FSD platform Currently PLL142XX locktime is 270. As per spec, it should be 150. Hence update PLL142XX controller locktime to 150. Cc: stable@vger.kernel.org Fixes: 4f346005aaed ("clk: samsung: fsd: Add initial clock support") Signed-off-by: Varada Pavani Link: https://lore.kernel.org/r/20250225131918.50925-3-v.pavani@samsung.com Signed-off-by: Krzysztof Kozlowski --- drivers/clk/samsung/clk-pll.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/clk/samsung/clk-pll.c b/drivers/clk/samsung/clk-pll.c index 2e94bba6c3966..023a25af73c47 100644 --- a/drivers/clk/samsung/clk-pll.c +++ b/drivers/clk/samsung/clk-pll.c @@ -206,6 +206,7 @@ static const struct clk_ops samsung_pll3000_clk_ops = { */ /* Maximum lock time can be 270 * PDIV cycles */ #define PLL35XX_LOCK_FACTOR (270) +#define PLL142XX_LOCK_FACTOR (150) #define PLL35XX_MDIV_MASK (0x3FF) #define PLL35XX_PDIV_MASK (0x3F) @@ -272,7 +273,11 @@ static int samsung_pll35xx_set_rate(struct clk_hw *hw, unsigned long drate, } /* Set PLL lock time. */ - writel_relaxed(rate->pdiv * PLL35XX_LOCK_FACTOR, + if (pll->type == pll_142xx) + writel_relaxed(rate->pdiv * PLL142XX_LOCK_FACTOR, + pll->lock_reg); + else + writel_relaxed(rate->pdiv * PLL35XX_LOCK_FACTOR, pll->lock_reg); /* Change PLL PMS values */ -- GitLab From e775e2a060d99180edc5366fb9f4299d0f07b66c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 28 Feb 2025 08:30:55 +0100 Subject: [PATCH 1296/1585] drm/xe/vm: Validate userptr during gpu vma prefetching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a userptr vma subject to prefetching was already invalidated or invalidated during the prefetch operation, the operation would repeatedly return -EAGAIN which would typically cause an infinite loop. Validate the userptr to ensure this doesn't happen. v2: - Don't fallthrough from UNMAP to PREFETCH (Matthew Brost) Fixes: 5bd24e78829a ("drm/xe/vm: Subclass userptr vmas") Fixes: 617eebb9c480 ("drm/xe: Fix array of binds") Cc: Matthew Brost Cc: # v6.9+ Suggested-by: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250228073058.59510-2-thomas.hellstrom@linux.intel.com (cherry picked from commit 03c346d4d0d85d210d549d43c8cfb3dfb7f20e0a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 30259eba450b5..d2cd227e4d694 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2286,8 +2286,17 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, break; } case DRM_GPUVA_OP_UNMAP: + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); + break; case DRM_GPUVA_OP_PREFETCH: - /* FIXME: Need to skip some prefetch ops */ + vma = gpuva_to_vma(op->base.prefetch.va); + + if (xe_vma_is_userptr(vma)) { + err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); + if (err) + return err; + } + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); break; default: -- GitLab From 1414d95d5805b1dc221d22db9b8dc5287ef083bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 28 Feb 2025 08:30:56 +0100 Subject: [PATCH 1297/1585] drm/xe/vm: Fix a misplaced #endif MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a (harmless) misplaced #endif leading to declarations appearing multiple times. Fixes: 0eb2a18a8fad ("drm/xe: Implement VM snapshot support for BO's and userptr") Cc: Maarten Lankhorst Cc: José Roberto de Souza Cc: # v6.12+ Signed-off-by: Thomas Hellström Reviewed-by: Lucas De Marchi Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20250228073058.59510-3-thomas.hellstrom@linux.intel.com (cherry picked from commit fcc20a4c752214b3e25632021c57d7d1d71ee1dd) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 23adb74428815..256a837c2704a 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -274,9 +274,9 @@ static inline void vm_dbg(const struct drm_device *dev, const char *format, ...) { /* noop */ } #endif -#endif struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm); void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap); void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p); void xe_vm_snapshot_free(struct xe_vm_snapshot *snap); +#endif -- GitLab From 84211b1c0db6b9dbe0020fa97192fb9661617f24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 28 Feb 2025 08:30:57 +0100 Subject: [PATCH 1298/1585] drm/xe: Fix fault mode invalidation with unbind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix fault mode invalidation racing with unbind leading to the PTE zapping potentially traversing an invalid page-table tree. Do this by holding the notifier lock across PTE zapping. This might transfer any contention waiting on the notifier seqlock read side to the notifier lock read side, but that shouldn't be a major problem. At the same time get rid of the open-coded invalidation in the bind code by relying on the notifier even when the vma bind is not yet committed. Finally let userptr invalidation call a dedicated xe_vm function performing a full invalidation. Fixes: e8babb280b5e ("drm/xe: Convert multiple bind ops into single job") Cc: Thomas Hellström Cc: Matthew Brost Cc: Matthew Auld Cc: # v6.12+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250228073058.59510-4-thomas.hellstrom@linux.intel.com (cherry picked from commit 100a5b8dadfca50d91d9a4c9fc01431b42a25cab) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt.c | 38 ++++---------- drivers/gpu/drm/xe/xe_vm.c | 85 +++++++++++++++++++++----------- drivers/gpu/drm/xe/xe_vm.h | 8 +++ drivers/gpu/drm/xe/xe_vm_types.h | 4 +- 4 files changed, 75 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 1ddcc7e79a93e..12a627a23eb45 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1213,42 +1213,22 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, return 0; uvma = to_userptr_vma(vma); - notifier_seq = uvma->userptr.notifier_seq; + if (xe_pt_userptr_inject_eagain(uvma)) + xe_vma_userptr_force_invalidate(uvma); - if (uvma->userptr.initial_bind && !xe_vm_in_fault_mode(vm)) - return 0; + notifier_seq = uvma->userptr.notifier_seq; if (!mmu_interval_read_retry(&uvma->userptr.notifier, - notifier_seq) && - !xe_pt_userptr_inject_eagain(uvma)) + notifier_seq)) return 0; - if (xe_vm_in_fault_mode(vm)) { + if (xe_vm_in_fault_mode(vm)) return -EAGAIN; - } else { - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - - if (xe_vm_in_preempt_fence_mode(vm)) { - struct dma_resv_iter cursor; - struct dma_fence *fence; - long err; - - dma_resv_iter_begin(&cursor, xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP); - dma_resv_for_each_fence_unlocked(&cursor, fence) - dma_fence_enable_sw_signaling(fence); - dma_resv_iter_end(&cursor); - - err = dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, MAX_SCHEDULE_TIMEOUT); - XE_WARN_ON(err <= 0); - } - } + /* + * Just continue the operation since exec or rebind worker + * will take care of rebinding. + */ return 0; } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d2cd227e4d694..d54aaa5eaff38 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -579,51 +579,26 @@ static void preempt_rebind_work_func(struct work_struct *w) trace_xe_vm_rebind_worker_exit(vm); } -static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, - const struct mmu_notifier_range *range, - unsigned long cur_seq) +static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma) { - struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier); - struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr); + struct xe_userptr *userptr = &uvma->userptr; struct xe_vma *vma = &uvma->vma; - struct xe_vm *vm = xe_vma_vm(vma); struct dma_resv_iter cursor; struct dma_fence *fence; long err; - xe_assert(vm->xe, xe_vma_is_userptr(vma)); - trace_xe_vma_userptr_invalidate(vma); - - if (!mmu_notifier_range_blockable(range)) - return false; - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "NOTIFIER: addr=0x%016llx, range=0x%016llx", - xe_vma_start(vma), xe_vma_size(vma)); - - down_write(&vm->userptr.notifier_lock); - mmu_interval_set_seq(mni, cur_seq); - - /* No need to stop gpu access if the userptr is not yet bound. */ - if (!userptr->initial_bind) { - up_write(&vm->userptr.notifier_lock); - return true; - } - /* * Tell exec and rebind worker they need to repin and rebind this * userptr. */ if (!xe_vm_in_fault_mode(vm) && - !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) { + !(vma->gpuva.flags & XE_VMA_DESTROYED)) { spin_lock(&vm->userptr.invalidated_lock); list_move_tail(&userptr->invalidate_link, &vm->userptr.invalidated); spin_unlock(&vm->userptr.invalidated_lock); } - up_write(&vm->userptr.notifier_lock); - /* * Preempt fences turn into schedule disables, pipeline these. * Note that even in fault mode, we need to wait for binds and @@ -641,11 +616,35 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, false, MAX_SCHEDULE_TIMEOUT); XE_WARN_ON(err <= 0); - if (xe_vm_in_fault_mode(vm)) { + if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { err = xe_vm_invalidate_vma(vma); XE_WARN_ON(err); } +} +static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier); + struct xe_vma *vma = &uvma->vma; + struct xe_vm *vm = xe_vma_vm(vma); + + xe_assert(vm->xe, xe_vma_is_userptr(vma)); + trace_xe_vma_userptr_invalidate(vma); + + if (!mmu_notifier_range_blockable(range)) + return false; + + vm_dbg(&xe_vma_vm(vma)->xe->drm, + "NOTIFIER: addr=0x%016llx, range=0x%016llx", + xe_vma_start(vma), xe_vma_size(vma)); + + down_write(&vm->userptr.notifier_lock); + mmu_interval_set_seq(mni, cur_seq); + + __vma_userptr_invalidate(vm, uvma); + up_write(&vm->userptr.notifier_lock); trace_xe_vma_userptr_invalidate_complete(vma); return true; @@ -655,6 +654,34 @@ static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { .invalidate = vma_userptr_invalidate, }; +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) +/** + * xe_vma_userptr_force_invalidate() - force invalidate a userptr + * @uvma: The userptr vma to invalidate + * + * Perform a forced userptr invalidation for testing purposes. + */ +void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) +{ + struct xe_vm *vm = xe_vma_vm(&uvma->vma); + + /* Protect against concurrent userptr pinning */ + lockdep_assert_held(&vm->lock); + /* Protect against concurrent notifiers */ + lockdep_assert_held(&vm->userptr.notifier_lock); + /* + * Protect against concurrent instances of this function and + * the critical exec sections + */ + xe_vm_assert_held(vm); + + if (!mmu_interval_read_retry(&uvma->userptr.notifier, + uvma->userptr.notifier_seq)) + uvma->userptr.notifier_seq -= 2; + __vma_userptr_invalidate(vm, uvma); +} +#endif + int xe_vm_userptr_pin(struct xe_vm *vm) { struct xe_userptr_vma *uvma, *next; diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 256a837c2704a..b882bfb31bd05 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -279,4 +279,12 @@ struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm); void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap); void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p); void xe_vm_snapshot_free(struct xe_vm_snapshot *snap); + +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) +void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma); +#else +static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) +{ +} +#endif #endif diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 7f9a303e51d89..d2511819cdf43 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -227,8 +227,8 @@ struct xe_vm { * up for revalidation. Protected from access with the * @invalidated_lock. Removing items from the list * additionally requires @lock in write mode, and adding - * items to the list requires the @userptr.notifer_lock in - * write mode. + * items to the list requires either the @userptr.notifer_lock in + * write mode, OR @lock in write mode. */ struct list_head invalidated; } userptr; -- GitLab From ae482ec8cd1a85bde3307f71921a7780086fbec0 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 28 Feb 2025 08:30:58 +0100 Subject: [PATCH 1299/1585] drm/xe: Add staging tree for VM binds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Concurrent VM bind staging and zapping of PTEs from a userptr notifier do not work because the view of PTEs is not stable. VM binds cannot acquire the notifier lock during staging, as memory allocations are required. To resolve this race condition, use a staging tree for VM binds that is committed only under the userptr notifier lock during the final step of the bind. This ensures a consistent view of the PTEs in the userptr notifier. A follow up may only use staging for VM in fault mode as this is the only mode in which the above race exists. v3: - Drop zap PTE change (Thomas) - s/xe_pt_entry/xe_pt_entry_staging (Thomas) Suggested-by: Thomas Hellström Cc: Fixes: e8babb280b5e ("drm/xe: Convert multiple bind ops into single job") Fixes: a708f6501c69 ("drm/xe: Update PT layer with better error handling") Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20250228073058.59510-5-thomas.hellstrom@linux.intel.com Signed-off-by: Thomas Hellström (cherry picked from commit 6f39b0c5ef0385eae586760d10b9767168037aa5) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt.c | 58 +++++++++++++++++++++++---------- drivers/gpu/drm/xe/xe_pt_walk.c | 3 +- drivers/gpu/drm/xe/xe_pt_walk.h | 4 +++ 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 12a627a23eb45..dc24baa840924 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -28,6 +28,8 @@ struct xe_pt_dir { struct xe_pt pt; /** @children: Array of page-table child nodes */ struct xe_ptw *children[XE_PDES]; + /** @staging: Array of page-table staging nodes */ + struct xe_ptw *staging[XE_PDES]; }; #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) @@ -48,9 +50,10 @@ static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) return container_of(pt, struct xe_pt_dir, pt); } -static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) +static struct xe_pt * +xe_pt_entry_staging(struct xe_pt_dir *pt_dir, unsigned int index) { - return container_of(pt_dir->children[index], struct xe_pt, base); + return container_of(pt_dir->staging[index], struct xe_pt, base); } static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, @@ -125,6 +128,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, } pt->bo = bo; pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; + pt->base.staging = level ? as_xe_pt_dir(pt)->staging : NULL; if (vm->xef) xe_drm_client_add_bo(vm->xef->client, pt->bo); @@ -206,8 +210,8 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); for (i = 0; i < XE_PDES; i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, + if (xe_pt_entry_staging(pt_dir, i)) + xe_pt_destroy(xe_pt_entry_staging(pt_dir, i), flags, deferred); } } @@ -376,8 +380,10 @@ xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, /* Continue building a non-connected subtree. */ struct iosys_map *map = &parent->bo->vmap; - if (unlikely(xe_child)) + if (unlikely(xe_child)) { parent->base.children[offset] = &xe_child->base; + parent->base.staging[offset] = &xe_child->base; + } xe_pt_write(xe_walk->vm->xe, map, offset, pte); parent->num_live++; @@ -614,6 +620,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, .ops = &xe_pt_stage_bind_ops, .shifts = xe_normal_pt_shifts, .max_level = XE_PT_HIGHEST_LEVEL, + .staging = true, }, .vm = xe_vma_vm(vma), .tile = tile, @@ -873,7 +880,7 @@ static void xe_pt_cancel_bind(struct xe_vma *vma, } } -static void xe_pt_commit_locks_assert(struct xe_vma *vma) +static void xe_pt_commit_prepare_locks_assert(struct xe_vma *vma) { struct xe_vm *vm = xe_vma_vm(vma); @@ -885,6 +892,16 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma) xe_vm_assert_held(vm); } +static void xe_pt_commit_locks_assert(struct xe_vma *vma) +{ + struct xe_vm *vm = xe_vma_vm(vma); + + xe_pt_commit_prepare_locks_assert(vma); + + if (xe_vma_is_userptr(vma)) + lockdep_assert_held_read(&vm->userptr.notifier_lock); +} + static void xe_pt_commit(struct xe_vma *vma, struct xe_vm_pgtable_update *entries, u32 num_entries, struct llist_head *deferred) @@ -895,13 +912,17 @@ static void xe_pt_commit(struct xe_vma *vma, for (i = 0; i < num_entries; i++) { struct xe_pt *pt = entries[i].pt; + struct xe_pt_dir *pt_dir; if (!pt->level) continue; + pt_dir = as_xe_pt_dir(pt); for (j = 0; j < entries[i].qwords; j++) { struct xe_pt *oldpte = entries[i].pt_entries[j].pt; + int j_ = j + entries[i].ofs; + pt_dir->children[j_] = pt_dir->staging[j_]; xe_pt_destroy(oldpte, xe_vma_vm(vma)->flags, deferred); } } @@ -913,7 +934,7 @@ static void xe_pt_abort_bind(struct xe_vma *vma, { int i, j; - xe_pt_commit_locks_assert(vma); + xe_pt_commit_prepare_locks_assert(vma); for (i = num_entries - 1; i >= 0; --i) { struct xe_pt *pt = entries[i].pt; @@ -928,10 +949,10 @@ static void xe_pt_abort_bind(struct xe_vma *vma, pt_dir = as_xe_pt_dir(pt); for (j = 0; j < entries[i].qwords; j++) { u32 j_ = j + entries[i].ofs; - struct xe_pt *newpte = xe_pt_entry(pt_dir, j_); + struct xe_pt *newpte = xe_pt_entry_staging(pt_dir, j_); struct xe_pt *oldpte = entries[i].pt_entries[j].pt; - pt_dir->children[j_] = oldpte ? &oldpte->base : 0; + pt_dir->staging[j_] = oldpte ? &oldpte->base : 0; xe_pt_destroy(newpte, xe_vma_vm(vma)->flags, NULL); } } @@ -943,7 +964,7 @@ static void xe_pt_commit_prepare_bind(struct xe_vma *vma, { u32 i, j; - xe_pt_commit_locks_assert(vma); + xe_pt_commit_prepare_locks_assert(vma); for (i = 0; i < num_entries; i++) { struct xe_pt *pt = entries[i].pt; @@ -961,10 +982,10 @@ static void xe_pt_commit_prepare_bind(struct xe_vma *vma, struct xe_pt *newpte = entries[i].pt_entries[j].pt; struct xe_pt *oldpte = NULL; - if (xe_pt_entry(pt_dir, j_)) - oldpte = xe_pt_entry(pt_dir, j_); + if (xe_pt_entry_staging(pt_dir, j_)) + oldpte = xe_pt_entry_staging(pt_dir, j_); - pt_dir->children[j_] = &newpte->base; + pt_dir->staging[j_] = &newpte->base; entries[i].pt_entries[j].pt = oldpte; } } @@ -1494,6 +1515,7 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, .ops = &xe_pt_stage_unbind_ops, .shifts = xe_normal_pt_shifts, .max_level = XE_PT_HIGHEST_LEVEL, + .staging = true, }, .tile = tile, .modified_start = xe_vma_start(vma), @@ -1535,7 +1557,7 @@ static void xe_pt_abort_unbind(struct xe_vma *vma, { int i, j; - xe_pt_commit_locks_assert(vma); + xe_pt_commit_prepare_locks_assert(vma); for (i = num_entries - 1; i >= 0; --i) { struct xe_vm_pgtable_update *entry = &entries[i]; @@ -1548,7 +1570,7 @@ static void xe_pt_abort_unbind(struct xe_vma *vma, continue; for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) - pt_dir->children[j] = + pt_dir->staging[j] = entries[i].pt_entries[j - entry->ofs].pt ? &entries[i].pt_entries[j - entry->ofs].pt->base : NULL; } @@ -1561,7 +1583,7 @@ xe_pt_commit_prepare_unbind(struct xe_vma *vma, { int i, j; - xe_pt_commit_locks_assert(vma); + xe_pt_commit_prepare_locks_assert(vma); for (i = 0; i < num_entries; ++i) { struct xe_vm_pgtable_update *entry = &entries[i]; @@ -1575,8 +1597,8 @@ xe_pt_commit_prepare_unbind(struct xe_vma *vma, pt_dir = as_xe_pt_dir(pt); for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) { entry->pt_entries[j - entry->ofs].pt = - xe_pt_entry(pt_dir, j); - pt_dir->children[j] = NULL; + xe_pt_entry_staging(pt_dir, j); + pt_dir->staging[j] = NULL; } } } diff --git a/drivers/gpu/drm/xe/xe_pt_walk.c b/drivers/gpu/drm/xe/xe_pt_walk.c index b8b3d2aea4923..be602a763ff32 100644 --- a/drivers/gpu/drm/xe/xe_pt_walk.c +++ b/drivers/gpu/drm/xe/xe_pt_walk.c @@ -74,7 +74,8 @@ int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level, u64 addr, u64 end, struct xe_pt_walk *walk) { pgoff_t offset = xe_pt_offset(addr, level, walk); - struct xe_ptw **entries = parent->children ? parent->children : NULL; + struct xe_ptw **entries = walk->staging ? (parent->staging ?: NULL) : + (parent->children ?: NULL); const struct xe_pt_walk_ops *ops = walk->ops; enum page_walk_action action; struct xe_ptw *child; diff --git a/drivers/gpu/drm/xe/xe_pt_walk.h b/drivers/gpu/drm/xe/xe_pt_walk.h index 5ecc4d2f0f653..5c02c244f7de3 100644 --- a/drivers/gpu/drm/xe/xe_pt_walk.h +++ b/drivers/gpu/drm/xe/xe_pt_walk.h @@ -11,12 +11,14 @@ /** * struct xe_ptw - base class for driver pagetable subclassing. * @children: Pointer to an array of children if any. + * @staging: Pointer to an array of staging if any. * * Drivers could subclass this, and if it's a page-directory, typically * embed an array of xe_ptw pointers. */ struct xe_ptw { struct xe_ptw **children; + struct xe_ptw **staging; }; /** @@ -41,6 +43,8 @@ struct xe_pt_walk { * as shared pagetables. */ bool shared_pt_mode; + /** @staging: Walk staging PT structure */ + bool staging; }; /** -- GitLab From e3e2e7fc4cd8414c9a966ef1b344db543f8614f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Tue, 4 Mar 2025 18:33:40 +0100 Subject: [PATCH 1300/1585] drm/xe/hmm: Style- and include fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add proper #ifndef around the xe_hmm.h header, proper spacing and since the documentation mostly follows kerneldoc format, make it kerneldoc. Also prepare for upcoming -stable fixes. Fixes: 81e058a3e7fd ("drm/xe: Introduce helper to populate userptr") Cc: Oak Zeng Cc: # v6.10+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Acked-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250304173342.22009-2-thomas.hellstrom@linux.intel.com (cherry picked from commit bbe2b06b55bc061c8fcec034ed26e88287f39143) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hmm.c | 9 +++------ drivers/gpu/drm/xe/xe_hmm.h | 5 +++++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c index 2e4ae61567d8d..6ddcf88d8a393 100644 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -19,11 +19,10 @@ static u64 xe_npages_in_range(unsigned long start, unsigned long end) return (end - start) >> PAGE_SHIFT; } -/* +/** * xe_mark_range_accessed() - mark a range is accessed, so core mm * have such information for memory eviction or write back to * hard disk - * * @range: the range to mark * @write: if write to this range, we mark pages in this range * as dirty @@ -43,11 +42,10 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write) } } -/* +/** * xe_build_sg() - build a scatter gather table for all the physical pages/pfn * in a hmm_range. dma-map pages if necessary. dma-address is save in sg table * and will be used to program GPU page table later. - * * @xe: the xe device who will access the dma-address in sg table * @range: the hmm range that we build the sg table from. range->hmm_pfns[] * has the pfn numbers of pages that back up this hmm address range. @@ -112,9 +110,8 @@ static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, return ret; } -/* +/** * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr - * * @uvma: the userptr vma which hold the scatter gather table * * With function xe_userptr_populate_range, we allocate storage of diff --git a/drivers/gpu/drm/xe/xe_hmm.h b/drivers/gpu/drm/xe/xe_hmm.h index 909dc2bdcd97e..9602cb7d976dd 100644 --- a/drivers/gpu/drm/xe/xe_hmm.h +++ b/drivers/gpu/drm/xe/xe_hmm.h @@ -3,9 +3,14 @@ * Copyright © 2024 Intel Corporation */ +#ifndef _XE_HMM_H_ +#define _XE_HMM_H_ + #include struct xe_userptr_vma; int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked); + void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma); +#endif -- GitLab From 0a98219bcc961edd3388960576e4353e123b4a51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Tue, 4 Mar 2025 18:33:41 +0100 Subject: [PATCH 1301/1585] drm/xe/hmm: Don't dereference struct page pointers without notifier lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pnfs that we obtain from hmm_range_fault() point to pages that we don't have a reference on, and the guarantee that they are still in the cpu page-tables is that the notifier lock must be held and the notifier seqno is still valid. So while building the sg table and marking the pages accesses / dirty we need to hold this lock with a validated seqno. However, the lock is reclaim tainted which makes sg_alloc_table_from_pages_segment() unusable, since it internally allocates memory. Instead build the sg-table manually. For the non-iommu case this might lead to fewer coalesces, but if that's a problem it can be fixed up later in the resource cursor code. For the iommu case, the whole sg-table may still be coalesced to a single contigous device va region. This avoids marking pages that we don't own dirty and accessed, and it also avoid dereferencing struct pages that we don't own. v2: - Use assert to check whether hmm pfns are valid (Matthew Auld) - Take into account that large pages may cross range boundaries (Matthew Auld) v3: - Don't unnecessarily check for a non-freed sg-table. (Matthew Auld) - Add a missing up_read() in an error path. (Matthew Auld) Fixes: 81e058a3e7fd ("drm/xe: Introduce helper to populate userptr") Cc: Oak Zeng Cc: # v6.10+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Acked-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250304173342.22009-3-thomas.hellstrom@linux.intel.com (cherry picked from commit ea3e66d280ce2576664a862693d1da8fd324c317) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hmm.c | 112 +++++++++++++++++++++++++++--------- 1 file changed, 86 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c index 6ddcf88d8a393..be284b852307e 100644 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -42,6 +42,42 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write) } } +static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st, + struct hmm_range *range, struct rw_semaphore *notifier_sem) +{ + unsigned long i, npages, hmm_pfn; + unsigned long num_chunks = 0; + int ret; + + /* HMM docs says this is needed. */ + ret = down_read_interruptible(notifier_sem); + if (ret) + return ret; + + if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) { + up_read(notifier_sem); + return -EAGAIN; + } + + npages = xe_npages_in_range(range->start, range->end); + for (i = 0; i < npages;) { + unsigned long len; + + hmm_pfn = range->hmm_pfns[i]; + xe_assert(xe, hmm_pfn & HMM_PFN_VALID); + + len = 1UL << hmm_pfn_to_map_order(hmm_pfn); + + /* If order > 0 the page may extend beyond range->start */ + len -= (hmm_pfn & ~HMM_PFN_FLAGS) & (len - 1); + i += len; + num_chunks++; + } + up_read(notifier_sem); + + return sg_alloc_table(st, num_chunks, GFP_KERNEL); +} + /** * xe_build_sg() - build a scatter gather table for all the physical pages/pfn * in a hmm_range. dma-map pages if necessary. dma-address is save in sg table @@ -50,6 +86,7 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write) * @range: the hmm range that we build the sg table from. range->hmm_pfns[] * has the pfn numbers of pages that back up this hmm address range. * @st: pointer to the sg table. + * @notifier_sem: The xe notifier lock. * @write: whether we write to this range. This decides dma map direction * for system pages. If write we map it bi-diretional; otherwise * DMA_TO_DEVICE @@ -76,38 +113,41 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write) * Returns 0 if successful; -ENOMEM if fails to allocate memory */ static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, - struct sg_table *st, bool write) + struct sg_table *st, + struct rw_semaphore *notifier_sem, + bool write) { + unsigned long npages = xe_npages_in_range(range->start, range->end); struct device *dev = xe->drm.dev; - struct page **pages; - u64 i, npages; - int ret; + struct scatterlist *sgl; + struct page *page; + unsigned long i, j; - npages = xe_npages_in_range(range->start, range->end); - pages = kvmalloc_array(npages, sizeof(*pages), GFP_KERNEL); - if (!pages) - return -ENOMEM; + lockdep_assert_held(notifier_sem); - for (i = 0; i < npages; i++) { - pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]); - xe_assert(xe, !is_device_private_page(pages[i])); - } + i = 0; + for_each_sg(st->sgl, sgl, st->nents, j) { + unsigned long hmm_pfn, size; - ret = sg_alloc_table_from_pages_segment(st, pages, npages, 0, npages << PAGE_SHIFT, - xe_sg_segment_size(dev), GFP_KERNEL); - if (ret) - goto free_pages; + hmm_pfn = range->hmm_pfns[i]; + page = hmm_pfn_to_page(hmm_pfn); + xe_assert(xe, !is_device_private_page(page)); + + size = 1UL << hmm_pfn_to_map_order(hmm_pfn); + size -= page_to_pfn(page) & (size - 1); + i += size; - ret = dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); - if (ret) { - sg_free_table(st); - st = NULL; + if (unlikely(j == st->nents - 1)) { + if (i > npages) + size -= (i - npages); + sg_mark_end(sgl); + } + sg_set_page(sgl, page, size << PAGE_SHIFT, 0); } + xe_assert(xe, i == npages); -free_pages: - kvfree(pages); - return ret; + return dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); } /** @@ -237,16 +277,36 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, if (ret) goto free_pfns; - ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, write); + ret = xe_alloc_sg(vm->xe, &userptr->sgt, &hmm_range, &vm->userptr.notifier_lock); if (ret) goto free_pfns; + ret = down_read_interruptible(&vm->userptr.notifier_lock); + if (ret) + goto free_st; + + if (mmu_interval_read_retry(hmm_range.notifier, hmm_range.notifier_seq)) { + ret = -EAGAIN; + goto out_unlock; + } + + ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, + &vm->userptr.notifier_lock, write); + if (ret) + goto out_unlock; + xe_mark_range_accessed(&hmm_range, write); userptr->sg = &userptr->sgt; userptr->notifier_seq = hmm_range.notifier_seq; + up_read(&vm->userptr.notifier_lock); + kvfree(pfns); + return 0; +out_unlock: + up_read(&vm->userptr.notifier_lock); +free_st: + sg_free_table(&userptr->sgt); free_pfns: kvfree(pfns); return ret; } - -- GitLab From 333b8906336174478efbbfc1e24a89e3397ffe65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Tue, 4 Mar 2025 18:33:42 +0100 Subject: [PATCH 1302/1585] drm/xe/userptr: Unmap userptrs in the mmu notifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If userptr pages are freed after a call to the xe mmu notifier, the device will not be blocked out from theoretically accessing these pages unless they are also unmapped from the iommu, and this violates some aspects of the iommu-imposed security. Ensure that userptrs are unmapped in the mmu notifier to mitigate this. A naive attempt would try to free the sg table, but the sg table itself may be accessed by a concurrent bind operation, so settle for only unmapping. v3: - Update lockdep asserts. - Fix a typo (Matthew Auld) Fixes: 81e058a3e7fd ("drm/xe: Introduce helper to populate userptr") Cc: Oak Zeng Cc: Matthew Auld Cc: # v6.10+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Acked-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250304173342.22009-4-thomas.hellstrom@linux.intel.com (cherry picked from commit ba767b9d01a2c552d76cf6f46b125d50ec4147a6) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hmm.c | 51 ++++++++++++++++++++++++++------ drivers/gpu/drm/xe/xe_hmm.h | 2 ++ drivers/gpu/drm/xe/xe_vm.c | 4 +++ drivers/gpu/drm/xe/xe_vm_types.h | 4 +++ 4 files changed, 52 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c index be284b852307e..392102515f3d8 100644 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -150,6 +150,45 @@ static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); } +static void xe_hmm_userptr_set_mapped(struct xe_userptr_vma *uvma) +{ + struct xe_userptr *userptr = &uvma->userptr; + struct xe_vm *vm = xe_vma_vm(&uvma->vma); + + lockdep_assert_held_write(&vm->lock); + lockdep_assert_held(&vm->userptr.notifier_lock); + + mutex_lock(&userptr->unmap_mutex); + xe_assert(vm->xe, !userptr->mapped); + userptr->mapped = true; + mutex_unlock(&userptr->unmap_mutex); +} + +void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma) +{ + struct xe_userptr *userptr = &uvma->userptr; + struct xe_vma *vma = &uvma->vma; + bool write = !xe_vma_read_only(vma); + struct xe_vm *vm = xe_vma_vm(vma); + struct xe_device *xe = vm->xe; + + if (!lockdep_is_held_type(&vm->userptr.notifier_lock, 0) && + !lockdep_is_held_type(&vm->lock, 0) && + !(vma->gpuva.flags & XE_VMA_DESTROYED)) { + /* Don't unmap in exec critical section. */ + xe_vm_assert_held(vm); + /* Don't unmap while mapping the sg. */ + lockdep_assert_held(&vm->lock); + } + + mutex_lock(&userptr->unmap_mutex); + if (userptr->sg && userptr->mapped) + dma_unmap_sgtable(xe->drm.dev, userptr->sg, + write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0); + userptr->mapped = false; + mutex_unlock(&userptr->unmap_mutex); +} + /** * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr * @uvma: the userptr vma which hold the scatter gather table @@ -161,16 +200,9 @@ static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma) { struct xe_userptr *userptr = &uvma->userptr; - struct xe_vma *vma = &uvma->vma; - bool write = !xe_vma_read_only(vma); - struct xe_vm *vm = xe_vma_vm(vma); - struct xe_device *xe = vm->xe; - struct device *dev = xe->drm.dev; - - xe_assert(xe, userptr->sg); - dma_unmap_sgtable(dev, userptr->sg, - write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0); + xe_assert(xe_vma_vm(&uvma->vma)->xe, userptr->sg); + xe_hmm_userptr_unmap(uvma); sg_free_table(userptr->sg); userptr->sg = NULL; } @@ -297,6 +329,7 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, xe_mark_range_accessed(&hmm_range, write); userptr->sg = &userptr->sgt; + xe_hmm_userptr_set_mapped(uvma); userptr->notifier_seq = hmm_range.notifier_seq; up_read(&vm->userptr.notifier_lock); kvfree(pfns); diff --git a/drivers/gpu/drm/xe/xe_hmm.h b/drivers/gpu/drm/xe/xe_hmm.h index 9602cb7d976dd..0ea98d8e7bbc7 100644 --- a/drivers/gpu/drm/xe/xe_hmm.h +++ b/drivers/gpu/drm/xe/xe_hmm.h @@ -13,4 +13,6 @@ struct xe_userptr_vma; int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked); void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma); + +void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma); #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d54aaa5eaff38..ec6ec18ab3faa 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -620,6 +620,8 @@ static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uv err = xe_vm_invalidate_vma(vma); XE_WARN_ON(err); } + + xe_hmm_userptr_unmap(uvma); } static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, @@ -1039,6 +1041,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, INIT_LIST_HEAD(&userptr->invalidate_link); INIT_LIST_HEAD(&userptr->repin_link); vma->gpuva.gem.offset = bo_offset_or_userptr; + mutex_init(&userptr->unmap_mutex); err = mmu_interval_notifier_insert(&userptr->notifier, current->mm, @@ -1080,6 +1083,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma) * them anymore */ mmu_interval_notifier_remove(&userptr->notifier); + mutex_destroy(&userptr->unmap_mutex); xe_vm_put(vm); } else if (xe_vma_is_null(vma)) { xe_vm_put(vm); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index d2511819cdf43..a4b4091cfd0da 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -59,12 +59,16 @@ struct xe_userptr { struct sg_table *sg; /** @notifier_seq: notifier sequence number */ unsigned long notifier_seq; + /** @unmap_mutex: Mutex protecting dma-unmapping */ + struct mutex unmap_mutex; /** * @initial_bind: user pointer has been bound at least once. * write: vm->userptr.notifier_lock in read mode and vm->resv held. * read: vm->userptr.notifier_lock in write mode or vm->resv held. */ bool initial_bind; + /** @mapped: Whether the @sgt sg-table is dma-mapped. Protected by @unmap_mutex. */ + bool mapped; #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) u32 divisor; #endif -- GitLab From bcb0fda3c2da9fe4721d3e73d80e778c038e7d27 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 5 Mar 2025 14:03:34 -0700 Subject: [PATCH 1303/1585] io_uring/rw: ensure reissue path is correctly handled for IOPOLL The IOPOLL path posts CQEs when the io_kiocb is marked as completed, so it cannot rely on the usual retry that non-IOPOLL requests do for read/write requests. If -EAGAIN is received and the request should be retried, go through the normal completion path and let the normal flush logic catch it and reissue it, like what is done for !IOPOLL reads or writes. Fixes: d803d123948f ("io_uring/rw: handle -EAGAIN retry at IO completion time") Reported-by: John Garry Link: https://lore.kernel.org/io-uring/2b43ccfa-644d-4a09-8f8f-39ad71810f41@oracle.com/ Signed-off-by: Jens Axboe --- io_uring/rw.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index 9edc6baebd01c..e5528cebcd066 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -560,11 +560,10 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res) if (kiocb->ki_flags & IOCB_WRITE) io_req_end_write(req); if (unlikely(res != req->cqe.res)) { - if (res == -EAGAIN && io_rw_should_reissue(req)) { + if (res == -EAGAIN && io_rw_should_reissue(req)) req->flags |= REQ_F_REISSUE | REQ_F_BL_NO_RECYCLE; - return; - } - req->cqe.res = res; + else + req->cqe.res = res; } /* order with io_iopoll_complete() checking ->iopoll_completed */ -- GitLab From df08c94baafb001de6cf44bb7098bb557f36c335 Mon Sep 17 00:00:00 2001 From: Nicklas Bo Jensen Date: Thu, 27 Feb 2025 13:32:34 +0000 Subject: [PATCH 1304/1585] netfilter: nf_conncount: garbage collection is not skipped when jiffies wrap around nf_conncount is supposed to skip garbage collection if it has already run garbage collection in the same jiffy. Unfortunately, this is broken when jiffies wrap around which this patch fixes. The problem is that last_gc in the nf_conncount_list struct is an u32, but jiffies is an unsigned long which is 8 bytes on my systems. When those two are compared it only works until last_gc wraps around. See bug report: https://bugzilla.netfilter.org/show_bug.cgi?id=1778 for more details. Fixes: d265929930e2 ("netfilter: nf_conncount: reduce unnecessary GC") Signed-off-by: Nicklas Bo Jensen Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conncount.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 4890af4dc263f..ebe38ed2e6f4f 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -132,7 +132,7 @@ static int __nf_conncount_add(struct net *net, struct nf_conn *found_ct; unsigned int collect = 0; - if (time_is_after_eq_jiffies((unsigned long)list->last_gc)) + if ((u32)jiffies == list->last_gc) goto add_new_node; /* check the saved connections */ @@ -234,7 +234,7 @@ bool nf_conncount_gc_list(struct net *net, bool ret = false; /* don't bother if we just did GC */ - if (time_is_after_eq_jiffies((unsigned long)READ_ONCE(list->last_gc))) + if ((u32)jiffies == READ_ONCE(list->last_gc)) return false; /* don't bother if other cpu is already doing GC */ -- GitLab From 374908a15af4cd60862ebc51a6e012ace2212c76 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 3 Mar 2025 18:10:30 +0100 Subject: [PATCH 1305/1585] rust: remove leftover mentions of the `alloc` crate In commit 392e34b6bc22 ("kbuild: rust: remove the `alloc` crate and `GlobalAlloc`") we stopped using the upstream `alloc` crate. Thus remove a few leftover mentions treewide. Cc: stable@vger.kernel.org # Also to 6.12.y after the `alloc` backport lands Fixes: 392e34b6bc22 ("kbuild: rust: remove the `alloc` crate and `GlobalAlloc`") Reviewed-by: Danilo Krummrich Reviewed-by: Andreas Hindborg Link: https://lore.kernel.org/r/20250303171030.1081134-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- Documentation/rust/quick-start.rst | 2 +- rust/kernel/lib.rs | 2 +- scripts/rustdoc_test_gen.rs | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/rust/quick-start.rst b/Documentation/rust/quick-start.rst index 4aa50e5fcb8c0..6d2607870ba44 100644 --- a/Documentation/rust/quick-start.rst +++ b/Documentation/rust/quick-start.rst @@ -145,7 +145,7 @@ Rust standard library source **************************** The Rust standard library source is required because the build system will -cross-compile ``core`` and ``alloc``. +cross-compile ``core``. If ``rustup`` is being used, run:: diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 398242f92a961..7697c60b2d1a6 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -6,7 +6,7 @@ //! usage by Rust code in the kernel and is shared by all of them. //! //! In other words, all the rest of the Rust code in the kernel (e.g. kernel -//! modules written in Rust) depends on [`core`], [`alloc`] and this crate. +//! modules written in Rust) depends on [`core`] and this crate. //! //! If you need a kernel C API that is not ported or wrapped yet here, then //! do so first instead of bypassing this crate. diff --git a/scripts/rustdoc_test_gen.rs b/scripts/rustdoc_test_gen.rs index 5ebd42ae4a3fd..76aaa8329413d 100644 --- a/scripts/rustdoc_test_gen.rs +++ b/scripts/rustdoc_test_gen.rs @@ -15,8 +15,8 @@ //! - Test code should be able to define functions and call them, without having to carry //! the context. //! -//! - Later on, we may want to be able to test non-kernel code (e.g. `core`, `alloc` or -//! third-party crates) which likely use the standard library `assert*!` macros. +//! - Later on, we may want to be able to test non-kernel code (e.g. `core` or third-party +//! crates) which likely use the standard library `assert*!` macros. //! //! For this reason, instead of the passed context, `kunit_get_current_test()` is used instead //! (i.e. `current->kunit_test`). -- GitLab From df27cef153603b18a7d094b53cc3d5264ff32797 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Wed, 5 Mar 2025 13:29:01 +0000 Subject: [PATCH 1306/1585] rust: init: fix `Zeroable` implementation for `Option>` and `Option>` According to [1], `NonNull` and `#[repr(transparent)]` wrapper types such as our custom `KBox` have the null pointer optimization only if `T: Sized`. Thus remove the `Zeroable` implementation for the unsized case. Link: https://doc.rust-lang.org/stable/std/option/index.html#representation [1] Reported-by: Alice Ryhl Closes: https://lore.kernel.org/rust-for-linux/CAH5fLghL+qzrD8KiCF1V3vf2YcC6aWySzkmaE2Zzrnh1gKj-hw@mail.gmail.com/ Cc: stable@vger.kernel.org # v6.12+ (a custom patch will be needed for 6.6.y) Fixes: 38cde0bd7b67 ("rust: init: add `Zeroable` trait and `init::zeroed` function") Signed-off-by: Benno Lossin Reviewed-by: Alice Ryhl Reviewed-by: Andreas Hindborg Link: https://lore.kernel.org/r/20250305132836.2145476-1-benno.lossin@proton.me [ Added Closes tag and moved up the Reported-by one. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/init.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 7fd1ea8265a55..8bbd5e3398fcb 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -1418,17 +1418,14 @@ impl_zeroable! { // SAFETY: `T: Zeroable` and `UnsafeCell` is `repr(transparent)`. {} UnsafeCell, - // SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee). + // SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee: + // https://doc.rust-lang.org/stable/std/option/index.html#representation). Option, Option, Option, Option, Option, Option, Option, Option, Option, Option, Option, Option, - - // SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee). - // - // In this case we are allowed to use `T: ?Sized`, since all zeros is the `None` variant. - {} Option>, - {} Option>, + {} Option>, + {} Option>, // SAFETY: `null` pointer is valid. // -- GitLab From ff64846bee0e7e3e7bc9363ebad3bab42dd27e24 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Thu, 13 Feb 2025 06:34:18 -0500 Subject: [PATCH 1307/1585] rust: alloc: satisfy POSIX alignment requirement ISO C's `aligned_alloc` is partially implementation-defined; on some systems it inherits stricter requirements from POSIX's `posix_memalign`. This causes the call added in commit dd09538fb409 ("rust: alloc: implement `Cmalloc` in module allocator_test") to fail on macOS because it doesn't meet the requirements of `posix_memalign`. Adjust the call to meet the POSIX requirement and add a comment. This fixes failures in `make rusttest` on macOS. Acked-by: Danilo Krummrich Cc: stable@vger.kernel.org Fixes: dd09538fb409 ("rust: alloc: implement `Cmalloc` in module allocator_test") Signed-off-by: Tamir Duberstein Reviewed-by: Gary Guo Link: https://lore.kernel.org/r/20250213-aligned-alloc-v7-1-d2a2d0be164b@gmail.com [ Added Cc: stable. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/alloc/allocator_test.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/rust/kernel/alloc/allocator_test.rs b/rust/kernel/alloc/allocator_test.rs index e3240d16040bd..c37d4c0c64e9f 100644 --- a/rust/kernel/alloc/allocator_test.rs +++ b/rust/kernel/alloc/allocator_test.rs @@ -62,6 +62,24 @@ unsafe impl Allocator for Cmalloc { )); } + // ISO C (ISO/IEC 9899:2011) defines `aligned_alloc`: + // + // > The value of alignment shall be a valid alignment supported by the implementation + // [...]. + // + // As an example of the "supported by the implementation" requirement, POSIX.1-2001 (IEEE + // 1003.1-2001) defines `posix_memalign`: + // + // > The value of alignment shall be a power of two multiple of sizeof (void *). + // + // and POSIX-based implementations of `aligned_alloc` inherit this requirement. At the time + // of writing, this is known to be the case on macOS (but not in glibc). + // + // Satisfy the stricter requirement to avoid spurious test failures on some platforms. + let min_align = core::mem::size_of::<*const crate::ffi::c_void>(); + let layout = layout.align_to(min_align).map_err(|_| AllocError)?; + let layout = layout.pad_to_align(); + // SAFETY: Returns either NULL or a pointer to a memory allocation that satisfies or // exceeds the given size and alignment requirements. let dst = unsafe { libc_aligned_alloc(layout.align(), layout.size()) } as *mut u8; -- GitLab From f2e413f00ebec10a8725b1b75b1b523c561bd403 Mon Sep 17 00:00:00 2001 From: Alban Kurti Date: Thu, 6 Feb 2025 21:07:52 +0000 Subject: [PATCH 1308/1585] rust: docs: add missing newline to printing macro examples Fix adding a newline at the end of the usage of pr_info! in the documentation Fixes: e3c3d34507c7 ("docs: rust: Add description of Rust documentation test as KUnit ones") Reported-by: Miguel Ojeda Link: https://github.com/Rust-for-Linux/linux/issues/1139 Signed-off-by: Alban Kurti Reviewed-by: David Gow Link: https://lore.kernel.org/r/20250206-printing_fix-v3-1-a85273b501ae@invicto.ai [ Replaced Closes with Link since it fixes part of the issue. - Miguel ] Signed-off-by: Miguel Ojeda --- Documentation/rust/testing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/rust/testing.rst b/Documentation/rust/testing.rst index 568b71b415a45..180b886e0f1ee 100644 --- a/Documentation/rust/testing.rst +++ b/Documentation/rust/testing.rst @@ -97,7 +97,7 @@ operator are also supported as usual, e.g.: /// ``` /// # use kernel::{spawn_work_item, workqueue}; - /// spawn_work_item!(workqueue::system(), || pr_info!("x"))?; + /// spawn_work_item!(workqueue::system(), || pr_info!("x\n"))?; /// # Ok::<(), Error>(()) /// ``` -- GitLab From 6f5c36f56d475732981dcf624e0ac0cc7c8984c8 Mon Sep 17 00:00:00 2001 From: Alban Kurti Date: Thu, 6 Feb 2025 21:07:53 +0000 Subject: [PATCH 1309/1585] rust: error: add missing newline to pr_warn! calls Added missing newline at the end of pr_warn! usage so the log is not missed. Fixes: 6551a7fe0acb ("rust: error: Add Error::from_errno{_unchecked}()") Reported-by: Miguel Ojeda Link: https://github.com/Rust-for-Linux/linux/issues/1139 Signed-off-by: Alban Kurti Link: https://lore.kernel.org/r/20250206-printing_fix-v3-2-a85273b501ae@invicto.ai [ Replaced Closes with Link since it fixes part of the issue. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/error.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index f6ecf09cb65f4..a194d83e6835c 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -107,7 +107,7 @@ impl Error { } else { // TODO: Make it a `WARN_ONCE` once available. crate::pr_warn!( - "attempted to create `Error` with out of range `errno`: {}", + "attempted to create `Error` with out of range `errno`: {}\n", errno ); code::EINVAL -- GitLab From ccc2f5a436fbb0ae1fb598932a9b8e48423c1959 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 4 Mar 2025 09:50:23 +0100 Subject: [PATCH 1310/1585] net: dsa: mt7530: Fix traffic flooding for MMIO devices On MMIO devices (e.g. MT7988 or EN7581) unicast traffic received on lanX port is flooded on all other user ports if the DSA switch is configured without VLAN support since PORT_MATRIX in PCR regs contains all user ports. Similar to MDIO devices (e.g. MT7530 and MT7531) fix the issue defining default VLAN-ID 0 for MT7530 MMIO devices. Fixes: 110c18bfed414 ("net: dsa: mt7530: introduce driver for MT7988 built-in switch") Signed-off-by: Lorenzo Bianconi Reviewed-by: Chester A. Unal Link: https://patch.msgid.link/20250304-mt7988-flooding-fix-v1-1-905523ae83e9@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 1c83af805209c..5883eb93efb11 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2591,7 +2591,8 @@ mt7531_setup_common(struct dsa_switch *ds) if (ret < 0) return ret; - return 0; + /* Setup VLAN ID 0 for VLAN-unaware bridges */ + return mt7530_setup_vlan0(priv); } static int @@ -2687,11 +2688,6 @@ mt7531_setup(struct dsa_switch *ds) if (ret) return ret; - /* Setup VLAN ID 0 for VLAN-unaware bridges */ - ret = mt7530_setup_vlan0(priv); - if (ret) - return ret; - ds->assisted_learning_on_cpu_port = true; ds->mtu_enforcement_ingress = true; -- GitLab From 0a7565ee6ec31eb16c0476adbfc1af3f2271cb6b Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Thu, 13 Feb 2025 19:38:50 -0800 Subject: [PATCH 1311/1585] Revert "selftests/mm: remove local __NR_* definitions" This reverts commit a5c6bc590094a1a73cf6fa3f505e1945d2bf2461. The general approach described in commit e076eaca5906 ("selftests: break the dependency upon local header files") was taken one step too far here: it should not have been extended to include the syscall numbers. This is because doing so would require per-arch support in tools/include/uapi, and no such support exists. This revert fixes two separate reports of test failures, from Dave Hansen[1], and Li Wang[2]. An excerpt of Dave's report: Before this commit (a5c6bc590094a1a73cf6fa3f505e1945d2bf2461) things are fine. But after, I get: running PKEY tests for unsupported CPU/OS An excerpt of Li's report: I just found that mlock2_() return a wrong value in mlock2-test [1] https://lore.kernel.org/dc585017-6740-4cab-a536-b12b37a7582d@intel.com [2] https://lore.kernel.org/CAEemH2eW=UMu9+turT2jRie7+6ewUazXmA6kL+VBo3cGDGU6RA@mail.gmail.com Link: https://lkml.kernel.org/r/20250214033850.235171-1-jhubbard@nvidia.com Fixes: a5c6bc590094 ("selftests/mm: remove local __NR_* definitions") Signed-off-by: John Hubbard Cc: Dave Hansen Cc: Li Wang Cc: David Hildenbrand Cc: Jeff Xu Cc: Andrei Vagin Cc: Axel Rasmussen Cc: Christian Brauner Cc: Kees Cook Cc: Kent Overstreet Cc: Liam R. Howlett Cc: Muhammad Usama Anjum Cc: Peter Xu Cc: Rich Felker Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/hugepage-mremap.c | 2 +- tools/testing/selftests/mm/ksm_functional_tests.c | 8 +++++++- tools/testing/selftests/mm/memfd_secret.c | 14 +++++++++++++- tools/testing/selftests/mm/mkdirty.c | 8 +++++++- tools/testing/selftests/mm/mlock2.h | 1 - tools/testing/selftests/mm/protection_keys.c | 2 +- tools/testing/selftests/mm/uffd-common.c | 4 ++++ tools/testing/selftests/mm/uffd-stress.c | 15 ++++++++++++++- tools/testing/selftests/mm/uffd-unit-tests.c | 14 +++++++++++++- 9 files changed, 60 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c index ada9156cc497b..c463d1c09c9b4 100644 --- a/tools/testing/selftests/mm/hugepage-mremap.c +++ b/tools/testing/selftests/mm/hugepage-mremap.c @@ -15,7 +15,7 @@ #define _GNU_SOURCE #include #include -#include +#include #include #include #include /* Definition of O_* constants */ diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index 66b4e111b5a27..b61803e36d1cf 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include @@ -369,6 +369,7 @@ static void test_unmerge_discarded(void) munmap(map, size); } +#ifdef __NR_userfaultfd static void test_unmerge_uffd_wp(void) { struct uffdio_writeprotect uffd_writeprotect; @@ -429,6 +430,7 @@ static void test_unmerge_uffd_wp(void) unmap: munmap(map, size); } +#endif /* Verify that KSM can be enabled / queried with prctl. */ static void test_prctl(void) @@ -684,7 +686,9 @@ int main(int argc, char **argv) exit(test_child_ksm()); } +#ifdef __NR_userfaultfd tests++; +#endif ksft_print_header(); ksft_set_plan(tests); @@ -696,7 +700,9 @@ int main(int argc, char **argv) test_unmerge(); test_unmerge_zero_pages(); test_unmerge_discarded(); +#ifdef __NR_userfaultfd test_unmerge_uffd_wp(); +#endif test_prot_none(); diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c index 74c911aa3aea9..9a0597310a765 100644 --- a/tools/testing/selftests/mm/memfd_secret.c +++ b/tools/testing/selftests/mm/memfd_secret.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include #include @@ -28,6 +28,8 @@ #define pass(fmt, ...) ksft_test_result_pass(fmt, ##__VA_ARGS__) #define skip(fmt, ...) ksft_test_result_skip(fmt, ##__VA_ARGS__) +#ifdef __NR_memfd_secret + #define PATTERN 0x55 static const int prot = PROT_READ | PROT_WRITE; @@ -332,3 +334,13 @@ int main(int argc, char *argv[]) ksft_finished(); } + +#else /* __NR_memfd_secret */ + +int main(int argc, char *argv[]) +{ + printf("skip: skipping memfd_secret test (missing __NR_memfd_secret)\n"); + return KSFT_SKIP; +} + +#endif /* __NR_memfd_secret */ diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c index af2fce496912b..09feeb4536460 100644 --- a/tools/testing/selftests/mm/mkdirty.c +++ b/tools/testing/selftests/mm/mkdirty.c @@ -9,7 +9,7 @@ */ #include #include -#include +#include #include #include #include @@ -265,6 +265,7 @@ static void test_pte_mapped_thp(void) munmap(mmap_mem, mmap_size); } +#ifdef __NR_userfaultfd static void test_uffdio_copy(void) { struct uffdio_register uffdio_register; @@ -322,6 +323,7 @@ static void test_uffdio_copy(void) munmap(dst, pagesize); free(src); } +#endif /* __NR_userfaultfd */ int main(void) { @@ -334,7 +336,9 @@ int main(void) thpsize / 1024); tests += 3; } +#ifdef __NR_userfaultfd tests += 1; +#endif /* __NR_userfaultfd */ ksft_print_header(); ksft_set_plan(tests); @@ -364,7 +368,9 @@ int main(void) if (thpsize) test_pte_mapped_thp(); /* Placing a fresh page via userfaultfd may set the PTE dirty. */ +#ifdef __NR_userfaultfd test_uffdio_copy(); +#endif /* __NR_userfaultfd */ err = ksft_get_fail_cnt(); if (err) diff --git a/tools/testing/selftests/mm/mlock2.h b/tools/testing/selftests/mm/mlock2.h index 1e5731bab499a..4417eaa5cfb78 100644 --- a/tools/testing/selftests/mm/mlock2.h +++ b/tools/testing/selftests/mm/mlock2.h @@ -3,7 +3,6 @@ #include #include #include -#include static int mlock2_(void *start, size_t len, int flags) { diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index a4683f2476f27..35565af308af6 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index 717539eddf987..7ad6ba660c7d6 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -673,7 +673,11 @@ int uffd_open_dev(unsigned int flags) int uffd_open_sys(unsigned int flags) { +#ifdef __NR_userfaultfd return syscall(__NR_userfaultfd, flags); +#else + return -1; +#endif } int uffd_open(unsigned int flags) diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index a4b83280998ab..944d559ade21f 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -33,10 +33,11 @@ * pthread_mutex_lock will also verify the atomicity of the memory * transfer (UFFDIO_COPY). */ -#include + #include "uffd-common.h" uint64_t features; +#ifdef __NR_userfaultfd #define BOUNCE_RANDOM (1<<0) #define BOUNCE_RACINGFAULTS (1<<1) @@ -471,3 +472,15 @@ int main(int argc, char **argv) nr_pages, nr_pages_per_cpu); return userfaultfd_stress(); } + +#else /* __NR_userfaultfd */ + +#warning "missing __NR_userfaultfd definition" + +int main(void) +{ + printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n"); + return KSFT_SKIP; +} + +#endif /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 9ff71fa1f9bf0..74c8bc02b5063 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -5,11 +5,12 @@ * Copyright (C) 2015-2023 Red Hat, Inc. */ -#include #include "uffd-common.h" #include "../../../../mm/gup_test.h" +#ifdef __NR_userfaultfd + /* The unit test doesn't need a large or random size, make it 32MB for now */ #define UFFD_TEST_MEM_SIZE (32UL << 20) @@ -1558,3 +1559,14 @@ int main(int argc, char *argv[]) return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS; } +#else /* __NR_userfaultfd */ + +#warning "missing __NR_userfaultfd definition" + +int main(void) +{ + printf("Skipping %s (missing __NR_userfaultfd)\n", __file__); + return KSFT_SKIP; +} + +#endif /* __NR_userfaultfd */ -- GitLab From 349db086a66051bc6114b64b4446787c20ac3f00 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 17 Feb 2025 10:23:04 -0800 Subject: [PATCH 1312/1585] selftests/damon/damos_quota_goal: handle minimum quota that cannot be further reduced damos_quota_goal.py selftest see if DAMOS quota goals tuning feature increases or reduces the effective size quota for given score as expected. The tuning feature sets the minimum quota size as one byte, so if the effective size quota is already one, we cannot expect it further be reduced. However the test is not aware of the edge case, and fails since it shown no expected change of the effective quota. Handle the case by updating the failure logic for no change to see if it was the case, and simply skips to next test input. Link: https://lkml.kernel.org/r/20250217182304.45215-1-sj@kernel.org Fixes: f1c07c0a1662 ("selftests/damon: add a test for DAMOS quota goal") Signed-off-by: SeongJae Park Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202502171423.b28a918d-lkp@intel.com Cc: Shuah Khan Cc: [6.10.x] Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/damos_quota_goal.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/damon/damos_quota_goal.py b/tools/testing/selftests/damon/damos_quota_goal.py index 18246f3b62f7e..f76e0412b564c 100755 --- a/tools/testing/selftests/damon/damos_quota_goal.py +++ b/tools/testing/selftests/damon/damos_quota_goal.py @@ -63,6 +63,9 @@ def main(): if last_effective_bytes != 0 else -1.0)) if last_effective_bytes == goal.effective_bytes: + # effective quota was already minimum that cannot be more reduced + if expect_increase is False and last_effective_bytes == 1: + continue print('efective bytes not changed: %d' % goal.effective_bytes) exit(1) -- GitLab From 7277dd0a0ba4f8259f7abe37c4b7280fbfc2a182 Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Tue, 18 Feb 2025 00:00:17 +0800 Subject: [PATCH 1313/1585] m68k: sun3: add check for __pgd_alloc() Add check for the return value of __pgd_alloc() in pgd_alloc() to prevent null pointer dereference. Link: https://lkml.kernel.org/r/20250217160017.2375536-1-haoxiang_li2024@163.com Fixes: a9b3c355c2e6 ("asm-generic: pgalloc: provide generic __pgd_{alloc,free}") Signed-off-by: Haoxiang Li Reviewed-by: Geert Uytterhoeven Acked-by: Geert Uytterhoeven Cc: Dave Hansen Cc: Kevin Brodsky Cc: Qi Zheng Cc: Sam Creasey Cc: Signed-off-by: Andrew Morton --- arch/m68k/include/asm/sun3_pgalloc.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index f1ae4ed890db5..80afc3a187249 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -44,8 +44,10 @@ static inline pgd_t * pgd_alloc(struct mm_struct *mm) pgd_t *new_pgd; new_pgd = __pgd_alloc(mm, 0); - memcpy(new_pgd, swapper_pg_dir, PAGE_SIZE); - memset(new_pgd, 0, (PAGE_OFFSET >> PGDIR_SHIFT)); + if (likely(new_pgd != NULL)) { + memcpy(new_pgd, swapper_pg_dir, PAGE_SIZE); + memset(new_pgd, 0, (PAGE_OFFSET >> PGDIR_SHIFT)); + } return new_pgd; } -- GitLab From a564ccfe300fa6a065beda06ab7f3c140d6b4d63 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 17 Feb 2025 10:49:24 +0800 Subject: [PATCH 1314/1585] arm: pgtable: fix NULL pointer dereference issue When update_mmu_cache_range() is called by update_mmu_cache(), the vmf parameter is NULL, which will cause a NULL pointer dereference issue in adjust_pte(): Unable to handle kernel NULL pointer dereference at virtual address 00000030 when read Hardware name: Atmel AT91SAM9 PC is at update_mmu_cache_range+0x1e0/0x278 LR is at pte_offset_map_rw_nolock+0x18/0x2c Call trace: update_mmu_cache_range from remove_migration_pte+0x29c/0x2ec remove_migration_pte from rmap_walk_file+0xcc/0x130 rmap_walk_file from remove_migration_ptes+0x90/0xa4 remove_migration_ptes from migrate_pages_batch+0x6d4/0x858 migrate_pages_batch from migrate_pages+0x188/0x488 migrate_pages from compact_zone+0x56c/0x954 compact_zone from compact_node+0x90/0xf0 compact_node from kcompactd+0x1d4/0x204 kcompactd from kthread+0x120/0x12c kthread from ret_from_fork+0x14/0x38 Exception stack(0xc0d8bfb0 to 0xc0d8bff8) To fix it, do not rely on whether 'ptl' is equal to decide whether to hold the pte lock, but decide it by whether CONFIG_SPLIT_PTE_PTLOCKS is enabled. In addition, if two vmas map to the same PTE page, there is no need to hold the pte lock again, otherwise a deadlock will occur. Just add the need_lock parameter to let adjust_pte() know this information. Link: https://lkml.kernel.org/r/20250217024924.57996-1-zhengqi.arch@bytedance.com Fixes: fc9c45b71f43 ("arm: adjust_pte() use pte_offset_map_rw_nolock()") Signed-off-by: Qi Zheng Reported-by: Ezra Buehler Closes: https://lore.kernel.org/lkml/CAM1KZSmZ2T_riHvay+7cKEFxoPgeVpHkVFTzVVEQ1BO0cLkHEQ@mail.gmail.com/ Acked-by: David Hildenbrand Tested-by: Ezra Buehler Cc: Hugh Dickins Cc: Muchun Song Cc: Qi Zheng Cc: Russel King Cc: Ryan Roberts Cc: Signed-off-by: Andrew Morton --- arch/arm/mm/fault-armv.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index 2bec87c3327d2..39fd5df733178 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -62,7 +62,7 @@ static int do_adjust_pte(struct vm_area_struct *vma, unsigned long address, } static int adjust_pte(struct vm_area_struct *vma, unsigned long address, - unsigned long pfn, struct vm_fault *vmf) + unsigned long pfn, bool need_lock) { spinlock_t *ptl; pgd_t *pgd; @@ -99,12 +99,11 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address, if (!pte) return 0; - /* - * If we are using split PTE locks, then we need to take the page - * lock here. Otherwise we are using shared mm->page_table_lock - * which is already locked, thus cannot take it. - */ - if (ptl != vmf->ptl) { + if (need_lock) { + /* + * Use nested version here to indicate that we are already + * holding one similar spinlock. + */ spin_lock_nested(ptl, SINGLE_DEPTH_NESTING); if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmd)))) { pte_unmap_unlock(pte, ptl); @@ -114,7 +113,7 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address, ret = do_adjust_pte(vma, address, pfn, pte); - if (ptl != vmf->ptl) + if (need_lock) spin_unlock(ptl); pte_unmap(pte); @@ -123,9 +122,10 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address, static void make_coherent(struct address_space *mapping, struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep, unsigned long pfn, - struct vm_fault *vmf) + unsigned long addr, pte_t *ptep, unsigned long pfn) { + const unsigned long pmd_start_addr = ALIGN_DOWN(addr, PMD_SIZE); + const unsigned long pmd_end_addr = pmd_start_addr + PMD_SIZE; struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *mpnt; unsigned long offset; @@ -141,6 +141,14 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma, */ flush_dcache_mmap_lock(mapping); vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) { + /* + * If we are using split PTE locks, then we need to take the pte + * lock. Otherwise we are using shared mm->page_table_lock which + * is already locked, thus cannot take it. + */ + bool need_lock = IS_ENABLED(CONFIG_SPLIT_PTE_PTLOCKS); + unsigned long mpnt_addr; + /* * If this VMA is not in our MM, we can ignore it. * Note that we intentionally mask out the VMA @@ -151,7 +159,12 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma, if (!(mpnt->vm_flags & VM_MAYSHARE)) continue; offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; - aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn, vmf); + mpnt_addr = mpnt->vm_start + offset; + + /* Avoid deadlocks by not grabbing the same PTE lock again. */ + if (mpnt_addr >= pmd_start_addr && mpnt_addr < pmd_end_addr) + need_lock = false; + aliases += adjust_pte(mpnt, mpnt_addr, pfn, need_lock); } flush_dcache_mmap_unlock(mapping); if (aliases) @@ -194,7 +207,7 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, __flush_dcache_folio(mapping, folio); if (mapping) { if (cache_is_vivt()) - make_coherent(mapping, vma, addr, ptep, pfn, vmf); + make_coherent(mapping, vma, addr, ptep, pfn); else if (vma->vm_flags & VM_EXEC) __flush_icache_all(); } -- GitLab From b81679b1633aa43c0d973adfa816d78c1ed0d032 Mon Sep 17 00:00:00 2001 From: Ma Wupeng Date: Mon, 17 Feb 2025 09:43:27 +0800 Subject: [PATCH 1315/1585] mm: memory-failure: update ttu flag inside unmap_poisoned_folio Patch series "mm: memory_failure: unmap poisoned folio during migrate properly", v3. Fix two bugs during folio migration if the folio is poisoned. This patch (of 3): Commit 6da6b1d4a7df ("mm/hwpoison: convert TTU_IGNORE_HWPOISON to TTU_HWPOISON") introduce TTU_HWPOISON to replace TTU_IGNORE_HWPOISON in order to stop send SIGBUS signal when accessing an error page after a memory error on a clean folio. However during page migration, anon folio must be set with TTU_HWPOISON during unmap_*(). For pagecache we need some policy just like the one in hwpoison_user_mappings to set this flag. So move this policy from hwpoison_user_mappings to unmap_poisoned_folio to handle this warning properly. Warning will be produced during unamp poison folio with the following log: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 365 at mm/rmap.c:1847 try_to_unmap_one+0x8fc/0xd3c Modules linked in: CPU: 1 UID: 0 PID: 365 Comm: bash Tainted: G W 6.13.0-rc1-00018-gacdb4bbda7ab #42 Tainted: [W]=WARN Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : try_to_unmap_one+0x8fc/0xd3c lr : try_to_unmap_one+0x3dc/0xd3c Call trace: try_to_unmap_one+0x8fc/0xd3c (P) try_to_unmap_one+0x3dc/0xd3c (L) rmap_walk_anon+0xdc/0x1f8 rmap_walk+0x3c/0x58 try_to_unmap+0x88/0x90 unmap_poisoned_folio+0x30/0xa8 do_migrate_range+0x4a0/0x568 offline_pages+0x5a4/0x670 memory_block_action+0x17c/0x374 memory_subsys_offline+0x3c/0x78 device_offline+0xa4/0xd0 state_store+0x8c/0xf0 dev_attr_store+0x18/0x2c sysfs_kf_write+0x44/0x54 kernfs_fop_write_iter+0x118/0x1a8 vfs_write+0x3a8/0x4bc ksys_write+0x6c/0xf8 __arm64_sys_write+0x1c/0x28 invoke_syscall+0x44/0x100 el0_svc_common.constprop.0+0x40/0xe0 do_el0_svc+0x1c/0x28 el0_svc+0x30/0xd0 el0t_64_sync_handler+0xc8/0xcc el0t_64_sync+0x198/0x19c ---[ end trace 0000000000000000 ]--- [mawupeng1@huawei.com: unmap_poisoned_folio(): remove shadowed local `mapping', per Miaohe] Link: https://lkml.kernel.org/r/20250219060653.3849083-1-mawupeng1@huawei.com Link: https://lkml.kernel.org/r/20250217014329.3610326-1-mawupeng1@huawei.com Link: https://lkml.kernel.org/r/20250217014329.3610326-2-mawupeng1@huawei.com Fixes: 6da6b1d4a7df ("mm/hwpoison: convert TTU_IGNORE_HWPOISON to TTU_HWPOISON") Signed-off-by: Ma Wupeng Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Acked-by: Miaohe Lin Cc: Ma Wupeng Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- mm/internal.h | 5 ++-- mm/memory-failure.c | 63 ++++++++++++++++++++++----------------------- mm/memory_hotplug.c | 3 ++- 3 files changed, 36 insertions(+), 35 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index 109ef30fee11f..20b3535935a31 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1115,7 +1115,7 @@ static inline int find_next_best_node(int node, nodemask_t *used_node_mask) * mm/memory-failure.c */ #ifdef CONFIG_MEMORY_FAILURE -void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu); +int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill); void shake_folio(struct folio *folio); extern int hwpoison_filter(struct page *p); @@ -1138,8 +1138,9 @@ unsigned long page_mapped_in_vma(const struct page *page, struct vm_area_struct *vma); #else -static inline void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu) +static inline int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill) { + return -EBUSY; } #endif diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 995a15eb67e2c..327e02fdc029d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1556,11 +1556,35 @@ static int get_hwpoison_page(struct page *p, unsigned long flags) return ret; } -void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu) +int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill) { - if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) { - struct address_space *mapping; + enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON; + struct address_space *mapping; + + if (folio_test_swapcache(folio)) { + pr_err("%#lx: keeping poisoned page in swap cache\n", pfn); + ttu &= ~TTU_HWPOISON; + } + /* + * Propagate the dirty bit from PTEs to struct page first, because we + * need this to decide if we should kill or just drop the page. + * XXX: the dirty test could be racy: set_page_dirty() may not always + * be called inside page lock (it's recommended but not enforced). + */ + mapping = folio_mapping(folio); + if (!must_kill && !folio_test_dirty(folio) && mapping && + mapping_can_writeback(mapping)) { + if (folio_mkclean(folio)) { + folio_set_dirty(folio); + } else { + ttu &= ~TTU_HWPOISON; + pr_info("%#lx: corrupted page was clean: dropped without side effects\n", + pfn); + } + } + + if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) { /* * For hugetlb folios in shared mappings, try_to_unmap * could potentially call huge_pmd_unshare. Because of @@ -1572,7 +1596,7 @@ void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu) if (!mapping) { pr_info("%#lx: could not lock mapping for mapped hugetlb folio\n", folio_pfn(folio)); - return; + return -EBUSY; } try_to_unmap(folio, ttu|TTU_RMAP_LOCKED); @@ -1580,6 +1604,8 @@ void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu) } else { try_to_unmap(folio, ttu); } + + return folio_mapped(folio) ? -EBUSY : 0; } /* @@ -1589,8 +1615,6 @@ void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu) static bool hwpoison_user_mappings(struct folio *folio, struct page *p, unsigned long pfn, int flags) { - enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON; - struct address_space *mapping; LIST_HEAD(tokill); bool unmap_success; int forcekill; @@ -1613,29 +1637,6 @@ static bool hwpoison_user_mappings(struct folio *folio, struct page *p, if (!folio_mapped(folio)) return true; - if (folio_test_swapcache(folio)) { - pr_err("%#lx: keeping poisoned page in swap cache\n", pfn); - ttu &= ~TTU_HWPOISON; - } - - /* - * Propagate the dirty bit from PTEs to struct page first, because we - * need this to decide if we should kill or just drop the page. - * XXX: the dirty test could be racy: set_page_dirty() may not always - * be called inside page lock (it's recommended but not enforced). - */ - mapping = folio_mapping(folio); - if (!(flags & MF_MUST_KILL) && !folio_test_dirty(folio) && mapping && - mapping_can_writeback(mapping)) { - if (folio_mkclean(folio)) { - folio_set_dirty(folio); - } else { - ttu &= ~TTU_HWPOISON; - pr_info("%#lx: corrupted page was clean: dropped without side effects\n", - pfn); - } - } - /* * First collect all the processes that have the page * mapped in dirty form. This has to be done before try_to_unmap, @@ -1643,9 +1644,7 @@ static bool hwpoison_user_mappings(struct folio *folio, struct page *p, */ collect_procs(folio, p, &tokill, flags & MF_ACTION_REQUIRED); - unmap_poisoned_folio(folio, ttu); - - unmap_success = !folio_mapped(folio); + unmap_success = !unmap_poisoned_folio(folio, pfn, flags & MF_MUST_KILL); if (!unmap_success) pr_err("%#lx: failed to unmap page (folio mapcount=%d)\n", pfn, folio_mapcount(folio)); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e3655f07dd6e3..e7e47838fd494 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1833,7 +1833,8 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) if (WARN_ON(folio_test_lru(folio))) folio_isolate_lru(folio); if (folio_mapped(folio)) - unmap_poisoned_folio(folio, TTU_IGNORE_MLOCK); + unmap_poisoned_folio(folio, pfn, false); + continue; } -- GitLab From 773b9a6aa6d38894b95088e3ed6f8a701d9f50fd Mon Sep 17 00:00:00 2001 From: Ma Wupeng Date: Mon, 17 Feb 2025 09:43:28 +0800 Subject: [PATCH 1316/1585] mm: memory-hotplug: check folio ref count first in do_migrate_range If a folio has an increased reference count, folio_try_get() will acquire it, perform necessary operations, and then release it. In the case of a poisoned folio without an elevated reference count (which is unlikely for memory-failure), folio_try_get() will simply bypass it. Therefore, relocate the folio_try_get() function, responsible for checking and acquiring this reference count at first. Link: https://lkml.kernel.org/r/20250217014329.3610326-3-mawupeng1@huawei.com Signed-off-by: Ma Wupeng Acked-by: David Hildenbrand Acked-by: Miaohe Lin Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- mm/memory_hotplug.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e7e47838fd494..a6abd8d4a09c6 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1822,12 +1822,12 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) if (folio_test_large(folio)) pfn = folio_pfn(folio) + folio_nr_pages(folio) - 1; - /* - * HWPoison pages have elevated reference counts so the migration would - * fail on them. It also doesn't make any sense to migrate them in the - * first place. Still try to unmap such a page in case it is still mapped - * (keep the unmap as the catch all safety net). - */ + if (!folio_try_get(folio)) + continue; + + if (unlikely(page_folio(page) != folio)) + goto put_folio; + if (folio_test_hwpoison(folio) || (folio_test_large(folio) && folio_test_has_hwpoisoned(folio))) { if (WARN_ON(folio_test_lru(folio))) @@ -1835,14 +1835,8 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) if (folio_mapped(folio)) unmap_poisoned_folio(folio, pfn, false); - continue; - } - - if (!folio_try_get(folio)) - continue; - - if (unlikely(page_folio(page) != folio)) goto put_folio; + } if (!isolate_folio_to_list(folio, &source)) { if (__ratelimit(&migrate_rs)) { -- GitLab From af288a426c3e3552b62595c6138ec6371a17dbba Mon Sep 17 00:00:00 2001 From: Ma Wupeng Date: Mon, 17 Feb 2025 09:43:29 +0800 Subject: [PATCH 1317/1585] hwpoison, memory_hotplug: lock folio before unmap hwpoisoned folio Commit b15c87263a69 ("hwpoison, memory_hotplug: allow hwpoisoned pages to be offlined) add page poison checks in do_migrate_range in order to make offline hwpoisoned page possible by introducing isolate_lru_page and try_to_unmap for hwpoisoned page. However folio lock must be held before calling try_to_unmap. Add it to fix this problem. Warning will be produced if folio is not locked during unmap: ------------[ cut here ]------------ kernel BUG at ./include/linux/swapops.h:400! Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Modules linked in: CPU: 4 UID: 0 PID: 411 Comm: bash Tainted: G W 6.13.0-rc1-00016-g3c434c7ee82a-dirty #41 Tainted: [W]=WARN Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 pstate: 40400005 (nZcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : try_to_unmap_one+0xb08/0xd3c lr : try_to_unmap_one+0x3dc/0xd3c Call trace: try_to_unmap_one+0xb08/0xd3c (P) try_to_unmap_one+0x3dc/0xd3c (L) rmap_walk_anon+0xdc/0x1f8 rmap_walk+0x3c/0x58 try_to_unmap+0x88/0x90 unmap_poisoned_folio+0x30/0xa8 do_migrate_range+0x4a0/0x568 offline_pages+0x5a4/0x670 memory_block_action+0x17c/0x374 memory_subsys_offline+0x3c/0x78 device_offline+0xa4/0xd0 state_store+0x8c/0xf0 dev_attr_store+0x18/0x2c sysfs_kf_write+0x44/0x54 kernfs_fop_write_iter+0x118/0x1a8 vfs_write+0x3a8/0x4bc ksys_write+0x6c/0xf8 __arm64_sys_write+0x1c/0x28 invoke_syscall+0x44/0x100 el0_svc_common.constprop.0+0x40/0xe0 do_el0_svc+0x1c/0x28 el0_svc+0x30/0xd0 el0t_64_sync_handler+0xc8/0xcc el0t_64_sync+0x198/0x19c Code: f9407be0 b5fff320 d4210000 17ffff97 (d4210000) ---[ end trace 0000000000000000 ]--- Link: https://lkml.kernel.org/r/20250217014329.3610326-4-mawupeng1@huawei.com Fixes: b15c87263a69 ("hwpoison, memory_hotplug: allow hwpoisoned pages to be offlined") Signed-off-by: Ma Wupeng Acked-by: David Hildenbrand Acked-by: Miaohe Lin Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- mm/memory_hotplug.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a6abd8d4a09c6..16cf9e17077e3 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1832,8 +1832,11 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) (folio_test_large(folio) && folio_test_has_hwpoisoned(folio))) { if (WARN_ON(folio_test_lru(folio))) folio_isolate_lru(folio); - if (folio_mapped(folio)) + if (folio_mapped(folio)) { + folio_lock(folio); unmap_poisoned_folio(folio, pfn, false); + folio_unlock(folio); + } goto put_folio; } -- GitLab From 04ec365e3fdf136ba5f9053b02fb6c3368a22e83 Mon Sep 17 00:00:00 2001 From: Ujwal Kundur Date: Sat, 15 Feb 2025 16:21:07 +0530 Subject: [PATCH 1318/1585] Documentation: fix doc link to fault-injection.rst Fix incorrect reference to fault-injection docs Link: https://lkml.kernel.org/r/20250215105106.734-1-ujwal.kundur@gmail.com Signed-off-by: Ujwal Kundur Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1af972a92d06f..35796c290ca35 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2103,7 +2103,7 @@ config FAIL_SKB_REALLOC reallocated, catching possible invalid pointers to the skb. For more information, check - Documentation/dev-tools/fault-injection/fault-injection.rst + Documentation/fault-injection/fault-injection.rst config FAULT_INJECTION_CONFIGFS bool "Configfs interface for fault-injection capabilities" -- GitLab From 19fac3c93991502a22c5132824c40b6a2e64b136 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 18 Feb 2025 10:14:11 +0100 Subject: [PATCH 1319/1585] dma: kmsan: export kmsan_handle_dma() for modules kmsan_handle_dma() is used by virtio_ring() which can be built as a module. kmsan_handle_dma() needs to be exported otherwise building the virtio_ring fails. Export kmsan_handle_dma for modules. Link: https://lkml.kernel.org/r/20250218091411.MMS3wBN9@linutronix.de Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202502150634.qjxwSeJR-lkp@intel.com/ Fixes: 7ade4f10779c ("dma: kmsan: unpoison DMA mappings") Signed-off-by: Sebastian Andrzej Siewior Cc: Alexander Potapenko Cc: Dmitriy Vyukov Cc: Macro Elver Cc: Peter Zijlstra (Intel) Cc: Thomas Gleixner Cc: Signed-off-by: Andrew Morton --- mm/kmsan/hooks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/kmsan/hooks.c b/mm/kmsan/hooks.c index 3ea50f09311fd..3df45c25c1f62 100644 --- a/mm/kmsan/hooks.c +++ b/mm/kmsan/hooks.c @@ -357,6 +357,7 @@ void kmsan_handle_dma(struct page *page, size_t offset, size_t size, size -= to_go; } } +EXPORT_SYMBOL_GPL(kmsan_handle_dma); void kmsan_handle_dma_sg(struct scatterlist *sg, int nents, enum dma_data_direction dir) -- GitLab From c3e998398de48a7528842f05858a3a6bb21002e6 Mon Sep 17 00:00:00 2001 From: gao xu Date: Wed, 19 Feb 2025 01:56:28 +0000 Subject: [PATCH 1320/1585] mm: fix possible NULL pointer dereference in __swap_duplicate Add a NULL check on the return value of swp_swap_info in __swap_duplicate to prevent crashes caused by NULL pointer dereference. The reason why swp_swap_info() returns NULL is unclear; it may be due to CPU cache issues or DDR bit flips. The probability of this issue is very small - it has been observed to occur approximately 1 in 500,000 times per week. The stack info we encountered is as follows: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000058 [RB/E]rb_sreason_str_set: sreason_str set null_pointer Mem abort info: ESR = 0x0000000096000005 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x05: level 1 translation fault Data abort info: ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 user pgtable: 4k pages, 39-bit VAs, pgdp=00000008a80e5000 [0000000000000058] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000 Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP Skip md ftrace buffer dump for: 0x1609e0 ... pc : swap_duplicate+0x44/0x164 lr : copy_page_range+0x508/0x1e78 sp : ffffffc0f2a699e0 x29: ffffffc0f2a699e0 x28: ffffff8a5b28d388 x27: ffffff8b06603388 x26: ffffffdf7291fe70 x25: 0000000000000006 x24: 0000000000100073 x23: 00000000002d2d2f x22: 0000000000000008 x21: 0000000000000000 x20: 00000000002d2d2f x19: 18000000002d2d2f x18: ffffffdf726faec0 x17: 0000000000000000 x16: 0010000000000001 x15: 0040000000000001 x14: 0400000000000001 x13: ff7ffffffffffb7f x12: ffeffffffffffbff x11: ffffff8a5c7e1898 x10: 0000000000000018 x9 : 0000000000000006 x8 : 1800000000000000 x7 : 0000000000000000 x6 : ffffff8057c01f10 x5 : 000000000000a318 x4 : 0000000000000000 x3 : 0000000000000000 x2 : 0000006daf200000 x1 : 0000000000000001 x0 : 18000000002d2d2f Call trace: swap_duplicate+0x44/0x164 copy_page_range+0x508/0x1e78 copy_process+0x1278/0x21cc kernel_clone+0x90/0x438 __arm64_sys_clone+0x5c/0x8c invoke_syscall+0x58/0x110 do_el0_svc+0x8c/0xe0 el0_svc+0x38/0x9c el0t_64_sync_handler+0x44/0xec el0t_64_sync+0x1a8/0x1ac Code: 9139c35a 71006f3f 54000568 f8797b55 (f9402ea8) ---[ end trace 0000000000000000 ]--- Kernel panic - not syncing: Oops: Fatal exception SMP: stopping secondary CPUs The patch seems to only provide a workaround, but there are no more effective software solutions to handle the bit flips problem. This path will change the issue from a system crash to a process exception, thereby reducing the impact on the entire machine. akpm: this is probably a kernel bug, but this patch keeps the system running and doesn't reduce that bug's debuggability. Link: https://lkml.kernel.org/r/e223b0e6ba2f4924984b1917cc717bd5@honor.com Signed-off-by: gao xu Reviewed-by: Barry Song Cc: Suren Baghdasaryan Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- mm/swapfile.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/swapfile.c b/mm/swapfile.c index ba19430dd4ead..fab99d67026aa 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3542,6 +3542,10 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr) int err, i; si = swp_swap_info(entry); + if (WARN_ON_ONCE(!si)) { + pr_err("%s%08lx\n", Bad_file, entry.val); + return -EINVAL; + } offset = swp_offset(entry); VM_WARN_ON(nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER); -- GitLab From 67bab13307c83fb742c2556b06cdc39dbad27f07 Mon Sep 17 00:00:00 2001 From: Ge Yang Date: Wed, 19 Feb 2025 11:46:44 +0800 Subject: [PATCH 1321/1585] mm/hugetlb: wait for hugetlb folios to be freed Since the introduction of commit c77c0a8ac4c52 ("mm/hugetlb: defer freeing of huge pages if in non-task context"), which supports deferring the freeing of hugetlb pages, the allocation of contiguous memory through cma_alloc() may fail probabilistically. In the CMA allocation process, if it is found that the CMA area is occupied by in-use hugetlb folios, these in-use hugetlb folios need to be migrated to another location. When there are no available hugetlb folios in the free hugetlb pool during the migration of in-use hugetlb folios, new folios are allocated from the buddy system. A temporary state is set on the newly allocated folio. Upon completion of the hugetlb folio migration, the temporary state is transferred from the new folios to the old folios. Normally, when the old folios with the temporary state are freed, it is directly released back to the buddy system. However, due to the deferred freeing of hugetlb pages, the PageBuddy() check fails, ultimately leading to the failure of cma_alloc(). Here is a simplified call trace illustrating the process: cma_alloc() ->__alloc_contig_migrate_range() // Migrate in-use hugetlb folios ->unmap_and_move_huge_page() ->folio_putback_hugetlb() // Free old folios ->test_pages_isolated() ->__test_page_isolated_in_pageblock() ->PageBuddy(page) // Check if the page is in buddy To resolve this issue, we have implemented a function named wait_for_freed_hugetlb_folios(). This function ensures that the hugetlb folios are properly released back to the buddy system after their migration is completed. By invoking wait_for_freed_hugetlb_folios() before calling PageBuddy(), we ensure that PageBuddy() will succeed. Link: https://lkml.kernel.org/r/1739936804-18199-1-git-send-email-yangge1116@126.com Fixes: c77c0a8ac4c5 ("mm/hugetlb: defer freeing of huge pages if in non-task context") Signed-off-by: Ge Yang Reviewed-by: Muchun Song Acked-by: David Hildenbrand Cc: Baolin Wang Cc: Barry Song <21cnbao@gmail.com> Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 5 +++++ mm/hugetlb.c | 8 ++++++++ mm/page_isolation.c | 10 ++++++++++ 3 files changed, 23 insertions(+) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ec8c0ccc8f959..dbe76d4f1bfc8 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -682,6 +682,7 @@ struct huge_bootmem_page { int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn); +void wait_for_freed_hugetlb_folios(void); struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, bool cow_from_owner); struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, @@ -1066,6 +1067,10 @@ static inline int replace_free_hugepage_folios(unsigned long start_pfn, return 0; } +static inline void wait_for_freed_hugetlb_folios(void) +{ +} + static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, bool cow_from_owner) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 163190e89ea16..811b29f77abf8 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2943,6 +2943,14 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn) return ret; } +void wait_for_freed_hugetlb_folios(void) +{ + if (llist_empty(&hpage_freelist)) + return; + + flush_work(&free_hpage_work); +} + typedef enum { /* * For either 0/1: we checked the per-vma resv map, and one resv diff --git a/mm/page_isolation.c b/mm/page_isolation.c index c608e9d728655..a051a29e95ad0 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -607,6 +607,16 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, struct zone *zone; int ret; + /* + * Due to the deferred freeing of hugetlb folios, the hugepage folios may + * not immediately release to the buddy system. This can cause PageBuddy() + * to fail in __test_page_isolated_in_pageblock(). To ensure that the + * hugetlb folios are properly released back to the buddy system, we + * invoke the wait_for_freed_hugetlb_folios() function to wait for the + * release to complete. + */ + wait_for_freed_hugetlb_folios(); + /* * Note: pageblock_nr_pages != MAX_PAGE_ORDER. Then, chunks of free * pages are not aligned to pageblock_nr_pages. -- GitLab From 47b16d0462a460000b8f05dfb1292377ac48f3ca Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sat, 22 Feb 2025 16:19:52 +0000 Subject: [PATCH 1322/1585] mm: abort vma_modify() on merge out of memory failure The remainder of vma_modify() relies upon the vmg state remaining pristine after a merge attempt. Usually this is the case, however in the one edge case scenario of a merge attempt failing not due to the specified range being unmergeable, but rather due to an out of memory error arising when attempting to commit the merge, this assumption becomes untrue. This results in vmg->start, end being modified, and thus the proceeding attempts to split the VMA will be done with invalid start/end values. Thankfully, it is likely practically impossible for us to hit this in reality, as it would require a maple tree node pre-allocation failure that would likely never happen due to it being 'too small to fail', i.e. the kernel would simply keep retrying reclaim until it succeeded. However, this scenario remains theoretically possible, and what we are doing here is wrong so we must correct it. The safest option is, when this scenario occurs, to simply give up the operation. If we cannot allocate memory to merge, then we cannot allocate memory to split either (perhaps moreso!). Any scenario where this would be happening would be under very extreme (likely fatal) memory pressure, so it's best we give up early. So there is no doubt it is appropriate to simply bail out in this scenario. However, in general we must if at all possible never assume VMG state is stable after a merge attempt, since merge operations update VMG fields. As a result, additionally also make this clear by storing start, end in local variables. The issue was reported originally by syzkaller, and by Brad Spengler (via an off-list discussion), and in both instances it manifested as a triggering of the assert: VM_WARN_ON_VMG(start >= end, vmg); In vma_merge_existing_range(). It seems at least one scenario in which this is occurring is one in which the merge being attempted is due to an madvise() across multiple VMAs which looks like this: start end |<------>| |----------|------| | vma | next | |----------|------| When madvise_walk_vmas() is invoked, we first find vma in the above (determining prev to be equal to vma as we are offset into vma), and then enter the loop. We determine the end of vma that forms part of the range we are madvise()'ing by setting 'tmp' to this value: /* Here vma->vm_start <= start < (end|vma->vm_end) */ tmp = vma->vm_end; We then invoke the madvise() operation via visit(), letting prev get updated to point to vma as part of the operation: /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ error = visit(vma, &prev, start, tmp, arg); Where the visit() function pointer in this instance is madvise_vma_behavior(). As observed in syzkaller reports, it is ultimately madvise_update_vma() that is invoked, calling vma_modify_flags_name() and vma_modify() in turn. Then, in vma_modify(), we attempt the merge: merged = vma_merge_existing_range(vmg); if (merged) return merged; We invoke this with vmg->start, end set to start, tmp as such: start tmp |<--->| |----------|------| | vma | next | |----------|------| We find ourselves in the merge right scenario, but the one in which we cannot remove the middle (we are offset into vma). Here we have a special case where vmg->start, end get set to perhaps unintuitive values - we intended to shrink the middle VMA and expand the next. This means vmg->start, end are set to... vma->vm_start, start. Now the commit_merge() fails, and vmg->start, end are left like this. This means we return to the rest of vma_modify() with vmg->start, end (here denoted as start', end') set as: start' end' |<-->| |----------|------| | vma | next | |----------|------| So we now erroneously try to split accordingly. This is where the unfortunate stuff begins. We start with: /* Split any preceding portion of the VMA. */ if (vma->vm_start < vmg->start) { ... } This doesn't trigger as we are no longer offset into vma at the start. But then we invoke: /* Split any trailing portion of the VMA. */ if (vma->vm_end > vmg->end) { ... } Which does get invoked. This leaves us with: start' end' |<-->| |----|-----|------| | vma| new | next | |----|-----|------| We then return ultimately to madvise_walk_vmas(). Here 'new' is unknown, and putting back the values known in this function we are faced with: start tmp end | | | |----|-----|------| | vma| new | next | |----|-----|------| prev Then: start = tmp; So: start end | | |----|-----|------| | vma| new | next | |----|-----|------| prev The following code does not cause anything to happen: if (prev && start < prev->vm_end) start = prev->vm_end; if (start >= end) break; And then we invoke: if (prev) vma = find_vma(mm, prev->vm_end); Which is where a problem occurs - we don't know about 'new' so we essentially look for the vma after prev, which is new, whereas we actually intended to discover next! So we end up with: start end | | |----|-----|------| |prev| vma | next | |----|-----|------| And we have successfully bypassed all of the checks madvise_walk_vmas() has to ensure early exit should we end up moving out of range. We loop around, and hit: /* Here vma->vm_start <= start < (end|vma->vm_end) */ tmp = vma->vm_end; Oh dear. Now we have: tmp start end | | |----|-----|------| |prev| vma | next | |----|-----|------| We then invoke: /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ error = visit(vma, &prev, start, tmp, arg); Where start == tmp. That is, a zero range. This is not good. We invoke visit() which is madvise_vma_behavior() which does not check the range (for good reason, it assumes all checks have been done before it was called), which in turn finally calls madvise_update_vma(). The madvise_update_vma() function calls vma_modify_flags_name() in turn, which ultimately invokes vma_modify() with... start == end. vma_modify() calls vma_merge_existing_range() and finally we hit: VM_WARN_ON_VMG(start >= end, vmg); Which triggers, as start == end. While it might be useful to add some CONFIG_DEBUG_VM asserts in these instances to catch this kind of error, since we have just eliminated any possibility of that happening, we will add such asserts separately as to reduce churn and aid backporting. Link: https://lkml.kernel.org/r/20250222161952.41957-1-lorenzo.stoakes@oracle.com Fixes: 2f1c6611b0a8 ("mm: introduce vma_merge_struct and abstract vma_merge(),vma_modify()") Signed-off-by: Lorenzo Stoakes Tested-by: Brad Spengler Reported-by: Brad Spengler Reported-by: syzbot+46423ed8fa1f1148c6e4@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-mm/6774c98f.050a0220.25abdd.0991.GAE@google.com/ Cc: Jann Horn Cc: Liam Howlett Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/vma.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/mm/vma.c b/mm/vma.c index af1d549b179c9..96bcb372c90e4 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -1509,24 +1509,28 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, static struct vm_area_struct *vma_modify(struct vma_merge_struct *vmg) { struct vm_area_struct *vma = vmg->vma; + unsigned long start = vmg->start; + unsigned long end = vmg->end; struct vm_area_struct *merged; /* First, try to merge. */ merged = vma_merge_existing_range(vmg); if (merged) return merged; + if (vmg_nomem(vmg)) + return ERR_PTR(-ENOMEM); /* Split any preceding portion of the VMA. */ - if (vma->vm_start < vmg->start) { - int err = split_vma(vmg->vmi, vma, vmg->start, 1); + if (vma->vm_start < start) { + int err = split_vma(vmg->vmi, vma, start, 1); if (err) return ERR_PTR(err); } /* Split any trailing portion of the VMA. */ - if (vma->vm_end > vmg->end) { - int err = split_vma(vmg->vmi, vma, vmg->end, 0); + if (vma->vm_end > end) { + int err = split_vma(vmg->vmi, vma, end, 0); if (err) return ERR_PTR(err); -- GitLab From 51f271c1940fc9a5f77931ec603b457ea293bd56 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Mon, 24 Feb 2025 19:39:10 +0800 Subject: [PATCH 1323/1585] mm: swap: add back full cluster when no entry is reclaimed If no swap cache is reclaimed, cluster taken off from full_clusters list will not be put in any list and we can't reclaime HAS_CACHE slots efficiently. Do relocate_cluster for such cluster to avoid inefficiency. Link: https://lkml.kernel.org/r/20250224113910.522439-1-shikemeng@huaweicloud.com Fixes: 3b644773eefd ("mm, swap: reduce contention on device lock") Signed-off-by: Kemeng Shi Reviewed-by: Kairui Song Signed-off-by: Andrew Morton --- mm/swapfile.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/swapfile.c b/mm/swapfile.c index fab99d67026aa..a6c41c7ffb037 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -858,6 +858,10 @@ static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force) offset++; } + /* in case no swap cache is reclaimed */ + if (ci->flags == CLUSTER_FLAG_NONE) + relocate_cluster(si, ci); + unlock_cluster(ci); if (to_scan <= 0) break; -- GitLab From 7a2e7ae5d13658ada38898e5a3a8a40a7910db06 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Sun, 23 Feb 2025 00:08:46 +0800 Subject: [PATCH 1324/1585] mm: swap: use correct step in loop to wait all clusters in wait_for_allocation() Use correct step in loop to wait all clusters in wait_for_allocation(). If we miss some cluster in wait_for_allocation(), use after free may occur as follows: shmem_writepage swapoff folio_alloc_swap get_swap_pages scan_swap_map_slots cluster_alloc_swap_entry alloc_swap_scan_cluster cluster_alloc_range /* SWP_WRITEOK is valid */ if (!(si->flags & SWP_WRITEOK)) ... del_from_avail_list(p, true); ... /* miss the cluster in shmem_writepage */ wait_for_allocation() ... try_to_unuse() memset(si->swap_map + start, usage, nr_pages); swap_range_alloc(si, nr_pages); ci->count += nr_pages; /* return a valid entry */ ... exit_swap_address_space(p->type); ... ... add_to_swap_cache /* dereference swap_address_space(entry) which is NULL */ xas_lock_irq(&xas); Link: https://lkml.kernel.org/r/20250222160850.505274-3-shikemeng@huaweicloud.com Fixes: 9a0ddeb79880 ("mm, swap: hold a reference during scan and cleanup flag usage") Signed-off-by: Kemeng Shi Reviewed-by: Kairui Song Signed-off-by: Andrew Morton --- mm/swapfile.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index a6c41c7ffb037..6460b6cb36c99 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2645,7 +2645,6 @@ static void wait_for_allocation(struct swap_info_struct *si) for (offset = 0; offset < end; offset += SWAPFILE_CLUSTER) { ci = lock_cluster(si, offset); unlock_cluster(ci); - offset += SWAPFILE_CLUSTER; } } -- GitLab From 57d910cffaa0be981f558ff603e2d896b36b8241 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Sun, 23 Feb 2025 00:08:47 +0800 Subject: [PATCH 1325/1585] mm, swap: avoid BUG_ON in relocate_cluster() If allocation is racy with swapoff, we may call free_cluster for cluster already in free list and trigger BUG_ON() as following: Allocation Swapoff cluster_alloc_swap_entry ... /* may get a free cluster with offset */ offset = xxx; if (offset) ci = lock_cluster(si, offset); ... del_from_avail_list(p, true); si->flags &= ~SWP_WRITEOK; alloc_swap_scan_cluster(si, ci, ...) ... /* failed to alloc entry from free entry */ if (!cluster_alloc_range(...)) break; ... /* add back a free cluster */ relocate_cluster(si, ci); if (!ci->count) free_cluster(si, ci); VM_BUG_ON(ci->flags == CLUSTER_FLAG_FREE); To prevent the BUG_ON(), call free_cluster() for free cluster to move the cluster to tail of list. Check cluster is not free before calling free_cluster() in relocate_cluster() to avoid BUG_ON(). Link: https://lkml.kernel.org/r/20250222160850.505274-4-shikemeng@huaweicloud.com Fixes: 3b644773eefd ("mm, swap: reduce contention on device lock") Signed-off-by: Kemeng Shi Reviewed-by: Kairui Song Signed-off-by: Andrew Morton --- mm/swapfile.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 6460b6cb36c99..df7c4e8b089ca 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -653,7 +653,8 @@ static void relocate_cluster(struct swap_info_struct *si, return; if (!ci->count) { - free_cluster(si, ci); + if (ci->flags != CLUSTER_FLAG_FREE) + free_cluster(si, ci); } else if (ci->count != SWAPFILE_CLUSTER) { if (ci->flags != CLUSTER_FLAG_FRAG) move_cluster(si, ci, &si->frag_clusters[ci->order], -- GitLab From ce6d9c1c2b5cc785016faa11b48b6cd317eb367e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 24 Feb 2025 21:20:02 -0500 Subject: [PATCH 1326/1585] NFS: fix nfs_release_folio() to not deadlock via kcompactd writeback Add PF_KCOMPACTD flag and current_is_kcompactd() helper to check for it so nfs_release_folio() can skip calling nfs_wb_folio() from kcompactd. Otherwise NFS can deadlock waiting for kcompactd enduced writeback which recurses back to NFS (which triggers writeback to NFSD via NFS loopback mount on the same host, NFSD blocks waiting for XFS's call to __filemap_get_folio): 6070.550357] INFO: task kcompactd0:58 blocked for more than 4435 seconds. {--- [58] "kcompactd0" [<0>] folio_wait_bit+0xe8/0x200 [<0>] folio_wait_writeback+0x2b/0x80 [<0>] nfs_wb_folio+0x80/0x1b0 [nfs] [<0>] nfs_release_folio+0x68/0x130 [nfs] [<0>] split_huge_page_to_list_to_order+0x362/0x840 [<0>] migrate_pages_batch+0x43d/0xb90 [<0>] migrate_pages_sync+0x9a/0x240 [<0>] migrate_pages+0x93c/0x9f0 [<0>] compact_zone+0x8e2/0x1030 [<0>] compact_node+0xdb/0x120 [<0>] kcompactd+0x121/0x2e0 [<0>] kthread+0xcf/0x100 [<0>] ret_from_fork+0x31/0x40 [<0>] ret_from_fork_asm+0x1a/0x30 ---} [akpm@linux-foundation.org: fix build] Link: https://lkml.kernel.org/r/20250225022002.26141-1-snitzer@kernel.org Fixes: 96780ca55e3c ("NFS: fix up nfs_release_folio() to try to release the page") Signed-off-by: Mike Snitzer Cc: Anna Schumaker Cc: Trond Myklebust Cc: Signed-off-by: Andrew Morton --- fs/nfs/file.c | 3 ++- include/linux/compaction.h | 5 +++++ include/linux/sched.h | 2 +- mm/compaction.c | 3 +++ 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 1bb646752e466..033feeab8c346 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -457,7 +458,7 @@ static bool nfs_release_folio(struct folio *folio, gfp_t gfp) /* If the private flag is set, then the folio is not freeable */ if (folio_test_private(folio)) { if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL || - current_is_kswapd()) + current_is_kswapd() || current_is_kcompactd()) return false; if (nfs_wb_folio(folio->mapping->host, folio) < 0) return false; diff --git a/include/linux/compaction.h b/include/linux/compaction.h index e947764960496..7bf0c521db634 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -80,6 +80,11 @@ static inline unsigned long compact_gap(unsigned int order) return 2UL << order; } +static inline int current_is_kcompactd(void) +{ + return current->flags & PF_KCOMPACTD; +} + #ifdef CONFIG_COMPACTION extern unsigned int extfrag_for_order(struct zone *zone, unsigned int order); diff --git a/include/linux/sched.h b/include/linux/sched.h index 9632e3318e0d6..9c15365a30c08 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1701,7 +1701,7 @@ extern struct pid *cad_pid; #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ #define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ -#define PF__HOLE__00010000 0x00010000 +#define PF_KCOMPACTD 0x00010000 /* I am kcompactd */ #define PF_KSWAPD 0x00020000 /* I am kswapd */ #define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */ #define PF_MEMALLOC_NOIO 0x00080000 /* All allocations inherit GFP_NOIO. See memalloc_noio_save() */ diff --git a/mm/compaction.c b/mm/compaction.c index 12ed8425fa175..a3203d97123ea 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -3181,6 +3181,7 @@ static int kcompactd(void *p) long default_timeout = msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC); long timeout = default_timeout; + current->flags |= PF_KCOMPACTD; set_freezable(); pgdat->kcompactd_max_order = 0; @@ -3237,6 +3238,8 @@ static int kcompactd(void *p) pgdat->proactive_compact_trigger = false; } + current->flags &= ~PF_KCOMPACTD; + return 0; } -- GitLab From c29564d8b46f64f5e6e6f1c9c02f7761b7b90963 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Fri, 21 Feb 2025 15:16:25 +0800 Subject: [PATCH 1327/1585] include/linux/log2.h: mark is_power_of_2() with __always_inline When building kernel with randconfig, there is an error: In function `kvm_is_cr4_bit_set',inlined from `kvm_update_cpuid_runtime' at arch/x86/kvm/cpuid.c:310:9: include/linux/compiler_types.h:542:38: error: call to `__compiletime_assert_380' declared with attribute error: BUILD_BUG_ON failed: !is_power_of_2(cr4_bit). '!is_power_of_2(X86_CR4_OSXSAVE)' is False, but gcc treats is_power_of_2() as non-inline function and a compilation error happens. Fix this by marking is_power_of_2() with __always_inline. Link: https://lkml.kernel.org/r/20250221071624.1356899-1-suhui@nfschina.com Signed-off-by: Su Hui Cc: Binbin Wu Cc: Paolo Bonzini Cc: Sean Christopherson Signed-off-by: Andrew Morton --- include/linux/log2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/log2.h b/include/linux/log2.h index 9f30d087a1281..1366cb688a6d9 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -41,7 +41,7 @@ int __ilog2_u64(u64 n) * *not* considered a power of two. * Return: true if @n is a power of 2, otherwise false. */ -static inline __attribute__((const)) +static __always_inline __attribute__((const)) bool is_power_of_2(unsigned long n) { return (n != 0 && ((n & (n - 1)) == 0)); -- GitLab From 1c684d77dfbcf926e0dd28f6d260e8fdd8a58e85 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 25 Feb 2025 14:23:31 -0800 Subject: [PATCH 1328/1585] selftests/damon/damos_quota: make real expectation of quota exceeds Patch series "selftests/damon: three fixes for false results". Fix three DAMON selftest bugs that cause two and one false positive failures and successes. This patch (of 3): damos_quota.py assumes the quota will always exceeded. But whether quota will be exceeded or not depend on the monitoring results. Actually the monitored workload has chaning access pattern and hence sometimes the quota may not really be exceeded. As a result, false positive test failures happen. Expect how much time the quota will be exceeded by checking the monitoring results, and use it instead of the naive assumption. Link: https://lkml.kernel.org/r/20250225222333.505646-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250225222333.505646-2-sj@kernel.org Fixes: 51f58c9da14b ("selftests/damon: add a test for DAMOS quota") Signed-off-by: SeongJae Park Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/damos_quota.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/damon/damos_quota.py b/tools/testing/selftests/damon/damos_quota.py index 7d4c6bb2e3cd2..57c4937aaed28 100755 --- a/tools/testing/selftests/damon/damos_quota.py +++ b/tools/testing/selftests/damon/damos_quota.py @@ -51,16 +51,19 @@ def main(): nr_quota_exceeds = scheme.stats.qt_exceeds wss_collected.sort() + nr_expected_quota_exceeds = 0 for wss in wss_collected: if wss > sz_quota: print('quota is not kept: %s > %s' % (wss, sz_quota)) print('collected samples are as below') print('\n'.join(['%d' % wss for wss in wss_collected])) exit(1) + if wss == sz_quota: + nr_expected_quota_exceeds += 1 - if nr_quota_exceeds < len(wss_collected): - print('quota is not always exceeded: %d > %d' % - (len(wss_collected), nr_quota_exceeds)) + if nr_quota_exceeds < nr_expected_quota_exceeds: + print('quota is exceeded less than expected: %d < %d' % + (nr_quota_exceeds, nr_expected_quota_exceeds)) exit(1) if __name__ == '__main__': -- GitLab From 695469c07a65547acb6e229b3fdf6aaa881817e3 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 25 Feb 2025 14:23:32 -0800 Subject: [PATCH 1329/1585] selftests/damon/damon_nr_regions: set ops update for merge results check to 100ms damon_nr_regions.py updates max_nr_regions to a number smaller than expected number of real regions and confirms DAMON respect the harsh limit. To give time for DAMON to make changes for the regions, 3 aggregation intervals (300 milliseconds) are given. The internal mechanism works with not only the max_nr_regions, but also sz_limit, though. It avoids merging region if that casn make region of size larger than sz_limit. In the test, sz_limit is set too small to achive the new max_nr_regions, unless it is updated for the new min_nr_regions. But the update is done only once per operations set update interval, which is one second by default. Hence, the test randomly incurs false positive failures. Fix it by setting the ops interval same to aggregation interval, to make sure sz_limit is updated by the time of the check. Link: https://lkml.kernel.org/r/20250225222333.505646-3-sj@kernel.org Fixes: 8bf890c81612 ("selftests/damon/damon_nr_regions: test online-tuned max_nr_regions") Signed-off-by: SeongJae Park Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/damon_nr_regions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/damon/damon_nr_regions.py b/tools/testing/selftests/damon/damon_nr_regions.py index 2e8a74aff5431..6f1c1d88e3091 100755 --- a/tools/testing/selftests/damon/damon_nr_regions.py +++ b/tools/testing/selftests/damon/damon_nr_regions.py @@ -109,6 +109,7 @@ def main(): attrs = kdamonds.kdamonds[0].contexts[0].monitoring_attrs attrs.min_nr_regions = 3 attrs.max_nr_regions = 7 + attrs.update_us = 100000 err = kdamonds.kdamonds[0].commit() if err is not None: proc.terminate() -- GitLab From 582ccf78f6090d88b1c7066b1e90b3d9ec952d08 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 25 Feb 2025 14:23:33 -0800 Subject: [PATCH 1330/1585] selftests/damon/damon_nr_regions: sort collected regiosn before checking with min/max boundaries damon_nr_regions.py starts DAMON, periodically collect number of regions in snapshots, and see if it is in the requested range. The check code assumes the numbers are sorted on the collection list, but there is no such guarantee. Hence this can result in false positive test success. Sort the list before doing the check. Link: https://lkml.kernel.org/r/20250225222333.505646-4-sj@kernel.org Fixes: 781497347d1b ("selftests/damon: implement test for min/max_nr_regions") Signed-off-by: SeongJae Park Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/damon_nr_regions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/damon/damon_nr_regions.py b/tools/testing/selftests/damon/damon_nr_regions.py index 6f1c1d88e3091..58f3291fed12a 100755 --- a/tools/testing/selftests/damon/damon_nr_regions.py +++ b/tools/testing/selftests/damon/damon_nr_regions.py @@ -65,6 +65,7 @@ def test_nr_regions(real_nr_regions, min_nr_regions, max_nr_regions): test_name = 'nr_regions test with %d/%d/%d real/min/max nr_regions' % ( real_nr_regions, min_nr_regions, max_nr_regions) + collected_nr_regions.sort() if (collected_nr_regions[0] < min_nr_regions or collected_nr_regions[-1] > max_nr_regions): print('fail %s' % test_name) -- GitLab From c50f8e6053b0503375c2975bf47f182445aebb4c Mon Sep 17 00:00:00 2001 From: Barry Song Date: Wed, 26 Feb 2025 13:14:00 +1300 Subject: [PATCH 1331/1585] mm: fix kernel BUG when userfaultfd_move encounters swapcache userfaultfd_move() checks whether the PTE entry is present or a swap entry. - If the PTE entry is present, move_present_pte() handles folio migration by setting: src_folio->index = linear_page_index(dst_vma, dst_addr); - If the PTE entry is a swap entry, move_swap_pte() simply copies the PTE to the new dst_addr. This approach is incorrect because, even if the PTE is a swap entry, it can still reference a folio that remains in the swap cache. This creates a race window between steps 2 and 4. 1. add_to_swap: The folio is added to the swapcache. 2. try_to_unmap: PTEs are converted to swap entries. 3. pageout: The folio is written back. 4. Swapcache is cleared. If userfaultfd_move() occurs in the window between steps 2 and 4, after the swap PTE has been moved to the destination, accessing the destination triggers do_swap_page(), which may locate the folio in the swapcache. However, since the folio's index has not been updated to match the destination VMA, do_swap_page() will detect a mismatch. This can result in two critical issues depending on the system configuration. If KSM is disabled, both small and large folios can trigger a BUG during the add_rmap operation due to: page_pgoff(folio, page) != linear_page_index(vma, address) [ 13.336953] page: refcount:6 mapcount:1 mapping:00000000f43db19c index:0xffffaf150 pfn:0x4667c [ 13.337520] head: order:2 mapcount:1 entire_mapcount:0 nr_pages_mapped:1 pincount:0 [ 13.337716] memcg:ffff00000405f000 [ 13.337849] anon flags: 0x3fffc0000020459(locked|uptodate|dirty|owner_priv_1|head|swapbacked|node=0|zone=0|lastcpupid=0xffff) [ 13.338630] raw: 03fffc0000020459 ffff80008507b538 ffff80008507b538 ffff000006260361 [ 13.338831] raw: 0000000ffffaf150 0000000000004000 0000000600000000 ffff00000405f000 [ 13.339031] head: 03fffc0000020459 ffff80008507b538 ffff80008507b538 ffff000006260361 [ 13.339204] head: 0000000ffffaf150 0000000000004000 0000000600000000 ffff00000405f000 [ 13.339375] head: 03fffc0000000202 fffffdffc0199f01 ffffffff00000000 0000000000000001 [ 13.339546] head: 0000000000000004 0000000000000000 00000000ffffffff 0000000000000000 [ 13.339736] page dumped because: VM_BUG_ON_PAGE(page_pgoff(folio, page) != linear_page_index(vma, address)) [ 13.340190] ------------[ cut here ]------------ [ 13.340316] kernel BUG at mm/rmap.c:1380! [ 13.340683] Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP [ 13.340969] Modules linked in: [ 13.341257] CPU: 1 UID: 0 PID: 107 Comm: a.out Not tainted 6.14.0-rc3-gcf42737e247a-dirty #299 [ 13.341470] Hardware name: linux,dummy-virt (DT) [ 13.341671] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 13.341815] pc : __page_check_anon_rmap+0xa0/0xb0 [ 13.341920] lr : __page_check_anon_rmap+0xa0/0xb0 [ 13.342018] sp : ffff80008752bb20 [ 13.342093] x29: ffff80008752bb20 x28: fffffdffc0199f00 x27: 0000000000000001 [ 13.342404] x26: 0000000000000000 x25: 0000000000000001 x24: 0000000000000001 [ 13.342575] x23: 0000ffffaf0d0000 x22: 0000ffffaf0d0000 x21: fffffdffc0199f00 [ 13.342731] x20: fffffdffc0199f00 x19: ffff000006210700 x18: 00000000ffffffff [ 13.342881] x17: 6c203d2120296567 x16: 6170202c6f696c6f x15: 662866666f67705f [ 13.343033] x14: 6567617028454741 x13: 2929737365726464 x12: ffff800083728ab0 [ 13.343183] x11: ffff800082996bf8 x10: 0000000000000fd7 x9 : ffff80008011bc40 [ 13.343351] x8 : 0000000000017fe8 x7 : 00000000fffff000 x6 : ffff8000829eebf8 [ 13.343498] x5 : c0000000fffff000 x4 : 0000000000000000 x3 : 0000000000000000 [ 13.343645] x2 : 0000000000000000 x1 : ffff0000062db980 x0 : 000000000000005f [ 13.343876] Call trace: [ 13.344045] __page_check_anon_rmap+0xa0/0xb0 (P) [ 13.344234] folio_add_anon_rmap_ptes+0x22c/0x320 [ 13.344333] do_swap_page+0x1060/0x1400 [ 13.344417] __handle_mm_fault+0x61c/0xbc8 [ 13.344504] handle_mm_fault+0xd8/0x2e8 [ 13.344586] do_page_fault+0x20c/0x770 [ 13.344673] do_translation_fault+0xb4/0xf0 [ 13.344759] do_mem_abort+0x48/0xa0 [ 13.344842] el0_da+0x58/0x130 [ 13.344914] el0t_64_sync_handler+0xc4/0x138 [ 13.345002] el0t_64_sync+0x1ac/0x1b0 [ 13.345208] Code: aa1503e0 f000f801 910f6021 97ff5779 (d4210000) [ 13.345504] ---[ end trace 0000000000000000 ]--- [ 13.345715] note: a.out[107] exited with irqs disabled [ 13.345954] note: a.out[107] exited with preempt_count 2 If KSM is enabled, Peter Xu also discovered that do_swap_page() may trigger an unexpected CoW operation for small folios because ksm_might_need_to_copy() allocates a new folio when the folio index does not match linear_page_index(vma, addr). This patch also checks the swapcache when handling swap entries. If a match is found in the swapcache, it processes it similarly to a present PTE. However, there are some differences. For example, the folio is no longer exclusive because folio_try_share_anon_rmap_pte() is performed during unmapping. Furthermore, in the case of swapcache, the folio has already been unmapped, eliminating the risk of concurrent rmap walks and removing the need to acquire src_folio's anon_vma or lock. Note that for large folios, in the swapcache handling path, we directly return -EBUSY since split_folio() will return -EBUSY regardless if the folio is under writeback or unmapped. This is not an urgent issue, so a follow-up patch may address it separately. [v-songbaohua@oppo.com: minor cleanup according to Peter Xu] Link: https://lkml.kernel.org/r/20250226024411.47092-1-21cnbao@gmail.com Link: https://lkml.kernel.org/r/20250226001400.9129-1-21cnbao@gmail.com Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") Signed-off-by: Barry Song Acked-by: Peter Xu Reviewed-by: Suren Baghdasaryan Cc: Andrea Arcangeli Cc: Al Viro Cc: Axel Rasmussen Cc: Brian Geffon Cc: Christian Brauner Cc: David Hildenbrand Cc: Hugh Dickins Cc: Jann Horn Cc: Kalesh Singh Cc: Liam R. Howlett Cc: Lokesh Gidra Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport (IBM) Cc: Nicolas Geoffray Cc: Ryan Roberts Cc: Shuah Khan Cc: ZhangPeng Cc: Tangquan Zheng Cc: Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 74 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 8 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index af3dfc3633dbe..c45b672e10d17 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -18,6 +18,7 @@ #include #include #include "internal.h" +#include "swap.h" static __always_inline bool validate_dst_vma(struct vm_area_struct *dst_vma, unsigned long dst_end) @@ -1076,16 +1077,14 @@ static int move_present_pte(struct mm_struct *mm, return err; } -static int move_swap_pte(struct mm_struct *mm, +static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, pte_t *dst_pte, pte_t *src_pte, pte_t orig_dst_pte, pte_t orig_src_pte, pmd_t *dst_pmd, pmd_t dst_pmdval, - spinlock_t *dst_ptl, spinlock_t *src_ptl) + spinlock_t *dst_ptl, spinlock_t *src_ptl, + struct folio *src_folio) { - if (!pte_swp_exclusive(orig_src_pte)) - return -EBUSY; - double_pt_lock(dst_ptl, src_ptl); if (!is_pte_pages_stable(dst_pte, src_pte, orig_dst_pte, orig_src_pte, @@ -1094,6 +1093,16 @@ static int move_swap_pte(struct mm_struct *mm, return -EAGAIN; } + /* + * The src_folio resides in the swapcache, requiring an update to its + * index and mapping to align with the dst_vma, where a swap-in may + * occur and hit the swapcache after moving the PTE. + */ + if (src_folio) { + folio_move_anon_rmap(src_folio, dst_vma); + src_folio->index = linear_page_index(dst_vma, dst_addr); + } + orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte); set_pte_at(mm, dst_addr, dst_pte, orig_src_pte); double_pt_unlock(dst_ptl, src_ptl); @@ -1141,6 +1150,7 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, __u64 mode) { swp_entry_t entry; + struct swap_info_struct *si = NULL; pte_t orig_src_pte, orig_dst_pte; pte_t src_folio_pte; spinlock_t *src_ptl, *dst_ptl; @@ -1322,6 +1332,8 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, orig_dst_pte, orig_src_pte, dst_pmd, dst_pmdval, dst_ptl, src_ptl, src_folio); } else { + struct folio *folio = NULL; + entry = pte_to_swp_entry(orig_src_pte); if (non_swap_entry(entry)) { if (is_migration_entry(entry)) { @@ -1335,9 +1347,53 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, goto out; } - err = move_swap_pte(mm, dst_addr, src_addr, dst_pte, src_pte, - orig_dst_pte, orig_src_pte, dst_pmd, - dst_pmdval, dst_ptl, src_ptl); + if (!pte_swp_exclusive(orig_src_pte)) { + err = -EBUSY; + goto out; + } + + si = get_swap_device(entry); + if (unlikely(!si)) { + err = -EAGAIN; + goto out; + } + /* + * Verify the existence of the swapcache. If present, the folio's + * index and mapping must be updated even when the PTE is a swap + * entry. The anon_vma lock is not taken during this process since + * the folio has already been unmapped, and the swap entry is + * exclusive, preventing rmap walks. + * + * For large folios, return -EBUSY immediately, as split_folio() + * also returns -EBUSY when attempting to split unmapped large + * folios in the swapcache. This issue needs to be resolved + * separately to allow proper handling. + */ + if (!src_folio) + folio = filemap_get_folio(swap_address_space(entry), + swap_cache_index(entry)); + if (!IS_ERR_OR_NULL(folio)) { + if (folio_test_large(folio)) { + err = -EBUSY; + folio_put(folio); + goto out; + } + src_folio = folio; + src_folio_pte = orig_src_pte; + if (!folio_trylock(src_folio)) { + pte_unmap(&orig_src_pte); + pte_unmap(&orig_dst_pte); + src_pte = dst_pte = NULL; + put_swap_device(si); + si = NULL; + /* now we can block and wait */ + folio_lock(src_folio); + goto retry; + } + } + err = move_swap_pte(mm, dst_vma, dst_addr, src_addr, dst_pte, src_pte, + orig_dst_pte, orig_src_pte, dst_pmd, dst_pmdval, + dst_ptl, src_ptl, src_folio); } out: @@ -1354,6 +1410,8 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, if (src_pte) pte_unmap(src_pte); mmu_notifier_invalidate_range_end(&range); + if (si) + put_swap_device(si); return err; } -- GitLab From 058313515d5aab10d0a01dd634f92ed4a4e71d4c Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Tue, 25 Feb 2025 17:52:55 +0800 Subject: [PATCH 1332/1585] mm: shmem: fix potential data corruption during shmem swapin Alex and Kairui reported some issues (system hang or data corruption) when swapping out or swapping in large shmem folios. This is especially easy to reproduce when the tmpfs is mount with the 'huge=within_size' parameter. Thanks to Kairui's reproducer, the issue can be easily replicated. The root cause of the problem is that swap readahead may asynchronously swap in order 0 folios into the swap cache, while the shmem mapping can still store large swap entries. Then an order 0 folio is inserted into the shmem mapping without splitting the large swap entry, which overwrites the original large swap entry, leading to data corruption. When getting a folio from the swap cache, we should split the large swap entry stored in the shmem mapping if the orders do not match, to fix this issue. Link: https://lkml.kernel.org/r/2fe47c557e74e9df5fe2437ccdc6c9115fa1bf70.1740476943.git.baolin.wang@linux.alibaba.com Fixes: 809bc86517cc ("mm: shmem: support large folio swap out") Signed-off-by: Baolin Wang Reported-by: Alex Xu (Hello71) Reported-by: Kairui Song Closes: https://lore.kernel.org/all/1738717785.im3r5g2vxc.none@localhost/ Tested-by: Kairui Song Cc: David Hildenbrand Cc: Lance Yang Cc: Matthew Wilcow Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton --- mm/shmem.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 4ea6109a80431..cebbac97a2219 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2253,7 +2253,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, struct folio *folio = NULL; bool skip_swapcache = false; swp_entry_t swap; - int error, nr_pages; + int error, nr_pages, order, split_order; VM_BUG_ON(!*foliop || !xa_is_value(*foliop)); swap = radix_to_swp_entry(*foliop); @@ -2272,10 +2272,9 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, /* Look it up and read it in.. */ folio = swap_cache_get_folio(swap, NULL, 0); + order = xa_get_order(&mapping->i_pages, index); if (!folio) { - int order = xa_get_order(&mapping->i_pages, index); bool fallback_order0 = false; - int split_order; /* Or update major stats only when swapin succeeds?? */ if (fault_type) { @@ -2339,6 +2338,29 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, error = -ENOMEM; goto failed; } + } else if (order != folio_order(folio)) { + /* + * Swap readahead may swap in order 0 folios into swapcache + * asynchronously, while the shmem mapping can still stores + * large swap entries. In such cases, we should split the + * large swap entry to prevent possible data corruption. + */ + split_order = shmem_split_large_entry(inode, index, swap, gfp); + if (split_order < 0) { + error = split_order; + goto failed; + } + + /* + * If the large swap entry has already been split, it is + * necessary to recalculate the new swap entry based on + * the old order alignment. + */ + if (split_order > 0) { + pgoff_t offset = index - round_down(index, 1 << split_order); + + swap = swp_entry(swp_type(swap), swp_offset(swap) + offset); + } } alloced: @@ -2346,7 +2368,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, folio_lock(folio); if ((!skip_swapcache && !folio_test_swapcache(folio)) || folio->swap.val != swap.val || - !shmem_confirm_swap(mapping, index, swap)) { + !shmem_confirm_swap(mapping, index, swap) || + xa_get_order(&mapping->i_pages, index) != folio_order(folio)) { error = -EEXIST; goto unlock; } -- GitLab From ea6de4f8f8f32e54662118a97c441a6ad7b24345 Mon Sep 17 00:00:00 2001 From: Sun YangKai Date: Wed, 26 Feb 2025 23:32:43 +0800 Subject: [PATCH 1333/1585] mm: zswap: use ATOMIC_LONG_INIT to initialize zswap_stored_pages This is currently the only atomic_long_t variable initialized by ATOMIC_INIT macro found in the kernel by using `grep -r atomic_long_t | grep ATOMIC_INIT` This was introduced in 6e1fa555ec77, in which we modified the type of zswap_stored_pages to atomic_long_t, but didn't change the initialization. Link: https://lkml.kernel.org/r/20250226153253.19179-1-sunk67188@gmail.com Fixes: 6e1fa555ec77 ("mm: zswap: modify zswap_stored_pages to be atomic_long_t") Signed-off-by: Sun YangKai Acked-by: Yosry Ahmed Acked-by: David Hildenbrand Cc: Chengming Zhou Cc: Johannes Weiner Cc: Kanchana P Sridhar Cc: Nhat Pham Signed-off-by: Andrew Morton --- mm/zswap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/zswap.c b/mm/zswap.c index ac9d299e7d0c1..23365e76a3ce3 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -43,7 +43,7 @@ * statistics **********************************/ /* The number of compressed pages currently stored in zswap */ -atomic_long_t zswap_stored_pages = ATOMIC_INIT(0); +atomic_long_t zswap_stored_pages = ATOMIC_LONG_INIT(0); /* * The statistics below are not protected from concurrent access for -- GitLab From 37b338eed10581784e854d4262da05c8d960c748 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 26 Feb 2025 10:55:08 -0800 Subject: [PATCH 1334/1585] userfaultfd: do not block on locking a large folio with raised refcount Lokesh recently raised an issue about UFFDIO_MOVE getting into a deadlock state when it goes into split_folio() with raised folio refcount. split_folio() expects the reference count to be exactly mapcount + num_pages_in_folio + 1 (see can_split_folio()) and fails with EAGAIN otherwise. If multiple processes are trying to move the same large folio, they raise the refcount (all tasks succeed in that) then one of them succeeds in locking the folio, while others will block in folio_lock() while keeping the refcount raised. The winner of this race will proceed with calling split_folio() and will fail returning EAGAIN to the caller and unlocking the folio. The next competing process will get the folio locked and will go through the same flow. In the meantime the original winner will be retried and will block in folio_lock(), getting into the queue of waiting processes only to repeat the same path. All this results in a livelock. An easy fix would be to avoid waiting for the folio lock while holding folio refcount, similar to madvise_free_huge_pmd() where folio lock is acquired before raising the folio refcount. Since we lock and take a refcount of the folio while holding the PTE lock, changing the order of these operations should not break anything. Modify move_pages_pte() to try locking the folio first and if that fails and the folio is large then return EAGAIN without touching the folio refcount. If the folio is single-page then split_folio() is not called, so we don't have this issue. Lokesh has a reproducer [1] and I verified that this change fixes the issue. [1] https://github.com/lokeshgidra/uffd_move_ioctl_deadlock [akpm@linux-foundation.org: reflow comment to 80 cols, s/end/end up/] Link: https://lkml.kernel.org/r/20250226185510.2732648-2-surenb@google.com Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") Signed-off-by: Suren Baghdasaryan Reported-by: Lokesh Gidra Reviewed-by: Peter Xu Acked-by: Liam R. Howlett Cc: Andrea Arcangeli Cc: Barry Song <21cnbao@gmail.com> Cc: Barry Song Cc: David Hildenbrand Cc: Hugh Dickins Cc: Jann Horn Cc: Kalesh Singh Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index c45b672e10d17..f5c6b3454f76b 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1250,6 +1250,7 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, */ if (!src_folio) { struct folio *folio; + bool locked; /* * Pin the page while holding the lock to be sure the @@ -1269,12 +1270,26 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, goto out; } + locked = folio_trylock(folio); + /* + * We avoid waiting for folio lock with a raised + * refcount for large folios because extra refcounts + * will result in split_folio() failing later and + * retrying. If multiple tasks are trying to move a + * large folio we can end up livelocking. + */ + if (!locked && folio_test_large(folio)) { + spin_unlock(src_ptl); + err = -EAGAIN; + goto out; + } + folio_get(folio); src_folio = folio; src_folio_pte = orig_src_pte; spin_unlock(src_ptl); - if (!folio_trylock(src_folio)) { + if (!locked) { pte_unmap(&orig_src_pte); pte_unmap(&orig_dst_pte); src_pte = dst_pte = NULL; -- GitLab From 927e926d72d9155fde3264459fe9bfd7b5e40d28 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 26 Feb 2025 10:55:09 -0800 Subject: [PATCH 1335/1585] userfaultfd: fix PTE unmapping stack-allocated PTE copies Current implementation of move_pages_pte() copies source and destination PTEs in order to detect concurrent changes to PTEs involved in the move. However these copies are also used to unmap the PTEs, which will fail if CONFIG_HIGHPTE is enabled because the copies are allocated on the stack. Fix this by using the actual PTEs which were kmap()ed. Link: https://lkml.kernel.org/r/20250226185510.2732648-3-surenb@google.com Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") Signed-off-by: Suren Baghdasaryan Reported-by: Peter Xu Reviewed-by: Peter Xu Cc: Andrea Arcangeli Cc: Barry Song <21cnbao@gmail.com> Cc: Barry Song Cc: David Hildenbrand Cc: Hugh Dickins Cc: Jann Horn Cc: Kalesh Singh Cc: Liam R. Howlett Cc: Lokesh Gidra Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index f5c6b3454f76b..d06453fa8abae 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1290,8 +1290,8 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, spin_unlock(src_ptl); if (!locked) { - pte_unmap(&orig_src_pte); - pte_unmap(&orig_dst_pte); + pte_unmap(src_pte); + pte_unmap(dst_pte); src_pte = dst_pte = NULL; /* now we can block and wait */ folio_lock(src_folio); @@ -1307,8 +1307,8 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, /* at this point we have src_folio locked */ if (folio_test_large(src_folio)) { /* split_folio() can block */ - pte_unmap(&orig_src_pte); - pte_unmap(&orig_dst_pte); + pte_unmap(src_pte); + pte_unmap(dst_pte); src_pte = dst_pte = NULL; err = split_folio(src_folio); if (err) @@ -1333,8 +1333,8 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, goto out; } if (!anon_vma_trylock_write(src_anon_vma)) { - pte_unmap(&orig_src_pte); - pte_unmap(&orig_dst_pte); + pte_unmap(src_pte); + pte_unmap(dst_pte); src_pte = dst_pte = NULL; /* now we can block and wait */ anon_vma_lock_write(src_anon_vma); @@ -1352,8 +1352,8 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, entry = pte_to_swp_entry(orig_src_pte); if (non_swap_entry(entry)) { if (is_migration_entry(entry)) { - pte_unmap(&orig_src_pte); - pte_unmap(&orig_dst_pte); + pte_unmap(src_pte); + pte_unmap(dst_pte); src_pte = dst_pte = NULL; migration_entry_wait(mm, src_pmd, src_addr); err = -EAGAIN; @@ -1396,8 +1396,8 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, src_folio = folio; src_folio_pte = orig_src_pte; if (!folio_trylock(src_folio)) { - pte_unmap(&orig_src_pte); - pte_unmap(&orig_dst_pte); + pte_unmap(src_pte); + pte_unmap(dst_pte); src_pte = dst_pte = NULL; put_swap_device(si); si = NULL; -- GitLab From adae46ac1e38a288b14f0298e27412adcba83f8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo=20Navarro?= Date: Wed, 26 Feb 2025 13:26:27 +0100 Subject: [PATCH 1336/1585] mm: shmem: remove unnecessary warning in shmem_writepage() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although the scenario where shmem_writepage() is called with info->flags & VM_LOCKED is unlikely to happen, it's still possible, as evidenced by syzbot [1]. However, the warning in this case isn't necessary because the situation is already handled correctly [2]. [2] https://lore.kernel.org/lkml/8afe1f7f-31a2-4fc0-1fbd-f9ba8a116fe3@google.com/ Link: https://lkml.kernel.org/r/20250226-20250221-warning-in-shmem_writepage-v1-1-5ad19420e17e@igalia.com Fixes: 9a976f0c847b ("shmem: skip page split if we're not reclaiming") Signed-off-by: Ricardo Cañuelo Navarro Reported-by: Pengfei Xu Closes: https://lore.kernel.org/lkml/ZZ9PShXjKJkVelNm@xpf.sh.intel.com/ [1] Suggested-by: Hugh Dickins Reviewed-by: Baolin Wang Cc: Florent Revest Cc: Christian Brauner Cc: David Hildenbrand Cc: Davidlohr Bueso Cc: Florent Revest Cc: Luis Chamberalin Signed-off-by: Andrew Morton --- mm/shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index cebbac97a2219..3fcd1690eedd7 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1548,7 +1548,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) if (WARN_ON_ONCE(!wbc->for_reclaim)) goto redirty; - if (WARN_ON_ONCE((info->flags & VM_LOCKED) || sbinfo->noswap)) + if ((info->flags & VM_LOCKED) || sbinfo->noswap) goto redirty; if (!total_swap_pages) -- GitLab From 3685024edd270f7c791f993157d65d3c928f3d6e Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Wed, 26 Feb 2025 12:16:09 +0000 Subject: [PATCH 1337/1585] mm: don't skip arch_sync_kernel_mappings() in error paths Fix callers that previously skipped calling arch_sync_kernel_mappings() if an error occurred during a pgtable update. The call is still required to sync any pgtable updates that may have occurred prior to hitting the error condition. These are theoretical bugs discovered during code review. Link: https://lkml.kernel.org/r/20250226121610.2401743-1-ryan.roberts@arm.com Fixes: 2ba3e6947aed ("mm/vmalloc: track which page-table levels were modified") Fixes: 0c95cba49255 ("mm: apply_to_pte_range warn and fail if a large pte is encountered") Signed-off-by: Ryan Roberts Reviewed-by: Anshuman Khandual Reviewed-by: Catalin Marinas Cc: Christop Hellwig Cc: "Uladzislau Rezki (Sony)" Cc: Signed-off-by: Andrew Morton --- mm/memory.c | 6 ++++-- mm/vmalloc.c | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index b4d3d4893267c..55d0d49546273 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3051,8 +3051,10 @@ static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr, next = pgd_addr_end(addr, end); if (pgd_none(*pgd) && !create) continue; - if (WARN_ON_ONCE(pgd_leaf(*pgd))) - return -EINVAL; + if (WARN_ON_ONCE(pgd_leaf(*pgd))) { + err = -EINVAL; + break; + } if (!pgd_none(*pgd) && WARN_ON_ONCE(pgd_bad(*pgd))) { if (!create) continue; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index a6e7acebe9adf..61981ee1c9d2f 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -586,13 +586,13 @@ static int vmap_small_pages_range_noflush(unsigned long addr, unsigned long end, mask |= PGTBL_PGD_MODIFIED; err = vmap_pages_p4d_range(pgd, addr, next, prot, pages, &nr, &mask); if (err) - return err; + break; } while (pgd++, addr = next, addr != end); if (mask & ARCH_PAGE_TABLE_SYNC_MASK) arch_sync_kernel_mappings(start, end); - return 0; + return err; } /* -- GitLab From 34b82f33cf3f03bc39e9a205a913d790e1520ade Mon Sep 17 00:00:00 2001 From: Brian Geffon Date: Wed, 26 Feb 2025 11:23:41 -0500 Subject: [PATCH 1338/1585] mm: fix finish_fault() handling for large folios When handling faults for anon shmem finish_fault() will attempt to install ptes for the entire folio. Unfortunately if it encounters a single non-pte_none entry in that range it will bail, even if the pte that triggered the fault is still pte_none. When this situation happens the fault will be retried endlessly never making forward progress. This patch fixes this behavior and if it detects that a pte in the range is not pte_none it will fall back to setting a single pte. [bgeffon@google.com: tweak whitespace] Link: https://lkml.kernel.org/r/20250227133236.1296853-1-bgeffon@google.com Link: https://lkml.kernel.org/r/20250226162341.915535-1-bgeffon@google.com Fixes: 43e027e41423 ("mm: memory: extend finish_fault() to support large folio") Signed-off-by: Brian Geffon Suggested-by: Baolin Wang Reported-by: Marek Maslanka Cc: Hugh Dickins Cc: David Hildenbrand Cc: Hugh Dickens Cc: Kefeng Wang Cc: Matthew Wilcow (Oracle) Cc: Suren Baghdasaryan Cc: Zi Yan Cc: Signed-off-by: Andrew Morton --- mm/memory.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 55d0d49546273..b9661ccfa64fd 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5185,7 +5185,11 @@ vm_fault_t finish_fault(struct vm_fault *vmf) bool is_cow = (vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED); int type, nr_pages; - unsigned long addr = vmf->address; + unsigned long addr; + bool needs_fallback = false; + +fallback: + addr = vmf->address; /* Did we COW the page? */ if (is_cow) @@ -5224,7 +5228,8 @@ vm_fault_t finish_fault(struct vm_fault *vmf) * approach also applies to non-anonymous-shmem faults to avoid * inflating the RSS of the process. */ - if (!vma_is_anon_shmem(vma) || unlikely(userfaultfd_armed(vma))) { + if (!vma_is_anon_shmem(vma) || unlikely(userfaultfd_armed(vma)) || + unlikely(needs_fallback)) { nr_pages = 1; } else if (nr_pages > 1) { pgoff_t idx = folio_page_idx(folio, page); @@ -5260,9 +5265,9 @@ vm_fault_t finish_fault(struct vm_fault *vmf) ret = VM_FAULT_NOPAGE; goto unlock; } else if (nr_pages > 1 && !pte_range_none(vmf->pte, nr_pages)) { - update_mmu_tlb_range(vma, addr, vmf->pte, nr_pages); - ret = VM_FAULT_NOPAGE; - goto unlock; + needs_fallback = true; + pte_unmap_unlock(vmf->pte, vmf->ptl); + goto fallback; } folio_ref_add(folio, nr_pages - 1); -- GitLab From eae116d1f0449ade3269ca47a67432622f5c6438 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Tue, 25 Feb 2025 22:22:58 -0500 Subject: [PATCH 1339/1585] Revert "mm/page_alloc.c: don't show protection in zone's ->lowmem_reserve[] for empty zone" Commit 96a5c186efff ("mm/page_alloc.c: don't show protection in zone's ->lowmem_reserve[] for empty zone") removes the protection of lower zones from allocations targeting memory-less high zones. This had an unintended impact on the pattern of reclaims because it makes the high-zone-targeted allocation more likely to succeed in lower zones, which adds pressure to said zones. I.e, the following corresponding checks in zone_watermark_ok/zone_watermark_fast are less likely to trigger: if (free_pages <= min + z->lowmem_reserve[highest_zoneidx]) return false; As a result, we are observing an increase in reclaim and kswapd scans, due to the increased pressure. This was initially observed as increased latency in filesystem operations when benchmarking with fio on a machine with some memory-less zones, but it has since been associated with increased contention in locks related to memory reclaim. By reverting this patch, the original performance was recovered on that machine. The original commit was introduced as a clarification of the /proc/zoneinfo output, so it doesn't seem there are usecases depending on it, making the revert a simple solution. For reference, I collected vmstat with and without this patch on a freshly booted system running intensive randread io from an nvme for 5 minutes. I got: rpm-6.12.0-slfo.1.2 -> pgscan_kswapd 5629543865 Patched -> pgscan_kswapd 33580844 33M scans is similar to what we had in kernels predating this patch. These numbers is fairly representative of the workload on this machine, as measured in several runs. So we are talking about a 2-order of magnitude increase. Link: https://lkml.kernel.org/r/20250226032258.234099-1-krisman@suse.de Fixes: 96a5c186efff ("mm/page_alloc.c: don't show protection in zone's ->lowmem_reserve[] for empty zone") Signed-off-by: Gabriel Krisman Bertazi Reviewed-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Mel Gorman Cc: Baoquan He Cc: Signed-off-by: Andrew Morton --- mm/page_alloc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 579789600a3c7..fe986e6de7a01 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5849,11 +5849,10 @@ static void setup_per_zone_lowmem_reserve(void) for (j = i + 1; j < MAX_NR_ZONES; j++) { struct zone *upper_zone = &pgdat->node_zones[j]; - bool empty = !zone_managed_pages(upper_zone); managed_pages += zone_managed_pages(upper_zone); - if (clear || empty) + if (clear) zone->lowmem_reserve[j] = 0; else zone->lowmem_reserve[j] = managed_pages / ratio; -- GitLab From 88f5a9a945bf25df1f90032baf95457370c2206e Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Thu, 27 Feb 2025 17:02:28 +0530 Subject: [PATCH 1340/1585] MAINTAINERS: .mailmap: update Sumit Garg's email address Update Sumit Garg's email address to @kernel.org. Link: https://lkml.kernel.org/r/20250227113228.1809449-1-sumit.garg@linaro.org Signed-off-by: Sumit Garg Cc: Herbert Xu Cc: Jarkko Sakkinen Cc: Jens Wiklander Signed-off-by: Andrew Morton --- .mailmap | 1 + MAINTAINERS | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.mailmap b/.mailmap index a897c16d3baef..4a93909286d8b 100644 --- a/.mailmap +++ b/.mailmap @@ -689,6 +689,7 @@ Subbaraman Narayanamurthy Subhash Jadavani Sudarshan Rajagopalan Sudeep Holla Sudeep KarkadaNagesha +Sumit Garg Sumit Semwal Surabhi Vishnoi Sven Eckelmann diff --git a/MAINTAINERS b/MAINTAINERS index 4e17764cb6ed4..5e1be7b25912e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12864,7 +12864,7 @@ F: include/keys/trusted_dcp.h F: security/keys/trusted-keys/trusted_dcp.c KEYS-TRUSTED-TEE -M: Sumit Garg +M: Sumit Garg L: linux-integrity@vger.kernel.org L: keyrings@vger.kernel.org S: Supported @@ -17658,7 +17658,7 @@ F: Documentation/ABI/testing/sysfs-bus-optee-devices F: drivers/tee/optee/ OP-TEE RANDOM NUMBER GENERATOR (RNG) DRIVER -M: Sumit Garg +M: Sumit Garg L: op-tee@lists.trustedfirmware.org S: Maintained F: drivers/char/hw_random/optee-rng.c @@ -23268,7 +23268,7 @@ F: include/media/i2c/tw9910.h TEE SUBSYSTEM M: Jens Wiklander -R: Sumit Garg +R: Sumit Garg L: op-tee@lists.trustedfirmware.org S: Maintained F: Documentation/ABI/testing/sysfs-class-tee -- GitLab From b2ef51c74b0171fde7eb69b6152d3d2f743ef269 Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Thu, 27 Feb 2025 15:34:09 +0800 Subject: [PATCH 1341/1585] rapidio: fix an API misues when rio_add_net() fails rio_add_net() calls device_register() and fails when device_register() fails. Thus, put_device() should be used rather than kfree(). Add "mport->net = NULL;" to avoid a use after free issue. Link: https://lkml.kernel.org/r/20250227073409.3696854-1-haoxiang_li2024@163.com Fixes: e8de370188d0 ("rapidio: add mport char device driver") Signed-off-by: Haoxiang Li Reviewed-by: Dan Carpenter Cc: Alexandre Bounine Cc: Matt Porter Cc: Yang Yingliang Cc: Signed-off-by: Andrew Morton --- drivers/rapidio/devices/rio_mport_cdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index 27afbb9d544b7..cbf531d0ba688 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -1742,7 +1742,8 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv, err = rio_add_net(net); if (err) { rmcd_debug(RDEV, "failed to register net, err=%d", err); - kfree(net); + put_device(&net->dev); + mport->net = NULL; goto cleanup; } } -- GitLab From e842f9a1edf306bf36fe2a4d847a0b0d458770de Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Thu, 27 Feb 2025 12:11:31 +0800 Subject: [PATCH 1342/1585] rapidio: add check for rio_add_net() in rio_scan_alloc_net() The return value of rio_add_net() should be checked. If it fails, put_device() should be called to free the memory and give up the reference initialized in rio_add_net(). Link: https://lkml.kernel.org/r/20250227041131.3680761-1-haoxiang_li2024@163.com Fixes: e6b585ca6e81 ("rapidio: move net allocation into core code") Signed-off-by: Yang Yingliang Signed-off-by: Haoxiang Li Cc: Alexandre Bounine Cc: Matt Porter Cc: Dan Carpenter Cc: Signed-off-by: Andrew Morton --- drivers/rapidio/rio-scan.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index fdcf742b2adbc..c12941f71e2cb 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -871,7 +871,10 @@ static struct rio_net *rio_scan_alloc_net(struct rio_mport *mport, dev_set_name(&net->dev, "rnet_%d", net->id); net->dev.parent = &mport->dev; net->dev.release = rio_scan_release_dev; - rio_add_net(net); + if (rio_add_net(net)) { + put_device(&net->dev); + net = NULL; + } } return net; -- GitLab From 8fe9ed44dc29fba0786b7e956d2e87179e407582 Mon Sep 17 00:00:00 2001 From: Hao Zhang Date: Thu, 27 Feb 2025 11:41:29 +0800 Subject: [PATCH 1343/1585] mm/page_alloc: fix uninitialized variable The variable "compact_result" is not initialized in function __alloc_pages_slowpath(). It causes should_compact_retry() to use an uninitialized value. Initialize variable "compact_result" with the value COMPACT_SKIPPED. BUG: KMSAN: uninit-value in __alloc_pages_slowpath+0xee8/0x16c0 mm/page_alloc.c:4416 __alloc_pages_slowpath+0xee8/0x16c0 mm/page_alloc.c:4416 __alloc_frozen_pages_noprof+0xa4c/0xe00 mm/page_alloc.c:4752 alloc_pages_mpol+0x4cd/0x890 mm/mempolicy.c:2270 alloc_frozen_pages_noprof mm/mempolicy.c:2341 [inline] alloc_pages_noprof mm/mempolicy.c:2361 [inline] folio_alloc_noprof+0x1dc/0x350 mm/mempolicy.c:2371 filemap_alloc_folio_noprof+0xa6/0x440 mm/filemap.c:1019 __filemap_get_folio+0xb9a/0x1840 mm/filemap.c:1970 grow_dev_folio fs/buffer.c:1039 [inline] grow_buffers fs/buffer.c:1105 [inline] __getblk_slow fs/buffer.c:1131 [inline] bdev_getblk+0x2c9/0xab0 fs/buffer.c:1431 getblk_unmovable include/linux/buffer_head.h:369 [inline] ext4_getblk+0x3b7/0xe50 fs/ext4/inode.c:864 ext4_bread_batch+0x9f/0x7d0 fs/ext4/inode.c:933 __ext4_find_entry+0x1ebb/0x36c0 fs/ext4/namei.c:1627 ext4_lookup_entry fs/ext4/namei.c:1729 [inline] ext4_lookup+0x189/0xb40 fs/ext4/namei.c:1797 __lookup_slow+0x538/0x710 fs/namei.c:1793 lookup_slow+0x6a/0xd0 fs/namei.c:1810 walk_component fs/namei.c:2114 [inline] link_path_walk+0xf29/0x1420 fs/namei.c:2479 path_openat+0x30f/0x6250 fs/namei.c:3985 do_filp_open+0x268/0x600 fs/namei.c:4016 do_sys_openat2+0x1bf/0x2f0 fs/open.c:1428 do_sys_open fs/open.c:1443 [inline] __do_sys_openat fs/open.c:1459 [inline] __se_sys_openat fs/open.c:1454 [inline] __x64_sys_openat+0x2a1/0x310 fs/open.c:1454 x64_sys_call+0x36f5/0x3c30 arch/x86/include/generated/asm/syscalls_64.h:258 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Local variable compact_result created at: __alloc_pages_slowpath+0x66/0x16c0 mm/page_alloc.c:4218 __alloc_frozen_pages_noprof+0xa4c/0xe00 mm/page_alloc.c:4752 Link: https://lkml.kernel.org/r/tencent_ED1032321D6510B145CDBA8CBA0093178E09@qq.com Reported-by: syzbot+0cfd5e38e96a5596f2b6@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=0cfd5e38e96a5596f2b6 Signed-off-by: Hao Zhang Reviewed-by: Vlastimil Babka Cc: Michal Hocko Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton --- mm/page_alloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fe986e6de7a01..94917c729120f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4243,6 +4243,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, restart: compaction_retries = 0; no_progress_loops = 0; + compact_result = COMPACT_SKIPPED; compact_priority = DEF_COMPACT_PRIORITY; cpuset_mems_cookie = read_mems_allowed_begin(); zonelist_iter_cookie = zonelist_iter_begin(); -- GitLab From 80da96d735094ea22985ced98bc57fe3a4422921 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 4 Mar 2025 14:41:57 +0100 Subject: [PATCH 1344/1585] drm/bochs: Fix DPMS regression The recent rewrite with the use of regular atomic helpers broke the DPMS unblanking on X11. Fix it by moving the call of bochs_hw_blank(false) from CRTC mode_set_nofb() to atomic_enable(). Fixes: 2037174993c8 ("drm/bochs: Use regular atomic helpers") Link: https://bugzilla.suse.com/show_bug.cgi?id=1238209 Signed-off-by: Takashi Iwai Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20250304134203.20534-1-tiwai@suse.de --- drivers/gpu/drm/tiny/bochs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c index c67e1f9067859..8706763af8fba 100644 --- a/drivers/gpu/drm/tiny/bochs.c +++ b/drivers/gpu/drm/tiny/bochs.c @@ -335,8 +335,6 @@ static void bochs_hw_setmode(struct bochs_device *bochs, struct drm_display_mode bochs->xres, bochs->yres, bochs->bpp, bochs->yres_virtual); - bochs_hw_blank(bochs, false); - bochs_dispi_write(bochs, VBE_DISPI_INDEX_ENABLE, 0); bochs_dispi_write(bochs, VBE_DISPI_INDEX_BPP, bochs->bpp); bochs_dispi_write(bochs, VBE_DISPI_INDEX_XRES, bochs->xres); @@ -506,6 +504,9 @@ static int bochs_crtc_helper_atomic_check(struct drm_crtc *crtc, static void bochs_crtc_helper_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state) { + struct bochs_device *bochs = to_bochs_device(crtc->dev); + + bochs_hw_blank(bochs, false); } static void bochs_crtc_helper_atomic_disable(struct drm_crtc *crtc, -- GitLab From d385c8bceb14665e935419334aa3d3fac2f10456 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Wed, 5 Mar 2025 15:58:49 +0100 Subject: [PATCH 1345/1585] pid: Do not set pid_max in new pid namespaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is already difficult for users to troubleshoot which of multiple pid limits restricts their workload. The per-(hierarchical-)NS pid_max would contribute to the confusion. Also, the implementation copies the limit upon creation from parent, this pattern showed cumbersome with some attributes in legacy cgroup controllers -- it's subject to race condition between parent's limit modification and children creation and once copied it must be changed in the descendant. Let's do what other places do (ucounts or cgroup limits) -- create new pid namespaces without any limit at all. The global limit (actually any ancestor's limit) is still effectively in place, we avoid the set/unshare race and bumps of global (ancestral) limit have the desired effect on pid namespace that do not care. Link: https://lore.kernel.org/r/20240408145819.8787-1-mkoutny@suse.com/ Link: https://lore.kernel.org/r/20250221170249.890014-1-mkoutny@suse.com/ Fixes: 7863dcc72d0f4 ("pid: allow pid_max to be set per pid namespace") Signed-off-by: Michal Koutný Link: https://lore.kernel.org/r/20250305145849.55491-1-mkoutny@suse.com Signed-off-by: Christian Brauner --- kernel/pid_namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 8f6cfec87555a..7098ed44e717d 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -107,7 +107,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns goto out_free_idr; ns->ns.ops = &pidns_operations; - ns->pid_max = parent_pid_ns->pid_max; + ns->pid_max = PID_MAX_LIMIT; err = register_pidns_sysctls(ns); if (err) goto out_free_inum; -- GitLab From 14672f059d83f591afb2ee1fff56858efe055e5a Mon Sep 17 00:00:00 2001 From: Shrikanth Hegde Date: Thu, 6 Mar 2025 10:59:53 +0530 Subject: [PATCH 1346/1585] sched/deadline: Use online cpus for validating runtime The ftrace selftest reported a failure because writing -1 to sched_rt_runtime_us returns -EBUSY. This happens when the possible CPUs are different from active CPUs. Active CPUs are part of one root domain, while remaining CPUs are part of def_root_domain. Since active cpumask is being used, this results in cpus=0 when a non active CPUs is used in the loop. Fix it by looping over the online CPUs instead for validating the bandwidth calculations. Signed-off-by: Shrikanth Hegde Signed-off-by: Ingo Molnar Reviewed-by: Juri Lelli Link: https://lore.kernel.org/r/20250306052954.452005-2-sshegde@linux.ibm.com --- kernel/sched/deadline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 38e4537790af7..ff4df16b5186d 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -3189,7 +3189,7 @@ int sched_dl_global_validate(void) * value smaller than the currently allocated bandwidth in * any of the root_domains. */ - for_each_possible_cpu(cpu) { + for_each_online_cpu(cpu) { rcu_read_lock_sched(); if (dl_bw_visited(cpu, gen)) -- GitLab From b1536481c81fb604074da799e4f2d2038a1663f7 Mon Sep 17 00:00:00 2001 From: Shrikanth Hegde Date: Thu, 6 Mar 2025 10:59:54 +0530 Subject: [PATCH 1347/1585] sched/rt: Update limit of sched_rt sysctl in documentation By default fair_server dl_server allocates 5% of the bandwidth to the root domain. Due to this writing any value less than 5% fails due to -EBUSY: $ cat /proc/sys/kernel/sched_rt_period_us 1000000 $ echo 49999 > /proc/sys/kernel/sched_rt_runtime_us -bash: echo: write error: Device or resource busy $ echo 50000 > /proc/sys/kernel/sched_rt_runtime_us $ Since the sched_rt_runtime_us allows -1 as the minimum, put this restriction in the documentation. One should check average of runtime/period in /sys/kernel/debug/sched/fair_server/cpuX/* for exact value. Signed-off-by: Shrikanth Hegde Signed-off-by: Ingo Molnar Reviewed-by: Juri Lelli Link: https://lore.kernel.org/r/20250306052954.452005-3-sshegde@linux.ibm.com --- Documentation/scheduler/sched-rt-group.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/scheduler/sched-rt-group.rst b/Documentation/scheduler/sched-rt-group.rst index 80b05a3009ea2..ab464335d3204 100644 --- a/Documentation/scheduler/sched-rt-group.rst +++ b/Documentation/scheduler/sched-rt-group.rst @@ -102,6 +102,9 @@ The system wide settings are configured under the /proc virtual file system: * sched_rt_period_us takes values from 1 to INT_MAX. * sched_rt_runtime_us takes values from -1 to sched_rt_period_us. * A run time of -1 specifies runtime == period, ie. no limit. + * sched_rt_runtime_us/sched_rt_period_us > 0.05 inorder to preserve + bandwidth for fair dl_server. For accurate value check average of + runtime/period in /sys/kernel/debug/sched/fair_server/cpuX/ 2.2 Default behaviour -- GitLab From cf7ee25e70c6edfac4553d6b671e8b19db1d9573 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Tue, 4 Mar 2025 13:59:51 +0800 Subject: [PATCH 1348/1585] mctp i3c: handle NULL header address daddr can be NULL if there is no neighbour table entry present, in that case the tx packet should be dropped. saddr will usually be set by MCTP core, but check for NULL in case a packet is transmitted by a different protocol. Signed-off-by: Matt Johnston Fixes: c8755b29b58e ("mctp i3c: MCTP I3C driver") Link: https://patch.msgid.link/20250304-mctp-i3c-null-v1-1-4416bbd56540@codeconstruct.com.au Signed-off-by: Paolo Abeni --- drivers/net/mctp/mctp-i3c.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/mctp/mctp-i3c.c b/drivers/net/mctp/mctp-i3c.c index d247fe483c588..c1e72253063b5 100644 --- a/drivers/net/mctp/mctp-i3c.c +++ b/drivers/net/mctp/mctp-i3c.c @@ -507,6 +507,9 @@ static int mctp_i3c_header_create(struct sk_buff *skb, struct net_device *dev, { struct mctp_i3c_internal_hdr *ihdr; + if (!daddr || !saddr) + return -EINVAL; + skb_push(skb, sizeof(struct mctp_i3c_internal_hdr)); skb_reset_mac_header(skb); ihdr = (void *)skb_mac_header(skb); -- GitLab From 0e7633d7b95b67f1758aea19f8e85621c5f506a3 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 4 Mar 2025 19:10:39 +0100 Subject: [PATCH 1349/1585] net: ipv6: fix dst ref loop in ila lwtunnel This patch follows commit 92191dd10730 ("net: ipv6: fix dst ref loops in rpl, seg6 and ioam6 lwtunnels") and, on a second thought, the same patch is also needed for ila (even though the config that triggered the issue was pathological, but still, we don't want that to happen). Fixes: 79ff2fc31e0f ("ila: Cache a route to translated address") Cc: Tom Herbert Signed-off-by: Justin Iurman Link: https://patch.msgid.link/20250304181039.35951-1-justin.iurman@uliege.be Signed-off-by: Paolo Abeni --- net/ipv6/ila/ila_lwt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index ff7e734e335b0..ac4bcc623603a 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -88,7 +88,8 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) goto drop; } - if (ilwt->connected) { + /* cache only if we don't create a dst reference loop */ + if (ilwt->connected && orig_dst->lwtstate != dst->lwtstate) { local_bh_disable(); dst_cache_set_ip6(&ilwt->dst_cache, dst, &fl6.saddr); local_bh_enable(); -- GitLab From 5da15a9c11c1c47ef573e6805b60a7d8a1687a2a Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Wed, 5 Mar 2025 09:16:55 +0100 Subject: [PATCH 1350/1585] net: ipv6: fix missing dst ref drop in ila lwtunnel Add missing skb_dst_drop() to drop reference to the old dst before adding the new dst to the skb. Fixes: 79ff2fc31e0f ("ila: Cache a route to translated address") Cc: Tom Herbert Signed-off-by: Justin Iurman Link: https://patch.msgid.link/20250305081655.19032-1-justin.iurman@uliege.be Signed-off-by: Paolo Abeni --- net/ipv6/ila/ila_lwt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index ac4bcc623603a..7d574f5132e2f 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -96,6 +96,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) } } + skb_dst_drop(skb); skb_dst_set(skb, dst); return dst_output(net, sk, skb); -- GitLab From 9a665fe3d967fe46edb4fd2497c7a5cc2dac2f55 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 6 Mar 2025 11:44:41 +0100 Subject: [PATCH 1351/1585] USB: serial: option: match on interface class for Telit FN990B The device id entries for Telit FN990B ended up matching only on the interface protocol. While this works, the protocol is qualified by the interface class (and subclass) which should have been included. Switch to matching using USB_DEVICE_AND_INTERFACE_INFO() while keeping the entries sorted also by protocol for consistency. Link: https://lore.kernel.org/20250227110655.3647028-2-fabio.porcedda@gmail.com/ Cc: Fabio Porcedda Cc: Daniele Palmas Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 58bd54e8c483a..1ea2870725aca 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1394,22 +1394,22 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0) | NCTRL(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10c8, 0xff), /* Telit FE910C04 (rmnet) */ .driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) }, - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d0, 0x60) }, /* Telit FN990B (rmnet) */ - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d0, 0x40) }, - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d0, 0x30), + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d0, 0xff, 0xff, 0x30), /* Telit FN990B (rmnet) */ .driver_info = NCTRL(5) }, - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d1, 0x60) }, /* Telit FN990B (MBIM) */ - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d1, 0x40) }, - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d1, 0x30), + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d0, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d0, 0xff, 0xff, 0x60) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d1, 0xff, 0xff, 0x30), /* Telit FN990B (MBIM) */ .driver_info = NCTRL(6) }, - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d2, 0x60) }, /* Telit FN990B (RNDIS) */ - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d2, 0x40) }, - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d2, 0x30), + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d1, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d1, 0xff, 0xff, 0x60) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d2, 0xff, 0xff, 0x30), /* Telit FN990B (RNDIS) */ .driver_info = NCTRL(6) }, - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d3, 0x60) }, /* Telit FN990B (ECM) */ - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d3, 0x40) }, - { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d3, 0x30), + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d2, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d2, 0xff, 0xff, 0x60) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d3, 0xff, 0xff, 0x30), /* Telit FN990B (ECM) */ .driver_info = NCTRL(6) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d3, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d3, 0xff, 0xff, 0x60) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), -- GitLab From 4981bb50392b7515b765da28cf8768ce624c2670 Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Tue, 4 Mar 2025 10:19:38 +0100 Subject: [PATCH 1352/1585] USB: serial: option: add Telit Cinterion FE990B compositions Add the following Telit Cinterion FE990B40 compositions: 0x10b0: rmnet + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (AT) + tty (diag) + DPL + QDSS (Qualcomm Debug SubSystem) + adb T: Bus=01 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 7 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10b0 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FE990 S: SerialNumber=28c2595e C: #Ifs= 9 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 6 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=8c(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 7 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=70 Driver=(none) E: Ad=8d(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 8 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10b1: MBIM + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (AT) + tty (diag) + DPL + QDSS (Qualcomm Debug SubSystem) + adb T: Bus=01 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10b1 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FE990 S: SerialNumber=28c2595e C: #Ifs=10 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 7 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=8c(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 8 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=70 Driver=(none) E: Ad=8d(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 9 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10b2: RNDIS + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (AT) + tty (diag) + DPL + QDSS (Qualcomm Debug SubSystem) + adb T: Bus=01 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 9 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10b2 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FE990 S: SerialNumber=28c2595e C: #Ifs=10 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=ef(misc ) Sub=04 Prot=01 Driver=rndis_host E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 7 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=8c(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 8 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=70 Driver=(none) E: Ad=8d(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 9 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10b3: ECM + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (AT) + tty (diag) + DPL + QDSS (Qualcomm Debug SubSystem) + adb T: Bus=01 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 11 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10b3 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FE990 S: SerialNumber=28c2595e C: #Ifs=10 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=06 Prot=00 Driver=cdc_ether E: Ad=82(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 7 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=8c(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 8 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=70 Driver=(none) E: Ad=8d(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 9 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Cc: stable@vger.kernel.org Signed-off-by: Fabio Porcedda Reviewed-by: Daniele Palmas [ johan: use USB_DEVICE_AND_INTERFACE_INFO() and sort by protocol ] Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 1ea2870725aca..aeb71d96a8287 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1388,6 +1388,22 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10aa, 0xff), /* Telit FN920C04 (MBIM) */ .driver_info = NCTRL(3) | RSVD(4) | RSVD(5) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b0, 0xff, 0xff, 0x30), /* Telit FE990B (rmnet) */ + .driver_info = NCTRL(5) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b0, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b0, 0xff, 0xff, 0x60) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b1, 0xff, 0xff, 0x30), /* Telit FE990B (MBIM) */ + .driver_info = NCTRL(6) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b1, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b1, 0xff, 0xff, 0x60) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b2, 0xff, 0xff, 0x30), /* Telit FE990B (RNDIS) */ + .driver_info = NCTRL(6) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b2, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b2, 0xff, 0xff, 0x60) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b3, 0xff, 0xff, 0x30), /* Telit FE990B (ECM) */ + .driver_info = NCTRL(6) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b3, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b3, 0xff, 0xff, 0x60) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10c0, 0xff), /* Telit FE910C04 (rmnet) */ .driver_info = RSVD(0) | NCTRL(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10c4, 0xff), /* Telit FE910C04 (rmnet) */ -- GitLab From 6232f0d8e100a26275bbd773fc56a60af2c95322 Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Tue, 4 Mar 2025 10:19:39 +0100 Subject: [PATCH 1353/1585] USB: serial: option: fix Telit Cinterion FE990A name The correct name for FE990 is FE990A so use it in order to avoid confusion with FE990B. Cc: stable@vger.kernel.org Signed-off-by: Fabio Porcedda Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index aeb71d96a8287..5cd26dac2069f 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1368,13 +1368,13 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1075, 0xff), /* Telit FN990A (PCIe) */ .driver_info = RSVD(0) }, - { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1080, 0xff), /* Telit FE990 (rmnet) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1080, 0xff), /* Telit FE990A (rmnet) */ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, - { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1081, 0xff), /* Telit FE990 (MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1081, 0xff), /* Telit FE990A (MBIM) */ .driver_info = NCTRL(0) | RSVD(1) }, - { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1082, 0xff), /* Telit FE990 (RNDIS) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1082, 0xff), /* Telit FE990A (RNDIS) */ .driver_info = NCTRL(2) | RSVD(3) }, - { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1083, 0xff), /* Telit FE990 (ECM) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1083, 0xff), /* Telit FE990A (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a0, 0xff), /* Telit FN20C04 (rmnet) */ .driver_info = RSVD(0) | NCTRL(3) }, -- GitLab From 9af152dcf1a06f589f44a74da4ad67e365d4db9a Mon Sep 17 00:00:00 2001 From: Ivan Abramov Date: Thu, 6 Mar 2025 14:20:45 +0300 Subject: [PATCH 1354/1585] drm/gma500: Add NULL check for pci_gfx_root in mid_get_vbt_data() Since pci_get_domain_bus_and_slot() can return NULL, add NULL check for pci_gfx_root in the mid_get_vbt_data(). This change is similar to the checks implemented in mid_get_fuse_settings() and mid_get_pci_revID(), which were introduced by commit 0cecdd818cd7 ("gma500: Final enables for Oaktrail") as "additional minor bulletproofing". Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: f910b411053f ("gma500: Add the glue to the various BIOS and firmware interfaces") Signed-off-by: Ivan Abramov Signed-off-by: Patrik Jakobsson Link: https://patchwork.freedesktop.org/patch/msgid/20250306112046.17144-1-i.abramov@mt-integration.ru --- drivers/gpu/drm/gma500/mid_bios.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/gma500/mid_bios.c b/drivers/gpu/drm/gma500/mid_bios.c index 7e76790c6a81f..cba97d7db131d 100644 --- a/drivers/gpu/drm/gma500/mid_bios.c +++ b/drivers/gpu/drm/gma500/mid_bios.c @@ -279,6 +279,11 @@ static void mid_get_vbt_data(struct drm_psb_private *dev_priv) 0, PCI_DEVFN(2, 0)); int ret = -1; + if (pci_gfx_root == NULL) { + WARN_ON(1); + return; + } + /* Get the address of the platform config vbt */ pci_read_config_dword(pci_gfx_root, 0xFC, &addr); pci_dev_put(pci_gfx_root); -- GitLab From b5e3956535466187657563b754ba0f1da8626c7f Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 6 Mar 2025 14:39:51 +0800 Subject: [PATCH 1355/1585] kbuild: install-extmod-build: Fix build when specifying KBUILD_OUTPUT Since commit 5f73e7d0386d ("kbuild: refactor cross-compiling linux-headers package"), the linux-headers pacman package fails to build when "O=" is set. The build system complains: /mnt/chroot/linux/scripts/Makefile.build:41: mnt/chroots/linux-mainline/pacman/linux-upstream/pkg/linux-upstream-headers/usr//lib/modules/6.14.0-rc3-00350-g771dba31fffc/build/scripts/Makefile: No such file or directory This is because the "srcroot" variable is set to "." and the "build" variable is set to the absolute path. This makes the "src" variables point to wrong directory. Change the "build" variable to a relative path to "." to fix build. Fixes: 5f73e7d0386d ("kbuild: refactor cross-compiling linux-headers package") Signed-off-by: Inochi Amaoto Signed-off-by: Masahiro Yamada --- scripts/package/install-extmod-build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index 2966473b46609..b96538787f3d9 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -63,7 +63,7 @@ if [ "${CC}" != "${HOSTCC}" ]; then # Clear VPATH and srcroot because the source files reside in the output # directory. # shellcheck disable=SC2016 # $(MAKE) and $(build) will be expanded by Make - "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC='"${CC}"' VPATH= srcroot=. $(build)='"${destdir}"/scripts + "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC='"${CC}"' VPATH= srcroot=. $(build)='"$(realpath --relative-base=. "${destdir}")"/scripts rm -f "${destdir}/scripts/Kbuild" fi -- GitLab From fb8286562ecfb585e26b033c5e32e6fb85efb0b3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 6 Mar 2025 04:05:26 +0100 Subject: [PATCH 1356/1585] netfilter: nf_tables: make destruction work queue pernet The call to flush_work before tearing down a table from the netlink notifier was supposed to make sure that all earlier updates (e.g. rule add) that might reference that table have been processed. Unfortunately, flush_work() waits for the last queued instance. This could be an instance that is different from the one that we must wait for. This is because transactions are protected with a pernet mutex, but the work item is global, so holding the transaction mutex doesn't prevent another netns from queueing more work. Make the work item pernet so that flush_work() will wait for all transactions queued from this netns. A welcome side effect is that we no longer need to wait for transaction objects from foreign netns. The gc work queue is still global. This seems to be ok because nft_set structures are reference counted and each container structure owns a reference on the net namespace. The destroy_list is still protected by a global spinlock rather than pernet one but the hold time is very short anyway. v2: call cancel_work_sync before reaping the remaining tables (Pablo). Fixes: 9f6958ba2e90 ("netfilter: nf_tables: unconditionally flush pending work before notifier") Reported-by: syzbot+5d8c5789c8cb076b2c25@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 +++- net/netfilter/nf_tables_api.c | 24 ++++++++++++++---------- net/netfilter/nft_compat.c | 8 ++++---- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 60d5dcdb289c9..803d5f1601f9d 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1891,7 +1891,7 @@ void nft_chain_filter_fini(void); void __init nft_chain_route_init(void); void nft_chain_route_fini(void); -void nf_tables_trans_destroy_flush_work(void); +void nf_tables_trans_destroy_flush_work(struct net *net); int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result); __be64 nf_jiffies64_to_msecs(u64 input); @@ -1905,6 +1905,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re struct nftables_pernet { struct list_head tables; struct list_head commit_list; + struct list_head destroy_list; struct list_head commit_set_list; struct list_head binding_list; struct list_head module_list; @@ -1915,6 +1916,7 @@ struct nftables_pernet { unsigned int base_seq; unsigned int gc_seq; u8 validate_state; + struct work_struct destroy_work; }; extern unsigned int nf_tables_net_id; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a34de9c17cf10..c2df81b7e9505 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -34,7 +34,6 @@ unsigned int nf_tables_net_id __read_mostly; static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); -static LIST_HEAD(nf_tables_destroy_list); static LIST_HEAD(nf_tables_gc_list); static DEFINE_SPINLOCK(nf_tables_destroy_list_lock); static DEFINE_SPINLOCK(nf_tables_gc_list_lock); @@ -125,7 +124,6 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s table->validate_state = new_validate_state; } static void nf_tables_trans_destroy_work(struct work_struct *w); -static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work); static void nft_trans_gc_work(struct work_struct *work); static DECLARE_WORK(trans_gc_work, nft_trans_gc_work); @@ -10006,11 +10004,12 @@ static void nft_commit_release(struct nft_trans *trans) static void nf_tables_trans_destroy_work(struct work_struct *w) { + struct nftables_pernet *nft_net = container_of(w, struct nftables_pernet, destroy_work); struct nft_trans *trans, *next; LIST_HEAD(head); spin_lock(&nf_tables_destroy_list_lock); - list_splice_init(&nf_tables_destroy_list, &head); + list_splice_init(&nft_net->destroy_list, &head); spin_unlock(&nf_tables_destroy_list_lock); if (list_empty(&head)) @@ -10024,9 +10023,11 @@ static void nf_tables_trans_destroy_work(struct work_struct *w) } } -void nf_tables_trans_destroy_flush_work(void) +void nf_tables_trans_destroy_flush_work(struct net *net) { - flush_work(&trans_destroy_work); + struct nftables_pernet *nft_net = nft_pernet(net); + + flush_work(&nft_net->destroy_work); } EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work); @@ -10484,11 +10485,11 @@ static void nf_tables_commit_release(struct net *net) trans->put_net = true; spin_lock(&nf_tables_destroy_list_lock); - list_splice_tail_init(&nft_net->commit_list, &nf_tables_destroy_list); + list_splice_tail_init(&nft_net->commit_list, &nft_net->destroy_list); spin_unlock(&nf_tables_destroy_list_lock); nf_tables_module_autoload_cleanup(net); - schedule_work(&trans_destroy_work); + schedule_work(&nft_net->destroy_work); mutex_unlock(&nft_net->commit_mutex); } @@ -11853,7 +11854,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, gc_seq = nft_gc_seq_begin(nft_net); - nf_tables_trans_destroy_flush_work(); + nf_tables_trans_destroy_flush_work(net); again: list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table) && @@ -11895,6 +11896,7 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&nft_net->tables); INIT_LIST_HEAD(&nft_net->commit_list); + INIT_LIST_HEAD(&nft_net->destroy_list); INIT_LIST_HEAD(&nft_net->commit_set_list); INIT_LIST_HEAD(&nft_net->binding_list); INIT_LIST_HEAD(&nft_net->module_list); @@ -11903,6 +11905,7 @@ static int __net_init nf_tables_init_net(struct net *net) nft_net->base_seq = 1; nft_net->gc_seq = 0; nft_net->validate_state = NFT_VALIDATE_SKIP; + INIT_WORK(&nft_net->destroy_work, nf_tables_trans_destroy_work); return 0; } @@ -11931,14 +11934,17 @@ static void __net_exit nf_tables_exit_net(struct net *net) if (!list_empty(&nft_net->module_list)) nf_tables_module_autoload_cleanup(net); + cancel_work_sync(&nft_net->destroy_work); __nft_release_tables(net); nft_gc_seq_end(nft_net, gc_seq); mutex_unlock(&nft_net->commit_mutex); + WARN_ON_ONCE(!list_empty(&nft_net->tables)); WARN_ON_ONCE(!list_empty(&nft_net->module_list)); WARN_ON_ONCE(!list_empty(&nft_net->notify_list)); + WARN_ON_ONCE(!list_empty(&nft_net->destroy_list)); } static void nf_tables_exit_batch(struct list_head *net_exit_list) @@ -12029,10 +12035,8 @@ static void __exit nf_tables_module_exit(void) unregister_netdevice_notifier(&nf_tables_flowtable_notifier); nft_chain_filter_fini(); nft_chain_route_fini(); - nf_tables_trans_destroy_flush_work(); unregister_pernet_subsys(&nf_tables_net_ops); cancel_work_sync(&trans_gc_work); - cancel_work_sync(&trans_destroy_work); rcu_barrier(); rhltable_destroy(&nft_objname_ht); nf_tables_core_module_exit(); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 7ca4f0d21fe2a..72711d62fddfa 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -228,7 +228,7 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) return 0; } -static void nft_compat_wait_for_destructors(void) +static void nft_compat_wait_for_destructors(struct net *net) { /* xtables matches or targets can have side effects, e.g. * creation/destruction of /proc files. @@ -236,7 +236,7 @@ static void nft_compat_wait_for_destructors(void) * work queue. If we have pending invocations we thus * need to wait for those to finish. */ - nf_tables_trans_destroy_flush_work(); + nf_tables_trans_destroy_flush_work(net); } static int @@ -262,7 +262,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); - nft_compat_wait_for_destructors(); + nft_compat_wait_for_destructors(ctx->net); ret = xt_check_target(&par, size, proto, inv); if (ret < 0) { @@ -515,7 +515,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); - nft_compat_wait_for_destructors(); + nft_compat_wait_for_destructors(ctx->net); return xt_check_match(&par, size, proto, inv); } -- GitLab From e26f1cfeac6712516bfeed80890da664f4f2e88a Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 6 Mar 2025 13:32:54 +0000 Subject: [PATCH 1357/1585] ASoC: cs42l43: Fix maximum ADC Volume The range of ADC volume is -1 -> 3 (-6 to 18dB) so the number of levels should actually be 4. Fixes: fc918cbe874e ("ASoC: cs42l43: Add support for the cs42l43") Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20250306133254.1861046-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c index 4257dbefe9dd1..d307b56a7f38e 100644 --- a/sound/soc/codecs/cs42l43.c +++ b/sound/soc/codecs/cs42l43.c @@ -1146,7 +1146,7 @@ static const struct snd_kcontrol_new cs42l43_controls[] = { SOC_DOUBLE_R_SX_TLV("ADC Volume", CS42L43_ADC_B_CTRL1, CS42L43_ADC_B_CTRL2, CS42L43_ADC_PGA_GAIN_SHIFT, - 0xF, 5, cs42l43_adc_tlv), + 0xF, 4, cs42l43_adc_tlv), SOC_DOUBLE("PDM1 Invert Switch", CS42L43_DMIC_PDM_CTRL, CS42L43_PDM1L_INV_SHIFT, CS42L43_PDM1R_INV_SHIFT, 1, 0), -- GitLab From 35d99c68af40a8ca175babc5a89ef7e2226fb3ca Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Mon, 3 Mar 2025 10:42:33 +0800 Subject: [PATCH 1358/1585] btrfs: fix a leaked chunk map issue in read_one_chunk() Add btrfs_free_chunk_map() to free the memory allocated by btrfs_alloc_chunk_map() if btrfs_add_chunk_map() fails. Fixes: 7dc66abb5a47 ("btrfs: use a dedicated data structure for chunk maps") CC: stable@vger.kernel.org Reviewed-by: Qu Wenruo Reviewed-by: Filipe Manana Signed-off-by: Haoxiang Li Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f6ae76815e4b5..6f8dcf59b5257 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7151,6 +7151,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, btrfs_err(fs_info, "failed to add chunk map, start=%llu len=%llu: %d", map->start, map->chunk_len, ret); + btrfs_free_chunk_map(map); } return ret; -- GitLab From 391b41f983bf7ff853de44704d8e14e7cc648a9b Mon Sep 17 00:00:00 2001 From: Fabrizio Castro Date: Wed, 5 Mar 2025 16:37:50 +0000 Subject: [PATCH 1359/1585] gpio: rcar: Fix missing of_node_put() call of_parse_phandle_with_fixed_args() requires its caller to call into of_node_put() on the node pointer from the output structure, but such a call is currently missing. Call into of_node_put() to rectify that. Fixes: 159f8a0209af ("gpio-rcar: Add DT support") Signed-off-by: Fabrizio Castro Reviewed-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20250305163753.34913-2-fabrizio.castro.jz@renesas.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-rcar.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index 8e0544e924886..a7a1cdf7ac66d 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -468,7 +468,12 @@ static int gpio_rcar_parse_dt(struct gpio_rcar_priv *p, unsigned int *npins) p->info = *info; ret = of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, 0, &args); - *npins = ret == 0 ? args.args[2] : RCAR_MAX_GPIO_PER_BANK; + if (ret) { + *npins = RCAR_MAX_GPIO_PER_BANK; + } else { + *npins = args.args[2]; + of_node_put(args.np); + } if (*npins == 0 || *npins > RCAR_MAX_GPIO_PER_BANK) { dev_warn(p->dev, "Invalid number of gpio lines %u, using %u\n", -- GitLab From ff712188daa3fe3ce7e11e530b4dca3826dae14a Mon Sep 17 00:00:00 2001 From: Miao Li Date: Tue, 4 Mar 2025 15:07:57 +0800 Subject: [PATCH 1360/1585] usb: quirks: Add DELAY_INIT and NO_LPM for Prolific Mass Storage Card Reader When used on Huawei hisi platforms, Prolific Mass Storage Card Reader which the VID:PID is in 067b:2731 might fail to enumerate at boot time and doesn't work well with LPM enabled, combination quirks: USB_QUIRK_DELAY_INIT + USB_QUIRK_NO_LPM fixed the problems. Signed-off-by: Miao Li Cc: stable Link: https://lore.kernel.org/r/20250304070757.139473-1-limiao870622@163.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index dfcfc142bd5e1..8efbacc5bc341 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -341,6 +341,10 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0638, 0x0a13), .driver_info = USB_QUIRK_STRING_FETCH_255 }, + /* Prolific Single-LUN Mass Storage Card Reader */ + { USB_DEVICE(0x067b, 0x2731), .driver_info = USB_QUIRK_DELAY_INIT | + USB_QUIRK_NO_LPM }, + /* Saitek Cyborg Gold Joystick */ { USB_DEVICE(0x06a3, 0x0006), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, -- GitLab From b13abcb7ddd8d38de769486db5bd917537b32ab1 Mon Sep 17 00:00:00 2001 From: Andrei Kuchynski Date: Wed, 5 Mar 2025 11:17:39 +0000 Subject: [PATCH 1361/1585] usb: typec: ucsi: Fix NULL pointer access Resources should be released only after all threads that utilize them have been destroyed. This commit ensures that resources are not released prematurely by waiting for the associated workqueue to complete before deallocating them. Cc: stable Fixes: b9aa02ca39a4 ("usb: typec: ucsi: Add polling mechanism for partner tasks like alt mode checking") Signed-off-by: Andrei Kuchynski Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250305111739.1489003-2-akuchynski@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 7a56d3f840d75..2a2915b0a645f 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1825,11 +1825,11 @@ static int ucsi_init(struct ucsi *ucsi) err_unregister: for (con = connector; con->port; con++) { + if (con->wq) + destroy_workqueue(con->wq); ucsi_unregister_partner(con); ucsi_unregister_altmodes(con, UCSI_RECIPIENT_CON); ucsi_unregister_port_psy(con); - if (con->wq) - destroy_workqueue(con->wq); usb_power_delivery_unregister_capabilities(con->port_sink_caps); con->port_sink_caps = NULL; @@ -2013,10 +2013,6 @@ void ucsi_unregister(struct ucsi *ucsi) for (i = 0; i < ucsi->cap.num_connectors; i++) { cancel_work_sync(&ucsi->connector[i].work); - ucsi_unregister_partner(&ucsi->connector[i]); - ucsi_unregister_altmodes(&ucsi->connector[i], - UCSI_RECIPIENT_CON); - ucsi_unregister_port_psy(&ucsi->connector[i]); if (ucsi->connector[i].wq) { struct ucsi_work *uwork; @@ -2032,6 +2028,11 @@ void ucsi_unregister(struct ucsi *ucsi) destroy_workqueue(ucsi->connector[i].wq); } + ucsi_unregister_partner(&ucsi->connector[i]); + ucsi_unregister_altmodes(&ucsi->connector[i], + UCSI_RECIPIENT_CON); + ucsi_unregister_port_psy(&ucsi->connector[i]); + usb_power_delivery_unregister_capabilities(ucsi->connector[i].port_sink_caps); ucsi->connector[i].port_sink_caps = NULL; usb_power_delivery_unregister_capabilities(ucsi->connector[i].port_source_caps); -- GitLab From 74d42bdb3a4673b1c10d1f457184e4d3c9cb0196 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 6 Mar 2025 07:30:42 -1000 Subject: [PATCH 1362/1585] fs/pipe: express 'pipe_empty()' in terms of 'pipe_occupancy()' That's what 'pipe_full()' does, so it's more consistent. But more importantly it gets the type limits right when the pipe head and tail are no longer necessarily 'unsigned int'. Signed-off-by: Linus Torvalds --- include/linux/pipe_fs_i.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index e572e6fc4f81f..4d0a2267e6efc 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -177,23 +177,23 @@ static inline bool pipe_has_watch_queue(const struct pipe_inode_info *pipe) } /** - * pipe_empty - Return true if the pipe is empty + * pipe_occupancy - Return number of slots used in the pipe * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer */ -static inline bool pipe_empty(unsigned int head, unsigned int tail) +static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail) { - return head == tail; + return (pipe_index_t)(head - tail); } /** - * pipe_occupancy - Return number of slots used in the pipe + * pipe_empty - Return true if the pipe is empty * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer */ -static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail) +static inline bool pipe_empty(unsigned int head, unsigned int tail) { - return (pipe_index_t)(head - tail); + return !pipe_occupancy(head, tail); } /** -- GitLab From d810d4c27bf34c719243bab9feb0d843edc09fd7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 6 Mar 2025 07:33:58 -1000 Subject: [PATCH 1363/1585] fs/pipe: do not open-code pipe head/tail logic in FIONREAD Rasmus points out that we do indeed have other cases of breakage from the type changes that were introduced on 32-bit targets in order to read the pipe head and tail values atomically (commit 3d252160b818: "fs/pipe: Read pipe->{head,tail} atomically outside pipe->mutex"). Fix it up by using the proper helper functions that now deal with the pipe buffer index types properly. This makes the code simpler and more obvious. The compiler does the CSE and loop hoisting of the pipe ring size masking that we used to do manually, so open-coding this was never a good idea. Reported-by: Rasmus Villemoes Link: https://lore.kernel.org/all/87cyeu5zgk.fsf@prevas.dk/ Fixes: 3d252160b818 ("fs/pipe: Read pipe->{head,tail} atomically outside pipe->mutex")Cc: Oleg Nesterov Cc: Mateusz Guzik Cc: K Prateek Nayak Cc: Swapnil Sapkal Signed-off-by: Linus Torvalds --- fs/pipe.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index e8e6698f36981..5c872775a6db9 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -614,7 +614,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct pipe_inode_info *pipe = filp->private_data; - unsigned int count, head, tail, mask; + unsigned int count, head, tail; switch (cmd) { case FIONREAD: @@ -622,10 +622,9 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) count = 0; head = pipe->head; tail = pipe->tail; - mask = pipe->ring_size - 1; - while (tail != head) { - count += pipe->bufs[tail & mask].len; + while (!pipe_empty(head, tail)) { + count += pipe_buf(pipe, tail)->len; tail++; } mutex_unlock(&pipe->mutex); -- GitLab From ebb0f38bb47f74b29e267babdbcd2c47d5292aa8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 6 Mar 2025 07:53:25 -1000 Subject: [PATCH 1364/1585] fs/pipe: fix pipe buffer index use in FUSE This was another case that Rasmus pointed out where the direct access to the pipe head and tail pointers broke on 32-bit configurations due to the type changes. As with the pipe FIONREAD case, fix it by using the appropriate helper functions that deal with the right pipe index sizing. Reported-by: Rasmus Villemoes Link: https://lore.kernel.org/all/878qpi5wz4.fsf@prevas.dk/ Fixes: 3d252160b818 ("fs/pipe: Read pipe->{head,tail} atomically outside pipe->mutex")Cc: Oleg > Cc: Mateusz Guzik Cc: K Prateek Nayak Cc: Swapnil Sapkal Signed-off-by: Linus Torvalds --- fs/fuse/dev.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 2b2d1b7555444..3c9caafca9e29 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2107,7 +2107,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { - unsigned int head, tail, mask, count; + unsigned int head, tail, count; unsigned nbuf; unsigned idx; struct pipe_buffer *bufs; @@ -2124,8 +2124,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, head = pipe->head; tail = pipe->tail; - mask = pipe->ring_size - 1; - count = head - tail; + count = pipe_occupancy(head, tail); bufs = kvmalloc_array(count, sizeof(struct pipe_buffer), GFP_KERNEL); if (!bufs) { @@ -2135,8 +2134,8 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, nbuf = 0; rem = 0; - for (idx = tail; idx != head && rem < len; idx++) - rem += pipe->bufs[idx & mask].len; + for (idx = tail; !pipe_empty(head, idx) && rem < len; idx++) + rem += pipe_buf(pipe, idx)->len; ret = -EINVAL; if (rem < len) @@ -2147,10 +2146,10 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, struct pipe_buffer *ibuf; struct pipe_buffer *obuf; - if (WARN_ON(nbuf >= count || tail == head)) + if (WARN_ON(nbuf >= count || pipe_empty(head, tail))) goto out_free; - ibuf = &pipe->bufs[tail & mask]; + ibuf = pipe_buf(pipe, tail); obuf = &bufs[nbuf]; if (rem >= ibuf->len) { -- GitLab From 6933c1067fe6df8ddb34dd68bdb2aa172cbd08c8 Mon Sep 17 00:00:00 2001 From: Alban Kurti Date: Thu, 6 Feb 2025 21:07:54 +0000 Subject: [PATCH 1365/1585] rust: init: add missing newline to pr_info! calls Several pr_info! calls in rust/kernel/init.rs (both in code examples and macro documentation) were missing a newline, causing logs to run together. This commit updates these calls to include a trailing newline, improving readability and consistency with the C side. Fixes: 6841d45a3030 ("rust: init: add `stack_pin_init!` macro") Fixes: 7f8977a7fe6d ("rust: init: add `{pin_}chain` functions to `{Pin}Init`") Fixes: d0fdc3961270 ("rust: init: add `PinnedDrop` trait and macros") Fixes: 4af84c6a85c6 ("rust: init: update expanded macro explanation") Reported-by: Miguel Ojeda Link: https://github.com/Rust-for-Linux/linux/issues/1139 Signed-off-by: Alban Kurti Link: https://lore.kernel.org/r/20250206-printing_fix-v3-3-a85273b501ae@invicto.ai [ Replaced Closes with Link since it fixes part of the issue. Added one more Fixes tag (still same set of stable kernels). - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/init.rs | 12 ++++++------ rust/kernel/init/macros.rs | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 8bbd5e3398fcb..e25d047f3c827 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -259,7 +259,7 @@ pub mod macros; /// }, /// })); /// let foo: Pin<&mut Foo> = foo; -/// pr_info!("a: {}", &*foo.a.lock()); +/// pr_info!("a: {}\n", &*foo.a.lock()); /// ``` /// /// # Syntax @@ -319,7 +319,7 @@ macro_rules! stack_pin_init { /// }, GFP_KERNEL)?, /// })); /// let foo = foo.unwrap(); -/// pr_info!("a: {}", &*foo.a.lock()); +/// pr_info!("a: {}\n", &*foo.a.lock()); /// ``` /// /// ```rust,ignore @@ -352,7 +352,7 @@ macro_rules! stack_pin_init { /// x: 64, /// }, GFP_KERNEL)?, /// })); -/// pr_info!("a: {}", &*foo.a.lock()); +/// pr_info!("a: {}\n", &*foo.a.lock()); /// # Ok::<_, AllocError>(()) /// ``` /// @@ -882,7 +882,7 @@ pub unsafe trait PinInit: Sized { /// /// impl Foo { /// fn setup(self: Pin<&mut Self>) { - /// pr_info!("Setting up foo"); + /// pr_info!("Setting up foo\n"); /// } /// } /// @@ -986,7 +986,7 @@ pub unsafe trait Init: PinInit { /// /// impl Foo { /// fn setup(&mut self) { - /// pr_info!("Setting up foo"); + /// pr_info!("Setting up foo\n"); /// } /// } /// @@ -1336,7 +1336,7 @@ impl InPlaceWrite for UniqueArc> { /// #[pinned_drop] /// impl PinnedDrop for Foo { /// fn drop(self: Pin<&mut Self>) { -/// pr_info!("Foo is being dropped!"); +/// pr_info!("Foo is being dropped!\n"); /// } /// } /// ``` diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs index 1fd146a832416..b7213962a6a5a 100644 --- a/rust/kernel/init/macros.rs +++ b/rust/kernel/init/macros.rs @@ -45,7 +45,7 @@ //! #[pinned_drop] //! impl PinnedDrop for Foo { //! fn drop(self: Pin<&mut Self>) { -//! pr_info!("{self:p} is getting dropped."); +//! pr_info!("{self:p} is getting dropped.\n"); //! } //! } //! @@ -412,7 +412,7 @@ //! #[pinned_drop] //! impl PinnedDrop for Foo { //! fn drop(self: Pin<&mut Self>) { -//! pr_info!("{self:p} is getting dropped."); +//! pr_info!("{self:p} is getting dropped.\n"); //! } //! } //! ``` @@ -423,7 +423,7 @@ //! // `unsafe`, full path and the token parameter are added, everything else stays the same. //! unsafe impl ::kernel::init::PinnedDrop for Foo { //! fn drop(self: Pin<&mut Self>, _: ::kernel::init::__internal::OnlyCallFromDrop) { -//! pr_info!("{self:p} is getting dropped."); +//! pr_info!("{self:p} is getting dropped.\n"); //! } //! } //! ``` -- GitLab From 50c3e77eb3712a039760345999709ee0fad83447 Mon Sep 17 00:00:00 2001 From: Alban Kurti Date: Thu, 6 Feb 2025 21:07:55 +0000 Subject: [PATCH 1366/1585] rust: sync: add missing newline in locked_by log example The pr_info! example in rust/kernel/sync/locked_by.rs was missing a newline. This patch appends the missing newline to ensure that log messages for locked resources display correctly. Fixes: 7b1f55e3a984 ("rust: sync: introduce `LockedBy`") Reported-by: Miguel Ojeda Link: https://github.com/Rust-for-Linux/linux/issues/1139 Signed-off-by: Alban Kurti Link: https://lore.kernel.org/r/20250206-printing_fix-v3-4-a85273b501ae@invicto.ai [ Replaced Closes with Link since it fixes part of the issue. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/sync/locked_by.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/sync/locked_by.rs b/rust/kernel/sync/locked_by.rs index a7b244675c2b9..61f100a45b350 100644 --- a/rust/kernel/sync/locked_by.rs +++ b/rust/kernel/sync/locked_by.rs @@ -55,7 +55,7 @@ use core::{cell::UnsafeCell, mem::size_of, ptr}; /// fn print_bytes_used(dir: &Directory, file: &File) { /// let guard = dir.inner.lock(); /// let inner_file = file.inner.access(&guard); -/// pr_info!("{} {}", guard.bytes_used, inner_file.bytes_used); +/// pr_info!("{} {}\n", guard.bytes_used, inner_file.bytes_used); /// } /// /// /// Increments `bytes_used` for both the directory and file. -- GitLab From 0ea4c3906416cefd6ae7ae5e93af9f2ef1b8c39b Mon Sep 17 00:00:00 2001 From: Alban Kurti Date: Thu, 6 Feb 2025 21:07:56 +0000 Subject: [PATCH 1367/1585] rust: workqueue: add missing newline to pr_info! examples The documentation examples in rust/kernel/workqueue.rs use pr_info! calls that lack a trailing newline. To maintain consistency with kernel logging practices, this patch adds the newline to all affected examples. Fixes: 15b286d1fd05 ("rust: workqueue: add examples") Reported-by: Miguel Ojeda Link: https://github.com/Rust-for-Linux/linux/issues/1139 Signed-off-by: Alban Kurti Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250206-printing_fix-v3-5-a85273b501ae@invicto.ai [ Replaced Closes with Link since it fixes part of the issue. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/workqueue.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index 0cd100d2aefb4..b7be224cdf4bb 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -60,7 +60,7 @@ //! type Pointer = Arc; //! //! fn run(this: Arc) { -//! pr_info!("The value is: {}", this.value); +//! pr_info!("The value is: {}\n", this.value); //! } //! } //! @@ -108,7 +108,7 @@ //! type Pointer = Arc; //! //! fn run(this: Arc) { -//! pr_info!("The value is: {}", this.value_1); +//! pr_info!("The value is: {}\n", this.value_1); //! } //! } //! @@ -116,7 +116,7 @@ //! type Pointer = Arc; //! //! fn run(this: Arc) { -//! pr_info!("The second value is: {}", this.value_2); +//! pr_info!("The second value is: {}\n", this.value_2); //! } //! } //! -- GitLab From c00b413a96261faef4ce22329153c6abd4acef25 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 6 Mar 2025 16:59:16 +0100 Subject: [PATCH 1368/1585] x86/boot: Sanitize boot params before parsing command line The 5-level paging code parses the command line to look for the 'no5lvl' string, and does so very early, before sanitize_boot_params() has been called and has been given the opportunity to wipe bogus data from the fields in boot_params that are not covered by struct setup_header, and are therefore supposed to be initialized to zero by the bootloader. This triggers an early boot crash when using syslinux-efi to boot a recent kernel built with CONFIG_X86_5LEVEL=y and CONFIG_EFI_STUB=n, as the 0xff padding that now fills the unused PE/COFF header is copied into boot_params by the bootloader, and interpreted as the top half of the command line pointer. Fix this by sanitizing the boot_params before use. Note that there is no harm in calling this more than once; subsequent invocations are able to spot that the boot_params have already been cleaned up. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: "H. Peter Anvin" Cc: Linus Torvalds Cc: # v6.1+ Link: https://lore.kernel.org/r/20250306155915.342465-2-ardb+git@google.com Closes: https://lore.kernel.org/all/202503041549.35913.ulrich.gemkow@ikr.uni-stuttgart.de --- arch/x86/boot/compressed/pgtable_64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index c882e1f67af01..d8c5de40669d3 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "misc.h" #include +#include #include #include #include "pgtable.h" @@ -107,6 +108,7 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) bool l5_required = false; /* Initialize boot_params. Required for cmdline_find_option_bool(). */ + sanitize_boot_params(bp); boot_params_ptr = bp; /* -- GitLab From 33255c161ac4be003ac87c434ebc49645d18a929 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 28 Feb 2025 20:07:58 -0500 Subject: [PATCH 1369/1585] bcachefs: Fix bch2_dev_journal_alloc() spuriously failing Previously, we fixed journal resize spuriousl failing with -BCH_ERR_open_buckets_empty, but initial journal allocation was missed because it didn't invoke the "block on allocator" loop at all. Factor out the "loop on allocator" code to fix that. Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 59 +++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 5dabbf3c0965c..05b1250619ecc 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1021,8 +1021,8 @@ struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, /* allocate journal on a device: */ -static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, - bool new_fs, struct closure *cl) +static int bch2_set_nr_journal_buckets_iter(struct bch_dev *ca, unsigned nr, + bool new_fs, struct closure *cl) { struct bch_fs *c = ca->fs; struct journal_device *ja = &ca->journal; @@ -1150,26 +1150,20 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, return ret; } -/* - * Allocate more journal space at runtime - not currently making use if it, but - * the code works: - */ -int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, - unsigned nr) +static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca, + unsigned nr, bool new_fs) { struct journal_device *ja = &ca->journal; - struct closure cl; int ret = 0; + struct closure cl; closure_init_stack(&cl); - down_write(&c->state_lock); - /* don't handle reducing nr of buckets yet: */ if (nr < ja->nr) - goto unlock; + return 0; - while (ja->nr < nr) { + while (!ret && ja->nr < nr) { struct disk_reservation disk_res = { 0, 0, 0 }; /* @@ -1182,27 +1176,38 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, * filesystem-wide allocation will succeed, this is a device * specific allocation - we can hang here: */ + if (!new_fs) { + ret = bch2_disk_reservation_get(c, &disk_res, + bucket_to_sector(ca, nr - ja->nr), 1, 0); + if (ret) + break; + } - ret = bch2_disk_reservation_get(c, &disk_res, - bucket_to_sector(ca, nr - ja->nr), 1, 0); - if (ret) - break; + ret = bch2_set_nr_journal_buckets_iter(ca, nr, new_fs, &cl); - ret = __bch2_set_nr_journal_buckets(ca, nr, false, &cl); + if (ret == -BCH_ERR_bucket_alloc_blocked || + ret == -BCH_ERR_open_buckets_empty) + ret = 0; /* wait and retry */ bch2_disk_reservation_put(c, &disk_res); - closure_sync(&cl); - - if (ret && - ret != -BCH_ERR_bucket_alloc_blocked && - ret != -BCH_ERR_open_buckets_empty) - break; } - bch_err_fn(c, ret); -unlock: + return ret; +} + +/* + * Allocate more journal space at runtime - not currently making use if it, but + * the code works: + */ +int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, + unsigned nr) +{ + down_write(&c->state_lock); + int ret = bch2_set_nr_journal_buckets_loop(c, ca, nr, false); up_write(&c->state_lock); + + bch_err_fn(c, ret); return ret; } @@ -1228,7 +1233,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs) min(1 << 13, (1 << 24) / ca->mi.bucket_size)); - ret = __bch2_set_nr_journal_buckets(ca, nr, new_fs, NULL); + ret = bch2_set_nr_journal_buckets_loop(ca->fs, ca, nr, new_fs); err: bch_err_fn(ca, ret); return ret; -- GitLab From 8ba73f53dc5b7545776e09e6982115dcbcbabec4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 28 Feb 2025 11:34:41 -0500 Subject: [PATCH 1370/1585] bcachefs: copygc now skips non-rw devices There's no point in doing copygc on non-rw devices: the fragmentation doesn't matter if we're not writing to them, and we may not have anywhere to put the data on our other devices. Signed-off-by: Kent Overstreet --- fs/bcachefs/movinggc.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 21805509ab9e6..6718dc37c5a35 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -74,20 +74,14 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, struct move_bucket *b, u64 time) { struct bch_fs *c = trans->c; - struct btree_iter iter; - struct bkey_s_c k; - struct bch_alloc_v4 _a; - const struct bch_alloc_v4 *a; - int ret; - if (bch2_bucket_is_open(trans->c, - b->k.bucket.inode, - b->k.bucket.offset)) + if (bch2_bucket_is_open(c, b->k.bucket.inode, b->k.bucket.offset)) return 0; - k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, - b->k.bucket, BTREE_ITER_cached); - ret = bkey_err(k); + struct btree_iter iter; + struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, + b->k.bucket, BTREE_ITER_cached); + int ret = bkey_err(k); if (ret) return ret; @@ -95,13 +89,18 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, if (!ca) goto out; - a = bch2_alloc_to_v4(k, &_a); + if (ca->mi.state != BCH_MEMBER_STATE_rw || + !bch2_dev_is_online(ca)) + goto out_put; + + struct bch_alloc_v4 _a; + const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a); b->k.gen = a->gen; b->sectors = bch2_bucket_sectors_dirty(*a); u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca); ret = lru_idx && lru_idx <= time; - +out_put: bch2_dev_put(ca); out: bch2_trans_iter_exit(trans, &iter); -- GitLab From 115ef44a98220fddfab37a39a19370497cd718b9 Mon Sep 17 00:00:00 2001 From: Jun Yang Date: Wed, 5 Mar 2025 23:44:10 +0800 Subject: [PATCH 1371/1585] sched: address a potential NULL pointer dereference in the GRED scheduler. If kzalloc in gred_init returns a NULL pointer, the code follows the error handling path, invoking gred_destroy. This, in turn, calls gred_offload, where memset could receive a NULL pointer as input, potentially leading to a kernel crash. When table->opt is NULL in gred_init(), gred_change_table_def() is not called yet, so it is not necessary to call ->ndo_setup_tc() in gred_offload(). Signed-off-by: Jun Yang Reviewed-by: Cong Wang Fixes: f25c0515c521 ("net: sched: gred: dynamically allocate tc_gred_qopt_offload") Link: https://patch.msgid.link/20250305154410.3505642-1-juny24602@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_gred.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index ab6234b4fcd54..532fde548b88f 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -913,7 +913,8 @@ static void gred_destroy(struct Qdisc *sch) for (i = 0; i < table->DPs; i++) gred_destroy_vq(table->tab[i]); - gred_offload(sch, TC_GRED_DESTROY); + if (table->opt) + gred_offload(sch, TC_GRED_DESTROY); kfree(table->opt); } -- GitLab From e7112524e5e885181cc5ae4d258f33b9dbe0b907 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 6 Mar 2025 08:27:51 -0800 Subject: [PATCH 1372/1585] block: Name the RQF flags enum Commit 5f89154e8e9e3445f9b59 ("block: Use enum to define RQF_x bit indexes") converted the RQF flags to an anonymous enum, which was a beneficial change. This patch goes one step further by naming the enum as "rqf_flags". This naming enables exporting these flags to BPF clients, eliminating the need to duplicate these flags in BPF code. Instead, BPF clients can now access the same kernel-side values through CO:RE (Compile Once, Run Everywhere), as shown in this example: rqf_stats = bpf_core_enum_value(enum rqf_flags, __RQF_STATS) Suggested-by: Yonghong Song Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20250306-rqf_flags-v1-1-bbd64918b406@debian.org Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index fa2a76cc2f73d..71f4f0cc3dac6 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -28,7 +28,7 @@ typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t); typedef __u32 __bitwise req_flags_t; /* Keep rqf_name[] in sync with the definitions below */ -enum { +enum rqf_flags { /* drive already may have started this one */ __RQF_STARTED, /* request for flush sequence */ -- GitLab From 00a7d39898c8010bfd5ff62af31ca5db34421b38 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 6 Mar 2025 18:25:35 -1000 Subject: [PATCH 1373/1585] fs/pipe: add simpler helpers for common cases The fix to atomically read the pipe head and tail state when not holding the pipe mutex has caused a number of headaches due to the size change of the involved types. It turns out that we don't have _that_ many places that access these fields directly and were affected, but we have more than we strictly should have, because our low-level helper functions have been designed to have intimate knowledge of how the pipes work. And as a result, that random noise of direct 'pipe->head' and 'pipe->tail' accesses makes it harder to pinpoint any actual potential problem spots remaining. For example, we didn't have a "is the pipe full" helper function, but instead had a "given these pipe buffer indexes and this pipe size, is the pipe full". That's because some low-level pipe code does actually want that much more complicated interface. But most other places literally just want a "is the pipe full" helper, and not having it meant that those places ended up being unnecessarily much too aware of this all. It would have been much better if only the very core pipe code that cared had been the one aware of this all. So let's fix it - better late than never. This just introduces the trivial wrappers for "is this pipe full or empty" and to get how many pipe buffers are used, so that instead of writing if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) the places that literally just want to know if a pipe is full can just say if (pipe_is_full(pipe)) instead. The existing trivial cases were converted with a 'sed' script. This cuts down on the places that access pipe->head and pipe->tail directly outside of the pipe code (and core splice code) quite a lot. The splice code in particular still revels in doing the direct low-level accesses, and the fuse fuse_dev_splice_write() code also seems a bit unnecessarily eager to go very low-level, but it's at least a bit better than it used to be. Signed-off-by: Linus Torvalds --- drivers/char/virtio_console.c | 4 ++-- fs/fuse/dev.c | 2 +- fs/pipe.c | 6 +++--- fs/splice.c | 20 ++++++++++---------- include/linux/pipe_fs_i.h | 27 +++++++++++++++++++++++++++ mm/filemap.c | 7 +++---- mm/shmem.c | 6 +++--- 7 files changed, 49 insertions(+), 23 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 24442485e73e7..18f92dd44d456 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -923,14 +923,14 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe, pipe_lock(pipe); ret = 0; - if (pipe_empty(pipe->head, pipe->tail)) + if (pipe_is_empty(pipe)) goto error_out; ret = wait_port_writable(port, filp->f_flags & O_NONBLOCK); if (ret < 0) goto error_out; - occupancy = pipe_occupancy(pipe->head, pipe->tail); + occupancy = pipe_buf_usage(pipe); buf = alloc_buf(port->portdev->vdev, 0, occupancy); if (!buf) { diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 3c9caafca9e29..2c3a4d09e500f 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1457,7 +1457,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, if (ret < 0) goto out; - if (pipe_occupancy(pipe->head, pipe->tail) + cs.nr_segs > pipe->max_usage) { + if (pipe_buf_usage(pipe) + cs.nr_segs > pipe->max_usage) { ret = -EIO; goto out; } diff --git a/fs/pipe.c b/fs/pipe.c index 5c872775a6db9..4d0799e4e7196 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -394,7 +394,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) wake_next_reader = true; mutex_lock(&pipe->mutex); } - if (pipe_empty(pipe->head, pipe->tail)) + if (pipe_is_empty(pipe)) wake_next_reader = false; mutex_unlock(&pipe->mutex); @@ -577,11 +577,11 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe)); mutex_lock(&pipe->mutex); - was_empty = pipe_empty(pipe->head, pipe->tail); + was_empty = pipe_is_empty(pipe); wake_next_writer = true; } out: - if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + if (pipe_is_full(pipe)) wake_next_writer = false; mutex_unlock(&pipe->mutex); diff --git a/fs/splice.c b/fs/splice.c index 28cfa63aa2364..23fa5561b9441 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -331,7 +331,7 @@ ssize_t copy_splice_read(struct file *in, loff_t *ppos, int i; /* Work out how much data we can actually add into the pipe */ - used = pipe_occupancy(pipe->head, pipe->tail); + used = pipe_buf_usage(pipe); npages = max_t(ssize_t, pipe->max_usage - used, 0); len = min_t(size_t, len, npages * PAGE_SIZE); npages = DIV_ROUND_UP(len, PAGE_SIZE); @@ -527,7 +527,7 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des return -ERESTARTSYS; repeat: - while (pipe_empty(pipe->head, pipe->tail)) { + while (pipe_is_empty(pipe)) { if (!pipe->writers) return 0; @@ -820,7 +820,7 @@ ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out, if (signal_pending(current)) break; - while (pipe_empty(pipe->head, pipe->tail)) { + while (pipe_is_empty(pipe)) { ret = 0; if (!pipe->writers) goto out; @@ -968,7 +968,7 @@ static ssize_t do_splice_read(struct file *in, loff_t *ppos, return 0; /* Don't try to read more the pipe has space for. */ - p_space = pipe->max_usage - pipe_occupancy(pipe->head, pipe->tail); + p_space = pipe->max_usage - pipe_buf_usage(pipe); len = min_t(size_t, len, p_space << PAGE_SHIFT); if (unlikely(len > MAX_RW_COUNT)) @@ -1080,7 +1080,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, more = sd->flags & SPLICE_F_MORE; sd->flags |= SPLICE_F_MORE; - WARN_ON_ONCE(!pipe_empty(pipe->head, pipe->tail)); + WARN_ON_ONCE(!pipe_is_empty(pipe)); while (len) { size_t read_len; @@ -1268,7 +1268,7 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) send_sig(SIGPIPE, current, 0); return -EPIPE; } - if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + if (!pipe_is_full(pipe)) return 0; if (flags & SPLICE_F_NONBLOCK) return -EAGAIN; @@ -1652,13 +1652,13 @@ static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) * Check the pipe occupancy without the inode lock first. This function * is speculative anyways, so missing one is ok. */ - if (!pipe_empty(pipe->head, pipe->tail)) + if (!pipe_is_empty(pipe)) return 0; ret = 0; pipe_lock(pipe); - while (pipe_empty(pipe->head, pipe->tail)) { + while (pipe_is_empty(pipe)) { if (signal_pending(current)) { ret = -ERESTARTSYS; break; @@ -1688,13 +1688,13 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) * Check pipe occupancy without the inode lock first. This function * is speculative anyways, so missing one is ok. */ - if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + if (!pipe_is_full(pipe)) return 0; ret = 0; pipe_lock(pipe); - while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { + while (pipe_is_full(pipe)) { if (!pipe->readers) { send_sig(SIGPIPE, current, 0); ret = -EPIPE; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 4d0a2267e6efc..b698758000f8b 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -208,6 +208,33 @@ static inline bool pipe_full(unsigned int head, unsigned int tail, return pipe_occupancy(head, tail) >= limit; } +/** + * pipe_is_full - Return true if the pipe is full + * @pipe: the pipe + */ +static inline bool pipe_is_full(const struct pipe_inode_info *pipe) +{ + return pipe_full(pipe->head, pipe->tail, pipe->max_usage); +} + +/** + * pipe_is_empty - Return true if the pipe is empty + * @pipe: the pipe + */ +static inline bool pipe_is_empty(const struct pipe_inode_info *pipe) +{ + return pipe_empty(pipe->head, pipe->tail); +} + +/** + * pipe_buf_usage - Return how many pipe buffers are in use + * @pipe: the pipe + */ +static inline unsigned int pipe_buf_usage(const struct pipe_inode_info *pipe) +{ + return pipe_occupancy(pipe->head, pipe->tail); +} + /** * pipe_buf - Return the pipe buffer for the specified slot in the pipe ring * @pipe: The pipe to access diff --git a/mm/filemap.c b/mm/filemap.c index d4564a79eb353..2974691fdfad2 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2897,8 +2897,7 @@ size_t splice_folio_into_pipe(struct pipe_inode_info *pipe, size = min(size, folio_size(folio) - offset); offset %= PAGE_SIZE; - while (spliced < size && - !pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { + while (spliced < size && !pipe_is_full(pipe)) { struct pipe_buffer *buf = pipe_head_buf(pipe); size_t part = min_t(size_t, PAGE_SIZE - offset, size - spliced); @@ -2955,7 +2954,7 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos, iocb.ki_pos = *ppos; /* Work out how much data we can actually add into the pipe */ - used = pipe_occupancy(pipe->head, pipe->tail); + used = pipe_buf_usage(pipe); npages = max_t(ssize_t, pipe->max_usage - used, 0); len = min_t(size_t, len, npages * PAGE_SIZE); @@ -3015,7 +3014,7 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos, total_spliced += n; *ppos += n; in->f_ra.prev_pos = *ppos; - if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + if (pipe_is_full(pipe)) goto out; } diff --git a/mm/shmem.c b/mm/shmem.c index 4ea6109a80431..20032a333d80c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3487,7 +3487,7 @@ static size_t splice_zeropage_into_pipe(struct pipe_inode_info *pipe, size = min_t(size_t, size, PAGE_SIZE - offset); - if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { + if (!pipe_is_full(pipe)) { struct pipe_buffer *buf = pipe_head_buf(pipe); *buf = (struct pipe_buffer) { @@ -3514,7 +3514,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, int error = 0; /* Work out how much data we can actually add into the pipe */ - used = pipe_occupancy(pipe->head, pipe->tail); + used = pipe_buf_usage(pipe); npages = max_t(ssize_t, pipe->max_usage - used, 0); len = min_t(size_t, len, npages * PAGE_SIZE); @@ -3601,7 +3601,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, total_spliced += n; *ppos += n; in->f_ra.prev_pos = *ppos; - if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + if (pipe_is_full(pipe)) break; cond_resched(); -- GitLab From d048c84bc1d6b831ca4e3381a16fb616ad96d8db Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 6 Mar 2025 09:28:26 +0100 Subject: [PATCH 1374/1585] wifi: rework MAINTAINERS entries a bit Since I really don't want to be CC'ed on every patch add X: entries for all the drivers that are otherwise covered. In some cases, add a bit more to drivers that have other entries, mostly for the vendor directories, but for libertas also add libertas_tf. While at it, also add all nl80211-related (vendor) UAPI header files to the nl80211 entry. Link: https://patch.msgid.link/20250306092831.f7fdfe7df7b2.I7c86da443038af32e9bcbaa5f53b1e4128a0d1f9@changeid Signed-off-by: Johannes Berg --- MAINTAINERS | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 73a6c34692740..e989ca218d379 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -124,6 +124,7 @@ F: include/net/ieee80211_radiotap.h F: include/net/iw_handler.h F: include/net/wext.h F: include/uapi/linux/nl80211.h +N: include/uapi/linux/nl80211-.* F: include/uapi/linux/wireless.h F: net/wireless/ @@ -514,7 +515,7 @@ F: drivers/hwmon/adm1029.c ADM8211 WIRELESS DRIVER L: linux-wireless@vger.kernel.org S: Orphan -F: drivers/net/wireless/admtek/adm8211.* +F: drivers/net/wireless/admtek/ ADP1050 HARDWARE MONITOR DRIVER M: Radu Sabau @@ -6195,7 +6196,7 @@ F: Documentation/process/cve.rst CW1200 WLAN driver S: Orphan -F: drivers/net/wireless/st/cw1200/ +F: drivers/net/wireless/st/ F: include/linux/platform_data/net-cw1200.h CX18 VIDEO4LINUX DRIVER @@ -13983,6 +13984,7 @@ MARVELL LIBERTAS WIRELESS DRIVER L: libertas-dev@lists.infradead.org S: Orphan F: drivers/net/wireless/marvell/libertas/ +F: drivers/net/wireless/marvell/libertas_tf/ MARVELL MACCHIATOBIN SUPPORT M: Russell King @@ -15652,7 +15654,7 @@ M: Ajay Singh M: Claudiu Beznea L: linux-wireless@vger.kernel.org S: Supported -F: drivers/net/wireless/microchip/wilc1000/ +F: drivers/net/wireless/microchip/ MICROSEMI MIPS SOCS M: Alexandre Belloni @@ -16438,6 +16440,23 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git F: Documentation/devicetree/bindings/net/wireless/ F: drivers/net/wireless/ +X: drivers/net/wireless/ath/ +X: drivers/net/wireless/broadcom/ +X: drivers/net/wireless/intel/ +X: drivers/net/wireless/intersil/ +X: drivers/net/wireless/marvell/ +X: drivers/net/wireless/mediatek/mt76/ +X: drivers/net/wireless/mediatek/mt7601u/ +X: drivers/net/wireless/microchip/ +X: drivers/net/wireless/purelifi/ +X: drivers/net/wireless/quantenna/ +X: drivers/net/wireless/ralink/ +X: drivers/net/wireless/realtek/ +X: drivers/net/wireless/rsi/ +X: drivers/net/wireless/silabs/ +X: drivers/net/wireless/st/ +X: drivers/net/wireless/ti/ +X: drivers/net/wireless/zydas/ NETWORKING [DSA] M: Andrew Lunn @@ -17822,7 +17841,7 @@ M: Christian Lamparter L: linux-wireless@vger.kernel.org S: Maintained W: https://wireless.wiki.kernel.org/en/users/Drivers/p54 -F: drivers/net/wireless/intersil/p54/ +F: drivers/net/wireless/intersil/ PACKET SOCKETS M: Willem de Bruijn @@ -19099,7 +19118,7 @@ PURELIFI PLFXLC DRIVER M: Srinivasan Raju L: linux-wireless@vger.kernel.org S: Supported -F: drivers/net/wireless/purelifi/plfxlc/ +F: drivers/net/wireless/purelifi/ PVRUSB2 VIDEO4LINUX DRIVER M: Mike Isely @@ -19650,7 +19669,7 @@ M: Igor Mitsyanko R: Sergey Matyukevich L: linux-wireless@vger.kernel.org S: Maintained -F: drivers/net/wireless/quantenna +F: drivers/net/wireless/quantenna/ RADEON and AMDGPU DRM DRIVERS M: Alex Deucher @@ -19731,7 +19750,7 @@ RALINK RT2X00 WIRELESS LAN DRIVER M: Stanislaw Gruszka L: linux-wireless@vger.kernel.org S: Maintained -F: drivers/net/wireless/ralink/rt2x00/ +F: drivers/net/wireless/ralink/ RAMDISK RAM BLOCK DEVICE DRIVER M: Jens Axboe @@ -21698,7 +21717,7 @@ SILICON LABS WIRELESS DRIVERS (for WFxxx series) M: Jérôme Pouiller S: Supported F: Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml -F: drivers/net/wireless/silabs/wfx/ +F: drivers/net/wireless/silabs/ SILICON MOTION SM712 FRAME BUFFER DRIVER M: Sudip Mukherjee @@ -26198,7 +26217,7 @@ F: mm/zbud.c ZD1211RW WIRELESS DRIVER L: linux-wireless@vger.kernel.org S: Orphan -F: drivers/net/wireless/zydas/zd1211rw/ +F: drivers/net/wireless/zydas/ ZD1301 MEDIA DRIVER L: linux-media@vger.kernel.org -- GitLab From bbb18f7e23a3f5f56d5c8b4ee0f78f00edb3b1b2 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 6 Mar 2025 12:25:46 +0200 Subject: [PATCH 1375/1585] wifi: iwlwifi: pcie: Fix TSO preparation The allocation of the scatter gather data structure should be done based on the number of memory chunks that need to be mapped, and it is not dependent on the overall payload length. Fix it. In addition, as the skb_to_sgvec() function returns an 'int' do not assign it to an 'unsigned int' as otherwise the error check would be useless. Fixes: 7f5e3038f029 ("wifi: iwlwifi: map entire SKB when sending AMSDUs") Signed-off-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250306122425.8c0e23a3d583.I3cb4d6768c9d28ce3da6cd0a6c65466176cfc1ee@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 7b6071a59b694..7c1dd5cc084ac 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1869,12 +1869,12 @@ struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, unsigned int offset) { struct sg_table *sgt; - unsigned int n_segments; + unsigned int n_segments = skb_shinfo(skb)->nr_frags + 1; + int orig_nents; if (WARN_ON_ONCE(skb_has_frag_list(skb))) return NULL; - n_segments = DIV_ROUND_UP(skb->len - offset, skb_shinfo(skb)->gso_size); *hdr = iwl_pcie_get_page_hdr(trans, hdr_room + __alignof__(struct sg_table) + sizeof(struct sg_table) + @@ -1889,11 +1889,12 @@ struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, sg_init_table(sgt->sgl, n_segments); /* Only map the data, not the header (it is copied to the TSO page) */ - sgt->orig_nents = skb_to_sgvec(skb, sgt->sgl, offset, - skb->len - offset); - if (WARN_ON_ONCE(sgt->orig_nents <= 0)) + orig_nents = skb_to_sgvec(skb, sgt->sgl, offset, skb->len - offset); + if (WARN_ON_ONCE(orig_nents <= 0)) return NULL; + sgt->orig_nents = orig_nents; + /* And map the entire SKB */ if (dma_map_sgtable(trans->dev, sgt, DMA_TO_DEVICE, 0) < 0) return NULL; -- GitLab From b8c8a03e9b7bfc06f366b75daf3d0812400e7123 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 6 Mar 2025 12:25:47 +0200 Subject: [PATCH 1376/1585] wifi: iwlwifi: mvm: fix PNVM timeout for non-MSI-X platforms When MSI-X is not enabled, we mask all the interrupts in the interrupt handler and re-enable them when the interrupt thread runs. If STATUS_INT_ENABLED is not set, we won't re-enable in the thread. In order to get the ALIVE interrupt, we allow the ALIVE interrupt itself, and RX as well in order to receive the ALIVE notification (which is received as an RX from the firmware. The problem is that STATUS_INT_ENABLED is clear until the op_mode calls trans_fw_alive which means that until trans_fw_alive is called, any notification from the firmware will not be received. This became a problem when we inserted the pnvm_load exactly between the ALIVE and trans_fw_alive. Fix that by calling trans_fw_alive before loading the PNVM. This will allow to get the notification from the firmware about PNVM load being complete and continue the flow normally. This didn't happen on MSI-X because we don't disable the interrupts in the ISR when MSI-X is available. The error in the log looks like this: iwlwifi 0000:00:03.0: Timeout waiting for PNVM load! iwlwifi 0000:00:03.0: Failed to start RT ucode: -110 iwlwifi 0000:00:03.0: WRT: Collecting data: ini trigger 13 fired (delay=0ms). Fixes: 70d3ca86b025 ("iwlwifi: mvm: ring the doorbell and wait for PNVM load completion") Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250306122425.0f2cf207aae1.I025d8f724b44f52eadf6c19069352eb9275613a8@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index df49dd2e2026d..d10877856049a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2012-2014, 2018-2024 Intel Corporation + * Copyright (C) 2012-2014, 2018-2025 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -422,6 +422,8 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm, /* if reached this point, Alive notification was received */ iwl_mei_alive_notif(true); + iwl_trans_fw_alive(mvm->trans, alive_data.scd_base_addr); + ret = iwl_pnvm_load(mvm->trans, &mvm->notif_wait, &mvm->fw->ucode_capa); if (ret) { @@ -430,8 +432,6 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm, return ret; } - iwl_trans_fw_alive(mvm->trans, alive_data.scd_base_addr); - /* * Note: all the queues are enabled as part of the interface * initialization, but in firmware restart scenarios they -- GitLab From 1801a94299a5c7fc1a6825e92e1ce0dc7099faa9 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 6 Mar 2025 12:25:48 +0200 Subject: [PATCH 1377/1585] wifi: iwlwifi: trans: cancel restart work on op mode leave If the restart work happens to run after the opmode left (i.e. called iwl_trans_op_mode_leave), then the opmode memory (including its mutex) is likely to be freed already, and trans->opmode is NULL. Although the hw is stopped in that stage, which means that this restart got aborted (i.e. STATUS_RESET_PENDING will be cleared), it still can access trans->opmode (NULL pointer dereference) or the opmodes memory (which is freed). Fix this by canceling the restart wk in iwl_trans_op_mode_leave. Also make sure that the restart wk is really aborted. Fixes: 7391b2a4f7db ("wifi: iwlwifi: rework firmware error handling") Signed-off-by: Miri Korenblit Reviewed-by: Johannes Berg Link: https://patch.msgid.link/20250306122425.801301ba1b8b.I6f6143f550b6335b699920c5d4b2b78449607a96@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/iwl-trans.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c index 49c8507d1a6b1..47854a36413e1 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c @@ -403,6 +403,8 @@ void iwl_trans_op_mode_leave(struct iwl_trans *trans) iwl_trans_pcie_op_mode_leave(trans); + cancel_work_sync(&trans->restart.wk); + trans->op_mode = NULL; trans->state = IWL_TRANS_NO_FW; -- GitLab From 43e04077170799d0e6289f3e928f727e401b3d79 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 6 Mar 2025 12:37:55 +0200 Subject: [PATCH 1378/1585] wifi: mac80211: flush the station before moving it to UN-AUTHORIZED state We first want to flush the station to make sure we no longer have any frames being Tx by the station before the station is moved to un-authorized state. Failing to do that will lead to races: a frame may be sent after the station's state has been changed. Since the API clearly states that the driver can't fail the sta_state() transition down the list of state, we can easily flush the station first, and only then call the driver's sta_state(). Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250306123626.450bc40e8b04.I636ba96843c77f13309c15c9fd6eb0c5a52a7976@changeid Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f83268fa9f928..caa3d0236b5ec 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -4,7 +4,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #include @@ -1335,9 +1335,13 @@ static int _sta_info_move_state(struct sta_info *sta, sta->sta.addr, new_state); /* notify the driver before the actual changes so it can - * fail the transition + * fail the transition if the state is increasing. + * The driver is required not to fail when the transition + * is decreasing the state, so first, do all the preparation + * work and only then, notify the driver. */ - if (test_sta_flag(sta, WLAN_STA_INSERTED)) { + if (new_state > sta->sta_state && + test_sta_flag(sta, WLAN_STA_INSERTED)) { int err = drv_sta_state(sta->local, sta->sdata, sta, sta->sta_state, new_state); if (err) @@ -1413,6 +1417,16 @@ static int _sta_info_move_state(struct sta_info *sta, break; } + if (new_state < sta->sta_state && + test_sta_flag(sta, WLAN_STA_INSERTED)) { + int err = drv_sta_state(sta->local, sta->sdata, sta, + sta->sta_state, new_state); + + WARN_ONCE(err, + "Driver is not allowed to fail if the sta_state is transitioning down the list: %d\n", + err); + } + sta->sta_state = new_state; return 0; -- GitLab From 20d5a0b9cd0ccb32e886cf6baecf14936325bf10 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 6 Mar 2025 12:37:56 +0200 Subject: [PATCH 1379/1585] wifi: mac80211: don't queue sdata::work for a non-running sdata The worker really shouldn't be queued for a non-running interface. Also, if ieee80211_setup_sdata is called between queueing and executing the wk, it will be initialized, which will corrupt wiphy_work_list. Fixes: f8891461a277 ("mac80211: do not start any work during reconfigure flow") Signed-off-by: Miri Korenblit Reviewed-by: Johannes Berg Link: https://patch.msgid.link/20250306123626.1e02caf82640.I4949e71ed56e7186ed4968fa9ddff477473fa2f4@changeid Signed-off-by: Johannes Berg --- net/mac80211/util.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 7f02bd5891eb9..fdda14c08e2b1 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -6,7 +6,7 @@ * Copyright 2007 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * utilities for mac80211 */ @@ -2193,8 +2193,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) ieee80211_reconfig_roc(local); /* Requeue all works */ - list_for_each_entry(sdata, &local->interfaces, list) - wiphy_work_queue(local->hw.wiphy, &sdata->work); + list_for_each_entry(sdata, &local->interfaces, list) { + if (ieee80211_sdata_running(sdata)) + wiphy_work_queue(local->hw.wiphy, &sdata->work); + } } ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, -- GitLab From 2e85829ac7fbbd57b93f6cd334b6d448c9ce9db3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 6 Mar 2025 12:37:57 +0200 Subject: [PATCH 1380/1585] wifi: nl80211: fix assoc link handling The refactoring of the assoc link handling in order to support multi-link reconfiguration broke the setting of the assoc link ID, and thus resulted in the wrong BSS "use_for" value being selected. Fix that for both association and ML reconfiguration. Fixes: 720fa448f5a7 ("wifi: nl80211: Split the links handling of an association request") Signed-off-by: Johannes Berg Reviewed-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250306123626.7b233d769c32.I62fd04a8667dd55cedb9a1c0414cc92dd098da75@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e87267fbb442e..aac0e7298dc7a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11123,6 +11123,7 @@ static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device static int nl80211_process_links(struct cfg80211_registered_device *rdev, struct cfg80211_assoc_link *links, + int assoc_link_id, const u8 *ssid, int ssid_len, struct genl_info *info) { @@ -11153,7 +11154,7 @@ static int nl80211_process_links(struct cfg80211_registered_device *rdev, } links[link_id].bss = nl80211_assoc_bss(rdev, ssid, ssid_len, attrs, - link_id, link_id); + assoc_link_id, link_id); if (IS_ERR(links[link_id].bss)) { err = PTR_ERR(links[link_id].bss); links[link_id].bss = NULL; @@ -11350,8 +11351,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) req.ap_mld_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]); ap_addr = req.ap_mld_addr; - err = nl80211_process_links(rdev, req.links, ssid, ssid_len, - info); + err = nl80211_process_links(rdev, req.links, req.link_id, + ssid, ssid_len, info); if (err) goto free; @@ -16506,7 +16507,10 @@ static int nl80211_assoc_ml_reconf(struct sk_buff *skb, struct genl_info *info) add_links = 0; if (info->attrs[NL80211_ATTR_MLO_LINKS]) { - err = nl80211_process_links(rdev, links, NULL, 0, info); + err = nl80211_process_links(rdev, links, + /* mark as MLO, but not assoc */ + IEEE80211_MLD_MAX_NUM_LINKS, + NULL, 0, info); if (err) return err; -- GitLab From 9a267ce4a3fca93a34a8881046f97bcf472228c8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 6 Mar 2025 12:37:58 +0200 Subject: [PATCH 1381/1585] wifi: mac80211: fix SA Query processing in MLO When MLO is used and SA Query processing isn't done by userspace (e.g. wpa_supplicant w/o CONFIG_OCV), then the mac80211 code kicks in but uses the wrong addresses. Fix them. Signed-off-by: Johannes Berg Reviewed-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250306123626.bab48bb49061.I9391b22f1360d20ac8c4e92604de23f27696ba8f@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 1e28efe4203c0..0659ec892ec6c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -6,7 +6,7 @@ * Copyright 2007-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation */ #include @@ -3329,8 +3329,8 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, return; } - if (!ether_addr_equal(mgmt->sa, sdata->deflink.u.mgd.bssid) || - !ether_addr_equal(mgmt->bssid, sdata->deflink.u.mgd.bssid)) { + if (!ether_addr_equal(mgmt->sa, sdata->vif.cfg.ap_addr) || + !ether_addr_equal(mgmt->bssid, sdata->vif.cfg.ap_addr)) { /* Not from the current AP or not associated yet. */ return; } @@ -3346,9 +3346,9 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, skb_reserve(skb, local->hw.extra_tx_headroom); resp = skb_put_zero(skb, 24); - memcpy(resp->da, mgmt->sa, ETH_ALEN); + memcpy(resp->da, sdata->vif.cfg.ap_addr, ETH_ALEN); memcpy(resp->sa, sdata->vif.addr, ETH_ALEN); - memcpy(resp->bssid, sdata->deflink.u.mgd.bssid, ETH_ALEN); + memcpy(resp->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); resp->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); skb_put(skb, 1 + sizeof(resp->u.action.u.sa_query)); -- GitLab From 72d520476a2fab6f3489e8388ab524985d6c4b90 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 6 Mar 2025 12:37:59 +0200 Subject: [PATCH 1382/1585] wifi: cfg80211: cancel wiphy_work before freeing wiphy A wiphy_work can be queued from the moment the wiphy is allocated and initialized (i.e. wiphy_new_nm). When a wiphy_work is queued, the rdev::wiphy_work is getting queued. If wiphy_free is called before the rdev::wiphy_work had a chance to run, the wiphy memory will be freed, and then when it eventally gets to run it'll use invalid memory. Fix this by canceling the work before freeing the wiphy. Fixes: a3ee4dc84c4e ("wifi: cfg80211: add a work abstraction with special semantics") Signed-off-by: Miri Korenblit Reviewed-by: Johannes Berg Link: https://patch.msgid.link/20250306123626.efd1d19f6e07.I48229f96f4067ef73f5b87302335e2fd750136c9@changeid Signed-off-by: Johannes Berg --- net/wireless/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/wireless/core.c b/net/wireless/core.c index 12b780de8779c..828e298726335 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1191,6 +1191,13 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev) { struct cfg80211_internal_bss *scan, *tmp; struct cfg80211_beacon_registration *reg, *treg; + unsigned long flags; + + spin_lock_irqsave(&rdev->wiphy_work_lock, flags); + WARN_ON(!list_empty(&rdev->wiphy_work_list)); + spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); + cancel_work_sync(&rdev->wiphy_work); + rfkill_destroy(rdev->wiphy.rfkill); list_for_each_entry_safe(reg, treg, &rdev->beacon_registrations, list) { list_del(®->list); -- GitLab From 502843396ec2a3eb4f58a2e4618a4a85fc5e0f46 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 3 Mar 2025 19:40:29 +0200 Subject: [PATCH 1383/1585] thunderbolt: Prevent use-after-free in resume from hibernate Kenneth noticed that his laptop crashes randomly when resuming from hibernate if there is device connected and display tunneled. I was able to reproduce this as well with the following steps: 1. Boot the system up, nothing connected. 2. Connect Thunderbolt 4 dock to the host. 3. Connect monitor to the Thunderbolt 4 dock. 4. Verify that there is picture on the screen. 5. Enter hibernate. 6. Exit hibernate. 7. Wait for the system to resume. Expectation: System resumes just fine, the connected monitor still shows screen. Actual result: There is crash during resume, screen is blank. What happens is that during resume from hibernate we tear down any existing tunnels created by the boot kernel and this ends up calling tb_dp_dprx_stop() which calls tb_tunnel_put() dropping the reference count to zero even though we never called tb_dp_dprx_start() for it (we never do that for discovery). This makes the discovered DP tunnel memory to be released and any access after that causes use-after-free and possible crash. Fix this so that we only stop DPRX flow if it has been started in the first place. Reported-by: Kenneth Crudup Closes: https://lore.kernel.org/linux-usb/8e175721-806f-45d6-892a-bd3356af80c9@panix.com/ Cc: stable@vger.kernel.org Fixes: d6d458d42e1e ("thunderbolt: Handle DisplayPort tunnel activation asynchronously") Reviewed-by: Yehezkel Bernat Signed-off-by: Mika Westerberg --- drivers/thunderbolt/tunnel.c | 11 ++++++++--- drivers/thunderbolt/tunnel.h | 2 ++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c index 8229a6fbda5ab..717b31d787289 100644 --- a/drivers/thunderbolt/tunnel.c +++ b/drivers/thunderbolt/tunnel.c @@ -1009,6 +1009,8 @@ static int tb_dp_dprx_start(struct tb_tunnel *tunnel) */ tb_tunnel_get(tunnel); + tunnel->dprx_started = true; + if (tunnel->callback) { tunnel->dprx_timeout = dprx_timeout_to_ktime(dprx_timeout); queue_delayed_work(tunnel->tb->wq, &tunnel->dprx_work, 0); @@ -1021,9 +1023,12 @@ static int tb_dp_dprx_start(struct tb_tunnel *tunnel) static void tb_dp_dprx_stop(struct tb_tunnel *tunnel) { - tunnel->dprx_canceled = true; - cancel_delayed_work(&tunnel->dprx_work); - tb_tunnel_put(tunnel); + if (tunnel->dprx_started) { + tunnel->dprx_started = false; + tunnel->dprx_canceled = true; + cancel_delayed_work(&tunnel->dprx_work); + tb_tunnel_put(tunnel); + } } static int tb_dp_activate(struct tb_tunnel *tunnel, bool active) diff --git a/drivers/thunderbolt/tunnel.h b/drivers/thunderbolt/tunnel.h index 7f6d3a18a41e8..8a0a0cb21a895 100644 --- a/drivers/thunderbolt/tunnel.h +++ b/drivers/thunderbolt/tunnel.h @@ -63,6 +63,7 @@ enum tb_tunnel_state { * @allocated_down: Allocated downstream bandwidth (only for USB3) * @bw_mode: DP bandwidth allocation mode registers can be used to * determine consumed and allocated bandwidth + * @dprx_started: DPRX negotiation was started (tb_dp_dprx_start() was called for it) * @dprx_canceled: Was DPRX capabilities read poll canceled * @dprx_timeout: If set DPRX capabilities read poll work will timeout after this passes * @dprx_work: Worker that is scheduled to poll completion of DPRX capabilities read @@ -100,6 +101,7 @@ struct tb_tunnel { int allocated_up; int allocated_down; bool bw_mode; + bool dprx_started; bool dprx_canceled; ktime_t dprx_timeout; struct delayed_work dprx_work; -- GitLab From 14cb5d83068ecf15d2da6f7d0e9ea9edbcbc0457 Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Fri, 7 Mar 2025 00:28:46 +0000 Subject: [PATCH 1384/1585] x86/amd_nb: Use rdmsr_safe() in amd_get_mmconfig_range() Xen doesn't offer MSR_FAM10H_MMIO_CONF_BASE to all guests. This results in the following warning: unchecked MSR access error: RDMSR from 0xc0010058 at rIP: 0xffffffff8101d19f (xen_do_read_msr+0x7f/0xa0) Call Trace: xen_read_msr+0x1e/0x30 amd_get_mmconfig_range+0x2b/0x80 quirk_amd_mmconfig_area+0x28/0x100 pnp_fixup_device+0x39/0x50 __pnp_add_device+0xf/0x150 pnp_add_device+0x3d/0x100 pnpacpi_add_device_handler+0x1f9/0x280 acpi_ns_get_device_callback+0x104/0x1c0 acpi_ns_walk_namespace+0x1d0/0x260 acpi_get_devices+0x8a/0xb0 pnpacpi_init+0x50/0x80 do_one_initcall+0x46/0x2e0 kernel_init_freeable+0x1da/0x2f0 kernel_init+0x16/0x1b0 ret_from_fork+0x30/0x50 ret_from_fork_asm+0x1b/0x30 based on quirks for a "PNP0c01" device. Treating MMCFG as disabled is the right course of action, so no change is needed there. This was most likely exposed by fixing the Xen MSR accessors to not be silently-safe. Fixes: 3fac3734c43a ("xen/pv: support selecting safe/unsafe msr accesses") Signed-off-by: Andrew Cooper Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250307002846.3026685-1-andrew.cooper3@citrix.com --- arch/x86/kernel/amd_nb.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 11fac09e3a8cb..67e773744edb2 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -143,7 +143,6 @@ bool __init early_is_amd_nb(u32 device) struct resource *amd_get_mmconfig_range(struct resource *res) { - u32 address; u64 base, msr; unsigned int segn_busn_bits; @@ -151,13 +150,11 @@ struct resource *amd_get_mmconfig_range(struct resource *res) boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) return NULL; - /* assume all cpus from fam10h have mmconfig */ - if (boot_cpu_data.x86 < 0x10) + /* Assume CPUs from Fam10h have mmconfig, although not all VMs do */ + if (boot_cpu_data.x86 < 0x10 || + rdmsrl_safe(MSR_FAM10H_MMIO_CONF_BASE, &msr)) return NULL; - address = MSR_FAM10H_MMIO_CONF_BASE; - rdmsrl(address, msr); - /* mmconfig is not enabled */ if (!(msr & FAM10H_MMIO_CONF_ENABLE)) return NULL; -- GitLab From ac7c06acaa3738b38e83815ac0f07140ad320f13 Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Thu, 6 Mar 2025 19:17:21 +1100 Subject: [PATCH 1385/1585] virt: sev-guest: Allocate request data dynamically Commit ae596615d93d ("virt: sev-guest: Reduce the scope of SNP command mutex") narrowed the command mutex scope to snp_send_guest_request(). However, GET_REPORT, GET_DERIVED_KEY, and GET_EXT_REPORT share the req structure in snp_guest_dev. Without the mutex protection, concurrent requests can overwrite each other's data. Fix it by dynamically allocating the request structure. Fixes: ae596615d93d ("virt: sev-guest: Reduce the scope of SNP command mutex") Closes: https://github.com/AMDESE/AMDSEV/issues/265 Reported-by: andreas.stuehrk@yaxi.tech Signed-off-by: Nikunj A Dadhania Signed-off-by: Alexey Kardashevskiy Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250307013700.437505-2-aik@amd.com --- drivers/virt/coco/sev-guest/sev-guest.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 264b6523fe52f..23ac177472beb 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -38,12 +38,6 @@ struct snp_guest_dev { struct miscdevice misc; struct snp_msg_desc *msg_desc; - - union { - struct snp_report_req report; - struct snp_derived_key_req derived_key; - struct snp_ext_report_req ext_report; - } req; }; /* @@ -71,7 +65,7 @@ struct snp_req_resp { static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg) { - struct snp_report_req *report_req = &snp_dev->req.report; + struct snp_report_req *report_req __free(kfree) = NULL; struct snp_msg_desc *mdesc = snp_dev->msg_desc; struct snp_report_resp *report_resp; struct snp_guest_req req = {}; @@ -80,6 +74,10 @@ static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_io if (!arg->req_data || !arg->resp_data) return -EINVAL; + report_req = kzalloc(sizeof(*report_req), GFP_KERNEL_ACCOUNT); + if (!report_req) + return -ENOMEM; + if (copy_from_user(report_req, (void __user *)arg->req_data, sizeof(*report_req))) return -EFAULT; @@ -116,7 +114,7 @@ static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_io static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg) { - struct snp_derived_key_req *derived_key_req = &snp_dev->req.derived_key; + struct snp_derived_key_req *derived_key_req __free(kfree) = NULL; struct snp_derived_key_resp derived_key_resp = {0}; struct snp_msg_desc *mdesc = snp_dev->msg_desc; struct snp_guest_req req = {}; @@ -136,6 +134,10 @@ static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_reque if (sizeof(buf) < resp_len) return -ENOMEM; + derived_key_req = kzalloc(sizeof(*derived_key_req), GFP_KERNEL_ACCOUNT); + if (!derived_key_req) + return -ENOMEM; + if (copy_from_user(derived_key_req, (void __user *)arg->req_data, sizeof(*derived_key_req))) return -EFAULT; @@ -168,7 +170,7 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques struct snp_req_resp *io) { - struct snp_ext_report_req *report_req = &snp_dev->req.ext_report; + struct snp_ext_report_req *report_req __free(kfree) = NULL; struct snp_msg_desc *mdesc = snp_dev->msg_desc; struct snp_report_resp *report_resp; struct snp_guest_req req = {}; @@ -178,6 +180,10 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques if (sockptr_is_null(io->req_data) || sockptr_is_null(io->resp_data)) return -EINVAL; + report_req = kzalloc(sizeof(*report_req), GFP_KERNEL_ACCOUNT); + if (!report_req) + return -ENOMEM; + if (copy_from_sockptr(report_req, io->req_data, sizeof(*report_req))) return -EFAULT; -- GitLab From 3e385c0d6ce88ac9916dcf84267bd5855d830748 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 7 Mar 2025 12:37:00 +1100 Subject: [PATCH 1386/1585] virt: sev-guest: Move SNP Guest Request data pages handling under snp_cmd_mutex Compared to the SNP Guest Request, the "Extended" version adds data pages for receiving certificates. If not enough pages provided, the HV can report to the VM how much is needed so the VM can reallocate and repeat. Commit ae596615d93d ("virt: sev-guest: Reduce the scope of SNP command mutex") moved handling of the allocated/desired pages number out of scope of said mutex and create a possibility for a race (multiple instances trying to trigger Extended request in a VM) as there is just one instance of snp_msg_desc per /dev/sev-guest and no locking other than snp_cmd_mutex. Fix the issue by moving the data blob/size and the GHCB input struct (snp_req_data) into snp_guest_req which is allocated on stack now and accessed by the GHCB caller under that mutex. Stop allocating SEV_FW_BLOB_MAX_SIZE in snp_msg_alloc() as only one of four callers needs it. Free the received blob in get_ext_report() right after it is copied to the userspace. Possible future users of snp_send_guest_request() are likely to have different ideas about the buffer size anyways. Fixes: ae596615d93d ("virt: sev-guest: Reduce the scope of SNP command mutex") Signed-off-by: Alexey Kardashevskiy Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Nikunj A Dadhania Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250307013700.437505-3-aik@amd.com --- arch/x86/coco/sev/core.c | 23 ++++++----------- arch/x86/include/asm/sev.h | 6 ++--- drivers/virt/coco/sev-guest/sev-guest.c | 34 ++++++++++++++++++++----- 3 files changed, 39 insertions(+), 24 deletions(-) diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index 82492efc5d949..96c7bc698e6b6 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -2853,19 +2853,8 @@ struct snp_msg_desc *snp_msg_alloc(void) if (!mdesc->response) goto e_free_request; - mdesc->certs_data = alloc_shared_pages(SEV_FW_BLOB_MAX_SIZE); - if (!mdesc->certs_data) - goto e_free_response; - - /* initial the input address for guest request */ - mdesc->input.req_gpa = __pa(mdesc->request); - mdesc->input.resp_gpa = __pa(mdesc->response); - mdesc->input.data_gpa = __pa(mdesc->certs_data); - return mdesc; -e_free_response: - free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg)); e_free_request: free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg)); e_unmap: @@ -2885,7 +2874,6 @@ void snp_msg_free(struct snp_msg_desc *mdesc) kfree(mdesc->ctx); free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg)); free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg)); - free_shared_pages(mdesc->certs_data, SEV_FW_BLOB_MAX_SIZE); iounmap((__force void __iomem *)mdesc->secrets); memset(mdesc, 0, sizeof(*mdesc)); @@ -3054,7 +3042,7 @@ static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_r * sequence number must be incremented or the VMPCK must be deleted to * prevent reuse of the IV. */ - rc = snp_issue_guest_request(req, &mdesc->input, rio); + rc = snp_issue_guest_request(req, &req->input, rio); switch (rc) { case -ENOSPC: /* @@ -3064,7 +3052,7 @@ static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_r * order to increment the sequence number and thus avoid * IV reuse. */ - override_npages = mdesc->input.data_npages; + override_npages = req->input.data_npages; req->exit_code = SVM_VMGEXIT_GUEST_REQUEST; /* @@ -3120,7 +3108,7 @@ static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_r } if (override_npages) - mdesc->input.data_npages = override_npages; + req->input.data_npages = override_npages; return rc; } @@ -3158,6 +3146,11 @@ int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req */ memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request)); + /* Initialize the input address for guest request */ + req->input.req_gpa = __pa(mdesc->request); + req->input.resp_gpa = __pa(mdesc->response); + req->input.data_gpa = req->certs_data ? __pa(req->certs_data) : 0; + rc = __handle_guest_request(mdesc, req, rio); if (rc) { if (rc == -EIO && diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 1581246491b54..ba7999f66abe6 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -203,6 +203,9 @@ struct snp_guest_req { unsigned int vmpck_id; u8 msg_version; u8 msg_type; + + struct snp_req_data input; + void *certs_data; }; /* @@ -263,9 +266,6 @@ struct snp_msg_desc { struct snp_guest_msg secret_request, secret_response; struct snp_secrets_page *secrets; - struct snp_req_data input; - - void *certs_data; struct aesgcm_ctx *ctx; diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 23ac177472beb..70fbc9a3e703d 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -176,6 +176,7 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques struct snp_guest_req req = {}; int ret, npages = 0, resp_len; sockptr_t certs_address; + struct page *page; if (sockptr_is_null(io->req_data) || sockptr_is_null(io->resp_data)) return -EINVAL; @@ -209,8 +210,20 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques * the host. If host does not supply any certs in it, then copy * zeros to indicate that certificate data was not provided. */ - memset(mdesc->certs_data, 0, report_req->certs_len); npages = report_req->certs_len >> PAGE_SHIFT; + page = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, + get_order(report_req->certs_len)); + if (!page) + return -ENOMEM; + + req.certs_data = page_address(page); + ret = set_memory_decrypted((unsigned long)req.certs_data, npages); + if (ret) { + pr_err("failed to mark page shared, ret=%d\n", ret); + __free_pages(page, get_order(report_req->certs_len)); + return -EFAULT; + } + cmd: /* * The intermediate response buffer is used while decrypting the @@ -219,10 +232,12 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques */ resp_len = sizeof(report_resp->data) + mdesc->ctx->authsize; report_resp = kzalloc(resp_len, GFP_KERNEL_ACCOUNT); - if (!report_resp) - return -ENOMEM; + if (!report_resp) { + ret = -ENOMEM; + goto e_free_data; + } - mdesc->input.data_npages = npages; + req.input.data_npages = npages; req.msg_version = arg->msg_version; req.msg_type = SNP_MSG_REPORT_REQ; @@ -237,7 +252,7 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques /* If certs length is invalid then copy the returned length */ if (arg->vmm_error == SNP_GUEST_VMM_ERR_INVALID_LEN) { - report_req->certs_len = mdesc->input.data_npages << PAGE_SHIFT; + report_req->certs_len = req.input.data_npages << PAGE_SHIFT; if (copy_to_sockptr(io->req_data, report_req, sizeof(*report_req))) ret = -EFAULT; @@ -246,7 +261,7 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques if (ret) goto e_free; - if (npages && copy_to_sockptr(certs_address, mdesc->certs_data, report_req->certs_len)) { + if (npages && copy_to_sockptr(certs_address, req.certs_data, report_req->certs_len)) { ret = -EFAULT; goto e_free; } @@ -256,6 +271,13 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques e_free: kfree(report_resp); +e_free_data: + if (npages) { + if (set_memory_encrypted((unsigned long)req.certs_data, npages)) + WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n"); + else + __free_pages(page, get_order(report_req->certs_len)); + } return ret; } -- GitLab From 986c2e9ca818b0b74cfc737517549fd0b80ff15d Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 2 Mar 2025 00:16:01 +0100 Subject: [PATCH 1387/1585] drm/panic: use `div_ceil` to clean Clippy warning Starting with the upcoming Rust 1.86.0 (to be released 2025-04-03), Clippy warns: error: manually reimplementing `div_ceil` --> drivers/gpu/drm/drm_panic_qr.rs:548:26 | 548 | let pad_offset = (offset + 7) / 8; | ^^^^^^^^^^^^^^^^ help: consider using `.div_ceil()`: `offset.div_ceil(8)` | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#manual_div_ceil And similarly for `stride`. Thus apply the suggestion to both. The behavior (and thus codegen) is not exactly equivalent [1][2], since `div_ceil()` returns the right value for the values that currently would overflow. Link: https://github.com/rust-lang/rust-clippy/issues/14333 [1] Link: https://godbolt.org/z/dPq6nGnv3 [2] Signed-off-by: Miguel Ojeda Fixes: cb5164ac43d0 ("drm/panic: Add a QR code panic screen") Cc: stable@vger.kernel.org # Needed in 6.12.y and 6.13.y only (Rust is pinned in older LTSs). Reviewed-by: Alice Ryhl Reviewed-by: Jocelyn Falempe Signed-off-by: Jocelyn Falempe Link: https://patchwork.freedesktop.org/patch/msgid/20250301231602.917580-1-ojeda@kernel.org --- drivers/gpu/drm/drm_panic_qr.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index ef2d490965ba2..56692c6be2199 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -545,7 +545,7 @@ impl EncodedMsg<'_> { } self.push(&mut offset, (MODE_STOP, 4)); - let pad_offset = (offset + 7) / 8; + let pad_offset = offset.div_ceil(8); for i in pad_offset..self.version.max_data() { self.data[i] = PADDING[(i & 1) ^ (pad_offset & 1)]; } @@ -659,7 +659,7 @@ struct QrImage<'a> { impl QrImage<'_> { fn new<'a, 'b>(em: &'b EncodedMsg<'b>, qrdata: &'a mut [u8]) -> QrImage<'a> { let width = em.version.width(); - let stride = (width + 7) / 8; + let stride = width.div_ceil(8); let data = qrdata; let mut qr_image = QrImage { -- GitLab From cba3b86974a3388b12130654809e50cd19294849 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 2 Mar 2025 00:16:02 +0100 Subject: [PATCH 1388/1585] drm/panic: fix overindented list items in documentation Starting with the upcoming Rust 1.86.0 (to be released 2025-04-03), Clippy warns: error: doc list item overindented --> drivers/gpu/drm/drm_panic_qr.rs:914:5 | 914 | /// will be encoded as binary segment, otherwise it will be encoded | ^^^ help: try using ` ` (2 spaces) | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#doc_overindented_list_items The overindentation is slightly hard to notice, since all the items start with a backquote that makes it look OK, but it is there. Thus fix it. Signed-off-by: Miguel Ojeda Fixes: cb5164ac43d0 ("drm/panic: Add a QR code panic screen") Cc: stable@vger.kernel.org # Needed in 6.12.y and 6.13.y only (Rust is pinned in older LTSs). Reviewed-by: Jocelyn Falempe Reviewed-by: Alice Ryhl Signed-off-by: Jocelyn Falempe Link: https://patchwork.freedesktop.org/patch/msgid/20250301231602.917580-2-ojeda@kernel.org --- drivers/gpu/drm/drm_panic_qr.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index 56692c6be2199..08b31d75c24a1 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -911,16 +911,16 @@ impl QrImage<'_> { /// /// * `url`: The base URL of the QR code. It will be encoded as Binary segment. /// * `data`: A pointer to the binary data, to be encoded. if URL is NULL, it -/// will be encoded as binary segment, otherwise it will be encoded -/// efficiently as a numeric segment, and appended to the URL. +/// will be encoded as binary segment, otherwise it will be encoded +/// efficiently as a numeric segment, and appended to the URL. /// * `data_len`: Length of the data, that needs to be encoded, must be less -/// than data_size. +/// than data_size. /// * `data_size`: Size of data buffer, it should be at least 4071 bytes to hold -/// a V40 QR code. It will then be overwritten with the QR code image. +/// a V40 QR code. It will then be overwritten with the QR code image. /// * `tmp`: A temporary buffer that the QR code encoder will use, to write the -/// segments and ECC. +/// segments and ECC. /// * `tmp_size`: Size of the temporary buffer, it must be at least 3706 bytes -/// long for V40. +/// long for V40. /// /// # Safety /// -- GitLab From 75ddcd5ad40ecd9fbc9f5a7a2ed0e1e74921db3c Mon Sep 17 00:00:00 2001 From: Hsin-chen Chuang Date: Fri, 28 Feb 2025 01:14:10 +0800 Subject: [PATCH 1389/1585] Bluetooth: btusb: Configure altsetting for HCI_USER_CHANNEL Automatically configure the altsetting for HCI_USER_CHANNEL when a SCO is connected. The motivation is to enable the HCI_USER_CHANNEL user to send out SCO data through USB Bluetooth chips, which is mainly used for bidirectional audio transfer (voice call). This was not capable because: - Per Bluetooth Core Spec v5, Vol 4, Part B, 2.1, the corresponding alternate setting should be set based on the air mode in order to transfer SCO data, but - The Linux Bluetooth HCI_USER_CHANNEL exposes the Bluetooth Host Controller Interface to the user space, which is something above the USB layer. The user space is not able to configure the USB alt while keeping the channel open. This patch intercepts the HCI_EV_SYNC_CONN_COMPLETE packets in btusb, extracts the air mode, and configures the alt setting in btusb. This patch is tested on ChromeOS devices. The USB Bluetooth models (CVSD, TRANS alt3 and alt6) could work without a customized kernel. Fixes: b16b327edb4d ("Bluetooth: btusb: add sysfs attribute to control USB alt setting") Signed-off-by: Hsin-chen Chuang Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/Kconfig | 12 ++++++++++++ drivers/bluetooth/btusb.c | 41 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index 4ab32abf0f486..7771edf54fb3f 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -56,6 +56,18 @@ config BT_HCIBTUSB_POLL_SYNC Say Y here to enable USB poll_sync for Bluetooth USB devices by default. +config BT_HCIBTUSB_AUTO_ISOC_ALT + bool "Automatically adjust alternate setting for Isoc endpoints" + depends on BT_HCIBTUSB + default y if CHROME_PLATFORMS + help + Say Y here to automatically adjusting the alternate setting for + HCI_USER_CHANNEL whenever a SCO link is established. + + When enabled, btusb intercepts the HCI_EV_SYNC_CONN_COMPLETE packets + and configures isoc endpoint alternate setting automatically when + HCI_USER_CHANNEL is in use. + config BT_HCIBTUSB_BCM bool "Broadcom protocol support" depends on BT_HCIBTUSB diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 2a8d91963c63f..a0fc465458b2f 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -34,6 +34,7 @@ static bool force_scofix; static bool enable_autosuspend = IS_ENABLED(CONFIG_BT_HCIBTUSB_AUTOSUSPEND); static bool enable_poll_sync = IS_ENABLED(CONFIG_BT_HCIBTUSB_POLL_SYNC); static bool reset = true; +static bool auto_isoc_alt = IS_ENABLED(CONFIG_BT_HCIBTUSB_AUTO_ISOC_ALT); static struct usb_driver btusb_driver; @@ -1085,6 +1086,42 @@ static inline void btusb_free_frags(struct btusb_data *data) spin_unlock_irqrestore(&data->rxlock, flags); } +static void btusb_sco_connected(struct btusb_data *data, struct sk_buff *skb) +{ + struct hci_event_hdr *hdr = (void *) skb->data; + struct hci_ev_sync_conn_complete *ev = + (void *) skb->data + sizeof(*hdr); + struct hci_dev *hdev = data->hdev; + unsigned int notify_air_mode; + + if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT) + return; + + if (skb->len < sizeof(*hdr) || hdr->evt != HCI_EV_SYNC_CONN_COMPLETE) + return; + + if (skb->len != sizeof(*hdr) + sizeof(*ev) || ev->status) + return; + + switch (ev->air_mode) { + case BT_CODEC_CVSD: + notify_air_mode = HCI_NOTIFY_ENABLE_SCO_CVSD; + break; + + case BT_CODEC_TRANSPARENT: + notify_air_mode = HCI_NOTIFY_ENABLE_SCO_TRANSP; + break; + + default: + return; + } + + bt_dev_info(hdev, "enabling SCO with air mode %u", ev->air_mode); + data->sco_num = 1; + data->air_mode = notify_air_mode; + schedule_work(&data->work); +} + static int btusb_recv_event(struct btusb_data *data, struct sk_buff *skb) { if (data->intr_interval) { @@ -1092,6 +1129,10 @@ static int btusb_recv_event(struct btusb_data *data, struct sk_buff *skb) schedule_delayed_work(&data->rx_work, 0); } + /* Configure altsetting for HCI_USER_CHANNEL on SCO connected */ + if (auto_isoc_alt && hci_dev_test_flag(data->hdev, HCI_USER_CHANNEL)) + btusb_sco_connected(data, skb); + return data->recv_event(data->hdev, skb); } -- GitLab From 8d74c9106be8da051b22f0cd81e665f17d51ba5d Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Thu, 27 Feb 2025 23:28:15 +0200 Subject: [PATCH 1390/1585] Bluetooth: SCO: fix sco_conn refcounting on sco_conn_ready sco_conn refcount shall not be incremented a second time if the sk already owns the refcount, so hold only when adding new chan. Add sco_conn_hold() for clarity, as refcnt is never zero here due to the sco_conn_add(). Fixes SCO socket shutdown not actually closing the SCO connection. Fixes: ed9588554943 ("Bluetooth: SCO: remove the redundant sco_conn_put") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/sco.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index aa7bfe26cb40f..ed6846864ea93 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -107,6 +107,14 @@ static void sco_conn_put(struct sco_conn *conn) kref_put(&conn->ref, sco_conn_free); } +static struct sco_conn *sco_conn_hold(struct sco_conn *conn) +{ + BT_DBG("conn %p refcnt %u", conn, kref_read(&conn->ref)); + + kref_get(&conn->ref); + return conn; +} + static struct sco_conn *sco_conn_hold_unless_zero(struct sco_conn *conn) { if (!conn) @@ -1353,6 +1361,7 @@ static void sco_conn_ready(struct sco_conn *conn) bacpy(&sco_pi(sk)->src, &conn->hcon->src); bacpy(&sco_pi(sk)->dst, &conn->hcon->dst); + sco_conn_hold(conn); hci_conn_hold(conn->hcon); __sco_chan_add(conn, sk, parent); @@ -1411,8 +1420,10 @@ static void sco_connect_cfm(struct hci_conn *hcon, __u8 status) struct sco_conn *conn; conn = sco_conn_add(hcon); - if (conn) + if (conn) { sco_conn_ready(conn); + sco_conn_put(conn); + } } else sco_conn_del(hcon, bt_to_errno(status)); } -- GitLab From 0bdd88971519cfa8a76d1a4dde182e74cfbd5d5c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 28 Feb 2025 13:12:54 -0500 Subject: [PATCH 1391/1585] Bluetooth: hci_event: Fix enabling passive scanning Passive scanning shall only be enabled when disconnecting LE links, otherwise it may start result in triggering scanning when e.g. an ISO link disconnects: > HCI Event: LE Meta Event (0x3e) plen 29 LE Connected Isochronous Stream Established (0x19) Status: Success (0x00) Connection Handle: 257 CIG Synchronization Delay: 0 us (0x000000) CIS Synchronization Delay: 0 us (0x000000) Central to Peripheral Latency: 10000 us (0x002710) Peripheral to Central Latency: 10000 us (0x002710) Central to Peripheral PHY: LE 2M (0x02) Peripheral to Central PHY: LE 2M (0x02) Number of Subevents: 1 Central to Peripheral Burst Number: 1 Peripheral to Central Burst Number: 1 Central to Peripheral Flush Timeout: 2 Peripheral to Central Flush Timeout: 2 Central to Peripheral MTU: 320 Peripheral to Central MTU: 160 ISO Interval: 10.00 msec (0x0008) ... > HCI Event: Disconnect Complete (0x05) plen 4 Status: Success (0x00) Handle: 257 Reason: Remote User Terminated Connection (0x13) < HCI Command: LE Set Extended Scan Enable (0x08|0x0042) plen 6 Extended scan: Enabled (0x01) Filter duplicates: Enabled (0x01) Duration: 0 msec (0x0000) Period: 0.00 sec (0x0000) Fixes: 9fcb18ef3acb ("Bluetooth: Introduce LE auto connect options") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 2cc7a93063501..903b0b52692aa 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3391,23 +3391,30 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data, hci_update_scan(hdev); } - params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); - if (params) { - switch (params->auto_connect) { - case HCI_AUTO_CONN_LINK_LOSS: - if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT) + /* Re-enable passive scanning if disconnected device is marked + * as auto-connectable. + */ + if (conn->type == LE_LINK) { + params = hci_conn_params_lookup(hdev, &conn->dst, + conn->dst_type); + if (params) { + switch (params->auto_connect) { + case HCI_AUTO_CONN_LINK_LOSS: + if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT) + break; + fallthrough; + + case HCI_AUTO_CONN_DIRECT: + case HCI_AUTO_CONN_ALWAYS: + hci_pend_le_list_del_init(params); + hci_pend_le_list_add(params, + &hdev->pend_le_conns); + hci_update_passive_scan(hdev); break; - fallthrough; - case HCI_AUTO_CONN_DIRECT: - case HCI_AUTO_CONN_ALWAYS: - hci_pend_le_list_del_init(params); - hci_pend_le_list_add(params, &hdev->pend_le_conns); - hci_update_passive_scan(hdev); - break; - - default: - break; + default: + break; + } } } -- GitLab From ab6ab707a4d060a51c45fc13e3b2228d5f7c0b87 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 4 Mar 2025 10:06:10 -0500 Subject: [PATCH 1392/1585] Revert "Bluetooth: hci_core: Fix sleeping function called from invalid context" This reverts commit 4d94f05558271654670d18c26c912da0c1c15549 which has problems (see [1]) and is no longer needed since 581dd2dc168f ("Bluetooth: hci_event: Fix using rcu_read_(un)lock while iterating") has reworked the code where the original bug has been found. [1] Link: https://lore.kernel.org/linux-bluetooth/877c55ci1r.wl-tiwai@suse.de/T/#t Fixes: 4d94f0555827 ("Bluetooth: hci_core: Fix sleeping function called from invalid context") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 108 +++++++++++-------------------- net/bluetooth/hci_core.c | 10 ++- net/bluetooth/iso.c | 6 -- net/bluetooth/l2cap_core.c | 12 ++-- net/bluetooth/rfcomm/core.c | 6 -- net/bluetooth/sco.c | 12 ++-- 6 files changed, 57 insertions(+), 97 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f756fac95488a..6281063cbd8e4 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -804,6 +804,7 @@ struct hci_conn_params { extern struct list_head hci_dev_list; extern struct list_head hci_cb_list; extern rwlock_t hci_dev_list_lock; +extern struct mutex hci_cb_list_lock; #define hci_dev_set_flag(hdev, nr) set_bit((nr), (hdev)->dev_flags) #define hci_dev_clear_flag(hdev, nr) clear_bit((nr), (hdev)->dev_flags) @@ -2010,47 +2011,24 @@ struct hci_cb { char *name; - bool (*match) (struct hci_conn *conn); void (*connect_cfm) (struct hci_conn *conn, __u8 status); void (*disconn_cfm) (struct hci_conn *conn, __u8 status); void (*security_cfm) (struct hci_conn *conn, __u8 status, - __u8 encrypt); + __u8 encrypt); void (*key_change_cfm) (struct hci_conn *conn, __u8 status); void (*role_switch_cfm) (struct hci_conn *conn, __u8 status, __u8 role); }; -static inline void hci_cb_lookup(struct hci_conn *conn, struct list_head *list) -{ - struct hci_cb *cb, *cpy; - - rcu_read_lock(); - list_for_each_entry_rcu(cb, &hci_cb_list, list) { - if (cb->match && cb->match(conn)) { - cpy = kmalloc(sizeof(*cpy), GFP_ATOMIC); - if (!cpy) - break; - - *cpy = *cb; - INIT_LIST_HEAD(&cpy->list); - list_add_rcu(&cpy->list, list); - } - } - rcu_read_unlock(); -} - static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status) { - struct list_head list; - struct hci_cb *cb, *tmp; - - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); + struct hci_cb *cb; - list_for_each_entry_safe(cb, tmp, &list, list) { + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { if (cb->connect_cfm) cb->connect_cfm(conn, status); - kfree(cb); } + mutex_unlock(&hci_cb_list_lock); if (conn->connect_cfm_cb) conn->connect_cfm_cb(conn, status); @@ -2058,43 +2036,22 @@ static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status) static inline void hci_disconn_cfm(struct hci_conn *conn, __u8 reason) { - struct list_head list; - struct hci_cb *cb, *tmp; - - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); + struct hci_cb *cb; - list_for_each_entry_safe(cb, tmp, &list, list) { + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { if (cb->disconn_cfm) cb->disconn_cfm(conn, reason); - kfree(cb); } + mutex_unlock(&hci_cb_list_lock); if (conn->disconn_cfm_cb) conn->disconn_cfm_cb(conn, reason); } -static inline void hci_security_cfm(struct hci_conn *conn, __u8 status, - __u8 encrypt) -{ - struct list_head list; - struct hci_cb *cb, *tmp; - - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); - - list_for_each_entry_safe(cb, tmp, &list, list) { - if (cb->security_cfm) - cb->security_cfm(conn, status, encrypt); - kfree(cb); - } - - if (conn->security_cfm_cb) - conn->security_cfm_cb(conn, status); -} - static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status) { + struct hci_cb *cb; __u8 encrypt; if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) @@ -2102,11 +2059,20 @@ static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status) encrypt = test_bit(HCI_CONN_ENCRYPT, &conn->flags) ? 0x01 : 0x00; - hci_security_cfm(conn, status, encrypt); + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { + if (cb->security_cfm) + cb->security_cfm(conn, status, encrypt); + } + mutex_unlock(&hci_cb_list_lock); + + if (conn->security_cfm_cb) + conn->security_cfm_cb(conn, status); } static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) { + struct hci_cb *cb; __u8 encrypt; if (conn->state == BT_CONFIG) { @@ -2133,38 +2099,40 @@ static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) conn->sec_level = conn->pending_sec_level; } - hci_security_cfm(conn, status, encrypt); + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { + if (cb->security_cfm) + cb->security_cfm(conn, status, encrypt); + } + mutex_unlock(&hci_cb_list_lock); + + if (conn->security_cfm_cb) + conn->security_cfm_cb(conn, status); } static inline void hci_key_change_cfm(struct hci_conn *conn, __u8 status) { - struct list_head list; - struct hci_cb *cb, *tmp; - - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); + struct hci_cb *cb; - list_for_each_entry_safe(cb, tmp, &list, list) { + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { if (cb->key_change_cfm) cb->key_change_cfm(conn, status); - kfree(cb); } + mutex_unlock(&hci_cb_list_lock); } static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status, __u8 role) { - struct list_head list; - struct hci_cb *cb, *tmp; - - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); + struct hci_cb *cb; - list_for_each_entry_safe(cb, tmp, &list, list) { + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { if (cb->role_switch_cfm) cb->role_switch_cfm(conn, status, role); - kfree(cb); } + mutex_unlock(&hci_cb_list_lock); } static inline bool hci_bdaddr_is_rpa(bdaddr_t *bdaddr, u8 addr_type) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index e7ec12437c8b1..012fc107901a6 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -57,6 +57,7 @@ DEFINE_RWLOCK(hci_dev_list_lock); /* HCI callback list */ LIST_HEAD(hci_cb_list); +DEFINE_MUTEX(hci_cb_list_lock); /* HCI ID Numbering */ static DEFINE_IDA(hci_index_ida); @@ -2972,7 +2973,9 @@ int hci_register_cb(struct hci_cb *cb) { BT_DBG("%p name %s", cb, cb->name); - list_add_tail_rcu(&cb->list, &hci_cb_list); + mutex_lock(&hci_cb_list_lock); + list_add_tail(&cb->list, &hci_cb_list); + mutex_unlock(&hci_cb_list_lock); return 0; } @@ -2982,8 +2985,9 @@ int hci_unregister_cb(struct hci_cb *cb) { BT_DBG("%p name %s", cb, cb->name); - list_del_rcu(&cb->list); - synchronize_rcu(); + mutex_lock(&hci_cb_list_lock); + list_del(&cb->list); + mutex_unlock(&hci_cb_list_lock); return 0; } diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 44acddf58a0cd..0cb52a3308bae 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -2187,11 +2187,6 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) return HCI_LM_ACCEPT; } -static bool iso_match(struct hci_conn *hcon) -{ - return hcon->type == ISO_LINK || hcon->type == LE_LINK; -} - static void iso_connect_cfm(struct hci_conn *hcon, __u8 status) { if (hcon->type != ISO_LINK) { @@ -2373,7 +2368,6 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) static struct hci_cb iso_cb = { .name = "ISO", - .match = iso_match, .connect_cfm = iso_connect_cfm, .disconn_cfm = iso_disconn_cfm, }; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index b22078b679726..c27ea70f71e1e 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7182,11 +7182,6 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, return NULL; } -static bool l2cap_match(struct hci_conn *hcon) -{ - return hcon->type == ACL_LINK || hcon->type == LE_LINK; -} - static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) { struct hci_dev *hdev = hcon->hdev; @@ -7194,6 +7189,9 @@ static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) struct l2cap_chan *pchan; u8 dst_type; + if (hcon->type != ACL_LINK && hcon->type != LE_LINK) + return; + BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status); if (status) { @@ -7258,6 +7256,9 @@ int l2cap_disconn_ind(struct hci_conn *hcon) static void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) { + if (hcon->type != ACL_LINK && hcon->type != LE_LINK) + return; + BT_DBG("hcon %p reason %d", hcon, reason); l2cap_conn_del(hcon, bt_to_errno(reason)); @@ -7565,7 +7566,6 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) static struct hci_cb l2cap_cb = { .name = "L2CAP", - .match = l2cap_match, .connect_cfm = l2cap_connect_cfm, .disconn_cfm = l2cap_disconn_cfm, .security_cfm = l2cap_security_cfm, diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 4c56ca5a216c6..ad5177e3a69b7 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -2134,11 +2134,6 @@ static int rfcomm_run(void *unused) return 0; } -static bool rfcomm_match(struct hci_conn *hcon) -{ - return hcon->type == ACL_LINK; -} - static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) { struct rfcomm_session *s; @@ -2185,7 +2180,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) static struct hci_cb rfcomm_cb = { .name = "RFCOMM", - .match = rfcomm_match, .security_cfm = rfcomm_security_cfm }; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index ed6846864ea93..5d1bc0d6aee03 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -1407,13 +1407,11 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) return lm; } -static bool sco_match(struct hci_conn *hcon) -{ - return hcon->type == SCO_LINK || hcon->type == ESCO_LINK; -} - static void sco_connect_cfm(struct hci_conn *hcon, __u8 status) { + if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) + return; + BT_DBG("hcon %p bdaddr %pMR status %u", hcon, &hcon->dst, status); if (!status) { @@ -1430,6 +1428,9 @@ static void sco_connect_cfm(struct hci_conn *hcon, __u8 status) static void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) { + if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) + return; + BT_DBG("hcon %p reason %d", hcon, reason); sco_conn_del(hcon, bt_to_errno(reason)); @@ -1455,7 +1456,6 @@ void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb) static struct hci_cb sco_cb = { .name = "SCO", - .match = sco_match, .connect_cfm = sco_connect_cfm, .disconn_cfm = sco_disconn_cfm, }; -- GitLab From 6914f7e2e25fac9d1d2b62c208eaa5f2bf810fe9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 6 Mar 2025 23:00:16 +0100 Subject: [PATCH 1393/1585] x86/mm: Define PTRS_PER_PMD for assembly code too Andy reported the following build warning from head_32.S: In file included from arch/x86/kernel/head_32.S:29: arch/x86/include/asm/pgtable_32.h:59:5: error: "PTRS_PER_PMD" is not defined, evaluates to 0 [-Werror=undef] 59 | #if PTRS_PER_PMD > 1 The reason is that on 2-level i386 paging the folded in PMD's PTRS_PER_PMD constant is not defined in assembly headers, only in generic MM C headers. Instead of trying to fish out the definition from the generic headers, just define it - it even has a comment for it already... Reported-by: Andy Shevchenko Tested-by: Andy Shevchenko Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/Z8oa8AUVyi2HWfo9@gmail.com --- arch/x86/include/asm/pgtable-2level_types.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h index 7f6ccff0ba727..4a12c276b1812 100644 --- a/arch/x86/include/asm/pgtable-2level_types.h +++ b/arch/x86/include/asm/pgtable-2level_types.h @@ -23,17 +23,17 @@ typedef union { #define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED /* - * traditional i386 two-level paging structure: + * Traditional i386 two-level paging structure: */ #define PGDIR_SHIFT 22 #define PTRS_PER_PGD 1024 - /* - * the i386 is two-level, so we don't really have any - * PMD directory physically. + * The i386 is two-level, so we don't really have any + * PMD directory physically: */ +#define PTRS_PER_PMD 1 #define PTRS_PER_PTE 1024 -- GitLab From 966944f3711665db13e214fef6d02982c49bb972 Mon Sep 17 00:00:00 2001 From: Mitchell Levy Date: Fri, 7 Mar 2025 15:27:00 -0800 Subject: [PATCH 1394/1585] rust: lockdep: Remove support for dynamically allocated LockClassKeys Currently, dynamically allocated LockCLassKeys can be used from the Rust side without having them registered. This is a soundness issue, so remove them. Fixes: 6ea5aa08857a ("rust: sync: introduce `LockClassKey`") Suggested-by: Alice Ryhl Signed-off-by: Mitchell Levy Signed-off-by: Boqun Feng Signed-off-by: Ingo Molnar Reviewed-by: Benno Lossin Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250307232717.1759087-11-boqun.feng@gmail.com --- rust/kernel/sync.rs | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs index 3498fb344dc93..16eab9138b2ba 100644 --- a/rust/kernel/sync.rs +++ b/rust/kernel/sync.rs @@ -30,28 +30,20 @@ pub struct LockClassKey(Opaque); unsafe impl Sync for LockClassKey {} impl LockClassKey { - /// Creates a new lock class key. - pub const fn new() -> Self { - Self(Opaque::uninit()) - } - pub(crate) fn as_ptr(&self) -> *mut bindings::lock_class_key { self.0.get() } } -impl Default for LockClassKey { - fn default() -> Self { - Self::new() - } -} - /// Defines a new static lock class and returns a pointer to it. #[doc(hidden)] #[macro_export] macro_rules! static_lock_class { () => {{ - static CLASS: $crate::sync::LockClassKey = $crate::sync::LockClassKey::new(); + static CLASS: $crate::sync::LockClassKey = + // SAFETY: lockdep expects uninitialized memory when it's handed a statically allocated + // lock_class_key + unsafe { ::core::mem::MaybeUninit::uninit().assume_init() }; &CLASS }}; } -- GitLab From b3c5ec8b79bf6bc49cc4850d0949d712830283d7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 7 Mar 2025 15:26:51 -0800 Subject: [PATCH 1395/1585] locking/rtmutex: Use the 'struct' keyword in kernel-doc comment Add the "struct" keyword to prevent a kernel-doc warning: rtmutex_common.h:67: warning: cannot understand function prototype: 'struct rt_wake_q_head ' Signed-off-by: Randy Dunlap Signed-off-by: Boqun Feng Signed-off-by: Ingo Molnar Acked-by: Waiman Long Link: https://lore.kernel.org/r/20250307232717.1759087-2-boqun.feng@gmail.com --- kernel/locking/rtmutex_common.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index c38a2d2d4a7ee..78dd3d8c65544 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -59,8 +59,8 @@ struct rt_mutex_waiter { }; /** - * rt_wake_q_head - Wrapper around regular wake_q_head to support - * "sleeping" spinlocks on RT + * struct rt_wake_q_head - Wrapper around regular wake_q_head to support + * "sleeping" spinlocks on RT * @head: The regular wake_q_head for sleeping lock variants * @rtlock_task: Task pointer for RT lock (spin/rwlock) wakeups */ -- GitLab From 85b2b9c16d053364e2004883140538e73b333cdb Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 7 Mar 2025 15:26:52 -0800 Subject: [PATCH 1396/1585] locking/semaphore: Use wake_q to wake up processes outside lock critical section A circular lock dependency splat has been seen involving down_trylock(): ====================================================== WARNING: possible circular locking dependency detected 6.12.0-41.el10.s390x+debug ------------------------------------------------------ dd/32479 is trying to acquire lock: 0015a20accd0d4f8 ((console_sem).lock){-.-.}-{2:2}, at: down_trylock+0x26/0x90 but task is already holding lock: 000000017e461698 (&zone->lock){-.-.}-{2:2}, at: rmqueue_bulk+0xac/0x8f0 the existing dependency chain (in reverse order) is: -> #4 (&zone->lock){-.-.}-{2:2}: -> #3 (hrtimer_bases.lock){-.-.}-{2:2}: -> #2 (&rq->__lock){-.-.}-{2:2}: -> #1 (&p->pi_lock){-.-.}-{2:2}: -> #0 ((console_sem).lock){-.-.}-{2:2}: The console_sem -> pi_lock dependency is due to calling try_to_wake_up() while holding the console_sem raw_spinlock. This dependency can be broken by using wake_q to do the wakeup instead of calling try_to_wake_up() under the console_sem lock. This will also make the semaphore's raw_spinlock become a terminal lock without taking any further locks underneath it. The hrtimer_bases.lock is a raw_spinlock while zone->lock is a spinlock. The hrtimer_bases.lock -> zone->lock dependency happens via the debug_objects_fill_pool() helper function in the debugobjects code. -> #4 (&zone->lock){-.-.}-{2:2}: __lock_acquire+0xe86/0x1cc0 lock_acquire.part.0+0x258/0x630 lock_acquire+0xb8/0xe0 _raw_spin_lock_irqsave+0xb4/0x120 rmqueue_bulk+0xac/0x8f0 __rmqueue_pcplist+0x580/0x830 rmqueue_pcplist+0xfc/0x470 rmqueue.isra.0+0xdec/0x11b0 get_page_from_freelist+0x2ee/0xeb0 __alloc_pages_noprof+0x2c2/0x520 alloc_pages_mpol_noprof+0x1fc/0x4d0 alloc_pages_noprof+0x8c/0xe0 allocate_slab+0x320/0x460 ___slab_alloc+0xa58/0x12b0 __slab_alloc.isra.0+0x42/0x60 kmem_cache_alloc_noprof+0x304/0x350 fill_pool+0xf6/0x450 debug_object_activate+0xfe/0x360 enqueue_hrtimer+0x34/0x190 __run_hrtimer+0x3c8/0x4c0 __hrtimer_run_queues+0x1b2/0x260 hrtimer_interrupt+0x316/0x760 do_IRQ+0x9a/0xe0 do_irq_async+0xf6/0x160 Normally a raw_spinlock to spinlock dependency is not legitimate and will be warned if CONFIG_PROVE_RAW_LOCK_NESTING is enabled, but debug_objects_fill_pool() is an exception as it explicitly allows this dependency for non-PREEMPT_RT kernel without causing PROVE_RAW_LOCK_NESTING lockdep splat. As a result, this dependency is legitimate and not a bug. Anyway, semaphore is the only locking primitive left that is still using try_to_wake_up() to do wakeup inside critical section, all the other locking primitives had been migrated to use wake_q to do wakeup outside of the critical section. It is also possible that there are other circular locking dependencies involving printk/console_sem or other existing/new semaphores lurking somewhere which may show up in the future. Let just do the migration now to wake_q to avoid headache like this. Reported-by: yzbot+ed801a886dfdbfe7136d@syzkaller.appspotmail.com Signed-off-by: Waiman Long Signed-off-by: Boqun Feng Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250307232717.1759087-3-boqun.feng@gmail.com --- kernel/locking/semaphore.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c index 34bfae72f2952..de9117c0e671e 100644 --- a/kernel/locking/semaphore.c +++ b/kernel/locking/semaphore.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -38,7 +39,7 @@ static noinline void __down(struct semaphore *sem); static noinline int __down_interruptible(struct semaphore *sem); static noinline int __down_killable(struct semaphore *sem); static noinline int __down_timeout(struct semaphore *sem, long timeout); -static noinline void __up(struct semaphore *sem); +static noinline void __up(struct semaphore *sem, struct wake_q_head *wake_q); /** * down - acquire the semaphore @@ -183,13 +184,16 @@ EXPORT_SYMBOL(down_timeout); void __sched up(struct semaphore *sem) { unsigned long flags; + DEFINE_WAKE_Q(wake_q); raw_spin_lock_irqsave(&sem->lock, flags); if (likely(list_empty(&sem->wait_list))) sem->count++; else - __up(sem); + __up(sem, &wake_q); raw_spin_unlock_irqrestore(&sem->lock, flags); + if (!wake_q_empty(&wake_q)) + wake_up_q(&wake_q); } EXPORT_SYMBOL(up); @@ -269,11 +273,12 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long timeout) return __down_common(sem, TASK_UNINTERRUPTIBLE, timeout); } -static noinline void __sched __up(struct semaphore *sem) +static noinline void __sched __up(struct semaphore *sem, + struct wake_q_head *wake_q) { struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, struct semaphore_waiter, list); list_del(&waiter->list); waiter->up = true; - wake_up_process(waiter->task); + wake_q_add(wake_q, waiter->task); } -- GitLab From f3600c867c99a2cc8038680ecf211089c50e7971 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Thu, 6 Mar 2025 21:55:20 +0000 Subject: [PATCH 1397/1585] netmem: prevent TX of unreadable skbs Currently on stable trees we have support for netmem/devmem RX but not TX. It is not safe to forward/redirect an RX unreadable netmem packet into the device's TX path, as the device may call dma-mapping APIs on dma addrs that should not be passed to it. Fix this by preventing the xmit of unreadable skbs. Tested by configuring tc redirect: sudo tc qdisc add dev eth1 ingress sudo tc filter add dev eth1 ingress protocol ip prio 1 flower ip_proto \ tcp src_ip 192.168.1.12 action mirred egress redirect dev eth1 Before, I see unreadable skbs in the driver's TX path passed to dma mapping APIs. After, I don't see unreadable skbs in the driver's TX path passed to dma mapping APIs. Fixes: 65249feb6b3d ("net: add support for skbs with unreadable frags") Suggested-by: Jakub Kicinski Cc: stable@vger.kernel.org Signed-off-by: Mina Almasry Link: https://patch.msgid.link/20250306215520.1415465-1-almasrymina@google.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 30da277c5a6f8..2f7f5fd9ffec7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3872,6 +3872,9 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device { netdev_features_t features; + if (!skb_frags_readable(skb)) + goto out_kfree_skb; + features = netif_skb_features(skb); skb = validate_xmit_vlan(skb, features); if (unlikely(!skb)) -- GitLab From d749d901b2168389f060b654fdaa08acf6b367d2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 6 Mar 2025 23:25:29 +0200 Subject: [PATCH 1398/1585] net/mlx5: Fill out devlink dev info only for PFs Firmware version query is supported on the PFs. Due to this following kernel warning log is observed: [ 188.590344] mlx5_core 0000:08:00.2: mlx5_fw_version_query:816:(pid 1453): fw query isn't supported by the FW Fix it by restricting the query and devlink info to the PF. Fixes: 8338d9378895 ("net/mlx5: Added devlink info callback") Signed-off-by: Jiri Pirko Reviewed-by: Kalesh AP Signed-off-by: Tariq Toukan Reviewed-by: Parav Pandit Link: https://patch.msgid.link/20250306212529.429329-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 98d4306929f3e..a2cf3e79693dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -46,6 +46,9 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, u32 running_fw, stored_fw; int err; + if (!mlx5_core_is_pf(dev)) + return 0; + err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id); if (err) return err; -- GitLab From dc5340c3133a3ebe54853fd299116149e528cfaa Mon Sep 17 00:00:00 2001 From: Joseph Huang Date: Thu, 6 Mar 2025 12:23:05 -0500 Subject: [PATCH 1399/1585] net: dsa: mv88e6xxx: Verify after ATU Load ops ATU Load operations could fail silently if there's not enough space on the device to hold the new entry. When this happens, the symptom depends on the unknown flood settings. If unknown multicast flood is disabled, the multicast packets are dropped when the ATU table is full. If unknown multicast flood is enabled, the multicast packets will be flooded to all ports. Either way, IGMP snooping is broken when the ATU Load operation fails silently. Do a Read-After-Write verification after each fdb/mdb add operation to make sure that the operation was really successful, and return -ENOSPC otherwise. Fixes: defb05b9b9b4 ("net: dsa: mv88e6xxx: Add support for fdb_add, fdb_del, and fdb_getnext") Signed-off-by: Joseph Huang Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250306172306.3859214-1-Joseph.Huang@garmin.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 59 ++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 11 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 68d1e891752b8..5db96ca52505a 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2208,13 +2208,11 @@ mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, return err; } -static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, - const unsigned char *addr, u16 vid, - u8 state) +static int mv88e6xxx_port_db_get(struct mv88e6xxx_chip *chip, + const unsigned char *addr, u16 vid, + u16 *fid, struct mv88e6xxx_atu_entry *entry) { - struct mv88e6xxx_atu_entry entry; struct mv88e6xxx_vtu_entry vlan; - u16 fid; int err; /* Ports have two private address databases: one for when the port is @@ -2225,7 +2223,7 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, * VLAN ID into the port's database used for VLAN-unaware bridging. */ if (vid == 0) { - fid = MV88E6XXX_FID_BRIDGED; + *fid = MV88E6XXX_FID_BRIDGED; } else { err = mv88e6xxx_vtu_get(chip, vid, &vlan); if (err) @@ -2235,14 +2233,39 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, if (!vlan.valid) return -EOPNOTSUPP; - fid = vlan.fid; + *fid = vlan.fid; } - entry.state = 0; - ether_addr_copy(entry.mac, addr); - eth_addr_dec(entry.mac); + entry->state = 0; + ether_addr_copy(entry->mac, addr); + eth_addr_dec(entry->mac); + + return mv88e6xxx_g1_atu_getnext(chip, *fid, entry); +} + +static bool mv88e6xxx_port_db_find(struct mv88e6xxx_chip *chip, + const unsigned char *addr, u16 vid) +{ + struct mv88e6xxx_atu_entry entry; + u16 fid; + int err; - err = mv88e6xxx_g1_atu_getnext(chip, fid, &entry); + err = mv88e6xxx_port_db_get(chip, addr, vid, &fid, &entry); + if (err) + return false; + + return entry.state && ether_addr_equal(entry.mac, addr); +} + +static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, + const unsigned char *addr, u16 vid, + u8 state) +{ + struct mv88e6xxx_atu_entry entry; + u16 fid; + int err; + + err = mv88e6xxx_port_db_get(chip, addr, vid, &fid, &entry); if (err) return err; @@ -2846,6 +2869,13 @@ static int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, mv88e6xxx_reg_lock(chip); err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid, MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC); + if (err) + goto out; + + if (!mv88e6xxx_port_db_find(chip, addr, vid)) + err = -ENOSPC; + +out: mv88e6xxx_reg_unlock(chip); return err; @@ -6614,6 +6644,13 @@ static int mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port, mv88e6xxx_reg_lock(chip); err = mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid, MV88E6XXX_G1_ATU_DATA_STATE_MC_STATIC); + if (err) + goto out; + + if (!mv88e6xxx_port_db_find(chip, mdb->addr, mdb->vid)) + err = -ENOSPC; + +out: mv88e6xxx_reg_unlock(chip); return err; -- GitLab From 26db9c9ee19c36a97dbb1cfef007a3c189c4c874 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Thu, 6 Mar 2025 18:24:18 +0800 Subject: [PATCH 1400/1585] net: mctp i3c: Copy headers if cloned Use skb_cow_head() prior to modifying the tx skb. This is necessary when the skb has been cloned, to avoid modifying other shared clones. Signed-off-by: Matt Johnston Fixes: c8755b29b58e ("mctp i3c: MCTP I3C driver") Link: https://patch.msgid.link/20250306-matt-i3c-cow-head-v1-1-d5e6a5495227@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- drivers/net/mctp/mctp-i3c.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/mctp/mctp-i3c.c b/drivers/net/mctp/mctp-i3c.c index c1e72253063b5..c678f79aa3561 100644 --- a/drivers/net/mctp/mctp-i3c.c +++ b/drivers/net/mctp/mctp-i3c.c @@ -506,10 +506,15 @@ static int mctp_i3c_header_create(struct sk_buff *skb, struct net_device *dev, const void *saddr, unsigned int len) { struct mctp_i3c_internal_hdr *ihdr; + int rc; if (!daddr || !saddr) return -EINVAL; + rc = skb_cow_head(skb, sizeof(struct mctp_i3c_internal_hdr)); + if (rc) + return rc; + skb_push(skb, sizeof(struct mctp_i3c_internal_hdr)); skb_reset_mac_header(skb); ihdr = (void *)skb_mac_header(skb); -- GitLab From df8ce77ba8b7c012a3edd1ca7368b46831341466 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Thu, 6 Mar 2025 10:33:20 +0800 Subject: [PATCH 1401/1585] net: mctp i2c: Copy headers if cloned Use skb_cow_head() prior to modifying the TX SKB. This is necessary when the SKB has been cloned, to avoid modifying other shared clones. Signed-off-by: Matt Johnston Fixes: f5b8abf9fc3d ("mctp i2c: MCTP I2C binding driver") Link: https://patch.msgid.link/20250306-matt-mctp-i2c-cow-v1-1-293827212681@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- drivers/net/mctp/mctp-i2c.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c index e3dcdeacc12c5..d74d47dd6e04d 100644 --- a/drivers/net/mctp/mctp-i2c.c +++ b/drivers/net/mctp/mctp-i2c.c @@ -583,6 +583,7 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev, struct mctp_i2c_hdr *hdr; struct mctp_hdr *mhdr; u8 lldst, llsrc; + int rc; if (len > MCTP_I2C_MAXMTU) return -EMSGSIZE; @@ -593,6 +594,10 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev, lldst = *((u8 *)daddr); llsrc = *((u8 *)saddr); + rc = skb_cow_head(skb, sizeof(struct mctp_i2c_hdr)); + if (rc) + return rc; + skb_push(skb, sizeof(struct mctp_i2c_hdr)); skb_reset_mac_header(skb); hdr = (void *)skb_mac_header(skb); -- GitLab From a07364b394697d2e0baffeb517f41385259aa484 Mon Sep 17 00:00:00 2001 From: Andrei Botila Date: Tue, 4 Mar 2025 18:06:13 +0200 Subject: [PATCH 1402/1585] net: phy: nxp-c45-tja11xx: add TJA112X PHY configuration errata The most recent sillicon versions of TJA1120 and TJA1121 can achieve full silicon performance by putting the PHY in managed mode. It is necessary to apply these PHY writes before link gets established. Application of this fix is required after restart of device and wakeup from sleep. Cc: stable@vger.kernel.org Fixes: f1fe5dff2b8a ("net: phy: nxp-c45-tja11xx: add TJA1120 support") Signed-off-by: Andrei Botila Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250304160619.181046-2-andrei.botila@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/nxp-c45-tja11xx.c | 52 +++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 34231b5b91754..709d6c9f7cbae 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -22,6 +22,11 @@ #define PHY_ID_TJA_1103 0x001BB010 #define PHY_ID_TJA_1120 0x001BB031 +#define VEND1_DEVICE_ID3 0x0004 +#define TJA1120_DEV_ID3_SILICON_VERSION GENMASK(15, 12) +#define TJA1120_DEV_ID3_SAMPLE_TYPE GENMASK(11, 8) +#define DEVICE_ID3_SAMPLE_TYPE_R 0x9 + #define VEND1_DEVICE_CONTROL 0x0040 #define DEVICE_CONTROL_RESET BIT(15) #define DEVICE_CONTROL_CONFIG_GLOBAL_EN BIT(14) @@ -1593,6 +1598,50 @@ static int nxp_c45_set_phy_mode(struct phy_device *phydev) return 0; } +/* Errata: ES_TJA1120 and ES_TJA1121 Rev. 1.0 — 28 November 2024 Section 3.1 */ +static void nxp_c45_tja1120_errata(struct phy_device *phydev) +{ + int silicon_version, sample_type; + bool macsec_ability; + int phy_abilities; + int ret = 0; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_DEVICE_ID3); + if (ret < 0) + return; + + sample_type = FIELD_GET(TJA1120_DEV_ID3_SAMPLE_TYPE, ret); + if (sample_type != DEVICE_ID3_SAMPLE_TYPE_R) + return; + + silicon_version = FIELD_GET(TJA1120_DEV_ID3_SILICON_VERSION, ret); + + phy_abilities = phy_read_mmd(phydev, MDIO_MMD_VEND1, + VEND1_PORT_ABILITIES); + macsec_ability = !!(phy_abilities & MACSEC_ABILITY); + if ((!macsec_ability && silicon_version == 2) || + (macsec_ability && silicon_version == 1)) { + /* TJA1120/TJA1121 PHY configuration errata workaround. + * Apply PHY writes sequence before link up. + */ + if (!macsec_ability) { + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F8, 0x4b95); + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F9, 0xf3cd); + } else { + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F8, 0x89c7); + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F9, 0x0893); + } + + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x0476, 0x58a0); + + phy_write_mmd(phydev, MDIO_MMD_PMAPMD, 0x8921, 0xa3a); + phy_write_mmd(phydev, MDIO_MMD_PMAPMD, 0x89F1, 0x16c1); + + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F8, 0x0); + phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F9, 0x0); + } +} + static int nxp_c45_config_init(struct phy_device *phydev) { int ret; @@ -1609,6 +1658,9 @@ static int nxp_c45_config_init(struct phy_device *phydev) phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F8, 1); phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F9, 2); + if (phy_id_compare(phydev->phy_id, PHY_ID_TJA_1120, GENMASK(31, 4))) + nxp_c45_tja1120_errata(phydev); + phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_PHY_CONFIG, PHY_CONFIG_AUTO); -- GitLab From 48939523843e4813e78920f54937944a8787134b Mon Sep 17 00:00:00 2001 From: Andrei Botila Date: Tue, 4 Mar 2025 18:06:14 +0200 Subject: [PATCH 1403/1585] net: phy: nxp-c45-tja11xx: add TJA112XB SGMII PCS restart errata TJA1120B/TJA1121B can achieve a stable operation of SGMII after a startup event by putting the SGMII PCS into power down mode and restart afterwards. It is necessary to put the SGMII PCS into power down mode and back up. Cc: stable@vger.kernel.org Fixes: f1fe5dff2b8a ("net: phy: nxp-c45-tja11xx: add TJA1120 support") Signed-off-by: Andrei Botila Link: https://patch.msgid.link/20250304160619.181046-3-andrei.botila@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/nxp-c45-tja11xx.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 709d6c9f7cbae..e9fc54517449c 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -114,6 +114,9 @@ #define MII_BASIC_CONFIG_RMII 0x5 #define MII_BASIC_CONFIG_MII 0x4 +#define VEND1_SGMII_BASIC_CONTROL 0xB000 +#define SGMII_LPM BIT(11) + #define VEND1_SYMBOL_ERROR_CNT_XTD 0x8351 #define EXTENDED_CNT_EN BIT(15) #define VEND1_MONITOR_STATUS 0xAC80 @@ -1598,11 +1601,11 @@ static int nxp_c45_set_phy_mode(struct phy_device *phydev) return 0; } -/* Errata: ES_TJA1120 and ES_TJA1121 Rev. 1.0 — 28 November 2024 Section 3.1 */ +/* Errata: ES_TJA1120 and ES_TJA1121 Rev. 1.0 — 28 November 2024 Section 3.1 & 3.2 */ static void nxp_c45_tja1120_errata(struct phy_device *phydev) { + bool macsec_ability, sgmii_ability; int silicon_version, sample_type; - bool macsec_ability; int phy_abilities; int ret = 0; @@ -1619,6 +1622,7 @@ static void nxp_c45_tja1120_errata(struct phy_device *phydev) phy_abilities = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_PORT_ABILITIES); macsec_ability = !!(phy_abilities & MACSEC_ABILITY); + sgmii_ability = !!(phy_abilities & SGMII_ABILITY); if ((!macsec_ability && silicon_version == 2) || (macsec_ability && silicon_version == 1)) { /* TJA1120/TJA1121 PHY configuration errata workaround. @@ -1639,6 +1643,18 @@ static void nxp_c45_tja1120_errata(struct phy_device *phydev) phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F8, 0x0); phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F9, 0x0); + + if (sgmii_ability) { + /* TJA1120B/TJA1121B SGMII PCS restart errata workaround. + * Put SGMII PCS into power down mode and back up. + */ + phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, + VEND1_SGMII_BASIC_CONTROL, + SGMII_LPM); + phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, + VEND1_SGMII_BASIC_CONTROL, + SGMII_LPM); + } } } -- GitLab From 505ead7ab77f289f12d8a68ac83da068e4d4408b Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 6 Mar 2025 05:16:18 -0800 Subject: [PATCH 1404/1585] netpoll: hold rcu read lock in __netpoll_send_skb() The function __netpoll_send_skb() is being invoked without holding the RCU read lock. This oversight triggers a warning message when CONFIG_PROVE_RCU_LIST is enabled: net/core/netpoll.c:330 suspicious rcu_dereference_check() usage! netpoll_send_skb netpoll_send_udp write_ext_msg console_flush_all console_unlock vprintk_emit To prevent npinfo from disappearing unexpectedly, ensure that __netpoll_send_skb() is protected with the RCU read lock. Fixes: 2899656b494dcd1 ("netpoll: take rcu_read_lock_bh() in netpoll_send_skb_on_dev()") Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250306-netpoll_rcu_v2-v2-1-bc4f5c51742a@debian.org Signed-off-by: Jakub Kicinski --- net/core/netpoll.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 62b4041aae1ae..0ab722d95a2df 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -319,6 +319,7 @@ static int netpoll_owner_active(struct net_device *dev) static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { netdev_tx_t status = NETDEV_TX_BUSY; + netdev_tx_t ret = NET_XMIT_DROP; struct net_device *dev; unsigned long tries; /* It is up to the caller to keep npinfo alive. */ @@ -327,11 +328,12 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) lockdep_assert_irqs_disabled(); dev = np->dev; + rcu_read_lock(); npinfo = rcu_dereference_bh(dev->npinfo); if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { dev_kfree_skb_irq(skb); - return NET_XMIT_DROP; + goto out; } /* don't get messages out of order, and no recursion */ @@ -370,7 +372,10 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } - return NETDEV_TX_OK; + ret = NETDEV_TX_OK; +out: + rcu_read_unlock(); + return ret; } netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) -- GitLab From da64a2359092ceec4f9dea5b329d0aef20104217 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Sat, 8 Mar 2025 13:50:45 +0800 Subject: [PATCH 1405/1585] LoongArch: Convert unreachable() to BUG() When compiling on LoongArch, there exists the following objtool warning in arch/loongarch/kernel/machine_kexec.o: kexec_reboot() falls through to next function crash_shutdown_secondary() Avoid using unreachable() as it can (and will in the absence of UBSAN) generate fall-through code. Use BUG() so we get a "break BRK_BUG" trap (with unreachable annotation). Cc: stable@vger.kernel.org # 6.12+ Acked-by: Josh Poimboeuf Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/machine_kexec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c index 8ae641dc53bb7..f9381800e291c 100644 --- a/arch/loongarch/kernel/machine_kexec.c +++ b/arch/loongarch/kernel/machine_kexec.c @@ -126,14 +126,14 @@ void kexec_reboot(void) /* All secondary cpus go to kexec_smp_wait */ if (smp_processor_id() > 0) { relocated_kexec_smp_wait(NULL); - unreachable(); + BUG(); } #endif do_kexec = (void *)reboot_code_buffer; do_kexec(efi_boot, cmdline_ptr, systable_ptr, start_addr, first_ind_entry); - unreachable(); + BUG(); } -- GitLab From a0d3c8bcb9206ac207c7ad3182027c6b0a1319bb Mon Sep 17 00:00:00 2001 From: Yuli Wang Date: Sat, 8 Mar 2025 13:51:32 +0800 Subject: [PATCH 1406/1585] LoongArch: Eliminate superfluous get_numa_distances_cnt() In LoongArch, get_numa_distances_cnt() isn't in use, resulting in a compiler warning. Fix follow errors with clang-18 when W=1e: arch/loongarch/kernel/acpi.c:259:28: error: unused function 'get_numa_distances_cnt' [-Werror,-Wunused-function] 259 | static inline unsigned int get_numa_distances_cnt(struct acpi_table_slit *slit) | ^~~~~~~~~~~~~~~~~~~~~~ 1 error generated. Link: https://lore.kernel.org/all/Z7bHPVUH4lAezk0E@kernel.org/ Signed-off-by: Yuli Wang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/acpi.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 382a09a7152c3..1120ac2824f6e 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -249,18 +249,6 @@ static __init int setup_node(int pxm) return acpi_map_pxm_to_node(pxm); } -/* - * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for - * I/O localities since SRAT does not list them. I/O localities are - * not supported at this point. - */ -unsigned int numa_distance_cnt; - -static inline unsigned int get_numa_distances_cnt(struct acpi_table_slit *slit) -{ - return slit->locality_count; -} - void __init numa_set_distance(int from, int to, int distance) { if ((u8)distance != distance || (from == to && distance != LOCAL_DISTANCE)) { -- GitLab From c9117434c8f7523f0b77db4c5766f5011cc94677 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 8 Mar 2025 13:51:32 +0800 Subject: [PATCH 1407/1585] LoongArch: Use polling play_dead() when resuming from hibernation When CONFIG_RANDOM_KMALLOC_CACHES or other randomization infrastructrue enabled, the idle_task's stack may different between the booting kernel and target kernel. So when resuming from hibernation, an ACTION_BOOT_CPU IPI wakeup the idle instruction in arch_cpu_idle_dead() and jump to the interrupt handler. But since the stack pointer is changed, the interrupt handler cannot restore correct context. So rename the current arch_cpu_idle_dead() to idle_play_dead(), make it as the default version of play_dead(), and the new arch_cpu_idle_dead() call play_dead() directly. For hibernation, implement an arch-specific hibernate_resume_nonboot_cpu_disable() to use the polling version (idle instruction is replace by nop, and irq is disabled) of play_dead(), i.e. poll_play_dead(), to avoid IPI handler corrupting the idle_task's stack when resuming from hibernation. This solution is a little similar to commit 406f992e4a372dafbe3c ("x86 / hibernate: Use hlt_play_dead() when resuming from hibernation"). Cc: stable@vger.kernel.org Tested-by: Erpeng Xu Tested-by: Yuli Wang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/smp.c | 47 ++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index fbf747447f13f..4b24589c0b565 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -423,7 +424,7 @@ void loongson_cpu_die(unsigned int cpu) mb(); } -void __noreturn arch_cpu_idle_dead(void) +static void __noreturn idle_play_dead(void) { register uint64_t addr; register void (*init_fn)(void); @@ -447,6 +448,50 @@ void __noreturn arch_cpu_idle_dead(void) BUG(); } +#ifdef CONFIG_HIBERNATION +static void __noreturn poll_play_dead(void) +{ + register uint64_t addr; + register void (*init_fn)(void); + + idle_task_exit(); + __this_cpu_write(cpu_state, CPU_DEAD); + + __smp_mb(); + do { + __asm__ __volatile__("nop\n\t"); + addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0); + } while (addr == 0); + + init_fn = (void *)TO_CACHE(addr); + iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR); + + init_fn(); + BUG(); +} +#endif + +static void (*play_dead)(void) = idle_play_dead; + +void __noreturn arch_cpu_idle_dead(void) +{ + play_dead(); + BUG(); /* play_dead() doesn't return */ +} + +#ifdef CONFIG_HIBERNATION +int hibernate_resume_nonboot_cpu_disable(void) +{ + int ret; + + play_dead = poll_play_dead; + ret = suspend_disable_secondary_cpus(); + play_dead = idle_play_dead; + + return ret; +} +#endif + #endif /* -- GitLab From c8477bb0a8e7f6b2e47952b403c5cb67a6929e55 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 8 Mar 2025 13:51:32 +0800 Subject: [PATCH 1408/1585] LoongArch: Set max_pfn with the PFN of the last page The current max_pfn equals to zero. In this case, it causes user cannot get some page information through /proc filesystem such as kpagecount. The following message is displayed by stress-ng test suite with command "stress-ng --verbose --physpage 1 -t 1". # stress-ng --verbose --physpage 1 -t 1 stress-ng: error: [1691] physpage: cannot read page count for address 0x134ac000 in /proc/kpagecount, errno=22 (Invalid argument) stress-ng: error: [1691] physpage: cannot read page count for address 0x7ffff207c3a8 in /proc/kpagecount, errno=22 (Invalid argument) stress-ng: error: [1691] physpage: cannot read page count for address 0x134b0000 in /proc/kpagecount, errno=22 (Invalid argument) ... After applying this patch, the kernel can pass the test. # stress-ng --verbose --physpage 1 -t 1 stress-ng: debug: [1701] physpage: [1701] started (instance 0 on CPU 3) stress-ng: debug: [1701] physpage: [1701] exited (instance 0 on CPU 3) stress-ng: debug: [1700] physpage: [1701] terminated (success) Cc: stable@vger.kernel.org # 6.8+ Fixes: ff6c3d81f2e8 ("NUMA: optimize detection of memory with no node id assigned by firmware") Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kernel/setup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index edcfdfcad7d22..90cb3ca96f085 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -387,6 +387,9 @@ static void __init check_kernel_sections_mem(void) */ static void __init arch_mem_init(char **cmdline_p) { + /* Recalculate max_low_pfn for "mem=xxx" */ + max_pfn = max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); + if (usermem) pr_info("User-defined physical RAM map overwrite\n"); -- GitLab From 3109d5ff484b7bc7b955f166974c6776d91f247b Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 8 Mar 2025 13:51:32 +0800 Subject: [PATCH 1409/1585] LoongArch: Set hugetlb mmap base address aligned with pmd size With ltp test case "testcases/bin/hugefork02", there is a dmesg error report message such as: kernel BUG at mm/hugetlb.c:5550! Oops - BUG[#1]: CPU: 0 UID: 0 PID: 1517 Comm: hugefork02 Not tainted 6.14.0-rc2+ #241 Hardware name: QEMU QEMU Virtual Machine, BIOS unknown 2/2/2022 pc 90000000004eaf1c ra 9000000000485538 tp 900000010edbc000 sp 900000010edbf940 a0 900000010edbfb00 a1 9000000108d20280 a2 00007fffe9474000 a3 00007ffff3474000 a4 0000000000000000 a5 0000000000000003 a6 00000000003cadd3 a7 0000000000000000 t0 0000000001ffffff t1 0000000001474000 t2 900000010ecd7900 t3 00007fffe9474000 t4 00007fffe9474000 t5 0000000000000040 t6 900000010edbfb00 t7 0000000000000001 t8 0000000000000005 u0 90000000004849d0 s9 900000010edbfa00 s0 9000000108d20280 s1 00007fffe9474000 s2 0000000002000000 s3 9000000108d20280 s4 9000000002b38b10 s5 900000010edbfb00 s6 00007ffff3474000 s7 0000000000000406 s8 900000010edbfa08 ra: 9000000000485538 unmap_vmas+0x130/0x218 ERA: 90000000004eaf1c __unmap_hugepage_range+0x6f4/0x7d0 PRMD: 00000004 (PPLV0 +PIE -PWE) EUEN: 00000007 (+FPE +SXE +ASXE -BTE) ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7) ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) Process hugefork02 (pid: 1517, threadinfo=00000000a670eaf4, task=000000007a95fc64) Call Trace: [<90000000004eaf1c>] __unmap_hugepage_range+0x6f4/0x7d0 [<9000000000485534>] unmap_vmas+0x12c/0x218 [<9000000000494068>] exit_mmap+0xe0/0x308 [<900000000025fdc4>] mmput+0x74/0x180 [<900000000026a284>] do_exit+0x294/0x898 [<900000000026aa30>] do_group_exit+0x30/0x98 [<900000000027bed4>] get_signal+0x83c/0x868 [<90000000002457b4>] arch_do_signal_or_restart+0x54/0xfa0 [<90000000015795e8>] irqentry_exit_to_user_mode+0xb8/0x138 [<90000000002572d0>] tlb_do_page_fault_1+0x114/0x1b4 The problem is that base address allocated from hugetlbfs is not aligned with pmd size. Here add a checking for hugetlbfs and align base address with pmd size. After this patch the test case "testcases/bin/hugefork02" passes to run. This is similar to the commit 7f24cbc9c4d42db8a3c8484d1 ("mm/mmap: teach generic_get_unmapped_area{_topdown} to handle hugetlb mappings"). Cc: stable@vger.kernel.org # 6.13+ Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/mm/mmap.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c index 914e82ff3f656..1df9e99582cc6 100644 --- a/arch/loongarch/mm/mmap.c +++ b/arch/loongarch/mm/mmap.c @@ -3,6 +3,7 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include +#include #include #include #include @@ -63,8 +64,11 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, } info.length = len; - info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0; info.align_offset = pgoff << PAGE_SHIFT; + if (filp && is_file_hugepages(filp)) + info.align_mask = huge_page_mask_align(filp); + else + info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0; if (dir == DOWN) { info.flags = VM_UNMAPPED_AREA_TOPDOWN; -- GitLab From 6fb1867d5a44b0a061cf39d2492d23d314bcb8ce Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 8 Mar 2025 13:51:59 +0800 Subject: [PATCH 1410/1585] LoongArch: KVM: Add interrupt checking for AVEC There is a newly added macro INT_AVEC with CSR ESTAT register, which is bit 14 used for LoongArch AVEC support. AVEC interrupt status bit 14 is supported with macro CSR_ESTAT_IS, so here replace the hard-coded value 0x1fff with macro CSR_ESTAT_IS so that the AVEC interrupt status is also supported by KVM. Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/vcpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 20f941af3e9ea..9e1a9b4aa4c6a 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -311,7 +311,7 @@ static int kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) { int ret = RESUME_GUEST; unsigned long estat = vcpu->arch.host_estat; - u32 intr = estat & 0x1fff; /* Ignore NMI */ + u32 intr = estat & CSR_ESTAT_IS; u32 ecode = (estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT; vcpu->mode = OUTSIDE_GUEST_MODE; -- GitLab From 78d7bc5a02e1468df53896df354fa80727f35b7d Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 8 Mar 2025 13:52:01 +0800 Subject: [PATCH 1411/1585] LoongArch: KVM: Reload guest CSR registers after sleep On host, the HW guest CSR registers are lost after suspend and resume operation. Since last_vcpu of boot CPU still records latest vCPU pointer so that the guest CSR register skips to reload when boot CPU resumes and vCPU is scheduled. Here last_vcpu is cleared so that guest CSR registers will reload from scheduled vCPU context after suspend and resume. Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index f6d3242b9234a..b6864d6e5ec8d 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -317,6 +317,13 @@ int kvm_arch_enable_virtualization_cpu(void) kvm_debug("GCFG:%lx GSTAT:%lx GINTC:%lx GTLBC:%lx", read_csr_gcfg(), read_csr_gstat(), read_csr_gintc(), read_csr_gtlbc()); + /* + * HW Guest CSR registers are lost after CPU suspend and resume. + * Clear last_vcpu so that Guest CSR registers forced to reload + * from vCPU SW state. + */ + this_cpu_ptr(vmcs)->last_vcpu = NULL; + return 0; } -- GitLab From 6bdbb73dc8d99fbb77f5db79dbb6f108708090b4 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 8 Mar 2025 13:52:04 +0800 Subject: [PATCH 1412/1585] LoongArch: KVM: Fix GPA size issue about VM Physical address space is 48 bit on Loongson-3A5000 physical machine, however it is 47 bit for VM on Loongson-3A5000 system. Size of physical address space of VM is the same with the size of virtual user space (a half) of physical machine. Variable cpu_vabits represents user address space, kernel address space is not included (user space and kernel space are both a half of total). Here cpu_vabits, rather than cpu_vabits - 1, is to represent the size of guest physical address space. Also there is strict checking about page fault GPA address, inject error if it is larger than maximum GPA address of VM. Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/exit.c | 6 ++++++ arch/loongarch/kvm/vm.c | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index c1e8ec5b941b2..ea321403644ad 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -669,6 +669,12 @@ static int kvm_handle_rdwr_fault(struct kvm_vcpu *vcpu, bool write) struct kvm_run *run = vcpu->run; unsigned long badv = vcpu->arch.badv; + /* Inject ADE exception if exceed max GPA size */ + if (unlikely(badv >= vcpu->kvm->arch.gpa_size)) { + kvm_queue_exception(vcpu, EXCCODE_ADE, EXSUBCODE_ADEM); + return RESUME_GUEST; + } + ret = kvm_handle_mm_fault(vcpu, badv, write); if (ret) { /* Treat as MMIO */ diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index b8b3e1972d6ea..edccfc8c9cd80 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -48,7 +48,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (kvm_pvtime_supported()) kvm->arch.pv_features |= BIT(KVM_FEATURE_STEAL_TIME); - kvm->arch.gpa_size = BIT(cpu_vabits - 1); + /* + * cpu_vabits means user address space only (a half of total). + * GPA size of VM is the same with the size of user address space. + */ + kvm->arch.gpa_size = BIT(cpu_vabits); kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1; kvm->arch.invalid_ptes[0] = 0; kvm->arch.invalid_ptes[1] = (unsigned long)invalid_pte_table; -- GitLab From 0704a15b930cf97073ce091a0cd7ad32f2304329 Mon Sep 17 00:00:00 2001 From: Thomas Mizrahi Date: Sat, 8 Mar 2025 01:06:28 -0300 Subject: [PATCH 1413/1585] ASoC: amd: yc: Support mic on another Lenovo ThinkPad E16 Gen 2 model The internal microphone on the Lenovo ThinkPad E16 model requires a quirk entry to work properly. This was fixed in a previous patch (linked below), but depending on the specific variant of the model, the product name may be "21M5" or "21M6". The following patch fixed this issue for the 21M5 variant: https://lore.kernel.org/all/20240725065442.9293-1-tiwai@suse.de/ This patch adds support for the microphone on the 21M6 variant. Link: https://github.com/ramaureirac/thinkpad-e14-linux/issues/31 Cc: stable@vger.kernel.org Signed-off-by: Thomas Mizrahi Link: https://patch.msgid.link/20250308041303.198765-1-thomasmizra@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index b16587d8f97a8..a7637056972aa 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -248,6 +248,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "21M5"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21M6"), + } + }, { .driver_data = &acp6x_card, .matches = { -- GitLab From 058a6bec37c6c3b826158f6d26b75de43816a880 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 7 Mar 2025 23:02:56 +0100 Subject: [PATCH 1414/1585] x86/microcode/AMD: Add some forgotten models to the SHA check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add some more forgotten models to the SHA check. Fixes: 50cef76d5cb0 ("x86/microcode/AMD: Load only SHA256-checksummed patches") Reported-by: Toralf Förster Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Tested-by: Toralf Förster Link: https://lore.kernel.org/r/20250307220256.11816-1-bp@kernel.org --- arch/x86/kernel/cpu/microcode/amd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 95ac1c6a84fbe..c69b1bc454834 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -175,23 +175,29 @@ static bool need_sha_check(u32 cur_rev) { switch (cur_rev >> 8) { case 0x80012: return cur_rev <= 0x800126f; break; + case 0x80082: return cur_rev <= 0x800820f; break; case 0x83010: return cur_rev <= 0x830107c; break; case 0x86001: return cur_rev <= 0x860010e; break; case 0x86081: return cur_rev <= 0x8608108; break; case 0x87010: return cur_rev <= 0x8701034; break; case 0x8a000: return cur_rev <= 0x8a0000a; break; + case 0xa0010: return cur_rev <= 0xa00107a; break; case 0xa0011: return cur_rev <= 0xa0011da; break; case 0xa0012: return cur_rev <= 0xa001243; break; + case 0xa0082: return cur_rev <= 0xa00820e; break; case 0xa1011: return cur_rev <= 0xa101153; break; case 0xa1012: return cur_rev <= 0xa10124e; break; case 0xa1081: return cur_rev <= 0xa108109; break; case 0xa2010: return cur_rev <= 0xa20102f; break; case 0xa2012: return cur_rev <= 0xa201212; break; + case 0xa4041: return cur_rev <= 0xa404109; break; + case 0xa5000: return cur_rev <= 0xa500013; break; case 0xa6012: return cur_rev <= 0xa60120a; break; case 0xa7041: return cur_rev <= 0xa704109; break; case 0xa7052: return cur_rev <= 0xa705208; break; case 0xa7080: return cur_rev <= 0xa708009; break; case 0xa70c0: return cur_rev <= 0xa70C009; break; + case 0xaa001: return cur_rev <= 0xaa00116; break; case 0xaa002: return cur_rev <= 0xaa00218; break; default: break; } -- GitLab From 80e54e84911a923c40d7bee33a34c1b4be148d7a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 9 Mar 2025 13:45:25 -1000 Subject: [PATCH 1415/1585] Linux 6.14-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6a8e5be6b0043..1d6a9ec8a2ace 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Baby Opossum Posse # *DOCUMENTATION* -- GitLab From aed709355fd05ef747e1af24a1d5d78cd7feb81e Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 10 Feb 2025 11:34:41 -0800 Subject: [PATCH 1416/1585] drm/hyperv: Fix address space leak when Hyper-V DRM device is removed When a Hyper-V DRM device is probed, the driver allocates MMIO space for the vram, and maps it cacheable. If the device removed, or in the error path for device probing, the MMIO space is released but no unmap is done. Consequently the kernel address space for the mapping is leaked. Fix this by adding iounmap() calls in the device removal path, and in the error path during device probing. Fixes: f1f63cbb705d ("drm/hyperv: Fix an error handling path in hyperv_vmbus_probe()") Fixes: a0ab5abced55 ("drm/hyperv : Removing the restruction of VRAM allocation with PCI bar size") Signed-off-by: Michael Kelley Reviewed-by: Saurabh Sengar Tested-by: Saurabh Sengar Link: https://lore.kernel.org/r/20250210193441.2414-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20250210193441.2414-1-mhklinux@outlook.com> --- drivers/gpu/drm/hyperv/hyperv_drm_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c index f59abfa7622ac..0d49f168a919d 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c @@ -154,6 +154,7 @@ static int hyperv_vmbus_probe(struct hv_device *hdev, return 0; err_free_mmio: + iounmap(hv->vram); vmbus_free_mmio(hv->mem->start, hv->fb_size); err_vmbus_close: vmbus_close(hdev->channel); @@ -172,6 +173,7 @@ static void hyperv_vmbus_remove(struct hv_device *hdev) vmbus_close(hdev->channel); hv_set_drvdata(hdev, NULL); + iounmap(hv->vram); vmbus_free_mmio(hv->mem->start, hv->fb_size); } -- GitLab From 304386373007aaca9236a3f36afac0bbedcd2bf0 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 18 Feb 2025 15:01:30 -0800 Subject: [PATCH 1417/1585] fbdev: hyperv_fb: Fix hang in kdump kernel when on Hyper-V Gen 2 VMs Gen 2 Hyper-V VMs boot via EFI and have a standard EFI framebuffer device. When the kdump kernel runs in such a VM, loading the efifb driver may hang because of accessing the framebuffer at the wrong memory address. The scenario occurs when the hyperv_fb driver in the original kernel moves the framebuffer to a different MMIO address because of conflicts with an already-running efifb or simplefb driver. The hyperv_fb driver then informs Hyper-V of the change, which is allowed by the Hyper-V FB VMBus device protocol. However, when the kexec command loads the kdump kernel into crash memory via the kexec_file_load() system call, the system call doesn't know the framebuffer has moved, and it sets up the kdump screen_info using the original framebuffer address. The transition to the kdump kernel does not go through the Hyper-V host, so Hyper-V does not reset the framebuffer address like it would do on a reboot. When efifb tries to run, it accesses a non-existent framebuffer address, which traps to the Hyper-V host. After many such accesses, the Hyper-V host thinks the guest is being malicious, and throttles the guest to the point that it runs very slowly or appears to have hung. When the kdump kernel is loaded into crash memory via the kexec_load() system call, the problem does not occur. In this case, the kexec command builds the screen_info table itself in user space from data returned by the FBIOGET_FSCREENINFO ioctl against /dev/fb0, which gives it the new framebuffer location. This problem was originally reported in 2020 [1], resulting in commit 3cb73bc3fa2a ("hyperv_fb: Update screen_info after removing old framebuffer"). This commit solved the problem by setting orig_video_isVGA to 0, so the kdump kernel was unaware of the EFI framebuffer. The efifb driver did not try to load, and no hang occurred. But in 2024, commit c25a19afb81c ("fbdev/hyperv_fb: Do not clear global screen_info") effectively reverted 3cb73bc3fa2a. Commit c25a19afb81c has no reference to 3cb73bc3fa2a, so perhaps it was done without knowing the implications that were reported with 3cb73bc3fa2a. In any case, as of commit c25a19afb81c, the original problem came back again. Interestingly, the hyperv_drm driver does not have this problem because it never moves the framebuffer. The difference is that the hyperv_drm driver removes any conflicting framebuffers *before* allocating an MMIO address, while the hyperv_fb drivers removes conflicting framebuffers *after* allocating an MMIO address. With the "after" ordering, hyperv_fb may encounter a conflict and move the framebuffer to a different MMIO address. But the conflict is essentially bogus because it is removed a few lines of code later. Rather than fix the problem with the approach from 2020 in commit 3cb73bc3fa2a, instead slightly reorder the steps in hyperv_fb so conflicting framebuffers are removed before allocating an MMIO address. Then the default framebuffer MMIO address should always be available, and there's never any confusion about which framebuffer address the kdump kernel should use -- it's always the original address provided by the Hyper-V host. This approach is already used by the hyperv_drm driver, and is consistent with the usage guidelines at the head of the module with the function aperture_remove_conflicting_devices(). This approach also solves a related minor problem when kexec_load() is used to load the kdump kernel. With current code, unbinding and rebinding the hyperv_fb driver could result in the framebuffer moving back to the default framebuffer address, because on the rebind there are no conflicts. If such a move is done after the kdump kernel is loaded with the new framebuffer address, at kdump time it could again have the wrong address. This problem and fix are described in terms of the kdump kernel, but it can also occur with any kernel started via kexec. See extensive discussion of the problem and solution at [2]. [1] https://lore.kernel.org/linux-hyperv/20201014092429.1415040-1-kasong@redhat.com/ [2] https://lore.kernel.org/linux-hyperv/BLAPR10MB521793485093FDB448F7B2E5FDE92@BLAPR10MB5217.namprd10.prod.outlook.com/ Reported-by: Thomas Tai Fixes: c25a19afb81c ("fbdev/hyperv_fb: Do not clear global screen_info") Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/20250218230130.3207-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20250218230130.3207-1-mhklinux@outlook.com> --- drivers/video/fbdev/hyperv_fb.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index 363e4ccfcdb77..ce23d0ef5702a 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -989,6 +989,7 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) base = pci_resource_start(pdev, 0); size = pci_resource_len(pdev, 0); + aperture_remove_conflicting_devices(base, size, KBUILD_MODNAME); /* * For Gen 1 VM, we can directly use the contiguous memory @@ -1010,11 +1011,21 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) goto getmem_done; } pr_info("Unable to allocate enough contiguous physical memory on Gen 1 VM. Using MMIO instead.\n"); + } else { + aperture_remove_all_conflicting_devices(KBUILD_MODNAME); } /* - * Cannot use the contiguous physical memory. - * Allocate mmio space for framebuffer. + * Cannot use contiguous physical memory, so allocate MMIO space for + * the framebuffer. At this point in the function, conflicting devices + * that might have claimed the framebuffer MMIO space based on + * screen_info.lfb_base must have already been removed so that + * vmbus_allocate_mmio() does not allocate different MMIO space. If the + * kdump image were to be loaded using kexec_file_load(), the + * framebuffer location in the kdump image would be set from + * screen_info.lfb_base at the time that kdump is enabled. If the + * framebuffer has moved elsewhere, this could be the wrong location, + * causing kdump to hang when efifb (for example) loads. */ dio_fb_size = screen_width * screen_height * screen_depth / 8; @@ -1051,11 +1062,6 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) info->screen_size = dio_fb_size; getmem_done: - if (base && size) - aperture_remove_conflicting_devices(base, size, KBUILD_MODNAME); - else - aperture_remove_all_conflicting_devices(KBUILD_MODNAME); - if (!gen2vm) pci_dev_put(pdev); -- GitLab From f5e728a50bb17336a20803dde488515b833ecd1d Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Sat, 1 Mar 2025 08:16:30 -0800 Subject: [PATCH 1418/1585] fbdev: hyperv_fb: Simplify hvfb_putmem The device object required in 'hvfb_release_phymem' function for 'dma_free_coherent' can also be obtained from the 'info' pointer, making 'hdev' parameter in 'hvfb_putmem' redundant. Remove the unnecessary 'hdev' argument from 'hvfb_putmem'. Signed-off-by: Saurabh Sengar Reviewed-by: Michael Kelley Tested-by: Michael Kelley Link: https://lore.kernel.org/r/1740845791-19977-2-git-send-email-ssengar@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <1740845791-19977-2-git-send-email-ssengar@linux.microsoft.com> --- drivers/video/fbdev/hyperv_fb.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index ce23d0ef5702a..9798a34ac571f 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -952,7 +952,7 @@ static phys_addr_t hvfb_get_phymem(struct hv_device *hdev, } /* Release contiguous physical memory */ -static void hvfb_release_phymem(struct hv_device *hdev, +static void hvfb_release_phymem(struct device *device, phys_addr_t paddr, unsigned int size) { unsigned int order = get_order(size); @@ -960,7 +960,7 @@ static void hvfb_release_phymem(struct hv_device *hdev, if (order <= MAX_PAGE_ORDER) __free_pages(pfn_to_page(paddr >> PAGE_SHIFT), order); else - dma_free_coherent(&hdev->device, + dma_free_coherent(device, round_up(size, PAGE_SIZE), phys_to_virt(paddr), paddr); @@ -1080,7 +1080,7 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) } /* Release the framebuffer */ -static void hvfb_putmem(struct hv_device *hdev, struct fb_info *info) +static void hvfb_putmem(struct fb_info *info) { struct hvfb_par *par = info->par; @@ -1089,7 +1089,7 @@ static void hvfb_putmem(struct hv_device *hdev, struct fb_info *info) iounmap(par->mmio_vp); vmbus_free_mmio(par->mem->start, screen_fb_size); } else { - hvfb_release_phymem(hdev, info->fix.smem_start, + hvfb_release_phymem(info->device, info->fix.smem_start, screen_fb_size); } @@ -1203,7 +1203,7 @@ static int hvfb_probe(struct hv_device *hdev, error: fb_deferred_io_cleanup(info); - hvfb_putmem(hdev, info); + hvfb_putmem(info); error2: vmbus_close(hdev->channel); error1: @@ -1232,7 +1232,7 @@ static void hvfb_remove(struct hv_device *hdev) vmbus_close(hdev->channel); hv_set_drvdata(hdev, NULL); - hvfb_putmem(hdev, info); + hvfb_putmem(info); framebuffer_release(info); } -- GitLab From ea2f45ab0e53b255f72c85ccd99e2b394fc5fceb Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Sat, 1 Mar 2025 08:16:31 -0800 Subject: [PATCH 1419/1585] fbdev: hyperv_fb: Allow graceful removal of framebuffer When a Hyper-V framebuffer device is unbind, hyperv_fb driver tries to release the framebuffer forcefully. If this framebuffer is in use it produce the following WARN and hence this framebuffer is never released. [ 44.111220] WARNING: CPU: 35 PID: 1882 at drivers/video/fbdev/core/fb_info.c:70 framebuffer_release+0x2c/0x40 < snip > [ 44.111289] Call Trace: [ 44.111290] [ 44.111291] ? show_regs+0x6c/0x80 [ 44.111295] ? __warn+0x8d/0x150 [ 44.111298] ? framebuffer_release+0x2c/0x40 [ 44.111300] ? report_bug+0x182/0x1b0 [ 44.111303] ? handle_bug+0x6e/0xb0 [ 44.111306] ? exc_invalid_op+0x18/0x80 [ 44.111308] ? asm_exc_invalid_op+0x1b/0x20 [ 44.111311] ? framebuffer_release+0x2c/0x40 [ 44.111313] ? hvfb_remove+0x86/0xa0 [hyperv_fb] [ 44.111315] vmbus_remove+0x24/0x40 [hv_vmbus] [ 44.111323] device_remove+0x40/0x80 [ 44.111325] device_release_driver_internal+0x20b/0x270 [ 44.111327] ? bus_find_device+0xb3/0xf0 Fix this by moving the release of framebuffer and assosiated memory to fb_ops.fb_destroy function, so that framebuffer framework handles it gracefully. While we fix this, also replace manual registrations/unregistration of framebuffer with devm_register_framebuffer. Fixes: 68a2d20b79b1 ("drivers/video: add Hyper-V Synthetic Video Frame Buffer Driver") Signed-off-by: Saurabh Sengar Reviewed-by: Michael Kelley Tested-by: Michael Kelley Link: https://lore.kernel.org/r/1740845791-19977-3-git-send-email-ssengar@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <1740845791-19977-3-git-send-email-ssengar@linux.microsoft.com> --- drivers/video/fbdev/hyperv_fb.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index 9798a34ac571f..75338ffc703fb 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -282,6 +282,8 @@ static uint screen_depth; static uint screen_fb_size; static uint dio_fb_size; /* FB size for deferred IO */ +static void hvfb_putmem(struct fb_info *info); + /* Send message to Hyper-V host */ static inline int synthvid_send(struct hv_device *hdev, struct synthvid_msg *msg) @@ -862,6 +864,17 @@ static void hvfb_ops_damage_area(struct fb_info *info, u32 x, u32 y, u32 width, hvfb_ondemand_refresh_throttle(par, x, y, width, height); } +/* + * fb_ops.fb_destroy is called by the last put_fb_info() call at the end + * of unregister_framebuffer() or fb_release(). Do any cleanup related to + * framebuffer here. + */ +static void hvfb_destroy(struct fb_info *info) +{ + hvfb_putmem(info); + framebuffer_release(info); +} + /* * TODO: GEN1 codepaths allocate from system or DMA-able memory. Fix the * driver to use the _SYSMEM_ or _DMAMEM_ helpers in these cases. @@ -877,6 +890,7 @@ static const struct fb_ops hvfb_ops = { .fb_set_par = hvfb_set_par, .fb_setcolreg = hvfb_setcolreg, .fb_blank = hvfb_blank, + .fb_destroy = hvfb_destroy, }; /* Get options from kernel paramenter "video=" */ @@ -1178,7 +1192,7 @@ static int hvfb_probe(struct hv_device *hdev, if (ret) goto error; - ret = register_framebuffer(info); + ret = devm_register_framebuffer(&hdev->device, info); if (ret) { pr_err("Unable to register framebuffer\n"); goto error; @@ -1226,14 +1240,10 @@ static void hvfb_remove(struct hv_device *hdev) fb_deferred_io_cleanup(info); - unregister_framebuffer(info); cancel_delayed_work_sync(&par->dwork); vmbus_close(hdev->channel); hv_set_drvdata(hdev, NULL); - - hvfb_putmem(info); - framebuffer_release(info); } static int hvfb_suspend(struct hv_device *hdev) -- GitLab From 09beefefb57bbc3a06d98f319d85db4d719d7bcb Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 26 Feb 2025 12:06:06 -0800 Subject: [PATCH 1420/1585] x86/hyperv: Fix output argument to hypercall that changes page visibility The hypercall in hv_mark_gpa_visibility() is invoked with an input argument and an output argument. The output argument ostensibly returns the number of pages that were processed. But in fact, the hypercall does not provide any output, so the output argument is spurious. The spurious argument is harmless because Hyper-V ignores it, but in the interest of correctness and to avoid the potential for future problems, remove it. Signed-off-by: Michael Kelley Reviewed-by: Nuno Das Neves Link: https://lore.kernel.org/r/20250226200612.2062-2-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20250226200612.2062-2-mhklinux@outlook.com> --- arch/x86/hyperv/ivm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index dd68d9ad9b22c..ec7880271cf98 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -464,7 +464,6 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], enum hv_mem_host_visibility visibility) { struct hv_gpa_range_for_visibility *input; - u16 pages_processed; u64 hv_status; unsigned long flags; @@ -493,7 +492,7 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], memcpy((void *)input->gpa_page_list, pfn, count * sizeof(*pfn)); hv_status = hv_do_rep_hypercall( HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, count, - 0, input, &pages_processed); + 0, input, NULL); local_irq_restore(flags); if (hv_result_success(hv_status)) -- GitLab From a2add513311b48cc924a699a8174db2c61ed5e8a Mon Sep 17 00:00:00 2001 From: Jeff LaBundy Date: Sun, 9 Mar 2025 20:29:59 -0500 Subject: [PATCH 1421/1585] Input: iqs7222 - preserve system status register Some register groups reserve a byte at the end of their continuous address space. Depending on the variant of silicon, this field may share the same memory space as the lower byte of the system status register (0x10). In these cases, caching the reserved byte and writing it later may effectively reset the device depending on what happened in between the read and write operations. Solve this problem by avoiding any access to this last byte within offending register groups. This method replaces a workaround which attempted to write the reserved byte with up-to-date contents, but left a small window in which updates by the device could have been clobbered. Now that the driver does not touch these reserved bytes, the order in which the device's registers are written no longer matters, and they can be written in their natural order. The new method is also much more generic, and can be more easily extended to new variants of silicon with different register maps. As part of this change, the register read and write functions must be gently updated to support byte access instead of word access. Fixes: 2e70ef525b73 ("Input: iqs7222 - acknowledge reset before writing registers") Signed-off-by: Jeff LaBundy Link: https://lore.kernel.org/r/Z85Alw+d9EHKXx2e@nixie71 Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/misc/iqs7222.c | 50 ++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/drivers/input/misc/iqs7222.c b/drivers/input/misc/iqs7222.c index 22022d11470db..80b917944b51e 100644 --- a/drivers/input/misc/iqs7222.c +++ b/drivers/input/misc/iqs7222.c @@ -100,11 +100,11 @@ enum iqs7222_reg_key_id { enum iqs7222_reg_grp_id { IQS7222_REG_GRP_STAT, - IQS7222_REG_GRP_FILT, IQS7222_REG_GRP_CYCLE, IQS7222_REG_GRP_GLBL, IQS7222_REG_GRP_BTN, IQS7222_REG_GRP_CHAN, + IQS7222_REG_GRP_FILT, IQS7222_REG_GRP_SLDR, IQS7222_REG_GRP_TPAD, IQS7222_REG_GRP_GPIO, @@ -286,6 +286,7 @@ static const struct iqs7222_event_desc iqs7222_tp_events[] = { struct iqs7222_reg_grp_desc { u16 base; + u16 val_len; int num_row; int num_col; }; @@ -342,6 +343,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { }, [IQS7222_REG_GRP_FILT] = { .base = 0xAC00, + .val_len = 3, .num_row = 1, .num_col = 2, }, @@ -400,6 +402,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { }, [IQS7222_REG_GRP_FILT] = { .base = 0xAC00, + .val_len = 3, .num_row = 1, .num_col = 2, }, @@ -454,6 +457,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { }, [IQS7222_REG_GRP_FILT] = { .base = 0xC400, + .val_len = 3, .num_row = 1, .num_col = 2, }, @@ -496,6 +500,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { }, [IQS7222_REG_GRP_FILT] = { .base = 0xC400, + .val_len = 3, .num_row = 1, .num_col = 2, }, @@ -543,6 +548,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { }, [IQS7222_REG_GRP_FILT] = { .base = 0xAA00, + .val_len = 3, .num_row = 1, .num_col = 2, }, @@ -600,6 +606,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { }, [IQS7222_REG_GRP_FILT] = { .base = 0xAA00, + .val_len = 3, .num_row = 1, .num_col = 2, }, @@ -656,6 +663,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { }, [IQS7222_REG_GRP_FILT] = { .base = 0xAE00, + .val_len = 3, .num_row = 1, .num_col = 2, }, @@ -712,6 +720,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { }, [IQS7222_REG_GRP_FILT] = { .base = 0xAE00, + .val_len = 3, .num_row = 1, .num_col = 2, }, @@ -768,6 +777,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { }, [IQS7222_REG_GRP_FILT] = { .base = 0xAE00, + .val_len = 3, .num_row = 1, .num_col = 2, }, @@ -1604,7 +1614,7 @@ static int iqs7222_force_comms(struct iqs7222_private *iqs7222) } static int iqs7222_read_burst(struct iqs7222_private *iqs7222, - u16 reg, void *val, u16 num_val) + u16 reg, void *val, u16 val_len) { u8 reg_buf[sizeof(__be16)]; int ret, i; @@ -1619,7 +1629,7 @@ static int iqs7222_read_burst(struct iqs7222_private *iqs7222, { .addr = client->addr, .flags = I2C_M_RD, - .len = num_val * sizeof(__le16), + .len = val_len, .buf = (u8 *)val, }, }; @@ -1675,7 +1685,7 @@ static int iqs7222_read_word(struct iqs7222_private *iqs7222, u16 reg, u16 *val) __le16 val_buf; int error; - error = iqs7222_read_burst(iqs7222, reg, &val_buf, 1); + error = iqs7222_read_burst(iqs7222, reg, &val_buf, sizeof(val_buf)); if (error) return error; @@ -1685,10 +1695,9 @@ static int iqs7222_read_word(struct iqs7222_private *iqs7222, u16 reg, u16 *val) } static int iqs7222_write_burst(struct iqs7222_private *iqs7222, - u16 reg, const void *val, u16 num_val) + u16 reg, const void *val, u16 val_len) { int reg_len = reg > U8_MAX ? sizeof(reg) : sizeof(u8); - int val_len = num_val * sizeof(__le16); int msg_len = reg_len + val_len; int ret, i; struct i2c_client *client = iqs7222->client; @@ -1747,7 +1756,7 @@ static int iqs7222_write_word(struct iqs7222_private *iqs7222, u16 reg, u16 val) { __le16 val_buf = cpu_to_le16(val); - return iqs7222_write_burst(iqs7222, reg, &val_buf, 1); + return iqs7222_write_burst(iqs7222, reg, &val_buf, sizeof(val_buf)); } static int iqs7222_ati_trigger(struct iqs7222_private *iqs7222) @@ -1831,30 +1840,14 @@ static int iqs7222_dev_init(struct iqs7222_private *iqs7222, int dir) /* * Acknowledge reset before writing any registers in case the device - * suffers a spurious reset during initialization. Because this step - * may change the reserved fields of the second filter beta register, - * its cache must be updated. - * - * Writing the second filter beta register, in turn, may clobber the - * system status register. As such, the filter beta register pair is - * written first to protect against this hazard. + * suffers a spurious reset during initialization. */ if (dir == WRITE) { - u16 reg = dev_desc->reg_grps[IQS7222_REG_GRP_FILT].base + 1; - u16 filt_setup; - error = iqs7222_write_word(iqs7222, IQS7222_SYS_SETUP, iqs7222->sys_setup[0] | IQS7222_SYS_SETUP_ACK_RESET); if (error) return error; - - error = iqs7222_read_word(iqs7222, reg, &filt_setup); - if (error) - return error; - - iqs7222->filt_setup[1] &= GENMASK(7, 0); - iqs7222->filt_setup[1] |= (filt_setup & ~GENMASK(7, 0)); } /* @@ -1883,6 +1876,7 @@ static int iqs7222_dev_init(struct iqs7222_private *iqs7222, int dir) int num_col = dev_desc->reg_grps[i].num_col; u16 reg = dev_desc->reg_grps[i].base; __le16 *val_buf; + u16 val_len = dev_desc->reg_grps[i].val_len ? : num_col * sizeof(*val_buf); u16 *val; if (!num_col) @@ -1900,7 +1894,7 @@ static int iqs7222_dev_init(struct iqs7222_private *iqs7222, int dir) switch (dir) { case READ: error = iqs7222_read_burst(iqs7222, reg, - val_buf, num_col); + val_buf, val_len); for (k = 0; k < num_col; k++) val[k] = le16_to_cpu(val_buf[k]); break; @@ -1909,7 +1903,7 @@ static int iqs7222_dev_init(struct iqs7222_private *iqs7222, int dir) for (k = 0; k < num_col; k++) val_buf[k] = cpu_to_le16(val[k]); error = iqs7222_write_burst(iqs7222, reg, - val_buf, num_col); + val_buf, val_len); break; default: @@ -1962,7 +1956,7 @@ static int iqs7222_dev_info(struct iqs7222_private *iqs7222) int error, i; error = iqs7222_read_burst(iqs7222, IQS7222_PROD_NUM, dev_id, - ARRAY_SIZE(dev_id)); + sizeof(dev_id)); if (error) return error; @@ -2915,7 +2909,7 @@ static int iqs7222_report(struct iqs7222_private *iqs7222) __le16 status[IQS7222_MAX_COLS_STAT]; error = iqs7222_read_burst(iqs7222, IQS7222_SYS_STATUS, status, - num_stat); + num_stat * sizeof(*status)); if (error) return error; -- GitLab From 823869e1e61607ab0d433de3c8abed221dc80a5e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Mar 2025 08:46:57 +0000 Subject: [PATCH 1422/1585] afs: Fix afs_atcell_get_link() to handle RCU pathwalk The ->get_link() method may be entered under RCU pathwalk conditions (in which case, the dentry pointer is NULL). This is not taken account of by afs_atcell_get_link() and lockdep will complain when it tries to lock an rwsem. Fix this by marking net->ws_cell as __rcu and using RCU access macros on it and by making afs_atcell_get_link() just return a pointer to the name in RCU pathwalk without taking net->cells_lock or a ref on the cell as RCU will protect the name storage (the cell is already freed via call_rcu()). Fixes: 30bca65bbbae ("afs: Make /afs/@cell and /afs/.@cell symlinks") Reported-by: Alexander Viro Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20250310094206.801057-2-dhowells@redhat.com/ # v4 --- fs/afs/cell.c | 11 ++++++----- fs/afs/dynroot.c | 15 +++++++++++++-- fs/afs/internal.h | 2 +- fs/afs/proc.c | 4 ++-- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index cee42646736c8..96a6781f36530 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -64,7 +64,8 @@ static struct afs_cell *afs_find_cell_locked(struct afs_net *net, return ERR_PTR(-ENAMETOOLONG); if (!name) { - cell = net->ws_cell; + cell = rcu_dereference_protected(net->ws_cell, + lockdep_is_held(&net->cells_lock)); if (!cell) return ERR_PTR(-EDESTADDRREQ); goto found; @@ -388,8 +389,8 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) /* install the new cell */ down_write(&net->cells_lock); afs_see_cell(new_root, afs_cell_trace_see_ws); - old_root = net->ws_cell; - net->ws_cell = new_root; + old_root = rcu_replace_pointer(net->ws_cell, new_root, + lockdep_is_held(&net->cells_lock)); up_write(&net->cells_lock); afs_unuse_cell(net, old_root, afs_cell_trace_unuse_ws); @@ -945,8 +946,8 @@ void afs_cell_purge(struct afs_net *net) _enter(""); down_write(&net->cells_lock); - ws = net->ws_cell; - net->ws_cell = NULL; + ws = rcu_replace_pointer(net->ws_cell, NULL, + lockdep_is_held(&net->cells_lock)); up_write(&net->cells_lock); afs_unuse_cell(net, ws, afs_cell_trace_unuse_ws); diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index d8bf52f77d930..008698d706caa 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -314,12 +314,23 @@ static const char *afs_atcell_get_link(struct dentry *dentry, struct inode *inod const char *name; bool dotted = vnode->fid.vnode == 3; - if (!net->ws_cell) + if (!dentry) { + /* We're in RCU-pathwalk. */ + cell = rcu_dereference(net->ws_cell); + if (dotted) + name = cell->name - 1; + else + name = cell->name; + /* Shouldn't need to set a delayed call. */ + return name; + } + + if (!rcu_access_pointer(net->ws_cell)) return ERR_PTR(-ENOENT); down_read(&net->cells_lock); - cell = net->ws_cell; + cell = rcu_dereference_protected(net->ws_cell, lockdep_is_held(&net->cells_lock)); if (dotted) name = cell->name - 1; else diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 90f407774a9a1..df30bd62da79e 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -287,7 +287,7 @@ struct afs_net { /* Cell database */ struct rb_root cells; - struct afs_cell *ws_cell; + struct afs_cell __rcu *ws_cell; struct work_struct cells_manager; struct timer_list cells_timer; atomic_t cells_outstanding; diff --git a/fs/afs/proc.c b/fs/afs/proc.c index e7614f4f30c21..12c88d8be3fe8 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -206,7 +206,7 @@ static int afs_proc_rootcell_show(struct seq_file *m, void *v) net = afs_seq2net_single(m); down_read(&net->cells_lock); - cell = net->ws_cell; + cell = rcu_dereference_protected(net->ws_cell, lockdep_is_held(&net->cells_lock)); if (cell) seq_printf(m, "%s\n", cell->name); up_read(&net->cells_lock); @@ -242,7 +242,7 @@ static int afs_proc_rootcell_write(struct file *file, char *buf, size_t size) ret = -EEXIST; inode_lock(file_inode(file)); - if (!net->ws_cell) + if (!rcu_access_pointer(net->ws_cell)) ret = afs_cell_init(net, buf); else printk("busy\n"); -- GitLab From 65be5c95d08eedda570a6c888a12384c77fe7614 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Sun, 9 Mar 2025 18:22:16 +0100 Subject: [PATCH 1423/1585] x86/sgx: Warn explicitly if X86_FEATURE_SGX_LC is not enabled The kernel requires X86_FEATURE_SGX_LC to be able to create SGX enclaves, not just X86_FEATURE_SGX. There is quite a number of hardware which has X86_FEATURE_SGX but not X86_FEATURE_SGX_LC. A kernel running on such hardware does not create the /dev/sgx_enclave file and does so silently. Explicitly warn if X86_FEATURE_SGX_LC is not enabled to properly notify users that the kernel disabled the SGX driver. The X86_FEATURE_SGX_LC, a.k.a. SGX Launch Control, is a CPU feature that enables LE (Launch Enclave) hash MSRs to be writable (with additional opt-in required in the 'feature control' MSR) when running enclaves, i.e. using a custom root key rather than the Intel proprietary key for enclave signing. I've hit this issue myself and have spent some time researching where my /dev/sgx_enclave file went on SGX-enabled hardware. Related links: https://github.com/intel/linux-sgx/issues/837 https://patchwork.kernel.org/project/platform-driver-x86/patch/20180827185507.17087-3-jarkko.sakkinen@linux.intel.com/ [ mingo: Made the error message a bit more verbose, and added other cases where the kernel fails to create the /dev/sgx_enclave device node. ] Signed-off-by: Vladis Dronov Signed-off-by: Ingo Molnar Acked-by: Kai Huang Cc: Jarkko Sakkinen Cc: Andy Lutomirski Cc: Sean Christopherson Cc: Linus Torvalds Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250309172215.21777-2-vdronov@redhat.com --- arch/x86/kernel/cpu/sgx/driver.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c index 22b65a5f5ec6c..7f8d1e11dbee2 100644 --- a/arch/x86/kernel/cpu/sgx/driver.c +++ b/arch/x86/kernel/cpu/sgx/driver.c @@ -150,13 +150,15 @@ int __init sgx_drv_init(void) u64 xfrm_mask; int ret; - if (!cpu_feature_enabled(X86_FEATURE_SGX_LC)) + if (!cpu_feature_enabled(X86_FEATURE_SGX_LC)) { + pr_info("SGX disabled: SGX launch control CPU feature is not available, /dev/sgx_enclave disabled.\n"); return -ENODEV; + } cpuid_count(SGX_CPUID, 0, &eax, &ebx, &ecx, &edx); if (!(eax & 1)) { - pr_err("SGX disabled: SGX1 instruction support not available.\n"); + pr_info("SGX disabled: SGX1 instruction support not available, /dev/sgx_enclave disabled.\n"); return -ENODEV; } @@ -173,8 +175,10 @@ int __init sgx_drv_init(void) } ret = misc_register(&sgx_dev_enclave); - if (ret) + if (ret) { + pr_info("SGX disabled: Unable to register the /dev/sgx_enclave driver (%d).\n", ret); return ret; + } return 0; } -- GitLab From 247fba13416af65b155949bae582d55c310f58b6 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Mon, 10 Mar 2025 16:04:40 +0800 Subject: [PATCH 1424/1585] ASoC: rt722-sdca: add missing readable registers SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_FU15, RT722_SDCA_CTL_FU_CH_GAIN, CH_01) ... SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_FU15, RT722_SDCA_CTL_FU_CH_GAIN, CH_04) are used by the "FU15 Boost Volume" control, but not marked as readable. And the mbq size are 2 for those registers. Fixes: 7f5d6036ca005 ("ASoC: rt722-sdca: Add RT722 SDCA driver") Signed-off-by: Bard Liao Reviewed-by: Ranjani Sridharan Reviewed-by: Shuming Fan Link: https://patch.msgid.link/20250310080440.58797-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt722-sdca-sdw.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/rt722-sdca-sdw.c b/sound/soc/codecs/rt722-sdca-sdw.c index 25fc13687bc83..4d3043627bd04 100644 --- a/sound/soc/codecs/rt722-sdca-sdw.c +++ b/sound/soc/codecs/rt722-sdca-sdw.c @@ -86,6 +86,10 @@ static bool rt722_sdca_mbq_readable_register(struct device *dev, unsigned int re case 0x6100067: case 0x6100070 ... 0x610007c: case 0x6100080: + case SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_FU15, RT722_SDCA_CTL_FU_CH_GAIN, + CH_01) ... + SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_FU15, RT722_SDCA_CTL_FU_CH_GAIN, + CH_04): case SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_USER_FU1E, RT722_SDCA_CTL_FU_VOLUME, CH_01): case SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_USER_FU1E, RT722_SDCA_CTL_FU_VOLUME, -- GitLab From f3fa0e40df175acd60b71036b9a1fd62310aec03 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 5 Feb 2025 11:24:38 +0800 Subject: [PATCH 1425/1585] sched/clock: Don't define sched_clock_irqtime as static key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sched_clock_irqtime was defined as a static key in: 8722903cbb8f ("sched: Define sched_clock_irqtime as static key") However, this change introduces a 'sleeping in atomic context' warning: arch/x86/kernel/tsc.c:1214 mark_tsc_unstable() warn: sleeping in atomic context As analyzed by Dan, the affected code path is as follows: vcpu_load() <- disables preempt -> kvm_arch_vcpu_load() -> mark_tsc_unstable() <- sleeps virt/kvm/kvm_main.c 166 void vcpu_load(struct kvm_vcpu *vcpu) 167 { 168 int cpu = get_cpu(); ^^^^^^^^^^ This get_cpu() disables preemption. 169 170 __this_cpu_write(kvm_running_vcpu, vcpu); 171 preempt_notifier_register(&vcpu->preempt_notifier); 172 kvm_arch_vcpu_load(vcpu, cpu); 173 put_cpu(); 174 } arch/x86/kvm/x86.c 4979 if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) { 4980 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : 4981 rdtsc() - vcpu->arch.last_host_tsc; 4982 if (tsc_delta < 0) 4983 mark_tsc_unstable("KVM discovered backwards TSC"); arch/x86/kernel/tsc.c 1206 void mark_tsc_unstable(char *reason) 1207 { 1208 if (tsc_unstable) 1209 return; 1210 1211 tsc_unstable = 1; 1212 if (using_native_sched_clock()) 1213 clear_sched_clock_stable(); --> 1214 disable_sched_clock_irqtime(); ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ kernel/jump_label.c 245 void static_key_disable(struct static_key *key) 246 { 247 cpus_read_lock(); ^^^^^^^^^^^^^^^^ This lock has a might_sleep() in it which triggers the static checker warning. 248 static_key_disable_cpuslocked(key); 249 cpus_read_unlock(); 250 } Let revert this change for now as {disable,enable}_sched_clock_irqtime are used in many places, as pointed out by Sean, including the following: The code path in clocksource_watchdog(): clocksource_watchdog() | -> spin_lock(&watchdog_lock); | -> __clocksource_unstable() | -> clocksource.mark_unstable() == tsc_cs_mark_unstable() | -> disable_sched_clock_irqtime() And the code path in sched_clock_register(): /* Cannot register a sched_clock with interrupts on */ local_irq_save(flags); ... /* Enable IRQ time accounting if we have a fast enough sched_clock() */ if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) enable_sched_clock_irqtime(); local_irq_restore(flags); [ lkp@intel.com: reported a build error in the prev version ] [ mingo: cherry-picked it over into sched/urgent ] Closes: https://lore.kernel.org/kvm/37a79ba3-9ce0-479c-a5b0-2bd75d573ed3@stanley.mountain/ Fixes: 8722903cbb8f ("sched: Define sched_clock_irqtime as static key") Reported-by: Dan Carpenter Debugged-by: Dan Carpenter Debugged-by: Sean Christopherson Debugged-by: Michal Koutný Signed-off-by: Yafang Shao Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Vincent Guittot Link: https://lkml.kernel.org/r/20250205032438.14668-1-laoar.shao@gmail.com --- kernel/sched/cputime.c | 8 ++++---- kernel/sched/sched.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 5d9143dd08791..6dab4854c6c08 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -9,8 +9,6 @@ #ifdef CONFIG_IRQ_TIME_ACCOUNTING -DEFINE_STATIC_KEY_FALSE(sched_clock_irqtime); - /* * There are no locks covering percpu hardirq/softirq time. * They are only modified in vtime_account, on corresponding CPU @@ -24,14 +22,16 @@ DEFINE_STATIC_KEY_FALSE(sched_clock_irqtime); */ DEFINE_PER_CPU(struct irqtime, cpu_irqtime); +int sched_clock_irqtime; + void enable_sched_clock_irqtime(void) { - static_branch_enable(&sched_clock_irqtime); + sched_clock_irqtime = 1; } void disable_sched_clock_irqtime(void) { - static_branch_disable(&sched_clock_irqtime); + sched_clock_irqtime = 0; } static void irqtime_account_delta(struct irqtime *irqtime, u64 delta, diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c8512a9fb0229..023b844159c94 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -3259,11 +3259,11 @@ struct irqtime { }; DECLARE_PER_CPU(struct irqtime, cpu_irqtime); -DECLARE_STATIC_KEY_FALSE(sched_clock_irqtime); +extern int sched_clock_irqtime; static inline int irqtime_enabled(void) { - return static_branch_likely(&sched_clock_irqtime); + return sched_clock_irqtime; } /* -- GitLab From e3e89178a9f4a80092578af3ff3c8478f9187d59 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Mon, 10 Mar 2025 15:42:43 +0100 Subject: [PATCH 1426/1585] x86/microcode/AMD: Fix out-of-bounds on systems with CPU-less NUMA nodes Currently, load_microcode_amd() iterates over all NUMA nodes, retrieves their CPU masks and unconditionally accesses per-CPU data for the first CPU of each mask. According to Documentation/admin-guide/mm/numaperf.rst: "Some memory may share the same node as a CPU, and others are provided as memory only nodes." Therefore, some node CPU masks may be empty and wouldn't have a "first CPU". On a machine with far memory (and therefore CPU-less NUMA nodes): - cpumask_of_node(nid) is 0 - cpumask_first(0) is CONFIG_NR_CPUS - cpu_data(CONFIG_NR_CPUS) accesses the cpu_info per-CPU array at an index that is 1 out of bounds This does not have any security implications since flashing microcode is a privileged operation but I believe this has reliability implications by potentially corrupting memory while flashing a microcode update. When booting with CONFIG_UBSAN_BOUNDS=y on an AMD machine that flashes a microcode update. I get the following splat: UBSAN: array-index-out-of-bounds in arch/x86/kernel/cpu/microcode/amd.c:X:Y index 512 is out of range for type 'unsigned long[512]' [...] Call Trace: dump_stack __ubsan_handle_out_of_bounds load_microcode_amd request_microcode_amd reload_store kernfs_fop_write_iter vfs_write ksys_write do_syscall_64 entry_SYSCALL_64_after_hwframe Change the loop to go over only NUMA nodes which have CPUs before determining whether the first CPU on the respective node needs microcode update. [ bp: Massage commit message, fix typo. ] Fixes: 7ff6edf4fef3 ("x86/microcode/AMD: Fix mixed steppings support") Signed-off-by: Florent Revest Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250310144243.861978-1-revest@chromium.org --- arch/x86/kernel/cpu/microcode/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index c69b1bc454834..138689b8e1d83 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -1074,7 +1074,7 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz if (ret != UCODE_OK) return ret; - for_each_node(nid) { + for_each_node_with_cpus(nid) { cpu = cpumask_first(cpumask_of_node(nid)); c = &cpu_data(cpu); -- GitLab From 10c7988418d8f759ba70c4a558961e0bfa74647f Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 6 Mar 2025 18:42:11 +0530 Subject: [PATCH 1427/1585] drm/xe: Release guc ids before cancelling work A GT resets can be occurring in parallel while cancelling work in async call which can requeue these workers. to avoid that, lets first release guc ids and then cancel work so they don't requeued. Fixes: 8ae8a2e8dd21 ("drm/xe: Long running job update") Fixes: 12c2f962fe71 ("drm/xe: cancel pending job timer before freeing scheduler") Signed-off-by: Tejas Upadhyay Suggested-by: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250306131211.975503-1-tejas.upadhyay@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 8e8d76f62329127b31c64a034b052fb9e30e92af) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index b6a2dd742ebdc..1a5fe4822a62e 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1246,11 +1246,11 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) xe_pm_runtime_get(guc_to_xe(guc)); trace_xe_exec_queue_destroy(q); + release_guc_id(guc, q); if (xe_exec_queue_is_lr(q)) cancel_work_sync(&ge->lr_tdr); /* Confirm no work left behind accessing device structures */ cancel_delayed_work_sync(&ge->sched.base.work_tdr); - release_guc_id(guc, q); xe_sched_entity_fini(&ge->entity); xe_sched_fini(&ge->sched); -- GitLab From 9106713bd2ab0cacd380cda0d3f0219f2e488086 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 7 Mar 2025 11:01:09 +0100 Subject: [PATCH 1428/1585] drm/xe/userptr: Fix an incorrect assert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The assert incorrectly checks the total length processed which can in fact be greater than the number of pages. Fix. Fixes: 0a98219bcc96 ("drm/xe/hmm: Don't dereference struct page pointers without notifier lock") Cc: Matthew Auld Cc: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20250307100109.21397-1-thomas.hellstrom@linux.intel.com (cherry picked from commit 70e5043ba85eae199b232e39921abd706b5c1fa4) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hmm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c index 392102515f3d8..c3cc0fa105e84 100644 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -138,13 +138,17 @@ static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, i += size; if (unlikely(j == st->nents - 1)) { + xe_assert(xe, i >= npages); if (i > npages) size -= (i - npages); + sg_mark_end(sgl); + } else { + xe_assert(xe, i < npages); } + sg_set_page(sgl, page, size << PAGE_SHIFT, 0); } - xe_assert(xe, i == npages); return dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); -- GitLab From 6266f4a78131c795631440ea9c7b66cdfd399484 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 18 Feb 2025 23:18:55 +0200 Subject: [PATCH 1429/1585] drm/i915/cdclk: Do cdclk post plane programming later MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We currently call intel_set_cdclk_post_plane_update() far too early. When pipes are active during the reprogramming the current spot only works for the cd2x divider update case, as that is synchronize to the pipe's vblank. Squashing and crawling are not synchronized in any way, so doing the programming while the pipes/planes are potentially still using the old hardware state could lead to underruns. Move the post plane reprgramming to a spot where we know that the pipes/planes have switched over the new hardware state. Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250218211913.27867-2-ville.syrjala@linux.intel.com Reviewed-by: Vinod Govindapillai (cherry picked from commit fb64f5568c0e0b5730733d70a012ae26b1a55815) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_display.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 41128469f12a2..c9dcf2bbd4c73 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7830,9 +7830,6 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) intel_program_dpkgc_latency(state); - if (state->modeset) - intel_set_cdclk_post_plane_update(state); - intel_wait_for_vblank_workers(state); /* FIXME: We should call drm_atomic_helper_commit_hw_done() here @@ -7906,6 +7903,8 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) intel_verify_planes(state); intel_sagv_post_plane_update(state); + if (state->modeset) + intel_set_cdclk_post_plane_update(state); intel_pmdemand_post_plane_update(state); drm_atomic_helper_commit_hw_done(&state->base); -- GitLab From 3e331a6715ee26f2fabc59dad6bb36d810707028 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 7 Mar 2025 19:56:35 -0500 Subject: [PATCH 1430/1585] drm/xe/pm: Temporarily disable D3Cold on BMG Currently, many instability cases related to D3Cold -> D0 transition on BMG are under investigation. Among them some bad cases where the device is lost after 1 to 3 transitions from D3Cold to D0 on the runtime pm, with pcieport upstream bridge port link retrain failure. In other cases, it works fine, but with some sudden random memory corruptions after D3cold, that could be 0xffff missed ack on GT forcewake or GuC reload related failures. In some other cases though, D3Cold -> D0 works pretty reliably. It looks like it is a combination of GPU cards and Host boards at this point. So, there is no possible/available quirk at this time. This patch disables the D3Cold by default on BMG by reducing the vram_d3cold_threshold to 0. Users and developers who wants to enable it are still able to via $ echo 300 > /sys/bus/pci/devices//vram_d3cold_threshold Fixes: 3adcf970dc7e ("drm/xe/bmg: Drop force_probe requirement") Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4037 Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4395 Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4396 Cc: Karthik Poosa Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20250308005636.1475420-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit d945cc876277851053c0cf37927c8d7bd9d0e880) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pm.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index c9cc0c091dfdd..89fd2c043136e 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -267,6 +267,15 @@ int xe_pm_init_early(struct xe_device *xe) } ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */ +static u32 vram_threshold_value(struct xe_device *xe) +{ + /* FIXME: D3Cold temporarily disabled by default on BMG */ + if (xe->info.platform == XE_BATTLEMAGE) + return 0; + + return DEFAULT_VRAM_THRESHOLD; +} + /** * xe_pm_init - Initialize Xe Power Management * @xe: xe device instance @@ -277,6 +286,7 @@ ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */ */ int xe_pm_init(struct xe_device *xe) { + u32 vram_threshold; int err; /* For now suspend/resume is only allowed with GuC */ @@ -290,7 +300,8 @@ int xe_pm_init(struct xe_device *xe) if (err) return err; - err = xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD); + vram_threshold = vram_threshold_value(xe); + err = xe_pm_set_vram_threshold(xe, vram_threshold); if (err) return err; } -- GitLab From c605acb53f449f6289f042790307d7dc9e62d03d Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 7 Mar 2025 11:03:07 -0500 Subject: [PATCH 1431/1585] drm/xe/guc_pc: Retry and wait longer for GuC PC start In a rare situation of thermal limit during resume, GuC can be slow and run into delays like this: xe 0000:00:02.0: [drm] GT1: excessive init time: 667ms! \ [status = 0x8002F034, timeouts = 0] xe 0000:00:02.0: [drm] GT1: excessive init time: \ [freq = 100MHz (req = 800MHz), before = 100MHz, \ perf_limit_reasons = 0x1C001000] xe 0000:00:02.0: [drm] *ERROR* GT1: GuC PC Start failed ------------[ cut here ]------------ xe 0000:00:02.0: [drm] GT1: Failed to start GuC PC: -EIO When this happens, it will block entirely the GPU to be used. So, let's try and with a huge timeout in the hope it comes back. Also, let's collect some information on how long it is usually taking on situations like this, so perhaps the time can be tuned later. Cc: Vinay Belgaumkar Cc: Jonathan Cavitt Cc: John Harrison Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20250307160307.1093391-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit b4b05e53b550a886b4754b87fd0dd2b304579e85) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_pc.c | 53 +++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index df7f130fb663f..b995d1d51aed0 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -6,6 +6,7 @@ #include "xe_guc_pc.h" #include +#include #include #include @@ -19,6 +20,7 @@ #include "xe_gt.h" #include "xe_gt_idle.h" #include "xe_gt_printk.h" +#include "xe_gt_throttle.h" #include "xe_gt_types.h" #include "xe_guc.h" #include "xe_guc_ct.h" @@ -49,6 +51,9 @@ #define LNL_MERT_FREQ_CAP 800 #define BMG_MERT_FREQ_CAP 2133 +#define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */ +#define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */ + /** * DOC: GuC Power Conservation (PC) * @@ -113,9 +118,10 @@ static struct iosys_map *pc_to_maps(struct xe_guc_pc *pc) FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count)) static int wait_for_pc_state(struct xe_guc_pc *pc, - enum slpc_global_state state) + enum slpc_global_state state, + int timeout_ms) { - int timeout_us = 5000; /* rought 5ms, but no need for precision */ + int timeout_us = 1000 * timeout_ms; int slept, wait = 10; xe_device_assert_mem_access(pc_to_xe(pc)); @@ -164,7 +170,8 @@ static int pc_action_query_task_state(struct xe_guc_pc *pc) }; int ret; - if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING, + SLPC_RESET_TIMEOUT_MS)) return -EAGAIN; /* Blocking here to ensure the results are ready before reading them */ @@ -187,7 +194,8 @@ static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value) }; int ret; - if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING, + SLPC_RESET_TIMEOUT_MS)) return -EAGAIN; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); @@ -208,7 +216,8 @@ static int pc_action_unset_param(struct xe_guc_pc *pc, u8 id) struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; int ret; - if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING, + SLPC_RESET_TIMEOUT_MS)) return -EAGAIN; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); @@ -440,6 +449,15 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc) return freq; } +static u32 get_cur_freq(struct xe_gt *gt) +{ + u32 freq; + + freq = xe_mmio_read32(>->mmio, RPNSWREQ); + freq = REG_FIELD_GET(REQ_RATIO_MASK, freq); + return decode_freq(freq); +} + /** * xe_guc_pc_get_cur_freq - Get Current requested frequency * @pc: The GuC PC @@ -463,10 +481,7 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq) return -ETIMEDOUT; } - *freq = xe_mmio_read32(>->mmio, RPNSWREQ); - - *freq = REG_FIELD_GET(REQ_RATIO_MASK, *freq); - *freq = decode_freq(*freq); + *freq = get_cur_freq(gt); xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; @@ -1002,6 +1017,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) struct xe_gt *gt = pc_to_gt(pc); u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); unsigned int fw_ref; + ktime_t earlier; int ret; xe_gt_assert(gt, xe_device_uc_enabled(xe)); @@ -1026,14 +1042,25 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) memset(pc->bo->vmap.vaddr, 0, size); slpc_shared_data_write(pc, header.size, size); + earlier = ktime_get(); ret = pc_action_reset(pc); if (ret) goto out; - if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) { - xe_gt_err(gt, "GuC PC Start failed\n"); - ret = -EIO; - goto out; + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING, + SLPC_RESET_TIMEOUT_MS)) { + xe_gt_warn(gt, "GuC PC start taking longer than normal [freq = %dMHz (req = %dMHz), perf_limit_reasons = 0x%08X]\n", + xe_guc_pc_get_act_freq(pc), get_cur_freq(gt), + xe_gt_throttle_get_limit_reasons(gt)); + + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING, + SLPC_RESET_EXTENDED_TIMEOUT_MS)) { + xe_gt_err(gt, "GuC PC Start failed: Dynamic GT frequency control and GT sleep states are now disabled.\n"); + goto out; + } + + xe_gt_warn(gt, "GuC PC excessive start time: %lldms", + ktime_ms_delta(ktime_get(), earlier)); } ret = pc_init_freqs(pc); -- GitLab From 3f674e7b670b7b7d9261935820e4eba3c059f835 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 6 Mar 2025 14:25:57 -0800 Subject: [PATCH 1432/1585] nvme-pci: fix stuck reset on concurrent DPC and HP The PCIe error handling has the nvme driver quiesce the device, attempt to restart it, then wait for that restart to complete. A PCIe DPC event also toggles the PCIe link. If the slot doesn't have out-of-band presence detection, this will trigger a pciehp re-enumeration. The error handling that calls nvme_error_resume is holding the device lock while this happens. This lock blocks pciehp's request to disconnect the driver from proceeding. Meanwhile the nvme's reset can't make forward progress because its device isn't there anymore with outstanding IO, and the timeout handler won't do anything to fix it because the device is undergoing error handling. End result: deadlocked. Fix this by having the timeout handler short cut the disabling for a disconnected PCIe device. The downside is that we're relying on an IO timeout to clean up this mess, which could be a minute by default. Tested-by: Nilay Shroff Reviewed-by: Nilay Shroff Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 640590b217282..e59aad269abf8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1411,9 +1411,20 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) struct nvme_dev *dev = nvmeq->dev; struct request *abort_req; struct nvme_command cmd = { }; + struct pci_dev *pdev = to_pci_dev(dev->dev); u32 csts = readl(dev->bar + NVME_REG_CSTS); u8 opcode; + /* + * Shutdown the device immediately if we see it is disconnected. This + * unblocks PCIe error handling if the nvme driver is waiting in + * error_resume for a device that has been removed. We can't unbind the + * driver while the driver's error callback is waiting to complete, so + * we're relying on a timeout to break that deadlock if a removal + * occurs while reset work is running. + */ + if (pci_dev_is_disconnected(pdev)) + nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); if (nvme_state_terminal(&dev->ctrl)) goto disable; @@ -1421,7 +1432,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) * the recovery mechanism will surely fail. */ mb(); - if (pci_channel_offline(to_pci_dev(dev->dev))) + if (pci_channel_offline(pdev)) return BLK_EH_RESET_TIMER; /* -- GitLab From de93ddf88088f7624b589d0ff3af9effb87e8f3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 19 Feb 2025 18:02:39 +0200 Subject: [PATCH 1433/1585] drm/atomic: Filter out redundant DPMS calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Video players (eg. mpv) do periodic XResetScreenSaver() calls to keep the screen on while the video playing. The modesetting ddx plumbs these straight through into the kernel as DPMS setproperty ioctls, without any filtering whatsoever. When implemented via atomic these end up as empty commits on the crtc (which will nonetheless take one full frame), which leads to a dropped frame every time XResetScreenSaver() is called. Let's just filter out redundant DPMS property changes in the kernel to avoid this issue. v2: Explain the resulting commits a bit better (Sima) Document the behaviour in uapi docs (Sima) Cc: stable@vger.kernel.org Testcase: igt/kms_flip/flip-vs-dpms-on-nop Reviewed-by: Simona Vetter Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250219160239.17502-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/drm_atomic_uapi.c | 4 ++++ drivers/gpu/drm/drm_connector.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 370dc676e3aa5..fd36b8fd54e9e 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -956,6 +956,10 @@ int drm_atomic_connector_commit_dpms(struct drm_atomic_state *state, if (mode != DRM_MODE_DPMS_ON) mode = DRM_MODE_DPMS_OFF; + + if (connector->dpms == mode) + goto out; + connector->dpms = mode; crtc = connector->state->crtc; diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 5f24d6b41cc6d..48b08c9611a7b 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -1427,6 +1427,10 @@ EXPORT_SYMBOL(drm_hdmi_connector_get_output_format_name); * callback. For atomic drivers the remapping to the "ACTIVE" property is * implemented in the DRM core. * + * On atomic drivers any DPMS setproperty ioctl where the value does not + * change is completely skipped, otherwise a full atomic commit will occur. + * On legacy drivers the exact behavior is driver specific. + * * Note that this property cannot be set through the MODE_ATOMIC ioctl, * userspace must use "ACTIVE" on the CRTC instead. * -- GitLab From 73fe9073c0cc28056cb9de0c8a516dac070f1d1f Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 9 Mar 2025 20:52:08 -0700 Subject: [PATCH 1434/1585] Drivers: hv: vmbus: Don't release fb_mmio resource in vmbus_free_mmio() The VMBus driver manages the MMIO space it owns via the hyperv_mmio resource tree. Because the synthetic video framebuffer portion of the MMIO space is initially setup by the Hyper-V host for each guest, the VMBus driver does an early reserve of that portion of MMIO space in the hyperv_mmio resource tree. It saves a pointer to that resource in fb_mmio. When a VMBus driver requests MMIO space and passes "true" for the "fb_overlap_ok" argument, the reserved framebuffer space is used if possible. In that case it's not necessary to do another request against the "shadow" hyperv_mmio resource tree because that resource was already requested in the early reserve steps. However, the vmbus_free_mmio() function currently does no special handling for the fb_mmio resource. When a framebuffer device is removed, or the driver is unbound, the current code for vmbus_free_mmio() releases the reserved resource, leaving fb_mmio pointing to memory that has been freed. If the same or another driver is subsequently bound to the device, vmbus_allocate_mmio() checks against fb_mmio, and potentially gets garbage. Furthermore a second unbind operation produces this "nonexistent resource" error because of the unbalanced behavior between vmbus_allocate_mmio() and vmbus_free_mmio(): [ 55.499643] resource: Trying to free nonexistent resource <0x00000000f0000000-0x00000000f07fffff> Fix this by adding logic to vmbus_free_mmio() to recognize when MMIO space in the fb_mmio reserved area would be released, and don't release it. This filtering ensures the fb_mmio resource always exists, and makes vmbus_free_mmio() more parallel with vmbus_allocate_mmio(). Fixes: be000f93e5d7 ("drivers:hv: Track allocations of children of hv_vmbus in private resource tree") Signed-off-by: Michael Kelley Tested-by: Saurabh Sengar Reviewed-by: Saurabh Sengar Link: https://lore.kernel.org/r/20250310035208.275764-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20250310035208.275764-1-mhklinux@outlook.com> --- drivers/hv/vmbus_drv.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 0f6cd44fff292..6e55a1a2613d3 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2262,12 +2262,25 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size) struct resource *iter; mutex_lock(&hyperv_mmio_lock); + + /* + * If all bytes of the MMIO range to be released are within the + * special case fb_mmio shadow region, skip releasing the shadow + * region since no corresponding __request_region() was done + * in vmbus_allocate_mmio(). + */ + if (fb_mmio && start >= fb_mmio->start && + (start + size - 1 <= fb_mmio->end)) + goto skip_shadow_release; + for (iter = hyperv_mmio; iter; iter = iter->sibling) { if ((iter->start >= start + size) || (iter->end <= start)) continue; __release_region(iter, start, size); } + +skip_shadow_release: release_mem_region(start, size); mutex_unlock(&hyperv_mmio_lock); -- GitLab From 6fbafe1cbed10e53b3cf236a8a1987425206dd8e Mon Sep 17 00:00:00 2001 From: Panagiotis Foliadis Date: Sat, 8 Mar 2025 16:49:05 +0000 Subject: [PATCH 1435/1585] rust: task: fix `SAFETY` comment in `Task::wake_up` The `SAFETY` comment inside the `wake_up` method references erroneously the `signal_pending` C function instead of the `wake_up_process` which is actually called. Fix the comment to reference the correct C function. Fixes: fe95f58320e6 ("rust: task: adjust safety comments in Task methods") Signed-off-by: Panagiotis Foliadis Reviewed-by: Charalampos Mitrodimas Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250308-comment-fix-v1-1-4bba709fd36d@posteo.net [ Slightly reworded. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/task.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs index 07bc22a7645c0..38da555a2bdbb 100644 --- a/rust/kernel/task.rs +++ b/rust/kernel/task.rs @@ -320,7 +320,7 @@ impl Task { /// Wakes up the task. pub fn wake_up(&self) { - // SAFETY: It's always safe to call `signal_pending` on a valid task, even if the task + // SAFETY: It's always safe to call `wake_up_process` on a valid task, even if the task // running. unsafe { bindings::wake_up_process(self.as_ptr()) }; } -- GitLab From bf9b8020a80d32f6aa80591297a087d0519dc931 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 6 Mar 2025 17:55:48 +0900 Subject: [PATCH 1436/1585] nvmet: pci-epf: Set NVMET_PCI_EPF_Q_LIVE when a queue is fully created The function nvmet_pci_epf_create_sq() use test_and_set_bit() to check that a submission queue is not already live and if not, set the NVMET_PCI_EPF_Q_LIVE queue flag to declare the sq live (ready to use). However, this is done on entry to the function, before the submission queue is actually fully initialized and ready to use. This creates a race situation with the function nvmet_pci_epf_poll_sqs_work() which looks at the NVMET_PCI_EPF_Q_LIVE queue flag to poll the submission queue when it is live. This race can lead to invalid DMA transfers if nvmet_pci_epf_poll_sqs_work() runs after the NVMET_PCI_EPF_Q_LIVE flag is set but before setting the sq pci address and doorbell ofset. Avoid this race by only testing the NVMET_PCI_EPF_Q_LIVE flag on entry to nvmet_pci_epf_create_sq() and setting it after the submission queue is fully setup before nvmet_pci_epf_create_sq() returns success. Since the function nvmet_pci_epf_create_cq() also has the same racy flag setting pattern, also make a similar change in that function. Fixes: 0faa0fe6f90e ("nvmet: New NVMe PCI endpoint function target driver") Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/pci-epf.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index 565d2bd36dcde..d55ad334670c5 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -1265,7 +1265,7 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl, struct nvmet_pci_epf_queue *cq = &ctrl->cq[cqid]; u16 status; - if (test_and_set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags)) + if (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags)) return NVME_SC_QID_INVALID | NVME_STATUS_DNR; if (!(flags & NVME_QUEUE_PHYS_CONTIG)) @@ -1300,6 +1300,8 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl, if (status != NVME_SC_SUCCESS) goto err; + set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags); + dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n", cqid, qsize, cq->qes, cq->vector); @@ -1307,7 +1309,6 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl, err: clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags); - clear_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags); return status; } @@ -1333,7 +1334,7 @@ static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl, struct nvmet_pci_epf_queue *sq = &ctrl->sq[sqid]; u16 status; - if (test_and_set_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags)) + if (test_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags)) return NVME_SC_QID_INVALID | NVME_STATUS_DNR; if (!(flags & NVME_QUEUE_PHYS_CONTIG)) @@ -1355,7 +1356,7 @@ static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl, status = nvmet_sq_create(tctrl, &sq->nvme_sq, sqid, sq->depth); if (status != NVME_SC_SUCCESS) - goto out_clear_bit; + return status; sq->iod_wq = alloc_workqueue("sq%d_wq", WQ_UNBOUND, min_t(int, sq->depth, WQ_MAX_ACTIVE), sqid); @@ -1365,6 +1366,8 @@ static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl, goto out_destroy_sq; } + set_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags); + dev_dbg(ctrl->dev, "SQ[%u]: %u entries of %zu B\n", sqid, qsize, sq->qes); @@ -1372,8 +1375,6 @@ static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl, out_destroy_sq: nvmet_sq_destroy(&sq->nvme_sq); -out_clear_bit: - clear_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags); return status; } -- GitLab From 39393f5c5c795992507aa5005a9d58396a5b07f1 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 6 Mar 2025 17:55:49 +0900 Subject: [PATCH 1437/1585] nvmet: pci-epf: Do not add an IRQ vector if not needed The function nvmet_pci_epf_create_cq() always unconditionally calls nvmet_pci_epf_add_irq_vector() to add an IRQ vector for a completion queue. But this is not correct if the host requested the creation of a completion queue for polling, without an IRQ vector specified (i.e. the flag NVME_CQ_IRQ_ENABLED is not set). Fix this by calling nvmet_pci_epf_add_irq_vector() and setting the queue flag NVMET_PCI_EPF_Q_IRQ_ENABLED for the cq only if NVME_CQ_IRQ_ENABLED is set. While at it, also fix the error path to add the missing removal of the added IRQ vector if nvmet_cq_create() fails. Fixes: 0faa0fe6f90e ("nvmet: New NVMe PCI endpoint function target driver") Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/pci-epf.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index d55ad334670c5..b1e31483f1574 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -1271,9 +1271,6 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl, if (!(flags & NVME_QUEUE_PHYS_CONTIG)) return NVME_SC_INVALID_QUEUE | NVME_STATUS_DNR; - if (flags & NVME_CQ_IRQ_ENABLED) - set_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags); - cq->pci_addr = pci_addr; cq->qid = cqid; cq->depth = qsize + 1; @@ -1290,10 +1287,11 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl, cq->qes = ctrl->io_cqes; cq->pci_size = cq->qes * cq->depth; - cq->iv = nvmet_pci_epf_add_irq_vector(ctrl, vector); - if (!cq->iv) { - status = NVME_SC_INTERNAL | NVME_STATUS_DNR; - goto err; + if (flags & NVME_CQ_IRQ_ENABLED) { + cq->iv = nvmet_pci_epf_add_irq_vector(ctrl, vector); + if (!cq->iv) + return NVME_SC_INTERNAL | NVME_STATUS_DNR; + set_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags); } status = nvmet_cq_create(tctrl, &cq->nvme_cq, cqid, cq->depth); @@ -1308,7 +1306,8 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl, return NVME_SC_SUCCESS; err: - clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags); + if (test_and_clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) + nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector); return status; } -- GitLab From bb39ed47065455604729404729d9116868638d31 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 5 Mar 2025 21:21:43 +0900 Subject: [PATCH 1438/1585] ksmbd: fix use-after-free in ksmbd_free_work_struct ->interim_entry of ksmbd_work could be deleted after oplock is freed. We don't need to manage it with linked list. The interim request could be immediately sent whenever a oplock break wait is needed. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/ksmbd_work.c | 3 --- fs/smb/server/ksmbd_work.h | 1 - fs/smb/server/oplock.c | 37 +++++++++++++++---------------------- fs/smb/server/oplock.h | 1 - 4 files changed, 15 insertions(+), 27 deletions(-) diff --git a/fs/smb/server/ksmbd_work.c b/fs/smb/server/ksmbd_work.c index 4af2e6007c29d..72b00ca6e4551 100644 --- a/fs/smb/server/ksmbd_work.c +++ b/fs/smb/server/ksmbd_work.c @@ -26,7 +26,6 @@ struct ksmbd_work *ksmbd_alloc_work_struct(void) INIT_LIST_HEAD(&work->request_entry); INIT_LIST_HEAD(&work->async_request_entry); INIT_LIST_HEAD(&work->fp_entry); - INIT_LIST_HEAD(&work->interim_entry); INIT_LIST_HEAD(&work->aux_read_list); work->iov_alloc_cnt = 4; work->iov = kcalloc(work->iov_alloc_cnt, sizeof(struct kvec), @@ -56,8 +55,6 @@ void ksmbd_free_work_struct(struct ksmbd_work *work) kfree(work->tr_buf); kvfree(work->request_buf); kfree(work->iov); - if (!list_empty(&work->interim_entry)) - list_del(&work->interim_entry); if (work->async_id) ksmbd_release_id(&work->conn->async_ida, work->async_id); diff --git a/fs/smb/server/ksmbd_work.h b/fs/smb/server/ksmbd_work.h index 8ca2c813246e6..d36393ff8310c 100644 --- a/fs/smb/server/ksmbd_work.h +++ b/fs/smb/server/ksmbd_work.h @@ -89,7 +89,6 @@ struct ksmbd_work { /* List head at conn->async_requests */ struct list_head async_request_entry; struct list_head fp_entry; - struct list_head interim_entry; }; /** diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 3a3fe4afbdf0d..2febd1c8e278c 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -46,7 +46,6 @@ static struct oplock_info *alloc_opinfo(struct ksmbd_work *work, opinfo->fid = id; opinfo->Tid = Tid; INIT_LIST_HEAD(&opinfo->op_entry); - INIT_LIST_HEAD(&opinfo->interim_list); init_waitqueue_head(&opinfo->oplock_q); init_waitqueue_head(&opinfo->oplock_brk); atomic_set(&opinfo->refcount, 1); @@ -803,7 +802,6 @@ static void __smb2_lease_break_noti(struct work_struct *wk) static int smb2_lease_break_noti(struct oplock_info *opinfo) { struct ksmbd_conn *conn = opinfo->conn; - struct list_head *tmp, *t; struct ksmbd_work *work; struct lease_break_info *br_info; struct lease *lease = opinfo->o_lease; @@ -831,16 +829,6 @@ static int smb2_lease_break_noti(struct oplock_info *opinfo) work->sess = opinfo->sess; if (opinfo->op_state == OPLOCK_ACK_WAIT) { - list_for_each_safe(tmp, t, &opinfo->interim_list) { - struct ksmbd_work *in_work; - - in_work = list_entry(tmp, struct ksmbd_work, - interim_entry); - setup_async_work(in_work, NULL, NULL); - smb2_send_interim_resp(in_work, STATUS_PENDING); - list_del_init(&in_work->interim_entry); - release_async_work(in_work); - } INIT_WORK(&work->work, __smb2_lease_break_noti); ksmbd_queue_work(work); wait_for_break_ack(opinfo); @@ -871,7 +859,8 @@ static void wait_lease_breaking(struct oplock_info *opinfo) } } -static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level) +static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level, + struct ksmbd_work *in_work) { int err = 0; @@ -914,9 +903,15 @@ static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level) } if (lease->state & (SMB2_LEASE_WRITE_CACHING_LE | - SMB2_LEASE_HANDLE_CACHING_LE)) + SMB2_LEASE_HANDLE_CACHING_LE)) { + if (in_work) { + setup_async_work(in_work, NULL, NULL); + smb2_send_interim_resp(in_work, STATUS_PENDING); + release_async_work(in_work); + } + brk_opinfo->op_state = OPLOCK_ACK_WAIT; - else + } else atomic_dec(&brk_opinfo->breaking_cnt); } else { err = oplock_break_pending(brk_opinfo, req_op_level); @@ -1116,7 +1111,7 @@ void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, if (ksmbd_conn_releasing(opinfo->conn)) continue; - oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE); + oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE, NULL); opinfo_put(opinfo); } } @@ -1152,7 +1147,7 @@ void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp) if (ksmbd_conn_releasing(opinfo->conn)) continue; - oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE); + oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE, NULL); opinfo_put(opinfo); } } @@ -1252,8 +1247,7 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid, goto op_break_not_needed; } - list_add(&work->interim_entry, &prev_opinfo->interim_list); - err = oplock_break(prev_opinfo, SMB2_OPLOCK_LEVEL_II); + err = oplock_break(prev_opinfo, SMB2_OPLOCK_LEVEL_II, work); opinfo_put(prev_opinfo); if (err == -ENOENT) goto set_lev; @@ -1322,8 +1316,7 @@ static void smb_break_all_write_oplock(struct ksmbd_work *work, } brk_opinfo->open_trunc = is_trunc; - list_add(&work->interim_entry, &brk_opinfo->interim_list); - oplock_break(brk_opinfo, SMB2_OPLOCK_LEVEL_II); + oplock_break(brk_opinfo, SMB2_OPLOCK_LEVEL_II, work); opinfo_put(brk_opinfo); } @@ -1386,7 +1379,7 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp, SMB2_LEASE_KEY_SIZE)) goto next; brk_op->open_trunc = is_trunc; - oplock_break(brk_op, SMB2_OPLOCK_LEVEL_NONE); + oplock_break(brk_op, SMB2_OPLOCK_LEVEL_NONE, NULL); next: opinfo_put(brk_op); rcu_read_lock(); diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h index 72bc88a63a408..3f64f07872638 100644 --- a/fs/smb/server/oplock.h +++ b/fs/smb/server/oplock.h @@ -67,7 +67,6 @@ struct oplock_info { bool is_lease; bool open_trunc; /* truncate on open */ struct lease *o_lease; - struct list_head interim_list; struct list_head op_entry; struct list_head lease_entry; wait_queue_head_t oplock_q; /* Other server threads */ -- GitLab From 3aa660c059240e0c795217182cf7df32909dd917 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 6 Mar 2025 14:14:58 +0900 Subject: [PATCH 1439/1585] ksmbd: prevent connection release during oplock break notification ksmbd_work could be freed when after connection release. Increment r_count of ksmbd_conn to indicate that requests are not finished yet and to not release the connection. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.c | 20 ++++++++++++++++++++ fs/smb/server/connection.h | 2 ++ fs/smb/server/oplock.c | 6 ++++++ fs/smb/server/server.c | 14 ++------------ 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index f8a40f65db6ae..c1f22c1291117 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -433,6 +433,26 @@ void ksmbd_conn_init_server_callbacks(struct ksmbd_conn_ops *ops) default_conn_ops.terminate_fn = ops->terminate_fn; } +void ksmbd_conn_r_count_inc(struct ksmbd_conn *conn) +{ + atomic_inc(&conn->r_count); +} + +void ksmbd_conn_r_count_dec(struct ksmbd_conn *conn) +{ + /* + * Checking waitqueue to dropping pending requests on + * disconnection. waitqueue_active is safe because it + * uses atomic operation for condition. + */ + atomic_inc(&conn->refcnt); + if (!atomic_dec_return(&conn->r_count) && waitqueue_active(&conn->r_count_q)) + wake_up(&conn->r_count_q); + + if (atomic_dec_and_test(&conn->refcnt)) + kfree(conn); +} + int ksmbd_conn_transport_init(void) { int ret; diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index b379ae4fdcdff..91c2318639e76 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -168,6 +168,8 @@ int ksmbd_conn_transport_init(void); void ksmbd_conn_transport_destroy(void); void ksmbd_conn_lock(struct ksmbd_conn *conn); void ksmbd_conn_unlock(struct ksmbd_conn *conn); +void ksmbd_conn_r_count_inc(struct ksmbd_conn *conn); +void ksmbd_conn_r_count_dec(struct ksmbd_conn *conn); /* * WARNING diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 2febd1c8e278c..28886ff1ee577 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -634,6 +634,7 @@ static void __smb2_oplock_break_noti(struct work_struct *wk) { struct smb2_oplock_break *rsp = NULL; struct ksmbd_work *work = container_of(wk, struct ksmbd_work, work); + struct ksmbd_conn *conn = work->conn; struct oplock_break_info *br_info = work->request_buf; struct smb2_hdr *rsp_hdr; struct ksmbd_file *fp; @@ -689,6 +690,7 @@ static void __smb2_oplock_break_noti(struct work_struct *wk) out: ksmbd_free_work_struct(work); + ksmbd_conn_r_count_dec(conn); } /** @@ -723,6 +725,7 @@ static int smb2_oplock_break_noti(struct oplock_info *opinfo) work->sess = opinfo->sess; if (opinfo->op_state == OPLOCK_ACK_WAIT) { + ksmbd_conn_r_count_inc(conn); INIT_WORK(&work->work, __smb2_oplock_break_noti); ksmbd_queue_work(work); @@ -744,6 +747,7 @@ static void __smb2_lease_break_noti(struct work_struct *wk) { struct smb2_lease_break *rsp = NULL; struct ksmbd_work *work = container_of(wk, struct ksmbd_work, work); + struct ksmbd_conn *conn = work->conn; struct lease_break_info *br_info = work->request_buf; struct smb2_hdr *rsp_hdr; @@ -790,6 +794,7 @@ static void __smb2_lease_break_noti(struct work_struct *wk) out: ksmbd_free_work_struct(work); + ksmbd_conn_r_count_dec(conn); } /** @@ -829,6 +834,7 @@ static int smb2_lease_break_noti(struct oplock_info *opinfo) work->sess = opinfo->sess; if (opinfo->op_state == OPLOCK_ACK_WAIT) { + ksmbd_conn_r_count_inc(conn); INIT_WORK(&work->work, __smb2_lease_break_noti); ksmbd_queue_work(work); wait_for_break_ack(opinfo); diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index 601e7fcbcf1e6..ab533c6029879 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -270,17 +270,7 @@ static void handle_ksmbd_work(struct work_struct *wk) ksmbd_conn_try_dequeue_request(work); ksmbd_free_work_struct(work); - /* - * Checking waitqueue to dropping pending requests on - * disconnection. waitqueue_active is safe because it - * uses atomic operation for condition. - */ - atomic_inc(&conn->refcnt); - if (!atomic_dec_return(&conn->r_count) && waitqueue_active(&conn->r_count_q)) - wake_up(&conn->r_count_q); - - if (atomic_dec_and_test(&conn->refcnt)) - kfree(conn); + ksmbd_conn_r_count_dec(conn); } /** @@ -310,7 +300,7 @@ static int queue_ksmbd_work(struct ksmbd_conn *conn) conn->request_buf = NULL; ksmbd_conn_enqueue_request(work); - atomic_inc(&conn->r_count); + ksmbd_conn_r_count_inc(conn); /* update activity on connection */ conn->last_active = jiffies; INIT_WORK(&work->work, handle_ksmbd_work); -- GitLab From fd10709e28d2fa9015667aee56d92099fc97aa0d Mon Sep 17 00:00:00 2001 From: Bastien Nocera Date: Fri, 7 Mar 2025 15:37:32 +0100 Subject: [PATCH 1440/1585] MAINTAINERS: Remove myself from the goodix touchscreen maintainers Haven't authored any commits to that driver in 10 years, and haven't had supported hardware for nearly as long. Signed-off-by: Bastien Nocera Link: https://lore.kernel.org/r/20250307143740.960328-1-hadess@hadess.net Signed-off-by: Dmitry Torokhov --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index baf0eeb9a3554..ca8945fd98bcd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9729,7 +9729,6 @@ S: Maintained F: drivers/media/usb/go7007/ GOODIX TOUCHSCREEN -M: Bastien Nocera M: Hans de Goede L: linux-input@vger.kernel.org S: Maintained -- GitLab From f5d4e81774c42d9c2ea3980e570f3330ff2ed5d2 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Mon, 3 Mar 2025 08:49:41 +0800 Subject: [PATCH 1441/1585] drm/xe: remove redundant check in xe_vm_create_ioctl() The check for args->extensions is repeated twice in xe_vm_create_ioctl(). This commit removes the redundant check to streamline the code. Fixes: 7224788f6756 ("drm/xe: Kill XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS extension") Cc: Rodrigo Vivi Signed-off-by: Xin Wang Reviewed-by: Tejas Upadhyay Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20250303004942.951699-1-x.wang@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 8da8aecf1f2d89c2b8188bcf7aa252ec146ddd12) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index ec6ec18ab3faa..5956631c0d40a 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1809,9 +1809,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->extensions)) - return -EINVAL; - if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) flags |= XE_VM_FLAG_SCRATCH_PAGE; if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) -- GitLab From df1e82e7acd3c50b65ca0e2e09089b78382d14ab Mon Sep 17 00:00:00 2001 From: David Rosca Date: Thu, 13 Feb 2025 15:30:37 +0100 Subject: [PATCH 1442/1585] drm/amdgpu/display: Allow DCC for video formats on GFX12 We advertise DCC as supported for NV12/P010 formats on GFX12, but it would fail on this check on atomic commit. Signed-off-by: David Rosca Reviewed-by: Ruijing Dong Signed-off-by: Alex Deucher (cherry picked from commit ba795235a2b99ba9bbef647ab003b2f3145d9bbb) Cc: stable@vger.kernel.org # 6.12.x --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 774cc3f4f3fd9..92472109f84a9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -277,8 +277,11 @@ static int amdgpu_dm_plane_validate_dcc(struct amdgpu_device *adev, if (!dcc->enable) return 0; - if (format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || - !dc->cap_funcs.get_dcc_compression_cap) + if (adev->family < AMDGPU_FAMILY_GC_12_0_0 && + format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) + return -EINVAL; + + if (!dc->cap_funcs.get_dcc_compression_cap) return -EINVAL; input.format = format; -- GitLab From e204aab79e01bc8ff750645666993ed8b719de57 Mon Sep 17 00:00:00 2001 From: Aliaksei Urbanski Date: Thu, 6 Mar 2025 13:36:03 +0300 Subject: [PATCH 1443/1585] drm/amd/display: fix missing .is_two_pixels_per_container Starting from 6.11, AMDGPU driver, while being loaded with amdgpu.dc=1, due to lack of .is_two_pixels_per_container function in dce60_tg_funcs, causes a NULL pointer dereference on PCs with old GPUs, such as R9 280X. So this fix adds missing .is_two_pixels_per_container to dce60_tg_funcs. Reported-by: Rosen Penev Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3942 Fixes: e6a901a00822 ("drm/amd/display: use even ODM slice width for two pixels per container") Signed-off-by: Aliaksei Urbanski Signed-off-by: Alex Deucher (cherry picked from commit bd4b125eb949785c6f8a53b0494e32795421209d) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c index e5fb0e8333e43..e691a1cf33567 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c @@ -239,6 +239,7 @@ static const struct timing_generator_funcs dce60_tg_funcs = { dce60_timing_generator_enable_advanced_request, .configure_crc = dce60_configure_crc, .get_crc = dce110_get_crc, + .is_two_pixels_per_container = dce110_is_two_pixels_per_container, }; void dce60_timing_generator_construct( -- GitLab From 4afacc9948e1f8fdbca401d259ae65ad93d298c0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 6 Mar 2025 12:51:24 -0600 Subject: [PATCH 1444/1585] drm/amd: Keep display off while going into S4 When userspace invokes S4 the flow is: 1) amdgpu_pmops_prepare() 2) amdgpu_pmops_freeze() 3) Create hibernation image 4) amdgpu_pmops_thaw() 5) Write out image to disk 6) Turn off system Then on resume amdgpu_pmops_restore() is called. This flow has a problem that because amdgpu_pmops_thaw() is called it will call amdgpu_device_resume() which will resume all of the GPU. This includes turning the display hardware back on and discovering connectors again. This is an unexpected experience for the display to turn back on. Adjust the flow so that during the S4 sequence display hardware is not turned back on. Reported-by: Xaver Hugl Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2038 Cc: Muhammad Usama Anjum Tested-by: Muhammad Usama Anjum Acked-by: Alex Deucher Acked-by: Harry Wentland Link: https://lore.kernel.org/r/20250306185124.44780-1-mario.limonciello@amd.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 68bfdc8dc0a1a7fdd9ab61e69907ae71a6fd3d91) --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 11 +++++++++-- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 +++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 95a05b03f799d..c0ddbe7d6f0bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2555,7 +2555,6 @@ static int amdgpu_pmops_freeze(struct device *dev) int r; r = amdgpu_device_suspend(drm_dev, true); - adev->in_s4 = false; if (r) return r; @@ -2567,8 +2566,13 @@ static int amdgpu_pmops_freeze(struct device *dev) static int amdgpu_pmops_thaw(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + int r; - return amdgpu_device_resume(drm_dev, true); + r = amdgpu_device_resume(drm_dev, true); + adev->in_s4 = false; + + return r; } static int amdgpu_pmops_poweroff(struct device *dev) @@ -2581,6 +2585,9 @@ static int amdgpu_pmops_poweroff(struct device *dev) static int amdgpu_pmops_restore(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + + adev->in_s4 = false; return amdgpu_device_resume(drm_dev, true); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 9d9645a2d18ef..f4fed93f2915a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3373,6 +3373,11 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) return 0; } + + /* leave display off for S4 sequence */ + if (adev->in_s4) + return 0; + /* Recreate dc_state - DC invalidates it when setting power state to S3. */ dc_state_release(dm_state->context); dm_state->context = dc_state_create(dm->dc, NULL); -- GitLab From 40b8c14936bd2726354c856251f6baed9869e760 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Thu, 20 Feb 2025 16:20:26 -0500 Subject: [PATCH 1445/1585] drm/amd/display: Disable unneeded hpd interrupts during dm_init [Why] It seems HPD interrupts are enabled by default for all connectors, even if the hpd source isn't valid. An eDP for example, does not have a valid hpd source (but does have a valid hpdrx source; see construct_phy()). Thus, eDPs should have their hpd interrupt disabled. In the past, this wasn't really an issue. Although the driver gets interrupted, then acks by writing to hw registers, there weren't any subscribed handlers that did anything meaningful (see register_hpd_handlers()). But things changed with the introduction of IPS. s2idle requires that the driver allows IPS for DMUB fw to put hw to sleep. Since register access requires hw to be awake, the driver will block IPS entry to do so. And no IPS means no hw sleep during s2idle. This was the observation on DCN35 systems with an eDP. During suspend, the eDP toggled its hpd pin as part of the panel power down sequence. The driver was then interrupted, and acked by writing to registers, blocking IPS entry. [How] Since DC marks eDP connections as having invalid hpd sources (see construct_phy()), DM should disable them at the hw level. Do so in amdgpu_dm_hpd_init() by disabling all hpd ints first, then selectively enabling ones for connectors that have valid hpd sources. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Harry Wentland Signed-off-by: Leo Li Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 7b1ba19eb15f88e70782642ce2d934211269337b) Cc: stable@vger.kernel.org --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c | 64 +++++++++++++------ 1 file changed, 45 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index c4a7fd453e5fc..a215234151ac3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -894,8 +894,16 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) struct drm_device *dev = adev_to_drm(adev); struct drm_connector *connector; struct drm_connector_list_iter iter; + int irq_type; int i; + /* First, clear all hpd and hpdrx interrupts */ + for (i = DC_IRQ_SOURCE_HPD1; i <= DC_IRQ_SOURCE_HPD6RX; i++) { + if (!dc_interrupt_set(adev->dm.dc, i, false)) + drm_err(dev, "Failed to clear hpd(rx) source=%d on init\n", + i); + } + drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { struct amdgpu_dm_connector *amdgpu_dm_connector; @@ -908,10 +916,31 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) dc_link = amdgpu_dm_connector->dc_link; + /* + * Get a base driver irq reference for hpd ints for the lifetime + * of dm. Note that only hpd interrupt types are registered with + * base driver; hpd_rx types aren't. IOW, amdgpu_irq_get/put on + * hpd_rx isn't available. DM currently controls hpd_rx + * explicitly with dc_interrupt_set() + */ if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) { - dc_interrupt_set(adev->dm.dc, - dc_link->irq_source_hpd, - true); + irq_type = dc_link->irq_source_hpd - DC_IRQ_SOURCE_HPD1; + /* + * TODO: There's a mismatch between mode_info.num_hpd + * and what bios reports as the # of connectors with hpd + * sources. Since the # of hpd source types registered + * with base driver == mode_info.num_hpd, we have to + * fallback to dc_interrupt_set for the remaining types. + */ + if (irq_type < adev->mode_info.num_hpd) { + if (amdgpu_irq_get(adev, &adev->hpd_irq, irq_type)) + drm_err(dev, "DM_IRQ: Failed get HPD for source=%d)!\n", + dc_link->irq_source_hpd); + } else { + dc_interrupt_set(adev->dm.dc, + dc_link->irq_source_hpd, + true); + } } if (dc_link->irq_source_hpd_rx != DC_IRQ_SOURCE_INVALID) { @@ -921,12 +950,6 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) } } drm_connector_list_iter_end(&iter); - - /* Update reference counts for HPDs */ - for (i = DC_IRQ_SOURCE_HPD1; i <= adev->mode_info.num_hpd; i++) { - if (amdgpu_irq_get(adev, &adev->hpd_irq, i - DC_IRQ_SOURCE_HPD1)) - drm_err(dev, "DM_IRQ: Failed get HPD for source=%d)!\n", i); - } } /** @@ -942,7 +965,7 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) struct drm_device *dev = adev_to_drm(adev); struct drm_connector *connector; struct drm_connector_list_iter iter; - int i; + int irq_type; drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { @@ -956,9 +979,18 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) dc_link = amdgpu_dm_connector->dc_link; if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) { - dc_interrupt_set(adev->dm.dc, - dc_link->irq_source_hpd, - false); + irq_type = dc_link->irq_source_hpd - DC_IRQ_SOURCE_HPD1; + + /* TODO: See same TODO in amdgpu_dm_hpd_init() */ + if (irq_type < adev->mode_info.num_hpd) { + if (amdgpu_irq_put(adev, &adev->hpd_irq, irq_type)) + drm_err(dev, "DM_IRQ: Failed put HPD for source=%d!\n", + dc_link->irq_source_hpd); + } else { + dc_interrupt_set(adev->dm.dc, + dc_link->irq_source_hpd, + false); + } } if (dc_link->irq_source_hpd_rx != DC_IRQ_SOURCE_INVALID) { @@ -968,10 +1000,4 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) } } drm_connector_list_iter_end(&iter); - - /* Update reference counts for HPDs */ - for (i = DC_IRQ_SOURCE_HPD1; i <= adev->mode_info.num_hpd; i++) { - if (amdgpu_irq_put(adev, &adev->hpd_irq, i - DC_IRQ_SOURCE_HPD1)) - drm_err(dev, "DM_IRQ: Failed put HPD for source=%d!\n", i); - } } -- GitLab From b5a981e1b34e44f94a5967f730fff4166f2101e8 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sat, 22 Feb 2025 23:37:32 -0600 Subject: [PATCH 1446/1585] drm/amd/display: fix default brightness [Why] To avoid flickering during boot default brightness level set by BIOS should be maintained for as much of the boot as feasible. commit 2fe87f54abdc ("drm/amd/display: Set default brightness according to ACPI") attempted to set the right levels for AC vs DC, but brightness still got reset to maximum level in initialization code for setup_backlight_device(). [How] Remove the hardcoded initialization in setup_backlight_device() and instead program brightness value to match BIOS (AC or DC). This avoids a brightness flicker from kernel changing the value. Userspace may however still change it during boot. Fixes: 2fe87f54abdc ("drm/amd/display: Set default brightness according to ACPI") Acked-by: Wayne Lin Signed-off-by: Mario Limonciello Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 0747acf3311229e22009bec4a9e7fc30c879e842) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f4fed93f2915a..4bd7f82c582dc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4911,6 +4911,7 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) dm->backlight_dev[aconnector->bl_idx] = backlight_device_register(bl_name, aconnector->base.kdev, dm, &amdgpu_dm_backlight_ops, &props); + dm->brightness[aconnector->bl_idx] = props.brightness; if (IS_ERR(dm->backlight_dev[aconnector->bl_idx])) { DRM_ERROR("DM: Backlight registration failed!\n"); @@ -4978,7 +4979,6 @@ static void setup_backlight_device(struct amdgpu_display_manager *dm, aconnector->bl_idx = bl_idx; amdgpu_dm_update_backlight_caps(dm, bl_idx); - dm->brightness[bl_idx] = AMDGPU_MAX_BL_LEVEL; dm->backlight_link[bl_idx] = link; dm->num_of_edps++; -- GitLab From 5760388d9681ac743038b846b9082b9023969551 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sun, 23 Feb 2025 00:04:35 -0600 Subject: [PATCH 1447/1585] drm/amd/display: Restore correct backlight brightness after a GPU reset [Why] GPU reset will attempt to restore cached state, but brightness doesn't get restored. It will come back at 100% brightness, but userspace thinks it's the previous value. [How] When running resume sequence if GPU is in reset restore brightness to previous value. Acked-by: Wayne Lin Signed-off-by: Mario Limonciello Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 5e19e2b57b6bb640d68dfc7991e1e182922cf867) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 4bd7f82c582dc..74ad0d1240feb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -245,6 +245,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector); static void handle_hpd_rx_irq(void *param); +static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, + int bl_idx, + u32 user_brightness); + static bool is_timing_unchanged_for_freesync(struct drm_crtc_state *old_crtc_state, struct drm_crtc_state *new_crtc_state); @@ -3371,6 +3375,12 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) mutex_unlock(&dm->dc_lock); + /* set the backlight after a reset */ + for (i = 0; i < dm->num_of_edps; i++) { + if (dm->backlight_dev[i]) + amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]); + } + return 0; } -- GitLab From 79e31396fdd7037c503e6add15af7cb00633ea92 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 27 Feb 2025 16:36:25 -0700 Subject: [PATCH 1448/1585] drm/amd/display: Assign normalized_pix_clk when color depth = 14 [WHY & HOW] A warning message "WARNING: CPU: 4 PID: 459 at ... /dc_resource.c:3397 calculate_phy_pix_clks+0xef/0x100 [amdgpu]" occurs because the display_color_depth == COLOR_DEPTH_141414 is not handled. This is observed in Radeon RX 6600 XT. It is fixed by assigning pix_clk * (14 * 3) / 24 - same as the rests. Also fixes the indentation in get_norm_pix_clk. Reviewed-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 274a87eb389f58eddcbc5659ab0b180b37e92775) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index a45037cb4cc01..298668e9729c7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -3389,10 +3389,13 @@ static int get_norm_pix_clk(const struct dc_crtc_timing *timing) break; case COLOR_DEPTH_121212: normalized_pix_clk = (pix_clk * 36) / 24; - break; + break; + case COLOR_DEPTH_141414: + normalized_pix_clk = (pix_clk * 42) / 24; + break; case COLOR_DEPTH_161616: normalized_pix_clk = (pix_clk * 48) / 24; - break; + break; default: ASSERT(0); break; -- GitLab From e65e7bea220c3ce8c4c793b4ba35557f4994ab2b Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 28 Feb 2025 13:18:14 -0600 Subject: [PATCH 1449/1585] drm/amd/display: Fix slab-use-after-free on hdcp_work [Why] A slab-use-after-free is reported when HDCP is destroyed but the property_validate_dwork queue is still running. [How] Cancel the delayed work when destroying workqueue. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4006 Fixes: da3fd7ac0bcf ("drm/amd/display: Update CP property based on HW query") Cc: Alex Deucher Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 725a04ba5a95e89c89633d4322430cfbca7ce128) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index e339c7a8d541c..c0dc232440490 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -455,6 +455,7 @@ void hdcp_destroy(struct kobject *kobj, struct hdcp_workqueue *hdcp_work) for (i = 0; i < hdcp_work->max_link; i++) { cancel_delayed_work_sync(&hdcp_work[i].callback_dwork); cancel_delayed_work_sync(&hdcp_work[i].watchdog_timer_dwork); + cancel_delayed_work_sync(&hdcp_work[i].property_validate_dwork); } sysfs_remove_bin_file(kobj, &hdcp_work[0].attr); -- GitLab From ded6ad4c6e2005e959ea09abba16c451433dd34b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 9 Mar 2025 12:26:50 -0400 Subject: [PATCH 1450/1585] drm/amdgpu/vce2: fix ip block reference Need to use the correct IP block type. VCE vs VCN. Fixes mclk issues on Hawaii. Suggested by selendym. Fixes: 82ae6619a450 ("drm/amdgpu: update the handle ptr in wait_for_idle") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3997 Reviewed-by: Boyuan Zhang Cc: Sunil Khatri Signed-off-by: Alex Deucher (cherry picked from commit 02438acd252395628d74cfac692efbb676d21521) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index c633b7ff29438..09fd6ef99b3d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -284,7 +284,7 @@ static int vce_v2_0_stop(struct amdgpu_device *adev) return 0; } - ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCN); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE); if (!ip_block) return -EINVAL; -- GitLab From 366fef794bd2b7c2e9df933f6828dd9739bfba84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 10 Mar 2025 14:21:58 +0200 Subject: [PATCH 1451/1585] : Allow the passing of both iomem and non-iomem pointers to no_free_ptr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling no_free_ptr() for an __iomem pointer results in Sparse complaining about the types: warning: incorrect type in argument 1 (different address spaces) expected void const volatile *val got void [noderef] __iomem *__val [ The example is from drivers/platform/x86/intel/pmc/core_ssram.c:283 ] The problem is caused by the signature of __must_check_fn() added in: 85be6d842447 ("cleanup: Make no_free_ptr() __must_check") ... to enforce that the return value is always used. Use __force to allow both iomem and non-iomem pointers to be given for no_free_ptr(). Reported-by: kernel test robot Signed-off-by: Ilpo Järvinen Signed-off-by: Ingo Molnar Reviewed-by: Andy Shevchenko Reviewed-by: Dan Williams Cc: "H. Peter Anvin" Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250310122158.20966-1-ilpo.jarvinen@linux.intel.com Closes: https://lore.kernel.org/oe-kbuild-all/202403050547.qnZtuNlN-lkp@intel.com/ --- include/linux/cleanup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index ec00e3f7af2b3..ee2614adb7858 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -212,7 +212,7 @@ const volatile void * __must_check_fn(const volatile void *val) { return val; } #define no_free_ptr(p) \ - ((typeof(p)) __must_check_fn(__get_and_null(p, NULL))) + ((typeof(p)) __must_check_fn((__force const volatile void *)__get_and_null(p, NULL))) #define return_ptr(p) return no_free_ptr(p) -- GitLab From eab0396353be1c778eba1c0b5180176f04dd21ce Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Fri, 7 Mar 2025 10:18:20 +0800 Subject: [PATCH 1452/1585] net/mlx5: handle errors in mlx5_chains_create_table() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In mlx5_chains_create_table(), the return value of mlx5_get_fdb_sub_ns() and mlx5_get_flow_namespace() must be checked to prevent NULL pointer dereferences. If either function fails, the function should log error message with mlx5_core_warn() and return error pointer. Fixes: 39ac237ce009 ("net/mlx5: E-Switch, Refactor chains and priorities") Signed-off-by: Wentao Liang Reviewed-by: Tariq Toukan Link: https://patch.msgid.link/20250307021820.2646-1-vulab@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index a80ecb672f33d..711d14dea2485 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -196,6 +196,11 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains, ns = mlx5_get_flow_namespace(chains->dev, chains->ns); } + if (!ns) { + mlx5_core_warn(chains->dev, "Failed to get flow namespace\n"); + return ERR_PTR(-EOPNOTSUPP); + } + ft_attr.autogroup.num_reserved_entries = 2; ft_attr.autogroup.max_num_groups = chains->group_num; ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); -- GitLab From d0a4a1b36d7a71b45972ef33762c3fc082bec1db Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Fri, 7 Mar 2025 10:12:55 +0100 Subject: [PATCH 1453/1585] net: ethtool: tsinfo: Fix dump command Fix missing initialization of ts_info->phc_index in the dump command, which could cause a netdev interface to incorrectly display a PTP provider at index 0 instead of "none". Fix it by initializing the phc_index to -1. In the same time, restore missing initialization of ts_info.cmd for the IOCTL case, as it was before the transition from ethnl_default_dumpit to custom ethnl_tsinfo_dumpit. Also, remove unnecessary zeroing of ts_info, as it is embedded within reply_data, which is fully zeroed two lines earlier. Fixes: b9e3f7dc9ed95 ("net: ethtool: tsinfo: Enhance tsinfo to support several hwtstamp by net topology") Signed-off-by: Kory Maincent Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250307091255.463559-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- net/ethtool/tsinfo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index 691be6c445b38..ad3866c5a902b 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -290,7 +290,8 @@ static void *ethnl_tsinfo_prepare_dump(struct sk_buff *skb, reply_data = ctx->reply_data; memset(reply_data, 0, sizeof(*reply_data)); reply_data->base.dev = dev; - memset(&reply_data->ts_info, 0, sizeof(reply_data->ts_info)); + reply_data->ts_info.cmd = ETHTOOL_GET_TS_INFO; + reply_data->ts_info.phc_index = -1; return ehdr; } -- GitLab From cfa693bf9d5361608e2963f5dae053b3695af8eb Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 7 Mar 2025 11:12:23 +0100 Subject: [PATCH 1454/1585] net: usb: lan78xx: Sanitize return values of register read/write functions usb_control_msg() returns the number of transferred bytes or a negative error code. The current implementation propagates the transferred byte count, which is unintended. This affects code paths that assume a boolean success/failure check, such as the EEPROM detection logic. Fix this by ensuring lan78xx_read_reg() and lan78xx_write_reg() return only 0 on success and preserve negative error codes. This approach is consistent with existing usage, as the transferred byte count is not explicitly checked elsewhere. Fixes: 8b1b2ca83b20 ("net: usb: lan78xx: Improve error handling in EEPROM and OTP operations") Reported-by: Mark Brown Closes: https://lore.kernel.org/all/ac965de8-f320-430f-80f6-b16f4e1ba06d@sirena.org.uk Signed-off-by: Oleksij Rempel Tested-by: Mark Brown Link: https://patch.msgid.link/20250307101223.3025632-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index a91bf9c7e31d2..137adf6d5b08a 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -627,7 +627,7 @@ static int lan78xx_read_reg(struct lan78xx_net *dev, u32 index, u32 *data) kfree(buf); - return ret; + return ret < 0 ? ret : 0; } static int lan78xx_write_reg(struct lan78xx_net *dev, u32 index, u32 data) @@ -658,7 +658,7 @@ static int lan78xx_write_reg(struct lan78xx_net *dev, u32 index, u32 data) kfree(buf); - return ret; + return ret < 0 ? ret : 0; } static int lan78xx_update_reg(struct lan78xx_net *dev, u32 reg, u32 mask, -- GitLab From 9f7b2aa5034e24d3c49db73d5f760c0435fe31c2 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 9 Mar 2025 13:42:12 +0000 Subject: [PATCH 1455/1585] eth: bnxt: fix truesize for mb-xdp-pass case When mb-xdp is set and return is XDP_PASS, packet is converted from xdp_buff to sk_buff with xdp_update_skb_shared_info() in bnxt_xdp_build_skb(). bnxt_xdp_build_skb() passes incorrect truesize argument to xdp_update_skb_shared_info(). The truesize is calculated as BNXT_RX_PAGE_SIZE * sinfo->nr_frags but the skb_shared_info was wiped by napi_build_skb() before. So it stores sinfo->nr_frags before bnxt_xdp_build_skb() and use it instead of getting skb_shared_info from xdp_get_shared_info_from_buff(). Splat looks like: ------------[ cut here ]------------ WARNING: CPU: 2 PID: 0 at net/core/skbuff.c:6072 skb_try_coalesce+0x504/0x590 Modules linked in: xt_nat xt_tcpudp veth af_packet xt_conntrack nft_chain_nat xt_MASQUERADE nf_conntrack_netlink xfrm_user xt_addrtype nft_coms CPU: 2 UID: 0 PID: 0 Comm: swapper/2 Not tainted 6.14.0-rc2+ #3 RIP: 0010:skb_try_coalesce+0x504/0x590 Code: 4b fd ff ff 49 8b 34 24 40 80 e6 40 0f 84 3d fd ff ff 49 8b 74 24 48 40 f6 c6 01 0f 84 2e fd ff ff 48 8d 4e ff e9 25 fd ff ff <0f> 0b e99 RSP: 0018:ffffb62c4120caa8 EFLAGS: 00010287 RAX: 0000000000000003 RBX: ffffb62c4120cb14 RCX: 0000000000000ec0 RDX: 0000000000001000 RSI: ffffa06e5d7dc000 RDI: 0000000000000003 RBP: ffffa06e5d7ddec0 R08: ffffa06e6120a800 R09: ffffa06e7a119900 R10: 0000000000002310 R11: ffffa06e5d7dcec0 R12: ffffe4360575f740 R13: ffffe43600000000 R14: 0000000000000002 R15: 0000000000000002 FS: 0000000000000000(0000) GS:ffffa0755f700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f147b76b0f8 CR3: 00000001615d4000 CR4: 00000000007506f0 PKRU: 55555554 Call Trace: ? __warn+0x84/0x130 ? skb_try_coalesce+0x504/0x590 ? report_bug+0x18a/0x1a0 ? handle_bug+0x53/0x90 ? exc_invalid_op+0x14/0x70 ? asm_exc_invalid_op+0x16/0x20 ? skb_try_coalesce+0x504/0x590 inet_frag_reasm_finish+0x11f/0x2e0 ip_defrag+0x37a/0x900 ip_local_deliver+0x51/0x120 ip_sublist_rcv_finish+0x64/0x70 ip_sublist_rcv+0x179/0x210 ip_list_rcv+0xf9/0x130 How to reproduce: ip link set $interface1 xdp obj xdp_pass.o ip link set $interface1 mtu 9000 up ip a a 10.0.0.1/24 dev $interface1 ip link set $interfac2 mtu 9000 up ip a a 10.0.0.2/24 dev $interface2 ping 10.0.0.1 -s 65000 Following ping.py patch adds xdp-mb-pass case. so ping.py is going to be able to reproduce this issue. Fixes: 1dc4c557bfed ("bnxt: adding bnxt_xdp_build_skb to build skb from multibuffer xdp_buff") Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250309134219.91670-2-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7b8b5b39c7bbe..6b5fe4ee7a99b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2038,6 +2038,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, struct rx_cmp_ext *rxcmp1; u32 tmp_raw_cons = *raw_cons; u16 cons, prod, cp_cons = RING_CMP(tmp_raw_cons); + struct skb_shared_info *sinfo; struct bnxt_sw_rx_bd *rx_buf; unsigned int len; u8 *data_ptr, agg_bufs, cmp_type; @@ -2164,6 +2165,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, false); if (!frag_len) goto oom_next_rx; + } xdp_active = true; } @@ -2173,6 +2175,12 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, rc = 1; goto next_rx; } + if (xdp_buff_has_frags(&xdp)) { + sinfo = xdp_get_shared_info_from_buff(&xdp); + agg_bufs = sinfo->nr_frags; + } else { + agg_bufs = 0; + } } if (len <= bp->rx_copybreak) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index e6c64e4bd66c3..e9b49cb5b735b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -476,7 +476,7 @@ bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags, } xdp_update_skb_shared_info(skb, num_frags, sinfo->xdp_frags_size, - BNXT_RX_PAGE_SIZE * sinfo->nr_frags, + BNXT_RX_PAGE_SIZE * num_frags, xdp_buff_is_frag_pfmemalloc(xdp)); return skb; } -- GitLab From ca2456e073957781e1184de68551c65161b2bd30 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 9 Mar 2025 13:42:13 +0000 Subject: [PATCH 1456/1585] eth: bnxt: return fail if interface is down in bnxt_queue_mem_alloc() The bnxt_queue_mem_alloc() is called to allocate new queue memory when a queue is restarted. It internally accesses rx buffer descriptor corresponding to the index. The rx buffer descriptor is allocated and set when the interface is up and it's freed when the interface is down. So, if queue is restarted if interface is down, kernel panic occurs. Splat looks like: BUG: unable to handle page fault for address: 000000000000b240 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 3 UID: 0 PID: 1563 Comm: ncdevmem2 Not tainted 6.14.0-rc2+ #9 844ddba6e7c459cafd0bf4db9a3198e Hardware name: ASUS System Product Name/PRIME Z690-P D4, BIOS 0603 11/01/2021 RIP: 0010:bnxt_queue_mem_alloc+0x3f/0x4e0 [bnxt_en] Code: 41 54 4d 89 c4 4d 69 c0 c0 05 00 00 55 48 89 f5 53 48 89 fb 4c 8d b5 40 05 00 00 48 83 ec 15 RSP: 0018:ffff9dcc83fef9e8 EFLAGS: 00010202 RAX: ffffffffc0457720 RBX: ffff934ed8d40000 RCX: 0000000000000000 RDX: 000000000000001f RSI: ffff934ea508f800 RDI: ffff934ea508f808 RBP: ffff934ea508f800 R08: 000000000000b240 R09: ffff934e84f4b000 R10: ffff9dcc83fefa30 R11: ffff934e84f4b000 R12: 000000000000001f R13: ffff934ed8d40ac0 R14: ffff934ea508fd40 R15: ffff934e84f4b000 FS: 00007fa73888c740(0000) GS:ffff93559f780000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000b240 CR3: 0000000145a2e000 CR4: 00000000007506f0 PKRU: 55555554 Call Trace: ? __die+0x20/0x70 ? page_fault_oops+0x15a/0x460 ? exc_page_fault+0x6e/0x180 ? asm_exc_page_fault+0x22/0x30 ? __pfx_bnxt_queue_mem_alloc+0x10/0x10 [bnxt_en 7f85e76f4d724ba07471d7e39d9e773aea6597b7] ? bnxt_queue_mem_alloc+0x3f/0x4e0 [bnxt_en 7f85e76f4d724ba07471d7e39d9e773aea6597b7] netdev_rx_queue_restart+0xc5/0x240 net_devmem_bind_dmabuf_to_queue+0xf8/0x200 netdev_nl_bind_rx_doit+0x3a7/0x450 genl_family_rcv_msg_doit+0xd9/0x130 genl_rcv_msg+0x184/0x2b0 ? __pfx_netdev_nl_bind_rx_doit+0x10/0x10 ? __pfx_genl_rcv_msg+0x10/0x10 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 ... Reviewed-by: Somnath Kotur Reviewed-by: Jakub Kicinski Fixes: 2d694c27d32e ("bnxt_en: implement netdev_queue_mgmt_ops") Signed-off-by: Taehee Yoo Reviewed-by: Mina Almasry Link: https://patch.msgid.link/20250309134219.91670-3-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6b5fe4ee7a99b..acb9500ef9307 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -15447,6 +15447,9 @@ static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx) struct bnxt_ring_struct *ring; int rc; + if (!bp->rx_ring) + return -ENETDOWN; + rxr = &bp->rx_ring[idx]; clone = qmem; memcpy(clone, rxr, sizeof(*rxr)); -- GitLab From 661958552eda5bf64bfafb4821cbdded935f1f68 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 9 Mar 2025 13:42:14 +0000 Subject: [PATCH 1457/1585] eth: bnxt: do not use BNXT_VNIC_NTUPLE unconditionally in queue restart logic When a queue is restarted, it sets MRU to 0 for stopping packet flow. MRU variable is a member of vnic_info[], the first vnic_info is default and the second is ntuple. Only when ntuple is enabled(ethtool -K eth0 ntuple on), vnic_info for ntuple is allocated in init logic. The bp->nr_vnics indicates how many vnic_info are allocated. However bnxt_queue_{start | stop}() accesses vnic_info[BNXT_VNIC_NTUPLE] regardless of ntuple state. Reviewed-by: Somnath Kotur Fixes: b9d2956e869c ("bnxt_en: stop packet flow during bnxt_queue_stop/start") Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250309134219.91670-4-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index acb9500ef9307..218109ee1c234 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -15643,7 +15643,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) cpr = &rxr->bnapi->cp_ring; cpr->sw_stats->rx.rx_resets++; - for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) { + for (i = 0; i <= bp->nr_vnics; i++) { vnic = &bp->vnic_info[i]; rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic, true); @@ -15671,7 +15671,7 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx) struct bnxt_vnic_info *vnic; int i; - for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) { + for (i = 0; i <= bp->nr_vnics; i++) { vnic = &bp->vnic_info[i]; vnic->mru = 0; bnxt_hwrm_vnic_update(bp, vnic, -- GitLab From c03e7d05aa0e2f7e9a9ce5ad8a12471a53f941dc Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 9 Mar 2025 13:42:15 +0000 Subject: [PATCH 1458/1585] eth: bnxt: do not update checksum in bnxt_xdp_build_skb() The bnxt_rx_pkt() updates ip_summed value at the end if checksum offload is enabled. When the XDP-MB program is attached and it returns XDP_PASS, the bnxt_xdp_build_skb() is called to update skb_shared_info. The main purpose of bnxt_xdp_build_skb() is to update skb_shared_info, but it updates ip_summed value too if checksum offload is enabled. This is actually duplicate work. When the bnxt_rx_pkt() updates ip_summed value, it checks if ip_summed is CHECKSUM_NONE or not. It means that ip_summed should be CHECKSUM_NONE at this moment. But ip_summed may already be updated to CHECKSUM_UNNECESSARY in the XDP-MB-PASS path. So the by skb_checksum_none_assert() WARNS about it. This is duplicate work and updating ip_summed in the bnxt_xdp_build_skb() is not needed. Splat looks like: WARNING: CPU: 3 PID: 5782 at ./include/linux/skbuff.h:5155 bnxt_rx_pkt+0x479b/0x7610 [bnxt_en] Modules linked in: bnxt_re bnxt_en rdma_ucm rdma_cm iw_cm ib_cm ib_uverbs veth xt_nat xt_tcpudp xt_conntrack nft_chain_nat xt_MASQUERADE nf_] CPU: 3 UID: 0 PID: 5782 Comm: socat Tainted: G W 6.14.0-rc4+ #27 Tainted: [W]=WARN Hardware name: ASUS System Product Name/PRIME Z690-P D4, BIOS 0603 11/01/2021 RIP: 0010:bnxt_rx_pkt+0x479b/0x7610 [bnxt_en] Code: 54 24 0c 4c 89 f1 4c 89 ff c1 ea 1f ff d3 0f 1f 00 49 89 c6 48 85 c0 0f 84 4c e5 ff ff 48 89 c7 e8 ca 3d a0 c8 e9 8f f4 ff ff <0f> 0b f RSP: 0018:ffff88881ba09928 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 00000000c7590303 RCX: 0000000000000000 RDX: 1ffff1104e7d1610 RSI: 0000000000000001 RDI: ffff8881c91300b8 RBP: ffff88881ba09b28 R08: ffff888273e8b0d0 R09: ffff888273e8b070 R10: ffff888273e8b010 R11: ffff888278b0f000 R12: ffff888273e8b080 R13: ffff8881c9130e00 R14: ffff8881505d3800 R15: ffff888273e8b000 FS: 00007f5a2e7be080(0000) GS:ffff88881ba00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fff2e708ff8 CR3: 000000013e3b0000 CR4: 00000000007506f0 PKRU: 55555554 Call Trace: ? __warn+0xcd/0x2f0 ? bnxt_rx_pkt+0x479b/0x7610 ? report_bug+0x326/0x3c0 ? handle_bug+0x53/0xa0 ? exc_invalid_op+0x14/0x50 ? asm_exc_invalid_op+0x16/0x20 ? bnxt_rx_pkt+0x479b/0x7610 ? bnxt_rx_pkt+0x3e41/0x7610 ? __pfx_bnxt_rx_pkt+0x10/0x10 ? napi_complete_done+0x2cf/0x7d0 __bnxt_poll_work+0x4e8/0x1220 ? __pfx___bnxt_poll_work+0x10/0x10 ? __pfx_mark_lock.part.0+0x10/0x10 bnxt_poll_p5+0x36a/0xfa0 ? __pfx_bnxt_poll_p5+0x10/0x10 __napi_poll.constprop.0+0xa0/0x440 net_rx_action+0x899/0xd00 ... Following ping.py patch adds xdp-mb-pass case. so ping.py is going to be able to reproduce this issue. Fixes: 1dc4c557bfed ("bnxt: adding bnxt_xdp_build_skb to build skb from multibuffer xdp_buff") Signed-off-by: Taehee Yoo Reviewed-by: Somnath Kotur Link: https://patch.msgid.link/20250309134219.91670-5-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 11 ++--------- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 3 +-- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 218109ee1c234..9afb2c5072b13 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2218,7 +2218,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, if (!skb) goto oom_next_rx; } else { - skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, rxr->page_pool, &xdp, rxcmp1); + skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, + rxr->page_pool, &xdp); if (!skb) { /* we should be able to free the old skb here */ bnxt_xdp_buff_frags_free(rxr, &xdp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index e9b49cb5b735b..299822cacca48 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -460,20 +460,13 @@ int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp) struct sk_buff * bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags, - struct page_pool *pool, struct xdp_buff *xdp, - struct rx_cmp_ext *rxcmp1) + struct page_pool *pool, struct xdp_buff *xdp) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); if (!skb) return NULL; - skb_checksum_none_assert(skb); - if (RX_CMP_L4_CS_OK(rxcmp1)) { - if (bp->dev->features & NETIF_F_RXCSUM) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = RX_CMP_ENCAP(rxcmp1); - } - } + xdp_update_skb_shared_info(skb, num_frags, sinfo->xdp_frags_size, BNXT_RX_PAGE_SIZE * num_frags, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index 0122782400b8a..220285e190fcd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -33,6 +33,5 @@ void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr, struct xdp_buff *xdp); struct sk_buff *bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags, struct page_pool *pool, - struct xdp_buff *xdp, - struct rx_cmp_ext *rxcmp1); + struct xdp_buff *xdp); #endif -- GitLab From f09af5fdfbd9b0fcee73aab1116904c53b199e97 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 9 Mar 2025 13:42:16 +0000 Subject: [PATCH 1459/1585] eth: bnxt: fix kernel panic in the bnxt_get_queue_stats{rx | tx} When qstats-get operation is executed, callbacks of netdev_stats_ops are called. The bnxt_get_queue_stats{rx | tx} collect per-queue stats from sw_stats in the rings. But {rx | tx | cp}_ring are allocated when the interface is up. So, these rings are not allocated when the interface is down. The qstats-get is allowed even if the interface is down. However, the bnxt_get_queue_stats{rx | tx}() accesses cp_ring and tx_ring without null check. So, it needs to avoid accessing rings if the interface is down. Reproducer: ip link set $interface down ./cli.py --spec netdev.yaml --dump qstats-get OR ip link set $interface down python ./stats.py Splat looks like: BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 1680fa067 P4D 1680fa067 PUD 16be3b067 PMD 0 Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 0 UID: 0 PID: 1495 Comm: python3 Not tainted 6.14.0-rc4+ #32 5cd0f999d5a15c574ac72b3e4b907341 Hardware name: ASUS System Product Name/PRIME Z690-P D4, BIOS 0603 11/01/2021 RIP: 0010:bnxt_get_queue_stats_rx+0xf/0x70 [bnxt_en] Code: c6 87 b5 18 00 00 02 eb a2 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 01 RSP: 0018:ffffabef43cdb7e0 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffffffffc04c8710 RCX: 0000000000000000 RDX: ffffabef43cdb858 RSI: 0000000000000000 RDI: ffff8d504e850000 RBP: ffff8d506c9f9c00 R08: 0000000000000004 R09: ffff8d506bcd901c R10: 0000000000000015 R11: ffff8d506bcd9000 R12: 0000000000000000 R13: ffffabef43cdb8c0 R14: ffff8d504e850000 R15: 0000000000000000 FS: 00007f2c5462b080(0000) GS:ffff8d575f600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000167fd0000 CR4: 00000000007506f0 PKRU: 55555554 Call Trace: ? __die+0x20/0x70 ? page_fault_oops+0x15a/0x460 ? sched_balance_find_src_group+0x58d/0xd10 ? exc_page_fault+0x6e/0x180 ? asm_exc_page_fault+0x22/0x30 ? bnxt_get_queue_stats_rx+0xf/0x70 [bnxt_en cdd546fd48563c280cfd30e9647efa420db07bf1] netdev_nl_stats_by_netdev+0x2b1/0x4e0 ? xas_load+0x9/0xb0 ? xas_find+0x183/0x1d0 ? xa_find+0x8b/0xe0 netdev_nl_qstats_get_dumpit+0xbf/0x1e0 genl_dumpit+0x31/0x90 netlink_dump+0x1a8/0x360 Fixes: af7b3b4adda5 ("eth: bnxt: support per-queue statistics") Signed-off-by: Taehee Yoo Reviewed-by: Somnath Kotur Link: https://patch.msgid.link/20250309134219.91670-6-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 9afb2c5072b13..bee12d9b57aba 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -15384,6 +15384,9 @@ static void bnxt_get_queue_stats_rx(struct net_device *dev, int i, struct bnxt_cp_ring_info *cpr; u64 *sw; + if (!bp->bnapi) + return; + cpr = &bp->bnapi[i]->cp_ring; sw = cpr->stats.sw_stats; @@ -15407,6 +15410,9 @@ static void bnxt_get_queue_stats_tx(struct net_device *dev, int i, struct bnxt_napi *bnapi; u64 *sw; + if (!bp->tx_ring) + return; + bnapi = bp->tx_ring[bp->tx_ring_map[i]].bnapi; sw = bnapi->cp_ring.stats.sw_stats; -- GitLab From 87dd2850835dd7886726b428a8ef7d73a60520c7 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 9 Mar 2025 13:42:17 +0000 Subject: [PATCH 1460/1585] eth: bnxt: fix memory leak in queue reset When the queue is reset, the bnxt_alloc_one_tpa_info() is called to allocate tpa_info for the new queue. And then the old queue's tpa_info should be removed by the bnxt_free_one_tpa_info(), but it is not called. So memory leak occurs. It adds the bnxt_free_one_tpa_info() in the bnxt_queue_mem_free(). unreferenced object 0xffff888293cc0000 (size 16384): comm "ncdevmem", pid 2076, jiffies 4296604081 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 40 75 78 93 82 88 ff ff ........@ux..... 40 75 78 93 02 00 00 00 00 00 00 00 00 00 00 00 @ux............. backtrace (crc 5d7d4798): ___kmalloc_large_node+0x10d/0x1b0 __kmalloc_large_node_noprof+0x17/0x60 __kmalloc_noprof+0x3f6/0x520 bnxt_alloc_one_tpa_info+0x5f/0x300 [bnxt_en] bnxt_queue_mem_alloc+0x8e8/0x14f0 [bnxt_en] netdev_rx_queue_restart+0x233/0x620 net_devmem_bind_dmabuf_to_queue+0x2a3/0x600 netdev_nl_bind_rx_doit+0xc00/0x10a0 genl_family_rcv_msg_doit+0x1d4/0x2b0 genl_rcv_msg+0x3fb/0x6c0 netlink_rcv_skb+0x12c/0x360 genl_rcv+0x24/0x40 netlink_unicast+0x447/0x710 netlink_sendmsg+0x712/0xbc0 __sys_sendto+0x3fd/0x4d0 __x64_sys_sendto+0xdc/0x1b0 Fixes: 2d694c27d32e ("bnxt_en: implement netdev_queue_mgmt_ops") Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250309134219.91670-7-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index bee12d9b57aba..55f553debd3b2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -15539,6 +15539,7 @@ static void bnxt_queue_mem_free(struct net_device *dev, void *qmem) struct bnxt_ring_struct *ring; bnxt_free_one_rx_ring_skbs(bp, rxr); + bnxt_free_one_tpa_info(bp, rxr); xdp_rxq_info_unreg(&rxr->xdp_rxq); -- GitLab From a70f891e0fa0435379ad4950e156a15a4ef88b4d Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 9 Mar 2025 13:42:18 +0000 Subject: [PATCH 1461/1585] net: devmem: do not WARN conditionally after netdev_rx_queue_restart() When devmem socket is closed, netdev_rx_queue_restart() is called to reset queue by the net_devmem_unbind_dmabuf(). But callback may return -ENETDOWN if the interface is down because queues are already freed when the interface is down so queue reset is not needed. So, it should not warn if the return value is -ENETDOWN. Signed-off-by: Taehee Yoo Reviewed-by: Mina Almasry Link: https://patch.msgid.link/20250309134219.91670-8-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- net/core/devmem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/devmem.c b/net/core/devmem.c index 3bba3f018df03..0e5a2c672efd8 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -109,6 +109,7 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) struct netdev_rx_queue *rxq; unsigned long xa_idx; unsigned int rxq_idx; + int err; if (binding->list.next) list_del(&binding->list); @@ -120,7 +121,8 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) rxq_idx = get_netdev_rx_queue_index(rxq); - WARN_ON(netdev_rx_queue_restart(binding->dev, rxq_idx)); + err = netdev_rx_queue_restart(binding->dev, rxq_idx); + WARN_ON(err && err != -ENETDOWN); } xa_erase(&net_devmem_dmabuf_bindings, binding->id); -- GitLab From 75cc19c8ff8932d7da23480a49d1f9a050289c37 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 9 Mar 2025 13:42:19 +0000 Subject: [PATCH 1462/1585] selftests: drv-net: add xdp cases for ping.py ping.py has 3 cases, test_v4, test_v6 and test_tcp. But these cases are not executed on the XDP environment. So, it adds XDP environment, existing tests(test_v4, test_v6, and test_tcp) are executed too on the below XDP environment. So, it adds XDP cases. 1. xdp-generic + single-buffer 2. xdp-generic + multi-buffer 3. xdp-native + single-buffer 4. xdp-native + multi-buffer 5. xdp-offload It also makes test_{v4 | v6 | tcp} sending large size packets. this may help to check whether multi-buffer is working or not. Note that the physical interface may be down and then up when xdp is attached or detached. This takes some period to activate traffic. So sleep(10) is added if the test interface is the physical interface. netdevsim and veth type interfaces skip sleep. Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250309134219.91670-9-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/ping.py | 200 ++++++++++++++++-- .../testing/selftests/net/lib/xdp_dummy.bpf.c | 6 + 2 files changed, 191 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index eb83e7b487978..93f4b411b378f 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -1,49 +1,219 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +import os +import random, string, time from lib.py import ksft_run, ksft_exit -from lib.py import ksft_eq -from lib.py import NetDrvEpEnv +from lib.py import ksft_eq, KsftSkipEx, KsftFailEx +from lib.py import EthtoolFamily, NetDrvEpEnv from lib.py import bkg, cmd, wait_port_listen, rand_port +from lib.py import ethtool, ip +remote_ifname="" +no_sleep=False -def test_v4(cfg) -> None: +def _test_v4(cfg) -> None: cfg.require_v4() cmd(f"ping -c 1 -W0.5 {cfg.remote_v4}") cmd(f"ping -c 1 -W0.5 {cfg.v4}", host=cfg.remote) + cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.remote_v4}") + cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.v4}", host=cfg.remote) - -def test_v6(cfg) -> None: +def _test_v6(cfg) -> None: cfg.require_v6() - cmd(f"ping -c 1 -W0.5 {cfg.remote_v6}") - cmd(f"ping -c 1 -W0.5 {cfg.v6}", host=cfg.remote) - + cmd(f"ping -c 1 -W5 {cfg.remote_v6}") + cmd(f"ping -c 1 -W5 {cfg.v6}", host=cfg.remote) + cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.remote_v6}") + cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.v6}", host=cfg.remote) -def test_tcp(cfg) -> None: +def _test_tcp(cfg) -> None: cfg.require_cmd("socat", remote=True) port = rand_port() listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT" + test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536)) with bkg(listen_cmd, exit_wait=True) as nc: wait_port_listen(port) - cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}", + cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}", shell=True, host=cfg.remote) - ksft_eq(nc.stdout.strip(), "ping") + ksft_eq(nc.stdout.strip(), test_string) + test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536)) with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc: wait_port_listen(port, host=cfg.remote) - cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True) - ksft_eq(nc.stdout.strip(), "ping") - + cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True) + ksft_eq(nc.stdout.strip(), test_string) + +def _set_offload_checksum(cfg, netnl, on) -> None: + try: + ethtool(f" -K {cfg.ifname} rx {on} tx {on} ") + except: + return + +def _set_xdp_generic_sb_on(cfg) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True) + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_generic_mb_on(cfg) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) + ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog)) + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_native_sb_on(cfg) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True) + xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] + if xdp_info['xdp']['mode'] != 1: + """ + If the interface doesn't support native-mode, it falls back to generic mode. + The mode value 1 is native and 2 is generic. + So it raises an exception if mode is not 1(native mode). + """ + raise KsftSkipEx('device does not support native-XDP') + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_native_mb_on(cfg) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) + try: + cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True) + except Exception as e: + cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + raise KsftSkipEx('device does not support native-multi-buffer XDP') + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_offload_on(cfg) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True) + try: + cmd(f"ip link set dev {cfg.ifname} xdpoffload obj {prog} sec xdp", shell=True) + except Exception as e: + raise KsftSkipEx('device does not support offloaded XDP') + cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + + if no_sleep != True: + time.sleep(10) + +def get_interface_info(cfg) -> None: + global remote_ifname + global no_sleep + + remote_info = cmd(f"ip -4 -o addr show to {cfg.remote_v4} | awk '{{print $2}}'", shell=True, host=cfg.remote).stdout + remote_ifname = remote_info.rstrip('\n') + if remote_ifname == "": + raise KsftFailEx('Can not get remote interface') + local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] + if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim": + no_sleep=True + if 'linkinfo' in local_info and local_info['linkinfo']['info_kind'] == "veth": + no_sleep=True + +def set_interface_init(cfg) -> None: + cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True) + cmd(f"ip link set dev {cfg.ifname} xdp off ", shell=True) + cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True) + cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True) + cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + +def test_default(cfg, netnl) -> None: + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + +def test_xdp_generic_sb(cfg, netnl) -> None: + _set_xdp_generic_sb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + ip("link set dev %s xdpgeneric off" % cfg.ifname) + +def test_xdp_generic_mb(cfg, netnl) -> None: + _set_xdp_generic_mb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + ip("link set dev %s xdpgeneric off" % cfg.ifname) + +def test_xdp_native_sb(cfg, netnl) -> None: + _set_xdp_native_sb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + ip("link set dev %s xdp off" % cfg.ifname) + +def test_xdp_native_mb(cfg, netnl) -> None: + _set_xdp_native_mb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + ip("link set dev %s xdp off" % cfg.ifname) + +def test_xdp_offload(cfg, netnl) -> None: + _set_xdp_offload_on(cfg) + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + ip("link set dev %s xdpoffload off" % cfg.ifname) def main() -> None: with NetDrvEpEnv(__file__) as cfg: - ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, )) + get_interface_info(cfg) + set_interface_init(cfg) + ksft_run([test_default, + test_xdp_generic_sb, + test_xdp_generic_mb, + test_xdp_native_sb, + test_xdp_native_mb, + test_xdp_offload], + args=(cfg, EthtoolFamily())) + set_interface_init(cfg) ksft_exit() diff --git a/tools/testing/selftests/net/lib/xdp_dummy.bpf.c b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c index d988b2e0cee84..e73fab3edd9f7 100644 --- a/tools/testing/selftests/net/lib/xdp_dummy.bpf.c +++ b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c @@ -10,4 +10,10 @@ int xdp_dummy_prog(struct xdp_md *ctx) return XDP_PASS; } +SEC("xdp.frags") +int xdp_dummy_prog_frags(struct xdp_md *ctx) +{ + return XDP_PASS; +} + char _license[] SEC("license") = "GPL"; -- GitLab From 77b2ab31fc65c595ca0a339f6c5b8ef3adfae5c6 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 7 Mar 2025 15:47:31 +0000 Subject: [PATCH 1463/1585] MAINTAINERS: sfc: remove Martin Habets Martin has left AMD and no longer works on the sfc driver. Signed-off-by: Edward Cree Link: https://patch.msgid.link/20250307154731.211368-1-edward.cree@amd.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index ca11a553d4121..07b2d3c7ae5d7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21502,7 +21502,6 @@ F: include/linux/slimbus.h SFC NETWORK DRIVER M: Edward Cree -M: Martin Habets L: netdev@vger.kernel.org L: linux-net-drivers@amd.com S: Maintained -- GitLab From 12d8f318347b1d4feac48e8ac351d3786af39599 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 7 Mar 2025 20:31:52 +0200 Subject: [PATCH 1464/1585] drm/dp_mst: Fix locking when skipping CSN before topology probing The handling of the MST Connection Status Notify message is skipped if the probing of the topology is still pending. Acquiring the drm_dp_mst_topology_mgr::probe_lock for this in drm_dp_mst_handle_up_req() is problematic: the task/work this function is called from is also responsible for handling MST down-request replies (in drm_dp_mst_handle_down_rep()). Thus drm_dp_mst_link_probe_work() - holding already probe_lock - could be blocked waiting for an MST down-request reply while drm_dp_mst_handle_up_req() is waiting for probe_lock while processing a CSN message. This leads to the probe work's down-request message timing out. A scenario similar to the above leading to a down-request timeout is handling a CSN message in drm_dp_mst_handle_conn_stat(), holding the probe_lock and sending down-request messages while a second CSN message sent by the sink subsequently is handled by drm_dp_mst_handle_up_req(). Fix the above by moving the logic to skip the CSN handling to drm_dp_mst_process_up_req(). This function is called from a work (separate from the task/work handling new up/down messages), already holding probe_lock. This solves the above timeout issue, since handling of down-request replies won't be blocked by probe_lock. Fixes: ddf983488c3e ("drm/dp_mst: Skip CSN if topology probing is not done yet") Cc: Wayne Lin Cc: Lyude Paul Cc: stable@vger.kernel.org # v6.6+ Reviewed-by: Wayne Lin Reviewed-by: Lyude Paul Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20250307183152.3822170-1-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 40 +++++++++++-------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index 06c91c5b7f7c8..6d09bef671da0 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -4025,6 +4025,22 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr) return 0; } +static bool primary_mstb_probing_is_done(struct drm_dp_mst_topology_mgr *mgr) +{ + bool probing_done = false; + + mutex_lock(&mgr->lock); + + if (mgr->mst_primary && drm_dp_mst_topology_try_get_mstb(mgr->mst_primary)) { + probing_done = mgr->mst_primary->link_address_sent; + drm_dp_mst_topology_put_mstb(mgr->mst_primary); + } + + mutex_unlock(&mgr->lock); + + return probing_done; +} + static inline bool drm_dp_mst_process_up_req(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_pending_up_req *up_req) @@ -4055,8 +4071,12 @@ drm_dp_mst_process_up_req(struct drm_dp_mst_topology_mgr *mgr, /* TODO: Add missing handler for DP_RESOURCE_STATUS_NOTIFY events */ if (msg->req_type == DP_CONNECTION_STATUS_NOTIFY) { - dowork = drm_dp_mst_handle_conn_stat(mstb, &msg->u.conn_stat); - hotplug = true; + if (!primary_mstb_probing_is_done(mgr)) { + drm_dbg_kms(mgr->dev, "Got CSN before finish topology probing. Skip it.\n"); + } else { + dowork = drm_dp_mst_handle_conn_stat(mstb, &msg->u.conn_stat); + hotplug = true; + } } drm_dp_mst_topology_put_mstb(mstb); @@ -4138,10 +4158,11 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) drm_dp_send_up_ack_reply(mgr, mst_primary, up_req->msg.req_type, false); + drm_dp_mst_topology_put_mstb(mst_primary); + if (up_req->msg.req_type == DP_CONNECTION_STATUS_NOTIFY) { const struct drm_dp_connection_status_notify *conn_stat = &up_req->msg.u.conn_stat; - bool handle_csn; drm_dbg_kms(mgr->dev, "Got CSN: pn: %d ldps:%d ddps: %d mcs: %d ip: %d pdt: %d\n", conn_stat->port_number, @@ -4150,16 +4171,6 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) conn_stat->message_capability_status, conn_stat->input_port, conn_stat->peer_device_type); - - mutex_lock(&mgr->probe_lock); - handle_csn = mst_primary->link_address_sent; - mutex_unlock(&mgr->probe_lock); - - if (!handle_csn) { - drm_dbg_kms(mgr->dev, "Got CSN before finish topology probing. Skip it."); - kfree(up_req); - goto out_put_primary; - } } else if (up_req->msg.req_type == DP_RESOURCE_STATUS_NOTIFY) { const struct drm_dp_resource_status_notify *res_stat = &up_req->msg.u.resource_stat; @@ -4174,9 +4185,6 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) list_add_tail(&up_req->next, &mgr->up_req_list); mutex_unlock(&mgr->up_req_lock); queue_work(system_long_wq, &mgr->up_req_work); - -out_put_primary: - drm_dp_mst_topology_put_mstb(mst_primary); out_clear_reply: reset_msg_rx_state(&mgr->up_req_recv); return ret; -- GitLab From 62531a1effa87bdab12d5104015af72e60d926ff Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Wed, 5 Mar 2025 14:15:09 +0200 Subject: [PATCH 1465/1585] net: switchdev: Convert blocking notification chain to a raw one A blocking notification chain uses a read-write semaphore to protect the integrity of the chain. The semaphore is acquired for writing when adding / removing notifiers to / from the chain and acquired for reading when traversing the chain and informing notifiers about an event. In case of the blocking switchdev notification chain, recursive notifications are possible which leads to the semaphore being acquired twice for reading and to lockdep warnings being generated [1]. Specifically, this can happen when the bridge driver processes a SWITCHDEV_BRPORT_UNOFFLOADED event which causes it to emit notifications about deferred events when calling switchdev_deferred_process(). Fix this by converting the notification chain to a raw notification chain in a similar fashion to the netdev notification chain. Protect the chain using the RTNL mutex by acquiring it when modifying the chain. Events are always informed under the RTNL mutex, but add an assertion in call_switchdev_blocking_notifiers() to make sure this is not violated in the future. Maintain the "blocking" prefix as events are always emitted from process context and listeners are allowed to block. [1]: WARNING: possible recursive locking detected 6.14.0-rc4-custom-g079270089484 #1 Not tainted -------------------------------------------- ip/52731 is trying to acquire lock: ffffffff850918d8 ((switchdev_blocking_notif_chain).rwsem){++++}-{4:4}, at: blocking_notifier_call_chain+0x58/0xa0 but task is already holding lock: ffffffff850918d8 ((switchdev_blocking_notif_chain).rwsem){++++}-{4:4}, at: blocking_notifier_call_chain+0x58/0xa0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock((switchdev_blocking_notif_chain).rwsem); lock((switchdev_blocking_notif_chain).rwsem); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by ip/52731: #0: ffffffff84f795b0 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_newlink+0x727/0x1dc0 #1: ffffffff8731f628 (&net->rtnl_mutex){+.+.}-{4:4}, at: rtnl_newlink+0x790/0x1dc0 #2: ffffffff850918d8 ((switchdev_blocking_notif_chain).rwsem){++++}-{4:4}, at: blocking_notifier_call_chain+0x58/0xa0 stack backtrace: ... ? __pfx_down_read+0x10/0x10 ? __pfx_mark_lock+0x10/0x10 ? __pfx_switchdev_port_attr_set_deferred+0x10/0x10 blocking_notifier_call_chain+0x58/0xa0 switchdev_port_attr_notify.constprop.0+0xb3/0x1b0 ? __pfx_switchdev_port_attr_notify.constprop.0+0x10/0x10 ? mark_held_locks+0x94/0xe0 ? switchdev_deferred_process+0x11a/0x340 switchdev_port_attr_set_deferred+0x27/0xd0 switchdev_deferred_process+0x164/0x340 br_switchdev_port_unoffload+0xc8/0x100 [bridge] br_switchdev_blocking_event+0x29f/0x580 [bridge] notifier_call_chain+0xa2/0x440 blocking_notifier_call_chain+0x6e/0xa0 switchdev_bridge_port_unoffload+0xde/0x1a0 ... Fixes: f7a70d650b0b6 ("net: bridge: switchdev: Ensure deferred event delivery on unoffload") Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Reviewed-by: Simon Horman Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Link: https://patch.msgid.link/20250305121509.631207-1-amcohen@nvidia.com Signed-off-by: Paolo Abeni --- net/switchdev/switchdev.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 6488ead9e4645..4d5fbacef496f 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -472,7 +472,7 @@ bool switchdev_port_obj_act_is_deferred(struct net_device *dev, EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred); static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain); -static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain); +static RAW_NOTIFIER_HEAD(switchdev_blocking_notif_chain); /** * register_switchdev_notifier - Register notifier @@ -518,17 +518,27 @@ EXPORT_SYMBOL_GPL(call_switchdev_notifiers); int register_switchdev_blocking_notifier(struct notifier_block *nb) { - struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain; + struct raw_notifier_head *chain = &switchdev_blocking_notif_chain; + int err; + + rtnl_lock(); + err = raw_notifier_chain_register(chain, nb); + rtnl_unlock(); - return blocking_notifier_chain_register(chain, nb); + return err; } EXPORT_SYMBOL_GPL(register_switchdev_blocking_notifier); int unregister_switchdev_blocking_notifier(struct notifier_block *nb) { - struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain; + struct raw_notifier_head *chain = &switchdev_blocking_notif_chain; + int err; - return blocking_notifier_chain_unregister(chain, nb); + rtnl_lock(); + err = raw_notifier_chain_unregister(chain, nb); + rtnl_unlock(); + + return err; } EXPORT_SYMBOL_GPL(unregister_switchdev_blocking_notifier); @@ -536,10 +546,11 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev, struct switchdev_notifier_info *info, struct netlink_ext_ack *extack) { + ASSERT_RTNL(); info->dev = dev; info->extack = extack; - return blocking_notifier_call_chain(&switchdev_blocking_notif_chain, - val, info); + return raw_notifier_call_chain(&switchdev_blocking_notif_chain, + val, info); } EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers); -- GitLab From d4234d131b0a3f9e65973f1cdc71bb3560f5d14b Mon Sep 17 00:00:00 2001 From: Zhenhua Huang Date: Tue, 4 Mar 2025 15:27:00 +0800 Subject: [PATCH 1466/1585] arm64: mm: Populate vmemmap at the page level if not section aligned On the arm64 platform with 4K base page config, SECTION_SIZE_BITS is set to 27, making one section 128M. The related page struct which vmemmap points to is 2M then. Commit c1cc1552616d ("arm64: MMU initialisation") optimizes the vmemmap to populate at the PMD section level which was suitable initially since hot plug granule is always one section(128M). However, commit ba72b4c8cf60 ("mm/sparsemem: support sub-section hotplug") introduced a 2M(SUBSECTION_SIZE) hot plug granule, which disrupted the existing arm64 assumptions. The first problem is that if start or end is not aligned to a section boundary, such as when a subsection is hot added, populating the entire section is wasteful. The next problem is if we hotplug something that spans part of 128 MiB section (subsections, let's call it memblock1), and then hotplug something that spans another part of a 128 MiB section(subsections, let's call it memblock2), and subsequently unplug memblock1, vmemmap_free() will clear the entire PMD entry which also supports memblock2 even though memblock2 is still active. Assuming hotplug/unplug sizes are guaranteed to be symmetric. Do the fix similar to x86-64: populate to pages levels if start/end is not aligned with section boundary. Cc: stable@vger.kernel.org # v5.4+ Fixes: ba72b4c8cf60 ("mm/sparsemem: support sub-section hotplug") Acked-by: David Hildenbrand Signed-off-by: Zhenhua Huang Reviewed-by: Oscar Salvador Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20250304072700.3405036-1-quic_zhenhuah@quicinc.com Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index b4df5bc5b1b8b..1dfe1a8efdbe4 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1177,8 +1177,11 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END)); + /* [start, end] should be within one section */ + WARN_ON_ONCE(end - start > PAGES_PER_SECTION * sizeof(struct page)); - if (!IS_ENABLED(CONFIG_ARM64_4K_PAGES)) + if (!IS_ENABLED(CONFIG_ARM64_4K_PAGES) || + (end - start < PAGES_PER_SECTION * sizeof(struct page))) return vmemmap_populate_basepages(start, end, node, altmap); else return vmemmap_populate_hugepages(start, end, node, altmap); -- GitLab From f7edb07ad7c66eab3dce57384f33b9799d579133 Mon Sep 17 00:00:00 2001 From: Piotr Jaroszynski Date: Tue, 4 Mar 2025 00:51:27 -0800 Subject: [PATCH 1467/1585] Fix mmu notifiers for range-based invalidates Update the __flush_tlb_range_op macro not to modify its parameters as these are unexepcted semantics. In practice, this fixes the call to mmu_notifier_arch_invalidate_secondary_tlbs() in __flush_tlb_range_nosync() to use the correct range instead of an empty range with start=end. The empty range was (un)lucky as it results in taking the invalidate-all path that doesn't cause correctness issues, but can certainly result in suboptimal perf. This has been broken since commit 6bbd42e2df8f ("mmu_notifiers: call invalidate_range() when invalidating TLBs") when the call to the notifiers was added to __flush_tlb_range(). It predates the addition of the __flush_tlb_range_op() macro from commit 360839027a6e ("arm64: tlb: Refactor the core flush algorithm of __flush_tlb_range") that made the bug hard to spot. Fixes: 6bbd42e2df8f ("mmu_notifiers: call invalidate_range() when invalidating TLBs") Signed-off-by: Piotr Jaroszynski Cc: Catalin Marinas Cc: Will Deacon Cc: Robin Murphy Cc: Alistair Popple Cc: Raghavendra Rao Ananta Cc: SeongJae Park Cc: Jason Gunthorpe Cc: John Hubbard Cc: Nicolin Chen Cc: linux-arm-kernel@lists.infradead.org Cc: iommu@lists.linux.dev Cc: linux-mm@kvack.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Reviewed-by: Catalin Marinas Reviewed-by: Alistair Popple Link: https://lore.kernel.org/r/20250304085127.2238030-1-pjaroszynski@nvidia.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/tlbflush.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index bc94e036a26b9..8104aee4f9a08 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -396,33 +396,35 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) #define __flush_tlb_range_op(op, start, pages, stride, \ asid, tlb_level, tlbi_user, lpa2) \ do { \ + typeof(start) __flush_start = start; \ + typeof(pages) __flush_pages = pages; \ int num = 0; \ int scale = 3; \ int shift = lpa2 ? 16 : PAGE_SHIFT; \ unsigned long addr; \ \ - while (pages > 0) { \ + while (__flush_pages > 0) { \ if (!system_supports_tlb_range() || \ - pages == 1 || \ - (lpa2 && start != ALIGN(start, SZ_64K))) { \ - addr = __TLBI_VADDR(start, asid); \ + __flush_pages == 1 || \ + (lpa2 && __flush_start != ALIGN(__flush_start, SZ_64K))) { \ + addr = __TLBI_VADDR(__flush_start, asid); \ __tlbi_level(op, addr, tlb_level); \ if (tlbi_user) \ __tlbi_user_level(op, addr, tlb_level); \ - start += stride; \ - pages -= stride >> PAGE_SHIFT; \ + __flush_start += stride; \ + __flush_pages -= stride >> PAGE_SHIFT; \ continue; \ } \ \ - num = __TLBI_RANGE_NUM(pages, scale); \ + num = __TLBI_RANGE_NUM(__flush_pages, scale); \ if (num >= 0) { \ - addr = __TLBI_VADDR_RANGE(start >> shift, asid, \ + addr = __TLBI_VADDR_RANGE(__flush_start >> shift, asid, \ scale, num, tlb_level); \ __tlbi(r##op, addr); \ if (tlbi_user) \ __tlbi_user(r##op, addr); \ - start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ - pages -= __TLBI_RANGE_PAGES(num, scale); \ + __flush_start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ + __flush_pages -= __TLBI_RANGE_PAGES(num, scale);\ } \ scale--; \ } \ -- GitLab From 986a6f5eacb900ea0f6036ef724b26e76be40f65 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 10 Mar 2025 15:25:31 -0700 Subject: [PATCH 1468/1585] vboxsf: Add __nonstring annotations for unterminated strings When a character array without a terminating NUL character has a static initializer, GCC 15's -Wunterminated-string-initialization will only warn if the array lacks the "nonstring" attribute[1]. Mark the arrays with __nonstring to and correctly identify the char array as "not a C string" and thereby eliminate the warning. This effectively reverts the change in 4e7487245abc ("vboxsf: fix building with GCC 15"), to add the annotation that has other uses (i.e. warning if the string is ever used with C string APIs). Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117178 [1] Cc: Hans de Goede Cc: Brahmajit Das Cc: Christian Brauner Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20250310222530.work.374-kees@kernel.org Reviewed-by: Hans de Goede Signed-off-by: Christian Brauner --- fs/vboxsf/super.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index 1d94bb7841081..0bc96ab6580b3 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -21,8 +21,7 @@ #define VBOXSF_SUPER_MAGIC 0x786f4256 /* 'VBox' little endian */ -static const unsigned char VBSF_MOUNT_SIGNATURE[4] = { '\000', '\377', '\376', - '\375' }; +static const unsigned char VBSF_MOUNT_SIGNATURE[4] __nonstring = "\000\377\376\375"; static int follow_symlinks; module_param(follow_symlinks, int, 0444); -- GitLab From f5d83cf0eeb90fade4d5c4d17d24b8bee9ceeecc Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Thu, 6 Mar 2025 10:32:45 +0800 Subject: [PATCH 1469/1585] net: mctp: unshare packets when reassembling Ensure that the frag_list used for reassembly isn't shared with other packets. This avoids incorrect reassembly when packets are cloned, and prevents a memory leak due to circular references between fragments and their skb_shared_info. The upcoming MCTP-over-USB driver uses skb_clone which can trigger the problem - other MCTP drivers don't share SKBs. A kunit test is added to reproduce the issue. Signed-off-by: Matt Johnston Fixes: 4a992bbd3650 ("mctp: Implement message fragmentation & reassembly") Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250306-matt-mctp-usb-v1-1-085502b3dd28@codeconstruct.com.au Signed-off-by: Paolo Abeni --- net/mctp/route.c | 10 +++- net/mctp/test/route-test.c | 109 +++++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+), 2 deletions(-) diff --git a/net/mctp/route.c b/net/mctp/route.c index 3f2bd65ff5e3c..4c460160914f0 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -332,8 +332,14 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) & MCTP_HDR_SEQ_MASK; if (!key->reasm_head) { - key->reasm_head = skb; - key->reasm_tailp = &(skb_shinfo(skb)->frag_list); + /* Since we're manipulating the shared frag_list, ensure it isn't + * shared with any other SKBs. + */ + key->reasm_head = skb_unshare(skb, GFP_ATOMIC); + if (!key->reasm_head) + return -ENOMEM; + + key->reasm_tailp = &(skb_shinfo(key->reasm_head)->frag_list); key->last_seq = this_seq; return 0; } diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 17165b86ce22d..06c1897b685a8 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -921,6 +921,114 @@ static void mctp_test_route_input_sk_fail_frag(struct kunit *test) __mctp_route_test_fini(test, dev, rt, sock); } +/* Input route to socket, using a fragmented message created from clones. + */ +static void mctp_test_route_input_cloned_frag(struct kunit *test) +{ + /* 5 packet fragments, forming 2 complete messages */ + const struct mctp_hdr hdrs[5] = { + RX_FRAG(FL_S, 0), + RX_FRAG(0, 1), + RX_FRAG(FL_E, 2), + RX_FRAG(FL_S, 0), + RX_FRAG(FL_E, 1), + }; + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct sk_buff *skb[5]; + struct sk_buff *rx_skb; + struct socket *sock; + size_t data_len; + u8 compare[100]; + u8 flat[100]; + size_t total; + void *p; + int rc; + + /* Arbitrary length */ + data_len = 3; + total = data_len + sizeof(struct mctp_hdr); + + __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY); + + /* Create a single skb initially with concatenated packets */ + skb[0] = mctp_test_create_skb(&hdrs[0], 5 * total); + mctp_test_skb_set_dev(skb[0], dev); + memset(skb[0]->data, 0 * 0x11, skb[0]->len); + memcpy(skb[0]->data, &hdrs[0], sizeof(struct mctp_hdr)); + + /* Extract and populate packets */ + for (int i = 1; i < 5; i++) { + skb[i] = skb_clone(skb[i - 1], GFP_ATOMIC); + KUNIT_ASSERT_TRUE(test, skb[i]); + p = skb_pull(skb[i], total); + KUNIT_ASSERT_TRUE(test, p); + skb_reset_network_header(skb[i]); + memcpy(skb[i]->data, &hdrs[i], sizeof(struct mctp_hdr)); + memset(&skb[i]->data[sizeof(struct mctp_hdr)], i * 0x11, data_len); + } + for (int i = 0; i < 5; i++) + skb_trim(skb[i], total); + + /* SOM packets have a type byte to match the socket */ + skb[0]->data[4] = 0; + skb[3]->data[4] = 0; + + skb_dump("pkt1 ", skb[0], false); + skb_dump("pkt2 ", skb[1], false); + skb_dump("pkt3 ", skb[2], false); + skb_dump("pkt4 ", skb[3], false); + skb_dump("pkt5 ", skb[4], false); + + for (int i = 0; i < 5; i++) { + KUNIT_EXPECT_EQ(test, refcount_read(&skb[i]->users), 1); + /* Take a reference so we can check refcounts at the end */ + skb_get(skb[i]); + } + + /* Feed the fragments into MCTP core */ + for (int i = 0; i < 5; i++) { + rc = mctp_route_input(&rt->rt, skb[i]); + KUNIT_EXPECT_EQ(test, rc, 0); + } + + /* Receive first reassembled message */ + rx_skb = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); + KUNIT_EXPECT_EQ(test, rc, 0); + KUNIT_EXPECT_EQ(test, rx_skb->len, 3 * data_len); + rc = skb_copy_bits(rx_skb, 0, flat, rx_skb->len); + for (int i = 0; i < rx_skb->len; i++) + compare[i] = (i / data_len) * 0x11; + /* Set type byte */ + compare[0] = 0; + + KUNIT_EXPECT_MEMEQ(test, flat, compare, rx_skb->len); + KUNIT_EXPECT_EQ(test, refcount_read(&rx_skb->users), 1); + kfree_skb(rx_skb); + + /* Receive second reassembled message */ + rx_skb = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); + KUNIT_EXPECT_EQ(test, rc, 0); + KUNIT_EXPECT_EQ(test, rx_skb->len, 2 * data_len); + rc = skb_copy_bits(rx_skb, 0, flat, rx_skb->len); + for (int i = 0; i < rx_skb->len; i++) + compare[i] = (i / data_len + 3) * 0x11; + /* Set type byte */ + compare[0] = 0; + + KUNIT_EXPECT_MEMEQ(test, flat, compare, rx_skb->len); + KUNIT_EXPECT_EQ(test, refcount_read(&rx_skb->users), 1); + kfree_skb(rx_skb); + + /* Check input skb refcounts */ + for (int i = 0; i < 5; i++) { + KUNIT_EXPECT_EQ(test, refcount_read(&skb[i]->users), 1); + kfree_skb(skb[i]); + } + + __mctp_route_test_fini(test, dev, rt, sock); +} + #if IS_ENABLED(CONFIG_MCTP_FLOWS) static void mctp_test_flow_init(struct kunit *test, @@ -1144,6 +1252,7 @@ static struct kunit_case mctp_test_cases[] = { KUNIT_CASE(mctp_test_packet_flow), KUNIT_CASE(mctp_test_fragment_flow), KUNIT_CASE(mctp_test_route_output_key_create), + KUNIT_CASE(mctp_test_route_input_cloned_frag), {} }; -- GitLab From 0c5e145a350de3b38cd5ae77a401b12c46fb7c1d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 6 Mar 2025 02:39:22 +0000 Subject: [PATCH 1470/1585] bonding: fix incorrect MAC address setting to receive NS messages When validation on the backup slave is enabled, we need to validate the Neighbor Solicitation (NS) messages received on the backup slave. To receive these messages, the correct destination MAC address must be added to the slave. However, the target in bonding is a unicast address, which we cannot use directly. Instead, we should first convert it to a Solicited-Node Multicast Address and then derive the corresponding MAC address. Fix the incorrect MAC address setting on both slave_set_ns_maddr() and slave_set_ns_maddrs(). Since the two function names are similar. Add some description for the functions. Also only use one mac_addr variable in slave_set_ns_maddr() to save some code and logic. Fixes: 8eb36164d1a6 ("bonding: add ns target multicast address to slave device") Acked-by: Jay Vosburgh Reviewed-by: Nikolay Aleksandrov Signed-off-by: Hangbin Liu Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250306023923.38777-2-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_options.c | 55 +++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 327b6ecdc77e0..d1b095af253bd 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1242,10 +1242,28 @@ static bool slave_can_set_ns_maddr(const struct bonding *bond, struct slave *sla slave->dev->flags & IFF_MULTICAST; } +/** + * slave_set_ns_maddrs - add/del all NS mac addresses for slave + * @bond: bond device + * @slave: slave device + * @add: add or remove all the NS mac addresses + * + * This function tries to add or delete all the NS mac addresses on the slave + * + * Note, the IPv6 NS target address is the unicast address in Neighbor + * Solicitation (NS) message. The dest address of NS message should be + * solicited-node multicast address of the target. The dest mac of NS message + * is converted from the solicited-node multicast address. + * + * This function is called when + * * arp_validate changes + * * enslaving, releasing new slaves + */ static void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add) { struct in6_addr *targets = bond->params.ns_targets; char slot_maddr[MAX_ADDR_LEN]; + struct in6_addr mcaddr; int i; if (!slave_can_set_ns_maddr(bond, slave)) @@ -1255,7 +1273,8 @@ static void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool if (ipv6_addr_any(&targets[i])) break; - if (!ndisc_mc_map(&targets[i], slot_maddr, slave->dev, 0)) { + addrconf_addr_solict_mult(&targets[i], &mcaddr); + if (!ndisc_mc_map(&mcaddr, slot_maddr, slave->dev, 0)) { if (add) dev_mc_add(slave->dev, slot_maddr); else @@ -1278,23 +1297,43 @@ void bond_slave_ns_maddrs_del(struct bonding *bond, struct slave *slave) slave_set_ns_maddrs(bond, slave, false); } +/** + * slave_set_ns_maddr - set new NS mac address for slave + * @bond: bond device + * @slave: slave device + * @target: the new IPv6 target + * @slot: the old IPv6 target in the slot + * + * This function tries to replace the old mac address to new one on the slave. + * + * Note, the target/slot IPv6 address is the unicast address in Neighbor + * Solicitation (NS) message. The dest address of NS message should be + * solicited-node multicast address of the target. The dest mac of NS message + * is converted from the solicited-node multicast address. + * + * This function is called when + * * An IPv6 NS target is added or removed. + */ static void slave_set_ns_maddr(struct bonding *bond, struct slave *slave, struct in6_addr *target, struct in6_addr *slot) { - char target_maddr[MAX_ADDR_LEN], slot_maddr[MAX_ADDR_LEN]; + char mac_addr[MAX_ADDR_LEN]; + struct in6_addr mcast_addr; if (!bond->params.arp_validate || !slave_can_set_ns_maddr(bond, slave)) return; - /* remove the previous maddr from slave */ + /* remove the previous mac addr from slave */ + addrconf_addr_solict_mult(slot, &mcast_addr); if (!ipv6_addr_any(slot) && - !ndisc_mc_map(slot, slot_maddr, slave->dev, 0)) - dev_mc_del(slave->dev, slot_maddr); + !ndisc_mc_map(&mcast_addr, mac_addr, slave->dev, 0)) + dev_mc_del(slave->dev, mac_addr); - /* add new maddr on slave if target is set */ + /* add new mac addr on slave if target is set */ + addrconf_addr_solict_mult(target, &mcast_addr); if (!ipv6_addr_any(target) && - !ndisc_mc_map(target, target_maddr, slave->dev, 0)) - dev_mc_add(slave->dev, target_maddr); + !ndisc_mc_map(&mcast_addr, mac_addr, slave->dev, 0)) + dev_mc_add(slave->dev, mac_addr); } static void _bond_options_ns_ip6_target_set(struct bonding *bond, int slot, -- GitLab From 9318dc2357b6b8b2ea1200ab7f2d5877851b7382 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 6 Mar 2025 02:39:23 +0000 Subject: [PATCH 1471/1585] selftests: bonding: fix incorrect mac address The correct mac address for NS target 2001:db8::254 is 33:33:ff:00:02:54, not 33:33:00:00:02:54. The same with client maddress. Fixes: 86fb6173d11e ("selftests: bonding: add ns multicast group testing") Acked-by: Jay Vosburgh Reviewed-by: Nikolay Aleksandrov Signed-off-by: Hangbin Liu Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250306023923.38777-3-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/drivers/net/bonding/bond_options.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh index edc56e2cc6069..7bc148889ca72 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -11,8 +11,8 @@ ALL_TESTS=" lib_dir=$(dirname "$0") source ${lib_dir}/bond_topo_3d1c.sh -c_maddr="33:33:00:00:00:10" -g_maddr="33:33:00:00:02:54" +c_maddr="33:33:ff:00:00:10" +g_maddr="33:33:ff:00:02:54" skip_prio() { -- GitLab From 415f135ace7fd824cde083184a922e39156055b5 Mon Sep 17 00:00:00 2001 From: Justin Lai Date: Thu, 6 Mar 2025 15:05:10 +0800 Subject: [PATCH 1472/1585] rtase: Fix improper release of ring list entries in rtase_sw_reset Since rtase_init_ring, which is called within rtase_sw_reset, adds ring entries already present in the ring list back into the list, it causes the ring list to form a cycle. This results in list_for_each_entry_safe failing to find an endpoint during traversal, leading to an error. Therefore, it is necessary to remove the previously added ring_list nodes before calling rtase_init_ring. Fixes: 079600489960 ("rtase: Implement net_device_ops") Signed-off-by: Justin Lai Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250306070510.18129-1-justinlai0215@realtek.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/realtek/rtase/rtase_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c index 3bd11cb56294c..2aacc1996796d 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -1501,7 +1501,10 @@ static void rtase_wait_for_quiescence(const struct net_device *dev) static void rtase_sw_reset(struct net_device *dev) { struct rtase_private *tp = netdev_priv(dev); + struct rtase_ring *ring, *tmp; + struct rtase_int_vector *ivec; int ret; + u32 i; netif_stop_queue(dev); netif_carrier_off(dev); @@ -1512,6 +1515,13 @@ static void rtase_sw_reset(struct net_device *dev) rtase_tx_clear(tp); rtase_rx_clear(tp); + for (i = 0; i < tp->int_nums; i++) { + ivec = &tp->int_vector[i]; + list_for_each_entry_safe(ring, tmp, &ivec->ring_list, + ring_entry) + list_del(&ring->ring_entry); + } + ret = rtase_init_ring(dev); if (ret) { netdev_err(dev, "unable to init ring\n"); -- GitLab From 3a04334d6282d08fbdd6201e374db17d31927ba3 Mon Sep 17 00:00:00 2001 From: Alan Huang Date: Sat, 8 Mar 2025 00:58:27 +0800 Subject: [PATCH 1473/1585] bcachefs: Fix b->written overflow When bset past end of btree node, we should not add sectors to b->written, which will overflow b->written. Reported-by: syzbot+3cb3d9e8c3f197754825@syzkaller.appspotmail.com Tested-by: syzbot+3cb3d9e8c3f197754825@syzkaller.appspotmail.com Signed-off-by: Alan Huang Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index dece27d9db04e..756736f9243d7 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1186,7 +1186,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, le64_to_cpu(i->journal_seq), b->written, b->written + sectors, ptr_written); - b->written += sectors; + b->written = min(b->written + sectors, btree_sectors(c)); if (blacklisted && !first) continue; -- GitLab From e5c2bcc0cd47321d78bb4e865d7857304139f95d Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Tue, 11 Mar 2025 19:43:58 +0900 Subject: [PATCH 1474/1585] nvme: move error logging from nvme_end_req() to __nvme_end_req() Before the Commit 1f47ed294a2b ("block: cleanup and fix batch completion adding conditions"), blk_mq_add_to_batch() did not add failed passthrough requests to batch, and returned false. After the commit, blk_mq_add_to_batch() always adds passthrough requests to batch regardless of whether the request failed or not, and returns true. This affected error logging feature in the NVME driver. Before the commit, the call chain of failed passthrough request was as follows: nvme_handle_cqe() blk_mq_add_to_batch() .. false is returned, then call nvme_pci_complete_rq() nvme_pci_complete_rq() nvme_complete_rq() nvme_end_req() nvme_log_err_passthru() .. error logging __nvme_end_req() .. end of the rqeuest After the commit, the call chain is as follows: nvme_handle_cqe() blk_mq_add_to_batch() .. true is returned, then set nvme_pci_complete_batch() .. nvme_pci_complete_batch() nvme_complete_batch() nvme_complete_batch_req() __nvme_end_req() .. end of the request, without error logging To make the error logging feature work again for passthrough requests, move the nvme_log_err_passthru() call from nvme_end_req() to __nvme_end_req(). While at it, move nvme_log_error() call for non-passthrough requests together with nvme_log_err_passthru(). Even though the trigger commit does not affect non-passthrough requests, move it together for code simplicity. Fixes: 1f47ed294a2b ("block: cleanup and fix batch completion adding conditions") Signed-off-by: Shin'ichiro Kawasaki Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250311104359.1767728-2-shinichiro.kawasaki@wdc.com Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f028913e2e622..8359d0aa0e44b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -431,6 +431,12 @@ static inline void nvme_end_req_zoned(struct request *req) static inline void __nvme_end_req(struct request *req) { + if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) { + if (blk_rq_is_passthrough(req)) + nvme_log_err_passthru(req); + else + nvme_log_error(req); + } nvme_end_req_zoned(req); nvme_trace_bio_complete(req); if (req->cmd_flags & REQ_NVME_MPATH) @@ -441,12 +447,6 @@ void nvme_end_req(struct request *req) { blk_status_t status = nvme_error_status(nvme_req(req)->status); - if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) { - if (blk_rq_is_passthrough(req)) - nvme_log_err_passthru(req); - else - nvme_log_error(req); - } __nvme_end_req(req); blk_mq_end_request(req, status); } -- GitLab From ed92bc5264c4357d4fca292c769ea9967cd3d3b6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 10 Mar 2025 18:45:36 +0100 Subject: [PATCH 1475/1585] ASoC: codecs: wm0010: Fix error handling path in wm0010_spi_probe() Free some resources in the error handling path of the probe, as already done in the remove function. Fixes: e3523e01869d ("ASoC: wm0010: Add initial wm0010 DSP driver") Fixes: fd8b96574456 ("ASoC: wm0010: Clear IRQ as wake source and include missing header") Signed-off-by: Christophe JAILLET Reviewed-by: Charles Keepax Link: https://patch.msgid.link/5139ba1ab8c4c157ce04e56096a0f54a1683195c.1741549792.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- sound/soc/codecs/wm0010.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm0010.c b/sound/soc/codecs/wm0010.c index edd2cb185c42c..9e67fbfc2ccaf 100644 --- a/sound/soc/codecs/wm0010.c +++ b/sound/soc/codecs/wm0010.c @@ -920,7 +920,7 @@ static int wm0010_spi_probe(struct spi_device *spi) if (ret) { dev_err(wm0010->dev, "Failed to set IRQ %d as wake source: %d\n", irq, ret); - return ret; + goto free_irq; } if (spi->max_speed_hz) @@ -932,9 +932,18 @@ static int wm0010_spi_probe(struct spi_device *spi) &soc_component_dev_wm0010, wm0010_dai, ARRAY_SIZE(wm0010_dai)); if (ret < 0) - return ret; + goto disable_irq_wake; return 0; + +disable_irq_wake: + irq_set_irq_wake(wm0010->irq, 0); + +free_irq: + if (wm0010->irq) + free_irq(wm0010->irq, wm0010); + + return ret; } static void wm0010_spi_remove(struct spi_device *spi) -- GitLab From d2b9d97e89c79c95f8b517e4fa43fd100f936acc Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Fri, 7 Mar 2025 17:49:52 +0800 Subject: [PATCH 1476/1585] qlcnic: fix memory leak issues in qlcnic_sriov_common.c Add qlcnic_sriov_free_vlans() in qlcnic_sriov_alloc_vlans() if any sriov_vlans fails to be allocated. Add qlcnic_sriov_free_vlans() to free the memory allocated by qlcnic_sriov_alloc_vlans() if "sriov->allowed_vlans" fails to be allocated. Fixes: 91b7282b613d ("qlcnic: Support VLAN id config.") Cc: stable@vger.kernel.org Signed-off-by: Haoxiang Li Link: https://patch.msgid.link/20250307094952.14874-1-haoxiang_li2024@163.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c index f9dd50152b1e3..28d24d59efb84 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c @@ -454,8 +454,10 @@ static int qlcnic_sriov_set_guest_vlan_mode(struct qlcnic_adapter *adapter, num_vlans = sriov->num_allowed_vlans; sriov->allowed_vlans = kcalloc(num_vlans, sizeof(u16), GFP_KERNEL); - if (!sriov->allowed_vlans) + if (!sriov->allowed_vlans) { + qlcnic_sriov_free_vlans(adapter); return -ENOMEM; + } vlans = (u16 *)&cmd->rsp.arg[3]; for (i = 0; i < num_vlans; i++) @@ -2167,8 +2169,10 @@ int qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter) vf = &sriov->vf_info[i]; vf->sriov_vlans = kcalloc(sriov->num_allowed_vlans, sizeof(*vf->sriov_vlans), GFP_KERNEL); - if (!vf->sriov_vlans) + if (!vf->sriov_vlans) { + qlcnic_sriov_free_vlans(adapter); return -ENOMEM; + } } return 0; -- GitLab From 58517f4df8424ec28dfe7290ccc61908eda57aae Mon Sep 17 00:00:00 2001 From: Roxana Nicolescu Date: Tue, 11 Mar 2025 15:06:10 +0000 Subject: [PATCH 1477/1585] bcachefs: Initialize from_inode members for bch_io_opts When there is no inode source, all "from_inode" members in the structure bhc_io_opts should be set false. Fixes: 7a7c43a0c1ecf ("bcachefs: Add bch_io_opts fields for indicating whether the opts came from the inode") Reported-by: syzbot+c17ad4b4367b72a853cb@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c17ad4b4367b72a853cb Signed-off-by: Roxana Nicolescu Signed-off-by: Kent Overstreet --- fs/bcachefs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 04ec05206f8cf..339b80770f1dd 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -1198,6 +1198,7 @@ void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c, opts->_name##_from_inode = true; \ } else { \ opts->_name = c->opts._name; \ + opts->_name##_from_inode = false; \ } BCH_INODE_OPTS() #undef x -- GitLab From dbac8feb23382af1efa2e1a86049e079b6e42e12 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 11 Mar 2025 10:39:36 -0400 Subject: [PATCH 1478/1585] bcachefs: Make sure trans is unlocked when submitting read IO We were still using the trans after the unlock, leading to this bug in the retry path: 00255 ------------[ cut here ]------------ 00255 kernel BUG at fs/bcachefs/btree_iter.c:3348! 00255 Internal error: Oops - BUG: 00000000f2000800 [#1] SMP 00255 bcachefs (0ca38fe8-0a26-41f9-9b5d-6a27796c7803): /fiotest offset 86048768: no device to read from: 00255 u64s 8 type extent 4098:168192:U32_MAX len 128 ver 0: durability: 0 crc: c_size 128 size 128 offset 0 nonce 0 csum crc32c 0:8040a368 compress none ec: idx 83 block 1 ptr: 0:302:128 gen 0 00255 bcachefs (0ca38fe8-0a26-41f9-9b5d-6a27796c7803): /fiotest offset 85983232: no device to read from: 00255 u64s 8 type extent 4098:168064:U32_MAX len 128 ver 0: durability: 0 crc: c_size 128 size 128 offset 0 nonce 0 csum crc32c 0:43311336 compress none ec: idx 83 block 1 ptr: 0:302:0 gen 0 00255 Modules linked in: 00255 CPU: 5 UID: 0 PID: 304 Comm: kworker/u70:2 Not tainted 6.14.0-rc6-ktest-g526aae23d67d #16040 00255 Hardware name: linux,dummy-virt (DT) 00255 Workqueue: events_unbound bch2_rbio_retry 00255 pstate: 60001005 (nZCv daif -PAN -UAO -TCO -DIT +SSBS BTYPE=--) 00255 pc : __bch2_trans_get+0x100/0x378 00255 lr : __bch2_trans_get+0xa0/0x378 00255 sp : ffffff80c865b760 00255 x29: ffffff80c865b760 x28: 0000000000000000 x27: ffffff80d76ed880 00255 x26: 0000000000000018 x25: 0000000000000000 x24: ffffff80f4ec3760 00255 x23: ffffff80f4010140 x22: 0000000000000056 x21: ffffff80f4ec0000 00255 x20: ffffff80f4ec3788 x19: ffffff80d75f8000 x18: 00000000ffffffff 00255 x17: 2065707974203820 x16: 7334367520200a3a x15: 0000000000000008 00255 x14: 0000000000000001 x13: 0000000000000100 x12: 0000000000000006 00255 x11: ffffffc080b47a40 x10: 0000000000000000 x9 : ffffffc08038dea8 00255 x8 : ffffff80d75fc018 x7 : 0000000000000000 x6 : 0000000000003788 00255 x5 : 0000000000003760 x4 : ffffff80c922de80 x3 : ffffff80f18f0000 00255 x2 : ffffff80c922de80 x1 : 0000000000000130 x0 : 0000000000000006 00255 Call trace: 00255 __bch2_trans_get+0x100/0x378 (P) 00255 bch2_read_io_err+0x98/0x260 00255 bch2_read_endio+0xb8/0x2d0 00255 __bch2_read_extent+0xce8/0xfe0 00255 __bch2_read+0x2a8/0x978 00255 bch2_rbio_retry+0x188/0x318 00255 process_one_work+0x154/0x390 00255 worker_thread+0x20c/0x3b8 00255 kthread+0xf0/0x1b0 00255 ret_from_fork+0x10/0x20 00255 Code: 6b01001f 54ffff01 79408460 3617fec0 (d4210000) 00255 ---[ end trace 0000000000000000 ]--- 00255 Kernel panic - not syncing: Oops - BUG: Fatal exception 00255 SMP: stopping secondary CPUs 00255 Kernel Offset: disabled 00255 CPU features: 0x000,00000070,00000010,8240500b 00255 Memory Limit: none 00255 ---[ end Kernel panic - not syncing: Oops - BUG: Fatal exception ]--- Signed-off-by: Kent Overstreet --- fs/bcachefs/io_read.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 8c7b2d3d779df..726da68073e2b 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -951,12 +951,6 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, goto retry_pick; } - /* - * Unlock the iterator while the btree node's lock is still in - * cache, before doing the IO: - */ - bch2_trans_unlock(trans); - if (flags & BCH_READ_NODECODE) { /* * can happen if we retry, and the extent we were going to read @@ -1113,6 +1107,15 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, trace_and_count(c, read_split, &orig->bio); } + /* + * Unlock the iterator while the btree node's lock is still in + * cache, before doing the IO: + */ + if (!(flags & BCH_READ_IN_RETRY)) + bch2_trans_unlock(trans); + else + bch2_trans_unlock_long(trans); + if (!rbio->pick.idx) { if (unlikely(!rbio->have_ioref)) { struct printbuf buf = PRINTBUF; @@ -1160,6 +1163,8 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, if (likely(!(flags & BCH_READ_IN_RETRY))) { return 0; } else { + bch2_trans_unlock(trans); + int ret; rbio->context = RBIO_CONTEXT_UNBOUND; -- GitLab From 5b1122fc4995f308b21d7cfc64ef9880ac834d20 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Mar 2025 22:48:29 +0300 Subject: [PATCH 1479/1585] platform/x86/amd/pmf: fix cleanup in amd_pmf_init_smart_pc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are a few problems in this code: First, if amd_pmf_tee_init() fails then the function returns directly instead of cleaning up. We cannot simply do a "goto error;" because the amd_pmf_tee_init() cleanup calls tee_shm_free(dev->fw_shm_pool); and amd_pmf_tee_deinit() calls it as well leading to a double free. I have re-written this code to use an unwind ladder to free the allocations. Second, if amd_pmf_start_policy_engine() fails on every iteration though the loop then the code calls amd_pmf_tee_deinit() twice which is also a double free. Call amd_pmf_tee_deinit() inside the loop for each failed iteration. Also on that path the error codes are not necessarily negative kernel error codes. Set the error code to -EINVAL. There is a very subtle third bug which is that if the call to input_register_device() in amd_pmf_register_input_device() fails then we call input_unregister_device() on an input device that wasn't registered. This will lead to a reference counting underflow because of the device_del(&dev->dev) in __input_unregister_device(). It's unlikely that anyone would ever hit this bug in real life. Fixes: 376a8c2a1443 ("platform/x86/amd/pmf: Update PMF Driver for Compatibility with new PMF-TA") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/232231fc-6a71-495e-971b-be2a76f6db4c@stanley.mountain Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmf/tee-if.c | 36 +++++++++++++++++++-------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index ceaff1ebb7b93..a1e43873a07b0 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -510,18 +510,18 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) ret = amd_pmf_set_dram_addr(dev, true); if (ret) - goto error; + goto err_cancel_work; dev->policy_base = devm_ioremap_resource(dev->dev, dev->res); if (IS_ERR(dev->policy_base)) { ret = PTR_ERR(dev->policy_base); - goto error; + goto err_free_dram_buf; } dev->policy_buf = kzalloc(dev->policy_sz, GFP_KERNEL); if (!dev->policy_buf) { ret = -ENOMEM; - goto error; + goto err_free_dram_buf; } memcpy_fromio(dev->policy_buf, dev->policy_base, dev->policy_sz); @@ -531,13 +531,13 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL); if (!dev->prev_data) { ret = -ENOMEM; - goto error; + goto err_free_policy; } for (i = 0; i < ARRAY_SIZE(amd_pmf_ta_uuid); i++) { ret = amd_pmf_tee_init(dev, &amd_pmf_ta_uuid[i]); if (ret) - return ret; + goto err_free_prev_data; ret = amd_pmf_start_policy_engine(dev); switch (ret) { @@ -550,27 +550,41 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) status = false; break; default: - goto error; + ret = -EINVAL; + amd_pmf_tee_deinit(dev); + goto err_free_prev_data; } if (status) break; } - if (!status && !pb_side_load) - goto error; + if (!status && !pb_side_load) { + ret = -EINVAL; + goto err_free_prev_data; + } if (pb_side_load) amd_pmf_open_pb(dev, dev->dbgfs_dir); ret = amd_pmf_register_input_device(dev); if (ret) - goto error; + goto err_pmf_remove_pb; return 0; -error: - amd_pmf_deinit_smart_pc(dev); +err_pmf_remove_pb: + if (pb_side_load && dev->esbin) + amd_pmf_remove_pb(dev); + amd_pmf_tee_deinit(dev); +err_free_prev_data: + kfree(dev->prev_data); +err_free_policy: + kfree(dev->policy_buf); +err_free_dram_buf: + kfree(dev->buf); +err_cancel_work: + cancel_delayed_work_sync(&dev->pb_work); return ret; } -- GitLab From a05507cef0ee6a0af402c0d7e994115033ff746b Mon Sep 17 00:00:00 2001 From: Lukas Hetzenecker Date: Tue, 11 Mar 2025 00:28:03 +0100 Subject: [PATCH 1480/1585] platform/surface: aggregator_registry: Add Support for Surface Pro 11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add SAM client device nodes for the Surface Pro 11 (Intel). Like with the Surface Pro 10 already, the node group is compatible, so it can be reused. Signed-off-by: Lukas Hetzenecker Link: https://lore.kernel.org/r/20250310232803.23691-1-lukas@hetzenecker.me Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/surface/surface_aggregator_registry.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index d4f32ad665305..a594d5fcfcfd1 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -371,7 +371,7 @@ static const struct software_node *ssam_node_group_sp8[] = { NULL, }; -/* Devices for Surface Pro 9 (Intel/x86) and 10 */ +/* Devices for Surface Pro 9, 10 and 11 (Intel/x86) */ static const struct software_node *ssam_node_group_sp9[] = { &ssam_node_root, &ssam_node_hub_kip, @@ -430,6 +430,9 @@ static const struct acpi_device_id ssam_platform_hub_acpi_match[] = { /* Surface Pro 10 */ { "MSHW0510", (unsigned long)ssam_node_group_sp9 }, + /* Surface Pro 11 */ + { "MSHW0583", (unsigned long)ssam_node_group_sp9 }, + /* Surface Book 2 */ { "MSHW0107", (unsigned long)ssam_node_group_gen5 }, -- GitLab From a8045e46c508b70fe4b30cc020fd0a2b0709b2e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Thu, 6 Mar 2025 13:08:27 -0800 Subject: [PATCH 1481/1585] drm/i915: Increase I915_PARAM_MMAP_GTT_VERSION version to indicate support for partial mmaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 255fc1703e42 ("drm/i915/gem: Calculate object page offset for partial memory mapping") was the last patch of several patches fixing multiple partial mmaps. But without a bump in I915_PARAM_MMAP_GTT_VERSION there is no clean way for UMD to know if it can do multiple partial mmaps. Fixes: 255fc1703e42 ("drm/i915/gem: Calculate object page offset for partial memory mapping") Cc: Andi Shyti Cc: Nirmoy Das Cc: Lionel Landwerlin Signed-off-by: José Roberto de Souza Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20250306210827.171147-1-jose.souza@intel.com (cherry picked from commit bfef148f3680e6b9d28e7fca46d9520f80c5e50e) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 21274aa9bdddc..c3dabb8579605 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -164,6 +164,9 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) * 4 - Support multiple fault handlers per object depending on object's * backing storage (a.k.a. MMAP_OFFSET). * + * 5 - Support multiple partial mmaps(mmap part of BO + unmap a offset, multiple + * times with different size and offset). + * * Restrictions: * * * snoopable objects cannot be accessed via the GTT. It can cause machine @@ -191,7 +194,7 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) */ int i915_gem_mmap_gtt_version(void) { - return 4; + return 5; } static inline struct i915_gtt_view -- GitLab From 5daa0c35a1f0e7a6c3b8ba9cb721e7d1ace6e619 Mon Sep 17 00:00:00 2001 From: Matthew Maurer Date: Wed, 8 Jan 2025 23:35:08 +0000 Subject: [PATCH 1482/1585] rust: Disallow BTF generation with Rust + LTO The kernel cannot currently self-parse BTF containing Rust debug information. pahole uses the language of the CU to determine whether to filter out debug information when generating the BTF. When LTO is enabled, Rust code can cross CU boundaries, resulting in Rust debug information in CUs labeled as C. This results in a system which cannot parse its own BTF. Signed-off-by: Matthew Maurer Cc: stable@vger.kernel.org Fixes: c1177979af9c ("btf, scripts: Exclude Rust CUs with pahole") Link: https://lore.kernel.org/r/20250108-rust-btf-lto-incompat-v1-1-60243ff6d820@google.com Signed-off-by: Miguel Ojeda --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index d0d021b3fa3b3..324c2886b2ea3 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1973,7 +1973,7 @@ config RUST depends on !MODVERSIONS || GENDWARFKSYMS depends on !GCC_PLUGIN_RANDSTRUCT depends on !RANDSTRUCT - depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE + depends on !DEBUG_INFO_BTF || (PAHOLE_HAS_LANG_EXCLUDE && !LTO) depends on !CFI_CLANG || HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC select CFI_ICALL_NORMALIZE_INTEGERS if CFI_CLANG depends on !CALL_PADDING || RUSTC_VERSION >= 108100 -- GitLab From 2e0f91aba507a3cb59f7a12fc3ea2b7d4d6675b7 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Mon, 10 Feb 2025 12:03:24 -0500 Subject: [PATCH 1483/1585] scripts: generate_rust_analyzer: add missing macros deps The macros crate has depended on std and proc_macro since its introduction in commit 1fbde52bde73 ("rust: add `macros` crate"). These dependencies were omitted from commit 8c4555ccc55c ("scripts: add `generate_rust_analyzer.py`") resulting in missing go-to-definition and autocomplete, and false-positive warnings emitted from rust-analyzer such as: [{ "resource": "/Users/tamird/src/linux/rust/macros/module.rs", "owner": "_generated_diagnostic_collection_name_#1", "code": { "value": "non_snake_case", "target": { "$mid": 1, "path": "/rustc/", "scheme": "https", "authority": "doc.rust-lang.org", "query": "search=non_snake_case" } }, "severity": 4, "message": "Variable `None` should have snake_case name, e.g. `none`", "source": "rust-analyzer", "startLineNumber": 123, "startColumn": 17, "endLineNumber": 123, "endColumn": 21 }] Add the missing dependencies to improve the developer experience. [ Fiona had a different approach (thanks!) at: https://lore.kernel.org/rust-for-linux/20241205115438.234221-1-me@kloenk.dev/ But Tamir and Fiona agreed to this one. - Miguel ] Fixes: 8c4555ccc55c ("scripts: add `generate_rust_analyzer.py`") Reviewed-by: Fiona Behrens Diagnosed-by: Chayim Refael Friedman Link: https://github.com/rust-lang/rust-analyzer/issues/17759#issuecomment-2646328275 Signed-off-by: Tamir Duberstein Tested-by: Andreas Hindborg Link: https://lore.kernel.org/r/20250210-rust-analyzer-macros-core-dep-v3-1-45eb4836f218@gmail.com [ Removed `return`. Changed tag name. Added Link. Slightly reworded. - Miguel ] Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index aa8ea1a4dbe5f..b40679a90843b 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -57,14 +57,26 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs): crates_indexes[display_name] = len(crates) crates.append(crate) - # First, the ones in `rust/` since they are a bit special. - append_crate( - "core", - sysroot_src / "core" / "src" / "lib.rs", - [], - cfg=crates_cfgs.get("core", []), - is_workspace_member=False, - ) + def append_sysroot_crate( + display_name, + deps, + cfg=[], + ): + append_crate( + display_name, + sysroot_src / display_name / "src" / "lib.rs", + deps, + cfg, + is_workspace_member=False, + ) + + # NB: sysroot crates reexport items from one another so setting up our transitive dependencies + # here is important for ensuring that rust-analyzer can resolve symbols. The sources of truth + # for this dependency graph are `(sysroot_src / crate / "Cargo.toml" for crate in crates)`. + append_sysroot_crate("core", [], cfg=crates_cfgs.get("core", [])) + append_sysroot_crate("alloc", ["core"]) + append_sysroot_crate("std", ["alloc", "core"]) + append_sysroot_crate("proc_macro", ["core", "std"]) append_crate( "compiler_builtins", @@ -75,7 +87,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs): append_crate( "macros", srctree / "rust" / "macros" / "lib.rs", - [], + ["std", "proc_macro"], is_proc_macro=True, ) -- GitLab From d1f928052439cad028438a8b8b34c1f01bc06068 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Mon, 10 Feb 2025 13:04:16 -0500 Subject: [PATCH 1484/1585] scripts: generate_rust_analyzer: add missing include_dirs Commit 8c4555ccc55c ("scripts: add `generate_rust_analyzer.py`") specified OBJTREE for the bindings crate, and `source.include_dirs` for the kernel crate, likely in an attempt to support out-of-source builds for those crates where the generated files reside in `objtree` rather than `srctree`. This was insufficient because both bits of configuration are required for each crate; the result is that rust-analyzer is unable to resolve generated files for either crate in an out-of-source build. [ Originally we were not using `OBJTREE` in the `kernel` crate, but we did pass the variable anyway, so conceptually it could have been there since then. Regarding `include_dirs`, it started in `kernel` before being in mainline because we included the bindings directly there (i.e. there was no `bindings` crate). However, when that crate got created, we moved the `OBJTREE` there but not the `include_dirs`. Nowadays, though, we happen to need the `include_dirs` also in the `kernel` crate for `generated_arch_static_branch_asm.rs` which was not there back then -- Tamir confirms it is indeed required for that reason. - Miguel ] Add the missing bits to improve the developer experience. Fixes: 8c4555ccc55c ("scripts: add `generate_rust_analyzer.py`") Signed-off-by: Tamir Duberstein Tested-by: Andreas Hindborg Link: https://lore.kernel.org/r/20250210-rust-analyzer-bindings-include-v2-1-23dff845edc3@gmail.com [ Slightly reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 40 +++++++++++++++---------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index b40679a90843b..f2d6787e9c0c5 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -97,27 +97,27 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs): ["core", "compiler_builtins"], ) - append_crate( - "bindings", - srctree / "rust"/ "bindings" / "lib.rs", - ["core"], - cfg=cfg, - ) - crates[-1]["env"]["OBJTREE"] = str(objtree.resolve(True)) + def append_crate_with_generated( + display_name, + deps, + ): + append_crate( + display_name, + srctree / "rust"/ display_name / "lib.rs", + deps, + cfg=cfg, + ) + crates[-1]["env"]["OBJTREE"] = str(objtree.resolve(True)) + crates[-1]["source"] = { + "include_dirs": [ + str(srctree / "rust" / display_name), + str(objtree / "rust") + ], + "exclude_dirs": [], + } - append_crate( - "kernel", - srctree / "rust" / "kernel" / "lib.rs", - ["core", "macros", "build_error", "bindings"], - cfg=cfg, - ) - crates[-1]["source"] = { - "include_dirs": [ - str(srctree / "rust" / "kernel"), - str(objtree / "rust") - ], - "exclude_dirs": [], - } + append_crate_with_generated("bindings", ["core"]) + append_crate_with_generated("kernel", ["core", "macros", "build_error", "bindings"]) def is_root_crate(build_file, target): try: -- GitLab From a1eb95d6b5f4cf5cc7b081e85e374d1dd98a213b Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Mon, 10 Feb 2025 13:04:17 -0500 Subject: [PATCH 1485/1585] scripts: generate_rust_analyzer: add uapi crate Commit 4e1746656839 ("rust: uapi: Add UAPI crate") did not update rust-analyzer to include the new crate. Add the missing definition to improve the developer experience. Fixes: 4e1746656839 ("rust: uapi: Add UAPI crate") Signed-off-by: Tamir Duberstein Tested-by: Andreas Hindborg Link: https://lore.kernel.org/r/20250210-rust-analyzer-bindings-include-v2-2-23dff845edc3@gmail.com [ Slightly reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index f2d6787e9c0c5..adae71544cbd6 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -117,7 +117,8 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs): } append_crate_with_generated("bindings", ["core"]) - append_crate_with_generated("kernel", ["core", "macros", "build_error", "bindings"]) + append_crate_with_generated("uapi", ["core"]) + append_crate_with_generated("kernel", ["core", "macros", "build_error", "bindings", "uapi"]) def is_root_crate(build_file, target): try: -- GitLab From 8ae227f8a7749eec92fc381dfbe213429c852278 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 11 Mar 2025 12:17:04 +0100 Subject: [PATCH 1486/1585] wifi: mac80211: fix MPDU length parsing for EHT 5/6 GHz The MPDU length is only configured using the EHT capabilities element on 2.4 GHz. On 5/6 GHz it is configured using the VHT or HE capabilities respectively. Fixes: cf0079279727 ("wifi: mac80211: parse A-MSDU len from EHT capabilities") Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Benjamin Berg Link: https://patch.msgid.link/20250311121704.0634d31f0883.I28063e4d3ef7d296b7e8a1c303460346a30bf09c@changeid Signed-off-by: Johannes Berg --- net/mac80211/eht.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c index 7a3116c36df9f..fd41046e3b681 100644 --- a/net/mac80211/eht.c +++ b/net/mac80211/eht.c @@ -2,7 +2,7 @@ /* * EHT handling * - * Copyright(c) 2021-2024 Intel Corporation + * Copyright(c) 2021-2025 Intel Corporation */ #include "ieee80211_i.h" @@ -76,6 +76,13 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata, link_sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(link_sta); link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta); + /* + * The MPDU length bits are reserved on all but 2.4 GHz and get set via + * VHT (5 GHz) or HE (6 GHz) capabilities. + */ + if (sband->band != NL80211_BAND_2GHZ) + return; + switch (u8_get_bits(eht_cap->eht_cap_elem.mac_cap_info[0], IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK)) { case IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_11454: -- GitLab From 285df995f90e3d61d97f327d34b9659d92313314 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Fri, 28 Feb 2025 15:04:20 +0100 Subject: [PATCH 1487/1585] i2c: omap: fix IRQ storms On the GTA04A5 writing a reset command to the gyroscope causes IRQ storms because NACK IRQs are enabled and therefore triggered but not acked. Sending a reset command to the gyroscope by i2cset 1 0x69 0x14 0xb6 with an additional debug print in the ISR (not the thread) itself causes [ 363.353515] i2c i2c-1: ioctl, cmd=0x720, arg=0xbe801b00 [ 363.359039] omap_i2c 48072000.i2c: addr: 0x0069, len: 2, flags: 0x0, stop: 1 [ 363.366180] omap_i2c 48072000.i2c: IRQ LL (ISR = 0x1110) [ 363.371673] omap_i2c 48072000.i2c: IRQ (ISR = 0x0010) [ 363.376892] omap_i2c 48072000.i2c: IRQ LL (ISR = 0x0102) [ 363.382263] omap_i2c 48072000.i2c: IRQ LL (ISR = 0x0102) [ 363.387664] omap_i2c 48072000.i2c: IRQ LL (ISR = 0x0102) repeating till infinity [...] (0x2 = NACK, 0x100 = Bus free, which is not enabled) Apparently no other IRQ bit gets set, so this stalls. Do not ignore enabled interrupts and make sure they are acked. If the NACK IRQ is not needed, it should simply not enabled, but according to the above log, caring about it is necessary unless the Bus free IRQ is enabled and handled. The assumption that is will always come with a ARDY IRQ, which was the idea behind ignoring it, proves wrong. It is true for simple reads from an unused address. To still avoid the i2cdetect trouble which is the reason for commit c770657bd261 ("i2c: omap: Fix standard mode false ACK readings"), avoid doing much about NACK in omap_i2c_xfer_data() which is used by both IRQ mode and polling mode, so also the false detection fix is extended to polling usage and IRQ storms are avoided. By changing this, the hardirq handler is not needed anymore to filter stuff. The mentioned gyro reset now just causes a -ETIMEDOUT instead of hanging the system. Fixes: c770657bd261 ("i2c: omap: Fix standard mode false ACK readings"). CC: stable@kernel.org Signed-off-by: Andreas Kemnade Tested-by: Nishanth Menon Reviewed-by: Aniket Limaye Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250228140420.379498-1-andreas@kemnade.info --- drivers/i2c/busses/i2c-omap.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 92faf03d64cfb..f18c3e74b0762 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -1048,23 +1048,6 @@ static int omap_i2c_transmit_data(struct omap_i2c_dev *omap, u8 num_bytes, return 0; } -static irqreturn_t -omap_i2c_isr(int irq, void *dev_id) -{ - struct omap_i2c_dev *omap = dev_id; - irqreturn_t ret = IRQ_HANDLED; - u16 mask; - u16 stat; - - stat = omap_i2c_read_reg(omap, OMAP_I2C_STAT_REG); - mask = omap_i2c_read_reg(omap, OMAP_I2C_IE_REG) & ~OMAP_I2C_STAT_NACK; - - if (stat & mask) - ret = IRQ_WAKE_THREAD; - - return ret; -} - static int omap_i2c_xfer_data(struct omap_i2c_dev *omap) { u16 bits; @@ -1095,8 +1078,13 @@ static int omap_i2c_xfer_data(struct omap_i2c_dev *omap) } if (stat & OMAP_I2C_STAT_NACK) { - err |= OMAP_I2C_STAT_NACK; + omap->cmd_err |= OMAP_I2C_STAT_NACK; omap_i2c_ack_stat(omap, OMAP_I2C_STAT_NACK); + + if (!(stat & ~OMAP_I2C_STAT_NACK)) { + err = -EAGAIN; + break; + } } if (stat & OMAP_I2C_STAT_AL) { @@ -1472,7 +1460,7 @@ omap_i2c_probe(struct platform_device *pdev) IRQF_NO_SUSPEND, pdev->name, omap); else r = devm_request_threaded_irq(&pdev->dev, omap->irq, - omap_i2c_isr, omap_i2c_isr_thread, + NULL, omap_i2c_isr_thread, IRQF_NO_SUSPEND | IRQF_ONESHOT, pdev->name, omap); -- GitLab From 9b5463f349d019a261f1e80803447efca3126151 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 3 Mar 2025 20:53:08 +0100 Subject: [PATCH 1488/1585] i2c: ali1535: Fix an error handling path in ali1535_probe() If i2c_add_adapter() fails, the request_region() call in ali1535_setup() must be undone by a corresponding release_region() call, as done in the remove function. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Christophe JAILLET Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/0daf63d7a2ce74c02e2664ba805bbfadab7d25e5.1741031571.git.christophe.jaillet@wanadoo.fr --- drivers/i2c/busses/i2c-ali1535.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-ali1535.c b/drivers/i2c/busses/i2c-ali1535.c index 544c94e86b896..1eac358380405 100644 --- a/drivers/i2c/busses/i2c-ali1535.c +++ b/drivers/i2c/busses/i2c-ali1535.c @@ -485,6 +485,8 @@ MODULE_DEVICE_TABLE(pci, ali1535_ids); static int ali1535_probe(struct pci_dev *dev, const struct pci_device_id *id) { + int ret; + if (ali1535_setup(dev)) { dev_warn(&dev->dev, "ALI1535 not detected, module not inserted.\n"); @@ -496,7 +498,15 @@ static int ali1535_probe(struct pci_dev *dev, const struct pci_device_id *id) snprintf(ali1535_adapter.name, sizeof(ali1535_adapter.name), "SMBus ALI1535 adapter at %04x", ali1535_offset); - return i2c_add_adapter(&ali1535_adapter); + ret = i2c_add_adapter(&ali1535_adapter); + if (ret) + goto release_region; + + return 0; + +release_region: + release_region(ali1535_smba, ALI1535_SMB_IOSIZE); + return ret; } static void ali1535_remove(struct pci_dev *dev) -- GitLab From 6e55caaf30c88209d097e575a169b1dface1ab69 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 3 Mar 2025 20:58:06 +0100 Subject: [PATCH 1489/1585] i2c: ali15x3: Fix an error handling path in ali15x3_probe() If i2c_add_adapter() fails, the request_region() call in ali15x3_setup() must be undone by a corresponding release_region() call, as done in the remove function. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Christophe JAILLET Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/9b2090cbcc02659f425188ea05f2e02745c4e67b.1741031878.git.christophe.jaillet@wanadoo.fr --- drivers/i2c/busses/i2c-ali15x3.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-ali15x3.c b/drivers/i2c/busses/i2c-ali15x3.c index 4761c72081022..418d11266671e 100644 --- a/drivers/i2c/busses/i2c-ali15x3.c +++ b/drivers/i2c/busses/i2c-ali15x3.c @@ -472,6 +472,8 @@ MODULE_DEVICE_TABLE (pci, ali15x3_ids); static int ali15x3_probe(struct pci_dev *dev, const struct pci_device_id *id) { + int ret; + if (ali15x3_setup(dev)) { dev_err(&dev->dev, "ALI15X3 not detected, module not inserted.\n"); @@ -483,7 +485,15 @@ static int ali15x3_probe(struct pci_dev *dev, const struct pci_device_id *id) snprintf(ali15x3_adapter.name, sizeof(ali15x3_adapter.name), "SMBus ALI15X3 adapter at %04x", ali15x3_smba); - return i2c_add_adapter(&ali15x3_adapter); + ret = i2c_add_adapter(&ali15x3_adapter); + if (ret) + goto release_region; + + return 0; + +release_region: + release_region(ali15x3_smba, ALI15X3_SMB_IOSIZE); + return ret; } static void ali15x3_remove(struct pci_dev *dev) -- GitLab From 2b22459792fcb4def9f0936d64575ac11a95a58d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 3 Mar 2025 21:26:54 +0100 Subject: [PATCH 1490/1585] i2c: sis630: Fix an error handling path in sis630_probe() If i2c_add_adapter() fails, the request_region() call in sis630_setup() must be undone by a corresponding release_region() call, as done in the remove function. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/3d607601f2c38e896b10207963c6ab499ca5c307.1741033587.git.christophe.jaillet@wanadoo.fr Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-sis630.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-sis630.c b/drivers/i2c/busses/i2c-sis630.c index 3505cf29cedda..a19c3d251804d 100644 --- a/drivers/i2c/busses/i2c-sis630.c +++ b/drivers/i2c/busses/i2c-sis630.c @@ -509,6 +509,8 @@ MODULE_DEVICE_TABLE(pci, sis630_ids); static int sis630_probe(struct pci_dev *dev, const struct pci_device_id *id) { + int ret; + if (sis630_setup(dev)) { dev_err(&dev->dev, "SIS630 compatible bus not detected, " @@ -522,7 +524,15 @@ static int sis630_probe(struct pci_dev *dev, const struct pci_device_id *id) snprintf(sis630_adapter.name, sizeof(sis630_adapter.name), "SMBus SIS630 adapter at %04x", smbus_base + SMB_STS); - return i2c_add_adapter(&sis630_adapter); + ret = i2c_add_adapter(&sis630_adapter); + if (ret) + goto release_region; + + return 0; + +release_region: + release_region(smbus_base + SMB_STS, SIS630_SMB_IOREGION); + return ret; } static void sis630_remove(struct pci_dev *dev) -- GitLab From 9bce6b5f8987678b9c6c1fe433af6b5fe41feadc Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Tue, 11 Mar 2025 19:43:59 +0900 Subject: [PATCH 1491/1585] block: change blk_mq_add_to_batch() third argument type to bool Commit 1f47ed294a2b ("block: cleanup and fix batch completion adding conditions") modified the evaluation criteria for the third argument, 'ioerror', in the blk_mq_add_to_batch() function. Initially, the function had checked if 'ioerror' equals zero. Following the commit, it started checking for negative error values, with the presumption that such values, for instance -EIO, would be passed in. However, blk_mq_add_to_batch() callers do not pass negative error values. Instead, they pass status codes defined in various ways: - NVMe PCI and Apple drivers pass NVMe status code - virtio_blk driver passes the virtblk request header status byte - null_blk driver passes blk_status_t These codes are either zero or positive, therefore the revised check fails to function as intended. Specifically, with the NVMe PCI driver, this modification led to the failure of the blktests test case nvme/039. In this test scenario, errors are artificially injected to the NVMe driver, resulting in positive NVMe status codes passed to blk_mq_add_to_batch(), which unexpectedly processes the failed I/O in a batch. Hence the failure. To correct the ioerror check within blk_mq_add_to_batch(), make all callers to uniformly pass the argument as boolean. Modify the callers to check their specific status codes and pass the boolean value 'is_error'. Also describe the arguments of blK_mq_add_to_batch as kerneldoc. Fixes: 1f47ed294a2b ("block: cleanup and fix batch completion adding conditions") Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20250311104359.1767728-3-shinichiro.kawasaki@wdc.com [axboe: fold in documentation update] Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 4 ++-- drivers/block/virtio_blk.c | 5 +++-- drivers/nvme/host/apple.c | 3 ++- drivers/nvme/host/pci.c | 5 +++-- include/linux/blk-mq.h | 16 ++++++++++++---- 5 files changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index d94ef37480bd4..fdc7a0b2af109 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1549,8 +1549,8 @@ static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) cmd = blk_mq_rq_to_pdu(req); cmd->error = null_process_cmd(cmd, req_op(req), blk_rq_pos(req), blk_rq_sectors(req)); - if (!blk_mq_add_to_batch(req, iob, (__force int) cmd->error, - blk_mq_end_request_batch)) + if (!blk_mq_add_to_batch(req, iob, cmd->error != BLK_STS_OK, + blk_mq_end_request_batch)) blk_mq_end_request(req, cmd->error); nr++; } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index a4af39fc7ea28..286cab5e53684 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1207,11 +1207,12 @@ static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) { struct request *req = blk_mq_rq_from_pdu(vbr); + u8 status = virtblk_vbr_status(vbr); found++; if (!blk_mq_complete_request_remote(req) && - !blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr), - virtblk_complete_batch)) + !blk_mq_add_to_batch(req, iob, status != VIRTIO_BLK_S_OK, + virtblk_complete_batch)) virtblk_request_done(req); } diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index a060f69558e76..8971aca41e63d 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -599,7 +599,8 @@ static inline void apple_nvme_handle_cqe(struct apple_nvme_queue *q, } if (!nvme_try_complete_req(req, cqe->status, cqe->result) && - !blk_mq_add_to_batch(req, iob, nvme_req(req)->status, + !blk_mq_add_to_batch(req, iob, + nvme_req(req)->status != NVME_SC_SUCCESS, apple_nvme_complete_batch)) apple_nvme_complete_rq(req); } diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 640590b217282..75de86e235ad7 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1130,8 +1130,9 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail); if (!nvme_try_complete_req(req, cqe->status, cqe->result) && - !blk_mq_add_to_batch(req, iob, nvme_req(req)->status, - nvme_pci_complete_batch)) + !blk_mq_add_to_batch(req, iob, + nvme_req(req)->status != NVME_SC_SUCCESS, + nvme_pci_complete_batch)) nvme_pci_complete_rq(req); } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 71f4f0cc3dac6..aba9c24486aad 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -852,12 +852,20 @@ static inline bool blk_mq_is_reserved_rq(struct request *rq) return rq->rq_flags & RQF_RESV; } -/* +/** + * blk_mq_add_to_batch() - add a request to the completion batch + * @req: The request to add to batch + * @iob: The batch to add the request + * @is_error: Specify true if the request failed with an error + * @complete: The completaion handler for the request + * * Batched completions only work when there is no I/O error and no special * ->end_io handler. + * + * Return: true when the request was added to the batch, otherwise false */ static inline bool blk_mq_add_to_batch(struct request *req, - struct io_comp_batch *iob, int ioerror, + struct io_comp_batch *iob, bool is_error, void (*complete)(struct io_comp_batch *)) { /* @@ -865,7 +873,7 @@ static inline bool blk_mq_add_to_batch(struct request *req, * 1) No batch container * 2) Has scheduler data attached * 3) Not a passthrough request and end_io set - * 4) Not a passthrough request and an ioerror + * 4) Not a passthrough request and failed with an error */ if (!iob) return false; @@ -874,7 +882,7 @@ static inline bool blk_mq_add_to_batch(struct request *req, if (!blk_rq_is_passthrough(req)) { if (req->end_io) return false; - if (ioerror < 0) + if (is_error) return false; } -- GitLab From d653bfeb07ebb3499c403404c21ac58a16531607 Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Sun, 9 Mar 2025 17:07:38 +0900 Subject: [PATCH 1492/1585] netfilter: nf_conncount: Fully initialize struct nf_conncount_tuple in insert_tree() Since commit b36e4523d4d5 ("netfilter: nf_conncount: fix garbage collection confirm race"), `cpu` and `jiffies32` were introduced to the struct nf_conncount_tuple. The commit made nf_conncount_add() initialize `conn->cpu` and `conn->jiffies32` when allocating the struct. In contrast, count_tree() was not changed to initialize them. By commit 34848d5c896e ("netfilter: nf_conncount: Split insert and traversal"), count_tree() was split and the relevant allocation code now resides in insert_tree(). Initialize `conn->cpu` and `conn->jiffies32` in insert_tree(). BUG: KMSAN: uninit-value in find_or_evict net/netfilter/nf_conncount.c:117 [inline] BUG: KMSAN: uninit-value in __nf_conncount_add+0xd9c/0x2850 net/netfilter/nf_conncount.c:143 find_or_evict net/netfilter/nf_conncount.c:117 [inline] __nf_conncount_add+0xd9c/0x2850 net/netfilter/nf_conncount.c:143 count_tree net/netfilter/nf_conncount.c:438 [inline] nf_conncount_count+0x82f/0x1e80 net/netfilter/nf_conncount.c:521 connlimit_mt+0x7f6/0xbd0 net/netfilter/xt_connlimit.c:72 __nft_match_eval net/netfilter/nft_compat.c:403 [inline] nft_match_eval+0x1a5/0x300 net/netfilter/nft_compat.c:433 expr_call_ops_eval net/netfilter/nf_tables_core.c:240 [inline] nft_do_chain+0x426/0x2290 net/netfilter/nf_tables_core.c:288 nft_do_chain_ipv4+0x1a5/0x230 net/netfilter/nft_chain_filter.c:23 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626 nf_hook_slow_list+0x24d/0x860 net/netfilter/core.c:663 NF_HOOK_LIST include/linux/netfilter.h:350 [inline] ip_sublist_rcv+0x17b7/0x17f0 net/ipv4/ip_input.c:633 ip_list_rcv+0x9ef/0xa40 net/ipv4/ip_input.c:669 __netif_receive_skb_list_ptype net/core/dev.c:5936 [inline] __netif_receive_skb_list_core+0x15c5/0x1670 net/core/dev.c:5983 __netif_receive_skb_list net/core/dev.c:6035 [inline] netif_receive_skb_list_internal+0x1085/0x1700 net/core/dev.c:6126 netif_receive_skb_list+0x5a/0x460 net/core/dev.c:6178 xdp_recv_frames net/bpf/test_run.c:280 [inline] xdp_test_run_batch net/bpf/test_run.c:361 [inline] bpf_test_run_xdp_live+0x2e86/0x3480 net/bpf/test_run.c:390 bpf_prog_test_run_xdp+0xf1d/0x1ae0 net/bpf/test_run.c:1316 bpf_prog_test_run+0x5e5/0xa30 kernel/bpf/syscall.c:4407 __sys_bpf+0x6aa/0xd90 kernel/bpf/syscall.c:5813 __do_sys_bpf kernel/bpf/syscall.c:5902 [inline] __se_sys_bpf kernel/bpf/syscall.c:5900 [inline] __ia32_sys_bpf+0xa0/0xe0 kernel/bpf/syscall.c:5900 ia32_sys_call+0x394d/0x4180 arch/x86/include/generated/asm/syscalls_32.h:358 do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/common.c:387 do_fast_syscall_32+0x38/0x80 arch/x86/entry/common.c:412 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:450 entry_SYSENTER_compat_after_hwframe+0x84/0x8e Uninit was created at: slab_post_alloc_hook mm/slub.c:4121 [inline] slab_alloc_node mm/slub.c:4164 [inline] kmem_cache_alloc_noprof+0x915/0xe10 mm/slub.c:4171 insert_tree net/netfilter/nf_conncount.c:372 [inline] count_tree net/netfilter/nf_conncount.c:450 [inline] nf_conncount_count+0x1415/0x1e80 net/netfilter/nf_conncount.c:521 connlimit_mt+0x7f6/0xbd0 net/netfilter/xt_connlimit.c:72 __nft_match_eval net/netfilter/nft_compat.c:403 [inline] nft_match_eval+0x1a5/0x300 net/netfilter/nft_compat.c:433 expr_call_ops_eval net/netfilter/nf_tables_core.c:240 [inline] nft_do_chain+0x426/0x2290 net/netfilter/nf_tables_core.c:288 nft_do_chain_ipv4+0x1a5/0x230 net/netfilter/nft_chain_filter.c:23 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626 nf_hook_slow_list+0x24d/0x860 net/netfilter/core.c:663 NF_HOOK_LIST include/linux/netfilter.h:350 [inline] ip_sublist_rcv+0x17b7/0x17f0 net/ipv4/ip_input.c:633 ip_list_rcv+0x9ef/0xa40 net/ipv4/ip_input.c:669 __netif_receive_skb_list_ptype net/core/dev.c:5936 [inline] __netif_receive_skb_list_core+0x15c5/0x1670 net/core/dev.c:5983 __netif_receive_skb_list net/core/dev.c:6035 [inline] netif_receive_skb_list_internal+0x1085/0x1700 net/core/dev.c:6126 netif_receive_skb_list+0x5a/0x460 net/core/dev.c:6178 xdp_recv_frames net/bpf/test_run.c:280 [inline] xdp_test_run_batch net/bpf/test_run.c:361 [inline] bpf_test_run_xdp_live+0x2e86/0x3480 net/bpf/test_run.c:390 bpf_prog_test_run_xdp+0xf1d/0x1ae0 net/bpf/test_run.c:1316 bpf_prog_test_run+0x5e5/0xa30 kernel/bpf/syscall.c:4407 __sys_bpf+0x6aa/0xd90 kernel/bpf/syscall.c:5813 __do_sys_bpf kernel/bpf/syscall.c:5902 [inline] __se_sys_bpf kernel/bpf/syscall.c:5900 [inline] __ia32_sys_bpf+0xa0/0xe0 kernel/bpf/syscall.c:5900 ia32_sys_call+0x394d/0x4180 arch/x86/include/generated/asm/syscalls_32.h:358 do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/common.c:387 do_fast_syscall_32+0x38/0x80 arch/x86/entry/common.c:412 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:450 entry_SYSENTER_compat_after_hwframe+0x84/0x8e Reported-by: syzbot+83fed965338b573115f7@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=83fed965338b573115f7 Fixes: b36e4523d4d5 ("netfilter: nf_conncount: fix garbage collection confirm race") Signed-off-by: Kohei Enju Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conncount.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index ebe38ed2e6f4f..913ede2f57f9a 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -377,6 +377,8 @@ insert_tree(struct net *net, conn->tuple = *tuple; conn->zone = *zone; + conn->cpu = raw_smp_processor_id(); + conn->jiffies32 = (u32)jiffies; memcpy(rbconn->key, key, sizeof(u32) * data->keylen); nf_conncount_list_init(&rbconn->list); -- GitLab From c21b02fd9cbf15aed6e32c89e0fd70070281e3d1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 11 Mar 2025 12:52:45 +0100 Subject: [PATCH 1493/1585] selftests: netfilter: skip br_netfilter queue tests if kernel is tainted These scripts fail if the kernel is tainted which leads to wrong test failure reports in CI environments when an unrelated test triggers some splat. Check taint state at start of script and SKIP if its already dodgy. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/net/netfilter/br_netfilter.sh | 7 +++++++ .../testing/selftests/net/netfilter/br_netfilter_queue.sh | 7 +++++++ tools/testing/selftests/net/netfilter/nft_queue.sh | 1 + 3 files changed, 15 insertions(+) diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh index c28379a965d83..1559ba275105e 100755 --- a/tools/testing/selftests/net/netfilter/br_netfilter.sh +++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh @@ -13,6 +13,12 @@ source lib.sh checktool "nft --version" "run test without nft tool" +read t < /proc/sys/kernel/tainted +if [ "$t" -ne 0 ];then + echo SKIP: kernel is tainted + exit $ksft_skip +fi + cleanup() { cleanup_all_ns } @@ -165,6 +171,7 @@ if [ "$t" -eq 0 ];then echo PASS: kernel not tainted else echo ERROR: kernel is tainted + dmesg ret=1 fi diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh index 6a764d70ab06f..4788641717d93 100755 --- a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh +++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh @@ -4,6 +4,12 @@ source lib.sh checktool "nft --version" "run test without nft tool" +read t < /proc/sys/kernel/tainted +if [ "$t" -ne 0 ];then + echo SKIP: kernel is tainted + exit $ksft_skip +fi + cleanup() { cleanup_all_ns } @@ -72,6 +78,7 @@ if [ "$t" -eq 0 ];then echo PASS: kernel not tainted else echo ERROR: kernel is tainted + dmesg exit 1 fi diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index 785e3875a6da4..784d1b46912b0 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -593,6 +593,7 @@ EOF echo "PASS: queue program exiting while packets queued" else echo "TAINT: queue program exiting while packets queued" + dmesg ret=1 fi } -- GitLab From 80b78c39eb86e6b55f56363b709eb817527da5aa Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Mar 2025 10:45:53 +0300 Subject: [PATCH 1494/1585] ipvs: prevent integer overflow in do_ip_vs_get_ctl() The get->num_services variable is an unsigned int which is controlled by the user. The struct_size() function ensures that the size calculation does not overflow an unsigned long, however, we are saving the result to an int so the calculation can overflow. Both "len" and "get->num_services" come from the user. This check is just a sanity check to help the user and ensure they are using the API correctly. An integer overflow here is not a big deal. This has no security impact. Save the result from struct_size() type size_t to fix this integer overflow bug. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Dan Carpenter Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 7d13110ce1882..0633276d96bfb 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3091,12 +3091,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) case IP_VS_SO_GET_SERVICES: { struct ip_vs_get_services *get; - int size; + size_t size; get = (struct ip_vs_get_services *)arg; size = struct_size(get, entrytable, get->num_services); if (*len != size) { - pr_err("length: %u != %u\n", *len, size); + pr_err("length: %u != %zu\n", *len, size); ret = -EINVAL; goto out; } @@ -3132,12 +3132,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) case IP_VS_SO_GET_DESTS: { struct ip_vs_get_dests *get; - int size; + size_t size; get = (struct ip_vs_get_dests *)arg; size = struct_size(get, entrytable, get->num_dests); if (*len != size) { - pr_err("length: %u != %u\n", *len, size); + pr_err("length: %u != %zu\n", *len, size); ret = -EINVAL; goto out; } -- GitLab From b11a74ac4f545626d0dc95a8ca8c41df90532bf3 Mon Sep 17 00:00:00 2001 From: Navon John Lukose Date: Sat, 8 Mar 2025 03:03:19 +0530 Subject: [PATCH 1495/1585] ALSA: hda/realtek: Add mute LED quirk for HP Pavilion x360 14-dy1xxx Add a fixup to enable the mute LED on HP Pavilion x360 Convertible 14-dy1xxx with ALC295 codec. The appropriate coefficient index and bits were identified through a brute-force method, as detailed in https://bbs.archlinux.org/viewtopic.php?pid=2079504#p2079504. Signed-off-by: Navon John Lukose Link: https://patch.msgid.link/20250307213319.35507-1-navonjohnlukose@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d2a1f836dbbf7..a84857a3c2bfb 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4790,6 +4790,21 @@ static void alc236_fixup_hp_coef_micmute_led(struct hda_codec *codec, } } +static void alc295_fixup_hp_mute_led_coefbit11(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->mute_led_polarity = 0; + spec->mute_led_coef.idx = 0xb; + spec->mute_led_coef.mask = 3 << 3; + spec->mute_led_coef.on = 1 << 3; + spec->mute_led_coef.off = 1 << 4; + snd_hda_gen_add_mute_led_cdev(codec, coef_mute_led_set); + } +} + static void alc285_fixup_hp_mute_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -7656,6 +7671,7 @@ enum { ALC290_FIXUP_MONO_SPEAKERS_HSJACK, ALC290_FIXUP_SUBWOOFER, ALC290_FIXUP_SUBWOOFER_HSJACK, + ALC295_FIXUP_HP_MUTE_LED_COEFBIT11, ALC269_FIXUP_THINKPAD_ACPI, ALC269_FIXUP_LENOVO_XPAD_ACPI, ALC269_FIXUP_DMIC_THINKPAD_ACPI, @@ -9401,6 +9417,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC283_FIXUP_INT_MIC, }, + [ALC295_FIXUP_HP_MUTE_LED_COEFBIT11] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc295_fixup_hp_mute_led_coefbit11, + }, [ALC298_FIXUP_SAMSUNG_AMP] = { .type = HDA_FIXUP_FUNC, .v.func = alc298_fixup_samsung_amp, @@ -10451,6 +10471,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360), SND_PCI_QUIRK(0x103c, 0x8537, "HP ProBook 440 G6", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x85c6, "HP Pavilion x360 Convertible 14-dy1xxx", ALC295_FIXUP_HP_MUTE_LED_COEFBIT11), SND_PCI_QUIRK(0x103c, 0x85de, "HP Envy x360 13-ar0xxx", ALC285_FIXUP_HP_ENVY_X360), SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT), -- GitLab From d9e7c172a7f247f7ef0b151fa8c8f044b6a2a070 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 24 Feb 2025 15:40:58 +0100 Subject: [PATCH 1496/1585] media: rtl2832_sdr: assign vb2 lock before vb2_queue_init Commit c780d01cf1a6 ("media: vb2: vb2_core_queue_init(): sanity check lock and wait_prepare/finish") added a sanity check to ensure that if there are no wait_prepare/finish callbacks set by the driver, then the vb2_queue lock must be set, since otherwise the vb2 core cannot do correct locking. The rtl2832_sdr.c triggered this warning: it turns out that while the driver does set this lock, it sets it too late. So move it up to before the vb2_queue_init() call. Reported-by: Arthur Marsh Closes: https://lore.kernel.org/linux-media/20241211042355.8479-1-user@am64/ Fixes: 8fcd2795d22a ("media: rtl2832_sdr: drop vb2_ops_wait_prepare/finish") Cc: stable@vger.kernel.org Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Hans Verkuil --- drivers/media/dvb-frontends/rtl2832_sdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/rtl2832_sdr.c b/drivers/media/dvb-frontends/rtl2832_sdr.c index 05254d8717db8..0357624968f1b 100644 --- a/drivers/media/dvb-frontends/rtl2832_sdr.c +++ b/drivers/media/dvb-frontends/rtl2832_sdr.c @@ -1363,6 +1363,7 @@ static int rtl2832_sdr_probe(struct platform_device *pdev) dev->vb_queue.ops = &rtl2832_sdr_vb2_ops; dev->vb_queue.mem_ops = &vb2_vmalloc_memops; dev->vb_queue.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; + dev->vb_queue.lock = &dev->vb_queue_lock; ret = vb2_queue_init(&dev->vb_queue); if (ret) { dev_err(&pdev->dev, "Could not initialize vb2 queue\n"); @@ -1421,7 +1422,6 @@ static int rtl2832_sdr_probe(struct platform_device *pdev) /* Init video_device structure */ dev->vdev = rtl2832_sdr_template; dev->vdev.queue = &dev->vb_queue; - dev->vdev.queue->lock = &dev->vb_queue_lock; video_set_drvdata(&dev->vdev, dev); /* Register the v4l2_device structure */ -- GitLab From 658fb7fe8e7f4014ea17a4da0e0c1d9bc319fa35 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 5 Mar 2025 18:27:32 +0100 Subject: [PATCH 1497/1585] ASoC: cs42l43: convert to SYSTEM_SLEEP_PM_OPS The custom suspend function causes a build warning when CONFIG_PM_SLEEP is disabled: sound/soc/codecs/cs42l43.c:2405:12: error: unused function 'cs42l43_codec_runtime_force_suspend' [-Werror,-Wunused-function] Change SET_SYSTEM_SLEEP_PM_OPS() to the newer SYSTEM_SLEEP_PM_OPS(), to avoid this. Fixes: 164b7dd4546b ("ASoC: cs42l43: Add jack delay debounce after suspend") Signed-off-by: Arnd Bergmann Reviewed-by: Maciej Strozek Reviewed-by: Charles Keepax Link: https://patch.msgid.link/20250305172738.3437513-1-arnd@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c index d307b56a7f38e..ea84ac64c775e 100644 --- a/sound/soc/codecs/cs42l43.c +++ b/sound/soc/codecs/cs42l43.c @@ -2417,7 +2417,7 @@ static int cs42l43_codec_runtime_force_suspend(struct device *dev) static const struct dev_pm_ops cs42l43_codec_pm_ops = { RUNTIME_PM_OPS(NULL, cs42l43_codec_runtime_resume, NULL) - SET_SYSTEM_SLEEP_PM_OPS(cs42l43_codec_runtime_force_suspend, pm_runtime_force_resume) + SYSTEM_SLEEP_PM_OPS(cs42l43_codec_runtime_force_suspend, pm_runtime_force_resume) }; static const struct platform_device_id cs42l43_codec_id_table[] = { -- GitLab From 18e0885bd2ca738407036434418a26a58394a60e Mon Sep 17 00:00:00 2001 From: Boon Khai Ng Date: Wed, 12 Mar 2025 11:05:44 +0800 Subject: [PATCH 1498/1585] USB: serial: ftdi_sio: add support for Altera USB Blaster 3 The Altera USB Blaster 3, available as both a cable and an on-board solution, is primarily used for programming and debugging FPGAs. It interfaces with host software such as Quartus Programmer, System Console, SignalTap, and Nios Debugger. The device utilizes either an FT2232 or FT4232 chip. Enabling the support for various configurations of the on-board USB Blaster 3 by including the appropriate VID/PID pairs, allowing it to function as a serial device via ftdi_sio. Note that this check-in does not include support for the cable solution, as it does not support UART functionality. The supported configurations are determined by the hardware design and include: 1) PID 0x6022, FT2232, 1 JTAG port (Port A) + Port B as UART 2) PID 0x6025, FT4232, 1 JTAG port (Port A) + Port C as UART 3) PID 0x6026, FT4232, 1 JTAG port (Port A) + Port C, D as UART 4) PID 0x6029, FT4232, 1 JTAG port (Port B) + Port C as UART 5) PID 0x602a, FT4232, 1 JTAG port (Port B) + Port C, D as UART 6) PID 0x602c, FT4232, 1 JTAG port (Port A) + Port B as UART 7) PID 0x602d, FT4232, 1 JTAG port (Port A) + Port B, C as UART 8) PID 0x602e, FT4232, 1 JTAG port (Port A) + Port B, C, D as UART These configurations allow for flexibility in how the USB Blaster 3 is used, depending on the specific needs of the hardware design. Signed-off-by: Boon Khai Ng Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/ftdi_sio.c | 14 ++++++++++++++ drivers/usb/serial/ftdi_sio_ids.h | 13 +++++++++++++ 2 files changed, 27 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index e07c5e3eb18c0..9b34e23b70919 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1079,6 +1079,20 @@ static const struct usb_device_id id_table_combined[] = { .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, /* GMC devices */ { USB_DEVICE(GMC_VID, GMC_Z216C_PID) }, + /* Altera USB Blaster 3 */ + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6022_PID, 1) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6025_PID, 2) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6026_PID, 2) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6026_PID, 3) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6029_PID, 2) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602A_PID, 2) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602A_PID, 3) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602C_PID, 1) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602D_PID, 1) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602D_PID, 2) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 1) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 2) }, + { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 3) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 5ee60ba2a73cd..52be47d684ea6 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1612,3 +1612,16 @@ */ #define GMC_VID 0x1cd7 #define GMC_Z216C_PID 0x0217 /* GMC Z216C Adapter IR-USB */ + +/* + * Altera USB Blaster 3 (http://www.altera.com). + */ +#define ALTERA_VID 0x09fb +#define ALTERA_UB3_6022_PID 0x6022 +#define ALTERA_UB3_6025_PID 0x6025 +#define ALTERA_UB3_6026_PID 0x6026 +#define ALTERA_UB3_6029_PID 0x6029 +#define ALTERA_UB3_602A_PID 0x602a +#define ALTERA_UB3_602C_PID 0x602c +#define ALTERA_UB3_602D_PID 0x602d +#define ALTERA_UB3_602E_PID 0x602e -- GitLab From 066e053fe208a3b83ee89dc5a192146add688861 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 12 Mar 2025 08:38:47 +0100 Subject: [PATCH 1499/1585] fsnotify: add pre-content hooks on mmap() Pre-content hooks in page faults introduces potential deadlock of HSM handler in userspace with filesystem freezing. The requirement with pre-content event is that for every accessed file range an event covering at least this range will be generated at least once before the file data is accesses. In preparation to disabling pre-content event hooks on page faults, add pre-content hooks at mmap() variants for the entire mmaped range, so HSM can fill content when user requests to map a portion of the file. Note that exec() variant also calls vm_mmap_pgoff() internally to map code sections, so pre-content hooks are also generated in this case. Link: https://lore.kernel.org/linux-fsdevel/7ehxrhbvehlrjwvrduoxsao5k3x4aw275patsb3krkwuq573yv@o2hskrfawbnc/ Suggested-by: Josef Bacik Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250312073852.2123409-2-amir73il@gmail.com --- include/linux/fsnotify.h | 21 +++++++++++++++++++++ mm/util.c | 3 +++ 2 files changed, 24 insertions(+) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 6a33288bd6a1f..83d3ac97f8262 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -170,6 +170,21 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, return fsnotify_path(&file->f_path, FS_ACCESS_PERM); } +/* + * fsnotify_mmap_perm - permission hook before mmap of file range + */ +static inline int fsnotify_mmap_perm(struct file *file, int prot, + const loff_t off, size_t len) +{ + /* + * mmap() generates only pre-content events. + */ + if (!file || likely(!FMODE_FSNOTIFY_HSM(file->f_mode))) + return 0; + + return fsnotify_pre_content(&file->f_path, &off, len); +} + /* * fsnotify_truncate_perm - permission hook before file truncate */ @@ -223,6 +238,12 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, return 0; } +static inline int fsnotify_mmap_perm(struct file *file, int prot, + const loff_t off, size_t len) +{ + return 0; +} + static inline int fsnotify_truncate_perm(const struct path *path, loff_t length) { return 0; diff --git a/mm/util.c b/mm/util.c index b6b9684a14388..8c965474d329f 100644 --- a/mm/util.c +++ b/mm/util.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -569,6 +570,8 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, LIST_HEAD(uf); ret = security_mmap_file(file, prot, flag); + if (!ret) + ret = fsnotify_mmap_perm(file, prot, pgoff >> PAGE_SHIFT, len); if (!ret) { if (mmap_write_lock_killable(mm)) return -EINTR; -- GitLab From 0882ca4eecfe8b0013f339144acf886a0a0de41f Mon Sep 17 00:00:00 2001 From: Yifan Zha Date: Wed, 5 Mar 2025 13:14:55 +0800 Subject: [PATCH 1500/1585] drm/amd/amdkfd: Evict all queues even HWS remove queue failed [Why] If reset is detected and kfd need to evict working queues, HWS moving queue will be failed. Then remaining queues are not evicted and in active state. After reset done, kfd uses HWS to termination remaining activated queues but HWS is resetted. So remove queue will be failed again. [How] Keep removing all queues even if HWS returns failed. It will not affect cpsch as it checks reset_domain->sem. v2: If any queue failed, evict queue returns error. v3: Declare err inside the if-block. Reviewed-by: Felix Kuehling Signed-off-by: Yifan Zha Signed-off-by: Alex Deucher (cherry picked from commit 42c854b8fb0cce512534aa2b7141948e80c6ebb0) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d4593374e7a1e..34c2c42c0f95c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1230,11 +1230,13 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, decrement_queue_count(dqm, qpd, q); if (dqm->dev->kfd->shared_resources.enable_mes) { - retval = remove_queue_mes(dqm, q, qpd); - if (retval) { + int err; + + err = remove_queue_mes(dqm, q, qpd); + if (err) { dev_err(dev, "Failed to evict queue %d\n", q->properties.queue_id); - goto out; + retval = err; } } } -- GitLab From 6cc30748e17ea2a64051ceaf83a8372484e597f1 Mon Sep 17 00:00:00 2001 From: Natalie Vock Date: Mon, 10 Mar 2025 18:08:05 +0100 Subject: [PATCH 1501/1585] drm/amdgpu: NULL-check BO's backing store when determining GFX12 PTE flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PRT BOs may not have any backing store, so bo->tbo.resource will be NULL. Check for that before dereferencing. Fixes: 0cce5f285d9a ("drm/amdkfd: Check correct memory types for is_system variable") Reviewed-by: Christian König Signed-off-by: Natalie Vock Signed-off-by: Alex Deucher (cherry picked from commit 3e3fcd29b505cebed659311337ea03b7698767fc) Cc: stable@vger.kernel.org # 6.12.x --- drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index b749f1c3f6a9a..0fb88e6d5d54b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -528,8 +528,9 @@ static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev, bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; - is_system = (bo->tbo.resource->mem_type == TTM_PL_TT) || - (bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT); + is_system = bo->tbo.resource && + (bo->tbo.resource->mem_type == TTM_PL_TT || + bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT); if (bo && bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) *flags |= AMDGPU_PTE_DCC; -- GitLab From 0c3057a5a04d07120b3d0ec9c79568fceb9c921e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 6 Mar 2025 15:23:54 -0800 Subject: [PATCH 1502/1585] net_sched: Prevent creation of classes with TC_H_ROOT The function qdisc_tree_reduce_backlog() uses TC_H_ROOT as a termination condition when traversing up the qdisc tree to update parent backlog counters. However, if a class is created with classid TC_H_ROOT, the traversal terminates prematurely at this class instead of reaching the actual root qdisc, causing parent statistics to be incorrectly maintained. In case of DRR, this could lead to a crash as reported by Mingi Cho. Prevent the creation of any Qdisc class with classid TC_H_ROOT (0xFFFFFFFF) across all qdisc types, as suggested by Jamal. Reported-by: Mingi Cho Signed-off-by: Cong Wang Reviewed-by: Simon Horman Fixes: 066a3b5b2346 ("[NET_SCHED] sch_api: fix qdisc_tree_decrease_qlen() loop") Link: https://patch.msgid.link/20250306232355.93864-2-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_api.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e3e91cf867eb9..6c625dcd06519 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -2254,6 +2254,12 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, return -EOPNOTSUPP; } + /* Prevent creation of traffic classes with classid TC_H_ROOT */ + if (clid == TC_H_ROOT) { + NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT"); + return -EINVAL; + } + new_cl = cl; err = -EOPNOTSUPP; if (cops->change) -- GitLab From bb7737de5f593155aabbca283f4822176f4e7d6b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 6 Mar 2025 15:23:55 -0800 Subject: [PATCH 1503/1585] selftests/tc-testing: Add a test case for DRR class with TC_H_ROOT Integrate the reproduer from Mingi to TDC. All test results: 1..4 ok 1 0385 - Create DRR with default setting ok 2 2375 - Delete DRR with handle ok 3 3092 - Show DRR class ok 4 4009 - Reject creation of DRR class with classid TC_H_ROOT Cc: Mingi Cho Signed-off-by: Cong Wang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250306232355.93864-3-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/qdiscs/drr.json | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json index 7126ec3485cbd..2b61d8d79bde8 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json @@ -61,5 +61,30 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "4009", + "name": "Reject creation of DRR class with classid TC_H_ROOT", + "category": [ + "qdisc", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle ffff: drr", + "$TC filter add dev $DUMMY parent ffff: basic classid ffff:1", + "$TC class add dev $DUMMY parent ffff: classid ffff:1 drr", + "$TC filter add dev $DUMMY parent ffff: prio 1 u32 match u16 0x0000 0xfe00 at 2 flowid ffff:ffff" + ], + "cmdUnderTest": "$TC class add dev $DUMMY parent ffff: classid ffff:ffff drr", + "expExitCode": "2", + "verifyCmd": "$TC class show dev $DUMMY", + "matchPattern": "class drr ffff:ffff", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] } ] -- GitLab From 3bcde88d381a336ff252d67867c186ee602e6656 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 12 Mar 2025 17:21:31 -0400 Subject: [PATCH 1504/1585] bcachefs: fix tiny leak in bch2_dev_add() Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 6d97d412fed98..0459c875e189b 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1811,7 +1811,11 @@ int bch2_dev_add(struct bch_fs *c, const char *path) goto err_late; up_write(&c->state_lock); - return 0; +out: + printbuf_exit(&label); + printbuf_exit(&errbuf); + bch_err_fn(c, ret); + return ret; err_unlock: mutex_unlock(&c->sb_lock); @@ -1820,10 +1824,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) if (ca) bch2_dev_free(ca); bch2_free_super(&sb); - printbuf_exit(&label); - printbuf_exit(&errbuf); - bch_err_fn(c, ret); - return ret; + goto out; err_late: up_write(&c->state_lock); ca = NULL; -- GitLab From 0102fbf52b93e609fec0dab53b1fb4fe69113f5e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 11 Mar 2025 18:56:31 +0100 Subject: [PATCH 1505/1585] gpiolib: don't check the retval of get_direction() when registering a chip During chip registration we should neither check the return value of gc->get_direction() nor hold the SRCU lock when calling it. The former is because pin controllers may have pins set to alternate functions and return errors from their get_direction() callbacks. That's alright - we should default to the safe INPUT state and not bail-out. The latter is not needed because we haven't registered the chip yet so there's nothing to protect against dynamic removal. In fact: we currently hit a lockdep splat. Revert to calling the gc->get_direction() callback directly and *not* checking its value. Fixes: 9d846b1aebbe ("gpiolib: check the return value of gpio_chip::get_direction()") Reported-by: Marek Szyprowski Closes: https://lore.kernel.org/all/81f890fc-6688-42f0-9756-567efc8bb97a@samsung.com/ Reviewed-by: Andy Shevchenko Tested-by: Marek Szyprowski Link: https://lore.kernel.org/r/20250226-retval-fixes-v2-1-c8dc57182441@linaro.org Tested-by: Gene C Link: https://lore.kernel.org/r/20250311175631.83779-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 8741600af7efb..de708d0818581 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1056,24 +1056,19 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, desc->gdev = gdev; - if (gc->get_direction && gpiochip_line_is_valid(gc, desc_index)) { - ret = gc->get_direction(gc, desc_index); - if (ret < 0) - /* - * FIXME: Bail-out here once all GPIO drivers - * are updated to not return errors in - * situations that can be considered normal - * operation. - */ - dev_warn(&gdev->dev, - "%s: get_direction failed: %d\n", - __func__, ret); - - assign_bit(FLAG_IS_OUT, &desc->flags, !ret); - } else { + /* + * We would typically want to check the return value of + * get_direction() here but we must not check the return value + * and bail-out as pin controllers can have pins configured to + * alternate functions and return -EINVAL. Also: there's no + * need to take the SRCU lock here. + */ + if (gc->get_direction && gpiochip_line_is_valid(gc, desc_index)) + assign_bit(FLAG_IS_OUT, &desc->flags, + !gc->get_direction(gc, desc_index)); + else assign_bit(FLAG_IS_OUT, &desc->flags, !gc->direction_input); - } } ret = of_gpiochip_add(gc); -- GitLab From dcb73cbaaeb39c9fd00bf2e019f911725945e2fe Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 11 Mar 2025 15:31:43 +0100 Subject: [PATCH 1506/1585] gpio: cdev: use raw notifier for line state events We use a notifier to implement the mechanism of informing the user-space about changes in GPIO line status. We register with the notifier when the GPIO character device file is opened and unregister when the last reference to the associated file descriptor is dropped. Since commit fcc8b637c542 ("gpiolib: switch the line state notifier to atomic") we use the atomic notifier variant. Atomic notifiers call rcu_synchronize in atomic_notifier_chain_unregister() which caused a significant performance regression in some circumstances, observed by user-space when calling close() on the GPIO device file descriptor. Replace the atomic notifier with the raw variant and provide synchronization with a read-write spinlock. Fixes: fcc8b637c542 ("gpiolib: switch the line state notifier to atomic") Reported-by: David Jander Closes: https://lore.kernel.org/all/20250311110034.53959031@erd003.prtnl/ Tested-by: David Jander Tested-by: Kent Gibson Link: https://lore.kernel.org/r/20250311-gpiolib-line-state-raw-notifier-v2-1-138374581e1e@linaro.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-cdev.c | 15 +++++++++------ drivers/gpio/gpiolib.c | 8 +++++--- drivers/gpio/gpiolib.h | 5 ++++- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index 40f76a90fd7db..107d75558b5a8 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -2729,8 +2729,9 @@ static int gpio_chrdev_open(struct inode *inode, struct file *file) cdev->gdev = gpio_device_get(gdev); cdev->lineinfo_changed_nb.notifier_call = lineinfo_changed_notify; - ret = atomic_notifier_chain_register(&gdev->line_state_notifier, - &cdev->lineinfo_changed_nb); + scoped_guard(write_lock_irqsave, &gdev->line_state_lock) + ret = raw_notifier_chain_register(&gdev->line_state_notifier, + &cdev->lineinfo_changed_nb); if (ret) goto out_free_bitmap; @@ -2754,8 +2755,9 @@ static int gpio_chrdev_open(struct inode *inode, struct file *file) blocking_notifier_chain_unregister(&gdev->device_notifier, &cdev->device_unregistered_nb); out_unregister_line_notifier: - atomic_notifier_chain_unregister(&gdev->line_state_notifier, - &cdev->lineinfo_changed_nb); + scoped_guard(write_lock_irqsave, &gdev->line_state_lock) + raw_notifier_chain_unregister(&gdev->line_state_notifier, + &cdev->lineinfo_changed_nb); out_free_bitmap: gpio_device_put(gdev); bitmap_free(cdev->watched_lines); @@ -2779,8 +2781,9 @@ static int gpio_chrdev_release(struct inode *inode, struct file *file) blocking_notifier_chain_unregister(&gdev->device_notifier, &cdev->device_unregistered_nb); - atomic_notifier_chain_unregister(&gdev->line_state_notifier, - &cdev->lineinfo_changed_nb); + scoped_guard(write_lock_irqsave, &gdev->line_state_lock) + raw_notifier_chain_unregister(&gdev->line_state_notifier, + &cdev->lineinfo_changed_nb); bitmap_free(cdev->watched_lines); gpio_device_put(gdev); kfree(cdev); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index de708d0818581..0c00ed2ab4315 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1025,7 +1025,8 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, } } - ATOMIC_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier); + rwlock_init(&gdev->line_state_lock); + RAW_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier); BLOCKING_INIT_NOTIFIER_HEAD(&gdev->device_notifier); ret = init_srcu_struct(&gdev->srcu); @@ -4188,8 +4189,9 @@ EXPORT_SYMBOL_GPL(gpiod_set_array_value_cansleep); void gpiod_line_state_notify(struct gpio_desc *desc, unsigned long action) { - atomic_notifier_call_chain(&desc->gdev->line_state_notifier, - action, desc); + guard(read_lock_irqsave)(&desc->gdev->line_state_lock); + + raw_notifier_call_chain(&desc->gdev->line_state_notifier, action, desc); } /** diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index 147156ec502b2..c129a03e20408 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -45,6 +46,7 @@ * @list: links gpio_device:s together for traversal * @line_state_notifier: used to notify subscribers about lines being * requested, released or reconfigured + * @line_state_lock: RW-spinlock protecting the line state notifier * @line_state_wq: used to emit line state events from a separate thread in * process context * @device_notifier: used to notify character device wait queues about the GPIO @@ -72,7 +74,8 @@ struct gpio_device { const char *label; void *data; struct list_head list; - struct atomic_notifier_head line_state_notifier; + struct raw_notifier_head line_state_notifier; + rwlock_t line_state_lock; struct workqueue_struct *line_state_wq; struct blocking_notifier_head device_notifier; struct srcu_struct srcu; -- GitLab From 6edd78af9506bb182518da7f6feebd75655d9a0e Mon Sep 17 00:00:00 2001 From: Alexey Kashavkin Date: Sun, 2 Mar 2025 00:14:36 +0300 Subject: [PATCH 1507/1585] netfilter: nft_exthdr: fix offset with ipv4_find_option() There is an incorrect calculation in the offset variable which causes the nft_skb_copy_to_reg() function to always return -EFAULT. Adding the start variable is redundant. In the __ip_options_compile() function the correct offset is specified when finding the function. There is no need to add the size of the iphdr structure to the offset. Fixes: dbb5281a1f84 ("netfilter: nf_tables: add support for matching IPv4 options") Signed-off-by: Alexey Kashavkin Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_exthdr.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index b8d03364566c1..c74012c991255 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -85,7 +85,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, unsigned char optbuf[sizeof(struct ip_options) + 40]; struct ip_options *opt = (struct ip_options *)optbuf; struct iphdr *iph, _iph; - unsigned int start; bool found = false; __be32 info; int optlen; @@ -93,7 +92,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (!iph) return -EBADMSG; - start = sizeof(struct iphdr); optlen = iph->ihl * 4 - (int)sizeof(struct iphdr); if (optlen <= 0) @@ -103,7 +101,7 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, /* Copy the options since __ip_options_compile() modifies * the options. */ - if (skb_copy_bits(skb, start, opt->__data, optlen)) + if (skb_copy_bits(skb, sizeof(struct iphdr), opt->__data, optlen)) return -EBADMSG; opt->optlen = optlen; @@ -118,18 +116,18 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, found = target == IPOPT_SSRR ? opt->is_strictroute : !opt->is_strictroute; if (found) - *offset = opt->srr + start; + *offset = opt->srr; break; case IPOPT_RR: if (!opt->rr) break; - *offset = opt->rr + start; + *offset = opt->rr; found = true; break; case IPOPT_RA: if (!opt->router_alert) break; - *offset = opt->router_alert + start; + *offset = opt->router_alert; found = true; break; default: -- GitLab From 183185a18ff96751db52a46ccf93fff3a1f42815 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 7 Mar 2025 20:28:53 +0100 Subject: [PATCH 1508/1585] gre: Fix IPv6 link-local address generation. Use addrconf_addr_gen() to generate IPv6 link-local addresses on GRE devices in most cases and fall back to using add_v4_addrs() only in case the GRE configuration is incompatible with addrconf_addr_gen(). GRE used to use addrconf_addr_gen() until commit e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") restricted this use to gretap and ip6gretap devices, and created add_v4_addrs() (borrowed from SIT) for non-Ethernet GRE ones. The original problem came when commit 9af28511be10 ("addrconf: refuse isatap eui64 for INADDR_ANY") made __ipv6_isatap_ifid() fail when its addr parameter was 0. The commit says that this would create an invalid address, however, I couldn't find any RFC saying that the generated interface identifier would be wrong. Anyway, since gre over IPv4 devices pass their local tunnel address to __ipv6_isatap_ifid(), that commit broke their IPv6 link-local address generation when the local address was unspecified. Then commit e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") tried to fix that case by defining add_v4_addrs() and calling it to generate the IPv6 link-local address instead of using addrconf_addr_gen() (apart for gretap and ip6gretap devices, which would still use the regular addrconf_addr_gen(), since they have a MAC address). That broke several use cases because add_v4_addrs() isn't properly integrated into the rest of IPv6 Neighbor Discovery code. Several of these shortcomings have been fixed over time, but add_v4_addrs() remains broken on several aspects. In particular, it doesn't send any Router Sollicitations, so the SLAAC process doesn't start until the interface receives a Router Advertisement. Also, add_v4_addrs() mostly ignores the address generation mode of the interface (/proc/sys/net/ipv6/conf/*/addr_gen_mode), thus breaking the IN6_ADDR_GEN_MODE_RANDOM and IN6_ADDR_GEN_MODE_STABLE_PRIVACY cases. Fix the situation by using add_v4_addrs() only in the specific scenario where the normal method would fail. That is, for interfaces that have all of the following characteristics: * run over IPv4, * transport IP packets directly, not Ethernet (that is, not gretap interfaces), * tunnel endpoint is INADDR_ANY (that is, 0), * device address generation mode is EUI64. In all other cases, revert back to the regular addrconf_addr_gen(). Also, remove the special case for ip6gre interfaces in add_v4_addrs(), since ip6gre devices now always use addrconf_addr_gen() instead. Fixes: e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/559c32ce5c9976b269e6337ac9abb6a96abe5096.1741375285.git.gnault@redhat.com Signed-off-by: Paolo Abeni --- net/ipv6/addrconf.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ac8cc10765360..8b6258819dade 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3209,16 +3209,13 @@ static void add_v4_addrs(struct inet6_dev *idev) struct in6_addr addr; struct net_device *dev; struct net *net = dev_net(idev->dev); - int scope, plen, offset = 0; + int scope, plen; u32 pflags = 0; ASSERT_RTNL(); memset(&addr, 0, sizeof(struct in6_addr)); - /* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */ - if (idev->dev->addr_len == sizeof(struct in6_addr)) - offset = sizeof(struct in6_addr) - 4; - memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4); + memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) { scope = IPV6_ADDR_COMPATv4; @@ -3529,7 +3526,13 @@ static void addrconf_gre_config(struct net_device *dev) return; } - if (dev->type == ARPHRD_ETHER) { + /* Generate the IPv6 link-local address using addrconf_addr_gen(), + * unless we have an IPv4 GRE device not bound to an IP address and + * which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this + * case). Such devices fall back to add_v4_addrs() instead. + */ + if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 && + idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) { addrconf_addr_gen(idev, true); return; } -- GitLab From 6f50175ccad4278ed3a9394c00b797b75441bd6e Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 7 Mar 2025 20:28:58 +0100 Subject: [PATCH 1509/1585] selftests: Add IPv6 link-local address generation tests for GRE devices. GRE devices have their special code for IPv6 link-local address generation that has been the source of several regressions in the past. Add selftest to check that all gre, ip6gre, gretap and ip6gretap get an IPv6 link-link local address in accordance with the net.ipv6.conf..addr_gen_mode sysctl. Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: Petr Machata Link: https://patch.msgid.link/2d6772af8e1da9016b2180ec3f8d9ee99f470c77.1741375285.git.gnault@redhat.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/Makefile | 1 + .../testing/selftests/net/gre_ipv6_lladdr.sh | 177 ++++++++++++++++++ 2 files changed, 178 insertions(+) create mode 100755 tools/testing/selftests/net/gre_ipv6_lladdr.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 73ee88d6b0430..5916f3b81c39f 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -31,6 +31,7 @@ TEST_PROGS += veth.sh TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh +TEST_PROGS += gre_ipv6_lladdr.sh TEST_PROGS += cmsg_so_mark.sh TEST_PROGS += cmsg_so_priority.sh TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh new file mode 100755 index 0000000000000..5b34f6e1f8314 --- /dev/null +++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh @@ -0,0 +1,177 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./lib.sh + +PAUSE_ON_FAIL="no" + +# The trap function handler +# +exit_cleanup_all() +{ + cleanup_all_ns + + exit "${EXIT_STATUS}" +} + +# Add fake IPv4 and IPv6 networks on the loopback device, to be used as +# underlay by future GRE devices. +# +setup_basenet() +{ + ip -netns "${NS0}" link set dev lo up + ip -netns "${NS0}" address add dev lo 192.0.2.10/24 + ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad +} + +# Check if network device has an IPv6 link-local address assigned. +# +# Parameters: +# +# * $1: The network device to test +# * $2: An extra regular expression that should be matched (to verify the +# presence of extra attributes) +# * $3: The expected return code from grep (to allow checking the absence of +# a link-local address) +# * $4: The user visible name for the scenario being tested +# +check_ipv6_ll_addr() +{ + local DEV="$1" + local EXTRA_MATCH="$2" + local XRET="$3" + local MSG="$4" + + RET=0 + set +e + ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}" + check_err_fail "${XRET}" $? "" + log_test "${MSG}" + set -e +} + +# Create a GRE device and verify that it gets an IPv6 link-local address as +# expected. +# +# Parameters: +# +# * $1: The device type (gre, ip6gre, gretap or ip6gretap) +# * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any") +# * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any") +# * $4: The IPv6 interface identifier generation mode to use for the GRE +# device (eui64, none, stable-privacy or random). +# +test_gre_device() +{ + local GRE_TYPE="$1" + local LOCAL_IP="$2" + local REMOTE_IP="$3" + local MODE="$4" + local ADDR_GEN_MODE + local MATCH_REGEXP + local MSG + + ip link add netns "${NS0}" name gretest type "${GRE_TYPE}" local "${LOCAL_IP}" remote "${REMOTE_IP}" + + case "${MODE}" in + "eui64") + ADDR_GEN_MODE=0 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 0 (EUI64), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + "none") + ADDR_GEN_MODE=1 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 1 (none), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=1 # No link-local address should be generated + ;; + "stable-privacy") + ADDR_GEN_MODE=2 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 2 (stable privacy), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + # Initialise stable_secret (required for stable-privacy mode) + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.stable_secret="2001:db8::abcd" + ;; + "random") + ADDR_GEN_MODE=3 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 3 (random), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + esac + + # Check that IPv6 link-local address is generated when device goes up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + ip -netns "${NS0}" link set dev gretest up + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" + + # Now disable link-local address generation + ip -netns "${NS0}" link set dev gretest down + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1 + ip -netns "${NS0}" link set dev gretest up + + # Check that link-local address generation works when re-enabled while + # the device is already up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" + + ip -netns "${NS0}" link del dev gretest +} + +test_gre4() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "gre" "gretap"; do + printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" 192.0.2.10 any "${MODE}" + done + done +} + +test_gre6() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "ip6gre" "ip6gretap"; do + printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" 2001:db8::10 any "${MODE}" + done + done +} + +usage() +{ + echo "Usage: $0 [-p]" + exit 1 +} + +while getopts :p o +do + case $o in + p) PAUSE_ON_FAIL="yes";; + *) usage;; + esac +done + +setup_ns NS0 + +set -e +trap exit_cleanup_all EXIT + +setup_basenet + +test_gre4 +test_gre6 -- GitLab From a1e64addf3ff9257b45b78bc7d743781c3f41340 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Sat, 8 Mar 2025 01:45:59 +0100 Subject: [PATCH 1510/1585] net: openvswitch: remove misbehaving actions length check The actions length check is unreliable and produces different results depending on the initial length of the provided netlink attribute and the composition of the actual actions inside of it. For example, a user can add 4088 empty clone() actions without triggering -EMSGSIZE, on attempt to add 4089 such actions the operation will fail with the -EMSGSIZE verdict. However, if another 16 KB of other actions will be *appended* to the previous 4089 clone() actions, the check passes and the flow is successfully installed into the openvswitch datapath. The reason for a such a weird behavior is the way memory is allocated. When ovs_flow_cmd_new() is invoked, it calls ovs_nla_copy_actions(), that in turn calls nla_alloc_flow_actions() with either the actual length of the user-provided actions or the MAX_ACTIONS_BUFSIZE. The function adds the size of the sw_flow_actions structure and then the actually allocated memory is rounded up to the closest power of two. So, if the user-provided actions are larger than MAX_ACTIONS_BUFSIZE, then MAX_ACTIONS_BUFSIZE + sizeof(*sfa) rounded up is 32K + 24 -> 64K. Later, while copying individual actions, we look at ksize(), which is 64K, so this way the MAX_ACTIONS_BUFSIZE check is not actually triggered and the user can easily allocate almost 64 KB of actions. However, when the initial size is less than MAX_ACTIONS_BUFSIZE, but the actions contain ones that require size increase while copying (such as clone() or sample()), then the limit check will be performed during the reserve_sfa_size() and the user will not be allowed to create actions that yield more than 32 KB internally. This is one part of the problem. The other part is that it's not actually possible for the userspace application to know beforehand if the particular set of actions will be rejected or not. Certain actions require more space in the internal representation, e.g. an empty clone() takes 4 bytes in the action list passed in by the user, but it takes 12 bytes in the internal representation due to an extra nested attribute, and some actions require less space in the internal representations, e.g. set(tunnel(..)) normally takes 64+ bytes in the action list provided by the user, but only needs to store a single pointer in the internal implementation, since all the data is stored in the tunnel_info structure instead. And the action size limit is applied to the internal representation, not to the action list passed by the user. So, it's not possible for the userpsace application to predict if the certain combination of actions will be rejected or not, because it is not possible for it to calculate how much space these actions will take in the internal representation without knowing kernel internals. All that is causing random failures in ovs-vswitchd in userspace and inability to handle certain traffic patterns as a result. For example, it is reported that adding a bit more than a 1100 VMs in an OpenStack setup breaks the network due to OVS not being able to handle ARP traffic anymore in some cases (it tries to install a proper datapath flow, but the kernel rejects it with -EMSGSIZE, even though the action list isn't actually that large.) Kernel behavior must be consistent and predictable in order for the userspace application to use it in a reasonable way. ovs-vswitchd has a mechanism to re-direct parts of the traffic and partially handle it in userspace if the required action list is oversized, but that doesn't work properly if we can't actually tell if the action list is oversized or not. Solution for this is to check the size of the user-provided actions instead of the internal representation. This commit just removes the check from the internal part because there is already an implicit size check imposed by the netlink protocol. The attribute can't be larger than 64 KB. Realistically, we could reduce the limit to 32 KB, but we'll be risking to break some existing setups that rely on the fact that it's possible to create nearly 64 KB action lists today. Vast majority of flows in real setups are below 100-ish bytes. So removal of the limit will not change real memory consumption on the system. The absolutely worst case scenario is if someone adds a flow with 64 KB of empty clone() actions. That will yield a 192 KB in the internal representation consuming 256 KB block of memory. However, that list of actions is not meaningful and also a no-op. Real world very large action lists (that can occur for a rare cases of BUM traffic handling) are unlikely to contain a large number of clones and will likely have a lot of tunnel attributes making the internal representation comparable in size to the original action list. So, it should be fine to just remove the limit. Commit in the 'Fixes' tag is the first one that introduced the difference between internal representation and the user-provided action lists, but there were many more afterwards that lead to the situation we have today. Fixes: 7d5437c709de ("openvswitch: Add tunneling interface.") Signed-off-by: Ilya Maximets Reviewed-by: Aaron Conole Link: https://patch.msgid.link/20250308004609.2881861-1-i.maximets@ovn.org Signed-off-by: Paolo Abeni --- net/openvswitch/flow_netlink.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 881ddd3696d54..95e0dd14dc1a3 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2317,14 +2317,10 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) OVS_FLOW_ATTR_MASK, true, skb); } -#define MAX_ACTIONS_BUFSIZE (32 * 1024) - static struct sw_flow_actions *nla_alloc_flow_actions(int size) { struct sw_flow_actions *sfa; - WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE); - sfa = kmalloc(kmalloc_size_roundup(sizeof(*sfa) + size), GFP_KERNEL); if (!sfa) return ERR_PTR(-ENOMEM); @@ -2480,15 +2476,6 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2); - if (new_acts_size > MAX_ACTIONS_BUFSIZE) { - if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) { - OVS_NLERR(log, "Flow action size exceeds max %u", - MAX_ACTIONS_BUFSIZE); - return ERR_PTR(-EMSGSIZE); - } - new_acts_size = MAX_ACTIONS_BUFSIZE; - } - acts = nla_alloc_flow_actions(new_acts_size); if (IS_ERR(acts)) return ERR_CAST(acts); @@ -3545,7 +3532,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, int err; u32 mpls_label_count = 0; - *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); + *sfa = nla_alloc_flow_actions(nla_len(attr)); if (IS_ERR(*sfa)) return PTR_ERR(*sfa); -- GitLab From 1063ae07383c0ddc5bcce170260c143825846b03 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 8 Mar 2025 13:05:43 -0500 Subject: [PATCH 1511/1585] Revert "openvswitch: switch to per-action label counting in conntrack" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, ovs_ct_set_labels() is only called for confirmed conntrack entries (ct) within ovs_ct_commit(). However, if the conntrack entry does not have the labels_ext extension, attempting to allocate it in ovs_ct_get_conn_labels() for a confirmed entry triggers a warning in nf_ct_ext_add(): WARN_ON(nf_ct_is_confirmed(ct)); This happens when the conntrack entry is created externally before OVS increments net->ct.labels_used. The issue has become more likely since commit fcb1aa5163b1 ("openvswitch: switch to per-action label counting in conntrack"), which changed to use per-action label counting and increment net->ct.labels_used when a flow with ct action is added. Since there’s no straightforward way to fully resolve this issue at the moment, this reverts the commit to avoid breaking existing use cases. Fixes: fcb1aa5163b1 ("openvswitch: switch to per-action label counting in conntrack") Reported-by: Jianbo Liu Signed-off-by: Xin Long Acked-by: Aaron Conole Link: https://patch.msgid.link/1bdeb2f3a812bca016a225d3de714427b2cd4772.1741457143.git.lucien.xin@gmail.com Signed-off-by: Paolo Abeni --- net/openvswitch/conntrack.c | 30 ++++++++++++++++++------------ net/openvswitch/datapath.h | 3 +++ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 3bb4810234aac..e573e92213029 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1368,8 +1368,11 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr) attr == OVS_KEY_ATTR_CT_MARK) return true; if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && - attr == OVS_KEY_ATTR_CT_LABELS) - return true; + attr == OVS_KEY_ATTR_CT_LABELS) { + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + + return ovs_net->xt_label; + } return false; } @@ -1378,7 +1381,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, bool log) { - unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE; struct ovs_conntrack_info ct_info; const char *helper = NULL; u16 family; @@ -1407,12 +1409,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, return -ENOMEM; } - if (nf_connlabels_get(net, n_bits - 1)) { - nf_ct_tmpl_free(ct_info.ct); - OVS_NLERR(log, "Failed to set connlabel length"); - return -EOPNOTSUPP; - } - if (ct_info.timeout[0]) { if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto, ct_info.timeout)) @@ -1581,7 +1577,6 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) if (ct_info->ct) { if (ct_info->timeout[0]) nf_ct_destroy_timeout(ct_info->ct); - nf_connlabels_put(nf_ct_net(ct_info->ct)); nf_ct_tmpl_free(ct_info->ct); } } @@ -2006,9 +2001,17 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = { int ovs_ct_init(struct net *net) { -#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) + unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE; struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + if (nf_connlabels_get(net, n_bits - 1)) { + ovs_net->xt_label = false; + OVS_NLERR(true, "Failed to set connlabel length"); + } else { + ovs_net->xt_label = true; + } + +#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) return ovs_ct_limit_init(net, ovs_net); #else return 0; @@ -2017,9 +2020,12 @@ int ovs_ct_init(struct net *net) void ovs_ct_exit(struct net *net) { -#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) struct ovs_net *ovs_net = net_generic(net, ovs_net_id); +#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) ovs_ct_limit_exit(net, ovs_net); #endif + + if (ovs_net->xt_label) + nf_connlabels_put(net); } diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 365b9bb7f546e..9ca6231ea6470 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -160,6 +160,9 @@ struct ovs_net { #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) struct ovs_ct_limit_info *ct_limit_info; #endif + + /* Module reference for configuring conntrack. */ + bool xt_label; }; /** -- GitLab From 03ebae19925519cca5b314443c6082e0aeaa6321 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 11 Mar 2025 00:01:39 +0200 Subject: [PATCH 1512/1585] net/mlx5: DR, use the right action structs for STEv3 Some actions in ConnectX-8 (STEv3) have different structure, and they are handled separately in ste_ctx_v3. This separate handling was missing two actions: INSERT_HDR and REMOVE_HDR, which broke SWS for Linux Bridge. This patch resolves the issue by introducing dedicated callbacks for the insert and remove header functions, with version-specific implementations for each STE variant. Fixes: 4d617b57574f ("net/mlx5: DR, add support for ConnectX-8 steering") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Itamar Gozlan Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1741644104-97767-2-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- .../mellanox/mlx5/core/steering/sws/dr_ste.h | 4 ++ .../mlx5/core/steering/sws/dr_ste_v1.c | 52 ++++++++++--------- .../mlx5/core/steering/sws/dr_ste_v1.h | 4 ++ .../mlx5/core/steering/sws/dr_ste_v2.c | 2 + .../mlx5/core/steering/sws/dr_ste_v3.c | 42 +++++++++++++++ 5 files changed, 79 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h index 5f409dc30aca8..3d5afc832fa56 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h @@ -210,6 +210,10 @@ struct mlx5dr_ste_ctx { void (*set_encap_l3)(u8 *hw_ste_p, u8 *frst_s_action, u8 *scnd_d_action, u32 reformat_id, int size); + void (*set_insert_hdr)(u8 *hw_ste_p, u8 *d_action, u32 reformat_id, + u8 anchor, u8 offset, int size); + void (*set_remove_hdr)(u8 *hw_ste_p, u8 *s_action, u8 anchor, + u8 offset, int size); /* Send */ void (*prepare_for_postsend)(u8 *hw_ste_p, u32 ste_size); }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c index 7f83d77c43ef0..6447efbae00dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c @@ -266,10 +266,10 @@ void dr_ste_v1_set_encap(u8 *hw_ste_p, u8 *d_action, u32 reformat_id, int size) dr_ste_v1_set_reparse(hw_ste_p); } -static void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, - u32 reformat_id, - u8 anchor, u8 offset, - int size) +void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, + u32 reformat_id, + u8 anchor, u8 offset, + int size) { MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER); @@ -286,9 +286,9 @@ static void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, dr_ste_v1_set_reparse(hw_ste_p); } -static void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, - u8 anchor, u8 offset, - int size) +void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, + u8 anchor, u8 offset, + int size) { MLX5_SET(ste_single_action_remove_header_size_v1, s_action, action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); @@ -584,11 +584,11 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; } - dr_ste_v1_set_insert_hdr(last_ste, action, - attr->reformat.id, - attr->reformat.param_0, - attr->reformat.param_1, - attr->reformat.size); + ste_ctx->set_insert_hdr(last_ste, action, + attr->reformat.id, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); action_sz -= DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; } else if (action_type_set[DR_ACTION_TYP_REMOVE_HDR]) { @@ -597,10 +597,10 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; } - dr_ste_v1_set_remove_hdr(last_ste, action, - attr->reformat.param_0, - attr->reformat.param_1, - attr->reformat.size); + ste_ctx->set_remove_hdr(last_ste, action, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); action_sz -= DR_STE_ACTION_SINGLE_SZ; action += DR_STE_ACTION_SINGLE_SZ; } @@ -792,11 +792,11 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; } - dr_ste_v1_set_insert_hdr(last_ste, action, - attr->reformat.id, - attr->reformat.param_0, - attr->reformat.param_1, - attr->reformat.size); + ste_ctx->set_insert_hdr(last_ste, action, + attr->reformat.id, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); action_sz -= DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; allow_modify_hdr = false; @@ -808,10 +808,10 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, allow_modify_hdr = true; allow_ctr = true; } - dr_ste_v1_set_remove_hdr(last_ste, action, - attr->reformat.param_0, - attr->reformat.param_1, - attr->reformat.size); + ste_ctx->set_remove_hdr(last_ste, action, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); action_sz -= DR_STE_ACTION_SINGLE_SZ; action += DR_STE_ACTION_SINGLE_SZ; } @@ -2200,6 +2200,8 @@ static struct mlx5dr_ste_ctx ste_ctx_v1 = { .set_pop_vlan = &dr_ste_v1_set_pop_vlan, .set_rx_decap = &dr_ste_v1_set_rx_decap, .set_encap_l3 = &dr_ste_v1_set_encap_l3, + .set_insert_hdr = &dr_ste_v1_set_insert_hdr, + .set_remove_hdr = &dr_ste_v1_set_remove_hdr, /* Send */ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h index a8d9e308d3392..591c20c95a6ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h @@ -156,6 +156,10 @@ void dr_ste_v1_set_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num); void dr_ste_v1_set_encap_l3(u8 *hw_ste_p, u8 *frst_s_action, u8 *scnd_d_action, u32 reformat_id, int size); void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action); +void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, u32 reformat_id, + u8 anchor, u8 offset, int size); +void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, u8 anchor, + u8 offset, int size); void dr_ste_v1_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_domain *dmn, u8 *action_type_set, u32 actions_caps, u8 *last_ste, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c index 0882dba0f64b9..d0ebaf820d42b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c @@ -69,6 +69,8 @@ static struct mlx5dr_ste_ctx ste_ctx_v2 = { .set_pop_vlan = &dr_ste_v1_set_pop_vlan, .set_rx_decap = &dr_ste_v1_set_rx_decap, .set_encap_l3 = &dr_ste_v1_set_encap_l3, + .set_insert_hdr = &dr_ste_v1_set_insert_hdr, + .set_remove_hdr = &dr_ste_v1_set_remove_hdr, /* Send */ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c index cc60ce1d274ef..e468a9ae44e8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c @@ -79,6 +79,46 @@ static void dr_ste_v3_set_rx_decap(u8 *hw_ste_p, u8 *s_action) dr_ste_v1_set_reparse(hw_ste_p); } +static void dr_ste_v3_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, + u32 reformat_id, u8 anchor, + u8 offset, int size) +{ + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER); + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + start_anchor, anchor); + + /* The hardware expects here size and offset in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + size, size / 2); + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + start_offset, offset / 2); + + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + pointer, reformat_id); + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + attributes, DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v3_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, + u8 anchor, u8 offset, int size) +{ + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + start_anchor, anchor); + + /* The hardware expects here size and offset in words (2 byte) */ + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + remove_size, size / 2); + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + start_offset, offset / 2); + + dr_ste_v1_set_reparse(hw_ste_p); +} + static int dr_ste_v3_set_action_decap_l3_list(void *data, u32 data_sz, u8 *hw_action, u32 hw_action_sz, @@ -211,6 +251,8 @@ static struct mlx5dr_ste_ctx ste_ctx_v3 = { .set_pop_vlan = &dr_ste_v3_set_pop_vlan, .set_rx_decap = &dr_ste_v3_set_rx_decap, .set_encap_l3 = &dr_ste_v3_set_encap_l3, + .set_insert_hdr = &dr_ste_v3_set_insert_hdr, + .set_remove_hdr = &dr_ste_v3_set_remove_hdr, /* Send */ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, }; -- GitLab From 521992337f67f71ce4436b98bc32563ddb1a5ce3 Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Tue, 11 Mar 2025 00:01:40 +0200 Subject: [PATCH 1513/1585] net/mlx5: HWS, Rightsize bwc matcher priority The bwc layer was clamping the matcher priority from 32 bits to 16 bits. This didn't show up until a matcher was resized, since the initial native matcher was created using the correct 32 bit value. The fix also reorders fields to avoid some padding. Fixes: 2111bb970c78 ("net/mlx5: HWS, added backward-compatible API handling") Signed-off-by: Vlad Dogaru Reviewed-by: Yevgeny Kliteynik Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1741644104-97767-3-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h index f9f569131ddeb..47f7ed1415535 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h @@ -24,8 +24,8 @@ struct mlx5hws_bwc_matcher { struct mlx5hws_matcher *matcher; struct mlx5hws_match_template *mt; struct mlx5hws_action_template *at[MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM]; + u32 priority; u8 num_of_at; - u16 priority; u8 size_log; atomic_t num_of_rules; struct list_head *rules; -- GitLab From 32d2724db5b2361ab293427ccd5c24f4f2bcca14 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 11 Mar 2025 00:01:41 +0200 Subject: [PATCH 1514/1585] net/mlx5: Fix incorrect IRQ pool usage when releasing IRQs mlx5_irq_pool_get() is a getter for completion IRQ pool only. However, after the cited commit, mlx5_irq_pool_get() is called during ctrl IRQ release flow to retrieve the pool, resulting in the use of an incorrect IRQ pool. Hence, use the newly introduced mlx5_irq_get_pool() getter to retrieve the correct IRQ pool based on the IRQ itself. While at it, rename mlx5_irq_pool_get() to mlx5_irq_table_get_comp_irq_pool() which accurately reflects its purpose and improves code readability. Fixes: 0477d5168bbb ("net/mlx5: Expose SFs IRQs") Signed-off-by: Shay Drory Reviewed-by: Maher Sanalla Signed-off-by: Tariq Toukan Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/1741644104-97767-4-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- .../net/ethernet/mellanox/mlx5/core/irq_affinity.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h | 4 +++- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 13 ++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h | 2 +- 5 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 2b229b6226c6a..dfb079e59d858 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -871,8 +871,8 @@ static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx) static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx) { + struct mlx5_irq_pool *pool = mlx5_irq_table_get_comp_irq_pool(dev); struct mlx5_eq_table *table = dev->priv.eq_table; - struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); struct irq_affinity_desc af_desc = {}; struct mlx5_irq *irq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c index 1477db7f5307e..2691d88cdee1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -175,7 +175,7 @@ mlx5_irq_affinity_request(struct mlx5_core_dev *dev, struct mlx5_irq_pool *pool, void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *irq) { - struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); + struct mlx5_irq_pool *pool = mlx5_irq_get_pool(irq); int cpu; cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h index 0881e961d8b17..586688da9940e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h @@ -10,12 +10,15 @@ struct mlx5_irq; struct cpu_rmap; +struct mlx5_irq_pool; int mlx5_irq_table_init(struct mlx5_core_dev *dev); void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); int mlx5_irq_table_create(struct mlx5_core_dev *dev); void mlx5_irq_table_destroy(struct mlx5_core_dev *dev); void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev); +struct mlx5_irq_pool * +mlx5_irq_table_get_comp_irq_pool(struct mlx5_core_dev *dev); int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table); int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table); struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev); @@ -38,7 +41,6 @@ struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq); int mlx5_irq_get_index(struct mlx5_irq *irq); int mlx5_irq_get_irq(const struct mlx5_irq *irq); -struct mlx5_irq_pool; #ifdef CONFIG_MLX5_SF struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev, struct cpumask *used_cpus, u16 vecidx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index d9362eabc6a1c..2c5f850c31f68 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -378,6 +378,11 @@ int mlx5_irq_get_index(struct mlx5_irq *irq) return irq->map.index; } +struct mlx5_irq_pool *mlx5_irq_get_pool(struct mlx5_irq *irq) +{ + return irq->pool; +} + /* irq_pool API */ /* requesting an irq from a given pool according to given index */ @@ -405,18 +410,20 @@ static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_tab return irq_table->sf_ctrl_pool; } -static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table) +static struct mlx5_irq_pool * +sf_comp_irq_pool_get(struct mlx5_irq_table *irq_table) { return irq_table->sf_comp_pool; } -struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev) +struct mlx5_irq_pool * +mlx5_irq_table_get_comp_irq_pool(struct mlx5_core_dev *dev) { struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev); struct mlx5_irq_pool *pool = NULL; if (mlx5_core_is_sf(dev)) - pool = sf_irq_pool_get(irq_table); + pool = sf_comp_irq_pool_get(irq_table); /* In some configs, there won't be a pool of SFs IRQs. Hence, returning * the PF IRQs pool in case the SF pool doesn't exist. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h index c4d377f8df308..cc064425fe160 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h @@ -28,7 +28,6 @@ struct mlx5_irq_pool { struct mlx5_core_dev *dev; }; -struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev); static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool) { return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf")); @@ -40,5 +39,6 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, int mlx5_irq_get_locked(struct mlx5_irq *irq); int mlx5_irq_read_locked(struct mlx5_irq *irq); int mlx5_irq_put(struct mlx5_irq *irq); +struct mlx5_irq_pool *mlx5_irq_get_pool(struct mlx5_irq *irq); #endif /* __PCI_IRQ_H__ */ -- GitLab From 32966984bee1defd9f5a8f9be274d7c32f911ba1 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 11 Mar 2025 00:01:42 +0200 Subject: [PATCH 1515/1585] net/mlx5: Lag, Check shared fdb before creating MultiPort E-Switch Currently, MultiPort E-Switch is requesting to create a LAG with shared FDB without checking the LAG is supporting shared FDB. Add the check. Fixes: a32327a3a02c ("net/mlx5: Lag, Control MultiPort E-Switch single FDB mode") Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/1741644104-97767-5-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index cea5aa314f6c5..ed2ba272946b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -951,7 +951,7 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); } -static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) +bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev) { int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_core_dev *dev; @@ -1038,7 +1038,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) } if (do_bond && !__mlx5_lag_is_active(ldev)) { - bool shared_fdb = mlx5_shared_fdb_supported(ldev); + bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev); roce_lag = mlx5_lag_is_roce_lag(ldev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index 01cf723669471..c2f256bb2bc20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -92,6 +92,7 @@ mlx5_lag_is_ready(struct mlx5_lag *ldev) return test_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); } +bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev); bool mlx5_lag_check_prereq(struct mlx5_lag *ldev); void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index ffac0bd6c8952..1770297a112e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -83,7 +83,8 @@ static int enable_mpesw(struct mlx5_lag *ldev) if (mlx5_eswitch_mode(dev0) != MLX5_ESWITCH_OFFLOADS || !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table) || !MLX5_CAP_GEN(dev0, create_lag_when_not_master_up) || - !mlx5_lag_check_prereq(ldev)) + !mlx5_lag_check_prereq(ldev) || + !mlx5_lag_shared_fdb_supported(ldev)) return -EOPNOTSUPP; err = mlx5_mpesw_metadata_set(ldev); -- GitLab From 4b8eeed4fb105770ce6dc84a2c6ef953c7b71cbb Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 11 Mar 2025 00:01:43 +0200 Subject: [PATCH 1516/1585] net/mlx5: Bridge, fix the crash caused by LAG state check When removing LAG device from bridge, NETDEV_CHANGEUPPER event is triggered. Driver finds the lower devices (PFs) to flush all the offloaded entries. And mlx5_lag_is_shared_fdb is checked, it returns false if one of PF is unloaded. In such case, mlx5_esw_bridge_lag_rep_get() and its caller return NULL, instead of the alive PF, and the flush is skipped. Besides, the bridge fdb entry's lastuse is updated in mlx5 bridge event handler. But this SWITCHDEV_FDB_ADD_TO_BRIDGE event can be ignored in this case because the upper interface for bond is deleted, and the entry will never be aged because lastuse is never updated. To make things worse, as the entry is alive, mlx5 bridge workqueue keeps sending that event, which is then handled by kernel bridge notifier. It causes the following crash when accessing the passed bond netdev which is already destroyed. To fix this issue, remove such checks. LAG state is already checked in commit 15f8f168952f ("net/mlx5: Bridge, verify LAG state when adding bond to bridge"), driver still need to skip offload if LAG becomes invalid state after initialization. Oops: stack segment: 0000 [#1] SMP CPU: 3 UID: 0 PID: 23695 Comm: kworker/u40:3 Tainted: G OE 6.11.0_mlnx #1 Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5_bridge_wq mlx5_esw_bridge_update_work [mlx5_core] RIP: 0010:br_switchdev_event+0x2c/0x110 [bridge] Code: 44 00 00 48 8b 02 48 f7 00 00 02 00 00 74 69 41 54 55 53 48 83 ec 08 48 8b a8 08 01 00 00 48 85 ed 74 4a 48 83 fe 02 48 89 d3 <4c> 8b 65 00 74 23 76 49 48 83 fe 05 74 7e 48 83 fe 06 75 2f 0f b7 RSP: 0018:ffffc900092cfda0 EFLAGS: 00010297 RAX: ffff888123bfe000 RBX: ffffc900092cfe08 RCX: 00000000ffffffff RDX: ffffc900092cfe08 RSI: 0000000000000001 RDI: ffffffffa0c585f0 RBP: 6669746f6e690a30 R08: 0000000000000000 R09: ffff888123ae92c8 R10: 0000000000000000 R11: fefefefefefefeff R12: ffff888123ae9c60 R13: 0000000000000001 R14: ffffc900092cfe08 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88852c980000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f15914c8734 CR3: 0000000002830005 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? __die_body+0x1a/0x60 ? die+0x38/0x60 ? do_trap+0x10b/0x120 ? do_error_trap+0x64/0xa0 ? exc_stack_segment+0x33/0x50 ? asm_exc_stack_segment+0x22/0x30 ? br_switchdev_event+0x2c/0x110 [bridge] ? sched_balance_newidle.isra.149+0x248/0x390 notifier_call_chain+0x4b/0xa0 atomic_notifier_call_chain+0x16/0x20 mlx5_esw_bridge_update+0xec/0x170 [mlx5_core] mlx5_esw_bridge_update_work+0x19/0x40 [mlx5_core] process_scheduled_works+0x81/0x390 worker_thread+0x106/0x250 ? bh_worker+0x110/0x110 kthread+0xb7/0xe0 ? kthread_park+0x80/0x80 ret_from_fork+0x2d/0x50 ? kthread_park+0x80/0x80 ret_from_fork_asm+0x11/0x20 Fixes: ff9b7521468b ("net/mlx5: Bridge, support LAG") Signed-off-by: Jianbo Liu Reviewed-by: Vlad Buslov Signed-off-by: Tariq Toukan Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/1741644104-97767-6-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index 5d128c5b4529a..0f5d7ea8956f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -48,15 +48,10 @@ mlx5_esw_bridge_lag_rep_get(struct net_device *dev, struct mlx5_eswitch *esw) struct list_head *iter; netdev_for_each_lower_dev(dev, lower, iter) { - struct mlx5_core_dev *mdev; - struct mlx5e_priv *priv; - if (!mlx5e_eswitch_rep(lower)) continue; - priv = netdev_priv(lower); - mdev = priv->mdev; - if (mlx5_lag_is_shared_fdb(mdev) && mlx5_esw_bridge_dev_same_esw(lower, esw)) + if (mlx5_esw_bridge_dev_same_esw(lower, esw)) return lower; } @@ -125,7 +120,7 @@ static bool mlx5_esw_bridge_is_local(struct net_device *dev, struct net_device * priv = netdev_priv(rep); mdev = priv->mdev; if (netif_is_lag_master(dev)) - return mlx5_lag_is_shared_fdb(mdev) && mlx5_lag_is_master(mdev); + return mlx5_lag_is_master(mdev); return true; } @@ -455,6 +450,9 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, if (!rep) return NOTIFY_DONE; + if (netif_is_lag_master(dev) && !mlx5_lag_is_shared_fdb(esw->dev)) + return NOTIFY_DONE; + switch (event) { case SWITCHDEV_FDB_ADD_TO_BRIDGE: fdb_info = container_of(info, -- GitLab From e92df790d07a8eea873efcb84776e7b71f81c7d5 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Tue, 11 Mar 2025 00:01:44 +0200 Subject: [PATCH 1517/1585] net/mlx5e: Prevent bridge link show failure for non-eswitch-allowed devices mlx5_eswitch_get_vepa returns -EPERM if the device lacks eswitch_manager capability, blocking mlx5e_bridge_getlink from retrieving VEPA mode. Since mlx5e_bridge_getlink implements ndo_bridge_getlink, returning -EPERM causes bridge link show to fail instead of skipping devices without this capability. To avoid this, return -EOPNOTSUPP from mlx5e_bridge_getlink when mlx5_eswitch_get_vepa fails, ensuring the command continues processing other devices while ignoring those without the necessary capability. Fixes: 4b89251de024 ("net/mlx5: Support ndo bridge_setlink and getlink") Signed-off-by: Carolina Jubran Reviewed-by: Jianbo Liu Signed-off-by: Tariq Toukan Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/1741644104-97767-7-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a814b63ed97e5..8fcaee381b0e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5132,11 +5132,9 @@ static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; u8 mode, setting; - int err; - err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting); - if (err) - return err; + if (mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting)) + return -EOPNOTSUPP; mode = setting ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, -- GitLab From 3e64bb2ae7d9f2b3a8259d4d6b86ed1984d5460a Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Tue, 11 Mar 2025 03:17:40 -0700 Subject: [PATCH 1518/1585] net: mana: cleanup mana struct after debugfs_remove() When on a MANA VM hibernation is triggered, as part of hibernate_snapshot(), mana_gd_suspend() and mana_gd_resume() are called. If during this mana_gd_resume(), a failure occurs with HWC creation, mana_port_debugfs pointer does not get reinitialized and ends up pointing to older, cleaned-up dentry. Further in the hibernation path, as part of power_down(), mana_gd_shutdown() is triggered. This call, unaware of the failures in resume, tries to cleanup the already cleaned up mana_port_debugfs value and hits the following bug: [ 191.359296] mana 7870:00:00.0: Shutdown was called [ 191.359918] BUG: kernel NULL pointer dereference, address: 0000000000000098 [ 191.360584] #PF: supervisor write access in kernel mode [ 191.361125] #PF: error_code(0x0002) - not-present page [ 191.361727] PGD 1080ea067 P4D 0 [ 191.362172] Oops: Oops: 0002 [#1] SMP NOPTI [ 191.362606] CPU: 11 UID: 0 PID: 1674 Comm: bash Not tainted 6.14.0-rc5+ #2 [ 191.363292] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 11/21/2024 [ 191.364124] RIP: 0010:down_write+0x19/0x50 [ 191.364537] Code: 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 55 48 89 e5 53 48 89 fb e8 de cd ff ff 31 c0 ba 01 00 00 00 48 0f b1 13 75 16 65 48 8b 05 88 24 4c 6a 48 89 43 08 48 8b 5d [ 191.365867] RSP: 0000:ff45fbe0c1c037b8 EFLAGS: 00010246 [ 191.366350] RAX: 0000000000000000 RBX: 0000000000000098 RCX: ffffff8100000000 [ 191.366951] RDX: 0000000000000001 RSI: 0000000000000064 RDI: 0000000000000098 [ 191.367600] RBP: ff45fbe0c1c037c0 R08: 0000000000000000 R09: 0000000000000001 [ 191.368225] R10: ff45fbe0d2b01000 R11: 0000000000000008 R12: 0000000000000000 [ 191.368874] R13: 000000000000000b R14: ff43dc27509d67c0 R15: 0000000000000020 [ 191.369549] FS: 00007dbc5001e740(0000) GS:ff43dc663f380000(0000) knlGS:0000000000000000 [ 191.370213] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 191.370830] CR2: 0000000000000098 CR3: 0000000168e8e002 CR4: 0000000000b73ef0 [ 191.371557] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 191.372192] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 [ 191.372906] Call Trace: [ 191.373262] [ 191.373621] ? show_regs+0x64/0x70 [ 191.374040] ? __die+0x24/0x70 [ 191.374468] ? page_fault_oops+0x290/0x5b0 [ 191.374875] ? do_user_addr_fault+0x448/0x800 [ 191.375357] ? exc_page_fault+0x7a/0x160 [ 191.375971] ? asm_exc_page_fault+0x27/0x30 [ 191.376416] ? down_write+0x19/0x50 [ 191.376832] ? down_write+0x12/0x50 [ 191.377232] simple_recursive_removal+0x4a/0x2a0 [ 191.377679] ? __pfx_remove_one+0x10/0x10 [ 191.378088] debugfs_remove+0x44/0x70 [ 191.378530] mana_detach+0x17c/0x4f0 [ 191.378950] ? __flush_work+0x1e2/0x3b0 [ 191.379362] ? __cond_resched+0x1a/0x50 [ 191.379787] mana_remove+0xf2/0x1a0 [ 191.380193] mana_gd_shutdown+0x3b/0x70 [ 191.380642] pci_device_shutdown+0x3a/0x80 [ 191.381063] device_shutdown+0x13e/0x230 [ 191.381480] kernel_power_off+0x35/0x80 [ 191.381890] hibernate+0x3c6/0x470 [ 191.382312] state_store+0xcb/0xd0 [ 191.382734] kobj_attr_store+0x12/0x30 [ 191.383211] sysfs_kf_write+0x3e/0x50 [ 191.383640] kernfs_fop_write_iter+0x140/0x1d0 [ 191.384106] vfs_write+0x271/0x440 [ 191.384521] ksys_write+0x72/0xf0 [ 191.384924] __x64_sys_write+0x19/0x20 [ 191.385313] x64_sys_call+0x2b0/0x20b0 [ 191.385736] do_syscall_64+0x79/0x150 [ 191.386146] ? __mod_memcg_lruvec_state+0xe7/0x240 [ 191.386676] ? __lruvec_stat_mod_folio+0x79/0xb0 [ 191.387124] ? __pfx_lru_add+0x10/0x10 [ 191.387515] ? queued_spin_unlock+0x9/0x10 [ 191.387937] ? do_anonymous_page+0x33c/0xa00 [ 191.388374] ? __handle_mm_fault+0xcf3/0x1210 [ 191.388805] ? __count_memcg_events+0xbe/0x180 [ 191.389235] ? handle_mm_fault+0xae/0x300 [ 191.389588] ? do_user_addr_fault+0x559/0x800 [ 191.390027] ? irqentry_exit_to_user_mode+0x43/0x230 [ 191.390525] ? irqentry_exit+0x1d/0x30 [ 191.390879] ? exc_page_fault+0x86/0x160 [ 191.391235] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 191.391745] RIP: 0033:0x7dbc4ff1c574 [ 191.392111] Code: c7 00 16 00 00 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 80 3d d5 ea 0e 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89 [ 191.393412] RSP: 002b:00007ffd95a23ab8 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 [ 191.393990] RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00007dbc4ff1c574 [ 191.394594] RDX: 0000000000000005 RSI: 00005a6eeadb0ce0 RDI: 0000000000000001 [ 191.395215] RBP: 00007ffd95a23ae0 R08: 00007dbc50003b20 R09: 0000000000000000 [ 191.395805] R10: 0000000000000001 R11: 0000000000000202 R12: 0000000000000005 [ 191.396404] R13: 00005a6eeadb0ce0 R14: 00007dbc500045c0 R15: 00007dbc50001ee0 [ 191.396987] To fix this, we explicitly set such mana debugfs variables to NULL after debugfs_remove() is called. Fixes: 6607c17c6c5e ("net: mana: Enable debugfs files for MANA device") Cc: stable@vger.kernel.org Signed-off-by: Shradha Gupta Reviewed-by: Haiyang Zhang Reviewed-by: Michal Kubiak Link: https://patch.msgid.link/1741688260-28922-1-git-send-email-shradhagupta@linux.microsoft.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microsoft/mana/gdma_main.c | 11 ++++++++++- drivers/net/ethernet/microsoft/mana/mana_en.c | 10 ++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index be95336ce089a..11457b6296cc0 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -1547,6 +1547,7 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * adapter-MTU file and apc->mana_pci_debugfs folder. */ debugfs_remove_recursive(gc->mana_pci_debugfs); + gc->mana_pci_debugfs = NULL; pci_iounmap(pdev, bar0_va); free_gc: pci_set_drvdata(pdev, NULL); @@ -1569,6 +1570,8 @@ static void mana_gd_remove(struct pci_dev *pdev) debugfs_remove_recursive(gc->mana_pci_debugfs); + gc->mana_pci_debugfs = NULL; + pci_iounmap(pdev, gc->bar0_va); vfree(gc); @@ -1622,6 +1625,8 @@ static void mana_gd_shutdown(struct pci_dev *pdev) debugfs_remove_recursive(gc->mana_pci_debugfs); + gc->mana_pci_debugfs = NULL; + pci_disable_device(pdev); } @@ -1648,8 +1653,10 @@ static int __init mana_driver_init(void) mana_debugfs_root = debugfs_create_dir("mana", NULL); err = pci_register_driver(&mana_driver); - if (err) + if (err) { debugfs_remove(mana_debugfs_root); + mana_debugfs_root = NULL; + } return err; } @@ -1659,6 +1666,8 @@ static void __exit mana_driver_exit(void) pci_unregister_driver(&mana_driver); debugfs_remove(mana_debugfs_root); + + mana_debugfs_root = NULL; } module_init(mana_driver_init); diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index aa1e47233fe50..ae76ecc7a5d36 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -738,12 +738,11 @@ static const struct net_device_ops mana_devops = { static void mana_cleanup_port_context(struct mana_port_context *apc) { /* - * at this point all dir/files under the vport directory - * are already cleaned up. - * We are sure the apc->mana_port_debugfs remove will not - * cause any freed memory access issues + * make sure subsequent cleanup attempts don't end up removing already + * cleaned dentry pointer */ debugfs_remove(apc->mana_port_debugfs); + apc->mana_port_debugfs = NULL; kfree(apc->rxqs); apc->rxqs = NULL; } @@ -1254,6 +1253,7 @@ static void mana_destroy_eq(struct mana_context *ac) return; debugfs_remove_recursive(ac->mana_eqs_debugfs); + ac->mana_eqs_debugfs = NULL; for (i = 0; i < gc->max_num_queues; i++) { eq = ac->eqs[i].eq; @@ -1914,6 +1914,7 @@ static void mana_destroy_txq(struct mana_port_context *apc) for (i = 0; i < apc->num_queues; i++) { debugfs_remove_recursive(apc->tx_qp[i].mana_tx_debugfs); + apc->tx_qp[i].mana_tx_debugfs = NULL; napi = &apc->tx_qp[i].tx_cq.napi; if (apc->tx_qp[i].txq.napi_initialized) { @@ -2099,6 +2100,7 @@ static void mana_destroy_rxq(struct mana_port_context *apc, return; debugfs_remove_recursive(rxq->mana_rx_debugfs); + rxq->mana_rx_debugfs = NULL; napi = &rxq->rx_cq.napi; -- GitLab From 4490fe973669360efaef7350aeb9706f70164176 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 5 Mar 2025 21:44:02 -0600 Subject: [PATCH 1519/1585] platform/x86/amd: pmf: Fix missing hidden options for Smart PC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amd_pmf_get_slider_info() checks the current profile to report correct value to the TA inputs. If hidden options are in use then the wrong values will be reported to TA. Add the two compat options PLATFORM_PROFILE_BALANCED_PERFORMANCE and PLATFORM_PROFILE_QUIET for this use. Reported-by: Yijun Shen Fixes: 9a43102daf64d ("platform/x86/amd: pmf: Add balanced-performance to hidden choices") Fixes: 44e94fece5170 ("platform/x86/amd: pmf: Add 'quiet' to hidden choices") Signed-off-by: Mario Limonciello Acked-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20250306034402.50478-1-superm1@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmf/spc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/amd/pmf/spc.c b/drivers/platform/x86/amd/pmf/spc.c index f34f3130c3307..1d90f9382024b 100644 --- a/drivers/platform/x86/amd/pmf/spc.c +++ b/drivers/platform/x86/amd/pmf/spc.c @@ -219,12 +219,14 @@ static int amd_pmf_get_slider_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_ switch (dev->current_profile) { case PLATFORM_PROFILE_PERFORMANCE: + case PLATFORM_PROFILE_BALANCED_PERFORMANCE: val = TA_BEST_PERFORMANCE; break; case PLATFORM_PROFILE_BALANCED: val = TA_BETTER_PERFORMANCE; break; case PLATFORM_PROFILE_LOW_POWER: + case PLATFORM_PROFILE_QUIET: val = TA_BEST_BATTERY; break; default: -- GitLab From fc99045effa81fdf509c2a97cbb7e6e8f2fd4443 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 12 Mar 2025 10:51:31 -0300 Subject: [PATCH 1520/1585] smb: client: fix regression with guest option When mounting a CIFS share with 'guest' mount option, mount.cifs(8) will set empty password= and password2= options. Currently we only handle empty strings from user= and password= options, so the mount will fail with cifs: Bad value for 'password2' Fix this by handling empty string from password2= option as well. Link: https://bbs.archlinux.org/viewtopic.php?id=303927 Reported-by: Adam Williamson Closes: https://lore.kernel.org/r/83c00b5fea81c07f6897a5dd3ef50fd3b290f56c.camel@redhat.com Fixes: 35f834265e0d ("smb3: fix broken reconnect when password changing on the server by allowing password rotation") Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index e9b286d9a7ba3..457452b4d42da 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -171,6 +171,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = { fsparam_string("username", Opt_user), fsparam_string("pass", Opt_pass), fsparam_string("password", Opt_pass), + fsparam_string("pass2", Opt_pass2), fsparam_string("password2", Opt_pass2), fsparam_string("ip", Opt_ip), fsparam_string("addr", Opt_ip), @@ -1131,6 +1132,9 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, } else if (!strcmp("user", param->key) || !strcmp("username", param->key)) { skip_parsing = true; opt = Opt_user; + } else if (!strcmp("pass2", param->key) || !strcmp("password2", param->key)) { + skip_parsing = true; + opt = Opt_pass2; } } -- GitLab From 7489161b1852390b4413d57f2457cd40b34da6cc Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Tue, 11 Mar 2025 17:22:03 +0300 Subject: [PATCH 1521/1585] cifs: Fix integer overflow while processing acregmax mount option User-provided mount parameter acregmax of type u32 is intended to have an upper limit, but before it is validated, the value is converted from seconds to jiffies which can lead to an integer overflow. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 5780464614f6 ("cifs: Add new parameter "acregmax" for distinct file and directory metadata timeout") Signed-off-by: Murad Masimov Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 457452b4d42da..5f3c5967a0f5a 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1344,11 +1344,11 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, } break; case Opt_acregmax: - ctx->acregmax = HZ * result.uint_32; - if (ctx->acregmax > CIFS_MAX_ACTIMEO) { + if (result.uint_32 > CIFS_MAX_ACTIMEO / HZ) { cifs_errorf(fc, "acregmax too large\n"); goto cifs_parse_mount_err; } + ctx->acregmax = HZ * result.uint_32; break; case Opt_acdirmax: ctx->acdirmax = HZ * result.uint_32; -- GitLab From 5b29891f91dfb8758baf1e2217bef4b16b2b165b Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Tue, 11 Mar 2025 17:22:04 +0300 Subject: [PATCH 1522/1585] cifs: Fix integer overflow while processing acdirmax mount option User-provided mount parameter acdirmax of type u32 is intended to have an upper limit, but before it is validated, the value is converted from seconds to jiffies which can lead to an integer overflow. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 4c9f948142a5 ("cifs: Add new mount parameter "acdirmax" to allow caching directory metadata") Signed-off-by: Murad Masimov Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 5f3c5967a0f5a..c983021c406bb 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1351,11 +1351,11 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->acregmax = HZ * result.uint_32; break; case Opt_acdirmax: - ctx->acdirmax = HZ * result.uint_32; - if (ctx->acdirmax > CIFS_MAX_ACTIMEO) { + if (result.uint_32 > CIFS_MAX_ACTIMEO / HZ) { cifs_errorf(fc, "acdirmax too large\n"); goto cifs_parse_mount_err; } + ctx->acdirmax = HZ * result.uint_32; break; case Opt_actimeo: if (HZ * result.uint_32 > CIFS_MAX_ACTIMEO) { -- GitLab From 64f690ee22c99e16084e0e45181b2a1eed2fa149 Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Tue, 11 Mar 2025 17:22:05 +0300 Subject: [PATCH 1523/1585] cifs: Fix integer overflow while processing actimeo mount option User-provided mount parameter actimeo of type u32 is intended to have an upper limit, but before it is validated, the value is converted from seconds to jiffies which can lead to an integer overflow. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 6d20e8406f09 ("cifs: add attribute cache timeout (actimeo) tunable") Signed-off-by: Murad Masimov Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index c983021c406bb..85b062e7f48db 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1358,7 +1358,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->acdirmax = HZ * result.uint_32; break; case Opt_actimeo: - if (HZ * result.uint_32 > CIFS_MAX_ACTIMEO) { + if (result.uint_32 > CIFS_MAX_ACTIMEO / HZ) { cifs_errorf(fc, "timeout too large\n"); goto cifs_parse_mount_err; } -- GitLab From d5a30fddfe2f2e540f6c43b59cf701809995faef Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Tue, 11 Mar 2025 17:22:06 +0300 Subject: [PATCH 1524/1585] cifs: Fix integer overflow while processing closetimeo mount option User-provided mount parameter closetimeo of type u32 is intended to have an upper limit, but before it is validated, the value is converted from seconds to jiffies which can lead to an integer overflow. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 5efdd9122eff ("smb3: allow deferred close timeout to be configurable") Signed-off-by: Murad Masimov Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 85b062e7f48db..8c73d4d60d1a7 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1370,11 +1370,11 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->acdirmax = ctx->acregmax = HZ * result.uint_32; break; case Opt_closetimeo: - ctx->closetimeo = HZ * result.uint_32; - if (ctx->closetimeo > SMB3_MAX_DCLOSETIMEO) { + if (result.uint_32 > SMB3_MAX_DCLOSETIMEO / HZ) { cifs_errorf(fc, "closetimeo too large\n"); goto cifs_parse_mount_err; } + ctx->closetimeo = HZ * result.uint_32; break; case Opt_echo_interval: ctx->echo_interval = result.uint_32; -- GitLab From 605b249ea96770ac4fac4b8510a99e0f8442be5e Mon Sep 17 00:00:00 2001 From: Henrique Carvalho Date: Tue, 11 Mar 2025 15:23:59 -0300 Subject: [PATCH 1525/1585] smb: client: Fix match_session bug preventing session reuse Fix a bug in match_session() that can causes the session to not be reused in some cases. Reproduction steps: mount.cifs //server/share /mnt/a -o credentials=creds mount.cifs //server/share /mnt/b -o credentials=creds,sec=ntlmssp cat /proc/fs/cifs/DebugData | grep SessionId | wc -l mount.cifs //server/share /mnt/b -o credentials=creds,sec=ntlmssp mount.cifs //server/share /mnt/a -o credentials=creds cat /proc/fs/cifs/DebugData | grep SessionId | wc -l Cc: stable@vger.kernel.org Reviewed-by: Enzo Matsumiya Signed-off-by: Henrique Carvalho Signed-off-by: Steve French --- fs/smb/client/connect.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index f917de020dd5d..73f93a35eeddb 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -1825,9 +1825,8 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx, bool match_super) { - if (ctx->sectype != Unspecified && - ctx->sectype != ses->sectype) - return 0; + struct TCP_Server_Info *server = ses->server; + enum securityEnum ctx_sec, ses_sec; if (!match_super && ctx->dfs_root_ses != ses->dfs_root_ses) return 0; @@ -1839,11 +1838,20 @@ static int match_session(struct cifs_ses *ses, if (ses->chan_max < ctx->max_channels) return 0; - switch (ses->sectype) { + ctx_sec = server->ops->select_sectype(server, ctx->sectype); + ses_sec = server->ops->select_sectype(server, ses->sectype); + + if (ctx_sec != ses_sec) + return 0; + + switch (ctx_sec) { + case IAKerb: case Kerberos: if (!uid_eq(ctx->cred_uid, ses->cred_uid)) return 0; break; + case NTLMv2: + case RawNTLMSSP: default: /* NULL username means anonymous session */ if (ses->user_name == NULL) { -- GitLab From bc71aab4513f7fecd23c051703ef5adea2230a54 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 12 Mar 2025 08:38:48 +0100 Subject: [PATCH 1526/1585] Revert "ext4: add pre-content fsnotify hook for DAX faults" This reverts commit bb480760ffc7018e21ee6f60241c2b99ff26ee0e. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250312073852.2123409-3-amir73il@gmail.com --- fs/ext4/file.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index a5205149adba3..3bd96c3d4cd0c 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -756,9 +756,6 @@ static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf, unsigned int order) return VM_FAULT_SIGBUS; } } else { - result = filemap_fsnotify_fault(vmf); - if (unlikely(result)) - return result; filemap_invalidate_lock_shared(mapping); } result = dax_iomap_fault(vmf, order, &pfn, &error, &ext4_iomap_ops); -- GitLab From 27773ce1776279ed3220a34d2e6bfcecaee7fc66 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 12 Mar 2025 08:38:49 +0100 Subject: [PATCH 1527/1585] Revert "xfs: add pre-content fsnotify hook for DAX faults" This reverts commit 7f4796a46571ced5d3d5b0942e1bfea1eedaaecd. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250312073852.2123409-4-amir73il@gmail.com --- fs/xfs/xfs_file.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f7a7d89c345ec..9a435b1ff2647 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1451,9 +1451,6 @@ xfs_dax_read_fault( trace_xfs_read_fault(ip, order); - ret = filemap_fsnotify_fault(vmf); - if (unlikely(ret)) - return ret; xfs_ilock(ip, XFS_MMAPLOCK_SHARED); ret = xfs_dax_fault_locked(vmf, order, false); xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); @@ -1482,16 +1479,6 @@ xfs_write_fault( vm_fault_t ret; trace_xfs_write_fault(ip, order); - /* - * Usually we get here from ->page_mkwrite callback but in case of DAX - * we will get here also for ordinary write fault. Handle HSM - * notifications for that case. - */ - if (IS_DAX(inode)) { - ret = filemap_fsnotify_fault(vmf); - if (unlikely(ret)) - return ret; - } sb_start_pagefault(inode->i_sb); file_update_time(vmf->vma->vm_file); -- GitLab From 955fbe0ef19df4197595a98d0906c94025c4beef Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 12 Mar 2025 08:38:50 +0100 Subject: [PATCH 1528/1585] Revert "fsnotify: generate pre-content permission event on page fault" This reverts commit 8392bc2ff8c8bf7c4c5e6dfa71ccd893a3c046f6. In the use case of buffered write whose input buffer is mmapped file on a filesystem with a pre-content mark, the prefaulting of the buffer can happen under the filesystem freeze protection (obtained in vfs_write()) which breaks assumptions of pre-content hook and introduces potential deadlock of HSM handler in userspace with filesystem freezing. Now that we have pre-content hooks at file mmap() time, disable the pre-content event hooks on page fault to avoid the potential deadlock. Reported-by: syzbot+7229071b47908b19d5b7@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-fsdevel/7ehxrhbvehlrjwvrduoxsao5k3x4aw275patsb3krkwuq573yv@o2hskrfawbnc/ Fixes: 8392bc2ff8c8 ("fsnotify: generate pre-content permission event on page fault") Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250312073852.2123409-5-amir73il@gmail.com --- include/linux/mm.h | 1 - mm/filemap.c | 74 ---------------------------------------------- mm/nommu.c | 7 ----- 3 files changed, 82 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 7b1068ddcbb70..8483e09aeb2cd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3420,7 +3420,6 @@ extern vm_fault_t filemap_fault(struct vm_fault *vmf); extern vm_fault_t filemap_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf); -extern vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf); extern unsigned long stack_guard_gap; /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ diff --git a/mm/filemap.c b/mm/filemap.c index 2974691fdfad2..ff5fcdd961364 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -47,7 +47,6 @@ #include #include #include -#include #include #include #include "internal.h" @@ -3336,48 +3335,6 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf) return ret; } -/** - * filemap_fsnotify_fault - maybe emit a pre-content event. - * @vmf: struct vm_fault containing details of the fault. - * - * If we have a pre-content watch on this file we will emit an event for this - * range. If we return anything the fault caller should return immediately, we - * will return VM_FAULT_RETRY if we had to emit an event, which will trigger the - * fault again and then the fault handler will run the second time through. - * - * Return: a bitwise-OR of %VM_FAULT_ codes, 0 if nothing happened. - */ -vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf) -{ - struct file *fpin = NULL; - int mask = (vmf->flags & FAULT_FLAG_WRITE) ? MAY_WRITE : MAY_ACCESS; - loff_t pos = vmf->pgoff >> PAGE_SHIFT; - size_t count = PAGE_SIZE; - int err; - - /* - * We already did this and now we're retrying with everything locked, - * don't emit the event and continue. - */ - if (vmf->flags & FAULT_FLAG_TRIED) - return 0; - - /* No watches, we're done. */ - if (likely(!FMODE_FSNOTIFY_HSM(vmf->vma->vm_file->f_mode))) - return 0; - - fpin = maybe_unlock_mmap_for_io(vmf, fpin); - if (!fpin) - return VM_FAULT_SIGBUS; - - err = fsnotify_file_area_perm(fpin, mask, &pos, count); - fput(fpin); - if (err) - return VM_FAULT_SIGBUS; - return VM_FAULT_RETRY; -} -EXPORT_SYMBOL_GPL(filemap_fsnotify_fault); - /** * filemap_fault - read in file data for page fault handling * @vmf: struct vm_fault containing details of the fault @@ -3481,37 +3438,6 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) * or because readahead was otherwise unable to retrieve it. */ if (unlikely(!folio_test_uptodate(folio))) { - /* - * If this is a precontent file we have can now emit an event to - * try and populate the folio. - */ - if (!(vmf->flags & FAULT_FLAG_TRIED) && - unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) { - loff_t pos = folio_pos(folio); - size_t count = folio_size(folio); - - /* We're NOWAIT, we have to retry. */ - if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) { - folio_unlock(folio); - goto out_retry; - } - - if (mapping_locked) - filemap_invalidate_unlock_shared(mapping); - mapping_locked = false; - - folio_unlock(folio); - fpin = maybe_unlock_mmap_for_io(vmf, fpin); - if (!fpin) - goto out_retry; - - error = fsnotify_file_area_perm(fpin, MAY_ACCESS, &pos, - count); - if (error) - ret = VM_FAULT_SIGBUS; - goto out_retry; - } - /* * If the invalidate lock is not held, the folio was in cache * and uptodate and now it is not. Strange but possible since we diff --git a/mm/nommu.c b/mm/nommu.c index baa79abdaf037..9cb6e99215e2b 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1613,13 +1613,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, } EXPORT_SYMBOL(remap_vmalloc_range); -vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf) -{ - BUG(); - return 0; -} -EXPORT_SYMBOL_GPL(filemap_fsnotify_fault); - vm_fault_t filemap_fault(struct vm_fault *vmf) { BUG(); -- GitLab From 4f4dc3a9378bc2e6bcf331ee9e65a21abf67e7e0 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 12 Mar 2025 08:38:51 +0100 Subject: [PATCH 1529/1585] Revert "mm: don't allow huge faults for files with pre content watches" This reverts commit 20bf82a898b65c129af76deb96a1b415d3098a28. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250312073852.2123409-6-amir73il@gmail.com --- mm/memory.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index b9661ccfa64fd..fb7b8dc751679 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -76,7 +76,6 @@ #include #include #include -#include #include @@ -5750,17 +5749,8 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; - if (vma_is_anonymous(vma)) return do_huge_pmd_anonymous_page(vmf); - /* - * Currently we just emit PAGE_SIZE for our fault events, so don't allow - * a huge fault if we have a pre content watch on this file. This would - * be trivial to support, but there would need to be tests to ensure - * this works properly and those don't exist currently. - */ - if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode))) - return VM_FAULT_FALLBACK; if (vma->vm_ops->huge_fault) return vma->vm_ops->huge_fault(vmf, PMD_ORDER); return VM_FAULT_FALLBACK; @@ -5784,9 +5774,6 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf) } if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { - /* See comment in create_huge_pmd. */ - if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode))) - goto split; if (vma->vm_ops->huge_fault) { ret = vma->vm_ops->huge_fault(vmf, PMD_ORDER); if (!(ret & VM_FAULT_FALLBACK)) @@ -5809,9 +5796,6 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf) /* No support for anonymous transparent PUD pages yet */ if (vma_is_anonymous(vma)) return VM_FAULT_FALLBACK; - /* See comment in create_huge_pmd. */ - if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode))) - return VM_FAULT_FALLBACK; if (vma->vm_ops->huge_fault) return vma->vm_ops->huge_fault(vmf, PUD_ORDER); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -5829,9 +5813,6 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) if (vma_is_anonymous(vma)) goto split; if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { - /* See comment in create_huge_pmd. */ - if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode))) - goto split; if (vma->vm_ops->huge_fault) { ret = vma->vm_ops->huge_fault(vmf, PUD_ORDER); if (!(ret & VM_FAULT_FALLBACK)) -- GitLab From 252256e416deb255607f0c4a69e7cfec079e5d61 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 12 Mar 2025 08:38:52 +0100 Subject: [PATCH 1530/1585] Revert "fanotify: disable readahead if we have pre-content watches" This reverts commit fac84846a28c0950d4433118b3dffd44306df62d. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250312073852.2123409-7-amir73il@gmail.com --- mm/filemap.c | 12 ------------ mm/readahead.c | 14 -------------- 2 files changed, 26 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index ff5fcdd961364..6d616bb9001eb 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3197,14 +3197,6 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) unsigned long vm_flags = vmf->vma->vm_flags; unsigned int mmap_miss; - /* - * If we have pre-content watches we need to disable readahead to make - * sure that we don't populate our mapping with 0 filled pages that we - * never emitted an event for. - */ - if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) - return fpin; - #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* Use the readahead code, even if readahead is disabled */ if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) { @@ -3273,10 +3265,6 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf, struct file *fpin = NULL; unsigned int mmap_miss; - /* See comment in do_sync_mmap_readahead. */ - if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) - return fpin; - /* If we don't want any read-ahead, don't bother */ if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages) return fpin; diff --git a/mm/readahead.c b/mm/readahead.c index 220155a5c9646..6a4e96b69702b 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -128,7 +128,6 @@ #include #include #include -#include #include "internal.h" @@ -558,15 +557,6 @@ void page_cache_sync_ra(struct readahead_control *ractl, unsigned long max_pages, contig_count; pgoff_t prev_index, miss; - /* - * If we have pre-content watches we need to disable readahead to make - * sure that we don't find 0 filled pages in cache that we never emitted - * events for. Filesystems supporting HSM must make sure to not call - * this function with ractl->file unset for files handled by HSM. - */ - if (ractl->file && unlikely(FMODE_FSNOTIFY_HSM(ractl->file->f_mode))) - return; - /* * Even if readahead is disabled, issue this request as readahead * as we'll need it to satisfy the requested range. The forced @@ -645,10 +635,6 @@ void page_cache_async_ra(struct readahead_control *ractl, if (!ra->ra_pages) return; - /* See the comment in page_cache_sync_ra. */ - if (ractl->file && unlikely(FMODE_FSNOTIFY_HSM(ractl->file->f_mode))) - return; - /* * Same bit is used for PG_readahead and PG_reclaim. */ -- GitLab From 69a5a13a22b1def29dce62b5b7c86e6098c20c68 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 13 Mar 2025 09:56:07 -0400 Subject: [PATCH 1531/1585] bcachefs: target_congested -> get_random_u32_below() get_random_u32_below() has a better algorithm than bch2_rand_range(), it just didn't exist at the time. Signed-off-by: Kent Overstreet --- fs/bcachefs/io_read.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 726da68073e2b..aa91fcf51eecc 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -59,7 +59,7 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target) } rcu_read_unlock(); - return bch2_rand_range(nr * CONGESTED_MAX) < total; + return get_random_u32_below(nr * CONGESTED_MAX) < total; } #else -- GitLab From 9c18ea7ffee090b47afaa7dc41903fb1b436d7bd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 13 Mar 2025 11:16:28 -0400 Subject: [PATCH 1532/1585] bcachefs: bch2_get_random_u64_below() steal the (clever) algorithm from get_random_u32_below() this fixes a bug where we were passing roundup_pow_of_two() a 64 bit number - we're squaring device latencies now: [ +1.681698] ------------[ cut here ]------------ [ +0.000010] UBSAN: shift-out-of-bounds in ./include/linux/log2.h:57:13 [ +0.000011] shift exponent 64 is too large for 64-bit type 'long unsigned int' [ +0.000011] CPU: 1 UID: 0 PID: 196 Comm: kworker/u32:13 Not tainted 6.14.0-rc6-dave+ #10 [ +0.000012] Hardware name: ASUS System Product Name/PRIME B460I-PLUS, BIOS 1301 07/13/2021 [ +0.000005] Workqueue: events_unbound __bch2_read_endio [bcachefs] [ +0.000354] Call Trace: [ +0.000005] [ +0.000007] dump_stack_lvl+0x5d/0x80 [ +0.000018] ubsan_epilogue+0x5/0x30 [ +0.000008] __ubsan_handle_shift_out_of_bounds.cold+0x61/0xe6 [ +0.000011] bch2_rand_range.cold+0x17/0x20 [bcachefs] [ +0.000231] bch2_bkey_pick_read_device+0x547/0x920 [bcachefs] [ +0.000229] __bch2_read_extent+0x1e4/0x18e0 [bcachefs] [ +0.000241] ? bch2_btree_iter_peek_slot+0x3df/0x800 [bcachefs] [ +0.000180] ? bch2_read_retry_nodecode+0x270/0x330 [bcachefs] [ +0.000230] bch2_read_retry_nodecode+0x270/0x330 [bcachefs] [ +0.000230] bch2_rbio_retry+0x1fa/0x600 [bcachefs] [ +0.000224] ? bch2_printbuf_make_room+0x71/0xb0 [bcachefs] [ +0.000243] ? bch2_read_csum_err+0x4a4/0x610 [bcachefs] [ +0.000278] bch2_read_csum_err+0x4a4/0x610 [bcachefs] [ +0.000227] ? __bch2_read_endio+0x58b/0x870 [bcachefs] [ +0.000220] __bch2_read_endio+0x58b/0x870 [bcachefs] [ +0.000268] ? try_to_wake_up+0x31c/0x7f0 [ +0.000011] ? process_one_work+0x176/0x330 [ +0.000008] process_one_work+0x176/0x330 [ +0.000008] worker_thread+0x252/0x390 [ +0.000008] ? __pfx_worker_thread+0x10/0x10 [ +0.000006] kthread+0xec/0x230 [ +0.000011] ? __pfx_kthread+0x10/0x10 [ +0.000009] ret_from_fork+0x31/0x50 [ +0.000009] ? __pfx_kthread+0x10/0x10 [ +0.000008] ret_from_fork_asm+0x1a/0x30 [ +0.000012] [ +0.000046] ---[ end trace ]--- Reported-by: Roland Vet Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.c | 2 +- fs/bcachefs/util.c | 23 ++++++++++++++--------- fs/bcachefs/util.h | 2 +- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 05d5f71a7ca9f..2d8042f853dcd 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -99,7 +99,7 @@ static inline bool ptr_better(struct bch_fs *c, /* Pick at random, biased in favor of the faster device: */ - return bch2_rand_range(l1 + l2) > l1; + return bch2_get_random_u64_below(l1 + l2) > l1; } if (bch2_force_reconstruct_read) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index e0a876cbaa6b7..8e3ab4bf79a90 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -653,19 +653,24 @@ int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask) return 0; } -size_t bch2_rand_range(size_t max) +u64 bch2_get_random_u64_below(u64 ceil) { - size_t rand; + if (ceil <= U32_MAX) + return __get_random_u32_below(ceil); - if (!max) - return 0; + /* this is the same (clever) algorithm as in __get_random_u32_below() */ + u64 rand = get_random_u64(); + u64 mult = ceil * rand; - do { - rand = get_random_long(); - rand &= roundup_pow_of_two(max) - 1; - } while (rand >= max); + if (unlikely(mult < ceil)) { + u64 bound = -ceil % ceil; + while (unlikely(mult < bound)) { + rand = get_random_u64(); + mult = ceil * rand; + } + } - return rand; + return mul_u64_u64_shr(ceil, rand, 64); } void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src) diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index e7c3541b38f3f..f4a4783219d9d 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -401,7 +401,7 @@ do { \ _ret; \ }) -size_t bch2_rand_range(size_t); +u64 bch2_get_random_u64_below(u64); void memcpy_to_bio(struct bio *, struct bvec_iter, const void *); void memcpy_from_bio(void *, struct bio *, struct bvec_iter); -- GitLab From 57e9417f69839cb10f7ffca684c38acd28ceb57b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 8 Mar 2025 10:50:08 -0500 Subject: [PATCH 1533/1585] dm-flakey: Fix memory corruption in optional corrupt_bio_byte feature Fix memory corruption due to incorrect parameter being passed to bio_init Signed-off-by: Kent Overstreet Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # v6.5+ Fixes: 1d9a94389853 ("dm flakey: clone pages on write bio before corrupting them") --- drivers/md/dm-flakey.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 731467d4ed101..b690905ab89ff 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -426,7 +426,7 @@ static struct bio *clone_bio(struct dm_target *ti, struct flakey_c *fc, struct b if (!clone) return NULL; - bio_init(clone, fc->dev->bdev, bio->bi_inline_vecs, nr_iovecs, bio->bi_opf); + bio_init(clone, fc->dev->bdev, clone->bi_inline_vecs, nr_iovecs, bio->bi_opf); clone->bi_iter.bi_sector = flakey_map_sector(ti, bio->bi_iter.bi_sector); clone->bi_private = bio; -- GitLab From a2ab25529bbcea51b5e01dded79f45aeb94f644a Mon Sep 17 00:00:00 2001 From: Ajay Kaher Date: Thu, 13 Mar 2025 17:31:11 +0000 Subject: [PATCH 1534/1585] x86/vmware: Parse MP tables for SEV-SNP enabled guests under VMware hypervisors Under VMware hypervisors, SEV-SNP enabled VMs are fundamentally able to boot without UEFI, but this regressed a year ago due to: 0f4a1e80989a ("x86/sev: Skip ROM range scans and validation for SEV-SNP guests") In this case, mpparse_find_mptable() has to be called to parse MP tables which contains the necessary boot information. [ mingo: Updated the changelog. ] Fixes: 0f4a1e80989a ("x86/sev: Skip ROM range scans and validation for SEV-SNP guests") Co-developed-by: Ye Li Signed-off-by: Ye Li Signed-off-by: Ajay Kaher Signed-off-by: Ingo Molnar Tested-by: Ye Li Reviewed-by: Kevin Loughlin Acked-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20250313173111.10918-1-ajay.kaher@broadcom.com --- arch/x86/kernel/cpu/vmware.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 00189cdeb775f..cb3f900c46fcc 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -429,6 +430,9 @@ static void __init vmware_platform_setup(void) pr_warn("Failed to get TSC freq from the hypervisor\n"); } + if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !efi_enabled(EFI_BOOT)) + x86_init.mpparse.find_mptable = mpparse_find_mptable; + vmware_paravirt_ops_setup(); #ifdef CONFIG_X86_IO_APIC -- GitLab From f2865c6300d75a9f187dd7918d248e010970fd44 Mon Sep 17 00:00:00 2001 From: Amit Sunil Dhamne Date: Mon, 10 Mar 2025 19:19:07 -0700 Subject: [PATCH 1535/1585] usb: typec: tcpm: fix state transition for SNK_WAIT_CAPABILITIES state in run_state_machine() A subtle error got introduced while manually fixing merge conflict in tcpm.c for commit 85c4efbe6088 ("Merge v6.12-rc6 into usb-next"). As a result of this error, the next state is unconditionally set to SNK_WAIT_CAPABILITIES_TIMEOUT while handling SNK_WAIT_CAPABILITIES state in run_state_machine(...). Fix this by setting new state of TCPM state machine to `upcoming_state` (that is set to different values based on conditions). Cc: stable@vger.kernel.org Fixes: 85c4efbe60888 ("Merge v6.12-rc6 into usb-next") Signed-off-by: Amit Sunil Dhamne Reviewed-by: Badhri Jagan Sridharan Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250310-fix-snk-wait-timeout-v6-14-rc6-v1-1-5db14475798f@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 6bf1a22c785af..62ca4a0ec55bb 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5117,16 +5117,16 @@ static void run_state_machine(struct tcpm_port *port) */ if (port->vbus_never_low) { port->vbus_never_low = false; - tcpm_set_state(port, SNK_SOFT_RESET, - port->timings.sink_wait_cap_time); + upcoming_state = SNK_SOFT_RESET; } else { if (!port->self_powered) upcoming_state = SNK_WAIT_CAPABILITIES_TIMEOUT; else upcoming_state = hard_reset_state(port); - tcpm_set_state(port, SNK_WAIT_CAPABILITIES_TIMEOUT, - port->timings.sink_wait_cap_time); } + + tcpm_set_state(port, upcoming_state, + port->timings.sink_wait_cap_time); break; case SNK_WAIT_CAPABILITIES_TIMEOUT: /* -- GitLab From 0b4ffbe4888a2c71185eaf5c1a02dd3586a9bc04 Mon Sep 17 00:00:00 2001 From: Tengda Wu Date: Fri, 14 Mar 2025 06:53:35 +0000 Subject: [PATCH 1536/1585] tracing: Correct the refcount if the hist/hist_debug file fails to open The function event_{hist,hist_debug}_open() maintains the refcount of 'file->tr' and 'file' through tracing_open_file_tr(). However, it does not roll back these counts on subsequent failure paths, resulting in a refcount leak. A very obvious case is that if the hist/hist_debug file belongs to a specific instance, the refcount leak will prevent the deletion of that instance, as it relies on the condition 'tr->ref == 1' within __remove_instance(). Fix this by calling tracing_release_file_tr() on all failure paths in event_{hist,hist_debug}_open() to correct the refcount. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Zheng Yejian Link: https://lore.kernel.org/20250314065335.1202817-1-wutengda@huaweicloud.com Fixes: 1cc111b9cddc ("tracing: Fix uaf issue when open the hist or hist_debug file") Signed-off-by: Tengda Wu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index ad7419e240556..53dc6719181e5 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -5689,12 +5689,16 @@ static int event_hist_open(struct inode *inode, struct file *file) guard(mutex)(&event_mutex); event_file = event_file_data(file); - if (!event_file) - return -ENODEV; + if (!event_file) { + ret = -ENODEV; + goto err; + } hist_file = kzalloc(sizeof(*hist_file), GFP_KERNEL); - if (!hist_file) - return -ENOMEM; + if (!hist_file) { + ret = -ENOMEM; + goto err; + } hist_file->file = file; hist_file->last_act = get_hist_hit_count(event_file); @@ -5702,9 +5706,14 @@ static int event_hist_open(struct inode *inode, struct file *file) /* Clear private_data to avoid warning in single_open() */ file->private_data = NULL; ret = single_open(file, hist_show, hist_file); - if (ret) + if (ret) { kfree(hist_file); + goto err; + } + return 0; +err: + tracing_release_file_tr(inode, file); return ret; } @@ -5979,7 +5988,10 @@ static int event_hist_debug_open(struct inode *inode, struct file *file) /* Clear private_data to avoid warning in single_open() */ file->private_data = NULL; - return single_open(file, hist_debug_show, file); + ret = single_open(file, hist_debug_show, file); + if (ret) + tracing_release_file_tr(inode, file); + return ret; } const struct file_operations event_hist_debug_fops = { -- GitLab From ca3ac4bf4dc307cea5781dccccf41c1d14c2f82f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 3 Mar 2025 18:02:32 +0000 Subject: [PATCH 1537/1585] xfs: Use abs_diff instead of XFS_ABSDIFF We have a central definition for this function since 2023, used by a number of different parts of the kernel. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Carlos Maiolino Reviewed-by: Eric Sandeen Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_alloc.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 3d33e17f2e5ce..7839efe050bfa 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -33,8 +33,6 @@ struct kmem_cache *xfs_extfree_item_cache; struct workqueue_struct *xfs_alloc_wq; -#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b))) - #define XFSA_FIXUP_BNO_OK 1 #define XFSA_FIXUP_CNT_OK 2 @@ -410,8 +408,8 @@ xfs_alloc_compute_diff( if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) { if (newlen1 < newlen2 || (newlen1 == newlen2 && - XFS_ABSDIFF(newbno1, wantbno) > - XFS_ABSDIFF(newbno2, wantbno))) + abs_diff(newbno1, wantbno) > + abs_diff(newbno2, wantbno))) newbno1 = newbno2; } else if (newbno2 != NULLAGBLOCK) newbno1 = newbno2; @@ -427,7 +425,7 @@ xfs_alloc_compute_diff( } else newbno1 = freeend - wantlen; *newbnop = newbno1; - return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno); + return newbno1 == NULLAGBLOCK ? 0 : abs_diff(newbno1, wantbno); } /* -- GitLab From 03fc0a2dc9f8c292fad8a1bcfb6d1f0dec1824be Mon Sep 17 00:00:00 2001 From: Ike Panhc Date: Fri, 14 Mar 2025 12:57:32 +0800 Subject: [PATCH 1538/1585] MAINTAINERS: Update Ike Panhc's email address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I am no longer at Canonical and update with my personal email address. Signed-off-by: Ike Panhc Link: https://lore.kernel.org/r/20250314045732.389973-1-ike.pan@canonical.com Signed-off-by: Ilpo Järvinen --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index ae0adc499f4ac..449c5e3a5d26b 100644 --- a/.mailmap +++ b/.mailmap @@ -281,6 +281,7 @@ Henrik Rydberg Herbert Xu Huacai Chen Huacai Chen +Ike Panhc J. Bruce Fields J. Bruce Fields Jacob Shin diff --git a/MAINTAINERS b/MAINTAINERS index 896a307fa0654..bb4c069cc1e92 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11140,7 +11140,7 @@ S: Maintained F: drivers/i2c/busses/i2c-icy.c IDEAPAD LAPTOP EXTRAS DRIVER -M: Ike Panhc +M: Ike Panhc L: platform-driver-x86@vger.kernel.org S: Maintained W: http://launchpad.net/ideapad-laptop -- GitLab From 90fd9ad5b0c981693c8512d9da01f14fb6596e9d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 14 Mar 2025 09:54:43 -0400 Subject: [PATCH 1539/1585] bcachefs: Change btree wb assert to runtime error We just had a report of the assert for "btree in write buffer for non-write buffer btree" popping during the 6.14 upgrade. - 150TB filesystem, after a reboot the upgrade was able to continue from where it left off, so no major damage. But with 6.14 about to come out we want to get this tracked down asap, and need more data if other users hit this. Convert the BUG_ON() to an emergency read-only, and print out btree, the key itself, and stack trace from the original write buffer update (which did not have this check before). Reported-by: Stijn Tintel Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update.h | 8 ++++++++ fs/bcachefs/btree_write_buffer.c | 21 ++++++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 8f22ef9a7651a..47d8690f01bfc 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -126,10 +126,18 @@ bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s) int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *); +int bch2_btree_write_buffer_insert_err(struct btree_trans *, + enum btree_id, struct bkey_i *); + static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans, enum btree_id btree, struct bkey_i *k) { + if (unlikely(!btree_type_uses_write_buffer(btree))) { + int ret = bch2_btree_write_buffer_insert_err(trans, btree, k); + dump_stack(); + return ret; + } /* * Most updates skip the btree write buffer until journal replay is * finished because synchronization with journal replay relies on having diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index b56c4987b8c97..2c09d19dd6210 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -264,6 +264,22 @@ static void move_keys_from_inc_to_flushing(struct btree_write_buffer *wb) BUG_ON(wb->sorted.size < wb->flushing.keys.nr); } +int bch2_btree_write_buffer_insert_err(struct btree_trans *trans, + enum btree_id btree, struct bkey_i *k) +{ + struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; + + prt_printf(&buf, "attempting to do write buffer update on non wb btree="); + bch2_btree_id_to_text(&buf, btree); + prt_str(&buf, "\n"); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); + + bch2_fs_inconsistent(c, "%s", buf.buf); + printbuf_exit(&buf); + return -EROFS; +} + static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) { struct bch_fs *c = trans->c; @@ -312,7 +328,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) darray_for_each(wb->sorted, i) { struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx]; - BUG_ON(!btree_type_uses_write_buffer(k->btree)); + if (unlikely(!btree_type_uses_write_buffer(k->btree))) { + ret = bch2_btree_write_buffer_insert_err(trans, k->btree, &k->k); + goto err; + } for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++) prefetch(&wb->flushing.keys.data[n->idx]); -- GitLab From 1a2b74d0a2a46c219b25fdb0efcf9cd7f55cfe5e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 14 Mar 2025 18:20:20 -0400 Subject: [PATCH 1540/1585] bcachefs: fix build on 32 bit in get_random_u64_below() bare 64 bit divides not allowed, whoops arm-linux-gnueabi-ld: drivers/char/random.o: in function `__get_random_u64_below': drivers/char/random.c:602:(.text+0xc70): undefined reference to `__aeabi_uldivmod' Signed-off-by: Kent Overstreet --- fs/bcachefs/util.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 8e3ab4bf79a90..da2cd11b3025d 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -663,7 +663,8 @@ u64 bch2_get_random_u64_below(u64 ceil) u64 mult = ceil * rand; if (unlikely(mult < ceil)) { - u64 bound = -ceil % ceil; + u64 bound; + div64_u64_rem(-ceil, ceil, &bound); while (unlikely(mult < bound)) { rand = get_random_u64(); mult = ceil * rand; -- GitLab From 4701f33a10702d5fc577c32434eb62adde0a1ae1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 16 Mar 2025 12:55:17 -1000 Subject: [PATCH 1541/1585] Linux 6.14-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1d6a9ec8a2ace..50694bbbf828e 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Baby Opossum Posse # *DOCUMENTATION* -- GitLab From 6216182fb776ae546c5566f21976d116c8650cee Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 24 Oct 2024 11:19:40 +0100 Subject: [PATCH 1542/1585] RISC-V: clarify what some RISCV_ISA* config options do During some discussion on IRC yesterday and on Pu's bpf patch [1] I noticed that these RISCV_ISA* Kconfig options are not really clear about their implications. Many of these options have no impact on what userspace is allowed to do, for example an application can use Zbb regardless of whether or not the kernel does. Change the help text to try and clarify whether or not an option affects just the kernel, or also userspace. None of these options actually control whether or not an extension is detected dynamically as that's done regardless of Kconfig options, so drop any text that implies the option is required for dynamic detection, rewording them as "do x when y is detected". Link: https://lore.kernel.org/linux-riscv/20240328-ferocity-repose-c554f75a676c@spud/ [1] Reviewed-by: Andrew Jones Signed-off-by: Conor Dooley Reviewed-by: Charlie Jenkins Reviewed-by: Samuel Holland Link: https://lore.kernel.org/r/20241024-overdue-slogan-0b0f69d3da91@spud Signed-off-by: Alexandre Ghiti --- arch/riscv/Kconfig | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 62545946ecf43..278a38c94c5a6 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -527,7 +527,8 @@ config RISCV_ISA_C help Adds "C" to the ISA subsets that the toolchain is allowed to emit when building Linux, which results in compressed instructions in the - Linux binary. + Linux binary. This option produces a kernel that will not run on + systems that do not support compressed instructions. If you don't know what to do here, say Y. @@ -537,8 +538,8 @@ config RISCV_ISA_SVNAPOT depends on RISCV_ALTERNATIVE default y help - Allow kernel to detect the Svnapot ISA-extension dynamically at boot - time and enable its usage. + Enable support for the Svnapot ISA-extension when it is detected + at boot. The Svnapot extension is used to mark contiguous PTEs as a range of contiguous virtual-to-physical translations for a naturally @@ -556,9 +557,8 @@ config RISCV_ISA_SVPBMT depends on RISCV_ALTERNATIVE default y help - Adds support to dynamically detect the presence of the Svpbmt - ISA-extension (Supervisor-mode: page-based memory types) and - enable its usage. + Add support for the Svpbmt ISA-extension (Supervisor-mode: + page-based memory types) in the kernel when it is detected at boot. The memory type for a page contains a combination of attributes that indicate the cacheability, idempotency, and ordering @@ -577,14 +577,15 @@ config TOOLCHAIN_HAS_V depends on AS_HAS_OPTION_ARCH config RISCV_ISA_V - bool "VECTOR extension support" + bool "Vector extension support" depends on TOOLCHAIN_HAS_V depends on FPU select DYNAMIC_SIGFRAME default y help - Say N here if you want to disable all vector related procedure - in the kernel. + Add support for the Vector extension when it is detected at boot. + When this option is disabled, neither the kernel nor userspace may + use vector procedures. If you don't know what to do here, say Y. @@ -667,8 +668,8 @@ config RISCV_ISA_ZBB depends on RISCV_ALTERNATIVE default y help - Adds support to dynamically detect the presence of the ZBB - extension (basic bit manipulation) and enable its usage. + Add support for enabling optimisations in the kernel when the + Zbb extension is detected at boot. The Zbb extension provides instructions to accelerate a number of bit-specific operations (count bit population, sign extending, @@ -707,9 +708,9 @@ config RISCV_ISA_ZICBOM select RISCV_DMA_NONCOHERENT select DMA_DIRECT_REMAP help - Adds support to dynamically detect the presence of the ZICBOM - extension (Cache Block Management Operations) and enable its - usage. + Add support for the Zicbom extension (Cache Block Management + Operations) and enable its use in the kernel when it is detected + at boot. The Zicbom extension can be used to handle for example non-coherent DMA support on devices that need it. @@ -722,7 +723,7 @@ config RISCV_ISA_ZICBOZ default y help Enable the use of the Zicboz extension (cbo.zero instruction) - when available. + in the kernel when it is detected at boot. The Zicboz extension is used for faster zeroing of memory. @@ -760,8 +761,9 @@ config FPU bool "FPU support" default y help - Say N here if you want to disable all floating-point related procedure - in the kernel. + Add support for floating point operations when an FPU is detected at + boot. When this option is disabled, neither the kernel nor userspace + may use the floating point unit. If you don't know what to do here, say Y. -- GitLab From 9343aaba1f256ff42730db5a61efc32a86149776 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 24 Oct 2024 11:19:41 +0100 Subject: [PATCH 1543/1585] RISC-V: separate Zbb optimisations requiring and not requiring toolchain support It seems a bit ridiculous to require toolchain support for BPF to assemble Zbb instructions, so move the dependency on toolchain support for Zbb optimisations out of the Kconfig option and to the callsites. Zbb support has always depended on alternatives, so while adjusting the config options guarding optimisations, remove any checks for whether or not alternatives are enabled. Reviewed-by: Andrew Jones Signed-off-by: Conor Dooley Reviewed-by: Charlie Jenkins Reviewed-by: Samuel Holland Link: https://lore.kernel.org/r/20241024-chump-freebase-d26b6d81af33@spud Signed-off-by: Alexandre Ghiti --- arch/riscv/Kconfig | 4 ++-- arch/riscv/include/asm/arch_hweight.h | 6 +++--- arch/riscv/include/asm/bitops.h | 4 ++-- arch/riscv/include/asm/checksum.h | 3 +-- arch/riscv/lib/csum.c | 21 +++------------------ arch/riscv/lib/strcmp.S | 5 +++-- arch/riscv/lib/strlen.S | 5 +++-- arch/riscv/lib/strncmp.S | 5 +++-- 8 files changed, 20 insertions(+), 33 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 278a38c94c5a6..6ec7a500a25ff 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -664,12 +664,12 @@ config RISCV_ISA_ZBA config RISCV_ISA_ZBB bool "Zbb extension support for bit manipulation instructions" - depends on TOOLCHAIN_HAS_ZBB depends on RISCV_ALTERNATIVE default y help Add support for enabling optimisations in the kernel when the - Zbb extension is detected at boot. + Zbb extension is detected at boot. Some optimisations may + additionally depend on toolchain support for Zbb. The Zbb extension provides instructions to accelerate a number of bit-specific operations (count bit population, sign extending, diff --git a/arch/riscv/include/asm/arch_hweight.h b/arch/riscv/include/asm/arch_hweight.h index 613769b9cdc90..0e7cdbbec8efd 100644 --- a/arch/riscv/include/asm/arch_hweight.h +++ b/arch/riscv/include/asm/arch_hweight.h @@ -19,7 +19,7 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w) { -#ifdef CONFIG_RISCV_ISA_ZBB +#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB) asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); @@ -50,7 +50,7 @@ static inline unsigned int __arch_hweight8(unsigned int w) #if BITS_PER_LONG == 64 static __always_inline unsigned long __arch_hweight64(__u64 w) { -# ifdef CONFIG_RISCV_ISA_ZBB +#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB) asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); @@ -64,7 +64,7 @@ static __always_inline unsigned long __arch_hweight64(__u64 w) return w; legacy: -# endif +#endif return __sw_hweight64(w); } #else /* BITS_PER_LONG == 64 */ diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index fae152ea0508d..410e2235d6589 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -15,7 +15,7 @@ #include #include -#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE) +#if !(defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)) || defined(NO_ALTERNATIVE) #include #include #include @@ -175,7 +175,7 @@ static __always_inline int variable_fls(unsigned int x) variable_fls(x_); \ }) -#endif /* !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE) */ +#endif /* !(defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)) || defined(NO_ALTERNATIVE) */ #include #include diff --git a/arch/riscv/include/asm/checksum.h b/arch/riscv/include/asm/checksum.h index 88e6f1499e889..da378856f1d59 100644 --- a/arch/riscv/include/asm/checksum.h +++ b/arch/riscv/include/asm/checksum.h @@ -49,8 +49,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) * ZBB only saves three instructions on 32-bit and five on 64-bit so not * worth checking if supported without Alternatives. */ - if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && - IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) { unsigned long fold_temp; asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c index 7fb12c59e5719..9408f50ca59a8 100644 --- a/arch/riscv/lib/csum.c +++ b/arch/riscv/lib/csum.c @@ -40,12 +40,7 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, uproto = (__force unsigned int)htonl(proto); sum += uproto; - /* - * Zbb support saves 4 instructions, so not worth checking without - * alternatives if supported - */ - if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && - IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) { unsigned long fold_temp; /* @@ -157,12 +152,7 @@ do_csum_with_alignment(const unsigned char *buff, int len) csum = do_csum_common(ptr, end, data); #ifdef CC_HAS_ASM_GOTO_TIED_OUTPUT - /* - * Zbb support saves 6 instructions, so not worth checking without - * alternatives if supported - */ - if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && - IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) { unsigned long fold_temp; /* @@ -244,12 +234,7 @@ do_csum_no_alignment(const unsigned char *buff, int len) end = (const unsigned long *)(buff + len); csum = do_csum_common(ptr, end, data); - /* - * Zbb support saves 6 instructions, so not worth checking without - * alternatives if supported - */ - if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && - IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) { unsigned long fold_temp; /* diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S index 57a5c00662315..65027e742af15 100644 --- a/arch/riscv/lib/strcmp.S +++ b/arch/riscv/lib/strcmp.S @@ -8,7 +8,8 @@ /* int strcmp(const char *cs, const char *ct) */ SYM_FUNC_START(strcmp) - ALTERNATIVE("nop", "j strcmp_zbb", 0, RISCV_ISA_EXT_ZBB, CONFIG_RISCV_ISA_ZBB) + __ALTERNATIVE_CFG("nop", "j strcmp_zbb", 0, RISCV_ISA_EXT_ZBB, + IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) /* * Returns @@ -43,7 +44,7 @@ SYM_FUNC_START(strcmp) * The code was published as part of the bitmanip manual * in Appendix A. */ -#ifdef CONFIG_RISCV_ISA_ZBB +#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB) strcmp_zbb: .option push diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S index 962983b73251e..eb4d2b7ed22b6 100644 --- a/arch/riscv/lib/strlen.S +++ b/arch/riscv/lib/strlen.S @@ -8,7 +8,8 @@ /* int strlen(const char *s) */ SYM_FUNC_START(strlen) - ALTERNATIVE("nop", "j strlen_zbb", 0, RISCV_ISA_EXT_ZBB, CONFIG_RISCV_ISA_ZBB) + __ALTERNATIVE_CFG("nop", "j strlen_zbb", 0, RISCV_ISA_EXT_ZBB, + IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) /* * Returns @@ -33,7 +34,7 @@ SYM_FUNC_START(strlen) /* * Variant of strlen using the ZBB extension if available */ -#ifdef CONFIG_RISCV_ISA_ZBB +#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB) strlen_zbb: #ifdef CONFIG_CPU_BIG_ENDIAN diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S index 7b2d0ff9ed6c7..062000c468c83 100644 --- a/arch/riscv/lib/strncmp.S +++ b/arch/riscv/lib/strncmp.S @@ -8,7 +8,8 @@ /* int strncmp(const char *cs, const char *ct, size_t count) */ SYM_FUNC_START(strncmp) - ALTERNATIVE("nop", "j strncmp_zbb", 0, RISCV_ISA_EXT_ZBB, CONFIG_RISCV_ISA_ZBB) + __ALTERNATIVE_CFG("nop", "j strncmp_zbb", 0, RISCV_ISA_EXT_ZBB, + IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) /* * Returns @@ -46,7 +47,7 @@ SYM_FUNC_START(strncmp) /* * Variant of strncmp using the ZBB extension if available */ -#ifdef CONFIG_RISCV_ISA_ZBB +#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB) strncmp_zbb: .option push -- GitLab From 03dc00a2b67854604c0aa8cbd32105f8b633451e Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Wed, 8 Jan 2025 05:57:00 -0800 Subject: [PATCH 1544/1585] riscv: Support huge pfnmaps Use RSW0 as the special bit for pmds and puds, just like for ptes. Also define the {pte,pmd,pud}_pgprot helpers which were previously missing and are needed for the follow_pfnmap APIs. Signed-off-by: Andrew Bresticker Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250108135700.2614848-1-abrestic@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/pgtable.h | 49 ++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 8b86ce1965dc8..ff35eb16853cd 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -62,6 +62,7 @@ config RISCV select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_CFI_CLANG select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU + select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_HUGETLBFS if MMU # LLD >= 14: https://github.com/llvm/llvm-project/issues/50505 select ARCH_SUPPORTS_LTO_CLANG if LLD_VERSION >= 140000 diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 050fdc49b5ad7..b6697dc21dafd 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -341,6 +341,14 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) #define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) +#define pte_pgprot pte_pgprot +static inline pgprot_t pte_pgprot(pte_t pte) +{ + unsigned long pfn = pte_pfn(pte); + + return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); +} + static inline int pte_present(pte_t pte) { return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE)); @@ -674,6 +682,11 @@ static inline pmd_t pte_pmd(pte_t pte) return __pmd(pte_val(pte)); } +static inline pud_t pte_pud(pte_t pte) +{ + return __pud(pte_val(pte)); +} + static inline pmd_t pmd_mkhuge(pmd_t pmd) { return pmd; @@ -699,6 +712,18 @@ static inline unsigned long pud_pfn(pud_t pud) return ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT); } +#define pmd_pgprot pmd_pgprot +static inline pgprot_t pmd_pgprot(pmd_t pmd) +{ + return pte_pgprot(pmd_pte(pmd)); +} + +#define pud_pgprot pud_pgprot +static inline pgprot_t pud_pgprot(pud_t pud) +{ + return pte_pgprot(pud_pte(pud)); +} + static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { return pte_pmd(pte_modify(pmd_pte(pmd), newprot)); @@ -768,6 +793,30 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) return pte_pmd(pte_mkdevmap(pmd_pte(pmd))); } +#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP +static inline bool pmd_special(pmd_t pmd) +{ + return pte_special(pmd_pte(pmd)); +} + +static inline pmd_t pmd_mkspecial(pmd_t pmd) +{ + return pte_pmd(pte_mkspecial(pmd_pte(pmd))); +} +#endif + +#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP +static inline bool pud_special(pud_t pud) +{ + return pte_special(pud_pte(pud)); +} + +static inline pud_t pud_mkspecial(pud_t pud) +{ + return pte_pud(pte_mkspecial(pud_pte(pud))); +} +#endif + static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { -- GitLab From d3817d091fe6480de5bf3faba0fc2ce25f8d023e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Fri, 30 Aug 2024 10:49:32 +0200 Subject: [PATCH 1545/1585] riscv: remove useless pc check in stacktrace handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Checking for pc to be a kernel text address at this location is useless since pc == handle_exception. Remove this check. [ alex: Fix merge conflict ] Signed-off-by: Clément Léger Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240830084934.3690037-1-cleger@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index d4355c770c36a..3fe9e6edef8f1 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -74,7 +74,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, &frame->ra); if (pc >= (unsigned long)handle_exception && pc < (unsigned long)&ret_from_exception_end) { - if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) + if (unlikely(!fn(arg, pc))) break; pc = ((struct pt_regs *)sp)->epc; -- GitLab From 4458b8f68dc7ab8309291f1667157d0250938291 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miquel=20Sabat=C3=A9=20Sol=C3=A0?= Date: Fri, 13 Sep 2024 07:13:24 +0200 Subject: [PATCH 1546/1585] riscv: hwprobe: export Zicntr and Zihpm extensions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export Zicntr and Zihpm ISA extensions through the hwprobe syscall. [ alex: Fix hwprobe numbering ] Signed-off-by: Miquel Sabaté Solà Acked-by: Jesse Taube Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20240913051324.8176-1-mikisabate@gmail.com Signed-off-by: Alexandre Ghiti --- Documentation/arch/riscv/hwprobe.rst | 6 ++++++ arch/riscv/include/uapi/asm/hwprobe.h | 2 ++ arch/riscv/kernel/sys_hwprobe.c | 2 ++ 3 files changed, 10 insertions(+) diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index f273ea15a8e83..35a979dd164a6 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -183,6 +183,9 @@ The following keys are defined: defined in the Atomic Compare-and-Swap (CAS) instructions manual starting from commit 5059e0ca641c ("update to ratified"). + * :c:macro:`RISCV_HWPROBE_EXT_ZICNTR`: The Zicntr extension version 2.0 + is supported as defined in the RISC-V ISA manual. + * :c:macro:`RISCV_HWPROBE_EXT_ZICOND`: The Zicond extension is supported as defined in the RISC-V Integer Conditional (Zicond) operations extension manual starting from commit 95cf1f9 ("Add changes requested by Ved @@ -192,6 +195,9 @@ The following keys are defined: supported as defined in the RISC-V ISA manual starting from commit d8ab5c78c207 ("Zihintpause is ratified"). + * :c:macro:`RISCV_HWPROBE_EXT_ZIHPM`: The Zihpm extension version 2.0 + is supported as defined in the RISC-V ISA manual. + * :c:macro:`RISCV_HWPROBE_EXT_ZVE32X`: The Vector sub-extension Zve32x is supported, as defined by version 1.0 of the RISC-V Vector extension manual. diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index c3c1cc951cb94..8cac35cb19d8b 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -73,6 +73,8 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZCMOP (1ULL << 47) #define RISCV_HWPROBE_EXT_ZAWRS (1ULL << 48) #define RISCV_HWPROBE_EXT_SUPM (1ULL << 49) +#define RISCV_HWPROBE_EXT_ZICNTR (1ULL << 50) +#define RISCV_HWPROBE_EXT_ZIHPM (1ULL << 51) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index bcd3b816306c2..b35cce0b57aec 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -108,9 +108,11 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZCB); EXT_KEY(ZCMOP); EXT_KEY(ZICBOZ); + EXT_KEY(ZICNTR); EXT_KEY(ZICOND); EXT_KEY(ZIHINTNTL); EXT_KEY(ZIHINTPAUSE); + EXT_KEY(ZIHPM); EXT_KEY(ZIMOP); EXT_KEY(ZKND); EXT_KEY(ZKNE); -- GitLab From d9be2b9b60497a82aeceec3a98d8b37fdd2960f2 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 13 Jan 2025 15:24:24 +0100 Subject: [PATCH 1547/1585] riscv: Call secondary mmu notifier when flushing the tlb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is required to allow the IOMMU driver to correctly flush its own TLB. Reviewed-by: Clément Léger Reviewed-by: Samuel Holland Link: https://lore.kernel.org/r/20250113142424.30487-1-alexghiti@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/mm/tlbflush.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 9b6e86ce38674..bb77607c87aa2 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -78,10 +79,17 @@ static void __ipi_flush_tlb_range_asid(void *info) local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid); } -static void __flush_tlb_range(const struct cpumask *cmask, unsigned long asid, +static inline unsigned long get_mm_asid(struct mm_struct *mm) +{ + return mm ? cntx2asid(atomic_long_read(&mm->context.id)) : FLUSH_TLB_NO_ASID; +} + +static void __flush_tlb_range(struct mm_struct *mm, + const struct cpumask *cmask, unsigned long start, unsigned long size, unsigned long stride) { + unsigned long asid = get_mm_asid(mm); unsigned int cpu; if (cpumask_empty(cmask)) @@ -105,30 +113,26 @@ static void __flush_tlb_range(const struct cpumask *cmask, unsigned long asid, } put_cpu(); -} -static inline unsigned long get_mm_asid(struct mm_struct *mm) -{ - return cntx2asid(atomic_long_read(&mm->context.id)); + if (mm) + mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, start + size); } void flush_tlb_mm(struct mm_struct *mm) { - __flush_tlb_range(mm_cpumask(mm), get_mm_asid(mm), - 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); + __flush_tlb_range(mm, mm_cpumask(mm), 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); } void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned int page_size) { - __flush_tlb_range(mm_cpumask(mm), get_mm_asid(mm), - start, end - start, page_size); + __flush_tlb_range(mm, mm_cpumask(mm), start, end - start, page_size); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { - __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), + __flush_tlb_range(vma->vm_mm, mm_cpumask(vma->vm_mm), addr, PAGE_SIZE, PAGE_SIZE); } @@ -161,13 +165,13 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, } } - __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), + __flush_tlb_range(vma->vm_mm, mm_cpumask(vma->vm_mm), start, end - start, stride_size); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - __flush_tlb_range(cpu_online_mask, FLUSH_TLB_NO_ASID, + __flush_tlb_range(NULL, cpu_online_mask, start, end - start, PAGE_SIZE); } @@ -175,7 +179,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), + __flush_tlb_range(vma->vm_mm, mm_cpumask(vma->vm_mm), start, end - start, PMD_SIZE); } #endif @@ -189,7 +193,10 @@ void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, struct mm_struct *mm, unsigned long uaddr) { + unsigned long start = uaddr & PAGE_MASK; + cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); + mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, start + PAGE_SIZE); } void arch_flush_tlb_batched_pending(struct mm_struct *mm) @@ -199,7 +206,7 @@ void arch_flush_tlb_batched_pending(struct mm_struct *mm) void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) { - __flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0, - FLUSH_TLB_MAX_SIZE, PAGE_SIZE); + __flush_tlb_range(NULL, &batch->cpumask, + 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); cpumask_clear(&batch->cpumask); } -- GitLab From d9708b1931fc0ebb21cd94a56283d09222847749 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Mon, 16 Dec 2024 20:39:10 -0500 Subject: [PATCH 1548/1585] riscv: Implement smp_cond_load8/16() with Zawrs RISC-V code uses the queued spinlock implementation, which calls the macros smp_cond_load_acquire for one byte. So, complement the implementation of byte and halfword versions. Signed-off-by: Guo Ren Signed-off-by: Guo Ren Cc: Andrew Jones Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20241217013910.1039923-1-guoren@kernel.org Signed-off-by: Alexandre Ghiti --- arch/riscv/include/asm/cmpxchg.h | 38 +++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 427c41dde6431..2ec119eb147bb 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -365,16 +365,48 @@ static __always_inline void __cmpwait(volatile void *ptr, { unsigned long tmp; + u32 *__ptr32b; + ulong __s, __val, __mask; + asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop", 0, RISCV_ISA_EXT_ZAWRS, 1) : : : : no_zawrs); switch (size) { case 1: - fallthrough; + __ptr32b = (u32 *)((ulong)(ptr) & ~0x3); + __s = ((ulong)(ptr) & 0x3) * BITS_PER_BYTE; + __val = val << __s; + __mask = 0xff << __s; + + asm volatile( + " lr.w %0, %1\n" + " and %0, %0, %3\n" + " xor %0, %0, %2\n" + " bnez %0, 1f\n" + ZAWRS_WRS_NTO "\n" + "1:" + : "=&r" (tmp), "+A" (*(__ptr32b)) + : "r" (__val), "r" (__mask) + : "memory"); + break; case 2: - /* RISC-V doesn't have lr instructions on byte and half-word. */ - goto no_zawrs; + __ptr32b = (u32 *)((ulong)(ptr) & ~0x3); + __s = ((ulong)(ptr) & 0x2) * BITS_PER_BYTE; + __val = val << __s; + __mask = 0xffff << __s; + + asm volatile( + " lr.w %0, %1\n" + " and %0, %0, %3\n" + " xor %0, %0, %2\n" + " bnez %0, 1f\n" + ZAWRS_WRS_NTO "\n" + "1:" + : "=&r" (tmp), "+A" (*(__ptr32b)) + : "r" (__val), "r" (__mask) + : "memory"); + break; case 4: asm volatile( " lr.w %0, %1\n" -- GitLab From 35bc1883733c81ff307c9c73bd00191313e651af Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 13 Feb 2025 08:38:45 +0800 Subject: [PATCH 1549/1585] dt-bindings: riscv: add bfloat16 ISA extension description Add description for the BFloat16 precision Floating-Point ISA extension, (Zfbfmin, Zvfbfmin, Zvfbfwma). which was ratified in commit 4dc23d62 ("Added Chapter title to BF16") of the riscv-isa-manual. Signed-off-by: Inochi Amaoto Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250213003849.147358-2-inochiama@gmail.com Signed-off-by: Alexandre Ghiti --- .../devicetree/bindings/riscv/extensions.yaml | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml index 9c7dd7e75e0ca..0a1f1a76d1296 100644 --- a/Documentation/devicetree/bindings/riscv/extensions.yaml +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -329,6 +329,12 @@ properties: instructions, as ratified in commit 056b6ff ("Zfa is ratified") of riscv-isa-manual. + - const: zfbfmin + description: + The standard Zfbfmin extension which provides minimal support for + 16-bit half-precision brain floating-point instructions, as ratified + in commit 4dc23d62 ("Added Chapter title to BF16") of riscv-isa-manual. + - const: zfh description: The standard Zfh extension for 16-bit half-precision binary @@ -525,6 +531,18 @@ properties: in commit 6f702a2 ("Vector extensions are now ratified") of riscv-v-spec. + - const: zvfbfmin + description: + The standard Zvfbfmin extension for minimal support for vectored + 16-bit half-precision brain floating-point instructions, as ratified + in commit 4dc23d62 ("Added Chapter title to BF16") of riscv-isa-manual. + + - const: zvfbfwma + description: + The standard Zvfbfwma extension for vectored half-precision brain + floating-point widening multiply-accumulate instructions, as ratified + in commit 4dc23d62 ("Added Chapter title to BF16") of riscv-isa-manual. + - const: zvfh description: The standard Zvfh extension for vectored half-precision @@ -663,6 +681,33 @@ properties: then: contains: const: zca + # Zfbfmin depends on F + - if: + contains: + const: zfbfmin + then: + contains: + const: f + # Zvfbfmin depends on V or Zve32f + - if: + contains: + const: zvfbfmin + then: + oneOf: + - contains: + const: v + - contains: + const: zve32f + # Zvfbfwma depends on Zfbfmin and Zvfbfmin + - if: + contains: + const: zvfbfwma + then: + allOf: + - contains: + const: zfbfmin + - contains: + const: zvfbfmin allOf: # Zcf extension does not exist on rv64 -- GitLab From e186c28dda11e8d6d829b08b9da2ec7c342edd78 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 13 Feb 2025 08:38:46 +0800 Subject: [PATCH 1550/1585] riscv: add ISA extension parsing for bfloat16 ISA extension MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add parsing for Zfbmin, Zvfbfmin, Zvfbfwma ISA extension which were ratified in 4dc23d62 ("Added Chapter title to BF16") of the riscv-isa-manual. Signed-off-by: Inochi Amaoto Reviewed-by: Clément Léger Link: https://lore.kernel.org/r/20250213003849.147358-3-inochiama@gmail.com Signed-off-by: Alexandre Ghiti --- arch/riscv/include/asm/hwcap.h | 3 +++ arch/riscv/kernel/cpufeature.c | 35 ++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 869da082252a4..14cc29f2a723c 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -100,6 +100,9 @@ #define RISCV_ISA_EXT_ZICCRSE 91 #define RISCV_ISA_EXT_SVADE 92 #define RISCV_ISA_EXT_SVADU 93 +#define RISCV_ISA_EXT_ZFBFMIN 94 +#define RISCV_ISA_EXT_ZVFBFMIN 95 +#define RISCV_ISA_EXT_ZVFBFWMA 96 #define RISCV_ISA_EXT_XLINUXENVCFG 127 diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index c0916ed318c20..40021459a2574 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -76,6 +76,15 @@ bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned i } EXPORT_SYMBOL_GPL(__riscv_isa_extension_available); +static int riscv_ext_f_depends(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_f)) + return 0; + + return -EPROBE_DEFER; +} + static int riscv_ext_zicbom_validate(const struct riscv_isa_ext_data *data, const unsigned long *isa_bitmap) { @@ -136,6 +145,28 @@ static int riscv_ext_zcf_validate(const struct riscv_isa_ext_data *data, return -EPROBE_DEFER; } +static int riscv_vector_f_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_V)) + return -EINVAL; + + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZVE32F)) + return 0; + + return -EPROBE_DEFER; +} + +static int riscv_ext_zvfbfwma_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZFBFMIN) && + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZVFBFMIN)) + return 0; + + return -EPROBE_DEFER; +} + static int riscv_ext_svadu_validate(const struct riscv_isa_ext_data *data, const unsigned long *isa_bitmap) { @@ -341,6 +372,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS), __RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS), __RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA), + __RISCV_ISA_EXT_DATA_VALIDATE(zfbfmin, RISCV_ISA_EXT_ZFBFMIN, riscv_ext_f_depends), __RISCV_ISA_EXT_DATA(zfh, RISCV_ISA_EXT_ZFH), __RISCV_ISA_EXT_DATA(zfhmin, RISCV_ISA_EXT_ZFHMIN), __RISCV_ISA_EXT_DATA(zca, RISCV_ISA_EXT_ZCA), @@ -373,6 +405,9 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_SUPERSET(zve64d, RISCV_ISA_EXT_ZVE64D, riscv_zve64d_exts), __RISCV_ISA_EXT_SUPERSET(zve64f, RISCV_ISA_EXT_ZVE64F, riscv_zve64f_exts), __RISCV_ISA_EXT_SUPERSET(zve64x, RISCV_ISA_EXT_ZVE64X, riscv_zve64x_exts), + __RISCV_ISA_EXT_DATA_VALIDATE(zvfbfmin, RISCV_ISA_EXT_ZVFBFMIN, riscv_vector_f_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvfbfwma, RISCV_ISA_EXT_ZVFBFWMA, + riscv_ext_zvfbfwma_validate), __RISCV_ISA_EXT_DATA(zvfh, RISCV_ISA_EXT_ZVFH), __RISCV_ISA_EXT_DATA(zvfhmin, RISCV_ISA_EXT_ZVFHMIN), __RISCV_ISA_EXT_DATA(zvkb, RISCV_ISA_EXT_ZVKB), -- GitLab From a4863e002cf0dd6fb2f06796f16d7bc0974e9845 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 13 Feb 2025 08:38:47 +0800 Subject: [PATCH 1551/1585] riscv: hwprobe: export bfloat16 ISA extension MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export Zfbmin, Zvfbfmin, Zvfbfwma ISA extension through hwprobe. Signed-off-by: Inochi Amaoto Reviewed-by: Clément Léger Link: https://lore.kernel.org/r/20250213003849.147358-4-inochiama@gmail.com Signed-off-by: Alexandre Ghiti --- Documentation/arch/riscv/hwprobe.rst | 12 ++++++++++++ arch/riscv/include/uapi/asm/hwprobe.h | 3 +++ arch/riscv/kernel/sys_hwprobe.c | 3 +++ 3 files changed, 18 insertions(+) diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index 955fbcd19ce90..a9cb40e407a48 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -242,6 +242,18 @@ The following keys are defined: * :c:macro:`RISCV_HWPROBE_EXT_SUPM`: The Supm extension is supported as defined in version 1.0 of the RISC-V Pointer Masking extensions. + * :c:macro:`RISCV_HWPROBE_EXT_ZFBFMIN`: The Zfbfmin extension is supported as + defined in the RISC-V ISA manual starting from commit 4dc23d6229de + ("Added Chapter title to BF16"). + + * :c:macro:`RISCV_HWPROBE_EXT_ZVFBFMIN`: The Zvfbfmin extension is supported as + defined in the RISC-V ISA manual starting from commit 4dc23d6229de + ("Added Chapter title to BF16"). + + * :c:macro:`RISCV_HWPROBE_EXT_ZVFBFWMA`: The Zvfbfwma extension is supported as + defined in the RISC-V ISA manual starting from commit 4dc23d6229de + ("Added Chapter title to BF16"). + * :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`, but the key was mistakenly classified as a bitmask rather than a value. diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 3af142b99f778..aecc1c800d54b 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -73,6 +73,9 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZCMOP (1ULL << 47) #define RISCV_HWPROBE_EXT_ZAWRS (1ULL << 48) #define RISCV_HWPROBE_EXT_SUPM (1ULL << 49) +#define RISCV_HWPROBE_EXT_ZFBFMIN (1ULL << 50) +#define RISCV_HWPROBE_EXT_ZVFBFMIN (1ULL << 51) +#define RISCV_HWPROBE_EXT_ZVFBFWMA (1ULL << 52) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index cb93adfffc486..bd215f58bd1b7 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -131,6 +131,8 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZVE64D); EXT_KEY(ZVE64F); EXT_KEY(ZVE64X); + EXT_KEY(ZVFBFMIN); + EXT_KEY(ZVFBFWMA); EXT_KEY(ZVFH); EXT_KEY(ZVFHMIN); EXT_KEY(ZVKB); @@ -147,6 +149,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZCD); EXT_KEY(ZCF); EXT_KEY(ZFA); + EXT_KEY(ZFBFMIN); EXT_KEY(ZFH); EXT_KEY(ZFHMIN); } -- GitLab From de70b532f91bbcfa9f4100f1a2cd62810c799239 Mon Sep 17 00:00:00 2001 From: Yunhui Cui Date: Wed, 26 Feb 2025 14:32:04 +0800 Subject: [PATCH 1552/1585] RISC-V: Enable cbo.clean/flush in usermode Enabling cbo.clean and cbo.flush in user mode makes it more convenient to manage the cache state and achieve better performance. Reviewed-by: Andrew Jones Signed-off-by: Yunhui Cui Link: https://lore.kernel.org/r/20250226063206.71216-2-cuiyunhui@bytedance.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/cpufeature.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 0ff97cf4dc2ab..ef34622902bf9 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -32,6 +32,7 @@ #define NUM_ALPHA_EXTS ('z' - 'a' + 1) static bool any_cpu_has_zicboz; +static bool any_cpu_has_zicbom; unsigned long elf_hwcap __read_mostly; @@ -100,6 +101,8 @@ static int riscv_ext_zicbom_validate(const struct riscv_isa_ext_data *data, pr_err("Zicbom disabled as cbom-block-size present, but is not a power-of-2\n"); return -EINVAL; } + + any_cpu_has_zicbom = true; return 0; } @@ -1036,6 +1039,11 @@ void __init riscv_user_isa_enable(void) current->thread.envcfg |= ENVCFG_CBZE; else if (any_cpu_has_zicboz) pr_warn("Zicboz disabled as it is unavailable on some harts\n"); + + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOM)) + current->thread.envcfg |= ENVCFG_CBCFE; + else if (any_cpu_has_zicbom) + pr_warn("Zicbom disabled as it is unavailable on some harts\n"); } #ifdef CONFIG_RISCV_ALTERNATIVE -- GitLab From eb10039709402cc1fab1533a1ecc1a54c2152e68 Mon Sep 17 00:00:00 2001 From: Yunhui Cui Date: Wed, 26 Feb 2025 14:32:05 +0800 Subject: [PATCH 1553/1585] RISC-V: hwprobe: Expose Zicbom extension and its block size Expose Zicbom through hwprobe and also provide a key to extract its respective block size. [ alex: Fix merge conflicts and hwprobe numbering ] Reviewed-by: Andrew Jones Reviewed-by: Samuel Holland Signed-off-by: Yunhui Cui Link: https://lore.kernel.org/r/20250226063206.71216-3-cuiyunhui@bytedance.com Signed-off-by: Alexandre Ghiti --- Documentation/arch/riscv/hwprobe.rst | 6 ++++++ arch/riscv/include/asm/hwprobe.h | 2 +- arch/riscv/include/uapi/asm/hwprobe.h | 2 ++ arch/riscv/kernel/sys_hwprobe.c | 8 +++++++- 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index 67ef406cdb675..2792f12e90baa 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -260,6 +260,9 @@ The following keys are defined: defined in the RISC-V ISA manual starting from commit 4dc23d6229de ("Added Chapter title to BF16"). + * :c:macro:`RISCV_HWPROBE_EXT_ZICBOM`: The Zicbom extension is supported, as + ratified in commit 3dd606f ("Create cmobase-v1.0.pdf") of riscv-CMOs. + * :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`, but the key was mistakenly classified as a bitmask rather than a value. @@ -321,3 +324,6 @@ The following keys are defined: * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR`: The xtheadvector vendor extension is supported in the T-Head ISA extensions spec starting from commit a18c801634 ("Add T-Head VECTOR vendor extension. "). + +* :c:macro:`RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE`: An unsigned int which + represents the size of the Zicbom block in bytes. diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index dd624523981c8..1f690fea0e03d 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -8,7 +8,7 @@ #include -#define RISCV_HWPROBE_MAX_KEY 11 +#define RISCV_HWPROBE_MAX_KEY 12 static inline bool riscv_hwprobe_key_is_valid(__s64 key) { diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 08ab52fe8004e..056decf65b1b7 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -78,6 +78,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZFBFMIN (1ULL << 52) #define RISCV_HWPROBE_EXT_ZVFBFMIN (1ULL << 53) #define RISCV_HWPROBE_EXT_ZVFBFWMA (1ULL << 54) +#define RISCV_HWPROBE_EXT_ZICBOM (1ULL << 55) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) @@ -100,6 +101,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_MISALIGNED_VECTOR_FAST 3 #define RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED 4 #define RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0 11 +#define RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE 12 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ /* Flags */ diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 32509111fe0ac..cfe6ceaf30693 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -107,6 +107,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZCA); EXT_KEY(ZCB); EXT_KEY(ZCMOP); + EXT_KEY(ZICBOM); EXT_KEY(ZICBOZ); EXT_KEY(ZICNTR); EXT_KEY(ZICOND); @@ -166,7 +167,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, pair->value &= ~missing; } -static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext) +static bool hwprobe_ext0_has(const struct cpumask *cpus, u64 ext) { struct riscv_hwprobe pair; @@ -284,6 +285,11 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ)) pair->value = riscv_cboz_block_size; break; + case RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE: + pair->value = 0; + if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOM)) + pair->value = riscv_cbom_block_size; + break; case RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS: pair->value = user_max_virt_addr(); break; -- GitLab From 36dec9e44805f80d32277be76d16c88373bdc20d Mon Sep 17 00:00:00 2001 From: Yunhui Cui Date: Wed, 26 Feb 2025 14:32:06 +0800 Subject: [PATCH 1554/1585] RISC-V: selftests: Add TEST_ZICBOM into CBO tests Add test for Zicbom and its block size into CBO tests, when Zicbom is present, test that cbo.clean/flush may be issued and works. As the software can't verify the clean/flush functions, we just judged that cbo.clean/flush isn't executed illegally. Reviewed-by: Andrew Jones Reviewed-by: Samuel Holland Signed-off-by: Yunhui Cui Link: https://lore.kernel.org/r/20250226063206.71216-4-cuiyunhui@bytedance.com Signed-off-by: Alexandre Ghiti --- tools/testing/selftests/riscv/hwprobe/cbo.c | 66 +++++++++++++++++---- 1 file changed, 55 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c index a40541bb7c7de..5e96ef785d0dd 100644 --- a/tools/testing/selftests/riscv/hwprobe/cbo.c +++ b/tools/testing/selftests/riscv/hwprobe/cbo.c @@ -50,6 +50,14 @@ static void cbo_clean(char *base) { cbo_insn(base, 1); } static void cbo_flush(char *base) { cbo_insn(base, 2); } static void cbo_zero(char *base) { cbo_insn(base, 4); } +static void test_no_cbo_inval(void *arg) +{ + ksft_print_msg("Testing cbo.inval instruction remain privileged\n"); + illegal_insn = false; + cbo_inval(&mem[0]); + ksft_test_result(illegal_insn, "No cbo.inval\n"); +} + static void test_no_zicbom(void *arg) { ksft_print_msg("Testing Zicbom instructions remain privileged\n"); @@ -61,10 +69,6 @@ static void test_no_zicbom(void *arg) illegal_insn = false; cbo_flush(&mem[0]); ksft_test_result(illegal_insn, "No cbo.flush\n"); - - illegal_insn = false; - cbo_inval(&mem[0]); - ksft_test_result(illegal_insn, "No cbo.inval\n"); } static void test_no_zicboz(void *arg) @@ -81,6 +85,30 @@ static bool is_power_of_2(__u64 n) return n != 0 && (n & (n - 1)) == 0; } +static void test_zicbom(void *arg) +{ + struct riscv_hwprobe pair = { + .key = RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE, + }; + cpu_set_t *cpus = (cpu_set_t *)arg; + __u64 block_size; + long rc; + + rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)cpus, 0); + block_size = pair.value; + ksft_test_result(rc == 0 && pair.key == RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE && + is_power_of_2(block_size), "Zicbom block size\n"); + ksft_print_msg("Zicbom block size: %llu\n", block_size); + + illegal_insn = false; + cbo_clean(&mem[block_size]); + ksft_test_result(!illegal_insn, "cbo.clean\n"); + + illegal_insn = false; + cbo_flush(&mem[block_size]); + ksft_test_result(!illegal_insn, "cbo.flush\n"); +} + static void test_zicboz(void *arg) { struct riscv_hwprobe pair = { @@ -129,7 +157,7 @@ static void test_zicboz(void *arg) ksft_test_result_pass("cbo.zero check\n"); } -static void check_no_zicboz_cpus(cpu_set_t *cpus) +static void check_no_zicbo_cpus(cpu_set_t *cpus, __u64 cbo) { struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_IMA_EXT_0, @@ -137,6 +165,7 @@ static void check_no_zicboz_cpus(cpu_set_t *cpus) cpu_set_t one_cpu; int i = 0, c = 0; long rc; + char *cbostr; while (i++ < CPU_COUNT(cpus)) { while (!CPU_ISSET(c, cpus)) @@ -148,10 +177,13 @@ static void check_no_zicboz_cpus(cpu_set_t *cpus) rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&one_cpu, 0); assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0); - if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ) - ksft_exit_fail_msg("Zicboz is only present on a subset of harts.\n" - "Use taskset to select a set of harts where Zicboz\n" - "presence (present or not) is consistent for each hart\n"); + cbostr = cbo == RISCV_HWPROBE_EXT_ZICBOZ ? "Zicboz" : "Zicbom"; + + if (pair.value & cbo) + ksft_exit_fail_msg("%s is only present on a subset of harts.\n" + "Use taskset to select a set of harts where %s\n" + "presence (present or not) is consistent for each hart\n", + cbostr, cbostr); ++c; } } @@ -159,7 +191,9 @@ static void check_no_zicboz_cpus(cpu_set_t *cpus) enum { TEST_ZICBOZ, TEST_NO_ZICBOZ, + TEST_ZICBOM, TEST_NO_ZICBOM, + TEST_NO_CBO_INVAL, }; static struct test_info { @@ -169,7 +203,9 @@ static struct test_info { } tests[] = { [TEST_ZICBOZ] = { .nr_tests = 3, test_zicboz }, [TEST_NO_ZICBOZ] = { .nr_tests = 1, test_no_zicboz }, - [TEST_NO_ZICBOM] = { .nr_tests = 3, test_no_zicbom }, + [TEST_ZICBOM] = { .nr_tests = 3, test_zicbom }, + [TEST_NO_ZICBOM] = { .nr_tests = 2, test_no_zicbom }, + [TEST_NO_CBO_INVAL] = { .nr_tests = 1, test_no_cbo_inval }, }; int main(int argc, char **argv) @@ -189,6 +225,7 @@ int main(int argc, char **argv) assert(rc == 0); tests[TEST_NO_ZICBOZ].enabled = true; tests[TEST_NO_ZICBOM].enabled = true; + tests[TEST_NO_CBO_INVAL].enabled = true; } rc = sched_getaffinity(0, sizeof(cpu_set_t), &cpus); @@ -206,7 +243,14 @@ int main(int argc, char **argv) tests[TEST_ZICBOZ].enabled = true; tests[TEST_NO_ZICBOZ].enabled = false; } else { - check_no_zicboz_cpus(&cpus); + check_no_zicbo_cpus(&cpus, RISCV_HWPROBE_EXT_ZICBOZ); + } + + if (pair.value & RISCV_HWPROBE_EXT_ZICBOM) { + tests[TEST_ZICBOM].enabled = true; + tests[TEST_NO_ZICBOM].enabled = false; + } else { + check_no_zicbo_cpus(&cpus, RISCV_HWPROBE_EXT_ZICBOM); } for (i = 0; i < ARRAY_SIZE(tests); ++i) -- GitLab From a4a58f510bd8c28f6191eed5698a5291b420c2d6 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Sat, 9 Nov 2024 09:46:05 +0800 Subject: [PATCH 1555/1585] riscv: Remove unused TASK_TI_FLAGS Since commit f0bddf50586d ("riscv: entry: Convert to generic entry"), TASK_TI_FLAGS is not used any more, so remove it. Fixes: f0bddf50586d ("riscv: entry: Convert to generic entry") Signed-off-by: Jinjie Ruan Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20241109014605.2801492-1-ruanjinjie@huawei.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/asm-offsets.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index e89455a6a0e50..16490755304e0 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -36,7 +36,6 @@ void asm_offsets(void) OFFSET(TASK_THREAD_S11, task_struct, thread.s[11]); OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu); - OFFSET(TASK_TI_FLAGS, task_struct, thread_info.flags); OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count); OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp); OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp); -- GitLab From 7f238b12660e53d7905b0d9989866b95a32c2467 Mon Sep 17 00:00:00 2001 From: Chin Yik Ming Date: Thu, 30 Jan 2025 04:38:43 +0800 Subject: [PATCH 1556/1585] riscv: Simplify base extension checks and direct boolean return Reduce three lines checking to single line using a ternary conditional expression for getting the base extension word. In addition, the test_bit macro function already return a boolean which matches the return type of the caller, so directly return the result of the test_bit macro function. Signed-off-by: Chin Yik Ming Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20250129203843.1136838-1-yikming2222@gmail.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/cpufeature.c | 6 ++---- arch/riscv/kernel/vendor_extensions.c | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index ef34622902bf9..6ec9499e2cf27 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -54,9 +54,7 @@ u32 thead_vlenb_of; */ unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap) { - if (!isa_bitmap) - return riscv_isa[0]; - return isa_bitmap[0]; + return !isa_bitmap ? riscv_isa[0] : isa_bitmap[0]; } EXPORT_SYMBOL_GPL(riscv_isa_extension_base); @@ -77,7 +75,7 @@ bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned i if (bit >= RISCV_ISA_EXT_MAX) return false; - return test_bit(bit, bmap) ? true : false; + return test_bit(bit, bmap); } EXPORT_SYMBOL_GPL(__riscv_isa_extension_available); diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c index a31ff84740ebc..9feb7f67a0a39 100644 --- a/arch/riscv/kernel/vendor_extensions.c +++ b/arch/riscv/kernel/vendor_extensions.c @@ -61,6 +61,6 @@ bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsig if (bit >= RISCV_ISA_VENDOR_EXT_MAX) return false; - return test_bit(bit, bmap->isa) ? true : false; + return test_bit(bit, bmap->isa); } EXPORT_SYMBOL_GPL(__riscv_isa_vendor_extension_available); -- GitLab From ffef54ad41101f98ea6dd1dcd71c60bb6b7c8ee9 Mon Sep 17 00:00:00 2001 From: Nick Hu Date: Wed, 19 Feb 2025 19:41:34 +0800 Subject: [PATCH 1557/1585] riscv: Add stimecmp save and restore If the HW support the SSTC extension, we should save and restore the stimecmp register while cpu non retention suspend. Signed-off-by: Nick Hu Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20250219114135.27764-2-nick.hu@sifive.com Signed-off-by: Alexandre Ghiti --- arch/riscv/include/asm/suspend.h | 4 ++++ arch/riscv/kernel/suspend.c | 14 ++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/arch/riscv/include/asm/suspend.h b/arch/riscv/include/asm/suspend.h index 4ffb022b097f4..dc5782b5fbad0 100644 --- a/arch/riscv/include/asm/suspend.h +++ b/arch/riscv/include/asm/suspend.h @@ -18,6 +18,10 @@ struct suspend_context { unsigned long ie; #ifdef CONFIG_MMU unsigned long satp; + unsigned long stimecmp; +#if __riscv_xlen < 64 + unsigned long stimecmph; +#endif #endif }; diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c index 9a8a0dc035b26..24b3f57d467f8 100644 --- a/arch/riscv/kernel/suspend.c +++ b/arch/riscv/kernel/suspend.c @@ -30,6 +30,13 @@ void suspend_save_csrs(struct suspend_context *context) */ #ifdef CONFIG_MMU + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SSTC)) { + context->stimecmp = csr_read(CSR_STIMECMP); +#if __riscv_xlen < 64 + context->stimecmph = csr_read(CSR_STIMECMPH); +#endif + } + context->satp = csr_read(CSR_SATP); #endif } @@ -43,6 +50,13 @@ void suspend_restore_csrs(struct suspend_context *context) csr_write(CSR_IE, context->ie); #ifdef CONFIG_MMU + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SSTC)) { + csr_write(CSR_STIMECMP, context->stimecmp); +#if __riscv_xlen < 64 + csr_write(CSR_STIMECMPH, context->stimecmph); +#endif + } + csr_write(CSR_SATP, context->satp); #endif } -- GitLab From 70c93b026ed07078e933583591aa9ca6701cd9da Mon Sep 17 00:00:00 2001 From: Nick Hu Date: Wed, 19 Feb 2025 19:41:35 +0800 Subject: [PATCH 1558/1585] clocksource/drivers/timer-riscv: Stop stimecmp when cpu hotplug Stop the timer when the cpu is going to be offline otherwise the timer interrupt may be pending while performing power-down. Suggested-by: Anup Patel Link: https://lore.kernel.org/lkml/20240829033904.477200-3-nick.hu@sifive.com/T/#u Signed-off-by: Nick Hu Reviewed-by: Anup Patel Acked-by: Daniel Lezcano Link: https://lore.kernel.org/r/20250219114135.27764-3-nick.hu@sifive.com Signed-off-by: Alexandre Ghiti --- drivers/clocksource/timer-riscv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index 48ce50c5f5e68..4d7cf338824a3 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -126,7 +126,13 @@ static int riscv_timer_starting_cpu(unsigned int cpu) static int riscv_timer_dying_cpu(unsigned int cpu) { + /* + * Stop the timer when the cpu is going to be offline otherwise + * the timer interrupt may be pending while performing power-down. + */ + riscv_clock_event_stop(); disable_percpu_irq(riscv_clock_event_irq); + return 0; } -- GitLab From 418af0eafb48bbd972040703e5ff5ba94526b5b5 Mon Sep 17 00:00:00 2001 From: Chin Yik Ming Date: Fri, 15 Nov 2024 05:27:25 +0800 Subject: [PATCH 1559/1585] riscv: Fix a comment typo in set_mm_asid() s/verion/version Signed-off-by: Chin Yik Ming Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20241114212725.4172401-1-yikming2222@gmail.com Signed-off-by: Alexandre Ghiti --- arch/riscv/mm/context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 4abe3de23225e..55c20ad1f7444 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -158,7 +158,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu) * * - We get a zero back from the cmpxchg and end up waiting on the * lock. Taking the lock synchronises with the rollover and so - * we are forced to see the updated verion. + * we are forced to see the updated version. * * - We get a valid context back from the cmpxchg then we continue * using old ASID because __flush_context() would have marked ASID -- GitLab From c2db83fe10331861d7feb60615c18a72eaf9d444 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Feb 2025 15:07:17 +0100 Subject: [PATCH 1560/1585] riscv: defconfig: Disable Renesas SoC support Follow-up to commit e36ddf3226864e09 ("riscv: defconfig: Disable RZ/Five peripheral support") in v6.12-rc1: - Disable ARCH_RENESAS, too, as currently RZ/Five is the sole Renesas RISC-V SoC, - Drop no longer needed explicit disable of USB_XHCI_RCAR, which depends on ARCH_RENESAS. Signed-off-by: Geert Uytterhoeven Reviewed-by: Lad Prabhakar Link: https://lore.kernel.org/r/e8a2fb273c8c68bd6d526b924b4212f397195b28.1738764211.git.geert+renesas@glider.be Signed-off-by: Alexandre Ghiti --- arch/riscv/configs/defconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 0f7dcbe3c45bb..3c8e16d71e175 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -26,7 +26,6 @@ CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y CONFIG_ARCH_MICROCHIP=y -CONFIG_ARCH_RENESAS=y CONFIG_ARCH_SIFIVE=y CONFIG_ARCH_SOPHGO=y CONFIG_ARCH_SPACEMIT=y @@ -202,7 +201,6 @@ CONFIG_USB=y CONFIG_USB_OTG=y CONFIG_USB_XHCI_HCD=y CONFIG_USB_XHCI_PLATFORM=y -# CONFIG_USB_XHCI_RCAR is not set CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_HCD_PLATFORM=y CONFIG_USB_OHCI_HCD=y -- GitLab From 72770690e02c082efbbbd78d768028cf8dd18b9c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Feb 2025 15:09:03 +0100 Subject: [PATCH 1561/1585] riscv: Remove duplicate CLINT_TIMER selections Since commit f862bbf4cdca696e ("riscv: Allow NOMMU kernels to run in S-mode") in v6.10, CLINT_TIMER is selected by the main RISCV symbol when RISCV_M_MODE is enabled. Signed-off-by: Geert Uytterhoeven Reviewed-by: Conor Dooley Acked-by: Conor Dooley Link: https://lore.kernel.org/r/ce55529a42fa232cacd580e38866c60701f91095.1738764474.git.geert+renesas@glider.be Signed-off-by: Alexandre Ghiti --- arch/riscv/Kconfig.socs | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 17606940bb523..8b503e54fa1ba 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -59,7 +59,6 @@ config ARCH_THEAD config ARCH_VIRT bool "QEMU Virt Machine" - select CLINT_TIMER if RISCV_M_MODE select POWER_RESET select POWER_RESET_SYSCON select POWER_RESET_SYSCON_POWEROFF @@ -79,7 +78,6 @@ config ARCH_CANAAN config SOC_CANAAN_K210 bool "Canaan Kendryte K210 SoC" depends on !MMU && ARCH_CANAAN - select CLINT_TIMER if RISCV_M_MODE select ARCH_HAS_RESET_CONTROLLER select PINCTRL select COMMON_CLK -- GitLab From bba547810c66434475d8800b3411c59ef71eafe9 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Mon, 24 Feb 2025 18:42:21 -0800 Subject: [PATCH 1562/1585] riscv: tracing: Fix __write_overflow_field in ftrace_partial_regs() The size of ®s->a0 is unknown, causing the error: ../include/linux/fortify-string.h:571:25: warning: call to '__write_overflow_field' declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] Fix this by wrapping the required registers in pt_regs with struct_group() and reference the group when doing the offending memcpy(). Signed-off-by: Charlie Jenkins Reviewed-by: Alexandre Ghiti Tested-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250224-fix_ftrace_partial_regs-v1-1-54b906417e86@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/include/asm/ftrace.h | 2 +- arch/riscv/include/asm/ptrace.h | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index c4721ce44ca47..ec6db1162021f 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -207,7 +207,7 @@ ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs) { struct __arch_ftrace_regs *afregs = arch_ftrace_regs(fregs); - memcpy(®s->a0, afregs->args, sizeof(afregs->args)); + memcpy(®s->a_regs, afregs->args, sizeof(afregs->args)); regs->epc = afregs->epc; regs->ra = afregs->ra; regs->sp = afregs->sp; diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h index b5b0adcc85c18..2910231977cb7 100644 --- a/arch/riscv/include/asm/ptrace.h +++ b/arch/riscv/include/asm/ptrace.h @@ -23,14 +23,16 @@ struct pt_regs { unsigned long t2; unsigned long s0; unsigned long s1; - unsigned long a0; - unsigned long a1; - unsigned long a2; - unsigned long a3; - unsigned long a4; - unsigned long a5; - unsigned long a6; - unsigned long a7; + struct_group(a_regs, + unsigned long a0; + unsigned long a1; + unsigned long a2; + unsigned long a3; + unsigned long a4; + unsigned long a5; + unsigned long a6; + unsigned long a7; + ); unsigned long s2; unsigned long s3; unsigned long s4; -- GitLab From 82e81b89501a9f19a3a0b0d7d9641d86c1956284 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 22 Dec 2024 09:08:25 +0900 Subject: [PATCH 1563/1585] riscv: migrate to the generic rule for built-in DTB Commit 654102df2ac2 ("kbuild: add generic support for built-in boot DTBs") introduced generic support for built-in DTBs. Select GENERIC_BUILTIN_DTB when built-in DTB support is enabled. To keep consistency across architectures, this commit also renames CONFIG_BUILTIN_DTB_SOURCE to CONFIG_BUILTIN_DTB_NAME. Signed-off-by: Masahiro Yamada Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20241222000836.2578171-1-masahiroy@kernel.org Signed-off-by: Alexandre Ghiti --- arch/riscv/Kbuild | 1 - arch/riscv/Kconfig | 3 ++- arch/riscv/boot/dts/Makefile | 2 -- arch/riscv/configs/nommu_k210_defconfig | 2 +- arch/riscv/configs/nommu_k210_sdcard_defconfig | 2 +- 5 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/riscv/Kbuild b/arch/riscv/Kbuild index 2c585f7a0b6ef..126fb738fc442 100644 --- a/arch/riscv/Kbuild +++ b/arch/riscv/Kbuild @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y += kernel/ mm/ net/ -obj-$(CONFIG_BUILTIN_DTB) += boot/dts/ obj-$(CONFIG_CRYPTO) += crypto/ obj-y += errata/ obj-$(CONFIG_KVM) += kvm/ diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index ff35eb16853cd..bae5fd79d6901 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -1272,13 +1272,14 @@ config RISCV_ISA_FALLBACK config BUILTIN_DTB bool "Built-in device tree" depends on OF && NONPORTABLE + select GENERIC_BUILTIN_DTB help Build a device tree into the Linux image. This option should be selected if no bootloader is being used. If unsure, say N. -config BUILTIN_DTB_SOURCE +config BUILTIN_DTB_NAME string "Built-in device tree source" depends on BUILTIN_DTB help diff --git a/arch/riscv/boot/dts/Makefile b/arch/riscv/boot/dts/Makefile index bff887d38abe4..64a898da9aee3 100644 --- a/arch/riscv/boot/dts/Makefile +++ b/arch/riscv/boot/dts/Makefile @@ -8,5 +8,3 @@ subdir-y += sophgo subdir-y += spacemit subdir-y += starfive subdir-y += thead - -obj-$(CONFIG_BUILTIN_DTB) := $(addsuffix .dtb.o, $(CONFIG_BUILTIN_DTB_SOURCE)) diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig index 87ff5a1233af8..ee18d1e333f20 100644 --- a/arch/riscv/configs/nommu_k210_defconfig +++ b/arch/riscv/configs/nommu_k210_defconfig @@ -35,7 +35,7 @@ CONFIG_NR_CPUS=2 CONFIG_CMDLINE="earlycon console=ttySIF0" CONFIG_CMDLINE_FORCE=y CONFIG_BUILTIN_DTB=y -CONFIG_BUILTIN_DTB_SOURCE="canaan/k210_generic" +CONFIG_BUILTIN_DTB_NAME="canaan/k210_generic" # CONFIG_SECCOMP is not set # CONFIG_STACKPROTECTOR is not set # CONFIG_GCC_PLUGINS is not set diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig index 95cbd574f291f..e770d81b738e1 100644 --- a/arch/riscv/configs/nommu_k210_sdcard_defconfig +++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig @@ -27,7 +27,7 @@ CONFIG_NR_CPUS=2 CONFIG_CMDLINE="earlycon console=ttySIF0 root=/dev/mmcblk0p1 rootwait ro" CONFIG_CMDLINE_FORCE=y CONFIG_BUILTIN_DTB=y -CONFIG_BUILTIN_DTB_SOURCE="canaan/k210_generic" +CONFIG_BUILTIN_DTB_NAME="canaan/k210_generic" # CONFIG_SECCOMP is not set # CONFIG_STACKPROTECTOR is not set # CONFIG_GCC_PLUGINS is not set -- GitLab From 5f1a58ed91a040d4625d854f9bb3dd4995919202 Mon Sep 17 00:00:00 2001 From: Juhan Jin Date: Thu, 6 Feb 2025 13:28:36 -0600 Subject: [PATCH 1564/1585] riscv: ftrace: Add parentheses in macro definitions of make_call_t0 and make_call_ra This patch adds parentheses to parameters caller and callee of macros make_call_t0 and make_call_ra. Every existing invocation of these two macros uses a single variable for each argument, so the absence of the parentheses seems okay. However, future invocations might use more complex expressions as arguments. For example, a future invocation might look like this: make_call_t0(a - b, c, call). Without parentheses in the macro definition, the macro invocation expands to: ... unsigned int offset = (unsigned long) c - (unsigned long) a - b; ... which is clearly wrong. The use of parentheses ensures arguments are correctly evaluated and potentially saves future users of make_call_t0 and make_call_ra debugging trouble. Fixes: 6724a76cff85 ("riscv: ftrace: Reduce the detour code size to half") Signed-off-by: Juhan Jin Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/tencent_AE90AA59903A628E87E9F80E563DA5BA5508@qq.com Signed-off-by: Alexandre Ghiti --- arch/riscv/include/asm/ftrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index ec6db1162021f..9704a73e515a5 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -92,7 +92,7 @@ struct dyn_arch_ftrace { #define make_call_t0(caller, callee, call) \ do { \ unsigned int offset = \ - (unsigned long) callee - (unsigned long) caller; \ + (unsigned long) (callee) - (unsigned long) (caller); \ call[0] = to_auipc_t0(offset); \ call[1] = to_jalr_t0(offset); \ } while (0) @@ -108,7 +108,7 @@ do { \ #define make_call_ra(caller, callee, call) \ do { \ unsigned int offset = \ - (unsigned long) callee - (unsigned long) caller; \ + (unsigned long) (callee) - (unsigned long) (caller); \ call[0] = to_auipc_ra(offset); \ call[1] = to_jalr_ra(offset); \ } while (0) -- GitLab From eac5b138814a69435dae04c41591023477b3a18d Mon Sep 17 00:00:00 2001 From: Zixian Zeng Date: Tue, 14 Jan 2025 00:30:20 +0800 Subject: [PATCH 1565/1585] riscv: remove redundant CMDLINE_FORCE check Drop redundant CMDLINE_FORCE check as it's already done in function early_init_dt_scan_chosen(). Signed-off-by: Zixian Zeng Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250114-rebund-v1-1-5632b2d54d6c@gmail.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/setup.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 4fe45daa6281e..c174544eefc8e 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -235,11 +235,6 @@ static void __init parse_dtb(void) } else { pr_err("No DTB passed to the kernel\n"); } - -#ifdef CONFIG_CMDLINE_FORCE - strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); - pr_info("Forcing kernel command line to: %s\n", boot_command_line); -#endif } #if defined(CONFIG_RISCV_COMBO_SPINLOCKS) -- GitLab From eb8db421ce83f4acf3ca51e642120f42adea3a7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 17 Feb 2025 08:37:58 +0100 Subject: [PATCH 1566/1585] riscv: mm: Don't use %pK through printk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restricted pointers ("%pK") are not meant to be used through printk(). It can unintentionally expose security sensitive, raw pointer values. Use regular pointer formatting instead. Link: https://lore.kernel.org/lkml/20250113171731-dc10e3c1-da64-4af0-b767-7c7070468023@linutronix.de/ Signed-off-by: Thomas Weißschuh Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250217-restricted-pointers-riscv-v1-1-72a078076a76@linutronix.de Signed-off-by: Alexandre Ghiti --- arch/riscv/mm/physaddr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c index 18706f457da7e..559d291fac5c6 100644 --- a/arch/riscv/mm/physaddr.c +++ b/arch/riscv/mm/physaddr.c @@ -12,7 +12,7 @@ phys_addr_t __virt_to_phys(unsigned long x) * Boundary checking aginst the kernel linear mapping space. */ WARN(!is_linear_mapping(x) && !is_kernel_mapping(x), - "virt_to_phys used for non-linear address: %pK (%pS)\n", + "virt_to_phys used for non-linear address: %p (%pS)\n", (void *)x, (void *)x); return __va_to_pa_nodebug(x); -- GitLab From 475afa39b123699e910c61ad9a51cedce4a0d310 Mon Sep 17 00:00:00 2001 From: Tingbo Liao Date: Fri, 28 Feb 2025 01:08:01 -0800 Subject: [PATCH 1567/1585] riscv: Fix the __riscv_copy_vec_words_unaligned implementation Correct the VEC_S macro definition to fix the implementation of vector words copy in the case of unalignment in RISC-V. Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe") Reviewed-by: Alexandre Ghiti Signed-off-by: Tingbo Liao Link: https://lore.kernel.org/r/20250228090801.8334-1-tingbo.liao@starfivetech.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/vec-copy-unaligned.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/vec-copy-unaligned.S b/arch/riscv/kernel/vec-copy-unaligned.S index d16f19f1b3b65..7ce4de6f6e694 100644 --- a/arch/riscv/kernel/vec-copy-unaligned.S +++ b/arch/riscv/kernel/vec-copy-unaligned.S @@ -11,7 +11,7 @@ #define WORD_SEW CONCATENATE(e, WORD_EEW) #define VEC_L CONCATENATE(vle, WORD_EEW).v -#define VEC_S CONCATENATE(vle, WORD_EEW).v +#define VEC_S CONCATENATE(vse, WORD_EEW).v /* void __riscv_copy_vec_words_unaligned(void *, const void *, size_t) */ /* Performs a memcpy without aligning buffers, using word loads and stores. */ -- GitLab From 33981b1c4e499021421686dcfa7b3d23a430d00e Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 28 Feb 2025 10:06:13 +0100 Subject: [PATCH 1568/1585] riscv: Fix missing __free_pages() in check_vector_unaligned_access() The locally allocated pages are never freed up, so add the corresponding __free_pages(). Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe") Link: https://lore.kernel.org/r/20250228090613.345309-1-alexghiti@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/unaligned_access_speed.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index 91f189cf16113..074ac4abd023e 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -349,7 +349,7 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned vector access speed\n", cpu); - return; + goto free; } if (word_cycles < byte_cycles) @@ -363,6 +363,9 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus (speed == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST) ? "fast" : "slow"); per_cpu(vector_misaligned_access, cpu) = speed; + +free: + __free_pages(page, MISALIGNED_BUFFER_ORDER); } static int riscv_online_cpu_vec(unsigned int cpu) -- GitLab From e8eb8e1bdae94b9e003f5909519fd311d0936890 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Mon, 17 Mar 2025 03:12:13 +0000 Subject: [PATCH 1569/1585] riscv: fgraph: Select HAVE_FUNCTION_GRAPH_TRACER depends on HAVE_DYNAMIC_FTRACE_WITH_ARGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, fgraph on riscv relies on the infrastructure of DYNAMIC_FTRACE_WITH_ARGS. However, DYNAMIC_FTRACE_WITH_ARGS may be turned off on riscv, which will cause the enabled fgraph to be abnormal. Therefore, let's select HAVE_FUNCTION_GRAPH_TRACER depends on HAVE_DYNAMIC_FTRACE_WITH_ARGS. Fixes: a3ed4157b7d8 ("fgraph: Replace fgraph_ret_regs with ftrace_regs") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503160820.dvqMpH0g-lkp@intel.com/ Signed-off-by: Pu Lehui Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20250317031214.4138436-1-pulehui@huaweicloud.com Signed-off-by: Alexandre Ghiti --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index bae5fd79d6901..e2c80e88b5cdd 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -150,7 +150,7 @@ config RISCV select HAVE_DYNAMIC_FTRACE_WITH_ARGS if HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_GRAPH_FUNC select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL - select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_GRAPH_TRACER if HAVE_DYNAMIC_FTRACE_WITH_ARGS select HAVE_FUNCTION_GRAPH_FREGS select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION select HAVE_EBPF_JIT if MMU -- GitLab From 67a5ba8f742f247bc83e46dd2313c142b1383276 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Mon, 17 Mar 2025 03:12:14 +0000 Subject: [PATCH 1570/1585] riscv: fgraph: Fix stack layout to match __arch_ftrace_regs argument of ftrace_return_to_handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Naresh Kamboju reported a "Bad frame pointer" kernel warning while running LTP trace ftrace_stress_test.sh in riscv. We can reproduce the same issue with the following command: ``` $ cd /sys/kernel/debug/tracing $ echo 'f:myprobe do_nanosleep%return args1=$retval' > dynamic_events $ echo 1 > events/fprobes/enable $ echo 1 > tracing_on $ sleep 1 ``` And we can get the following kernel warning: [ 127.692888] ------------[ cut here ]------------ [ 127.693755] Bad frame pointer: expected ff2000000065be50, received ba34c141e9594000 [ 127.693755] from func do_nanosleep return to ffffffff800ccb16 [ 127.698699] WARNING: CPU: 1 PID: 129 at kernel/trace/fgraph.c:755 ftrace_return_to_handler+0x1b2/0x1be [ 127.699894] Modules linked in: [ 127.700908] CPU: 1 UID: 0 PID: 129 Comm: sleep Not tainted 6.14.0-rc3-g0ab191c74642 #32 [ 127.701453] Hardware name: riscv-virtio,qemu (DT) [ 127.701859] epc : ftrace_return_to_handler+0x1b2/0x1be [ 127.702032] ra : ftrace_return_to_handler+0x1b2/0x1be [ 127.702151] epc : ffffffff8013b5e0 ra : ffffffff8013b5e0 sp : ff2000000065bd10 [ 127.702221] gp : ffffffff819c12f8 tp : ff60000080853100 t0 : 6e00000000000000 [ 127.702284] t1 : 0000000000000020 t2 : 6e7566206d6f7266 s0 : ff2000000065bd80 [ 127.702346] s1 : ff60000081262000 a0 : 000000000000007b a1 : ffffffff81894f20 [ 127.702408] a2 : 0000000000000010 a3 : fffffffffffffffe a4 : 0000000000000000 [ 127.702470] a5 : 0000000000000000 a6 : 0000000000000008 a7 : 0000000000000038 [ 127.702530] s2 : ba34c141e9594000 s3 : 0000000000000000 s4 : ff2000000065bdd0 [ 127.702591] s5 : 00007fff8adcf400 s6 : 000055556dc1d8c0 s7 : 0000000000000068 [ 127.702651] s8 : 00007fff8adf5d10 s9 : 000000000000006d s10: 0000000000000001 [ 127.702710] s11: 00005555737377c8 t3 : ffffffff819d899e t4 : ffffffff819d899e [ 127.702769] t5 : ffffffff819d89a0 t6 : ff2000000065bb18 [ 127.702826] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003 [ 127.703292] [] ftrace_return_to_handler+0x1b2/0x1be [ 127.703760] [] return_to_handler+0x16/0x26 [ 127.704009] [] return_to_handler+0x0/0x26 [ 127.704057] [] common_nsleep+0x42/0x54 [ 127.704117] [] __riscv_sys_clock_nanosleep+0xba/0x10a [ 127.704176] [] do_trap_ecall_u+0x188/0x218 [ 127.704295] [] handle_exception+0x14a/0x156 [ 127.705436] ---[ end trace 0000000000000000 ]--- The reason is that the stack layout for constructing argument for the ftrace_return_to_handler in the return_to_handler does not match the __arch_ftrace_regs structure of riscv, leading to unexpected results. Fixes: a3ed4157b7d8 ("fgraph: Replace fgraph_ret_regs with ftrace_regs") Reported-by: Linux Kernel Functional Testing Closes: https://lore.kernel.org/all/CA+G9fYvp_oAxeDFj88Tk2rfEZ7jtYKAKSwfYS66=57Db9TBdyA@mail.gmail.com Signed-off-by: Pu Lehui Reviewed-by: Alexandre Ghiti Tested-by: Björn Töpel Reviewed-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/r/20250317031214.4138436-2-pulehui@huaweicloud.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/mcount.S | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index 068168046e0ef..da4a4000e57ea 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -12,8 +12,6 @@ #include #include -#define ABI_SIZE_ON_STACK 80 - .text .macro SAVE_ABI_STATE @@ -28,12 +26,12 @@ * register if a0 was not saved. */ .macro SAVE_RET_ABI_STATE - addi sp, sp, -ABI_SIZE_ON_STACK - REG_S ra, 1*SZREG(sp) - REG_S s0, 8*SZREG(sp) - REG_S a0, 10*SZREG(sp) - REG_S a1, 11*SZREG(sp) - addi s0, sp, ABI_SIZE_ON_STACK + addi sp, sp, -FREGS_SIZE_ON_STACK + REG_S ra, FREGS_RA(sp) + REG_S s0, FREGS_S0(sp) + REG_S a0, FREGS_A0(sp) + REG_S a1, FREGS_A1(sp) + addi s0, sp, FREGS_SIZE_ON_STACK .endm .macro RESTORE_ABI_STATE @@ -43,11 +41,11 @@ .endm .macro RESTORE_RET_ABI_STATE - REG_L ra, 1*SZREG(sp) - REG_L s0, 8*SZREG(sp) - REG_L a0, 10*SZREG(sp) - REG_L a1, 11*SZREG(sp) - addi sp, sp, ABI_SIZE_ON_STACK + REG_L ra, FREGS_RA(sp) + REG_L s0, FREGS_S0(sp) + REG_L a0, FREGS_A0(sp) + REG_L a1, FREGS_A1(sp) + addi sp, sp, FREGS_SIZE_ON_STACK .endm SYM_TYPED_FUNC_START(ftrace_stub) -- GitLab From a65e0f67da24d1bff0dc679a018ced26d1952683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 19 Jun 2024 17:39:08 +0200 Subject: [PATCH 1571/1585] dt-bindings: riscv: add Zaamo and Zalrsc ISA extension description MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add description for the Zaamo and Zalrsc ISA extension[1]. Link: https://github.com/riscv/riscv-zaamo-zalrsc [1] Signed-off-by: Clément Léger Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20240619153913.867263-2-cleger@rivosinc.com Signed-off-by: Alexandre Ghiti --- .../devicetree/bindings/riscv/extensions.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml index f26997c3d34d3..73d8fec29ec2c 100644 --- a/Documentation/devicetree/bindings/riscv/extensions.yaml +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -224,6 +224,12 @@ properties: as ratified at commit 4a69197e5617 ("Update to ratified state") of riscv-svvptc. + - const: zaamo + description: | + The standard Zaamo extension for atomic memory operations as + ratified at commit e87412e621f1 ("integrate Zaamo and Zalrsc text + (#1304)") of the unprivileged ISA specification. + - const: zabha description: | The Zabha extension for Byte and Halfword Atomic Memory Operations @@ -236,6 +242,12 @@ properties: is supported as ratified at commit 5059e0ca641c ("update to ratified") of the riscv-zacas. + - const: zalrsc + description: | + The standard Zalrsc extension for load-reserved/store-conditional as + ratified at commit e87412e621f1 ("integrate Zaamo and Zalrsc text + (#1304)") of the unprivileged ISA specification. + - const: zawrs description: | The Zawrs extension for entering a low-power state or for trapping @@ -718,6 +730,13 @@ properties: const: zfbfmin - contains: const: zvfbfmin + # Zacas depends on Zaamo + - if: + contains: + const: zacas + then: + contains: + const: zaamo allOf: # Zcf extension does not exist on rv64 -- GitLab From 35173b666a16ce631da9d70cd1b376162d9dea53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 19 Jun 2024 17:39:09 +0200 Subject: [PATCH 1572/1585] riscv: add parsing for Zaamo and Zalrsc extensions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These 2 new extensions are actually a subset of the A extension which provides atomic memory operations and load-reserved/store-conditional instructions. Signed-off-by: Clément Léger Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20240619153913.867263-3-cleger@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/include/asm/hwcap.h | 2 ++ arch/riscv/kernel/cpufeature.c | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 14cc29f2a723c..e3cbf203cdde7 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -103,6 +103,8 @@ #define RISCV_ISA_EXT_ZFBFMIN 94 #define RISCV_ISA_EXT_ZVFBFMIN 95 #define RISCV_ISA_EXT_ZVFBFWMA 96 +#define RISCV_ISA_EXT_ZAAMO 97 +#define RISCV_ISA_EXT_ZALRSC 98 #define RISCV_ISA_EXT_XLINUXENVCFG 127 diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 6ec9499e2cf27..af9c346da96e9 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -182,6 +182,11 @@ static int riscv_ext_svadu_validate(const struct riscv_isa_ext_data *data, return 0; } +static const unsigned int riscv_a_exts[] = { + RISCV_ISA_EXT_ZAAMO, + RISCV_ISA_EXT_ZALRSC, +}; + static const unsigned int riscv_zk_bundled_exts[] = { RISCV_ISA_EXT_ZBKB, RISCV_ISA_EXT_ZBKC, @@ -353,7 +358,7 @@ static const unsigned int riscv_c_exts[] = { const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(i, RISCV_ISA_EXT_i), __RISCV_ISA_EXT_DATA(m, RISCV_ISA_EXT_m), - __RISCV_ISA_EXT_DATA(a, RISCV_ISA_EXT_a), + __RISCV_ISA_EXT_SUPERSET(a, RISCV_ISA_EXT_a, riscv_a_exts), __RISCV_ISA_EXT_DATA(f, RISCV_ISA_EXT_f), __RISCV_ISA_EXT_DATA(d, RISCV_ISA_EXT_d), __RISCV_ISA_EXT_DATA(q, RISCV_ISA_EXT_q), @@ -373,8 +378,10 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE), __RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM), __RISCV_ISA_EXT_DATA(zimop, RISCV_ISA_EXT_ZIMOP), + __RISCV_ISA_EXT_DATA(zaamo, RISCV_ISA_EXT_ZAAMO), __RISCV_ISA_EXT_DATA(zabha, RISCV_ISA_EXT_ZABHA), __RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS), + __RISCV_ISA_EXT_DATA(zalrsc, RISCV_ISA_EXT_ZALRSC), __RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS), __RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA), __RISCV_ISA_EXT_DATA_VALIDATE(zfbfmin, RISCV_ISA_EXT_ZFBFMIN, riscv_ext_f_depends), -- GitLab From 9d45d1ff90a6888f6138eb7e1f2619ef427831d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 19 Jun 2024 17:39:10 +0200 Subject: [PATCH 1573/1585] riscv: hwprobe: export Zaamo and Zalrsc extensions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export the Zaamo and Zalrsc extensions to userspace using hwprobe. Signed-off-by: Clément Léger Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20240619153913.867263-4-cleger@rivosinc.com Signed-off-by: Alexandre Ghiti --- Documentation/arch/riscv/hwprobe.rst | 8 ++++++++ arch/riscv/include/uapi/asm/hwprobe.h | 2 ++ arch/riscv/kernel/sys_hwprobe.c | 2 ++ 3 files changed, 12 insertions(+) diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index 2792f12e90baa..53607d962653b 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -245,6 +245,14 @@ The following keys are defined: ratified in commit 98918c844281 ("Merge pull request #1217 from riscv/zawrs") of riscv-isa-manual. + * :c:macro:`RISCV_HWPROBE_EXT_ZAAMO`: The Zaamo extension is supported as + defined in the in the RISC-V ISA manual starting from commit e87412e621f1 + ("integrate Zaamo and Zalrsc text (#1304)"). + + * :c:macro:`RISCV_HWPROBE_EXT_ZALRSC`: The Zalrsc extension is supported as + defined in the in the RISC-V ISA manual starting from commit e87412e621f1 + ("integrate Zaamo and Zalrsc text (#1304)"). + * :c:macro:`RISCV_HWPROBE_EXT_SUPM`: The Supm extension is supported as defined in version 1.0 of the RISC-V Pointer Masking extensions. diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 056decf65b1b7..3c2fce939673b 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -79,6 +79,8 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZVFBFMIN (1ULL << 53) #define RISCV_HWPROBE_EXT_ZVFBFWMA (1ULL << 54) #define RISCV_HWPROBE_EXT_ZICBOM (1ULL << 55) +#define RISCV_HWPROBE_EXT_ZAAMO (1ULL << 56) +#define RISCV_HWPROBE_EXT_ZALRSC (1ULL << 57) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index cfe6ceaf30693..f7380ff9ed17a 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -95,7 +95,9 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, * regardless of the kernel's configuration, as no other checks, besides * presence in the hart_isa bitmap, are made. */ + EXT_KEY(ZAAMO); EXT_KEY(ZACAS); + EXT_KEY(ZALRSC); EXT_KEY(ZAWRS); EXT_KEY(ZBA); EXT_KEY(ZBB); -- GitLab From 2d79608cf6112e787e6f90cf909bdadceed30dfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 19 Jun 2024 17:39:11 +0200 Subject: [PATCH 1574/1585] RISC-V: KVM: Allow Zaamo/Zalrsc extensions for Guest/VM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend the KVM ISA extension ONE_REG interface to allow KVM user space to detect and enable Zaamo/Zalrsc extensions for Guest/VM. Signed-off-by: Clément Léger Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20240619153913.867263-5-cleger@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/include/uapi/asm/kvm.h | 2 ++ arch/riscv/kvm/vcpu_onereg.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index f06bc5efcd79a..5f59fd226cc57 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -182,6 +182,8 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_SVVPTC, KVM_RISCV_ISA_EXT_ZABHA, KVM_RISCV_ISA_EXT_ZICCRSE, + KVM_RISCV_ISA_EXT_ZAAMO, + KVM_RISCV_ISA_EXT_ZALRSC, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index f6d27b59c6419..f05f12f809206 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -47,8 +47,10 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), KVM_ISA_EXT_ARR(SVVPTC), + KVM_ISA_EXT_ARR(ZAAMO), KVM_ISA_EXT_ARR(ZABHA), KVM_ISA_EXT_ARR(ZACAS), + KVM_ISA_EXT_ARR(ZALRSC), KVM_ISA_EXT_ARR(ZAWRS), KVM_ISA_EXT_ARR(ZBA), KVM_ISA_EXT_ARR(ZBB), @@ -149,8 +151,10 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_SVINVAL: case KVM_RISCV_ISA_EXT_SVNAPOT: case KVM_RISCV_ISA_EXT_SVVPTC: + case KVM_RISCV_ISA_EXT_ZAAMO: case KVM_RISCV_ISA_EXT_ZABHA: case KVM_RISCV_ISA_EXT_ZACAS: + case KVM_RISCV_ISA_EXT_ZALRSC: case KVM_RISCV_ISA_EXT_ZAWRS: case KVM_RISCV_ISA_EXT_ZBA: case KVM_RISCV_ISA_EXT_ZBB: -- GitLab From 7a9827e777da311672cdba2f4400362b1eebb2e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 19 Jun 2024 17:39:12 +0200 Subject: [PATCH 1575/1585] KVM: riscv: selftests: Add Zaamo/Zalrsc extensions to get-reg-list test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The KVM RISC-V allows Zaamo/Zalrsc extensions for Guest/VM so add these extensions to get-reg-list test. Signed-off-by: Clément Léger Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20240619153913.867263-6-cleger@rivosinc.com Signed-off-by: Alexandre Ghiti --- tools/testing/selftests/kvm/riscv/get-reg-list.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 8515921dfdbf2..569f2d67c9b81 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -53,8 +53,10 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVVPTC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAAMO: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZABHA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZALRSC: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB: @@ -434,8 +436,10 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), KVM_ISA_EXT_ARR(SVVPTC), + KVM_ISA_EXT_ARR(ZAAMO), KVM_ISA_EXT_ARR(ZABHA), KVM_ISA_EXT_ARR(ZACAS), + KVM_ISA_EXT_ARR(ZALRSC), KVM_ISA_EXT_ARR(ZAWRS), KVM_ISA_EXT_ARR(ZBA), KVM_ISA_EXT_ARR(ZBB), @@ -974,8 +978,10 @@ KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL); KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT); KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT); KVM_ISA_EXT_SIMPLE_CONFIG(svvptc, SVVPTC); +KVM_ISA_EXT_SIMPLE_CONFIG(zaamo, ZAAMO); KVM_ISA_EXT_SIMPLE_CONFIG(zabha, ZABHA); KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS); +KVM_ISA_EXT_SIMPLE_CONFIG(zalrsc, ZALRSC); KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS); KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA); KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB); @@ -1045,8 +1051,10 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_svnapot, &config_svpbmt, &config_svvptc, + &config_zaamo, &config_zabha, &config_zacas, + &config_zalrsc, &config_zawrs, &config_zba, &config_zbb, -- GitLab From a00e022be5315c5a1f47521a1cc6d3b71c8e9c44 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Mar 2025 13:00:16 +0100 Subject: [PATCH 1576/1585] riscv: Annotate unaligned access init functions Several functions used in unaligned access probing are only run at init time. Annotate them appropriately. Fixes: f413aae96cda ("riscv: Set unaligned access speed at compile time") Reviewed-by: Alexandre Ghiti Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20250304120014.143628-11-ajones@ventanamicro.com Signed-off-by: Alexandre Ghiti --- arch/riscv/include/asm/cpufeature.h | 4 ++-- arch/riscv/kernel/traps_misaligned.c | 8 ++++---- arch/riscv/kernel/unaligned_access_speed.c | 14 +++++++------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 569140d6e6399..19defdc2002d8 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -63,7 +63,7 @@ void __init riscv_user_isa_enable(void); #define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \ _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate) -bool check_unaligned_access_emulated_all_cpus(void); +bool __init check_unaligned_access_emulated_all_cpus(void); #if defined(CONFIG_RISCV_SCALAR_MISALIGNED) void check_unaligned_access_emulated(struct work_struct *work __always_unused); void unaligned_emulation_finish(void); @@ -76,7 +76,7 @@ static inline bool unaligned_ctl_available(void) } #endif -bool check_vector_unaligned_access_emulated_all_cpus(void); +bool __init check_vector_unaligned_access_emulated_all_cpus(void); #if defined(CONFIG_RISCV_VECTOR_MISALIGNED) void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused); DECLARE_PER_CPU(long, vector_misaligned_access); diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 7cc108aed74e8..aacbd9d7196e7 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -605,7 +605,7 @@ void check_vector_unaligned_access_emulated(struct work_struct *work __always_un kernel_vector_end(); } -bool check_vector_unaligned_access_emulated_all_cpus(void) +bool __init check_vector_unaligned_access_emulated_all_cpus(void) { int cpu; @@ -625,7 +625,7 @@ bool check_vector_unaligned_access_emulated_all_cpus(void) return true; } #else -bool check_vector_unaligned_access_emulated_all_cpus(void) +bool __init check_vector_unaligned_access_emulated_all_cpus(void) { return false; } @@ -659,7 +659,7 @@ void check_unaligned_access_emulated(struct work_struct *work __always_unused) } } -bool check_unaligned_access_emulated_all_cpus(void) +bool __init check_unaligned_access_emulated_all_cpus(void) { int cpu; @@ -684,7 +684,7 @@ bool unaligned_ctl_available(void) return unaligned_ctl; } #else -bool check_unaligned_access_emulated_all_cpus(void) +bool __init check_unaligned_access_emulated_all_cpus(void) { return false; } diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index 91f189cf16113..b7a8ff7ba6df4 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -121,7 +121,7 @@ static int check_unaligned_access(void *param) return 0; } -static void check_unaligned_access_nonboot_cpu(void *param) +static void __init check_unaligned_access_nonboot_cpu(void *param) { unsigned int cpu = smp_processor_id(); struct page **pages = param; @@ -175,7 +175,7 @@ static void set_unaligned_access_static_branches(void) modify_unaligned_access_branches(&fast_and_online, num_online_cpus()); } -static int lock_and_set_unaligned_access_static_branch(void) +static int __init lock_and_set_unaligned_access_static_branch(void) { cpus_read_lock(); set_unaligned_access_static_branches(); @@ -218,7 +218,7 @@ static int riscv_offline_cpu(unsigned int cpu) } /* Measure unaligned access speed on all CPUs present at boot in parallel. */ -static int check_unaligned_access_speed_all_cpus(void) +static int __init check_unaligned_access_speed_all_cpus(void) { unsigned int cpu; unsigned int cpu_count = num_possible_cpus(); @@ -264,7 +264,7 @@ static int check_unaligned_access_speed_all_cpus(void) return 0; } #else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ -static int check_unaligned_access_speed_all_cpus(void) +static int __init check_unaligned_access_speed_all_cpus(void) { return 0; } @@ -379,7 +379,7 @@ static int riscv_online_cpu_vec(unsigned int cpu) } /* Measure unaligned access speed on all CPUs present at boot in parallel. */ -static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) +static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) { schedule_on_each_cpu(check_vector_unaligned_access); @@ -393,13 +393,13 @@ static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unuse return 0; } #else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */ -static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) +static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) { return 0; } #endif -static int check_unaligned_access_all_cpus(void) +static int __init check_unaligned_access_all_cpus(void) { bool all_cpus_emulated, all_cpus_vec_unsupported; -- GitLab From 5af72a818612332a11171b16f27a62ec0a0f91d7 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Mar 2025 13:00:17 +0100 Subject: [PATCH 1577/1585] riscv: Fix riscv_online_cpu_vec We shouldn't probe when we already know vector is unsupported and we should probe when we see we don't yet know whether it's supported. Furthermore, we should ensure we've set the access type to unsupported when we don't have vector at all. Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe") Reviewed-by: Alexandre Ghiti Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20250304120014.143628-12-ajones@ventanamicro.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/unaligned_access_speed.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index b7a8ff7ba6df4..161964cf2abce 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -367,10 +367,12 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus static int riscv_online_cpu_vec(unsigned int cpu) { - if (!has_vector()) + if (!has_vector()) { + per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; return 0; + } - if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED) + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) return 0; check_vector_unaligned_access_emulated(NULL); -- GitLab From e6d0adf2eb5bb3244cb21a7a15899aa058bd384f Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Mar 2025 13:00:18 +0100 Subject: [PATCH 1578/1585] riscv: Fix check_unaligned_access_all_cpus check_vector_unaligned_access_emulated_all_cpus(), like its name suggests, will return true when all cpus emulate unaligned vector accesses. If the function returned false it may have been because vector isn't supported at all (!has_vector()) or because at least one cpu doesn't emulate unaligned vector accesses. Since false may be returned for two cases, checking for it isn't sufficient when attempting to determine if we should proceed with the vector speed check. Move the !has_vector() functionality to check_unaligned_access_all_cpus() in order for check_vector_unaligned_access_emulated_all_cpus() to return false for a single case. Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe") Reviewed-by: Alexandre Ghiti Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20250304120014.143628-13-ajones@ventanamicro.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/traps_misaligned.c | 6 ------ arch/riscv/kernel/unaligned_access_speed.c | 11 +++++++---- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index aacbd9d7196e7..4354c87c0376f 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -609,12 +609,6 @@ bool __init check_vector_unaligned_access_emulated_all_cpus(void) { int cpu; - if (!has_vector()) { - for_each_online_cpu(cpu) - per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; - return false; - } - schedule_on_each_cpu(check_vector_unaligned_access_emulated); for_each_online_cpu(cpu) diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index 161964cf2abce..02b485dc4bc42 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -403,13 +403,16 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway static int __init check_unaligned_access_all_cpus(void) { - bool all_cpus_emulated, all_cpus_vec_unsupported; + bool all_cpus_emulated; + int cpu; all_cpus_emulated = check_unaligned_access_emulated_all_cpus(); - all_cpus_vec_unsupported = check_vector_unaligned_access_emulated_all_cpus(); - if (!all_cpus_vec_unsupported && - IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { + if (!has_vector()) { + for_each_online_cpu(cpu) + per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + } else if (!check_vector_unaligned_access_emulated_all_cpus() && + IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { kthread_run(vec_check_unaligned_access_speed_all_cpus, NULL, "vec_check_unaligned_access_speed_all_cpus"); } -- GitLab From 813d39baee3229d31420af61460b97f4fafdd352 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Mar 2025 13:00:19 +0100 Subject: [PATCH 1579/1585] riscv: Change check_unaligned_access_speed_all_cpus to void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The return value of check_unaligned_access_speed_all_cpus() is always zero, so make the function void so we don't need to concern ourselves with it. The change also allows us to tidy up check_unaligned_access_all_cpus() a bit. Reviewed-by: Clément Léger Reviewed-by: Alexandre Ghiti Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20250304120014.143628-14-ajones@ventanamicro.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/unaligned_access_speed.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index 02b485dc4bc42..780f1c5f512a4 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -218,7 +218,7 @@ static int riscv_offline_cpu(unsigned int cpu) } /* Measure unaligned access speed on all CPUs present at boot in parallel. */ -static int __init check_unaligned_access_speed_all_cpus(void) +static void __init check_unaligned_access_speed_all_cpus(void) { unsigned int cpu; unsigned int cpu_count = num_possible_cpus(); @@ -226,7 +226,7 @@ static int __init check_unaligned_access_speed_all_cpus(void) if (!bufs) { pr_warn("Allocation failure, not measuring misaligned performance\n"); - return 0; + return; } /* @@ -261,12 +261,10 @@ static int __init check_unaligned_access_speed_all_cpus(void) } kfree(bufs); - return 0; } #else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ -static int __init check_unaligned_access_speed_all_cpus(void) +static void __init check_unaligned_access_speed_all_cpus(void) { - return 0; } #endif @@ -403,10 +401,10 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway static int __init check_unaligned_access_all_cpus(void) { - bool all_cpus_emulated; int cpu; - all_cpus_emulated = check_unaligned_access_emulated_all_cpus(); + if (!check_unaligned_access_emulated_all_cpus()) + check_unaligned_access_speed_all_cpus(); if (!has_vector()) { for_each_online_cpu(cpu) @@ -417,9 +415,6 @@ static int __init check_unaligned_access_all_cpus(void) NULL, "vec_check_unaligned_access_speed_all_cpus"); } - if (!all_cpus_emulated) - return check_unaligned_access_speed_all_cpus(); - return 0; } -- GitLab From 05ee21f0fcb8ca29bf42bd6cbce109f2e49c167f Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Mar 2025 13:00:20 +0100 Subject: [PATCH 1580/1585] riscv: Fix set up of cpu hotplug callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CPU hotplug callbacks should be set up even if we detected all current cpus emulate misaligned accesses, since we want to ensure our expectations of all cpus emulating is maintained. Fixes: 6e5ce7f2eae3 ("riscv: Decouple emulated unaligned accesses from access speed") Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe") Reviewed-by: Clément Léger Reviewed-by: Alexandre Ghiti Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20250304120014.143628-15-ajones@ventanamicro.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/unaligned_access_speed.c | 27 +++++++++++----------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index 780f1c5f512a4..c9d3237649bbc 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -247,13 +247,6 @@ static void __init check_unaligned_access_speed_all_cpus(void) /* Check core 0. */ smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); - /* - * Setup hotplug callbacks for any new CPUs that come online or go - * offline. - */ - cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", - riscv_online_cpu, riscv_offline_cpu); - out: for_each_cpu(cpu, cpu_online_mask) { if (bufs[cpu]) @@ -383,13 +376,6 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway { schedule_on_each_cpu(check_vector_unaligned_access); - /* - * Setup hotplug callbacks for any new CPUs that come online or go - * offline. - */ - cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", - riscv_online_cpu_vec, NULL); - return 0; } #else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */ @@ -415,6 +401,19 @@ static int __init check_unaligned_access_all_cpus(void) NULL, "vec_check_unaligned_access_speed_all_cpus"); } + /* + * Setup hotplug callbacks for any new CPUs that come online or go + * offline. + */ +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu, riscv_offline_cpu); +#endif +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu_vec, NULL); +#endif + return 0; } -- GitLab From 2744ec472de31141ad354907ff98843dd6040917 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Mar 2025 13:00:21 +0100 Subject: [PATCH 1581/1585] riscv: Fix set up of vector cpu hotplug callback Whether or not we have RISCV_PROBE_VECTOR_UNALIGNED_ACCESS we need to set up a cpu hotplug callback to check if we have vector at all, since, when we don't have vector, we need to set vector_misaligned_access to unsupported rather than leave it the default of unknown. Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe") Reviewed-by: Alexandre Ghiti Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20250304120014.143628-16-ajones@ventanamicro.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/unaligned_access_speed.c | 31 +++++++++++----------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index c9d3237649bbc..d9d4ca1fadc7f 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -356,6 +356,20 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus per_cpu(vector_misaligned_access, cpu) = speed; } +/* Measure unaligned access speed on all CPUs present at boot in parallel. */ +static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) +{ + schedule_on_each_cpu(check_vector_unaligned_access); + + return 0; +} +#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */ +static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) +{ + return 0; +} +#endif + static int riscv_online_cpu_vec(unsigned int cpu) { if (!has_vector()) { @@ -363,27 +377,16 @@ static int riscv_online_cpu_vec(unsigned int cpu) return 0; } +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) return 0; check_vector_unaligned_access_emulated(NULL); check_vector_unaligned_access(NULL); - return 0; -} - -/* Measure unaligned access speed on all CPUs present at boot in parallel. */ -static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) -{ - schedule_on_each_cpu(check_vector_unaligned_access); +#endif return 0; } -#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */ -static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) -{ - return 0; -} -#endif static int __init check_unaligned_access_all_cpus(void) { @@ -409,10 +412,8 @@ static int __init check_unaligned_access_all_cpus(void) cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", riscv_online_cpu, riscv_offline_cpu); #endif -#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", riscv_online_cpu_vec, NULL); -#endif return 0; } -- GitLab From aecb09e091dc143345a9e4b282d0444554445f4b Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Mar 2025 13:00:22 +0100 Subject: [PATCH 1582/1585] riscv: Add parameter for skipping access speed tests Allow skipping scalar and vector unaligned access speed tests. This is useful for testing alternative code paths and to skip the tests in environments where they run too slowly. All CPUs must have the same unaligned access speed. The code movement is because we now need the scalar cpu hotplug callback to always run, so we need to bring it and its supporting functions out of CONFIG_RISCV_PROBE_UNALIGNED_ACCESS. Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20250304120014.143628-17-ajones@ventanamicro.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/unaligned_access_speed.c | 187 +++++++++++++-------- 1 file changed, 121 insertions(+), 66 deletions(-) diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index d9d4ca1fadc7f..18e3345495444 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -24,8 +24,12 @@ DEFINE_PER_CPU(long, misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; -#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS +static long unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; +static long unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; + static cpumask_t fast_misaligned_access; + +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS static int check_unaligned_access(void *param) { int cpu = smp_processor_id(); @@ -130,6 +134,50 @@ static void __init check_unaligned_access_nonboot_cpu(void *param) check_unaligned_access(pages[cpu]); } +/* Measure unaligned access speed on all CPUs present at boot in parallel. */ +static void __init check_unaligned_access_speed_all_cpus(void) +{ + unsigned int cpu; + unsigned int cpu_count = num_possible_cpus(); + struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL); + + if (!bufs) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return; + } + + /* + * Allocate separate buffers for each CPU so there's no fighting over + * cache lines. + */ + for_each_cpu(cpu, cpu_online_mask) { + bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!bufs[cpu]) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + goto out; + } + } + + /* Check everybody except 0, who stays behind to tend jiffies. */ + on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); + + /* Check core 0. */ + smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); + +out: + for_each_cpu(cpu, cpu_online_mask) { + if (bufs[cpu]) + __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); + } + + kfree(bufs); +} +#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ +static void __init check_unaligned_access_speed_all_cpus(void) +{ +} +#endif + DEFINE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key); static void modify_unaligned_access_branches(cpumask_t *mask, int weight) @@ -188,21 +236,29 @@ arch_initcall_sync(lock_and_set_unaligned_access_static_branch); static int riscv_online_cpu(unsigned int cpu) { - static struct page *buf; - /* We are already set since the last check */ - if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) { + goto exit; + } else if (unaligned_scalar_speed_param != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) { + per_cpu(misaligned_access_speed, cpu) = unaligned_scalar_speed_param; goto exit; - - check_unaligned_access_emulated(NULL); - buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); - if (!buf) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - return -ENOMEM; } - check_unaligned_access(buf); - __free_pages(buf, MISALIGNED_BUFFER_ORDER); +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS + { + static struct page *buf; + + check_unaligned_access_emulated(NULL); + buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!buf) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return -ENOMEM; + } + + check_unaligned_access(buf); + __free_pages(buf, MISALIGNED_BUFFER_ORDER); + } +#endif exit: set_unaligned_access_static_branches(); @@ -217,50 +273,6 @@ static int riscv_offline_cpu(unsigned int cpu) return 0; } -/* Measure unaligned access speed on all CPUs present at boot in parallel. */ -static void __init check_unaligned_access_speed_all_cpus(void) -{ - unsigned int cpu; - unsigned int cpu_count = num_possible_cpus(); - struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL); - - if (!bufs) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - return; - } - - /* - * Allocate separate buffers for each CPU so there's no fighting over - * cache lines. - */ - for_each_cpu(cpu, cpu_online_mask) { - bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); - if (!bufs[cpu]) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - goto out; - } - } - - /* Check everybody except 0, who stays behind to tend jiffies. */ - on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); - - /* Check core 0. */ - smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); - -out: - for_each_cpu(cpu, cpu_online_mask) { - if (bufs[cpu]) - __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); - } - - kfree(bufs); -} -#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ -static void __init check_unaligned_access_speed_all_cpus(void) -{ -} -#endif - #ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS static void check_vector_unaligned_access(struct work_struct *work __always_unused) { @@ -372,8 +384,8 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway static int riscv_online_cpu_vec(unsigned int cpu) { - if (!has_vector()) { - per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + if (unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) { + per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param; return 0; } @@ -388,30 +400,73 @@ static int riscv_online_cpu_vec(unsigned int cpu) return 0; } +static const char * const speed_str[] __initconst = { NULL, NULL, "slow", "fast", "unsupported" }; + +static int __init set_unaligned_scalar_speed_param(char *str) +{ + if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW])) + unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW; + else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_FAST])) + unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_FAST; + else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED])) + unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED; + else + return -EINVAL; + + return 1; +} +__setup("unaligned_scalar_speed=", set_unaligned_scalar_speed_param); + +static int __init set_unaligned_vector_speed_param(char *str) +{ + if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW])) + unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW; + else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_FAST])) + unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_FAST; + else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED])) + unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + else + return -EINVAL; + + return 1; +} +__setup("unaligned_vector_speed=", set_unaligned_vector_speed_param); + static int __init check_unaligned_access_all_cpus(void) { int cpu; - if (!check_unaligned_access_emulated_all_cpus()) + if (unaligned_scalar_speed_param == RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN && + !check_unaligned_access_emulated_all_cpus()) { check_unaligned_access_speed_all_cpus(); - - if (!has_vector()) { + } else { + pr_info("scalar unaligned access speed set to '%s' by command line\n", + speed_str[unaligned_scalar_speed_param]); for_each_online_cpu(cpu) - per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; - } else if (!check_vector_unaligned_access_emulated_all_cpus() && - IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { + per_cpu(misaligned_access_speed, cpu) = unaligned_scalar_speed_param; + } + + if (!has_vector()) + unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + + if (unaligned_vector_speed_param == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN && + !check_vector_unaligned_access_emulated_all_cpus() && + IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { kthread_run(vec_check_unaligned_access_speed_all_cpus, NULL, "vec_check_unaligned_access_speed_all_cpus"); + } else { + pr_info("vector unaligned access speed set to '%s' by command line\n", + speed_str[unaligned_vector_speed_param]); + for_each_online_cpu(cpu) + per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param; } /* * Setup hotplug callbacks for any new CPUs that come online or go * offline. */ -#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", riscv_online_cpu, riscv_offline_cpu); -#endif cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", riscv_online_cpu_vec, NULL); -- GitLab From 9fe58530a8cd1402aaa35cad19143777a6f65393 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Mar 2025 13:00:23 +0100 Subject: [PATCH 1583/1585] Documentation/kernel-parameters: Add riscv unaligned speed parameters Document riscv parameters used to select scalar and vector unaligned access speeds. Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20250304120014.143628-18-ajones@ventanamicro.com Signed-off-by: Alexandre Ghiti --- Documentation/admin-guide/kernel-parameters.txt | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index fb8752b42ec85..9e3c5fecfa52e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -7477,6 +7477,22 @@ Note that genuine overcurrent events won't be reported either. + unaligned_scalar_speed= + [RISCV] + Format: {slow | fast | unsupported} + Allow skipping scalar unaligned access speed tests. This + is useful for testing alternative code paths and to skip + the tests in environments where they run too slowly. All + CPUs must have the same scalar unaligned access speed. + + unaligned_vector_speed= + [RISCV] + Format: {slow | fast | unsupported} + Allow skipping vector unaligned access speed tests. This + is useful for testing alternative code paths and to skip + the tests in environments where they run too slowly. All + CPUs must have the same vector unaligned access speed. + unknown_nmi_panic [X86] Cause panic on unknown NMI. -- GitLab From afa8a93932aa63b107d81bd438454760d8c7c8a3 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Wed, 19 Mar 2025 11:35:19 -0700 Subject: [PATCH 1584/1585] riscv: Move nop definition to insn-def.h We have duplicated the definition of the nop instruction in ftrace.h and in jump_label.c. Move this definition into the generic file insn-def.h so that they can share the definition with each other and with future files. Signed-off-by: Charlie Jenkins Reviewed-by: Alexandre Ghiti Tested-by: Alexandre Ghiti Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20250319-runtime_const_riscv-v10-1-745b31a11d65@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/include/asm/ftrace.h | 1 - arch/riscv/include/asm/insn-def.h | 3 +++ arch/riscv/kernel/ftrace.c | 6 +++--- arch/riscv/kernel/jump_label.c | 4 ++-- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index c4721ce44ca47..b7f361a50f644 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -79,7 +79,6 @@ struct dyn_arch_ftrace { #define AUIPC_RA (0x00000097) #define JALR_T0 (0x000282e7) #define AUIPC_T0 (0x00000297) -#define NOP4 (0x00000013) #define to_jalr_t0(offset) \ (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_T0) diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h index 9a913010cdd93..71060a2f838e2 100644 --- a/arch/riscv/include/asm/insn-def.h +++ b/arch/riscv/include/asm/insn-def.h @@ -199,5 +199,8 @@ #define RISCV_PAUSE ".4byte 0x100000f" #define ZAWRS_WRS_NTO ".4byte 0x00d00073" #define ZAWRS_WRS_STO ".4byte 0x01d00073" +#define RISCV_NOP4 ".4byte 0x00000013" + +#define RISCV_INSN_NOP4 _AC(0x00000013, U) #endif /* __ASM_INSN_DEF_H */ diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 3524db5e4fa01..674dcdfae7a14 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -36,7 +36,7 @@ static int ftrace_check_current_call(unsigned long hook_pos, unsigned int *expected) { unsigned int replaced[2]; - unsigned int nops[2] = {NOP4, NOP4}; + unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4}; /* we expect nops at the hook position */ if (!expected) @@ -68,7 +68,7 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target, bool enable, bool ra) { unsigned int call[2]; - unsigned int nops[2] = {NOP4, NOP4}; + unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4}; if (ra) make_call_ra(hook_pos, target, call); @@ -97,7 +97,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned int nops[2] = {NOP4, NOP4}; + unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4}; if (patch_insn_write((void *)rec->ip, nops, MCOUNT_INSN_SIZE)) return -EPERM; diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c index 654ed159c830b..b4c1a6a3fbd28 100644 --- a/arch/riscv/kernel/jump_label.c +++ b/arch/riscv/kernel/jump_label.c @@ -11,8 +11,8 @@ #include #include #include +#include -#define RISCV_INSN_NOP 0x00000013U #define RISCV_INSN_JAL 0x0000006fU bool arch_jump_label_transform_queue(struct jump_entry *entry, @@ -33,7 +33,7 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry, (((u32)offset & GENMASK(10, 1)) << (21 - 1)) | (((u32)offset & GENMASK(20, 20)) << (31 - 20)); } else { - insn = RISCV_INSN_NOP; + insn = RISCV_INSN_NOP4; } if (early_boot_irqs_disabled) { -- GitLab From a44fb5722199de8338d991db5ad3d509192179bb Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Wed, 19 Mar 2025 11:35:20 -0700 Subject: [PATCH 1585/1585] riscv: Add runtime constant support Implement the runtime constant infrastructure for riscv. Use this infrastructure to generate constants to be used by the d_hash() function. This is the riscv variant of commit 94a2bc0f611c ("arm64: add 'runtime constant' support") and commit e3c92e81711d ("runtime constants: add x86 architecture support"). [ alex: Remove trailing whitespace ] Signed-off-by: Charlie Jenkins Reviewed-by: Alexandre Ghiti Tested-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250319-runtime_const_riscv-v10-2-745b31a11d65@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/Kconfig | 22 ++ arch/riscv/include/asm/asm.h | 1 + arch/riscv/include/asm/runtime-const.h | 265 +++++++++++++++++++++++++ arch/riscv/kernel/vmlinux.lds.S | 3 + 4 files changed, 291 insertions(+) create mode 100644 arch/riscv/include/asm/runtime-const.h diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 7612c52e9b1e3..c123f7c0579c1 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -783,6 +783,28 @@ config RISCV_ISA_ZBC If you don't know what to do here, say Y. +config TOOLCHAIN_HAS_ZBKB + bool + default y + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zbkb) + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zbkb) + depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900 + depends on AS_HAS_OPTION_ARCH + +config RISCV_ISA_ZBKB + bool "Zbkb extension support for bit manipulation instructions" + depends on TOOLCHAIN_HAS_ZBKB + depends on RISCV_ALTERNATIVE + default y + help + Adds support to dynamically detect the presence of the ZBKB + extension (bit manipulation for cryptography) and enable its usage. + + The Zbkb extension provides instructions to accelerate a number + of common cryptography operations (pack, zip, etc). + + If you don't know what to do here, say Y. + config RISCV_ISA_ZICBOM bool "Zicbom extension support for non-coherent DMA operation" depends on MMU diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 776354895b81e..a8a2af6dfe9d2 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -27,6 +27,7 @@ #define REG_ASM __REG_SEL(.dword, .word) #define SZREG __REG_SEL(8, 4) #define LGREG __REG_SEL(3, 2) +#define SRLI __REG_SEL(srliw, srli) #if __SIZEOF_POINTER__ == 8 #ifdef __ASSEMBLY__ diff --git a/arch/riscv/include/asm/runtime-const.h b/arch/riscv/include/asm/runtime-const.h new file mode 100644 index 0000000000000..ea2e49c7149cd --- /dev/null +++ b/arch/riscv/include/asm/runtime-const.h @@ -0,0 +1,265 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_RUNTIME_CONST_H +#define _ASM_RISCV_RUNTIME_CONST_H + +#include +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_32BIT +#define runtime_const_ptr(sym) \ +({ \ + typeof(sym) __ret; \ + asm_inline(".option push\n\t" \ + ".option norvc\n\t" \ + "1:\t" \ + "lui %[__ret],0x89abd\n\t" \ + "addi %[__ret],%[__ret],-0x211\n\t" \ + ".option pop\n\t" \ + ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + : [__ret] "=r" (__ret)); \ + __ret; \ +}) +#else +/* + * Loading 64-bit constants into a register from immediates is a non-trivial + * task on riscv64. To get it somewhat performant, load 32 bits into two + * different registers and then combine the results. + * + * If the processor supports the Zbkb extension, we can combine the final + * "slli,slli,srli,add" into the single "pack" instruction. If the processor + * doesn't support Zbkb but does support the Zbb extension, we can + * combine the final "slli,srli,add" into one instruction "add.uw". + */ +#define RISCV_RUNTIME_CONST_64_PREAMBLE \ + ".option push\n\t" \ + ".option norvc\n\t" \ + "1:\t" \ + "lui %[__ret],0x89abd\n\t" \ + "lui %[__tmp],0x1234\n\t" \ + "addiw %[__ret],%[__ret],-0x211\n\t" \ + "addiw %[__tmp],%[__tmp],0x567\n\t" \ + +#define RISCV_RUNTIME_CONST_64_BASE \ + "slli %[__tmp],%[__tmp],32\n\t" \ + "slli %[__ret],%[__ret],32\n\t" \ + "srli %[__ret],%[__ret],32\n\t" \ + "add %[__ret],%[__ret],%[__tmp]\n\t" \ + +#define RISCV_RUNTIME_CONST_64_ZBA \ + ".option push\n\t" \ + ".option arch,+zba\n\t" \ + "slli %[__tmp],%[__tmp],32\n\t" \ + "add.uw %[__ret],%[__ret],%[__tmp]\n\t" \ + "nop\n\t" \ + "nop\n\t" \ + ".option pop\n\t" \ + +#define RISCV_RUNTIME_CONST_64_ZBKB \ + ".option push\n\t" \ + ".option arch,+zbkb\n\t" \ + "pack %[__ret],%[__ret],%[__tmp]\n\t" \ + "nop\n\t" \ + "nop\n\t" \ + "nop\n\t" \ + ".option pop\n\t" \ + +#define RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \ + ".option pop\n\t" \ + ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + +#if defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_RISCV_ISA_ZBKB) +#define runtime_const_ptr(sym) \ +({ \ + typeof(sym) __ret, __tmp; \ + asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE \ + ALTERNATIVE_2( \ + RISCV_RUNTIME_CONST_64_BASE, \ + RISCV_RUNTIME_CONST_64_ZBA, \ + 0, RISCV_ISA_EXT_ZBA, 1, \ + RISCV_RUNTIME_CONST_64_ZBKB, \ + 0, RISCV_ISA_EXT_ZBKB, 1 \ + ) \ + RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \ + : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \ + __ret; \ +}) +#elif defined(CONFIG_RISCV_ISA_ZBA) +#define runtime_const_ptr(sym) \ +({ \ + typeof(sym) __ret, __tmp; \ + asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE \ + ALTERNATIVE( \ + RISCV_RUNTIME_CONST_64_BASE, \ + RISCV_RUNTIME_CONST_64_ZBA, \ + 0, RISCV_ISA_EXT_ZBA, 1 \ + ) \ + RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \ + : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \ + __ret; \ +}) +#elif defined(CONFIG_RISCV_ISA_ZBKB) +#define runtime_const_ptr(sym) \ +({ \ + typeof(sym) __ret, __tmp; \ + asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE \ + ALTERNATIVE( \ + RISCV_RUNTIME_CONST_64_BASE, \ + RISCV_RUNTIME_CONST_64_ZBKB, \ + 0, RISCV_ISA_EXT_ZBKB, 1 \ + ) \ + RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \ + : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \ + __ret; \ +}) +#else +#define runtime_const_ptr(sym) \ +({ \ + typeof(sym) __ret, __tmp; \ + asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE \ + RISCV_RUNTIME_CONST_64_BASE \ + RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \ + : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \ + __ret; \ +}) +#endif +#endif + +#define runtime_const_shift_right_32(val, sym) \ +({ \ + u32 __ret; \ + asm_inline(".option push\n\t" \ + ".option norvc\n\t" \ + "1:\t" \ + SRLI " %[__ret],%[__val],12\n\t" \ + ".option pop\n\t" \ + ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + : [__ret] "=r" (__ret) \ + : [__val] "r" (val)); \ + __ret; \ +}) + +#define runtime_const_init(type, sym) do { \ + extern s32 __start_runtime_##type##_##sym[]; \ + extern s32 __stop_runtime_##type##_##sym[]; \ + \ + runtime_const_fixup(__runtime_fixup_##type, \ + (unsigned long)(sym), \ + __start_runtime_##type##_##sym, \ + __stop_runtime_##type##_##sym); \ +} while (0) + +static inline void __runtime_fixup_caches(void *where, unsigned int insns) +{ + /* On riscv there are currently only cache-wide flushes so va is ignored. */ + __always_unused uintptr_t va = (uintptr_t)where; + + flush_icache_range(va, va + 4 * insns); +} + +/* + * The 32-bit immediate is stored in a lui+addi pairing. + * lui holds the upper 20 bits of the immediate in the first 20 bits of the instruction. + * addi holds the lower 12 bits of the immediate in the first 12 bits of the instruction. + */ +static inline void __runtime_fixup_32(__le16 *lui_parcel, __le16 *addi_parcel, unsigned int val) +{ + unsigned int lower_immediate, upper_immediate; + u32 lui_insn, addi_insn, addi_insn_mask; + __le32 lui_res, addi_res; + + /* Mask out upper 12 bit of addi */ + addi_insn_mask = 0x000fffff; + + lui_insn = (u32)le16_to_cpu(lui_parcel[0]) | (u32)le16_to_cpu(lui_parcel[1]) << 16; + addi_insn = (u32)le16_to_cpu(addi_parcel[0]) | (u32)le16_to_cpu(addi_parcel[1]) << 16; + + lower_immediate = sign_extend32(val, 11); + upper_immediate = (val - lower_immediate); + + if (upper_immediate & 0xfffff000) { + /* replace upper 20 bits of lui with upper immediate */ + lui_insn &= 0x00000fff; + lui_insn |= upper_immediate & 0xfffff000; + } else { + /* replace lui with nop if immediate is small enough to fit in addi */ + lui_insn = RISCV_INSN_NOP4; + /* + * lui is being skipped, so do a load instead of an add. A load + * is performed by adding with the x0 register. Setting rs to + * zero with the following mask will accomplish this goal. + */ + addi_insn_mask &= 0x07fff; + } + + if (lower_immediate & 0x00000fff) { + /* replace upper 12 bits of addi with lower 12 bits of val */ + addi_insn &= addi_insn_mask; + addi_insn |= (lower_immediate & 0x00000fff) << 20; + } else { + /* replace addi with nop if lower_immediate is empty */ + addi_insn = RISCV_INSN_NOP4; + } + + addi_res = cpu_to_le32(addi_insn); + lui_res = cpu_to_le32(lui_insn); + mutex_lock(&text_mutex); + patch_insn_write(addi_parcel, &addi_res, sizeof(addi_res)); + patch_insn_write(lui_parcel, &lui_res, sizeof(lui_res)); + mutex_unlock(&text_mutex); +} + +static inline void __runtime_fixup_ptr(void *where, unsigned long val) +{ +#ifdef CONFIG_32BIT + __runtime_fixup_32(where, where + 4, val); + __runtime_fixup_caches(where, 2); +#else + __runtime_fixup_32(where, where + 8, val); + __runtime_fixup_32(where + 4, where + 12, val >> 32); + __runtime_fixup_caches(where, 4); +#endif +} + +/* + * Replace the least significant 5 bits of the srli/srliw immediate that is + * located at bits 20-24 + */ +static inline void __runtime_fixup_shift(void *where, unsigned long val) +{ + __le16 *parcel = where; + __le32 res; + u32 insn; + + insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16; + + insn &= 0xfe0fffff; + insn |= (val & 0b11111) << 20; + + res = cpu_to_le32(insn); + mutex_lock(&text_mutex); + patch_text_nosync(where, &res, sizeof(insn)); + mutex_unlock(&text_mutex); +} + +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), + unsigned long val, s32 *start, s32 *end) +{ + while (start < end) { + fn(*start + (void *)start, val); + start++; + } +} + +#endif /* _ASM_RISCV_RUNTIME_CONST_H */ diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 002ca58dd998c..61bd5ba6680a7 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -97,6 +97,9 @@ SECTIONS { EXIT_DATA } + + RUNTIME_CONST_VARIABLES + PERCPU_SECTION(L1_CACHE_BYTES) .rel.dyn : { -- GitLab